arxivarius 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +125 -0
- data/lib/arxivarius/author.rb +20 -0
- data/lib/arxivarius/category.rb +25 -0
- data/lib/arxivarius/data/categories.yml +156 -0
- data/lib/arxivarius/link.rb +10 -0
- data/lib/arxivarius/paper.rb +68 -0
- data/lib/arxivarius/text.rb +11 -0
- data/lib/arxivarius/version.rb +5 -0
- data/lib/arxivarius.rb +80 -0
- metadata +79 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ae569c1030d082bb94e10a63472ef34aeb89bb9bef56db25c57f68254dc00be8
|
|
4
|
+
data.tar.gz: f3f664f4a8feb5b7a585166187590018ae68f7e98cd5ae290a85259baab40222
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: f218c5e45f1f1305ab9437a205f9c35db5494bc5491af3d6d19c560e025ba452990caff7fb709fcde5e07302fe73ad8ea9e2a6f043831873cdd076c2fd8a8ccf
|
|
7
|
+
data.tar.gz: 95396dc42e302e54b4e6ae42475bd375ee77d8f49b04ee6383e57dfb0a8d144c2cc1c59cd135ea57c043fa1be8c9fc575c8cf09240dea03179e939097415ca7b
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 antlypls
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# ArXivarius
|
|
2
|
+
|
|
3
|
+
A Ruby gem for fetching paper metadata from the [arXiv](https://arxiv.org/).
|
|
4
|
+
Retrieve titles, abstracts, authors, categories, and links for any paper on arXiv.
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
Add to your Gemfile:
|
|
9
|
+
|
|
10
|
+
```ruby
|
|
11
|
+
gem 'arxivarius'
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Or install directly:
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
gem install arxivarius
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
### Fetching a paper
|
|
23
|
+
|
|
24
|
+
Pass any arXiv ID to `Arxivarius.get`:
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
paper = Arxivarius.get('2601.00470')
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
All common ID formats work:
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
Arxivarius.get('2601.00470') # current format
|
|
34
|
+
Arxivarius.get('2601.00470v1') # with version
|
|
35
|
+
Arxivarius.get('hep-th/9901001') # legacy format
|
|
36
|
+
Arxivarius.get('math.DG/0510097') # legacy with subcategory (auto-normalized)
|
|
37
|
+
Arxivarius.get('https://arxiv.org/abs/2601.00470') # full URL
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Paper metadata
|
|
41
|
+
|
|
42
|
+
```ruby
|
|
43
|
+
paper.title # => "Betelgeuse: Detection of the Expanding Wake of the Companion Star"
|
|
44
|
+
paper.abstract # => "Recent analyses conclude that Betelgeuse, a red supergiant star..."
|
|
45
|
+
paper.comment # => "20 pages + 1 appendix, 9 figures, accepted by ApJ"
|
|
46
|
+
paper.arxiv_url # => "https://arxiv.org/abs/2601.00470v1"
|
|
47
|
+
paper.created_at # => 2026-01-01 20:56:05 UTC
|
|
48
|
+
paper.updated_at # => 2026-01-01 20:56:05 UTC
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### IDs and versions
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
paper.arxiv_id # => "2601.00470"
|
|
55
|
+
paper.arxiv_versioned_id # => "2601.00470v1"
|
|
56
|
+
paper.version # => 1
|
|
57
|
+
paper.revision? # => false (true when the paper has been updated)
|
|
58
|
+
paper.legacy_article? # => false (true for pre-2007 IDs like hep-th/9901001)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### PDF access
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
paper.available_in_pdf? # => true
|
|
65
|
+
paper.pdf_url # => "https://arxiv.org/pdf/2601.00470v1"
|
|
66
|
+
paper.content_types # => ["text/html", "application/pdf"]
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Links
|
|
70
|
+
|
|
71
|
+
Each paper has a list of links with URLs and content types:
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
paper.links.each do |link|
|
|
75
|
+
link.url # => "https://arxiv.org/pdf/2601.00470v1"
|
|
76
|
+
link.content_type # => "application/pdf"
|
|
77
|
+
end
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Authors
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
paper.authors.map(&:name)
|
|
84
|
+
# => ["Andrea K. Dupree", "Paul I. Cristofari", "Morgan MacLeod", "Kateryna Kravchenko"]
|
|
85
|
+
|
|
86
|
+
author = paper.authors.first
|
|
87
|
+
author.name # => "Andrea K. Dupree"
|
|
88
|
+
author.first_name # => "Andrea K."
|
|
89
|
+
author.last_name # => "Dupree"
|
|
90
|
+
author.affiliations # => ["Center for Astrophysics | Harvard & Smithsonian, Cambridge, USA"]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Categories
|
|
94
|
+
|
|
95
|
+
The gem includes descriptions for all 150+ arXiv categories:
|
|
96
|
+
|
|
97
|
+
```ruby
|
|
98
|
+
paper.categories.map(&:name) # => ["astro-ph.SR", "physics.space-ph"]
|
|
99
|
+
|
|
100
|
+
cat = paper.primary_category
|
|
101
|
+
cat.name # => "astro-ph.SR"
|
|
102
|
+
cat.description # => "Solar and Stellar Astrophysics"
|
|
103
|
+
cat.long_description # => "astro-ph.SR (Solar and Stellar Astrophysics)"
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Error handling
|
|
107
|
+
|
|
108
|
+
```ruby
|
|
109
|
+
begin
|
|
110
|
+
paper = Arxivarius.get('2601.00470')
|
|
111
|
+
rescue Arxivarius::Error::PaperNotFound
|
|
112
|
+
# paper does not exist on arXiv
|
|
113
|
+
rescue Arxivarius::Error::MalformedId
|
|
114
|
+
# the ID format is invalid
|
|
115
|
+
end
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Credits
|
|
119
|
+
|
|
120
|
+
ArXivarius is based on the outdated [arxiv](https://rubygems.org/gems/arxiv) gem.
|
|
121
|
+
ArXivarius picks up where it left off, with updated dependencies and a few improvements.
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
MIT
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Arxivarius
  # One author of a paper: the display name plus any affiliation strings
  # attached to it. Both are run through the `:squish` parser of `Text`.
  class Author
    include HappyMapper

    element :name, Text, parser: :squish
    has_many :affiliations, Text, parser: :squish, tag: 'affiliation'

    # First element of the split name (e.g. "Andrea K." for
    # "Andrea K. Dupree", per the README example).
    def first_name
      name_parts.first
    end

    # Last element of the split name (e.g. "Dupree").
    def last_name
      name_parts.last
    end

    private

    # Result of FullNameSplitter.split for this author's name, computed on
    # first use and cached on the instance afterwards.
    def name_parts
      return @name_parts if @name_parts

      @name_parts = FullNameSplitter.split(name)
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Arxivarius
  # A subject category attached to a paper, identified by the `term`
  # attribute of its XML element (e.g. "astro-ph.SR").
  class Category
    include HappyMapper

    # Location of the bundled name => label table. The arXiv API does not
    # expose human-readable category labels, so a local copy ships with the gem.
    CATEGORIES_PATH = File.expand_path('data/categories.yml', __dir__)

    class << self
      # The name => label table, loaded from CATEGORIES_PATH on first access
      # and cached on the class.
      def types
        @types ||= YAML.safe_load_file(CATEGORIES_PATH)
      end
    end

    attribute :name, String, tag: 'term'

    # Human-readable label for this category, or nil when the name is not
    # present in the bundled table.
    def description
      Category.types[name]
    end

    # "name (label)" when a label is known, otherwise just the name.
    def long_description
      label = description
      return name unless label

      "#{name} (#{label})"
    end
  end
end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
---
|
|
2
|
+
astro-ph.CO: Cosmology and Nongalactic Astrophysics
|
|
3
|
+
astro-ph.EP: Earth and Planetary Astrophysics
|
|
4
|
+
astro-ph.GA: Astrophysics of Galaxies
|
|
5
|
+
astro-ph.HE: High Energy Astrophysical Phenomena
|
|
6
|
+
astro-ph.IM: Instrumentation and Methods for Astrophysics
|
|
7
|
+
astro-ph.SR: Solar and Stellar Astrophysics
|
|
8
|
+
cond-mat.dis-nn: Disordered Systems and Neural Networks
|
|
9
|
+
cond-mat.mes-hall: Mesoscale and Nanoscale Physics
|
|
10
|
+
cond-mat.mtrl-sci: Materials Science
|
|
11
|
+
cond-mat.other: Other Condensed Matter
|
|
12
|
+
cond-mat.quant-gas: Quantum Gases
|
|
13
|
+
cond-mat.soft: Soft Condensed Matter
|
|
14
|
+
cond-mat.stat-mech: Statistical Mechanics
|
|
15
|
+
cond-mat.str-el: Strongly Correlated Electrons
|
|
16
|
+
cond-mat.supr-con: Superconductivity
|
|
17
|
+
cs.AI: Artificial Intelligence
|
|
18
|
+
cs.AR: Hardware Architecture
|
|
19
|
+
cs.CC: Computational Complexity
|
|
20
|
+
cs.CE: Computational Engineering, Finance, and Science
|
|
21
|
+
cs.CG: Computational Geometry
|
|
22
|
+
cs.CL: Computation and Language
|
|
23
|
+
cs.CR: Cryptography and Security
|
|
24
|
+
cs.CV: Computer Vision and Pattern Recognition
|
|
25
|
+
cs.CY: Computers and Society
|
|
26
|
+
cs.DB: Databases
|
|
27
|
+
cs.DC: Distributed, Parallel, and Cluster Computing
|
|
28
|
+
cs.DL: Digital Libraries
|
|
29
|
+
cs.DM: Discrete Mathematics
|
|
30
|
+
cs.DS: Data Structures and Algorithms
|
|
31
|
+
cs.ET: Emerging Technologies
|
|
32
|
+
cs.FL: Formal Languages and Automata Theory
|
|
33
|
+
cs.GL: General Literature
|
|
34
|
+
cs.GR: Graphics
|
|
35
|
+
cs.GT: Computer Science and Game Theory
|
|
36
|
+
cs.HC: Human-Computer Interaction
|
|
37
|
+
cs.IR: Information Retrieval
|
|
38
|
+
cs.IT: Information Theory
|
|
39
|
+
cs.LG: Machine Learning
|
|
40
|
+
cs.LO: Logic in Computer Science
|
|
41
|
+
cs.MA: Multiagent Systems
|
|
42
|
+
cs.MM: Multimedia
|
|
43
|
+
cs.MS: Mathematical Software
|
|
44
|
+
cs.NA: Numerical Analysis
|
|
45
|
+
cs.NE: Neural and Evolutionary Computing
|
|
46
|
+
cs.NI: Networking and Internet Architecture
|
|
47
|
+
cs.OH: Other Computer Science
|
|
48
|
+
cs.OS: Operating Systems
|
|
49
|
+
cs.PF: Performance
|
|
50
|
+
cs.PL: Programming Languages
|
|
51
|
+
cs.RO: Robotics
|
|
52
|
+
cs.SC: Symbolic Computation
|
|
53
|
+
cs.SD: Sound
|
|
54
|
+
cs.SE: Software Engineering
|
|
55
|
+
cs.SI: Social and Information Networks
|
|
56
|
+
cs.SY: Systems and Control
|
|
57
|
+
econ.EM: Econometrics
|
|
58
|
+
econ.GN: General Economics
|
|
59
|
+
econ.TH: Theoretical Economics
|
|
60
|
+
eess.AS: Audio and Speech Processing
|
|
61
|
+
eess.IV: Image and Video Processing
|
|
62
|
+
eess.SP: Signal Processing
|
|
63
|
+
eess.SY: Systems and Control
|
|
64
|
+
gr-qc: General Relativity and Quantum Cosmology
|
|
65
|
+
hep-ex: High Energy Physics - Experiment
|
|
66
|
+
hep-lat: High Energy Physics - Lattice
|
|
67
|
+
hep-ph: High Energy Physics - Phenomenology
|
|
68
|
+
hep-th: High Energy Physics - Theory
|
|
69
|
+
math-ph: Mathematical Physics
|
|
70
|
+
math.AC: Commutative Algebra
|
|
71
|
+
math.AG: Algebraic Geometry
|
|
72
|
+
math.AP: Analysis of PDEs
|
|
73
|
+
math.AT: Algebraic Topology
|
|
74
|
+
math.CA: Classical Analysis and ODEs
|
|
75
|
+
math.CO: Combinatorics
|
|
76
|
+
math.CT: Category Theory
|
|
77
|
+
math.CV: Complex Variables
|
|
78
|
+
math.DG: Differential Geometry
|
|
79
|
+
math.DS: Dynamical Systems
|
|
80
|
+
math.FA: Functional Analysis
|
|
81
|
+
math.GM: General Mathematics
|
|
82
|
+
math.GN: General Topology
|
|
83
|
+
math.GR: Group Theory
|
|
84
|
+
math.GT: Geometric Topology
|
|
85
|
+
math.HO: History and Overview
|
|
86
|
+
math.IT: Information Theory
|
|
87
|
+
math.KT: K-Theory and Homology
|
|
88
|
+
math.LO: Logic
|
|
89
|
+
math.MG: Metric Geometry
|
|
90
|
+
math.MP: Mathematical Physics
|
|
91
|
+
math.NA: Numerical Analysis
|
|
92
|
+
math.NT: Number Theory
|
|
93
|
+
math.OA: Operator Algebras
|
|
94
|
+
math.OC: Optimization and Control
|
|
95
|
+
math.PR: Probability
|
|
96
|
+
math.QA: Quantum Algebra
|
|
97
|
+
math.RA: Rings and Algebras
|
|
98
|
+
math.RT: Representation Theory
|
|
99
|
+
math.SG: Symplectic Geometry
|
|
100
|
+
math.SP: Spectral Theory
|
|
101
|
+
math.ST: Statistics Theory
|
|
102
|
+
nlin.AO: Adaptation and Self-Organizing Systems
|
|
103
|
+
nlin.CD: Chaotic Dynamics
|
|
104
|
+
nlin.CG: Cellular Automata and Lattice Gases
|
|
105
|
+
nlin.PS: Pattern Formation and Solitons
|
|
106
|
+
nlin.SI: Exactly Solvable and Integrable Systems
|
|
107
|
+
nucl-ex: Nuclear Experiment
|
|
108
|
+
nucl-th: Nuclear Theory
|
|
109
|
+
physics.acc-ph: Accelerator Physics
|
|
110
|
+
physics.ao-ph: Atmospheric and Oceanic Physics
|
|
111
|
+
physics.app-ph: Applied Physics
|
|
112
|
+
physics.atm-clus: Atomic and Molecular Clusters
|
|
113
|
+
physics.atom-ph: Atomic Physics
|
|
114
|
+
physics.bio-ph: Biological Physics
|
|
115
|
+
physics.chem-ph: Chemical Physics
|
|
116
|
+
physics.class-ph: Classical Physics
|
|
117
|
+
physics.comp-ph: Computational Physics
|
|
118
|
+
physics.data-an: Data Analysis, Statistics and Probability
|
|
119
|
+
physics.ed-ph: Physics Education
|
|
120
|
+
physics.flu-dyn: Fluid Dynamics
|
|
121
|
+
physics.gen-ph: General Physics
|
|
122
|
+
physics.geo-ph: Geophysics
|
|
123
|
+
physics.hist-ph: History and Philosophy of Physics
|
|
124
|
+
physics.ins-det: Instrumentation and Detectors
|
|
125
|
+
physics.med-ph: Medical Physics
|
|
126
|
+
physics.optics: Optics
|
|
127
|
+
physics.plasm-ph: Plasma Physics
|
|
128
|
+
physics.pop-ph: Popular Physics
|
|
129
|
+
physics.soc-ph: Physics and Society
|
|
130
|
+
physics.space-ph: Space Physics
|
|
131
|
+
q-bio.BM: Biomolecules
|
|
132
|
+
q-bio.CB: Cell Behavior
|
|
133
|
+
q-bio.GN: Genomics
|
|
134
|
+
q-bio.MN: Molecular Networks
|
|
135
|
+
q-bio.NC: Neurons and Cognition
|
|
136
|
+
q-bio.OT: Other Quantitative Biology
|
|
137
|
+
q-bio.PE: Populations and Evolution
|
|
138
|
+
q-bio.QM: Quantitative Methods
|
|
139
|
+
q-bio.SC: Subcellular Processes
|
|
140
|
+
q-bio.TO: Tissues and Organs
|
|
141
|
+
q-fin.CP: Computational Finance
|
|
142
|
+
q-fin.EC: Economics
|
|
143
|
+
q-fin.GN: General Finance
|
|
144
|
+
q-fin.MF: Mathematical Finance
|
|
145
|
+
q-fin.PM: Portfolio Management
|
|
146
|
+
q-fin.PR: Pricing of Securities
|
|
147
|
+
q-fin.RM: Risk Management
|
|
148
|
+
q-fin.ST: Statistical Finance
|
|
149
|
+
q-fin.TR: Trading and Market Microstructure
|
|
150
|
+
quant-ph: Quantum Physics
|
|
151
|
+
stat.AP: Applications
|
|
152
|
+
stat.CO: Computation
|
|
153
|
+
stat.ME: Methodology
|
|
154
|
+
stat.ML: Machine Learning
|
|
155
|
+
stat.OT: Other Statistics
|
|
156
|
+
stat.TH: Statistics Theory
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Arxivarius
  # A single paper, mapped from an `<entry>` element of the arXiv API feed.
  class Paper
    include HappyMapper

    tag 'entry'
    element :arxiv_url, String, tag: 'id'
    element :created_at, Time, tag: 'published'
    element :updated_at, Time, tag: 'updated'
    element :title, Text, parser: :squish
    element :summary, Text, parser: :squish
    element :comment, Text, parser: :squish
    has_one :primary_category, Category
    has_many :categories, Category
    has_many :authors, Author
    has_many :links, Link

    alias_method :abstract, :summary

    # The entry's `<id>` URL, rewritten to use https.
    def arxiv_url
      force_https(@arxiv_url)
    end

    # True when the published and updated timestamps differ.
    def revision?
      created_at != updated_at
    end

    # True when the URL ends in a legacy-style ID (archive/number).
    def legacy_article?
      arxiv_url.match?(Arxivarius::LEGACY_URL_FORMAT)
    end

    # The ID without its trailing version suffix, e.g. "2601.00470" or
    # "hep-th/9901001".
    #
    # Only a trailing "vN" is removed. The previous pattern (`[^v]+` before
    # the suffix) cut off everything up to the last "v" in the archive name,
    # so "solv-int/9901001v1" came back as "-int/9901001", and it raised
    # NoMethodError when the ID carried no version at all.
    def arxiv_id
      arxiv_versioned_id.sub(/v\d+\z/, '')
    end

    # The ID portion of the URL including any version suffix,
    # e.g. "2601.00470v1". Memoized.
    def arxiv_versioned_id
      @arxiv_versioned_id ||= if legacy_article?
                                arxiv_url.match(/(#{Arxivarius::LEGACY_URL_FORMAT})/)[1]
                              else
                                arxiv_url.match(/(#{Arxivarius::CURRENT_URL_FORMAT})/)[1]
                              end
    end

    # Version number taken from the trailing "vN" of the ID, as an Integer.
    # Returns nil when the ID has no version suffix (previously this raised
    # NoMethodError on the failed match).
    def version
      arxiv_versioned_id[/v(\d+)\z/, 1]&.to_i
    end

    # Content types advertised by the paper's links, with nil and blank
    # entries dropped. Memoized.
    def content_types
      @content_types ||= links.map(&:content_type).compact.grep_v(/\A\s*\z/)
    end

    # True when one of the links is of type application/pdf.
    def available_in_pdf?
      content_types.include?('application/pdf')
    end

    # https URL of the PDF link, or nil when the paper has no PDF link.
    def pdf_url
      link = links.find { |l| l.content_type == 'application/pdf' }
      force_https(link.url) if link
    end

    private

    # Rewrites a leading "http:" scheme to "https:"; other URLs pass through.
    def force_https(url)
      url.sub(/^http:/, 'https:')
    end
  end
end
|
data/lib/arxivarius.rb
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'net/http'
|
|
4
|
+
require 'time'
|
|
5
|
+
require 'nokogiri'
|
|
6
|
+
require 'happymapper'
|
|
7
|
+
require 'full-name-splitter'
|
|
8
|
+
require 'yaml'
|
|
9
|
+
|
|
10
|
+
require 'arxivarius/version'
|
|
11
|
+
require 'arxivarius/text'
|
|
12
|
+
|
|
13
|
+
require 'arxivarius/author'
|
|
14
|
+
require 'arxivarius/link'
|
|
15
|
+
require 'arxivarius/category'
|
|
16
|
+
require 'arxivarius/paper'
|
|
17
|
+
|
|
18
|
+
module Arxivarius
  module Error
    # Raised when the API returns no usable entry for the requested ID.
    class PaperNotFound < StandardError; end
    # Raised when the given identifier is not a recognisable arXiv ID or URL.
    class MalformedId < StandardError; end
  end

  # ArXiv uses two ID formats:
  # Legacy: math/0510097v1 (pre-2007)
  # Current: 1202.0819v1 (2007+)
  #
  # The *_URL_FORMAT patterns locate an ID at the end of a longer string
  # (typically a URL); they are deliberately loose and unanchored at the start.
  LEGACY_URL_FORMAT = /[^\/]+\/\d+(?:v\d+)?$/
  CURRENT_URL_FORMAT = /\d{4,}\.\d{4,}(?:v\d+)?$/

  # The *_ID_FORMAT patterns validate a bare ID before it is interpolated into
  # the API query string. They must match the WHOLE string, so they use
  # \A/\z rather than the per-line ^/$ anchors, and the legacy archive part is
  # limited to letters, hyphens and one optional ".subcategory". This keeps
  # characters such as "&", "=", spaces and newlines out of the request URL.
  LEGACY_ID_FORMAT = %r{\A[A-Za-z-]+(?:\.[A-Za-z-]+)?/\d+(?:v\d+)?\z}
  ID_FORMAT = /\A\d{4,}\.\d{4,}(?:v\d+)?\z/

  class << self
    # Fetches a paper from the arXiv export API.
    #
    # @param identifier [#to_s] a bare arXiv ID (current or legacy format,
    #   with or without version) or a URL ending in one
    # @return [Arxivarius::Paper]
    # @raise [Arxivarius::Error::MalformedId] when no valid ID can be derived
    #   from the identifier (including nil or an empty string)
    # @raise [Arxivarius::Error::PaperNotFound] when the response contains no
    #   entry with a title for that ID
    def get(identifier)
      # to_s lets nil and other non-strings fail validation with MalformedId
      # instead of raising NoMethodError.
      id = parse_arxiv_identifier(identifier.to_s)

      raise Arxivarius::Error::MalformedId, 'Paper ID format is invalid' unless valid_id?(id)

      id = normalize_legacy_id(id)

      url = URI("https://export.arxiv.org/api/query?id_list=#{id}")
      response = ::Nokogiri::XML(Net::HTTP.get(url)).remove_namespaces!
      paper = Arxivarius::Paper.parse(response.to_s, single: true)

      # Paper is nil when the API returns no <entry> for the given ID.
      raise Arxivarius::Error::PaperNotFound, "Paper #{id} doesn't exist on arXiv" unless paper&.title

      paper
    end

    private

    # Pulls the trailing ID out of a URL-like string; anything that does not
    # end in an ID is returned unchanged so that validation can reject it.
    def parse_arxiv_identifier(identifier)
      if valid_url?(identifier)
        format = legacy_url?(identifier) ? LEGACY_URL_FORMAT : CURRENT_URL_FORMAT
        identifier.match(/(#{format})/)[1]
      else
        identifier
      end
    end

    # True when the whole string is a current-format or legacy-format ID.
    def valid_id?(identifier)
      identifier.match?(ID_FORMAT) || identifier.match?(LEGACY_ID_FORMAT)
    end

    # True when the string ends in something shaped like an arXiv ID.
    def valid_url?(identifier)
      identifier.match?(LEGACY_URL_FORMAT) || identifier.match?(CURRENT_URL_FORMAT)
    end

    # True when the string ends in a legacy-shaped ID (archive/number).
    def legacy_url?(identifier)
      identifier.match?(LEGACY_URL_FORMAT)
    end

    # The arXiv API no longer resolves subcategory legacy IDs.
    # Strips the subcategory: math.DG/0510097 -> math/0510097.
    def normalize_legacy_id(id)
      id.sub(/\A([a-z-]+)\.[A-Z][A-Za-z]*\//, '\1/')
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: arxivarius
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.10.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- antlypls
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: full-name-splitter
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: 0.1.2
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: 0.1.2
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: nokogiri-happymapper
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0.10'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0.10'
|
|
40
|
+
description: 'Look up any arXiv paper by ID or URL and get structured metadata: titles,
|
|
41
|
+
authors, abstracts, categories, and PDF links.'
|
|
42
|
+
email:
|
|
43
|
+
- hello@antlypls.com
|
|
44
|
+
executables: []
|
|
45
|
+
extensions: []
|
|
46
|
+
extra_rdoc_files: []
|
|
47
|
+
files:
|
|
48
|
+
- LICENSE
|
|
49
|
+
- README.md
|
|
50
|
+
- lib/arxivarius.rb
|
|
51
|
+
- lib/arxivarius/author.rb
|
|
52
|
+
- lib/arxivarius/category.rb
|
|
53
|
+
- lib/arxivarius/data/categories.yml
|
|
54
|
+
- lib/arxivarius/link.rb
|
|
55
|
+
- lib/arxivarius/paper.rb
|
|
56
|
+
- lib/arxivarius/text.rb
|
|
57
|
+
- lib/arxivarius/version.rb
|
|
58
|
+
homepage: https://github.com/antlypls/arxivarius
|
|
59
|
+
licenses:
|
|
60
|
+
- MIT
|
|
61
|
+
metadata: {}
|
|
62
|
+
rdoc_options: []
|
|
63
|
+
require_paths:
|
|
64
|
+
- lib
|
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - ">="
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: '3.2'
|
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '0'
|
|
75
|
+
requirements: []
|
|
76
|
+
rubygems_version: 4.0.6
|
|
77
|
+
specification_version: 4
|
|
78
|
+
summary: Fetch and parse papers metadata from arXiv
|
|
79
|
+
test_files: []
|