arx 0.1.0.alpha → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -4
- data/Gemfile +0 -1
- data/LICENSE +1 -1
- data/README.md +3 -1
- data/arx.gemspec +3 -2
- data/lib/arx/categories.rb +161 -0
- data/lib/arx/cleaner.rb +13 -0
- data/lib/arx/entities/author.rb +29 -0
- data/lib/arx/entities/category.rb +24 -0
- data/lib/arx/entities/link.rb +22 -0
- data/lib/arx/entities/paper.rb +159 -0
- data/lib/arx/exceptions.rb +23 -0
- data/lib/arx/query/query.rb +266 -0
- data/lib/arx/query/validate.rb +123 -0
- data/lib/arx/version.rb +6 -2
- data/lib/arx.rb +49 -1
- metadata +28 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9e2022e15b66aaccb897b5596132b924ed9104040f8d090789312636be309eaf
|
|
4
|
+
data.tar.gz: 844f3ca659a5e60168f7368db64942f90c4ab035c0d0708d726cf0bed65d6c4b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2d95949ba20917ea3f6d570a25011275b56259917fcff040cd1f933f4befe4af1aa037faac82a778b9a62b83c8fc82e618102cfab3e3d5e2c04fdebbbbeef396
|
|
7
|
+
data.tar.gz: 9db9805a3328cc2cfb9c63b8c0aed45370f79ac0a6042ad5365f26156c966a43c3e818e3dfad424367350e956d92dbfeacd1bcf2cd1d88d7dcd98f81f40d6c33
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/LICENSE
CHANGED
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
data/README.md
CHANGED
data/arx.gemspec
CHANGED
|
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
|
8
8
|
spec.authors = ['Edwin Onuonga']
|
|
9
9
|
spec.email = ['edwinonuonga@gmail.com']
|
|
10
10
|
|
|
11
|
-
spec.summary = %q{A Ruby
|
|
11
|
+
spec.summary = %q{A Ruby interface for querying academic papers on the arXiv search API.}
|
|
12
12
|
spec.license = 'MIT'
|
|
13
13
|
spec.require_paths = ['lib']
|
|
14
14
|
spec.files = Dir.glob('lib/**/*', File::FNM_DOTMATCH) + %w[
|
|
@@ -16,8 +16,9 @@ Gem::Specification.new do |spec|
|
|
|
16
16
|
]
|
|
17
17
|
|
|
18
18
|
spec.add_runtime_dependency 'nokogiri', '~> 1.10'
|
|
19
|
+
spec.add_runtime_dependency 'nokogiri-happymapper', '~> 0.8'
|
|
19
20
|
|
|
20
21
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
|
21
22
|
spec.add_development_dependency 'rake', '~> 12.3'
|
|
22
23
|
spec.add_development_dependency 'rspec', '~> 3.7'
|
|
23
|
-
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Arx
|
|
4
|
+
|
|
5
|
+
# arXiv categories and their full names
|
|
6
|
+
CATEGORIES = {
|
|
7
|
+
'astro-ph' => 'Astrophysics',
|
|
8
|
+
'astro-ph.CO' => 'Cosmology and Nongalactic Astrophysics',
|
|
9
|
+
'astro-ph.EP' => 'Earth and Planetary Astrophysics',
|
|
10
|
+
'astro-ph.GA' => 'Astrophysics of Galaxies',
|
|
11
|
+
'astro-ph.HE' => 'High Energy Astrophysical Phenomena',
|
|
12
|
+
'astro-ph.IM' => 'Instrumentation and Methods for Astrophysics',
|
|
13
|
+
'astro-ph.SR' => 'Solar and Stellar Astrophysics',
|
|
14
|
+
'cond-mat.dis-nn' => 'Disordered Systems and Neural Networks',
|
|
15
|
+
'cond-mat.mes-hall' => 'Mesoscale and Nanoscale Physics',
|
|
16
|
+
'cond-mat.mtrl-sci' => 'Materials Science',
|
|
17
|
+
'cond-mat.other' => 'Other Condensed Matter',
|
|
18
|
+
'cond-mat.quant-gas' => 'Quantum Gases',
|
|
19
|
+
'cond-mat.soft' => 'Soft Condensed Matter',
|
|
20
|
+
'cond-mat.stat-mech' => 'Statistical Mechanics',
|
|
21
|
+
'cond-mat.str-el' => 'Strongly Correlated Electrons',
|
|
22
|
+
'cond-mat.supr-con' => 'Superconductivity',
|
|
23
|
+
'cs.AI' => 'Artificial Intelligence',
|
|
24
|
+
'cs.AR' => 'Hardware Architecture',
|
|
25
|
+
'cs.CC' => 'Computational Complexity',
|
|
26
|
+
'cs.CE' => 'Computational Engineering, Finance, and Science',
|
|
27
|
+
'cs.CG' => 'Computational Geometry',
|
|
28
|
+
'cs.CL' => 'Computation and Language',
|
|
29
|
+
'cs.CR' => 'Cryptography and Security',
|
|
30
|
+
'cs.CV' => 'Computer Vision and Pattern Recognition',
|
|
31
|
+
'cs.CY' => 'Computers and Society',
|
|
32
|
+
'cs.DB' => 'Databases',
|
|
33
|
+
'cs.DC' => 'Distributed, Parallel, and Cluster Computing',
|
|
34
|
+
'cs.DL' => 'Digital Libraries',
|
|
35
|
+
'cs.DM' => 'Discrete Mathematics',
|
|
36
|
+
'cs.DS' => 'Data Structures and Algorithms',
|
|
37
|
+
'cs.ET' => 'Emerging Technologies',
|
|
38
|
+
'cs.FL' => 'Formal Languages and Automata Theory',
|
|
39
|
+
'cs.GL' => 'General Literature',
|
|
40
|
+
'cs.GR' => 'Graphics',
|
|
41
|
+
'cs.GT' => 'Computer Science and Game Theory',
|
|
42
|
+
'cs.HC' => 'Human-Computer Interaction',
|
|
43
|
+
'cs.IR' => 'Information Retrieval',
|
|
44
|
+
'cs.IT' => 'Information Theory',
|
|
45
|
+
'cs.LG' => 'Learning',
|
|
46
|
+
'cs.LO' => 'Logic in Computer Science',
|
|
47
|
+
'cs.MA' => 'Multiagent Systems',
|
|
48
|
+
'cs.MM' => 'Multimedia',
|
|
49
|
+
'cs.MS' => 'Mathematical Software',
|
|
50
|
+
'cs.NA' => 'Numerical Analysis',
|
|
51
|
+
'cs.NE' => 'Neural and Evolutionary Computing',
|
|
52
|
+
'cs.NI' => 'Networking and Internet Architecture',
|
|
53
|
+
'cs.OH' => 'Other Computer Science',
|
|
54
|
+
'cs.OS' => 'Operating Systems',
|
|
55
|
+
'cs.PF' => 'Performance',
|
|
56
|
+
'cs.PL' => 'Programming Languages',
|
|
57
|
+
'cs.RO' => 'Robotics',
|
|
58
|
+
'cs.SC' => 'Symbolic Computation',
|
|
59
|
+
'cs.SD' => 'Sound',
|
|
60
|
+
'cs.SE' => 'Software Engineering',
|
|
61
|
+
'cs.SI' => 'Social and Information Networks',
|
|
62
|
+
'cs.SY' => 'Systems and Control',
|
|
63
|
+
'econ.EM' => 'Econometrics',
|
|
64
|
+
'eess.AS' => 'Audio and Speech Processing',
|
|
65
|
+
'eess.IV' => 'Image and Video Processing',
|
|
66
|
+
'eess.SP' => 'Signal Processing',
|
|
67
|
+
'gr-qc' => 'General Relativity and Quantum Cosmology',
|
|
68
|
+
'hep-ex' => 'High Energy Physics - Experiment',
|
|
69
|
+
'hep-lat' => 'High Energy Physics - Lattice',
|
|
70
|
+
'hep-ph' => 'High Energy Physics - Phenomenology',
|
|
71
|
+
'hep-th' => 'High Energy Physics - Theory',
|
|
72
|
+
'math.AC' => 'Commutative Algebra',
|
|
73
|
+
'math.AG' => 'Algebraic Geometry',
|
|
74
|
+
'math.AP' => 'Analysis of PDEs',
|
|
75
|
+
'math.AT' => 'Algebraic Topology',
|
|
76
|
+
'math.CA' => 'Classical Analysis and ODEs',
|
|
77
|
+
'math.CO' => 'Combinatorics',
|
|
78
|
+
'math.CT' => 'Category Theory',
|
|
79
|
+
'math.CV' => 'Complex Variables',
|
|
80
|
+
'math.DG' => 'Differential Geometry',
|
|
81
|
+
'math.DS' => 'Dynamical Systems',
|
|
82
|
+
'math.FA' => 'Functional Analysis',
|
|
83
|
+
'math.GM' => 'General Mathematics',
|
|
84
|
+
'math.GN' => 'General Topology',
|
|
85
|
+
'math.GR' => 'Group Theory',
|
|
86
|
+
'math.GT' => 'Geometric Topology',
|
|
87
|
+
'math.HO' => 'History and Overview',
|
|
88
|
+
'math.IT' => 'Information Theory',
|
|
89
|
+
'math.KT' => 'K-Theory and Homology',
|
|
90
|
+
'math.LO' => 'Logic',
|
|
91
|
+
'math.MG' => 'Metric Geometry',
|
|
92
|
+
'math.MP' => 'Mathematical Physics',
|
|
93
|
+
'math.NA' => 'Numerical Analysis',
|
|
94
|
+
'math.NT' => 'Number Theory',
|
|
95
|
+
'math.OA' => 'Operator Algebras',
|
|
96
|
+
'math.OC' => 'Optimization and Control',
|
|
97
|
+
'math.PR' => 'Probability',
|
|
98
|
+
'math.QA' => 'Quantum Algebra',
|
|
99
|
+
'math.RA' => 'Rings and Algebras',
|
|
100
|
+
'math.RT' => 'Representation Theory',
|
|
101
|
+
'math.SG' => 'Symplectic Geometry',
|
|
102
|
+
'math.SP' => 'Spectral Theory',
|
|
103
|
+
'math.ST' => 'Statistics Theory',
|
|
104
|
+
'math-ph' => 'Mathematical Physics',
|
|
105
|
+
'nlin.AO' => 'Adaptation and Self-Organizing Systems',
|
|
106
|
+
'nlin.CD' => 'Chaotic Dynamics',
|
|
107
|
+
'nlin.CG' => 'Cellular Automata and Lattice Gases',
|
|
108
|
+
'nlin.PS' => 'Pattern Formation and Solitons',
|
|
109
|
+
'nlin.SI' => 'Exactly Solvable and Integrable Systems',
|
|
110
|
+
'nucl-ex' => 'Nuclear Experiment',
|
|
111
|
+
'nucl-th' => 'Nuclear Theory',
|
|
112
|
+
'physics.acc-ph' => 'Accelerator Physics',
|
|
113
|
+
'physics.ao-ph' => 'Atmospheric and Oceanic Physics',
|
|
114
|
+
'physics.app-ph' => 'Applied Physics',
|
|
115
|
+
'physics.atm-clus' => 'Atomic and Molecular Clusters',
|
|
116
|
+
'physics.atom-ph' => 'Atomic Physics',
|
|
117
|
+
'physics.bio-ph' => 'Biological Physics',
|
|
118
|
+
'physics.chem-ph' => 'Chemical Physics',
|
|
119
|
+
'physics.class-ph' => 'Classical Physics',
|
|
120
|
+
'physics.comp-ph' => 'Computational Physics',
|
|
121
|
+
'physics.data-an' => 'Data Analysis, Statistics and Probability',
|
|
122
|
+
'physics.ed-ph' => 'Physics Education',
|
|
123
|
+
'physics.flu-dyn' => 'Fluid Dynamics',
|
|
124
|
+
'physics.gen-ph' => 'General Physics',
|
|
125
|
+
'physics.geo-ph' => 'Geophysics',
|
|
126
|
+
'physics.hist-ph' => 'History and Philosophy of Physics',
|
|
127
|
+
'physics.ins-det' => 'Instrumentation and Detectors',
|
|
128
|
+
'physics.med-ph' => 'Medical Physics',
|
|
129
|
+
'physics.optics' => 'Optics',
|
|
130
|
+
'physics.plasm-ph' => 'Plasma Physics',
|
|
131
|
+
'physics.pop-ph' => 'Popular Physics',
|
|
132
|
+
'physics.soc-ph' => 'Physics and Society',
|
|
133
|
+
'physics.space-ph' => 'Space Physics',
|
|
134
|
+
'q-bio.BM' => 'Biomolecules',
|
|
135
|
+
'q-bio.CB' => 'Cell Behavior',
|
|
136
|
+
'q-bio.GN' => 'Genomics',
|
|
137
|
+
'q-bio.MN' => 'Molecular Networks',
|
|
138
|
+
'q-bio.NC' => 'Neurons and Cognition',
|
|
139
|
+
'q-bio.OT' => 'Other Quantitative Biology',
|
|
140
|
+
'q-bio.PE' => 'Populations and Evolution',
|
|
141
|
+
'q-bio.QM' => 'Quantitative Methods',
|
|
142
|
+
'q-bio.SC' => 'Subcellular Processes',
|
|
143
|
+
'q-bio.TO' => 'Tissues and Organs',
|
|
144
|
+
'q-fin.CP' => 'Computational Finance',
|
|
145
|
+
'q-fin.EC' => 'Economics',
|
|
146
|
+
'q-fin.GN' => 'General Finance',
|
|
147
|
+
'q-fin.MF' => 'Mathematical Finance',
|
|
148
|
+
'q-fin.PM' => 'Portfolio Management',
|
|
149
|
+
'q-fin.PR' => 'Pricing of Securities',
|
|
150
|
+
'q-fin.RM' => 'Risk Management',
|
|
151
|
+
'q-fin.ST' => 'Statistical Finance',
|
|
152
|
+
'q-fin.TR' => 'Trading and Market Microstructure',
|
|
153
|
+
'quant-ph' => 'Quantum Physics',
|
|
154
|
+
'stat.AP' => 'Applications',
|
|
155
|
+
'stat.CO' => 'Computation',
|
|
156
|
+
'stat.ME' => 'Methodology',
|
|
157
|
+
'stat.ML' => 'Machine Learning',
|
|
158
|
+
'stat.OT' => 'Other Statistics',
|
|
159
|
+
'stat.TH' => 'Statistics Theory'
|
|
160
|
+
}.freeze
|
|
161
|
+
end
|
data/lib/arx/cleaner.rb
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
module Arx

  # Utility class that normalizes whitespace in strings.
  class Cleaner
    class << self
      # Replaces each line break (CRLF, CR or LF) with a space, trims the ends,
      # and collapses every run of consecutive spaces into a single space.
      #
      # @param string [String] The string to clean.
      # @return [String] The cleaned string.
      def clean(string)
        single_line = string.gsub(/\r\n|\r|\n/, ' ')
        trimmed = single_line.strip
        trimmed.squeeze(' ')
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'happymapper'
|
|
2
|
+
require 'arx/cleaner'
|
|
3
|
+
|
|
4
|
+
module Arx

  # Entity/model representing an arXiv paper's author.
  # Mapped from the +author+ XML tag.
  class Author
    include HappyMapper

    tag('author')

    # @!method name
    # The name of the author, passed through {Cleaner.clean}.
    # @return [String]
    element(:name, Cleaner, tag: 'name', parser: :clean)

    # @!method affiliations
    # The author's affiliations, each passed through {Cleaner.clean}.
    # @return [Array<String>]
    has_many(:affiliations, Cleaner, tag: 'affiliation', parser: :clean)

    # Whether or not the author has any affiliations.
    # @return [Boolean]
    def affiliations?
      return false if affiliations.empty?

      true
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'arx/categories'
|
|
2
|
+
require 'arx/cleaner'
|
|
3
|
+
|
|
4
|
+
module Arx

  # Entity/model representing an arXiv paper's category.
  # Mapped from the +category+ XML tag.
  class Category
    include HappyMapper

    tag 'category'

    # @!method name
    # The abbreviated name of the category (read from the +term+ XML attribute).
    # @return [String]
    attribute :name, Cleaner, parser: :clean, tag: 'term'

    # The full name of the category.
    # @see CATEGORIES
    # @return [String, nil] The full name, or +nil+ if the abbreviation is not a key of {CATEGORIES}.
    def full_name
      # The attribute is exposed as +name+ (only the XML attribute is called +term+),
      # so the lookup must go through +name+; there is no +term+ method on this class.
      CATEGORIES[name]
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require 'happymapper'
|
|
2
|
+
|
|
3
|
+
module Arx

  # Helper entity/model representing a link on an arXiv paper.
  # Mapped from the +link+ XML tag; every field is an XML attribute of that tag.
  class Link
    include HappyMapper

    tag('link')

    attribute(:title, String)
    attribute(:rel, String)
    attribute(:type, String)
    attribute(:href, String)

    # Whether the link's title is +pdf+.
    # @return [Boolean]
    def pdf?
      @title == 'pdf'
    end

    # Whether the link's title is +doi+.
    # @return [Boolean]
    def doi?
      @title == 'doi'
    end
  end
end
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
require 'happymapper'
|
|
2
|
+
require 'arx/exceptions'
|
|
3
|
+
require 'arx/cleaner'
|
|
4
|
+
require_relative 'author'
|
|
5
|
+
require_relative 'category'
|
|
6
|
+
require_relative 'link'
|
|
7
|
+
|
|
8
|
+
module Arx

  # Entity/model representing an arXiv paper.
  # Mapped from the +entry+ XML tag.
  class Paper
    include HappyMapper

    tag('entry')

    element(:id, Cleaner, parser: :clean, tag: 'id')

    # The identifier of the paper, i.e. the mapped +id+ value with the
    # +http(s)://arxiv.org/abs/+ part removed.
    # @note This is either in {Validate::OLD_IDENTIFIER_FORMAT} or {Validate::NEW_IDENTIFIER_FORMAT}.
    # @example
    #   1705.01662v1
    #   cond-mat/0211034
    # @return [String] The paper's identifier.
    def id
      @id.sub(/https?\:\/\/arxiv\.org\/abs\//, '')
    end

    # The URL of the paper on the arXiv website (the unmodified mapped +id+ value).
    # @example
    #   http://arxiv.org/abs/1705.01662v1
    #   http://arxiv.org/abs/cond-mat/0211034
    # @return [String] The paper's arXiv URL.
    def url
      @id
    end

    # @!method last_updated
    # The date that the paper was last updated.
    # @return [DateTime]
    element(:last_updated, DateTime, tag: 'updated')

    # @!method publish_date
    # The original publish/submission date of the paper.
    # @return [DateTime]
    element(:publish_date, DateTime, tag: 'published')

    # @!method title
    # The title of the paper.
    # @return [String]
    element(:title, Cleaner, parser: :clean, tag: 'title')

    # @!method authors
    # The authors of the paper.
    # @return [Array<Author>]
    has_many(:authors, Author, tag: 'author')

    # @!method primary_category
    # The primary category of the paper.
    # @return [Category]
    element(:primary_category, Category, tag: 'primary_category')

    # @!method categories
    # The categories of the paper.
    # @return [Array<Category>]
    has_many(:categories, Category, tag: 'category')

    # Whether the paper is a revision or not.
    # @note A paper is a revision if {last_updated} differs from {publish_date}.
    # @return [Boolean]
    def revision?
      @publish_date != @last_updated
    end

    # @!method summary
    # The summary (or abstract) of the paper.
    # @return [String]
    element(:summary, Cleaner, parser: :clean, tag: 'summary')
    alias_method :abstract, :summary

    # @!method comment?
    # Whether or not the paper has a comment.
    # @return [Boolean]

    # @!method comment
    # The comment of the paper.
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
    # @raise {MissingFieldError} If the paper does not have a comment.
    # @return [String]
    element(:comment, Cleaner, parser: :clean, tag: 'comment')

    # @!method journal?
    # Whether or not the paper has a journal reference.
    # @return [Boolean]

    # @!method journal
    # The journal reference of the paper.
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
    # @raise {MissingFieldError} If the paper does not have a journal reference.
    # @return [String]
    element(:journal, Cleaner, parser: :clean, tag: 'journal_ref')

    # For each optional field, define a predicate and replace the generated
    # reader with one that raises when the stored value is empty.
    %i[comment journal].each do |field|
      predicate = "#{field}?"
      ivar = "@#{field}"

      define_method(predicate) { !instance_variable_get(ivar).empty? }

      define_method(field) do
        raise MissingFieldError.new(field) unless send(predicate)

        instance_variable_get(ivar)
      end
    end

    has_many(:links, Link, tag: 'link')

    # @!method pdf?
    # Whether or not the paper has a PDF link.
    # @return [Boolean]

    # @!method pdf_url
    # Link to the PDF version of the paper.
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
    # @raise {MissingLinkError} If the paper does not have a PDF link.
    # @return [String]

    # @!method doi?
    # Whether or not the paper has a DOI (Digital Object Identifier) link.
    # @see https://arxiv.org/help/jref#doi
    # @see https://arxiv.org/help/prep#doi
    # @return [Boolean]

    # @!method doi_url
    # Link to the DOI (Digital Object Identifier) of the paper.
    # @see https://arxiv.org/help/jref#doi
    # @see https://arxiv.org/help/prep#doi
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
    # @raise {MissingLinkError} If the paper does not have a DOI link.
    # @return [String]

    # For each link type, define a predicate (true when any link answers the
    # same predicate) and a URL reader returning the first such link's href.
    %i[pdf doi].each do |link_type|
      predicate = "#{link_type}?".to_sym

      define_method(predicate) { links.any?(&predicate) }

      define_method("#{link_type}_url") do
        raise MissingLinkError.new(link_type.to_s.upcase) unless send(predicate)

        links.find(&predicate).href
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
module Arx

  # Custom error for missing links on an arXiv paper.
  class MissingLinkError < StandardError
    # @param link_type [#to_s] The kind of link that is missing; interpolated into the message.
    def initialize(link_type)
      message = "This arXiv paper does not have a #{link_type} link"
      super(message)
    end
  end

  # Custom error for missing fields on an arXiv paper.
  class MissingFieldError < StandardError
    # @param field [#to_s] The name of the missing field; interpolated into the message.
    def initialize(field)
      message = "This arXiv paper is missing the `#{field}` field"
      super(message)
    end
  end

  # Custom error for missing arXiv papers.
  class MissingPaper < StandardError
    # @param id [#to_s] The identifier that could not be found; interpolated into the message.
    def initialize(id)
      message = "Couldn't find an arXiv paper with ID: #{id}"
      super(message)
    end
  end
end
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'
|
|
4
|
+
require_relative 'validate'
|
|
5
|
+
|
|
6
|
+
module Arx

  # Class for generating arXiv search API query strings.
  #
  # The query string is built incrementally: the sorting parameters (and the
  # optional ID list) are written by the constructor, and each field method
  # appends a subquery to the +search_query+ parameter.
  #
  # @attr query [String] The string representing the search query.
  class Query

    # Mapping for URL query parameters supported by the arXiv search API.
    PARAMS = {
      search_query: 'search_query',
      id_list: 'id_list',
      sort_by: 'sortBy',
      sort_order: 'sortOrder'
    }.freeze

    # Logical connectives supported by the arXiv search API.
    CONNECTIVES = {
      and: 'AND',
      or: 'OR',
      and_not: 'ANDNOT'
    }.freeze

    # Logical connective method names.
    CONNECTIVE_METHODS = {
      '&': :and,
      '!': :and_not,
      '|': :or
    }.freeze

    # Supported fields for the search queries made to the arXiv search API.
    # @see https://arxiv.org/help/prep arXiv metadata fields
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual (query details)
    FIELDS = {
      title: 'ti',     # Title
      author: 'au',    # Author
      abstract: 'abs', # Abstract
      comment: 'co',   # Comment
      journal: 'jr',   # Journal reference
      category: 'cat', # Subject category
      report: 'rn',    # Report number
      all: 'all'       # All (of the above)
    }.freeze

    # Supported criteria for the +sortBy+ parameter.
    SORT_BY = {
      relevance: 'relevance',
      last_updated: 'lastUpdated',
      date_submitted: 'submittedDate'
    }.freeze

    # Supported criteria for the +sortOrder+ parameter.
    SORT_ORDER = {
      ascending: 'ascending',
      descending: 'descending'
    }.freeze

    # Initializes a new Query object.
    #
    # @param ids [Array<String>] The IDs (or arXiv +abs+ URLs) of the arXiv papers to restrict the query to.
    # @param sort_by [Symbol] The sorting criteria for the returned results (see {SORT_BY}).
    # @param sort_order [Symbol] The sorting order for the returned results (see {SORT_ORDER}).
    # @yieldparam query [Query] The query being initialized, if a block is given.
    # @return [Query] The initialized query object.
    def initialize(*ids, sort_by: :relevance, sort_order: :descending)
      @query = String.new

      Validate.sort_by sort_by, permitted: SORT_BY.keys
      @query << "&#{PARAMS[:sort_by]}=#{SORT_BY[sort_by]}"

      Validate.sort_order sort_order, permitted: SORT_ORDER.keys
      @query << "&#{PARAMS[:sort_order]}=#{SORT_ORDER[sort_order]}"

      ids.flatten!
      unless ids.empty?
        # URLs are reduced to bare identifiers before validation.
        ids.map! { |id| extract_id id }
        Validate.ids ids
        @query << "&#{PARAMS[:id_list]}=#{ids * ','}"
      end

      yield self if block_given?
    end

    # @!method &
    # Logical conjunction (+AND+) of subqueries.
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
    # @return [self]

    # @!method !
    # Logical negated conjunction (+ANDNOT+) of subqueries.
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
    # @return [self]

    # @!method |
    # Logical disjunction (+OR+) of subqueries.
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
    # @return [self]

    CONNECTIVE_METHODS.each do |symbol, connective|
      define_method(symbol) { add_connective connective }
    end

    # @!method title(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#title title}.
    # @param values [Array<String>] Title(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the title(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method author(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#author author}.
    # @param values [Array<String>] Author(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the author's name(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method abstract(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#abstract abstract}.
    # @param values [Array<String>] Abstract(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the abstract(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method comment(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#comments comment}.
    # @param values [Array<String>] Comment(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the comment(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method journal(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#journal journal reference}.
    # @param values [Array<String>] Journal reference(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the journal reference(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method category(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#category category}.
    # @param values [Array<String>] Category(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the category(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method report(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#report report number}.
    # @param values [Array<String>] Report number(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the report number(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method all(*values, exact: true, connective: :and)
    # Search for papers by all fields (see {FIELDS}).
    # @param values [Array<String>] Field value(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the field value(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    FIELDS.each do |name, field|
      define_method(name) do |*values, exact: true, connective: :and|
        # Nothing to add, but still return the query so that call chains keep working.
        return self if values.empty?

        Validate.values values
        Validate.categories values if name == :category
        Validate.exact exact
        Validate.connective connective, permitted: CONNECTIVES.keys

        values.map!(&CGI.method(:escape))

        # Forms a field:value pair
        pair = ->(value) { "#{field}:#{exact ? enquote(value) : value}" }

        subquery = if values.size > 1
          parenthesize values.map(&pair).join("+#{CONNECTIVES[connective]}+")
        else
          pair.(values.first)
        end

        add_subquery subquery
        self
      end
    end

    # Returns the query string.
    #
    # @return [String]
    def to_s
      @query
    end

    private

    # Appends a logical connective to the end of the query string.
    # Does nothing if there is no +search_query+ parameter yet, or if the
    # query string already ends in a connective.
    #
    # @see CONNECTIVES
    # @param connective [Symbol] The symbol of the logical connective to add.
    # @return [self]
    def add_connective(connective)
      return self unless search_query?

      @query << "+#{CONNECTIVES[connective]}" unless ends_with_connective?
      self
    end

    # Appends a subquery to the end of the query string.
    # The first subquery opens the +search_query+ parameter; later ones are
    # joined to the existing string, using +AND+ when no connective is pending.
    #
    # @param subquery [String] The subquery to add.
    def add_subquery(subquery)
      if search_query?
        # No-op when a connective is already pending.
        add_connective :and
        @query << "+#{subquery}"
      else
        @query << "&#{PARAMS[:search_query]}=#{subquery}"
      end
    end

    # Whether the query string contains the +search_query+ parameter.
    #
    # @see PARAMS
    # @return [Boolean]
    def search_query?
      @query.include? PARAMS[:search_query]
    end

    # Whether the query string ends in a logical connective.
    #
    # Connectives are always appended as "+CONNECTIVE" (see {#add_connective}),
    # so the leading "+" is part of the check. Without it, an unquoted value
    # that merely ends in the letters of a connective (e.g. "BRAND") would be
    # mistaken for a pending connective.
    #
    # @see CONNECTIVES
    # @return [Boolean]
    def ends_with_connective?
      CONNECTIVES.values.any? { |connective| @query.end_with?("+#{connective}") }
    end

    # Parenthesizes a string with CGI-escaped parentheses.
    #
    # @param string [String] The string to parenthesize.
    # @return [String] The parenthesized string.
    def parenthesize(string)
      CGI.escape('(') + string + CGI.escape(')')
    end

    # Enquotes a string with CGI-escaped double quotes.
    #
    # @param string [String] The string to enquote.
    # @return [String] The enquoted string.
    def enquote(string)
      CGI.escape("\"") + string + CGI.escape("\"")
    end

    # Attempt to extract an ID from an arXiv URL.
    #
    # @param url [String] The URL to extract the ID from.
    # @return [String] The extracted ID if successful, otherwise the original string.
    def extract_id(url)
      # Anchored to the start of the string, with a literal dot after "www".
      prefix = %r{\A(https?://)?(www\.)?arxiv\.org/abs/}
      return url unless prefix.match?(url)

      url.sub(prefix, '').sub(%r{/$}, '')
    end
  end
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
require_relative '../categories'
|
|
2
|
+
|
|
3
|
+
module Arx

  # Validations for arXiv search query fields and identifier schemes.
  module Validate

    # The current arXiv paper identifier scheme (1 April 2007 and onwards).
    # The last block of digits can either be five digits (if the paper was published after 1501 - January 2015),
    # or four digits (if the paper was published before 1501).
    #
    # Anchored with +\A+/+\z+ so the whole string must be an identifier
    # (+^+/+$+ would accept an identifier followed by a newline and arbitrary text).
    #
    # @see https://arxiv.org/help/arxiv_identifier#new arXiv identifier (new)
    # @example
    #   1501.00001
    #   1705.01662v1
    #   1412.0135
    #   0706.0001v2
    NEW_IDENTIFIER_FORMAT = /\A\d{4}\.\d{4,5}(v\d+)?\z/.freeze

    # The legacy arXiv paper identifier scheme (before 1 April 2007).
    #
    # Anchored with +\A+/+\z+ so the whole string must be an identifier.
    #
    # @see https://arxiv.org/help/arxiv_identifier#old arXiv identifier (old)
    # @example
    #   math/0309136v1
    #   cond-mat/0211034
    OLD_IDENTIFIER_FORMAT = %r{\A[a-z]+(-[a-z]+)?/\d{7}(v\d+)?\z}.freeze

    class << self
      # Validates the +sortBy+ field of the query string.
      #
      # @param value [Symbol] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the field.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def sort_by(value, permitted:)
        validate_permitted_symbol('sort_by', value, permitted)
      end

      # Validates the +sortOrder+ field of the query string.
      #
      # @param value [Symbol] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the field.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def sort_order(value, permitted:)
        validate_permitted_symbol('sort_order', value, permitted)
      end

      # Validates a list of arXiv paper identifiers.
      #
      # @param ids [Array<String>] The identifiers to validate.
      # @raise
      #   [TypeError] If +ids+ is not an +Array+.
      #   [TypeError] If any identifier is not a +String+.
      #   [ArgumentError] If the identifier is invalid.
      def ids(ids)
        raise TypeError, "Expected `ids` to be an Array, got: #{ids.class}" unless ids.is_a? Array

        ids.each do |id|
          raise TypeError, "Expected identifier to be a String, got: #{id.class} (#{id})" unless id.is_a? String
          raise ArgumentError, "Malformed arXiv identifier: #{id}" unless id?(id)
        end
      end

      # Validates the +exact+ parameter.
      #
      # @param value [Boolean] The value to validate.
      # @raise
      #   [TypeError] If the value is not a boolean (+TrueClass+ or +FalseClass+).
      def exact(value)
        # `!!value` coerces to true/false, so the comparison only holds when
        # +value+ already is one of those two.
        raise TypeError, "Expected `exact` to be boolean (TrueClass or FalseClass), got: #{value.class}" unless value == !!value
      end

      # Validates a logical connective.
      #
      # @param value [Symbol] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the field.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def connective(value, permitted:)
        validate_permitted_symbol('connective', value, permitted)
      end

      # Validates a list of values for the fields of the search query string.
      #
      # @param values [Array<String>] The values to validate.
      # @raise
      #   [TypeError] If +values+ is not an +Array+.
      #   [TypeError] If any value is not a +String+.
      def values(values)
        raise TypeError, "Expected `values` to be an Array, got: #{values.class}" unless values.is_a? Array

        values.each do |value|
          raise TypeError, "Expected value to be a String, got: #{value.class} (#{value})" unless value.is_a? String
        end
      end

      # Validates a list of arXiv categories.
      #
      # @note This is only called after {values}, so there is no need to check types.
      # @param categories [Array<String>] The categories to validate.
      # @raise [ArgumentError] If any category is unrecognized (not a valid arXiv category).
      # @see Arx::CATEGORIES
      def categories(categories)
        categories.each do |category|
          raise ArgumentError, "Unrecognized arXiv category (#{category}). See Arx::CATEGORIES." unless Arx::CATEGORIES.keys.include? category
        end
      end

      # Validates an arXiv identifier of both the old and new schemes.
      #
      # @see NEW_IDENTIFIER_FORMAT
      # @see OLD_IDENTIFIER_FORMAT
      # @param id [String] The identifier to check.
      # @return [Boolean] Whether +id+ matches either identifier scheme in full.
      def id?(id)
        NEW_IDENTIFIER_FORMAT.match?(id) || OLD_IDENTIFIER_FORMAT.match?(id)
      end

      private

      # Shared check behind {sort_by}, {sort_order} and {connective}.
      #
      # @param field [String] The field name used in the error messages.
      # @param value [Symbol] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the field.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def validate_permitted_symbol(field, value, permitted)
        raise TypeError, "Expected `#{field}` to be a Symbol, got: #{value.class}" unless value.is_a? Symbol
        raise ArgumentError, "Expected `#{field}` to be one of #{permitted}, got: #{value}" unless permitted.include? value
      end
    end
  end
end
|
data/lib/arx/version.rb
CHANGED
data/lib/arx.rb
CHANGED
|
@@ -1,5 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require 'open-uri'
|
|
1
5
|
require 'arx/version'
|
|
6
|
+
require 'arx/categories'
|
|
7
|
+
require 'arx/query/query'
|
|
8
|
+
require 'arx/query/validate'
|
|
9
|
+
require 'arx/entities/author'
|
|
10
|
+
require 'arx/entities/category'
|
|
11
|
+
require 'arx/entities/paper'
|
|
2
12
|
|
|
13
|
+
# A Ruby interface for querying academic papers on the arXiv search API.
|
|
3
14
|
module Arx

  # The arXiv search API endpoint.
  ENDPOINT = 'http://export.arxiv.org/api/query?'

  # Performs a search query for papers on the arXiv search API.
  #
  # @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
  # @param sort_by [Symbol] The sorting criteria for the returned results (see {Query::SORT_BY}).
  # @param sort_order [Symbol] The sorting order for the returned results (see {Query::SORT_ORDER}).
  # @yieldparam query [Query] The query being built, yielded before the request is sent.
  # @raise [MissingPaper] If exactly one ID was given and no paper was found for it.
  # @return [Array<Paper>, Paper] The {Paper}(s) found by the search query.
  def self.search(*ids, sort_by: :relevance, sort_order: :descending)
    query = Query.new(*ids, sort_by: sort_by, sort_order: sort_order)

    yield query if block_given?

    url = ENDPOINT + query.to_s + '&max_results=10000'
    # URI#open (added by open-uri) rather than Kernel#open: opening URLs through
    # Kernel#open is deprecated in Ruby 2.7 and removed in 3.0. The block form
    # closes the response once the document has been parsed.
    document = URI.parse(url).open { |response| Nokogiri::XML(response) }
    document.remove_namespaces!

    # Entries without an ID are discarded before deciding what to return.
    results = Paper.parse(document, single: false).reject { |paper| paper.id.empty? }
    raise MissingPaper.new(ids.first) if results.empty? && ids.size == 1

    # A single requested ID with a single hit is unwrapped from the array.
    ids.size == 1 && results.size == 1 ? results.first : results
  end
end
|
|
38
|
+
|
|
39
|
+
# Performs a search query for papers on the arXiv search API.
|
|
40
|
+
#
|
|
41
|
+
# @note This is an alias of the {Arx.search} method.
|
|
42
|
+
# @see Arx.search
|
|
43
|
+
# @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
|
|
44
|
+
# @param sort_by [Symbol] The sorting criteria for the returned results (see {Arx::Query::SORT_BY}).
|
|
45
|
+
# @param sort_order [Symbol] The sorting order for the returned results (see {Arx::Query::SORT_ORDER}).
|
|
46
|
+
# @return [Array<Paper>, Paper] The {Arx::Paper}(s) found by the search query.
|
|
47
|
+
def Arx(*ids, sort_by: :relevance, sort_order: :descending, &block)
  # Forwarding +&block+ unconditionally is enough: when no block was given it
  # is nil, and Arx.search then sees no block at all.
  Arx.search(*ids, sort_by: sort_by, sort_order: sort_order, &block)
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: arx
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Edwin Onuonga
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-03-
|
|
11
|
+
date: 2019-03-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -24,6 +24,20 @@ dependencies:
|
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '1.10'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: nokogiri-happymapper
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0.8'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0.8'
|
|
27
41
|
- !ruby/object:Gem::Dependency
|
|
28
42
|
name: bundler
|
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -80,6 +94,15 @@ files:
|
|
|
80
94
|
- Rakefile
|
|
81
95
|
- arx.gemspec
|
|
82
96
|
- lib/arx.rb
|
|
97
|
+
- lib/arx/categories.rb
|
|
98
|
+
- lib/arx/cleaner.rb
|
|
99
|
+
- lib/arx/entities/author.rb
|
|
100
|
+
- lib/arx/entities/category.rb
|
|
101
|
+
- lib/arx/entities/link.rb
|
|
102
|
+
- lib/arx/entities/paper.rb
|
|
103
|
+
- lib/arx/exceptions.rb
|
|
104
|
+
- lib/arx/query/query.rb
|
|
105
|
+
- lib/arx/query/validate.rb
|
|
83
106
|
- lib/arx/version.rb
|
|
84
107
|
homepage:
|
|
85
108
|
licenses:
|
|
@@ -96,12 +119,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
96
119
|
version: '0'
|
|
97
120
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
98
121
|
requirements:
|
|
99
|
-
- - "
|
|
122
|
+
- - ">="
|
|
100
123
|
- !ruby/object:Gem::Version
|
|
101
|
-
version:
|
|
124
|
+
version: '0'
|
|
102
125
|
requirements: []
|
|
103
126
|
rubygems_version: 3.0.3
|
|
104
127
|
signing_key:
|
|
105
128
|
specification_version: 4
|
|
106
|
-
summary: A Ruby
|
|
129
|
+
summary: A Ruby interface for querying academic papers on the arXiv search API.
|
|
107
130
|
test_files: []
|