arx 0.1.0.alpha → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2974e28242d0751921e108ca3b70a6abe391f74f295dd33779c9e78d612d3480
4
- data.tar.gz: 4e08201c35084a7d7ba748dcf0e3ae8de15aae14f4b279136867c04e45ebab59
3
+ metadata.gz: 9e2022e15b66aaccb897b5596132b924ed9104040f8d090789312636be309eaf
4
+ data.tar.gz: 844f3ca659a5e60168f7368db64942f90c4ab035c0d0708d726cf0bed65d6c4b
5
5
  SHA512:
6
- metadata.gz: bf6b001b64149b1f27ae5b4be4aaff5a7ca2e7dee8927baf8fd06a9c1395faaa81fa6686b5ae203b495c7924e1758664cdb0337532e667b10b21ee5de9709ba7
7
- data.tar.gz: 2c45c7fd16a04ed2887bd6174c1ae2b2c809d8cf36ec7a83f894b35f0673dbde7a44f3eeb663309169bfe9943d6579e4076015a118137603ae641d9a6da5e7ea
6
+ metadata.gz: 2d95949ba20917ea3f6d570a25011275b56259917fcff040cd1f933f4befe4af1aa037faac82a778b9a62b83c8fc82e618102cfab3e3d5e2c04fdebbbbeef396
7
+ data.tar.gz: 9db9805a3328cc2cfb9c63b8c0aed45370f79ac0a6042ad5365f26156c966a43c3e818e3dfad424367350e956d92dbfeacd1bcf2cd1d88d7dcd98f81f40d6c33
data/CHANGELOG.md CHANGED
@@ -1,5 +1,3 @@
1
- # 0.1.0.alpha
1
+ # 0.1.0
2
2
 
3
- #### Major changes
4
-
5
- [💎] Claiming gem!
3
+ Initial commit! 🎉
data/Gemfile CHANGED
@@ -1,3 +1,2 @@
1
1
  source 'https://rubygems.org'
2
- # Specify your gem's dependencies in arx.gemspec
3
2
  gemspec
data/LICENSE CHANGED
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
data/README.md CHANGED
@@ -1 +1,3 @@
1
- # TODO
1
+ # Arx
2
+
3
+ A Ruby interface for querying academic papers on the arXiv search API.
data/arx.gemspec CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ['Edwin Onuonga']
9
9
  spec.email = ['edwinonuonga@gmail.com']
10
10
 
11
- spec.summary = %q{A Ruby wrapper for the arXiv API.}
11
+ spec.summary = %q{A Ruby interface for querying academic papers on the arXiv search API.}
12
12
  spec.license = 'MIT'
13
13
  spec.require_paths = ['lib']
14
14
  spec.files = Dir.glob('lib/**/*', File::FNM_DOTMATCH) + %w[
@@ -16,8 +16,9 @@ Gem::Specification.new do |spec|
16
16
  ]
17
17
 
18
18
  spec.add_runtime_dependency 'nokogiri', '~> 1.10'
19
+ spec.add_runtime_dependency 'nokogiri-happymapper', '~> 0.8'
19
20
 
20
21
  spec.add_development_dependency 'bundler', '~> 2.0'
21
22
  spec.add_development_dependency 'rake', '~> 12.3'
22
23
  spec.add_development_dependency 'rspec', '~> 3.7'
23
- end
24
+ end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
module Arx

  # Lookup table from an arXiv category identifier (e.g. +cs.AI+) to its
  # human-readable name. Entries are grouped by archive and kept in the same
  # order as the original listing. The hash itself is frozen.
  CATEGORIES = {
    # Astrophysics
    'astro-ph' => 'Astrophysics',
    'astro-ph.CO' => 'Cosmology and Nongalactic Astrophysics',
    'astro-ph.EP' => 'Earth and Planetary Astrophysics',
    'astro-ph.GA' => 'Astrophysics of Galaxies',
    'astro-ph.HE' => 'High Energy Astrophysical Phenomena',
    'astro-ph.IM' => 'Instrumentation and Methods for Astrophysics',
    'astro-ph.SR' => 'Solar and Stellar Astrophysics',

    # Condensed matter
    'cond-mat.dis-nn' => 'Disordered Systems and Neural Networks',
    'cond-mat.mes-hall' => 'Mesoscale and Nanoscale Physics',
    'cond-mat.mtrl-sci' => 'Materials Science',
    'cond-mat.other' => 'Other Condensed Matter',
    'cond-mat.quant-gas' => 'Quantum Gases',
    'cond-mat.soft' => 'Soft Condensed Matter',
    'cond-mat.stat-mech' => 'Statistical Mechanics',
    'cond-mat.str-el' => 'Strongly Correlated Electrons',
    'cond-mat.supr-con' => 'Superconductivity',

    # Computer science
    'cs.AI' => 'Artificial Intelligence',
    'cs.AR' => 'Hardware Architecture',
    'cs.CC' => 'Computational Complexity',
    'cs.CE' => 'Computational Engineering, Finance, and Science',
    'cs.CG' => 'Computational Geometry',
    'cs.CL' => 'Computation and Language',
    'cs.CR' => 'Cryptography and Security',
    'cs.CV' => 'Computer Vision and Pattern Recognition',
    'cs.CY' => 'Computers and Society',
    'cs.DB' => 'Databases',
    'cs.DC' => 'Distributed, Parallel, and Cluster Computing',
    'cs.DL' => 'Digital Libraries',
    'cs.DM' => 'Discrete Mathematics',
    'cs.DS' => 'Data Structures and Algorithms',
    'cs.ET' => 'Emerging Technologies',
    'cs.FL' => 'Formal Languages and Automata Theory',
    'cs.GL' => 'General Literature',
    'cs.GR' => 'Graphics',
    'cs.GT' => 'Computer Science and Game Theory',
    'cs.HC' => 'Human-Computer Interaction',
    'cs.IR' => 'Information Retrieval',
    'cs.IT' => 'Information Theory',
    'cs.LG' => 'Learning',
    'cs.LO' => 'Logic in Computer Science',
    'cs.MA' => 'Multiagent Systems',
    'cs.MM' => 'Multimedia',
    'cs.MS' => 'Mathematical Software',
    'cs.NA' => 'Numerical Analysis',
    'cs.NE' => 'Neural and Evolutionary Computing',
    'cs.NI' => 'Networking and Internet Architecture',
    'cs.OH' => 'Other Computer Science',
    'cs.OS' => 'Operating Systems',
    'cs.PF' => 'Performance',
    'cs.PL' => 'Programming Languages',
    'cs.RO' => 'Robotics',
    'cs.SC' => 'Symbolic Computation',
    'cs.SD' => 'Sound',
    'cs.SE' => 'Software Engineering',
    'cs.SI' => 'Social and Information Networks',
    'cs.SY' => 'Systems and Control',

    # Economics
    'econ.EM' => 'Econometrics',

    # Electrical engineering and systems science
    'eess.AS' => 'Audio and Speech Processing',
    'eess.IV' => 'Image and Video Processing',
    'eess.SP' => 'Signal Processing',

    # General relativity and high energy physics
    'gr-qc' => 'General Relativity and Quantum Cosmology',
    'hep-ex' => 'High Energy Physics - Experiment',
    'hep-lat' => 'High Energy Physics - Lattice',
    'hep-ph' => 'High Energy Physics - Phenomenology',
    'hep-th' => 'High Energy Physics - Theory',

    # Mathematics
    'math.AC' => 'Commutative Algebra',
    'math.AG' => 'Algebraic Geometry',
    'math.AP' => 'Analysis of PDEs',
    'math.AT' => 'Algebraic Topology',
    'math.CA' => 'Classical Analysis and ODEs',
    'math.CO' => 'Combinatorics',
    'math.CT' => 'Category Theory',
    'math.CV' => 'Complex Variables',
    'math.DG' => 'Differential Geometry',
    'math.DS' => 'Dynamical Systems',
    'math.FA' => 'Functional Analysis',
    'math.GM' => 'General Mathematics',
    'math.GN' => 'General Topology',
    'math.GR' => 'Group Theory',
    'math.GT' => 'Geometric Topology',
    'math.HO' => 'History and Overview',
    'math.IT' => 'Information Theory',
    'math.KT' => 'K-Theory and Homology',
    'math.LO' => 'Logic',
    'math.MG' => 'Metric Geometry',
    'math.MP' => 'Mathematical Physics',
    'math.NA' => 'Numerical Analysis',
    'math.NT' => 'Number Theory',
    'math.OA' => 'Operator Algebras',
    'math.OC' => 'Optimization and Control',
    'math.PR' => 'Probability',
    'math.QA' => 'Quantum Algebra',
    'math.RA' => 'Rings and Algebras',
    'math.RT' => 'Representation Theory',
    'math.SG' => 'Symplectic Geometry',
    'math.SP' => 'Spectral Theory',
    'math.ST' => 'Statistics Theory',
    'math-ph' => 'Mathematical Physics',

    # Nonlinear sciences
    'nlin.AO' => 'Adaptation and Self-Organizing Systems',
    'nlin.CD' => 'Chaotic Dynamics',
    'nlin.CG' => 'Cellular Automata and Lattice Gases',
    'nlin.PS' => 'Pattern Formation and Solitons',
    'nlin.SI' => 'Exactly Solvable and Integrable Systems',

    # Nuclear physics
    'nucl-ex' => 'Nuclear Experiment',
    'nucl-th' => 'Nuclear Theory',

    # Physics
    'physics.acc-ph' => 'Accelerator Physics',
    'physics.ao-ph' => 'Atmospheric and Oceanic Physics',
    'physics.app-ph' => 'Applied Physics',
    'physics.atm-clus' => 'Atomic and Molecular Clusters',
    'physics.atom-ph' => 'Atomic Physics',
    'physics.bio-ph' => 'Biological Physics',
    'physics.chem-ph' => 'Chemical Physics',
    'physics.class-ph' => 'Classical Physics',
    'physics.comp-ph' => 'Computational Physics',
    'physics.data-an' => 'Data Analysis, Statistics and Probability',
    'physics.ed-ph' => 'Physics Education',
    'physics.flu-dyn' => 'Fluid Dynamics',
    'physics.gen-ph' => 'General Physics',
    'physics.geo-ph' => 'Geophysics',
    'physics.hist-ph' => 'History and Philosophy of Physics',
    'physics.ins-det' => 'Instrumentation and Detectors',
    'physics.med-ph' => 'Medical Physics',
    'physics.optics' => 'Optics',
    'physics.plasm-ph' => 'Plasma Physics',
    'physics.pop-ph' => 'Popular Physics',
    'physics.soc-ph' => 'Physics and Society',
    'physics.space-ph' => 'Space Physics',

    # Quantitative biology
    'q-bio.BM' => 'Biomolecules',
    'q-bio.CB' => 'Cell Behavior',
    'q-bio.GN' => 'Genomics',
    'q-bio.MN' => 'Molecular Networks',
    'q-bio.NC' => 'Neurons and Cognition',
    'q-bio.OT' => 'Other Quantitative Biology',
    'q-bio.PE' => 'Populations and Evolution',
    'q-bio.QM' => 'Quantitative Methods',
    'q-bio.SC' => 'Subcellular Processes',
    'q-bio.TO' => 'Tissues and Organs',

    # Quantitative finance
    'q-fin.CP' => 'Computational Finance',
    'q-fin.EC' => 'Economics',
    'q-fin.GN' => 'General Finance',
    'q-fin.MF' => 'Mathematical Finance',
    'q-fin.PM' => 'Portfolio Management',
    'q-fin.PR' => 'Pricing of Securities',
    'q-fin.RM' => 'Risk Management',
    'q-fin.ST' => 'Statistical Finance',
    'q-fin.TR' => 'Trading and Market Microstructure',

    # Quantum physics
    'quant-ph' => 'Quantum Physics',

    # Statistics
    'stat.AP' => 'Applications',
    'stat.CO' => 'Computation',
    'stat.ME' => 'Methodology',
    'stat.ML' => 'Machine Learning',
    'stat.OT' => 'Other Statistics',
    'stat.TH' => 'Statistics Theory'
  }.freeze
end
@@ -0,0 +1,13 @@
1
module Arx

  # Utility for normalising whitespace in text.
  class Cleaner

    # Normalises the whitespace of a string: every carriage return and line
    # feed becomes a space, leading/trailing whitespace is stripped, and runs
    # of consecutive spaces are collapsed into a single space.
    #
    # @param string [String] The string to clean.
    # @return [String] The cleaned string.
    def self.clean(string)
      # A "\r\n" pair turns into two spaces here, which the final squeeze
      # collapses back into one.
      flattened = string.tr("\r\n", ' ')
      flattened.strip.squeeze(' ')
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'happymapper'
2
+ require 'arx/cleaner'
3
+
4
module Arx

  # Entity/model for an author of an arXiv paper, mapped from an +author+ XML tag.
  class Author
    include HappyMapper

    tag 'author'

    # @!method name
    # The author's name, taken from the +name+ tag and passed through {Cleaner.clean}.
    # @return [String]
    element :name, Cleaner, tag: 'name', parser: :clean

    # @!method affiliations
    # The author's affiliations, one per +affiliation+ tag, each passed through {Cleaner.clean}.
    # @return [Array<String>]
    has_many :affiliations, Cleaner, tag: 'affiliation', parser: :clean

    # Whether or not the author has at least one affiliation.
    #
    # @return [Boolean]
    def affiliations?
      affiliations.size.positive?
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'arx/categories'
2
+ require 'arx/cleaner'
3
+
4
module Arx

  # Entity/model representing an arXiv paper's category, mapped from a +category+ XML tag.
  class Category
    include HappyMapper

    tag 'category'

    # @!method name
    # The abbreviated name of the category (e.g. +cs.AI+), read from the
    # tag's +term+ attribute and passed through {Cleaner.clean}.
    # @return [String]
    attribute :name, Cleaner, parser: :clean, tag: 'term'

    # The full name of the category.
    #
    # @see CATEGORIES
    # @return [String, nil] The full name, or +nil+ if the abbreviated name is
    #   not a key of {CATEGORIES}.
    def full_name
      # The +term+ XML attribute is exposed through the +name+ accessor; there
      # is no +term+ method on this class.
      CATEGORIES[name]
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'happymapper'
2
+
3
module Arx

  # Helper entity/model for a link attached to an arXiv paper, mapped from a +link+ XML tag.
  class Link
    include HappyMapper

    tag 'link'

    # XML attributes of the +link+ tag, each mapped as a String.
    attribute :title, String
    attribute :rel, String
    attribute :type, String
    attribute :href, String

    # Whether this is a PDF link, i.e. its +title+ attribute is +pdf+.
    #
    # @return [Boolean]
    def pdf?
      @title == 'pdf'
    end

    # Whether this is a DOI link, i.e. its +title+ attribute is +doi+.
    #
    # @return [Boolean]
    def doi?
      @title == 'doi'
    end
  end
end
@@ -0,0 +1,159 @@
1
+ require 'happymapper'
2
+ require 'arx/exceptions'
3
+ require 'arx/cleaner'
4
+ require_relative 'author'
5
+ require_relative 'category'
6
+ require_relative 'link'
7
+
8
module Arx

  # Entity/model representing an arXiv paper, mapped from an +entry+ XML tag.
  class Paper
    include HappyMapper

    tag 'entry'

    # Maps the +id+ tag (a full arXiv URL, see {url}); the generated reader is
    # overridden by {id} below.
    element :id, Cleaner, parser: :clean, tag: 'id'

    # The identifier of the paper.
    # @note This is either in {Validate::OLD_IDENTIFIER_FORMAT} or {Validate::NEW_IDENTIFIER_FORMAT}.
    # @example
    #   1705.01662v1
    #   cond-mat/0211034
    # @return [String] The paper's identifier.
    def id
      # Strip the URL prefix so that only the identifier remains.
      @id.sub(%r{https?://arxiv\.org/abs/}, '')
    end

    # The URL of the paper on the arXiv website.
    # @example
    #   http://arxiv.org/abs/1705.01662v1
    #   http://arxiv.org/abs/cond-mat/0211034
    # @return [String] The paper's arXiv URL.
    def url
      @id
    end

    # @!method last_updated
    # The date that the paper was last updated.
    # @return [DateTime]
    element :last_updated, DateTime, tag: 'updated'

    # @!method publish_date
    # The original publish/submission date of the paper.
    # @return [DateTime]
    element :publish_date, DateTime, tag: 'published'

    # @!method title
    # The title of the paper.
    # @return [String]
    element :title, Cleaner, parser: :clean, tag: 'title'

    # @!method authors
    # The authors of the paper.
    # @return [Array<Author>]
    has_many :authors, Author, tag: 'author'

    # @!method primary_category
    # The primary category of the paper.
    # @return [Category]
    element :primary_category, Category, tag: 'primary_category'

    # @!method categories
    # The categories of the paper.
    # @return [Array<Category>]
    has_many :categories, Category, tag: 'category'

    # Whether the paper is a revision or not.
    # @note A paper is a revision if {last_updated} differs from {publish_date}.
    # @return [Boolean]
    def revision?
      @publish_date != @last_updated
    end

    # @!method summary
    # The summary (or abstract) of the paper.
    # @return [String]
    element :summary, Cleaner, parser: :clean, tag: 'summary'
    alias_method :abstract, :summary

    # @!method comment?
    # Whether or not the paper has a comment.
    # @return [Boolean]

    # @!method comment
    # The comment of the paper.
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
    # @raise {MissingFieldError} If the paper does not have a comment.
    # @return [String]
    element :comment, Cleaner, parser: :clean, tag: 'comment'

    # @!method journal?
    # Whether or not the paper has a journal reference.
    # @return [Boolean]

    # @!method journal
    # The journal reference of the paper.
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
    # @raise {MissingFieldError} If the paper does not have a journal reference.
    # @return [String]
    element :journal, Cleaner, parser: :clean, tag: 'journal_ref'

    # Replaces the readers generated by +element+ for the optional fields
    # with a predicate plus a reader that raises when the field is empty.
    %i[comment journal].each do |optional|
      ivar = "@#{optional}"
      predicate = "#{optional}?"

      # NOTE(review): relies on the mapped value responding to #empty? —
      # confirm HappyMapper yields an empty string (not nil) for an absent tag.
      define_method(predicate) do
        !instance_variable_get(ivar).empty?
      end

      define_method(optional) do
        raise MissingFieldError.new(optional) unless send(predicate)

        instance_variable_get(ivar)
      end
    end

    # @!method links
    # The links attached to the paper.
    # @return [Array<Link>]
    has_many :links, Link, tag: 'link'

    # @!method pdf?
    # Whether or not the paper has a PDF link.
    # @return [Boolean]

    # @!method pdf_url
    # Link to the PDF version of the paper.
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
    # @raise {MissingLinkError} If the paper does not have a PDF link.
    # @return [String]

    # @!method doi?
    # Whether or not the paper has a DOI (Digital Object Identifier) link.
    # @see https://arxiv.org/help/jref#doi
    # @see https://arxiv.org/help/prep#doi
    # @return [Boolean]

    # @!method doi_url
    # Link to the DOI (Digital Object Identifier) of the paper.
    # @see https://arxiv.org/help/jref#doi
    # @see https://arxiv.org/help/prep#doi
    # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
    # @raise {MissingLinkError} If the paper does not have a DOI link.
    # @return [String]

    # Defines +pdf?+/+pdf_url+ and +doi?+/+doi_url+ by delegating to the
    # predicate of the same name on each {Link}.
    %i[pdf doi].each do |link_type|
      predicate = :"#{link_type}?"

      define_method(predicate) do
        links.any?(&predicate)
      end

      define_method("#{link_type}_url") do
        raise MissingLinkError.new(link_type.to_s.upcase) unless send(predicate)

        links.find(&predicate).href
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Arx

  # Raised when an arXiv paper lacks a requested kind of link.
  class MissingLinkError < StandardError
    # @param link_type [#to_s] The kind of link that is missing; interpolated into the message.
    def initialize(link_type)
      message = "This arXiv paper does not have a #{link_type} link"
      super(message)
    end
  end

  # Raised when an arXiv paper lacks a requested metadata field.
  class MissingFieldError < StandardError
    # @param field [#to_s] The name of the missing field; interpolated into the message.
    def initialize(field)
      message = "This arXiv paper is missing the `#{field}` field"
      super(message)
    end
  end

  # Raised when no arXiv paper could be found for an identifier.
  class MissingPaper < StandardError
    # @param id [#to_s] The identifier that was looked up; interpolated into the message.
    def initialize(id)
      message = "Couldn't find an arXiv paper with ID: #{id}"
      super(message)
    end
  end
end
@@ -0,0 +1,266 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require_relative 'validate'
5
+
6
module Arx

  # Class for generating arXiv search API query strings.
  #
  # The query string is built incrementally: sorting parameters and the
  # optional ID list are appended on initialization, and each field method
  # (see {FIELDS}) appends a subquery to the +search_query+ parameter.
  #
  # @attr query [String] The string representing the search query.
  class Query

    # Mapping for URL query parameters supported by the arXiv search API.
    PARAMS = {
      search_query: 'search_query',
      id_list: 'id_list',
      sort_by: 'sortBy',
      sort_order: 'sortOrder'
    }.freeze

    # Logical connectives supported by the arXiv search API.
    CONNECTIVES = {
      and: 'AND',
      or: 'OR',
      and_not: 'ANDNOT'
    }.freeze

    # Logical connective method names.
    CONNECTIVE_METHODS = {
      '&': :and,
      '!': :and_not,
      '|': :or
    }.freeze

    # Supported fields for the search queries made to the arXiv search API.
    # @see https://arxiv.org/help/prep arXiv metadata fields
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual (query details)
    FIELDS = {
      title: 'ti',     # Title
      author: 'au',    # Author
      abstract: 'abs', # Abstract
      comment: 'co',   # Comment
      journal: 'jr',   # Journal reference
      category: 'cat', # Subject category
      report: 'rn',    # Report number
      all: 'all'       # All (of the above)
    }.freeze

    # Supported criteria for the +sortBy+ parameter.
    SORT_BY = {
      relevance: 'relevance',
      last_updated: 'lastUpdated',
      date_submitted: 'submittedDate'
    }.freeze

    # Supported criteria for the +sortOrder+ parameter.
    SORT_ORDER = {
      ascending: 'ascending',
      descending: 'descending'
    }.freeze

    # Initializes a new Query object.
    #
    # @param ids [Array<String>] The IDs (or arXiv URLs) of the arXiv papers to restrict the query to.
    # @param sort_by [Symbol] The sorting criteria for the returned results (see {SORT_BY}).
    # @param sort_order [Symbol] The sorting order for the returned results (see {SORT_ORDER}).
    # @yieldparam query [Query] The query being initialized, if a block is given.
    # @raise [TypeError, ArgumentError] If any argument fails validation (see {Validate}).
    # @return [Query] The initialized query object.
    def initialize(*ids, sort_by: :relevance, sort_order: :descending)
      # An unfrozen buffer is required since the file enables frozen string literals.
      @query = String.new

      Validate.sort_by sort_by, permitted: SORT_BY.keys
      @query << "&#{PARAMS[:sort_by]}=#{SORT_BY[sort_by]}"

      Validate.sort_order sort_order, permitted: SORT_ORDER.keys
      @query << "&#{PARAMS[:sort_order]}=#{SORT_ORDER[sort_order]}"

      ids.flatten!
      unless ids.empty?
        # Only strings can be URLs; anything else is passed through untouched
        # so that Validate.ids can report it with a descriptive TypeError.
        ids.map! { |id| id.is_a?(String) ? extract_id(id) : id }
        Validate.ids ids
        @query << "&#{PARAMS[:id_list]}=#{ids.join(',')}"
      end

      yield self if block_given?
    end

    # @!method &
    # Logical conjunction (+AND+) of subqueries.
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
    # @return [self]

    # @!method !
    # Logical negated conjunction (+ANDNOT+) of subqueries.
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
    # @return [self]

    # @!method |
    # Logical disjunction (+OR+) of subqueries.
    # @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
    # @return [self]

    CONNECTIVE_METHODS.each do |symbol, connective|
      define_method(symbol) { add_connective connective }
    end

    # @!method title(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#title title}.
    # @param values [Array<String>] Title(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the title(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method author(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#author author}.
    # @param values [Array<String>] Author(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the author's name(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method abstract(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#abstract abstract}.
    # @param values [Array<String>] Abstract(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the abstract(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method comment(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#comments comment}.
    # @param values [Array<String>] Comment(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the comment(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method journal(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#journal journal reference}.
    # @param values [Array<String>] Journal reference(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the journal reference(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method category(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#category category}.
    # @param values [Array<String>] Category(s) of papers to search for.
    # @param exact [Boolean] Whether to enquote the category value(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method report(*values, exact: true, connective: :and)
    # Search for papers by {https://arxiv.org/help/prep#report report number}.
    # @param values [Array<String>] Report number(s) of papers to search for.
    # @param exact [Boolean] Whether to enquote the report number(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    # @!method all(*values, exact: true, connective: :and)
    # Search for papers by all fields (see {FIELDS}).
    # @param values [Array<String>] Field value(s) of papers to search for.
    # @param exact [Boolean] Whether to search for an exact match of the value(s).
    # @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
    # @return [self]

    FIELDS.each do |name, field|
      define_method(name) do |*values, exact: true, connective: :and|
        # Nothing to add, but still return self so that calls can be chained.
        return self if values.empty?

        Validate.values values
        Validate.categories values if name == :category
        Validate.exact exact
        Validate.connective connective, permitted: CONNECTIVES.keys

        values.map!(&CGI.method(:escape))

        # Forms a field:value pair
        pair = ->(value) { "#{field}:#{exact ? enquote(value) : value}" }

        subquery = if values.size > 1
          parenthesize values.map(&pair).join("+#{CONNECTIVES[connective]}+")
        else
          pair.(values.first)
        end

        add_subquery subquery
        self
      end
    end

    # Returns the query string.
    #
    # @return [String]
    def to_s
      @query
    end

    private

    # Appends a logical connective to the end of the query string.
    # Does nothing if there is no search query yet, or if the query string
    # already ends with a connective.
    #
    # @see CONNECTIVES
    # @param connective [Symbol] The symbol of the logical connective to add.
    # @return [self]
    def add_connective(connective)
      return self unless search_query?

      @query << "+#{CONNECTIVES[connective]}" unless ends_with_connective?
      self
    end

    # Appends a subquery to the end of the query string, starting the
    # +search_query+ parameter if needed and joining with +AND+ when no
    # explicit connective precedes it.
    #
    # @param subquery [String] The subquery to add.
    def add_subquery(subquery)
      if search_query?
        add_connective :and unless ends_with_connective?
        @query << "+#{subquery}"
      else
        @query << "&#{PARAMS[:search_query]}=#{subquery}"
      end
    end

    # Whether the query string contains the +search_query+ parameter.
    #
    # @see PARAMS
    # @return [Boolean]
    def search_query?
      @query.include? PARAMS[:search_query]
    end

    # Whether the query string ends in a logical connective.
    #
    # @see CONNECTIVES
    # @return [Boolean]
    def ends_with_connective?
      # Connectives are always appended with a leading "+", so require it:
      # otherwise an unquoted value that merely ends in "OR"/"AND" would be
      # mistaken for a trailing connective.
      CONNECTIVES.each_value.any? { |connective| @query.end_with?("+#{connective}") }
    end

    # Parenthesizes a string with CGI-escaped parentheses.
    #
    # @param string [String] The string to parenthesize.
    # @return [String] The parenthesized string.
    def parenthesize(string)
      CGI.escape('(') + string + CGI.escape(')')
    end

    # Enquotes a string with CGI-escaped double quotes.
    #
    # @param string [String] The string to enquote.
    # @return [String] The enquoted string.
    def enquote(string)
      CGI.escape('"') + string + CGI.escape('"')
    end

    # Attempt to extract an ID from an arXiv URL.
    #
    # @param url [String] The URL to extract the ID from.
    # @return [String] The extracted ID if successful, otherwise the original string.
    def extract_id(url)
      # Anchored to the whole string, with the dot after "www" escaped.
      prefix = %r{\A(https?://)?(www\.)?arxiv\.org/abs/}
      return url unless prefix.match?(url)

      url.sub(prefix, '').sub(%r{/\z}, '')
    end
  end
end
@@ -0,0 +1,123 @@
1
+ require_relative '../categories'
2
+
3
module Arx

  # Validations for arXiv search query fields and identifier schemes.
  module Validate

    # The current arxiv paper identifier scheme (1 April 2007 and onwards).
    # The last block of digits can either be five digits (if the paper was published after 1501 - January 2015),
    # or four digits (if the paper was published before 1501).
    #
    # Anchored with +\A+/+\z+ so that the whole string must match; line
    # anchors would accept multi-line strings containing one valid line.
    #
    # @see https://arxiv.org/help/arxiv_identifier#new arXiv identifier (new)
    # @example
    #   1501.00001
    #   1705.01662v1
    #   1412.0135
    #   0706.0001v2
    NEW_IDENTIFIER_FORMAT = %r{\A\d{4}\.\d{4,5}(v\d+)?\z}

    # The legacy arXiv paper identifier scheme (before 1 April 2007).
    #
    # Anchored with +\A+/+\z+ so that the whole string must match.
    #
    # @see https://arxiv.org/help/arxiv_identifier#old arXiv identifier (old)
    # @example
    #   math/0309136v1
    #   cond-mat/0211034
    OLD_IDENTIFIER_FORMAT = %r{\A[a-z]+(-[a-z]+)?/\d{7}(v\d+)?\z}

    class << self
      # Validates the +sortBy+ field of the query string.
      #
      # @param value [Symbol] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the field.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def sort_by(value, permitted:)
        symbol_choice 'sort_by', value, permitted
      end

      # Validates the +sortOrder+ field of the query string.
      #
      # @param value [Symbol] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the field.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def sort_order(value, permitted:)
        symbol_choice 'sort_order', value, permitted
      end

      # Validates a list of arXiv paper identifiers.
      #
      # @param ids [Array<String>] The identifiers to validate.
      # @raise
      #   [TypeError] If +ids+ is not an +Array+.
      #   [TypeError] If any identifier is not a +String+.
      #   [ArgumentError] If the identifier is invalid.
      def ids(ids)
        raise TypeError, "Expected `ids` to be an Array, got: #{ids.class}" unless ids.is_a? Array

        ids.each do |id|
          raise TypeError, "Expected identifier to be a String, got: #{id.class} (#{id})" unless id.is_a? String
          raise ArgumentError, "Malformed arXiv identifier: #{id}" unless id? id
        end
      end

      # Validates the +exact+ parameter.
      #
      # @param value [Boolean] The value to validate.
      # @raise
      #   [TypeError] If the value is not a boolean (+TrueClass+ or +FalseClass+).
      def exact(value)
        return if value == !!value

        raise TypeError, "Expected `exact` to be boolean (TrueClass or FalseClass), got: #{value.class}"
      end

      # Validates a logical connective.
      #
      # @param value [Symbol] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the field.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def connective(value, permitted:)
        symbol_choice 'connective', value, permitted
      end

      # Validates a list of values for the fields of the search query string.
      #
      # @param values [Array<String>] The values to validate.
      # @raise
      #   [TypeError] If +values+ is not an +Array+.
      #   [TypeError] If any value is not a +String+.
      def values(values)
        raise TypeError, "Expected `values` to be an Array, got: #{values.class}" unless values.is_a? Array

        values.each do |value|
          raise TypeError, "Expected value to be a String, got: #{value.class} (#{value})" unless value.is_a? String
        end
      end

      # Validates a list of arXiv categories.
      #
      # @note This is only called after {values}, so there is no need to check types.
      # @param categories [Array<String>] The categories to validate.
      # @raise [ArgumentError] If any category is unrecognized (not a valid arXiv category).
      # @see Arx::CATEGORIES
      def categories(categories)
        categories.each do |category|
          next if Arx::CATEGORIES.key? category

          raise ArgumentError, "Unrecognized arXiv category (#{category}). See Arx::CATEGORIES."
        end
      end

      # Validates an arXiv identifier of both the old and new schemes.
      #
      # @see NEW_IDENTIFIER_FORMAT
      # @see OLD_IDENTIFIER_FORMAT
      # @param id [String] The identifier to check.
      # @return [Boolean] Whether the identifier matches either scheme.
      def id?(id)
        NEW_IDENTIFIER_FORMAT.match?(id) || OLD_IDENTIFIER_FORMAT.match?(id)
      end

      private

      # Shared check for options that must be a Symbol drawn from a permitted list.
      #
      # @param name [String] The option name used in the error messages.
      # @param value [Object] The value to validate.
      # @param permitted [Array<Symbol>] Permitted values for the option.
      # @raise
      #   [TypeError] If the value is not a +Symbol+.
      #   [ArgumentError] If the value is not permitted.
      def symbol_choice(name, value, permitted)
        raise TypeError, "Expected `#{name}` to be a Symbol, got: #{value.class}" unless value.is_a? Symbol
        return if permitted.include? value

        raise ArgumentError, "Expected `#{name}` to be one of #{permitted}, got: #{value}"
      end
    end
  end
end
data/lib/arx/version.rb CHANGED
@@ -1,8 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Arx
4
+
5
+ # The current version of Arx.
2
6
  VERSION = {
3
7
  major: 0,
4
8
  minor: 1,
5
9
  patch: 0,
6
- meta: 'alpha'
7
- }.values.reject(&:nil?).map(&:to_s)*?.
10
+ meta: nil
11
+ }.compact.values.join('.').freeze
8
12
  end
data/lib/arx.rb CHANGED
@@ -1,5 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
1
5
  require 'arx/version'
6
+ require 'arx/categories'
7
+ require 'arx/query/query'
8
+ require 'arx/query/validate'
9
+ require 'arx/entities/author'
10
+ require 'arx/entities/category'
11
+ require 'arx/entities/paper'
2
12
 
13
+ # A Ruby interface for querying academic papers on the arXiv search API.
3
14
  module Arx
4
- # Your code goes here...
15
+
16
+ # The arXiv search API endpoint.
17
+ ENDPOINT = 'http://export.arxiv.org/api/query?'
18
+
19
# Performs a search query for papers on the arXiv search API.
#
# @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
# @param sort_by [Symbol] The sorting criteria for the returned results (see {Query::SORT_BY}).
# @param sort_order [Symbol] The sorting order for the returned results (see {Query::SORT_ORDER}).
# @yieldparam query [Query] The query built from the arguments, yielded (when a block is given)
#   before the request is sent so that it can be refined.
# @raise [MissingPaper] If exactly one ID was given and no paper was found.
# @return [Array<Paper>, Paper] A single {Paper} when exactly one ID was given and exactly one
#   result was found; otherwise the array of {Paper}s found by the search query.
def self.search(*ids, sort_by: :relevance, sort_order: :descending)
  query = Query.new(*ids, sort_by: sort_by, sort_order: sort_order)
  yield query if block_given?

  url = "#{ENDPOINT}#{query}&max_results=10000"
  # Kernel#open no longer accepts URLs with open-uri on Ruby 3.0+. Opening through the parsed URI
  # works on old and new Rubies, and the block form closes the response IO once it is parsed.
  document = URI.parse(url).open { |response| Nokogiri::XML(response) }
  document.remove_namespaces!

  # Entries with an empty ID are dropped before deciding what to return.
  results = Paper.parse(document, single: false).reject { |paper| paper.id.empty? }
  raise MissingPaper.new(ids.first) if results.empty? && ids.size == 1

  ids.size == 1 && results.size == 1 ? results.first : results
end
5
37
  end
38
+
39
# Performs a search query for papers on the arXiv search API.
#
# @note This is an alias of the {Arx.search} method.
# @see Arx.search
# @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
# @param sort_by [Symbol] The sorting criteria for the returned results (see {Arx::Query::SORT_BY}).
# @param sort_order [Symbol] The sorting order for the returned results (see {Arx::Query::SORT_ORDER}).
# @param block [Proc] Optional block, forwarded unchanged to {Arx.search}.
# @return [Array<Arx::Paper>, Arx::Paper] The {Arx::Paper}(s) found by the search query.
def Arx(*ids, sort_by: :relevance, sort_order: :descending, &block)
  # Forwarding a nil block is the same as passing no block, so one call covers both cases.
  Arx.search(*ids, sort_by: sort_by, sort_order: sort_order, &block)
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.alpha
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edwin Onuonga
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-19 00:00:00.000000000 Z
11
+ date: 2019-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri-happymapper
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.8'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +94,15 @@ files:
80
94
  - Rakefile
81
95
  - arx.gemspec
82
96
  - lib/arx.rb
97
+ - lib/arx/categories.rb
98
+ - lib/arx/cleaner.rb
99
+ - lib/arx/entities/author.rb
100
+ - lib/arx/entities/category.rb
101
+ - lib/arx/entities/link.rb
102
+ - lib/arx/entities/paper.rb
103
+ - lib/arx/exceptions.rb
104
+ - lib/arx/query/query.rb
105
+ - lib/arx/query/validate.rb
83
106
  - lib/arx/version.rb
84
107
  homepage:
85
108
  licenses:
@@ -96,12 +119,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
96
119
  version: '0'
97
120
  required_rubygems_version: !ruby/object:Gem::Requirement
98
121
  requirements:
99
- - - ">"
122
+ - - ">="
100
123
  - !ruby/object:Gem::Version
101
- version: 1.3.1
124
+ version: '0'
102
125
  requirements: []
103
126
  rubygems_version: 3.0.3
104
127
  signing_key:
105
128
  specification_version: 4
106
- summary: A Ruby wrapper for the arXiv API.
129
+ summary: A Ruby interface for querying academic papers on the arXiv search API.
107
130
  test_files: []