arx 0.1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +125 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +407 -1
- data/Rakefile +60 -2
- data/arx.gemspec +15 -2
- data/lib/arx/categories.rb +10 -0
- data/lib/arx/cleaner.rb +45 -5
- data/lib/arx/entities/author.rb +4 -5
- data/lib/arx/entities/category.rb +4 -4
- data/lib/arx/entities/link.rb +1 -2
- data/lib/arx/entities/paper.rb +43 -29
- data/lib/arx/error.rb +27 -0
- data/lib/arx/inspector.rb +42 -0
- data/lib/arx/query/query.rb +42 -45
- data/lib/arx/query/validate.rb +4 -24
- data/lib/arx/version.rb +2 -2
- data/lib/arx.rb +62 -20
- metadata +46 -12
- data/lib/arx/exceptions.rb +0 -23
data/lib/arx/categories.rb
CHANGED
|
@@ -11,6 +11,7 @@ module Arx
|
|
|
11
11
|
'astro-ph.HE' => 'High Energy Astrophysical Phenomena',
|
|
12
12
|
'astro-ph.IM' => 'Instrumentation and Methods for Astrophysics',
|
|
13
13
|
'astro-ph.SR' => 'Solar and Stellar Astrophysics',
|
|
14
|
+
'cond-mat' => 'Condensed Matter',
|
|
14
15
|
'cond-mat.dis-nn' => 'Disordered Systems and Neural Networks',
|
|
15
16
|
'cond-mat.mes-hall' => 'Mesoscale and Nanoscale Physics',
|
|
16
17
|
'cond-mat.mtrl-sci' => 'Materials Science',
|
|
@@ -20,6 +21,7 @@ module Arx
|
|
|
20
21
|
'cond-mat.stat-mech' => 'Statistical Mechanics',
|
|
21
22
|
'cond-mat.str-el' => 'Strongly Correlated Electrons',
|
|
22
23
|
'cond-mat.supr-con' => 'Superconductivity',
|
|
24
|
+
'cs' => 'Computer Science',
|
|
23
25
|
'cs.AI' => 'Artificial Intelligence',
|
|
24
26
|
'cs.AR' => 'Hardware Architecture',
|
|
25
27
|
'cs.CC' => 'Computational Complexity',
|
|
@@ -60,7 +62,9 @@ module Arx
|
|
|
60
62
|
'cs.SE' => 'Software Engineering',
|
|
61
63
|
'cs.SI' => 'Social and Information Networks',
|
|
62
64
|
'cs.SY' => 'Systems and Control',
|
|
65
|
+
'econ' => 'Economics',
|
|
63
66
|
'econ.EM' => 'Econometrics',
|
|
67
|
+
'eess' => 'Electrical Engineering and Systems Science',
|
|
64
68
|
'eess.AS' => 'Audio and Speech Processing',
|
|
65
69
|
'eess.IV' => 'Image and Video Processing',
|
|
66
70
|
'eess.SP' => 'Signal Processing',
|
|
@@ -69,6 +73,7 @@ module Arx
|
|
|
69
73
|
'hep-lat' => 'High Energy Physics - Lattice',
|
|
70
74
|
'hep-ph' => 'High Energy Physics - Phenomenology',
|
|
71
75
|
'hep-th' => 'High Energy Physics - Theory',
|
|
76
|
+
'math' => 'Mathematics',
|
|
72
77
|
'math.AC' => 'Commutative Algebra',
|
|
73
78
|
'math.AG' => 'Algebraic Geometry',
|
|
74
79
|
'math.AP' => 'Analysis of PDEs',
|
|
@@ -102,6 +107,7 @@ module Arx
|
|
|
102
107
|
'math.SP' => 'Spectral Theory',
|
|
103
108
|
'math.ST' => 'Statistics Theory',
|
|
104
109
|
'math-ph' => 'Mathematical Physics',
|
|
110
|
+
'nlin' => 'Nonlinear Sciences',
|
|
105
111
|
'nlin.AO' => 'Adaptation and Self-Organizing Systems',
|
|
106
112
|
'nlin.CD' => 'Chaotic Dynamics',
|
|
107
113
|
'nlin.CG' => 'Cellular Automata and Lattice Gases',
|
|
@@ -109,6 +115,7 @@ module Arx
|
|
|
109
115
|
'nlin.SI' => 'Exactly Solvable and Integrable Systems',
|
|
110
116
|
'nucl-ex' => 'Nuclear Experiment',
|
|
111
117
|
'nucl-th' => 'Nuclear Theory',
|
|
118
|
+
'physics' => 'Physics',
|
|
112
119
|
'physics.acc-ph' => 'Accelerator Physics',
|
|
113
120
|
'physics.ao-ph' => 'Atmospheric and Oceanic Physics',
|
|
114
121
|
'physics.app-ph' => 'Applied Physics',
|
|
@@ -131,6 +138,7 @@ module Arx
|
|
|
131
138
|
'physics.pop-ph' => 'Popular Physics',
|
|
132
139
|
'physics.soc-ph' => 'Physics and Society',
|
|
133
140
|
'physics.space-ph' => 'Space Physics',
|
|
141
|
+
'q-bio' => 'Quantitative Biology',
|
|
134
142
|
'q-bio.BM' => 'Biomolecules',
|
|
135
143
|
'q-bio.CB' => 'Cell Behavior',
|
|
136
144
|
'q-bio.GN' => 'Genomics',
|
|
@@ -141,6 +149,7 @@ module Arx
|
|
|
141
149
|
'q-bio.QM' => 'Quantitative Methods',
|
|
142
150
|
'q-bio.SC' => 'Subcellular Processes',
|
|
143
151
|
'q-bio.TO' => 'Tissues and Organs',
|
|
152
|
+
'q-fin' => 'Quantitative Finance',
|
|
144
153
|
'q-fin.CP' => 'Computational Finance',
|
|
145
154
|
'q-fin.EC' => 'Economics',
|
|
146
155
|
'q-fin.GN' => 'General Finance',
|
|
@@ -151,6 +160,7 @@ module Arx
|
|
|
151
160
|
'q-fin.ST' => 'Statistical Finance',
|
|
152
161
|
'q-fin.TR' => 'Trading and Market Microstructure',
|
|
153
162
|
'quant-ph' => 'Quantum Physics',
|
|
163
|
+
'stat' => 'Statistics',
|
|
154
164
|
'stat.AP' => 'Applications',
|
|
155
165
|
'stat.CO' => 'Computation',
|
|
156
166
|
'stat.ME' => 'Methodology',
|
data/lib/arx/cleaner.rb
CHANGED
|
@@ -1,13 +1,53 @@
|
|
|
1
1
|
module Arx
|
|
2
2
|
|
|
3
3
|
# Class for cleaning strings.
|
|
4
|
+
# @private
|
|
4
5
|
class Cleaner
|
|
5
6
|
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
# arXiv paper URL prefix format
|
|
8
|
+
URL_PREFIX = /^(https?\:\/\/)?(www.)?arxiv\.org\/abs\//
|
|
9
|
+
|
|
10
|
+
class << self
|
|
11
|
+
|
|
12
|
+
# Cleans strings.
|
|
13
|
+
# @param [String] string The string to clean by removing newline/return characters and multiple spaces.
|
|
14
|
+
# @return [String] The cleaned string.
|
|
15
|
+
def clean(string)
|
|
16
|
+
string.gsub(/\r\n|\r|\n/, ' ').strip.squeeze ' '
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Attempt to extract an arXiv identifier from a string such as a URL.
|
|
20
|
+
#
|
|
21
|
+
# @param string [String] The string to extract the ID from.
|
|
22
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
|
23
|
+
# @return [String] The extracted ID.
|
|
24
|
+
def extract_id(string, version: false)
|
|
25
|
+
if version == !!version
|
|
26
|
+
if string.is_a? String
|
|
27
|
+
trimmed = /#{URL_PREFIX}.+\/?$/.match?(string) ? string.gsub(/(#{URL_PREFIX})|(\/$)/, '') : string
|
|
28
|
+
raise ArgumentError.new("Couldn't extract arXiv identifier from: #{string}") unless Validate.id? trimmed
|
|
29
|
+
version ? trimmed : trimmed.sub(/v[0-9]+$/, '')
|
|
30
|
+
else
|
|
31
|
+
raise TypeError.new("Expected `string` to be a String, got: #{string.class}")
|
|
32
|
+
end
|
|
33
|
+
else
|
|
34
|
+
raise TypeError.new("Expected `version` to be boolean (TrueClass or FalseClass), got: #{version.class}")
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Attempt to extract a version number from an arXiv identifier.
|
|
39
|
+
#
|
|
40
|
+
# @param string [String] The arXiv identifier to extract the version number from.
|
|
41
|
+
# @return [String] The extracted version number.
|
|
42
|
+
def extract_version(string)
|
|
43
|
+
reversed = extract_id(string, version: true).reverse
|
|
44
|
+
|
|
45
|
+
if /^[0-9]+v/.match? reversed
|
|
46
|
+
reversed.partition('v').first.reverse.to_i
|
|
47
|
+
else
|
|
48
|
+
raise ArgumentError.new("Couldn't extract version number from identifier: #{string}")
|
|
49
|
+
end
|
|
50
|
+
end
|
|
11
51
|
end
|
|
12
52
|
end
|
|
13
53
|
end
|
data/lib/arx/entities/author.rb
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
require 'happymapper'
|
|
2
|
-
require 'arx/cleaner'
|
|
3
|
-
|
|
4
1
|
module Arx
|
|
5
2
|
|
|
6
3
|
# Entity/model representing an arXiv paper's author.
|
|
7
4
|
class Author
|
|
8
5
|
include HappyMapper
|
|
6
|
+
include Inspector
|
|
9
7
|
|
|
10
8
|
tag 'author'
|
|
11
9
|
|
|
@@ -19,11 +17,12 @@ module Arx
|
|
|
19
17
|
# @return [Array<String>]
|
|
20
18
|
has_many :affiliations, Cleaner, tag: 'affiliation', parser: :clean
|
|
21
19
|
|
|
22
|
-
# @!method affiliations?
|
|
23
20
|
# Whether or not the author has any affiliations.
|
|
24
21
|
# @return [Boolean]
|
|
25
|
-
def
|
|
22
|
+
def affiliated?
|
|
26
23
|
!affiliations.empty?
|
|
27
24
|
end
|
|
25
|
+
|
|
26
|
+
inspector :name, :affiliated?, :affiliations
|
|
28
27
|
end
|
|
29
28
|
end
|
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
require 'arx/categories'
|
|
2
|
-
require 'arx/cleaner'
|
|
3
|
-
|
|
4
1
|
module Arx
|
|
5
2
|
|
|
6
3
|
# Entity/model representing an arXiv paper's category.
|
|
7
4
|
class Category
|
|
8
5
|
include HappyMapper
|
|
6
|
+
include Inspector
|
|
9
7
|
|
|
10
8
|
tag 'category'
|
|
11
9
|
|
|
@@ -18,7 +16,9 @@ module Arx
|
|
|
18
16
|
# @see CATEGORIES
|
|
19
17
|
# @return [String]
|
|
20
18
|
def full_name
|
|
21
|
-
CATEGORIES[
|
|
19
|
+
CATEGORIES[name]
|
|
22
20
|
end
|
|
21
|
+
|
|
22
|
+
inspector :name, :full_name
|
|
23
23
|
end
|
|
24
24
|
end
|
data/lib/arx/entities/link.rb
CHANGED
data/lib/arx/entities/paper.rb
CHANGED
|
@@ -1,47 +1,56 @@
|
|
|
1
|
-
require 'happymapper'
|
|
2
|
-
require 'arx/exceptions'
|
|
3
|
-
require 'arx/cleaner'
|
|
4
|
-
require_relative 'author'
|
|
5
|
-
require_relative 'category'
|
|
6
|
-
require_relative 'link'
|
|
7
|
-
|
|
8
1
|
module Arx
|
|
9
2
|
|
|
10
3
|
# Entity/model representing an arXiv paper.
|
|
11
4
|
class Paper
|
|
12
5
|
include HappyMapper
|
|
6
|
+
include Inspector
|
|
13
7
|
|
|
14
8
|
tag 'entry'
|
|
15
9
|
|
|
16
10
|
element :id, Cleaner, parser: :clean, tag: 'id'
|
|
17
11
|
# The identifier of the paper.
|
|
18
|
-
# @note This is either in {
|
|
12
|
+
# @note This is either in {OLD_IDENTIFIER_FORMAT} or {NEW_IDENTIFIER_FORMAT}.
|
|
19
13
|
# @example
|
|
20
14
|
# 1705.01662v1
|
|
21
15
|
# cond-mat/0211034
|
|
16
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
|
22
17
|
# @return [String] The paper's identifier.
|
|
23
|
-
def id
|
|
24
|
-
@id
|
|
18
|
+
def id(version: false)
|
|
19
|
+
Cleaner.extract_id @id, version: version
|
|
25
20
|
end
|
|
26
21
|
|
|
27
22
|
# The URL of the paper on the arXiv website.
|
|
28
23
|
# @example
|
|
29
24
|
# http://arxiv.org/abs/1705.01662v1
|
|
30
25
|
# http://arxiv.org/abs/cond-mat/0211034
|
|
26
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
|
31
27
|
# @return [String] The paper's arXiv URL.
|
|
32
|
-
def url
|
|
33
|
-
|
|
28
|
+
def url(version: false)
|
|
29
|
+
"http://arxiv.org/abs/#{id version: version}"
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# The version of the paper.
|
|
33
|
+
# @return [Integer] The paper's version.
|
|
34
|
+
def version
|
|
35
|
+
Cleaner.extract_version @id
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Whether the paper is a revision or not.
|
|
39
|
+
# @note A paper is a revision if its {version} is greater than 1.
|
|
40
|
+
# @return [Boolean]
|
|
41
|
+
def revision?
|
|
42
|
+
version > 1
|
|
34
43
|
end
|
|
35
44
|
|
|
36
|
-
# @!method
|
|
45
|
+
# @!method updated_at
|
|
37
46
|
# The date that the paper was last updated.
|
|
38
47
|
# @return [DateTime]
|
|
39
|
-
element :
|
|
48
|
+
element :updated_at, DateTime, tag: 'updated'
|
|
40
49
|
|
|
41
|
-
# @!method
|
|
50
|
+
# @!method published_at
|
|
42
51
|
# The original publish/submission date of the paper.
|
|
43
52
|
# @return [DateTime]
|
|
44
|
-
element :
|
|
53
|
+
element :published_at, DateTime, tag: 'published'
|
|
45
54
|
|
|
46
55
|
# @!method title
|
|
47
56
|
# The title of the paper.
|
|
@@ -57,19 +66,13 @@ module Arx
|
|
|
57
66
|
# The primary category of the paper.
|
|
58
67
|
# @return [Category]
|
|
59
68
|
element :primary_category, Category, tag: 'primary_category'
|
|
69
|
+
alias_method :category, :primary_category
|
|
60
70
|
|
|
61
71
|
# @!method categories
|
|
62
72
|
# The categories of the paper.
|
|
63
73
|
# @return [Array<Category>]
|
|
64
74
|
has_many :categories, Category, tag: 'category'
|
|
65
75
|
|
|
66
|
-
# Whether the paper is a revision or not.
|
|
67
|
-
# @note A paper is a revision if {last_updated} differs from {publish_date}.
|
|
68
|
-
# @return [Boolean]
|
|
69
|
-
def revision?
|
|
70
|
-
@publish_date != @last_updated
|
|
71
|
-
end
|
|
72
|
-
|
|
73
76
|
# @!method summary
|
|
74
77
|
# The summary (or abstract) of the paper.
|
|
75
78
|
# @return [String]
|
|
@@ -83,7 +86,7 @@ module Arx
|
|
|
83
86
|
# @!method comment
|
|
84
87
|
# The comment of the paper.
|
|
85
88
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
|
|
86
|
-
# @raise {
|
|
89
|
+
# @raise {Error::MissingField} If the paper does not have a comment.
|
|
87
90
|
# @return [String]
|
|
88
91
|
element :comment, Cleaner, parser: :clean, tag: 'comment'
|
|
89
92
|
|
|
@@ -94,7 +97,7 @@ module Arx
|
|
|
94
97
|
# @!method journal
|
|
95
98
|
# The journal reference of the paper.
|
|
96
99
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
|
|
97
|
-
# @raise {
|
|
100
|
+
# @raise {Error::MissingField} If the paper does not have a journal reference.
|
|
98
101
|
# @return [String]
|
|
99
102
|
element :journal, Cleaner, parser: :clean, tag: 'journal_ref'
|
|
100
103
|
|
|
@@ -109,7 +112,7 @@ module Arx
|
|
|
109
112
|
if self.send "#{optional}?"
|
|
110
113
|
instance_variable_get("@#{optional}")
|
|
111
114
|
else
|
|
112
|
-
raise
|
|
115
|
+
raise Error::MissingField.new id, optional
|
|
113
116
|
end
|
|
114
117
|
end
|
|
115
118
|
end
|
|
@@ -123,7 +126,7 @@ module Arx
|
|
|
123
126
|
# @!method pdf_url
|
|
124
127
|
# Link to the PDF version of the paper.
|
|
125
128
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
|
|
126
|
-
# @raise {
|
|
129
|
+
# @raise {Error::MissingLink} If the paper does not have a PDF link.
|
|
127
130
|
# @return [String]
|
|
128
131
|
|
|
129
132
|
# @!method doi?
|
|
@@ -137,7 +140,7 @@ module Arx
|
|
|
137
140
|
# @see https://arxiv.org/help/jref#doi
|
|
138
141
|
# @see https://arxiv.org/help/prep#doi
|
|
139
142
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
|
|
140
|
-
# @raise {
|
|
143
|
+
# @raise {Error::MissingLink} If the paper does not have a DOI link.
|
|
141
144
|
# @return [String]
|
|
142
145
|
|
|
143
146
|
%i[pdf doi].each do |link_type|
|
|
@@ -151,9 +154,20 @@ module Arx
|
|
|
151
154
|
if self.send exists
|
|
152
155
|
links.find(&exists).href
|
|
153
156
|
else
|
|
154
|
-
raise
|
|
157
|
+
raise Error::MissingLink.new id, link_type.to_s.upcase
|
|
155
158
|
end
|
|
156
159
|
end
|
|
157
160
|
end
|
|
161
|
+
|
|
162
|
+
inspector *%i[
|
|
163
|
+
id url version revision?
|
|
164
|
+
title summary authors
|
|
165
|
+
primary_category categories
|
|
166
|
+
published_at updated_at
|
|
167
|
+
comment? comment
|
|
168
|
+
journal? journal
|
|
169
|
+
pdf? pdf_url
|
|
170
|
+
doi? doi_url
|
|
171
|
+
]
|
|
158
172
|
end
|
|
159
173
|
end
|
data/lib/arx/error.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Arx
|
|
2
|
+
|
|
3
|
+
# Various arXiv-related errors.
|
|
4
|
+
module Error
|
|
5
|
+
|
|
6
|
+
# Custom error for missing links on an arXiv paper.
|
|
7
|
+
class MissingLink < StandardError
|
|
8
|
+
def initialize(id, link_type)
|
|
9
|
+
super "arXiv paper #{id} does not have a #{link_type} link"
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# Custom error for missing fields on an arXiv paper.
|
|
14
|
+
class MissingField < StandardError
|
|
15
|
+
def initialize(id, field)
|
|
16
|
+
super "arXiv paper #{id} is missing the `#{field}` metadata field"
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Custom error for missing arXiv papers.
|
|
21
|
+
class MissingPaper < StandardError
|
|
22
|
+
def initialize(id)
|
|
23
|
+
super "Couldn't find an arXiv paper with ID: #{id}"
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
module Arx
|
|
2
|
+
|
|
3
|
+
# Restricts +inspect+ to dump a whitelist of methods on an object.
|
|
4
|
+
# It will always provide `object_id` at a minimum.
|
|
5
|
+
# @private
|
|
6
|
+
module Inspector
|
|
7
|
+
|
|
8
|
+
# Overwrites the object's own inspect method.
|
|
9
|
+
def inspect
|
|
10
|
+
pairs = {}
|
|
11
|
+
|
|
12
|
+
self.class.inspector_fields.each do |field|
|
|
13
|
+
pairs[field] = self.send(field).inspect
|
|
14
|
+
rescue
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
"#<#{self.class.name}:#{self.object_id} #{pairs.map {|k,v| "#{k}=#{v}"}.join(", ")}>"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
class << self
|
|
21
|
+
# Returns the +inspected+ instance variable, or sets it if undefined.
|
|
22
|
+
def inspected
|
|
23
|
+
@inspected ||= []
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Defines helper +inspector_fields+ instance variable & method, and +inspector+ instance method on the target object.
|
|
27
|
+
# @param [Object] source An arbitrary object (the object that +includes+ the +Inspector+ module).
|
|
28
|
+
def included(source)
|
|
29
|
+
inspected << source
|
|
30
|
+
source.class_eval do
|
|
31
|
+
def self.inspector(*fields)
|
|
32
|
+
@inspector_fields = *fields
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def self.inspector_fields
|
|
36
|
+
@inspector_fields ||= []
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
data/lib/arx/query/query.rb
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require 'cgi'
|
|
4
|
-
require_relative 'validate'
|
|
5
|
-
|
|
6
3
|
module Arx
|
|
7
4
|
|
|
8
5
|
# Class for generating arXiv search API query strings.
|
|
@@ -25,13 +22,6 @@ module Arx
|
|
|
25
22
|
and_not: 'ANDNOT'
|
|
26
23
|
}
|
|
27
24
|
|
|
28
|
-
# Logical connective method names.
|
|
29
|
-
CONNECTIVE_METHODS = {
|
|
30
|
-
'&': :and,
|
|
31
|
-
'!': :and_not,
|
|
32
|
-
'|': :or
|
|
33
|
-
}
|
|
34
|
-
|
|
35
25
|
# Supported fields for the search queries made to the arXiv search API.
|
|
36
26
|
# @see https://arxiv.org/help/prep arXiv metadata fields
|
|
37
27
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual (query details)
|
|
@@ -69,69 +59,68 @@ module Arx
|
|
|
69
59
|
@query = String.new
|
|
70
60
|
|
|
71
61
|
Validate.sort_by sort_by, permitted: SORT_BY.keys
|
|
72
|
-
@query << "
|
|
62
|
+
@query << "#{PARAMS[:sort_by]}=#{SORT_BY[sort_by]}"
|
|
73
63
|
|
|
74
64
|
Validate.sort_order sort_order, permitted: SORT_ORDER.keys
|
|
75
65
|
@query << "&#{PARAMS[:sort_order]}=#{SORT_ORDER[sort_order]}"
|
|
76
66
|
|
|
77
67
|
ids.flatten!
|
|
78
68
|
unless ids.empty?
|
|
79
|
-
ids.map!
|
|
80
|
-
Validate.ids ids
|
|
69
|
+
ids.map! &Cleaner.method(:extract_id)
|
|
81
70
|
@query << "&#{PARAMS[:id_list]}=#{ids * ','}"
|
|
82
71
|
end
|
|
83
72
|
|
|
84
73
|
yield self if block_given?
|
|
85
74
|
end
|
|
86
75
|
|
|
87
|
-
# @!method
|
|
76
|
+
# @!method and
|
|
88
77
|
# Logical conjunction (+AND+) of subqueries.
|
|
89
78
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
|
|
90
79
|
# @return [self]
|
|
91
80
|
|
|
92
|
-
# @!method
|
|
81
|
+
# @!method and_not
|
|
93
82
|
# Logical negated conjunction (+ANDNOT+) of subqueries.
|
|
94
83
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
|
|
95
84
|
# @return [self]
|
|
96
85
|
|
|
97
|
-
# @!method
|
|
86
|
+
# @!method or
|
|
98
87
|
# Logical disjunction (+OR+) of subqueries.
|
|
99
88
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
|
|
100
89
|
# @return [self]
|
|
101
90
|
|
|
102
|
-
|
|
103
|
-
define_method(
|
|
91
|
+
CONNECTIVES.keys.each do |connective|
|
|
92
|
+
define_method(connective) { add_connective connective }
|
|
104
93
|
end
|
|
105
94
|
|
|
106
|
-
# @!method title(*values, exact:
|
|
95
|
+
# @!method title(*values, exact: true, connective: :and)
|
|
107
96
|
# Search for papers by {https://arxiv.org/help/prep#title title}.
|
|
108
97
|
# @param values [Array<String>] Title(s) of papers to search for.
|
|
109
98
|
# @param exact [Boolean] Whether to search for an exact match of the title(s).
|
|
110
99
|
# @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
|
|
111
100
|
# @return [self]
|
|
112
101
|
|
|
113
|
-
# @!method author(*values, exact:
|
|
102
|
+
# @!method author(*values, exact: true, connective: :and)
|
|
114
103
|
# Search for papers by {https://arxiv.org/help/prep#author author}.
|
|
115
104
|
# @param values [Array<String>] Author(s) of papers to search for.
|
|
116
105
|
# @param exact [Boolean] Whether to search for an exact match of the author's name(s).
|
|
117
106
|
# @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
|
|
118
107
|
# @return [self]
|
|
119
108
|
|
|
120
|
-
# @!method abstract(*values, exact:
|
|
109
|
+
# @!method abstract(*values, exact: true, connective: :and)
|
|
121
110
|
# Search for papers by {https://arxiv.org/help/prep#abstract abstract}.
|
|
122
111
|
# @param values [Array<String>] Abstract(s) of papers to search for.
|
|
123
112
|
# @param exact [Boolean] Whether to search for an exact match of the abstract(s).
|
|
124
113
|
# @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
|
|
125
114
|
# @return [self]
|
|
126
115
|
|
|
127
|
-
# @!method comment(*values, exact:
|
|
116
|
+
# @!method comment(*values, exact: true, connective: :and)
|
|
128
117
|
# Search for papers by {https://arxiv.org/help/prep#comments comment}.
|
|
129
118
|
# @param values [Array<String>] Comment(s) of papers to search for.
|
|
130
119
|
# @param exact [Boolean] Whether to search for an exact match of the comment(s).
|
|
131
120
|
# @param connective [Symbol] The logical connective to use (see {CONNECTIVES}). Only applies if there are multiple values.
|
|
132
121
|
# @return [self]
|
|
133
122
|
|
|
134
|
-
# @!method journal(*values, exact:
|
|
123
|
+
# @!method journal(*values, exact: true, connective: :and)
|
|
135
124
|
# Search for papers by {https://arxiv.org/help/prep#journal journal reference}.
|
|
136
125
|
# @param values [Array<String>] Journal reference(s) of papers to search for.
|
|
137
126
|
# @param exact [Boolean] Whether to search for an exact match of the journal reference(s).
|
|
@@ -161,6 +150,8 @@ module Arx
|
|
|
161
150
|
define_method(name) do |*values, exact: true, connective: :and|
|
|
162
151
|
return if values.empty?
|
|
163
152
|
|
|
153
|
+
values.flatten!
|
|
154
|
+
|
|
164
155
|
Validate.values values
|
|
165
156
|
Validate.categories values if name == :category
|
|
166
157
|
Validate.exact exact
|
|
@@ -182,6 +173,20 @@ module Arx
|
|
|
182
173
|
end
|
|
183
174
|
end
|
|
184
175
|
|
|
176
|
+
# Creates a nested subquery (grouped statements with parentheses).
|
|
177
|
+
#
|
|
178
|
+
# @return [self]
|
|
179
|
+
def group
|
|
180
|
+
add_connective :and unless end_with_connective?
|
|
181
|
+
@query << (search_query? ? '+' : "&#{PARAMS[:search_query]}=")
|
|
182
|
+
|
|
183
|
+
@query << CGI.escape('(')
|
|
184
|
+
yield
|
|
185
|
+
@query << CGI.escape(')')
|
|
186
|
+
|
|
187
|
+
self
|
|
188
|
+
end
|
|
189
|
+
|
|
185
190
|
# Returns the query string.
|
|
186
191
|
#
|
|
187
192
|
# @return [String]
|
|
@@ -197,8 +202,9 @@ module Arx
|
|
|
197
202
|
# @param connective [Symbol] The symbol of the logical connective to add.
|
|
198
203
|
# @return [self]
|
|
199
204
|
def add_connective(connective)
|
|
200
|
-
|
|
201
|
-
|
|
205
|
+
if search_query?
|
|
206
|
+
@query << "+#{CONNECTIVES[connective]}" unless end_with_connective? || start_of_group?
|
|
207
|
+
end
|
|
202
208
|
self
|
|
203
209
|
end
|
|
204
210
|
|
|
@@ -206,13 +212,10 @@ module Arx
|
|
|
206
212
|
#
|
|
207
213
|
# @param subquery [String] The subquery to add.
|
|
208
214
|
def add_subquery(subquery)
|
|
215
|
+
add_connective :and unless end_with_connective?
|
|
216
|
+
|
|
209
217
|
if search_query?
|
|
210
|
-
|
|
211
|
-
@query << "+#{subquery}"
|
|
212
|
-
else
|
|
213
|
-
add_connective :and
|
|
214
|
-
@query << "+#{subquery}"
|
|
215
|
-
end
|
|
218
|
+
@query << (start_of_group? ? "#{subquery}" : "+#{subquery}")
|
|
216
219
|
else
|
|
217
220
|
@query << "&#{PARAMS[:search_query]}=#{subquery}"
|
|
218
221
|
end
|
|
@@ -230,10 +233,17 @@ module Arx
|
|
|
230
233
|
#
|
|
231
234
|
# @see CONNECTIVES
|
|
232
235
|
# @return [Boolean]
|
|
233
|
-
def
|
|
236
|
+
def end_with_connective?
|
|
234
237
|
CONNECTIVES.values.any? &@query.method(:end_with?)
|
|
235
238
|
end
|
|
236
239
|
|
|
240
|
+
# Whether the query string ends in a start-of-group character '('.
|
|
241
|
+
#
|
|
242
|
+
# @return [Boolean]
|
|
243
|
+
def start_of_group?
|
|
244
|
+
@query.end_with? CGI.escape('(')
|
|
245
|
+
end
|
|
246
|
+
|
|
237
247
|
# Parenthesizes a string with CGI-escaped parentheses.
|
|
238
248
|
#
|
|
239
249
|
# @param string [String] The string to parenthesize.
|
|
@@ -249,18 +259,5 @@ module Arx
|
|
|
249
259
|
def enquote(string)
|
|
250
260
|
CGI.escape("\"") + string + CGI.escape("\"")
|
|
251
261
|
end
|
|
252
|
-
|
|
253
|
-
# Attempt to extract an ID from an arXiv URL.
|
|
254
|
-
#
|
|
255
|
-
# @param url [String] The URL to extract the ID from.
|
|
256
|
-
# @return [String] The extracted ID if successful, otherwise the original string.
|
|
257
|
-
def extract_id(url)
|
|
258
|
-
prefix = %r"^(https?\:\/\/)?(www.)?arxiv\.org\/abs\/"
|
|
259
|
-
if %r"#{prefix}.*$".match? url
|
|
260
|
-
url.sub(prefix, '').sub(%r"\/$", '')
|
|
261
|
-
else
|
|
262
|
-
url
|
|
263
|
-
end
|
|
264
|
-
end
|
|
265
262
|
end
|
|
266
263
|
end
|
data/lib/arx/query/validate.rb
CHANGED
|
@@ -1,30 +1,8 @@
|
|
|
1
|
-
require_relative '../categories'
|
|
2
|
-
|
|
3
1
|
module Arx
|
|
4
2
|
|
|
5
3
|
# Validations for arXiv search query fields and identifier schemes.
|
|
4
|
+
# @private
|
|
6
5
|
module Validate
|
|
7
|
-
|
|
8
|
-
# The current arxiv paper identifier scheme (1 April 2007 and onwards).
|
|
9
|
-
# The last block of digits can either be five digits (if the paper was published after 1501 - January 2015),
|
|
10
|
-
# or four digits (if the paper was published before 1501).
|
|
11
|
-
#
|
|
12
|
-
# @see https://arxiv.org/help/arxiv_identifier#new arXiv identifier (new)
|
|
13
|
-
# @example
|
|
14
|
-
# 1501.00001
|
|
15
|
-
# 1705.01662v1
|
|
16
|
-
# 1412.0135
|
|
17
|
-
# 0706.0001v2
|
|
18
|
-
NEW_IDENTIFIER_FORMAT = %r"^\d{4}\.\d{4,5}(v\d+)?$"
|
|
19
|
-
|
|
20
|
-
# The legacy arXiv paper identifier scheme (before 1 April 2007).
|
|
21
|
-
#
|
|
22
|
-
# @see https://arxiv.org/help/arxiv_identifier#old arXiv identifier (old)
|
|
23
|
-
# @example
|
|
24
|
-
# math/0309136v1
|
|
25
|
-
# cond-mat/0211034
|
|
26
|
-
OLD_IDENTIFIER_FORMAT = %r"^[a-z]+(\-[a-z]+)?\/\d{7}(v\d+)?$"
|
|
27
|
-
|
|
28
6
|
class << self
|
|
29
7
|
# Validates the +sortBy+ field of the query string.
|
|
30
8
|
#
|
|
@@ -116,7 +94,9 @@ module Arx
|
|
|
116
94
|
# @see NEW_IDENTIFIER_FORMAT
|
|
117
95
|
# @see OLD_IDENTIFIER_FORMAT
|
|
118
96
|
def id?(id)
|
|
119
|
-
NEW_IDENTIFIER_FORMAT.match?
|
|
97
|
+
return true if NEW_IDENTIFIER_FORMAT.match? id
|
|
98
|
+
return true if OLD_IDENTIFIER_FORMAT.match?(id) && Arx::CATEGORIES.keys.include?(id.split('/').first)
|
|
99
|
+
false
|
|
120
100
|
end
|
|
121
101
|
end
|
|
122
102
|
end
|