arx 0.3.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +124 -0
- data/LICENSE +1 -1
- data/README.md +107 -27
- data/Rakefile +1 -1
- data/arx.gemspec +7 -5
- data/lib/arx.rb +19 -7
- data/lib/arx/categories.rb +30 -0
- data/lib/arx/cleaner.rb +46 -5
- data/lib/arx/entities/author.rb +40 -1
- data/lib/arx/entities/category.rb +38 -1
- data/lib/arx/entities/link.rb +1 -0
- data/lib/arx/entities/paper.rb +108 -20
- data/lib/arx/inspector.rb +2 -0
- data/lib/arx/query/query.rb +46 -36
- data/lib/arx/query/validate.rb +4 -1
- data/lib/arx/version.rb +3 -3
- metadata +51 -17
data/lib/arx/categories.rb
CHANGED
@@ -21,6 +21,14 @@ module Arx
|
|
21
21
|
'cond-mat.stat-mech' => 'Statistical Mechanics',
|
22
22
|
'cond-mat.str-el' => 'Strongly Correlated Electrons',
|
23
23
|
'cond-mat.supr-con' => 'Superconductivity',
|
24
|
+
'dis-nn' => 'Disordered Systems and Neural Networks', # cond-mat
|
25
|
+
'mes-hall' => 'Mesoscale and Nanoscale Physics', # cond-mat
|
26
|
+
'mtrl-sci' => 'Materials Science', # cond-mat
|
27
|
+
'quant-gas' => 'Quantum Gases', # cond-mat
|
28
|
+
'soft' => 'Soft Condensed Matter', # cond-mat
|
29
|
+
'stat-mech' => 'Statistical Mechanics', # cond-mat
|
30
|
+
'str-el' => 'Strongly Correlated Electrons', # cond-mat
|
31
|
+
'supr-con' => 'Superconductivity', # cond-mat
|
24
32
|
'cs' => 'Computer Science',
|
25
33
|
'cs.AI' => 'Artificial Intelligence',
|
26
34
|
'cs.AR' => 'Hardware Architecture',
|
@@ -138,6 +146,28 @@ module Arx
|
|
138
146
|
'physics.pop-ph' => 'Popular Physics',
|
139
147
|
'physics.soc-ph' => 'Physics and Society',
|
140
148
|
'physics.space-ph' => 'Space Physics',
|
149
|
+
'acc-ph' => 'Accelerator Physics', # physics
|
150
|
+
'ao-ph' => 'Atmospheric and Oceanic Physics', # physics
|
151
|
+
'app-ph' => 'Applied Physics', # physics
|
152
|
+
'atm-clus' => 'Atomic and Molecular Clusters', # physics
|
153
|
+
'atom-ph' => 'Atomic Physics', # physics
|
154
|
+
'bio-ph' => 'Biological Physics', # physics
|
155
|
+
'chem-ph' => 'Chemical Physics', # physics
|
156
|
+
'class-ph' => 'Classical Physics', # physics
|
157
|
+
'comp-ph' => 'Computational Physics', # physics
|
158
|
+
'data-an' => 'Data Analysis, Statistics and Probability', # physics
|
159
|
+
'ed-ph' => 'Physics Education', # physics
|
160
|
+
'flu-dyn' => 'Fluid Dynamics', # physics
|
161
|
+
'gen-ph' => 'General Physics', # physics
|
162
|
+
'geo-ph' => 'Geophysics', # physics
|
163
|
+
'hist-ph' => 'History and Philosophy of Physics', # physics
|
164
|
+
'ins-det' => 'Instrumentation and Detectors', # physics
|
165
|
+
'med-ph' => 'Medical Physics', # physics
|
166
|
+
'optics' => 'Optics', # physics
|
167
|
+
'plasm-ph' => 'Plasma Physics', # physics
|
168
|
+
'pop-ph' => 'Popular Physics', # physics
|
169
|
+
'soc-ph' => 'Physics and Society', # physics
|
170
|
+
'space-ph' => 'Space Physics', # physics
|
141
171
|
'q-bio' => 'Quantitative Biology',
|
142
172
|
'q-bio.BM' => 'Biomolecules',
|
143
173
|
'q-bio.CB' => 'Cell Behavior',
|
data/lib/arx/cleaner.rb
CHANGED
@@ -1,14 +1,55 @@
|
|
1
1
|
module Arx
|
2
2
|
|
3
3
|
# Class for cleaning strings.
|
4
|
+
#
|
4
5
|
# @private
|
5
6
|
class Cleaner
|
6
7
|
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
# arXiv paper URL prefix format
|
9
|
+
URL_PREFIX = /^(https?\:\/\/)?(www.)?arxiv\.org\/abs\//
|
10
|
+
|
11
|
+
class << self
|
12
|
+
|
13
|
+
# Cleans strings.
|
14
|
+
#
|
15
|
+
# @param string [String] The string to clean: newline/return characters become spaces, surrounding whitespace is stripped and repeated spaces are collapsed.
|
16
|
+
# @return [String] The cleaned string.
|
17
|
+
def clean(string)
|
18
|
+
string.gsub(/\r\n|\r|\n/, ' ').strip.squeeze ' '
|
19
|
+
end
|
20
|
+
|
21
|
+
# Attempt to extract an arXiv identifier from a string such as a URL.
|
22
|
+
#
|
23
|
+
# @param string [String] The string to extract the ID from.
|
24
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
25
|
+
# @return [String] The extracted ID.
|
26
|
+
def extract_id(string, version: false)
|
27
|
+
if version == !!version
|
28
|
+
if string.is_a? String
|
29
|
+
trimmed = /#{URL_PREFIX}.+\/?$/.match?(string) ? string.gsub(/(#{URL_PREFIX})|(\/$)/, '') : string
|
30
|
+
raise ArgumentError.new("Couldn't extract arXiv identifier from: #{string}") unless Validate.id? trimmed
|
31
|
+
version ? trimmed : trimmed.sub(/v[0-9]+$/, '')
|
32
|
+
else
|
33
|
+
raise TypeError.new("Expected `string` to be a String, got: #{string.class}")
|
34
|
+
end
|
35
|
+
else
|
36
|
+
raise TypeError.new("Expected `version` to be boolean (TrueClass or FalseClass), got: #{version.class}")
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Attempt to extract a version number from an arXiv identifier.
|
41
|
+
#
|
42
|
+
# @param string [String] The arXiv identifier to extract the version number from.
|
43
|
+
# @return [Integer] The extracted version number.
|
44
|
+
def extract_version(string)
|
45
|
+
reversed = extract_id(string, version: true).reverse
|
46
|
+
|
47
|
+
if /^[0-9]+v/.match? reversed
|
48
|
+
reversed.partition('v').first.reverse.to_i
|
49
|
+
else
|
50
|
+
raise ArgumentError.new("Couldn't extract version number from identifier: #{string}")
|
51
|
+
end
|
52
|
+
end
|
12
53
|
end
|
13
54
|
end
|
14
55
|
end
|
data/lib/arx/entities/author.rb
CHANGED
@@ -5,24 +5,63 @@ module Arx
|
|
5
5
|
include HappyMapper
|
6
6
|
include Inspector
|
7
7
|
|
8
|
+
# The attributes of an arXiv paper's author.
|
9
|
+
ATTRIBUTES = %i[name affiliated? affiliations]
|
10
|
+
|
8
11
|
tag 'author'
|
9
12
|
|
10
13
|
# @!method name
|
11
14
|
# The name of the author.
|
15
|
+
#
|
12
16
|
# @return [String]
|
13
17
|
element :name, Cleaner, tag: 'name', parser: :clean
|
14
18
|
|
15
19
|
# @!method affiliations
|
16
20
|
# The author's affiliations.
|
21
|
+
#
|
17
22
|
# @return [Array<String>]
|
18
23
|
has_many :affiliations, Cleaner, tag: 'affiliation', parser: :clean
|
19
24
|
|
20
25
|
# Whether or not the author has any affiliations.
|
26
|
+
#
|
21
27
|
# @return [Boolean]
|
22
28
|
def affiliated?
|
23
29
|
!affiliations.empty?
|
24
30
|
end
|
25
31
|
|
26
|
-
|
32
|
+
# Serializes the {Author} object into a +Hash+.
|
33
|
+
#
|
34
|
+
# @return [Hash]
|
35
|
+
def to_h
|
36
|
+
Hash[*ATTRIBUTES.map {|_| [_, send(_)]}.flatten(1)]
|
37
|
+
end
|
38
|
+
|
39
|
+
# Serializes the {Author} object into a valid JSON hash.
|
40
|
+
#
|
41
|
+
# @return [Hash] The resulting JSON hash.
|
42
|
+
def as_json
|
43
|
+
JSON.parse to_json
|
44
|
+
end
|
45
|
+
|
46
|
+
# Serializes the {Author} object into a valid JSON string.
|
47
|
+
#
|
48
|
+
# @return [String] The resulting JSON string.
|
49
|
+
def to_json
|
50
|
+
to_h.to_json
|
51
|
+
end
|
52
|
+
|
53
|
+
# Equality check against another author.
|
54
|
+
#
|
55
|
+
# @note This only performs a basic equality check between the authors' names.
|
56
|
+
# @param author [Author] The author to compare against.
|
57
|
+
def ==(author)
|
58
|
+
if author.is_a? Author
|
59
|
+
name == author.name
|
60
|
+
else
|
61
|
+
false
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
inspector *ATTRIBUTES
|
27
66
|
end
|
28
67
|
end
|
data/lib/arx/entities/category.rb
CHANGED
@@ -5,20 +5,57 @@ module Arx
|
|
5
5
|
include HappyMapper
|
6
6
|
include Inspector
|
7
7
|
|
8
|
+
# The attributes of an arXiv paper's category.
|
9
|
+
ATTRIBUTES = %i[name full_name]
|
10
|
+
|
8
11
|
tag 'category'
|
9
12
|
|
10
13
|
# @!method name
|
11
14
|
# The abbreviated name of the category.
|
15
|
+
#
|
12
16
|
# @return [String]
|
13
17
|
attribute :name, Cleaner, parser: :clean, tag: 'term'
|
14
18
|
|
15
19
|
# The full name of the category.
|
20
|
+
#
|
16
21
|
# @see CATEGORIES
|
17
22
|
# @return [String]
|
18
23
|
def full_name
|
19
24
|
CATEGORIES[name]
|
20
25
|
end
|
21
26
|
|
22
|
-
|
27
|
+
# Serializes the {Category} object into a +Hash+.
|
28
|
+
#
|
29
|
+
# @return [Hash]
|
30
|
+
def to_h
|
31
|
+
Hash[*ATTRIBUTES.map {|_| [_, send(_)]}.flatten(1)]
|
32
|
+
end
|
33
|
+
|
34
|
+
# Serializes the {Category} object into a valid JSON hash.
|
35
|
+
#
|
36
|
+
# @return [Hash] The resulting JSON hash.
|
37
|
+
def as_json
|
38
|
+
JSON.parse to_json
|
39
|
+
end
|
40
|
+
|
41
|
+
# Serializes the {Category} object into a valid JSON string.
|
42
|
+
#
|
43
|
+
# @return [String] The resulting JSON string.
|
44
|
+
def to_json
|
45
|
+
to_h.to_json
|
46
|
+
end
|
47
|
+
|
48
|
+
# Equality check against another category.
|
49
|
+
#
|
50
|
+
# @param category [Category] The category to compare against.
|
51
|
+
def ==(category)
|
52
|
+
if category.is_a? Category
|
53
|
+
name == category.name
|
54
|
+
else
|
55
|
+
false
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
inspector *ATTRIBUTES
|
23
60
|
end
|
24
61
|
end
|
data/lib/arx/entities/link.rb
CHANGED
data/lib/arx/entities/paper.rb
CHANGED
@@ -5,78 +5,112 @@ module Arx
|
|
5
5
|
include HappyMapper
|
6
6
|
include Inspector
|
7
7
|
|
8
|
+
# The attributes of an arXiv paper.
|
9
|
+
# @note {comment}, {journal}, {pdf_url} and {doi_url} may raise errors when called.
|
10
|
+
ATTRIBUTES = %i[
|
11
|
+
id url version revision?
|
12
|
+
title summary authors
|
13
|
+
primary_category categories
|
14
|
+
published_at updated_at
|
15
|
+
comment? comment
|
16
|
+
journal? journal
|
17
|
+
pdf? pdf_url
|
18
|
+
doi? doi_url
|
19
|
+
]
|
20
|
+
|
8
21
|
tag 'entry'
|
9
22
|
|
10
23
|
element :id, Cleaner, parser: :clean, tag: 'id'
|
11
24
|
# The identifier of the paper.
|
25
|
+
#
|
12
26
|
# @note This is either in {OLD_IDENTIFIER_FORMAT} or {NEW_IDENTIFIER_FORMAT}.
|
13
27
|
# @example
|
14
28
|
# 1705.01662v1
|
15
29
|
# cond-mat/0211034
|
30
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
16
31
|
# @return [String] The paper's identifier.
|
17
|
-
def id
|
18
|
-
@id
|
32
|
+
def id(version = false)
|
33
|
+
Cleaner.extract_id @id, version: version
|
19
34
|
end
|
20
35
|
|
21
36
|
# The URL of the paper on the arXiv website.
|
37
|
+
#
|
22
38
|
# @example
|
23
39
|
# http://arxiv.org/abs/1705.01662v1
|
24
40
|
# http://arxiv.org/abs/cond-mat/0211034
|
41
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
25
42
|
# @return [String] The paper's arXiv URL.
|
26
|
-
def url
|
27
|
-
|
43
|
+
def url(version = false)
|
44
|
+
"http://arxiv.org/abs/#{id version}"
|
45
|
+
end
|
46
|
+
|
47
|
+
# The version of the paper.
|
48
|
+
#
|
49
|
+
# @return [Integer] The paper's version.
|
50
|
+
def version
|
51
|
+
Cleaner.extract_version @id
|
52
|
+
end
|
53
|
+
|
54
|
+
# Whether the paper is a revision or not.
|
55
|
+
#
|
56
|
+
# @note A paper is a revision if its {version} is greater than 1.
|
57
|
+
# @return [Boolean]
|
58
|
+
def revision?
|
59
|
+
version > 1
|
28
60
|
end
|
29
61
|
|
30
62
|
# @!method updated_at
|
31
63
|
# The date that the paper was last updated.
|
64
|
+
#
|
32
65
|
# @return [DateTime]
|
33
66
|
element :updated_at, DateTime, tag: 'updated'
|
34
67
|
|
35
68
|
# @!method published_at
|
36
69
|
# The original publish/submission date of the paper.
|
70
|
+
#
|
37
71
|
# @return [DateTime]
|
38
72
|
element :published_at, DateTime, tag: 'published'
|
39
73
|
|
40
74
|
# @!method title
|
41
75
|
# The title of the paper.
|
76
|
+
#
|
42
77
|
# @return [String]
|
43
78
|
element :title, Cleaner, parser: :clean, tag: 'title'
|
44
79
|
|
45
80
|
# @!method authors
|
46
81
|
# The authors of the paper.
|
82
|
+
#
|
47
83
|
# @return [Array<Author>]
|
48
84
|
has_many :authors, Author, tag: 'author'
|
49
85
|
|
50
86
|
# @!method primary_category
|
51
87
|
# The primary category of the paper.
|
88
|
+
#
|
52
89
|
# @return [Category]
|
53
90
|
element :primary_category, Category, tag: 'primary_category'
|
54
91
|
alias_method :category, :primary_category
|
55
92
|
|
56
93
|
# @!method categories
|
57
94
|
# The categories of the paper.
|
95
|
+
#
|
58
96
|
# @return [Array<Category>]
|
59
97
|
has_many :categories, Category, tag: 'category'
|
60
98
|
|
61
|
-
# Whether the paper is a revision or not.
|
62
|
-
# @note A paper is a revision if {updated_at} differs from {published_at}.
|
63
|
-
# @return [Boolean]
|
64
|
-
def revision?
|
65
|
-
@published_at != @updated_at
|
66
|
-
end
|
67
|
-
|
68
99
|
# @!method summary
|
69
100
|
# The summary (or abstract) of the paper.
|
101
|
+
#
|
70
102
|
# @return [String]
|
71
103
|
element :summary, Cleaner, parser: :clean, tag: 'summary'
|
72
104
|
alias_method :abstract, :summary
|
73
105
|
|
74
106
|
# @!method comment?
|
75
107
|
# Whether or not the paper has a comment.
|
108
|
+
#
|
76
109
|
# @return [Boolean]
|
77
110
|
|
78
111
|
# @!method comment
|
79
112
|
# The comment of the paper.
|
113
|
+
#
|
80
114
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
|
81
115
|
# @raise {Error::MissingField} If the paper does not have a comment.
|
82
116
|
# @return [String]
|
@@ -84,10 +118,12 @@ module Arx
|
|
84
118
|
|
85
119
|
# @!method journal?
|
86
120
|
# Whether or not the paper has a journal reference.
|
121
|
+
#
|
87
122
|
# @return [Boolean]
|
88
123
|
|
89
124
|
# @!method journal
|
90
125
|
# The journal reference of the paper.
|
126
|
+
#
|
91
127
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
|
92
128
|
# @raise {Error::MissingField} If the paper does not have a journal reference.
|
93
129
|
# @return [String]
|
@@ -113,22 +149,26 @@ module Arx
|
|
113
149
|
|
114
150
|
# @!method pdf?
|
115
151
|
# Whether or not the paper has a PDF link.
|
152
|
+
#
|
116
153
|
# @return [Boolean]
|
117
154
|
|
118
155
|
# @!method pdf_url
|
119
156
|
# Link to the PDF version of the paper.
|
157
|
+
#
|
120
158
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
|
121
159
|
# @raise {Error::MissingLink} If the paper does not have a PDF link.
|
122
160
|
# @return [String]
|
123
161
|
|
124
162
|
# @!method doi?
|
125
163
|
# Whether or not the paper has a DOI (Digital Object Identifier) link.
|
164
|
+
#
|
126
165
|
# @see https://arxiv.org/help/jref#doi
|
127
166
|
# @see https://arxiv.org/help/prep#doi
|
128
167
|
# @return [Boolean]
|
129
168
|
|
130
169
|
# @!method doi_url
|
131
170
|
# Link to the DOI (Digital Object Identifier) of the paper.
|
171
|
+
#
|
132
172
|
# @see https://arxiv.org/help/jref#doi
|
133
173
|
# @see https://arxiv.org/help/prep#doi
|
134
174
|
# @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
|
@@ -151,14 +191,62 @@ module Arx
|
|
151
191
|
end
|
152
192
|
end
|
153
193
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
194
|
+
# Serializes the {Paper} object into a +Hash+.
|
195
|
+
#
|
196
|
+
# @param deep [Boolean] Whether to deep-serialize {Author} and {Category} objects.
|
197
|
+
# @return [Hash]
|
198
|
+
def to_h(deep = false)
|
199
|
+
Hash[*ATTRIBUTES.map {|_| [_, send(_)] rescue nil}.compact.flatten(1)].tap do |hash|
|
200
|
+
if deep
|
201
|
+
hash[:authors].map! &:to_h
|
202
|
+
hash[:categories].map! &:to_h
|
203
|
+
hash[:primary_category] = hash[:primary_category].to_h
|
204
|
+
end
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
# Serializes the {Paper} object into a valid JSON hash.
|
209
|
+
#
|
210
|
+
# @note Deep-serializes {Author} and {Category} objects.
|
211
|
+
# @return [Hash] The resulting JSON hash.
|
212
|
+
def as_json
|
213
|
+
JSON.parse to_json
|
214
|
+
end
|
215
|
+
|
216
|
+
# Serializes the {Paper} object into a valid JSON string.
|
217
|
+
#
|
218
|
+
# @note Deep-serializes {Author} and {Category} objects.
|
219
|
+
# @return [String] The resulting JSON string.
|
220
|
+
def to_json
|
221
|
+
to_h(true).to_json
|
222
|
+
end
|
223
|
+
|
224
|
+
# Equality check against another paper.
|
225
|
+
#
|
226
|
+
# @note This only performs a basic equality check between the papers' identifiers (disregarding version).
|
227
|
+
# This means that a different version of the same paper will be viewed as equal.
|
228
|
+
# @param paper [Paper] The paper to compare against.
|
229
|
+
def ==(paper)
|
230
|
+
if paper.is_a? Paper
|
231
|
+
id == paper.id
|
232
|
+
else
|
233
|
+
false
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
# Downloads the paper and saves it in PDF format at the specified path.
|
238
|
+
#
|
239
|
+
# @param path [String] The file path to store the PDF at.
|
240
|
+
def save(path)
|
241
|
+
begin
|
242
|
+
pdf_content = URI.open(pdf_url).read
|
243
|
+
File.open(path, 'wb') {|f| f.write pdf_content}
|
244
|
+
rescue
|
245
|
+
File.delete(path) if File.file? path
|
246
|
+
raise
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
inspector *ATTRIBUTES
|
163
251
|
end
|
164
252
|
end
|