arx 0.3.2 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,6 +21,14 @@ module Arx
21
21
  'cond-mat.stat-mech' => 'Statistical Mechanics',
22
22
  'cond-mat.str-el' => 'Strongly Correlated Electrons',
23
23
  'cond-mat.supr-con' => 'Superconductivity',
24
+ 'dis-nn' => 'Disordered Systems and Neural Networks', # cond-mat
25
+ 'mes-hall' => 'Mesoscale and Nanoscale Physics', # cond-mat
26
+ 'mtrl-sci' => 'Materials Science', # cond-mat
27
+ 'quant-gas' => 'Quantum Gases', # cond-mat
28
+ 'soft' => 'Soft Condensed Matter', # cond-mat
29
+ 'stat-mech' => 'Statistical Mechanics', # cond-mat
30
+ 'str-el' => 'Strongly Correlated Electrons', # cond-mat
31
+ 'supr-con' => 'Superconductivity', # cond-mat
24
32
  'cs' => 'Computer Science',
25
33
  'cs.AI' => 'Artificial Intelligence',
26
34
  'cs.AR' => 'Hardware Architecture',
@@ -138,6 +146,28 @@ module Arx
138
146
  'physics.pop-ph' => 'Popular Physics',
139
147
  'physics.soc-ph' => 'Physics and Society',
140
148
  'physics.space-ph' => 'Space Physics',
149
+ 'acc-ph' => 'Accelerator Physics', # physics
150
+ 'ao-ph' => 'Atmospheric and Oceanic Physics', # physics
151
+ 'app-ph' => 'Applied Physics', # physics
152
+ 'atm-clus' => 'Atomic and Molecular Clusters', # physics
153
+ 'atom-ph' => 'Atomic Physics', # physics
154
+ 'bio-ph' => 'Biological Physics', # physics
155
+ 'chem-ph' => 'Chemical Physics', # physics
156
+ 'class-ph' => 'Classical Physics', # physics
157
+ 'comp-ph' => 'Computational Physics', # physics
158
+ 'data-an' => 'Data Analysis, Statistics and Probability', # physics
159
+ 'ed-ph' => 'Physics Education', # physics
160
+ 'flu-dyn' => 'Fluid Dynamics', # physics
161
+ 'gen-ph' => 'General Physics', # physics
162
+ 'geo-ph' => 'Geophysics', # physics
163
+ 'hist-ph' => 'History and Philosophy of Physics', # physics
164
+ 'ins-det' => 'Instrumentation and Detectors', # physics
165
+ 'med-ph' => 'Medical Physics', # physics
166
+ 'optics' => 'Optics', # physics
167
+ 'plasm-ph' => 'Plasma Physics', # physics
168
+ 'pop-ph' => 'Popular Physics', # physics
169
+ 'soc-ph' => 'Physics and Society', # physics
170
+ 'space-ph' => 'Space Physics', # physics
141
171
  'q-bio' => 'Quantitative Biology',
142
172
  'q-bio.BM' => 'Biomolecules',
143
173
  'q-bio.CB' => 'Cell Behavior',
@@ -1,14 +1,55 @@
1
1
  module Arx
2
2
 
3
3
  # Class for cleaning strings.
4
+ #
4
5
  # @private
5
6
  class Cleaner
6
7
 
7
- # Cleans strings.
8
- # @param [String] string Removes newline/return characters and multiple spaces from a string.
9
- # @return [String] The cleaned string.
10
- def self.clean(string)
11
- string.gsub(/\r\n|\r|\n/, ' ').strip.squeeze ' '
8
+ # arXiv paper URL prefix format
9
+ URL_PREFIX = /^(https?\:\/\/)?(www.)?arxiv\.org\/abs\//
10
+
11
+ class << self
12
+
13
+ # Cleans strings.
14
+ #
15
+ # @param [String] string Removes newline/return characters and multiple spaces from a string.
16
+ # @return [String] The cleaned string.
17
+ def clean(string)
18
+ string.gsub(/\r\n|\r|\n/, ' ').strip.squeeze ' '
19
+ end
20
+
21
+ # Attempt to extract an arXiv identifier from a string such as a URL.
22
+ #
23
+ # @param string [String] The string to extract the ID from.
24
+ # @param version [Boolean] Whether or not to include the paper's version.
25
+ # @return [String] The extracted ID.
26
+ def extract_id(string, version: false)
27
+ if version == !!version
28
+ if string.is_a? String
29
+ trimmed = /#{URL_PREFIX}.+\/?$/.match?(string) ? string.gsub(/(#{URL_PREFIX})|(\/$)/, '') : string
30
+ raise ArgumentError.new("Couldn't extract arXiv identifier from: #{string}") unless Validate.id? trimmed
31
+ version ? trimmed : trimmed.sub(/v[0-9]+$/, '')
32
+ else
33
+ raise TypeError.new("Expected `string` to be a String, got: #{string.class}")
34
+ end
35
+ else
36
+ raise TypeError.new("Expected `version` to be boolean (TrueClass or FalseClass), got: #{version.class}")
37
+ end
38
+ end
39
+
40
+ # Attempt to extract a version number from an arXiv identifier.
41
+ #
42
+ # @param string [String] The arXiv identifier to extract the version number from.
43
+ # @return [String] The extracted version number.
44
+ def extract_version(string)
45
+ reversed = extract_id(string, version: true).reverse
46
+
47
+ if /^[0-9]+v/.match? reversed
48
+ reversed.partition('v').first.reverse.to_i
49
+ else
50
+ raise ArgumentError.new("Couldn't extract version number from identifier: #{string}")
51
+ end
52
+ end
12
53
  end
13
54
  end
14
55
  end
@@ -5,24 +5,63 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper's author.
9
+ ATTRIBUTES = %i[name affiliated? affiliations]
10
+
8
11
  tag 'author'
9
12
 
10
13
  # @!method name
11
14
  # The name of the author.
15
+ #
12
16
  # @return [String]
13
17
  element :name, Cleaner, tag: 'name', parser: :clean
14
18
 
15
19
  # @!method affiliations
16
20
  # The author's affiliations.
21
+ #
17
22
  # @return [Array<String>]
18
23
  has_many :affiliations, Cleaner, tag: 'affiliation', parser: :clean
19
24
 
20
25
  # Whether or not the author has any affiliations.
26
+ #
21
27
  # @return [Boolean]
22
28
  def affiliated?
23
29
  !affiliations.empty?
24
30
  end
25
31
 
26
- inspector :name, :affiliated?, :affiliations
32
# Serializes the {Author} object into a +Hash+.
#
# Each symbol in +ATTRIBUTES+ becomes a key whose value is the result of
# calling the method of the same name on this author.
#
# @return [Hash]
def to_h
  ATTRIBUTES.map { |attribute| [attribute, send(attribute)] }.to_h
end
38
+
39
# Serializes the {Author} object into a valid JSON hash.
#
# The hash is obtained by parsing the output of {to_json}, so its keys are strings.
#
# @return [Hash] The resulting JSON hash.
def as_json
  JSON.parse(to_json)
end
45
+
46
# Serializes the {Author} object into a valid JSON string.
#
# Any arguments are forwarded to +Hash#to_json+. The JSON generator passes its
# state object when the author is nested inside another structure being
# serialized, so a zero-arity definition would raise +ArgumentError+ there.
#
# @param args [Array] Optional generator arguments, forwarded unchanged.
# @return [String] The resulting JSON string.
def to_json(*args)
  to_h.to_json(*args)
end
52
+
53
# Equality check against another author.
#
# @note This only performs a basic equality check between the authors' names.
# @param author [Author] The author to compare against.
# @return [Boolean] +false+ when +author+ is not an {Author}; otherwise the result of comparing names.
def ==(author)
  author.is_a?(Author) && name == author.name
end
64
+
65
+ inspector *ATTRIBUTES
27
66
  end
28
67
  end
@@ -5,20 +5,57 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper's category.
9
+ ATTRIBUTES = %i[name full_name]
10
+
8
11
  tag 'category'
9
12
 
10
13
  # @!method name
11
14
  # The abbreviated name of the category.
15
+ #
12
16
  # @return [String]
13
17
  attribute :name, Cleaner, parser: :clean, tag: 'term'
14
18
 
15
19
  # The full name of the category.
20
+ #
16
21
  # @see CATEGORIES
17
22
  # @return [String]
18
23
  def full_name
19
24
  CATEGORIES[name]
20
25
  end
21
26
 
22
- inspector :name, :full_name
27
# Serializes the {Category} object into a +Hash+.
#
# Each symbol in +ATTRIBUTES+ becomes a key whose value is the result of
# calling the method of the same name on this category.
#
# @return [Hash]
def to_h
  ATTRIBUTES.map { |attribute| [attribute, send(attribute)] }.to_h
end
33
+
34
# Serializes the {Category} object into a valid JSON hash.
#
# The hash is obtained by parsing the output of {to_json}, so its keys are strings.
#
# @return [Hash] The resulting JSON hash.
def as_json
  JSON.parse(to_json)
end
40
+
41
# Serializes the {Category} object into a valid JSON string.
#
# Any arguments are forwarded to +Hash#to_json+. The JSON generator passes its
# state object when the category is nested inside another structure being
# serialized, so a zero-arity definition would raise +ArgumentError+ there.
#
# @param args [Array] Optional generator arguments, forwarded unchanged.
# @return [String] The resulting JSON string.
def to_json(*args)
  to_h.to_json(*args)
end
47
+
48
# Equality check against another category.
#
# @param category [Category] The category to compare against.
# @return [Boolean] +false+ when +category+ is not a {Category}; otherwise the result of comparing names.
def ==(category)
  category.is_a?(Category) && name == category.name
end
58
+
59
+ inspector *ATTRIBUTES
23
60
  end
24
61
  end
@@ -1,6 +1,7 @@
1
1
  module Arx
2
2
 
3
3
  # Helper entity/model representing a link on an arXiv paper.
4
+ #
4
5
  # @private
5
6
  class Link
6
7
  include HappyMapper
@@ -5,78 +5,112 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper.
9
+ # @note {comment}, {journal}, {pdf_url} and {doi_url} may raise errors when called.
10
+ ATTRIBUTES = %i[
11
+ id url version revision?
12
+ title summary authors
13
+ primary_category categories
14
+ published_at updated_at
15
+ comment? comment
16
+ journal? journal
17
+ pdf? pdf_url
18
+ doi? doi_url
19
+ ]
20
+
8
21
  tag 'entry'
9
22
 
10
23
  element :id, Cleaner, parser: :clean, tag: 'id'
11
24
  # The identifier of the paper.
25
+ #
12
26
  # @note This is either in {OLD_IDENTIFIER_FORMAT} or {NEW_IDENTIFIER_FORMAT}.
13
27
  # @example
14
28
  # 1705.01662v1
15
29
  # cond-mat/0211034
30
+ # @param version [Boolean] Whether or not to include the paper's version.
16
31
  # @return [String] The paper's identifier.
17
- def id
18
- @id.sub /https?\:\/\/arxiv\.org\/abs\//, ''
32
+ def id(version = false)
33
+ Cleaner.extract_id @id, version: version
19
34
  end
20
35
 
21
36
  # The URL of the paper on the arXiv website.
37
+ #
22
38
  # @example
23
39
  # http://arxiv.org/abs/1705.01662v1
24
40
  # http://arxiv.org/abs/cond-mat/0211034
41
+ # @param version [Boolean] Whether or not to include the paper's version.
25
42
  # @return [String] The paper's arXiv URL.
26
- def url
27
- @id
43
+ def url(version = false)
44
+ "http://arxiv.org/abs/#{id version}"
45
+ end
46
+
47
# The version of the paper.
#
# Delegates to +Cleaner.extract_version+ on the raw +@id+ value.
#
# @return [Integer] The paper's version.
def version
  Cleaner.extract_version(@id)
end
53
+
54
# Whether the paper is a revision or not.
#
# @note A paper is a revision if its {version} is greater than 1.
# @return [Boolean]
def revision?
  version > 1
end
29
61
 
30
62
  # @!method updated_at
31
63
  # The date that the paper was last updated.
64
+ #
32
65
  # @return [DateTime]
33
66
  element :updated_at, DateTime, tag: 'updated'
34
67
 
35
68
  # @!method published_at
36
69
  # The original publish/submission date of the paper.
70
+ #
37
71
  # @return [DateTime]
38
72
  element :published_at, DateTime, tag: 'published'
39
73
 
40
74
  # @!method title
41
75
  # The title of the paper.
76
+ #
42
77
  # @return [String]
43
78
  element :title, Cleaner, parser: :clean, tag: 'title'
44
79
 
45
80
  # @!method authors
46
81
  # The authors of the paper.
82
+ #
47
83
  # @return [Array<Author>]
48
84
  has_many :authors, Author, tag: 'author'
49
85
 
50
86
  # @!method primary_category
51
87
  # The primary category of the paper.
88
+ #
52
89
  # @return [Category]
53
90
  element :primary_category, Category, tag: 'primary_category'
54
91
  alias_method :category, :primary_category
55
92
 
56
93
  # @!method categories
57
94
  # The categories of the paper.
95
+ #
58
96
  # @return [Array<Category>]
59
97
  has_many :categories, Category, tag: 'category'
60
98
 
61
- # Whether the paper is a revision or not.
62
- # @note A paper is a revision if {updated_at} differs from {published_at}.
63
- # @return [Boolean]
64
- def revision?
65
- @published_at != @updated_at
66
- end
67
-
68
99
  # @!method summary
69
100
  # The summary (or abstract) of the paper.
101
+ #
70
102
  # @return [String]
71
103
  element :summary, Cleaner, parser: :clean, tag: 'summary'
72
104
  alias_method :abstract, :summary
73
105
 
74
106
  # @!method comment?
75
107
  # Whether or not the paper has a comment.
108
+ #
76
109
  # @return [Boolean]
77
110
 
78
111
  # @!method comment
79
112
  # The comment of the paper.
113
+ #
80
114
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
81
115
  # @raise {Error::MissingField} If the paper does not have a comment.
82
116
  # @return [String]
@@ -84,10 +118,12 @@ module Arx
84
118
 
85
119
  # @!method journal?
86
120
  # Whether or not the paper has a journal reference.
121
+ #
87
122
  # @return [Boolean]
88
123
 
89
124
  # @!method journal
90
125
  # The journal reference of the paper.
126
+ #
91
127
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
92
128
  # @raise {Error::MissingField} If the paper does not have a journal reference.
93
129
  # @return [String]
@@ -113,22 +149,26 @@ module Arx
113
149
 
114
150
  # @!method pdf?
115
151
  # Whether or not the paper has a PDF link.
152
+ #
116
153
  # @return [Boolean]
117
154
 
118
155
  # @!method pdf_url
119
156
  # Link to the PDF version of the paper.
157
+ #
120
158
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
121
159
  # @raise {Error::MissingLink} If the paper does not have a PDF link.
122
160
  # @return [String]
123
161
 
124
162
  # @!method doi?
125
163
  # Whether or not the paper has a DOI (Digital Object Identifier) link.
164
+ #
126
165
  # @see https://arxiv.org/help/jref#doi
127
166
  # @see https://arxiv.org/help/prep#doi
128
167
  # @return [Boolean]
129
168
 
130
169
  # @!method doi_url
131
170
  # Link to the DOI (Digital Object Identifier) of the paper.
171
+ #
132
172
  # @see https://arxiv.org/help/jref#doi
133
173
  # @see https://arxiv.org/help/prep#doi
134
174
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
@@ -151,14 +191,62 @@ module Arx
151
191
  end
152
192
  end
153
193
 
154
- inspector *%i[
155
- id url title summary authors
156
- primary_category categories
157
- published_at updated_at revision?
158
- comment? comment
159
- journal? journal
160
- pdf? pdf_url
161
- doi? doi_url
162
- ]
194
# Serializes the {Paper} object into a +Hash+.
#
# Every symbol in +ATTRIBUTES+ is called as a method; attributes whose
# method raises are left out of the result rather than aborting serialization.
#
# @param deep [Boolean] Whether to deep-serialize {Author} and {Category} objects.
# @return [Hash]
def to_h(deep = false)
  hash = {}
  ATTRIBUTES.each do |attribute|
    begin
      hash[attribute] = send(attribute)
    rescue StandardError
      # Best-effort by design: an attribute that cannot be read is omitted.
      next
    end
  end
  return hash unless deep

  # Build fresh arrays with +map+ rather than +map!+, so the arrays returned by
  # +authors+ and +categories+ are not overwritten with hashes.
  hash[:authors] = hash[:authors].map(&:to_h)
  hash[:categories] = hash[:categories].map(&:to_h)
  hash[:primary_category] = hash[:primary_category].to_h
  hash
end
207
+
208
# Serializes the {Paper} object into a valid JSON hash.
#
# The hash is obtained by parsing the output of {to_json}, so its keys are strings.
#
# @note Deep-serializes {Author} and {Category} objects.
# @return [Hash] The resulting JSON hash.
def as_json
  JSON.parse(to_json)
end
215
+
216
# Serializes the {Paper} object into a valid JSON string.
#
# Any arguments are forwarded to +Hash#to_json+. The JSON generator passes its
# state object when the paper is nested inside another structure being
# serialized, so a zero-arity definition would raise +ArgumentError+ there.
#
# @note Deep-serializes {Author} and {Category} objects.
# @param args [Array] Optional generator arguments, forwarded unchanged.
# @return [String] The resulting JSON string.
def to_json(*args)
  to_h(true).to_json(*args)
end
223
+
224
# Equality check against another paper.
#
# @note This only performs a basic equality check between the papers' identifiers (disregarding version).
#   This means that a different version of the same paper will be viewed as equal.
# @param paper [Paper] The paper to compare against.
# @return [Boolean] +false+ when +paper+ is not a {Paper}; otherwise the result of comparing identifiers.
def ==(paper)
  paper.is_a?(Paper) && id == paper.id
end
236
+
237
# Downloads the paper and saves it in PDF format at the specified path.
#
# The download completes before the destination is touched, so a failed
# download leaves any existing file at +path+ intact. If writing fails, the
# partially written file is removed and the error is re-raised.
#
# @param path [String] The file path to store the PDF at.
# @return [Integer] The number of bytes written.
def save(path)
  # Block form closes the opened handle as soon as the content has been read.
  pdf_content = URI.open(pdf_url, &:read)

  begin
    File.binwrite(path, pdf_content)
  rescue StandardError
    File.delete(path) if File.file?(path)
    raise
  end
end
249
+
250
+ inspector *ATTRIBUTES
163
251
  end
164
252
  end