arx 1.0.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ namespace :gem do
13
13
 
14
14
  desc 'Debug the gem (load into IRB)'
15
15
  task :debug do
16
- exec 'bundle exec rake install && irb -I lib/arx.rb -r arx'
16
+ exec 'bin/console'
17
17
  end
18
18
 
19
19
  desc 'Prepare a new gem release'
data/arx.gemspec CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |spec|
6
6
  spec.name = 'arx'
7
7
  spec.version = Arx::VERSION
8
8
  spec.authors = ['Edwin Onuonga']
9
- spec.email = ['ed@mail.eonu.net']
9
+ spec.email = ['ed@eonu.net']
10
10
  spec.homepage = 'https://github.com/eonu/arx'
11
11
 
12
12
  spec.summary = %q{A Ruby interface for querying academic papers on the arXiv search API.}
@@ -16,15 +16,17 @@ Gem::Specification.new do |spec|
16
16
  Gemfile LICENSE CHANGELOG.md README.md Rakefile arx.gemspec
17
17
  ]
18
18
 
19
- spec.required_ruby_version = '~> 2.5'
19
+ spec.required_ruby_version = '>= 2.5'
20
20
 
21
21
  spec.add_runtime_dependency 'nokogiri', '~> 1.10'
22
22
  spec.add_runtime_dependency 'nokogiri-happymapper', '~> 0.8'
23
23
 
24
- spec.add_development_dependency 'bundler', '~> 2.0'
25
- spec.add_development_dependency 'rake', '~> 12.3'
26
- spec.add_development_dependency 'thor', '~> 0.20'
24
+ spec.add_development_dependency 'bundler'
25
+ spec.add_development_dependency 'rake', '~> 13.0'
26
+ spec.add_development_dependency 'thor', '~> 0.20.3'
27
27
  spec.add_development_dependency 'rspec', '~> 3.7'
28
+ spec.add_development_dependency 'coveralls', '0.8.23'
29
+ spec.add_development_dependency 'yard', '~> 0.9', '>= 0.9.10'
28
30
 
29
31
  spec.metadata = {
30
32
  'source_code_uri' => spec.homepage,
data/lib/arx.rb CHANGED
@@ -1,6 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'cgi'
4
+ require 'json'
5
+
6
+ # Temporary fix for JSON warning in Ruby >= 2.7.0
7
+ # See: https://github.com/flori/json/issues/399#issuecomment-734863279
8
+ if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.7.0')
9
+ module JSON
10
+ module_function
11
+ def parse(source, opts = {})
12
+ Parser.new(source, **opts).parse
13
+ end
14
+ end
15
+ end
16
+
4
17
  require 'nokogiri'
5
18
  require 'open-uri'
6
19
  require 'happymapper'
@@ -46,31 +59,38 @@ module Arx
46
59
 
47
60
  # Performs a search query for papers on the arXiv search API.
48
61
  #
49
- # @note The +sort_by+ and +sort_order+ arguments are ignored if passing in your own +query+.
62
+ # @note The +sort_by+, +sort_order+, +start+ and +max_results+ arguments are ignored if passing in your own +query+.
50
63
  # @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
51
64
  # @param query [Query, NilClass] Predefined search query object.
52
65
  # @param sort_by [Symbol] The sorting criteria for the returned results (see {Query::SORT_BY}).
53
66
  # @param sort_order [Symbol] The sorting order for the returned results (see {Query::SORT_ORDER}).
67
+ # @param start [Integer] The index of the first returned result.
68
+ # @param max_results [Integer] The number of results returned by the query.
54
69
  # @return [Array<Paper>, Paper] The {Paper}(s) found by the search query.
55
- def search(*ids, query: nil, sort_by: :relevance, sort_order: :descending)
56
- query ||= Query.new(*ids, sort_by: sort_by, sort_order: sort_order)
70
+ def search(*ids, query: nil, sort_by: :relevance, sort_order: :descending, start: 0, max_results: 10)
71
+ query ||= Query.new(*ids, sort_by: sort_by, sort_order: sort_order, start: start, max_results: max_results)
57
72
  raise TypeError.new("Expected `query` to be an Arx::Query, got: #{query.class}") unless query.is_a? Query
58
73
 
59
74
  yield query if block_given?
60
75
 
61
- document = Nokogiri::XML(open ENDPOINT + query.to_s + '&max_results=10000').remove_namespaces!
76
+ document = Nokogiri::XML(URI.open ENDPOINT + query.to_s).remove_namespaces!
62
77
  results = Paper.parse(document, single: ids.size == 1)
63
78
 
64
79
  if results.is_a? Paper
65
80
  raise Error::MissingPaper.new(ids.first) if results.title.empty?
66
81
  elsif results.is_a? Array
67
82
  results.reject! {|paper| paper.title.empty?}
83
+ elsif results.nil?
84
+ if ids.size == 1
85
+ raise Error::MissingPaper.new(ids.first)
86
+ else
87
+ results = []
88
+ end
68
89
  end
69
90
 
70
91
  results
71
92
  end
72
93
 
73
- alias_method :find, :search
74
94
  alias_method :get, :search
75
95
  end
76
96
  end
@@ -84,7 +104,9 @@ end
84
104
  # @param query [Query, NilClass] Predefined search query object.
85
105
  # @param sort_by [Symbol] The sorting criteria for the returned results (see {Arx::Query::SORT_BY}).
86
106
  # @param sort_order [Symbol] The sorting order for the returned results (see {Arx::Query::SORT_ORDER}).
107
+ # @param start [Integer] The index of the first returned result.
108
+ # @param max_results [Integer] The number of results returned by the query.
87
109
  # @return [Array<Paper>, Paper] The {Arx::Paper}(s) found by the search query.
88
- def Arx(*ids, query: nil, sort_by: :relevance, sort_order: :descending, &block)
89
- Arx.search *ids, query: query, sort_by: sort_by, sort_order: sort_order, &block
110
+ def Arx(*ids, query: nil, sort_by: :relevance, sort_order: :descending, start: 0, max_results: 10, &block)
111
+ Arx.search *ids, query: query, sort_by: sort_by, sort_order: sort_order, start: start, max_results: max_results, &block
90
112
  end
@@ -21,6 +21,14 @@ module Arx
21
21
  'cond-mat.stat-mech' => 'Statistical Mechanics',
22
22
  'cond-mat.str-el' => 'Strongly Correlated Electrons',
23
23
  'cond-mat.supr-con' => 'Superconductivity',
24
+ 'dis-nn' => 'Disordered Systems and Neural Networks', # cond-mat
25
+ 'mes-hall' => 'Mesoscale and Nanoscale Physics', # cond-mat
26
+ 'mtrl-sci' => 'Materials Science', # cond-mat
27
+ 'quant-gas' => 'Quantum Gases', # cond-mat
28
+ 'soft' => 'Soft Condensed Matter', # cond-mat
29
+ 'stat-mech' => 'Statistical Mechanics', # cond-mat
30
+ 'str-el' => 'Strongly Correlated Electrons', # cond-mat
31
+ 'supr-con' => 'Superconductivity', # cond-mat
24
32
  'cs' => 'Computer Science',
25
33
  'cs.AI' => 'Artificial Intelligence',
26
34
  'cs.AR' => 'Hardware Architecture',
@@ -138,6 +146,28 @@ module Arx
138
146
  'physics.pop-ph' => 'Popular Physics',
139
147
  'physics.soc-ph' => 'Physics and Society',
140
148
  'physics.space-ph' => 'Space Physics',
149
+ 'acc-ph' => 'Accelerator Physics', # physics
150
+ 'ao-ph' => 'Atmospheric and Oceanic Physics', # physics
151
+ 'app-ph' => 'Applied Physics', # physics
152
+ 'atm-clus' => 'Atomic and Molecular Clusters', # physics
153
+ 'atom-ph' => 'Atomic Physics', # physics
154
+ 'bio-ph' => 'Biological Physics', # physics
155
+ 'chem-ph' => 'Chemical Physics', # physics
156
+ 'class-ph' => 'Classical Physics', # physics
157
+ 'comp-ph' => 'Computational Physics', # physics
158
+ 'data-an' => 'Data Analysis, Statistics and Probability', # physics
159
+ 'ed-ph' => 'Physics Education', # physics
160
+ 'flu-dyn' => 'Fluid Dynamics', # physics
161
+ 'gen-ph' => 'General Physics', # physics
162
+ 'geo-ph' => 'Geophysics', # physics
163
+ 'hist-ph' => 'History and Philosophy of Physics', # physics
164
+ 'ins-det' => 'Instrumentation and Detectors', # physics
165
+ 'med-ph' => 'Medical Physics', # physics
166
+ 'optics' => 'Optics', # physics
167
+ 'plasm-ph' => 'Plasma Physics', # physics
168
+ 'pop-ph' => 'Popular Physics', # physics
169
+ 'soc-ph' => 'Physics and Society', # physics
170
+ 'space-ph' => 'Space Physics', # physics
141
171
  'q-bio' => 'Quantitative Biology',
142
172
  'q-bio.BM' => 'Biomolecules',
143
173
  'q-bio.CB' => 'Cell Behavior',
data/lib/arx/cleaner.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  module Arx
2
2
 
3
3
  # Class for cleaning strings.
4
+ #
4
5
  # @private
5
6
  class Cleaner
6
7
 
@@ -10,6 +11,7 @@ module Arx
10
11
  class << self
11
12
 
12
13
  # Cleans strings.
14
+ #
13
15
  # @param [String] string The string to clean; newline/return characters and multiple spaces are removed from it.
14
16
  # @return [String] The cleaned string.
15
17
  def clean(string)
@@ -5,24 +5,71 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper's author.
9
+ ATTRIBUTES = %i[name affiliated? affiliations]
10
+
8
11
  tag 'author'
9
12
 
10
13
  # @!method name
11
14
  # The name of the author.
15
+ #
12
16
  # @return [String]
13
17
  element :name, Cleaner, tag: 'name', parser: :clean
14
18
 
15
19
  # @!method affiliations
16
20
  # The author's affiliations.
21
+ #
17
22
  # @return [Array<String>]
18
23
  has_many :affiliations, Cleaner, tag: 'affiliation', parser: :clean
19
24
 
20
25
  # Whether or not the author has any affiliations.
26
+ #
21
27
  # @return [Boolean]
22
28
  def affiliated?
23
29
  !affiliations.empty?
24
30
  end
25
31
 
26
- inspector :name, :affiliated?, :affiliations
32
+ # Serializes the {Author} object into a +Hash+.
33
+ #
34
+ # @return [Hash]
35
+ def to_h
36
+ Hash[*ATTRIBUTES.map {|_| [_, send(_)]}.flatten(1)]
37
+ end
38
+
39
+ # Serializes the {Author} object into a valid JSON hash.
40
+ #
41
+ # @return [Hash] The resulting JSON hash.
42
+ def as_json
43
+ JSON.parse to_json
44
+ end
45
+
46
+ # Serializes the {Author} object into a valid JSON string.
47
+ #
48
+ # @return [String] The resulting JSON string.
49
+ def to_json
50
+ to_h.to_json
51
+ end
52
+
53
+ # Equality check against another author.
54
+ #
55
+ # @note This only performs a basic equality check between the authors' names.
56
+ # @param author [Author] The author to compare against.
57
+ # @return [Boolean]
58
+ def ==(author)
59
+ if author.is_a? Author
60
+ name == author.name
61
+ else
62
+ false
63
+ end
64
+ end
65
+
66
+ # A string representation of the {Author} object.
67
+ #
68
+ # @return [String]
69
+ def to_s
70
+ "Arx::Author(name: #{name}, affiliations: [#{affiliations.join(', ')}])"
71
+ end
72
+
73
+ inspector *ATTRIBUTES
27
74
  end
28
75
  end
@@ -5,20 +5,65 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper's category.
9
+ ATTRIBUTES = %i[name full_name]
10
+
8
11
  tag 'category'
9
12
 
10
13
  # @!method name
11
14
  # The abbreviated name of the category.
15
+ #
12
16
  # @return [String]
13
17
  attribute :name, Cleaner, parser: :clean, tag: 'term'
14
18
 
15
19
  # The full name of the category.
20
+ #
16
21
  # @see CATEGORIES
17
22
  # @return [String]
18
23
  def full_name
19
24
  CATEGORIES[name]
20
25
  end
21
26
 
22
- inspector :name, :full_name
27
+ # Serializes the {Category} object into a +Hash+.
28
+ #
29
+ # @return [Hash]
30
+ def to_h
31
+ Hash[*ATTRIBUTES.map {|_| [_, send(_)]}.flatten(1)]
32
+ end
33
+
34
+ # Serializes the {Category} object into a valid JSON hash.
35
+ #
36
+ # @return [Hash] The resulting JSON hash.
37
+ def as_json
38
+ JSON.parse to_json
39
+ end
40
+
41
+ # Serializes the {Category} object into a valid JSON string.
42
+ #
43
+ # @return [String] The resulting JSON string.
44
+ def to_json
45
+ to_h.to_json
46
+ end
47
+
48
+ # Equality check against another category.
49
+ #
50
+ # @param category [Category] The category to compare against.
51
+ # @return [Boolean]
52
+ def ==(category)
53
+ if category.is_a? Category
54
+ name == category.name
55
+ else
56
+ false
57
+ end
58
+ end
59
+
60
+ # A string representation of the {Category} object.
61
+ #
62
+ # @return [String]
63
+ def to_s
64
+ "Arx::Category(name: #{name}, full_name: #{full_name || 'nil'})"
65
+ end
66
+
67
+ inspector *ATTRIBUTES
23
68
  end
24
69
  end
@@ -1,6 +1,7 @@
1
1
  module Arx
2
2
 
3
3
  # Helper entity/model representing a link on an arXiv paper.
4
+ #
4
5
  # @private
5
6
  class Link
6
7
  include HappyMapper
@@ -5,37 +5,54 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper.
9
+ # @note {comment}, {journal}, {pdf_url} and {doi_url} may raise errors when called.
10
+ ATTRIBUTES = %i[
11
+ id url version revision?
12
+ title summary authors
13
+ primary_category categories
14
+ published_at updated_at
15
+ comment? comment
16
+ journal? journal
17
+ pdf? pdf_url
18
+ doi? doi_url
19
+ ]
20
+
8
21
  tag 'entry'
9
22
 
10
23
  element :id, Cleaner, parser: :clean, tag: 'id'
11
24
  # The identifier of the paper.
25
+ #
12
26
  # @note This is either in {OLD_IDENTIFIER_FORMAT} or {NEW_IDENTIFIER_FORMAT}.
13
27
  # @example
14
28
  # 1705.01662v1
15
29
  # cond-mat/0211034
16
30
  # @param version [Boolean] Whether or not to include the paper's version.
17
31
  # @return [String] The paper's identifier.
18
- def id(version: false)
32
+ def id(version = false)
19
33
  Cleaner.extract_id @id, version: version
20
34
  end
21
35
 
22
36
  # The URL of the paper on the arXiv website.
37
+ #
23
38
  # @example
24
39
  # http://arxiv.org/abs/1705.01662v1
25
40
  # http://arxiv.org/abs/cond-mat/0211034
26
41
  # @param version [Boolean] Whether or not to include the paper's version.
27
42
  # @return [String] The paper's arXiv URL.
28
- def url(version: false)
29
- "http://arxiv.org/abs/#{id version: version}"
43
+ def url(version = false)
44
+ "http://arxiv.org/abs/#{id version}"
30
45
  end
31
46
 
32
47
  # The version of the paper.
48
+ #
33
49
  # @return [Integer] The paper's version.
34
50
  def version
35
51
  Cleaner.extract_version @id
36
52
  end
37
53
 
38
54
  # Whether the paper is a revision or not.
55
+ #
39
56
  # @note A paper is a revision if its {version} is greater than 1.
40
57
  # @return [Boolean]
41
58
  def revision?
@@ -44,47 +61,56 @@ module Arx
44
61
 
45
62
  # @!method updated_at
46
63
  # The date that the paper was last updated.
64
+ #
47
65
  # @return [DateTime]
48
66
  element :updated_at, DateTime, tag: 'updated'
49
67
 
50
68
  # @!method published_at
51
69
  # The original publish/submission date of the paper.
70
+ #
52
71
  # @return [DateTime]
53
72
  element :published_at, DateTime, tag: 'published'
54
73
 
55
74
  # @!method title
56
75
  # The title of the paper.
76
+ #
57
77
  # @return [String]
58
78
  element :title, Cleaner, parser: :clean, tag: 'title'
59
79
 
60
80
  # @!method authors
61
81
  # The authors of the paper.
82
+ #
62
83
  # @return [Array<Author>]
63
84
  has_many :authors, Author, tag: 'author'
64
85
 
65
86
  # @!method primary_category
66
87
  # The primary category of the paper.
88
+ #
67
89
  # @return [Category]
68
90
  element :primary_category, Category, tag: 'primary_category'
69
91
  alias_method :category, :primary_category
70
92
 
71
93
  # @!method categories
72
94
  # The categories of the paper.
95
+ #
73
96
  # @return [Array<Category>]
74
97
  has_many :categories, Category, tag: 'category'
75
98
 
76
99
  # @!method summary
77
100
  # The summary (or abstract) of the paper.
101
+ #
78
102
  # @return [String]
79
103
  element :summary, Cleaner, parser: :clean, tag: 'summary'
80
104
  alias_method :abstract, :summary
81
105
 
82
106
  # @!method comment?
83
107
  # Whether or not the paper has a comment.
108
+ #
84
109
  # @return [Boolean]
85
110
 
86
111
  # @!method comment
87
112
  # The comment of the paper.
113
+ #
88
114
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
89
115
  # @raise {Error::MissingField} If the paper does not have a comment.
90
116
  # @return [String]
@@ -92,10 +118,12 @@ module Arx
92
118
 
93
119
  # @!method journal?
94
120
  # Whether or not the paper has a journal reference.
121
+ #
95
122
  # @return [Boolean]
96
123
 
97
124
  # @!method journal
98
125
  # The journal reference of the paper.
126
+ #
99
127
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
100
128
  # @raise {Error::MissingField} If the paper does not have a journal reference.
101
129
  # @return [String]
@@ -121,22 +149,26 @@ module Arx
121
149
 
122
150
  # @!method pdf?
123
151
  # Whether or not the paper has a PDF link.
152
+ #
124
153
  # @return [Boolean]
125
154
 
126
155
  # @!method pdf_url
127
156
  # Link to the PDF version of the paper.
157
+ #
128
158
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
129
159
  # @raise {Error::MissingLink} If the paper does not have a PDF link.
130
160
  # @return [String]
131
161
 
132
162
  # @!method doi?
133
163
  # Whether or not the paper has a DOI (Digital Object Identifier) link.
164
+ #
134
165
  # @see https://arxiv.org/help/jref#doi
135
166
  # @see https://arxiv.org/help/prep#doi
136
167
  # @return [Boolean]
137
168
 
138
169
  # @!method doi_url
139
170
  # Link to the DOI (Digital Object Identifier) of the paper.
171
+ #
140
172
  # @see https://arxiv.org/help/jref#doi
141
173
  # @see https://arxiv.org/help/prep#doi
142
174
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
@@ -159,15 +191,74 @@ module Arx
159
191
  end
160
192
  end
161
193
 
162
- inspector *%i[
163
- id url version revision?
164
- title summary authors
165
- primary_category categories
166
- published_at updated_at
167
- comment? comment
168
- journal? journal
169
- pdf? pdf_url
170
- doi? doi_url
171
- ]
194
+ # Serializes the {Paper} object into a +Hash+.
195
+ #
196
+ # @param deep [Boolean] Whether to deep-serialize {Author} and {Category} objects.
197
+ # @return [Hash]
198
+ def to_h(deep = false)
199
+ Hash[*ATTRIBUTES.map {|_| [_, send(_)] rescue nil}.compact.flatten(1)].tap do |hash|
200
+ if deep
201
+ hash[:authors].map! &:to_h
202
+ hash[:categories].map! &:to_h
203
+ hash[:primary_category] = hash[:primary_category].to_h
204
+ end
205
+ end
206
+ end
207
+
208
+ # Serializes the {Paper} object into a valid JSON hash.
209
+ #
210
+ # @note Deep-serializes {Author} and {Category} objects.
211
+ # @return [Hash] The resulting JSON hash.
212
+ def as_json
213
+ JSON.parse to_json
214
+ end
215
+
216
+ # Serializes the {Paper} object into a valid JSON string.
217
+ #
218
+ # @note Deep-serializes {Author} and {Category} objects.
219
+ # @return [String] The resulting JSON string.
220
+ def to_json
221
+ to_h(true).to_json
222
+ end
223
+
224
+ # Equality check against another paper.
225
+ #
226
+ # @note This only performs a basic equality check between the papers' identifiers (disregarding version).
227
+ # This means that a different version of the same paper will be viewed as equal.
228
+ # @param paper [Paper] The paper to compare against.
229
+ # @return [Boolean]
230
+ def ==(paper)
231
+ if paper.is_a? Paper
232
+ id == paper.id
233
+ else
234
+ false
235
+ end
236
+ end
237
+
238
+ # Downloads the paper and saves it in PDF format at the specified path.
239
+ #
240
+ # @param path [String] The file path to store the PDF at.
241
+ def save(path)
242
+ begin
243
+ pdf_content = URI.open(pdf_url).read
244
+ File.open(path, 'wb') {|f| f.write pdf_content}
245
+ rescue
246
+ File.delete(path) if File.file? path
247
+ raise
248
+ end
249
+ end
250
+
251
+ # A string representation of the {Paper} object.
252
+ #
253
+ # @return [String]
254
+ def to_s
255
+ _id = id true
256
+ _published_at = published_at.strftime("%Y-%m-%d")
257
+ _authors = authors.map(&:name)
258
+ _authors = [*_authors.first(2), '...'] if _authors.size > 2
259
+ "Arx::Paper(id: #{_id}, published_at: #{_published_at}, authors: [#{_authors.join(', ')}], title: #{title})"
260
+ end
261
+
262
+ inspector *ATTRIBUTES
172
263
  end
173
264
  end