arx 1.0.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -13,7 +13,7 @@ namespace :gem do
13
13
 
14
14
  desc 'Debug the gem (load into IRB)'
15
15
  task :debug do
16
- exec 'bundle exec rake install && irb -I lib/arx.rb -r arx'
16
+ exec 'bin/console'
17
17
  end
18
18
 
19
19
  desc 'Prepare a new gem release'
data/arx.gemspec CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |spec|
6
6
  spec.name = 'arx'
7
7
  spec.version = Arx::VERSION
8
8
  spec.authors = ['Edwin Onuonga']
9
- spec.email = ['ed@mail.eonu.net']
9
+ spec.email = ['ed@eonu.net']
10
10
  spec.homepage = 'https://github.com/eonu/arx'
11
11
 
12
12
  spec.summary = %q{A Ruby interface for querying academic papers on the arXiv search API.}
@@ -16,15 +16,17 @@ Gem::Specification.new do |spec|
16
16
  Gemfile LICENSE CHANGELOG.md README.md Rakefile arx.gemspec
17
17
  ]
18
18
 
19
- spec.required_ruby_version = '~> 2.5'
19
+ spec.required_ruby_version = '>= 2.5'
20
20
 
21
21
  spec.add_runtime_dependency 'nokogiri', '~> 1.10'
22
22
  spec.add_runtime_dependency 'nokogiri-happymapper', '~> 0.8'
23
23
 
24
- spec.add_development_dependency 'bundler', '~> 2.0'
25
- spec.add_development_dependency 'rake', '~> 12.3'
26
- spec.add_development_dependency 'thor', '~> 0.20'
24
+ spec.add_development_dependency 'bundler'
25
+ spec.add_development_dependency 'rake', '~> 13.0'
26
+ spec.add_development_dependency 'thor', '~> 0.20.3'
27
27
  spec.add_development_dependency 'rspec', '~> 3.7'
28
+ spec.add_development_dependency 'coveralls', '0.8.23'
29
+ spec.add_development_dependency 'yard', '~> 0.9', '>= 0.9.10'
28
30
 
29
31
  spec.metadata = {
30
32
  'source_code_uri' => spec.homepage,
data/lib/arx.rb CHANGED
@@ -1,6 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'cgi'
4
+ require 'json'
5
+
6
+ # Temporary fix for JSON warning in Ruby >= 2.7.0
7
+ # See: https://github.com/flori/json/issues/399#issuecomment-734863279
8
+ if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.7.0')
9
+ module JSON
10
+ module_function
11
+ def parse(source, opts = {})
12
+ Parser.new(source, **opts).parse
13
+ end
14
+ end
15
+ end
16
+
4
17
  require 'nokogiri'
5
18
  require 'open-uri'
6
19
  require 'happymapper'
@@ -46,31 +59,38 @@ module Arx
46
59
 
47
60
  # Performs a search query for papers on the arXiv search API.
48
61
  #
49
- # @note The +sort_by+ and +sort_order+ arguments are ignored if passing in your own +query+.
62
+ # @note The +sort_by+, +sort_order+, +start+ and +max_results+ arguments are ignored if passing in your own +query+.
50
63
  # @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
51
64
  # @param query [Query, NilClass] Predefined search query object.
52
65
  # @param sort_by [Symbol] The sorting criteria for the returned results (see {Query::SORT_BY}).
53
66
  # @param sort_order [Symbol] The sorting order for the returned results (see {Query::SORT_ORDER}).
67
+ # @param start [Integer] The index of the first returned result.
68
+ # @param max_results [Integer] The number of results returned by the query.
54
69
  # @return [Array<Paper>, Paper] The {Paper}(s) found by the search query.
55
- def search(*ids, query: nil, sort_by: :relevance, sort_order: :descending)
56
- query ||= Query.new(*ids, sort_by: sort_by, sort_order: sort_order)
70
+ def search(*ids, query: nil, sort_by: :relevance, sort_order: :descending, start: 0, max_results: 10)
71
+ query ||= Query.new(*ids, sort_by: sort_by, sort_order: sort_order, start: start, max_results: max_results)
57
72
  raise TypeError.new("Expected `query` to be an Arx::Query, got: #{query.class}") unless query.is_a? Query
58
73
 
59
74
  yield query if block_given?
60
75
 
61
- document = Nokogiri::XML(open ENDPOINT + query.to_s + '&max_results=10000').remove_namespaces!
76
+ document = Nokogiri::XML(URI.open ENDPOINT + query.to_s).remove_namespaces!
62
77
  results = Paper.parse(document, single: ids.size == 1)
63
78
 
64
79
  if results.is_a? Paper
65
80
  raise Error::MissingPaper.new(ids.first) if results.title.empty?
66
81
  elsif results.is_a? Array
67
82
  results.reject! {|paper| paper.title.empty?}
83
+ elsif results.nil?
84
+ if ids.size == 1
85
+ raise Error::MissingPaper.new(ids.first)
86
+ else
87
+ results = []
88
+ end
68
89
  end
69
90
 
70
91
  results
71
92
  end
72
93
 
73
- alias_method :find, :search
74
94
  alias_method :get, :search
75
95
  end
76
96
  end
@@ -84,7 +104,9 @@ end
84
104
  # @param query [Query, NilClass] Predefined search query object.
85
105
  # @param sort_by [Symbol] The sorting criteria for the returned results (see {Arx::Query::SORT_BY}).
86
106
  # @param sort_order [Symbol] The sorting order for the returned results (see {Arx::Query::SORT_ORDER}).
107
+ # @param start [Integer] The index of the first returned result.
108
+ # @param max_results [Integer] The number of results returned by the query.
87
109
  # @return [Array<Paper>, Paper] The {Arx::Paper}(s) found by the search query.
88
- def Arx(*ids, query: nil, sort_by: :relevance, sort_order: :descending, &block)
89
- Arx.search *ids, query: query, sort_by: sort_by, sort_order: sort_order, &block
110
+ def Arx(*ids, query: nil, sort_by: :relevance, sort_order: :descending, start: 0, max_results: 10, &block)
111
+ Arx.search *ids, query: query, sort_by: sort_by, sort_order: sort_order, start: start, max_results: max_results, &block
90
112
  end
@@ -21,6 +21,14 @@ module Arx
21
21
  'cond-mat.stat-mech' => 'Statistical Mechanics',
22
22
  'cond-mat.str-el' => 'Strongly Correlated Electrons',
23
23
  'cond-mat.supr-con' => 'Superconductivity',
24
+ 'dis-nn' => 'Disordered Systems and Neural Networks', # cond-mat
25
+ 'mes-hall' => 'Mesoscale and Nanoscale Physics', # cond-mat
26
+ 'mtrl-sci' => 'Materials Science', # cond-mat
27
+ 'quant-gas' => 'Quantum Gases', # cond-mat
28
+ 'soft' => 'Soft Condensed Matter', # cond-mat
29
+ 'stat-mech' => 'Statistical Mechanics', # cond-mat
30
+ 'str-el' => 'Strongly Correlated Electrons', # cond-mat
31
+ 'supr-con' => 'Superconductivity', # cond-mat
24
32
  'cs' => 'Computer Science',
25
33
  'cs.AI' => 'Artificial Intelligence',
26
34
  'cs.AR' => 'Hardware Architecture',
@@ -138,6 +146,28 @@ module Arx
138
146
  'physics.pop-ph' => 'Popular Physics',
139
147
  'physics.soc-ph' => 'Physics and Society',
140
148
  'physics.space-ph' => 'Space Physics',
149
+ 'acc-ph' => 'Accelerator Physics', # physics
150
+ 'ao-ph' => 'Atmospheric and Oceanic Physics', # physics
151
+ 'app-ph' => 'Applied Physics', # physics
152
+ 'atm-clus' => 'Atomic and Molecular Clusters', # physics
153
+ 'atom-ph' => 'Atomic Physics', # physics
154
+ 'bio-ph' => 'Biological Physics', # physics
155
+ 'chem-ph' => 'Chemical Physics', # physics
156
+ 'class-ph' => 'Classical Physics', # physics
157
+ 'comp-ph' => 'Computational Physics', # physics
158
+ 'data-an' => 'Data Analysis, Statistics and Probability', # physics
159
+ 'ed-ph' => 'Physics Education', # physics
160
+ 'flu-dyn' => 'Fluid Dynamics', # physics
161
+ 'gen-ph' => 'General Physics', # physics
162
+ 'geo-ph' => 'Geophysics', # physics
163
+ 'hist-ph' => 'History and Philosophy of Physics', # physics
164
+ 'ins-det' => 'Instrumentation and Detectors', # physics
165
+ 'med-ph' => 'Medical Physics', # physics
166
+ 'optics' => 'Optics', # physics
167
+ 'plasm-ph' => 'Plasma Physics', # physics
168
+ 'pop-ph' => 'Popular Physics', # physics
169
+ 'soc-ph' => 'Physics and Society', # physics
170
+ 'space-ph' => 'Space Physics', # physics
141
171
  'q-bio' => 'Quantitative Biology',
142
172
  'q-bio.BM' => 'Biomolecules',
143
173
  'q-bio.CB' => 'Cell Behavior',
data/lib/arx/cleaner.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  module Arx
2
2
 
3
3
  # Class for cleaning strings.
4
+ #
4
5
  # @private
5
6
  class Cleaner
6
7
 
@@ -10,6 +11,7 @@ module Arx
10
11
  class << self
11
12
 
12
13
  # Cleans strings.
14
+ #
13
15
  # @param [String] string The string to clean by removing newline/return characters and multiple spaces.
14
16
  # @return [String] The cleaned string.
15
17
  def clean(string)
@@ -5,24 +5,71 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper's author.
9
+ ATTRIBUTES = %i[name affiliated? affiliations]
10
+
8
11
  tag 'author'
9
12
 
10
13
  # @!method name
11
14
  # The name of the author.
15
+ #
12
16
  # @return [String]
13
17
  element :name, Cleaner, tag: 'name', parser: :clean
14
18
 
15
19
  # @!method affiliations
16
20
  # The author's affiliations.
21
+ #
17
22
  # @return [Array<String>]
18
23
  has_many :affiliations, Cleaner, tag: 'affiliation', parser: :clean
19
24
 
20
25
  # Whether or not the author has any affiliations.
26
+ #
21
27
  # @return [Boolean]
22
28
  def affiliated?
23
29
  !affiliations.empty?
24
30
  end
25
31
 
26
- inspector :name, :affiliated?, :affiliations
32
+ # Serializes the {Author} object into a +Hash+.
33
+ #
34
+ # @return [Hash]
35
+ def to_h
36
+ Hash[*ATTRIBUTES.map {|_| [_, send(_)]}.flatten(1)]
37
+ end
38
+
39
+ # Serializes the {Author} object into a valid JSON hash.
40
+ #
41
+ # @return [Hash] The resulting JSON hash.
42
+ def as_json
43
+ JSON.parse to_json
44
+ end
45
+
46
+ # Serializes the {Author} object into a valid JSON string.
47
+ #
48
+ # @return [String] The resulting JSON string.
49
+ def to_json
50
+ to_h.to_json
51
+ end
52
+
53
+ # Equality check against another author.
54
+ #
55
+ # @note This only performs a basic equality check between the authors' names.
56
+ # @param author [Author] The author to compare against.
57
+ # @return [Boolean]
58
+ def ==(author)
59
+ if author.is_a? Author
60
+ name == author.name
61
+ else
62
+ false
63
+ end
64
+ end
65
+
66
+ # A string representation of the {Author} object.
67
+ #
68
+ # @return [String]
69
+ def to_s
70
+ "Arx::Author(name: #{name}, affiliations: [#{affiliations.join(', ')}])"
71
+ end
72
+
73
+ inspector *ATTRIBUTES
27
74
  end
28
75
  end
@@ -5,20 +5,65 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper's category.
9
+ ATTRIBUTES = %i[name full_name]
10
+
8
11
  tag 'category'
9
12
 
10
13
  # @!method name
11
14
  # The abbreviated name of the category.
15
+ #
12
16
  # @return [String]
13
17
  attribute :name, Cleaner, parser: :clean, tag: 'term'
14
18
 
15
19
  # The full name of the category.
20
+ #
16
21
  # @see CATEGORIES
17
22
  # @return [String]
18
23
  def full_name
19
24
  CATEGORIES[name]
20
25
  end
21
26
 
22
- inspector :name, :full_name
27
+ # Serializes the {Category} object into a +Hash+.
28
+ #
29
+ # @return [Hash]
30
+ def to_h
31
+ Hash[*ATTRIBUTES.map {|_| [_, send(_)]}.flatten(1)]
32
+ end
33
+
34
+ # Serializes the {Category} object into a valid JSON hash.
35
+ #
36
+ # @return [Hash] The resulting JSON hash.
37
+ def as_json
38
+ JSON.parse to_json
39
+ end
40
+
41
+ # Serializes the {Category} object into a valid JSON string.
42
+ #
43
+ # @return [String] The resulting JSON string.
44
+ def to_json
45
+ to_h.to_json
46
+ end
47
+
48
+ # Equality check against another category.
49
+ #
50
+ # @param category [Category] The category to compare against.
51
+ # @return [Boolean]
52
+ def ==(category)
53
+ if category.is_a? Category
54
+ name == category.name
55
+ else
56
+ false
57
+ end
58
+ end
59
+
60
+ # A string representation of the {Category} object.
61
+ #
62
+ # @return [String]
63
+ def to_s
64
+ "Arx::Category(name: #{name}, full_name: #{full_name || 'nil'})"
65
+ end
66
+
67
+ inspector *ATTRIBUTES
23
68
  end
24
69
  end
@@ -1,6 +1,7 @@
1
1
  module Arx
2
2
 
3
3
  # Helper entity/model representing a link on an arXiv paper.
4
+ #
4
5
  # @private
5
6
  class Link
6
7
  include HappyMapper
@@ -5,37 +5,54 @@ module Arx
5
5
  include HappyMapper
6
6
  include Inspector
7
7
 
8
+ # The attributes of an arXiv paper.
9
+ # @note {comment}, {journal}, {pdf_url} and {doi_url} may raise errors when called.
10
+ ATTRIBUTES = %i[
11
+ id url version revision?
12
+ title summary authors
13
+ primary_category categories
14
+ published_at updated_at
15
+ comment? comment
16
+ journal? journal
17
+ pdf? pdf_url
18
+ doi? doi_url
19
+ ]
20
+
8
21
  tag 'entry'
9
22
 
10
23
  element :id, Cleaner, parser: :clean, tag: 'id'
11
24
  # The identifier of the paper.
25
+ #
12
26
  # @note This is either in {OLD_IDENTIFIER_FORMAT} or {NEW_IDENTIFIER_FORMAT}.
13
27
  # @example
14
28
  # 1705.01662v1
15
29
  # cond-mat/0211034
16
30
  # @param version [Boolean] Whether or not to include the paper's version.
17
31
  # @return [String] The paper's identifier.
18
- def id(version: false)
32
+ def id(version = false)
19
33
  Cleaner.extract_id @id, version: version
20
34
  end
21
35
 
22
36
  # The URL of the paper on the arXiv website.
37
+ #
23
38
  # @example
24
39
  # http://arxiv.org/abs/1705.01662v1
25
40
  # http://arxiv.org/abs/cond-mat/0211034
26
41
  # @param version [Boolean] Whether or not to include the paper's version.
27
42
  # @return [String] The paper's arXiv URL.
28
- def url(version: false)
29
- "http://arxiv.org/abs/#{id version: version}"
43
+ def url(version = false)
44
+ "http://arxiv.org/abs/#{id version}"
30
45
  end
31
46
 
32
47
  # The version of the paper.
48
+ #
33
49
  # @return [Integer] The paper's version.
34
50
  def version
35
51
  Cleaner.extract_version @id
36
52
  end
37
53
 
38
54
  # Whether the paper is a revision or not.
55
+ #
39
56
  # @note A paper is a revision if its {version} is greater than 1.
40
57
  # @return [Boolean]
41
58
  def revision?
@@ -44,47 +61,56 @@ module Arx
44
61
 
45
62
  # @!method updated_at
46
63
  # The date that the paper was last updated.
64
+ #
47
65
  # @return [DateTime]
48
66
  element :updated_at, DateTime, tag: 'updated'
49
67
 
50
68
  # @!method published_at
51
69
  # The original publish/submission date of the paper.
70
+ #
52
71
  # @return [DateTime]
53
72
  element :published_at, DateTime, tag: 'published'
54
73
 
55
74
  # @!method title
56
75
  # The title of the paper.
76
+ #
57
77
  # @return [String]
58
78
  element :title, Cleaner, parser: :clean, tag: 'title'
59
79
 
60
80
  # @!method authors
61
81
  # The authors of the paper.
82
+ #
62
83
  # @return [Array<Author>]
63
84
  has_many :authors, Author, tag: 'author'
64
85
 
65
86
  # @!method primary_category
66
87
  # The primary category of the paper.
88
+ #
67
89
  # @return [Category]
68
90
  element :primary_category, Category, tag: 'primary_category'
69
91
  alias_method :category, :primary_category
70
92
 
71
93
  # @!method categories
72
94
  # The categories of the paper.
95
+ #
73
96
  # @return [Array<Category>]
74
97
  has_many :categories, Category, tag: 'category'
75
98
 
76
99
  # @!method summary
77
100
  # The summary (or abstract) of the paper.
101
+ #
78
102
  # @return [String]
79
103
  element :summary, Cleaner, parser: :clean, tag: 'summary'
80
104
  alias_method :abstract, :summary
81
105
 
82
106
  # @!method comment?
83
107
  # Whether or not the paper has a comment.
108
+ #
84
109
  # @return [Boolean]
85
110
 
86
111
  # @!method comment
87
112
  # The comment of the paper.
113
+ #
88
114
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a comment, use {comment?}
89
115
  # @raise {Error::MissingField} If the paper does not have a comment.
90
116
  # @return [String]
@@ -92,10 +118,12 @@ module Arx
92
118
 
93
119
  # @!method journal?
94
120
  # Whether or not the paper has a journal reference.
121
+ #
95
122
  # @return [Boolean]
96
123
 
97
124
  # @!method journal
98
125
  # The journal reference of the paper.
126
+ #
99
127
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a journal reference, use {journal?}
100
128
  # @raise {Error::MissingField} If the paper does not have a journal reference.
101
129
  # @return [String]
@@ -121,22 +149,26 @@ module Arx
121
149
 
122
150
  # @!method pdf?
123
151
  # Whether or not the paper has a PDF link.
152
+ #
124
153
  # @return [Boolean]
125
154
 
126
155
  # @!method pdf_url
127
156
  # Link to the PDF version of the paper.
157
+ #
128
158
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a PDF link, use {pdf?}
129
159
  # @raise {Error::MissingLink} If the paper does not have a PDF link.
130
160
  # @return [String]
131
161
 
132
162
  # @!method doi?
133
163
  # Whether or not the paper has a DOI (Digital Object Identifier) link.
164
+ #
134
165
  # @see https://arxiv.org/help/jref#doi
135
166
  # @see https://arxiv.org/help/prep#doi
136
167
  # @return [Boolean]
137
168
 
138
169
  # @!method doi_url
139
170
  # Link to the DOI (Digital Object Identifier) of the paper.
171
+ #
140
172
  # @see https://arxiv.org/help/jref#doi
141
173
  # @see https://arxiv.org/help/prep#doi
142
174
  # @note This is an optional metadata field on an arXiv paper. To check whether the paper has a DOI link, use {doi?}
@@ -159,15 +191,74 @@ module Arx
159
191
  end
160
192
  end
161
193
 
162
- inspector *%i[
163
- id url version revision?
164
- title summary authors
165
- primary_category categories
166
- published_at updated_at
167
- comment? comment
168
- journal? journal
169
- pdf? pdf_url
170
- doi? doi_url
171
- ]
194
+ # Serializes the {Paper} object into a +Hash+.
195
+ #
196
+ # @param deep [Boolean] Whether to deep-serialize {Author} and {Category} objects.
197
+ # @return [Hash]
198
+ def to_h(deep = false)
199
+ Hash[*ATTRIBUTES.map {|_| [_, send(_)] rescue nil}.compact.flatten(1)].tap do |hash|
200
+ if deep
201
+ hash[:authors].map! &:to_h
202
+ hash[:categories].map! &:to_h
203
+ hash[:primary_category] = hash[:primary_category].to_h
204
+ end
205
+ end
206
+ end
207
+
208
+ # Serializes the {Paper} object into a valid JSON hash.
209
+ #
210
+ # @note Deep-serializes {Author} and {Category} objects.
211
+ # @return [Hash] The resulting JSON hash.
212
+ def as_json
213
+ JSON.parse to_json
214
+ end
215
+
216
+ # Serializes the {Paper} object into a valid JSON string.
217
+ #
218
+ # @note Deep-serializes {Author} and {Category} objects.
219
+ # @return [String] The resulting JSON string.
220
+ def to_json
221
+ to_h(true).to_json
222
+ end
223
+
224
+ # Equality check against another paper.
225
+ #
226
+ # @note This only performs a basic equality check between the papers' identifiers (disregarding version).
227
+ # This means that a different version of the same paper will be viewed as equal.
228
+ # @param paper [Paper] The paper to compare against.
229
+ # @return [Boolean]
230
+ def ==(paper)
231
+ if paper.is_a? Paper
232
+ id == paper.id
233
+ else
234
+ false
235
+ end
236
+ end
237
+
238
+ # Downloads the paper and saves it in PDF format at the specified path.
239
+ #
240
+ # @param path [String] The file path to store the PDF at.
241
+ def save(path)
242
+ begin
243
+ pdf_content = URI.open(pdf_url).read
244
+ File.open(path, 'wb') {|f| f.write pdf_content}
245
+ rescue
246
+ File.delete(path) if File.file? path
247
+ raise
248
+ end
249
+ end
250
+
251
+ # A string representation of the {Paper} object.
252
+ #
253
+ # @return [String]
254
+ def to_s
255
+ _id = id true
256
+ _published_at = published_at.strftime("%Y-%m-%d")
257
+ _authors = authors.map(&:name)
258
+ _authors = [*_authors.first(2), '...'] if _authors.size > 2
259
+ "Arx::Paper(id: #{_id}, published_at: #{_published_at}, authors: [#{_authors.join(', ')}], title: #{title})"
260
+ end
261
+
262
+ inspector *ATTRIBUTES
172
263
  end
173
264
  end