arx 0.3.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/README.md +30 -10
- data/lib/arx.rb +11 -5
- data/lib/arx/cleaner.rb +44 -5
- data/lib/arx/entities/paper.rb +22 -13
- data/lib/arx/query/query.rb +11 -35
- data/lib/arx/query/validate.rb +3 -1
- data/lib/arx/version.rb +3 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b23b9ea5d15ed9ed58108f69176cebe48f6d5ce44f7fd4f4bd6e2937d36e8a20
|
4
|
+
data.tar.gz: 0f0928e5737bbd6d5a0c2f058e4fc198b75f5968f94630d0648755616d2085a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: edf64375f0efffb2722afb5746d5fd5e7ce150c88f2a8ebecdf58db3ca8b1de9c24cb950630e33f418db54d98c7cc351d5ee370e3fc481de55a4b3da59b658bd
|
7
|
+
data.tar.gz: bdb9966e13ede3888ff707c8c5c1350cb74af45b276b392242985adc9e6f48ab41ca5ecc1b5f53289ac862adadcaabdd58f509932d930feac75aefc47a8ba6e1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
# 1.0.0
|
2
|
+
|
3
|
+
#### Major changes
|
4
|
+
|
5
|
+
- Change `Query` connective instance methods ([#38](https://github.com/eonu/arx/pull/38)):
|
6
|
+
- `#&` -> `#and`
|
7
|
+
- `#|` -> `#or`
|
8
|
+
- `#!` -> `#and_not`
|
9
|
+
- Split version number from paper identifier in `Paper` (add `version` key-word argument to `#id` and `#url`, and add `#version`). ([#39](https://github.com/eonu/arx/pull/39))
|
10
|
+
- Add `Cleaner.extract_id` and `Cleaner.extract_version`. ([#39](https://github.com/eonu/arx/pull/39))
|
11
|
+
- Make `Query#add_connective` always return `self`. ([#40](https://github.com/eonu/arx/pull/40))
|
12
|
+
- Redefine `Arx.search` to use `Paper.parse`'s `search` key-word argument. ([#40](https://github.com/eonu/arx/pull/40))
|
13
|
+
- Implement all tests. ([#40](https://github.com/eonu/arx/pull/40))
|
14
|
+
|
15
|
+
#### Minor changes
|
16
|
+
|
17
|
+
- Change declared regular expression literals from `%r""` to standard `//`. ([#39](https://github.com/eonu/arx/pull/39))
|
18
|
+
- Remove `#extract_id` from `Query` and use `Cleaner.extract_id` instead. ([#39](https://github.com/eonu/arx/pull/39))
|
19
|
+
- Redefine `Paper#revision?` to use the new `#version` instead of `#updated_at` and `#published_at`. ([#39](https://github.com/eonu/arx/pull/39))
|
20
|
+
|
1
21
|
# 0.3.2
|
2
22
|
|
3
23
|
#### Major changes
|
data/README.md
CHANGED
@@ -24,6 +24,23 @@ Although [Scholastica](https://github.com/scholastica) offer a great [Ruby gem](
|
|
24
24
|
|
25
25
|
*Arx is a gem that allows for quick and easy querying of the arXiv search API, without having to worry about manually writing your own search query strings or parsing the resulting XML query response to find the data you need.*
|
26
26
|
|
27
|
+
## Example
|
28
|
+
|
29
|
+
Suppose we wish to search for:
|
30
|
+
|
31
|
+
> Papers in the `cs.FL` (Formal Languages and Automata Theory) category whose title contains `"Buchi Automata"`, not authored by `Tomáš Babiak`, sorted by submission date (latest first).
|
32
|
+
|
33
|
+
This query can be executed with the following code:
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
require 'arx'
|
37
|
+
|
38
|
+
papers = Arx(sort_by: :date_submitted) do |query|
|
39
|
+
query.category('cs.FL')
|
40
|
+
query.title('Buchi Automata').and_not.author('Tomáš Babiak')
|
41
|
+
end
|
42
|
+
```
|
43
|
+
|
27
44
|
## Features
|
28
45
|
|
29
46
|
- Ruby classes `Arx::Paper`, `Arx::Author` and `Arx::Category` that wrap the resulting Atom XML query result from the search API.
|
@@ -178,13 +195,13 @@ q.author('Dominik Edelmann')
|
|
178
195
|
q.category('math.NA')
|
179
196
|
```
|
180
197
|
|
181
|
-
To change the logical connective used to chain subqueries, use the
|
198
|
+
To change the logical connective used to chain subqueries, use the `and`, `or`, `and_not` instance methods between the subquery calls:
|
182
199
|
|
183
200
|
```ruby
|
184
201
|
# Papers authored by "Eleonora Andreotti" in neither the "Numerical Analysis" (math.NA) nor the "Combinatorics" (math.CO) categories.
|
185
202
|
q = Arx::Query.new
|
186
203
|
q.author('Eleonora Andreotti')
|
187
|
-
q
|
204
|
+
q.and_not
|
188
205
|
q.category('math.NA', 'math.CO', connective: :or)
|
189
206
|
```
|
190
207
|
|
@@ -202,9 +219,7 @@ Calling the `Arx()` method with a block allows for the construction and executio
|
|
202
219
|
# Papers in the cs.FL category whose title contains "Buchi Automata", not authored by Tomáš Babiak
|
203
220
|
results = Arx(sort_by: :date_submitted) do |query|
|
204
221
|
query.category('cs.FL')
|
205
|
-
query.title('Buchi Automata')
|
206
|
-
query.!()
|
207
|
-
query.author('Tomáš Babiak')
|
222
|
+
query.title('Buchi Automata').and_not.author('Tomáš Babiak')
|
208
223
|
end
|
209
224
|
|
210
225
|
results.size #=> 18
|
@@ -220,9 +235,7 @@ The `Arx()` method accepts a predefined `Arx::Query` object through the `query`
|
|
220
235
|
# Papers in the cs.FL category whose title contains "Buchi Automata", not authored by Tomáš Babiak
|
221
236
|
q = Arx::Query.new(sort_by: :date_submitted)
|
222
237
|
q.category('cs.FL')
|
223
|
-
q.title('Buchi Automata')
|
224
|
-
q.!()
|
225
|
-
q.author('Tomáš Babiak')
|
238
|
+
q.title('Buchi Automata').and_not.author('Tomáš Babiak')
|
226
239
|
|
227
240
|
results = Arx(query: q)
|
228
241
|
results.size #=> 18
|
@@ -259,9 +272,18 @@ paper = Arx('1809.09415')
|
|
259
272
|
#=> #<Arx::Paper:0x00007fb657b59bd0>
|
260
273
|
|
261
274
|
paper.id
|
275
|
+
#=> "1809.09415"
|
276
|
+
paper.id(version: true)
|
262
277
|
#=> "1809.09415v1"
|
263
278
|
paper.url
|
279
|
+
#=> "http://arxiv.org/abs/1809.09415"
|
280
|
+
paper.url(version: true)
|
264
281
|
#=> "http://arxiv.org/abs/1809.09415v1"
|
282
|
+
paper.version
|
283
|
+
#=> 1
|
284
|
+
paper.revision?
|
285
|
+
#=> false
|
286
|
+
|
265
287
|
paper.title
|
266
288
|
#=> "On finitely ambiguous Büchi automata"
|
267
289
|
paper.summary
|
@@ -280,8 +302,6 @@ paper.published_at
|
|
280
302
|
#=> #<DateTime: 2018-09-25T11:40:39+00:00 ((2458387j,42039s,0n),+0s,2299161j)>
|
281
303
|
paper.updated_at
|
282
304
|
#=> #<DateTime: 2018-09-25T11:40:39+00:00 ((2458387j,42039s,0n),+0s,2299161j)>
|
283
|
-
paper.revision?
|
284
|
-
#=> false
|
285
305
|
|
286
306
|
# Paper's comment
|
287
307
|
paper.comment?
|
data/lib/arx.rb
CHANGED
@@ -32,7 +32,7 @@ module Arx
|
|
32
32
|
# 1705.01662v1
|
33
33
|
# 1412.0135
|
34
34
|
# 0706.0001v2
|
35
|
-
NEW_IDENTIFIER_FORMAT =
|
35
|
+
NEW_IDENTIFIER_FORMAT = /^\d{4}\.\d{4,5}(v\d+)?$/
|
36
36
|
|
37
37
|
# The legacy arXiv paper identifier scheme (before 1 April 2007).
|
38
38
|
#
|
@@ -40,7 +40,7 @@ module Arx
|
|
40
40
|
# @example
|
41
41
|
# math/0309136v1
|
42
42
|
# cond-mat/0211034
|
43
|
-
OLD_IDENTIFIER_FORMAT =
|
43
|
+
OLD_IDENTIFIER_FORMAT = /^[a-z]+(\-[a-z]+)?\/\d{7}(v\d+)?$/
|
44
44
|
|
45
45
|
class << self
|
46
46
|
|
@@ -59,9 +59,15 @@ module Arx
|
|
59
59
|
yield query if block_given?
|
60
60
|
|
61
61
|
document = Nokogiri::XML(open ENDPOINT + query.to_s + '&max_results=10000').remove_namespaces!
|
62
|
-
results = Paper.parse(document, single:
|
63
|
-
|
64
|
-
|
62
|
+
results = Paper.parse(document, single: ids.size == 1)
|
63
|
+
|
64
|
+
if results.is_a? Paper
|
65
|
+
raise Error::MissingPaper.new(ids.first) if results.title.empty?
|
66
|
+
elsif results.is_a? Array
|
67
|
+
results.reject! {|paper| paper.title.empty?}
|
68
|
+
end
|
69
|
+
|
70
|
+
results
|
65
71
|
end
|
66
72
|
|
67
73
|
alias_method :find, :search
|
data/lib/arx/cleaner.rb
CHANGED
@@ -4,11 +4,50 @@ module Arx
|
|
4
4
|
# @private
|
5
5
|
class Cleaner
|
6
6
|
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# arXiv paper URL prefix format
|
8
|
+
URL_PREFIX = /^(https?\:\/\/)?(www.)?arxiv\.org\/abs\//
|
9
|
+
|
10
|
+
class << self
|
11
|
+
|
12
|
+
# Cleans strings.
|
13
|
+
# @param string [String] The string from which newline/return characters and repeated spaces are removed.
|
14
|
+
# @return [String] The cleaned string.
|
15
|
+
def clean(string)
|
16
|
+
string.gsub(/\r\n|\r|\n/, ' ').strip.squeeze ' '
|
17
|
+
end
|
18
|
+
|
19
|
+
# Attempt to extract an arXiv identifier from a string such as a URL.
|
20
|
+
#
|
21
|
+
# @param string [String] The string to extract the ID from.
|
22
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
23
|
+
# @return [String] The extracted ID.
|
24
|
+
def extract_id(string, version: false)
|
25
|
+
if version == !!version
|
26
|
+
if string.is_a? String
|
27
|
+
trimmed = /#{URL_PREFIX}.+\/?$/.match?(string) ? string.gsub(/(#{URL_PREFIX})|(\/$)/, '') : string
|
28
|
+
raise ArgumentError.new("Couldn't extract arXiv identifier from: #{string}") unless Validate.id? trimmed
|
29
|
+
version ? trimmed : trimmed.sub(/v[0-9]+$/, '')
|
30
|
+
else
|
31
|
+
raise TypeError.new("Expected `string` to be a String, got: #{string.class}")
|
32
|
+
end
|
33
|
+
else
|
34
|
+
raise TypeError.new("Expected `version` to be boolean (TrueClass or FalseClass), got: #{version.class}")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Attempt to extract a version number from an arXiv identifier.
|
39
|
+
#
|
40
|
+
# @param string [String] The arXiv identifier to extract the version number from.
|
41
|
+
# @return [Integer] The extracted version number.
|
42
|
+
def extract_version(string)
|
43
|
+
reversed = extract_id(string, version: true).reverse
|
44
|
+
|
45
|
+
if /^[0-9]+v/.match? reversed
|
46
|
+
reversed.partition('v').first.reverse.to_i
|
47
|
+
else
|
48
|
+
raise ArgumentError.new("Couldn't extract version number from identifier: #{string}")
|
49
|
+
end
|
50
|
+
end
|
12
51
|
end
|
13
52
|
end
|
14
53
|
end
|
data/lib/arx/entities/paper.rb
CHANGED
@@ -13,18 +13,33 @@ module Arx
|
|
13
13
|
# @example
|
14
14
|
# 1705.01662v1
|
15
15
|
# cond-mat/0211034
|
16
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
16
17
|
# @return [String] The paper's identifier.
|
17
|
-
def id
|
18
|
-
@id
|
18
|
+
def id(version: false)
|
19
|
+
Cleaner.extract_id @id, version: version
|
19
20
|
end
|
20
21
|
|
21
22
|
# The URL of the paper on the arXiv website.
|
22
23
|
# @example
|
23
24
|
# http://arxiv.org/abs/1705.01662v1
|
24
25
|
# http://arxiv.org/abs/cond-mat/0211034
|
26
|
+
# @param version [Boolean] Whether or not to include the paper's version.
|
25
27
|
# @return [String] The paper's arXiv URL.
|
26
|
-
def url
|
27
|
-
|
28
|
+
def url(version: false)
|
29
|
+
"http://arxiv.org/abs/#{id version: version}"
|
30
|
+
end
|
31
|
+
|
32
|
+
# The version of the paper.
|
33
|
+
# @return [Integer] The paper's version.
|
34
|
+
def version
|
35
|
+
Cleaner.extract_version @id
|
36
|
+
end
|
37
|
+
|
38
|
+
# Whether the paper is a revision or not.
|
39
|
+
# @note A paper is a revision if its {version} is greater than 1.
|
40
|
+
# @return [Boolean]
|
41
|
+
def revision?
|
42
|
+
version > 1
|
28
43
|
end
|
29
44
|
|
30
45
|
# @!method updated_at
|
@@ -58,13 +73,6 @@ module Arx
|
|
58
73
|
# @return [Array<Category>]
|
59
74
|
has_many :categories, Category, tag: 'category'
|
60
75
|
|
61
|
-
# Whether the paper is a revision or not.
|
62
|
-
# @note A paper is a revision if {updated_at} differs from {published_at}.
|
63
|
-
# @return [Boolean]
|
64
|
-
def revision?
|
65
|
-
@published_at != @updated_at
|
66
|
-
end
|
67
|
-
|
68
76
|
# @!method summary
|
69
77
|
# The summary (or abstract) of the paper.
|
70
78
|
# @return [String]
|
@@ -152,9 +160,10 @@ module Arx
|
|
152
160
|
end
|
153
161
|
|
154
162
|
inspector *%i[
|
155
|
-
id url
|
163
|
+
id url version revision?
|
164
|
+
title summary authors
|
156
165
|
primary_category categories
|
157
|
-
published_at updated_at
|
166
|
+
published_at updated_at
|
158
167
|
comment? comment
|
159
168
|
journal? journal
|
160
169
|
pdf? pdf_url
|
data/lib/arx/query/query.rb
CHANGED
@@ -22,13 +22,6 @@ module Arx
|
|
22
22
|
and_not: 'ANDNOT'
|
23
23
|
}
|
24
24
|
|
25
|
-
# Logical connective method names.
|
26
|
-
CONNECTIVE_METHODS = {
|
27
|
-
'&': :and,
|
28
|
-
'!': :and_not,
|
29
|
-
'|': :or
|
30
|
-
}
|
31
|
-
|
32
25
|
# Supported fields for the search queries made to the arXiv search API.
|
33
26
|
# @see https://arxiv.org/help/prep arXiv metadata fields
|
34
27
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual (query details)
|
@@ -73,31 +66,30 @@ module Arx
|
|
73
66
|
|
74
67
|
ids.flatten!
|
75
68
|
unless ids.empty?
|
76
|
-
ids.map!
|
77
|
-
Validate.ids ids
|
69
|
+
ids.map! &Cleaner.method(:extract_id)
|
78
70
|
@query << "&#{PARAMS[:id_list]}=#{ids * ','}"
|
79
71
|
end
|
80
72
|
|
81
73
|
yield self if block_given?
|
82
74
|
end
|
83
75
|
|
84
|
-
# @!method
|
76
|
+
# @!method and
|
85
77
|
# Logical conjunction (+AND+) of subqueries.
|
86
78
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
|
87
79
|
# @return [self]
|
88
80
|
|
89
|
-
# @!method
|
81
|
+
# @!method and_not
|
90
82
|
# Logical negated conjunction (+ANDNOT+) of subqueries.
|
91
83
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
|
92
84
|
# @return [self]
|
93
85
|
|
94
|
-
# @!method
|
86
|
+
# @!method or
|
95
87
|
# Logical disjunction (+OR+) of subqueries.
|
96
88
|
# @see https://arxiv.org/help/api/user-manual#query_details arXiv user manual
|
97
89
|
# @return [self]
|
98
90
|
|
99
|
-
|
100
|
-
define_method(
|
91
|
+
CONNECTIVES.keys.each do |connective|
|
92
|
+
define_method(connective) { add_connective connective }
|
101
93
|
end
|
102
94
|
|
103
95
|
# @!method title(*values, exact: true, connective: :and)
|
@@ -196,8 +188,9 @@ module Arx
|
|
196
188
|
# @param connective [Symbol] The symbol of the logical connective to add.
|
197
189
|
# @return [self]
|
198
190
|
def add_connective(connective)
|
199
|
-
|
200
|
-
|
191
|
+
if search_query?
|
192
|
+
@query << "+#{CONNECTIVES[connective]}" unless ends_with_connective?
|
193
|
+
end
|
201
194
|
self
|
202
195
|
end
|
203
196
|
|
@@ -206,12 +199,8 @@ module Arx
|
|
206
199
|
# @param subquery [String] The subquery to add.
|
207
200
|
def add_subquery(subquery)
|
208
201
|
if search_query?
|
209
|
-
|
210
|
-
|
211
|
-
else
|
212
|
-
add_connective :and
|
213
|
-
@query << "+#{subquery}"
|
214
|
-
end
|
202
|
+
add_connective :and unless ends_with_connective?
|
203
|
+
@query << "+#{subquery}"
|
215
204
|
else
|
216
205
|
@query << "&#{PARAMS[:search_query]}=#{subquery}"
|
217
206
|
end
|
@@ -248,18 +237,5 @@ module Arx
|
|
248
237
|
def enquote(string)
|
249
238
|
CGI.escape("\"") + string + CGI.escape("\"")
|
250
239
|
end
|
251
|
-
|
252
|
-
# Attempt to extract an ID from an arXiv URL.
|
253
|
-
#
|
254
|
-
# @param url [String] The URL to extract the ID from.
|
255
|
-
# @return [String] The extracted ID if successful, otherwise the original string.
|
256
|
-
def extract_id(url)
|
257
|
-
prefix = %r"^(https?\:\/\/)?(www.)?arxiv\.org\/abs\/"
|
258
|
-
if %r"#{prefix}.*$".match? url
|
259
|
-
url.sub(prefix, '').sub(%r"\/$", '')
|
260
|
-
else
|
261
|
-
url
|
262
|
-
end
|
263
|
-
end
|
264
240
|
end
|
265
241
|
end
|
data/lib/arx/query/validate.rb
CHANGED
@@ -94,7 +94,9 @@ module Arx
|
|
94
94
|
# @see NEW_IDENTIFIER_FORMAT
|
95
95
|
# @see OLD_IDENTIFIER_FORMAT
|
96
96
|
def id?(id)
|
97
|
-
NEW_IDENTIFIER_FORMAT.match?
|
97
|
+
return true if NEW_IDENTIFIER_FORMAT.match? id
|
98
|
+
return true if OLD_IDENTIFIER_FORMAT.match?(id) && Arx::CATEGORIES.keys.include?(id.split('/').first)
|
99
|
+
false
|
98
100
|
end
|
99
101
|
end
|
100
102
|
end
|
data/lib/arx/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edwin Onuonga
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|