arx 0.1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +125 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +407 -1
- data/Rakefile +60 -2
- data/arx.gemspec +15 -2
- data/lib/arx/categories.rb +10 -0
- data/lib/arx/cleaner.rb +45 -5
- data/lib/arx/entities/author.rb +4 -5
- data/lib/arx/entities/category.rb +4 -4
- data/lib/arx/entities/link.rb +1 -2
- data/lib/arx/entities/paper.rb +43 -29
- data/lib/arx/error.rb +27 -0
- data/lib/arx/inspector.rb +42 -0
- data/lib/arx/query/query.rb +42 -45
- data/lib/arx/query/validate.rb +4 -24
- data/lib/arx/version.rb +2 -2
- data/lib/arx.rb +62 -20
- metadata +46 -12
- data/lib/arx/exceptions.rb +0 -23
data/lib/arx.rb
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'cgi'
|
|
3
4
|
require 'nokogiri'
|
|
4
5
|
require 'open-uri'
|
|
6
|
+
require 'happymapper'
|
|
5
7
|
require 'arx/version'
|
|
8
|
+
require 'arx/cleaner'
|
|
9
|
+
require 'arx/inspector'
|
|
6
10
|
require 'arx/categories'
|
|
7
|
-
require 'arx/exceptions'
|
|
11
|
+
require 'arx/error'
|
|
8
12
|
require 'arx/query/validate'
|
|
13
|
+
require 'arx/query/query'
|
|
9
14
|
require 'arx/entities/author'
|
|
10
15
|
require 'arx/entities/category'
|
|
16
|
+
require 'arx/entities/link'
|
|
11
17
|
require 'arx/entities/paper'
|
|
12
18
|
|
|
13
19
|
# A Ruby interface for querying academic papers on the arXiv search API.
|
|
@@ -16,38 +22,74 @@ module Arx
|
|
|
16
22
|
# The arXiv search API endpoint.
|
|
17
23
|
ENDPOINT = 'http://export.arxiv.org/api/query?'
|
|
18
24
|
|
|
19
|
-
#
|
|
25
|
+
# The current arxiv paper identifier scheme (1 April 2007 and onwards).
|
|
26
|
+
# The last block of digits can either be five digits (if the paper was published after 1501 - January 2015),
|
|
27
|
+
# or four digits (if the paper was published before 1501).
|
|
20
28
|
#
|
|
21
|
-
# @
|
|
22
|
-
# @
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
|
|
26
|
-
|
|
29
|
+
# @see https://arxiv.org/help/arxiv_identifier#new arXiv identifier (new)
|
|
30
|
+
# @example
|
|
31
|
+
# 1501.00001
|
|
32
|
+
# 1705.01662v1
|
|
33
|
+
# 1412.0135
|
|
34
|
+
# 0706.0001v2
|
|
35
|
+
NEW_IDENTIFIER_FORMAT = /^\d{4}\.\d{4,5}(v\d+)?$/
|
|
36
|
+
|
|
37
|
+
# The legacy arXiv paper identifier scheme (before 1 April 2007).
|
|
38
|
+
#
|
|
39
|
+
# @see https://arxiv.org/help/arxiv_identifier#old arXiv identifier (old)
|
|
40
|
+
# @example
|
|
41
|
+
# math/0309136v1
|
|
42
|
+
# cond-mat/0211034
|
|
43
|
+
OLD_IDENTIFIER_FORMAT = /^[a-z]+(\-[a-z]+)?\/\d{7}(v\d+)?$/
|
|
44
|
+
|
|
45
|
+
class << self
|
|
27
46
|
|
|
28
|
-
|
|
47
|
+
# Performs a search query for papers on the arXiv search API.
|
|
48
|
+
#
|
|
49
|
+
# @note The +sort_by+ and +sort_order+ arguments are ignored if passing in your own +query+.
|
|
50
|
+
# @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
|
|
51
|
+
# @param query [Query, NilClass] Predefined search query object.
|
|
52
|
+
# @param sort_by [Symbol] The sorting criteria for the returned results (see {Query::SORT_BY}).
|
|
53
|
+
# @param sort_order [Symbol] The sorting order for the returned results (see {Query::SORT_ORDER}).
|
|
54
|
+
# @return [Array<Paper>, Paper] The {Paper}(s) found by the search query.
|
|
55
|
+
def search(*ids, query: nil, sort_by: :relevance, sort_order: :descending)
|
|
56
|
+
query ||= Query.new(*ids, sort_by: sort_by, sort_order: sort_order)
|
|
57
|
+
raise TypeError.new("Expected `query` to be an Arx::Query, got: #{query.class}") unless query.is_a? Query
|
|
29
58
|
|
|
30
|
-
|
|
31
|
-
document.remove_namespaces!
|
|
59
|
+
yield query if block_given?
|
|
32
60
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
61
|
+
document = Nokogiri::XML(open ENDPOINT + query.to_s + '&max_results=10000').remove_namespaces!
|
|
62
|
+
results = Paper.parse(document, single: ids.size == 1)
|
|
63
|
+
|
|
64
|
+
if results.is_a? Paper
|
|
65
|
+
raise Error::MissingPaper.new(ids.first) if results.title.empty?
|
|
66
|
+
elsif results.is_a? Array
|
|
67
|
+
results.reject! {|paper| paper.title.empty?}
|
|
68
|
+
elsif results.nil?
|
|
69
|
+
if ids.size == 1
|
|
70
|
+
raise Error::MissingPaper.new(ids.first)
|
|
71
|
+
else
|
|
72
|
+
results = []
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
results
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
alias_method :get, :search
|
|
36
80
|
end
|
|
37
81
|
end
|
|
38
82
|
|
|
39
83
|
# Performs a search query for papers on the arXiv search API.
|
|
40
84
|
#
|
|
41
85
|
# @note This is an alias of the {Arx.search} method.
|
|
86
|
+
# @note The +sort_by+ and +sort_order+ arguments are ignored if passing in your own +query+.
|
|
42
87
|
# @see Arx.search
|
|
43
88
|
# @param ids [Array<String>] The IDs of the arXiv papers to restrict the query to.
|
|
89
|
+
# @param query [Query, NilClass] Predefined search query object.
|
|
44
90
|
# @param sort_by [Symbol] The sorting criteria for the returned results (see {Arx::Query::SORT_BY}).
|
|
45
91
|
# @param sort_order [Symbol] The sorting order for the returned results (see {Arx::Query::SORT_ORDER}).
|
|
46
92
|
# @return [Array<Paper>, Paper] The {Arx::Paper}(s) found by the search query.
|
|
47
|
-
def Arx(*ids, sort_by: :relevance, sort_order: :descending, &block)
|
|
48
|
-
|
|
49
|
-
Arx.search *ids, sort_by: sort_by, sort_order: sort_order, &block
|
|
50
|
-
else
|
|
51
|
-
Arx.search *ids, sort_by: sort_by, sort_order: sort_order
|
|
52
|
-
end
|
|
93
|
+
def Arx(*ids, query: nil, sort_by: :relevance, sort_order: :descending, &block)
|
|
94
|
+
Arx.search *ids, query: query, sort_by: sort_by, sort_order: sort_order, &block
|
|
53
95
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: arx
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Edwin Onuonga
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-
|
|
11
|
+
date: 2019-04-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -42,16 +42,16 @@ dependencies:
|
|
|
42
42
|
name: bundler
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
|
-
- - "
|
|
45
|
+
- - ">="
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '
|
|
47
|
+
version: '1.17'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
|
-
- - "
|
|
52
|
+
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '
|
|
54
|
+
version: '1.17'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: rake
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -66,6 +66,20 @@ dependencies:
|
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
68
|
version: '12.3'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: thor
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 0.19.4
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: 0.19.4
|
|
69
83
|
- !ruby/object:Gem::Dependency
|
|
70
84
|
name: rspec
|
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -80,9 +94,23 @@ dependencies:
|
|
|
80
94
|
- - "~>"
|
|
81
95
|
- !ruby/object:Gem::Version
|
|
82
96
|
version: '3.7'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: coveralls
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - '='
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: 0.8.22
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - '='
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: 0.8.22
|
|
83
111
|
description:
|
|
84
112
|
email:
|
|
85
|
-
-
|
|
113
|
+
- ed@eonu.net
|
|
86
114
|
executables: []
|
|
87
115
|
extensions: []
|
|
88
116
|
extra_rdoc_files: []
|
|
@@ -100,23 +128,29 @@ files:
|
|
|
100
128
|
- lib/arx/entities/category.rb
|
|
101
129
|
- lib/arx/entities/link.rb
|
|
102
130
|
- lib/arx/entities/paper.rb
|
|
103
|
-
- lib/arx/exceptions.rb
|
|
131
|
+
- lib/arx/error.rb
|
|
132
|
+
- lib/arx/inspector.rb
|
|
104
133
|
- lib/arx/query/query.rb
|
|
105
134
|
- lib/arx/query/validate.rb
|
|
106
135
|
- lib/arx/version.rb
|
|
107
|
-
homepage:
|
|
136
|
+
homepage: https://github.com/eonu/arx
|
|
108
137
|
licenses:
|
|
109
138
|
- MIT
|
|
110
|
-
metadata:
|
|
139
|
+
metadata:
|
|
140
|
+
source_code_uri: https://github.com/eonu/arx
|
|
141
|
+
homepage_uri: https://github.com/eonu/arx
|
|
142
|
+
documentation_uri: https://www.rubydoc.info/github/eonu/arx/master/toplevel
|
|
143
|
+
bug_tracker_uri: https://github.com/eonu/arx/issues
|
|
144
|
+
changelog_uri: https://github.com/eonu/arx/blob/master/CHANGELOG.md
|
|
111
145
|
post_install_message:
|
|
112
146
|
rdoc_options: []
|
|
113
147
|
require_paths:
|
|
114
148
|
- lib
|
|
115
149
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
116
150
|
requirements:
|
|
117
|
-
- - "
|
|
151
|
+
- - "~>"
|
|
118
152
|
- !ruby/object:Gem::Version
|
|
119
|
-
version: '
|
|
153
|
+
version: '2.5'
|
|
120
154
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
155
|
requirements:
|
|
122
156
|
- - ">="
|
data/lib/arx/exceptions.rb
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
module Arx
|
|
2
|
-
|
|
3
|
-
# Custom error for missing links on an arXiv paper.
|
|
4
|
-
class MissingLinkError < StandardError
|
|
5
|
-
def initialize(link_type)
|
|
6
|
-
super "This arXiv paper does not have a #{link_type} link"
|
|
7
|
-
end
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
# Custom error for missing fields on an arXiv paper.
|
|
11
|
-
class MissingFieldError < StandardError
|
|
12
|
-
def initialize(field)
|
|
13
|
-
super "This arXiv paper is missing the `#{field}` field"
|
|
14
|
-
end
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
# Custom error for missing arXiv papers.
|
|
18
|
-
class MissingPaper < StandardError
|
|
19
|
-
def initialize(id)
|
|
20
|
-
super "Couldn't find an arXiv paper with ID: #{id}"
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|