arx 0.1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +125 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +407 -1
- data/Rakefile +60 -2
- data/arx.gemspec +15 -2
- data/lib/arx/categories.rb +10 -0
- data/lib/arx/cleaner.rb +45 -5
- data/lib/arx/entities/author.rb +4 -5
- data/lib/arx/entities/category.rb +4 -4
- data/lib/arx/entities/link.rb +1 -2
- data/lib/arx/entities/paper.rb +43 -29
- data/lib/arx/error.rb +27 -0
- data/lib/arx/inspector.rb +42 -0
- data/lib/arx/query/query.rb +42 -45
- data/lib/arx/query/validate.rb +4 -24
- data/lib/arx/version.rb +2 -2
- data/lib/arx.rb +62 -20
- metadata +46 -12
- data/lib/arx/exceptions.rb +0 -23
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 71eb1bee2ff468ea9327736e613e40b414c2b630d4f21b148da648500af3a47e
|
|
4
|
+
data.tar.gz: 8592a476d3abbeedfe2bef11637498b551fd478d643c5985a7c6a119aa79ca80
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c9d708bd3f7d244f8557da0c9dba42d66c9a344a8f0316463879e76cc385fcf895c4c4089a5340543e166d771958d975fac685d27908755be0575e97f70625c2
|
|
7
|
+
data.tar.gz: 382f4ec892499c9e3b062693755aaa4af6c86790a830aaee39a2435d8af902d7e6bdcfc91712eaa539ef3e066c6a6ae04c151327951477194b0ca722848b0d2c
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,128 @@
|
|
|
1
|
+
# 1.1.0
|
|
2
|
+
|
|
3
|
+
#### Major changes
|
|
4
|
+
|
|
5
|
+
- Change `bundler` requirement to `>= 1.17` in `arx.gemspec`. ([#53](https://github.com/eonu/arx/pull/53))
|
|
6
|
+
- Remove `Arx.find` alias of `Arx.search`. ([#57](https://github.com/eonu/arx/pull/57))
|
|
7
|
+
- Add `Query#group` for subquery grouping support. ([#59](https://github.com/eonu/arx/pull/59))
|
|
8
|
+
|
|
9
|
+
#### Minor changes
|
|
10
|
+
|
|
11
|
+
- Add contributing guidelines (`CONTRIBUTING.md`). ([#48](https://github.com/eonu/arx/pull/48))
|
|
12
|
+
- Add issue templates to `./github/ISSUE_TEMPLATE` for ([#49](https://github.com/eonu/arx/pull/49), [#54](https://github.com/eonu/arx/pull/54), [#55](https://github.com/eonu/arx/pull/55)):
|
|
13
|
+
- **Error or warning**<br>For reporting an error or warning generated by Arx.
|
|
14
|
+
- **Unexpected or incorrect functionality**<br>For reporting something that doesn't seem to be working correctly or is unexpected.
|
|
15
|
+
- **Improvement to an existing feature**<br>For suggesting an improvement to a feature already offered by Arx.
|
|
16
|
+
- **Suggesting a new feature**<br>For proposing a new feature to Arx that would be beneficial.
|
|
17
|
+
- Add a pull request template at `./github/PULL_REQUEST_TEMPLATE.md`. ([#49](https://github.com/eonu/arx/pull/49))
|
|
18
|
+
- Remove issue templates from `CONTRIBUTING.md`. ([#49](https://github.com/eonu/arx/pull/49))
|
|
19
|
+
- Remove `LICENSE` from YARD documentation (remove from `.yardopts`). ([#50](https://github.com/eonu/arx/pull/50))
|
|
20
|
+
- Add RVM ruby version `2.6` to `.travis.yml`. ([#53](https://github.com/eonu/arx/pull/53))
|
|
21
|
+
- Add contributor code-of-conduct (`CODE_OF_CONDUCT.md`). ([#56](https://github.com/eonu/arx/pull/56))
|
|
22
|
+
- Thank Scholastica in `README.md`. ([#58](https://github.com/eonu/arx/pull/58))
|
|
23
|
+
- Add `bin/console` for gem debugging. ([#60](https://github.com/eonu/arx/pull/60))
|
|
24
|
+
- Modify `gem:debug` rake task to run `bin/console`. ([#60](https://github.com/eonu/arx/pull/60))
|
|
25
|
+
|
|
26
|
+
# 1.0.1
|
|
27
|
+
|
|
28
|
+
#### Major changes
|
|
29
|
+
|
|
30
|
+
- Add cases to handle `nil` query returns. ([#45](https://github.com/eonu/arx/pull/45))
|
|
31
|
+
- Add support for the `coveralls` gem (`.coveralls.yml` configuration file). ([#42](https://github.com/eonu/arx/pull/42))
|
|
32
|
+
|
|
33
|
+
#### Minor changes
|
|
34
|
+
|
|
35
|
+
- Add code coverage badge to `README.md`. ([#42](https://github.com/eonu/arx/pull/42))
|
|
36
|
+
- Remove documentation badge from top of `README.md`. ([#42](https://github.com/eonu/arx/pull/42))
|
|
37
|
+
- Change author email from `ed@mail.eonu.net` to `ed@eonu.net`. ([#43](https://github.com/eonu/arx/pull/43))
|
|
38
|
+
- Change `ends_with_connective?` to `end_with_connective?` to follow typical Ruby patterns. ([#44](https://github.com/eonu/arx/pull/44))
|
|
39
|
+
- Add `/coverage/` directory to `.gitignore`. ([#45](https://github.com/eonu/arx/pull/45))
|
|
40
|
+
- Remove version numbers from paper identifiers in error message in `README.md`. ([#46](https://github.com/eonu/arx/pull/46))
|
|
41
|
+
|
|
42
|
+
# 1.0.0
|
|
43
|
+
|
|
44
|
+
#### Major changes
|
|
45
|
+
|
|
46
|
+
- Change `Query` connective instance methods ([#38](https://github.com/eonu/arx/pull/38)):
|
|
47
|
+
- `#&` -> `#and`
|
|
48
|
+
- `#|` -> `#or`
|
|
49
|
+
- `#!` -> `#and_not`
|
|
50
|
+
- Split version number from paper identifier in `Paper` (add `version` key-word argument to `#id` and `#url`, and add `#version`). ([#39](https://github.com/eonu/arx/pull/39))
|
|
51
|
+
- Add `Cleaner.extract_id` and `Cleaner.extract_version`. ([#39](https://github.com/eonu/arx/pull/39))
|
|
52
|
+
- Make `Query#add_connective` always return `self`. ([#40](https://github.com/eonu/arx/pull/40))
|
|
53
|
+
- Redefine `Arx.search` to use `Paper.parse`'s `search` key-word argument. ([#40](https://github.com/eonu/arx/pull/40))
|
|
54
|
+
- Implement all tests. ([#40](https://github.com/eonu/arx/pull/40))
|
|
55
|
+
|
|
56
|
+
#### Minor changes
|
|
57
|
+
|
|
58
|
+
- Change declared regular expression literals from `%r""` to standard `//`. ([#39](https://github.com/eonu/arx/pull/39))
|
|
59
|
+
- Remove `#extract_id` from `Query` and use `Cleaner.extract_id` instead. ([#39](https://github.com/eonu/arx/pull/39))
|
|
60
|
+
- Redefine `Paper#revision?` to use the new `#version` instead of `#updated_at` and `#published_at`. ([#39](https://github.com/eonu/arx/pull/39))
|
|
61
|
+
|
|
62
|
+
# 0.3.2
|
|
63
|
+
|
|
64
|
+
#### Major changes
|
|
65
|
+
|
|
66
|
+
- Add `Paper#category` alias for `Paper#primary_category`. ([#34](https://github.com/eonu/arx/pull/34))
|
|
67
|
+
- Change `Author#affiliations?` to `Author#affiliated?`. ([#34](https://github.com/eonu/arx/pull/34))
|
|
68
|
+
- Change `Paper#last_updated` to `Paper#updated_at` (and remove `updated_at` alias). ([#34](https://github.com/eonu/arx/pull/34))
|
|
69
|
+
- Change `Paper#publish_date` to `Paper#published_at` (and remove `published_at` alias). ([#34](https://github.com/eonu/arx/pull/34))
|
|
70
|
+
- Conditionally assign query object in `Arx.search` with `||=` operator. ([#33](https://github.com/eonu/arx/pull/33))
|
|
71
|
+
- Add `gem:debug` rake task for loading the gem into an interactive console. ([#28](https://github.com/eonu/arx/pull/28))
|
|
72
|
+
- Add `gem:release` rake task for preparing gem releases. ([#36](https://github.com/eonu/arx/pull/36))
|
|
73
|
+
- Add `thor` gem development dependency. ([#36](https://github.com/eonu/arx/pull/36))
|
|
74
|
+
|
|
75
|
+
#### Minor changes
|
|
76
|
+
|
|
77
|
+
- Update documentation links to `rubydoc.info`'s GitHub service. ([#30](https://github.com/eonu/arx/pull/30))
|
|
78
|
+
- Add email address to `LICENSE`. ([#31](https://github.com/eonu/arx/pull/31))
|
|
79
|
+
- Improve `Error::MissingField` and `Error::MissingLink` error messages. ([#35](https://github.com/eonu/arx/pull/35))
|
|
80
|
+
|
|
81
|
+
# 0.3.1
|
|
82
|
+
|
|
83
|
+
#### Major changes
|
|
84
|
+
|
|
85
|
+
- Add `.yardopts` for document generation configuration. ([#26](https://github.com/eonu/arx/pull/26))
|
|
86
|
+
- Namespace errors in `Arx::Error` module and remove `Error` prefix from error classes. ([#26](https://github.com/eonu/arx/pull/26))
|
|
87
|
+
- Move identifier format regular expression constant definitions from `Arx::Validate` to top-level namespace `Arx`. ([#26](https://github.com/eonu/arx/pull/26))
|
|
88
|
+
|
|
89
|
+
#### Minor changes
|
|
90
|
+
|
|
91
|
+
- Rename `lib/arx/exceptions.rb` to `lib/arx/errors.rb`. ([#26](https://github.com/eonu/arx/pull/26))
|
|
92
|
+
- Make `Arx::Cleaner`, `Arx::Validate`, `Arx::Inspector`, `Arx::Link` private (hidden from `yard` documentation). ([#26](https://github.com/eonu/arx/pull/26))
|
|
93
|
+
|
|
94
|
+
# 0.3.0
|
|
95
|
+
|
|
96
|
+
#### Major changes
|
|
97
|
+
|
|
98
|
+
- Add documentation, images, installation and usage instructions to `README.md`. ([#22](https://github.com/eonu/arx/pull/22), [#17](https://github.com/eonu/arx/pull/17))
|
|
99
|
+
- Allow prior construction of a search query in `Arx.search`. ([#18](https://github.com/eonu/arx/pull/18))
|
|
100
|
+
- Fix `Arx.search` query object yielding. ([#20](https://github.com/eonu/arx/pull/20))
|
|
101
|
+
|
|
102
|
+
#### Minor changes
|
|
103
|
+
|
|
104
|
+
- Remove conditional with `block_given?` in `Arx()` method. ([#16](https://github.com/eonu/arx/pull/16))
|
|
105
|
+
- Remove leading ampersand (&) from search query string. ([#19](https://github.com/eonu/arx/pull/19))
|
|
106
|
+
- Add base paper categories and more aliases. ([#21](https://github.com/eonu/arx/pull/21))
|
|
107
|
+
|
|
108
|
+
# 0.2.0
|
|
109
|
+
|
|
110
|
+
#### Major changes
|
|
111
|
+
|
|
112
|
+
- Flatten provided values in `Arx::Paper`'s field instance methods (allow an array as the `values` splat parameter). ([#5](https://github.com/eonu/arx/pull/5))
|
|
113
|
+
- Add `Arx.find` and `Arx.get` as aliases for `Arx.search`. ([#6](https://github.com/eonu/arx/pull/6), [#8](https://github.com/eonu/arx/pull/8))
|
|
114
|
+
|
|
115
|
+
#### Minor changes
|
|
116
|
+
|
|
117
|
+
- Add `homepage` and `metadata` fields to `arx.gemspec`. ([#1](https://github.com/eonu/arx/pull/1), [#14](https://github.com/eonu/arx/pull/14))
|
|
118
|
+
- Specify required ruby version (`~> 2.5`) in `arx.gemspec`. ([#2](https://github.com/eonu/arx/pull/2))
|
|
119
|
+
- Add badges to `README.md`. ([#3](https://github.com/eonu/arx/pull/3), [#9](https://github.com/eonu/arx/pull/9))
|
|
120
|
+
- Fix documentation for `Arx::Paper`'s field methods `exact` argument. ([#4](https://github.com/eonu/arx/pull/4))
|
|
121
|
+
- Update documentation links in `arx.gemspec` and `README.md`. ([#7](https://github.com/eonu/arx/pull/7))
|
|
122
|
+
- Remove newline from end of `Gemfile`. ([#11](https://github.com/eonu/arx/pull/11))
|
|
123
|
+
- Add ruby-head version to RVM rubies in `.travis.yml`. ([#12](https://github.com/eonu/arx/pull/12))
|
|
124
|
+
- Remove unnecessary git-ignored files. ([#13](https://github.com/eonu/arx/pull/13), [#10](https://github.com/eonu/arx/pull/10))
|
|
125
|
+
|
|
1
126
|
# 0.1.0
|
|
2
127
|
|
|
3
128
|
Initial commit! 🎉
|
data/Gemfile
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
source 'https://rubygems.org'
|
|
2
|
-
gemspec
|
|
2
|
+
gemspec
|
data/LICENSE
CHANGED
data/README.md
CHANGED
|
@@ -1,3 +1,409 @@
|
|
|
1
1
|
# Arx
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a8/ArXiv_web.svg/1200px-ArXiv_web.svg.png" width="15%" align="right"></img>
|
|
4
|
+
|
|
5
|
+
[](https://github.com/eonu/arx/blob/503a1c95ac450dbc20623491060c3fc32d213627/arx.gemspec#L19)
|
|
6
|
+
[](https://rubygems.org/gems/arx)
|
|
7
|
+
[](https://github.com/eonu/arx/blob/master/LICENSE)
|
|
8
|
+
|
|
9
|
+
[](https://codeclimate.com/github/eonu/arx/maintainability)
|
|
10
|
+
[](https://travis-ci.com/eonu/arx)
|
|
11
|
+
[](https://coveralls.io/github/eonu/arx?branch=feature%2Fcoveralls)
|
|
12
|
+
|
|
13
|
+
**A Ruby interface for querying academic papers on the arXiv search API.**
|
|
14
|
+
|
|
15
|
+
<img src="https://i.ibb.co/19Djpzk/arxiv.png" width="25%" align="left"></img>
|
|
16
|
+
|
|
17
|
+
> arXiv is an e-print service in the fields of physics, mathematics, non-linear science, computer science, quantitative biology, quantitative finance and statistics.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
[arXiv](https://arxiv.org/) provides an advanced search utility (shown left) on their website, as well as an extensive [search API](https://arxiv.org/help/api) that allows for the external querying of academic papers hosted on their website.
|
|
22
|
+
|
|
23
|
+
Although [Scholastica](https://github.com/scholastica) offer a great [Ruby gem](https://github.com/scholastica/arxiv) for retrieving papers from arXiv through the search API, this gem is only intended for retrieving one paper at a time, and only supports searching for papers by ID.
|
|
24
|
+
|
|
25
|
+
*Arx is a gem that allows for quick and easy querying of the arXiv search API, without having to worry about manually writing your own search query strings or parsing the resulting XML query response to find the data you need.*
|
|
26
|
+
|
|
27
|
+
## Example
|
|
28
|
+
|
|
29
|
+
Suppose we wish to search for:
|
|
30
|
+
|
|
31
|
+
> Papers in the `cs.FL` (Formal Languages and Automata Theory) category whose title contains `"Buchi Automata"`, not authored by `Tomáš Babiak`, sorted by submission date (latest first).
|
|
32
|
+
|
|
33
|
+
This query can be executed with the following code:
|
|
34
|
+
|
|
35
|
+
```ruby
|
|
36
|
+
require 'arx'
|
|
37
|
+
|
|
38
|
+
papers = Arx(sort_by: :date_submitted) do |query|
|
|
39
|
+
query.category('cs.FL')
|
|
40
|
+
query.title('Buchi Automata').and_not.author('Tomáš Babiak')
|
|
41
|
+
end
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- Ruby classes `Arx::Paper`, `Arx::Author` and `Arx::Category` that wrap the resulting Atom XML query result from the search API.
|
|
47
|
+
- Supports querying by a paper's ID, title, author(s), abstract, subject category, comment, journal reference, or report number.
|
|
48
|
+
- Provides a small embedded DSL for writing queries.
|
|
49
|
+
- Supports searching fields by exact match.
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
To install Arx, run the following in your terminal:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
$ gem install arx
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Documentation
|
|
60
|
+
|
|
61
|
+
The documentation for Arx is hosted on [RubyDoc.info](https://www.rubydoc.info/github/eonu/arx/master/toplevel).
|
|
62
|
+
|
|
63
|
+
## Contributing
|
|
64
|
+
|
|
65
|
+
All contributions to Arx are greatly appreciated. Contribution guidelines can be found [here](/CONTRIBUTING.md).
|
|
66
|
+
|
|
67
|
+
## Usage
|
|
68
|
+
|
|
69
|
+
Before you start using Arx, you'll have to ensure that the gem is required (either in your current working file, or shell such as [IRB](https://en.wikipedia.org/wiki/Interactive_Ruby_Shell)):
|
|
70
|
+
|
|
71
|
+
```ruby
|
|
72
|
+
require 'arx'
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Building search queries
|
|
76
|
+
|
|
77
|
+
Query requests submitted to the arXiv search API are typically of the following form (where the query string is indicated in bold):
|
|
78
|
+
|
|
79
|
+
[http://export.arxiv.org/api/query?**search_query=ti:%22Buchi+Automata%22+AND+cat:%22cs.FL%22**](http://export.arxiv.org/api/query?search_query=ti:%22Buchi+Automata%22+AND+cat:%22cs.FL%22)
|
|
80
|
+
|
|
81
|
+
> This particular query searches for papers whose title includes the string `Buchi Automata`, and are in the *Formal Languages and Automata Theory* (`cs.FL`) category.
|
|
82
|
+
|
|
83
|
+
Obviously writing out queries like this can quickly become time-consuming and tedious.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
The `Arx::Query` class provides a small embedded DSL for writing these query strings.
|
|
88
|
+
|
|
89
|
+
#### Sorting criteria and order
|
|
90
|
+
|
|
91
|
+
The order in which search results are returned can be modified through the `sort_by` and `sort_order` keyword arguments (in the `Arx::Query` initializer):
|
|
92
|
+
|
|
93
|
+
- `sort_by` accepts the symbols: `:relevance`, `:last_updated` or `:date_submitted`
|
|
94
|
+
|
|
95
|
+
- `sort_order` accepts the symbols: `:ascending` or `:descending`
|
|
96
|
+
|
|
97
|
+
```ruby
|
|
98
|
+
# Sort by submission date in ascending order (earliest first)
|
|
99
|
+
Arx::Query.new(sort_by: :date_submitted, sort_order: :ascending)
|
|
100
|
+
#=> sortBy=submittedDate&sortOrder=ascending
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
**Note**: The default setting is to sort by `:relevance` in `:descending` order:
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
Arx::Query.new #=> sortBy=relevance&sortOrder=descending
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
#### Searching by ID
|
|
110
|
+
|
|
111
|
+
The arXiv search API doesn't only support searching for papers by metadata fields, but also by ID. When searching by ID, a different URL query string parameter `id_list` is used (instead of `search_query` as seen before).
|
|
112
|
+
|
|
113
|
+
Although the `id_list` can be used to *"search by ID"*, it is better to **think of it as restricting the search space to the papers with the provided IDs**:
|
|
114
|
+
|
|
115
|
+
| `search_query` present? | `id_list` present? | Returns |
|
|
116
|
+
| ----------------------- | ------------------ | ---------------------------------------------------- |
|
|
117
|
+
| Yes | No | Articles that match `search_query` |
|
|
118
|
+
| No | Yes | Articles that are in `id_list` |
|
|
119
|
+
| Yes | Yes | Articles in `id_list` that also match `search_query` |
|
|
120
|
+
|
|
121
|
+
To search by ID, simply pass the arXiv paper identifiers (ID) or URLs into the `Arx::Query` initializer method:
|
|
122
|
+
|
|
123
|
+
```ruby
|
|
124
|
+
Arx::Query.new('https://arxiv.org/abs/1711.05738', '1809.09415')
|
|
125
|
+
#=> sortBy=relevance&sortOrder=descending&id_list=1711.05738,1809.09415
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
#### Searching by metadata fields
|
|
129
|
+
|
|
130
|
+
The arXiv search API supports searches for the following paper metadata fields:
|
|
131
|
+
|
|
132
|
+
```ruby
|
|
133
|
+
FIELDS = {
|
|
134
|
+
title: 'ti', # Title
|
|
135
|
+
author: 'au', # Author
|
|
136
|
+
abstract: 'abs', # Abstract
|
|
137
|
+
comment: 'co', # Comment
|
|
138
|
+
journal: 'jr', # Journal reference
|
|
139
|
+
category: 'cat', # Subject category
|
|
140
|
+
report: 'rn', # Report number
|
|
141
|
+
all: 'all' # All (of the above)
|
|
142
|
+
}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Each of these fields has an instance method defined under the `Arx::Query` class. For example:
|
|
146
|
+
|
|
147
|
+
```ruby
|
|
148
|
+
# Papers whose title contains the string "Buchi Automata".
|
|
149
|
+
q = Arx::Query.new
|
|
150
|
+
q.title('Buchi Automata')
|
|
151
|
+
#=> sortBy=relevance&sortOrder=descending&search_query=ti:%22Buchi+Automata%22
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
##### Exact matches
|
|
155
|
+
|
|
156
|
+
By default, this searches for exact matches of the provided string (by adding double quotes around the string - in the query string, this is represented by the `%22`s). To disable this, you can use the `exact` keyword argument (which defaults to `true`):
|
|
157
|
+
|
|
158
|
+
```ruby
|
|
159
|
+
# Papers whose title contains either the words "Buchi" or "Automata".
|
|
160
|
+
q = Arx::Query.new
|
|
161
|
+
q.title('Buchi Automata', exact: false)
|
|
162
|
+
#=> sortBy=relevance&sortOrder=descending&search_query=ti:Buchi+Automata
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
##### Multiple values for one field
|
|
166
|
+
|
|
167
|
+
Sometimes you might want to provide multiple field values to search for a paper by. This can simply be done by adding them as another argument (or providing an `Array`):
|
|
168
|
+
|
|
169
|
+
**Note**: The default logical connective used when there are multiple values for one field is `and`.
|
|
170
|
+
|
|
171
|
+
```ruby
|
|
172
|
+
# Papers authored by both "Eleonora Andreotti" and "Dominik Edelmann".
|
|
173
|
+
q = Arx::Query.new
|
|
174
|
+
q.author('Eleonora Andreotti', 'Dominik Edelmann')
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
To change the logical connective to `or` or `and_not`, use the `connective` keyword argument:
|
|
178
|
+
|
|
179
|
+
```ruby
|
|
180
|
+
# Papers authored by either "Eleonora Andreotti" or "Dominik Edelmann".
|
|
181
|
+
q = Arx::Query.new
|
|
182
|
+
q.author('Eleonora Andreotti', 'Dominik Edelmann', connective: :or)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
```ruby
|
|
186
|
+
# Papers authored by "Eleonora Andreotti" and not "Dominik Edelmann".
|
|
187
|
+
q = Arx::Query.new
|
|
188
|
+
q.author('Eleonora Andreotti', 'Dominik Edelmann', connective: :and_not)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
#### Chaining subqueries (logical connectives)
|
|
192
|
+
|
|
193
|
+
**Note**: By default, subqueries (successive instance method calls) are chained with a logical `and` connective.
|
|
194
|
+
|
|
195
|
+
```ruby
|
|
196
|
+
# Papers authored by "Dominik Edelmann" in the "Numerical Analysis" (math.NA) category.
|
|
197
|
+
q = Arx::Query.new
|
|
198
|
+
q.author('Dominik Edelmann')
|
|
199
|
+
q.category('math.NA')
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
To change the logical connective used to chain subqueries, use the `and`, `or`, `and_not` instance methods between the subquery calls:
|
|
203
|
+
|
|
204
|
+
```ruby
|
|
205
|
+
# Papers authored by "Eleonora Andreotti" in neither the "Numerical Analysis" (math.NA) nor the "Combinatorics" (math.CO) categories.
|
|
206
|
+
q = Arx::Query.new
|
|
207
|
+
q.author('Eleonora Andreotti')
|
|
208
|
+
q.and_not
|
|
209
|
+
q.category('math.NA', 'math.CO', connective: :or)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
#### Grouping subqueries
|
|
213
|
+
|
|
214
|
+
Sometimes you'll have a query that requires nested or grouped logic, using parentheses. This can be done using the `Arx::Query#group` method.
|
|
215
|
+
|
|
216
|
+
This method accepts a block and basically parenthesises the result of whichever methods were called within the block.
|
|
217
|
+
|
|
218
|
+
For example, this will allow the last query from the previous section to be written as:
|
|
219
|
+
|
|
220
|
+
```ruby
|
|
221
|
+
# Papers authored by "Eleonora Andreotti" in neither the "Numerical Analysis" (math.NA) nor the "Combinatorics" (math.CO) categories.
|
|
222
|
+
q = Arx::Query.new
|
|
223
|
+
q.author('Eleonora Andreotti')
|
|
224
|
+
q.and_not
|
|
225
|
+
q.group do
|
|
226
|
+
q.category('math.NA').or.category('math.CO')
|
|
227
|
+
end
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Another more complicated example with two grouped subqueries:
|
|
231
|
+
|
|
232
|
+
```ruby
|
|
233
|
+
# Papers whose title contains "Buchi Automata", either authored by "Tomáš Babiak", or in the "Formal Languages and Automata Theory (cs.FL)" category and not the "Computational Complexity (cs.CC)" category.
|
|
234
|
+
q = Arx::Query.new
|
|
235
|
+
q.title('Buchi Automata')
|
|
236
|
+
q.group do
|
|
237
|
+
q.author('Tomáš Babiak')
|
|
238
|
+
q.or
|
|
239
|
+
q.group do
|
|
240
|
+
q.category('cs.FL').and_not.category('cs.CC')
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Running search queries
|
|
246
|
+
|
|
247
|
+
Search queries can be executed with the `Arx()` method (alias of `Arx.search`). This method accepts the same parameters as the `Arx::Query` initializer - including the list of IDs.
|
|
248
|
+
|
|
249
|
+
#### Without a predefined query
|
|
250
|
+
|
|
251
|
+
Calling the `Arx()` method with a block allows for the construction and execution of a new query.
|
|
252
|
+
|
|
253
|
+
**Note**: If running a search query this way, then the `sort_by` and `sort_order` parameters can be added as additional keyword arguments.
|
|
254
|
+
|
|
255
|
+
```ruby
|
|
256
|
+
# Papers in the cs.FL category whose title contains "Buchi Automata", not authored by Tomáš Babiak
|
|
257
|
+
results = Arx(sort_by: :date_submitted) do |query|
|
|
258
|
+
query.category('cs.FL')
|
|
259
|
+
query.title('Buchi Automata').and_not.author('Tomáš Babiak')
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
results.size #=> 18
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
#### With a predefined query
|
|
266
|
+
|
|
267
|
+
The `Arx()` method accepts a predefined `Arx::Query` object through the `query` keyword parameter.
|
|
268
|
+
|
|
269
|
+
**Note**: If using the `query` parameter, the `sort_by` and `sort_order` criteria should be defined in the `Arx::Query` object initializer rather than as arguments in `Arx()`.
|
|
270
|
+
|
|
271
|
+
```ruby
|
|
272
|
+
# Papers in the cs.FL category whose title contains "Buchi Automata", not authored by Tomáš Babiak
|
|
273
|
+
q = Arx::Query.new(sort_by: :date_submitted)
|
|
274
|
+
q.category('cs.FL')
|
|
275
|
+
q.title('Buchi Automata').and_not.author('Tomáš Babiak')
|
|
276
|
+
|
|
277
|
+
results = Arx(query: q)
|
|
278
|
+
results.size #=> 18
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
#### With IDs
|
|
282
|
+
|
|
283
|
+
The `Arx()` method accepts a list of IDs as a splat parameter, just like the `Arx::Query` initializer.
|
|
284
|
+
|
|
285
|
+
If only one ID is specified, then a single `Arx::Paper` is returned:
|
|
286
|
+
|
|
287
|
+
```ruby
|
|
288
|
+
result = Arx('1809.09415')
|
|
289
|
+
result.class #=> Arx::Paper
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Otherwise, an `Array` of `Arx::Paper`s is returned.
|
|
293
|
+
|
|
294
|
+
### Query results
|
|
295
|
+
|
|
296
|
+
Search results are typically:
|
|
297
|
+
|
|
298
|
+
- an `Array`, either empty if no papers matched the supplied query, or containing `Arx::Paper` objects.
|
|
299
|
+
- a single `Arx::Paper` object (when the search method is only supplied with one ID).
|
|
300
|
+
|
|
301
|
+
### Entities
|
|
302
|
+
|
|
303
|
+
The `Arx::Paper`, `Arx::Author` and `Arx::Category` classes provide a simple interface for the metadata concerning a single arXiv paper:
|
|
304
|
+
|
|
305
|
+
#### `Arx::Paper`
|
|
306
|
+
|
|
307
|
+
```ruby
|
|
308
|
+
paper = Arx('1809.09415')
|
|
309
|
+
#=> #<Arx::Paper:0x00007fb657b59bd0>
|
|
310
|
+
|
|
311
|
+
paper.id
|
|
312
|
+
#=> "1809.09415"
|
|
313
|
+
paper.id(version: true)
|
|
314
|
+
#=> "1809.09415v1"
|
|
315
|
+
paper.url
|
|
316
|
+
#=> "http://arxiv.org/abs/1809.09415"
|
|
317
|
+
paper.url(version: true)
|
|
318
|
+
#=> "http://arxiv.org/abs/1809.09415v1"
|
|
319
|
+
paper.version
|
|
320
|
+
#=> 1
|
|
321
|
+
paper.revision?
|
|
322
|
+
#=> false
|
|
323
|
+
|
|
324
|
+
paper.title
|
|
325
|
+
#=> "On finitely ambiguous Büchi automata"
|
|
326
|
+
paper.summary
|
|
327
|
+
#=> "Unambiguous B\\\"uchi automata, i.e. B\\\"uchi automata allowing..."
|
|
328
|
+
paper.authors
|
|
329
|
+
#=> [#<Arx::Author:0x00007fb657b63108>, #<Arx::Author:0x00007fb657b62438>]
|
|
330
|
+
|
|
331
|
+
# Paper's categories
|
|
332
|
+
paper.primary_category
|
|
333
|
+
#=> #<Arx::Category:0x00007fb657b61830>
|
|
334
|
+
paper.categories
|
|
335
|
+
#=> [#<Arx::Category:0x00007fb657b60e80>]
|
|
336
|
+
|
|
337
|
+
# Dates
|
|
338
|
+
paper.published_at
|
|
339
|
+
#=> #<DateTime: 2018-09-25T11:40:39+00:00 ((2458387j,42039s,0n),+0s,2299161j)>
|
|
340
|
+
paper.updated_at
|
|
341
|
+
#=> #<DateTime: 2018-09-25T11:40:39+00:00 ((2458387j,42039s,0n),+0s,2299161j)>
|
|
342
|
+
|
|
343
|
+
# Paper's comment
|
|
344
|
+
paper.comment?
|
|
345
|
+
#=> false
|
|
346
|
+
paper.comment
|
|
347
|
+
#=> Arx::Error::MissingField (arXiv paper 1809.09415 is missing the `comment` metadata field)
|
|
348
|
+
|
|
349
|
+
# Paper's journal reference
|
|
350
|
+
paper.journal?
|
|
351
|
+
#=> false
|
|
352
|
+
paper.journal
|
|
353
|
+
#=> Arx::Error::MissingField (arXiv paper 1809.09415 is missing the `journal` metadata field)
|
|
354
|
+
|
|
355
|
+
# Paper's PDF URL
|
|
356
|
+
paper.pdf?
|
|
357
|
+
#=> true
|
|
358
|
+
paper.pdf_url
|
|
359
|
+
#=> "http://arxiv.org/pdf/1809.09415v1"
|
|
360
|
+
|
|
361
|
+
# Paper's DOI (Digital Object Identifier) URL
|
|
362
|
+
paper.doi?
|
|
363
|
+
#=> true
|
|
364
|
+
paper.doi_url
|
|
365
|
+
#=> "http://dx.doi.org/10.1007/978-3-319-98654-8_41"
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
#### `Arx::Author`
|
|
369
|
+
|
|
370
|
+
```ruby
|
|
371
|
+
paper = Arx('cond-mat/9609089')
|
|
372
|
+
#=> #<Arx::Paper:0x00007fb657a7b8d0>
|
|
373
|
+
|
|
374
|
+
author = paper.authors.first
|
|
375
|
+
#=> #<Arx::Author:0x00007fb657a735e0>
|
|
376
|
+
|
|
377
|
+
author.name
|
|
378
|
+
#=> "F. Gebhard"
|
|
379
|
+
|
|
380
|
+
author.affiliated?
|
|
381
|
+
#=> true
|
|
382
|
+
author.affiliations
|
|
383
|
+
#=> ["ILL Grenoble, France"]
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
#### `Arx::Category`
|
|
387
|
+
|
|
388
|
+
```ruby
|
|
389
|
+
paper = Arx('cond-mat/9609089')
|
|
390
|
+
#=> #<Arx::Paper:0x00007fb657b59bd0>
|
|
391
|
+
|
|
392
|
+
category = paper.primary_category
|
|
393
|
+
#=> #<Arx::Category:0x00007fb6570609b8>
|
|
394
|
+
|
|
395
|
+
category.name
|
|
396
|
+
#=> "cond-mat"
|
|
397
|
+
category.full_name
|
|
398
|
+
#=> "Condensed Matter"
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
# Thanks
|
|
402
|
+
|
|
403
|
+
A large portion of this library is based on the brilliant work done by [Scholastica](https://github.com/scholastica) in their [`arxiv`](https://github.com/scholastica/arxiv) gem for retrieving individual papers from arXiv through the search API.
|
|
404
|
+
|
|
405
|
+
Arx was created mostly due to the seemingly inactive nature of Scholastica's repository. Additionally, it would have been infeasible to contribute such large changes to an already well-established gem, especially since https://scholasticahq.com/ appears to be dependent upon this gem.
|
|
406
|
+
|
|
407
|
+
---
|
|
408
|
+
|
|
409
|
+
Nevertheless, a special thanks goes out to Scholastica for providing the influence for Arx.
|
data/Rakefile
CHANGED
|
@@ -1,7 +1,65 @@
|
|
|
1
|
+
require 'thor'
|
|
1
2
|
require 'bundler/gem_tasks'
|
|
2
3
|
require 'rspec/core/rake_task'
|
|
4
|
+
RSpec::Core::RakeTask.new :spec
|
|
3
5
|
|
|
4
6
|
desc 'Run application specs'
|
|
5
|
-
|
|
7
|
+
task default: [:spec]
|
|
8
|
+
|
|
9
|
+
namespace :gem do
|
|
10
|
+
class T < Thor
|
|
11
|
+
include Thor::Actions
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
desc 'Debug the gem (load into IRB)'
|
|
15
|
+
task :debug do
|
|
16
|
+
exec 'bin/console'
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
desc 'Prepare a new gem release'
|
|
20
|
+
task :release, %i[major minor patch meta] do |task, args|
|
|
21
|
+
array = args.to_a
|
|
22
|
+
raise ArgumentError.new("Expected at least 3 SemVer segments, got #{array.size}") if array.size < 3
|
|
23
|
+
raise ArgumentError.new("Expected no more than 4 SemVer segments, got #{array.size}") if array.size > 4
|
|
24
|
+
args.to_h.each_with_index do |(segment, value), index|
|
|
25
|
+
next if index == array.size - 1 && array.size == 4
|
|
26
|
+
raise TypeError.new("Invalid #{segment} SemVer segment: #{value}") unless value == value.to_i.to_s
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
versions = args.to_h.transform_values {|v| v.to_i if Integer(v) rescue v}
|
|
30
|
+
versions[:meta] ||= nil
|
|
31
|
+
update_version versions
|
|
32
|
+
|
|
33
|
+
version = versions.compact.values.join('.')
|
|
34
|
+
add_changelog_entry version
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
private
|
|
38
|
+
|
|
39
|
+
def update_version(versions)
|
|
40
|
+
versions.each do |segment, value|
|
|
41
|
+
thor :gsub_file, File.join(__dir__, 'lib', 'arx', 'version.rb'), /#{segment}: .*,/, "#{segment}: #{value.inspect},"
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def add_changelog_entry(version)
|
|
46
|
+
thor :insert_into_file, File.join(__dir__, 'CHANGELOG.md'), after: /\A/ do
|
|
47
|
+
<<-ENTRY
|
|
48
|
+
# #{version}
|
|
49
|
+
|
|
50
|
+
#### Major changes
|
|
51
|
+
|
|
52
|
+
- TODO
|
|
53
|
+
|
|
54
|
+
#### Minor changes
|
|
55
|
+
|
|
56
|
+
- TODO
|
|
57
|
+
|
|
58
|
+
ENTRY
|
|
59
|
+
end
|
|
60
|
+
end
|
|
6
61
|
|
|
7
|
-
|
|
62
|
+
def thor(*args, &block)
|
|
63
|
+
T.new.send *args, &block
|
|
64
|
+
end
|
|
65
|
+
end
|
data/arx.gemspec
CHANGED
|
@@ -6,7 +6,8 @@ Gem::Specification.new do |spec|
|
|
|
6
6
|
spec.name = 'arx'
|
|
7
7
|
spec.version = Arx::VERSION
|
|
8
8
|
spec.authors = ['Edwin Onuonga']
|
|
9
|
-
spec.email = ['
|
|
9
|
+
spec.email = ['ed@eonu.net']
|
|
10
|
+
spec.homepage = 'https://github.com/eonu/arx'
|
|
10
11
|
|
|
11
12
|
spec.summary = %q{A Ruby interface for querying academic papers on the arXiv search API.}
|
|
12
13
|
spec.license = 'MIT'
|
|
@@ -15,10 +16,22 @@ Gem::Specification.new do |spec|
|
|
|
15
16
|
Gemfile LICENSE CHANGELOG.md README.md Rakefile arx.gemspec
|
|
16
17
|
]
|
|
17
18
|
|
|
19
|
+
spec.required_ruby_version = '~> 2.5'
|
|
20
|
+
|
|
18
21
|
spec.add_runtime_dependency 'nokogiri', '~> 1.10'
|
|
19
22
|
spec.add_runtime_dependency 'nokogiri-happymapper', '~> 0.8'
|
|
20
23
|
|
|
21
|
-
spec.add_development_dependency 'bundler', '
|
|
24
|
+
spec.add_development_dependency 'bundler', '>= 1.17'
|
|
22
25
|
spec.add_development_dependency 'rake', '~> 12.3'
|
|
26
|
+
spec.add_development_dependency 'thor', '~> 0.19.4'
|
|
23
27
|
spec.add_development_dependency 'rspec', '~> 3.7'
|
|
28
|
+
spec.add_development_dependency 'coveralls', '0.8.22'
|
|
29
|
+
|
|
30
|
+
spec.metadata = {
|
|
31
|
+
'source_code_uri' => spec.homepage,
|
|
32
|
+
'homepage_uri' => spec.homepage,
|
|
33
|
+
'documentation_uri' => 'https://www.rubydoc.info/github/eonu/arx/master/toplevel',
|
|
34
|
+
'bug_tracker_uri' => "#{spec.homepage}/issues",
|
|
35
|
+
'changelog_uri' => "#{spec.homepage}/blob/master/CHANGELOG.md"
|
|
36
|
+
}
|
|
24
37
|
end
|