arxiv 0.1.11 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/arxiv/version.rb +1 -1
- data/lib/arxiv.rb +13 -2
- data/spec/arxiv/arxiv_spec.rb +31 -0
- data/spec/arxiv/models/category_spec.rb +3 -2
- data/spec/arxiv/models/link_spec.rb +2 -2
- metadata +5 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f96a4cbb6849b0e5cdefefc3e4b6aa5669bed2dbdd0cfe5897c2a3ee0976574a
|
|
4
|
+
data.tar.gz: eed04e2c3b94e36b776cb78847004cb9fb4f26a76a1440211fbe56ae069a867f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e692653d2f1c2e34508885271d46f60a392c3f0040518372158b30d6f8ae22a339228e75d4aa1a8576f098cbcbb03ef6e34cc31cca3533cf2a7d8d1915bbe672
|
|
7
|
+
data.tar.gz: 8c2e810da1f736972298073c1e5fbf74334217fb602d405f4d78f13773bf82c8b5635a0d7a95518ff216f93b16fe4738af8b0d0bd12b46134f245cef589d92df
|
data/lib/arxiv/version.rb
CHANGED
data/lib/arxiv.rb
CHANGED
|
@@ -42,12 +42,12 @@ module Arxiv
|
|
|
42
42
|
response = ::Nokogiri::XML(URI.open(url)).remove_namespaces!
|
|
43
43
|
manuscript = Arxiv::Manuscript.parse(response.to_s, single: id)
|
|
44
44
|
|
|
45
|
-
raise Arxiv::Error::ManuscriptNotFound, "Manuscript #{id} doesn't exist on arXiv" if manuscript
|
|
45
|
+
raise Arxiv::Error::ManuscriptNotFound, "Manuscript #{id} doesn't exist on arXiv" if manuscript&.title.nil?
|
|
46
46
|
manuscript
|
|
47
47
|
end
|
|
48
48
|
|
|
49
49
|
def self.parse_arxiv_identifier(identifier)
|
|
50
|
-
if valid_id?(identifier)
|
|
50
|
+
id = if valid_id?(identifier)
|
|
51
51
|
identifier
|
|
52
52
|
elsif valid_url?(identifier)
|
|
53
53
|
format = legacy_url?(identifier) ? LEGACY_URL_FORMAT : CURRENT_URL_FORMAT
|
|
@@ -55,9 +55,20 @@ module Arxiv
|
|
|
55
55
|
else
|
|
56
56
|
identifier # probably an error
|
|
57
57
|
end
|
|
58
|
+
|
|
59
|
+
normalize_legacy_id(id)
|
|
58
60
|
end
|
|
59
61
|
private_class_method :parse_arxiv_identifier
|
|
60
62
|
|
|
63
|
+
# In April 2007, arxiv dropped the subject-class suffix from legacy identifiers
|
|
64
|
+
# (e.g. `math.DG/0510097` became `math/0510097`). The website still 301-redirects
|
|
65
|
+
# the old form, but the API at /api/query?id_list=math.DG/0510097 silently returns
|
|
66
|
+
# no results. Normalize so callers can pass either form.
|
|
67
|
+
def self.normalize_legacy_id(id)
|
|
68
|
+
id.sub(/\A([^.\/]+)\.[^\/]+\//, '\1/')
|
|
69
|
+
end
|
|
70
|
+
private_class_method :normalize_legacy_id
|
|
71
|
+
|
|
61
72
|
def self.valid_id?(identifier)
|
|
62
73
|
identifier =~ ID_FORMAT || identifier =~ LEGACY_ID_FORMAT
|
|
63
74
|
end
|
data/spec/arxiv/arxiv_spec.rb
CHANGED
|
@@ -51,4 +51,35 @@ module Arxiv
|
|
|
51
51
|
end
|
|
52
52
|
|
|
53
53
|
end
|
|
54
|
+
|
|
55
|
+
# NOTE: This describe block tests a private method (via `.send`) which is
|
|
56
|
+
# unusual for this codebase. It's here to make the regex's behavior visible
|
|
57
|
+
# at PR-review time across the various legacy ID forms arxiv has historically
|
|
58
|
+
# used. Feel free to drop this block — the behavior is also covered indirectly
|
|
59
|
+
# by the legacy-id specs above.
|
|
60
|
+
describe "normalize_legacy_id (private)" do
|
|
61
|
+
cases = {
|
|
62
|
+
# legacy with subject class -> stripped to bare archive
|
|
63
|
+
"math.DG/0510097" => "math/0510097",
|
|
64
|
+
"math.DG/0510097v1" => "math/0510097v1",
|
|
65
|
+
"cond-mat.dis-nn/9912001" => "cond-mat/9912001",
|
|
66
|
+
|
|
67
|
+
# already-canonical legacy -> unchanged
|
|
68
|
+
"math/0510097" => "math/0510097",
|
|
69
|
+
"math/0510097v1" => "math/0510097v1",
|
|
70
|
+
"cs/0002001" => "cs/0002001",
|
|
71
|
+
"cond-mat/9912001" => "cond-mat/9912001",
|
|
72
|
+
|
|
73
|
+
# current id format (no slash) -> unchanged
|
|
74
|
+
"1202.0819" => "1202.0819",
|
|
75
|
+
"1202.0819v1" => "1202.0819v1",
|
|
76
|
+
"1509.06369" => "1509.06369",
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
cases.each do |input, expected|
|
|
80
|
+
it "#{input.inspect} -> #{expected.inspect}" do
|
|
81
|
+
expect(Arxiv.send(:normalize_legacy_id, input)).to eql(expected)
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
54
85
|
end
|
|
@@ -4,7 +4,6 @@ module Arxiv
|
|
|
4
4
|
describe Category do
|
|
5
5
|
before(:all) do
|
|
6
6
|
@category = Arxiv.get('1202.0819').primary_category
|
|
7
|
-
@legacy_category = Arxiv.get('math.DG/0510097v1').categories.last
|
|
8
7
|
end
|
|
9
8
|
|
|
10
9
|
describe "abbreviation" do
|
|
@@ -25,7 +24,9 @@ module Arxiv
|
|
|
25
24
|
end
|
|
26
25
|
|
|
27
26
|
it "should return only the abbreviation when a description cannot be found (e.g. MSC classes)"do
|
|
28
|
-
|
|
27
|
+
category = Category.new
|
|
28
|
+
category.abbreviation = "58D15 (Primary); 58B10 (Secondary)"
|
|
29
|
+
expect(category.long_description).to eql("58D15 (Primary); 58B10 (Secondary)")
|
|
29
30
|
end
|
|
30
31
|
end
|
|
31
32
|
|
|
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
|
2
2
|
|
|
3
3
|
module Arxiv
|
|
4
4
|
describe Link do
|
|
5
|
-
before(:all) { @link = Arxiv.get('1202.0819').links.
|
|
5
|
+
before(:all) { @link = Arxiv.get('1202.0819').links.find { |l| l.content_type == 'application/pdf' } }
|
|
6
6
|
|
|
7
7
|
describe "content_type" do
|
|
8
8
|
it "should fetch the link's content type" do
|
|
@@ -12,7 +12,7 @@ module Arxiv
|
|
|
12
12
|
|
|
13
13
|
describe "url" do
|
|
14
14
|
it "should fetch the link's url" do
|
|
15
|
-
expect(@link.url).to eql('
|
|
15
|
+
expect(@link.url).to eql('https://arxiv.org/pdf/1202.0819v1')
|
|
16
16
|
end
|
|
17
17
|
end
|
|
18
18
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: arxiv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Scholastica
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-05-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: happymapper
|
|
@@ -134,7 +134,7 @@ homepage: https://github.com/scholastica/arxiv
|
|
|
134
134
|
licenses:
|
|
135
135
|
- MIT
|
|
136
136
|
metadata: {}
|
|
137
|
-
post_install_message:
|
|
137
|
+
post_install_message:
|
|
138
138
|
rdoc_options: []
|
|
139
139
|
require_paths:
|
|
140
140
|
- lib
|
|
@@ -150,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
150
150
|
version: '0'
|
|
151
151
|
requirements: []
|
|
152
152
|
rubygems_version: 3.4.10
|
|
153
|
-
signing_key:
|
|
153
|
+
signing_key:
|
|
154
154
|
specification_version: 4
|
|
155
155
|
summary: Ruby wrapper accessing the arXiv API
|
|
156
156
|
test_files: []
|