arxiv 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/arxiv.rb +41 -11
- data/lib/arxiv/models/category.rb +1 -1
- data/lib/arxiv/models/manuscript.rb +10 -2
- data/lib/arxiv/string_scrubber.rb +1 -1
- data/lib/arxiv/version.rb +1 -1
- data/spec/arxiv/arxiv_spec.rb +25 -13
- data/spec/arxiv/models/category_spec.rb +8 -2
- data/spec/arxiv/models/manuscript_spec.rb +25 -5
- metadata +8 -8
data/lib/arxiv.rb
CHANGED
@@ -17,13 +17,26 @@ module Arxiv
|
|
17
17
|
class MalformedId < StandardError ; end
|
18
18
|
end
|
19
19
|
|
20
|
-
|
20
|
+
# In 2007, arXiv changed its document ID format:
#
#   http://arxiv.org/abs/math/0510097v1  (legacy:  archive/number)
#   http://arxiv.org/abs/1202.0819v1     (current: YYMM.NNNN, and
#                                         YYMM.NNNNN for newer papers)
#
# These constants help us deal with both use cases. The optional "vN"
# suffix is the manuscript version.
#
# *_URL_FORMAT finds an identifier at the very end of a string, so it also
# works on full URLs. *_ID_FORMAT requires the whole string to be an
# identifier.
#
# \A and \z are used rather than ^ and $: the latter match at every line
# boundary, which would let multi-line input slip through validation. For
# the same reason the legacy archive part must not contain whitespace.
LEGACY_URL_FORMAT = /[^\/\s]+\/\d+(?:v\d+)?\z/
CURRENT_URL_FORMAT = /\d{4}\.\d{4,5}(?:v\d+)?\z/

LEGACY_ID_FORMAT = /\A#{LEGACY_URL_FORMAT}/
ID_FORMAT = /\A#{CURRENT_URL_FORMAT}/
|
23
32
|
|
24
|
-
|
33
|
+
def self.get(identifier)
|
25
34
|
|
26
|
-
|
35
|
+
id = parse_arxiv_identifier(identifier)
|
36
|
+
|
37
|
+
unless id =~ ID_FORMAT || id =~ LEGACY_ID_FORMAT
|
38
|
+
raise Arxiv::Error::MalformedId, "Manuscript ID format is invalid"
|
39
|
+
end
|
27
40
|
|
28
41
|
url = ::URI.parse("http://export.arxiv.org/api/query?id_list=#{id}")
|
29
42
|
response = ::Nokogiri::XML(open(url)).remove_namespaces!
|
@@ -35,15 +48,32 @@ module Arxiv
|
|
35
48
|
|
36
49
|
private
|
37
50
|
|
38
|
-
def self.
|
39
|
-
if
|
40
|
-
|
41
|
-
elsif
|
42
|
-
|
43
|
-
match[
|
51
|
+
# Normalizes user input into a bare arXiv identifier.
#
# identifier - a bare id ("1202.0819", "math.DG/0510097v1") or a URL that
#              ends in one ("http://arxiv.org/abs/1202.0819").
#
# Returns the identifier String when one is recognized. Unrecognized input
# is returned (as a stripped String) so that Arxiv.get can reject it with
# MalformedId.
def self.parse_arxiv_identifier(identifier)
  # Coerce first: nil or non-String input should reach the format check in
  # Arxiv.get as a String instead of failing inside a regexp match.
  candidate = identifier.to_s.strip

  return candidate if valid_id?(candidate)
  return candidate unless valid_url?(candidate) # probably an error

  format = legacy_url?(candidate) ? LEGACY_URL_FORMAT : CURRENT_URL_FORMAT
  # String#[] with a Regexp returns the matched text directly.
  candidate[format]
end
|
48
61
|
|
62
|
+
# Tells whether the identifier is a bare arXiv id in either scheme.
#
# Returns the match offset (truthy) when ID_FORMAT or LEGACY_ID_FORMAT
# matches, otherwise nil.
def self.valid_id?(identifier)
  current_offset = identifier =~ ID_FORMAT
  return current_offset if current_offset

  identifier =~ LEGACY_ID_FORMAT
end
|
65
|
+
|
66
|
+
# Tells whether the string contains an identifier matched by either
# LEGACY_URL_FORMAT or CURRENT_URL_FORMAT.
#
# Returns the match offset (truthy) or nil.
def self.valid_url?(identifier)
  legacy_offset = identifier =~ LEGACY_URL_FORMAT
  return legacy_offset if legacy_offset

  identifier =~ CURRENT_URL_FORMAT
end
|
69
|
+
|
70
|
+
# Tells whether the string matches LEGACY_URL_FORMAT, i.e. carries a
# legacy-style identifier.
#
# Returns the match offset (truthy) or nil.
def self.legacy_url?(identifier)
  legacy_offset = identifier =~ LEGACY_URL_FORMAT
  legacy_offset
end
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
49
79
|
end
|
data/lib/arxiv/models/manuscript.rb
CHANGED
@@ -20,12 +20,20 @@ module Arxiv
|
|
20
20
|
created_at != updated_at
|
21
21
|
end
|
22
22
|
|
23
|
+
# Tells whether this manuscript's arxiv_url matches
# Arxiv::LEGACY_URL_FORMAT, i.e. uses the legacy identifier scheme.
#
# Returns the match offset (truthy) or nil.
def legacy_article?
  legacy_offset = arxiv_url =~ Arxiv::LEGACY_URL_FORMAT
  legacy_offset
end
|
26
|
+
|
23
27
|
# Returns the manuscript's identifier without its version suffix, e.g.
# "1202.0819v1" -> "1202.0819" and "math/0510097v1" -> "math/0510097".
# An identifier that carries no version suffix is returned unchanged.
def arxiv_id
  # Strip only a trailing "vN". Cutting at the first "v" would mangle legacy
  # archive names that contain that letter (e.g. "solv-int/9901001v1").
  arxiv_versioned_id.sub(/v\d+\z/, '')
end
|
26
30
|
|
27
31
|
# Extracts the versioned identifier (e.g. "1202.0819v1") from arxiv_url,
# using the legacy or the current pattern depending on legacy_article?.
def arxiv_versioned_id
  id_pattern = legacy_article? ? Arxiv::LEGACY_URL_FORMAT : Arxiv::CURRENT_URL_FORMAT
  url_match = arxiv_url.match(/(#{id_pattern})/)
  url_match[1]
end
|
30
38
|
|
31
39
|
def version
|
data/lib/arxiv/version.rb
CHANGED
data/spec/arxiv/arxiv_spec.rb
CHANGED
@@ -2,31 +2,43 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
module Arxiv
|
4
4
|
|
5
|
-
RSpec::Matchers.define :
|
5
|
+
# Custom matcher: passes when the subject is an Arxiv::Manuscript whose
# title equals the expected title.
RSpec::Matchers.define :fetch do |expected_title|
  match do |candidate|
    is_manuscript = candidate.is_a?(Arxiv::Manuscript)
    is_manuscript && candidate.title == expected_title
  end
end
|
11
10
|
|
12
11
|
describe "get" do
|
13
|
-
it "should fetch a manuscript when passed a valid id" do
|
14
|
-
Arxiv.get('1202.0819').should fetch_valid_manuscript
|
15
|
-
end
|
16
12
|
|
17
|
-
|
18
|
-
|
13
|
+
context "when using the current arXiv id format" do
  # Every accepted spelling of the identifier must resolve to this manuscript.
  title = "Laser frequency comb techniques for precise astronomical spectroscopy"

  it "should fetch a manuscript when passed an id" do
    Arxiv.get('1202.0819').should fetch(title)
  end

  it "should fetch a manuscript when passed a valid id with a version number" do
    Arxiv.get('1202.0819v1').should fetch(title)
  end

  it "should fetch a manuscript when passed full URL" do
    Arxiv.get('http://arxiv.org/abs/1202.0819').should fetch(title)
  end
end
|
20
24
|
|
21
|
-
|
22
|
-
|
25
|
+
context "when using the legacy arXiv id format" do
  # Every accepted spelling of the identifier must resolve to this manuscript.
  title = "The differential topology of loop spaces"

  it "should fetch a manuscript when passed an id" do
    Arxiv.get('math.DG/0510097').should fetch(title)
  end

  it "should fetch a manuscript when passed a valid id with a version number" do
    Arxiv.get('math.DG/0510097v1').should fetch(title)
  end

  it "should fetch a manuscript when passed full URL" do
    Arxiv.get('http://arxiv.org/abs/math.DG/0510097').should fetch(title)
  end
end
|
24
36
|
|
25
|
-
context "
|
26
|
-
it "should raise
|
37
|
+
context "when something goes wrong" do
  it "should raise an error if the manuscript cannot be found on arXiv" do
    # Well-formed id that is expected not to resolve to a manuscript.
    lookup = lambda { Arxiv.get('1234.1234') }
    lookup.should raise_error(Arxiv::Error::ManuscriptNotFound)
  end

  it "should raise an error if the manuscript has an incorrectly formatted id" do
    # The "/" between archive and number is missing.
    lookup = lambda { Arxiv.get('cond-mat0709123') }
    lookup.should raise_error(Arxiv::Error::MalformedId)
  end
end
|
data/spec/arxiv/models/category_spec.rb
CHANGED
@@ -2,7 +2,10 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
module Arxiv
|
4
4
|
describe Category do
|
5
|
-
before(:all)
|
5
|
+
# Fetch one current and one legacy manuscript once for the whole group and
# keep the categories under test.
before(:all) do
  current_manuscript = Arxiv.get('1202.0819')
  @category = current_manuscript.primary_category
  legacy_manuscript = Arxiv.get('math.DG/0510097v1')
  @legacy_category = legacy_manuscript.categories.last
end
|
6
9
|
|
7
10
|
describe "abbreviation" do
|
8
11
|
it "should fetch the category's abbreviation" do
|
@@ -17,9 +20,12 @@ module Arxiv
|
|
17
20
|
end
|
18
21
|
|
19
22
|
describe "long_description" do
  it "should fetch the category's abbreviation and description" do
    description = @category.long_description
    description.should == "astro-ph.IM (Physics - Instrumentation and Methods for Astrophysics)"
  end

  it "should just return the abbreviation when a description cannot be found (e.g. MSC classes)" do
    description = @legacy_category.long_description
    description.should == "58D15 (Primary); 58B10 (Secondary)"
  end
end
|
24
30
|
|
25
31
|
end
|
data/spec/arxiv/models/manuscript_spec.rb
CHANGED
@@ -2,7 +2,10 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
module Arxiv
|
4
4
|
describe Manuscript do
|
5
|
-
before(:all)
|
5
|
+
# Fetch one current-format and one legacy-format manuscript once for the
# whole group.
before(:all) do
  current_id = '1202.0819'
  legacy_id = 'math.DG/0510097v1'
  @manuscript = Arxiv.get(current_id)
  @legacy_manuscript = Arxiv.get(legacy_id)
end
|
6
9
|
|
7
10
|
describe "arxiv_url" do
|
8
11
|
it "should fetch the link to the manuscript's page on arXiv" do
|
@@ -47,21 +50,30 @@ module Arxiv
|
|
47
50
|
end
|
48
51
|
|
49
52
|
describe "arxiv_versioned_id" do
  it "should return the unique versioned document id used by arXiv for a current manuscript" do
    versioned_id = @manuscript.arxiv_versioned_id
    versioned_id.should == '1202.0819v1'
  end

  it "should return the unique versioned document id used by arXiv for a legacy manuscript" do
    versioned_id = @legacy_manuscript.arxiv_versioned_id
    versioned_id.should == 'math/0510097v1'
  end
end
|
54
60
|
|
55
61
|
describe "arxiv_id" do
  it "should return the unique document id used by arXiv for a current manuscript" do
    unversioned_id = @manuscript.arxiv_id
    unversioned_id.should == '1202.0819'
  end

  it "should return the unique document id used by arXiv for a legacy manuscript" do
    unversioned_id = @legacy_manuscript.arxiv_id
    unversioned_id.should == 'math/0510097'
  end
end
|
60
69
|
|
61
70
|
describe "version" do
  it "should return the manuscript's version number for a current manuscript" do
    version_number = @manuscript.version
    version_number.should == 1
  end

  it "should return the manuscript's version number for a legacy manuscript" do
    version_number = @legacy_manuscript.version
    version_number.should == 1
  end
end
|
66
78
|
|
67
79
|
describe "content_types" do
|
@@ -96,10 +108,18 @@ module Arxiv
|
|
96
108
|
end
|
97
109
|
|
98
110
|
describe "primary_category" do
  # The category is checked through its abbreviation.
  it "should return the manuscript's primary category" do
    @manuscript.primary_category.abbreviation.should == "astro-ph.IM"
  end
end
|
103
115
|
|
116
|
+
describe "legacy_article?" do
  # be_legacy_article is RSpec's predicate matcher for #legacy_article?.
  it "should return true if the manuscript was uploaded while the legacy API was still in use" do
    @legacy_manuscript.should be_legacy_article
  end

  it "should return false if the manuscript was uploaded after the transition to the new API" do
    @manuscript.should_not be_legacy_article
  end
end
|
104
124
|
end
|
105
125
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arxiv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,11 +11,11 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-02-
|
14
|
+
date: 2012-02-16 00:00:00.000000000Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: happymapper
|
18
|
-
requirement: &
|
18
|
+
requirement: &2156723380 !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,10 +23,10 @@ dependencies:
|
|
23
23
|
version: '0'
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
|
-
version_requirements: *
|
26
|
+
version_requirements: *2156723380
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
|
-
requirement: &
|
29
|
+
requirement: &2156722840 !ruby/object:Gem::Requirement
|
30
30
|
none: false
|
31
31
|
requirements:
|
32
32
|
- - ! '>='
|
@@ -34,10 +34,10 @@ dependencies:
|
|
34
34
|
version: '0'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
|
-
version_requirements: *
|
37
|
+
version_requirements: *2156722840
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
39
|
name: rspec
|
40
|
-
requirement: &
|
40
|
+
requirement: &2156722300 !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
43
|
- - ! '>='
|
@@ -45,7 +45,7 @@ dependencies:
|
|
45
45
|
version: '0'
|
46
46
|
type: :development
|
47
47
|
prerelease: false
|
48
|
-
version_requirements: *
|
48
|
+
version_requirements: *2156722300
|
49
49
|
description: Makes interacting with arXiv data really easy.
|
50
50
|
email:
|
51
51
|
- coryschires@gmail.com
|