arxiv 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -17,13 +17,26 @@ module Arxiv
17
17
  class MalformedId < StandardError ; end
18
18
  end
19
19
 
20
- ID_FORMAT = /^\d{4}\.\d{4}(?:v\d+)?$/
20
+ # In 2007, the ArXiv API changed document ID formats:
21
+ #
22
+ # http://arxiv.org/abs/math/0510097v1 (legacy)
23
+ # http://arxiv.org/abs/1202.0819v1 (current)
24
+ #
25
+ # These constants help us deal with both use cases.
26
+ #
27
+ LEGACY_URL_FORMAT = /[^\/]+\/\d+(?:v\d+)?$/
28
+ CURRENT_URL_FORMAT = /\d{4}\.\d{4}(?:v\d+)?$/
21
29
 
22
- def self.get(id)
30
+ LEGACY_ID_FORMAT = /^#{LEGACY_URL_FORMAT}/
31
+ ID_FORMAT = /^#{CURRENT_URL_FORMAT}/
23
32
 
24
- id = parse_arxiv_id(id)
33
+ def self.get(identifier)
25
34
 
26
- raise Arxiv::Error::MalformedId, "Manuscript ID format is invalid" unless id =~ ID_FORMAT
35
+ id = parse_arxiv_identifier(identifier)
36
+
37
+ unless id =~ ID_FORMAT || id =~ LEGACY_ID_FORMAT
38
+ raise Arxiv::Error::MalformedId, "Manuscript ID format is invalid"
39
+ end
27
40
 
28
41
  url = ::URI.parse("http://export.arxiv.org/api/query?id_list=#{id}")
29
42
  response = ::Nokogiri::XML(open(url)).remove_namespaces!
@@ -35,15 +48,32 @@ module Arxiv
35
48
 
36
49
  private
37
50
 
38
- def self.parse_arxiv_id(id)
39
- if id =~ ID_FORMAT
40
- id
41
- elsif id =~ /arxiv.org/
42
- match = id.match(/[^\/]+$/)
43
- match[0] if match
51
+ def self.parse_arxiv_identifier(identifier)
52
+ if valid_id?(identifier)
53
+ identifier
54
+ elsif valid_url?(identifier)
55
+ format = legacy_url?(identifier) ? LEGACY_URL_FORMAT : CURRENT_URL_FORMAT
56
+ identifier.match(/(#{format})/)[1]
44
57
  else
45
- id
58
+ identifier # probably an error
46
59
  end
47
60
  end
48
61
 
62
+ def self.valid_id?(identifier)
63
+ identifier =~ ID_FORMAT || identifier =~ LEGACY_ID_FORMAT
64
+ end
65
+
66
+ def self.valid_url?(identifier)
67
+ identifier =~ LEGACY_URL_FORMAT || identifier =~ CURRENT_URL_FORMAT
68
+ end
69
+
70
+ def self.legacy_url?(identifier)
71
+ identifier =~ LEGACY_URL_FORMAT
72
+ end
73
+
74
+
75
+
76
+
77
+
78
+
49
79
  end
@@ -24,7 +24,7 @@ module Arxiv
24
24
  end
25
25
 
26
26
  def long_description
27
- "#{abbreviation} (#{description})"
27
+ description ? "#{abbreviation} (#{description})" : abbreviation
28
28
  end
29
29
 
30
30
  end
@@ -20,12 +20,20 @@ module Arxiv
20
20
  created_at != updated_at
21
21
  end
22
22
 
23
+ def legacy_article?
24
+ arxiv_url =~ Arxiv::LEGACY_URL_FORMAT
25
+ end
26
+
23
27
  def arxiv_id
24
- arxiv_url.match(/([^\/]+)v\d+$/)[1]
28
+ arxiv_versioned_id.match(/([^v]+)v\d+$/)[1]
25
29
  end
26
30
 
27
31
  def arxiv_versioned_id
28
- arxiv_url.match(/([^\/]+)$/)[1]
32
+ if legacy_article?
33
+ arxiv_url.match(/(#{Arxiv::LEGACY_URL_FORMAT})/)[1]
34
+ else
35
+ arxiv_url.match(/(#{Arxiv::CURRENT_URL_FORMAT})/)[1]
36
+ end
29
37
  end
30
38
 
31
39
  def version
@@ -1,6 +1,6 @@
1
1
  module Arxiv
2
2
  class StringScrubber
3
- def self.scrub(string)
3
+ def self.scrub(string)
4
4
  string.gsub("\n", ' ').strip.squeeze(" ")
5
5
  end
6
6
  end
@@ -1,3 +1,3 @@
1
1
  module Arxiv
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -2,31 +2,43 @@ require 'spec_helper'
2
2
 
3
3
  module Arxiv
4
4
 
5
- RSpec::Matchers.define :fetch_valid_manuscript do |expected|
5
+ RSpec::Matchers.define :fetch do |expected|
6
6
  match do |actual|
7
- expected_title = "Laser frequency comb techniques for precise astronomical spectroscopy"
8
- actual.is_a?(Arxiv::Manuscript) && actual.title == expected_title
7
+ actual.is_a?(Arxiv::Manuscript) && actual.title == expected
9
8
  end
10
9
  end
11
10
 
12
11
  describe "get" do
13
- it "should fetch a manuscript when passed a valid id" do
14
- Arxiv.get('1202.0819').should fetch_valid_manuscript
15
- end
16
12
 
17
- it "should fetch a manuscript when passed a valid id with a version number" do
18
- Arxiv.get('1202.0819v1').should fetch_valid_manuscript
13
+ context "when using the current arXiv id format" do
14
+ it "should fetch a manuscript when passed an id" do
15
+ Arxiv.get('1202.0819').should fetch("Laser frequency comb techniques for precise astronomical spectroscopy")
16
+ end
17
+ it "should fetch a manuscript when passed a valid id with a version number" do
18
+ Arxiv.get('1202.0819v1').should fetch("Laser frequency comb techniques for precise astronomical spectroscopy")
19
+ end
20
+ it "should fetch a manuscript when passed full URL" do
21
+ Arxiv.get('http://arxiv.org/abs/1202.0819').should fetch("Laser frequency comb techniques for precise astronomical spectroscopy")
22
+ end
19
23
  end
20
24
 
21
- it "should fetch a manuscript when passed full URL for a manuscript" do
22
- Arxiv.get('http://arxiv.org/abs/1202.0819').should fetch_valid_manuscript
25
+ context "when using the legacy arXiv id format" do
26
+ it "should fetch a manuscript when passed an id" do
27
+ Arxiv.get('math.DG/0510097').should fetch("The differential topology of loop spaces")
28
+ end
29
+ it "should fetch a manuscript when passed a valid id with a version number" do
30
+ Arxiv.get('math.DG/0510097v1').should fetch("The differential topology of loop spaces")
31
+ end
32
+ it "should fetch a manuscript when passed full URL" do
33
+ Arxiv.get('http://arxiv.org/abs/math.DG/0510097').should fetch("The differential topology of loop spaces")
34
+ end
23
35
  end
24
36
 
25
- context "errors" do
26
- it "should raise a manuscript not found error when the manuscript cannot be found on arXiv" do
37
+ context "when something goes wrong" do
38
+ it "should raise an error if the manuscript cannot be found on arXiv" do
27
39
  lambda { Arxiv.get('1234.1234') }.should raise_error(Arxiv::Error::ManuscriptNotFound)
28
40
  end
29
- it "should raise a malformed id error when the manuscript id has an incorrect format" do
41
+ it "should raise an error if the manuscript has an incorrectly formatted id" do
30
42
  lambda { Arxiv.get('cond-mat0709123') }.should raise_error(Arxiv::Error::MalformedId)
31
43
  end
32
44
  end
@@ -2,7 +2,10 @@ require 'spec_helper'
2
2
 
3
3
  module Arxiv
4
4
  describe Category do
5
- before(:all) { @category = Arxiv.get('1202.0819').primary_category }
5
+ before(:all) do
6
+ @category = Arxiv.get('1202.0819').primary_category
7
+ @legacy_category = Arxiv.get('math.DG/0510097v1').categories.last
8
+ end
6
9
 
7
10
  describe "abbreviation" do
8
11
  it "should fetch the category's abbreviation" do
@@ -17,9 +20,12 @@ module Arxiv
17
20
  end
18
21
 
19
22
  describe "long_description" do
20
- it "should fetch the category's #long_description" do
23
+ it "should fetch the category's abbreviation and description"do
21
24
  @category.long_description.should == "astro-ph.IM (Physics - Instrumentation and Methods for Astrophysics)"
22
25
  end
26
+ it "should just return the abbreviation when a description cannot be found (e.g. MSC classes)"do
27
+ @legacy_category.long_description.should == "58D15 (Primary); 58B10 (Secondary)"
28
+ end
23
29
  end
24
30
 
25
31
  end
@@ -2,7 +2,10 @@ require 'spec_helper'
2
2
 
3
3
  module Arxiv
4
4
  describe Manuscript do
5
- before(:all) { @manuscript = Arxiv.get('1202.0819') }
5
+ before(:all) do
6
+ @manuscript = Arxiv.get('1202.0819')
7
+ @legacy_manuscript = Arxiv.get('math.DG/0510097v1')
8
+ end
6
9
 
7
10
  describe "arxiv_url" do
8
11
  it "should fetch the link to the manuscript's page on arXiv" do
@@ -47,21 +50,30 @@ module Arxiv
47
50
  end
48
51
 
49
52
  describe "arxiv_versioned_id" do
50
- it "should return the unique document id used by arXiv" do
53
+ it "should return the unique versioned document id used by arXiv for a current manuscript" do
51
54
  @manuscript.arxiv_versioned_id.should == '1202.0819v1'
52
55
  end
56
+ it "should return the unique versioned document id used by arXiv for a legacy manuscript" do
57
+ @legacy_manuscript.arxiv_versioned_id.should == 'math/0510097v1'
58
+ end
53
59
  end
54
60
 
55
61
  describe "arxiv_id" do
56
- it "should return the unique document id used by arXiv" do
62
+ it "should return the unique document id used by arXiv for a current manuscript" do
57
63
  @manuscript.arxiv_id.should == '1202.0819'
58
64
  end
65
+ it "should return the unique document id used by arXiv for a legacy manuscript" do
66
+ @legacy_manuscript.arxiv_id.should == 'math/0510097'
67
+ end
59
68
  end
60
69
 
61
70
  describe "version" do
62
- it "should return the manuscript's version number" do
71
+ it "should return the manuscript's version number for a current manuscript" do
63
72
  @manuscript.version.should == 1
64
73
  end
74
+ it "should return the manuscript's version number for a legacy manuscript" do
75
+ @legacy_manuscript.version.should == 1
76
+ end
65
77
  end
66
78
 
67
79
  describe "content_types" do
@@ -96,10 +108,18 @@ module Arxiv
96
108
  end
97
109
 
98
110
  describe "primary_category" do
99
- it "should description" do
111
+ it "should description return the manuscript's primary category" do
100
112
  @manuscript.primary_category.abbreviation.should == "astro-ph.IM"
101
113
  end
102
114
  end
103
115
 
116
+ describe "legacy_article?" do
117
+ it "should return true if the manuscript was upload while the legacy API was still in use" do
118
+ @legacy_manuscript.should be_legacy_article
119
+ end
120
+ it "should return false if the manuscript was uploaded after the transition to the new API" do
121
+ @manuscript.should_not be_legacy_article
122
+ end
123
+ end
104
124
  end
105
125
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxiv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-02-15 00:00:00.000000000Z
14
+ date: 2012-02-16 00:00:00.000000000Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: happymapper
18
- requirement: &2157600480 !ruby/object:Gem::Requirement
18
+ requirement: &2156723380 !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,10 @@ dependencies:
23
23
  version: '0'
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *2157600480
26
+ version_requirements: *2156723380
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: nokogiri
29
- requirement: &2157600060 !ruby/object:Gem::Requirement
29
+ requirement: &2156722840 !ruby/object:Gem::Requirement
30
30
  none: false
31
31
  requirements:
32
32
  - - ! '>='
@@ -34,10 +34,10 @@ dependencies:
34
34
  version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
- version_requirements: *2157600060
37
+ version_requirements: *2156722840
38
38
  - !ruby/object:Gem::Dependency
39
39
  name: rspec
40
- requirement: &2157599640 !ruby/object:Gem::Requirement
40
+ requirement: &2156722300 !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
43
  - - ! '>='
@@ -45,7 +45,7 @@ dependencies:
45
45
  version: '0'
46
46
  type: :development
47
47
  prerelease: false
48
- version_requirements: *2157599640
48
+ version_requirements: *2156722300
49
49
  description: Makes interacting with arXiv data really easy.
50
50
  email:
51
51
  - coryschires@gmail.com