arxiv 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,13 +17,26 @@ module Arxiv
17
17
  class MalformedId < StandardError ; end
18
18
  end
19
19
 
20
- ID_FORMAT = /^\d{4}\.\d{4}(?:v\d+)?$/
20
+ # In 2007, the ArXiv API changed document ID formats:
21
+ #
22
+ # http://arxiv.org/abs/math/0510097v1 (legacy)
23
+ # http://arxiv.org/abs/1202.0819v1 (current)
24
+ #
25
+ # These constants help us deal with both use cases.
26
+ #
27
+ LEGACY_URL_FORMAT = /[^\/]+\/\d+(?:v\d+)?$/
28
+ CURRENT_URL_FORMAT = /\d{4}\.\d{4}(?:v\d+)?$/
21
29
 
22
- def self.get(id)
30
+ LEGACY_ID_FORMAT = /^#{LEGACY_URL_FORMAT}/
31
+ ID_FORMAT = /^#{CURRENT_URL_FORMAT}/
23
32
 
24
- id = parse_arxiv_id(id)
33
+ def self.get(identifier)
25
34
 
26
- raise Arxiv::Error::MalformedId, "Manuscript ID format is invalid" unless id =~ ID_FORMAT
35
+ id = parse_arxiv_identifier(identifier)
36
+
37
+ unless id =~ ID_FORMAT || id =~ LEGACY_ID_FORMAT
38
+ raise Arxiv::Error::MalformedId, "Manuscript ID format is invalid"
39
+ end
27
40
 
28
41
  url = ::URI.parse("http://export.arxiv.org/api/query?id_list=#{id}")
29
42
  response = ::Nokogiri::XML(open(url)).remove_namespaces!
@@ -35,15 +48,32 @@ module Arxiv
35
48
 
36
49
  private
37
50
 
38
- def self.parse_arxiv_id(id)
39
- if id =~ ID_FORMAT
40
- id
41
- elsif id =~ /arxiv.org/
42
- match = id.match(/[^\/]+$/)
43
- match[0] if match
51
+ def self.parse_arxiv_identifier(identifier)
52
+ if valid_id?(identifier)
53
+ identifier
54
+ elsif valid_url?(identifier)
55
+ format = legacy_url?(identifier) ? LEGACY_URL_FORMAT : CURRENT_URL_FORMAT
56
+ identifier.match(/(#{format})/)[1]
44
57
  else
45
- id
58
+ identifier # probably an error
46
59
  end
47
60
  end
48
61
 
62
+ def self.valid_id?(identifier)
63
+ identifier =~ ID_FORMAT || identifier =~ LEGACY_ID_FORMAT
64
+ end
65
+
66
+ def self.valid_url?(identifier)
67
+ identifier =~ LEGACY_URL_FORMAT || identifier =~ CURRENT_URL_FORMAT
68
+ end
69
+
70
+ def self.legacy_url?(identifier)
71
+ identifier =~ LEGACY_URL_FORMAT
72
+ end
73
+
74
+
75
+
76
+
77
+
78
+
49
79
  end
@@ -24,7 +24,7 @@ module Arxiv
24
24
  end
25
25
 
26
26
  def long_description
27
- "#{abbreviation} (#{description})"
27
+ description ? "#{abbreviation} (#{description})" : abbreviation
28
28
  end
29
29
 
30
30
  end
@@ -20,12 +20,20 @@ module Arxiv
20
20
  created_at != updated_at
21
21
  end
22
22
 
23
+ def legacy_article?
24
+ arxiv_url =~ Arxiv::LEGACY_URL_FORMAT
25
+ end
26
+
23
27
  def arxiv_id
24
- arxiv_url.match(/([^\/]+)v\d+$/)[1]
28
+ arxiv_versioned_id.match(/([^v]+)v\d+$/)[1]
25
29
  end
26
30
 
27
31
  def arxiv_versioned_id
28
- arxiv_url.match(/([^\/]+)$/)[1]
32
+ if legacy_article?
33
+ arxiv_url.match(/(#{Arxiv::LEGACY_URL_FORMAT})/)[1]
34
+ else
35
+ arxiv_url.match(/(#{Arxiv::CURRENT_URL_FORMAT})/)[1]
36
+ end
29
37
  end
30
38
 
31
39
  def version
@@ -1,6 +1,6 @@
1
1
  module Arxiv
2
2
  class StringScrubber
3
- def self.scrub(string)
3
+ def self.scrub(string)
4
4
  string.gsub("\n", ' ').strip.squeeze(" ")
5
5
  end
6
6
  end
@@ -1,3 +1,3 @@
1
1
  module Arxiv
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -2,31 +2,43 @@ require 'spec_helper'
2
2
 
3
3
  module Arxiv
4
4
 
5
- RSpec::Matchers.define :fetch_valid_manuscript do |expected|
5
+ RSpec::Matchers.define :fetch do |expected|
6
6
  match do |actual|
7
- expected_title = "Laser frequency comb techniques for precise astronomical spectroscopy"
8
- actual.is_a?(Arxiv::Manuscript) && actual.title == expected_title
7
+ actual.is_a?(Arxiv::Manuscript) && actual.title == expected
9
8
  end
10
9
  end
11
10
 
12
11
  describe "get" do
13
- it "should fetch a manuscript when passed a valid id" do
14
- Arxiv.get('1202.0819').should fetch_valid_manuscript
15
- end
16
12
 
17
- it "should fetch a manuscript when passed a valid id with a version number" do
18
- Arxiv.get('1202.0819v1').should fetch_valid_manuscript
13
+ context "when using the current arXiv id format" do
14
+ it "should fetch a manuscript when passed an id" do
15
+ Arxiv.get('1202.0819').should fetch("Laser frequency comb techniques for precise astronomical spectroscopy")
16
+ end
17
+ it "should fetch a manuscript when passed a valid id with a version number" do
18
+ Arxiv.get('1202.0819v1').should fetch("Laser frequency comb techniques for precise astronomical spectroscopy")
19
+ end
20
+ it "should fetch a manuscript when passed full URL" do
21
+ Arxiv.get('http://arxiv.org/abs/1202.0819').should fetch("Laser frequency comb techniques for precise astronomical spectroscopy")
22
+ end
19
23
  end
20
24
 
21
- it "should fetch a manuscript when passed full URL for a manuscript" do
22
- Arxiv.get('http://arxiv.org/abs/1202.0819').should fetch_valid_manuscript
25
+ context "when using the legacy arXiv id format" do
26
+ it "should fetch a manuscript when passed an id" do
27
+ Arxiv.get('math.DG/0510097').should fetch("The differential topology of loop spaces")
28
+ end
29
+ it "should fetch a manuscript when passed a valid id with a version number" do
30
+ Arxiv.get('math.DG/0510097v1').should fetch("The differential topology of loop spaces")
31
+ end
32
+ it "should fetch a manuscript when passed full URL" do
33
+ Arxiv.get('http://arxiv.org/abs/math.DG/0510097').should fetch("The differential topology of loop spaces")
34
+ end
23
35
  end
24
36
 
25
- context "errors" do
26
- it "should raise a manuscript not found error when the manuscript cannot be found on arXiv" do
37
+ context "when something goes wrong" do
38
+ it "should raise an error if the manuscript cannot be found on arXiv" do
27
39
  lambda { Arxiv.get('1234.1234') }.should raise_error(Arxiv::Error::ManuscriptNotFound)
28
40
  end
29
- it "should raise a malformed id error when the manuscript id has an incorrect format" do
41
+ it "should raise an error if the manuscript has an incorrectly formatted id" do
30
42
  lambda { Arxiv.get('cond-mat0709123') }.should raise_error(Arxiv::Error::MalformedId)
31
43
  end
32
44
  end
@@ -2,7 +2,10 @@ require 'spec_helper'
2
2
 
3
3
  module Arxiv
4
4
  describe Category do
5
- before(:all) { @category = Arxiv.get('1202.0819').primary_category }
5
+ before(:all) do
6
+ @category = Arxiv.get('1202.0819').primary_category
7
+ @legacy_category = Arxiv.get('math.DG/0510097v1').categories.last
8
+ end
6
9
 
7
10
  describe "abbreviation" do
8
11
  it "should fetch the category's abbreviation" do
@@ -17,9 +20,12 @@ module Arxiv
17
20
  end
18
21
 
19
22
  describe "long_description" do
20
- it "should fetch the category's #long_description" do
23
+ it "should fetch the category's abbreviation and description"do
21
24
  @category.long_description.should == "astro-ph.IM (Physics - Instrumentation and Methods for Astrophysics)"
22
25
  end
26
+ it "should just return the abbreviation when a description cannot be found (e.g. MSC classes)"do
27
+ @legacy_category.long_description.should == "58D15 (Primary); 58B10 (Secondary)"
28
+ end
23
29
  end
24
30
 
25
31
  end
@@ -2,7 +2,10 @@ require 'spec_helper'
2
2
 
3
3
  module Arxiv
4
4
  describe Manuscript do
5
- before(:all) { @manuscript = Arxiv.get('1202.0819') }
5
+ before(:all) do
6
+ @manuscript = Arxiv.get('1202.0819')
7
+ @legacy_manuscript = Arxiv.get('math.DG/0510097v1')
8
+ end
6
9
 
7
10
  describe "arxiv_url" do
8
11
  it "should fetch the link to the manuscript's page on arXiv" do
@@ -47,21 +50,30 @@ module Arxiv
47
50
  end
48
51
 
49
52
  describe "arxiv_versioned_id" do
50
- it "should return the unique document id used by arXiv" do
53
+ it "should return the unique versioned document id used by arXiv for a current manuscript" do
51
54
  @manuscript.arxiv_versioned_id.should == '1202.0819v1'
52
55
  end
56
+ it "should return the unique versioned document id used by arXiv for a legacy manuscript" do
57
+ @legacy_manuscript.arxiv_versioned_id.should == 'math/0510097v1'
58
+ end
53
59
  end
54
60
 
55
61
  describe "arxiv_id" do
56
- it "should return the unique document id used by arXiv" do
62
+ it "should return the unique document id used by arXiv for a current manuscript" do
57
63
  @manuscript.arxiv_id.should == '1202.0819'
58
64
  end
65
+ it "should return the unique document id used by arXiv for a legacy manuscript" do
66
+ @legacy_manuscript.arxiv_id.should == 'math/0510097'
67
+ end
59
68
  end
60
69
 
61
70
  describe "version" do
62
- it "should return the manuscript's version number" do
71
+ it "should return the manuscript's version number for a current manuscript" do
63
72
  @manuscript.version.should == 1
64
73
  end
74
+ it "should return the manuscript's version number for a legacy manuscript" do
75
+ @legacy_manuscript.version.should == 1
76
+ end
65
77
  end
66
78
 
67
79
  describe "content_types" do
@@ -96,10 +108,18 @@ module Arxiv
96
108
  end
97
109
 
98
110
  describe "primary_category" do
99
- it "should description" do
111
+ it "should description return the manuscript's primary category" do
100
112
  @manuscript.primary_category.abbreviation.should == "astro-ph.IM"
101
113
  end
102
114
  end
103
115
 
116
+ describe "legacy_article?" do
117
+ it "should return true if the manuscript was upload while the legacy API was still in use" do
118
+ @legacy_manuscript.should be_legacy_article
119
+ end
120
+ it "should return false if the manuscript was uploaded after the transition to the new API" do
121
+ @manuscript.should_not be_legacy_article
122
+ end
123
+ end
104
124
  end
105
125
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxiv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-02-15 00:00:00.000000000Z
14
+ date: 2012-02-16 00:00:00.000000000Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: happymapper
18
- requirement: &2157600480 !ruby/object:Gem::Requirement
18
+ requirement: &2156723380 !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,10 @@ dependencies:
23
23
  version: '0'
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *2157600480
26
+ version_requirements: *2156723380
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: nokogiri
29
- requirement: &2157600060 !ruby/object:Gem::Requirement
29
+ requirement: &2156722840 !ruby/object:Gem::Requirement
30
30
  none: false
31
31
  requirements:
32
32
  - - ! '>='
@@ -34,10 +34,10 @@ dependencies:
34
34
  version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
- version_requirements: *2157600060
37
+ version_requirements: *2156722840
38
38
  - !ruby/object:Gem::Dependency
39
39
  name: rspec
40
- requirement: &2157599640 !ruby/object:Gem::Requirement
40
+ requirement: &2156722300 !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
43
  - - ! '>='
@@ -45,7 +45,7 @@ dependencies:
45
45
  version: '0'
46
46
  type: :development
47
47
  prerelease: false
48
- version_requirements: *2157599640
48
+ version_requirements: *2156722300
49
49
  description: Makes interacting with arXiv data really easy.
50
50
  email:
51
51
  - coryschires@gmail.com