arxiv 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/README.rdoc +3 -0
- data/Rakefile +1 -0
- data/arxiv.gemspec +27 -0
- data/lib/arxiv.rb +109 -0
- data/lib/arxiv/version.rb +3 -0
- data/spec/arxiv_spec.rb +102 -0
- data/spec/spec_helper.rb +5 -0
- metadata +93 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use ruby-1.9.2-p290@arxiv --create
|
data/Gemfile
ADDED
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/arxiv.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "arxiv/version"
|
4
|
+
|
5
|
+
# Gem specification for the arxiv gem: identity, packaged files and
# dependencies.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name        = "arxiv"
  spec.version     = Arxiv::VERSION
  spec.authors     = ["Cory Schires", "Brian Cody", "Robert Walsh"]
  spec.email       = ["coryschires@gmail.com"]
  spec.homepage    = ""
  spec.summary     = "Ruby wrapper accessing the arXiv API"
  spec.description = "Makes interacting with arXiv data really easy."

  spec.rubyforge_project = "arxiv"

  # Package contents are whatever git currently tracks.
  spec.files         = %x(git ls-files).split("\n")
  spec.test_files    = %x(git ls-files -- {test,spec,features}/*).split("\n")
  spec.executables   = %x(git ls-files -- bin/*).split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_runtime_dependency "happymapper"
  spec.add_runtime_dependency "nokogiri"

  # Development-only dependencies.
  spec.add_development_dependency "rspec"
end
|
data/lib/arxiv.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'arxiv/version'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'happymapper'
|
5
|
+
|
6
|
+
module Arxiv
|
7
|
+
|
8
|
+
# Fetches a single manuscript from the arXiv export API.
#
# id - arXiv identifier, interpolated as-is into the `id_list` query
#      parameter (e.g. '1202.0819').
#
# Returns whatever Arxiv::Manuscript.parse yields for the
# namespace-stripped response XML.
def self.get(id)
  url = ::URI.parse("http://export.arxiv.org/api/query?id_list=#{id}")
  # URI#open (mixed in by open-uri) replaces Kernel#open, which stopped
  # accepting URIs in Ruby 3.0. The block form closes the stream once the
  # document has been parsed.
  response = url.open { |io| ::Nokogiri::XML(io) }.remove_namespaces!
  Arxiv::Manuscript.parse(response.to_s, single: id)
end
|
13
|
+
|
14
|
+
# Normalizes free-text values: newlines become spaces, surrounding
# whitespace is stripped and runs of spaces are collapsed to one.
class StringScrubber
  # string - the raw text to clean, or nil.
  #
  # Returns the cleaned String. nil is passed through unchanged so that a
  # missing value does not raise NoMethodError.
  def self.scrub(string)
    return nil if string.nil?

    string.gsub("\n", ' ').strip.squeeze(' ')
  end
end
|
19
|
+
|
20
|
+
# A manuscript author, mapped from XML through HappyMapper.
class Author
  include HappyMapper

  # Author name, cleaned with StringScrubber.scrub.
  element(:name, StringScrubber, :parser => :scrub)

  # One entry per 'affiliation' tag, each cleaned with StringScrubber.scrub.
  has_many(:affiliations, StringScrubber, :parser => :scrub, :tag => 'affiliation')
end
|
25
|
+
|
26
|
+
# A link attached to a manuscript, mapped from XML through HappyMapper.
class Link
  include HappyMapper

  # Target of the link, read from the 'href' attribute.
  attribute(:url, String, :tag => 'href')

  # Content type of the linked resource, read from the 'type' attribute.
  attribute(:content_type, String, :tag => 'type')
end
|
31
|
+
|
32
|
+
# An arXiv subject category attached to a manuscript.
class Category
  include HappyMapper

  # Downloads the category service document and builds a lookup table.
  #
  # Returns a Hash mapping each category abbreviation (the part of the
  # 'term' attribute after its last slash) to the 'label' attribute.
  def self.types
    url = ::URI.parse("http://arxiv-api.googlegroups.com/attach/5e540c5aa16cd1a1/servicedocument.xml?gda=GkSq-0UAAACv8MuSQ9shr-Fm8egpLVNUyoJFgZHB152DBrQX4ANeXa_N1TJg9KB-8oF-EwbRpI6O3f1cykW9hbJ1ju6H3kglGu1iLHeqhw4ZZRj3RjJ_-A&view=1&part=2")
    # URI#open (mixed in by open-uri) replaces Kernel#open, which stopped
    # accepting URIs in Ruby 3.0. The block form closes the stream once the
    # document has been parsed.
    xml = url.open { |io| ::Nokogiri::XML(io) }.remove_namespaces!
    categories = xml.xpath("/service/workspace/collection/categories/category")
    categories.each_with_object({}) do |category, mapping|
      abbreviation = category.attributes["term"].value.match(/[^\/]+$/)[0]
      mapping[abbreviation] = category.attributes["label"].value
    end
  end

  # Abbreviation => description table. Built once, when this class body
  # is evaluated, which triggers the download in Category.types.
  Types = Category.types

  attribute :abbreviation, String, tag: 'term'

  # Returns the description registered for this category's abbreviation,
  # or nil when the abbreviation is not in Types.
  def description
    Types[abbreviation]
  end

  # Returns the abbreviation followed by the description in parentheses.
  def long_description
    "#{abbreviation} (#{description})"
  end
end
|
61
|
+
|
62
|
+
# A single arXiv manuscript, mapped from an 'entry' element of the API
# response through HappyMapper.
class Manuscript
  include HappyMapper

  tag 'entry'
  element :arxiv_url, String, tag: 'id'
  element :created_at, DateTime, tag: 'published'
  # Previously read from 'published' as well, which made updated_at always
  # equal created_at and revision? always false.
  element :updated_at, DateTime, tag: 'updated'
  element :title, StringScrubber, parser: :scrub
  element :summary, StringScrubber, parser: :scrub
  element :comment, StringScrubber, parser: :scrub
  has_one :primary_category, Category
  has_many :categories, Category
  has_many :authors, Author
  has_many :links, Link

  alias :abstract :summary

  # Returns true when the update timestamp differs from the creation
  # timestamp.
  def revision?
    created_at != updated_at
  end

  # Returns the last path segment of arxiv_url without its trailing
  # version suffix (e.g. '1202.0819' for '.../abs/1202.0819v1').
  #
  # NOTE(review): [^\/]+ captures only the final path segment, so an
  # identifier that itself contains a slash would lose its prefix;
  # confirm whether such identifiers need to be supported.
  def arxiv_id
    arxiv_url.match(/([^\/]+)v\d+$/)[1]
  end

  # Returns the last path segment of arxiv_url, version suffix included
  # (e.g. '1202.0819v1').
  def arxiv_versioned_id
    arxiv_url.match(/([^\/]+)$/)[1]
  end

  # Returns the Integer following the trailing 'v' of arxiv_url.
  def version
    arxiv_url.match(/v(\d+)$/)[1].to_i
  end

  # Returns an Array with the content type of every link.
  def content_types
    links.map(&:content_type)
  end

  # Returns true when at least one link has the PDF content type.
  def available_in_pdf?
    content_types.include?("application/pdf")
  end

  # Returns the url of the first PDF link, or nil when there is none.
  def pdf_url
    # Single pass over the links instead of checking availability first
    # and then searching again.
    pdf_link = links.find { |link| link.content_type == "application/pdf" }
    pdf_link && pdf_link.url
  end
end
|
108
|
+
|
109
|
+
end
|
data/spec/arxiv_spec.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Arxiv wrapper. The manuscript is fetched once through
# Arxiv.get and shared by every example.
describe Arxiv do
  before :all do
    @manuscript = Arxiv.get('1202.0819') # export.arxiv.org/api/query?id_list=1202.0819
  end

  it "should fetch the link to the manuscript's page on arXiv" do
    @manuscript.arxiv_url.should == "http://arxiv.org/abs/1202.0819v1"
  end
  it "should fetch the datetime when the manuscript was first published to arXiv" do
    @manuscript.created_at.should == DateTime.parse("2012-02-03T21:00:00Z")
  end
  it "should fetch the datetime when the manuscript was last updated" do
    @manuscript.updated_at.should == DateTime.parse("2012-02-03T21:00:00Z")
  end
  it "should fetch the manuscript's title" do
    @manuscript.title.should == "Laser frequency comb techniques for precise astronomical spectroscopy"
  end
  it "should fetch the manuscript's abstract" do
    @manuscript.abstract.should == "Precise astronomical spectroscopic analyses routinely assume that individual pixels in charge-coupled devices (CCDs) have uniform sensitivity to photons. Intra-pixel sensitivity (IPS) variations may already cause small systematic errors in, for example, studies of extra-solar planets via stellar radial velocities and cosmological variability in fundamental constants via quasar spectroscopy, but future experiments requiring velocity precisions approaching ~1 cm/s will be more strongly affected. Laser frequency combs have been shown to provide highly precise wavelength calibration for astronomical spectrographs, but here we show that they can also be used to measure IPS variations in astronomical CCDs in situ. We successfully tested a laser frequency comb system on the Ultra-High Resolution Facility spectrograph at the Anglo-Australian Telescope. By modelling the 2-dimensional comb signal recorded in a single CCD exposure, we find that the average IPS deviates by <8 per cent if it is assumed to vary symmetrically about the pixel centre. We also demonstrate that series of comb exposures with absolutely known offsets between them can yield tighter constraints on symmetric IPS variations from ~100 pixels. We discuss measurement of asymmetric IPS variations and absolute wavelength calibration of astronomical spectrographs and CCDs using frequency combs."
  end
  it "should fetch the manuscript's comment" do
    @manuscript.comment.should == "11 pages, 7 figures. Accepted for publication in MNRAS"
  end

  context "authors" do
    it "should fetch the authors" do
      @manuscript.authors.should have(5).authors
    end
    it "should fetch the author's affiliations" do
      author = @manuscript.authors.first
      author.affiliations.should include("Swinburne University of Technology")
    end
  end

  context "categories" do
    it "should fetch the manuscript's categories" do
      @manuscript.categories.map(&:abbreviation).should include("astro-ph.IM", "astro-ph.CO", "astro-ph.EP")
    end
    it "should fetch the category's abbreviation" do
      @manuscript.primary_category.abbreviation.should == "astro-ph.IM"
    end
    it "should fetch the category's description" do
      @manuscript.primary_category.description.should == "Physics - Instrumentation and Methods for Astrophysics"
    end
    it "should fetch the category's #long_description" do
      @manuscript.primary_category.long_description.should == "astro-ph.IM (Physics - Instrumentation and Methods for Astrophysics)"
    end

  end

  context "links" do
    let(:pdf) {@manuscript.links.last}
    it "should fetch the link's content type" do
      pdf.content_type.should == 'application/pdf'
    end
    it "should fetch the link's url" do
      pdf.url.should == 'http://arxiv.org/pdf/1202.0819v1'
    end
  end

  context "instance methods" do
    describe "revision?" do
      # The fixture is a first version, so the expectation is negative;
      # the description now says so instead of claiming the opposite.
      it "should return false if the manuscript has not been revised" do
        @manuscript.should_not be_revision
      end
    end
    describe "arxiv_versioned_id" do
      it "should return the unique document id used by arXiv" do
        @manuscript.arxiv_versioned_id.should == '1202.0819v1'
      end
    end
    describe "arxiv_id" do
      it "should return the unique document id used by arXiv" do
        @manuscript.arxiv_id.should == '1202.0819'
      end
    end
    describe "version" do
      it "should return the manuscript's version number" do
        @manuscript.version.should == 1
      end
    end
    describe "content_types" do
      it "return an array of available content_types" do
        @manuscript.content_types.should include("text/html", "application/pdf")
        @manuscript.content_types.should have(2).content_types
      end
    end
    describe "available_in_pdf?" do
      it "should return true if the manuscript is available to be downloaded in PDF" do
        @manuscript.should be_available_in_pdf
      end
    end
    describe "pdf_url" do
      it "should return the url to download the manuscript in PDF format" do
        @manuscript.pdf_url.should == 'http://arxiv.org/pdf/1202.0819v1'
      end
    end
  end

end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: arxiv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Cory Schires
|
9
|
+
- Brian Cody
|
10
|
+
- Robert Walsh
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2012-02-14 00:00:00.000000000Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: happymapper
|
18
|
+
requirement: &2152849100 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ! '>='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: '0'
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: *2152849100
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: &2152848680 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: *2152848680
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: rspec
|
40
|
+
requirement: &2152848260 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
type: :development
|
47
|
+
prerelease: false
|
48
|
+
version_requirements: *2152848260
|
49
|
+
description: Makes interacting with arXiv data really easy.
|
50
|
+
email:
|
51
|
+
- coryschires@gmail.com
|
52
|
+
executables: []
|
53
|
+
extensions: []
|
54
|
+
extra_rdoc_files: []
|
55
|
+
files:
|
56
|
+
- .gitignore
|
57
|
+
- .rspec
|
58
|
+
- .rvmrc
|
59
|
+
- Gemfile
|
60
|
+
- README.rdoc
|
61
|
+
- Rakefile
|
62
|
+
- arxiv.gemspec
|
63
|
+
- lib/arxiv.rb
|
64
|
+
- lib/arxiv/version.rb
|
65
|
+
- spec/arxiv_spec.rb
|
66
|
+
- spec/spec_helper.rb
|
67
|
+
homepage: ''
|
68
|
+
licenses: []
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ! '>='
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubyforge_project: arxiv
|
87
|
+
rubygems_version: 1.8.10
|
88
|
+
signing_key:
|
89
|
+
specification_version: 3
|
90
|
+
summary: Ruby wrapper accessing the arXiv API
|
91
|
+
test_files:
|
92
|
+
- spec/arxiv_spec.rb
|
93
|
+
- spec/spec_helper.rb
|