arxivsync 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # arxivsync-0.0.3
1
+ # arxivsync-0.0.4
2
2
 
3
3
  Ruby OAI interface for harvesting the arXiv. Can be used to store and update an XML mirror of paper metadata, and parse the XML into Ruby objects to allow conversion into a friendlier format.
4
4
 
@@ -2,6 +2,12 @@ module ArxivSync
2
2
  class Downloader
3
3
  def initialize(initial_params={})
4
4
  @initial_params = initial_params
5
+
6
+ if @initial_params[:from] == Date.today
7
+ puts "Last responseDate was today. arXiv lacks date granularity beyond the day level; please wait before continuing harvest."
8
+ return false
9
+ end
10
+
5
11
  unless @initial_params[:resumptionToken]
6
12
  @initial_params[:metadataPrefix] ||= 'arXiv'
7
13
  end
@@ -1,10 +1,14 @@
1
1
  module ArxivSync
2
+ # Layout reference: http://www.xmlns.me/?op=visualize&id=643
2
3
  Author = Struct.new(
3
- :keyname, :forenames
4
+ :keyname, :forenames, :suffix, :affiliation
4
5
  )
5
6
 
6
7
  Paper = Struct.new(
7
- :id, :created, :updated, :title, :abstract, :authors,
8
+ :id, :created, :updated, :authors, :title,
9
+ :msc_class, :report_no, :journal_ref, :comments,
10
+ :abstract, :categories, :doi, :proxy, :license,
11
+
8
12
  :primary_category, :crosslists
9
13
  )
10
14
 
@@ -38,15 +42,34 @@ module ArxivSync
38
42
  @model.updated = Date.parse(str)
39
43
  when :title
40
44
  @model.title = clean(str)
45
+ when :"msc-class"
46
+ @model.msc_class = str
47
+ when :"report-no"
48
+ @model.report_no = str
49
+ when :"journal-ref"
50
+ @model.journal_ref = str
51
+ when :comments
52
+ @model.comments = clean(str)
41
53
  when :abstract
42
54
  @model.abstract = clean(str)
43
55
  when :categories
56
+ @model.categories = str.split
44
57
  @model.primary_category = str.split[0]
45
58
  @model.crosslists = str.split.drop(1)
59
+ when :doi
60
+ @model.doi = str
61
+ when :proxy
62
+ @model.proxy = str
63
+ when :license
64
+ @model.license = str
46
65
  when :keyname
47
66
  @author.keyname = str
48
67
  when :forenames
49
68
  @author.forenames = str
69
+ when :suffix
70
+ @author.suffix = str
71
+ when :affiliation
72
+ @author.affiliation = str
50
73
  end
51
74
  end
52
75
 
@@ -1,3 +1,3 @@
1
1
  module ArxivSync
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -41,12 +41,8 @@ module ArxivSync
41
41
 
42
42
  if last_token.empty? # Previous sync completed successfully
43
43
  responseDate = Date.parse(last_response.css('responseDate').text)
44
- if responseDate == Date.today
45
- puts "Last responseDate was today. arXiv lacks date granularity beyond the day level; please wait before continuing harvest."
46
- return false
47
- end
48
44
  puts "Downloading from last responseDate: #{responseDate}"
49
- oai_params[:from] ||= responseDate
45
+ oai_params[:from] = responseDate
50
46
  else # Previous sync aborted prematurely, resume
51
47
  puts "Resuming download using previous resumptionToken: #{last_token}"
52
48
  oai_params = { resumptionToken: last_token }
data/test/parser.rb CHANGED
@@ -6,7 +6,7 @@ TEST_ROOT = File.dirname(__FILE__)
6
6
  class TestParser < Minitest::Test
7
7
  def test_parser
8
8
  archive = ArxivSync::XMLArchive.new(File.join(TEST_ROOT, 'fixtures'))
9
- tested_a_paper = false
9
+ tested = 0
10
10
  archive.read_metadata do |papers|
11
11
  assert_equal papers.count, 1000
12
12
  papers.each do |paper|
@@ -14,16 +14,24 @@ class TestParser < Minitest::Test
14
14
  assert_equal paper.created, Date.parse("2013-02-04")
15
15
  assert_equal paper.updated, nil
16
16
  assert_equal paper.title, "Correlation effects in the electronic structure of the Ni-based superconducting KNi2S2"
17
+ assert_equal paper.license, "http://creativecommons.org/licenses/by/3.0/"
17
18
  assert_equal paper.primary_category, "cond-mat.supr-con"
18
19
  assert_equal paper.crosslists, []
19
20
  assert_includes paper.abstract, "using Gutzwiller approximation method."
20
21
  assert_equal paper.authors.map(&:keyname), ["Lu", "Wang", "Xie", "Zhang"]
21
22
  assert_equal paper.authors.map(&:forenames), ["Feng", "Wei-Hua", "Xinjian", "Fu-Chun"]
22
- tested_a_paper = true
23
+ tested += 1
24
+ end
25
+
26
+ if paper.id == "1302.0758"
27
+ assert_equal paper.authors[0].affiliation, "Baylor University, Waco, TX, USA"
28
+ assert_equal paper.comments, "8 pages, 4 figures; presented by BFLW at ICHEP 2012"
29
+ assert_equal paper.report_no, "BU-HEPP-12-05"
30
+ tested += 1
23
31
  end
24
32
  end
25
33
  end
26
34
 
27
- assert tested_a_paper
35
+ assert_equal tested, 2
28
36
  end
29
37
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxivsync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-11 00:00:00.000000000 Z
12
+ date: 2013-06-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler