arxivsync 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +1 -1
- data/lib/arxivsync/downloader.rb +6 -0
- data/lib/arxivsync/parser.rb +25 -2
- data/lib/arxivsync/version.rb +1 -1
- data/lib/arxivsync/xmlarchive.rb +1 -5
- data/test/parser.rb +11 -3
- metadata +2 -2
data/README.md
CHANGED
data/lib/arxivsync/downloader.rb
CHANGED
@@ -2,6 +2,12 @@ module ArxivSync
|
|
2
2
|
class Downloader
|
3
3
|
def initialize(initial_params={})
|
4
4
|
@initial_params = initial_params
|
5
|
+
|
6
|
+
if @initial_params[:from] == Date.today
|
7
|
+
puts "Last responseDate was today. arXiv lacks date granularity beyond the day level; please wait before continuing harvest."
|
8
|
+
return false
|
9
|
+
end
|
10
|
+
|
5
11
|
unless @initial_params[:resumptionToken]
|
6
12
|
@initial_params[:metadataPrefix] ||= 'arXiv'
|
7
13
|
end
|
data/lib/arxivsync/parser.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
module ArxivSync
|
2
|
+
# Layout reference: http://www.xmlns.me/?op=visualize&id=643
|
2
3
|
Author = Struct.new(
|
3
|
-
:keyname, :forenames
|
4
|
+
:keyname, :forenames, :suffix, :affiliation
|
4
5
|
)
|
5
6
|
|
6
7
|
Paper = Struct.new(
|
7
|
-
:id, :created, :updated, :
|
8
|
+
:id, :created, :updated, :authors, :title,
|
9
|
+
:msc_class, :report_no, :journal_ref, :comments,
|
10
|
+
:abstract, :categories, :doi, :proxy, :license,
|
11
|
+
|
8
12
|
:primary_category, :crosslists
|
9
13
|
)
|
10
14
|
|
@@ -38,15 +42,34 @@ module ArxivSync
|
|
38
42
|
@model.updated = Date.parse(str)
|
39
43
|
when :title
|
40
44
|
@model.title = clean(str)
|
45
|
+
when :"msc-class"
|
46
|
+
@model.msc_class = str
|
47
|
+
when :"report-no"
|
48
|
+
@model.report_no = str
|
49
|
+
when :"journal-ref"
|
50
|
+
@model.journal_ref = str
|
51
|
+
when :comments
|
52
|
+
@model.comments = clean(str)
|
41
53
|
when :abstract
|
42
54
|
@model.abstract = clean(str)
|
43
55
|
when :categories
|
56
|
+
@model.categories = str.split
|
44
57
|
@model.primary_category = str.split[0]
|
45
58
|
@model.crosslists = str.split.drop(1)
|
59
|
+
when :doi
|
60
|
+
@model.doi = str
|
61
|
+
when :proxy
|
62
|
+
@model.proxy = str
|
63
|
+
when :license
|
64
|
+
@model.license = str
|
46
65
|
when :keyname
|
47
66
|
@author.keyname = str
|
48
67
|
when :forenames
|
49
68
|
@author.forenames = str
|
69
|
+
when :suffix
|
70
|
+
@author.suffix = str
|
71
|
+
when :affiliation
|
72
|
+
@author.affiliation = str
|
50
73
|
end
|
51
74
|
end
|
52
75
|
|
data/lib/arxivsync/version.rb
CHANGED
data/lib/arxivsync/xmlarchive.rb
CHANGED
@@ -41,12 +41,8 @@ module ArxivSync
|
|
41
41
|
|
42
42
|
if last_token.empty? # Previous sync completed successfully
|
43
43
|
responseDate = Date.parse(last_response.css('responseDate').text)
|
44
|
-
if responseDate == Date.today
|
45
|
-
puts "Last responseDate was today. arXiv lacks date granularity beyond the day level; please wait before continuing harvest."
|
46
|
-
return false
|
47
|
-
end
|
48
44
|
puts "Downloading from last responseDate: #{responseDate}"
|
49
|
-
oai_params[:from]
|
45
|
+
oai_params[:from] = responseDate
|
50
46
|
else # Previous sync aborted prematurely, resume
|
51
47
|
puts "Resuming download using previous resumptionToken: #{last_token}"
|
52
48
|
oai_params = { resumptionToken: last_token }
|
data/test/parser.rb
CHANGED
@@ -6,7 +6,7 @@ TEST_ROOT = File.dirname(__FILE__)
|
|
6
6
|
class TestParser < Minitest::Test
|
7
7
|
def test_parser
|
8
8
|
archive = ArxivSync::XMLArchive.new(File.join(TEST_ROOT, 'fixtures'))
|
9
|
-
|
9
|
+
tested = 0
|
10
10
|
archive.read_metadata do |papers|
|
11
11
|
assert_equal papers.count, 1000
|
12
12
|
papers.each do |paper|
|
@@ -14,16 +14,24 @@ class TestParser < Minitest::Test
|
|
14
14
|
assert_equal paper.created, Date.parse("2013-02-04")
|
15
15
|
assert_equal paper.updated, nil
|
16
16
|
assert_equal paper.title, "Correlation effects in the electronic structure of the Ni-based superconducting KNi2S2"
|
17
|
+
assert_equal paper.license, "http://creativecommons.org/licenses/by/3.0/"
|
17
18
|
assert_equal paper.primary_category, "cond-mat.supr-con"
|
18
19
|
assert_equal paper.crosslists, []
|
19
20
|
assert_includes paper.abstract, "using Gutzwiller approximation method."
|
20
21
|
assert_equal paper.authors.map(&:keyname), ["Lu", "Wang", "Xie", "Zhang"]
|
21
22
|
assert_equal paper.authors.map(&:forenames), ["Feng", "Wei-Hua", "Xinjian", "Fu-Chun"]
|
22
|
-
|
23
|
+
tested += 1
|
24
|
+
end
|
25
|
+
|
26
|
+
if paper.id == "1302.0758"
|
27
|
+
assert_equal paper.authors[0].affiliation, "Baylor University, Waco, TX, USA"
|
28
|
+
assert_equal paper.comments, "8 pages, 4 figures; presented by BFLW at ICHEP 2012"
|
29
|
+
assert_equal paper.report_no, "BU-HEPP-12-05"
|
30
|
+
tested += 1
|
23
31
|
end
|
24
32
|
end
|
25
33
|
end
|
26
34
|
|
27
|
-
|
35
|
+
assert_equal tested, 2
|
28
36
|
end
|
29
37
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arxivsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|