paper_metadata 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/paper_metadata.rb +39 -2
- data/lib/paper_metadata/version.rb +1 -1
- data/test/test_paper_metadata.rb +35 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70781715c1398e4f11015d9e03ccec207db08c3a
|
4
|
+
data.tar.gz: c8338bcad90badd674fa2d227710dabb669554a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6bf5144e34d59eca67c71d8a8a59959afc9ff322cee8d9371f37d5ab893837c52f9f90556575f431633bef3670335f42d85ad7814a6e5e6f7cb88fadd3233be9
|
7
|
+
data.tar.gz: 9f3a9c84e07ed5c0f5666b8649abd87e4cf626ff2825bd03649cec9a1d94835f2f513174a0046ec761939ce7774910a43a7160b4949c92caaf1e846e08c0109a
|
data/lib/paper_metadata.rb
CHANGED
@@ -4,6 +4,7 @@ require 'net/http'
|
|
4
4
|
require 'open-uri'
|
5
5
|
require 'cgi'
|
6
6
|
require 'uri'
|
7
|
+
require 'json'
|
7
8
|
|
8
9
|
module PaperMetadata
|
9
10
|
class << self
|
@@ -17,6 +18,8 @@ module PaperMetadata
|
|
17
18
|
def metadata_for(identifier)
|
18
19
|
if identifier =~ /^arxiv\:(.*)/i
|
19
20
|
metadata_for_arxiv($1.strip)
|
21
|
+
elsif identifier =~ /^doi\:\s*(10\.6084\/.*)/i
|
22
|
+
metadata_for_doi_from_figshare($1.strip)
|
20
23
|
elsif identifier =~ /^doi\:(.*)/i
|
21
24
|
metadata_for_doi($1.strip)
|
22
25
|
end
|
@@ -45,11 +48,44 @@ module PaperMetadata
|
|
45
48
|
doc.xpath("//journal_issue/publication_date/year").first.inner_html
|
46
49
|
paper[:resource] = doc.xpath("//journal_article/doi_data/resource").inner_html
|
47
50
|
else
|
48
|
-
paper
|
51
|
+
paper = metadata_for_doi_from_datacite(doi)
|
49
52
|
end
|
50
53
|
paper
|
51
54
|
end
|
52
55
|
|
56
|
+
def metadata_for_doi_from_datacite(doi)
|
57
|
+
paper = {}
|
58
|
+
response = JSON.parse(open("http://search.datacite.org/api?q=#{CGI.escape(doi)}&fl=doi,creator,title,publisher,publicationYear,datacentre&wt=json").read)
|
59
|
+
if response && !response['response']['docs'].empty? && response['response']['docs'].first['title']
|
60
|
+
result = response['response']['docs'].first
|
61
|
+
paper[:title] = result['title'].first
|
62
|
+
paper[:authors] = result['creator'].map{|c| c.split(', ').reverse.join(' ')}.join(', ')
|
63
|
+
paper[:published] = result['publicationYear']
|
64
|
+
paper[:publisher] = result['publisher']
|
65
|
+
paper[:datacentre] = result['datacentre']
|
66
|
+
paper[:journal] = 'DataCite'
|
67
|
+
paper
|
68
|
+
else
|
69
|
+
{status: :NODOI}
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def metadata_for_doi_from_figshare(doi)
|
74
|
+
paper = {}
|
75
|
+
response = JSON.parse(open("http://api.figshare.com/articles/#{doi}").read)
|
76
|
+
|
77
|
+
if response && response['items'] && response['items'].first
|
78
|
+
result = response['items'].first
|
79
|
+
paper[:title] = result['title']
|
80
|
+
paper[:authors] = result['authors'].map{|a| a['full_name']}.join(', ')
|
81
|
+
paper[:published] = result['published_date']
|
82
|
+
paper[:journal] = 'FigShare'
|
83
|
+
paper
|
84
|
+
else
|
85
|
+
{status: :NODOI}
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
53
89
|
def metadata_for_arxiv(identifier)
|
54
90
|
identifier.gsub!(/^arXiv\:/i, '')
|
55
91
|
url = URI.parse("http://export.arxiv.org/api/query?search_query=#{CGI.escape(identifier)}&start=0&max_results=1")
|
@@ -59,11 +95,12 @@ module PaperMetadata
|
|
59
95
|
paper = Hash.new
|
60
96
|
if entry = doc.xpath("//entry").first
|
61
97
|
paper[:title] = entry.xpath('title').text
|
62
|
-
paper[:
|
98
|
+
paper[:authors] = entry.xpath('author').text.split("\n").map{|a| a.strip if a.strip != ""}.compact.join(', ')
|
63
99
|
paper[:id] = entry.xpath('id').text
|
64
100
|
paper[:updated] = entry.xpath('updated').text
|
65
101
|
paper[:summary] = entry.xpath('summary').text
|
66
102
|
paper[:published] = entry.xpath('published').text
|
103
|
+
paper[:journal] = 'arXiv'
|
67
104
|
end
|
68
105
|
paper
|
69
106
|
end
|
data/test/test_paper_metadata.rb
CHANGED
@@ -9,33 +9,62 @@ class PaperMetadataTest < Test::Unit::TestCase
|
|
9
9
|
stub_request(:any, /www.crossref.org\/.*/).
|
10
10
|
to_return(:body => doi_response, :status => 200, :headers => { 'Content-Length' => doi_response.length } )
|
11
11
|
|
12
|
-
PaperMetadata.doi_username = 'test@example.com'
|
13
|
-
|
14
12
|
assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
|
15
13
|
PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
|
16
14
|
end
|
17
15
|
|
18
16
|
def test_doi_parsing_live
|
19
17
|
WebMock.allow_net_connect!
|
20
|
-
doi_response = File.read(File.join(File.dirname(__FILE__), 'doi.xml'))
|
21
|
-
PaperMetadata.doi_username = 'test@crossref.org'
|
22
18
|
assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
|
23
19
|
PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
|
24
20
|
WebMock.disable_net_connect!
|
25
21
|
end
|
26
22
|
|
23
|
+
def test_datacite_doi_parsing_live
|
24
|
+
WebMock.allow_net_connect!
|
25
|
+
|
26
|
+
metadata = PaperMetadata.metadata_for('doi:10.6092/issn.1973-9494/3396')
|
27
|
+
assert_equal "AlmaDL Journals: Quality Services for Open Access Scientific Publications at the University of Bologna",
|
28
|
+
metadata[:title]
|
29
|
+
|
30
|
+
assert_equal "Marialaura Vignocchi, Roberta Lauriola, Andrea Zanni, Antonio Puglisi, Raffaele Messuti",
|
31
|
+
metadata[:authors]
|
32
|
+
|
33
|
+
WebMock.disable_net_connect!
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_figshare_doi_parsing_live
|
37
|
+
WebMock.allow_net_connect!
|
38
|
+
|
39
|
+
metadata = PaperMetadata.metadata_for('doi:10.6084/m9.figshare.736442')
|
40
|
+
assert_equal "XML4NGS : A XML-based description of a Next-Generation sequencing project allowing the generation of a ’Makefile’-driven workflow.",
|
41
|
+
metadata[:title]
|
42
|
+
|
43
|
+
assert_equal "Pierre Lindenbaum, Raluca Teusan, Richard Redon, Audrey Bihouée, Solena LeScouarnec",
|
44
|
+
metadata[:authors]
|
45
|
+
|
46
|
+
WebMock.disable_net_connect!
|
47
|
+
|
48
|
+
end
|
49
|
+
|
27
50
|
def test_arxiv_parsing
|
28
51
|
arxiv_response = File.read(File.join(File.dirname(__FILE__), 'arxiv.xml'))
|
29
52
|
stub_request(:any, /.*arxiv.org\/.*/).
|
30
53
|
to_return(:body => arxiv_response, :status => 200, :headers => { 'Content-Length' => arxiv_response.length } )
|
31
54
|
assert_equal "Thomas Vojta",
|
32
|
-
PaperMetadata.metadata_for('arXiv:1301.7746')[:
|
55
|
+
PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
|
33
56
|
end
|
34
57
|
|
35
58
|
def test_arxiv_parsing_live
|
36
59
|
WebMock.allow_net_connect!
|
37
60
|
assert_equal "Thomas Vojta",
|
38
|
-
PaperMetadata.metadata_for('arXiv:1301.7746')[:
|
61
|
+
PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
|
62
|
+
WebMock.disable_net_connect!
|
63
|
+
end
|
64
|
+
|
65
|
+
def test_nonexistent_doi
|
66
|
+
WebMock.allow_net_connect!
|
67
|
+
assert_equal({status: :NODOI}, PaperMetadata.metadata_for('doi:thisdoi/doesntexist'))
|
39
68
|
WebMock.disable_net_connect!
|
40
69
|
end
|
41
70
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: paper_metadata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jure Triglav
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry
|