paper_metadata 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/paper_metadata.rb +39 -2
- data/lib/paper_metadata/version.rb +1 -1
- data/test/test_paper_metadata.rb +35 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70781715c1398e4f11015d9e03ccec207db08c3a
|
4
|
+
data.tar.gz: c8338bcad90badd674fa2d227710dabb669554a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6bf5144e34d59eca67c71d8a8a59959afc9ff322cee8d9371f37d5ab893837c52f9f90556575f431633bef3670335f42d85ad7814a6e5e6f7cb88fadd3233be9
|
7
|
+
data.tar.gz: 9f3a9c84e07ed5c0f5666b8649abd87e4cf626ff2825bd03649cec9a1d94835f2f513174a0046ec761939ce7774910a43a7160b4949c92caaf1e846e08c0109a
|
data/lib/paper_metadata.rb
CHANGED
@@ -4,6 +4,7 @@ require 'net/http'
|
|
4
4
|
require 'open-uri'
|
5
5
|
require 'cgi'
|
6
6
|
require 'uri'
|
7
|
+
require 'json'
|
7
8
|
|
8
9
|
module PaperMetadata
|
9
10
|
class << self
|
@@ -17,6 +18,8 @@ module PaperMetadata
|
|
17
18
|
def metadata_for(identifier)
|
18
19
|
if identifier =~ /^arxiv\:(.*)/i
|
19
20
|
metadata_for_arxiv($1.strip)
|
21
|
+
elsif identifier =~ /^doi\:\s*(10\.6084\/.*)/i
|
22
|
+
metadata_for_doi_from_figshare($1.strip)
|
20
23
|
elsif identifier =~ /^doi\:(.*)/i
|
21
24
|
metadata_for_doi($1.strip)
|
22
25
|
end
|
@@ -45,11 +48,44 @@ module PaperMetadata
|
|
45
48
|
doc.xpath("//journal_issue/publication_date/year").first.inner_html
|
46
49
|
paper[:resource] = doc.xpath("//journal_article/doi_data/resource").inner_html
|
47
50
|
else
|
48
|
-
paper
|
51
|
+
paper = metadata_for_doi_from_datacite(doi)
|
49
52
|
end
|
50
53
|
paper
|
51
54
|
end
|
52
55
|
|
56
|
+
def metadata_for_doi_from_datacite(doi)
|
57
|
+
paper = {}
|
58
|
+
response = JSON.parse(open("http://search.datacite.org/api?q=#{CGI.escape(doi)}&fl=doi,creator,title,publisher,publicationYear,datacentre&wt=json").read)
|
59
|
+
if response && !response['response']['docs'].empty? && response['response']['docs'].first['title']
|
60
|
+
result = response['response']['docs'].first
|
61
|
+
paper[:title] = result['title'].first
|
62
|
+
paper[:authors] = result['creator'].map{|c| c.split(', ').reverse.join(' ')}.join(', ')
|
63
|
+
paper[:published] = result['publicationYear']
|
64
|
+
paper[:publisher] = result['publisher']
|
65
|
+
paper[:datacentre] = result['datacentre']
|
66
|
+
paper[:journal] = 'DataCite'
|
67
|
+
paper
|
68
|
+
else
|
69
|
+
{status: :NODOI}
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def metadata_for_doi_from_figshare(doi)
|
74
|
+
paper = {}
|
75
|
+
response = JSON.parse(open("http://api.figshare.com/articles/#{doi}").read)
|
76
|
+
|
77
|
+
if response && response['items'] && response['items'].first
|
78
|
+
result = response['items'].first
|
79
|
+
paper[:title] = result['title']
|
80
|
+
paper[:authors] = result['authors'].map{|a| a['full_name']}.join(', ')
|
81
|
+
paper[:published] = result['published_date']
|
82
|
+
paper[:journal] = 'FigShare'
|
83
|
+
paper
|
84
|
+
else
|
85
|
+
{status: :NODOI}
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
53
89
|
def metadata_for_arxiv(identifier)
|
54
90
|
identifier.gsub!(/^arXiv\:/i, '')
|
55
91
|
url = URI.parse("http://export.arxiv.org/api/query?search_query=#{CGI.escape(identifier)}&start=0&max_results=1")
|
@@ -59,11 +95,12 @@ module PaperMetadata
|
|
59
95
|
paper = Hash.new
|
60
96
|
if entry = doc.xpath("//entry").first
|
61
97
|
paper[:title] = entry.xpath('title').text
|
62
|
-
paper[:
|
98
|
+
paper[:authors] = entry.xpath('author').text.split("\n").map{|a| a.strip if a.strip != ""}.compact.join(', ')
|
63
99
|
paper[:id] = entry.xpath('id').text
|
64
100
|
paper[:updated] = entry.xpath('updated').text
|
65
101
|
paper[:summary] = entry.xpath('summary').text
|
66
102
|
paper[:published] = entry.xpath('published').text
|
103
|
+
paper[:journal] = 'arXiv'
|
67
104
|
end
|
68
105
|
paper
|
69
106
|
end
|
data/test/test_paper_metadata.rb
CHANGED
@@ -9,33 +9,62 @@ class PaperMetadataTest < Test::Unit::TestCase
|
|
9
9
|
stub_request(:any, /www.crossref.org\/.*/).
|
10
10
|
to_return(:body => doi_response, :status => 200, :headers => { 'Content-Length' => doi_response.length } )
|
11
11
|
|
12
|
-
PaperMetadata.doi_username = 'test@example.com'
|
13
|
-
|
14
12
|
assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
|
15
13
|
PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
|
16
14
|
end
|
17
15
|
|
18
16
|
def test_doi_parsing_live
|
19
17
|
WebMock.allow_net_connect!
|
20
|
-
doi_response = File.read(File.join(File.dirname(__FILE__), 'doi.xml'))
|
21
|
-
PaperMetadata.doi_username = 'test@crossref.org'
|
22
18
|
assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
|
23
19
|
PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
|
24
20
|
WebMock.disable_net_connect!
|
25
21
|
end
|
26
22
|
|
23
|
+
def test_datacite_doi_parsing_live
|
24
|
+
WebMock.allow_net_connect!
|
25
|
+
|
26
|
+
metadata = PaperMetadata.metadata_for('doi:10.6092/issn.1973-9494/3396')
|
27
|
+
assert_equal "AlmaDL Journals: Quality Services for Open Access Scientific Publications at the University of Bologna",
|
28
|
+
metadata[:title]
|
29
|
+
|
30
|
+
assert_equal "Marialaura Vignocchi, Roberta Lauriola, Andrea Zanni, Antonio Puglisi, Raffaele Messuti",
|
31
|
+
metadata[:authors]
|
32
|
+
|
33
|
+
WebMock.disable_net_connect!
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_figshare_doi_parsing_live
|
37
|
+
WebMock.allow_net_connect!
|
38
|
+
|
39
|
+
metadata = PaperMetadata.metadata_for('doi:10.6084/m9.figshare.736442')
|
40
|
+
assert_equal "XML4NGS : A XML-based description of a Next-Generation sequencing project allowing the generation of a ’Makefile’-driven workflow.",
|
41
|
+
metadata[:title]
|
42
|
+
|
43
|
+
assert_equal "Pierre Lindenbaum, Raluca Teusan, Richard Redon, Audrey Bihouée, Solena LeScouarnec",
|
44
|
+
metadata[:authors]
|
45
|
+
|
46
|
+
WebMock.disable_net_connect!
|
47
|
+
|
48
|
+
end
|
49
|
+
|
27
50
|
def test_arxiv_parsing
|
28
51
|
arxiv_response = File.read(File.join(File.dirname(__FILE__), 'arxiv.xml'))
|
29
52
|
stub_request(:any, /.*arxiv.org\/.*/).
|
30
53
|
to_return(:body => arxiv_response, :status => 200, :headers => { 'Content-Length' => arxiv_response.length } )
|
31
54
|
assert_equal "Thomas Vojta",
|
32
|
-
PaperMetadata.metadata_for('arXiv:1301.7746')[:
|
55
|
+
PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
|
33
56
|
end
|
34
57
|
|
35
58
|
def test_arxiv_parsing_live
|
36
59
|
WebMock.allow_net_connect!
|
37
60
|
assert_equal "Thomas Vojta",
|
38
|
-
PaperMetadata.metadata_for('arXiv:1301.7746')[:
|
61
|
+
PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
|
62
|
+
WebMock.disable_net_connect!
|
63
|
+
end
|
64
|
+
|
65
|
+
def test_nonexistent_doi
|
66
|
+
WebMock.allow_net_connect!
|
67
|
+
assert_equal({status: :NODOI}, PaperMetadata.metadata_for('doi:thisdoi/doesntexist'))
|
39
68
|
WebMock.disable_net_connect!
|
40
69
|
end
|
41
70
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: paper_metadata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jure Triglav
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry
|