paper_metadata 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f404b9b484329f732c2b71bd2e4abdf84ca97cee
4
- data.tar.gz: 8f4b35221f91a12d49a445028462d9561ea8424a
3
+ metadata.gz: 70781715c1398e4f11015d9e03ccec207db08c3a
4
+ data.tar.gz: c8338bcad90badd674fa2d227710dabb669554a9
5
5
  SHA512:
6
- metadata.gz: 9baba7b45129a21cf0ad08a6db0469d0f395bd848a9e619bce67d77cfb9d9ab2f08423d5444ad4a87ef7c44d5b9d4b3cccc4a3b124bbac98d157f50a66fb4d26
7
- data.tar.gz: bd61fc685c0419d584040a7fe01b13d5630769378b72ce48f8c369982851cd02def110229c245c5601b38ae3ea8b2da37ad71ef5da5a12b07fbebd9a8f9ab113
6
+ metadata.gz: 6bf5144e34d59eca67c71d8a8a59959afc9ff322cee8d9371f37d5ab893837c52f9f90556575f431633bef3670335f42d85ad7814a6e5e6f7cb88fadd3233be9
7
+ data.tar.gz: 9f3a9c84e07ed5c0f5666b8649abd87e4cf626ff2825bd03649cec9a1d94835f2f513174a0046ec761939ce7774910a43a7160b4949c92caaf1e846e08c0109a
@@ -4,6 +4,7 @@ require 'net/http'
4
4
  require 'open-uri'
5
5
  require 'cgi'
6
6
  require 'uri'
7
+ require 'json'
7
8
 
8
9
  module PaperMetadata
9
10
  class << self
@@ -17,6 +18,8 @@ module PaperMetadata
17
18
  def metadata_for(identifier)
18
19
  if identifier =~ /^arxiv\:(.*)/i
19
20
  metadata_for_arxiv($1.strip)
21
+ elsif identifier =~ /^doi\:\s*(10\.6084\/.*)/i
22
+ metadata_for_doi_from_figshare($1.strip)
20
23
  elsif identifier =~ /^doi\:(.*)/i
21
24
  metadata_for_doi($1.strip)
22
25
  end
@@ -45,11 +48,44 @@ module PaperMetadata
45
48
  doc.xpath("//journal_issue/publication_date/year").first.inner_html
46
49
  paper[:resource] = doc.xpath("//journal_article/doi_data/resource").inner_html
47
50
  else
48
- paper[:status] = :NODOI
51
+ paper = metadata_for_doi_from_datacite(doi)
49
52
  end
50
53
  paper
51
54
  end
52
55
 
56
# Queries the DataCite search API for +doi+ and maps the first returned
# record onto a paper hash.
#
# @param doi [String] bare DOI (it is CGI-escaped before being put in the query)
# @return [Hash] :title, :authors, :published, :publisher, :datacentre and
#   :journal ('DataCite') when a record with a title is found;
#   {status: :NODOI} otherwise
def metadata_for_doi_from_datacite(doi)
  url = "http://search.datacite.org/api?q=#{CGI.escape(doi)}&fl=doi,creator,title,publisher,publicationYear,datacentre&wt=json"
  # Fetch through URI#read (open-uri) rather than Kernel#open: Kernel#open
  # stopped accepting URLs in Ruby 3.0, while URI#read works on old and new Rubies.
  response = JSON.parse(URI.parse(url).read)

  # Walk the payload defensively so an unexpected shape (e.g. an error
  # document without the "response" envelope) yields NODOI, not NoMethodError.
  envelope = response.is_a?(Hash) ? response['response'] : nil
  docs = envelope.is_a?(Hash) ? envelope['docs'] : nil
  result = docs.is_a?(Array) ? docs.first : nil
  return { status: :NODOI } unless result.is_a?(Hash) && result['title']

  # Creators are reordered from "Last, First" to "First Last"; a record
  # without a creator list produces an empty authors string.
  authors = Array(result['creator']).map { |creator| creator.split(', ').reverse.join(' ') }

  {
    title: Array(result['title']).first,
    authors: authors.join(', '),
    published: result['publicationYear'],
    publisher: result['publisher'],
    datacentre: result['datacentre'],
    journal: 'DataCite'
  }
end
72
+
73
# Fetches the FigShare article record for +doi+ and maps its first item
# onto a paper hash.
#
# @param doi [String] bare DOI; it is appended to the articles URL as-is
# @return [Hash] :title, :authors, :published and :journal ('FigShare') when
#   the response contains at least one item; {status: :NODOI} otherwise
def metadata_for_doi_from_figshare(doi)
  # Fetch through URI#read (open-uri) rather than Kernel#open: Kernel#open
  # stopped accepting URLs in Ruby 3.0, while URI#read works on old and new Rubies.
  response = JSON.parse(URI.parse("http://api.figshare.com/articles/#{doi}").read)

  # Only a Hash payload with a non-empty "items" array counts as a hit.
  items = response.is_a?(Hash) ? response['items'] : nil
  result = items.is_a?(Array) ? items.first : nil
  return { status: :NODOI } unless result.is_a?(Hash)

  # An item without an "authors" list produces an empty authors string
  # instead of raising NoMethodError on nil.
  author_names = Array(result['authors']).map { |author| author['full_name'] }

  {
    title: result['title'],
    authors: author_names.join(', '),
    published: result['published_date'],
    journal: 'FigShare'
  }
end
88
+
53
89
  def metadata_for_arxiv(identifier)
54
90
  identifier.gsub!(/^arXiv\:/i, '')
55
91
  url = URI.parse("http://export.arxiv.org/api/query?search_query=#{CGI.escape(identifier)}&start=0&max_results=1")
@@ -59,11 +95,12 @@ module PaperMetadata
59
95
  paper = Hash.new
60
96
  if entry = doc.xpath("//entry").first
61
97
  paper[:title] = entry.xpath('title').text
62
- paper[:author] = entry.xpath('author').text.strip
98
+ paper[:authors] = entry.xpath('author').text.split("\n").map{|a| a.strip if a.strip != ""}.compact.join(', ')
63
99
  paper[:id] = entry.xpath('id').text
64
100
  paper[:updated] = entry.xpath('updated').text
65
101
  paper[:summary] = entry.xpath('summary').text
66
102
  paper[:published] = entry.xpath('published').text
103
+ paper[:journal] = 'arXiv'
67
104
  end
68
105
  paper
69
106
  end
@@ -1,3 +1,3 @@
1
1
  module PaperMetadata
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -9,33 +9,62 @@ class PaperMetadataTest < Test::Unit::TestCase
9
9
  stub_request(:any, /www.crossref.org\/.*/).
10
10
  to_return(:body => doi_response, :status => 200, :headers => { 'Content-Length' => doi_response.length } )
11
11
 
12
- PaperMetadata.doi_username = 'test@example.com'
13
-
14
12
  assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
15
13
  PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
16
14
  end
17
15
 
18
16
  def test_doi_parsing_live
19
17
  WebMock.allow_net_connect!
20
- doi_response = File.read(File.join(File.dirname(__FILE__), 'doi.xml'))
21
- PaperMetadata.doi_username = 'test@crossref.org'
22
18
  assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
23
19
  PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
24
20
  WebMock.disable_net_connect!
25
21
  end
26
22
 
23
# Live (real network) lookup of a 10.6092 DOI, checking the title and the
# formatted author list returned by PaperMetadata.metadata_for.
def test_datacite_doi_parsing_live
  WebMock.allow_net_connect!

  metadata = PaperMetadata.metadata_for('doi:10.6092/issn.1973-9494/3396')
  assert_equal "AlmaDL Journals: Quality Services for Open Access Scientific Publications at the University of Bologna",
    metadata[:title]

  assert_equal "Marialaura Vignocchi, Roberta Lauriola, Andrea Zanni, Antonio Puglisi, Raffaele Messuti",
    metadata[:authors]
ensure
  # Re-disable real connections even when an assertion or the request
  # raises, so a failure here cannot leave the network open for later tests.
  WebMock.disable_net_connect!
end
35
+
36
# Live (real network) lookup of a 10.6084 (FigShare) DOI, checking the title
# and the formatted author list returned by PaperMetadata.metadata_for.
def test_figshare_doi_parsing_live
  WebMock.allow_net_connect!

  metadata = PaperMetadata.metadata_for('doi:10.6084/m9.figshare.736442')
  assert_equal "XML4NGS : A XML-based description of a Next-Generation sequencing project allowing the generation of a ’Makefile’-driven workflow.",
    metadata[:title]

  assert_equal "Pierre Lindenbaum, Raluca Teusan, Richard Redon, Audrey Bihouée, Solena LeScouarnec",
    metadata[:authors]
ensure
  # Re-disable real connections even when an assertion or the request
  # raises, so a failure here cannot leave the network open for later tests.
  WebMock.disable_net_connect!
end
49
+
27
50
  def test_arxiv_parsing
28
51
  arxiv_response = File.read(File.join(File.dirname(__FILE__), 'arxiv.xml'))
29
52
  stub_request(:any, /.*arxiv.org\/.*/).
30
53
  to_return(:body => arxiv_response, :status => 200, :headers => { 'Content-Length' => arxiv_response.length } )
31
54
  assert_equal "Thomas Vojta",
32
- PaperMetadata.metadata_for('arXiv:1301.7746')[:author]
55
+ PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
33
56
  end
34
57
 
35
58
  def test_arxiv_parsing_live
36
59
  WebMock.allow_net_connect!
37
60
  assert_equal "Thomas Vojta",
38
- PaperMetadata.metadata_for('arXiv:1301.7746')[:author]
61
+ PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
62
+ WebMock.disable_net_connect!
63
+ end
64
+
65
# Live (real network) lookup of a DOI that does not exist; the expected
# result is the {status: :NODOI} marker hash.
def test_nonexistent_doi
  WebMock.allow_net_connect!
  assert_equal({status: :NODOI}, PaperMetadata.metadata_for('doi:thisdoi/doesntexist'))
ensure
  # Re-disable real connections even when the assertion or the request
  # raises, so a failure here cannot leave the network open for later tests.
  WebMock.disable_net_connect!
end
41
70
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: paper_metadata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jure Triglav
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-07 00:00:00.000000000 Z
11
+ date: 2013-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pry