paper_metadata 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f404b9b484329f732c2b71bd2e4abdf84ca97cee
4
- data.tar.gz: 8f4b35221f91a12d49a445028462d9561ea8424a
3
+ metadata.gz: 70781715c1398e4f11015d9e03ccec207db08c3a
4
+ data.tar.gz: c8338bcad90badd674fa2d227710dabb669554a9
5
5
  SHA512:
6
- metadata.gz: 9baba7b45129a21cf0ad08a6db0469d0f395bd848a9e619bce67d77cfb9d9ab2f08423d5444ad4a87ef7c44d5b9d4b3cccc4a3b124bbac98d157f50a66fb4d26
7
- data.tar.gz: bd61fc685c0419d584040a7fe01b13d5630769378b72ce48f8c369982851cd02def110229c245c5601b38ae3ea8b2da37ad71ef5da5a12b07fbebd9a8f9ab113
6
+ metadata.gz: 6bf5144e34d59eca67c71d8a8a59959afc9ff322cee8d9371f37d5ab893837c52f9f90556575f431633bef3670335f42d85ad7814a6e5e6f7cb88fadd3233be9
7
+ data.tar.gz: 9f3a9c84e07ed5c0f5666b8649abd87e4cf626ff2825bd03649cec9a1d94835f2f513174a0046ec761939ce7774910a43a7160b4949c92caaf1e846e08c0109a
@@ -4,6 +4,7 @@ require 'net/http'
4
4
  require 'open-uri'
5
5
  require 'cgi'
6
6
  require 'uri'
7
+ require 'json'
7
8
 
8
9
  module PaperMetadata
9
10
  class << self
@@ -17,6 +18,8 @@ module PaperMetadata
17
18
  def metadata_for(identifier)
18
19
  if identifier =~ /^arxiv\:(.*)/i
19
20
  metadata_for_arxiv($1.strip)
21
+ elsif identifier =~ /^doi\:\s*(10\.6084\/.*)/i
22
+ metadata_for_doi_from_figshare($1.strip)
20
23
  elsif identifier =~ /^doi\:(.*)/i
21
24
  metadata_for_doi($1.strip)
22
25
  end
@@ -45,11 +48,44 @@ module PaperMetadata
45
48
  doc.xpath("//journal_issue/publication_date/year").first.inner_html
46
49
  paper[:resource] = doc.xpath("//journal_article/doi_data/resource").inner_html
47
50
  else
48
- paper[:status] = :NODOI
51
+ paper = metadata_for_doi_from_datacite(doi)
49
52
  end
50
53
  paper
51
54
  end
52
55
 
56
+ def metadata_for_doi_from_datacite(doi)
57
+ paper = {}
58
+ response = JSON.parse(open("http://search.datacite.org/api?q=#{CGI.escape(doi)}&fl=doi,creator,title,publisher,publicationYear,datacentre&wt=json").read)
59
+ if response && !response['response']['docs'].empty? && response['response']['docs'].first['title']
60
+ result = response['response']['docs'].first
61
+ paper[:title] = result['title'].first
62
+ paper[:authors] = result['creator'].map{|c| c.split(', ').reverse.join(' ')}.join(', ')
63
+ paper[:published] = result['publicationYear']
64
+ paper[:publisher] = result['publisher']
65
+ paper[:datacentre] = result['datacentre']
66
+ paper[:journal] = 'DataCite'
67
+ paper
68
+ else
69
+ {status: :NODOI}
70
+ end
71
+ end
72
+
73
+ def metadata_for_doi_from_figshare(doi)
74
+ paper = {}
75
+ response = JSON.parse(open("http://api.figshare.com/articles/#{doi}").read)
76
+
77
+ if response && response['items'] && response['items'].first
78
+ result = response['items'].first
79
+ paper[:title] = result['title']
80
+ paper[:authors] = result['authors'].map{|a| a['full_name']}.join(', ')
81
+ paper[:published] = result['published_date']
82
+ paper[:journal] = 'FigShare'
83
+ paper
84
+ else
85
+ {status: :NODOI}
86
+ end
87
+ end
88
+
53
89
  def metadata_for_arxiv(identifier)
54
90
  identifier.gsub!(/^arXiv\:/i, '')
55
91
  url = URI.parse("http://export.arxiv.org/api/query?search_query=#{CGI.escape(identifier)}&start=0&max_results=1")
@@ -59,11 +95,12 @@ module PaperMetadata
59
95
  paper = Hash.new
60
96
  if entry = doc.xpath("//entry").first
61
97
  paper[:title] = entry.xpath('title').text
62
- paper[:author] = entry.xpath('author').text.strip
98
+ paper[:authors] = entry.xpath('author').text.split("\n").map{|a| a.strip if a.strip != ""}.compact.join(', ')
63
99
  paper[:id] = entry.xpath('id').text
64
100
  paper[:updated] = entry.xpath('updated').text
65
101
  paper[:summary] = entry.xpath('summary').text
66
102
  paper[:published] = entry.xpath('published').text
103
+ paper[:journal] = 'arXiv'
67
104
  end
68
105
  paper
69
106
  end
@@ -1,3 +1,3 @@
1
1
  module PaperMetadata
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -9,33 +9,62 @@ class PaperMetadataTest < Test::Unit::TestCase
9
9
  stub_request(:any, /www.crossref.org\/.*/).
10
10
  to_return(:body => doi_response, :status => 200, :headers => { 'Content-Length' => doi_response.length } )
11
11
 
12
- PaperMetadata.doi_username = 'test@example.com'
13
-
14
12
  assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
15
13
  PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
16
14
  end
17
15
 
18
16
  def test_doi_parsing_live
19
17
  WebMock.allow_net_connect!
20
- doi_response = File.read(File.join(File.dirname(__FILE__), 'doi.xml'))
21
- PaperMetadata.doi_username = 'test@crossref.org'
22
18
  assert_equal "Basic Modeling Approach To Optimize Elemental Imaging by Laser Ablation ICPMS",
23
19
  PaperMetadata.metadata_for('doi:10.1021/ac1014832')[:title]
24
20
  WebMock.disable_net_connect!
25
21
  end
26
22
 
23
+ def test_datacite_doi_parsing_live
24
+ WebMock.allow_net_connect!
25
+
26
+ metadata = PaperMetadata.metadata_for('doi:10.6092/issn.1973-9494/3396')
27
+ assert_equal "AlmaDL Journals: Quality Services for Open Access Scientific Publications at the University of Bologna",
28
+ metadata[:title]
29
+
30
+ assert_equal "Marialaura Vignocchi, Roberta Lauriola, Andrea Zanni, Antonio Puglisi, Raffaele Messuti",
31
+ metadata[:authors]
32
+
33
+ WebMock.disable_net_connect!
34
+ end
35
+
36
+ def test_figshare_doi_parsing_live
37
+ WebMock.allow_net_connect!
38
+
39
+ metadata = PaperMetadata.metadata_for('doi:10.6084/m9.figshare.736442')
40
+ assert_equal "XML4NGS : A XML-based description of a Next-Generation sequencing project allowing the generation of a ’Makefile’-driven workflow.",
41
+ metadata[:title]
42
+
43
+ assert_equal "Pierre Lindenbaum, Raluca Teusan, Richard Redon, Audrey Bihouée, Solena LeScouarnec",
44
+ metadata[:authors]
45
+
46
+ WebMock.disable_net_connect!
47
+
48
+ end
49
+
27
50
  def test_arxiv_parsing
28
51
  arxiv_response = File.read(File.join(File.dirname(__FILE__), 'arxiv.xml'))
29
52
  stub_request(:any, /.*arxiv.org\/.*/).
30
53
  to_return(:body => arxiv_response, :status => 200, :headers => { 'Content-Length' => arxiv_response.length } )
31
54
  assert_equal "Thomas Vojta",
32
- PaperMetadata.metadata_for('arXiv:1301.7746')[:author]
55
+ PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
33
56
  end
34
57
 
35
58
  def test_arxiv_parsing_live
36
59
  WebMock.allow_net_connect!
37
60
  assert_equal "Thomas Vojta",
38
- PaperMetadata.metadata_for('arXiv:1301.7746')[:author]
61
+ PaperMetadata.metadata_for('arXiv:1301.7746')[:authors]
62
+ WebMock.disable_net_connect!
63
+ end
64
+
65
+ def test_nonexistent_doi
66
+ WebMock.allow_net_connect!
67
+ assert_equal({status: :NODOI}, PaperMetadata.metadata_for('doi:thisdoi/doesntexist'))
39
68
  WebMock.disable_net_connect!
40
69
  end
41
70
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: paper_metadata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jure Triglav
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-07 00:00:00.000000000 Z
11
+ date: 2013-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pry