pubmed_api 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -1
- data/lib/pubmed_api.rb +42 -14
- data/lib/pubmed_api/parsers.rb +28 -1
- data/lib/pubmed_api/version.rb +1 -1
- data/spec/lib/pubmed_api_spec.rb +17 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19077723b1f29404e04bc59e2bb97f2c6d516b7c
|
4
|
+
data.tar.gz: 086727eb2181e56a9c95067e8707354addf23cf6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2dc9e9e7caca527a127804c35ca78d391f86c34dbffc120e3552a87fe156326be434a4c22d5f2eed962153fdfa1a0b00885336b80480b7cdaed1b5c7d1accbfe
|
7
|
+
data.tar.gz: d4b68d0317e3f78b70c02bade94a1360ae32c9ffa4a7f7c774723289012e85b8925618ad0ebd2c41442c6249c31fc8875f016f59ba046b7c43c22f0107081399
|
data/README.md
CHANGED
@@ -20,7 +20,21 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
-
|
23
|
+
This is a work in progress, but it is already usable.
|
24
|
+
|
25
|
+
To search for papers:
|
26
|
+
|
27
|
+
results = PubmedAPI::Interface.search("quantum physics")
|
28
|
+
`results.pmids` gives you a list of the matching PubMed IDs.
|
29
|
+
|
30
|
+
To get a paper:
|
31
|
+
|
32
|
+
strucs = PubmedAPI::Interface.fetch_papers([id])
|
33
|
+
paper = strucs[0]
|
34
|
+
paper.title # => "A paper title"
|
35
|
+
paper.url # => "http://alinktofulltext.com"
|
36
|
+
|
37
|
+
Look in the spec for further examples
|
24
38
|
|
25
39
|
## Contributing
|
26
40
|
|
data/lib/pubmed_api.rb
CHANGED
@@ -11,17 +11,18 @@ module PubmedAPI
|
|
11
11
|
|
12
12
|
|
13
13
|
DEFAULT_OPTIONS = {:tool => 'ruby-pubmed-api',
|
14
|
-
:database => 'pubmed', #which database eq pubmed/nlmcatalog
|
14
|
+
:database => 'db=pubmed', #which database eq pubmed/nlmcatalog
|
15
15
|
:verb => 'search', #which API verb to use e.g. search/fetch
|
16
16
|
:email => '',
|
17
|
-
#:reldate => 90, #How far back shall we go in days
|
17
|
+
#:reldate => 90, #How far back shall we go in days
|
18
|
+
:add =>'',
|
18
19
|
:retmax => 100000,
|
19
20
|
:retstart => 0,
|
20
21
|
:load_all_pmids => true }
|
21
22
|
|
22
23
|
|
23
|
-
URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?
|
24
|
-
'&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&
|
24
|
+
URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?{database}&tool={tool}&email={email}'+
|
25
|
+
'&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&retmode=xml&{add}'
|
25
26
|
|
26
27
|
class << self
|
27
28
|
|
@@ -50,24 +51,44 @@ module PubmedAPI
|
|
50
51
|
end
|
51
52
|
|
52
53
|
def fetch_papers(ids)
|
53
|
-
xml = fetch_records(ids, 'pubmed')
|
54
|
+
xml = fetch_records(ids, {:verb => 'fetch',:database => 'db=pubmed'})
|
54
55
|
parser = XMLParser.new
|
55
|
-
parser.parse_papers(xml)
|
56
|
+
papers = parser.parse_papers(xml)
|
57
|
+
lookup_hash = get_fulltext_links(ids)
|
58
|
+
|
59
|
+
papers.each do |p|
|
60
|
+
if p.nil?
|
61
|
+
next
|
62
|
+
else
|
63
|
+
p.url = lookup_hash[p.pmid].first.url
|
64
|
+
end
|
65
|
+
end
|
56
66
|
end
|
57
67
|
|
58
68
|
def fetch_journals(nlmids)
|
59
69
|
#Change the ids of those weird journals
|
60
70
|
nlmids = nlmids.map { |e| ((e.include? 'R') ? convert_odd_journal_ids(e) : e ) }
|
61
|
-
xml = fetch_records(nlmids, 'nlmcatalog')
|
71
|
+
xml = fetch_records(nlmids, {:verb => 'fetch',:database => 'db=nlmcatalog'})
|
62
72
|
parser = XMLParser.new
|
63
73
|
parser.parse_journals(xml)
|
64
74
|
end
|
65
75
|
|
66
|
-
|
76
|
+
def get_fulltext_links(ids)
|
77
|
+
opts = {:verb => 'link', :add => 'cmd=llinks', :database => 'dbfrom=pubmed'}
|
78
|
+
xml = fetch_records(ids, opts)
|
79
|
+
|
80
|
+
parser = XMLParser.new
|
81
|
+
lookup_hash = parser.parse_links(xml)
|
82
|
+
missing = (ids - lookup_hash.keys)
|
83
|
+
lookup_hash
|
84
|
+
end
|
85
|
+
|
86
|
+
|
87
|
+
def fetch_records(ids, opts={})
|
67
88
|
|
68
89
|
xml_records = []
|
69
90
|
|
70
|
-
options = DEFAULT_OPTIONS
|
91
|
+
options = DEFAULT_OPTIONS.merge(opts)
|
71
92
|
|
72
93
|
#dice array into reasonable length chunks for download
|
73
94
|
n_length = 500
|
@@ -76,14 +97,18 @@ module PubmedAPI
|
|
76
97
|
|
77
98
|
#Turn string to something html friendly
|
78
99
|
id_string = slice.join(",")
|
79
|
-
doc = make_api_request(options.merge({
|
100
|
+
doc = make_api_request(options.merge({ :query => 'id='+id_string}))
|
80
101
|
records = doc.xpath('./*/*')
|
81
|
-
xml_records
|
102
|
+
xml_records += records
|
82
103
|
|
83
104
|
end
|
84
|
-
|
105
|
+
|
106
|
+
xml_records
|
85
107
|
end
|
86
108
|
|
109
|
+
|
110
|
+
|
111
|
+
|
87
112
|
#Makes the HTTP request and returns the response
|
88
113
|
#TODO handle failures
|
89
114
|
def make_api_request(options)
|
@@ -96,7 +121,7 @@ module PubmedAPI
|
|
96
121
|
def convert_odd_journal_ids(id)
|
97
122
|
|
98
123
|
new_id = nil
|
99
|
-
results = search(id, {:database => 'nlmcatalog'})
|
124
|
+
results = search(id, {:database => 'db=nlmcatalog'})
|
100
125
|
if results.pmids.length ==1
|
101
126
|
new_id = results.pmids[0]
|
102
127
|
else
|
@@ -111,7 +136,7 @@ module PubmedAPI
|
|
111
136
|
id = nil
|
112
137
|
term = issn + "[ISSN]+AND+ncbijournals[filter]"
|
113
138
|
|
114
|
-
results = search(term, {:database => 'nlmcatalog'})
|
139
|
+
results = search(term, {:database => 'db=nlmcatalog'})
|
115
140
|
if results.pmids.length ==1
|
116
141
|
id = results.pmids[0]
|
117
142
|
else
|
@@ -122,6 +147,9 @@ module PubmedAPI
|
|
122
147
|
end
|
123
148
|
|
124
149
|
|
150
|
+
|
151
|
+
|
152
|
+
|
125
153
|
# 300ms minimum wait.
|
126
154
|
def wait
|
127
155
|
sleep WAIT_TIME
|
data/lib/pubmed_api/parsers.rb
CHANGED
@@ -13,7 +13,7 @@ module PubmedAPI
|
|
13
13
|
|
14
14
|
results.count = doc.xpath('/eSearchResult/Count').first.content.to_i
|
15
15
|
|
16
|
-
doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.
|
16
|
+
doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.to_s}
|
17
17
|
|
18
18
|
doc.xpath('/eSearchResult/TranslationStack/TermSet/Term').each do |n|
|
19
19
|
if n.content =~ /"(.*)"\[MeSH Terms\]/
|
@@ -134,6 +134,33 @@ module PubmedAPI
|
|
134
134
|
pmid
|
135
135
|
end
|
136
136
|
|
137
|
+
|
138
|
+
LinkStruct = Struct.new( :url, :pub_id, :pub_name, :cat)
|
139
|
+
|
140
|
+
def parse_links(links_xml)
|
141
|
+
|
142
|
+
l_struc_arr = []
|
143
|
+
link_arr = []
|
144
|
+
lookup_hash = Hash.new{ |a,b| a[b] = Array.new }
|
145
|
+
|
146
|
+
links_xml.each do |node|
|
147
|
+
|
148
|
+
node.css('IdUrlList/IdUrlSet').each do |links|
|
149
|
+
id = links.xpath('Id').text
|
150
|
+
|
151
|
+
links.css('ObjUrl').each do |l|
|
152
|
+
l_struc = LinkStruct.new(l.xpath('Url').text, l.xpath('Provider/Id').text, l.xpath('Name').text,
|
153
|
+
l.xpath('Category').text)
|
154
|
+
|
155
|
+
lookup_hash[id] << l_struc
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
lookup_hash
|
161
|
+
end
|
162
|
+
|
163
|
+
|
137
164
|
|
138
165
|
AuthorStruct = Struct.new( :fore_name, :initials, :last_name)
|
139
166
|
|
data/lib/pubmed_api/version.rb
CHANGED
data/spec/lib/pubmed_api_spec.rb
CHANGED
@@ -2,10 +2,13 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe PubmedAPI do
|
4
4
|
|
5
|
+
before(:all) do
|
6
|
+
@q_results = PubmedAPI::Interface.search("quantum physics")
|
7
|
+
end
|
8
|
+
|
5
9
|
|
6
10
|
it "should perform a search" do
|
7
|
-
|
8
|
-
expect(results.pmids.length).to be > 10
|
11
|
+
expect(@q_results.pmids.length).to be > 10
|
9
12
|
end
|
10
13
|
|
11
14
|
it "should handle phrases not found" do
|
@@ -29,10 +32,12 @@ describe PubmedAPI do
|
|
29
32
|
it "should fetch a paper" do
|
30
33
|
id = '25554862'
|
31
34
|
title = "Completing the picture for the smallest eigenvalue of real Wishart matrices."
|
35
|
+
url = "http://link.aps.org/abstract/PRL/v113/p250201"
|
32
36
|
strucs = PubmedAPI::Interface.fetch_papers([id])
|
33
37
|
paper = strucs[0]
|
34
38
|
expect(paper.title).to eql(title)
|
35
39
|
expect(paper.pmid).to eql(id)
|
40
|
+
expect(paper.url).to eql(url)
|
36
41
|
end
|
37
42
|
|
38
43
|
it "should fetch a journal" do
|
@@ -54,4 +59,14 @@ describe PubmedAPI do
|
|
54
59
|
expect(fixed).to eql('100381')
|
55
60
|
end
|
56
61
|
|
62
|
+
it "should get fulltext urls for ids" do
|
63
|
+
ids = ["25933345","25933144","23933344"]
|
64
|
+
|
65
|
+
hash = PubmedAPI::Interface.get_fulltext_links(ids)
|
66
|
+
paper = hash[ids[0]][0]
|
67
|
+
expect(paper.url).to eql('http://link.aps.org/abstract/PRL/v114/p158701')
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
|
57
72
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pubmed_api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kieran Higgins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|