pubmed_api 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +15 -1
- data/lib/pubmed_api.rb +42 -14
- data/lib/pubmed_api/parsers.rb +28 -1
- data/lib/pubmed_api/version.rb +1 -1
- data/spec/lib/pubmed_api_spec.rb +17 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19077723b1f29404e04bc59e2bb97f2c6d516b7c
|
4
|
+
data.tar.gz: 086727eb2181e56a9c95067e8707354addf23cf6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2dc9e9e7caca527a127804c35ca78d391f86c34dbffc120e3552a87fe156326be434a4c22d5f2eed962153fdfa1a0b00885336b80480b7cdaed1b5c7d1accbfe
|
7
|
+
data.tar.gz: d4b68d0317e3f78b70c02bade94a1360ae32c9ffa4a7f7c774723289012e85b8925618ad0ebd2c41442c6249c31fc8875f016f59ba046b7c43c22f0107081399
|
data/README.md
CHANGED
@@ -20,7 +20,21 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
-
|
23
|
+
This is a work in progress, but you can already use it.
|
24
|
+
|
25
|
+
To search for papers:
|
26
|
+
|
27
|
+
results = PubmedAPI::Interface.search("quantum physics")
|
28
|
+
results.pmids gives you a list of the matching pubmed ids
|
29
|
+
|
30
|
+
To get a paper:
|
31
|
+
|
32
|
+
strucs = PubmedAPI::Interface.fetch_papers([id])
|
33
|
+
paper = strucs[0]
|
34
|
+
paper.title = "A paper title"
|
35
|
+
paper.url = "http://alinktofulltext.com"
|
36
|
+
|
37
|
+
Look in the spec for further examples
|
24
38
|
|
25
39
|
## Contributing
|
26
40
|
|
data/lib/pubmed_api.rb
CHANGED
@@ -11,17 +11,18 @@ module PubmedAPI
|
|
11
11
|
|
12
12
|
|
13
13
|
DEFAULT_OPTIONS = {:tool => 'ruby-pubmed-api',
|
14
|
-
:database => 'pubmed', #which database eq pubmed/nlmcatalog
|
14
|
+
:database => 'db=pubmed', #which database eq pubmed/nlmcatalog
|
15
15
|
:verb => 'search', #which API verb to use e.g. search/fetch
|
16
16
|
:email => '',
|
17
|
-
#:reldate => 90, #How far back shall we go in days
|
17
|
+
#:reldate => 90, #How far back shall we go in days
|
18
|
+
:add =>'',
|
18
19
|
:retmax => 100000,
|
19
20
|
:retstart => 0,
|
20
21
|
:load_all_pmids => true }
|
21
22
|
|
22
23
|
|
23
|
-
URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?
|
24
|
-
'&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&
|
24
|
+
URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?{database}&tool={tool}&email={email}'+
|
25
|
+
'&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&retmode=xml&{add}'
|
25
26
|
|
26
27
|
class << self
|
27
28
|
|
@@ -50,24 +51,44 @@ module PubmedAPI
|
|
50
51
|
end
|
51
52
|
|
52
53
|
def fetch_papers(ids)
|
53
|
-
xml = fetch_records(ids, 'pubmed')
|
54
|
+
xml = fetch_records(ids, {:verb => 'fetch',:database => 'db=pubmed'})
|
54
55
|
parser = XMLParser.new
|
55
|
-
parser.parse_papers(xml)
|
56
|
+
papers = parser.parse_papers(xml)
|
57
|
+
lookup_hash = get_fulltext_links(ids)
|
58
|
+
|
59
|
+
papers.each do |p|
|
60
|
+
if p.nil?
|
61
|
+
next
|
62
|
+
else
|
63
|
+
p.url = lookup_hash[p.pmid].first.url
|
64
|
+
end
|
65
|
+
end
|
56
66
|
end
|
57
67
|
|
58
68
|
def fetch_journals(nlmids)
|
59
69
|
#Change the ids of those weird journals
|
60
70
|
nlmids = nlmids.map { |e| ((e.include? 'R') ? convert_odd_journal_ids(e) : e ) }
|
61
|
-
xml = fetch_records(nlmids, 'nlmcatalog')
|
71
|
+
xml = fetch_records(nlmids, {:verb => 'fetch',:database => 'db=nlmcatalog'})
|
62
72
|
parser = XMLParser.new
|
63
73
|
parser.parse_journals(xml)
|
64
74
|
end
|
65
75
|
|
66
|
-
|
76
|
+
def get_fulltext_links(ids)
|
77
|
+
opts = {:verb => 'link', :add => 'cmd=llinks', :database => 'dbfrom=pubmed'}
|
78
|
+
xml = fetch_records(ids, opts)
|
79
|
+
|
80
|
+
parser = XMLParser.new
|
81
|
+
lookup_hash = parser.parse_links(xml)
|
82
|
+
missing = (ids - lookup_hash.keys)
|
83
|
+
lookup_hash
|
84
|
+
end
|
85
|
+
|
86
|
+
|
87
|
+
def fetch_records(ids, opts={})
|
67
88
|
|
68
89
|
xml_records = []
|
69
90
|
|
70
|
-
options = DEFAULT_OPTIONS
|
91
|
+
options = DEFAULT_OPTIONS.merge(opts)
|
71
92
|
|
72
93
|
#dice array into reasonable length chunks for download
|
73
94
|
n_length = 500
|
@@ -76,14 +97,18 @@ module PubmedAPI
|
|
76
97
|
|
77
98
|
#Turn string to something html friendly
|
78
99
|
id_string = slice.join(",")
|
79
|
-
doc = make_api_request(options.merge({
|
100
|
+
doc = make_api_request(options.merge({ :query => 'id='+id_string}))
|
80
101
|
records = doc.xpath('./*/*')
|
81
|
-
xml_records
|
102
|
+
xml_records += records
|
82
103
|
|
83
104
|
end
|
84
|
-
|
105
|
+
|
106
|
+
xml_records
|
85
107
|
end
|
86
108
|
|
109
|
+
|
110
|
+
|
111
|
+
|
87
112
|
#Makes the HTTP request and returns the response
|
88
113
|
#TODO handle failures
|
89
114
|
def make_api_request(options)
|
@@ -96,7 +121,7 @@ module PubmedAPI
|
|
96
121
|
def convert_odd_journal_ids(id)
|
97
122
|
|
98
123
|
new_id = nil
|
99
|
-
results = search(id, {:database => 'nlmcatalog'})
|
124
|
+
results = search(id, {:database => 'db=nlmcatalog'})
|
100
125
|
if results.pmids.length ==1
|
101
126
|
new_id = results.pmids[0]
|
102
127
|
else
|
@@ -111,7 +136,7 @@ module PubmedAPI
|
|
111
136
|
id = nil
|
112
137
|
term = issn + "[ISSN]+AND+ncbijournals[filter]"
|
113
138
|
|
114
|
-
results = search(term, {:database => 'nlmcatalog'})
|
139
|
+
results = search(term, {:database => 'db=nlmcatalog'})
|
115
140
|
if results.pmids.length ==1
|
116
141
|
id = results.pmids[0]
|
117
142
|
else
|
@@ -122,6 +147,9 @@ module PubmedAPI
|
|
122
147
|
end
|
123
148
|
|
124
149
|
|
150
|
+
|
151
|
+
|
152
|
+
|
125
153
|
# 300ms minimum wait.
|
126
154
|
def wait
|
127
155
|
sleep WAIT_TIME
|
data/lib/pubmed_api/parsers.rb
CHANGED
@@ -13,7 +13,7 @@ module PubmedAPI
|
|
13
13
|
|
14
14
|
results.count = doc.xpath('/eSearchResult/Count').first.content.to_i
|
15
15
|
|
16
|
-
doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.
|
16
|
+
doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.to_s}
|
17
17
|
|
18
18
|
doc.xpath('/eSearchResult/TranslationStack/TermSet/Term').each do |n|
|
19
19
|
if n.content =~ /"(.*)"\[MeSH Terms\]/
|
@@ -134,6 +134,33 @@ module PubmedAPI
|
|
134
134
|
pmid
|
135
135
|
end
|
136
136
|
|
137
|
+
|
138
|
+
LinkStruct = Struct.new( :url, :pub_id, :pub_name, :cat)
|
139
|
+
|
140
|
+
def parse_links(links_xml)
|
141
|
+
|
142
|
+
l_struc_arr = []
|
143
|
+
link_arr = []
|
144
|
+
lookup_hash = Hash.new{ |a,b| a[b] = Array.new }
|
145
|
+
|
146
|
+
links_xml.each do |node|
|
147
|
+
|
148
|
+
node.css('IdUrlList/IdUrlSet').each do |links|
|
149
|
+
id = links.xpath('Id').text
|
150
|
+
|
151
|
+
links.css('ObjUrl').each do |l|
|
152
|
+
l_struc = LinkStruct.new(l.xpath('Url').text, l.xpath('Provider/Id').text, l.xpath('Name').text,
|
153
|
+
l.xpath('Category').text)
|
154
|
+
|
155
|
+
lookup_hash[id] << l_struc
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
lookup_hash
|
161
|
+
end
|
162
|
+
|
163
|
+
|
137
164
|
|
138
165
|
AuthorStruct = Struct.new( :fore_name, :initials, :last_name)
|
139
166
|
|
data/lib/pubmed_api/version.rb
CHANGED
data/spec/lib/pubmed_api_spec.rb
CHANGED
@@ -2,10 +2,13 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe PubmedAPI do
|
4
4
|
|
5
|
+
before(:all) do
|
6
|
+
@q_results = PubmedAPI::Interface.search("quantum physics")
|
7
|
+
end
|
8
|
+
|
5
9
|
|
6
10
|
it "should perform a search" do
|
7
|
-
|
8
|
-
expect(results.pmids.length).to be > 10
|
11
|
+
expect(@q_results.pmids.length).to be > 10
|
9
12
|
end
|
10
13
|
|
11
14
|
it "should handle phrases not found" do
|
@@ -29,10 +32,12 @@ describe PubmedAPI do
|
|
29
32
|
it "should fetch a paper" do
|
30
33
|
id = '25554862'
|
31
34
|
title = "Completing the picture for the smallest eigenvalue of real Wishart matrices."
|
35
|
+
url = "http://link.aps.org/abstract/PRL/v113/p250201"
|
32
36
|
strucs = PubmedAPI::Interface.fetch_papers([id])
|
33
37
|
paper = strucs[0]
|
34
38
|
expect(paper.title).to eql(title)
|
35
39
|
expect(paper.pmid).to eql(id)
|
40
|
+
expect(paper.url).to eql(url)
|
36
41
|
end
|
37
42
|
|
38
43
|
it "should fetch a journal" do
|
@@ -54,4 +59,14 @@ describe PubmedAPI do
|
|
54
59
|
expect(fixed).to eql('100381')
|
55
60
|
end
|
56
61
|
|
62
|
+
it "should get fulltext urls for ids" do
|
63
|
+
ids = ["25933345","25933144","23933344"]
|
64
|
+
|
65
|
+
hash = PubmedAPI::Interface.get_fulltext_links(ids)
|
66
|
+
paper = hash[ids[0]][0]
|
67
|
+
expect(paper.url).to eql('http://link.aps.org/abstract/PRL/v114/p158701')
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
|
57
72
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pubmed_api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kieran Higgins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|