pubmed_api 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dd746c28ff6467d98cc9453f4099ebb3a51be2f5
4
- data.tar.gz: 466279c7008a90605975055c48e1e011818b4ab8
3
+ metadata.gz: 19077723b1f29404e04bc59e2bb97f2c6d516b7c
4
+ data.tar.gz: 086727eb2181e56a9c95067e8707354addf23cf6
5
5
  SHA512:
6
- metadata.gz: b886ca7fc3aa85f3a60dfd33653b6fff9c8152034550c6b78f2884f74a25dc391b9c00b6f14faf21feab79bb97460a9b4f728029621a7d10b1a2b2078e98b294
7
- data.tar.gz: 93b6be04e8817a988b8c710d35c22db2c606e98fa6834ef9f08af98b33191c8aba6e4b4779a2a05e0b157245aa9ccbeaa0b69ee327baadc1696a5a2890e3752f
6
+ metadata.gz: 2dc9e9e7caca527a127804c35ca78d391f86c34dbffc120e3552a87fe156326be434a4c22d5f2eed962153fdfa1a0b00885336b80480b7cdaed1b5c7d1accbfe
7
+ data.tar.gz: d4b68d0317e3f78b70c02bade94a1360ae32c9ffa4a7f7c774723289012e85b8925618ad0ebd2c41442c6249c31fc8875f016f59ba046b7c43c22f0107081399
data/README.md CHANGED
@@ -20,7 +20,21 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- TODO: Write usage instructions here
23
+ This is a work in progress, but you can use it.
24
+
25
+ To search for papers:
26
+
27
+ results = PubmedAPI::Interface.search("quantum physics")
28
+ results.pmids gives you a list of the matching pubmed ids
29
+
30
+ To get a paper:
31
+
32
+ strucs = PubmedAPI::Interface.fetch_papers([id])
33
+ paper = strucs[0]
34
+ paper.title # => "A paper title"
35
+ paper.url # => "http://alinktofulltext.com"
36
+
37
+ Look in the spec for further examples
24
38
 
25
39
  ## Contributing
26
40
 
data/lib/pubmed_api.rb CHANGED
@@ -11,17 +11,18 @@ module PubmedAPI
11
11
 
12
12
 
13
13
  DEFAULT_OPTIONS = {:tool => 'ruby-pubmed-api',
14
- :database => 'pubmed', #which database eq pubmed/nlmcatalog
14
+ :database => 'db=pubmed', #which database eq pubmed/nlmcatalog
15
15
  :verb => 'search', #which API verb to use e.g. search/fetch
16
16
  :email => '',
17
- #:reldate => 90, #How far back shall we go in days
17
+ #:reldate => 90, #How far back shall we go in days
18
+ :add =>'',
18
19
  :retmax => 100000,
19
20
  :retstart => 0,
20
21
  :load_all_pmids => true }
21
22
 
22
23
 
23
- URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?db={database}&tool={tool}&email={email}'+
24
- '&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&rettype=fasta&retmode=xml'
24
+ URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?{database}&tool={tool}&email={email}'+
25
+ '&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&retmode=xml&{add}'
25
26
 
26
27
  class << self
27
28
 
@@ -50,24 +51,44 @@ module PubmedAPI
50
51
  end
51
52
 
52
53
  def fetch_papers(ids)
53
- xml = fetch_records(ids, 'pubmed')
54
+ xml = fetch_records(ids, {:verb => 'fetch',:database => 'db=pubmed'})
54
55
  parser = XMLParser.new
55
- parser.parse_papers(xml)
56
+ papers = parser.parse_papers(xml)
57
+ lookup_hash = get_fulltext_links(ids)
58
+
59
+ papers.each do |p|
60
+ if p.nil?
61
+ next
62
+ else
63
+ p.url = lookup_hash[p.pmid].first.url
64
+ end
65
+ end
56
66
  end
57
67
 
58
68
  def fetch_journals(nlmids)
59
69
  #Change the ids of those weird journals
60
70
  nlmids = nlmids.map { |e| ((e.include? 'R') ? convert_odd_journal_ids(e) : e ) }
61
- xml = fetch_records(nlmids, 'nlmcatalog')
71
+ xml = fetch_records(nlmids, {:verb => 'fetch',:database => 'db=nlmcatalog'})
62
72
  parser = XMLParser.new
63
73
  parser.parse_journals(xml)
64
74
  end
65
75
 
66
- def fetch_records(ids, database)
76
+ def get_fulltext_links(ids)
77
+ opts = {:verb => 'link', :add => 'cmd=llinks', :database => 'dbfrom=pubmed'}
78
+ xml = fetch_records(ids, opts)
79
+
80
+ parser = XMLParser.new
81
+ lookup_hash = parser.parse_links(xml)
82
+ missing = (ids - lookup_hash.keys)
83
+ lookup_hash
84
+ end
85
+
86
+
87
+ def fetch_records(ids, opts={})
67
88
 
68
89
  xml_records = []
69
90
 
70
- options = DEFAULT_OPTIONS
91
+ options = DEFAULT_OPTIONS.merge(opts)
71
92
 
72
93
  #dice array into reasonable length chunks for download
73
94
  n_length = 500
@@ -76,14 +97,18 @@ module PubmedAPI
76
97
 
77
98
  #Turn string to something html friendly
78
99
  id_string = slice.join(",")
79
- doc = make_api_request(options.merge({:verb => 'fetch',:database => database, :query => 'id='+id_string}))
100
+ doc = make_api_request(options.merge({ :query => 'id='+id_string}))
80
101
  records = doc.xpath('./*/*')
81
- xml_records << records
102
+ xml_records += records
82
103
 
83
104
  end
84
- xml_records.flatten
105
+
106
+ xml_records
85
107
  end
86
108
 
109
+
110
+
111
+
87
112
  #Makes the HTTP request and returns the response
88
113
  #TODO handle failures
89
114
  def make_api_request(options)
@@ -96,7 +121,7 @@ module PubmedAPI
96
121
  def convert_odd_journal_ids(id)
97
122
 
98
123
  new_id = nil
99
- results = search(id, {:database => 'nlmcatalog'})
124
+ results = search(id, {:database => 'db=nlmcatalog'})
100
125
  if results.pmids.length ==1
101
126
  new_id = results.pmids[0]
102
127
  else
@@ -111,7 +136,7 @@ module PubmedAPI
111
136
  id = nil
112
137
  term = issn + "[ISSN]+AND+ncbijournals[filter]"
113
138
 
114
- results = search(term, {:database => 'nlmcatalog'})
139
+ results = search(term, {:database => 'db=nlmcatalog'})
115
140
  if results.pmids.length ==1
116
141
  id = results.pmids[0]
117
142
  else
@@ -122,6 +147,9 @@ module PubmedAPI
122
147
  end
123
148
 
124
149
 
150
+
151
+
152
+
125
153
  # 300ms minimum wait.
126
154
  def wait
127
155
  sleep WAIT_TIME
@@ -13,7 +13,7 @@ module PubmedAPI
13
13
 
14
14
  results.count = doc.xpath('/eSearchResult/Count').first.content.to_i
15
15
 
16
- doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.to_i}
16
+ doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.to_s}
17
17
 
18
18
  doc.xpath('/eSearchResult/TranslationStack/TermSet/Term').each do |n|
19
19
  if n.content =~ /"(.*)"\[MeSH Terms\]/
@@ -134,6 +134,33 @@ module PubmedAPI
134
134
  pmid
135
135
  end
136
136
 
137
+
138
+ LinkStruct = Struct.new( :url, :pub_id, :pub_name, :cat)
139
+
140
+ def parse_links(links_xml)
141
+
142
+ l_struc_arr = []
143
+ link_arr = []
144
+ lookup_hash = Hash.new{ |a,b| a[b] = Array.new }
145
+
146
+ links_xml.each do |node|
147
+
148
+ node.css('IdUrlList/IdUrlSet').each do |links|
149
+ id = links.xpath('Id').text
150
+
151
+ links.css('ObjUrl').each do |l|
152
+ l_struc = LinkStruct.new(l.xpath('Url').text, l.xpath('Provider/Id').text, l.xpath('Name').text,
153
+ l.xpath('Category').text)
154
+
155
+ lookup_hash[id] << l_struc
156
+ end
157
+ end
158
+ end
159
+
160
+ lookup_hash
161
+ end
162
+
163
+
137
164
 
138
165
  AuthorStruct = Struct.new( :fore_name, :initials, :last_name)
139
166
 
@@ -1,3 +1,3 @@
1
1
  module PubmedAPI
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -2,10 +2,13 @@ require 'spec_helper'
2
2
 
3
3
  describe PubmedAPI do
4
4
 
5
+ before(:all) do
6
+ @q_results = PubmedAPI::Interface.search("quantum physics")
7
+ end
8
+
5
9
 
6
10
  it "should perform a search" do
7
- results = PubmedAPI::Interface.search("quantum physics", {:reldate => 90})
8
- expect(results.pmids.length).to be > 10
11
+ expect(@q_results.pmids.length).to be > 10
9
12
  end
10
13
 
11
14
  it "should handle phrases not found" do
@@ -29,10 +32,12 @@ describe PubmedAPI do
29
32
  it "should fetch a paper" do
30
33
  id = '25554862'
31
34
  title = "Completing the picture for the smallest eigenvalue of real Wishart matrices."
35
+ url = "http://link.aps.org/abstract/PRL/v113/p250201"
32
36
  strucs = PubmedAPI::Interface.fetch_papers([id])
33
37
  paper = strucs[0]
34
38
  expect(paper.title).to eql(title)
35
39
  expect(paper.pmid).to eql(id)
40
+ expect(paper.url).to eql(url)
36
41
  end
37
42
 
38
43
  it "should fetch a journal" do
@@ -54,4 +59,14 @@ describe PubmedAPI do
54
59
  expect(fixed).to eql('100381')
55
60
  end
56
61
 
62
+ it "should get fulltext urls for ids" do
63
+ ids = ["25933345","25933144","23933344"]
64
+
65
+ hash = PubmedAPI::Interface.get_fulltext_links(ids)
66
+ paper = hash[ids[0]][0]
67
+ expect(paper.url).to eql('http://link.aps.org/abstract/PRL/v114/p158701')
68
+ end
69
+
70
+
71
+
57
72
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pubmed_api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kieran Higgins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-03 00:00:00.000000000 Z
11
+ date: 2015-05-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler