pubmed_api 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dd746c28ff6467d98cc9453f4099ebb3a51be2f5
4
- data.tar.gz: 466279c7008a90605975055c48e1e011818b4ab8
3
+ metadata.gz: 19077723b1f29404e04bc59e2bb97f2c6d516b7c
4
+ data.tar.gz: 086727eb2181e56a9c95067e8707354addf23cf6
5
5
  SHA512:
6
- metadata.gz: b886ca7fc3aa85f3a60dfd33653b6fff9c8152034550c6b78f2884f74a25dc391b9c00b6f14faf21feab79bb97460a9b4f728029621a7d10b1a2b2078e98b294
7
- data.tar.gz: 93b6be04e8817a988b8c710d35c22db2c606e98fa6834ef9f08af98b33191c8aba6e4b4779a2a05e0b157245aa9ccbeaa0b69ee327baadc1696a5a2890e3752f
6
+ metadata.gz: 2dc9e9e7caca527a127804c35ca78d391f86c34dbffc120e3552a87fe156326be434a4c22d5f2eed962153fdfa1a0b00885336b80480b7cdaed1b5c7d1accbfe
7
+ data.tar.gz: d4b68d0317e3f78b70c02bade94a1360ae32c9ffa4a7f7c774723289012e85b8925618ad0ebd2c41442c6249c31fc8875f016f59ba046b7c43c22f0107081399
data/README.md CHANGED
@@ -20,7 +20,21 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- TODO: Write usage instructions here
23
+ This is a work in progress, but it is already usable.
24
+
25
+ To search for papers:
26
+
27
+ results = PubmedAPI::Interface.search("quantum physics")
28
+ results.pmids gives you a list of the matching pubmed ids
29
+
30
+ To get a paper:
31
+
32
+ strucs = PubmedAPI::Interface.fetch_papers([id])
33
+ paper = strucs[0]
34
+ paper.title # => "A paper title"
35
+ paper.url # => "http://alinktofulltext.com"
36
+
37
+ Look in the spec for further examples
24
38
 
25
39
  ## Contributing
26
40
 
data/lib/pubmed_api.rb CHANGED
@@ -11,17 +11,18 @@ module PubmedAPI
11
11
 
12
12
 
13
13
  DEFAULT_OPTIONS = {:tool => 'ruby-pubmed-api',
14
- :database => 'pubmed', #which database eq pubmed/nlmcatalog
14
+ :database => 'db=pubmed', #which database eq pubmed/nlmcatalog
15
15
  :verb => 'search', #which API verb to use e.g. search/fetch
16
16
  :email => '',
17
- #:reldate => 90, #How far back shall we go in days
17
+ #:reldate => 90, #How far back shall we go in days
18
+ :add =>'',
18
19
  :retmax => 100000,
19
20
  :retstart => 0,
20
21
  :load_all_pmids => true }
21
22
 
22
23
 
23
- URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?db={database}&tool={tool}&email={email}'+
24
- '&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&rettype=fasta&retmode=xml'
24
+ URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?{database}&tool={tool}&email={email}'+
25
+ '&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&retmode=xml&{add}'
25
26
 
26
27
  class << self
27
28
 
@@ -50,24 +51,44 @@ module PubmedAPI
50
51
  end
51
52
 
52
53
  def fetch_papers(ids)
53
- xml = fetch_records(ids, 'pubmed')
54
+ xml = fetch_records(ids, {:verb => 'fetch',:database => 'db=pubmed'})
54
55
  parser = XMLParser.new
55
- parser.parse_papers(xml)
56
+ papers = parser.parse_papers(xml)
57
+ lookup_hash = get_fulltext_links(ids)
58
+
59
+ papers.each do |p|
60
+ if p.nil?
61
+ next
62
+ else
63
+ p.url = lookup_hash[p.pmid].first.url
64
+ end
65
+ end
56
66
  end
57
67
 
58
68
  def fetch_journals(nlmids)
59
69
  #Change the ids of those weird journals
60
70
  nlmids = nlmids.map { |e| ((e.include? 'R') ? convert_odd_journal_ids(e) : e ) }
61
- xml = fetch_records(nlmids, 'nlmcatalog')
71
+ xml = fetch_records(nlmids, {:verb => 'fetch',:database => 'db=nlmcatalog'})
62
72
  parser = XMLParser.new
63
73
  parser.parse_journals(xml)
64
74
  end
65
75
 
66
- def fetch_records(ids, database)
76
+ def get_fulltext_links(ids)
77
+ opts = {:verb => 'link', :add => 'cmd=llinks', :database => 'dbfrom=pubmed'}
78
+ xml = fetch_records(ids, opts)
79
+
80
+ parser = XMLParser.new
81
+ lookup_hash = parser.parse_links(xml)
82
+ missing = (ids - lookup_hash.keys)
83
+ lookup_hash
84
+ end
85
+
86
+
87
+ def fetch_records(ids, opts={})
67
88
 
68
89
  xml_records = []
69
90
 
70
- options = DEFAULT_OPTIONS
91
+ options = DEFAULT_OPTIONS.merge(opts)
71
92
 
72
93
  #dice array into reasonable length chunks for download
73
94
  n_length = 500
@@ -76,14 +97,18 @@ module PubmedAPI
76
97
 
77
98
  #Turn string to something html friendly
78
99
  id_string = slice.join(",")
79
- doc = make_api_request(options.merge({:verb => 'fetch',:database => database, :query => 'id='+id_string}))
100
+ doc = make_api_request(options.merge({ :query => 'id='+id_string}))
80
101
  records = doc.xpath('./*/*')
81
- xml_records << records
102
+ xml_records += records
82
103
 
83
104
  end
84
- xml_records.flatten
105
+
106
+ xml_records
85
107
  end
86
108
 
109
+
110
+
111
+
87
112
  #Makes the HTTP request and returns the response
88
113
  #TODO handle failures
89
114
  def make_api_request(options)
@@ -96,7 +121,7 @@ module PubmedAPI
96
121
  def convert_odd_journal_ids(id)
97
122
 
98
123
  new_id = nil
99
- results = search(id, {:database => 'nlmcatalog'})
124
+ results = search(id, {:database => 'db=nlmcatalog'})
100
125
  if results.pmids.length ==1
101
126
  new_id = results.pmids[0]
102
127
  else
@@ -111,7 +136,7 @@ module PubmedAPI
111
136
  id = nil
112
137
  term = issn + "[ISSN]+AND+ncbijournals[filter]"
113
138
 
114
- results = search(term, {:database => 'nlmcatalog'})
139
+ results = search(term, {:database => 'db=nlmcatalog'})
115
140
  if results.pmids.length ==1
116
141
  id = results.pmids[0]
117
142
  else
@@ -122,6 +147,9 @@ module PubmedAPI
122
147
  end
123
148
 
124
149
 
150
+
151
+
152
+
125
153
  # 300ms minimum wait.
126
154
  def wait
127
155
  sleep WAIT_TIME
@@ -13,7 +13,7 @@ module PubmedAPI
13
13
 
14
14
  results.count = doc.xpath('/eSearchResult/Count').first.content.to_i
15
15
 
16
- doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.to_i}
16
+ doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.to_s}
17
17
 
18
18
  doc.xpath('/eSearchResult/TranslationStack/TermSet/Term').each do |n|
19
19
  if n.content =~ /"(.*)"\[MeSH Terms\]/
@@ -134,6 +134,33 @@ module PubmedAPI
134
134
  pmid
135
135
  end
136
136
 
137
+
138
+ LinkStruct = Struct.new( :url, :pub_id, :pub_name, :cat)
139
+
140
+ def parse_links(links_xml)
141
+
142
+ l_struc_arr = []
143
+ link_arr = []
144
+ lookup_hash = Hash.new{ |a,b| a[b] = Array.new }
145
+
146
+ links_xml.each do |node|
147
+
148
+ node.css('IdUrlList/IdUrlSet').each do |links|
149
+ id = links.xpath('Id').text
150
+
151
+ links.css('ObjUrl').each do |l|
152
+ l_struc = LinkStruct.new(l.xpath('Url').text, l.xpath('Provider/Id').text, l.xpath('Name').text,
153
+ l.xpath('Category').text)
154
+
155
+ lookup_hash[id] << l_struc
156
+ end
157
+ end
158
+ end
159
+
160
+ lookup_hash
161
+ end
162
+
163
+
137
164
 
138
165
  AuthorStruct = Struct.new( :fore_name, :initials, :last_name)
139
166
 
@@ -1,3 +1,3 @@
1
1
  module PubmedAPI
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -2,10 +2,13 @@ require 'spec_helper'
2
2
 
3
3
  describe PubmedAPI do
4
4
 
5
+ before(:all) do
6
+ @q_results = PubmedAPI::Interface.search("quantum physics")
7
+ end
8
+
5
9
 
6
10
  it "should perform a search" do
7
- results = PubmedAPI::Interface.search("quantum physics", {:reldate => 90})
8
- expect(results.pmids.length).to be > 10
11
+ expect(@q_results.pmids.length).to be > 10
9
12
  end
10
13
 
11
14
  it "should handle phrases not found" do
@@ -29,10 +32,12 @@ describe PubmedAPI do
29
32
  it "should fetch a paper" do
30
33
  id = '25554862'
31
34
  title = "Completing the picture for the smallest eigenvalue of real Wishart matrices."
35
+ url = "http://link.aps.org/abstract/PRL/v113/p250201"
32
36
  strucs = PubmedAPI::Interface.fetch_papers([id])
33
37
  paper = strucs[0]
34
38
  expect(paper.title).to eql(title)
35
39
  expect(paper.pmid).to eql(id)
40
+ expect(paper.url).to eql(url)
36
41
  end
37
42
 
38
43
  it "should fetch a journal" do
@@ -54,4 +59,14 @@ describe PubmedAPI do
54
59
  expect(fixed).to eql('100381')
55
60
  end
56
61
 
62
+ it "should get fulltext urls for ids" do
63
+ ids = ["25933345","25933144","23933344"]
64
+
65
+ hash = PubmedAPI::Interface.get_fulltext_links(ids)
66
+ paper = hash[ids[0]][0]
67
+ expect(paper.url).to eql('http://link.aps.org/abstract/PRL/v114/p158701')
68
+ end
69
+
70
+
71
+
57
72
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pubmed_api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kieran Higgins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-03 00:00:00.000000000 Z
11
+ date: 2015-05-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler