pubmed_api 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 69035040fd451f5846e90b7512f6d5b2e68253a6
4
+ data.tar.gz: ba519175eb78466e8030b06079e7269864b35a5c
5
+ SHA512:
6
+ metadata.gz: 68e45eb159acc8ed52bd9bc0641e12a745bb692a4289903222c7e979ac3a8825d5ab94d237eae211b504c0ebf38e6c91cae760607348ac054dae3d7a764d1844
7
+ data.tar.gz: dadc39958aab5210b494547cfad68ab00a43b790ab84ee9f5723c468865241c84c9c6b2205bedaa81e5abf8870d8a468dd7931ece13076ff5dc998df5c1f76fb
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pubmed_api.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Kieran Higgins
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # PubmedApi
2
+
3
+ A Ruby gem for downloading paper and journal information from PubMed Entrez.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'pubmed_api'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install pubmed_api
20
+
21
+ ## Usage
22
+
23
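+ Call `PubmedAPI::Interface.search(term)` to look up matching PMIDs, then pass an array of ids to `PubmedAPI::Interface.fetch_papers(pmids)` (papers) or `PubmedAPI::Interface.fetch_journals(nlmids)` (journals) to download the records as structs.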
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/pubmed_api/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Register the RSpec rake task under its default name, :spec.
RSpec::Core::RakeTask.new(:spec)

# Both `rake` (the default task) and `rake test` run the spec suite.
[:default, :test].each do |task_name|
  task task_name => :spec
end
@@ -0,0 +1,163 @@
1
require 'date'

module PubmedAPI

  # Turns XML returned by the Entrez E-utilities into plain Ruby structs.
  #
  # The methods only rely on the node interface used throughout this class
  # (+xpath+, +css+, +at+, +at_css+, +text+, +content+, +name+, +attributes+);
  # the caller in this gem passes Nokogiri documents and nodes.
  class XMLParser

    SearchResult = Struct.new(:count, :pmids, :mesh_terms, :phrases_not_found)

    PaperStruct = Struct.new(:title, :abstract, :article_date, :pubmed_date, :date_appeared,
                             :doi, :authors, :pmid, :nlmid, :journal, :complete, :url, :pdf_url)

    JournalStruct = Struct.new(:issn, :nlmid, :title_long, :title_short, :started, :frequency)

    # Currently unused: parse_authors returns plain three-element arrays.
    AuthorStruct = Struct.new(:fore_name, :initials, :last_name)

    # Captures the quoted term of a translation-stack entry tagged as a MeSH term.
    MESH_TERM = /"(.*)"\[MeSH Terms\]/

    # Parses an eSearch response.
    #
    # @param doc [#xpath] parsed eSearch XML document
    # @return [SearchResult] +count+ (0 when the Count element is absent),
    #   +pmids+ (Array of Integer), +mesh_terms+ and +phrases_not_found+
    #   (Arrays of String, empty when nothing matched)
    def parse_search(doc)
      results = SearchResult.new

      count_node = doc.xpath('/eSearchResult/Count').first
      results.count = count_node ? count_node.content.to_i : 0

      results.pmids = doc.xpath('/eSearchResult/IdList/Id').map { |n| n.content.to_i }

      results.mesh_terms = doc.xpath('/eSearchResult/TranslationStack/TermSet/Term').map { |n|
        match = MESH_TERM.match(n.content)
        match && match[1]
      }.compact

      # Always an array, so a response containing PhraseNotFound entries no
      # longer fails on an uninitialised field.
      results.phrases_not_found = doc.xpath('/eSearchResult/ErrorList/PhraseNotFound').map { |n| n.content }

      results
    end

    # Parses fetched PubMed records into PaperStructs.
    #
    # Records whose element name is not +PubmedArticle+ are skipped; each
    # record is checked individually.
    #
    # @param papers_xml [Enumerable] record nodes, as returned by
    #   Interface.fetch_records
    # @return [Array<PaperStruct>]
    def parse_papers(papers_xml)
      papers_xml.select { |paper| paper.name == 'PubmedArticle' }.map { |paper| parse_paper(paper) }
    end

    # Parses fetched NLM catalog records into JournalStructs.
    #
    # @param journals_xml [Enumerable] record nodes
    # @return [Array<JournalStruct>] every field is the text of the matching
    #   element(s), or '' when there is none
    def parse_journals(journals_xml)
      journals_xml.map do |j|
        JournalStruct.new(j.css('ISSN').text,
                          j.css('NlmUniqueID').text,
                          j.xpath('./TitleMain/Title').text,
                          j.css('MedlineTA').text,
                          j.css('PublicationFirstYear').text,
                          j.css('Frequency').text)
      end
    end

    # Normalises a PMID string: removes dots and keeps at most the first
    # eight characters.
    #
    # @param pmid [String]
    # @return [String]
    def parse_pmid(pmid)
      pmid.delete('.')[0, 8]
    end

    # Extracts the name parts of each author node.
    #
    # @param authors [Enumerable] Author nodes
    # @return [Array<Array<String>>] one [fore_name, initials, last_name]
    #   triple per author; a missing part is ''
    def parse_authors(authors)
      authors.map do |node|
        %w[ForeName Initials LastName].map do |tag|
          element = node.at_css(tag)
          element ? element.text : ''
        end
      end
    end

    private

    # Builds a PaperStruct from a single PubmedArticle node.
    def parse_paper(paper)
      output = PaperStruct.new

      # Both stay nil when the element is absent.
      output.title    = node_text(paper.at('ArticleTitle'))
      output.abstract = node_text(paper.at('Abstract'))

      # Falls back to Date.new (Ruby's default date) when the record has no
      # usable ArticleDate.
      output.article_date = build_date(paper.at('ArticleDate/Year'),
                                       paper.at('ArticleDate/Month'),
                                       paper.at('ArticleDate/Day')) || Date.new

      # Only the first PMID element is read. Concatenating every PMID in the
      # record (cited articles carry PMID elements too) produced wrong ids.
      output.pmid = parse_pmid(node_text(paper.at('PMID')).to_s)

      # Of the several PubMedPubDate entries, the "entrez" one is used for
      # both date fields.
      paper.css('PubMedPubDate').each do |node|
        next unless node.attributes['PubStatus'].to_s == 'entrez'
        entrez_date = build_date(node.at('Year'), node.at('Month'), node.at('Day'))
        next unless entrez_date
        output.pubmed_date   = entrez_date
        output.date_appeared = entrez_date
      end

      # First DOI in document order.
      # NOTE(review): assumes the article's own ArticleIdList precedes any
      # reference-list ids in the record - confirm against the PubMed DTD.
      doi_node = paper.css('ArticleId').find { |node| node.attributes['IdType'].to_s == 'doi' }
      output.doi = doi_node.text if doi_node

      # Authors as a friendly "Initials LastName, ..." string for now.
      # TODO: handle authors properly
      output.authors = parse_authors(paper.css('Author')).map { |author| "#{author[1]} #{author[2]}" }.join(', ')

      output.nlmid = paper.css('NlmUniqueID').text
      output
    end

    # Text of +node+, or nil when the node is missing.
    def node_text(node)
      node ? node.text : nil
    end

    # Date built from three element nodes; nil when any node is missing or the
    # values do not form a valid date.
    def build_date(year, month, day)
      return nil unless year && month && day
      Date.new(year.text.to_i, month.text.to_i, day.text.to_i)
    rescue ArgumentError
      nil
    end

  end
end
@@ -0,0 +1,3 @@
1
module PubmedAPI
  # Gem version; pubmed_api.gemspec reads this constant for spec.version.
  # Frozen so the shared string cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/pubmed_api.rb ADDED
@@ -0,0 +1,125 @@
1
+ require 'pubmed_api/version'
2
+ require 'pubmed_api/parsers'
3
+ require 'open-uri'
4
+ require 'nokogiri'
5
+
6
module PubmedAPI

  # Small client for the NCBI Entrez E-utilities: builds the request URL,
  # downloads the XML and hands it to XMLParser.
  class Interface

    WAIT_TIME = 0.5 # seconds slept before each request

    # Defaults merged into every request. Deliberately left unfrozen: callers
    # may have been setting e.g. DEFAULT_OPTIONS[:email] globally.
    DEFAULT_OPTIONS = {:tool => 'ruby-pubmed-api',
                       :database => 'pubmed', # which database, e.g. pubmed/nlmcatalog
                       :verb => 'search',     # which API verb to use, e.g. search/fetch
                       :email => '',
                       :reldate => 90,        # how far back to go, in days
                       :retmax => 100000,
                       :retstart => 0,
                       :load_all_pmids => false }

    # NCBI serves the E-utilities over HTTPS.
    URI_TEMPLATE = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?db={database}&tool={tool}&email={email}'+
                   '&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&rettype=fasta&retmode=xml'

    # Characters that get percent-escaped when a value is substituted into
    # URI_TEMPLATE. This is the character set the removed URI.encode used by
    # default, so '=', '&' and ',' inside a value are left untouched (the
    # :query option relies on that for "term=..." / "id=1,2,3").
    UNSAFE_URI_CHARS = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]/

    # Number of ids sent per fetch request.
    FETCH_BATCH_SIZE = 500

    class << self

      # Searches a database for +term+.
      #
      # @param term [String] search term
      # @param options [Hash] overrides for DEFAULT_OPTIONS; with
      #   :load_all_pmids => true further pages of :retmax ids are requested
      #   until all ids reported by the first response have been loaded
      # @return [XMLParser::SearchResult]
      def search(term, options={})
        options = DEFAULT_OPTIONS.merge(options)
        results = do_search(term, options)

        if options[:load_all_pmids]
          # Exclusive range: no extra, empty request when count is an exact
          # multiple of retmax.
          (options[:retmax]...results.count).step(options[:retmax]) do |offset|
            # concat keeps pmids a flat list (<< would nest each page as one element).
            results.pmids.concat(do_search(term, options.merge(:retstart => offset)).pmids)
          end
        end

        results
      end

      # Performs a single search request (after waiting) and parses the response.
      def do_search(search_term, options)
        wait
        doc = make_api_request(options.merge(:query => 'term=' + search_term))
        XMLParser.new.parse_search(doc)
      end

      # Downloads and parses the papers with the given PMIDs.
      #
      # @param ids [Array] PMIDs
      # @param options [Hash] overrides for DEFAULT_OPTIONS (e.g. :email)
      # @return [Array<XMLParser::PaperStruct>]
      def fetch_papers(ids, options={})
        XMLParser.new.parse_papers(fetch_records(ids, 'pubmed', options))
      end

      # Downloads and parses the journals with the given NLM ids.
      #
      # Ids containing an 'R' are first converted with convert_odd_journal_ids;
      # ids that could not be converted are dropped rather than sent as blanks.
      #
      # @param nlmids [Array<String>] NLM unique ids
      # @param options [Hash] overrides for DEFAULT_OPTIONS
      # @return [Array<XMLParser::JournalStruct>]
      def fetch_journals(nlmids, options={})
        ids = nlmids.map { |id| id.include?('R') ? convert_odd_journal_ids(id) : id }
        ids = ids.reject { |id| id.to_s.empty? }
        XMLParser.new.parse_journals(fetch_records(ids, 'nlmcatalog', options))
      end

      # Fetches raw record nodes in batches of FETCH_BATCH_SIZE ids.
      #
      # @param ids [Array] record ids
      # @param database [String] e.g. 'pubmed' or 'nlmcatalog'
      # @param options [Hash] overrides for DEFAULT_OPTIONS
      # @return [Array] the record nodes (grandchildren level './*/*' of each
      #   response), in request order
      # TODO: parallelise?
      def fetch_records(ids, database, options={})
        request = DEFAULT_OPTIONS.merge(options).merge(:verb => 'fetch', :database => database)

        ids.each_slice(FETCH_BATCH_SIZE).flat_map do |slice|
          wait # same throttling as do_search
          doc = make_api_request(request.merge(:query => 'id=' + slice.join(',')))
          doc.xpath('./*/*').to_a
        end
      end

      # Makes the HTTP request and returns the parsed response document.
      # The block form closes the downloaded stream once it has been parsed.
      # TODO: handle failures
      # TODO: log API calls?
      def make_api_request(options)
        url = expand_uri(URI_TEMPLATE, options)
        URI.parse(url).open { |io| Nokogiri::XML(io) }
      end

      # Some journals have odd NLM ids that need to be searched for rather
      # than accessed directly.
      #
      # @param id [String] the odd id
      # @return [String] the single id found by the search, or '' (with a
      #   warning on stderr) when the search did not return exactly one id
      # TODO: combine into a single API request
      def convert_odd_journal_ids(id)
        results = search(id, :database => 'nlmcatalog', :reldate => '100000')
        return results.pmids.first.to_s if results.pmids.length == 1

        warn "failed to convert " + id.to_s
        ''
      end

      # Sleeps WAIT_TIME seconds to throttle requests.
      # NOTE(review): NCBI's usage guidelines limit the request rate - confirm
      # WAIT_TIME against the current limit.
      def wait
        sleep WAIT_TIME
      end

      private

      # Replaces each {name} placeholder with the escaped option value, looked
      # up by string key first, then by symbol key; unknown names become ''.
      def expand_uri(uri, options)
        uri.gsub(/\{(.*?)\}/) do
          key = Regexp.last_match(1)
          value = options[key] || options[key.to_sym] || ''
          escape_uri_value(value.to_s)
        end
      end

      # Percent-escapes every byte of each character in UNSAFE_URI_CHARS.
      # Replaces URI.encode, which no longer exists in Ruby 3.
      def escape_uri_value(text)
        text.gsub(UNSAFE_URI_CHARS) { |char| char.bytes.map { |byte| format('%%%02X', byte) }.join }
      end

    end
  end

end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'pubmed_api/version'
5
+
6
# Package definition for the pubmed_api gem.
Gem::Specification.new do |gem|
  # Identity
  gem.name     = "pubmed_api"
  gem.version  = PubmedAPI::VERSION
  gem.license  = "MIT"
  gem.homepage = ""

  # Author
  gem.authors = ["Kieran Higgins"]
  gem.email   = ["kieran.higgins@gmail.com"]

  # Description
  gem.summary     = %q{A Ruby gem for downloading paper and journal information from Pubmed Entrez.}
  gem.description = %q{A Ruby gem for downloading paper and journal information from Pubmed Entrez.}

  # Contents: every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies, then the single runtime dependency.
  [["bundler", "~> 1.7"], ["rake", "~> 10.0"], ["rspec"]].each do |name_and_requirement|
    gem.add_development_dependency(*name_and_requirement)
  end
  gem.add_dependency "nokogiri"

end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+
3
# NOTE: these examples go through PubmedAPI::Interface to the real Entrez
# service, so they need network access and their expected values depend on
# live data.
#
# Every expectation uses a matcher: a bare `expect(boolean)` never fails.
describe PubmedAPI do

  it "should perform a search" do
    results = PubmedAPI::Interface.search("quantum physics", {:load_all_pmids => true, :reldate => 90})
    # Count the ids found; `length` on the result struct is its member count.
    expect(results.pmids.length).to be > 10
  end

  it "should make an API call" do
    # merge returns a new hash, so the result has to be kept.
    options = PubmedAPI::Interface::DEFAULT_OPTIONS.merge({:query => 'term=scrotum'})

    doc = PubmedAPI::Interface.make_api_request(options)
    count = doc.xpath('/eSearchResult/Count').first.content.to_i
    ids = doc.xpath('/eSearchResult/IdList/Id')

    expect(count).to be > 0
    expect(ids.length).to be > 0
    # One response holds at most `count` ids.
    expect(ids.length).to be <= count
  end

  it "should fetch a paper" do
    id = '25554862'
    title = "Completing the picture for the smallest eigenvalue of real Wishart matrices."
    paper = PubmedAPI::Interface.fetch_papers([id]).first
    expect(paper.title).to eq(title)
    expect(paper.pmid).to eq(id)
  end

  it "should fetch a journal" do
    id = '0401141'
    title = 'Physical review letters.'
    journal = PubmedAPI::Interface.fetch_journals([id]).first
    expect(journal.title_long).to eq(title)
    expect(journal.nlmid).to eq(id)
  end

  it "it should fix strange journal ids" do
    fixed = PubmedAPI::Interface.convert_odd_journal_ids('16930290R')
    expect(fixed).to eq('100381')
  end

end
@@ -0,0 +1 @@
1
+ require 'pubmed_api'
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pubmed_api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kieran Higgins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: A Ruby gem for downloading paper and journal information from Pubmed
70
+ Entrez.
71
+ email:
72
+ - kieran.higgins@gmail.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".gitignore"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - lib/pubmed_api.rb
83
+ - lib/pubmed_api/parsers.rb
84
+ - lib/pubmed_api/version.rb
85
+ - pubmed_api.gemspec
86
+ - spec/lib/pubmed_api_spec.rb
87
+ - spec/spec_helper.rb
88
+ homepage: ''
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.4.5
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: A Ruby gem for downloading paper and journal information from Pubmed Entrez.
112
+ test_files:
113
+ - spec/lib/pubmed_api_spec.rb
114
+ - spec/spec_helper.rb