pubmed_api 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 69035040fd451f5846e90b7512f6d5b2e68253a6
4
+ data.tar.gz: ba519175eb78466e8030b06079e7269864b35a5c
5
+ SHA512:
6
+ metadata.gz: 68e45eb159acc8ed52bd9bc0641e12a745bb692a4289903222c7e979ac3a8825d5ab94d237eae211b504c0ebf38e6c91cae760607348ac054dae3d7a764d1844
7
+ data.tar.gz: dadc39958aab5210b494547cfad68ab00a43b790ab84ee9f5723c468865241c84c9c6b2205bedaa81e5abf8870d8a468dd7931ece13076ff5dc998df5c1f76fb
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pubmed_api.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Kieran Higgins
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # PubmedApi
2
+
3
+ A Ruby gem for downloading paper and journal information from Pubmed Entrez.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'pubmed_api'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install pubmed_api
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/pubmed_api/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new
5
+
6
+ task :default => :spec
7
+ task :test => :spec
@@ -0,0 +1,163 @@
1
+ module PubmedAPI
2
+
3
+ class XMLParser
4
+
5
+ SearchResult = Struct.new(:count, :pmids, :mesh_terms, :phrases_not_found)
6
+
7
+ def parse_search(doc)
8
+
9
+ results = SearchResult.new
10
+ results.pmids = []
11
+ results.mesh_terms = []
12
+
13
+ results.count = doc.xpath('/eSearchResult/Count').first.content.to_i
14
+
15
+ doc.xpath('/eSearchResult/IdList/Id').each {|n| results.pmids << n.content.to_i}
16
+
17
+ doc.xpath('/eSearchResult/TranslationStack/TermSet/Term').each do |n|
18
+ if n.content =~ /"(.*)"\[MeSH Terms\]/
19
+ results.mesh_terms << $1
20
+ end
21
+ end
22
+
23
+ doc.xpath('/eSearchResult/ErrorList/PhraseNotFound').each {|n| results.phrases_not_found << n.content }
24
+ results
25
+
26
+ end
27
+
28
+
29
+ PaperStruct = Struct.new( :title, :abstract, :article_date, :pubmed_date, :date_appeared,
30
+ :doi, :authors, :pmid, :nlmid, :journal, :complete, :url, :pdf_url)
31
+
32
+ def parse_papers(papers_xml)
33
+
34
+ results = []
35
+
36
+ papers_xml.each do |paper|
37
+
38
+ #check it's actually a paper
39
+ if paper.xpath('/*/*').first.name().eql?('PubmedArticle')
40
+
41
+
42
+ paper_output = PaperStruct.new
43
+
44
+ paper_output.title = paper.at('ArticleTitle').text
45
+
46
+ begin
47
+ paper_output.abstract = paper.at('Abstract').text
48
+ rescue NoMethodError
49
+
50
+ end
51
+
52
+ begin
53
+ #Date in Y/M/D format
54
+ article_date = Date.new( paper.at('ArticleDate/Year').text.to_i, paper.at('ArticleDate/Month').text.to_i, paper.at('ArticleDate/Day').text.to_i)
55
+ paper_output.article_date = article_date
56
+ rescue NoMethodError
57
+ #puts "no date " + " " + paper.css('PMID').text + " " + paper.css('ArticleTitle').text
58
+ paper_output.article_date = Date.new()
59
+ end
60
+
61
+ #Parse multiple PubMedPubDate dates
62
+ dates = paper.css('PubMedPubDate')
63
+
64
+ paper_output.pmid = parse_pmid(paper.css('PMID').text)
65
+
66
+ pub_date = [0,0,0]
67
+
68
+ dates.each do |node|
69
+ if node.attributes["PubStatus"].to_s == "entrez"
70
+ pub_date = Date.new( node.at('Year').text.to_i, node.at('Month').text.to_i, node.at('Day').text.to_i)
71
+ paper_output.pubmed_date = pub_date
72
+ paper_output.date_appeared = pub_date
73
+ end
74
+ end
75
+
76
+ ids = paper.css('ArticleId')
77
+
78
+ ids.each do |node|
79
+ v = node.attributes["IdType"].to_s
80
+ if v == 'doi'
81
+ paper_output.doi = node.text
82
+ end
83
+ end
84
+
85
+
86
+ #Extract the authors as friendly string for now...
87
+ #TODO handle authors properly
88
+ authors = paper.css('Author')
89
+ auth_arr = parse_authors(authors)
90
+
91
+ author_string = ''
92
+
93
+ auth_arr.each do |a|
94
+ author_string += a[1] + ' ' + a[2] +', '
95
+ end
96
+
97
+ #cut additional ', ' off end
98
+ author_string = author_string[0..-3]
99
+ paper_output.authors = author_string
100
+ paper_output.nlmid = paper.css('NlmUniqueID').text
101
+
102
+
103
+ results << paper_output
104
+ end
105
+ end
106
+
107
+ return results
108
+ end
109
+
110
+ JournalStruct = Struct.new( :issn, :nlmid, :title_long, :title_short, :started,:frequency)
111
+
112
+ def parse_journals(journals_xml)
113
+
114
+ j_struc_arr = []
115
+
116
+ journals_xml.each do |j|
117
+ j_struc = JournalStruct.new(j.css('ISSN').text, j.css('NlmUniqueID').text, j.xpath('./TitleMain/Title').text,
118
+ j.css('MedlineTA').text, j.css('PublicationFirstYear').text, j.css('Frequency').text)
119
+ j_struc_arr << j_struc
120
+ end
121
+
122
+ j_struc_arr
123
+ end
124
+
125
+ def parse_pmid(pmid)
126
+ pmid = pmid.gsub('.', '')
127
+
128
+ if pmid.length > 8
129
+ pmid = pmid[0,8]
130
+ end
131
+ pmid
132
+ end
133
+
134
+
135
+ AuthorStruct = Struct.new( :fore_name, :initials, :last_name)
136
+
137
+ def parse_authors(authors)
138
+
139
+ authors_output =[]
140
+
141
+ authors.each do |node|
142
+ author_arr = Array.new(3,"")
143
+
144
+ if v = node.at_css('ForeName')
145
+ author_arr[0] = v.text
146
+ end
147
+
148
+ if v = node.at_css('Initials')
149
+ author_arr[1] = v.text
150
+ end
151
+
152
+ if v = node.at_css('LastName')
153
+ author_arr[2] = v.text
154
+ end
155
+
156
+ authors_output << author_arr
157
+ end
158
+
159
+ return authors_output
160
+ end
161
+
162
+ end
163
+ end
@@ -0,0 +1,3 @@
1
+ module PubmedAPI
2
+ VERSION = "0.0.1"
3
+ end
data/lib/pubmed_api.rb ADDED
@@ -0,0 +1,125 @@
1
+ require 'pubmed_api/version'
2
+ require 'pubmed_api/parsers'
3
+ require 'open-uri'
4
+ require 'nokogiri'
5
+
6
+ module PubmedAPI
7
+
8
+ class Interface
9
+
10
+ WAIT_TIME = 0.5 # seconds
11
+
12
+
13
+ DEFAULT_OPTIONS = {:tool => 'ruby-pubmed-api',
14
+ :database => 'pubmed', #which database, e.g. pubmed/nlmcatalog
15
+ :verb => 'search', #which API verb to use e.g. search/fetch
16
+ :email => '',
17
+ :reldate => 90, #How far back shall we go in days
18
+ :retmax => 100000,
19
+ :retstart => 0,
20
+ :load_all_pmids => false }
21
+
22
+
23
+ URI_TEMPLATE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?db={database}&tool={tool}&email={email}'+
24
+ '&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&rettype=fasta&retmode=xml'
25
+
26
+ class << self
27
+
28
+ def search(term, options={})
29
+
30
+ options = DEFAULT_OPTIONS.merge(options)
31
+
32
+ results = do_search(term, options)
33
+
34
+ if options[:load_all_pmids]
35
+ # Send off subsequent requests to load all the PMIDs, add them to the results
36
+ (options[:retmax]..results.count).step(options[:retmax]) do |step|
37
+ results.pmids << do_search(term, options.merge({:retstart => step})).pmids
38
+ end
39
+ end
40
+
41
+ results
42
+ end
43
+
44
+ # Performs a search and parses the response
45
+ def do_search(search_term, options)
46
+ wait
47
+ doc = make_api_request(options.merge({:query => 'term='+search_term}))
48
+ parser = XMLParser.new
49
+ parser.parse_search(doc)
50
+ end
51
+
52
+ def fetch_papers(ids)
53
+ xml = fetch_records(ids, 'pubmed')
54
+ parser = XMLParser.new
55
+ parser.parse_papers(xml)
56
+ end
57
+
58
+ def fetch_journals(nlmids)
59
+ #Change the ids of those wierd journals
60
+ nlmids = nlmids.map { |e| ((e.include? 'R') ? convert_odd_journal_ids(e) : e ) }
61
+ xml = fetch_records(nlmids, 'nlmcatalog')
62
+ parser = XMLParser.new
63
+ parser.parse_journals(xml)
64
+ end
65
+
66
+ def fetch_records(ids, database)
67
+
68
+ xml_records = []
69
+
70
+ options = DEFAULT_OPTIONS
71
+
72
+ #dice array into reasonable length chunks for download
73
+ n_length = 500
74
+ # TODO parallelise?
75
+ ids.each_slice(n_length) do |slice|
76
+
77
+ #Turn string to something html friendly
78
+ id_string = slice.join(",")
79
+ doc = make_api_request(options.merge({:verb => 'fetch',:database => database, :query => 'id='+id_string}))
80
+ records = doc.xpath('./*/*')
81
+ xml_records << records
82
+
83
+ end
84
+ xml_records.flatten
85
+ end
86
+
87
+ #Maked the HTTP request and return the responce
88
+ #TODO handle failures
89
+ #Log API calls?
90
+ def make_api_request(options)
91
+ url = expand_uri(URI_TEMPLATE, options)
92
+ Nokogiri::XML( open url )
93
+ end
94
+
95
+
96
+ #Some journals have odd NLMIDs that need to be searched for rarther than accessed directly.
97
+ #TODO combine into single API request
98
+ def convert_odd_journal_ids(id)
99
+
100
+ new_id = nil
101
+ results = search(id, {:database => 'nlmcatalog', :reldate => '100000'})
102
+ if results.pmids.length ==1
103
+ new_id = results.pmids[0]
104
+ else
105
+ puts "failed to convert " + id.to_s
106
+ end
107
+ new_id.to_s
108
+ end
109
+
110
+ # 300ms minimum wait.
111
+ def wait
112
+ sleep WAIT_TIME
113
+ end
114
+
115
+
116
+ private
117
+
118
+ def expand_uri(uri, options)
119
+ uri.gsub(/\{(.*?)\}/) { URI.encode( (options[$1] || options[$1.to_sym] || '').to_s ) rescue '' }
120
+ end
121
+
122
+ end
123
+ end
124
+
125
+ end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'pubmed_api/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "pubmed_api"
8
+ spec.version = PubmedAPI::VERSION
9
+ spec.authors = ["Kieran Higgins"]
10
+ spec.email = ["kieran.higgins@gmail.com"]
11
+ spec.summary = %q{A Ruby gem for downloading paper and journal information from Pubmed Entrez.}
12
+ spec.description = %q{A Ruby gem for downloading paper and journal information from Pubmed Entrez.}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rspec"
24
+ spec.add_dependency "nokogiri"
25
+
26
+ end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+
3
+ describe PubmedAPI do
4
+
5
+
6
+ it "should perform a search" do
7
+ strucs = PubmedAPI::Interface.search("quantum physics", {:load_all_pmids => true, :reldate => 90})
8
+ expect(strucs.length > 10)
9
+ end
10
+
11
+ it "should make an API call" do
12
+ options = PubmedAPI::Interface::DEFAULT_OPTIONS
13
+ options.merge({:query => 'term=scrotum'})
14
+
15
+ doc = PubmedAPI::Interface.make_api_request(options)
16
+ records = doc.xpath('./*/*')
17
+ count = doc.xpath('/eSearchResult/Count').first.content.to_i
18
+ expect(count > 0 )
19
+ expect(records.length == count)
20
+ end
21
+
22
+
23
+ it "should fetch a paper" do
24
+ id = '25554862'
25
+ title = "Completing the picture for the smallest eigenvalue of real Wishart matrices."
26
+ strucs = PubmedAPI::Interface.fetch_papers([id])
27
+ paper = strucs[0]
28
+ expect(paper.title.eql?(title))
29
+ expect(paper.pmid.eql?(id))
30
+ end
31
+
32
+ it "should fetch a journal" do
33
+ id = '0401141'
34
+ title = 'Physical review letters.'
35
+ strucs = PubmedAPI::Interface.fetch_journals([id])
36
+ j = strucs[0]
37
+ expect(j.title_long.eql?(title))
38
+ expect(j.nlmid.eql?(id))
39
+ end
40
+
41
+ it "it should fix strange journal ids" do
42
+ fixed = PubmedAPI::Interface.convert_odd_journal_ids('16930290R')
43
+ expect( fixed.eql?('100381'))
44
+ end
45
+
46
+ end
@@ -0,0 +1 @@
1
+ require 'pubmed_api'
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pubmed_api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kieran Higgins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: A Ruby gem for downloading paper and journal information from Pubmed
70
+ Entrez.
71
+ email:
72
+ - kieran.higgins@gmail.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".gitignore"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - lib/pubmed_api.rb
83
+ - lib/pubmed_api/parsers.rb
84
+ - lib/pubmed_api/version.rb
85
+ - pubmed_api.gemspec
86
+ - spec/lib/pubmed_api_spec.rb
87
+ - spec/spec_helper.rb
88
+ homepage: ''
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.4.5
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: A Ruby gem for downloading paper and journal information from Pubmed Entrez.
112
+ test_files:
113
+ - spec/lib/pubmed_api_spec.rb
114
+ - spec/spec_helper.rb