RubyGems - pubmed_api - Versions diffs - 0.0.1 - Mend

pubmed_api 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 69035040fd451f5846e90b7512f6d5b2e68253a6
+  data.tar.gz: ba519175eb78466e8030b06079e7269864b35a5c
+SHA512:
+  metadata.gz: 68e45eb159acc8ed52bd9bc0641e12a745bb692a4289903222c7e979ac3a8825d5ab94d237eae211b504c0ebf38e6c91cae760607348ac054dae3d7a764d1844
+  data.tar.gz: dadc39958aab5210b494547cfad68ab00a43b790ab84ee9f5723c468865241c84c9c6b2205bedaa81e5abf8870d8a468dd7931ece13076ff5dc998df5c1f76fb

data/.gitignore ADDED Viewed

@@ -0,0 +1,14 @@
+/.bundle/
+/.yardoc
+/Gemfile.lock
+/_yardoc/
+/coverage/
+/doc/
+/pkg/
+/spec/reports/
+/tmp/
+*.bundle
+*.so
+*.o
+*.a
+mkmf.log

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in pubmed_api.gemspec
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2015 Kieran Higgins
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,31 @@
+# PubmedApi
+A Ruby gem for downloading paper and journal information from Pubmed Entrez.
+## Installation
+Add this line to your application's Gemfile:
+```ruby
+gem 'pubmed_api'
+```
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install pubmed_api
+## Usage
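+Search PubMed and fetch the matching papers:
+```ruby
+require 'pubmed_api'
+results = PubmedAPI::Interface.search('quantum physics', :reldate => 90)
+papers  = PubmedAPI::Interface.fetch_papers(results.pmids)
+puts papers.first.title
+```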
+## Contributing
+1. Fork it ( https://github.com/[my-github-username]/pubmed_api/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1,7 @@
+# Rake entry point: loads Bundler's gem tasks and RSpec's rake task.
+require "bundler/gem_tasks"
+require "rspec/core/rake_task"
+# Registers the RSpec task that the two aliases below depend on (:spec).
+RSpec::Core::RakeTask.new
+# Both `rake` and `rake test` run the spec task.
+task :default => :spec
+task :test => :spec

data/lib/pubmed_api/parsers.rb ADDED Viewed

@@ -0,0 +1,163 @@
+require 'date'
+
+module PubmedAPI
+  # Turns XML returned by the Entrez E-utilities into plain Ruby structs.
+  #
+  # The methods only rely on the Nokogiri-style query interface of the objects
+  # they are given (+xpath+, +css+, +at+, +at_css+, +name+, +text+, +content+,
+  # +attributes+).
+  class XMLParser
+    # Outcome of a search request.
+    SearchResult = Struct.new(:count, :pmids, :mesh_terms, :phrases_not_found)
+    # One article. +complete+, +url+ and +pdf_url+ are never filled in by this parser.
+    PaperStruct = Struct.new( :title, :abstract, :article_date, :pubmed_date, :date_appeared,
+                              :doi, :authors, :pmid, :nlmid, :journal, :complete, :url, :pdf_url)
+    # One journal record from the nlmcatalog database.
+    JournalStruct = Struct.new( :issn, :nlmid, :title_long, :title_short, :started,:frequency)
+    # Declared for future use; parse_authors currently returns plain arrays.
+    AuthorStruct = Struct.new( :fore_name, :initials, :last_name)
+
+    # Parses a search response.
+    #
+    # @param doc [#xpath] parsed search response
+    # @return [SearchResult] +count+ is 0 when the response has no Count node;
+    #   +pmids+ are Integers; +mesh_terms+ are the quoted terms tagged
+    #   "[MeSH Terms]"; +phrases_not_found+ is always an Array (possibly empty)
+    def parse_search(doc)
+      results = SearchResult.new
+      count_node = doc.xpath('/eSearchResult/Count').first
+      results.count = count_node ? count_node.content.to_i : 0
+      results.pmids = doc.xpath('/eSearchResult/IdList/Id').map { |n| n.content.to_i }
+      results.mesh_terms = []
+      doc.xpath('/eSearchResult/TranslationStack/TermSet/Term').each do |n|
+        term = n.content[/"(.*)"\[MeSH Terms\]/, 1]
+        results.mesh_terms << term if term
+      end
+      # Always an array, so callers can use it without a nil check.
+      results.phrases_not_found = doc.xpath('/eSearchResult/ErrorList/PhraseNotFound').map { |n| n.content }
+      results
+    end
+
+    # Parses fetched article records.
+    #
+    # @param papers_xml [Enumerable] record nodes, one per fetched id
+    # @return [Array<PaperStruct>] one entry per record named "PubmedArticle";
+    #   records with any other element name are skipped
+    # @raise [NoMethodError] if an article has no ArticleTitle, or an "entrez"
+    #   PubMedPubDate lacks Year/Month/Day
+    def parse_papers(papers_xml)
+      results = []
+      papers_xml.each do |paper|
+        # Check this record itself, not the first record of its document.
+        next unless paper.name == 'PubmedArticle'
+        results << build_paper(paper)
+      end
+      results
+    end
+
+    # Parses fetched journal records.
+    #
+    # @param journals_xml [Enumerable] journal record nodes
+    # @return [Array<JournalStruct>] all fields are Strings ('' when absent)
+    def parse_journals(journals_xml)
+      journals_xml.map do |j|
+        JournalStruct.new(j.css('ISSN').text, j.css('NlmUniqueID').text, j.xpath('./TitleMain/Title').text,
+                          j.css('MedlineTA').text, j.css('PublicationFirstYear').text, j.css('Frequency').text)
+      end
+    end
+
+    # Normalises a PMID string: removes dots and keeps at most 8 characters.
+    #
+    # @param pmid [String]
+    # @return [String]
+    def parse_pmid(pmid)
+      pmid.gsub('.', '')[0, 8]
+    end
+
+    # Extracts author name parts.
+    #
+    # @param authors [Enumerable<#at_css>] Author nodes
+    # @return [Array<Array<String>>] one [fore_name, initials, last_name]
+    #   triple per author; missing parts are ''
+    def parse_authors(authors)
+      authors.map do |node|
+        %w[ForeName Initials LastName].map do |tag|
+          found = node.at_css(tag)
+          found ? found.text : ''
+        end
+      end
+    end
+
+    private
+
+    # Builds the PaperStruct for a single PubmedArticle node.
+    def build_paper(paper)
+      out = PaperStruct.new
+      out.title = paper.at('ArticleTitle').text
+      abstract = paper.at('Abstract')
+      out.abstract = abstract.text if abstract
+      out.article_date = article_date_for(paper)
+      # Read the first PMID element only; concatenating every PMID in the
+      # record and truncating gave wrong ids for PMIDs shorter than 8 digits.
+      pmid_node = paper.at_css('PMID')
+      out.pmid = parse_pmid(pmid_node ? pmid_node.text : '')
+      # The "entrez" status date doubles as the date the paper appeared.
+      paper.css('PubMedPubDate').each do |node|
+        next unless node.attributes["PubStatus"].to_s == "entrez"
+        entrez = Date.new(node.at('Year').text.to_i, node.at('Month').text.to_i, node.at('Day').text.to_i)
+        out.pubmed_date = entrez
+        out.date_appeared = entrez
+      end
+      paper.css('ArticleId').each do |node|
+        out.doi = node.text if node.attributes["IdType"].to_s == 'doi'
+      end
+      # Authors as a friendly "Initials LastName, ..." string for now.
+      # TODO: handle authors properly
+      out.authors = parse_authors(paper.css('Author')).map { |a| a[1] + ' ' + a[2] }.join(', ')
+      out.nlmid = paper.css('NlmUniqueID').text
+      out
+    end
+
+    # ArticleDate as a Date; falls back to Date.new (the default date) when
+    # any of Year/Month/Day is missing.
+    def article_date_for(paper)
+      parts = %w[Year Month Day].map { |part| paper.at("ArticleDate/#{part}") }
+      return Date.new if parts.any? { |node| node.nil? }
+      Date.new(*parts.map { |node| node.text.to_i })
+    end
+  end
+end

data/lib/pubmed_api/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module PubmedAPI
+  # Gem version string; pubmed_api.gemspec reads it as spec.version.
+  VERSION = "0.0.1"
+end

data/lib/pubmed_api.rb ADDED Viewed

@@ -0,0 +1,125 @@
+require 'pubmed_api/version'
+require 'pubmed_api/parsers'
+require 'open-uri'
+require 'nokogiri'
+module PubmedAPI
+  # Minimal client for the NCBI Entrez E-utilities (search / fetch).
+  # All methods are class-level: PubmedAPI::Interface.search(...), etc.
+  class Interface
+    # Pause before every search request, in seconds.
+    WAIT_TIME = 0.5
+    DEFAULT_OPTIONS = {:tool => 'ruby-pubmed-api',
+                       :database => 'pubmed', # which database, e.g. pubmed/nlmcatalog
+                       :verb => 'search', # which API verb to use, e.g. search/fetch
+                       :email => '',
+                       :reldate => 90, # how far back to go, in days
+                       :retmax => 100000,
+                       :retstart => 0,
+                       :load_all_pmids => false }.freeze
+    # {placeholders} are filled from the options hash by expand_uri.
+    URI_TEMPLATE = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?db={database}&tool={tool}&email={email}'+
+                   '&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&rettype=fasta&retmode=xml'
+    class << self
+      # Runs a search.
+      #
+      # @param term [String] search term
+      # @param options [Hash] overrides for DEFAULT_OPTIONS; with
+      #   :load_all_pmids => true further pages of :retmax ids are requested
+      #   until +count+ ids have been asked for
+      # @return [XMLParser::SearchResult] first page's result, with the ids of
+      #   any later pages appended to +pmids+ as a flat list
+      def search(term, options={})
+        options = DEFAULT_OPTIONS.merge(options)
+        results = do_search(term, options)
+        if options[:load_all_pmids]
+          page_size = options[:retmax]
+          # Exclusive upper bound: no empty extra request when count is an
+          # exact multiple of the page size.
+          (page_size...results.count).step(page_size) do |offset|
+            page = do_search(term, options.merge({:retstart => offset}))
+            results.pmids.concat(page.pmids)
+          end
+        end
+        results
+      end
+
+      # Performs one search request (after the rate-limit pause) and parses the response.
+      def do_search(search_term, options)
+        wait
+        doc = make_api_request(options.merge({:query => 'term='+search_term}))
+        XMLParser.new.parse_search(doc)
+      end
+
+      # @param ids [Array] PubMed ids
+      # @return [Array<XMLParser::PaperStruct>]
+      def fetch_papers(ids)
+        XMLParser.new.parse_papers(fetch_records(ids, 'pubmed'))
+      end
+
+      # @param nlmids [Array<String>] NLM ids; ids containing 'R' are first
+      #   translated with convert_odd_journal_ids
+      # @return [Array<XMLParser::JournalStruct>]
+      def fetch_journals(nlmids)
+        # Change the ids of those weird journals.
+        nlmids = nlmids.map { |e| e.include?('R') ? convert_odd_journal_ids(e) : e }
+        XMLParser.new.parse_journals(fetch_records(nlmids, 'nlmcatalog'))
+      end
+
+      # Downloads records in batches of 500 ids.
+      #
+      # @return [Array] the record nodes (grandchildren of each response document)
+      # TODO: parallelise?
+      def fetch_records(ids, database)
+        ids.each_slice(500).flat_map do |slice|
+          id_string = slice.join(",")
+          doc = make_api_request(DEFAULT_OPTIONS.merge({:verb => 'fetch',:database => database, :query => 'id='+id_string}))
+          doc.xpath('./*/*').to_a
+        end
+      end
+
+      # Makes the HTTP request and returns the response as a parsed XML document.
+      # TODO: handle failures
+      # TODO: log API calls?
+      def make_api_request(options)
+        url = expand_uri(URI_TEMPLATE, options)
+        # Block form closes the response handle once it has been parsed.
+        URI.parse(url).open { |io| Nokogiri::XML(io) }
+      end
+
+      # Some journals have odd NLM ids that must be searched for rather than
+      # accessed directly.
+      #
+      # @return [String] the single matching catalog id, or '' (after printing
+      #   a message) when the search does not return exactly one hit
+      # TODO: combine into a single API request
+      def convert_odd_journal_ids(id)
+        new_id = nil
+        results = search(id, {:database => 'nlmcatalog', :reldate => '100000'})
+        if results.pmids.length == 1
+          new_id = results.pmids[0]
+        else
+          puts "failed to convert " + id.to_s
+        end
+        new_id.to_s
+      end
+
+      # Sleeps for WAIT_TIME seconds to throttle requests.
+      def wait
+        sleep WAIT_TIME
+      end
+
+      private
+
+      # Replaces each {name} in +uri+ with the escaped option of that name
+      # (String or Symbol key); unknown names become ''.
+      # URI.encode no longer exists on Ruby 3.0+, and the old `rescue ''`
+      # hid that by blanking every parameter; the default parser's escape
+      # keeps the same escaping rules (e.g. '=' kept, ' ' -> '%20').
+      def expand_uri(uri, options)
+        uri.gsub(/\{(.*?)\}/) do
+          key = Regexp.last_match(1)
+          value = options[key] || options[key.to_sym] || ''
+          URI::DEFAULT_PARSER.escape(value.to_s)
+        end
+      end
+    end
+  end
+end

data/pubmed_api.gemspec ADDED Viewed

@@ -0,0 +1,26 @@
+# coding: utf-8
+# Put lib/ on the load path so the version file can be required below.
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'pubmed_api/version'
+Gem::Specification.new do |spec|
+  spec.name          = "pubmed_api"
+  spec.version       = PubmedAPI::VERSION
+  spec.authors       = ["Kieran Higgins"]
+  spec.email         = ["kieran.higgins@gmail.com"]
+  spec.summary       = %q{A Ruby gem for downloading paper and journal information from Pubmed Entrez.}
+  spec.description   = %q{A Ruby gem for downloading paper and journal information from Pubmed Entrez.}
+  # NOTE(review): homepage is blank — fill in the project URL.
+  spec.homepage      = ""
+  spec.license       = "MIT"
+  # The file list is taken from git, so the gem must be built inside a git checkout.
+  spec.files         = `git ls-files -z`.split("\x0")
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+  spec.add_development_dependency "bundler", "~> 1.7"
+  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency "rspec"
+  # Only runtime dependency; lib/pubmed_api.rb also requires open-uri.
+  spec.add_dependency "nokogiri"
+end

data/spec/lib/pubmed_api_spec.rb ADDED Viewed

@@ -0,0 +1,46 @@
+require 'spec_helper'
+# Integration specs: every example calls PubmedAPI::Interface, which talks to
+# the live NCBI service, so they need network access.
+# Each expectation now uses a matcher; a bare `expect(boolean)` never asserts.
+describe PubmedAPI do
+  it "should perform a search" do
+    results = PubmedAPI::Interface.search("quantum physics", {:load_all_pmids => true, :reldate => 90})
+    # Count the ids found, not the number of members of the result struct.
+    expect(results.pmids.length).to be > 10
+  end
+  it "should make an API call" do
+    # merge returns a new hash; keep the result so the query is actually sent.
+    options = PubmedAPI::Interface::DEFAULT_OPTIONS.merge({:query => 'term=scrotum'})
+    doc = PubmedAPI::Interface.make_api_request(options)
+    ids = doc.xpath('/eSearchResult/IdList/Id')
+    count = doc.xpath('/eSearchResult/Count').first.content.to_i
+    expect(count).to be > 0
+    expect(ids.length).to be > 0
+    # One page never holds more ids than the reported total.
+    expect(ids.length).to be <= count
+  end
+  it "should fetch a paper" do
+    id = '25554862'
+    title = "Completing the picture for the smallest eigenvalue of real Wishart matrices."
+    strucs = PubmedAPI::Interface.fetch_papers([id])
+    paper = strucs[0]
+    expect(paper.title).to eq(title)
+    expect(paper.pmid).to eq(id)
+  end
+  it "should fetch a journal" do
+    id = '0401141'
+    title = 'Physical review letters.'
+    strucs = PubmedAPI::Interface.fetch_journals([id])
+    j = strucs[0]
+    expect(j.title_long).to eq(title)
+    expect(j.nlmid).to eq(id)
+  end
+  it "it should fix strange journal ids" do
+    fixed = PubmedAPI::Interface.convert_odd_journal_ids('16930290R')
+    expect(fixed).to eq('100381')
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ require 'pubmed_api'

metadata ADDED Viewed

@@ -0,0 +1,114 @@
+--- !ruby/object:Gem::Specification
+name: pubmed_api
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+platform: ruby
+authors:
+- Kieran Higgins
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-04-28 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.7'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.7'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: A Ruby gem for downloading paper and journal information from Pubmed
+  Entrez.
+email:
+- kieran.higgins@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- lib/pubmed_api.rb
+- lib/pubmed_api/parsers.rb
+- lib/pubmed_api/version.rb
+- pubmed_api.gemspec
+- spec/lib/pubmed_api_spec.rb
+- spec/spec_helper.rb
+homepage: ''
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.5
+signing_key:
+specification_version: 4
+summary: A Ruby gem for downloading paper and journal information from Pubmed Entrez.
+test_files:
+- spec/lib/pubmed_api_spec.rb
+- spec/spec_helper.rb