RubyGems - rbbt-sources - Versions diffs - 3.2.16 → 3.4.0 - Mend

rbbt-sources 3.2.16 → 3.4.0

Files changed (11) hide show

checksums.yaml +4 -4
data/etc/allowed_biomart_archives +1 -0
data/lib/rbbt/sources/biomart.rb +2 -2
data/lib/rbbt/sources/mesh.rb +26 -0
data/lib/rbbt/sources/pubmed.rb +16 -3
data/lib/rbbt/sources/signor.rb +5 -1
data/share/install/Organism/organism_helpers.rb +3 -3
data/share/install/lib/rake_helper.rb +2 -2
data/test/rbbt/sources/test_mesh.rb +10 -0
data/test/rbbt/sources/test_pubmed.rb +18 -8
metadata +6 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0745fa19486f2c9c8c86b24ddf5f44689aa010c500db0bf7fadfc67e03072bf8
-  data.tar.gz: b6d5f1481202d3f191b725f5c0021c371a4f044e14661d3534d31a260ffb480e
+  metadata.gz: 87f97c6af3dab4a1b39cb258acdf9bf4d105df5703a04d6264f960ff79e81faa
+  data.tar.gz: ff91f67bc0775e0a20678ede8eeb312fa1e7a42d18095c2d9bcb1a5c0e4fc000
 SHA512:
-  metadata.gz: 1241babce1692c616242e1ef5a501c5e337f3b042110fca932ee3e320ade576538be40723968eabac97ea02133d58144275f5949c3288926d87fc03192417522
-  data.tar.gz: 53b45cbb861f44f2f4f51f4e14a36c3f70210d729f45138228ac06a542857162e94f736b0b43a107af413da0ff68247061e4d99a774932fa5b4f65b3547205c3
+  metadata.gz: 0b23136a81511a1ad55d5bb2af5784fd74512b9355bf40023a5197180bf25b69aefa966a3dafc8347f1864da174637fa0a2f95bb687a8973a4b23f5e6778398d
+  data.tar.gz: 4fca8a03899b980a18da56d9cdd56bc0136ce126c718ca61836fa3cf55313f77664b7ddecadba2ad45491c2e309604da3ce7288c5de70c660f498bfcc2849aec

data/etc/allowed_biomart_archives CHANGED Viewed

@@ -1,6 +1,7 @@
 may2009
 feb2014
 may2017
+oct2018
 apr2019
 feb2021
 feb2023

data/lib/rbbt/sources/biomart.rb CHANGED Viewed

@@ -15,7 +15,7 @@ module BioMart
   BIOMART_URL = 'http://www.ensembl.org/biomart/martservice?query='
-  MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.exists? ? Rbbt.etc.biomart.missing_in_archive.yaml : {}
+  MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.exists? ? Rbbt.etc.biomart.missing_in_archive.find.yaml : {}
   private
@@ -33,7 +33,7 @@ module BioMart
   def self.set_archive(date)
     if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
-      raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
+      raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.find.read.split("\n").include? date
     end
     Thread.current['archive'] = date
     Thread.current['archive_url'] = BIOMART_URL.sub(/www/, date + '.archive')

data/lib/rbbt/sources/mesh.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require 'rbbt-util'
+require 'rbbt/resource'
+module MeSH
+  extend Resource
+  self.subdir = "share/databases/MeSH"
+  MeSH.claim MeSH["data.gz"], :url, "https://nlmpubs.nlm.nih.gov/projects/mesh/rdf/mesh.nt.gz"
+  MeSH.claim MeSH.vocabulary, :proc do
+    dumper = TSV::Dumper.new :key_field => "MeSH ID", :fields => ["Label"], :type => :single
+    dumper.init
+    TSV.traverse MeSH.data, :type => :array, :into => dumper, :bar => "Processing MeSH vocab" do |line|
+      sub, verb, obj = line.split("\t")
+      next unless verb && verb.include?("rdf-schema#label")
+      id = sub.split("/").last[0..-2]
+      label = obj.split('"')[1]
+      [id, label]
+    end
+  end
+end

data/lib/rbbt/sources/pubmed.rb CHANGED Viewed

@@ -51,6 +51,7 @@ module PubMed
       end
       [lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
     end
     def self.parse_xml(xml)
       require 'nokogiri'
@@ -91,6 +92,16 @@ module PubMed
         [lastname, forename] * ", "
       end * " and "
+      info[:mesh] = parser.search("MeshHeadingList/MeshHeading").collect do |mesh|
+        descriptor = mesh.search("DescriptorName").first.attr('UI')
+        qualifiers = mesh.search("QualifierName").collect{|q| q.attr('UI')}
+        [descriptor] + qualifiers.collect{|q| descriptor + q }
+      end.compact.flatten
+      info[:substance] = parser.search("NameOfSubstance").collect do |substance|
+        substance.attr('UI')
+      end
       info[:bibentry] = bibentry.downcase if bibentry
       info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
@@ -102,7 +113,7 @@ module PubMed
       info
     end
-    attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url
+    attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url, :mesh, :substance
     attr_accessor *XML_KEYS.collect{|p| p.first }
     def initialize(xml)
@@ -141,7 +152,7 @@ module PubMed
                  `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
                  TmpFile.with_file do |txt|
                    `pdftotext #{ pdf } #{ txt }`
-                   text = Open.read(txt) if File.exists? txt
+                   text = Open.read(txt) if File.exist?(txt)
                  end
                end
                text
@@ -241,11 +252,13 @@ module PubMed
     pmids = [pmids] unless Array === pmids
     pmids = pmids.compact.collect{|id| id}
+    chunk_size = 50
     result_files = FileCache.cache_online_elements(pmids, 'pubmed-{ID}.xml') do |ids|
       result = {}
       values = []
-      chunks = Misc.divide(ids, (ids.length / 20) + 1)
+      chunks = Misc.divide(ids, (ids.length / chunk_size) + 1)
       Log::ProgressBar.with_bar(chunks.length, :desc => "Downloading articles from PubMed") do |bar|
+        bar.init
         chunks.each do |list|
           begin
             Misc.try3times do

data/lib/rbbt/sources/signor.rb CHANGED Viewed

@@ -19,7 +19,11 @@ module Signor
   end
   Signor.claim Signor.data, :proc do
-    Signor[".source/all.csv"].tsv :header_hash => '', :merge => true, :zipped => true
+    begin
+      Signor[".source/all.csv"].tsv :header_hash => '', :merge => true, :zipped => true
+    rescue
+      Signor[".source/all.csv"].tsv :header_hash => '', :merge => true, :one2one => true
+    end
   end
   Signor.claim Signor.protein_protein, :proc do

data/share/install/Organism/organism_helpers.rb CHANGED Viewed

@@ -806,7 +806,7 @@ file 'gene_set' do |t|
   build_code = Organism.GRC_build(organism)
   scientific_name = $scientific_name
   url = "ftp://ftp.ensembl.org/pub/release-#{num}/gtf/#{scientific_name.downcase.sub(" ", '_')}/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.gtf.gz"
-  CMD.cmd("wget '#{url}' -O #{t.name}.gz")
+  Open.download(url, "#{t.name}.gz")
   nil
 end
@@ -825,7 +825,7 @@ file 'cdna_fasta' do |t|
   num = release.split("-").last
   build_code = Organism.GRC_build(organism)
   scientific_name = Organism.scientific_name(organism)
-  url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.cdna.all.fa.gz"
-  CMD.cmd("wget '#{url}' -O #{t.name}.gz")
+  url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.cdna.all.fa.gz"
+  Open.download(url, "#{t.name}.gz")
   nil
 end

data/share/install/lib/rake_helper.rb CHANGED Viewed

@@ -9,7 +9,7 @@ SOURCE_DIR = 'source'
 def define_source_tasks(sources)
   sources.each do |name, url|
     file File.join(SOURCE_DIR, name) do |t|
-      FileUtils.mkdir SOURCE_DIR unless File.exists? SOURCE_DIR
+      FileUtils.mkdir SOURCE_DIR unless File.exist? SOURCE_DIR
       Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
       Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
     end
@@ -87,5 +87,5 @@ end
 task :all => :default
 task :clean do
-  ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exists?(file.to_s) end
+  ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exist?(file.to_s) end
 end

data/test/rbbt/sources/test_mesh.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
+require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
+class TestMESH < Test::Unit::TestCase
+  def test_vocab
+    tsv = MeSH.vocabulary.tsv
+    assert_equal "3T3 Cells", tsv["D016475"]
+  end
+end

data/test/rbbt/sources/test_pubmed.rb CHANGED Viewed

@@ -5,7 +5,17 @@ require 'test/unit'
 class TestPubMed < Test::Unit::TestCase
-  def test_get_article
+  def test_mesh
+    pmid = '10866666'
+    assert_include PubMed.get_article(pmid).mesh, "D016475"
+  end
+  def _test_substance
+    pmid = '10866666'
+    assert_include PubMed.get_article(pmid).substance, "C000717247"
+  end
+  def _test_get_article
     pmid = '16438716'
     assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
@@ -13,38 +23,38 @@ class TestPubMed < Test::Unit::TestCase
     assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
   end
-  def test_get_multi_abstract
+  def _test_get_multi_abstract
     pmid = "32141403"
     assert PubMed.get_article(pmid).abstract.include?("This study shows PCOS patients are at increased risk of incident schizophrenia, and the metformin treatment has a protective effect against incident schizophrenia.")
   end
-  def test_full_text
+  def _test_full_text
     pmid = '16438716'
     assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
   end
-  def test_pmc_full_xml
+  def _test_pmc_full_xml
     pmid = '4304705'
     assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
   end
-  def test_query
+  def _test_query
     assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
   end
-  def test_year
+  def _test_year
     pmid = '16438716'
     assert_equal "2006", PubMed.get_article(pmid).year
   end
-  def test_bibentry
+  def _test_bibentry
     assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
     assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
   end
-  def test_missing
+  def _test_missing
     pmids = '18627426,014966295'.split(",")
     Log.severity = 0
     assert PubMed.get_article(pmids).include? "014966295"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-sources
 version: !ruby/object:Gem::Version
-  version: 3.2.16
+  version: 3.4.0
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-10-18 00:00:00.000000000 Z
+date: 2024-05-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rbbt-util
@@ -95,6 +95,7 @@ files:
 - lib/rbbt/sources/jochem.rb
 - lib/rbbt/sources/kegg.rb
 - lib/rbbt/sources/matador.rb
+- lib/rbbt/sources/mesh.rb
 - lib/rbbt/sources/oncodrive_role.rb
 - lib/rbbt/sources/oreganno.rb
 - lib/rbbt/sources/organism.rb
@@ -137,6 +138,7 @@ files:
 - test/rbbt/sources/test_gscholar.rb
 - test/rbbt/sources/test_kegg.rb
 - test/rbbt/sources/test_matador.rb
+- test/rbbt/sources/test_mesh.rb
 - test/rbbt/sources/test_organism.rb
 - test/rbbt/sources/test_pharmagkb.rb
 - test/rbbt/sources/test_pina.rb
@@ -164,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.4.19
+rubygems_version: 3.5.9
 signing_key:
 specification_version: 4
 summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
@@ -176,6 +178,7 @@ test_files:
 - test/rbbt/sources/test_gscholar.rb
 - test/rbbt/sources/test_kegg.rb
 - test/rbbt/sources/test_matador.rb
+- test/rbbt/sources/test_mesh.rb
 - test/rbbt/sources/test_organism.rb
 - test/rbbt/sources/test_pharmagkb.rb
 - test/rbbt/sources/test_pina.rb