rbbt-sources 3.2.16 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0745fa19486f2c9c8c86b24ddf5f44689aa010c500db0bf7fadfc67e03072bf8
4
- data.tar.gz: b6d5f1481202d3f191b725f5c0021c371a4f044e14661d3534d31a260ffb480e
3
+ metadata.gz: 87f97c6af3dab4a1b39cb258acdf9bf4d105df5703a04d6264f960ff79e81faa
4
+ data.tar.gz: ff91f67bc0775e0a20678ede8eeb312fa1e7a42d18095c2d9bcb1a5c0e4fc000
5
5
  SHA512:
6
- metadata.gz: 1241babce1692c616242e1ef5a501c5e337f3b042110fca932ee3e320ade576538be40723968eabac97ea02133d58144275f5949c3288926d87fc03192417522
7
- data.tar.gz: 53b45cbb861f44f2f4f51f4e14a36c3f70210d729f45138228ac06a542857162e94f736b0b43a107af413da0ff68247061e4d99a774932fa5b4f65b3547205c3
6
+ metadata.gz: 0b23136a81511a1ad55d5bb2af5784fd74512b9355bf40023a5197180bf25b69aefa966a3dafc8347f1864da174637fa0a2f95bb687a8973a4b23f5e6778398d
7
+ data.tar.gz: 4fca8a03899b980a18da56d9cdd56bc0136ce126c718ca61836fa3cf55313f77664b7ddecadba2ad45491c2e309604da3ce7288c5de70c660f498bfcc2849aec
@@ -1,6 +1,7 @@
1
1
  may2009
2
2
  feb2014
3
3
  may2017
4
+ oct2018
4
5
  apr2019
5
6
  feb2021
6
7
  feb2023
@@ -15,7 +15,7 @@ module BioMart
15
15
 
16
16
  BIOMART_URL = 'http://www.ensembl.org/biomart/martservice?query='
17
17
 
18
- MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.exists? ? Rbbt.etc.biomart.missing_in_archive.yaml : {}
18
+ MISSING_IN_ARCHIVE = Rbbt.etc.biomart.missing_in_archive.exists? ? Rbbt.etc.biomart.missing_in_archive.find.yaml : {}
19
19
 
20
20
  private
21
21
 
@@ -33,7 +33,7 @@ module BioMart
33
33
 
34
34
  def self.set_archive(date)
35
35
  if defined? Rbbt and Rbbt.etc.allowed_biomart_archives.exists?
36
- raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.read.split("\n").include? date
36
+ raise "Biomart archive #{ date } is not allowed in this installation" unless Rbbt.etc.allowed_biomart_archives.find.read.split("\n").include? date
37
37
  end
38
38
  Thread.current['archive'] = date
39
39
  Thread.current['archive_url'] = BIOMART_URL.sub(/www/, date + '.archive')
@@ -0,0 +1,26 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ module MeSH
5
+ extend Resource
6
+
7
+ self.subdir = "share/databases/MeSH"
8
+
9
+ MeSH.claim MeSH["data.gz"], :url, "https://nlmpubs.nlm.nih.gov/projects/mesh/rdf/mesh.nt.gz"
10
+
11
+ MeSH.claim MeSH.vocabulary, :proc do
12
+ dumper = TSV::Dumper.new :key_field => "MeSH ID", :fields => ["Label"], :type => :single
13
+ dumper.init
14
+ TSV.traverse MeSH.data, :type => :array, :into => dumper, :bar => "Processing MeSH vocab" do |line|
15
+ sub, verb, obj = line.split("\t")
16
+
17
+ next unless verb && verb.include?("rdf-schema#label")
18
+
19
+ id = sub.split("/").last[0..-2]
20
+ label = obj.split('"')[1]
21
+
22
+ [id, label]
23
+ end
24
+ end
25
+
26
+ end
@@ -51,6 +51,7 @@ module PubMed
51
51
  end
52
52
  [lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
53
53
  end
54
+
54
55
  def self.parse_xml(xml)
55
56
  require 'nokogiri'
56
57
 
@@ -91,6 +92,16 @@ module PubMed
91
92
  [lastname, forename] * ", "
92
93
  end * " and "
93
94
 
95
+ info[:mesh] = parser.search("MeshHeadingList/MeshHeading").collect do |mesh|
96
+ descriptor = mesh.search("DescriptorName").first.attr('UI')
97
+ qualifiers = mesh.search("QualifierName").collect{|q| q.attr('UI')}
98
+ [descriptor] + qualifiers.collect{|q| descriptor + q }
99
+ end.compact.flatten
100
+
101
+ info[:substance] = parser.search("NameOfSubstance").collect do |substance|
102
+ substance.attr('UI')
103
+ end
104
+
94
105
  info[:bibentry] = bibentry.downcase if bibentry
95
106
 
96
107
  info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
@@ -102,7 +113,7 @@ module PubMed
102
113
  info
103
114
  end
104
115
 
105
- attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url
116
+ attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url, :mesh, :substance
106
117
  attr_accessor *XML_KEYS.collect{|p| p.first }
107
118
 
108
119
  def initialize(xml)
@@ -141,7 +152,7 @@ module PubMed
141
152
  `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
142
153
  TmpFile.with_file do |txt|
143
154
  `pdftotext #{ pdf } #{ txt }`
144
- text = Open.read(txt) if File.exists? txt
155
+ text = Open.read(txt) if File.exist?(txt)
145
156
  end
146
157
  end
147
158
  text
@@ -241,11 +252,13 @@ module PubMed
241
252
  pmids = [pmids] unless Array === pmids
242
253
  pmids = pmids.compact.collect{|id| id}
243
254
 
255
+ chunk_size = 50
244
256
  result_files = FileCache.cache_online_elements(pmids, 'pubmed-{ID}.xml') do |ids|
245
257
  result = {}
246
258
  values = []
247
- chunks = Misc.divide(ids, (ids.length / 20) + 1)
259
+ chunks = Misc.divide(ids, (ids.length / chunk_size) + 1)
248
260
  Log::ProgressBar.with_bar(chunks.length, :desc => "Downloading articles from PubMed") do |bar|
261
+ bar.init
249
262
  chunks.each do |list|
250
263
  begin
251
264
  Misc.try3times do
@@ -19,7 +19,11 @@ module Signor
19
19
  end
20
20
 
21
21
  Signor.claim Signor.data, :proc do
22
- Signor[".source/all.csv"].tsv :header_hash => '', :merge => true, :zipped => true
22
+ begin
23
+ Signor[".source/all.csv"].tsv :header_hash => '', :merge => true, :zipped => true
24
+ rescue
25
+ Signor[".source/all.csv"].tsv :header_hash => '', :merge => true, :one2one => true
26
+ end
23
27
  end
24
28
 
25
29
  Signor.claim Signor.protein_protein, :proc do
@@ -806,7 +806,7 @@ file 'gene_set' do |t|
806
806
  build_code = Organism.GRC_build(organism)
807
807
  scientific_name = $scientific_name
808
808
  url = "ftp://ftp.ensembl.org/pub/release-#{num}/gtf/#{scientific_name.downcase.sub(" ", '_')}/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.gtf.gz"
809
- CMD.cmd("wget '#{url}' -O #{t.name}.gz")
809
+ Open.download(url, "#{t.name}.gz")
810
810
  nil
811
811
  end
812
812
 
@@ -825,7 +825,7 @@ file 'cdna_fasta' do |t|
825
825
  num = release.split("-").last
826
826
  build_code = Organism.GRC_build(organism)
827
827
  scientific_name = Organism.scientific_name(organism)
828
- url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.cdna.all.fa.gz"
829
- CMD.cmd("wget '#{url}' -O #{t.name}.gz")
828
+ url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.cdna.all.fa.gz"
829
+ Open.download(url, "#{t.name}.gz")
830
830
  nil
831
831
  end
@@ -9,7 +9,7 @@ SOURCE_DIR = 'source'
9
9
  def define_source_tasks(sources)
10
10
  sources.each do |name, url|
11
11
  file File.join(SOURCE_DIR, name) do |t|
12
- FileUtils.mkdir SOURCE_DIR unless File.exists? SOURCE_DIR
12
+ FileUtils.mkdir SOURCE_DIR unless File.exist? SOURCE_DIR
13
13
  Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
14
14
  Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
15
15
  end
@@ -87,5 +87,5 @@ end
87
87
  task :all => :default
88
88
 
89
89
  task :clean do
90
- ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exists?(file.to_s) end
90
+ ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exist?(file.to_s) end
91
91
  end
@@ -0,0 +1,10 @@
1
+ require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
2
+ require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
3
+
4
+ class TestMESH < Test::Unit::TestCase
5
+ def test_vocab
6
+ tsv = MeSH.vocabulary.tsv
7
+ assert_equal "3T3 Cells", tsv["D016475"]
8
+ end
9
+ end
10
+
@@ -5,7 +5,17 @@ require 'test/unit'
5
5
 
6
6
  class TestPubMed < Test::Unit::TestCase
7
7
 
8
- def test_get_article
8
+ def test_mesh
9
+ pmid = '10866666'
10
+ assert_include PubMed.get_article(pmid).mesh, "D016475"
11
+ end
12
+
13
+ def _test_substance
14
+ pmid = '10866666'
15
+ assert_include PubMed.get_article(pmid).substance, "C000717247"
16
+ end
17
+
18
+ def _test_get_article
9
19
  pmid = '16438716'
10
20
  assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
11
21
 
@@ -13,38 +23,38 @@ class TestPubMed < Test::Unit::TestCase
13
23
  assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
14
24
  end
15
25
 
16
- def test_get_multi_abstract
26
+ def _test_get_multi_abstract
17
27
  pmid = "32141403"
18
28
 
19
29
  assert PubMed.get_article(pmid).abstract.include?("This study shows PCOS patients are at increased risk of incident schizophrenia, and the metformin treatment has a protective effect against incident schizophrenia.")
20
30
  end
21
31
 
22
- def test_full_text
32
+ def _test_full_text
23
33
  pmid = '16438716'
24
34
  assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
25
35
  end
26
36
 
27
- def test_pmc_full_xml
37
+ def _test_pmc_full_xml
28
38
  pmid = '4304705'
29
39
  assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
30
40
  end
31
41
 
32
42
 
33
- def test_query
43
+ def _test_query
34
44
  assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
35
45
  end
36
46
 
37
- def test_year
47
+ def _test_year
38
48
  pmid = '16438716'
39
49
  assert_equal "2006", PubMed.get_article(pmid).year
40
50
  end
41
51
 
42
- def test_bibentry
52
+ def _test_bibentry
43
53
  assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
44
54
  assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
45
55
  end
46
56
 
47
- def test_missing
57
+ def _test_missing
48
58
  pmids = '18627426,014966295'.split(",")
49
59
  Log.severity = 0
50
60
  assert PubMed.get_article(pmids).include? "014966295"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.16
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-18 00:00:00.000000000 Z
11
+ date: 2024-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -95,6 +95,7 @@ files:
95
95
  - lib/rbbt/sources/jochem.rb
96
96
  - lib/rbbt/sources/kegg.rb
97
97
  - lib/rbbt/sources/matador.rb
98
+ - lib/rbbt/sources/mesh.rb
98
99
  - lib/rbbt/sources/oncodrive_role.rb
99
100
  - lib/rbbt/sources/oreganno.rb
100
101
  - lib/rbbt/sources/organism.rb
@@ -137,6 +138,7 @@ files:
137
138
  - test/rbbt/sources/test_gscholar.rb
138
139
  - test/rbbt/sources/test_kegg.rb
139
140
  - test/rbbt/sources/test_matador.rb
141
+ - test/rbbt/sources/test_mesh.rb
140
142
  - test/rbbt/sources/test_organism.rb
141
143
  - test/rbbt/sources/test_pharmagkb.rb
142
144
  - test/rbbt/sources/test_pina.rb
@@ -164,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
164
166
  - !ruby/object:Gem::Version
165
167
  version: '0'
166
168
  requirements: []
167
- rubygems_version: 3.4.19
169
+ rubygems_version: 3.5.9
168
170
  signing_key:
169
171
  specification_version: 4
170
172
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
@@ -176,6 +178,7 @@ test_files:
176
178
  - test/rbbt/sources/test_gscholar.rb
177
179
  - test/rbbt/sources/test_kegg.rb
178
180
  - test/rbbt/sources/test_matador.rb
181
+ - test/rbbt/sources/test_mesh.rb
179
182
  - test/rbbt/sources/test_organism.rb
180
183
  - test/rbbt/sources/test_pharmagkb.rb
181
184
  - test/rbbt/sources/test_pina.rb