rbbt-sources 3.3.0 → 3.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b10dbe140b4c0733476823e5f5d94e57a3d9a755fc370f6b9640d1e7b8efc368
4
- data.tar.gz: 38aaf56670a07537ad0ef0c025d17e655fc5d7fb87d97ee1c08d0af82c44fbbd
3
+ metadata.gz: 87f97c6af3dab4a1b39cb258acdf9bf4d105df5703a04d6264f960ff79e81faa
4
+ data.tar.gz: ff91f67bc0775e0a20678ede8eeb312fa1e7a42d18095c2d9bcb1a5c0e4fc000
5
5
  SHA512:
6
- metadata.gz: a8ac9df1da30fc7aec3c54a5a200a0c7a9629807b9238089a1e8064e78b0ecd5bad36c4b6a77fac7e7cfdf332ad56be06149b12d0e0fd7f6506b0b82d2e03bcf
7
- data.tar.gz: acff50e8bdb0d4443c3e1dbd237539953206b7d5dcb886db64ec0677f7bba43cf3a9782e4147985a9b3fc1b34df692e89fb2b7185f2aa0f93ccd196a4d19d54a
6
+ metadata.gz: 0b23136a81511a1ad55d5bb2af5784fd74512b9355bf40023a5197180bf25b69aefa966a3dafc8347f1864da174637fa0a2f95bb687a8973a4b23f5e6778398d
7
+ data.tar.gz: 4fca8a03899b980a18da56d9cdd56bc0136ce126c718ca61836fa3cf55313f77664b7ddecadba2ad45491c2e309604da3ce7288c5de70c660f498bfcc2849aec
@@ -0,0 +1,26 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ module MeSH
5
+ extend Resource
6
+
7
+ self.subdir = "share/databases/MeSH"
8
+
9
+ MeSH.claim MeSH["data.gz"], :url, "https://nlmpubs.nlm.nih.gov/projects/mesh/rdf/mesh.nt.gz"
10
+
11
+ MeSH.claim MeSH.vocabulary, :proc do
12
+ dumper = TSV::Dumper.new :key_field => "MeSH ID", :fields => ["Label"], :type => :single
13
+ dumper.init
14
+ TSV.traverse MeSH.data, :type => :array, :into => dumper, :bar => "Processing MeSH vocab" do |line|
15
+ sub, verb, obj = line.split("\t")
16
+
17
+ next unless verb && verb.include?("rdf-schema#label")
18
+
19
+ id = sub.split("/").last[0..-2]
20
+ label = obj.split('"')[1]
21
+
22
+ [id, label]
23
+ end
24
+ end
25
+
26
+ end
@@ -51,6 +51,7 @@ module PubMed
51
51
  end
52
52
  [lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
53
53
  end
54
+
54
55
  def self.parse_xml(xml)
55
56
  require 'nokogiri'
56
57
 
@@ -91,6 +92,16 @@ module PubMed
91
92
  [lastname, forename] * ", "
92
93
  end * " and "
93
94
 
95
+ info[:mesh] = parser.search("MeshHeadingList/MeshHeading").collect do |mesh|
96
+ descriptor = mesh.search("DescriptorName").first.attr('UI')
97
+ qualifiers = mesh.search("QualifierName").collect{|q| q.attr('UI')}
98
+ [descriptor] + qualifiers.collect{|q| descriptor + q }
99
+ end.compact.flatten
100
+
101
+ info[:substance] = parser.search("NameOfSubstance").collect do |substance|
102
+ substance.attr('UI')
103
+ end
104
+
94
105
  info[:bibentry] = bibentry.downcase if bibentry
95
106
 
96
107
  info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
@@ -102,7 +113,7 @@ module PubMed
102
113
  info
103
114
  end
104
115
 
105
- attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url
116
+ attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url, :mesh, :substance
106
117
  attr_accessor *XML_KEYS.collect{|p| p.first }
107
118
 
108
119
  def initialize(xml)
@@ -141,7 +152,7 @@ module PubMed
141
152
  `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
142
153
  TmpFile.with_file do |txt|
143
154
  `pdftotext #{ pdf } #{ txt }`
144
- text = Open.read(txt) if File.exists? txt
155
+ text = Open.read(txt) if File.exist?(txt)
145
156
  end
146
157
  end
147
158
  text
@@ -806,7 +806,7 @@ file 'gene_set' do |t|
806
806
  build_code = Organism.GRC_build(organism)
807
807
  scientific_name = $scientific_name
808
808
  url = "ftp://ftp.ensembl.org/pub/release-#{num}/gtf/#{scientific_name.downcase.sub(" ", '_')}/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.gtf.gz"
809
- CMD.cmd("wget '#{url}' -O #{t.name}.gz")
809
+ Open.download(url, "#{t.name}.gz")
810
810
  nil
811
811
  end
812
812
 
@@ -825,7 +825,7 @@ file 'cdna_fasta' do |t|
825
825
  num = release.split("-").last
826
826
  build_code = Organism.GRC_build(organism)
827
827
  scientific_name = Organism.scientific_name(organism)
828
- url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.cdna.all.fa.gz"
829
- CMD.cmd("wget '#{url}' -O #{t.name}.gz")
828
+ url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.cdna.all.fa.gz"
829
+ Open.download(url, "#{t.name}.gz")
830
830
  nil
831
831
  end
@@ -9,7 +9,7 @@ SOURCE_DIR = 'source'
9
9
  def define_source_tasks(sources)
10
10
  sources.each do |name, url|
11
11
  file File.join(SOURCE_DIR, name) do |t|
12
- FileUtils.mkdir SOURCE_DIR unless File.exists? SOURCE_DIR
12
+ FileUtils.mkdir SOURCE_DIR unless File.exist? SOURCE_DIR
13
13
  Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
14
14
  Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
15
15
  end
@@ -87,5 +87,5 @@ end
87
87
  task :all => :default
88
88
 
89
89
  task :clean do
90
- ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exists?(file.to_s) end
90
+ ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exist?(file.to_s) end
91
91
  end
@@ -0,0 +1,10 @@
1
+ require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
2
+ require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
3
+
4
+ class TestMESH < Test::Unit::TestCase
5
+ def test_vocab
6
+ tsv = MeSH.vocabulary.tsv
7
+ assert_equal "3T3 Cells", tsv["D016475"]
8
+ end
9
+ end
10
+
@@ -5,7 +5,17 @@ require 'test/unit'
5
5
 
6
6
  class TestPubMed < Test::Unit::TestCase
7
7
 
8
- def test_get_article
8
+ def test_mesh
9
+ pmid = '10866666'
10
+ assert_include PubMed.get_article(pmid).mesh, "D016475"
11
+ end
12
+
13
+ def _test_substance
14
+ pmid = '10866666'
15
+ assert_include PubMed.get_article(pmid).substance, "C000717247"
16
+ end
17
+
18
+ def _test_get_article
9
19
  pmid = '16438716'
10
20
  assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
11
21
 
@@ -13,38 +23,38 @@ class TestPubMed < Test::Unit::TestCase
13
23
  assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
14
24
  end
15
25
 
16
- def test_get_multi_abstract
26
+ def _test_get_multi_abstract
17
27
  pmid = "32141403"
18
28
 
19
29
  assert PubMed.get_article(pmid).abstract.include?("This study shows PCOS patients are at increased risk of incident schizophrenia, and the metformin treatment has a protective effect against incident schizophrenia.")
20
30
  end
21
31
 
22
- def test_full_text
32
+ def _test_full_text
23
33
  pmid = '16438716'
24
34
  assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
25
35
  end
26
36
 
27
- def test_pmc_full_xml
37
+ def _test_pmc_full_xml
28
38
  pmid = '4304705'
29
39
  assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
30
40
  end
31
41
 
32
42
 
33
- def test_query
43
+ def _test_query
34
44
  assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
35
45
  end
36
46
 
37
- def test_year
47
+ def _test_year
38
48
  pmid = '16438716'
39
49
  assert_equal "2006", PubMed.get_article(pmid).year
40
50
  end
41
51
 
42
- def test_bibentry
52
+ def _test_bibentry
43
53
  assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
44
54
  assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
45
55
  end
46
56
 
47
- def test_missing
57
+ def _test_missing
48
58
  pmids = '18627426,014966295'.split(",")
49
59
  Log.severity = 0
50
60
  assert PubMed.get_article(pmids).include? "014966295"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -95,6 +95,7 @@ files:
95
95
  - lib/rbbt/sources/jochem.rb
96
96
  - lib/rbbt/sources/kegg.rb
97
97
  - lib/rbbt/sources/matador.rb
98
+ - lib/rbbt/sources/mesh.rb
98
99
  - lib/rbbt/sources/oncodrive_role.rb
99
100
  - lib/rbbt/sources/oreganno.rb
100
101
  - lib/rbbt/sources/organism.rb
@@ -137,6 +138,7 @@ files:
137
138
  - test/rbbt/sources/test_gscholar.rb
138
139
  - test/rbbt/sources/test_kegg.rb
139
140
  - test/rbbt/sources/test_matador.rb
141
+ - test/rbbt/sources/test_mesh.rb
140
142
  - test/rbbt/sources/test_organism.rb
141
143
  - test/rbbt/sources/test_pharmagkb.rb
142
144
  - test/rbbt/sources/test_pina.rb
@@ -164,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
164
166
  - !ruby/object:Gem::Version
165
167
  version: '0'
166
168
  requirements: []
167
- rubygems_version: 3.5.0.dev
169
+ rubygems_version: 3.5.9
168
170
  signing_key:
169
171
  specification_version: 4
170
172
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
@@ -176,6 +178,7 @@ test_files:
176
178
  - test/rbbt/sources/test_gscholar.rb
177
179
  - test/rbbt/sources/test_kegg.rb
178
180
  - test/rbbt/sources/test_matador.rb
181
+ - test/rbbt/sources/test_mesh.rb
179
182
  - test/rbbt/sources/test_organism.rb
180
183
  - test/rbbt/sources/test_pharmagkb.rb
181
184
  - test/rbbt/sources/test_pina.rb