rbbt-sources 3.3.0 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b10dbe140b4c0733476823e5f5d94e57a3d9a755fc370f6b9640d1e7b8efc368
4
- data.tar.gz: 38aaf56670a07537ad0ef0c025d17e655fc5d7fb87d97ee1c08d0af82c44fbbd
3
+ metadata.gz: 87f97c6af3dab4a1b39cb258acdf9bf4d105df5703a04d6264f960ff79e81faa
4
+ data.tar.gz: ff91f67bc0775e0a20678ede8eeb312fa1e7a42d18095c2d9bcb1a5c0e4fc000
5
5
  SHA512:
6
- metadata.gz: a8ac9df1da30fc7aec3c54a5a200a0c7a9629807b9238089a1e8064e78b0ecd5bad36c4b6a77fac7e7cfdf332ad56be06149b12d0e0fd7f6506b0b82d2e03bcf
7
- data.tar.gz: acff50e8bdb0d4443c3e1dbd237539953206b7d5dcb886db64ec0677f7bba43cf3a9782e4147985a9b3fc1b34df692e89fb2b7185f2aa0f93ccd196a4d19d54a
6
+ metadata.gz: 0b23136a81511a1ad55d5bb2af5784fd74512b9355bf40023a5197180bf25b69aefa966a3dafc8347f1864da174637fa0a2f95bb687a8973a4b23f5e6778398d
7
+ data.tar.gz: 4fca8a03899b980a18da56d9cdd56bc0136ce126c718ca61836fa3cf55313f77664b7ddecadba2ad45491c2e309604da3ce7288c5de70c660f498bfcc2849aec
@@ -0,0 +1,26 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ module MeSH
5
+ extend Resource
6
+
7
+ self.subdir = "share/databases/MeSH"
8
+
9
+ MeSH.claim MeSH["data.gz"], :url, "https://nlmpubs.nlm.nih.gov/projects/mesh/rdf/mesh.nt.gz"
10
+
11
+ MeSH.claim MeSH.vocabulary, :proc do
12
+ dumper = TSV::Dumper.new :key_field => "MeSH ID", :fields => ["Label"], :type => :single
13
+ dumper.init
14
+ TSV.traverse MeSH.data, :type => :array, :into => dumper, :bar => "Processing MeSH vocab" do |line|
15
+ sub, verb, obj = line.split("\t")
16
+
17
+ next unless verb && verb.include?("rdf-schema#label")
18
+
19
+ id = sub.split("/").last[0..-2]
20
+ label = obj.split('"')[1]
21
+
22
+ [id, label]
23
+ end
24
+ end
25
+
26
+ end
@@ -51,6 +51,7 @@ module PubMed
51
51
  end
52
52
  [lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
53
53
  end
54
+
54
55
  def self.parse_xml(xml)
55
56
  require 'nokogiri'
56
57
 
@@ -91,6 +92,16 @@ module PubMed
91
92
  [lastname, forename] * ", "
92
93
  end * " and "
93
94
 
95
+ info[:mesh] = parser.search("MeshHeadingList/MeshHeading").collect do |mesh|
96
+ descriptor = mesh.search("DescriptorName").first.attr('UI')
97
+ qualifiers = mesh.search("QualifierName").collect{|q| q.attr('UI')}
98
+ [descriptor] + qualifiers.collect{|q| descriptor + q }
99
+ end.compact.flatten
100
+
101
+ info[:substance] = parser.search("NameOfSubstance").collect do |substance|
102
+ substance.attr('UI')
103
+ end
104
+
94
105
  info[:bibentry] = bibentry.downcase if bibentry
95
106
 
96
107
  info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
@@ -102,7 +113,7 @@ module PubMed
102
113
  info
103
114
  end
104
115
 
105
- attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url
116
+ attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url, :mesh, :substance
106
117
  attr_accessor *XML_KEYS.collect{|p| p.first }
107
118
 
108
119
  def initialize(xml)
@@ -141,7 +152,7 @@ module PubMed
141
152
  `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
142
153
  TmpFile.with_file do |txt|
143
154
  `pdftotext #{ pdf } #{ txt }`
144
- text = Open.read(txt) if File.exists? txt
155
+ text = Open.read(txt) if File.exist?(txt)
145
156
  end
146
157
  end
147
158
  text
@@ -806,7 +806,7 @@ file 'gene_set' do |t|
806
806
  build_code = Organism.GRC_build(organism)
807
807
  scientific_name = $scientific_name
808
808
  url = "ftp://ftp.ensembl.org/pub/release-#{num}/gtf/#{scientific_name.downcase.sub(" ", '_')}/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.gtf.gz"
809
- CMD.cmd("wget '#{url}' -O #{t.name}.gz")
809
+ Open.download(url, "#{t.name}.gz")
810
810
  nil
811
811
  end
812
812
 
@@ -825,7 +825,7 @@ file 'cdna_fasta' do |t|
825
825
  num = release.split("-").last
826
826
  build_code = Organism.GRC_build(organism)
827
827
  scientific_name = Organism.scientific_name(organism)
828
- url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.cdna.all.fa.gz"
829
- CMD.cmd("wget '#{url}' -O #{t.name}.gz")
828
+ url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.cdna.all.fa.gz"
829
+ Open.download(url, "#{t.name}.gz")
830
830
  nil
831
831
  end
@@ -9,7 +9,7 @@ SOURCE_DIR = 'source'
9
9
  def define_source_tasks(sources)
10
10
  sources.each do |name, url|
11
11
  file File.join(SOURCE_DIR, name) do |t|
12
- FileUtils.mkdir SOURCE_DIR unless File.exists? SOURCE_DIR
12
+ FileUtils.mkdir SOURCE_DIR unless File.exist? SOURCE_DIR
13
13
  Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
14
14
  Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
15
15
  end
@@ -87,5 +87,5 @@ end
87
87
  task :all => :default
88
88
 
89
89
  task :clean do
90
- ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exists?(file.to_s) end
90
+ ($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exist?(file.to_s) end
91
91
  end
@@ -0,0 +1,10 @@
1
+ require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
2
+ require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
3
+
4
+ class TestMESH < Test::Unit::TestCase
5
+ def test_vocab
6
+ tsv = MeSH.vocabulary.tsv
7
+ assert_equal "3T3 Cells", tsv["D016475"]
8
+ end
9
+ end
10
+
@@ -5,7 +5,17 @@ require 'test/unit'
5
5
 
6
6
  class TestPubMed < Test::Unit::TestCase
7
7
 
8
- def test_get_article
8
+ def test_mesh
9
+ pmid = '10866666'
10
+ assert_include PubMed.get_article(pmid).mesh, "D016475"
11
+ end
12
+
13
+ def _test_substance
14
+ pmid = '10866666'
15
+ assert_include PubMed.get_article(pmid).substance, "C000717247"
16
+ end
17
+
18
+ def _test_get_article
9
19
  pmid = '16438716'
10
20
  assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
11
21
 
@@ -13,38 +23,38 @@ class TestPubMed < Test::Unit::TestCase
13
23
  assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
14
24
  end
15
25
 
16
- def test_get_multi_abstract
26
+ def _test_get_multi_abstract
17
27
  pmid = "32141403"
18
28
 
19
29
  assert PubMed.get_article(pmid).abstract.include?("This study shows PCOS patients are at increased risk of incident schizophrenia, and the metformin treatment has a protective effect against incident schizophrenia.")
20
30
  end
21
31
 
22
- def test_full_text
32
+ def _test_full_text
23
33
  pmid = '16438716'
24
34
  assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
25
35
  end
26
36
 
27
- def test_pmc_full_xml
37
+ def _test_pmc_full_xml
28
38
  pmid = '4304705'
29
39
  assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
30
40
  end
31
41
 
32
42
 
33
- def test_query
43
+ def _test_query
34
44
  assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
35
45
  end
36
46
 
37
- def test_year
47
+ def _test_year
38
48
  pmid = '16438716'
39
49
  assert_equal "2006", PubMed.get_article(pmid).year
40
50
  end
41
51
 
42
- def test_bibentry
52
+ def _test_bibentry
43
53
  assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
44
54
  assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
45
55
  end
46
56
 
47
- def test_missing
57
+ def _test_missing
48
58
  pmids = '18627426,014966295'.split(",")
49
59
  Log.severity = 0
50
60
  assert PubMed.get_article(pmids).include? "014966295"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -95,6 +95,7 @@ files:
95
95
  - lib/rbbt/sources/jochem.rb
96
96
  - lib/rbbt/sources/kegg.rb
97
97
  - lib/rbbt/sources/matador.rb
98
+ - lib/rbbt/sources/mesh.rb
98
99
  - lib/rbbt/sources/oncodrive_role.rb
99
100
  - lib/rbbt/sources/oreganno.rb
100
101
  - lib/rbbt/sources/organism.rb
@@ -137,6 +138,7 @@ files:
137
138
  - test/rbbt/sources/test_gscholar.rb
138
139
  - test/rbbt/sources/test_kegg.rb
139
140
  - test/rbbt/sources/test_matador.rb
141
+ - test/rbbt/sources/test_mesh.rb
140
142
  - test/rbbt/sources/test_organism.rb
141
143
  - test/rbbt/sources/test_pharmagkb.rb
142
144
  - test/rbbt/sources/test_pina.rb
@@ -164,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
164
166
  - !ruby/object:Gem::Version
165
167
  version: '0'
166
168
  requirements: []
167
- rubygems_version: 3.5.0.dev
169
+ rubygems_version: 3.5.9
168
170
  signing_key:
169
171
  specification_version: 4
170
172
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
@@ -176,6 +178,7 @@ test_files:
176
178
  - test/rbbt/sources/test_gscholar.rb
177
179
  - test/rbbt/sources/test_kegg.rb
178
180
  - test/rbbt/sources/test_matador.rb
181
+ - test/rbbt/sources/test_mesh.rb
179
182
  - test/rbbt/sources/test_organism.rb
180
183
  - test/rbbt/sources/test_pharmagkb.rb
181
184
  - test/rbbt/sources/test_pina.rb