rbbt-sources 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87f97c6af3dab4a1b39cb258acdf9bf4d105df5703a04d6264f960ff79e81faa
|
4
|
+
data.tar.gz: ff91f67bc0775e0a20678ede8eeb312fa1e7a42d18095c2d9bcb1a5c0e4fc000
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b23136a81511a1ad55d5bb2af5784fd74512b9355bf40023a5197180bf25b69aefa966a3dafc8347f1864da174637fa0a2f95bb687a8973a4b23f5e6778398d
|
7
|
+
data.tar.gz: 4fca8a03899b980a18da56d9cdd56bc0136ce126c718ca61836fa3cf55313f77664b7ddecadba2ad45491c2e309604da3ce7288c5de70c660f498bfcc2849aec
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module MeSH
|
5
|
+
extend Resource
|
6
|
+
|
7
|
+
self.subdir = "share/databases/MeSH"
|
8
|
+
|
9
|
+
MeSH.claim MeSH["data.gz"], :url, "https://nlmpubs.nlm.nih.gov/projects/mesh/rdf/mesh.nt.gz"
|
10
|
+
|
11
|
+
MeSH.claim MeSH.vocabulary, :proc do
|
12
|
+
dumper = TSV::Dumper.new :key_field => "MeSH ID", :fields => ["Label"], :type => :single
|
13
|
+
dumper.init
|
14
|
+
TSV.traverse MeSH.data, :type => :array, :into => dumper, :bar => "Processing MeSH vocab" do |line|
|
15
|
+
sub, verb, obj = line.split("\t")
|
16
|
+
|
17
|
+
next unless verb && verb.include?("rdf-schema#label")
|
18
|
+
|
19
|
+
id = sub.split("/").last[0..-2]
|
20
|
+
label = obj.split('"')[1]
|
21
|
+
|
22
|
+
[id, label]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -51,6 +51,7 @@ module PubMed
|
|
51
51
|
end
|
52
52
|
[lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
|
53
53
|
end
|
54
|
+
|
54
55
|
def self.parse_xml(xml)
|
55
56
|
require 'nokogiri'
|
56
57
|
|
@@ -91,6 +92,16 @@ module PubMed
|
|
91
92
|
[lastname, forename] * ", "
|
92
93
|
end * " and "
|
93
94
|
|
95
|
+
info[:mesh] = parser.search("MeshHeadingList/MeshHeading").collect do |mesh|
|
96
|
+
descriptor = mesh.search("DescriptorName").first.attr('UI')
|
97
|
+
qualifiers = mesh.search("QualifierName").collect{|q| q.attr('UI')}
|
98
|
+
[descriptor] + qualifiers.collect{|q| descriptor + q }
|
99
|
+
end.compact.flatten
|
100
|
+
|
101
|
+
info[:substance] = parser.search("NameOfSubstance").collect do |substance|
|
102
|
+
substance.attr('UI')
|
103
|
+
end
|
104
|
+
|
94
105
|
info[:bibentry] = bibentry.downcase if bibentry
|
95
106
|
|
96
107
|
info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
|
@@ -102,7 +113,7 @@ module PubMed
|
|
102
113
|
info
|
103
114
|
end
|
104
115
|
|
105
|
-
attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url
|
116
|
+
attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url, :mesh, :substance
|
106
117
|
attr_accessor *XML_KEYS.collect{|p| p.first }
|
107
118
|
|
108
119
|
def initialize(xml)
|
@@ -141,7 +152,7 @@ module PubMed
|
|
141
152
|
`wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
|
142
153
|
TmpFile.with_file do |txt|
|
143
154
|
`pdftotext #{ pdf } #{ txt }`
|
144
|
-
text = Open.read(txt) if File.
|
155
|
+
text = Open.read(txt) if File.exist?(txt)
|
145
156
|
end
|
146
157
|
end
|
147
158
|
text
|
@@ -806,7 +806,7 @@ file 'gene_set' do |t|
|
|
806
806
|
build_code = Organism.GRC_build(organism)
|
807
807
|
scientific_name = $scientific_name
|
808
808
|
url = "ftp://ftp.ensembl.org/pub/release-#{num}/gtf/#{scientific_name.downcase.sub(" ", '_')}/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.gtf.gz"
|
809
|
-
|
809
|
+
Open.download(url, "#{t.name}.gz")
|
810
810
|
nil
|
811
811
|
end
|
812
812
|
|
@@ -825,7 +825,7 @@ file 'cdna_fasta' do |t|
|
|
825
825
|
num = release.split("-").last
|
826
826
|
build_code = Organism.GRC_build(organism)
|
827
827
|
scientific_name = Organism.scientific_name(organism)
|
828
|
-
url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.cdna.all.fa.gz"
|
829
|
-
|
828
|
+
url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.cdna.all.fa.gz"
|
829
|
+
Open.download(url, "#{t.name}.gz")
|
830
830
|
nil
|
831
831
|
end
|
@@ -9,7 +9,7 @@ SOURCE_DIR = 'source'
|
|
9
9
|
def define_source_tasks(sources)
|
10
10
|
sources.each do |name, url|
|
11
11
|
file File.join(SOURCE_DIR, name) do |t|
|
12
|
-
FileUtils.mkdir SOURCE_DIR unless File.
|
12
|
+
FileUtils.mkdir SOURCE_DIR unless File.exist? SOURCE_DIR
|
13
13
|
Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
|
14
14
|
Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
|
15
15
|
end
|
@@ -87,5 +87,5 @@ end
|
|
87
87
|
task :all => :default
|
88
88
|
|
89
89
|
task :clean do
|
90
|
-
($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.
|
90
|
+
($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exist?(file.to_s) end
|
91
91
|
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
class TestMESH < Test::Unit::TestCase
|
5
|
+
def test_vocab
|
6
|
+
tsv = MeSH.vocabulary.tsv
|
7
|
+
assert_equal "3T3 Cells", tsv["D016475"]
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
@@ -5,7 +5,17 @@ require 'test/unit'
|
|
5
5
|
|
6
6
|
class TestPubMed < Test::Unit::TestCase
|
7
7
|
|
8
|
-
def
|
8
|
+
def test_mesh
|
9
|
+
pmid = '10866666'
|
10
|
+
assert_include PubMed.get_article(pmid).mesh, "D016475"
|
11
|
+
end
|
12
|
+
|
13
|
+
def _test_substance
|
14
|
+
pmid = '10866666'
|
15
|
+
assert_include PubMed.get_article(pmid).substance, "C000717247"
|
16
|
+
end
|
17
|
+
|
18
|
+
def _test_get_article
|
9
19
|
pmid = '16438716'
|
10
20
|
assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
|
11
21
|
|
@@ -13,38 +23,38 @@ class TestPubMed < Test::Unit::TestCase
|
|
13
23
|
assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
|
14
24
|
end
|
15
25
|
|
16
|
-
def
|
26
|
+
def _test_get_multi_abstract
|
17
27
|
pmid = "32141403"
|
18
28
|
|
19
29
|
assert PubMed.get_article(pmid).abstract.include?("This study shows PCOS patients are at increased risk of incident schizophrenia, and the metformin treatment has a protective effect against incident schizophrenia.")
|
20
30
|
end
|
21
31
|
|
22
|
-
def
|
32
|
+
def _test_full_text
|
23
33
|
pmid = '16438716'
|
24
34
|
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
25
35
|
end
|
26
36
|
|
27
|
-
def
|
37
|
+
def _test_pmc_full_xml
|
28
38
|
pmid = '4304705'
|
29
39
|
assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
|
30
40
|
end
|
31
41
|
|
32
42
|
|
33
|
-
def
|
43
|
+
def _test_query
|
34
44
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
35
45
|
end
|
36
46
|
|
37
|
-
def
|
47
|
+
def _test_year
|
38
48
|
pmid = '16438716'
|
39
49
|
assert_equal "2006", PubMed.get_article(pmid).year
|
40
50
|
end
|
41
51
|
|
42
|
-
def
|
52
|
+
def _test_bibentry
|
43
53
|
assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
|
44
54
|
assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
|
45
55
|
end
|
46
56
|
|
47
|
-
def
|
57
|
+
def _test_missing
|
48
58
|
pmids = '18627426,014966295'.split(",")
|
49
59
|
Log.severity = 0
|
50
60
|
assert PubMed.get_article(pmids).include? "014966295"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/rbbt/sources/jochem.rb
|
96
96
|
- lib/rbbt/sources/kegg.rb
|
97
97
|
- lib/rbbt/sources/matador.rb
|
98
|
+
- lib/rbbt/sources/mesh.rb
|
98
99
|
- lib/rbbt/sources/oncodrive_role.rb
|
99
100
|
- lib/rbbt/sources/oreganno.rb
|
100
101
|
- lib/rbbt/sources/organism.rb
|
@@ -137,6 +138,7 @@ files:
|
|
137
138
|
- test/rbbt/sources/test_gscholar.rb
|
138
139
|
- test/rbbt/sources/test_kegg.rb
|
139
140
|
- test/rbbt/sources/test_matador.rb
|
141
|
+
- test/rbbt/sources/test_mesh.rb
|
140
142
|
- test/rbbt/sources/test_organism.rb
|
141
143
|
- test/rbbt/sources/test_pharmagkb.rb
|
142
144
|
- test/rbbt/sources/test_pina.rb
|
@@ -164,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
166
|
- !ruby/object:Gem::Version
|
165
167
|
version: '0'
|
166
168
|
requirements: []
|
167
|
-
rubygems_version: 3.5.
|
169
|
+
rubygems_version: 3.5.9
|
168
170
|
signing_key:
|
169
171
|
specification_version: 4
|
170
172
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
@@ -176,6 +178,7 @@ test_files:
|
|
176
178
|
- test/rbbt/sources/test_gscholar.rb
|
177
179
|
- test/rbbt/sources/test_kegg.rb
|
178
180
|
- test/rbbt/sources/test_matador.rb
|
181
|
+
- test/rbbt/sources/test_mesh.rb
|
179
182
|
- test/rbbt/sources/test_organism.rb
|
180
183
|
- test/rbbt/sources/test_pharmagkb.rb
|
181
184
|
- test/rbbt/sources/test_pina.rb
|