rbbt-sources 3.3.0 → 3.4.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 87f97c6af3dab4a1b39cb258acdf9bf4d105df5703a04d6264f960ff79e81faa
|
4
|
+
data.tar.gz: ff91f67bc0775e0a20678ede8eeb312fa1e7a42d18095c2d9bcb1a5c0e4fc000
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b23136a81511a1ad55d5bb2af5784fd74512b9355bf40023a5197180bf25b69aefa966a3dafc8347f1864da174637fa0a2f95bb687a8973a4b23f5e6778398d
|
7
|
+
data.tar.gz: 4fca8a03899b980a18da56d9cdd56bc0136ce126c718ca61836fa3cf55313f77664b7ddecadba2ad45491c2e309604da3ce7288c5de70c660f498bfcc2849aec
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module MeSH
|
5
|
+
extend Resource
|
6
|
+
|
7
|
+
self.subdir = "share/databases/MeSH"
|
8
|
+
|
9
|
+
MeSH.claim MeSH["data.gz"], :url, "https://nlmpubs.nlm.nih.gov/projects/mesh/rdf/mesh.nt.gz"
|
10
|
+
|
11
|
+
MeSH.claim MeSH.vocabulary, :proc do
|
12
|
+
dumper = TSV::Dumper.new :key_field => "MeSH ID", :fields => ["Label"], :type => :single
|
13
|
+
dumper.init
|
14
|
+
TSV.traverse MeSH.data, :type => :array, :into => dumper, :bar => "Processing MeSH vocab" do |line|
|
15
|
+
sub, verb, obj = line.split("\t")
|
16
|
+
|
17
|
+
next unless verb && verb.include?("rdf-schema#label")
|
18
|
+
|
19
|
+
id = sub.split("/").last[0..-2]
|
20
|
+
label = obj.split('"')[1]
|
21
|
+
|
22
|
+
[id, label]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -51,6 +51,7 @@ module PubMed
|
|
51
51
|
end
|
52
52
|
[lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
|
53
53
|
end
|
54
|
+
|
54
55
|
def self.parse_xml(xml)
|
55
56
|
require 'nokogiri'
|
56
57
|
|
@@ -91,6 +92,16 @@ module PubMed
|
|
91
92
|
[lastname, forename] * ", "
|
92
93
|
end * " and "
|
93
94
|
|
95
|
+
info[:mesh] = parser.search("MeshHeadingList/MeshHeading").collect do |mesh|
|
96
|
+
descriptor = mesh.search("DescriptorName").first.attr('UI')
|
97
|
+
qualifiers = mesh.search("QualifierName").collect{|q| q.attr('UI')}
|
98
|
+
[descriptor] + qualifiers.collect{|q| descriptor + q }
|
99
|
+
end.compact.flatten
|
100
|
+
|
101
|
+
info[:substance] = parser.search("NameOfSubstance").collect do |substance|
|
102
|
+
substance.attr('UI')
|
103
|
+
end
|
104
|
+
|
94
105
|
info[:bibentry] = bibentry.downcase if bibentry
|
95
106
|
|
96
107
|
info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
|
@@ -102,7 +113,7 @@ module PubMed
|
|
102
113
|
info
|
103
114
|
end
|
104
115
|
|
105
|
-
attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url
|
116
|
+
attr_accessor :title, :abstract, :journal, :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf, :pdf_url, :mesh, :substance
|
106
117
|
attr_accessor *XML_KEYS.collect{|p| p.first }
|
107
118
|
|
108
119
|
def initialize(xml)
|
@@ -141,7 +152,7 @@ module PubMed
|
|
141
152
|
`wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
|
142
153
|
TmpFile.with_file do |txt|
|
143
154
|
`pdftotext #{ pdf } #{ txt }`
|
144
|
-
text = Open.read(txt) if File.exists?(txt)
|
155
|
+
text = Open.read(txt) if File.exist?(txt)
|
145
156
|
end
|
146
157
|
end
|
147
158
|
text
|
@@ -806,7 +806,7 @@ file 'gene_set' do |t|
|
|
806
806
|
build_code = Organism.GRC_build(organism)
|
807
807
|
scientific_name = $scientific_name
|
808
808
|
url = "ftp://ftp.ensembl.org/pub/release-#{num}/gtf/#{scientific_name.downcase.sub(" ", '_')}/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.gtf.gz"
|
809
|
-
|
809
|
+
Open.download(url, "#{t.name}.gz")
|
810
810
|
nil
|
811
811
|
end
|
812
812
|
|
@@ -825,7 +825,7 @@ file 'cdna_fasta' do |t|
|
|
825
825
|
num = release.split("-").last
|
826
826
|
build_code = Organism.GRC_build(organism)
|
827
827
|
scientific_name = Organism.scientific_name(organism)
|
828
|
-
url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.cdna.all.fa.gz"
|
829
|
-
|
828
|
+
url = "ftp://ftp.ensembl.org/pub/release-#{num}/fasta/#{scientific_name.downcase.sub(" ", '_')}/cdna/#{scientific_name.sub(" ", '_')}.#{build_code}.#{num}.cdna.all.fa.gz"
|
829
|
+
Open.download(url, "#{t.name}.gz")
|
830
830
|
nil
|
831
831
|
end
|
@@ -9,7 +9,7 @@ SOURCE_DIR = 'source'
|
|
9
9
|
def define_source_tasks(sources)
|
10
10
|
sources.each do |name, url|
|
11
11
|
file File.join(SOURCE_DIR, name) do |t|
|
12
|
-
FileUtils.mkdir SOURCE_DIR unless File.exists? SOURCE_DIR
|
12
|
+
FileUtils.mkdir SOURCE_DIR unless File.exist? SOURCE_DIR
|
13
13
|
Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
|
14
14
|
Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
|
15
15
|
end
|
@@ -87,5 +87,5 @@ end
|
|
87
87
|
task :all => :default
|
88
88
|
|
89
89
|
task :clean do
|
90
|
-
($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exists?(file.to_s) end
|
90
|
+
($__tsv_tasks + $__files).each do |file| FileUtils.rm file.to_s if File.exist?(file.to_s) end
|
91
91
|
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
class TestMESH < Test::Unit::TestCase
|
5
|
+
def test_vocab
|
6
|
+
tsv = MeSH.vocabulary.tsv
|
7
|
+
assert_equal "3T3 Cells", tsv["D016475"]
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
@@ -5,7 +5,17 @@ require 'test/unit'
|
|
5
5
|
|
6
6
|
class TestPubMed < Test::Unit::TestCase
|
7
7
|
|
8
|
-
def test_get_article
|
8
|
+
def test_mesh
|
9
|
+
pmid = '10866666'
|
10
|
+
assert_include PubMed.get_article(pmid).mesh, "D016475"
|
11
|
+
end
|
12
|
+
|
13
|
+
def _test_substance
|
14
|
+
pmid = '10866666'
|
15
|
+
assert_include PubMed.get_article(pmid).substance, "C000717247"
|
16
|
+
end
|
17
|
+
|
18
|
+
def _test_get_article
|
9
19
|
pmid = '16438716'
|
10
20
|
assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
|
11
21
|
|
@@ -13,38 +23,38 @@ class TestPubMed < Test::Unit::TestCase
|
|
13
23
|
assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
|
14
24
|
end
|
15
25
|
|
16
|
-
def test_get_multi_abstract
|
26
|
+
def _test_get_multi_abstract
|
17
27
|
pmid = "32141403"
|
18
28
|
|
19
29
|
assert PubMed.get_article(pmid).abstract.include?("This study shows PCOS patients are at increased risk of incident schizophrenia, and the metformin treatment has a protective effect against incident schizophrenia.")
|
20
30
|
end
|
21
31
|
|
22
|
-
def test_full_text
|
32
|
+
def _test_full_text
|
23
33
|
pmid = '16438716'
|
24
34
|
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
25
35
|
end
|
26
36
|
|
27
|
-
def test_pmc_full_xml
|
37
|
+
def _test_pmc_full_xml
|
28
38
|
pmid = '4304705'
|
29
39
|
assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
|
30
40
|
end
|
31
41
|
|
32
42
|
|
33
|
-
def test_query
|
43
|
+
def _test_query
|
34
44
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
35
45
|
end
|
36
46
|
|
37
|
-
def test_year
|
47
|
+
def _test_year
|
38
48
|
pmid = '16438716'
|
39
49
|
assert_equal "2006", PubMed.get_article(pmid).year
|
40
50
|
end
|
41
51
|
|
42
|
-
def test_bibentry
|
52
|
+
def _test_bibentry
|
43
53
|
assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
|
44
54
|
assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
|
45
55
|
end
|
46
56
|
|
47
|
-
def test_missing
|
57
|
+
def _test_missing
|
48
58
|
pmids = '18627426,014966295'.split(",")
|
49
59
|
Log.severity = 0
|
50
60
|
assert PubMed.get_article(pmids).include? "014966295"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.0
|
4
|
+
version: 3.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/rbbt/sources/jochem.rb
|
96
96
|
- lib/rbbt/sources/kegg.rb
|
97
97
|
- lib/rbbt/sources/matador.rb
|
98
|
+
- lib/rbbt/sources/mesh.rb
|
98
99
|
- lib/rbbt/sources/oncodrive_role.rb
|
99
100
|
- lib/rbbt/sources/oreganno.rb
|
100
101
|
- lib/rbbt/sources/organism.rb
|
@@ -137,6 +138,7 @@ files:
|
|
137
138
|
- test/rbbt/sources/test_gscholar.rb
|
138
139
|
- test/rbbt/sources/test_kegg.rb
|
139
140
|
- test/rbbt/sources/test_matador.rb
|
141
|
+
- test/rbbt/sources/test_mesh.rb
|
140
142
|
- test/rbbt/sources/test_organism.rb
|
141
143
|
- test/rbbt/sources/test_pharmagkb.rb
|
142
144
|
- test/rbbt/sources/test_pina.rb
|
@@ -164,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
166
|
- !ruby/object:Gem::Version
|
165
167
|
version: '0'
|
166
168
|
requirements: []
|
167
|
-
rubygems_version: 3.5.
|
169
|
+
rubygems_version: 3.5.9
|
168
170
|
signing_key:
|
169
171
|
specification_version: 4
|
170
172
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|
@@ -176,6 +178,7 @@ test_files:
|
|
176
178
|
- test/rbbt/sources/test_gscholar.rb
|
177
179
|
- test/rbbt/sources/test_kegg.rb
|
178
180
|
- test/rbbt/sources/test_matador.rb
|
181
|
+
- test/rbbt/sources/test_mesh.rb
|
179
182
|
- test/rbbt/sources/test_organism.rb
|
180
183
|
- test/rbbt/sources/test_pharmagkb.rb
|
181
184
|
- test/rbbt/sources/test_pina.rb
|