rbbt-sources 3.1.41 → 3.1.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e0a35626b221e15867cb54ca9e8bbeb007762b5a99afed75d57d6b473385c287
4
- data.tar.gz: 59d067abbfdcd686f39b3604282a280ca9bf4a0e8f3284698202dae8d9d50d55
3
+ metadata.gz: 6cecde427390fc444e5ef5b6bef42072bf87714b404647c6c086da92a1eaa13a
4
+ data.tar.gz: 673429c8eab433ec9854c81d52450f8885c57e4b3f8b6779ded44585e6f88343
5
5
  SHA512:
6
- metadata.gz: 1c9c6a2dc5184319e94af671762227bba331b49784cf3e4ac0550b6c023c7b072920d75175b0547c2d8cb4034dbca53dda9514238773cf98f103cc6be978c4ec
7
- data.tar.gz: df52691bf65b891a906217f5e2647577bc74940bbd3fa1752115f624a313472a70a43fb96b786e30a4612bc71badb7b3e68c539e0024fe0f0958ee5f33d910af
6
+ metadata.gz: e39a3a6418bfb514ec786da0128f6fb8437da9728224dacd41b837300680c6d34e821f08954820cb5b7fd064546fee21fd0557d179c469eda969485ffd5cd965
7
+ data.tar.gz: 9f332f35fcd9c66db7506875efcc7c5276b1f588dd3b70ca299aaa2a7f25c33a650e496ba207af4e12e3bb2a6dfbc9ec8b1f0729e3acedc354145572dd268836
@@ -1,3 +1,7 @@
1
+ ">oct2014":
2
+ - rgd~rgd_id
3
+ ">jun2019":
4
+ - entrezgene~entrezgene_id
1
5
  ">dec2017":
2
6
  - unigene
3
7
  ">dec2016":
@@ -134,11 +134,14 @@ module BioMart
134
134
  # cause an error if the BioMart WS does not allow filtering with that
135
135
  # attribute.
136
136
  def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
137
+ IndiferentHash.setup(open_options)
137
138
  open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
138
139
  filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
139
140
  attrs ||= []
140
141
  open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
141
142
 
143
+ IndiferentHash.setup(open_options)
144
+
142
145
  Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
143
146
 
144
147
  max_items = 2
@@ -167,6 +170,7 @@ module BioMart
167
170
  end
168
171
 
169
172
  open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
173
+
170
174
  if filename.nil?
171
175
  results = TSV.open data, open_options
172
176
  results.key_field = main
@@ -200,6 +204,7 @@ module BioMart
200
204
  missing+=v if Organism.compare_archives(current_archive, t) == -1
201
205
  elsif k=~ /^>(.*)/
202
206
  t = $1.strip
207
+ iii [current_archive, t, Organism.compare_archives(current_archive, t)]
203
208
  missing+=v if Organism.compare_archives(current_archive, t) == 1
204
209
  end
205
210
  end
@@ -14,7 +14,7 @@ module Entrez
14
14
  options = Misc.add_defaults options, :key_field => 1, :fields => [5], :persist => true, :merge => true
15
15
 
16
16
  taxs = [taxs] unless Array === taxs
17
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
17
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
18
18
 
19
19
  tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
20
20
  tsv.key_field = "Entrez Gene ID"
@@ -26,7 +26,7 @@ module Entrez
26
26
  options = Misc.add_defaults options, :key_field => 1, :fields => [2], :persist => true, :merge => true
27
27
 
28
28
  taxs = [taxs] unless Array === taxs
29
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
29
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
30
30
 
31
31
  tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
32
32
  tsv.key_field = "Entrez Gene ID"
@@ -39,7 +39,7 @@ module Entrez
39
39
  options = {:key_field => 1, :fields => [2], :persist => true, :merge => true}
40
40
 
41
41
  taxs = [taxs] unless taxs.is_a?(Array)
42
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
42
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
43
43
 
44
44
  Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
45
45
  end
@@ -27,7 +27,7 @@ module GO
27
27
  def self.init
28
28
  Persist.persist_tsv(nil, 'gene_ontology', {}, :persist => true) do |info|
29
29
  info.serializer = :marshal if info.respond_to? :serializer
30
- Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
30
+ Rbbt.share.databases.GO.gene_ontology.produce.read.split(/\[Term\]/).each{|term|
31
31
  term_info = {}
32
32
 
33
33
  term.split(/\n/). select{|l| l =~ /:/}.each{|l|
@@ -7,7 +7,7 @@ module Pina
7
7
  Pina.claim Pina.protein_protein, :proc do
8
8
  require 'rbbt/sources/organism'
9
9
 
10
- url = "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20121210.txt"
10
+ url = "https://omics.bjcancer.org/pina/download/Homo%20sapiens-20140521.tsv"
11
11
 
12
12
  dumper = TSV::Dumper.new :type => :double,
13
13
  :key_field => 'UniProt/SwissProt Accession', :namespace => Organism.default_code("Hsa"),
@@ -109,6 +109,14 @@ module PubMed
109
109
  end
110
110
  end
111
111
 
112
+ def pmc_full_xml
113
+ begin
114
+ Open.read("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=#{pmid}")
115
+ rescue
116
+ nil
117
+ end
118
+ end
119
+
112
120
  def pdf_url
113
121
  return pmc_pdf if pmc_pdf
114
122
  @gscholar_pdf ||= begin
@@ -121,18 +129,22 @@ module PubMed
121
129
  end
122
130
 
123
131
  def full_text
124
- return nil if pdf_url.nil?
125
-
126
- text = nil
127
- TmpFile.with_file do |pdf|
128
-
129
- # Change user-agent, oh well...
130
- `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
131
- TmpFile.with_file do |txt|
132
- `pdftotext #{ pdf } #{ txt }`
133
- text = Open.read(txt) if File.exists? txt
134
- end
135
- end
132
+ text = if pdf_url
133
+ text = nil
134
+ TmpFile.with_file do |pdf|
135
+ # Change user-agent, oh well...
136
+ `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
137
+ TmpFile.with_file do |txt|
138
+ `pdftotext #{ pdf } #{ txt }`
139
+ text = Open.read(txt) if File.exists? txt
140
+ end
141
+ end
142
+ text
143
+ elsif pmc_full_xml
144
+ pmc_full_xml
145
+ else
146
+ nil
147
+ end
136
148
 
137
149
  Misc.fixutf8(text)
138
150
  end
@@ -177,46 +189,46 @@ module PubMed
177
189
  # ID specified as an argument. If +pmid+ is an array instead of a single
178
190
  # identifier it returns an hash with the Article object for each id.
179
191
  # It uses the Rbbt cache to save the articles xml.
180
- def self.get_article(pmid)
181
-
182
- if pmid.is_a? Array
183
- missing = []
184
- list = {}
185
-
186
- pmid.each{|p|
187
- filename = p.to_s + '.xml'
188
- if File.exists? FileCache.path(filename)
189
- list[p] = Article.new(Open.read(FileCache.path(filename)))
190
- else
191
- missing << p
192
- end
193
- }
194
-
195
- return list unless missing.any?
196
-
197
- articles = get_online(missing)
198
-
199
- articles.each{|p, xml|
200
- filename = p + '.xml'
201
- FileCache.add(filename,xml)
202
- list[p] = Article.new(xml)
203
- }
204
-
205
- return list
206
-
207
- else
208
- filename = pmid.to_s + '.xml'
209
-
210
- if File.exists? FileCache.path(filename)
211
- return Article.new(Open.read(FileCache.path(filename)))
212
- else
213
- xml = get_online(pmid)
214
- FileCache.add(filename,xml)
215
-
216
- return Article.new(xml)
217
- end
218
- end
219
- end
192
+ #def self.get_article(pmid)
193
+
194
+ # if pmid.is_a? Array
195
+ # missing = []
196
+ # list = {}
197
+
198
+ # pmid.each{|p|
199
+ # filename = p.to_s + '.xml'
200
+ # if File.exists? FileCache.path(filename)
201
+ # list[p] = Article.new(Open.read(FileCache.path(filename)))
202
+ # else
203
+ # missing << p
204
+ # end
205
+ # }
206
+
207
+ # return list unless missing.any?
208
+
209
+ # articles = get_online(missing)
210
+
211
+ # articles.each{|p, xml|
212
+ # filename = p + '.xml'
213
+ # FileCache.add(filename,xml)
214
+ # list[p] = Article.new(xml)
215
+ # }
216
+
217
+ # return list
218
+
219
+ # else
220
+ # filename = pmid.to_s + '.xml'
221
+
222
+ # if File.exists? FileCache.path(filename)
223
+ # return Article.new(Open.read(FileCache.path(filename)))
224
+ # else
225
+ # xml = get_online(pmid)
226
+ # FileCache.add(filename,xml)
227
+
228
+ # return Article.new(xml)
229
+ # end
230
+ # end
231
+ #end
220
232
 
221
233
  def self.get_article(pmids)
222
234
  _array = Array === pmids
@@ -2,15 +2,15 @@ require 'rbbt'
2
2
  require 'rbbt/tsv'
3
3
  require 'rbbt/resource'
4
4
 
5
- module TFacts
5
+ module TFactS
6
6
  extend Resource
7
- self.subdir = "share/databases/TFacts"
7
+ self.subdir = "share/databases/TFactS"
8
8
 
9
- TFacts.claim TFacts[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
9
+ TFactS.claim TFactS[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
10
10
 
11
- TFacts.claim TFacts.targets, :proc do
11
+ TFactS.claim TFactS.targets, :proc do
12
12
  require 'spreadsheet'
13
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
13
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
14
14
  sheet = book.worksheet 0
15
15
 
16
16
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)"], :namespace => "Hsa", :type => :flat)
@@ -24,9 +24,9 @@ module TFacts
24
24
  tsv.to_s
25
25
  end
26
26
 
27
- TFacts.claim TFacts.targets_signed, :proc do
27
+ TFactS.claim TFactS.targets_signed, :proc do
28
28
  require 'spreadsheet'
29
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
29
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
30
30
  sheet = book.worksheet 1
31
31
 
32
32
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)", "Sign", "PMID"], :namespace => "Hsa", :type => :double)
@@ -43,13 +43,13 @@ module TFacts
43
43
  tsv.to_s
44
44
  end
45
45
 
46
- TFacts.claim TFacts.regulators, :proc do
47
- TFacts.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
46
+ TFactS.claim TFactS.regulators, :proc do
47
+ TFactS.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
48
48
  end
49
49
 
50
- TFacts.claim TFacts.tf_tg, :proc do
50
+ TFactS.claim TFactS.tf_tg, :proc do
51
51
  require 'spreadsheet'
52
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
52
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
53
53
 
54
54
  tsv = TSV.setup({}, :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)", "Sign", "Species", "Source", "PMID"], :namespace => "Hsa", :type => :double)
55
55
 
@@ -128,16 +128,16 @@ if defined? Entity and defined? Gene and Entity === Gene
128
128
 
129
129
  module Gene
130
130
  property :is_transcription_factor? => :array2single do
131
- tfs = TFacts.targets.keys
131
+ tfs = TFactS.targets.keys
132
132
  self.name.collect{|gene| tfs.include? gene}
133
133
  end
134
134
 
135
135
  property :transcription_regulators => :array2single do
136
- Gene.setup(TFacts.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
136
+ Gene.setup(TFactS.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
137
137
  end
138
138
 
139
139
  property :transcription_targets => :array2single do
140
- Gene.setup(TFacts.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
140
+ Gene.setup(TFactS.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
141
141
  end
142
142
  end
143
143
  end
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [9606]
7
7
  $scientific_name = "Homo sapiens"
8
- $ortholog_key = "human_ensembl_gene"
8
+ $ortholog_key = "hsapiens_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'hsapiens_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'hsapiens_snp'
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [10090]
7
7
  $scientific_name = "Mus musculus"
8
- $ortholog_key = "mouse_ensembl_gene"
8
+ $ortholog_key = "mmusculus_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'mmusculus_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'mmusculus_snp'
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
9
9
  $biomart_db = 'rnorvegicus_gene_ensembl'
10
10
  $biomart_db_germline_variation = 'rnorvegicus_snp'
11
11
  $biomart_db_somatic_variation = 'rnorvegicus_snp_som'
12
- $ortholog_key = "rat_ensembl_gene"
12
+ $ortholog_key = "rnorvegicus_homolog_ensembl_gene"
13
13
 
14
14
  $biomart_lexicon = [
15
15
  [ 'Associated Gene Name' , "external_gene_id"],
@@ -547,13 +547,13 @@ end
547
547
  rule /^possible_ortholog_(.*)/ do |t|
548
548
  other = t.name.match(/ortholog_(.*)/)[1]
549
549
  other_key = Organism.ortholog_key(other).produce.read
550
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
550
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
551
551
  end
552
552
 
553
553
  rule /^ortholog_(.*)/ do |t|
554
554
  other = t.name.match(/ortholog_(.*)/)[1]
555
555
  other_key = Organism.ortholog_key(other).produce.read
556
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
556
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
557
557
  end
558
558
 
559
559
  rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
@@ -17,7 +17,10 @@ class TestEntrez < Test::Unit::TestCase
17
17
  def test_entrez2pubmed
18
18
  tax = $yeast_tax
19
19
 
20
+ Log.severity = 0
20
21
  data = Entrez.entrez2pubmed(tax)
22
+ data.read
23
+ Log.tsv data
21
24
  assert(data['850320'].include? '1574125')
22
25
  end
23
26
 
@@ -17,6 +17,12 @@ class TestPubMed < Test::Unit::TestCase
17
17
  pmid = '16438716'
18
18
  assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
19
19
  end
20
+
21
+ def test_pmc_full_xml
22
+ pmid = '4304705'
23
+ assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
24
+ end
25
+
20
26
 
21
27
  def test_query
22
28
  assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.41
4
+ version: 3.1.47
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-15 00:00:00.000000000 Z
11
+ date: 2020-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.0.0
27
- - !ruby/object:Gem::Dependency
28
- name: rbbt-text
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: mechanize
43
29
  requirement: !ruby/object:Gem::Requirement