rbbt-sources 3.1.41 → 3.1.47

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e0a35626b221e15867cb54ca9e8bbeb007762b5a99afed75d57d6b473385c287
4
- data.tar.gz: 59d067abbfdcd686f39b3604282a280ca9bf4a0e8f3284698202dae8d9d50d55
3
+ metadata.gz: 6cecde427390fc444e5ef5b6bef42072bf87714b404647c6c086da92a1eaa13a
4
+ data.tar.gz: 673429c8eab433ec9854c81d52450f8885c57e4b3f8b6779ded44585e6f88343
5
5
  SHA512:
6
- metadata.gz: 1c9c6a2dc5184319e94af671762227bba331b49784cf3e4ac0550b6c023c7b072920d75175b0547c2d8cb4034dbca53dda9514238773cf98f103cc6be978c4ec
7
- data.tar.gz: df52691bf65b891a906217f5e2647577bc74940bbd3fa1752115f624a313472a70a43fb96b786e30a4612bc71badb7b3e68c539e0024fe0f0958ee5f33d910af
6
+ metadata.gz: e39a3a6418bfb514ec786da0128f6fb8437da9728224dacd41b837300680c6d34e821f08954820cb5b7fd064546fee21fd0557d179c469eda969485ffd5cd965
7
+ data.tar.gz: 9f332f35fcd9c66db7506875efcc7c5276b1f588dd3b70ca299aaa2a7f25c33a650e496ba207af4e12e3bb2a6dfbc9ec8b1f0729e3acedc354145572dd268836
@@ -1,3 +1,7 @@
1
+ ">oct2014":
2
+ - rgd~rgd_id
3
+ ">jun2019":
4
+ - entrezgene~entrezgene_id
1
5
  ">dec2017":
2
6
  - unigene
3
7
  ">dec2016":
@@ -134,11 +134,14 @@ module BioMart
134
134
  # cause an error if the BioMart WS does not allow filtering with that
135
135
  # attribute.
136
136
  def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
137
+ IndiferentHash.setup(open_options)
137
138
  open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
138
139
  filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
139
140
  attrs ||= []
140
141
  open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
141
142
 
143
+ IndiferentHash.setup(open_options)
144
+
142
145
  Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
143
146
 
144
147
  max_items = 2
@@ -167,6 +170,7 @@ module BioMart
167
170
  end
168
171
 
169
172
  open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
173
+
170
174
  if filename.nil?
171
175
  results = TSV.open data, open_options
172
176
  results.key_field = main
@@ -200,6 +204,7 @@ module BioMart
200
204
  missing+=v if Organism.compare_archives(current_archive, t) == -1
201
205
  elsif k=~ /^>(.*)/
202
206
  t = $1.strip
207
+ iii [current_archive, t, Organism.compare_archives(current_archive, t)]
203
208
  missing+=v if Organism.compare_archives(current_archive, t) == 1
204
209
  end
205
210
  end
@@ -14,7 +14,7 @@ module Entrez
14
14
  options = Misc.add_defaults options, :key_field => 1, :fields => [5], :persist => true, :merge => true
15
15
 
16
16
  taxs = [taxs] unless Array === taxs
17
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
17
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
18
18
 
19
19
  tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
20
20
  tsv.key_field = "Entrez Gene ID"
@@ -26,7 +26,7 @@ module Entrez
26
26
  options = Misc.add_defaults options, :key_field => 1, :fields => [2], :persist => true, :merge => true
27
27
 
28
28
  taxs = [taxs] unless Array === taxs
29
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
29
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
30
30
 
31
31
  tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
32
32
  tsv.key_field = "Entrez Gene ID"
@@ -39,7 +39,7 @@ module Entrez
39
39
  options = {:key_field => 1, :fields => [2], :persist => true, :merge => true}
40
40
 
41
41
  taxs = [taxs] unless taxs.is_a?(Array)
42
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
42
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
43
43
 
44
44
  Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
45
45
  end
@@ -27,7 +27,7 @@ module GO
27
27
  def self.init
28
28
  Persist.persist_tsv(nil, 'gene_ontology', {}, :persist => true) do |info|
29
29
  info.serializer = :marshal if info.respond_to? :serializer
30
- Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
30
+ Rbbt.share.databases.GO.gene_ontology.produce.read.split(/\[Term\]/).each{|term|
31
31
  term_info = {}
32
32
 
33
33
  term.split(/\n/). select{|l| l =~ /:/}.each{|l|
@@ -7,7 +7,7 @@ module Pina
7
7
  Pina.claim Pina.protein_protein, :proc do
8
8
  require 'rbbt/sources/organism'
9
9
 
10
- url = "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20121210.txt"
10
+ url = "https://omics.bjcancer.org/pina/download/Homo%20sapiens-20140521.tsv"
11
11
 
12
12
  dumper = TSV::Dumper.new :type => :double,
13
13
  :key_field => 'UniProt/SwissProt Accession', :namespace => Organism.default_code("Hsa"),
@@ -109,6 +109,14 @@ module PubMed
109
109
  end
110
110
  end
111
111
 
112
+ def pmc_full_xml
113
+ begin
114
+ Open.read("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=#{pmid}")
115
+ rescue
116
+ nil
117
+ end
118
+ end
119
+
112
120
  def pdf_url
113
121
  return pmc_pdf if pmc_pdf
114
122
  @gscholar_pdf ||= begin
@@ -121,18 +129,22 @@ module PubMed
121
129
  end
122
130
 
123
131
  def full_text
124
- return nil if pdf_url.nil?
125
-
126
- text = nil
127
- TmpFile.with_file do |pdf|
128
-
129
- # Change user-agent, oh well...
130
- `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
131
- TmpFile.with_file do |txt|
132
- `pdftotext #{ pdf } #{ txt }`
133
- text = Open.read(txt) if File.exists? txt
134
- end
135
- end
132
+ text = if pdf_url
133
+ text = nil
134
+ TmpFile.with_file do |pdf|
135
+ # Change user-agent, oh well...
136
+ `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
137
+ TmpFile.with_file do |txt|
138
+ `pdftotext #{ pdf } #{ txt }`
139
+ text = Open.read(txt) if File.exists? txt
140
+ end
141
+ end
142
+ text
143
+ elsif pmc_full_xml
144
+ pmc_full_xml
145
+ else
146
+ nil
147
+ end
136
148
 
137
149
  Misc.fixutf8(text)
138
150
  end
@@ -177,46 +189,46 @@ module PubMed
177
189
  # ID specified as an argument. If +pmid+ is an array instead of a single
178
190
  # identifier it returns an hash with the Article object for each id.
179
191
  # It uses the Rbbt cache to save the articles xml.
180
- def self.get_article(pmid)
181
-
182
- if pmid.is_a? Array
183
- missing = []
184
- list = {}
185
-
186
- pmid.each{|p|
187
- filename = p.to_s + '.xml'
188
- if File.exists? FileCache.path(filename)
189
- list[p] = Article.new(Open.read(FileCache.path(filename)))
190
- else
191
- missing << p
192
- end
193
- }
194
-
195
- return list unless missing.any?
196
-
197
- articles = get_online(missing)
198
-
199
- articles.each{|p, xml|
200
- filename = p + '.xml'
201
- FileCache.add(filename,xml)
202
- list[p] = Article.new(xml)
203
- }
204
-
205
- return list
206
-
207
- else
208
- filename = pmid.to_s + '.xml'
209
-
210
- if File.exists? FileCache.path(filename)
211
- return Article.new(Open.read(FileCache.path(filename)))
212
- else
213
- xml = get_online(pmid)
214
- FileCache.add(filename,xml)
215
-
216
- return Article.new(xml)
217
- end
218
- end
219
- end
192
+ #def self.get_article(pmid)
193
+
194
+ # if pmid.is_a? Array
195
+ # missing = []
196
+ # list = {}
197
+
198
+ # pmid.each{|p|
199
+ # filename = p.to_s + '.xml'
200
+ # if File.exists? FileCache.path(filename)
201
+ # list[p] = Article.new(Open.read(FileCache.path(filename)))
202
+ # else
203
+ # missing << p
204
+ # end
205
+ # }
206
+
207
+ # return list unless missing.any?
208
+
209
+ # articles = get_online(missing)
210
+
211
+ # articles.each{|p, xml|
212
+ # filename = p + '.xml'
213
+ # FileCache.add(filename,xml)
214
+ # list[p] = Article.new(xml)
215
+ # }
216
+
217
+ # return list
218
+
219
+ # else
220
+ # filename = pmid.to_s + '.xml'
221
+
222
+ # if File.exists? FileCache.path(filename)
223
+ # return Article.new(Open.read(FileCache.path(filename)))
224
+ # else
225
+ # xml = get_online(pmid)
226
+ # FileCache.add(filename,xml)
227
+
228
+ # return Article.new(xml)
229
+ # end
230
+ # end
231
+ #end
220
232
 
221
233
  def self.get_article(pmids)
222
234
  _array = Array === pmids
@@ -2,15 +2,15 @@ require 'rbbt'
2
2
  require 'rbbt/tsv'
3
3
  require 'rbbt/resource'
4
4
 
5
- module TFacts
5
+ module TFactS
6
6
  extend Resource
7
- self.subdir = "share/databases/TFacts"
7
+ self.subdir = "share/databases/TFactS"
8
8
 
9
- TFacts.claim TFacts[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
9
+ TFactS.claim TFactS[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
10
10
 
11
- TFacts.claim TFacts.targets, :proc do
11
+ TFactS.claim TFactS.targets, :proc do
12
12
  require 'spreadsheet'
13
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
13
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
14
14
  sheet = book.worksheet 0
15
15
 
16
16
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)"], :namespace => "Hsa", :type => :flat)
@@ -24,9 +24,9 @@ module TFacts
24
24
  tsv.to_s
25
25
  end
26
26
 
27
- TFacts.claim TFacts.targets_signed, :proc do
27
+ TFactS.claim TFactS.targets_signed, :proc do
28
28
  require 'spreadsheet'
29
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
29
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
30
30
  sheet = book.worksheet 1
31
31
 
32
32
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)", "Sign", "PMID"], :namespace => "Hsa", :type => :double)
@@ -43,13 +43,13 @@ module TFacts
43
43
  tsv.to_s
44
44
  end
45
45
 
46
- TFacts.claim TFacts.regulators, :proc do
47
- TFacts.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
46
+ TFactS.claim TFactS.regulators, :proc do
47
+ TFactS.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
48
48
  end
49
49
 
50
- TFacts.claim TFacts.tf_tg, :proc do
50
+ TFactS.claim TFactS.tf_tg, :proc do
51
51
  require 'spreadsheet'
52
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
52
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
53
53
 
54
54
  tsv = TSV.setup({}, :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)", "Sign", "Species", "Source", "PMID"], :namespace => "Hsa", :type => :double)
55
55
 
@@ -128,16 +128,16 @@ if defined? Entity and defined? Gene and Entity === Gene
128
128
 
129
129
  module Gene
130
130
  property :is_transcription_factor? => :array2single do
131
- tfs = TFacts.targets.keys
131
+ tfs = TFactS.targets.keys
132
132
  self.name.collect{|gene| tfs.include? gene}
133
133
  end
134
134
 
135
135
  property :transcription_regulators => :array2single do
136
- Gene.setup(TFacts.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
136
+ Gene.setup(TFactS.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
137
137
  end
138
138
 
139
139
  property :transcription_targets => :array2single do
140
- Gene.setup(TFacts.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
140
+ Gene.setup(TFactS.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
141
141
  end
142
142
  end
143
143
  end
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [9606]
7
7
  $scientific_name = "Homo sapiens"
8
- $ortholog_key = "human_ensembl_gene"
8
+ $ortholog_key = "hsapiens_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'hsapiens_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'hsapiens_snp'
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [10090]
7
7
  $scientific_name = "Mus musculus"
8
- $ortholog_key = "mouse_ensembl_gene"
8
+ $ortholog_key = "mmusculus_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'mmusculus_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'mmusculus_snp'
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
9
9
  $biomart_db = 'rnorvegicus_gene_ensembl'
10
10
  $biomart_db_germline_variation = 'rnorvegicus_snp'
11
11
  $biomart_db_somatic_variation = 'rnorvegicus_snp_som'
12
- $ortholog_key = "rat_ensembl_gene"
12
+ $ortholog_key = "rnorvegicus_homolog_ensembl_gene"
13
13
 
14
14
  $biomart_lexicon = [
15
15
  [ 'Associated Gene Name' , "external_gene_id"],
@@ -547,13 +547,13 @@ end
547
547
  rule /^possible_ortholog_(.*)/ do |t|
548
548
  other = t.name.match(/ortholog_(.*)/)[1]
549
549
  other_key = Organism.ortholog_key(other).produce.read
550
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
550
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
551
551
  end
552
552
 
553
553
  rule /^ortholog_(.*)/ do |t|
554
554
  other = t.name.match(/ortholog_(.*)/)[1]
555
555
  other_key = Organism.ortholog_key(other).produce.read
556
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
556
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
557
557
  end
558
558
 
559
559
  rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
@@ -17,7 +17,10 @@ class TestEntrez < Test::Unit::TestCase
17
17
  def test_entrez2pubmed
18
18
  tax = $yeast_tax
19
19
 
20
+ Log.severity = 0
20
21
  data = Entrez.entrez2pubmed(tax)
22
+ data.read
23
+ Log.tsv data
21
24
  assert(data['850320'].include? '1574125')
22
25
  end
23
26
 
@@ -17,6 +17,12 @@ class TestPubMed < Test::Unit::TestCase
17
17
  pmid = '16438716'
18
18
  assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
19
19
  end
20
+
21
+ def test_pmc_full_xml
22
+ pmid = '4304705'
23
+ assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
24
+ end
25
+
20
26
 
21
27
  def test_query
22
28
  assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.41
4
+ version: 3.1.47
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-15 00:00:00.000000000 Z
11
+ date: 2020-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.0.0
27
- - !ruby/object:Gem::Dependency
28
- name: rbbt-text
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: mechanize
43
29
  requirement: !ruby/object:Gem::Requirement