rbbt-sources 3.1.42 → 3.1.48

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d58670542b15951452edfc4607511ae6d3a4e2795bd13a1b207489c75cb869c9
4
- data.tar.gz: b53a205a5e9b0b857a39f9a3dfa0a7da9db88c8ed3f734af9b0310320504993d
3
+ metadata.gz: 92bb6e2852266c73666e0b07221484822441d19603b2ce9d4fe9ceb6faa44d37
4
+ data.tar.gz: 9eda4d907bb34ae5b5b95dbdb972b6c659111b042c5b68661d41cf3ccd93e3b8
5
5
  SHA512:
6
- metadata.gz: c84dfab9e1f914b0ecfaede54839ee90644f6bbb9777044ede0cc0358f18e0e78ff3e757583049e699ba6ff7700c2dfc74816998c74a78ac0a82580a042e6f29
7
- data.tar.gz: bb649a7250d617e9fef26068ed1c7624828ff2c4a2a0e8696a621e7b6b821918d9ddcff1ccae4729c8e26a1fe79f6d9723936f85519e57b677ff405b30e3f5ea
6
+ metadata.gz: 4ac47f700cfc54144bfbc2dc7b3eeaf2e9dce885ccfc1e47c26d978a79dd02d99802a83a888bb1bd80de96ecf9bf023ae548041f3b1867308228c319777dabd4
7
+ data.tar.gz: 05b07ac77a194f0ad87a36b050eb97d2285490974d0bca039c2163d762c11461f4ccf6c4585df62b24298524bb9f02e4d1d65dae51418e491644b4725fbd864f
@@ -1,3 +1,7 @@
1
+ ">oct2014":
2
+ - rgd~rgd_id
3
+ ">jun2019":
4
+ - entrezgene~entrezgene_id
1
5
  ">dec2017":
2
6
  - unigene
3
7
  ">dec2016":
@@ -134,11 +134,14 @@ module BioMart
134
134
  # cause an error if the BioMart WS does not allow filtering with that
135
135
  # attribute.
136
136
  def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
137
+ IndiferentHash.setup(open_options)
137
138
  open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
138
139
  filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
139
140
  attrs ||= []
140
141
  open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
141
142
 
143
+ IndiferentHash.setup(open_options)
144
+
142
145
  Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
143
146
 
144
147
  max_items = 2
@@ -167,6 +170,7 @@ module BioMart
167
170
  end
168
171
 
169
172
  open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
173
+
170
174
  if filename.nil?
171
175
  results = TSV.open data, open_options
172
176
  results.key_field = main
@@ -52,24 +52,35 @@ module Ensembl
52
52
  File.join("ftp://" + SERVER, ftp_directory_for(organism) )
53
53
  end
54
54
 
55
- def self.url_for(organism, table)
56
- "#{base_url(organism)}/#{table}.txt.gz.bz2"
55
+ def self.url_for(organism, table, extension)
56
+ File.join(base_url(organism), table) + ".#{extension}.gz"
57
+ end
58
+
59
+ def self._get_gz(url)
60
+ begin
61
+ CMD.cmd("wget '#{url}' -O - | gunzip").read
62
+ rescue
63
+ CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
64
+ end
65
+ end
66
+
67
+ def self._get_file(organism, table, extension)
68
+ url = url_for(organism, table, extension)
69
+ self._get_gz(url)
57
70
  end
58
71
 
59
72
  def self.has_table?(organism, table)
60
- sql_file = CMD.cmd("wget '#{base_url(organism)}/#{File.basename(base_url(organism))}.sql.gz.bz2' -O -| bunzip2| gunzip").read
73
+ sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
61
74
  ! sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm).nil?
62
75
  end
63
76
 
64
77
  def self.fields_for(organism, table)
65
- sql_file = CMD.cmd("wget '#{base_url(organism)}/#{File.basename(base_url(organism))}.sql.gz.bz2' -O -| bunzip2| gunzip").read
66
-
78
+ sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
67
79
  chunk = sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm)[1]
68
80
  chunk.scan(/^\s+`(.*?)`/).flatten
69
81
  end
70
82
 
71
83
  def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
72
- url = url_for(organism, table)
73
84
  if key_field and fields
74
85
  all_fields = fields_for(organism, table)
75
86
  key_pos = all_fields.index key_field
@@ -78,7 +89,8 @@ module Ensembl
78
89
  options[:key_field] = key_pos
79
90
  options[:fields] = field_pos
80
91
  end
81
- tsv = TSV.open(CMD.cmd("wget '#{url}' -O - |bunzip2|gunzip", :pipe => true), options)
92
+
93
+ tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
82
94
  tsv.key_field = key_field
83
95
  tsv.fields = fields
84
96
  tsv
@@ -14,7 +14,7 @@ module Entrez
14
14
  options = Misc.add_defaults options, :key_field => 1, :fields => [5], :persist => true, :merge => true
15
15
 
16
16
  taxs = [taxs] unless Array === taxs
17
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
17
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
18
18
 
19
19
  tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
20
20
  tsv.key_field = "Entrez Gene ID"
@@ -26,7 +26,7 @@ module Entrez
26
26
  options = Misc.add_defaults options, :key_field => 1, :fields => [2], :persist => true, :merge => true
27
27
 
28
28
  taxs = [taxs] unless Array === taxs
29
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
29
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
30
30
 
31
31
  tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
32
32
  tsv.key_field = "Entrez Gene ID"
@@ -39,7 +39,7 @@ module Entrez
39
39
  options = {:key_field => 1, :fields => [2], :persist => true, :merge => true}
40
40
 
41
41
  taxs = [taxs] unless taxs.is_a?(Array)
42
- options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
42
+ options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
43
43
 
44
44
  Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
45
45
  end
@@ -27,7 +27,7 @@ module GO
27
27
  def self.init
28
28
  Persist.persist_tsv(nil, 'gene_ontology', {}, :persist => true) do |info|
29
29
  info.serializer = :marshal if info.respond_to? :serializer
30
- Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
30
+ Rbbt.share.databases.GO.gene_ontology.produce.read.split(/\[Term\]/).each{|term|
31
31
  term_info = {}
32
32
 
33
33
  term.split(/\n/). select{|l| l =~ /:/}.each{|l|
@@ -7,7 +7,7 @@ module Pina
7
7
  Pina.claim Pina.protein_protein, :proc do
8
8
  require 'rbbt/sources/organism'
9
9
 
10
- url = "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20121210.txt"
10
+ url = "https://omics.bjcancer.org/pina/download/Homo%20sapiens-20140521.tsv"
11
11
 
12
12
  dumper = TSV::Dumper.new :type => :double,
13
13
  :key_field => 'UniProt/SwissProt Accession', :namespace => Organism.default_code("Hsa"),
@@ -109,6 +109,14 @@ module PubMed
109
109
  end
110
110
  end
111
111
 
112
+ def pmc_full_xml
113
+ begin
114
+ Open.read("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=#{pmid}")
115
+ rescue
116
+ nil
117
+ end
118
+ end
119
+
112
120
  def pdf_url
113
121
  return pmc_pdf if pmc_pdf
114
122
  @gscholar_pdf ||= begin
@@ -121,18 +129,22 @@ module PubMed
121
129
  end
122
130
 
123
131
  def full_text
124
- return nil if pdf_url.nil?
125
-
126
- text = nil
127
- TmpFile.with_file do |pdf|
128
-
129
- # Change user-agent, oh well...
130
- `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
131
- TmpFile.with_file do |txt|
132
- `pdftotext #{ pdf } #{ txt }`
133
- text = Open.read(txt) if File.exists? txt
134
- end
135
- end
132
+ text = if pdf_url
133
+ text = nil
134
+ TmpFile.with_file do |pdf|
135
+ # Change user-agent, oh well...
136
+ `wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
137
+ TmpFile.with_file do |txt|
138
+ `pdftotext #{ pdf } #{ txt }`
139
+ text = Open.read(txt) if File.exists? txt
140
+ end
141
+ end
142
+ text
143
+ elsif pmc_full_xml
144
+ pmc_full_xml
145
+ else
146
+ nil
147
+ end
136
148
 
137
149
  Misc.fixutf8(text)
138
150
  end
@@ -2,15 +2,15 @@ require 'rbbt'
2
2
  require 'rbbt/tsv'
3
3
  require 'rbbt/resource'
4
4
 
5
- module TFacts
5
+ module TFactS
6
6
  extend Resource
7
- self.subdir = "share/databases/TFacts"
7
+ self.subdir = "share/databases/TFactS"
8
8
 
9
- TFacts.claim TFacts[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
9
+ TFactS.claim TFactS[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
10
10
 
11
- TFacts.claim TFacts.targets, :proc do
11
+ TFactS.claim TFactS.targets, :proc do
12
12
  require 'spreadsheet'
13
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
13
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
14
14
  sheet = book.worksheet 0
15
15
 
16
16
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)"], :namespace => "Hsa", :type => :flat)
@@ -24,9 +24,9 @@ module TFacts
24
24
  tsv.to_s
25
25
  end
26
26
 
27
- TFacts.claim TFacts.targets_signed, :proc do
27
+ TFactS.claim TFactS.targets_signed, :proc do
28
28
  require 'spreadsheet'
29
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
29
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
30
30
  sheet = book.worksheet 1
31
31
 
32
32
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)", "Sign", "PMID"], :namespace => "Hsa", :type => :double)
@@ -43,13 +43,13 @@ module TFacts
43
43
  tsv.to_s
44
44
  end
45
45
 
46
- TFacts.claim TFacts.regulators, :proc do
47
- TFacts.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
46
+ TFactS.claim TFactS.regulators, :proc do
47
+ TFactS.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
48
48
  end
49
49
 
50
- TFacts.claim TFacts.tf_tg, :proc do
50
+ TFactS.claim TFactS.tf_tg, :proc do
51
51
  require 'spreadsheet'
52
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
52
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
53
53
 
54
54
  tsv = TSV.setup({}, :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)", "Sign", "Species", "Source", "PMID"], :namespace => "Hsa", :type => :double)
55
55
 
@@ -128,16 +128,16 @@ if defined? Entity and defined? Gene and Entity === Gene
128
128
 
129
129
  module Gene
130
130
  property :is_transcription_factor? => :array2single do
131
- tfs = TFacts.targets.keys
131
+ tfs = TFactS.targets.keys
132
132
  self.name.collect{|gene| tfs.include? gene}
133
133
  end
134
134
 
135
135
  property :transcription_regulators => :array2single do
136
- Gene.setup(TFacts.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
136
+ Gene.setup(TFactS.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
137
137
  end
138
138
 
139
139
  property :transcription_targets => :array2single do
140
- Gene.setup(TFacts.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
140
+ Gene.setup(TFactS.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
141
141
  end
142
142
  end
143
143
  end
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [9606]
7
7
  $scientific_name = "Homo sapiens"
8
- $ortholog_key = "human_ensembl_gene"
8
+ $ortholog_key = "hsapiens_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'hsapiens_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'hsapiens_snp'
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [10090]
7
7
  $scientific_name = "Mus musculus"
8
- $ortholog_key = "mouse_ensembl_gene"
8
+ $ortholog_key = "mmusculus_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'mmusculus_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'mmusculus_snp'
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
9
9
  $biomart_db = 'rnorvegicus_gene_ensembl'
10
10
  $biomart_db_germline_variation = 'rnorvegicus_snp'
11
11
  $biomart_db_somatic_variation = 'rnorvegicus_snp_som'
12
- $ortholog_key = "rat_ensembl_gene"
12
+ $ortholog_key = "rnorvegicus_homolog_ensembl_gene"
13
13
 
14
14
  $biomart_lexicon = [
15
15
  [ 'Associated Gene Name' , "external_gene_id"],
@@ -547,13 +547,13 @@ end
547
547
  rule /^possible_ortholog_(.*)/ do |t|
548
548
  other = t.name.match(/ortholog_(.*)/)[1]
549
549
  other_key = Organism.ortholog_key(other).produce.read
550
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
550
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
551
551
  end
552
552
 
553
553
  rule /^ortholog_(.*)/ do |t|
554
554
  other = t.name.match(/ortholog_(.*)/)[1]
555
555
  other_key = Organism.ortholog_key(other).produce.read
556
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
556
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
557
557
  end
558
558
 
559
559
  rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
@@ -728,13 +728,18 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
728
728
  transcript_sequence.through do |transcript, sequence|
729
729
  protein = transcript_protein[transcript]
730
730
  next if protein.nil? or protein.empty?
731
+
731
732
  utr5 = transcript_5utr[transcript]
732
733
  utr3 = transcript_3utr[transcript]
733
734
  phase = transcript_phase[transcript] || 0
735
+
734
736
  if phase < 0
735
- utr5 = - phase if utr5 == 0
737
+ if utr5.nil? || utr5 == 0 || utr5 == "0"
738
+ utr5 = 0
739
+ end
736
740
  phase = 0
737
741
  end
742
+
738
743
  psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
739
744
  protein_sequence[protein]=psequence
740
745
  end
@@ -17,7 +17,10 @@ class TestEntrez < Test::Unit::TestCase
17
17
  def test_entrez2pubmed
18
18
  tax = $yeast_tax
19
19
 
20
+ Log.severity = 0
20
21
  data = Entrez.entrez2pubmed(tax)
22
+ data.read
23
+ Log.tsv data
21
24
  assert(data['850320'].include? '1574125')
22
25
  end
23
26
 
@@ -17,6 +17,12 @@ class TestPubMed < Test::Unit::TestCase
17
17
  pmid = '16438716'
18
18
  assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
19
19
  end
20
+
21
+ def test_pmc_full_xml
22
+ pmid = '4304705'
23
+ assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
24
+ end
25
+
20
26
 
21
27
  def test_query
22
28
  assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.42
4
+ version: 3.1.48
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-31 00:00:00.000000000 Z
11
+ date: 2020-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util