rbbt-sources 3.1.22 → 3.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/signor.rb +3 -1
- data/lib/rbbt/sources/uniprot.rb +56 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e56141cb916b51e2204fac0392eff2d6cd365d1
|
4
|
+
data.tar.gz: 9cc540da01980356fdef07b24158f9cc6eb1640b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d858ee55af7164f64ae8a4c60519edc242a5f26b08892138df7732c02d0acca887ac4eeda20da62e320718e98dba18457d1be1d821096a66344d33376964486d
|
7
|
+
data.tar.gz: c14fb8321b65bfdf1aa93bc490032c84f2d9fb92d98b5d77e816e651fbca4181490292e17e6ba8c6b78c71814707a2aa1a25ff51d97470da5f3fad589386af98
|
data/lib/rbbt/sources/signor.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'rbbt-util'
|
2
2
|
require 'rbbt/resource'
|
3
|
+
require 'rbbt/sources/uniprot'
|
3
4
|
|
4
5
|
module Signor
|
5
6
|
extend Resource
|
@@ -63,7 +64,8 @@ module Signor
|
|
63
64
|
require 'rbbt/sources/organism'
|
64
65
|
|
65
66
|
organism = Organism.default_code("Hsa")
|
66
|
-
uni2name = Organism.identifiers(organism).index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
|
67
|
+
#uni2name = Organism.identifiers(organism).index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
|
68
|
+
uni2name = UniProt.identifiers.Hsa.index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
|
67
69
|
|
68
70
|
parser = TSV::Parser.new Signor.data
|
69
71
|
fields = parser.fields
|
data/lib/rbbt/sources/uniprot.rb
CHANGED
@@ -9,6 +9,62 @@ module UniProt
|
|
9
9
|
extend Resource
|
10
10
|
self.subdir = "share/databases/UniProt"
|
11
11
|
|
12
|
+
# Builds an identifier table keyed by UniProt accession from a tab-separated
# mapping source.
#
# Every line read from +url+ is split on tabs into three parts: accession,
# identifier type and value. Each distinct identifier type becomes one column
# (in order of first appearance) and each accession accumulates the list of
# values seen for every column.
#
# url      - source handed to TSV.traverse (presumably a UniProt
#            idmapping.dat file or URL; confirm against callers).
# organism - optional organism code; used in the progress-bar label and
#            passed to TSV.setup as :namespace.
#
# Returns the Hash set up through TSV.setup with :type => :double, where each
# row holds one (possibly empty) array of values per column.
def self.get_organism_ids(url, organism = nil)
  column_of = {} # identifier type => column index, in order of first appearance
  entries = {}   # accession => per-column value lists (may be sparse)

  TSV.traverse url, :type => :array, :bar => "Extracting UniProt IDs #{organism}" do |line|
    uni, type, value = line.split("\t")
    # Blank or truncated lines would otherwise register a nil column name or
    # store nil keys/values, so they are ignored.
    next if uni.nil? || type.nil? || value.nil?

    pos = (column_of[type] ||= column_of.size)
    per_column = (entries[uni] ||= [])
    (per_column[pos] ||= []) << value
  end

  # Translate the source's type labels into the field names used for the
  # resulting table; unlisted types keep their original label.
  renamed = {
    "Gene_Name" => "Associated Gene Name",
    "Ensembl" => "Ensembl Gene ID",
    "Ensembl_TRS" => "Ensembl Transcript ID",
    "Ensembl_PRO" => "Ensembl Protein ID"
  }
  fields = column_of.keys.map { |field| renamed.fetch(field, field) }

  result = TSV.setup({}, :key_field => "UniProt/SwissProt Accession", :fields => fields, :type => :double, :namespace => organism)

  # Pad every row to the full column count so missing columns are empty lists
  # instead of nil or absent.
  num_fields = fields.length
  entries.each do |uni, per_column|
    result[uni] = Array.new(num_fields) { |i| per_column[i] || [] }
  end

  Log.tsv result
  result
end
|
54
|
+
|
55
|
+
# Registers how the human ("Hsa") identifier table is produced: the mapping
# file at the URL below is run through UniProt.get_organism_ids and the result
# of calling to_s on that table is what the block hands back.
UniProt.claim UniProt.identifiers.Hsa, :proc do
  human_idmapping = "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz"
  UniProt.get_organism_ids(human_idmapping, "Hsa").to_s
end
|
60
|
+
|
61
|
+
# Registers how the mouse ("Mmu") identifier table is produced: the mapping
# file at the URL below is run through UniProt.get_organism_ids.
UniProt.claim UniProt.identifiers.Mmu, :proc do
  url = "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/MOUSE_10090_idmapping.dat.gz"
  tsv = UniProt.get_organism_ids(url, "Mmu")
  # Hand back the table via to_s, exactly as the human (Hsa) claim does;
  # to_a would yield nested key/value arrays instead of the table's text form
  # (presumably the TSV serialization with its header; confirm in rbbt-util).
  tsv.to_s
end
|
66
|
+
|
67
|
+
|
12
68
|
UniProt.claim UniProt.annotated_variants, :proc do
|
13
69
|
url = "http://www.uniprot.org/docs/humsavar.txt"
|
14
70
|
tsv = TSV.open(CMD.cmd('tail -n +31 | head -n -4|grep "[[:alpha:]]"', :in => Open.open(url), :pipe => true),
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.22
|
4
|
+
version: 3.1.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-12-
|
11
|
+
date: 2017-12-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|