rbbt-sources 3.1.20 → 3.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +1 -0
- data/lib/rbbt/sources/go.rb +19 -3
- data/lib/rbbt/sources/oreganno.rb +38 -0
- data/lib/rbbt/sources/organism.rb +2 -9
- data/lib/rbbt/sources/signor.rb +41 -14
- data/test/rbbt/sources/test_go.rb +5 -6
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bdca7fd8c0c9f60055453c7f318a699eae54738
|
4
|
+
data.tar.gz: c0b08856ddd9668b6dbbadd2035653ce24506f04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e515516855aa249e758a23da1d1d16915697cf3647bf88c0791b5593b435c2adf4b44006081473d4068a76b55d8b018093cba49070144044c68aa3d9611450c
|
7
|
+
data.tar.gz: eed95d673673f5ec047b37911c44d6876fbd55d7efdc4884863902f6d76bdeda22e5ac5a1982344488ff8fb68808c8233df78eb0baa1e9fa5a2d712485296577
|
data/lib/rbbt/sources/go.rb
CHANGED
@@ -6,8 +6,9 @@ require 'rbbt/persist/tsv'
|
|
6
6
|
# now all it does is provide a translation form id to the actual names.
|
7
7
|
module GO
|
8
8
|
|
9
|
-
Rbbt.claim Rbbt.share.databases.GO.
|
10
|
-
Rbbt.claim Rbbt.share.databases.GO.
|
9
|
+
#Rbbt.claim Rbbt.share.databases.GO.gslim_generic, :url, 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
|
10
|
+
Rbbt.claim Rbbt.share.databases.GO.gene_ontology, :url, 'http://purl.obolibrary.org/obo/go.obo'
|
11
|
+
Rbbt.claim Rbbt.share.databases.GO.annotations, :url, 'http://geneontology.org/gene-associations/goa_human.gaf.gz'
|
11
12
|
|
12
13
|
MULTIPLE_VALUE_FIELDS = %w(is_a)
|
13
14
|
TSV_GENE_ONTOLOGY = File.join(Persist.cachedir, 'gene_ontology')
|
@@ -57,6 +58,21 @@ module GO
|
|
57
58
|
end
|
58
59
|
end
|
59
60
|
|
61
|
+
def self.descendants(id)
|
62
|
+
list = Set.new
|
63
|
+
new = Set.new
|
64
|
+
new << id
|
65
|
+
while new.any?
|
66
|
+
list += new
|
67
|
+
new = Set.new
|
68
|
+
info.each do |new_id,values|
|
69
|
+
next unless values['is_a']
|
70
|
+
new << new_id if values['is_a'].select{|e| list.include? e.split("!").first[/GO:\d+/] }.any? && ! list.include?(new_id)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
list
|
74
|
+
end
|
75
|
+
|
60
76
|
def self.id2ancestors_by_type(id, type='is_a')
|
61
77
|
if id.kind_of? Array
|
62
78
|
info.values_at(*id).
|
@@ -93,7 +109,7 @@ module GO
|
|
93
109
|
end
|
94
110
|
end
|
95
111
|
|
96
|
-
def self.ancestors_in(term, valid)
|
112
|
+
def self.ancestors_in(term, valid = false)
|
97
113
|
ancestors = id2ancestors(term)
|
98
114
|
return ancestors if FalseClass === valid
|
99
115
|
valid_ancestors = ancestors & valid
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module Oreganno
|
5
|
+
extend Resource
|
6
|
+
self.subdir = 'share/databases/Oreganno'
|
7
|
+
|
8
|
+
def self.organism(org="Hsa")
|
9
|
+
require 'rbbt/sources/organism'
|
10
|
+
Organism.default_code(org)
|
11
|
+
end
|
12
|
+
|
13
|
+
#self.search_paths = {}
|
14
|
+
#self.search_paths[:default] = :lib
|
15
|
+
|
16
|
+
|
17
|
+
Oreganno.claim Oreganno.data, :proc do
|
18
|
+
url = "http://www.oreganno.org/dump/ORegAnno_Combined_2016.01.19.tsv"
|
19
|
+
TSV.open(url, :header_hash => '', :type => :list).to_s
|
20
|
+
end
|
21
|
+
|
22
|
+
Oreganno.claim Oreganno.tf_tg, :proc do
|
23
|
+
dumper = TSV::Dumper.new :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)"], :type => :flat, :namespace => Oreganno.organism
|
24
|
+
dumper.init
|
25
|
+
TSV.traverse Oreganno.data, :type => :array, :into => dumper, :bar => true do |line|
|
26
|
+
parts = line.split("\t")
|
27
|
+
next unless parts[1] == "Homo sapiens"
|
28
|
+
tf = parts[4]
|
29
|
+
tg = parts[7]
|
30
|
+
next if tf == "N/A" or tg == "N/A"
|
31
|
+
[tf, [tg]]
|
32
|
+
end
|
33
|
+
TSV.collapse_stream dumper
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
iif Oreganno.tf_tg.produce(true).find if __FILE__ == $0
|
38
|
+
|
@@ -79,7 +79,7 @@ module Organism
|
|
79
79
|
elsif release_number <= 75
|
80
80
|
'hg19'
|
81
81
|
else
|
82
|
-
'
|
82
|
+
'hg38'
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
@@ -88,14 +88,7 @@ module Organism
|
|
88
88
|
source_hg = hg_build(source)
|
89
89
|
target_hg = hg_build(target)
|
90
90
|
|
91
|
-
|
92
|
-
when (source_hg == 'hg19' and target_hg == 'hg18')
|
93
|
-
map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg18.over.chain.gz"
|
94
|
-
when (source_hg == 'hg18' and target_hg == 'hg19')
|
95
|
-
map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz"
|
96
|
-
else
|
97
|
-
return positions
|
98
|
-
end
|
91
|
+
map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/#{source_hg}/liftOver/#{source_hg}To#{target_hg.sub('h', 'H')}.over.chain.gz"
|
99
92
|
|
100
93
|
positions_bed = positions.collect{|position|
|
101
94
|
chr, pos = position.split(":").values_at(0,1)
|
data/lib/rbbt/sources/signor.rb
CHANGED
@@ -18,19 +18,6 @@ module Signor
|
|
18
18
|
end
|
19
19
|
|
20
20
|
Signor.claim Signor.data, :proc do
|
21
|
-
#io = Misc.open_pipe do |sin|
|
22
|
-
# Signor[".source/all.csv"].open do |f|
|
23
|
-
# quoted = false
|
24
|
-
# while c = f.getc
|
25
|
-
# if c == '"'
|
26
|
-
# quoted = ! quoted
|
27
|
-
# end
|
28
|
-
# c = " " if c == "\n" and quoted
|
29
|
-
# sin << c
|
30
|
-
# end
|
31
|
-
# end
|
32
|
-
#end
|
33
|
-
|
34
21
|
sio = Signor[".source/all.csv"].open
|
35
22
|
io_tmp = Misc.remove_quoted_new_line(sio)
|
36
23
|
io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')
|
@@ -71,7 +58,47 @@ module Signor
|
|
71
58
|
|
72
59
|
Misc.collapse_stream dumper.stream
|
73
60
|
end
|
61
|
+
|
62
|
+
Signor.claim Signor.tf_tg, :proc do
|
63
|
+
require 'rbbt/sources/organism'
|
64
|
+
|
65
|
+
organism = Organism.default_code("Hsa")
|
66
|
+
uni2name = Organism.identifiers(organism).index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
|
67
|
+
|
68
|
+
parser = TSV::Parser.new Signor.data
|
69
|
+
fields = parser.fields
|
70
|
+
dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (Associated Gene Name)", "Effect", "Sign", "PMID"], :type => :double, :organism => Signor.organism
|
71
|
+
dumper.init
|
72
|
+
TSV.traverse parser, :into => dumper do |k,values|
|
73
|
+
info = {}
|
74
|
+
fields.zip(values).each do |field, value|
|
75
|
+
info[field] = value
|
76
|
+
end
|
77
|
+
next unless info["TYPEA"].first == "protein"
|
78
|
+
unia = info["IDA"].first
|
79
|
+
|
80
|
+
res = []
|
81
|
+
res.extend MultipleResult
|
82
|
+
|
83
|
+
info["TYPEB"].zip(info["IDB"]).zip(info["EFFECT"]).zip(info["MECHANISM"]).zip(info["PMID"]).each do |v|
|
84
|
+
typeb,idb,eff,mech,pmid = v.flatten
|
85
|
+
|
86
|
+
next unless typeb == "protein"
|
87
|
+
next unless mech == "transcriptional regulation"
|
88
|
+
nameb = uni2name[idb]
|
89
|
+
next if nameb.nil?
|
90
|
+
sign = "Unknown"
|
91
|
+
sign = "UP" if eff.include? 'up-regulates'
|
92
|
+
sign = "DOWN" if eff.include? 'down-regulates'
|
93
|
+
res << [unia, [nameb, eff, sign, pmid]]
|
94
|
+
end
|
95
|
+
|
96
|
+
res
|
97
|
+
end
|
98
|
+
|
99
|
+
Misc.collapse_stream dumper.stream
|
100
|
+
end
|
74
101
|
end
|
75
102
|
|
76
|
-
iif Signor.
|
103
|
+
iif Signor.tf_tg.produce(true).find if __FILE__ == $0
|
77
104
|
|
@@ -34,8 +34,6 @@ class TestGo < Test::Unit::TestCase
|
|
34
34
|
def test_ancestors_in
|
35
35
|
term = GOTerm.setup("GO:0005730")
|
36
36
|
valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829)
|
37
|
-
iii term.name
|
38
|
-
iii GO.ancestors_in(term, valid)
|
39
37
|
end
|
40
38
|
|
41
39
|
def test_groups
|
@@ -44,12 +42,13 @@ class TestGo < Test::Unit::TestCase
|
|
44
42
|
|
45
43
|
valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829 )
|
46
44
|
valid = %w(GO:0005634 GO:0005730)
|
47
|
-
|
45
|
+
assert_equal GO.group_genes(list, valid)["GO:0005730"][:name], "nucleolus"
|
46
|
+
assert_equal GO.group_genes(list, valid)["GO:0005730"][:items].sort, %w(FBXW7 SP140)
|
48
47
|
end
|
49
48
|
|
50
|
-
def
|
51
|
-
|
52
|
-
|
49
|
+
def test_descendants
|
50
|
+
assert GO.descendants("GO:0006281").include? "GO:0000012"
|
51
|
+
assert GO.descendants("GO:0006281").include? "GO:1990396"
|
53
52
|
end
|
54
53
|
end
|
55
54
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -114,6 +114,7 @@ files:
|
|
114
114
|
- lib/rbbt/sources/kegg.rb
|
115
115
|
- lib/rbbt/sources/matador.rb
|
116
116
|
- lib/rbbt/sources/oncodrive_role.rb
|
117
|
+
- lib/rbbt/sources/oreganno.rb
|
117
118
|
- lib/rbbt/sources/organism.rb
|
118
119
|
- lib/rbbt/sources/pfam.rb
|
119
120
|
- lib/rbbt/sources/pharmagkb.rb
|