rbbt-sources 3.1.20 → 3.1.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +1 -0
- data/lib/rbbt/sources/go.rb +19 -3
- data/lib/rbbt/sources/oreganno.rb +38 -0
- data/lib/rbbt/sources/organism.rb +2 -9
- data/lib/rbbt/sources/signor.rb +41 -14
- data/test/rbbt/sources/test_go.rb +5 -6
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bdca7fd8c0c9f60055453c7f318a699eae54738
|
4
|
+
data.tar.gz: c0b08856ddd9668b6dbbadd2035653ce24506f04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e515516855aa249e758a23da1d1d16915697cf3647bf88c0791b5593b435c2adf4b44006081473d4068a76b55d8b018093cba49070144044c68aa3d9611450c
|
7
|
+
data.tar.gz: eed95d673673f5ec047b37911c44d6876fbd55d7efdc4884863902f6d76bdeda22e5ac5a1982344488ff8fb68808c8233df78eb0baa1e9fa5a2d712485296577
|
data/lib/rbbt/sources/go.rb
CHANGED
@@ -6,8 +6,9 @@ require 'rbbt/persist/tsv'
|
|
6
6
|
# now all it does is provide a translation from id to the actual names.
|
7
7
|
module GO
|
8
8
|
|
9
|
-
Rbbt.claim Rbbt.share.databases.GO.
|
10
|
-
Rbbt.claim Rbbt.share.databases.GO.
|
9
|
+
#Rbbt.claim Rbbt.share.databases.GO.gslim_generic, :url, 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
|
10
|
+
Rbbt.claim Rbbt.share.databases.GO.gene_ontology, :url, 'http://purl.obolibrary.org/obo/go.obo'
|
11
|
+
Rbbt.claim Rbbt.share.databases.GO.annotations, :url, 'http://geneontology.org/gene-associations/goa_human.gaf.gz'
|
11
12
|
|
12
13
|
MULTIPLE_VALUE_FIELDS = %w(is_a)
|
13
14
|
TSV_GENE_ONTOLOGY = File.join(Persist.cachedir, 'gene_ontology')
|
@@ -57,6 +58,21 @@ module GO
|
|
57
58
|
end
|
58
59
|
end
|
59
60
|
|
61
|
+
def self.descendants(id)
|
62
|
+
list = Set.new
|
63
|
+
new = Set.new
|
64
|
+
new << id
|
65
|
+
while new.any?
|
66
|
+
list += new
|
67
|
+
new = Set.new
|
68
|
+
info.each do |new_id,values|
|
69
|
+
next unless values['is_a']
|
70
|
+
new << new_id if values['is_a'].select{|e| list.include? e.split("!").first[/GO:\d+/] }.any? && ! list.include?(new_id)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
list
|
74
|
+
end
|
75
|
+
|
60
76
|
def self.id2ancestors_by_type(id, type='is_a')
|
61
77
|
if id.kind_of? Array
|
62
78
|
info.values_at(*id).
|
@@ -93,7 +109,7 @@ module GO
|
|
93
109
|
end
|
94
110
|
end
|
95
111
|
|
96
|
-
def self.ancestors_in(term, valid)
|
112
|
+
def self.ancestors_in(term, valid = false)
|
97
113
|
ancestors = id2ancestors(term)
|
98
114
|
return ancestors if FalseClass === valid
|
99
115
|
valid_ancestors = ancestors & valid
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module Oreganno
|
5
|
+
extend Resource
|
6
|
+
self.subdir = 'share/databases/Oreganno'
|
7
|
+
|
8
|
+
def self.organism(org="Hsa")
|
9
|
+
require 'rbbt/sources/organism'
|
10
|
+
Organism.default_code(org)
|
11
|
+
end
|
12
|
+
|
13
|
+
#self.search_paths = {}
|
14
|
+
#self.search_paths[:default] = :lib
|
15
|
+
|
16
|
+
|
17
|
+
Oreganno.claim Oreganno.data, :proc do
|
18
|
+
url = "http://www.oreganno.org/dump/ORegAnno_Combined_2016.01.19.tsv"
|
19
|
+
TSV.open(url, :header_hash => '', :type => :list).to_s
|
20
|
+
end
|
21
|
+
|
22
|
+
Oreganno.claim Oreganno.tf_tg, :proc do
|
23
|
+
dumper = TSV::Dumper.new :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)"], :type => :flat, :namespace => Oreganno.organism
|
24
|
+
dumper.init
|
25
|
+
TSV.traverse Oreganno.data, :type => :array, :into => dumper, :bar => true do |line|
|
26
|
+
parts = line.split("\t")
|
27
|
+
next unless parts[1] == "Homo sapiens"
|
28
|
+
tf = parts[4]
|
29
|
+
tg = parts[7]
|
30
|
+
next if tf == "N/A" or tg == "N/A"
|
31
|
+
[tf, [tg]]
|
32
|
+
end
|
33
|
+
TSV.collapse_stream dumper
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
iif Oreganno.tf_tg.produce(true).find if __FILE__ == $0
|
38
|
+
|
@@ -79,7 +79,7 @@ module Organism
|
|
79
79
|
elsif release_number <= 75
|
80
80
|
'hg19'
|
81
81
|
else
|
82
|
-
'
|
82
|
+
'hg38'
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
@@ -88,14 +88,7 @@ module Organism
|
|
88
88
|
source_hg = hg_build(source)
|
89
89
|
target_hg = hg_build(target)
|
90
90
|
|
91
|
-
|
92
|
-
when (source_hg == 'hg19' and target_hg == 'hg18')
|
93
|
-
map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg18.over.chain.gz"
|
94
|
-
when (source_hg == 'hg18' and target_hg == 'hg19')
|
95
|
-
map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz"
|
96
|
-
else
|
97
|
-
return positions
|
98
|
-
end
|
91
|
+
map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/#{source_hg}/liftOver/#{source_hg}To#{target_hg.sub('h', 'H')}.over.chain.gz"
|
99
92
|
|
100
93
|
positions_bed = positions.collect{|position|
|
101
94
|
chr, pos = position.split(":").values_at(0,1)
|
data/lib/rbbt/sources/signor.rb
CHANGED
@@ -18,19 +18,6 @@ module Signor
|
|
18
18
|
end
|
19
19
|
|
20
20
|
Signor.claim Signor.data, :proc do
|
21
|
-
#io = Misc.open_pipe do |sin|
|
22
|
-
# Signor[".source/all.csv"].open do |f|
|
23
|
-
# quoted = false
|
24
|
-
# while c = f.getc
|
25
|
-
# if c == '"'
|
26
|
-
# quoted = ! quoted
|
27
|
-
# end
|
28
|
-
# c = " " if c == "\n" and quoted
|
29
|
-
# sin << c
|
30
|
-
# end
|
31
|
-
# end
|
32
|
-
#end
|
33
|
-
|
34
21
|
sio = Signor[".source/all.csv"].open
|
35
22
|
io_tmp = Misc.remove_quoted_new_line(sio)
|
36
23
|
io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')
|
@@ -71,7 +58,47 @@ module Signor
|
|
71
58
|
|
72
59
|
Misc.collapse_stream dumper.stream
|
73
60
|
end
|
61
|
+
|
62
|
+
Signor.claim Signor.tf_tg, :proc do
|
63
|
+
require 'rbbt/sources/organism'
|
64
|
+
|
65
|
+
organism = Organism.default_code("Hsa")
|
66
|
+
uni2name = Organism.identifiers(organism).index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
|
67
|
+
|
68
|
+
parser = TSV::Parser.new Signor.data
|
69
|
+
fields = parser.fields
|
70
|
+
dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (Associated Gene Name)", "Effect", "Sign", "PMID"], :type => :double, :organism => Signor.organism
|
71
|
+
dumper.init
|
72
|
+
TSV.traverse parser, :into => dumper do |k,values|
|
73
|
+
info = {}
|
74
|
+
fields.zip(values).each do |field, value|
|
75
|
+
info[field] = value
|
76
|
+
end
|
77
|
+
next unless info["TYPEA"].first == "protein"
|
78
|
+
unia = info["IDA"].first
|
79
|
+
|
80
|
+
res = []
|
81
|
+
res.extend MultipleResult
|
82
|
+
|
83
|
+
info["TYPEB"].zip(info["IDB"]).zip(info["EFFECT"]).zip(info["MECHANISM"]).zip(info["PMID"]).each do |v|
|
84
|
+
typeb,idb,eff,mech,pmid = v.flatten
|
85
|
+
|
86
|
+
next unless typeb == "protein"
|
87
|
+
next unless mech == "transcriptional regulation"
|
88
|
+
nameb = uni2name[idb]
|
89
|
+
next if nameb.nil?
|
90
|
+
sign = "Unknown"
|
91
|
+
sign = "UP" if eff.include? 'up-regulates'
|
92
|
+
sign = "DOWN" if eff.include? 'down-regulates'
|
93
|
+
res << [unia, [nameb, eff, sign, pmid]]
|
94
|
+
end
|
95
|
+
|
96
|
+
res
|
97
|
+
end
|
98
|
+
|
99
|
+
Misc.collapse_stream dumper.stream
|
100
|
+
end
|
74
101
|
end
|
75
102
|
|
76
|
-
iif Signor.
|
103
|
+
iif Signor.tf_tg.produce(true).find if __FILE__ == $0
|
77
104
|
|
@@ -34,8 +34,6 @@ class TestGo < Test::Unit::TestCase
|
|
34
34
|
def test_ancestors_in
|
35
35
|
term = GOTerm.setup("GO:0005730")
|
36
36
|
valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829)
|
37
|
-
iii term.name
|
38
|
-
iii GO.ancestors_in(term, valid)
|
39
37
|
end
|
40
38
|
|
41
39
|
def test_groups
|
@@ -44,12 +42,13 @@ class TestGo < Test::Unit::TestCase
|
|
44
42
|
|
45
43
|
valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829 )
|
46
44
|
valid = %w(GO:0005634 GO:0005730)
|
47
|
-
|
45
|
+
assert_equal GO.group_genes(list, valid)["GO:0005730"][:name], "nucleolus"
|
46
|
+
assert_equal GO.group_genes(list, valid)["GO:0005730"][:items].sort, %w(FBXW7 SP140)
|
48
47
|
end
|
49
48
|
|
50
|
-
def
|
51
|
-
|
52
|
-
|
49
|
+
def test_descendants
|
50
|
+
assert GO.descendants("GO:0006281").include? "GO:0000012"
|
51
|
+
assert GO.descendants("GO:0006281").include? "GO:1990396"
|
53
52
|
end
|
54
53
|
end
|
55
54
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -114,6 +114,7 @@ files:
|
|
114
114
|
- lib/rbbt/sources/kegg.rb
|
115
115
|
- lib/rbbt/sources/matador.rb
|
116
116
|
- lib/rbbt/sources/oncodrive_role.rb
|
117
|
+
- lib/rbbt/sources/oreganno.rb
|
117
118
|
- lib/rbbt/sources/organism.rb
|
118
119
|
- lib/rbbt/sources/pfam.rb
|
119
120
|
- lib/rbbt/sources/pharmagkb.rb
|