rbbt-sources 3.1.20 → 3.1.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e0d1135b2d31c8801bd915bd416feafec1443966
4
- data.tar.gz: 272bc72050bc2f739ca9460c10e405b45cfdca2d
3
+ metadata.gz: 2bdca7fd8c0c9f60055453c7f318a699eae54738
4
+ data.tar.gz: c0b08856ddd9668b6dbbadd2035653ce24506f04
5
5
  SHA512:
6
- metadata.gz: 9a6e516f2d0188889fedbc4dd6f0e4d88b1c5efe1b0a01c8182e9489387ce92bea658b89d02f22e2e2a326c8529336917693fe5023f8228b4e24e83fbad71b57
7
- data.tar.gz: 8b2bf37685804053dd25fa3de734ab62e886199253290d0dff75c1aa2ecdb24c0f62925a5102191c98e3e8b1f0025f593a02b919c17221f454a958ab607c241f
6
+ metadata.gz: 4e515516855aa249e758a23da1d1d16915697cf3647bf88c0791b5593b435c2adf4b44006081473d4068a76b55d8b018093cba49070144044c68aa3d9611450c
7
+ data.tar.gz: eed95d673673f5ec047b37911c44d6876fbd55d7efdc4884863902f6d76bdeda22e5ac5a1982344488ff8fb68808c8233df78eb0baa1e9fa5a2d712485296577
@@ -5,3 +5,4 @@ feb2014
5
5
  dec2014
6
6
  dec2015
7
7
  oct2016
8
+ may2017
@@ -6,8 +6,9 @@ require 'rbbt/persist/tsv'
6
6
  # now all it does is provide a translation from id to the actual names.
7
7
  module GO
8
8
 
9
- Rbbt.claim Rbbt.share.databases.GO.gene_ontology, :url, 'ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo'
10
- Rbbt.claim Rbbt.share.databases.GO.gslim_generic, :url, 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
9
+ #Rbbt.claim Rbbt.share.databases.GO.gslim_generic, :url, 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
10
+ Rbbt.claim Rbbt.share.databases.GO.gene_ontology, :url, 'http://purl.obolibrary.org/obo/go.obo'
11
+ Rbbt.claim Rbbt.share.databases.GO.annotations, :url, 'http://geneontology.org/gene-associations/goa_human.gaf.gz'
11
12
 
12
13
  MULTIPLE_VALUE_FIELDS = %w(is_a)
13
14
  TSV_GENE_ONTOLOGY = File.join(Persist.cachedir, 'gene_ontology')
@@ -57,6 +58,21 @@ module GO
57
58
  end
58
59
  end
59
60
 
61
+ def self.descendants(id)
62
+ list = Set.new
63
+ new = Set.new
64
+ new << id
65
+ while new.any?
66
+ list += new
67
+ new = Set.new
68
+ info.each do |new_id,values|
69
+ next unless values['is_a']
70
+ new << new_id if values['is_a'].select{|e| list.include? e.split("!").first[/GO:\d+/] }.any? && ! list.include?(new_id)
71
+ end
72
+ end
73
+ list
74
+ end
75
+
60
76
  def self.id2ancestors_by_type(id, type='is_a')
61
77
  if id.kind_of? Array
62
78
  info.values_at(*id).
@@ -93,7 +109,7 @@ module GO
93
109
  end
94
110
  end
95
111
 
96
- def self.ancestors_in(term, valid)
112
+ def self.ancestors_in(term, valid = false)
97
113
  ancestors = id2ancestors(term)
98
114
  return ancestors if FalseClass === valid
99
115
  valid_ancestors = ancestors & valid
@@ -0,0 +1,38 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ module Oreganno
5
+ extend Resource
6
+ self.subdir = 'share/databases/Oreganno'
7
+
8
+ def self.organism(org="Hsa")
9
+ require 'rbbt/sources/organism'
10
+ Organism.default_code(org)
11
+ end
12
+
13
+ #self.search_paths = {}
14
+ #self.search_paths[:default] = :lib
15
+
16
+
17
+ Oreganno.claim Oreganno.data, :proc do
18
+ url = "http://www.oreganno.org/dump/ORegAnno_Combined_2016.01.19.tsv"
19
+ TSV.open(url, :header_hash => '', :type => :list).to_s
20
+ end
21
+
22
+ Oreganno.claim Oreganno.tf_tg, :proc do
23
+ dumper = TSV::Dumper.new :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)"], :type => :flat, :namespace => Oreganno.organism
24
+ dumper.init
25
+ TSV.traverse Oreganno.data, :type => :array, :into => dumper, :bar => true do |line|
26
+ parts = line.split("\t")
27
+ next unless parts[1] == "Homo sapiens"
28
+ tf = parts[4]
29
+ tg = parts[7]
30
+ next if tf == "N/A" or tg == "N/A"
31
+ [tf, [tg]]
32
+ end
33
+ TSV.collapse_stream dumper
34
+ end
35
+ end
36
+
37
+ iif Oreganno.tf_tg.produce(true).find if __FILE__ == $0
38
+
@@ -79,7 +79,7 @@ module Organism
79
79
  elsif release_number <= 75
80
80
  'hg19'
81
81
  else
82
- 'hg20'
82
+ 'hg38'
83
83
  end
84
84
  end
85
85
 
@@ -88,14 +88,7 @@ module Organism
88
88
  source_hg = hg_build(source)
89
89
  target_hg = hg_build(target)
90
90
 
91
- case
92
- when (source_hg == 'hg19' and target_hg == 'hg18')
93
- map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg18.over.chain.gz"
94
- when (source_hg == 'hg18' and target_hg == 'hg19')
95
- map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz"
96
- else
97
- return positions
98
- end
91
+ map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/#{source_hg}/liftOver/#{source_hg}To#{target_hg.sub('h', 'H')}.over.chain.gz"
99
92
 
100
93
  positions_bed = positions.collect{|position|
101
94
  chr, pos = position.split(":").values_at(0,1)
@@ -18,19 +18,6 @@ module Signor
18
18
  end
19
19
 
20
20
  Signor.claim Signor.data, :proc do
21
- #io = Misc.open_pipe do |sin|
22
- # Signor[".source/all.csv"].open do |f|
23
- # quoted = false
24
- # while c = f.getc
25
- # if c == '"'
26
- # quoted = ! quoted
27
- # end
28
- # c = " " if c == "\n" and quoted
29
- # sin << c
30
- # end
31
- # end
32
- #end
33
-
34
21
  sio = Signor[".source/all.csv"].open
35
22
  io_tmp = Misc.remove_quoted_new_line(sio)
36
23
  io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')
@@ -71,7 +58,47 @@ module Signor
71
58
 
72
59
  Misc.collapse_stream dumper.stream
73
60
  end
61
+
62
+ Signor.claim Signor.tf_tg, :proc do
63
+ require 'rbbt/sources/organism'
64
+
65
+ organism = Organism.default_code("Hsa")
66
+ uni2name = Organism.identifiers(organism).index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
67
+
68
+ parser = TSV::Parser.new Signor.data
69
+ fields = parser.fields
70
+ dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (Associated Gene Name)", "Effect", "Sign", "PMID"], :type => :double, :organism => Signor.organism
71
+ dumper.init
72
+ TSV.traverse parser, :into => dumper do |k,values|
73
+ info = {}
74
+ fields.zip(values).each do |field, value|
75
+ info[field] = value
76
+ end
77
+ next unless info["TYPEA"].first == "protein"
78
+ unia = info["IDA"].first
79
+
80
+ res = []
81
+ res.extend MultipleResult
82
+
83
+ info["TYPEB"].zip(info["IDB"]).zip(info["EFFECT"]).zip(info["MECHANISM"]).zip(info["PMID"]).each do |v|
84
+ typeb,idb,eff,mech,pmid = v.flatten
85
+
86
+ next unless typeb == "protein"
87
+ next unless mech == "transcriptional regulation"
88
+ nameb = uni2name[idb]
89
+ next if nameb.nil?
90
+ sign = "Unknown"
91
+ sign = "UP" if eff.include? 'up-regulates'
92
+ sign = "DOWN" if eff.include? 'down-regulates'
93
+ res << [unia, [nameb, eff, sign, pmid]]
94
+ end
95
+
96
+ res
97
+ end
98
+
99
+ Misc.collapse_stream dumper.stream
100
+ end
74
101
  end
75
102
 
76
- iif Signor.protein_protein.produce(true).find if __FILE__ == $0
103
+ iif Signor.tf_tg.produce(true).find if __FILE__ == $0
77
104
 
@@ -34,8 +34,6 @@ class TestGo < Test::Unit::TestCase
34
34
  def test_ancestors_in
35
35
  term = GOTerm.setup("GO:0005730")
36
36
  valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829)
37
- iii term.name
38
- iii GO.ancestors_in(term, valid)
39
37
  end
40
38
 
41
39
  def test_groups
@@ -44,12 +42,13 @@ class TestGo < Test::Unit::TestCase
44
42
 
45
43
  valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829 )
46
44
  valid = %w(GO:0005634 GO:0005730)
47
- iii GO.group_genes(list, valid)
45
+ assert_equal GO.group_genes(list, valid)["GO:0005730"][:name], "nucleolus"
46
+ assert_equal GO.group_genes(list, valid)["GO:0005730"][:items].sort, %w(FBXW7 SP140)
48
47
  end
49
48
 
50
- def test_nucleolus
51
- nuo = "GO:0005730"
52
- nu = "GO:0005634"
49
+ def test_descendants
50
+ assert GO.descendants("GO:0006281").include? "GO:0000012"
51
+ assert GO.descendants("GO:0006281").include? "GO:1990396"
53
52
  end
54
53
  end
55
54
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.20
4
+ version: 3.1.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-20 00:00:00.000000000 Z
11
+ date: 2017-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -114,6 +114,7 @@ files:
114
114
  - lib/rbbt/sources/kegg.rb
115
115
  - lib/rbbt/sources/matador.rb
116
116
  - lib/rbbt/sources/oncodrive_role.rb
117
+ - lib/rbbt/sources/oreganno.rb
117
118
  - lib/rbbt/sources/organism.rb
118
119
  - lib/rbbt/sources/pfam.rb
119
120
  - lib/rbbt/sources/pharmagkb.rb