rbbt-sources 3.1.20 → 3.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e0d1135b2d31c8801bd915bd416feafec1443966
4
- data.tar.gz: 272bc72050bc2f739ca9460c10e405b45cfdca2d
3
+ metadata.gz: 2bdca7fd8c0c9f60055453c7f318a699eae54738
4
+ data.tar.gz: c0b08856ddd9668b6dbbadd2035653ce24506f04
5
5
  SHA512:
6
- metadata.gz: 9a6e516f2d0188889fedbc4dd6f0e4d88b1c5efe1b0a01c8182e9489387ce92bea658b89d02f22e2e2a326c8529336917693fe5023f8228b4e24e83fbad71b57
7
- data.tar.gz: 8b2bf37685804053dd25fa3de734ab62e886199253290d0dff75c1aa2ecdb24c0f62925a5102191c98e3e8b1f0025f593a02b919c17221f454a958ab607c241f
6
+ metadata.gz: 4e515516855aa249e758a23da1d1d16915697cf3647bf88c0791b5593b435c2adf4b44006081473d4068a76b55d8b018093cba49070144044c68aa3d9611450c
7
+ data.tar.gz: eed95d673673f5ec047b37911c44d6876fbd55d7efdc4884863902f6d76bdeda22e5ac5a1982344488ff8fb68808c8233df78eb0baa1e9fa5a2d712485296577
@@ -5,3 +5,4 @@ feb2014
5
5
  dec2014
6
6
  dec2015
7
7
  oct2016
8
+ may2017
@@ -6,8 +6,9 @@ require 'rbbt/persist/tsv'
6
6
  # now all it does is provide a translation form id to the actual names.
7
7
  module GO
8
8
 
9
- Rbbt.claim Rbbt.share.databases.GO.gene_ontology, :url, 'ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo'
10
- Rbbt.claim Rbbt.share.databases.GO.gslim_generic, :url, 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
9
+ #Rbbt.claim Rbbt.share.databases.GO.gslim_generic, :url, 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
10
+ Rbbt.claim Rbbt.share.databases.GO.gene_ontology, :url, 'http://purl.obolibrary.org/obo/go.obo'
11
+ Rbbt.claim Rbbt.share.databases.GO.annotations, :url, 'http://geneontology.org/gene-associations/goa_human.gaf.gz'
11
12
 
12
13
  MULTIPLE_VALUE_FIELDS = %w(is_a)
13
14
  TSV_GENE_ONTOLOGY = File.join(Persist.cachedir, 'gene_ontology')
@@ -57,6 +58,21 @@ module GO
57
58
  end
58
59
  end
59
60
 
61
+ def self.descendants(id)
62
+ list = Set.new
63
+ new = Set.new
64
+ new << id
65
+ while new.any?
66
+ list += new
67
+ new = Set.new
68
+ info.each do |new_id,values|
69
+ next unless values['is_a']
70
+ new << new_id if values['is_a'].select{|e| list.include? e.split("!").first[/GO:\d+/] }.any? && ! list.include?(new_id)
71
+ end
72
+ end
73
+ list
74
+ end
75
+
60
76
  def self.id2ancestors_by_type(id, type='is_a')
61
77
  if id.kind_of? Array
62
78
  info.values_at(*id).
@@ -93,7 +109,7 @@ module GO
93
109
  end
94
110
  end
95
111
 
96
- def self.ancestors_in(term, valid)
112
+ def self.ancestors_in(term, valid = false)
97
113
  ancestors = id2ancestors(term)
98
114
  return ancestors if FalseClass === valid
99
115
  valid_ancestors = ancestors & valid
@@ -0,0 +1,38 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ module Oreganno
5
+ extend Resource
6
+ self.subdir = 'share/databases/Oreganno'
7
+
8
+ def self.organism(org="Hsa")
9
+ require 'rbbt/sources/organism'
10
+ Organism.default_code(org)
11
+ end
12
+
13
+ #self.search_paths = {}
14
+ #self.search_paths[:default] = :lib
15
+
16
+
17
+ Oreganno.claim Oreganno.data, :proc do
18
+ url = "http://www.oreganno.org/dump/ORegAnno_Combined_2016.01.19.tsv"
19
+ TSV.open(url, :header_hash => '', :type => :list).to_s
20
+ end
21
+
22
+ Oreganno.claim Oreganno.tf_tg, :proc do
23
+ dumper = TSV::Dumper.new :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)"], :type => :flat, :namespace => Oreganno.organism
24
+ dumper.init
25
+ TSV.traverse Oreganno.data, :type => :array, :into => dumper, :bar => true do |line|
26
+ parts = line.split("\t")
27
+ next unless parts[1] == "Homo sapiens"
28
+ tf = parts[4]
29
+ tg = parts[7]
30
+ next if tf == "N/A" or tg == "N/A"
31
+ [tf, [tg]]
32
+ end
33
+ TSV.collapse_stream dumper
34
+ end
35
+ end
36
+
37
+ iif Oreganno.tf_tg.produce(true).find if __FILE__ == $0
38
+
@@ -79,7 +79,7 @@ module Organism
79
79
  elsif release_number <= 75
80
80
  'hg19'
81
81
  else
82
- 'hg20'
82
+ 'hg38'
83
83
  end
84
84
  end
85
85
 
@@ -88,14 +88,7 @@ module Organism
88
88
  source_hg = hg_build(source)
89
89
  target_hg = hg_build(target)
90
90
 
91
- case
92
- when (source_hg == 'hg19' and target_hg == 'hg18')
93
- map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg18.over.chain.gz"
94
- when (source_hg == 'hg18' and target_hg == 'hg19')
95
- map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz"
96
- else
97
- return positions
98
- end
91
+ map_url = "http://hgdownload.cse.ucsc.edu/goldenPath/#{source_hg}/liftOver/#{source_hg}To#{target_hg.sub('h', 'H')}.over.chain.gz"
99
92
 
100
93
  positions_bed = positions.collect{|position|
101
94
  chr, pos = position.split(":").values_at(0,1)
@@ -18,19 +18,6 @@ module Signor
18
18
  end
19
19
 
20
20
  Signor.claim Signor.data, :proc do
21
- #io = Misc.open_pipe do |sin|
22
- # Signor[".source/all.csv"].open do |f|
23
- # quoted = false
24
- # while c = f.getc
25
- # if c == '"'
26
- # quoted = ! quoted
27
- # end
28
- # c = " " if c == "\n" and quoted
29
- # sin << c
30
- # end
31
- # end
32
- #end
33
-
34
21
  sio = Signor[".source/all.csv"].open
35
22
  io_tmp = Misc.remove_quoted_new_line(sio)
36
23
  io = Misc.swap_quoted_character(io_tmp, ';', '--SEMICOLON--')
@@ -71,7 +58,47 @@ module Signor
71
58
 
72
59
  Misc.collapse_stream dumper.stream
73
60
  end
61
+
62
+ Signor.claim Signor.tf_tg, :proc do
63
+ require 'rbbt/sources/organism'
64
+
65
+ organism = Organism.default_code("Hsa")
66
+ uni2name = Organism.identifiers(organism).index :target => "Associated Gene Name", :fields => ["UniProt/SwissProt Accession"], :persist => true
67
+
68
+ parser = TSV::Parser.new Signor.data
69
+ fields = parser.fields
70
+ dumper = TSV::Dumper.new :key_field => "Source (UniProt/SwissProt Accession)", :fields => ["Target (Associated Gene Name)", "Effect", "Sign", "PMID"], :type => :double, :organism => Signor.organism
71
+ dumper.init
72
+ TSV.traverse parser, :into => dumper do |k,values|
73
+ info = {}
74
+ fields.zip(values).each do |field, value|
75
+ info[field] = value
76
+ end
77
+ next unless info["TYPEA"].first == "protein"
78
+ unia = info["IDA"].first
79
+
80
+ res = []
81
+ res.extend MultipleResult
82
+
83
+ info["TYPEB"].zip(info["IDB"]).zip(info["EFFECT"]).zip(info["MECHANISM"]).zip(info["PMID"]).each do |v|
84
+ typeb,idb,eff,mech,pmid = v.flatten
85
+
86
+ next unless typeb == "protein"
87
+ next unless mech == "transcriptional regulation"
88
+ nameb = uni2name[idb]
89
+ next if nameb.nil?
90
+ sign = "Unknown"
91
+ sign = "UP" if eff.include? 'up-regulates'
92
+ sign = "DOWN" if eff.include? 'down-regulates'
93
+ res << [unia, [nameb, eff, sign, pmid]]
94
+ end
95
+
96
+ res
97
+ end
98
+
99
+ Misc.collapse_stream dumper.stream
100
+ end
74
101
  end
75
102
 
76
- iif Signor.protein_protein.produce(true).find if __FILE__ == $0
103
+ iif Signor.tf_tg.produce(true).find if __FILE__ == $0
77
104
 
@@ -34,8 +34,6 @@ class TestGo < Test::Unit::TestCase
34
34
  def test_ancestors_in
35
35
  term = GOTerm.setup("GO:0005730")
36
36
  valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829)
37
- iii term.name
38
- iii GO.ancestors_in(term, valid)
39
37
  end
40
38
 
41
39
  def test_groups
@@ -44,12 +42,13 @@ class TestGo < Test::Unit::TestCase
44
42
 
45
43
  valid = %w(GO:0005886 GO:0005634 GO:0005730 GO:0005829 )
46
44
  valid = %w(GO:0005634 GO:0005730)
47
- iii GO.group_genes(list, valid)
45
+ assert_equal GO.group_genes(list, valid)["GO:0005730"][:name], "nucleolus"
46
+ assert_equal GO.group_genes(list, valid)["GO:0005730"][:items].sort, %w(FBXW7 SP140)
48
47
  end
49
48
 
50
- def test_nucleolus
51
- nuo = "GO:0005730"
52
- nu = "GO:0005634"
49
+ def test_descendants
50
+ assert GO.descendants("GO:0006281").include? "GO:0000012"
51
+ assert GO.descendants("GO:0006281").include? "GO:1990396"
53
52
  end
54
53
  end
55
54
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.20
4
+ version: 3.1.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-20 00:00:00.000000000 Z
11
+ date: 2017-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -114,6 +114,7 @@ files:
114
114
  - lib/rbbt/sources/kegg.rb
115
115
  - lib/rbbt/sources/matador.rb
116
116
  - lib/rbbt/sources/oncodrive_role.rb
117
+ - lib/rbbt/sources/oreganno.rb
117
118
  - lib/rbbt/sources/organism.rb
118
119
  - lib/rbbt/sources/pfam.rb
119
120
  - lib/rbbt/sources/pharmagkb.rb