rbbt-util 5.14.34 → 5.14.35

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ec2d34290a61ca4f3f32cd875a07a15c9c7c06fd
4
- data.tar.gz: 08ce21aa7a885530dcba3859231d94874f99bba2
3
+ metadata.gz: 14a5356d56a5794296f8708a848957a4efd82264
4
+ data.tar.gz: b4e41917f06c7d28736b6ed235c61271eb25ec47
5
5
  SHA512:
6
- metadata.gz: d532ef379f2238e8dbaf311f485a4e18ac774177624e327d7b1d432a6cd0304f17f945448523407bb0cbb8f14a1067b50d36585b6d554d646dd5a01124c5b4f2
7
- data.tar.gz: b15a2660fd8be0a782fb0a139bc44b8670adf3629446be6c0b8dc3e6d27f017a2c71db0cb6d76602b306917c9909f03013226566d963c0247506a4723968ead8
6
+ metadata.gz: 66d1db7e9f85049030c34a153f114c864b239bca644aade3f2f9803e7764b3c97a9ec342416045a1bb4ae4f40119749ae58c2d572c00e920eb9d91273366bb88
7
+ data.tar.gz: e9432f28095748fd4d25e945d1a9cb4c2b100708e200216ee589a9f57eb19d999d55ac3556ae040a388775faba145212da99ff3ffbe0001cc218821ffbf53597
@@ -11,7 +11,9 @@ module Association
11
11
  tsv.through do |source, values|
12
12
  Misc.zip_fields(values).each do |info|
13
13
  target, *rest = info
14
- new.zip_new target, [source] + rest
14
+ next if target == source
15
+ rest.unshift source
16
+ new.zip_new target, rest
15
17
  end
16
18
  end
17
19
  else
@@ -60,7 +62,7 @@ module Association
60
62
  end
61
63
 
62
64
  def self.reorder_tsv(tsv, options = {})
63
- fields, undirected, persist = Misc.process_options options, :fields, :undirected, :persist
65
+ fields, persist = Misc.process_options options, :fields, :persist
64
66
  all_fields = tsv.all_fields
65
67
 
66
68
  source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, options)
@@ -69,20 +71,18 @@ module Association
69
71
  info_fields = field_pos.collect{|f| f == :key ? :key : all_fields[f]}
70
72
  options = options.merge({:key_field => source_field, :fields => info_fields})
71
73
 
72
- tsv = tsv.reorder source_field, fields, :zipped => true
74
+ tsv = tsv.reorder source_field, fields if true or source_field != tsv.key_field or (fields and tsv.fields != fields)
73
75
 
74
76
  tsv.key_field = source_header
75
77
  tsv.fields = field_headers
76
78
 
77
79
  tsv = translate tsv, source_format, target_format, :persist => persist if source_format or target_format
78
80
 
79
- tsv = add_reciprocal tsv if undirected
80
-
81
81
  tsv
82
82
  end
83
83
 
84
84
  def self.open_stream(stream, options = {})
85
- fields, undirected, persist = Misc.process_options options, :fields, :undirected, :persist
85
+ fields, persist = Misc.process_options options, :fields, :persist
86
86
 
87
87
  parser = TSV::Parser.new stream, options.merge(:fields => nil, :key_field => nil)
88
88
 
@@ -132,8 +132,6 @@ module Association
132
132
 
133
133
  tsv = translate tsv, source_format, target_format, :persist => persist if source_format or target_format
134
134
 
135
- tsv = add_reciprocal tsv if undirected
136
-
137
135
  tsv
138
136
  end
139
137
 
@@ -11,12 +11,14 @@ module Association
11
11
 
12
12
  file = version_file(file, options[:namespace]) if options[:namespace] and String === file
13
13
 
14
- undirected = options[:undirected]
15
14
  Persist.persist_tsv(file, "Association Index", options, persist_options.dup) do |data|
16
15
  recycle = options[:recycle]
16
+ undirected = options[:undirected]
17
17
 
18
18
  persist_options[:file] = persist_options[:file] + '.database' if persist_options[:file]
19
- database = open(file, options, persist_options.dup)
19
+ database = open(file, options, persist_options.dup.merge(:engine => "HDB"))
20
+
21
+ undirected = true if undirected.nil? and database.key_field == database.fields.first
20
22
 
21
23
  fields = database.fields
22
24
  source_field = database.key_field
@@ -68,6 +70,10 @@ module Association
68
70
  info = old_info.zip(info).collect{|p| p * ";;" }
69
71
  data[key] = info
70
72
  end
73
+ if undirected
74
+ reverse_key = [target,source] * "~"
75
+ data[reverse_key] = info unless data.include? reverse_key
76
+ end
71
77
  end
72
78
  end
73
79
  end
@@ -123,8 +123,8 @@ module Association
123
123
 
124
124
 
125
125
  if format = options[:format]
126
- source_format = process_formats(specs[:source][1] || specs[:source][0], format) || source_format
127
- target_format = process_formats(specs[:target][1] || specs[:target][0], format) || target_format
126
+ source_format = process_formats(specs[:source][1] || specs[:source][0], format) || source_format unless source_format
127
+ target_format = process_formats(specs[:target][1] || specs[:target][0], format) || target_format unless target_format
128
128
  end
129
129
 
130
130
  Log.low "Headers -- #{[source_pos, field_pos, source_header, field_headers, source_format, target_format]}"
data/lib/rbbt/entity.rb CHANGED
@@ -7,13 +7,11 @@ module Entity
7
7
  attr_accessor :formats, :entity_property_cache
8
8
  end
9
9
 
10
-
11
10
  self.entity_property_cache = "var/entity_property"
12
11
  self.formats = {}
13
12
 
14
13
  UNPERSISTED_PREFIX = "entity_unpersisted_property_"
15
14
 
16
-
17
15
  attr_accessor :all_formats
18
16
  def self.extended(base)
19
17
  base.extend Annotation
@@ -29,6 +29,7 @@ module Entity
29
29
  source ||= self.respond_to?(:format)? self.format : nil
30
30
 
31
31
  index = TSV.translation_index(identifier_files, format, source, :persist => true)
32
+ raise "No index from #{ source } to #{ format }: #{Misc.fingerprint identifier_files}" if index.nil?
32
33
  index.unnamed = true
33
34
  index
34
35
  end
@@ -98,6 +99,7 @@ module Entity
98
99
 
99
100
  @identifier_files ||= []
100
101
  @identifier_files << file
102
+ @identifier_files.uniq!
101
103
 
102
104
 
103
105
  self.include Entity::Identified unless Entity::Identified === self
@@ -0,0 +1,9 @@
1
+ require 'rbbt/knowledge_base/registry'
2
+ require 'rbbt/statistics/hypergeometric'
3
+ class KnowledgeBase
4
+ def enrichment(name, entities, options = {})
5
+ database = get_database(name, options)
6
+ entities = identify_source name, entities
7
+ database.enrichment entities, database.fields.first, :persist => false
8
+ end
9
+ end
@@ -88,8 +88,8 @@ class KnowledgeBase
88
88
  def identify_source(name, entity)
89
89
  return :all if entity == :all
90
90
  index = source_index(name)
91
- return nil if index.nil?
92
- index.values_at *entity
91
+ return entity if index.nil?
92
+ Array === entity ? index.values_at(*entity) : index[entity]
93
93
  end
94
94
 
95
95
 
@@ -97,7 +97,7 @@ class KnowledgeBase
97
97
  return :all if entity == :all
98
98
  index = target_index(name)
99
99
  return nil if index.nil?
100
- index.values_at *entity
100
+ Array === entity ? index.values_at(*entity) : index[entity]
101
101
  end
102
102
 
103
103
  def identify(name, entity)
@@ -45,8 +45,8 @@ class KnowledgeBase
45
45
  persist_file = persist_dir[key]
46
46
  file, registered_options = registry[name]
47
47
 
48
- options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true
49
48
  options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
49
+ options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true
50
50
 
51
51
  persist_options = Misc.pull_keys options, :persist
52
52
 
@@ -77,12 +77,13 @@ class KnowledgeBase
77
77
  persist_file = dir.indices[key]
78
78
  file, registered_options = registry[name]
79
79
 
80
- options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true
80
+
81
81
  options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
82
+ options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true
82
83
 
83
84
  persist_options = Misc.pull_keys options, :persist
84
85
 
85
- database = if persist_file.exists?
86
+ database = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
86
87
  Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
87
88
  Association.open(nil, options, persist_options)
88
89
  else
@@ -602,6 +602,16 @@ module TSV
602
602
  peek
603
603
  end
604
604
 
605
+ def head(times=10)
606
+ stream = dumper_stream
607
+ str = ""
608
+ times.times do |i|
609
+ break if stream.eof?
610
+ str << stream.gets
611
+ end
612
+ str
613
+ end
614
+
605
615
  def summary
606
616
 
607
617
  key = nil
@@ -94,11 +94,16 @@ module TSV
94
94
  if TSV === file
95
95
  all_fields = file.all_fields
96
96
  target = file.fields.first if target.nil?
97
- return file.index(options.merge(:target => target, :fields => fields, :order => true)) if (source.nil? or all_fields.include? source) and all_fields.include? target
97
+ if (source.nil? or all_fields.include? source) and all_fields.include? target
98
+ return file.index(options.merge(:target => target, :fields => fields, :order => true))
99
+ end
98
100
  else
99
101
  all_fields = TSV.parse_header(file).all_fields
100
102
  target = all_fields[1] if target.nil?
101
- return TSV.index(file, options.merge(:target => target, :fields => fields, :order => true)) if (source.nil? or all_fields.include? source) and all_fields.include? target
103
+ if (source.nil? or all_fields.include? source) and all_fields.include? target
104
+ index = TSV.index(file, options.merge(:target => target, :fields => fields, :order => true))
105
+ return index
106
+ end
102
107
  end
103
108
  end
104
109
 
@@ -112,10 +117,10 @@ module TSV
112
117
 
113
118
  common_field = (all_fields & other_all_fields).first
114
119
 
115
- if common_field and (source.nil? or fields.include? source) and all_fields.include? common_field and
116
- other_all_fields.include? common_field and other_all_fields.include? target
120
+ if common_field and (source.nil? or all_fields.include? source) and other_all_fields.include? target
121
+
122
+ index = Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|
117
123
 
118
- return Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|
119
124
  index = TSV === file ?
120
125
  file.index(options.merge(:target => common_field, :fields => fields)) :
121
126
  TSV.index(file, options.merge(:target => common_field, :fields => fields))
@@ -125,10 +130,11 @@ module TSV
125
130
  TSV.index(other_file, options.merge(:target => target, :fields => [common_field]))
126
131
 
127
132
  data.serializer = :clean
133
+
134
+ # ToDo: remove the need to to the `to_list` transformation
128
135
  data.merge! index.to_list.attach(other_index.to_list).slice([target]).to_single
129
-
130
- data
131
136
  end
137
+ return index
132
138
  end
133
139
  end
134
140
  end
@@ -116,6 +116,8 @@ module Misc
116
116
  HASH2MD5_MAX_STRING_LENGTH = 1000
117
117
  HASH2MD5_MAX_ARRAY_LENGTH = 100
118
118
  def self.hash2md5(hash)
119
+ return "" if hash.nil? or hash.empty?
120
+
119
121
  str = ""
120
122
  keys = hash.keys
121
123
  keys = keys.clean_annotations if keys.respond_to? :clean_annotations
@@ -161,7 +163,10 @@ module Misc
161
163
 
162
164
  end
163
165
 
164
- str << "_" << hash2md5(v.info) if defined? Annotated and Annotated === v and not (defined? AssociationItem and AssociationItem === v)
166
+ if defined? Annotated and Annotated === v and not (defined? AssociationItem and AssociationItem === v)
167
+ info = Annotated.purge(v.info)
168
+ str << "_" << hash2md5(info)
169
+ end
165
170
  end
166
171
  hash.unnamed = unnamed if hash.respond_to? :unnamed
167
172
 
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Subset entries in a tsv
10
+
11
+ $ rbbt tsv head [options] file.tsv
12
+
13
+ Subsets entries from a TSV file from a given list. Works with Tokyocabinet HDB and BDB as well.
14
+
15
+ -tch--tokyocabinet File is a TC HDB
16
+ -tcb--tokyocabinet_bd File is a TC BDB
17
+ -hh--header_hash* Change the character used to mark the header line (defaults to #)
18
+ -h--help Help
19
+ EOF
20
+
21
+ SOPT.usage if options[:help]
22
+
23
+ file = ARGV.shift
24
+
25
+ file = STDIN if file == '-'
26
+
27
+ raise ParameterException, "Please specify the tsv file as argument" if file.nil?
28
+
29
+
30
+ options[:fields] = options[:fields].split(/,\|/) if options[:fields]
31
+ options[:header_hash] = options["header_hash"]
32
+
33
+ case
34
+ when options[:tokyocabinet]
35
+ tsv = Persist.open_tokyocabinet(file, false)
36
+ when options[:tokyocabinet_bd]
37
+ tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
38
+ else
39
+ tsv = TSV.open(file, options)
40
+ end
41
+
42
+ puts tsv.head
@@ -28,11 +28,6 @@ TP53 NFKB1|GLI1 activation|activation true|true
28
28
  assert_equal ["false"], tsv["MDM2"]["directed?"]
29
29
  end
30
30
 
31
- def test_open_no_persist_undirected
32
- tsv = Association.open(EFFECT_TSV, :source => "SG=~Gene", :target => "TG=~Gene", :undirected => true, :persist => false)
33
- assert_equal "TP53", tsv["MDM2"]["Gene"].first
34
- assert tsv["TP53"]["Gene"].include? "MDM2"
35
- end
36
31
 
37
32
  def test_open_persist
38
33
  tsv = Association.open(EFFECT_TSV, EFFECT_OPTIONS, :persist => true, :update => true)
@@ -0,0 +1,42 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '../../../test_helper')
2
+ require 'rbbt/util/tmpfile'
3
+ require 'test/unit'
4
+ require 'rbbt/knowledge_base'
5
+ require 'rbbt/knowledge_base/enrichment'
6
+
7
+
8
+ class TestKnowledgeBaseEnrichment < Test::Unit::TestCase
9
+
10
+ EFFECT =StringIO.new <<-END
11
+ #: :sep=" "#:type=:double
12
+ #SG TG Effect
13
+ MDM2 TP53 inhibition
14
+ TP53 NFKB1|GLI1 activation|activation true|true
15
+ END
16
+
17
+ EFFECT_OPTIONS = {
18
+ :source => "SG=~Associated Gene Name",
19
+ :target => "TG=~Associated Gene Name=>Ensembl Gene ID",
20
+ :persist => false,
21
+ :identifiers => datafile_test('identifiers'),
22
+ :undirected => true,
23
+ :namespace => "Hsa"
24
+ }
25
+
26
+ EFFECT_TSV = TSV.open EFFECT, EFFECT_OPTIONS.dup
27
+
28
+ KNOWLEDGE_BASE = KnowledgeBase.new '/tmp/kb.foo', "Hsa/feb2014"
29
+ KNOWLEDGE_BASE.format = {"Gene" => "Ensembl Gene ID"}
30
+
31
+ KNOWLEDGE_BASE.register :effects, EFFECT_TSV, EFFECT_OPTIONS.dup
32
+ KNOWLEDGE_BASE.register :gene_ages, datafile_test('gene_ages')
33
+ KNOWLEDGE_BASE.register :nature, datafile_test('nature'), :source => "UniProt/SwissProt Accession", :target => "NCI Nature Pathway ID"
34
+
35
+ def test_enrichment
36
+ genes = %w(P17706-2 LMAN1 P17706-1 P29353-2 JAK3 Q8NFM1 EIF2AK2 JAK1 SRC PIAS1 KPNB1 KPNA2 STAT3 PTPN1 ATR CREBBP PTPRA SGK1 P46108-1 NCK2 O00145 PTPN1 CBL SORBS1 IRS1 SHC1 AKT2 GRB2 SOS1 RPS6KB1 AKT1 DOK1 RASA1 NCK1 FOXO3 RAPGEF1 TRIP10 EIF4EBP1 PDPK1 GRB14 PTPN11 INS GRB10 CAV1 EIF4B RPS6KB1 EEF2 EEF2K AKT1 TSC1 TSC2 CLIP1 DDIT4 SGK1 PDPK1 DEPTOR SREBF1 CYCS IRS1 RPS6KA1 BNIP3 RRN3 RICTOR IKBKB AKT1S1 PXN PML EIF4A1 PPARGC1A YY1 PRKCA RPTOR PDCD4 SIK1 P10636-8 BRSK1 MYC SMARCD3 STK11 ETV4 MARK4 MAP2 MARK2 CRTC2 PSEN2 MST4 CTSD BRSK2 SIK2 ESR1 CAB39 STK11IP SMAD4 CREB1 PRKACA EZR TP53 GSK3B SIK3 CDC37 HSP90AA1 )
37
+ assert KNOWLEDGE_BASE.enrichment(:nature, genes).any?
38
+ end
39
+
40
+ end
41
+
42
+
@@ -29,6 +29,8 @@ TP53 NFKB1|GLI1 activation|activation true|true
29
29
 
30
30
  KNOWLEDGE_BASE.register :effects, EFFECT_TSV, EFFECT_OPTIONS.dup
31
31
 
32
+ KNOWLEDGE_BASE.register :pina, datafile_test('pina'), :source => "UniProt/SwissProt Accession", :target => "Interactor UniProt/SwissProt Accession=~UniProt/SwissProt Accession", :undirected => true
33
+
32
34
  def test_database
33
35
  assert_equal "Associated Gene Name", KNOWLEDGE_BASE.get_database(:effects, :source_format => "Associated Gene Name").key_field
34
36
  end
@@ -41,12 +43,39 @@ TP53 NFKB1|GLI1 activation|activation true|true
41
43
  assert KNOWLEDGE_BASE.get_index(:effects, :source_format => "Associated Gene Name", :target_format => "Ensembl Gene ID", :persist => true).include? "MDM2~ENSG00000141510"
42
44
  end
43
45
 
44
-
45
46
  def test_index_flat
46
47
  require 'rbbt/sources/tfacts'
47
48
  file = TFacts.regulators
48
49
  KNOWLEDGE_BASE.register :tfacts, file, :type => :flat, :source => "Transcription Factor Associated Gene Name=~Associated Gene Name", :merge => true
49
50
  assert KNOWLEDGE_BASE.subset(:tfacts, :source => ["TP53"], :target => :all).length > 10
50
51
  end
52
+
53
+ def test_pina
54
+ index = KNOWLEDGE_BASE.get_index(:pina, :persist => false, :source_format => "Associated Gene Name", :target_format => "Associated Gene Name")
55
+ assert index["TP53~ARID1A"]
56
+ assert index["ARID1A~TP53"]
57
+ assert_equal index["ARID1A~TP53"], index["TP53~ARID1A"]
58
+
59
+ index = KNOWLEDGE_BASE.get_index(:pina, :persist => false, :source_format => "Associated Gene Name", :target_format => "Associated Gene Name", :undirected => false)
60
+ count = 0
61
+ index.through do |k,values|
62
+ split_values = values.collect{|v| v.split ";;" }
63
+ count += 1 if Misc.zip_fields(split_values).uniq != Misc.zip_fields(split_values)
64
+ end
65
+
66
+ index = KNOWLEDGE_BASE.get_index(:pina, :persist => false, :source_format => "Associated Gene Name", :target_format => "Associated Gene Name", :undirected => true)
67
+ count2 = 0
68
+ index.through do |k,values|
69
+ split_values = values.collect{|v| v.split ";;" }
70
+ count2 += 1 if Misc.zip_fields(split_values).uniq != Misc.zip_fields(split_values)
71
+ end
72
+
73
+ assert_equal count, count2
74
+ end
75
+
76
+
77
+ def test_pina2
78
+ index = KNOWLEDGE_BASE.get_index(:pina, :persist => true, :source_format => "Ensembl Gene ID", :target_format => "Ensembl Gene ID", :undirected => true)
79
+ end
51
80
  end
52
81
 
@@ -1,4 +1,5 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt-util'
2
3
  require 'rbbt/tsv'
3
4
  require 'rbbt/tsv/change_id'
4
5
 
@@ -290,8 +290,5 @@ row3 a C Id4
290
290
 
291
291
 
292
292
  end
293
-
294
293
  end
295
-
296
-
297
294
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.14.34
4
+ version: 5.14.35
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-13 00:00:00.000000000 Z
11
+ date: 2014-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -151,6 +151,7 @@ files:
151
151
  - lib/rbbt/entity/identifiers.rb
152
152
  - lib/rbbt/fix_width_table.rb
153
153
  - lib/rbbt/knowledge_base.rb
154
+ - lib/rbbt/knowledge_base/enrichment.rb
154
155
  - lib/rbbt/knowledge_base/entity.rb
155
156
  - lib/rbbt/knowledge_base/query.rb
156
157
  - lib/rbbt/knowledge_base/registry.rb
@@ -290,6 +291,7 @@ files:
290
291
  - share/rbbt_commands/tsv/attach
291
292
  - share/rbbt_commands/tsv/change_id
292
293
  - share/rbbt_commands/tsv/get
294
+ - share/rbbt_commands/tsv/head
293
295
  - share/rbbt_commands/tsv/info
294
296
  - share/rbbt_commands/tsv/json
295
297
  - share/rbbt_commands/tsv/read
@@ -320,6 +322,7 @@ files:
320
322
  - test/rbbt/association/test_open.rb
321
323
  - test/rbbt/association/test_util.rb
322
324
  - test/rbbt/entity/test_identifiers.rb
325
+ - test/rbbt/knowledge_base/test_enrichment.rb
323
326
  - test/rbbt/knowledge_base/test_entity.rb
324
327
  - test/rbbt/knowledge_base/test_query.rb
325
328
  - test/rbbt/knowledge_base/test_registry.rb
@@ -444,6 +447,7 @@ test_files:
444
447
  - test/rbbt/test_association.rb
445
448
  - test/rbbt/knowledge_base/test_registry.rb
446
449
  - test/rbbt/knowledge_base/test_entity.rb
450
+ - test/rbbt/knowledge_base/test_enrichment.rb
447
451
  - test/rbbt/knowledge_base/test_query.rb
448
452
  - test/rbbt/test_resource.rb
449
453
  - test/rbbt/test_entity.rb