rbbt-util 5.14.33 → 5.14.34

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +2 -0
  3. data/lib/rbbt/association/database.rb +153 -0
  4. data/lib/rbbt/association/index.rb +89 -20
  5. data/lib/rbbt/association/open.rb +37 -0
  6. data/lib/rbbt/association/util.rb +133 -0
  7. data/lib/rbbt/association.rb +1 -380
  8. data/lib/rbbt/entity/identifiers.rb +106 -0
  9. data/lib/rbbt/entity.rb +1 -0
  10. data/lib/rbbt/knowledge_base/entity.rb +107 -0
  11. data/lib/rbbt/knowledge_base/query.rb +83 -0
  12. data/lib/rbbt/knowledge_base/registry.rb +106 -0
  13. data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
  14. data/lib/rbbt/knowledge_base.rb +6 -359
  15. data/lib/rbbt/tsv/accessor.rb +4 -0
  16. data/lib/rbbt/tsv/change_id.rb +119 -0
  17. data/lib/rbbt/tsv/index.rb +6 -2
  18. data/lib/rbbt/tsv/parser.rb +7 -5
  19. data/lib/rbbt/tsv/util.rb +1 -1
  20. data/lib/rbbt/tsv.rb +2 -1
  21. data/lib/rbbt/util/R/model.rb +1 -1
  22. data/lib/rbbt/util/log.rb +2 -2
  23. data/lib/rbbt/util/misc/bgzf.rb +2 -0
  24. data/lib/rbbt/util/misc/inspect.rb +1 -1
  25. data/lib/rbbt-util.rb +11 -7
  26. data/lib/rbbt.rb +0 -1
  27. data/share/rbbt_commands/app/start +1 -1
  28. data/share/rbbt_commands/tsv/change_id +2 -2
  29. data/test/rbbt/association/test_database.rb +61 -0
  30. data/test/rbbt/association/test_index.rb +67 -22
  31. data/test/rbbt/association/test_open.rb +68 -0
  32. data/test/rbbt/association/test_util.rb +108 -0
  33. data/test/rbbt/entity/test_identifiers.rb +40 -0
  34. data/test/rbbt/knowledge_base/test_entity.rb +0 -0
  35. data/test/rbbt/knowledge_base/test_query.rb +45 -0
  36. data/test/rbbt/knowledge_base/test_registry.rb +52 -0
  37. data/test/rbbt/test_association.rb +3 -3
  38. data/test/rbbt/test_knowledge_base.rb +79 -51
  39. data/test/rbbt/test_monitor.rb +0 -2
  40. data/test/rbbt/test_packed_index.rb +1 -1
  41. data/test/rbbt/test_resource.rb +6 -6
  42. data/test/rbbt/test_tsv.rb +34 -44
  43. data/test/rbbt/tsv/parallel/test_through.rb +2 -4
  44. data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
  45. data/test/rbbt/tsv/test_change_id.rb +10 -0
  46. data/test/rbbt/util/R/test_model.rb +9 -10
  47. data/test/rbbt/util/test_misc.rb +1 -1
  48. data/test/test_helper.rb +4 -1
  49. metadata +24 -2
@@ -4,72 +4,100 @@ require 'test/unit'
4
4
 
5
5
  require 'rbbt/workflow'
6
6
  require 'rbbt/entity'
7
+ require 'rbbt/entity/identifiers'
7
8
 
8
9
  require 'rbbt/association'
9
10
  require 'rbbt/knowledge_base'
10
11
 
11
- class TestKnowledgeBase < Test::Unit::TestCase
12
- def setup
13
- if not defined? Genomics
14
- Workflow.require_workflow "Genomics"
15
- require 'genomics_kb'
16
- end
17
- KnowledgeBase.knowledge_base_dir = Rbbt.tmp.knowledge_base_test.find
18
- @kb = Genomics.knowledge_base
19
- end
20
-
21
- def test_register
22
- require 'rbbt/sources/pina'
23
-
24
- TmpFile.with_file do |dir|
25
- kb = KnowledgeBase.new dir
26
-
27
- kb.register :pina, Pina.protein_protein, :target => "Interactor UniProt/SwissProt Accession=~UniProt/SwissProt Accession"
28
- assert_equal [Gene], kb.entity_types
29
- assert kb.all_databases.include? :pina
12
+ require 'rbbt/sources/organism'
13
+ require 'rbbt/sources/tfacts'
14
+ require 'rbbt/sources/kegg'
15
+
16
+ module Gene
17
+ extend Entity
18
+ add_identifiers Organism.identifiers("NAMESPACE"), "Ensembl Gene ID", "Associated Gene Name"
19
+ add_identifiers KEGG.identifiers
20
+
21
+ property :follow => :single do |kb,name,annotate=nil|
22
+ if annotate.nil? or annotate
23
+ l = kb.children(name, self).target_entity
24
+ self.annotate l if annotate and kb.source(name) == format
25
+ l
26
+ else
27
+ kb._children(name, self).collect{|v| v.partition("~").last }
30
28
  end
31
29
  end
32
30
 
33
- def test_format_Gene
34
- TmpFile.with_file do |dir|
35
- kb = KnowledgeBase.new dir, "Hsa/jan2013"
36
- kb.format["Gene"] = "Ensembl Gene ID"
37
-
38
- kb.register 'nature', NCI.nature_pathways, :merge => true, :target => "UniProt/SwissProt Accession", :key_field => 0
39
-
40
- assert kb.get_database('nature', :persist => false).slice("Ensembl Gene ID").values.flatten.uniq.length > 10
31
+ property :backtrack => :single do |kb,name,annotate=nil|
32
+ if annotate.nil? or annotate
33
+ l = kb.parents(name, self).target_entity
34
+ self.annotate l if annotate and kb.target(name) == format
35
+ l
36
+ else
37
+ kb._parents(name, self).collect{|v| v.partition("~").last }
41
38
  end
42
39
  end
43
40
 
44
- def test_fields
45
- TmpFile.with_file do |dir|
46
- kb = KnowledgeBase.new dir, "Hsa/jan2013"
47
- kb.format["Gene"] = "Ensembl Gene ID"
48
-
49
- kb.register 'nature', NCI.nature_pathways, :merge => true, :fields => [2], :key_field => 0
50
- assert kb.get_database('nature', :persist => false).slice("Ensembl Gene ID").values.flatten.uniq.length > 10
41
+ property :expand => :single do |kb,name,annotate=nil|
42
+ if annotate.nil? or annotate
43
+ n = kb.neighbours(name, self)
44
+ if kb.source(name) == kb.target(name)
45
+ self.annotate n.collect{|k,v| v.target}.flatten
46
+ else
47
+ n.collect{|k,v| v.target_entity.to_a}.flatten
48
+ end
49
+ else
50
+ n = kb._neighbours(name, self)
51
+ n.values.flatten.collect{|v| v.partition("~").last}
51
52
  end
52
53
  end
54
+ end
53
55
 
54
- def test_global
55
- assert @kb.all_databases.include? "pina"
56
- end
57
-
58
- def test_subset
59
- gene = "TP53"
60
- found = Genomics.knowledge_base.identify :pina, gene
61
- p53_interactors = Genomics.knowledge_base.children(:pina, found).target_entity
62
-
63
- assert Genomics.knowledge_base.subset(:pina, {"Gene" => p53_interactors}).target_entity.name.include? "MDM2"
64
- end
65
56
 
66
- def test_syndication
67
- kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
68
- kb.syndicate :genomics, @kb
57
+ class TestKnowledgeBase < Test::Unit::TestCase
69
58
 
70
- gene = "TP53"
71
- found = kb.identify "pina@genomics", gene
72
- assert found =~ /ENSG/
59
+ def test_knowledge_base
60
+ organism = Organism.default_code("Hsa")
61
+ TmpFile.with_file do |tmpdir|
62
+ kb = KnowledgeBase.new tmpdir, Organism.default_code("Hsa")
63
+ kb.format = {"Gene" => "Ensembl Gene ID"}
64
+
65
+ kb.register :tfacts, TFacts.regulators, :source =>"=~Associated Gene Name"
66
+
67
+ assert_equal "Ensembl Gene ID", kb.get_database(:tfacts).key_field
68
+
69
+ kb.register :kegg, KEGG.gene_pathway, :source_format => "Ensembl Gene ID"
70
+ assert_match "Ensembl Gene ID", kb.get_database(:kegg).key_field
71
+
72
+ gene = Gene.setup("TP53", "Associated Gene Name", organism)
73
+ assert_equal "TP53", gene.name
74
+ assert_equal "ENSG00000141510", gene.ensembl
75
+
76
+ downstream = gene.follow kb, :tfacts
77
+ upstream = gene.backtrack kb, :tfacts
78
+ close = gene.expand kb, :tfacts
79
+
80
+ assert downstream.length < downstream.follow(kb, :tfacts,false).flatten.length
81
+ assert downstream.follow(kb, :tfacts,false).flatten.length < Annotated.flatten(downstream.follow(kb, :tfacts)).follow(kb, :tfacts).flatten.length
82
+
83
+ Misc.benchmark(50) do
84
+ downstream.follow(kb, :tfacts,false)
85
+ downstream.backtrack(kb, :tfacts,false)
86
+ downstream.expand(kb, :tfacts,false)
87
+ end
88
+
89
+ Misc.benchmark(50) do
90
+ downstream.follow(kb, :tfacts)
91
+ downstream.backtrack(kb, :tfacts)
92
+ downstream.expand(kb, :tfacts)
93
+ end
94
+
95
+ Misc.benchmark(50) do
96
+ downstream.follow(kb, :tfacts, true)
97
+ downstream.backtrack(kb, :tfacts, true)
98
+ downstream.expand(kb, :tfacts, true)
99
+ end
100
+ end
73
101
  end
74
102
  end
75
103
 
@@ -2,8 +2,6 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.r
2
2
  require 'rbbt/monitor'
3
3
 
4
4
  class TestMonitor < Test::Unit::TestCase
5
- def _test_locks
6
- end
7
5
  end
8
6
 
9
7
  if __FILE__ == $0
@@ -2,7 +2,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.r
2
2
  require 'rbbt/packed_index'
3
3
 
4
4
  class TestPackedIndex < Test::Unit::TestCase
5
- def _test_index
5
+ def _test_packed_index
6
6
 
7
7
  TmpFile.with_file do |tmpfile|
8
8
  pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
@@ -30,23 +30,23 @@ class TestTSV < Test::Unit::TestCase
30
30
  tsv = {"a" => 1, "b" => 2}
31
31
  TSV.setup(tsv, :key_field => "Letter", :fields => ["Value"], :type => :single)
32
32
  end
33
- Log.tsv TestResource.tmp.test.test_tsv.tsv
33
+ assert TSV === TestResource.tmp.test.test_tsv.tsv
34
34
  end
35
35
 
36
- def _test_rake
36
+ def test_rake
37
37
  TestResource.tmp.test.work.footest.foo.read == "TEST"
38
38
  assert TestResource.tmp.test.work.footest.foo.read == "TEST"
39
39
  end
40
40
 
41
- def _test_proc
41
+ def test_proc
42
42
  assert TestResource.tmp.test.proc.read == "PROC TEST"
43
43
  end
44
44
 
45
- def _test_string
45
+ def test_string
46
46
  assert TestResource.tmp.test.string.read == "TEST"
47
47
  end
48
48
 
49
- def _test_url
49
+ def test_url
50
50
  assert TestResource[].tmp.test.google.read =~ /google/
51
51
  end
52
52
 
@@ -68,7 +68,7 @@ class TestTSV < Test::Unit::TestCase
68
68
  assert_equal File.join(ENV['HOME'], '.phgx/etc/foo'), Path.setup('etc/foo', 'phgx').find()
69
69
  end
70
70
 
71
- def _test_libdir
71
+ def test_libdir
72
72
  assert File.exists? TestResource[].share.Rlib["util.R"].find :lib
73
73
  assert File.exists? TestResource[].share.Rlib["util.R"].find
74
74
  end
@@ -5,7 +5,7 @@ require 'test/unit'
5
5
 
6
6
  class TestTSV < Test::Unit::TestCase
7
7
 
8
- def _test_extend
8
+ def test_extend
9
9
  a = {
10
10
  "one" => "1",
11
11
  "two" => "2"
@@ -18,7 +18,7 @@ class TestTSV < Test::Unit::TestCase
18
18
  assert_equal "1", a["one"]
19
19
  end
20
20
 
21
- def _test_tsv
21
+ def test_tsv_1
22
22
  content =<<-EOF
23
23
  #Id ValueA ValueB OtherID
24
24
  row1 a|aa|aaa b Id1|Id2
@@ -41,7 +41,7 @@ row2 A B Id3
41
41
  end
42
42
  end
43
43
 
44
- def _test_headerless
44
+ def test_headerless
45
45
  content =<<-EOF
46
46
  row1 a|aa|aaa b Id1|Id2
47
47
  row2 A B Id3
@@ -54,7 +54,7 @@ row2 A B Id3
54
54
  end
55
55
  end
56
56
 
57
- def _test_headerless_fields
57
+ def test_headerless_fields
58
58
  content =<<-EOF
59
59
  row1 a|aa|aaa b Id1|Id2
60
60
  row2 A B Id3
@@ -68,7 +68,7 @@ row2 A B Id3
68
68
  end
69
69
  end
70
70
 
71
- def _test_tsv_persistence
71
+ def test_tsv_persistence
72
72
  content =<<-EOF
73
73
  #Id ValueA ValueB OtherID
74
74
  row1 a|aa|aaa b Id1|Id2
@@ -82,19 +82,10 @@ row2 A B Id3
82
82
  assert_equal :double, tsv.type
83
83
  assert_equal "Id", tsv.key_field
84
84
  assert TokyoCabinet::HDB === tsv
85
-
86
- FileUtils.rm filename
87
- tsv = TSV.open(filename, :sep => /\s+/, :persist => true)
88
- assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
89
- assert_equal ["ValueA", "ValueB", "OtherID"], tsv.fields
90
- assert_equal :double, tsv.type
91
- assert_equal "Id", tsv.key_field
92
- assert TokyoCabinet::HDB === tsv
93
-
94
85
  end
95
86
  end
96
87
 
97
- def _test_tsv_field_selection
88
+ def test_tsv_field_selection
98
89
  content =<<-EOF
99
90
  #Id ValueA ValueB OtherID
100
91
  row1 a|aa|aaa b Id1|Id2
@@ -136,7 +127,7 @@ row2 A B Id3
136
127
  end
137
128
  end
138
129
 
139
- def _test_tsv_cast
130
+ def test_tsv_cast
140
131
  content =<<-EOF
141
132
  #Id Value
142
133
  row1 1|2|3
@@ -151,7 +142,7 @@ row2 4
151
142
  end
152
143
  end
153
144
 
154
- def _test_tsv_single
145
+ def test_tsv_single
155
146
  content =<<-EOF
156
147
  #Id Value
157
148
  row1 1
@@ -168,7 +159,7 @@ row2 4
168
159
  end
169
160
  end
170
161
 
171
- def _test_tsv_single_from_flat
162
+ def test_tsv_single_from_flat
172
163
  content =<<-EOF
173
164
  #: :type=:flat
174
165
  #Id Value
@@ -183,7 +174,7 @@ row2 4
183
174
  end
184
175
 
185
176
 
186
- def _test_tsv_serializer
177
+ def test_tsv_serializer
187
178
  content =<<-EOF
188
179
  #Id Value
189
180
  row1 1
@@ -197,7 +188,7 @@ row2 4
197
188
  end
198
189
  end
199
190
 
200
- def _test_tsv_header_options
191
+ def test_tsv_header_options
201
192
  content =<<-EOF
202
193
  #: :sep=/\\s+/
203
194
  #Id Value
@@ -211,7 +202,7 @@ row2 4
211
202
  end
212
203
  end
213
204
 
214
- def _test_tsv_fastimport
205
+ def test_tsv_fastimport
215
206
  content =<<-EOF
216
207
  #Id ValueA ValueB OtherID
217
208
  row1 a|aa|aaa b Id1|Id2
@@ -228,7 +219,7 @@ row3 AA B Id3
228
219
  end
229
220
  end
230
221
 
231
- def _test_header_type
222
+ def test_header_type
232
223
  content =<<-EOF
233
224
  #: :sep=/\\s+/#:type=:single
234
225
  #Id Value
@@ -244,7 +235,7 @@ row3 AA B Id3
244
235
 
245
236
  end
246
237
 
247
- def _test_single_cast
238
+ def test_single_cast
248
239
  content =<<-EOF
249
240
  #: :sep=/\\s+/#:type=:single#:cast=:to_i
250
241
  #Id Value
@@ -259,7 +250,7 @@ b 2
259
250
  end
260
251
  end
261
252
 
262
- def _test_key_field
253
+ def test_key_field
263
254
  content =<<-EOF
264
255
  #: :sep=/\\s+/#:type=:single
265
256
  #Id Value
@@ -275,7 +266,7 @@ b 2
275
266
  end
276
267
  end
277
268
 
278
- def _test_fix
269
+ def test_fix
279
270
  content =<<-EOF
280
271
  #: :sep=/\\s+/#:type=:single
281
272
  #Id Value
@@ -289,7 +280,7 @@ b 2
289
280
  end
290
281
  end
291
282
 
292
- def _test_select
283
+ def test_select
293
284
  content =<<-EOF
294
285
  #: :sep=/\\s+/#:type=:single
295
286
  #Id Value
@@ -303,7 +294,7 @@ b 2
303
294
  end
304
295
  end
305
296
 
306
- def _test_grep
297
+ def test_grep
307
298
  content =<<-EOF
308
299
  #: :sep=/\\s+/#:type=:single
309
300
  #Id Value
@@ -317,7 +308,7 @@ b 2
317
308
  end
318
309
  end
319
310
 
320
- def _test_grep_invert
311
+ def test_grep_invert
321
312
  content =<<-EOF
322
313
  #: :sep=/\\s+/#:type=:single
323
314
  #Id Value
@@ -331,7 +322,7 @@ b 2
331
322
  end
332
323
  end
333
324
 
334
- def _test_grep_header
325
+ def test_grep_header
335
326
  content =<<-EOF
336
327
  #: :sep=/\\s+/#:type=:single#:namespace=Test
337
328
  #Id Value
@@ -340,13 +331,13 @@ b 2
340
331
  EOF
341
332
 
342
333
  TmpFile.with_file(content) do |filename|
343
- tsv = TSV.open(filename, :key_field => "Value", :grep => "2")
334
+ tsv = TSV.open(filename, :key_field => "Value", :grep => "#\\|2")
344
335
  assert(! tsv.include?("1"))
345
336
  assert(tsv.include?("2"))
346
337
  end
347
338
  end
348
339
 
349
- def _test_json
340
+ def test_json
350
341
  content =<<-EOF
351
342
  #: :sep=/\\s+/#:type=:single
352
343
  #Id Value
@@ -361,7 +352,7 @@ b 2
361
352
 
362
353
  end
363
354
 
364
- def _test_flat_no_merge
355
+ def test_flat_no_merge
365
356
  content =<<-EOF
366
357
  #Id ValueA ValueB OtherID
367
358
  row1 a|aa|aaa b Id1|Id2
@@ -377,7 +368,7 @@ row2 A B Id3
377
368
  end
378
369
  end
379
370
 
380
- def _test_flat_merge
371
+ def test_flat_merge
381
372
  content =<<-EOF
382
373
  #Id ValueA ValueB OtherID
383
374
  row1 a|aa|aaa b Id1|Id2
@@ -404,7 +395,7 @@ row2 b bbb bbbb bb
404
395
  end
405
396
  end
406
397
 
407
- def _test_flat_key
398
+ def test_flat_key
408
399
  content =<<-EOF
409
400
  #Id ValueA
410
401
  row1 a aa aaa
@@ -420,7 +411,7 @@ row2 b bbb bbbb bb aa
420
411
 
421
412
 
422
413
 
423
- def _test_zipped
414
+ def test_zipped
424
415
  content =<<-EOF
425
416
  #Id ValueA ValueB
426
417
  row1 a|aa|aaa b|bb|bbb
@@ -433,7 +424,7 @@ row2 a|aa|aaa c|cc|ccc
433
424
  end
434
425
  end
435
426
 
436
- def _test_named_array_key
427
+ def test_named_array_key
437
428
  content =<<-EOF
438
429
  #Id ValueA ValueB OtherID
439
430
  row1 a|aa|aaa b Id1|Id2
@@ -448,7 +439,7 @@ row2 A B Id3
448
439
 
449
440
  end
450
441
 
451
- def _test_unnamed_key
442
+ def test_unnamed_key
452
443
  content =<<-EOF
453
444
  row1 a|aa|aaa b Id1|Id2
454
445
  row2 A B Id3
@@ -461,7 +452,7 @@ row2 A B Id3
461
452
 
462
453
  end
463
454
 
464
- def _test_float_array
455
+ def test_float_array
465
456
  content =<<-EOF
466
457
  #Id ValueA ValueB OtherID
467
458
  row1 0.2 0.3 0
@@ -476,7 +467,7 @@ row2 0.1 4.5 0
476
467
 
477
468
  end
478
469
 
479
- def _test_flat_field_select
470
+ def test_flat_field_select
480
471
  content =<<-EOF
481
472
  #: :type=:flat
482
473
  #Id Value
@@ -485,12 +476,11 @@ row2 b bb bbb
485
476
  EOF
486
477
 
487
478
  TmpFile.with_file(content) do |filename|
488
- puts TSV.open(filename, :sep => /\s+/, :key_field => "Value").to_s
489
479
  assert TSV.open(filename, :sep => /\s+/, :key_field => "Value").include? "aa"
490
480
  end
491
481
  end
492
482
 
493
- def _test_flat2
483
+ def test_flat2
494
484
  content =<<-EOF
495
485
  #: :type=:flat
496
486
  #Id Value
@@ -507,7 +497,7 @@ row2 A|AA|AAA
507
497
  end
508
498
 
509
499
 
510
- def _test_tsv_flat_double
500
+ def test_tsv_flat_double
511
501
  content =<<-EOF
512
502
  #Id ValueA ValueB OtherID
513
503
  row1 a|aa|aaa b Id1|Id2
@@ -522,7 +512,7 @@ row2 A B Id3
522
512
  end
523
513
  end
524
514
 
525
- def _test_flat2single
515
+ def test_flat2single
526
516
  content =<<-EOF
527
517
  #: :type=:flat
528
518
  #Id Value
@@ -536,7 +526,7 @@ row2 A AA AAA
536
526
 
537
527
  end
538
528
 
539
- def _test_shard
529
+ def test_shard
540
530
  shard_function = Proc.new do |key|
541
531
  key[-1]
542
532
  end
@@ -5,8 +5,7 @@ require 'rbbt/tsv/parallel'
5
5
  class TestTSVParallelThrough < Test::Unit::TestCase
6
6
 
7
7
  def test_pthrough
8
- require 'rbbt/sources/organism'
9
- tsv = Organism.identifiers("Hsa").tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
8
+ tsv = datafile_test('identifiers').tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
10
9
 
11
10
  h = {}
12
11
  tsv.monitor = true
@@ -21,8 +20,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
21
20
 
22
21
 
23
22
  def test_ppthrough
24
- require 'rbbt/sources/organism'
25
- tsv = Organism.identifiers("Hsa").tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
23
+ tsv = datafile_test('identifiers').tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
26
24
 
27
25
  h = {}
28
26