rbbt-util 5.14.33 → 5.14.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +2 -0
  3. data/lib/rbbt/association/database.rb +153 -0
  4. data/lib/rbbt/association/index.rb +89 -20
  5. data/lib/rbbt/association/open.rb +37 -0
  6. data/lib/rbbt/association/util.rb +133 -0
  7. data/lib/rbbt/association.rb +1 -380
  8. data/lib/rbbt/entity/identifiers.rb +106 -0
  9. data/lib/rbbt/entity.rb +1 -0
  10. data/lib/rbbt/knowledge_base/entity.rb +107 -0
  11. data/lib/rbbt/knowledge_base/query.rb +83 -0
  12. data/lib/rbbt/knowledge_base/registry.rb +106 -0
  13. data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
  14. data/lib/rbbt/knowledge_base.rb +6 -359
  15. data/lib/rbbt/tsv/accessor.rb +4 -0
  16. data/lib/rbbt/tsv/change_id.rb +119 -0
  17. data/lib/rbbt/tsv/index.rb +6 -2
  18. data/lib/rbbt/tsv/parser.rb +7 -5
  19. data/lib/rbbt/tsv/util.rb +1 -1
  20. data/lib/rbbt/tsv.rb +2 -1
  21. data/lib/rbbt/util/R/model.rb +1 -1
  22. data/lib/rbbt/util/log.rb +2 -2
  23. data/lib/rbbt/util/misc/bgzf.rb +2 -0
  24. data/lib/rbbt/util/misc/inspect.rb +1 -1
  25. data/lib/rbbt-util.rb +11 -7
  26. data/lib/rbbt.rb +0 -1
  27. data/share/rbbt_commands/app/start +1 -1
  28. data/share/rbbt_commands/tsv/change_id +2 -2
  29. data/test/rbbt/association/test_database.rb +61 -0
  30. data/test/rbbt/association/test_index.rb +67 -22
  31. data/test/rbbt/association/test_open.rb +68 -0
  32. data/test/rbbt/association/test_util.rb +108 -0
  33. data/test/rbbt/entity/test_identifiers.rb +40 -0
  34. data/test/rbbt/knowledge_base/test_entity.rb +0 -0
  35. data/test/rbbt/knowledge_base/test_query.rb +45 -0
  36. data/test/rbbt/knowledge_base/test_registry.rb +52 -0
  37. data/test/rbbt/test_association.rb +3 -3
  38. data/test/rbbt/test_knowledge_base.rb +79 -51
  39. data/test/rbbt/test_monitor.rb +0 -2
  40. data/test/rbbt/test_packed_index.rb +1 -1
  41. data/test/rbbt/test_resource.rb +6 -6
  42. data/test/rbbt/test_tsv.rb +34 -44
  43. data/test/rbbt/tsv/parallel/test_through.rb +2 -4
  44. data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
  45. data/test/rbbt/tsv/test_change_id.rb +10 -0
  46. data/test/rbbt/util/R/test_model.rb +9 -10
  47. data/test/rbbt/util/test_misc.rb +1 -1
  48. data/test/test_helper.rb +4 -1
  49. metadata +24 -2
@@ -4,72 +4,100 @@ require 'test/unit'
4
4
 
5
5
  require 'rbbt/workflow'
6
6
  require 'rbbt/entity'
7
+ require 'rbbt/entity/identifiers'
7
8
 
8
9
  require 'rbbt/association'
9
10
  require 'rbbt/knowledge_base'
10
11
 
11
- class TestKnowledgeBase < Test::Unit::TestCase
12
- def setup
13
- if not defined? Genomics
14
- Workflow.require_workflow "Genomics"
15
- require 'genomics_kb'
16
- end
17
- KnowledgeBase.knowledge_base_dir = Rbbt.tmp.knowledge_base_test.find
18
- @kb = Genomics.knowledge_base
19
- end
20
-
21
- def test_register
22
- require 'rbbt/sources/pina'
23
-
24
- TmpFile.with_file do |dir|
25
- kb = KnowledgeBase.new dir
26
-
27
- kb.register :pina, Pina.protein_protein, :target => "Interactor UniProt/SwissProt Accession=~UniProt/SwissProt Accession"
28
- assert_equal [Gene], kb.entity_types
29
- assert kb.all_databases.include? :pina
12
+ require 'rbbt/sources/organism'
13
+ require 'rbbt/sources/tfacts'
14
+ require 'rbbt/sources/kegg'
15
+
16
+ module Gene
17
+ extend Entity
18
+ add_identifiers Organism.identifiers("NAMESPACE"), "Ensembl Gene ID", "Associated Gene Name"
19
+ add_identifiers KEGG.identifiers
20
+
21
+ property :follow => :single do |kb,name,annotate=nil|
22
+ if annotate.nil? or annotate
23
+ l = kb.children(name, self).target_entity
24
+ self.annotate l if annotate and kb.source(name) == format
25
+ l
26
+ else
27
+ kb._children(name, self).collect{|v| v.partition("~").last }
30
28
  end
31
29
  end
32
30
 
33
- def test_format_Gene
34
- TmpFile.with_file do |dir|
35
- kb = KnowledgeBase.new dir, "Hsa/jan2013"
36
- kb.format["Gene"] = "Ensembl Gene ID"
37
-
38
- kb.register 'nature', NCI.nature_pathways, :merge => true, :target => "UniProt/SwissProt Accession", :key_field => 0
39
-
40
- assert kb.get_database('nature', :persist => false).slice("Ensembl Gene ID").values.flatten.uniq.length > 10
31
+ property :backtrack => :single do |kb,name,annotate=nil|
32
+ if annotate.nil? or annotate
33
+ l = kb.parents(name, self).target_entity
34
+ self.annotate l if annotate and kb.target(name) == format
35
+ l
36
+ else
37
+ kb._parents(name, self).collect{|v| v.partition("~").last }
41
38
  end
42
39
  end
43
40
 
44
- def test_fields
45
- TmpFile.with_file do |dir|
46
- kb = KnowledgeBase.new dir, "Hsa/jan2013"
47
- kb.format["Gene"] = "Ensembl Gene ID"
48
-
49
- kb.register 'nature', NCI.nature_pathways, :merge => true, :fields => [2], :key_field => 0
50
- assert kb.get_database('nature', :persist => false).slice("Ensembl Gene ID").values.flatten.uniq.length > 10
41
+ property :expand => :single do |kb,name,annotate=nil|
42
+ if annotate.nil? or annotate
43
+ n = kb.neighbours(name, self)
44
+ if kb.source(name) == kb.target(name)
45
+ self.annotate n.collect{|k,v| v.target}.flatten
46
+ else
47
+ n.collect{|k,v| v.target_entity.to_a}.flatten
48
+ end
49
+ else
50
+ n = kb._neighbours(name, self)
51
+ n.values.flatten.collect{|v| v.partition("~").last}
51
52
  end
52
53
  end
54
+ end
53
55
 
54
- def test_global
55
- assert @kb.all_databases.include? "pina"
56
- end
57
-
58
- def test_subset
59
- gene = "TP53"
60
- found = Genomics.knowledge_base.identify :pina, gene
61
- p53_interactors = Genomics.knowledge_base.children(:pina, found).target_entity
62
-
63
- assert Genomics.knowledge_base.subset(:pina, {"Gene" => p53_interactors}).target_entity.name.include? "MDM2"
64
- end
65
56
 
66
- def test_syndication
67
- kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
68
- kb.syndicate :genomics, @kb
57
+ class TestKnowledgeBase < Test::Unit::TestCase
69
58
 
70
- gene = "TP53"
71
- found = kb.identify "pina@genomics", gene
72
- assert found =~ /ENSG/
59
+ def test_knowledge_base
60
+ organism = Organism.default_code("Hsa")
61
+ TmpFile.with_file do |tmpdir|
62
+ kb = KnowledgeBase.new tmpdir, Organism.default_code("Hsa")
63
+ kb.format = {"Gene" => "Ensembl Gene ID"}
64
+
65
+ kb.register :tfacts, TFacts.regulators, :source =>"=~Associated Gene Name"
66
+
67
+ assert_equal "Ensembl Gene ID", kb.get_database(:tfacts).key_field
68
+
69
+ kb.register :kegg, KEGG.gene_pathway, :source_format => "Ensembl Gene ID"
70
+ assert_match "Ensembl Gene ID", kb.get_database(:kegg).key_field
71
+
72
+ gene = Gene.setup("TP53", "Associated Gene Name", organism)
73
+ assert_equal "TP53", gene.name
74
+ assert_equal "ENSG00000141510", gene.ensembl
75
+
76
+ downstream = gene.follow kb, :tfacts
77
+ upstream = gene.backtrack kb, :tfacts
78
+ close = gene.expand kb, :tfacts
79
+
80
+ assert downstream.length < downstream.follow(kb, :tfacts,false).flatten.length
81
+ assert downstream.follow(kb, :tfacts,false).flatten.length < Annotated.flatten(downstream.follow(kb, :tfacts)).follow(kb, :tfacts).flatten.length
82
+
83
+ Misc.benchmark(50) do
84
+ downstream.follow(kb, :tfacts,false)
85
+ downstream.backtrack(kb, :tfacts,false)
86
+ downstream.expand(kb, :tfacts,false)
87
+ end
88
+
89
+ Misc.benchmark(50) do
90
+ downstream.follow(kb, :tfacts)
91
+ downstream.backtrack(kb, :tfacts)
92
+ downstream.expand(kb, :tfacts)
93
+ end
94
+
95
+ Misc.benchmark(50) do
96
+ downstream.follow(kb, :tfacts, true)
97
+ downstream.backtrack(kb, :tfacts, true)
98
+ downstream.expand(kb, :tfacts, true)
99
+ end
100
+ end
73
101
  end
74
102
  end
75
103
 
@@ -2,8 +2,6 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.r
2
2
  require 'rbbt/monitor'
3
3
 
4
4
  class TestMonitor < Test::Unit::TestCase
5
- def _test_locks
6
- end
7
5
  end
8
6
 
9
7
  if __FILE__ == $0
@@ -2,7 +2,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.r
2
2
  require 'rbbt/packed_index'
3
3
 
4
4
  class TestPackedIndex < Test::Unit::TestCase
5
- def _test_index
5
+ def _test_packed_index
6
6
 
7
7
  TmpFile.with_file do |tmpfile|
8
8
  pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
@@ -30,23 +30,23 @@ class TestTSV < Test::Unit::TestCase
30
30
  tsv = {"a" => 1, "b" => 2}
31
31
  TSV.setup(tsv, :key_field => "Letter", :fields => ["Value"], :type => :single)
32
32
  end
33
- Log.tsv TestResource.tmp.test.test_tsv.tsv
33
+ assert TSV === TestResource.tmp.test.test_tsv.tsv
34
34
  end
35
35
 
36
- def _test_rake
36
+ def test_rake
37
37
  TestResource.tmp.test.work.footest.foo.read == "TEST"
38
38
  assert TestResource.tmp.test.work.footest.foo.read == "TEST"
39
39
  end
40
40
 
41
- def _test_proc
41
+ def test_proc
42
42
  assert TestResource.tmp.test.proc.read == "PROC TEST"
43
43
  end
44
44
 
45
- def _test_string
45
+ def test_string
46
46
  assert TestResource.tmp.test.string.read == "TEST"
47
47
  end
48
48
 
49
- def _test_url
49
+ def test_url
50
50
  assert TestResource[].tmp.test.google.read =~ /google/
51
51
  end
52
52
 
@@ -68,7 +68,7 @@ class TestTSV < Test::Unit::TestCase
68
68
  assert_equal File.join(ENV['HOME'], '.phgx/etc/foo'), Path.setup('etc/foo', 'phgx').find()
69
69
  end
70
70
 
71
- def _test_libdir
71
+ def test_libdir
72
72
  assert File.exists? TestResource[].share.Rlib["util.R"].find :lib
73
73
  assert File.exists? TestResource[].share.Rlib["util.R"].find
74
74
  end
@@ -5,7 +5,7 @@ require 'test/unit'
5
5
 
6
6
  class TestTSV < Test::Unit::TestCase
7
7
 
8
- def _test_extend
8
+ def test_extend
9
9
  a = {
10
10
  "one" => "1",
11
11
  "two" => "2"
@@ -18,7 +18,7 @@ class TestTSV < Test::Unit::TestCase
18
18
  assert_equal "1", a["one"]
19
19
  end
20
20
 
21
- def _test_tsv
21
+ def test_tsv_1
22
22
  content =<<-EOF
23
23
  #Id ValueA ValueB OtherID
24
24
  row1 a|aa|aaa b Id1|Id2
@@ -41,7 +41,7 @@ row2 A B Id3
41
41
  end
42
42
  end
43
43
 
44
- def _test_headerless
44
+ def test_headerless
45
45
  content =<<-EOF
46
46
  row1 a|aa|aaa b Id1|Id2
47
47
  row2 A B Id3
@@ -54,7 +54,7 @@ row2 A B Id3
54
54
  end
55
55
  end
56
56
 
57
- def _test_headerless_fields
57
+ def test_headerless_fields
58
58
  content =<<-EOF
59
59
  row1 a|aa|aaa b Id1|Id2
60
60
  row2 A B Id3
@@ -68,7 +68,7 @@ row2 A B Id3
68
68
  end
69
69
  end
70
70
 
71
- def _test_tsv_persistence
71
+ def test_tsv_persistence
72
72
  content =<<-EOF
73
73
  #Id ValueA ValueB OtherID
74
74
  row1 a|aa|aaa b Id1|Id2
@@ -82,19 +82,10 @@ row2 A B Id3
82
82
  assert_equal :double, tsv.type
83
83
  assert_equal "Id", tsv.key_field
84
84
  assert TokyoCabinet::HDB === tsv
85
-
86
- FileUtils.rm filename
87
- tsv = TSV.open(filename, :sep => /\s+/, :persist => true)
88
- assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
89
- assert_equal ["ValueA", "ValueB", "OtherID"], tsv.fields
90
- assert_equal :double, tsv.type
91
- assert_equal "Id", tsv.key_field
92
- assert TokyoCabinet::HDB === tsv
93
-
94
85
  end
95
86
  end
96
87
 
97
- def _test_tsv_field_selection
88
+ def test_tsv_field_selection
98
89
  content =<<-EOF
99
90
  #Id ValueA ValueB OtherID
100
91
  row1 a|aa|aaa b Id1|Id2
@@ -136,7 +127,7 @@ row2 A B Id3
136
127
  end
137
128
  end
138
129
 
139
- def _test_tsv_cast
130
+ def test_tsv_cast
140
131
  content =<<-EOF
141
132
  #Id Value
142
133
  row1 1|2|3
@@ -151,7 +142,7 @@ row2 4
151
142
  end
152
143
  end
153
144
 
154
- def _test_tsv_single
145
+ def test_tsv_single
155
146
  content =<<-EOF
156
147
  #Id Value
157
148
  row1 1
@@ -168,7 +159,7 @@ row2 4
168
159
  end
169
160
  end
170
161
 
171
- def _test_tsv_single_from_flat
162
+ def test_tsv_single_from_flat
172
163
  content =<<-EOF
173
164
  #: :type=:flat
174
165
  #Id Value
@@ -183,7 +174,7 @@ row2 4
183
174
  end
184
175
 
185
176
 
186
- def _test_tsv_serializer
177
+ def test_tsv_serializer
187
178
  content =<<-EOF
188
179
  #Id Value
189
180
  row1 1
@@ -197,7 +188,7 @@ row2 4
197
188
  end
198
189
  end
199
190
 
200
- def _test_tsv_header_options
191
+ def test_tsv_header_options
201
192
  content =<<-EOF
202
193
  #: :sep=/\\s+/
203
194
  #Id Value
@@ -211,7 +202,7 @@ row2 4
211
202
  end
212
203
  end
213
204
 
214
- def _test_tsv_fastimport
205
+ def test_tsv_fastimport
215
206
  content =<<-EOF
216
207
  #Id ValueA ValueB OtherID
217
208
  row1 a|aa|aaa b Id1|Id2
@@ -228,7 +219,7 @@ row3 AA B Id3
228
219
  end
229
220
  end
230
221
 
231
- def _test_header_type
222
+ def test_header_type
232
223
  content =<<-EOF
233
224
  #: :sep=/\\s+/#:type=:single
234
225
  #Id Value
@@ -244,7 +235,7 @@ row3 AA B Id3
244
235
 
245
236
  end
246
237
 
247
- def _test_single_cast
238
+ def test_single_cast
248
239
  content =<<-EOF
249
240
  #: :sep=/\\s+/#:type=:single#:cast=:to_i
250
241
  #Id Value
@@ -259,7 +250,7 @@ b 2
259
250
  end
260
251
  end
261
252
 
262
- def _test_key_field
253
+ def test_key_field
263
254
  content =<<-EOF
264
255
  #: :sep=/\\s+/#:type=:single
265
256
  #Id Value
@@ -275,7 +266,7 @@ b 2
275
266
  end
276
267
  end
277
268
 
278
- def _test_fix
269
+ def test_fix
279
270
  content =<<-EOF
280
271
  #: :sep=/\\s+/#:type=:single
281
272
  #Id Value
@@ -289,7 +280,7 @@ b 2
289
280
  end
290
281
  end
291
282
 
292
- def _test_select
283
+ def test_select
293
284
  content =<<-EOF
294
285
  #: :sep=/\\s+/#:type=:single
295
286
  #Id Value
@@ -303,7 +294,7 @@ b 2
303
294
  end
304
295
  end
305
296
 
306
- def _test_grep
297
+ def test_grep
307
298
  content =<<-EOF
308
299
  #: :sep=/\\s+/#:type=:single
309
300
  #Id Value
@@ -317,7 +308,7 @@ b 2
317
308
  end
318
309
  end
319
310
 
320
- def _test_grep_invert
311
+ def test_grep_invert
321
312
  content =<<-EOF
322
313
  #: :sep=/\\s+/#:type=:single
323
314
  #Id Value
@@ -331,7 +322,7 @@ b 2
331
322
  end
332
323
  end
333
324
 
334
- def _test_grep_header
325
+ def test_grep_header
335
326
  content =<<-EOF
336
327
  #: :sep=/\\s+/#:type=:single#:namespace=Test
337
328
  #Id Value
@@ -340,13 +331,13 @@ b 2
340
331
  EOF
341
332
 
342
333
  TmpFile.with_file(content) do |filename|
343
- tsv = TSV.open(filename, :key_field => "Value", :grep => "2")
334
+ tsv = TSV.open(filename, :key_field => "Value", :grep => "#\\|2")
344
335
  assert(! tsv.include?("1"))
345
336
  assert(tsv.include?("2"))
346
337
  end
347
338
  end
348
339
 
349
- def _test_json
340
+ def test_json
350
341
  content =<<-EOF
351
342
  #: :sep=/\\s+/#:type=:single
352
343
  #Id Value
@@ -361,7 +352,7 @@ b 2
361
352
 
362
353
  end
363
354
 
364
- def _test_flat_no_merge
355
+ def test_flat_no_merge
365
356
  content =<<-EOF
366
357
  #Id ValueA ValueB OtherID
367
358
  row1 a|aa|aaa b Id1|Id2
@@ -377,7 +368,7 @@ row2 A B Id3
377
368
  end
378
369
  end
379
370
 
380
- def _test_flat_merge
371
+ def test_flat_merge
381
372
  content =<<-EOF
382
373
  #Id ValueA ValueB OtherID
383
374
  row1 a|aa|aaa b Id1|Id2
@@ -404,7 +395,7 @@ row2 b bbb bbbb bb
404
395
  end
405
396
  end
406
397
 
407
- def _test_flat_key
398
+ def test_flat_key
408
399
  content =<<-EOF
409
400
  #Id ValueA
410
401
  row1 a aa aaa
@@ -420,7 +411,7 @@ row2 b bbb bbbb bb aa
420
411
 
421
412
 
422
413
 
423
- def _test_zipped
414
+ def test_zipped
424
415
  content =<<-EOF
425
416
  #Id ValueA ValueB
426
417
  row1 a|aa|aaa b|bb|bbb
@@ -433,7 +424,7 @@ row2 a|aa|aaa c|cc|ccc
433
424
  end
434
425
  end
435
426
 
436
- def _test_named_array_key
427
+ def test_named_array_key
437
428
  content =<<-EOF
438
429
  #Id ValueA ValueB OtherID
439
430
  row1 a|aa|aaa b Id1|Id2
@@ -448,7 +439,7 @@ row2 A B Id3
448
439
 
449
440
  end
450
441
 
451
- def _test_unnamed_key
442
+ def test_unnamed_key
452
443
  content =<<-EOF
453
444
  row1 a|aa|aaa b Id1|Id2
454
445
  row2 A B Id3
@@ -461,7 +452,7 @@ row2 A B Id3
461
452
 
462
453
  end
463
454
 
464
- def _test_float_array
455
+ def test_float_array
465
456
  content =<<-EOF
466
457
  #Id ValueA ValueB OtherID
467
458
  row1 0.2 0.3 0
@@ -476,7 +467,7 @@ row2 0.1 4.5 0
476
467
 
477
468
  end
478
469
 
479
- def _test_flat_field_select
470
+ def test_flat_field_select
480
471
  content =<<-EOF
481
472
  #: :type=:flat
482
473
  #Id Value
@@ -485,12 +476,11 @@ row2 b bb bbb
485
476
  EOF
486
477
 
487
478
  TmpFile.with_file(content) do |filename|
488
- puts TSV.open(filename, :sep => /\s+/, :key_field => "Value").to_s
489
479
  assert TSV.open(filename, :sep => /\s+/, :key_field => "Value").include? "aa"
490
480
  end
491
481
  end
492
482
 
493
- def _test_flat2
483
+ def test_flat2
494
484
  content =<<-EOF
495
485
  #: :type=:flat
496
486
  #Id Value
@@ -507,7 +497,7 @@ row2 A|AA|AAA
507
497
  end
508
498
 
509
499
 
510
- def _test_tsv_flat_double
500
+ def test_tsv_flat_double
511
501
  content =<<-EOF
512
502
  #Id ValueA ValueB OtherID
513
503
  row1 a|aa|aaa b Id1|Id2
@@ -522,7 +512,7 @@ row2 A B Id3
522
512
  end
523
513
  end
524
514
 
525
- def _test_flat2single
515
+ def test_flat2single
526
516
  content =<<-EOF
527
517
  #: :type=:flat
528
518
  #Id Value
@@ -536,7 +526,7 @@ row2 A AA AAA
536
526
 
537
527
  end
538
528
 
539
- def _test_shard
529
+ def test_shard
540
530
  shard_function = Proc.new do |key|
541
531
  key[-1]
542
532
  end
@@ -5,8 +5,7 @@ require 'rbbt/tsv/parallel'
5
5
  class TestTSVParallelThrough < Test::Unit::TestCase
6
6
 
7
7
  def test_pthrough
8
- require 'rbbt/sources/organism'
9
- tsv = Organism.identifiers("Hsa").tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
8
+ tsv = datafile_test('identifiers').tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
10
9
 
11
10
  h = {}
12
11
  tsv.monitor = true
@@ -21,8 +20,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase
21
20
 
22
21
 
23
22
  def test_ppthrough
24
- require 'rbbt/sources/organism'
25
- tsv = Organism.identifiers("Hsa").tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
23
+ tsv = datafile_test('identifiers').tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
26
24
 
27
25
  h = {}
28
26