rbbt-util 5.14.33 → 5.14.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/bin/rbbt +2 -0
 - data/lib/rbbt/association/database.rb +153 -0
 - data/lib/rbbt/association/index.rb +89 -20
 - data/lib/rbbt/association/open.rb +37 -0
 - data/lib/rbbt/association/util.rb +133 -0
 - data/lib/rbbt/association.rb +1 -380
 - data/lib/rbbt/entity/identifiers.rb +106 -0
 - data/lib/rbbt/entity.rb +1 -0
 - data/lib/rbbt/knowledge_base/entity.rb +107 -0
 - data/lib/rbbt/knowledge_base/query.rb +83 -0
 - data/lib/rbbt/knowledge_base/registry.rb +106 -0
 - data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
 - data/lib/rbbt/knowledge_base.rb +6 -359
 - data/lib/rbbt/tsv/accessor.rb +4 -0
 - data/lib/rbbt/tsv/change_id.rb +119 -0
 - data/lib/rbbt/tsv/index.rb +6 -2
 - data/lib/rbbt/tsv/parser.rb +7 -5
 - data/lib/rbbt/tsv/util.rb +1 -1
 - data/lib/rbbt/tsv.rb +2 -1
 - data/lib/rbbt/util/R/model.rb +1 -1
 - data/lib/rbbt/util/log.rb +2 -2
 - data/lib/rbbt/util/misc/bgzf.rb +2 -0
 - data/lib/rbbt/util/misc/inspect.rb +1 -1
 - data/lib/rbbt-util.rb +11 -7
 - data/lib/rbbt.rb +0 -1
 - data/share/rbbt_commands/app/start +1 -1
 - data/share/rbbt_commands/tsv/change_id +2 -2
 - data/test/rbbt/association/test_database.rb +61 -0
 - data/test/rbbt/association/test_index.rb +67 -22
 - data/test/rbbt/association/test_open.rb +68 -0
 - data/test/rbbt/association/test_util.rb +108 -0
 - data/test/rbbt/entity/test_identifiers.rb +40 -0
 - data/test/rbbt/knowledge_base/test_entity.rb +0 -0
 - data/test/rbbt/knowledge_base/test_query.rb +45 -0
 - data/test/rbbt/knowledge_base/test_registry.rb +52 -0
 - data/test/rbbt/test_association.rb +3 -3
 - data/test/rbbt/test_knowledge_base.rb +79 -51
 - data/test/rbbt/test_monitor.rb +0 -2
 - data/test/rbbt/test_packed_index.rb +1 -1
 - data/test/rbbt/test_resource.rb +6 -6
 - data/test/rbbt/test_tsv.rb +34 -44
 - data/test/rbbt/tsv/parallel/test_through.rb +2 -4
 - data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
 - data/test/rbbt/tsv/test_change_id.rb +10 -0
 - data/test/rbbt/util/R/test_model.rb +9 -10
 - data/test/rbbt/util/test_misc.rb +1 -1
 - data/test/test_helper.rb +4 -1
 - metadata +24 -2
 
| 
         @@ -4,72 +4,100 @@ require 'test/unit' 
     | 
|
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            require 'rbbt/workflow'
         
     | 
| 
       6 
6 
     | 
    
         
             
            require 'rbbt/entity'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require 'rbbt/entity/identifiers'
         
     | 
| 
       7 
8 
     | 
    
         | 
| 
       8 
9 
     | 
    
         
             
            require 'rbbt/association'
         
     | 
| 
       9 
10 
     | 
    
         
             
            require 'rbbt/knowledge_base'
         
     | 
| 
       10 
11 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                  kb.register :pina, Pina.protein_protein, :target => "Interactor UniProt/SwissProt Accession=~UniProt/SwissProt Accession"
         
     | 
| 
       28 
     | 
    
         
            -
                  assert_equal [Gene], kb.entity_types
         
     | 
| 
       29 
     | 
    
         
            -
                  assert kb.all_databases.include? :pina
         
     | 
| 
      
 12 
     | 
    
         
            +
            require 'rbbt/sources/organism'
         
     | 
| 
      
 13 
     | 
    
         
            +
            require 'rbbt/sources/tfacts'
         
     | 
| 
      
 14 
     | 
    
         
            +
            require 'rbbt/sources/kegg'
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            module Gene
         
     | 
| 
      
 17 
     | 
    
         
            +
              extend Entity
         
     | 
| 
      
 18 
     | 
    
         
            +
              add_identifiers Organism.identifiers("NAMESPACE"), "Ensembl Gene ID", "Associated Gene Name"
         
     | 
| 
      
 19 
     | 
    
         
            +
              add_identifiers KEGG.identifiers
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
              property :follow => :single do |kb,name,annotate=nil|
         
     | 
| 
      
 22 
     | 
    
         
            +
                if annotate.nil? or annotate
         
     | 
| 
      
 23 
     | 
    
         
            +
                  l = kb.children(name, self).target_entity
         
     | 
| 
      
 24 
     | 
    
         
            +
                  self.annotate l if annotate and kb.source(name) == format
         
     | 
| 
      
 25 
     | 
    
         
            +
                  l
         
     | 
| 
      
 26 
     | 
    
         
            +
                else
         
     | 
| 
      
 27 
     | 
    
         
            +
                  kb._children(name, self).collect{|v| v.partition("~").last }
         
     | 
| 
       30 
28 
     | 
    
         
             
                end
         
     | 
| 
       31 
29 
     | 
    
         
             
              end
         
     | 
| 
       32 
30 
     | 
    
         | 
| 
       33 
     | 
    
         
            -
               
     | 
| 
       34 
     | 
    
         
            -
                 
     | 
| 
       35 
     | 
    
         
            -
                   
     | 
| 
       36 
     | 
    
         
            -
                   
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
                  assert kb.get_database('nature', :persist => false).slice("Ensembl Gene ID").values.flatten.uniq.length > 10
         
     | 
| 
      
 31 
     | 
    
         
            +
              property :backtrack => :single do |kb,name,annotate=nil|
         
     | 
| 
      
 32 
     | 
    
         
            +
                if annotate.nil? or annotate
         
     | 
| 
      
 33 
     | 
    
         
            +
                  l = kb.parents(name, self).target_entity
         
     | 
| 
      
 34 
     | 
    
         
            +
                  self.annotate l if annotate and kb.target(name) == format
         
     | 
| 
      
 35 
     | 
    
         
            +
                  l
         
     | 
| 
      
 36 
     | 
    
         
            +
                else
         
     | 
| 
      
 37 
     | 
    
         
            +
                  kb._parents(name, self).collect{|v| v.partition("~").last }
         
     | 
| 
       41 
38 
     | 
    
         
             
                end
         
     | 
| 
       42 
39 
     | 
    
         
             
              end
         
     | 
| 
       43 
40 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
               
     | 
| 
       45 
     | 
    
         
            -
                 
     | 
| 
       46 
     | 
    
         
            -
                   
     | 
| 
       47 
     | 
    
         
            -
                  kb. 
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                   
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
      
 41 
     | 
    
         
            +
              property :expand => :single do |kb,name,annotate=nil|
         
     | 
| 
      
 42 
     | 
    
         
            +
                if annotate.nil? or annotate
         
     | 
| 
      
 43 
     | 
    
         
            +
                  n = kb.neighbours(name, self)
         
     | 
| 
      
 44 
     | 
    
         
            +
                  if kb.source(name) == kb.target(name) 
         
     | 
| 
      
 45 
     | 
    
         
            +
                    self.annotate n.collect{|k,v| v.target}.flatten
         
     | 
| 
      
 46 
     | 
    
         
            +
                  else
         
     | 
| 
      
 47 
     | 
    
         
            +
                    n.collect{|k,v| v.target_entity.to_a}.flatten
         
     | 
| 
      
 48 
     | 
    
         
            +
                  end
         
     | 
| 
      
 49 
     | 
    
         
            +
                else
         
     | 
| 
      
 50 
     | 
    
         
            +
                  n = kb._neighbours(name, self)
         
     | 
| 
      
 51 
     | 
    
         
            +
                  n.values.flatten.collect{|v| v.partition("~").last}
         
     | 
| 
       51 
52 
     | 
    
         
             
                end
         
     | 
| 
       52 
53 
     | 
    
         
             
              end
         
     | 
| 
      
 54 
     | 
    
         
            +
            end
         
     | 
| 
       53 
55 
     | 
    
         | 
| 
       54 
     | 
    
         
            -
              def test_global
         
     | 
| 
       55 
     | 
    
         
            -
                assert @kb.all_databases.include? "pina"
         
     | 
| 
       56 
     | 
    
         
            -
              end
         
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
              def test_subset
         
     | 
| 
       59 
     | 
    
         
            -
                gene = "TP53"
         
     | 
| 
       60 
     | 
    
         
            -
                found = Genomics.knowledge_base.identify :pina, gene
         
     | 
| 
       61 
     | 
    
         
            -
                p53_interactors = Genomics.knowledge_base.children(:pina, found).target_entity 
         
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
                assert Genomics.knowledge_base.subset(:pina, {"Gene" => p53_interactors}).target_entity.name.include? "MDM2"
         
     | 
| 
       64 
     | 
    
         
            -
              end
         
     | 
| 
       65 
56 
     | 
    
         | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
                kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
         
     | 
| 
       68 
     | 
    
         
            -
                kb.syndicate :genomics, @kb
         
     | 
| 
      
 57 
     | 
    
         
            +
            class TestKnowledgeBase < Test::Unit::TestCase
         
     | 
| 
       69 
58 
     | 
    
         | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
                 
     | 
| 
       72 
     | 
    
         
            -
                 
     | 
| 
      
 59 
     | 
    
         
            +
              def test_knowledge_base
         
     | 
| 
      
 60 
     | 
    
         
            +
                organism = Organism.default_code("Hsa")
         
     | 
| 
      
 61 
     | 
    
         
            +
                TmpFile.with_file do |tmpdir|
         
     | 
| 
      
 62 
     | 
    
         
            +
                  kb = KnowledgeBase.new tmpdir, Organism.default_code("Hsa")
         
     | 
| 
      
 63 
     | 
    
         
            +
                  kb.format = {"Gene" => "Ensembl Gene ID"}
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
                  kb.register :tfacts, TFacts.regulators, :source =>"=~Associated Gene Name"
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                  assert_equal "Ensembl Gene ID", kb.get_database(:tfacts).key_field
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
                  kb.register :kegg, KEGG.gene_pathway, :source_format => "Ensembl Gene ID"
         
     | 
| 
      
 70 
     | 
    
         
            +
                  assert_match "Ensembl Gene ID", kb.get_database(:kegg).key_field
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                  gene = Gene.setup("TP53", "Associated Gene Name", organism)
         
     | 
| 
      
 73 
     | 
    
         
            +
                  assert_equal "TP53", gene.name
         
     | 
| 
      
 74 
     | 
    
         
            +
                  assert_equal "ENSG00000141510", gene.ensembl
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
                  downstream = gene.follow kb, :tfacts
         
     | 
| 
      
 77 
     | 
    
         
            +
                  upstream = gene.backtrack kb, :tfacts
         
     | 
| 
      
 78 
     | 
    
         
            +
                  close = gene.expand kb, :tfacts
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
                  assert downstream.length < downstream.follow(kb, :tfacts,false).flatten.length
         
     | 
| 
      
 81 
     | 
    
         
            +
                  assert downstream.follow(kb, :tfacts,false).flatten.length < Annotated.flatten(downstream.follow(kb, :tfacts)).follow(kb, :tfacts).flatten.length
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
                  Misc.benchmark(50) do
         
     | 
| 
      
 84 
     | 
    
         
            +
                    downstream.follow(kb, :tfacts,false)
         
     | 
| 
      
 85 
     | 
    
         
            +
                    downstream.backtrack(kb, :tfacts,false)
         
     | 
| 
      
 86 
     | 
    
         
            +
                    downstream.expand(kb, :tfacts,false)
         
     | 
| 
      
 87 
     | 
    
         
            +
                  end
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
                  Misc.benchmark(50) do
         
     | 
| 
      
 90 
     | 
    
         
            +
                    downstream.follow(kb, :tfacts)
         
     | 
| 
      
 91 
     | 
    
         
            +
                    downstream.backtrack(kb, :tfacts)
         
     | 
| 
      
 92 
     | 
    
         
            +
                    downstream.expand(kb, :tfacts)
         
     | 
| 
      
 93 
     | 
    
         
            +
                  end
         
     | 
| 
      
 94 
     | 
    
         
            +
                  
         
     | 
| 
      
 95 
     | 
    
         
            +
                  Misc.benchmark(50) do
         
     | 
| 
      
 96 
     | 
    
         
            +
                    downstream.follow(kb, :tfacts, true)
         
     | 
| 
      
 97 
     | 
    
         
            +
                    downstream.backtrack(kb, :tfacts, true)
         
     | 
| 
      
 98 
     | 
    
         
            +
                    downstream.expand(kb, :tfacts, true)
         
     | 
| 
      
 99 
     | 
    
         
            +
                  end
         
     | 
| 
      
 100 
     | 
    
         
            +
                end
         
     | 
| 
       73 
101 
     | 
    
         
             
              end
         
     | 
| 
       74 
102 
     | 
    
         
             
            end
         
     | 
| 
       75 
103 
     | 
    
         | 
    
        data/test/rbbt/test_monitor.rb
    CHANGED
    
    
| 
         @@ -2,7 +2,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.r 
     | 
|
| 
       2 
2 
     | 
    
         
             
            require 'rbbt/packed_index'
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            class TestPackedIndex < Test::Unit::TestCase
         
     | 
| 
       5 
     | 
    
         
            -
              def  
     | 
| 
      
 5 
     | 
    
         
            +
              def _test_packed_index
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
                TmpFile.with_file do |tmpfile|
         
     | 
| 
       8 
8 
     | 
    
         
             
                  pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
         
     | 
    
        data/test/rbbt/test_resource.rb
    CHANGED
    
    | 
         @@ -30,23 +30,23 @@ class TestTSV < Test::Unit::TestCase 
     | 
|
| 
       30 
30 
     | 
    
         
             
                  tsv = {"a" => 1, "b" => 2}
         
     | 
| 
       31 
31 
     | 
    
         
             
                  TSV.setup(tsv, :key_field => "Letter", :fields => ["Value"], :type => :single)
         
     | 
| 
       32 
32 
     | 
    
         
             
                end
         
     | 
| 
       33 
     | 
    
         
            -
                 
     | 
| 
      
 33 
     | 
    
         
            +
                assert TSV === TestResource.tmp.test.test_tsv.tsv
         
     | 
| 
       34 
34 
     | 
    
         
             
              end
         
     | 
| 
       35 
35 
     | 
    
         | 
| 
       36 
     | 
    
         
            -
              def  
     | 
| 
      
 36 
     | 
    
         
            +
              def test_rake
         
     | 
| 
       37 
37 
     | 
    
         
             
                TestResource.tmp.test.work.footest.foo.read == "TEST"
         
     | 
| 
       38 
38 
     | 
    
         
             
                assert TestResource.tmp.test.work.footest.foo.read == "TEST"
         
     | 
| 
       39 
39 
     | 
    
         
             
              end
         
     | 
| 
       40 
40 
     | 
    
         | 
| 
       41 
     | 
    
         
            -
              def  
     | 
| 
      
 41 
     | 
    
         
            +
              def test_proc
         
     | 
| 
       42 
42 
     | 
    
         
             
                assert TestResource.tmp.test.proc.read == "PROC TEST"
         
     | 
| 
       43 
43 
     | 
    
         
             
              end
         
     | 
| 
       44 
44 
     | 
    
         | 
| 
       45 
     | 
    
         
            -
              def  
     | 
| 
      
 45 
     | 
    
         
            +
              def test_string
         
     | 
| 
       46 
46 
     | 
    
         
             
                assert TestResource.tmp.test.string.read == "TEST"
         
     | 
| 
       47 
47 
     | 
    
         
             
              end
         
     | 
| 
       48 
48 
     | 
    
         | 
| 
       49 
     | 
    
         
            -
              def  
     | 
| 
      
 49 
     | 
    
         
            +
              def test_url
         
     | 
| 
       50 
50 
     | 
    
         
             
                assert TestResource[].tmp.test.google.read =~ /google/
         
     | 
| 
       51 
51 
     | 
    
         
             
              end
         
     | 
| 
       52 
52 
     | 
    
         | 
| 
         @@ -68,7 +68,7 @@ class TestTSV < Test::Unit::TestCase 
     | 
|
| 
       68 
68 
     | 
    
         
             
                assert_equal File.join(ENV['HOME'], '.phgx/etc/foo'), Path.setup('etc/foo', 'phgx').find()
         
     | 
| 
       69 
69 
     | 
    
         
             
              end
         
     | 
| 
       70 
70 
     | 
    
         | 
| 
       71 
     | 
    
         
            -
              def  
     | 
| 
      
 71 
     | 
    
         
            +
              def test_libdir
         
     | 
| 
       72 
72 
     | 
    
         
             
                assert File.exists? TestResource[].share.Rlib["util.R"].find :lib
         
     | 
| 
       73 
73 
     | 
    
         
             
                assert File.exists? TestResource[].share.Rlib["util.R"].find 
         
     | 
| 
       74 
74 
     | 
    
         
             
              end
         
     | 
    
        data/test/rbbt/test_tsv.rb
    CHANGED
    
    | 
         @@ -5,7 +5,7 @@ require 'test/unit' 
     | 
|
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            class TestTSV < Test::Unit::TestCase
         
     | 
| 
       7 
7 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
              def  
     | 
| 
      
 8 
     | 
    
         
            +
              def test_extend
         
     | 
| 
       9 
9 
     | 
    
         
             
                a = {
         
     | 
| 
       10 
10 
     | 
    
         
             
                  "one" => "1", 
         
     | 
| 
       11 
11 
     | 
    
         
             
                  "two" => "2"
         
     | 
| 
         @@ -18,7 +18,7 @@ class TestTSV < Test::Unit::TestCase 
     | 
|
| 
       18 
18 
     | 
    
         
             
                assert_equal "1", a["one"]
         
     | 
| 
       19 
19 
     | 
    
         
             
              end
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
              def  
     | 
| 
      
 21 
     | 
    
         
            +
              def test_tsv_1
         
     | 
| 
       22 
22 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       23 
23 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       24 
24 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -41,7 +41,7 @@ row2    A    B    Id3 
     | 
|
| 
       41 
41 
     | 
    
         
             
                end
         
     | 
| 
       42 
42 
     | 
    
         
             
              end
         
     | 
| 
       43 
43 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
              def  
     | 
| 
      
 44 
     | 
    
         
            +
              def test_headerless
         
     | 
| 
       45 
45 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       46 
46 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
       47 
47 
     | 
    
         
             
            row2    A    B    Id3
         
     | 
| 
         @@ -54,7 +54,7 @@ row2    A    B    Id3 
     | 
|
| 
       54 
54 
     | 
    
         
             
                end
         
     | 
| 
       55 
55 
     | 
    
         
             
              end
         
     | 
| 
       56 
56 
     | 
    
         | 
| 
       57 
     | 
    
         
            -
              def  
     | 
| 
      
 57 
     | 
    
         
            +
              def test_headerless_fields
         
     | 
| 
       58 
58 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       59 
59 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
       60 
60 
     | 
    
         
             
            row2    A    B    Id3
         
     | 
| 
         @@ -68,7 +68,7 @@ row2    A    B    Id3 
     | 
|
| 
       68 
68 
     | 
    
         
             
                end
         
     | 
| 
       69 
69 
     | 
    
         
             
              end
         
     | 
| 
       70 
70 
     | 
    
         | 
| 
       71 
     | 
    
         
            -
              def  
     | 
| 
      
 71 
     | 
    
         
            +
              def test_tsv_persistence
         
     | 
| 
       72 
72 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       73 
73 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       74 
74 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -82,19 +82,10 @@ row2    A    B    Id3 
     | 
|
| 
       82 
82 
     | 
    
         
             
                  assert_equal :double, tsv.type
         
     | 
| 
       83 
83 
     | 
    
         
             
                  assert_equal "Id", tsv.key_field
         
     | 
| 
       84 
84 
     | 
    
         
             
                  assert TokyoCabinet::HDB === tsv
         
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
                  FileUtils.rm filename
         
     | 
| 
       87 
     | 
    
         
            -
                  tsv = TSV.open(filename, :sep => /\s+/, :persist =>  true)
         
     | 
| 
       88 
     | 
    
         
            -
                  assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
         
     | 
| 
       89 
     | 
    
         
            -
                  assert_equal ["ValueA", "ValueB", "OtherID"], tsv.fields
         
     | 
| 
       90 
     | 
    
         
            -
                  assert_equal :double, tsv.type
         
     | 
| 
       91 
     | 
    
         
            -
                  assert_equal "Id", tsv.key_field
         
     | 
| 
       92 
     | 
    
         
            -
                  assert TokyoCabinet::HDB === tsv
         
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
85 
     | 
    
         
             
                end
         
     | 
| 
       95 
86 
     | 
    
         
             
              end
         
     | 
| 
       96 
87 
     | 
    
         | 
| 
       97 
     | 
    
         
            -
              def  
     | 
| 
      
 88 
     | 
    
         
            +
              def test_tsv_field_selection
         
     | 
| 
       98 
89 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       99 
90 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       100 
91 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -136,7 +127,7 @@ row2    A    B    Id3 
     | 
|
| 
       136 
127 
     | 
    
         
             
                end
         
     | 
| 
       137 
128 
     | 
    
         
             
              end
         
     | 
| 
       138 
129 
     | 
    
         | 
| 
       139 
     | 
    
         
            -
              def  
     | 
| 
      
 130 
     | 
    
         
            +
              def test_tsv_cast
         
     | 
| 
       140 
131 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       141 
132 
     | 
    
         
             
            #Id    Value
         
     | 
| 
       142 
133 
     | 
    
         
             
            row1    1|2|3
         
     | 
| 
         @@ -151,7 +142,7 @@ row2    4 
     | 
|
| 
       151 
142 
     | 
    
         
             
                end
         
     | 
| 
       152 
143 
     | 
    
         
             
              end
         
     | 
| 
       153 
144 
     | 
    
         | 
| 
       154 
     | 
    
         
            -
              def  
     | 
| 
      
 145 
     | 
    
         
            +
              def test_tsv_single
         
     | 
| 
       155 
146 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       156 
147 
     | 
    
         
             
            #Id    Value
         
     | 
| 
       157 
148 
     | 
    
         
             
            row1    1
         
     | 
| 
         @@ -168,7 +159,7 @@ row2    4 
     | 
|
| 
       168 
159 
     | 
    
         
             
                end
         
     | 
| 
       169 
160 
     | 
    
         
             
              end
         
     | 
| 
       170 
161 
     | 
    
         | 
| 
       171 
     | 
    
         
            -
              def  
     | 
| 
      
 162 
     | 
    
         
            +
              def test_tsv_single_from_flat
         
     | 
| 
       172 
163 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       173 
164 
     | 
    
         
             
            #: :type=:flat
         
     | 
| 
       174 
165 
     | 
    
         
             
            #Id    Value
         
     | 
| 
         @@ -183,7 +174,7 @@ row2    4 
     | 
|
| 
       183 
174 
     | 
    
         
             
              end
         
     | 
| 
       184 
175 
     | 
    
         | 
| 
       185 
176 
     | 
    
         | 
| 
       186 
     | 
    
         
            -
              def  
     | 
| 
      
 177 
     | 
    
         
            +
              def test_tsv_serializer
         
     | 
| 
       187 
178 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       188 
179 
     | 
    
         
             
            #Id    Value
         
     | 
| 
       189 
180 
     | 
    
         
             
            row1    1
         
     | 
| 
         @@ -197,7 +188,7 @@ row2    4 
     | 
|
| 
       197 
188 
     | 
    
         
             
                end
         
     | 
| 
       198 
189 
     | 
    
         
             
              end
         
     | 
| 
       199 
190 
     | 
    
         | 
| 
       200 
     | 
    
         
            -
              def  
     | 
| 
      
 191 
     | 
    
         
            +
              def test_tsv_header_options
         
     | 
| 
       201 
192 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       202 
193 
     | 
    
         
             
            #: :sep=/\\s+/
         
     | 
| 
       203 
194 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -211,7 +202,7 @@ row2    4 
     | 
|
| 
       211 
202 
     | 
    
         
             
                end
         
     | 
| 
       212 
203 
     | 
    
         
             
              end
         
     | 
| 
       213 
204 
     | 
    
         | 
| 
       214 
     | 
    
         
            -
              def  
     | 
| 
      
 205 
     | 
    
         
            +
              def test_tsv_fastimport
         
     | 
| 
       215 
206 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       216 
207 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       217 
208 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -228,7 +219,7 @@ row3    AA    B    Id3 
     | 
|
| 
       228 
219 
     | 
    
         
             
                end
         
     | 
| 
       229 
220 
     | 
    
         
             
              end
         
     | 
| 
       230 
221 
     | 
    
         | 
| 
       231 
     | 
    
         
            -
              def  
     | 
| 
      
 222 
     | 
    
         
            +
              def test_header_type
         
     | 
| 
       232 
223 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       233 
224 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single
         
     | 
| 
       234 
225 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -244,7 +235,7 @@ row3    AA    B    Id3 
     | 
|
| 
       244 
235 
     | 
    
         | 
| 
       245 
236 
     | 
    
         
             
              end
         
     | 
| 
       246 
237 
     | 
    
         | 
| 
       247 
     | 
    
         
            -
              def  
     | 
| 
      
 238 
     | 
    
         
            +
              def test_single_cast
         
     | 
| 
       248 
239 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       249 
240 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single#:cast=:to_i
         
     | 
| 
       250 
241 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -259,7 +250,7 @@ b 2 
     | 
|
| 
       259 
250 
     | 
    
         
             
                end
         
     | 
| 
       260 
251 
     | 
    
         
             
              end
         
     | 
| 
       261 
252 
     | 
    
         | 
| 
       262 
     | 
    
         
            -
              def  
     | 
| 
      
 253 
     | 
    
         
            +
              def test_key_field
         
     | 
| 
       263 
254 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       264 
255 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single
         
     | 
| 
       265 
256 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -275,7 +266,7 @@ b 2 
     | 
|
| 
       275 
266 
     | 
    
         
             
                end
         
     | 
| 
       276 
267 
     | 
    
         
             
              end
         
     | 
| 
       277 
268 
     | 
    
         | 
| 
       278 
     | 
    
         
            -
              def  
     | 
| 
      
 269 
     | 
    
         
            +
              def test_fix
         
     | 
| 
       279 
270 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       280 
271 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single
         
     | 
| 
       281 
272 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -289,7 +280,7 @@ b 2 
     | 
|
| 
       289 
280 
     | 
    
         
             
                end
         
     | 
| 
       290 
281 
     | 
    
         
             
              end
         
     | 
| 
       291 
282 
     | 
    
         | 
| 
       292 
     | 
    
         
            -
              def  
     | 
| 
      
 283 
     | 
    
         
            +
              def test_select
         
     | 
| 
       293 
284 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       294 
285 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single
         
     | 
| 
       295 
286 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -303,7 +294,7 @@ b 2 
     | 
|
| 
       303 
294 
     | 
    
         
             
                end
         
     | 
| 
       304 
295 
     | 
    
         
             
              end
         
     | 
| 
       305 
296 
     | 
    
         | 
| 
       306 
     | 
    
         
            -
              def  
     | 
| 
      
 297 
     | 
    
         
            +
              def test_grep
         
     | 
| 
       307 
298 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       308 
299 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single
         
     | 
| 
       309 
300 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -317,7 +308,7 @@ b 2 
     | 
|
| 
       317 
308 
     | 
    
         
             
                end
         
     | 
| 
       318 
309 
     | 
    
         
             
              end
         
     | 
| 
       319 
310 
     | 
    
         | 
| 
       320 
     | 
    
         
            -
              def  
     | 
| 
      
 311 
     | 
    
         
            +
              def test_grep_invert
         
     | 
| 
       321 
312 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       322 
313 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single
         
     | 
| 
       323 
314 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -331,7 +322,7 @@ b 2 
     | 
|
| 
       331 
322 
     | 
    
         
             
                end
         
     | 
| 
       332 
323 
     | 
    
         
             
              end
         
     | 
| 
       333 
324 
     | 
    
         | 
| 
       334 
     | 
    
         
            -
              def  
     | 
| 
      
 325 
     | 
    
         
            +
              def test_grep_header
         
     | 
| 
       335 
326 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       336 
327 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single#:namespace=Test
         
     | 
| 
       337 
328 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -340,13 +331,13 @@ b 2 
     | 
|
| 
       340 
331 
     | 
    
         
             
                EOF
         
     | 
| 
       341 
332 
     | 
    
         | 
| 
       342 
333 
     | 
    
         
             
                TmpFile.with_file(content) do |filename|
         
     | 
| 
       343 
     | 
    
         
            -
                  tsv = TSV.open(filename, :key_field => "Value", :grep => "2")
         
     | 
| 
      
 334 
     | 
    
         
            +
                  tsv = TSV.open(filename, :key_field => "Value", :grep => "#\\|2")
         
     | 
| 
       344 
335 
     | 
    
         
             
                  assert(! tsv.include?("1"))
         
     | 
| 
       345 
336 
     | 
    
         
             
                  assert(tsv.include?("2"))
         
     | 
| 
       346 
337 
     | 
    
         
             
                end
         
     | 
| 
       347 
338 
     | 
    
         
             
              end
         
     | 
| 
       348 
339 
     | 
    
         | 
| 
       349 
     | 
    
         
            -
              def  
     | 
| 
      
 340 
     | 
    
         
            +
              def test_json
         
     | 
| 
       350 
341 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       351 
342 
     | 
    
         
             
            #: :sep=/\\s+/#:type=:single
         
     | 
| 
       352 
343 
     | 
    
         
             
            #Id Value
         
     | 
| 
         @@ -361,7 +352,7 @@ b 2 
     | 
|
| 
       361 
352 
     | 
    
         | 
| 
       362 
353 
     | 
    
         
             
              end
         
     | 
| 
       363 
354 
     | 
    
         | 
| 
       364 
     | 
    
         
            -
              def  
     | 
| 
      
 355 
     | 
    
         
            +
              def test_flat_no_merge
         
     | 
| 
       365 
356 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       366 
357 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       367 
358 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -377,7 +368,7 @@ row2    A    B    Id3 
     | 
|
| 
       377 
368 
     | 
    
         
             
                end
         
     | 
| 
       378 
369 
     | 
    
         
             
              end
         
     | 
| 
       379 
370 
     | 
    
         | 
| 
       380 
     | 
    
         
            -
              def  
     | 
| 
      
 371 
     | 
    
         
            +
              def test_flat_merge
         
     | 
| 
       381 
372 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       382 
373 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       383 
374 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -404,7 +395,7 @@ row2   b  bbb bbbb bb 
     | 
|
| 
       404 
395 
     | 
    
         
             
                end
         
     | 
| 
       405 
396 
     | 
    
         
             
              end
         
     | 
| 
       406 
397 
     | 
    
         | 
| 
       407 
     | 
    
         
            -
              def  
     | 
| 
      
 398 
     | 
    
         
            +
              def test_flat_key
         
     | 
| 
       408 
399 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       409 
400 
     | 
    
         
             
            #Id    ValueA 
         
     | 
| 
       410 
401 
     | 
    
         
             
            row1   a   aa   aaa
         
     | 
| 
         @@ -420,7 +411,7 @@ row2   b  bbb bbbb bb aa 
     | 
|
| 
       420 
411 
     | 
    
         | 
| 
       421 
412 
     | 
    
         | 
| 
       422 
413 
     | 
    
         | 
| 
       423 
     | 
    
         
            -
              def  
     | 
| 
      
 414 
     | 
    
         
            +
              def test_zipped
         
     | 
| 
       424 
415 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       425 
416 
     | 
    
         
             
            #Id    ValueA    ValueB
         
     | 
| 
       426 
417 
     | 
    
         
             
            row1    a|aa|aaa    b|bb|bbb
         
     | 
| 
         @@ -433,7 +424,7 @@ row2    a|aa|aaa    c|cc|ccc 
     | 
|
| 
       433 
424 
     | 
    
         
             
                end
         
     | 
| 
       434 
425 
     | 
    
         
             
              end
         
     | 
| 
       435 
426 
     | 
    
         | 
| 
       436 
     | 
    
         
            -
              def  
     | 
| 
      
 427 
     | 
    
         
            +
              def test_named_array_key
         
     | 
| 
       437 
428 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       438 
429 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       439 
430 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -448,7 +439,7 @@ row2    A    B    Id3 
     | 
|
| 
       448 
439 
     | 
    
         | 
| 
       449 
440 
     | 
    
         
             
              end
         
     | 
| 
       450 
441 
     | 
    
         | 
| 
       451 
     | 
    
         
            -
              def  
     | 
| 
      
 442 
     | 
    
         
            +
              def test_unnamed_key
         
     | 
| 
       452 
443 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       453 
444 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
       454 
445 
     | 
    
         
             
            row2    A    B    Id3
         
     | 
| 
         @@ -461,7 +452,7 @@ row2    A    B    Id3 
     | 
|
| 
       461 
452 
     | 
    
         | 
| 
       462 
453 
     | 
    
         
             
              end
         
     | 
| 
       463 
454 
     | 
    
         | 
| 
       464 
     | 
    
         
            -
              def  
     | 
| 
      
 455 
     | 
    
         
            +
              def test_float_array
         
     | 
| 
       465 
456 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       466 
457 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       467 
458 
     | 
    
         
             
            row1   0.2   0.3 0
         
     | 
| 
         @@ -476,7 +467,7 @@ row2    0.1  4.5 0 
     | 
|
| 
       476 
467 
     | 
    
         | 
| 
       477 
468 
     | 
    
         
             
              end
         
     | 
| 
       478 
469 
     | 
    
         | 
| 
       479 
     | 
    
         
            -
              def  
     | 
| 
      
 470 
     | 
    
         
            +
              def test_flat_field_select
         
     | 
| 
       480 
471 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       481 
472 
     | 
    
         
             
            #: :type=:flat
         
     | 
| 
       482 
473 
     | 
    
         
             
            #Id    Value
         
     | 
| 
         @@ -485,12 +476,11 @@ row2    b  bb bbb 
     | 
|
| 
       485 
476 
     | 
    
         
             
                EOF
         
     | 
| 
       486 
477 
     | 
    
         | 
| 
       487 
478 
     | 
    
         
             
                TmpFile.with_file(content) do |filename|
         
     | 
| 
       488 
     | 
    
         
            -
                  puts TSV.open(filename, :sep => /\s+/, :key_field => "Value").to_s
         
     | 
| 
       489 
479 
     | 
    
         
             
                  assert TSV.open(filename, :sep => /\s+/, :key_field => "Value").include? "aa"
         
     | 
| 
       490 
480 
     | 
    
         
             
                end
         
     | 
| 
       491 
481 
     | 
    
         
             
              end
         
     | 
| 
       492 
482 
     | 
    
         | 
| 
       493 
     | 
    
         
            -
              def  
     | 
| 
      
 483 
     | 
    
         
            +
              def test_flat2
         
     | 
| 
       494 
484 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       495 
485 
     | 
    
         
             
            #: :type=:flat
         
     | 
| 
       496 
486 
     | 
    
         
             
            #Id    Value
         
     | 
| 
         @@ -507,7 +497,7 @@ row2    A|AA|AAA 
     | 
|
| 
       507 
497 
     | 
    
         
             
              end
         
     | 
| 
       508 
498 
     | 
    
         | 
| 
       509 
499 
     | 
    
         | 
| 
       510 
     | 
    
         
            -
              def  
     | 
| 
      
 500 
     | 
    
         
            +
              def test_tsv_flat_double
         
     | 
| 
       511 
501 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       512 
502 
     | 
    
         
             
            #Id    ValueA    ValueB    OtherID
         
     | 
| 
       513 
503 
     | 
    
         
             
            row1    a|aa|aaa    b    Id1|Id2
         
     | 
| 
         @@ -522,7 +512,7 @@ row2    A    B    Id3 
     | 
|
| 
       522 
512 
     | 
    
         
             
                end
         
     | 
| 
       523 
513 
     | 
    
         
             
              end
         
     | 
| 
       524 
514 
     | 
    
         | 
| 
       525 
     | 
    
         
            -
              def  
     | 
| 
      
 515 
     | 
    
         
            +
              def test_flat2single
         
     | 
| 
       526 
516 
     | 
    
         
             
                content =<<-EOF
         
     | 
| 
       527 
517 
     | 
    
         
             
            #: :type=:flat
         
     | 
| 
       528 
518 
     | 
    
         
             
            #Id    Value
         
     | 
| 
         @@ -536,7 +526,7 @@ row2    A AA AAA 
     | 
|
| 
       536 
526 
     | 
    
         | 
| 
       537 
527 
     | 
    
         
             
              end
         
     | 
| 
       538 
528 
     | 
    
         | 
| 
       539 
     | 
    
         
            -
              def  
     | 
| 
      
 529 
     | 
    
         
            +
              def test_shard
         
     | 
| 
       540 
530 
     | 
    
         
             
                shard_function = Proc.new do |key|
         
     | 
| 
       541 
531 
     | 
    
         
             
                  key[-1]
         
     | 
| 
       542 
532 
     | 
    
         
             
                end
         
     | 
| 
         @@ -5,8 +5,7 @@ require 'rbbt/tsv/parallel' 
     | 
|
| 
       5 
5 
     | 
    
         
             
            class TestTSVParallelThrough < Test::Unit::TestCase
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
              def test_pthrough
         
     | 
| 
       8 
     | 
    
         
            -
                 
     | 
| 
       9 
     | 
    
         
            -
                tsv = Organism.identifiers("Hsa").tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
         
     | 
| 
      
 8 
     | 
    
         
            +
                tsv = datafile_test('identifiers').tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
         
     | 
| 
       10 
9 
     | 
    
         | 
| 
       11 
10 
     | 
    
         
             
                h = {}
         
     | 
| 
       12 
11 
     | 
    
         
             
                tsv.monitor = true
         
     | 
| 
         @@ -21,8 +20,7 @@ class TestTSVParallelThrough < Test::Unit::TestCase 
     | 
|
| 
       21 
20 
     | 
    
         | 
| 
       22 
21 
     | 
    
         | 
| 
       23 
22 
     | 
    
         
             
              def test_ppthrough
         
     | 
| 
       24 
     | 
    
         
            -
                 
     | 
| 
       25 
     | 
    
         
            -
                tsv = Organism.identifiers("Hsa").tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
         
     | 
| 
      
 23 
     | 
    
         
            +
                tsv = datafile_test('identifiers').tsv :unnamed => true, :persist => false, :fields => ["Associated Gene Name"]
         
     | 
| 
       26 
24 
     | 
    
         | 
| 
       27 
25 
     | 
    
         
             
                h = {}
         
     | 
| 
       28 
26 
     | 
    
         |