rbbt-util 5.14.33 → 5.14.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/bin/rbbt +2 -0
 - data/lib/rbbt/association/database.rb +153 -0
 - data/lib/rbbt/association/index.rb +89 -20
 - data/lib/rbbt/association/open.rb +37 -0
 - data/lib/rbbt/association/util.rb +133 -0
 - data/lib/rbbt/association.rb +1 -380
 - data/lib/rbbt/entity/identifiers.rb +106 -0
 - data/lib/rbbt/entity.rb +1 -0
 - data/lib/rbbt/knowledge_base/entity.rb +107 -0
 - data/lib/rbbt/knowledge_base/query.rb +83 -0
 - data/lib/rbbt/knowledge_base/registry.rb +106 -0
 - data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
 - data/lib/rbbt/knowledge_base.rb +6 -359
 - data/lib/rbbt/tsv/accessor.rb +4 -0
 - data/lib/rbbt/tsv/change_id.rb +119 -0
 - data/lib/rbbt/tsv/index.rb +6 -2
 - data/lib/rbbt/tsv/parser.rb +7 -5
 - data/lib/rbbt/tsv/util.rb +1 -1
 - data/lib/rbbt/tsv.rb +2 -1
 - data/lib/rbbt/util/R/model.rb +1 -1
 - data/lib/rbbt/util/log.rb +2 -2
 - data/lib/rbbt/util/misc/bgzf.rb +2 -0
 - data/lib/rbbt/util/misc/inspect.rb +1 -1
 - data/lib/rbbt-util.rb +11 -7
 - data/lib/rbbt.rb +0 -1
 - data/share/rbbt_commands/app/start +1 -1
 - data/share/rbbt_commands/tsv/change_id +2 -2
 - data/test/rbbt/association/test_database.rb +61 -0
 - data/test/rbbt/association/test_index.rb +67 -22
 - data/test/rbbt/association/test_open.rb +68 -0
 - data/test/rbbt/association/test_util.rb +108 -0
 - data/test/rbbt/entity/test_identifiers.rb +40 -0
 - data/test/rbbt/knowledge_base/test_entity.rb +0 -0
 - data/test/rbbt/knowledge_base/test_query.rb +45 -0
 - data/test/rbbt/knowledge_base/test_registry.rb +52 -0
 - data/test/rbbt/test_association.rb +3 -3
 - data/test/rbbt/test_knowledge_base.rb +79 -51
 - data/test/rbbt/test_monitor.rb +0 -2
 - data/test/rbbt/test_packed_index.rb +1 -1
 - data/test/rbbt/test_resource.rb +6 -6
 - data/test/rbbt/test_tsv.rb +34 -44
 - data/test/rbbt/tsv/parallel/test_through.rb +2 -4
 - data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
 - data/test/rbbt/tsv/test_change_id.rb +10 -0
 - data/test/rbbt/util/R/test_model.rb +9 -10
 - data/test/rbbt/util/test_misc.rb +1 -1
 - data/test/test_helper.rb +4 -1
 - metadata +24 -2
 
| 
         @@ -0,0 +1,106 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'rbbt/association'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'rbbt/association/item'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            # Registry half of KnowledgeBase: records where each named association
            # source comes from and lazily opens, memoizes and returns the index or
            # database built from it.
            #
            # Relies on state and accessors defined elsewhere in the class
            # (+registry+, +dir+, +namespace+, +format+, +@indices+, +@databases+).
            class KnowledgeBase

              # Records a source under +name+ in the registry.
              #
              # name    - key the source is stored under.
              # file    - source to register when no block is given.
              # options - options stored alongside the source.
              # block   - optional code block used as the source instead of +file+.
              #
              # Returns the two-element Array stored in the registry.
              def register(name, file = nil, options = {}, &block)
                if block_given?
                  # The block gains a +filename+ method answering the registered name.
                  # NOTE(review): presumably so it can be fingerprinted like a file - confirm.
                  block.define_singleton_method(:filename) { name.to_s }
                  Log.debug("Registering #{ name } from code block")
                  @registry[name] = [block, options]
                else
                  Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
                  @registry[name] = [file, options]
                end
              end

              # Returns the names of every registered source.
              def all_databases
                @registry.keys
              end

              # Returns the "~"-separated parts of the key field of the index for
              # +name+, memoized per name.
              def description(name)
                # Tolerate instances whose description cache was never initialised.
                @descriptions ||= {}
                @descriptions[name] ||= get_index(name).key_field.split("~")
              end

              # Returns the first part of the description of +name+.
              def source(name)
                description(name)[0]
              end

              # Returns the second part of the description of +name+.
              def target(name)
                description(name)[1]
              end

              # Returns the third part of the description of +name+ (nil when absent).
              def undirected(name)
                description(name)[2]
              end

              # Opens (or re-opens from its persisted file) the index for +name+.
              #
              # name    - registered name; converted to a String.
              # options - options for Association.index; the caller's hash is not
              #           modified (a copy is taken before defaults are added).
              #
              # Results are memoized in +@indices+ under a key derived from the name
              # and the options. Raises RuntimeError when there is no usable
              # persisted file and +name+ is not registered.
              def get_index(name, options = {})
                name = name.to_s
                key = name.to_s + "_" + Misc.digest(Misc.fingerprint([name,options]))
                @indices[key] ||=
                  begin
                    # Non-mutating concatenation: same label, but the literal is untouched.
                    Persist.memory("Index:" + [key, dir] * "@") do
                      options = options.dup
                      persist_dir = dir
                      persist_file = persist_dir[key]
                      file, registered_options = registry[name]

                      options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true
                      options = Misc.add_defaults options, registered_options if registered_options && registered_options.any?

                      persist_options = Misc.pull_keys options, :persist

                      index = if persist_file.exists? && persist_options[:persist] && !persist_options[:update]
                                Log.low "Re-opening index #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                                Association.index(nil, options, persist_options.dup)
                              else
                                # Kept from the original: runs after the persist keys were pulled
                                # out of +options+, so it is not necessarily a no-op.
                                options = Misc.add_defaults options, registered_options if registered_options
                                raise "Repo #{ name } not found and not registered" if file.nil?
                                Log.medium "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                                Association.index(file, options, persist_options.dup)
                              end

                      index.namespace = self.namespace

                      index
                    end
                  end
              end

              # Opens (or re-opens from its persisted file) the database for +name+.
              #
              # name    - registered name; converted to a String.
              # options - options for Association.open; the caller's hash is not
              #           modified (a copy is taken before defaults are added).
              #
              # Results are memoized in +@databases+ (previously they were stored in
              # +@indices+, leaving the +databases+ accessor permanently empty).
              # Raises RuntimeError when there is no persisted file and +name+ is not
              # registered.
              def get_database(name, options = {})
                name = name.to_s
                # The "Index:" prefix and the dir.indices location below are kept as
                # they were so that already persisted files are still found.
                key = "Index:" + name.to_s + "_" + Misc.digest(Misc.fingerprint([name,options.dup]))
                @databases ||= {}
                @databases[key] ||=
                  begin
                    Persist.memory("Database:" + [key, dir] * "@") do
                      options = options.dup
                      persist_file = dir.indices[key]
                      file, registered_options = registry[name]

                      options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true
                      options = Misc.add_defaults options, registered_options if registered_options && registered_options.any?

                      persist_options = Misc.pull_keys options, :persist

                      database = if persist_file.exists?
                                   Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                                   Association.open(nil, options, persist_options)
                                 else
                                   # Kept from the original: see the matching note in get_index.
                                   options = Misc.add_defaults options, registered_options if registered_options
                                   raise "Repo #{ name } not found and not registered" if file.nil?
                                   Log.medium "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
                                   Association.open(file, options, persist_options)
                                 end

                      database.namespace = self.namespace

                      database
                    end
                  end
              end

              # Returns the fields of the index for +name+.
              def index_fields(name)
                get_index(name).fields
              end

            end
         
     | 
| 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Syndication support for KnowledgeBase: re-exports the databases of
            # another knowledge base under suffixed names.
            class KnowledgeBase
              # Registers every database of +kb+ in this knowledge base as
              # "<database>@<name>", backed by a block that fetches it from +kb+.
              #
              # name - suffix appended (after "@") to each syndicated database name.
              # kb   - knowledge base to syndicate; must respond to +all_databases+,
              #        +registry+, +entity_options+ and +get_database+.
              #
              # Only the :entity_options and :undirected registry options are carried
              # over. The entity options of +kb+ itself serve as a base that the
              # per-database entity options override.
              def syndicate(name, kb)
                kb.all_databases.each do |database|
                  db_name = [database, name] * "@"
                  _file, kb_options = kb.registry[database]
                  options = {}
                  # A database may be registered without options; guard the lookup the
                  # same way the :undirected check does instead of raising on nil.
                  options[:entity_options] = kb_options && kb_options[:entity_options]
                  options[:undirected] = true if kb_options && kb_options[:undirected]
                  if kb.entity_options
                    options[:entity_options] = kb.entity_options.merge(options[:entity_options] || {})
                  end

                  register(db_name, nil, options) do
                    kb.get_database(database)
                  end
                end
              end

              # Returns the names of every registered source.
              def all_databases
                @registry.keys
              end
            end
         
     | 
    
        data/lib/rbbt/knowledge_base.rb
    CHANGED
    
    | 
         @@ -1,23 +1,11 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
       1 
2 
     | 
    
         
             
            require 'rbbt/association'
         
     | 
| 
       2 
3 
     | 
    
         
             
            require 'rbbt/association/item'
         
     | 
| 
       3 
     | 
    
         
            -
            require 'rbbt/entity'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'rbbt/knowledge_base/entity'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'rbbt/knowledge_base/query'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'rbbt/knowledge_base/syndicate'
         
     | 
| 
       4 
7 
     | 
    
         | 
| 
       5 
8 
     | 
    
         
             
            class KnowledgeBase
         
     | 
| 
       6 
     | 
    
         
            -
              class << self
         
     | 
| 
       7 
     | 
    
         
            -
                attr_accessor :knowledge_base_dir, :registry
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
                def registry
         
     | 
| 
       10 
     | 
    
         
            -
                  @registry ||= IndiferentHash.setup({})
         
     | 
| 
       11 
     | 
    
         
            -
                end
         
     | 
| 
       12 
     | 
    
         
            -
                
         
     | 
| 
       13 
     | 
    
         
            -
                def knowledge_base_dir
         
     | 
| 
       14 
     | 
    
         
            -
                  @knowledge_base_dir ||= Rbbt.var.knowledge_base
         
     | 
| 
       15 
     | 
    
         
            -
                end
         
     | 
| 
       16 
     | 
    
         
            -
              end
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
              def setup(name, matches, reverse = false)
         
     | 
| 
       19 
     | 
    
         
            -
                AssociationItem.setup matches, self, name, reverse
         
     | 
| 
       20 
     | 
    
         
            -
              end
         
     | 
| 
       21 
9 
     | 
    
         | 
| 
       22 
10 
     | 
    
         
             
              attr_accessor :namespace, :dir, :indices, :registry, :format, :databases, :entity_options
         
     | 
| 
       23 
11 
     | 
    
         
             
              def initialize(dir, namespace = nil)
         
     | 
| 
         @@ -36,348 +24,7 @@ class KnowledgeBase 
     | 
|
| 
       36 
24 
     | 
    
         
             
                @databases = {}
         
     | 
| 
       37 
25 
     | 
    
         
             
              end
         
     | 
| 
       38 
26 
     | 
    
         | 
| 
       39 
     | 
    
         
            -
              def  
     | 
| 
       40 
     | 
    
         
            -
                 
     | 
| 
       41 
     | 
    
         
            -
                new_kb = KnowledgeBase.new dir[new_namespace], new_namespace
         
     | 
| 
       42 
     | 
    
         
            -
                new_kb.format.merge! self.format
         
     | 
| 
       43 
     | 
    
         
            -
                new_kb.entity_options.merge! self.entity_options
         
     | 
| 
       44 
     | 
    
         
            -
                new_kb.registry = self.registry
         
     | 
| 
       45 
     | 
    
         
            -
                new_kb
         
     | 
| 
       46 
     | 
    
         
            -
              end
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
              #{{{ Descriptions
         
     | 
| 
       49 
     | 
    
         
            -
             
         
     | 
| 
       50 
     | 
    
         
            -
              def register(name, file = nil, options = {}, &block)
         
     | 
| 
       51 
     | 
    
         
            -
                if block_given?
         
     | 
| 
       52 
     | 
    
         
            -
                  block.define_singleton_method(:filename) do name.to_s end
         
     | 
| 
       53 
     | 
    
         
            -
                  Log.debug("Registering #{ name } from code block")
         
     | 
| 
       54 
     | 
    
         
            -
                  @registry[name] = [block, options]
         
     | 
| 
       55 
     | 
    
         
            -
                else
         
     | 
| 
       56 
     | 
    
         
            -
                  Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
         
     | 
| 
       57 
     | 
    
         
            -
                  @registry[name] = [file, options]
         
     | 
| 
       58 
     | 
    
         
            -
                end
         
     | 
| 
       59 
     | 
    
         
            -
              end
         
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
              def syndicate(name, kb)
         
     | 
| 
       62 
     | 
    
         
            -
                kb.all_databases.each do |database|
         
     | 
| 
       63 
     | 
    
         
            -
                  db_name = [database, name] * "@"
         
     | 
| 
       64 
     | 
    
         
            -
                  file, kb_options = kb.registry[database]
         
     | 
| 
       65 
     | 
    
         
            -
                  options = {}
         
     | 
| 
       66 
     | 
    
         
            -
                  options[:entity_options] = kb_options[:entity_options]
         
     | 
| 
       67 
     | 
    
         
            -
                  options[:undirected] = true if kb_options and kb_options[:undirected]
         
     | 
| 
       68 
     | 
    
         
            -
                  if kb.entity_options
         
     | 
| 
       69 
     | 
    
         
            -
                    options[:entity_options] = kb.entity_options.merge(options[:entity_options] || {})
         
     | 
| 
       70 
     | 
    
         
            -
                  end
         
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
                  register(db_name, nil, options) do
         
     | 
| 
       73 
     | 
    
         
            -
                    kb.get_database(database)
         
     | 
| 
       74 
     | 
    
         
            -
                  end
         
     | 
| 
       75 
     | 
    
         
            -
                end
         
     | 
| 
       76 
     | 
    
         
            -
              end
         
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
              def all_databases
         
     | 
| 
       79 
     | 
    
         
            -
                @registry.keys 
         
     | 
| 
       80 
     | 
    
         
            -
              end
         
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
              def description(name)
         
     | 
| 
       83 
     | 
    
         
            -
                @descriptions[name] ||= get_index(name).key_field.split("~")
         
     | 
| 
       84 
     | 
    
         
            -
              end
         
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
              def source(name)
         
     | 
| 
       87 
     | 
    
         
            -
                description(name)[0]
         
     | 
| 
       88 
     | 
    
         
            -
              end
         
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
              def target(name)
         
     | 
| 
       91 
     | 
    
         
            -
                description(name)[1]
         
     | 
| 
       92 
     | 
    
         
            -
              end
         
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
              def undirected(name)
         
     | 
| 
       95 
     | 
    
         
            -
                description(name)[2]
         
     | 
| 
       96 
     | 
    
         
            -
              end
         
     | 
| 
       97 
     | 
    
         
            -
             
     | 
| 
       98 
     | 
    
         
            -
              def source_type(name)
         
     | 
| 
       99 
     | 
    
         
            -
                Entity.formats[source(name)]
         
     | 
| 
       100 
     | 
    
         
            -
              end
         
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
       102 
     | 
    
         
            -
              def target_type(name)
         
     | 
| 
       103 
     | 
    
         
            -
                Entity.formats[target(name)]
         
     | 
| 
       104 
     | 
    
         
            -
              end
         
     | 
| 
       105 
     | 
    
         
            -
             
     | 
| 
       106 
     | 
    
         
            -
              def index_fields(name)
         
     | 
| 
       107 
     | 
    
         
            -
                get_index(name).fields
         
     | 
| 
       108 
     | 
    
         
            -
              end
         
     | 
| 
       109 
     | 
    
         
            -
             
     | 
| 
       110 
     | 
    
         
            -
              def entities
         
     | 
| 
       111 
     | 
    
         
            -
                all_databases.inject([]){|acc,name| acc << source(name); acc << target(name)}.uniq
         
     | 
| 
       112 
     | 
    
         
            -
              end
         
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
              def entity_types
         
     | 
| 
       115 
     | 
    
         
            -
                entities.collect{|entity| Entity.formats[entity] }.uniq
         
     | 
| 
       116 
     | 
    
         
            -
              end
         
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
       118 
     | 
    
         
            -
              #{{{ Open and get
         
     | 
| 
       119 
     | 
    
         
            -
             
         
     | 
| 
       120 
     | 
    
         
            -
              def open_options
         
     | 
| 
       121 
     | 
    
         
            -
                {:namespace => namespace, :format => @format}
         
     | 
| 
       122 
     | 
    
         
            -
              end
         
     | 
| 
       123 
     | 
    
         
            -
             
         
     | 
| 
       124 
     | 
    
         
            -
              def get_database(name, options = {})
         
     | 
| 
       125 
     | 
    
         
            -
                key = name.to_s + "_" + Misc.digest(Misc.fingerprint([name,options,format,namespace]))
         
     | 
| 
       126 
     | 
    
         
            -
                @databases[key] ||= 
         
     | 
| 
       127 
     | 
    
         
            -
                  begin 
         
     | 
| 
       128 
     | 
    
         
            -
                    file, registered_options = registry[name]
         
     | 
| 
       129 
     | 
    
         
            -
                    database = Persist.memory("Database:" << [key, dir] * "@") do
         
     | 
| 
       130 
     | 
    
         
            -
                      persist_file = dir.databases[key]
         
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
       132 
     | 
    
         
            -
                      options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format
         
     | 
| 
       133 
     | 
    
         
            -
                      options = Misc.add_defaults options, registered_options if registered_options
         
     | 
| 
       134 
     | 
    
         
            -
             
     | 
| 
       135 
     | 
    
         
            -
                      persist_options = Misc.pull_keys options, :persist
         
     | 
| 
       136 
     | 
    
         
            -
             
     | 
| 
       137 
     | 
    
         
            -
                      database = if persist_file.exists?
         
     | 
| 
       138 
     | 
    
         
            -
                                Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
         
     | 
| 
       139 
     | 
    
         
            -
                                Association.open(file, options, persist_options)
         
     | 
| 
       140 
     | 
    
         
            -
                              else
         
     | 
| 
       141 
     | 
    
         
            -
                                raise "Repo #{ name } not found and not registered" if file.nil?
         
     | 
| 
       142 
     | 
    
         
            -
                                Log.low "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
         
     | 
| 
       143 
     | 
    
         
            -
                                Association.open(file, options, persist_options)
         
     | 
| 
       144 
     | 
    
         
            -
                              end
         
     | 
| 
       145 
     | 
    
         
            -
             
     | 
| 
       146 
     | 
    
         
            -
                      database.namespace = self.namespace
         
     | 
| 
       147 
     | 
    
         
            -
             
     | 
| 
       148 
     | 
    
         
            -
                      database
         
     | 
| 
       149 
     | 
    
         
            -
                    end
         
     | 
| 
       150 
     | 
    
         
            -
             
     | 
| 
       151 
     | 
    
         
            -
                    database.entity_options ||= {}
         
     | 
| 
       152 
     | 
    
         
            -
                    database.entity_options.merge! registered_options[:entity_options] if registered_options.include? :entity_options
         
     | 
| 
       153 
     | 
    
         
            -
                    database
         
     | 
| 
       154 
     | 
    
         
            -
                  end
         
     | 
| 
       155 
     | 
    
         
            -
              end
         
     | 
| 
       156 
     | 
    
         
            -
             
     | 
| 
       157 
     | 
    
         
            -
              # Returns the association index registered as +name+, re-opening a
              # persisted copy or building it from the registered file on first use.
              #
              # The result is memoized in @indices under a key made of the name and a
              # digest of the fingerprint of [name, options].
              #
              # name    - registry name of the database (converted with to_s).
              # options - Hash of index options. Defaults for :persist_file, :namespace
              #           and :format are added, then the options stored with the
              #           registry entry (when there are any).
              #
              # Raises RuntimeError when there is no persisted index and the registry
              # holds no file for +name+.
              def get_index(name, options = {})
                name = name.to_s
                key = name + "_" + Misc.digest(Misc.fingerprint([name, options]))

                @indices[key] ||= begin
                  memory_id = "Index:" + [key, dir] * "@"

                  Persist.memory(memory_id) do
                    index_path = dir.indices[key]
                    file, registered_options = registry[name]

                    options = Misc.add_defaults options, :persist_file => index_path, :namespace => namespace, :format => format
                    options = Misc.add_defaults options, registered_options if registered_options && registered_options.any?

                    persist_options = Misc.pull_keys options, :persist

                    if index_path.exists?
                      # A persisted copy is available, so no source file is passed.
                      Log.low "Re-opening index #{ name } from #{ Misc.fingerprint index_path }. #{options}"
                      index_source = nil
                    else
                      options = Misc.add_defaults options, registered_options if registered_options
                      raise "Repo #{ name } not found and not registered" if file.nil?
                      Log.low "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                      index_source = file
                    end

                    index = Association.index(index_source, options, persist_options)
                    index.namespace = self.namespace
                    index
                  end
                end
              end
         
     | 
| 
       187 
     | 
    
         
            -
             
     | 
| 
       188 
     | 
    
         
            -
             
     | 
| 
       189 
     | 
    
         
            -
              #{{{ Add manual database
         
     | 
| 
       190 
     | 
    
         
            -
              
         
     | 
| 
       191 
     | 
    
         
            -
              # Creates an updatable index for +name+ and stores it in @indices[name].
              #
              # name        - key under which the index is stored; also used to locate
              #               the repo file through dir[name].find.
              # source_type - first component of the index key field.
              # target_type - second component of the index key field.
              # fields      - names of the value fields; a trailing Hash is taken as
              #               options. Its :undirected entry is extracted with
              #               Misc.process_options and, when truthy, appended as a
              #               third component of the key field.
              #
              # Returns the new index.
              def add_index(name, source_type, target_type, *fields)
                options = Hash === fields.last ? fields.pop : {}

                # A falsy flag becomes nil so that compact drops it from the key field.
                undirected = Misc.process_options(options, :undirected) || nil

                repo_file = dir[name].find
                key_field = [source_type, target_type, undirected].compact * "~"
                index_options = {:namespace => namespace, :key_field => key_field, :fields => fields}.merge(options)

                @indices[name] = Association.index(nil, index_options, :file => repo_file, :update => true)
              end
         
     | 
| 
       202 
     | 
    
         
            -
             
     | 
| 
       203 
     | 
    
         
            -
              # Stores the values +rest+ for the pair source~target in the index kept
              # in @indices[name].
              #
              # Returns the stored Array of values.
              def add(name, source, target, *rest)
                pair_key = [source, target].join("~")
                @indices[name][pair_key] = rest
              end
         
     | 
| 
       208 
     | 
    
         
            -
             
     | 
| 
       209 
     | 
    
         
            -
              # Runs the given block inside write_and_read of the index stored in
              # @indices[name] and returns whatever write_and_read returns.
              def write(name)
                @indices[name].write_and_read { yield }
              end
         
     | 
| 
       215 
     | 
    
         
            -
             
     | 
| 
       216 
     | 
    
         
            -
              #{{{ Annotate
         
     | 
| 
       217 
     | 
    
         
            -
              
         
     | 
| 
       218 
     | 
    
         
            -
              # Builds the entity options Hash used for entities of +type+.
              #
              # Precedence, lowest to highest:
              #   1. :organism => namespace
              #   2. the options found in entity_options for Entity.formats[type]
              #   3. :format => @format[type], when a format is configured for +type+
              #   4. the options the database declares for +type+, when
              #      +database_name+ is given and that database has entity_options
              #
              # type          - entity type; key into Entity.formats and @format.
              # database_name - optional name passed to get_database.
              #
              # Returns a new Hash; the Hash stored in entity_options is not modified.
              def entity_options_for(type, database_name = nil)
                # Work on a copy so the :format entry is not written into the shared
                # defaults.
                options = (entity_options[Entity.formats[type]] || {}).dup
                options[:format] = @format[type] if @format && @format.include?(type)
                options = {:organism => namespace}.merge(options)

                if database_name
                  database_options = get_database(database_name).entity_options
                  type_options = database_options[type] if database_options
                  options = options.merge(type_options) if type_options
                end

                options
              end
         
     | 
| 
       229 
     | 
    
         
            -
             
     | 
| 
       230 
     | 
    
         
            -
              # Passes +entities+ to Misc.prepare_entity using the format configured
              # in @format for +type+ (or +type+ itself when none is configured) and
              # the options from entity_options_for(type, database).
              #
              # Returns the result of Misc.prepare_entity.
              def annotate(entities, type, database = nil)
                target_format = @format[type]
                target_format = type unless target_format
                options = entity_options_for(type, database)

                Misc.prepare_entity(entities, target_format, options)
              end
         
     | 
| 
       234 
     | 
    
         
            -
             
     | 
| 
       235 
     | 
    
         
            -
              #{{{ Identify
         
     | 
| 
       236 
     | 
    
         
            -
              
         
     | 
| 
       237 
     | 
    
         
            -
             
     | 
| 
       238 
     | 
    
         
            -
              # Builds a lookup index whose target field is +target+.
              #
              # When +database+ lists identifier files, the header of the first one is
              # inspected and the index is built from database.identifiers; otherwise
              # the identifiers of the organism given by namespace are used.
              #
              # Returns the index, or an empty Hash when +target+ is not among the
              # fields of the inspected header.
              def database_identify_index(database, target)
                if database.identifier_files.any?
                  header = TSV.parse_header(database.identifier_files.first)
                  return {} unless header.all_fields.include?(target)

                  return TSV.index(database.identifiers, :target => target, :persist => true, :order => true)
                end

                organism_fields = TSV.parse_header(Organism.identifiers(namespace)).all_fields
                return {} unless organism_fields.include?(target)

                Organism.identifiers(namespace).index(:target => target, :persist => true, :order => true)
              end
         
     | 
| 
       255 
     | 
    
         
            -
             
     | 
| 
       256 
     | 
    
         
            -
              # Translates +entity+ into the source identifier format of database
              # +name+.
              #
              # Symbols, and Strings that the database already includes, are returned
              # unchanged. The translation index is built once per database with
              # database_identify_index and cached in @identifiers[name]['source'].
              #
              # For an Array, each element is replaced by its translation, or kept as
              # is when the index has no entry for it. For a single entity the index
              # entry is returned (nil when missing).
              def identify_source(name, entity)
                database = get_database(name, :persist => true)
                return entity if Symbol === entity
                return entity if String === entity && database.include?(entity)

                source_field = source(name)
                cache = (@identifiers[name] ||= {})
                index = (cache['source'] ||= database_identify_index(database, source_field))

                return index[entity] unless Array === entity

                # Pair each translation with the original so misses fall back to it.
                index.chunked_values_at(entity).zip(entity).map { |pair| pair.compact.first }
              end
         
     | 
| 
       273 
     | 
    
         
            -
             
     | 
| 
       274 
     | 
    
         
            -
              # Translates +entity+ into the target identifier format of database
              # +name+.
              #
              # Symbols are returned unchanged; Strings are always looked up. The
              # translation index is built once per database with
              # database_identify_index and cached in @identifiers[name]['target'].
              #
              # For an Array, each element is replaced by its translation, or kept as
              # is when the index has no entry for it. For a single entity the index
              # entry is returned (nil when missing).
              def identify_target(name, entity)
                database = get_database(name, :persist => true)
                return entity if Symbol === entity

                target_field = target(name)
                cache = (@identifiers[name] ||= {})
                index = (cache['target'] ||= database_identify_index(database, target_field))

                return index[entity] unless Array === entity

                # Pair each translation with the original so misses fall back to it.
                index.chunked_values_at(entity).zip(entity).map { |pair| pair.compact.first }
              end
         
     | 
| 
       291 
     | 
    
         
            -
             
     | 
| 
       292 
     | 
    
         
            -
              # Identifies +entity+ in database +name+, trying the source side first
              # and falling back to the target side when that yields nil or false.
              def identify(name, entity)
                from_source = identify_source(name, entity)
                return from_source if from_source

                identify_target(name, entity)
              end
         
     | 
| 
       295 
     | 
    
         
            -
             
     | 
| 
       296 
     | 
    
         
            -
              # Finds an identifier for +entity+ by asking every database in
              # all_databases.
              #
              # Returns +entity+ itself when it appears among the source matches or,
              # failing that, among the target matches. Otherwise returns the first
              # source match, then the first target match, or nil when nothing matched.
              def normalize(entity)
                source_matches = all_databases.map { |database| identify_source(database, entity) }
                source_matches = source_matches.flatten.compact.uniq
                return entity if source_matches.include?(entity)

                target_matches = all_databases.map { |database| identify_target(database, entity) }
                target_matches = target_matches.flatten.compact.uniq
                return entity if target_matches.include?(entity)

                source_matches.empty? ? target_matches.first : source_matches.first
              end
         
     | 
| 
       309 
     | 
    
         
            -
             
     | 
| 
       310 
     | 
    
         
            -
              #{{{ Query
         
     | 
| 
       311 
     | 
    
         
            -
             
     | 
| 
       312 
     | 
    
         
            -
              # Returns every key of the index for +name+ (opened with +options+),
              # passed through setup.
              def all(name, options={})
                setup(name, get_index(name, options).keys)
              end
         
     | 
| 
       316 
     | 
    
         
            -
             
     | 
| 
       317 
     | 
    
         
            -
              # Returns the matches of +entity+ in the index for +name+, passed
              # through setup.
              def children(name, entity)
                setup(name, get_index(name).match(entity))
              end
         
     | 
| 
       321 
     | 
    
         
            -
             
     | 
| 
       322 
     | 
    
         
            -
              # Returns the matches of +entity+ in the reversed index for +name+,
              # passed through setup with the reverse flag set.
              def parents(name, entity)
                reversed_index = get_index(name).reverse
                setup(name, reversed_index.match(entity), true)
              end
         
     | 
| 
       326 
     | 
    
         
            -
             
     | 
| 
       327 
     | 
    
         
            -
              # Collects the neighbours of +entity+ in database +name+.
              #
              # Returns a Hash (passed through IndiferentHash.setup) with :parents and
              # :children. Only :children is present when the database is undirected
              # and its source and target are equal.
              def neighbours(name, entity)
                children_only = undirected(name) && source(name) == target(name)

                relatives = {}
                relatives[:parents] = parents(name, entity) unless children_only
                relatives[:children] = children(name, entity)

                IndiferentHash.setup(relatives)
              end
         
     | 
| 
       334 
     | 
    
         
            -
             
     | 
| 
       335 
     | 
    
         
            -
              # Returns the associations of database +name+ restricted to +entities+.
              #
              # entities - :all, an AnnotatedArray (keyed by its format when it
              #            responds to format, else by base_entity.to_s) or a Hash,
              #            which is handed to the index as is.
              # block    - optional filter applied to the matches with select.
              #
              # Raises RuntimeError for any other kind of +entities+.
              def subset(name, entities, &block)
                query =
                  if :all === entities
                    {:target => :all, :source => :all}
                  elsif AnnotatedArray === entities
                    entity_format = entities.respond_to?(:format) ? entities.format : nil
                    entity_format ||= entities.base_entity.to_s
                    {entity_format => entities.clean_annotations}
                  elsif Hash === entities
                    entities
                  else
                    raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
                  end

                index = get_index(name)
                matches = setup(name, index.subset_entities(query))

                return matches unless block_given?

                matches.select(&block)
              end
         
     | 
| 
       355 
     | 
    
         
            -
             
     | 
| 
       356 
     | 
    
         
            -
              # Converts +entities+ to the format configured in @format for +type+.
              #
              # The conversion (entities.to) only happens when a format is configured,
              # +entities+ responds to format, and its current format differs;
              # otherwise +entities+ is returned unchanged.
              def translate(entities, type)
                wanted = @format[type]
                return entities unless wanted
                return entities unless entities.respond_to?(:format)
                return entities if wanted == entities.format

                entities.to wanted
              end
         
     | 
| 
       363 
     | 
    
         
            -
             
     | 
| 
       364 
     | 
    
         
            -
              # Collects, across all_databases, the association between +source+ and
              # +target+.
              #
              # For each database the pair "source~target" is looked up in its index.
              # When it is missing and either +undirected+ is truthy or the database
              # is undirected, the inverse pair "target~source" is tried instead.
              #
              # Returns an Array with one entry per database that had a match: the
              # result of setup for a direct match, or the inverse pair (after it was
              # passed to setup) for an inverse match.
              def pair_matches(source, target, undirected = nil)
                all_databases.each_with_object([]) do |database, found|
                  # Fresh strings for every database, since setup receives them.
                  forward = [source, target].join("~")
                  index = get_index(database)

                  if index.include?(forward)
                    found << setup(database, forward)
                    next
                  end

                  next unless undirected || undirected(database)

                  backward = [target, source].join("~")
                  next unless index.include?(backward)

                  setup(database, backward)
                  found << backward
                end
              end
         
     | 
| 
      
 27 
     | 
    
         
            +
              # Hands +matches+ to AssociationItem.setup together with this object,
              # the database +name+ and the +reverse+ flag, and returns its result.
              def setup(name, matches, reverse = false)
                AssociationItem.setup(matches, self, name, reverse)
              end
         
     | 
| 
       383 
30 
     | 
    
         
             
            end
         
     | 
    
        data/lib/rbbt/tsv/accessor.rb
    CHANGED
    
    | 
         @@ -1,6 +1,8 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'yaml'
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'rbbt/annotations'
         
     | 
| 
       3 
3 
     | 
    
         
             
            require 'rbbt/tsv/dumper'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'set'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
       4 
6 
     | 
    
         
             
            module TSV
         
     | 
| 
       5 
7 
     | 
    
         | 
| 
       6 
8 
     | 
    
         
             
              TSV_SERIALIZER = YAML
         
     | 
| 
         @@ -601,6 +603,7 @@ module TSV 
     | 
|
| 
       601 
603 
     | 
    
         
             
              end
         
     | 
| 
       602 
604 
     | 
    
         | 
| 
       603 
605 
     | 
    
         
             
              def summary
         
     | 
| 
      
 606 
     | 
    
         
            +
             
     | 
| 
       604 
607 
     | 
    
         
             
                key = nil
         
     | 
| 
       605 
608 
     | 
    
         
             
                values = nil
         
     | 
| 
       606 
609 
     | 
    
         
             
                self.each do |k, v|
         
     | 
| 
         @@ -608,6 +611,7 @@ module TSV 
     | 
|
| 
       608 
611 
     | 
    
         
             
                  values = v
         
     | 
| 
       609 
612 
     | 
    
         
             
                  break
         
     | 
| 
       610 
613 
     | 
    
         
             
                end
         
     | 
| 
      
 614 
     | 
    
         
            +
             
     | 
| 
       611 
615 
     | 
    
         
             
                with_unnamed do
         
     | 
| 
       612 
616 
     | 
    
         
             
                  <<-EOF
         
     | 
| 
       613 
617 
     | 
    
         
             
            Filename = #{Path === filename ? filename.find : (filename || "No filename")}
         
     | 
    
        data/lib/rbbt/tsv/change_id.rb
    CHANGED
    
    | 
         @@ -1,4 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'rbbt/tsv'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'rbbt/persist'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
       2 
4 
     | 
    
         | 
| 
       3 
5 
     | 
    
         
             
            module TSV
         
     | 
| 
       4 
6 
     | 
    
         
             
              def self.change_key(tsv, format, options = {}, &block)
         
     | 
| 
         @@ -84,5 +86,122 @@ module TSV 
     | 
|
| 
       84 
86 
     | 
    
         
             
                TSV.swap_id(self, *args)
         
     | 
| 
       85 
87 
     | 
    
         
             
              end
         
     | 
| 
       86 
88 
     | 
    
         | 
| 
      
 89 
     | 
    
         
            +
              # Builds an index that translates identifiers from +source+ into
              # +target+ using the identifier +files+.
              #
              # The lookup runs in two passes:
              # 1. Return a direct index from the first file that lists both formats.
              # 2. Otherwise chain two different files through the first field they
              #    share, persisting the combined index with Persist.persist_tsv.
              #
              # files   - Collection of TSV objects and/or files whose header can be
              #           read with TSV.parse_header.
              # target  - Format to translate into. When nil it is taken from the
              #           first file examined (file.fields.first for a TSV,
              #           all_fields[1] otherwise).
              # source  - Format to translate from; nil places no restriction.
              # options - Options forwarded to the index builders; :persist defaults
              #           to true. The caller's hash is not modified.
              #
              # Returns the index, or nil when source == target (including both nil)
              # or when no file or pair of files covers the translation.
              def self.translation_index(files, target = nil, source = nil, options = {})
                return nil if source == target

                options = Misc.add_defaults options.dup, :persist => true
                fields = source ? [source] : nil

                # First pass: a single file that holds both formats.
                files.each do |file|
                  if TSV === file
                    all_fields = file.all_fields
                    target = file.fields.first if target.nil?
                    covered = (source.nil? || all_fields.include?(source)) && all_fields.include?(target)
                    return file.index(options.merge(:target => target, :fields => fields, :order => true)) if covered
                  else
                    all_fields = TSV.parse_header(file).all_fields
                    target = all_fields[1] if target.nil?
                    covered = (source.nil? || all_fields.include?(source)) && all_fields.include?(target)
                    return TSV.index(file, options.merge(:target => target, :fields => fields, :order => true)) if covered
                  end
                end

                # Second pass: translate source -> common field with one file and
                # common field -> target with another.
                files.each do |file|
                  all_fields = TSV === file ? file.all_fields : TSV.parse_header(file).all_fields

                  # The file that starts the chain must actually list the source
                  # format. Checking `fields` here would always succeed, because
                  # `fields` is just [source].
                  next unless source.nil? || all_fields.include?(source)

                  files.each do |other_file|
                    next if file == other_file

                    other_all_fields = TSV === other_file ? other_file.all_fields : TSV.parse_header(other_file).all_fields

                    # A field from the intersection is present in both files by
                    # construction, so only the target needs to be checked.
                    common_field = (all_fields & other_all_fields).first
                    next unless common_field && other_all_fields.include?(target)

                    return Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|
                      index = if TSV === file
                                file.index(options.merge(:target => common_field, :fields => fields))
                              else
                                TSV.index(file, options.merge(:target => common_field, :fields => fields))
                              end

                      other_index = if TSV === other_file
                                      other_file.index(options.merge(:target => target, :fields => [common_field]))
                                    else
                                      TSV.index(other_file, options.merge(:target => target, :fields => [common_field]))
                                    end

                      data.serializer = :clean
                      data.merge! index.to_list.attach(other_index.to_list).slice([target]).to_single

                      data
                    end
                  end
                end

                nil
              end
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
              # Translates +tsv+ by opening the stream produced by translate_stream
              # (called with the same arguments) as a new TSV, and copies the
              # identifiers of the original onto the result.
              #
              # Returns the newly opened TSV.
              def self.translate(tsv, *args)
                TSV.open(translate_stream(tsv, *args)).tap do |translated|
                  translated.identifiers = tsv.identifiers
                end
              end
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
              # Streams a copy of +tsv+ in which the identifiers held in +field+ are
              # translated into +format+.
              #
              # tsv     - TSV to translate; converted with to_double unless its type
              #           is already :double.
              # field   - Name of the key or value field that holds the identifiers.
              # format  - Name of the identifier format to translate into.
              # options - :identifier_files (defaults to tsv.identifier_files),
              #           :identifiers, :persist and :compact are consumed here; the
              #           remaining options are forwarded to translation_index.
              #
              # Returns dumper.stream for the TSV::Dumper filled by the traversal.
              # Raises RuntimeError ("No index: ...") when translation_index returns nil.
              def self.translate_stream(tsv, field, format, options = {}, &block)
                options = Misc.add_defaults options, :persist => false, :identifier_files => tsv.identifier_files, :compact => true

                # NOTE(review): :persist and :compact are extracted but not used
                # below; confirm whether they were meant to be forwarded.
                identifier_files, identifiers, _persist_input, _compact = Misc.process_options options, :identifier_files, :identifiers, :persist, :compact
                identifier_files = [tsv, identifiers].compact if identifier_files.nil? or identifier_files.empty?

                # Deduplicate into a new array: the list may be the very object the
                # caller passed in (or the one the tsv returned), so it must not be
                # modified in place.
                identifier_files = identifier_files.uniq

                index = translation_index identifier_files, format, field, options.dup
                raise "No index: #{Misc.fingerprint([identifier_files, field, format])}" if index.nil?

                orig_type = tsv.type
                tsv = tsv.to_double if orig_type != :double

                pos = tsv.identify_field field

                new_options = tsv.options
                new_options[:identifiers] = tsv.identifiers.find if tsv.identifiers

                case pos
                when :key
                  # Translating the key: rename the key field and map every key.
                  new_options[:key_field] = format if tsv.key_field == field
                  dumper = TSV::Dumper.new new_options
                  dumper.init
                  TSV.traverse tsv, :into => dumper do |key,values|
                    [index[key], values]
                  end
                else
                  # Translating a value field: rename it in the header.
                  new_options[:fields] = tsv.fields.collect{|f| f == field ? format : f }
                  dumper = TSV::Dumper.new new_options
                  dumper.init

                  # NOTE(review): tsv went through to_double above, so presumably
                  # only the :double branch runs; the others are kept unchanged.
                  case tsv.type
                  when :double
                    TSV.traverse tsv, :into => dumper do |key,values|
                      originals = values[pos]
                      values[pos] = index.values_at(*originals)
                      [key, values]
                    end
                  when :list
                    TSV.traverse tsv, :into => dumper do |key,values|
                      original = values[pos]
                      values[pos] = index[original]
                      [key, values]
                    end
                  when :flat
                    TSV.traverse tsv, :into => dumper do |key,values|
                      [key, index.values_at(*values)]
                    end
                  when :single
                    TSV.traverse tsv, :into => dumper do |key,original|
                      [key, index[original]]
                    end
                  end
                end

                dumper.stream
              end
         
     | 
| 
       88 
207 
     | 
    
         
             
            end
         
     | 
    
        data/lib/rbbt/tsv/index.rb
    CHANGED
    
    | 
         @@ -32,12 +32,16 @@ module TSV 
     | 
|
| 
       32 
32 
     | 
    
         | 
| 
       33 
33 
     | 
    
         
             
                      new_key_field, new_fields = through target, fields, true do |key, values|
         
     | 
| 
       34 
34 
     | 
    
         
             
                        next if key.empty? 
         
     | 
| 
       35 
     | 
    
         
            -
                         
     | 
| 
      
 35 
     | 
    
         
            +
                        case type
         
     | 
| 
      
 36 
     | 
    
         
            +
                        when :single
         
     | 
| 
       36 
37 
     | 
    
         
             
                          values = [values]
         
     | 
| 
       37 
38 
     | 
    
         
             
                          values.unshift key
         
     | 
| 
       38 
     | 
    
         
            -
                         
     | 
| 
      
 39 
     | 
    
         
            +
                        when :double
         
     | 
| 
       39 
40 
     | 
    
         
             
                          values = values.dup
         
     | 
| 
       40 
41 
     | 
    
         
             
                          values.unshift [key]
         
     | 
| 
      
 42 
     | 
    
         
            +
                        when :list, :flat
         
     | 
| 
      
 43 
     | 
    
         
            +
                          values = values.dup
         
     | 
| 
      
 44 
     | 
    
         
            +
                          values.unshift key
         
     | 
| 
       41 
45 
     | 
    
         
             
                        end
         
     | 
| 
       42 
46 
     | 
    
         | 
| 
       43 
47 
     | 
    
         
             
                        values.each_with_index do |list, i|
         
     | 
    
        data/lib/rbbt/tsv/parser.rb
    CHANGED
    
    | 
         @@ -343,6 +343,8 @@ module TSV 
     | 
|
| 
       343 
343 
     | 
    
         
             
                    when String === key_field
         
     | 
| 
       344 
344 
     | 
    
         
             
                      @key_position = @fields.dup.unshift(@key_field).index key_field
         
     | 
| 
       345 
345 
     | 
    
         
             
                      raise "Key field #{ key_field } was not found" if @key_position.nil?
         
     | 
| 
      
 346 
     | 
    
         
            +
                    when :key == key_field
         
     | 
| 
      
 347 
     | 
    
         
            +
                      @key_position = 0
         
     | 
| 
       346 
348 
     | 
    
         
             
                    else
         
     | 
| 
       347 
349 
     | 
    
         
             
                      raise "Format of key_field not understood: #{key_field.inspect}"
         
     | 
| 
       348 
350 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -503,11 +505,11 @@ module TSV 
     | 
|
| 
       503 
505 
     | 
    
         
             
                  # get parser
         
     | 
| 
       504 
506 
     | 
    
         | 
| 
       505 
507 
     | 
    
         
             
                  # grep
         
     | 
| 
       506 
     | 
    
         
            -
                  if grep
         
     | 
| 
       507 
     | 
    
         
            -
             
     | 
| 
       508 
     | 
    
         
            -
             
     | 
| 
       509 
     | 
    
         
            -
             
     | 
| 
       510 
     | 
    
         
            -
                  end
         
     | 
| 
      
 508 
     | 
    
         
            +
                  #if grep and false
         
     | 
| 
      
 509 
     | 
    
         
            +
                  #  stream.rewind if stream.eof?
         
     | 
| 
      
 510 
     | 
    
         
            +
                  #  stream = Open.grep(stream, grep, invert_grep)
         
     | 
| 
      
 511 
     | 
    
         
            +
                  #  self.first_line = stream.gets
         
     | 
| 
      
 512 
     | 
    
         
            +
                  #end
         
     | 
| 
       511 
513 
     | 
    
         | 
| 
       512 
514 
     | 
    
         
             
                  # first line
         
     | 
| 
       513 
515 
     | 
    
         
             
                  line = self.rescue_first_line
         
     |