rbbt-util 5.14.33 → 5.14.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +2 -0
  3. data/lib/rbbt/association/database.rb +153 -0
  4. data/lib/rbbt/association/index.rb +89 -20
  5. data/lib/rbbt/association/open.rb +37 -0
  6. data/lib/rbbt/association/util.rb +133 -0
  7. data/lib/rbbt/association.rb +1 -380
  8. data/lib/rbbt/entity/identifiers.rb +106 -0
  9. data/lib/rbbt/entity.rb +1 -0
  10. data/lib/rbbt/knowledge_base/entity.rb +107 -0
  11. data/lib/rbbt/knowledge_base/query.rb +83 -0
  12. data/lib/rbbt/knowledge_base/registry.rb +106 -0
  13. data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
  14. data/lib/rbbt/knowledge_base.rb +6 -359
  15. data/lib/rbbt/tsv/accessor.rb +4 -0
  16. data/lib/rbbt/tsv/change_id.rb +119 -0
  17. data/lib/rbbt/tsv/index.rb +6 -2
  18. data/lib/rbbt/tsv/parser.rb +7 -5
  19. data/lib/rbbt/tsv/util.rb +1 -1
  20. data/lib/rbbt/tsv.rb +2 -1
  21. data/lib/rbbt/util/R/model.rb +1 -1
  22. data/lib/rbbt/util/log.rb +2 -2
  23. data/lib/rbbt/util/misc/bgzf.rb +2 -0
  24. data/lib/rbbt/util/misc/inspect.rb +1 -1
  25. data/lib/rbbt-util.rb +11 -7
  26. data/lib/rbbt.rb +0 -1
  27. data/share/rbbt_commands/app/start +1 -1
  28. data/share/rbbt_commands/tsv/change_id +2 -2
  29. data/test/rbbt/association/test_database.rb +61 -0
  30. data/test/rbbt/association/test_index.rb +67 -22
  31. data/test/rbbt/association/test_open.rb +68 -0
  32. data/test/rbbt/association/test_util.rb +108 -0
  33. data/test/rbbt/entity/test_identifiers.rb +40 -0
  34. data/test/rbbt/knowledge_base/test_entity.rb +0 -0
  35. data/test/rbbt/knowledge_base/test_query.rb +45 -0
  36. data/test/rbbt/knowledge_base/test_registry.rb +52 -0
  37. data/test/rbbt/test_association.rb +3 -3
  38. data/test/rbbt/test_knowledge_base.rb +79 -51
  39. data/test/rbbt/test_monitor.rb +0 -2
  40. data/test/rbbt/test_packed_index.rb +1 -1
  41. data/test/rbbt/test_resource.rb +6 -6
  42. data/test/rbbt/test_tsv.rb +34 -44
  43. data/test/rbbt/tsv/parallel/test_through.rb +2 -4
  44. data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
  45. data/test/rbbt/tsv/test_change_id.rb +10 -0
  46. data/test/rbbt/util/R/test_model.rb +9 -10
  47. data/test/rbbt/util/test_misc.rb +1 -1
  48. data/test/test_helper.rb +4 -1
  49. metadata +24 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e156f608c54a1a71f1d7892428698d857075c4e7
4
- data.tar.gz: 535f2b8b01c561226a389050cbb90b31b82ac9e2
3
+ metadata.gz: ec2d34290a61ca4f3f32cd875a07a15c9c7c06fd
4
+ data.tar.gz: 08ce21aa7a885530dcba3859231d94874f99bba2
5
5
  SHA512:
6
- metadata.gz: 8bad7add5780e3764172ffb11c87b8b3cebc5267fe2f11dbe0efcdd46cebd92019b669346d31ff08a0ab32321356c2fc21ef5f776d1ce188d8527f2b1a4044c9
7
- data.tar.gz: 543e58c6022ccacbbd164aa5a97df5ab7ab9c168582ec212b5b5cfce7714b02ca1f5b74b2c59a50c27297aa9113ccbf548ce62e3aafa3d530371361acb180bc3
6
+ metadata.gz: d532ef379f2238e8dbaf311f485a4e18ac774177624e327d7b1d432a6cd0304f17f945448523407bb0cbb8f14a1067b50d36585b6d554d646dd5a01124c5b4f2
7
+ data.tar.gz: b15a2660fd8be0a782fb0a139bc44b8670adf3629446be6c0b8dc3e6d27f017a2c71db0cb6d76602b306917c9909f03013226566d963c0247506a4723968ead8
data/bin/rbbt CHANGED
@@ -145,6 +145,8 @@ def rbbt_usage(prev = nil)
145
145
  true
146
146
  end
147
147
 
148
+ alias usage rbbt_usage
149
+
148
150
  def print_error(error, backtrace = nil)
149
151
  puts Log.color :magenta, "## ERROR"
150
152
  puts
@@ -0,0 +1,153 @@
1
+ require 'rbbt/association/util'
2
+ require 'rbbt/tsv/change_id'
3
+
4
+ module Association
5
+
6
# Returns a copy of +tsv+ extended with the reverse of every association.
#
# The copy is built by re-opening tsv.dumper_stream. For :double TSVs each
# zipped value row [target, *rest] of a source is added to the copy under
# +target+ as [source, *rest] through zip_new. Other TSV types get no
# reciprocal entries. The copy is annotated from +tsv+ before being returned.
def self.add_reciprocal(tsv)
  reciprocal = TSV.open(tsv.dumper_stream)

  tsv.with_unnamed do
    if tsv.type == :double
      tsv.through do |source, values|
        Misc.zip_fields(values).each do |entry|
          target, *rest = entry
          reciprocal.zip_new target, [source] + rest
        end
      end
    end
  end

  tsv.annotate(reciprocal)
  reciprocal
end
25
+
26
# Translates the key field and/or the first field of +tsv+ to other formats.
#
# tsv::                 association TSV; its key is the source and its first
#                       field is the target.
# source_final_format:: desired key format, or nil to leave the key as is.
# target_final_format:: desired format for the first field, or nil.
# options::             extra options forwarded to TSV.translate.
#
# Returns the (possibly new) TSV produced by TSV.translate.
def self.translate(tsv, source_final_format, target_final_format, options = {})
  source_field = tsv.key_field
  target_field = tsv.fields.first
  namespace = tsv.namespace

  # Collects the identifier files to use for +format+: the TSV's own files plus
  # the ones registered in Entity (when loaded). NAMESPACE placeholders are
  # replaced by the TSV namespace; files that still hold one are discarded.
  identifier_files_for = lambda do |current, format|
    files = current.identifier_files.dup
    files.concat Entity.identifier_files(format) if defined? Entity
    files.uniq!
    files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
    files.reject!{|f| f.match(/\bNAMESPACE\b/)}
    files
  end

  if source_final_format && source_field != source_final_format
    Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")

    identifier_files = identifier_files_for.call(tsv, source_final_format)
    tsv = TSV.translate(tsv, source_field, source_final_format, options.merge(:identifier_files => identifier_files))
  end

  # Translate target
  if target_final_format && target_field != target_final_format
    Log.debug("Changing target format from #{target_field} to #{target_final_format}")

    # The key field is renamed to "MASK" while the target is translated and
    # restored on the translated TSV afterwards.
    old_key_field = tsv.key_field
    tsv.key_field = "MASK"

    identifier_files = identifier_files_for.call(tsv, target_final_format)
    tsv = TSV.translate(tsv, target_field, target_final_format, options.merge(:identifier_files => identifier_files))
    tsv.key_field = old_key_field
  end

  tsv
end
61
+
62
# Reorders an already loaded TSV into association layout: source as key,
# target as first field, remaining info fields after it.
#
# tsv::     the TSV to reorder.
# options:: :fields, :undirected and :persist are extracted here with
#           Misc.process_options; the rest is handed to +headers+ to resolve
#           the source/target specification.
#
# Returns the reordered TSV, translated when a source or target format was
# requested and extended with reciprocal entries when :undirected is set.
def self.reorder_tsv(tsv, options = {})
  fields, undirected, persist = Misc.process_options options, :fields, :undirected, :persist
  all_fields = tsv.all_fields

  source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, options)

  source_field = source_pos == :key ? :key : all_fields[source_pos]
  info_fields = field_pos.collect{|f| f == :key ? :key : all_fields[f]}

  # Reorder with the field order resolved by +headers+ (target first). The
  # previous code passed the raw :fields option, which is nil when the caller
  # gives no :fields; the reordered columns then kept their original order
  # while being relabelled with +field_headers+, mislabelling them whenever the
  # target was not already the first field.
  tsv = tsv.reorder source_field, info_fields, :zipped => true

  tsv.key_field = source_header
  tsv.fields = field_headers

  tsv = translate tsv, source_format, target_format, :persist => persist if source_format or target_format

  tsv = add_reciprocal tsv if undirected

  tsv
end
83
+
84
# Parses an association TSV from +stream+, placing the source as key and the
# target as first field.
#
# stream::  the input handed to TSV::Parser.
# options:: :fields, :undirected and :persist are extracted here with
#           Misc.process_options; the rest goes to the parser and to +headers+.
#
# The parser's get_values is overridden per TSV type so that rows are read
# using the column positions computed by +headers+. The result is converted to
# :double, translated when a source or target format was requested, and
# extended with reciprocal entries when :undirected is set.
def self.open_stream(stream, options = {})
  fields, undirected, persist = Misc.process_options options, :fields, :undirected, :persist

  parser = TSV::Parser.new stream, options.merge(:fields => nil, :key_field => nil)

  source_pos, field_pos, source_header, field_headers, source_format, target_format = headers parser.all_fields, fields, options

  parser.key_field = source_pos
  parser.fields = field_pos

  case parser.type
  when :single
    class << parser
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields).first]
      end
    end
  when :list
    class << parser
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields)]
      end
    end
  when :double
    # The original clause also listed :list and :single, which could never
    # match here because the clauses above already handle them.
    class << parser
      def get_values(parts)
        [parts[@key_field].split(@sep2,-1), parts.values_at(*@fields).collect{|v| v.nil? ? [] : v.split(@sep2,-1) }]
      end
    end
  when :flat
    class << parser
      def get_values(parts)
        fields = (0..parts.length-1).to_a - [@key_field]
        values = parts.values_at(*fields).compact.collect{|v| v.split(@sep2,-1) }.flatten
        [parts[@key_field].split(@sep2,-1), values]
      end
    end
  end

  open_options = options.merge(parser.options).merge(:parser => parser)

  tsv = TSV.parse parser.stream, {}, open_options
  tsv.key_field = source_header
  tsv.fields = field_headers

  tsv = tsv.to_double unless tsv.type == :double

  tsv = translate tsv, source_format, target_format, :persist => persist if source_format or target_format

  tsv = add_reciprocal tsv if undirected

  tsv
end
139
+
140
# Builds an association database from +file+.
#
# file::    a TSV (converted to :double if needed and passed to reorder_tsv),
#           an IO (passed to open_stream), or anything else, which is first
#           turned into a stream with TSV.get_stream.
# options:: association options; a copy is handed to the helper so the
#           caller's hash is not the one the helpers consume.
def self.database(file, options = {})
  opts = options.dup

  if TSV === file
    doubled = file.type == :double ? file : file.to_double
    return reorder_tsv(doubled, opts)
  end

  stream = IO === file ? file : TSV.get_stream(file)
  open_stream(stream, opts)
end
152
+
153
+ end
@@ -1,5 +1,85 @@
1
1
  require 'rbbt/tsv'
2
+ require 'rbbt/association/open'
3
+
2
4
  module Association
5
# Builds (or loads through Persist.persist_tsv) an association index: a :list
# TSV keyed by "source~target" whose values are the info fields of the pair.
#
# file::            the association source, forwarded to Association.open.
# options::         association options; :namespace, :undirected and :recycle
#                   are read here.
# persist_options:: persistence options; when nil they are taken from
#                   +options+ with Misc.pull_keys(options, :persist).
#                   Defaults added here: :persist => true, :engine => "BDB".
#
# Returns the index set up with Association::Index.
def self.index(file, options = nil, persist_options = nil)
  options = options.nil? ? {} : options.dup
  persist_options = persist_options.nil? ? Misc.pull_keys(options, :persist) : persist_options.dup
  persist_options = Misc.add_defaults persist_options.dup, :persist => true, :engine => "BDB"

  namespace = options[:namespace]
  file = version_file(file, namespace) if namespace and String === file

  undirected = options[:undirected]

  index = Persist.persist_tsv(file, "Association Index", options, persist_options.dup) do |data|
    recycle = options[:recycle]

    # The backing database gets its own persistence file next to the index.
    persist_options[:file] = persist_options[:file] + '.database' if persist_options[:file]
    database = open(file, options, persist_options.dup)

    fields = database.fields
    source_field = database.key_field
    target_field = fields.first.split(":").last
    key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"

    TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)

    data.key_field = key_field
    data.fields = fields[1..-1]
    data.type = :list
    data.serializer = :list

    database.with_unnamed do
      database.through do |source, values|
        # Bring every TSV type to the :double shape (one array per field).
        case database.type
        when :single then values = [[values]]
        when :list   then values = values.collect{|v| [v] }
        when :flat   then values = [values]
        end

        next if values.empty?
        next if source.nil? or source.empty?

        targets, *rest = values
        size = targets ? targets.length : 0

        # With :recycle, single-valued info fields are repeated for all targets.
        if recycle and size > 1
          rest.each do |list|
            list.replace [list.first] * size if list.length == 1
          end
        end

        rest = Misc.zip_fields rest

        annotations = if rest.length > 1
                        targets.zip(rest)
                      else
                        targets.zip(rest * targets.length)
                      end

        annotations.each do |target, info|
          next if target.nil? or target.empty?

          key = [source, target] * "~"
          existing = data[key]

          # Repeated pairs have their info merged value by value with ";;".
          data[key] = if existing.nil? or info.nil?
                        info
                      else
                        existing.zip(info).collect{|p| p * ";;" }
                      end
        end
      end
    end

    data.close
    data
  end

  index.read if !(Hash === index) && index.respond_to?(:read)
  Association::Index.setup index
  index
end
3
83
  module Index
4
84
 
5
85
  attr_accessor :source_field, :target_field, :undirected
@@ -16,9 +96,15 @@ module Association
16
96
 
17
97
  def reverse
18
98
  @reverse ||= begin
19
- persistence_path = self.persistence_path
20
- persistence_path = persistence_path.find if Path === persistence_path
21
- reverse_filename = persistence_path + '.reverse'
99
+ if self.respond_to? :persistence_path
100
+ persistence_path = self.persistence_path
101
+ persistence_path = persistence_path.find if Path === persistence_path
102
+ reverse_filename = persistence_path + '.reverse'
103
+ else
104
+ raise "Can only reverse a TokyoCabinet::BDB dataset at the time"
105
+ end
106
+
107
+ self.read if self.respond_to? :read
22
108
 
23
109
  if File.exists?(reverse_filename)
24
110
  new = Persist.open_tokyocabinet(reverse_filename, false, serializer, TokyoCabinet::BDB)
@@ -66,16 +152,6 @@ module Association
66
152
 
67
153
  #{{{ Subset
68
154
 
69
- def select_entities(entities)
70
- source_type = Entity.formats[source_field]
71
- target_type = Entity.formats[target_field]
72
-
73
- source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s]
74
- target_entities = entities[:target] || entities[target_field] || entities[Entity.formats[target_field].to_s]
75
-
76
- [source_entities, target_entities]
77
- end
78
-
79
155
  def subset(source, target)
80
156
  return [] if source.nil? or target.nil? or source.empty? or target.empty?
81
157
 
@@ -110,12 +186,5 @@ module Association
110
186
  target_matches.values_at(*target.uniq).flatten.compact
111
187
  end
112
188
 
113
- def subset_entities(entities)
114
- source, target = select_entities(entities)
115
- return [] if source.nil? or target.nil?
116
- return [] if Array === target and target.empty?
117
- return [] if Array === source and source.empty?
118
- subset source, target
119
- end
120
189
  end
121
190
  end
@@ -0,0 +1,37 @@
1
+ require 'rbbt/association/database'
2
+
3
+ module Association
4
# Replaces the first 'NAMESPACE' placeholder in +file+ with +namespace+.
#
# Only String files are rewritten, and only when +namespace+ is given; any
# other input is returned untouched. When the original is a Path, its
# annotate method is called with the rewritten string.
def self.version_file(file, namespace)
  return file unless namespace and String === file

  versioned = file.sub('NAMESPACE', namespace)
  file.annotate versioned if Path === file
  versioned
end
9
+
10
# Opens an association database through Persist.persist_tsv.
#
# file::            the association source; a String has its NAMESPACE
#                   placeholder resolved when :namespace is given, and a Proc
#                   is called to obtain the actual source.
# options::         association options (default added here: :zipped => true).
# persist_options:: persistence options; when nil they are taken from
#                   +options+ with Misc.pull_keys(options, :persist).
#                   Defaults added here: :persist => true and
#                   :dir => Rbbt.var.associations.
#
# Returns whatever Persist.persist_tsv returns for the populated store.
def self.open(file, options = nil, persist_options = nil)
  options = options.nil? ? {} : options.dup
  persist_options = persist_options.nil? ? Misc.pull_keys(options, :persist) : persist_options.dup

  options = Misc.add_defaults options, :zipped => true
  persist_options = Misc.add_defaults persist_options, :persist => true, :dir => Rbbt.var.associations
  persist = persist_options[:persist]

  namespace = options[:namespace]
  file = version_file(file, namespace) if namespace and String === file
  file = file.call if Proc === file

  Persist.persist_tsv(file, "Association Database", options, persist_options) do |store|
    database = Association.database(file, options.merge(:persist => persist))
    database = database.to_double unless database.type == :double
    database.annotate store

    store.serializer = :double if store.respond_to? :serializer

    # Copy every entry of the freshly built database into the store.
    database.each do |key, values|
      store[key] = values
    end

    store
  end
end
36
+
37
+ end
@@ -0,0 +1,133 @@
1
+ require 'rbbt/entity'
2
+
3
+ module Association
4
# Finds the field that holds the same entity type as +format+.
#
# format:: a format name registered in Entity.formats.
# fields:: candidate field names.
#
# Returns [matching_field, nil, format], where matching_field is the first of
# +fields+ whose entry in Entity.formats equals the one of +format+.
# Raises RuntimeError when +format+ has no entry in Entity.formats or when no
# field shares its entity type.
def self.identify_entity_format(format, fields)
  entity_type = Entity.formats[format]
  raise "Field #{ format } could not be resolved: #{fields}" if entity_type.nil?

  main_field = fields.find { |candidate| Entity.formats[candidate] == entity_type }
  raise "Field #{ format } not present, options: #{Misc.fingerprint fields}" if main_field.nil?

  [main_field, nil, format]
end
11
+
12
# Splits a field specification into [field, format, final_format].
#
# spec:: either a String of the form "field=~format=>final_format" (the
#        "=~format" and "=>final_format" parts are optional), an Array
#        [field_part, final_format], or an Integer.
#
# An empty field part yields a nil field. Integer specs return [2, nil, nil].
# NOTE(review): the integer case returns the literal 2 rather than +spec+;
# kept as is because callers may depend on it - confirm the intent.
def self.parse_field_specification(spec)
  # Integer instead of Fixnum: the Fixnum constant was deprecated in Ruby 2.4
  # and removed in 3.2, where referencing it raises NameError on every call.
  return [2, nil, nil] if Integer === spec

  spec = spec.split "=>" unless Array === spec
  field_part, final_format = spec

  field, format = field_part.split "=~", -1
  field = nil if field.nil? or field.empty?

  [field, format, final_format]
end
23
+
24
# Parses +spec+ into a [field, header, format] triple; returns nil for a nil
# spec.
#
# +all_fields+ is kept for interface compatibility but does not affect the
# result. The previous body branched on it, yet every reachable branch
# returned the same parsed triple: the lookup through identify_entity_format
# sat behind an `all_fields.nil?` test inside a branch that only runs when
# all_fields is not nil, so it could never execute.
# NOTE(review): if resolving a spec to an equivalent field of the same entity
# type was intended, it needs to be reinstated with a reachable guard.
def self.normalize_specs(spec, all_fields = nil)
  return nil if spec.nil?

  field, header, format = parse_field_specification spec
  [field, header, format]
end
43
+
44
# Resolves the source and target specifications of an association.
#
# all_fields:: all field names, key field first (may be nil).
# options::    :source, :source_format, :target and :target_format are
#              extracted from it with Misc.process_options.
#
# Returns {:source => [field, header, format], :target => [...]}. Explicit
# :source_format / :target_format options override the format parsed from the
# spec. Missing fields are defaulted: the source to the key field and the
# target to the first non-key field, unless the other side already uses that
# one, in which case the roles are swapped.
def self.extract_specs(all_fields=nil, options = {})
  source, source_format, target, target_format = Misc.process_options options, :source, :source_format, :target, :target_format

  key_field, *fields = all_fields.nil? ? [nil] : all_fields

  source_specs = normalize_specs(source, all_fields) || [nil, nil, nil]
  target_specs = normalize_specs(target, all_fields) || [nil, nil, nil]

  source_specs[2] = source_format if source_format
  target_specs[2] = target_format if target_format

  source_missing = source_specs[0].nil?
  target_missing = target_specs[0].nil?

  if source_missing && target_missing
    source_specs[0] = key_field
    target_specs[0] = fields[0]
  elsif source_missing
    target_is_key = target_specs[0] == :key || target_specs[0] == key_field
    source_specs[0] = target_is_key ? fields[0] : key_field
  elsif target_missing
    target_specs[0] = source_specs[0] == fields.first ? key_field : fields.first
  end

  {:source => source_specs, :target => target_specs}
end
77
+
78
# Picks the default format that applies to +field+.
#
# field::          field name, looked up in Entity.formats.
# default_format:: pairs of entity type => format.
#
# For each pair, the entity type of +field+ (or the pair's own format when the
# field has no entry in Entity.formats) is turned into a String and matched
# against the pair's type with ===. Returns the format of the first match, or
# nil when nothing matches or +default_format+ is nil or empty.
def self.process_formats(field, default_format = {})
  return nil if default_format.nil? || default_format.empty?

  default_format.each do |type, format|
    candidate = (Entity.formats[field] || format).to_s
    return format if candidate === type
  end

  nil
end
86
+
87
# Works out positions and header names for an association TSV.
#
# all_fields::  all field names, key field first.
# info_fields:: fields to keep besides source and target; defaults to a copy
#               of +all_fields+. A caller-supplied array is modified in
#               place: source and target are removed and the target is put
#               first.
# options::     passed to extract_specs; :format is read afterwards as a map
#               used with process_formats to choose final formats.
#
# Returns [source_pos, field_pos, source_header, field_headers,
# source_format, target_format], where field_pos/field_headers start with the
# target. Raises RuntimeError when an info field is not part of +all_fields+.
def self.headers(all_fields, info_fields = nil, options = {})
  specs = extract_specs all_fields, options

  source_field = specs[:source][0]
  target_field = specs[:target][0]

  source_pos = all_fields.index source_field

  source_header = specs[:source][1] || specs[:source][0]
  target_header = specs[:target][1] || specs[:target][0]

  info_fields = all_fields.dup if info_fields.nil?
  info_fields.delete source_field
  info_fields.delete target_field
  info_fields.unshift target_field

  field_headers = [target_header]
  info_fields[1..-1].each do |field|
    header = case field
             when String
               field
             when Integer
               # Integer instead of Fixnum: the Fixnum constant was removed in
               # Ruby 3.2, so reaching this clause raised NameError there.
               all_fields[field]
             when :key
               all_fields.first
             end

    field_headers << header
  end

  field_pos = info_fields.collect{|f| raise "Field #{f} not found. Options: #{info_fields* ", "}" unless all_fields.include?(f); f == :key ? 0 : all_fields.index(f); }

  source_format = specs[:source][2]
  target_format = specs[:target][2]

  # A :format map, when given, takes precedence over the formats in the specs.
  if format = options[:format]
    source_format = process_formats(specs[:source][1] || specs[:source][0], format) || source_format
    target_format = process_formats(specs[:target][1] || specs[:target][0], format) || target_format
  end

  Log.low "Headers -- #{[source_pos, field_pos, source_header, field_headers, source_format, target_format]}"
  [source_pos, field_pos, source_header, field_headers, source_format, target_format]
end
133
+ end