rbbt-util 5.14.33 → 5.14.34

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +2 -0
  3. data/lib/rbbt/association/database.rb +153 -0
  4. data/lib/rbbt/association/index.rb +89 -20
  5. data/lib/rbbt/association/open.rb +37 -0
  6. data/lib/rbbt/association/util.rb +133 -0
  7. data/lib/rbbt/association.rb +1 -380
  8. data/lib/rbbt/entity/identifiers.rb +106 -0
  9. data/lib/rbbt/entity.rb +1 -0
  10. data/lib/rbbt/knowledge_base/entity.rb +107 -0
  11. data/lib/rbbt/knowledge_base/query.rb +83 -0
  12. data/lib/rbbt/knowledge_base/registry.rb +106 -0
  13. data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
  14. data/lib/rbbt/knowledge_base.rb +6 -359
  15. data/lib/rbbt/tsv/accessor.rb +4 -0
  16. data/lib/rbbt/tsv/change_id.rb +119 -0
  17. data/lib/rbbt/tsv/index.rb +6 -2
  18. data/lib/rbbt/tsv/parser.rb +7 -5
  19. data/lib/rbbt/tsv/util.rb +1 -1
  20. data/lib/rbbt/tsv.rb +2 -1
  21. data/lib/rbbt/util/R/model.rb +1 -1
  22. data/lib/rbbt/util/log.rb +2 -2
  23. data/lib/rbbt/util/misc/bgzf.rb +2 -0
  24. data/lib/rbbt/util/misc/inspect.rb +1 -1
  25. data/lib/rbbt-util.rb +11 -7
  26. data/lib/rbbt.rb +0 -1
  27. data/share/rbbt_commands/app/start +1 -1
  28. data/share/rbbt_commands/tsv/change_id +2 -2
  29. data/test/rbbt/association/test_database.rb +61 -0
  30. data/test/rbbt/association/test_index.rb +67 -22
  31. data/test/rbbt/association/test_open.rb +68 -0
  32. data/test/rbbt/association/test_util.rb +108 -0
  33. data/test/rbbt/entity/test_identifiers.rb +40 -0
  34. data/test/rbbt/knowledge_base/test_entity.rb +0 -0
  35. data/test/rbbt/knowledge_base/test_query.rb +45 -0
  36. data/test/rbbt/knowledge_base/test_registry.rb +52 -0
  37. data/test/rbbt/test_association.rb +3 -3
  38. data/test/rbbt/test_knowledge_base.rb +79 -51
  39. data/test/rbbt/test_monitor.rb +0 -2
  40. data/test/rbbt/test_packed_index.rb +1 -1
  41. data/test/rbbt/test_resource.rb +6 -6
  42. data/test/rbbt/test_tsv.rb +34 -44
  43. data/test/rbbt/tsv/parallel/test_through.rb +2 -4
  44. data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
  45. data/test/rbbt/tsv/test_change_id.rb +10 -0
  46. data/test/rbbt/util/R/test_model.rb +9 -10
  47. data/test/rbbt/util/test_misc.rb +1 -1
  48. data/test/test_helper.rb +4 -1
  49. metadata +24 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e156f608c54a1a71f1d7892428698d857075c4e7
4
- data.tar.gz: 535f2b8b01c561226a389050cbb90b31b82ac9e2
3
+ metadata.gz: ec2d34290a61ca4f3f32cd875a07a15c9c7c06fd
4
+ data.tar.gz: 08ce21aa7a885530dcba3859231d94874f99bba2
5
5
  SHA512:
6
- metadata.gz: 8bad7add5780e3764172ffb11c87b8b3cebc5267fe2f11dbe0efcdd46cebd92019b669346d31ff08a0ab32321356c2fc21ef5f776d1ce188d8527f2b1a4044c9
7
- data.tar.gz: 543e58c6022ccacbbd164aa5a97df5ab7ab9c168582ec212b5b5cfce7714b02ca1f5b74b2c59a50c27297aa9113ccbf548ce62e3aafa3d530371361acb180bc3
6
+ metadata.gz: d532ef379f2238e8dbaf311f485a4e18ac774177624e327d7b1d432a6cd0304f17f945448523407bb0cbb8f14a1067b50d36585b6d554d646dd5a01124c5b4f2
7
+ data.tar.gz: b15a2660fd8be0a782fb0a139bc44b8670adf3629446be6c0b8dc3e6d27f017a2c71db0cb6d76602b306917c9909f03013226566d963c0247506a4723968ead8
data/bin/rbbt CHANGED
@@ -145,6 +145,8 @@ def rbbt_usage(prev = nil)
145
145
  true
146
146
  end
147
147
 
148
+ alias usage rbbt_usage
149
+
148
150
  def print_error(error, backtrace = nil)
149
151
  puts Log.color :magenta, "## ERROR"
150
152
  puts
@@ -0,0 +1,153 @@
1
+ require 'rbbt/association/util'
2
+ require 'rbbt/tsv/change_id'
3
+
4
+ module Association
5
+
6
# Returns a TSV re-opened from +tsv+'s dumper stream to which, for every
# source => target association in +tsv+, the reverse target => source entry
# has been added (the remaining info values are carried along unchanged).
#
# Reverse entries are only added when +tsv+ is of type :double; for any other
# type the re-opened TSV is returned without additions.
#
# @param tsv the association TSV to extend
# @return the new TSV, annotated like +tsv+
def self.add_reciprocal(tsv)
  reciprocal = TSV.open(tsv.dumper_stream)

  tsv.with_unnamed do
    if tsv.type == :double
      tsv.through do |source, values|
        # Each zipped entry is [target, info1, info2, ...]
        Misc.zip_fields(values).each do |target, *rest|
          reciprocal.zip_new(target, [source] + rest)
        end
      end
    end
  end

  tsv.annotate(reciprocal)
  reciprocal
end
25
+
26
# Translates the identifiers of an association TSV.
#
# The key field (source) is translated to +source_final_format+ and the first
# field (target) to +target_final_format+. A translation is skipped when its
# final format is nil/false or already equals the current field name.
#
# @param tsv the association TSV
# @param source_final_format desired format for the key field, or nil
# @param target_final_format desired format for the first field, or nil
# @param options extra options forwarded to TSV.translate
# @return the translated TSV (or +tsv+ itself if nothing had to change)
def self.translate(tsv, source_final_format, target_final_format, options = {})
  source_field = tsv.key_field
  target_field = tsv.fields.first
  namespace = tsv.namespace

  # Collects the identifier files to use for a translation into +format+:
  # those of the TSV at hand plus the ones Entity knows for the format, with
  # the NAMESPACE placeholder resolved; files still containing the
  # placeholder are discarded.
  identifier_files_for = lambda do |current, format|
    files = current.identifier_files.dup
    files.concat Entity.identifier_files(format) if defined? Entity
    files.uniq!
    files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
    files.reject!{|f| f.match(/\bNAMESPACE\b/)}
    files
  end

  # Translate source
  if source_final_format and source_field != source_final_format
    Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")

    files = identifier_files_for.call(tsv, source_final_format)
    tsv = TSV.translate(tsv, source_field, source_final_format, options.merge(:identifier_files => files))
  end

  # Translate target
  if target_final_format and target_field != target_final_format
    Log.debug("Changing target format from #{target_field} to #{target_final_format}")

    # The key field is renamed to "MASK" while the target is translated and
    # restored afterwards.
    # NOTE(review): presumably this keeps the key column from being picked up
    # by the target translation -- confirm against TSV.translate.
    original_key_field = tsv.key_field
    tsv.key_field = "MASK"

    files = identifier_files_for.call(tsv, target_final_format)
    tsv = TSV.translate(tsv, target_field, target_final_format, options.merge(:identifier_files => files))
    tsv.key_field = original_key_field
  end

  tsv
end
61
+
62
# Rearranges an already loaded TSV into association layout: the source
# becomes the key, the headers are renamed as computed by +headers+, and the
# result is optionally translated and made reciprocal.
#
# Options consumed here: :fields, :undirected and :persist; the remaining
# options are handed to +headers+.
#
# @param tsv the TSV to rearrange
# @param options association options
# @return the rearranged TSV
def self.reorder_tsv(tsv, options = {})
  fields, undirected, persist = Misc.process_options options, :fields, :undirected, :persist
  all_fields = tsv.all_fields

  source_pos, _field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, options)

  source_field = source_pos == :key ? :key : all_fields[source_pos]

  # NOTE(review): the previous revision also resolved the field positions into
  # field names and merged them into a local copy of the options, but never
  # used either; that dead code was dropped. `reorder` still receives the
  # caller-supplied :fields (nil when none were given) -- confirm that this,
  # and not the resolved field list, is what is intended here.
  tsv = tsv.reorder source_field, fields, :zipped => true

  tsv.key_field = source_header
  tsv.fields = field_headers

  tsv = translate tsv, source_format, target_format, :persist => persist if source_format or target_format

  tsv = add_reciprocal tsv if undirected

  tsv
end
83
+
84
# Parses an association TSV from +stream+.
#
# A TSV::Parser is built over the stream and pointed at the source and field
# positions computed by +headers+. Its +get_values+ method is replaced with a
# version matching the parser type so that values are picked by those
# positions. The parsed TSV gets the computed headers, is converted to
# :double if needed, and is optionally translated and made reciprocal.
#
# Options consumed here: :fields, :undirected and :persist.
#
# @param stream the stream to parse
# @param options association and parser options
# @return the resulting :double TSV
def self.open_stream(stream, options = {})
  fields, undirected, persist = Misc.process_options options, :fields, :undirected, :persist

  parser = TSV::Parser.new stream, options.merge(:fields => nil, :key_field => nil)

  source_pos, field_pos, source_header, field_headers, source_format, target_format = headers parser.all_fields, fields, options

  parser.key_field = source_pos
  parser.fields = field_pos

  # Each override returns [key, values] in the shape of the parser type.
  case parser.type
  when :single
    class << parser
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields).first]
      end
    end
  when :list
    class << parser
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields)]
      end
    end
  when :double
    # This clause used to read `when :double, :list, :single`; the last two
    # values were already taken by the clauses above and could never match here.
    class << parser
      def get_values(parts)
        [parts[@key_field].split(@sep2,-1), parts.values_at(*@fields).collect{|v| v.nil? ? [] : v.split(@sep2,-1) }]
      end
    end
  when :flat
    class << parser
      def get_values(parts)
        # Every column other than the key contributes values
        fields = (0..parts.length-1).to_a - [@key_field]
        values = parts.values_at(*fields).compact.collect{|v| v.split(@sep2,-1) }.flatten
        [parts[@key_field].split(@sep2,-1), values]
      end
    end
  end

  open_options = options.merge(parser.options).merge(:parser => parser)

  tsv = TSV.parse parser.stream, {}, open_options
  tsv.key_field = source_header
  tsv.fields = field_headers

  tsv = tsv.to_double unless tsv.type == :double

  tsv = translate tsv, source_format, target_format, :persist => persist if source_format or target_format

  tsv = add_reciprocal tsv if undirected

  tsv
end
139
+
140
# Loads an association database from +file+, dispatching on what it is:
# a TSV is converted to :double (if needed) and reordered, an IO is parsed
# directly, and anything else is first turned into a stream with
# TSV.get_stream. The options are duplicated before being passed on.
#
# @param file a TSV, an IO, or something TSV.get_stream accepts
# @param options association options
# @return the association TSV
def self.database(file, options = {})
  if TSV === file
    double = file.type == :double ? file : file.to_double
    reorder_tsv(double, options.dup)
  elsif IO === file
    open_stream(file, options.dup)
  else
    open_stream(TSV.get_stream(file), options.dup)
  end
end
152
+
153
+ end
@@ -1,5 +1,85 @@
1
1
  require 'rbbt/tsv'
2
+ require 'rbbt/association/open'
3
+
2
4
  module Association
5
# Builds, through Persist.persist_tsv, a pair index for the associations in
# +file+ and sets it up as an Association::Index.
#
# The index is a :list TSV whose keys are "source~target" strings and whose
# values are the remaining info fields of the association database. When the
# same pair shows up more than once its info values are joined with ";;".
#
# Options read here: :namespace, :undirected and :recycle. When
# +persist_options+ is nil the persist keys are pulled out of +options+;
# :persist defaults to true and :engine to "BDB".
#
# @param file the association source (see Association.open)
# @param options association options, or nil
# @param persist_options persistence options, or nil
# @return the index, extended with Association::Index
def self.index(file, options = nil, persist_options = nil)
  options = options.nil? ? {} : options.dup
  persist_options = persist_options.nil? ? Misc.pull_keys(options, :persist) : persist_options.dup

  persist_options = Misc.add_defaults persist_options.dup, :persist => true, :engine => "BDB"

  file = version_file(file, options[:namespace]) if options[:namespace] and String === file

  undirected = options[:undirected]
  Persist.persist_tsv(file, "Association Index", options, persist_options.dup) do |data|
    recycle = options[:recycle]

    # The underlying database is persisted next to the index, under a
    # '.database' suffix, when a persistence file was specified
    persist_options[:file] = persist_options[:file] + '.database' if persist_options[:file]
    database = open(file, options, persist_options.dup)

    fields = database.fields
    source_field = database.key_field
    target_field = fields.first.split(":").last
    key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"

    TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)

    data.key_field = key_field
    data.fields = fields[1..-1]
    data.type = :list
    data.serializer = :list

    database.with_unnamed do
      database.through do |source, values|
        # Bring the values into the :double layout (a list of value lists)
        case database.type
        when :single
          values = [[values]]
        when :list
          values = values.collect{|v| [v] }
        when :flat
          values = [values]
        end
        next if values.empty?
        next if source.nil? or source.empty?

        targets, *rest = values

        size = targets ? targets.length : 0

        # With :recycle, single-valued info fields are repeated for every target
        rest.each do |list|
          list.replace [list.first] * size if list.length == 1
        end if recycle and size > 1

        rest = Misc.zip_fields rest

        annotations = rest.length > 1 ?
          targets.zip(rest) :
          targets.zip(rest * targets.length)

        annotations.each do |target, info|
          next if target.nil? or target.empty?
          key = [source, target] * "~"
          if data[key].nil? or info.nil?
            data[key] = info
          else
            # Pair already present: join old and new info values field by field
            old_info = data[key]
            info = old_info.zip(info).collect{|p| p * ";;" }
            data[key] = info
          end
        end
      end
    end

    data.close
    data
  end.tap do |data|
    data.read if not Hash === data and data.respond_to? :read
    Association::Index.setup data
    data
  end
end
3
83
  module Index
4
84
 
5
85
  attr_accessor :source_field, :target_field, :undirected
@@ -16,9 +96,15 @@ module Association
16
96
 
17
97
  def reverse
18
98
  @reverse ||= begin
19
- persistence_path = self.persistence_path
20
- persistence_path = persistence_path.find if Path === persistence_path
21
- reverse_filename = persistence_path + '.reverse'
99
+ if self.respond_to? :persistence_path
100
+ persistence_path = self.persistence_path
101
+ persistence_path = persistence_path.find if Path === persistence_path
102
+ reverse_filename = persistence_path + '.reverse'
103
+ else
104
+ raise "Can only reverse a TokyoCabinet::BDB dataset at the time"
105
+ end
106
+
107
+ self.read if self.respond_to? :read
22
108
 
23
109
  if File.exists?(reverse_filename)
24
110
  new = Persist.open_tokyocabinet(reverse_filename, false, serializer, TokyoCabinet::BDB)
@@ -66,16 +152,6 @@ module Association
66
152
 
67
153
  #{{{ Subset
68
154
 
69
- def select_entities(entities)
70
- source_type = Entity.formats[source_field]
71
- target_type = Entity.formats[target_field]
72
-
73
- source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s]
74
- target_entities = entities[:target] || entities[target_field] || entities[Entity.formats[target_field].to_s]
75
-
76
- [source_entities, target_entities]
77
- end
78
-
79
155
  def subset(source, target)
80
156
  return [] if source.nil? or target.nil? or source.empty? or target.empty?
81
157
 
@@ -110,12 +186,5 @@ module Association
110
186
  target_matches.values_at(*target.uniq).flatten.compact
111
187
  end
112
188
 
113
- def subset_entities(entities)
114
- source, target = select_entities(entities)
115
- return [] if source.nil? or target.nil?
116
- return [] if Array === target and target.empty?
117
- return [] if Array === source and source.empty?
118
- subset source, target
119
- end
120
189
  end
121
190
  end
@@ -0,0 +1,37 @@
1
+ require 'rbbt/association/database'
2
+
3
+ module Association
4
# Resolves the NAMESPACE placeholder in a file name.
#
# When +namespace+ is given and +file+ is a String, the first occurrence of
# 'NAMESPACE' is replaced by +namespace+; if +file+ was a Path the new string
# is annotated from it. Otherwise +file+ is returned untouched.
#
# @param file the file name (String or Path), or any other object
# @param namespace the namespace to substitute, or nil
# @return the versioned file name, or +file+ itself
def self.version_file(file, namespace)
  return file unless namespace and String === file

  versioned = file.sub('NAMESPACE', namespace)
  file.annotate versioned if Path === file
  versioned
end
9
+
10
# Opens the association database for +file+ through Persist.persist_tsv.
#
# +options+ default to :zipped => true. When +persist_options+ is nil the
# persist keys are pulled out of +options+; they default to :persist => true
# and :dir => Rbbt.var.associations. A String +file+ gets its NAMESPACE
# placeholder resolved when options[:namespace] is set, and a Proc +file+ is
# called to obtain the actual source.
#
# @param file the association source
# @param options association options, or nil
# @param persist_options persistence options, or nil
# @return the :double association database
def self.open(file, options = nil, persist_options = nil)
  options = if options.nil?
              {}
            else
              options.dup
            end

  persist_options = if persist_options.nil?
                      Misc.pull_keys(options, :persist)
                    else
                      persist_options.dup
                    end

  options = Misc.add_defaults options, :zipped => true
  persist_options = Misc.add_defaults persist_options, :persist => true, :dir => Rbbt.var.associations
  persist = persist_options[:persist]

  file = version_file(file, options[:namespace]) if options[:namespace] and String === file
  file = file.call if Proc === file

  Persist.persist_tsv(file, "Association Database", options, persist_options) do |store|
    database = Association.database(file, options.merge(:persist => persist))
    database = database.to_double unless database.type == :double
    database.annotate store

    store.serializer = :double if store.respond_to? :serializer

    # Copy every entry into the persistence store
    database.each do |key, values|
      store[key] = values
    end

    store
  end
end
36
+
37
+ end
@@ -0,0 +1,133 @@
1
+ require 'rbbt/entity'
2
+
3
+ module Association
4
# Finds which of +fields+ holds the same entity type as +format+.
#
# The entity type of +format+ is looked up in Entity.formats and the first
# field registered with that same type is selected.
#
# @param format the format name to resolve
# @param fields the candidate field names
# @return [Array] [matching field, nil, format]
# @raise [RuntimeError] if +format+ has no entity type, or if no field shares it
def self.identify_entity_format(format, fields)
  entity_type = Entity.formats[format]
  raise "Field #{ format } could not be resolved: #{fields}" if entity_type.nil?

  # `find` stops at the first match instead of filtering the whole list
  main_field = fields.find{|f| Entity.formats[f] == entity_type}
  raise "Field #{ format } not present, options: #{Misc.fingerprint fields}" if main_field.nil?

  [main_field, nil, format]
end
11
+
12
# Splits a field specification of the form "field=~header=>final_format".
#
# Both the "=~header" and the "=>final_format" parts are optional, and an
# empty field part yields nil. +spec+ may also be given already split on
# "=>" as an Array.
#
# @param spec [String, Array, Integer] the specification
# @return [Array] the three parts; missing ones are nil
def self.parse_field_specification(spec)
  # Integer replaces Fixnum, which is no longer defined in current Ruby versions.
  # NOTE(review): the literal 2 is returned whatever integer is given -- confirm
  # this is intended rather than returning the integer itself.
  return [2, nil, nil] if Integer === spec

  parts = Array === spec ? spec : spec.split("=>")
  field_part, final_format = parts

  # limit -1 keeps a trailing empty part, as in "field=~"
  field, format = field_part.split("=~", -1)

  field = nil if field.nil? or field.empty?

  [field, format, final_format]
end
23
+
24
# Parses a source/target specification into its three parts.
#
# @param spec the specification (see parse_field_specification), or nil
# @param all_fields the fields available; currently not consulted (see note)
# @return [Array, nil] the three parsed parts, or nil when +spec+ is nil
def self.normalize_specs(spec, all_fields = nil)
  return nil if spec.nil?

  field, header, format = parse_field_specification spec

  # NOTE(review): the previous revision had a branch that called
  # identify_entity_format when the field was not among +all_fields+, but it
  # was guarded by an `all_fields.nil?` test that could never be true at that
  # point, so every path returned the parsed parts unchanged. The dead branch
  # is removed and the effective behaviour kept; confirm whether resolving
  # the field through its entity type was meant to be active.
  [field, header, format]
end
43
+
44
# Works out the source and target specifications for an association file.
#
# The :source, :source_format, :target and :target_format options are taken
# from +options+. Each specification is a three-element array as returned by
# normalize_specs; an explicit :source_format / :target_format overrides the
# third element. A side whose field was not specified is filled in from
# +all_fields+ (key field and first remaining field), avoiding the field
# already taken by the other side.
#
# @param all_fields key field followed by the other fields, or nil
# @param options association options
# @return [Hash] {:source => specs, :target => specs}
def self.extract_specs(all_fields=nil, options = {})
  source, source_format, target, target_format = Misc.process_options options, :source, :source_format, :target, :target_format

  key_field, *fields = all_fields.nil? ? [nil] : all_fields
  first_field = fields.first

  source_specs = normalize_specs(source, all_fields) || [nil, nil, nil]
  target_specs = normalize_specs(target, all_fields) || [nil, nil, nil]

  source_specs[2] = source_format if source_format
  target_specs[2] = target_format if target_format

  source_missing = source_specs[0].nil?
  target_missing = target_specs[0].nil?

  if source_missing && target_missing
    # Nothing specified: key field is the source, first field the target
    source_specs[0] = key_field
    target_specs[0] = first_field
  elsif source_missing
    # Target already uses the key: take the first field as source instead
    target_is_key = target_specs[0] == :key || target_specs[0] == key_field
    source_specs[0] = target_is_key ? first_field : key_field
  elsif target_missing
    # Source already uses the first field: take the key as target instead
    target_specs[0] = source_specs[0] == first_field ? key_field : first_field
  end

  {:source => source_specs, :target => target_specs}
end
77
+
78
# Picks the default format that applies to +field+.
#
# +default_format+ maps entity types to formats. The entity type of +field+
# is looked up in Entity.formats; the format of the first entry whose type
# equals it (compared as a string) is returned. When +field+ has no known
# entity type, each entry's own format stands in for the type in that
# comparison.
#
# @param field the field name
# @param default_format [Hash, nil] entity type => format
# @return the matching format, or nil if none applies
def self.process_formats(field, default_format = {})
  return nil if default_format.nil? or default_format.empty?

  # The lookup does not depend on the entry being examined, so do it once
  field_type = Entity.formats[field]

  default_format.each do |type, format|
    entity_type = field_type || format
    return format if entity_type.to_s === type
  end

  nil
end
86
+
87
# Computes positions and headers for reading a file as an association.
#
# The source and target specifications come from extract_specs. The target
# is always placed first among the info fields and the source is removed
# from them.
#
# Note that when +info_fields+ is given, that array is modified in place
# (source and target removed, target prepended).
#
# @param all_fields [Array] key field followed by the other fields
# @param info_fields [Array, nil] fields to keep; defaults to all fields
# @param options association options (see extract_specs); :format may hold
#   default formats by entity type (see process_formats)
# @return [Array] source_pos, field_pos, source_header, field_headers,
#   source_format, target_format
# @raise [RuntimeError] if one of the info fields is not in +all_fields+
def self.headers(all_fields, info_fields = nil, options = {})
  specs = extract_specs all_fields, options

  source_field = specs[:source][0]
  target_field = specs[:target][0]

  source_pos = all_fields.index source_field
  target_pos = all_fields.index target_field

  # An explicit header (second element) takes precedence over the field name
  source_header = specs[:source][1] || specs[:source][0]
  target_header = specs[:target][1] || specs[:target][0]

  info_fields = all_fields.dup if info_fields.nil?
  info_fields.delete source_field
  info_fields.delete target_field
  info_fields.unshift target_field

  field_headers = [target_header]
  info_fields[1..-1].each do |field|
    header = case field
             when String
               field
             when Integer
               # Integer replaces Fixnum, which is no longer defined in
               # current Ruby versions
               all_fields[field]
             when :key
               all_fields.first
             end

    field_headers << header
  end

  field_pos = info_fields.collect do |f|
    raise "Field #{f} not found. Options: #{info_fields* ", "}" unless all_fields.include?(f)
    f == :key ? 0 : all_fields.index(f)
  end

  source_format = specs[:source][2]
  target_format = specs[:target][2]

  format = options[:format]
  if format
    source_format = process_formats(specs[:source][1] || specs[:source][0], format) || source_format
    target_format = process_formats(specs[:target][1] || specs[:target][0], format) || target_format
  end

  Log.low "Headers -- #{[source_pos, field_pos, source_header, field_headers, source_format, target_format]}"
  [source_pos, field_pos, source_header, field_headers, source_format, target_format]
end
133
+ end