rbbt-util 1.2.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,303 @@
1
+ require 'rbbt/util/tsv/manipulate'
2
+ require 'rbbt/util/fix_width_table'
3
+
4
+ class TSV
5
+
6
# Builds an inverted index of this TSV: a :flat TSV that maps every value
# found in the indexed fields to the target keys of the entries holding it.
#
# Options (defaults are filled in with Misc.add_defaults):
#   :target            field whose values are returned by the index (:key)
#   :fields            fields whose values become index keys; when nil the
#                      target keys are indexed to themselves as well
#   :order             when true, matches are collected per field position
#                      (slot 0 for the keys themselves) and then flattened,
#                      so results follow field order
#   :case_insensitive  downcase index keys (defaults to this TSV's setting)
#   :persistence, :tsv_serializer   handed to Persistence.persist untouched
#
# Returns the value of Persistence.persist for the TSV built in the block.
def index(options = {})
  options = Misc.add_defaults options, :order => false, :persistence => true, :target => :key, :fields => nil, :case_insensitive => case_insensitive, :tsv_serializer => :list

  prefix = options[:target] ? "Index[#{options[:target]}]" : "Index[:key]"

  Persistence.persist(self, prefix, :tsv, options) do |_tsv, index_options, index_filename|
    order, target, fields, case_insensitive = Misc.process_options index_options, :order, :target, :fields, :case_insensitive

    inverted = {}

    if order
      ## Ordered: keep one slot per field position, flatten afterwards
      new_key_field, new_fields = through(target, fields) do |key, values|
        keys = Array === key ? key : [key]

        values.each_with_index do |list, i|
          list = [list] unless Array === list
          i += 1 if fields.nil? # slot 0 is reserved for the keys themselves
          list.each do |elem|
            # The downcased value must be kept; the bare call used to be a no-op
            elem = elem.downcase if case_insensitive
            inverted[elem] ||= []
            inverted[elem][i] ||= []
            inverted[elem][i].concat keys
          end
        end

        if fields.nil?
          keys.each do |own_key|
            own_key = own_key.downcase if case_insensitive
            inverted[own_key] ||= []
            inverted[own_key][0] ||= []
            inverted[own_key][0].concat keys
          end
        end
      end

      inverted.each do |_elem, matches|
        matches.flatten!
        matches.compact!
      end
    else
      ## Not ordered: every value points straight at its keys
      double_keys = (type == :double && identify_field(target) != :key)

      new_key_field, new_fields = through(target, fields) do |key, values|
        # Work on a copy: #through may yield the stored row itself, and both
        # prepending the key and downcasing in place would corrupt the TSV.
        list = fields.nil? ? [key] + values : values
        list = type == :flat ? list.dup : list.flatten
        list.collect! { |e| e.downcase } if case_insensitive
        list.each do |elem|
          inverted[elem] ||= []
          if double_keys
            inverted[elem].concat key
          else
            inverted[elem] << key
          end
        end
      end
    end

    inverted.each { |_elem, matches| matches.uniq! }

    index_key_field = new_key_field ? new_key_field + "|" + new_fields * "|" : nil
    index_fields    = new_key_field.nil? ? nil : [new_key_field]

    TSV.new([inverted, {:namespace => namespace, :key_field => index_key_field, :fields => index_fields, :type => :flat, :filename => (index_filename.nil? ? nil : "Index:" + index_filename), :case_insensitive => case_insensitive}])
  end
end
108
+
109
# Class-level shortcut: loads +file+ as a :double TSV and returns its index,
# running the whole thing through Persistence.persist.
#
# The :data_persistence* options are pulled out of +options+ and handed to
# TSV.new under the plain :persistence* names; everything left over goes to
# TSV#index (with persistence switched off there, since this call already
# runs inside Persistence.persist).
def self.index(file, options = {})
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file

  options_data = {}
  options_data[:persistence]        = Misc.process_options(options, :data_persistence)
  options_data[:persistence_file]   = Misc.process_options(options, :data_persistence_file)
  options_data[:persistence_update] = Misc.process_options(options, :data_persistence_update)
  options_data[:persistence_source] = Misc.process_options(options, :data_persistence_source)

  # Only an explicit :order => false selects the flat layout
  options_data[:type] = :flat if options[:order] == false

  prefix = if options[:target]
             "Index_static[#{options[:target]}]"
           else
             "Index_static[:key]"
           end

  Persistence.persist(file, prefix, :tsv, options) do |source, index_options, _filename|
    data = TSV.new(source, :double, options_data)
    data.index(index_options.merge(:persistence => false, :persistence_file => nil))
  end
end
134
+
135
+
136
# Returns a new TSV with the rows of this one extended by fields taken from
# +other+.
#
# other      - TSV providing the extra fields.
# match      - how rows are paired with entries of +other+:
#              nil     same key field => keys are used directly; otherwise
#                      keys are translated through other.index
#              TSV     translation table: its first field shared with this
#                      TSV is indexed against other's key field
#              String  name of the field of this TSV to translate through
#                      other.index
#              Array   [source field, field of +other+ to index on]
# fields2add - explicit list of fields of +other+ to add; by default the
#              fields of +other+ that this TSV does not already have.
#
# Raises RuntimeError when +match+ is a TSV without field information.
def smart_merge(other, match = nil, fields2add = nil)

  # Determine the fields to add
  new_fields = case
               when fields2add != nil
                 fields2add
               when (other.all_fields.nil? or self.all_fields.nil?)
                 # Was the undefined name `other_fields`
                 other.fields
               else
                 other.all_fields - all_fields
               end

  # Load matching scheme: source field and (optional) translation index

  match_source, match_index = case
                              when (match.nil? and not key_field.nil? and other.key_field == key_field)
                                [:key, nil]
                              when match.nil?
                                [:key, other.index]
                              when TSV === match
                                raise "No field info in match TSV" if match.fields.nil?
                                source = (all_fields & match.all_fields).first
                                [source, match.index(:target => other.key_field, :fields => source)]
                              when String === match
                                [match, other.index]
                              when Array === match
                                [match.first, other.index(:fields => match.last)]
                              end

  # Result unused; call kept in case identify_field validates the field
  # name -- TODO confirm and drop otherwise.
  identify_field match_source

  # through
  merged = {}
  each do |key, values|
    source_keys = match_source == :key ? key : values[match_source]
    source_keys = [source_keys] unless Array === source_keys

    # Translate exactly once. The TSV-match case used to look the already
    # translated keys up in the same index a second time.
    other_keys = if match_index.nil?
                   source_keys
                 else
                   source_keys.collect { |source_key| match_index[source_key] }.flatten.compact
                 end

    other_values = other_keys.collect do |other_key|
      next unless other.include? other_key
      new_fields.collect do |field|
        if field == other.key_field
          other_key
        else
          other[other_key][field]
        end
      end
    end.compact

    if type == :double
      new_values = values + TSV.zip_fields(other_values)
    else
      new_values = values + TSV.zip_fields(other_values).collect { |v| v.first }
    end
    merged[key] = new_values
  end

  result = TSV.new merged
  result.fields = fields + new_fields if fields
  result.key_field = key_field if key_field
  result.type = type

  result
end
221
+
222
# Reports, for each field of +tsv+ (and for its key field), which of
# +values+ occur in it.
#
# tsv    - object answering key_field, fields, through and keys.
# values - list of values to look for.
#
# Returns a Hash of field name => subset of +values+ present in that field.
# Returns {} when the first ten sorted values convert to 1..10.
# NOTE(review): that shortcut looks like a guard against matching plain row
# numbers -- confirm the intent. Strings "1".."10" sort lexicographically
# and do not trigger it.
def self.field_matches(tsv, values)
  # compact before sorting: a nil cannot be compared while sorting
  leading = values.flatten.compact.sort[0..9].collect { |n| n.to_i }
  return {} if leading == (1..10).to_a

  key_field = tsv.key_field
  fields = tsv.fields

  field_values = {}
  fields.each { |field| field_values[field] = [] }

  # The original branched on a bare `type`, which is not defined for the
  # class itself. Both branches give the same result once the lists are
  # flattened below, so a single loop serves every TSV type.
  tsv.through do |_key, entry_values|
    fields.zip(entry_values).each do |field, entry_field_values|
      field_values[field] << entry_field_values
    end
  end

  field_values.each do |_field, field_value_list|
    field_value_list.replace(values & field_value_list.flatten.uniq)
  end

  field_values[key_field] = values & tsv.keys

  field_values
end
257
+
258
# Instance-level convenience: runs TSV.field_matches with this TSV as the
# table to inspect and returns its result unchanged.
def field_matches(values)
  TSV.field_matches self, values
end
261
+
262
# Builds a position index (point or range) over one or two fields, stored
# through Persistence.persist with the :fwt persistence type.
#
# pos_start - field holding the start position; optional when the TSV has
#             one or two fields (the first field is used).
# pos_end   - field holding the end position; giving it (or having exactly
#             two fields and no arguments) makes this a range index.
#
# Raises RuntimeError when no start field is given and there are more than
# two fields to choose from.
#
# NOTE(review): entries skipped with `next` are left as nil in the data
# handed to the persistence layer -- confirm it tolerates them.
def sorted_index(pos_start = nil, pos_end = nil)
  raise "Please specify indexing fields" if (pos_start.nil? and fields.length > 2)

  if pos_start.nil? and pos_end.nil?
    case fields.length
    when 2
      pos_start = fields.first
      pos_end = fields.last
    when 1
      pos_start = fields.first
    end
  end

  range = ! pos_end.nil?

  # Interpolation (rather than String#+) also copes with Integer positions
  label = range ? "#{pos_start}:#{pos_end}" : pos_start

  Persistence.persist(filename, "SortedIndex[#{label}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |_filename, index_options|
    pos_start, pos_end, range = Misc.process_options index_options, :start, :end, :range

    case
    when (type == :double and range)
      collect do |key, values|
        p_start, p_end = values.values_at pos_start, pos_end
        next if p_start.nil? or p_end.nil? or p_start.empty? or p_end.empty?
        [[p_start.first, p_end.first], key]
      end
    when type == :double
      collect do |key, values|
        # values_at returns a one-element list; unwrap it so that p_start is
        # the field's value list, as in the range branch above
        p_start = values.values_at(pos_start).first
        next if p_start.nil? or p_start.empty?
        [p_start.first, key]
      end
    when range
      slice [pos_start, pos_end]
    else
      slice pos_start
    end
  end
end
300
+
301
+
302
+ end
303
+
@@ -0,0 +1,271 @@
1
+
2
+ class TSV
3
+
4
# Iterates over the entries as if the TSV were keyed by +new_key_field+ and
# restricted to +new_fields+, yielding (key value, field values) per entry.
#
# new_key_field - field to use as key; resolved with identify_field
#                 (defined elsewhere), :key meaning the current key.
# new_fields    - Integer position, String name, Array of either, :key,
#                 :fields or nil. With nil and a new key field, the old key
#                 takes the place of the field promoted to key.
#
# Entries whose new key is nil or an empty String are skipped.
# Raises RuntimeError for any other kind of +new_fields+.
# Returns [new key field name, new field names]; both are nil when the TSV
# has no field names.
def through(new_key_field = :key, new_fields = nil, &block)

  # Resolve positions

  key_pos = identify_field new_key_field

  field_positions =
    if Integer === new_fields
      [new_fields]
    elsif String === new_fields
      [identify_field(new_fields)]
    elsif Array === new_fields
      new_fields.collect { |name| identify_field name }
    elsif new_fields == :key
      [:key]
    elsif new_fields == :fields || new_fields.nil?
      nil
    else
      raise "Unknown new fields specified: #{new_fields.inspect}"
    end

  # The old key is appended as the last element before picking positions,
  # so :key translates to index -1
  lookup = field_positions && field_positions.collect { |pos| pos == :key ? -1 : pos }

  # Resolve names

  key_name = nil
  field_names = nil
  if fields
    key_name = key_pos == :key ? key_field : fields[key_pos]

    field_names =
      if field_positions.nil?
        if key_pos == :key
          fields.dup
        else
          names = fields.dup
          names.delete_at(key_pos)
          names.unshift key_field
        end
      else
        fields.dup.push(key_field).values_at(*lookup)
      end
  end

  # Cycle through

  if key_pos == :key and (new_fields.nil? or new_fields == fields)
    # Nothing to rearrange: rows are yielded as stored
    each { |key, row| yield key, row }
  else
    each do |key, row|
      new_key = (key_pos.nil? or key_pos == :key) ? key : row[key_pos]

      new_row =
        if field_positions.nil? and new_fields == :fields
          row
        elsif field_positions.nil?
          rest = row.dup
          rest.delete_at(key_pos)
          rest.unshift(type == :double ? [key] : key)
        else
          extended = row.dup
          extended.push(type == :double ? [key] : key)
          extended.values_at(*lookup)
        end
      new_row = NamedArray.name new_row, field_names

      next if new_key.nil? or (String === new_key and new_key.empty?)
      yield new_key, new_row
    end
  end

  # Return new field names

  [key_name, field_names]
end
92
+
93
# Returns a new TSV keyed by +new_key_field+ and holding +new_fields+,
# built by walking the entries with #through.
#
# When the new key value is a list, the row is stored under each element.
# When two rows land on the same key they are merged position by position
# for :double TSVs; for other types the first row wins.
#
# options are handed to Persistence.persist (persistence is off by default).
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options, :new_key_field => new_key_field, :new_fields => new_fields, :persistence => false

  # Multiple assignment keeps only the first element of the persist result
  # when that result is array-like
  reordered, _extra = Persistence.persist(self, :Reorder, :tsv, options) do |_tsv, persist_options, persist_filename|
    table = {}

    key_name, field_names = through(persist_options[:new_key_field], persist_options[:new_fields]) do |key, values|
      (Array === key ? key : [key]).each do |single_key|
        if table[single_key].nil?
          table[single_key] = values
        elsif type == :double
          table[single_key] = table[single_key].zip(values).collect { |pair| pair.flatten }
        end
      end
    end

    result = TSV.new table

    result.fields = field_names
    result.key_field = key_name
    result.filename = persist_filename
    result.type = type
    result.case_insensitive = case_insensitive
    result.identifiers = identifiers

    result
  end

  reordered
end
133
+
134
# Returns a TSV with the same keys restricted to +fields+ (a thin wrapper
# around #reorder with :key as the key field).
def slice(fields)
  reorder(:key, fields)
end
137
+
138
# Returns a TSV with the same keys restricted to the fields whose
# #namespace equals +namespace+; the fields are passed to #reorder by
# position.
def slice_namespace(namespace)
  matching = self.fields.each_with_index.select { |field, _pos| field.namespace == namespace }
  reorder :key, matching.collect { |_field, pos| pos }
end
144
+
145
# Returns the keys of the TSV ordered by a sort value.
#
# fields - fields to sort on (none: the key itself). They are resolved with
#          identify_field and handed to #through.
#
# With a block, the sort value is whatever the block returns for the
# selected field values. Without one it is the first selected field (its
# first element for :double TSVs).
def sort(*fields)
  target = case fields.length
           when 0 then :key
           when 1 then identify_field(fields.first)
           else fields.collect { |field| identify_field field }
           end

  ranked = []
  through(:key, target) do |key, values|
    rank = if block_given?
             yield(values)
           elsif type == :double
             values.first.first
           else
             values.first
           end
    ranked << [key, rank]
  end

  ranked.sort_by { |_key, rank| rank }.collect { |key, _rank| key }
end
171
+
172
# Returns a new TSV (same key field, fields, type, filename and case
# setting) holding the entries that satisfy +method+:
#
#   nil + block  entries for which the block, given key and values, is true
#   Array        entries whose key or values include any of the elements
#   Regexp       entries whose key or any value matches
#   String       entries whose key or any value equals it
#   Hash         {field => Array | Regexp | String}: the same tests applied
#                to one field only (first pair of the Hash). With an Array
#                on the key field the entries are looked up directly.
#
# Anything else selects nothing.
def select(method = nil)
  selected = TSV.new({})
  selected.key_field = key_field
  selected.fields = fields.dup
  selected.type = type
  selected.filename = filename
  selected.case_insensitive = case_insensitive

  # Turns a criterion into a test over a flat list of cells (nil when the
  # criterion is not of a supported kind)
  build_matcher = lambda do |criterion|
    case criterion
    when Array  then lambda { |cells| (cells & criterion).any? }
    when Regexp then lambda { |cells| cells.any? { |v| v =~ criterion } }
    when String then lambda { |cells| cells.any? { |v| v == criterion } }
    end
  end

  if Hash === method
    field = method.keys.first
    criterion = method.values.first

    if Array === criterion and (field == :key or key_field == field)
      criterion.each do |item|
        row = self[item]
        selected[item] = row if row
      end
    else
      matcher = build_matcher.call(criterion)
      through(:key, field) { |key, values| selected[key] = self[key] if matcher.call(values.flatten) } if matcher
    end
  elsif method.nil?
    through { |key, values| selected[key] = values if yield key, values } if block_given?
  else
    matcher = build_matcher.call(method)
    through { |key, values| selected[key] = values if matcher.call([key, values].flatten) } if matcher
  end

  selected
end
221
+
222
# Rewrites the values of +field+ in place with the result of the block.
#
# The block receives, depending on its arity:
#   1 argument   the field values
#   2 arguments  the field values and the entry key
#   3 arguments  the field values, the key and the whole row
#
# For :flat TSVs the row itself is the field: +field+ is not used to index
# into it, and the block result replaces the whole row. For other types the
# field's list is replaced in place; entries where the field is nil are
# skipped.
#
# Raises RuntimeError when the block has any other arity.
def process(field, &block)
  through do |key, values|
    flat = type == :flat
    field_values = flat ? values : values[field]

    # Test what will actually be processed: indexing a flat row by field
    # (as the original guard did) is meaningless and can raise
    next if field_values.nil?

    new_values = case block.arity
                 when 1
                   yield(field_values)
                 when 2
                   yield(field_values, key)
                 when 3
                   yield(field_values, key, values)
                 else
                   raise "Unknown arity in block"
                 end

    if flat
      self[key] = new_values
    else
      field_values.replace new_values
    end
  end
end
249
+
250
# Appends one field to every entry; its value is what the block returns
# for (key, values). The block is called exactly once per entry.
#
# For :double TSVs a non-Array result is wrapped in a list so that the new
# field has the same shape as the existing ones.
#
# name - name for the new field; recorded only when the TSV already has
#        field names.
def add_field(name = nil)
  each do |key, values|
    new_values = yield(key, values)
    # `Array === x` is the class test; the original `Array == x` never was,
    # and its result was thrown away by a second yield
    new_values = [new_values] if type == :double and not Array === new_values

    self[key] = values + [new_values]
  end

  self.fields = self.fields + [name] if fields != nil and name != nil
end
260
+
261
# Appends several fields to every entry; the block returns, for
# (key, values), the list of new field values. The block is called exactly
# once per entry.
#
# For :double TSVs a non-Array result is wrapped in a list before being
# appended.
#
# names - names for the new fields; recorded only when the TSV already has
#         field names.
def add_fields(names = nil)
  each do |key, values|
    new_values = yield(key, values)
    # `Array === x` is the class test; the original `Array == x` never was,
    # and its result was thrown away by a second yield
    new_values = [new_values] if type == :double and not Array === new_values

    self[key] = values.concat new_values
  end

  self.fields = self.fields.concat names if fields != nil and names != nil
end
271
+ end