rbbt-util 1.2.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,303 @@
1
+ require 'rbbt/util/tsv/manipulate'
2
+ require 'rbbt/util/fix_width_table'
3
+
4
+ class TSV
5
+
6
# Builds an inverted index of this TSV: a :flat TSV that maps every value
# found in the selected fields to the target entries it appears under.
#
# Options (merged over the defaults below with Misc.add_defaults):
#   :target           - field the index points to (default :key)
#   :fields           - fields whose values become index keys; when nil the
#                       target value itself is indexed as well
#   :order            - when true, matches are grouped by field position
#                       before being flattened
#   :case_insensitive - downcase index keys (defaults to this TSV's setting)
#   :persistence, :tsv_serializer - handed on to Persistence.persist
#
# Returns the result of Persistence.persist for the block below.
def index(options = {})
  options = Misc.add_defaults options, :order => false, :persistence => true, :target => :key, :fields => nil, :case_insensitive => case_insensitive, :tsv_serializer => :list

  prefix = options[:target] ? "Index[#{options[:target]}]" : "Index[:key]"

  Persistence.persist(self, prefix, :tsv, options) do |tsv, persist_options, filename|
    order, target, fields, case_insensitive = Misc.process_options persist_options, :order, :target, :fields, :case_insensitive

    new = {}

    if order
      ## Ordered: keep one slot per field position, flatten afterwards
      new_key_field, new_fields = through target, fields do |key, values|
        keys = Array === key ? key : [key]

        values.each_with_index do |list, i|
          list = [list] unless Array === list
          # Slot 0 is reserved for the target's own values when no fields are given
          i += 1 if fields.nil?
          list.each do |elem|
            # String#downcase returns a new string; the result has to be kept
            elem = elem.downcase if case_insensitive
            new[elem] ||= []
            new[elem][i] ||= []
            new[elem][i].concat keys
          end
        end

        if fields.nil?
          keys.each do |own_key|
            own_key = own_key.downcase if case_insensitive
            new[own_key] ||= []
            new[own_key][0] ||= []
            new[own_key][0].concat keys
          end
        end
      end

      new.each do |key, values|
        values.flatten!
        values.compact!
      end
    else
      ## Not ordered
      double_keys = (type == :double and identify_field(target) != :key)

      new_key_field, new_fields = through target, fields do |key, values|
        # Build a fresh list instead of unshifting into +values+: through may
        # hand over the row object held by the TSV (TODO confirm), and the
        # index must not alter the data it reads.
        values = [type == :double ? [key] : key] + values if fields.nil?
        list = type == :flat ? values : values.flatten
        list = list.collect { |e| e.downcase } if case_insensitive
        list.each do |elem|
          new[elem] ||= []
          if double_keys
            new[elem].concat key
          else
            new[elem] << key
          end
        end
      end
    end

    new.each do |key, values|
      values.uniq!
    end

    key_field = new_key_field ? new_key_field + "|" + new_fields * "|" : nil
    fields = new_key_field.nil? ? nil : [new_key_field]

    TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}])
  end
end
108
+
109
# Class-level shortcut: loads +file+ as a :double TSV and returns its index.
#
# The :data_persistence* options are stripped from +options+ and handed to
# TSV.new under the plain :persistence* names; everything left in +options+
# goes to Persistence.persist and on to the instance-level #index.
def self.index(file, options = {})
  defaults = {
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file
  }
  options = Misc.add_defaults options, defaults

  # Options for loading the underlying data
  options_data = {}
  options_data[:persistence] = Misc.process_options(options, :data_persistence)
  options_data[:persistence_file] = Misc.process_options(options, :data_persistence_file)
  options_data[:persistence_update] = Misc.process_options(options, :data_persistence_update)
  options_data[:persistence_source] = Misc.process_options(options, :data_persistence_source)
  options_data[:type] = :flat if options[:order] == false

  target = options[:target]
  prefix = target ? "Index_static[#{target}]" : "Index_static[:key]"

  Persistence.persist(file, prefix, :tsv, options) do |source, persist_options, filename|
    # Persistence is handled at this level, so the inner index must not persist again
    index_options = persist_options.merge :persistence => false, :persistence_file => nil
    TSV.new(source, :double, options_data).index index_options
  end
end
134
+
135
+
136
# Returns a new TSV holding this TSV's rows extended with fields taken from
# +other+.
#
# other      - TSV to pull values from.
# match      - how rows of self are matched to keys of +other+:
#              nil    - by key; through other.index unless both TSVs share
#                       the same key field
#              TSV    - translation table; the first field shared with self
#                       is the source, indexed towards other.key_field
#              String - name of the source field in self, resolved with
#                       other.index
#              Array  - [source field in self, field of +other+ to index on]
# fields2add - fields of +other+ to add; defaults to the fields of +other+
#              that self does not already have.
#
# Raises RuntimeError when no field names are available to decide what to add.
def smart_merge(other, match = nil, fields2add = nil)

  # Determine new fields

  new_fields = case
               when fields2add != nil
                 fields2add
               when (other.all_fields.nil? or self.all_fields.nil?)
                 # The original read an undefined local (other_fields) here
                 other.fields
               else
                 other.all_fields - all_fields
               end

  raise "No field info in other TSV" if new_fields.nil?

  # Load matching scheme. Index and source field

  match_source, match_index = case
                              when (match.nil? and not key_field.nil? and other.key_field == key_field)
                                [:key, nil]
                              when match.nil?
                                [:key, other.index]
                              when TSV === match
                                raise "No field info in match TSV" if match.fields.nil?
                                shared_field = (all_fields & match.all_fields).first
                                [shared_field, match.index(:target => other.key_field, :fields => shared_field)]
                              when String === match
                                [match, other.index]
                              when Array === match
                                [match.first, other.index(:fields => match.last)]
                              end

  # Kept from the original although its result is unused; presumably it
  # validates the source field (TODO confirm).
  identify_field match_source

  # through
  new = {}
  each do |key, values|
    source_keys = match_source == :key ? key : values[match_source]
    source_keys = [source_keys] unless Array === source_keys

    # Translate source keys into keys of +other+ exactly once; the original
    # ran a TSV +match+ through the same index twice.
    other_keys = if match_index.nil?
                   source_keys
                 else
                   source_keys.collect { |source_key| match_index[source_key] }.flatten
                 end

    other_values = other_keys.collect do |other_key|
      next unless other.include? other_key
      new_fields.collect do |field|
        field == other.key_field ? other_key : other[other_key][field]
      end
    end.compact

    added_values = TSV.zip_fields(other_values)
    added_values = added_values.collect { |v| v.first } unless type == :double

    new[key] = values + added_values
  end

  new = TSV.new new
  new.fields = fields + new_fields if fields
  new.key_field = key_field if key_field
  new.type = type

  new
end
221
+
222
# Reports, for every field of +tsv+ and for its key field, which of +values+
# occur there.
#
# tsv    - object responding to key_field, fields, keys and through.
# values - list of values to look for.
#
# Returns a Hash of field name => the subset of +values+ found in that field.
# Returns an empty Hash when the ten smallest values are exactly 1..10.
def self.field_matches(tsv, values)
  # A run of 1..10 is not worth matching (presumably row numbers - TODO confirm).
  # nils are dropped before sorting so they cannot break the comparison.
  return {} if values.flatten.compact.sort[0..9].collect { |n| n.to_i } == (1..10).to_a

  key_field = tsv.key_field
  fields = tsv.fields || []

  field_values = {}
  fields.each { |field| field_values[field] = [] }

  # The original branched on a bare +type+, which in a class method is not the
  # type of +tsv+. Both branches ended up flattened below, so entries are
  # simply collected here whatever the row shape is.
  tsv.through do |key, entry_values|
    fields.zip(entry_values).each do |field, entry_field_values|
      field_values[field] << entry_field_values
    end
  end

  field_values.each do |field, field_value_list|
    field_value_list.replace(values & field_value_list.flatten.uniq)
  end

  field_values[key_field] = values & tsv.keys

  field_values
end
257
+
258
# Instance-level convenience: delegates to TSV.field_matches with this TSV
# as the table to search for +values+.
def field_matches(values)
  TSV.field_matches self, values
end
261
+
262
# Builds a persisted (:fwt serializer) index over one position field, or over
# a start/end pair of fields.
#
# pos_start - field holding the position, or the start of the range.
# pos_end   - field holding the end of the range; nil for point positions.
#
# With no arguments the fields are inferred: a two-field TSV gives a range
# over both fields, a one-field TSV a point index over its only field.
#
# Raises RuntimeError when no start field is given and the TSV has more than
# two fields.
def sorted_index(pos_start = nil, pos_end = nil)
  raise "Please specify indexing fields" if (pos_start.nil? and fields.length > 2)

  if pos_start.nil? and pos_end.nil?
    case fields.length
    when 2
      pos_start = fields.first
      pos_end = fields.last
    when 1
      pos_start = fields.first
    end
  end

  range = ! pos_end.nil?

  # Interpolation rather than String#+ so that non-String selectors also work
  label = range ? "#{pos_start}:#{pos_end}" : pos_start

  Persistence.persist(filename, "SortedIndex[#{label}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |persist_filename, persist_options|
    pos_start, pos_end, range = Misc.process_options persist_options, :start, :end, :range

    # NOTE(review): rows skipped with +next+ leave nil entries in the collected
    # list, as in the original - confirm the :fwt serializer tolerates them.
    case
    when (type == :double and range)
      collect do |key, values|
        p_start, p_end = values.values_at pos_start, pos_end
        next if p_start.nil? or p_end.nil? or p_start.empty? or p_end.empty?
        [[p_start.first, p_end.first], key]
      end
    when (type == :double and not range)
      collect do |key, values|
        # values_at always returns an Array; unwrap it so that the emptiness
        # check and +first+ act on the field's value list, as in the range case
        p_start = values.values_at(pos_start).first
        next if p_start.nil? or p_start.empty?
        [p_start.first, key]
      end
    when range
      slice [pos_start, pos_end]
    else
      slice pos_start
    end
  end
end
300
+
301
+
302
+ end
303
+
@@ -0,0 +1,271 @@
1
+
2
+ class TSV
3
+
4
# Iterates over the entries as if the TSV were keyed by +new_key_field+ and
# held only +new_fields+, yielding (key, values) for each entry.
#
# new_key_field - field to use as key (default :key, the current key).
# new_fields    - Integer position, String name, Array of either, :key,
#                 :fields or nil (all remaining fields).
#
# Entries whose new key is nil or an empty String are skipped, except on the
# direct pass-through taken when neither the key nor the fields change.
#
# Returns [new key field name, new field names]; both are nil when the TSV
# has no field names.
# Raises RuntimeError for a +new_fields+ value of any other kind.
def through(new_key_field = :key, new_fields = nil, &block)

  # Resolve positions

  key_position = identify_field new_key_field

  field_positions = if Integer === new_fields
                      [new_fields]
                    elsif String === new_fields
                      [identify_field(new_fields)]
                    elsif Array === new_fields
                      new_fields.collect { |name| identify_field name }
                    elsif new_fields == :key
                      [:key]
                    elsif new_fields == :fields or new_fields.nil?
                      nil
                    else
                      raise "Unknown new fields specified: #{new_fields.inspect}"
                    end

  # The old key is appended as the last element before picking values, so
  # :key translates to position -1
  lookup = field_positions.collect { |pos| pos == :key ? -1 : pos } unless field_positions.nil?

  # Resolve names (only possible when the TSV has field names)

  key_name = nil
  field_names = nil

  if fields
    key_name = key_position == :key ? key_field : fields[key_position]
  end

  if fields
    field_names = if field_positions.nil?
                    names = fields.dup
                    unless key_position == :key
                      names.delete_at(key_position)
                      names.unshift key_field
                    end
                    names
                  else
                    names = fields.dup
                    names.push key_field
                    names.values_at(*lookup)
                  end
  end

  # Cycle through

  if key_position == :key and (new_fields.nil? or new_fields == fields)
    each { |key, row| yield key, row }
  else
    each do |key, row|
      new_key = (key_position.nil? or key_position == :key) ? key : row[key_position]

      new_row = if field_positions.nil? and (new_fields == :fields or (new_fields.nil? and key_position == :key))
                  row
                elsif field_positions.nil?
                  # All fields: the old key takes the place of the new key field, up front
                  shifted = row.dup
                  shifted.delete_at(key_position)
                  shifted.unshift(type == :double ? [key] : key)
                else
                  extended = row.dup
                  extended.push(type == :double ? [key] : key)
                  extended.values_at(*lookup)
                end
      new_row = NamedArray.name new_row, field_names

      next if new_key.nil? or (String === new_key and new_key.empty?)
      yield new_key, new_row
    end
  end

  [key_name, field_names]
end
92
+
93
# Returns a new TSV keyed by +new_key_field+ and holding +new_fields+.
#
# When several entries end up under the same new key, :double TSVs merge
# their values field by field; other types keep the first entry seen.
#
# options - handed to Persistence.persist (persistence is off by default).
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options, :new_key_field => new_key_field, :new_fields => new_fields, :persistence => false

  reordered, _extra = Persistence.persist(self, :Reorder, :tsv, options ) do |tsv, persist_options, filename|
    key_name = persist_options[:new_key_field]
    field_names = persist_options[:new_fields]

    data = {}
    key_name, field_names = through key_name, field_names do |key, values|
      # A :double key field yields a list of keys; each one gets the row
      (Array === key ? key : [key]).each do |single_key|
        if data[single_key].nil?
          data[single_key] = values
        elsif type == :double
          data[single_key] = data[single_key].zip(values).collect { |v| v.flatten }
        end
      end
    end

    result = TSV.new data

    result.fields = field_names
    result.key_field = key_name
    result.filename = filename
    result.type = type
    result.case_insensitive = case_insensitive
    result.identifiers = identifiers

    result
  end

  reordered
end
133
+
134
# Returns a new TSV with the same key that keeps only +fields+; shorthand
# for reorder with :key as the key field.
def slice(fields)
  reorder(:key, fields)
end
137
+
138
# Returns a new TSV with the same key that keeps only the fields whose
# +namespace+ equals the one given. Fields are selected by position.
def slice_namespace(namespace)
  matching = self.fields.each_with_index.select { |field, position| field.namespace == namespace }
  positions = matching.collect { |field, position| position }
  reorder(:key, positions)
end
144
+
145
# Returns the keys of the TSV in sorted order.
#
# fields - fields to sort on; with none given, entries are sorted by key.
#
# With a block, each entry is ranked by the block's result for its selected
# values. Without one, the rank is the first selected value (the first
# element of the first field for :double TSVs).
def sort(*fields)
  positions = if fields.length == 0
                :key
              elsif fields.length == 1
                identify_field fields.first
              else
                fields.collect { |field| identify_field field }
              end

  ranked = []
  through :key, positions do |key, values|
    rank = if block_given?
             yield(values)
           elsif type == :double
             values.first.first
           else
             values.first
           end
    ranked << [key, rank]
  end

  ranked.sort_by { |pair| pair.last }.collect { |pair| pair.first }
end
171
+
172
# Returns a new TSV with the entries that satisfy +method+.
#
# method - nil with a block: the block decides, given (key, values)
#          Array  - key or any value is one of the listed items
#          Regexp - key or any value matches
#          String - key or any value equals it
#          Hash   - {field => Array | Regexp | String}: the same tests, but
#                   restricted to +field+. An Array on the key field is
#                   resolved by direct lookup of each item.
#
# With any other +method+, or nil without a block, the result is empty.
def select(method = nil)
  selected = TSV.new({})
  selected.key_field = key_field
  selected.fields = fields.dup
  selected.type = type
  selected.filename = filename
  selected.case_insensitive = case_insensitive

  if method.nil? and block_given?
    through do |key, values|
      selected[key] = values if yield(key, values)
    end
  elsif Array === method
    through do |key, values|
      selected[key] = values if ([key, values].flatten & method).any?
    end
  elsif Regexp === method
    through do |key, values|
      selected[key] = values if [key, values].flatten.any? { |v| v =~ method }
    end
  elsif String === method
    through do |key, values|
      selected[key] = values if [key, values].flatten.any? { |v| v == method }
    end
  elsif Hash === method
    # Only the first field => criterion pair is considered
    field = method.keys.first
    criterion = method.values.first

    if Array === criterion and (field == :key or key_field == field)
      criterion.each do |item|
        row = self[item]
        selected[item] = row if row
      end
    elsif Array === criterion
      through :key, field do |key, values|
        selected[key] = self[key] if (values.flatten & criterion).any?
      end
    elsif Regexp === criterion
      through :key, field do |key, values|
        selected[key] = self[key] if values.flatten.any? { |v| v =~ criterion }
      end
    elsif String === criterion
      through :key, field do |key, values|
        selected[key] = self[key] if values.flatten.any? { |v| v == criterion }
      end
    end
  end

  selected
end
221
+
222
# Rewrites the values of +field+ in place with the result of the block.
#
# field - field to process; ignored for :flat TSVs, whose whole row is the
#         value list.
# block - called with (field_values), (field_values, key) or
#         (field_values, key, values) according to its arity.
#
# Entries with no value for the field are skipped.
# Raises RuntimeError when the block takes any other number of arguments.
def process(field, &block)
  through do |key, values|
    field_values = type == :flat ? values : values[field]

    # Test the value actually being processed: for :flat rows, indexing
    # +values+ by +field+ is meaningless.
    next if field_values.nil?

    new_values = case block.arity
                 when 1
                   yield(field_values)
                 when 2
                   yield(field_values, key)
                 when 3
                   yield(field_values, key, values)
                 else
                   raise "Unknown arity in block"
                 end

    if type == :flat
      self[key] = new_values
    else
      values[field].replace new_values
    end
  end
end
249
+
250
# Appends one field to every entry, taking its value from the block.
#
# name  - name of the new field; recorded only when the TSV has field names.
# block - called once per entry with (key, values); returns the new value.
#         For :double TSVs a non-Array result is wrapped in an Array.
def add_field(name = nil)
  each do |key, values|
    new_values = yield(key, values)
    # Module#=== is the class test; +Array == x+ never matched
    new_values = [new_values] if type == :double and not Array === new_values

    # Use the normalised value rather than yielding a second time
    self[key] = values + [new_values]
  end

  self.fields = self.fields + [name] if fields != nil and name != nil
end
260
+
261
# Appends several fields to every entry, taking their values from the block.
#
# names - names of the new fields; recorded only when the TSV has field names.
# block - called once per entry with (key, values); returns the list of new
#         field values. For :double TSVs a non-Array result is wrapped in an
#         Array.
#
# Rows are extended in place with Array#concat.
def add_fields(names = nil)
  each do |key, values|
    new_values = yield(key, values)
    # Module#=== is the class test; +Array == x+ never matched
    new_values = [new_values] if type == :double and not Array === new_values

    # Use the normalised value rather than yielding a second time
    self[key] = values.concat new_values
  end

  self.fields = self.fields.concat names if fields != nil and names != nil
end
271
+ end