rbbt-util 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
@@ -89,6 +89,7 @@ class TSV
89
89
  return nil if @key_field.nil?
90
90
  k = @key_field.dup
91
91
  k.extend Field
92
+ k.namespace = namespace unless namespace.nil?
92
93
  k
93
94
  end
94
95
 
@@ -134,6 +135,11 @@ class TSV
134
135
  TSV.identify_field(key_field, fields, field)
135
136
  end
136
137
 
138
+ def key_field=(new_key_field)
139
+ @key_field = new_key_field
140
+ @data.key_field = new_key_field if @data.respond_to? :key_field= and @data.write?
141
+ end
142
+
137
143
  def fields=(new_fields)
138
144
  new_fields.collect! do |field|
139
145
  if Field === field
@@ -150,11 +156,6 @@ class TSV
150
156
  @data.fields = new_fields if @data.respond_to? :fields= and @data.write?
151
157
  end
152
158
 
153
- def old_fields=(new_fields)
154
- @fields = new_fields
155
- @data.fields = new_fields if @data.respond_to? :fields=
156
- end
157
-
158
159
  def keys
159
160
  @data.keys
160
161
  end
@@ -183,13 +184,14 @@ class TSV
183
184
 
184
185
  # Read
185
186
 
187
+ attr_accessor :unnamed
186
188
  def follow(value)
187
189
  return nil if value.nil?
188
190
  if String === value && value =~ /__Ref:(.*)/
189
191
  return self[$1]
190
192
  else
191
193
 
192
- if Array === value and fields
194
+ if Array === value and not unnamed
193
195
  value = NamedArray.name value, fields
194
196
  end
195
197
  value
@@ -128,7 +128,7 @@ class TSV
128
128
  if other.include? key
129
129
  new_values = other[key].values_at *fields
130
130
  new_values.collect!{|v| [v]} if type == :double and not other.type == :double
131
- new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
131
+ new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
132
132
  self[key] = self[key].concat new_values
133
133
  else
134
134
  if type == :double
@@ -151,6 +151,7 @@ class TSV
151
151
 
152
152
  through do |key, values|
153
153
  source_keys = values[source]
154
+ source_keys = [source_keys] unless Array === source_keys
154
155
  if source_keys.nil? or source_keys.empty?
155
156
  all_new_values = []
156
157
  else
@@ -165,8 +166,8 @@ class TSV
165
166
  end
166
167
  end
167
168
 
168
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
169
- new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
169
+ new_values.collect!{|v| [v]} if type == :double and not other.type == :double
170
+ new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
170
171
  all_new_values << new_values
171
172
  end
172
173
  end
@@ -191,12 +192,12 @@ class TSV
191
192
 
192
193
  def attach_index(other, index, fields = nil)
193
194
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
195
+ fields = [fields] unless Array === fields
194
196
 
195
197
  other = other.tsv unless TSV === other
196
198
  field_positions = fields.collect{|field| other.identify_field field}
197
199
  field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
198
200
 
199
-
200
201
  length = self.fields.length
201
202
  through do |key, values|
202
203
  source_keys = index[key]
@@ -218,7 +219,7 @@ class TSV
218
219
  end
219
220
  end
220
221
  new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
221
- new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
222
+ new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
222
223
  all_new_values << new_values
223
224
  end
224
225
  end
@@ -227,7 +228,7 @@ class TSV
227
228
  if type == :double
228
229
  all_new_values = [[[]] * field_positions.length]
229
230
  else
230
- all_new_values = [[""] * field_positions.length]
231
+ all_new_values = [[""] * field_positions.length]
231
232
  end
232
233
  end
233
234
 
@@ -260,7 +261,11 @@ class TSV
260
261
  in_namespace = options[:in_namespace]
261
262
 
262
263
  if in_namespace
263
- ids = [files.first.all_namespace_fields(in_namespace)]
264
+ if files.first.all_fields.include? in_namespace
265
+ ids = [[in_namespace]]
266
+ else
267
+ ids = [files.first.all_namespace_fields(in_namespace)]
268
+ end
264
269
  ids += files[1..-1].collect{|f| f.all_fields}
265
270
  else
266
271
  ids = files.collect{|f| f.all_fields}
@@ -276,7 +281,7 @@ class TSV
276
281
  id_list << match.first
277
282
  end
278
283
 
279
- if id_list.last.first != files.last.all_fields.first
284
+ if id_list.last != files.last.all_fields.first
280
285
  id_list << files.last.all_fields.first
281
286
  id_list.zip(files)
282
287
  else
@@ -312,14 +317,14 @@ class TSV
312
317
  next_key, next_file = path.shift
313
318
 
314
319
  if current_index.nil?
315
- current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
320
+ current_index = next_file.index :target => next_key, :fields => current_key, :persistence => (persist_input and path.empty?)
316
321
  else
317
322
  next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
318
323
  current_index.process current_index.fields.first do |values|
319
324
  if values.nil?
320
325
  nil
321
326
  else
322
- next_index.values_at(*values).flatten.collect
327
+ next_index.values_at(*values).flatten.collect.to_a
323
328
  end
324
329
  end
325
330
  current_index.fields = [next_key]
@@ -395,27 +400,13 @@ class TSV
395
400
  reorder :key, detached_fields
396
401
  end
397
402
 
398
- def paste(other, options = {})
399
- tmpfile = TmpFile.tmp_file
400
- TSV.paste(self.to_s, other.to_s, tmpfile)
401
-
402
- new = TSV.new(tmpfile, options)
403
-
404
- new.key_field = self.key_field unless self.key_field.nil?
405
- if self.fields and other.fields
406
- new.fields = self.fields + other.fields
407
- end
408
-
409
- FileUtils.rm tmpfile if File.exists? tmpfile
410
-
411
- new
412
- end
413
-
414
-
415
403
  def paste(other, options = {})
416
404
  TmpFile.with_file do |output|
417
405
  TSV.paste_merge(self, other, output, options[:sep] || "\t")
418
- TSV.new output, options
406
+ tsv = TSV.new output, options
407
+ tsv.key_field = self.key_field unless self.key_field.nil?
408
+ tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
409
+ tsv
419
410
  end
420
411
  end
421
412
 
@@ -0,0 +1,193 @@
1
+ require 'rbbt/util/misc'
2
+ module Filtered
3
+
4
+ class Filter
5
+ attr_accessor :data, :match, :fieldnum, :value, :list, :unsaved
6
+ attr_accessor :persistence
7
+ def initialize(data, match, value, persistence = nil)
8
+ @data = data
9
+ @value = value
10
+ @unsaved = []
11
+
12
+ case
13
+ when Hash === persistence
14
+ @persistence = persistence
15
+ when String === persistence
16
+ @persistence = TSV.new TCHash.get(persistence)
17
+ @persistence.read
18
+ end
19
+
20
+ @list = nil
21
+ case
22
+ when match.match(/field:(.*)/)
23
+ field_num = data.identify_field $1
24
+ Misc.add_method(self, :match) do |entry|
25
+ entry[field_num] == value
26
+ end
27
+ end
28
+ end
29
+
30
+ def key
31
+ case
32
+ when String === value
33
+ value
34
+ else
35
+ Marshal.dump(value)
36
+ end
37
+ end
38
+
39
+ def save(ids)
40
+ if persistence
41
+ persistence.write
42
+ persistence[self.key] = ids
43
+ persistence.read
44
+ else
45
+ if list.nil?
46
+ @list = ids
47
+ else
48
+ @list.replace ids
49
+ end
50
+ end
51
+ end
52
+
53
+ def update
54
+ ids = []
55
+ data.unfiltered_each do |key, entry|
56
+ ids << key if match(entry)
57
+ end
58
+ save(ids.sort)
59
+ end
60
+
61
+ def saved
62
+ if persistence.nil?
63
+ return nil if list.nil?
64
+ list
65
+ else
66
+ return nil if not persistence.include?(self.key)
67
+ persistence[self.key]
68
+ end
69
+ end
70
+
71
+ def add_unsaved
72
+ save(Misc.merge_sorted_arrays(unsaved.sort, saved || [])) if unsaved.any?
73
+ unsaved.clear
74
+ end
75
+
76
+ def ids
77
+ add_unsaved
78
+
79
+ list = saved
80
+ if list.nil?
81
+ update
82
+ list = saved
83
+ end
84
+ list
85
+ end
86
+
87
+ def add(id)
88
+ unsaved.push id
89
+ end
90
+
91
+ def reset
92
+ if persistence
93
+ persistence.clear
94
+ else
95
+ @list = nil
96
+ end
97
+ end
98
+ end
99
+
100
+ def self.extended(base)
101
+ class << base
102
+ attr_accessor :filter_dir, :filters
103
+ end
104
+
105
+ Misc.redefine_method base, :[]=, :unfiltered_set do |key,value|
106
+ if filters.empty?
107
+ self.send(:unfiltered_set, key, value)
108
+ else
109
+ filters.each do |filter|
110
+ filter.add key if filter.match value
111
+ end
112
+ self.send(:unfiltered_set, key, value)
113
+ end
114
+ end
115
+
116
+ Misc.redefine_method base, :keys, :unfiltered_keys do
117
+ if filters.empty?
118
+ self.send(:unfiltered_keys)
119
+ else
120
+ filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
121
+ end
122
+ end
123
+
124
+ Misc.redefine_method base, :values, :unfiltered_values do
125
+ if filters.empty?
126
+ self.send(:unfiltered_values)
127
+ else
128
+ ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
129
+ self.send :values_at, *ids
130
+ end
131
+ end
132
+
133
+ Misc.redefine_method base, :each, :unfiltered_each do |&block|
134
+ if filters.empty?
135
+ self.send(:unfiltered_each, &block)
136
+ else
137
+ ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
138
+ new = self.dup
139
+ new.data = {}
140
+
141
+ ids.zip(self.send(:values_at, *ids)).each do |id, values|
142
+ new[id] = values
143
+ end
144
+
145
+ new.send :each, &block
146
+ end
147
+ end
148
+
149
+ Misc.redefine_method base, :collect, :unfiltered_collect do |&block|
150
+ if filters.empty?
151
+ self.send(:unfiltered_collect, &block)
152
+ else
153
+ ids = filters.inject(nil){|list,filter| list = (list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids))}
154
+ new = self.dup
155
+ new.data = {}
156
+ ids.zip(self.send(:values_at, *ids)).each do |id, values|
157
+ new[id] = values
158
+ end
159
+ new.send :collect, &block
160
+ end
161
+ end
162
+ end
163
+
164
+ def filter_name(match, value)
165
+ @filename + "&F[#{match}=#{value}]"
166
+ end
167
+
168
+ def add_filter(match, value, persistence = nil)
169
+ if persistence.nil? and filter_dir
170
+ persistence = File.join(filter_dir, match.to_s)
171
+ end
172
+
173
+ @filename = filter_name(match, value) if @filename
174
+
175
+ filters.push Filter.new self, match, value, persistence
176
+ end
177
+
178
+ def pop_filter
179
+ @filename = @filename.sub(/&F\[[^\]]*\]$/, '') if @filename
180
+ filters.pop
181
+ end
182
+
183
+ end
184
+
185
+ class TSV
186
+ def filter(filter_dir = nil)
187
+ self.extend Filtered
188
+ self.filter_dir = filter_dir
189
+ self.filters = []
190
+ self
191
+ end
192
+ end
193
+
@@ -13,7 +13,7 @@ class TSV
13
13
  "Index[:key]"
14
14
  end
15
15
 
16
- new = Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
16
+ Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
17
17
  order, target, fields, case_insensitive = Misc.process_options options, :order, :target, :fields, :case_insensitive
18
18
 
19
19
  new = {}
@@ -34,7 +34,7 @@ class TSV
34
34
  list = [list] unless Array === list
35
35
  i += 1 if fields.nil?
36
36
  list.each do |elem|
37
- next if elem.empty?
37
+ next if elem.nil? or elem.empty?
38
38
  elem.downcase if case_insensitive
39
39
  new[elem] ||= []
40
40
  new[elem][i] ||= []
@@ -56,10 +56,9 @@ class TSV
56
56
  # flatten
57
57
 
58
58
  new.each do |key, values|
59
- values.flatten!
60
- values.compact!
59
+ new[key] = values.flatten.compact
61
60
  end
62
-
61
+
63
62
  ## Not ordered
64
63
  else
65
64
  double_keys = true unless type != :double or identify_field(target) == :key
@@ -74,7 +73,7 @@ class TSV
74
73
  end
75
74
  list.collect!{|e| e.downcase} if case_insensitive
76
75
  list.each do |elem|
77
- next if elem.empty?
76
+ next if elem.nil? or elem.empty?
78
77
  new[elem] ||= []
79
78
  if double_keys
80
79
  new[elem].concat key
@@ -103,7 +102,9 @@ class TSV
103
102
  else
104
103
  [new_key_field]
105
104
  end
105
+
106
106
  new = TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}])
107
+
107
108
  new
108
109
  end
109
110
  end
@@ -293,7 +294,7 @@ class TSV
293
294
 
294
295
  range = ! pos_end.nil?
295
296
 
296
- index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end: pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
297
+ index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end : pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
297
298
  pos_start, pos_end, range = Misc.process_options options, :start, :end, :range
298
299
  data = case
299
300
  when (type == :double and range)
@@ -319,6 +320,77 @@ class TSV
319
320
  index
320
321
  end
321
322
 
323
+ def pos_index(pos_field, file = nil, update = false)
324
+ value_size = 0
325
+ index_data = []
322
326
 
323
- end
327
+ file ||= filename + "-PosIndex[#{ pos_field }]" if filename
328
+
329
+ through :key, pos_field do |key, values|
330
+ value_size = key.length if key.length > value_size
331
+
332
+ pos = values.first
333
+ if Array === pos
334
+ pos.each do |p|
335
+ index_data << [key, p.to_i]
336
+ end
337
+ else
338
+ index_data << [key, pos.to_i]
339
+ end
340
+ end
341
+
342
+ pos_index = case
343
+ when file == :memory
344
+ index = FixWidthTable.new(file, value_size, false)
345
+ index.add_point index_data
346
+ index
347
+ when (update or not File.exists? file)
348
+ index = FixWidthTable.new(file, value_size, false, true)
349
+ index.add_point index_data
350
+ index
351
+ else
352
+ FixWidthTable.new(file, value_size, false)
353
+ end
354
+
355
+ pos_index
356
+ end
324
357
 
358
+ def range_index(start_field, end_field, file = nil, update = false)
359
+ value_size = 0
360
+ index_data = []
361
+
362
+ file ||= filename + "-PosIndex[#{ start_field }-#{end_field}]" if filename
363
+
364
+ through :key, [start_field, end_field] do |key, values|
365
+ value_size = key.length if key.length > value_size
366
+
367
+ start_pos, end_pos = values
368
+
369
+ if Array === start_pos
370
+ start_pos.zip(end_pos).each do |s,e|
371
+ index_data << [key, [s.to_i, e.to_i]]
372
+ end
373
+ else
374
+ index_data << [key, [start_pos.to_i, end_pos.to_i]]
375
+ end
376
+ end
377
+
378
+ pos_index = case
379
+ when file == :memory
380
+ index = FixWidthTable.get(file, value_size, true)
381
+ index.add_range index_data
382
+ index.read
383
+ index
384
+ when (update or not File.exists?(file))
385
+ index = FixWidthTable.get(file, value_size, true, true)
386
+ index.add_range index_data
387
+ index.read
388
+ index
389
+ else
390
+ FixWidthTable.get(file, value_size, true)
391
+ end
392
+
393
+ pos_index
394
+ end
395
+
396
+ end