rbbt-util 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
@@ -89,6 +89,7 @@ class TSV
89
89
  return nil if @key_field.nil?
90
90
  k = @key_field.dup
91
91
  k.extend Field
92
+ k.namespace = namespace unless namespace.nil?
92
93
  k
93
94
  end
94
95
 
@@ -134,6 +135,11 @@ class TSV
134
135
  TSV.identify_field(key_field, fields, field)
135
136
  end
136
137
 
138
# Sets the key field of this table and mirrors it to the backing store.
#
# The instance variable @key_field is always updated. The value is also
# forwarded to @data, but only when @data exposes a key_field= writer and
# answers true to write?.
#
# new_key_field - the new key field value; stored exactly as given.
def key_field=(new_key_field)
  @key_field = new_key_field
  @data.key_field = new_key_field if @data.respond_to?(:key_field=) && @data.write?
end
142
+
137
143
  def fields=(new_fields)
138
144
  new_fields.collect! do |field|
139
145
  if Field === field
@@ -150,11 +156,6 @@ class TSV
150
156
  @data.fields = new_fields if @data.respond_to? :fields= and @data.write?
151
157
  end
152
158
 
153
- def old_fields=(new_fields)
154
- @fields = new_fields
155
- @data.fields = new_fields if @data.respond_to? :fields=
156
- end
157
-
158
159
  def keys
159
160
  @data.keys
160
161
  end
@@ -183,13 +184,14 @@ class TSV
183
184
 
184
185
  # Read
185
186
 
187
+ attr_accessor :unnamed
186
188
  def follow(value)
187
189
  return nil if value.nil?
188
190
  if String === value && value =~ /__Ref:(.*)/
189
191
  return self[$1]
190
192
  else
191
193
 
192
- if Array === value and fields
194
+ if Array === value and not unnamed
193
195
  value = NamedArray.name value, fields
194
196
  end
195
197
  value
@@ -128,7 +128,7 @@ class TSV
128
128
  if other.include? key
129
129
  new_values = other[key].values_at *fields
130
130
  new_values.collect!{|v| [v]} if type == :double and not other.type == :double
131
- new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
131
+ new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
132
132
  self[key] = self[key].concat new_values
133
133
  else
134
134
  if type == :double
@@ -151,6 +151,7 @@ class TSV
151
151
 
152
152
  through do |key, values|
153
153
  source_keys = values[source]
154
+ source_keys = [source_keys] unless Array === source_keys
154
155
  if source_keys.nil? or source_keys.empty?
155
156
  all_new_values = []
156
157
  else
@@ -165,8 +166,8 @@ class TSV
165
166
  end
166
167
  end
167
168
 
168
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
169
- new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
169
+ new_values.collect!{|v| [v]} if type == :double and not other.type == :double
170
+ new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
170
171
  all_new_values << new_values
171
172
  end
172
173
  end
@@ -191,12 +192,12 @@ class TSV
191
192
 
192
193
  def attach_index(other, index, fields = nil)
193
194
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
195
+ fields = [fields] unless Array === fields
194
196
 
195
197
  other = other.tsv unless TSV === other
196
198
  field_positions = fields.collect{|field| other.identify_field field}
197
199
  field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
198
200
 
199
-
200
201
  length = self.fields.length
201
202
  through do |key, values|
202
203
  source_keys = index[key]
@@ -218,7 +219,7 @@ class TSV
218
219
  end
219
220
  end
220
221
  new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
221
- new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
222
+ new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
222
223
  all_new_values << new_values
223
224
  end
224
225
  end
@@ -227,7 +228,7 @@ class TSV
227
228
  if type == :double
228
229
  all_new_values = [[[]] * field_positions.length]
229
230
  else
230
- all_new_values = [[""] * field_positions.length]
231
+ all_new_values = [[""] * field_positions.length]
231
232
  end
232
233
  end
233
234
 
@@ -260,7 +261,11 @@ class TSV
260
261
  in_namespace = options[:in_namespace]
261
262
 
262
263
  if in_namespace
263
- ids = [files.first.all_namespace_fields(in_namespace)]
264
+ if files.first.all_fields.include? in_namespace
265
+ ids = [[in_namespace]]
266
+ else
267
+ ids = [files.first.all_namespace_fields(in_namespace)]
268
+ end
264
269
  ids += files[1..-1].collect{|f| f.all_fields}
265
270
  else
266
271
  ids = files.collect{|f| f.all_fields}
@@ -276,7 +281,7 @@ class TSV
276
281
  id_list << match.first
277
282
  end
278
283
 
279
- if id_list.last.first != files.last.all_fields.first
284
+ if id_list.last != files.last.all_fields.first
280
285
  id_list << files.last.all_fields.first
281
286
  id_list.zip(files)
282
287
  else
@@ -312,14 +317,14 @@ class TSV
312
317
  next_key, next_file = path.shift
313
318
 
314
319
  if current_index.nil?
315
- current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
320
+ current_index = next_file.index :target => next_key, :fields => current_key, :persistence => (persist_input and path.empty?)
316
321
  else
317
322
  next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
318
323
  current_index.process current_index.fields.first do |values|
319
324
  if values.nil?
320
325
  nil
321
326
  else
322
- next_index.values_at(*values).flatten.collect
327
+ next_index.values_at(*values).flatten.collect.to_a
323
328
  end
324
329
  end
325
330
  current_index.fields = [next_key]
@@ -395,27 +400,13 @@ class TSV
395
400
  reorder :key, detached_fields
396
401
  end
397
402
 
398
- def paste(other, options = {})
399
- tmpfile = TmpFile.tmp_file
400
- TSV.paste(self.to_s, other.to_s, tmpfile)
401
-
402
- new = TSV.new(tmpfile, options)
403
-
404
- new.key_field = self.key_field unless self.key_field.nil?
405
- if self.fields and other.fields
406
- new.fields = self.fields + other.fields
407
- end
408
-
409
- FileUtils.rm tmpfile if File.exists? tmpfile
410
-
411
- new
412
- end
413
-
414
-
415
403
  def paste(other, options = {})
416
404
  TmpFile.with_file do |output|
417
405
  TSV.paste_merge(self, other, output, options[:sep] || "\t")
418
- TSV.new output, options
406
+ tsv = TSV.new output, options
407
+ tsv.key_field = self.key_field unless self.key_field.nil?
408
+ tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
409
+ tsv
419
410
  end
420
411
  end
421
412
 
@@ -0,0 +1,193 @@
1
+ require 'rbbt/util/misc'
2
# Mixin that narrows what a table exposes to the entries accepted by a stack
# of Filter objects. It is meant to be installed with Object#extend: the
# +extended+ hook wraps []=, keys, values, each and collect on the extended
# object and gives it +filter_dir+ and +filters+ accessors.
module Filtered

  # One condition over the entries of a table, plus the sorted list of keys
  # of the entries that satisfy it. The key list is kept either in memory
  # (@list) or in a persistence store, under #key.
  class Filter
    attr_accessor :data, :match, :fieldnum, :value, :list, :unsaved
    attr_accessor :persistence

    # data        - the table being filtered; it is asked for identify_field
    #               here and for unfiltered_each in #update.
    # match       - filter specification. Only the form "field:<name>" is
    #               handled: the named field of an entry must equal +value+.
    #               For any other specification no matcher is installed.
    # value       - value compared with == against the selected field.
    # persistence - nil keeps the key list in memory; a Hash is used as the
    #               store as is; a String is opened through
    #               TSV.new(TCHash.get(persistence)) and switched with read.
    def initialize(data, match, value, persistence = nil)
      @data = data
      @value = value
      @unsaved = []
      @list = nil

      case persistence
      when Hash
        @persistence = persistence
      when String
        @persistence = TSV.new TCHash.get(persistence)
        @persistence.read
      end

      if match.match(/field:(.*)/)
        field_num = data.identify_field $1
        @fieldnum = field_num
        # Installs a per-instance +match(entry)+ that takes the place of the
        # plain attribute reader declared above.
        Misc.add_method(self, :match) do |entry|
          entry[field_num] == value
        end
      end
    end

    # Key under which this filter's id list is stored in the persistence
    # store: the value itself when it is a String, its Marshal dump otherwise.
    def key
      String === value ? value : Marshal.dump(value)
    end

    # Records +ids+ as the ids accepted by this filter.
    #
    # With a persistence store the list is written under #key, bracketed by
    # write/read calls on the store. Without one, @list is assigned on first
    # use and updated in place (Array#replace) afterwards.
    def save(ids)
      if persistence
        persistence.write
        persistence[self.key] = ids
        persistence.read
      elsif list.nil?
        @list = ids
      else
        @list.replace ids
      end
    end

    # Recomputes the accepted ids by walking every entry of the table through
    # its unfiltered iterator, then saves them sorted.
    def update
      accepted = []
      data.unfiltered_each do |key, entry|
        accepted << key if match(entry)
      end
      save(accepted.sort)
    end

    # Returns the stored id list, or nil when nothing has been stored yet.
    def saved
      if persistence.nil?
        list
      elsif persistence.include?(self.key)
        persistence[self.key]
      end
    end

    # Merges the ids queued by #add into the stored list and empties the
    # queue.
    def add_unsaved
      save(Misc.merge_sorted_arrays(unsaved.sort, saved || [])) if unsaved.any?
      unsaved.clear
    end

    # Returns the ids accepted by this filter, flushing queued ids first and
    # computing the list from the table when none is stored.
    def ids
      add_unsaved

      current = saved
      if current.nil?
        update
        current = saved
      end
      current
    end

    # Queues +id+ as accepted; it reaches the stored list on the next call to
    # #ids or #add_unsaved.
    def add(id)
      unsaved.push id
    end

    # Forgets the stored ids so that they are recomputed on the next #ids.
    def reset
      if persistence
        # NOTE(review): this clears the whole store, not only the entry under
        # #key -- confirm that is intended when a store holds several values.
        persistence.clear
      else
        @list = nil
      end
    end
  end

  # Hook run by Object#extend. Adds the +filter_dir+ and +filters+ accessors
  # to +base+ and wraps its accessors through Misc.redefine_method. Judging by
  # how the blocks below call them, the previous implementations stay
  # reachable as unfiltered_set, unfiltered_keys, unfiltered_values,
  # unfiltered_each and unfiltered_collect -- confirm against Misc.
  def self.extended(base)
    class << base
      attr_accessor :filter_dir, :filters
    end

    # Every active filter that accepts the new value queues the key; the
    # entry itself is always stored.
    Misc.redefine_method base, :[]=, :unfiltered_set do |key, value|
      filters.each do |filter|
        filter.add key if filter.match value
      end
      self.send(:unfiltered_set, key, value)
    end

    Misc.redefine_method base, :keys, :unfiltered_keys do
      filters.empty? ? self.send(:unfiltered_keys) : filtered_ids
    end

    Misc.redefine_method base, :values, :unfiltered_values do
      if filters.empty?
        self.send(:unfiltered_values)
      else
        self.send :values_at, *filtered_ids
      end
    end

    Misc.redefine_method base, :each, :unfiltered_each do |&block|
      if filters.empty?
        self.send(:unfiltered_each, &block)
      else
        filtered_copy.send :each, &block
      end
    end

    Misc.redefine_method base, :collect, :unfiltered_collect do |&block|
      if filters.empty?
        self.send(:unfiltered_collect, &block)
      else
        filtered_copy.send :collect, &block
      end
    end
  end

  # Ids accepted by every active filter, or nil when there are no filters.
  #
  # Each filter's list is copied before it is handed to
  # Misc.intersect_sorted_arrays, so the lists stored inside the filters are
  # never exposed to that helper. (The original call sites already copied one
  # of the two arguments, which suggests the helper consumes its inputs.)
  def filtered_ids
    filters.inject(nil) do |ids, filter|
      current = filter.ids.dup
      ids.nil? ? current : Misc.intersect_sorted_arrays(ids, current)
    end
  end

  # A dup of this table whose data is replaced by an empty Hash and refilled
  # with the entries selected by #filtered_ids.
  def filtered_copy
    ids = filtered_ids
    copy = self.dup
    copy.data = {}
    ids.zip(self.send(:values_at, *ids)).each do |id, values|
      copy[id] = values
    end
    copy
  end

  # Name of this table once the given filter is applied: the current
  # @filename followed by an "&F[match=value]" suffix.
  def filter_name(match, value)
    @filename + "&F[#{match}=#{value}]"
  end

  # Pushes a new Filter for +match+ / +value+ onto the filter stack.
  #
  # When no +persistence+ is given and filter_dir is set, the store path is
  # filter_dir joined with the match specification. @filename, when set,
  # gains the suffix produced by #filter_name.
  def add_filter(match, value, persistence = nil)
    if persistence.nil? and filter_dir
      persistence = File.join(filter_dir, match.to_s)
    end

    @filename = filter_name(match, value) if @filename

    filters.push Filter.new(self, match, value, persistence)
  end

  # Removes the most recently added filter and strips its suffix from
  # @filename. Returns the removed filter (nil when the stack is empty).
  def pop_filter
    @filename = @filename.sub(/&F\[[^\]]*\]$/, '') if @filename
    filters.pop
  end

end
184
+
185
class TSV
  # Turns this table into a filtered table and returns it.
  #
  # filter_dir - value stored in the +filter_dir+ accessor provided by
  #              Filtered; may be nil.
  #
  # The filter stack is (re)initialised to an empty Array on every call.
  # Returns self.
  def filter(filter_dir = nil)
    # Object#extend runs Filtered.extended on every call, and that hook wraps
    # this object's accessors; extending only once keeps a repeated call from
    # wrapping the already wrapped methods.
    extend Filtered unless Filtered === self
    self.filter_dir = filter_dir
    self.filters = []
    self
  end
end
193
+
@@ -13,7 +13,7 @@ class TSV
13
13
  "Index[:key]"
14
14
  end
15
15
 
16
- new = Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
16
+ Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
17
17
  order, target, fields, case_insensitive = Misc.process_options options, :order, :target, :fields, :case_insensitive
18
18
 
19
19
  new = {}
@@ -34,7 +34,7 @@ class TSV
34
34
  list = [list] unless Array === list
35
35
  i += 1 if fields.nil?
36
36
  list.each do |elem|
37
- next if elem.empty?
37
+ next if elem.nil? or elem.empty?
38
38
  elem.downcase if case_insensitive
39
39
  new[elem] ||= []
40
40
  new[elem][i] ||= []
@@ -56,10 +56,9 @@ class TSV
56
56
  # flatten
57
57
 
58
58
  new.each do |key, values|
59
- values.flatten!
60
- values.compact!
59
+ new[key] = values.flatten.compact
61
60
  end
62
-
61
+
63
62
  ## Not ordered
64
63
  else
65
64
  double_keys = true unless type != :double or identify_field(target) == :key
@@ -74,7 +73,7 @@ class TSV
74
73
  end
75
74
  list.collect!{|e| e.downcase} if case_insensitive
76
75
  list.each do |elem|
77
- next if elem.empty?
76
+ next if elem.nil? or elem.empty?
78
77
  new[elem] ||= []
79
78
  if double_keys
80
79
  new[elem].concat key
@@ -103,7 +102,9 @@ class TSV
103
102
  else
104
103
  [new_key_field]
105
104
  end
105
+
106
106
  new = TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}])
107
+
107
108
  new
108
109
  end
109
110
  end
@@ -293,7 +294,7 @@ class TSV
293
294
 
294
295
  range = ! pos_end.nil?
295
296
 
296
- index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end: pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
297
+ index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end : pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
297
298
  pos_start, pos_end, range = Misc.process_options options, :start, :end, :range
298
299
  data = case
299
300
  when (type == :double and range)
@@ -319,6 +320,77 @@ class TSV
319
320
  index
320
321
  end
321
322
 
323
# Returns a FixWidthTable holding one [key, position] point per position
# found in +pos_field+.
#
# pos_field - field holding the positions; values are converted with to_i.
#             A field value that is an Array contributes one point per
#             element.
# file      - where the index lives. Defaults to
#             "<filename>-PosIndex[<pos_field>]" when this table has a
#             filename. The Symbol :memory is passed straight to
#             FixWidthTable and never checked on disk.
# update    - when true the index is rebuilt even if +file+ already exists.
#
# The whole table is traversed in every case, because the longest key
# length is handed to FixWidthTable as its value size.
#
# NOTE(review): the fourth argument to FixWidthTable.new is passed only when
# the file is (re)built -- presumably an update flag; confirm.
def pos_index(pos_field, file = nil, update = false)
  value_size = 0
  index_data = []

  file ||= filename + "-PosIndex[#{ pos_field }]" if filename

  through :key, pos_field do |key, values|
    value_size = key.length if key.length > value_size

    pos = values.first
    positions = Array === pos ? pos : [pos]
    positions.each { |p| index_data << [key, p.to_i] }
  end

  # An existing on-disk index is reopened as is; otherwise it is filled.
  in_memory = (file == :memory)
  return FixWidthTable.new(file, value_size, false) unless in_memory || update || !File.exist?(file)

  index = in_memory ? FixWidthTable.new(file, value_size, false) : FixWidthTable.new(file, value_size, false, true)
  index.add_point index_data
  index
end
324
357
 
358
# Returns a FixWidthTable holding one [key, [start, end]] range per pair of
# values found in +start_field+ and +end_field+.
#
# start_field - field holding range starts; values are converted with to_i.
# end_field   - field holding range ends; values are converted with to_i.
#               When the start value is an Array, starts and ends are paired
#               up element by element with Array#zip.
# file        - where the index lives. Defaults to
#               "<filename>-PosIndex[<start_field>-<end_field>]" when this
#               table has a filename. The Symbol :memory is passed straight
#               to FixWidthTable and never checked on disk.
# update      - when true the index is rebuilt even if +file+ already exists.
#
# The whole table is traversed in every case, because the longest key
# length is handed to FixWidthTable as its value size.
#
# NOTE(review): the fourth argument to FixWidthTable.get is passed only when
# the file is (re)built -- presumably an update flag; confirm.
def range_index(start_field, end_field, file = nil, update = false)
  value_size = 0
  index_data = []

  file ||= filename + "-PosIndex[#{ start_field }-#{end_field}]" if filename

  through :key, [start_field, end_field] do |key, values|
    value_size = key.length if key.length > value_size

    start_pos, end_pos = values
    pairs = Array === start_pos ? start_pos.zip(end_pos) : [[start_pos, end_pos]]
    pairs.each { |s, e| index_data << [key, [s.to_i, e.to_i]] }
  end

  # An existing on-disk index is reopened as is; otherwise it is filled and
  # then switched with read.
  in_memory = (file == :memory)
  return FixWidthTable.get(file, value_size, true) unless in_memory || update || !File.exist?(file)

  index = in_memory ? FixWidthTable.get(file, value_size, true) : FixWidthTable.get(file, value_size, true, true)
  index.add_range index_data
  index.read
  index
end
395
+
396
+ end