rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,361 @@
1
+ require 'progress-bar'
2
+ require 'rbbt/persist'
3
+ require 'rbbt/tsv/util'
4
+
5
+ module TSV
6
+
7
+ attr_accessor :monitor
8
+
9
# Maps the entries of a table onto a different key/fields layout.
#
# The layout is resolved once in #initialize, which then binds #process on
# this particular instance to the process_* method that implements it.
# Field positions are integers; the symbol :key stands for the original key.
# Every process_* method returns a pair [new_keys_or_key, new_values].
class Traverser
  attr_accessor :new_key_field, :new_fields, :new_key_field_name, :new_field_names, :type, :uniq

  # Identity layout: the key (wrapped in an array) and the untouched values.
  def process_null(key, values)
    [[key], values]
  end

  # Keeps the key and picks the selected positions out of a flat value list.
  def process_subset_list(key, values)
    [key, @new_fields.collect { |field| field == :key ? key : values[field] }]
  end

  # Keeps the key and drops one position; @new_fields holds that position
  # negated.
  def process_subset_all_but_list(key, values)
    remaining = values.dup
    remaining.delete_at(-@new_fields)
    [key, remaining]
  end

  # Uses the value at @new_key_field as the single new key.
  def process_reorder_list(key, values)
    [[values[@new_key_field]],
     @new_fields.collect { |field| field == :key ? key : values[field] }]
  end

  # Uses the distinct values at @new_key_field as the new keys.
  def process_reorder_double_uniq(key, values)
    [values[@new_key_field].uniq,
     @new_fields.collect { |field| field == :key ? [key] : values[field] }]
  end

  # Keeps the key and picks the selected positions; the key is wrapped in an
  # array when it is requested as a field.
  def process_subset_double(key, values)
    [[key], @new_fields.collect { |field| field == :key ? [key] : values[field] }]
  end

  # Keeps the key and drops one position; @new_fields holds that position
  # negated.
  def process_subset_all_but_double(key, values)
    remaining = values.dup
    remaining.delete_at(-@new_fields)
    [[key], remaining]
  end

  # Uses every value at @new_key_field as a new key.
  def process_reorder_double(key, values)
    [values[@new_key_field],
     @new_fields.collect { |field| field == :key ? [key] : values[field] }]
  end

  # key_field::     name of the current key field
  # fields::        names of the current fields, or nil when unknown
  # new_key_field:: field to use as key, resolved with TSV.identify_field
  # new_fields::    a field, a list of fields, or nil for "everything else"
  # type::          :double selects the *_double strategies, anything else
  #                 the *_list ones
  # uniq::          for :double reorders, de-duplicate the new keys
  #
  # Raises a RuntimeError when new_key_field cannot be identified.
  def initialize(key_field, fields, new_key_field, new_fields, type, uniq)
    @new_key_field = TSV.identify_field(key_field, fields, new_key_field)

    raise "Key field #{ new_key_field } not found" if @new_key_field.nil?

    @new_fields = if new_fields.nil?
                    if @new_key_field == :key
                      :all
                    elsif fields.nil?
                      # Field count unknown: remember "all but this position"
                      # as the negated position.
                      # NOTE(review): the reorder strategies bound below call
                      # collect on @new_fields, which an Integer does not
                      # support - confirm whether this layout is expected to
                      # work.
                      -@new_key_field
                    else
                      positions = (0..fields.length - 1).to_a
                      positions.delete_at(@new_key_field)
                      positions.unshift :key
                      positions
                    end
                  elsif Array === new_fields
                    new_fields.collect { |field| TSV.identify_field(key_field, fields, field) }
                  else
                    [TSV.identify_field(key_field, fields, new_fields)]
                  end

    # Integer rather than Fixnum: the Fixnum constant no longer exists in
    # current Ruby versions, and Integer matches the same values in old ones.
    all_but = (Integer === @new_fields) && (@new_fields <= 0)

    @new_key_field_name = if @new_key_field == :key
                            key_field
                          elsif Array === fields
                            fields[@new_key_field]
                          end

    # Field names can only be derived when the source names are known.
    if Array === fields
      @new_field_names = if Array === @new_fields
                           @new_fields.collect { |field| field == :key ? key_field : fields[field] }
                         elsif @new_fields == :all
                           fields
                         elsif all_but
                           names = fields.dup
                           names.delete_at(-@new_fields)
                           names.unshift key_field
                           names
                         end
    end

    strategy = if @new_key_field == :key
                 if @new_fields == :all || fields.nil? || @new_fields == (0..fields.length - 1).to_a
                   :process_null
                 elsif type == :double
                   all_but ? :process_subset_all_but_double : :process_subset_double
                 else
                   all_but ? :process_subset_all_but_list : :process_subset_list
                 end
               elsif type == :double
                 uniq ? :process_reorder_double_uniq : :process_reorder_double
               else
                 :process_reorder_list
               end

    # Bind #process on this instance only, so other traversers keep their own
    # strategy.
    (class << self; self; end).send(:alias_method, :process, strategy)
  end

end
143
+
144
+ #{{{ Methods
145
+
146
# Walks every entry of the table under a different key/fields layout.
#
# A Traverser built from the current key field and fields translates each
# entry; the block receives every resulting key together with the new
# values. When @monitor is set a progress bar is ticked once per original
# entry; a Hash monitor may override the bar's :desc and :step.
#
# Returns [new_key_field_name, new_field_names] as resolved by the traverser.
def through(new_key_field = nil, new_fields = nil, uniq = false)
  traverser = Traverser.new @key_field, @fields, new_key_field, new_fields, type, uniq

  progress_monitor = nil
  if @monitor
    settings = Hash === @monitor ? @monitor : {}
    desc = settings.include?(:desc) ? settings[:desc] : "Iterating TSV"
    step = settings.include?(:step) ? settings[:step] : 100
    progress_monitor = Progress::Bar.new(size, 0, step, desc)
  end

  each do |key, value|
    progress_monitor.tick if progress_monitor

    new_keys, new_value = traverser.process(key, value)
    next if new_keys.nil?

    new_keys.each do |new_key|
      # Array values get the new field names attached unless the table is in
      # unnamed mode.
      NamedArray.setup new_value, traverser.new_field_names if Array === new_value && !@unnamed
      yield new_key, new_value
    end
  end

  [traverser.new_key_field_name, traverser.new_field_names]
end
175
+
176
# Builds a table keyed by new_key_field and holding new_fields.
#
# The work is delegated to Persist.persist_tsv, which supplies the +data+
# object to fill. Entries are traversed in unnamed mode; for :double tables,
# values that land on a key already present in +data+ are concatenated field
# by field instead of replacing the earlier ones. Finally +data+ receives the
# new key field name, field names, this table's filename and its type.
def reorder(new_key_field = nil, new_fields = nil, persist = false)
  layout = {:key_field => new_key_field, :fields => new_fields}
  persist_options = {:persist => persist, :persist_prefix => "Reorder:"}

  Persist.persist_tsv(self, self.filename, layout, persist_options) do |data|
    with_unnamed do
      names = through(new_key_field, new_fields) do |key, value|
        data[key] = if data.include?(key) && type == :double
                      data[key].zip(value).collect { |old_list, new_list| old_list + new_list }
                    else
                      value
                    end
      end
      new_key_field_name, new_field_names = names

      data.extend TSV unless TSV === data
      data.key_field = new_key_field_name
      data.fields = new_field_names
      data.filename = filename
      data.type = type
    end
  end
end
196
+
197
# Reorders the table keeping its current key and only the given fields;
# returns whatever #reorder returns.
def slice(fields)
  reorder(:key, fields)
end
200
+
201
# Returns the table's keys ordered by a score computed for each entry.
#
# Entries are traversed keeping the key and restricted to +fields+ (every
# field when none are given). With a block, the score is the block's result
# for the splatted values. Without one it is the value itself for :single
# tables, the first element of the first field for :double tables, and the
# first value otherwise.
def sort(*fields)
  fields = nil if fields.empty?

  scored = []
  through :key, fields do |key, value|
    score = if block_given?
              yield(*value)
            elsif type == :single
              value
            elsif type == :double
              value.first.first
            else
              value.first
            end
    scored << [key, score]
  end

  scored.sort_by { |_key, score| score }.collect { |key, _score| key }
end
223
+
224
# Returns a new table holding the entries that satisfy a criterion.
#
# method:: one of
#          * nil with a block - the block is called with key and values;
#          * Array   - key or any value is included in the array;
#          * Regexp  - key or any value matches it;
#          * String  - with a block, the block is called with the value of
#            that field (or the key when the string names the key field);
#            without one, key or any value equals the string;
#          * Hash    - only its first pair is used, as field => criterion,
#            where criterion is an Array, Regexp, String or Proc tested
#            against the values of that field. An Array criterion on the key
#            field (or :key) looks the listed keys up directly.
#
# Any other combination yields an empty table.
def select(method = nil)
  selected = TSV.setup({}, :key_field => key_field, :fields => fields, :type => type, :filename => filename, :identifiers => identifiers)

  selected.key_field = key_field
  selected.fields = fields.dup
  selected.type = type
  selected.filename = filename

  if method.nil? && block_given?
    through do |key, values|
      selected[key] = values if yield key, values
    end
  elsif String === method && block_given?
    pos = identify_field method
    with_unnamed do
      through do |key, values|
        subject = (method == key_field || method == :key) ? key : values[pos]
        selected[key] = values if yield(subject)
      end
    end
  elsif Hash === method
    field = method.keys.first
    criterion = method.values.first

    if Array === criterion && (field == :key || key_field == field)
      with_unnamed do
        criterion.each do |key|
          selected[key] = self[key] if self.include? key
        end
      end
    else
      # Predicate over the flattened values of the chosen field.
      matcher = case criterion
                when Array
                  lambda { |list| (list & criterion).any? }
                when Regexp
                  lambda { |list| list.select { |v| v =~ criterion }.any? }
                when String
                  lambda { |list| list.select { |v| v == criterion }.any? }
                when Proc
                  lambda { |list| list.select { |v| criterion.call(v) }.any? }
                end

      if matcher
        with_unnamed do
          through :key, field do |key, values|
            values = [values] if type == :single
            selected[key] = self[key] if matcher.call(values.flatten)
          end
        end
      end
    end
  else
    # Predicate over the key and all values, flattened together.
    matcher = case method
              when Array
                lambda { |list| (list & method).any? }
              when Regexp
                lambda { |list| list.select { |v| v =~ method }.any? }
              when String
                lambda { |list| list.select { |v| v == method }.any? }
              end

    if matcher
      with_unnamed do
        through do |key, values|
          selected[key] = values if matcher.call([key, values].flatten)
        end
      end
    end
  end

  selected
end
308
+
309
# Rewrites the values of one field in place, entry by entry.
#
# field:: field to process, resolved to a position with identify_field
# block:: receives (field_values), (field_values, key) or
#         (field_values, key, values) depending on its arity; its result
#         becomes the new content
#
# For :single and :flat tables the whole value is passed to the block and
# replaced by its result. Otherwise only the selected field is passed, and
# entries whose values or selected field are nil are skipped.
#
# Raises a RuntimeError when the block's arity is not 1, 2 or 3.
def process(field, &block)
  field_pos = identify_field field

  through do |key, values|
    next if values.nil?

    whole_value = (type == :single || type == :flat)

    if whole_value
      field_values = values
    else
      next if values[field_pos].nil?
      field_values = values[field_pos]
    end

    new_values = case block.arity
                 when 1
                   yield(field_values)
                 when 2
                   yield(field_values, key)
                 when 3
                   yield(field_values, key, values)
                 else
                   raise "Unexpected arity in block, must be 1, 2 or 3: #{block.arity}"
                 end

    if whole_value
      self[key] = new_values
    else
      # Write back through the resolved position, the same one used for
      # reading, so a field name also works when values is a plain array.
      values[field_pos].replace new_values
      self[key] = values
    end
  end
end
347
+
348
# Appends a new field to every entry.
#
# The block is called with key and values and returns the content of the new
# field; for :double tables a non-Array result is wrapped in an array. When
# both the table's fields and +name+ are non-nil, +name+ is appended to the
# field names.
#
# Returns self.
def add_field(name = nil)
  through do |key, values|
    addition = yield(key, values)
    addition = [addition] if type == :double && !(Array === addition)

    values << addition
    self[key] = values
  end

  self.fields = self.fields + [name] if fields != nil && name != nil

  self
end
361
+ end
@@ -0,0 +1,231 @@
1
+ require 'rbbt/util/cmd'
2
+ module TSV
3
# Reads the header of a TSV stream and knows how to turn its data lines into
# key/values pairs.
#
# #initialize consumes the header lines, resolves the requested key and
# fields, and binds three per-instance strategies according to the table
# type and the :merge option:
#
# get_values::  splits the chopped line into key(s) and values
# cast_values:: applies the :cast option to the values
# add_to_data:: stores key(s) and values into the data object
class Parser
  attr_accessor :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight

  # Raised by #process for lines rejected by the :select filter.
  class SKIP_LINE < Exception; end

  # Key field followed by the fields, set up as a NamedArray named after
  # itself.
  def all_fields
    all = [key_field] + fields
    NamedArray.setup all, all
  end

  # Consumes the header of +stream+.
  #
  # An optional first line "<header_hash>: <options>" is parsed with
  # Misc.string2hash; an optional following line starting with header_hash
  # names the key field and the fields. The first line that belongs to
  # neither is kept for #rescue_first_line.
  #
  # Returns the options hash found in the header (empty when there is none).
  # Raises a RuntimeError when the stream has no content.
  def parse_header(stream)
    options = {}

    # header_hash is a literal prefix (its length is used below to strip it),
    # so it is quoted before being placed in a pattern.
    mark = Regexp.quote(@header_hash)

    line = stream.gets
    raise "Empty content" if line.nil?
    line.chomp!

    # Options line
    if line =~ /^#{mark}: (.*)/
      options = Misc.string2hash $1
      line = stream.gets
    end

    # A separator declared in the header overrides the configured one
    @sep = options[:sep] if options[:sep]

    # Fields line
    if line && line =~ /^#{mark}/
      line.chomp!
      @fields = line.split(@sep)
      @key_field = @fields.shift
      @key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
      line = stream.gets
    end

    @first_line = line

    options
  end

  # Chomps +line+, raises SKIP_LINE when a :select proc rejects it, and
  # passes it through the :fix proc when there is one.
  def process(line)
    l = line.chomp
    raise Parser::SKIP_LINE if Proc === @select && !@select.call(l)
    l = @fix.call l if Proc === @fix
    l
  end

  # Whether a :cast option was given.
  def cast?
    !! @cast
  end

  # Splits a line on the field separator, keeping trailing empty fields.
  def chop_line(line)
    line.split(@sep, -1)
  end

  # Returns [key, value] for :single tables. Without field positions the
  # first part is the key and the next one the value.
  def get_values_single(parts)
    return parts.shift, parts.first if field_positions.nil?

    [parts[key_position], parts[field_positions.first]]
  end

  # Returns [key, values] for :list tables. Without field positions the
  # first part is the key and the rest are the values.
  def get_values_list(parts)
    return parts.shift, parts if field_positions.nil?

    [parts[key_position], parts.values_at(*field_positions)]
  end

  # Returns [keys, values] for :double tables; every part is further split
  # on sep2, keeping trailing empty elements.
  def get_values_double(parts)
    if field_positions.nil?
      keys = parts.shift.split(@sep2, -1)
      return keys, parts.collect { |value| value.split(@sep2, -1) }
    end

    keys = parts[key_position].split(@sep2, -1)
    values = parts.values_at(*field_positions).collect { |value| value.split(@sep2, -1) }
    [keys, values]
  end

  # Stores values under key unless the key is already present.
  def add_to_data_no_merge_list(data, key, values)
    data[key] = values unless data.include? key
  end

  # Stores values under every key that is not already present.
  def add_to_data_no_merge_double(data, keys, values)
    keys.each do |key|
      data[key] = values unless data.include? key
    end
  end

  # Stores values under every key; for keys already present the new lists
  # are appended, field by field, to the stored ones.
  def add_to_data_merge(data, keys, values)
    keys.each do |key|
      if data.include? key
        data[key] = data[key].zip(values).collect do |old, new|
          old.concat new
          old
        end
      else
        data[key] = values
      end
    end
  end

  # Casts a single value: a Symbol cast is sent to the value, a Proc cast is
  # called with it. Returns nil for any other cast.
  def cast_values_single(value)
    case cast
    when Symbol
      value.send(cast)
    when Proc
      cast.call value
    end
  end

  # Casts every value of a list; see #cast_values_single.
  def cast_values_list(values)
    case cast
    when Symbol
      values.collect { |v| v.send(cast) }
    when Proc
      values.collect { |v| cast.call v }
    end
  end

  # Casts every element of every list; see #cast_values_single.
  def cast_values_double(values)
    case cast
    when Symbol
      values.collect { |list| list.collect { |v| v.send(cast) } }
    when Proc
      values.collect { |list| list.collect { |v| cast.call v } }
    end
  end

  # The first line read after the header (nil when the stream ended there).
  def rescue_first_line
    @first_line
  end

  # Resolves the :key_field and :fields options into positions.
  #
  # When the key is the first column and all fields are requested in their
  # original order the parser is marked as straight and no positions are
  # computed. Otherwise key_position and field_positions are set (position 0
  # is the original key column) and key_field / fields are updated to the
  # selected names.
  #
  # Raises a RuntimeError for key or field specifications that are neither
  # an Integer nor a String.
  def fix_fields(options)
    key_field = Misc.process_options options, :key_field
    fields = Misc.process_options options, :fields

    first_column_key = key_field.nil? || key_field == 0 || key_field == :key
    every_field = fields.nil? || fields == @fields ||
      (!@fields.nil? && fields == (1..@fields.length).to_a)

    if first_column_key && every_field
      @straight = true
      return
    end

    @straight = false

    @key_position = case
                    when (first_column_key || key_field == @key_field)
                      # :key is accepted here too, as in the check above
                      0
                    when Integer === key_field
                      key_field
                    when String === key_field
                      @fields.dup.unshift(@key_field).index key_field
                    else
                      raise "Format of key_field not understood: #{key_field.inspect}"
                    end

    if every_field
      # NOTE(review): this needs @fields, so selecting a key by position on
      # a stream without a fields line fails here - confirm intended.
      @field_positions = (0..@fields.length).to_a
      @field_positions.delete @key_position
    else
      fields = [fields] if not Array === fields
      @field_positions = fields.collect do |field|
        case field
        when Integer
          field
        when String
          @fields.dup.unshift(@key_field).index field
        else
          raise "Format of fields not understood: #{fields.inspect}"
        end
      end
    end

    new_key_field = nil
    unless @fields.nil?
      names = @fields.dup.unshift(@key_field)
      new_key_field = names[@key_position]
      @fields = names.values_at(*@field_positions)
    end
    @key_field = new_key_field
  end

  # stream::  IO-like object answering gets, positioned at the header
  # options:: :header_hash (default "#"), :sep (default tab), :type (default
  #           :double), :merge, :sep2 (default "|"), :cast, :fix, :select,
  #           :key_field and :fields. Options found in the stream's header
  #           are used unless overridden here.
  def initialize(stream = nil, options = {})
    @header_hash = Misc.process_options(options, :header_hash) || "#"
    @sep = Misc.process_options(options, :sep) || "\t"

    options = parse_header(stream).merge options

    @type = Misc.process_options(options, :type) || :double
    merge = Misc.process_options(options, :merge) || false

    @sep2 = Misc.process_options(options, :sep2) || "|"
    @cast = Misc.process_options options, :cast
    @fix = Misc.process_options(options, :fix)
    @select = Misc.process_options options, :select

    case @type
    when :double
      bind_strategy :get_values, :get_values_double
      bind_strategy :cast_values, :cast_values_double
      bind_strategy :add_to_data, (merge ? :add_to_data_merge : :add_to_data_no_merge_double)
    when :single
      bind_strategy :get_values, :get_values_single
      bind_strategy :cast_values, :cast_values_single
      bind_strategy :add_to_data, :add_to_data_no_merge_list
    else
      bind_strategy :get_values, :get_values_list
      bind_strategy :cast_values, :cast_values_list
      bind_strategy :add_to_data, :add_to_data_no_merge_list
    end

    fix_fields(options)

    # A custom separator, a cast or merging all rule out the straight path.
    @straight = false if @sep != "\t" || !@cast.nil? || merge
  end

  # Extends +data+ with TSV if needed and copies the parser's type, key
  # field and fields onto it. Returns +data+.
  def setup(data)
    data.extend TSV unless TSV === data
    data.type = @type
    data.key_field = @key_field
    data.fields = @fields
    data
  end

  private

  # Makes +name+ an alias of +implementation+ on this instance only.
  def bind_strategy(name, implementation)
    (class << self; self; end).send(:alias_method, name, implementation)
  end
end
231
+ end