rbbt-util 3.2.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,361 @@
1
+ require 'progress-bar'
2
+ require 'rbbt/persist'
3
+ require 'rbbt/tsv/util'
4
+
5
+ module TSV
6
+
7
+ attr_accessor :monitor
8
+
9
# Translates each (key, values) entry of a TSV into the entry it becomes
# once a different key field and/or field selection is applied.
#
# The constructor resolves the requested key field and fields to positions
# (through TSV.identify_field), derives the resulting field names, and binds
# #process on this instance to the one process_* method that fits the
# requested layout and TSV type.
class Traverser
  attr_accessor :new_key_field, :new_fields, :new_key_field_name, :new_field_names, :type, :uniq

  # Key and all fields kept as they are.
  def process_null(key, values)
    [[key], values]
  end

  # Same key, selected fields, flat values. Note the key is returned bare,
  # not wrapped in an Array.
  def process_subset_list(key, values)
    [key, @new_fields.collect { |field| field == :key ? key : values[field] }]
  end

  # Same key, every value except the one at position -@new_fields.
  def process_subset_all_but_list(key, values)
    new = values.dup
    new.delete_at(0 - @new_fields)
    [key, new]
  end

  # New key taken from the values, selected fields, flat values.
  def process_reorder_list(key, values)
    [[values[@new_key_field]],
     @new_fields.collect { |field| field == :key ? key : values[field] }]
  end

  # New key taken from the values when field names are unknown: the old key
  # goes first, followed by every value except the new key.
  def process_reorder_all_but_list(key, values)
    rest = values.dup
    new_key = rest.delete_at(@new_key_field)
    [[new_key], rest.unshift(key)]
  end

  # New keys taken from the values (duplicates removed), selected fields,
  # values are lists.
  def process_reorder_double_uniq(key, values)
    [values[@new_key_field].uniq,
     @new_fields.collect { |field| field == :key ? [key] : values[field] }]
  end

  # Same key, selected fields, values are lists.
  def process_subset_double(key, values)
    [[key], @new_fields.collect { |field| field == :key ? [key] : values[field] }]
  end

  # Same key, every value list except the one at position -@new_fields.
  def process_subset_all_but_double(key, values)
    new = values.dup
    new.delete_at(0 - @new_fields)
    [[key], new]
  end

  # New keys taken from the values, selected fields, values are lists.
  def process_reorder_double(key, values)
    [values[@new_key_field],
     @new_fields.collect { |field| field == :key ? [key] : values[field] }]
  end

  # New keys taken from the values when field names are unknown: the old
  # key (as a one-element list) goes first, followed by every value list
  # except the one holding the new keys.
  def process_reorder_all_but_double(key, values)
    rest = values.dup
    new_keys = rest.delete_at(@new_key_field)
    new_keys = new_keys.uniq if @uniq and not new_keys.nil?
    [new_keys, rest.unshift([key])]
  end

  # key_field / fields:: current key field name and field names (may be nil)
  # new_key_field::      requested key field; nil or :key keeps the current one
  # new_fields::         requested field or fields; nil means "all the others"
  # type::               TSV type; :double selects the list-of-lists processors
  # uniq::               remove duplicated new keys (:double reorders only)
  #
  # Raises RuntimeError when the new key field cannot be identified.
  def initialize(key_field, fields, new_key_field, new_fields, type, uniq)
    @type = type
    @uniq = uniq

    @new_key_field = TSV.identify_field(key_field, fields, new_key_field)

    raise "Key field #{ new_key_field } not found" if @new_key_field.nil?

    @new_fields = case
                  when new_fields.nil?
                    case
                    when @new_key_field == :key
                      :all
                    when fields.nil?
                      # Without field names the selection is encoded as the
                      # negated position of the column to leave out.
                      - @new_key_field
                    else
                      new = (0..fields.length - 1).to_a
                      new.delete_at(@new_key_field)
                      new.unshift :key
                      new
                    end
                  when Array === new_fields
                    new_fields.collect { |field| TSV.identify_field(key_field, fields, field) }
                  else
                    [TSV.identify_field(key_field, fields, new_fields)]
                  end

    @new_key_field_name = if @new_key_field == :key
                            key_field
                          elsif Array === fields
                            fields[@new_key_field]
                          end

    if Array === fields
      @new_field_names = case
                         when Array === @new_fields
                           @new_fields.collect { |field| field == :key ? key_field : fields[field] }
                         when @new_fields == :all
                           fields
                         when all_but_selection?
                           new = fields.dup
                           new.delete_at(- @new_fields)
                           new.unshift key_field
                           new
                         end
    end

    bind_processor processor_for(fields, type, uniq)
  end

  private

  # True when @new_fields encodes "everything but one position".
  # Integer is used instead of Fixnum, which no longer exists in current Ruby.
  def all_but_selection?
    Integer === @new_fields and @new_fields <= 0
  end

  # Name of the process_* method matching the requested layout.
  def processor_for(fields, type, uniq)
    if @new_key_field == :key
      if @new_fields == :all or fields.nil? or @new_fields == (0..fields.length - 1).to_a
        :process_null
      elsif type == :double
        all_but_selection? ? :process_subset_all_but_double : :process_subset_double
      else
        all_but_selection? ? :process_subset_all_but_list : :process_subset_list
      end
    elsif type == :double
      if all_but_selection?
        :process_reorder_all_but_double
      elsif uniq
        :process_reorder_double_uniq
      else
        :process_reorder_double
      end
    else
      all_but_selection? ? :process_reorder_all_but_list : :process_reorder_list
    end
  end

  # Makes #process an alias of the chosen method on this instance only.
  def bind_processor(name)
    (class << self; self; end).send(:alias_method, :process, name)
  end
end
143
+
144
+ #{{{ Methods
145
+
146
# Iterates over the entries as seen through a Traverser built from
# new_key_field, new_fields and uniq, yielding (key, value) once for every
# key the traverser produces for an entry.
#
# When @monitor is set a Progress::Bar is ticked once per original entry;
# a Hash @monitor may override the bar's :desc and :step.
#
# Returns [new_key_field_name, new_field_names] as resolved by the traverser.
def through(new_key_field = nil, new_fields = nil, uniq = false)
  traverser = Traverser.new @key_field, @fields, new_key_field, new_fields, type, uniq

  progress_monitor = nil
  if @monitor
    desc = "Iterating TSV"
    step = 100
    if Hash === @monitor
      desc = @monitor[:desc] if @monitor.include? :desc
      step = @monitor[:step] if @monitor.include? :step
    end
    progress_monitor = Progress::Bar.new(size, 0, step, desc)
  end

  each do |key, value|
    progress_monitor.tick if progress_monitor

    keys, value = traverser.process(key, value)
    next if keys.nil?

    # Some traverser processors return a single bare key instead of a list
    # of keys; wrap it so both shapes are iterated the same way.
    keys = [keys] unless Array === keys

    keys.each do |new_key|
      NamedArray.setup value, traverser.new_field_names if Array === value and not @unnamed
      yield new_key, value
    end
  end

  [traverser.new_key_field_name, traverser.new_field_names]
end
175
+
176
# Builds, through Persist.persist_tsv, a TSV keyed by new_key_field and
# holding new_fields. When several entries map to the same new key in a
# :double TSV their value lists are concatenated position by position;
# otherwise the later entry replaces the earlier one.
def reorder(new_key_field = nil, new_fields = nil, persist = false)
  Persist.persist_tsv self, self.filename, {:key_field => new_key_field, :fields => new_fields}, {:persist => persist, :persist_prefix => "Reorder:"} do |data|

    with_unnamed do
      key_name, field_names = through(new_key_field, new_fields) do |key, value|
        if data.include?(key) and type == :double
          merged = data[key].zip(value).map { |old_list, new_list| old_list + new_list }
          data[key] = merged
        else
          data[key] = value
        end
      end

      # Dress the result up as a TSV carrying the new layout.
      data.extend TSV unless TSV === data
      data.key_field = key_name
      data.fields = field_names
      data.filename = filename
      data.type = type
    end
  end
end
196
+
197
# Keeps the current key and only the given fields; delegates to #reorder.
def slice(fields)
  reorder(:key, fields)
end
200
+
201
# Returns the keys ordered by a per-entry sort value.
#
# With a block, the sort value is whatever the block returns for the
# entry's (splatted) values restricted to +fields+. Without one it is the
# value itself for :single, the first element of the first list for
# :double, and the first value otherwise.
def sort(*fields)
  selection = fields.empty? ? nil : fields

  ranked = []
  through :key, selection do |key, value|
    sort_value = if block_given?
                   yield(*value)
                 elsif type == :single
                   value
                 elsif type == :double
                   value.first.first
                 else
                   value.first
                 end
    ranked << [key, sort_value]
  end

  ranked.sort_by { |pair| pair.last }.map { |pair| pair.first }
end
223
+
224
# Returns a new TSV with the entries that satisfy +method+:
#
# nil + block::    entries for which the block, given (key, values), is true
# Array::          entries whose key or values include any listed element
# Regexp::         entries whose key or any value matches
# String::         entries whose key or any value equals it; with a block,
#                  the string names a field and the block receives that
#                  field's value (or the key)
# Hash::           {field => criteria}; only the first pair is used. An
#                  Array criteria on the key field picks those keys
#                  directly; otherwise the Array, Regexp, String or Proc
#                  criteria is tested against the values of +field+
def select(method = nil)
  selected = TSV.setup({}, :key_field => key_field, :fields => fields, :type => type, :filename => filename, :identifiers => identifiers)

  selected.key_field = key_field
  selected.fields = fields.dup
  selected.type = type
  selected.filename = filename

  case
  when (method.nil? and block_given?)
    through do |key, values|
      selected[key] = values if yield key, values
    end

  when (String === method and block_given?)
    pos = identify_field method
    with_unnamed do
      through do |key, values|
        probe = (method == key_field or method == :key) ? key : values[pos]
        selected[key] = values if yield(probe)
      end
    end

  when (Array === method or Regexp === method or String === method)
    # One predicate over the flattened [key, values] of each entry.
    entry_matches = case method
                    when Array
                      lambda { |items| (items & method).any? }
                    when Regexp
                      lambda { |items| items.select { |v| v =~ method }.any? }
                    else
                      lambda { |items| items.select { |v| v == method }.any? }
                    end

    with_unnamed do
      through do |key, values|
        selected[key] = values if entry_matches.call([key, values].flatten)
      end
    end

  when Hash === method
    field = method.keys.first
    criteria = method.values.first

    if Array === criteria and (field == :key or key_field == field)
      # Keys are listed explicitly: no need to traverse.
      with_unnamed do
        criteria.each do |wanted|
          selected[wanted] = self[wanted] if self.include? wanted
        end
      end
    else
      value_matches = case criteria
                      when Array
                        lambda { |items| (items & criteria).any? }
                      when Regexp
                        lambda { |items| items.select { |v| v =~ criteria }.any? }
                      when String
                        lambda { |items| items.select { |v| v == criteria }.any? }
                      when Proc
                        lambda { |items| items.select { |v| criteria.call(v) }.any? }
                      end

      unless value_matches.nil?
        with_unnamed do
          through :key, field do |key, values|
            values = [values] if type == :single
            selected[key] = self[key] if value_matches.call(values.flatten)
          end
        end
      end
    end
  end

  selected
end
308
+
309
# Rewrites the values of +field+ in place, entry by entry, with the result
# of the block.
#
# The block receives, depending on its arity, (field_values),
# (field_values, key) or (field_values, key, values). For :single and :flat
# TSVs the whole value is passed and replaced; for the other types only the
# element at the field's position is, and entries where it is nil are
# skipped.
#
# Raises RuntimeError when the block arity is not 1, 2 or 3.
def process(field, &block)
  field_pos = identify_field field

  through do |key, values|
    next if values.nil?

    case
    when type == :single
      field_values = values
    when type == :flat
      field_values = values
    else
      next if values[field_pos].nil?
      field_values = values[field_pos]
    end

    new_values = case
                 when block.arity == 1
                   yield(field_values)
                 when block.arity == 2
                   yield(field_values, key)
                 when block.arity == 3
                   yield(field_values, key, values)
                 else
                   raise "Unexpected arity in block, must be 1, 2 or 3: #{block.arity}"
                 end

    case
    when type == :single
      self[key] = new_values
    when type == :flat
      self[key] = new_values
    else
      # Write through the resolved position, the same one the value was
      # read from: indexing by the field name fails on plain Array rows.
      values[field_pos].replace new_values
      self[key] = values
    end
  end
end
347
+
348
# Appends to every entry the value the block returns for (key, values).
# In :double TSVs a non-Array result is wrapped in a one-element list.
# When both the TSV fields and +name+ are set, +name+ is added to the
# field names. Returns self.
def add_field(name = nil)
  through do |key, values|
    addition = yield(key, values)
    addition = [addition] if type == :double and not Array === addition

    values << addition
    self[key] = values
  end

  unless fields.nil? or name.nil?
    self.fields = self.fields + [name]
  end

  self
end
361
+ end
@@ -0,0 +1,231 @@
1
+ require 'rbbt/util/cmd'
2
+ module TSV
3
# Reads the header of a TSV stream and knows how to turn each following
# line into a key and its values.
#
# The constructor consumes the header lines, resolves which columns are the
# key and the fields, and binds #get_values, #cast_values and #add_to_data
# on this instance to the variants matching the TSV type and merge option.
class Parser
  attr_accessor :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight

  # Raised by #process for lines rejected by the :select filter.
  # NOTE(review): derives from Exception, so a bare `rescue` does not catch
  # it; left unchanged because code outside this file may rely on that.
  class SKIP_LINE < Exception; end

  # Key field followed by the fields, set up as a NamedArray of itself.
  def all_fields
    all = [key_field] + fields
    NamedArray.setup all, all
  end

  # Consumes the optional options line ("<header_hash>: ...") and the
  # optional fields line ("<header_hash>key<sep>field...") from +stream+.
  # The first line that is not part of the header is kept for
  # #rescue_first_line. Returns the options found in the options line.
  #
  # Raises RuntimeError ("Empty content") when the stream has no lines.
  def parse_header(stream)
    options = {}

    # header_hash is a literal prefix, not a pattern.
    marker = Regexp.escape(@header_hash)

    # Get line

    line = stream.gets
    raise "Empty content" if line.nil?
    line.chomp!

    # Process options line

    if line and line =~ /^#{marker}: (.*)/
      options = Misc.string2hash $1
      line = stream.gets
    end

    # Determine separator

    @sep = options[:sep] if options[:sep]

    # Process fields line

    if line and line =~ /^#{marker}/
      line.chomp!
      @fields = line.split(@sep)
      @key_field = @fields.shift
      @key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
      line = stream.gets
    end

    @first_line = line

    options
  end

  # Chomps the line, raises SKIP_LINE if a :select proc rejects it, and
  # passes it through the :fix proc when there is one.
  def process(line)
    l = line.chomp
    raise Parser::SKIP_LINE if Proc === @select and not @select.call l
    l = @fix.call l if Proc === @fix
    l
  end

  def cast?
    !! @cast
  end

  # Splits a line into columns, keeping trailing empty columns.
  def chop_line(line)
    line.split(@sep, -1)
  end

  # :single -> [key, first selected value]
  def get_values_single(parts)
    if field_positions.nil?
      key = take_key(parts)
      return key, parts.first
    end
    key = parts[key_position]
    value = parts[field_positions.first]
    [key, value]
  end

  # :list and other flat types -> [key, values]
  def get_values_list(parts)
    if field_positions.nil?
      key = take_key(parts)
      return key, parts
    end
    key = parts[key_position]
    values = parts.values_at(*field_positions)
    [key, values]
  end

  # :double -> [keys, value lists], every column split by sep2
  def get_values_double(parts)
    if field_positions.nil?
      keys = take_key(parts).split(@sep2, -1)
      return keys, parts.collect { |value| value.split(@sep2, -1) }
    end
    keys = parts[key_position].split(@sep2, -1)
    values = parts.values_at(*field_positions).collect { |value| value.split(@sep2, -1) }
    [keys, values]
  end

  # First occurrence of a key wins.
  def add_to_data_no_merge_list(data, key, values)
    data[key] = values unless data.include? key
  end

  # First occurrence of each key wins.
  def add_to_data_no_merge_double(data, keys, values)
    keys.each do |key|
      data[key] = values unless data.include? key
    end
  end

  # Appends +values+, list by list, to what each key already holds.
  #
  # Every key gets its own copy of the lists the first time it is seen, so
  # appending to one key later cannot leak into the other keys of the same
  # line, and a key repeated within a line is only handled once.
  def add_to_data_merge(data, keys, values)
    keys.uniq.each do |key|
      if data.include? key
        current = data[key]
        current.each_with_index do |old, i|
          old.concat values[i] unless values[i].nil?
        end
        data[key] = current
      else
        data[key] = values.collect { |list| list.dup }
      end
    end
  end

  def cast_values_single(value)
    case
    when Symbol === cast
      value.send(cast)
    when Proc === cast
      cast.call value
    end
  end

  def cast_values_list(values)
    case
    when Symbol === cast
      values.collect { |v| v.send(cast) }
    when Proc === cast
      values.collect { |v| cast.call v }
    end
  end

  def cast_values_double(values)
    case
    when Symbol === cast
      values.collect { |list| list.collect { |v| v.send(cast) } }
    when Proc === cast
      values.collect { |list| list.collect { |v| cast.call v } }
    end
  end

  # First non-header line, already read from the stream by #parse_header.
  def rescue_first_line
    @first_line
  end

  # Resolves the :key_field and :fields options (removed from +options+)
  # into @key_position and @field_positions and updates @key_field and
  # @fields to the resulting names.
  #
  # @straight is set to true, and nothing else is done, when the first
  # column is the key and all fields are wanted in file order.
  #
  # Raises RuntimeError for a key or field given in an unsupported format
  # or by a name that is not in the header.
  def fix_fields(options)
    key_field = Misc.process_options options, :key_field
    fields = Misc.process_options options, :fields

    every_field = (fields.nil? or fields == @fields or
                   (not @fields.nil? and fields == (1..@fields.length).to_a))

    if (key_field.nil? or key_field == 0 or key_field == :key) and every_field
      @straight = true
      return
    end

    @straight = false

    # Column names in file order, or nil when the file had no fields line.
    header = @fields.nil? ? nil : @fields.dup.unshift(@key_field)

    @key_position = if key_field.nil? or key_field == :key or key_field == @key_field or key_field == 0
                      0
                    elsif Integer === key_field
                      key_field
                    elsif String === key_field
                      header_position(header, key_field)
                    else
                      raise "Format of key_field not understood: #{key_field.inspect}"
                    end

    if every_field
      # Without a header the number of columns is unknown: positions stay
      # nil and the get_values_* methods take every column but the key.
      unless header.nil?
        @field_positions = (0..@fields.length).to_a
        @field_positions.delete @key_position
      end
    else
      fields = [fields] if not Array === fields
      @field_positions = fields.collect do |field|
        case
        when Integer === field
          field
        when String === field
          header_position(header, field)
        else
          raise "Format of fields not understood: #{fields.inspect}"
        end
      end
    end

    if header.nil?
      @key_field = nil
    else
      @fields = header.values_at(*@field_positions)
      @key_field = header[@key_position]
    end
  end

  # stream::  IO-like object responding to #gets
  # options:: :header_hash, :sep, :sep2, :type, :merge, :cast, :fix,
  #           :select, :key_field and :fields are consumed here; options
  #           found in the stream's options line are used as defaults
  def initialize(stream = nil, options = {})
    @header_hash = Misc.process_options(options, :header_hash) || "#"
    @sep = Misc.process_options(options, :sep) || "\t"

    options = parse_header(stream).merge options

    @type = Misc.process_options(options, :type) || :double
    merge = Misc.process_options(options, :merge) || false

    @sep2 = Misc.process_options(options, :sep2) || "|"
    @cast = Misc.process_options options, :cast
    @fix = Misc.process_options(options, :fix)
    @select = Misc.process_options options, :select

    if @type == :double
      instance_alias :get_values, :get_values_double
      instance_alias :cast_values, :cast_values_double
      instance_alias :add_to_data, (merge ? :add_to_data_merge : :add_to_data_no_merge_double)
    else
      if @type == :single
        instance_alias :get_values, :get_values_single
        instance_alias :cast_values, :cast_values_single
      else
        instance_alias :get_values, :get_values_list
        instance_alias :cast_values, :cast_values_list
      end
      instance_alias :add_to_data, :add_to_data_no_merge_list
    end

    fix_fields(options)

    @straight = false if @sep != "\t" or not @cast.nil? or merge
  end

  # Extends +data+ with TSV if needed and copies type, key field and fields.
  def setup(data)
    data.extend TSV unless TSV === data
    data.type = @type
    data.key_field = @key_field
    data.fields = @fields
    data
  end

  private

  # Removes and returns the key column; used when no explicit field
  # positions are set, so the remaining columns are the values.
  def take_key(parts)
    parts.delete_at(key_position || 0)
  end

  # Position of the column called +name+ in +header+.
  def header_position(header, name)
    position = header.nil? ? nil : header.index(name)
    raise "Field #{name.inspect} not found in header" if position.nil?
    position
  end

  # Aliases +existing+ as +name+ on this instance only.
  def instance_alias(name, existing)
    (class << self; self; end).send(:alias_method, name, existing)
  end
end
231
+ end