rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,453 @@
1
+ require 'rbbt/util/misc'
2
+ require 'rbbt/fix_width_table'
3
+ require 'rbbt/tsv/manipulate'
4
+ require 'rbbt/tsv/filter'
5
+ require 'rbbt/persist/tsv'
6
+ require 'rbbt/persist'
7
+
8
+ module TSV
9
+
10
# Builds a lookup table that maps the values found in this TSV back to the
# keys of the entries that contain them.
#
# Entries are traversed with +through target, fields, true+; for each entry
# the key itself and every value yielded become keys of the index.
#
# Options read here:
# * <tt>:target</tt> - passed to +through+ as the key field (default +:key+).
# * <tt>:fields</tt> - passed to +through+ as the fields to index (default +nil+).
# * <tt>:type</tt>   - shape of the index values: +:single+ (default) keeps
#   one key (the first one registered), +:flat+ keeps an array of keys and
#   +:double+ keeps an array wrapping a single array of keys.
# * <tt>:order</tt>  - when true, matching keys are ranked by the position of
#   the field in which the value was found, the entry key ranking first
#   (default +false+).
# * Persistence options are split off with <tt>Misc.pull_keys options,
#   :persist</tt> (presumably the keys prefixed with +persist+ -- confirm
#   against Misc) and handed to +Persist.persist_tsv+.
#
# Returns whatever +Persist.persist_tsv+ returns; the block given to it
# evaluates to the index after +TSV.setup+.
def index(options = {})
  options = Misc.add_defaults options,
    :target => :key, :fields => nil, :type => :single, :order => false

  persist_options = Misc.pull_keys options, :persist
  persist_options[:prefix] ||= "Index[#{options[:target] || :key}]"

  Log.debug "Index: #{ filename } - #{options.inspect}"
  Persist.persist_tsv self, filename, options, persist_options do |new|
    with_unnamed do
      target, fields, index_type, order = Misc.process_options options, :target, :fields, :type, :order

      new.serializer = index_type if new.respond_to? :serializer and new.serializer == :type

      if order

        # Accumulate in a plain in-memory Hash first instead of the yielded
        # object, which could be persisted
        save = new
        new = {}

        new_key_field, new_fields = through target, fields, true do |key, values|
          next if key.empty?
          if type == :single
            values = [values]
            values.unshift key
          else
            # NOTE(review): for sources that are neither :single nor :double
            # the key is wrapped in an array here and wrapped again below, so
            # the array itself (not the key string) is used as index key --
            # confirm this is intended.
            values = values.dup
            values.unshift [key]
          end

          values.each_with_index do |list, i|
            list = [list] unless type == :double

            list.uniq.each do |value|
              new_value = new.include?(value) ? new[value] : []

              # Slot i gathers, separated by "|", the keys reached through
              # the i-th position (0 is the entry key itself)
              if new_value[i].nil?
                new_value[i] = key
              else
                new_value[i] = new_value[i] + "|" + key
              end
              new[value] = new_value
            end
          end
        end

        # Update original object, flattening the per-position slots in order
        new.each do |key, values|
          ranked_keys = values.compact.collect{|v| v.split "|"}.flatten
          case index_type
          when :double
            save[key] = [ranked_keys.uniq]
          when :flat
            save[key] = ranked_keys.uniq
          when :single
            save[key] = ranked_keys.first
          end
        end

        new = save
      else
        new_key_field, new_fields = through target, fields, true do |key, values|
          case type
          when :single
            values = [values]
          when :double
            values = values.flatten
          else
            values = values.dup
          end

          values.unshift key

          values.uniq.each do |value|
            case index_type
            when :double
              if not new.include? value
                new[value] = [[key]]
              else
                # Re-assign after mutating so stores that serialize their
                # values see the change
                current = new[value]
                current[0] << key
                new[value] = current
              end
            when :flat
              # A :flat index holds an array of keys, matching what the
              # ordered branch produces; previously a bare key was stored.
              if not new.include? value
                new[value] = [key]
              else
                current = new[value]
                current << key
                new[value] = current
              end
            else
              new[value] = key unless new.include? value
            end
          end
        end
      end

      # The index is keyed by the field values and holds the original keys,
      # so key field and fields swap roles
      TSV.setup(new, :serializer => index_type, :type => index_type, :filename => filename, :fields => [new_key_field], :key_field => new_fields * ", ")
    end
  end
end
108
+
109
# Builds an index straight from +file+ without requiring an open TSV.
#
# Persistence options are taken out of +options+ with +Misc.pull_keys+ and
# handed to +Persist.persist_tsv+. Inside its block the file is opened with
# the options pulled under +:data+, and the instance-level +index+ is run on
# it while a monitor describing the work is active. The object yielded by
# +Persist.persist_tsv+ is forwarded to that call as +:persist_data+.
def self.index(file, options = {})
  persist_options = Misc.pull_keys(options, :persist)
  persist_options[:prefix] ||= "StaticIndex[#{options[:target] || :key}]"

  Log.debug "Static Index: #{ file } - #{options.inspect}"

  Persist.persist_tsv(nil, file, options, persist_options) do |store|
    source = TSV.open(file, Misc.pull_keys(options, :data))

    source.with_monitor(:desc => "Creating Index for #{ file }") do
      index_options = options.merge(:persist_data => store, :persist => persist_options[:persist])
      source.index(index_options)
    end
  end
end
122
+
123
# Builds a FixWidthTable point index over +pos_field+ (default "Position").
#
# Every entry contributes one +[key, position]+ pair per position found in
# the first yielded value; positions are converted with +to_i+. The table is
# created with the length of the longest key seen. Persistence is off unless
# the caller asks for it through the +persist+ options.
#
# Returns the result of +Persist.persist+; its block evaluates to the table.
def pos_index(pos_field = nil, options = {})
  pos_field ||= "Position"

  options = Misc.add_defaults(options, :persist => false, :persist_file => nil, :persist_update => false)

  persist_options = Misc.pull_keys(options, :persist)
  persist_options[:prefix] ||= "PosIndex[#{pos_field}]"

  Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
    longest_key = 0
    points = []

    with_unnamed do
      with_monitor :desc => "Creating Index Data", :step => 10000 do
        through :key, pos_field do |key, values|
          longest_key = [longest_key, key.length].max

          # The field may hold a single position or a list of them
          position = values.first
          positions = Array === position ? position : [position]
          positions.each { |p| points << [key, p.to_i] }
        end
      end
    end

    table = FixWidthTable.get(:memory, longest_key, false)
    table.add_point points
    table.read
    table
  end
end
159
+
160
# Builds a position index straight from +file+, optionally filtered.
#
# The persistence name is +file+ itself when it is a String or Path, its
# +filename+ when it responds to that, and its object id otherwise. When
# +:filters+ is given, a description of the filters is appended to that name
# and each filter is added to the opened TSV before indexing.
#
# The actual index is produced by the instance-level +pos_index+.
def self.pos_index(file, pos_field = nil, options = {})
  pos_field ||= "Position"

  data_options = Misc.pull_keys(options, :data)

  filename = if String === file or Path === file
               file
             elsif file.respond_to?(:filename)
               file.filename
             else
               file.object_id.to_s
             end

  persist_options = Misc.pull_keys(options, :persist)
  persist_options[:prefix] ||= "StaticPosIndex[#{pos_field}]"

  filters = Misc.process_options(options, :filters)

  # Filtered and unfiltered indices must not share a persistence name
  filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]" if filters

  Persist.persist(filename, :fwt, persist_options) do
    tsv = TSV.open(file, data_options)

    if filters
      tsv.filter
      filters.each { |match, value| tsv.add_filter(match, value) }
    end

    tsv.pos_index(pos_field, options)
  end
end
192
+
193
# Builds a FixWidthTable range index over +start_field+ and +end_field+
# (defaults "Start" and "End").
#
# Every entry contributes one +[key, [start, end]]+ pair per range; when the
# start value is an array it is zipped with the end value, otherwise the two
# values form a single range. Bounds are converted with +to_i+. The table is
# created with the length of the longest key seen. Persistence is off unless
# the caller asks for it through the +persist+ options.
#
# Returns the result of +Persist.persist+; its block evaluates to the table.
def range_index(start_field = nil, end_field = nil, options = {})
  start_field ||= "Start"
  end_field ||= "End"

  options = Misc.add_defaults(options, :persist => false, :persist_file => nil, :persist_update => false)

  persist_options = Misc.pull_keys(options, :persist)
  persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"

  Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
    longest_key = 0
    ranges = []

    with_unnamed do
      with_monitor :desc => "Creating Index Data", :step => 10000 do
        through :key, [start_field, end_field] do |key, values|
          longest_key = [longest_key, key.length].max

          starts, ends = values
          bounds = Array === starts ? starts.zip(ends) : [[starts, ends]]
          bounds.each { |s, e| ranges << [key, [s.to_i, e.to_i]] }
        end
      end
    end

    table = FixWidthTable.get(:memory, longest_key, true)
    table.add_range ranges
    table.read
    table
  end
end
230
+
231
# Builds a range index straight from +file+, optionally filtered.
#
# The persistence name is +file+ itself when it is a String or Path, its
# +filename+ when it responds to that, and its object id otherwise. When
# +:filters+ is given, a description of the filters is appended to that name
# and each filter is added to the opened TSV before indexing.
#
# The actual index is produced by the instance-level +range_index+.
def self.range_index(file, start_field = nil, end_field = nil, options = {})
  start_field ||= "Start"
  end_field ||= "End"

  data_options = Misc.pull_keys(options, :data)

  filename = if String === file or Path === file
               file
             elsif file.respond_to?(:filename)
               file.filename
             else
               file.object_id.to_s
             end

  persist_options = Misc.pull_keys(options, :persist)
  persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"

  filters = Misc.process_options(options, :filters)

  # Filtered and unfiltered indices must not share a persistence name
  filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]" if filters

  Persist.persist(filename, :fwt, persist_options) do
    tsv = TSV.open(file, data_options)

    if filters
      tsv.filter
      filters.each { |match, value| tsv.add_filter(match, value) }
    end

    tsv.range_index(start_field, end_field, options)
  end
end
265
+
266
+
267
+ # def self.field_matches(tsv, values)
268
+ # values = [values] if not Array === values
269
+ # Log.debug "Matching #{values.length} values to #{tsv.filename}"
270
+ #
271
+ # if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
272
+ # return {}
273
+ # end
274
+ #
275
+ # key_field = tsv.key_field
276
+ # fields = tsv.fields
277
+ #
278
+ # field_values = {}
279
+ # fields.each{|field|
280
+ # field_values[field] = []
281
+ # }
282
+ #
283
+ # if tsv.type == :double
284
+ # tsv.through do |key,entry_values|
285
+ # fields.zip(entry_values).each do |field,entry_field_values|
286
+ # field_values[field].concat entry_field_values unless entry_field_values.nil?
287
+ # end
288
+ # end
289
+ # else
290
+ # tsv.through do |key,entry_values|
291
+ # fields.zip(entry_values).each do |field,entry_field_values|
292
+ # field_values[field] << entry_field_values
293
+ # end
294
+ # end
295
+ # end
296
+ #
297
+ # field_values.each do |field,field_value_list|
298
+ # field_value_list.replace(values & field_value_list.flatten.uniq)
299
+ # end
300
+ #
301
+ # field_values[key_field] = values & tsv.keys
302
+ #
303
+ # field_values
304
+ # end
305
+ #
306
+ # def field_matches(values)
307
+ # TSV.field_matches(self, values)
308
+ # end
309
+ #
310
+ # def guess_field(values)
311
+ # field_matches(values).sort_by{|field, matches| matches.uniq.length}.last
312
+ # end
313
+ #
314
+ # def pos_index(pos_field = nil, options = {})
315
+ # pos_field ||= "Position"
316
+ #
317
+ # options = Misc.add_defaults options,
318
+ # :persistence => true, :persistence_file => nil, :persistence_update => false
319
+ #
320
+ # prefix = "Pos[#{pos_field}]"
321
+ #
322
+ # Persistence.persist(filename, prefix, :fwt, options.merge({
323
+ # :pos_field => pos_field,
324
+ # :filters => (self.respond_to?(:filters)? filters.collect{|f| [f.match, f.value]} : [])
325
+ # })) do |file, options, filename|
326
+ # pos_field = options[:pos_field]
327
+ # value_size = 0
328
+ # index_data = []
329
+ #
330
+ # through :key, pos_field do |key, values|
331
+ # value_size = key.length if key.length > value_size
332
+ #
333
+ # pos = values.first
334
+ # if Array === pos
335
+ # pos.each do |p|
336
+ # index_data << [key, p.to_i]
337
+ # end
338
+ # else
339
+ # index_data << [key, pos.to_i]
340
+ # end
341
+ # end
342
+ #
343
+ # index = FixWidthTable.get(:memory, value_size, false)
344
+ # index.add_point index_data
345
+ # index.read
346
+ # index
347
+ # end
348
+ # end
349
+ #
350
+ # def self.pos_index(file, pos_field = nil, options = {})
351
+ # options = Misc.add_defaults options,
352
+ # :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
353
+ # :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file
354
+ #
355
+ # #options_data = {
356
+ # # :cast => Misc.process_options(options, :data_cast),
357
+ # # :persistence => Misc.process_options(options, :data_persistence),
358
+ # # :monitor => Misc.process_options(options, :data_monitor),
359
+ # # :persistence_file => Misc.process_options(options, :data_persistence_file),
360
+ # # :persistence_update => Misc.process_options(options, :data_persistence_update),
361
+ # # :in_situ_persistence => Misc.process_options(options,:data_in_situ_persistence),
362
+ # # :persistence_source => Misc.process_options(options, :data_persistence_source),
363
+ # # :importtsv => Misc.process_options(options, :data_importtsv),
364
+ # #}
365
+ #
366
+ # options_data = Misc.pull_data_keys(options)
367
+ #
368
+ # prefix = "Pos[#{pos_field}]"
369
+ #
370
+ # new = Persistence.persist(file, prefix, :fwt, options.merge({:pos_field => pos_field})) do |file, options, filename|
371
+ # tsv = TSV.new(file, :list, options_data)
372
+ #
373
+ # if options.include?(:filters) and Array === options[:filters] and not options[:filters].empty?
374
+ # tsv.filter
375
+ # options[:filters].each do |match, value, persistence|
376
+ # tsv.add_filter(match, value, persistence)
377
+ # end
378
+ # end
379
+ #
380
+ # tsv.pos_index options[:pos_field], options.merge(:persistence => false, :persistence_file => nil)
381
+ # end
382
+ # end
383
+ #
384
+ # def range_index(start_field = nil, end_field = nil, options = {})
385
+ # start_field ||= "Start"
386
+ # end_field ||= "End"
387
+ # options = Misc.add_defaults options,
388
+ # :persistence => true, :persistence_file => nil, :persistence_update => false
389
+ #
390
+ # prefix = "Range[#{start_field}-#{end_field}]"
391
+ #
392
+ # Persistence.persist(filename, prefix, :fwt, options.merge({
393
+ # :start_field => start_field, :end_field => end_field,
394
+ # :filters => (self.respond_to?(:filters)? filters.collect{|f| [f.match, f.value]} : [])
395
+ # })) do |file, options, filename|
396
+ # start_field, end_field = options.values_at :start_field, :end_field
397
+ #
398
+ # value_size = 0
399
+ # index_data = []
400
+ #
401
+ # through :key, [start_field, end_field] do |key, values|
402
+ # value_size = key.length if key.length > value_size
403
+ #
404
+ # start_pos, end_pos = values
405
+ #
406
+ # if Array === start_pos
407
+ # start_pos.zip(end_pos).each do |s,e|
408
+ # index_data << [key, [s.to_i, e.to_i]]
409
+ # end
410
+ # else
411
+ # index_data << [key, [start_pos.to_i, end_pos.to_i]]
412
+ # end
413
+ # end
414
+ #
415
+ # index = FixWidthTable.get(:memory, value_size, true)
416
+ # index.add_range index_data
417
+ # index.read
418
+ # index
419
+ # end
420
+ # end
421
+ #
422
+ # def self.range_index(file, start_field = nil, end_field = nil, options = {})
423
+ # options = Misc.add_defaults options,
424
+ # :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
425
+ # :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file
426
+ #
427
+ # options_data = {
428
+ # :persistence => Misc.process_options(options, :data_persistence),
429
+ # :persistence_file => Misc.process_options(options, :data_persistence_file),
430
+ # :persistence_update => Misc.process_options(options, :data_persistence_update),
431
+ # :persistence_source => Misc.process_options(options, :data_persistence_source),
432
+ # }
433
+ #
434
+ # prefix = "Range[#{start_field}-#{end_field}]"
435
+ #
436
+ # options_data[:type] = :flat if options[:order] == false
437
+ #
438
+ # Persistence.persist(file, prefix, :fwt, options.merge({:start_field => start_field, :end_field => end_field})) do |file, options, filename|
439
+ # tsv = TSV.new(file, :list, options_data)
440
+ #
441
+ # if options.include?(:filters) and Array === options[:filters] and not options[:filters].empty?
442
+ # tsv.filter
443
+ # options[:filters].each do |match, value, persistence|
444
+ # tsv.add_filter(match, value, persistence)
445
+ # end
446
+ # end
447
+ #
448
+ # tsv.range_index options[:start_field], options[:end_field], options.merge(:persistence => false, :persistence_file => nil)
449
+ # end
450
+ # end
451
+ #
452
+ end
453
+