rbbt-util 5.44.0 → 6.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (169) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +67 -90
  3. data/etc/app.d/base.rb +3 -3
  4. data/etc/app.d/semaphores.rb +3 -3
  5. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  6. data/lib/rbbt/annotations/refactor.rb +27 -0
  7. data/lib/rbbt/annotations/util.rb +282 -282
  8. data/lib/rbbt/annotations.rb +343 -320
  9. data/lib/rbbt/association/database.rb +200 -225
  10. data/lib/rbbt/association/index.rb +294 -291
  11. data/lib/rbbt/association/item.rb +227 -227
  12. data/lib/rbbt/association/open.rb +35 -34
  13. data/lib/rbbt/association/util.rb +0 -169
  14. data/lib/rbbt/association.rb +2 -4
  15. data/lib/rbbt/entity/identifiers.rb +119 -118
  16. data/lib/rbbt/entity/refactor.rb +12 -0
  17. data/lib/rbbt/entity.rb +319 -315
  18. data/lib/rbbt/hpc/batch.rb +72 -53
  19. data/lib/rbbt/hpc/lsf.rb +2 -2
  20. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  21. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  22. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  24. data/lib/rbbt/hpc/slurm.rb +18 -18
  25. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  26. data/lib/rbbt/knowledge_base/query.rb +2 -2
  27. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  28. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  29. data/lib/rbbt/knowledge_base.rb +1 -1
  30. data/lib/rbbt/monitor.rb +36 -25
  31. data/lib/rbbt/persist/refactor.rb +166 -0
  32. data/lib/rbbt/persist/tsv/sharder.rb +1 -1
  33. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  34. data/lib/rbbt/persist/tsv.rb +187 -185
  35. data/lib/rbbt/persist.rb +556 -551
  36. data/lib/rbbt/refactor.rb +20 -0
  37. data/lib/rbbt/resource/path/refactor.rb +178 -0
  38. data/lib/rbbt/resource/path.rb +317 -497
  39. data/lib/rbbt/resource/util.rb +0 -48
  40. data/lib/rbbt/resource.rb +3 -390
  41. data/lib/rbbt/tsv/accessor.rb +2 -838
  42. data/lib/rbbt/tsv/attach.rb +303 -299
  43. data/lib/rbbt/tsv/change_id.rb +244 -245
  44. data/lib/rbbt/tsv/csv.rb +87 -85
  45. data/lib/rbbt/tsv/dumper.rb +2 -100
  46. data/lib/rbbt/tsv/excel.rb +26 -24
  47. data/lib/rbbt/tsv/field_index.rb +4 -1
  48. data/lib/rbbt/tsv/filter.rb +3 -2
  49. data/lib/rbbt/tsv/index.rb +2 -284
  50. data/lib/rbbt/tsv/manipulate.rb +750 -747
  51. data/lib/rbbt/tsv/marshal.rb +3 -3
  52. data/lib/rbbt/tsv/matrix.rb +2 -2
  53. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  54. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  55. data/lib/rbbt/tsv/parser.rb +678 -678
  56. data/lib/rbbt/tsv/refactor.rb +195 -0
  57. data/lib/rbbt/tsv/stream.rb +253 -251
  58. data/lib/rbbt/tsv/util.rb +420 -420
  59. data/lib/rbbt/tsv.rb +210 -208
  60. data/lib/rbbt/util/R/eval.rb +4 -4
  61. data/lib/rbbt/util/R/plot.rb +62 -166
  62. data/lib/rbbt/util/R.rb +21 -18
  63. data/lib/rbbt/util/cmd.rb +2 -318
  64. data/lib/rbbt/util/color.rb +269 -269
  65. data/lib/rbbt/util/colorize.rb +89 -89
  66. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  67. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  68. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  69. data/lib/rbbt/util/config.rb +169 -167
  70. data/lib/rbbt/util/iruby.rb +20 -0
  71. data/lib/rbbt/util/log/progress/report.rb +241 -241
  72. data/lib/rbbt/util/log/progress/util.rb +99 -99
  73. data/lib/rbbt/util/log/progress.rb +102 -102
  74. data/lib/rbbt/util/log/refactor.rb +49 -0
  75. data/lib/rbbt/util/log.rb +486 -532
  76. data/lib/rbbt/util/migrate.rb +1 -1
  77. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  78. data/lib/rbbt/util/misc/development.rb +12 -11
  79. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  80. data/lib/rbbt/util/misc/format.rb +2 -230
  81. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  82. data/lib/rbbt/util/misc/inspect.rb +2 -476
  83. data/lib/rbbt/util/misc/lock.rb +109 -106
  84. data/lib/rbbt/util/misc/omics.rb +10 -1
  85. data/lib/rbbt/util/misc/pipes.rb +765 -793
  86. data/lib/rbbt/util/misc/refactor.rb +20 -0
  87. data/lib/rbbt/util/misc/ssw.rb +308 -0
  88. data/lib/rbbt/util/misc/system.rb +0 -15
  89. data/lib/rbbt/util/misc.rb +39 -20
  90. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  91. data/lib/rbbt/util/named_array.rb +3 -220
  92. data/lib/rbbt/util/open/refactor.rb +7 -0
  93. data/lib/rbbt/util/open.rb +3 -857
  94. data/lib/rbbt/util/procpath.rb +6 -6
  95. data/lib/rbbt/util/python/paths.rb +27 -0
  96. data/lib/rbbt/util/python/run.rb +115 -0
  97. data/lib/rbbt/util/python/script.rb +110 -0
  98. data/lib/rbbt/util/python/util.rb +3 -3
  99. data/lib/rbbt/util/python.rb +22 -81
  100. data/lib/rbbt/util/semaphore.rb +152 -148
  101. data/lib/rbbt/util/simpleopt.rb +9 -8
  102. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  103. data/lib/rbbt/util/ssh.rb +122 -118
  104. data/lib/rbbt/util/tar.rb +117 -115
  105. data/lib/rbbt/util/tmpfile.rb +69 -67
  106. data/lib/rbbt/util/version.rb +2 -0
  107. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  108. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  109. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  110. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  111. data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
  112. data/lib/rbbt/workflow/refactor.rb +153 -0
  113. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  114. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  115. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  116. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  117. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  118. data/lib/rbbt/workflow/step/info.rb +2 -2
  119. data/lib/rbbt/workflow/step/run.rb +766 -766
  120. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  121. data/lib/rbbt/workflow/step.rb +2 -362
  122. data/lib/rbbt/workflow/task.rb +118 -118
  123. data/lib/rbbt/workflow/usage.rb +289 -287
  124. data/lib/rbbt/workflow/util/archive.rb +6 -5
  125. data/lib/rbbt/workflow/util/data.rb +1 -1
  126. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  127. data/lib/rbbt/workflow/util/trace.rb +79 -44
  128. data/lib/rbbt/workflow.rb +4 -882
  129. data/lib/rbbt-util.rb +21 -13
  130. data/lib/rbbt.rb +16 -3
  131. data/python/rbbt/__init__.py +19 -1
  132. data/share/Rlib/plot.R +37 -37
  133. data/share/Rlib/svg.R +22 -5
  134. data/share/install/software/lib/install_helpers +1 -1
  135. data/share/rbbt_commands/hpc/list +2 -3
  136. data/share/rbbt_commands/hpc/orchestrate +4 -4
  137. data/share/rbbt_commands/hpc/tail +2 -0
  138. data/share/rbbt_commands/hpc/task +10 -7
  139. data/share/rbbt_commands/lsf/list +2 -3
  140. data/share/rbbt_commands/lsf/orchestrate +4 -4
  141. data/share/rbbt_commands/lsf/tail +2 -0
  142. data/share/rbbt_commands/lsf/task +10 -7
  143. data/share/rbbt_commands/migrate +1 -1
  144. data/share/rbbt_commands/pbs/list +2 -3
  145. data/share/rbbt_commands/pbs/orchestrate +4 -4
  146. data/share/rbbt_commands/pbs/tail +2 -0
  147. data/share/rbbt_commands/pbs/task +10 -7
  148. data/share/rbbt_commands/resource/produce +8 -1
  149. data/share/rbbt_commands/slurm/list +2 -3
  150. data/share/rbbt_commands/slurm/orchestrate +4 -4
  151. data/share/rbbt_commands/slurm/tail +2 -0
  152. data/share/rbbt_commands/slurm/task +10 -7
  153. data/share/rbbt_commands/system/clean +5 -5
  154. data/share/rbbt_commands/system/status +5 -5
  155. data/share/rbbt_commands/tsv/get +2 -3
  156. data/share/rbbt_commands/tsv/info +10 -13
  157. data/share/rbbt_commands/tsv/keys +18 -14
  158. data/share/rbbt_commands/tsv/slice +2 -2
  159. data/share/rbbt_commands/tsv/transpose +6 -2
  160. data/share/rbbt_commands/workflow/info +20 -24
  161. data/share/rbbt_commands/workflow/list +1 -1
  162. data/share/rbbt_commands/workflow/prov +20 -13
  163. data/share/rbbt_commands/workflow/server +16 -1
  164. data/share/rbbt_commands/workflow/task +76 -71
  165. data/share/rbbt_commands/workflow/write_info +26 -9
  166. data/share/software/opt/ssw/ssw.c +861 -0
  167. data/share/software/opt/ssw/ssw.h +130 -0
  168. data/share/workflow_config.ru +3 -3
  169. metadata +42 -3
@@ -1,781 +1,783 @@
1
- module TSV
2
-
3
- def self.obj_stream(obj)
4
- case obj
5
- when nil
6
- nil
7
- when (defined? Step and Step)
8
- obj.result
9
- when IO, File, Zlib::GzipReader, Bgzf
10
- obj
11
- when TSV::Dumper
12
- obj.stream
13
- when TSV::Parser
14
- obj.stream
15
- end
16
- end
17
-
18
- def self.guess_max(obj)
19
- begin
20
- case obj
21
- when (defined? Step and Step)
22
- if obj.done?
23
- path = obj.path
24
- path = path.find if path.respond_to? :find
25
- if File.exist? path
26
- CMD.cmd("wc -l '#{path}'").read.to_i
27
- else
28
- nil
29
- end
30
- else
31
- nil
32
- end
33
- when TSV
34
- obj.length
35
- when Array, Hash
36
- obj.size
37
- when File
38
- return nil if Open.gzip?(obj) or Open.bgzip?(obj)
39
- CMD.cmd("wc -l '#{obj.path}'").read.to_i
40
- when Path, String
41
- obj = obj.find if Path === obj
42
- if File.exist? obj
43
- return nil if Open.gzip?(obj) or Open.bgzip?(obj)
44
- CMD.cmd("wc -l '#{obj}'").read.to_i
45
- else
46
- nil
47
- end
48
- end
49
- rescue Exception
50
- Log.exception $!
51
- nil
52
- end
53
- end
54
-
55
- def self.stream_name(obj)
56
- return "nil" if obj.nil?
57
- #filename_obj = obj.respond_to?(:filename) ? obj.filename : nil
58
- #filename_obj ||= obj.respond_to?(:path) ? obj.path : nil
59
- stream_obj = obj_stream(obj) || obj
60
- obj.class.to_s << "-" << Misc.fingerprint(stream_obj)
61
- end
62
-
63
- def self.report(msg, obj, into)
64
- into = into[:into] if Hash === into and into.include? :into
65
-
66
- Log.low{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
67
- end
68
-
69
- #{{{ TRAVERSE OBJECTS
70
-
71
- def self.traverse_tsv(tsv, options = {}, &block)
72
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
73
-
74
- begin
75
- error = false
76
- fields = tsv.fields
77
- if callback
78
- bar.init if bar
79
- tsv.through options[:key_field], options[:fields] do |k,v|
80
- begin
81
- callback.call yield(k,v,fields)
82
- rescue Exception
83
- Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, fields])}"
84
- raise $!
85
- ensure
86
- bar.tick if bar
87
- end
88
- end
89
- else
90
- bar.init if bar
91
- tsv.through options[:key_field], options[:fields] do |k,v|
92
- begin
93
- yield k,v,fields
94
- ensure
95
- bar.tick if bar
96
- end
97
- end
98
- end
99
- rescue
100
- error = true
101
- raise $!
102
- ensure
103
- join.call(error) if join
104
- Log::ProgressBar.remove_bar(bar, error) if bar
105
- end
106
- end
107
-
108
- def self.traverse_hash(hash, options = {}, &block)
109
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
110
-
111
- begin
112
- error = false
113
- if callback
114
- bar.init if bar
115
- hash.each do |k,v|
116
- begin
117
- callback.call yield(k,v)
118
- ensure
119
- bar.tick if bar
120
- end
121
- end
122
- else
123
- bar.init if bar
124
- hash.each do |k,v|
125
- begin
126
- yield k,v
127
- rescue Exception
128
- Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v])}"
129
- raise $!
130
- ensure
131
- bar.tick if bar
132
- end
133
- end
134
- end
135
- rescue
136
- error = true
137
- raise $!
138
- ensure
139
- join.call(error) if join
140
- Log::ProgressBar.remove_bar(bar, error) if bar
141
- end
142
- end
143
-
144
- def self.traverse_array(array, options = {}, &block)
145
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
146
-
147
- begin
148
- error = false
149
- if callback
150
- bar.init if bar
151
- array.each do |e|
152
- begin
153
- callback.call yield(e)
154
- rescue Exception
155
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
156
- raise $!
157
- ensure
158
- bar.tick if bar
159
- end
160
- end
161
- else
162
- bar.init if bar
163
- array.each do |e|
164
- begin
165
- yield e
166
- rescue Exception
167
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
168
- raise $!
169
- ensure
170
- bar.tick if bar
171
- end
172
- end
173
- end
174
-
175
- rescue
176
- error = true
177
- raise $!
178
- ensure
179
- join.call(error) if join
180
- Log::ProgressBar.remove_bar(bar, error) if bar
181
- end
182
- end
183
-
184
- def self.traverse_enumerable(enum, options = {}, &block)
185
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
186
-
187
- begin
188
- error = false
189
- if callback
190
- bar.init if bar
191
- while enum.any?
192
- e = enum.pop
193
- begin
194
- callback.call yield(e)
195
- rescue Exception
196
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
197
- raise $!
198
- ensure
199
- bar.tick if bar
200
- end
201
- end
202
- else
203
- bar.init if bar
204
- while enum.any?
205
- e = enum.pop
206
- begin
207
- yield e
208
- rescue Exception
209
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
210
- raise $!
211
- ensure
212
- bar.tick if bar
213
- end
214
- end
215
- end
216
-
217
- rescue
218
- error = true
219
- raise $!
220
- ensure
221
- join.call(error) if join
222
- Log::ProgressBar.remove_bar(bar, error) if bar
223
- end
224
- end
225
-
226
- def self.traverse_priority_queue(queue, options = {}, &block)
227
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
228
-
229
- begin
230
- error = false
231
- if callback
232
- bar.init if bar
233
- while queue.any?
234
- e = queue.pop
235
- begin
236
- callback.call yield(e)
237
- rescue Exception
238
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
239
- raise $!
240
- ensure
241
- bar.tick if bar
242
- end
243
- end
244
- else
245
- bar.init if bar
246
- while queue.any?
247
- e = queue.pop
248
- begin
249
- yield e
250
- rescue Exception
251
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
252
- raise $!
253
- ensure
254
- bar.tick if bar
255
- end
256
- end
257
- end
258
-
259
- rescue
260
- error = true
261
- raise $!
262
- ensure
263
- join.call(error) if join
264
- Log::ProgressBar.remove_bar(bar, error) if bar
265
- end
266
- end
267
-
268
- def self.traverse_io_array(io, options = {}, &block)
269
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
270
- begin
271
- error = false
272
- if File === io and io.closed?
273
- begin
274
- Log.low{"Rewinding stream #{stream_name(io)}"}
275
- io.reopen io.filename, "r"
276
- rescue
277
- Log.exception $!
278
- raise "File closed and could not reopen #{stream_name(io)}"
279
- end
280
- end
281
-
282
- if callback
283
- bar.init if bar
284
- while line = io.gets
285
- if line[-1] != "\n"
286
- while c = io.getc
287
- line << c
288
- break if c=="\n"
289
- end
290
- end
291
- begin
292
- callback.call yield line.chomp
293
- rescue Exception
294
- Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
295
- raise $!
296
- ensure
297
- bar.tick if bar
298
- end
299
- end
300
- else
301
- bar.init if bar
302
- while line = io.gets
303
- begin
304
- yield line.chomp
305
- rescue Exception
306
- Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
307
- raise $!
308
- ensure
309
- bar.tick if bar
310
- end
311
- end
312
- end
313
- rescue
314
- error = true
315
- raise $!
316
- ensure
317
- join.call(error) if join
318
- Log::ProgressBar.remove_bar(bar, error) if bar
319
- end
320
- end
321
-
322
- def self.traverse_io(io, options = {}, &block)
323
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
324
-
325
- begin
326
- error = false
327
- if File === io and io.closed?
328
- begin
329
- Log.low{"Rewinding stream #{stream_name(io)}"}
330
- io.reopen io.filename, "r"
331
- rescue
332
- Log.exception $!
333
- raise "File closed and could not reopen #{stream_name(io)}"
334
- end
335
- end
336
-
337
- options[:monitor] = bar
338
- if callback
339
- bar.init if bar
340
- exception = nil
341
- begin
342
- TSV::Parser.traverse(io, options) do |k,v,f|
343
- begin
344
- callback.call yield k, v, f
345
- rescue Exception
346
- exception = $!
347
- Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, f])}"
348
- raise $!
349
- end
350
- bar.tick if bar
351
- end
352
- ensure
353
- raise exception if exception
354
- end
355
- else
356
- TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
357
- end
358
- rescue
359
- error = true
360
- raise $!
361
- ensure
362
- join.call(error) if join
363
- Log::ProgressBar.remove_bar(bar, error) if bar
364
- end
365
- end
366
-
367
- def self.traverse_obj(obj, options = {}, &block)
368
- if options[:type] == :keys
369
- options[:fields] = []
370
- options[:type] = :single
371
- end
372
-
373
- name = stream_name(obj)
374
- Log.low{"Traversing #{name} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
375
- begin
376
- case obj
377
- when (defined? FastContainers && FastContainers::PriorityQueue === obj)
378
- traverse_priority_queue(obj, options, &block)
379
- when TSV
380
- traverse_tsv(obj, options, &block)
381
- when Hash
382
- traverse_hash(obj, options, &block)
383
- when TSV::Parser
384
- callback = Misc.process_options options, :callback
385
- if callback
386
- obj.traverse(options) do |k,v|
387
- callback.call yield k, v
388
- end
389
- else
390
- obj.traverse(options, &block)
391
- end
392
- when IO, File, Zlib::GzipReader, Bgzf, StringIO
393
- begin
394
- if options[:type] == :array or options[:type] == :line
395
- traverse_io_array(obj, options, &block)
396
- else
397
- traverse_io(obj, options, &block)
398
- end
399
- rescue Aborted
400
- obj.abort if obj.respond_to? :abort
401
- raise $!
402
- rescue Exception
403
- obj.abort if obj.respond_to? :abort
404
- raise $!
405
- ensure
406
- obj.close if obj.respond_to? :close and not obj.closed?
407
- obj.join if obj.respond_to? :join
408
- end
409
- when Path
410
- obj.open do |stream|
411
- traverse_obj(stream, options, &block)
412
- end
413
- when TSV::Dumper
414
- traverse_obj(obj.stream, options, &block)
415
- when (defined? Step and Step)
416
-
417
- obj.clean if obj.aborted? or obj.recoverable_error?
418
- obj.run(true) unless obj.done? || obj.started? || obj.result
419
-
420
- stream = obj.get_stream
421
- options = {:type => :array}.merge(options) if obj.result_type == :array
422
-
423
- if stream
424
- traverse_obj(stream, options, &block)
425
- else
426
- obj.join
427
- traverse_obj(obj.path, options, &block)
428
- end
429
- when Array
430
- traverse_array(obj, options, &block)
431
- when Set
432
- traverse_array(obj.to_a, options, &block)
433
- when String
434
- if Open.remote?(obj) || Open.ssh?(obj) || Misc.is_filename?(obj)
435
- Open.open(obj) do |s|
436
- traverse_obj(s, options, &block)
437
- end
438
- else
439
- raise "Can not open obj for traversal #{Misc.fingerprint obj}"
440
- end
441
- when Enumerable
442
- traverse_enumerable(obj, options, &block)
443
- when nil
444
- raise "Can not traverse nil object into #{stream_name(options[:into])}"
445
- else
446
- raise "Unknown object for traversal: #{Misc.fingerprint obj }"
447
- end
448
- rescue IOError
449
- Log.low{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
450
- abort_stream obj
451
- abort_stream options[:into], $!
452
- raise $!
453
- rescue Errno::EPIPE
454
- Log.low{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
455
- abort_stream obj
456
- abort_stream options[:into], $!
457
- raise $!
458
- rescue Aborted
459
- Log.low{"Aborted traversing #{stream_name(obj)}"}
460
- abort_stream obj
461
- abort_stream options[:into], $!
462
- raise $!
463
- rescue Exception
464
- Log.low{"Exception traversing #{stream_name(obj)}"}
465
- abort_stream obj unless String === obj
466
- abort_stream options[:into], $!
467
- raise $!
468
- end
469
- end
470
-
471
- def self.traverse_threads(num, obj, options, &block)
472
- callback = Misc.process_options options, :callback
473
-
474
- q = RbbtThreadQueue.new num
475
-
476
- if callback
477
- block = Proc.new do |*args|
478
- mutex = args.pop
479
- res = yield *args
480
- mutex.synchronize do
481
- callback.call res
482
- end
483
- end
484
- end
485
-
486
- q.init true, &block
487
-
488
- traverse_obj(obj, options) do |*p|
489
- q.process p
490
- end
491
-
492
- q.join
493
- nil
494
- end
495
-
496
- def self.traverse_cpus(num, obj, options, &block)
497
- begin
498
- error = false
499
-
500
- callback, cleanup, join, respawn, bar = Misc.process_options options, :callback, :cleanup, :join, :respawn, :bar
501
- respawn = true if ENV["RBBT_RESPAWN"] and ENV["RBBT_RESPAWN"] == "true"
502
-
503
- Log.low "Traversing in #{ num } cpus: #{respawn ? "respawn" : "no respawn"}"
504
- q = RbbtProcessQueue.new num, cleanup, join, respawn, !!bar
505
- callback = Proc.new{ bar.tick } if callback.nil? and bar
506
- q.callback &callback
507
- q.init &block
508
-
509
- bar.init if bar
510
- traverse_obj(obj, options) do |*p|
511
- q.process *p
512
- end
513
-
514
- q.join
515
-
516
- rescue Interrupt, Aborted
517
- error = true
518
- Log.low{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
519
- q.abort
520
- stream = obj_stream(obj)
521
- stream.abort if stream.respond_to? :abort
522
- stream = obj_stream(options[:into])
523
- stream.abort if stream.respond_to? :abort
524
- q.join
525
- raise "Traversal aborted"
526
- rescue Exception
527
- error = true
528
- Log.low{"Exception during traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.message}"}
529
- q.abort
530
- stream = obj_stream(obj)
531
- stream.abort if stream.respond_to? :abort
532
- stream = obj_stream(options[:into])
533
- stream.abort if stream.respond_to? :abort
534
- q.join
535
- raise $!
536
- ensure
537
- Log::ProgressBar.remove_bar(bar, error) if bar
538
- end
539
- end
540
-
541
- def self.store_into(store, value)
542
- if MultipleResult === value
543
- value.each do |v|
544
- store_into store, v
545
- end
546
- return
547
- end
548
- begin
549
- return false if value.nil?
550
- case store
551
- when TSV
552
- if store.type == :double or store.type == :flat
553
- case value
554
- when TSV, Hash
555
- store.merge_zip value
556
- else
557
- store.zip_new *value
558
- end
559
- else
560
- k,v = value
561
- store[k] = v
562
- end
563
- when Hash
564
- case value
565
- when TSV, Hash
566
- store.merge! value
567
- else
568
- k,v = value
569
- store[k] = v
570
- end
571
- when TSV::Dumper
572
- return false if value.nil?
573
- store.add *value
574
- when IO
575
- return false if value.nil?
576
- value.chomp!
577
- store.puts value
578
- else
579
- store << value
580
- end
581
- true
582
- rescue Aborted, Interrupt
583
- Log.low "Aborted storing into #{Misc.fingerprint store}"
584
- abort_stream(store, $!)
585
- raise $!
586
- rescue Exception
587
- Log.low "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
588
- abort_stream(store, $!)
589
- raise $!
590
- end
591
- end
592
-
593
- def self.get_streams_to_close(obj)
594
- close_streams = []
595
- case obj
596
- when IO, File
597
- close_streams << obj
598
- when TSV::Parser
599
- when TSV::Dumper
600
- close_streams << obj.result.in_stream
601
- when (defined? Step and Step)
602
- obj.mutex.synchronize do
603
- case obj.result
604
- when IO
605
- close_streams << obj.result
606
- when TSV::Dumper
607
- close_streams << obj.result.in_stream
608
- end
609
- end
610
- obj.inputs.each do |input|
611
- close_streams = get_streams_to_close(input) + close_streams
612
- end
613
- obj.dependencies.each do |dependency|
614
- close_streams = get_streams_to_close(dependency) + close_streams
615
- end
616
- end
617
- close_streams
618
- end
619
-
620
- def self.traverse_run(obj, threads, cpus, options = {}, &block)
621
- threads = nil if threads == 1
622
- cpus = nil if cpus == 1
623
- if ENV["RBBT_NO_MAP_REDUCE"] == "true" or (threads.nil? and cpus.nil?)
624
- traverse_obj obj, options, &block
625
- else
626
- if threads
627
- traverse_threads threads, obj, options, &block
628
- else
629
- close_streams = Misc.process_options(options, :close_streams) || []
630
- close_streams = [close_streams] unless Array === close_streams
631
-
632
- close_streams.concat(get_streams_to_close(obj))
633
- options[:close_streams] = close_streams
634
-
635
- if close_streams and close_streams.any?
636
- options[:cleanup] = Proc.new do
637
- close_streams.uniq.each do |s|
638
- s.close unless s.closed?
639
- end
640
- end
641
- end
642
-
643
- traverse_cpus cpus, obj, options, &block
644
- end
645
- end
646
- end
647
-
648
- def self.traverse_stream(obj, threads = nil, cpus = nil, options = {}, &block)
649
- into = options[:into]
650
-
651
- thread = Thread.new do
652
- begin
653
- traverse_run(obj, threads, cpus, options, &block)
654
- into.close if into.respond_to?(:close) and not (into.respond_to?(:closed?) and into.closed?)
655
- rescue Exception
656
- abort_stream obj
657
- abort_stream into
658
- raise $!
659
- end
660
- end
661
-
662
- ConcurrentStream.setup(obj_stream(into), :threads => thread)
663
- end
664
-
665
- def self.traverse(obj, options = {}, &block)
666
- into = options[:into]
667
-
668
- into = options[:into] = Open.open(into, :mode => "w") if Misc.is_filename?(into)
669
-
670
- case into
671
- when :stream
672
- sout = Misc.open_pipe false, false do |sin|
673
- begin
674
- traverse(obj, options.merge(:into => sin), &block)
675
- rescue Exception
676
- Log.exception $!
677
- begin
678
- sout.abort if sout.respond_to? :abort
679
- sout.join if sout.respond_to? :join
680
- ensure
681
- raise $!
682
- end
683
- end
684
- end
685
- return sout
686
- when :dumper
687
- obj_options = obj.respond_to?(:options) ? obj.options : {}
688
- dumper = TSV::Dumper.new obj_options.merge(options)
689
- dumper.init
690
- _options = options.merge(obj_options).merge(:into => dumper)
691
- traverse(obj, _options, &block)
692
- return dumper
693
- end
694
-
695
- threads = Misc.process_options options, :threads
696
- cpus = Misc.process_options options, :cpus
697
- threads = nil if threads and threads.to_i <= 1
698
- cpus = nil if cpus and cpus.to_i <= 1
699
-
700
- if options[:keys]
701
- case options[:keys]
702
- when TrueClass
703
- options[:type] = :keys
704
- when String
705
- options[:type] = :keys
706
- options[:key_field] = options[:keys]
707
- options[:fields] = []
708
- end
709
- end
710
-
711
- bar = Misc.process_options options, :bar
712
- bar ||= Misc.process_options options, :progress
713
- options[:bar] = case bar
714
- when String
715
- max = guess_max(obj)
716
- Log::ProgressBar.new_bar(max, {:desc => bar})
717
- when TrueClass
718
- max = guess_max(obj)
719
- Log::ProgressBar.new_bar(max, nil)
720
- when Numeric
721
- max = guess_max(obj)
722
- Log::ProgressBar.new_bar(bar)
723
- when Hash
724
- max = Misc.process_options(bar, :max) || max
725
- Log::ProgressBar.new_bar(max, bar)
726
- when Log::ProgressBar
727
- bar.max ||= guess_max(obj)
728
- bar
729
- else
730
- if (defined? Step and Step === bar)
731
- max = guess_max(obj)
732
- Log::ProgressBar.new_bar(max, {:desc => bar.status, :file => bar.file(:progress)})
733
- else
734
- bar
735
- end
736
- end
737
-
738
- if into
739
- bar = Misc.process_options options, :bar
740
-
741
- options[:join] = Proc.new do |error|
742
- error = false if error.nil?
743
- Log::ProgressBar.remove_bar(bar, error) if bar
744
- end if bar
745
-
746
- options[:callback] = Proc.new do |e|
747
- begin
748
- store_into into, e
749
- rescue Aborted
750
- Log.low "Aborted callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
751
- abort_stream(into, $!)
752
- raise $!
753
- rescue Exception
754
- Log.low "Exception callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
755
- abort_stream(into, $!)
756
- raise $!
757
- ensure
758
- bar.tick if bar
759
- end
760
- end
761
-
762
- bar.init if bar
763
- begin
764
- case into
765
- when TSV::Dumper, IO
766
- traverse_stream(obj, threads, cpus, options, &block)
767
- else
768
- traverse_run(obj, threads, cpus, options, &block)
769
- end
770
- rescue Exception
771
- Log.exception $!
772
- abort_stream(into, $!)
773
- raise $!
774
- end
775
-
776
- into
777
- else
778
- traverse_run(obj, threads, cpus, options, &block)
779
- end
780
- end
781
- end
1
+ #module TSV
2
+ #
3
+ # def self.obj_stream(obj)
4
+ # case obj
5
+ # when nil
6
+ # nil
7
+ # when (defined? Step and Step)
8
+ # obj.result
9
+ # #when IO, File, Zlib::GzipReader, Bgzf
10
+ # when IO, File, Zlib::GzipReader #, Bgzf
11
+ # obj
12
+ # when TSV::Dumper
13
+ # obj.stream
14
+ # when TSV::Parser
15
+ # obj.stream
16
+ # end
17
+ # end
18
+ #
19
+ # def self.guess_max(obj)
20
+ # begin
21
+ # case obj
22
+ # when (defined? Step and Step)
23
+ # if obj.done?
24
+ # path = obj.path
25
+ # path = path.find if path.respond_to? :find
26
+ # if File.exist? path
27
+ # CMD.cmd("wc -l '#{path}'").read.to_i
28
+ # else
29
+ # nil
30
+ # end
31
+ # else
32
+ # nil
33
+ # end
34
+ # when TSV
35
+ # obj.length
36
+ # when Array, Hash
37
+ # obj.size
38
+ # when File
39
+ # return nil if Open.gzip?(obj) or Open.bgzip?(obj)
40
+ # CMD.cmd("wc -l '#{obj.path}'").read.to_i
41
+ # when Path, String
42
+ # obj = obj.find if Path === obj
43
+ # if File.exist? obj
44
+ # return nil if Open.gzip?(obj) or Open.bgzip?(obj)
45
+ # CMD.cmd("wc -l '#{obj}'").read.to_i
46
+ # else
47
+ # nil
48
+ # end
49
+ # end
50
+ # rescue Exception
51
+ # Log.exception $!
52
+ # nil
53
+ # end
54
+ # end
55
+ #
56
+ # def self.stream_name(obj)
57
+ # return "nil" if obj.nil?
58
+ # #filename_obj = obj.respond_to?(:filename) ? obj.filename : nil
59
+ # #filename_obj ||= obj.respond_to?(:path) ? obj.path : nil
60
+ # stream_obj = obj_stream(obj) || obj
61
+ # obj.class.to_s << "-" << Misc.fingerprint(stream_obj)
62
+ # end
63
+ #
64
+ # def self.report(msg, obj, into)
65
+ # into = into[:into] if Hash === into and into.include? :into
66
+ #
67
+ # Log.low{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
68
+ # end
69
+ #
70
+ # #{{{ TRAVERSE OBJECTS
71
+ #
72
+ # def self.traverse_tsv(tsv, options = {}, &block)
73
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
74
+ #
75
+ # begin
76
+ # error = false
77
+ # fields = tsv.fields
78
+ # if callback
79
+ # bar.init if bar
80
+ # tsv.through options[:key_field], options[:fields] do |k,v|
81
+ # begin
82
+ # callback.call yield(k,v,fields)
83
+ # rescue Exception
84
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, fields])}"
85
+ # raise $!
86
+ # ensure
87
+ # bar.tick if bar
88
+ # end
89
+ # end
90
+ # else
91
+ # bar.init if bar
92
+ # tsv.through options[:key_field], options[:fields] do |k,v|
93
+ # begin
94
+ # yield k,v,fields
95
+ # ensure
96
+ # bar.tick if bar
97
+ # end
98
+ # end
99
+ # end
100
+ # rescue
101
+ # error = true
102
+ # raise $!
103
+ # ensure
104
+ # join.call(error) if join
105
+ # Log::ProgressBar.remove_bar(bar, error) if bar
106
+ # end
107
+ # end
108
+ #
109
+ # def self.traverse_hash(hash, options = {}, &block)
110
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
111
+ #
112
+ # begin
113
+ # error = false
114
+ # if callback
115
+ # bar.init if bar
116
+ # hash.each do |k,v|
117
+ # begin
118
+ # callback.call yield(k,v)
119
+ # ensure
120
+ # bar.tick if bar
121
+ # end
122
+ # end
123
+ # else
124
+ # bar.init if bar
125
+ # hash.each do |k,v|
126
+ # begin
127
+ # yield k,v
128
+ # rescue Exception
129
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v])}"
130
+ # raise $!
131
+ # ensure
132
+ # bar.tick if bar
133
+ # end
134
+ # end
135
+ # end
136
+ # rescue
137
+ # error = true
138
+ # raise $!
139
+ # ensure
140
+ # join.call(error) if join
141
+ # Log::ProgressBar.remove_bar(bar, error) if bar
142
+ # end
143
+ # end
144
+ #
145
+ # def self.traverse_array(array, options = {}, &block)
146
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
147
+ #
148
+ # begin
149
+ # error = false
150
+ # if callback
151
+ # bar.init if bar
152
+ # array.each do |e|
153
+ # begin
154
+ # callback.call yield(e)
155
+ # rescue Exception
156
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
157
+ # raise $!
158
+ # ensure
159
+ # bar.tick if bar
160
+ # end
161
+ # end
162
+ # else
163
+ # bar.init if bar
164
+ # array.each do |e|
165
+ # begin
166
+ # yield e
167
+ # rescue Exception
168
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
169
+ # raise $!
170
+ # ensure
171
+ # bar.tick if bar
172
+ # end
173
+ # end
174
+ # end
175
+ #
176
+ # rescue
177
+ # error = true
178
+ # raise $!
179
+ # ensure
180
+ # join.call(error) if join
181
+ # Log::ProgressBar.remove_bar(bar, error) if bar
182
+ # end
183
+ # end
184
+ #
185
+ # def self.traverse_enumerable(enum, options = {}, &block)
186
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
187
+ #
188
+ # begin
189
+ # error = false
190
+ # if callback
191
+ # bar.init if bar
192
+ # while enum.any?
193
+ # e = enum.pop
194
+ # begin
195
+ # callback.call yield(e)
196
+ # rescue Exception
197
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
198
+ # raise $!
199
+ # ensure
200
+ # bar.tick if bar
201
+ # end
202
+ # end
203
+ # else
204
+ # bar.init if bar
205
+ # while enum.any?
206
+ # e = enum.pop
207
+ # begin
208
+ # yield e
209
+ # rescue Exception
210
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
211
+ # raise $!
212
+ # ensure
213
+ # bar.tick if bar
214
+ # end
215
+ # end
216
+ # end
217
+ #
218
+ # rescue
219
+ # error = true
220
+ # raise $!
221
+ # ensure
222
+ # join.call(error) if join
223
+ # Log::ProgressBar.remove_bar(bar, error) if bar
224
+ # end
225
+ # end
226
+ #
227
+ # def self.traverse_priority_queue(queue, options = {}, &block)
228
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
229
+ #
230
+ # begin
231
+ # error = false
232
+ # if callback
233
+ # bar.init if bar
234
+ # while queue.any?
235
+ # e = queue.pop
236
+ # begin
237
+ # callback.call yield(e)
238
+ # rescue Exception
239
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
240
+ # raise $!
241
+ # ensure
242
+ # bar.tick if bar
243
+ # end
244
+ # end
245
+ # else
246
+ # bar.init if bar
247
+ # while queue.any?
248
+ # e = queue.pop
249
+ # begin
250
+ # yield e
251
+ # rescue Exception
252
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
253
+ # raise $!
254
+ # ensure
255
+ # bar.tick if bar
256
+ # end
257
+ # end
258
+ # end
259
+ #
260
+ # rescue
261
+ # error = true
262
+ # raise $!
263
+ # ensure
264
+ # join.call(error) if join
265
+ # Log::ProgressBar.remove_bar(bar, error) if bar
266
+ # end
267
+ # end
268
+ #
269
+ # def self.traverse_io_array(io, options = {}, &block)
270
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
271
+ # begin
272
+ # error = false
273
+ # if File === io and io.closed?
274
+ # begin
275
+ # Log.low{"Rewinding stream #{stream_name(io)}"}
276
+ # io.reopen io.filename, "r"
277
+ # rescue
278
+ # Log.exception $!
279
+ # raise "File closed and could not reopen #{stream_name(io)}"
280
+ # end
281
+ # end
282
+ #
283
+ # if callback
284
+ # bar.init if bar
285
+ # while line = io.gets
286
+ # if line[-1] != "\n"
287
+ # while c = io.getc
288
+ # line << c
289
+ # break if c=="\n"
290
+ # end
291
+ # end
292
+ # begin
293
+ # callback.call yield line.chomp
294
+ # rescue Exception
295
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
296
+ # raise $!
297
+ # ensure
298
+ # bar.tick if bar
299
+ # end
300
+ # end
301
+ # else
302
+ # bar.init if bar
303
+ # while line = io.gets
304
+ # begin
305
+ # yield line.chomp
306
+ # rescue Exception
307
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
308
+ # raise $!
309
+ # ensure
310
+ # bar.tick if bar
311
+ # end
312
+ # end
313
+ # end
314
+ # rescue
315
+ # error = true
316
+ # raise $!
317
+ # ensure
318
+ # join.call(error) if join
319
+ # Log::ProgressBar.remove_bar(bar, error) if bar
320
+ # end
321
+ # end
322
+ #
323
+ # def self.traverse_io(io, options = {}, &block)
324
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
325
+ #
326
+ # begin
327
+ # error = false
328
+ # if File === io and io.closed?
329
+ # begin
330
+ # Log.low{"Rewinding stream #{stream_name(io)}"}
331
+ # io.reopen io.filename, "r"
332
+ # rescue
333
+ # Log.exception $!
334
+ # raise "File closed and could not reopen #{stream_name(io)}"
335
+ # end
336
+ # end
337
+ #
338
+ # options[:monitor] = bar
339
+ # if callback
340
+ # bar.init if bar
341
+ # exception = nil
342
+ # begin
343
+ # TSV::Parser.traverse(io, options) do |k,v,f|
344
+ # begin
345
+ # callback.call yield k, v, f
346
+ # rescue Exception
347
+ # exception = $!
348
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, f])}"
349
+ # raise $!
350
+ # end
351
+ # bar.tick if bar
352
+ # end
353
+ # ensure
354
+ # raise exception if exception
355
+ # end
356
+ # else
357
+ # TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
358
+ # end
359
+ # rescue
360
+ # error = true
361
+ # raise $!
362
+ # ensure
363
+ # join.call(error) if join
364
+ # Log::ProgressBar.remove_bar(bar, error) if bar
365
+ # end
366
+ # end
367
+ #
368
+ # def self.traverse_obj(obj, options = {}, &block)
369
+ # if options[:type] == :keys
370
+ # options[:fields] = []
371
+ # options[:type] = :single
372
+ # end
373
+ #
374
+ # name = stream_name(obj)
375
+ # Log.low{"Traversing #{name} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
376
+ # begin
377
+ # case obj
378
+ # when (defined? FastContainers && FastContainers::PriorityQueue === obj)
379
+ # traverse_priority_queue(obj, options, &block)
380
+ # when TSV
381
+ # traverse_tsv(obj, options, &block)
382
+ # when Hash
383
+ # traverse_hash(obj, options, &block)
384
+ # when TSV::Parser
385
+ # callback = Misc.process_options options, :callback
386
+ # if callback
387
+ # obj.traverse(options) do |k,v|
388
+ # callback.call yield k, v
389
+ # end
390
+ # else
391
+ # obj.traverse(options, &block)
392
+ # end
393
+ # #when IO, File, Zlib::GzipReader, Bgzf, StringIO
394
+ # when IO, File, Zlib::GzipReader, StringIO
395
+ # begin
396
+ # if options[:type] == :array or options[:type] == :line
397
+ # traverse_io_array(obj, options, &block)
398
+ # else
399
+ # traverse_io(obj, options, &block)
400
+ # end
401
+ # rescue Aborted
402
+ # obj.abort if obj.respond_to? :abort
403
+ # raise $!
404
+ # rescue Exception
405
+ # obj.abort if obj.respond_to? :abort
406
+ # raise $!
407
+ # ensure
408
+ # obj.close if obj.respond_to? :close and not obj.closed?
409
+ # obj.join if obj.respond_to? :join
410
+ # end
411
+ # when Path
412
+ # obj.open do |stream|
413
+ # traverse_obj(stream, options, &block)
414
+ # end
415
+ # when TSV::Dumper
416
+ # traverse_obj(obj.stream, options, &block)
417
+ # when (defined? Step and Step)
418
+ #
419
+ # obj.clean if obj.aborted? or obj.recoverable_error?
420
+ # obj.run(true) unless obj.done? || obj.started? || obj.result
421
+ #
422
+ # stream = obj.get_stream
423
+ # options = {:type => :array}.merge(options) if obj.result_type == :array
424
+ #
425
+ # if stream
426
+ # traverse_obj(stream, options, &block)
427
+ # else
428
+ # obj.join
429
+ # traverse_obj(obj.path, options, &block)
430
+ # end
431
+ # when Array
432
+ # traverse_array(obj, options, &block)
433
+ # when Set
434
+ # traverse_array(obj.to_a, options, &block)
435
+ # when String
436
+ # if Open.remote?(obj) || Open.ssh?(obj) || Misc.is_filename?(obj)
437
+ # Open.open(obj) do |s|
438
+ # traverse_obj(s, options, &block)
439
+ # end
440
+ # else
441
+ # raise "Can not open obj for traversal #{Misc.fingerprint obj}"
442
+ # end
443
+ # when Enumerable
444
+ # traverse_enumerable(obj, options, &block)
445
+ # when nil
446
+ # raise "Can not traverse nil object into #{stream_name(options[:into])}"
447
+ # else
448
+ # raise "Unknown object for traversal: #{Misc.fingerprint obj }"
449
+ # end
450
+ # rescue IOError
451
+ # Log.low{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
452
+ # abort_stream obj
453
+ # abort_stream options[:into], $!
454
+ # raise $!
455
+ # rescue Errno::EPIPE
456
+ # Log.low{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
457
+ # abort_stream obj
458
+ # abort_stream options[:into], $!
459
+ # raise $!
460
+ # rescue Aborted
461
+ # Log.low{"Aborted traversing #{stream_name(obj)}"}
462
+ # abort_stream obj
463
+ # abort_stream options[:into], $!
464
+ # raise $!
465
+ # rescue Exception
466
+ # Log.low{"Exception traversing #{stream_name(obj)}"}
467
+ # abort_stream obj unless String === obj
468
+ # abort_stream options[:into], $!
469
+ # raise $!
470
+ # end
471
+ # end
472
+ #
473
+ # def self.traverse_threads(num, obj, options, &block)
474
+ # callback = Misc.process_options options, :callback
475
+ #
476
+ # q = RbbtThreadQueue.new num
477
+ #
478
+ # if callback
479
+ # block = Proc.new do |*args|
480
+ # mutex = args.pop
481
+ # res = yield *args
482
+ # mutex.synchronize do
483
+ # callback.call res
484
+ # end
485
+ # end
486
+ # end
487
+ #
488
+ # q.init true, &block
489
+ #
490
+ # traverse_obj(obj, options) do |*p|
491
+ # q.process p
492
+ # end
493
+ #
494
+ # q.join
495
+ # nil
496
+ # end
497
+ #
498
+ # def self.traverse_cpus(num, obj, options, &block)
499
+ # begin
500
+ # error = false
501
+ #
502
+ # callback, cleanup, join, respawn, bar = Misc.process_options options, :callback, :cleanup, :join, :respawn, :bar
503
+ # respawn = true if ENV["RBBT_RESPAWN"] and ENV["RBBT_RESPAWN"] == "true"
504
+ #
505
+ # Log.low "Traversing in #{ num } cpus: #{respawn ? "respawn" : "no respawn"}"
506
+ # q = RbbtProcessQueue.new num, cleanup, join, respawn, !!bar
507
+ # callback = Proc.new{ bar.tick } if callback.nil? and bar
508
+ # q.callback &callback
509
+ # q.init &block
510
+ #
511
+ # bar.init if bar
512
+ # traverse_obj(obj, options) do |*p|
513
+ # q.process *p
514
+ # end
515
+ #
516
+ # q.join
517
+ #
518
+ # rescue Interrupt, Aborted
519
+ # error = true
520
+ # Log.low{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
521
+ # q.abort
522
+ # stream = obj_stream(obj)
523
+ # stream.abort if stream.respond_to? :abort
524
+ # stream = obj_stream(options[:into])
525
+ # stream.abort if stream.respond_to? :abort
526
+ # q.join
527
+ # raise "Traversal aborted"
528
+ # rescue Exception
529
+ # error = true
530
+ # Log.low{"Exception during traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.message}"}
531
+ # q.abort
532
+ # stream = obj_stream(obj)
533
+ # stream.abort if stream.respond_to? :abort
534
+ # stream = obj_stream(options[:into])
535
+ # stream.abort if stream.respond_to? :abort
536
+ # q.join
537
+ # raise $!
538
+ # ensure
539
+ # Log::ProgressBar.remove_bar(bar, error) if bar
540
+ # end
541
+ # end
542
+ #
543
+ # def self.store_into(store, value)
544
+ # if MultipleResult === value
545
+ # value.each do |v|
546
+ # store_into store, v
547
+ # end
548
+ # return
549
+ # end
550
+ # begin
551
+ # return false if value.nil?
552
+ # case store
553
+ # when TSV
554
+ # if store.type == :double or store.type == :flat
555
+ # case value
556
+ # when TSV, Hash
557
+ # store.merge_zip value
558
+ # else
559
+ # store.zip_new *value
560
+ # end
561
+ # else
562
+ # k,v = value
563
+ # store[k] = v
564
+ # end
565
+ # when Hash
566
+ # case value
567
+ # when TSV, Hash
568
+ # store.merge! value
569
+ # else
570
+ # k,v = value
571
+ # store[k] = v
572
+ # end
573
+ # when TSV::Dumper
574
+ # return false if value.nil?
575
+ # store.add *value
576
+ # when IO
577
+ # return false if value.nil?
578
+ # value.chomp!
579
+ # store.puts value
580
+ # else
581
+ # store << value
582
+ # end
583
+ # true
584
+ # rescue Aborted, Interrupt
585
+ # Log.low "Aborted storing into #{Misc.fingerprint store}"
586
+ # abort_stream(store, $!)
587
+ # raise $!
588
+ # rescue Exception
589
+ # Log.low "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
590
+ # abort_stream(store, $!)
591
+ # raise $!
592
+ # end
593
+ # end
594
+ #
595
+ # def self.get_streams_to_close(obj)
596
+ # close_streams = []
597
+ # case obj
598
+ # when IO, File
599
+ # close_streams << obj
600
+ # when TSV::Parser
601
+ # when TSV::Dumper
602
+ # close_streams << obj.result.in_stream
603
+ # when (defined? Step and Step)
604
+ # obj.mutex.synchronize do
605
+ # case obj.result
606
+ # when IO
607
+ # close_streams << obj.result
608
+ # when TSV::Dumper
609
+ # close_streams << obj.result.in_stream
610
+ # end
611
+ # end
612
+ # obj.inputs.each do |input|
613
+ # close_streams = get_streams_to_close(input) + close_streams
614
+ # end
615
+ # obj.dependencies.each do |dependency|
616
+ # close_streams = get_streams_to_close(dependency) + close_streams
617
+ # end
618
+ # end
619
+ # close_streams
620
+ # end
621
+ #
622
+ # def self.traverse_run(obj, threads, cpus, options = {}, &block)
623
+ # threads = nil if threads == 1
624
+ # cpus = nil if cpus == 1
625
+ # if ENV["RBBT_NO_MAP_REDUCE"] == "true" or (threads.nil? and cpus.nil?)
626
+ # traverse_obj obj, options, &block
627
+ # else
628
+ # if threads
629
+ # traverse_threads threads, obj, options, &block
630
+ # else
631
+ # close_streams = Misc.process_options(options, :close_streams) || []
632
+ # close_streams = [close_streams] unless Array === close_streams
633
+ #
634
+ # close_streams.concat(get_streams_to_close(obj))
635
+ # options[:close_streams] = close_streams
636
+ #
637
+ # if close_streams and close_streams.any?
638
+ # options[:cleanup] = Proc.new do
639
+ # close_streams.uniq.each do |s|
640
+ # s.close unless s.closed?
641
+ # end
642
+ # end
643
+ # end
644
+ #
645
+ # traverse_cpus cpus, obj, options, &block
646
+ # end
647
+ # end
648
+ # end
649
+ #
650
+ # def self.traverse_stream(obj, threads = nil, cpus = nil, options = {}, &block)
651
+ # into = options[:into]
652
+ #
653
+ # thread = Thread.new do
654
+ # begin
655
+ # traverse_run(obj, threads, cpus, options, &block)
656
+ # into.close if into.respond_to?(:close) and not (into.respond_to?(:closed?) and into.closed?)
657
+ # rescue Exception
658
+ # abort_stream obj
659
+ # abort_stream into
660
+ # raise $!
661
+ # end
662
+ # end
663
+ #
664
+ # ConcurrentStream.setup(obj_stream(into), :threads => thread)
665
+ # end
666
+ #
667
+ # def self.traverse(obj, options = {}, &block)
668
+ # into = options[:into]
669
+ #
670
+ # into = options[:into] = Open.open(into, :mode => "w") if Misc.is_filename?(into)
671
+ #
672
+ # case into
673
+ # when :stream
674
+ # sout = Misc.open_pipe false, false do |sin|
675
+ # begin
676
+ # traverse(obj, options.merge(:into => sin), &block)
677
+ # rescue Exception
678
+ # Log.exception $!
679
+ # begin
680
+ # sout.abort if sout.respond_to? :abort
681
+ # sout.join if sout.respond_to? :join
682
+ # ensure
683
+ # raise $!
684
+ # end
685
+ # end
686
+ # end
687
+ # return sout
688
+ # when :dumper
689
+ # obj_options = obj.respond_to?(:options) ? obj.options : {}
690
+ # dumper = TSV::Dumper.new obj_options.merge(options)
691
+ # dumper.init
692
+ # _options = options.merge(obj_options).merge(:into => dumper)
693
+ # traverse(obj, _options, &block)
694
+ # return dumper
695
+ # end
696
+ #
697
+ # threads = Misc.process_options options, :threads
698
+ # cpus = Misc.process_options options, :cpus
699
+ # threads = nil if threads and threads.to_i <= 1
700
+ # cpus = nil if cpus and cpus.to_i <= 1
701
+ #
702
+ # if options[:keys]
703
+ # case options[:keys]
704
+ # when TrueClass
705
+ # options[:type] = :keys
706
+ # when String
707
+ # options[:type] = :keys
708
+ # options[:key_field] = options[:keys]
709
+ # options[:fields] = []
710
+ # end
711
+ # end
712
+ #
713
+ # bar = Misc.process_options options, :bar
714
+ # bar ||= Misc.process_options options, :progress
715
+ # options[:bar] = case bar
716
+ # when String
717
+ # max = guess_max(obj)
718
+ # Log::ProgressBar.new_bar(max, {:desc => bar})
719
+ # when TrueClass
720
+ # max = guess_max(obj)
721
+ # Log::ProgressBar.new_bar(max, nil)
722
+ # when Numeric
723
+ # max = guess_max(obj)
724
+ # Log::ProgressBar.new_bar(bar)
725
+ # when Hash
726
+ # max = Misc.process_options(bar, :max) || max
727
+ # Log::ProgressBar.new_bar(max, bar)
728
+ # when Log::ProgressBar
729
+ # bar.max ||= guess_max(obj)
730
+ # bar
731
+ # else
732
+ # if (defined? Step and Step === bar)
733
+ # max = guess_max(obj)
734
+ # Log::ProgressBar.new_bar(max, {:desc => bar.status, :file => bar.file(:progress)})
735
+ # else
736
+ # bar
737
+ # end
738
+ # end
739
+ #
740
+ # if into
741
+ # bar = Misc.process_options options, :bar
742
+ #
743
+ # options[:join] = Proc.new do |error|
744
+ # error = false if error.nil?
745
+ # Log::ProgressBar.remove_bar(bar, error) if bar
746
+ # end if bar
747
+ #
748
+ # options[:callback] = Proc.new do |e|
749
+ # begin
750
+ # store_into into, e
751
+ # rescue Aborted
752
+ # Log.low "Aborted callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
753
+ # abort_stream(into, $!)
754
+ # raise $!
755
+ # rescue Exception
756
+ # Log.low "Exception callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
757
+ # abort_stream(into, $!)
758
+ # raise $!
759
+ # ensure
760
+ # bar.tick if bar
761
+ # end
762
+ # end
763
+ #
764
+ # bar.init if bar
765
+ # begin
766
+ # case into
767
+ # when TSV::Dumper, IO
768
+ # traverse_stream(obj, threads, cpus, options, &block)
769
+ # else
770
+ # traverse_run(obj, threads, cpus, options, &block)
771
+ # end
772
+ # rescue Exception
773
+ # Log.exception $!
774
+ # abort_stream(into, $!)
775
+ # raise $!
776
+ # end
777
+ #
778
+ # into
779
+ # else
780
+ # traverse_run(obj, threads, cpus, options, &block)
781
+ # end
782
+ # end
783
+ #end