rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -1,781 +1,783 @@
1
- module TSV
2
-
3
- def self.obj_stream(obj)
4
- case obj
5
- when nil
6
- nil
7
- when (defined? Step and Step)
8
- obj.result
9
- when IO, File, Zlib::GzipReader, Bgzf
10
- obj
11
- when TSV::Dumper
12
- obj.stream
13
- when TSV::Parser
14
- obj.stream
15
- end
16
- end
17
-
18
- def self.guess_max(obj)
19
- begin
20
- case obj
21
- when (defined? Step and Step)
22
- if obj.done?
23
- path = obj.path
24
- path = path.find if path.respond_to? :find
25
- if File.exist? path
26
- CMD.cmd("wc -l '#{path}'").read.to_i
27
- else
28
- nil
29
- end
30
- else
31
- nil
32
- end
33
- when TSV
34
- obj.length
35
- when Array, Hash
36
- obj.size
37
- when File
38
- return nil if Open.gzip?(obj) or Open.bgzip?(obj)
39
- CMD.cmd("wc -l '#{obj.path}'").read.to_i
40
- when Path, String
41
- obj = obj.find if Path === obj
42
- if File.exist? obj
43
- return nil if Open.gzip?(obj) or Open.bgzip?(obj)
44
- CMD.cmd("wc -l '#{obj}'").read.to_i
45
- else
46
- nil
47
- end
48
- end
49
- rescue Exception
50
- Log.exception $!
51
- nil
52
- end
53
- end
54
-
55
- def self.stream_name(obj)
56
- return "nil" if obj.nil?
57
- #filename_obj = obj.respond_to?(:filename) ? obj.filename : nil
58
- #filename_obj ||= obj.respond_to?(:path) ? obj.path : nil
59
- stream_obj = obj_stream(obj) || obj
60
- obj.class.to_s << "-" << Misc.fingerprint(stream_obj)
61
- end
62
-
63
- def self.report(msg, obj, into)
64
- into = into[:into] if Hash === into and into.include? :into
65
-
66
- Log.low{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
67
- end
68
-
69
- #{{{ TRAVERSE OBJECTS
70
-
71
- def self.traverse_tsv(tsv, options = {}, &block)
72
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
73
-
74
- begin
75
- error = false
76
- fields = tsv.fields
77
- if callback
78
- bar.init if bar
79
- tsv.through options[:key_field], options[:fields] do |k,v|
80
- begin
81
- callback.call yield(k,v,fields)
82
- rescue Exception
83
- Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, fields])}"
84
- raise $!
85
- ensure
86
- bar.tick if bar
87
- end
88
- end
89
- else
90
- bar.init if bar
91
- tsv.through options[:key_field], options[:fields] do |k,v|
92
- begin
93
- yield k,v,fields
94
- ensure
95
- bar.tick if bar
96
- end
97
- end
98
- end
99
- rescue
100
- error = true
101
- raise $!
102
- ensure
103
- join.call(error) if join
104
- Log::ProgressBar.remove_bar(bar, error) if bar
105
- end
106
- end
107
-
108
- def self.traverse_hash(hash, options = {}, &block)
109
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
110
-
111
- begin
112
- error = false
113
- if callback
114
- bar.init if bar
115
- hash.each do |k,v|
116
- begin
117
- callback.call yield(k,v)
118
- ensure
119
- bar.tick if bar
120
- end
121
- end
122
- else
123
- bar.init if bar
124
- hash.each do |k,v|
125
- begin
126
- yield k,v
127
- rescue Exception
128
- Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v])}"
129
- raise $!
130
- ensure
131
- bar.tick if bar
132
- end
133
- end
134
- end
135
- rescue
136
- error = true
137
- raise $!
138
- ensure
139
- join.call(error) if join
140
- Log::ProgressBar.remove_bar(bar, error) if bar
141
- end
142
- end
143
-
144
- def self.traverse_array(array, options = {}, &block)
145
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
146
-
147
- begin
148
- error = false
149
- if callback
150
- bar.init if bar
151
- array.each do |e|
152
- begin
153
- callback.call yield(e)
154
- rescue Exception
155
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
156
- raise $!
157
- ensure
158
- bar.tick if bar
159
- end
160
- end
161
- else
162
- bar.init if bar
163
- array.each do |e|
164
- begin
165
- yield e
166
- rescue Exception
167
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
168
- raise $!
169
- ensure
170
- bar.tick if bar
171
- end
172
- end
173
- end
174
-
175
- rescue
176
- error = true
177
- raise $!
178
- ensure
179
- join.call(error) if join
180
- Log::ProgressBar.remove_bar(bar, error) if bar
181
- end
182
- end
183
-
184
- def self.traverse_enumerable(enum, options = {}, &block)
185
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
186
-
187
- begin
188
- error = false
189
- if callback
190
- bar.init if bar
191
- while enum.any?
192
- e = enum.pop
193
- begin
194
- callback.call yield(e)
195
- rescue Exception
196
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
197
- raise $!
198
- ensure
199
- bar.tick if bar
200
- end
201
- end
202
- else
203
- bar.init if bar
204
- while enum.any?
205
- e = enum.pop
206
- begin
207
- yield e
208
- rescue Exception
209
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
210
- raise $!
211
- ensure
212
- bar.tick if bar
213
- end
214
- end
215
- end
216
-
217
- rescue
218
- error = true
219
- raise $!
220
- ensure
221
- join.call(error) if join
222
- Log::ProgressBar.remove_bar(bar, error) if bar
223
- end
224
- end
225
-
226
- def self.traverse_priority_queue(queue, options = {}, &block)
227
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
228
-
229
- begin
230
- error = false
231
- if callback
232
- bar.init if bar
233
- while queue.any?
234
- e = queue.pop
235
- begin
236
- callback.call yield(e)
237
- rescue Exception
238
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
239
- raise $!
240
- ensure
241
- bar.tick if bar
242
- end
243
- end
244
- else
245
- bar.init if bar
246
- while queue.any?
247
- e = queue.pop
248
- begin
249
- yield e
250
- rescue Exception
251
- Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
252
- raise $!
253
- ensure
254
- bar.tick if bar
255
- end
256
- end
257
- end
258
-
259
- rescue
260
- error = true
261
- raise $!
262
- ensure
263
- join.call(error) if join
264
- Log::ProgressBar.remove_bar(bar, error) if bar
265
- end
266
- end
267
-
268
- def self.traverse_io_array(io, options = {}, &block)
269
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
270
- begin
271
- error = false
272
- if File === io and io.closed?
273
- begin
274
- Log.low{"Rewinding stream #{stream_name(io)}"}
275
- io.reopen io.filename, "r"
276
- rescue
277
- Log.exception $!
278
- raise "File closed and could not reopen #{stream_name(io)}"
279
- end
280
- end
281
-
282
- if callback
283
- bar.init if bar
284
- while line = io.gets
285
- if line[-1] != "\n"
286
- while c = io.getc
287
- line << c
288
- break if c=="\n"
289
- end
290
- end
291
- begin
292
- callback.call yield line.chomp
293
- rescue Exception
294
- Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
295
- raise $!
296
- ensure
297
- bar.tick if bar
298
- end
299
- end
300
- else
301
- bar.init if bar
302
- while line = io.gets
303
- begin
304
- yield line.chomp
305
- rescue Exception
306
- Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
307
- raise $!
308
- ensure
309
- bar.tick if bar
310
- end
311
- end
312
- end
313
- rescue
314
- error = true
315
- raise $!
316
- ensure
317
- join.call(error) if join
318
- Log::ProgressBar.remove_bar(bar, error) if bar
319
- end
320
- end
321
-
322
- def self.traverse_io(io, options = {}, &block)
323
- callback, bar, join = Misc.process_options options, :callback, :bar, :join
324
-
325
- begin
326
- error = false
327
- if File === io and io.closed?
328
- begin
329
- Log.low{"Rewinding stream #{stream_name(io)}"}
330
- io.reopen io.filename, "r"
331
- rescue
332
- Log.exception $!
333
- raise "File closed and could not reopen #{stream_name(io)}"
334
- end
335
- end
336
-
337
- options[:monitor] = bar
338
- if callback
339
- bar.init if bar
340
- exception = nil
341
- begin
342
- TSV::Parser.traverse(io, options) do |k,v,f|
343
- begin
344
- callback.call yield k, v, f
345
- rescue Exception
346
- exception = $!
347
- Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, f])}"
348
- raise $!
349
- end
350
- bar.tick if bar
351
- end
352
- ensure
353
- raise exception if exception
354
- end
355
- else
356
- TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
357
- end
358
- rescue
359
- error = true
360
- raise $!
361
- ensure
362
- join.call(error) if join
363
- Log::ProgressBar.remove_bar(bar, error) if bar
364
- end
365
- end
366
-
367
- def self.traverse_obj(obj, options = {}, &block)
368
- if options[:type] == :keys
369
- options[:fields] = []
370
- options[:type] = :single
371
- end
372
-
373
- name = stream_name(obj)
374
- Log.low{"Traversing #{name} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
375
- begin
376
- case obj
377
- when (defined? FastContainers && FastContainers::PriorityQueue === obj)
378
- traverse_priority_queue(obj, options, &block)
379
- when TSV
380
- traverse_tsv(obj, options, &block)
381
- when Hash
382
- traverse_hash(obj, options, &block)
383
- when TSV::Parser
384
- callback = Misc.process_options options, :callback
385
- if callback
386
- obj.traverse(options) do |k,v|
387
- callback.call yield k, v
388
- end
389
- else
390
- obj.traverse(options, &block)
391
- end
392
- when IO, File, Zlib::GzipReader, Bgzf, StringIO
393
- begin
394
- if options[:type] == :array or options[:type] == :line
395
- traverse_io_array(obj, options, &block)
396
- else
397
- traverse_io(obj, options, &block)
398
- end
399
- rescue Aborted
400
- obj.abort if obj.respond_to? :abort
401
- raise $!
402
- rescue Exception
403
- obj.abort if obj.respond_to? :abort
404
- raise $!
405
- ensure
406
- obj.close if obj.respond_to? :close and not obj.closed?
407
- obj.join if obj.respond_to? :join
408
- end
409
- when Path
410
- obj.open do |stream|
411
- traverse_obj(stream, options, &block)
412
- end
413
- when TSV::Dumper
414
- traverse_obj(obj.stream, options, &block)
415
- when (defined? Step and Step)
416
-
417
- obj.clean if obj.aborted? or obj.recoverable_error?
418
- obj.run(true) unless obj.done? || obj.started? || obj.result
419
-
420
- stream = obj.get_stream
421
- options = {:type => :array}.merge(options) if obj.result_type == :array
422
-
423
- if stream
424
- traverse_obj(stream, options, &block)
425
- else
426
- obj.join
427
- traverse_obj(obj.path, options, &block)
428
- end
429
- when Array
430
- traverse_array(obj, options, &block)
431
- when Set
432
- traverse_array(obj.to_a, options, &block)
433
- when String
434
- if Open.remote?(obj) || Open.ssh?(obj) || Misc.is_filename?(obj)
435
- Open.open(obj) do |s|
436
- traverse_obj(s, options, &block)
437
- end
438
- else
439
- raise "Can not open obj for traversal #{Misc.fingerprint obj}"
440
- end
441
- when Enumerable
442
- traverse_enumerable(obj, options, &block)
443
- when nil
444
- raise "Can not traverse nil object into #{stream_name(options[:into])}"
445
- else
446
- raise "Unknown object for traversal: #{Misc.fingerprint obj }"
447
- end
448
- rescue IOError
449
- Log.low{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
450
- abort_stream obj
451
- abort_stream options[:into], $!
452
- raise $!
453
- rescue Errno::EPIPE
454
- Log.low{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
455
- abort_stream obj
456
- abort_stream options[:into], $!
457
- raise $!
458
- rescue Aborted
459
- Log.low{"Aborted traversing #{stream_name(obj)}"}
460
- abort_stream obj
461
- abort_stream options[:into], $!
462
- raise $!
463
- rescue Exception
464
- Log.low{"Exception traversing #{stream_name(obj)}"}
465
- abort_stream obj unless String === obj
466
- abort_stream options[:into], $!
467
- raise $!
468
- end
469
- end
470
-
471
- def self.traverse_threads(num, obj, options, &block)
472
- callback = Misc.process_options options, :callback
473
-
474
- q = RbbtThreadQueue.new num
475
-
476
- if callback
477
- block = Proc.new do |*args|
478
- mutex = args.pop
479
- res = yield *args
480
- mutex.synchronize do
481
- callback.call res
482
- end
483
- end
484
- end
485
-
486
- q.init true, &block
487
-
488
- traverse_obj(obj, options) do |*p|
489
- q.process p
490
- end
491
-
492
- q.join
493
- nil
494
- end
495
-
496
- def self.traverse_cpus(num, obj, options, &block)
497
- begin
498
- error = false
499
-
500
- callback, cleanup, join, respawn, bar = Misc.process_options options, :callback, :cleanup, :join, :respawn, :bar
501
- respawn = true if ENV["RBBT_RESPAWN"] and ENV["RBBT_RESPAWN"] == "true"
502
-
503
- Log.low "Traversing in #{ num } cpus: #{respawn ? "respawn" : "no respawn"}"
504
- q = RbbtProcessQueue.new num, cleanup, join, respawn, !!bar
505
- callback = Proc.new{ bar.tick } if callback.nil? and bar
506
- q.callback &callback
507
- q.init &block
508
-
509
- bar.init if bar
510
- traverse_obj(obj, options) do |*p|
511
- q.process *p
512
- end
513
-
514
- q.join
515
-
516
- rescue Interrupt, Aborted
517
- error = true
518
- Log.low{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
519
- q.abort
520
- stream = obj_stream(obj)
521
- stream.abort if stream.respond_to? :abort
522
- stream = obj_stream(options[:into])
523
- stream.abort if stream.respond_to? :abort
524
- q.join
525
- raise "Traversal aborted"
526
- rescue Exception
527
- error = true
528
- Log.low{"Exception during traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.message}"}
529
- q.abort
530
- stream = obj_stream(obj)
531
- stream.abort if stream.respond_to? :abort
532
- stream = obj_stream(options[:into])
533
- stream.abort if stream.respond_to? :abort
534
- q.join
535
- raise $!
536
- ensure
537
- Log::ProgressBar.remove_bar(bar, error) if bar
538
- end
539
- end
540
-
541
- def self.store_into(store, value)
542
- if MultipleResult === value
543
- value.each do |v|
544
- store_into store, v
545
- end
546
- return
547
- end
548
- begin
549
- return false if value.nil?
550
- case store
551
- when TSV
552
- if store.type == :double or store.type == :flat
553
- case value
554
- when TSV, Hash
555
- store.merge_zip value
556
- else
557
- store.zip_new *value
558
- end
559
- else
560
- k,v = value
561
- store[k] = v
562
- end
563
- when Hash
564
- case value
565
- when TSV, Hash
566
- store.merge! value
567
- else
568
- k,v = value
569
- store[k] = v
570
- end
571
- when TSV::Dumper
572
- return false if value.nil?
573
- store.add *value
574
- when IO
575
- return false if value.nil?
576
- value.chomp!
577
- store.puts value
578
- else
579
- store << value
580
- end
581
- true
582
- rescue Aborted, Interrupt
583
- Log.low "Aborted storing into #{Misc.fingerprint store}"
584
- abort_stream(store, $!)
585
- raise $!
586
- rescue Exception
587
- Log.low "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
588
- abort_stream(store, $!)
589
- raise $!
590
- end
591
- end
592
-
593
- def self.get_streams_to_close(obj)
594
- close_streams = []
595
- case obj
596
- when IO, File
597
- close_streams << obj
598
- when TSV::Parser
599
- when TSV::Dumper
600
- close_streams << obj.result.in_stream
601
- when (defined? Step and Step)
602
- obj.mutex.synchronize do
603
- case obj.result
604
- when IO
605
- close_streams << obj.result
606
- when TSV::Dumper
607
- close_streams << obj.result.in_stream
608
- end
609
- end
610
- obj.inputs.each do |input|
611
- close_streams = get_streams_to_close(input) + close_streams
612
- end
613
- obj.dependencies.each do |dependency|
614
- close_streams = get_streams_to_close(dependency) + close_streams
615
- end
616
- end
617
- close_streams
618
- end
619
-
620
- def self.traverse_run(obj, threads, cpus, options = {}, &block)
621
- threads = nil if threads == 1
622
- cpus = nil if cpus == 1
623
- if ENV["RBBT_NO_MAP_REDUCE"] == "true" or (threads.nil? and cpus.nil?)
624
- traverse_obj obj, options, &block
625
- else
626
- if threads
627
- traverse_threads threads, obj, options, &block
628
- else
629
- close_streams = Misc.process_options(options, :close_streams) || []
630
- close_streams = [close_streams] unless Array === close_streams
631
-
632
- close_streams.concat(get_streams_to_close(obj))
633
- options[:close_streams] = close_streams
634
-
635
- if close_streams and close_streams.any?
636
- options[:cleanup] = Proc.new do
637
- close_streams.uniq.each do |s|
638
- s.close unless s.closed?
639
- end
640
- end
641
- end
642
-
643
- traverse_cpus cpus, obj, options, &block
644
- end
645
- end
646
- end
647
-
648
- def self.traverse_stream(obj, threads = nil, cpus = nil, options = {}, &block)
649
- into = options[:into]
650
-
651
- thread = Thread.new do
652
- begin
653
- traverse_run(obj, threads, cpus, options, &block)
654
- into.close if into.respond_to?(:close) and not (into.respond_to?(:closed?) and into.closed?)
655
- rescue Exception
656
- abort_stream obj
657
- abort_stream into
658
- raise $!
659
- end
660
- end
661
-
662
- ConcurrentStream.setup(obj_stream(into), :threads => thread)
663
- end
664
-
665
- def self.traverse(obj, options = {}, &block)
666
- into = options[:into]
667
-
668
- into = options[:into] = Open.open(into, :mode => "w") if Misc.is_filename?(into)
669
-
670
- case into
671
- when :stream
672
- sout = Misc.open_pipe false, false do |sin|
673
- begin
674
- traverse(obj, options.merge(:into => sin), &block)
675
- rescue Exception
676
- Log.exception $!
677
- begin
678
- sout.abort if sout.respond_to? :abort
679
- sout.join if sout.respond_to? :join
680
- ensure
681
- raise $!
682
- end
683
- end
684
- end
685
- return sout
686
- when :dumper
687
- obj_options = obj.respond_to?(:options) ? obj.options : {}
688
- dumper = TSV::Dumper.new obj_options.merge(options)
689
- dumper.init
690
- _options = options.merge(obj_options).merge(:into => dumper)
691
- traverse(obj, _options, &block)
692
- return dumper
693
- end
694
-
695
- threads = Misc.process_options options, :threads
696
- cpus = Misc.process_options options, :cpus
697
- threads = nil if threads and threads.to_i <= 1
698
- cpus = nil if cpus and cpus.to_i <= 1
699
-
700
- if options[:keys]
701
- case options[:keys]
702
- when TrueClass
703
- options[:type] = :keys
704
- when String
705
- options[:type] = :keys
706
- options[:key_field] = options[:keys]
707
- options[:fields] = []
708
- end
709
- end
710
-
711
- bar = Misc.process_options options, :bar
712
- bar ||= Misc.process_options options, :progress
713
- options[:bar] = case bar
714
- when String
715
- max = guess_max(obj)
716
- Log::ProgressBar.new_bar(max, {:desc => bar})
717
- when TrueClass
718
- max = guess_max(obj)
719
- Log::ProgressBar.new_bar(max, nil)
720
- when Numeric
721
- max = guess_max(obj)
722
- Log::ProgressBar.new_bar(bar)
723
- when Hash
724
- max = Misc.process_options(bar, :max) || max
725
- Log::ProgressBar.new_bar(max, bar)
726
- when Log::ProgressBar
727
- bar.max ||= guess_max(obj)
728
- bar
729
- else
730
- if (defined? Step and Step === bar)
731
- max = guess_max(obj)
732
- Log::ProgressBar.new_bar(max, {:desc => bar.status, :file => bar.file(:progress)})
733
- else
734
- bar
735
- end
736
- end
737
-
738
- if into
739
- bar = Misc.process_options options, :bar
740
-
741
- options[:join] = Proc.new do |error|
742
- error = false if error.nil?
743
- Log::ProgressBar.remove_bar(bar, error) if bar
744
- end if bar
745
-
746
- options[:callback] = Proc.new do |e|
747
- begin
748
- store_into into, e
749
- rescue Aborted
750
- Log.low "Aborted callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
751
- abort_stream(into, $!)
752
- raise $!
753
- rescue Exception
754
- Log.low "Exception callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
755
- abort_stream(into, $!)
756
- raise $!
757
- ensure
758
- bar.tick if bar
759
- end
760
- end
761
-
762
- bar.init if bar
763
- begin
764
- case into
765
- when TSV::Dumper, IO
766
- traverse_stream(obj, threads, cpus, options, &block)
767
- else
768
- traverse_run(obj, threads, cpus, options, &block)
769
- end
770
- rescue Exception
771
- Log.exception $!
772
- abort_stream(into, $!)
773
- raise $!
774
- end
775
-
776
- into
777
- else
778
- traverse_run(obj, threads, cpus, options, &block)
779
- end
780
- end
781
- end
1
+ #module TSV
2
+ #
3
+ # def self.obj_stream(obj)
4
+ # case obj
5
+ # when nil
6
+ # nil
7
+ # when (defined? Step and Step)
8
+ # obj.result
9
+ # #when IO, File, Zlib::GzipReader, Bgzf
10
+ # when IO, File, Zlib::GzipReader #, Bgzf
11
+ # obj
12
+ # when TSV::Dumper
13
+ # obj.stream
14
+ # when TSV::Parser
15
+ # obj.stream
16
+ # end
17
+ # end
18
+ #
19
+ # def self.guess_max(obj)
20
+ # begin
21
+ # case obj
22
+ # when (defined? Step and Step)
23
+ # if obj.done?
24
+ # path = obj.path
25
+ # path = path.find if path.respond_to? :find
26
+ # if File.exist? path
27
+ # CMD.cmd("wc -l '#{path}'").read.to_i
28
+ # else
29
+ # nil
30
+ # end
31
+ # else
32
+ # nil
33
+ # end
34
+ # when TSV
35
+ # obj.length
36
+ # when Array, Hash
37
+ # obj.size
38
+ # when File
39
+ # return nil if Open.gzip?(obj) or Open.bgzip?(obj)
40
+ # CMD.cmd("wc -l '#{obj.path}'").read.to_i
41
+ # when Path, String
42
+ # obj = obj.find if Path === obj
43
+ # if File.exist? obj
44
+ # return nil if Open.gzip?(obj) or Open.bgzip?(obj)
45
+ # CMD.cmd("wc -l '#{obj}'").read.to_i
46
+ # else
47
+ # nil
48
+ # end
49
+ # end
50
+ # rescue Exception
51
+ # Log.exception $!
52
+ # nil
53
+ # end
54
+ # end
55
+ #
56
+ # def self.stream_name(obj)
57
+ # return "nil" if obj.nil?
58
+ # #filename_obj = obj.respond_to?(:filename) ? obj.filename : nil
59
+ # #filename_obj ||= obj.respond_to?(:path) ? obj.path : nil
60
+ # stream_obj = obj_stream(obj) || obj
61
+ # obj.class.to_s << "-" << Misc.fingerprint(stream_obj)
62
+ # end
63
+ #
64
+ # def self.report(msg, obj, into)
65
+ # into = into[:into] if Hash === into and into.include? :into
66
+ #
67
+ # Log.low{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
68
+ # end
69
+ #
70
+ # #{{{ TRAVERSE OBJECTS
71
+ #
72
+ # def self.traverse_tsv(tsv, options = {}, &block)
73
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
74
+ #
75
+ # begin
76
+ # error = false
77
+ # fields = tsv.fields
78
+ # if callback
79
+ # bar.init if bar
80
+ # tsv.through options[:key_field], options[:fields] do |k,v|
81
+ # begin
82
+ # callback.call yield(k,v,fields)
83
+ # rescue Exception
84
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, fields])}"
85
+ # raise $!
86
+ # ensure
87
+ # bar.tick if bar
88
+ # end
89
+ # end
90
+ # else
91
+ # bar.init if bar
92
+ # tsv.through options[:key_field], options[:fields] do |k,v|
93
+ # begin
94
+ # yield k,v,fields
95
+ # ensure
96
+ # bar.tick if bar
97
+ # end
98
+ # end
99
+ # end
100
+ # rescue
101
+ # error = true
102
+ # raise $!
103
+ # ensure
104
+ # join.call(error) if join
105
+ # Log::ProgressBar.remove_bar(bar, error) if bar
106
+ # end
107
+ # end
108
+ #
109
+ # def self.traverse_hash(hash, options = {}, &block)
110
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
111
+ #
112
+ # begin
113
+ # error = false
114
+ # if callback
115
+ # bar.init if bar
116
+ # hash.each do |k,v|
117
+ # begin
118
+ # callback.call yield(k,v)
119
+ # ensure
120
+ # bar.tick if bar
121
+ # end
122
+ # end
123
+ # else
124
+ # bar.init if bar
125
+ # hash.each do |k,v|
126
+ # begin
127
+ # yield k,v
128
+ # rescue Exception
129
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v])}"
130
+ # raise $!
131
+ # ensure
132
+ # bar.tick if bar
133
+ # end
134
+ # end
135
+ # end
136
+ # rescue
137
+ # error = true
138
+ # raise $!
139
+ # ensure
140
+ # join.call(error) if join
141
+ # Log::ProgressBar.remove_bar(bar, error) if bar
142
+ # end
143
+ # end
144
+ #
145
+ # def self.traverse_array(array, options = {}, &block)
146
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
147
+ #
148
+ # begin
149
+ # error = false
150
+ # if callback
151
+ # bar.init if bar
152
+ # array.each do |e|
153
+ # begin
154
+ # callback.call yield(e)
155
+ # rescue Exception
156
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
157
+ # raise $!
158
+ # ensure
159
+ # bar.tick if bar
160
+ # end
161
+ # end
162
+ # else
163
+ # bar.init if bar
164
+ # array.each do |e|
165
+ # begin
166
+ # yield e
167
+ # rescue Exception
168
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
169
+ # raise $!
170
+ # ensure
171
+ # bar.tick if bar
172
+ # end
173
+ # end
174
+ # end
175
+ #
176
+ # rescue
177
+ # error = true
178
+ # raise $!
179
+ # ensure
180
+ # join.call(error) if join
181
+ # Log::ProgressBar.remove_bar(bar, error) if bar
182
+ # end
183
+ # end
184
+ #
185
+ # def self.traverse_enumerable(enum, options = {}, &block)
186
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
187
+ #
188
+ # begin
189
+ # error = false
190
+ # if callback
191
+ # bar.init if bar
192
+ # while enum.any?
193
+ # e = enum.pop
194
+ # begin
195
+ # callback.call yield(e)
196
+ # rescue Exception
197
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
198
+ # raise $!
199
+ # ensure
200
+ # bar.tick if bar
201
+ # end
202
+ # end
203
+ # else
204
+ # bar.init if bar
205
+ # while enum.any?
206
+ # e = enum.pop
207
+ # begin
208
+ # yield e
209
+ # rescue Exception
210
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
211
+ # raise $!
212
+ # ensure
213
+ # bar.tick if bar
214
+ # end
215
+ # end
216
+ # end
217
+ #
218
+ # rescue
219
+ # error = true
220
+ # raise $!
221
+ # ensure
222
+ # join.call(error) if join
223
+ # Log::ProgressBar.remove_bar(bar, error) if bar
224
+ # end
225
+ # end
226
+ #
227
+ # def self.traverse_priority_queue(queue, options = {}, &block)
228
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
229
+ #
230
+ # begin
231
+ # error = false
232
+ # if callback
233
+ # bar.init if bar
234
+ # while queue.any?
235
+ # e = queue.pop
236
+ # begin
237
+ # callback.call yield(e)
238
+ # rescue Exception
239
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
240
+ # raise $!
241
+ # ensure
242
+ # bar.tick if bar
243
+ # end
244
+ # end
245
+ # else
246
+ # bar.init if bar
247
+ # while queue.any?
248
+ # e = queue.pop
249
+ # begin
250
+ # yield e
251
+ # rescue Exception
252
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
253
+ # raise $!
254
+ # ensure
255
+ # bar.tick if bar
256
+ # end
257
+ # end
258
+ # end
259
+ #
260
+ # rescue
261
+ # error = true
262
+ # raise $!
263
+ # ensure
264
+ # join.call(error) if join
265
+ # Log::ProgressBar.remove_bar(bar, error) if bar
266
+ # end
267
+ # end
268
+ #
269
+ # def self.traverse_io_array(io, options = {}, &block)
270
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
271
+ # begin
272
+ # error = false
273
+ # if File === io and io.closed?
274
+ # begin
275
+ # Log.low{"Rewinding stream #{stream_name(io)}"}
276
+ # io.reopen io.filename, "r"
277
+ # rescue
278
+ # Log.exception $!
279
+ # raise "File closed and could not reopen #{stream_name(io)}"
280
+ # end
281
+ # end
282
+ #
283
+ # if callback
284
+ # bar.init if bar
285
+ # while line = io.gets
286
+ # if line[-1] != "\n"
287
+ # while c = io.getc
288
+ # line << c
289
+ # break if c=="\n"
290
+ # end
291
+ # end
292
+ # begin
293
+ # callback.call yield line.chomp
294
+ # rescue Exception
295
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
296
+ # raise $!
297
+ # ensure
298
+ # bar.tick if bar
299
+ # end
300
+ # end
301
+ # else
302
+ # bar.init if bar
303
+ # while line = io.gets
304
+ # begin
305
+ # yield line.chomp
306
+ # rescue Exception
307
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint(line)}"
308
+ # raise $!
309
+ # ensure
310
+ # bar.tick if bar
311
+ # end
312
+ # end
313
+ # end
314
+ # rescue
315
+ # error = true
316
+ # raise $!
317
+ # ensure
318
+ # join.call(error) if join
319
+ # Log::ProgressBar.remove_bar(bar, error) if bar
320
+ # end
321
+ # end
322
+ #
323
+ # def self.traverse_io(io, options = {}, &block)
324
+ # callback, bar, join = Misc.process_options options, :callback, :bar, :join
325
+ #
326
+ # begin
327
+ # error = false
328
+ # if File === io and io.closed?
329
+ # begin
330
+ # Log.low{"Rewinding stream #{stream_name(io)}"}
331
+ # io.reopen io.filename, "r"
332
+ # rescue
333
+ # Log.exception $!
334
+ # raise "File closed and could not reopen #{stream_name(io)}"
335
+ # end
336
+ # end
337
+ #
338
+ # options[:monitor] = bar
339
+ # if callback
340
+ # bar.init if bar
341
+ # exception = nil
342
+ # begin
343
+ # TSV::Parser.traverse(io, options) do |k,v,f|
344
+ # begin
345
+ # callback.call yield k, v, f
346
+ # rescue Exception
347
+ # exception = $!
348
+ # Log.warn "Traverse exception on element: #{Misc.fingerprint([k, v, f])}"
349
+ # raise $!
350
+ # end
351
+ # bar.tick if bar
352
+ # end
353
+ # ensure
354
+ # raise exception if exception
355
+ # end
356
+ # else
357
+ # TSV::Parser.traverse(io, options.merge(:monitor => bar), &block)
358
+ # end
359
+ # rescue
360
+ # error = true
361
+ # raise $!
362
+ # ensure
363
+ # join.call(error) if join
364
+ # Log::ProgressBar.remove_bar(bar, error) if bar
365
+ # end
366
+ # end
367
+ #
368
+ # def self.traverse_obj(obj, options = {}, &block)
369
+ # if options[:type] == :keys
370
+ # options[:fields] = []
371
+ # options[:type] = :single
372
+ # end
373
+ #
374
+ # name = stream_name(obj)
375
+ # Log.low{"Traversing #{name} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
376
+ # begin
377
+ # case obj
378
+ # when (defined? FastContainers && FastContainers::PriorityQueue === obj)
379
+ # traverse_priority_queue(obj, options, &block)
380
+ # when TSV
381
+ # traverse_tsv(obj, options, &block)
382
+ # when Hash
383
+ # traverse_hash(obj, options, &block)
384
+ # when TSV::Parser
385
+ # callback = Misc.process_options options, :callback
386
+ # if callback
387
+ # obj.traverse(options) do |k,v|
388
+ # callback.call yield k, v
389
+ # end
390
+ # else
391
+ # obj.traverse(options, &block)
392
+ # end
393
+ # #when IO, File, Zlib::GzipReader, Bgzf, StringIO
394
+ # when IO, File, Zlib::GzipReader, StringIO
395
+ # begin
396
+ # if options[:type] == :array or options[:type] == :line
397
+ # traverse_io_array(obj, options, &block)
398
+ # else
399
+ # traverse_io(obj, options, &block)
400
+ # end
401
+ # rescue Aborted
402
+ # obj.abort if obj.respond_to? :abort
403
+ # raise $!
404
+ # rescue Exception
405
+ # obj.abort if obj.respond_to? :abort
406
+ # raise $!
407
+ # ensure
408
+ # obj.close if obj.respond_to? :close and not obj.closed?
409
+ # obj.join if obj.respond_to? :join
410
+ # end
411
+ # when Path
412
+ # obj.open do |stream|
413
+ # traverse_obj(stream, options, &block)
414
+ # end
415
+ # when TSV::Dumper
416
+ # traverse_obj(obj.stream, options, &block)
417
+ # when (defined? Step and Step)
418
+ #
419
+ # obj.clean if obj.aborted? or obj.recoverable_error?
420
+ # obj.run(true) unless obj.done? || obj.started? || obj.result
421
+ #
422
+ # stream = obj.get_stream
423
+ # options = {:type => :array}.merge(options) if obj.result_type == :array
424
+ #
425
+ # if stream
426
+ # traverse_obj(stream, options, &block)
427
+ # else
428
+ # obj.join
429
+ # traverse_obj(obj.path, options, &block)
430
+ # end
431
+ # when Array
432
+ # traverse_array(obj, options, &block)
433
+ # when Set
434
+ # traverse_array(obj.to_a, options, &block)
435
+ # when String
436
+ # if Open.remote?(obj) || Open.ssh?(obj) || Misc.is_filename?(obj)
437
+ # Open.open(obj) do |s|
438
+ # traverse_obj(s, options, &block)
439
+ # end
440
+ # else
441
+ # raise "Can not open obj for traversal #{Misc.fingerprint obj}"
442
+ # end
443
+ # when Enumerable
444
+ # traverse_enumerable(obj, options, &block)
445
+ # when nil
446
+ # raise "Can not traverse nil object into #{stream_name(options[:into])}"
447
+ # else
448
+ # raise "Unknown object for traversal: #{Misc.fingerprint obj }"
449
+ # end
450
+ # rescue IOError
451
+ # Log.low{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
452
+ # abort_stream obj
453
+ # abort_stream options[:into], $!
454
+ # raise $!
455
+ # rescue Errno::EPIPE
456
+ # Log.low{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
457
+ # abort_stream obj
458
+ # abort_stream options[:into], $!
459
+ # raise $!
460
+ # rescue Aborted
461
+ # Log.low{"Aborted traversing #{stream_name(obj)}"}
462
+ # abort_stream obj
463
+ # abort_stream options[:into], $!
464
+ # raise $!
465
+ # rescue Exception
466
+ # Log.low{"Exception traversing #{stream_name(obj)}"}
467
+ # abort_stream obj unless String === obj
468
+ # abort_stream options[:into], $!
469
+ # raise $!
470
+ # end
471
+ # end
472
+ #
473
+ # def self.traverse_threads(num, obj, options, &block)
474
+ # callback = Misc.process_options options, :callback
475
+ #
476
+ # q = RbbtThreadQueue.new num
477
+ #
478
+ # if callback
479
+ # block = Proc.new do |*args|
480
+ # mutex = args.pop
481
+ # res = yield *args
482
+ # mutex.synchronize do
483
+ # callback.call res
484
+ # end
485
+ # end
486
+ # end
487
+ #
488
+ # q.init true, &block
489
+ #
490
+ # traverse_obj(obj, options) do |*p|
491
+ # q.process p
492
+ # end
493
+ #
494
+ # q.join
495
+ # nil
496
+ # end
497
+ #
498
+ # def self.traverse_cpus(num, obj, options, &block)
499
+ # begin
500
+ # error = false
501
+ #
502
+ # callback, cleanup, join, respawn, bar = Misc.process_options options, :callback, :cleanup, :join, :respawn, :bar
503
+ # respawn = true if ENV["RBBT_RESPAWN"] and ENV["RBBT_RESPAWN"] == "true"
504
+ #
505
+ # Log.low "Traversing in #{ num } cpus: #{respawn ? "respawn" : "no respawn"}"
506
+ # q = RbbtProcessQueue.new num, cleanup, join, respawn, !!bar
507
+ # callback = Proc.new{ bar.tick } if callback.nil? and bar
508
+ # q.callback &callback
509
+ # q.init &block
510
+ #
511
+ # bar.init if bar
512
+ # traverse_obj(obj, options) do |*p|
513
+ # q.process *p
514
+ # end
515
+ #
516
+ # q.join
517
+ #
518
+ # rescue Interrupt, Aborted
519
+ # error = true
520
+ # Log.low{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
521
+ # q.abort
522
+ # stream = obj_stream(obj)
523
+ # stream.abort if stream.respond_to? :abort
524
+ # stream = obj_stream(options[:into])
525
+ # stream.abort if stream.respond_to? :abort
526
+ # q.join
527
+ # raise "Traversal aborted"
528
+ # rescue Exception
529
+ # error = true
530
+ # Log.low{"Exception during traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.message}"}
531
+ # q.abort
532
+ # stream = obj_stream(obj)
533
+ # stream.abort if stream.respond_to? :abort
534
+ # stream = obj_stream(options[:into])
535
+ # stream.abort if stream.respond_to? :abort
536
+ # q.join
537
+ # raise $!
538
+ # ensure
539
+ # Log::ProgressBar.remove_bar(bar, error) if bar
540
+ # end
541
+ # end
542
+ #
543
+ # def self.store_into(store, value)
544
+ # if MultipleResult === value
545
+ # value.each do |v|
546
+ # store_into store, v
547
+ # end
548
+ # return
549
+ # end
550
+ # begin
551
+ # return false if value.nil?
552
+ # case store
553
+ # when TSV
554
+ # if store.type == :double or store.type == :flat
555
+ # case value
556
+ # when TSV, Hash
557
+ # store.merge_zip value
558
+ # else
559
+ # store.zip_new *value
560
+ # end
561
+ # else
562
+ # k,v = value
563
+ # store[k] = v
564
+ # end
565
+ # when Hash
566
+ # case value
567
+ # when TSV, Hash
568
+ # store.merge! value
569
+ # else
570
+ # k,v = value
571
+ # store[k] = v
572
+ # end
573
+ # when TSV::Dumper
574
+ # return false if value.nil?
575
+ # store.add *value
576
+ # when IO
577
+ # return false if value.nil?
578
+ # value.chomp!
579
+ # store.puts value
580
+ # else
581
+ # store << value
582
+ # end
583
+ # true
584
+ # rescue Aborted, Interrupt
585
+ # Log.low "Aborted storing into #{Misc.fingerprint store}"
586
+ # abort_stream(store, $!)
587
+ # raise $!
588
+ # rescue Exception
589
+ # Log.low "Exception storing into #{Misc.fingerprint store}: #{$!.message}"
590
+ # abort_stream(store, $!)
591
+ # raise $!
592
+ # end
593
+ # end
594
+ #
595
+ # def self.get_streams_to_close(obj)
596
+ # close_streams = []
597
+ # case obj
598
+ # when IO, File
599
+ # close_streams << obj
600
+ # when TSV::Parser
601
+ # when TSV::Dumper
602
+ # close_streams << obj.result.in_stream
603
+ # when (defined? Step and Step)
604
+ # obj.mutex.synchronize do
605
+ # case obj.result
606
+ # when IO
607
+ # close_streams << obj.result
608
+ # when TSV::Dumper
609
+ # close_streams << obj.result.in_stream
610
+ # end
611
+ # end
612
+ # obj.inputs.each do |input|
613
+ # close_streams = get_streams_to_close(input) + close_streams
614
+ # end
615
+ # obj.dependencies.each do |dependency|
616
+ # close_streams = get_streams_to_close(dependency) + close_streams
617
+ # end
618
+ # end
619
+ # close_streams
620
+ # end
621
+ #
622
+ # def self.traverse_run(obj, threads, cpus, options = {}, &block)
623
+ # threads = nil if threads == 1
624
+ # cpus = nil if cpus == 1
625
+ # if ENV["RBBT_NO_MAP_REDUCE"] == "true" or (threads.nil? and cpus.nil?)
626
+ # traverse_obj obj, options, &block
627
+ # else
628
+ # if threads
629
+ # traverse_threads threads, obj, options, &block
630
+ # else
631
+ # close_streams = Misc.process_options(options, :close_streams) || []
632
+ # close_streams = [close_streams] unless Array === close_streams
633
+ #
634
+ # close_streams.concat(get_streams_to_close(obj))
635
+ # options[:close_streams] = close_streams
636
+ #
637
+ # if close_streams and close_streams.any?
638
+ # options[:cleanup] = Proc.new do
639
+ # close_streams.uniq.each do |s|
640
+ # s.close unless s.closed?
641
+ # end
642
+ # end
643
+ # end
644
+ #
645
+ # traverse_cpus cpus, obj, options, &block
646
+ # end
647
+ # end
648
+ # end
649
+ #
650
+ # def self.traverse_stream(obj, threads = nil, cpus = nil, options = {}, &block)
651
+ # into = options[:into]
652
+ #
653
+ # thread = Thread.new do
654
+ # begin
655
+ # traverse_run(obj, threads, cpus, options, &block)
656
+ # into.close if into.respond_to?(:close) and not (into.respond_to?(:closed?) and into.closed?)
657
+ # rescue Exception
658
+ # abort_stream obj
659
+ # abort_stream into
660
+ # raise $!
661
+ # end
662
+ # end
663
+ #
664
+ # ConcurrentStream.setup(obj_stream(into), :threads => thread)
665
+ # end
666
+ #
667
+ # def self.traverse(obj, options = {}, &block)
668
+ # into = options[:into]
669
+ #
670
+ # into = options[:into] = Open.open(into, :mode => "w") if Misc.is_filename?(into)
671
+ #
672
+ # case into
673
+ # when :stream
674
+ # sout = Misc.open_pipe false, false do |sin|
675
+ # begin
676
+ # traverse(obj, options.merge(:into => sin), &block)
677
+ # rescue Exception
678
+ # Log.exception $!
679
+ # begin
680
+ # sout.abort if sout.respond_to? :abort
681
+ # sout.join if sout.respond_to? :join
682
+ # ensure
683
+ # raise $!
684
+ # end
685
+ # end
686
+ # end
687
+ # return sout
688
+ # when :dumper
689
+ # obj_options = obj.respond_to?(:options) ? obj.options : {}
690
+ # dumper = TSV::Dumper.new obj_options.merge(options)
691
+ # dumper.init
692
+ # _options = options.merge(obj_options).merge(:into => dumper)
693
+ # traverse(obj, _options, &block)
694
+ # return dumper
695
+ # end
696
+ #
697
+ # threads = Misc.process_options options, :threads
698
+ # cpus = Misc.process_options options, :cpus
699
+ # threads = nil if threads and threads.to_i <= 1
700
+ # cpus = nil if cpus and cpus.to_i <= 1
701
+ #
702
+ # if options[:keys]
703
+ # case options[:keys]
704
+ # when TrueClass
705
+ # options[:type] = :keys
706
+ # when String
707
+ # options[:type] = :keys
708
+ # options[:key_field] = options[:keys]
709
+ # options[:fields] = []
710
+ # end
711
+ # end
712
+ #
713
+ # bar = Misc.process_options options, :bar
714
+ # bar ||= Misc.process_options options, :progress
715
+ # options[:bar] = case bar
716
+ # when String
717
+ # max = guess_max(obj)
718
+ # Log::ProgressBar.new_bar(max, {:desc => bar})
719
+ # when TrueClass
720
+ # max = guess_max(obj)
721
+ # Log::ProgressBar.new_bar(max, nil)
722
+ # when Numeric
723
+ # max = guess_max(obj)
724
+ # Log::ProgressBar.new_bar(bar)
725
+ # when Hash
726
+ # max = Misc.process_options(bar, :max) || max
727
+ # Log::ProgressBar.new_bar(max, bar)
728
+ # when Log::ProgressBar
729
+ # bar.max ||= guess_max(obj)
730
+ # bar
731
+ # else
732
+ # if (defined? Step and Step === bar)
733
+ # max = guess_max(obj)
734
+ # Log::ProgressBar.new_bar(max, {:desc => bar.status, :file => bar.file(:progress)})
735
+ # else
736
+ # bar
737
+ # end
738
+ # end
739
+ #
740
+ # if into
741
+ # bar = Misc.process_options options, :bar
742
+ #
743
+ # options[:join] = Proc.new do |error|
744
+ # error = false if error.nil?
745
+ # Log::ProgressBar.remove_bar(bar, error) if bar
746
+ # end if bar
747
+ #
748
+ # options[:callback] = Proc.new do |e|
749
+ # begin
750
+ # store_into into, e
751
+ # rescue Aborted
752
+ # Log.low "Aborted callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
753
+ # abort_stream(into, $!)
754
+ # raise $!
755
+ # rescue Exception
756
+ # Log.low "Exception callback #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
757
+ # abort_stream(into, $!)
758
+ # raise $!
759
+ # ensure
760
+ # bar.tick if bar
761
+ # end
762
+ # end
763
+ #
764
+ # bar.init if bar
765
+ # begin
766
+ # case into
767
+ # when TSV::Dumper, IO
768
+ # traverse_stream(obj, threads, cpus, options, &block)
769
+ # else
770
+ # traverse_run(obj, threads, cpus, options, &block)
771
+ # end
772
+ # rescue Exception
773
+ # Log.exception $!
774
+ # abort_stream(into, $!)
775
+ # raise $!
776
+ # end
777
+ #
778
+ # into
779
+ # else
780
+ # traverse_run(obj, threads, cpus, options, &block)
781
+ # end
782
+ # end
783
+ #end