rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -1,766 +1,766 @@
1
- require 'rbbt/workflow/step/dependencies'
2
- require 'socket'
3
-
4
-
5
- module StreamArray; end
6
-
7
- class Step
8
-
9
- attr_reader :stream, :dupped, :saved_stream
10
-
11
- def get_stream
12
- @mutex.synchronize do
13
- Log.low "Getting stream from #{path} #{!@saved_stream} [#{object_id}-#{Misc.fingerprint(@result)}]"
14
- begin
15
- if IO === @result
16
- return nil if @saved_stream
17
- @saved_stream = @result
18
- elsif StreamArray === @result and @result.any?
19
- @saved_stream = @result.pop
20
- else
21
- nil
22
- end
23
- end
24
- end
25
- end
26
-
27
- def resolve_input_steps
28
- step = false
29
- pos = 0
30
-
31
- input_options = Workflow === workflow ? workflow.task_info(task_name)[:input_options] : {}
32
- new_inputs = inputs.collect do |i|
33
- begin
34
- if Step === i
35
- if i.error?
36
- e = i.get_exception
37
- if e
38
- raise e
39
- else
40
- raise DependencyError, "Error in dep. #{Log.blue e.path}"
41
- end
42
- end
43
- step = true
44
- i.produce unless i.done? || i.error? || i.started?
45
- if i.done?
46
- if (task.input_options[task.inputs[pos]] || {})[:stream]
47
- TSV.get_stream i
48
- else
49
- if (task.input_options[task.inputs[pos]] || {})[:nofile]
50
- i.path
51
- else
52
- i.load
53
- end
54
- end
55
- elsif i.streaming? and (task.input_options[task.inputs[pos]] || {})[:stream]
56
- TSV.get_stream i
57
- else
58
- i.join
59
- if (task.input_options[task.inputs[pos]] || {})[:stream]
60
- TSV.get_stream i
61
- else
62
- if (task.input_options[task.inputs[pos]] || {})[:nofile]
63
- i.path
64
- else
65
- i.load
66
- end
67
- end
68
- end
69
- else
70
- i
71
- end
72
- ensure
73
- pos += 1
74
- end
75
- end
76
- @inputs.replace new_inputs if step
77
- end
78
-
79
- def rewind_inputs
80
- return if @inputs.nil?
81
- Log.debug "Rewinding inputs for #{path}"
82
- @inputs.each do |input|
83
- next unless input.respond_to? :rewind
84
- begin
85
- input.rewind
86
- input.first_line = nil if TSV::Parser === input
87
- Log.debug "Rewinded #{Misc.fingerprint input}"
88
- rescue
89
- end
90
- end
91
- end
92
-
93
- def _exec
94
- resolve_input_steps
95
- rewind_inputs
96
- @exec = true if @exec.nil?
97
- begin
98
- old = Signal.trap("INT"){ Thread.current.raise Aborted }
99
- if @task.respond_to?(:exec_in)
100
- @task.exec_in((bindings || self), *@inputs)
101
- elsif @task
102
- (bindings || self).instance_exec *@inputs, &@task
103
- else
104
- raise DependencyError, "Dependency #{self.path} cannot be produced"
105
- end
106
- ensure
107
- Signal.trap("INT", old)
108
- end
109
- end
110
-
111
- def exec(no_load=false)
112
- dependencies.each{|dependency| dependency.exec(no_load) }
113
- @mutex.synchronize do
114
- @result = self._exec
115
- @result = @result.stream if TSV::Dumper === @result
116
- end
117
- (no_load || ENV["RBBT_NO_STREAM"]) ? @result : prepare_result(@result, @task.result_description)
118
- end
119
-
120
- def updatable?
121
- return true if ENV["RBBT_UPDATE_ALL_JOBS"] == 'true'
122
- return false unless ENV["RBBT_UPDATE"] == "true"
123
- return false unless Open.exists?(info_file)
124
- return true if status != :noinfo && ! (relocated? && done?)
125
- false
126
- end
127
-
128
- def dependency_checks
129
- return [] if ENV["RBBT_UPDATE"] != "true"
130
-
131
- rec_dependencies(true).
132
- reject{|dependency| (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dependency) || Open.remote?(dependency.path) }.
133
- reject{|dependency| dependency.error? }.
134
- #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
135
- #select{|dependency| dependency.updatable? }.
136
- collect{|dependency| Workflow.relocate_dependency(self, dependency)}
137
- end
138
-
139
- def input_checks
140
- (inputs.select{|i| Step === i } + inputs.select{|i| Path === i && Step === i.resource}.collect{|i| i.resource})
141
- #select{|dependency| dependency.updatable? }
142
- end
143
-
144
- def checks
145
- (dependency_checks + input_checks).uniq
146
- end
147
-
148
- def persist_checks
149
- canfail_paths = self.canfail_paths
150
- checks.collect do |dep|
151
- path = dep.path
152
- next if ! dep.done? && canfail_paths.include?(path)
153
- path
154
- end.compact
155
- end
156
-
157
- def out_of_date
158
-
159
- checks = self.checks
160
- return [] if checks.empty?
161
- outdated_time = []
162
- outdated_dep = []
163
- canfail_paths = self.canfail_paths
164
- this_mtime = Open.mtime(self.path) if Open.exists?(self.path)
165
-
166
- #outdated_time = checks.select{|dep| dep.updatable? && dep.done? && Persist.newer?(path, dep.path) }
167
- outdated_time = checks.select{|dep| dep.done? && Persist.newer?(path, dep.path) }
168
- outdated_dep = checks.reject{|dep| dep.done? || (dep.error? && ! dep.recoverable_error? && canfail_paths.include?(dep.path)) }
169
-
170
- #checks.each do |dep|
171
- # next unless dep.updatable?
172
- # dep_done = dep.done?
173
-
174
- # begin
175
- # if this_mtime && dep_done && Open.exists?(dep.path) && (Open.mtime(dep.path) > this_mtime + 1)
176
- # outdated_time << dep
177
- # end
178
- # rescue
179
- # end
180
-
181
- # # Is this pointless? this would mean some dep got updated after a later
182
- # # dep but but before this one.
183
- # #if (! dep.done? && ! canfail_paths.include?(dep.path)) || ! dep.updated?
184
-
185
- # if (! dep_done && ! canfail_paths.include?(dep.path))
186
- # outdated_dep << dep
187
- # end
188
- #end
189
-
190
- Log.medium "Some newer files found: #{Misc.fingerprint outdated_time}" if outdated_time.any?
191
- Log.medium "Some outdated files found: #{Misc.fingerprint outdated_dep}" if outdated_dep.any?
192
-
193
- outdated_time + outdated_dep
194
- end
195
-
196
- def updated?
197
- return true if ENV["RBBT_UPDATE"] != "true"
198
- return true unless (done? || error? || ! writable?)
199
-
200
- @updated ||= out_of_date.empty?
201
- end
202
-
203
- def kill_children
204
- begin
205
- children_pids = info[:children_pids]
206
- if children_pids and children_pids.any?
207
- Log.medium("Killing children: #{ children_pids * ", " }")
208
- children_pids.each do |pid|
209
- Log.medium("Killing child #{ pid }")
210
- begin
211
- Process.kill "TERM", pid.to_i
212
- rescue Exception
213
- Log.medium("Exception killing child #{ pid }: #{$!.message}")
214
- end
215
- end
216
- end
217
- rescue
218
- Log.medium("Exception finding children")
219
- end
220
- end
221
-
222
- def run(no_load = false)
223
- result = nil
224
-
225
- if Workflow === workflow && workflow.relay_tasks && workflow.relay_tasks.include?(task_name)
226
- server, options = workflow.relay_tasks[task_name]
227
- options[:migrate] = true
228
- return RemoteWorkflow::SSH.relay_job(self, server, options)
229
- end
230
-
231
- begin
232
- no_load = :stream if no_load
233
- result_type = self.result_type || info[:result_type]
234
-
235
- res = @mutex.synchronize do
236
- time_elapsed = total_time_elapsed = nil
237
- Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
238
-
239
- result = Persist.persist "Job", result_type, :file => path, :check => persist_checks, :no_load => no_load do
240
-
241
- if Step === Step.log_relay_step and not self == Step.log_relay_step
242
- relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
243
- end
244
-
245
- Open.write(pid_file, Process.pid.to_s) unless Open.exists? pid_file
246
-
247
- @exec = false
248
- init_info(true)
249
-
250
- #workflow = @workflow || @task.respond_to?(:workflow) ? @task.workflow : nil
251
- result_type = @task.respond_to?(:result_type) ? @task.result_type : nil
252
- result_description = @task.respond_to?(:result_description) ? @task.result_description : nil
253
-
254
- log :setup, "#{Log.color :green, "Setup"} step #{Log.color :yellow, task_name}"
255
-
256
- merge_info({
257
- :issued => (issue_time = Time.now),
258
- :name => name,
259
- :pid => Process.pid.to_s,
260
- :pid_hostname => Socket.gethostname,
261
- :clean_name => clean_name,
262
- :workflow => workflow.to_s,
263
- :task_name => task_name,
264
- :result_type => result_type,
265
- :result_description => result_description,
266
- :dependencies => dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]},
267
- :versions => Rbbt.versions
268
- })
269
-
270
- new_inputs = []
271
- @inputs.each_with_index do |input,i|
272
- name = @task.respond_to?(:inputs) ? @task.inputs[i] : nil
273
- type = @task.respond_to?(:input_types) ? @task.input_types[i] : nil
274
-
275
- if type == :directory
276
- directory_inputs = file('directory_inputs')
277
- input_source = directory_inputs['.source'][name].find
278
- input_dir = directory_inputs[name].find
279
-
280
- case input
281
- when Path
282
- if input.directory?
283
- new_inputs << input
284
- else
285
- input.open do |io|
286
- begin
287
- Misc.untar(io, input_source)
288
- rescue
289
- raise ParameterException, "Error unpackaging tar directory input '#{name}':\n\n#{$!.message}"
290
- end
291
- end
292
- tar_1 = input_source.glob("*")
293
- raise ParameterException, "When using tar.gz files for directories, the directory must be the single first level entry" if tar_1.length != 1
294
- FileUtils.ln_s Misc.path_relative_to(directory_inputs, tar_1.first), input_dir
295
- new_inputs << input_dir
296
- end
297
- when File, IO, Tempfile
298
- begin
299
- Misc.untar(Open.gunzip(input), input_source)
300
- rescue
301
- raise ParameterException, "Error unpackaging tar directory input '#{name}':\n\n#{$!.message}"
302
- end
303
- tar_1 = input_source.glob("*")
304
- raise ParameterException, "When using tar.gz files for directories, the directory must be the single first level entry" if tar_1.length != 1
305
- FileUtils.ln_s Misc.path_relative_to(directory_inputs, tar_1.first), input_dir
306
- new_inputs << input_dir
307
- else
308
- raise ParameterException, "Format of directory input '#{name}' not understood: #{Misc.fingerprint input}"
309
- end
310
- else
311
- new_inputs << input
312
- end
313
- end if @inputs
314
-
315
- @inputs = new_inputs if @inputs
316
-
317
- if @inputs && task.respond_to?(:inputs) && ! task.inputs.nil?
318
- info_inputs = @inputs.collect do |i|
319
- if Path === i
320
- i.to_s
321
- else
322
- i
323
- end
324
- end
325
- set_info :inputs, Misc.remove_long_items(Misc.zip2hash(task.inputs, info_inputs))
326
- end
327
-
328
- begin
329
- run_dependencies
330
- rescue Exception
331
- Open.rm pid_file if Open.exists?(pid_file)
332
- stop_dependencies
333
- raise $!
334
- end
335
-
336
- set_info :started, (start_time = Time.now)
337
- log :started, "Starting step #{Log.color :yellow, task_name}"
338
-
339
- config_keys_pre = Rbbt::Config::GOT_KEYS.dup
340
- begin
341
-
342
- result = _exec
343
- rescue Aborted, Interrupt
344
- log(:aborted, "Aborted")
345
- raise $!
346
- rescue Exception
347
- backtrace = $!.backtrace
348
-
349
- # HACK: This fixes an strange behaviour in 1.9.3 where some
350
- # backtrace strings are coded in ASCII-8BIT
351
- backtrace = backtrace.collect{|l| l.dup.force_encoding("UTF-8")} if String.instance_methods.include? :force_encoding
352
- set_info :backtrace, backtrace
353
- log(:error, "#{$!.class}: #{$!.message}")
354
- stop_dependencies
355
- raise $!
356
- end
357
-
358
- if not no_load or ENV["RBBT_NO_STREAM"] == "true"
359
- result = prepare_result result, @task.description, info if IO === result
360
- result = prepare_result result.stream, @task.description, info if TSV::Dumper === result
361
- end
362
-
363
- stream = case result
364
- when IO
365
- result
366
- when TSV::Dumper
367
- result.stream
368
- end
369
-
370
- if stream
371
- log :streaming, "Streaming step #{Log.color :yellow, task_name.to_s || ""}"
372
-
373
- callback = Proc.new do
374
- if AbortedStream === stream
375
- if stream.exception
376
- raise stream.exception
377
- else
378
- raise Aborted
379
- end
380
- end
381
- begin
382
- status = self.status
383
- if status != :done and status != :error and status != :aborted
384
- Misc.insist do
385
- merge_info({
386
- :done => (done_time = Time.now),
387
- :total_time_elapsed => (total_time_elapsed = done_time - issue_time),
388
- :time_elapsed => (time_elapsed = done_time - start_time),
389
- :versions => Rbbt.versions
390
- })
391
- log :done, "Completed step #{Log.color :yellow, task_name.to_s || ""} in #{time_elapsed.to_i}+#{(total_time_elapsed - time_elapsed).to_i} sec."
392
- end
393
- end
394
- rescue
395
- Log.exception $!
396
- ensure
397
- Step.purge_stream_cache
398
- Open.rm pid_file if Open.exist?(pid_file)
399
- end
400
- end
401
-
402
- abort_callback = Proc.new do |exception|
403
- begin
404
- if exception
405
- self.exception exception
406
- else
407
- log :aborted, "#{Log.color :red, "Aborted"} step #{Log.color :yellow, task_name.to_s || ""}" if status == :streaming
408
- end
409
- _clean_finished
410
- rescue
411
- stop_dependencies
412
- Open.rm pid_file if Open.exist?(pid_file)
413
- end
414
- end
415
-
416
- ConcurrentStream.setup stream, :callback => callback, :abort_callback => abort_callback
417
-
418
- if AbortedStream === stream
419
- exception = stream.exception || Aborted.new("Aborted stream: #{Misc.fingerprint stream}")
420
- self.exception exception
421
- _clean_finished
422
- raise exception
423
- end
424
- else
425
- merge_info({
426
- :done => (done_time = Time.now),
427
- :total_time_elapsed => (total_time_elapsed = done_time - issue_time),
428
- :time_elapsed => (time_elapsed = done_time - start_time),
429
- :versions => Rbbt.versions
430
- })
431
- log :ending
432
- Step.purge_stream_cache
433
- end
434
-
435
- set_info :dependencies, dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]}
436
-
437
- config_keys = Rbbt::Config::GOT_KEYS[config_keys_pre.length..-1]
438
- set_info :config_keys, config_keys.uniq
439
-
440
- if result.nil? && File.exist?(self.tmp_path) && ! File.exist?(self.path)
441
- Open.mv self.tmp_path, self.path
442
- end
443
- Open.rm pid_file if Open.exist?(pid_file) unless stream
444
- result
445
- end # END PERSIST
446
- log :done, "Completed step #{Log.color :yellow, task_name.to_s || ""} in #{time_elapsed.to_i}+#{(total_time_elapsed - time_elapsed).to_i} sec." unless stream or time_elapsed.nil?
447
-
448
- if no_load
449
- @result ||= result
450
- self
451
- else
452
- Step.purge_stream_cache
453
- @result = prepare_result result, result_description
454
- end
455
- end # END SYNC
456
- res
457
- rescue DependencyError, DependencyRbbtException
458
- exception $!
459
- rescue LockInterrupted
460
- raise $!
461
- rescue Aborted, Interrupt
462
- abort
463
- stop_dependencies
464
- raise $!
465
- rescue Exception
466
- exception $!
467
- stop_dependencies
468
- raise $!
469
- ensure
470
- no_load = false unless IO === result
471
- Open.rm pid_file if Open.exist?(pid_file) unless no_load
472
- #set_info :pid, nil unless no_load
473
- end
474
- end
475
-
476
- def produce(force=false, dofork=false)
477
- return self if done? and not dirty?
478
-
479
- self.status_lock.synchronize do
480
- if error? || aborted? || stalled?
481
- if stalled?
482
- Log.warn "Aborting stalled job #{self.path}"
483
- abort
484
- end
485
- if force or aborted? or recoverable_error?
486
- clean
487
- else
488
- e = get_exception
489
- if e
490
- Log.error "Raising exception in produced job #{self.path}: #{e.message}"
491
- raise e
492
- else
493
- raise "Error in job: #{self.path}"
494
- end
495
- end
496
- end
497
- end
498
-
499
- update if done?
500
-
501
- if dofork
502
- fork(true) unless started?
503
-
504
- join unless done? or dofork == :nowait
505
- else
506
- run(true) unless started?
507
-
508
- join unless done?
509
- end
510
-
511
- self
512
- end
513
-
514
- def fork(no_load = false, semaphore = nil)
515
- raise "Can not fork: Step is waiting for proces #{@pid} to finish" if not @pid.nil? and not Process.pid == @pid and Misc.pid_exists?(@pid) and not done? and info[:forked]
516
- Log.debug "Fork to run #{self.path}"
517
- sout, sin = Misc.pipe if no_load == :stream
518
- @pid = Process.fork do
519
- Signal.trap(:TERM) do
520
- raise Aborted, "Recieved TERM Signal on forked process #{Process.pid}"
521
- end
522
- sout.close if sout
523
- Misc.pre_fork
524
- Open.mkdir File.dirname(path) unless Open.exist?(File.dirname(path))
525
- Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
526
-
527
- if semaphore
528
- init_info
529
- log :queue, "Queued over semaphore: #{semaphore}"
530
- ret = RbbtSemaphore.wait_semaphore(semaphore)
531
- raise SemaphoreInterrupted if ret == -1
532
- end
533
-
534
- begin
535
- begin
536
- @forked = true
537
- res = run no_load
538
- set_info :forked, true
539
- if sin
540
- io = TSV.get_stream res
541
- if io.respond_to? :setup
542
- io.setup(sin)
543
- sin.pair = io
544
- io.pair = sin
545
- end
546
- begin
547
- Misc.consume_stream(io, false, sin)
548
- rescue
549
- Log.warn "Could not consume stream (#{io.closed? ? 'closed' : 'open'}) into pipe for forked job: #{self.path}"
550
- Misc.consume_stream(io) unless io.closed?
551
- end
552
- end
553
- rescue Aborted, Interrupt
554
- Log.debug{"Forked process aborted: #{path}"}
555
- log :aborted, "Job aborted (#{Process.pid})"
556
- raise $!
557
- rescue Exception
558
- Log.debug("Exception '#{$!.message}' caught on forked process: #{path}")
559
- raise $!
560
- ensure
561
- join_stream
562
- end
563
-
564
- begin
565
- children_pids = info[:children_pids]
566
- if children_pids
567
- children_pids.each do |pid|
568
- if Misc.pid_exists? pid
569
- begin
570
- Process.waitpid pid
571
- rescue Errno::ECHILD
572
- Log.low "Waiting on #{ pid } failed: #{$!.message}"
573
- end
574
- end
575
- end
576
- set_info :children_done, Time.now
577
- end
578
- rescue Exception
579
- Log.debug("Exception waiting for children: #{$!.message}")
580
- RbbtSemaphore.post_semaphore(semaphore) if semaphore
581
- Kernel.exit! -1
582
- end
583
- #set_info :pid, nil
584
- ensure
585
- RbbtSemaphore.post_semaphore(semaphore) if semaphore
586
- end
587
- end
588
- sin.close if sin
589
- @result = sout if sout
590
- Process.detach(@pid)
591
- self
592
- end
593
-
594
- def abort_pid
595
- @pid ||= info[:pid] || Open.read(pid_file)
596
-
597
- case @pid
598
- when nil
599
- Log.medium "Could not abort #{path}: no pid"
600
- false
601
- when Process.pid
602
- Log.medium "Could not abort #{path}: same process"
603
- false
604
- else
605
- Log.medium "Aborting pid #{path}: #{ @pid } #{Process.pid}"
606
- begin
607
- Process.kill("TERM", @pid.to_i)
608
- s = Process.waitpid2 @pid.to_i
609
- Log.medium "Aborted pid #{path} #{s}"
610
- rescue Exception
611
- Log.debug("Aborted job #{@pid} was not killed: #{$!.message}")
612
- end
613
- true
614
- end
615
- end
616
-
617
- def abort_stream
618
- stream = @result if IO === @result
619
- @saved_stream = nil
620
- if stream and stream.respond_to? :abort and not stream.aborted?
621
- doretry = true
622
- begin
623
- Log.medium "Aborting job stream #{stream.inspect} -- #{Log.color :blue, path}"
624
- stream.abort
625
- rescue Aborted, Interrupt
626
- Log.medium "Aborting job stream #{stream.inspect} ABORTED RETRY -- #{Log.color :blue, path}"
627
- if doretry
628
- doretry = false
629
- retry
630
- end
631
- end
632
- end
633
- end
634
-
635
- def _clean_finished
636
- if Open.exists?(path) && status != :done
637
- Log.warn "Aborted job had finished. Removing result -- #{ path }"
638
- begin
639
- Open.rm path
640
- rescue Exception
641
- Log.warn "Exception removing result of aborted job: #{$!.message}"
642
- end
643
- end
644
-
645
- if Open.exists?(tmp_path) && status != :done
646
- Log.warn "Aborted job had finished. Removing tmp result -- #{ tmp_path }"
647
- begin
648
- Open.rm tmp_path
649
- rescue Exception
650
- Log.warn "Exception removing tmp result of aborted job: #{$!.message}"
651
- end
652
- end
653
- end
654
-
655
- def _abort
656
- return if @aborted
657
- @aborted = true
658
- Log.medium{"#{Log.color :red, "Aborting"} #{Log.color :blue, path}"}
659
- doretry = true
660
- begin
661
- return if done?
662
- abort_pid if running?
663
- kill_children
664
- abort_stream
665
- stop_dependencies
666
- rescue Aborted, Interrupt
667
- Log.medium{"#{Log.color :red, "Aborting ABORTED RETRY"} #{Log.color :blue, path}"}
668
- if doretry
669
- doretry = false
670
- retry
671
- end
672
- raise $!
673
- rescue Exception
674
- if doretry
675
- doretry = false
676
- retry
677
- end
678
- ensure
679
- _clean_finished
680
- end
681
- end
682
-
683
- def abort
684
- return if done? and (status == :done or status == :noinfo)
685
- _abort
686
- log(:aborted, "Job aborted") unless aborted? or error?
687
- self
688
- end
689
-
690
- def join_stream
691
- stream = get_stream if @result
692
- @result = nil
693
- if stream
694
- begin
695
- Misc.consume_stream stream
696
- stream.join if stream.respond_to? :join
697
- rescue Exception
698
- stream.abort $!
699
- self._abort
700
- end
701
- end
702
- end
703
-
704
- def soft_grace
705
- until done? or (Open.exist?(info_file) && info[:status] != :noinfo)
706
- sleep 1
707
- end
708
- self
709
- end
710
-
711
- def grace
712
- until done? || result || error? || aborted? || streaming? || waiting? || running?
713
- sleep 1
714
- end
715
- self
716
- end
717
-
718
- def join
719
-
720
- grace if Open.exists?(info_file)
721
-
722
- if streaming?
723
- join_stream
724
- end
725
-
726
- return self if not Open.exists? info_file
727
-
728
- return self if info[:joined]
729
-
730
- pid = @pid
731
-
732
- Misc.insist [0.1, 0.2, 0.5, 1] do
733
- pid ||= info[:pid]
734
- end
735
-
736
- begin
737
-
738
- if pid.nil? or Process.pid == pid
739
- dependencies.each{|dep| dep.join }
740
- else
741
- begin
742
- pid = pid.to_i if String === pid
743
- Log.debug{"Waiting for pid: #{pid}"}
744
- Process.waitpid pid
745
- rescue Errno::ECHILD
746
- Log.debug{"Process #{ pid } already finished: #{ path }"}
747
- end if Misc.pid_exists? pid
748
- pid = nil
749
- dependencies.each{|dep| dep.join }
750
- end
751
-
752
- until (Open.exists?(path) && (status == :done || status == :noinfo)) or error? or aborted? or waiting?
753
- sleep 1
754
- join_stream if streaming?
755
- end
756
-
757
- self
758
- ensure
759
- begin
760
- set_info :joined, true
761
- rescue
762
- end if Open.exists?(info_file) && writable?
763
- @result = nil
764
- end
765
- end
766
- end
1
+ #require 'rbbt/workflow/step/dependencies'
2
+ #require 'socket'
3
+ #
4
+ #
5
+ #module StreamArray; end
6
+ #
7
+ #class Step
8
+ #
9
+ # attr_reader :stream, :dupped, :saved_stream
10
+ #
11
+ # def get_stream
12
+ # @mutex.synchronize do
13
+ # Log.low "Getting stream from #{path} #{!@saved_stream} [#{object_id}-#{Misc.fingerprint(@result)}]"
14
+ # begin
15
+ # if IO === @result
16
+ # return nil if @saved_stream
17
+ # @saved_stream = @result
18
+ # elsif StreamArray === @result and @result.any?
19
+ # @saved_stream = @result.pop
20
+ # else
21
+ # nil
22
+ # end
23
+ # end
24
+ # end
25
+ # end
26
+ #
27
+ # def resolve_input_steps
28
+ # step = false
29
+ # pos = 0
30
+ #
31
+ # input_options = Workflow === workflow ? workflow.task_info(task_name)[:input_options] : {}
32
+ # new_inputs = inputs.collect do |i|
33
+ # begin
34
+ # if Step === i
35
+ # if i.error?
36
+ # e = i.get_exception
37
+ # if e
38
+ # raise e
39
+ # else
40
+ # raise DependencyError, "Error in dep. #{Log.blue e.path}"
41
+ # end
42
+ # end
43
+ # step = true
44
+ # i.produce unless i.done? || i.error? || i.started?
45
+ # if i.done?
46
+ # if (task.input_options[task.inputs[pos]] || {})[:stream]
47
+ # TSV.get_stream i
48
+ # else
49
+ # if (task.input_options[task.inputs[pos]] || {})[:nofile]
50
+ # i.path
51
+ # else
52
+ # i.load
53
+ # end
54
+ # end
55
+ # elsif i.streaming? and (task.input_options[task.inputs[pos]] || {})[:stream]
56
+ # TSV.get_stream i
57
+ # else
58
+ # i.join
59
+ # if (task.input_options[task.inputs[pos]] || {})[:stream]
60
+ # TSV.get_stream i
61
+ # else
62
+ # if (task.input_options[task.inputs[pos]] || {})[:nofile]
63
+ # i.path
64
+ # else
65
+ # i.load
66
+ # end
67
+ # end
68
+ # end
69
+ # else
70
+ # i
71
+ # end
72
+ # ensure
73
+ # pos += 1
74
+ # end
75
+ # end
76
+ # @inputs.replace new_inputs if step
77
+ # end
78
+ #
79
+ # def rewind_inputs
80
+ # return if @inputs.nil?
81
+ # Log.debug "Rewinding inputs for #{path}"
82
+ # @inputs.each do |input|
83
+ # next unless input.respond_to? :rewind
84
+ # begin
85
+ # input.rewind
86
+ # input.first_line = nil if TSV::Parser === input
87
+ # Log.debug "Rewinded #{Misc.fingerprint input}"
88
+ # rescue
89
+ # end
90
+ # end
91
+ # end
92
+ #
93
+ # def _exec
94
+ # resolve_input_steps
95
+ # rewind_inputs
96
+ # @exec = true if @exec.nil?
97
+ # begin
98
+ # old = Signal.trap("INT"){ Thread.current.raise Aborted }
99
+ # if @task.respond_to?(:exec_in)
100
+ # @task.exec_in((bindings || self), *@inputs)
101
+ # elsif @task
102
+ # (bindings || self).instance_exec *@inputs, &@task
103
+ # else
104
+ # raise DependencyError, "Dependency #{self.path} cannot be produced"
105
+ # end
106
+ # ensure
107
+ # Signal.trap("INT", old)
108
+ # end
109
+ # end
110
+ #
111
+ # def exec(no_load=false)
112
+ # dependencies.each{|dependency| dependency.exec(no_load) }
113
+ # @mutex.synchronize do
114
+ # @result = self._exec
115
+ # @result = @result.stream if TSV::Dumper === @result
116
+ # end
117
+ # (no_load || ENV["RBBT_NO_STREAM"]) ? @result : prepare_result(@result, @task.result_description)
118
+ # end
119
+ #
120
+ # def updatable?
121
+ # return true if ENV["RBBT_UPDATE_ALL_JOBS"] == 'true'
122
+ # return false unless ENV["RBBT_UPDATE"] == "true"
123
+ # return false unless Open.exists?(info_file)
124
+ # return true if status != :noinfo && ! (relocated? && done?)
125
+ # false
126
+ # end
127
+ #
128
+ # def dependency_checks
129
+ # return [] if ENV["RBBT_UPDATE"] != "true"
130
+ #
131
+ # rec_dependencies(true).
132
+ # reject{|dependency| (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dependency) || Open.remote?(dependency.path) }.
133
+ # reject{|dependency| dependency.error? }.
134
+ # #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
135
+ # #select{|dependency| dependency.updatable? }.
136
+ # collect{|dependency| Workflow.relocate_dependency(self, dependency)}
137
+ # end
138
+ #
139
+ # def input_checks
140
+ # (inputs.select{|i| Step === i } + inputs.select{|i| Path === i && Step === i.resource}.collect{|i| i.resource})
141
+ # #select{|dependency| dependency.updatable? }
142
+ # end
143
+ #
144
+ # def checks
145
+ # (dependency_checks + input_checks).uniq
146
+ # end
147
+ #
148
+ # def persist_checks
149
+ # canfail_paths = self.canfail_paths
150
+ # checks.collect do |dep|
151
+ # path = dep.path
152
+ # next if ! dep.done? && canfail_paths.include?(path)
153
+ # path
154
+ # end.compact
155
+ # end
156
+ #
157
+ # def out_of_date
158
+ #
159
+ # checks = self.checks
160
+ # return [] if checks.empty?
161
+ # outdated_time = []
162
+ # outdated_dep = []
163
+ # canfail_paths = self.canfail_paths
164
+ # this_mtime = Open.mtime(self.path) if Open.exists?(self.path)
165
+ #
166
+ # #outdated_time = checks.select{|dep| dep.updatable? && dep.done? && Persist.newer?(path, dep.path) }
167
+ # outdated_time = checks.select{|dep| dep.done? && Persist.newer?(path, dep.path) }
168
+ # outdated_dep = checks.reject{|dep| dep.done? || (dep.error? && ! dep.recoverable_error? && canfail_paths.include?(dep.path)) }
169
+ #
170
+ # #checks.each do |dep|
171
+ # # next unless dep.updatable?
172
+ # # dep_done = dep.done?
173
+ #
174
+ # # begin
175
+ # # if this_mtime && dep_done && Open.exists?(dep.path) && (Open.mtime(dep.path) > this_mtime + 1)
176
+ # # outdated_time << dep
177
+ # # end
178
+ # # rescue
179
+ # # end
180
+ #
181
+ # # # Is this pointless? this would mean some dep got updated after a later
182
+ # # # dep but but before this one.
183
+ # # #if (! dep.done? && ! canfail_paths.include?(dep.path)) || ! dep.updated?
184
+ #
185
+ # # if (! dep_done && ! canfail_paths.include?(dep.path))
186
+ # # outdated_dep << dep
187
+ # # end
188
+ # #end
189
+ #
190
+ # Log.medium "Some newer files found: #{Misc.fingerprint outdated_time}" if outdated_time.any?
191
+ # Log.medium "Some outdated files found: #{Misc.fingerprint outdated_dep}" if outdated_dep.any?
192
+ #
193
+ # outdated_time + outdated_dep
194
+ # end
195
+ #
196
+ # def updated?
197
+ # return true if ENV["RBBT_UPDATE"] != "true"
198
+ # return true unless (done? || error? || ! writable?)
199
+ #
200
+ # @updated ||= out_of_date.empty?
201
+ # end
202
+ #
203
+ # def kill_children
204
+ # begin
205
+ # children_pids = info[:children_pids]
206
+ # if children_pids and children_pids.any?
207
+ # Log.medium("Killing children: #{ children_pids * ", " }")
208
+ # children_pids.each do |pid|
209
+ # Log.medium("Killing child #{ pid }")
210
+ # begin
211
+ # Process.kill "TERM", pid.to_i
212
+ # rescue Exception
213
+ # Log.medium("Exception killing child #{ pid }: #{$!.message}")
214
+ # end
215
+ # end
216
+ # end
217
+ # rescue
218
+ # Log.medium("Exception finding children")
219
+ # end
220
+ # end
221
+ #
222
+ # def run(no_load = false)
223
+ # result = nil
224
+ #
225
+ # if Workflow === workflow && workflow.relay_tasks && workflow.relay_tasks.include?(task_name)
226
+ # server, options = workflow.relay_tasks[task_name]
227
+ # options[:migrate] = true
228
+ # return RemoteWorkflow::SSH.relay_job(self, server, options)
229
+ # end
230
+ #
231
+ # begin
232
+ # no_load = :stream if no_load
233
+ # result_type = self.result_type || info[:result_type]
234
+ #
235
+ # res = @mutex.synchronize do
236
+ # time_elapsed = total_time_elapsed = nil
237
+ # Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
238
+ #
239
+ # result = Persist.persist "Job", result_type, :file => path, :check => persist_checks, :no_load => no_load do
240
+ #
241
+ # if Step === Step.log_relay_step and not self == Step.log_relay_step
242
+ # relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
243
+ # end
244
+ #
245
+ # Open.write(pid_file, Process.pid.to_s) unless Open.exists? pid_file
246
+ #
247
+ # @exec = false
248
+ # init_info(true)
249
+ #
250
+ # #workflow = @workflow || @task.respond_to?(:workflow) ? @task.workflow : nil
251
+ # result_type = @task.respond_to?(:result_type) ? @task.result_type : nil
252
+ # result_description = @task.respond_to?(:result_description) ? @task.result_description : nil
253
+ #
254
+ # log :setup, "#{Log.color :green, "Setup"} step #{Log.color :yellow, task_name}"
255
+ #
256
+ # merge_info({
257
+ # :issued => (issue_time = Time.now),
258
+ # :name => name,
259
+ # :pid => Process.pid.to_s,
260
+ # :pid_hostname => Socket.gethostname,
261
+ # :clean_name => clean_name,
262
+ # :workflow => workflow.to_s,
263
+ # :task_name => task_name,
264
+ # :result_type => result_type,
265
+ # :result_description => result_description,
266
+ # :dependencies => dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]},
267
+ # :versions => Rbbt.versions
268
+ # })
269
+ #
270
+ # new_inputs = []
271
+ # @inputs.each_with_index do |input,i|
272
+ # name = @task.respond_to?(:inputs) ? @task.inputs[i] : nil
273
+ # type = @task.respond_to?(:input_types) ? @task.input_types[i] : nil
274
+ #
275
+ # if type == :directory
276
+ # directory_inputs = file('directory_inputs')
277
+ # input_source = directory_inputs['.source'][name].find
278
+ # input_dir = directory_inputs[name].find
279
+ #
280
+ # case input
281
+ # when Path
282
+ # if input.directory?
283
+ # new_inputs << input
284
+ # else
285
+ # input.open do |io|
286
+ # begin
287
+ # Misc.untar(io, input_source)
288
+ # rescue
289
+ # raise ParameterException, "Error unpackaging tar directory input '#{name}':\n\n#{$!.message}"
290
+ # end
291
+ # end
292
+ # tar_1 = input_source.glob("*")
293
+ # raise ParameterException, "When using tar.gz files for directories, the directory must be the single first level entry" if tar_1.length != 1
294
+ # FileUtils.ln_s Misc.path_relative_to(directory_inputs, tar_1.first), input_dir
295
+ # new_inputs << input_dir
296
+ # end
297
+ # when File, IO, Tempfile
298
+ # begin
299
+ # Misc.untar(Open.gunzip(input), input_source)
300
+ # rescue
301
+ # raise ParameterException, "Error unpackaging tar directory input '#{name}':\n\n#{$!.message}"
302
+ # end
303
+ # tar_1 = input_source.glob("*")
304
+ # raise ParameterException, "When using tar.gz files for directories, the directory must be the single first level entry" if tar_1.length != 1
305
+ # FileUtils.ln_s Misc.path_relative_to(directory_inputs, tar_1.first), input_dir
306
+ # new_inputs << input_dir
307
+ # else
308
+ # raise ParameterException, "Format of directory input '#{name}' not understood: #{Misc.fingerprint input}"
309
+ # end
310
+ # else
311
+ # new_inputs << input
312
+ # end
313
+ # end if @inputs
314
+ #
315
+ # @inputs = new_inputs if @inputs
316
+ #
317
+ # if @inputs && task.respond_to?(:inputs) && ! task.inputs.nil?
318
+ # info_inputs = @inputs.collect do |i|
319
+ # if Path === i
320
+ # i.to_s
321
+ # else
322
+ # i
323
+ # end
324
+ # end
325
+ # set_info :inputs, Misc.remove_long_items(Misc.zip2hash(task.inputs, info_inputs))
326
+ # end
327
+ #
328
+ # begin
329
+ # run_dependencies
330
+ # rescue Exception
331
+ # Open.rm pid_file if Open.exists?(pid_file)
332
+ # stop_dependencies
333
+ # raise $!
334
+ # end
335
+ #
336
+ # set_info :started, (start_time = Time.now)
337
+ # log :started, "Starting step #{Log.color :yellow, task_name}"
338
+ #
339
+ # config_keys_pre = Rbbt::Config::GOT_KEYS.dup
340
+ # begin
341
+ #
342
+ # result = _exec
343
+ # rescue Aborted, Interrupt
344
+ # log(:aborted, "Aborted")
345
+ # raise $!
346
+ # rescue Exception
347
+ # backtrace = $!.backtrace
348
+ #
349
+ # # HACK: This fixes an strange behaviour in 1.9.3 where some
350
+ # # backtrace strings are coded in ASCII-8BIT
351
+ # backtrace = backtrace.collect{|l| l.dup.force_encoding("UTF-8")} if String.instance_methods.include? :force_encoding
352
+ # set_info :backtrace, backtrace
353
+ # log(:error, "#{$!.class}: #{$!.message}")
354
+ # stop_dependencies
355
+ # raise $!
356
+ # end
357
+ #
358
+ # if not no_load or ENV["RBBT_NO_STREAM"] == "true"
359
+ # result = prepare_result result, @task.description, info if IO === result
360
+ # result = prepare_result result.stream, @task.description, info if TSV::Dumper === result
361
+ # end
362
+ #
363
+ # stream = case result
364
+ # when IO
365
+ # result
366
+ # when TSV::Dumper
367
+ # result.stream
368
+ # end
369
+ #
370
+ # if stream
371
+ # log :streaming, "Streaming step #{Log.color :yellow, task_name.to_s || ""}"
372
+ #
373
+ # callback = Proc.new do
374
+ # if AbortedStream === stream
375
+ # if stream.exception
376
+ # raise stream.exception
377
+ # else
378
+ # raise Aborted
379
+ # end
380
+ # end
381
+ # begin
382
+ # status = self.status
383
+ # if status != :done and status != :error and status != :aborted
384
+ # Misc.insist do
385
+ # merge_info({
386
+ # :done => (done_time = Time.now),
387
+ # :total_time_elapsed => (total_time_elapsed = done_time - issue_time),
388
+ # :time_elapsed => (time_elapsed = done_time - start_time),
389
+ # :versions => Rbbt.versions
390
+ # })
391
+ # log :done, "Completed step #{Log.color :yellow, task_name.to_s || ""} in #{time_elapsed.to_i}+#{(total_time_elapsed - time_elapsed).to_i} sec."
392
+ # end
393
+ # end
394
+ # rescue
395
+ # Log.exception $!
396
+ # ensure
397
+ # Step.purge_stream_cache
398
+ # Open.rm pid_file if Open.exist?(pid_file)
399
+ # end
400
+ # end
401
+ #
402
+ # abort_callback = Proc.new do |exception|
403
+ # begin
404
+ # if exception
405
+ # self.exception exception
406
+ # else
407
+ # log :aborted, "#{Log.color :red, "Aborted"} step #{Log.color :yellow, task_name.to_s || ""}" if status == :streaming
408
+ # end
409
+ # _clean_finished
410
+ # rescue
411
+ # stop_dependencies
412
+ # Open.rm pid_file if Open.exist?(pid_file)
413
+ # end
414
+ # end
415
+ #
416
+ # ConcurrentStream.setup stream, :callback => callback, :abort_callback => abort_callback
417
+ #
418
+ # if AbortedStream === stream
419
+ # exception = stream.exception || Aborted.new("Aborted stream: #{Misc.fingerprint stream}")
420
+ # self.exception exception
421
+ # _clean_finished
422
+ # raise exception
423
+ # end
424
+ # else
425
+ # merge_info({
426
+ # :done => (done_time = Time.now),
427
+ # :total_time_elapsed => (total_time_elapsed = done_time - issue_time),
428
+ # :time_elapsed => (time_elapsed = done_time - start_time),
429
+ # :versions => Rbbt.versions
430
+ # })
431
+ # log :ending
432
+ # Step.purge_stream_cache
433
+ # end
434
+ #
435
+ # set_info :dependencies, dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]}
436
+ #
437
+ # config_keys = Rbbt::Config::GOT_KEYS[config_keys_pre.length..-1]
438
+ # set_info :config_keys, config_keys.uniq
439
+ #
440
+ # if result.nil? && File.exist?(self.tmp_path) && ! File.exist?(self.path)
441
+ # Open.mv self.tmp_path, self.path
442
+ # end
443
+ # Open.rm pid_file if Open.exist?(pid_file) unless stream
444
+ # result
445
+ # end # END PERSIST
446
+ # log :done, "Completed step #{Log.color :yellow, task_name.to_s || ""} in #{time_elapsed.to_i}+#{(total_time_elapsed - time_elapsed).to_i} sec." unless stream or time_elapsed.nil?
447
+ #
448
+ # if no_load
449
+ # @result ||= result
450
+ # self
451
+ # else
452
+ # Step.purge_stream_cache
453
+ # @result = prepare_result result, result_description
454
+ # end
455
+ # end # END SYNC
456
+ # res
457
+ # rescue DependencyError, DependencyRbbtException
458
+ # exception $!
459
+ # rescue LockInterrupted
460
+ # raise $!
461
+ # rescue Aborted, Interrupt
462
+ # abort
463
+ # stop_dependencies
464
+ # raise $!
465
+ # rescue Exception
466
+ # exception $!
467
+ # stop_dependencies
468
+ # raise $!
469
+ # ensure
470
+ # no_load = false unless IO === result
471
+ # Open.rm pid_file if Open.exist?(pid_file) unless no_load
472
+ # #set_info :pid, nil unless no_load
473
+ # end
474
+ # end
475
+ #
476
+ # def produce(force=false, dofork=false)
477
+ # return self if done? and not dirty?
478
+ #
479
+ # self.status_lock.synchronize do
480
+ # if error? || aborted? || stalled?
481
+ # if stalled?
482
+ # Log.warn "Aborting stalled job #{self.path}"
483
+ # abort
484
+ # end
485
+ # if force or aborted? or recoverable_error?
486
+ # clean
487
+ # else
488
+ # e = get_exception
489
+ # if e
490
+ # Log.error "Raising exception in produced job #{self.path}: #{e.message}"
491
+ # raise e
492
+ # else
493
+ # raise "Error in job: #{self.path}"
494
+ # end
495
+ # end
496
+ # end
497
+ # end
498
+ #
499
+ # update if done?
500
+ #
501
+ # if dofork
502
+ # fork(true) unless started?
503
+ #
504
+ # join unless done? or dofork == :nowait
505
+ # else
506
+ # run(true) unless started?
507
+ #
508
+ # join unless done?
509
+ # end
510
+ #
511
+ # self
512
+ # end
513
+ #
514
+ # def fork(no_load = false, semaphore = nil)
515
+ # raise "Can not fork: Step is waiting for proces #{@pid} to finish" if not @pid.nil? and not Process.pid == @pid and Misc.pid_exists?(@pid) and not done? and info[:forked]
516
+ # Log.debug "Fork to run #{self.path}"
517
+ # sout, sin = Misc.pipe if no_load == :stream
518
+ # @pid = Process.fork do
519
+ # Signal.trap(:TERM) do
520
+ # raise Aborted, "Recieved TERM Signal on forked process #{Process.pid}"
521
+ # end
522
+ # sout.close if sout
523
+ # Misc.pre_fork
524
+ # Open.mkdir File.dirname(path) unless Open.exist?(File.dirname(path))
525
+ # Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
526
+ #
527
+ # if semaphore
528
+ # init_info
529
+ # log :queue, "Queued over semaphore: #{semaphore}"
530
+ # ret = RbbtSemaphore.wait_semaphore(semaphore)
531
+ # raise SemaphoreInterrupted if ret == -1
532
+ # end
533
+ #
534
+ # begin
535
+ # begin
536
+ # @forked = true
537
+ # res = run no_load
538
+ # set_info :forked, true
539
+ # if sin
540
+ # io = TSV.get_stream res
541
+ # if io.respond_to? :setup
542
+ # io.setup(sin)
543
+ # sin.pair = io
544
+ # io.pair = sin
545
+ # end
546
+ # begin
547
+ # Misc.consume_stream(io, false, sin)
548
+ # rescue
549
+ # Log.warn "Could not consume stream (#{io.closed? ? 'closed' : 'open'}) into pipe for forked job: #{self.path}"
550
+ # Misc.consume_stream(io) unless io.closed?
551
+ # end
552
+ # end
553
+ # rescue Aborted, Interrupt
554
+ # Log.debug{"Forked process aborted: #{path}"}
555
+ # log :aborted, "Job aborted (#{Process.pid})"
556
+ # raise $!
557
+ # rescue Exception
558
+ # Log.debug("Exception '#{$!.message}' caught on forked process: #{path}")
559
+ # raise $!
560
+ # ensure
561
+ # join_stream
562
+ # end
563
+ #
564
+ # begin
565
+ # children_pids = info[:children_pids]
566
+ # if children_pids
567
+ # children_pids.each do |pid|
568
+ # if Misc.pid_exists? pid
569
+ # begin
570
+ # Process.waitpid pid
571
+ # rescue Errno::ECHILD
572
+ # Log.low "Waiting on #{ pid } failed: #{$!.message}"
573
+ # end
574
+ # end
575
+ # end
576
+ # set_info :children_done, Time.now
577
+ # end
578
+ # rescue Exception
579
+ # Log.debug("Exception waiting for children: #{$!.message}")
580
+ # RbbtSemaphore.post_semaphore(semaphore) if semaphore
581
+ # Kernel.exit! -1
582
+ # end
583
+ # #set_info :pid, nil
584
+ # ensure
585
+ # RbbtSemaphore.post_semaphore(semaphore) if semaphore
586
+ # end
587
+ # end
588
+ # sin.close if sin
589
+ # @result = sout if sout
590
+ # Process.detach(@pid)
591
+ # self
592
+ # end
593
+ #
594
+ # def abort_pid
595
+ # @pid ||= info[:pid] || Open.read(pid_file)
596
+ #
597
+ # case @pid
598
+ # when nil
599
+ # Log.medium "Could not abort #{path}: no pid"
600
+ # false
601
+ # when Process.pid
602
+ # Log.medium "Could not abort #{path}: same process"
603
+ # false
604
+ # else
605
+ # Log.medium "Aborting pid #{path}: #{ @pid } #{Process.pid}"
606
+ # begin
607
+ # Process.kill("TERM", @pid.to_i)
608
+ # s = Process.waitpid2 @pid.to_i
609
+ # Log.medium "Aborted pid #{path} #{s}"
610
+ # rescue Exception
611
+ # Log.debug("Aborted job #{@pid} was not killed: #{$!.message}")
612
+ # end
613
+ # true
614
+ # end
615
+ # end
616
+ #
617
+ # def abort_stream
618
+ # stream = @result if IO === @result
619
+ # @saved_stream = nil
620
+ # if stream and stream.respond_to? :abort and not stream.aborted?
621
+ # doretry = true
622
+ # begin
623
+ # Log.medium "Aborting job stream #{stream.inspect} -- #{Log.color :blue, path}"
624
+ # stream.abort
625
+ # rescue Aborted, Interrupt
626
+ # Log.medium "Aborting job stream #{stream.inspect} ABORTED RETRY -- #{Log.color :blue, path}"
627
+ # if doretry
628
+ # doretry = false
629
+ # retry
630
+ # end
631
+ # end
632
+ # end
633
+ # end
634
+ #
635
+ # def _clean_finished
636
+ # if Open.exists?(path) && status != :done
637
+ # Log.warn "Aborted job had finished. Removing result -- #{ path }"
638
+ # begin
639
+ # Open.rm path
640
+ # rescue Exception
641
+ # Log.warn "Exception removing result of aborted job: #{$!.message}"
642
+ # end
643
+ # end
644
+ #
645
+ # if Open.exists?(tmp_path) && status != :done
646
+ # Log.warn "Aborted job had finished. Removing tmp result -- #{ tmp_path }"
647
+ # begin
648
+ # Open.rm tmp_path
649
+ # rescue Exception
650
+ # Log.warn "Exception removing tmp result of aborted job: #{$!.message}"
651
+ # end
652
+ # end
653
+ # end
654
+ #
655
+ # def _abort
656
+ # return if @aborted
657
+ # @aborted = true
658
+ # Log.medium{"#{Log.color :red, "Aborting"} #{Log.color :blue, path}"}
659
+ # doretry = true
660
+ # begin
661
+ # return if done?
662
+ # abort_pid if running?
663
+ # kill_children
664
+ # abort_stream
665
+ # stop_dependencies
666
+ # rescue Aborted, Interrupt
667
+ # Log.medium{"#{Log.color :red, "Aborting ABORTED RETRY"} #{Log.color :blue, path}"}
668
+ # if doretry
669
+ # doretry = false
670
+ # retry
671
+ # end
672
+ # raise $!
673
+ # rescue Exception
674
+ # if doretry
675
+ # doretry = false
676
+ # retry
677
+ # end
678
+ # ensure
679
+ # _clean_finished
680
+ # end
681
+ # end
682
+ #
683
+ # def abort
684
+ # return if done? and (status == :done or status == :noinfo)
685
+ # _abort
686
+ # log(:aborted, "Job aborted") unless aborted? or error?
687
+ # self
688
+ # end
689
+ #
690
+ # def join_stream
691
+ # stream = get_stream if @result
692
+ # @result = nil
693
+ # if stream
694
+ # begin
695
+ # Misc.consume_stream stream
696
+ # stream.join if stream.respond_to? :join
697
+ # rescue Exception
698
+ # stream.abort $!
699
+ # self._abort
700
+ # end
701
+ # end
702
+ # end
703
+ #
704
+ # def soft_grace
705
+ # until done? or (Open.exist?(info_file) && info[:status] != :noinfo)
706
+ # sleep 1
707
+ # end
708
+ # self
709
+ # end
710
+ #
711
+ # def grace
712
+ # until done? || result || error? || aborted? || streaming? || waiting? || running?
713
+ # sleep 1
714
+ # end
715
+ # self
716
+ # end
717
+ #
718
+ # def join
719
+ #
720
+ # grace if Open.exists?(info_file)
721
+ #
722
+ # if streaming?
723
+ # join_stream
724
+ # end
725
+ #
726
+ # return self if not Open.exists? info_file
727
+ #
728
+ # return self if info[:joined]
729
+ #
730
+ # pid = @pid
731
+ #
732
+ # Misc.insist [0.1, 0.2, 0.5, 1] do
733
+ # pid ||= info[:pid]
734
+ # end
735
+ #
736
+ # begin
737
+ #
738
+ # if pid.nil? or Process.pid == pid
739
+ # dependencies.each{|dep| dep.join }
740
+ # else
741
+ # begin
742
+ # pid = pid.to_i if String === pid
743
+ # Log.debug{"Waiting for pid: #{pid}"}
744
+ # Process.waitpid pid
745
+ # rescue Errno::ECHILD
746
+ # Log.debug{"Process #{ pid } already finished: #{ path }"}
747
+ # end if Misc.pid_exists? pid
748
+ # pid = nil
749
+ # dependencies.each{|dep| dep.join }
750
+ # end
751
+ #
752
+ # until (Open.exists?(path) && (status == :done || status == :noinfo)) or error? or aborted? or waiting?
753
+ # sleep 1
754
+ # join_stream if streaming?
755
+ # end
756
+ #
757
+ # self
758
+ # ensure
759
+ # begin
760
+ # set_info :joined, true
761
+ # rescue
762
+ # end if Open.exists?(info_file) && writable?
763
+ # @result = nil
764
+ # end
765
+ # end
766
+ #end