rbbt-util 5.28.11 → 5.29.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -31,8 +31,8 @@ module Persist
31
31
  path = path.find if Path === path
32
32
  file = file.find if Path === file
33
33
  if by_link
34
- patht = File.lstat(path).mtime
35
- filet = File.lstat(file).mtime
34
+ patht = File.exists?(path) ? File.lstat(path).mtime : nil
35
+ filet = File.exists?(file) ? File.lstat(file).mtime : nil
36
36
  else
37
37
  patht = Open.mtime(path)
38
38
  filet = Open.mtime(file)
@@ -297,7 +297,7 @@ module Path
297
297
 
298
298
  raise "No resource defined to produce file: #{ self }" if resource.nil?
299
299
 
300
- resource.produce self, force
300
+ resource.produce self, force if Resource === resource
301
301
 
302
302
  self
303
303
  end
@@ -243,6 +243,7 @@ module TSV
243
243
  Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
244
244
 
245
245
  if complete
246
+ Log.warn "Attaching through index and completing empty rows; keys with wrong format may appear (#{other.key_field} insted of #{self.key_field})" if index
246
247
  fill = TrueClass === complete ? nil : complete
247
248
  field_length = self.fields.length
248
249
  common_fields = (other.fields & self.fields)
@@ -255,11 +256,11 @@ module TSV
255
256
  case type
256
257
  when :single
257
258
  missing.each do |k|
258
- self[k] = nil
259
+ self[k] = fill
259
260
  end
260
261
  when :list
261
262
  missing.each do |k|
262
- values = [nil] * field_length
263
+ values = [fill] * field_length
263
264
  other_values = other[k]
264
265
  other_common_pos.zip(this_common_pos).each do |o,t|
265
266
  values[t] = other_values[o]
@@ -267,8 +268,9 @@ module TSV
267
268
  self[k] = values
268
269
  end
269
270
  when :double
271
+ fill = [] if fill.nil?
270
272
  missing.each do |k|
271
- values = [[]] * field_length
273
+ values = [fill] * field_length
272
274
  other_values = other[k]
273
275
  other_common_pos.zip(this_common_pos).each do |o,t|
274
276
  values[t] = other_values[o]
@@ -276,8 +278,9 @@ module TSV
276
278
  self[k] = values
277
279
  end
278
280
  when :flat
281
+ fill = [] if fill.nil?
279
282
  missing.each do |k|
280
- self[k] = []
283
+ self[k] = fill
281
284
  end
282
285
  end
283
286
  end
@@ -2,6 +2,3 @@ require 'rbbt/util/concurrency'
2
2
 
3
3
  require 'rbbt/tsv/parallel/through'
4
4
  require 'rbbt/tsv/parallel/traverse'
5
-
6
- module TSV
7
- end
@@ -61,6 +61,14 @@ module IndiferentHash
61
61
  super(key)
62
62
  end
63
63
  end
64
+
65
+ def clean_version
66
+ clean = {}
67
+ each do |k,v|
68
+ clean[k.to_s] = v unless clean.include? k.to_s
69
+ end
70
+ clean
71
+ end
64
72
  end
65
73
 
66
74
  module CaseInsensitiveHash
@@ -287,7 +287,9 @@ module Misc
287
287
  when Symbol
288
288
  obj.to_s
289
289
  when (defined?(Path) and Path)
290
- if Step === obj.resource
290
+ if defined?(Step) && Open.exists?(Step.info_file(obj))
291
+ obj2str(Workflow.load_step(obj))
292
+ elsif defined?(Step) && Step === obj.resource
291
293
  "Step file: " + obj
292
294
  else
293
295
  if obj.exists?
@@ -322,7 +324,11 @@ module Misc
322
324
  remove_long_items(obj)
323
325
  when File
324
326
  if obj.respond_to? :filename and obj.filename
325
- "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
327
+ if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
328
+ obj2str(Workflow.load_step(obj.filename))
329
+ else
330
+ "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
331
+ end
326
332
  else
327
333
  "<IO:" << obj.path << "--" << mtime_str(obj.path) << ">"
328
334
  end
@@ -330,7 +336,11 @@ module Misc
330
336
  "<IO:" << obj.short_path << ">"
331
337
  when IO
332
338
  if obj.respond_to? :filename and obj.filename
333
- "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
339
+ if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
340
+ obj2str(Workflow.load_step(obj.filename))
341
+ else
342
+ "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
343
+ end
334
344
  else
335
345
 
336
346
  if obj.respond_to? :obj2str
@@ -385,7 +385,7 @@ module Workflow
385
385
  next if default == v
386
386
  next if (String === default and Symbol === v and v.to_s == default)
387
387
  next if (Symbol === default and String === v and v == default.to_s)
388
- real_inputs[k] = v
388
+ real_inputs[k.to_sym] = v
389
389
  end
390
390
 
391
391
  jobname_input_value = inputs[jobname_input] || all_defaults[jobname_input]
@@ -410,6 +410,7 @@ module Workflow
410
410
  job.workflow = self
411
411
  job.clean_name = jobname
412
412
  job.overriden = overriden
413
+ job.real_inputs = real_inputs.keys
413
414
  job
414
415
  end
415
416
 
@@ -16,6 +16,10 @@ end
16
16
 
17
17
  module Workflow
18
18
 
19
+ def self.job_path?(path)
20
+ path.split("/")[-4] == "jobs"
21
+ end
22
+
19
23
  def log(status, message = nil, &block)
20
24
  Step.log(status, message, nil, &block)
21
25
  end
@@ -301,7 +305,9 @@ module Workflow
301
305
 
302
306
  def setup_override_dependency(dep, workflow, task_name)
303
307
  dep = Step === dep ? dep : Workflow.load_step(dep)
308
+ dep.workflow = workflow
304
309
  dep.info[:name] = dep.name
310
+ dep.original_task_name ||= dep.task_name if dep.workflow
305
311
  begin
306
312
  workflow = Kernel.const_get workflow if String === workflow
307
313
  dep.task = workflow.tasks[task_name] if dep.task.nil? && workflow.tasks.include?(task_name)
@@ -309,7 +315,7 @@ module Workflow
309
315
  Log.exception $!
310
316
  end
311
317
  dep.task_name = task_name
312
- dep.overriden = true
318
+ dep.overriden = dep.original_task_name.to_sym
313
319
  dep
314
320
  end
315
321
 
@@ -77,6 +77,7 @@ module Workflow
77
77
  task name do
78
78
  raise RbbtException, "dependency not found in dep_task" if dependencies.empty?
79
79
  dep = dependencies.last.join
80
+ raise dep.get_exception if dep.error?
80
81
  set_info :result_type, dep.info[:result_type]
81
82
  forget = config :forget_dep_tasks, :forget_dep_tasks, :default => FORGET_DEP_TASKS
82
83
  if forget
@@ -50,8 +50,8 @@ module Workflow
50
50
  case input_types[input]
51
51
  when :file
52
52
  Log.debug "Pointing #{ input } to #{file}"
53
- if file =~ /\.read$/
54
- inputs[input.to_sym] = Open.read(file)
53
+ if file =~ /\.yaml/
54
+ inputs[input.to_sym] = YAML.load(Open.read(file))
55
55
  else
56
56
  inputs[input.to_sym] = Open.realpath(file)
57
57
  end
@@ -12,6 +12,9 @@ class Step
12
12
  attr_accessor :exec
13
13
  attr_accessor :relocated
14
14
  attr_accessor :result, :mutex, :seen
15
+ attr_accessor :real_inputs, :original_task_name
16
+
17
+ RBBT_DEBUG_CLEAN = ENV["RBBT_DEBUG_CLEAN"] == 'true'
15
18
 
16
19
  class << self
17
20
  attr_accessor :lock_dir
@@ -143,11 +146,13 @@ class Step
143
146
  seen = []
144
147
  while path = deps.pop
145
148
  dep_info = archived_info[path]
146
- dep_info[:inputs].each do |k,v|
147
- all_inputs[k] = v unless all_inputs.include?(k)
148
- end if dep_info[:inputs]
149
- deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
150
- deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
149
+ if dep_info
150
+ dep_info[:inputs].each do |k,v|
151
+ all_inputs[k] = v unless all_inputs.include?(k)
152
+ end if dep_info[:inputs]
153
+ deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
154
+ deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
155
+ end
151
156
  seen << path
152
157
  end
153
158
 
@@ -454,6 +459,7 @@ class Step
454
459
  status << "not running" if ! done? && ! running?
455
460
  status.unshift " " if status.any?
456
461
  Log.high "Cleaning step: #{path}#{status * " "}"
462
+ Log.stack caller if RBBT_DEBUG_CLEAN
457
463
  abort if ! done? && running?
458
464
  Step.clean(path)
459
465
  self
@@ -8,6 +8,16 @@ class Step
8
8
  end
9
9
  end
10
10
 
11
+ def self.serialize_info(info)
12
+ info = info.clean_version if IndiferentHash === info
13
+ INFO_SERIALIZER.dump(info)
14
+ end
15
+
16
+ def self.load_serialized_info(io)
17
+ IndiferentHash.setup(INFO_SERIALIZER.load(io))
18
+ end
19
+
20
+
11
21
  def self.wait_for_jobs(jobs)
12
22
  jobs = [jobs] if Step === jobs
13
23
  begin
@@ -59,7 +69,7 @@ class Step
59
69
  def self.step_info(path)
60
70
  begin
61
71
  Open.open(info_file(path), :mode => 'rb') do |f|
62
- INFO_SERIALIZER.load(f)
72
+ self.load_serialized_info(f)
63
73
  end
64
74
  rescue Exception
65
75
  Log.exception $!
@@ -83,18 +93,22 @@ class Step
83
93
 
84
94
  Log.debug "Saving job input #{name} (#{type}) into #{path}"
85
95
  case
96
+ when Step === value
97
+ Open.ln_s(value.path, path)
98
+ when type.to_s == "file"
99
+ if String === value && File.exists?(value)
100
+ Open.ln_s(value, path)
101
+ else
102
+ Open.write(path + '.yaml', value.to_yaml)
103
+ end
86
104
  when Array === value
87
- Open.write(path, value * "\n")
105
+ Open.write(path, value.collect{|v| Step === v ? v.path : v.to_s} * "\n")
88
106
  when IO === value
89
- Open.write(path, value)
90
- when type == "file"
91
- if String === value && File.exists?(value)
92
- Open.link(value, path)
107
+ if value.filename && String === value.filename && File.exists?(value.filename)
108
+ Open.ln_s(value.filename, path)
93
109
  else
94
- Open.write(path + '.read', value.to_s)
110
+ Open.write(path, value)
95
111
  end
96
- when Step === value
97
- value = value.produce.load
98
112
  else
99
113
  Open.write(path, value.to_s)
100
114
  end
@@ -104,18 +118,24 @@ class Step
104
118
  def self.save_job_inputs(job, dir, options = nil)
105
119
  options = IndiferentHash.setup options.dup if options
106
120
 
107
- task_name = job.task_name
121
+ task_name = Symbol === job.overriden ? job.overriden : job.task_name
108
122
  workflow = job.workflow
109
123
  workflow = Kernel.const_get workflow if String === workflow
110
- task_info = workflow.task_info(task_name)
111
- input_types = task_info[:input_types]
112
- task_inputs = task_info[:inputs]
113
- input_defaults = task_info[:input_defaults]
124
+ if workflow
125
+ task_info = workflow.task_info(task_name)
126
+ input_types = task_info[:input_types]
127
+ task_inputs = task_info[:inputs]
128
+ input_defaults = task_info[:input_defaults]
129
+ else
130
+ task_info = input_types = task_inputs = input_defaults = {}
131
+ end
114
132
 
115
133
  inputs = {}
134
+ real_inputs = job.real_inputs || job.info[:real_inputs]
116
135
  job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
117
136
  next unless task_inputs.include? name.to_sym
118
- next if options and ! options.include?(name)
137
+ next unless real_inputs.include? name.to_sym
138
+ next if options && ! options.include?(name)
119
139
  next if value.nil?
120
140
  next if input_defaults[name] == value
121
141
  inputs[name] = value
@@ -188,7 +208,7 @@ class Step
188
208
  info_lock.lock if check_lock and false
189
209
  begin
190
210
  Open.open(info_file, :mode => 'rb') do |file|
191
- IndiferentHash.setup(INFO_SERIALIZER.load(file)) #|| {}
211
+ Step.load_serialized_info(file)
192
212
  end
193
213
  ensure
194
214
  info_lock.unlock if check_lock and false
@@ -204,7 +224,7 @@ class Step
204
224
  Log.debug{"Error loading info file: " + info_file}
205
225
  Log.exception $!
206
226
  Open.rm info_file
207
- Misc.sensiblewrite(info_file, INFO_SERIALIZER.dump({:status => :error, :messages => ["Info file lost"]}))
227
+ Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
208
228
  raise $!
209
229
  end
210
230
  end
@@ -212,10 +232,10 @@ class Step
212
232
  def init_info(force = false)
213
233
  return nil if @exec || info_file.nil? || (Open.exists?(info_file) && ! force)
214
234
  Open.lock(info_file, :lock => info_lock) do
215
- i = {:status => :waiting, :pid => Process.pid, :path => path}
235
+ i = {:status => :waiting, :pid => Process.pid, :path => path, :real_inputs => real_inputs}
216
236
  i[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
217
- @info_cache = i
218
- Misc.sensiblewrite(info_file, INFO_SERIALIZER.dump(i), :force => true, :lock => false)
237
+ Misc.sensiblewrite(info_file, Step.serialize_info(i), :force => true, :lock => false)
238
+ @info_cache = IndiferentHash.setup(i)
219
239
  @info_cache_time = Time.now
220
240
  end
221
241
  end
@@ -227,9 +247,9 @@ class Step
227
247
  Open.lock(info_file, :lock => info_lock) do
228
248
  i = info(false).dup
229
249
  i[key] = value
250
+ dump = Step.serialize_info(i)
230
251
  @info_cache = IndiferentHash.setup(i)
231
- dump = INFO_SERIALIZER.dump(i)
232
- Misc.sensiblewrite(info_file, dump, :force => true, :lock => false)
252
+ Misc.sensiblewrite(info_file, dump, :force => true, :lock => false) if Open.exists?(info_file)
233
253
  @info_cache_time = Time.now
234
254
  value
235
255
  end
@@ -242,9 +262,9 @@ class Step
242
262
  Open.lock(info_file, :lock => info_lock) do
243
263
  i = info(false)
244
264
  i.merge! hash
265
+ dump = Step.serialize_info(i)
245
266
  @info_cache = IndiferentHash.setup(i)
246
- dump = INFO_SERIALIZER.dump(i)
247
- Misc.sensiblewrite(info_file, dump, :force => true, :lock => false)
267
+ Misc.sensiblewrite(info_file, dump, :force => true, :lock => false) if Open.exists?(info_file)
248
268
  @info_cache_time = Time.now
249
269
  value
250
270
  end
@@ -92,12 +92,18 @@ class Step
92
92
  (job.done? && job.dirty?) || (job.error? && job.dirty?) ||
93
93
  (!(job.noinfo? || job.done? || job.error? || job.aborted? || job.running?))
94
94
 
95
- job.clean unless job.resumable? && (job.updated? && ! job.dirty?)
95
+ if ! (job.resumable? && (job.updated? && ! job.dirty?))
96
+ Log.high "About to clean -- status: #{status}, present #{File.exists?(job.path)}, " +
97
+ %w(done? error? recoverable_error? noinfo? updated? dirty? aborted? running? resumable?).
98
+ collect{|v| [v, job.send(v)]*": "} * ", " if RBBT_DEBUG_CLEAN
99
+
100
+ job.clean
101
+ end
96
102
  job.set_info :status, :cleaned
97
103
  end
98
104
 
99
105
  job.dup_inputs unless status == 'done' or job.started?
100
- job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started?
106
+ job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started? || ! Workflow.job_path?(job.path)
101
107
 
102
108
  canfail = ComputeDependency === job && job.canfail?
103
109
  end
@@ -121,10 +127,9 @@ class Step
121
127
  end
122
128
 
123
129
  def input_dependencies
124
- inputs.flatten.select{|i| Step === i}
130
+ (inputs.flatten.select{|i| Step === i} + inputs.flatten.select{|dep| Path === dep && Step === dep.resource}.collect{|dep| dep.resource})
125
131
  end
126
132
 
127
-
128
133
  def execute_dependency(dependency, log = true)
129
134
  task_name = self.task_name
130
135
  canfail_paths = self.canfail_paths
@@ -112,7 +112,7 @@ class Step
112
112
  end
113
113
 
114
114
  def updatable?
115
- (ENV["RBBT_UPDATE_ALL_JOBS"] == 'true' || ( ENV["RBBT_UPDATE"] == "true" && Open.exists?(info_file)) && status != :noinfo && ! (relocated? && done?))
115
+ (ENV["RBBT_UPDATE_ALL_JOBS"] == 'true' || ( ENV["RBBT_UPDATE"] == "true" && Open.exists?(info_file)) && status != :noinfo && ! (relocated? && done?)) || (ENV["RBBT_UPDATE"] && ! (done? && ! Open.exists?(info_file)))
116
116
  end
117
117
 
118
118
  def dependency_checks
@@ -122,13 +122,12 @@ class Step
122
122
  reject{|dependency| (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dependency) || Open.remote?(dependency.path) }.
123
123
  reject{|dependency| dependency.error? }.
124
124
  #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
125
- #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
126
125
  select{|dependency| dependency.updatable? }.
127
126
  collect{|dependency| Workflow.relocate_dependency(self, dependency)}
128
127
  end
129
128
 
130
129
  def input_checks
131
- inputs.select{|i| Step === i }.
130
+ (inputs.select{|i| Step === i } + inputs.select{|i| Path === i && Step === i.resource}.collect{|i| i.resource}).
132
131
  select{|dependency| dependency.updatable? }
133
132
  end
134
133
 
@@ -154,25 +153,28 @@ class Step
154
153
  canfail_paths = self.canfail_paths
155
154
  this_mtime = Open.mtime(self.path) if Open.exists?(self.path)
156
155
 
157
- checks.each do |dep|
158
- next unless dep.updatable?
159
- dep_done = dep.done?
156
+ outdated_time = checks.select{|dep| dep.updatable? && dep.done? && Persist.newer?(path, dep.path) }
157
+ outdated_dep = checks.reject{|dep| dep.done? || (dep.error? && ! dep.recoverable_error? && canfail_paths.include?(dep.path)) }
160
158
 
161
- begin
162
- if this_mtime && dep_done && Open.exists?(dep.path) && (Open.mtime(dep.path) > this_mtime + 1)
163
- outdated_time << dep
164
- end
165
- rescue
166
- end
159
+ #checks.each do |dep|
160
+ # next unless dep.updatable?
161
+ # dep_done = dep.done?
167
162
 
168
- # Is this pointless? this would mean some dep got updated after a later
169
- # dep but but before this one.
170
- #if (! dep.done? && ! canfail_paths.include?(dep.path)) || ! dep.updated?
163
+ # begin
164
+ # if this_mtime && dep_done && Open.exists?(dep.path) && (Open.mtime(dep.path) > this_mtime + 1)
165
+ # outdated_time << dep
166
+ # end
167
+ # rescue
168
+ # end
171
169
 
172
- if (! dep_done && ! canfail_paths.include?(dep.path))
173
- outdated_dep << dep
174
- end
175
- end
170
+ # # Is this pointless? this would mean some dep got updated after a later
171
+ # # dep but but before this one.
172
+ # #if (! dep.done? && ! canfail_paths.include?(dep.path)) || ! dep.updated?
173
+
174
+ # if (! dep_done && ! canfail_paths.include?(dep.path))
175
+ # outdated_dep << dep
176
+ # end
177
+ #end
176
178
 
177
179
  Log.high "Some newer files found: #{Misc.fingerprint outdated_time}" if outdated_time.any?
178
180
  Log.high "Some outdated files found: #{Misc.fingerprint outdated_dep}" if outdated_dep.any?
@@ -215,7 +217,7 @@ class Step
215
217
  no_load = :stream if no_load
216
218
 
217
219
  Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
218
- result_type = @task.result_type
220
+ result_type = @task.result_type if @task
219
221
  result_type = info[:result_type] if result_type.nil?
220
222
  result = Persist.persist "Job", result_type, :file => path, :check => persist_checks, :no_load => no_load do
221
223
  if Step === Step.log_relay_step and not self == Step.log_relay_step