scout-gear 10.11.4 → 10.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +17 -2
  3. data/VERSION +1 -1
  4. data/bin/scout +10 -10
  5. data/lib/scout/association/fields.rb +15 -15
  6. data/lib/scout/association/index.rb +6 -6
  7. data/lib/scout/association/item.rb +18 -8
  8. data/lib/scout/association.rb +4 -4
  9. data/lib/scout/entity/identifiers.rb +5 -5
  10. data/lib/scout/entity/property.rb +2 -2
  11. data/lib/scout/entity.rb +1 -1
  12. data/lib/scout/knowledge_base/description.rb +10 -10
  13. data/lib/scout/knowledge_base/entity.rb +6 -6
  14. data/lib/scout/knowledge_base/list.rb +1 -1
  15. data/lib/scout/knowledge_base/query.rb +4 -4
  16. data/lib/scout/knowledge_base/registry.rb +6 -6
  17. data/lib/scout/knowledge_base/traverse.rb +7 -40
  18. data/lib/scout/persist/engine/fix_width_table.rb +6 -6
  19. data/lib/scout/persist/engine/packed_index.rb +2 -2
  20. data/lib/scout/persist/engine/sharder.rb +4 -4
  21. data/lib/scout/persist/engine/tkrzw.rb +1 -1
  22. data/lib/scout/persist/engine/tokyocabinet.rb +2 -2
  23. data/lib/scout/persist/tsv/adapter/fix_width_table.rb +1 -1
  24. data/lib/scout/persist/tsv/adapter/packed_index.rb +1 -1
  25. data/lib/scout/persist/tsv/adapter/tkrzw.rb +1 -1
  26. data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +3 -3
  27. data/lib/scout/persist/tsv/serialize.rb +3 -3
  28. data/lib/scout/persist/tsv.rb +1 -1
  29. data/lib/scout/semaphore.rb +100 -17
  30. data/lib/scout/tsv/annotation/repo.rb +4 -4
  31. data/lib/scout/tsv/annotation.rb +2 -2
  32. data/lib/scout/tsv/attach.rb +7 -7
  33. data/lib/scout/tsv/change_id/translate.rb +1 -1
  34. data/lib/scout/tsv/csv.rb +3 -3
  35. data/lib/scout/tsv/dumper.rb +8 -8
  36. data/lib/scout/tsv/index.rb +1 -1
  37. data/lib/scout/tsv/open.rb +3 -3
  38. data/lib/scout/tsv/stream.rb +2 -2
  39. data/lib/scout/tsv/traverse.rb +4 -4
  40. data/lib/scout/tsv/util/filter.rb +9 -9
  41. data/lib/scout/tsv/util/process.rb +2 -2
  42. data/lib/scout/tsv/util/reorder.rb +2 -2
  43. data/lib/scout/tsv/util/select.rb +3 -3
  44. data/lib/scout/tsv/util/unzip.rb +2 -2
  45. data/lib/scout/tsv/util.rb +1 -1
  46. data/lib/scout/tsv.rb +2 -2
  47. data/lib/scout/work_queue/socket.rb +3 -2
  48. data/lib/scout/work_queue/worker.rb +4 -4
  49. data/lib/scout/work_queue.rb +7 -7
  50. data/lib/scout/workflow/definition.rb +18 -16
  51. data/lib/scout/workflow/deployment/local.rb +81 -62
  52. data/lib/scout/workflow/deployment/orchestrator/batches.rb +66 -5
  53. data/lib/scout/workflow/deployment/orchestrator/chains.rb +47 -30
  54. data/lib/scout/workflow/deployment/orchestrator/rules.rb +3 -3
  55. data/lib/scout/workflow/deployment/orchestrator/workload.rb +11 -22
  56. data/lib/scout/workflow/deployment/scheduler/job.rb +34 -36
  57. data/lib/scout/workflow/deployment/scheduler/lfs.rb +1 -1
  58. data/lib/scout/workflow/deployment/scheduler/pbs.rb +4 -4
  59. data/lib/scout/workflow/deployment/scheduler/slurm.rb +2 -2
  60. data/lib/scout/workflow/deployment/scheduler.rb +23 -12
  61. data/lib/scout/workflow/deployment/trace.rb +2 -2
  62. data/lib/scout/workflow/documentation.rb +4 -4
  63. data/lib/scout/workflow/export.rb +1 -1
  64. data/lib/scout/workflow/path.rb +2 -2
  65. data/lib/scout/workflow/step/children.rb +1 -1
  66. data/lib/scout/workflow/step/dependencies.rb +36 -3
  67. data/lib/scout/workflow/step/info.rb +5 -19
  68. data/lib/scout/workflow/step/inputs.rb +1 -1
  69. data/lib/scout/workflow/step/progress.rb +2 -2
  70. data/lib/scout/workflow/step/provenance.rb +4 -4
  71. data/lib/scout/workflow/step/status.rb +23 -9
  72. data/lib/scout/workflow/step.rb +21 -19
  73. data/lib/scout/workflow/task/dependencies.rb +10 -3
  74. data/lib/scout/workflow/task/info.rb +3 -3
  75. data/lib/scout/workflow/task/inputs.rb +8 -8
  76. data/lib/scout/workflow/task.rb +37 -22
  77. data/lib/scout/workflow/usage.rb +13 -13
  78. data/lib/scout/workflow/util.rb +1 -1
  79. data/lib/scout/workflow.rb +6 -6
  80. data/scout-gear.gemspec +4 -3
  81. data/scout_commands/alias +1 -1
  82. data/scout_commands/batch/clean +12 -12
  83. data/scout_commands/batch/list +26 -25
  84. data/scout_commands/batch/tail +9 -5
  85. data/scout_commands/cat +1 -1
  86. data/scout_commands/doc +2 -2
  87. data/scout_commands/entity +4 -4
  88. data/scout_commands/find +1 -1
  89. data/scout_commands/kb/config +1 -1
  90. data/scout_commands/kb/entities +1 -1
  91. data/scout_commands/kb/list +1 -1
  92. data/scout_commands/kb/query +2 -2
  93. data/scout_commands/kb/register +1 -1
  94. data/scout_commands/kb/show +1 -1
  95. data/scout_commands/kb/traverse +1 -1
  96. data/scout_commands/log +6 -6
  97. data/scout_commands/resource/produce +2 -2
  98. data/scout_commands/resource/sync +1 -1
  99. data/scout_commands/system/clean +7 -7
  100. data/scout_commands/system/status +4 -4
  101. data/scout_commands/template +1 -1
  102. data/scout_commands/update +1 -1
  103. data/scout_commands/workflow/cmd +2 -1
  104. data/scout_commands/workflow/example +123 -0
  105. data/scout_commands/workflow/info +10 -1
  106. data/scout_commands/workflow/install +1 -1
  107. data/scout_commands/workflow/list +2 -2
  108. data/scout_commands/workflow/process +2 -2
  109. data/scout_commands/workflow/prov +3 -3
  110. data/scout_commands/workflow/task +36 -11
  111. data/scout_commands/workflow/trace +1 -1
  112. data/scout_commands/workflow/write_info +2 -2
  113. data/share/templates/command +1 -1
  114. data/test/scout/association/test_item.rb +5 -0
  115. data/test/scout/entity/test_property.rb +3 -3
  116. data/test/scout/knowledge_base/test_description.rb +1 -1
  117. data/test/scout/knowledge_base/test_traverse.rb +2 -2
  118. data/test/scout/persist/engine/test_packed_index.rb +6 -6
  119. data/test/scout/persist/test_tsv.rb +4 -4
  120. data/test/scout/persist/tsv/adapter/test_packed_index.rb +4 -4
  121. data/test/scout/persist/tsv/adapter/test_sharder.rb +23 -23
  122. data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +1 -1
  123. data/test/scout/persist/tsv/test_serialize.rb +1 -1
  124. data/test/scout/test_association.rb +1 -1
  125. data/test/scout/test_tsv.rb +2 -2
  126. data/test/scout/test_workflow.rb +2 -2
  127. data/test/scout/tsv/test_annotation.rb +4 -4
  128. data/test/scout/tsv/test_index.rb +1 -1
  129. data/test/scout/tsv/test_open.rb +2 -2
  130. data/test/scout/tsv/test_parser.rb +2 -2
  131. data/test/scout/tsv/test_stream.rb +1 -1
  132. data/test/scout/tsv/test_transformer.rb +1 -1
  133. data/test/scout/tsv/util/test_filter.rb +1 -1
  134. data/test/scout/tsv/util/test_melt.rb +1 -1
  135. data/test/scout/tsv/util/test_reorder.rb +1 -1
  136. data/test/scout/work_queue/test_socket.rb +3 -3
  137. data/test/scout/work_queue/test_worker.rb +2 -2
  138. data/test/scout/workflow/deployment/orchestrator/test_batches.rb +13 -3
  139. data/test/scout/workflow/deployment/orchestrator/test_chains.rb +15 -13
  140. data/test/scout/workflow/deployment/orchestrator/test_workload.rb +1 -1
  141. data/test/scout/workflow/deployment/test_local.rb +2 -2
  142. data/test/scout/workflow/deployment/test_scheduler.rb +1 -2
  143. data/test/scout/workflow/step/test_children.rb +1 -1
  144. data/test/scout/workflow/step/test_dependencies.rb +36 -1
  145. data/test/scout/workflow/step/test_info.rb +3 -35
  146. data/test/scout/workflow/step/test_load.rb +1 -1
  147. data/test/scout/workflow/step/test_provenance.rb +1 -1
  148. data/test/scout/workflow/step/test_status.rb +33 -1
  149. data/test/scout/workflow/task/test_dependencies.rb +9 -7
  150. data/test/scout/workflow/task/test_inputs.rb +1 -1
  151. data/test/scout/workflow/test_definition.rb +1 -1
  152. data/test/scout/workflow/test_documentation.rb +1 -1
  153. data/test/scout/workflow/test_entity.rb +2 -2
  154. data/test/scout/workflow/test_step.rb +13 -13
  155. data/test/scout/workflow/test_usage.rb +1 -1
  156. data/test/test_helper.rb +1 -1
  157. metadata +3 -2
@@ -42,86 +42,95 @@ class Workflow::LocalExecutor
42
42
  @available_resources = IndiferentHash.setup(available_resources)
43
43
  @resources_requested = IndiferentHash.setup({})
44
44
  @resources_used = IndiferentHash.setup({})
45
+ Log.info "LocalExecutor initiated #{Log.fingerprint available_resources}"
45
46
  end
46
47
 
47
- def process_batches(batches)
48
+ def process_batches(batches, bar: true)
48
49
  retry_jobs = []
49
50
  failed_jobs = []
50
51
 
51
- while batches.reject{|b| Workflow::Orchestrator.done_batch?(b) }.any?
52
+ bar = {desc: "Processing batches"} if TrueClass === bar
53
+ bar = {bar: bar} if Log::ProgressBar === bar
54
+ Log::ProgressBar.with_bar batches.length, bar do |bar|
55
+ bar.init if bar
52
56
 
53
- candidates = Workflow::LocalExecutor.candidates(batches)
54
- top_level_jobs = candidates.collect{|batch| batch[:top_level] }
57
+ while (missing_batches = batches.reject{|b| Workflow::Orchestrator.done_batch?(b) }).any?
55
58
 
56
- raise NoWork, "No candidates and no running jobs #{Log.fingerprint batches}" if resources_used.empty? && top_level_jobs.empty?
59
+ bar.pos batches.select{|b| Workflow::Orchestrator.done_batch?(b) }.length if bar
57
60
 
58
- if candidates.reject{|batch| failed_jobs.include? batch[:top_level] }.empty? && resources_used.empty? && top_level_jobs.empty?
59
- exception = failed_jobs.collect(&:get_exception).compact.first
60
- if exception
61
- Log.warn 'Some work failed'
62
- raise exception
63
- else
64
- raise 'Some work failed'
65
- end
66
- end
61
+ candidates = Workflow::LocalExecutor.candidates(batches)
62
+ top_level_jobs = candidates.collect{|batch| batch[:top_level] }
67
63
 
68
- candidates.each do |batch|
69
- begin
64
+ raise NoWork, "No candidates and no running jobs #{Log.fingerprint batches}" if resources_used.empty? && top_level_jobs.empty?
70
65
 
71
- job = batch[:top_level]
66
+ if candidates.reject{|batch| failed_jobs.include? batch[:top_level] }.empty? && resources_used.empty? && top_level_jobs.empty?
67
+ exception = failed_jobs.collect(&:get_exception).compact.first
68
+ if exception
69
+ Log.warn 'Some work failed'
70
+ raise exception
71
+ else
72
+ raise 'Some work failed'
73
+ end
74
+ end
72
75
 
73
- case
74
- when (job.error? || job.aborted?)
75
- begin
76
- if job.recoverable_error?
77
- if retry_jobs.include?(job)
78
- Log.warn "Failed twice #{job.path} with recoverable error"
79
- retry_jobs.delete job
76
+ candidates.each do |batch|
77
+ begin
78
+
79
+ job = batch[:top_level]
80
+
81
+ case
82
+ when (job.error? || job.aborted?)
83
+ begin
84
+ if job.recoverable_error?
85
+ if retry_jobs.include?(job)
86
+ Log.warn "Failed twice #{job.path} with recoverable error"
87
+ retry_jobs.delete job
88
+ failed_jobs << job
89
+ next
90
+ else
91
+ retry_jobs << job
92
+ job.clean
93
+ raise TryAgain
94
+ end
95
+ else
80
96
  failed_jobs << job
97
+ Log.warn "Non-recoverable error in #{job.path}"
81
98
  next
82
- else
83
- retry_jobs << job
84
- job.clean
85
- raise TryAgain
86
99
  end
87
- else
88
- failed_jobs << job
89
- Log.warn "Non-recoverable error in #{job.path}"
90
- next
100
+ ensure
101
+ Log.warn "Releases resources from failed job: #{job.path}"
102
+ release_resources(job)
91
103
  end
92
- ensure
93
- Log.warn "Releases resources from failed job: #{job.path}"
104
+ when job.done?
105
+ Log.debug "Orchestrator done #{job.path}"
94
106
  release_resources(job)
107
+ clear_batch(batches, batch)
108
+ erase_job_dependencies(job, batches)
109
+ when job.running?
110
+ next
111
+
112
+ else
113
+ check_resources(batch) do
114
+ run_batch(batch)
115
+ end
95
116
  end
96
- when job.done?
97
- Log.debug "Orchestrator done #{job.path}"
98
- release_resources(job)
117
+ rescue TryAgain
118
+ retry
119
+ end
120
+ end
121
+
122
+ batches.each do |batch|
123
+ job = batch[:top_level]
124
+ if job.done? || job.aborted? || job.error?
125
+ job.join if job.done?
99
126
  clear_batch(batches, batch)
127
+ release_resources(job)
100
128
  erase_job_dependencies(job, batches)
101
- when job.running?
102
- next
103
-
104
- else
105
- check_resources(batch) do
106
- run_batch(batch)
107
- end
108
129
  end
109
- rescue TryAgain
110
- retry
111
130
  end
112
- end
113
131
 
114
- batches.each do |batch|
115
- job = batch[:top_level]
116
- if job.done? || job.aborted? || job.error?
117
- job.join if job.done?
118
- clear_batch(batches, batch)
119
- release_resources(job)
120
- erase_job_dependencies(job, batches)
121
- end
132
+ sleep timer
122
133
  end
123
-
124
- sleep timer
125
134
  end
126
135
 
127
136
  batches.each{|batch|
@@ -141,7 +150,16 @@ class Workflow::LocalExecutor
141
150
 
142
151
  def process(rules, jobs = nil)
143
152
  jobs, rules = rules, {} if jobs.nil?
144
- jobs = [jobs] if Step === jobs
153
+
154
+ if Step === jobs
155
+ jobs = [jobs]
156
+ end
157
+
158
+ if jobs.length == 1
159
+ bar = jobs.first.progress_bar("Process batches for #{jobs.first.short_path}")
160
+ else
161
+ bar = true
162
+ end
145
163
 
146
164
  batches = Workflow::Orchestrator.job_batches(rules, jobs)
147
165
  batches.each do |batch|
@@ -153,13 +171,13 @@ class Workflow::LocalExecutor
153
171
  batch[:rules] = rules
154
172
  end
155
173
 
156
- process_batches(batches)
174
+ process_batches(batches, bar: bar)
157
175
  end
158
176
 
159
177
  def release_resources(job)
160
178
  if resources_used[job]
161
179
  Log.debug "Orchestrator releasing resouces from #{job.path}"
162
- resources_used[job].each do |resource,value|
180
+ resources_used[job].each do |resource,value|
163
181
  next if resource == 'size'
164
182
  resources_requested[resource] -= value.to_i
165
183
  end
@@ -195,8 +213,9 @@ class Workflow::LocalExecutor
195
213
  def run_batch(batch)
196
214
  job, job_rules = batch.values_at :top_level, :rules
197
215
 
198
- rules = batch[:rules]
216
+ rules = batch[:rules]
199
217
  deploy = rules[:deploy] if rules
218
+ Log.debug "Processing #{deploy} #{job.short_path} #{Log.fingerprint job_rules}"
200
219
  case deploy
201
220
  when nil, 'local', :local, :serial, 'serial'
202
221
  Scout::Config.with_config do
@@ -207,7 +226,7 @@ class Workflow::LocalExecutor
207
226
  log = job_rules[:log] if job_rules
208
227
  log = Log.severity if log.nil?
209
228
  Log.with_severity log do
210
- job.fork
229
+ job.fork(true)
211
230
  end
212
231
  end
213
232
  when 'batch', 'sched', 'slurm', 'pbs', 'lsf'
@@ -69,6 +69,7 @@ class Workflow::Orchestrator
69
69
  task_name = job.task_name
70
70
  task_rules = task_specific_rules(rules, workflow, task_name)
71
71
  acc = accumulate_rules(acc, task_rules.dup)
72
+ acc
72
73
  end
73
74
 
74
75
  if chain = batch[:chain]
@@ -92,12 +93,20 @@ class Workflow::Orchestrator
92
93
  next if batch[:deps].nil?
93
94
 
94
95
  if batch[:deps].any?
95
- batch_dep_jobs = batch[:top_level].rec_dependencies
96
+ batch_dep_jobs = batch[:top_level].rec_dependencies.to_a
96
97
  target = batch[:deps].select do |target|
97
- batch_dep_jobs.include?(target[:top_level]) &&
98
- (batch[:deps] - [target] - target[:deps]).empty?
98
+ target_deps = []
99
+ stack = [target]
100
+ while stack.any?
101
+ c = stack.pop
102
+ target_deps << c
103
+ stack.concat c[:deps]
104
+ end
105
+ (batch[:deps] - target_deps).empty?
99
106
  end.first
100
107
  next if target.nil?
108
+ all_target_jobs = ([target] + target[:deps]).collect{|d| d[:jobs] }.flatten
109
+ next if all_target_jobs.reject{|j| batch_dep_jobs.include? j }.any?
101
110
  target[:jobs] = batch[:jobs] + target[:jobs]
102
111
  target[:deps] = (target[:deps] + batch[:deps]).uniq - [target]
103
112
  target[:top_level] = batch[:top_level]
@@ -119,12 +128,64 @@ class Workflow::Orchestrator
119
128
  jobs = [jobs] unless Array === jobs
120
129
 
121
130
  workload = job_workload(jobs)
122
- job_chains_map = jobs.inject([]){|acc,job| acc.concat(self.job_chains(rules, job)) }
131
+ job_chain_list = []
123
132
 
124
- batches = chain_batches(rules, job_chains_map, workload)
133
+ jobs.each do |job|
134
+ job_chains = self.job_chains(rules, job)
135
+ job_chains.each do |chain,list|
136
+ list.each do |info|
137
+ job_chain_list << [chain,info]
138
+ end
139
+ end
140
+ end
141
+
142
+ batches = chain_batches(rules, job_chain_list, workload)
125
143
  batches = add_batch_deps(batches)
126
144
  batches = add_rules_and_consolidate(rules, batches)
127
145
 
128
146
  batches
129
147
  end
148
+
149
+ def self.sort_batches(batches)
150
+ pending = batches.dup
151
+ sorted = []
152
+ while pending.any?
153
+ leaf_nodes = batches.select{|batch| batch[:deps].nil? || (batch[:deps] - sorted).empty? }
154
+ sorted.concat(leaf_nodes - sorted)
155
+ pending -= leaf_nodes
156
+ end
157
+ sorted
158
+ end
159
+
160
+ def self.errors_in_batch(batch)
161
+ errors = batch[:jobs].select do |job|
162
+ job.error? && ! job.recoverable_error?
163
+ end
164
+
165
+ errors.empty? ? false : errors
166
+ end
167
+
168
+ def self.clean_batches(batches)
169
+ error = []
170
+ batches.collect do |batch|
171
+ if failed = Workflow::Orchestrator.errors_in_batch(batch)
172
+ Log.warn "Batch contains errors #{batch[:top_level].short_path} #{Log.fingerprint failed}"
173
+ error << batch
174
+ next
175
+ elsif (error_deps = error & batch[:deps]).any?
176
+ if error_deps.reject{|b| b[:top_level].canfail? }.any?
177
+ Log.warn "Batch depends on batches with errors #{batch[:top_level].short_path} #{Log.fingerprint(error_deps.collect{|d| d[:top_level] })}"
178
+ error << batch
179
+ next
180
+ else
181
+ batch[:deps] -= error_deps
182
+ end
183
+ end
184
+ batch
185
+ end.compact
186
+ end
187
+
188
+ def self.inspect_batch(batch)
189
+ batch.merge(deps: batch[:deps].collect{|b| b[:top_level] })
190
+ end
130
191
  end
@@ -58,47 +58,64 @@ class Workflow::Orchestrator
58
58
  chains
59
59
  end
60
60
 
61
+ def self.add_chain(job_chains, match, info)
62
+ if job_chains[match]
63
+ current = job_chains[match]
64
+ new_info = {}
65
+ new_info[:jobs] = (current[:jobs] + info[:jobs]).uniq
66
+ if current[:top_level].rec_dependencies.include?(info[:top_level]) ||
67
+ current[:top_level].input_dependencies.include?(info[:top_level])
68
+ new_info[:top_level] = current[:top_level]
69
+ else
70
+ new_info[:top_level] = info[:top_level]
71
+ end
72
+ job_chains[match] = new_info
73
+ else
74
+ job_chains[match] = info
75
+ end
76
+ end
77
+
61
78
  def self.job_chains(rules, job, computed = {})
62
- key = Log.fingerprint([rules, job.path, job.object_id])
79
+ chains = parse_chains(rules)
80
+ key = Log.fingerprint([job.path, job.object_id, chains])
63
81
  return computed[key] if computed.has_key?(key)
64
82
 
65
- chains = parse_chains(rules)
66
- matches = check_chains(chains, job)
67
- dependencies = job_dependencies(job)
83
+ job_chains = check_chains(chains, job)
84
+ job_batches = {}
85
+ new_batches = {}
86
+ job_dependencies(job).each do |dep|
87
+ dep_chains = check_chains(chains, dep)
88
+ common_chains = job_chains & dep_chains
68
89
 
69
- job_chains = []
70
- new_job_chains = {}
71
- dependencies.each do |dep|
72
- dep_matches = check_chains(chains, dep)
73
- common = matches & dep_matches
90
+ dep_batches = job_chains(rules, dep, computed)
74
91
 
75
- dep_chains = job_chains(rules, dep, computed)
76
92
  found = []
77
- dep_chains.each do |match, info|
78
- if common.include?(match)
79
- found << match
80
- new_info = new_job_chains[match] ||= {}
81
- new_info[:jobs] ||= []
82
- new_info[:jobs].concat info[:jobs]
83
- new_info[:top_level] = job
93
+ common_chains.each do |chain|
94
+ info = new_batches[chain]
95
+ info = {top_level: job, jobs: [job]} if info.nil?
96
+ if dep_batches[chain]
97
+ found << chain
98
+ dep_batches[chain].each do |dep_info|
99
+ info[:jobs] += dep_info[:jobs] - info[:jobs]
100
+ end
84
101
  else
85
- job_chains << [match, info]
102
+ info[:jobs] << dep
86
103
  end
104
+ new_batches[chain] = info
87
105
  end
88
106
 
89
- (common - found).each do |match|
90
- info = {}
91
- info[:jobs] = [job, dep]
92
- info[:top_level] = job
93
- job_chains << [match, info]
94
- end
95
- end
96
-
97
- new_job_chains.each do |match, info|
98
- info[:jobs].prepend job
99
- job_chains << [match, info]
107
+ dep_batches.each do |chain,list|
108
+ next if found.include? chain
109
+ job_batches[chain] ||= []
110
+ job_batches[chain].concat list
100
111
  end
112
+ end
101
113
 
102
- computed[key] = job_chains
114
+ new_batches.each do |match, info|
115
+ job_batches[match] ||= []
116
+ job_batches[match] << info
103
117
  end
118
+
119
+ computed[key] = job_batches
120
+ end
104
121
  end
@@ -47,7 +47,7 @@ class Workflow::Orchestrator
47
47
  case k.to_s
48
48
  when "config_keys"
49
49
  current[k] = add_config_keys current["config_keys"], value
50
- when "cpus"
50
+ when "task_cpus", 'cpus'
51
51
  # choose max
52
52
  vals = [current[k], value].compact.map{|v| v.to_i }
53
53
  current[k] = vals.max unless vals.empty?
@@ -183,7 +183,7 @@ class Workflow::Orchestrator
183
183
  r = rules_block[:resources] || {}
184
184
  r = IndiferentHash.setup r
185
185
 
186
- r = IndiferentHash.add_defaults r,
186
+ r = IndiferentHash.add_defaults r,
187
187
  cpus: rules_block[:cpus] || rules_block[:task_cpus] || 1,
188
188
  time: rules_block[:time]
189
189
 
@@ -237,7 +237,7 @@ class Workflow::Orchestrator
237
237
  merge_rule_file(acc, file_rules)
238
238
  end
239
239
  end
240
-
240
+
241
241
  def self.load_rules_for_job(jobs)
242
242
  jobs = [jobs] unless Array === jobs
243
243
 
@@ -1,32 +1,21 @@
1
1
  class Workflow::Orchestrator
2
2
 
3
- def self.job_workload(jobs)
4
- workload = []
5
- path_jobs = {}
3
+ def self.prepare_for_execution(job)
4
+ rec_dependencies = job.rec_dependencies(true)
6
5
 
7
- jobs = [jobs] unless Array === jobs
6
+ return if rec_dependencies.empty?
8
7
 
9
- jobs.each do |job|
10
- path_jobs[job.path] = job
11
- end
8
+ all_deps = rec_dependencies + [job]
12
9
 
13
- heap = []
14
- heap += jobs.collect(&:path)
15
- while job_path = heap.pop
16
- j = path_jobs[job_path]
17
- next if j.done?
18
- workload << j
19
-
20
- deps = job_dependencies(j)
21
- deps.each do |d|
22
- path_jobs[d.path] ||= d
10
+ all_deps.each do |dep|
11
+ begin
12
+ dep.clean if (dep.error? && dep.recoverable_error?) ||
13
+ dep.aborted? || (dep.done? && ! dep.updated?)
14
+ rescue RbbtException
15
+ Log.exception $!
16
+ next
23
17
  end
24
-
25
- heap.concat deps.collect(&:path)
26
- heap.uniq!
27
18
  end
28
-
29
- path_jobs
30
19
  end
31
20
 
32
21
  def self.job_workload(jobs)
@@ -1,5 +1,5 @@
1
1
  module SchedulerJob
2
- @batch_base_dir = File.expand_path(File.join('~/scout-batch'))
2
+ @batch_base_dir = File.expand_path(File.join('~/scout-batch'))
3
3
  self.singleton_class.attr_accessor :batch_base_dir
4
4
 
5
5
  module_function
@@ -27,7 +27,7 @@ module SchedulerJob
27
27
 
28
28
  singularity_img, singularity_opt_dir, singularity_ruby_inline, singularity_mounts = options.values_at :singularity_img, :singularity_opt_dir, :singularity_ruby_inline, :singularity_mounts
29
29
 
30
- singularity_cmd = %(singularity exec -e -B "#{File.expand_path singularity_opt_dir}":/singularity_opt/ -B "#{File.expand_path singularity_ruby_inline}":"/.singularity_ruby_inline":rw )
30
+ singularity_cmd = %(singularity exec -e -B "#{File.expand_path singularity_opt_dir}":/singularity_opt/ -B "#{File.expand_path singularity_ruby_inline}":"/.singularity_ruby_inline":rw )
31
31
 
32
32
  if singularity_mounts
33
33
  singularity_mounts.split(",").each do |mount|
@@ -37,7 +37,7 @@ module SchedulerJob
37
37
 
38
38
  if contain && options[:hardened]
39
39
  singularity_cmd << %( -C -H "#{contain}" \
40
- -B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
40
+ -B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
41
41
  -B "#{options[:batch_dir]}" \
42
42
  -B /scratch/tmp \
43
43
  #{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
@@ -81,17 +81,15 @@ module SchedulerJob
81
81
 
82
82
  task = job.task_name
83
83
 
84
- if job.recursive_overriden_deps.any?
85
- override_deps = job.recursive_overriden_deps.
86
- select do |dep| Symbol === dep.overriden end.
87
- collect do |dep|
88
- o_workflow = dep.overriden_workflow || dep.workflow
89
- o_workflow = o_workflow.name if o_workflow.respond_to?(:name)
90
- o_task_name = dep.overriden_task || dep.task.name
91
- name = [o_workflow, o_task_name] * "#"
92
- [name, dep.path] * "="
93
- end.uniq * ","
94
- options[:override_deps] = override_deps unless override_deps.empty?
84
+ if job.overriden?
85
+ override_deps = job.recursive_overrider_deps.collect do |dep|
86
+ o_workflow = dep.overriden_workflow || dep.workflow
87
+ o_workflow = o_workflow.name if o_workflow.respond_to?(:name)
88
+ o_task_name = dep.overriden_task || dep.task.name
89
+ name = [o_workflow, o_task_name] * "#"
90
+ [name, dep.path] * "="
91
+ end.uniq * ","
92
+ options[:override_deps] = override_deps unless override_deps.empty?
95
93
  end
96
94
 
97
95
  # Save inputs into inputs_dir (only if provided)
@@ -192,7 +190,7 @@ workflow task #{workflow} #{task} #{cmds}
192
190
  keys_from_config.each do |key|
193
191
  next unless batch_options.include? key
194
192
  default_value = Scout::Config.get(key, "batch_#{key}", "batch")
195
- next if default_value.nil?
193
+ next if default_value.nil?
196
194
  IndiferentHash.add_defaults batch_options, default_value
197
195
  end
198
196
 
@@ -211,7 +209,7 @@ workflow task #{workflow} #{task} #{cmds}
211
209
  batch_options[:contain] = File.join(contain_base, random_file)
212
210
  end
213
211
 
214
- batch_options[:sync] ||= "~/.scout/var/jobs"
212
+ batch_options[:sync] ||= "~/.scout/var/jobs"
215
213
  batch_options[:wipe_container] ||= 'post'
216
214
  end
217
215
 
@@ -219,23 +217,23 @@ workflow task #{workflow} #{task} #{cmds}
219
217
  options[:workdir_all] = batch_options[:contain]
220
218
  end
221
219
 
222
- IndiferentHash.add_defaults batch_options,
220
+ IndiferentHash.add_defaults batch_options,
223
221
  :batch_name => batch_name,
224
- :inputs_dir => inputs_dir,
225
- :nodes => 1,
222
+ :inputs_dir => inputs_dir,
223
+ :nodes => 1,
226
224
  :step_path => job.path,
227
225
  :task_cpus => 1,
228
- :time => '2min',
226
+ :time => '2min',
229
227
  :env => {'JDK_JAVA_OPTIONS' => "-Xms1g -Xmx${MAX_MEMORY}m"},
230
228
  :singularity_img => ENV["SINGULARITY_IMG"] || "~/scout.singularity.img",
231
229
  :singularity_ruby_inline => ENV["SINGULARITY_RUBY_INLINE"] || "~/.singularity_ruby_inline",
232
230
  :singularity_opt_dir => ENV["SINGULARITY_OPT_DIR"] || "~/singularity_opt",
233
- :workdir => Dir.pwd
231
+ :workdir => Dir.pwd
234
232
 
235
233
  exec_cmd = exec_cmd(job, batch_options)
236
234
  scout_cmd = scout_job_exec_cmd(job, options)
237
235
 
238
- IndiferentHash.add_defaults batch_options,
236
+ IndiferentHash.add_defaults batch_options,
239
237
  :exec_cmd => exec_cmd,
240
238
  :scout_cmd => scout_cmd
241
239
 
@@ -344,7 +342,7 @@ batch_erase_contain_dir()
344
342
  function batch_sync_contain_dir(){
345
343
  mkdir -p "$(dirname '#{sync}')"
346
344
  rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{sync}/" 2>1 >> '#{options[:fsync]}'
347
- sync_es="$?"
345
+ sync_es="$?"
348
346
  echo $sync_es > '#{options[:fsexit]}'
349
347
  find '#{sync}' -type l -ls | awk '$13 ~ /^#{sync.gsub('/','\/')}/ { sub("#{source}", "#{sync}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
350
348
  }
@@ -353,7 +351,7 @@ function batch_sync_contain_dir(){
353
351
 
354
352
  if options[:env]
355
353
  prepare_environment +=<<-EOF
356
- # Set ENV variables
354
+ # Set ENV variables
357
355
  #{options[:env].collect{|n,v| "export #{n}=\"#{v}\"" } * "\n"}
358
356
  EOF
359
357
  end
@@ -384,7 +382,7 @@ for tmpd in persist_locks produce_locks R_sockets sensiblewrite sensiblewrit
384
382
  mkdir -p "#{contain}/.scout/tmp/$tmpd"
385
383
  done
386
384
 
387
- # Copy environment
385
+ # Copy environment
388
386
  cp ~/.scout/etc/environment #{contain}/.scout/etc/
389
387
 
390
388
  # Set search_paths
@@ -406,7 +404,7 @@ echo "user_scratch: #{scratch_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}"
406
404
  exec_cmd, job_cmd, task_cpus = options.values_at :exec_cmd, :scout_cmd, :task_cpus
407
405
 
408
406
  script=<<-EOF
409
- step_path=$(
407
+ step_path=$(
410
408
  #{exec_cmd} #{job_cmd} --printpath
411
409
  )
412
410
  exit_status=$?
@@ -426,7 +424,7 @@ fi
426
424
 
427
425
  if options[:sync]
428
426
  sync_environment +=<<-EOF
429
- if [ $exit_status == '0' ]; then
427
+ if [ $exit_status == '0' ]; then
430
428
  batch_sync_contain_dir
431
429
  else
432
430
  sync_es=$exit_status
@@ -441,8 +439,8 @@ fi
441
439
  cleanup_environment = ""
442
440
 
443
441
  cleanup_environment +=<<-EOF if options[:purge_deps]
444
- if [ $exit_status == '0' ]; then
445
- #{options[:exec_cmd]} workflow forget_deps --purge --recursive_purge "$step_path" 2>1 >> '#{options[:fsync]}'
442
+ if [ $exit_status == '0' ]; then
443
+ #{options[:exec_cmd]} workflow forget_deps --purge --recursive_purge "$step_path" 2>1 >> '#{options[:fsync]}'
446
444
  fi
447
445
  EOF
448
446
 
@@ -453,7 +451,7 @@ batch_erase_contain_dir
453
451
  EOF
454
452
  elsif options[:wipe_container] == 'post' || options[:wipe_container] == 'both'
455
453
  cleanup_environment +=<<-EOF
456
- if [ $sync_es == '0' -a $empty_contain_dir == 'true' ]; then
454
+ if [ $sync_es == '0' -a $empty_contain_dir == 'true' ]; then
457
455
  batch_erase_contain_dir
458
456
  fi
459
457
  EOF
@@ -512,7 +510,7 @@ exit $exit_status
512
510
  env > #{batch_options[:fenv]}
513
511
 
514
512
  # #{Log.color :green, "2. Execute"}
515
- #{execute}
513
+ #{execute}
516
514
 
517
515
  # #{Log.color :green, "3. Sync and cleanup environment"}
518
516
  #{sync_environment}
@@ -555,13 +553,13 @@ env > #{batch_options[:fenv]}
555
553
  def run_job(job, options = {})
556
554
  system = self.to_s.split("::").last
557
555
 
558
- batch_base_dir, clean_batch_job, remove_batch_dir, procpath, tail, batch_dependencies, dry_run, orchestration_rules_file = IndiferentHash.process_options options,
556
+ batch_base_dir, clean_batch_job, remove_batch_dir, procpath, tail, batch_dependencies, dry_run, orchestration_rules_file = IndiferentHash.process_options options,
559
557
  :batch_base_dir, :clean_batch_job, :remove_batch_dir, :batch_procpath, :tail, :batch_dependencies, :dry_run, :orchestration_rules,
560
558
  :batch_base_dir => SchedulerJob.batch_base_dir
561
559
 
562
560
  if (batch_job = job.info[:batch_job]) && job_queued(batch_job)
563
561
  Log.info "Job #{job.short_path} already queued in #{batch_job}"
564
- return batch_job, batch_dir_for_id(batch_base_dir, batch_job)
562
+ return batch_job, batch_dir_for_id(batch_base_dir, batch_job)
565
563
  end
566
564
 
567
565
  if job.running?
@@ -570,7 +568,7 @@ env > #{batch_options[:fenv]}
570
568
  if job.info[:batch_job]
571
569
  return job.info[:batch_job], batch_dir_for_id(batch_base_dir, batch_job)
572
570
  else
573
- return
571
+ return
574
572
  end
575
573
  end
576
574
 
@@ -582,8 +580,8 @@ env > #{batch_options[:fenv]}
582
580
  workflows_to_load = job.rec_dependencies.select{|d| Step === d}.collect{|d| d.workflow }.compact.collect(&:to_s) - [workflow.to_s]
583
581
 
584
582
  TmpFile.with_file(nil, remove_batch_dir, :tmpdir => batch_base_dir, :prefix => "#{system}_scout_job-#{workflow.to_s}-#{task_name}-") do |batch_dir|
585
- IndiferentHash.add_defaults options,
586
- :batch_dir => batch_dir,
583
+ IndiferentHash.add_defaults options,
584
+ :batch_dir => batch_dir,
587
585
  :inputs_dir => File.join(batch_dir, "inputs_dir"),
588
586
  :workflows => workflows_to_load.any? ? workflows_to_load.uniq * "," : nil
589
587