scout-gear 8.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +26 -9
  3. data/Rakefile +6 -1
  4. data/VERSION +1 -1
  5. data/bin/scout +15 -4
  6. data/doc/lib/scout/path.md +35 -0
  7. data/doc/lib/scout/workflow/task.md +13 -0
  8. data/lib/scout/cmd.rb +23 -24
  9. data/lib/scout/concurrent_stream.rb +36 -19
  10. data/lib/scout/exceptions.rb +10 -0
  11. data/lib/scout/log/color.rb +11 -11
  12. data/lib/scout/log/progress/report.rb +7 -5
  13. data/lib/scout/log/progress/util.rb +3 -0
  14. data/lib/scout/log/trap.rb +3 -3
  15. data/lib/scout/log.rb +64 -36
  16. data/lib/scout/meta_extension.rb +34 -0
  17. data/lib/scout/misc/digest.rb +11 -2
  18. data/lib/scout/misc/format.rb +12 -7
  19. data/lib/scout/misc/monitor.rb +11 -0
  20. data/lib/scout/misc/system.rb +48 -0
  21. data/lib/scout/named_array.rb +8 -0
  22. data/lib/scout/offsite/ssh.rb +171 -0
  23. data/lib/scout/offsite/step.rb +83 -0
  24. data/lib/scout/offsite/sync.rb +55 -0
  25. data/lib/scout/offsite.rb +3 -0
  26. data/lib/scout/open/lock.rb +5 -24
  27. data/lib/scout/open/remote.rb +12 -1
  28. data/lib/scout/open/stream.rb +110 -122
  29. data/lib/scout/open/util.rb +9 -0
  30. data/lib/scout/open.rb +5 -4
  31. data/lib/scout/path/find.rb +15 -10
  32. data/lib/scout/path/util.rb +5 -0
  33. data/lib/scout/persist/serialize.rb +3 -3
  34. data/lib/scout/persist.rb +1 -1
  35. data/lib/scout/resource/path.rb +4 -0
  36. data/lib/scout/resource/util.rb +10 -4
  37. data/lib/scout/tsv/dumper.rb +2 -0
  38. data/lib/scout/tsv/index.rb +28 -86
  39. data/lib/scout/tsv/open.rb +35 -14
  40. data/lib/scout/tsv/parser.rb +9 -2
  41. data/lib/scout/tsv/persist/tokyocabinet.rb +2 -0
  42. data/lib/scout/tsv/stream.rb +204 -0
  43. data/lib/scout/tsv/transformer.rb +11 -0
  44. data/lib/scout/tsv.rb +9 -2
  45. data/lib/scout/work_queue/worker.rb +2 -2
  46. data/lib/scout/work_queue.rb +36 -12
  47. data/lib/scout/workflow/definition.rb +2 -1
  48. data/lib/scout/workflow/deployment/orchestrator.rb +245 -0
  49. data/lib/scout/workflow/deployment.rb +1 -0
  50. data/lib/scout/workflow/step/dependencies.rb +37 -11
  51. data/lib/scout/workflow/step/file.rb +5 -0
  52. data/lib/scout/workflow/step/info.rb +5 -3
  53. data/lib/scout/workflow/step/load.rb +1 -1
  54. data/lib/scout/workflow/step/provenance.rb +1 -0
  55. data/lib/scout/workflow/step/status.rb +6 -8
  56. data/lib/scout/workflow/step.rb +75 -30
  57. data/lib/scout/workflow/task/dependencies.rb +114 -0
  58. data/lib/scout/workflow/task/inputs.rb +27 -13
  59. data/lib/scout/workflow/task.rb +9 -108
  60. data/lib/scout/workflow/usage.rb +40 -12
  61. data/lib/scout/workflow.rb +4 -2
  62. data/lib/scout-gear.rb +2 -0
  63. data/lib/scout.rb +6 -0
  64. data/scout-gear.gemspec +32 -7
  65. data/scout_commands/doc +37 -0
  66. data/scout_commands/find +1 -0
  67. data/scout_commands/offsite +30 -0
  68. data/scout_commands/update +29 -0
  69. data/scout_commands/workflow/info +15 -3
  70. data/scout_commands/workflow/install +102 -0
  71. data/scout_commands/workflow/task +26 -5
  72. data/test/scout/offsite/test_ssh.rb +15 -0
  73. data/test/scout/offsite/test_step.rb +33 -0
  74. data/test/scout/offsite/test_sync.rb +36 -0
  75. data/test/scout/offsite/test_task.rb +0 -0
  76. data/test/scout/resource/test_path.rb +6 -0
  77. data/test/scout/test_named_array.rb +6 -0
  78. data/test/scout/test_persist.rb +3 -2
  79. data/test/scout/test_tsv.rb +17 -0
  80. data/test/scout/test_work_queue.rb +63 -41
  81. data/test/scout/tsv/persist/test_adapter.rb +1 -1
  82. data/test/scout/tsv/test_index.rb +14 -0
  83. data/test/scout/tsv/test_parser.rb +14 -0
  84. data/test/scout/tsv/test_stream.rb +200 -0
  85. data/test/scout/tsv/test_transformer.rb +12 -0
  86. data/test/scout/workflow/deployment/test_orchestrator.rb +272 -0
  87. data/test/scout/workflow/step/test_dependencies.rb +68 -0
  88. data/test/scout/workflow/step/test_info.rb +18 -0
  89. data/test/scout/workflow/step/test_status.rb +0 -1
  90. data/test/scout/workflow/task/test_dependencies.rb +355 -0
  91. data/test/scout/workflow/task/test_inputs.rb +53 -0
  92. data/test/scout/workflow/test_definition.rb +18 -0
  93. data/test/scout/workflow/test_documentation.rb +24 -0
  94. data/test/scout/workflow/test_step.rb +109 -0
  95. data/test/scout/workflow/test_task.rb +0 -287
  96. data/test/test_scout.rb +9 -0
  97. metadata +83 -5
  98. data/scout_commands/workflow/task_old +0 -706
@@ -1,5 +1,6 @@
1
1
  require_relative 'work_queue/socket'
2
2
  require_relative 'work_queue/worker'
3
+ require 'timeout'
3
4
 
4
5
  class WorkQueue
5
6
  attr_accessor :workers, :worker_proc, :callback
@@ -38,7 +39,7 @@ class WorkQueue
38
39
  @worker_mutex.synchronize do
39
40
  worker = @workers.index{|w| w.pid == pid}
40
41
  if worker
41
- Log.debug "Removed worker #{pid}"
42
+ Log.low "Removed worker #{pid}"
42
43
  @workers.delete_at(worker)
43
44
  @removed_workers << pid
44
45
  end
@@ -88,19 +89,32 @@ class WorkQueue
88
89
 
89
90
  Thread.pass until @reader["name"]
90
91
 
92
+ Thread.pass until @worker_mutex.synchronize{ @workers.select{|w| w.pid.nil? }.empty? }
93
+
91
94
  @waiter = Thread.new do
92
- begin
93
- Thread.current.report_on_exception = false
94
- Thread.current["name"] = "Worker waiter #{Process.pid}"
95
- while true
96
- pid = Process.wait
97
- remove_worker(pid)
98
- break if @worker_mutex.synchronize{ @workers.empty? }
95
+ Thread.current.report_on_exception = false
96
+ Thread.current["name"] = "Worker waiter #{Process.pid}"
97
+ while true
98
+ break if @worker_mutex.synchronize{ @workers.empty? }
99
+ begin
100
+ Timeout.timeout(1) do
101
+ begin
102
+ pid, status = Process.wait2
103
+ remove_worker(pid) if pid
104
+ rescue Exception
105
+ Log.exception $!
106
+ end
107
+ end
108
+ rescue Timeout::Error
109
+ pids = @worker_mutex.synchronize{ @workers.collect{|w| w.pid } }
110
+ pids.each do |p|
111
+ pid, status = Process.wait2 p, Process::WNOHANG
112
+ remove_worker(pid) if pid
113
+ end
99
114
  end
100
115
  end
101
116
  end
102
117
 
103
- Thread.pass until @worker_mutex.synchronize{ @workers.select{|w| w.pid.nil? }.empty? }
104
118
  Thread.pass until @waiter["name"]
105
119
  end
106
120
 
@@ -128,8 +142,18 @@ class WorkQueue
128
142
  end
129
143
  end
130
144
 
131
- def join
132
- @waiter.join if @waiter
133
- @reader.join if @reader
145
+ def clean
146
+ @waiter.join if @waiter
147
+ @input.clean
148
+ @output.clean
149
+ end
150
+
151
+ def join(clean = true)
152
+ begin
153
+ @waiter.join if @waiter
154
+ @reader.join if @reader
155
+ ensure
156
+ self.clean if clean
157
+ end
134
158
  end
135
159
  end
@@ -50,7 +50,7 @@ module Workflow
50
50
 
51
51
  def directory=(directory)
52
52
  @directory = directory
53
- @tasks.each{|name,d| d.directory = directory[name] } if @tasks
53
+ @tasks.each{|name,d| d.directory = Path === directory ? directory[name] : File.join(directory, name.to_s) } if @tasks
54
54
  end
55
55
 
56
56
  def annotate_next_task(type, obj)
@@ -103,6 +103,7 @@ module Workflow
103
103
  def task(name_and_type, &block)
104
104
  name, type = name_and_type.collect.first
105
105
  @tasks ||= IndiferentHash.setup({})
106
+ block = self.method(name) if block.nil?
106
107
  begin
107
108
  @annotate_next_task ||= {}
108
109
  @annotate_next_task[:extension] ||=
@@ -0,0 +1,245 @@
1
require 'etc'
require 'set'

module Workflow
  # Produces a set of jobs together with their pending dependencies.
  #
  # The orchestrator builds a "workload" (a Hash mapping each job to the jobs
  # it is still waiting on), repeatedly picks the jobs that are not waiting on
  # anything, and forks them as long as the declared resource limits allow it.
  #
  # Rules are a nested Hash read as <tt>rules[workflow][task_name]</tt>, with
  # optional <tt>rules["defaults"]</tt> and <tt>rules["default_resources"]</tt>
  # entries. The rule keys read by this class are "resources", "config_keys",
  # "log" and "erase".
  class Orchestrator

    # Builds the workload of a single job.
    #
    # Returns a Hash whose keys are +job+ and every dependency (direct, input
    # or transitive) that still has to be considered, and whose values are the
    # list of jobs each key depends on. A job that is done and updated yields
    # <tt>{job => []}</tt>.
    def self.job_workload(job)
      workload = { job => [] }
      return workload if job.done? && job.updated?

      [job.dependencies, job.input_dependencies].each do |deps|
        deps.each do |dep|
          # NOTE(review): the freshness check is made on +job+, not on +dep+;
          # kept exactly as released -- confirm this is the intended condition.
          next if dep.done? && job.updated?

          workload.merge!(job_workload(dep))
          workload[job] += workload[dep]
          workload[job] << dep
          workload[job].uniq!
        end
      end

      workload
    end

    # Merges the workloads of several jobs into one Hash. When two jobs share
    # the same path only the first one encountered is kept.
    def self.workload(jobs)
      seen_paths = Set.new
      jobs.each_with_object({}) do |job, acc|
        job_workload(job).each do |j, deps|
          # Set#add? returns nil when the path was already registered
          acc[j] = deps if seen_paths.add?(j.path)
        end
      end
    end

    # Returns the rules that apply to +job+: the entry found under
    # <tt>rules[workflow][task_name]</tt> completed with <tt>rules["defaults"]</tt>
    # for the keys it leaves unset, or just the defaults when there is no
    # specific entry. The specific entry is copied before the defaults are
    # filled in, so the caller's +rules+ entries are not modified.
    def self.job_rules(rules, job)
      defaults = rules["defaults"] || {}

      task_rules = (rules[job.workflow.to_s] || {})[job.task_name.to_s]
      return IndiferentHash.setup(defaults) unless task_rules

      merged = IndiferentHash.setup(task_rules.dup)
      defaults.each { |key, value| merged[key] = value if merged[key].nil? }
      merged
    end

    # Keeps the first job seen for each path and drops later ones, preserving
    # the order of +candidates+.
    def self.purge_duplicates(candidates)
      seen = Set.new
      candidates.select { |job| seen.add?(job.path) }
    end

    # Returns the resources +job+ requests: its "resources" rule completed with
    # <tt>rules["default_resources"]</tt> (or, failing that,
    # <tt>rules["defaults"]["resources"]</tt>). When nothing is declared at all
    # the result is <tt>{:cpus => 1}</tt>. Works on a copy, so +rules+ is left
    # untouched.
    def self.job_resources(rules, job)
      declared = (job_rules(rules, job) || {})["resources"] || {}
      resources = declared.dup
      IndiferentHash.setup(resources)

      default_resources = rules["default_resources"]
      default_resources ||= rules["defaults"]["resources"] if rules["defaults"]
      default_resources ||= {}

      default_resources.each { |name, amount| resources[name] ||= amount }

      resources = { :cpus => 1 } if resources.empty?
      resources
    end

    # Orders +candidates+ from the largest to the smallest total amount of
    # requested resources. Amounts are read with +to_i+, the same conversion
    # the resource accounting in #check_resources uses.
    def self.sort_candidates(candidates, rules)
      candidates.sort_by do |job|
        -job_resources(rules, job).values.sum { |amount| amount.to_i }
      end
    end

    # Returns the jobs of +workload+ that are not waiting on any other job and
    # are not done, without duplicated paths, sorted with .sort_candidates.
    def self.candidates(workload, rules)
      ready = workload.select { |_job, pending| pending.empty? }.keys.reject { |job| job.done? }
      sort_candidates(purge_duplicates(ready), rules)
    end

    # Shortcut for <tt>Orchestrator.new.process(*args)</tt>.
    def self.process(*args)
      self.new.process(*args)
    end

    attr_accessor :available_resources, :resources_requested, :resources_used, :timer

    # timer::               value handed to +sleep+ between scheduling passes.
    # available_resources:: Hash of resource limits. Resources that do not
    #                       appear in it are never limited (see
    #                       #check_resources), so the default <tt>{}</tt>
    #                       imposes no limits. Passing +nil+ explicitly limits
    #                       :cpus to <tt>Etc.nprocessors</tt>.
    def initialize(timer = 5, available_resources = {})
      available_resources = { :cpus => Etc.nprocessors } if available_resources.nil?
      @timer = timer
      @available_resources = IndiferentHash.setup(available_resources)
      @resources_requested = IndiferentHash.setup({})
      @resources_used = IndiferentHash.setup({})
    end

    # Gives back the resources booked for +job+ by #check_resources. Does
    # nothing when the job holds no resources.
    def release_resources(job)
      return unless resources_used[job]

      Log.debug "Orchestrator releasing resources from #{job.path}"
      resources_used[job].each do |resource, value|
        # NOTE(review): 'size' is booked in #check_resources but never
        # released here; kept as released -- confirm it is meant to accumulate.
        next if resource == 'size'
        resources_requested[resource] -= value.to_i
      end
      resources_used.delete job
    end

    # Books the resources of +job+ and yields, returning the value of the
    # block, unless doing so would exceed one of the limits in
    # +available_resources+; in that case it only logs which resources are
    # limiting and does not yield.
    def check_resources(rules, job)
      resources = Orchestrator.job_resources(rules, job)

      limit_resources = resources.select do |resource, value|
        limit = available_resources[resource]
        # Compare with the same integer conversion used for the bookkeeping below
        limit && ((resources_requested[resource] || 0) + value.to_i) > limit
      end.keys

      if limit_resources.any?
        Log.debug "Orchestrator waiting on #{job.path} due to #{limit_resources * ", "}"
      else
        resources_used[job] = resources
        resources.each do |resource, value|
          resources_requested[resource] ||= 0
          resources_requested[resource] += value.to_i
        end
        Log.low "Orchestrator producing #{job.path} with resources #{resources}"

        return yield
      end
    end

    # Forks +job+ inside a Scout::Config.with_config block, after processing
    # the "config_keys" of its rules, and with the log severity set by its
    # "log" rule (the current severity when there is none).
    def run_with_rules(rules, job)
      job_rules = Orchestrator.job_rules(rules, job)

      Scout::Config.with_config do
        job_rules[:config_keys].each do |config|
          Scout::Config.process_config config
        end if job_rules && job_rules[:config_keys]

        log = job_rules[:log] if job_rules
        log = Log.severity if log.nil?
        Log.with_severity log do
          job.fork
        end
      end
    end

    # Cleans the dependencies of +job+ whose rules set "erase" to true, as long
    # as they are not top level jobs and every job in +all_jobs+ that depends
    # on them is done. Before cleaning, each of those parents archives its
    # dependencies, copies its files dir and drops the dependency.
    def erase_job_dependencies(job, rules, all_jobs, top_level_jobs)
      job.dependencies.each do |dep|
        next if top_level_jobs.include? dep.path
        next unless Orchestrator.job_rules(rules, dep)["erase"].to_s == 'true'

        dep_path = dep.path
        parents = all_jobs.select do |parent|
          # Prefer the dependency paths recorded in the info file when present
          recorded = parent.info[:dependencies]
          paths = recorded.nil? ? parent.dependencies.collect { |d| d.path } : recorded.collect { |d| d.last }
          paths.include? dep_path
        end

        next unless parents.all? { |parent| parent.done? }

        parents.each do |parent|
          Log.high "Erasing #{dep.path} from #{parent.path}"
          parent.archive_deps
          parent.copy_files_dir
          parent.dependencies = parent.dependencies - [dep]
        end
        dep.clean
      end
    end

    # Produces +jobs+ (a Step or a list of them) following +rules+. When called
    # with a single argument it is taken as the jobs and the rules are empty.
    #
    # Each pass goes over the jobs holding resources plus the current
    # candidates, releases the resources of those that finished or failed,
    # forks the ones that fit, drops finished and failed jobs from the
    # workload and sleeps +timer+. Raises a RuntimeError when there is pending
    # workload but nothing to run or wait for.
    def process(rules, jobs = nil)
      jobs, rules = rules, {} if jobs.nil?
      jobs = [jobs] if Step === jobs
      begin

        workload = Orchestrator.workload(jobs)
        all_jobs = workload.keys

        top_level_jobs = jobs.collect { |job| job.path }
        while workload.any?

          candidates = resources_used.keys + Orchestrator.candidates(workload, rules)
          candidates.uniq!
          raise "No candidates and no running jobs" if candidates.empty?

          candidates.each do |job|
            case
            when (job.error? || job.aborted?)
              begin
                if job.recoverable_error?
                  job.clean
                  raise TryAgain
                else
                  next
                end
              ensure
                # Runs both when skipping the job and when restarting the process
                Log.warn "Releases resources from failed job: #{job.path}"
                release_resources(job)
              end
            when job.done?
              Log.debug "Orchestrator done #{job.path}"
              release_resources(job)
              erase_job_dependencies(job, rules, all_jobs, top_level_jobs)

            when job.running?
              next

            else
              check_resources(rules, job) do
                run_with_rules(rules, job)
              end
            end
          end

          new_workload = {}
          workload.each do |k, v|
            next if k.done? || k.error? || k.aborted?
            new_workload[k] = v.reject { |d| d.done? || d.error? || d.aborted? }
          end
          workload = new_workload
          sleep timer
        end
      rescue TryAgain
        # A job with a recoverable error was cleaned: recompute the workload
        # and start over.
        # NOTE(review): the number of retries is not bounded.
        retry
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ require_relative 'deployment/orchestrator'
@@ -13,15 +13,21 @@ class Step
13
13
 
14
14
  def input_dependencies
15
15
  return [] unless inputs
16
- inputs.select do |d|
17
- Step === d
18
- end
16
+ inputs.collect do |d|
17
+ if Step === d
18
+ d
19
+ elsif (Path === d) && (Step === d.pkgdir)
20
+ d.pkgdir
21
+ else
22
+ nil
23
+ end
24
+ end.compact.uniq
19
25
  end
20
26
 
21
27
  def prepare_dependencies
22
28
  inverse_dep = {}
23
- dependencies.each{|dep|
24
- if dep.present? && ! dep.updated?
29
+ dependencies.each{|dep|
30
+ if dep.present? && ! dep.updated?
25
31
  Log.debug "Clean outdated #{dep.path}"
26
32
  dep.clean
27
33
  end
@@ -29,12 +35,12 @@ class Step
29
35
  if dep.dependencies
30
36
  dep.dependencies.each do |d|
31
37
  inverse_dep[d] ||= []
32
- inverse_dep[d] << dep
38
+ inverse_dep[d] << dep
33
39
  end
34
40
  end
35
41
  input_dependencies.each do |d|
36
42
  inverse_dep[d] ||= []
37
- inverse_dep[d] << dep
43
+ inverse_dep[d] << dep
38
44
  end
39
45
  }
40
46
  inverse_dep.each do |dep,list|
@@ -43,7 +49,25 @@ class Step
43
49
  end
44
50
 
45
51
  def run_dependencies
46
- dependencies.each{|dep| dep.run(true) unless dep.running? || dep.done? }
52
+ dependencies.each{|dep|
53
+ next if dep.running? || dep.done?
54
+ compute_options = compute[dep.path] if compute
55
+ compute_options = [] if compute_options.nil?
56
+
57
+ stream = compute_options.include?(:stream)
58
+ stream = true unless ENV["SCOUT_EXPLICIT_STREAMING"] == 'true'
59
+ stream = false if compute_options.include?(:produce)
60
+
61
+ begin
62
+ dep.run(stream)
63
+ rescue ScoutException
64
+ if compute_options.include?(:canfail)
65
+ Log.medium "Allow failing of #{dep.path}"
66
+ else
67
+ raise $!
68
+ end
69
+ end
70
+ }
47
71
  end
48
72
 
49
73
  def abort_dependencies
@@ -52,9 +76,11 @@ class Step
52
76
 
53
77
  def self.wait_for_jobs(jobs)
54
78
  threads = []
55
- jobs.each do |job|
56
- threads << job.join
79
+ jobs.each do |job|
80
+ threads << Thread.new{ job.join }
81
+ end
82
+ threads.each do |t|
83
+ t.join
57
84
  end
58
- threads.each do |t| t.join end
59
85
  end
60
86
  end
@@ -3,6 +3,7 @@ class Step
3
3
  @files_dir ||= begin
4
4
  dir = @path + ".files"
5
5
  @path.annotate(dir) if Path === @path
6
+ dir.pkgdir = self
6
7
  dir
7
8
  end
8
9
  end
@@ -12,4 +13,8 @@ class Step
12
13
  Path.setup(dir) unless Path === dir
13
14
  dir[file]
14
15
  end
16
+
17
+ def bundle_files
18
+ [path, info_file, Dir.glob(File.join(files_dir,"**/*"))].flatten.select{|f| Open.exist?(f) }
19
+ end
15
20
  end
@@ -43,7 +43,7 @@ class Step
43
43
  new_info.each do |key,value|
44
44
  if key == :status
45
45
  message = new_info[:messages]
46
- if message.nil? && value == :done || value == :error || value == :aborted
46
+ if message.nil? && (value == :done || value == :error || value == :aborted)
47
47
  start = info[:start]
48
48
  eend = new_info[:end]
49
49
  if start && eend
@@ -54,6 +54,7 @@ class Step
54
54
  end
55
55
  report_status value, message
56
56
  end
57
+
57
58
  if Exception === value
58
59
  begin
59
60
  Marshal.dump(value)
@@ -67,6 +68,7 @@ class Step
67
68
  value = new
68
69
  end
69
70
  end
71
+
70
72
  if info.include?(key)
71
73
  case info[key]
72
74
  when Array
@@ -114,11 +116,11 @@ class Step
114
116
  end
115
117
 
116
118
  def error?
117
- status == :error
119
+ status == :error || status == 'error'
118
120
  end
119
121
 
120
122
  def aborted?
121
- status == :aborted
123
+ status == :aborted || status == 'aborted'
122
124
  end
123
125
 
124
126
  def running?
@@ -12,7 +12,7 @@ class Step
12
12
 
13
13
  def self.load(path)
14
14
  path = relocate(path) unless Open.exists?(path)
15
- raise "Could not load #{path}" unless Open.exists?(path)
15
+ #raise "Could not load #{path}" unless Open.exists?(path)
16
16
  s = Step.new path
17
17
  end
18
18
  end
@@ -91,6 +91,7 @@ class Step
91
91
  info[:task_name] = task
92
92
  path = step.path
93
93
  status = info[:status] || :missing
94
+ status = status.to_sym if String === status
94
95
  status = :noinfo if status == :missing && Open.exist?(path)
95
96
  status = "remote" if Open.remote?(path) || Open.ssh?(path)
96
97
  name = info[:name] || File.basename(path)
@@ -1,7 +1,11 @@
1
1
  class Step
2
2
  def abort(exception = nil)
3
- while @result && streaming? && stream = self.stream
4
- stream.abort(exception)
3
+ if info[:pid] != Process.pid && Misc.alive?(pid)
4
+ Process.kill pid
5
+ else
6
+ while @result && streaming? && stream = self.stream
7
+ stream.abort(exception)
8
+ end
5
9
  end
6
10
  end
7
11
 
@@ -28,12 +32,6 @@ class Step
28
32
  Open.rm_rf files_dir if Open.exist?(files_dir)
29
33
  end
30
34
 
31
- def present?
32
- Open.exist?(path) &&
33
- Open.exist?(info_file) &&
34
- Open.exist?(files_dir)
35
- end
36
-
37
35
 
38
36
  def recursive_clean
39
37
  dependencies.each do |dep|