scout-gear 8.0.0 → 8.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +26 -9
  3. data/Rakefile +6 -1
  4. data/VERSION +1 -1
  5. data/bin/scout +15 -4
  6. data/doc/lib/scout/path.md +35 -0
  7. data/doc/lib/scout/workflow/task.md +13 -0
  8. data/lib/scout/cmd.rb +23 -24
  9. data/lib/scout/concurrent_stream.rb +36 -19
  10. data/lib/scout/exceptions.rb +10 -0
  11. data/lib/scout/log/color.rb +11 -11
  12. data/lib/scout/log/progress/report.rb +7 -5
  13. data/lib/scout/log/progress/util.rb +3 -0
  14. data/lib/scout/log/trap.rb +3 -3
  15. data/lib/scout/log.rb +64 -36
  16. data/lib/scout/meta_extension.rb +34 -0
  17. data/lib/scout/misc/digest.rb +11 -2
  18. data/lib/scout/misc/format.rb +12 -7
  19. data/lib/scout/misc/monitor.rb +11 -0
  20. data/lib/scout/misc/system.rb +48 -0
  21. data/lib/scout/named_array.rb +8 -0
  22. data/lib/scout/offsite/ssh.rb +171 -0
  23. data/lib/scout/offsite/step.rb +83 -0
  24. data/lib/scout/offsite/sync.rb +55 -0
  25. data/lib/scout/offsite.rb +3 -0
  26. data/lib/scout/open/lock.rb +5 -24
  27. data/lib/scout/open/remote.rb +12 -1
  28. data/lib/scout/open/stream.rb +110 -122
  29. data/lib/scout/open/util.rb +9 -0
  30. data/lib/scout/open.rb +5 -4
  31. data/lib/scout/path/find.rb +15 -10
  32. data/lib/scout/path/util.rb +5 -0
  33. data/lib/scout/persist/serialize.rb +3 -3
  34. data/lib/scout/persist.rb +1 -1
  35. data/lib/scout/resource/path.rb +4 -0
  36. data/lib/scout/resource/util.rb +10 -4
  37. data/lib/scout/tsv/dumper.rb +2 -0
  38. data/lib/scout/tsv/index.rb +28 -86
  39. data/lib/scout/tsv/open.rb +35 -14
  40. data/lib/scout/tsv/parser.rb +9 -2
  41. data/lib/scout/tsv/persist/tokyocabinet.rb +2 -0
  42. data/lib/scout/tsv/stream.rb +204 -0
  43. data/lib/scout/tsv/transformer.rb +11 -0
  44. data/lib/scout/tsv.rb +9 -2
  45. data/lib/scout/work_queue/worker.rb +2 -2
  46. data/lib/scout/work_queue.rb +36 -12
  47. data/lib/scout/workflow/definition.rb +2 -1
  48. data/lib/scout/workflow/deployment/orchestrator.rb +245 -0
  49. data/lib/scout/workflow/deployment.rb +1 -0
  50. data/lib/scout/workflow/step/dependencies.rb +37 -11
  51. data/lib/scout/workflow/step/file.rb +5 -0
  52. data/lib/scout/workflow/step/info.rb +5 -3
  53. data/lib/scout/workflow/step/load.rb +1 -1
  54. data/lib/scout/workflow/step/provenance.rb +1 -0
  55. data/lib/scout/workflow/step/status.rb +6 -8
  56. data/lib/scout/workflow/step.rb +75 -30
  57. data/lib/scout/workflow/task/dependencies.rb +114 -0
  58. data/lib/scout/workflow/task/inputs.rb +27 -13
  59. data/lib/scout/workflow/task.rb +9 -108
  60. data/lib/scout/workflow/usage.rb +40 -12
  61. data/lib/scout/workflow.rb +4 -2
  62. data/lib/scout-gear.rb +2 -0
  63. data/lib/scout.rb +6 -0
  64. data/scout-gear.gemspec +32 -7
  65. data/scout_commands/doc +37 -0
  66. data/scout_commands/find +1 -0
  67. data/scout_commands/offsite +30 -0
  68. data/scout_commands/update +29 -0
  69. data/scout_commands/workflow/info +15 -3
  70. data/scout_commands/workflow/install +102 -0
  71. data/scout_commands/workflow/task +26 -5
  72. data/test/scout/offsite/test_ssh.rb +15 -0
  73. data/test/scout/offsite/test_step.rb +33 -0
  74. data/test/scout/offsite/test_sync.rb +36 -0
  75. data/test/scout/offsite/test_task.rb +0 -0
  76. data/test/scout/resource/test_path.rb +6 -0
  77. data/test/scout/test_named_array.rb +6 -0
  78. data/test/scout/test_persist.rb +3 -2
  79. data/test/scout/test_tsv.rb +17 -0
  80. data/test/scout/test_work_queue.rb +63 -41
  81. data/test/scout/tsv/persist/test_adapter.rb +1 -1
  82. data/test/scout/tsv/test_index.rb +14 -0
  83. data/test/scout/tsv/test_parser.rb +14 -0
  84. data/test/scout/tsv/test_stream.rb +200 -0
  85. data/test/scout/tsv/test_transformer.rb +12 -0
  86. data/test/scout/workflow/deployment/test_orchestrator.rb +272 -0
  87. data/test/scout/workflow/step/test_dependencies.rb +68 -0
  88. data/test/scout/workflow/step/test_info.rb +18 -0
  89. data/test/scout/workflow/step/test_status.rb +0 -1
  90. data/test/scout/workflow/task/test_dependencies.rb +355 -0
  91. data/test/scout/workflow/task/test_inputs.rb +53 -0
  92. data/test/scout/workflow/test_definition.rb +18 -0
  93. data/test/scout/workflow/test_documentation.rb +24 -0
  94. data/test/scout/workflow/test_step.rb +109 -0
  95. data/test/scout/workflow/test_task.rb +0 -287
  96. data/test/test_scout.rb +9 -0
  97. metadata +83 -5
  98. data/scout_commands/workflow/task_old +0 -706
@@ -1,5 +1,6 @@
1
1
  require_relative 'work_queue/socket'
2
2
  require_relative 'work_queue/worker'
3
+ require 'timeout'
3
4
 
4
5
  class WorkQueue
5
6
  attr_accessor :workers, :worker_proc, :callback
@@ -38,7 +39,7 @@ class WorkQueue
38
39
  @worker_mutex.synchronize do
39
40
  worker = @workers.index{|w| w.pid == pid}
40
41
  if worker
41
- Log.debug "Removed worker #{pid}"
42
+ Log.low "Removed worker #{pid}"
42
43
  @workers.delete_at(worker)
43
44
  @removed_workers << pid
44
45
  end
@@ -88,19 +89,32 @@ class WorkQueue
88
89
 
89
90
  Thread.pass until @reader["name"]
90
91
 
92
+ Thread.pass until @worker_mutex.synchronize{ @workers.select{|w| w.pid.nil? }.empty? }
93
+
91
94
  @waiter = Thread.new do
92
- begin
93
- Thread.current.report_on_exception = false
94
- Thread.current["name"] = "Worker waiter #{Process.pid}"
95
- while true
96
- pid = Process.wait
97
- remove_worker(pid)
98
- break if @worker_mutex.synchronize{ @workers.empty? }
95
+ Thread.current.report_on_exception = false
96
+ Thread.current["name"] = "Worker waiter #{Process.pid}"
97
+ while true
98
+ break if @worker_mutex.synchronize{ @workers.empty? }
99
+ begin
100
+ Timeout.timeout(1) do
101
+ begin
102
+ pid, status = Process.wait2
103
+ remove_worker(pid) if pid
104
+ rescue Exception
105
+ Log.exception $!
106
+ end
107
+ end
108
+ rescue Timeout::Error
109
+ pids = @worker_mutex.synchronize{ @workers.collect{|w| w.pid } }
110
+ pids.each do |p|
111
+ pid, status = Process.wait2 p, Process::WNOHANG
112
+ remove_worker(pid) if pid
113
+ end
99
114
  end
100
115
  end
101
116
  end
102
117
 
103
- Thread.pass until @worker_mutex.synchronize{ @workers.select{|w| w.pid.nil? }.empty? }
104
118
  Thread.pass until @waiter["name"]
105
119
  end
106
120
 
@@ -128,8 +142,18 @@ class WorkQueue
128
142
  end
129
143
  end
130
144
 
131
- def join
132
- @waiter.join if @waiter
133
- @reader.join if @reader
145
+ def clean
146
+ @waiter.join if @waiter
147
+ @input.clean
148
+ @output.clean
149
+ end
150
+
151
+ def join(clean = true)
152
+ begin
153
+ @waiter.join if @waiter
154
+ @reader.join if @reader
155
+ ensure
156
+ self.clean if clean
157
+ end
134
158
  end
135
159
  end
@@ -50,7 +50,7 @@ module Workflow
50
50
 
51
51
  def directory=(directory)
52
52
  @directory = directory
53
- @tasks.each{|name,d| d.directory = directory[name] } if @tasks
53
+ @tasks.each{|name,d| d.directory = Path === directory ? directory[name] : File.join(directory, name.to_s) } if @tasks
54
54
  end
55
55
 
56
56
  def annotate_next_task(type, obj)
@@ -103,6 +103,7 @@ module Workflow
103
103
  def task(name_and_type, &block)
104
104
  name, type = name_and_type.collect.first
105
105
  @tasks ||= IndiferentHash.setup({})
106
+ block = self.method(name) if block.nil?
106
107
  begin
107
108
  @annotate_next_task ||= {}
108
109
  @annotate_next_task[:extension] ||=
@@ -0,0 +1,245 @@
1
+ module Workflow
2
+ class Orchestrator
3
+
4
+ def self.job_workload(job)
5
+ workload = {job => []}
6
+ return workload if job.done? && job.updated?
7
+
8
+ job.dependencies.each do |dep|
9
+ next if dep.done? && job.updated?
10
+ workload.merge!(job_workload(dep))
11
+ workload[job] += workload[dep]
12
+ workload[job] << dep
13
+ workload[job].uniq!
14
+ end
15
+
16
+ job.input_dependencies.each do |dep|
17
+ next if dep.done? && job.updated?
18
+ workload.merge!(job_workload(dep))
19
+ workload[job] += workload[dep]
20
+ workload[job] << dep
21
+ workload[job].uniq!
22
+ end
23
+
24
+ workload
25
+ end
26
+
27
+ def self.workload(jobs)
28
+ jobs.inject({}) do |acc,job|
29
+ Orchestrator.job_workload(job).each do |j,d|
30
+ acc[j] = d unless acc.keys.collect{|k| k.path }.include? j.path
31
+ end
32
+ acc
33
+ end
34
+ end
35
+
36
+ def self.job_rules(rules, job)
37
+ workflow = job.workflow.to_s
38
+ task_name = job.task_name.to_s
39
+ defaults = rules["defaults"] || {}
40
+
41
+ return IndiferentHash.setup(defaults) unless rules[workflow]
42
+ return IndiferentHash.setup(defaults) unless rules[workflow][task_name]
43
+
44
+ job_rules = IndiferentHash.setup(rules[workflow][task_name])
45
+ defaults.each{|k,v| job_rules[k] = v if job_rules[k].nil? } if defaults
46
+ job_rules
47
+ end
48
+
49
+ def self.purge_duplicates(candidates)
50
+ seen = Set.new
51
+ candidates.select do |job|
52
+ if seen.include? job.path
53
+ false
54
+ else
55
+ seen << job.path
56
+ true
57
+ end
58
+ end
59
+ end
60
+
61
+ def self.job_resources(rules, job)
62
+ resources = (job_rules(rules, job) || {})["resources"] || {}
63
+
64
+ IndiferentHash.setup(resources)
65
+
66
+ default_resources = rules["default_resources"]
67
+ default_resources ||= rules["defaults"]["resources"] if rules["defaults"]
68
+ default_resources ||= {}
69
+
70
+ default_resources.each{|k,v| resources[k] ||= v } if default_resources
71
+
72
+ resources = {:cpus => 1} if resources.empty?
73
+ resources
74
+ end
75
+
76
+ def self.sort_candidates(candidates, rules)
77
+ seen = Set.new
78
+ candidates.sort_by do |job|
79
+ - job_resources(rules, job).values.inject(0){|acc,e| acc += e}
80
+ end
81
+ end
82
+
83
+ def self.candidates(workload, rules)
84
+ if rules.empty?
85
+ candidates = workload.
86
+ select{|k,v| v.empty? }.
87
+ collect{|k,v| k }.
88
+ reject{|k| k.done? }
89
+ else
90
+ candidates = workload. #select{|k,v| Orchestrator.job_rules(rules, k) }.
91
+ select{|k,v| v.empty? }.
92
+ collect{|k,v| k }.
93
+ reject{|k| k.done? }
94
+ end
95
+
96
+ top_level = workload.keys - workload.values.flatten
97
+
98
+ candidates = purge_duplicates candidates
99
+ candidates = sort_candidates candidates, rules
100
+
101
+ candidates
102
+ end
103
+
104
+ def self.process(*args)
105
+ self.new.process(*args)
106
+ end
107
+
108
+ attr_accessor :available_resources, :resources_requested, :resources_used, :timer
109
+
110
+ def initialize(timer = 5, available_resources = {})
111
+ available_resources = {:cpus => Etc.nprocessors } if available_resources.nil?
112
+ @timer = timer
113
+ @available_resources = IndiferentHash.setup(available_resources)
114
+ @resources_requested = IndiferentHash.setup({})
115
+ @resources_used = IndiferentHash.setup({})
116
+ end
117
+
118
+ def release_resources(job)
119
+ if resources_used[job]
120
+ Log.debug "Orchestrator releasing resouces from #{job.path}"
121
+ resources_used[job].each do |resource,value|
122
+ next if resource == 'size'
123
+ resources_requested[resource] -= value.to_i
124
+ end
125
+ resources_used.delete job
126
+ end
127
+ end
128
+
129
+ def check_resources(rules, job)
130
+ resources = Orchestrator.job_resources(rules, job)
131
+
132
+ limit_resources = resources.select{|resource,value| available_resources[resource] && ((resources_requested[resource] || 0) + value) > available_resources[resource] }.collect{|resource,v| resource }
133
+ if limit_resources.any?
134
+ Log.debug "Orchestrator waiting on #{job.path} due to #{limit_resources * ", "}"
135
+ else
136
+
137
+ resources_used[job] = resources
138
+ resources.each do |resource,value|
139
+ resources_requested[resource] ||= 0
140
+ resources_requested[resource] += value.to_i
141
+ end
142
+ Log.low "Orchestrator producing #{job.path} with resources #{resources}"
143
+
144
+ return yield
145
+ end
146
+ end
147
+
148
+ def run_with_rules(rules, job)
149
+ job_rules = Orchestrator.job_rules(rules, job)
150
+
151
+ Scout::Config.with_config do
152
+ job_rules[:config_keys].each do |config|
153
+ Scout::Config.process_config config
154
+ end if job_rules && job_rules[:config_keys]
155
+
156
+ log = job_rules[:log] if job_rules
157
+ log = Log.severity if log.nil?
158
+ Log.with_severity log do
159
+ job.fork
160
+ end
161
+ end
162
+ end
163
+
164
+ def erase_job_dependencies(job, rules, all_jobs, top_level_jobs)
165
+ job.dependencies.each do |dep|
166
+ next if top_level_jobs.include? dep.path
167
+ next unless Orchestrator.job_rules(rules, dep)["erase"].to_s == 'true'
168
+
169
+ dep_path = dep.path
170
+ parents = all_jobs.select do |parent|
171
+ paths = parent.info[:dependencies].nil? ? parent.dependencies.collect{|d| d.path } : parent.info[:dependencies].collect{|d| d.last }
172
+ paths.include? dep_path
173
+ end
174
+
175
+ next unless parents.reject{|parent| parent.done? }.empty?
176
+
177
+ parents.each do |parent|
178
+ Log.high "Erasing #{dep.path} from #{parent.path}"
179
+ parent.archive_deps
180
+ parent.copy_files_dir
181
+ parent.dependencies = parent.dependencies - [dep]
182
+ end
183
+ dep.clean
184
+ end
185
+ end
186
+
187
+ def process(rules, jobs = nil)
188
+ jobs, rules = rules, {} if jobs.nil?
189
+ jobs = [jobs] if Step === jobs
190
+ begin
191
+
192
+ workload = Orchestrator.workload(jobs)
193
+ all_jobs = workload.keys
194
+
195
+ top_level_jobs = jobs.collect{|job| job.path }
196
+ while workload.any?
197
+
198
+ candidates = resources_used.keys + Orchestrator.candidates(workload, rules)
199
+ candidates.uniq!
200
+ raise "No candidates and no running jobs" if candidates.empty?
201
+
202
+ candidates.each do |job|
203
+ case
204
+ when (job.error? || job.aborted?)
205
+ begin
206
+ if job.recoverable_error?
207
+ job.clean
208
+ raise TryAgain
209
+ else
210
+ next
211
+ end
212
+ ensure
213
+ Log.warn "Releases resources from failed job: #{job.path}"
214
+ release_resources(job)
215
+ end
216
+ when job.done?
217
+ Log.debug "Orchestrator done #{job.path}"
218
+ release_resources(job)
219
+ erase_job_dependencies(job, rules, all_jobs, top_level_jobs)
220
+
221
+ when job.running?
222
+ next
223
+
224
+ else
225
+ check_resources(rules, job) do
226
+ run_with_rules(rules, job)
227
+ end
228
+ end
229
+ end
230
+
231
+ new_workload = {}
232
+ workload.each do |k,v|
233
+ next if k.done? || k.error? || k.aborted?
234
+ #new_workload[k] = v.reject{|d| d.done? || ((d.error? || d.aborted?) && ! d.recoverable_error?)}
235
+ new_workload[k] = v.reject{|d| d.done? || d.error? || d.aborted?}
236
+ end
237
+ workload = new_workload
238
+ sleep timer
239
+ end
240
+ rescue TryAgain
241
+ retry
242
+ end
243
+ end
244
+ end
245
+ end
@@ -0,0 +1 @@
1
+ require_relative 'deployment/orchestrator'
@@ -13,15 +13,21 @@ class Step
13
13
 
14
14
  def input_dependencies
15
15
  return [] unless inputs
16
- inputs.select do |d|
17
- Step === d
18
- end
16
+ inputs.collect do |d|
17
+ if Step === d
18
+ d
19
+ elsif (Path === d) && (Step === d.pkgdir)
20
+ d.pkgdir
21
+ else
22
+ nil
23
+ end
24
+ end.compact.uniq
19
25
  end
20
26
 
21
27
  def prepare_dependencies
22
28
  inverse_dep = {}
23
- dependencies.each{|dep|
24
- if dep.present? && ! dep.updated?
29
+ dependencies.each{|dep|
30
+ if dep.present? && ! dep.updated?
25
31
  Log.debug "Clean outdated #{dep.path}"
26
32
  dep.clean
27
33
  end
@@ -29,12 +35,12 @@ class Step
29
35
  if dep.dependencies
30
36
  dep.dependencies.each do |d|
31
37
  inverse_dep[d] ||= []
32
- inverse_dep[d] << dep
38
+ inverse_dep[d] << dep
33
39
  end
34
40
  end
35
41
  input_dependencies.each do |d|
36
42
  inverse_dep[d] ||= []
37
- inverse_dep[d] << dep
43
+ inverse_dep[d] << dep
38
44
  end
39
45
  }
40
46
  inverse_dep.each do |dep,list|
@@ -43,7 +49,25 @@ class Step
43
49
  end
44
50
 
45
51
  def run_dependencies
46
- dependencies.each{|dep| dep.run(true) unless dep.running? || dep.done? }
52
+ dependencies.each{|dep|
53
+ next if dep.running? || dep.done?
54
+ compute_options = compute[dep.path] if compute
55
+ compute_options = [] if compute_options.nil?
56
+
57
+ stream = compute_options.include?(:stream)
58
+ stream = true unless ENV["SCOUT_EXPLICIT_STREAMING"] == 'true'
59
+ stream = false if compute_options.include?(:produce)
60
+
61
+ begin
62
+ dep.run(stream)
63
+ rescue ScoutException
64
+ if compute_options.include?(:canfail)
65
+ Log.medium "Allow failing of #{dep.path}"
66
+ else
67
+ raise $!
68
+ end
69
+ end
70
+ }
47
71
  end
48
72
 
49
73
  def abort_dependencies
@@ -52,9 +76,11 @@ class Step
52
76
 
53
77
  def self.wait_for_jobs(jobs)
54
78
  threads = []
55
- jobs.each do |job|
56
- threads << job.join
79
+ jobs.each do |job|
80
+ threads << Thread.new{ job.join }
81
+ end
82
+ threads.each do |t|
83
+ t.join
57
84
  end
58
- threads.each do |t| t.join end
59
85
  end
60
86
  end
@@ -3,6 +3,7 @@ class Step
3
3
  @files_dir ||= begin
4
4
  dir = @path + ".files"
5
5
  @path.annotate(dir) if Path === @path
6
+ dir.pkgdir = self
6
7
  dir
7
8
  end
8
9
  end
@@ -12,4 +13,8 @@ class Step
12
13
  Path.setup(dir) unless Path === dir
13
14
  dir[file]
14
15
  end
16
+
17
+ def bundle_files
18
+ [path, info_file, Dir.glob(File.join(files_dir,"**/*"))].flatten.select{|f| Open.exist?(f) }
19
+ end
15
20
  end
@@ -43,7 +43,7 @@ class Step
43
43
  new_info.each do |key,value|
44
44
  if key == :status
45
45
  message = new_info[:messages]
46
- if message.nil? && value == :done || value == :error || value == :aborted
46
+ if message.nil? && (value == :done || value == :error || value == :aborted)
47
47
  start = info[:start]
48
48
  eend = new_info[:end]
49
49
  if start && eend
@@ -54,6 +54,7 @@ class Step
54
54
  end
55
55
  report_status value, message
56
56
  end
57
+
57
58
  if Exception === value
58
59
  begin
59
60
  Marshal.dump(value)
@@ -67,6 +68,7 @@ class Step
67
68
  value = new
68
69
  end
69
70
  end
71
+
70
72
  if info.include?(key)
71
73
  case info[key]
72
74
  when Array
@@ -114,11 +116,11 @@ class Step
114
116
  end
115
117
 
116
118
  def error?
117
- status == :error
119
+ status == :error || status == 'error'
118
120
  end
119
121
 
120
122
  def aborted?
121
- status == :aborted
123
+ status == :aborted || status == 'aborted'
122
124
  end
123
125
 
124
126
  def running?
@@ -12,7 +12,7 @@ class Step
12
12
 
13
13
  def self.load(path)
14
14
  path = relocate(path) unless Open.exists?(path)
15
- raise "Could not load #{path}" unless Open.exists?(path)
15
+ #raise "Could not load #{path}" unless Open.exists?(path)
16
16
  s = Step.new path
17
17
  end
18
18
  end
@@ -91,6 +91,7 @@ class Step
91
91
  info[:task_name] = task
92
92
  path = step.path
93
93
  status = info[:status] || :missing
94
+ status = status.to_sym if String === status
94
95
  status = :noinfo if status == :missing && Open.exist?(path)
95
96
  status = "remote" if Open.remote?(path) || Open.ssh?(path)
96
97
  name = info[:name] || File.basename(path)
@@ -1,7 +1,11 @@
1
1
  class Step
2
2
  def abort(exception = nil)
3
- while @result && streaming? && stream = self.stream
4
- stream.abort(exception)
3
+ if info[:pid] != Process.pid && Misc.alive?(pid)
4
+ Process.kill pid
5
+ else
6
+ while @result && streaming? && stream = self.stream
7
+ stream.abort(exception)
8
+ end
5
9
  end
6
10
  end
7
11
 
@@ -28,12 +32,6 @@ class Step
28
32
  Open.rm_rf files_dir if Open.exist?(files_dir)
29
33
  end
30
34
 
31
- def present?
32
- Open.exist?(path) &&
33
- Open.exist?(info_file) &&
34
- Open.exist?(files_dir)
35
- end
36
-
37
35
 
38
36
  def recursive_clean
39
37
  dependencies.each do |dep|