scout-gear 10.3.0 → 10.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +100 -657
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/scout +1 -3
- data/lib/scout/association/fields.rb +170 -0
- data/lib/scout/association/index.rb +229 -0
- data/lib/scout/association/item.rb +227 -0
- data/lib/scout/association/util.rb +7 -0
- data/lib/scout/association.rb +100 -0
- data/lib/scout/entity/format.rb +62 -0
- data/lib/scout/entity/identifiers.rb +111 -0
- data/lib/scout/entity/object.rb +20 -0
- data/lib/scout/entity/property.rb +165 -0
- data/lib/scout/entity.rb +40 -0
- data/lib/scout/offsite/step.rb +2 -2
- data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
- data/lib/scout/persist/engine/packed_index.rb +100 -0
- data/lib/scout/persist/engine/sharder.rb +219 -0
- data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
- data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
- data/lib/scout/persist/engine.rb +4 -0
- data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
- data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
- data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
- data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
- data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
- data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
- data/lib/scout/persist/tsv/adapter.rb +6 -0
- data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
- data/lib/scout/persist/tsv.rb +107 -0
- data/lib/scout/tsv/annotation/repo.rb +83 -0
- data/lib/scout/tsv/annotation.rb +169 -0
- data/lib/scout/tsv/attach.rb +104 -20
- data/lib/scout/tsv/change_id/translate.rb +148 -0
- data/lib/scout/tsv/change_id.rb +6 -3
- data/lib/scout/tsv/csv.rb +85 -0
- data/lib/scout/tsv/dumper.rb +113 -25
- data/lib/scout/tsv/entity.rb +5 -0
- data/lib/scout/tsv/index.rb +89 -37
- data/lib/scout/tsv/open.rb +21 -8
- data/lib/scout/tsv/parser.rb +156 -91
- data/lib/scout/tsv/path.rb +7 -2
- data/lib/scout/tsv/stream.rb +48 -6
- data/lib/scout/tsv/transformer.rb +25 -3
- data/lib/scout/tsv/traverse.rb +26 -18
- data/lib/scout/tsv/util/process.rb +8 -1
- data/lib/scout/tsv/util/reorder.rb +25 -15
- data/lib/scout/tsv/util/select.rb +9 -1
- data/lib/scout/tsv/util/sort.rb +90 -2
- data/lib/scout/tsv/util/unzip.rb +56 -0
- data/lib/scout/tsv/util.rb +52 -5
- data/lib/scout/tsv.rb +85 -19
- data/lib/scout/work_queue/socket.rb +8 -0
- data/lib/scout/work_queue/worker.rb +22 -5
- data/lib/scout/work_queue.rb +38 -24
- data/lib/scout/workflow/definition.rb +19 -11
- data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
- data/lib/scout/workflow/deployment/trace.rb +205 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/documentation.rb +1 -1
- data/lib/scout/workflow/step/archive.rb +42 -0
- data/lib/scout/workflow/step/children.rb +51 -0
- data/lib/scout/workflow/step/config.rb +1 -1
- data/lib/scout/workflow/step/dependencies.rb +24 -7
- data/lib/scout/workflow/step/file.rb +19 -0
- data/lib/scout/workflow/step/info.rb +37 -9
- data/lib/scout/workflow/step/progress.rb +11 -2
- data/lib/scout/workflow/step/status.rb +8 -1
- data/lib/scout/workflow/step.rb +80 -25
- data/lib/scout/workflow/task/dependencies.rb +4 -1
- data/lib/scout/workflow/task/inputs.rb +91 -41
- data/lib/scout/workflow/task.rb +54 -57
- data/lib/scout/workflow/usage.rb +1 -1
- data/lib/scout/workflow/util.rb +4 -0
- data/lib/scout/workflow.rb +110 -13
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +0 -1
- data/scout-gear.gemspec +80 -23
- data/scout_commands/rbbt +2 -0
- data/test/data/person/brothers +4 -0
- data/test/data/person/identifiers +10 -0
- data/test/data/person/marriages +3 -0
- data/test/data/person/parents +6 -0
- data/test/scout/association/test_fields.rb +105 -0
- data/test/scout/association/test_index.rb +70 -0
- data/test/scout/association/test_item.rb +21 -0
- data/test/scout/entity/test_format.rb +19 -0
- data/test/scout/entity/test_identifiers.rb +58 -0
- data/test/scout/entity/test_object.rb +0 -0
- data/test/scout/entity/test_property.rb +345 -0
- data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
- data/test/scout/persist/engine/test_packed_index.rb +99 -0
- data/test/scout/persist/engine/test_sharder.rb +31 -0
- data/test/scout/persist/engine/test_tkrzw.rb +0 -0
- data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
- data/test/scout/persist/test_tsv.rb +146 -0
- data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
- data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
- data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
- data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
- data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
- data/test/scout/persist/tsv/adapter/test_tkrzw.rb +126 -0
- data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
- data/test/scout/persist/tsv/test_serialize.rb +12 -0
- data/test/scout/test_association.rb +51 -0
- data/test/scout/test_entity.rb +40 -0
- data/test/scout/test_tsv.rb +63 -4
- data/test/scout/test_work_queue.rb +3 -2
- data/test/scout/test_workflow.rb +16 -15
- data/test/scout/tsv/annotation/test_repo.rb +150 -0
- data/test/scout/tsv/change_id/test_translate.rb +178 -0
- data/test/scout/tsv/test_annotation.rb +52 -0
- data/test/scout/tsv/test_attach.rb +226 -1
- data/test/scout/tsv/test_change_id.rb +25 -0
- data/test/scout/tsv/test_csv.rb +50 -0
- data/test/scout/tsv/test_dumper.rb +38 -0
- data/test/scout/tsv/test_entity.rb +0 -0
- data/test/scout/tsv/test_index.rb +82 -0
- data/test/scout/tsv/test_open.rb +44 -0
- data/test/scout/tsv/test_parser.rb +70 -0
- data/test/scout/tsv/test_stream.rb +22 -0
- data/test/scout/tsv/test_transformer.rb +39 -3
- data/test/scout/tsv/test_traverse.rb +78 -0
- data/test/scout/tsv/util/test_process.rb +36 -0
- data/test/scout/tsv/util/test_reorder.rb +67 -0
- data/test/scout/tsv/util/test_sort.rb +28 -1
- data/test/scout/tsv/util/test_unzip.rb +32 -0
- data/test/scout/work_queue/test_socket.rb +4 -1
- data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
- data/test/scout/workflow/deployment/test_trace.rb +25 -0
- data/test/scout/workflow/step/test_archive.rb +28 -0
- data/test/scout/workflow/step/test_children.rb +25 -0
- data/test/scout/workflow/step/test_info.rb +16 -0
- data/test/scout/workflow/task/test_dependencies.rb +16 -16
- data/test/scout/workflow/task/test_inputs.rb +45 -1
- data/test/scout/workflow/test_definition.rb +52 -0
- data/test/scout/workflow/test_step.rb +57 -0
- data/test/scout/workflow/test_task.rb +26 -1
- data/test/scout/workflow/test_usage.rb +4 -4
- data/test/test_helper.rb +23 -1
- metadata +71 -14
- data/lib/scout/tsv/persist.rb +0 -27
- data/test/scout/tsv/persist/test_tkrzw.rb +0 -123
- data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
- data/test/scout/tsv/test_persist.rb +0 -45
data/lib/scout/work_queue.rb
CHANGED
@@ -6,18 +6,29 @@ require 'timeout'
|
|
6
6
|
class WorkQueue
|
7
7
|
attr_accessor :workers, :worker_proc, :callback
|
8
8
|
|
9
|
+
def new_worker
|
10
|
+
worker = Worker.new
|
11
|
+
worker.queue_id = queue_id
|
12
|
+
worker
|
13
|
+
end
|
14
|
+
|
9
15
|
def initialize(workers = 0, &block)
|
10
16
|
workers = workers.to_i if String === workers
|
11
17
|
@input = WorkQueue::Socket.new
|
12
18
|
@output = WorkQueue::Socket.new
|
13
|
-
@workers = workers.times.collect{
|
19
|
+
@workers = workers.times.collect{ new_worker }
|
14
20
|
@worker_proc = block
|
15
21
|
@worker_mutex = Mutex.new
|
16
22
|
@removed_workers = []
|
23
|
+
Log.medium "Starting queue #{queue_id} with workers: #{Log.fingerprint @workers.collect{|w| w.worker_short_id }} and sockets #{@input.socket_id} and #{@output.socket_id}"
|
24
|
+
end
|
25
|
+
|
26
|
+
def queue_id
|
27
|
+
[object_id, Process.pid] * "@"
|
17
28
|
end
|
18
29
|
|
19
30
|
def add_worker(&block)
|
20
|
-
worker =
|
31
|
+
worker = new_worker
|
21
32
|
@worker_mutex.synchronize do
|
22
33
|
@workers.push(worker)
|
23
34
|
if block_given?
|
@@ -41,9 +52,11 @@ class WorkQueue
|
|
41
52
|
@worker_mutex.synchronize do
|
42
53
|
worker = @workers.index{|w| w.pid == pid}
|
43
54
|
if worker
|
44
|
-
Log.low "Removed worker #{pid}"
|
45
55
|
@workers.delete_at(worker)
|
46
56
|
@removed_workers << pid
|
57
|
+
Log.low "Removed worker #{pid} from #{queue_id}"
|
58
|
+
else
|
59
|
+
Log.medium "Worker #{pid} not from #{queue_id}"
|
47
60
|
end
|
48
61
|
end
|
49
62
|
end
|
@@ -56,14 +69,14 @@ class WorkQueue
|
|
56
69
|
@reader = Thread.new(Thread.current) do |parent|
|
57
70
|
begin
|
58
71
|
Thread.current.report_on_exception = false
|
59
|
-
Thread.current["name"] = "Output reader #{
|
72
|
+
Thread.current["name"] = "Output reader #{queue_id}"
|
60
73
|
@done_workers ||= []
|
61
74
|
while true
|
62
75
|
obj = @output.read
|
63
76
|
if DoneProcessing === obj
|
64
77
|
|
65
78
|
done = @worker_mutex.synchronize do
|
66
|
-
Log.low "Worker #{obj.pid} done"
|
79
|
+
Log.low "Worker #{obj.pid} from #{queue_id} done"
|
67
80
|
@done_workers << obj.pid
|
68
81
|
@closed && @done_workers.length == @removed_workers.length + @workers.length
|
69
82
|
end
|
@@ -78,12 +91,12 @@ class WorkQueue
|
|
78
91
|
rescue DoneProcessing
|
79
92
|
rescue Aborted
|
80
93
|
rescue WorkerException
|
81
|
-
Log.error "Exception in worker #{obj.pid} in queue #{
|
94
|
+
Log.error "Exception in worker #{obj.pid} in queue #{queue_id}: #{obj.worker_exception.message}"
|
82
95
|
self.abort
|
83
96
|
@input.abort obj.worker_exception
|
84
97
|
raise obj.worker_exception
|
85
98
|
rescue
|
86
|
-
Log.error "Exception processing output in queue #{
|
99
|
+
Log.error "Exception processing output in queue #{queue_id}: #{$!.message}"
|
87
100
|
self.abort
|
88
101
|
raise $!
|
89
102
|
end
|
@@ -95,25 +108,19 @@ class WorkQueue
|
|
95
108
|
|
96
109
|
@waiter = Thread.new do
|
97
110
|
Thread.current.report_on_exception = false
|
98
|
-
Thread.current["name"] = "Worker waiter #{
|
111
|
+
Thread.current["name"] = "Worker waiter #{queue_id}"
|
99
112
|
while true
|
100
113
|
break if @worker_mutex.synchronize{ @workers.empty? }
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
remove_worker(pid) if pid
|
106
|
-
rescue Exception
|
107
|
-
Log.exception $!
|
108
|
-
end
|
109
|
-
end
|
110
|
-
rescue Timeout::Error
|
111
|
-
pids = @worker_mutex.synchronize{ @workers.collect{|w| w.pid } }
|
112
|
-
pids.each do |p|
|
113
|
-
pid, status = Process.wait2 p, Process::WNOHANG
|
114
|
+
threads = @workers.collect do |w|
|
115
|
+
t = Thread.new do
|
116
|
+
Thread.current["name"] = "Worker waiter #{queue_id} worker #{w.pid}"
|
117
|
+
pid, status = Process.wait2 w.pid
|
114
118
|
remove_worker(pid) if pid
|
115
119
|
end
|
120
|
+
Thread.pass until t["name"]
|
121
|
+
t
|
116
122
|
end
|
123
|
+
threads.each do |t| t.join end
|
117
124
|
end
|
118
125
|
end
|
119
126
|
|
@@ -131,16 +138,23 @@ class WorkQueue
|
|
131
138
|
end
|
132
139
|
|
133
140
|
def abort
|
134
|
-
Log.low "Aborting #{@workers.length} workers in queue #{
|
141
|
+
Log.low "Aborting #{@workers.length} workers in queue #{queue_id}"
|
135
142
|
@worker_mutex.synchronize do
|
136
|
-
@workers.each
|
143
|
+
@workers.each do |w|
|
144
|
+
ScoutSemaphore.post_semaphore(@output.write_sem) if @output
|
145
|
+
ScoutSemaphore.post_semaphore(@input.read_sem) if @input
|
146
|
+
w.abort
|
147
|
+
end
|
137
148
|
end
|
138
149
|
end
|
139
150
|
|
140
151
|
def close
|
141
152
|
@closed = true
|
142
153
|
@worker_mutex.synchronize{ @workers.length }.times do
|
143
|
-
|
154
|
+
begin
|
155
|
+
@input.write DoneProcessing.new() unless @input.closed_write?
|
156
|
+
rescue IOError
|
157
|
+
end
|
144
158
|
end
|
145
159
|
end
|
146
160
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
require 'scout/
|
1
|
+
require 'scout/annotation'
|
2
2
|
|
3
3
|
module Workflow
|
4
|
-
extend
|
5
|
-
|
4
|
+
extend Annotation
|
5
|
+
annotation :name, :tasks, :helpers
|
6
6
|
|
7
7
|
class << self
|
8
8
|
attr_accessor :directory
|
@@ -103,7 +103,14 @@ module Workflow
|
|
103
103
|
end
|
104
104
|
|
105
105
|
def task(name_and_type, &block)
|
106
|
-
|
106
|
+
case name_and_type
|
107
|
+
when Hash
|
108
|
+
name, type = name_and_type.collect.first
|
109
|
+
when Symbol
|
110
|
+
name, type = [name_and_type, :binary]
|
111
|
+
when String
|
112
|
+
name, type = [name_and_type, :binary]
|
113
|
+
end
|
107
114
|
type = type.to_sym if String === type
|
108
115
|
name = name.to_sym if String === name
|
109
116
|
@tasks ||= IndiferentHash.setup({})
|
@@ -136,10 +143,11 @@ module Workflow
|
|
136
143
|
def task_alias(name, workflow, oname, *rest, &block)
|
137
144
|
dep(workflow, oname, *rest, &block)
|
138
145
|
extension :dep_task unless @extension
|
139
|
-
task_proc = workflow.tasks[oname]
|
140
|
-
|
141
|
-
|
142
|
-
|
146
|
+
task_proc = workflow.tasks[oname] if workflow.tasks
|
147
|
+
if task_proc
|
148
|
+
returns task_proc.returns if @returns.nil?
|
149
|
+
type = task_proc.type
|
150
|
+
end
|
143
151
|
task name => type do
|
144
152
|
raise RbbtException, "dep_task does not have any dependencies" if dependencies.empty?
|
145
153
|
Step.wait_for_jobs dependencies.select{|d| d.streaming? }
|
@@ -147,18 +155,18 @@ module Workflow
|
|
147
155
|
dep.join
|
148
156
|
raise dep.get_exception if dep.error?
|
149
157
|
raise Aborted, "Aborted dependency #{dep.path}" if dep.aborted?
|
150
|
-
set_info :
|
158
|
+
set_info :type, dep.info[:type]
|
151
159
|
forget = config :forget_dep_tasks, "forget_dep_tasks", :default => FORGET_DEP_TASKS
|
152
160
|
if forget
|
153
161
|
remove = config :remove_dep_tasks, "remove_dep_tasks", :default => REMOVE_DEP_TASKS
|
154
162
|
|
155
163
|
self.archive_deps
|
156
|
-
self.
|
164
|
+
self.copy_linked_files_dir
|
157
165
|
self.dependencies = self.dependencies - [dep]
|
158
166
|
Open.rm_rf self.files_dir if Open.exist? self.files_dir
|
159
167
|
FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?(dep.files_dir)
|
160
168
|
|
161
|
-
if dep.overriden
|
169
|
+
if dep.overriden?
|
162
170
|
Open.link dep.path, self.tmp_path
|
163
171
|
else
|
164
172
|
Open.ln_h dep.path, self.tmp_path
|
@@ -34,7 +34,7 @@ module Workflow
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def self.job_rules(rules, job)
|
37
|
-
workflow = job.workflow.
|
37
|
+
workflow = job.workflow.name
|
38
38
|
task_name = job.task_name.to_s
|
39
39
|
defaults = rules["defaults"] || {}
|
40
40
|
|
@@ -168,7 +168,7 @@ module Workflow
|
|
168
168
|
|
169
169
|
dep_path = dep.path
|
170
170
|
parents = all_jobs.select do |parent|
|
171
|
-
paths = parent.info[:dependencies].nil? ? parent.dependencies.collect{|d| d.path } : parent.info[:dependencies].collect{|d| d.last }
|
171
|
+
paths = parent.info[:dependencies].nil? ? parent.dependencies.collect{|d| d.path } : parent.info[:dependencies].collect{|d| Array === d ? d.last : d }
|
172
172
|
paths.include? dep_path
|
173
173
|
end
|
174
174
|
|
@@ -177,7 +177,7 @@ module Workflow
|
|
177
177
|
parents.each do |parent|
|
178
178
|
Log.high "Erasing #{dep.path} from #{parent.path}"
|
179
179
|
parent.archive_deps
|
180
|
-
parent.
|
180
|
+
parent.copy_linked_files_dir
|
181
181
|
parent.dependencies = parent.dependencies - [dep]
|
182
182
|
end
|
183
183
|
dep.clean
|
@@ -251,4 +251,21 @@ module Workflow
|
|
251
251
|
end
|
252
252
|
end
|
253
253
|
end
|
254
|
+
|
255
|
+
def self.produce_dependencies(jobs, tasks, produce_cpus = Etc.nprocessors)
|
256
|
+
jobs = [jobs] unless Array === jobs
|
257
|
+
produce_list = []
|
258
|
+
jobs.each do |job|
|
259
|
+
next if job.done? || job.running?
|
260
|
+
job.rec_dependencies.each do |job|
|
261
|
+
produce_list << job if tasks.include?(job.task_name) ||
|
262
|
+
tasks.include?(job.task_name.to_s) ||
|
263
|
+
tasks.include?(job.full_task_name)
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
orchestrator = Orchestrator.new 0.1, cpus: produce_cpus.to_i
|
268
|
+
orchestrator.process({}, produce_list)
|
269
|
+
produce_list
|
270
|
+
end
|
254
271
|
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
require 'scout/tsv'
|
2
|
+
|
3
|
+
module Workflow
|
4
|
+
def self.trace_job_times(jobs, fix_gap = false, report_keys = nil)
|
5
|
+
data = TSV.setup({}, "Job~Code,Workflow,Task,Start,End#:type=:list")
|
6
|
+
min_start = nil
|
7
|
+
max_done = nil
|
8
|
+
jobs.each do |job|
|
9
|
+
next unless job.info[:end]
|
10
|
+
started = job.info[:start]
|
11
|
+
ddone = job.info[:end]
|
12
|
+
|
13
|
+
started = Time.parse started if String === started
|
14
|
+
ddone = Time.parse ddone if String === ddone
|
15
|
+
|
16
|
+
code = [job.workflow.name, job.task_name].compact.collect{|s| s.to_s} * " · "
|
17
|
+
code = job.name + " - " + code
|
18
|
+
|
19
|
+
data[job.path] = [code, job.workflow.name, job.task_name, started, ddone]
|
20
|
+
if min_start.nil?
|
21
|
+
min_start = started
|
22
|
+
else
|
23
|
+
min_start = started if started < min_start
|
24
|
+
end
|
25
|
+
|
26
|
+
if max_done.nil?
|
27
|
+
max_done = ddone
|
28
|
+
else
|
29
|
+
max_done = ddone if ddone > max_done
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
data.add_field "Start.second" do |k,value|
|
34
|
+
value["Start"] - min_start
|
35
|
+
end
|
36
|
+
|
37
|
+
data.add_field "End.second" do |k,value|
|
38
|
+
value["End"] - min_start
|
39
|
+
end
|
40
|
+
|
41
|
+
if fix_gap
|
42
|
+
ranges = []
|
43
|
+
data.through do |k,values|
|
44
|
+
start, eend = values.values_at "Start.second", "End.second"
|
45
|
+
|
46
|
+
ranges << (start..eend)
|
47
|
+
end
|
48
|
+
|
49
|
+
gaps = {}
|
50
|
+
last = nil
|
51
|
+
Misc.collapse_ranges(ranges).each do |range|
|
52
|
+
start = range.begin
|
53
|
+
eend = range.end
|
54
|
+
if last
|
55
|
+
gaps[last] = start - last
|
56
|
+
end
|
57
|
+
last = eend
|
58
|
+
end
|
59
|
+
|
60
|
+
data.process "End.second" do |value,k,values|
|
61
|
+
gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
|
62
|
+
value - gap
|
63
|
+
end
|
64
|
+
|
65
|
+
data.process "Start.second" do |value,k,values|
|
66
|
+
gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
|
67
|
+
value - gap
|
68
|
+
end
|
69
|
+
|
70
|
+
total_gaps = Misc.sum(gaps.collect{|k,v| v})
|
71
|
+
Log.info "Total gaps: #{total_gaps} seconds"
|
72
|
+
end
|
73
|
+
|
74
|
+
if report_keys && report_keys.any?
|
75
|
+
job_keys = {}
|
76
|
+
jobs.each do |job|
|
77
|
+
job_info = IndiferentHash.setup(job.info)
|
78
|
+
report_keys.each do |key|
|
79
|
+
job_keys[job.path] ||= {}
|
80
|
+
job_keys[job.path][key] = job_info[key]
|
81
|
+
end
|
82
|
+
end
|
83
|
+
report_keys.each do |key|
|
84
|
+
data.add_field Misc.humanize(key) do |p,values|
|
85
|
+
job_keys[p][key]
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
|
91
|
+
eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
|
92
|
+
total = eend - start unless eend.nil? || start.nil?
|
93
|
+
Log.info "Total time elapsed: #{total} seconds" if total
|
94
|
+
|
95
|
+
if report_keys && report_keys.any?
|
96
|
+
job_keys = {}
|
97
|
+
report_keys.each do |key|
|
98
|
+
jobs.each do |job|
|
99
|
+
job_keys[job.path] ||= {}
|
100
|
+
job_keys[job.path][key] = job.info[key]
|
101
|
+
end
|
102
|
+
end
|
103
|
+
report_keys.each do |key|
|
104
|
+
data.add_field Misc.humanize(key) do |p,values|
|
105
|
+
job_keys[p][key]
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
data
|
111
|
+
end
|
112
|
+
|
113
|
+
def self.trace_job_summary(jobs, report_keys = [])
|
114
|
+
tasks_info = {}
|
115
|
+
|
116
|
+
report_keys = report_keys.collect{|k| k.to_s}
|
117
|
+
|
118
|
+
jobs.each do |dep|
|
119
|
+
next unless dep.info[:end]
|
120
|
+
task = [dep.workflow.name, dep.task_name].compact.collect{|s| s.to_s} * "#"
|
121
|
+
info = tasks_info[task] ||= IndiferentHash.setup({})
|
122
|
+
dep_info = IndiferentHash.setup(dep.info)
|
123
|
+
|
124
|
+
ddone = dep_info[:end]
|
125
|
+
started = dep_info[:start]
|
126
|
+
|
127
|
+
started = Time.parse started if String === started
|
128
|
+
ddone = Time.parse ddone if String === ddone
|
129
|
+
|
130
|
+
time = ddone - started
|
131
|
+
info[:time] ||= []
|
132
|
+
info[:time] << time
|
133
|
+
|
134
|
+
report_keys.each do |key|
|
135
|
+
info[key] = dep_info[key]
|
136
|
+
end
|
137
|
+
|
138
|
+
dep.info[:config_keys].each do |kinfo|
|
139
|
+
key, value, tokens = kinfo
|
140
|
+
|
141
|
+
info[key.to_s] = value if report_keys.include? key.to_s
|
142
|
+
end if dep.info[:config_keys]
|
143
|
+
end
|
144
|
+
|
145
|
+
summary = TSV.setup({}, "Task~Calls,Avg. Time,Total Time#:type=:list")
|
146
|
+
|
147
|
+
tasks_info.each do |task, info|
|
148
|
+
time_lists = info[:time]
|
149
|
+
avg_time = Misc.mean(time_lists).to_i
|
150
|
+
total_time = Misc.sum(time_lists).to_i
|
151
|
+
calls = time_lists.length
|
152
|
+
summary[task] = [calls, avg_time, total_time]
|
153
|
+
end
|
154
|
+
|
155
|
+
report_keys.each do |key|
|
156
|
+
summary.add_field Misc.humanize(key) do |task|
|
157
|
+
tasks_info[task][key]
|
158
|
+
end
|
159
|
+
end if Array === report_keys && report_keys.any?
|
160
|
+
|
161
|
+
summary
|
162
|
+
end
|
163
|
+
|
164
|
+
def self.trace(seed_jobs, options = {})
|
165
|
+
jobs = []
|
166
|
+
seed_jobs.each do |step|
|
167
|
+
jobs += step.rec_dependencies + [step]
|
168
|
+
step.info[:archived_info].each do |path,ainfo|
|
169
|
+
next unless Hash === ainfo
|
170
|
+
archived_step = Step.new path
|
171
|
+
|
172
|
+
archived_step.define_singleton_method :info do
|
173
|
+
ainfo
|
174
|
+
end
|
175
|
+
|
176
|
+
jobs << archived_step
|
177
|
+
end if step.info[:archived_info]
|
178
|
+
|
179
|
+
end
|
180
|
+
|
181
|
+
jobs = jobs.uniq.sort_by{|job| [job, job.info]; t = job.info[:started] || Open.mtime(job.path) || Time.now; Time === t ? t : Time.parse(t) }
|
182
|
+
|
183
|
+
report_keys = options[:report_keys] || ""
|
184
|
+
report_keys = report_keys.split(/,\s*/) if String === report_keys
|
185
|
+
|
186
|
+
data = trace_job_times(jobs, options[:fix_gap], report_keys)
|
187
|
+
|
188
|
+
summary = trace_job_summary(jobs, report_keys)
|
189
|
+
|
190
|
+
|
191
|
+
raise "No jobs to process" if data.size == 0
|
192
|
+
|
193
|
+
size, width, height = options.values_at :size, :width, :height
|
194
|
+
|
195
|
+
size = 800 if size.nil?
|
196
|
+
width = size.to_i * 2 if width.nil?
|
197
|
+
height = size if height.nil?
|
198
|
+
|
199
|
+
if options[:plot_data]
|
200
|
+
data
|
201
|
+
else
|
202
|
+
summary
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
@@ -31,7 +31,7 @@ module Workflow
|
|
31
31
|
title = doc_parse_first_line doc
|
32
32
|
description, task_info = doc_parse_up_to doc, /^# Tasks/i
|
33
33
|
task_description, tasks = doc_parse_up_to task_info, /^##/, true
|
34
|
-
tasks = doc_parse_chunks tasks,
|
34
|
+
tasks = doc_parse_chunks tasks, /^## (.*)/
|
35
35
|
{:title => title.strip, :description => description.strip, :task_description => task_description.strip, :tasks => tasks}
|
36
36
|
end
|
37
37
|
|
@@ -0,0 +1,42 @@
|
|
1
|
+
class Step
|
2
|
+
def archived_info
|
3
|
+
return {} unless Open.exists?(info_file)
|
4
|
+
info[:archived_info] || {}
|
5
|
+
end
|
6
|
+
|
7
|
+
def archived_inputs
|
8
|
+
return [] unless info[:archived_dependencies]
|
9
|
+
archived_info = self.archived_info
|
10
|
+
|
11
|
+
all_inputs = NamedArray.setup([],[])
|
12
|
+
deps = info[:archived_dependencies].dup
|
13
|
+
seen = []
|
14
|
+
while path = deps.pop
|
15
|
+
dep_info = archived_info[path]
|
16
|
+
if Hash === dep_info
|
17
|
+
dep_inputs = dep_info[:inputs]
|
18
|
+
NamedArray.setup(dep_inputs, dep_info[:input_names])
|
19
|
+
all_inputs.concat(dep_inputs)
|
20
|
+
deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
|
21
|
+
deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
|
22
|
+
end
|
23
|
+
seen << path
|
24
|
+
end
|
25
|
+
|
26
|
+
all_inputs
|
27
|
+
end
|
28
|
+
|
29
|
+
def archive_deps(jobs = nil)
|
30
|
+
jobs = dependencies if jobs.nil?
|
31
|
+
|
32
|
+
archived_info = jobs.inject({}) do |acc,dep|
|
33
|
+
next acc unless Open.exists?(dep.info_file)
|
34
|
+
acc[dep.path] = dep.info
|
35
|
+
acc.merge!(dep.archived_info)
|
36
|
+
acc
|
37
|
+
end
|
38
|
+
|
39
|
+
self.set_info :archived_info, archived_info
|
40
|
+
self.set_info :archived_dependencies, info[:dependencies]
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
class Step
|
2
|
+
def child(&block)
|
3
|
+
child_pid = Process.fork &block
|
4
|
+
children_pids = info[:children_pids]
|
5
|
+
if children_pids.nil?
|
6
|
+
children_pids = [child_pid]
|
7
|
+
else
|
8
|
+
children_pids << child_pid
|
9
|
+
end
|
10
|
+
set_info :children_pids, children_pids
|
11
|
+
child_pid
|
12
|
+
end
|
13
|
+
|
14
|
+
def cmd(*args)
|
15
|
+
all_args = *args
|
16
|
+
|
17
|
+
all_args << {} unless Hash === all_args.last
|
18
|
+
|
19
|
+
level = all_args.last[:log] || 0
|
20
|
+
level = 0 if TrueClass === level
|
21
|
+
level = 10 if FalseClass === level
|
22
|
+
level = level.to_i
|
23
|
+
|
24
|
+
all_args.last[:log] = true
|
25
|
+
all_args.last[:pipe] = true
|
26
|
+
|
27
|
+
io = CMD.cmd(*all_args)
|
28
|
+
child_pid = io.pids.first
|
29
|
+
|
30
|
+
children_pids = info[:children_pids]
|
31
|
+
if children_pids.nil?
|
32
|
+
children_pids = [child_pid]
|
33
|
+
else
|
34
|
+
children_pids << child_pid
|
35
|
+
end
|
36
|
+
set_info :children_pids, children_pids
|
37
|
+
|
38
|
+
while c = io.getc
|
39
|
+
STDERR << c if Log.severity <= level
|
40
|
+
if c == "\n"
|
41
|
+
Log.logn "STDOUT [#{child_pid}]: ", level
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
io.join
|
46
|
+
|
47
|
+
nil
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
51
|
+
|
@@ -33,37 +33,51 @@ class Step
|
|
33
33
|
|
34
34
|
def prepare_dependencies
|
35
35
|
inverse_dep = {}
|
36
|
-
|
36
|
+
|
37
|
+
dependencies.each do |dep|
|
37
38
|
if dep.present? && ! dep.updated?
|
38
39
|
Log.debug "Clean outdated #{dep.path}"
|
39
40
|
dep.clean
|
40
41
|
end
|
42
|
+
|
41
43
|
next if dep.done?
|
44
|
+
|
42
45
|
if dep.dependencies
|
43
46
|
dep.dependencies.each do |d|
|
44
47
|
inverse_dep[d] ||= []
|
45
48
|
inverse_dep[d] << dep
|
46
49
|
end
|
47
50
|
end
|
51
|
+
|
48
52
|
input_dependencies.each do |d|
|
49
53
|
inverse_dep[d] ||= []
|
50
54
|
inverse_dep[d] << dep
|
51
55
|
end
|
52
|
-
|
56
|
+
end if dependencies
|
57
|
+
|
53
58
|
inverse_dep.each do |dep,list|
|
54
59
|
dep.tee_copies = list.length
|
55
60
|
end
|
56
61
|
end
|
57
62
|
|
63
|
+
def all_dependencies
|
64
|
+
@all_dependencies ||= begin
|
65
|
+
all_dependencies = []
|
66
|
+
all_dependencies += dependencies if dependencies
|
67
|
+
all_dependencies += input_dependencies if input_dependencies
|
68
|
+
all_dependencies
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
58
72
|
def run_dependencies
|
59
|
-
|
73
|
+
all_dependencies.each do |dep|
|
60
74
|
next if dep.running? || dep.done?
|
61
75
|
compute_options = compute[dep.path] if compute
|
62
76
|
compute_options = [] if compute_options.nil?
|
63
77
|
|
64
78
|
stream = compute_options.include?(:stream)
|
65
79
|
stream = true unless ENV["SCOUT_EXPLICIT_STREAMING"] == 'true'
|
66
|
-
stream =
|
80
|
+
stream = :no_load if compute_options.include?(:produce)
|
67
81
|
|
68
82
|
begin
|
69
83
|
dep.run(stream)
|
@@ -74,17 +88,20 @@ class Step
|
|
74
88
|
raise $!
|
75
89
|
end
|
76
90
|
end
|
77
|
-
|
91
|
+
end
|
78
92
|
end
|
79
93
|
|
80
94
|
def abort_dependencies
|
81
|
-
|
95
|
+
all_dependencies.each{|dep| dep.abort if dep.running? }
|
82
96
|
end
|
83
97
|
|
84
98
|
def self.wait_for_jobs(jobs)
|
85
99
|
threads = []
|
86
100
|
jobs.each do |job|
|
87
|
-
threads << Thread.new
|
101
|
+
threads << Thread.new do
|
102
|
+
Thread.current.report_on_exception = false
|
103
|
+
job.join
|
104
|
+
end
|
88
105
|
end
|
89
106
|
threads.each do |t|
|
90
107
|
t.join
|
@@ -19,7 +19,26 @@ class Step
|
|
19
19
|
dir[file]
|
20
20
|
end
|
21
21
|
|
22
|
+
def files
|
23
|
+
Dir.glob(File.join(files_dir, '**', '*')).reject{|path| File.directory? path }.collect do |path|
|
24
|
+
Misc.path_relative_to(files_dir, path)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
22
28
|
def bundle_files
|
23
29
|
[path, info_file, Dir.glob(File.join(files_dir,"**/*"))].flatten.select{|f| Open.exist?(f) }
|
24
30
|
end
|
31
|
+
|
32
|
+
def copy_linked_files_dir
|
33
|
+
if File.symlink?(self.files_dir)
|
34
|
+
begin
|
35
|
+
realpath = Open.realpath(self.files_dir)
|
36
|
+
Open.rm self.files_dir
|
37
|
+
Open.cp realpath, self.files_dir
|
38
|
+
rescue
|
39
|
+
Log.warn "Copy files_dir for #{self.workflow_short_path} failed: " + $!.message
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
25
44
|
end
|