scout-gear 7.2.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +51 -6
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +33 -29
- data/lib/scout/config.rb +1 -1
- data/lib/scout/exceptions.rb +1 -0
- data/lib/scout/log/color.rb +4 -2
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +71 -2
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/helper.rb +31 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +12 -1
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/misc.rb +1 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +111 -42
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +60 -30
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/semaphore.rb +8 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +85 -54
- data/lib/scout/tsv/index.rb +188 -20
- data/lib/scout/tsv/open.rb +182 -0
- data/lib/scout/tsv/parser.rb +200 -118
- data/lib/scout/tsv/path.rb +5 -6
- data/lib/scout/tsv/persist/adapter.rb +26 -37
- data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
- data/lib/scout/tsv/persist/serialize.rb +117 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
- data/lib/scout/tsv/persist.rb +4 -2
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +136 -37
- data/lib/scout/tsv/util/filter.rb +312 -0
- data/lib/scout/tsv/util/process.rb +73 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +265 -0
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +126 -19
- data/lib/scout/tsv.rb +28 -5
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +29 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +40 -5
- data/lib/scout/workflow/step/progress.rb +14 -0
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +104 -33
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +86 -47
- data/lib/scout/workflow/usage.rb +10 -6
- data/scout-gear.gemspec +30 -3
- data/scout_commands/workflow/task +37 -9
- data/scout_commands/workflow/task_old +2 -2
- data/test/scout/open/test_stream.rb +61 -59
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +67 -0
- data/test/scout/test_tmpfile.rb +1 -1
- data/test/scout/test_tsv.rb +222 -3
- data/test/scout/test_work_queue.rb +21 -18
- data/test/scout/tsv/persist/test_adapter.rb +11 -1
- data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +127 -3
- data/test/scout/tsv/test_open.rb +167 -0
- data/test/scout/tsv/test_parser.rb +45 -3
- data/test/scout/tsv/test_persist.rb +9 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +195 -3
- data/test/scout/tsv/test_util.rb +24 -0
- data/test/scout/tsv/util/test_filter.rb +188 -0
- data/test/scout/tsv/util/test_process.rb +47 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +58 -0
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/work_queue/test_worker.rb +63 -6
- data/test/scout/workflow/step/test_load.rb +3 -3
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +13 -13
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- data/test/test_helper.rb +3 -1
- metadata +29 -2
data/lib/scout/work_queue.rb
CHANGED
@@ -46,7 +46,11 @@ class WorkQueue
|
|
46
46
|
end
|
47
47
|
|
48
48
|
def process(&callback)
|
49
|
-
@
|
49
|
+
@workers.each do |w|
|
50
|
+
w.process @input, @output, &@worker_proc
|
51
|
+
end
|
52
|
+
|
53
|
+
@reader = Thread.new(Thread.current) do |parent|
|
50
54
|
begin
|
51
55
|
Thread.current.report_on_exception = false
|
52
56
|
Thread.current["name"] = "Output reader #{Process.pid}"
|
@@ -71,8 +75,9 @@ class WorkQueue
|
|
71
75
|
rescue DoneProcessing
|
72
76
|
rescue Aborted
|
73
77
|
rescue WorkerException
|
74
|
-
Log.error "Exception in worker #{obj.pid} in queue #{Process.pid}: #{obj.message}"
|
78
|
+
Log.error "Exception in worker #{obj.pid} in queue #{Process.pid}: #{obj.worker_exception.message}"
|
75
79
|
self.abort
|
80
|
+
@input.abort obj.worker_exception
|
76
81
|
raise obj.worker_exception
|
77
82
|
rescue
|
78
83
|
Log.error "Exception processing output in queue #{Process.pid}: #{$!.message}"
|
@@ -81,10 +86,6 @@ class WorkQueue
|
|
81
86
|
end
|
82
87
|
end
|
83
88
|
|
84
|
-
@workers.each do |w|
|
85
|
-
w.process @input, @output, &@worker_proc
|
86
|
-
end
|
87
|
-
|
88
89
|
Thread.pass until @reader["name"]
|
89
90
|
|
90
91
|
@waiter = Thread.new do
|
@@ -104,7 +105,13 @@ class WorkQueue
|
|
104
105
|
end
|
105
106
|
|
106
107
|
def write(obj)
|
107
|
-
|
108
|
+
begin
|
109
|
+
@input.write obj
|
110
|
+
rescue Exception
|
111
|
+
raise $! unless @input.exception
|
112
|
+
ensure
|
113
|
+
raise @input.exception if @input.exception
|
114
|
+
end
|
108
115
|
end
|
109
116
|
|
110
117
|
def abort
|
@@ -117,7 +124,7 @@ class WorkQueue
|
|
117
124
|
def close
|
118
125
|
@closed = true
|
119
126
|
@worker_mutex.synchronize{ @workers.length }.times do
|
120
|
-
@input.write DoneProcessing.new()
|
127
|
+
@input.write DoneProcessing.new() unless @input.closed_write?
|
121
128
|
end
|
122
129
|
end
|
123
130
|
|
@@ -76,9 +76,11 @@ module Workflow
|
|
76
76
|
end
|
77
77
|
when 1
|
78
78
|
task = args.first
|
79
|
+
options, task = task, nil if Hash === task
|
79
80
|
end
|
80
81
|
workflow = self if workflow.nil?
|
81
82
|
options = {} if options.nil?
|
83
|
+
task = task.to_sym if task
|
82
84
|
annotate_next_task :deps, [workflow, task, options, block, args]
|
83
85
|
end
|
84
86
|
|
@@ -103,6 +105,20 @@ module Workflow
|
|
103
105
|
@tasks ||= IndiferentHash.setup({})
|
104
106
|
begin
|
105
107
|
@annotate_next_task ||= {}
|
108
|
+
@annotate_next_task[:extension] ||=
|
109
|
+
case type
|
110
|
+
when :tsv
|
111
|
+
"tsv"
|
112
|
+
when :yaml
|
113
|
+
"yaml"
|
114
|
+
when :marshal
|
115
|
+
"marshal"
|
116
|
+
when :json
|
117
|
+
"json"
|
118
|
+
else
|
119
|
+
nil
|
120
|
+
end
|
121
|
+
|
106
122
|
task = Task.setup(block, @annotate_next_task.merge(name: name, type: type, directory: directory[name], workflow: self))
|
107
123
|
@tasks[name] = task
|
108
124
|
ensure
|
@@ -110,11 +126,14 @@ module Workflow
|
|
110
126
|
end
|
111
127
|
end
|
112
128
|
|
129
|
+
FORGET_DEP_TASKS = ENV["SCOUT_FORGET_DEP_TASKS"] == "true"
|
130
|
+
REMOVE_DEP_TASKS = ENV["SCOUT_REMOVE_DEP_TASKS"] == "true"
|
113
131
|
def task_alias(name, workflow, oname, *rest, &block)
|
114
132
|
dep(workflow, oname, *rest, &block)
|
115
133
|
extension :dep_task unless @extension
|
116
|
-
returns workflow.tasks[oname].returns if
|
117
|
-
|
134
|
+
returns workflow.tasks[oname].returns if @returns.nil?
|
135
|
+
type = workflow.tasks[oname].type
|
136
|
+
task name => type do
|
118
137
|
raise RbbtException, "dep_task does not have any dependencies" if dependencies.empty?
|
119
138
|
Step.wait_for_jobs dependencies.select{|d| d.streaming? }
|
120
139
|
dep = dependencies.last
|
@@ -164,4 +183,12 @@ module Workflow
|
|
164
183
|
end
|
165
184
|
|
166
185
|
alias dep_task task_alias
|
186
|
+
|
187
|
+
def export(*args)
|
188
|
+
end
|
189
|
+
|
190
|
+
alias export_synchronous export
|
191
|
+
alias export_asynchronous export
|
192
|
+
alias export_exec export
|
193
|
+
alias export_stream export
|
167
194
|
end
|
@@ -1,8 +1,13 @@
|
|
1
1
|
class Step
|
2
|
+
def rec_dependencies
|
3
|
+
rec_dependencies = dependencies.dup
|
4
|
+
dependencies.inject(rec_dependencies){|acc,d| acc.concat d.rec_dependencies }
|
5
|
+
end
|
6
|
+
|
2
7
|
def recursive_inputs
|
3
|
-
|
4
|
-
|
5
|
-
acc
|
8
|
+
recursive_inputs = @inputs.to_hash
|
9
|
+
dependencies.inject(recursive_inputs) do |acc,dep|
|
10
|
+
acc.merge(dep.recursive_inputs)
|
6
11
|
end
|
7
12
|
end
|
8
13
|
|
@@ -16,6 +21,10 @@ class Step
|
|
16
21
|
def prepare_dependencies
|
17
22
|
inverse_dep = {}
|
18
23
|
dependencies.each{|dep|
|
24
|
+
if dep.present? && ! dep.updated?
|
25
|
+
Log.debug "Clean outdated #{dep.path}"
|
26
|
+
dep.clean
|
27
|
+
end
|
19
28
|
next if dep.done?
|
20
29
|
if dep.dependencies
|
21
30
|
dep.dependencies.each do |d|
|
@@ -34,7 +43,18 @@ class Step
|
|
34
43
|
end
|
35
44
|
|
36
45
|
def run_dependencies
|
37
|
-
dependencies.each{|dep| dep.run unless dep.running? || dep.done? }
|
46
|
+
dependencies.each{|dep| dep.run(true) unless dep.running? || dep.done? }
|
38
47
|
end
|
39
48
|
|
49
|
+
def abort_dependencies
|
50
|
+
dependencies.each{|dep| dep.abort if dep.running? }
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.wait_for_jobs(jobs)
|
54
|
+
threads = []
|
55
|
+
jobs.each do |job|
|
56
|
+
threads << job.join
|
57
|
+
end
|
58
|
+
threads.each do |t| t.join end
|
59
|
+
end
|
40
60
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
class Step
|
2
2
|
SERIALIZER = :marshal
|
3
3
|
def info_file
|
4
|
+
return nil if @path.nil?
|
4
5
|
@info_file ||= begin
|
5
6
|
info_file = @path + ".info"
|
6
7
|
@path.annotate info_file if Path === @path
|
@@ -19,6 +20,10 @@ class Step
|
|
19
20
|
@info_load_time = Time.now
|
20
21
|
end
|
21
22
|
|
23
|
+
def clear_info
|
24
|
+
save_info(@info = {})
|
25
|
+
end
|
26
|
+
|
22
27
|
def info
|
23
28
|
outdated = begin
|
24
29
|
@info_load_time && (mtime = Open.mtime(info_file)) && mtime > @info_load_time
|
@@ -36,7 +41,19 @@ class Step
|
|
36
41
|
def merge_info(new_info)
|
37
42
|
info = self.info
|
38
43
|
new_info.each do |key,value|
|
39
|
-
|
44
|
+
if key == :status
|
45
|
+
message = new_info[:messages]
|
46
|
+
if message.nil? && value == :done || value == :error || value == :aborted
|
47
|
+
start = info[:start]
|
48
|
+
eend = new_info[:end]
|
49
|
+
if start && eend
|
50
|
+
time = eend - start
|
51
|
+
time_str = Misc.format_seconds_short(time)
|
52
|
+
message = Log.color(:time, time_str)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
report_status value, message
|
56
|
+
end
|
40
57
|
if Exception === value
|
41
58
|
begin
|
42
59
|
Marshal.dump(value)
|
@@ -72,15 +89,21 @@ class Step
|
|
72
89
|
|
73
90
|
def report_status(status, message = nil)
|
74
91
|
if message.nil?
|
75
|
-
Log.info Log.color(:status, status, true)
|
92
|
+
Log.info [Log.color(:status, status, true), Log.color(:task, task_name, true), Log.color(:path, path)] * " "
|
76
93
|
else
|
77
|
-
Log.info Log.color(:status, status, true)
|
94
|
+
Log.info [Log.color(:status, status, true), Log.color(:task, task_name, true), message, Log.color(:path, path)] * " "
|
78
95
|
end
|
79
96
|
end
|
80
97
|
|
81
|
-
def log(status, message = nil)
|
98
|
+
def log(status, message = nil, &block)
|
99
|
+
if block_given?
|
100
|
+
time = Misc.exec_time &block
|
101
|
+
time_str = Misc.format_seconds_short time
|
102
|
+
message = message.nil? ? Log.color(:time, time_str) : "#{Log.color :time, time_str} - #{ message }"
|
103
|
+
end
|
104
|
+
|
82
105
|
if message
|
83
|
-
merge_info :status => status, :messages =>
|
106
|
+
merge_info :status => status, :messages => message
|
84
107
|
else
|
85
108
|
merge_info :status => status
|
86
109
|
end
|
@@ -101,4 +124,16 @@ class Step
|
|
101
124
|
def running?
|
102
125
|
! done? && (info[:pid] && Misc.pid_alive?(info[:pid]))
|
103
126
|
end
|
127
|
+
|
128
|
+
def exception
|
129
|
+
info[:exception]
|
130
|
+
end
|
131
|
+
|
132
|
+
def marshal_dump
|
133
|
+
@path
|
134
|
+
end
|
135
|
+
|
136
|
+
def marshal_load(path)
|
137
|
+
Step.new path
|
138
|
+
end
|
104
139
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
class Step
|
2
|
+
def progress_bar(msg = "Progress", options = nil)
|
3
|
+
if Hash === msg and options.nil?
|
4
|
+
options = msg
|
5
|
+
msg = nil
|
6
|
+
end
|
7
|
+
options = {} if options.nil?
|
8
|
+
|
9
|
+
max = options[:max]
|
10
|
+
Open.mkdir files_dir
|
11
|
+
Log::ProgressBar.new_bar(max, {:desc => msg, :file => (@exec ? nil : file(:progress))}.merge(options))
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
@@ -5,7 +5,7 @@ class Step
|
|
5
5
|
|
6
6
|
def self.status_color(status)
|
7
7
|
case status.to_sym
|
8
|
-
when :error, :aborted, :
|
8
|
+
when :error, :aborted, :dead, :unsync
|
9
9
|
:red
|
10
10
|
when :streaming, :started
|
11
11
|
:cyan
|
@@ -13,7 +13,7 @@ class Step
|
|
13
13
|
:green
|
14
14
|
when :dependencies, :waiting, :setup
|
15
15
|
:yellow
|
16
|
-
when :notfound, :cleaned
|
16
|
+
when :notfound, :cleaned, :missing
|
17
17
|
:blue
|
18
18
|
else
|
19
19
|
if status.to_s.index ">"
|
@@ -91,6 +91,7 @@ class Step
|
|
91
91
|
info[:task_name] = task
|
92
92
|
path = step.path
|
93
93
|
status = info[:status] || :missing
|
94
|
+
status = :noinfo if status == :missing && Open.exist?(path)
|
94
95
|
status = "remote" if Open.remote?(path) || Open.ssh?(path)
|
95
96
|
name = info[:name] || File.basename(path)
|
96
97
|
status = :unsync if status == :done and not Open.exist?(path)
|
@@ -103,9 +104,9 @@ class Step
|
|
103
104
|
step.dependencies.each do |dep|
|
104
105
|
if dep.input_dependencies.any?
|
105
106
|
dep.input_dependencies.each do |id|
|
106
|
-
input_name, _dep = dep.recursive_inputs.
|
107
|
+
input_name, _dep = dep.recursive_inputs.select{|f,d|
|
107
108
|
d == id || (String === d && d.start_with?(id.files_dir)) || (Array === d && d.include?(id))
|
108
|
-
}.last
|
109
|
+
}.keys.last
|
109
110
|
if input_name
|
110
111
|
input_dependencies[id] ||= []
|
111
112
|
input_dependencies[id] << [dep, input_name]
|
@@ -115,10 +116,10 @@ class Step
|
|
115
116
|
end if step.dependencies
|
116
117
|
|
117
118
|
str = ""
|
118
|
-
str = " " * offset + this_step_msg if ENV["
|
119
|
+
str = " " * offset + this_step_msg if ENV["SCOUT_ORIGINAL_STACK"] == 'true'
|
119
120
|
|
120
121
|
step.dependencies.dup.tap{|l|
|
121
|
-
l.reverse! if ENV["
|
122
|
+
l.reverse! if ENV["SCOUT_ORIGINAL_STACK"] == 'true'
|
122
123
|
}.each do |dep|
|
123
124
|
path = dep.path
|
124
125
|
new = ! seen.include?(path)
|
@@ -141,7 +142,7 @@ class Step
|
|
141
142
|
end
|
142
143
|
end if step.dependencies
|
143
144
|
|
144
|
-
str += (" " * offset) + this_step_msg unless ENV["
|
145
|
+
str += (" " * offset) + this_step_msg unless ENV["SCOUT_ORIGINAL_STACK"] == 'true'
|
145
146
|
|
146
147
|
str
|
147
148
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class Step
|
2
|
+
def abort(exception = nil)
|
3
|
+
while @result && streaming? && stream = self.stream
|
4
|
+
stream.abort(exception)
|
5
|
+
end
|
6
|
+
end
|
7
|
+
|
8
|
+
def recoverable_error?
|
9
|
+
self.error? && ! (ScoutException === self.exception)
|
10
|
+
end
|
11
|
+
|
12
|
+
def updated?
|
13
|
+
return false if self.error? && self.recoverable_error?
|
14
|
+
return true unless ENV["SCOUT_UPDATE"]
|
15
|
+
newer = rec_dependencies.select{|dep| Path.newer?(self.path, dep.path) }
|
16
|
+
newer += input_dependencies.select{|dep| Path.newer?(self.path, dep.path) }
|
17
|
+
|
18
|
+
newer.empty?
|
19
|
+
end
|
20
|
+
|
21
|
+
def clean
|
22
|
+
@take_stream = nil
|
23
|
+
@result = nil
|
24
|
+
@info = nil
|
25
|
+
@info_load_time = nil
|
26
|
+
Open.rm path if Open.exist?(path)
|
27
|
+
Open.rm info_file if Open.exist?(info_file)
|
28
|
+
Open.rm_rf files_dir if Open.exist?(files_dir)
|
29
|
+
end
|
30
|
+
|
31
|
+
def present?
|
32
|
+
Open.exist?(path) &&
|
33
|
+
Open.exist?(info_file) &&
|
34
|
+
Open.exist?(files_dir)
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
def recursive_clean
|
39
|
+
dependencies.each do |dep|
|
40
|
+
dep.recursive_clean
|
41
|
+
end
|
42
|
+
clean
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
data/lib/scout/workflow/step.rb
CHANGED
@@ -1,19 +1,23 @@
|
|
1
1
|
require_relative '../path'
|
2
2
|
require_relative '../persist'
|
3
3
|
require_relative 'step/info'
|
4
|
+
require_relative 'step/status'
|
4
5
|
require_relative 'step/load'
|
5
6
|
require_relative 'step/file'
|
6
7
|
require_relative 'step/dependencies'
|
7
8
|
require_relative 'step/provenance'
|
8
9
|
require_relative 'step/config'
|
10
|
+
require_relative 'step/progress'
|
9
11
|
|
10
12
|
class Step
|
11
13
|
|
12
|
-
attr_accessor :path, :inputs, :dependencies, :task, :tee_copies
|
13
|
-
def initialize(path, inputs = nil, dependencies = nil, &task)
|
14
|
+
attr_accessor :path, :inputs, :dependencies, :id, :task, :tee_copies, :non_default_inputs
|
15
|
+
def initialize(path = nil, inputs = nil, dependencies = nil, id = nil, non_default_inputs = nil, &task)
|
14
16
|
@path = path
|
15
17
|
@inputs = inputs
|
16
18
|
@dependencies = dependencies
|
19
|
+
@id = id
|
20
|
+
@non_default_inputs = non_default_inputs
|
17
21
|
@task = task
|
18
22
|
@mutex = Mutex.new
|
19
23
|
@tee_copies = 1
|
@@ -25,7 +29,7 @@ class Step
|
|
25
29
|
|
26
30
|
def inputs
|
27
31
|
@inputs ||= begin
|
28
|
-
if Open.exists?(info_file)
|
32
|
+
if info_file && Open.exists?(info_file)
|
29
33
|
info[:inputs]
|
30
34
|
else
|
31
35
|
[]
|
@@ -54,6 +58,13 @@ class Step
|
|
54
58
|
@name ||= File.basename(@path)
|
55
59
|
end
|
56
60
|
|
61
|
+
def clean_name
|
62
|
+
return @id if @id
|
63
|
+
return info[:clean_name] if info.include? :clean_name
|
64
|
+
return m[1] if m = name.match(/(.*?)(?:_[a-z0-9]{32})?(?:\..*)?/)
|
65
|
+
return name.split(".").first
|
66
|
+
end
|
67
|
+
|
57
68
|
def task_name
|
58
69
|
@task_name ||= @task.name if @task.respond_to?(:name)
|
59
70
|
end
|
@@ -63,24 +74,65 @@ class Step
|
|
63
74
|
end
|
64
75
|
|
65
76
|
def exec
|
77
|
+
|
78
|
+
if inputs
|
79
|
+
if Task === task
|
80
|
+
types = task.inputs.collect{|name,type| type }
|
81
|
+
new_inputs = inputs.zip(types).collect{|input,info|
|
82
|
+
type, desc, default, options = info
|
83
|
+
next input unless Step === input
|
84
|
+
input.join if input.streaming?
|
85
|
+
Task.format_input(input.join.path, type, options)
|
86
|
+
}
|
87
|
+
else
|
88
|
+
new_inputs = inputs.collect{|input|
|
89
|
+
Step === input ? input.load : input
|
90
|
+
}
|
91
|
+
end
|
92
|
+
inputs = new_inputs
|
93
|
+
end
|
94
|
+
|
66
95
|
@result = self.instance_exec(*inputs, &task)
|
67
96
|
end
|
68
97
|
|
98
|
+
def tmp_path
|
99
|
+
@tmp_path ||= begin
|
100
|
+
basename = File.basename(@path)
|
101
|
+
dirname = File.dirname(@path)
|
102
|
+
tmp_path = File.join(dirname, '.' + basename)
|
103
|
+
@path.setup(tmp_path) if Path === @path
|
104
|
+
tmp_path
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
69
108
|
attr_reader :result
|
70
|
-
def run
|
109
|
+
def run(stream = false)
|
71
110
|
return @result || self.load if done?
|
72
111
|
prepare_dependencies
|
73
112
|
run_dependencies
|
74
|
-
@result =
|
113
|
+
@result =
|
75
114
|
begin
|
76
|
-
|
77
|
-
|
78
|
-
:
|
79
|
-
|
115
|
+
Persist.persist(name, type, :path => path, :tee_copies => tee_copies) do
|
116
|
+
clear_info
|
117
|
+
merge_info :status => :start, :start => Time.now,
|
118
|
+
:pid => Process.pid, :pid_hostname => ENV["HOSTNAME"],
|
119
|
+
:inputs => inputs, :type => type,
|
120
|
+
:dependencies => dependencies.collect{|d| d.path }
|
121
|
+
|
122
|
+
|
123
|
+
@result = exec
|
80
124
|
|
81
|
-
|
125
|
+
if @result.nil? && File.exist?(self.tmp_path) && ! File.exist?(self.path)
|
126
|
+
Open.mv self.tmp_path, self.path
|
127
|
+
else
|
128
|
+
@result = @result.stream if @result.respond_to?(:stream)
|
129
|
+
end
|
130
|
+
|
131
|
+
@result
|
132
|
+
end
|
82
133
|
rescue Exception => e
|
83
|
-
merge_info :status => :error, :exception => e
|
134
|
+
merge_info :status => :error, :exception => e, :end => Time.now
|
135
|
+
abort_dependencies
|
84
136
|
raise e
|
85
137
|
ensure
|
86
138
|
if ! (error? || aborted?)
|
@@ -103,6 +155,17 @@ class Step
|
|
103
155
|
end
|
104
156
|
end
|
105
157
|
end
|
158
|
+
|
159
|
+
if stream && ENV["SCOUT_NO_STREAM"].nil?
|
160
|
+
@result
|
161
|
+
else
|
162
|
+
if IO === @result || @result.respond_to?(:stream)
|
163
|
+
join
|
164
|
+
@result = nil
|
165
|
+
self.load
|
166
|
+
else
|
167
|
+
@result
|
168
|
+
end
|
106
169
|
end
|
107
170
|
end
|
108
171
|
|
@@ -111,10 +174,10 @@ class Step
|
|
111
174
|
end
|
112
175
|
|
113
176
|
def streaming?
|
114
|
-
@take_stream || IO === @result || StringIO === @result
|
177
|
+
@take_stream || IO === @result || StringIO === @result
|
115
178
|
end
|
116
179
|
|
117
|
-
def
|
180
|
+
def stream
|
118
181
|
synchronize do
|
119
182
|
if streaming? && ! @result.nil?
|
120
183
|
if @result.next
|
@@ -136,9 +199,24 @@ class Step
|
|
136
199
|
end
|
137
200
|
end
|
138
201
|
|
202
|
+
def consume_all_streams
|
203
|
+
threads = []
|
204
|
+
while @result && streaming? && stream = self.stream
|
205
|
+
threads << Open.consume_stream(stream, true)
|
206
|
+
end
|
207
|
+
threads.each do |t|
|
208
|
+
begin
|
209
|
+
t.join
|
210
|
+
rescue
|
211
|
+
threads.each{|t| t.raise(Aborted); t.join }
|
212
|
+
raise $!
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
139
217
|
def join
|
140
|
-
|
141
|
-
|
218
|
+
consume_all_streams
|
219
|
+
self
|
142
220
|
end
|
143
221
|
|
144
222
|
def produce
|
@@ -152,31 +230,24 @@ class Step
|
|
152
230
|
done? ? Persist.load(path, type) : exec
|
153
231
|
end
|
154
232
|
|
155
|
-
def clean
|
156
|
-
@take_stream = nil
|
157
|
-
@result = nil
|
158
|
-
@info = nil
|
159
|
-
@info_load_time = nil
|
160
|
-
Open.rm path if Open.exist?(path)
|
161
|
-
Open.rm info_file if Open.exist?(info_file)
|
162
|
-
Open.rm_rf files_dir if Open.exist?(files_dir)
|
163
|
-
end
|
164
|
-
|
165
|
-
def recursive_clean
|
166
|
-
dependencies.each do |dep|
|
167
|
-
dep.recursive_clean
|
168
|
-
end
|
169
|
-
clean
|
170
|
-
end
|
171
|
-
|
172
233
|
def step(task_name)
|
173
234
|
dependencies.each do |dep|
|
174
235
|
return dep if dep.task_name == task_name
|
236
|
+
rec_dep = dep.step(task_name)
|
237
|
+
return rec_dep if rec_dep
|
175
238
|
end
|
176
239
|
nil
|
177
240
|
end
|
178
241
|
|
242
|
+
def short_path
|
243
|
+
Scout.identify @path
|
244
|
+
end
|
245
|
+
|
179
246
|
def digest_str
|
180
|
-
|
247
|
+
"Step: " + short_path
|
248
|
+
end
|
249
|
+
|
250
|
+
def fingerprint
|
251
|
+
digest_str
|
181
252
|
end
|
182
253
|
end
|
@@ -1,11 +1,10 @@
|
|
1
|
+
require_relative '../../named_array'
|
1
2
|
module Task
|
3
|
+
def self.format_input(value, type, options = {})
|
4
|
+
return value if IO === value || StringIO === value || Step === value
|
2
5
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
value = value.load if Step === value
|
7
|
-
if String === value && ! [:path, :file].include?(type)
|
8
|
-
if Open.exists?(value)
|
6
|
+
if String === value && ! [:path, :file, :folder].include?(type) && (options.nil? || (! options[:noload] || options[:stream]))
|
7
|
+
if Open.exists?(value) && ! Open.directory?(value)
|
9
8
|
Persist.load(value, type)
|
10
9
|
else
|
11
10
|
Persist.deserialize(value, type)
|
@@ -13,7 +12,7 @@ module Task
|
|
13
12
|
else
|
14
13
|
if m = type.to_s.match(/(.*)_array/)
|
15
14
|
if Array === value
|
16
|
-
value.collect{|v| format_input(v, m[1].to_sym, options) }
|
15
|
+
value.collect{|v| self.format_input(v, m[1].to_sym, options) }
|
17
16
|
end
|
18
17
|
else
|
19
18
|
value
|
@@ -34,35 +33,30 @@ module Task
|
|
34
33
|
IndiferentHash.setup(provided_inputs) if Hash === provided_inputs
|
35
34
|
|
36
35
|
input_array = []
|
36
|
+
input_names = []
|
37
37
|
non_default_inputs = []
|
38
38
|
self.inputs.each_with_index do |p,i|
|
39
39
|
name, type, desc, value, options = p
|
40
|
+
input_names << name
|
40
41
|
provided = Hash === provided_inputs ? provided_inputs[name] : provided_inputs[i]
|
41
|
-
provided = format_input(provided, type, options || {})
|
42
|
+
provided = Task.format_input(provided, type, options || {})
|
42
43
|
if ! provided.nil? && provided != value
|
43
|
-
non_default_inputs << name
|
44
|
+
non_default_inputs << name.to_sym
|
44
45
|
input_array << provided
|
45
46
|
else
|
46
47
|
input_array << value
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
51
|
+
NamedArray.setup(input_array, input_names)
|
52
|
+
|
50
53
|
[input_array, non_default_inputs]
|
51
54
|
end
|
52
55
|
|
53
|
-
def digest_inputs(provided_inputs = {})
|
54
|
-
input_array, non_default_inputs = assign_inputs(provided_inputs)
|
55
|
-
if Array === provided_inputs
|
56
|
-
Misc.digest(input_array)
|
57
|
-
else
|
58
|
-
Misc.digest(input_array)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
56
|
def process_inputs(provided_inputs = {})
|
63
57
|
input_array, non_default_inputs = assign_inputs provided_inputs
|
64
|
-
|
65
|
-
[input_array, non_default_inputs,
|
58
|
+
digest_str = Misc.digest_str(input_array)
|
59
|
+
[input_array, non_default_inputs, digest_str]
|
66
60
|
end
|
67
61
|
|
68
62
|
def save_file_input(orig_file, directory)
|