scout-gear 10.3.0 → 10.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (146)
  1. checksums.yaml +4 -4
  2. data/.vimproject +100 -657
  3. data/Rakefile +1 -0
  4. data/VERSION +1 -1
  5. data/bin/scout +1 -3
  6. data/lib/scout/association/fields.rb +170 -0
  7. data/lib/scout/association/index.rb +229 -0
  8. data/lib/scout/association/item.rb +227 -0
  9. data/lib/scout/association/util.rb +7 -0
  10. data/lib/scout/association.rb +100 -0
  11. data/lib/scout/entity/format.rb +62 -0
  12. data/lib/scout/entity/identifiers.rb +111 -0
  13. data/lib/scout/entity/object.rb +20 -0
  14. data/lib/scout/entity/property.rb +165 -0
  15. data/lib/scout/entity.rb +40 -0
  16. data/lib/scout/offsite/step.rb +2 -2
  17. data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
  18. data/lib/scout/persist/engine/packed_index.rb +100 -0
  19. data/lib/scout/persist/engine/sharder.rb +219 -0
  20. data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
  21. data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
  22. data/lib/scout/persist/engine.rb +4 -0
  23. data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
  24. data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
  25. data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
  26. data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
  27. data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
  28. data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
  29. data/lib/scout/persist/tsv/adapter.rb +6 -0
  30. data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
  31. data/lib/scout/persist/tsv.rb +107 -0
  32. data/lib/scout/tsv/annotation/repo.rb +83 -0
  33. data/lib/scout/tsv/annotation.rb +169 -0
  34. data/lib/scout/tsv/attach.rb +104 -20
  35. data/lib/scout/tsv/change_id/translate.rb +148 -0
  36. data/lib/scout/tsv/change_id.rb +6 -3
  37. data/lib/scout/tsv/csv.rb +85 -0
  38. data/lib/scout/tsv/dumper.rb +113 -25
  39. data/lib/scout/tsv/entity.rb +5 -0
  40. data/lib/scout/tsv/index.rb +89 -37
  41. data/lib/scout/tsv/open.rb +21 -8
  42. data/lib/scout/tsv/parser.rb +156 -91
  43. data/lib/scout/tsv/path.rb +7 -2
  44. data/lib/scout/tsv/stream.rb +48 -6
  45. data/lib/scout/tsv/transformer.rb +25 -3
  46. data/lib/scout/tsv/traverse.rb +26 -18
  47. data/lib/scout/tsv/util/process.rb +8 -1
  48. data/lib/scout/tsv/util/reorder.rb +25 -15
  49. data/lib/scout/tsv/util/select.rb +9 -1
  50. data/lib/scout/tsv/util/sort.rb +90 -2
  51. data/lib/scout/tsv/util/unzip.rb +56 -0
  52. data/lib/scout/tsv/util.rb +52 -5
  53. data/lib/scout/tsv.rb +85 -19
  54. data/lib/scout/work_queue/socket.rb +8 -0
  55. data/lib/scout/work_queue/worker.rb +22 -5
  56. data/lib/scout/work_queue.rb +38 -24
  57. data/lib/scout/workflow/definition.rb +19 -11
  58. data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
  59. data/lib/scout/workflow/deployment/trace.rb +205 -0
  60. data/lib/scout/workflow/deployment.rb +1 -0
  61. data/lib/scout/workflow/documentation.rb +1 -1
  62. data/lib/scout/workflow/step/archive.rb +42 -0
  63. data/lib/scout/workflow/step/children.rb +51 -0
  64. data/lib/scout/workflow/step/config.rb +1 -1
  65. data/lib/scout/workflow/step/dependencies.rb +24 -7
  66. data/lib/scout/workflow/step/file.rb +19 -0
  67. data/lib/scout/workflow/step/info.rb +37 -9
  68. data/lib/scout/workflow/step/progress.rb +11 -2
  69. data/lib/scout/workflow/step/status.rb +8 -1
  70. data/lib/scout/workflow/step.rb +80 -25
  71. data/lib/scout/workflow/task/dependencies.rb +4 -1
  72. data/lib/scout/workflow/task/inputs.rb +91 -41
  73. data/lib/scout/workflow/task.rb +54 -57
  74. data/lib/scout/workflow/usage.rb +1 -1
  75. data/lib/scout/workflow/util.rb +4 -0
  76. data/lib/scout/workflow.rb +110 -13
  77. data/lib/scout-gear.rb +2 -0
  78. data/lib/scout.rb +0 -1
  79. data/scout-gear.gemspec +80 -23
  80. data/scout_commands/rbbt +2 -0
  81. data/test/data/person/brothers +4 -0
  82. data/test/data/person/identifiers +10 -0
  83. data/test/data/person/marriages +3 -0
  84. data/test/data/person/parents +6 -0
  85. data/test/scout/association/test_fields.rb +105 -0
  86. data/test/scout/association/test_index.rb +70 -0
  87. data/test/scout/association/test_item.rb +21 -0
  88. data/test/scout/entity/test_format.rb +19 -0
  89. data/test/scout/entity/test_identifiers.rb +58 -0
  90. data/test/scout/entity/test_object.rb +0 -0
  91. data/test/scout/entity/test_property.rb +345 -0
  92. data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
  93. data/test/scout/persist/engine/test_packed_index.rb +99 -0
  94. data/test/scout/persist/engine/test_sharder.rb +31 -0
  95. data/test/scout/persist/engine/test_tkrzw.rb +0 -0
  96. data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
  97. data/test/scout/persist/test_tsv.rb +146 -0
  98. data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
  99. data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
  100. data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
  101. data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
  102. data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
  103. data/test/scout/persist/tsv/adapter/test_tkrzw.rb +126 -0
  104. data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
  105. data/test/scout/persist/tsv/test_serialize.rb +12 -0
  106. data/test/scout/test_association.rb +51 -0
  107. data/test/scout/test_entity.rb +40 -0
  108. data/test/scout/test_tsv.rb +63 -4
  109. data/test/scout/test_work_queue.rb +3 -2
  110. data/test/scout/test_workflow.rb +16 -15
  111. data/test/scout/tsv/annotation/test_repo.rb +150 -0
  112. data/test/scout/tsv/change_id/test_translate.rb +178 -0
  113. data/test/scout/tsv/test_annotation.rb +52 -0
  114. data/test/scout/tsv/test_attach.rb +226 -1
  115. data/test/scout/tsv/test_change_id.rb +25 -0
  116. data/test/scout/tsv/test_csv.rb +50 -0
  117. data/test/scout/tsv/test_dumper.rb +38 -0
  118. data/test/scout/tsv/test_entity.rb +0 -0
  119. data/test/scout/tsv/test_index.rb +82 -0
  120. data/test/scout/tsv/test_open.rb +44 -0
  121. data/test/scout/tsv/test_parser.rb +70 -0
  122. data/test/scout/tsv/test_stream.rb +22 -0
  123. data/test/scout/tsv/test_transformer.rb +39 -3
  124. data/test/scout/tsv/test_traverse.rb +78 -0
  125. data/test/scout/tsv/util/test_process.rb +36 -0
  126. data/test/scout/tsv/util/test_reorder.rb +67 -0
  127. data/test/scout/tsv/util/test_sort.rb +28 -1
  128. data/test/scout/tsv/util/test_unzip.rb +32 -0
  129. data/test/scout/work_queue/test_socket.rb +4 -1
  130. data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
  131. data/test/scout/workflow/deployment/test_trace.rb +25 -0
  132. data/test/scout/workflow/step/test_archive.rb +28 -0
  133. data/test/scout/workflow/step/test_children.rb +25 -0
  134. data/test/scout/workflow/step/test_info.rb +16 -0
  135. data/test/scout/workflow/task/test_dependencies.rb +16 -16
  136. data/test/scout/workflow/task/test_inputs.rb +45 -1
  137. data/test/scout/workflow/test_definition.rb +52 -0
  138. data/test/scout/workflow/test_step.rb +57 -0
  139. data/test/scout/workflow/test_task.rb +26 -1
  140. data/test/scout/workflow/test_usage.rb +4 -4
  141. data/test/test_helper.rb +23 -1
  142. metadata +71 -14
  143. data/lib/scout/tsv/persist.rb +0 -27
  144. data/test/scout/tsv/persist/test_tkrzw.rb +0 -123
  145. data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
  146. data/test/scout/tsv/test_persist.rb +0 -45
@@ -6,18 +6,29 @@ require 'timeout'
6
6
  class WorkQueue
7
7
  attr_accessor :workers, :worker_proc, :callback
8
8
 
9
+ def new_worker
10
+ worker = Worker.new
11
+ worker.queue_id = queue_id
12
+ worker
13
+ end
14
+
9
15
  def initialize(workers = 0, &block)
10
16
  workers = workers.to_i if String === workers
11
17
  @input = WorkQueue::Socket.new
12
18
  @output = WorkQueue::Socket.new
13
- @workers = workers.times.collect{ Worker.new }
19
+ @workers = workers.times.collect{ new_worker }
14
20
  @worker_proc = block
15
21
  @worker_mutex = Mutex.new
16
22
  @removed_workers = []
23
+ Log.medium "Starting queue #{queue_id} with workers: #{Log.fingerprint @workers.collect{|w| w.worker_short_id }} and sockets #{@input.socket_id} and #{@output.socket_id}"
24
+ end
25
+
26
+ def queue_id
27
+ [object_id, Process.pid] * "@"
17
28
  end
18
29
 
19
30
  def add_worker(&block)
20
- worker = Worker.new
31
+ worker = new_worker
21
32
  @worker_mutex.synchronize do
22
33
  @workers.push(worker)
23
34
  if block_given?
@@ -41,9 +52,11 @@ class WorkQueue
41
52
  @worker_mutex.synchronize do
42
53
  worker = @workers.index{|w| w.pid == pid}
43
54
  if worker
44
- Log.low "Removed worker #{pid}"
45
55
  @workers.delete_at(worker)
46
56
  @removed_workers << pid
57
+ Log.low "Removed worker #{pid} from #{queue_id}"
58
+ else
59
+ Log.medium "Worker #{pid} not from #{queue_id}"
47
60
  end
48
61
  end
49
62
  end
@@ -56,14 +69,14 @@ class WorkQueue
56
69
  @reader = Thread.new(Thread.current) do |parent|
57
70
  begin
58
71
  Thread.current.report_on_exception = false
59
- Thread.current["name"] = "Output reader #{Process.pid}"
72
+ Thread.current["name"] = "Output reader #{queue_id}"
60
73
  @done_workers ||= []
61
74
  while true
62
75
  obj = @output.read
63
76
  if DoneProcessing === obj
64
77
 
65
78
  done = @worker_mutex.synchronize do
66
- Log.low "Worker #{obj.pid} done"
79
+ Log.low "Worker #{obj.pid} from #{queue_id} done"
67
80
  @done_workers << obj.pid
68
81
  @closed && @done_workers.length == @removed_workers.length + @workers.length
69
82
  end
@@ -78,12 +91,12 @@ class WorkQueue
78
91
  rescue DoneProcessing
79
92
  rescue Aborted
80
93
  rescue WorkerException
81
- Log.error "Exception in worker #{obj.pid} in queue #{Process.pid}: #{obj.worker_exception.message}"
94
+ Log.error "Exception in worker #{obj.pid} in queue #{queue_id}: #{obj.worker_exception.message}"
82
95
  self.abort
83
96
  @input.abort obj.worker_exception
84
97
  raise obj.worker_exception
85
98
  rescue
86
- Log.error "Exception processing output in queue #{Process.pid}: #{$!.message}"
99
+ Log.error "Exception processing output in queue #{queue_id}: #{$!.message}"
87
100
  self.abort
88
101
  raise $!
89
102
  end
@@ -95,25 +108,19 @@ class WorkQueue
95
108
 
96
109
  @waiter = Thread.new do
97
110
  Thread.current.report_on_exception = false
98
- Thread.current["name"] = "Worker waiter #{Process.pid}"
111
+ Thread.current["name"] = "Worker waiter #{queue_id}"
99
112
  while true
100
113
  break if @worker_mutex.synchronize{ @workers.empty? }
101
- begin
102
- Timeout.timeout(1) do
103
- begin
104
- pid, status = Process.wait2
105
- remove_worker(pid) if pid
106
- rescue Exception
107
- Log.exception $!
108
- end
109
- end
110
- rescue Timeout::Error
111
- pids = @worker_mutex.synchronize{ @workers.collect{|w| w.pid } }
112
- pids.each do |p|
113
- pid, status = Process.wait2 p, Process::WNOHANG
114
+ threads = @workers.collect do |w|
115
+ t = Thread.new do
116
+ Thread.current["name"] = "Worker waiter #{queue_id} worker #{w.pid}"
117
+ pid, status = Process.wait2 w.pid
114
118
  remove_worker(pid) if pid
115
119
  end
120
+ Thread.pass until t["name"]
121
+ t
116
122
  end
123
+ threads.each do |t| t.join end
117
124
  end
118
125
  end
119
126
 
@@ -131,16 +138,23 @@ class WorkQueue
131
138
  end
132
139
 
133
140
  def abort
134
- Log.low "Aborting #{@workers.length} workers in queue #{Process.pid}"
141
+ Log.low "Aborting #{@workers.length} workers in queue #{queue_id}"
135
142
  @worker_mutex.synchronize do
136
- @workers.each{|w| w.abort }
143
+ @workers.each do |w|
144
+ ScoutSemaphore.post_semaphore(@output.write_sem) if @output
145
+ ScoutSemaphore.post_semaphore(@input.read_sem) if @input
146
+ w.abort
147
+ end
137
148
  end
138
149
  end
139
150
 
140
151
  def close
141
152
  @closed = true
142
153
  @worker_mutex.synchronize{ @workers.length }.times do
143
- @input.write DoneProcessing.new() unless @input.closed_write?
154
+ begin
155
+ @input.write DoneProcessing.new() unless @input.closed_write?
156
+ rescue IOError
157
+ end
144
158
  end
145
159
  end
146
160
 
@@ -1,8 +1,8 @@
1
- require 'scout/meta_extension'
1
+ require 'scout/annotation'
2
2
 
3
3
  module Workflow
4
- extend MetaExtension
5
- extension_attr :name, :tasks, :helpers
4
+ extend Annotation
5
+ annotation :name, :tasks, :helpers
6
6
 
7
7
  class << self
8
8
  attr_accessor :directory
@@ -103,7 +103,14 @@ module Workflow
103
103
  end
104
104
 
105
105
  def task(name_and_type, &block)
106
- name, type = name_and_type.collect.first
106
+ case name_and_type
107
+ when Hash
108
+ name, type = name_and_type.collect.first
109
+ when Symbol
110
+ name, type = [name_and_type, :binary]
111
+ when String
112
+ name, type = [name_and_type, :binary]
113
+ end
107
114
  type = type.to_sym if String === type
108
115
  name = name.to_sym if String === name
109
116
  @tasks ||= IndiferentHash.setup({})
@@ -136,10 +143,11 @@ module Workflow
136
143
  def task_alias(name, workflow, oname, *rest, &block)
137
144
  dep(workflow, oname, *rest, &block)
138
145
  extension :dep_task unless @extension
139
- task_proc = workflow.tasks[oname]
140
- raise "Task #{oname} not found" if task_proc.nil?
141
- returns task_proc.returns if @returns.nil?
142
- type = task_proc.type
146
+ task_proc = workflow.tasks[oname] if workflow.tasks
147
+ if task_proc
148
+ returns task_proc.returns if @returns.nil?
149
+ type = task_proc.type
150
+ end
143
151
  task name => type do
144
152
  raise RbbtException, "dep_task does not have any dependencies" if dependencies.empty?
145
153
  Step.wait_for_jobs dependencies.select{|d| d.streaming? }
@@ -147,18 +155,18 @@ module Workflow
147
155
  dep.join
148
156
  raise dep.get_exception if dep.error?
149
157
  raise Aborted, "Aborted dependency #{dep.path}" if dep.aborted?
150
- set_info :result_type, dep.info[:result_type]
158
+ set_info :type, dep.info[:type]
151
159
  forget = config :forget_dep_tasks, "forget_dep_tasks", :default => FORGET_DEP_TASKS
152
160
  if forget
153
161
  remove = config :remove_dep_tasks, "remove_dep_tasks", :default => REMOVE_DEP_TASKS
154
162
 
155
163
  self.archive_deps
156
- self.copy_files_dir
164
+ self.copy_linked_files_dir
157
165
  self.dependencies = self.dependencies - [dep]
158
166
  Open.rm_rf self.files_dir if Open.exist? self.files_dir
159
167
  FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?(dep.files_dir)
160
168
 
161
- if dep.overriden || ! Workflow.job_path?(dep.path)
169
+ if dep.overriden?
162
170
  Open.link dep.path, self.tmp_path
163
171
  else
164
172
  Open.ln_h dep.path, self.tmp_path
@@ -34,7 +34,7 @@ module Workflow
34
34
  end
35
35
 
36
36
  def self.job_rules(rules, job)
37
- workflow = job.workflow.to_s
37
+ workflow = job.workflow.name
38
38
  task_name = job.task_name.to_s
39
39
  defaults = rules["defaults"] || {}
40
40
 
@@ -168,7 +168,7 @@ module Workflow
168
168
 
169
169
  dep_path = dep.path
170
170
  parents = all_jobs.select do |parent|
171
- paths = parent.info[:dependencies].nil? ? parent.dependencies.collect{|d| d.path } : parent.info[:dependencies].collect{|d| d.last }
171
+ paths = parent.info[:dependencies].nil? ? parent.dependencies.collect{|d| d.path } : parent.info[:dependencies].collect{|d| Array === d ? d.last : d }
172
172
  paths.include? dep_path
173
173
  end
174
174
 
@@ -177,7 +177,7 @@ module Workflow
177
177
  parents.each do |parent|
178
178
  Log.high "Erasing #{dep.path} from #{parent.path}"
179
179
  parent.archive_deps
180
- parent.copy_files_dir
180
+ parent.copy_linked_files_dir
181
181
  parent.dependencies = parent.dependencies - [dep]
182
182
  end
183
183
  dep.clean
@@ -251,4 +251,21 @@ module Workflow
251
251
  end
252
252
  end
253
253
  end
254
+
255
+ def self.produce_dependencies(jobs, tasks, produce_cpus = Etc.nprocessors)
256
+ jobs = [jobs] unless Array === jobs
257
+ produce_list = []
258
+ jobs.each do |job|
259
+ next if job.done? || job.running?
260
+ job.rec_dependencies.each do |job|
261
+ produce_list << job if tasks.include?(job.task_name) ||
262
+ tasks.include?(job.task_name.to_s) ||
263
+ tasks.include?(job.full_task_name)
264
+ end
265
+ end
266
+
267
+ orchestrator = Orchestrator.new 0.1, cpus: produce_cpus.to_i
268
+ orchestrator.process({}, produce_list)
269
+ produce_list
270
+ end
254
271
  end
@@ -0,0 +1,205 @@
1
+ require 'scout/tsv'
2
+
3
+ module Workflow
4
+ def self.trace_job_times(jobs, fix_gap = false, report_keys = nil)
5
+ data = TSV.setup({}, "Job~Code,Workflow,Task,Start,End#:type=:list")
6
+ min_start = nil
7
+ max_done = nil
8
+ jobs.each do |job|
9
+ next unless job.info[:end]
10
+ started = job.info[:start]
11
+ ddone = job.info[:end]
12
+
13
+ started = Time.parse started if String === started
14
+ ddone = Time.parse ddone if String === ddone
15
+
16
+ code = [job.workflow.name, job.task_name].compact.collect{|s| s.to_s} * " · "
17
+ code = job.name + " - " + code
18
+
19
+ data[job.path] = [code, job.workflow.name, job.task_name, started, ddone]
20
+ if min_start.nil?
21
+ min_start = started
22
+ else
23
+ min_start = started if started < min_start
24
+ end
25
+
26
+ if max_done.nil?
27
+ max_done = ddone
28
+ else
29
+ max_done = ddone if ddone > max_done
30
+ end
31
+ end
32
+
33
+ data.add_field "Start.second" do |k,value|
34
+ value["Start"] - min_start
35
+ end
36
+
37
+ data.add_field "End.second" do |k,value|
38
+ value["End"] - min_start
39
+ end
40
+
41
+ if fix_gap
42
+ ranges = []
43
+ data.through do |k,values|
44
+ start, eend = values.values_at "Start.second", "End.second"
45
+
46
+ ranges << (start..eend)
47
+ end
48
+
49
+ gaps = {}
50
+ last = nil
51
+ Misc.collapse_ranges(ranges).each do |range|
52
+ start = range.begin
53
+ eend = range.end
54
+ if last
55
+ gaps[last] = start - last
56
+ end
57
+ last = eend
58
+ end
59
+
60
+ data.process "End.second" do |value,k,values|
61
+ gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
62
+ value - gap
63
+ end
64
+
65
+ data.process "Start.second" do |value,k,values|
66
+ gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
67
+ value - gap
68
+ end
69
+
70
+ total_gaps = Misc.sum(gaps.collect{|k,v| v})
71
+ Log.info "Total gaps: #{total_gaps} seconds"
72
+ end
73
+
74
+ if report_keys && report_keys.any?
75
+ job_keys = {}
76
+ jobs.each do |job|
77
+ job_info = IndiferentHash.setup(job.info)
78
+ report_keys.each do |key|
79
+ job_keys[job.path] ||= {}
80
+ job_keys[job.path][key] = job_info[key]
81
+ end
82
+ end
83
+ report_keys.each do |key|
84
+ data.add_field Misc.humanize(key) do |p,values|
85
+ job_keys[p][key]
86
+ end
87
+ end
88
+ end
89
+
90
+ start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
91
+ eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
92
+ total = eend - start unless eend.nil? || start.nil?
93
+ Log.info "Total time elapsed: #{total} seconds" if total
94
+
95
+ if report_keys && report_keys.any?
96
+ job_keys = {}
97
+ report_keys.each do |key|
98
+ jobs.each do |job|
99
+ job_keys[job.path] ||= {}
100
+ job_keys[job.path][key] = job.info[key]
101
+ end
102
+ end
103
+ report_keys.each do |key|
104
+ data.add_field Misc.humanize(key) do |p,values|
105
+ job_keys[p][key]
106
+ end
107
+ end
108
+ end
109
+
110
+ data
111
+ end
112
+
113
+ def self.trace_job_summary(jobs, report_keys = [])
114
+ tasks_info = {}
115
+
116
+ report_keys = report_keys.collect{|k| k.to_s}
117
+
118
+ jobs.each do |dep|
119
+ next unless dep.info[:end]
120
+ task = [dep.workflow.name, dep.task_name].compact.collect{|s| s.to_s} * "#"
121
+ info = tasks_info[task] ||= IndiferentHash.setup({})
122
+ dep_info = IndiferentHash.setup(dep.info)
123
+
124
+ ddone = dep_info[:end]
125
+ started = dep_info[:start]
126
+
127
+ started = Time.parse started if String === started
128
+ ddone = Time.parse ddone if String === ddone
129
+
130
+ time = ddone - started
131
+ info[:time] ||= []
132
+ info[:time] << time
133
+
134
+ report_keys.each do |key|
135
+ info[key] = dep_info[key]
136
+ end
137
+
138
+ dep.info[:config_keys].each do |kinfo|
139
+ key, value, tokens = kinfo
140
+
141
+ info[key.to_s] = value if report_keys.include? key.to_s
142
+ end if dep.info[:config_keys]
143
+ end
144
+
145
+ summary = TSV.setup({}, "Task~Calls,Avg. Time,Total Time#:type=:list")
146
+
147
+ tasks_info.each do |task, info|
148
+ time_lists = info[:time]
149
+ avg_time = Misc.mean(time_lists).to_i
150
+ total_time = Misc.sum(time_lists).to_i
151
+ calls = time_lists.length
152
+ summary[task] = [calls, avg_time, total_time]
153
+ end
154
+
155
+ report_keys.each do |key|
156
+ summary.add_field Misc.humanize(key) do |task|
157
+ tasks_info[task][key]
158
+ end
159
+ end if Array === report_keys && report_keys.any?
160
+
161
+ summary
162
+ end
163
+
164
+ def self.trace(seed_jobs, options = {})
165
+ jobs = []
166
+ seed_jobs.each do |step|
167
+ jobs += step.rec_dependencies + [step]
168
+ step.info[:archived_info].each do |path,ainfo|
169
+ next unless Hash === ainfo
170
+ archived_step = Step.new path
171
+
172
+ archived_step.define_singleton_method :info do
173
+ ainfo
174
+ end
175
+
176
+ jobs << archived_step
177
+ end if step.info[:archived_info]
178
+
179
+ end
180
+
181
+ jobs = jobs.uniq.sort_by{|job| [job, job.info]; t = job.info[:started] || Open.mtime(job.path) || Time.now; Time === t ? t : Time.parse(t) }
182
+
183
+ report_keys = options[:report_keys] || ""
184
+ report_keys = report_keys.split(/,\s*/) if String === report_keys
185
+
186
+ data = trace_job_times(jobs, options[:fix_gap], report_keys)
187
+
188
+ summary = trace_job_summary(jobs, report_keys)
189
+
190
+
191
+ raise "No jobs to process" if data.size == 0
192
+
193
+ size, width, height = options.values_at :size, :width, :height
194
+
195
+ size = 800 if size.nil?
196
+ width = size.to_i * 2 if width.nil?
197
+ height = size if height.nil?
198
+
199
+ if options[:plot_data]
200
+ data
201
+ else
202
+ summary
203
+ end
204
+ end
205
+ end
@@ -1 +1,2 @@
1
1
  require_relative 'deployment/orchestrator'
2
+ require_relative 'deployment/trace'
@@ -31,7 +31,7 @@ module Workflow
31
31
  title = doc_parse_first_line doc
32
32
  description, task_info = doc_parse_up_to doc, /^# Tasks/i
33
33
  task_description, tasks = doc_parse_up_to task_info, /^##/, true
34
- tasks = doc_parse_chunks tasks, /## (.*)/
34
+ tasks = doc_parse_chunks tasks, /^## (.*)/
35
35
  {:title => title.strip, :description => description.strip, :task_description => task_description.strip, :tasks => tasks}
36
36
  end
37
37
 
@@ -0,0 +1,42 @@
1
+ class Step
2
+ def archived_info
3
+ return {} unless Open.exists?(info_file)
4
+ info[:archived_info] || {}
5
+ end
6
+
7
+ def archived_inputs
8
+ return [] unless info[:archived_dependencies]
9
+ archived_info = self.archived_info
10
+
11
+ all_inputs = NamedArray.setup([],[])
12
+ deps = info[:archived_dependencies].dup
13
+ seen = []
14
+ while path = deps.pop
15
+ dep_info = archived_info[path]
16
+ if Hash === dep_info
17
+ dep_inputs = dep_info[:inputs]
18
+ NamedArray.setup(dep_inputs, dep_info[:input_names])
19
+ all_inputs.concat(dep_inputs)
20
+ deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
21
+ deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
22
+ end
23
+ seen << path
24
+ end
25
+
26
+ all_inputs
27
+ end
28
+
29
+ def archive_deps(jobs = nil)
30
+ jobs = dependencies if jobs.nil?
31
+
32
+ archived_info = jobs.inject({}) do |acc,dep|
33
+ next acc unless Open.exists?(dep.info_file)
34
+ acc[dep.path] = dep.info
35
+ acc.merge!(dep.archived_info)
36
+ acc
37
+ end
38
+
39
+ self.set_info :archived_info, archived_info
40
+ self.set_info :archived_dependencies, info[:dependencies]
41
+ end
42
+ end
@@ -0,0 +1,51 @@
1
+ class Step
2
+ def child(&block)
3
+ child_pid = Process.fork &block
4
+ children_pids = info[:children_pids]
5
+ if children_pids.nil?
6
+ children_pids = [child_pid]
7
+ else
8
+ children_pids << child_pid
9
+ end
10
+ set_info :children_pids, children_pids
11
+ child_pid
12
+ end
13
+
14
+ def cmd(*args)
15
+ all_args = *args
16
+
17
+ all_args << {} unless Hash === all_args.last
18
+
19
+ level = all_args.last[:log] || 0
20
+ level = 0 if TrueClass === level
21
+ level = 10 if FalseClass === level
22
+ level = level.to_i
23
+
24
+ all_args.last[:log] = true
25
+ all_args.last[:pipe] = true
26
+
27
+ io = CMD.cmd(*all_args)
28
+ child_pid = io.pids.first
29
+
30
+ children_pids = info[:children_pids]
31
+ if children_pids.nil?
32
+ children_pids = [child_pid]
33
+ else
34
+ children_pids << child_pid
35
+ end
36
+ set_info :children_pids, children_pids
37
+
38
+ while c = io.getc
39
+ STDERR << c if Log.severity <= level
40
+ if c == "\n"
41
+ Log.logn "STDOUT [#{child_pid}]: ", level
42
+ end
43
+ end
44
+
45
+ io.join
46
+
47
+ nil
48
+ end
49
+
50
+ end
51
+
@@ -7,7 +7,7 @@ class Step
7
7
 
8
8
  new_tokens = []
9
9
  if workflow
10
- workflow_name = workflow.to_s
10
+ workflow_name = workflow.name
11
11
  new_tokens << ("workflow:" << workflow_name)
12
12
  new_tokens << ("task:" << workflow_name << "#" << task_name.to_s)
13
13
  end
@@ -33,37 +33,51 @@ class Step
33
33
 
34
34
  def prepare_dependencies
35
35
  inverse_dep = {}
36
- dependencies.each{|dep|
36
+
37
+ dependencies.each do |dep|
37
38
  if dep.present? && ! dep.updated?
38
39
  Log.debug "Clean outdated #{dep.path}"
39
40
  dep.clean
40
41
  end
42
+
41
43
  next if dep.done?
44
+
42
45
  if dep.dependencies
43
46
  dep.dependencies.each do |d|
44
47
  inverse_dep[d] ||= []
45
48
  inverse_dep[d] << dep
46
49
  end
47
50
  end
51
+
48
52
  input_dependencies.each do |d|
49
53
  inverse_dep[d] ||= []
50
54
  inverse_dep[d] << dep
51
55
  end
52
- }
56
+ end if dependencies
57
+
53
58
  inverse_dep.each do |dep,list|
54
59
  dep.tee_copies = list.length
55
60
  end
56
61
  end
57
62
 
63
+ def all_dependencies
64
+ @all_dependencies ||= begin
65
+ all_dependencies = []
66
+ all_dependencies += dependencies if dependencies
67
+ all_dependencies += input_dependencies if input_dependencies
68
+ all_dependencies
69
+ end
70
+ end
71
+
58
72
  def run_dependencies
59
- dependencies.each{|dep|
73
+ all_dependencies.each do |dep|
60
74
  next if dep.running? || dep.done?
61
75
  compute_options = compute[dep.path] if compute
62
76
  compute_options = [] if compute_options.nil?
63
77
 
64
78
  stream = compute_options.include?(:stream)
65
79
  stream = true unless ENV["SCOUT_EXPLICIT_STREAMING"] == 'true'
66
- stream = false if compute_options.include?(:produce)
80
+ stream = :no_load if compute_options.include?(:produce)
67
81
 
68
82
  begin
69
83
  dep.run(stream)
@@ -74,17 +88,20 @@ class Step
74
88
  raise $!
75
89
  end
76
90
  end
77
- }
91
+ end
78
92
  end
79
93
 
80
94
  def abort_dependencies
81
- dependencies.each{|dep| dep.abort if dep.running? }
95
+ all_dependencies.each{|dep| dep.abort if dep.running? }
82
96
  end
83
97
 
84
98
  def self.wait_for_jobs(jobs)
85
99
  threads = []
86
100
  jobs.each do |job|
87
- threads << Thread.new{ job.join }
101
+ threads << Thread.new do
102
+ Thread.current.report_on_exception = false
103
+ job.join
104
+ end
88
105
  end
89
106
  threads.each do |t|
90
107
  t.join
@@ -19,7 +19,26 @@ class Step
19
19
  dir[file]
20
20
  end
21
21
 
22
+ def files
23
+ Dir.glob(File.join(files_dir, '**', '*')).reject{|path| File.directory? path }.collect do |path|
24
+ Misc.path_relative_to(files_dir, path)
25
+ end
26
+ end
27
+
22
28
  def bundle_files
23
29
  [path, info_file, Dir.glob(File.join(files_dir,"**/*"))].flatten.select{|f| Open.exist?(f) }
24
30
  end
31
+
32
+ def copy_linked_files_dir
33
+ if File.symlink?(self.files_dir)
34
+ begin
35
+ realpath = Open.realpath(self.files_dir)
36
+ Open.rm self.files_dir
37
+ Open.cp realpath, self.files_dir
38
+ rescue
39
+ Log.warn "Copy files_dir for #{self.workflow_short_path} failed: " + $!.message
40
+ end
41
+ end
42
+ end
43
+
25
44
  end