rbbt-util 5.26.157 → 5.26.158

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ class RemoteStep
2
+ module REST
3
+
4
# Fetches the raw result of the remote job and memoizes it in @cache_result.
#
# The request asks for the :raw format for string, boolean, tsv, annotations
# and array results and for :json otherwise. The response is persisted in a
# file named after cache_file plus a digest of the request parameters.
# Failures are logged and re-raised so that Misc.insist can retry.
def get
  raw_types = [:string, :boolean, :tsv, :annotations, :array]
  format = raw_types.include?(result_type.to_sym) ? :raw : :json
  params = {:_format => format}

  return @cache_result if @cache_result

  persist_file = cache_file + "." + Misc.obj2digest(params)
  @cache_result = Persist.persist("REST persist", :binary, :file => persist_file) do
    Misc.insist 3, rand(2) + 1 do
      begin
        # The job may not have been registered with the server yet
        init_job if url.nil?
        @adaptor.get_raw(url, params)
      rescue => e
        Log.exception e
        raise e
      end
    end
  end
end
19
+
20
# Waits for the remote job (unless it is already done or streaming) and
# returns its result decoded by load_res.
#
# Raises the exception returned by get_exception when the job ended in
# error or was aborted.
def load
  join unless done? || streaming?
  raise get_exception if error? || aborted?
  load_res get
end
26
+
27
# Runs the job with the :exec cache type and decodes the response.
# Array results are decoded as :json; every other type uses result_type as is.
def exec_job
  raw = _run_job(:exec)
  decode_as = (result_type == :array) ? :json : result_type
  load_res(raw, decode_as)
end
31
+
32
# Asks the server to abort the job by requesting its URL with
# `_update=abort`. Nothing is sent when the status is already :done or when
# the job has no URL or name yet. Always returns self.
def abort
  unless status == :done
    @adaptor.get_json(@url + '?_update=abort') if @url && @name
  end
  self
end
37
+
38
# Registers the job with the REST server and records its name and URL.
#
# cache_type defaults to :exec for exec steps (@is_exec) and to
# :asynchronous otherwise. other_params are merged into the inputs posted
# to the server. Returns self.
def init_job(cache_type = nil, other_params = {})
  cache_type = (@is_exec ? :exec : :asynchronous) if cache_type.nil?

  @last_info_time = nil
  @done = false
  get_streams

  @name ||= Persist.memory("RemoteSteps", :workflow => self, :task => task, :jobname => @name, :inputs => inputs, :cache_type => cache_type) do
    Misc.insist do
      job_params = inputs.merge(other_params).merge(:jobname => @name||@base_name, :_cache_type => cache_type)
      @adaptor.post_jobname(File.join(base_url, task.to_s), job_params)
    end
  end

  if Open.remote? @name
    # The server answered with a full URL: keep it and use its basename as name
    @url, @name = @name, File.basename(@name)
  else
    @url = File.join(base_url, task.to_s, @name)
  end

  self
end
57
+
58
# Recursive cleans are deliberately disabled for remote steps: this only
# logs a warning and returns nil.
#
# The previous body kept a `_restart` plus a `_update=recursive_clean`
# request after an unconditional `return`; that code could never run and
# has been removed.
def recursive_clean
  Log.warn "Not doing recursive cleans"
  nil
end
70
+
71
# Resets the local state, removes every local cache file and, when the job
# has a URL, asks the server to clean it (`_update=clean`).
#
# Best effort: any exception is logged and swallowed.
def _clean
  _restart
  cache_files.each { |stale_file| Open.rm stale_file }
  @adaptor.clean_url(url, {:_update => :clean}) if @url
rescue Exception
  Log.exception $!
end
83
+
84
# Cleans the job: init_job runs first, then _clean removes the local caches
# and notifies the server. Returns self so calls can be chained.
def clean
  init_job
  _clean
  self
end
89
+
90
# Sends the job to task_url as a multipart payload, streaming stream_input,
# and returns the result set up as a ConcurrentStream.
#
# The first line of the response selects how the rest is handled:
# * "LOCATION":      the next line is the job URL; the job is joined and its
#                    raw result fetched and returned.
# * "STREAM: <url>": the response itself is returned; a callback marks the
#                    step as done once it has been consumed.
# * "BULK":          the whole response body is read and returned.
#
# Raises a RuntimeError for a missing or unknown response type.
def stream_job(task_url, task_params, stream_input, cache_type = :exec)
  require 'rbbt/util/misc/multipart_payload'

  # NOTE(review): this used to go through WorkflowRESTClient, which was
  # renamed to RemoteWorkflow::REST (the @adaptor of REST steps) — confirm
  # that the driver exposes capture_exception.
  @adaptor.capture_exception do
    @streaming = true

    Log.debug{ "RestClient stream #{Process.pid}: #{ task_url } #{stream_input} #{cache_type} - #{Misc.fingerprint task_params}" }
    res = RbbtMutiplartPayload.issue task_url, task_params, stream_input, nil, nil, true
    type = res.gets
    raise "No response type received from #{task_url}" if type.nil?

    out = case type.strip
          when "LOCATION"
            # Drop the trailing newline left by gets as well as any query string
            @url = res.gets.to_s.strip.sub(/\?.*/, '')
            join
            raw = @adaptor.get_raw(@url)
            @done = true
            @streaming = false
            # Return the fetched result, not the value of the last assignment
            raw
          when /STREAM: (.*)/
            @url = $1.strip
            res.callback = Proc.new do
              Log.medium "Done streaming result from #{@url}"
              @done = true
              @streaming = false
            end
            res
          when "BULK"
            begin
              res.read
            ensure
              @done = true
              @streaming = false
            end
          else
            raise "What? " + type
          end

    ConcurrentStream.setup(out, :filename => @url)

    out
  end
end
131
+
132
# Runs the job on the server with the given cache type.
#
# When cache_type is :stream or :exec and the step has a stream input with a
# value, the job is issued through stream_job and its result returned.
# Otherwise the adaptor executes the job and its response is returned.
def _run_job(cache_type = :async)
  get_streams

  # URI.encode was removed in Ruby 3.0; the RFC 2396 parser provides the
  # same escaping.
  task_url = URI::RFC2396_Parser.new.escape(File.join(base_url, task.to_s))
  @adaptor.__prepare_inputs_for_restclient(inputs)

  format = [:string, :boolean, :tsv, :annotations].include?(result_type) ? :raw : :json
  task_params = inputs.merge(:_cache_type => cache_type, :jobname => base_name, :_format => format)

  # Explicit grouping of what `a or b and c and d` evaluated to
  streamable_type = cache_type == :stream || cache_type == :exec
  if streamable_type && stream_input && inputs[stream_input]
    stream_job(task_url, task_params, stream_input, cache_type)
  else
    @adaptor.execute_job(base_url, task, task_params, cache_type)
  end
end
147
+ end
148
+ end
@@ -0,0 +1,73 @@
1
+ class RemoteStep
2
+ module SSH
3
+ attr_accessor :override_dependencies
4
+
5
# Uploads the inputs to the SSH server, posts the job and records its name
# and URL.
#
# cache_type defaults to :exec for exec steps (@is_exec) and to
# :asynchronous otherwise. Each override_dependencies entry of the form
# "name=value" is stored in the inputs under "name" before the upload.
# other_params is accepted for signature compatibility and is not used.
# Returns self.
def init_job(cache_type = nil, other_params = {})
  cache_type = (@is_exec ? :exec : :asynchronous) if cache_type.nil?

  @last_info_time = nil
  @done = false
  @server, @server_path = RemoteWorkflow::SSH.parse_url base_url
  @input_id ||= "inputs-#{rand(100000)}"

  if override_dependencies && override_dependencies.any?
    override_dependencies.each do |od|
      name, _sep, value = od.partition("=")
      inputs[name] = value
    end
  end

  # The upload is the same with or without overrides
  RemoteWorkflow::SSH.upload_inputs(@server, inputs, @input_types, @input_id)

  @name ||= Persist.memory("RemoteSteps", :workflow => self, :task => task, :jobname => @name, :inputs => inputs, :cache_type => cache_type) do
    Misc.insist do
      RemoteWorkflow::SSH.post_job(File.join(base_url, task.to_s), @input_id, @base_name)
    end
  end

  if Open.remote? @name
    @url = @name
    @name = File.basename(@name)
  else
    @url = File.join(base_url, task.to_s, @name)
  end

  self
end
41
+
42
# Builds the ssh:// location of the result from the server parsed out of
# @base_url and the @remote_path set by produce.
def path
  server, server_path = RemoteWorkflow::SSH.parse_url(@base_url)
  @server = server
  @server_path = server_path
  "ssh://" + @server + ":" + @remote_path
end
46
+
47
# Initializes the job, runs it on the SSH server and blocks, polling once
# per second, until done? is true. The value returned by the driver's
# run_job is stored in @remote_path. Arguments are accepted for
# compatibility and ignored. Returns nil.
def produce(*args)
  init_job
  @remote_path = RemoteWorkflow::SSH.run_job(File.join(base_url, task.to_s), @input_id, @base_name)
  sleep 1 until done?
end
55
+
56
# Opens the remote result at path and decodes it with load_res.
def load
  remote_stream = Open.open(path)
  load_res(remote_stream)
end
59
+
60
# Produces the job and loads its result. When the first argument is truthy
# the result is not loaded and nil is returned.
def run(*args)
  produce(*args)
  return if args.first
  self.load
end
64
+
65
# Cleans the job on the SSH server (only when it is done) and resets the
# local state with _restart.
#
# The remote clean goes through the SSH driver (RemoteWorkflow::SSH), like
# every other remote call in this module; RemoteStep::SSH itself defines no
# module-level `clean`, so the previous call could only raise NoMethodError.
# NOTE(review): confirm the driver exposes clean(url, input_id, base_name).
def clean
  init_job
  RemoteWorkflow::SSH.clean(@url, @input_id, @base_name) if done?
  _restart
end
70
+
71
+ end
72
+ end
73
+
@@ -0,0 +1,329 @@
1
+ require 'rbbt/workflow'
2
+
3
+ class RemoteStep < Step
4
+
5
+ attr_accessor :url, :base_url, :task, :base_name, :inputs, :input_types, :result_type, :result_description, :is_exec, :is_stream, :stream_input
6
+
7
# Stores the job description and selects the transport.
#
# A base_url without a scheme gets "http://" prepended. When the base_url
# given by the caller contains 'ssh:' the step uses the SSH adaptor and
# methods; otherwise it uses the REST ones.
def initialize(base_url, task = nil, base_name = nil, inputs = nil, input_types = nil, result_type = nil, result_description = nil, is_exec = false, is_stream = false, stream_input = nil)
  @base_url = base_url
  @task = task
  @base_name = base_name
  @inputs = inputs
  @input_types = input_types
  @result_type = result_type
  @result_description = result_description
  @is_exec = is_exec
  @is_stream = is_stream
  @stream_input = stream_input

  @base_url = "http://" + @base_url unless @base_url =~ /^[a-z]+:\/\//
  @mutex = Mutex.new

  if base_url.include?('ssh:')
    @adaptor = RemoteWorkflow::SSH
    extend RemoteStep::SSH
  else
    @adaptor = RemoteWorkflow::REST
    extend RemoteStep::REST
  end
end
22
+
23
# The clean name of a remote step is the base name given at construction.
def clean_name
  @base_name
end
26
+
27
# Location of the local cache for this job: a file under the REST cache
# directory named "<task>.<clean_name>.<digest>", where the digest covers
# the base URL, task, base name and inputs.
def cache_file
  digest = Misc.obj2digest([base_url, task, base_name, inputs])
  file_name = [task, clean_name, digest].join(".")
  Rbbt.var.cache.REST[file_name].find
end
31
+
32
# Lists the existing files whose name is cache_file plus any dotted suffix.
def cache_files
  pattern = cache_file + '.*'
  Dir.glob(pattern)
end
35
+
36
# Runs the remote job, memoizing the outcome in @result.
#
# no_load falsy: start the job if needed, wait for it and return the result.
# no_load truthy: on stream steps (@is_stream) run in :stream mode and
# return the stream; otherwise only start the job and return its path with
# `_format=raw` added. Exec steps (@is_exec) always go through exec.
# @started is set once the attempt finishes, even on failure.
def run(no_load = false)
  mode = if no_load
           @is_stream ? :stream : true
         else
           no_load
         end

  @result ||= @mutex.synchronize do
    begin
      if @is_exec
        exec(mode)
      elsif mode == :stream
        _run_job(:stream)
      elsif mode
        init_job
        nil
      else
        unless done?
          init_job
          join
        end
        self.load
      end
    ensure
      @started = true
    end
  end

  if mode == :stream
    @result
  elsif mode
    Misc.add_GET_param(path, "_format", "raw")
  else
    @result
  end
end
63
+
64
+
65
# Returns a copy of inputs in which every Step value has been resolved.
#
# A Step that is neither done nor streaming is started first
# (`run(true)`, then `grace`). The input whose key matches stream_input is
# replaced by its stream (TSV.get_stream); any other Step is replaced by
# its loaded result. Non-Step values are copied unchanged.
def self.get_streams(inputs, stream_input = nil)
  inputs.each_with_object({}) do |(key, value), resolved|
    unless Step === value
      resolved[key] = value
      next
    end

    value.run(true) && value.grace unless value.done? || value.streaming?

    resolved[key] = if stream_input.to_s == key.to_s
                      TSV.get_stream(value)
                    else
                      value.load
                    end
  end
end
89
+
90
# Resolves Step inputs once per step using RemoteStep.get_streams. Returns
# the resolved inputs the first time and nil on later calls.
def get_streams
  unless @inputs_done
    @inputs = RemoteStep.get_streams(@inputs, @stream_input)
    @inputs_done = true
    @inputs
  end
end
96
+
97
# Replaces every input with the result of Step.dup_stream. Done at most
# once, and skipped when the RBBT_NO_STREAM environment variable is 'true'.
def dup_inputs
  return if @dupped || ENV["RBBT_NO_STREAM"] == 'true'
  Log.low "Dupping inputs for #{path}"
  @inputs = @inputs.each_with_object({}) do |(key, input), copies|
    copies[key] = Step.dup_stream input
  end
  @dupped = true
end
107
+
108
# Name of the job: nil for exec steps, @path while there is no URL, and
# otherwise the last path component of the URL (of the first one when @url
# is an Array).
def name
  if @is_exec
    nil
  elsif @url.nil?
    @path
  else
    job_url = Array === @url ? @url.first : @url
    job_url.split("/").last
  end
end
113
+
114
# Setting the name rebuilds the job URL as "<base_url>/<task>/<name>".
def name=(name)
  @url = [base_url, task, name].join("/")
end
117
+
118
# Returns the task when it is known; otherwise initializes the job and
# takes the second to last path component of its URL.
def task_name
  return task if task
  init_job
  job_url = Array === @url ? @url.first : @url
  job_url.split("/")[-2]
end
123
+
124
# Always false for remote steps.
def nopid?
  false
end
127
+
128
# Returns the info hash of the remote job.
#
# @done is recomputed from the last info seen (status :done or :error).
# While the job is not done, the info is refreshed from the server when the
# previous refresh is more than 0.5 seconds old (or never happened);
# otherwise the value kept by Persist.memory is reused. check_lock is
# accepted and not used.
def info(check_lock=false)
  last_status = @info && @info[:status]
  @done = last_status && (last_status.to_sym == :done || last_status.to_sym == :error)

  stale = @last_info_time.nil? || (Time.now - @last_info_time) > 0.5
  update = !@done && stale

  @info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
    @last_info_time = Time.now
    init_job unless @url
    fresh = RemoteWorkflow.fix_hash(@adaptor.get_json(File.join(@url, 'info')))
    fresh[:status] = fresh[:status].to_sym if String === fresh[:status]
    fresh
  end

  @info
end
147
+
148
# Current status of the job.
#
# Returns :done once the step is known to be done and nil when the job has
# neither a URL nor been started. Otherwise the status is read from info;
# errors while doing so are logged and nil is returned. The cached @info is
# dropped after every lookup.
def status
  return :done if @done
  return nil unless url || started?

  begin
    current = info[:status]
    @done = true if current && current.to_sym == :done
    current
  rescue
    Log.exception $!
    nil
  ensure
    @info = nil
  end
end
163
+
164
# Truthy when there is a result, or the step was started or is streaming.
def started?
  !@result.nil? || @started || @streaming
end
167
+
168
# True when a local cache file exists. Otherwise the job is initialized if
# it has no URL, and the answer comes from @done or from a status of
# 'done' or 'noinfo'.
def done?
  return true if cache_files.any?
  self.init_job unless @url
  return @done if @done
  status.to_s == 'done' || status.to_s == 'noinfo'
end
173
+
174
# Requests the 'files' resource under the job URL as JSON.
def files
  files_url = File.join(url, 'files')
  @adaptor.get_json(files_url)
end
177
+
178
# Requests the raw content of 'file/<file>' under the job URL.
def file(file)
  file_url = File.join(url, 'file', file)
  @adaptor.get_raw(file_url)
end
181
+
182
# Returns the result as a stream: an IO result is returned as is, a String
# result is wrapped in a StringIO, and anything else gives nil.
def get_stream
  if IO === @result
    @result
  elsif String === @result
    StringIO.new @result
  end
end
192
+
193
# Gives the job time to start: produces it if it was not started, waits up
# to three increasing pauses (0.1, 0.5 and 1 second) and then polls once
# per second until the job is done or started.
def grace
  produce unless @started
  [0.1, 0.5, 1].each do |pause|
    sleep pause unless started?
  end
  sleep 1 until done? || started?
end
202
+
203
+ #{{{ MANAGEMENT
204
+
205
+
206
# Location of the job: its URL with `_format=raw` added when known;
# otherwise "<base_url>/<task>/<base_name>-<inputs fingerprint>" when there
# is a base name; nil when neither is available.
def path
  return Misc.add_GET_param(@url, "_format", "raw") if @url
  return nil unless @base_name

  job_id = @base_name + '-' + Misc.fingerprint(inputs)
  [base_url, task, job_id].join("/")
end
215
+
216
# Forking a remote step just initializes the job with the :asynchronous
# cache type; both arguments are accepted and not used.
def fork(noload=false, semaphore=nil)
  init_job :asynchronous
end
219
+
220
# True unless the status is one of done, error, aborted or noinfo.
def running?
  finished_states = %w(done error aborted noinfo)
  !finished_states.include?(status.to_s)
end
223
+
224
# Executes the job on the server, memoizing the outcome in @result.
# With noload == :stream the raw response of _run_job(:exec) is kept;
# otherwise the decoded result of exec_job. @started is set even on failure.
def exec(noload = false)
  return @result if @result

  begin
    @result = noload == :stream ? _run_job(:exec) : exec_job
  ensure
    @started = true
  end
end
235
+
236
# Waits until the remote job is done, aborted or failed.
#
# Returns true right away when a local cache file exists. An IO held in
# @result is consumed first. Then the job is polled: after an info refresh
# it waits 0.2 seconds, then 1 second, then 3 seconds at a time.
# Returns self.
def join
  return true if cache_files.any?
  init_job unless @url
  Log.debug{ "Joining RemoteStep: #{path}" }

  if IO === @result
    pending = @result
    @result = nil
    Misc.consume_stream(pending, true)
  end

  finished = lambda { self.done? || self.aborted? || self.error? }

  unless finished.call
    self.info
    return self if finished.call
    sleep 0.2 unless finished.call
    sleep 1 unless finished.call
    sleep 3 until finished.call
  end

  self
end
258
+
259
# Decodes a raw response according to the result type.
#
# res is either the response text or a stream (anything responding to
# :read). For plain text the job is joined first. result_type defaults to
# the step's own result_type.
#
# :string, :boolean and :array are decoded from the text; :tsv and
# :annotations are opened with TSV.open; any other type is parsed as JSON.
# If JSON parsing fails, text that looks like an integer or a float is
# converted; otherwise the parsing error is re-raised.
def load_res(res, result_type = nil)
  stream = res.respond_to?(:read)
  join unless stream
  result_type ||= self.result_type

  case result_type.to_sym
  when :string
    stream ? res.read : res
  when :boolean
    (stream ? res.read : res) == 'true'
  when :tsv
    if stream
      TSV.open(res, :monitor => true)
    else
      TSV.open(StringIO.new(res))
    end
  when :annotations
    tsv = stream ? TSV.open(res) : TSV.open(StringIO.new(res))
    Annotated.load_tsv(tsv)
  when :array
    (stream ? res.read : res).split("\n")
  else
    # Same stream test as the other branches, so that StringIO and other
    # non-IO streams are read before parsing
    json_text = stream ? res.read : res
    begin
      JSON.parse json_text
    rescue
      case json_text
      when /^\d+$/
        json_text.to_i
      when /^\d+\.\d/
        json_text.to_f
      else
        raise
      end
    end
  end
end
304
+
305
# Forgets the state of the job (done, name, started, aborted and info) so
# it can be submitted again. File inputs are reopened from their path;
# every other input is kept as is.
def _restart
  @done = @name = @started = @aborted = nil

  @inputs = inputs.each_with_object({}) do |(key, input), fresh|
    fresh[key] = File === input ? File.open(input.path) : input
  end

  @info = nil
end
321
+
322
# Remote steps declare no input checks: always an empty Array.
def input_checks
  []
end
325
+
326
+ end
327
+
328
+ require 'rbbt/workflow/remote_workflow/remote_step/rest'
329
+ require 'rbbt/workflow/remote_workflow/remote_step/ssh'
@@ -1,17 +1,5 @@
1
- require 'json'
2
1
  require 'rbbt/workflow'
3
- require 'rbbt/workflow/step'
4
- require 'rbbt/util/misc'
5
-
6
- require 'rbbt/workflow/remote/remote_step'
7
-
8
- require 'rbbt/workflow/remote/rest/get'
9
- require 'rbbt/workflow/remote/rest/adaptor'
10
-
11
- require 'rbbt/workflow/remote/ssh/get'
12
- require 'rbbt/workflow/remote/ssh/adaptor'
13
-
14
- class WorkflowRemoteClient
2
+ class RemoteWorkflow
15
3
  include Workflow
16
4
 
17
5
  attr_accessor :url, :name, :exec_exports, :synchronous_exports, :asynchronous_exports, :stream_exports
@@ -23,10 +11,11 @@ class WorkflowRemoteClient
23
11
  rest = url.include?('ssh://') ? false : true
24
12
 
25
13
  if rest
26
- self.extend WorkflowRESTClient
14
+ self.extend RemoteWorkflow::REST
27
15
  else
28
- self.extend WorkflowSSHClient
16
+ self.extend RemoteWorkflow::SSH
29
17
  end
18
+
30
19
  init_remote_tasks
31
20
  end
32
21
 
@@ -55,7 +44,7 @@ class WorkflowRemoteClient
55
44
  end
56
45
 
57
46
  stream_input = @can_stream ? task_info(task)[:input_options].select{|k,o| o[:stream] }.collect{|k,o| k }.first : nil
58
- RemoteStep.new(url, task, name, fixed_inputs, task_info[:result_type], task_info[:result_description], @exec_exports.include?(task), @stream_exports.include?(task), stream_input)
47
+ RemoteStep.new(url, task, name, fixed_inputs, task_info[:input_types], task_info[:result_type], task_info[:result_description], @exec_exports.include?(task), @stream_exports.include?(task), stream_input)
59
48
  end
60
49
 
61
50
  def load_id(id)
@@ -67,3 +56,7 @@ class WorkflowRemoteClient
67
56
  step
68
57
  end
69
58
  end
59
+
60
+ require 'rbbt/workflow/remote_workflow/driver'
61
+ require 'rbbt/workflow/remote_workflow/remote_step'
62
+
@@ -1,4 +1,3 @@
1
- require 'rbbt/workflow/remote/ssh/driver'
2
1
  class Step
3
2
 
4
3
  MAIN_RSYNC_ARGS="-avztAXHP"
@@ -132,7 +131,7 @@ class Step
132
131
  recursive = false if recursive.nil?
133
132
 
134
133
  paths = if options[:source]
135
- SSHDriver.run(options[:source], <<-EOF).split("\n")
134
+ Misc.ssh_run(options[:source], <<-EOF).split("\n")
136
135
  require 'rbbt-util'
137
136
  require 'rbbt/workflow'
138
137
 
@@ -165,7 +164,7 @@ puts files * "\n"
165
164
 
166
165
 
167
166
  target = if options[:target]
168
- target = SSHDriver.run(options[:target], <<-EOF).split("\n").first
167
+ target = Misc.ssh_run(options[:target], <<-EOF).split("\n").first
169
168
  require 'rbbt-util'
170
169
  path = "var/jobs"
171
170
  resource = #{resource.to_s}
data/lib/rbbt/workflow.rb CHANGED
@@ -48,13 +48,13 @@ module Workflow
48
48
  end
49
49
 
50
50
  def self.require_remote_workflow(wf_name, url)
51
- require 'rbbt/workflow/remote/client'
52
- eval "Object::#{wf_name} = WorkflowRemoteClient.new '#{ url }', '#{wf_name}'"
51
+ require 'rbbt/workflow/remote_workflow'
52
+ eval "Object::#{wf_name} = RemoteWorkflow.new '#{ url }', '#{wf_name}'"
53
53
  end
54
54
 
55
55
  def self.require_remote_workflow(wf_name, url)
56
- require 'rbbt/workflow/remote/client'
57
- eval "Object::#{wf_name} = WorkflowRemoteClient.new '#{ url }', '#{wf_name}'"
56
+ require 'rbbt/workflow/remote_workflow'
57
+ eval "Object::#{wf_name} = RemoteWorkflow.new '#{ url }', '#{wf_name}'"
58
58
  end
59
59
 
60
60
  def self.load_workflow_libdir(filename)
@@ -1,9 +1,35 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'rbbt'
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*" "} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+
10
+ Description
11
+
12
+ $ #{$0} [options] <workflow> <url>
13
+
14
+ Use - to read from STDIN
15
+
16
+ -h--help Print this help
17
+
18
+ EOF
19
+ if options[:help]
20
+ if defined? rbbt_usage
21
+ rbbt_usage
22
+ else
23
+ puts SOPT.doc
24
+ end
25
+ exit 0
26
+ end
4
27
 
5
28
  workflow, url = ARGV
6
- url = File.join(url, workflow) unless url =~ /\/#{workflow}$/
29
+
30
+ raise ParameterException unless workflow && url
31
+
32
+ url = File.join(url, workflow) unless url =~ /[\/\:]#{workflow}\/?$/
7
33
  config_file = Rbbt.etc.remote_workflows
8
34
  remote_workflows = config_file.exists? ? config_file.yaml : {}
9
35
  remote_workflows[workflow] = url
@@ -235,6 +235,7 @@ end
235
235
  Workflow.workdir = Path.setup(File.expand_path(options.delete(:workdir_all))) if options[:workdir_all]
236
236
 
237
237
  workflow = Workflow.require_workflow workflow
238
+ workflow.init_remote_tasks
238
239
 
239
240
  if clean_task
240
241
  ENV["RBBT_UPDATE"] = 'true'
@@ -363,7 +364,7 @@ begin
363
364
 
364
365
  result_type = job.result_type
365
366
 
366
- res = JSON.parse(res.read) if (defined?(WorkflowRemoteClient) and WorkflowRemoteClient::RemoteStep === job) && %w(array float integer boolean).include?(result_type.to_s)
367
+ res = JSON.parse(res.read) if (defined?(RemoteStep) and RemoteStep === job) && %w(array float integer boolean).include?(result_type.to_s)
367
368
 
368
369
  case
369
370
  when res.respond_to?(:gets)
@@ -469,7 +470,7 @@ begin
469
470
  Log.clear_line
470
471
  end
471
472
 
472
- if Open.remote? job.path
473
+ if Open.remote?(job.path)
473
474
  out.puts job.path + Log.color(:blue, "?_format=raw")
474
475
  else
475
476
  out.puts job.path
@@ -550,7 +551,7 @@ when Step
550
551
  end
551
552
  else
552
553
  res.join
553
- out.puts Open.read(res.path) if File.exist? res.path
554
+ out.puts Open.read(res.path) if Open.exist?(res.path) || Open.remote?(res.path) || Open.ssh?(res.path)
554
555
  end
555
556
  else
556
557
  if Array === res