rbbt-util 5.26.77 → 5.26.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,6 +80,8 @@ module Workflow
80
80
  FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist? dep.files_dir
81
81
  Open.ln_h dep.path, self.tmp_path
82
82
  else
83
+ Open.rm_rf self.files_dir
84
+ Open.link dep.files_dir, self.files_dir
83
85
  Open.link dep.path, self.path
84
86
  end
85
87
  nil
@@ -0,0 +1,37 @@
1
module Workflow
  # Declares a workflow task that runs a Nextflow script.
  #
  # Every distinct `params.<name>` reference found in the script is declared
  # as a :string input, and a :text task called +name+ is defined whose body
  # invokes `nextflow run` on the script.
  #
  # file - path to the script; ".nf" is appended when the bare path does not
  #        exist but the suffixed one does.
  # name - task name; defaults to the script basename without ".nf".
  def nextflow_file(file, name = nil)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    file += '.nf' if !File.exist?(file) && File.exist?(file + '.nf')
    file = File.expand_path(file)
    name ||= File.basename(file).sub(/\.nf$/,'')
    params = Open.read(file).scan(/params\.\w+/).collect{|p| p.split(".").last}.uniq

    params.each do |param|
      input param, :string
    end

    task name => :text do
      # `file(...)` with parentheses is the step's method; the bare `file`
      # interpolated below is the script path captured from the enclosing method.
      work = file('work')
      output = file('output')
      profile = config :profile, :nextflow
      # Only pass -profile when one is configured.
      profile_flag = profile ? " -profile #{profile}" : ""
      Misc.in_dir output do
        cmd("nextflow run -work-dir #{work} -name #{clean_name} -ansi-log false#{profile_flag} #{file}", inputs.to_hash.merge('add_option_dashes' => true))
      end
    end
  end

  # Declares a task for the `main.nf` script inside directory +path+, named
  # after the directory.
  def nextflow_dir(path)
    main = File.join(path, 'main.nf')
    nextflow_file main, File.basename(path)
  end

  # Entry point: dispatches to nextflow_dir for directories and to
  # nextflow_file for anything else.
  def nextflow(path)
    if File.directory?(path)
      nextflow_dir path
    else
      nextflow_file path
    end
  end
end
@@ -0,0 +1,69 @@
1
+ require 'json'
2
+ require 'rbbt/workflow'
3
+ require 'rbbt/workflow/step'
4
+ require 'rbbt/util/misc'
5
+
6
+ require 'rbbt/workflow/remote/remote_step'
7
+
8
+ require 'rbbt/workflow/remote/rest/get'
9
+ require 'rbbt/workflow/remote/rest/adaptor'
10
+
11
+ require 'rbbt/workflow/remote/ssh/get'
12
+ require 'rbbt/workflow/remote/ssh/adaptor'
13
+
14
# Client-side stand-in for a workflow hosted elsewhere. The transport adaptor
# is chosen from the URL: WorkflowSSHClient when it contains 'ssh://',
# WorkflowRESTClient otherwise.
class WorkflowRemoteClient
  include Workflow

  attr_accessor :url, :name, :exec_exports, :synchronous_exports, :asynchronous_exports, :stream_exports

  # url  - location of the remote workflow.
  # name - name reported by #to_s.
  def initialize(url, name)
    Log.debug{ "Loading remote workflow #{ name }: #{ url }" }
    @url, @name = url, name

    self.extend(url.include?('ssh://') ? WorkflowSSHClient : WorkflowRESTClient)
    init_remote_tasks
  end

  def to_s
    name
  end

  # Builds a RemoteStep for +task+.
  #
  # TSV inputs are always serialized with to_s. Other inputs are kept only
  # when the task declares a type for them; for :tsv, :array, :file and :text
  # types a String naming an existing file is replaced by an open stream.
  def job(task, name, inputs)
    details = task_info(task)
    input_types = IndiferentHash.setup(details[:input_types])

    fixed_inputs = {}
    inputs.each do |key, value|
      key = key.to_sym

      if TSV === value
        fixed_inputs[key] = value.to_s
        next
      end

      next if input_types[key].nil?

      fixed_inputs[key] =
        case input_types[key].to_sym
        when :tsv, :array, :file, :text
          (String === value and Open.exists?(value)) ? Open.open(value) : value
        else
          value
        end
    end

    # First input flagged as :stream, only looked up when @can_stream is set.
    stream_input =
      if @can_stream
        task_info(task)[:input_options].select{|k,o| o[:stream] }.collect{|k,o| k }.first
      end

    RemoteStep.new(url, task, name, fixed_inputs, details[:result_type], details[:result_description], @exec_exports.include?(task), @stream_exports.include?(task), stream_input)
  end

  # Rebuilds a RemoteStep from an id of the form "task/name".
  def load_id(id)
    task, job_name = id.split("/")
    RemoteStep.new(url, task, nil).tap do |step|
      step.name = job_name
      step.result_type = task_info(task)[:result_type]
      step.result_description = task_info(task)[:result_description]
    end
  end
end
@@ -0,0 +1,308 @@
1
+ class WorkflowRemoteClient
2
+ class RemoteStep < Step
3
+
4
+ attr_accessor :url, :base_url, :task, :base_name, :inputs, :result_type, :result_description, :is_exec, :is_stream, :stream_input
5
+
6
# Stores the job description and mixes in the transport adaptor.
#
# A base_url without a scheme gets "http://" prepended. The adaptor is
# WorkflowSSHClient when the given base_url contains 'ssh:', otherwise
# WorkflowRESTClient.
def initialize(base_url, task = nil, base_name = nil, inputs = nil, result_type = nil, result_description = nil, is_exec = false, is_stream = false, stream_input = nil)
  @base_url = base_url
  @task = task
  @base_name = base_name
  @inputs = inputs
  @result_type = result_type
  @result_description = result_description
  @is_exec = is_exec
  @is_stream = is_stream
  @stream_input = stream_input

  @base_url = "http://" + @base_url unless @base_url =~ /^[a-z]+:\/\//
  @mutex = Mutex.new

  @adaptor = base_url.include?('ssh:') ? WorkflowSSHClient : WorkflowRESTClient

  self.extend @adaptor
end
20
+
21
# The base name this step was created with.
def clean_name; @base_name; end
24
+
25
# Runs the remote job once (memoized in @result) under @mutex.
#
# no_load - falsy: start the job, join it and return the loaded result.
#           truthy: becomes :stream for streaming steps, otherwise true;
#           with true the job is only started and the raw-format path is
#           returned; with :stream the stream held in @result is returned.
def run(no_load = false)
  no_load = (@is_stream ? :stream : true) if no_load

  @mutex.synchronize do
    # The ensure lives inside the memoized expression on purpose: @started is
    # only touched when the job is actually launched, not on memoized calls.
    @result ||= begin
                  if @is_exec
                    exec(no_load)
                  elsif no_load == :stream
                    _run_job(:stream)
                  else
                    init_job
                    unless no_load
                      join
                      self.load
                    end
                  end
                ensure
                  @started = true
                end
  end

  if no_load && no_load != :stream
    Misc.add_GET_param(path, "_format", "raw")
  else
    @result
  end
end
50
+
51
+
52
# Resolves Step-valued inputs into concrete values.
#
# Non-Step values are copied unchanged. A Step that is neither done nor
# streaming is started with run(true) and then given grace. The input whose
# key matches +stream_input+ is replaced by TSV.get_stream of the step; any
# other Step input is replaced by its loaded result.
#
# Returns a new Hash; +inputs+ itself is not modified.
def self.get_streams(inputs, stream_input = nil)
  inputs.each_with_object({}) do |(key, value), resolved|
    wants_stream = stream_input.to_s == key.to_s

    unless Step === value
      resolved[key] = value
      next
    end

    unless (value.done? or value.streaming?)
      value.run(true) and value.grace
    end

    resolved[key] = wants_stream ? TSV.get_stream(value) : value.load
  end
end
76
+
77
# Resolves this step's inputs through RemoteStep.get_streams, once.
# Returns the resolved inputs on the first call and nil afterwards.
def get_streams
  return if @inputs_done

  resolved = RemoteStep.get_streams @inputs, @stream_input
  @inputs = resolved
  @inputs_done = true
  @inputs
end
83
+
84
# Replaces every input with Step.dup_stream of it, once. Does nothing when
# already dupped or when RBBT_NO_STREAM is 'true'.
def dup_inputs
  return if @dupped || ENV["RBBT_NO_STREAM"] == 'true'

  Log.low "Dupping inputs for #{path}"
  @inputs = @inputs.each_with_object({}) do |(key, input), copies|
    copies[key] = Step.dup_stream input
  end
  @dupped = true
end
94
+
95
# Job name: nil for exec steps, @path while no url is known, otherwise the
# last path segment of the url (of its first element when it is an Array).
def name
  return nil if @is_exec
  return @path if @url.nil?

  location = Array === @url ? @url.first : @url
  location.split("/").last
end
100
+
101
# Setting the name rebuilds the job url as "<base_url>/<task>/<name>".
def name=(name)
  @url = [base_url, task, name].join("/")
end
104
+
105
# The task when it is known; otherwise the job is initialised and the task
# is taken from the second-to-last segment of the url.
def task_name
  return task if task

  init_job
  location = Array === @url ? @url.first : @url
  location.split("/")[-2]
end
110
+
111
# Always false for remote steps.
def nopid?; false; end
114
+
115
# Job info hash fetched through the adaptor and cached with Persist.memory.
#
# A refresh is requested only while the cached status is neither :done nor
# :error, and at most every 0.5 seconds. +check_lock+ is accepted but not
# used here.
def info(check_lock=false)
  cached_status = @info && @info[:status]
  @done = cached_status && [:done, :error].include?(cached_status.to_sym)

  stale = @last_info_time.nil? || (Time.now - @last_info_time) > 0.5
  update = !@done && stale

  @info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
    @last_info_time = Time.now
    init_job unless @url
    fetched = @adaptor.fix_hash(@adaptor.get_json(File.join(@url, 'info')))
    fetched[:status] = fetched[:status].to_sym if String === fetched[:status]
    fetched
  end
  @info
end
134
+
135
# Current status. :done once the step has been seen done; nil when the step
# has no url and has not started, or when fetching info fails. The cached
# @info is dropped after each lookup.
def status
  return :done if @done
  return nil unless url || started?

  begin
    current = info[:status]
    @done = true if current && current.to_sym == :done
    current
  rescue
    nil
  ensure
    @info = nil
  end
end
149
+
150
# Truthy once there is a result, or @started / @streaming is set.
def started?
  !@result.nil? || @started || @streaming
end
153
+
154
# True when the step is flagged done or its status is 'done' or 'noinfo'.
# The job is initialised first when it has no url. Status is queried once
# per comparison, so up to twice.
def done?
  init_job unless @url
  return @done if @done
  return true if status.to_s == 'done'

  status.to_s == 'noinfo'
end
158
+
159
# JSON fetched by the adaptor from "<url>/files".
def files
  files_url = File.join(url, 'files')
  @adaptor.get_json(files_url)
end
162
+
163
# Raw content fetched by the adaptor from "<url>/file/<file>".
def file(file)
  file_url = File.join(url, 'file', file)
  @adaptor.get_raw(file_url)
end
166
+
167
# The result as a stream: an IO result is returned as is, a String result is
# wrapped in a StringIO, anything else yields nil.
def get_stream
  return @result if IO === @result

  StringIO.new @result if String === @result
end
177
+
178
# Gives the job time to start: triggers `produce` when not yet started, backs
# off with short sleeps (0.1s, 0.5s, 1s), then polls every second until the
# step is done or started.
def grace
  produce unless @started

  [0.1, 0.5, 1].each do |pause|
    sleep pause unless started?
  end

  sleep 1 until done? or started?
end
187
+
188
+ #{{{ MANAGEMENT
189
+
190
+
191
# Location of the job: the url with `_format=raw` added when a url is known,
# otherwise "<base_url>/<task>/<base_name>-<fingerprint of inputs>".
def path
  return Misc.add_GET_param(@url, "_format", "raw") if @url

  [base_url, task, @base_name + '-' + Misc.fingerprint(inputs)].join("/")
end
198
+
199
# Starts the job asynchronously through init_job. Both arguments are accepted
# but not used here.
def fork(noload=false, semaphore=nil)
  init_job :asynchronous
end
202
+
203
# True unless the status is one of done, error, aborted or noinfo.
def running?
  case status.to_s
  when 'done', 'error', 'aborted', 'noinfo' then false
  else true
  end
end
206
+
207
# Executes the job once and memoizes the outcome in @result. With
# noload == :stream it goes through _run_job(:exec), otherwise exec_job.
# @started is set whenever an execution is attempted.
def exec(noload = false)
  @result ||= begin
                noload == :stream ? _run_job(:exec) : exec_job
              ensure
                @started = true
              end
end
218
+
219
# Blocks until the job is done, aborted or errored, then returns self.
#
# A pending IO result is consumed first. Polling backs off: one info refresh,
# then 0.2s, 1s, and afterwards every 3s.
def join
  init_job unless @url
  Log.debug{ "Joining RestClient: #{path}" }

  if IO === @result
    pending = @result
    @result = nil
    Misc.consume_stream(pending, true)
  end

  finished = lambda { self.done? || self.aborted? || self.error? }

  unless finished.call
    self.info
    return self if finished.call
    sleep 0.2 unless finished.call
    sleep 1 unless finished.call
    sleep 3 until finished.call
  end

  self
end
240
+
241
# Converts a raw response into a Ruby value according to +result_type+.
#
# res         - a String, or any object responding to #read (treated as a
#               stream). For a String the job is joined first.
# result_type - :string, :boolean, :tsv, :annotations, :array; anything else
#               is parsed as JSON, with a fallback for bare integers and
#               floats. Defaults to the step's own result_type.
#
# Raises whatever JSON.parse raised when the fallback cannot interpret the text.
def load_res(res, result_type = nil)
  stream = true if res.respond_to? :read
  join unless stream
  result_type ||= self.result_type
  case result_type
  when :string
    stream ? res.read : res
  when :boolean
    (stream ? res.read : res) == 'true'
  when :tsv
    if stream
      TSV.open(res, :monitor => true)
    else
      TSV.open(StringIO.new(res))
    end
  when :annotations
    if stream
      Annotated.load_tsv(TSV.open(res))
    else
      Annotated.load_tsv(TSV.open(StringIO.new(res)))
    end
  when :array
    # A leftover `res.split("\n")` used to follow this line and became the
    # return value, which raised for streams.
    (stream ? res.read : res).split("\n")
  else
    # Use the same stream test as above so StringIO and other non-IO readers
    # are read too.
    json_text = stream ? res.read : res
    begin
      JSON.parse json_text
    rescue
      case
      when json_text =~ /^\d+$/
        json_text.to_i
      when json_text =~ /^\d+\.\d/
        json_text.to_f
      else
        raise $!
      end
    end
  end
end
285
+
286
# Resets the step's state so it can be run again. File inputs are reopened
# from their path; every other input is kept as is.
def _restart
  @done = @name = @started = @aborted = nil

  @inputs = inputs.each_with_object({}) do |(key, input), fresh|
    fresh[key] = File === input ? File.open(input.path) : input
  end

  @info = nil
end
302
+
303
# Always an empty list for remote steps.
def input_checks; []; end
306
+
307
+ end
308
+ end
@@ -0,0 +1,158 @@
1
+ require 'rest-client'
2
+
3
+ module WorkflowRESTClient
4
# Adjusts +inputs+ in place before they are posted:
# - values that respond to #path but not #original_filename get a singleton
#   #original_filename returning their expanded path;
# - empty arrays are replaced by the marker string "EMPTY_ARRAY".
# Returns +inputs+.
def self.__prepare_inputs_for_restclient(inputs)
  inputs.each do |key, value|
    if value.respond_to?(:path) and not value.respond_to?(:original_filename)
      value.define_singleton_method(:original_filename) do
        File.expand_path(path)
      end
    end

    inputs[key] = "EMPTY_ARRAY" if Array === value and value.empty?
  end
end
19
+
20
# Raw text fetched from "<url>/description".
def workflow_description
  description_url = File.join(url, 'description')
  WorkflowRESTClient.get_raw(description_url)
end
23
+
24
# Documentation hash fetched from "<url>/documentation", memoized.
def documentation
  return @documention if @documention

  fetched = WorkflowRESTClient.get_json(File.join(url, "documentation"),{})
  @documention = IndiferentHash.setup(fetched)
end
27
+
28
# Info hash for +task+, fetched from "<url>/<task>/info" on first use and
# cached per task. Result type, export kind, input types and input names are
# converted to symbols.
def task_info(task)
  cache = (@task_info ||= {})
  return cache[task] unless cache[task].nil?

  details = WorkflowRESTClient.get_json(File.join(url, task.to_s, 'info'))
  details = WorkflowRESTClient.fix_hash(details)

  details[:result_type] = details[:result_type].to_sym
  details[:export] = details[:export].to_sym
  details[:input_types] = WorkflowRESTClient.fix_hash(details[:input_types], true)
  details[:inputs] = details[:inputs].collect{|input| input.to_sym }

  cache[task] = details
end
45
+
46
# All exported task names: asynchronous, then synchronous, then exec.
def exported_tasks
  combined = [@asynchronous_exports, @synchronous_exports, @exec_exports].inject(:+)
  combined.compact.flatten
end
49
+
50
# Lazily populated task table. Looking up a missing name builds a Task from
# the remote task info, names it, and stores it. The task body only raises,
# since the task is not meant to run locally.
def tasks
  @tasks ||= Hash.new do |table, task_name|
    remote_task = Task.setup(task_info(task_name)) do |*args|
      raise "This is a remote task"
    end
    remote_task.name = task_name.to_sym
    table[task_name] = remote_task
  end
end
60
+
61
# Touches every exported task so the task table gets populated. Returns nil.
def load_tasks
  exported_tasks.each do |task_name|
    tasks[task_name]
  end
  nil
end
65
+
66
# Lazily populated dependency table. Exported tasks get their dependencies
# from "<url>/<task>/dependencies"; any other task maps to an empty list.
def task_dependencies
  @task_dependencies ||= Hash.new do |table, task|
    table[task] =
      if exported_tasks.include? task
        WorkflowRESTClient.get_json(File.join(url, task.to_s, 'dependencies'))
      else
        []
      end
  end
end
75
+
76
# Loads the export lists from the workflow url: the asynchronous,
# synchronous, exec and stream task names (as symbols) plus the can_stream
# flag.
def init_remote_tasks
  exports = WorkflowRESTClient.get_json(url)
  as_symbols = lambda { |kind| exports[kind].collect{|task| task.to_sym } }

  @asynchronous_exports = as_symbols.call("asynchronous")
  @synchronous_exports = as_symbols.call("synchronous")
  @exec_exports = as_symbols.call("exec")
  @stream_exports = as_symbols.call("stream")
  @can_stream = exports["can_stream"]
end
84
+
85
# Posts +task_params+ to "<base_url>/<task>" and returns a stream over the
# response body.
#
# The POST runs in a separate thread that copies the body into a pipe. A
# non-OK response is turned into an exception raised in the calling thread.
# With rest-client older than 2.1 the body is gunzipped here; newer versions
# are read as is. +cache_type+ is accepted but not used in this method.
#
# IO values in +task_params+ are read fully and replaced in place.
def self.execute_job(base_url, task, task_params, cache_type)
  self.capture_exception do
    # URI.encode was removed in Ruby 3.0; it delegated to DEFAULT_PARSER.escape.
    task_url = URI::DEFAULT_PARSER.escape(File.join(base_url, task.to_s))

    sout, sin = Misc.pipe

    post_thread = Thread.new(Thread.current) do |parent|
      bl = lambda do |rok|
        if Net::HTTPOK === rok
          _url = rok["RBBT-STREAMING-JOB-URL"]
          @url = File.join(task_url, File.basename(_url)) if _url
          rok.read_body do |c,_a, _b|
            sin.write c
          end
          sin.close
        else
          err = StringIO.new
          rok.read_body do |c,_a, _b|
            err.write c
          end
          text = begin
                   # Rewind first: after the writes the position is at the end,
                   # so the gzip reader would see an empty stream and fail.
                   err.rewind
                   reader = Zlib::GzipReader.new(err)
                   reader.read
                 rescue
                   err.rewind
                   err.read
                 end
          # NOTE(review): @adaptor is not assigned in this module-level method
          # within the visible code; fall back to the module itself, assuming
          # parse_exception is defined next to capture_exception. Confirm.
          ne = (@adaptor || self).parse_exception text
          case ne
          when String
            # There is no rescued exception in scope here (the old `e.class`
            # raised NameError), so report the message as a RuntimeError.
            parent.raise RuntimeError, ne
          when Exception
            parent.raise ne
          else
            parent.raise "Error in RestClient: " << rok.message
          end
        end
      end

      task_params.each do |k,v|
        task_params[k] = v.read if IO === v
      end

      Log.debug{ "RestClient execute: #{ task_url } - #{Misc.fingerprint task_params}" }
      RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
    end

    # It seems like now response body are now decoded by Net::HTTP after 2.1
    # https://github.com/rest-client/rest-client/blob/cf3e5a115bcdb8f3344aeac0e45b44d67fac1a42/history.md
    decode = Gem.loaded_specs["rest-client"].version < Gem::Version.create('2.1')
    if decode
      reader = Zlib::GzipReader.new(sout)
      res_io = Misc.open_pipe do |sin|
        while c = reader.read(Misc::BLOCK_SIZE)
          sin.write c
        end
        sin.close
        @done = true
      end
      ConcurrentStream.setup(res_io, :threads => [post_thread]) do
        @done = true
        @streaming = false
      end
    else
      ConcurrentStream.setup(sout, :threads => [post_thread]) do
        @done = true
        @streaming = false
      end
    end

  end
end
157
+
158
+ end