rbbt-util 5.34.2 → 5.34.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/lib/rbbt/association/database.rb +6 -0
- data/lib/rbbt/association/open.rb +1 -1
- data/lib/rbbt/resource/rake.rb +9 -3
- data/lib/rbbt/resource/util.rb +8 -3
- data/lib/rbbt/resource.rb +5 -2
- data/lib/rbbt/tsv/change_id.rb +1 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
- data/lib/rbbt/tsv/parser.rb +7 -5
- data/lib/rbbt/util/log/progress/report.rb +9 -2
- data/lib/rbbt/util/misc/exceptions.rb +4 -3
- data/lib/rbbt/util/misc/inspect.rb +5 -1
- data/lib/rbbt/util/misc/omics.rb +1 -1
- data/lib/rbbt/util/misc/pipes.rb +1 -1
- data/lib/rbbt/util/named_array.rb +1 -1
- data/lib/rbbt/util/python.rb +33 -18
- data/lib/rbbt/workflow/definition.rb +7 -2
- data/lib/rbbt/workflow/dependencies.rb +1 -0
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +5 -0
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +9 -11
- data/lib/rbbt/workflow/step/dependencies.rb +15 -2
- data/lib/rbbt/workflow/step/save_load_inputs.rb +16 -85
- data/lib/rbbt/workflow/step.rb +1 -1
- data/lib/rbbt/workflow/util/archive.rb +4 -0
- data/lib/rbbt/workflow/util/provenance.rb +1 -1
- data/share/Rlib/util.R +8 -2
- data/share/rbbt_commands/tsv/view +76 -0
- data/share/rbbt_commands/workflow/server +1 -1
- data/share/rbbt_commands/workflow/task +2 -2
- data/test/rbbt/test_resource.rb +14 -4
- data/test/rbbt/util/misc/test_omics.rb +21 -1
- data/test/rbbt/util/misc/test_pipes.rb +20 -1
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '073587b420676db7c055e58dbabaecbf04a7b30ba9d7ea912f1c12df5a6d180d'
|
4
|
+
data.tar.gz: 6d263d07b85095fefb48c15692af0db66b56ef671ada83e98c66a4765d4ca416
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 945c0f4b9d6f99024f6610dc2eb0c52c6546606937fff72558281ecf5d2aaddeb2e32ce3373b9348fa764e8e7285cdcdad6916118ba259d61c24b4b3477e3317
|
7
|
+
data.tar.gz: a8ba069b626b1a09d6da8e73eaa901119795ade4b4af33f1891b8581cfe80ccf48ca439a6a96b67f45e6bdf687ba7e90df263b8b4caabec18af45f6008e2b5d9
|
data/LICENSE
CHANGED
@@ -86,6 +86,8 @@ module Association
|
|
86
86
|
info_fields = field_pos.collect{|f| f == :key ? :key : all_fields[f]}
|
87
87
|
options = options.merge({:key_field => source_field, :fields => info_fields})
|
88
88
|
|
89
|
+
fields = field_headers if fields.nil?
|
90
|
+
|
89
91
|
data = options[:data] || {}
|
90
92
|
TmpFile.with_file do |tmpfile|
|
91
93
|
tmp_data = Persist.open_database(tmpfile, true, :double, "HDB")
|
@@ -160,6 +162,10 @@ module Association
|
|
160
162
|
|
161
163
|
data ||= {}
|
162
164
|
tsv = nil
|
165
|
+
if data.respond_to?(:close) && data.respond_to?(:write)
|
166
|
+
data.close
|
167
|
+
data.write
|
168
|
+
end
|
163
169
|
TmpFile.with_file do |tmpfile|
|
164
170
|
tmp_data = Persist.open_database(tmpfile, true, open_options[:type], "HDB")
|
165
171
|
|
@@ -24,7 +24,7 @@ module Association
|
|
24
24
|
options = options.dup
|
25
25
|
data.serializer = :double if data.respond_to? :serializer
|
26
26
|
|
27
|
-
tsv = Association.database(file, options.merge(:
|
27
|
+
tsv = Association.database(file, options.merge(:unnamed => true, :data => data, :type => :double))
|
28
28
|
|
29
29
|
data
|
30
30
|
end
|
data/lib/rbbt/resource/rake.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rake'
|
2
2
|
module Rake
|
3
3
|
class TaskNotFound < StandardError; end
|
4
|
-
def self.run(rakefile, dir, task)
|
4
|
+
def self.run(rakefile, dir, task, &block)
|
5
5
|
old_pwd = FileUtils.pwd
|
6
6
|
|
7
7
|
Rake::FileTask.module_eval do
|
@@ -32,9 +32,15 @@ module Rake
|
|
32
32
|
t = nil
|
33
33
|
pid = Process.fork{
|
34
34
|
if block_given?
|
35
|
-
|
35
|
+
TOPLEVEL_BINDING.receiver.instance_exec &block
|
36
36
|
else
|
37
|
-
|
37
|
+
if Misc.is_filename? rakefile
|
38
|
+
load rakefile
|
39
|
+
else
|
40
|
+
TmpFile.with_file(rakefile) do |tmpfile|
|
41
|
+
load tmpfile
|
42
|
+
end
|
43
|
+
end
|
38
44
|
end
|
39
45
|
|
40
46
|
raise TaskNotFound if Rake::Task[task].nil?
|
data/lib/rbbt/resource/util.rb
CHANGED
@@ -123,8 +123,8 @@ module Resource
|
|
123
123
|
|
124
124
|
|
125
125
|
def rake_for(path)
|
126
|
-
@rake_dirs.
|
127
|
-
|
126
|
+
@rake_dirs.select{|dir, content|
|
127
|
+
Misc.common_path(dir, path)
|
128
128
|
}.sort_by{|dir, content|
|
129
129
|
dir.length
|
130
130
|
}.last
|
@@ -137,12 +137,17 @@ module Resource
|
|
137
137
|
def run_rake(path, rakefile, rake_dir)
|
138
138
|
task = Misc.path_relative_to rake_dir, path
|
139
139
|
rakefile = rakefile.produce if rakefile.respond_to? :produce
|
140
|
+
rakefile = rakefile.find if rakefile.respond_to? :find
|
140
141
|
|
141
142
|
rake_dir = rake_dir.find(:user) if rake_dir.respond_to? :find
|
142
143
|
|
143
144
|
begin
|
144
145
|
require 'rbbt/resource/rake'
|
145
|
-
|
146
|
+
if Proc === rakefile
|
147
|
+
Rake.run(nil, rake_dir, task, &rakefile)
|
148
|
+
else
|
149
|
+
Rake.run(rakefile, rake_dir, task)
|
150
|
+
end
|
146
151
|
rescue Rake::TaskNotFound
|
147
152
|
raise $! if rake_dir.nil? or rake_dir.empty? or rake_dir == "/" or rake_dir == "./"
|
148
153
|
task = File.join(File.basename(rake_dir), task)
|
data/lib/rbbt/resource.rb
CHANGED
@@ -59,7 +59,7 @@ module Resource
|
|
59
59
|
|
60
60
|
def claim(path, type, content = nil, &block)
|
61
61
|
if type == :rake
|
62
|
-
@rake_dirs[path] = content
|
62
|
+
@rake_dirs[path] = content || block
|
63
63
|
else
|
64
64
|
@resources[path] = [type, content || block]
|
65
65
|
|
@@ -346,9 +346,12 @@ url='#{url}'
|
|
346
346
|
locations = (Path::STANDARD_SEARCH + resource.search_order + resource.search_paths.keys)
|
347
347
|
locations -= [:current, "current"]
|
348
348
|
locations << :current
|
349
|
+
search_paths = IndiferentHash.setup(resource.search_paths)
|
349
350
|
locations.uniq.each do |name|
|
350
|
-
pattern =
|
351
|
+
pattern = search_paths[name]
|
352
|
+
pattern = resource.search_paths[pattern] while Symbol === pattern
|
351
353
|
next if pattern.nil?
|
354
|
+
|
352
355
|
pattern = pattern.sub('{PWD}', Dir.pwd)
|
353
356
|
if String === pattern and pattern.include?('{')
|
354
357
|
regexp = "^" + pattern.gsub(/{([^}]+)}/,'(?<\1>[^/]+)') + "(?:/(?<REST>.*))?/?$"
|
data/lib/rbbt/tsv/change_id.rb
CHANGED
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -8,7 +8,9 @@ module TSV
|
|
8
8
|
|
9
9
|
def all_fields
|
10
10
|
all = [key_field] + fields
|
11
|
-
|
11
|
+
# ToDo: What was this for?
|
12
|
+
#NamedArray.setup all, all
|
13
|
+
all
|
12
14
|
end
|
13
15
|
|
14
16
|
def parse_header(stream)
|
@@ -550,12 +552,12 @@ module TSV
|
|
550
552
|
line = self.rescue_first_line
|
551
553
|
line = stream.gets if line.nil?
|
552
554
|
|
553
|
-
if @tsv_grep
|
555
|
+
if @tsv_grep || grep
|
554
556
|
|
555
|
-
stream = Open.grep(stream, @tsv_grep, invert_grep, fixed_grep)
|
557
|
+
stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
|
556
558
|
stream.no_fail = true
|
557
559
|
begin
|
558
|
-
match = Open.grep(StringIO.new(line), @tsv_grep, invert_grep, fixed_grep).read
|
560
|
+
match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
|
559
561
|
line = stream.gets if match.empty?
|
560
562
|
rescue Exception
|
561
563
|
Log.exception $!
|
@@ -650,7 +652,7 @@ module TSV
|
|
650
652
|
end
|
651
653
|
end
|
652
654
|
ensure
|
653
|
-
Log::ProgressBar.remove_bar(progress_monitor)
|
655
|
+
Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
|
654
656
|
stream.close unless stream.closed?
|
655
657
|
stream.join if stream.respond_to? :join and not stream.joined?
|
656
658
|
end
|
@@ -65,11 +65,17 @@ module Log
|
|
65
65
|
|
66
66
|
thr = 0.0000001 if thr == 0
|
67
67
|
|
68
|
-
if mean.nil? or mean.to_i >
|
68
|
+
if mean.nil? or mean.to_i > 2
|
69
69
|
str = "#{ Log.color :blue, thr.to_i.to_s } per sec."
|
70
70
|
#str << " #{ Log.color :yellow, mean.to_i.to_s } avg. #{Log.color :yellow, @mean_max.to_i.to_s} max." if @mean_max > 0
|
71
71
|
else
|
72
|
-
|
72
|
+
if 1.0/thr < 1
|
73
|
+
str = "#{ Log.color :blue, (1.0/thr).round(2).to_s } secs each"
|
74
|
+
elsif 1.0/thr < 2
|
75
|
+
str = "#{ Log.color :blue, (1.0/thr).round(1).to_s } secs each"
|
76
|
+
else
|
77
|
+
str = "#{ Log.color :blue, (1/thr).ceil.to_s } secs each"
|
78
|
+
end
|
73
79
|
#str << " #{ Log.color :yellow, (1/mean).ceil.to_s } avg. #{Log.color :yellow, (1/@mean_max).ceil.to_s} min." if @mean_max > 0
|
74
80
|
end
|
75
81
|
|
@@ -184,6 +190,7 @@ module Log
|
|
184
190
|
@last_time = Time.now
|
185
191
|
@last_count = ticks
|
186
192
|
@last_percent = percent if max and max > 0
|
193
|
+
Log::LAST.replace "progress"
|
187
194
|
save if file
|
188
195
|
end
|
189
196
|
|
@@ -1,7 +1,8 @@
|
|
1
1
|
class RbbtException < StandardError; end
|
2
2
|
class ParameterException < RbbtException; end
|
3
|
-
|
4
|
-
class
|
3
|
+
|
4
|
+
class FieldNotFoundError < StandardError;end
|
5
|
+
class ClosedStream < StandardError; end
|
5
6
|
|
6
7
|
class ProcessFailed < StandardError;
|
7
8
|
def initialize(pid = Process.pid)
|
@@ -26,7 +27,7 @@ end
|
|
26
27
|
class SemaphoreInterrupted < TryAgain; end
|
27
28
|
class LockInterrupted < TryAgain; end
|
28
29
|
|
29
|
-
class RemoteServerError <
|
30
|
+
class RemoteServerError < StandardError; end
|
30
31
|
|
31
32
|
class DependencyError < Aborted
|
32
33
|
def initialize(msg)
|
@@ -72,7 +72,9 @@ module Misc
|
|
72
72
|
when File
|
73
73
|
"<File:" + obj.path + ">"
|
74
74
|
when NamedArray
|
75
|
-
|
75
|
+
fields = obj.fields
|
76
|
+
fields = fields.collect if NamedArray === fields
|
77
|
+
"[<NamedArray: fields=#{fingerprint fields} -- values=#{fingerprint obj[0..-1]}]"
|
76
78
|
when Array
|
77
79
|
if (length = obj.length) > 10
|
78
80
|
"[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
|
@@ -298,6 +300,8 @@ module Misc
|
|
298
300
|
str = case obj
|
299
301
|
when nil
|
300
302
|
'nil'
|
303
|
+
when Numeric
|
304
|
+
Float === obj && obj % 1 == 0 ? obj.to_i.to_s : obj.to_s
|
301
305
|
when Symbol
|
302
306
|
obj.to_s
|
303
307
|
when TrueClass
|
data/lib/rbbt/util/misc/omics.rb
CHANGED
data/lib/rbbt/util/misc/pipes.rb
CHANGED
@@ -420,7 +420,7 @@ module Misc
|
|
420
420
|
end
|
421
421
|
|
422
422
|
Open.touch path if Open.exists? path
|
423
|
-
content.join if content.respond_to?
|
423
|
+
content.join if content.respond_to?(:join) and not Path === content and not (content.respond_to?(:joined?) && content.joined?)
|
424
424
|
|
425
425
|
Open.notify_write(path)
|
426
426
|
rescue Aborted
|
@@ -22,7 +22,7 @@ module NamedArray
|
|
22
22
|
def self.setup(array, fields, key = nil, entity_options = nil, entity_templates = nil)
|
23
23
|
return array if array.nil?
|
24
24
|
array.extend NamedArray unless NamedArray === array
|
25
|
-
array.fields = Annotated.purge fields
|
25
|
+
array.fields = Annotated === fields ? Annotated.purge(fields) : fields
|
26
26
|
array.key = key
|
27
27
|
array.entity_options = entity_options unless entity_options.nil?
|
28
28
|
array.entity_templates = entity_templates unless entity_templates.nil?
|
data/lib/rbbt/util/python.rb
CHANGED
@@ -1,9 +1,39 @@
|
|
1
1
|
require 'rbbt-util'
|
2
2
|
require 'pycall/import'
|
3
|
+
require 'rbbt/util/python/util'
|
3
4
|
|
4
5
|
module RbbtPython
|
5
6
|
extend PyCall::Import
|
6
7
|
|
8
|
+
def self.script(text, options = {})
|
9
|
+
Log.debug "Running python script:\n#{text.dup}"
|
10
|
+
text = StringIO.new text unless IO === text
|
11
|
+
CMD.cmd_log(:python, options.merge(:in => text))
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.add_path(path)
|
15
|
+
self.run 'sys' do
|
16
|
+
sys.path.append path
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.add_paths(paths)
|
21
|
+
self.run 'sys' do
|
22
|
+
paths.each do |path|
|
23
|
+
sys.path.append path
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.init_rbbt
|
29
|
+
if ! defined?(@@__init_rbbt) || ! @@__init_rbbt
|
30
|
+
Log.debug "Loading python 'rbbt' module into pycall RbbtPython module"
|
31
|
+
RbbtPython.add_paths(Rbbt.python.find_all)
|
32
|
+
RbbtPython.pyimport("rbbt")
|
33
|
+
@@__init_rbbt = true
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
7
37
|
def self.exec(script)
|
8
38
|
PyCall.exec(script)
|
9
39
|
end
|
@@ -63,7 +93,9 @@ module RbbtPython
|
|
63
93
|
|
64
94
|
def self.run_log(mod = nil, imports = nil, severity = 0, severity_err = nil, &block)
|
65
95
|
if mod
|
66
|
-
if
|
96
|
+
if imports == "*" || imports == ["*"]
|
97
|
+
pyfrom mod
|
98
|
+
elsif Array === imports
|
67
99
|
pyfrom mod, :import => imports
|
68
100
|
elsif Hash === imports
|
69
101
|
pyimport mod, imports
|
@@ -92,21 +124,4 @@ module RbbtPython
|
|
92
124
|
module_eval(&block)
|
93
125
|
end
|
94
126
|
end
|
95
|
-
|
96
|
-
def self.add_path(path)
|
97
|
-
self.run 'sys' do
|
98
|
-
sys.path.append path
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
def self.add_paths(paths)
|
103
|
-
self.run 'sys' do
|
104
|
-
paths.each do |path|
|
105
|
-
sys.path.append path
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
RbbtPython.add_paths Rbbt.python.find_all
|
111
|
-
RbbtPython.pyimport "rbbt"
|
112
127
|
end
|
@@ -22,8 +22,13 @@ module Workflow
|
|
22
22
|
:extension => nil)
|
23
23
|
|
24
24
|
|
25
|
-
def helper(name, &block)
|
26
|
-
|
25
|
+
def helper(name, *args, &block)
|
26
|
+
if block_given?
|
27
|
+
helpers[name] = block
|
28
|
+
else
|
29
|
+
raise RbbtException, "helper #{name} unkown in #{self} workflow" unless helpers[name]
|
30
|
+
helpers[name].call(*args)
|
31
|
+
end
|
27
32
|
end
|
28
33
|
|
29
34
|
def desc(description)
|
@@ -148,6 +148,11 @@ job.clean
|
|
148
148
|
def self.upload_inputs(server, inputs, input_types, input_id)
|
149
149
|
TmpFile.with_file do |dir|
|
150
150
|
if Step.save_inputs(inputs, input_types, dir)
|
151
|
+
Dir.glob(File.join(dir, "*.as_step")).each do |file|
|
152
|
+
path = Open.read(file).strip
|
153
|
+
new = Step.migrate(path, :user, :target => server)
|
154
|
+
Open.write(file, new)
|
155
|
+
end
|
151
156
|
CMD.cmd("ssh '#{server}' mkdir -p .rbbt/tmp/tmp-ssh_job_inputs/; scp -r '#{dir}' #{server}:.rbbt/tmp/tmp-ssh_job_inputs/#{input_id}")
|
152
157
|
end
|
153
158
|
end
|
@@ -3,6 +3,7 @@ class RemoteStep
|
|
3
3
|
attr_accessor :override_dependencies
|
4
4
|
|
5
5
|
def init_job(cache_type = nil, other_params = {})
|
6
|
+
return self if @url
|
6
7
|
cache_type = :asynchronous if cache_type.nil? and not @is_exec
|
7
8
|
cache_type = :exec if cache_type.nil?
|
8
9
|
@last_info_time = nil
|
@@ -10,20 +11,17 @@ class RemoteStep
|
|
10
11
|
@server, @server_path = RemoteWorkflow::SSH.parse_url base_url
|
11
12
|
@input_id ||= "inputs-" << rand(100000).to_s
|
12
13
|
|
13
|
-
if override_dependencies
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
name, _sep, value = od.partition("=")
|
18
|
-
inputs[name] = value
|
19
|
-
end
|
14
|
+
if override_dependencies && override_dependencies.any?
|
15
|
+
override_dependencies.each do |od|
|
16
|
+
name, _sep, value = od.partition("=")
|
17
|
+
inputs[name] = value
|
20
18
|
end
|
21
|
-
|
22
|
-
RemoteWorkflow::SSH.upload_inputs(@server, inputs, @input_types, @input_id)
|
23
|
-
else
|
24
|
-
RemoteWorkflow::SSH.upload_inputs(@server, inputs, @input_types, @input_id)
|
25
19
|
end
|
26
20
|
|
21
|
+
inputs.select{|i| Step === i }.each{|i| i.produce }
|
22
|
+
|
23
|
+
RemoteWorkflow::SSH.upload_inputs(@server, inputs, @input_types, @input_id)
|
24
|
+
|
27
25
|
@name ||= Persist.memory("RemoteSteps", :workflow => self, :task => task, :jobname => @name, :inputs => inputs, :cache_type => cache_type) do
|
28
26
|
Misc.insist do
|
29
27
|
input_types = {}
|
@@ -230,7 +230,14 @@ class Step
|
|
230
230
|
if dep_step[step.path] and dep_step[step.path].length > 1
|
231
231
|
stream = step.result
|
232
232
|
other_steps = dep_step[step.path].uniq.reject{|d| d.overriden }
|
233
|
+
|
234
|
+
other_steps = other_steps.collect{|d|
|
235
|
+
deps_using_step_input = d.rec_dependencies.select{|d| d.inputs.include? step }
|
236
|
+
deps_using_step_input.any? ? deps_using_step_input : d
|
237
|
+
}.flatten.uniq
|
238
|
+
|
233
239
|
return unless other_steps.length > 1
|
240
|
+
|
234
241
|
log_dependency_exec(step, "duplicating #{other_steps.length}")
|
235
242
|
copies = Misc.tee_stream_thread_multiple(stream, other_steps.length)
|
236
243
|
copies.extend StreamArray
|
@@ -370,7 +377,6 @@ class Step
|
|
370
377
|
end
|
371
378
|
|
372
379
|
def run_dependencies
|
373
|
-
dep_step = {}
|
374
380
|
|
375
381
|
rec_dependencies = self.rec_dependencies(true) + input_dependencies
|
376
382
|
|
@@ -385,23 +391,30 @@ class Step
|
|
385
391
|
|
386
392
|
canfail_paths = self.canfail_paths
|
387
393
|
|
394
|
+
dep_step = {}
|
388
395
|
seen_paths = Set.new
|
389
396
|
all_deps.uniq.each do |step|
|
390
397
|
next if seen_paths.include? step.path
|
391
398
|
seen_paths << step.path
|
399
|
+
|
392
400
|
begin
|
393
401
|
Step.prepare_for_execution(step) unless step == self
|
394
402
|
rescue DependencyError, DependencyRbbtException
|
395
403
|
raise $! unless canfail_paths.include? step.path
|
396
404
|
end
|
405
|
+
|
397
406
|
next unless step.dependencies and step.dependencies.any?
|
398
|
-
|
407
|
+
|
408
|
+
# ToDo is this really necessary
|
409
|
+
#(step.dependencies + step.input_dependencies).each do |step_dep|
|
410
|
+
step.dependencies.each do |step_dep|
|
399
411
|
next unless step.dependencies.include?(step_dep)
|
400
412
|
next if step_dep.done? or step_dep.running? or
|
401
413
|
(ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
|
402
414
|
dep_step[step_dep.path] ||= []
|
403
415
|
dep_step[step_dep.path] << step
|
404
416
|
end
|
417
|
+
|
405
418
|
end
|
406
419
|
|
407
420
|
produced = []
|
@@ -51,8 +51,8 @@ module Workflow
|
|
51
51
|
when :path
|
52
52
|
inputs[input.to_sym] = Open.read(file).strip.split("\n").first
|
53
53
|
when :io
|
54
|
-
inputs[input.to_sym] = Open.open(Open.realpath(file))
|
55
|
-
when :
|
54
|
+
inputs[input.to_sym] = Open.open(Open.realpath(file))
|
55
|
+
when :io_array
|
56
56
|
inputs[input.to_sym] = Open.realpath(file).split("\n").collect{|f| Open.open(f)}
|
57
57
|
when :step_array
|
58
58
|
steps = Open.read(file).strip.split("\n").collect{|path| Workflow.load_step(path) }
|
@@ -97,6 +97,10 @@ module Workflow
|
|
97
97
|
inputs[input.to_sym] = TSV.open(file)
|
98
98
|
when :boolean
|
99
99
|
inputs[input.to_sym] = (file.read.strip == 'true')
|
100
|
+
when :integer
|
101
|
+
inputs[input.to_sym] = file.read.to_i
|
102
|
+
when :float
|
103
|
+
inputs[input.to_sym] = file.read.to_f
|
100
104
|
else
|
101
105
|
Log.debug "Loading #{ input } from #{file}"
|
102
106
|
inputs[input.to_sym] = file.read.strip
|
@@ -125,86 +129,6 @@ module Workflow
|
|
125
129
|
inputs = task_inputs_from_directory(task_name, directory)
|
126
130
|
job(task_name, jobname, inputs)
|
127
131
|
end
|
128
|
-
|
129
|
-
#def self.load_inputs_old(dir, input_names, input_types)
|
130
|
-
# inputs = {}
|
131
|
-
# if File.exists?(dir) && ! File.directory?(dir)
|
132
|
-
# Log.debug "Loading inputs from #{dir}, not a directory trying as tar.gz"
|
133
|
-
# tarfile = dir
|
134
|
-
# digest = CMD.cmd("md5sum '#{tarfile}'").read.split(" ").first
|
135
|
-
# tmpdir = Rbbt.tmp.input_bundle[digest].find
|
136
|
-
# Misc.untar(tarfile, tmpdir) unless File.exists? tmpdir
|
137
|
-
# files = tmpdir.glob("*")
|
138
|
-
# if files.length == 1 && File.directory?(files.first)
|
139
|
-
# tmpdir = files.first
|
140
|
-
# end
|
141
|
-
# load_inputs(tmpdir, input_names, input_types)
|
142
|
-
# else
|
143
|
-
# dir = Path.setup(dir.dup)
|
144
|
-
# input_names.each do |input|
|
145
|
-
# file = dir[input].find
|
146
|
-
# file = dir.glob(input.to_s + ".*").reject{|f| f =~ /\.md5$/}.first if file.nil? or not (File.symlink?(file) || file.exists?)
|
147
|
-
# Log.debug "Trying #{ input }: #{file}"
|
148
|
-
# next unless file and (File.symlink?(file) || file.exists?)
|
149
|
-
|
150
|
-
# type = input_types[input]
|
151
|
-
|
152
|
-
# type = :io if file.split(".").last == 'as_io'
|
153
|
-
|
154
|
-
# type = :path if file.split(".").last == 'as_path'
|
155
|
-
|
156
|
-
# type = :filename if file.split(".").last == 'as_filename'
|
157
|
-
|
158
|
-
# type = :nofile if file.split(".").last == 'nofile'
|
159
|
-
|
160
|
-
# case type
|
161
|
-
# when :nofile
|
162
|
-
# inputs[input.to_sym] = Open.realpath(file)
|
163
|
-
# when :path
|
164
|
-
# inputs[input.to_sym] = Open.realpath(Open.read(file).strip)
|
165
|
-
# when :io
|
166
|
-
# inputs[input.to_sym] = Open.open(Open.realpath(file))
|
167
|
-
# when :file, :binary
|
168
|
-
# Log.debug "Pointing #{ input } to #{file}"
|
169
|
-
# if file =~ /\.yaml/
|
170
|
-
# inputs[input.to_sym] = YAML.load(Open.read(file))
|
171
|
-
# else
|
172
|
-
# if File.symlink?(file)
|
173
|
-
# link_target = File.expand_path(File.readlink(file), File.dirname(file))
|
174
|
-
# inputs[input.to_sym] = link_target
|
175
|
-
# else
|
176
|
-
# inputs[input.to_sym] = Open.realpath(file)
|
177
|
-
# end
|
178
|
-
# end
|
179
|
-
# when :text
|
180
|
-
# Log.debug "Reading #{ input } from #{file}"
|
181
|
-
# inputs[input.to_sym] = Open.read(file)
|
182
|
-
# when :array
|
183
|
-
# Log.debug "Reading array #{ input } from #{file}"
|
184
|
-
# inputs[input.to_sym] = Open.read(file).split("\n")
|
185
|
-
# when :tsv
|
186
|
-
# Log.debug "Opening tsv #{ input } from #{file}"
|
187
|
-
# inputs[input.to_sym] = TSV.open(file)
|
188
|
-
# when :boolean
|
189
|
-
# inputs[input.to_sym] = (file.read.strip == 'true')
|
190
|
-
# else
|
191
|
-
# Log.debug "Loading #{ input } from #{file}"
|
192
|
-
# inputs[input.to_sym] = file.read.strip
|
193
|
-
# end
|
194
|
-
|
195
|
-
# end
|
196
|
-
# inputs = IndiferentHash.setup(inputs)
|
197
|
-
|
198
|
-
# dir.glob("*#*").each do |od|
|
199
|
-
# name = File.basename(od)
|
200
|
-
# value = Open.read(od)
|
201
|
-
# Log.debug "Loading override dependency #{ name } as #{value}"
|
202
|
-
# inputs[name] = value.chomp
|
203
|
-
# end
|
204
|
-
|
205
|
-
# inputs
|
206
|
-
# end
|
207
|
-
#end
|
208
132
|
end
|
209
133
|
|
210
134
|
class Step
|
@@ -252,10 +176,17 @@ class Step
|
|
252
176
|
end
|
253
177
|
|
254
178
|
Log.debug "Saving job input #{name} (#{type}) into #{path}"
|
255
|
-
|
179
|
+
|
180
|
+
if value.respond_to? :filename
|
181
|
+
Open.write(path, value.filename)
|
182
|
+
elsif IO === value
|
183
|
+
Open.write(path, value)
|
184
|
+
else
|
185
|
+
Open.write(path, value.to_s)
|
186
|
+
end
|
256
187
|
end
|
257
188
|
|
258
|
-
def self.save_inputs(inputs, input_types,
|
189
|
+
def self.save_inputs(inputs, input_types, dir)
|
259
190
|
inputs.each do |name,value|
|
260
191
|
type = input_types[name]
|
261
192
|
type = type.to_s if type
|
@@ -300,7 +231,7 @@ class Step
|
|
300
231
|
input_types = IndiferentHash.setup(input_types.merge(:override_dependencies => :array))
|
301
232
|
end
|
302
233
|
|
303
|
-
save_inputs(inputs, input_types,
|
234
|
+
save_inputs(inputs, input_types, dir)
|
304
235
|
|
305
236
|
inputs.keys
|
306
237
|
end
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -184,9 +184,13 @@ puts files * "\n"
|
|
184
184
|
|
185
185
|
target = Rbbt.migrate_target_path('var/jobs', search_path, resource, options[:target])
|
186
186
|
|
187
|
+
target_path = File.join(target, *path.split("/")[-3..-1])
|
188
|
+
|
187
189
|
subpath_files.each do |subpath, files|
|
188
190
|
Rbbt.migrate_files([subpath], target, options.merge(:files => files))
|
189
191
|
end
|
192
|
+
|
193
|
+
target_path
|
190
194
|
end
|
191
195
|
|
192
196
|
def self.purge(path, recursive = false, skip_overriden = true)
|
data/share/Rlib/util.R
CHANGED
@@ -232,20 +232,26 @@ rbbt.tsv2tibble <- function(data){
|
|
232
232
|
as_tibble(data, rownames=attr(data, 'key.field'))
|
233
233
|
}
|
234
234
|
|
235
|
-
rbbt.tsv.write <- function(filename, data, key.field = NULL, extra_headers = NULL, eol="\n", ...){
|
235
|
+
rbbt.tsv.write <- function(filename, data, key.field = NULL, extra_headers = NULL, eol="\n", type = 'list', names = NULL, ...){
|
236
236
|
|
237
237
|
if (is.null(key.field)){ key.field = attributes(data)$key.field;}
|
238
238
|
if (is.null(key.field)){ key.field = "ID";}
|
239
239
|
|
240
240
|
f = file(filename, 'wb');
|
241
241
|
|
242
|
+
if (is.null(extra_headers)){
|
243
|
+
extra_headers = paste(":type",type,sep="=")
|
244
|
+
}
|
245
|
+
|
242
246
|
if (!is.null(extra_headers)){
|
243
247
|
extra_headers = paste("#: ", extra_headers, "\n", sep="");
|
244
248
|
cat(extra_headers, file=f);
|
245
249
|
}
|
246
250
|
|
247
251
|
header = paste("#", key.field, sep="");
|
248
|
-
|
252
|
+
if (is.null(names)){ names = colnames(data)}
|
253
|
+
if (is.null(names)){ names = names(data)}
|
254
|
+
for (name in names){ header = paste(header, name, sep="\t");}
|
249
255
|
header = paste(header, "\n", sep="");
|
250
256
|
cat(header, file=f);
|
251
257
|
|
@@ -0,0 +1,76 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt-util'
|
4
|
+
require 'rbbt/util/simpleopt'
|
5
|
+
|
6
|
+
$0 = "rbbt #{$previous_commands*" "} #{ File.basename(__FILE__) }" if $previous_commands
|
7
|
+
|
8
|
+
options = SOPT.setup <<EOF
|
9
|
+
|
10
|
+
View TSV nicer
|
11
|
+
|
12
|
+
$ #{$0} [options] <filename.tsv|->
|
13
|
+
|
14
|
+
Use - to read from STDIN
|
15
|
+
|
16
|
+
-h--help Print this help
|
17
|
+
|
18
|
+
EOF
|
19
|
+
if options[:help]
|
20
|
+
if defined? rbbt_usage
|
21
|
+
rbbt_usage
|
22
|
+
else
|
23
|
+
puts SOPT.doc
|
24
|
+
end
|
25
|
+
exit 0
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
file = ARGV.shift
|
30
|
+
|
31
|
+
file = STDIN if file == '-'
|
32
|
+
|
33
|
+
case
|
34
|
+
when options[:tokyocabinet]
|
35
|
+
tsv = Persist.open_tokyocabinet(file, false)
|
36
|
+
when options[:tokyocabinet_bd]
|
37
|
+
tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
|
38
|
+
else
|
39
|
+
if String === file
|
40
|
+
file = file.dup
|
41
|
+
Path.setup(File.expand_path(file))
|
42
|
+
end
|
43
|
+
tsv = file
|
44
|
+
end
|
45
|
+
|
46
|
+
tsv = TSV.open(tsv) if IO === tsv
|
47
|
+
|
48
|
+
|
49
|
+
begin
|
50
|
+
Workflow.require_workflow "Genomics"
|
51
|
+
tsv = Genomics.job(:names, nil, :tsv => tsv).run
|
52
|
+
rescue
|
53
|
+
end
|
54
|
+
|
55
|
+
puts [Log.highlight(Log.color(:magenta, tsv.key_field)), Log.color(:yellow, tsv.fields * "\t")] * "\t"
|
56
|
+
TSV.traverse tsv do |k,values,fields|
|
57
|
+
fvalues = values.zip(fields).collect do |v,field|
|
58
|
+
if String === v
|
59
|
+
v = v.to_f.round(2) if v =~ /^-?\d+\.\d+$/
|
60
|
+
v = v.to_i if v =~ /^-?\d+$/
|
61
|
+
end
|
62
|
+
|
63
|
+
if Numeric === v
|
64
|
+
if field =~ /p.?value/i
|
65
|
+
v = v.round(4)
|
66
|
+
v < 0.05 ? Log.color(:cyan, v.to_s) : v
|
67
|
+
else
|
68
|
+
v = v.round(2)
|
69
|
+
v < 0 ? Log.color(:red, v.to_s) : Log.color(:green, v.to_s)
|
70
|
+
end
|
71
|
+
else
|
72
|
+
v
|
73
|
+
end
|
74
|
+
end
|
75
|
+
puts [Log.color(:magenta, k), fvalues] * "\t"
|
76
|
+
end
|
@@ -114,7 +114,7 @@ TmpFile.with_file do |app_dir|
|
|
114
114
|
name, _sep, value = pair.partition("=")
|
115
115
|
name = name[1..-1].to_sym if name[0] == ':'
|
116
116
|
value = value.to_i if value =~ /^\d+$/
|
117
|
-
value = true if value == "true"
|
117
|
+
value = true if value.nil? || value == "true"
|
118
118
|
value = false if value == "false"
|
119
119
|
options[name] = value
|
120
120
|
end
|
@@ -598,14 +598,14 @@ when Step
|
|
598
598
|
elsif detach
|
599
599
|
exit! 0
|
600
600
|
else
|
601
|
-
|
601
|
+
res.join if res.running?
|
602
602
|
if %w(float integer string boolean).include?(res.result_type.to_s)
|
603
603
|
out.puts res.load
|
604
604
|
else
|
605
605
|
Open.open(res.path, :mode => 'rb') do |io|
|
606
606
|
Misc.consume_stream(io, false, out)
|
607
607
|
end if Open.exist?(res.path) || Open.remote?(res.path) || Open.ssh?(res.path)
|
608
|
-
end
|
608
|
+
end if res.done?
|
609
609
|
end
|
610
610
|
else
|
611
611
|
if Array === res
|
data/test/rbbt/test_resource.rb
CHANGED
@@ -17,9 +17,18 @@ module TestResource
|
|
17
17
|
file 'foo' do |t|
|
18
18
|
Open.write(t.name, "TEST")
|
19
19
|
end
|
20
|
+
|
21
|
+
rule /.*/ do |t|
|
22
|
+
Open.write(t.name, "bar")
|
23
|
+
end
|
20
24
|
EOF
|
21
25
|
|
22
|
-
claim tmp.test.work.footest, :rake, tmp.test.rakefiles.foo
|
26
|
+
claim tmp.test.work.footest, :rake, TestResource.tmp.test.rakefiles.foo
|
27
|
+
|
28
|
+
claim tmp.test.work.file_proc, :file_proc do |file,filename|
|
29
|
+
Open.write(filename, file)
|
30
|
+
nil
|
31
|
+
end
|
23
32
|
end
|
24
33
|
|
25
34
|
class TestTSV < Test::Unit::TestCase
|
@@ -33,11 +42,12 @@ class TestTSV < Test::Unit::TestCase
|
|
33
42
|
assert TSV === TestResource.tmp.test.test_tsv.tsv
|
34
43
|
end
|
35
44
|
|
36
|
-
def
|
37
|
-
TestResource.tmp.test.work.footest.foo.read
|
38
|
-
|
45
|
+
def test_rake
|
46
|
+
assert_equal TestResource.tmp.test.work.footest.foo.read, "TEST"
|
47
|
+
assert_equal TestResource.tmp.test.work.footest.bar.read, "bar"
|
39
48
|
end
|
40
49
|
|
50
|
+
|
41
51
|
def test_proc
|
42
52
|
assert TestResource.tmp.test.proc.read == "PROC TEST"
|
43
53
|
end
|
@@ -88,8 +88,28 @@ class TestMiscOmics < Test::Unit::TestCase
|
|
88
88
|
index = Misc.index_BED(io, dir)
|
89
89
|
assert_equal ["2:2"], index["2:220:230"]
|
90
90
|
end
|
91
|
+
end
|
91
92
|
|
93
|
+
def test_sort_genomic_locations
|
94
|
+
mutations =<<-EOF.split("\n").shuffle
|
95
|
+
1:100:A
|
96
|
+
1:20:A
|
97
|
+
1:300:A
|
98
|
+
2:100:A
|
99
|
+
2:20:A
|
100
|
+
2:300:A
|
101
|
+
10:100:A
|
102
|
+
10:20:A
|
103
|
+
10:300:A
|
104
|
+
EOF
|
105
|
+
sorted = Misc.sort_mutation_stream(StringIO.new(mutations * "\n")).read.split("\n")
|
106
|
+
strict_sorted = Misc.sort_mutation_stream_strict(StringIO.new(mutations * "\n")).read.split("\n")
|
92
107
|
|
93
|
-
|
108
|
+
assert sorted.index("1:20:A") < sorted.index("1:100:A")
|
109
|
+
assert sorted.index("1:300:A") < sorted.index("10:300:A")
|
110
|
+
assert sorted.index("10:300:A") < sorted.index("2:300:A")
|
111
|
+
assert strict_sorted.index("1:20:A") < strict_sorted.index("1:100:A")
|
112
|
+
assert strict_sorted.index("1:300:A") < strict_sorted.index("10:300:A")
|
113
|
+
assert strict_sorted.index("2:300:A") < strict_sorted.index("10:300:A")
|
94
114
|
end
|
95
115
|
end
|
@@ -107,6 +107,23 @@ row1 A B C
|
|
107
107
|
assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
|
108
108
|
end
|
109
109
|
|
110
|
+
def test_sort_long_stream
|
111
|
+
text =<<-EOF
|
112
|
+
##
|
113
|
+
##
|
114
|
+
##
|
115
|
+
#Row LabelA LabelB LabelC
|
116
|
+
row2 AA BB CC
|
117
|
+
row3 AAA BBB CCC
|
118
|
+
row1 A B C
|
119
|
+
EOF
|
120
|
+
|
121
|
+
s = StringIO.new text + (text.split("\n")[-3..-1] * "\n" + "\n") * 10000
|
122
|
+
sorted = Misc.sort_stream(s)
|
123
|
+
assert_equal %w(## ## ## #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
|
124
|
+
assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
|
125
|
+
end
|
126
|
+
|
110
127
|
def test_sort_stream2
|
111
128
|
text =<<-EOF
|
112
129
|
##
|
@@ -318,7 +335,9 @@ line4
|
|
318
335
|
|
319
336
|
TmpFile.with_file do |tmp|
|
320
337
|
#Misc.consume_stream(sout, false, tmp)
|
321
|
-
|
338
|
+
assert_raise do
|
339
|
+
Open.write(tmp, sout)
|
340
|
+
end
|
322
341
|
end
|
323
342
|
end
|
324
343
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.34.
|
4
|
+
version: 5.34.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -164,6 +164,20 @@ dependencies:
|
|
164
164
|
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: method_source
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
167
181
|
description: Utilities for handling tsv files, caches, etc
|
168
182
|
email: miguel.vazquez.g@bsc.es
|
169
183
|
executables:
|
@@ -445,6 +459,7 @@ files:
|
|
445
459
|
- share/rbbt_commands/tsv/uncollapse
|
446
460
|
- share/rbbt_commands/tsv/unzip
|
447
461
|
- share/rbbt_commands/tsv/values
|
462
|
+
- share/rbbt_commands/tsv/view
|
448
463
|
- share/rbbt_commands/tsv/write_excel
|
449
464
|
- share/rbbt_commands/tsv/zip
|
450
465
|
- share/rbbt_commands/watch
|