rbbt-util 5.34.2 → 5.34.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/lib/rbbt/association/database.rb +2 -0
- data/lib/rbbt/association/open.rb +1 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
- data/lib/rbbt/tsv/parser.rb +1 -1
- data/lib/rbbt/util/log/progress/report.rb +9 -2
- data/lib/rbbt/util/misc/exceptions.rb +4 -3
- data/lib/rbbt/util/misc/inspect.rb +2 -0
- data/lib/rbbt/util/named_array.rb +1 -1
- data/lib/rbbt/util/python.rb +24 -17
- data/lib/rbbt/workflow/dependencies.rb +1 -0
- data/lib/rbbt/workflow/step/dependencies.rb +15 -2
- data/lib/rbbt/workflow/step/save_load_inputs.rb +0 -80
- data/share/rbbt_commands/workflow/server +1 -1
- data/share/rbbt_commands/workflow/task +2 -2
- data/test/rbbt/util/misc/test_omics.rb +21 -1
- data/test/rbbt/util/misc/test_pipes.rb +20 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac036317d84e57e31c930adcd9f6f3b83951fd858772a59d04045f6b83c747f1
|
4
|
+
data.tar.gz: 072e7053018bb6d38b7db1ed3164f5fe89e534264918bf1657154ef6d689f5f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e399e00198996cfc6958e897b41359824c3277511a9ddd402cb46a8f7cebbe99ce560ffa3f645afb7e64983f1fb1c31cb1d0269adde2333e3f43f5764b9a5393
|
7
|
+
data.tar.gz: 3341baac06e6aa8f2517d1bc812a89f3ac86d30f13483c394b7557bb20fcc36ca4846402d18e83587ee6329047bc9445892e99cf4a94d48fed9555ebef2b4804
|
data/LICENSE
CHANGED
@@ -86,6 +86,8 @@ module Association
|
|
86
86
|
info_fields = field_pos.collect{|f| f == :key ? :key : all_fields[f]}
|
87
87
|
options = options.merge({:key_field => source_field, :fields => info_fields})
|
88
88
|
|
89
|
+
fields = field_headers if fields.nil?
|
90
|
+
|
89
91
|
data = options[:data] || {}
|
90
92
|
TmpFile.with_file do |tmpfile|
|
91
93
|
tmp_data = Persist.open_database(tmpfile, true, :double, "HDB")
|
@@ -24,7 +24,7 @@ module Association
|
|
24
24
|
options = options.dup
|
25
25
|
data.serializer = :double if data.respond_to? :serializer
|
26
26
|
|
27
|
-
tsv = Association.database(file, options.merge(:
|
27
|
+
tsv = Association.database(file, options.merge(:unnamed => true, :data => data, :type => :double))
|
28
28
|
|
29
29
|
data
|
30
30
|
end
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -650,7 +650,7 @@ module TSV
|
|
650
650
|
end
|
651
651
|
end
|
652
652
|
ensure
|
653
|
-
Log::ProgressBar.remove_bar(progress_monitor)
|
653
|
+
Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
|
654
654
|
stream.close unless stream.closed?
|
655
655
|
stream.join if stream.respond_to? :join and not stream.joined?
|
656
656
|
end
|
@@ -65,11 +65,17 @@ module Log
|
|
65
65
|
|
66
66
|
thr = 0.0000001 if thr == 0
|
67
67
|
|
68
|
-
if mean.nil? or mean.to_i >
|
68
|
+
if mean.nil? or mean.to_i > 2
|
69
69
|
str = "#{ Log.color :blue, thr.to_i.to_s } per sec."
|
70
70
|
#str << " #{ Log.color :yellow, mean.to_i.to_s } avg. #{Log.color :yellow, @mean_max.to_i.to_s} max." if @mean_max > 0
|
71
71
|
else
|
72
|
-
|
72
|
+
if 1.0/thr < 1
|
73
|
+
str = "#{ Log.color :blue, (1.0/thr).round(2).to_s } secs each"
|
74
|
+
elsif 1.0/thr < 2
|
75
|
+
str = "#{ Log.color :blue, (1.0/thr).round(1).to_s } secs each"
|
76
|
+
else
|
77
|
+
str = "#{ Log.color :blue, (1/thr).ceil.to_s } secs each"
|
78
|
+
end
|
73
79
|
#str << " #{ Log.color :yellow, (1/mean).ceil.to_s } avg. #{Log.color :yellow, (1/@mean_max).ceil.to_s} min." if @mean_max > 0
|
74
80
|
end
|
75
81
|
|
@@ -184,6 +190,7 @@ module Log
|
|
184
190
|
@last_time = Time.now
|
185
191
|
@last_count = ticks
|
186
192
|
@last_percent = percent if max and max > 0
|
193
|
+
Log::LAST.replace "progress"
|
187
194
|
save if file
|
188
195
|
end
|
189
196
|
|
@@ -1,7 +1,8 @@
|
|
1
1
|
class RbbtException < StandardError; end
|
2
2
|
class ParameterException < RbbtException; end
|
3
|
-
|
4
|
-
class
|
3
|
+
|
4
|
+
class FieldNotFoundError < StandardError;end
|
5
|
+
class ClosedStream < StandardError; end
|
5
6
|
|
6
7
|
class ProcessFailed < StandardError;
|
7
8
|
def initialize(pid = Process.pid)
|
@@ -26,7 +27,7 @@ end
|
|
26
27
|
class SemaphoreInterrupted < TryAgain; end
|
27
28
|
class LockInterrupted < TryAgain; end
|
28
29
|
|
29
|
-
class RemoteServerError <
|
30
|
+
class RemoteServerError < StandardError; end
|
30
31
|
|
31
32
|
class DependencyError < Aborted
|
32
33
|
def initialize(msg)
|
@@ -22,7 +22,7 @@ module NamedArray
|
|
22
22
|
def self.setup(array, fields, key = nil, entity_options = nil, entity_templates = nil)
|
23
23
|
return array if array.nil?
|
24
24
|
array.extend NamedArray unless NamedArray === array
|
25
|
-
array.fields = Annotated.purge fields
|
25
|
+
array.fields = Annotated === fields ? Annotated.purge(fields) : fields
|
26
26
|
array.key = key
|
27
27
|
array.entity_options = entity_options unless entity_options.nil?
|
28
28
|
array.entity_templates = entity_templates unless entity_templates.nil?
|
data/lib/rbbt/util/python.rb
CHANGED
@@ -4,6 +4,30 @@ require 'pycall/import'
|
|
4
4
|
module RbbtPython
|
5
5
|
extend PyCall::Import
|
6
6
|
|
7
|
+
|
8
|
+
def self.add_path(path)
|
9
|
+
self.run 'sys' do
|
10
|
+
sys.path.append path
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.add_paths(paths)
|
15
|
+
self.run 'sys' do
|
16
|
+
paths.each do |path|
|
17
|
+
sys.path.append path
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.init_rbbt
|
23
|
+
if ! defined?(@@__init_rbbt) || ! @@__init_rbbt
|
24
|
+
Log.debug "Loading python 'rbbt' module into pycall RbbtPython module"
|
25
|
+
RbbtPython.add_paths(Rbbt.python.find_all)
|
26
|
+
RbbtPython.pyimport("rbbt")
|
27
|
+
@@__init_rbbt = true
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
7
31
|
def self.exec(script)
|
8
32
|
PyCall.exec(script)
|
9
33
|
end
|
@@ -92,21 +116,4 @@ module RbbtPython
|
|
92
116
|
module_eval(&block)
|
93
117
|
end
|
94
118
|
end
|
95
|
-
|
96
|
-
def self.add_path(path)
|
97
|
-
self.run 'sys' do
|
98
|
-
sys.path.append path
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
def self.add_paths(paths)
|
103
|
-
self.run 'sys' do
|
104
|
-
paths.each do |path|
|
105
|
-
sys.path.append path
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
RbbtPython.add_paths Rbbt.python.find_all
|
111
|
-
RbbtPython.pyimport "rbbt"
|
112
119
|
end
|
@@ -230,7 +230,14 @@ class Step
|
|
230
230
|
if dep_step[step.path] and dep_step[step.path].length > 1
|
231
231
|
stream = step.result
|
232
232
|
other_steps = dep_step[step.path].uniq.reject{|d| d.overriden }
|
233
|
+
|
234
|
+
other_steps = other_steps.collect{|d|
|
235
|
+
deps_using_step_input = d.rec_dependencies.select{|d| d.inputs.include? step }
|
236
|
+
deps_using_step_input.any? ? deps_using_step_input : d
|
237
|
+
}.flatten.uniq
|
238
|
+
|
233
239
|
return unless other_steps.length > 1
|
240
|
+
|
234
241
|
log_dependency_exec(step, "duplicating #{other_steps.length}")
|
235
242
|
copies = Misc.tee_stream_thread_multiple(stream, other_steps.length)
|
236
243
|
copies.extend StreamArray
|
@@ -370,7 +377,6 @@ class Step
|
|
370
377
|
end
|
371
378
|
|
372
379
|
def run_dependencies
|
373
|
-
dep_step = {}
|
374
380
|
|
375
381
|
rec_dependencies = self.rec_dependencies(true) + input_dependencies
|
376
382
|
|
@@ -385,23 +391,30 @@ class Step
|
|
385
391
|
|
386
392
|
canfail_paths = self.canfail_paths
|
387
393
|
|
394
|
+
dep_step = {}
|
388
395
|
seen_paths = Set.new
|
389
396
|
all_deps.uniq.each do |step|
|
390
397
|
next if seen_paths.include? step.path
|
391
398
|
seen_paths << step.path
|
399
|
+
|
392
400
|
begin
|
393
401
|
Step.prepare_for_execution(step) unless step == self
|
394
402
|
rescue DependencyError, DependencyRbbtException
|
395
403
|
raise $! unless canfail_paths.include? step.path
|
396
404
|
end
|
405
|
+
|
397
406
|
next unless step.dependencies and step.dependencies.any?
|
398
|
-
|
407
|
+
|
408
|
+
# ToDo is this really necessary
|
409
|
+
#(step.dependencies + step.input_dependencies).each do |step_dep|
|
410
|
+
step.dependencies.each do |step_dep|
|
399
411
|
next unless step.dependencies.include?(step_dep)
|
400
412
|
next if step_dep.done? or step_dep.running? or
|
401
413
|
(ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
|
402
414
|
dep_step[step_dep.path] ||= []
|
403
415
|
dep_step[step_dep.path] << step
|
404
416
|
end
|
417
|
+
|
405
418
|
end
|
406
419
|
|
407
420
|
produced = []
|
@@ -125,86 +125,6 @@ module Workflow
|
|
125
125
|
inputs = task_inputs_from_directory(task_name, directory)
|
126
126
|
job(task_name, jobname, inputs)
|
127
127
|
end
|
128
|
-
|
129
|
-
#def self.load_inputs_old(dir, input_names, input_types)
|
130
|
-
# inputs = {}
|
131
|
-
# if File.exists?(dir) && ! File.directory?(dir)
|
132
|
-
# Log.debug "Loading inputs from #{dir}, not a directory trying as tar.gz"
|
133
|
-
# tarfile = dir
|
134
|
-
# digest = CMD.cmd("md5sum '#{tarfile}'").read.split(" ").first
|
135
|
-
# tmpdir = Rbbt.tmp.input_bundle[digest].find
|
136
|
-
# Misc.untar(tarfile, tmpdir) unless File.exists? tmpdir
|
137
|
-
# files = tmpdir.glob("*")
|
138
|
-
# if files.length == 1 && File.directory?(files.first)
|
139
|
-
# tmpdir = files.first
|
140
|
-
# end
|
141
|
-
# load_inputs(tmpdir, input_names, input_types)
|
142
|
-
# else
|
143
|
-
# dir = Path.setup(dir.dup)
|
144
|
-
# input_names.each do |input|
|
145
|
-
# file = dir[input].find
|
146
|
-
# file = dir.glob(input.to_s + ".*").reject{|f| f =~ /\.md5$/}.first if file.nil? or not (File.symlink?(file) || file.exists?)
|
147
|
-
# Log.debug "Trying #{ input }: #{file}"
|
148
|
-
# next unless file and (File.symlink?(file) || file.exists?)
|
149
|
-
|
150
|
-
# type = input_types[input]
|
151
|
-
|
152
|
-
# type = :io if file.split(".").last == 'as_io'
|
153
|
-
|
154
|
-
# type = :path if file.split(".").last == 'as_path'
|
155
|
-
|
156
|
-
# type = :filename if file.split(".").last == 'as_filename'
|
157
|
-
|
158
|
-
# type = :nofile if file.split(".").last == 'nofile'
|
159
|
-
|
160
|
-
# case type
|
161
|
-
# when :nofile
|
162
|
-
# inputs[input.to_sym] = Open.realpath(file)
|
163
|
-
# when :path
|
164
|
-
# inputs[input.to_sym] = Open.realpath(Open.read(file).strip)
|
165
|
-
# when :io
|
166
|
-
# inputs[input.to_sym] = Open.open(Open.realpath(file))
|
167
|
-
# when :file, :binary
|
168
|
-
# Log.debug "Pointing #{ input } to #{file}"
|
169
|
-
# if file =~ /\.yaml/
|
170
|
-
# inputs[input.to_sym] = YAML.load(Open.read(file))
|
171
|
-
# else
|
172
|
-
# if File.symlink?(file)
|
173
|
-
# link_target = File.expand_path(File.readlink(file), File.dirname(file))
|
174
|
-
# inputs[input.to_sym] = link_target
|
175
|
-
# else
|
176
|
-
# inputs[input.to_sym] = Open.realpath(file)
|
177
|
-
# end
|
178
|
-
# end
|
179
|
-
# when :text
|
180
|
-
# Log.debug "Reading #{ input } from #{file}"
|
181
|
-
# inputs[input.to_sym] = Open.read(file)
|
182
|
-
# when :array
|
183
|
-
# Log.debug "Reading array #{ input } from #{file}"
|
184
|
-
# inputs[input.to_sym] = Open.read(file).split("\n")
|
185
|
-
# when :tsv
|
186
|
-
# Log.debug "Opening tsv #{ input } from #{file}"
|
187
|
-
# inputs[input.to_sym] = TSV.open(file)
|
188
|
-
# when :boolean
|
189
|
-
# inputs[input.to_sym] = (file.read.strip == 'true')
|
190
|
-
# else
|
191
|
-
# Log.debug "Loading #{ input } from #{file}"
|
192
|
-
# inputs[input.to_sym] = file.read.strip
|
193
|
-
# end
|
194
|
-
|
195
|
-
# end
|
196
|
-
# inputs = IndiferentHash.setup(inputs)
|
197
|
-
|
198
|
-
# dir.glob("*#*").each do |od|
|
199
|
-
# name = File.basename(od)
|
200
|
-
# value = Open.read(od)
|
201
|
-
# Log.debug "Loading override dependency #{ name } as #{value}"
|
202
|
-
# inputs[name] = value.chomp
|
203
|
-
# end
|
204
|
-
|
205
|
-
# inputs
|
206
|
-
# end
|
207
|
-
#end
|
208
128
|
end
|
209
129
|
|
210
130
|
class Step
|
@@ -114,7 +114,7 @@ TmpFile.with_file do |app_dir|
|
|
114
114
|
name, _sep, value = pair.partition("=")
|
115
115
|
name = name[1..-1].to_sym if name[0] == ':'
|
116
116
|
value = value.to_i if value =~ /^\d+$/
|
117
|
-
value = true if value == "true"
|
117
|
+
value = true if value.nil? || value == "true"
|
118
118
|
value = false if value == "false"
|
119
119
|
options[name] = value
|
120
120
|
end
|
@@ -598,14 +598,14 @@ when Step
|
|
598
598
|
elsif detach
|
599
599
|
exit! 0
|
600
600
|
else
|
601
|
-
|
601
|
+
res.join if res.running?
|
602
602
|
if %w(float integer string boolean).include?(res.result_type.to_s)
|
603
603
|
out.puts res.load
|
604
604
|
else
|
605
605
|
Open.open(res.path, :mode => 'rb') do |io|
|
606
606
|
Misc.consume_stream(io, false, out)
|
607
607
|
end if Open.exist?(res.path) || Open.remote?(res.path) || Open.ssh?(res.path)
|
608
|
-
end
|
608
|
+
end if res.done?
|
609
609
|
end
|
610
610
|
else
|
611
611
|
if Array === res
|
@@ -88,8 +88,28 @@ class TestMiscOmics < Test::Unit::TestCase
|
|
88
88
|
index = Misc.index_BED(io, dir)
|
89
89
|
assert_equal ["2:2"], index["2:220:230"]
|
90
90
|
end
|
91
|
+
end
|
91
92
|
|
93
|
+
def test_sort_genomic_locations
|
94
|
+
mutations =<<-EOF.split("\n").shuffle
|
95
|
+
1:100:A
|
96
|
+
1:20:A
|
97
|
+
1:300:A
|
98
|
+
2:100:A
|
99
|
+
2:20:A
|
100
|
+
2:300:A
|
101
|
+
10:100:A
|
102
|
+
10:20:A
|
103
|
+
10:300:A
|
104
|
+
EOF
|
105
|
+
sorted = Misc.sort_mutation_stream(StringIO.new(mutations * "\n")).read.split("\n")
|
106
|
+
strict_sorted = Misc.sort_mutation_stream_strict(StringIO.new(mutations * "\n")).read.split("\n")
|
92
107
|
|
93
|
-
|
108
|
+
assert sorted.index("1:20:A") < sorted.index("1:100:A")
|
109
|
+
assert sorted.index("1:300:A") < sorted.index("10:300:A")
|
110
|
+
assert sorted.index("10:300:A") < sorted.index("2:300:A")
|
111
|
+
assert strict_sorted.index("1:20:A") < strict_sorted.index("1:100:A")
|
112
|
+
assert strict_sorted.index("1:300:A") < strict_sorted.index("10:300:A")
|
113
|
+
assert strict_sorted.index("2:300:A") < strict_sorted.index("10:300:A")
|
94
114
|
end
|
95
115
|
end
|
@@ -107,6 +107,23 @@ row1 A B C
|
|
107
107
|
assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
|
108
108
|
end
|
109
109
|
|
110
|
+
def test_sort_long_stream
|
111
|
+
text =<<-EOF
|
112
|
+
##
|
113
|
+
##
|
114
|
+
##
|
115
|
+
#Row LabelA LabelB LabelC
|
116
|
+
row2 AA BB CC
|
117
|
+
row3 AAA BBB CCC
|
118
|
+
row1 A B C
|
119
|
+
EOF
|
120
|
+
|
121
|
+
s = StringIO.new text + (text.split("\n")[-3..-1] * "\n" + "\n") * 10000
|
122
|
+
sorted = Misc.sort_stream(s)
|
123
|
+
assert_equal %w(## ## ## #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
|
124
|
+
assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
|
125
|
+
end
|
126
|
+
|
110
127
|
def test_sort_stream2
|
111
128
|
text =<<-EOF
|
112
129
|
##
|
@@ -318,7 +335,9 @@ line4
|
|
318
335
|
|
319
336
|
TmpFile.with_file do |tmp|
|
320
337
|
#Misc.consume_stream(sout, false, tmp)
|
321
|
-
|
338
|
+
assert_raise do
|
339
|
+
Open.write(tmp, sout)
|
340
|
+
end
|
322
341
|
end
|
323
342
|
end
|
324
343
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.34.2
|
4
|
+
version: 5.34.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -164,6 +164,20 @@ dependencies:
|
|
164
164
|
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: method_source
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
167
181
|
description: Utilities for handling tsv files, caches, etc
|
168
182
|
email: miguel.vazquez.g@bsc.es
|
169
183
|
executables:
|