rbbt-util 5.27.6 → 5.27.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/entity.rb +1 -1
- data/lib/rbbt/fix_width_table.rb +1 -1
- data/lib/rbbt/persist.rb +2 -2
- data/lib/rbbt/tsv/attach.rb +34 -30
- data/lib/rbbt/tsv/attach/util.rb +17 -0
- data/lib/rbbt/tsv/stream.rb +1 -0
- data/lib/rbbt/util/misc/development.rb +6 -4
- data/lib/rbbt/util/misc/format.rb +1 -1
- data/lib/rbbt/util/misc/pipes.rb +37 -7
- data/lib/rbbt/util/tmpfile.rb +4 -4
- data/lib/rbbt/workflow.rb +2 -5
- data/lib/rbbt/workflow/accessor.rb +1 -1
- data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +6 -3
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +12 -4
- data/lib/rbbt/workflow/step/dependencies.rb +1 -1
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/share/rbbt_commands/app/start +4 -0
- data/share/rbbt_commands/system/clean +2 -2
- data/share/rbbt_commands/tsv/uncollapse +29 -0
- data/share/rbbt_commands/workflow/info +1 -1
- data/share/rbbt_commands/workflow/server +4 -8
- data/share/rbbt_commands/workflow/task +2 -0
- data/test/rbbt/tsv/test_attach.rb +3 -2
- data/test/rbbt/tsv/test_stream.rb +4 -3
- data/test/rbbt/util/misc/test_format.rb +10 -0
- data/test/rbbt/util/misc/test_pipes.rb +1 -1
- data/test/rbbt/workflow/step/test_dependencies.rb +68 -11
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: be6c76069ffd5a6a49876a42c95f42e7c63975e287446d786c7e4db154456790
|
|
4
|
+
data.tar.gz: c6bdbb33d38b424dc6355e6dc70a15a4bfefc65b7331d2eb471476b8522ac3a3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1bccd1104a3b2e6236073ca6cd267dba65c0b51a16a6af87c0ea09e1361b8eaf7a9b1a6c7d9bb7e35da3ab4a235ebb578ccd9ce64fd1cdf562282d81a78a3edf
|
|
7
|
+
data.tar.gz: edf6d4339a6ddafbc0e88d8356686fe7986ae5983ff4a7905be45f419b8d4887b0ed44332f4a537da6cf3e8a3909ce6c687dd0745c70485ff00a7231f9ee0062
|
data/lib/rbbt/entity.rb
CHANGED
|
@@ -261,7 +261,7 @@ module Entity
|
|
|
261
261
|
|
|
262
262
|
define_method method_name do |*args|
|
|
263
263
|
id = self.id
|
|
264
|
-
persist_name = orig_method_name.to_s
|
|
264
|
+
persist_name = orig_method_name.to_s + ":" << (Array === id ? Misc.obj2digest(id) : id)
|
|
265
265
|
|
|
266
266
|
persist_options = options
|
|
267
267
|
persist_options = persist_options.merge(:other => {:args => args}) if args and args.any?
|
data/lib/rbbt/fix_width_table.rb
CHANGED
|
@@ -28,7 +28,7 @@ class FixWidthTable
|
|
|
28
28
|
else
|
|
29
29
|
Log.debug "FixWidthTable up-to-date: #{ filename } - (in_memory:#{in_memory})"
|
|
30
30
|
if in_memory
|
|
31
|
-
@file = Open.open(@filename, :mode => 'r:ASCII-
|
|
31
|
+
@file = Open.open(@filename, :mode => 'r:ASCII-8BIT'){|f| StringIO.new f.read}
|
|
32
32
|
else
|
|
33
33
|
@file = File.open(@filename, 'r:ASCII-8BIT')
|
|
34
34
|
end
|
data/lib/rbbt/persist.rb
CHANGED
|
@@ -365,12 +365,12 @@ module Persist
|
|
|
365
365
|
type ||= :marshal
|
|
366
366
|
|
|
367
367
|
persist_options ||= {}
|
|
368
|
-
if type == :memory
|
|
368
|
+
if type == :memory && persist_options[:file] && persist_options[:persist]
|
|
369
369
|
repo = persist_options[:repo] || Persist::MEMORY
|
|
370
370
|
if persist_options[:persist] == :update || persist_options[:update]
|
|
371
371
|
repo.delete persist_options[:file]
|
|
372
372
|
end
|
|
373
|
-
return repo[persist_options[:file]] ||= yield
|
|
373
|
+
return repo[persist_options[:file]] ||= yield
|
|
374
374
|
end
|
|
375
375
|
|
|
376
376
|
if FalseClass === persist_options[:persist]
|
data/lib/rbbt/tsv/attach.rb
CHANGED
|
@@ -32,7 +32,8 @@ module TSV
|
|
|
32
32
|
done = false
|
|
33
33
|
Open.write(output) do |os|
|
|
34
34
|
options.delete :sep if options[:sep] == "\t"
|
|
35
|
-
|
|
35
|
+
header_lines = TSV.header_lines(key_field, fields, options)
|
|
36
|
+
os.puts header_lines unless header_lines.empty?
|
|
36
37
|
|
|
37
38
|
while line
|
|
38
39
|
key, *parts = line.sub("\n",'').split(sep, -1)
|
|
@@ -212,6 +213,35 @@ module TSV
|
|
|
212
213
|
other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
|
|
213
214
|
Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
|
|
214
215
|
|
|
216
|
+
same_key = true
|
|
217
|
+
begin
|
|
218
|
+
case
|
|
219
|
+
when (Misc.match_fields(key_field, other.key_field) and same_key)
|
|
220
|
+
Log.debug "Attachment with same key: #{other.key_field}"
|
|
221
|
+
attach_same_key other, fields
|
|
222
|
+
when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
|
223
|
+
Log.debug "Found other key field: #{other.key_field}"
|
|
224
|
+
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
|
225
|
+
when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
|
226
|
+
Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
|
|
227
|
+
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
|
228
|
+
else
|
|
229
|
+
index = TSV.find_traversal(self, other, options)
|
|
230
|
+
raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
|
|
231
|
+
Log.debug "Attachment with index: #{other.key_field}"
|
|
232
|
+
attach_index other, index, fields
|
|
233
|
+
end
|
|
234
|
+
rescue Exception
|
|
235
|
+
if same_key
|
|
236
|
+
Log.warn "Could not translate identifiers with same_key"
|
|
237
|
+
same_key = false
|
|
238
|
+
retry
|
|
239
|
+
else
|
|
240
|
+
raise $!
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
|
|
244
|
+
|
|
215
245
|
if complete
|
|
216
246
|
fill = TrueClass === complete ? nil : complete
|
|
217
247
|
field_length = self.fields.length
|
|
@@ -219,6 +249,9 @@ module TSV
|
|
|
219
249
|
other_common_pos = common_fields.collect{|f| other.fields.index f}
|
|
220
250
|
this_common_pos = common_fields.collect{|f| self.fields.index f}
|
|
221
251
|
missing = other.keys - self.keys
|
|
252
|
+
|
|
253
|
+
other = other.to_list if other.type == :single
|
|
254
|
+
|
|
222
255
|
case type
|
|
223
256
|
when :single
|
|
224
257
|
missing.each do |k|
|
|
@@ -249,35 +282,6 @@ module TSV
|
|
|
249
282
|
end
|
|
250
283
|
end
|
|
251
284
|
|
|
252
|
-
same_key = true
|
|
253
|
-
begin
|
|
254
|
-
case
|
|
255
|
-
when (Misc.match_fields(key_field, other.key_field) and same_key)
|
|
256
|
-
Log.debug "Attachment with same key: #{other.key_field}"
|
|
257
|
-
attach_same_key other, fields
|
|
258
|
-
when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
|
259
|
-
Log.debug "Found other key field: #{other.key_field}"
|
|
260
|
-
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
|
261
|
-
when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
|
262
|
-
Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
|
|
263
|
-
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
|
264
|
-
else
|
|
265
|
-
index = TSV.find_traversal(self, other, options)
|
|
266
|
-
raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
|
|
267
|
-
Log.debug "Attachment with index: #{other.key_field}"
|
|
268
|
-
attach_index other, index, fields
|
|
269
|
-
end
|
|
270
|
-
rescue Exception
|
|
271
|
-
if same_key
|
|
272
|
-
Log.warn "Could not translate identifiers with same_key"
|
|
273
|
-
same_key = false
|
|
274
|
-
retry
|
|
275
|
-
else
|
|
276
|
-
raise $!
|
|
277
|
-
end
|
|
278
|
-
end
|
|
279
|
-
Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
|
|
280
|
-
|
|
281
285
|
self
|
|
282
286
|
end
|
|
283
287
|
|
data/lib/rbbt/tsv/attach/util.rb
CHANGED
|
@@ -4,9 +4,20 @@ module TSV
|
|
|
4
4
|
fields = other.fields - [key_field].concat(self.fields) if fields.nil?
|
|
5
5
|
|
|
6
6
|
fields = [fields].compact unless Array === fields
|
|
7
|
+
|
|
8
|
+
common_fields = self.fields & fields
|
|
9
|
+
|
|
10
|
+
fields = fields - common_fields
|
|
11
|
+
|
|
7
12
|
num_fields = fields.length
|
|
8
13
|
|
|
9
14
|
field_positions = fields.collect{|field| other.identify_field field}
|
|
15
|
+
|
|
16
|
+
if common_fields.any?
|
|
17
|
+
common_field_positions = common_fields.collect{|field| self.identify_field field}
|
|
18
|
+
common_field_positions_other = common_fields.collect{|field| other.identify_field field}
|
|
19
|
+
end
|
|
20
|
+
|
|
10
21
|
other.with_unnamed do
|
|
11
22
|
with_unnamed do
|
|
12
23
|
through do |key, values|
|
|
@@ -28,6 +39,12 @@ module TSV
|
|
|
28
39
|
new_values = field_positions.collect do |pos|
|
|
29
40
|
pos == :key ? key : other_values[pos]
|
|
30
41
|
end
|
|
42
|
+
|
|
43
|
+
if common_fields.any?
|
|
44
|
+
common_field_positions.zip(common_field_positions_other).each do |p1,p2|
|
|
45
|
+
current[p1] += other_values[p2]
|
|
46
|
+
end
|
|
47
|
+
end
|
|
31
48
|
end
|
|
32
49
|
|
|
33
50
|
new_values.collect!{|v| [v]} if type == :double and not (other.type == :double or other.type == :flat)
|
data/lib/rbbt/tsv/stream.rb
CHANGED
|
@@ -292,14 +292,16 @@ def self.add_libdir(dir=nil)
|
|
|
292
292
|
end
|
|
293
293
|
end
|
|
294
294
|
|
|
295
|
-
def self.bootstrap(elems, num =
|
|
295
|
+
def self.bootstrap(elems, num = nil, options = {}, &block)
|
|
296
296
|
IndiferentHash.setup options
|
|
297
|
+
|
|
298
|
+
num = Rbbt::Config.get :cpus, :default_bootstrap_cpus, :bootstrap_cpus if num == :current || num == nil
|
|
297
299
|
num = :current if num.nil?
|
|
298
300
|
cpus = case num
|
|
299
301
|
when :current
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
302
|
+
n = Etc.nprocessors
|
|
303
|
+
n = elems.length / 2 if n > elems.length/2
|
|
304
|
+
n
|
|
303
305
|
when String
|
|
304
306
|
num.to_i
|
|
305
307
|
when Integer
|
|
@@ -163,7 +163,7 @@ module Misc
|
|
|
163
163
|
|
|
164
164
|
def self.fixutf8(string)
|
|
165
165
|
return nil if string.nil?
|
|
166
|
-
return string if string.respond_to?(:encoding) && string.encoding == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
|
|
166
|
+
return string if string.respond_to?(:encoding) && string.encoding.to_s == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
|
|
167
167
|
(string.respond_to?(:valid_encoding) && string.valid_encoding)
|
|
168
168
|
|
|
169
169
|
if string.respond_to?(:encode)
|
data/lib/rbbt/util/misc/pipes.rb
CHANGED
|
@@ -18,6 +18,8 @@ module Misc
|
|
|
18
18
|
|
|
19
19
|
BLOCK_SIZE=1024 * 8
|
|
20
20
|
|
|
21
|
+
SKIP_TAG="[SKIP TAG]"
|
|
22
|
+
|
|
21
23
|
PIPE_MUTEX = Mutex.new
|
|
22
24
|
|
|
23
25
|
OPEN_PIPE_IN = []
|
|
@@ -29,7 +31,7 @@ module Misc
|
|
|
29
31
|
|
|
30
32
|
[sout, sin]
|
|
31
33
|
end
|
|
32
|
-
Log.debug{"Creating pipe #{[res.last.inspect,res.first.inspect] * " => "}"}
|
|
34
|
+
Log.debug{"Creating pipe #{[res.last.inspect, res.first.inspect] * " => "}"}
|
|
33
35
|
res
|
|
34
36
|
end
|
|
35
37
|
|
|
@@ -255,6 +257,11 @@ module Misc
|
|
|
255
257
|
end
|
|
256
258
|
tee1, *rest = Misc.tee_stream stream_dup, num + 1
|
|
257
259
|
stream.reopen(tee1)
|
|
260
|
+
|
|
261
|
+
#ToDo: I can't explain why the @threads variable appears with the value of
|
|
262
|
+
# @filename
|
|
263
|
+
stream.instance_variable_set(:@threads, nil) if stream.instance_variables.include?(:@threads)
|
|
264
|
+
|
|
258
265
|
tee1.annotate(stream)
|
|
259
266
|
rest
|
|
260
267
|
end
|
|
@@ -537,18 +544,29 @@ module Misc
|
|
|
537
544
|
end
|
|
538
545
|
end
|
|
539
546
|
|
|
547
|
+
def self.buffer_stream(stream)
|
|
548
|
+
sout, sin = Misc.pipe
|
|
549
|
+
Misc.consume_stream(stream, true, sin)
|
|
550
|
+
sout
|
|
551
|
+
end
|
|
552
|
+
|
|
540
553
|
def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block)
|
|
541
554
|
output.puts header if header
|
|
542
555
|
streams = streams.collect do |stream|
|
|
543
556
|
if defined? Step and Step === stream
|
|
544
|
-
stream.get_stream
|
|
557
|
+
io = stream.get_stream
|
|
558
|
+
if io
|
|
559
|
+
buffer_stream(io)
|
|
560
|
+
else
|
|
561
|
+
stream.join.path.open
|
|
562
|
+
end
|
|
545
563
|
else
|
|
546
564
|
stream
|
|
547
565
|
end
|
|
548
566
|
end
|
|
549
567
|
|
|
550
568
|
begin
|
|
551
|
-
|
|
569
|
+
|
|
552
570
|
lines ||= streams.collect{|s| s.gets }
|
|
553
571
|
keys = []
|
|
554
572
|
parts = []
|
|
@@ -564,6 +582,7 @@ module Misc
|
|
|
564
582
|
end
|
|
565
583
|
sizes = parts.collect{|p| p.nil? ? 0 : p.length }
|
|
566
584
|
last_min = nil
|
|
585
|
+
|
|
567
586
|
while lines.compact.any?
|
|
568
587
|
if block_given?
|
|
569
588
|
min = keys.compact.sort(&block).first
|
|
@@ -571,14 +590,23 @@ module Misc
|
|
|
571
590
|
min = keys.compact.sort.first
|
|
572
591
|
end
|
|
573
592
|
str = []
|
|
593
|
+
threads = []
|
|
574
594
|
keys.each_with_index do |key,i|
|
|
575
595
|
case key
|
|
576
596
|
when min
|
|
577
|
-
|
|
597
|
+
if parts[i] == [SKIP_TAG]
|
|
598
|
+
str << [sep * (sizes[i]-1)] if sizes[i] > 0
|
|
599
|
+
else
|
|
600
|
+
str << [parts[i] * sep]
|
|
601
|
+
end
|
|
602
|
+
|
|
578
603
|
line = lines[i] = streams[i].gets
|
|
579
|
-
|
|
604
|
+
|
|
605
|
+
if line.nil?
|
|
580
606
|
keys[i] = nil
|
|
581
607
|
parts[i] = nil
|
|
608
|
+
streams[i].close unless streams[i].closed?
|
|
609
|
+
streams[i].join if streams[i].respond_to?(:join)
|
|
582
610
|
else
|
|
583
611
|
k, *p = line.chomp.split(sep, -1)
|
|
584
612
|
keys[i] = k
|
|
@@ -589,10 +617,12 @@ module Misc
|
|
|
589
617
|
end
|
|
590
618
|
end
|
|
591
619
|
|
|
592
|
-
output.puts [min, str*sep] * sep
|
|
620
|
+
output.puts [min, str.flatten*sep] * sep
|
|
593
621
|
end
|
|
622
|
+
|
|
594
623
|
streams.each do |stream|
|
|
595
|
-
stream.
|
|
624
|
+
stream.close unless stream.closed?
|
|
625
|
+
stream.join if stream.respond_to?(:join)
|
|
596
626
|
end
|
|
597
627
|
rescue
|
|
598
628
|
Log.exception $!
|
data/lib/rbbt/util/tmpfile.rb
CHANGED
|
@@ -17,14 +17,14 @@ module TmpFile
|
|
|
17
17
|
|
|
18
18
|
# Creates a random file name, with the given suffix and a random number
|
|
19
19
|
# up to +max+
|
|
20
|
-
def self.random_name(s = "tmp-", max =
|
|
20
|
+
def self.random_name(s = "tmp-", max = 1_000_000_000)
|
|
21
21
|
n = rand(max)
|
|
22
22
|
s + n.to_s
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
# Creates a random filename in the temporary directory
|
|
26
|
-
def self.tmp_file(s = "tmp-", max=
|
|
27
|
-
File.expand_path(File.join(dir, random_name(s,max)))
|
|
26
|
+
def self.tmp_file(s = "tmp-", max=1_000_000_000, dir = TMPDIR)
|
|
27
|
+
File.expand_path(File.join(dir, random_name(s, max)))
|
|
28
28
|
end
|
|
29
29
|
|
|
30
30
|
def self.with_file(content = nil, erase = true, options = {})
|
|
@@ -33,7 +33,7 @@ module TmpFile
|
|
|
33
33
|
|
|
34
34
|
prefix = options[:prefix] || "tmp-"
|
|
35
35
|
tmpdir = options[:tmpdir] || TMPDIR
|
|
36
|
-
max = options[:max] ||
|
|
36
|
+
max = options[:max] || 1_000_000_000
|
|
37
37
|
tmpfile = tmp_file prefix, max, tmpdir
|
|
38
38
|
if options[:extension]
|
|
39
39
|
tmpfile += ".#{options[:extension]}"
|
data/lib/rbbt/workflow.rb
CHANGED
|
@@ -10,9 +10,6 @@ require 'rbbt/workflow/util/provenance'
|
|
|
10
10
|
|
|
11
11
|
module Workflow
|
|
12
12
|
|
|
13
|
-
STEP_CACHE = {}
|
|
14
|
-
LOAD_STEP_CACHE = {}
|
|
15
|
-
|
|
16
13
|
class TaskNotFoundException < Exception
|
|
17
14
|
def initialize(workflow, task = nil)
|
|
18
15
|
if task
|
|
@@ -264,11 +261,11 @@ module Workflow
|
|
|
264
261
|
end
|
|
265
262
|
|
|
266
263
|
def step_cache
|
|
267
|
-
|
|
264
|
+
Thread.current[:step_cache] ||= {}
|
|
268
265
|
end
|
|
269
266
|
|
|
270
267
|
def self.load_step_cache
|
|
271
|
-
|
|
268
|
+
Thread.current[:load_step_cache] ||= {}
|
|
272
269
|
end
|
|
273
270
|
|
|
274
271
|
|
|
@@ -418,7 +418,7 @@ module Workflow
|
|
|
418
418
|
when :hash
|
|
419
419
|
clean_inputs = Annotated.purge(inputs)
|
|
420
420
|
clean_inputs = clean_inputs.collect{|i| Symbol === i ? i.to_s : i }
|
|
421
|
-
deps_str = dependencies.collect{|d| Step === d ? d.short_path : d }
|
|
421
|
+
deps_str = dependencies.collect{|d| (Step === d || (defined?(RemoteStep) && RemoteStep === Step)) ? "Step: " << d.short_path : d }
|
|
422
422
|
key_obj = {:inputs => clean_inputs, :dependencies => deps_str }
|
|
423
423
|
key_str = Misc.obj2str(key_obj)
|
|
424
424
|
hash_str = Misc.digest(key_str)
|
|
@@ -137,14 +137,16 @@ class RemoteWorkflow
|
|
|
137
137
|
|
|
138
138
|
post_thread = Thread.new(Thread.current) do |parent|
|
|
139
139
|
bl = lambda do |rok|
|
|
140
|
-
|
|
140
|
+
case rok
|
|
141
|
+
when Net::HTTPOK
|
|
141
142
|
_url = rok["RBBT-STREAMING-JOB-URL"]
|
|
142
143
|
@url = File.join(task_url, File.basename(_url)) if _url
|
|
143
144
|
rok.read_body do |c,_a, _b|
|
|
144
145
|
sin.write c
|
|
145
146
|
end
|
|
146
147
|
sin.close
|
|
147
|
-
|
|
148
|
+
when Net::HTTPRedirection, Net::HTTPAccepted
|
|
149
|
+
Thread.current.report_on_exception = false
|
|
148
150
|
raise TryThis.new(rok)
|
|
149
151
|
else
|
|
150
152
|
err = StringIO.new
|
|
@@ -178,7 +180,8 @@ class RemoteWorkflow
|
|
|
178
180
|
begin
|
|
179
181
|
RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
|
|
180
182
|
rescue TryThis
|
|
181
|
-
|
|
183
|
+
url = $!.payload["location"]
|
|
184
|
+
RestClient::Request.execute(:method => :get, :url => url, :block_response => bl)
|
|
182
185
|
end
|
|
183
186
|
end
|
|
184
187
|
|
|
@@ -26,8 +26,8 @@ class RemoteStep < Step
|
|
|
26
26
|
|
|
27
27
|
def cache_file
|
|
28
28
|
begin
|
|
29
|
-
digest = Misc.obj2digest([base_url, task, base_name, inputs])
|
|
30
|
-
Rbbt.var.cache.REST[[
|
|
29
|
+
digest = Misc.obj2digest([base_url, task.to_s, base_name, inputs])
|
|
30
|
+
Rbbt.var.cache.REST[task.to_s][[clean_name, digest].compact * "."].find
|
|
31
31
|
rescue
|
|
32
32
|
Log.exception $!
|
|
33
33
|
raise $!
|
|
@@ -143,7 +143,11 @@ class RemoteStep < Step
|
|
|
143
143
|
@info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
|
|
144
144
|
@last_info_time = Time.now
|
|
145
145
|
init_job unless @url
|
|
146
|
-
info =
|
|
146
|
+
info = begin
|
|
147
|
+
@adaptor.get_json(File.join(@url, 'info'))
|
|
148
|
+
rescue
|
|
149
|
+
{:status => :noinfo}
|
|
150
|
+
end
|
|
147
151
|
info = RemoteWorkflow.fix_hash(info)
|
|
148
152
|
info[:status] = info[:status].to_sym if String === info[:status]
|
|
149
153
|
info
|
|
@@ -243,6 +247,7 @@ class RemoteStep < Step
|
|
|
243
247
|
def join
|
|
244
248
|
return true if cache_files.any?
|
|
245
249
|
init_job unless @url
|
|
250
|
+
produce unless @started
|
|
246
251
|
Log.debug{ "Joining RemoteStep: #{path}" }
|
|
247
252
|
|
|
248
253
|
if IO === @result
|
|
@@ -258,7 +263,6 @@ class RemoteStep < Step
|
|
|
258
263
|
sleep 1 unless self.done? || self.aborted? || self.error?
|
|
259
264
|
while not (self.done? || self.aborted? || self.error?)
|
|
260
265
|
sleep 3
|
|
261
|
-
iif [self.done?, self.status, self.info]
|
|
262
266
|
end
|
|
263
267
|
end
|
|
264
268
|
|
|
@@ -311,6 +315,10 @@ class RemoteStep < Step
|
|
|
311
315
|
end
|
|
312
316
|
end
|
|
313
317
|
|
|
318
|
+
def short_path
|
|
319
|
+
init_job unless @url
|
|
320
|
+
[@task, @name] * "/"
|
|
321
|
+
end
|
|
314
322
|
|
|
315
323
|
def input_checks
|
|
316
324
|
[]
|
|
@@ -366,7 +366,7 @@ class Step
|
|
|
366
366
|
(step.dependencies + step.input_dependencies).each do |step_dep|
|
|
367
367
|
next if step_dep.done? or step_dep.running? or (ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
|
|
368
368
|
dep_step[step_dep.path] ||= []
|
|
369
|
-
dep_step[step_dep.path] <<
|
|
369
|
+
dep_step[step_dep.path] << step
|
|
370
370
|
end
|
|
371
371
|
end
|
|
372
372
|
|
data/lib/rbbt/workflow/usage.rb
CHANGED
|
@@ -84,6 +84,10 @@ Misc.in_dir(app_dir) do
|
|
|
84
84
|
end
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
+
fixed_options = {}
|
|
88
|
+
options.each do |k,v| fixed_options[k.to_sym] = v end
|
|
89
|
+
options = fixed_options
|
|
90
|
+
|
|
87
91
|
case server
|
|
88
92
|
when 'passenger'
|
|
89
93
|
system ENV, "env RBBT_LOG=0 passenger start -R '#{config_ru_file}' -p #{options[:Port] || "2887"}"
|
|
@@ -97,7 +97,7 @@ TSV.traverse jobs do |file,i|
|
|
|
97
97
|
else
|
|
98
98
|
info = begin
|
|
99
99
|
Open.open(i[:info_file]) do |f|
|
|
100
|
-
Step::
|
|
100
|
+
Step::INFO_SERIALIZER.load(f)
|
|
101
101
|
end
|
|
102
102
|
rescue
|
|
103
103
|
{:status => :noinfo}
|
|
@@ -134,7 +134,7 @@ TSV.traverse jobs do |file,i|
|
|
|
134
134
|
end
|
|
135
135
|
end
|
|
136
136
|
|
|
137
|
-
if (force
|
|
137
|
+
if (force && status !~ /done/) or
|
|
138
138
|
status =~ /\b(old|dirty|nopid|error|missing|aborted|dead|sync)$/ or
|
|
139
139
|
(status == "noinfo" and not done) or
|
|
140
140
|
status == ""
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'rbbt-util'
|
|
4
|
+
require 'rbbt/util/simpleopt'
|
|
5
|
+
|
|
6
|
+
options = SOPT.setup <<EOF
|
|
7
|
+
Read a TSV file and dump it
|
|
8
|
+
|
|
9
|
+
$ rbbt tsv read [options] <filename.tsv|->
|
|
10
|
+
|
|
11
|
+
Use - to read from STDIN
|
|
12
|
+
|
|
13
|
+
-k--key_field* Key field
|
|
14
|
+
-f--fields* Fields
|
|
15
|
+
-t--type* Type
|
|
16
|
+
-m--merge* Merge from multiple rows
|
|
17
|
+
-h--help Print this help
|
|
18
|
+
|
|
19
|
+
EOF
|
|
20
|
+
rbbt_usage and exit 0 if options[:help]
|
|
21
|
+
|
|
22
|
+
file = ARGV.shift
|
|
23
|
+
|
|
24
|
+
file = STDIN if file == '-'
|
|
25
|
+
|
|
26
|
+
tsv = TSV.open file, :merge => true, :type => :double
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
puts tsv.to_unmerged_expanded_s
|
|
@@ -153,7 +153,7 @@ if recursive
|
|
|
153
153
|
dep = deps.shift
|
|
154
154
|
inputs = {} if inputs.nil?
|
|
155
155
|
inputs = inputs.merge(dep.info[:inputs] || {})
|
|
156
|
-
deps = deps.concat dep.dependencies
|
|
156
|
+
deps = deps.concat dep.dependencies if dep.dependencies
|
|
157
157
|
end
|
|
158
158
|
|
|
159
159
|
inputs = inputs.merge step.archived_inputs
|
|
@@ -123,15 +123,11 @@ TmpFile.with_file do |app_dir|
|
|
|
123
123
|
else
|
|
124
124
|
options[:config] = config_ru_file
|
|
125
125
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
clean_options[k.to_sym] = v
|
|
130
|
-
rescue
|
|
131
|
-
end
|
|
132
|
-
end
|
|
126
|
+
fixed_options = {}
|
|
127
|
+
options.each do |k,v| fixed_options[k.to_sym] = v end
|
|
128
|
+
options = fixed_options
|
|
133
129
|
|
|
134
|
-
Rack::Server.start(
|
|
130
|
+
Rack::Server.start(options)
|
|
135
131
|
end
|
|
136
132
|
end
|
|
137
133
|
end
|
|
@@ -213,8 +213,9 @@ row2 E
|
|
|
213
213
|
|
|
214
214
|
tsv1 = tsv2 = nil
|
|
215
215
|
|
|
216
|
-
tsv1 = Rbbt.tmp.test.test1.data.tsv :double, :sep => /\s+/
|
|
217
|
-
tsv2 = Rbbt.tmp.test.test2.data.tsv :double, :sep => /\s+/
|
|
216
|
+
tsv1 = Rbbt.tmp.test.test1.data.produce(true).tsv :double, :sep => /\s+/
|
|
217
|
+
tsv2 = Rbbt.tmp.test.test2.data.produce(true).tsv :double, :sep => /\s+/
|
|
218
|
+
Log.tsv tsv2
|
|
218
219
|
|
|
219
220
|
tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find #.to_s
|
|
220
221
|
|
|
@@ -4,6 +4,7 @@ require 'rbbt/tsv/stream'
|
|
|
4
4
|
require 'rbbt'
|
|
5
5
|
|
|
6
6
|
class TestStream < Test::Unit::TestCase
|
|
7
|
+
|
|
7
8
|
def test_collapse_stream
|
|
8
9
|
text=<<-EOF
|
|
9
10
|
#: :sep=" "
|
|
@@ -65,7 +66,7 @@ row3 AAA BBB CCC
|
|
|
65
66
|
|
|
66
67
|
text2=<<-EOF
|
|
67
68
|
#: :sep=" "
|
|
68
|
-
#Row Labela Labelb
|
|
69
|
+
#Row Labela Labelb
|
|
69
70
|
row1 a b
|
|
70
71
|
row3 aaa bbb
|
|
71
72
|
row2 aa bb
|
|
@@ -100,7 +101,7 @@ row1 A B C
|
|
|
100
101
|
|
|
101
102
|
text2=<<-EOF
|
|
102
103
|
#: :sep=" "
|
|
103
|
-
#Row Labela Labelb
|
|
104
|
+
#Row Labela Labelb
|
|
104
105
|
row2 aa bb
|
|
105
106
|
EOF
|
|
106
107
|
|
|
@@ -132,7 +133,7 @@ row1 A B C
|
|
|
132
133
|
|
|
133
134
|
text2=<<-EOF
|
|
134
135
|
#: :sep=" "
|
|
135
|
-
#Row Labela Labelb
|
|
136
|
+
#Row Labela Labelb
|
|
136
137
|
row2 aa bb
|
|
137
138
|
EOF
|
|
138
139
|
|
|
@@ -4,6 +4,27 @@ require 'rbbt/workflow'
|
|
|
4
4
|
module DepWorkflow
|
|
5
5
|
extend Workflow
|
|
6
6
|
|
|
7
|
+
input :input_file, :file, "Input file", nil, :stream => true
|
|
8
|
+
task :s1 => :array do |input_file|
|
|
9
|
+
TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
|
|
10
|
+
line + "\t" << "Task1"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
dep :s1
|
|
15
|
+
task :s2 => :array do |input_file|
|
|
16
|
+
TSV.traverse step(:s1), :type => :array, :into => :stream, :bar => "Task2" do |line|
|
|
17
|
+
next [line.split("\t").first, Misc::SKIP_TAG] * "\t" if rand < 0.9
|
|
18
|
+
line + "\t" << "Task2"
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
dep :s1
|
|
23
|
+
dep :s2
|
|
24
|
+
task :s3 => :array do |input_file|
|
|
25
|
+
Misc.paste_streams(dependencies.reverse)
|
|
26
|
+
end
|
|
27
|
+
|
|
7
28
|
input :input_file, :file, "Input file", nil, :stream => true
|
|
8
29
|
task :task1 => :array do |input_file|
|
|
9
30
|
TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
|
|
@@ -28,9 +49,7 @@ module DepWorkflow
|
|
|
28
49
|
dep :task2
|
|
29
50
|
dep :task3
|
|
30
51
|
task :task4 => :array do
|
|
31
|
-
|
|
32
|
-
s2 = TSV.get_stream step(:task3)
|
|
33
|
-
Misc.paste_streams([s1, s2])
|
|
52
|
+
Misc.paste_streams(dependencies)
|
|
34
53
|
end
|
|
35
54
|
|
|
36
55
|
dep :task4
|
|
@@ -40,12 +59,10 @@ module DepWorkflow
|
|
|
40
59
|
end
|
|
41
60
|
end
|
|
42
61
|
|
|
43
|
-
dep :task5
|
|
44
62
|
dep :task2
|
|
63
|
+
dep :task5
|
|
45
64
|
task :task6 => :array do
|
|
46
|
-
|
|
47
|
-
s2 = TSV.get_stream step(:task5)
|
|
48
|
-
Misc.paste_streams([s1, s2])
|
|
65
|
+
Misc.paste_streams(dependencies)
|
|
49
66
|
end
|
|
50
67
|
|
|
51
68
|
input :stream_file, :file, "Streamed file", nil, :stream => true
|
|
@@ -134,8 +151,25 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
|
134
151
|
end
|
|
135
152
|
end
|
|
136
153
|
|
|
154
|
+
def test_task3
|
|
155
|
+
size = 100000
|
|
156
|
+
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
|
157
|
+
TmpFile.with_file(content) do |input_file|
|
|
158
|
+
job = DepWorkflow.job(:task3, "TEST", :input_file => input_file)
|
|
159
|
+
io = TSV.get_stream job.run(:stream)
|
|
160
|
+
last_line = nil
|
|
161
|
+
while line = io.gets
|
|
162
|
+
last_line = line.strip
|
|
163
|
+
end
|
|
164
|
+
io.join
|
|
165
|
+
|
|
166
|
+
assert_equal "Line #{size}\tTask1\tTask3", last_line
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
|
|
137
170
|
def test_task4
|
|
138
|
-
size =
|
|
171
|
+
size = 100000
|
|
172
|
+
Log.severity = 0
|
|
139
173
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
|
140
174
|
last_line = nil
|
|
141
175
|
TmpFile.with_file(content) do |input_file|
|
|
@@ -151,7 +185,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
|
151
185
|
end
|
|
152
186
|
|
|
153
187
|
def test_task5
|
|
154
|
-
size =
|
|
188
|
+
size = 10000
|
|
155
189
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
|
156
190
|
last_line = nil
|
|
157
191
|
TmpFile.with_file(content) do |input_file|
|
|
@@ -165,9 +199,32 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
|
165
199
|
assert_equal "Line #{size}\tTask1\tTask2\tTask1\tTask3\tTask5", last_line
|
|
166
200
|
end
|
|
167
201
|
|
|
202
|
+
def test_s3
|
|
203
|
+
size = 100000
|
|
204
|
+
content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
|
205
|
+
last_line = nil
|
|
206
|
+
Log.severity = 0
|
|
207
|
+
TmpFile.with_file(content) do |input_file|
|
|
208
|
+
begin
|
|
209
|
+
job = DepWorkflow.job(:s3, "TEST", :input_file => input_file)
|
|
210
|
+
job.recursive_clean
|
|
211
|
+
job.run(:stream)
|
|
212
|
+
io = TSV.get_stream job
|
|
213
|
+
while line = io.gets
|
|
214
|
+
last_line = line.strip
|
|
215
|
+
end
|
|
216
|
+
io.join if io.respond_to? :join
|
|
217
|
+
rescue Exception
|
|
218
|
+
job.abort
|
|
219
|
+
raise $!
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
assert last_line.include? "Line #{size}"
|
|
223
|
+
end
|
|
224
|
+
|
|
168
225
|
def test_task6
|
|
169
226
|
size = 100000
|
|
170
|
-
content = (
|
|
227
|
+
content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
|
171
228
|
last_line = nil
|
|
172
229
|
Log.severity = 0
|
|
173
230
|
TmpFile.with_file(content) do |input_file|
|
|
@@ -189,7 +246,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
|
|
|
189
246
|
end
|
|
190
247
|
|
|
191
248
|
def test_task8
|
|
192
|
-
size =
|
|
249
|
+
size = 10000
|
|
193
250
|
content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
|
|
194
251
|
last_line = nil
|
|
195
252
|
Log.severity = 0
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbbt-util
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.27.
|
|
4
|
+
version: 5.27.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Miguel Vazquez
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-07-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -403,6 +403,7 @@ files:
|
|
|
403
403
|
- share/rbbt_commands/tsv/sort
|
|
404
404
|
- share/rbbt_commands/tsv/subset
|
|
405
405
|
- share/rbbt_commands/tsv/transpose
|
|
406
|
+
- share/rbbt_commands/tsv/uncollapse
|
|
406
407
|
- share/rbbt_commands/tsv/unzip
|
|
407
408
|
- share/rbbt_commands/tsv/values
|
|
408
409
|
- share/rbbt_commands/tsv/write_excel
|
|
@@ -487,6 +488,7 @@ files:
|
|
|
487
488
|
- test/rbbt/util/concurrency/test_threads.rb
|
|
488
489
|
- test/rbbt/util/log/test_progress.rb
|
|
489
490
|
- test/rbbt/util/misc/test_bgzf.rb
|
|
491
|
+
- test/rbbt/util/misc/test_format.rb
|
|
490
492
|
- test/rbbt/util/misc/test_lock.rb
|
|
491
493
|
- test/rbbt/util/misc/test_multipart_payload.rb
|
|
492
494
|
- test/rbbt/util/misc/test_omics.rb
|
|
@@ -552,6 +554,7 @@ test_files:
|
|
|
552
554
|
- test/rbbt/util/test_colorize.rb
|
|
553
555
|
- test/rbbt/util/misc/test_omics.rb
|
|
554
556
|
- test/rbbt/util/misc/test_pipes.rb
|
|
557
|
+
- test/rbbt/util/misc/test_format.rb
|
|
555
558
|
- test/rbbt/util/misc/test_lock.rb
|
|
556
559
|
- test/rbbt/util/misc/test_multipart_payload.rb
|
|
557
560
|
- test/rbbt/util/misc/test_bgzf.rb
|