rbbt-util 5.27.6 → 5.27.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe7c3dec4d2333e77ea396a2d6e572512bf7033999ff4715f1a1c6c9d3d1b9d5
4
- data.tar.gz: 32a3d905c1a95f9c7e5cb359855f8781509cfeecdd8db7b77ee29eaea528d030
3
+ metadata.gz: be6c76069ffd5a6a49876a42c95f42e7c63975e287446d786c7e4db154456790
4
+ data.tar.gz: c6bdbb33d38b424dc6355e6dc70a15a4bfefc65b7331d2eb471476b8522ac3a3
5
5
  SHA512:
6
- metadata.gz: d009024642898f775d423e1ddede6aea538e8ed5ff4675eedadd64768e67b0a46f6519038378a221db76b0e196cfc713ca8dc7553788cdb9a51014c14a97de4c
7
- data.tar.gz: 5c0612feca84b00e6d402f7a68ca1cebd6f4b7a785b40899bdaed3385a3f68853824dcc98d9cfd2e641768ef5ca3d52e38e41bdbc1d7a3751ae288e8f8d7ed2d
6
+ metadata.gz: 1bccd1104a3b2e6236073ca6cd267dba65c0b51a16a6af87c0ea09e1361b8eaf7a9b1a6c7d9bb7e35da3ab4a235ebb578ccd9ce64fd1cdf562282d81a78a3edf
7
+ data.tar.gz: edf6d4339a6ddafbc0e88d8356686fe7986ae5983ff4a7905be45f419b8d4887b0ed44332f4a537da6cf3e8a3909ce6c687dd0745c70485ff00a7231f9ee0062
@@ -261,7 +261,7 @@ module Entity
261
261
 
262
262
  define_method method_name do |*args|
263
263
  id = self.id
264
- persist_name = orig_method_name.to_s << ":" << (Array === id ? Misc.obj2digest(id) : id)
264
+ persist_name = orig_method_name.to_s + ":" << (Array === id ? Misc.obj2digest(id) : id)
265
265
 
266
266
  persist_options = options
267
267
  persist_options = persist_options.merge(:other => {:args => args}) if args and args.any?
@@ -28,7 +28,7 @@ class FixWidthTable
28
28
  else
29
29
  Log.debug "FixWidthTable up-to-date: #{ filename } - (in_memory:#{in_memory})"
30
30
  if in_memory
31
- @file = Open.open(@filename, :mode => 'r:ASCII-ASCII'){|f| StringIO.new f.read}
31
+ @file = Open.open(@filename, :mode => 'r:ASCII-8BIT'){|f| StringIO.new f.read}
32
32
  else
33
33
  @file = File.open(@filename, 'r:ASCII-8BIT')
34
34
  end
@@ -365,12 +365,12 @@ module Persist
365
365
  type ||= :marshal
366
366
 
367
367
  persist_options ||= {}
368
- if type == :memory and persist_options[:file] and persist_options[:persist]
368
+ if type == :memory && persist_options[:file] && persist_options[:persist]
369
369
  repo = persist_options[:repo] || Persist::MEMORY
370
370
  if persist_options[:persist] == :update || persist_options[:update]
371
371
  repo.delete persist_options[:file]
372
372
  end
373
- return repo[persist_options[:file]] ||= yield
373
+ return repo[persist_options[:file]] ||= yield
374
374
  end
375
375
 
376
376
  if FalseClass === persist_options[:persist]
@@ -32,7 +32,8 @@ module TSV
32
32
  done = false
33
33
  Open.write(output) do |os|
34
34
  options.delete :sep if options[:sep] == "\t"
35
- os.puts TSV.header_lines(key_field, fields, options)
35
+ header_lines = TSV.header_lines(key_field, fields, options)
36
+ os.puts header_lines unless header_lines.empty?
36
37
 
37
38
  while line
38
39
  key, *parts = line.sub("\n",'').split(sep, -1)
@@ -212,6 +213,35 @@ module TSV
212
213
  other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
213
214
  Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
214
215
 
216
+ same_key = true
217
+ begin
218
+ case
219
+ when (Misc.match_fields(key_field, other.key_field) and same_key)
220
+ Log.debug "Attachment with same key: #{other.key_field}"
221
+ attach_same_key other, fields
222
+ when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
223
+ Log.debug "Found other key field: #{other.key_field}"
224
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
225
+ when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
226
+ Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
227
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
228
+ else
229
+ index = TSV.find_traversal(self, other, options)
230
+ raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
231
+ Log.debug "Attachment with index: #{other.key_field}"
232
+ attach_index other, index, fields
233
+ end
234
+ rescue Exception
235
+ if same_key
236
+ Log.warn "Could not translate identifiers with same_key"
237
+ same_key = false
238
+ retry
239
+ else
240
+ raise $!
241
+ end
242
+ end
243
+ Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
244
+
215
245
  if complete
216
246
  fill = TrueClass === complete ? nil : complete
217
247
  field_length = self.fields.length
@@ -219,6 +249,9 @@ module TSV
219
249
  other_common_pos = common_fields.collect{|f| other.fields.index f}
220
250
  this_common_pos = common_fields.collect{|f| self.fields.index f}
221
251
  missing = other.keys - self.keys
252
+
253
+ other = other.to_list if other.type == :single
254
+
222
255
  case type
223
256
  when :single
224
257
  missing.each do |k|
@@ -249,35 +282,6 @@ module TSV
249
282
  end
250
283
  end
251
284
 
252
- same_key = true
253
- begin
254
- case
255
- when (Misc.match_fields(key_field, other.key_field) and same_key)
256
- Log.debug "Attachment with same key: #{other.key_field}"
257
- attach_same_key other, fields
258
- when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
259
- Log.debug "Found other key field: #{other.key_field}"
260
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
261
- when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
262
- Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
263
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
264
- else
265
- index = TSV.find_traversal(self, other, options)
266
- raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
267
- Log.debug "Attachment with index: #{other.key_field}"
268
- attach_index other, index, fields
269
- end
270
- rescue Exception
271
- if same_key
272
- Log.warn "Could not translate identifiers with same_key"
273
- same_key = false
274
- retry
275
- else
276
- raise $!
277
- end
278
- end
279
- Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
280
-
281
285
  self
282
286
  end
283
287
 
@@ -4,9 +4,20 @@ module TSV
4
4
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
5
5
 
6
6
  fields = [fields].compact unless Array === fields
7
+
8
+ common_fields = self.fields & fields
9
+
10
+ fields = fields - common_fields
11
+
7
12
  num_fields = fields.length
8
13
 
9
14
  field_positions = fields.collect{|field| other.identify_field field}
15
+
16
+ if common_fields.any?
17
+ common_field_positions = common_fields.collect{|field| self.identify_field field}
18
+ common_field_positions_other = common_fields.collect{|field| other.identify_field field}
19
+ end
20
+
10
21
  other.with_unnamed do
11
22
  with_unnamed do
12
23
  through do |key, values|
@@ -28,6 +39,12 @@ module TSV
28
39
  new_values = field_positions.collect do |pos|
29
40
  pos == :key ? key : other_values[pos]
30
41
  end
42
+
43
+ if common_fields.any?
44
+ common_field_positions.zip(common_field_positions_other).each do |p1,p2|
45
+ current[p1] += other_values[p2]
46
+ end
47
+ end
31
48
  end
32
49
 
33
50
  new_values.collect!{|v| [v]} if type == :double and not (other.type == :double or other.type == :flat)
@@ -89,6 +89,7 @@ module TSV
89
89
  stream
90
90
  end
91
91
 
92
+ all_fields = fields
92
93
  key_field = key_fields.compact.first
93
94
  if same_fields
94
95
  fields = fields.first
@@ -292,14 +292,16 @@ def self.add_libdir(dir=nil)
292
292
  end
293
293
  end
294
294
 
295
- def self.bootstrap(elems, num = :current, options = {}, &block)
295
+ def self.bootstrap(elems, num = nil, options = {}, &block)
296
296
  IndiferentHash.setup options
297
+
298
+ num = Rbbt::Config.get :cpus, :default_bootstrap_cpus, :bootstrap_cpus if num == :current || num == nil
297
299
  num = :current if num.nil?
298
300
  cpus = case num
299
301
  when :current
300
- n = 10
301
- n = elems.length / 2 if n > elems.length/2
302
- n
302
+ n = Etc.nprocessors
303
+ n = elems.length / 2 if n > elems.length/2
304
+ n
303
305
  when String
304
306
  num.to_i
305
307
  when Integer
@@ -163,7 +163,7 @@ module Misc
163
163
 
164
164
  def self.fixutf8(string)
165
165
  return nil if string.nil?
166
- return string if string.respond_to?(:encoding) && string.encoding == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
166
+ return string if string.respond_to?(:encoding) && string.encoding.to_s == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
167
167
  (string.respond_to?(:valid_encoding) && string.valid_encoding)
168
168
 
169
169
  if string.respond_to?(:encode)
@@ -18,6 +18,8 @@ module Misc
18
18
 
19
19
  BLOCK_SIZE=1024 * 8
20
20
 
21
+ SKIP_TAG="[SKIP TAG]"
22
+
21
23
  PIPE_MUTEX = Mutex.new
22
24
 
23
25
  OPEN_PIPE_IN = []
@@ -29,7 +31,7 @@ module Misc
29
31
 
30
32
  [sout, sin]
31
33
  end
32
- Log.debug{"Creating pipe #{[res.last.inspect,res.first.inspect] * " => "}"}
34
+ Log.debug{"Creating pipe #{[res.last.inspect, res.first.inspect] * " => "}"}
33
35
  res
34
36
  end
35
37
 
@@ -255,6 +257,11 @@ module Misc
255
257
  end
256
258
  tee1, *rest = Misc.tee_stream stream_dup, num + 1
257
259
  stream.reopen(tee1)
260
+
261
+ #ToDo: I can't explain why the @threads variable appears with the value of
262
+ # @filename
263
+ stream.instance_variable_set(:@threads, nil) if stream.instance_variables.include?(:@threads)
264
+
258
265
  tee1.annotate(stream)
259
266
  rest
260
267
  end
@@ -537,18 +544,29 @@ module Misc
537
544
  end
538
545
  end
539
546
 
547
+ def self.buffer_stream(stream)
548
+ sout, sin = Misc.pipe
549
+ Misc.consume_stream(stream, true, sin)
550
+ sout
551
+ end
552
+
540
553
  def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block)
541
554
  output.puts header if header
542
555
  streams = streams.collect do |stream|
543
556
  if defined? Step and Step === stream
544
- stream.get_stream || stream.join.path.open
557
+ io = stream.get_stream
558
+ if io
559
+ buffer_stream(io)
560
+ else
561
+ stream.join.path.open
562
+ end
545
563
  else
546
564
  stream
547
565
  end
548
566
  end
549
567
 
550
568
  begin
551
- done_streams = []
569
+
552
570
  lines ||= streams.collect{|s| s.gets }
553
571
  keys = []
554
572
  parts = []
@@ -564,6 +582,7 @@ module Misc
564
582
  end
565
583
  sizes = parts.collect{|p| p.nil? ? 0 : p.length }
566
584
  last_min = nil
585
+
567
586
  while lines.compact.any?
568
587
  if block_given?
569
588
  min = keys.compact.sort(&block).first
@@ -571,14 +590,23 @@ module Misc
571
590
  min = keys.compact.sort.first
572
591
  end
573
592
  str = []
593
+ threads = []
574
594
  keys.each_with_index do |key,i|
575
595
  case key
576
596
  when min
577
- str << [parts[i] * sep]
597
+ if parts[i] == [SKIP_TAG]
598
+ str << [sep * (sizes[i]-1)] if sizes[i] > 0
599
+ else
600
+ str << [parts[i] * sep]
601
+ end
602
+
578
603
  line = lines[i] = streams[i].gets
579
- if line.nil?
604
+
605
+ if line.nil?
580
606
  keys[i] = nil
581
607
  parts[i] = nil
608
+ streams[i].close unless streams[i].closed?
609
+ streams[i].join if streams[i].respond_to?(:join)
582
610
  else
583
611
  k, *p = line.chomp.split(sep, -1)
584
612
  keys[i] = k
@@ -589,10 +617,12 @@ module Misc
589
617
  end
590
618
  end
591
619
 
592
- output.puts [min, str*sep] * sep
620
+ output.puts [min, str.flatten*sep] * sep
593
621
  end
622
+
594
623
  streams.each do |stream|
595
- stream.join if stream.respond_to? :join
624
+ stream.close unless stream.closed?
625
+ stream.join if stream.respond_to?(:join)
596
626
  end
597
627
  rescue
598
628
  Log.exception $!
@@ -17,14 +17,14 @@ module TmpFile
17
17
 
18
18
  # Creates a random file name, with the given suffix and a random number
19
19
  # up to +max+
20
- def self.random_name(s = "tmp-", max = 10000000)
20
+ def self.random_name(s = "tmp-", max = 1_000_000_000)
21
21
  n = rand(max)
22
22
  s + n.to_s
23
23
  end
24
24
 
25
25
  # Creates a random filename in the temporary directory
26
- def self.tmp_file(s = "tmp-", max=10000000, dir = TMPDIR)
27
- File.expand_path(File.join(dir, random_name(s,max)))
26
+ def self.tmp_file(s = "tmp-", max=1_000_000_000, dir = TMPDIR)
27
+ File.expand_path(File.join(dir, random_name(s, max)))
28
28
  end
29
29
 
30
30
  def self.with_file(content = nil, erase = true, options = {})
@@ -33,7 +33,7 @@ module TmpFile
33
33
 
34
34
  prefix = options[:prefix] || "tmp-"
35
35
  tmpdir = options[:tmpdir] || TMPDIR
36
- max = options[:max] || 10000000
36
+ max = options[:max] || 1_000_000_000
37
37
  tmpfile = tmp_file prefix, max, tmpdir
38
38
  if options[:extension]
39
39
  tmpfile += ".#{options[:extension]}"
@@ -10,9 +10,6 @@ require 'rbbt/workflow/util/provenance'
10
10
 
11
11
  module Workflow
12
12
 
13
- STEP_CACHE = {}
14
- LOAD_STEP_CACHE = {}
15
-
16
13
  class TaskNotFoundException < Exception
17
14
  def initialize(workflow, task = nil)
18
15
  if task
@@ -264,11 +261,11 @@ module Workflow
264
261
  end
265
262
 
266
263
  def step_cache
267
- @step_cache ||= Workflow::STEP_CACHE
264
+ Thread.current[:step_cache] ||= {}
268
265
  end
269
266
 
270
267
  def self.load_step_cache
271
- @load_step_cache ||= Workflow::LOAD_STEP_CACHE
268
+ Thread.current[:load_step_cache] ||= {}
272
269
  end
273
270
 
274
271
 
@@ -418,7 +418,7 @@ module Workflow
418
418
  when :hash
419
419
  clean_inputs = Annotated.purge(inputs)
420
420
  clean_inputs = clean_inputs.collect{|i| Symbol === i ? i.to_s : i }
421
- deps_str = dependencies.collect{|d| Step === d ? d.short_path : d }
421
+ deps_str = dependencies.collect{|d| (Step === d || (defined?(RemoteStep) && RemoteStep === Step)) ? "Step: " << d.short_path : d }
422
422
  key_obj = {:inputs => clean_inputs, :dependencies => deps_str }
423
423
  key_str = Misc.obj2str(key_obj)
424
424
  hash_str = Misc.digest(key_str)
@@ -137,14 +137,16 @@ class RemoteWorkflow
137
137
 
138
138
  post_thread = Thread.new(Thread.current) do |parent|
139
139
  bl = lambda do |rok|
140
- if Net::HTTPOK === rok
140
+ case rok
141
+ when Net::HTTPOK
141
142
  _url = rok["RBBT-STREAMING-JOB-URL"]
142
143
  @url = File.join(task_url, File.basename(_url)) if _url
143
144
  rok.read_body do |c,_a, _b|
144
145
  sin.write c
145
146
  end
146
147
  sin.close
147
- elsif Net::HTTPSeeOther === rok
148
+ when Net::HTTPRedirection, Net::HTTPAccepted
149
+ Thread.current.report_on_exception = false
148
150
  raise TryThis.new(rok)
149
151
  else
150
152
  err = StringIO.new
@@ -178,7 +180,8 @@ class RemoteWorkflow
178
180
  begin
179
181
  RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
180
182
  rescue TryThis
181
- RestClient::Request.execute(:method => :get, :url => $!.payload.header[:location], :block_response => bl)
183
+ url = $!.payload["location"]
184
+ RestClient::Request.execute(:method => :get, :url => url, :block_response => bl)
182
185
  end
183
186
  end
184
187
 
@@ -26,8 +26,8 @@ class RemoteStep < Step
26
26
 
27
27
  def cache_file
28
28
  begin
29
- digest = Misc.obj2digest([base_url, task, base_name, inputs])
30
- Rbbt.var.cache.REST[[task, clean_name, digest].compact * "."].find
29
+ digest = Misc.obj2digest([base_url, task.to_s, base_name, inputs])
30
+ Rbbt.var.cache.REST[task.to_s][[clean_name, digest].compact * "."].find
31
31
  rescue
32
32
  Log.exception $!
33
33
  raise $!
@@ -143,7 +143,11 @@ class RemoteStep < Step
143
143
  @info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
144
144
  @last_info_time = Time.now
145
145
  init_job unless @url
146
- info = @adaptor.get_json(File.join(@url, 'info'))
146
+ info = begin
147
+ @adaptor.get_json(File.join(@url, 'info'))
148
+ rescue
149
+ {:status => :noinfo}
150
+ end
147
151
  info = RemoteWorkflow.fix_hash(info)
148
152
  info[:status] = info[:status].to_sym if String === info[:status]
149
153
  info
@@ -243,6 +247,7 @@ class RemoteStep < Step
243
247
  def join
244
248
  return true if cache_files.any?
245
249
  init_job unless @url
250
+ produce unless @started
246
251
  Log.debug{ "Joining RemoteStep: #{path}" }
247
252
 
248
253
  if IO === @result
@@ -258,7 +263,6 @@ class RemoteStep < Step
258
263
  sleep 1 unless self.done? || self.aborted? || self.error?
259
264
  while not (self.done? || self.aborted? || self.error?)
260
265
  sleep 3
261
- iif [self.done?, self.status, self.info]
262
266
  end
263
267
  end
264
268
 
@@ -311,6 +315,10 @@ class RemoteStep < Step
311
315
  end
312
316
  end
313
317
 
318
+ def short_path
319
+ init_job unless @url
320
+ [@task, @name] * "/"
321
+ end
314
322
 
315
323
  def input_checks
316
324
  []
@@ -366,7 +366,7 @@ class Step
366
366
  (step.dependencies + step.input_dependencies).each do |step_dep|
367
367
  next if step_dep.done? or step_dep.running? or (ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
368
368
  dep_step[step_dep.path] ||= []
369
- dep_step[step_dep.path] << step_dep
369
+ dep_step[step_dep.path] << step
370
370
  end
371
371
  end
372
372
 
@@ -88,7 +88,7 @@ module Workflow
88
88
  key = [workflow, task]
89
89
 
90
90
  dep_tree[key] = workflow.dep_tree(task)
91
- end if self.task_dependencies[name.to_sym]
91
+ end if name && self.task_dependencies[name.to_sym]
92
92
  dep_tree
93
93
  end
94
94
  end
@@ -84,6 +84,10 @@ Misc.in_dir(app_dir) do
84
84
  end
85
85
  end
86
86
 
87
+ fixed_options = {}
88
+ options.each do |k,v| fixed_options[k.to_sym] = v end
89
+ options = fixed_options
90
+
87
91
  case server
88
92
  when 'passenger'
89
93
  system ENV, "env RBBT_LOG=0 passenger start -R '#{config_ru_file}' -p #{options[:Port] || "2887"}"
@@ -97,7 +97,7 @@ TSV.traverse jobs do |file,i|
97
97
  else
98
98
  info = begin
99
99
  Open.open(i[:info_file]) do |f|
100
- Step::INFO_SERIALIAZER.load(f)
100
+ Step::INFO_SERIALIZER.load(f)
101
101
  end
102
102
  rescue
103
103
  {:status => :noinfo}
@@ -134,7 +134,7 @@ TSV.traverse jobs do |file,i|
134
134
  end
135
135
  end
136
136
 
137
- if (force and status !~ /done/) or
137
+ if (force && status !~ /done/) or
138
138
  status =~ /\b(old|dirty|nopid|error|missing|aborted|dead|sync)$/ or
139
139
  (status == "noinfo" and not done) or
140
140
  status == ""
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ options = SOPT.setup <<EOF
7
+ Read a TSV file and dump it
8
+
9
+ $ rbbt tsv read [options] <filename.tsv|->
10
+
11
+ Use - to read from STDIN
12
+
13
+ -k--key_field* Key field
14
+ -f--fields* Fields
15
+ -t--type* Type
16
+ -m--merge* Merge from multiple rows
17
+ -h--help Print this help
18
+
19
+ EOF
20
+ rbbt_usage and exit 0 if options[:help]
21
+
22
+ file = ARGV.shift
23
+
24
+ file = STDIN if file == '-'
25
+
26
+ tsv = TSV.open file, :merge => true, :type => :double
27
+
28
+
29
+ puts tsv.to_unmerged_expanded_s
@@ -153,7 +153,7 @@ if recursive
153
153
  dep = deps.shift
154
154
  inputs = {} if inputs.nil?
155
155
  inputs = inputs.merge(dep.info[:inputs] || {})
156
- deps = deps.concat dep.dependencies
156
+ deps = deps.concat dep.dependencies if dep.dependencies
157
157
  end
158
158
 
159
159
  inputs = inputs.merge step.archived_inputs
@@ -123,15 +123,11 @@ TmpFile.with_file do |app_dir|
123
123
  else
124
124
  options[:config] = config_ru_file
125
125
 
126
- clean_options = {}
127
- options.each do |k,v|
128
- begin
129
- clean_options[k.to_sym] = v
130
- rescue
131
- end
132
- end
126
+ fixed_options = {}
127
+ options.each do |k,v| fixed_options[k.to_sym] = v end
128
+ options = fixed_options
133
129
 
134
- Rack::Server.start(clean_options)
130
+ Rack::Server.start(options)
135
131
  end
136
132
  end
137
133
  end
@@ -337,6 +337,8 @@ if clean_task
337
337
  dep.clean
338
338
  dep.set_info :status, :cleaned
339
339
  end
340
+
341
+ job.clean if job.task_name.to_s == clean_task.to_s
340
342
  end
341
343
  end
342
344
 
@@ -213,8 +213,9 @@ row2 E
213
213
 
214
214
  tsv1 = tsv2 = nil
215
215
 
216
- tsv1 = Rbbt.tmp.test.test1.data.tsv :double, :sep => /\s+/
217
- tsv2 = Rbbt.tmp.test.test2.data.tsv :double, :sep => /\s+/
216
+ tsv1 = Rbbt.tmp.test.test1.data.produce(true).tsv :double, :sep => /\s+/
217
+ tsv2 = Rbbt.tmp.test.test2.data.produce(true).tsv :double, :sep => /\s+/
218
+ Log.tsv tsv2
218
219
 
219
220
  tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find #.to_s
220
221
 
@@ -4,6 +4,7 @@ require 'rbbt/tsv/stream'
4
4
  require 'rbbt'
5
5
 
6
6
  class TestStream < Test::Unit::TestCase
7
+
7
8
  def test_collapse_stream
8
9
  text=<<-EOF
9
10
  #: :sep=" "
@@ -65,7 +66,7 @@ row3 AAA BBB CCC
65
66
 
66
67
  text2=<<-EOF
67
68
  #: :sep=" "
68
- #Row Labela Labelb
69
+ #Row Labela Labelb
69
70
  row1 a b
70
71
  row3 aaa bbb
71
72
  row2 aa bb
@@ -100,7 +101,7 @@ row1 A B C
100
101
 
101
102
  text2=<<-EOF
102
103
  #: :sep=" "
103
- #Row Labela Labelb
104
+ #Row Labela Labelb
104
105
  row2 aa bb
105
106
  EOF
106
107
 
@@ -132,7 +133,7 @@ row1 A B C
132
133
 
133
134
  text2=<<-EOF
134
135
  #: :sep=" "
135
- #Row Labela Labelb
136
+ #Row Labela Labelb
136
137
  row2 aa bb
137
138
  EOF
138
139
 
@@ -0,0 +1,10 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/misc/format'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_fixutf8
6
+ a = "Camión"
7
+ assert_equal a, Misc.fixutf8(a)
8
+ end
9
+ end
10
+
@@ -285,7 +285,7 @@ line4
285
285
  begin
286
286
  Misc.consume_stream(sio, false, STDOUT)
287
287
  rescue
288
- Log.exception $!
288
+ raise $!
289
289
  end
290
290
  end
291
291
  end
@@ -4,6 +4,27 @@ require 'rbbt/workflow'
4
4
  module DepWorkflow
5
5
  extend Workflow
6
6
 
7
+ input :input_file, :file, "Input file", nil, :stream => true
8
+ task :s1 => :array do |input_file|
9
+ TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
10
+ line + "\t" << "Task1"
11
+ end
12
+ end
13
+
14
+ dep :s1
15
+ task :s2 => :array do |input_file|
16
+ TSV.traverse step(:s1), :type => :array, :into => :stream, :bar => "Task2" do |line|
17
+ next [line.split("\t").first, Misc::SKIP_TAG] * "\t" if rand < 0.9
18
+ line + "\t" << "Task2"
19
+ end
20
+ end
21
+
22
+ dep :s1
23
+ dep :s2
24
+ task :s3 => :array do |input_file|
25
+ Misc.paste_streams(dependencies.reverse)
26
+ end
27
+
7
28
  input :input_file, :file, "Input file", nil, :stream => true
8
29
  task :task1 => :array do |input_file|
9
30
  TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
@@ -28,9 +49,7 @@ module DepWorkflow
28
49
  dep :task2
29
50
  dep :task3
30
51
  task :task4 => :array do
31
- s1 = TSV.get_stream step(:task2)
32
- s2 = TSV.get_stream step(:task3)
33
- Misc.paste_streams([s1, s2])
52
+ Misc.paste_streams(dependencies)
34
53
  end
35
54
 
36
55
  dep :task4
@@ -40,12 +59,10 @@ module DepWorkflow
40
59
  end
41
60
  end
42
61
 
43
- dep :task5
44
62
  dep :task2
63
+ dep :task5
45
64
  task :task6 => :array do
46
- s1 = TSV.get_stream step(:task2)
47
- s2 = TSV.get_stream step(:task5)
48
- Misc.paste_streams([s1, s2])
65
+ Misc.paste_streams(dependencies)
49
66
  end
50
67
 
51
68
  input :stream_file, :file, "Streamed file", nil, :stream => true
@@ -134,8 +151,25 @@ class TestWorkflowDependency < Test::Unit::TestCase
134
151
  end
135
152
  end
136
153
 
154
+ def test_task3
155
+ size = 100000
156
+ content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
157
+ TmpFile.with_file(content) do |input_file|
158
+ job = DepWorkflow.job(:task3, "TEST", :input_file => input_file)
159
+ io = TSV.get_stream job.run(:stream)
160
+ last_line = nil
161
+ while line = io.gets
162
+ last_line = line.strip
163
+ end
164
+ io.join
165
+
166
+ assert_equal "Line #{size}\tTask1\tTask3", last_line
167
+ end
168
+ end
169
+
137
170
  def test_task4
138
- size = 1000
171
+ size = 100000
172
+ Log.severity = 0
139
173
  content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
140
174
  last_line = nil
141
175
  TmpFile.with_file(content) do |input_file|
@@ -151,7 +185,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
151
185
  end
152
186
 
153
187
  def test_task5
154
- size = 1000
188
+ size = 10000
155
189
  content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
156
190
  last_line = nil
157
191
  TmpFile.with_file(content) do |input_file|
@@ -165,9 +199,32 @@ class TestWorkflowDependency < Test::Unit::TestCase
165
199
  assert_equal "Line #{size}\tTask1\tTask2\tTask1\tTask3\tTask5", last_line
166
200
  end
167
201
 
202
+ def test_s3
203
+ size = 100000
204
+ content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
205
+ last_line = nil
206
+ Log.severity = 0
207
+ TmpFile.with_file(content) do |input_file|
208
+ begin
209
+ job = DepWorkflow.job(:s3, "TEST", :input_file => input_file)
210
+ job.recursive_clean
211
+ job.run(:stream)
212
+ io = TSV.get_stream job
213
+ while line = io.gets
214
+ last_line = line.strip
215
+ end
216
+ io.join if io.respond_to? :join
217
+ rescue Exception
218
+ job.abort
219
+ raise $!
220
+ end
221
+ end
222
+ assert last_line.include? "Line #{size}"
223
+ end
224
+
168
225
  def test_task6
169
226
  size = 100000
170
- content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
227
+ content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
171
228
  last_line = nil
172
229
  Log.severity = 0
173
230
  TmpFile.with_file(content) do |input_file|
@@ -189,7 +246,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
189
246
  end
190
247
 
191
248
  def test_task8
192
- size = 100000
249
+ size = 10000
193
250
  content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
194
251
  last_line = nil
195
252
  Log.severity = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.27.6
4
+ version: 5.27.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-03 00:00:00.000000000 Z
11
+ date: 2020-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -403,6 +403,7 @@ files:
403
403
  - share/rbbt_commands/tsv/sort
404
404
  - share/rbbt_commands/tsv/subset
405
405
  - share/rbbt_commands/tsv/transpose
406
+ - share/rbbt_commands/tsv/uncollapse
406
407
  - share/rbbt_commands/tsv/unzip
407
408
  - share/rbbt_commands/tsv/values
408
409
  - share/rbbt_commands/tsv/write_excel
@@ -487,6 +488,7 @@ files:
487
488
  - test/rbbt/util/concurrency/test_threads.rb
488
489
  - test/rbbt/util/log/test_progress.rb
489
490
  - test/rbbt/util/misc/test_bgzf.rb
491
+ - test/rbbt/util/misc/test_format.rb
490
492
  - test/rbbt/util/misc/test_lock.rb
491
493
  - test/rbbt/util/misc/test_multipart_payload.rb
492
494
  - test/rbbt/util/misc/test_omics.rb
@@ -552,6 +554,7 @@ test_files:
552
554
  - test/rbbt/util/test_colorize.rb
553
555
  - test/rbbt/util/misc/test_omics.rb
554
556
  - test/rbbt/util/misc/test_pipes.rb
557
+ - test/rbbt/util/misc/test_format.rb
555
558
  - test/rbbt/util/misc/test_lock.rb
556
559
  - test/rbbt/util/misc/test_multipart_payload.rb
557
560
  - test/rbbt/util/misc/test_bgzf.rb