rbbt-util 5.27.6 → 5.27.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe7c3dec4d2333e77ea396a2d6e572512bf7033999ff4715f1a1c6c9d3d1b9d5
4
- data.tar.gz: 32a3d905c1a95f9c7e5cb359855f8781509cfeecdd8db7b77ee29eaea528d030
3
+ metadata.gz: be6c76069ffd5a6a49876a42c95f42e7c63975e287446d786c7e4db154456790
4
+ data.tar.gz: c6bdbb33d38b424dc6355e6dc70a15a4bfefc65b7331d2eb471476b8522ac3a3
5
5
  SHA512:
6
- metadata.gz: d009024642898f775d423e1ddede6aea538e8ed5ff4675eedadd64768e67b0a46f6519038378a221db76b0e196cfc713ca8dc7553788cdb9a51014c14a97de4c
7
- data.tar.gz: 5c0612feca84b00e6d402f7a68ca1cebd6f4b7a785b40899bdaed3385a3f68853824dcc98d9cfd2e641768ef5ca3d52e38e41bdbc1d7a3751ae288e8f8d7ed2d
6
+ metadata.gz: 1bccd1104a3b2e6236073ca6cd267dba65c0b51a16a6af87c0ea09e1361b8eaf7a9b1a6c7d9bb7e35da3ab4a235ebb578ccd9ce64fd1cdf562282d81a78a3edf
7
+ data.tar.gz: edf6d4339a6ddafbc0e88d8356686fe7986ae5983ff4a7905be45f419b8d4887b0ed44332f4a537da6cf3e8a3909ce6c687dd0745c70485ff00a7231f9ee0062
@@ -261,7 +261,7 @@ module Entity
261
261
 
262
262
  define_method method_name do |*args|
263
263
  id = self.id
264
- persist_name = orig_method_name.to_s << ":" << (Array === id ? Misc.obj2digest(id) : id)
264
+ persist_name = orig_method_name.to_s + ":" << (Array === id ? Misc.obj2digest(id) : id)
265
265
 
266
266
  persist_options = options
267
267
  persist_options = persist_options.merge(:other => {:args => args}) if args and args.any?
@@ -28,7 +28,7 @@ class FixWidthTable
28
28
  else
29
29
  Log.debug "FixWidthTable up-to-date: #{ filename } - (in_memory:#{in_memory})"
30
30
  if in_memory
31
- @file = Open.open(@filename, :mode => 'r:ASCII-ASCII'){|f| StringIO.new f.read}
31
+ @file = Open.open(@filename, :mode => 'r:ASCII-8BIT'){|f| StringIO.new f.read}
32
32
  else
33
33
  @file = File.open(@filename, 'r:ASCII-8BIT')
34
34
  end
@@ -365,12 +365,12 @@ module Persist
365
365
  type ||= :marshal
366
366
 
367
367
  persist_options ||= {}
368
- if type == :memory and persist_options[:file] and persist_options[:persist]
368
+ if type == :memory && persist_options[:file] && persist_options[:persist]
369
369
  repo = persist_options[:repo] || Persist::MEMORY
370
370
  if persist_options[:persist] == :update || persist_options[:update]
371
371
  repo.delete persist_options[:file]
372
372
  end
373
- return repo[persist_options[:file]] ||= yield
373
+ return repo[persist_options[:file]] ||= yield
374
374
  end
375
375
 
376
376
  if FalseClass === persist_options[:persist]
@@ -32,7 +32,8 @@ module TSV
32
32
  done = false
33
33
  Open.write(output) do |os|
34
34
  options.delete :sep if options[:sep] == "\t"
35
- os.puts TSV.header_lines(key_field, fields, options)
35
+ header_lines = TSV.header_lines(key_field, fields, options)
36
+ os.puts header_lines unless header_lines.empty?
36
37
 
37
38
  while line
38
39
  key, *parts = line.sub("\n",'').split(sep, -1)
@@ -212,6 +213,35 @@ module TSV
212
213
  other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
213
214
  Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
214
215
 
216
+ same_key = true
217
+ begin
218
+ case
219
+ when (Misc.match_fields(key_field, other.key_field) and same_key)
220
+ Log.debug "Attachment with same key: #{other.key_field}"
221
+ attach_same_key other, fields
222
+ when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
223
+ Log.debug "Found other key field: #{other.key_field}"
224
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
225
+ when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
226
+ Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
227
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
228
+ else
229
+ index = TSV.find_traversal(self, other, options)
230
+ raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
231
+ Log.debug "Attachment with index: #{other.key_field}"
232
+ attach_index other, index, fields
233
+ end
234
+ rescue Exception
235
+ if same_key
236
+ Log.warn "Could not translate identifiers with same_key"
237
+ same_key = false
238
+ retry
239
+ else
240
+ raise $!
241
+ end
242
+ end
243
+ Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
244
+
215
245
  if complete
216
246
  fill = TrueClass === complete ? nil : complete
217
247
  field_length = self.fields.length
@@ -219,6 +249,9 @@ module TSV
219
249
  other_common_pos = common_fields.collect{|f| other.fields.index f}
220
250
  this_common_pos = common_fields.collect{|f| self.fields.index f}
221
251
  missing = other.keys - self.keys
252
+
253
+ other = other.to_list if other.type == :single
254
+
222
255
  case type
223
256
  when :single
224
257
  missing.each do |k|
@@ -249,35 +282,6 @@ module TSV
249
282
  end
250
283
  end
251
284
 
252
- same_key = true
253
- begin
254
- case
255
- when (Misc.match_fields(key_field, other.key_field) and same_key)
256
- Log.debug "Attachment with same key: #{other.key_field}"
257
- attach_same_key other, fields
258
- when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
259
- Log.debug "Found other key field: #{other.key_field}"
260
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
261
- when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
262
- Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
263
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
264
- else
265
- index = TSV.find_traversal(self, other, options)
266
- raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
267
- Log.debug "Attachment with index: #{other.key_field}"
268
- attach_index other, index, fields
269
- end
270
- rescue Exception
271
- if same_key
272
- Log.warn "Could not translate identifiers with same_key"
273
- same_key = false
274
- retry
275
- else
276
- raise $!
277
- end
278
- end
279
- Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
280
-
281
285
  self
282
286
  end
283
287
 
@@ -4,9 +4,20 @@ module TSV
4
4
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
5
5
 
6
6
  fields = [fields].compact unless Array === fields
7
+
8
+ common_fields = self.fields & fields
9
+
10
+ fields = fields - common_fields
11
+
7
12
  num_fields = fields.length
8
13
 
9
14
  field_positions = fields.collect{|field| other.identify_field field}
15
+
16
+ if common_fields.any?
17
+ common_field_positions = common_fields.collect{|field| self.identify_field field}
18
+ common_field_positions_other = common_fields.collect{|field| other.identify_field field}
19
+ end
20
+
10
21
  other.with_unnamed do
11
22
  with_unnamed do
12
23
  through do |key, values|
@@ -28,6 +39,12 @@ module TSV
28
39
  new_values = field_positions.collect do |pos|
29
40
  pos == :key ? key : other_values[pos]
30
41
  end
42
+
43
+ if common_fields.any?
44
+ common_field_positions.zip(common_field_positions_other).each do |p1,p2|
45
+ current[p1] += other_values[p2]
46
+ end
47
+ end
31
48
  end
32
49
 
33
50
  new_values.collect!{|v| [v]} if type == :double and not (other.type == :double or other.type == :flat)
@@ -89,6 +89,7 @@ module TSV
89
89
  stream
90
90
  end
91
91
 
92
+ all_fields = fields
92
93
  key_field = key_fields.compact.first
93
94
  if same_fields
94
95
  fields = fields.first
@@ -292,14 +292,16 @@ def self.add_libdir(dir=nil)
292
292
  end
293
293
  end
294
294
 
295
- def self.bootstrap(elems, num = :current, options = {}, &block)
295
+ def self.bootstrap(elems, num = nil, options = {}, &block)
296
296
  IndiferentHash.setup options
297
+
298
+ num = Rbbt::Config.get :cpus, :default_bootstrap_cpus, :bootstrap_cpus if num == :current || num == nil
297
299
  num = :current if num.nil?
298
300
  cpus = case num
299
301
  when :current
300
- n = 10
301
- n = elems.length / 2 if n > elems.length/2
302
- n
302
+ n = Etc.nprocessors
303
+ n = elems.length / 2 if n > elems.length/2
304
+ n
303
305
  when String
304
306
  num.to_i
305
307
  when Integer
@@ -163,7 +163,7 @@ module Misc
163
163
 
164
164
  def self.fixutf8(string)
165
165
  return nil if string.nil?
166
- return string if string.respond_to?(:encoding) && string.encoding == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
166
+ return string if string.respond_to?(:encoding) && string.encoding.to_s == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
167
167
  (string.respond_to?(:valid_encoding) && string.valid_encoding)
168
168
 
169
169
  if string.respond_to?(:encode)
@@ -18,6 +18,8 @@ module Misc
18
18
 
19
19
  BLOCK_SIZE=1024 * 8
20
20
 
21
+ SKIP_TAG="[SKIP TAG]"
22
+
21
23
  PIPE_MUTEX = Mutex.new
22
24
 
23
25
  OPEN_PIPE_IN = []
@@ -29,7 +31,7 @@ module Misc
29
31
 
30
32
  [sout, sin]
31
33
  end
32
- Log.debug{"Creating pipe #{[res.last.inspect,res.first.inspect] * " => "}"}
34
+ Log.debug{"Creating pipe #{[res.last.inspect, res.first.inspect] * " => "}"}
33
35
  res
34
36
  end
35
37
 
@@ -255,6 +257,11 @@ module Misc
255
257
  end
256
258
  tee1, *rest = Misc.tee_stream stream_dup, num + 1
257
259
  stream.reopen(tee1)
260
+
261
+ #ToDo: I can't explain why the @threads variable appears with the value of
262
+ # @filename
263
+ stream.instance_variable_set(:@threads, nil) if stream.instance_variables.include?(:@threads)
264
+
258
265
  tee1.annotate(stream)
259
266
  rest
260
267
  end
@@ -537,18 +544,29 @@ module Misc
537
544
  end
538
545
  end
539
546
 
547
+ def self.buffer_stream(stream)
548
+ sout, sin = Misc.pipe
549
+ Misc.consume_stream(stream, true, sin)
550
+ sout
551
+ end
552
+
540
553
  def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block)
541
554
  output.puts header if header
542
555
  streams = streams.collect do |stream|
543
556
  if defined? Step and Step === stream
544
- stream.get_stream || stream.join.path.open
557
+ io = stream.get_stream
558
+ if io
559
+ buffer_stream(io)
560
+ else
561
+ stream.join.path.open
562
+ end
545
563
  else
546
564
  stream
547
565
  end
548
566
  end
549
567
 
550
568
  begin
551
- done_streams = []
569
+
552
570
  lines ||= streams.collect{|s| s.gets }
553
571
  keys = []
554
572
  parts = []
@@ -564,6 +582,7 @@ module Misc
564
582
  end
565
583
  sizes = parts.collect{|p| p.nil? ? 0 : p.length }
566
584
  last_min = nil
585
+
567
586
  while lines.compact.any?
568
587
  if block_given?
569
588
  min = keys.compact.sort(&block).first
@@ -571,14 +590,23 @@ module Misc
571
590
  min = keys.compact.sort.first
572
591
  end
573
592
  str = []
593
+ threads = []
574
594
  keys.each_with_index do |key,i|
575
595
  case key
576
596
  when min
577
- str << [parts[i] * sep]
597
+ if parts[i] == [SKIP_TAG]
598
+ str << [sep * (sizes[i]-1)] if sizes[i] > 0
599
+ else
600
+ str << [parts[i] * sep]
601
+ end
602
+
578
603
  line = lines[i] = streams[i].gets
579
- if line.nil?
604
+
605
+ if line.nil?
580
606
  keys[i] = nil
581
607
  parts[i] = nil
608
+ streams[i].close unless streams[i].closed?
609
+ streams[i].join if streams[i].respond_to?(:join)
582
610
  else
583
611
  k, *p = line.chomp.split(sep, -1)
584
612
  keys[i] = k
@@ -589,10 +617,12 @@ module Misc
589
617
  end
590
618
  end
591
619
 
592
- output.puts [min, str*sep] * sep
620
+ output.puts [min, str.flatten*sep] * sep
593
621
  end
622
+
594
623
  streams.each do |stream|
595
- stream.join if stream.respond_to? :join
624
+ stream.close unless stream.closed?
625
+ stream.join if stream.respond_to?(:join)
596
626
  end
597
627
  rescue
598
628
  Log.exception $!
@@ -17,14 +17,14 @@ module TmpFile
17
17
 
18
18
  # Creates a random file name, with the given suffix and a random number
19
19
  # up to +max+
20
- def self.random_name(s = "tmp-", max = 10000000)
20
+ def self.random_name(s = "tmp-", max = 1_000_000_000)
21
21
  n = rand(max)
22
22
  s + n.to_s
23
23
  end
24
24
 
25
25
  # Creates a random filename in the temporary directory
26
- def self.tmp_file(s = "tmp-", max=10000000, dir = TMPDIR)
27
- File.expand_path(File.join(dir, random_name(s,max)))
26
+ def self.tmp_file(s = "tmp-", max=1_000_000_000, dir = TMPDIR)
27
+ File.expand_path(File.join(dir, random_name(s, max)))
28
28
  end
29
29
 
30
30
  def self.with_file(content = nil, erase = true, options = {})
@@ -33,7 +33,7 @@ module TmpFile
33
33
 
34
34
  prefix = options[:prefix] || "tmp-"
35
35
  tmpdir = options[:tmpdir] || TMPDIR
36
- max = options[:max] || 10000000
36
+ max = options[:max] || 1_000_000_000
37
37
  tmpfile = tmp_file prefix, max, tmpdir
38
38
  if options[:extension]
39
39
  tmpfile += ".#{options[:extension]}"
@@ -10,9 +10,6 @@ require 'rbbt/workflow/util/provenance'
10
10
 
11
11
  module Workflow
12
12
 
13
- STEP_CACHE = {}
14
- LOAD_STEP_CACHE = {}
15
-
16
13
  class TaskNotFoundException < Exception
17
14
  def initialize(workflow, task = nil)
18
15
  if task
@@ -264,11 +261,11 @@ module Workflow
264
261
  end
265
262
 
266
263
  def step_cache
267
- @step_cache ||= Workflow::STEP_CACHE
264
+ Thread.current[:step_cache] ||= {}
268
265
  end
269
266
 
270
267
  def self.load_step_cache
271
- @load_step_cache ||= Workflow::LOAD_STEP_CACHE
268
+ Thread.current[:load_step_cache] ||= {}
272
269
  end
273
270
 
274
271
 
@@ -418,7 +418,7 @@ module Workflow
418
418
  when :hash
419
419
  clean_inputs = Annotated.purge(inputs)
420
420
  clean_inputs = clean_inputs.collect{|i| Symbol === i ? i.to_s : i }
421
- deps_str = dependencies.collect{|d| Step === d ? d.short_path : d }
421
+ deps_str = dependencies.collect{|d| (Step === d || (defined?(RemoteStep) && RemoteStep === Step)) ? "Step: " << d.short_path : d }
422
422
  key_obj = {:inputs => clean_inputs, :dependencies => deps_str }
423
423
  key_str = Misc.obj2str(key_obj)
424
424
  hash_str = Misc.digest(key_str)
@@ -137,14 +137,16 @@ class RemoteWorkflow
137
137
 
138
138
  post_thread = Thread.new(Thread.current) do |parent|
139
139
  bl = lambda do |rok|
140
- if Net::HTTPOK === rok
140
+ case rok
141
+ when Net::HTTPOK
141
142
  _url = rok["RBBT-STREAMING-JOB-URL"]
142
143
  @url = File.join(task_url, File.basename(_url)) if _url
143
144
  rok.read_body do |c,_a, _b|
144
145
  sin.write c
145
146
  end
146
147
  sin.close
147
- elsif Net::HTTPSeeOther === rok
148
+ when Net::HTTPRedirection, Net::HTTPAccepted
149
+ Thread.current.report_on_exception = false
148
150
  raise TryThis.new(rok)
149
151
  else
150
152
  err = StringIO.new
@@ -178,7 +180,8 @@ class RemoteWorkflow
178
180
  begin
179
181
  RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
180
182
  rescue TryThis
181
- RestClient::Request.execute(:method => :get, :url => $!.payload.header[:location], :block_response => bl)
183
+ url = $!.payload["location"]
184
+ RestClient::Request.execute(:method => :get, :url => url, :block_response => bl)
182
185
  end
183
186
  end
184
187
 
@@ -26,8 +26,8 @@ class RemoteStep < Step
26
26
 
27
27
  def cache_file
28
28
  begin
29
- digest = Misc.obj2digest([base_url, task, base_name, inputs])
30
- Rbbt.var.cache.REST[[task, clean_name, digest].compact * "."].find
29
+ digest = Misc.obj2digest([base_url, task.to_s, base_name, inputs])
30
+ Rbbt.var.cache.REST[task.to_s][[clean_name, digest].compact * "."].find
31
31
  rescue
32
32
  Log.exception $!
33
33
  raise $!
@@ -143,7 +143,11 @@ class RemoteStep < Step
143
143
  @info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
144
144
  @last_info_time = Time.now
145
145
  init_job unless @url
146
- info = @adaptor.get_json(File.join(@url, 'info'))
146
+ info = begin
147
+ @adaptor.get_json(File.join(@url, 'info'))
148
+ rescue
149
+ {:status => :noinfo}
150
+ end
147
151
  info = RemoteWorkflow.fix_hash(info)
148
152
  info[:status] = info[:status].to_sym if String === info[:status]
149
153
  info
@@ -243,6 +247,7 @@ class RemoteStep < Step
243
247
  def join
244
248
  return true if cache_files.any?
245
249
  init_job unless @url
250
+ produce unless @started
246
251
  Log.debug{ "Joining RemoteStep: #{path}" }
247
252
 
248
253
  if IO === @result
@@ -258,7 +263,6 @@ class RemoteStep < Step
258
263
  sleep 1 unless self.done? || self.aborted? || self.error?
259
264
  while not (self.done? || self.aborted? || self.error?)
260
265
  sleep 3
261
- iif [self.done?, self.status, self.info]
262
266
  end
263
267
  end
264
268
 
@@ -311,6 +315,10 @@ class RemoteStep < Step
311
315
  end
312
316
  end
313
317
 
318
+ def short_path
319
+ init_job unless @url
320
+ [@task, @name] * "/"
321
+ end
314
322
 
315
323
  def input_checks
316
324
  []
@@ -366,7 +366,7 @@ class Step
366
366
  (step.dependencies + step.input_dependencies).each do |step_dep|
367
367
  next if step_dep.done? or step_dep.running? or (ComputeDependency === step_dep and (step_dep.compute == :nodup or step_dep.compute == :ignore))
368
368
  dep_step[step_dep.path] ||= []
369
- dep_step[step_dep.path] << step_dep
369
+ dep_step[step_dep.path] << step
370
370
  end
371
371
  end
372
372
 
@@ -88,7 +88,7 @@ module Workflow
88
88
  key = [workflow, task]
89
89
 
90
90
  dep_tree[key] = workflow.dep_tree(task)
91
- end if self.task_dependencies[name.to_sym]
91
+ end if name && self.task_dependencies[name.to_sym]
92
92
  dep_tree
93
93
  end
94
94
  end
@@ -84,6 +84,10 @@ Misc.in_dir(app_dir) do
84
84
  end
85
85
  end
86
86
 
87
+ fixed_options = {}
88
+ options.each do |k,v| fixed_options[k.to_sym] = v end
89
+ options = fixed_options
90
+
87
91
  case server
88
92
  when 'passenger'
89
93
  system ENV, "env RBBT_LOG=0 passenger start -R '#{config_ru_file}' -p #{options[:Port] || "2887"}"
@@ -97,7 +97,7 @@ TSV.traverse jobs do |file,i|
97
97
  else
98
98
  info = begin
99
99
  Open.open(i[:info_file]) do |f|
100
- Step::INFO_SERIALIAZER.load(f)
100
+ Step::INFO_SERIALIZER.load(f)
101
101
  end
102
102
  rescue
103
103
  {:status => :noinfo}
@@ -134,7 +134,7 @@ TSV.traverse jobs do |file,i|
134
134
  end
135
135
  end
136
136
 
137
- if (force and status !~ /done/) or
137
+ if (force && status !~ /done/) or
138
138
  status =~ /\b(old|dirty|nopid|error|missing|aborted|dead|sync)$/ or
139
139
  (status == "noinfo" and not done) or
140
140
  status == ""
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ options = SOPT.setup <<EOF
7
+ Read a TSV file and dump it
8
+
9
+ $ rbbt tsv read [options] <filename.tsv|->
10
+
11
+ Use - to read from STDIN
12
+
13
+ -k--key_field* Key field
14
+ -f--fields* Fields
15
+ -t--type* Type
16
+ -m--merge* Merge from multiple rows
17
+ -h--help Print this help
18
+
19
+ EOF
20
+ rbbt_usage and exit 0 if options[:help]
21
+
22
+ file = ARGV.shift
23
+
24
+ file = STDIN if file == '-'
25
+
26
+ tsv = TSV.open file, :merge => true, :type => :double
27
+
28
+
29
+ puts tsv.to_unmerged_expanded_s
@@ -153,7 +153,7 @@ if recursive
153
153
  dep = deps.shift
154
154
  inputs = {} if inputs.nil?
155
155
  inputs = inputs.merge(dep.info[:inputs] || {})
156
- deps = deps.concat dep.dependencies
156
+ deps = deps.concat dep.dependencies if dep.dependencies
157
157
  end
158
158
 
159
159
  inputs = inputs.merge step.archived_inputs
@@ -123,15 +123,11 @@ TmpFile.with_file do |app_dir|
123
123
  else
124
124
  options[:config] = config_ru_file
125
125
 
126
- clean_options = {}
127
- options.each do |k,v|
128
- begin
129
- clean_options[k.to_sym] = v
130
- rescue
131
- end
132
- end
126
+ fixed_options = {}
127
+ options.each do |k,v| fixed_options[k.to_sym] = v end
128
+ options = fixed_options
133
129
 
134
- Rack::Server.start(clean_options)
130
+ Rack::Server.start(options)
135
131
  end
136
132
  end
137
133
  end
@@ -337,6 +337,8 @@ if clean_task
337
337
  dep.clean
338
338
  dep.set_info :status, :cleaned
339
339
  end
340
+
341
+ job.clean if job.task_name.to_s == clean_task.to_s
340
342
  end
341
343
  end
342
344
 
@@ -213,8 +213,9 @@ row2 E
213
213
 
214
214
  tsv1 = tsv2 = nil
215
215
 
216
- tsv1 = Rbbt.tmp.test.test1.data.tsv :double, :sep => /\s+/
217
- tsv2 = Rbbt.tmp.test.test2.data.tsv :double, :sep => /\s+/
216
+ tsv1 = Rbbt.tmp.test.test1.data.produce(true).tsv :double, :sep => /\s+/
217
+ tsv2 = Rbbt.tmp.test.test2.data.produce(true).tsv :double, :sep => /\s+/
218
+ Log.tsv tsv2
218
219
 
219
220
  tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find #.to_s
220
221
 
@@ -4,6 +4,7 @@ require 'rbbt/tsv/stream'
4
4
  require 'rbbt'
5
5
 
6
6
  class TestStream < Test::Unit::TestCase
7
+
7
8
  def test_collapse_stream
8
9
  text=<<-EOF
9
10
  #: :sep=" "
@@ -65,7 +66,7 @@ row3 AAA BBB CCC
65
66
 
66
67
  text2=<<-EOF
67
68
  #: :sep=" "
68
- #Row Labela Labelb
69
+ #Row Labela Labelb
69
70
  row1 a b
70
71
  row3 aaa bbb
71
72
  row2 aa bb
@@ -100,7 +101,7 @@ row1 A B C
100
101
 
101
102
  text2=<<-EOF
102
103
  #: :sep=" "
103
- #Row Labela Labelb
104
+ #Row Labela Labelb
104
105
  row2 aa bb
105
106
  EOF
106
107
 
@@ -132,7 +133,7 @@ row1 A B C
132
133
 
133
134
  text2=<<-EOF
134
135
  #: :sep=" "
135
- #Row Labela Labelb
136
+ #Row Labela Labelb
136
137
  row2 aa bb
137
138
  EOF
138
139
 
@@ -0,0 +1,10 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/misc/format'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_fixutf8
6
+ a = "Camión"
7
+ assert_equal a, Misc.fixutf8(a)
8
+ end
9
+ end
10
+
@@ -285,7 +285,7 @@ line4
285
285
  begin
286
286
  Misc.consume_stream(sio, false, STDOUT)
287
287
  rescue
288
- Log.exception $!
288
+ raise $!
289
289
  end
290
290
  end
291
291
  end
@@ -4,6 +4,27 @@ require 'rbbt/workflow'
4
4
  module DepWorkflow
5
5
  extend Workflow
6
6
 
7
+ input :input_file, :file, "Input file", nil, :stream => true
8
+ task :s1 => :array do |input_file|
9
+ TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
10
+ line + "\t" << "Task1"
11
+ end
12
+ end
13
+
14
+ dep :s1
15
+ task :s2 => :array do |input_file|
16
+ TSV.traverse step(:s1), :type => :array, :into => :stream, :bar => "Task2" do |line|
17
+ next [line.split("\t").first, Misc::SKIP_TAG] * "\t" if rand < 0.9
18
+ line + "\t" << "Task2"
19
+ end
20
+ end
21
+
22
+ dep :s1
23
+ dep :s2
24
+ task :s3 => :array do |input_file|
25
+ Misc.paste_streams(dependencies.reverse)
26
+ end
27
+
7
28
  input :input_file, :file, "Input file", nil, :stream => true
8
29
  task :task1 => :array do |input_file|
9
30
  TSV.traverse input_file, :type => :array, :into => :stream, :bar => "Task1" do |line|
@@ -28,9 +49,7 @@ module DepWorkflow
28
49
  dep :task2
29
50
  dep :task3
30
51
  task :task4 => :array do
31
- s1 = TSV.get_stream step(:task2)
32
- s2 = TSV.get_stream step(:task3)
33
- Misc.paste_streams([s1, s2])
52
+ Misc.paste_streams(dependencies)
34
53
  end
35
54
 
36
55
  dep :task4
@@ -40,12 +59,10 @@ module DepWorkflow
40
59
  end
41
60
  end
42
61
 
43
- dep :task5
44
62
  dep :task2
63
+ dep :task5
45
64
  task :task6 => :array do
46
- s1 = TSV.get_stream step(:task2)
47
- s2 = TSV.get_stream step(:task5)
48
- Misc.paste_streams([s1, s2])
65
+ Misc.paste_streams(dependencies)
49
66
  end
50
67
 
51
68
  input :stream_file, :file, "Streamed file", nil, :stream => true
@@ -134,8 +151,25 @@ class TestWorkflowDependency < Test::Unit::TestCase
134
151
  end
135
152
  end
136
153
 
154
+ def test_task3
155
+ size = 100000
156
+ content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
157
+ TmpFile.with_file(content) do |input_file|
158
+ job = DepWorkflow.job(:task3, "TEST", :input_file => input_file)
159
+ io = TSV.get_stream job.run(:stream)
160
+ last_line = nil
161
+ while line = io.gets
162
+ last_line = line.strip
163
+ end
164
+ io.join
165
+
166
+ assert_equal "Line #{size}\tTask1\tTask3", last_line
167
+ end
168
+ end
169
+
137
170
  def test_task4
138
- size = 1000
171
+ size = 100000
172
+ Log.severity = 0
139
173
  content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
140
174
  last_line = nil
141
175
  TmpFile.with_file(content) do |input_file|
@@ -151,7 +185,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
151
185
  end
152
186
 
153
187
  def test_task5
154
- size = 1000
188
+ size = 10000
155
189
  content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
156
190
  last_line = nil
157
191
  TmpFile.with_file(content) do |input_file|
@@ -165,9 +199,32 @@ class TestWorkflowDependency < Test::Unit::TestCase
165
199
  assert_equal "Line #{size}\tTask1\tTask2\tTask1\tTask3\tTask5", last_line
166
200
  end
167
201
 
202
+ def test_s3
203
+ size = 100000
204
+ content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
205
+ last_line = nil
206
+ Log.severity = 0
207
+ TmpFile.with_file(content) do |input_file|
208
+ begin
209
+ job = DepWorkflow.job(:s3, "TEST", :input_file => input_file)
210
+ job.recursive_clean
211
+ job.run(:stream)
212
+ io = TSV.get_stream job
213
+ while line = io.gets
214
+ last_line = line.strip
215
+ end
216
+ io.join if io.respond_to? :join
217
+ rescue Exception
218
+ job.abort
219
+ raise $!
220
+ end
221
+ end
222
+ assert last_line.include? "Line #{size}"
223
+ end
224
+
168
225
  def test_task6
169
226
  size = 100000
170
- content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
227
+ content = (1..size).to_a.collect{|num| "Line #{num}" } * "\n"
171
228
  last_line = nil
172
229
  Log.severity = 0
173
230
  TmpFile.with_file(content) do |input_file|
@@ -189,7 +246,7 @@ class TestWorkflowDependency < Test::Unit::TestCase
189
246
  end
190
247
 
191
248
  def test_task8
192
- size = 100000
249
+ size = 10000
193
250
  content = (0..size).to_a.collect{|num| "Line #{num}" } * "\n"
194
251
  last_line = nil
195
252
  Log.severity = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.27.6
4
+ version: 5.27.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-03 00:00:00.000000000 Z
11
+ date: 2020-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -403,6 +403,7 @@ files:
403
403
  - share/rbbt_commands/tsv/sort
404
404
  - share/rbbt_commands/tsv/subset
405
405
  - share/rbbt_commands/tsv/transpose
406
+ - share/rbbt_commands/tsv/uncollapse
406
407
  - share/rbbt_commands/tsv/unzip
407
408
  - share/rbbt_commands/tsv/values
408
409
  - share/rbbt_commands/tsv/write_excel
@@ -487,6 +488,7 @@ files:
487
488
  - test/rbbt/util/concurrency/test_threads.rb
488
489
  - test/rbbt/util/log/test_progress.rb
489
490
  - test/rbbt/util/misc/test_bgzf.rb
491
+ - test/rbbt/util/misc/test_format.rb
490
492
  - test/rbbt/util/misc/test_lock.rb
491
493
  - test/rbbt/util/misc/test_multipart_payload.rb
492
494
  - test/rbbt/util/misc/test_omics.rb
@@ -552,6 +554,7 @@ test_files:
552
554
  - test/rbbt/util/test_colorize.rb
553
555
  - test/rbbt/util/misc/test_omics.rb
554
556
  - test/rbbt/util/misc/test_pipes.rb
557
+ - test/rbbt/util/misc/test_format.rb
555
558
  - test/rbbt/util/misc/test_lock.rb
556
559
  - test/rbbt/util/misc/test_multipart_payload.rb
557
560
  - test/rbbt/util/misc/test_bgzf.rb