rbbt-util 5.9.12 → 5.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 611734c0287b691572f51267904bec6af278eba1
4
- data.tar.gz: c718a679f44bebb62b3f235e71765f9deebbb13d
3
+ metadata.gz: f5de94d0ec4441889212dc37780f880b38809d71
4
+ data.tar.gz: 44acad91d91a0a8f7c351d32cd2dba45ca9fc058
5
5
  SHA512:
6
- metadata.gz: 2bb409b0bee93ff72b801bcdafaaa4e6c8dc569531d7e7d7606cb4c089c2606d99f5c7bc379cccd4c6979695fe781255ec361708c74cda9667d580c8722eee96
7
- data.tar.gz: 59e11ff34787e24df2e4aa6a5cf2590f98adb18594db0f6e18d740cd74a428ac1defb40bb4476ef9366de2abc51ee887c205c14e00a092ca9bffeb09aee01596
6
+ metadata.gz: 1fc109e80a9808fe201f7aa683fd2c0d9a8c0f35e6a1b2f3f9a0ee5f1f62801f192497c51ce09e69e5a721cb301d564227e543c44f2263aaf1faa3da14215c8c
7
+ data.tar.gz: 78d53fa4a1bc35af227a4d15553204a31405e89459e2b51a60cad762eba536b0e73f0719e51058ca8df03f328ab6117986f6109a2e01c531bb12c3f0b8453576
data/lib/rbbt/persist.rb CHANGED
@@ -83,7 +83,7 @@ module Persist
83
83
 
84
84
  TRUE_STRINGS = Set.new ["true", "True", "TRUE", "t", "T", "1", "yes", "Yes", "YES", "y", "Y", "ON", "on"] unless defined? TRUE_STRINGS
85
85
  def self.load_file(path, type)
86
- case (type || "nil").to_sym
86
+ case (type || :marshal).to_sym
87
87
  when :nil
88
88
  nil
89
89
  when :boolean
@@ -125,7 +125,7 @@ module Persist
125
125
 
126
126
  return if content.nil?
127
127
 
128
- case (type || "nil").to_sym
128
+ case (type || :marshal).to_sym
129
129
  when :nil
130
130
  nil
131
131
  when :boolean
@@ -134,7 +134,8 @@ module Persist
134
134
  content.file.seek 0
135
135
  Misc.sensiblewrite(path, content.file.read)
136
136
  when :tsv
137
- Misc.sensiblewrite(path, content.to_s)
137
+ content = content.to_s if TSV === content
138
+ Misc.sensiblewrite(path, content)
138
139
  when :annotations
139
140
  Misc.sensiblewrite(path, Annotated.tsv(content, :all).to_s)
140
141
  when :string, :text
@@ -146,10 +147,21 @@ module Persist
146
147
  f.close
147
148
  content
148
149
  when :array
149
- if content.empty?
150
- Misc.sensiblewrite(path, "")
150
+ case content
151
+ when Array
152
+ if content.empty?
153
+ Misc.sensiblewrite(path, "")
154
+ else
155
+ Misc.sensiblewrite(path, content * "\n" + "\n")
156
+ end
157
+ when IO
158
+ Misc.sensiblewrite(path) do |file|
159
+ while block = content.read(2048)
160
+ file.write block
161
+ end
162
+ end
151
163
  else
152
- Misc.sensiblewrite(path, content * "\n" + "\n")
164
+ Misc.sensiblewrite(path, content.to_s)
153
165
  end
154
166
  when :marshal_tsv
155
167
  Misc.sensiblewrite(path, Marshal.dump(content.dup))
@@ -164,12 +176,67 @@ module Persist
164
176
  end
165
177
  end
166
178
 
179
+ def self.tee_stream(stream, path, type, callback = nil)
180
+ file_out, file_in = IO.pipe
181
+ stream_out, stream_in = IO.pipe
182
+
183
+ saver_thread = Thread.new(Thread.current) do |parent|
184
+ begin
185
+ Misc.lock(path) do
186
+ save_file(path, type, file_out)
187
+ end
188
+ rescue Exception
189
+ Log.exception $!
190
+ parent.raise $!
191
+ end
192
+ end
193
+
194
+ splitter_thread = Thread.new(Thread.current) do |parent|
195
+ begin
196
+ while block = stream.read(2048)
197
+ begin stream_in.write block; rescue Exception; Log.exception $! end
198
+ begin file_in.write block; rescue Exception; Log.exception $! end
199
+ end
200
+ file_in.close
201
+ stream_in.close
202
+ callback.call if callback
203
+ rescue Exception
204
+ Log.exception $!
205
+ parent.raise $!
206
+ end
207
+ end
208
+
209
+ class << stream_out
210
+ attr_accessor :threads
211
+
212
+ def join
213
+ @threads.each{|t| t.join }
214
+ @threads = []
215
+ end
216
+
217
+ def close
218
+ join
219
+ super
220
+ end
221
+
222
+ def read(*args)
223
+ res = super(*args)
224
+ join if eof?
225
+ res
226
+ end
227
+ end
228
+
229
+ stream_out.threads = [splitter_thread, saver_thread]
230
+
231
+ stream_out
232
+ end
233
+
167
234
  def self.persist(name, type = nil, persist_options = {})
168
235
  type ||= :marshal
169
236
  persist_options = Misc.add_defaults persist_options, :persist => true
170
- other_options = Misc.process_options persist_options, :other
171
237
 
172
238
  if persist_options[:persist]
239
+ other_options = Misc.process_options persist_options, :other
173
240
  path = persistence_path(name, persist_options, other_options || {})
174
241
 
175
242
  case
@@ -276,8 +343,13 @@ module Persist
276
343
  Log.medium "Persist create: #{ path } - #{persist_options.inspect[0..100]}"
277
344
  res = yield
278
345
 
279
- if res.nil?
346
+ case res
347
+ when nil
280
348
  res = load_file(path) unless persist_options[:no_load]
349
+ when IO
350
+ res = tee_stream(res, path, type, res.respond_to?(:callback)? res.callback : nil)
351
+ when TSV::Dumper
352
+ res = tee_stream(res.stream, path, type, res.respond_to?(:callback)? res.callback : nil)
281
353
  else
282
354
  Misc.lock(path) do
283
355
  save_file(path, type, res)
@@ -1,5 +1,6 @@
1
1
  require 'yaml'
2
2
  require 'rbbt/annotations'
3
+ require 'rbbt/tsv/dumper'
3
4
  module TSV
4
5
 
5
6
  TSV_SERIALIZER = YAML
@@ -460,7 +461,7 @@ module TSV
460
461
  def options
461
462
  options = {}
462
463
  ENTRIES.each do |entry|
463
- options[entry] = self.send(entry)
464
+ options[entry.to_sym] = self.send(entry)
464
465
  end
465
466
  IndiferentHash.setup options
466
467
  end
@@ -498,26 +499,31 @@ module TSV
498
499
  end
499
500
  end
500
501
 
501
- str = ""
502
-
503
- entry_hash = no_options ? {} : (ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}
504
- str = TSV.header_lines(key_field, fields, entry_hash)
505
-
506
- with_unnamed do
507
- if keys.nil?
508
- each do |key, values|
509
- key = key.to_s if Symbol === key
510
- str << key.to_s
511
- str << values_to_s(values)
512
- end
513
- else
514
- keys.zip(values_at(*keys)).each do |key, values|
515
- key = key.to_s if Symbol === key
516
- str << key.to_s << values_to_s(values)
502
+ io = TSV::Dumper.stream self do |dumper|
503
+ dumper.init unless no_options
504
+ begin
505
+ if keys
506
+ keys.each do |key|
507
+ dumper.add key, self[key]
508
+ end
509
+ else
510
+ with_unnamed do
511
+ each do |k,v|
512
+ dumper.add k, v
513
+ end
514
+ end
517
515
  end
516
+ rescue
517
+ Log.exception $!
518
+ parent.raise $!
518
519
  end
520
+ end
519
521
 
522
+ str = ''
523
+ while block = io.read(2048)
524
+ str << block
520
525
  end
526
+
521
527
  str
522
528
  end
523
529
 
@@ -0,0 +1,56 @@
1
+ module TSV
2
+ class Dumper
3
+ attr_accessor :in_stream, :stream, :options, :filename
4
+ def self.stream(options = {}, filename = nil, &block)
5
+ dumper = TSV::Dumper.new options, filename
6
+ Thread.new do
7
+ yield dumper
8
+ dumper.close
9
+ end
10
+ dumper.stream
11
+ end
12
+
13
+ def initialize(options, filename = nil)
14
+ if TSV === options
15
+ @options = options.options.merge(:key_field => options.key_field, :fields => options.fields)
16
+ @filename ||= options.filename
17
+ else
18
+ @options = options
19
+ @filename = filename
20
+ end
21
+ @filename ||= Misc.fingerprint options
22
+ @stream, @in_stream = IO.pipe
23
+ end
24
+
25
+ def self.values_to_s(values, fields = nil)
26
+ case values
27
+ when nil
28
+ if fields.nil? or fields.empty?
29
+ "\n"
30
+ else
31
+ "\t" << ([""] * fields.length) * "\t" << "\n"
32
+ end
33
+ when Array
34
+ "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
35
+ else
36
+ "\t" << values.to_s << "\n"
37
+ end
38
+ end
39
+
40
+ def init
41
+ options = @options.dup
42
+ key_field, fields = Misc.process_options options, :key_field, :fields
43
+
44
+ str = TSV.header_lines(key_field, fields, options)
45
+ @in_stream.puts str
46
+ end
47
+
48
+ def add(k,v)
49
+ @in_stream << k << TSV::Dumper.values_to_s(v, @options[:fields])
50
+ end
51
+
52
+ def close
53
+ @in_stream.close
54
+ end
55
+ end
56
+ end
@@ -54,7 +54,29 @@ module TSV
54
54
  traverse_tsv(obj, options, &block)
55
55
  when Hash
56
56
  traverse_hash(obj, options, &block)
57
- when IO
57
+ when TSV::Parser
58
+ callback = Misc.process_options options, :callback
59
+ if callback
60
+ obj.traverse(options) do |k,v|
61
+ res = yield k, v
62
+ callback.call res
63
+ end
64
+ else
65
+ obj.traverse(options, &block)
66
+ end
67
+ when (options[:type] == :array and IO)
68
+ callback = Misc.process_options options, :callback
69
+ if callback
70
+ while not obj.eof?
71
+ res = yield obj.gets.strip
72
+ callback.call res
73
+ end
74
+ else
75
+ while not obj.eof?
76
+ yield obj.gets.strip
77
+ end
78
+ end
79
+ when IO, File
58
80
  callback = Misc.process_options options, :callback
59
81
  if callback
60
82
  TSV::Parser.traverse(obj, options) do |k,v|
@@ -68,8 +90,20 @@ module TSV
68
90
  obj.open do |stream|
69
91
  traverse_obj(stream, options, &block)
70
92
  end
93
+ when (defined? Step and Step)
94
+ case obj.result
95
+ when IO
96
+ traverse_obj(obj.result, options, &block)
97
+ when TSV::Dumper
98
+ traverse_obj(obj.stream, options, &block)
99
+ else
100
+ obj.join
101
+ traverse_obj(obj.path.open, options, &block)
102
+ end
71
103
  when Array
72
104
  traverse_array(obj, options, &block)
105
+ when nil
106
+ raise "Can not traverse nil object"
73
107
  else
74
108
  raise "Unknown object for traversal: #{Misc.fingerprint obj }"
75
109
  end
@@ -80,7 +114,6 @@ module TSV
80
114
 
81
115
  q = RbbtThreadQueue.new num
82
116
 
83
-
84
117
  if callback
85
118
  block = Proc.new do |k,v,mutex|
86
119
  v, mutex = nil, v if mutex.nil?
@@ -133,35 +166,60 @@ module TSV
133
166
  k,v = value
134
167
  obj[k] = v
135
168
  end
136
- when IO
169
+ when TSV::Dumper
137
170
  return if value.nil?
138
- obj << value
171
+ obj.add *value
172
+ when IO, StringIO
173
+ return if value.nil?
174
+ obj.puts value
139
175
  else
140
176
  obj << value
141
177
  end
142
178
  end
143
179
 
180
+ def self.traverse_run(obj, threads, cpus, options = {}, &block)
181
+ if threads.nil? and cpus.nil?
182
+ traverse_obj obj, options, &block
183
+ else
184
+ if threads
185
+ traverse_threads threads, obj, options, &block
186
+ else
187
+ traverse_cpus cpus, obj, options, &block
188
+ end
189
+ end
190
+ end
191
+
144
192
  def self.traverse(obj, options = {}, &block)
145
193
  threads = Misc.process_options options, :threads
146
194
  cpus = Misc.process_options options, :cpus
147
- into = Misc.process_options options, :into
195
+ into = options[:into]
196
+
197
+ threads = nil if threads and threads.to_i <= 1
198
+ cpus = nil if cpus and cpus.to_i <= 1
148
199
 
149
200
  if into
150
201
  callback = Proc.new do |e|
151
202
  store_into into, e
152
203
  end
153
204
  options[:callback] = callback
154
- end
155
205
 
156
- if threads.nil? and cpus.nil?
157
- traverse_obj obj, options, &block
158
- else
159
- if threads
160
- traverse_threads threads, obj, options, &block
206
+ case into
207
+ when TSV::Dumper, IO, StringIO
208
+ Thread.new(Thread.current) do |parent|
209
+ begin
210
+ traverse_run(obj, threads, cpus, options, &block)
211
+ into.close
212
+ rescue Exception
213
+ parent.raise $!
214
+ end
215
+ end
161
216
  else
162
- traverse_cpus cpus, obj, options, &block
217
+ traverse_run(obj, threads, cpus, options, &block)
163
218
  end
219
+
220
+ into
221
+ else
222
+ traverse_run(obj, threads, cpus, options, &block)
164
223
  end
165
- into
166
224
  end
167
225
  end
@@ -1,7 +1,7 @@
1
1
  require 'rbbt/util/cmd'
2
2
  module TSV
3
3
  class Parser
4
- attr_accessor :stream, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
4
+ attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
5
5
 
6
6
  class SKIP_LINE < Exception; end
7
7
  class END_PARSING < Exception; end
@@ -24,7 +24,7 @@ module TSV
24
24
  # Process options line
25
25
 
26
26
  if line and line =~ /^#{@header_hash}: (.*)/
27
- options = Misc.string2hash $1
27
+ options = Misc.string2hash $1.strip
28
28
  line = Misc.fixutf8 stream.gets
29
29
  end
30
30
 
@@ -343,11 +343,16 @@ module TSV
343
343
  @sep = Misc.process_options(options, :sep) || "\t"
344
344
  @stream = stream
345
345
 
346
+
346
347
  header_options = parse_header(stream)
348
+
347
349
  options = header_options.merge options
348
350
 
349
351
  @type = Misc.process_options(options, :type) || :double
350
352
 
353
+ @filename = Misc.process_options(options, :filename)
354
+ @filename ||= stream.filename if stream.respond_to? :filename
355
+
351
356
  @sep2 = Misc.process_options(options, :sep2) || "|"
352
357
  @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
353
358
  @type ||= Misc.process_options options, :type
@@ -362,6 +367,7 @@ module TSV
362
367
  fields = options[:fields]
363
368
  fix_fields(options)
364
369
 
370
+ @type = @type.strip.to_sym if String === @type
365
371
  case @type
366
372
  when :double
367
373
  self.instance_eval do alias get_values get_values_double end
@@ -407,6 +413,8 @@ module TSV
407
413
  self.instance_eval do alias add_to_data add_to_data_flat end
408
414
  end
409
415
  end
416
+ else
417
+ raise "Unknown TSV type: #{@type.inspect}"
410
418
  end
411
419
 
412
420
 
@@ -419,10 +427,23 @@ module TSV
419
427
  data.key_field = @key_field
420
428
  data.fields = @fields
421
429
  data.namespace = @namespace
430
+ data.filename = @filename
422
431
  data.cast = @cast if Symbol === @cast
423
432
  data
424
433
  end
425
434
 
435
+ def annotate(data)
436
+ setup(data)
437
+ end
438
+
439
+ def options
440
+ options = {}
441
+ TSV::ENTRIES.each do |entry|
442
+ options[entry.to_sym] = self.send(entry) if self.respond_to? entry
443
+ end
444
+ IndiferentHash.setup options
445
+ end
446
+
426
447
  def traverse(options = {})
427
448
  monitor, grep, invert_grep, head = Misc.process_options options, :monitor, :grep, :invert_grep, :head
428
449
  raise "No block given in TSV::Parser#traverse" unless block_given?
@@ -493,8 +514,8 @@ module TSV
493
514
  break
494
515
  end
495
516
  end
496
- ensure
497
- stream.close unless stream.closed?
517
+ #ensure
518
+ # stream.close unless stream.closed?
498
519
  end
499
520
 
500
521
  self
data/lib/rbbt/tsv/util.rb CHANGED
@@ -92,6 +92,7 @@ module TSV
92
92
  raise "Cannot get stream from: #{file.inspect}"
93
93
  end
94
94
  end
95
+
95
96
  def self.get_stream(file, open_options = {})
96
97
  case file
97
98
  when Path
@@ -31,10 +31,11 @@ end
31
31
  Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
32
32
  module Misc
33
33
 
34
+
34
35
  def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
35
36
  i = 0
36
37
  re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
37
- text.split(re).collect do |paragraph|
38
+ text.split(re).collect do |paragraph|
38
39
  i += 1
39
40
  str = if i % 2 == 1
40
41
  words = paragraph.gsub(/\s+/, "\s").split(" ")
@@ -56,7 +57,7 @@ module Misc
56
57
  end
57
58
  offset = 0
58
59
  str
59
- end*""
60
+ end*""
60
61
  end
61
62
 
62
63
  def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
@@ -314,6 +315,12 @@ module Misc
314
315
  end
315
316
  when AnnotatedArray
316
317
  "<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
318
+ when TSV::Parser
319
+ "<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
320
+ when IO
321
+ "<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
322
+ when File
323
+ "<File:" + obj.path + ">"
317
324
  when Array
318
325
  if (length = obj.length) > 10
319
326
  "[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
@@ -347,6 +354,12 @@ module Misc
347
354
 
348
355
  def self.remove_long_items(obj)
349
356
  case
357
+ when IO === obj
358
+ remove_long_items("IO: " + obj.filename)
359
+ when obj.respond_to?(:path)
360
+ remove_long_items("File: " + obj.path)
361
+ when TSV::Parser === obj
362
+ remove_long_items("TSV Stream: " + obj.filename + " -- " << Misc.fingerprint(obj.options))
350
363
  when TSV === obj
351
364
  remove_long_items((obj.all_fields || []) + obj.keys.sort)
352
365
  when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
@@ -1276,6 +1289,8 @@ end
1276
1289
  str << k.to_s << "=>" << v
1277
1290
  when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
1278
1291
  str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
1292
+ when TSV::Parser === v
1293
+ str << remove_long_items(v)
1279
1294
  when Array === v
1280
1295
  str << k.to_s << "=>[" << v * "," << "]"
1281
1296
  else
@@ -41,7 +41,10 @@ class Step
41
41
 
42
42
  def info
43
43
  return {} if info_file.nil? or not Open.exists? info_file
44
- return @info_cache if @info_cache and File.mtime(info_file) < @info_cache_time
44
+ begin
45
+ return @info_cache if @info_cache and File.mtime(info_file) < @info_cache_time
46
+ rescue Exception
47
+ end
45
48
  begin
46
49
  @info_cache = Misc.insist(3, 5, info_file) do
47
50
  Misc.insist(2, 2, info_file) do
@@ -66,7 +69,7 @@ class Step
66
69
  value = Annotated.purge value if defined? Annotated
67
70
  Open.lock(info_file) do
68
71
  i = info
69
- i[key] = value
72
+ i[key] = value #File === value ? value.filename : value
70
73
  @info_cache = i
71
74
  Open.write(info_file, INFO_SERIALIAZER.dump(i))
72
75
  @info_cache_time = Time.now
@@ -143,7 +146,7 @@ class Step
143
146
  def running?
144
147
  return nil if not Open.exists? info_file
145
148
  return nil if info[:pid].nil?
146
- return Misc.pid_exists? info[:pid]
149
+ return Misc.pid_exists?(p = info[:pid]) && Process.pid != p
147
150
  end
148
151
 
149
152
  def error?
@@ -361,7 +364,10 @@ module Workflow
361
364
  if inputs.any? or dependencies.any?
362
365
  tagged_jobname = case TAG
363
366
  when :hash
364
- jobname + '_' + Misc.digest((inputs.collect{|i| Misc.fingerprint(i)} * "," + ";" + dependencies.collect{|dep| dep.name } * "\n"))
367
+ input_str = ""
368
+ input_str << inputs.collect{|i| Misc.fingerprint(i) } * ","
369
+ input_str << ";" << dependencies.collect{|dep| dep.name } * "\n"
370
+ jobname + '_' << Misc.digest(input_str)
365
371
  else
366
372
  jobname
367
373
  end
@@ -9,6 +9,7 @@ class Step
9
9
  attr_accessor :path, :task, :inputs, :dependencies, :bindings
10
10
  attr_accessor :pid
11
11
  attr_accessor :exec
12
+ attr_accessor :result
12
13
 
13
14
  def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil)
14
15
  path = Path.setup(Misc.sanitize_filename(path)) if String === path
@@ -54,7 +55,24 @@ class Step
54
55
  end
55
56
 
56
57
  def prepare_result(value, description = nil, info = {})
57
- case
58
+ case
59
+ when IO === value
60
+ begin
61
+ case @task.result_type
62
+ when :array
63
+ array = []
64
+ while line = value.gets
65
+ array << line
66
+ end
67
+ array
68
+ when :tsv
69
+ TSV.open(value)
70
+ else
71
+ value.read
72
+ end
73
+ ensure
74
+ value.join if value.respond_to? :join
75
+ end
58
76
  when (not defined? Entity or description.nil? or not Entity.formats.include? description)
59
77
  value
60
78
  when (Annotated === value and info.empty?)
@@ -70,13 +88,24 @@ class Step
70
88
  end
71
89
  end
72
90
 
73
- def exec
91
+ def _exec
74
92
  @exec = true if @exec.nil?
75
- result = @task.exec_in((bindings ? bindings : self), *@inputs)
76
- prepare_result result, @task.result_description
93
+ @task.exec_in((bindings ? bindings : self), *@inputs)
94
+ end
95
+
96
+ def exec(no_load=false)
97
+ @result = _exec
98
+ @result = @result.stream if TSV::Dumper === @result
99
+ no_load ? @result : prepare_result(@result, @task.result_description)
77
100
  end
78
101
 
79
102
  def join
103
+ case @result
104
+ when IO
105
+ while @result.read 2048; end
106
+ @result = nil
107
+ end
108
+
80
109
  if @pid.nil?
81
110
  self
82
111
  else
@@ -97,7 +126,7 @@ class Step
97
126
 
98
127
  def run(no_load = false)
99
128
 
100
- result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load do
129
+ result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => false do
101
130
  if Step === Step.log_relay_step and not self == Step.log_relay_step
102
131
  relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
103
132
  end
@@ -126,52 +155,65 @@ class Step
126
155
 
127
156
  set_info :inputs, Misc.remove_long_items(Misc.zip2hash(task.inputs, @inputs)) unless task.inputs.nil?
128
157
 
129
- #Log.info{"#{Log.color :magenta, "Starting task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]: #{ Log.color :blue, path }"}
130
158
  set_info :started, (start_time = Time.now)
131
159
  log :started, "#{Log.color :magenta, "Starting task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
132
160
 
133
- res = begin
134
- exec
135
- rescue Aborted
136
- log(:error, "Aborted")
137
-
138
- children_pids = info[:children_pids]
139
- if children_pids and children_pids.any?
140
- Log.medium("Killing children: #{ children_pids * ", " }")
141
- children_pids.each do |pid|
142
- Log.medium("Killing child #{ pid }")
143
- begin
144
- Process.kill "INT", pid
145
- rescue Exception
146
- Log.medium("Exception killing child #{ pid }: #{$!.message}")
147
- end
148
- end
149
- end
150
-
151
- raise $!
161
+ begin
162
+ result = _exec
163
+ rescue Aborted
164
+ log(:error, "Aborted")
165
+
166
+ children_pids = info[:children_pids]
167
+ if children_pids and children_pids.any?
168
+ Log.medium("Killing children: #{ children_pids * ", " }")
169
+ children_pids.each do |pid|
170
+ Log.medium("Killing child #{ pid }")
171
+ begin
172
+ Process.kill "INT", pid
152
173
  rescue Exception
153
- backtrace = $!.backtrace
174
+ Log.medium("Exception killing child #{ pid }: #{$!.message}")
175
+ end
176
+ end
177
+ end
154
178
 
155
- # HACK: This fixes an strange behaviour in 1.9.3 where some
156
- # backtrace strings are coded in ASCII-8BIT
157
- backtrace.each{|l| l.force_encoding("UTF-8")} if String.instance_methods.include? :force_encoding
179
+ raise $!
180
+ rescue Exception
181
+ backtrace = $!.backtrace
158
182
 
159
- set_info :backtrace, backtrace
160
- log(:error, "#{$!.class}: #{$!.message}")
161
- raise $!
162
- end
183
+ # HACK: This fixes an strange behaviour in 1.9.3 where some
184
+ # backtrace strings are coded in ASCII-8BIT
185
+ backtrace.each{|l| l.force_encoding("UTF-8")} if String.instance_methods.include? :force_encoding
163
186
 
164
- set_info :done, (done_time = Time.now)
165
- set_info :time_elapsed, (time_elapsed = done_time - start_time)
166
- log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
187
+ set_info :backtrace, backtrace
188
+ log(:error, "#{$!.class}: #{$!.message}")
189
+ raise $!
190
+ end
191
+
192
+ case result
193
+ when IO, TSV::Dumper
194
+ log :streaming, "#{Log.color :magenta, "Streaming task result"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
195
+ class << result
196
+ attr_accessor :callback
197
+ end
198
+ result.callback = Proc.new do
199
+ set_info :done, (done_time = Time.now)
200
+ set_info :time_elapsed, (time_elapsed = done_time - start_time)
201
+ log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
202
+ end
203
+ else
204
+ set_info :done, (done_time = Time.now)
205
+ set_info :time_elapsed, (time_elapsed = done_time - start_time)
206
+ log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
207
+ end
167
208
 
168
- res
209
+ result
169
210
  end
170
211
 
171
212
  if no_load
213
+ @result = result
172
214
  self
173
215
  else
174
- prepare_result result, @task.result_description, info
216
+ @result = prepare_result result, @task.result_description, info
175
217
  end
176
218
  end
177
219
 
@@ -179,11 +221,13 @@ class Step
179
221
  raise "Can not fork: Step is waiting for proces #{@pid} to finish" if not @pid.nil?
180
222
  @pid = Process.fork do
181
223
  begin
182
- #trap(:INT) { raise Aborted.new "INT signal recieved" }
183
224
  RbbtSemaphore.wait_semaphore(semaphore) if semaphore
184
225
  FileUtils.mkdir_p File.dirname(path) unless Open.exists? File.dirname(path)
185
226
  begin
186
- run(true)
227
+ res = run(true)
228
+ io = res.result if IO === res.result
229
+ io = res.result.stream if TSV::Dumper === res.result
230
+ while not io.eof?; io.read(2048); end if io
187
231
  rescue Aborted
188
232
  Log.debug{"Forked process aborted: #{path}"}
189
233
  log :aborted, "Aborted"
@@ -212,7 +256,6 @@ class Step
212
256
  exit -1
213
257
  end
214
258
  set_info :pid, nil
215
- exit 0
216
259
  ensure
217
260
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
218
261
  end
@@ -224,13 +267,13 @@ class Step
224
267
 
225
268
  def abort
226
269
  @pid ||= info[:pid]
227
- if @pid.nil? and info[:forked]
270
+ if @pid.nil? and info[:forked]
228
271
  Log.medium "Could not abort #{path}: no pid"
229
272
  false
230
273
  else
231
274
  Log.medium "Aborting #{path}: #{ @pid }"
232
275
  begin
233
- Process.kill("KILL", @pid)
276
+ Process.kill("KILL", @pid) unless Process.pid == @pid
234
277
  Process.waitpid @pid
235
278
  rescue Exception
236
279
  Log.debug("Aborted job #{@pid} was not killed: #{$!.message}")
@@ -254,15 +297,19 @@ class Step
254
297
  end
255
298
 
256
299
  def load
257
- raise "Can not load: Step is waiting for proces #{@pid} to finish" if not done?
258
- result = Persist.persist "Job", @task.result_type, :file => @path, :check => checks do
259
- exec
260
- end
261
- prepare_result result, @task.result_description, info
300
+ return prepare_result @result, @task.result_description if @result
301
+ join if not done?
302
+ return Persist.load_file(@path, @task.result_type) if @path.exists?
303
+ exec
262
304
  end
263
305
 
264
306
  def clean
265
307
  if Open.exists?(path) or Open.exists?(info_file)
308
+ begin
309
+ self.abort if self.running?
310
+ rescue Exception
311
+ end
312
+
266
313
  begin
267
314
  Open.rm info_file if Open.exists? info_file
268
315
  Open.rm info_file + '.lock' if Open.exists? info_file + '.lock'
data/lib/rbbt/workflow.rb CHANGED
@@ -51,6 +51,12 @@ module Workflow
51
51
  end
52
52
  end
53
53
 
54
+ def self.installed_workflows
55
+ self.workflow_dir.glob('**/workflow.rb').collect do |file|
56
+ File.basename(File.dirname(file))
57
+ end
58
+ end
59
+
54
60
  def self.workflow_dir
55
61
  @workflow_dir ||= begin
56
62
  case
@@ -10,11 +10,10 @@ workflow_dir = options[:workflow_dir] || Workflow.workflow_dir
10
10
 
11
11
  Path.setup(workflow_dir) unless Path === workflow_dir
12
12
 
13
- files = workflow_dir.find_all.collect{|p| p.glob("*") }.flatten.select{|f| File.directory? f }
13
+ workflows = Workflow.installed_workflows
14
14
 
15
15
  if options[:describe]
16
- files.each do |file|
17
- workflow = File.basename(file)
16
+ workflows.each do |workflow|
18
17
  Workflow.require_workflow workflow
19
18
  workflow = Workflow.workflows.select{|w| Misc.camel_case(w.to_s) == Misc.camel_case(workflow)}.first
20
19
  puts "# "<<[Misc.camel_case(workflow.to_s), workflow.description] * ": "
@@ -25,8 +24,7 @@ if options[:describe]
25
24
 
26
25
  end
27
26
  else
28
- files.each do |file|
29
- workflow = File.basename(file)
27
+ workflows.each do |workflow|
30
28
  puts Misc.camel_case(workflow.to_s)
31
29
  end
32
30
  end
@@ -54,13 +54,32 @@ def SOPT_options(workflow, task)
54
54
  sopt_options * ":"
55
55
  end
56
56
 
57
+ def get_value_stream(value)
58
+ if value == "-"
59
+ io = Misc.open_pipe do |sin|
60
+ while not STDIN.eof?
61
+ sin.write STDIN.read(2048)
62
+ end
63
+ sin.close
64
+ end
65
+ else
66
+ io = Open.open(value)
67
+ end
68
+ class << io
69
+ attr_accessor :filename
70
+ end
71
+ io.filename = value
72
+ io
73
+ end
74
+
57
75
  def fix_options(workflow, task, job_options)
58
- option_types = IndiferentHash.setup workflow.rec_input_types(task.name)
76
+ input_types = IndiferentHash.setup workflow.rec_input_types(task.name)
77
+ input_options = IndiferentHash.setup workflow.rec_input_options(task.name)
59
78
 
60
79
  job_options_cleaned = {}
61
80
 
62
81
  job_options.each do |name, value|
63
- value = case option_types[name].to_sym
82
+ value = case input_types[name].to_sym
64
83
  when :boolean
65
84
  TrueClass == value or %w(true TRUE T yes).include? value
66
85
  when :float
@@ -68,41 +87,53 @@ def fix_options(workflow, task, job_options)
68
87
  when :integer
69
88
  value.to_i
70
89
  when :text
71
- case
72
- when value == '-'
73
- STDIN.read
74
- when (String === value and File.exists?(value) and not File.directory?(value))
75
- Open.read(value)
90
+ if input_options[name] and input_options[name][:stream] and String === value
91
+ get_value_stream(value)
76
92
  else
77
- value
78
- end
79
- when :array
80
- if Array === value
81
- value
82
- else
83
- str = case
93
+ case
84
94
  when value == '-'
85
95
  STDIN.read
86
- when (String === value and File.exists?(value))
96
+ when (String === value and File.exists?(value) and not File.directory?(value))
87
97
  Open.read(value)
88
98
  else
89
99
  value
90
100
  end
91
-
92
- if $array_separator
93
- str.split(/#{$array_separator}/)
101
+ end
102
+ when :array
103
+ if input_options[name] and input_options[name][:stream] and String === value
104
+ get_value_stream(value)
105
+ else
106
+ if Array === value
107
+ value
94
108
  else
95
- str.split(/[,|\s]/)
109
+ str = case
110
+ when value == '-'
111
+ STDIN.read
112
+ when (String === value and File.exists?(value))
113
+ Open.read(value)
114
+ else
115
+ value
116
+ end
117
+
118
+ if $array_separator
119
+ str.split(/#{$array_separator}/)
120
+ else
121
+ str.split(/[,|\s]/)
122
+ end
96
123
  end
97
124
  end
98
125
  when :tsv
99
- case value
100
- when TSV
101
- value
102
- when '-'
103
- TSV.open(STDIN, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
126
+ if input_options[name] and input_options[name][:stream] and String === value
127
+ TSV::Parser.new(value == '-' ? STDIN : Open.open(value), :filename => value )
104
128
  else
105
- TSV.open(value, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
129
+ case value
130
+ when TSV
131
+ value
132
+ when '-'
133
+ TSV.open(STDIN, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
134
+ else
135
+ TSV.open(value, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
136
+ end
106
137
  end
107
138
  else
108
139
  value
@@ -228,10 +259,8 @@ job = workflow.job(task.name, name, job_options)
228
259
 
229
260
  # clean job
230
261
  if clean
231
- job.abort if job.running?
232
262
  job.clean
233
263
  sleep 1
234
- job = workflow.job(task.name, name, job_options)
235
264
  end
236
265
 
237
266
  if recursive_clean
@@ -250,7 +279,7 @@ begin
250
279
  end
251
280
 
252
281
  if do_exec or (job.respond_to?(:is_exec) and job.is_exec)
253
- res = job.exec
282
+ res = job.exec(true)
254
283
  case
255
284
  when Array === res
256
285
  out.puts res * "\n"
@@ -258,6 +287,10 @@ begin
258
287
  out.puts res
259
288
  when Hash === res
260
289
  out.puts res.to_yaml
290
+ when IO === res
291
+ while block = res.read(2048)
292
+ out.write block
293
+ end
261
294
  else
262
295
  out.puts res
263
296
  end
@@ -281,7 +314,6 @@ begin
281
314
  end
282
315
 
283
316
  job.fork
284
-
285
317
  else
286
318
  job.run(true)
287
319
  res = job
@@ -289,22 +321,21 @@ begin
289
321
 
290
322
 
291
323
  if options.delete(:provenance)
324
+ job.join
292
325
  pp job.provenance
293
326
  exit 0
294
327
  end
295
328
 
296
329
  if options.delete(:printname)
330
+ job.join if IO === job.result
297
331
  puts job.name
298
332
  exit 0
299
- else
300
- Log.low "Job name: #{job.name}"
301
333
  end
302
334
 
303
335
  if options.delete(:printpath)
336
+ job.join
304
337
  puts job.path
305
338
  exit 0
306
- else
307
- Log.low "Job name: #{job.name}"
308
339
  end
309
340
 
310
341
  if do_fork
@@ -321,7 +352,6 @@ begin
321
352
  time = Time.now - issued
322
353
  end
323
354
 
324
-
325
355
  space.times do
326
356
  Log.clear_line
327
357
  end
@@ -372,7 +402,16 @@ case res
372
402
  when (defined?(WorkflowRESTClient) and WorkflowRESTClient::RemoteStep)
373
403
  out.puts res.load
374
404
  when Step
375
- out.puts Open.read(res.path) if File.exists? res.path
405
+ if IO === res.result
406
+ io = res.result
407
+ while line = io.gets do
408
+ out.puts line
409
+ end
410
+ io.close
411
+ else
412
+ res.join
413
+ out.puts Open.read(res.path) if File.exists? res.path
414
+ end
376
415
  else
377
416
  out.puts res.to_s
378
417
  end
@@ -37,4 +37,28 @@ class TestPersist < Test::Unit::TestCase
37
37
  end
38
38
  end
39
39
  end
40
+
41
+ def test_tsv_dumper
42
+ TmpFile.with_file do |tmpdir|
43
+ stream = Persist.persist("Dumper", :tsv, :dir => tmpdir) do
44
+ dumper = TSV::Dumper.new :key_field => "Field 1", :fields => ["Field 2"], :type => :single
45
+
46
+ dumper.init
47
+ Thread.new do
48
+ 10.times do |i|
49
+ key = i.to_s
50
+ dumper.add key, key + " - 2"
51
+ end
52
+ dumper.close
53
+ Thread.exit
54
+ end
55
+ dumper
56
+ end
57
+
58
+ while line = stream.gets do
59
+ puts line
60
+ end
61
+
62
+ end
63
+ end
40
64
  end
@@ -4,9 +4,9 @@ require 'test/unit'
4
4
 
5
5
  class TestColorize < Test::Unit::TestCase
6
6
  def test_color_array
7
- a = [:red, :red, :blue, :blue, :yellow]
8
- a = (0..16).to_a
7
+ a = (1..16).to_a
9
8
 
10
- ddd Colorize.distinct(a)
9
+ assert_equal 16, Colorize.distinct(a).length
10
+ assert_equal 16, Colorize.distinct(a).compact.uniq.length
11
11
  end
12
12
  end
@@ -159,7 +159,7 @@ class TestStep < Test::Unit::TestCase
159
159
  step.run
160
160
  while not job.done? do sleep 1 end
161
161
  assert_equal "TEST", Open.read(job.file("test"))
162
- assert_equal "WRITE", job.messages.last
162
+ assert job.messages.include? "WRITE"
163
163
  end
164
164
  end
165
165
  end
@@ -176,7 +176,7 @@ class TestStep < Test::Unit::TestCase
176
176
  job = step.fork
177
177
  while not job.done? do sleep 1 end
178
178
  assert_equal "TEST", Open.read(job.file("test"))
179
- assert_equal "WRITE", job.messages.last
179
+ assert job.messages.include? "WRITE"
180
180
  end
181
181
  end
182
182
  end
data/test/test_helper.rb CHANGED
@@ -20,11 +20,11 @@ class Test::Unit::TestCase
20
20
  end
21
21
 
22
22
  def teardown
23
- if defined? Persist
24
- FileUtils.rm_rf Path.setup("", 'rbbt').tmp.test.find :user
25
- Persist::CONNECTIONS.values.each do |c| c.close end
26
- Persist::CONNECTIONS.clear
27
- end
23
+ #if defined? Persist
24
+ # FileUtils.rm_rf Path.setup("", 'rbbt').tmp.test.find :user
25
+ # Persist::CONNECTIONS.values.each do |c| c.close end
26
+ # Persist::CONNECTIONS.clear
27
+ #end
28
28
  end
29
29
 
30
30
  def datafile_test(file)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.9.12
4
+ version: 5.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-26 00:00:00.000000000 Z
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -147,6 +147,7 @@ files:
147
147
  - lib/rbbt/tsv/attach.rb
148
148
  - lib/rbbt/tsv/attach/util.rb
149
149
  - lib/rbbt/tsv/change_id.rb
150
+ - lib/rbbt/tsv/dumper.rb
150
151
  - lib/rbbt/tsv/excel.rb
151
152
  - lib/rbbt/tsv/field_index.rb
152
153
  - lib/rbbt/tsv/filter.rb