rbbt-util 5.9.12 → 5.10.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 611734c0287b691572f51267904bec6af278eba1
4
- data.tar.gz: c718a679f44bebb62b3f235e71765f9deebbb13d
3
+ metadata.gz: f5de94d0ec4441889212dc37780f880b38809d71
4
+ data.tar.gz: 44acad91d91a0a8f7c351d32cd2dba45ca9fc058
5
5
  SHA512:
6
- metadata.gz: 2bb409b0bee93ff72b801bcdafaaa4e6c8dc569531d7e7d7606cb4c089c2606d99f5c7bc379cccd4c6979695fe781255ec361708c74cda9667d580c8722eee96
7
- data.tar.gz: 59e11ff34787e24df2e4aa6a5cf2590f98adb18594db0f6e18d740cd74a428ac1defb40bb4476ef9366de2abc51ee887c205c14e00a092ca9bffeb09aee01596
6
+ metadata.gz: 1fc109e80a9808fe201f7aa683fd2c0d9a8c0f35e6a1b2f3f9a0ee5f1f62801f192497c51ce09e69e5a721cb301d564227e543c44f2263aaf1faa3da14215c8c
7
+ data.tar.gz: 78d53fa4a1bc35af227a4d15553204a31405e89459e2b51a60cad762eba536b0e73f0719e51058ca8df03f328ab6117986f6109a2e01c531bb12c3f0b8453576
data/lib/rbbt/persist.rb CHANGED
@@ -83,7 +83,7 @@ module Persist
83
83
 
84
84
  TRUE_STRINGS = Set.new ["true", "True", "TRUE", "t", "T", "1", "yes", "Yes", "YES", "y", "Y", "ON", "on"] unless defined? TRUE_STRINGS
85
85
  def self.load_file(path, type)
86
- case (type || "nil").to_sym
86
+ case (type || :marshal).to_sym
87
87
  when :nil
88
88
  nil
89
89
  when :boolean
@@ -125,7 +125,7 @@ module Persist
125
125
 
126
126
  return if content.nil?
127
127
 
128
- case (type || "nil").to_sym
128
+ case (type || :marshal).to_sym
129
129
  when :nil
130
130
  nil
131
131
  when :boolean
@@ -134,7 +134,8 @@ module Persist
134
134
  content.file.seek 0
135
135
  Misc.sensiblewrite(path, content.file.read)
136
136
  when :tsv
137
- Misc.sensiblewrite(path, content.to_s)
137
+ content = content.to_s if TSV === content
138
+ Misc.sensiblewrite(path, content)
138
139
  when :annotations
139
140
  Misc.sensiblewrite(path, Annotated.tsv(content, :all).to_s)
140
141
  when :string, :text
@@ -146,10 +147,21 @@ module Persist
146
147
  f.close
147
148
  content
148
149
  when :array
149
- if content.empty?
150
- Misc.sensiblewrite(path, "")
150
+ case content
151
+ when Array
152
+ if content.empty?
153
+ Misc.sensiblewrite(path, "")
154
+ else
155
+ Misc.sensiblewrite(path, content * "\n" + "\n")
156
+ end
157
+ when IO
158
+ Misc.sensiblewrite(path) do |file|
159
+ while block = content.read(2048)
160
+ file.write block
161
+ end
162
+ end
151
163
  else
152
- Misc.sensiblewrite(path, content * "\n" + "\n")
164
+ Misc.sensiblewrite(path, content.to_s)
153
165
  end
154
166
  when :marshal_tsv
155
167
  Misc.sensiblewrite(path, Marshal.dump(content.dup))
@@ -164,12 +176,67 @@ module Persist
164
176
  end
165
177
  end
166
178
 
179
+ def self.tee_stream(stream, path, type, callback = nil)
180
+ file_out, file_in = IO.pipe
181
+ stream_out, stream_in = IO.pipe
182
+
183
+ saver_thread = Thread.new(Thread.current) do |parent|
184
+ begin
185
+ Misc.lock(path) do
186
+ save_file(path, type, file_out)
187
+ end
188
+ rescue Exception
189
+ Log.exception $!
190
+ parent.raise $!
191
+ end
192
+ end
193
+
194
+ splitter_thread = Thread.new(Thread.current) do |parent|
195
+ begin
196
+ while block = stream.read(2048)
197
+ begin stream_in.write block; rescue Exception; Log.exception $! end
198
+ begin file_in.write block; rescue Exception; Log.exception $! end
199
+ end
200
+ file_in.close
201
+ stream_in.close
202
+ callback.call if callback
203
+ rescue Exception
204
+ Log.exception $!
205
+ parent.raise $!
206
+ end
207
+ end
208
+
209
+ class << stream_out
210
+ attr_accessor :threads
211
+
212
+ def join
213
+ @threads.each{|t| t.join }
214
+ @threads = []
215
+ end
216
+
217
+ def close
218
+ join
219
+ super
220
+ end
221
+
222
+ def read(*args)
223
+ res = super(*args)
224
+ join if eof?
225
+ res
226
+ end
227
+ end
228
+
229
+ stream_out.threads = [splitter_thread, saver_thread]
230
+
231
+ stream_out
232
+ end
233
+
167
234
  def self.persist(name, type = nil, persist_options = {})
168
235
  type ||= :marshal
169
236
  persist_options = Misc.add_defaults persist_options, :persist => true
170
- other_options = Misc.process_options persist_options, :other
171
237
 
172
238
  if persist_options[:persist]
239
+ other_options = Misc.process_options persist_options, :other
173
240
  path = persistence_path(name, persist_options, other_options || {})
174
241
 
175
242
  case
@@ -276,8 +343,13 @@ module Persist
276
343
  Log.medium "Persist create: #{ path } - #{persist_options.inspect[0..100]}"
277
344
  res = yield
278
345
 
279
- if res.nil?
346
+ case res
347
+ when nil
280
348
  res = load_file(path) unless persist_options[:no_load]
349
+ when IO
350
+ res = tee_stream(res, path, type, res.respond_to?(:callback)? res.callback : nil)
351
+ when TSV::Dumper
352
+ res = tee_stream(res.stream, path, type, res.respond_to?(:callback)? res.callback : nil)
281
353
  else
282
354
  Misc.lock(path) do
283
355
  save_file(path, type, res)
@@ -1,5 +1,6 @@
1
1
  require 'yaml'
2
2
  require 'rbbt/annotations'
3
+ require 'rbbt/tsv/dumper'
3
4
  module TSV
4
5
 
5
6
  TSV_SERIALIZER = YAML
@@ -460,7 +461,7 @@ module TSV
460
461
  def options
461
462
  options = {}
462
463
  ENTRIES.each do |entry|
463
- options[entry] = self.send(entry)
464
+ options[entry.to_sym] = self.send(entry)
464
465
  end
465
466
  IndiferentHash.setup options
466
467
  end
@@ -498,26 +499,31 @@ module TSV
498
499
  end
499
500
  end
500
501
 
501
- str = ""
502
-
503
- entry_hash = no_options ? {} : (ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}
504
- str = TSV.header_lines(key_field, fields, entry_hash)
505
-
506
- with_unnamed do
507
- if keys.nil?
508
- each do |key, values|
509
- key = key.to_s if Symbol === key
510
- str << key.to_s
511
- str << values_to_s(values)
512
- end
513
- else
514
- keys.zip(values_at(*keys)).each do |key, values|
515
- key = key.to_s if Symbol === key
516
- str << key.to_s << values_to_s(values)
502
+ io = TSV::Dumper.stream self do |dumper|
503
+ dumper.init unless no_options
504
+ begin
505
+ if keys
506
+ keys.each do |key|
507
+ dumper.add key, self[key]
508
+ end
509
+ else
510
+ with_unnamed do
511
+ each do |k,v|
512
+ dumper.add k, v
513
+ end
514
+ end
517
515
  end
516
+ rescue
517
+ Log.exception $!
518
+ parent.raise $!
518
519
  end
520
+ end
519
521
 
522
+ str = ''
523
+ while block = io.read(2048)
524
+ str << block
520
525
  end
526
+
521
527
  str
522
528
  end
523
529
 
@@ -0,0 +1,56 @@
1
+ module TSV
2
+ class Dumper
3
+ attr_accessor :in_stream, :stream, :options, :filename
4
+ def self.stream(options = {}, filename = nil, &block)
5
+ dumper = TSV::Dumper.new options, filename
6
+ Thread.new do
7
+ yield dumper
8
+ dumper.close
9
+ end
10
+ dumper.stream
11
+ end
12
+
13
+ def initialize(options, filename = nil)
14
+ if TSV === options
15
+ @options = options.options.merge(:key_field => options.key_field, :fields => options.fields)
16
+ @filename ||= options.filename
17
+ else
18
+ @options = options
19
+ @filename = filename
20
+ end
21
+ @filename ||= Misc.fingerprint options
22
+ @stream, @in_stream = IO.pipe
23
+ end
24
+
25
+ def self.values_to_s(values, fields = nil)
26
+ case values
27
+ when nil
28
+ if fields.nil? or fields.empty?
29
+ "\n"
30
+ else
31
+ "\t" << ([""] * fields.length) * "\t" << "\n"
32
+ end
33
+ when Array
34
+ "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
35
+ else
36
+ "\t" << values.to_s << "\n"
37
+ end
38
+ end
39
+
40
+ def init
41
+ options = @options.dup
42
+ key_field, fields = Misc.process_options options, :key_field, :fields
43
+
44
+ str = TSV.header_lines(key_field, fields, options)
45
+ @in_stream.puts str
46
+ end
47
+
48
+ def add(k,v)
49
+ @in_stream << k << TSV::Dumper.values_to_s(v, @options[:fields])
50
+ end
51
+
52
+ def close
53
+ @in_stream.close
54
+ end
55
+ end
56
+ end
@@ -54,7 +54,29 @@ module TSV
54
54
  traverse_tsv(obj, options, &block)
55
55
  when Hash
56
56
  traverse_hash(obj, options, &block)
57
- when IO
57
+ when TSV::Parser
58
+ callback = Misc.process_options options, :callback
59
+ if callback
60
+ obj.traverse(options) do |k,v|
61
+ res = yield k, v
62
+ callback.call res
63
+ end
64
+ else
65
+ obj.traverse(options, &block)
66
+ end
67
+ when (options[:type] == :array and IO)
68
+ callback = Misc.process_options options, :callback
69
+ if callback
70
+ while not obj.eof?
71
+ res = yield obj.gets.strip
72
+ callback.call res
73
+ end
74
+ else
75
+ while not obj.eof?
76
+ yield obj.gets.strip
77
+ end
78
+ end
79
+ when IO, File
58
80
  callback = Misc.process_options options, :callback
59
81
  if callback
60
82
  TSV::Parser.traverse(obj, options) do |k,v|
@@ -68,8 +90,20 @@ module TSV
68
90
  obj.open do |stream|
69
91
  traverse_obj(stream, options, &block)
70
92
  end
93
+ when (defined? Step and Step)
94
+ case obj.result
95
+ when IO
96
+ traverse_obj(obj.result, options, &block)
97
+ when TSV::Dumper
98
+ traverse_obj(obj.stream, options, &block)
99
+ else
100
+ obj.join
101
+ traverse_obj(obj.path.open, options, &block)
102
+ end
71
103
  when Array
72
104
  traverse_array(obj, options, &block)
105
+ when nil
106
+ raise "Can not traverse nil object"
73
107
  else
74
108
  raise "Unknown object for traversal: #{Misc.fingerprint obj }"
75
109
  end
@@ -80,7 +114,6 @@ module TSV
80
114
 
81
115
  q = RbbtThreadQueue.new num
82
116
 
83
-
84
117
  if callback
85
118
  block = Proc.new do |k,v,mutex|
86
119
  v, mutex = nil, v if mutex.nil?
@@ -133,35 +166,60 @@ module TSV
133
166
  k,v = value
134
167
  obj[k] = v
135
168
  end
136
- when IO
169
+ when TSV::Dumper
137
170
  return if value.nil?
138
- obj << value
171
+ obj.add *value
172
+ when IO, StringIO
173
+ return if value.nil?
174
+ obj.puts value
139
175
  else
140
176
  obj << value
141
177
  end
142
178
  end
143
179
 
180
+ def self.traverse_run(obj, threads, cpus, options = {}, &block)
181
+ if threads.nil? and cpus.nil?
182
+ traverse_obj obj, options, &block
183
+ else
184
+ if threads
185
+ traverse_threads threads, obj, options, &block
186
+ else
187
+ traverse_cpus cpus, obj, options, &block
188
+ end
189
+ end
190
+ end
191
+
144
192
  def self.traverse(obj, options = {}, &block)
145
193
  threads = Misc.process_options options, :threads
146
194
  cpus = Misc.process_options options, :cpus
147
- into = Misc.process_options options, :into
195
+ into = options[:into]
196
+
197
+ threads = nil if threads and threads.to_i <= 1
198
+ cpus = nil if cpus and cpus.to_i <= 1
148
199
 
149
200
  if into
150
201
  callback = Proc.new do |e|
151
202
  store_into into, e
152
203
  end
153
204
  options[:callback] = callback
154
- end
155
205
 
156
- if threads.nil? and cpus.nil?
157
- traverse_obj obj, options, &block
158
- else
159
- if threads
160
- traverse_threads threads, obj, options, &block
206
+ case into
207
+ when TSV::Dumper, IO, StringIO
208
+ Thread.new(Thread.current) do |parent|
209
+ begin
210
+ traverse_run(obj, threads, cpus, options, &block)
211
+ into.close
212
+ rescue Exception
213
+ parent.raise $!
214
+ end
215
+ end
161
216
  else
162
- traverse_cpus cpus, obj, options, &block
217
+ traverse_run(obj, threads, cpus, options, &block)
163
218
  end
219
+
220
+ into
221
+ else
222
+ traverse_run(obj, threads, cpus, options, &block)
164
223
  end
165
- into
166
224
  end
167
225
  end
@@ -1,7 +1,7 @@
1
1
  require 'rbbt/util/cmd'
2
2
  module TSV
3
3
  class Parser
4
- attr_accessor :stream, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
4
+ attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream
5
5
 
6
6
  class SKIP_LINE < Exception; end
7
7
  class END_PARSING < Exception; end
@@ -24,7 +24,7 @@ module TSV
24
24
  # Process options line
25
25
 
26
26
  if line and line =~ /^#{@header_hash}: (.*)/
27
- options = Misc.string2hash $1
27
+ options = Misc.string2hash $1.strip
28
28
  line = Misc.fixutf8 stream.gets
29
29
  end
30
30
 
@@ -343,11 +343,16 @@ module TSV
343
343
  @sep = Misc.process_options(options, :sep) || "\t"
344
344
  @stream = stream
345
345
 
346
+
346
347
  header_options = parse_header(stream)
348
+
347
349
  options = header_options.merge options
348
350
 
349
351
  @type = Misc.process_options(options, :type) || :double
350
352
 
353
+ @filename = Misc.process_options(options, :filename)
354
+ @filename ||= stream.filename if stream.respond_to? :filename
355
+
351
356
  @sep2 = Misc.process_options(options, :sep2) || "|"
352
357
  @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
353
358
  @type ||= Misc.process_options options, :type
@@ -362,6 +367,7 @@ module TSV
362
367
  fields = options[:fields]
363
368
  fix_fields(options)
364
369
 
370
+ @type = @type.strip.to_sym if String === @type
365
371
  case @type
366
372
  when :double
367
373
  self.instance_eval do alias get_values get_values_double end
@@ -407,6 +413,8 @@ module TSV
407
413
  self.instance_eval do alias add_to_data add_to_data_flat end
408
414
  end
409
415
  end
416
+ else
417
+ raise "Unknown TSV type: #{@type.inspect}"
410
418
  end
411
419
 
412
420
 
@@ -419,10 +427,23 @@ module TSV
419
427
  data.key_field = @key_field
420
428
  data.fields = @fields
421
429
  data.namespace = @namespace
430
+ data.filename = @filename
422
431
  data.cast = @cast if Symbol === @cast
423
432
  data
424
433
  end
425
434
 
435
+ def annotate(data)
436
+ setup(data)
437
+ end
438
+
439
+ def options
440
+ options = {}
441
+ TSV::ENTRIES.each do |entry|
442
+ options[entry.to_sym] = self.send(entry) if self.respond_to? entry
443
+ end
444
+ IndiferentHash.setup options
445
+ end
446
+
426
447
  def traverse(options = {})
427
448
  monitor, grep, invert_grep, head = Misc.process_options options, :monitor, :grep, :invert_grep, :head
428
449
  raise "No block given in TSV::Parser#traverse" unless block_given?
@@ -493,8 +514,8 @@ module TSV
493
514
  break
494
515
  end
495
516
  end
496
- ensure
497
- stream.close unless stream.closed?
517
+ #ensure
518
+ # stream.close unless stream.closed?
498
519
  end
499
520
 
500
521
  self
data/lib/rbbt/tsv/util.rb CHANGED
@@ -92,6 +92,7 @@ module TSV
92
92
  raise "Cannot get stream from: #{file.inspect}"
93
93
  end
94
94
  end
95
+
95
96
  def self.get_stream(file, open_options = {})
96
97
  case file
97
98
  when Path
@@ -31,10 +31,11 @@ end
31
31
  Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
32
32
  module Misc
33
33
 
34
+
34
35
  def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
35
36
  i = 0
36
37
  re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
37
- text.split(re).collect do |paragraph|
38
+ text.split(re).collect do |paragraph|
38
39
  i += 1
39
40
  str = if i % 2 == 1
40
41
  words = paragraph.gsub(/\s+/, "\s").split(" ")
@@ -56,7 +57,7 @@ module Misc
56
57
  end
57
58
  offset = 0
58
59
  str
59
- end*""
60
+ end*""
60
61
  end
61
62
 
62
63
  def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
@@ -314,6 +315,12 @@ module Misc
314
315
  end
315
316
  when AnnotatedArray
316
317
  "<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
318
+ when TSV::Parser
319
+ "<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
320
+ when IO
321
+ "<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
322
+ when File
323
+ "<File:" + obj.path + ">"
317
324
  when Array
318
325
  if (length = obj.length) > 10
319
326
  "[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
@@ -347,6 +354,12 @@ module Misc
347
354
 
348
355
  def self.remove_long_items(obj)
349
356
  case
357
+ when IO === obj
358
+ remove_long_items("IO: " + obj.filename)
359
+ when obj.respond_to?(:path)
360
+ remove_long_items("File: " + obj.path)
361
+ when TSV::Parser === obj
362
+ remove_long_items("TSV Stream: " + obj.filename + " -- " << Misc.fingerprint(obj.options))
350
363
  when TSV === obj
351
364
  remove_long_items((obj.all_fields || []) + obj.keys.sort)
352
365
  when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
@@ -1276,6 +1289,8 @@ end
1276
1289
  str << k.to_s << "=>" << v
1277
1290
  when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
1278
1291
  str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
1292
+ when TSV::Parser === v
1293
+ str << remove_long_items(v)
1279
1294
  when Array === v
1280
1295
  str << k.to_s << "=>[" << v * "," << "]"
1281
1296
  else
@@ -41,7 +41,10 @@ class Step
41
41
 
42
42
  def info
43
43
  return {} if info_file.nil? or not Open.exists? info_file
44
- return @info_cache if @info_cache and File.mtime(info_file) < @info_cache_time
44
+ begin
45
+ return @info_cache if @info_cache and File.mtime(info_file) < @info_cache_time
46
+ rescue Exception
47
+ end
45
48
  begin
46
49
  @info_cache = Misc.insist(3, 5, info_file) do
47
50
  Misc.insist(2, 2, info_file) do
@@ -66,7 +69,7 @@ class Step
66
69
  value = Annotated.purge value if defined? Annotated
67
70
  Open.lock(info_file) do
68
71
  i = info
69
- i[key] = value
72
+ i[key] = value #File === value ? value.filename : value
70
73
  @info_cache = i
71
74
  Open.write(info_file, INFO_SERIALIAZER.dump(i))
72
75
  @info_cache_time = Time.now
@@ -143,7 +146,7 @@ class Step
143
146
  def running?
144
147
  return nil if not Open.exists? info_file
145
148
  return nil if info[:pid].nil?
146
- return Misc.pid_exists? info[:pid]
149
+ return Misc.pid_exists?(p = info[:pid]) && Process.pid != p
147
150
  end
148
151
 
149
152
  def error?
@@ -361,7 +364,10 @@ module Workflow
361
364
  if inputs.any? or dependencies.any?
362
365
  tagged_jobname = case TAG
363
366
  when :hash
364
- jobname + '_' + Misc.digest((inputs.collect{|i| Misc.fingerprint(i)} * "," + ";" + dependencies.collect{|dep| dep.name } * "\n"))
367
+ input_str = ""
368
+ input_str << inputs.collect{|i| Misc.fingerprint(i) } * ","
369
+ input_str << ";" << dependencies.collect{|dep| dep.name } * "\n"
370
+ jobname + '_' << Misc.digest(input_str)
365
371
  else
366
372
  jobname
367
373
  end
@@ -9,6 +9,7 @@ class Step
9
9
  attr_accessor :path, :task, :inputs, :dependencies, :bindings
10
10
  attr_accessor :pid
11
11
  attr_accessor :exec
12
+ attr_accessor :result
12
13
 
13
14
  def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil)
14
15
  path = Path.setup(Misc.sanitize_filename(path)) if String === path
@@ -54,7 +55,24 @@ class Step
54
55
  end
55
56
 
56
57
  def prepare_result(value, description = nil, info = {})
57
- case
58
+ case
59
+ when IO === value
60
+ begin
61
+ case @task.result_type
62
+ when :array
63
+ array = []
64
+ while line = value.gets
65
+ array << line
66
+ end
67
+ array
68
+ when :tsv
69
+ TSV.open(value)
70
+ else
71
+ value.read
72
+ end
73
+ ensure
74
+ value.join if value.respond_to? :join
75
+ end
58
76
  when (not defined? Entity or description.nil? or not Entity.formats.include? description)
59
77
  value
60
78
  when (Annotated === value and info.empty?)
@@ -70,13 +88,24 @@ class Step
70
88
  end
71
89
  end
72
90
 
73
- def exec
91
+ def _exec
74
92
  @exec = true if @exec.nil?
75
- result = @task.exec_in((bindings ? bindings : self), *@inputs)
76
- prepare_result result, @task.result_description
93
+ @task.exec_in((bindings ? bindings : self), *@inputs)
94
+ end
95
+
96
+ def exec(no_load=false)
97
+ @result = _exec
98
+ @result = @result.stream if TSV::Dumper === @result
99
+ no_load ? @result : prepare_result(@result, @task.result_description)
77
100
  end
78
101
 
79
102
  def join
103
+ case @result
104
+ when IO
105
+ while @result.read 2048; end
106
+ @result = nil
107
+ end
108
+
80
109
  if @pid.nil?
81
110
  self
82
111
  else
@@ -97,7 +126,7 @@ class Step
97
126
 
98
127
  def run(no_load = false)
99
128
 
100
- result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load do
129
+ result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => false do
101
130
  if Step === Step.log_relay_step and not self == Step.log_relay_step
102
131
  relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
103
132
  end
@@ -126,52 +155,65 @@ class Step
126
155
 
127
156
  set_info :inputs, Misc.remove_long_items(Misc.zip2hash(task.inputs, @inputs)) unless task.inputs.nil?
128
157
 
129
- #Log.info{"#{Log.color :magenta, "Starting task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]: #{ Log.color :blue, path }"}
130
158
  set_info :started, (start_time = Time.now)
131
159
  log :started, "#{Log.color :magenta, "Starting task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
132
160
 
133
- res = begin
134
- exec
135
- rescue Aborted
136
- log(:error, "Aborted")
137
-
138
- children_pids = info[:children_pids]
139
- if children_pids and children_pids.any?
140
- Log.medium("Killing children: #{ children_pids * ", " }")
141
- children_pids.each do |pid|
142
- Log.medium("Killing child #{ pid }")
143
- begin
144
- Process.kill "INT", pid
145
- rescue Exception
146
- Log.medium("Exception killing child #{ pid }: #{$!.message}")
147
- end
148
- end
149
- end
150
-
151
- raise $!
161
+ begin
162
+ result = _exec
163
+ rescue Aborted
164
+ log(:error, "Aborted")
165
+
166
+ children_pids = info[:children_pids]
167
+ if children_pids and children_pids.any?
168
+ Log.medium("Killing children: #{ children_pids * ", " }")
169
+ children_pids.each do |pid|
170
+ Log.medium("Killing child #{ pid }")
171
+ begin
172
+ Process.kill "INT", pid
152
173
  rescue Exception
153
- backtrace = $!.backtrace
174
+ Log.medium("Exception killing child #{ pid }: #{$!.message}")
175
+ end
176
+ end
177
+ end
154
178
 
155
- # HACK: This fixes an strange behaviour in 1.9.3 where some
156
- # backtrace strings are coded in ASCII-8BIT
157
- backtrace.each{|l| l.force_encoding("UTF-8")} if String.instance_methods.include? :force_encoding
179
+ raise $!
180
+ rescue Exception
181
+ backtrace = $!.backtrace
158
182
 
159
- set_info :backtrace, backtrace
160
- log(:error, "#{$!.class}: #{$!.message}")
161
- raise $!
162
- end
183
+ # HACK: This fixes an strange behaviour in 1.9.3 where some
184
+ # backtrace strings are coded in ASCII-8BIT
185
+ backtrace.each{|l| l.force_encoding("UTF-8")} if String.instance_methods.include? :force_encoding
163
186
 
164
- set_info :done, (done_time = Time.now)
165
- set_info :time_elapsed, (time_elapsed = done_time - start_time)
166
- log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
187
+ set_info :backtrace, backtrace
188
+ log(:error, "#{$!.class}: #{$!.message}")
189
+ raise $!
190
+ end
191
+
192
+ case result
193
+ when IO, TSV::Dumper
194
+ log :streaming, "#{Log.color :magenta, "Streaming task result"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]"
195
+ class << result
196
+ attr_accessor :callback
197
+ end
198
+ result.callback = Proc.new do
199
+ set_info :done, (done_time = Time.now)
200
+ set_info :time_elapsed, (time_elapsed = done_time - start_time)
201
+ log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
202
+ end
203
+ else
204
+ set_info :done, (done_time = Time.now)
205
+ set_info :time_elapsed, (time_elapsed = done_time - start_time)
206
+ log :done, "#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}] +#{time_elapsed.to_i}"
207
+ end
167
208
 
168
- res
209
+ result
169
210
  end
170
211
 
171
212
  if no_load
213
+ @result = result
172
214
  self
173
215
  else
174
- prepare_result result, @task.result_description, info
216
+ @result = prepare_result result, @task.result_description, info
175
217
  end
176
218
  end
177
219
 
@@ -179,11 +221,13 @@ class Step
179
221
  raise "Can not fork: Step is waiting for proces #{@pid} to finish" if not @pid.nil?
180
222
  @pid = Process.fork do
181
223
  begin
182
- #trap(:INT) { raise Aborted.new "INT signal recieved" }
183
224
  RbbtSemaphore.wait_semaphore(semaphore) if semaphore
184
225
  FileUtils.mkdir_p File.dirname(path) unless Open.exists? File.dirname(path)
185
226
  begin
186
- run(true)
227
+ res = run(true)
228
+ io = res.result if IO === res.result
229
+ io = res.result.stream if TSV::Dumper === res.result
230
+ while not io.eof?; io.read(2048); end if io
187
231
  rescue Aborted
188
232
  Log.debug{"Forked process aborted: #{path}"}
189
233
  log :aborted, "Aborted"
@@ -212,7 +256,6 @@ class Step
212
256
  exit -1
213
257
  end
214
258
  set_info :pid, nil
215
- exit 0
216
259
  ensure
217
260
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
218
261
  end
@@ -224,13 +267,13 @@ class Step
224
267
 
225
268
  def abort
226
269
  @pid ||= info[:pid]
227
- if @pid.nil? and info[:forked]
270
+ if @pid.nil? and info[:forked]
228
271
  Log.medium "Could not abort #{path}: no pid"
229
272
  false
230
273
  else
231
274
  Log.medium "Aborting #{path}: #{ @pid }"
232
275
  begin
233
- Process.kill("KILL", @pid)
276
+ Process.kill("KILL", @pid) unless Process.pid == @pid
234
277
  Process.waitpid @pid
235
278
  rescue Exception
236
279
  Log.debug("Aborted job #{@pid} was not killed: #{$!.message}")
@@ -254,15 +297,19 @@ class Step
254
297
  end
255
298
 
256
299
  def load
257
- raise "Can not load: Step is waiting for proces #{@pid} to finish" if not done?
258
- result = Persist.persist "Job", @task.result_type, :file => @path, :check => checks do
259
- exec
260
- end
261
- prepare_result result, @task.result_description, info
300
+ return prepare_result @result, @task.result_description if @result
301
+ join if not done?
302
+ return Persist.load_file(@path, @task.result_type) if @path.exists?
303
+ exec
262
304
  end
263
305
 
264
306
  def clean
265
307
  if Open.exists?(path) or Open.exists?(info_file)
308
+ begin
309
+ self.abort if self.running?
310
+ rescue Exception
311
+ end
312
+
266
313
  begin
267
314
  Open.rm info_file if Open.exists? info_file
268
315
  Open.rm info_file + '.lock' if Open.exists? info_file + '.lock'
data/lib/rbbt/workflow.rb CHANGED
@@ -51,6 +51,12 @@ module Workflow
51
51
  end
52
52
  end
53
53
 
54
+ def self.installed_workflows
55
+ self.workflow_dir.glob('**/workflow.rb').collect do |file|
56
+ File.basename(File.dirname(file))
57
+ end
58
+ end
59
+
54
60
  def self.workflow_dir
55
61
  @workflow_dir ||= begin
56
62
  case
@@ -10,11 +10,10 @@ workflow_dir = options[:workflow_dir] || Workflow.workflow_dir
10
10
 
11
11
  Path.setup(workflow_dir) unless Path === workflow_dir
12
12
 
13
- files = workflow_dir.find_all.collect{|p| p.glob("*") }.flatten.select{|f| File.directory? f }
13
+ workflows = Workflow.installed_workflows
14
14
 
15
15
  if options[:describe]
16
- files.each do |file|
17
- workflow = File.basename(file)
16
+ workflows.each do |workflow|
18
17
  Workflow.require_workflow workflow
19
18
  workflow = Workflow.workflows.select{|w| Misc.camel_case(w.to_s) == Misc.camel_case(workflow)}.first
20
19
  puts "# "<<[Misc.camel_case(workflow.to_s), workflow.description] * ": "
@@ -25,8 +24,7 @@ if options[:describe]
25
24
 
26
25
  end
27
26
  else
28
- files.each do |file|
29
- workflow = File.basename(file)
27
+ workflows.each do |workflow|
30
28
  puts Misc.camel_case(workflow.to_s)
31
29
  end
32
30
  end
@@ -54,13 +54,32 @@ def SOPT_options(workflow, task)
54
54
  sopt_options * ":"
55
55
  end
56
56
 
57
+ def get_value_stream(value)
58
+ if value == "-"
59
+ io = Misc.open_pipe do |sin|
60
+ while not STDIN.eof?
61
+ sin.write STDIN.read(2048)
62
+ end
63
+ sin.close
64
+ end
65
+ else
66
+ io = Open.open(value)
67
+ end
68
+ class << io
69
+ attr_accessor :filename
70
+ end
71
+ io.filename = value
72
+ io
73
+ end
74
+
57
75
  def fix_options(workflow, task, job_options)
58
- option_types = IndiferentHash.setup workflow.rec_input_types(task.name)
76
+ input_types = IndiferentHash.setup workflow.rec_input_types(task.name)
77
+ input_options = IndiferentHash.setup workflow.rec_input_options(task.name)
59
78
 
60
79
  job_options_cleaned = {}
61
80
 
62
81
  job_options.each do |name, value|
63
- value = case option_types[name].to_sym
82
+ value = case input_types[name].to_sym
64
83
  when :boolean
65
84
  TrueClass == value or %w(true TRUE T yes).include? value
66
85
  when :float
@@ -68,41 +87,53 @@ def fix_options(workflow, task, job_options)
68
87
  when :integer
69
88
  value.to_i
70
89
  when :text
71
- case
72
- when value == '-'
73
- STDIN.read
74
- when (String === value and File.exists?(value) and not File.directory?(value))
75
- Open.read(value)
90
+ if input_options[name] and input_options[name][:stream] and String === value
91
+ get_value_stream(value)
76
92
  else
77
- value
78
- end
79
- when :array
80
- if Array === value
81
- value
82
- else
83
- str = case
93
+ case
84
94
  when value == '-'
85
95
  STDIN.read
86
- when (String === value and File.exists?(value))
96
+ when (String === value and File.exists?(value) and not File.directory?(value))
87
97
  Open.read(value)
88
98
  else
89
99
  value
90
100
  end
91
-
92
- if $array_separator
93
- str.split(/#{$array_separator}/)
101
+ end
102
+ when :array
103
+ if input_options[name] and input_options[name][:stream] and String === value
104
+ get_value_stream(value)
105
+ else
106
+ if Array === value
107
+ value
94
108
  else
95
- str.split(/[,|\s]/)
109
+ str = case
110
+ when value == '-'
111
+ STDIN.read
112
+ when (String === value and File.exists?(value))
113
+ Open.read(value)
114
+ else
115
+ value
116
+ end
117
+
118
+ if $array_separator
119
+ str.split(/#{$array_separator}/)
120
+ else
121
+ str.split(/[,|\s]/)
122
+ end
96
123
  end
97
124
  end
98
125
  when :tsv
99
- case value
100
- when TSV
101
- value
102
- when '-'
103
- TSV.open(STDIN, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
126
+ if input_options[name] and input_options[name][:stream] and String === value
127
+ TSV::Parser.new(value == '-' ? STDIN : Open.open(value), :filename => value )
104
128
  else
105
- TSV.open(value, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
129
+ case value
130
+ when TSV
131
+ value
132
+ when '-'
133
+ TSV.open(STDIN, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
134
+ else
135
+ TSV.open(value, :unnamed => true, :sep => $field_separator, :sep2 => $array_separator || "|")
136
+ end
106
137
  end
107
138
  else
108
139
  value
@@ -228,10 +259,8 @@ job = workflow.job(task.name, name, job_options)
228
259
 
229
260
  # clean job
230
261
  if clean
231
- job.abort if job.running?
232
262
  job.clean
233
263
  sleep 1
234
- job = workflow.job(task.name, name, job_options)
235
264
  end
236
265
 
237
266
  if recursive_clean
@@ -250,7 +279,7 @@ begin
250
279
  end
251
280
 
252
281
  if do_exec or (job.respond_to?(:is_exec) and job.is_exec)
253
- res = job.exec
282
+ res = job.exec(true)
254
283
  case
255
284
  when Array === res
256
285
  out.puts res * "\n"
@@ -258,6 +287,10 @@ begin
258
287
  out.puts res
259
288
  when Hash === res
260
289
  out.puts res.to_yaml
290
+ when IO === res
291
+ while block = res.read(2048)
292
+ out.write block
293
+ end
261
294
  else
262
295
  out.puts res
263
296
  end
@@ -281,7 +314,6 @@ begin
281
314
  end
282
315
 
283
316
  job.fork
284
-
285
317
  else
286
318
  job.run(true)
287
319
  res = job
@@ -289,22 +321,21 @@ begin
289
321
 
290
322
 
291
323
  if options.delete(:provenance)
324
+ job.join
292
325
  pp job.provenance
293
326
  exit 0
294
327
  end
295
328
 
296
329
  if options.delete(:printname)
330
+ job.join if IO === job.result
297
331
  puts job.name
298
332
  exit 0
299
- else
300
- Log.low "Job name: #{job.name}"
301
333
  end
302
334
 
303
335
  if options.delete(:printpath)
336
+ job.join
304
337
  puts job.path
305
338
  exit 0
306
- else
307
- Log.low "Job name: #{job.name}"
308
339
  end
309
340
 
310
341
  if do_fork
@@ -321,7 +352,6 @@ begin
321
352
  time = Time.now - issued
322
353
  end
323
354
 
324
-
325
355
  space.times do
326
356
  Log.clear_line
327
357
  end
@@ -372,7 +402,16 @@ case res
372
402
  when (defined?(WorkflowRESTClient) and WorkflowRESTClient::RemoteStep)
373
403
  out.puts res.load
374
404
  when Step
375
- out.puts Open.read(res.path) if File.exists? res.path
405
+ if IO === res.result
406
+ io = res.result
407
+ while line = io.gets do
408
+ out.puts line
409
+ end
410
+ io.close
411
+ else
412
+ res.join
413
+ out.puts Open.read(res.path) if File.exists? res.path
414
+ end
376
415
  else
377
416
  out.puts res.to_s
378
417
  end
@@ -37,4 +37,28 @@ class TestPersist < Test::Unit::TestCase
37
37
  end
38
38
  end
39
39
  end
40
+
41
+ def test_tsv_dumper
42
+ TmpFile.with_file do |tmpdir|
43
+ stream = Persist.persist("Dumper", :tsv, :dir => tmpdir) do
44
+ dumper = TSV::Dumper.new :key_field => "Field 1", :fields => ["Field 2"], :type => :single
45
+
46
+ dumper.init
47
+ Thread.new do
48
+ 10.times do |i|
49
+ key = i.to_s
50
+ dumper.add key, key + " - 2"
51
+ end
52
+ dumper.close
53
+ Thread.exit
54
+ end
55
+ dumper
56
+ end
57
+
58
+ while line = stream.gets do
59
+ puts line
60
+ end
61
+
62
+ end
63
+ end
40
64
  end
@@ -4,9 +4,9 @@ require 'test/unit'
4
4
 
5
5
  class TestColorize < Test::Unit::TestCase
6
6
  def test_color_array
7
- a = [:red, :red, :blue, :blue, :yellow]
8
- a = (0..16).to_a
7
+ a = (1..16).to_a
9
8
 
10
- ddd Colorize.distinct(a)
9
+ assert_equal 16, Colorize.distinct(a).length
10
+ assert_equal 16, Colorize.distinct(a).compact.uniq.length
11
11
  end
12
12
  end
@@ -159,7 +159,7 @@ class TestStep < Test::Unit::TestCase
159
159
  step.run
160
160
  while not job.done? do sleep 1 end
161
161
  assert_equal "TEST", Open.read(job.file("test"))
162
- assert_equal "WRITE", job.messages.last
162
+ assert job.messages.include? "WRITE"
163
163
  end
164
164
  end
165
165
  end
@@ -176,7 +176,7 @@ class TestStep < Test::Unit::TestCase
176
176
  job = step.fork
177
177
  while not job.done? do sleep 1 end
178
178
  assert_equal "TEST", Open.read(job.file("test"))
179
- assert_equal "WRITE", job.messages.last
179
+ assert job.messages.include? "WRITE"
180
180
  end
181
181
  end
182
182
  end
data/test/test_helper.rb CHANGED
@@ -20,11 +20,11 @@ class Test::Unit::TestCase
20
20
  end
21
21
 
22
22
  def teardown
23
- if defined? Persist
24
- FileUtils.rm_rf Path.setup("", 'rbbt').tmp.test.find :user
25
- Persist::CONNECTIONS.values.each do |c| c.close end
26
- Persist::CONNECTIONS.clear
27
- end
23
+ #if defined? Persist
24
+ # FileUtils.rm_rf Path.setup("", 'rbbt').tmp.test.find :user
25
+ # Persist::CONNECTIONS.values.each do |c| c.close end
26
+ # Persist::CONNECTIONS.clear
27
+ #end
28
28
  end
29
29
 
30
30
  def datafile_test(file)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.9.12
4
+ version: 5.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-26 00:00:00.000000000 Z
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -147,6 +147,7 @@ files:
147
147
  - lib/rbbt/tsv/attach.rb
148
148
  - lib/rbbt/tsv/attach/util.rb
149
149
  - lib/rbbt/tsv/change_id.rb
150
+ - lib/rbbt/tsv/dumper.rb
150
151
  - lib/rbbt/tsv/excel.rb
151
152
  - lib/rbbt/tsv/field_index.rb
152
153
  - lib/rbbt/tsv/filter.rb