rbbt-util 5.10.1 → 5.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f5de94d0ec4441889212dc37780f880b38809d71
4
- data.tar.gz: 44acad91d91a0a8f7c351d32cd2dba45ca9fc058
3
+ metadata.gz: cba82109ac8a97d8964ffd2b7eaa3e47444e6bea
4
+ data.tar.gz: ba62c2032f5225afb66da4d21cf2b92419993e5a
5
5
  SHA512:
6
- metadata.gz: 1fc109e80a9808fe201f7aa683fd2c0d9a8c0f35e6a1b2f3f9a0ee5f1f62801f192497c51ce09e69e5a721cb301d564227e543c44f2263aaf1faa3da14215c8c
7
- data.tar.gz: 78d53fa4a1bc35af227a4d15553204a31405e89459e2b51a60cad762eba536b0e73f0719e51058ca8df03f328ab6117986f6109a2e01c531bb12c3f0b8453576
6
+ metadata.gz: 70cbfbc5301a70c101102221baca363898d075e55b12bb8fd06beb3a559bad40d541b95991a627a7f896635ac580aeb9f5e639bf4c49bdbc6e6328b0b75c1068
7
+ data.tar.gz: 87d924aa5560c5698adb671b9607c928ecee243aff6a5ec6a002b5b9c600117b138ac2ae289fab9a7889a1e350b0b3105b5e843704cd7e32b0a2f9110e279349
data/lib/rbbt/persist.rb CHANGED
@@ -176,72 +176,58 @@ module Persist
176
176
  end
177
177
  end
178
178
 
179
- def self.tee_stream(stream, path, type, callback = nil)
180
- file_out, file_in = IO.pipe
181
- stream_out, stream_in = IO.pipe
179
+ def self.tee_stream_fork(stream, path, type, callback = nil)
180
+ file, out = Misc.tee_stream(stream)
182
181
 
183
- saver_thread = Thread.new(Thread.current) do |parent|
182
+ saver_pid = Process.fork do
183
+ out.close
184
+ Misc.purge_pipes(stream)
184
185
  begin
185
186
  Misc.lock(path) do
186
- save_file(path, type, file_out)
187
+ save_file(path, type, file)
187
188
  end
188
189
  rescue Exception
189
190
  Log.exception $!
190
- parent.raise $!
191
191
  end
192
192
  end
193
+ file.close
194
+ ConcurrentStream.setup(out, :pids => [saver_pid], :filename => path)
195
+ end
196
+
197
+ def self.tee_stream_thread(stream, path, type, callback = nil)
198
+ file, out = Misc.tee_stream(stream)
193
199
 
194
- splitter_thread = Thread.new(Thread.current) do |parent|
200
+ saver_thread = Thread.new(Thread.current, path, file) do |parent,path,file|
195
201
  begin
196
- while block = stream.read(2048)
197
- begin stream_in.write block; rescue Exception; Log.exception $! end
198
- begin file_in.write block; rescue Exception; Log.exception $! end
202
+ Thread.current["name"] = "file saver: " + path
203
+ Misc.lock(path) do
204
+ save_file(path, type, file)
199
205
  end
200
- file_in.close
201
- stream_in.close
202
- callback.call if callback
203
206
  rescue Exception
204
207
  Log.exception $!
205
208
  parent.raise $!
206
209
  end
207
210
  end
211
+ ConcurrentStream.setup(out, :threads => saver_thread, :filename => path)
212
+ end
208
213
 
209
- class << stream_out
210
- attr_accessor :threads
211
-
212
- def join
213
- @threads.each{|t| t.join }
214
- @threads = []
215
- end
216
-
217
- def close
218
- join
219
- super
220
- end
221
-
222
- def read(*args)
223
- res = super(*args)
224
- join if eof?
225
- res
226
- end
227
- end
228
-
229
- stream_out.threads = [splitter_thread, saver_thread]
230
-
231
- stream_out
214
+ class << self
215
+ alias tee_stream tee_stream_thread
232
216
  end
233
217
 
234
218
  def self.persist(name, type = nil, persist_options = {})
235
219
  type ||= :marshal
236
- persist_options = Misc.add_defaults persist_options, :persist => true
237
220
 
238
- if persist_options[:persist]
221
+ return (persist_options[:repo] || Persist::MEMORY)[persist_options[:file]] ||= yield if type ==:memory and persist_options[:file]
222
+
223
+ if FalseClass != persist_options[:persist]
239
224
  other_options = Misc.process_options persist_options, :other
240
225
  path = persistence_path(name, persist_options, other_options || {})
241
226
 
242
227
  case
243
228
  when type.to_sym === :memory
244
- Persist::MEMORY[path] ||= yield
229
+ repo = persist_options[:repo] || Persist::MEMORY
230
+ repo[path] ||= yield
245
231
 
246
232
  when (type.to_sym == :annotations and persist_options.include? :annotation_repo)
247
233
 
@@ -346,10 +332,12 @@ module Persist
346
332
  case res
347
333
  when nil
348
334
  res = load_file(path) unless persist_options[:no_load]
349
- when IO
335
+ when IO, StringIO
350
336
  res = tee_stream(res, path, type, res.respond_to?(:callback)? res.callback : nil)
337
+ return res if persist_options[:no_load] == :stream
351
338
  when TSV::Dumper
352
339
  res = tee_stream(res.stream, path, type, res.respond_to?(:callback)? res.callback : nil)
340
+ return res if persist_options[:no_load] == :stream
353
341
  else
354
342
  Misc.lock(path) do
355
343
  save_file(path, type, res)
@@ -373,11 +361,17 @@ module Persist
373
361
  end
374
362
 
375
363
  def self.memory(name, options = {}, &block)
376
- file = name
377
- file << "_" << Misc.hash2md5(options) if options.any?
378
- options = Misc.add_defaults options, :persist => true, :file => file
379
-
380
- persist name, :memory, options, &block
364
+ case options
365
+ when nil
366
+ persist name, :memory, :file => name, &block
367
+ when String
368
+ persist name, :memory, :file => name + "_" << options, &block
369
+ else
370
+ file = name
371
+ repo = options.delete :repo if options and options.any?
372
+ file << "_" << (options[:key] ? options[:key] : Misc.hash2md5(options)) if options and options.any?
373
+ persist name, :memory, options.merge(:repo => repo, :persist => true, :file => file), &block
374
+ end
381
375
  end
382
376
  end
383
377
 
@@ -196,6 +196,12 @@ module Path
196
196
  TSV.open(self.produce, *args)
197
197
  end
198
198
 
199
+ def tsv_options(options = {})
200
+ self.open do |stream|
201
+ TSV::Parser.new(stream, options).options
202
+ end
203
+ end
204
+
199
205
  def traverse(options = {}, &block)
200
206
  TSV::Parser.traverse(self.open, options, &block)
201
207
  end
@@ -3,9 +3,14 @@ module TSV
3
3
  attr_accessor :in_stream, :stream, :options, :filename
4
4
  def self.stream(options = {}, filename = nil, &block)
5
5
  dumper = TSV::Dumper.new options, filename
6
- Thread.new do
7
- yield dumper
8
- dumper.close
6
+ Thread.new(Thread.current) do |parent|
7
+ begin
8
+ yield dumper
9
+ dumper.close
10
+ rescue Exception
11
+ Log.exception $!
12
+ parent.raise $!
13
+ end
9
14
  end
10
15
  dumper.stream
11
16
  end
@@ -19,7 +24,7 @@ module TSV
19
24
  @filename = filename
20
25
  end
21
26
  @filename ||= Misc.fingerprint options
22
- @stream, @in_stream = IO.pipe
27
+ @stream, @in_stream = Misc.pipe
23
28
  end
24
29
 
25
30
  def self.values_to_s(values, fields = nil)
@@ -42,15 +47,33 @@ module TSV
42
47
  key_field, fields = Misc.process_options options, :key_field, :fields
43
48
 
44
49
  str = TSV.header_lines(key_field, fields, options)
50
+
51
+ Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
45
52
  @in_stream.puts str
46
53
  end
47
54
 
48
55
  def add(k,v)
49
- @in_stream << k << TSV::Dumper.values_to_s(v, @options[:fields])
56
+ @fields ||= @options[:fields]
57
+ begin
58
+ Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
59
+ @in_stream << k << TSV::Dumper.values_to_s(v, @fields)
60
+ rescue IOError
61
+ rescue Exception
62
+ Log.exception $!
63
+ end
64
+ end
65
+
66
+ def close_out
67
+ Log.debug "Close out #{@stream.inspect}"
68
+ @stream.close
69
+ end
70
+
71
+ def close_in
72
+ @in_stream.close unless @in_stream.closed?
50
73
  end
51
74
 
52
75
  def close
53
- @in_stream.close
76
+ close_in
54
77
  end
55
78
  end
56
79
  end
@@ -1,4 +1,14 @@
1
1
  module TSV
2
+ def self.obj_stream(obj)
3
+ case obj
4
+ when IO, File
5
+ obj
6
+ when TSV::Dumper
7
+ obj.stream
8
+ when TSV::Parser
9
+ obj.stream
10
+ end
11
+ end
2
12
 
3
13
  def self.traverse_tsv(tsv, options = {}, &block)
4
14
  callback = Misc.process_options options, :callback
@@ -43,7 +53,35 @@ module TSV
43
53
  end
44
54
  end
45
55
 
56
+ def self.traverse_io_array(io, options = {}, &block)
57
+ callback = Misc.process_options options, :callback
58
+ if callback
59
+ while not io.eof?
60
+ res = yield io.gets.strip
61
+ callback.call res
62
+ end
63
+ else
64
+ while line = io.gets
65
+ yield line.strip
66
+ end
67
+ end
68
+ end
69
+
70
+ def self.traverse_io(io, options = {}, &block)
71
+ filename = io.filename if io.respond_to? :filename
72
+ callback = Misc.process_options options, :callback
73
+ if callback
74
+ TSV::Parser.traverse(io, options) do |k,v|
75
+ res = yield k, v
76
+ callback.call res
77
+ end
78
+ else
79
+ TSV::Parser.traverse(io, options, &block)
80
+ end
81
+ end
82
+
46
83
  def self.traverse_obj(obj, options = {}, &block)
84
+ filename = obj.filename if obj.respond_to? :filename
47
85
  if options[:type] == :keys
48
86
  options[:fields] = []
49
87
  options[:type] = :single
@@ -64,38 +102,27 @@ module TSV
64
102
  else
65
103
  obj.traverse(options, &block)
66
104
  end
67
- when (options[:type] == :array and IO)
68
- callback = Misc.process_options options, :callback
69
- if callback
70
- while not obj.eof?
71
- res = yield obj.gets.strip
72
- callback.call res
73
- end
105
+ when IO, File, StringIO
106
+ if options[:type] == :array
107
+ traverse_io_array(obj, options, &block)
74
108
  else
75
- while not obj.eof?
76
- yield obj.gets.strip
77
- end
78
- end
79
- when IO, File
80
- callback = Misc.process_options options, :callback
81
- if callback
82
- TSV::Parser.traverse(obj, options) do |k,v|
83
- res = yield k, v
84
- callback.call res
85
- end
86
- else
87
- TSV::Parser.traverse(obj, options, &block)
109
+ traverse_io(obj, options, &block)
88
110
  end
111
+
112
+ io = obj
113
+ obj.join if io.respond_to? :join
114
+ io.close if io.respond_to? :close and not io.closed?
89
115
  when Path
90
116
  obj.open do |stream|
91
117
  traverse_obj(stream, options, &block)
92
118
  end
119
+ when TSV::Dumper
120
+ traverse_obj(obj.stream, options, &block)
93
121
  when (defined? Step and Step)
122
+
94
123
  case obj.result
95
- when IO
124
+ when IO, TSV::Dumper, TSV
96
125
  traverse_obj(obj.result, options, &block)
97
- when TSV::Dumper
98
- traverse_obj(obj.stream, options, &block)
99
126
  else
100
127
  obj.join
101
128
  traverse_obj(obj.path.open, options, &block)
@@ -117,7 +144,7 @@ module TSV
117
144
  if callback
118
145
  block = Proc.new do |k,v,mutex|
119
146
  v, mutex = nil, v if mutex.nil?
120
- res = yield k, v
147
+ res = yield k, v, mutex
121
148
  mutex.synchronize do
122
149
  callback.call res
123
150
  end
@@ -136,9 +163,9 @@ module TSV
136
163
  end
137
164
 
138
165
  def self.traverse_cpus(num, obj, options, &block)
139
- callback = Misc.process_options options, :callback
140
-
141
- q = RbbtProcessQueue.new num
166
+ filename = obj.respond_to?(:filename)? obj.filename : "none"
167
+ callback, cleanup = Misc.process_options options, :callback, :cleanup
168
+ q = RbbtProcessQueue.new num, cleanup
142
169
 
143
170
  q.callback &callback
144
171
  q.init &block
@@ -147,34 +174,59 @@ module TSV
147
174
  q.process *p
148
175
  end
149
176
 
177
+ into = options[:into]
178
+
150
179
  q.join
151
- q.clean
152
- nil
153
180
  end
154
181
 
155
- def self.store_into(obj, value)
156
- case obj
182
+ def self.store_into(store, value)
183
+ case store
157
184
  when Hash
158
185
  return if value.nil?
159
186
  if Hash === value
160
- if TSV === obj and obj.type == :double
161
- obj.merge_zip value
187
+ if TSV === store and store.type == :double
188
+ store.merge_zip value
162
189
  else
163
- obj.merge! value
190
+ store.merge! value
164
191
  end
165
192
  else
166
193
  k,v = value
167
- obj[k] = v
194
+ store[k] = v
168
195
  end
169
196
  when TSV::Dumper
170
197
  return if value.nil?
171
- obj.add *value
172
- when IO, StringIO
198
+ store.add *value
199
+ when IO
173
200
  return if value.nil?
174
- obj.puts value
201
+ store.puts value.strip
175
202
  else
176
- obj << value
203
+ store << value
204
+ end
205
+ end
206
+
207
+ def self.get_streams_to_close(obj)
208
+ close_streams = []
209
+ case obj
210
+ when IO, File
211
+ close_streams << obj
212
+ when TSV::Parser
213
+ when TSV::Dumper
214
+ close_streams << obj.result.in_stream
215
+ when (defined? Step and Step)
216
+ case obj.result
217
+ when IO
218
+ close_streams << obj.result
219
+ when TSV::Dumper
220
+ close_streams << obj.result.in_stream
221
+ end
222
+ obj.inputs.each do |input|
223
+ close_streams = get_streams_to_close(input) + close_streams
224
+ end
225
+ obj.dependencies.each do |dependency|
226
+ close_streams = get_streams_to_close(dependency) + close_streams
227
+ end
177
228
  end
229
+ close_streams
178
230
  end
179
231
 
180
232
  def self.traverse_run(obj, threads, cpus, options = {}, &block)
@@ -184,11 +236,62 @@ module TSV
184
236
  if threads
185
237
  traverse_threads threads, obj, options, &block
186
238
  else
239
+ close_streams = Misc.process_options(options, :close_streams) || []
240
+ close_streams = [close_streams] unless Array === close_streams
241
+
242
+ close_streams.concat(get_streams_to_close(obj))
243
+ options[:close_streams] = close_streams
244
+
245
+ options[:cleanup] = Proc.new do
246
+ close_streams.uniq.each do |s|
247
+ s.close unless s.closed?
248
+ end
249
+ end if close_streams and close_streams.any?
250
+
187
251
  traverse_cpus cpus, obj, options, &block
188
252
  end
189
253
  end
190
254
  end
191
255
 
256
+ def self.traverse_stream(obj, threads, cpus, options, &block)
257
+ into = options[:into]
258
+ thread = Thread.new(Thread.current, obj) do |parent,obj|
259
+ begin
260
+ traverse_run(obj, threads, cpus, options, &block)
261
+ into.close if into.respond_to? :close
262
+ rescue Exception
263
+ Log.exception $!
264
+ parent.raise $!
265
+ end
266
+ end
267
+ thread.wakeup
268
+ ConcurrentStream.setup(obj_stream(into), :threads => thread)
269
+ end
270
+
271
+ def self.stream_name(obj)
272
+ filename_obj = obj.respond_to?(:filename) ? obj.filename : nil
273
+ filename_obj ||= obj.respond_to?(:path) ? obj.path : nil
274
+ stream_obj = obj_stream(obj)
275
+ filename_obj.nil? ? stream_obj.inspect : filename_obj + "(#{stream_obj.inspect})"
276
+ end
277
+
278
+ def self.report(msg, obj, into)
279
+ into = into[:into] if Hash === into and into.include? :into
280
+
281
+ #filename_into = into.respond_to?(:filename) ? into.filename : nil
282
+ #filename_into ||= into.respond_to?(:path) ? into.path : nil
283
+ #stream_into = obj_stream(into)
284
+ #str_into = filename_into.nil? ? stream_into.inspect : filename_into + "(#{stream_into.inspect})"
285
+
286
+ #filename_obj = obj.respond_to?(:filename) ? obj.filename : nil
287
+ #filename_obj ||= obj.respond_to?(:path) ? obj.path : nil
288
+ #stream_obj = obj_stream(obj)
289
+ #str_obj = filename_obj.nil? ? stream_obj.inspect : filename_obj + "(#{stream_obj.inspect})"
290
+
291
+ #Log.error "#{ msg } #{filename_obj} - #{filename_into}"
292
+ Log.error "#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"
293
+ end
294
+
192
295
  def self.traverse(obj, options = {}, &block)
193
296
  threads = Misc.process_options options, :threads
194
297
  cpus = Misc.process_options options, :cpus
@@ -198,23 +301,21 @@ module TSV
198
301
  cpus = nil if cpus and cpus.to_i <= 1
199
302
 
200
303
  if into
201
- callback = Proc.new do |e|
202
- store_into into, e
304
+ options[:callback] = Proc.new do |e|
305
+ begin
306
+ store_into into, e
307
+ rescue Exception
308
+ Log.exception $!
309
+ end
203
310
  end
204
- options[:callback] = callback
205
311
 
206
312
  case into
207
313
  when TSV::Dumper, IO, StringIO
208
- Thread.new(Thread.current) do |parent|
209
- begin
210
- traverse_run(obj, threads, cpus, options, &block)
211
- into.close
212
- rescue Exception
213
- parent.raise $!
214
- end
215
- end
314
+ traverse_stream(obj, threads, cpus, options, &block)
216
315
  else
217
316
  traverse_run(obj, threads, cpus, options, &block)
317
+ into.join if into.respond_to? :join
318
+ into.close if into.respond_to? :close
218
319
  end
219
320
 
220
321
  into