rbbt-util 5.7.0 → 5.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations.rb +4 -1
  3. data/lib/rbbt/annotations/util.rb +11 -0
  4. data/lib/rbbt/persist.rb +8 -2
  5. data/lib/rbbt/resource/path.rb +1 -0
  6. data/lib/rbbt/tsv/accessor.rb +18 -15
  7. data/lib/rbbt/tsv/parallel.rb +89 -32
  8. data/lib/rbbt/tsv/util.rb +11 -0
  9. data/lib/rbbt/util/R.rb +0 -1
  10. data/lib/rbbt/util/concurrency.rb +2 -0
  11. data/lib/rbbt/util/concurrency/processes.rb +96 -0
  12. data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
  13. data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
  14. data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
  15. data/lib/rbbt/util/concurrency/threads.rb +76 -0
  16. data/lib/rbbt/util/log.rb +37 -5
  17. data/lib/rbbt/util/misc.rb +89 -4
  18. data/lib/rbbt/util/semaphore.rb +10 -4
  19. data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
  20. data/lib/rbbt/util/simpleopt/doc.rb +2 -4
  21. data/lib/rbbt/workflow/accessor.rb +39 -12
  22. data/lib/rbbt/workflow/step.rb +5 -7
  23. data/share/rbbt_commands/benchmark/pthrough +18 -0
  24. data/share/rbbt_commands/color +41 -0
  25. data/share/rbbt_commands/stat/density +50 -0
  26. data/share/rbbt_commands/tsv/info +21 -3
  27. data/share/rbbt_commands/tsv/slice +46 -0
  28. data/share/rbbt_commands/tsv/subset +53 -0
  29. data/share/rbbt_commands/tsv/values +7 -1
  30. data/test/rbbt/annotations/test_util.rb +14 -0
  31. data/test/rbbt/tsv/test_parallel.rb +25 -3
  32. data/test/rbbt/tsv/test_util.rb +15 -0
  33. data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
  34. data/test/rbbt/util/concurrency/test_processes.rb +53 -0
  35. data/test/rbbt/util/concurrency/test_threads.rb +42 -0
  36. data/test/rbbt/util/test_concurrency.rb +6 -0
  37. metadata +23 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d4025fea0b4bfd41f869ae4282cd268ebe5f876
4
- data.tar.gz: 194ead2946a9a3c215a9136bc2b0c615bf73488a
3
+ metadata.gz: d2929287d81291dd772ab9ee6f415b52bafad0db
4
+ data.tar.gz: 61ad4ae0dade13e2cbc0e02c39107243d92a52e0
5
5
  SHA512:
6
- metadata.gz: 0df26701e5d7e3842a9ddd939c385b7c16884f81eee2fde670d6c7b1c0fe7cc371d05acaa65967eda8716fe7301c5e83e79a8b189e07341597b23052d648c060
7
- data.tar.gz: afcd1c9b2c88cb5858278ae50401a5140d729aa2007629f99adff8d3239f3d28d2df4c4791362a450d1892d273bef352941ab14578a6031fc7e43a9f8404a3fd
6
+ metadata.gz: 14285e88be1d35d8c23fb77ad038d2818ae1df04245bd2fd48bbab4f72b419eff1042764b8173b33c1d96955dee0df7ecfd27a84efb34171abb10bae05169b79
7
+ data.tar.gz: d96daf9acd2d79ab8b9425d849fd9442b7fd4ee1d614d364d135b9a601d0b7de1278eba414cb464f92f325cb84ab131311dff1937c1bdc2a99d631e7ea5f146b
data/lib/rbbt/annotations.rb CHANGED
@@ -159,9 +159,12 @@ module Annotated
159
159
  object.clean_annotations :
160
160
  object.inject([]){|acc,e| acc << Annotated.purge(e); acc}
161
161
  when Hash
162
+ new = {}
162
163
  object.each do |key, value|
163
- object[key] = Annotated.purge value
164
+ Annotated.purge key
165
+ new[key] = Annotated.purge value
164
166
  end
167
+ new
165
168
  else
166
169
  object
167
170
  end
data/lib/rbbt/annotations/util.rb CHANGED
@@ -209,6 +209,17 @@ module Annotated
209
209
  end
210
210
  end
211
211
 
212
+ def marshal_dump(depth)
213
+ clean_annotations
214
+ end
215
+ end
212
216
 
217
+ class String
218
+ def marshal_load(str)
219
+ self.replace str
213
220
  end
221
+ end
222
+
223
+
224
+
214
225
 
data/lib/rbbt/persist.rb CHANGED
@@ -276,10 +276,16 @@ module Persist
276
276
  Log.medium "Persist create: #{ path } - #{persist_options.inspect[0..100]}"
277
277
  res = yield
278
278
 
279
- Misc.lock(path) do
280
- save_file(path, type, res)
279
+ if res.nil?
280
+ res = load_file(path) unless persist_options[:no_load]
281
+ else
282
+ Misc.lock(path) do
283
+ save_file(path, type, res)
284
+ end
281
285
  end
282
286
 
287
+ return path if persist_options[:no_load]
288
+
283
289
  res
284
290
  end
285
291
  rescue
data/lib/rbbt/resource/path.rb CHANGED
@@ -21,6 +21,7 @@ module Path
21
21
  end
22
22
 
23
23
  def join(name)
24
+ raise "Invalid path: #{ self }" if self.nil?
24
25
  if self.empty?
25
26
  self.annotate name.to_s.dup
26
27
  else
data/lib/rbbt/tsv/accessor.rb CHANGED
@@ -7,8 +7,12 @@ module TSV
7
7
 
8
8
  attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
9
9
 
10
+ def info
11
+ {:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed}.delete_if{|k,v| v.nil? }
12
+ end
13
+
10
14
  def annotate(tsv)
11
- TSV.setup(tsv, :key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed)
15
+ TSV.setup(tsv, info)
12
16
  end
13
17
 
14
18
  def entity_options
@@ -149,14 +153,16 @@ module TSV
149
153
  def serializer=(serializer)
150
154
  @serializer = serializer
151
155
  self.send(:[]=, KEY_PREFIX + 'serializer', (serializer.nil? ? SERIALIZED_NIL : TSV_SERIALIZER.dump(serializer)), :entry_key)
152
- @serializar_module = serializer.nil? ? nil : SERIALIZER_ALIAS[serializer.to_sym]
156
+ @serializar_module = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
153
157
  end
154
158
 
155
159
 
156
160
  def serializer_module
157
- @serializar_module ||= begin
161
+ @serializer_module ||= begin
158
162
  serializer = self.serializer
159
- serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
163
+ mod = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
164
+ raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
165
+ mod
160
166
  end
161
167
  end
162
168
 
@@ -170,8 +176,10 @@ module TSV
170
176
  def [](key, clean = false)
171
177
  value = super(key)
172
178
  return value if clean or value.nil?
179
+ @serializer_module ||= self.serializer_module
180
+
181
+ value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
173
182
 
174
- value = serializer_module.load(value) if serializer_module and not TSV::CleanSerializer === serializer_module
175
183
  return value if @unnamed or fields.nil?
176
184
 
177
185
  case type
@@ -186,11 +194,8 @@ module TSV
186
194
  end
187
195
 
188
196
  def []=(key, value, clean = false)
189
- if clean or serializer_module.nil? or TSV::CleanSerializer === serializer_module or value.nil?
190
- return super(key, value)
191
- else
192
- return super(key, serializer_module.dump(value))
193
- end
197
+ return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
198
+ super(key, @serializer_module.dump(value))
194
199
  end
195
200
 
196
201
  def zip_new(key, values)
@@ -231,20 +236,19 @@ module TSV
231
236
  def each
232
237
  fields = self.fields
233
238
 
234
- serializer = self.serializer
235
239
  serializer_module = self.serializer_module
236
240
  super do |key, value|
237
241
  next if ENTRY_KEYS.include? key
238
242
 
239
243
  # TODO Update this to be more efficient
240
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer === serializer_module
244
+ value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
241
245
 
242
246
  # Annotated with Entity and NamedArray
243
247
  if not @unnamed
244
248
  if not fields.nil?
245
249
  case type
246
250
  when :double, :list
247
- setup_array value, fields, key, entity_options, entity_templates if Array === value
251
+ setup_array value, fields, key, entity_options, entity_templates if Array == value
248
252
  when :flat, :single
249
253
  prepare_entity(value, fields.first, entity_options)
250
254
  end
@@ -258,13 +262,12 @@ module TSV
258
262
  end
259
263
 
260
264
  def collect
261
- serializer = self.serializer
262
265
  serializer_module = self.serializer_module
263
266
  super do |key, value|
264
267
  next if ENTRY_KEYS.include? key
265
268
 
266
269
  # TODO Update this to be more efficient
267
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer === serializer_module
270
+ value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
268
271
 
269
272
  # Annotated with Entity and NamedArray
270
273
  if not @unnamed
data/lib/rbbt/tsv/parallel.rb CHANGED
@@ -1,47 +1,104 @@
1
+ require 'rbbt/util/concurrency'
2
+
1
3
  module TSV
2
4
 
3
- def pthrough(num_threads = 100, new_key_field = nil, new_fields = nil, uniq = false, zipped = false)
4
- q = Queue.new
5
- mutex = Mutex.new
6
-
7
- threads = []
8
-
9
- done = false
10
- num_threads.times do |i|
11
- threads << Thread.new(Thread.current) do |current|
12
- begin
13
- loop do
14
- p = q.pop
15
- p << mutex
16
- yield *p
17
- next if q.length == 0 and done
18
- end
19
- rescue Exception
20
- current.raise $!
21
- end
22
- end
23
- end
5
+ def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
6
+ q = RbbtThreadQueue.new num_threads
24
7
 
25
- max = 10_000_000
26
- res = through(new_key_field, new_fields, uniq, zipped) do |*p|
27
- if q.length >= max
28
- Thread.pass
29
- q << p
8
+ q.init(true, &block)
9
+
10
+ begin
11
+ res = through(new_key_field, new_fields, uniq, zipped) do |*p|
12
+ q.process p
30
13
  end
31
- q << p
14
+ q.join
15
+ ensure
16
+ q.clean
32
17
  end
33
18
 
34
- done == true
19
+ end
20
+
21
+ def ppthrough_callback(&block)
22
+ @ppthrough_callback = block
23
+ end
35
24
 
36
- Thread.pass while q.length > 0
25
+ def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
37
26
 
27
+ q = RbbtProcessQueue.new num_procs
38
28
 
39
- threads.each{|t| t.kill }
29
+ q.callback &@ppthrough_callback
30
+ @ppthrough_callback = nil
31
+
32
+ q.init do |k,v|
33
+ block.call k,v
34
+ end
35
+
36
+ begin
37
+ res = through(new_key_field, new_fields, uniq, zipped) do |*p|
38
+ q.process q
39
+ end
40
+ q.join
41
+ ensure
42
+ q.clean
43
+ end
40
44
 
41
45
  res
42
46
  end
43
47
 
44
- def _pthrough(num_threads = 1, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
45
- through(new_key_field, new_fields, uniq, zipped, &block)
48
+ def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
49
+
50
+ q = RbbtProcessQueue.new num_procs
51
+
52
+ q.callback &@ppthrough_callback
53
+ @ppthrough_callback = nil
54
+
55
+ _pat_size = 20
56
+ _pat = "A" << _pat_size.to_s
57
+
58
+ num_fields = fields.length
59
+ pattern = case type
60
+ when :single, :flat
61
+ _pat * 2
62
+ when :list, :double
63
+ _pat * (num_fields + 1)
64
+ end
65
+
66
+ q.init do |str|
67
+ _parts = str.unpack(pattern)
68
+
69
+ case type
70
+ when :single
71
+ k, v = _parts
72
+ when :list
73
+ k, *v = _parts
74
+ when :flat
75
+ k, v = _parts
76
+ v = v.split "|"
77
+ when :double
78
+ k, *v = _parts
79
+ v = v.collect{|l| l.split "|" }
80
+ end
81
+
82
+ block.call k,v
83
+ end
84
+
85
+ begin
86
+ res = through(new_key_field, new_fields, uniq, zipped) do |k,v|
87
+ case type
88
+ when :flat
89
+ v = v * "|"
90
+ when :double
91
+ v = v.collect{|l| l * "|" } if type == :double
92
+ end
93
+
94
+ str = [k,v].flatten.pack(pattern)
95
+ q.process str
96
+ end
97
+ q.join
98
+ ensure
99
+ q.clean
100
+ end
101
+
102
+ res
46
103
  end
47
104
  end
data/lib/rbbt/tsv/util.rb CHANGED
@@ -230,4 +230,15 @@ module TSV
230
230
  new
231
231
  end
232
232
 
233
+ def marshal_dump
234
+ [info, to_hash]
235
+ end
236
+ end
237
+
238
+ class Hash
239
+ def marshal_load(array)
240
+ info, to_hash = array
241
+ self.merge! to_hash
242
+ TSV.setup(self)
243
+ end
233
244
  end
data/lib/rbbt/util/R.rb CHANGED
@@ -53,7 +53,6 @@ source('#{UTIL}');
53
53
  when nil
54
54
  "NULL"
55
55
  when TSV
56
- #"as.matrix(data.frame(c(#{object.transpose("Field").collect{|k,v| "#{k}=" << R.ruby2R(v)}.flatten * ", "}), row.names=#{R.ruby2R object.keys}))"
57
56
  "matrix(#{R.ruby2R object.values},dimnames=list(#{R.ruby2R object.keys}, #{R.ruby2R object.fields}))"
58
57
  when Symbol
59
58
  "#{ object }"
data/lib/rbbt/util/concurrency.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'rbbt/util/concurrency/threads'
2
+ require 'rbbt/util/concurrency/processes'
data/lib/rbbt/util/concurrency/processes.rb ADDED
@@ -0,0 +1,96 @@
1
+ require 'rbbt/util/concurrency/processes/worker'
2
+ require 'rbbt/util/concurrency/processes/socket'
3
+
4
+
5
+ class RbbtProcessQueue
6
+ #{{{ RbbtProcessQueue
7
+
8
+ attr_accessor :num_processes, :processes, :queue, :process_monitor
9
+ def initialize(num_processes)
10
+ @num_processes = num_processes
11
+ @processes = []
12
+ @queue = RbbtProcessSocket.new
13
+ end
14
+
15
+ attr_accessor :callback, :callback_queue, :callback_thread
16
+ def callback(&block)
17
+ if block_given?
18
+ @callback = block
19
+
20
+ @callback_queue = RbbtProcessSocket.new
21
+
22
+ @callback_thread = Thread.new(Thread.current) do |parent|
23
+ begin
24
+ loop do
25
+ p = @callback_queue.pop
26
+ raise p if Exception === p
27
+ @callback.call p
28
+ end
29
+ rescue ClosedStream
30
+ rescue Exception
31
+ Log.debug $!
32
+ parent.raise $!
33
+ Thread.exit
34
+ end
35
+ end
36
+ else
37
+ @callback, @callback_queue, @callback_thread = nil, nil, nil
38
+ end
39
+ end
40
+
41
+ def init(&block)
42
+ num_processes.times do |i|
43
+ @processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, &block)
44
+ end
45
+ @queue.sread.close
46
+ @callback_queue.swrite.close if @callback_queue
47
+
48
+ @process_monitor = Thread.new(Thread.current) do |parent|
49
+ begin
50
+ while @processes.any? do
51
+ pid = Process.wait -1, Process::WNOHANG
52
+ if pid
53
+ @processes.delete_if{|p| p.pid == pid}
54
+ raise "Process #{pid} failed" unless $?.success?
55
+ else
56
+ sleep 1
57
+ end
58
+ end
59
+ rescue
60
+ parent.raise $!
61
+ ensure
62
+ Thread.exit
63
+ end
64
+ end
65
+ end
66
+
67
+ def close_callback
68
+ @callback_thread.join if @callback_thread and @callback_thread.alive?
69
+ end
70
+
71
+ def join
72
+ @queue.push ClosedStream.new
73
+ @queue.swrite.close
74
+ begin
75
+ @process_monitor.join
76
+ ensure
77
+ close_callback if @callback
78
+ end
79
+ end
80
+
81
+ def clean
82
+ @processes.each{|p| p.abort }.clear
83
+ @callback_thread.raise Aborted if @callback_thread and @callback_thread.alive?
84
+ end
85
+
86
+ def process(e)
87
+ @queue.push e
88
+ end
89
+
90
+ def self.each(list, num = 3, &block)
91
+ q = RbbtProcessQueue.new num
92
+ q.init(&block)
93
+ list.each do |elem| q.process elem end
94
+ q.join
95
+ end
96
+ end
data/lib/rbbt/util/concurrency/processes/socket.rb ADDED
@@ -0,0 +1,87 @@
1
+ require 'rbbt/util/semaphore'
2
+
3
+ class RbbtProcessQueue
4
+ class RbbtProcessSocket
5
+
6
+ Serializer = Marshal
7
+
8
+ attr_accessor :sread, :swrite, :write_sem, :read_sem
9
+ def initialize
10
+ @sread, @swrite = IO.pipe
11
+
12
+ key = rand(100000).to_s;
13
+ @write_sem = key + '.in'
14
+ @read_sem = key + '.out'
15
+ RbbtSemaphore.create_semaphore(@write_sem,1)
16
+ RbbtSemaphore.create_semaphore(@read_sem,1)
17
+ end
18
+
19
+ def clean
20
+ @sread.close unless @sread.closed?
21
+ @swrite.close unless @swrite.closed?
22
+ RbbtSemaphore.delete_semaphore(@write_sem)
23
+ RbbtSemaphore.delete_semaphore(@read_sem)
24
+ end
25
+
26
+
27
+ def dump(obj, stream)
28
+ case obj
29
+ when String
30
+ payload = obj
31
+ size_head = [payload.bytesize,"S"].pack 'La'
32
+ str = size_head << payload
33
+ else
34
+ payload = Serializer.dump(obj)
35
+ size_head = [payload.bytesize,"M"].pack 'La'
36
+ str = size_head << payload
37
+ end
38
+
39
+ write_length = str.length
40
+ IO.select(nil, [stream])
41
+ wrote = stream.write(str)
42
+ while wrote < write_length
43
+ wrote += stream.write(str[wrote..-1])
44
+ end
45
+ end
46
+
47
+ def load(stream)
48
+ size_head = Misc.read_stream stream, 5
49
+
50
+ size, type = size_head.unpack('La')
51
+
52
+ begin
53
+ payload = Misc.read_stream stream, size
54
+ case type
55
+ when "M"
56
+ Serializer.load(payload)
57
+ when "S"
58
+ payload
59
+ end
60
+ rescue TryAgain
61
+ retry
62
+ end
63
+ end
64
+
65
+ #{{{ ACCESSOR
66
+
67
+ def push(obj)
68
+ begin
69
+ RbbtSemaphore.synchronize(@write_sem) do
70
+ self.dump(obj, @swrite)
71
+ end
72
+ rescue
73
+ return ClosedStream.new
74
+ end
75
+ end
76
+
77
+ def pop
78
+ begin
79
+ RbbtSemaphore.synchronize(@read_sem) do
80
+ self.load(@sread)
81
+ end
82
+ rescue IOError, ClosedStream
83
+ return ClosedStream.new
84
+ end
85
+ end
86
+ end
87
+ end