rbbt-util 5.7.0 → 5.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations.rb +4 -1
  3. data/lib/rbbt/annotations/util.rb +11 -0
  4. data/lib/rbbt/persist.rb +8 -2
  5. data/lib/rbbt/resource/path.rb +1 -0
  6. data/lib/rbbt/tsv/accessor.rb +18 -15
  7. data/lib/rbbt/tsv/parallel.rb +89 -32
  8. data/lib/rbbt/tsv/util.rb +11 -0
  9. data/lib/rbbt/util/R.rb +0 -1
  10. data/lib/rbbt/util/concurrency.rb +2 -0
  11. data/lib/rbbt/util/concurrency/processes.rb +96 -0
  12. data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
  13. data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
  14. data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
  15. data/lib/rbbt/util/concurrency/threads.rb +76 -0
  16. data/lib/rbbt/util/log.rb +37 -5
  17. data/lib/rbbt/util/misc.rb +89 -4
  18. data/lib/rbbt/util/semaphore.rb +10 -4
  19. data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
  20. data/lib/rbbt/util/simpleopt/doc.rb +2 -4
  21. data/lib/rbbt/workflow/accessor.rb +39 -12
  22. data/lib/rbbt/workflow/step.rb +5 -7
  23. data/share/rbbt_commands/benchmark/pthrough +18 -0
  24. data/share/rbbt_commands/color +41 -0
  25. data/share/rbbt_commands/stat/density +50 -0
  26. data/share/rbbt_commands/tsv/info +21 -3
  27. data/share/rbbt_commands/tsv/slice +46 -0
  28. data/share/rbbt_commands/tsv/subset +53 -0
  29. data/share/rbbt_commands/tsv/values +7 -1
  30. data/test/rbbt/annotations/test_util.rb +14 -0
  31. data/test/rbbt/tsv/test_parallel.rb +25 -3
  32. data/test/rbbt/tsv/test_util.rb +15 -0
  33. data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
  34. data/test/rbbt/util/concurrency/test_processes.rb +53 -0
  35. data/test/rbbt/util/concurrency/test_threads.rb +42 -0
  36. data/test/rbbt/util/test_concurrency.rb +6 -0
  37. metadata +23 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d4025fea0b4bfd41f869ae4282cd268ebe5f876
4
- data.tar.gz: 194ead2946a9a3c215a9136bc2b0c615bf73488a
3
+ metadata.gz: d2929287d81291dd772ab9ee6f415b52bafad0db
4
+ data.tar.gz: 61ad4ae0dade13e2cbc0e02c39107243d92a52e0
5
5
  SHA512:
6
- metadata.gz: 0df26701e5d7e3842a9ddd939c385b7c16884f81eee2fde670d6c7b1c0fe7cc371d05acaa65967eda8716fe7301c5e83e79a8b189e07341597b23052d648c060
7
- data.tar.gz: afcd1c9b2c88cb5858278ae50401a5140d729aa2007629f99adff8d3239f3d28d2df4c4791362a450d1892d273bef352941ab14578a6031fc7e43a9f8404a3fd
6
+ metadata.gz: 14285e88be1d35d8c23fb77ad038d2818ae1df04245bd2fd48bbab4f72b419eff1042764b8173b33c1d96955dee0df7ecfd27a84efb34171abb10bae05169b79
7
+ data.tar.gz: d96daf9acd2d79ab8b9425d849fd9442b7fd4ee1d614d364d135b9a601d0b7de1278eba414cb464f92f325cb84ab131311dff1937c1bdc2a99d631e7ea5f146b
data/lib/rbbt/annotations.rb CHANGED
@@ -159,9 +159,12 @@ module Annotated
159
159
  object.clean_annotations :
160
160
  object.inject([]){|acc,e| acc << Annotated.purge(e); acc}
161
161
  when Hash
162
+ new = {}
162
163
  object.each do |key, value|
163
- object[key] = Annotated.purge value
164
+ Annotated.purge key
165
+ new[key] = Annotated.purge value
164
166
  end
167
+ new
165
168
  else
166
169
  object
167
170
  end
data/lib/rbbt/annotations/util.rb CHANGED
@@ -209,6 +209,17 @@ module Annotated
209
209
  end
210
210
  end
211
211
 
212
+ def marshal_dump(depth)
213
+ clean_annotations
214
+ end
215
+ end
212
216
 
217
+ class String
218
+ def marshal_load(str)
219
+ self.replace str
213
220
  end
221
+ end
222
+
223
+
224
+
214
225
 
data/lib/rbbt/persist.rb CHANGED
@@ -276,10 +276,16 @@ module Persist
276
276
  Log.medium "Persist create: #{ path } - #{persist_options.inspect[0..100]}"
277
277
  res = yield
278
278
 
279
- Misc.lock(path) do
280
- save_file(path, type, res)
279
+ if res.nil?
280
+ res = load_file(path) unless persist_options[:no_load]
281
+ else
282
+ Misc.lock(path) do
283
+ save_file(path, type, res)
284
+ end
281
285
  end
282
286
 
287
+ return path if persist_options[:no_load]
288
+
283
289
  res
284
290
  end
285
291
  rescue
data/lib/rbbt/resource/path.rb CHANGED
@@ -21,6 +21,7 @@ module Path
21
21
  end
22
22
 
23
23
  def join(name)
24
+ raise "Invalid path: #{ self }" if self.nil?
24
25
  if self.empty?
25
26
  self.annotate name.to_s.dup
26
27
  else
data/lib/rbbt/tsv/accessor.rb CHANGED
@@ -7,8 +7,12 @@ module TSV
7
7
 
8
8
  attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
9
9
 
10
+ def info
11
+ {:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed}.delete_if{|k,v| v.nil? }
12
+ end
13
+
10
14
  def annotate(tsv)
11
- TSV.setup(tsv, :key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed)
15
+ TSV.setup(tsv, info)
12
16
  end
13
17
 
14
18
  def entity_options
@@ -149,14 +153,16 @@ module TSV
149
153
  def serializer=(serializer)
150
154
  @serializer = serializer
151
155
  self.send(:[]=, KEY_PREFIX + 'serializer', (serializer.nil? ? SERIALIZED_NIL : TSV_SERIALIZER.dump(serializer)), :entry_key)
152
- @serializar_module = serializer.nil? ? nil : SERIALIZER_ALIAS[serializer.to_sym]
156
+ @serializar_module = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
153
157
  end
154
158
 
155
159
 
156
160
  def serializer_module
157
- @serializar_module ||= begin
161
+ @serializer_module ||= begin
158
162
  serializer = self.serializer
159
- serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
163
+ mod = serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
164
+ raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
165
+ mod
160
166
  end
161
167
  end
162
168
 
@@ -170,8 +176,10 @@ module TSV
170
176
  def [](key, clean = false)
171
177
  value = super(key)
172
178
  return value if clean or value.nil?
179
+ @serializer_module ||= self.serializer_module
180
+
181
+ value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
173
182
 
174
- value = serializer_module.load(value) if serializer_module and not TSV::CleanSerializer === serializer_module
175
183
  return value if @unnamed or fields.nil?
176
184
 
177
185
  case type
@@ -186,11 +194,8 @@ module TSV
186
194
  end
187
195
 
188
196
  def []=(key, value, clean = false)
189
- if clean or serializer_module.nil? or TSV::CleanSerializer === serializer_module or value.nil?
190
- return super(key, value)
191
- else
192
- return super(key, serializer_module.dump(value))
193
- end
197
+ return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
198
+ super(key, @serializer_module.dump(value))
194
199
  end
195
200
 
196
201
  def zip_new(key, values)
@@ -231,20 +236,19 @@ module TSV
231
236
  def each
232
237
  fields = self.fields
233
238
 
234
- serializer = self.serializer
235
239
  serializer_module = self.serializer_module
236
240
  super do |key, value|
237
241
  next if ENTRY_KEYS.include? key
238
242
 
239
243
  # TODO Update this to be more efficient
240
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer === serializer_module
244
+ value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
241
245
 
242
246
  # Annotated with Entity and NamedArray
243
247
  if not @unnamed
244
248
  if not fields.nil?
245
249
  case type
246
250
  when :double, :list
247
- setup_array value, fields, key, entity_options, entity_templates if Array === value
251
+ setup_array value, fields, key, entity_options, entity_templates if Array == value
248
252
  when :flat, :single
249
253
  prepare_entity(value, fields.first, entity_options)
250
254
  end
@@ -258,13 +262,12 @@ module TSV
258
262
  end
259
263
 
260
264
  def collect
261
- serializer = self.serializer
262
265
  serializer_module = self.serializer_module
263
266
  super do |key, value|
264
267
  next if ENTRY_KEYS.include? key
265
268
 
266
269
  # TODO Update this to be more efficient
267
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer === serializer_module
270
+ value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
268
271
 
269
272
  # Annotated with Entity and NamedArray
270
273
  if not @unnamed
data/lib/rbbt/tsv/parallel.rb CHANGED
@@ -1,47 +1,104 @@
1
+ require 'rbbt/util/concurrency'
2
+
1
3
  module TSV
2
4
 
3
- def pthrough(num_threads = 100, new_key_field = nil, new_fields = nil, uniq = false, zipped = false)
4
- q = Queue.new
5
- mutex = Mutex.new
6
-
7
- threads = []
8
-
9
- done = false
10
- num_threads.times do |i|
11
- threads << Thread.new(Thread.current) do |current|
12
- begin
13
- loop do
14
- p = q.pop
15
- p << mutex
16
- yield *p
17
- next if q.length == 0 and done
18
- end
19
- rescue Exception
20
- current.raise $!
21
- end
22
- end
23
- end
5
+ def pthrough(num_threads = 10, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
6
+ q = RbbtThreadQueue.new num_threads
24
7
 
25
- max = 10_000_000
26
- res = through(new_key_field, new_fields, uniq, zipped) do |*p|
27
- if q.length >= max
28
- Thread.pass
29
- q << p
8
+ q.init(true, &block)
9
+
10
+ begin
11
+ res = through(new_key_field, new_fields, uniq, zipped) do |*p|
12
+ q.process p
30
13
  end
31
- q << p
14
+ q.join
15
+ ensure
16
+ q.clean
32
17
  end
33
18
 
34
- done == true
19
+ end
20
+
21
+ def ppthrough_callback(&block)
22
+ @ppthrough_callback = block
23
+ end
35
24
 
36
- Thread.pass while q.length > 0
25
+ def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
37
26
 
27
+ q = RbbtProcessQueue.new num_procs
38
28
 
39
- threads.each{|t| t.kill }
29
+ q.callback &@ppthrough_callback
30
+ @ppthrough_callback = nil
31
+
32
+ q.init do |k,v|
33
+ block.call k,v
34
+ end
35
+
36
+ begin
37
+ res = through(new_key_field, new_fields, uniq, zipped) do |*p|
38
+ q.process q
39
+ end
40
+ q.join
41
+ ensure
42
+ q.clean
43
+ end
40
44
 
41
45
  res
42
46
  end
43
47
 
44
- def _pthrough(num_threads = 1, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
45
- through(new_key_field, new_fields, uniq, zipped, &block)
48
+ def ppthrough(num_procs = 7, new_key_field = nil, new_fields = nil, uniq = false, zipped = false, &block)
49
+
50
+ q = RbbtProcessQueue.new num_procs
51
+
52
+ q.callback &@ppthrough_callback
53
+ @ppthrough_callback = nil
54
+
55
+ _pat_size = 20
56
+ _pat = "A" << _pat_size.to_s
57
+
58
+ num_fields = fields.length
59
+ pattern = case type
60
+ when :single, :flat
61
+ _pat * 2
62
+ when :list, :double
63
+ _pat * (num_fields + 1)
64
+ end
65
+
66
+ q.init do |str|
67
+ _parts = str.unpack(pattern)
68
+
69
+ case type
70
+ when :single
71
+ k, v = _parts
72
+ when :list
73
+ k, *v = _parts
74
+ when :flat
75
+ k, v = _parts
76
+ v = v.split "|"
77
+ when :double
78
+ k, *v = _parts
79
+ v = v.collect{|l| l.split "|" }
80
+ end
81
+
82
+ block.call k,v
83
+ end
84
+
85
+ begin
86
+ res = through(new_key_field, new_fields, uniq, zipped) do |k,v|
87
+ case type
88
+ when :flat
89
+ v = v * "|"
90
+ when :double
91
+ v = v.collect{|l| l * "|" } if type == :double
92
+ end
93
+
94
+ str = [k,v].flatten.pack(pattern)
95
+ q.process str
96
+ end
97
+ q.join
98
+ ensure
99
+ q.clean
100
+ end
101
+
102
+ res
46
103
  end
47
104
  end
data/lib/rbbt/tsv/util.rb CHANGED
@@ -230,4 +230,15 @@ module TSV
230
230
  new
231
231
  end
232
232
 
233
+ def marshal_dump
234
+ [info, to_hash]
235
+ end
236
+ end
237
+
238
+ class Hash
239
+ def marshal_load(array)
240
+ info, to_hash = array
241
+ self.merge! to_hash
242
+ TSV.setup(self)
243
+ end
233
244
  end
data/lib/rbbt/util/R.rb CHANGED
@@ -53,7 +53,6 @@ source('#{UTIL}');
53
53
  when nil
54
54
  "NULL"
55
55
  when TSV
56
- #"as.matrix(data.frame(c(#{object.transpose("Field").collect{|k,v| "#{k}=" << R.ruby2R(v)}.flatten * ", "}), row.names=#{R.ruby2R object.keys}))"
57
56
  "matrix(#{R.ruby2R object.values},dimnames=list(#{R.ruby2R object.keys}, #{R.ruby2R object.fields}))"
58
57
  when Symbol
59
58
  "#{ object }"
data/lib/rbbt/util/concurrency.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'rbbt/util/concurrency/threads'
2
+ require 'rbbt/util/concurrency/processes'
data/lib/rbbt/util/concurrency/processes.rb ADDED
@@ -0,0 +1,96 @@
1
+ require 'rbbt/util/concurrency/processes/worker'
2
+ require 'rbbt/util/concurrency/processes/socket'
3
+
4
+
5
+ class RbbtProcessQueue
6
+ #{{{ RbbtProcessQueue
7
+
8
+ attr_accessor :num_processes, :processes, :queue, :process_monitor
9
+ def initialize(num_processes)
10
+ @num_processes = num_processes
11
+ @processes = []
12
+ @queue = RbbtProcessSocket.new
13
+ end
14
+
15
+ attr_accessor :callback, :callback_queue, :callback_thread
16
+ def callback(&block)
17
+ if block_given?
18
+ @callback = block
19
+
20
+ @callback_queue = RbbtProcessSocket.new
21
+
22
+ @callback_thread = Thread.new(Thread.current) do |parent|
23
+ begin
24
+ loop do
25
+ p = @callback_queue.pop
26
+ raise p if Exception === p
27
+ @callback.call p
28
+ end
29
+ rescue ClosedStream
30
+ rescue Exception
31
+ Log.debug $!
32
+ parent.raise $!
33
+ Thread.exit
34
+ end
35
+ end
36
+ else
37
+ @callback, @callback_queue, @callback_thread = nil, nil, nil
38
+ end
39
+ end
40
+
41
+ def init(&block)
42
+ num_processes.times do |i|
43
+ @processes << RbbtProcessQueueWorker.new(@queue, @callback_queue, &block)
44
+ end
45
+ @queue.sread.close
46
+ @callback_queue.swrite.close if @callback_queue
47
+
48
+ @process_monitor = Thread.new(Thread.current) do |parent|
49
+ begin
50
+ while @processes.any? do
51
+ pid = Process.wait -1, Process::WNOHANG
52
+ if pid
53
+ @processes.delete_if{|p| p.pid == pid}
54
+ raise "Process #{pid} failed" unless $?.success?
55
+ else
56
+ sleep 1
57
+ end
58
+ end
59
+ rescue
60
+ parent.raise $!
61
+ ensure
62
+ Thread.exit
63
+ end
64
+ end
65
+ end
66
+
67
+ def close_callback
68
+ @callback_thread.join if @callback_thread and @callback_thread.alive?
69
+ end
70
+
71
+ def join
72
+ @queue.push ClosedStream.new
73
+ @queue.swrite.close
74
+ begin
75
+ @process_monitor.join
76
+ ensure
77
+ close_callback if @callback
78
+ end
79
+ end
80
+
81
+ def clean
82
+ @processes.each{|p| p.abort }.clear
83
+ @callback_thread.raise Aborted if @callback_thread and @callback_thread.alive?
84
+ end
85
+
86
+ def process(e)
87
+ @queue.push e
88
+ end
89
+
90
+ def self.each(list, num = 3, &block)
91
+ q = RbbtProcessQueue.new num
92
+ q.init(&block)
93
+ list.each do |elem| q.process elem end
94
+ q.join
95
+ end
96
+ end
data/lib/rbbt/util/concurrency/processes/socket.rb ADDED
@@ -0,0 +1,87 @@
1
+ require 'rbbt/util/semaphore'
2
+
3
+ class RbbtProcessQueue
4
+ class RbbtProcessSocket
5
+
6
+ Serializer = Marshal
7
+
8
+ attr_accessor :sread, :swrite, :write_sem, :read_sem
9
+ def initialize
10
+ @sread, @swrite = IO.pipe
11
+
12
+ key = rand(100000).to_s;
13
+ @write_sem = key + '.in'
14
+ @read_sem = key + '.out'
15
+ RbbtSemaphore.create_semaphore(@write_sem,1)
16
+ RbbtSemaphore.create_semaphore(@read_sem,1)
17
+ end
18
+
19
+ def clean
20
+ @sread.close unless @sread.closed?
21
+ @swrite.close unless @swrite.closed?
22
+ RbbtSemaphore.delete_semaphore(@write_sem)
23
+ RbbtSemaphore.delete_semaphore(@read_sem)
24
+ end
25
+
26
+
27
+ def dump(obj, stream)
28
+ case obj
29
+ when String
30
+ payload = obj
31
+ size_head = [payload.bytesize,"S"].pack 'La'
32
+ str = size_head << payload
33
+ else
34
+ payload = Serializer.dump(obj)
35
+ size_head = [payload.bytesize,"M"].pack 'La'
36
+ str = size_head << payload
37
+ end
38
+
39
+ write_length = str.length
40
+ IO.select(nil, [stream])
41
+ wrote = stream.write(str)
42
+ while wrote < write_length
43
+ wrote += stream.write(str[wrote..-1])
44
+ end
45
+ end
46
+
47
+ def load(stream)
48
+ size_head = Misc.read_stream stream, 5
49
+
50
+ size, type = size_head.unpack('La')
51
+
52
+ begin
53
+ payload = Misc.read_stream stream, size
54
+ case type
55
+ when "M"
56
+ Serializer.load(payload)
57
+ when "S"
58
+ payload
59
+ end
60
+ rescue TryAgain
61
+ retry
62
+ end
63
+ end
64
+
65
+ #{{{ ACCESSOR
66
+
67
+ def push(obj)
68
+ begin
69
+ RbbtSemaphore.synchronize(@write_sem) do
70
+ self.dump(obj, @swrite)
71
+ end
72
+ rescue
73
+ return ClosedStream.new
74
+ end
75
+ end
76
+
77
+ def pop
78
+ begin
79
+ RbbtSemaphore.synchronize(@read_sem) do
80
+ self.load(@sread)
81
+ end
82
+ rescue IOError, ClosedStream
83
+ return ClosedStream.new
84
+ end
85
+ end
86
+ end
87
+ end