rbbt-util 5.13.22 → 5.13.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3a19d56208ed15434da77c5823b332526972ac70
4
- data.tar.gz: 0d1296b74615471612be8717e6530c1cdf687147
3
+ metadata.gz: 716be8fb2d3ec4a9e9cc76482f96771856976191
4
+ data.tar.gz: 958406db01b705360855b2a6bff88b0e0ff804b0
5
5
  SHA512:
6
- metadata.gz: 2a409449bb5de7f586d78b4b99306a94fef4a4613da789a06175586e4c16800c6094e797fc2caeab057b9ea2c45ddba173a8bd79e0f341e80733fd8fa515f6d6
7
- data.tar.gz: 5da483b8a7179b5fd9e9f9b49c1eed1024a7d3d78af15df4cc28f6af66472f8572e4de0840ca2b2bbca24f624abf1d8002e019330d42b304cea03f4cf1543481
6
+ metadata.gz: a3e9739aba63e67c6507ac8e7b31393d6eaae60dde59e5c0bea00faf3132b732a9f5248cb52e3d552c57834cd157258c2799f9993c2caff35d0a65f2c723548b
7
+ data.tar.gz: 22aa1b9d063dbe6d6150e47e9949dd863ed0c7ba8cdaa90fbb1b61ea4a588ed696694a25daf5f1dfabf54e130339083e55c684a7e78ff37625596f92a8643199
@@ -0,0 +1,168 @@
1
+ require 'rbbt-util'
2
+
3
module Persist
  # Spreads key/value pairs over several persistence databases ("shards") that
  # live under a single directory.
  #
  # The block given to the constructor maps a key to a shard identifier. The
  # database for a shard is opened lazily, the first time a key mapping to it
  # is used, through Persist.open_database(path, *params), where path is
  # "<directory>/shard-<identifier>".
  #
  # The sharder records its own view of the access mode in @writable and
  # @closed; these flags are updated by #read, #write and #close.
  class Sharder
    attr_accessor :directory, :params, :shard_function, :databases, :closed, :writable, :mutex

    MAX_CHAR = 255.chr

    # directory - directory under which the shard databases are created.
    # rest      - extra arguments forwarded verbatim to Persist.open_database.
    # block     - called with a key; its result identifies the shard.
    def initialize(directory, *rest, &block)
      @shard_function = block
      @params = rest
      @databases = {}
      @directory = directory
      @mutex = Mutex.new
    end

    # Returns the database responsible for +key+, opening (and memoizing) it
    # on first use.
    def database(key)
      shard = shard_function.call(key)
      # Interpolation instead of `'shard-' << ...`, which mutated a literal.
      databases[shard] ||= Persist.open_database(File.join(directory, "shard-#{shard}"), *params)
    end

    # NOTE(review): `range` is not defined anywhere in this class, so this
    # raises NoMethodError unless it is provided elsewhere -- confirm.
    def prefix(key)
      range(key, 1, key + MAX_CHAR, 1)
    end

    # NOTE(review): relies on #prefix and on a `select` method that this class
    # does not define -- confirm where they are expected to come from.
    def get_prefix(key)
      keys = prefix(key)
      select(:key => keys)
    end

    # True once #close has been called and no #read/#write has followed.
    def closed?
      @closed
    end

    # True when the last mode change was #write.
    def write?
      @writable
    end

    # Opposite of #write?; also true before any mode change has happened.
    def read?
      ! write?
    end

    # Switches every opened shard to read mode and records it.
    #
    # The class used to define #read twice; the later, bare definition won and
    # never updated @writable/@closed, so #read_and_close could not notice a
    # previous #close. This single definition delegates to every shard (as the
    # effective original did) and keeps the flags in sync.
    #
    # force - accepted for signature compatibility with the first of the
    #         original definitions; every call delegates regardless.
    #
    # Returns self.
    def read(force = false)
      databases.each_value { |shard_db| shard_db.read }
      @writable = false
      @closed = false
      self
    end

    # Switches every opened shard to write mode and records it. See #read for
    # why there is a single definition and for the meaning of +force+.
    #
    # Returns self.
    def write(force = true)
      databases.each_value { |shard_db| shard_db.write }
      @writable = true
      @closed = false
      self
    end

    # Closes every opened shard and records it. The original first definition
    # called `super`, for which no parent implementation exists.
    #
    # Returns self.
    def close
      @closed = true
      databases.each_value { |shard_db| shard_db.close }
      self
    end

    # Yields every key/value pair of every opened shard.
    def each
      # `databases` is a Hash: iterate its values, not its [shard, db] pairs.
      databases.each_value do |shard_db|
        shard_db.each do |k, v|
          yield k, v
        end
      end
    end

    # Returns an array with the block result for every pair, or the
    # [key, value] pairs themselves when no block is given.
    def collect
      res = []
      each do |key, value|
        res << (block_given? ? yield(key, value) : [key, value])
      end
      res
    end

    # Runs the block in write mode under the write lock and leaves the shards
    # in read mode afterwards, even if the block raises. Returns whatever the
    # locked section returns.
    def write_and_read
      with_write_lock do
        begin
          yield
        ensure
          read
        end
      end
    end

    # Like #write_and_read but closes the shards afterwards.
    def write_and_close
      with_write_lock do
        begin
          yield
        ensure
          close
        end
      end
    end

    # Runs the block in read mode (re-opening for read if the sharder is
    # closed or in write mode) and closes the shards afterwards.
    def read_and_close
      @mutex.synchronize do
        read if @closed or not read?
        begin
          yield
        ensure
          close
        end
      end
    end

    # Stores every pair of +hash+ through #[]=.
    def merge!(hash)
      hash.each do |key,values|
        self[key] = values
      end
    end

    # Keys of all opened shards, as one flat array.
    def keys
      databases.values.collect { |shard_db| shard_db.keys }.flatten
    end

    def []=(key, value)
      database(key)[key] = value
    end

    # Looks +key+ up in its shard.
    #
    # The original signature required a second argument, which made
    # `sharder[key]` raise ArgumentError. It is now optional and ignored, and
    # is kept only so two-argument calls keep working.
    def [](key, value = nil)
      database(key)[key]
    end

    # Stores a [key, value] pair; nil is ignored.
    def <<(p)
      return if p.nil?
      self[p.first] = p.last
    end

    private

    # Takes the file lock for this directory and the instance mutex, makes
    # sure the sharder is in write mode, then yields.
    def with_write_lock
      lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
      Misc.lock(lock_filename) do
        @mutex.synchronize do
          write if @closed or not write?
          yield
        end
      end
    end
  end
end
@@ -28,6 +28,8 @@ rescue Exception
28
28
  Log.debug "The kyotocabinet gem could not be loaded. Persistance using this engine will fail."
29
29
  end
30
30
 
31
+ require 'rbbt/persist/tsv/sharder'
32
+
31
33
  module Persist
32
34
  CONNECTIONS = {}
33
35
 
@@ -1,4 +1,7 @@
1
1
  module TSV
2
+
3
+ module MultipleResult; end # Marker module: TSV.store_into stores each element of a value extended with it separately
4
+
2
5
  def self.obj_stream(obj)
3
6
  case obj
4
7
  when (defined? Step and Step)
@@ -15,7 +18,7 @@ module TSV
15
18
  def self.guess_max(obj)
16
19
  begin
17
20
  case obj
18
- when Step
21
+ when (defined? Step and Step)
19
22
  if obj.done?
20
23
  CMD.cmd("wc -l '#{obj.path.find}'").read.to_i
21
24
  else
@@ -27,8 +30,15 @@ module TSV
27
30
  CMD.cmd("wc -l '#{obj.filename}'").read.to_i
28
31
  when Path
29
32
  CMD.cmd("wc -l '#{obj.find}'").read.to_i
33
+ when String
34
+ if File.exists? obj
35
+ CMD.cmd("wc -l '#{obj}'").read.to_i
36
+ else
37
+ nil
38
+ end
30
39
  end
31
40
  rescue Exception
41
+ Log.exception $!
32
42
  nil
33
43
  end
34
44
  end
@@ -69,6 +79,7 @@ module TSV
69
79
  end
70
80
  end
71
81
  end
82
+ Log::ProgressBar.remove_bar(bar) if bar
72
83
  join.call if join
73
84
  end
74
85
 
@@ -92,6 +103,7 @@ module TSV
92
103
  end
93
104
  end
94
105
  end
106
+ Log::ProgressBar.remove_bar(bar) if bar
95
107
  join.call if join
96
108
  end
97
109
 
@@ -115,6 +127,7 @@ module TSV
115
127
  end
116
128
  end
117
129
  end
130
+ Log::ProgressBar.remove_bar(bar) if bar
118
131
  join.call if join
119
132
  end
120
133
 
@@ -132,14 +145,22 @@ module TSV
132
145
 
133
146
  if callback
134
147
  while line = io.gets
135
- callback.call yield line.strip
136
- bar.tick if bar
148
+ begin
149
+ callback.call yield line.strip
150
+ ensure
151
+ bar.tick if bar
152
+ end
137
153
  end
138
154
  else
139
155
  while line = io.gets
140
- yield line.strip
156
+ begin
157
+ yield line.strip
158
+ ensure
159
+ bar.tick if bar
160
+ end
141
161
  end
142
162
  end
163
+ Log::ProgressBar.remove_bar(bar) if bar
143
164
  join.call if join
144
165
  end
145
166
 
@@ -332,6 +353,12 @@ module TSV
332
353
  end
333
354
 
334
355
  def self.store_into(store, value)
356
+ if MultipleResult === value
357
+ value.each do |v|
358
+ store_into store, v
359
+ end
360
+ return
361
+ end
335
362
  begin
336
363
  case store
337
364
  when Hash
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/persist/tsv'
4
+
5
# Exercises Persist::Sharder by storing one million entries sharded on the
# last character of the key.
class TestSharder < Test::Unit::TestCase
  def test_shard
    TmpFile.with_file do |dir|
      # Shard on the last character of the key.
      sharder = Persist::Sharder.new(dir, true, :float_array, 'HDB') { |key| key[-1] }

      keys = []
      size = 1_000_000
      Misc.benchmark(2) do
        sharder.write_and_read do
          (0...size).each do |number|
            name = number.to_s
            keys << name
            sharder[name] = [number, number * 2]
          end
        end

        # Every distinct key must be visible once the shards are readable.
        assert_equal size, sharder.keys.length
      end
    end
  end
end
27
+
@@ -366,4 +366,14 @@ class TestTSVParallelThrough < Test::Unit::TestCase
366
366
  end
367
367
  assert_equal size, stream.read.split("\n").length
368
368
  end
369
+
370
+ def test_store_multiple
371
+ size = 1000
372
+ array = (1..size).to_a.collect{|n| n.to_s}
373
+ stream = TSV.traverse array, :bar => {:max => size, :desc => "Array"}, :cpus => 5, :into => :stream do |e|
374
+ sleep 0.01
375
+ [e,e+".alt"].extend TSV::MultipleResult
376
+ end
377
+ assert_equal size*2, stream.read.split("\n").length
378
+ end
369
379
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.13.22
4
+ version: 5.13.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-27 00:00:00.000000000 Z
11
+ date: 2014-05-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -136,6 +136,7 @@ files:
136
136
  - lib/rbbt/persist/tsv/kyotocabinet.rb
137
137
  - lib/rbbt/persist/tsv/leveldb.rb
138
138
  - lib/rbbt/persist/tsv/lmdb.rb
139
+ - lib/rbbt/persist/tsv/sharder.rb
139
140
  - lib/rbbt/persist/tsv/tokyocabinet.rb
140
141
  - lib/rbbt/resource.rb
141
142
  - lib/rbbt/resource/path.rb
@@ -278,6 +279,7 @@ files:
278
279
  - test/rbbt/persist/tsv/test_kyotocabinet.rb
279
280
  - test/rbbt/persist/tsv/test_leveldb.rb
280
281
  - test/rbbt/persist/tsv/test_lmdb.rb
282
+ - test/rbbt/persist/tsv/test_sharder.rb
281
283
  - test/rbbt/persist/tsv/test_tokyocabinet.rb
282
284
  - test/rbbt/resource/test_path.rb
283
285
  - test/rbbt/test_annotations.rb
@@ -395,6 +397,7 @@ test_files:
395
397
  - test/rbbt/persist/test_tsv.rb
396
398
  - test/rbbt/persist/tsv/test_lmdb.rb
397
399
  - test/rbbt/persist/tsv/test_kyotocabinet.rb
400
+ - test/rbbt/persist/tsv/test_sharder.rb
398
401
  - test/rbbt/persist/tsv/test_cdb.rb
399
402
  - test/rbbt/persist/tsv/test_tokyocabinet.rb
400
403
  - test/rbbt/persist/tsv/test_leveldb.rb