rbbt-util 5.13.23 → 5.13.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 716be8fb2d3ec4a9e9cc76482f96771856976191
4
- data.tar.gz: 958406db01b705360855b2a6bff88b0e0ff804b0
3
+ metadata.gz: 4cb02f1584dee7188f5a2bc771f733eb81c46f1a
4
+ data.tar.gz: 47850a9f40b69fc8cb761dbefea3aec895c1c955
5
5
  SHA512:
6
- metadata.gz: a3e9739aba63e67c6507ac8e7b31393d6eaae60dde59e5c0bea00faf3132b732a9f5248cb52e3d552c57834cd157258c2799f9993c2caff35d0a65f2c723548b
7
- data.tar.gz: 22aa1b9d063dbe6d6150e47e9949dd863ed0c7ba8cdaa90fbb1b61ea4a588ed696694a25daf5f1dfabf54e130339083e55c684a7e78ff37625596f92a8643199
6
+ metadata.gz: f3459787e3d4aa401917919e6a70bbb4c314477dbce5f9251f0723dbc905af70170f0e844b6d905fe9fd86a232acd1cc98553d3ed84ebca91743a06f0149610c
7
+ data.tar.gz: 2028a7f0dc8de1adfb4a69e54f139d4a2a329c6e094b4043618792314874e7ee4da08b72327912800d899cca258cccbf015dee6f10f13ffabb9283132f823929
@@ -28,8 +28,6 @@ rescue Exception
28
28
  Log.debug "The kyotocabinet gem could not be loaded. Persistance using this engine will fail."
29
29
  end
30
30
 
31
- require 'rbbt/persist/tsv/sharder'
32
-
33
31
  module Persist
34
32
  CONNECTIONS = {}
35
33
 
@@ -85,14 +83,23 @@ module Persist
85
83
 
86
84
  if is_persisted?(path) and not persist_options[:update]
87
85
  Log.debug "TSV persistence up-to-date: #{ path }"
88
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
86
+ if persist_options[:shard_function]
87
+ return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
88
+ else
89
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
90
+ end
89
91
  end
90
92
 
91
93
  Misc.lock lock_filename do
92
94
  begin
93
95
  if is_persisted?(path) and not persist_options[:update]
94
96
  Log.debug "TSV persistence (suddenly) up-to-date: #{ path }"
95
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
97
+
98
+ if persist_options[:shard_function]
99
+ return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
100
+ else
101
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
102
+ end
96
103
  end
97
104
 
98
105
  FileUtils.rm path if File.exists? path
@@ -101,8 +108,15 @@ module Persist
101
108
 
102
109
  tmp_path = path + '.persist'
103
110
 
104
- data = open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
105
- data.serializer = :type if TSV === data and data.serializer.nil?
111
+ data = if persist_options[:shard_function]
112
+ open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], &persist_options[:shard_function])
113
+ else
114
+ open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
115
+ end
116
+
117
+ if TSV === data and data.serializer.nil?
118
+ data.serializer = :type
119
+ end
106
120
 
107
121
  data.write_and_read do
108
122
  yield data
@@ -123,3 +137,6 @@ module Persist
123
137
  end
124
138
  end
125
139
  end
140
+
141
+ require 'rbbt/persist/tsv/sharder'
142
+
@@ -1,23 +1,66 @@
1
- require 'rbbt-util'
2
-
3
1
  module Persist
2
+ module SharderAdapter
3
+ def self.open(path, write, type=nil, &block)
4
+
5
+ database = CONNECTIONS[path] ||= Sharder.new(path, write, type, &block)
6
+
7
+ database.extend Persist::SharderAdapter unless Persist::SharderAdapter === database
8
+
9
+ database
10
+ end
11
+
12
+ end
13
+
4
14
  class Sharder
5
- attr_accessor :directory, :params, :shard_function, :databases, :closed, :writable, :mutex
15
+ attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type
6
16
 
7
- def initialize(directory, *rest, &block)
17
+ def initialize(persistence_path, write = false, db_type=nil, &block)
8
18
  @shard_function = block
9
- @params = rest
10
- @databases = {}
11
- @directory = directory
19
+ @persistence_path = Path.setup(persistence_path)
12
20
  @mutex = Mutex.new
21
+ @writable = write
22
+ @db_type = db_type
23
+
24
+ if write
25
+ @databases = {}
26
+ end
27
+ end
28
+
29
+ def <<(key,value)
30
+ self[key] = value
31
+ end
32
+
33
+ def persistence_path=(path)
34
+ @persistence_path = path
35
+ end
36
+
37
+ def databases
38
+ @databases ||= begin
39
+ hash = {}
40
+ @persistence_path.glob('shard-*').each do |f|
41
+ shard = File.basename(f).match(/shard-(.*)/)[1]
42
+ hash[shard] = Persist.open_database(f, false, :clean, db_type)
43
+ end
44
+ hash
45
+ end
13
46
  end
14
47
 
15
48
  def database(key)
16
- shard = shard_function.call(key)
17
- databases[shard] ||= begin
18
- path = File.join(directory, 'shard-' << shard.to_s)
19
- Persist.open_database(path, *params)
20
- end
49
+ shard = key =~ /__tsv_/ ? "0" : shard_function.call(key)
50
+ if databases.include? shard
51
+ databases[shard]
52
+ else
53
+ database ||= begin
54
+ path = File.join(persistence_path, 'shard-' << shard.to_s)
55
+ (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type) : nil
56
+ end
57
+ if database
58
+ databases[shard] = database
59
+ else
60
+ Log.warn "Database #{ path } missing" if
61
+ nil
62
+ end
63
+ end
21
64
  end
22
65
 
23
66
  MAX_CHAR = 255.chr
@@ -69,13 +112,17 @@ module Persist
69
112
  end
70
113
 
71
114
  def each
72
- databases.each do |database|
115
+ databases.values.each do |database|
73
116
  database.each do |k,v|
74
117
  yield k, v
75
118
  end
76
119
  end
77
120
  end
78
121
 
122
+ def include?(key)
123
+ self[key] != nil
124
+ end
125
+
79
126
  def collect
80
127
  res = []
81
128
  each do |key, value|
@@ -89,7 +136,7 @@ module Persist
89
136
  end
90
137
 
91
138
  def write_and_read
92
- lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
139
+ lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
93
140
  Misc.lock(lock_filename) do
94
141
  @mutex.synchronize do
95
142
  write if @closed or not write?
@@ -104,7 +151,7 @@ module Persist
104
151
  end
105
152
 
106
153
  def write_and_close
107
- lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
154
+ lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
108
155
  Misc.lock(lock_filename) do
109
156
  @mutex.synchronize do
110
157
  write if @closed or not write?
@@ -137,15 +184,15 @@ module Persist
137
184
  end
138
185
 
139
186
  def keys
140
- databases.values.collect{|d| d.keys }.flatten
187
+ databases.values.collect{|d| d.keys }.flatten - TSV::ENTRY_KEYS.to_a
141
188
  end
142
189
 
143
- def []=(key, value)
144
- database(key)[key] = value
190
+ def []=(key, value, clean = false)
191
+ database(key).send(:[]=, key, value)
145
192
  end
146
193
 
147
- def [](key, value)
148
- database(key)[key]
194
+ def [](key, clean=false)
195
+ v = database(key).send(:[], key)
149
196
  end
150
197
 
151
198
  def <<(p)
@@ -165,4 +212,19 @@ module Persist
165
212
  databases.values.each{|database| database.close }
166
213
  end
167
214
  end
215
+
216
+ def self.open_sharder(path, write, serializer = nil, tokyocabinet_class = TokyoCabinet::HDB, &shard_function)
217
+ write = true unless File.exists? path
218
+
219
+ FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
220
+
221
+ database = Persist::SharderAdapter.open(path, write, tokyocabinet_class, &shard_function)
222
+
223
+ unless serializer == :clean
224
+ TSV.setup database
225
+ database.serializer = serializer if serializer
226
+ end
227
+
228
+ database
229
+ end
168
230
  end
@@ -5,8 +5,8 @@ module Persist
5
5
  module TCAdapter
6
6
  attr_accessor :persistence_path, :tokyocabinet_class, :closed, :writable, :mutex
7
7
 
8
- def self.open(path, write, tokyocabinet_class = TokyoCabinet::HDB)
9
- tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB"
8
+ def self.open(path, write, serializer, tokyocabinet_class = TokyoCabinet::HDB)
9
+ tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB" or tokyocabinet_class.nil?
10
10
  tokyocabinet_class = TokyoCabinet::BDB if tokyocabinet_class == "BDB"
11
11
 
12
12
  database = CONNECTIONS[path] ||= tokyocabinet_class.new
@@ -80,12 +80,6 @@ module Persist
80
80
  def read?
81
81
  ! write?
82
82
  end
83
- #def each
84
- # iterinit
85
- # while key = iternext
86
- # yield key, get(key)
87
- # end
88
- #end
89
83
 
90
84
  def collect
91
85
  res = []
@@ -164,7 +158,7 @@ module Persist
164
158
 
165
159
  FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
166
160
 
167
- database = Persist::TCAdapter.open(path, write, tokyocabinet_class)
161
+ database = Persist::TCAdapter.open(path, write, serializer, tokyocabinet_class)
168
162
 
169
163
  unless serializer == :clean
170
164
  TSV.setup database
@@ -243,7 +243,7 @@ module TSV
243
243
  next if ENTRY_KEYS.include? key
244
244
 
245
245
  # TODO Update this to be more efficient
246
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
246
+ value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
247
247
 
248
248
  # Annotated with Entity and NamedArray
249
249
  if not @unnamed
@@ -417,13 +417,15 @@ module TSV
417
417
  end
418
418
 
419
419
  def namespace=(value)
420
- self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
420
+ self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value), true)
421
421
  @namespace = value
422
422
  @entity_options = nil
423
423
  end
424
424
 
425
425
  def fields=(value)
426
- self.send(:[]=, "__tsv_hash_fields", value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
426
+ clean = true
427
+ value_ym = value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value)
428
+ self.send(:[]=, "__tsv_hash_fields", value_ym, clean)
427
429
  @fields = value
428
430
  @named_fields = nil
429
431
  end
@@ -54,7 +54,7 @@ module TSV
54
54
  def self.report(msg, obj, into)
55
55
  into = into[:into] if Hash === into and into.include? :into
56
56
 
57
- Log.medium "#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"
57
+ Log.medium{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
58
58
  end
59
59
 
60
60
  #{{{ TRAVERSE OBJECTS
@@ -135,7 +135,7 @@ module TSV
135
135
  callback, bar, join = Misc.process_options options, :callback, :bar, :join
136
136
  if File === io and io.closed?
137
137
  begin
138
- Log.medium "Rewinding stream #{stream_name(io)}"
138
+ Log.medium{"Rewinding stream #{stream_name(io)}"}
139
139
  io.reopen io.filename, "r"
140
140
  rescue
141
141
  Log.exception $!
@@ -168,7 +168,7 @@ module TSV
168
168
  callback, bar, join = Misc.process_options options, :callback, :bar, :join
169
169
  if File === io and io.closed?
170
170
  begin
171
- Log.medium "Rewinding stream #{stream_name(io)}"
171
+ Log.medium{"Rewinding stream #{stream_name(io)}"}
172
172
  io.reopen io.filename, "r"
173
173
  rescue
174
174
  Log.exception $!
@@ -192,7 +192,7 @@ module TSV
192
192
  options[:type] = :single
193
193
  end
194
194
 
195
- Log.medium "Traversing #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
195
+ Log.medium{"Traversing #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
196
196
  begin
197
197
  case obj
198
198
  when TSV
@@ -256,28 +256,28 @@ module TSV
256
256
  raise "Unknown object for traversal: #{Misc.fingerprint obj }"
257
257
  end
258
258
  rescue IOError
259
- Log.medium "IOError traversing #{stream_name(obj)}: #{$!.message}"
259
+ Log.medium{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
260
260
  stream = obj_stream(obj)
261
261
  stream.abort if stream and stream.respond_to? :abort
262
262
  stream = obj_stream(options[:into])
263
263
  stream.abort if stream.respond_to? :abort
264
264
  raise $!
265
265
  rescue Errno::EPIPE
266
- Log.medium "Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"
266
+ Log.medium{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
267
267
  stream = obj_stream(obj)
268
268
  stream.abort if stream and stream.respond_to? :abort
269
269
  stream = obj_stream(options[:into])
270
270
  stream.abort if stream.respond_to? :abort
271
271
  raise $!
272
272
  rescue Aborted
273
- Log.medium "Aborted traversing #{stream_name(obj)}"
273
+ Log.medium{"Aborted traversing #{stream_name(obj)}"}
274
274
  stream = obj_stream(obj)
275
275
  stream.abort if stream and stream.respond_to? :abort
276
276
  stream = obj_stream(options[:into])
277
277
  stream.abort if stream.respond_to? :abort
278
- Log.medium "Aborted traversing 2 #{stream_name(obj)}"
278
+ Log.medium{"Aborted traversing 2 #{stream_name(obj)}"}
279
279
  rescue Exception
280
- Log.medium "Exception traversing #{stream_name(obj)}"
280
+ Log.medium{"Exception traversing #{stream_name(obj)}"}
281
281
  Log.exception $!
282
282
  stream = obj_stream(obj)
283
283
  stream.abort if stream and stream.respond_to? :abort
@@ -335,7 +335,7 @@ module TSV
335
335
  q.clean
336
336
  end
337
337
  rescue Interrupt, Aborted
338
- Log.medium "Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"
338
+ Log.medium{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
339
339
  q.abort
340
340
  stream = obj_stream(obj)
341
341
  stream.abort if stream.respond_to? :abort
@@ -24,13 +24,13 @@ class TestPersistTSV < Test::Unit::TestCase
24
24
  Misc.benchmark(1, "Build database with #{MAX - 2} entries") do
25
25
  db = TSV.open(file, :fields => [1], :persist => true, :persist_engine => engine, :persist_dir => tmp_file, :type => :single, :unnamed => true)
26
26
  end
27
- test = db.keys.sort{rand}[1..100000]
27
+ _test = db.keys.sort{rand}[1..100000]
28
28
  Misc.benchmark(5, "Access #{test.length} random entries") do
29
- test.each do |k| db[k] end
29
+ _test.each do |k| db[k] end
30
30
  end
31
31
  Log.info "Profiling access to #{test.length} random entries"
32
32
  Misc.profile :min_percent => 0.1 do
33
- test.each do |k| db[k] end
33
+ _test.each do |k| db[k] end
34
34
  end
35
35
  assert_equal "1:10611:G", db["rs189107123"]
36
36
  end
@@ -1,26 +1,38 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
2
2
  require 'test/unit'
3
+ require 'rbbt-util'
3
4
  require 'rbbt/persist/tsv'
4
5
 
5
6
  class TestSharder < Test::Unit::TestCase
6
7
  def test_shard
7
8
  TmpFile.with_file do |dir|
8
- sharder = Persist::Sharder.new dir, true, :float_array, 'HDB' do |key|
9
+ shard_function = Proc.new do |key|
9
10
  key[-1]
10
11
  end
11
12
 
12
- keys = []
13
- size = 1_000_000
14
- Misc.benchmark(2) do
15
- sharder.write_and_read do
16
- size.times do |v|
17
- keys << v.to_s
18
- sharder[v.to_s] = [v, v*2]
19
- end
13
+ size = 10
14
+ sharder = Persist.persist_tsv(nil, "ShardTest", {}, :file => dir, :shard_function => shard_function, :persist => true, :serializer => :float_array) do |db|
15
+ size.times do |v|
16
+ db[v.to_s] = [v, v*2]
20
17
  end
18
+ db
19
+ end
20
+ assert_equal dir, sharder.persistence_path
21
+ assert_equal size, sharder.keys.length
22
+
23
+ assert_equal [2,4], sharder["2"]
24
+ count = 0
25
+ sharder.through do |k,v|
26
+ count += 1
27
+ end
28
+ assert_equal count, size
21
29
 
22
- assert_equal size, sharder.keys.length
30
+ sharder = Persist::Sharder.new dir do |key|
31
+ key[-1]
23
32
  end
33
+
34
+ assert_equal size, sharder.keys.length
35
+
24
36
  end
25
37
  end
26
38
  end
@@ -519,4 +519,12 @@ row2 A AA AAA
519
519
  end
520
520
 
521
521
  end
522
+
523
+ def test_shard
524
+ shard_function = Proc.new do |key|
525
+ key[-1]
526
+ end
527
+ tsv = datafile_test('identifiers').tsv :persist => true, :shard_function => shard_function
528
+ assert_equal 10000, tsv.keys.length + 2
529
+ end
522
530
  end
data/test/test_helper.rb CHANGED
@@ -28,6 +28,6 @@ class Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  def datafile_test(file)
31
- File.join(File.dirname(__FILE__), 'data', file)
31
+ Path.setup(File.join(File.dirname(__FILE__), 'data', file))
32
32
  end
33
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.13.23
4
+ version: 5.13.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-28 00:00:00.000000000 Z
11
+ date: 2014-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake