rbbt-util 5.13.23 → 5.13.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 716be8fb2d3ec4a9e9cc76482f96771856976191
4
- data.tar.gz: 958406db01b705360855b2a6bff88b0e0ff804b0
3
+ metadata.gz: 4cb02f1584dee7188f5a2bc771f733eb81c46f1a
4
+ data.tar.gz: 47850a9f40b69fc8cb761dbefea3aec895c1c955
5
5
  SHA512:
6
- metadata.gz: a3e9739aba63e67c6507ac8e7b31393d6eaae60dde59e5c0bea00faf3132b732a9f5248cb52e3d552c57834cd157258c2799f9993c2caff35d0a65f2c723548b
7
- data.tar.gz: 22aa1b9d063dbe6d6150e47e9949dd863ed0c7ba8cdaa90fbb1b61ea4a588ed696694a25daf5f1dfabf54e130339083e55c684a7e78ff37625596f92a8643199
6
+ metadata.gz: f3459787e3d4aa401917919e6a70bbb4c314477dbce5f9251f0723dbc905af70170f0e844b6d905fe9fd86a232acd1cc98553d3ed84ebca91743a06f0149610c
7
+ data.tar.gz: 2028a7f0dc8de1adfb4a69e54f139d4a2a329c6e094b4043618792314874e7ee4da08b72327912800d899cca258cccbf015dee6f10f13ffabb9283132f823929
@@ -28,8 +28,6 @@ rescue Exception
28
28
  Log.debug "The kyotocabinet gem could not be loaded. Persistance using this engine will fail."
29
29
  end
30
30
 
31
- require 'rbbt/persist/tsv/sharder'
32
-
33
31
  module Persist
34
32
  CONNECTIONS = {}
35
33
 
@@ -85,14 +83,23 @@ module Persist
85
83
 
86
84
  if is_persisted?(path) and not persist_options[:update]
87
85
  Log.debug "TSV persistence up-to-date: #{ path }"
88
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
86
+ if persist_options[:shard_function]
87
+ return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
88
+ else
89
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
90
+ end
89
91
  end
90
92
 
91
93
  Misc.lock lock_filename do
92
94
  begin
93
95
  if is_persisted?(path) and not persist_options[:update]
94
96
  Log.debug "TSV persistence (suddenly) up-to-date: #{ path }"
95
- return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
97
+
98
+ if persist_options[:shard_function]
99
+ return open_sharder(path, false, nil, persist_options[:engine], &persist_options[:shard_function])
100
+ else
101
+ return open_database(path, false, nil, persist_options[:engine] || TokyoCabinet::HDB)
102
+ end
96
103
  end
97
104
 
98
105
  FileUtils.rm path if File.exists? path
@@ -101,8 +108,15 @@ module Persist
101
108
 
102
109
  tmp_path = path + '.persist'
103
110
 
104
- data = open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
105
- data.serializer = :type if TSV === data and data.serializer.nil?
111
+ data = if persist_options[:shard_function]
112
+ open_sharder(tmp_path, true, persist_options[:serializer], persist_options[:engine], &persist_options[:shard_function])
113
+ else
114
+ open_database(tmp_path, true, persist_options[:serializer], persist_options[:engine] || TokyoCabinet::HDB)
115
+ end
116
+
117
+ if TSV === data and data.serializer.nil?
118
+ data.serializer = :type
119
+ end
106
120
 
107
121
  data.write_and_read do
108
122
  yield data
@@ -123,3 +137,6 @@ module Persist
123
137
  end
124
138
  end
125
139
  end
140
+
141
+ require 'rbbt/persist/tsv/sharder'
142
+
@@ -1,23 +1,66 @@
1
- require 'rbbt-util'
2
-
3
1
  module Persist
2
+ module SharderAdapter
3
+ def self.open(path, write, type=nil, &block)
4
+
5
+ database = CONNECTIONS[path] ||= Sharder.new(path, write, type, &block)
6
+
7
+ database.extend Persist::SharderAdapter unless Persist::SharderAdapter === database
8
+
9
+ database
10
+ end
11
+
12
+ end
13
+
4
14
  class Sharder
5
- attr_accessor :directory, :params, :shard_function, :databases, :closed, :writable, :mutex
15
+ attr_accessor :persistence_path, :shard_function, :databases, :closed, :writable, :mutex, :db_type
6
16
 
7
- def initialize(directory, *rest, &block)
17
+ def initialize(persistence_path, write = false, db_type=nil, &block)
8
18
  @shard_function = block
9
- @params = rest
10
- @databases = {}
11
- @directory = directory
19
+ @persistence_path = Path.setup(persistence_path)
12
20
  @mutex = Mutex.new
21
+ @writable = write
22
+ @db_type = db_type
23
+
24
+ if write
25
+ @databases = {}
26
+ end
27
+ end
28
+
29
+ def <<(key,value)
30
+ self[key] = value
31
+ end
32
+
33
+ def persistence_path=(path)
34
+ @persistence_path = path
35
+ end
36
+
37
+ def databases
38
+ @databases ||= begin
39
+ hash = {}
40
+ @persistence_path.glob('shard-*').each do |f|
41
+ shard = File.basename(f).match(/shard-(.*)/)[1]
42
+ hash[shard] = Persist.open_database(f, false, :clean, db_type)
43
+ end
44
+ hash
45
+ end
13
46
  end
14
47
 
15
48
  def database(key)
16
- shard = shard_function.call(key)
17
- databases[shard] ||= begin
18
- path = File.join(directory, 'shard-' << shard.to_s)
19
- Persist.open_database(path, *params)
20
- end
49
+ shard = key =~ /__tsv_/ ? "0" : shard_function.call(key)
50
+ if databases.include? shard
51
+ databases[shard]
52
+ else
53
+ database ||= begin
54
+ path = File.join(persistence_path, 'shard-' << shard.to_s)
55
+ (writable or File.exists?(path)) ? Persist.open_database(path, writable, :clean, db_type) : nil
56
+ end
57
+ if database
58
+ databases[shard] = database
59
+ else
60
+ Log.warn "Database #{ path } missing" if
61
+ nil
62
+ end
63
+ end
21
64
  end
22
65
 
23
66
  MAX_CHAR = 255.chr
@@ -69,13 +112,17 @@ module Persist
69
112
  end
70
113
 
71
114
  def each
72
- databases.each do |database|
115
+ databases.values.each do |database|
73
116
  database.each do |k,v|
74
117
  yield k, v
75
118
  end
76
119
  end
77
120
  end
78
121
 
122
+ def include?(key)
123
+ self[key] != nil
124
+ end
125
+
79
126
  def collect
80
127
  res = []
81
128
  each do |key, value|
@@ -89,7 +136,7 @@ module Persist
89
136
  end
90
137
 
91
138
  def write_and_read
92
- lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
139
+ lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
93
140
  Misc.lock(lock_filename) do
94
141
  @mutex.synchronize do
95
142
  write if @closed or not write?
@@ -104,7 +151,7 @@ module Persist
104
151
  end
105
152
 
106
153
  def write_and_close
107
- lock_filename = Persist.persistence_path(File.join(directory, 'write'), {:dir => TSV.lock_dir})
154
+ lock_filename = Persist.persistence_path(File.join(persistence_path, 'write'), {:dir => TSV.lock_dir})
108
155
  Misc.lock(lock_filename) do
109
156
  @mutex.synchronize do
110
157
  write if @closed or not write?
@@ -137,15 +184,15 @@ module Persist
137
184
  end
138
185
 
139
186
  def keys
140
- databases.values.collect{|d| d.keys }.flatten
187
+ databases.values.collect{|d| d.keys }.flatten - TSV::ENTRY_KEYS.to_a
141
188
  end
142
189
 
143
- def []=(key, value)
144
- database(key)[key] = value
190
+ def []=(key, value, clean = false)
191
+ database(key).send(:[]=, key, value)
145
192
  end
146
193
 
147
- def [](key, value)
148
- database(key)[key]
194
+ def [](key, clean=false)
195
+ v = database(key).send(:[], key)
149
196
  end
150
197
 
151
198
  def <<(p)
@@ -165,4 +212,19 @@ module Persist
165
212
  databases.values.each{|database| database.close }
166
213
  end
167
214
  end
215
+
216
+ def self.open_sharder(path, write, serializer = nil, tokyocabinet_class = TokyoCabinet::HDB, &shard_function)
217
+ write = true unless File.exists? path
218
+
219
+ FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
220
+
221
+ database = Persist::SharderAdapter.open(path, write, tokyocabinet_class, &shard_function)
222
+
223
+ unless serializer == :clean
224
+ TSV.setup database
225
+ database.serializer = serializer if serializer
226
+ end
227
+
228
+ database
229
+ end
168
230
  end
@@ -5,8 +5,8 @@ module Persist
5
5
  module TCAdapter
6
6
  attr_accessor :persistence_path, :tokyocabinet_class, :closed, :writable, :mutex
7
7
 
8
- def self.open(path, write, tokyocabinet_class = TokyoCabinet::HDB)
9
- tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB"
8
+ def self.open(path, write, serializer, tokyocabinet_class = TokyoCabinet::HDB)
9
+ tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB" or tokyocabinet_class.nil?
10
10
  tokyocabinet_class = TokyoCabinet::BDB if tokyocabinet_class == "BDB"
11
11
 
12
12
  database = CONNECTIONS[path] ||= tokyocabinet_class.new
@@ -80,12 +80,6 @@ module Persist
80
80
  def read?
81
81
  ! write?
82
82
  end
83
- #def each
84
- # iterinit
85
- # while key = iternext
86
- # yield key, get(key)
87
- # end
88
- #end
89
83
 
90
84
  def collect
91
85
  res = []
@@ -164,7 +158,7 @@ module Persist
164
158
 
165
159
  FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
166
160
 
167
- database = Persist::TCAdapter.open(path, write, tokyocabinet_class)
161
+ database = Persist::TCAdapter.open(path, write, serializer, tokyocabinet_class)
168
162
 
169
163
  unless serializer == :clean
170
164
  TSV.setup database
@@ -243,7 +243,7 @@ module TSV
243
243
  next if ENTRY_KEYS.include? key
244
244
 
245
245
  # TODO Update this to be more efficient
246
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
246
+ value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
247
247
 
248
248
  # Annotated with Entity and NamedArray
249
249
  if not @unnamed
@@ -417,13 +417,15 @@ module TSV
417
417
  end
418
418
 
419
419
  def namespace=(value)
420
- self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
420
+ self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value), true)
421
421
  @namespace = value
422
422
  @entity_options = nil
423
423
  end
424
424
 
425
425
  def fields=(value)
426
- self.send(:[]=, "__tsv_hash_fields", value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
426
+ clean = true
427
+ value_ym = value.nil? ? SERIALIZED_NIL : TSV::TSV_SERIALIZER.dump(value)
428
+ self.send(:[]=, "__tsv_hash_fields", value_ym, clean)
427
429
  @fields = value
428
430
  @named_fields = nil
429
431
  end
@@ -54,7 +54,7 @@ module TSV
54
54
  def self.report(msg, obj, into)
55
55
  into = into[:into] if Hash === into and into.include? :into
56
56
 
57
- Log.medium "#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"
57
+ Log.medium{"#{ msg } #{stream_name(obj)} -> #{stream_name(into)}"}
58
58
  end
59
59
 
60
60
  #{{{ TRAVERSE OBJECTS
@@ -135,7 +135,7 @@ module TSV
135
135
  callback, bar, join = Misc.process_options options, :callback, :bar, :join
136
136
  if File === io and io.closed?
137
137
  begin
138
- Log.medium "Rewinding stream #{stream_name(io)}"
138
+ Log.medium{"Rewinding stream #{stream_name(io)}"}
139
139
  io.reopen io.filename, "r"
140
140
  rescue
141
141
  Log.exception $!
@@ -168,7 +168,7 @@ module TSV
168
168
  callback, bar, join = Misc.process_options options, :callback, :bar, :join
169
169
  if File === io and io.closed?
170
170
  begin
171
- Log.medium "Rewinding stream #{stream_name(io)}"
171
+ Log.medium{"Rewinding stream #{stream_name(io)}"}
172
172
  io.reopen io.filename, "r"
173
173
  rescue
174
174
  Log.exception $!
@@ -192,7 +192,7 @@ module TSV
192
192
  options[:type] = :single
193
193
  end
194
194
 
195
- Log.medium "Traversing #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"
195
+ Log.medium{"Traversing #{stream_name(obj)} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
196
196
  begin
197
197
  case obj
198
198
  when TSV
@@ -256,28 +256,28 @@ module TSV
256
256
  raise "Unknown object for traversal: #{Misc.fingerprint obj }"
257
257
  end
258
258
  rescue IOError
259
- Log.medium "IOError traversing #{stream_name(obj)}: #{$!.message}"
259
+ Log.medium{"IOError traversing #{stream_name(obj)}: #{$!.message}"}
260
260
  stream = obj_stream(obj)
261
261
  stream.abort if stream and stream.respond_to? :abort
262
262
  stream = obj_stream(options[:into])
263
263
  stream.abort if stream.respond_to? :abort
264
264
  raise $!
265
265
  rescue Errno::EPIPE
266
- Log.medium "Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"
266
+ Log.medium{"Pipe closed while traversing #{stream_name(obj)}: #{$!.message}"}
267
267
  stream = obj_stream(obj)
268
268
  stream.abort if stream and stream.respond_to? :abort
269
269
  stream = obj_stream(options[:into])
270
270
  stream.abort if stream.respond_to? :abort
271
271
  raise $!
272
272
  rescue Aborted
273
- Log.medium "Aborted traversing #{stream_name(obj)}"
273
+ Log.medium{"Aborted traversing #{stream_name(obj)}"}
274
274
  stream = obj_stream(obj)
275
275
  stream.abort if stream and stream.respond_to? :abort
276
276
  stream = obj_stream(options[:into])
277
277
  stream.abort if stream.respond_to? :abort
278
- Log.medium "Aborted traversing 2 #{stream_name(obj)}"
278
+ Log.medium{"Aborted traversing 2 #{stream_name(obj)}"}
279
279
  rescue Exception
280
- Log.medium "Exception traversing #{stream_name(obj)}"
280
+ Log.medium{"Exception traversing #{stream_name(obj)}"}
281
281
  Log.exception $!
282
282
  stream = obj_stream(obj)
283
283
  stream.abort if stream and stream.respond_to? :abort
@@ -335,7 +335,7 @@ module TSV
335
335
  q.clean
336
336
  end
337
337
  rescue Interrupt, Aborted
338
- Log.medium "Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"
338
+ Log.medium{"Aborted traversal in CPUs for #{stream_name(obj) || Misc.fingerprint(obj)}: #{$!.backtrace*","}"}
339
339
  q.abort
340
340
  stream = obj_stream(obj)
341
341
  stream.abort if stream.respond_to? :abort
@@ -24,13 +24,13 @@ class TestPersistTSV < Test::Unit::TestCase
24
24
  Misc.benchmark(1, "Build database with #{MAX - 2} entries") do
25
25
  db = TSV.open(file, :fields => [1], :persist => true, :persist_engine => engine, :persist_dir => tmp_file, :type => :single, :unnamed => true)
26
26
  end
27
- test = db.keys.sort{rand}[1..100000]
27
+ _test = db.keys.sort{rand}[1..100000]
28
28
  Misc.benchmark(5, "Access #{test.length} random entries") do
29
- test.each do |k| db[k] end
29
+ _test.each do |k| db[k] end
30
30
  end
31
31
  Log.info "Profiling access to #{test.length} random entries"
32
32
  Misc.profile :min_percent => 0.1 do
33
- test.each do |k| db[k] end
33
+ _test.each do |k| db[k] end
34
34
  end
35
35
  assert_equal "1:10611:G", db["rs189107123"]
36
36
  end
@@ -1,26 +1,38 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
2
2
  require 'test/unit'
3
+ require 'rbbt-util'
3
4
  require 'rbbt/persist/tsv'
4
5
 
5
6
  class TestSharder < Test::Unit::TestCase
6
7
  def test_shard
7
8
  TmpFile.with_file do |dir|
8
- sharder = Persist::Sharder.new dir, true, :float_array, 'HDB' do |key|
9
+ shard_function = Proc.new do |key|
9
10
  key[-1]
10
11
  end
11
12
 
12
- keys = []
13
- size = 1_000_000
14
- Misc.benchmark(2) do
15
- sharder.write_and_read do
16
- size.times do |v|
17
- keys << v.to_s
18
- sharder[v.to_s] = [v, v*2]
19
- end
13
+ size = 10
14
+ sharder = Persist.persist_tsv(nil, "ShardTest", {}, :file => dir, :shard_function => shard_function, :persist => true, :serializer => :float_array) do |db|
15
+ size.times do |v|
16
+ db[v.to_s] = [v, v*2]
20
17
  end
18
+ db
19
+ end
20
+ assert_equal dir, sharder.persistence_path
21
+ assert_equal size, sharder.keys.length
22
+
23
+ assert_equal [2,4], sharder["2"]
24
+ count = 0
25
+ sharder.through do |k,v|
26
+ count += 1
27
+ end
28
+ assert_equal count, size
21
29
 
22
- assert_equal size, sharder.keys.length
30
+ sharder = Persist::Sharder.new dir do |key|
31
+ key[-1]
23
32
  end
33
+
34
+ assert_equal size, sharder.keys.length
35
+
24
36
  end
25
37
  end
26
38
  end
@@ -519,4 +519,12 @@ row2 A AA AAA
519
519
  end
520
520
 
521
521
  end
522
+
523
+ def test_shard
524
+ shard_function = Proc.new do |key|
525
+ key[-1]
526
+ end
527
+ tsv = datafile_test('identifiers').tsv :persist => true, :shard_function => shard_function
528
+ assert_equal 10000, tsv.keys.length + 2
529
+ end
522
530
  end
data/test/test_helper.rb CHANGED
@@ -28,6 +28,6 @@ class Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  def datafile_test(file)
31
- File.join(File.dirname(__FILE__), 'data', file)
31
+ Path.setup(File.join(File.dirname(__FILE__), 'data', file))
32
32
  end
33
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.13.23
4
+ version: 5.13.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-28 00:00:00.000000000 Z
11
+ date: 2014-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake