purecdb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +50 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +36 -0
- data/LICENSE +21 -0
- data/Makefile +13 -0
- data/README.md +91 -0
- data/Rakefile +1 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/purecdb.rb +8 -0
- data/lib/purecdb/base.rb +171 -0
- data/lib/purecdb/reader.rb +153 -0
- data/lib/purecdb/version.rb +4 -0
- data/lib/purecdb/writer.rb +118 -0
- data/purecdb.gemspec +30 -0
- metadata +125 -0
data/.gitignore
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
purecdb (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.2.5)
|
10
|
+
json (1.8.3)
|
11
|
+
rake (10.4.2)
|
12
|
+
rdoc (4.2.2)
|
13
|
+
json (~> 1.4)
|
14
|
+
rspec (3.5.0)
|
15
|
+
rspec-core (~> 3.5.0)
|
16
|
+
rspec-expectations (~> 3.5.0)
|
17
|
+
rspec-mocks (~> 3.5.0)
|
18
|
+
rspec-core (3.5.0)
|
19
|
+
rspec-support (~> 3.5.0)
|
20
|
+
rspec-expectations (3.5.0)
|
21
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
22
|
+
rspec-support (~> 3.5.0)
|
23
|
+
rspec-mocks (3.5.0)
|
24
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
25
|
+
rspec-support (~> 3.5.0)
|
26
|
+
rspec-support (3.5.0)
|
27
|
+
|
28
|
+
PLATFORMS
|
29
|
+
ruby
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
bundler (~> 1.9)
|
33
|
+
purecdb!
|
34
|
+
rake (~> 10.0)
|
35
|
+
rdoc
|
36
|
+
rspec
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Hokstad Consulting Ltd
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/Makefile
ADDED
data/README.md
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# PureCDB
|
2
|
+
|
3
|
+
A Pure Ruby CDB reader/writer w/64 bit extensions
|
4
|
+
|
5
|
+
For information about CDB, see: http://cr.yp.to/cdb.html
|
6
|
+
|
7
|
+
The motivation for writing this was:
|
8
|
+
|
9
|
+
* Bernstein's CDB format can only handle files up to 4GB. For a past project
|
10
|
+
we needed a simple CDB style file for datasets several times that.
|
11
|
+
|
12
|
+
* The C library is under a license that prevents us from releasing modified versions of it,
|
13
|
+
but the format is so simple that writing our own reader and writer was easy.
|
14
|
+
|
15
|
+
* We don't like depending on C extensions for Ruby code if we don't have to.
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
## Installation
|
20
|
+
|
21
|
+
Add this line to your application's Gemfile:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
gem 'purecdb'
|
25
|
+
```
|
26
|
+
|
27
|
+
And then execute:
|
28
|
+
|
29
|
+
$ bundle
|
30
|
+
|
31
|
+
Or install it yourself as:
|
32
|
+
|
33
|
+
$ gem install purecdb
|
34
|
+
|
35
|
+
## Basic Usage
|
36
|
+
|
37
|
+
To create a 32 bit (standard) CDB file:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
PureCDB::Writer.open("/tmp/somecdbfile.cdb") do |cdb|
|
41
|
+
cdb.add("key","value")
|
42
|
+
end
|
43
|
+
```
|
44
|
+
|
45
|
+
To instead create a 64 bit file, pass {mode: 64} as the second argument to PureCDB::Writer#open .
|
46
|
+
|
47
|
+
|
48
|
+
To read a 32 bit (standard) CDB file:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
PureCDB::Reader.open("/tmp/somecdbfile.cdb") do |r|
|
52
|
+
p r.values("key")
|
53
|
+
end
|
54
|
+
```
|
55
|
+
|
56
|
+
To instead read a 64 bit file, pass {mode: 64} as the second argument to PureCDB::Reader#open,
|
57
|
+
or let the reader auto-detect the format.
|
58
|
+
|
59
|
+
See PureCDB::Reader#new for additional usage.
|
60
|
+
|
61
|
+
|
62
|
+
## 64-bit Format
|
63
|
+
|
64
|
+
The 64 bit file format follows http://cr.yp.to/cdb/cdb.txt *except* that any
|
65
|
+
reference to 32-bit should be replaced by 64-bit, and that a 64 bit file
|
66
|
+
*ends* with the magic cookie "cdb64:01"
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
## Development
|
72
|
+
|
73
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
74
|
+
`bin/console` for an interactive prompt that will allow you to experiment.
|
75
|
+
|
76
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
77
|
+
release a new version, update the version number in `version.rb`, and then run
|
78
|
+
`bundle exec rake release` to create a git tag for the version, push git commits
|
79
|
+
and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
80
|
+
|
81
|
+
To run the Rspec tests, you need tinycdb or a command-line compatible implementation
|
82
|
+
installed for interoperability tests.
|
83
|
+
|
84
|
+
|
85
|
+
## Contributing
|
86
|
+
|
87
|
+
1. Fork it ( https://github.com/hokstadconsulting/purecdb/fork )
|
88
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
89
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
90
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
91
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "purecdb"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/lib/purecdb.rb
ADDED
data/lib/purecdb/base.rb
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
module PureCDB
|
2
|
+
|
3
|
+
#
|
4
|
+
# Base class with shared functionality for PureCDB::Reader and
|
5
|
+
# PureCDB::Writer that abstracts away 32 bit vs. 64 bit format
|
6
|
+
# details. You should not need to use this directly.
|
7
|
+
#
|
8
|
+
class Base
  # The CDB format contains 256 separate hashes by default.
  DEFAULT_NUM_HASHES = 256

  # Keys and values have a length indicator. In the standard format this is 4 bytes long.
  # In the 64 bit format, this value is multiplied by 2.
  DEFAULT_LENGTH_SIZE = 4

  # The "pointer" (offset) pointing to a given hash is this many bytes by default. In the
  # 64 bit format this is multiplied by 2.
  DEFAULT_HASHPTR_SIZE = 4

  # Number of bytes that will be buffered
  BUFFER_SIZE = 4096

  # Magic cookie used to indicate that this is a 64 bit (non-standard) CDB file
  # rather than a 32-bit CDB file.
  CDB64_MAGIC = "cdb64:01"

  # Sentinel default for #hash, used to tell "called without a key" (Ruby's
  # own Object#hash protocol) apart from "called with a nil key".
  NO_KEY = Object.new.freeze

  # The number of hashes; #set_mode always sets this to DEFAULT_NUM_HASHES.
  attr_reader :num_hashes

  # The actual number of bytes per length field (depends on 32 vs 64 bit)
  attr_reader :length_size

  # The actual number of bytes per hash pointer (depends on 32 vs 64 bit)
  attr_reader :hashptr_size

  # 32 for 32-bit files, 64 for 64-bit files, or :detect until a mode has
  # been chosen.
  attr_reader :mode

  # The size of each hash slot (one pointer-sized field plus one
  # length-sized field).
  def hashref_size
    hashptr_size + length_size
  end

  # The size of the table of pointers to the hashes
  def hash_size
    hashref_size * num_hashes
  end

  #
  # Used by PureCDB::Reader and PureCDB::Writer to set 32/64 bit mode.
  #
  # +mode+ must be 32 or 64. Anything else raises ArgumentError: the field
  # sizes would be set up as 32 bit while the pack/unpack helpers would
  # treat the data as 64 bit, producing or reading garbage.
  #
  def set_mode mode
    unless mode == 32 || mode == 64
      raise ArgumentError, "Unsupported mode #{mode.inspect} (expected 32 or 64)"
    end

    @mode = mode
    @num_hashes = DEFAULT_NUM_HASHES
    if @mode == 64
      @length_size  = DEFAULT_LENGTH_SIZE * 2
      @hashptr_size = DEFAULT_HASHPTR_SIZE * 2
    else
      @length_size  = DEFAULT_LENGTH_SIZE
      @hashptr_size = DEFAULT_HASHPTR_SIZE
    end
  end

  #
  # Parses options and sets mode. Do not call directly -
  # Use PureCDB::Reader or PureCDB::Writer
  #
  # +options+ is zero or more Hash-like objects. Only the +mode+ entry
  # (Symbol or String key) is consulted; its value may be 32, 64 or
  # :detect. Other keys are ignored so they can no longer overwrite the
  # mode by accident. Without a +mode+ entry the mode is left as :detect
  # for the subclass to resolve.
  #
  def initialize *options
    mode = :detect
    options.each do |h|
      h.each do |opt, val|
        mode = val if opt.to_s == "mode"
      end
    end

    # Used to speed up 64bit pack/unpack: on a little endian host the
    # native "Q" directive already produces the on-disk byte order.
    @little_endian = [123456789].pack("L") == [123456789].pack("V")

    if mode == :detect
      @mode = :detect
    else
      set_mode(mode)
    end
  end

  # Used by PureCDB::Reader and PureCDB::Writer to set an IO-like object
  # to read from/write to. Objects that do not respond to +sysseek+ are
  # wrapped in a SysIOWrapper.
  def set_stream target
    if target.respond_to?(:sysseek)
      @io = target
    else
      @io = SysIOWrapper.new(target)
    end
    @name = "<stream>"
  end

  # CDB hash of +key+, as per http://cr.yp.to/cdb/cdb.txt
  #
  # The key is converted with +to_s+ and hashed byte by byte; the result is
  # an Integer in the range 0..0xffffffff.
  #
  # When called without an argument this defers to Object#hash, so that
  # instances keep working as Hash keys even though this method shares
  # its name with Ruby's own hashing protocol.
  def hash key = NO_KEY
    return super() if NO_KEY.equal?(key)

    h = 5381 # Magic
    key.to_s.each_byte do |c|
      # We & it since Ruby uses big ints,
      # so it can't overflow and need to be clamped.

      # FIXME: For 64 bit version we use 64 bit numbers in the slots, so could increase this.
      h = (((h << 5) + h) ^ c) & 0xffffffff
    end
    h
  end

  private

  # Pack an array of Integers as little endian words: 32 bit words in 32
  # bit mode, 64 bit words otherwise. On big endian hosts each 64 bit word
  # is emitted as two little endian 32 bit halves, low half first.
  def ary_pack(ary)
    if @mode == 32
      ary.pack("V*")
    elsif @little_endian
      ary.pack("Q*")
    else
      ary.flat_map { |a| [a & 0xffffffff, (a >> 32) & 0xffffffff] }.pack("V*")
    end
  end

  # Inverse of #ary_pack: unpack +num+ little endian words from +data+ and
  # return them as an array of Integers.
  def ary_unpack(data, num)
    if @mode == 32
      data.unpack("V#{num}")
    elsif @little_endian
      data.unpack("Q#{num}")
    else
      # Reassemble each 64 bit value from its low and high 32 bit halves.
      data.unpack("V#{num * 2}").each_slice(2).map { |lo, hi| lo + (hi << 32) }
    end
  end
end
|
140
|
+
|
141
|
+
#
|
142
|
+
# Wrap an object that does not have all of +sysseek/syswrite/sysread+
|
143
|
+
# but that does have +seek/write/read+ with similar semantics. This is
|
144
|
+
# primarily intended for use with +StringIO+, which lacks +sysseek+
|
145
|
+
#
|
146
|
+
# It is automatically interjected on creating a +PureCDB::Reader+ or
|
147
|
+
# +PureCDB::Writer+ if the IO-like object that is passed lacks +sysseek+
|
148
|
+
# so you should normally not need to think about this class
|
149
|
+
#
|
150
|
+
class SysIOWrapper
  # Keep a reference to the wrapped IO-like object; every call below is
  # forwarded to it.
  def initialize(target)
    @delegate = target
  end

  # Forwards to the wrapped object's +seek+ with the same arguments.
  def sysseek(offset, mode)
    @delegate.seek(offset, mode)
  end

  # Forwards to the wrapped object's +read+, asking for +size+ bytes.
  def sysread(size)
    @delegate.read(size)
  end

  # Forwards to the wrapped object's +write+ with +buffer+.
  def syswrite(buffer)
    @delegate.write(buffer)
  end
end
|
170
|
+
|
171
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
module PureCDB
|
2
|
+
|
3
|
+
#
|
4
|
+
# Read 32 bit or 64 bit CDB files.
|
5
|
+
#
|
6
|
+
class Reader < Base
  include Enumerable

  # Mmap-backed reads are switched off: the original code created a mapping
  # but discarded it before the first read (see the FIXME about mmap bugs),
  # so the mapping was never used and never unmapped. The read path is kept
  # so it can be re-enabled once those bugs are understood.
  USE_MMAP = false

  # Open a CDB file for reading.
  #
  # :call-seq:
  #   r = PureCDB::Reader.new(file)
  #   r = PureCDB::Reader.new(file, options)
  #   PureCDB::Reader.new(file) {|r| ... }
  #   PureCDB::Reader.new(file, options) {|r| ... }
  #
  # +file+ can be a String, which is treated as a path and opened in binary
  # read mode, or an IO-like object. An object that lacks +sysseek+ is
  # wrapped so that its +seek+/+read+ are used instead.
  #
  # The last 8 bytes of the file are inspected to detect the 64 bit magic
  # cookie. If +options+ requests a mode that differs from the detected
  # one, a RuntimeError is raised.
  #
  # When a block is given the reader is yielded to it and closed when the
  # block finishes, even if the block raises.
  #
  def initialize target, *options
    if target.is_a?(String)
      @name = target
      @io = File.new(target,"rb")
      raise "Unable to open file #{target}" if !@io
    else
      set_stream(target)
    end

    @io.sysseek(-8,IO::SEEK_END)
    tail = @io.sysread(8)
    # +tail+ may be nil when a wrapped stream's #read hits end of file.
    raise "Unable to read trailing 8 bytes for magic cookie" if tail.nil? || tail.size != 8
    mode = tail == CDB64_MAGIC ? 64 : 32

    super(*options)
    if @mode == :detect
      set_mode(mode)
    elsif @mode != mode
      raise "#{mode}bit mode detected in file; options request #{@mode}bit mode"
    end

    # FIXME: It seems like there are bugs triggered if mmap fails
    @m = open_mmap(target)
    read_hashes

    raise "Invalid File (Hashes are all empty)" if @hashes.uniq == [0]

    if block_given?
      begin
        yield(self)
      ensure
        close
      end
    end
  end

  #
  # Shortcut for PureCDB::Reader.new(target,options) ..
  #
  def self.open(target, *options, &block)
    Reader.new(target, *options, &block)
  end

  # Release the underlying stream (and mapping, if any). Streams that do
  # not respond to +close+ (such as a wrapped StringIO) are just dropped.
  def close
    @io.close if @io && @io.respond_to?(:close)
    @m.unmap if @m
    @m = nil
    @io = nil
  end

  # Yield every key/value pair in file order, starting right after the
  # table of hash pointers and stopping at the offset of the first hash
  # table. Returns an Enumerator when called without a block.
  def each
    return to_enum(:each) unless block_given?

    pos = hash_size
    hoff0 = @hashes[0]
    while pos < hoff0
      key, value = read_entry(pos)
      raise "Truncated record at offset #{pos}" unless key && value
      yield(key,value)
      pos += key.bytesize + value.bytesize + hashref_size
    end
  end

  # Read the record whose header starts at byte offset +pos+.
  # Returns [key, value], or [nil, nil] if the header could not be read.
  def read_entry(pos)
    keylen, datalen = read_header(pos)
    return nil,nil if !keylen
    pos += hashref_size
    rkey = read(pos .. pos + keylen - 1)
    pos += keylen
    value = read(pos .. pos + datalen - 1)
    return rkey, value
  end

  # Return all values stored under +key+, in probe order; an empty array
  # if there are none.
  def values(key)
    h = hash(key)

    bucket = h % num_hashes
    hoff = @hashes[bucket * 2]
    hlen = @hashes[bucket * 2 + 1]

    return [] if hlen == 0

    # Keys read back from the file are raw bytes, so compare as bytes;
    # otherwise a non-ASCII UTF-8 lookup key would never equal the stored key.
    wanted = key.to_s.dup.force_encoding(Encoding::BINARY)
    off = (h / num_hashes) % hlen
    vals = []

    # Probe at most +hlen+ slots so a completely full table cannot cause
    # an endless loop.
    hlen.times do
      start = hoff + off * hashref_size
      slot = read(start .. start + hashref_size - 1)
      break if !slot || slot.bytesize < hashref_size

      slot_hash, pos = ary_unpack(slot, 2)
      break if pos == 0 # an empty slot ends the probe sequence

      if slot_hash == h
        rkey, value = read_entry(pos)
        if rkey && rkey.dup.force_encoding(Encoding::BINARY) == wanted
          vals << value
        end
      end
      off = (off + 1) % hlen
    end
    return vals
  end

  private

  # Try to map +target+ into memory. Returns nil when mmap support is
  # switched off, +target+ is not a path, Mmap is not loaded, or mapping
  # fails.
  def open_mmap(target)
    return nil unless USE_MMAP && target.is_a?(String) && defined?(Mmap)
    Mmap.new(target, "r", Mmap::MAP_SHARED)
  rescue StandardError
    nil
  end

  # Read the bytes covered by the inclusive Range +r+.
  # Warning: This will be very slow if not mmap'd
  def read r
    if @m
      res = @m[r]
      return res if res

      # Falling back on IO.read - mmap failed
      @m = nil
    end

    @io.sysseek(r.first, IO::SEEK_SET)
    return @io.sysread(r.last-r.first+1)
  end

  # Load the table of (offset, slot count) pairs from the start of the file.
  def read_hashes
    r = read(0..(hash_size-1))
    raise "Unable to read hashes for '#{@name}'" if !r || r.bytesize < hash_size
    @hashes = ary_unpack(r,num_hashes*2)
  end

  # Read the key length and data length stored at +pos+.
  # Returns [nil, nil] if a complete header could not be read; raises if
  # either length exceeds the sanity limits below.
  def read_header pos
    data = read(pos .. pos+ hashref_size - 1)
    return nil,nil if !data || data.bytesize < hashref_size
    keylen,datalen = ary_unpack(data,2)
    raise "Too large" if keylen > 1048576 || datalen > 1048576 * 1024
    return keylen, datalen
  end
end
|
153
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module PureCDB
|
2
|
+
class Writer < Base
  # This just needs to be <= 1. The lower it is, the fewer records will collide. The closer to 1 it is,
  # the more frequently the reader may have to engage in potentially lengthy (worst case scanning all the
  # records) probing to find the right entry
  def hash_fill_factor
    0.7
  end

  # Create a CDB file for writing.
  #
  # +target+ can be a String, which is treated as a path and opened in
  # binary write mode, or an IO-like object. +options+ may contain
  # +mode+ (32 or 64); without it a 32 bit file is written.
  #
  # When a block is given the writer is yielded to it and closed when
  # the block returns. If the block does not complete, the stream is released
  # without writing the hash tables, so the incomplete file keeps its
  # zeroed pointer table rather than looking like a finished database.
  def initialize target, *options
    super(*options)

    set_mode(32) if @mode == :detect

    if target.is_a?(String)
      @io = File.new(target,"wb")
    else
      set_stream(target)
    end

    @closed = false
    @hashes = [nil] * num_hashes

    # Placeholder pointer table; rewritten with the real values on #close.
    @hashptrs = [0] * num_hashes * 2
    write_hashptrs

    @pos = hash_size

    if block_given?
      completed = false
      begin
        yield(self)
        completed = true
      ensure
        completed ? close : release_io
      end
    end
  end

  # Write the hash tables and the pointer table, then close the stream
  # if it responds to +close+. Calling it more than once is harmless.
  def close
    return if @closed
    write_hashes
    write_hashptrs
    release_io
  end

  # For compatibility w/cdb / CDBMaker
  def store key,value
    add key,value
  end

  # Append a record. +key+ and +value+ are converted with +to_s+ and
  # written as raw bytes, so the stored lengths are byte counts (not
  # character counts) and match what Base#hash hashes.
  # Returns the file offset following the new record.
  def add key,value
    # In an attempt to save memory, we pack the hash data we gather into
    # strings of BER compressed integers...

    h = hash(key)
    hi = (h % num_hashes)

    kbytes = key.to_s.dup.force_encoding(Encoding::BINARY)
    vbytes = value.to_s.dup.force_encoding(Encoding::BINARY)

    header = build_header(kbytes.bytesize, vbytes.bytesize)
    @io.syswrite(header + kbytes + vbytes)
    (@hashes[hi] ||= String.new) << [h,@pos].pack("ww") # BER compressed

    @pos += header.bytesize + kbytes.bytesize + vbytes.bytesize
  end

  def self.open target, *options, &block
    Writer.new(target, *options, &block)
  end

  private

  # Close the underlying stream (if it can be closed) and mark the
  # writer as closed, without writing any tables.
  def release_io
    @closed = true
    @io.close if @io.respond_to?(:close)
  end

  # Write one open-addressed table per hash bucket and record each
  # table's offset and slot count in @hashptrs. In 64 bit mode the magic
  # cookie is appended after the last table.
  def write_hashes
    @hashes.each_with_index do |h,i|
      @hashptrs[i*2] = @pos

      if !h || h.empty?
        @hashptrs[i*2+1] = 0
        next
      end

      # +h+ holds BER packed (hash, position) pairs; size the table from
      # the number of pairs, not from the packed string's byte length.
      packed = h.unpack("w*")
      count = packed.size / 2
      len = (count / hash_fill_factor).ceil
      @hashptrs[i*2+1] = len

      ary = [0] * len * 2

      free_slots = len
      packed.each_slice(2) do |hk, pos|
        raise "Oops; hash buffer too small (hash size: #{count}, buffer size: #{len})" if free_slots <= 0

        off = (hk / num_hashes) % len
        # A slot is free while its position is 0. Record positions are
        # never 0 because records start after the pointer table, whereas
        # a key's hash can legitimately be 0.
        while ary[off*2+1] != 0
          off = (off + 1) % len
        end
        free_slots -= 1
        ary[off*2]   = hk
        ary[off*2+1] = pos
      end

      write_hash_slots(ary)
      @pos += len * hashref_size
    end
    @io.syswrite(CDB64_MAGIC) if self.mode == 64
  end

  # Pack the key and value byte lengths that precede every record.
  def build_header key_length, value_length
    ary_pack([key_length, value_length])
  end

  # Write one table of (hash, position) slots at the current position.
  def write_hash_slots(ary)
    @io.syswrite(ary_pack(ary))
  end

  # (Re)write the pointer table at the very start of the stream.
  def write_hashptrs
    @io.sysseek(0,IO::SEEK_SET)
    @io.syswrite(ary_pack(@hashptrs))
  end
end
|
117
|
+
|
118
|
+
end
|
data/purecdb.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
# coding: utf-8
|
3
|
+
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'purecdb/version'

Gem::Specification.new do |spec|
  spec.name          = "purecdb"
  spec.version       = PureCDB::VERSION
  spec.authors       = ["Vidar Hokstad"]
  spec.email         = ["vidar@hokstadconsulting.com"]

  spec.summary       = %q{A Pure Ruby CDB reader/writer w/64 bit extensions}
  spec.description   = spec.summary
  spec.homepage      = "https://github.com/hokstadconsulting/purecdb"
  # Matches the LICENSE file shipped with the gem.
  spec.license       = "MIT"

  # Package everything tracked by git except tests/specs/features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # The generated template's 'allowed_push_host' placeholder ("TODO: ...")
  # has been removed, as the placeholder itself instructs, so pushes are
  # not tied to a bogus host.

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "rdoc"
end
|
metadata
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: purecdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Vidar Hokstad
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-07-03 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.9'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.9'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '10.0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '10.0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rdoc
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
description: A Pure Ruby CDB reader/writer w/64 bit extensions
|
79
|
+
email:
|
80
|
+
- vidar@hokstadconsulting.com
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- .rspec
|
87
|
+
- Gemfile
|
88
|
+
- Gemfile.lock
|
89
|
+
- LICENSE
|
90
|
+
- Makefile
|
91
|
+
- README.md
|
92
|
+
- Rakefile
|
93
|
+
- bin/console
|
94
|
+
- bin/setup
|
95
|
+
- lib/purecdb.rb
|
96
|
+
- lib/purecdb/base.rb
|
97
|
+
- lib/purecdb/reader.rb
|
98
|
+
- lib/purecdb/version.rb
|
99
|
+
- lib/purecdb/writer.rb
|
100
|
+
- purecdb.gemspec
|
101
|
+
homepage: https://github.com/hokstadconsulting/purecdb
|
102
|
+
licenses: []
|
103
|
+
post_install_message:
|
104
|
+
rdoc_options: []
|
105
|
+
require_paths:
|
106
|
+
- lib
|
107
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
none: false
|
109
|
+
requirements:
|
110
|
+
- - ! '>='
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ! '>='
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
requirements: []
|
120
|
+
rubyforge_project:
|
121
|
+
rubygems_version: 1.8.23
|
122
|
+
signing_key:
|
123
|
+
specification_version: 3
|
124
|
+
summary: A Pure Ruby CDB reader/writer w/64 bit extensions
|
125
|
+
test_files: []
|