purecdb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +50 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +36 -0
- data/LICENSE +21 -0
- data/Makefile +13 -0
- data/README.md +91 -0
- data/Rakefile +1 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/purecdb.rb +8 -0
- data/lib/purecdb/base.rb +171 -0
- data/lib/purecdb/reader.rb +153 -0
- data/lib/purecdb/version.rb +4 -0
- data/lib/purecdb/writer.rb +118 -0
- data/purecdb.gemspec +30 -0
- metadata +125 -0
data/.gitignore
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
purecdb (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.2.5)
|
10
|
+
json (1.8.3)
|
11
|
+
rake (10.4.2)
|
12
|
+
rdoc (4.2.2)
|
13
|
+
json (~> 1.4)
|
14
|
+
rspec (3.5.0)
|
15
|
+
rspec-core (~> 3.5.0)
|
16
|
+
rspec-expectations (~> 3.5.0)
|
17
|
+
rspec-mocks (~> 3.5.0)
|
18
|
+
rspec-core (3.5.0)
|
19
|
+
rspec-support (~> 3.5.0)
|
20
|
+
rspec-expectations (3.5.0)
|
21
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
22
|
+
rspec-support (~> 3.5.0)
|
23
|
+
rspec-mocks (3.5.0)
|
24
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
25
|
+
rspec-support (~> 3.5.0)
|
26
|
+
rspec-support (3.5.0)
|
27
|
+
|
28
|
+
PLATFORMS
|
29
|
+
ruby
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
bundler (~> 1.9)
|
33
|
+
purecdb!
|
34
|
+
rake (~> 10.0)
|
35
|
+
rdoc
|
36
|
+
rspec
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Hokstad Consulting Ltd
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/Makefile
ADDED
data/README.md
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# PureCDB
|
2
|
+
|
3
|
+
A Pure Ruby CDB reader/writer w/64 bit extensions
|
4
|
+
|
5
|
+
For information about CDB, see: http://cr.yp.to/cdb.html
|
6
|
+
|
7
|
+
The motivation for writing this was:
|
8
|
+
|
9
|
+
* Bernstein's CDB format can only handle files up to 4GB. For a past project
|
10
|
+
we needed a simple CDB style file for datasets several times that.
|
11
|
+
|
12
|
+
* The C library is under a license that prevents us from releasing modified versions of it,
|
13
|
+
but the format is so simple that writing our own reader and writer was easy.
|
14
|
+
|
15
|
+
* We don't like depending on C extensions for Ruby code if we don't have to.
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
## Installation
|
20
|
+
|
21
|
+
Add this line to your application's Gemfile:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
gem 'purecdb'
|
25
|
+
```
|
26
|
+
|
27
|
+
And then execute:
|
28
|
+
|
29
|
+
$ bundle
|
30
|
+
|
31
|
+
Or install it yourself as:
|
32
|
+
|
33
|
+
$ gem install purecdb
|
34
|
+
|
35
|
+
## Basic Usage
|
36
|
+
|
37
|
+
To create a 32 bit (standard) CDB file:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
PureCDB::Writer.open("/tmp/somecdbfile.cdb") do |cdb|
|
41
|
+
cdb.add("key","value")
|
42
|
+
end
|
43
|
+
```
|
44
|
+
|
45
|
+
To instead create a 64 bit file, pass {mode: 64} as the second argument to PureCDB::Writer.open.
|
46
|
+
|
47
|
+
|
48
|
+
To read a 32 bit (standard) CDB file:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
PureCDB::Reader.open("/tmp/somecdbfile.cdb") do |r|
|
52
|
+
p r.values("key")
|
53
|
+
end
|
54
|
+
```
|
55
|
+
|
56
|
+
To instead read a 64 bit file, pass {mode: 64} as the second argument to PureCDB::Reader.open,
|
57
|
+
or let the reader auto-detect the format.
|
58
|
+
|
59
|
+
See PureCDB::Reader#new for additional usage.
|
60
|
+
|
61
|
+
|
62
|
+
## 64-bit Format
|
63
|
+
|
64
|
+
The 64 bit file format follows http://cr.yp.to/cdb/cdb.txt *except* that any
|
65
|
+
reference to 32-bit should be replaced by 64-bit, and that a 64 bit file
|
66
|
+
*ends* with the magic cookie "cdb64:01"
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
## Development
|
72
|
+
|
73
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
74
|
+
`bin/console` for an interactive prompt that will allow you to experiment.
|
75
|
+
|
76
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
77
|
+
release a new version, update the version number in `version.rb`, and then run
|
78
|
+
`bundle exec rake release` to create a git tag for the version, push git commits
|
79
|
+
and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
80
|
+
|
81
|
+
To run the Rspec tests, you need tinycdb or a command-line compatible implementation
|
82
|
+
installed for interoperability tests.
|
83
|
+
|
84
|
+
|
85
|
+
## Contributing
|
86
|
+
|
87
|
+
1. Fork it ( https://github.com/hokstadconsulting/purecdb/fork )
|
88
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
89
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
90
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
91
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "purecdb"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/lib/purecdb.rb
ADDED
data/lib/purecdb/base.rb
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
module PureCDB
|
2
|
+
|
3
|
+
#
|
4
|
+
# Base class with shared functionality for PureCDB::Reader and
|
5
|
+
# PureCDB::Writer that abstracts away 32 bit vs. 64 bit format
|
6
|
+
# details. You should not need to use this directly.
|
7
|
+
#
|
8
|
+
class Base
  # The CDB format contains 256 separate hashes by default.
  DEFAULT_NUM_HASHES = 256

  # Keys and values have a length indicator. In the standard format this is 4 bytes long.
  # In the 64 bit format, this value is multiplied by 2.
  DEFAULT_LENGTH_SIZE = 4

  # The "pointer" (offset) pointing to a given hash is this many bytes by default. In the
  # 64 bit format this is multiplied by 2.
  DEFAULT_HASHPTR_SIZE = 4

  # Number of bytes that will be buffered
  BUFFER_SIZE = 4096

  # Magic cookie used to indicate that this is a 64 bit (non-standard) CDB file
  # rather than a 32-bit CDB file.
  CDB64_MAGIC = "cdb64:01"

  # Values accepted for the +mode+ option.
  VALID_MODES = [32, 64, :detect].freeze

  # The actual number of hashes (depends on 32 vs 64 bit)
  attr_reader :num_hashes

  # The actual number of bytes per length field (depends on 32 vs 64 bit)
  attr_reader :length_size

  # The actual number of bytes per hash pointer (depends on 32 vs 64 bit)
  attr_reader :hashptr_size

  # 32 for 32-bit files, 64 for 64-bit files, or +:detect+ until a mode has
  # been chosen with #set_mode.
  attr_reader :mode

  # The size of each hash slot
  def hashref_size
    hashptr_size + length_size
  end

  # The size of the table of pointers to the hashes
  def hash_size
    hashref_size * num_hashes
  end

  #
  # Used by PureCDB::Reader and PureCDB::Writer to set 32/64 bit mode.
  # Field sizes are doubled for mode 64 and left at their defaults otherwise.
  #
  def set_mode(mode)
    @mode = mode
    @num_hashes = DEFAULT_NUM_HASHES
    scale = @mode == 64 ? 2 : 1
    @length_size = DEFAULT_LENGTH_SIZE * scale
    @hashptr_size = DEFAULT_HASHPTR_SIZE * scale
  end

  #
  # Parses options and sets mode. Do not call directly -
  # Use PureCDB::Reader or PureCDB::Writer
  #
  # Each element of +options+ is a Hash. Only the +mode+ key (Symbol or
  # String) is honoured; other keys are ignored instead of being mistaken
  # for a mode. Without a +mode+ option the mode is +:detect+.
  #
  # Raises ArgumentError if the requested mode is not 32, 64 or +:detect+,
  # because any other value would combine 32 bit field sizes with 64 bit
  # packing.
  #
  def initialize(*options)
    mode = :detect
    options.each do |opts|
      opts.each do |opt, val|
        mode = val if opt.to_s == "mode"
      end
    end

    unless VALID_MODES.include?(mode)
      raise ArgumentError, "Unsupported mode #{mode.inspect}; expected 32, 64 or :detect"
    end

    # Used to speed up 64bit pack/unpack: true when the native unsigned long
    # byte order matches the little-endian "V" directive.
    @little_endian = [123456789].pack("L") == [123456789].pack("V")

    if mode == :detect
      @mode = :detect
    else
      set_mode(mode)
    end
  end

  # Used by PureCDB::Reader and PureCDB::Writer to set an IO-like object
  # to read from/write to. Objects that do not respond to +sysseek+ are
  # wrapped in a SysIOWrapper.
  def set_stream(target)
    @io = target.respond_to?(:sysseek) ? target : SysIOWrapper.new(target)
    @name = "<stream>"
  end

  # As per http://cr.yp.to/cdb/cdb.txt
  #
  # Returns the 32 bit CDB hash of +key+ (converted with +to_s+).
  #
  # NOTE: this replaces Object#hash with a one-argument method, so instances
  # cannot be used where Ruby calls +hash+ without arguments (e.g. as Hash keys).
  def hash(key)
    h = 5381 # Magic
    key.to_s.each_byte do |c|
      # Ruby integers never overflow, so the value is clamped to 32 bits by hand.
      # FIXME: For 64 bit version we use 64 bit numbers in the slots, so could increase this.
      h = (((h << 5) + h) ^ c) & 0xffffffff
    end
    h
  end

  private

  # Packs the integers in +ary+ as little-endian words: 32 bit words in
  # 32 bit mode, 64 bit words otherwise. The native "Q" directive is only
  # used when the platform is little-endian; elsewhere each value is split
  # into two little-endian 32 bit halves (low half first).
  def ary_pack(ary)
    if @mode == 32
      ary.pack("V*")
    elsif @little_endian
      ary.pack("Q*")
    else
      ary.flat_map { |a| [a & 0xffffffff, (a >> 32) & 0xffffffff] }.pack("V*")
    end
  end

  # Inverse of #ary_pack: decodes +num+ integers from the binary string +data+.
  def ary_unpack(data, num)
    if @mode == 32
      data.unpack("V#{num}")
    elsif @little_endian
      data.unpack("Q#{num}")
    else
      data.unpack("V#{num * 2}").each_slice(2).map { |lo, hi| lo + (hi << 32) }
    end
  end
end
|
140
|
+
|
141
|
+
#
|
142
|
+
# Wrap an object that does not have all of +sysseek/syswrite/sysread+
|
143
|
+
# but that does have +seek/write/read+ with similar semantics. This is
|
144
|
+
# primarily intended for use with +StringIO+, which lacks +sysseek+
|
145
|
+
#
|
146
|
+
# It is automatically interjected on creating a +PureCDB::Reader+ or
|
147
|
+
# +PureCDB::Writer+ if the IO-like object that is passed lacks +sysseek+
|
148
|
+
# so you should normally not need to think about this class
|
149
|
+
#
|
150
|
+
class SysIOWrapper
  # +target+ is the wrapped object; it must respond to +seek+, +read+ and
  # +write+ for the corresponding methods below to work.
  def initialize(target)
    @target = target
  end

  # Delegates to +seek+.
  #
  # +mode+ defaults to IO::SEEK_SET, matching the optional whence argument
  # of IO#sysseek. The return value is whatever the target's +seek+ returns.
  def sysseek(offset, mode = IO::SEEK_SET)
    @target.seek(offset, mode)
  end

  # Delegates to +read+
  def sysread(size)
    @target.read(size)
  end

  # Delegates to +write+
  def syswrite(buffer)
    @target.write(buffer)
  end
end
|
170
|
+
|
171
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
module PureCDB
|
2
|
+
|
3
|
+
#
|
4
|
+
# Read 32 bit or 64 bit CDB files.
|
5
|
+
#
|
6
|
+
class Reader < Base
  include Enumerable

  # Open a CDB file for reading.
  #
  # :call-seq:
  #   r = PureCDB::Reader.new(file)
  #   r = PureCDB::Reader.new(file, options)
  #   PureCDB::Reader.new(file) {|r| ... }
  #   PureCDB::Reader.new(file, options) {|r| ... }
  #
  # +target+ can be a String (treated as a file name and opened in binary
  # mode) or an IO-like object, which is handed to +set_stream+.
  #
  # The last 8 bytes are inspected for the 64 bit magic cookie to detect
  # the format. If +options+ request a specific mode that differs from the
  # detected one, a RuntimeError is raised.
  #
  # When a block is given, the reader is yielded to it and closed when the
  # block finishes, even if the block raises.
  #
  # All reads go through the IO object. The previous mmap attempt was
  # removed: +read+ discarded the mapping on every call, so it was never
  # used and never unmapped.
  #
  def initialize(target, *options)
    owns_io = target.is_a?(String)
    if owns_io
      @name = target
      @io = File.new(target, "rb")
    else
      set_stream(target)
    end

    begin
      @io.sysseek(-8, IO::SEEK_END)
      tail = @io.sysread(8)
      raise "Unable to read trailing 8 bytes for magic cookie" if tail.nil? || tail.bytesize != 8
      detected = tail == CDB64_MAGIC ? 64 : 32

      super(*options)
      if @mode == :detect
        set_mode(detected)
      elsif @mode != detected
        raise "#{detected}bit mode detected in file; options request #{@mode}bit mode"
      end

      read_hashes
      raise "Invalid File (Hashes are all empty)" if @hashes.uniq == [0]
    rescue StandardError
      # Do not leak a file handle this constructor opened itself.
      close if owns_io
      raise
    end

    return unless block_given?

    begin
      yield(self)
    ensure
      close
    end
  end

  #
  # Shortcut for PureCDB::Reader.new(target,options) ..
  #
  def self.open(target, *options, &block)
    new(target, *options, &block)
  end

  # Closes the underlying IO object if it supports +close+ (a wrapped
  # stream such as a StringIO behind SysIOWrapper does not). Safe to call
  # more than once.
  def close
    @io.close if @io.respond_to?(:close)
    @io = nil
  end

  # Yields each key/value pair, reading records from the end of the
  # pointer table up to the offset of the first hash table. Returns an
  # Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    pos = hash_size
    hoff0 = @hashes[0]
    while pos < hoff0
      key, value = read_entry(pos)
      # A missing header or body means the file is truncated; stop instead
      # of calling methods on nil.
      break if key.nil? || value.nil?

      yield(key, value)
      pos += key.bytesize + value.bytesize + hashref_size
    end
  end

  # Reads the record stored at byte offset +pos+.
  # Returns +[key, value]+, or +[nil, nil]+ if no header could be read.
  def read_entry(pos)
    keylen, datalen = read_header(pos)
    return nil, nil unless keylen

    pos += hashref_size
    rkey = read(pos..pos + keylen - 1)
    pos += keylen
    value = read(pos..pos + datalen - 1)
    return rkey, value
  end

  # Returns an Array with every value stored under +key+ (empty if none).
  #
  # The key is compared byte-wise, so its encoding does not matter.
  def values(key)
    wanted = key.to_s.b
    h = hash(key)

    table = (h % num_hashes) * 2
    hoff = @hashes[table]
    hlen = @hashes[table + 1]
    return [] if hlen == 0

    off = (h / num_hashes) % hlen
    vals = []

    # Probe at most +hlen+ slots so a completely full table cannot cause an
    # infinite loop.
    hlen.times do
      start = hoff + off * hashref_size
      slot = read(start..start + hashref_size - 1)
      break if slot.nil? || slot.bytesize < hashref_size

      slot_hash, pos = ary_unpack(slot, 2)
      break if pos == 0 # empty slot: end of the probe chain

      if slot_hash == h
        rkey, value = read_entry(pos)
        vals << value if rkey && rkey.b == wanted
      end
      off = (off + 1) % hlen
    end
    vals
  end

  private

  # Reads the bytes covered by the inclusive Range +r+ from the IO object.
  # Returns nil when the IO object signals end of file, whether by
  # returning nil or by raising EOFError.
  def read(r)
    @io.sysseek(r.first, IO::SEEK_SET)
    @io.sysread(r.last - r.first + 1)
  rescue EOFError
    nil
  end

  # Loads the table of (offset, length) pairs from the start of the file
  # into @hashes as a flat Array.
  def read_hashes
    r = read(0..(hash_size - 1))
    raise "Unable to read hashes for '#{@name}'" if r.nil? || r.bytesize < hash_size

    @hashes = ary_unpack(r, num_hashes * 2)
  end

  # Reads the key length and data length stored at +pos+.
  # Returns +[nil, nil]+ when the header cannot be read completely and
  # raises if either length exceeds the sanity limits below.
  def read_header(pos)
    data = read(pos..pos + hashref_size - 1)
    return nil, nil if data.nil? || data.bytesize < hashref_size

    keylen, datalen = ary_unpack(data, 2)
    raise "Too large" if keylen > 1048576 || datalen > 1048576 * 1024

    return keylen, datalen
  end
end
|
153
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module PureCDB
|
2
|
+
class Writer < Base
  # This just needs to be <= 1. The lower it is, the fewer records will collide. The closer to 1 it is,
  # the more frequently the reader may have to engage in potentially lengthy (worst case scanning all the
  # records) probing to find the right entry
  def hash_fill_factor
    0.7
  end

  # Creates a CDB file.
  #
  # +target+ is either a String (a file name, opened for binary writing)
  # or an IO-like object handed to +set_stream+. +options+ are passed to
  # the base class; when no mode is given, a 32 bit file is written.
  #
  # When a block is given, the writer is yielded to it and #close is called
  # afterwards. If the block raises, the file is not finalized, but a file
  # handle opened here is still released.
  def initialize(target, *options)
    super(*options)

    set_mode(32) if @mode == :detect

    owns_io = target.is_a?(String)
    if owns_io
      @io = File.new(target, "wb")
    else
      set_stream(target)
    end

    @closed = false
    @hashes = [nil] * num_hashes

    # Reserve space for the pointer table; it is rewritten on #close.
    @hashptrs = [0] * num_hashes * 2
    write_hashptrs

    @pos = hash_size

    return unless block_given?

    finished = false
    begin
      yield(self)
      finished = true
    ensure
      @io.close if !finished && owns_io
    end
    close
  end

  # Writes the hash tables and the pointer table, then closes the IO
  # object if it supports +close+. Calling it again is a no-op, so the
  # tables are never written twice.
  def close
    return if @closed

    write_hashes
    write_hashptrs
    @io.close if @io.respond_to?(:close)
    @closed = true
  end

  # For compatibility w/cdb / CDBMaker
  def store(key, value)
    add(key, value)
  end

  # Appends a record for +key+ and +value+.
  #
  # Both are converted with +to_s+ and written as raw bytes; the lengths
  # stored in the record header are byte counts, so multibyte strings are
  # stored correctly. Returns the file offset following the new record.
  def add(key, value)
    key_bytes = key.to_s.b
    value_bytes = value.to_s.b

    h = hash(key)
    record = build_header(key_bytes.bytesize, value_bytes.bytesize) + key_bytes + value_bytes
    @io.syswrite(record)

    # In an attempt to save memory, the (hash, offset) pairs gathered for
    # each table are kept as strings of BER compressed integers.
    (@hashes[h % num_hashes] ||= String.new) << [h, @pos].pack("ww")

    @pos += record.bytesize
  end

  def self.open(target, *options, &block)
    new(target, *options, &block)
  end

  private

  # Writes one open-addressed hash table per bucket at the current
  # position and records each table's offset and slot count in @hashptrs.
  # Appends the 64 bit magic cookie in 64 bit mode.
  def write_hashes
    @hashes.each_with_index do |packed, i|
      @hashptrs[i * 2] = @pos

      entries = packed ? packed.unpack("w*").each_slice(2).to_a : []
      if entries.empty?
        @hashptrs[i * 2 + 1] = 0
        next
      end

      # Size the table from the number of entries, not from the byte size
      # of the BER buffer.
      len = (entries.size / hash_fill_factor).ceil
      if len < entries.size
        raise "Oops; hash buffer too small (hash size: #{entries.size}, buffer size: #{len})"
      end
      @hashptrs[i * 2 + 1] = len

      slots = [0] * (len * 2)
      entries.each do |hk, pos|
        off = (hk / num_hashes) % len
        # A slot is free when its position is 0; record positions always
        # lie beyond the pointer table, whereas a hash value may be 0.
        off = (off + 1) % len while slots[off * 2 + 1] != 0
        slots[off * 2] = hk
        slots[off * 2 + 1] = pos
      end

      write_hash_slots(slots)
      @pos += len * hashref_size
    end
    @io.syswrite(CDB64_MAGIC) if mode == 64
  end

  # Returns the packed record header holding the two lengths.
  def build_header(key_length, value_length)
    ary_pack([key_length, value_length])
  end

  # Writes a flat array of (hash, position) slots.
  def write_hash_slots(ary)
    @io.syswrite(ary_pack(ary))
  end

  # Rewinds to the start of the stream and writes the pointer table.
  def write_hashptrs
    @io.sysseek(0, IO::SEEK_SET)
    @io.syswrite(ary_pack(@hashptrs))
  end
end
|
117
|
+
|
118
|
+
end
|
data/purecdb.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
# coding: utf-8
|
3
|
+
# Make lib/ loadable so the version constant can be read without installing the gem.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'purecdb/version'

Gem::Specification.new do |spec|
  spec.name          = "purecdb"
  spec.version       = PureCDB::VERSION
  spec.authors       = ["Vidar Hokstad"]
  spec.email         = ["vidar@hokstadconsulting.com"]

  spec.summary       = %q{A Pure Ruby CDB reader/writer w/64 bit extensions}
  spec.description   = spec.summary
  spec.homepage      = "https://github.com/hokstadconsulting/purecdb"
  # Matches the LICENSE file shipped with the gem.
  spec.license       = "MIT"

  # Package every file tracked by git except tests.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Restrict `gem push` to rubygems.org; this replaces the generator's
  # placeholder text, which is not a valid host.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = "https://rubygems.org"
  end

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "rdoc"
end
|
metadata
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: purecdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Vidar Hokstad
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-07-03 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.9'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.9'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '10.0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '10.0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rdoc
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
description: A Pure Ruby CDB reader/writer w/64 bit extensions
|
79
|
+
email:
|
80
|
+
- vidar@hokstadconsulting.com
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- .rspec
|
87
|
+
- Gemfile
|
88
|
+
- Gemfile.lock
|
89
|
+
- LICENSE
|
90
|
+
- Makefile
|
91
|
+
- README.md
|
92
|
+
- Rakefile
|
93
|
+
- bin/console
|
94
|
+
- bin/setup
|
95
|
+
- lib/purecdb.rb
|
96
|
+
- lib/purecdb/base.rb
|
97
|
+
- lib/purecdb/reader.rb
|
98
|
+
- lib/purecdb/version.rb
|
99
|
+
- lib/purecdb/writer.rb
|
100
|
+
- purecdb.gemspec
|
101
|
+
homepage: https://github.com/hokstadconsulting/purecdb
|
102
|
+
licenses: []
|
103
|
+
post_install_message:
|
104
|
+
rdoc_options: []
|
105
|
+
require_paths:
|
106
|
+
- lib
|
107
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
none: false
|
109
|
+
requirements:
|
110
|
+
- - ! '>='
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ! '>='
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
requirements: []
|
120
|
+
rubyforge_project:
|
121
|
+
rubygems_version: 1.8.23
|
122
|
+
signing_key:
|
123
|
+
specification_version: 3
|
124
|
+
summary: A Pure Ruby CDB reader/writer w/64 bit extensions
|
125
|
+
test_files: []
|