tuplex 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +51 -0
- data/examples/lmdb-sorted.rb +31 -0
- data/examples/lmdb.rb +14 -0
- data/lib/tuplex.rb +98 -0
- data/lib/tuplex/version.rb +1 -1
- data/test/test-monotonic.rb +54 -0
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0afb0b081f7c4846c6aa138ea975f6ed30ab7e04
|
4
|
+
data.tar.gz: d6c419f04e8c802334eb27d3a1e9fb9f0325951d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25d656204c50d81c77fef4a49a570ada7c3960c8d1d441c879de35329e4151fa01a28d90a4a5f51e86cb98b187881efeab8001f0f943151b6334f405dcb64aaf
|
7
|
+
data.tar.gz: c290e67ad062ea39e214881a19b1385f6d36b802d013be527201d2c2261356f18b3212b36877ff217e16b57bac4433f3bd333914503fd9d6f127601b82aaf91a
|
data/README.md
CHANGED
@@ -3,6 +3,57 @@ Tuplex
|
|
3
3
|
|
4
4
|
Constructs index keys for tuples.
|
5
5
|
|
6
|
+
What is it for?
|
7
|
+
---
|
8
|
+
|
9
|
+
1. Assign keys to tuples (immutable value types) that do not have unique ids (primary keys). This can be used to store a tuplespace in a key-value store.
|
10
|
+
|
11
|
+
2. Keys for similar tuples should be similar, to keep them close in storage, reducing cache misses and page faults.
|
12
|
+
|
13
|
+
3. The key construction preserves ordering in such a way that the index can be used as an approximate secondary index for multidimensional range queries.
|
14
|
+
|
15
|
+
What does it do?
|
16
|
+
---
|
17
|
+
|
18
|
+
Tuplex gives you a function that turns a tuple into a string that can be used as a key in a key-value store. The key is non-unique, so you'll need to use the value to disambiguate (see examples).
|
19
|
+
|
20
|
+
For tuples of a given _signature_ (same array sizes, map keys, type of each value, etc.), the function is _monotonic_ on each value. For example:
|
21
|
+
|
22
|
+
["foo", 1, 2]
|
23
|
+
["foo", 1, 3]
|
24
|
+
|
25
|
+
These two tuples have the same signature: three elements of types string, number, and number, respectively.
|
26
|
+
|
27
|
+
The index keys for these tuples are as follows:
|
28
|
+
|
29
|
+
>> Tuplex.make_key(["foo", 1, 2])
|
30
|
+
=> "\x97\xB0kL\xA0\xC9\x00\xD1\x00hwn"
|
31
|
+
>> Tuplex.make_key(["foo", 1, 3])
|
32
|
+
=> "\x97\xB0kL\xA0\xC9\x00\xD1\x00hwn\x00\b"
|
33
|
+
>> Tuplex.make_key(["foo", 1, 2]) < Tuplex.make_key(["foo", 1, 3])
|
34
|
+
=> true
|
35
|
+
|
36
|
+
So, the ordering `2<3` is preserved in the key strings (lexically ordered).
|
37
|
+
|
38
|
+
This also holds when several terms vary at once, whether string or number, as long as none of them decreases:
|
39
|
+
|
40
|
+
>> Tuplex.make_key(["foo", 1, 2]) < Tuplex.make_key(["foozap", 7, 3])
|
41
|
+
=> true
|
42
|
+
|
43
|
+
And it's true for arbitrary nesting:
|
44
|
+
|
45
|
+
>> Tuplex.make_key(["foo", {a: 1, b: [2]}]) < Tuplex.make_key(["foozap", {a: 7, b: [3]}])
|
46
|
+
=> true
|
47
|
+
|
48
|
+
However, for tuples of different signatures, the ordering depends only on the signature and not on term values:
|
49
|
+
|
50
|
+
>> Tuplex.make_key(["a", 0]) < Tuplex.make_key([0, "a"])
|
51
|
+
=> true
|
52
|
+
>> Tuplex.make_key(["z", 1000]) < Tuplex.make_key([0, "a"])
|
53
|
+
=> true
|
54
|
+
|
55
|
+
In other words, all tuples of signature (String, Number) are contiguous in the index, and that contiguous group is separate from tuples of signature (Number, String).
|
56
|
+
|
6
57
|
Contact
|
7
58
|
=======
|
8
59
|
|
data/examples/lmdb-sorted.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'tuplex'
|
2
|
+
require 'lmdb'
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
include Tuplex
|
6
|
+
|
7
|
+
dir = Dir.mktmpdir
|
8
|
+
env = LMDB.new dir
|
9
|
+
db = @db = env.database("tuples", create: true, dupsort: true)
|
10
|
+
|
11
|
+
def store t
|
12
|
+
@db[make_key(t)] = make_val(t)
|
13
|
+
end
|
14
|
+
|
15
|
+
(1..10).to_a.shuffle.each do |i|
|
16
|
+
store [1, 2, i] # same signature for each i
|
17
|
+
store a: "foo", b: i # same signature for each i, but different from above
|
18
|
+
store i => nil # different signature for each i
|
19
|
+
end
|
20
|
+
|
21
|
+
puts "\n\nIterating..."
|
22
|
+
db.each do |k,v|
|
23
|
+
p unpack_val(v)
|
24
|
+
end
|
25
|
+
|
26
|
+
puts "\n\nSearching..."
|
27
|
+
db.each do |k,v|
|
28
|
+
if val_equals_tuple(v, [1, 2, 5])
|
29
|
+
puts "found!"
|
30
|
+
end
|
31
|
+
end
|
data/examples/lmdb.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'tuplex'
|
2
|
+
require 'lmdb'
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
include Tuplex
|
6
|
+
|
7
|
+
dir = Dir.mktmpdir
|
8
|
+
env = LMDB.new dir
|
9
|
+
db = env.database("tuples", create: true, dupsort: true)
|
10
|
+
|
11
|
+
t = {a: 1, b: 2}
|
12
|
+
db[make_key(t)] = make_val(t)
|
13
|
+
|
14
|
+
p db.map {|k,v| [k, unpack_val(v)]}
|
data/lib/tuplex.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'msgpack'
|
2
|
+
|
3
|
+
module Tuplex
|
4
|
+
module_function
|
5
|
+
|
6
|
+
# +t+ can be a tuple or a value in a tuple (that is, an entry in
|
7
|
+
# an array or a value at some key in a hash).
|
8
|
+
def signature t
|
9
|
+
case t
|
10
|
+
when nil, true, false; 0
|
11
|
+
when Numeric; 1
|
12
|
+
when String, Symbol; 2
|
13
|
+
when Array
|
14
|
+
t.map {|v| signature(v)}
|
15
|
+
when Hash
|
16
|
+
t.each_with_object({}) {|(k,v), h| h[k] = signature(v)}
|
17
|
+
else raise ArgumentError, "cannot compute signature for #{t.inspect}"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
SIG_KEY_SIZE = 8
|
22
|
+
def sig_key t
|
23
|
+
[signature(t).hash].pack("q")
|
24
|
+
end
|
25
|
+
|
26
|
+
def str_sum acc, s
|
27
|
+
a = acc.unpack("C*")
|
28
|
+
b = s.unpack("C*")
|
29
|
+
if a.size < b.size
|
30
|
+
a,b = b,a
|
31
|
+
end
|
32
|
+
cv = 0
|
33
|
+
s = []
|
34
|
+
a.zip(b).reverse_each do |av,bv|
|
35
|
+
bv ||= 0
|
36
|
+
cv,r = (av + bv + cv).divmod 256
|
37
|
+
s << r
|
38
|
+
end
|
39
|
+
if cv != 0
|
40
|
+
raise "overflow"
|
41
|
+
end
|
42
|
+
s.reverse.pack("C*")
|
43
|
+
end
|
44
|
+
|
45
|
+
# https://en.wikipedia.org/wiki/Double-precision_floating-point_format
|
46
|
+
# "%064b" % [-1.0].pack("G").unpack("Q>")
|
47
|
+
# [0b1011111111110000000000000000000000000000000000000000000000000000].pack("Q>").unpack("G")
|
48
|
+
|
49
|
+
def expo(x) ([x].pack("G")[0..1].unpack("S>")[0] & 0b0111111111110000) >> 4; end
|
50
|
+
def mant(x) [x].pack("G").unpack("Q>")[0] & 0x000FFFFFFFFFFFFF; end
|
51
|
+
|
52
|
+
def float_to_key x
|
53
|
+
if x >= 0
|
54
|
+
[1, expo(x), mant(x)].pack("CS>Q>") # sparse
|
55
|
+
else
|
56
|
+
[0, -expo(x), -mant(x)].pack("CS>Q>")
|
57
|
+
end
|
58
|
+
end
|
59
|
+
# def fk(x); "%064b" % float_to_key(x).unpack("Q>"); end
|
60
|
+
|
61
|
+
MAX_SUM_KEY_SIZE = 500
|
62
|
+
def sum_key t, acc = "\0\0"
|
63
|
+
case t
|
64
|
+
when nil; str_sum(acc, "\0\0")
|
65
|
+
when false; str_sum(acc, "\0\1")
|
66
|
+
when true; str_sum(acc, "\0\2")
|
67
|
+
when Numeric; str_sum(acc, "\0" + float_to_key(t.to_f))
|
68
|
+
when String; str_sum(acc, "\0" + t) # truncate here
|
69
|
+
when Symbol; str_sum(acc, "\0" + t.to_s) # and here
|
70
|
+
when Array; t.inject(acc) {|s,v| sum_key(v,s)}
|
71
|
+
when Hash; t.inject(acc) {|s,(k,v)| sum_key(v,s)}
|
72
|
+
else raise ArgumentError, "bad type: #{t.inspect}"
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
MAX_KEY_SIZE = SIG_KEY_SIZE + MAX_SUM_KEY_SIZE
|
77
|
+
# note: MDB_MAXKEYSIZE is 511
|
78
|
+
|
79
|
+
def make_key t
|
80
|
+
(sig_key(t) + sum_key(t))[0..MAX_KEY_SIZE].sub(/\0+\z/, "")
|
81
|
+
end
|
82
|
+
|
83
|
+
def make_val t
|
84
|
+
make_val_hash(t) + MessagePack.pack(t)
|
85
|
+
end
|
86
|
+
|
87
|
+
def make_val_hash t
|
88
|
+
[t.hash].pack("Q>")
|
89
|
+
end
|
90
|
+
|
91
|
+
def unpack_val s
|
92
|
+
MessagePack.unpack(s[8..-1])
|
93
|
+
end
|
94
|
+
|
95
|
+
def val_equals_tuple s, t, th = make_val_hash(t)
|
96
|
+
s[0..7] == th && unpack_val(s) == t
|
97
|
+
end
|
98
|
+
end
|
data/lib/tuplex/version.rb
CHANGED
(the +1 -1 hunk for version.rb was not captured in this extract)
data/test/test-monotonic.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'tuplex'
|
3
|
+
|
4
|
+
class TestMonotonic < Minitest::Test
|
5
|
+
include Tuplex
|
6
|
+
|
7
|
+
def make_key_pairs vals, frame = proc {|x| [x]}
|
8
|
+
vals.sort.map { |x|
|
9
|
+
t = frame[x]
|
10
|
+
[t, make_key(t)]
|
11
|
+
}
|
12
|
+
end
|
13
|
+
|
14
|
+
def assert_monotonic pairs
|
15
|
+
pairs.each_cons(2) {|(t1,s1),(t2,s2)|
|
16
|
+
assert_operator(s1, :<, s2, "comparing #{t1.inspect} < #{t2.inspect}")
|
17
|
+
}
|
18
|
+
end
|
19
|
+
|
20
|
+
FRAMES = [
|
21
|
+
proc {|x| [x]},
|
22
|
+
proc {|x| {a: "foo", b: 123.456, c: [1, 2, x, 3, 4], d: 1.23}}
|
23
|
+
]
|
24
|
+
|
25
|
+
def test_numerics
|
26
|
+
ints = [-2**62, -123456, -10, -1, 0, 1, 2, 11, 99999, 2**62]
|
27
|
+
floats = [
|
28
|
+
-1.0/0, -1.23e300, -4.56e10, -7.89e-200,
|
29
|
+
1.23e-200, 4.56e10, 7.89e300, 1.0/0]
|
30
|
+
|
31
|
+
FRAMES.each do |frame|
|
32
|
+
assert_monotonic(make_key_pairs(ints+floats, frame))
|
33
|
+
end
|
34
|
+
|
35
|
+
assert_equal(make_key(0), make_key(0.0))
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_strings
|
39
|
+
strs = ["", "a", "aa", "ab", "b", "bb"]
|
40
|
+
|
41
|
+
FRAMES.each do |frame|
|
42
|
+
assert_monotonic(make_key_pairs(strs, frame))
|
43
|
+
end
|
44
|
+
|
45
|
+
assert_equal(make_key(""), make_key("\0"))
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_overflow
|
49
|
+
t = ["\xFF"] * 0x102
|
50
|
+
assert_raises(RuntimeError) {
|
51
|
+
make_key(t)
|
52
|
+
}
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tuplex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.1'
|
4
|
+
version: '0.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joel VanderWerf
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Constructs index keys for tuples.
|
14
14
|
email: vjoel@users.sourceforge.net
|
@@ -21,13 +21,17 @@ extra_rdoc_files:
|
|
21
21
|
files:
|
22
22
|
- COPYING
|
23
23
|
- README.md
|
24
|
+
- examples/lmdb-sorted.rb
|
25
|
+
- examples/lmdb.rb
|
24
26
|
- ext/isaac/extconf.rb
|
25
27
|
- ext/isaac/isaac.c
|
26
28
|
- ext/isaac/rand.c
|
27
29
|
- ext/isaac/rand.h
|
28
30
|
- ext/isaac/rand4.c
|
29
31
|
- ext/isaac/rand4.h
|
32
|
+
- lib/tuplex.rb
|
30
33
|
- lib/tuplex/version.rb
|
34
|
+
- test/test-monotonic.rb
|
31
35
|
homepage: https://github.com/vjoel/tuplex
|
32
36
|
licenses: []
|
33
37
|
metadata: {}
|
@@ -59,5 +63,6 @@ rubygems_version: 2.4.1
|
|
59
63
|
signing_key:
|
60
64
|
specification_version: 4
|
61
65
|
summary: Tuple index.
|
62
|
-
test_files:
|
66
|
+
test_files:
|
67
|
+
- test/test-monotonic.rb
|
63
68
|
has_rdoc:
|