tuplex 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -0
- data/examples/lmdb-sorted.rb +31 -0
- data/examples/lmdb.rb +14 -0
- data/lib/tuplex.rb +98 -0
- data/lib/tuplex/version.rb +1 -1
- data/test/test-monotonic.rb +54 -0
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0afb0b081f7c4846c6aa138ea975f6ed30ab7e04
|
4
|
+
data.tar.gz: d6c419f04e8c802334eb27d3a1e9fb9f0325951d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25d656204c50d81c77fef4a49a570ada7c3960c8d1d441c879de35329e4151fa01a28d90a4a5f51e86cb98b187881efeab8001f0f943151b6334f405dcb64aaf
|
7
|
+
data.tar.gz: c290e67ad062ea39e214881a19b1385f6d36b802d013be527201d2c2261356f18b3212b36877ff217e16b57bac4433f3bd333914503fd9d6f127601b82aaf91a
|
data/README.md
CHANGED
@@ -3,6 +3,57 @@ Tuplex
|
|
3
3
|
|
4
4
|
Constructs index keys for tuples.
|
5
5
|
|
6
|
+
What is it for?
|
7
|
+
---
|
8
|
+
|
9
|
+
1. Assign keys to tuples (immutable value types) that do not have unique ids (primary keys). This can be used to store a tuplespace in a key-value store.
|
10
|
+
|
11
|
+
2. Keys for similar tuples should be similar, to keep them close in storage, reducing cache misses and page faults.
|
12
|
+
|
13
|
+
3. The key construction preserves ordering in such a way that the index can be used as an approximate secondary index for multidimensional range queries.
|
14
|
+
|
15
|
+
What does it do?
|
16
|
+
---
|
17
|
+
|
18
|
+
Tuplex gives you a function that turns a tuple into a string that can be used as a key in a key-value store. The key is non-unique, so you'll need to use the value to disambiguate (see examples).
|
19
|
+
|
20
|
+
For tuples of a given _signature_ (same array sizes, map keys, type of each value, etc.), the function is _monotonic_ on each value. For example:
|
21
|
+
|
22
|
+
["foo", 1, 2]
|
23
|
+
["foo", 1, 3]
|
24
|
+
|
25
|
+
These two tuples have the same signature: three elements of types string, number, and number, respectively.
|
26
|
+
|
27
|
+
The index keys for these tuples are as follows:
|
28
|
+
|
29
|
+
>> Tuplex.make_key(["foo", 1, 2])
|
30
|
+
=> "\x97\xB0kL\xA0\xC9\x00\xD1\x00hwn"
|
31
|
+
>> Tuplex.make_key(["foo", 1, 3])
|
32
|
+
=> "\x97\xB0kL\xA0\xC9\x00\xD1\x00hwn\x00\b"
|
33
|
+
>> Tuplex.make_key(["foo", 1, 2]) < Tuplex.make_key(["foo", 1, 3])
|
34
|
+
=> true
|
35
|
+
|
36
|
+
So, the ordering `2<3` is preserved in the key strings (lexically ordered).
|
37
|
+
|
38
|
+
This is also true when varying any number of terms, whether string or number:
|
39
|
+
|
40
|
+
>> Tuplex.make_key(["foo", 1, 2]) < Tuplex.make_key(["foozap", 7, 3])
|
41
|
+
=> true
|
42
|
+
|
43
|
+
And it's true for arbitrary nesting:
|
44
|
+
|
45
|
+
>> Tuplex.make_key(["foo", {a: 1, b: [2]}]) < Tuplex.make_key(["foozap", {a: 7, b: [3]}])
|
46
|
+
=> true
|
47
|
+
|
48
|
+
However, for tuples of different signatures, the ordering depends only on the signature and not on term values:
|
49
|
+
|
50
|
+
>> Tuplex.make_key(["a", 0]) < Tuplex.make_key([0, "a"])
|
51
|
+
=> true
|
52
|
+
>> Tuplex.make_key(["z", 1000]) < Tuplex.make_key([0, "a"])
|
53
|
+
=> true
|
54
|
+
|
55
|
+
In other words, all tuples of signature (String, Number) are contiguous in the index, and that contiguous group is separate from tuples of signature (Number, String).
|
56
|
+
|
6
57
|
Contact
|
7
58
|
=======
|
8
59
|
|
data/examples/lmdb-sorted.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'tuplex'
|
2
|
+
require 'lmdb'
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
# Example: store tuples of several signatures in an LMDB database keyed by
# Tuplex keys, then walk the database twice -- once to print every stored
# tuple, once to look for one particular tuple.
include Tuplex

tmp_path = Dir.mktmpdir
env = LMDB.new(tmp_path)

# dupsort: Tuplex keys are not unique, so several values may share one key.
@db = env.database("tuples", create: true, dupsort: true)
db = @db

# Writes tuple +t+ under its Tuplex key; the value carries the packed tuple.
def store(t)
  key = make_key(t)
  @db[key] = make_val(t)
end

(1..10).to_a.shuffle.each { |i|
  store([1, 2, i])            # same signature for each i
  store({a: "foo", b: i})     # same signature for each i, but different from above
  store({i => nil})           # different signature for each i
}

puts "\n\nIterating..."
db.each { |_key, packed| p unpack_val(packed) }

puts "\n\nSearching..."
db.each { |_key, packed|
  puts "found!" if val_equals_tuple(packed, [1, 2, 5])
}
|
data/examples/lmdb.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'tuplex'
|
2
|
+
require 'lmdb'
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
# Minimal example: store one tuple in LMDB under its Tuplex key and print the
# database contents as [key, tuple] pairs.
include Tuplex

tmp_path = Dir.mktmpdir
env = LMDB.new(tmp_path)
db = env.database("tuples", create: true, dupsort: true)

t = {a: 1, b: 2}
key = make_key(t)
db[key] = make_val(t)

entries = db.map { |k, packed| [k, unpack_val(packed)] }
p entries
|
data/lib/tuplex.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'msgpack'
|
2
|
+
|
3
|
+
# Constructs index keys for tuples (nested arrays/hashes of nil, booleans,
# numbers, strings and symbols).
#
# A key is the concatenation of
#   * a fixed-size hash of the tuple's _signature_ (its shape and the type
#     class of every value), and
#   * a "sum key": the byte-wise sum of an order-preserving encoding of every
#     value in the tuple.
#
# For tuples with the same signature the key is monotonic in each value, so
# lexical ordering of keys follows ordering of the values.
module Tuplex
  module_function

  # Returns the signature of +t+.
  #
  # +t+ can be a tuple or a value in a tuple (that is, an entry in an array or
  # a value at some key in a hash). Scalars map to a small integer type class
  # (0: nil/true/false, 1: Numeric, 2: String/Symbol); arrays and hashes map
  # to an array/hash of the signatures of their values.
  #
  # Raises ArgumentError for any other kind of object.
  def signature(t)
    case t
    when nil, true, false then 0
    when Numeric          then 1
    when String, Symbol   then 2
    when Array
      t.map { |v| signature(v) }
    when Hash
      t.each_with_object({}) { |(k, v), h| h[k] = signature(v) }
    else
      raise ArgumentError, "cannot compute signature for #{t.inspect}"
    end
  end

  # Number of bytes produced by #sig_key.
  SIG_KEY_SIZE = 8

  # Returns SIG_KEY_SIZE bytes identifying the signature of +t+.
  #
  # NOTE: this relies on Object#hash, whose value is not guaranteed to be the
  # same in different Ruby processes, and packs it in native byte order ("q").
  # Keys are therefore only comparable within the process that made them.
  def sig_key(t)
    [signature(t).hash].pack("q")
  end

  # Adds two byte strings as big-endian base-256 numbers that are aligned at
  # their first byte; the shorter string is treated as if it were padded on
  # the right with zero bytes. Returns a binary string as long as the longer
  # input.
  #
  # Raises RuntimeError ("overflow") if the sum carries out of the first byte.
  def str_sum(acc, s)
    longer  = acc.unpack("C*")
    shorter = s.unpack("C*")
    longer, shorter = shorter, longer if longer.size < shorter.size

    carry = 0
    digits = []
    # Walk from the least significant (last) byte so carries propagate left.
    longer.zip(shorter).reverse_each do |x, y|
      carry, digit = (x + (y || 0) + carry).divmod(256)
      digits << digit
    end
    raise "overflow" if carry != 0

    digits.reverse.pack("C*")
  end

  # https://en.wikipedia.org/wiki/Double-precision_floating-point_format
  #   "%064b" % [-1.0].pack("G").unpack("Q>")
  #   [0b1011111111110000000000000000000000000000000000000000000000000000].pack("Q>").unpack("G")

  # Biased 11-bit exponent field of the double +x+.
  def expo(x)
    ([x].pack("G")[0..1].unpack("S>")[0] & 0b0111111111110000) >> 4
  end

  # 52-bit mantissa (fraction) field of the double +x+.
  def mant(x)
    [x].pack("G").unpack("Q>")[0] & 0x000FFFFFFFFFFFFF
  end

  # Encodes the Float +x+ as 11 bytes (sign class, exponent, mantissa) whose
  # lexical order matches the numeric order of +x+.
  #
  # Non-negative values are tagged 1 and store exponent and mantissa as-is.
  # Negative values are tagged 0 and store the bitwise complement of each
  # field, so that a larger magnitude yields a smaller key. Complementing
  # (rather than negating) matters when a field is zero: negation would leave
  # it at 0 and make, for example, -1.0 sort below -1.5.
  def float_to_key(x)
    if x >= 0
      [1, expo(x), mant(x)].pack("CS>Q>") # sparse
    else
      [0, ~expo(x) & 0xFFFF, ~mant(x) & 0xFFFF_FFFF_FFFF_FFFF].pack("CS>Q>")
    end
  end

  # Intended upper bound on the size of the sum part of a key.
  MAX_SUM_KEY_SIZE = 500

  # Folds the encodings of every value in +t+ into +acc+ using #str_sum and
  # returns the resulting byte string. Hash keys do not contribute (they are
  # already part of the signature); only hash values do.
  #
  # Each scalar encoding is prefixed with a zero byte, which leaves room for
  # carries when many values are summed.
  #
  # Raises ArgumentError for unsupported value types, and RuntimeError
  # ("overflow") if the sum no longer fits (see #str_sum).
  def sum_key(t, acc = "\0\0")
    case t
    when nil     then str_sum(acc, "\0\0")
    when false   then str_sum(acc, "\0\1")
    when true    then str_sum(acc, "\0\2")
    when Numeric then str_sum(acc, "\0" + float_to_key(t.to_f))
    when String  then str_sum(acc, "\0" + t)      # TODO: truncate here
    when Symbol  then str_sum(acc, "\0" + t.to_s) # TODO: and here
    when Array   then t.inject(acc) { |sum, v| sum_key(v, sum) }
    when Hash    then t.inject(acc) { |sum, (_k, v)| sum_key(v, sum) }
    else raise ArgumentError, "bad type: #{t.inspect}"
    end
  end

  # Maximum size in bytes of a key returned by #make_key.
  # note: MDB_MAXKEYSIZE is 511
  MAX_KEY_SIZE = SIG_KEY_SIZE + MAX_SUM_KEY_SIZE

  # Returns the index key for tuple +t+: signature key followed by sum key,
  # cut to at most MAX_KEY_SIZE bytes, with trailing zero bytes removed.
  def make_key(t)
    (sig_key(t) + sum_key(t)).byteslice(0, MAX_KEY_SIZE).sub(/\0+\z/, "")
  end

  # Number of bytes #make_val_hash puts in front of the packed tuple.
  VAL_HASH_SIZE = 8

  # Returns the value to store for tuple +t+: a fixed-size hash of the tuple
  # followed by the tuple serialized with MessagePack.
  def make_val(t)
    make_val_hash(t) + MessagePack.pack(t)
  end

  # Returns VAL_HASH_SIZE bytes derived from +t.hash+, used as a cheap
  # pre-check before unpacking a stored value.
  #
  # NOTE: like #sig_key this depends on Object#hash, which is not guaranteed
  # to be the same in different Ruby processes.
  def make_val_hash(t)
    [t.hash].pack("Q>")
  end

  # Returns the tuple serialized in the stored value +s+ (see #make_val).
  def unpack_val(s)
    MessagePack.unpack(s.byteslice(VAL_HASH_SIZE..-1))
  end

  # True if the stored value +s+ holds tuple +t+. The hash prefix is compared
  # first so that most non-matching values are rejected without unpacking.
  # +th+ may be passed in to avoid recomputing the hash of +t+ on every call.
  def val_equals_tuple(s, t, th = make_val_hash(t))
    s.byteslice(0, VAL_HASH_SIZE) == th && unpack_val(s) == t
  end
end
|
data/lib/tuplex/version.rb
CHANGED
data/test/test-monotonic.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'tuplex'
|
3
|
+
|
4
|
+
# Checks that Tuplex keys preserve the ordering of values placed at a fixed
# position inside tuples of a fixed signature.
class TestMonotonic < Minitest::Test
  include Tuplex

  # Sorts +vals+, wraps each one in a tuple via +frame+, and returns
  # [tuple, key] pairs in that sorted order.
  def make_key_pairs(vals, frame = proc { |x| [x] })
    vals.sort.map do |val|
      tuple = frame[val]
      [tuple, make_key(tuple)]
    end
  end

  # Asserts that the keys of consecutive pairs are strictly increasing.
  def assert_monotonic(pairs)
    pairs.each_cons(2) do |(t1, s1), (t2, s2)|
      assert_operator(s1, :<, s2, "comparing #{t1.inspect} < #{t2.inspect}")
    end
  end

  # Tuple shapes in which the value under test is embedded: alone in an
  # array, and nested inside a hash among other fixed values.
  FRAMES = [
    proc { |x| [x] },
    proc { |x| {a: "foo", b: 123.456, c: [1, 2, x, 3, 4], d: 1.23} }
  ]

  def test_numerics
    ints = [-2**62, -123456, -10, -1, 0, 1, 2, 11, 99999, 2**62]
    floats = [
      -1.0/0, -1.23e300, -4.56e10, -7.89e-200,
      1.23e-200, 4.56e10, 7.89e300, 1.0/0
    ]
    numbers = ints + floats

    FRAMES.each { |frame| assert_monotonic(make_key_pairs(numbers, frame)) }

    # Integers and floats of equal value share a key.
    assert_equal(make_key(0), make_key(0.0))
  end

  def test_strings
    strs = ["", "a", "aa", "ab", "b", "bb"]

    FRAMES.each { |frame| assert_monotonic(make_key_pairs(strs, frame)) }

    # Trailing zero bytes are not significant in a key.
    assert_equal(make_key(""), make_key("\0"))
  end

  def test_overflow
    t = ["\xFF"] * 0x102
    assert_raises(RuntimeError) { make_key(t) }
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tuplex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.1'
|
4
|
+
version: '0.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joel VanderWerf
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Constructs index keys for tuples.
|
14
14
|
email: vjoel@users.sourceforge.net
|
@@ -21,13 +21,17 @@ extra_rdoc_files:
|
|
21
21
|
files:
|
22
22
|
- COPYING
|
23
23
|
- README.md
|
24
|
+
- examples/lmdb-sorted.rb
|
25
|
+
- examples/lmdb.rb
|
24
26
|
- ext/isaac/extconf.rb
|
25
27
|
- ext/isaac/isaac.c
|
26
28
|
- ext/isaac/rand.c
|
27
29
|
- ext/isaac/rand.h
|
28
30
|
- ext/isaac/rand4.c
|
29
31
|
- ext/isaac/rand4.h
|
32
|
+
- lib/tuplex.rb
|
30
33
|
- lib/tuplex/version.rb
|
34
|
+
- test/test-monotonic.rb
|
31
35
|
homepage: https://github.com/vjoel/tuplex
|
32
36
|
licenses: []
|
33
37
|
metadata: {}
|
@@ -59,5 +63,6 @@ rubygems_version: 2.4.1
|
|
59
63
|
signing_key:
|
60
64
|
specification_version: 4
|
61
65
|
summary: Tuple index.
|
62
|
-
test_files:
|
66
|
+
test_files:
|
67
|
+
- test/test-monotonic.rb
|
63
68
|
has_rdoc:
|