tuplex 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b58ffe6b6580580163d4fe58df687ad5ea20e6de
4
- data.tar.gz: 5289b5307ea0788d2ad79ba50f8ee9b957f5b714
3
+ metadata.gz: 0afb0b081f7c4846c6aa138ea975f6ed30ab7e04
4
+ data.tar.gz: d6c419f04e8c802334eb27d3a1e9fb9f0325951d
5
5
  SHA512:
6
- metadata.gz: 652c10a64977a7b1a61b908b438615e47bd6f74c7db78e141bb6cb84552d51357dfb0078080cd8146fbbb048a95f7ff3ccbe6125d8f96761df2df4a9c550ff6d
7
- data.tar.gz: 7edc4d53d20e441c2feb42957e46e91de37e30aea649024743e59385e4b64df3723f1211c98a7e4b423d41aa73c9335401d72ad2727a5be4be293395ab799a40
6
+ metadata.gz: 25d656204c50d81c77fef4a49a570ada7c3960c8d1d441c879de35329e4151fa01a28d90a4a5f51e86cb98b187881efeab8001f0f943151b6334f405dcb64aaf
7
+ data.tar.gz: c290e67ad062ea39e214881a19b1385f6d36b802d013be527201d2c2261356f18b3212b36877ff217e16b57bac4433f3bd333914503fd9d6f127601b82aaf91a
data/README.md CHANGED
@@ -3,6 +3,57 @@ Tuplex
3
3
 
4
4
  Constructs index keys for tuples.
5
5
 
6
+ What is it for?
7
+ ---
8
+
9
+ 1. Assign keys to tuples (immutable value types) that do not have unique ids (primary keys). This can be used to store a tuplespace in a key-value store.
10
+
11
+ 2. Keys for similar tuples should be similar, to keep them close in storage, reducing cache misses and page faults.
12
+
13
+ 3. The key construction preserves ordering in such a way that the index can be used as an approximate secondary index for multidimensional range queries.
14
+
15
+ What does it do?
16
+ ---
17
+
18
+ Tuplex gives you a function that turns a tuple into a string that can be used as a key in a key-value store. The key is non-unique, so you'll need to use the value to disambiguate (see examples).
19
+
20
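+ For tuples of a given _signature_ (same array sizes, map keys, type of each value, etc.), the function is _monotonic_ (non-decreasing) in each value: increasing any one value, with the others held fixed, never produces a smaller key. For example: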
21
+
22
+ ["foo", 1, 2]
23
+ ["foo", 1, 3]
24
+
25
+ These two tuples have the same signature: three elements of types string, number, and number, respectively.
26
+
27
+ The index keys for these tuples are as follows:
28
+
29
+ >> Tuplex.make_key(["foo", 1, 2])
30
+ => "\x97\xB0kL\xA0\xC9\x00\xD1\x00hwn"
31
+ >> Tuplex.make_key(["foo", 1, 3])
32
+ => "\x97\xB0kL\xA0\xC9\x00\xD1\x00hwn\x00\b"
33
+ >> Tuplex.make_key(["foo", 1, 2]) < Tuplex.make_key(["foo", 1, 3])
34
+ => true
35
+
36
+ So, the ordering `2<3` is preserved in the key strings (lexically ordered).
37
+
38
+ This is also true when varying any number of terms, whether string or number:
39
+
40
+ >> Tuplex.make_key(["foo", 1, 2]) < Tuplex.make_key(["foozap", 7, 3])
41
+ => true
42
+
43
+ And it's true for arbitrary nesting:
44
+
45
+ >> Tuplex.make_key(["foo", {a: 1, b: [2]}]) < Tuplex.make_key(["foozap", {a: 7, b: [3]}])
46
+ => true
47
+
48
+ However, for tuples of different signatures, the ordering depends only on the signature and not on term values:
49
+
50
+ >> Tuplex.make_key(["a", 0]) < Tuplex.make_key([0, "a"])
51
+ => true
52
+ >> Tuplex.make_key(["z", 1000]) < Tuplex.make_key([0, "a"])
53
+ => true
54
+
55
+ In other words, all tuples of signature (String, Number) are contiguous in the index, and that contiguous group is separate from tuples of signature (Number, String).
56
+
6
57
  Contact
7
58
  =======
8
59
 
@@ -0,0 +1,31 @@
1
+ require 'tuplex'
2
+ require 'lmdb'
3
+ require 'tmpdir'
4
+
5
include Tuplex

# Open a "tuples" database inside a freshly created temporary directory.
# The handle is kept both in a local (for the loops below) and in an
# instance variable of the top-level object (so that #store can see it).
# NOTE(review): dupsort presumably lets several values share one key, which
# the non-unique Tuplex keys require -- confirm against the lmdb gem docs.
dir = Dir.mktmpdir
env = LMDB.new(dir)
@db = env.database("tuples", create: true, dupsort: true)
db = @db

# Writes tuple +t+ into the database under its Tuplex index key.
def store(t)
  key = make_key(t)
  @db[key] = make_val(t)
end

# Insert in random order; iteration below follows the database's key order.
(1..10).to_a.shuffle.each do |i|
  store([1, 2, i])          # same signature for each i
  store({a: "foo", b: i})   # same signature for each i, but different from above
  store({i => nil})         # different signature for each i
end

puts "\n\nIterating..."
db.each { |_key, val| p unpack_val(val) }

puts "\n\nSearching..."
db.each do |_key, val|
  puts "found!" if val_equals_tuple(val, [1, 2, 5])
end
@@ -0,0 +1,14 @@
1
+ require 'tuplex'
2
+ require 'lmdb'
3
+ require 'tmpdir'
4
+
5
include Tuplex

# Open a "tuples" database inside a freshly created temporary directory.
dir = Dir.mktmpdir
env = LMDB.new(dir)
db = env.database("tuples", create: true, dupsort: true)

# Store a single tuple under its Tuplex index key.
tuple = {a: 1, b: 2}
db[make_key(tuple)] = make_val(tuple)

# Dump every [key, decoded tuple] pair held in the database.
entries = db.map { |key, val| [key, unpack_val(val)] }
p entries
@@ -0,0 +1,98 @@
1
+ require 'msgpack'
2
+
3
module Tuplex
  module_function

  # Computes the structural signature of +t+.
  #
  # +t+ can be a tuple or a value in a tuple (that is, an entry in
  # an array or a value at some key in a hash).
  #
  # Returns 0 for nil/true/false, 1 for any Numeric, 2 for a String or
  # Symbol, an array of signatures for an Array, and a hash mapping each
  # key to the signature of its value for a Hash.
  #
  # Raises ArgumentError for any other kind of object.
  def signature(t)
    case t
    when nil, true, false then 0
    when Numeric then 1
    when String, Symbol then 2
    when Array then t.map { |v| signature(v) }
    when Hash then t.each_with_object({}) { |(k, v), h| h[k] = signature(v) }
    else raise ArgumentError, "cannot compute signature for #{t.inspect}"
    end
  end

  # Size in bytes of the string returned by #sig_key.
  SIG_KEY_SIZE = 8

  # Returns an 8-byte string derived from the signature of +t+; this is the
  # leading part of every index key, so tuples with equal signatures share it.
  #
  # NOTE(review): this relies on Object#hash, which Ruby does not guarantee
  # to be identical across interpreter invocations. Keys written by one
  # process may therefore not match keys computed by another -- confirm
  # before relying on this for a persistent store.
  def sig_key(t)
    [signature(t).hash].pack("q")
  end

  # Adds two byte strings as left-aligned, big-endian base-256 numbers.
  #
  # The shorter operand is padded with zero bytes on the right, so the
  # result has the length of the longer operand.
  #
  # Raises RuntimeError ("overflow") if a carry remains after the first byte.
  def str_sum(acc, s)
    long = acc.unpack("C*")
    short = s.unpack("C*")
    long, short = short, long if long.size < short.size

    carry = 0
    digits = []
    # zip pads the shorter operand with nil, which is treated as a zero byte.
    long.zip(short).reverse_each do |x, y|
      carry, digit = (x + (y || 0) + carry).divmod(256)
      digits << digit
    end
    raise "overflow" if carry != 0

    digits.reverse.pack("C*")
  end

  # https://en.wikipedia.org/wiki/Double-precision_floating-point_format
  # "%064b" % [-1.0].pack("G").unpack("Q>")
  # [0b1011111111110000000000000000000000000000000000000000000000000000].pack("Q>").unpack("G")

  # Masks for the 11-bit exponent and 52-bit mantissa fields of a double.
  FLOAT_EXPO_MASK = 0x7FF
  FLOAT_MANT_MASK = 0x000FFFFFFFFFFFFF

  # Returns the raw 11-bit exponent field of the double +x+ (sign discarded).
  def expo(x)
    ([x].pack("G").unpack("Q>")[0] >> 52) & FLOAT_EXPO_MASK
  end

  # Returns the raw 52-bit mantissa field of the double +x+.
  def mant(x)
    [x].pack("G").unpack("Q>")[0] & FLOAT_MANT_MASK
  end

  # Encodes the float +x+ as an 11-byte string (sign byte, 2-byte exponent,
  # 8-byte mantissa) whose bytewise ordering follows the numeric ordering.
  #
  # For negative values a larger exponent or mantissa means a smaller number,
  # so both fields are complemented within their width. Plain negation would
  # wrap around for a zero field (negative powers of two, negative
  # subnormals) and sort those values out of order.
  def float_to_key(x)
    if x >= 0
      [1, expo(x), mant(x)].pack("CS>Q>") # sparse
    else
      [0, expo(x) ^ FLOAT_EXPO_MASK, mant(x) ^ FLOAT_MANT_MASK].pack("CS>Q>")
    end
  end
  # def fk(x); "%064b" % float_to_key(x).unpack("Q>"); end

  MAX_SUM_KEY_SIZE = 500

  # Folds every value in +t+ into the running byte-string sum +acc+ using
  # #str_sum. Arrays and hashes are traversed recursively; hash keys do not
  # contribute to the sum, only hash values do.
  #
  # Raises ArgumentError for unsupported value types, and RuntimeError if
  # the sum overflows (see #str_sum).
  def sum_key(t, acc = "\0\0")
    case t
    when nil then str_sum(acc, "\0\0")
    when false then str_sum(acc, "\0\1")
    when true then str_sum(acc, "\0\2")
    when Numeric then str_sum(acc, "\0" + float_to_key(t.to_f))
    when String then str_sum(acc, "\0" + t) # truncate here
    when Symbol then str_sum(acc, "\0" + t.to_s) # and here
    when Array then t.inject(acc) { |s, v| sum_key(v, s) }
    when Hash then t.inject(acc) { |s, (_k, v)| sum_key(v, s) }
    else raise ArgumentError, "bad type: #{t.inspect}"
    end
  end

  MAX_KEY_SIZE = SIG_KEY_SIZE + MAX_SUM_KEY_SIZE
  # note: MDB_MAXKEYSIZE is 511

  # Builds the index key for tuple +t+: the signature key followed by the
  # value sum, limited to MAX_KEY_SIZE bytes, with trailing zero bytes
  # removed.
  def make_key(t)
    # Start/length slice: keeps at most MAX_KEY_SIZE bytes (an inclusive
    # 0..MAX_KEY_SIZE range would keep one byte too many).
    (sig_key(t) + sum_key(t))[0, MAX_KEY_SIZE].sub(/\0+\z/, "")
  end

  # Builds the stored value for tuple +t+: an 8-byte hash prefix followed by
  # the MessagePack serialization of +t+.
  def make_val(t)
    make_val_hash(t) + MessagePack.pack(t)
  end

  # Returns the 8-byte prefix used by #make_val and #val_equals_tuple.
  #
  # NOTE(review): like #sig_key this is based on Object#hash, which may
  # differ between Ruby processes -- confirm before comparing prefixes that
  # were written by another process.
  def make_val_hash(t)
    [t.hash].pack("Q>")
  end

  # Decodes the tuple from a stored value +s+ by skipping the 8-byte hash
  # prefix and unpacking the rest with MessagePack.
  def unpack_val(s)
    MessagePack.unpack(s.byteslice(8..-1))
  end

  # Returns true if stored value +s+ holds tuple +t+. The 8-byte prefix is
  # compared first, so the value is only unpacked when the prefixes match.
  # +th+ may be passed in to avoid recomputing the prefix on every call.
  def val_equals_tuple(s, t, th = make_val_hash(t))
    s.byteslice(0, 8) == th && unpack_val(s) == t
  end
end
@@ -1,3 +1,3 @@
1
1
  module Tuplex
2
- VERSION = "0.1"
2
+ VERSION = "0.2"
3
3
  end
@@ -0,0 +1,54 @@
1
+ require 'minitest/autorun'
2
+ require 'tuplex'
3
+
4
# Checks that Tuplex.make_key preserves the ordering of values when they
# are embedded in tuples of a fixed signature.
class TestMonotonic < Minitest::Test
  include Tuplex

  # Each frame embeds a single value x in a tuple of fixed shape: a
  # one-element array, and a nested hash with x in the middle of an array.
  FRAMES = [
    proc { |x| [x] },
    proc { |x| {a: "foo", b: 123.456, c: [1, 2, x, 3, 4], d: 1.23} }
  ]

  # Sorts +vals+, wraps each one with +frame+, and returns an array of
  # [tuple, key] pairs in that sorted order.
  def make_key_pairs(vals, frame = proc { |x| [x] })
    vals.sort.map do |val|
      tuple = frame.call(val)
      [tuple, make_key(tuple)]
    end
  end

  # Asserts that the keys of consecutive pairs are strictly increasing.
  def assert_monotonic(pairs)
    pairs.each_cons(2) do |(t1, s1), (t2, s2)|
      assert_operator(s1, :<, s2, "comparing #{t1.inspect} < #{t2.inspect}")
    end
  end

  def test_numerics
    ints = [-2**62, -123456, -10, -1, 0, 1, 2, 11, 99999, 2**62]
    floats = [
      -1.0/0, -1.23e300, -4.56e10, -7.89e-200,
      1.23e-200, 4.56e10, 7.89e300, 1.0/0
    ]
    numbers = ints + floats

    FRAMES.each { |frame| assert_monotonic(make_key_pairs(numbers, frame)) }

    # An integer and the float of equal value produce the same key.
    assert_equal(make_key(0), make_key(0.0))
  end

  def test_strings
    strs = ["", "a", "aa", "ab", "b", "bb"]

    FRAMES.each { |frame| assert_monotonic(make_key_pairs(strs, frame)) }

    # The empty string and a single NUL byte produce the same key.
    assert_equal(make_key(""), make_key("\0"))
  end

  def test_overflow
    # 0x102 copies of "\xFF" are expected to make key construction raise.
    t = ["\xFF"] * 0x102
    assert_raises(RuntimeError) { make_key(t) }
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tuplex
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: '0.2'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joel VanderWerf
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-01 00:00:00.000000000 Z
11
+ date: 2014-09-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Constructs index keys for tuples.
14
14
  email: vjoel@users.sourceforge.net
@@ -21,13 +21,17 @@ extra_rdoc_files:
21
21
  files:
22
22
  - COPYING
23
23
  - README.md
24
+ - examples/lmdb-sorted.rb
25
+ - examples/lmdb.rb
24
26
  - ext/isaac/extconf.rb
25
27
  - ext/isaac/isaac.c
26
28
  - ext/isaac/rand.c
27
29
  - ext/isaac/rand.h
28
30
  - ext/isaac/rand4.c
29
31
  - ext/isaac/rand4.h
32
+ - lib/tuplex.rb
30
33
  - lib/tuplex/version.rb
34
+ - test/test-monotonic.rb
31
35
  homepage: https://github.com/vjoel/tuplex
32
36
  licenses: []
33
37
  metadata: {}
@@ -59,5 +63,6 @@ rubygems_version: 2.4.1
59
63
  signing_key:
60
64
  specification_version: 4
61
65
  summary: Tuple index.
62
- test_files: []
66
+ test_files:
67
+ - test/test-monotonic.rb
63
68
  has_rdoc: