tuple 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Justin Balthrop
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,32 @@
1
+ = Tuple
2
+
3
+ Tuple provides fast, binary-sortable serialization for arrays of simple Ruby types. This
4
+ means you do not have to deserialize your tuples to store them. This leads to significant
5
+ performance benefits when using Tuples as keys for a BTree.
6
+
7
+ A Tuple is just an Array of any number of simple Ruby types. The following types are
8
+ supported (listed in ascending sort order):
9
+
10
+ 1. NilClass
11
+ 2. FalseClass
12
+ 3. Integer (Fixnum or Bignum)
13
+ 4. String
14
+ 5. Symbol
15
+ 6. TrueClass
16
+
17
+ == Usage:
18
+
19
+ require 'tuple'
20
+
21
+ data = Tuple.dump([1, -43, :foo, "bar", true, false, nil])
22
+ => "\000\000\020\000\000\000\000\001..."
23
+ Tuple.load(data)
24
+ => [1, -43, :foo, "bar", true, false, nil]
25
+
26
+ == Install:
27
+
28
+ sudo gem install ninjudd-tuple -s http://gems.github.com
29
+
30
+ == License:
31
+
32
+ Copyright (c) 2009 Justin Balthrop, Geni.com; Published under The MIT License, see LICENSE
@@ -0,0 +1,2 @@
1
# Build script for the C extension: mkmf writes a Makefile for the
# target named 'tuple'.
require 'mkmf'

create_makefile('tuple')
@@ -0,0 +1,235 @@
1
+ #include "ruby.h"
2
+ #include <netinet/in.h>
3
+
4
+ VALUE mTuple;
5
+ VALUE rb_cDate;
6
+
7
+ #define TRUE_SORT 255 // TrueClass
8
+ #define TUPLE_SORT 192 // Array
9
+ #define TUPLE_END 191 // For nested tuples
10
+ #define TIME_SORT 128 // Time
11
+ #define SYM_SORT 64 // Symbol
12
+ #define STR_SORT 32 // String
13
+ #define INTP_SORT 16 // Integer (Positive)
14
+ #define INTN_SORT 8 // Integer (Negative)
15
+ #define FALSE_SORT 1 // FalseClass
16
+ #define NIL_SORT 0 // NilClass
17
+
18
+ #define BDIGITS(x) ((BDIGIT*)RBIGNUM(x)->digits)
19
+
20
+ static void null_pad(VALUE data, int len) {
21
+ static u_int8_t null = 0;
22
+
23
+ // Pad with null bytes so subsequent fields will be aligned.
24
+ while (len % 4 != 0) {
25
+ rb_str_cat(data, (char*)&null, 1);
26
+ len++;
27
+ }
28
+ }
29
+
30
+
31
/*
 * Return one 32-bit half of a 64-bit integer.
 *
 * num  - value to split.
 * word - non-zero selects the most significant 32 bits,
 *        zero selects the least significant 32 bits.
 *
 * The halves are taken with shifts on the unsigned bit pattern, so the
 * result is the same on any byte order and no object is read through a
 * pointer of an incompatible type.
 */
u_int32_t split64(int64_t num, int word) {
  u_int64_t bits = (u_int64_t)num;

  return (u_int32_t)(word ? (bits >> 32) : (bits & 0xFFFFFFFFu));
}
40
+
41
+
42
+
43
+ /*
44
+ * call-seq:
45
+ * Tuple.dump(tuple) -> string
46
+ *
47
+ * Dumps an array of simple Ruby types into a string of binary data.
48
+ *
49
+ */
50
+ static VALUE tuple_dump(VALUE self, VALUE tuple) {
51
+ VALUE data = rb_str_new2("");
52
+ VALUE item;
53
+ int i, j, len, sign;
54
+ u_int8_t header[4];
55
+ u_int32_t digit;
56
+ int64_t fixnum;
57
+ BDIGIT *digits;
58
+
59
+
60
+ if (TYPE(tuple) != T_ARRAY) tuple = rb_ary_new4(1, &tuple);
61
+
62
+ for (i = 0; i < RARRAY(tuple)->len; i++) {
63
+ item = RARRAY(tuple)->ptr[i];
64
+ header[0] = header[1] = header[2] = header[3] = 0;
65
+ if (FIXNUM_P(item)) {
66
+ fixnum = FIX2LONG(item);
67
+ sign = (fixnum >= 0);
68
+ if (!sign) fixnum = -fixnum;
69
+ len = fixnum > UINT_MAX ? 2 : 1;
70
+ header[2] = sign ? INTP_SORT : INTN_SORT;
71
+ header[3] = sign ? len : UCHAR_MAX - len;
72
+ rb_str_cat(data, (char*)&header, sizeof(header));
73
+
74
+ if (len == 2) {
75
+ digit = split64(fixnum, 1);
76
+ digit = htonl(sign ? digit : UINT_MAX - digit);
77
+ rb_str_cat(data, (char*)&digit, sizeof(digit));
78
+ }
79
+ digit = split64(fixnum, 0);
80
+ digit = htonl(sign ? digit : UINT_MAX - digit);
81
+ rb_str_cat(data, (char*)&digit, sizeof(digit));
82
+ } else if (TYPE(item) == T_BIGNUM) {
83
+ sign = RBIGNUM(item)->sign;
84
+ len = RBIGNUM(item)->len;
85
+ header[2] = sign ? INTP_SORT : INTN_SORT;
86
+ header[3] = sign ? len : UCHAR_MAX - len;
87
+ rb_str_cat(data, (char*)&header, sizeof(header));
88
+
89
+ digits = BDIGITS(item);
90
+ for (j = len-1; j >= 0; j--) {
91
+ digit = htonl(sign ? digits[j] : (UINT_MAX - digits[j]));
92
+ rb_str_cat(data, (char*)&digit, sizeof(digit));
93
+ }
94
+ } else if (SYMBOL_P(item) || TYPE(item) == T_STRING) {
95
+ if (SYMBOL_P(item)) {
96
+ header[2] = SYM_SORT;
97
+ item = rb_funcall(item, rb_intern("to_s"), 0);
98
+ } else {
99
+ header[2] = STR_SORT;
100
+ }
101
+ rb_str_cat(data, (char*)&header, sizeof(header));
102
+ len = RSTRING_LEN(item);
103
+ rb_str_cat(data, RSTRING_PTR(item), len);
104
+
105
+ null_pad(data, len);
106
+ } else if (rb_obj_class(item) == rb_cTime || rb_obj_class(item) == rb_cDate) {
107
+ header[2] = TIME_SORT;
108
+ rb_str_cat(data, (char*)&header, sizeof(header));
109
+
110
+ if (rb_obj_class(item) == rb_cTime) {
111
+ item = rb_funcall(item, rb_intern("getgm"), 0);
112
+ item = rb_funcall(item, rb_intern("strftime"), 1, rb_str_new2("%Y/%m/%d %H:%M:%S +0000"));
113
+ } else {
114
+ item = rb_funcall(item, rb_intern("strftime"), 1, rb_str_new2("%Y/%m/%d"));
115
+ }
116
+ len = RSTRING_LEN(item);
117
+ rb_str_cat(data, RSTRING_PTR(item), len);
118
+
119
+ null_pad(data, len);
120
+ } else if (TYPE(item) == T_ARRAY) {
121
+ header[2] = TUPLE_SORT;
122
+ rb_str_cat(data, (char*)&header, sizeof(header));
123
+
124
+ rb_str_concat(data, tuple_dump(mTuple, item));
125
+
126
+ header[2] = TUPLE_END;
127
+ rb_str_cat(data, (char*)&header, sizeof(header));
128
+ } else {
129
+ if (item == Qnil) header[2] = NIL_SORT;
130
+ else if (item == Qtrue) header[2] = TRUE_SORT;
131
+ else if (item == Qfalse) header[2] = FALSE_SORT;
132
+ else rb_raise(rb_eTypeError, "invalid type %s in tuple", rb_obj_classname(item));
133
+
134
+ rb_str_cat(data, (char*)&header, sizeof(header));
135
+ }
136
+ }
137
+ return data;
138
+ }
139
+
140
+ static VALUE empty_bignum(int sign, int len) {
141
+ /* Create an empty bignum with the right number of digits. */
142
+ NEWOBJ(num, struct RBignum);
143
+ OBJSETUP(num, rb_cBignum, T_BIGNUM);
144
+ num->sign = sign ? 1 : 0;
145
+ num->len = len;
146
+ num->digits = ALLOC_N(BDIGIT, len);
147
+
148
+ return (VALUE)num;
149
+ }
150
+
151
+ static VALUE tuple_parse(void **data, int data_len) {
152
+ VALUE tuple = rb_ary_new();
153
+ VALUE item;
154
+ void* ptr = *data; *data = &ptr;
155
+ void* end = ptr + data_len;
156
+ int i, len, sign;
157
+ u_int8_t header[4];
158
+ u_int32_t digit;
159
+ BDIGIT *digits;
160
+
161
+ while (ptr < end) {
162
+ memcpy(header, ptr, 4);
163
+ ptr += 4;
164
+
165
+ switch(header[2]) {
166
+ case TRUE_SORT: rb_ary_push(tuple, Qtrue); break;
167
+ case FALSE_SORT: rb_ary_push(tuple, Qfalse); break;
168
+ case NIL_SORT: rb_ary_push(tuple, Qnil); break;
169
+ case INTP_SORT:
170
+ case INTN_SORT:
171
+ sign = (header[2] == INTP_SORT);
172
+ len = sign ? header[3] : (UCHAR_MAX - header[3]);
173
+
174
+ item = empty_bignum(sign, len);
175
+ digits = BDIGITS(item);
176
+ for (i = len-1; i >= 0; i--) {
177
+ digit = ntohl(*(u_int32_t*)ptr);
178
+ digits[i] = sign ? digit : UINT_MAX - digit;
179
+ ptr += 4;
180
+ }
181
+ rb_ary_push(tuple, item);
182
+ break;
183
+ case STR_SORT:
184
+ case SYM_SORT:
185
+ item = rb_str_new2(ptr);
186
+ len = RSTRING_LEN(item);
187
+ if (header[2] == SYM_SORT) item = rb_funcall(item, rb_intern("to_sym"), 0);
188
+ rb_ary_push(tuple, item);
189
+ while (len % 4 != 0) len++; ptr += len;
190
+ break;
191
+ case TIME_SORT:
192
+ item = rb_str_new2(ptr);
193
+ len = RSTRING_LEN(item);
194
+ if (len == 10) item = rb_funcall(rb_cDate, rb_intern("parse"), 1, item);
195
+ else item = rb_funcall(rb_cTime, rb_intern("parse"), 1, item);
196
+ rb_ary_push(tuple, item);
197
+ while (len % 4 != 0) len++; ptr += len;
198
+ break;
199
+ case TUPLE_SORT:
200
+ item = tuple_parse(&ptr, end - ptr);
201
+ rb_ary_push(tuple, item);
202
+ break;
203
+ case TUPLE_END:
204
+ return tuple;
205
+ default:
206
+ rb_raise(rb_eTypeError, "invalid type code %d in tuple", header[2]);
207
+ break;
208
+ }
209
+ }
210
+ return tuple;
211
+ }
212
+
213
+ /*
214
+ * call-seq:
215
+ * Tuple.load(string) -> tuple
216
+ *
217
+ * Reads in a previously dumped tuple from a string of binary data.
218
+ *
219
+ */
220
+ static VALUE tuple_load(VALUE self, VALUE data) {
221
+ data = StringValue(data);
222
+ void* ptr = RSTRING_PTR(data);
223
+ return tuple_parse(&ptr, RSTRING_LEN(data));
224
+ }
225
+
226
+ VALUE mTuple;
227
+ void Init_tuple() {
228
+ rb_require("time");
229
+ rb_require("date");
230
+ rb_cDate = rb_const_get(rb_cObject, rb_intern("Date"));
231
+
232
+ mTuple = rb_define_module("Tuple");
233
+ rb_define_module_function(mTuple, "dump", tuple_dump, 1);
234
+ rb_define_module_function(mTuple, "load", tuple_load, 1);
235
+ }
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+ require 'mocha'
5
+
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../ext')
7
+ require 'tuple'
8
+
9
# Fallback for Rubies whose Array has no #shuffle: returns a new array with
# the same elements in random order and leaves the receiver untouched.
unless [].respond_to?(:shuffle)
  class Array
    def shuffle
      pool = dup
      # Pull one randomly chosen element out of the pool per slot.
      Array.new(size) { pool.delete_at(rand(pool.size)) }
    end
  end
end
20
+
21
+ class Test::Unit::TestCase
22
+ end
@@ -0,0 +1,96 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Test-only override: two times are equal when they fall in the same whole
# second, so sub-second precision is ignored when comparing round-tripped
# values.
class Time
  def ==(other)
    whole_seconds = to_i
    whole_seconds == other.to_i
  end
end
9
+
10
# Round-trip and sort-order tests for Tuple.dump / Tuple.load.
class TupleTest < Test::Unit::TestCase
  # One value of each supported type, including a nested tuple.
  should "dump and load arrays of simple types" do
    original = [1, true, :foo, "foo", -1001, false, nil, Time.now, Date.today - 7, [:foo, 1, 4, nil]]
    assert_equal original, Tuple.load(Tuple.dump(original))
  end

  # Integers on both sides of the 32-bit and 64-bit boundaries, listed in
  # descending order, followed by their negations.
  should "dump, load, and sort fixnums and bignums" do
    descending = [2**64, 2**38, 2**32, 2**32 - 1, 2**31, 2**31 - 1, 1, 0]
    descending += descending.reverse.map { |n| -n }
    assert_equal descending, Tuple.load(Tuple.dump(descending))
    assert_equal descending.reverse, descending.sort_by { |n| Tuple.dump(n) }
  end

  should "convert single value into array" do
    assert_equal [1], Tuple.load(Tuple.dump(1))
  end

  # The same time must always serialize to the same bytes.
  should "dump times consistently" do
    stamp = '2009-10-15 1:23:45 PM'
    reference = Tuple.dump(Time.parse(stamp))
    100000.times { assert_equal reference, Tuple.dump(Time.parse(stamp)) }
  end

  # Sorting by the dumped bytes must give the documented type and value order.
  should "sort tuples using binary" do
    now   = Time.now.getgm
    today = Date.parse(now.to_s)
    one_day = 24 * 60 * 60

    tuples = [
      [1, "foo"],
      [1, true],
      [2],
      [1],
      [nil],
      [true],
      [:foo, -1],
      [:foo, -2**64],
      [:foo, 2**64],
      [1, "foo", 7, nil, false, true],
      [1, "foo", 7, nil, false, false],
      ["charles", "atlas"],
      ["charles", "atlas", "shrugged"],
      ["charles", "atlantic"],
      ["charles", "atlas jr."],
      ["charles", "atlas", "world's", "strongest", "man"],
      ["charles", "atlas", 5],
      [now, "foo"],
      [now, "bar"],
      [now - one_day],
      [today + 1],
      [today - 1],
      [today],
    ]

    expected = [
      [nil],
      [1],
      [1, "foo"],
      [1, "foo", 7, nil, false, false],
      [1, "foo", 7, nil, false, true],
      [1, true],
      [2],
      ["charles", "atlantic"],
      ["charles", "atlas"],
      ["charles", "atlas", 5],
      ["charles", "atlas", "shrugged"],
      ["charles", "atlas", "world's", "strongest", "man"],
      ["charles", "atlas jr."],
      [:foo, -18446744073709551616],
      [:foo, -1],
      [:foo, 18446744073709551616],
      [today - 1],
      [now - one_day],
      [today],
      [now, "bar"],
      [now, "foo"],
      [today + 1],
      [true]
    ]

    by_dump = lambda { |list| list.sort_by { |entry| Tuple.dump(entry) } }

    assert_equal expected, by_dump.call(tuples)

    # The result must not depend on the input order.
    100.times do
      assert_equal expected, by_dump.call(tuples.shuffle)
    end
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tuple
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Justin Balthrop
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-20 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Fast, binary-sortable serialization for arrays of simple Ruby types.
17
+ email: code@justinbalthrop.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - ext/extconf.rb
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.rdoc
25
+ files:
26
+ - README.rdoc
27
+ - ext/extconf.rb
28
+ - ext/tuple.c
29
+ - test/test_helper.rb
30
+ - test/tuple_test.rb
31
+ - LICENSE
32
+ has_rdoc: true
33
+ homepage: http://github.com/ninjudd/tuple
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --charset=UTF-8
39
+ require_paths:
40
+ - ext
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ requirements: []
54
+
55
+ rubyforge_project:
56
+ rubygems_version: 1.3.5
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Tuple serialization functions.
60
+ test_files:
61
+ - test/test_helper.rb
62
+ - test/tuple_test.rb