oinky 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. data/LICENSE +22 -0
  2. data/README.md +141 -0
  3. data/ext/extconf.rb +79 -0
  4. data/ext/include/oinky.h +424 -0
  5. data/ext/include/oinky.hpp +63 -0
  6. data/ext/include/oinky/nky_base.hpp +1116 -0
  7. data/ext/include/oinky/nky_core.hpp +1603 -0
  8. data/ext/include/oinky/nky_cursor.hpp +665 -0
  9. data/ext/include/oinky/nky_dialect.hpp +107 -0
  10. data/ext/include/oinky/nky_error.hpp +164 -0
  11. data/ext/include/oinky/nky_fixed_table.hpp +710 -0
  12. data/ext/include/oinky/nky_handle.hpp +334 -0
  13. data/ext/include/oinky/nky_index.hpp +1038 -0
  14. data/ext/include/oinky/nky_log.hpp +15 -0
  15. data/ext/include/oinky/nky_merge_itr.hpp +403 -0
  16. data/ext/include/oinky/nky_model.hpp +110 -0
  17. data/ext/include/oinky/nky_pool.hpp +760 -0
  18. data/ext/include/oinky/nky_public.hpp +808 -0
  19. data/ext/include/oinky/nky_serializer.hpp +1625 -0
  20. data/ext/include/oinky/nky_strtable.hpp +504 -0
  21. data/ext/include/oinky/nky_table.hpp +1996 -0
  22. data/ext/nky_lib.cpp +390 -0
  23. data/ext/nky_lib_core.hpp +212 -0
  24. data/ext/nky_lib_index.cpp +158 -0
  25. data/ext/nky_lib_table.cpp +224 -0
  26. data/lib/oinky.rb +1284 -0
  27. data/lib/oinky/compiler.rb +106 -0
  28. data/lib/oinky/cpp_emitter.rb +311 -0
  29. data/lib/oinky/dsl.rb +167 -0
  30. data/lib/oinky/error.rb +19 -0
  31. data/lib/oinky/modelbase.rb +12 -0
  32. data/lib/oinky/nbuffer.rb +152 -0
  33. data/lib/oinky/normalize.rb +132 -0
  34. data/lib/oinky/oc_builder.rb +44 -0
  35. data/lib/oinky/query.rb +193 -0
  36. data/lib/oinky/rb_emitter.rb +147 -0
  37. data/lib/oinky/shard.rb +40 -0
  38. data/lib/oinky/testsup.rb +104 -0
  39. data/lib/oinky/version.rb +9 -0
  40. data/oinky.gemspec +36 -0
  41. metadata +120 -0
@@ -0,0 +1,504 @@
1
+ // This source is distributed under the terms of the MIT License. Refer
2
+ // to the 'LICENSE' file for details.
3
+ //
4
+ // Copyright (c) Jacob Lacouture, 2012
5
+
6
+ namespace Oinky
7
+ {
8
+ namespace Internal
9
+ {
10
+ using namespace Oinky::Errors;
11
+
12
+
13
+ //
14
+ // The stringtable is a single-instance store for all strings. String
15
+ // references are just indexes into the table. The table is sorted so that
16
+ // string references collate identically to the strings themselves. This
17
+ // means queries operate on the references only. We only use the string table
18
+ // when converting to/from external interface values (queries/results).
19
+ //
20
+
21
+ template<typename REF_T>
22
+ class stringtable_t : boost::noncopyable
23
+ {
24
+ typedef stringtable_t<REF_T> stable_t;
25
+
26
+ // For now these are uint32s. We could probably limit the stringtable
27
+ // size to 16MB and use uint24s, but the savings would be minimal.
28
+
29
+ // These are offsets into the values buffer. The length of a string is
30
+ // determined by the difference between its offset and the next one (the last string ends at vbuf_length).
31
+ //
32
+ // The validity of these offsets must be checked by the deserializer. None
33
+ // of them may exceed vbuf_length; from_trusted_ref re-checks this on every lookup.
34
+ nky_allocator_t *allocator; // passed to safe_header_t::prepare and used to free dynamic strings at teardown
35
+ const uint32 *offsets; // serialized offset array; only ever read through unpack_offset
36
+ uint32 vcount; // number of strings in the fixed table
37
+ const char *values; // base address of the fixed string bytes
38
+ uint32 vbuf_length; // length of the values buffer in bytes
39
+
40
+ typedef u_string_value_safe<REF_T> safestr; // value handed to callers; carries str, str.xref and newstr
41
+ typedef typename safestr::header_t safe_header_t; // header of a dynamically added string
42
+ typedef typename safe_header_t::strmap_t strmap_t; // container of those headers
43
+ typedef typename strmap_t::iterator strmap_itr_t;
44
+
45
+ strmap_t strmap; // dynamic table: strings added through make_safestring
46
+ db_vector<REF_T> offset_xform; // one slot per fixed string: 0/1 mark first, then the new ref once compute_indexes_and_length has run
47
+
48
+ // Written by compute_indexes_and_length and read by pack:
49
+ // count of strings, combined length of all string bytes, and
50
+ // total length of the stringtable in serialized form.
51
+ uint32 prepare_packed_string_count;
52
+ uint32 prepare_packed_stringslength;
53
+ uint32 prepare_packed_total_length;
54
+
55
+ public:
56
+ // Default stringtable is empty: no fixed strings and no values buffer.
57
+ stringtable_t(nky_allocator_t *_alloc) : // _alloc is stored and also handed to offset_xform
58
+ allocator(_alloc),
59
+ offsets(NULL),
60
+ vcount(0),
61
+ values(NULL),
62
+ vbuf_length(0),
63
+ offset_xform(_alloc), prepare_packed_string_count(0), prepare_packed_stringslength(0), prepare_packed_total_length(0) // zeroed so pack() never reads indeterminate sizes
64
+ {}
65
+
66
+ stringtable_t( // Mounts an existing fixed table; the buffers are borrowed, nothing is copied here.
67
+ nky_allocator_t *_alloc, // allocator for the dynamic strings and offset_xform
68
+ uint32 _vcount, // number of fixed strings
69
+ const uint32 *_offsets, // serialized offsets, one per fixed string
70
+ uint32 _vbuf_length, // byte length of _vbuf
71
+ const char *_vbuf // string bytes the offsets index into
72
+ ) :
73
+ allocator(_alloc),
74
+ offsets(_offsets),
75
+ vcount(_vcount),
76
+ values(_vbuf),
77
+ vbuf_length(_vbuf_length),
78
+ offset_xform(_alloc), prepare_packed_string_count(0), prepare_packed_stringslength(0), prepare_packed_total_length(0) // zeroed so pack() never reads indeterminate sizes
79
+ {}
80
+
81
+ private:
82
+ template<bool is_pooled_alloc> // true: skip per-node cleanup; false: free every node
83
+ void strtable_teardown() // Releases the dynamic string map; called from the destructor.
84
+ {
85
+ if (is_pooled_alloc) {
86
+ // The map's default destructor walks the entire tree and tears it down.
87
+ // We don't need to do that, since we're about to dealloc the entire
88
+ // pool anyway. Constructing a fresh empty map in place forgets the old nodes without walking anything.
89
+ ::new(&strmap) strmap_t();
90
+ } else {
91
+ // This hands every object to allocator->free, but does not call the
92
+ // object's destructor. This is most equivalent to what
93
+ // happens in the above case, with the pooled allocator.
94
+ strmap.clear_and_dispose(boost::bind(&nky_allocator_t::free, allocator, _1));
95
+ }
96
+ }
97
+
98
+ public:
99
+ ~stringtable_t() // Tears down the dynamic string map only; offsets/values are not freed here.
100
+ {
101
+ strtable_teardown<is_pooled_allocator<nky_allocator_t>::value >(); // strategy chosen at compile time from the allocator trait
102
+ }
103
+
104
+ class iterator // Position within the fixed table; dereferences to a REF_T, not to the string itself.
105
+ {
106
+ uint32 offset; // zero-based position in the fixed table
107
+ const stable_t *host; // table this iterator walks; NULL when default-constructed
108
+ public:
109
+ iterator() : offset(0), host(NULL) {}
110
+
111
+ iterator(const stable_t *_host, uint32 _offset) :
112
+ offset(_offset), host(_host)
113
+ {}
114
+ // Movement is plain arithmetic on the position; no bounds are checked.
115
+ iterator &operator++() {
116
+ ++offset;
117
+ return *this;
118
+ }
119
+ iterator &operator--() {
120
+ --offset;
121
+ return *this;
122
+ }
123
+ iterator &operator+=(int x) {
124
+ offset += x;
125
+ return *this;
126
+ }
127
+ iterator &operator-=(int x) {
128
+ offset -= x;
129
+ return *this;
130
+ }
131
+ // Comparisons assert that both iterators belong to the same table.
132
+ bool operator==(const iterator &other) const {
133
+ OINKY_ASSERT(host == other.host);
134
+ return offset == other.offset;
135
+ }
136
+ bool operator!=(const iterator &other) const {
137
+ OINKY_ASSERT(host == other.host);
138
+ return offset != other.offset;
139
+ }
140
+ // Signed distance between two positions in the same table.
141
+ int operator-(const iterator &other) const {
142
+ OINKY_ASSERT(host == other.host);
143
+ return (int) offset - (int) other.offset;
144
+ }
145
+ // Resolves the current position to its string through the host table.
146
+ u_string_value_safe<REF_T> to_string() const {
147
+ return host->from_trusted_ref(value());
148
+ }
149
+ REF_T operator*() const { return value(); }
150
+ // The REF is 1 + the offset into the table (0 is never produced here).
151
+ REF_T value() const { return (REF_T) (offset + 1); }
152
+ // Raw zero-based position.
153
+ uint32 _offset() const { return offset; }
154
+ };
155
+
156
+ // This does not do a search. This is an array-lookup. This is the
157
+ // retrieval method to convert column values to real strings.
158
+ //iterator map(REF_T ref) const {
159
+ // return iterator(this, ref);
160
+ //}
161
+
162
+ // Map a ref to its string without going through an iterator; the ref is range-checked first.
163
+ safestr from_untrusted_ref(REF_T ref) const {
164
+ // This is an internal error. The user should never see the REF_T type.
165
+ // OINKY_ASSERT(ref && (ref <= vcount));
166
+ //
167
+ // This is expected to map bad values (both above and below the range [1, vcount]) to zero,
168
+ // which is rejected below with bad_encoding.
169
+ uint32 offset = map_into_range((uint32) ref, 1, vcount, 0); // despite the name this is the ref itself, or 0 when out of range
170
+ if (offset == 0) {
171
+ // The only place we get bad refs is from serialized form.
172
+ //
173
+ // TBD:
174
+ // That we get this exception lazily (after mount completes) might
175
+ // be confusing.
176
+ throw_error(bad_encoding());
177
+ }
178
+ return from_trusted_ref(ref);
179
+ }
180
+
181
+ static inline uint32 unpack_offset(const uint32 *src) { // Decodes one stored offset from its serialized (v1 format) representation.
182
+ uint32 val;
183
+ Serialization::unpack_nbc<Serialization::v1_sformat_tag, uint32>(&val, (const char *)src); // NOTE(review): "nbc" presumably means no bounds check; confirm in the serializer
184
+ return val;
185
+ }
186
+
187
+ inline safestr from_trusted_ref(REF_T ref) const { // Builds the string for a ref the caller guarantees is in [1, vcount]; throws bad_encoding on inconsistent offsets.
188
+ uint32 index = (uint32) ref - 1; // refs are 1-based, the offset array is 0-based
189
+ uint32 begin = unpack_offset(offsets + index);
190
+ uint32 end = (index == vcount-1) ? // the last string has no successor offset...
191
+ vbuf_length : // ...so it runs to the end of the values buffer
192
+ unpack_offset(offsets + index + 1);
193
+ // Zero length is permissible.
194
+ // TBD: We checked this earlier, so we can probably skip it here.
195
+ if ((end < begin) || (end > vbuf_length)) {
196
+ throw_error(bad_encoding());
197
+ }
198
+ safestr ss;
199
+ ss.newstr = NULL; // no dynamic header: this string lives in the fixed table
200
+ ss.str = db_string(values + begin, end - begin); // refers to the bytes inside the values buffer
201
+ ss.str.xref = (uint32) ref; // remember the ref so make_safestring/mark can take the fast path
202
+ return ss;
203
+ }
204
+
205
+ iterator begin() const { return iterator(this, 0); } // first fixed string
206
+ iterator end() const { return iterator(this, vcount); } // one past the last fixed string
207
+ iterator find(const db_string &key) const { // Searches the fixed table for key; the std algorithms take (first, last, value), in that order.
208
+ return std::find(begin(), end(), key); // NOTE(review): iterator yields REF_T and declares no iterator typedefs here; instantiation presumably also needs a REF_T/db_string comparison. Confirm before first use.
209
+ }
210
+ iterator lower_bound(const db_string &key) const { // First position not ordered before key (the table is sorted).
211
+ return std::lower_bound(begin(), end(), key);
212
+ }
213
+ iterator upper_bound(const db_string &key) const { // First position ordered after key.
214
+ return std::upper_bound(begin(), end(), key);
215
+ }
216
+
217
+ public:
218
+ void reinit( // Destroys this table and constructs a new one in the same storage from the given buffers.
219
+ nky_allocator_t *_alloc,
220
+ uint32 _vcount,
221
+ const uint32 *_offsets,
222
+ uint32 _vbuf_length,
223
+ const char *_vbuf
224
+ )
225
+ {
226
+ this->~stringtable_t(); // drops every dynamic string via strtable_teardown
227
+ ::new(this) stable_t(_alloc, _vcount, _offsets, _vbuf_length, _vbuf); // placement-new: previous marks and prepared sizes are not carried over
228
+ }
229
+
230
+ // Returns a safe (table-owned) copy of str. This should use the DB allocator and a new-string map to
231
+ // ensure that we don't grow the DB heap pointlessly.
232
+ safestr make_safestring(const db_string &str)
233
+ {
234
+ // This check is fast. Refs are 1-based, so vcount itself is a valid ref (the last fixed string).
235
+ if (str.xref && (str.xref <= vcount)) {
236
+ safestr s = from_trusted_ref((REF_T) str.xref);
237
+ if (s == str) { // xref is only a hint: accept it only if the bytes really match
238
+ return s;
239
+ }
240
+ }
241
+
242
+ // If we don't know the location in the fixed table, we will not even
243
+ // search for it there. We will search/insert in the dynamic table.
244
+ //
245
+ // Q: Why?
246
+ //
247
+ // A: The assumption here is that the fixed stringtable size is MUCH
248
+ // larger than the working stringtable size. We would prefer to
249
+ // search the smaller table. Even though we make the dynamic table
250
+ // larger by adding more strings to it than we have to, we presume
251
+ // the resulting table will STILL be much smaller than the fixed table.
252
+ // Also, there should be more redundancy among strings inserted in
253
+ // the same session. We can easily strip the duplicates in the
254
+ // serialization phase.
255
+
256
+ safestr x;
257
+ x.str.xref = 0;
258
+ x.newstr = safe_header_t::prepare(allocator, strmap, str); // find-or-insert in the dynamic table
259
+ x.str = x.newstr->as_string();
260
+ return x;
261
+ }
262
+
263
+ //###########################################################
264
+ // Serialization
265
+ //
266
+ void clear_references() // Serialization step 1: unmark every string, fixed and dynamic.
267
+ {
268
+ // Clear the fixed reference marks: one zeroed slot per fixed string.
269
+ offset_xform.resize(vcount);
270
+ memset(offset_xform.begin(), 0, sizeof(REF_T) * vcount);
271
+
272
+ // Clear the dynamic reference marks.
273
+ safe_header_t::clear_reference_counts(strmap);
274
+ }
275
+
276
+ // Mark the string (either fixed or new, but not necessarily both)
277
+ // as in-use, so it will be stored when the DB is serialized.
278
+ void mark(const safestr &str)
279
+ {
280
+ mark(str.newstr, (REF_T) str.str.xref); // forwards the dynamic header (may be NULL) and the fixed ref (may be 0)
281
+ }
282
+ void mark(const safe_cv_t &cv) // Same, for a column value that must hold a string.
283
+ {
284
+ OINKY_ASSERT(cv.is_string());
285
+ mark(cv.newstr, (REF_T) cv.string_value().xref);
286
+ }
287
+
288
+ private:
289
+ void mark(safe_header_t *header, REF_T ref) // Flags one string as referenced; a nonzero fixed ref wins over the dynamic header.
290
+ {
291
+ // This check is fast.
292
+ if (ref) {
293
+ OINKY_ASSERT(offset_xform.size() > ref - 1); // only an assert: release builds do not range-check ref here
294
+ // (re-)mark the bin
295
+ offset_xform[ref - 1] = 1;
296
+ } else if (header) {
297
+ // (re-)mark the header
298
+ header->_ref = (REF_T) 1;
299
+ } else {
300
+ // Neither a fixed ref nor a dynamic header: this string obviously was not safe.
301
+ OINKY_ASSERT(false);
302
+ throw_error(implementation_bug());
303
+ }
304
+ }
305
+
306
+ struct compare_strings // Comparator for s_itr: orders a fixed-table position against a dynamic-table entry by string content.
307
+ {
308
+ int operator()(const iterator &l, const strmap_itr_t &r) const {
309
+ return l.to_string().compare_to(r->as_string()); // NOTE(review): presumably negative/zero/positive like strcmp; confirm compare_to's contract
310
+ }
311
+ };
312
+
313
+ typedef symmetric_iterator<iterator> fixed_range_t; // range over the fixed table
314
+ typedef symmetric_iterator<strmap_itr_t> dynamic_range_t; // range over the dynamic table
315
+ // NOTE(review): s_itr presumably walks both sorted ranges in merged order, pairing equal strings; confirm against merge_iterator_template.
316
+ class s_itr : public merge_iterator_template<
317
+ s_itr,
318
+ int,
319
+ fixed_range_t, //index over offsets.
320
+ dynamic_range_t,
321
+ compare_strings>
322
+ {
323
+ typedef merge_iterator_template<
324
+ s_itr,
325
+ int,
326
+ fixed_range_t, //index over offsets.
327
+ symmetric_iterator<strmap_itr_t>, // same type as dynamic_range_t
328
+ compare_strings> base_t;
329
+ friend class boost::iterator_core_access;
330
+ // The dereferenced value is unused; callers inspect left()/right() instead.
331
+ int dereference() const { return 0; }
332
+
333
+ public:
334
+ s_itr(const fixed_range_t &f, const dynamic_range_t &d) :
335
+ base_t(f, d)
336
+ {}
337
+
338
+ };
339
+
340
+ public:
341
+ // This does two things.
342
+ // 1) It determines which strings are reachable, and assigns each
343
+ // reachable string an index, which corresponds to the string's
344
+ // position in the new sorted stringtable. The first index is 1.
345
+ // 2) It computes the total number of bytes required to store the new
346
+ // stringtable, including the header data, and returns that count.
347
+ // This should be called AFTER all database parts depending on the
348
+ // stringtable have been prepared (marked). That is how we compute reachability.
349
+ uint32 compute_indexes_and_length()
350
+ {
351
+ s_itr i(
352
+ fixed_range_t(begin(), begin(), end()),
353
+ dynamic_range_t(strmap.begin(), strmap.begin(), strmap.end())
354
+ );
355
+
356
+ // Indexes start at 1. 0 means the string is being dropped.
357
+ uint32 target_index = 1;
358
+ uint32 total_string_bytes = 0;
359
+ while (!i.at_end()) {
360
+ uint32 this_index = 0; // index given to the string at this merge position, if any
361
+ if (i.left_active()) {
362
+ const iterator &l(i.left().i());
363
+ int x = l - begin(); // zero-based slot of this fixed string
364
+ // If this is nonzero, then it means the fixed string is still
365
+ // referenced. The mark is overwritten with the new index.
366
+ if (offset_xform[x]) {
367
+ offset_xform[x] = this_index = target_index;
368
+ target_index++;
369
+ total_string_bytes += l.to_string().as_string().length();
370
+ }
371
+ }
372
+ if (i.right_active()) {
373
+ const strmap_itr_t &r(i.right().i());
374
+ typename safestr::header_t &hdr(*r); // NOTE(review): hdr is not used below
375
+ if (r->_ref) {
376
+ if (!this_index) { // not already counted through the fixed side
377
+ this_index = target_index;
378
+ target_index++;
379
+ total_string_bytes += r->length();
380
+ } else {
381
+ // these are the same string! It shares the fixed string's index and is counted once.
382
+ OINKY_ASSERT(i.left().i().to_string().as_string().length() == r->length());
383
+ }
384
+ r->_ref = this_index; // the mark is overwritten with the new index
385
+ }
386
+ }
387
+ ++i;
388
+ }
389
+
390
+ // Total stringtable length is an 8-byte header (total length and string
391
+ // count, the two uint32s pack() writes first) plus 4 bytes of offset per string plus the string bytes.
392
+ uint32 string_count = target_index - 1;
393
+ uint32 byte_count = 8 + (string_count * 4) + total_string_bytes;
394
+
395
+ // Save these for later. pack() needs them before it starts iterating.
396
+ prepare_packed_string_count = string_count;
397
+ prepare_packed_stringslength = total_string_bytes;
398
+ prepare_packed_total_length = byte_count;
399
+
400
+ return byte_count;
401
+ }
402
+
403
+ private:
404
+ inline REF_T xform_ref(safe_header_t *header, REF_T ref) const { // Translates a string to its ref in the new table; a dynamic header takes precedence over the fixed ref.
405
+ if (header) {
406
+ OINKY_ASSERT(header->_ref); // zero would mean the string was never marked
407
+ return header->_ref;
408
+ } else {
409
+ REF_T newref = offset_xform[ref_to_index(ref)];
410
+ OINKY_ASSERT(newref); // zero would mean the fixed string was never marked
411
+ return newref;
412
+ }
413
+ }
414
+ public:
415
+ REF_T xform_ref(const safestr &str) const { // New-table ref for a safe string.
416
+ return xform_ref(str.newstr, (REF_T) str.str.xref);
417
+ }
418
+ REF_T xform_ref(const safe_cv_t &cv) const { // New-table ref for a column value's string.
419
+ return xform_ref(cv.newstr, (REF_T) cv.string_value().xref);
420
+ }
421
+ REF_T xform_ref(const REF_T &oldref) const { // New-table ref for a ref into the current fixed table.
422
+ return xform_ref(NULL, oldref);
423
+ }
424
+
425
+ void mark_ref(const REF_T &oldref) { // Marks a fixed-table string as in-use, addressed directly by its ref.
426
+ offset_xform[ref_to_index(oldref)] = 1;
427
+ }
428
+
429
+ uint32 ref_to_index(REF_T oldref) const { // Converts a 1-based fixed ref to its zero-based offset_xform slot.
430
+ OINKY_ASSERT(oldref);
431
+ OINKY_ASSERT((uint32) oldref <= offset_xform.size());
432
+ // In case asserts aren't active, we want to map into range: out-of-range refs are expected to come back as 1, i.e. slot 0.
433
+ return map_into_range((uint32) oldref, 1, offset_xform.size(), 1)-1; // NOTE(review): with an empty offset_xform slot 0 does not exist; confirm callers cannot reach that
434
+ }
435
+
436
+ template<typename SCHEME> // SCHEME selects the Serialization::pack encoding
437
+ void pack(char *&buf, const char *end_buf) // Writes the table (total length, string count, offsets, bytes) at buf and advances buf past it; throws buffer_overflow if it cannot fit.
438
+ {
439
+ char *table_base = buf;
440
+ // This is the only bounds check we do. As long as the packing
441
+ // protocol is being followed correctly (clear/count/pack) then
442
+ // this will work out. We ASSERT as much. But we don't presume
443
+ // a caller is malicious, since they could do worse much more easily.
444
+ if ((end_buf < buf) || ((size_t) (end_buf - buf) < prepare_packed_total_length)) { // compare remaining space; never form a pointer past the buffer
445
+ throw_error(buffer_overflow());
446
+ }
447
+
448
+ Serialization::pack<SCHEME, uint32>(prepare_packed_total_length, buf, end_buf);
449
+ Serialization::pack<SCHEME, uint32>(prepare_packed_string_count, buf, end_buf);
450
+
451
+ char *offset_buf = buf; // offsets come right after the two header words
452
+ char *strings_buf = buf + (4 * prepare_packed_string_count); // string bytes follow the offset array
453
+ char *strings_base = strings_buf;
454
+
455
+ s_itr i(
456
+ fixed_range_t(begin(), begin(), end()),
457
+ dynamic_range_t(strmap.begin(), strmap.begin(), strmap.end())
458
+ );
459
+
460
+ // Same merged walk as compute_indexes_and_length; a zero slot or _ref means the string is being dropped.
461
+ while (!i.at_end()) {
462
+ bool saved = false;
463
+ if (i.left_active()) {
464
+ const iterator &l(i.left().i());
465
+ // We only write if it's referenced.
466
+ if (offset_xform[l - begin()]) {
467
+ // Pack offset (measured from the start of the table, header included)
468
+ Serialization::pack<SCHEME, uint32>(strings_buf - table_base, offset_buf, end_buf);
469
+ // Pack bytes
470
+ const db_string &str(l.to_string().as_string());
471
+ memcpy(strings_buf, str.begin(), str.length());
472
+ strings_buf += str.length();
473
+ saved = true;
474
+ }
475
+ }
476
+ // We only need to pack once, even when the string exists in both tables.
477
+ if (!saved && i.right_active()) {
478
+ const strmap_itr_t &r(i.right().i());
479
+ // Again, only referenced strings are written.
480
+ if (r->_ref) {
481
+ // Pack offset (measured from the start of the table, header included)
482
+ Serialization::pack<SCHEME, uint32>(strings_buf - table_base, offset_buf, end_buf);
483
+ // Pack bytes
484
+ const db_string &str(r->as_string());
485
+ memcpy(strings_buf, str.begin(), str.length());
486
+ strings_buf += str.length();
487
+ saved = true;
488
+ }
489
+ }
490
+ ++i;
491
+ }
492
+ OINKY_ASSERT(table_base + prepare_packed_total_length == strings_buf);
493
+ OINKY_ASSERT(offset_buf == strings_base);
494
+ OINKY_ASSERT(strings_base + prepare_packed_stringslength == strings_buf);
495
+ buf = strings_buf;
496
+ }
497
+ };
498
+
499
+ typedef stringtable_t<m_strtable_ref> meta_stringtable_t; // table keyed by m_strtable_ref
500
+ typedef stringtable_t<u_strtable_ref> user_stringtable_t; // table keyed by u_strtable_ref
501
+
502
+ } //namespace Internal
503
+ } //namespace Oinky
504
+