ruby-mtbl 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +33 -0
- data/LICENSE +202 -0
- data/README.md +19 -0
- data/Rakefile +7 -0
- data/bin/rmtbl_create +71 -0
- data/bin/rmtbl_dump +46 -0
- data/bin/rmtbl_info +17 -0
- data/examples/test.rb +101 -0
- data/examples/test1.mtbl +0 -0
- data/ext/mtbl/extconf.rb +5 -0
- data/ext/mtbl/ruby-mtbl.c +592 -0
- data/ruby-mtbl.gemspec +19 -0
- data/spec/mtbl_reader_spec.rb +112 -0
- data/spec/mtbl_sorter_spec.rb +29 -0
- data/spec/mtbl_spec.rb +17 -0
- data/spec/mtbl_utils_spec.rb +40 -0
- data/spec/mtbl_writer_spec.rb +101 -0
- data/spec/spec_helper.rb +96 -0
- metadata +68 -0
data/examples/test1.mtbl
ADDED
Binary file
|
data/ext/mtbl/ruby-mtbl.c
ADDED
@@ -0,0 +1,592 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
3
|
+
* you may not use this file except in compliance with the License.
|
4
|
+
* You may obtain a copy of the License at
|
5
|
+
*
|
6
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
*
|
8
|
+
* Unless required by applicable law or agreed to in writing, software
|
9
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
10
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
11
|
+
* See the License for the specific language governing permissions and
|
12
|
+
* limitations under the License.
|
13
|
+
*/
|
14
|
+
|
15
|
+
#include "ruby.h"
|
16
|
+
#include <stdlib.h>
|
17
|
+
#include <stdio.h>
|
18
|
+
#include <string.h>
|
19
|
+
#include <unistd.h>
|
20
|
+
#include <sys/types.h>
|
21
|
+
#include <sys/stat.h>
|
22
|
+
#include <fcntl.h>
|
23
|
+
#include <errno.h>
|
24
|
+
#include "mtbl.h"
|
25
|
+
|
26
|
+
/*
 * Ruby-side handles owned by this extension. Every one of them is assigned
 * in Init_mtbl().
 */
static VALUE rb_cMTBL;          /* MTBL            */
static VALUE rb_cMTBLReader;    /* MTBL::Reader    */
static VALUE rb_cMTBLIterator;  /* MTBL::Iterator  */
static VALUE rb_cMTBLWriter;    /* MTBL::Writer    */
static VALUE rb_cMTBLSorter;    /* MTBL::Sorter    */
static VALUE rb_cMTBLUtil;      /* MTBL::Utils     */
static VALUE rb_cMTBLVersion;   /* String stored in MTBL::Version */

/* Text published to Ruby as the MTBL::Version constant. */
#define MTBL_VERSION "1.0.0"
|
35
|
+
|
36
|
+
/* Native state wrapped by a MTBL::Reader object. */
typedef struct rbmtbl_reader {
  /* mtbl reader handle; zeroed by the allocator, set by initialize. */
  struct mtbl_reader *r;
} rbmtbl_reader_t;
|
39
|
+
|
40
|
+
/* Native state wrapped by a MTBL::Iterator object. */
typedef struct rbmtbl_iterator {
  /* mtbl iterator handle; zeroed by the allocator until a lookup assigns it. */
  struct mtbl_iter *it;
} rbmtbl_iterator_t;
|
43
|
+
|
44
|
+
/* Native state wrapped by a MTBL::Writer object. */
typedef struct rbmtbl_writer {
  /* mtbl writer handle; NULL before initialize and after close. */
  struct mtbl_writer *w;
  /* Writer options, created by the allocator and released on close. */
  struct mtbl_writer_options *o;
} rbmtbl_writer_t;
|
48
|
+
|
49
|
+
/* Native state wrapped by a MTBL::Sorter object. */
typedef struct rbmtbl_sorter {
  /* mtbl sorter handle; NULL before initialize and after close. */
  struct mtbl_sorter *s;
  /* Sorter options, created by the allocator and released on close. */
  struct mtbl_sorter_options *o;
} rbmtbl_sorter_t;
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
/*
|
57
|
+
* Iterator
|
58
|
+
*/
|
59
|
+
|
60
|
+
|
61
|
+
/*
 * Free callback registered for MTBL::Iterator objects by
 * rbmtbl_iterator_alloc(). Destroys the wrapped mtbl iterator and then
 * releases the wrapper struct itself. Always returns Qnil.
 */
static VALUE rbmtbl_iterator_free(rbmtbl_iterator_t *iterator) {
  struct mtbl_iter **handle = &iterator->it;

  mtbl_iter_destroy(handle);
  free(iterator);

  return Qnil;
}
|
66
|
+
|
67
|
+
/*
 * Allocator for MTBL::Iterator.
 *
 * Creates a zero-filled wrapper struct (so `it` starts out NULL) and hands it
 * to Ruby together with rbmtbl_iterator_free as the free callback. The struct
 * comes from the C allocator because the free callback releases it with
 * free().
 *
 * Raises NoMemoryError when the allocation fails instead of writing through
 * a NULL pointer.
 */
static VALUE rbmtbl_iterator_alloc(VALUE class) {
  rbmtbl_iterator_t *iterator = calloc(1, sizeof *iterator);

  if (iterator == NULL) {
    rb_memerror();
  }
  return Data_Wrap_Struct(class, 0, rbmtbl_iterator_free, iterator);
}
|
72
|
+
|
73
|
+
|
74
|
+
/*
 * MTBL::Iterator#initialize(reader)
 *
 * Binds this iterator to a full scan over `c_reader`.
 *
 * Raises TypeError when `c_reader` is not a MTBL::Reader (without this check
 * any wrapped C object would be reinterpreted as a reader struct) and
 * RuntimeError when the reader has no open mtbl handle.
 *
 * Returns self.
 */
static VALUE rbmtbl_iterator_initialize(VALUE self, VALUE c_reader) {
  rbmtbl_iterator_t *iterator;
  rbmtbl_reader_t *reader;

  if (!RTEST(rb_obj_is_kind_of(c_reader, rb_cMTBLReader))) {
    rb_raise(rb_eTypeError, "Expected a MTBL::Reader");
  }

  Data_Get_Struct(self, rbmtbl_iterator_t, iterator);
  Data_Get_Struct(c_reader, rbmtbl_reader_t, reader);

  if (reader->r == NULL) {
    rb_raise(rb_eRuntimeError, "Reader is not initialized");
  }

  /* A second call to initialize must not leak the earlier iterator. */
  if (iterator->it != NULL) {
    mtbl_iter_destroy(&iterator->it);
    iterator->it = NULL;
  }

  iterator->it = mtbl_source_iter(mtbl_reader_source(reader->r));

  /*
   * The iterator reads through the reader's handle, so keep the reader
   * reachable for as long as this object is. The name has no '@' prefix,
   * which keeps the variable invisible from Ruby code.
   */
  rb_ivar_set(self, rb_intern("reader"), c_reader);

  return self;
}
|
83
|
+
|
84
|
+
/*
 * MTBL::Iterator#next and MTBL::Iterator#each (Init_mtbl registers both
 * names on this function).
 *
 * Without a block: returns the next entry as a two-element array
 * [key, value], or nil when the iterator is exhausted.
 * With a block: yields every remaining [key, value] pair and returns nil.
 *
 * An iterator that was allocated but never bound to a source (NULL handle)
 * is treated as exhausted rather than passed to mtbl_iter_next().
 */
static VALUE rbmtbl_iterator_next(VALUE self) {
  const uint8_t *key, *val;
  size_t len_key, len_val;
  rbmtbl_iterator_t *iterator;

  Data_Get_Struct(self, rbmtbl_iterator_t, iterator);

  if (iterator->it == NULL) {
    return Qnil;
  }

  while (mtbl_iter_next(iterator->it, &key, &len_key, &val, &len_val)) {
    /*
     * NOTE(review): the strings are tagged US-ASCII although the bytes are
     * copied verbatim; entries holding bytes >= 0x80 will presumably report
     * an invalid encoding. Left unchanged for compatibility - confirm before
     * switching to binary strings.
     */
    VALUE r_key = rb_usascii_str_new((const char *) key, len_key);
    VALUE r_val = rb_usascii_str_new((const char *) val, len_val);
    VALUE r_arr = rb_assoc_new(r_key, r_val);

    if (!rb_block_given_p()) {
      return r_arr;
    }
    rb_yield(r_arr);
  }

  return Qnil;
}
|
109
|
+
|
110
|
+
/*
|
111
|
+
* Reader
|
112
|
+
*/
|
113
|
+
|
114
|
+
/*
 * Free callback registered for MTBL::Reader objects by rbmtbl_reader_alloc().
 * Destroys the mtbl reader when one was opened, then releases the wrapper
 * struct. Always returns Qnil.
 */
static VALUE rbmtbl_reader_free(rbmtbl_reader_t *reader) {
  if (reader->r != NULL) {
    mtbl_reader_destroy(&reader->r);
    reader->r = NULL;
  }

  free(reader);

  return Qnil;
}
|
122
|
+
|
123
|
+
/*
 * Allocator for MTBL::Reader.
 *
 * Creates a zero-filled wrapper struct (so `r` starts out NULL) and wraps it
 * with rbmtbl_reader_free as the free callback. The struct comes from the C
 * allocator because the free callback releases it with free().
 *
 * Raises NoMemoryError when the allocation fails instead of writing through
 * a NULL pointer.
 */
static VALUE rbmtbl_reader_alloc(VALUE class) {
  rbmtbl_reader_t *reader = calloc(1, sizeof *reader);

  if (reader == NULL) {
    rb_memerror();
  }
  return Data_Wrap_Struct(class, 0, rbmtbl_reader_free, reader);
}
|
128
|
+
|
129
|
+
/*
 * MTBL::Reader#initialize(fname)
 *
 * Opens the mtbl file at `fname` with default reader options.
 *
 * Raises ArgumentError when `fname` is not a String, and RuntimeError when
 * mtbl_reader_init() fails or when this object already holds an open reader
 * (re-opening would otherwise leak the previous handle).
 *
 * rb_raise() does not return, so no return statements follow the raises.
 *
 * Returns self.
 */
static VALUE rbmtbl_reader_initialize(VALUE self, VALUE fname) {
  rbmtbl_reader_t *reader;
  const char *path;

  Data_Get_Struct(self, rbmtbl_reader_t, reader);

  if (TYPE(fname) != T_STRING) {
    rb_raise(rb_eArgError, "File name must be a string");
  }

  if (reader->r != NULL) {
    rb_raise(rb_eRuntimeError, "Reader is already initialized");
  }

  path = StringValueCStr(fname);
  reader->r = mtbl_reader_init(path, NULL);
  if (reader->r == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to open %s", path);
  }

  return self;
}
|
145
|
+
|
146
|
+
/*
 * MTBL::Reader#iterator
 *
 * Returns a new MTBL::Iterator constructed with this reader as its only
 * argument.
 */
static VALUE rbmtbl_reader_iterator(VALUE self) {
  VALUE ctor_args[1] = { self };

  return rb_class_new_instance(1, ctor_args, rb_cMTBLIterator);
}
|
151
|
+
|
152
|
+
/*
 * MTBL::Reader#get(key)
 *
 * Returns a MTBL::Iterator over the result of mtbl_source_get() for `key`.
 *
 * Raises ArgumentError when `key` is not a String and RuntimeError when the
 * reader has no open mtbl handle.
 */
static VALUE rbmtbl_reader_get(VALUE self, VALUE key) {
  rbmtbl_reader_t *reader;
  rbmtbl_iterator_t *iterator;
  VALUE iter;

  if (TYPE(key) != T_STRING) {
    rb_raise(rb_eArgError, "Key must be a string");
  }

  Data_Get_Struct(self, rbmtbl_reader_t, reader);
  if (reader->r == NULL) {
    rb_raise(rb_eRuntimeError, "Reader is not initialized");
  }

  iter = rb_obj_alloc(rb_cMTBLIterator);
  Data_Get_Struct(iter, rbmtbl_iterator_t, iterator);

  iterator->it = mtbl_source_get(mtbl_reader_source(reader->r),
      (const uint8_t *) RSTRING_PTR(key), RSTRING_LEN(key));

  /*
   * The iterator reads through this reader's handle, so keep the reader
   * reachable while the iterator is. No '@' prefix: hidden from Ruby code.
   */
  rb_ivar_set(iter, rb_intern("reader"), self);

  return iter;
}
|
168
|
+
|
169
|
+
/*
 * MTBL::Reader#get_prefix(prefix)
 *
 * Returns a MTBL::Iterator over the result of mtbl_source_get_prefix() for
 * `prefix`.
 *
 * Raises ArgumentError when `prefix` is not a String and RuntimeError when
 * the reader has no open mtbl handle.
 */
static VALUE rbmtbl_reader_get_prefix(VALUE self, VALUE prefix) {
  rbmtbl_reader_t *reader;
  rbmtbl_iterator_t *iterator;
  VALUE iter;

  if (TYPE(prefix) != T_STRING) {
    rb_raise(rb_eArgError, "Prefix must be a string");
  }

  Data_Get_Struct(self, rbmtbl_reader_t, reader);
  if (reader->r == NULL) {
    rb_raise(rb_eRuntimeError, "Reader is not initialized");
  }

  iter = rb_obj_alloc(rb_cMTBLIterator);
  Data_Get_Struct(iter, rbmtbl_iterator_t, iterator);

  iterator->it = mtbl_source_get_prefix(mtbl_reader_source(reader->r),
      (const uint8_t *) RSTRING_PTR(prefix), RSTRING_LEN(prefix));

  /*
   * The iterator reads through this reader's handle, so keep the reader
   * reachable while the iterator is. No '@' prefix: hidden from Ruby code.
   */
  rb_ivar_set(iter, rb_intern("reader"), self);

  return iter;
}
|
185
|
+
|
186
|
+
/*
 * MTBL::Reader#get_range(kstart, kend)
 *
 * Returns a MTBL::Iterator over the result of mtbl_source_get_range() for
 * the two bounds.
 *
 * Raises ArgumentError when either bound is not a String and RuntimeError
 * when the reader has no open mtbl handle.
 */
static VALUE rbmtbl_reader_get_range(VALUE self, VALUE kstart, VALUE kend) {
  rbmtbl_reader_t *reader;
  rbmtbl_iterator_t *iterator;
  VALUE iter;

  if (TYPE(kstart) != T_STRING) {
    rb_raise(rb_eArgError, "Range start must be a string");
  }

  if (TYPE(kend) != T_STRING) {
    rb_raise(rb_eArgError, "Range stop must be a string");
  }

  Data_Get_Struct(self, rbmtbl_reader_t, reader);
  if (reader->r == NULL) {
    rb_raise(rb_eRuntimeError, "Reader is not initialized");
  }

  iter = rb_obj_alloc(rb_cMTBLIterator);
  Data_Get_Struct(iter, rbmtbl_iterator_t, iterator);

  iterator->it = mtbl_source_get_range(mtbl_reader_source(reader->r),
      (const uint8_t *) RSTRING_PTR(kstart), RSTRING_LEN(kstart),
      (const uint8_t *) RSTRING_PTR(kend), RSTRING_LEN(kend));

  /*
   * The iterator reads through this reader's handle, so keep the reader
   * reachable while the iterator is. No '@' prefix: hidden from Ruby code.
   */
  rb_ivar_set(iter, rb_intern("reader"), self);

  return iter;
}
|
208
|
+
|
209
|
+
|
210
|
+
/*
|
211
|
+
* Writer
|
212
|
+
*/
|
213
|
+
|
214
|
+
/*
 * Releases whatever mtbl handles the writer wrapper still owns: first the
 * writer, then its options. Each pointer is reset to NULL afterwards, so
 * calling this again on the same struct does nothing.
 */
void rbmtbl_writer_close_handles(rbmtbl_writer_t *writer) {
  if (writer->w != NULL) {
    mtbl_writer_destroy(&writer->w);
    writer->w = NULL;
  }

  if (writer->o != NULL) {
    mtbl_writer_options_destroy(&writer->o);
    writer->o = NULL;
  }
}
|
224
|
+
/*
 * Free callback registered for MTBL::Writer objects by rbmtbl_writer_alloc().
 * Releases any remaining mtbl handles and then the wrapper struct. Always
 * returns Qnil.
 */
static VALUE rbmtbl_writer_free(rbmtbl_writer_t *writer) {
  rbmtbl_writer_close_handles(writer);

  free(writer);

  return Qnil;
}
|
229
|
+
|
230
|
+
/*
 * Allocator for MTBL::Writer.
 *
 * Creates a zero-filled wrapper struct, attaches a fresh set of writer
 * options, and wraps it with rbmtbl_writer_free as the free callback. The
 * struct comes from the C allocator because the free callback releases it
 * with free().
 *
 * Raises NoMemoryError when the allocation fails instead of writing through
 * a NULL pointer.
 */
static VALUE rbmtbl_writer_alloc(VALUE class) {
  rbmtbl_writer_t *writer = calloc(1, sizeof *writer);

  if (writer == NULL) {
    rb_memerror();
  }
  writer->o = mtbl_writer_options_init();
  return Data_Wrap_Struct(class, 0, rbmtbl_writer_free, writer);
}
|
236
|
+
|
237
|
+
/*
 * MTBL::Writer#initialize(fname, compression = nil-omitted,
 *                         block_size = omitted, restart_interval = omitted)
 *
 * Creates a new mtbl file at `fname`. The three optional arguments are only
 * examined when the caller actually passed them (rb_scan_args returns the
 * number of arguments supplied; with one required argument, the Nth optional
 * one is present when that count exceeds N).
 *
 * Raises ArgumentError when
 *   - fname is not a String, or the path already exists;
 *   - compression is not an Integer within
 *     MTBL_COMPRESSION_NONE..MTBL_COMPRESSION_LZ4HC;
 *   - block size / restart interval is not an Integer or is negative
 *     (a negative value would otherwise be converted to a huge unsigned one).
 * Raises RuntimeError when the writer is already open or mtbl_writer_init()
 * fails.
 *
 * rb_raise() does not return, so no return statements follow the raises.
 *
 * Returns self.
 */
static VALUE rbmtbl_writer_initialize(int argc, VALUE *argv, VALUE self) {
  rbmtbl_writer_t *writer;
  struct stat ss;
  int scanc;
  const char *path;
  VALUE fname, ctype, bsize, rinterval;

  Data_Get_Struct(self, rbmtbl_writer_t, writer);

  scanc = rb_scan_args(argc, argv, "13", &fname, &ctype, &bsize, &rinterval);

  if (TYPE(fname) != T_STRING) {
    rb_raise(rb_eArgError, "File name must be a string");
  }

  if (writer->w != NULL) {
    /* Opening a second file would leak the handle that is already open. */
    rb_raise(rb_eRuntimeError, "Writer is already initialized");
  }

  if (writer->o == NULL) {
    /* close released the options created by the allocator; start afresh. */
    writer->o = mtbl_writer_options_init();
  }

  if (scanc > 1) {
    if (TYPE(ctype) != T_FIXNUM) {
      rb_raise(rb_eArgError, "Compression type should be a constant (COMPRESSION_NONE, etc)");
    }
    if (FIX2INT(ctype) < MTBL_COMPRESSION_NONE || FIX2INT(ctype) > MTBL_COMPRESSION_LZ4HC) {
      rb_raise(rb_eArgError, "Invalid compression type: %d", FIX2INT(ctype));
    }
    mtbl_writer_options_set_compression(writer->o, FIX2INT(ctype));
  }

  if (scanc > 2) {
    if (TYPE(bsize) != T_FIXNUM) {
      rb_raise(rb_eArgError, "Block size should be an integer");
    }
    if (FIX2INT(bsize) < 0) {
      rb_raise(rb_eArgError, "Block size should not be negative");
    }
    mtbl_writer_options_set_block_size(writer->o, FIX2INT(bsize));
  }

  if (scanc > 3) {
    if (TYPE(rinterval) != T_FIXNUM) {
      rb_raise(rb_eArgError, "Restart interval should be an integer");
    }
    if (FIX2INT(rinterval) < 0) {
      rb_raise(rb_eArgError, "Restart interval should not be negative");
    }
    mtbl_writer_options_set_block_restart_interval(writer->o, FIX2INT(rinterval));
  }

  path = StringValueCStr(fname);

  /* stat() returns 0 when the path exists: refuse to overwrite it. */
  if (stat(path, &ss) == 0) {
    rb_raise(rb_eArgError, "File already exists: %s", path);
  }

  writer->w = mtbl_writer_init(path, writer->o);
  if (writer->w == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to open %s", path);
  }

  return self;
}
|
292
|
+
|
293
|
+
/*
 * MTBL::Writer#add(key, val)
 *
 * Appends one entry to the open writer.
 *
 * Arguments are validated before anything else touches them. Error messages
 * format the key with PRIsVALUE rather than StringValueCStr(), because the
 * latter raises on keys containing NUL bytes and would hide the real error.
 *
 * Raises ArgumentError when key or val is not a String, and RuntimeError
 * when the writer is closed or mtbl_writer_add() reports failure.
 *
 * Returns true.
 */
static VALUE rbmtbl_writer_add(VALUE self, VALUE key, VALUE val) {
  rbmtbl_writer_t *writer;

  if (TYPE(key) != T_STRING) {
    rb_raise(rb_eArgError, "Key must be a string");
  }

  if (TYPE(val) != T_STRING) {
    rb_raise(rb_eArgError, "Value must be a string");
  }

  Data_Get_Struct(self, rbmtbl_writer_t, writer);
  if (writer->w == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to write key %" PRIsVALUE ": writer closed", key);
  }

  if (mtbl_res_success != mtbl_writer_add(writer->w,
        (const uint8_t *) RSTRING_PTR(key), RSTRING_LEN(key),
        (const uint8_t *) RSTRING_PTR(val), RSTRING_LEN(val))) {
    rb_raise(rb_eRuntimeError, "Failed to write key %" PRIsVALUE ", input must be presorted", key);
  }

  return Qtrue;
}
|
319
|
+
|
320
|
+
/*
 * MTBL::Writer#close
 *
 * Releases the writer's mtbl handles and returns true. Raises RuntimeError
 * when there is no open writer handle to release.
 */
static VALUE rbmtbl_writer_close(VALUE self) {
  rbmtbl_writer_t *writer;

  Data_Get_Struct(self, rbmtbl_writer_t, writer);

  if (writer->w == NULL) {
    /* rb_raise() does not return. */
    rb_raise(rb_eRuntimeError, "Writer is already closed");
  }

  rbmtbl_writer_close_handles(writer);
  return Qtrue;
}
|
331
|
+
|
332
|
+
/*
|
333
|
+
* Sorter
|
334
|
+
*/
|
335
|
+
|
336
|
+
/*
 * Default merge callback installed on every sorter: when two entries share
 * a key, keep the second value (val1) and drop the first (val0).
 *
 * merge_info, key, len_key, val0 and len_val0 are accepted to match the
 * callback signature but are not used.
 *
 * On success *merged_val points at a freshly malloc'ed copy of val1 and
 * *len_merged_val is len_val1. The copy is never NULL for an empty value:
 * at least one byte is requested, because malloc(0) may legitimately return
 * NULL and the caller could not tell that apart from a failure.
 *
 * On allocation failure *merged_val is set to NULL and *len_merged_val to 0.
 * NOTE(review): this assumes mtbl treats a NULL merged value as a failed
 * merge - confirm against the mtbl merge-function documentation.
 */
void rbmbtl_default_merge_func(void *merge_info,
    uint8_t *key, size_t len_key,
    uint8_t *val0, size_t len_val0,
    uint8_t *val1, size_t len_val1,
    uint8_t **merged_val, size_t *len_merged_val) {
  // TODO: Allow Ruby callbacks for the merge
  uint8_t *copy;

  (void) merge_info;
  (void) key;
  (void) len_key;
  (void) val0;
  (void) len_val0;

  copy = malloc(len_val1 > 0 ? len_val1 : 1);
  if (copy == NULL) {
    *merged_val = NULL;
    *len_merged_val = 0;
    return;
  }

  if (len_val1 > 0) {
    memcpy(copy, val1, len_val1);
  }

  *merged_val = copy;
  *len_merged_val = len_val1;
}
|
348
|
+
|
349
|
+
|
350
|
+
/*
 * Releases whatever mtbl handles the sorter wrapper still owns: first the
 * sorter, then its options. Each pointer is reset to NULL afterwards, so
 * calling this again on the same struct does nothing.
 */
void rbmtbl_sorter_close_handles(rbmtbl_sorter_t *sorter) {
  if (sorter->s != NULL) {
    mtbl_sorter_destroy(&sorter->s);
    sorter->s = NULL;
  }

  if (sorter->o != NULL) {
    mtbl_sorter_options_destroy(&sorter->o);
    sorter->o = NULL;
  }
}
|
360
|
+
|
361
|
+
/*
 * Free callback registered for MTBL::Sorter objects by rbmtbl_sorter_alloc().
 * Releases any remaining mtbl handles and then the wrapper struct. Always
 * returns Qnil.
 */
static VALUE rbmtbl_sorter_free(rbmtbl_sorter_t *sorter) {
  rbmtbl_sorter_close_handles(sorter);

  free(sorter);

  return Qnil;
}
|
366
|
+
|
367
|
+
/*
 * Allocator for MTBL::Sorter.
 *
 * Creates a zero-filled wrapper struct, attaches a fresh set of sorter
 * options, and wraps it with rbmtbl_sorter_free as the free callback. The
 * struct comes from the C allocator because the free callback releases it
 * with free().
 *
 * Raises NoMemoryError when the allocation fails instead of writing through
 * a NULL pointer.
 */
static VALUE rbmtbl_sorter_alloc(VALUE class) {
  rbmtbl_sorter_t *sorter = calloc(1, sizeof *sorter);

  if (sorter == NULL) {
    rb_memerror();
  }
  sorter->o = mtbl_sorter_options_init();
  return Data_Wrap_Struct(class, 0, rbmtbl_sorter_free, sorter);
}
|
373
|
+
|
374
|
+
/*
 * MTBL::Sorter#initialize(merge_func = nil, temp_dir = nil, max_memory = nil)
 *
 * Creates the mtbl sorter. All three arguments are optional; rb_scan_args
 * stores nil in every optional slot the caller left out, so each option is
 * applied exactly when its value is non-nil. (Comparing the argument count
 * against thresholds that assume a leading required argument skipped a
 * temp_dir passed as the last argument and made max_memory unreachable.)
 *
 *   merge_func  accepted but not used yet; the built-in merge callback is
 *               always installed.
 *   temp_dir    String naming an existing directory.
 *   max_memory  Integer handed to mtbl_sorter_options_set_max_memory().
 *
 * Raises ArgumentError for an invalid temp_dir and RuntimeError when
 * mtbl_sorter_init() fails. rb_raise() does not return.
 *
 * Returns self.
 */
static VALUE rbmtbl_sorter_initialize(int argc, VALUE *argv, VALUE self) {
  rbmtbl_sorter_t *sorter;
  struct stat ss;
  VALUE mergef, tempd, maxm;

  Data_Get_Struct(self, rbmtbl_sorter_t, sorter);

  rb_scan_args(argc, argv, "03", &mergef, &tempd, &maxm);
  mtbl_sorter_options_set_merge_func(sorter->o, (mtbl_merge_func) rbmbtl_default_merge_func, (void *) sorter);

  // TODO: Implement the merge callback
  (void) mergef;

  if (!NIL_P(tempd)) {
    const char *dir;

    if (TYPE(tempd) != T_STRING) {
      rb_raise(rb_eArgError, "Temporary directory should be a string");
    }

    dir = StringValueCStr(tempd);

    /* stat() returns non-zero when the path cannot be examined. */
    if (stat(dir, &ss) != 0) {
      rb_raise(rb_eArgError, "Temporary directory does not exist: %s", dir);
    }

    if (!S_ISDIR(ss.st_mode)) {
      rb_raise(rb_eArgError, "Path %s is not a directory", dir);
    }

    mtbl_sorter_options_set_temp_dir(sorter->o, dir);
  }

  if (!NIL_P(maxm)) {
    mtbl_sorter_options_set_max_memory(sorter->o, NUM2ULONG(maxm));
  }

  sorter->s = mtbl_sorter_init(sorter->o);
  if (sorter->s == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to create sorter");
  }

  return self;
}
|
419
|
+
|
420
|
+
/*
 * MTBL::Sorter#add(key, val)
 *
 * Hands one entry to the open sorter.
 *
 * Arguments are validated before anything else touches them. Error messages
 * format the key with PRIsVALUE rather than StringValueCStr(), because the
 * latter raises on keys containing NUL bytes and would hide the real error.
 * The failure message no longer claims the input "must be presorted": that
 * wording was copied from the writer and does not describe the sorter.
 *
 * Raises ArgumentError when key or val is not a String, and RuntimeError
 * when the sorter is closed or mtbl_sorter_add() reports failure.
 *
 * Returns true.
 */
static VALUE rbmtbl_sorter_add(VALUE self, VALUE key, VALUE val) {
  rbmtbl_sorter_t *sorter;

  if (TYPE(key) != T_STRING) {
    rb_raise(rb_eArgError, "Key must be a string");
  }

  if (TYPE(val) != T_STRING) {
    rb_raise(rb_eArgError, "Value must be a string");
  }

  Data_Get_Struct(self, rbmtbl_sorter_t, sorter);
  if (sorter->s == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to write key %" PRIsVALUE ": sorter closed", key);
  }

  if (mtbl_res_success != mtbl_sorter_add(sorter->s,
        (const uint8_t *) RSTRING_PTR(key), RSTRING_LEN(key),
        (const uint8_t *) RSTRING_PTR(val), RSTRING_LEN(val))) {
    rb_raise(rb_eRuntimeError, "Failed to add key %" PRIsVALUE " to sorter", key);
  }

  return Qtrue;
}
|
446
|
+
|
447
|
+
/*
 * MTBL::Sorter#close
 *
 * Releases the sorter's mtbl handles and returns true. Raises RuntimeError
 * when there is no sorter handle to release.
 */
static VALUE rbmtbl_sorter_close(VALUE self) {
  rbmtbl_sorter_t *sorter;

  Data_Get_Struct(self, rbmtbl_sorter_t, sorter);

  if (sorter->s == NULL) {
    /* rb_raise() does not return. */
    rb_raise(rb_eRuntimeError, "Sorter is already closed");
  }

  rbmtbl_sorter_close_handles(sorter);
  return Qtrue;
}
|
458
|
+
|
459
|
+
/*
 * MTBL::Sorter#write(writer)
 *
 * Passes the sorter and the given MTBL::Writer to mtbl_sorter_write().
 *
 * Raises TypeError when `c_writer` is not a MTBL::Writer (without this check
 * any wrapped C object would be reinterpreted as a writer struct), and
 * RuntimeError when the sorter or the writer has already been closed (their
 * handles are NULL then) or when mtbl_sorter_write() reports failure.
 *
 * Returns true.
 */
static VALUE rbmtbl_sorter_write(VALUE self, VALUE c_writer) {
  rbmtbl_sorter_t *sorter;
  rbmtbl_writer_t *writer;

  if (!RTEST(rb_obj_is_kind_of(c_writer, rb_cMTBLWriter))) {
    rb_raise(rb_eTypeError, "Expected a MTBL::Writer");
  }

  Data_Get_Struct(self, rbmtbl_sorter_t, sorter);
  Data_Get_Struct(c_writer, rbmtbl_writer_t, writer);

  if (sorter->s == NULL) {
    rb_raise(rb_eRuntimeError, "Sorter is already closed");
  }

  if (writer->w == NULL) {
    rb_raise(rb_eRuntimeError, "Writer is already closed");
  }

  if (mtbl_res_success != mtbl_sorter_write(sorter->s, writer->w)) {
    rb_raise(rb_eRuntimeError, "Failed to write");
  }

  return Qtrue;
}
|
475
|
+
|
476
|
+
/*
|
477
|
+
* Utils
|
478
|
+
*/
|
479
|
+
|
480
|
+
/*
 * MTBL::Utils.metadata(fname)
 *
 * Opens the mtbl file at `fname`, reads its metadata block and returns a
 * Hash with these Symbol keys:
 *   :filename, :filesize, :index_block_offset, :index_bytes,
 *   :index_bytes_pct, :data_block_bytes, :data_block_bytes_pct,
 *   :data_block_size, :data_block_count, :entry_count, :key_bytes,
 *   :value_bytes, :compression_algorithm, :compactness
 *
 * The *_pct values are percentages of the file size; :compactness is
 * 100 * file size / (key bytes + value bytes).
 *
 * Raises ArgumentError when `fname` is not a String and RuntimeError when
 * the file cannot be opened, fstat'ed, or read by mtbl.
 *
 * All plain C values are collected first and the reader and descriptor are
 * released before any Ruby object is built, so an exception raised while
 * filling the Hash cannot leak them.
 */
static VALUE rbmtbl_utils_metadata(VALUE class, VALUE fname) {
  int fd;
  struct stat ss;
  struct mtbl_reader *r;
  const struct mtbl_metadata *m;
  const char *path;
  mtbl_compression_type compression_algorithm;
  uint64_t data_block_size, count_entries, count_data_blocks, bytes_data_blocks;
  uint64_t bytes_index_block, bytes_keys, bytes_values, index_block_offset;
  double p_data, p_index, compactness;
  VALUE metadata;

  if (TYPE(fname) != T_STRING) {
    rb_raise(rb_eArgError, "File name must be a string");
  }

  path = StringValueCStr(fname);

  fd = open(path, O_RDONLY);
  if (fd < 0) {
    rb_raise(rb_eRuntimeError, "Unable to open file %s: %s", path, strerror(errno));
  }

  if (fstat(fd, &ss) < 0) {
    /* close() may overwrite errno, so capture the fstat() error first. */
    int fstat_errno = errno;

    close(fd);
    rb_raise(rb_eRuntimeError, "Failed fstat on file %s: %s", path, strerror(fstat_errno));
  }

  r = mtbl_reader_init_fd(fd, NULL);
  if (r == NULL) {
    close(fd);
    rb_raise(rb_eRuntimeError, "Unable to open file %s: mtbl_reader_init_fd()", path);
  }

  m = mtbl_reader_metadata(r);

  data_block_size = mtbl_metadata_data_block_size(m);
  compression_algorithm = mtbl_metadata_compression_algorithm(m);
  count_entries = mtbl_metadata_count_entries(m);
  count_data_blocks = mtbl_metadata_count_data_blocks(m);
  bytes_data_blocks = mtbl_metadata_bytes_data_blocks(m);
  bytes_index_block = mtbl_metadata_bytes_index_block(m);
  bytes_keys = mtbl_metadata_bytes_keys(m);
  bytes_values = mtbl_metadata_bytes_values(m);
  index_block_offset = mtbl_metadata_index_block_offset(m);

  mtbl_reader_destroy(&r);
  close(fd);

  p_data = 100.0 * bytes_data_blocks / ss.st_size;
  p_index = 100.0 * bytes_index_block / ss.st_size;
  compactness = 100.0 * ss.st_size / (bytes_keys + bytes_values);

  /* The uint64_t counters are converted with ULL2NUM so they stay unsigned. */
  metadata = rb_hash_new();
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("filename")), fname);
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("filesize")), LL2NUM((size_t) ss.st_size));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("index_block_offset")), ULL2NUM(index_block_offset));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("index_bytes")), ULL2NUM(bytes_index_block));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("index_bytes_pct")), DBL2NUM(p_index));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("data_block_bytes")), ULL2NUM(bytes_data_blocks));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("data_block_bytes_pct")), DBL2NUM(p_data));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("data_block_size")), ULL2NUM(data_block_size));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("data_block_count")), ULL2NUM(count_data_blocks));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("entry_count")), ULL2NUM(count_entries));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("key_bytes")), ULL2NUM(bytes_keys));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("value_bytes")), ULL2NUM(bytes_values));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("compression_algorithm")), rb_str_new2(mtbl_compression_type_to_str(compression_algorithm)));
  rb_hash_aset(metadata, ID2SYM(rb_intern_const("compactness")), DBL2NUM(compactness));

  return metadata;
}
|
551
|
+
|
552
|
+
void Init_mtbl() {
|
553
|
+
rb_cMTBL = rb_define_class("MTBL", rb_cObject);
|
554
|
+
rb_cMTBLReader = rb_define_class_under(rb_cMTBL, "Reader", rb_cObject);
|
555
|
+
rb_cMTBLIterator = rb_define_class_under(rb_cMTBL, "Iterator", rb_cObject);
|
556
|
+
rb_cMTBLWriter = rb_define_class_under(rb_cMTBL, "Writer", rb_cObject);
|
557
|
+
rb_cMTBLSorter = rb_define_class_under(rb_cMTBL, "Sorter", rb_cObject);
|
558
|
+
rb_cMTBLUtil = rb_define_class_under(rb_cMTBL, "Utils", rb_cObject);
|
559
|
+
|
560
|
+
rb_cMTBLVersion = rb_str_new2((const char *)MTBL_VERSION);
|
561
|
+
rb_define_const(rb_cMTBL, "Version", rb_cMTBLVersion);
|
562
|
+
rb_define_const(rb_cMTBL, "COMPRESSION_NONE", INT2FIX(MTBL_COMPRESSION_NONE));
|
563
|
+
rb_define_const(rb_cMTBL, "COMPRESSION_SNAPPY", INT2FIX(MTBL_COMPRESSION_SNAPPY));
|
564
|
+
rb_define_const(rb_cMTBL, "COMPRESSION_ZLIB", INT2FIX(MTBL_COMPRESSION_ZLIB));
|
565
|
+
rb_define_const(rb_cMTBL, "COMPRESSION_LZ4", INT2FIX(MTBL_COMPRESSION_LZ4));
|
566
|
+
rb_define_const(rb_cMTBL, "COMPRESSION_LZ4HC", INT2FIX(MTBL_COMPRESSION_LZ4HC));
|
567
|
+
|
568
|
+
rb_define_alloc_func(rb_cMTBLReader, rbmtbl_reader_alloc);
|
569
|
+
rb_define_method(rb_cMTBLReader, "initialize", rbmtbl_reader_initialize, 1);
|
570
|
+
rb_define_method(rb_cMTBLReader, "iterator", rbmtbl_reader_iterator, 0);
|
571
|
+
rb_define_method(rb_cMTBLReader, "get", rbmtbl_reader_get, 1);
|
572
|
+
rb_define_method(rb_cMTBLReader, "get_prefix", rbmtbl_reader_get_prefix, 1);
|
573
|
+
rb_define_method(rb_cMTBLReader, "get_range", rbmtbl_reader_get_range, 2);
|
574
|
+
|
575
|
+
rb_define_method(rb_cMTBLIterator, "initialize", rbmtbl_iterator_initialize, 1);
|
576
|
+
rb_define_alloc_func(rb_cMTBLIterator, rbmtbl_iterator_alloc);
|
577
|
+
rb_define_method(rb_cMTBLIterator, "next", rbmtbl_iterator_next, 0);
|
578
|
+
rb_define_method(rb_cMTBLIterator, "each", rbmtbl_iterator_next, 0);
|
579
|
+
|
580
|
+
rb_define_method(rb_cMTBLWriter, "initialize", rbmtbl_writer_initialize, -1);
|
581
|
+
rb_define_alloc_func(rb_cMTBLWriter, rbmtbl_writer_alloc);
|
582
|
+
rb_define_method(rb_cMTBLWriter, "add", rbmtbl_writer_add, 2);
|
583
|
+
rb_define_method(rb_cMTBLWriter, "close", rbmtbl_writer_close, 0);
|
584
|
+
|
585
|
+
rb_define_method(rb_cMTBLSorter, "initialize", rbmtbl_sorter_initialize, -1);
|
586
|
+
rb_define_alloc_func(rb_cMTBLSorter, rbmtbl_sorter_alloc);
|
587
|
+
rb_define_method(rb_cMTBLSorter, "add", rbmtbl_sorter_add, 2);
|
588
|
+
rb_define_method(rb_cMTBLSorter, "write", rbmtbl_sorter_write, 1);
|
589
|
+
rb_define_method(rb_cMTBLSorter, "close", rbmtbl_sorter_close, 0);
|
590
|
+
|
591
|
+
rb_define_singleton_method(rb_cMTBLUtil, "metadata", rbmtbl_utils_metadata, 1);
|
592
|
+
}
|