extbzip3 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,484 @@
1
+ #include "extbzip3.h"
2
+
3
+ static int32_t
4
+ aux_check_header(const char *in, const char *const inend, uint32_t *blockcount)
5
+ {
6
+ if ((inend - in) < (blockcount ? 13 : 9)) {
7
+ return BZ3_ERR_TRUNCATED_DATA;
8
+ }
9
+
10
+ if (memcmp(in, aux_bzip3_signature, 5) != 0) {
11
+ return BZ3_ERR_MALFORMED_HEADER;
12
+ }
13
+
14
+ int32_t blocksize = loadu32le(in + 5);
15
+
16
+ if (blocksize < AUX_BZIP3_BLOCKSIZE_MIN || blocksize > AUX_BZIP3_BLOCKSIZE_MAX) {
17
+ return BZ3_ERR_MALFORMED_HEADER;
18
+ }
19
+
20
+ if (blockcount) {
21
+ *blockcount = loadu32le(in + 9);
22
+
23
+ if (*blockcount > INT32_MAX) {
24
+ return BZ3_ERR_MALFORMED_HEADER;
25
+ }
26
+ }
27
+
28
+ return blocksize;
29
+ }
30
+
31
+ static int
32
+ aux_oneshot_decode(const void *in, void *out, size_t insize, size_t *outsize, int format, int32_t blocksize, int concat)
33
+ {
34
+ uint32_t blockcount = 0;
35
+ int32_t ret = aux_check_header((const char *)in, (const char *)in + insize,
36
+ (format == AUX_BZIP3_V1_FILE_FORMAT ? NULL : &blockcount));
37
+ if (ret < 0) {
38
+ return ret;
39
+ }
40
+
41
+ if (ret < AUX_BZIP3_BLOCKSIZE_MIN || ret > blocksize || ret > AUX_BZIP3_BLOCKSIZE_MAX) {
42
+ return BZ3_ERR_OUT_OF_BOUNDS;
43
+ }
44
+
45
+ uint32_t chunk_blocksize = ret;
46
+ struct bz3_state *bz3 = bz3_new(blocksize);
47
+
48
+ if (bz3 == NULL) {
49
+ return BZ3_ERR_INIT;
50
+ }
51
+
52
+ const char *inp = (const char *)in + (format == AUX_BZIP3_V1_FILE_FORMAT ? 9 : 13);
53
+ const char *const inend = (const char *)in + insize;
54
+ char *outp = (char *)out;
55
+ char *const outend = outp + *outsize;
56
+
57
+ while (inend - inp > 0) {
58
+ if (blockcount == 0) {
59
+ uint32_t blockcount1 = 0;
60
+ ret = aux_check_header(inp, inend, (format == AUX_BZIP3_V1_FILE_FORMAT ? NULL : &blockcount1));
61
+
62
+ if (ret > 0) {
63
+ if (!concat) {
64
+ break;
65
+ }
66
+
67
+ blockcount = blockcount1;
68
+
69
+ if (ret < AUX_BZIP3_BLOCKSIZE_MIN || ret > blocksize || ret > AUX_BZIP3_BLOCKSIZE_MAX) {
70
+ bz3_free(bz3);
71
+ return BZ3_ERR_OUT_OF_BOUNDS;
72
+ }
73
+
74
+ chunk_blocksize = (uint32_t)ret;
75
+ inp += (format == AUX_BZIP3_V1_FILE_FORMAT ? 9 : 13);
76
+
77
+ continue;
78
+ }
79
+ }
80
+
81
+ if (inend - inp < 8) {
82
+ bz3_free(bz3);
83
+ return BZ3_ERR_TRUNCATED_DATA;
84
+ }
85
+
86
+ uint32_t packedsize = loadu32le(inp);
87
+ uint32_t origsize = loadu32le(inp + 4);
88
+
89
+ if (origsize > chunk_blocksize || packedsize > bz3_bound(origsize)) {
90
+ bz3_free(bz3);
91
+ return BZ3_ERR_DATA_TOO_BIG;
92
+ }
93
+
94
+ inp += 8;
95
+
96
+ if (inend - inp < packedsize) {
97
+ bz3_free(bz3);
98
+ return BZ3_ERR_DATA_TOO_BIG;
99
+ }
100
+
101
+ if (outend - outp < (origsize > packedsize ? origsize : packedsize)) {
102
+ bz3_free(bz3);
103
+ return BZ3_ERR_DATA_TOO_BIG;
104
+ }
105
+
106
+ memmove(outp, inp, packedsize);
107
+ ret = aux_bz3_decode_block_nogvl(bz3, outp, packedsize, origsize);
108
+ if (ret < 0) {
109
+ bz3_free(bz3);
110
+ return ret;
111
+ }
112
+
113
+ inp += packedsize;
114
+ outp += origsize;
115
+
116
+ if (format != AUX_BZIP3_V1_FILE_FORMAT) {
117
+ blockcount--;
118
+ }
119
+ }
120
+
121
+ bz3_free(bz3);
122
+
123
+ if (blockcount > 0) {
124
+ return BZ3_ERR_TRUNCATED_DATA;
125
+ }
126
+
127
+ *outsize = (size_t)(outp - (const char *)out);
128
+
129
+ return BZ3_OK;
130
+ }
131
+
132
/*
 * Estimate the decoded size of a bzip3 stream.
 *
 * Placeholder: none of the arguments are examined yet and a fixed 16 MiB is
 * always reported.
 */
static uint64_t
aux_scan_size(int format, const char *in, const char *const inend, int concat)
{
    (void)format;
    (void)in;
    (void)inend;
    (void)concat;

    const uint64_t fixed_estimate = 16 << 20; // FIXME!

    return fixed_estimate;
}
137
+
138
+ static int
139
+ aux_io_read(VALUE io, size_t size, VALUE buf)
140
+ {
141
+ VALUE args[2] = { SIZET2NUM(size), buf };
142
+ VALUE ret = rb_funcallv(io, rb_intern("read"), 2, args);
143
+
144
+ if (RB_NIL_P(ret)) {
145
+ return 1;
146
+ } else if (ret != buf) {
147
+ rb_check_type(ret, RUBY_T_STRING);
148
+ rb_str_set_len(buf, 0);
149
+ rb_str_cat(buf, RSTRING_PTR(ret), RSTRING_LEN(ret));
150
+ }
151
+
152
+ return 0;
153
+ }
154
+
155
+ struct decoder
156
+ {
157
+ struct bz3_state *bzip3;
158
+ uint32_t blocksize;
159
+ int concat:1;
160
+ int firstread:1;
161
+ int closed:1;
162
+ int eof:1;
163
+ VALUE inport;
164
+ VALUE readbuf;
165
+ VALUE destbuf;
166
+ };
167
+
168
+ #define DECODER_FREE_BLOCK(P) \
169
+ if ((P)->bzip3) { \
170
+ bz3_free((P)->bzip3); \
171
+ } \
172
+
173
+ #define DECODER_VALUE_FOREACH(DEF) \
174
+ DEF(inport) \
175
+ DEF(readbuf) \
176
+ DEF(destbuf) \
177
+
178
+ AUX_DEFINE_TYPED_DATA(decoder, decoder_allocate, DECODER_FREE_BLOCK, DECODER_VALUE_FOREACH)
179
+
180
+ /*
181
+ * @overload initialize(blocksize: (16 << 20), concat: true)
182
+ */
183
+ static VALUE
184
+ decoder_initialize(int argc, VALUE argv[], VALUE self)
185
+ {
186
+ struct { VALUE inport, opts; } args;
187
+ rb_scan_args(argc, argv, "1:", &args.inport, &args.opts);
188
+
189
+ enum { numkw = 2 };
190
+ ID idtab[numkw] = { rb_intern("blocksize"), rb_intern("concat") };
191
+ union { struct { VALUE blocksize, concat; }; VALUE vect[numkw]; } opts;
192
+ rb_get_kwargs(args.opts, idtab, 0, numkw, opts.vect);
193
+
194
+ struct decoder *p = (struct decoder *)rb_check_typeddata(self, &decoder_type);
195
+ if (p == NULL || p->bzip3) {
196
+ rb_raise(rb_eTypeError, "wrong initialized or re-initializing - %" PRIsVALUE, self);
197
+ }
198
+
199
+ p->blocksize = aux_conv_to_blocksize(opts.blocksize);
200
+ p->inport = args.inport;
201
+ p->readbuf = Qnil;
202
+ p->destbuf = Qnil;
203
+ p->bzip3 = aux_bz3_new(p->blocksize);
204
+ p->firstread = 1;
205
+ p->concat = RB_UNDEF_P(opts.concat) || RTEST(opts.concat);
206
+
207
+ return self;
208
+ }
209
+ static int
210
+ decoder_read_block(VALUE self, struct decoder *p)
211
+ {
212
+ if (p->eof) {
213
+ return 1;
214
+ }
215
+
216
+ if (p->firstread) {
217
+ p->readbuf = rb_str_new(NULL, 0);
218
+
219
+ if (aux_io_read(p->inport, 9, p->readbuf) != 0) {
220
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
221
+ }
222
+
223
+ if (RSTRING_LEN(p->readbuf) < 9 || memcmp(RSTRING_PTR(p->readbuf), "BZ3v1", 5) != 0) {
224
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
225
+ }
226
+
227
+ uint32_t blocksize = loadu32le(RSTRING_PTR(p->readbuf) + 5);
228
+ if (blocksize < AUX_BZIP3_BLOCKSIZE_MIN || blocksize > AUX_BZIP3_BLOCKSIZE_MAX) {
229
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
230
+ }
231
+
232
+ if (blocksize > p->blocksize) {
233
+ rb_raise(rb_eRuntimeError, "initialize で指定した blocksize が小さすぎます (期待値 %d に対して実際は %d)", (int)p->blocksize, (int)blocksize);
234
+ }
235
+
236
+ p->firstread = 0;
237
+ }
238
+
239
+ for (;;) {
240
+ if (aux_io_read(p->inport, 8, p->readbuf) != 0) {
241
+ p->eof = 1;
242
+ return 1;
243
+ }
244
+
245
+ if (RSTRING_LEN(p->readbuf) < 8) {
246
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
247
+ }
248
+
249
+ if (memcmp(RSTRING_PTR(p->readbuf), "BZ3v1", 5) == 0) {
250
+ char workbuf[4];
251
+ memcpy(workbuf, RSTRING_PTR(p->readbuf) + 5, 3);
252
+ if (aux_io_read(p->inport, 1, p->readbuf) == 0) {
253
+ workbuf[3] = RSTRING_PTR(p->readbuf)[0];
254
+
255
+ uint32_t blocksize = loadu32le(workbuf);
256
+ if (blocksize < AUX_BZIP3_BLOCKSIZE_MIN || blocksize > AUX_BZIP3_BLOCKSIZE_MAX) {
257
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
258
+ }
259
+
260
+ if (blocksize > p->blocksize) {
261
+ rb_raise(rb_eRuntimeError, "initialize で指定した blocksize が小さすぎます (期待値 %d に対して実際は %d)", (int)p->blocksize, (int)blocksize);
262
+ }
263
+
264
+ if (p->concat) {
265
+ continue;
266
+ } else {
267
+ p->eof = 1;
268
+ break;
269
+ }
270
+ }
271
+
272
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
273
+ }
274
+
275
+ uint32_t packedsize = loadu32le(RSTRING_PTR(p->readbuf) + 0);
276
+ uint32_t originsize = loadu32le(RSTRING_PTR(p->readbuf) + 4);
277
+
278
+ rb_str_set_len(p->destbuf, 0);
279
+ rb_str_modify_expand(p->destbuf, originsize);
280
+
281
+ uint32_t needsize = packedsize;
282
+ while (needsize > 0) {
283
+ if (aux_io_read(p->inport, packedsize, p->readbuf) != 0) {
284
+ rb_raise(rb_eRuntimeError, "意図しない EOF");
285
+ } else if (RSTRING_LEN(p->readbuf) > packedsize) {
286
+ rb_raise(rb_eRuntimeError, "#<%" PRIsVALUE ":0x%" PRIxVALUE ">#read は %u バイトを超過して読み込みました",
287
+ rb_class_of(p->inport), p->inport, packedsize);
288
+ }
289
+
290
+ rb_str_cat(p->destbuf, RSTRING_PTR(p->readbuf), RSTRING_LEN(p->readbuf));
291
+ needsize -= RSTRING_LEN(p->readbuf);
292
+ }
293
+
294
+ int32_t ret = aux_bz3_decode_block_nogvl(p->bzip3, RSTRING_PTR(p->destbuf), packedsize, originsize);
295
+ extbzip3_check_error(ret);
296
+
297
+ rb_str_set_len(p->destbuf, originsize);
298
+
299
+ break;
300
+ }
301
+
302
+ return 0;
303
+ }
304
+
305
+ /*
306
+ * @overload read(size = nil, dest = "")
307
+ */
308
+ static VALUE
309
+ decoder_read(int argc, VALUE argv[], VALUE self)
310
+ {
311
+ struct { VALUE size, dest; } args;
312
+ switch (rb_scan_args(argc, argv, "02", &args.size, &args.dest)) {
313
+ case 0:
314
+ args.size = Qnil;
315
+ args.dest = rb_str_new(NULL, 0);
316
+ break;
317
+ case 1:
318
+ args.dest = rb_str_new(NULL, 0);
319
+ break;
320
+ case 2:
321
+ rb_check_type(args.dest, RUBY_T_STRING);
322
+ rb_str_modify(args.dest);
323
+ rb_str_set_len(args.dest, 0);
324
+ break;
325
+ }
326
+
327
+ size_t size;
328
+ if (RB_NIL_P(args.size)) {
329
+ size = -1;
330
+ } else {
331
+ size = NUM2SIZET(args.size);
332
+ }
333
+
334
+ struct decoder *p = get_decoder(self);
335
+
336
+ if (p->closed) {
337
+ rb_raise(rb_eRuntimeError, "closed stream - %" PRIsVALUE, self);
338
+ }
339
+
340
+ if (size < 1) {
341
+ return args.dest;
342
+ } else {
343
+ RUBY_ASSERT_ALWAYS(rb_type_p(args.dest, RUBY_T_STRING) && RSTRING_LEN(args.dest) == 0);
344
+ RUBY_ASSERT_ALWAYS(RB_NIL_P(p->destbuf) || rb_type_p(p->destbuf, RUBY_T_STRING));
345
+
346
+ if (RB_NIL_P(p->destbuf)) {
347
+ p->destbuf = rb_str_new(NULL, 0);
348
+ }
349
+
350
+ for (;;) {
351
+ if ((size_t)RSTRING_LEN(p->destbuf) >= size) {
352
+ rb_str_cat(args.dest, RSTRING_PTR(p->destbuf), size);
353
+ memmove(RSTRING_PTR(p->destbuf), RSTRING_PTR(p->destbuf) + size, RSTRING_LEN(p->destbuf) - size);
354
+ rb_str_set_len(p->destbuf, RSTRING_LEN(p->destbuf) - size);
355
+
356
+ break;
357
+ }
358
+
359
+ size -= RSTRING_LEN(p->destbuf);
360
+ rb_str_cat(args.dest, RSTRING_PTR(p->destbuf), RSTRING_LEN(p->destbuf));
361
+ rb_str_set_len(p->destbuf, 0);
362
+
363
+ if (decoder_read_block(self, p) != 0) {
364
+ break;
365
+ }
366
+ }
367
+
368
+ return (RSTRING_LEN(args.dest) > 0 ? args.dest : Qnil);
369
+ }
370
+ }
371
+
372
+ static VALUE
373
+ decoder_close(VALUE self)
374
+ {
375
+ struct decoder *p = get_decoder(self);
376
+
377
+ if (p->closed) {
378
+ rb_raise(rb_eRuntimeError, "closed stream - %" PRIsVALUE, self);
379
+ }
380
+
381
+ p->closed = 1;
382
+
383
+ return Qnil;
384
+ }
385
+
386
+ static VALUE
387
+ decoder_closed(VALUE self)
388
+ {
389
+ return (get_decoder(self)->closed ? Qtrue : Qfalse);
390
+ }
391
+
392
+ static VALUE
393
+ decoder_eof(VALUE self)
394
+ {
395
+ return (get_decoder(self)->eof ? Qtrue : Qfalse);
396
+ }
397
+
398
+ /*
399
+ * @overload decode(src, maxdest = nil, dest = "", **opts)
400
+ * @overload decode(src, dest, **opts)
401
+ *
402
+ * decode bzip3 sequence.
403
+ *
404
+ * @param src [String] describe bzip3 sequence
405
+ * @param maxdest [Integer] describe maximum dest size
406
+ * @param dest [String] describe destination
407
+ * @param opts [Hash]
408
+ * @option opts [true, false] :concat (true)
409
+ * @option opts [true, false] :partial (false)
410
+ * @option opts [Integer] :blocksize ((16 << 20))
411
+ * 最大ブロックサイズを記述します。
412
+ * @option opts :format (Bzip3::V1_FILE_FORMAT)
413
+ * Bzip3::V1_FILE_FORMAT, Bzip3::V1_FRAME_FORMAT
414
+ * @return [String] dest for decoded bzip3
415
+ */
416
+ static VALUE
417
+ decoder_s_decode(int argc, VALUE argv[], VALUE mod)
418
+ {
419
+ size_t insize, outsize;
420
+ struct { VALUE src, maxdest, dest, opts; } args;
421
+ argc = rb_scan_args(argc, argv, "12:", &args.src, &args.maxdest, &args.dest, &args.opts);
422
+
423
+ enum { numkw = 4 };
424
+ ID idtab[numkw] = { rb_intern("concat"), rb_intern("partial"), rb_intern("blocksize"), rb_intern("format") };
425
+ union { struct { VALUE concat, partial, blocksize, format; }; VALUE vect[numkw]; } opts;
426
+ rb_get_kwargs(args.opts, idtab, 0, numkw, opts.vect);
427
+
428
+ switch (argc) {
429
+ case 1:
430
+ insize = RSTRING_LEN(args.src);
431
+ outsize = aux_scan_size(aux_conv_to_format(opts.format), RSTRING_PTR(args.src), RSTRING_END(args.src), RB_UNDEF_P(opts.concat) || RTEST(opts.concat));
432
+ args.dest = rb_str_buf_new(outsize);
433
+ break;
434
+ case 2:
435
+ insize = RSTRING_LEN(args.src);
436
+
437
+ if (rb_type_p(args.maxdest, RUBY_T_FIXNUM) || rb_type_p(args.maxdest, RUBY_T_BIGNUM)) {
438
+ outsize = NUM2SIZET(args.maxdest);
439
+ args.dest = rb_str_buf_new(outsize);
440
+ } else {
441
+ args.dest = args.maxdest;
442
+ outsize = aux_scan_size(aux_conv_to_format(opts.format), RSTRING_PTR(args.src), RSTRING_END(args.src), RB_UNDEF_P(opts.concat) || RTEST(opts.concat));
443
+ rb_str_modify(args.dest);
444
+ rb_str_set_len(args.dest, 0);
445
+ rb_str_modify_expand(args.dest, outsize);
446
+ }
447
+
448
+ break;
449
+ case 3:
450
+ insize = RSTRING_LEN(args.src);
451
+ outsize = NUM2SIZET(args.maxdest);
452
+ rb_str_modify(args.dest);
453
+ rb_str_set_len(args.dest, 0);
454
+ rb_str_modify_expand(args.dest, outsize);
455
+
456
+ break;
457
+ }
458
+
459
+ // TODO: maxdest, partial
460
+
461
+ int status = aux_oneshot_decode(RSTRING_PTR(args.src), RSTRING_PTR(args.dest), insize, &outsize,
462
+ aux_conv_to_format(opts.format),
463
+ (RB_NIL_OR_UNDEF_P(opts.blocksize) ? (16 << 20) : NUM2INT(opts.blocksize)),
464
+ RB_UNDEF_P(opts.concat) || RTEST(opts.concat));
465
+ extbzip3_check_error(status);
466
+
467
+ rb_str_set_len(args.dest, outsize);
468
+
469
+ return args.dest;
470
+ }
471
+
472
+ void
473
+ extbzip3_init_decoder(VALUE bzip3_module)
474
+ {
475
+ VALUE decoder_class = rb_define_class_under(bzip3_module, "Decoder", rb_cObject);
476
+ rb_define_alloc_func(decoder_class, decoder_allocate);
477
+ rb_define_singleton_method(decoder_class, "decode", decoder_s_decode, -1);
478
+ rb_define_method(decoder_class, "initialize", decoder_initialize, -1);
479
+ rb_define_method(decoder_class, "read", decoder_read, -1);
480
+ rb_define_method(decoder_class, "close", decoder_close, 0);
481
+ rb_define_method(decoder_class, "closed?", decoder_closed, 0);
482
+ rb_define_method(decoder_class, "eof?", decoder_eof, 0);
483
+ rb_define_alias(decoder_class, "eof", "eof?");
484
+ }