extbzip3 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,484 @@
1
+ #include "extbzip3.h"
2
+
3
+ static int32_t
4
+ aux_check_header(const char *in, const char *const inend, uint32_t *blockcount)
5
+ {
6
+ if ((inend - in) < (blockcount ? 13 : 9)) {
7
+ return BZ3_ERR_TRUNCATED_DATA;
8
+ }
9
+
10
+ if (memcmp(in, aux_bzip3_signature, 5) != 0) {
11
+ return BZ3_ERR_MALFORMED_HEADER;
12
+ }
13
+
14
+ int32_t blocksize = loadu32le(in + 5);
15
+
16
+ if (blocksize < AUX_BZIP3_BLOCKSIZE_MIN || blocksize > AUX_BZIP3_BLOCKSIZE_MAX) {
17
+ return BZ3_ERR_MALFORMED_HEADER;
18
+ }
19
+
20
+ if (blockcount) {
21
+ *blockcount = loadu32le(in + 9);
22
+
23
+ if (*blockcount > INT32_MAX) {
24
+ return BZ3_ERR_MALFORMED_HEADER;
25
+ }
26
+ }
27
+
28
+ return blocksize;
29
+ }
30
+
31
+ static int
32
+ aux_oneshot_decode(const void *in, void *out, size_t insize, size_t *outsize, int format, int32_t blocksize, int concat)
33
+ {
34
+ uint32_t blockcount = 0;
35
+ int32_t ret = aux_check_header((const char *)in, (const char *)in + insize,
36
+ (format == AUX_BZIP3_V1_FILE_FORMAT ? NULL : &blockcount));
37
+ if (ret < 0) {
38
+ return ret;
39
+ }
40
+
41
+ if (ret < AUX_BZIP3_BLOCKSIZE_MIN || ret > blocksize || ret > AUX_BZIP3_BLOCKSIZE_MAX) {
42
+ return BZ3_ERR_OUT_OF_BOUNDS;
43
+ }
44
+
45
+ uint32_t chunk_blocksize = ret;
46
+ struct bz3_state *bz3 = bz3_new(blocksize);
47
+
48
+ if (bz3 == NULL) {
49
+ return BZ3_ERR_INIT;
50
+ }
51
+
52
+ const char *inp = (const char *)in + (format == AUX_BZIP3_V1_FILE_FORMAT ? 9 : 13);
53
+ const char *const inend = (const char *)in + insize;
54
+ char *outp = (char *)out;
55
+ char *const outend = outp + *outsize;
56
+
57
+ while (inend - inp > 0) {
58
+ if (blockcount == 0) {
59
+ uint32_t blockcount1 = 0;
60
+ ret = aux_check_header(inp, inend, (format == AUX_BZIP3_V1_FILE_FORMAT ? NULL : &blockcount1));
61
+
62
+ if (ret > 0) {
63
+ if (!concat) {
64
+ break;
65
+ }
66
+
67
+ blockcount = blockcount1;
68
+
69
+ if (ret < AUX_BZIP3_BLOCKSIZE_MIN || ret > blocksize || ret > AUX_BZIP3_BLOCKSIZE_MAX) {
70
+ bz3_free(bz3);
71
+ return BZ3_ERR_OUT_OF_BOUNDS;
72
+ }
73
+
74
+ chunk_blocksize = (uint32_t)ret;
75
+ inp += (format == AUX_BZIP3_V1_FILE_FORMAT ? 9 : 13);
76
+
77
+ continue;
78
+ }
79
+ }
80
+
81
+ if (inend - inp < 8) {
82
+ bz3_free(bz3);
83
+ return BZ3_ERR_TRUNCATED_DATA;
84
+ }
85
+
86
+ uint32_t packedsize = loadu32le(inp);
87
+ uint32_t origsize = loadu32le(inp + 4);
88
+
89
+ if (origsize > chunk_blocksize || packedsize > bz3_bound(origsize)) {
90
+ bz3_free(bz3);
91
+ return BZ3_ERR_DATA_TOO_BIG;
92
+ }
93
+
94
+ inp += 8;
95
+
96
+ if (inend - inp < packedsize) {
97
+ bz3_free(bz3);
98
+ return BZ3_ERR_DATA_TOO_BIG;
99
+ }
100
+
101
+ if (outend - outp < (origsize > packedsize ? origsize : packedsize)) {
102
+ bz3_free(bz3);
103
+ return BZ3_ERR_DATA_TOO_BIG;
104
+ }
105
+
106
+ memmove(outp, inp, packedsize);
107
+ ret = aux_bz3_decode_block_nogvl(bz3, outp, packedsize, origsize);
108
+ if (ret < 0) {
109
+ bz3_free(bz3);
110
+ return ret;
111
+ }
112
+
113
+ inp += packedsize;
114
+ outp += origsize;
115
+
116
+ if (format != AUX_BZIP3_V1_FILE_FORMAT) {
117
+ blockcount--;
118
+ }
119
+ }
120
+
121
+ bz3_free(bz3);
122
+
123
+ if (blockcount > 0) {
124
+ return BZ3_ERR_TRUNCATED_DATA;
125
+ }
126
+
127
+ *outsize = (size_t)(outp - (const char *)out);
128
+
129
+ return BZ3_OK;
130
+ }
131
+
132
/*
 * Estimate the output size needed to decode the stream in [in, inend).
 *
 * FIXME: not implemented yet -- none of the arguments are examined and a
 * fixed 16 MiB is always reported.
 */
static uint64_t
aux_scan_size(int format, const char *in, const char *const inend, int concat)
{
    const uint64_t fixed_estimate = (uint64_t)16 << 20;

    return fixed_estimate; // FIXME!
}
137
+
138
+ static int
139
+ aux_io_read(VALUE io, size_t size, VALUE buf)
140
+ {
141
+ VALUE args[2] = { SIZET2NUM(size), buf };
142
+ VALUE ret = rb_funcallv(io, rb_intern("read"), 2, args);
143
+
144
+ if (RB_NIL_P(ret)) {
145
+ return 1;
146
+ } else if (ret != buf) {
147
+ rb_check_type(ret, RUBY_T_STRING);
148
+ rb_str_set_len(buf, 0);
149
+ rb_str_cat(buf, RSTRING_PTR(ret), RSTRING_LEN(ret));
150
+ }
151
+
152
+ return 0;
153
+ }
154
+
155
+ struct decoder
156
+ {
157
+ struct bz3_state *bzip3;
158
+ uint32_t blocksize;
159
+ int concat:1;
160
+ int firstread:1;
161
+ int closed:1;
162
+ int eof:1;
163
+ VALUE inport;
164
+ VALUE readbuf;
165
+ VALUE destbuf;
166
+ };
167
+
168
+ #define DECODER_FREE_BLOCK(P) \
169
+ if ((P)->bzip3) { \
170
+ bz3_free((P)->bzip3); \
171
+ } \
172
+
173
+ #define DECODER_VALUE_FOREACH(DEF) \
174
+ DEF(inport) \
175
+ DEF(readbuf) \
176
+ DEF(destbuf) \
177
+
178
+ AUX_DEFINE_TYPED_DATA(decoder, decoder_allocate, DECODER_FREE_BLOCK, DECODER_VALUE_FOREACH)
179
+
180
+ /*
181
+ * @overload initialize(blocksize: (16 << 20), concat: true)
182
+ */
183
+ static VALUE
184
+ decoder_initialize(int argc, VALUE argv[], VALUE self)
185
+ {
186
+ struct { VALUE inport, opts; } args;
187
+ rb_scan_args(argc, argv, "1:", &args.inport, &args.opts);
188
+
189
+ enum { numkw = 2 };
190
+ ID idtab[numkw] = { rb_intern("blocksize"), rb_intern("concat") };
191
+ union { struct { VALUE blocksize, concat; }; VALUE vect[numkw]; } opts;
192
+ rb_get_kwargs(args.opts, idtab, 0, numkw, opts.vect);
193
+
194
+ struct decoder *p = (struct decoder *)rb_check_typeddata(self, &decoder_type);
195
+ if (p == NULL || p->bzip3) {
196
+ rb_raise(rb_eTypeError, "wrong initialized or re-initializing - %" PRIsVALUE, self);
197
+ }
198
+
199
+ p->blocksize = aux_conv_to_blocksize(opts.blocksize);
200
+ p->inport = args.inport;
201
+ p->readbuf = Qnil;
202
+ p->destbuf = Qnil;
203
+ p->bzip3 = aux_bz3_new(p->blocksize);
204
+ p->firstread = 1;
205
+ p->concat = RB_UNDEF_P(opts.concat) || RTEST(opts.concat);
206
+
207
+ return self;
208
+ }
209
+ static int
210
+ decoder_read_block(VALUE self, struct decoder *p)
211
+ {
212
+ if (p->eof) {
213
+ return 1;
214
+ }
215
+
216
+ if (p->firstread) {
217
+ p->readbuf = rb_str_new(NULL, 0);
218
+
219
+ if (aux_io_read(p->inport, 9, p->readbuf) != 0) {
220
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
221
+ }
222
+
223
+ if (RSTRING_LEN(p->readbuf) < 9 || memcmp(RSTRING_PTR(p->readbuf), "BZ3v1", 5) != 0) {
224
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
225
+ }
226
+
227
+ uint32_t blocksize = loadu32le(RSTRING_PTR(p->readbuf) + 5);
228
+ if (blocksize < AUX_BZIP3_BLOCKSIZE_MIN || blocksize > AUX_BZIP3_BLOCKSIZE_MAX) {
229
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
230
+ }
231
+
232
+ if (blocksize > p->blocksize) {
233
+ rb_raise(rb_eRuntimeError, "initialize で指定した blocksize が小さすぎます (期待値 %d に対して実際は %d)", (int)p->blocksize, (int)blocksize);
234
+ }
235
+
236
+ p->firstread = 0;
237
+ }
238
+
239
+ for (;;) {
240
+ if (aux_io_read(p->inport, 8, p->readbuf) != 0) {
241
+ p->eof = 1;
242
+ return 1;
243
+ }
244
+
245
+ if (RSTRING_LEN(p->readbuf) < 8) {
246
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
247
+ }
248
+
249
+ if (memcmp(RSTRING_PTR(p->readbuf), "BZ3v1", 5) == 0) {
250
+ char workbuf[4];
251
+ memcpy(workbuf, RSTRING_PTR(p->readbuf) + 5, 3);
252
+ if (aux_io_read(p->inport, 1, p->readbuf) == 0) {
253
+ workbuf[3] = RSTRING_PTR(p->readbuf)[0];
254
+
255
+ uint32_t blocksize = loadu32le(workbuf);
256
+ if (blocksize < AUX_BZIP3_BLOCKSIZE_MIN || blocksize > AUX_BZIP3_BLOCKSIZE_MAX) {
257
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
258
+ }
259
+
260
+ if (blocksize > p->blocksize) {
261
+ rb_raise(rb_eRuntimeError, "initialize で指定した blocksize が小さすぎます (期待値 %d に対して実際は %d)", (int)p->blocksize, (int)blocksize);
262
+ }
263
+
264
+ if (p->concat) {
265
+ continue;
266
+ } else {
267
+ p->eof = 1;
268
+ break;
269
+ }
270
+ }
271
+
272
+ extbzip3_check_error(BZ3_ERR_MALFORMED_HEADER);
273
+ }
274
+
275
+ uint32_t packedsize = loadu32le(RSTRING_PTR(p->readbuf) + 0);
276
+ uint32_t originsize = loadu32le(RSTRING_PTR(p->readbuf) + 4);
277
+
278
+ rb_str_set_len(p->destbuf, 0);
279
+ rb_str_modify_expand(p->destbuf, originsize);
280
+
281
+ uint32_t needsize = packedsize;
282
+ while (needsize > 0) {
283
+ if (aux_io_read(p->inport, packedsize, p->readbuf) != 0) {
284
+ rb_raise(rb_eRuntimeError, "意図しない EOF");
285
+ } else if (RSTRING_LEN(p->readbuf) > packedsize) {
286
+ rb_raise(rb_eRuntimeError, "#<%" PRIsVALUE ":0x%" PRIxVALUE ">#read は %u バイトを超過して読み込みました",
287
+ rb_class_of(p->inport), p->inport, packedsize);
288
+ }
289
+
290
+ rb_str_cat(p->destbuf, RSTRING_PTR(p->readbuf), RSTRING_LEN(p->readbuf));
291
+ needsize -= RSTRING_LEN(p->readbuf);
292
+ }
293
+
294
+ int32_t ret = aux_bz3_decode_block_nogvl(p->bzip3, RSTRING_PTR(p->destbuf), packedsize, originsize);
295
+ extbzip3_check_error(ret);
296
+
297
+ rb_str_set_len(p->destbuf, originsize);
298
+
299
+ break;
300
+ }
301
+
302
+ return 0;
303
+ }
304
+
305
+ /*
306
+ * @overload read(size = nil, dest = "")
307
+ */
308
+ static VALUE
309
+ decoder_read(int argc, VALUE argv[], VALUE self)
310
+ {
311
+ struct { VALUE size, dest; } args;
312
+ switch (rb_scan_args(argc, argv, "02", &args.size, &args.dest)) {
313
+ case 0:
314
+ args.size = Qnil;
315
+ args.dest = rb_str_new(NULL, 0);
316
+ break;
317
+ case 1:
318
+ args.dest = rb_str_new(NULL, 0);
319
+ break;
320
+ case 2:
321
+ rb_check_type(args.dest, RUBY_T_STRING);
322
+ rb_str_modify(args.dest);
323
+ rb_str_set_len(args.dest, 0);
324
+ break;
325
+ }
326
+
327
+ size_t size;
328
+ if (RB_NIL_P(args.size)) {
329
+ size = -1;
330
+ } else {
331
+ size = NUM2SIZET(args.size);
332
+ }
333
+
334
+ struct decoder *p = get_decoder(self);
335
+
336
+ if (p->closed) {
337
+ rb_raise(rb_eRuntimeError, "closed stream - %" PRIsVALUE, self);
338
+ }
339
+
340
+ if (size < 1) {
341
+ return args.dest;
342
+ } else {
343
+ RUBY_ASSERT_ALWAYS(rb_type_p(args.dest, RUBY_T_STRING) && RSTRING_LEN(args.dest) == 0);
344
+ RUBY_ASSERT_ALWAYS(RB_NIL_P(p->destbuf) || rb_type_p(p->destbuf, RUBY_T_STRING));
345
+
346
+ if (RB_NIL_P(p->destbuf)) {
347
+ p->destbuf = rb_str_new(NULL, 0);
348
+ }
349
+
350
+ for (;;) {
351
+ if ((size_t)RSTRING_LEN(p->destbuf) >= size) {
352
+ rb_str_cat(args.dest, RSTRING_PTR(p->destbuf), size);
353
+ memmove(RSTRING_PTR(p->destbuf), RSTRING_PTR(p->destbuf) + size, RSTRING_LEN(p->destbuf) - size);
354
+ rb_str_set_len(p->destbuf, RSTRING_LEN(p->destbuf) - size);
355
+
356
+ break;
357
+ }
358
+
359
+ size -= RSTRING_LEN(p->destbuf);
360
+ rb_str_cat(args.dest, RSTRING_PTR(p->destbuf), RSTRING_LEN(p->destbuf));
361
+ rb_str_set_len(p->destbuf, 0);
362
+
363
+ if (decoder_read_block(self, p) != 0) {
364
+ break;
365
+ }
366
+ }
367
+
368
+ return (RSTRING_LEN(args.dest) > 0 ? args.dest : Qnil);
369
+ }
370
+ }
371
+
372
+ static VALUE
373
+ decoder_close(VALUE self)
374
+ {
375
+ struct decoder *p = get_decoder(self);
376
+
377
+ if (p->closed) {
378
+ rb_raise(rb_eRuntimeError, "closed stream - %" PRIsVALUE, self);
379
+ }
380
+
381
+ p->closed = 1;
382
+
383
+ return Qnil;
384
+ }
385
+
386
+ static VALUE
387
+ decoder_closed(VALUE self)
388
+ {
389
+ return (get_decoder(self)->closed ? Qtrue : Qfalse);
390
+ }
391
+
392
+ static VALUE
393
+ decoder_eof(VALUE self)
394
+ {
395
+ return (get_decoder(self)->eof ? Qtrue : Qfalse);
396
+ }
397
+
398
+ /*
399
+ * @overload decode(src, maxdest = nil, dest = "", **opts)
400
+ * @overload decode(src, dest, **opts)
401
+ *
402
+ * decode bzip3 sequence.
403
+ *
404
+ * @param src [String] describe bzip3 sequence
405
+ * @param maxdest [Integer] describe maximum dest size
406
+ * @param dest [String] describe destination
407
+ * @param opts [Hash]
408
+ * @option opts [true, false] :concat (true)
409
+ * @option opts [true, false] :partial (false)
410
+ * @option opts [Integer] :blocksize ((16 << 20))
411
+ * 最大ブロックサイズを記述します。
412
+ * @option opts :format (Bzip3::V1_FILE_FORMAT)
413
+ * Bzip3::V1_FILE_FORMAT, Bzip3::V1_FRAME_FORMAT
414
+ * @return [String] dest for decoded bzip3
415
+ */
416
+ static VALUE
417
+ decoder_s_decode(int argc, VALUE argv[], VALUE mod)
418
+ {
419
+ size_t insize, outsize;
420
+ struct { VALUE src, maxdest, dest, opts; } args;
421
+ argc = rb_scan_args(argc, argv, "12:", &args.src, &args.maxdest, &args.dest, &args.opts);
422
+
423
+ enum { numkw = 4 };
424
+ ID idtab[numkw] = { rb_intern("concat"), rb_intern("partial"), rb_intern("blocksize"), rb_intern("format") };
425
+ union { struct { VALUE concat, partial, blocksize, format; }; VALUE vect[numkw]; } opts;
426
+ rb_get_kwargs(args.opts, idtab, 0, numkw, opts.vect);
427
+
428
+ switch (argc) {
429
+ case 1:
430
+ insize = RSTRING_LEN(args.src);
431
+ outsize = aux_scan_size(aux_conv_to_format(opts.format), RSTRING_PTR(args.src), RSTRING_END(args.src), RB_UNDEF_P(opts.concat) || RTEST(opts.concat));
432
+ args.dest = rb_str_buf_new(outsize);
433
+ break;
434
+ case 2:
435
+ insize = RSTRING_LEN(args.src);
436
+
437
+ if (rb_type_p(args.maxdest, RUBY_T_FIXNUM) || rb_type_p(args.maxdest, RUBY_T_BIGNUM)) {
438
+ outsize = NUM2SIZET(args.maxdest);
439
+ args.dest = rb_str_buf_new(outsize);
440
+ } else {
441
+ args.dest = args.maxdest;
442
+ outsize = aux_scan_size(aux_conv_to_format(opts.format), RSTRING_PTR(args.src), RSTRING_END(args.src), RB_UNDEF_P(opts.concat) || RTEST(opts.concat));
443
+ rb_str_modify(args.dest);
444
+ rb_str_set_len(args.dest, 0);
445
+ rb_str_modify_expand(args.dest, outsize);
446
+ }
447
+
448
+ break;
449
+ case 3:
450
+ insize = RSTRING_LEN(args.src);
451
+ outsize = NUM2SIZET(args.maxdest);
452
+ rb_str_modify(args.dest);
453
+ rb_str_set_len(args.dest, 0);
454
+ rb_str_modify_expand(args.dest, outsize);
455
+
456
+ break;
457
+ }
458
+
459
+ // TODO: maxdest, partial
460
+
461
+ int status = aux_oneshot_decode(RSTRING_PTR(args.src), RSTRING_PTR(args.dest), insize, &outsize,
462
+ aux_conv_to_format(opts.format),
463
+ (RB_NIL_OR_UNDEF_P(opts.blocksize) ? (16 << 20) : NUM2INT(opts.blocksize)),
464
+ RB_UNDEF_P(opts.concat) || RTEST(opts.concat));
465
+ extbzip3_check_error(status);
466
+
467
+ rb_str_set_len(args.dest, outsize);
468
+
469
+ return args.dest;
470
+ }
471
+
472
+ void
473
+ extbzip3_init_decoder(VALUE bzip3_module)
474
+ {
475
+ VALUE decoder_class = rb_define_class_under(bzip3_module, "Decoder", rb_cObject);
476
+ rb_define_alloc_func(decoder_class, decoder_allocate);
477
+ rb_define_singleton_method(decoder_class, "decode", decoder_s_decode, -1);
478
+ rb_define_method(decoder_class, "initialize", decoder_initialize, -1);
479
+ rb_define_method(decoder_class, "read", decoder_read, -1);
480
+ rb_define_method(decoder_class, "close", decoder_close, 0);
481
+ rb_define_method(decoder_class, "closed?", decoder_closed, 0);
482
+ rb_define_method(decoder_class, "eof?", decoder_eof, 0);
483
+ rb_define_alias(decoder_class, "eof", "eof?");
484
+ }