bzip2-ruby-rb20 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
1
+ #include <ruby.h>
2
+ #include <bzlib.h>
3
+
4
+ #include "common.h"
5
+
6
+ void bz_file_mark(struct bz_file * bzf) {
7
+ rb_gc_mark(bzf->io);
8
+ rb_gc_mark(bzf->in);
9
+ }
10
+
11
/*
 * Allocation hook with the (opaque, items, size) shape that libbz2 expects
 * for bz_stream.bzalloc.
 *
 * Allocates room for m items of n bytes each. The product is computed in
 * size_t rather than int so it cannot overflow the signed type, and a
 * negative count or a product that does not fit in size_t yields NULL
 * instead of an undersized allocation.
 *
 * opaque is unused.
 */
void * bz_malloc(void *opaque, int m, int n) {
    size_t count;
    size_t size;

    (void) opaque;
    if (m < 0 || n < 0) {
        return NULL;
    }
    count = (size_t) m;
    size = (size_t) n;
    if (size != 0 && count > ((size_t) -1) / size) {
        return NULL;
    }
    return malloc(count * size);
}
14
+
15
/*
 * Release hook matching bz_malloc; the opaque pointer is ignored and p is
 * handed straight to free().
 */
void bz_free(void *opaque, void *p) {
    (void) opaque;
    free(p);
}
18
+
19
+ VALUE bz_raise(int error) {
20
+ VALUE exc;
21
+ const char *msg;
22
+
23
+ exc = bz_eError;
24
+ switch (error) {
25
+ case BZ_SEQUENCE_ERROR:
26
+ msg = "incorrect sequence";
27
+ break;
28
+ case BZ_PARAM_ERROR:
29
+ msg = "parameter out of range";
30
+ break;
31
+ case BZ_MEM_ERROR:
32
+ msg = "not enough memory is available";
33
+ break;
34
+ case BZ_DATA_ERROR:
35
+ msg = "data integrity error is detected";
36
+ break;
37
+ case BZ_DATA_ERROR_MAGIC:
38
+ msg = "compressed stream does not start with the correct magic bytes";
39
+ break;
40
+ case BZ_IO_ERROR:
41
+ msg = "error reading or writing";
42
+ break;
43
+ case BZ_UNEXPECTED_EOF:
44
+ exc = bz_eEOZError;
45
+ msg = "compressed file finishes before the logical end of stream is detected";
46
+ break;
47
+ case BZ_OUTBUFF_FULL:
48
+ msg = "output buffer full";
49
+ break;
50
+ default:
51
+ msg = "unknown error";
52
+ exc = bz_eError;
53
+ }
54
+ rb_raise(exc, "%s", msg);
55
+ }
@@ -0,0 +1,76 @@
1
+ #ifndef _RB_BZIP2_COMMON_H_
2
+ #define _RB_BZIP2_COMMON_H_
3
+
4
+ #include <ruby.h>
5
+ #include <bzlib.h>
6
+
7
+ #ifndef RUBY_19_COMPATIBILITY
8
+ # include <rubyio.h>
9
+ # include <version.h>
10
+ #else
11
+ # include <ruby/io.h>
12
+ #endif
13
+
14
/* Bits stored in bz_file.flags */
#define BZ2_RB_CLOSE    1
#define BZ2_RB_INTERNAL 2

/* Granularity, in bytes, by which the reader's output buffer is sized */
#define BZ_RB_BLOCKSIZE 4096
/* Initial value of bz_file.blocks */
#define DEFAULT_BLOCKS  9
/* Number of distinct char values; length of the separator skip tables */
#define ASIZE           (1 << CHAR_BIT)

/* Accessor macros that Ruby releases older than 1.8.6 do not provide */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#if !defined(RARRAY_PTR)
#  define RARRAY_PTR(s) (RARRAY(s)->ptr)
#endif
#if !defined(RARRAY_LEN)
#  define RARRAY_LEN(s) (RARRAY(s)->len)
#endif
34
+
35
+ struct bz_file {
36
+ bz_stream bzs;
37
+ VALUE in, io;
38
+ char *buf;
39
+ unsigned int buflen;
40
+ int blocks, work, small;
41
+ int flags, lineno, state;
42
+ };
43
+
44
+ struct bz_str {
45
+ VALUE str;
46
+ int pos;
47
+ };
48
+
49
+ struct bz_iv {
50
+ VALUE bz2, io;
51
+ void (*finalize)();
52
+ };
53
+
54
/*
 * Get_BZ2(obj, bzf): taint-checks obj, unwraps its struct bz_file into bzf
 * and raises IOError("closed IO") when the wrapped io is false-y.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * stays correct under an unbraced if/else; invoke it with a trailing
 * semicolon.
 */
#define Get_BZ2(obj, bzf)                                \
    do {                                                 \
        rb_io_taint_check(obj);                          \
        Data_Get_Struct((obj), struct bz_file, (bzf));   \
        if (!RTEST((bzf)->io)) {                         \
            rb_raise(rb_eIOError, "closed IO");          \
        }                                                \
    } while (0)
60
+
61
+ #ifndef ASDFasdf
62
+ extern VALUE bz_cWriter, bz_cReader, bz_cInternal;
63
+ extern VALUE bz_eError, bz_eEOZError;
64
+
65
+ extern VALUE bz_internal_ary;
66
+
67
+ extern ID id_new, id_write, id_open, id_flush, id_read;
68
+ extern ID id_closed, id_close, id_str;
69
+ #endif
70
+
71
+ void bz_file_mark(struct bz_file * bzf);
72
+ void* bz_malloc(void *opaque, int m, int n);
73
+ void bz_free(void *opaque, void *p);
74
+ VALUE bz_raise(int err);
75
+
76
+ #endif
@@ -0,0 +1,21 @@
1
+ # encoding: UTF-8
2
require 'mkmf'

# Let the user point at a non-standard libbz2 install via --with-bz2-dir
dir_config('bz2')
have_header('bzlib.h')

$CFLAGS << ' -Wall -Wextra -Wno-unused -funroll-loops '
# $CFLAGS << ' -O0 -ggdb -Wextra'

if !have_library("bz2", "BZ2_bzWriteOpen")
  puts "libbz2 not found, maybe try manually specifying --with-bz2-dir to find it?"
else
  $static = nil if enable_config("shared", true)

  # Selects the <ruby/io.h> code path in common.h
  $CFLAGS << ' -DRUBY_19_COMPATIBILITY' if RUBY_VERSION.to_f >= 1.9

  create_makefile('bzip2/bzip2')
end
@@ -0,0 +1,1032 @@
1
+ #include <bzlib.h>
2
+ #include <ruby.h>
3
+
4
+ #include "reader.h"
5
+ #include "common.h"
6
+
7
+ void bz_str_mark(struct bz_str *bzs) {
8
+ rb_gc_mark(bzs->str);
9
+ }
10
+
11
+ struct bz_file * bz_get_bzf(VALUE obj) {
12
+ struct bz_file *bzf;
13
+
14
+ Get_BZ2(obj, bzf);
15
+ if (!bzf->buf) {
16
+ if (bzf->state != BZ_OK) {
17
+ bz_raise(bzf->state);
18
+ }
19
+ bzf->state = BZ2_bzDecompressInit(&(bzf->bzs), 0, bzf->small);
20
+ if (bzf->state != BZ_OK) {
21
+ BZ2_bzDecompressEnd(&(bzf->bzs));
22
+ bz_raise(bzf->state);
23
+ }
24
+ bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1);
25
+ bzf->buflen = BZ_RB_BLOCKSIZE;
26
+ bzf->buf[0] = bzf->buf[bzf->buflen] = '\0';
27
+ bzf->bzs.total_out_hi32 = bzf->bzs.total_out_lo32 = 0;
28
+ bzf->bzs.next_out = bzf->buf;
29
+ bzf->bzs.avail_out = 0;
30
+ }
31
+ if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) {
32
+ return 0;
33
+ }
34
+ return bzf;
35
+ }
36
+
37
+ int bz_next_available(struct bz_file *bzf, int in){
38
+ bzf->bzs.next_out = bzf->buf;
39
+ bzf->bzs.avail_out = 0;
40
+ if (bzf->state == BZ_STREAM_END) {
41
+ return BZ_STREAM_END;
42
+ }
43
+ if (!bzf->bzs.avail_in) {
44
+ bzf->in = rb_funcall(bzf->io, id_read, 1, INT2FIX(1024));
45
+ if (TYPE(bzf->in) != T_STRING || RSTRING_LEN(bzf->in) == 0) {
46
+ BZ2_bzDecompressEnd(&(bzf->bzs));
47
+ bzf->bzs.avail_out = 0;
48
+ bzf->state = BZ_UNEXPECTED_EOF;
49
+ bz_raise(bzf->state);
50
+ }
51
+ bzf->bzs.next_in = RSTRING_PTR(bzf->in);
52
+ bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in);
53
+ }
54
+ if ((bzf->buflen - in) < (BZ_RB_BLOCKSIZE / 2)) {
55
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen+BZ_RB_BLOCKSIZE+1);
56
+ bzf->buflen += BZ_RB_BLOCKSIZE;
57
+ bzf->buf[bzf->buflen] = '\0';
58
+ }
59
+ bzf->bzs.avail_out = bzf->buflen - in;
60
+ bzf->bzs.next_out = bzf->buf + in;
61
+ bzf->state = BZ2_bzDecompress(&(bzf->bzs));
62
+ if (bzf->state != BZ_OK) {
63
+ BZ2_bzDecompressEnd(&(bzf->bzs));
64
+ if (bzf->state != BZ_STREAM_END) {
65
+ bzf->bzs.avail_out = 0;
66
+ bz_raise(bzf->state);
67
+ }
68
+ }
69
+ bzf->bzs.avail_out = bzf->buflen - bzf->bzs.avail_out;
70
+ bzf->bzs.next_out = bzf->buf;
71
+ return 0;
72
+ }
73
+
74
+ VALUE bz_read_until(struct bz_file *bzf, const char *str, int len, int *td1) {
75
+ VALUE res;
76
+ int total, i, nex = 0;
77
+ char *p, *t, *tx, *end, *pend = ((char*) str) + len;
78
+
79
+ res = rb_str_new(0, 0);
80
+ while (1) {
81
+ total = bzf->bzs.avail_out;
82
+ if (len == 1) {
83
+ tx = memchr(bzf->bzs.next_out, *str, bzf->bzs.avail_out);
84
+ if (tx) {
85
+ i = (int)(tx - bzf->bzs.next_out + len);
86
+ res = rb_str_cat(res, bzf->bzs.next_out, i);
87
+ bzf->bzs.next_out += i;
88
+ bzf->bzs.avail_out -= i;
89
+ return res;
90
+ }
91
+ } else {
92
+ tx = bzf->bzs.next_out;
93
+ end = bzf->bzs.next_out + bzf->bzs.avail_out;
94
+ while (tx + len <= end) {
95
+ for (p = (char*) str, t = tx; p != pend; ++p, ++t) {
96
+ if (*p != *t) break;
97
+ }
98
+ if (p == pend) {
99
+ i = (int)(tx - bzf->bzs.next_out + len);
100
+ res = rb_str_cat(res, bzf->bzs.next_out, i);
101
+ bzf->bzs.next_out += i;
102
+ bzf->bzs.avail_out -= i;
103
+ return res;
104
+ }
105
+ if (td1) {
106
+ tx += td1[(int)*(tx + len)];
107
+ } else {
108
+ tx += 1;
109
+ }
110
+ }
111
+ }
112
+ nex = 0;
113
+ if (total) {
114
+ nex = len - 1;
115
+ res = rb_str_cat(res, bzf->bzs.next_out, total - nex);
116
+ if (nex) {
117
+ MEMMOVE(bzf->buf, bzf->bzs.next_out + total - nex, char, nex);
118
+ }
119
+ }
120
+ if (bz_next_available(bzf, nex) == BZ_STREAM_END) {
121
+ if (nex) {
122
+ res = rb_str_cat(res, bzf->buf, nex);
123
+ }
124
+ if (RSTRING_LEN(res)) {
125
+ return res;
126
+ }
127
+ return Qnil;
128
+ }
129
+ }
130
+ return Qnil;
131
+ }
132
+
133
+ /**
134
+ * Reads a stream as long as the next character is equal to the specified
135
+ * character
136
+ *
137
+ * Returns the next character in the sequence that's not the same as the one
138
+ * given or EOF if it's there until the end of the file.
139
+ */
140
+ int bz_read_while(struct bz_file *bzf, char c) {
141
+ char *end;
142
+
143
+ while (1) {
144
+ end = bzf->bzs.next_out + bzf->bzs.avail_out;
145
+ while (bzf->bzs.next_out < end) {
146
+ if (c != *bzf->bzs.next_out) {
147
+ return *bzf->bzs.next_out;
148
+ }
149
+ ++bzf->bzs.next_out;
150
+ --bzf->bzs.avail_out;
151
+ }
152
+ if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
153
+ return EOF;
154
+ }
155
+ }
156
+ return EOF;
157
+ }
158
+
159
+ /*
160
+ * Internally allocates data for a new Reader
161
+ * @private
162
+ */
163
+ VALUE bz_reader_s_alloc(VALUE obj) {
164
+ struct bz_file *bzf;
165
+ VALUE res;
166
+ res = Data_Make_Struct(obj, struct bz_file, bz_file_mark, free, bzf);
167
+ bzf->bzs.bzalloc = bz_malloc;
168
+ bzf->bzs.bzfree = bz_free;
169
+ bzf->blocks = DEFAULT_BLOCKS;
170
+ bzf->state = BZ_OK;
171
+ return res;
172
+ }
173
+
174
+ VALUE bz_reader_close __((VALUE));
175
+
176
+ /*
177
+ * call-seq:
178
+ * open(filename, &block=nil) -> Bzip2::Reader
179
+ *
180
+ * @param [String] filename the name of the file to read from
181
+ * @yieldparam [Bzip2::Reader] reader the Bzip2::Reader instance
182
+ *
183
+ * If a block is given, the created Bzip2::Reader instance is yielded to the
184
+ * block and will be closed when the block completes. It is guaranteed via
185
+ * +ensure+ that the reader is closed
186
+ *
187
+ * If a block is not given, a Bzip2::Reader instance will be returned
188
+ *
189
+ * Bzip2::Reader.open('file') { |f| puts f.gets }
190
+ *
191
+ * reader = Bzip2::Reader.open('file')
192
+ * puts reader.gets
193
+ * reader.close
194
+ *
195
+ * @return [Bzip2::Reader, nil]
196
+ */
197
+ VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj) {
198
+ VALUE res;
199
+ struct bz_file *bzf;
200
+
201
+ if (argc < 1) {
202
+ rb_raise(rb_eArgError, "invalid number of arguments");
203
+ }
204
+ argv[0] = rb_funcall2(rb_mKernel, id_open, 1, argv);
205
+ if (NIL_P(argv[0])) {
206
+ return Qnil;
207
+ }
208
+ res = rb_funcall2(obj, id_new, argc, argv);
209
+ Data_Get_Struct(res, struct bz_file, bzf);
210
+ bzf->flags |= BZ2_RB_CLOSE;
211
+ if (rb_block_given_p()) {
212
+ return rb_ensure(rb_yield, res, bz_reader_close, res);
213
+ }
214
+ return res;
215
+ }
216
+
217
+ /*
218
+ * call-seq:
219
+ * initialize(io)
220
+ *
221
+ * Creates a new stream for reading a bzip file or string
222
+ *
223
+ * @param [File, string, #read] io the source for input data. If the source is
224
+ * a file or something responding to #read, then data will be read via #read,
225
+ * otherwise if the input is a string it will be taken as the literal data
226
+ * to decompress
227
+ */
228
+ VALUE bz_reader_init(int argc, VALUE *argv, VALUE obj) {
229
+ struct bz_file *bzf;
230
+ int small = 0;
231
+ VALUE a, b;
232
+ int internal = 0;
233
+
234
+ if (rb_scan_args(argc, argv, "11", &a, &b) == 2) {
235
+ small = RTEST(b);
236
+ }
237
+ rb_io_taint_check(a);
238
+ if (OBJ_TAINTED(a)) {
239
+ OBJ_TAINT(obj);
240
+ }
241
+ if (rb_respond_to(a, id_read)) {
242
+ if (TYPE(a) == T_FILE) {
243
+ #ifndef RUBY_19_COMPATIBILITY
244
+ OpenFile *fptr;
245
+ #else
246
+ rb_io_t *fptr;
247
+ #endif
248
+
249
+ GetOpenFile(a, fptr);
250
+ rb_io_check_readable(fptr);
251
+ } else if (rb_respond_to(a, id_closed)) {
252
+ VALUE iv = rb_funcall2(a, id_closed, 0, 0);
253
+ if (RTEST(iv)) {
254
+ rb_raise(rb_eArgError, "closed object");
255
+ }
256
+ }
257
+ } else {
258
+ struct bz_str *bzs;
259
+ VALUE res;
260
+
261
+ if (!rb_respond_to(a, id_str)) {
262
+ rb_raise(rb_eArgError, "first argument must respond to #read");
263
+ }
264
+ a = rb_funcall2(a, id_str, 0, 0);
265
+ if (TYPE(a) != T_STRING) {
266
+ rb_raise(rb_eArgError, "#to_str must return a String");
267
+ }
268
+ res = Data_Make_Struct(bz_cInternal, struct bz_str,
269
+ bz_str_mark, free, bzs);
270
+ bzs->str = a;
271
+ a = res;
272
+ internal = BZ2_RB_INTERNAL;
273
+ }
274
+ Data_Get_Struct(obj, struct bz_file, bzf);
275
+ bzf->io = a;
276
+ bzf->small = small;
277
+ bzf->flags |= internal;
278
+ return obj;
279
+ }
280
+
281
+ /*
282
+ * call-seq:
283
+ * read(len = nil)
284
+ *
285
+ * Read decompressed data from the stream.
286
+ *
287
+ * Bzip2::Reader.new(Bzip2.compress('ab')).read # => "ab"
288
+ * Bzip2::Reader.new(Bzip2.compress('ab')).read(1) # => "a"
289
+ *
290
+ * @return [String, nil] the decompressed data read or +nil+ if eoz has been
291
+ * reached
292
+ * @param [Integer] len the number of decompressed bytes which should be read.
293
+ * If nothing is specified, the entire stream is read
294
+ */
295
+ VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj) {
296
+ struct bz_file *bzf;
297
+ VALUE res, length;
298
+ int total;
299
+ int n;
300
+
301
+ rb_scan_args(argc, argv, "01", &length);
302
+ if (NIL_P(length)) {
303
+ n = -1;
304
+ } else {
305
+ n = NUM2INT(length);
306
+ if (n < 0) {
307
+ rb_raise(rb_eArgError, "negative length %d given", n);
308
+ }
309
+ }
310
+ bzf = bz_get_bzf(obj);
311
+ if (!bzf) {
312
+ return Qnil;
313
+ }
314
+ res = rb_str_new(0, 0);
315
+ if (OBJ_TAINTED(obj)) {
316
+ OBJ_TAINT(res);
317
+ }
318
+ if (n == 0) {
319
+ free(bzf->buf);
320
+ return res;
321
+ }
322
+ while (1) {
323
+ total = bzf->bzs.avail_out;
324
+ if (n != -1 && (RSTRING_LEN(res) + total) >= n) {
325
+ n -= (int) RSTRING_LEN(res);
326
+ res = rb_str_cat(res, bzf->bzs.next_out, n);
327
+ bzf->bzs.next_out += n;
328
+ bzf->bzs.avail_out -= n;
329
+ free(bzf->buf);
330
+ return res;
331
+ }
332
+ if (total) {
333
+ res = rb_str_cat(res, bzf->bzs.next_out, total);
334
+ }
335
+ if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
336
+ free(bzf->buf);
337
+ return res;
338
+ }
339
+ }
340
+ return Qnil;
341
+ }
342
+
343
+ int bz_getc(VALUE obj) {
344
+ VALUE length = INT2FIX(1);
345
+ VALUE res = bz_reader_read(1, &length, obj);
346
+ if (NIL_P(res) || RSTRING_LEN(res) == 0) {
347
+ return EOF;
348
+ }
349
+ return RSTRING_PTR(res)[0];
350
+ }
351
+
352
+ /*
353
+ * call-seq:
354
+ * ungetc(byte)
355
+ *
356
+ * "Ungets" a character/byte. This rewinds the stream by 1 character and inserts
357
+ * the given character into that position. The next read will return the given
358
+ * character as the first one read
359
+ *
360
+ * reader = Bzip2::Reader.new Bzip2.compress('abc')
361
+ * reader.getc # => 97
362
+ * reader.ungetc 97 # => nil
363
+ * reader.getc # => 97
364
+ * reader.ungetc 42 # => nil
365
+ * reader.getc # => 42
366
+ * reader.getc # => 98
367
+ * reader.getc # => 99
368
+ * reader.ungetc 100 # => nil
369
+ * reader.getc # => 100
370
+ *
371
+ * @param [Integer] byte the byte to 'unget'
372
+ * @return [nil] always
373
+ */
374
+ VALUE bz_reader_ungetc(VALUE obj, VALUE a) {
375
+ struct bz_file *bzf;
376
+ int c = NUM2INT(a);
377
+
378
+ Get_BZ2(obj, bzf);
379
+ if (!bzf->buf) {
380
+ bz_raise(BZ_SEQUENCE_ERROR);
381
+ }
382
+ if (bzf->bzs.avail_out < bzf->buflen) {
383
+ bzf->bzs.next_out -= 1;
384
+ bzf->bzs.next_out[0] = c;
385
+ bzf->bzs.avail_out += 1;
386
+ } else {
387
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + 2);
388
+ bzf->buf[bzf->buflen++] = c;
389
+ bzf->buf[bzf->buflen] = '\0';
390
+ bzf->bzs.next_out = bzf->buf;
391
+ bzf->bzs.avail_out = bzf->buflen;
392
+ }
393
+ return Qnil;
394
+ }
395
+
396
+ /*
397
+ * call-seq:
398
+ * ungets(str)
399
+ *
400
+ * Equivalently "unget" a string. When called on a string that was just read
401
+ * from the stream, this inserts the string back into the stream to be read
402
+ * again.
403
+ *
404
+ * When called with a string which hasn't been read from the stream, it does
405
+ * the same thing, and the next read line/data will start from the beginning
406
+ * of the given data and then continue on with the rest of the stream.
407
+ *
408
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
409
+ * reader.gets # => "a\n"
410
+ * reader.ungets "a\n" # => nil
411
+ * reader.gets # => "a\n"
412
+ * reader.ungets "foo" # => nil
413
+ * reader.gets # => "foob"
414
+ *
415
+ * @param [String] str the string to insert back into the stream
416
+ * @return [nil] always
417
+ */
418
+ VALUE bz_reader_ungets(VALUE obj, VALUE a) {
419
+ struct bz_file *bzf;
420
+
421
+ Check_Type(a, T_STRING);
422
+ Get_BZ2(obj, bzf);
423
+ if (!bzf->buf) {
424
+ bz_raise(BZ_SEQUENCE_ERROR);
425
+ }
426
+ if ((bzf->bzs.avail_out + RSTRING_LEN(a)) < bzf->buflen) {
427
+ bzf->bzs.next_out -= RSTRING_LEN(a);
428
+ MEMCPY(bzf->bzs.next_out, RSTRING_PTR(a), char, RSTRING_LEN(a));
429
+ bzf->bzs.avail_out += (int) RSTRING_LEN(a);
430
+ } else {
431
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + RSTRING_LEN(a) + 1);
432
+ MEMCPY(bzf->buf + bzf->buflen, RSTRING_PTR(a), char,RSTRING_LEN(a));
433
+ bzf->buflen += (int) RSTRING_LEN(a);
434
+ bzf->buf[bzf->buflen] = '\0';
435
+ bzf->bzs.next_out = bzf->buf;
436
+ bzf->bzs.avail_out = bzf->buflen;
437
+ }
438
+ return Qnil;
439
+ }
440
+
441
+ VALUE bz_reader_gets(VALUE obj) {
442
+ struct bz_file *bzf;
443
+ VALUE str = Qnil;
444
+
445
+ bzf = bz_get_bzf(obj);
446
+ if (bzf) {
447
+ str = bz_read_until(bzf, "\n", 1, 0);
448
+ if (!NIL_P(str)) {
449
+ bzf->lineno++;
450
+ OBJ_TAINT(str);
451
+ }
452
+ }
453
+ return str;
454
+ }
455
+
456
+ VALUE bz_reader_gets_internal(int argc, VALUE *argv, VALUE obj, int *td, int init) {
457
+ struct bz_file *bzf;
458
+ VALUE rs, res;
459
+ const char *rsptr;
460
+ int rslen, rspara, *td1;
461
+
462
+ rs = rb_rs;
463
+ if (argc) {
464
+ rb_scan_args(argc, argv, "1", &rs);
465
+ if (!NIL_P(rs)) {
466
+ Check_Type(rs, T_STRING);
467
+ }
468
+ }
469
+ if (NIL_P(rs)) {
470
+ return bz_reader_read(1, &rs, obj);
471
+ }
472
+ rslen = (int) RSTRING_LEN(rs);
473
+ if (rs == rb_default_rs || (rslen == 1 && RSTRING_PTR(rs)[0] == '\n')) {
474
+ return bz_reader_gets(obj);
475
+ }
476
+
477
+ if (rslen == 0) {
478
+ rsptr = "\n\n";
479
+ rslen = 2;
480
+ rspara = 1;
481
+ } else {
482
+ rsptr = RSTRING_PTR(rs);
483
+ rspara = 0;
484
+ }
485
+
486
+ bzf = bz_get_bzf(obj);
487
+ if (!bzf) {
488
+ return Qnil;
489
+ }
490
+ if (rspara) {
491
+ bz_read_while(bzf, '\n');
492
+ }
493
+ td1 = 0;
494
+ if (rslen != 1) {
495
+ if (init) {
496
+ int i;
497
+
498
+ for (i = 0; i < ASIZE; i++) {
499
+ td[i] = rslen + 1;
500
+ }
501
+ for (i = 0; i < rslen; i++) {
502
+ td[(int)*(rsptr + i)] = rslen - i;
503
+ }
504
+ }
505
+ td1 = td;
506
+ }
507
+
508
+ res = bz_read_until(bzf, rsptr, rslen, td1);
509
+ if (rspara) {
510
+ bz_read_while(bzf, '\n');
511
+ }
512
+
513
+ if (!NIL_P(res)) {
514
+ bzf->lineno++;
515
+ OBJ_TAINT(res);
516
+ }
517
+ return res;
518
+ }
519
+
520
+ /*
521
+ * Specs were missing for this method originally and playing around with it
522
+ * gave some very odd results, so unless you know what you're doing, I wouldn't
523
+ * mess around with this...
524
+ */
525
+ VALUE bz_reader_set_unused(VALUE obj, VALUE a) {
526
+ struct bz_file *bzf;
527
+
528
+ Check_Type(a, T_STRING);
529
+ Get_BZ2(obj, bzf);
530
+ if (!bzf->in) {
531
+ bzf->in = rb_str_new(RSTRING_PTR(a), RSTRING_LEN(a));
532
+ } else {
533
+ bzf->in = rb_str_cat(bzf->in, RSTRING_PTR(a), RSTRING_LEN(a));
534
+ }
535
+ bzf->bzs.next_in = RSTRING_PTR(bzf->in);
536
+ bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in);
537
+ return Qnil;
538
+ }
539
+
540
+ /*
541
+ * Reads one character from the stream, returning the byte read.
542
+ *
543
+ * reader = Bzip2::Reader.new Bzip2.compress('ab')
544
+ * reader.getc # => 97
545
+ * reader.getc # => 98
546
+ * reader.getc # => nil
547
+ *
548
+ * @return [Integer, nil] the byte value of the character read or +nil+ if eoz
549
+ * has been reached
550
+ */
551
+ VALUE bz_reader_getc(VALUE obj) {
552
+ VALUE str;
553
+ VALUE len = INT2FIX(1);
554
+
555
+ str = bz_reader_read(1, &len, obj);
556
+ if (NIL_P(str) || RSTRING_LEN(str) == 0) {
557
+ return Qnil;
558
+ }
559
+ return INT2FIX(RSTRING_PTR(str)[0] & 0xff);
560
+ }
561
+
562
+ void bz_eoz_error() {
563
+ rb_raise(bz_eEOZError, "End of Zip component reached");
564
+ }
565
+
566
+ /*
567
+ * Performs the same as Bzip2::Reader#getc except Bzip2::EOZError is raised if
568
+ * eoz has been reached
569
+ *
570
+ * @raise [Bzip2::EOZError] if eoz has been reached
571
+ */
572
+ VALUE bz_reader_readchar(VALUE obj) {
573
+ VALUE res = bz_reader_getc(obj);
574
+
575
+ if (NIL_P(res)) {
576
+ bz_eoz_error();
577
+ }
578
+ return res;
579
+ }
580
+
581
+ /*
582
+ * call-seq:
583
+ * gets(sep = "\n")
584
+ *
585
+ * Reads a line from the stream until the separator is reached. This does not
586
+ * throw an exception, but rather returns nil if an eoz/eof error occurs
587
+ *
588
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
589
+ * reader.gets # => "a\n"
590
+ * reader.gets # => "b"
591
+ * reader.gets # => nil
592
+ *
593
+ * @return [String, nil] the read data or nil if eoz has been reached
594
+ * @see Bzip2::Reader#readline
595
+ */
596
+ VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE obj) {
597
+ int td[ASIZE];
598
+ VALUE str = bz_reader_gets_internal(argc, argv, obj, td, Qtrue);
599
+
600
+ if (!NIL_P(str)) {
601
+ rb_lastline_set(str);
602
+ }
603
+ return str;
604
+ }
605
+
606
+ /*
607
+ * call-seq:
608
+ * readline(sep = "\n")
609
+ *
610
+ * Reads one line from the stream and returns it (including the separator)
611
+ *
612
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
613
+ * reader.readline # => "a\n"
614
+ * reader.readline # => "b"
615
+ * reader.readline # => raises Bzip2::EOZError
616
+ *
617
+ *
618
+ * @param [String] sep the newline separator character
619
+ * @return [String] the read line
620
+ * @see Bzip2::Reader.readlines
621
+ * @raise [Bzip2::EOZError] if the stream has reached its end
622
+ */
623
+ VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj) {
624
+ VALUE res = bz_reader_gets_m(argc, argv, obj);
625
+
626
+ if (NIL_P(res)) {
627
+ bz_eoz_error();
628
+ }
629
+ return res;
630
+ }
631
+
632
+ /*
633
+ * call-seq:
634
+ * readlines(sep = "\n")
635
+ *
636
+ * Reads the lines of the files and returns the result as an array.
637
+ *
638
+ * If the stream has reached eoz, then an empty array is returned
639
+ *
640
+ * @param [String] sep the newline separator character
641
+ * @return [Array] an array of lines read
642
+ * @see Bzip2::Reader.readlines
643
+ */
644
+ VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj) {
645
+ VALUE line, ary;
646
+ int td[ASIZE], in;
647
+
648
+ in = Qtrue;
649
+ ary = rb_ary_new();
650
+ while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
651
+ in = Qfalse;
652
+ rb_ary_push(ary, line);
653
+ }
654
+ return ary;
655
+ }
656
+
657
+ /*
658
+ * call-seq:
659
+ * each(sep = "\n", &block)
660
+ *
661
+ * Iterates over the lines of the stream.
662
+ *
663
+ * @param [String] sep the byte which separates lines
664
+ * @yieldparam [String] line the next line of the file (including the separator
665
+ * character)
666
+ * @see Bzip2::Reader.foreach
667
+ */
668
+ VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj) {
669
+ VALUE line;
670
+ int td[ASIZE], in;
671
+
672
+ in = Qtrue;
673
+ while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
674
+ in = Qfalse;
675
+ rb_yield(line);
676
+ }
677
+ return obj;
678
+ }
679
+
680
+ /*
681
+ * call-seq:
682
+ * each_byte(&block)
683
+ *
684
+ * Iterates over the decompressed bytes of the file.
685
+ *
686
+ * Bzip2::Writer.open('file'){ |f| f << 'asdf' }
687
+ * reader = Bzip2::Reader.new File.open('file')
688
+ * reader.each_byte{ |b| puts "#{b} #{b.chr}" }
689
+ *
690
+ * # Output:
691
+ * # 97 a
692
+ * # 115 s
693
+ * # 100 d
694
+ * # 102 f
695
+ *
696
+ * @yieldparam [Integer] byte the decompressed bytes of the file
697
+ */
698
+ VALUE bz_reader_each_byte(VALUE obj) {
699
+ int c;
700
+
701
+ while ((c = bz_getc(obj)) != EOF) {
702
+ rb_yield(INT2FIX(c & 0xff));
703
+ }
704
+ return obj;
705
+ }
706
+
707
+ /*
708
+ * Specs were missing for this method originally and playing around with it
709
+ * gave some very odd results, so unless you know what you're doing, I wouldn't
710
+ * mess around with this...
711
+ */
712
+ VALUE bz_reader_unused(VALUE obj) {
713
+ struct bz_file *bzf;
714
+ VALUE res;
715
+
716
+ Get_BZ2(obj, bzf);
717
+ if (!bzf->in || bzf->state != BZ_STREAM_END) {
718
+ return Qnil;
719
+ }
720
+ if (bzf->bzs.avail_in) {
721
+ res = rb_tainted_str_new(bzf->bzs.next_in, bzf->bzs.avail_in);
722
+ bzf->bzs.avail_in = 0;
723
+ } else {
724
+ res = rb_tainted_str_new(0, 0);
725
+ }
726
+ return res;
727
+ }
728
+
729
+ /*
730
+ * Test whether the end of the bzip stream has been reached
731
+ *
732
+ * @return [Boolean] +true+ if the reader is at the end of the bz stream or
733
+ * +false+ otherwise
734
+ */
735
+ VALUE bz_reader_eoz(VALUE obj) {
736
+ struct bz_file *bzf;
737
+
738
+ Get_BZ2(obj, bzf);
739
+ if (!bzf->in || !bzf->buf) {
740
+ return Qnil;
741
+ }
742
+ if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) {
743
+ return Qtrue;
744
+ }
745
+ return Qfalse;
746
+ }
747
+
748
+ /*
749
+ * Test whether the bzip stream has reached its end (see Bzip2::Reader#eoz?)
750
+ * and then tests that the underlying IO has also reached an eof
751
+ *
752
+ * @return [Boolean] +true+ if the stream has reached its end or +false+ otherwise.
753
+ */
754
+ VALUE bz_reader_eof(VALUE obj) {
755
+ struct bz_file *bzf;
756
+ VALUE res;
757
+
758
+ res = bz_reader_eoz(obj);
759
+ if (RTEST(res)) {
760
+ Get_BZ2(obj, bzf);
761
+ if (bzf->bzs.avail_in) {
762
+ res = Qfalse;
763
+ } else {
764
+ res = bz_reader_getc(obj);
765
+ if (NIL_P(res)) {
766
+ res = Qtrue;
767
+ } else {
768
+ bz_reader_ungetc(obj, res);
769
+ res = Qfalse;
770
+ }
771
+ }
772
+ }
773
+ return res;
774
+ }
775
+
776
+ /*
777
+ * Tests whether this reader has been closed.
778
+ *
779
+ * @return [Boolean] +true+ if it is or +false+ otherwise.
780
+ */
781
+ VALUE bz_reader_closed(VALUE obj) {
782
+ struct bz_file *bzf;
783
+
784
+ Data_Get_Struct(obj, struct bz_file, bzf);
785
+ return RTEST(bzf->io)?Qfalse:Qtrue;
786
+ }
787
+
788
+ /*
789
+ * Closes this reader to disallow further reads.
790
+ *
791
+ * reader = Bzip2::Reader.new File.open('file')
792
+ * reader.close
793
+ *
794
+ * reader.closed? # => true
795
+ *
796
+ * @return [File] the io with which the reader was created.
797
+ * @raise [IOError] if the stream has already been closed
798
+ */
799
+ VALUE bz_reader_close(VALUE obj) {
800
+ struct bz_file *bzf;
801
+ VALUE res;
802
+
803
+ Get_BZ2(obj, bzf);
804
+ if (bzf->buf) {
805
+ free(bzf->buf);
806
+ bzf->buf = 0;
807
+ }
808
+ if (bzf->state == BZ_OK) {
809
+ BZ2_bzDecompressEnd(&(bzf->bzs));
810
+ }
811
+ if (bzf->flags & BZ2_RB_CLOSE) {
812
+ int closed = 0;
813
+ if (rb_respond_to(bzf->io, id_closed)) {
814
+ VALUE iv = rb_funcall2(bzf->io, id_closed, 0, 0);
815
+ closed = RTEST(iv);
816
+ }
817
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
818
+ rb_funcall2(bzf->io, id_close, 0, 0);
819
+ }
820
+ }
821
+ if (bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL)) {
822
+ res = Qnil;
823
+ } else {
824
+ res = bzf->io;
825
+ }
826
+ bzf->io = 0;
827
+ return res;
828
+ }
829
+
830
+ /*
831
+ * Originally undocumented and had no specs. Appears to call Bzip2::Reader#read
832
+ * and then mark the stream as finished, but this didn't work for me...
833
+ */
834
+ VALUE bz_reader_finish(VALUE obj) {
835
+ struct bz_file *bzf;
836
+
837
+ Get_BZ2(obj, bzf);
838
+ if (bzf->buf) {
839
+ rb_funcall2(obj, id_read, 0, 0);
840
+ free(bzf->buf);
841
+ }
842
+ bzf->buf = 0;
843
+ bzf->state = BZ_OK;
844
+ return Qnil;
845
+ }
846
+
847
+ /*
848
+ * Originally undocumented and had no specs. Appears to work nearly the same
849
+ * as Bzip2::Reader#close...
850
+ */
851
+ VALUE bz_reader_close_bang(VALUE obj) {
852
+ struct bz_file *bzf;
853
+ int closed;
854
+
855
+ Get_BZ2(obj, bzf);
856
+ closed = bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL);
857
+ bz_reader_close(obj);
858
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
859
+ if (rb_respond_to(bzf->io, id_closed)) {
860
+ closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
861
+ }
862
+ if (!closed) {
863
+ rb_funcall2(bzf->io, id_close, 0, 0);
864
+ }
865
+ }
866
+ return Qnil;
867
+ }
868
+
869
+ struct foreach_arg {
870
+ int argc;
871
+ VALUE sep;
872
+ VALUE obj;
873
+ };
874
+
875
+ VALUE bz_reader_foreach_line(struct foreach_arg *arg) {
876
+ VALUE str;
877
+ int td[ASIZE], in;
878
+
879
+ in = Qtrue;
880
+ while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) {
881
+ in = Qfalse;
882
+ rb_yield(str);
883
+ }
884
+ return Qnil;
885
+ }
886
+
887
+ /*
888
+ * call-seq:
889
+ * foreach(filename, &block)
890
+ *
891
+ * Reads a bz2 compressed file and yields each line to the block
892
+ *
893
+ * Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
894
+ * Bzip2::Reader.foreach('file'){ |l| p l }
895
+ *
896
+ * # Output:
897
+ * # "a\n"
898
+ * # "b\n"
899
+ * # "c\n"
900
+ * # "\n"
901
+ * # "d"
902
+ *
903
+ * @param [String] filename the path to the file to open
904
+ * @yieldparam [String] each line of the file
905
+ */
906
+ VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj) {
907
+ VALUE fname, sep;
908
+ struct foreach_arg arg;
909
+ struct bz_file *bzf;
910
+
911
+ if (!rb_block_given_p()) {
912
+ rb_raise(rb_eArgError, "call out of a block");
913
+ }
914
+ rb_scan_args(argc, argv, "11", &fname, &sep);
915
+ #ifdef SafeStringValue
916
+ SafeStringValue(fname);
917
+ #else
918
+ Check_SafeStr(fname);
919
+ #endif
920
+ arg.argc = argc - 1;
921
+ arg.sep = sep;
922
+ arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
923
+ if (NIL_P(arg.obj)) {
924
+ return Qnil;
925
+ }
926
+ arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
927
+ Data_Get_Struct(arg.obj, struct bz_file, bzf);
928
+ bzf->flags |= BZ2_RB_CLOSE;
929
+ return rb_ensure(bz_reader_foreach_line, (VALUE)&arg, bz_reader_close, arg.obj);
930
+ }
931
+
932
+ VALUE bz_reader_i_readlines(struct foreach_arg *arg) {
933
+ VALUE str, res;
934
+ int td[ASIZE], in;
935
+
936
+ in = Qtrue;
937
+ res = rb_ary_new();
938
+ while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) {
939
+ in = Qfalse;
940
+ rb_ary_push(res, str);
941
+ }
942
+ return res;
943
+ }
944
+
945
+ /*
946
+ * call-seq:
947
+ * readlines(filename, separator="\n")
948
+ *
949
+ * Opens the given bz2 compressed file for reading and decompresses the file,
950
+ * returning an array of the lines of the file. A line is denoted by the
951
+ * separator argument.
952
+ *
953
+ * Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
954
+ *
955
+ * Bzip2::Reader.readlines('file') # => ["a\n", "b\n", "c\n", "\n", "d"]
956
+ * Bzip2::Reader.readlines('file', 'c') # => ["a\nb\nc", "\n\nd"]
957
+ *
958
+ * @param [String] filename the path to the file to read
959
+ * @param [String] separator the character to denote a newline in the file
960
+ * @see Bzip2::Reader#readlines
961
+ * @return [Array] an array of lines for the file
962
+ * @raise [Bzip2::Error] if the file is not a valid bz2 compressed file
963
+ */
964
+ VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj) {
965
+ VALUE fname, sep;
966
+ struct foreach_arg arg;
967
+ struct bz_file *bzf;
968
+
969
+ rb_scan_args(argc, argv, "11", &fname, &sep);
970
+ #ifdef SafeStringValue
971
+ SafeStringValue(fname);
972
+ #else
973
+ Check_SafeStr(fname);
974
+ #endif
975
+ arg.argc = argc - 1;
976
+ arg.sep = sep;
977
+ arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
978
+ if (NIL_P(arg.obj)) {
979
+ return Qnil;
980
+ }
981
+ arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
982
+ Data_Get_Struct(arg.obj, struct bz_file, bzf);
983
+ bzf->flags |= BZ2_RB_CLOSE;
984
+ return rb_ensure(bz_reader_i_readlines, (VALUE)&arg, bz_reader_close, arg.obj);
985
+ }
986
+
987
+ /*
988
+ * Returns the current line number that the stream is at. This number is based
989
+ * on the newline separator being "\n"
990
+ *
991
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
992
+ * reader.lineno # => 0
993
+ * reader.readline # => "a\n"
994
+ * reader.lineno # => 1
995
+ * reader.readline # => "b"
996
+ * reader.lineno # => 2
997
+
998
+ * @return [Integer] the current line number
999
+ */
1000
+ VALUE bz_reader_lineno(VALUE obj) {
1001
+ struct bz_file *bzf;
1002
+
1003
+ Get_BZ2(obj, bzf);
1004
+ return INT2NUM(bzf->lineno);
1005
+ }
1006
+
1007
+ /*
1008
+ * call-seq:
1009
+ * lineno=(num)
1010
+ *
1011
+ * Sets the internal line number count that this stream should be set at
1012
+ *
1013
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
1014
+ * reader.lineno # => 0
1015
+ * reader.readline # => "a\n"
1016
+ * reader.lineno # => 1
1017
+ * reader.lineno = 0
1018
+ * reader.readline # => "b"
1019
+ * reader.lineno # => 1
1020
+ *
1021
+ * @note This does not actually rewind or move the stream forward
1022
+ * @param [Integer] lineno the line number which the stream should consider
1023
+ * being set at
1024
+ * @return [Integer] the line number provided
1025
+ */
1026
+ VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno) {
1027
+ struct bz_file *bzf;
1028
+
1029
+ Get_BZ2(obj, bzf);
1030
+ bzf->lineno = NUM2INT(lineno);
1031
+ return lineno;
1032
+ }