bzip2-ruby-rb20 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,55 @@
1
+ #include <ruby.h>
2
+ #include <bzlib.h>
3
+
4
+ #include "common.h"
5
+
6
+ void bz_file_mark(struct bz_file * bzf) {
7
+ rb_gc_mark(bzf->io);
8
+ rb_gc_mark(bzf->in);
9
+ }
10
+
11
/*
 * Allocation callback with the (opaque, items, size) shape of a bz_stream
 * bzalloc hook.
 *
 * The product is computed in size_t so that two large int arguments cannot
 * overflow (signed overflow is undefined behaviour). Negative arguments and
 * products that do not fit in size_t yield NULL, the same value malloc
 * returns on failure.
 *
 * opaque is unused.
 */
void * bz_malloc(void *opaque, int m, int n) {
    size_t count, size;

    (void) opaque;
    if (m < 0 || n < 0) {
        return NULL;
    }
    count = (size_t) m;
    size = (size_t) n;
    if (size != 0 && count > ((size_t) -1) / size) {
        return NULL;
    }
    return malloc(count * size);
}
14
+
15
/*
 * Deallocation callback matching bz_malloc: releases p with free().
 * opaque is unused.
 */
void bz_free(void *opaque, void *p) {
    (void) opaque;
    free(p);
}
18
+
19
+ VALUE bz_raise(int error) {
20
+ VALUE exc;
21
+ const char *msg;
22
+
23
+ exc = bz_eError;
24
+ switch (error) {
25
+ case BZ_SEQUENCE_ERROR:
26
+ msg = "incorrect sequence";
27
+ break;
28
+ case BZ_PARAM_ERROR:
29
+ msg = "parameter out of range";
30
+ break;
31
+ case BZ_MEM_ERROR:
32
+ msg = "not enough memory is available";
33
+ break;
34
+ case BZ_DATA_ERROR:
35
+ msg = "data integrity error is detected";
36
+ break;
37
+ case BZ_DATA_ERROR_MAGIC:
38
+ msg = "compressed stream does not start with the correct magic bytes";
39
+ break;
40
+ case BZ_IO_ERROR:
41
+ msg = "error reading or writing";
42
+ break;
43
+ case BZ_UNEXPECTED_EOF:
44
+ exc = bz_eEOZError;
45
+ msg = "compressed file finishes before the logical end of stream is detected";
46
+ break;
47
+ case BZ_OUTBUFF_FULL:
48
+ msg = "output buffer full";
49
+ break;
50
+ default:
51
+ msg = "unknown error";
52
+ exc = bz_eError;
53
+ }
54
+ rb_raise(exc, "%s", msg);
55
+ }
@@ -0,0 +1,76 @@
1
+ #ifndef _RB_BZIP2_COMMON_H_
2
+ #define _RB_BZIP2_COMMON_H_
3
+
4
+ #include <ruby.h>
5
+ #include <bzlib.h>
6
+
7
+ #ifndef RUBY_19_COMPATIBILITY
8
+ # include <rubyio.h>
9
+ # include <version.h>
10
+ #else
11
+ # include <ruby/io.h>
12
+ #endif
13
+
14
/* Bits stored in struct bz_file.flags */
#define BZ2_RB_CLOSE    (1 << 0)  /* reader opened the io itself and closes it on close */
#define BZ2_RB_INTERNAL (1 << 1)  /* io is an internal wrapper around a String */

/* Initial size of the decompression buffer and the step by which it grows */
#define BZ_RB_BLOCKSIZE 4096
/* Initial value of struct bz_file.blocks */
#define DEFAULT_BLOCKS 9
/* Number of distinct char values; length of the separator skip tables */
#define ASIZE (1 << CHAR_BIT)
20
+
21
/*
 * Accessor macros that older Ruby versions (< 1.8.6) do not provide.
 * Each one is defined only when the Ruby headers have not already done so.
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#if !defined(RARRAY_PTR)
#  define RARRAY_PTR(s) (RARRAY(s)->ptr)
#endif
#if !defined(RARRAY_LEN)
#  define RARRAY_LEN(s) (RARRAY(s)->len)
#endif
34
+
35
+ struct bz_file {
36
+ bz_stream bzs;
37
+ VALUE in, io;
38
+ char *buf;
39
+ unsigned int buflen;
40
+ int blocks, work, small;
41
+ int flags, lineno, state;
42
+ };
43
+
44
+ struct bz_str {
45
+ VALUE str;
46
+ int pos;
47
+ };
48
+
49
+ struct bz_iv {
50
+ VALUE bz2, io;
51
+ void (*finalize)();
52
+ };
53
+
54
/*
 * Fetch the struct bz_file wrapped by obj into bzf, raising IOError
 * ("closed IO") when the object no longer has an io attached.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and stays correct inside an unbraced if/else.
 */
#define Get_BZ2(obj, bzf)                           \
    do {                                            \
        rb_io_taint_check(obj);                     \
        Data_Get_Struct(obj, struct bz_file, bzf);  \
        if (!RTEST((bzf)->io)) {                    \
            rb_raise(rb_eIOError, "closed IO");     \
        }                                           \
    } while (0)
60
+
61
+ #ifndef ASDFasdf
62
+ extern VALUE bz_cWriter, bz_cReader, bz_cInternal;
63
+ extern VALUE bz_eError, bz_eEOZError;
64
+
65
+ extern VALUE bz_internal_ary;
66
+
67
+ extern ID id_new, id_write, id_open, id_flush, id_read;
68
+ extern ID id_closed, id_close, id_str;
69
+ #endif
70
+
71
+ void bz_file_mark(struct bz_file * bzf);
72
+ void* bz_malloc(void *opaque, int m, int n);
73
+ void bz_free(void *opaque, void *p);
74
+ VALUE bz_raise(int err);
75
+
76
+ #endif
@@ -0,0 +1,21 @@
1
# encoding: UTF-8
# Build configuration for the bzip2 C extension: locates libbz2 and writes
# the Makefile for 'bzip2/bzip2'.
require 'mkmf'

dir_config('bz2')
have_header('bzlib.h')

$CFLAGS << ' -Wall -Wextra -Wno-unused -funroll-loops '
# $CFLAGS << ' -O0 -ggdb -Wextra'

if have_library("bz2", "BZ2_bzWriteOpen")
  $static = nil if enable_config("shared", true)

  # Ruby 1.9+ moved the IO headers; the C sources switch on this define.
  $CFLAGS << ' -DRUBY_19_COMPATIBILITY' if RUBY_VERSION.to_f >= 1.9

  create_makefile('bzip2/bzip2')
else
  puts "libbz2 not found, maybe try manually specifying --with-bz2-dir to find it?"
end
@@ -0,0 +1,1032 @@
1
+ #include <bzlib.h>
2
+ #include <ruby.h>
3
+
4
+ #include "reader.h"
5
+ #include "common.h"
6
+
7
+ void bz_str_mark(struct bz_str *bzs) {
8
+ rb_gc_mark(bzs->str);
9
+ }
10
+
11
+ struct bz_file * bz_get_bzf(VALUE obj) {
12
+ struct bz_file *bzf;
13
+
14
+ Get_BZ2(obj, bzf);
15
+ if (!bzf->buf) {
16
+ if (bzf->state != BZ_OK) {
17
+ bz_raise(bzf->state);
18
+ }
19
+ bzf->state = BZ2_bzDecompressInit(&(bzf->bzs), 0, bzf->small);
20
+ if (bzf->state != BZ_OK) {
21
+ BZ2_bzDecompressEnd(&(bzf->bzs));
22
+ bz_raise(bzf->state);
23
+ }
24
+ bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1);
25
+ bzf->buflen = BZ_RB_BLOCKSIZE;
26
+ bzf->buf[0] = bzf->buf[bzf->buflen] = '\0';
27
+ bzf->bzs.total_out_hi32 = bzf->bzs.total_out_lo32 = 0;
28
+ bzf->bzs.next_out = bzf->buf;
29
+ bzf->bzs.avail_out = 0;
30
+ }
31
+ if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) {
32
+ return 0;
33
+ }
34
+ return bzf;
35
+ }
36
+
37
+ int bz_next_available(struct bz_file *bzf, int in){
38
+ bzf->bzs.next_out = bzf->buf;
39
+ bzf->bzs.avail_out = 0;
40
+ if (bzf->state == BZ_STREAM_END) {
41
+ return BZ_STREAM_END;
42
+ }
43
+ if (!bzf->bzs.avail_in) {
44
+ bzf->in = rb_funcall(bzf->io, id_read, 1, INT2FIX(1024));
45
+ if (TYPE(bzf->in) != T_STRING || RSTRING_LEN(bzf->in) == 0) {
46
+ BZ2_bzDecompressEnd(&(bzf->bzs));
47
+ bzf->bzs.avail_out = 0;
48
+ bzf->state = BZ_UNEXPECTED_EOF;
49
+ bz_raise(bzf->state);
50
+ }
51
+ bzf->bzs.next_in = RSTRING_PTR(bzf->in);
52
+ bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in);
53
+ }
54
+ if ((bzf->buflen - in) < (BZ_RB_BLOCKSIZE / 2)) {
55
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen+BZ_RB_BLOCKSIZE+1);
56
+ bzf->buflen += BZ_RB_BLOCKSIZE;
57
+ bzf->buf[bzf->buflen] = '\0';
58
+ }
59
+ bzf->bzs.avail_out = bzf->buflen - in;
60
+ bzf->bzs.next_out = bzf->buf + in;
61
+ bzf->state = BZ2_bzDecompress(&(bzf->bzs));
62
+ if (bzf->state != BZ_OK) {
63
+ BZ2_bzDecompressEnd(&(bzf->bzs));
64
+ if (bzf->state != BZ_STREAM_END) {
65
+ bzf->bzs.avail_out = 0;
66
+ bz_raise(bzf->state);
67
+ }
68
+ }
69
+ bzf->bzs.avail_out = bzf->buflen - bzf->bzs.avail_out;
70
+ bzf->bzs.next_out = bzf->buf;
71
+ return 0;
72
+ }
73
+
74
+ VALUE bz_read_until(struct bz_file *bzf, const char *str, int len, int *td1) {
75
+ VALUE res;
76
+ int total, i, nex = 0;
77
+ char *p, *t, *tx, *end, *pend = ((char*) str) + len;
78
+
79
+ res = rb_str_new(0, 0);
80
+ while (1) {
81
+ total = bzf->bzs.avail_out;
82
+ if (len == 1) {
83
+ tx = memchr(bzf->bzs.next_out, *str, bzf->bzs.avail_out);
84
+ if (tx) {
85
+ i = (int)(tx - bzf->bzs.next_out + len);
86
+ res = rb_str_cat(res, bzf->bzs.next_out, i);
87
+ bzf->bzs.next_out += i;
88
+ bzf->bzs.avail_out -= i;
89
+ return res;
90
+ }
91
+ } else {
92
+ tx = bzf->bzs.next_out;
93
+ end = bzf->bzs.next_out + bzf->bzs.avail_out;
94
+ while (tx + len <= end) {
95
+ for (p = (char*) str, t = tx; p != pend; ++p, ++t) {
96
+ if (*p != *t) break;
97
+ }
98
+ if (p == pend) {
99
+ i = (int)(tx - bzf->bzs.next_out + len);
100
+ res = rb_str_cat(res, bzf->bzs.next_out, i);
101
+ bzf->bzs.next_out += i;
102
+ bzf->bzs.avail_out -= i;
103
+ return res;
104
+ }
105
+ if (td1) {
106
+ tx += td1[(int)*(tx + len)];
107
+ } else {
108
+ tx += 1;
109
+ }
110
+ }
111
+ }
112
+ nex = 0;
113
+ if (total) {
114
+ nex = len - 1;
115
+ res = rb_str_cat(res, bzf->bzs.next_out, total - nex);
116
+ if (nex) {
117
+ MEMMOVE(bzf->buf, bzf->bzs.next_out + total - nex, char, nex);
118
+ }
119
+ }
120
+ if (bz_next_available(bzf, nex) == BZ_STREAM_END) {
121
+ if (nex) {
122
+ res = rb_str_cat(res, bzf->buf, nex);
123
+ }
124
+ if (RSTRING_LEN(res)) {
125
+ return res;
126
+ }
127
+ return Qnil;
128
+ }
129
+ }
130
+ return Qnil;
131
+ }
132
+
133
+ /**
134
+ * Reads a stream as long as the next character is equal to the specified
135
+ * character
136
+ *
137
+ * Returns the next character in the sequence that's not the same as the one
138
+ * given or EOF if it's there until the end of the file.
139
+ */
140
+ int bz_read_while(struct bz_file *bzf, char c) {
141
+ char *end;
142
+
143
+ while (1) {
144
+ end = bzf->bzs.next_out + bzf->bzs.avail_out;
145
+ while (bzf->bzs.next_out < end) {
146
+ if (c != *bzf->bzs.next_out) {
147
+ return *bzf->bzs.next_out;
148
+ }
149
+ ++bzf->bzs.next_out;
150
+ --bzf->bzs.avail_out;
151
+ }
152
+ if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
153
+ return EOF;
154
+ }
155
+ }
156
+ return EOF;
157
+ }
158
+
159
+ /*
160
+ * Internally allocates data for a new Reader
161
+ * @private
162
+ */
163
+ VALUE bz_reader_s_alloc(VALUE obj) {
164
+ struct bz_file *bzf;
165
+ VALUE res;
166
+ res = Data_Make_Struct(obj, struct bz_file, bz_file_mark, free, bzf);
167
+ bzf->bzs.bzalloc = bz_malloc;
168
+ bzf->bzs.bzfree = bz_free;
169
+ bzf->blocks = DEFAULT_BLOCKS;
170
+ bzf->state = BZ_OK;
171
+ return res;
172
+ }
173
+
174
+ VALUE bz_reader_close __((VALUE));
175
+
176
+ /*
177
+ * call-seq:
178
+ * open(filename, &block=nil) -> Bzip2::Reader
179
+ *
180
+ * @param [String] filename the name of the file to read from
181
+ * @yieldparam [Bzip2::Reader] reader the Bzip2::Reader instance
182
+ *
183
+ * If a block is given, the created Bzip2::Reader instance is yielded to the
184
+ * block and will be closed when the block completes. It is guaranteed via
185
+ * +ensure+ that the reader is closed
186
+ *
187
+ * If a block is not given, a Bzip2::Reader instance will be returned
188
+ *
189
+ * Bzip2::Reader.open('file') { |f| puts f.gets }
190
+ *
191
+ * reader = Bzip2::Reader.open('file')
192
+ * puts reader.gets
193
+ * reader.close
194
+ *
195
+ * @return [Bzip2::Reader, nil]
196
+ */
197
+ VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj) {
198
+ VALUE res;
199
+ struct bz_file *bzf;
200
+
201
+ if (argc < 1) {
202
+ rb_raise(rb_eArgError, "invalid number of arguments");
203
+ }
204
+ argv[0] = rb_funcall2(rb_mKernel, id_open, 1, argv);
205
+ if (NIL_P(argv[0])) {
206
+ return Qnil;
207
+ }
208
+ res = rb_funcall2(obj, id_new, argc, argv);
209
+ Data_Get_Struct(res, struct bz_file, bzf);
210
+ bzf->flags |= BZ2_RB_CLOSE;
211
+ if (rb_block_given_p()) {
212
+ return rb_ensure(rb_yield, res, bz_reader_close, res);
213
+ }
214
+ return res;
215
+ }
216
+
217
+ /*
218
+ * call-seq:
219
+ * initialize(io)
220
+ *
221
+ * Creates a new stream for reading a bzip file or string
222
+ *
223
+ * @param [File, string, #read] io the source for input data. If the source is
224
+ * a file or something responding to #read, then data will be read via #read,
225
+ * otherwise if the input is a string it will be taken as the literal data
226
+ * to decompress
227
+ */
228
+ VALUE bz_reader_init(int argc, VALUE *argv, VALUE obj) {
229
+ struct bz_file *bzf;
230
+ int small = 0;
231
+ VALUE a, b;
232
+ int internal = 0;
233
+
234
+ if (rb_scan_args(argc, argv, "11", &a, &b) == 2) {
235
+ small = RTEST(b);
236
+ }
237
+ rb_io_taint_check(a);
238
+ if (OBJ_TAINTED(a)) {
239
+ OBJ_TAINT(obj);
240
+ }
241
+ if (rb_respond_to(a, id_read)) {
242
+ if (TYPE(a) == T_FILE) {
243
+ #ifndef RUBY_19_COMPATIBILITY
244
+ OpenFile *fptr;
245
+ #else
246
+ rb_io_t *fptr;
247
+ #endif
248
+
249
+ GetOpenFile(a, fptr);
250
+ rb_io_check_readable(fptr);
251
+ } else if (rb_respond_to(a, id_closed)) {
252
+ VALUE iv = rb_funcall2(a, id_closed, 0, 0);
253
+ if (RTEST(iv)) {
254
+ rb_raise(rb_eArgError, "closed object");
255
+ }
256
+ }
257
+ } else {
258
+ struct bz_str *bzs;
259
+ VALUE res;
260
+
261
+ if (!rb_respond_to(a, id_str)) {
262
+ rb_raise(rb_eArgError, "first argument must respond to #read");
263
+ }
264
+ a = rb_funcall2(a, id_str, 0, 0);
265
+ if (TYPE(a) != T_STRING) {
266
+ rb_raise(rb_eArgError, "#to_str must return a String");
267
+ }
268
+ res = Data_Make_Struct(bz_cInternal, struct bz_str,
269
+ bz_str_mark, free, bzs);
270
+ bzs->str = a;
271
+ a = res;
272
+ internal = BZ2_RB_INTERNAL;
273
+ }
274
+ Data_Get_Struct(obj, struct bz_file, bzf);
275
+ bzf->io = a;
276
+ bzf->small = small;
277
+ bzf->flags |= internal;
278
+ return obj;
279
+ }
280
+
281
+ /*
282
+ * call-seq:
283
+ * read(len = nil)
284
+ *
285
+ * Read decompressed data from the stream.
286
+ *
287
+ * Bzip2::Reader.new(Bzip2.compress('ab')).read # => "ab"
288
+ * Bzip2::Reader.new(Bzip2.compress('ab')).read(1) # => "a"
289
+ *
290
+ * @return [String, nil] the decompressed data read or +nil+ if eoz has been
291
+ * reached
292
+ * @param [Integer] len the number of decompressed bytes which should be read.
293
+ * If nothing is specified, the entire stream is read
294
+ */
295
+ VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj) {
296
+ struct bz_file *bzf;
297
+ VALUE res, length;
298
+ int total;
299
+ int n;
300
+
301
+ rb_scan_args(argc, argv, "01", &length);
302
+ if (NIL_P(length)) {
303
+ n = -1;
304
+ } else {
305
+ n = NUM2INT(length);
306
+ if (n < 0) {
307
+ rb_raise(rb_eArgError, "negative length %d given", n);
308
+ }
309
+ }
310
+ bzf = bz_get_bzf(obj);
311
+ if (!bzf) {
312
+ return Qnil;
313
+ }
314
+ res = rb_str_new(0, 0);
315
+ if (OBJ_TAINTED(obj)) {
316
+ OBJ_TAINT(res);
317
+ }
318
+ if (n == 0) {
319
+ free(bzf->buf);
320
+ return res;
321
+ }
322
+ while (1) {
323
+ total = bzf->bzs.avail_out;
324
+ if (n != -1 && (RSTRING_LEN(res) + total) >= n) {
325
+ n -= (int) RSTRING_LEN(res);
326
+ res = rb_str_cat(res, bzf->bzs.next_out, n);
327
+ bzf->bzs.next_out += n;
328
+ bzf->bzs.avail_out -= n;
329
+ free(bzf->buf);
330
+ return res;
331
+ }
332
+ if (total) {
333
+ res = rb_str_cat(res, bzf->bzs.next_out, total);
334
+ }
335
+ if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
336
+ free(bzf->buf);
337
+ return res;
338
+ }
339
+ }
340
+ return Qnil;
341
+ }
342
+
343
+ int bz_getc(VALUE obj) {
344
+ VALUE length = INT2FIX(1);
345
+ VALUE res = bz_reader_read(1, &length, obj);
346
+ if (NIL_P(res) || RSTRING_LEN(res) == 0) {
347
+ return EOF;
348
+ }
349
+ return RSTRING_PTR(res)[0];
350
+ }
351
+
352
+ /*
353
+ * call-seq:
354
+ * ungetc(byte)
355
+ *
356
+ * "Ungets" a character/byte. This rewinds the stream by 1 character and inserts
357
+ * the given character into that position. The next read will return the given
358
+ * character as the first one read
359
+ *
360
+ * reader = Bzip2::Reader.new Bzip2.compress('abc')
361
+ * reader.getc # => 97
362
+ * reader.ungetc 97 # => nil
363
+ * reader.getc # => 97
364
+ * reader.ungetc 42 # => nil
365
+ * reader.getc # => 42
366
+ * reader.getc # => 98
367
+ * reader.getc # => 99
368
+ * reader.ungetc 100 # => nil
369
+ * reader.getc # => 100
370
+ *
371
+ * @param [Integer] byte the byte to 'unget'
372
+ * @return [nil] always
373
+ */
374
+ VALUE bz_reader_ungetc(VALUE obj, VALUE a) {
375
+ struct bz_file *bzf;
376
+ int c = NUM2INT(a);
377
+
378
+ Get_BZ2(obj, bzf);
379
+ if (!bzf->buf) {
380
+ bz_raise(BZ_SEQUENCE_ERROR);
381
+ }
382
+ if (bzf->bzs.avail_out < bzf->buflen) {
383
+ bzf->bzs.next_out -= 1;
384
+ bzf->bzs.next_out[0] = c;
385
+ bzf->bzs.avail_out += 1;
386
+ } else {
387
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + 2);
388
+ bzf->buf[bzf->buflen++] = c;
389
+ bzf->buf[bzf->buflen] = '\0';
390
+ bzf->bzs.next_out = bzf->buf;
391
+ bzf->bzs.avail_out = bzf->buflen;
392
+ }
393
+ return Qnil;
394
+ }
395
+
396
+ /*
397
+ * call-seq:
398
+ * ungets(str)
399
+ *
400
+ * Equivalently "unget" a string. When called on a string that was just read
401
+ * from the stream, this inserts the string back into the stream to be read
402
+ * again.
403
+ *
404
+ * When called with a string which hasn't been read from the stream, it does
405
+ * the same thing, and the next read line/data will start from the beginning
406
+ * of the given data and then continue on with the rest of the stream.
407
+ *
408
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
409
+ * reader.gets # => "a\n"
410
+ * reader.ungets "a\n" # => nil
411
+ * reader.gets # => "a\n"
412
+ * reader.ungets "foo" # => nil
413
+ * reader.gets # => "foob"
414
+ *
415
+ * @param [String] str the string to insert back into the stream
416
+ * @return [nil] always
417
+ */
418
+ VALUE bz_reader_ungets(VALUE obj, VALUE a) {
419
+ struct bz_file *bzf;
420
+
421
+ Check_Type(a, T_STRING);
422
+ Get_BZ2(obj, bzf);
423
+ if (!bzf->buf) {
424
+ bz_raise(BZ_SEQUENCE_ERROR);
425
+ }
426
+ if ((bzf->bzs.avail_out + RSTRING_LEN(a)) < bzf->buflen) {
427
+ bzf->bzs.next_out -= RSTRING_LEN(a);
428
+ MEMCPY(bzf->bzs.next_out, RSTRING_PTR(a), char, RSTRING_LEN(a));
429
+ bzf->bzs.avail_out += (int) RSTRING_LEN(a);
430
+ } else {
431
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + RSTRING_LEN(a) + 1);
432
+ MEMCPY(bzf->buf + bzf->buflen, RSTRING_PTR(a), char,RSTRING_LEN(a));
433
+ bzf->buflen += (int) RSTRING_LEN(a);
434
+ bzf->buf[bzf->buflen] = '\0';
435
+ bzf->bzs.next_out = bzf->buf;
436
+ bzf->bzs.avail_out = bzf->buflen;
437
+ }
438
+ return Qnil;
439
+ }
440
+
441
+ VALUE bz_reader_gets(VALUE obj) {
442
+ struct bz_file *bzf;
443
+ VALUE str = Qnil;
444
+
445
+ bzf = bz_get_bzf(obj);
446
+ if (bzf) {
447
+ str = bz_read_until(bzf, "\n", 1, 0);
448
+ if (!NIL_P(str)) {
449
+ bzf->lineno++;
450
+ OBJ_TAINT(str);
451
+ }
452
+ }
453
+ return str;
454
+ }
455
+
456
+ VALUE bz_reader_gets_internal(int argc, VALUE *argv, VALUE obj, int *td, int init) {
457
+ struct bz_file *bzf;
458
+ VALUE rs, res;
459
+ const char *rsptr;
460
+ int rslen, rspara, *td1;
461
+
462
+ rs = rb_rs;
463
+ if (argc) {
464
+ rb_scan_args(argc, argv, "1", &rs);
465
+ if (!NIL_P(rs)) {
466
+ Check_Type(rs, T_STRING);
467
+ }
468
+ }
469
+ if (NIL_P(rs)) {
470
+ return bz_reader_read(1, &rs, obj);
471
+ }
472
+ rslen = (int) RSTRING_LEN(rs);
473
+ if (rs == rb_default_rs || (rslen == 1 && RSTRING_PTR(rs)[0] == '\n')) {
474
+ return bz_reader_gets(obj);
475
+ }
476
+
477
+ if (rslen == 0) {
478
+ rsptr = "\n\n";
479
+ rslen = 2;
480
+ rspara = 1;
481
+ } else {
482
+ rsptr = RSTRING_PTR(rs);
483
+ rspara = 0;
484
+ }
485
+
486
+ bzf = bz_get_bzf(obj);
487
+ if (!bzf) {
488
+ return Qnil;
489
+ }
490
+ if (rspara) {
491
+ bz_read_while(bzf, '\n');
492
+ }
493
+ td1 = 0;
494
+ if (rslen != 1) {
495
+ if (init) {
496
+ int i;
497
+
498
+ for (i = 0; i < ASIZE; i++) {
499
+ td[i] = rslen + 1;
500
+ }
501
+ for (i = 0; i < rslen; i++) {
502
+ td[(int)*(rsptr + i)] = rslen - i;
503
+ }
504
+ }
505
+ td1 = td;
506
+ }
507
+
508
+ res = bz_read_until(bzf, rsptr, rslen, td1);
509
+ if (rspara) {
510
+ bz_read_while(bzf, '\n');
511
+ }
512
+
513
+ if (!NIL_P(res)) {
514
+ bzf->lineno++;
515
+ OBJ_TAINT(res);
516
+ }
517
+ return res;
518
+ }
519
+
520
+ /*
521
+ * Specs were missing for this method originally and playing around with it
522
+ * gave some very odd results, so unless you know what you're doing, I wouldn't
523
+ * mess around with this...
524
+ */
525
+ VALUE bz_reader_set_unused(VALUE obj, VALUE a) {
526
+ struct bz_file *bzf;
527
+
528
+ Check_Type(a, T_STRING);
529
+ Get_BZ2(obj, bzf);
530
+ if (!bzf->in) {
531
+ bzf->in = rb_str_new(RSTRING_PTR(a), RSTRING_LEN(a));
532
+ } else {
533
+ bzf->in = rb_str_cat(bzf->in, RSTRING_PTR(a), RSTRING_LEN(a));
534
+ }
535
+ bzf->bzs.next_in = RSTRING_PTR(bzf->in);
536
+ bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in);
537
+ return Qnil;
538
+ }
539
+
540
+ /*
541
+ * Reads one character from the stream, returning the byte read.
542
+ *
543
+ * reader = Bzip2::Reader.new Bzip2.compress('ab')
544
+ * reader.getc # => 97
545
+ * reader.getc # => 98
546
+ * reader.getc # => nil
547
+ *
548
+ * @return [Integer, nil] the byte value of the character read or +nil+ if eoz
549
+ * has been reached
550
+ */
551
+ VALUE bz_reader_getc(VALUE obj) {
552
+ VALUE str;
553
+ VALUE len = INT2FIX(1);
554
+
555
+ str = bz_reader_read(1, &len, obj);
556
+ if (NIL_P(str) || RSTRING_LEN(str) == 0) {
557
+ return Qnil;
558
+ }
559
+ return INT2FIX(RSTRING_PTR(str)[0] & 0xff);
560
+ }
561
+
562
+ void bz_eoz_error() {
563
+ rb_raise(bz_eEOZError, "End of Zip component reached");
564
+ }
565
+
566
+ /*
567
+ * Performs the same as Bzip2::Reader#getc except Bzip2::EOZError is raised if
568
+ * eoz has been reached
569
+ *
570
+ * @raise [Bzip2::EOZError] if eoz has been reached
571
+ */
572
+ VALUE bz_reader_readchar(VALUE obj) {
573
+ VALUE res = bz_reader_getc(obj);
574
+
575
+ if (NIL_P(res)) {
576
+ bz_eoz_error();
577
+ }
578
+ return res;
579
+ }
580
+
581
+ /*
582
+ * call-seq:
583
+ * gets(sep = "\n")
584
+ *
585
+ * Reads a line from the stream until the separator is reached. This does not
586
+ * throw an exception, but rather returns nil if an eoz/eof error occurs
587
+ *
588
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
589
+ * reader.gets # => "a\n"
590
+ * reader.gets # => "b"
591
+ * reader.gets # => nil
592
+ *
593
+ * @return [String, nil] the read data or nil if eoz has been reached
594
+ * @see Bzip2::Reader#readline
595
+ */
596
+ VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE obj) {
597
+ int td[ASIZE];
598
+ VALUE str = bz_reader_gets_internal(argc, argv, obj, td, Qtrue);
599
+
600
+ if (!NIL_P(str)) {
601
+ rb_lastline_set(str);
602
+ }
603
+ return str;
604
+ }
605
+
606
+ /*
607
+ * call-seq:
608
+ * readline(sep = "\n")
609
+ *
610
+ * Reads one line from the stream and returns it (including the separator)
611
+ *
612
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
613
+ * reader.readline # => "a\n"
614
+ * reader.readline # => "b"
615
+ * reader.readline # => raises Bzip2::EOZError
616
+ *
617
+ *
618
+ * @param [String] sep the newline separator character
619
+ * @return [String] the read line
620
+ * @see Bzip2::Reader.readlines
621
+ * @raise [Bzip2::EOZError] if the stream has reached its end
622
+ */
623
+ VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj) {
624
+ VALUE res = bz_reader_gets_m(argc, argv, obj);
625
+
626
+ if (NIL_P(res)) {
627
+ bz_eoz_error();
628
+ }
629
+ return res;
630
+ }
631
+
632
+ /*
633
+ * call-seq:
634
+ * readlines(sep = "\n")
635
+ *
636
+ * Reads the lines of the files and returns the result as an array.
637
+ *
638
+ * If the stream has reached eoz, then an empty array is returned
639
+ *
640
+ * @param [String] sep the newline separator character
641
+ * @return [Array] an array of lines read
642
+ * @see Bzip2::Reader.readlines
643
+ */
644
+ VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj) {
645
+ VALUE line, ary;
646
+ int td[ASIZE], in;
647
+
648
+ in = Qtrue;
649
+ ary = rb_ary_new();
650
+ while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
651
+ in = Qfalse;
652
+ rb_ary_push(ary, line);
653
+ }
654
+ return ary;
655
+ }
656
+
657
+ /*
658
+ * call-seq:
659
+ * each(sep = "\n", &block)
660
+ *
661
+ * Iterates over the lines of the stream.
662
+ *
663
+ * @param [String] sep the byte which separates lines
664
+ * @yieldparam [String] line the next line of the file (including the separator
665
+ * character)
666
+ * @see Bzip2::Reader.foreach
667
+ */
668
+ VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj) {
669
+ VALUE line;
670
+ int td[ASIZE], in;
671
+
672
+ in = Qtrue;
673
+ while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
674
+ in = Qfalse;
675
+ rb_yield(line);
676
+ }
677
+ return obj;
678
+ }
679
+
680
+ /*
681
+ * call-seq:
682
+ * each_byte(&block)
683
+ *
684
+ * Iterates over the decompressed bytes of the file.
685
+ *
686
+ * Bzip2::Writer.open('file'){ |f| f << 'asdf' }
687
+ * reader = Bzip2::Reader.new File.open('file')
688
+ * reader.each_byte{ |b| puts "#{b} #{b.chr}" }
689
+ *
690
+ * # Output:
691
+ * # 97 a
692
+ * # 115 s
693
+ * # 100 d
694
+ * # 102 f
695
+ *
696
+ * @yieldparam [Integer] byte the decompressed bytes of the file
697
+ */
698
+ VALUE bz_reader_each_byte(VALUE obj) {
699
+ int c;
700
+
701
+ while ((c = bz_getc(obj)) != EOF) {
702
+ rb_yield(INT2FIX(c & 0xff));
703
+ }
704
+ return obj;
705
+ }
706
+
707
+ /*
708
+ * Specs were missing for this method originally and playing around with it
709
+ * gave some very odd results, so unless you know what you're doing, I wouldn't
710
+ * mess around with this...
711
+ */
712
+ VALUE bz_reader_unused(VALUE obj) {
713
+ struct bz_file *bzf;
714
+ VALUE res;
715
+
716
+ Get_BZ2(obj, bzf);
717
+ if (!bzf->in || bzf->state != BZ_STREAM_END) {
718
+ return Qnil;
719
+ }
720
+ if (bzf->bzs.avail_in) {
721
+ res = rb_tainted_str_new(bzf->bzs.next_in, bzf->bzs.avail_in);
722
+ bzf->bzs.avail_in = 0;
723
+ } else {
724
+ res = rb_tainted_str_new(0, 0);
725
+ }
726
+ return res;
727
+ }
728
+
729
+ /*
730
+ * Test whether the end of the bzip stream has been reached
731
+ *
732
+ * @return [Boolean] +true+ if the reader is at the end of the bz stream or
733
+ * +false+ otherwise
734
+ */
735
+ VALUE bz_reader_eoz(VALUE obj) {
736
+ struct bz_file *bzf;
737
+
738
+ Get_BZ2(obj, bzf);
739
+ if (!bzf->in || !bzf->buf) {
740
+ return Qnil;
741
+ }
742
+ if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) {
743
+ return Qtrue;
744
+ }
745
+ return Qfalse;
746
+ }
747
+
748
+ /*
749
+ * Test whether the bzip stream has reached its end (see Bzip2::Reader#eoz?)
750
+ * and then tests that the underlying IO has also reached an eof
751
+ *
752
+ * @return [Boolean] +true+ if the stream has reached its end or +false+ otherwise.
753
+ */
754
+ VALUE bz_reader_eof(VALUE obj) {
755
+ struct bz_file *bzf;
756
+ VALUE res;
757
+
758
+ res = bz_reader_eoz(obj);
759
+ if (RTEST(res)) {
760
+ Get_BZ2(obj, bzf);
761
+ if (bzf->bzs.avail_in) {
762
+ res = Qfalse;
763
+ } else {
764
+ res = bz_reader_getc(obj);
765
+ if (NIL_P(res)) {
766
+ res = Qtrue;
767
+ } else {
768
+ bz_reader_ungetc(obj, res);
769
+ res = Qfalse;
770
+ }
771
+ }
772
+ }
773
+ return res;
774
+ }
775
+
776
+ /*
777
+ * Tests whether this reader has been closed.
778
+ *
779
+ * @return [Boolean] +true+ if it is or +false+ otherwise.
780
+ */
781
+ VALUE bz_reader_closed(VALUE obj) {
782
+ struct bz_file *bzf;
783
+
784
+ Data_Get_Struct(obj, struct bz_file, bzf);
785
+ return RTEST(bzf->io)?Qfalse:Qtrue;
786
+ }
787
+
788
+ /*
789
+ * Closes this reader to disallow further reads.
790
+ *
791
+ * reader = Bzip2::Reader.new File.open('file')
792
+ * reader.close
793
+ *
794
+ * reader.closed? # => true
795
+ *
796
+ * @return [File] the io with which the reader was created.
797
+ * @raise [IOError] if the stream has already been closed
798
+ */
799
+ VALUE bz_reader_close(VALUE obj) {
800
+ struct bz_file *bzf;
801
+ VALUE res;
802
+
803
+ Get_BZ2(obj, bzf);
804
+ if (bzf->buf) {
805
+ free(bzf->buf);
806
+ bzf->buf = 0;
807
+ }
808
+ if (bzf->state == BZ_OK) {
809
+ BZ2_bzDecompressEnd(&(bzf->bzs));
810
+ }
811
+ if (bzf->flags & BZ2_RB_CLOSE) {
812
+ int closed = 0;
813
+ if (rb_respond_to(bzf->io, id_closed)) {
814
+ VALUE iv = rb_funcall2(bzf->io, id_closed, 0, 0);
815
+ closed = RTEST(iv);
816
+ }
817
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
818
+ rb_funcall2(bzf->io, id_close, 0, 0);
819
+ }
820
+ }
821
+ if (bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL)) {
822
+ res = Qnil;
823
+ } else {
824
+ res = bzf->io;
825
+ }
826
+ bzf->io = 0;
827
+ return res;
828
+ }
829
+
830
+ /*
831
+ * Originally undocumented and had no specs. Appears to call Bzip2::Reader#read
832
+ * and then mark the stream as finished, but this didn't work for me...
833
+ */
834
+ VALUE bz_reader_finish(VALUE obj) {
835
+ struct bz_file *bzf;
836
+
837
+ Get_BZ2(obj, bzf);
838
+ if (bzf->buf) {
839
+ rb_funcall2(obj, id_read, 0, 0);
840
+ free(bzf->buf);
841
+ }
842
+ bzf->buf = 0;
843
+ bzf->state = BZ_OK;
844
+ return Qnil;
845
+ }
846
+
847
+ /*
848
+ * Originally undocumented and had no specs. Appears to work nearly the same
849
+ * as Bzip2::Reader#close...
850
+ */
851
+ VALUE bz_reader_close_bang(VALUE obj) {
852
+ struct bz_file *bzf;
853
+ int closed;
854
+
855
+ Get_BZ2(obj, bzf);
856
+ closed = bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL);
857
+ bz_reader_close(obj);
858
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
859
+ if (rb_respond_to(bzf->io, id_closed)) {
860
+ closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
861
+ }
862
+ if (!closed) {
863
+ rb_funcall2(bzf->io, id_close, 0, 0);
864
+ }
865
+ }
866
+ return Qnil;
867
+ }
868
+
869
+ struct foreach_arg {
870
+ int argc;
871
+ VALUE sep;
872
+ VALUE obj;
873
+ };
874
+
875
+ VALUE bz_reader_foreach_line(struct foreach_arg *arg) {
876
+ VALUE str;
877
+ int td[ASIZE], in;
878
+
879
+ in = Qtrue;
880
+ while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) {
881
+ in = Qfalse;
882
+ rb_yield(str);
883
+ }
884
+ return Qnil;
885
+ }
886
+
887
+ /*
888
+ * call-seq:
889
+ * foreach(filename, &block)
890
+ *
891
+ * Reads a bz2 compressed file and yields each line to the block
892
+ *
893
+ * Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
894
+ * Bzip2::Reader.foreach('file'){ |l| p l }
895
+ *
896
+ * # Output:
897
+ * # "a\n"
898
+ * # "b\n"
899
+ * # "c\n"
900
+ * # "\n"
901
+ * # "d"
902
+ *
903
+ * @param [String] filename the path to the file to open
904
+ * @yieldparam [String] each line of the file
905
+ */
906
+ VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj) {
907
+ VALUE fname, sep;
908
+ struct foreach_arg arg;
909
+ struct bz_file *bzf;
910
+
911
+ if (!rb_block_given_p()) {
912
+ rb_raise(rb_eArgError, "call out of a block");
913
+ }
914
+ rb_scan_args(argc, argv, "11", &fname, &sep);
915
+ #ifdef SafeStringValue
916
+ SafeStringValue(fname);
917
+ #else
918
+ Check_SafeStr(fname);
919
+ #endif
920
+ arg.argc = argc - 1;
921
+ arg.sep = sep;
922
+ arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
923
+ if (NIL_P(arg.obj)) {
924
+ return Qnil;
925
+ }
926
+ arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
927
+ Data_Get_Struct(arg.obj, struct bz_file, bzf);
928
+ bzf->flags |= BZ2_RB_CLOSE;
929
+ return rb_ensure(bz_reader_foreach_line, (VALUE)&arg, bz_reader_close, arg.obj);
930
+ }
931
+
932
+ VALUE bz_reader_i_readlines(struct foreach_arg *arg) {
933
+ VALUE str, res;
934
+ int td[ASIZE], in;
935
+
936
+ in = Qtrue;
937
+ res = rb_ary_new();
938
+ while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) {
939
+ in = Qfalse;
940
+ rb_ary_push(res, str);
941
+ }
942
+ return res;
943
+ }
944
+
945
+ /*
946
+ * call-seq:
947
+ * readlines(filename, separator="\n")
948
+ *
949
+ * Opens the given bz2 compressed file for reading and decompresses the file,
950
+ * returning an array of the lines of the file. A line is denoted by the
951
+ * separator argument.
952
+ *
953
+ * Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
954
+ *
955
+ * Bzip2::Reader.readlines('file') # => ["a\n", "b\n", "c\n", "\n", "d"]
956
+ * Bzip2::Reader.readlines('file', 'c') # => ["a\nb\nc", "\n\nd"]
957
+ *
958
+ * @param [String] filename the path to the file to read
959
+ * @param [String] separator the character to denote a newline in the file
960
+ * @see Bzip2::Reader#readlines
961
+ * @return [Array] an array of lines for the file
962
+ * @raise [Bzip2::Error] if the file is not a valid bz2 compressed file
963
+ */
964
+ VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj) {
965
+ VALUE fname, sep;
966
+ struct foreach_arg arg;
967
+ struct bz_file *bzf;
968
+
969
+ rb_scan_args(argc, argv, "11", &fname, &sep);
970
+ #ifdef SafeStringValue
971
+ SafeStringValue(fname);
972
+ #else
973
+ Check_SafeStr(fname);
974
+ #endif
975
+ arg.argc = argc - 1;
976
+ arg.sep = sep;
977
+ arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
978
+ if (NIL_P(arg.obj)) {
979
+ return Qnil;
980
+ }
981
+ arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
982
+ Data_Get_Struct(arg.obj, struct bz_file, bzf);
983
+ bzf->flags |= BZ2_RB_CLOSE;
984
+ return rb_ensure(bz_reader_i_readlines, (VALUE)&arg, bz_reader_close, arg.obj);
985
+ }
986
+
987
+ /*
988
+ * Returns the current line number that the stream is at. This number is based
989
+ * on the newline separator being "\n"
990
+ *
991
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
992
+ * reader.lineno # => 0
993
+ * reader.readline # => "a\n"
994
+ * reader.lineno # => 1
995
+ * reader.readline # => "b"
996
+ * reader.lineno # => 2
997
+ *
998
+ * @return [Integer] the current line number
999
+ */
1000
+ VALUE bz_reader_lineno(VALUE obj) {
1001
+ struct bz_file *bzf;
1002
+
1003
+ Get_BZ2(obj, bzf);
1004
+ return INT2NUM(bzf->lineno);
1005
+ }
1006
+
1007
+ /*
1008
+ * call-seq:
1009
+ * lineno=(num)
1010
+ *
1011
+ * Sets the internal line number count that this stream should be set at
1012
+ *
1013
+ * reader = Bzip2::Reader.new Bzip2.compress("a\nb")
1014
+ * reader.lineno # => 0
1015
+ * reader.readline # => "a\n"
1016
+ * reader.lineno # => 1
1017
+ * reader.lineno = 0
1018
+ * reader.readline # => "b"
1019
+ * reader.lineno # => 1
1020
+ *
1021
+ * @note This does not actually rewind or move the stream forward
1022
+ * @param [Integer] lineno the line number which the stream should consider
1023
+ * being set at
1024
+ * @return [Integer] the line number provided
1025
+ */
1026
+ VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno) {
1027
+ struct bz_file *bzf;
1028
+
1029
+ Get_BZ2(obj, bzf);
1030
+ bzf->lineno = NUM2INT(lineno);
1031
+ return lineno;
1032
+ }