bzip2-ruby 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ == 0.2.2 2008-12-22
2
+
3
+ * 1 major enhancement:
4
+ * Gemify bz2 library from http://moulon.inra.fr/ruby/bz2.html
5
+ * All credit goes to Guy Decoux <ts@moulon.inra.fr>
@@ -0,0 +1,23 @@
1
+ = Ruby C bindings to libbzip2
2
+
3
+ == Installation
4
+
5
+ sudo gem install brianmario-bzip2-ruby -s http://gems.github.com/
6
+
7
+ You may need to specify:
8
+
9
+ --with-bz2-dir=<include file directory for libbzip2>
10
+
11
+ == Documentation:
12
+
13
+ coming soon...
14
+
15
+ == Spec Tests:
16
+
17
+ coming soon...
18
+
19
+ == Copying
20
+
21
+ This extension module is copyrighted free software by Guy Decoux.
22
+ You can redistribute it and/or modify it under the same terms as Ruby.
23
+ Guy Decoux <ts@moulon.inra.fr>
@@ -0,0 +1,19 @@
1
+ # encoding: UTF-8
2
# Gem packaging tasks. Jeweler is optional: when it is missing we only print
# an installation hint instead of aborting the whole Rakefile.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "bzip2-ruby"
    gem.summary = "Ruby C bindings to libbzip2."
    gem.email = "seniorlopez@gmail.com"
    gem.homepage = "http://github.com/brianmario/bzip2-ruby"
    gem.authors = ["Guy Decoux", "Brian Lopez"]
    # The compiled extension lives in ext/, so that is the load path.
    gem.require_paths = ["ext"]
    gem.extra_rdoc_files = `git ls-files *.rdoc`.split("\n")
    # Package exactly what git tracks. The former
    # "lib/jeweler/templates/..." include was copied from Jeweler's own
    # Rakefile and referred to files this project does not contain.
    gem.files = `git ls-files`.split("\n")
    gem.extensions = ["ext/extconf.rb"]
    # gem.rubyforge_project = "bzip2-ruby"
  end
rescue LoadError
  puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 3
3
+ :major: 0
4
+ :minor: 2
@@ -0,0 +1,50 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for bzip2-ruby (originally generated by Jeweler).
Gem::Specification.new do |s|
  s.name = %q{bzip2-ruby}
  s.version = "0.2.3"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Guy Decoux", "Brian Lopez"]
  s.date = %q{2009-05-02}
  s.email = %q{seniorlopez@gmail.com}
  s.extensions = ["ext/extconf.rb"]
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  s.files = [
    "History.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION.yml",
    "bzip2-ruby.gemspec",
    "ext/bz2.c",
    "ext/extconf.rb",
    "tasks/extconf.rake",
    "tasks/extconf/bz2.rake",
    "test/reader.rb",
    "test/runit_.rb",
    "test/writer.rb"
  ]
  s.has_rdoc = true
  s.homepage = %q{http://github.com/brianmario/bzip2-ruby}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["ext"]
  s.rubygems_version = %q{1.3.2}
  s.summary = %q{Ruby C bindings to libbzip2.}
  s.test_files = [
    "test/reader.rb",
    "test/runit_.rb",
    "test/writer.rb"
  ]

  # The generated RubyGems-version probe had only empty branches (this gem
  # declares no dependencies), so just the specification version is kept.
  if s.respond_to? :specification_version then
    s.specification_version = 3
  end
end
@@ -0,0 +1,1582 @@
1
+ #include <ruby.h>
2
+ #include <rubyio.h>
3
+ #include <bzlib.h>
4
+ #include <version.h>
5
+
6
+ static VALUE bz_cWriter, bz_cReader, bz_cInternal;
7
+ static VALUE bz_eError, bz_eConfigError, bz_eEOZError;
8
+
9
+ static VALUE bz_internal_ary;
10
+
11
+ static ID id_new, id_write, id_open, id_flush, id_read;
12
+ static ID id_closed, id_close, id_str;
13
+
14
+ #define BZ2_RB_CLOSE 1
15
+ #define BZ2_RB_INTERNAL 2
16
+
17
+ struct bz_file {
18
+ bz_stream bzs;
19
+ VALUE in, io;
20
+ char *buf;
21
+ int buflen;
22
+ int blocks, work, small;
23
+ int flags, lineno, state;
24
+ };
25
+
26
+ struct bz_str {
27
+ VALUE str;
28
+ int pos;
29
+ };
30
+
31
+ struct bz_iv {
32
+ VALUE bz2, io;
33
+ void (*finalize)();
34
+ };
35
+
36
/*
 * Fetch the struct bz_file behind obj into bzf and raise IOError when the
 * object has already been closed (its io slot is false/nil).
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct inside an unbraced if/else.
 */
#define Get_BZ2(obj, bzf)                               \
    do {                                                \
        rb_io_taint_check(obj);                         \
        Data_Get_Struct(obj, struct bz_file, bzf);      \
        if (!RTEST((bzf)->io)) {                        \
            rb_raise(rb_eIOError, "closed IO");         \
        }                                               \
    } while (0)
42
+
43
+ static VALUE
44
+ bz_raise(error)
45
+ int error;
46
+ {
47
+ VALUE exc;
48
+ char *msg;
49
+
50
+ exc = bz_eError;
51
+ switch (error) {
52
+ case BZ_SEQUENCE_ERROR:
53
+ msg = "uncorrect sequence";
54
+ break;
55
+ case BZ_PARAM_ERROR:
56
+ msg = "parameter out of range";
57
+ break;
58
+ case BZ_MEM_ERROR:
59
+ msg = "not enough memory is available";
60
+ break;
61
+ case BZ_DATA_ERROR:
62
+ msg = "data integrity error is detected";
63
+ break;
64
+ case BZ_DATA_ERROR_MAGIC:
65
+ msg = "compressed stream does not start with the correct magic bytes";
66
+ break;
67
+ case BZ_IO_ERROR:
68
+ msg = "error reading or writing";
69
+ break;
70
+ case BZ_UNEXPECTED_EOF:
71
+ exc = bz_eEOZError;
72
+ msg = "compressed file finishes before the logical end of stream is detected";
73
+ break;
74
+ case BZ_OUTBUFF_FULL:
75
+ msg = "output buffer full";
76
+ break;
77
+ case BZ_CONFIG_ERROR:
78
+ exc = bz_eConfigError;
79
+ msg = "library has been improperly compiled on your platform";
80
+ break;
81
+ default:
82
+ msg = "unknown error";
83
+ exc = bz_eError;
84
+ }
85
+ rb_raise(exc, msg);
86
+ }
87
+
88
+ static void
89
+ bz_str_mark(bzs)
90
+ struct bz_str *bzs;
91
+ {
92
+ rb_gc_mark(bzs->str);
93
+ }
94
+
95
+ static void
96
+ bz_file_mark(bzf)
97
+ struct bz_file *bzf;
98
+ {
99
+ rb_gc_mark(bzf->io);
100
+ rb_gc_mark(bzf->in);
101
+ }
102
+
103
+ static struct bz_iv *
104
+ bz_find_struct(obj, ptr, posp)
105
+ VALUE obj;
106
+ void *ptr;
107
+ int *posp;
108
+ {
109
+ struct bz_iv *bziv;
110
+ int i;
111
+
112
+ for (i = 0; i < RARRAY(bz_internal_ary)->len; i++) {
113
+ Data_Get_Struct(RARRAY(bz_internal_ary)->ptr[i], struct bz_iv, bziv);
114
+ if (ptr) {
115
+ if (TYPE(bziv->io) == T_FILE &&
116
+ RFILE(bziv->io)->fptr == (OpenFile *)ptr) {
117
+ if (posp) *posp = i;
118
+ return bziv;
119
+ }
120
+ else if (TYPE(bziv->io) == T_DATA &&
121
+ DATA_PTR(bziv->io) == ptr) {
122
+ if (posp) *posp = i;
123
+ return bziv;
124
+ }
125
+ }
126
+ else if (bziv->io == obj) {
127
+ if (posp) *posp = i;
128
+ return bziv;
129
+ }
130
+ }
131
+ if (posp) *posp = -1;
132
+ return 0;
133
+ }
134
+
135
+ static VALUE
136
+ bz_writer_internal_flush(bzf)
137
+ struct bz_file *bzf;
138
+ {
139
+ int closed = 1;
140
+
141
+ if (rb_respond_to(bzf->io, id_closed)) {
142
+ closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
143
+ }
144
+ if (bzf->buf) {
145
+ if (!closed && bzf->state == BZ_OK) {
146
+ bzf->bzs.next_in = NULL;
147
+ bzf->bzs.avail_in = 0;
148
+ do {
149
+ bzf->bzs.next_out = bzf->buf;
150
+ bzf->bzs.avail_out = bzf->buflen;
151
+ bzf->state = BZ2_bzCompress(&(bzf->bzs), BZ_FINISH);
152
+ if (bzf->state != BZ_FINISH_OK &&
153
+ bzf->state != BZ_STREAM_END) {
154
+ break;
155
+ }
156
+ if (bzf->bzs.avail_out < bzf->buflen) {
157
+ rb_funcall(bzf->io, id_write, 1,
158
+ rb_str_new(bzf->buf,
159
+ bzf->buflen - bzf->bzs.avail_out));
160
+ }
161
+ } while (bzf->state != BZ_STREAM_END);
162
+ }
163
+ free(bzf->buf);
164
+ bzf->buf = 0;
165
+ BZ2_bzCompressEnd(&(bzf->bzs));
166
+ bzf->state = BZ_OK;
167
+ if (!closed && rb_respond_to(bzf->io, id_flush)) {
168
+ rb_funcall2(bzf->io, id_flush, 0, 0);
169
+ }
170
+ }
171
+ return closed;
172
+ }
173
+
174
+ static VALUE
175
+ bz_writer_internal_close(bzf)
176
+ struct bz_file *bzf;
177
+ {
178
+ struct bz_iv *bziv;
179
+ int pos, closed;
180
+ VALUE res;
181
+
182
+ closed = bz_writer_internal_flush(bzf);
183
+ bziv = bz_find_struct(bzf->io, 0, &pos);
184
+ if (bziv) {
185
+ if (TYPE(bzf->io) == T_FILE) {
186
+ RFILE(bzf->io)->fptr->finalize = bziv->finalize;
187
+ }
188
+ else if (TYPE(bziv->io) == T_DATA) {
189
+ RDATA(bziv->io)->dfree = bziv->finalize;
190
+ }
191
+ RDATA(bziv->bz2)->dfree = ruby_xfree;
192
+ bziv->bz2 = 0;
193
+ rb_ary_delete_at(bz_internal_ary, pos);
194
+ }
195
+ if (bzf->flags & BZ2_RB_CLOSE) {
196
+ bzf->flags &= ~BZ2_RB_CLOSE;
197
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
198
+ rb_funcall2(bzf->io, id_close, 0, 0);
199
+ }
200
+ res = Qnil;
201
+ }
202
+ else {
203
+ res = bzf->io;
204
+ }
205
+ bzf->io = Qnil;
206
+ return res;
207
+ }
208
+
209
+ static VALUE
210
+ bz_internal_finalize(ary, obj)
211
+ VALUE ary, obj;
212
+ {
213
+ VALUE elem;
214
+ int closed, i;
215
+ struct bz_iv *bziv;
216
+ struct bz_file *bzf;
217
+
218
+ for (i = 0; i < RARRAY(bz_internal_ary)->len; i++) {
219
+ elem = RARRAY(bz_internal_ary)->ptr[i];
220
+ Data_Get_Struct(elem, struct bz_iv, bziv);
221
+ if (bziv->bz2) {
222
+ RDATA(bziv->bz2)->dfree = ruby_xfree;
223
+ if (TYPE(bziv->io) == T_FILE) {
224
+ RFILE(bziv->io)->fptr->finalize = bziv->finalize;
225
+ }
226
+ else if (TYPE(bziv->io) == T_DATA) {
227
+ RDATA(bziv->io)->dfree = bziv->finalize;
228
+ }
229
+ Data_Get_Struct(bziv->bz2, struct bz_file, bzf);
230
+ closed = bz_writer_internal_flush(bzf);
231
+ if (bzf->flags & BZ2_RB_CLOSE) {
232
+ bzf->flags &= ~BZ2_RB_CLOSE;
233
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
234
+ rb_funcall2(bzf->io, id_close, 0, 0);
235
+ }
236
+ }
237
+ }
238
+ }
239
+ return Qnil;
240
+ }
241
+
242
+ static VALUE
243
+ bz_writer_close(obj)
244
+ VALUE obj;
245
+ {
246
+ struct bz_file *bzf;
247
+ VALUE res;
248
+
249
+ Get_BZ2(obj, bzf);
250
+ res = bz_writer_internal_close(bzf);
251
+ if (!NIL_P(res) && (bzf->flags & BZ2_RB_INTERNAL)) {
252
+ RBASIC(res)->klass = rb_cString;
253
+ }
254
+ return res;
255
+ }
256
+
257
+ static VALUE
258
+ bz_writer_close_bang(obj)
259
+ VALUE obj;
260
+ {
261
+ struct bz_file *bzf;
262
+ int closed;
263
+
264
+ Get_BZ2(obj, bzf);
265
+ closed = bzf->flags & (BZ2_RB_INTERNAL|BZ2_RB_CLOSE);
266
+ bz_writer_close(obj);
267
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
268
+ if (rb_respond_to(bzf->io, id_closed)) {
269
+ closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
270
+ }
271
+ if (!closed) {
272
+ rb_funcall2(bzf->io, id_close, 0, 0);
273
+ }
274
+ }
275
+ return Qnil;
276
+ }
277
+
278
/* Free hook of Writer objects: close the stream, then release the struct. */
static void
bz_writer_free(bzf)
    struct bz_file *bzf;
{
    (void)bz_writer_internal_close(bzf);
    ruby_xfree(bzf);
}
285
+
286
+ static void
287
+ bz_io_data_finalize(ptr)
288
+ void *ptr;
289
+ {
290
+ struct bz_file *bzf;
291
+ struct bz_iv *bziv;
292
+ int pos;
293
+
294
+ bziv = bz_find_struct(0, ptr, &pos);
295
+ if (bziv) {
296
+ rb_ary_delete_at(bz_internal_ary, pos);
297
+ Data_Get_Struct(bziv->bz2, struct bz_file, bzf);
298
+ rb_protect(bz_writer_internal_flush, (VALUE)bzf, 0);
299
+ RDATA(bziv->bz2)->dfree = ruby_xfree;
300
+ if (bziv->finalize) {
301
+ (*bziv->finalize)(ptr);
302
+ }
303
+ else if (TYPE(bzf->io) == T_FILE) {
304
+ OpenFile *file = (OpenFile *)ptr;
305
+ if (file->f) {
306
+ fclose(file->f);
307
+ file->f = 0;
308
+ }
309
+ if (file->f2) {
310
+ fclose(file->f2);
311
+ file->f2 = 0;
312
+ }
313
+ }
314
+ }
315
+ }
316
+
317
/*
 * Allocation callback given to libbzip2 (bz_stream.bzalloc): returns room
 * for m items of n bytes taken from Ruby's allocator.
 * A negative count or a product that would overflow yields a null pointer
 * instead of a silently truncated allocation.
 */
static void *
bz_malloc(opaque, m, n)
    void *opaque;
    int m, n;
{
    size_t count, size;

    if (m < 0 || n < 0) {
        return 0;
    }
    count = (size_t)m;
    size = (size_t)n;
    /* keep the product within half the size_t range so it also fits a long */
    if (size != 0 && count > (((size_t)-1) >> 1) / size) {
        return 0;
    }
    return ruby_xmalloc(count * size);
}
324
+
325
/* Release callback given to libbzip2 (bz_stream.bzfree); opaque is unused. */
static void
bz_free(opaque, p)
    void *opaque, *p;
{
    void *block = p;

    ruby_xfree(block);
}
331
+
332
+ #define DEFAULT_BLOCKS 9
333
+
334
+ static VALUE
335
+ bz_writer_s_alloc(obj)
336
+ VALUE obj;
337
+ {
338
+ struct bz_file *bzf;
339
+ VALUE res;
340
+ res = Data_Make_Struct(obj, struct bz_file, bz_file_mark,
341
+ bz_writer_free, bzf);
342
+ bzf->bzs.bzalloc = bz_malloc;
343
+ bzf->bzs.bzfree = bz_free;
344
+ bzf->blocks = DEFAULT_BLOCKS;
345
+ bzf->state = BZ_OK;
346
+ return res;
347
+ }
348
+
349
+ static VALUE
350
+ bz_writer_flush(obj)
351
+ VALUE obj;
352
+ {
353
+ struct bz_file *bzf;
354
+
355
+ Get_BZ2(obj, bzf);
356
+ if (bzf->flags & BZ2_RB_INTERNAL) {
357
+ return bz_writer_close(obj);
358
+ }
359
+ bz_writer_internal_flush(bzf);
360
+ return Qnil;
361
+ }
362
+
363
+ static VALUE
364
+ bz_writer_s_open(argc, argv, obj)
365
+ int argc;
366
+ VALUE obj, *argv;
367
+ {
368
+ VALUE res;
369
+ struct bz_file *bzf;
370
+
371
+ if (argc < 1) {
372
+ rb_raise(rb_eArgError, "invalid number of arguments");
373
+ }
374
+ if (argc == 1) {
375
+ argv[0] = rb_funcall(rb_mKernel, id_open, 2, argv[0],
376
+ rb_str_new2("wb"));
377
+ }
378
+ else {
379
+ argv[1] = rb_funcall2(rb_mKernel, id_open, 2, argv);
380
+ argv += 1;
381
+ argc -= 1;
382
+ }
383
+ res = rb_funcall2(obj, id_new, argc, argv);
384
+ Data_Get_Struct(res, struct bz_file, bzf);
385
+ bzf->flags |= BZ2_RB_CLOSE;
386
+ if (rb_block_given_p()) {
387
+ return rb_ensure(rb_yield, res, bz_writer_close, res);
388
+ }
389
+ return res;
390
+ }
391
+
392
+ static VALUE
393
+ bz_str_write(obj, str)
394
+ VALUE obj, str;
395
+ {
396
+ if (TYPE(str) != T_STRING) {
397
+ rb_raise(rb_eArgError, "expected a String");
398
+ }
399
+ if (RSTRING(str)->len) {
400
+ rb_str_cat(obj, RSTRING(str)->ptr, RSTRING(str)->len);
401
+ }
402
+ return str;
403
+ }
404
+
405
+ static VALUE
406
+ bz_str_closed(obj)
407
+ VALUE obj;
408
+ {
409
+ return Qfalse;
410
+ }
411
+
412
+ static VALUE
413
+ bz_writer_init(argc, argv, obj)
414
+ int argc;
415
+ VALUE obj, *argv;
416
+ {
417
+ struct bz_file *bzf;
418
+ int blocks = DEFAULT_BLOCKS;
419
+ int work = 0;
420
+ VALUE a, b, c;
421
+
422
+ switch(rb_scan_args(argc, argv, "03", &a, &b, &c)) {
423
+ case 3:
424
+ work = NUM2INT(c);
425
+ /* ... */
426
+ case 2:
427
+ blocks = NUM2INT(b);
428
+ }
429
+ Data_Get_Struct(obj, struct bz_file, bzf);
430
+ if (NIL_P(a)) {
431
+ a = rb_str_new(0, 0);
432
+ rb_define_method(rb_singleton_class(a), "write", bz_str_write, 1);
433
+ rb_define_method(rb_singleton_class(a), "closed?", bz_str_closed, 0);
434
+ bzf->flags |= BZ2_RB_INTERNAL;
435
+ }
436
+ else {
437
+ VALUE iv;
438
+ struct bz_iv *bziv;
439
+ OpenFile *fptr;
440
+
441
+ rb_io_taint_check(a);
442
+ if (!rb_respond_to(a, id_write)) {
443
+ rb_raise(rb_eArgError, "first argument must respond to #write");
444
+ }
445
+ if (TYPE(a) == T_FILE) {
446
+ GetOpenFile(a, fptr);
447
+ rb_io_check_writable(fptr);
448
+ }
449
+ else if (rb_respond_to(a, id_closed)) {
450
+ iv = rb_funcall2(a, id_closed, 0, 0);
451
+ if (RTEST(iv)) {
452
+ rb_raise(rb_eArgError, "closed object");
453
+ }
454
+ }
455
+ bziv = bz_find_struct(a, 0, 0);
456
+ if (bziv) {
457
+ if (RTEST(bziv->bz2)) {
458
+ rb_raise(rb_eArgError, "invalid data type");
459
+ }
460
+ bziv->bz2 = obj;
461
+ }
462
+ else {
463
+ iv = Data_Make_Struct(rb_cData, struct bz_iv, 0, free, bziv);
464
+ bziv->io = a;
465
+ bziv->bz2 = obj;
466
+ rb_ary_push(bz_internal_ary, iv);
467
+ }
468
+ switch (TYPE(a)) {
469
+ case T_FILE:
470
+ bziv->finalize = RFILE(a)->fptr->finalize;
471
+ RFILE(a)->fptr->finalize = bz_io_data_finalize;
472
+ break;
473
+ case T_DATA:
474
+ bziv->finalize = RDATA(a)->dfree;
475
+ RDATA(a)->dfree = bz_io_data_finalize;
476
+ break;
477
+ }
478
+ }
479
+ bzf->io = a;
480
+ bzf->blocks = blocks;
481
+ bzf->work = work;
482
+ return obj;
483
+ }
484
+
485
+ #define BZ_RB_BLOCKSIZE 4096
486
+
487
+ static VALUE
488
+ bz_writer_write(obj, a)
489
+ VALUE obj, a;
490
+ {
491
+ struct bz_file *bzf;
492
+ int n;
493
+
494
+ a = rb_obj_as_string(a);
495
+ Get_BZ2(obj, bzf);
496
+ if (!bzf->buf) {
497
+ if (bzf->state != BZ_OK) {
498
+ bz_raise(bzf->state);
499
+ }
500
+ bzf->state = BZ2_bzCompressInit(&(bzf->bzs), bzf->blocks,
501
+ 0, bzf->work);
502
+ if (bzf->state != BZ_OK) {
503
+ bz_writer_internal_flush(bzf);
504
+ bz_raise(bzf->state);
505
+ }
506
+ bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1);
507
+ bzf->buflen = BZ_RB_BLOCKSIZE;
508
+ bzf->buf[0] = bzf->buf[bzf->buflen] = '\0';
509
+ }
510
+ bzf->bzs.next_in = RSTRING(a)->ptr;
511
+ bzf->bzs.avail_in = RSTRING(a)->len;
512
+ while (bzf->bzs.avail_in) {
513
+ bzf->bzs.next_out = bzf->buf;
514
+ bzf->bzs.avail_out = bzf->buflen;
515
+ bzf->state = BZ2_bzCompress(&(bzf->bzs), BZ_RUN);
516
+ if (bzf->state == BZ_SEQUENCE_ERROR || bzf->state == BZ_PARAM_ERROR) {
517
+ bz_writer_internal_flush(bzf);
518
+ bz_raise(bzf->state);
519
+ }
520
+ bzf->state = BZ_OK;
521
+ if (bzf->bzs.avail_out < bzf->buflen) {
522
+ n = bzf->buflen - bzf->bzs.avail_out;
523
+ rb_funcall(bzf->io, id_write, 1, rb_str_new(bzf->buf, n));
524
+ }
525
+ }
526
+ return INT2NUM(RSTRING(a)->len);
527
+ }
528
+
529
+ static VALUE
530
+ bz_writer_putc(obj, a)
531
+ VALUE obj, a;
532
+ {
533
+ char c = NUM2CHR(a);
534
+ return bz_writer_write(obj, rb_str_new(&c, 1));
535
+ }
536
+
537
+ static VALUE
538
+ bz_compress(argc, argv, obj)
539
+ int argc;
540
+ VALUE obj, *argv;
541
+ {
542
+ VALUE bz2, str;
543
+
544
+ if (!argc) {
545
+ rb_raise(rb_eArgError, "need a String to compress");
546
+ }
547
+ str = rb_str_to_str(argv[0]);
548
+ argv[0] = Qnil;
549
+ bz2 = rb_funcall2(bz_cWriter, id_new, argc, argv);
550
+ if (OBJ_TAINTED(str)) {
551
+ struct bz_file *bzf;
552
+ Data_Get_Struct(bz2, struct bz_file, bzf);
553
+ OBJ_TAINT(bzf->io);
554
+ }
555
+ bz_writer_write(bz2, str);
556
+ return bz_writer_close(bz2);
557
+ }
558
+
559
+ static VALUE
560
+ bz_reader_s_alloc(obj)
561
+ VALUE obj;
562
+ {
563
+ struct bz_file *bzf;
564
+ VALUE res;
565
+ res = Data_Make_Struct(obj, struct bz_file, bz_file_mark,
566
+ ruby_xfree, bzf);
567
+ bzf->bzs.bzalloc = bz_malloc;
568
+ bzf->bzs.bzfree = bz_free;
569
+ bzf->blocks = DEFAULT_BLOCKS;
570
+ bzf->state = BZ_OK;
571
+ return res;
572
+ }
573
+
574
+ static VALUE bz_reader_close __((VALUE));
575
+
576
+ static VALUE
577
+ bz_reader_s_open(argc, argv, obj)
578
+ int argc;
579
+ VALUE obj, *argv;
580
+ {
581
+ VALUE res;
582
+ struct bz_file *bzf;
583
+
584
+ if (argc < 1) {
585
+ rb_raise(rb_eArgError, "invalid number of arguments");
586
+ }
587
+ argv[0] = rb_funcall2(rb_mKernel, id_open, 1, argv);
588
+ if (NIL_P(argv[0])) return Qnil;
589
+ res = rb_funcall2(obj, id_new, argc, argv);
590
+ Data_Get_Struct(res, struct bz_file, bzf);
591
+ bzf->flags |= BZ2_RB_CLOSE;
592
+ if (rb_block_given_p()) {
593
+ return rb_ensure(rb_yield, res, bz_reader_close, res);
594
+ }
595
+ return res;
596
+ }
597
+
598
+ static VALUE
599
+ bz_reader_init(argc, argv, obj)
600
+ int argc;
601
+ VALUE obj, *argv;
602
+ {
603
+ struct bz_file *bzf;
604
+ int small = 0;
605
+ VALUE a, b;
606
+ int internal = 0;
607
+
608
+ if (rb_scan_args(argc, argv, "11", &a, &b) == 2) {
609
+ small = RTEST(b);
610
+ }
611
+ rb_io_taint_check(a);
612
+ if (OBJ_TAINTED(a)) {
613
+ OBJ_TAINT(obj);
614
+ }
615
+ if (rb_respond_to(a, id_read)) {
616
+ if (TYPE(a) == T_FILE) {
617
+ OpenFile *fptr;
618
+
619
+ GetOpenFile(a, fptr);
620
+ rb_io_check_readable(fptr);
621
+ }
622
+ else if (rb_respond_to(a, id_closed)) {
623
+ VALUE iv = rb_funcall2(a, id_closed, 0, 0);
624
+ if (RTEST(iv)) {
625
+ rb_raise(rb_eArgError, "closed object");
626
+ }
627
+ }
628
+ }
629
+ else {
630
+ struct bz_str *bzs;
631
+ VALUE res;
632
+
633
+ if (!rb_respond_to(a, id_str)) {
634
+ rb_raise(rb_eArgError, "first argument must respond to #read");
635
+ }
636
+ a = rb_funcall2(a, id_str, 0, 0);
637
+ if (TYPE(a) != T_STRING) {
638
+ rb_raise(rb_eArgError, "#to_str must return a String");
639
+ }
640
+ res = Data_Make_Struct(bz_cInternal, struct bz_str,
641
+ bz_str_mark, ruby_xfree, bzs);
642
+ bzs->str = a;
643
+ a = res;
644
+ internal = BZ2_RB_INTERNAL;
645
+ }
646
+ Data_Get_Struct(obj, struct bz_file, bzf);
647
+ bzf->io = a;
648
+ bzf->small = small;
649
+ bzf->flags |= internal;
650
+ return obj;
651
+ }
652
+
653
+ static struct bz_file *
654
+ bz_get_bzf(obj)
655
+ VALUE obj;
656
+ {
657
+ struct bz_file *bzf;
658
+
659
+ Get_BZ2(obj, bzf);
660
+ if (!bzf->buf) {
661
+ if (bzf->state != BZ_OK) {
662
+ bz_raise(bzf->state);
663
+ }
664
+ bzf->state = BZ2_bzDecompressInit(&(bzf->bzs), 0, bzf->small);
665
+ if (bzf->state != BZ_OK) {
666
+ BZ2_bzDecompressEnd(&(bzf->bzs));
667
+ bz_raise(bzf->state);
668
+ }
669
+ bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1);
670
+ bzf->buflen = BZ_RB_BLOCKSIZE;
671
+ bzf->buf[0] = bzf->buf[bzf->buflen] = '\0';
672
+ bzf->bzs.total_out_hi32 = bzf->bzs.total_out_lo32 = 0;
673
+ bzf->bzs.next_out = bzf->buf;
674
+ bzf->bzs.avail_out = 0;
675
+ }
676
+ if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) {
677
+ return 0;
678
+ }
679
+ return bzf;
680
+ }
681
+
682
+ static int
683
+ bz_next_available(bzf, in)
684
+ struct bz_file *bzf;
685
+ int in;
686
+ {
687
+ bzf->bzs.next_out = bzf->buf;
688
+ bzf->bzs.avail_out = 0;
689
+ if (bzf->state == BZ_STREAM_END) {
690
+ return BZ_STREAM_END;
691
+ }
692
+ if (!bzf->bzs.avail_in) {
693
+ bzf->in = rb_funcall(bzf->io, id_read, 1, INT2FIX(1024));
694
+ if (TYPE(bzf->in) != T_STRING || RSTRING(bzf->in)->len == 0) {
695
+ BZ2_bzDecompressEnd(&(bzf->bzs));
696
+ bzf->bzs.avail_out = 0;
697
+ bzf->state = BZ_UNEXPECTED_EOF;
698
+ bz_raise(bzf->state);
699
+ }
700
+ bzf->bzs.next_in = RSTRING(bzf->in)->ptr;
701
+ bzf->bzs.avail_in = RSTRING(bzf->in)->len;
702
+ }
703
+ if ((bzf->buflen - in) < (BZ_RB_BLOCKSIZE / 2)) {
704
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen+BZ_RB_BLOCKSIZE+1);
705
+ bzf->buflen += BZ_RB_BLOCKSIZE;
706
+ bzf->buf[bzf->buflen] = '\0';
707
+ }
708
+ bzf->bzs.avail_out = bzf->buflen - in;
709
+ bzf->bzs.next_out = bzf->buf + in;
710
+ bzf->state = BZ2_bzDecompress(&(bzf->bzs));
711
+ if (bzf->state != BZ_OK) {
712
+ BZ2_bzDecompressEnd(&(bzf->bzs));
713
+ if (bzf->state != BZ_STREAM_END) {
714
+ bzf->bzs.avail_out = 0;
715
+ bz_raise(bzf->state);
716
+ }
717
+ }
718
+ bzf->bzs.avail_out = bzf->buflen - bzf->bzs.avail_out;
719
+ bzf->bzs.next_out = bzf->buf;
720
+ return 0;
721
+ }
722
+
723
+ #define ASIZE (1 << CHAR_BIT)
724
+
725
+ static VALUE
726
+ bz_read_until(bzf, str, len, td1)
727
+ struct bz_file *bzf;
728
+ char *str;
729
+ int len;
730
+ int *td1;
731
+ {
732
+ VALUE res;
733
+ int total, i, nex = 0;
734
+ char *p, *t, *tx, *end, *pend = str + len;
735
+
736
+ res = rb_str_new(0, 0);
737
+ while (1) {
738
+ total = bzf->bzs.avail_out;
739
+ if (len == 1) {
740
+ tx = memchr(bzf->bzs.next_out, *str, bzf->bzs.avail_out);
741
+ if (tx) {
742
+ i = tx - bzf->bzs.next_out + len;
743
+ res = rb_str_cat(res, bzf->bzs.next_out, i);
744
+ bzf->bzs.next_out += i;
745
+ bzf->bzs.avail_out -= i;
746
+ return res;
747
+ }
748
+ }
749
+ else {
750
+ tx = bzf->bzs.next_out;
751
+ end = bzf->bzs.next_out + bzf->bzs.avail_out;
752
+ while (tx + len <= end) {
753
+ for (p = str, t = tx; p != pend; ++p, ++t) {
754
+ if (*p != *t) break;
755
+ }
756
+ if (p == pend) {
757
+ i = tx - bzf->bzs.next_out + len;
758
+ res = rb_str_cat(res, bzf->bzs.next_out, i);
759
+ bzf->bzs.next_out += i;
760
+ bzf->bzs.avail_out -= i;
761
+ return res;
762
+ }
763
+ if (td1) {
764
+ tx += td1[(int)*(tx + len)];
765
+ }
766
+ else {
767
+ tx += 1;
768
+ }
769
+ }
770
+ }
771
+ nex = 0;
772
+ if (total) {
773
+ nex = len - 1;
774
+ res = rb_str_cat(res, bzf->bzs.next_out, total - nex);
775
+ if (nex) {
776
+ MEMMOVE(bzf->buf, bzf->bzs.next_out + total - nex, char, nex);
777
+ }
778
+ }
779
+ if (bz_next_available(bzf, nex) == BZ_STREAM_END) {
780
+ if (nex) {
781
+ res = rb_str_cat(res, bzf->buf, nex);
782
+ }
783
+ if (RSTRING(res)->len) {
784
+ return res;
785
+ }
786
+ return Qnil;
787
+ }
788
+ }
789
+ return Qnil;
790
+ }
791
+
792
+ static int
793
+ bz_read_while(bzf, c)
794
+ struct bz_file *bzf;
795
+ char c;
796
+ {
797
+ char *end;
798
+
799
+ while (1) {
800
+ end = bzf->bzs.next_out + bzf->bzs.avail_out;
801
+ while (bzf->bzs.next_out < end) {
802
+ if (c != *bzf->bzs.next_out) {
803
+ bzf->bzs.avail_out = end - bzf->bzs.next_out;
804
+ return *bzf->bzs.next_out;
805
+ }
806
+ ++bzf->bzs.next_out;
807
+ }
808
+ if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
809
+ return EOF;
810
+ }
811
+ }
812
+ return EOF;
813
+ }
814
+
815
+ static VALUE
816
+ bz_reader_read(argc, argv, obj)
817
+ int argc;
818
+ VALUE obj, *argv;
819
+ {
820
+ struct bz_file *bzf;
821
+ VALUE res, length;
822
+ int total;
823
+ int n;
824
+
825
+ rb_scan_args(argc, argv, "01", &length);
826
+ if (NIL_P(length)) {
827
+ n = -1;
828
+ }
829
+ else {
830
+ n = NUM2INT(length);
831
+ if (n < 0) {
832
+ rb_raise(rb_eArgError, "negative length %d given", n);
833
+ }
834
+ }
835
+ bzf = bz_get_bzf(obj);
836
+ if (!bzf) {
837
+ return Qnil;
838
+ }
839
+ res = rb_str_new(0, 0);
840
+ if (OBJ_TAINTED(obj)) {
841
+ OBJ_TAINT(res);
842
+ }
843
+ if (n == 0) {
844
+ return res;
845
+ }
846
+ while (1) {
847
+ total = bzf->bzs.avail_out;
848
+ if (n != -1 && (RSTRING(res)->len + total) >= n) {
849
+ n -= RSTRING(res)->len;
850
+ res = rb_str_cat(res, bzf->bzs.next_out, n);
851
+ bzf->bzs.next_out += n;
852
+ bzf->bzs.avail_out -= n;
853
+ return res;
854
+ }
855
+ if (total) {
856
+ res = rb_str_cat(res, bzf->bzs.next_out, total);
857
+ }
858
+ if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
859
+ return res;
860
+ }
861
+ }
862
+ return Qnil;
863
+ }
864
+
865
+ static int
866
+ bz_getc(obj)
867
+ VALUE obj;
868
+ {
869
+ VALUE length = INT2FIX(1);
870
+ VALUE res = bz_reader_read(1, &length, obj);
871
+ if (NIL_P(res) || RSTRING(res)->len == 0) {
872
+ return EOF;
873
+ }
874
+ return RSTRING(res)->ptr[0];
875
+ }
876
+
877
+ static VALUE
878
+ bz_reader_ungetc(obj, a)
879
+ VALUE obj, a;
880
+ {
881
+ struct bz_file *bzf;
882
+ int c = NUM2INT(a);
883
+
884
+ Get_BZ2(obj, bzf);
885
+ if (!bzf->buf) {
886
+ bz_raise(BZ_SEQUENCE_ERROR);
887
+ }
888
+ if (bzf->bzs.avail_out < bzf->buflen) {
889
+ bzf->bzs.next_out -= 1;
890
+ bzf->bzs.next_out[0] = c;
891
+ bzf->bzs.avail_out += 1;
892
+ }
893
+ else {
894
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + 2);
895
+ bzf->buf[bzf->buflen++] = c;
896
+ bzf->buf[bzf->buflen] = '\0';
897
+ bzf->bzs.next_out = bzf->buf;
898
+ bzf->bzs.avail_out = bzf->buflen;
899
+ }
900
+ return Qnil;
901
+ }
902
+
903
+ static VALUE
904
+ bz_reader_ungets(obj, a)
905
+ VALUE obj, a;
906
+ {
907
+ struct bz_file *bzf;
908
+
909
+ Check_Type(a, T_STRING);
910
+ Get_BZ2(obj, bzf);
911
+ if (!bzf->buf) {
912
+ bz_raise(BZ_SEQUENCE_ERROR);
913
+ }
914
+ if ((bzf->bzs.avail_out + RSTRING(a)->len) < bzf->buflen) {
915
+ bzf->bzs.next_out -= RSTRING(a)->len;
916
+ MEMCPY(bzf->bzs.next_out, RSTRING(a)->ptr, char, RSTRING(a)->len);
917
+ bzf->bzs.avail_out += RSTRING(a)->len;
918
+ }
919
+ else {
920
+ bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + RSTRING(a)->len + 1);
921
+ MEMCPY(bzf->buf + bzf->buflen, RSTRING(a)->ptr, char,RSTRING(a)->len);
922
+ bzf->buflen += RSTRING(a)->len;
923
+ bzf->buf[bzf->buflen] = '\0';
924
+ bzf->bzs.next_out = bzf->buf;
925
+ bzf->bzs.avail_out = bzf->buflen;
926
+ }
927
+ return Qnil;
928
+ }
929
+
930
+ VALUE
931
+ bz_reader_gets(obj)
932
+ VALUE obj;
933
+ {
934
+ struct bz_file *bzf;
935
+ VALUE str = Qnil;
936
+
937
+ bzf = bz_get_bzf(obj);
938
+ if (bzf) {
939
+ str = bz_read_until(bzf, "\n", 1, 0);
940
+ if (!NIL_P(str)) {
941
+ bzf->lineno++;
942
+ OBJ_TAINT(str);
943
+ }
944
+ }
945
+ return str;
946
+ }
947
+
948
+ static VALUE
949
+ bz_reader_gets_internal(argc, argv, obj, td, init)
950
+ int argc;
951
+ VALUE obj, *argv;
952
+ int *td, init;
953
+ {
954
+ struct bz_file *bzf;
955
+ VALUE rs, res;
956
+ char *rsptr;
957
+ int rslen, rspara, *td1;
958
+
959
+ rs = rb_rs;
960
+ if (argc) {
961
+ rb_scan_args(argc, argv, "1", &rs);
962
+ if (!NIL_P(rs)) {
963
+ Check_Type(rs, T_STRING);
964
+ }
965
+ }
966
+ if (NIL_P(rs)) {
967
+ return bz_reader_read(1, &rs, obj);
968
+ }
969
+ rslen = RSTRING(rs)->len;
970
+ if (rs == rb_default_rs || (rslen == 1 && RSTRING(rs)->ptr[0] == '\n')) {
971
+ return bz_reader_gets(obj);
972
+ }
973
+
974
+ if (rslen == 0) {
975
+ rsptr = "\n\n";
976
+ rslen = 2;
977
+ rspara = 1;
978
+ }
979
+ else {
980
+ rsptr = RSTRING(rs)->ptr;
981
+ rspara = 0;
982
+ }
983
+
984
+ bzf = bz_get_bzf(obj);
985
+ if (!bzf) {
986
+ return Qnil;
987
+ }
988
+ if (rspara) {
989
+ bz_read_while(bzf, '\n');
990
+ }
991
+ td1 = 0;
992
+ if (rslen != 1) {
993
+ if (init) {
994
+ int i;
995
+
996
+ for (i = 0; i < ASIZE; i++) {
997
+ td[i] = rslen + 1;
998
+ }
999
+ for (i = 0; i < rslen; i++) {
1000
+ td[(int)*(rsptr + i)] = rslen - i;
1001
+ }
1002
+ }
1003
+ td1 = td;
1004
+ }
1005
+
1006
+ res = bz_read_until(bzf, rsptr, rslen, td1);
1007
+ if (rspara) {
1008
+ bz_read_while(bzf, '\n');
1009
+ }
1010
+
1011
+ if (!NIL_P(res)) {
1012
+ bzf->lineno++;
1013
+ OBJ_TAINT(res);
1014
+ }
1015
+ return res;
1016
+ }
1017
+
1018
+ static VALUE
1019
+ bz_reader_set_unused(obj, a)
1020
+ VALUE obj, a;
1021
+ {
1022
+ struct bz_file *bzf;
1023
+
1024
+ Check_Type(a, T_STRING);
1025
+ Get_BZ2(obj, bzf);
1026
+ if (!bzf->in) {
1027
+ bzf->in = rb_str_new(RSTRING(a)->ptr, RSTRING(a)->len);
1028
+ }
1029
+ else {
1030
+ bzf->in = rb_str_cat(bzf->in, RSTRING(a)->ptr, RSTRING(a)->len);
1031
+ }
1032
+ bzf->bzs.next_in = RSTRING(bzf->in)->ptr;
1033
+ bzf->bzs.avail_in = RSTRING(bzf->in)->len;
1034
+ return Qnil;
1035
+ }
1036
+
1037
+ static VALUE
1038
+ bz_reader_getc(obj)
1039
+ VALUE obj;
1040
+ {
1041
+ VALUE str;
1042
+ VALUE len = INT2FIX(1);
1043
+
1044
+ str = bz_reader_read(1, &len, obj);
1045
+ if (NIL_P(str) || RSTRING(str)->len == 0) {
1046
+ return Qnil;
1047
+ }
1048
+ return INT2FIX(RSTRING(str)->ptr[0] & 0xff);
1049
+ }
1050
+
1051
+ static void
1052
+ bz_eoz_error()
1053
+ {
1054
+ rb_raise(bz_eEOZError, "End of Zip component reached");
1055
+ }
1056
+
1057
+ static VALUE
1058
+ bz_reader_readchar(obj)
1059
+ VALUE obj;
1060
+ {
1061
+ VALUE res = bz_reader_getc(obj);
1062
+
1063
+ if (NIL_P(res)) {
1064
+ bz_eoz_error();
1065
+ }
1066
+ return res;
1067
+ }
1068
+
1069
+ static VALUE
1070
+ bz_reader_gets_m(argc, argv, obj)
1071
+ int argc;
1072
+ VALUE obj, *argv;
1073
+ {
1074
+ int td[ASIZE];
1075
+ VALUE str = bz_reader_gets_internal(argc, argv, obj, td, Qtrue);
1076
+
1077
+ if (!NIL_P(str)) {
1078
+ rb_lastline_set(str);
1079
+ }
1080
+ return str;
1081
+ }
1082
+
1083
+ static VALUE
1084
+ bz_reader_readline(argc, argv, obj)
1085
+ int argc;
1086
+ VALUE obj, *argv;
1087
+ {
1088
+ VALUE res = bz_reader_gets_m(argc, argv, obj);
1089
+
1090
+ if (NIL_P(res)) {
1091
+ bz_eoz_error();
1092
+ }
1093
+ return res;
1094
+ }
1095
+
1096
+ static VALUE
1097
+ bz_reader_readlines(argc, argv, obj)
1098
+ int argc;
1099
+ VALUE obj, *argv;
1100
+ {
1101
+ VALUE line, ary;
1102
+ int td[ASIZE], in;
1103
+
1104
+ in = Qtrue;
1105
+ ary = rb_ary_new();
1106
+ while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
1107
+ in = Qfalse;
1108
+ rb_ary_push(ary, line);
1109
+ }
1110
+ return ary;
1111
+ }
1112
+
1113
+ static VALUE
1114
+ bz_reader_each_line(argc, argv, obj)
1115
+ int argc;
1116
+ VALUE obj, *argv;
1117
+ {
1118
+ VALUE line;
1119
+ int td[ASIZE], in;
1120
+
1121
+ in = Qtrue;
1122
+ while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
1123
+ in = Qfalse;
1124
+ rb_yield(line);
1125
+ }
1126
+ return obj;
1127
+ }
1128
+
1129
+ static VALUE
1130
+ bz_reader_each_byte(obj)
1131
+ VALUE obj;
1132
+ {
1133
+ int c;
1134
+
1135
+ while ((c = bz_getc(obj)) != EOF) {
1136
+ rb_yield(INT2FIX(c & 0xff));
1137
+ }
1138
+ return obj;
1139
+ }
1140
+
1141
+ static VALUE
1142
+ bz_reader_unused(obj)
1143
+ VALUE obj;
1144
+ {
1145
+ struct bz_file *bzf;
1146
+ VALUE res;
1147
+
1148
+ Get_BZ2(obj, bzf);
1149
+ if (!bzf->in || bzf->state != BZ_STREAM_END) {
1150
+ return Qnil;
1151
+ }
1152
+ if (bzf->bzs.avail_in) {
1153
+ res = rb_tainted_str_new(bzf->bzs.next_in, bzf->bzs.avail_in);
1154
+ bzf->bzs.avail_in = 0;
1155
+ }
1156
+ else {
1157
+ res = rb_tainted_str_new(0, 0);
1158
+ }
1159
+ return res;
1160
+ }
1161
+
1162
+ static VALUE
1163
+ bz_reader_eoz(obj)
1164
+ VALUE obj;
1165
+ {
1166
+ struct bz_file *bzf;
1167
+
1168
+ Get_BZ2(obj, bzf);
1169
+ if (!bzf->in || !bzf->buf) {
1170
+ return Qnil;
1171
+ }
1172
+ if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) {
1173
+ return Qtrue;
1174
+ }
1175
+ return Qfalse;
1176
+ }
1177
+
1178
+ static VALUE
1179
+ bz_reader_eof(obj)
1180
+ VALUE obj;
1181
+ {
1182
+ struct bz_file *bzf;
1183
+ VALUE res;
1184
+
1185
+ res = bz_reader_eoz(obj);
1186
+ if (RTEST(res)) {
1187
+ Get_BZ2(obj, bzf);
1188
+ if (bzf->bzs.avail_in) {
1189
+ res = Qfalse;
1190
+ }
1191
+ else {
1192
+ res = bz_reader_getc(obj);
1193
+ if (NIL_P(res)) {
1194
+ res = Qtrue;
1195
+ }
1196
+ else {
1197
+ bz_reader_ungetc(res);
1198
+ res = Qfalse;
1199
+ }
1200
+ }
1201
+ }
1202
+ return res;
1203
+ }
1204
+
1205
+ static VALUE
1206
+ bz_reader_closed(obj)
1207
+ VALUE obj;
1208
+ {
1209
+ struct bz_file *bzf;
1210
+
1211
+ Data_Get_Struct(obj, struct bz_file, bzf);
1212
+ return RTEST(bzf->io)?Qfalse:Qtrue;
1213
+ }
1214
+
1215
+ static VALUE
1216
+ bz_reader_close(obj)
1217
+ VALUE obj;
1218
+ {
1219
+ struct bz_file *bzf;
1220
+ VALUE res;
1221
+
1222
+ Get_BZ2(obj, bzf);
1223
+ if (bzf->buf) {
1224
+ free(bzf->buf);
1225
+ bzf->buf = 0;
1226
+ }
1227
+ if (bzf->state == BZ_OK) {
1228
+ BZ2_bzDecompressEnd(&(bzf->bzs));
1229
+ }
1230
+ if (bzf->flags & BZ2_RB_CLOSE) {
1231
+ int closed = 0;
1232
+ if (rb_respond_to(bzf->io, id_closed)) {
1233
+ VALUE iv = rb_funcall2(bzf->io, id_closed, 0, 0);
1234
+ closed = RTEST(iv);
1235
+ }
1236
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
1237
+ rb_funcall2(bzf->io, id_close, 0, 0);
1238
+ }
1239
+ }
1240
+ if (bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL)) {
1241
+ res = Qnil;
1242
+ }
1243
+ else {
1244
+ res = bzf->io;
1245
+ }
1246
+ bzf->io = 0;
1247
+ return res;
1248
+ }
1249
+
1250
+ static VALUE
1251
+ bz_reader_finish(obj)
1252
+ VALUE obj;
1253
+ {
1254
+ struct bz_file *bzf;
1255
+
1256
+ Get_BZ2(obj, bzf);
1257
+ if (bzf->buf) {
1258
+ rb_funcall2(obj, id_read, 0, 0);
1259
+ free(bzf->buf);
1260
+ }
1261
+ bzf->buf = 0;
1262
+ bzf->state = BZ_OK;
1263
+ return Qnil;
1264
+ }
1265
+
1266
+ static VALUE
1267
+ bz_reader_close_bang(obj)
1268
+ VALUE obj;
1269
+ {
1270
+ struct bz_file *bzf;
1271
+ int closed;
1272
+
1273
+ Get_BZ2(obj, bzf);
1274
+ closed = bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL);
1275
+ bz_reader_close(obj);
1276
+ if (!closed && rb_respond_to(bzf->io, id_close)) {
1277
+ if (rb_respond_to(bzf->io, id_closed)) {
1278
+ closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
1279
+ }
1280
+ if (!closed) {
1281
+ rb_funcall2(bzf->io, id_close, 0, 0);
1282
+ }
1283
+ }
1284
+ return Qnil;
1285
+ }
1286
+
1287
+ struct foreach_arg {
1288
+ int argc;
1289
+ VALUE sep;
1290
+ VALUE obj;
1291
+ };
1292
+
1293
+ static VALUE
1294
+ bz_reader_foreach_line(arg)
1295
+ struct foreach_arg *arg;
1296
+ {
1297
+ VALUE str;
1298
+ int td[ASIZE], in;
1299
+
1300
+ in = Qtrue;
1301
+ while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep,
1302
+ arg->obj, td, in))) {
1303
+ in = Qfalse;
1304
+ rb_yield(str);
1305
+ }
1306
+ return Qnil;
1307
+ }
1308
+
1309
+ static VALUE
1310
+ bz_reader_s_foreach(argc, argv, obj)
1311
+ int argc;
1312
+ VALUE obj, *argv;
1313
+ {
1314
+ VALUE fname, sep;
1315
+ struct foreach_arg arg;
1316
+ struct bz_file *bzf;
1317
+
1318
+ if (!rb_block_given_p()) {
1319
+ rb_raise(rb_eArgError, "call out of a block");
1320
+ }
1321
+ rb_scan_args(argc, argv, "11", &fname, &sep);
1322
+ Check_SafeStr(fname);
1323
+ arg.argc = argc - 1;
1324
+ arg.sep = sep;
1325
+ arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
1326
+ if (NIL_P(arg.obj)) return Qnil;
1327
+ arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
1328
+ Data_Get_Struct(arg.obj, struct bz_file, bzf);
1329
+ bzf->flags |= BZ2_RB_CLOSE;
1330
+ return rb_ensure(bz_reader_foreach_line, (VALUE)&arg, bz_reader_close, arg.obj);
1331
+ }
1332
+
1333
+ static VALUE
1334
+ bz_reader_i_readlines(arg)
1335
+ struct foreach_arg *arg;
1336
+ {
1337
+ VALUE str, res;
1338
+ int td[ASIZE], in;
1339
+
1340
+ in = Qtrue;
1341
+ res = rb_ary_new();
1342
+ while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep,
1343
+ arg->obj, td, in))) {
1344
+ in = Qfalse;
1345
+ rb_ary_push(res, str);
1346
+ }
1347
+ return res;
1348
+ }
1349
+
1350
+ static VALUE
1351
+ bz_reader_s_readlines(argc, argv, obj)
1352
+ int argc;
1353
+ VALUE obj, *argv;
1354
+ {
1355
+ VALUE fname, sep;
1356
+ struct foreach_arg arg;
1357
+ struct bz_file *bzf;
1358
+
1359
+ rb_scan_args(argc, argv, "11", &fname, &sep);
1360
+ Check_SafeStr(fname);
1361
+ arg.argc = argc - 1;
1362
+ arg.sep = sep;
1363
+ arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
1364
+ if (NIL_P(arg.obj)) return Qnil;
1365
+ arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
1366
+ Data_Get_Struct(arg.obj, struct bz_file, bzf);
1367
+ bzf->flags |= BZ2_RB_CLOSE;
1368
+ return rb_ensure(bz_reader_i_readlines, (VALUE)&arg, bz_reader_close, arg.obj);
1369
+ }
1370
+
1371
+ static VALUE
1372
+ bz_reader_lineno(obj)
1373
+ VALUE obj;
1374
+ {
1375
+ struct bz_file *bzf;
1376
+
1377
+ Get_BZ2(obj, bzf);
1378
+ return INT2NUM(bzf->lineno);
1379
+ }
1380
+
1381
+ static VALUE
1382
+ bz_reader_set_lineno(obj, lineno)
1383
+ VALUE obj, lineno;
1384
+ {
1385
+ struct bz_file *bzf;
1386
+
1387
+ Get_BZ2(obj, bzf);
1388
+ bzf->lineno = NUM2INT(lineno);
1389
+ return lineno;
1390
+ }
1391
+
1392
+ static VALUE
1393
+ bz_to_io(obj)
1394
+ VALUE obj;
1395
+ {
1396
+ struct bz_file *bzf;
1397
+
1398
+ Get_BZ2(obj, bzf);
1399
+ return bzf->io;
1400
+ }
1401
+
1402
+ static VALUE
1403
+ bz_str_read(argc, argv, obj)
1404
+ int argc;
1405
+ VALUE obj, *argv;
1406
+ {
1407
+ struct bz_str *bzs;
1408
+ VALUE res, len;
1409
+ int count;
1410
+
1411
+ Data_Get_Struct(obj, struct bz_str, bzs);
1412
+ rb_scan_args(argc, argv, "01", &len);
1413
+ if (NIL_P(len)) {
1414
+ count = RSTRING(bzs->str)->len;
1415
+ }
1416
+ else {
1417
+ count = NUM2INT(len);
1418
+ if (count < 0) {
1419
+ rb_raise(rb_eArgError, "negative length %d given", count);
1420
+ }
1421
+ }
1422
+ if (!count || bzs->pos == -1) {
1423
+ return Qnil;
1424
+ }
1425
+ if ((bzs->pos + count) >= RSTRING(bzs->str)->len) {
1426
+ res = rb_str_new(RSTRING(bzs->str)->ptr + bzs->pos,
1427
+ RSTRING(bzs->str)->len - bzs->pos);
1428
+ bzs->pos = -1;
1429
+ }
1430
+ else {
1431
+ res = rb_str_new(RSTRING(bzs->str)->ptr + bzs->pos, count);
1432
+ bzs->pos += count;
1433
+ }
1434
+ return res;
1435
+ }
1436
+
1437
+ static VALUE
1438
+ bz_uncompress(argc, argv, obj)
1439
+ int argc;
1440
+ VALUE obj, *argv;
1441
+ {
1442
+ VALUE bz2, nilv = Qnil;
1443
+
1444
+ if (!argc) {
1445
+ rb_raise(rb_eArgError, "need a String to Uncompress");
1446
+ }
1447
+ argv[0] = rb_str_to_str(argv[0]);
1448
+ bz2 = rb_funcall2(bz_cReader, id_new, argc, argv);
1449
+ return bz_reader_read(1, &nilv, bz2);
1450
+ }
1451
+
1452
+ static VALUE
1453
+ bz_s_new(argc, argv, obj)
1454
+ int argc;
1455
+ VALUE obj, *argv;
1456
+ {
1457
+ VALUE res = rb_funcall2(obj, rb_intern("allocate"), 0, 0);
1458
+ rb_obj_call_init(res, argc, argv);
1459
+ return res;
1460
+ }
1461
+
1462
+ static VALUE
1463
+ bz_proc_new(func, val)
1464
+ VALUE (*func)(ANYARGS);
1465
+ VALUE val;
1466
+ {
1467
+ VALUE tmp = Data_Wrap_Struct(rb_cData, 0, 0, 0);
1468
+ rb_define_singleton_method(tmp, "tmp_proc", func, 1);
1469
+ return rb_funcall2(rb_funcall(tmp, rb_intern("method"), 1,
1470
+ ID2SYM(rb_intern("tmp_proc"))),
1471
+ rb_intern("to_proc"), 0, 0);
1472
+ }
1473
+
1474
+ void Init_bzip2()
1475
+ {
1476
+ VALUE bz_mBZ2;
1477
+
1478
+ if (rb_const_defined_at(rb_cObject, rb_intern("BZ2"))) {
1479
+ rb_raise(rb_eNameError, "module already defined");
1480
+ }
1481
+
1482
+ bz_internal_ary = rb_ary_new();
1483
+ rb_global_variable(&bz_internal_ary);
1484
+ rb_funcall(rb_const_get(rb_cObject, rb_intern("ObjectSpace")),
1485
+ rb_intern("define_finalizer"), 2, bz_internal_ary,
1486
+ bz_proc_new(bz_internal_finalize, 0));
1487
+
1488
+ id_new = rb_intern("new");
1489
+ id_write = rb_intern("write");
1490
+ id_open = rb_intern("open");
1491
+ id_flush = rb_intern("flush");
1492
+ id_read = rb_intern("read");
1493
+ id_close = rb_intern("close");
1494
+ id_closed = rb_intern("closed?");
1495
+ id_str = rb_intern("to_str");
1496
+
1497
+ bz_mBZ2 = rb_define_module("BZ2");
1498
+ bz_eConfigError = rb_define_class_under(bz_mBZ2, "ConfigError", rb_eFatal);
1499
+ bz_eError = rb_define_class_under(bz_mBZ2, "Error", rb_eIOError);
1500
+ bz_eEOZError = rb_define_class_under(bz_mBZ2, "EOZError", bz_eError);
1501
+
1502
+ rb_define_module_function(bz_mBZ2, "compress", bz_compress, -1);
1503
+ rb_define_module_function(bz_mBZ2, "uncompress", bz_uncompress, -1);
1504
+ rb_define_module_function(bz_mBZ2, "decompress", bz_uncompress, -1);
1505
+ rb_define_module_function(bz_mBZ2, "bzip2", bz_compress, -1);
1506
+ rb_define_module_function(bz_mBZ2, "bunzip2", bz_uncompress, -1);
1507
+ /*
1508
+ Writer
1509
+ */
1510
+ bz_cWriter = rb_define_class_under(bz_mBZ2, "Writer", rb_cData);
1511
+ #if HAVE_RB_DEFINE_ALLOC_FUNC
1512
+ rb_define_alloc_func(bz_cWriter, bz_writer_s_alloc);
1513
+ #else
1514
+ rb_define_singleton_method(bz_cWriter, "allocate", bz_writer_s_alloc, 0);
1515
+ #endif
1516
+ rb_define_singleton_method(bz_cWriter, "new", bz_s_new, -1);
1517
+ rb_define_singleton_method(bz_cWriter, "open", bz_writer_s_open, -1);
1518
+ rb_define_method(bz_cWriter, "initialize", bz_writer_init, -1);
1519
+ rb_define_method(bz_cWriter, "write", bz_writer_write, 1);
1520
+ rb_define_method(bz_cWriter, "putc", bz_writer_putc, 1);
1521
+ rb_define_method(bz_cWriter, "puts", rb_io_puts, -1);
1522
+ rb_define_method(bz_cWriter, "print", rb_io_print, -1);
1523
+ rb_define_method(bz_cWriter, "printf", rb_io_printf, -1);
1524
+ rb_define_method(bz_cWriter, "<<", rb_io_addstr, 1);
1525
+ rb_define_method(bz_cWriter, "flush", bz_writer_flush, 0);
1526
+ rb_define_method(bz_cWriter, "finish", bz_writer_flush, 0);
1527
+ rb_define_method(bz_cWriter, "close", bz_writer_close, 0);
1528
+ rb_define_method(bz_cWriter, "close!", bz_writer_close_bang, 0);
1529
+ rb_define_method(bz_cWriter, "to_io", bz_to_io, 0);
1530
+ /*
1531
+ Reader
1532
+ */
1533
+ bz_cReader = rb_define_class_under(bz_mBZ2, "Reader", rb_cData);
1534
+ rb_include_module(bz_cReader, rb_mEnumerable);
1535
+ #if HAVE_RB_DEFINE_ALLOC_FUNC
1536
+ rb_define_alloc_func(bz_cReader, bz_reader_s_alloc);
1537
+ #else
1538
+ rb_define_singleton_method(bz_cReader, "allocate", bz_reader_s_alloc, 0);
1539
+ #endif
1540
+ rb_define_singleton_method(bz_cReader, "new", bz_s_new, -1);
1541
+ rb_define_singleton_method(bz_cReader, "open", bz_reader_s_open, -1);
1542
+ rb_define_singleton_method(bz_cReader, "foreach", bz_reader_s_foreach, -1);
1543
+ rb_define_singleton_method(bz_cReader, "readlines", bz_reader_s_readlines, -1);
1544
+ rb_define_method(bz_cReader, "initialize", bz_reader_init, -1);
1545
+ rb_define_method(bz_cReader, "read", bz_reader_read, -1);
1546
+ rb_define_method(bz_cReader, "unused", bz_reader_unused, 0);
1547
+ rb_define_method(bz_cReader, "unused=", bz_reader_set_unused, 1);
1548
+ rb_define_method(bz_cReader, "ungetc", bz_reader_ungetc, 1);
1549
+ rb_define_method(bz_cReader, "ungets", bz_reader_ungets, 1);
1550
+ rb_define_method(bz_cReader, "getc", bz_reader_getc, 0);
1551
+ rb_define_method(bz_cReader, "gets", bz_reader_gets_m, -1);
1552
+ rb_define_method(bz_cReader, "readchar", bz_reader_readchar, 0);
1553
+ rb_define_method(bz_cReader, "readline", bz_reader_readline, -1);
1554
+ rb_define_method(bz_cReader, "readlines", bz_reader_readlines, -1);
1555
+ rb_define_method(bz_cReader, "each", bz_reader_each_line, -1);
1556
+ rb_define_method(bz_cReader, "each_line", bz_reader_each_line, -1);
1557
+ rb_define_method(bz_cReader, "each_byte", bz_reader_each_byte, 0);
1558
+ rb_define_method(bz_cReader, "close", bz_reader_close, 0);
1559
+ rb_define_method(bz_cReader, "close!", bz_reader_close_bang, 0);
1560
+ rb_define_method(bz_cReader, "finish", bz_reader_finish, 0);
1561
+ rb_define_method(bz_cReader, "closed", bz_reader_closed, 0);
1562
+ rb_define_method(bz_cReader, "closed?", bz_reader_closed, 0);
1563
+ rb_define_method(bz_cReader, "eoz?", bz_reader_eoz, 0);
1564
+ rb_define_method(bz_cReader, "eoz", bz_reader_eoz, 0);
1565
+ rb_define_method(bz_cReader, "eof?", bz_reader_eof, 0);
1566
+ rb_define_method(bz_cReader, "eof", bz_reader_eof, 0);
1567
+ rb_define_method(bz_cReader, "lineno", bz_reader_lineno, 0);
1568
+ rb_define_method(bz_cReader, "lineno=", bz_reader_set_lineno, 1);
1569
+ rb_define_method(bz_cReader, "to_io", bz_to_io, 0);
1570
+ /*
1571
+ Internal
1572
+ */
1573
+ bz_cInternal = rb_define_class_under(bz_mBZ2, "InternalStr", rb_cData);
1574
+ #if HAVE_RB_DEFINE_ALLOC_FUNC
1575
+ rb_undef_alloc_func(bz_cInternal);
1576
+ #else
1577
+ rb_undef_method(CLASS_OF(bz_cInternal), "allocate");
1578
+ #endif
1579
+ rb_undef_method(CLASS_OF(bz_cInternal), "new");
1580
+ rb_undef_method(bz_cInternal, "initialize");
1581
+ rb_define_method(bz_cInternal, "read", bz_str_read, -1);
1582
+ }