bzip2-ruby-rb20 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +17 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +94 -0
- data/Gemfile +3 -0
- data/README.md +69 -0
- data/Rakefile +35 -0
- data/bzip2-ruby.gemspec +21 -0
- data/ext/bzip2/bzip2.c +250 -0
- data/ext/bzip2/common.c +55 -0
- data/ext/bzip2/common.h +76 -0
- data/ext/bzip2/extconf.rb +21 -0
- data/ext/bzip2/reader.c +1032 -0
- data/ext/bzip2/reader.h +35 -0
- data/ext/bzip2/writer.c +453 -0
- data/ext/bzip2/writer.h +22 -0
- data/lib/bzip2.rb +13 -0
- data/lib/bzip2/internals.rb +13 -0
- data/lib/bzip2/reader.rb +27 -0
- data/lib/bzip2/version.rb +3 -0
- data/lib/bzip2/writer.rb +64 -0
- data/spec/reader_spec.rb +261 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/writer_spec.rb +134 -0
- metadata +103 -0
data/ext/bzip2/common.c
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <bzlib.h>
|
3
|
+
|
4
|
+
#include "common.h"
|
5
|
+
|
6
|
+
void bz_file_mark(struct bz_file * bzf) {
|
7
|
+
rb_gc_mark(bzf->io);
|
8
|
+
rb_gc_mark(bzf->in);
|
9
|
+
}
|
10
|
+
|
11
|
+
/*
 * Allocation hook with the (opaque, int, int) signature that libbz2 expects
 * for bz_stream.bzalloc.
 *
 * opaque - user pointer supplied by the caller; unused here
 * m, n   - the two factors of the request; m * n bytes are allocated
 *
 * Returns a pointer obtained from malloc(), or NULL when either factor is
 * negative, when the product does not fit in a size_t, or when malloc()
 * itself fails.
 */
void * bz_malloc(void *opaque, int m, int n) {
    size_t count, size;

    (void) opaque;
    if (m < 0 || n < 0) {
        return NULL;
    }
    count = (size_t) m;
    size = (size_t) n;
    /* (size_t) -1 is the largest size_t value; refuse products that wrap */
    if (size != 0 && count > (size_t) -1 / size) {
        return NULL;
    }
    return malloc(count * size);
}
|
14
|
+
|
15
|
+
/*
 * Deallocation hook matching bz_malloc: releases +p+ with free().
 * The +opaque+ pointer is not used.
 */
void bz_free(void *opaque, void *p) {
    (void) opaque;
    free(p);
}
|
18
|
+
|
19
|
+
VALUE bz_raise(int error) {
|
20
|
+
VALUE exc;
|
21
|
+
const char *msg;
|
22
|
+
|
23
|
+
exc = bz_eError;
|
24
|
+
switch (error) {
|
25
|
+
case BZ_SEQUENCE_ERROR:
|
26
|
+
msg = "incorrect sequence";
|
27
|
+
break;
|
28
|
+
case BZ_PARAM_ERROR:
|
29
|
+
msg = "parameter out of range";
|
30
|
+
break;
|
31
|
+
case BZ_MEM_ERROR:
|
32
|
+
msg = "not enough memory is available";
|
33
|
+
break;
|
34
|
+
case BZ_DATA_ERROR:
|
35
|
+
msg = "data integrity error is detected";
|
36
|
+
break;
|
37
|
+
case BZ_DATA_ERROR_MAGIC:
|
38
|
+
msg = "compressed stream does not start with the correct magic bytes";
|
39
|
+
break;
|
40
|
+
case BZ_IO_ERROR:
|
41
|
+
msg = "error reading or writing";
|
42
|
+
break;
|
43
|
+
case BZ_UNEXPECTED_EOF:
|
44
|
+
exc = bz_eEOZError;
|
45
|
+
msg = "compressed file finishes before the logical end of stream is detected";
|
46
|
+
break;
|
47
|
+
case BZ_OUTBUFF_FULL:
|
48
|
+
msg = "output buffer full";
|
49
|
+
break;
|
50
|
+
default:
|
51
|
+
msg = "unknown error";
|
52
|
+
exc = bz_eError;
|
53
|
+
}
|
54
|
+
rb_raise(exc, "%s", msg);
|
55
|
+
}
|
data/ext/bzip2/common.h
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
#ifndef _RB_BZIP2_COMMON_H_
|
2
|
+
#define _RB_BZIP2_COMMON_H_
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
#include <bzlib.h>
|
6
|
+
|
7
|
+
#ifndef RUBY_19_COMPATIBILITY
|
8
|
+
# include <rubyio.h>
|
9
|
+
# include <version.h>
|
10
|
+
#else
|
11
|
+
# include <ruby/io.h>
|
12
|
+
#endif
|
13
|
+
|
14
|
+
/* Bits stored in bz_file.flags */
#define BZ2_RB_CLOSE    1   /* closing the wrapper also closes the wrapped io */
#define BZ2_RB_INTERNAL 2   /* the io is an internal wrapper around a String */

/* Initial size of the decompression buffer and the step it grows by */
#define BZ_RB_BLOCKSIZE 4096
/* Initial value of bz_file.blocks */
#define DEFAULT_BLOCKS 9
/* Number of distinct char values; length of the separator shift tables */
#define ASIZE (1 << CHAR_BIT)
|
20
|
+
|
21
|
+
/*
 * String/Array accessor macros. Ruby releases before 1.8.6 do not provide
 * them, so each one is defined here only when the headers left it undefined.
 */
#ifndef RSTRING_PTR
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#ifndef RSTRING_LEN
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#ifndef RARRAY_PTR
#  define RARRAY_PTR(s) (RARRAY(s)->ptr)
#endif
#ifndef RARRAY_LEN
#  define RARRAY_LEN(s) (RARRAY(s)->len)
#endif
|
34
|
+
|
35
|
+
struct bz_file {
|
36
|
+
bz_stream bzs;
|
37
|
+
VALUE in, io;
|
38
|
+
char *buf;
|
39
|
+
unsigned int buflen;
|
40
|
+
int blocks, work, small;
|
41
|
+
int flags, lineno, state;
|
42
|
+
};
|
43
|
+
|
44
|
+
struct bz_str {
|
45
|
+
VALUE str;
|
46
|
+
int pos;
|
47
|
+
};
|
48
|
+
|
49
|
+
struct bz_iv {
|
50
|
+
VALUE bz2, io;
|
51
|
+
void (*finalize)();
|
52
|
+
};
|
53
|
+
|
54
|
+
/*
 * Get_BZ2(obj, bzf)
 *
 * Runs the taint check on +obj+, stores the struct bz_file wrapped by +obj+
 * in +bzf+ and raises IOError ("closed IO") when the wrapper no longer has
 * an io.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to one
 * statement and stays correct inside an unbraced if/else. Use it as a
 * statement terminated by a semicolon.
 */
#define Get_BZ2(obj, bzf)                           \
    do {                                            \
        rb_io_taint_check(obj);                     \
        Data_Get_Struct(obj, struct bz_file, bzf);  \
        if (!RTEST((bzf)->io)) {                    \
            rb_raise(rb_eIOError, "closed IO");     \
        }                                           \
    } while (0)
|
60
|
+
|
61
|
+
#ifndef ASDFasdf
|
62
|
+
extern VALUE bz_cWriter, bz_cReader, bz_cInternal;
|
63
|
+
extern VALUE bz_eError, bz_eEOZError;
|
64
|
+
|
65
|
+
extern VALUE bz_internal_ary;
|
66
|
+
|
67
|
+
extern ID id_new, id_write, id_open, id_flush, id_read;
|
68
|
+
extern ID id_closed, id_close, id_str;
|
69
|
+
#endif
|
70
|
+
|
71
|
+
void bz_file_mark(struct bz_file * bzf);
|
72
|
+
void* bz_malloc(void *opaque, int m, int n);
|
73
|
+
void bz_free(void *opaque, void *p);
|
74
|
+
VALUE bz_raise(int err);
|
75
|
+
|
76
|
+
#endif
|
data/ext/bzip2/extconf.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: UTF-8
# Build configuration for the bzip2 C extension: locates libbz2 and writes
# the Makefile for bzip2/bzip2.
require 'mkmf'

dir_config('bz2')
have_header('bzlib.h')

$CFLAGS << ' -Wall -Wextra -Wno-unused -funroll-loops '
# $CFLAGS << ' -O0 -ggdb -Wextra'

# Without libbz2 no Makefile can be written. Stop with a non-zero exit status
# so the failure is reported here rather than later as a missing Makefile.
unless have_library("bz2", "BZ2_bzWriteOpen")
  abort "libbz2 not found, maybe try manually specifying --with-bz2-dir to find it?"
end

# Shared linking is the default; --disable-shared leaves $static untouched.
$static = nil if enable_config("shared", true)

# Makes common.h include <ruby/io.h> instead of the pre-1.9 headers.
$CFLAGS << ' -DRUBY_19_COMPATIBILITY' if RUBY_VERSION.to_f >= 1.9

create_makefile('bzip2/bzip2')
|
data/ext/bzip2/reader.c
ADDED
@@ -0,0 +1,1032 @@
|
|
1
|
+
#include <bzlib.h>
|
2
|
+
#include <ruby.h>
|
3
|
+
|
4
|
+
#include "reader.h"
|
5
|
+
#include "common.h"
|
6
|
+
|
7
|
+
void bz_str_mark(struct bz_str *bzs) {
|
8
|
+
rb_gc_mark(bzs->str);
|
9
|
+
}
|
10
|
+
|
11
|
+
/*
 * Returns the struct bz_file behind +obj+, ready for reading.
 *
 * On first use (no buffer yet) the libbz2 decompressor is initialised and a
 * buffer of BZ_RB_BLOCKSIZE bytes plus a terminating NUL is allocated. If a
 * previous error left a state other than BZ_OK, or if initialisation fails,
 * the matching exception is raised through bz_raise.
 *
 * Returns NULL (0) when the stream has ended and no buffered output is left.
 * Raises IOError through Get_BZ2 when the reader is closed.
 */
struct bz_file * bz_get_bzf(VALUE obj) {
    struct bz_file *bzf;

    Get_BZ2(obj, bzf);
    if (bzf->buf == NULL) {
        if (bzf->state != BZ_OK) {
            bz_raise(bzf->state);
        }
        bzf->state = BZ2_bzDecompressInit(&bzf->bzs, 0, bzf->small);
        if (bzf->state != BZ_OK) {
            BZ2_bzDecompressEnd(&bzf->bzs);
            bz_raise(bzf->state);
        }
        bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1);
        bzf->buflen = BZ_RB_BLOCKSIZE;
        bzf->buf[bzf->buflen] = '\0';
        bzf->buf[0] = '\0';
        bzf->bzs.total_out_lo32 = 0;
        bzf->bzs.total_out_hi32 = 0;
        /* nothing has been decompressed yet */
        bzf->bzs.next_out = bzf->buf;
        bzf->bzs.avail_out = 0;
    }
    if (bzf->state == BZ_STREAM_END && bzf->bzs.avail_out == 0) {
        return NULL;
    }
    return bzf;
}
|
36
|
+
|
37
|
+
int bz_next_available(struct bz_file *bzf, int in){
|
38
|
+
bzf->bzs.next_out = bzf->buf;
|
39
|
+
bzf->bzs.avail_out = 0;
|
40
|
+
if (bzf->state == BZ_STREAM_END) {
|
41
|
+
return BZ_STREAM_END;
|
42
|
+
}
|
43
|
+
if (!bzf->bzs.avail_in) {
|
44
|
+
bzf->in = rb_funcall(bzf->io, id_read, 1, INT2FIX(1024));
|
45
|
+
if (TYPE(bzf->in) != T_STRING || RSTRING_LEN(bzf->in) == 0) {
|
46
|
+
BZ2_bzDecompressEnd(&(bzf->bzs));
|
47
|
+
bzf->bzs.avail_out = 0;
|
48
|
+
bzf->state = BZ_UNEXPECTED_EOF;
|
49
|
+
bz_raise(bzf->state);
|
50
|
+
}
|
51
|
+
bzf->bzs.next_in = RSTRING_PTR(bzf->in);
|
52
|
+
bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in);
|
53
|
+
}
|
54
|
+
if ((bzf->buflen - in) < (BZ_RB_BLOCKSIZE / 2)) {
|
55
|
+
bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen+BZ_RB_BLOCKSIZE+1);
|
56
|
+
bzf->buflen += BZ_RB_BLOCKSIZE;
|
57
|
+
bzf->buf[bzf->buflen] = '\0';
|
58
|
+
}
|
59
|
+
bzf->bzs.avail_out = bzf->buflen - in;
|
60
|
+
bzf->bzs.next_out = bzf->buf + in;
|
61
|
+
bzf->state = BZ2_bzDecompress(&(bzf->bzs));
|
62
|
+
if (bzf->state != BZ_OK) {
|
63
|
+
BZ2_bzDecompressEnd(&(bzf->bzs));
|
64
|
+
if (bzf->state != BZ_STREAM_END) {
|
65
|
+
bzf->bzs.avail_out = 0;
|
66
|
+
bz_raise(bzf->state);
|
67
|
+
}
|
68
|
+
}
|
69
|
+
bzf->bzs.avail_out = bzf->buflen - bzf->bzs.avail_out;
|
70
|
+
bzf->bzs.next_out = bzf->buf;
|
71
|
+
return 0;
|
72
|
+
}
|
73
|
+
|
74
|
+
VALUE bz_read_until(struct bz_file *bzf, const char *str, int len, int *td1) {
|
75
|
+
VALUE res;
|
76
|
+
int total, i, nex = 0;
|
77
|
+
char *p, *t, *tx, *end, *pend = ((char*) str) + len;
|
78
|
+
|
79
|
+
res = rb_str_new(0, 0);
|
80
|
+
while (1) {
|
81
|
+
total = bzf->bzs.avail_out;
|
82
|
+
if (len == 1) {
|
83
|
+
tx = memchr(bzf->bzs.next_out, *str, bzf->bzs.avail_out);
|
84
|
+
if (tx) {
|
85
|
+
i = (int)(tx - bzf->bzs.next_out + len);
|
86
|
+
res = rb_str_cat(res, bzf->bzs.next_out, i);
|
87
|
+
bzf->bzs.next_out += i;
|
88
|
+
bzf->bzs.avail_out -= i;
|
89
|
+
return res;
|
90
|
+
}
|
91
|
+
} else {
|
92
|
+
tx = bzf->bzs.next_out;
|
93
|
+
end = bzf->bzs.next_out + bzf->bzs.avail_out;
|
94
|
+
while (tx + len <= end) {
|
95
|
+
for (p = (char*) str, t = tx; p != pend; ++p, ++t) {
|
96
|
+
if (*p != *t) break;
|
97
|
+
}
|
98
|
+
if (p == pend) {
|
99
|
+
i = (int)(tx - bzf->bzs.next_out + len);
|
100
|
+
res = rb_str_cat(res, bzf->bzs.next_out, i);
|
101
|
+
bzf->bzs.next_out += i;
|
102
|
+
bzf->bzs.avail_out -= i;
|
103
|
+
return res;
|
104
|
+
}
|
105
|
+
if (td1) {
|
106
|
+
tx += td1[(int)*(tx + len)];
|
107
|
+
} else {
|
108
|
+
tx += 1;
|
109
|
+
}
|
110
|
+
}
|
111
|
+
}
|
112
|
+
nex = 0;
|
113
|
+
if (total) {
|
114
|
+
nex = len - 1;
|
115
|
+
res = rb_str_cat(res, bzf->bzs.next_out, total - nex);
|
116
|
+
if (nex) {
|
117
|
+
MEMMOVE(bzf->buf, bzf->bzs.next_out + total - nex, char, nex);
|
118
|
+
}
|
119
|
+
}
|
120
|
+
if (bz_next_available(bzf, nex) == BZ_STREAM_END) {
|
121
|
+
if (nex) {
|
122
|
+
res = rb_str_cat(res, bzf->buf, nex);
|
123
|
+
}
|
124
|
+
if (RSTRING_LEN(res)) {
|
125
|
+
return res;
|
126
|
+
}
|
127
|
+
return Qnil;
|
128
|
+
}
|
129
|
+
}
|
130
|
+
return Qnil;
|
131
|
+
}
|
132
|
+
|
133
|
+
/**
|
134
|
+
* Reads a stream as long as the next character is equal to the specified
|
135
|
+
* character
|
136
|
+
*
|
137
|
+
* Returns the next character in the sequence that's not the same as the one
|
138
|
+
* given or EOF if it's there until the end of the file.
|
139
|
+
*/
|
140
|
+
int bz_read_while(struct bz_file *bzf, char c) {
|
141
|
+
char *end;
|
142
|
+
|
143
|
+
while (1) {
|
144
|
+
end = bzf->bzs.next_out + bzf->bzs.avail_out;
|
145
|
+
while (bzf->bzs.next_out < end) {
|
146
|
+
if (c != *bzf->bzs.next_out) {
|
147
|
+
return *bzf->bzs.next_out;
|
148
|
+
}
|
149
|
+
++bzf->bzs.next_out;
|
150
|
+
--bzf->bzs.avail_out;
|
151
|
+
}
|
152
|
+
if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
|
153
|
+
return EOF;
|
154
|
+
}
|
155
|
+
}
|
156
|
+
return EOF;
|
157
|
+
}
|
158
|
+
|
159
|
+
/*
|
160
|
+
* Internally allocates data for a new Reader
|
161
|
+
* @private
|
162
|
+
*/
|
163
|
+
VALUE bz_reader_s_alloc(VALUE obj) {
|
164
|
+
struct bz_file *bzf;
|
165
|
+
VALUE res;
|
166
|
+
res = Data_Make_Struct(obj, struct bz_file, bz_file_mark, free, bzf);
|
167
|
+
bzf->bzs.bzalloc = bz_malloc;
|
168
|
+
bzf->bzs.bzfree = bz_free;
|
169
|
+
bzf->blocks = DEFAULT_BLOCKS;
|
170
|
+
bzf->state = BZ_OK;
|
171
|
+
return res;
|
172
|
+
}
|
173
|
+
|
174
|
+
VALUE bz_reader_close __((VALUE));
|
175
|
+
|
176
|
+
/*
|
177
|
+
* call-seq:
|
178
|
+
* open(filename, &block=nil) -> Bzip2::Reader
|
179
|
+
*
|
180
|
+
* @param [String] filename the name of the file to read from
|
181
|
+
* @yieldparam [Bzip2::Reader] reader the Bzip2::Reader instance
|
182
|
+
*
|
183
|
+
* If a block is given, the created Bzip2::Reader instance is yielded to the
|
184
|
+
* block and will be closed when the block completes. It is guaranteed via
|
185
|
+
* +ensure+ that the reader is closed
|
186
|
+
*
|
187
|
+
* If a block is not given, a Bzip2::Reader instance will be returned
|
188
|
+
*
|
189
|
+
* Bzip2::Reader.open('file') { |f| puts f.gets }
|
190
|
+
*
|
191
|
+
* reader = Bzip2::Reader.open('file')
|
192
|
+
* puts reader.gets
|
193
|
+
* reader.close
|
194
|
+
*
|
195
|
+
* @return [Bzip2::Reader, nil]
|
196
|
+
*/
|
197
|
+
VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj) {
|
198
|
+
VALUE res;
|
199
|
+
struct bz_file *bzf;
|
200
|
+
|
201
|
+
if (argc < 1) {
|
202
|
+
rb_raise(rb_eArgError, "invalid number of arguments");
|
203
|
+
}
|
204
|
+
argv[0] = rb_funcall2(rb_mKernel, id_open, 1, argv);
|
205
|
+
if (NIL_P(argv[0])) {
|
206
|
+
return Qnil;
|
207
|
+
}
|
208
|
+
res = rb_funcall2(obj, id_new, argc, argv);
|
209
|
+
Data_Get_Struct(res, struct bz_file, bzf);
|
210
|
+
bzf->flags |= BZ2_RB_CLOSE;
|
211
|
+
if (rb_block_given_p()) {
|
212
|
+
return rb_ensure(rb_yield, res, bz_reader_close, res);
|
213
|
+
}
|
214
|
+
return res;
|
215
|
+
}
|
216
|
+
|
217
|
+
/*
|
218
|
+
* call-seq:
|
219
|
+
* initialize(io)
|
220
|
+
*
|
221
|
+
* Creates a new stream for reading a bzip file or string
|
222
|
+
*
|
223
|
+
* @param [File, string, #read] io the source for input data. If the source is
|
224
|
+
* a file or something responding to #read, then data will be read via #read,
|
225
|
+
* otherwise if the input is a string it will be taken as the literal data
|
226
|
+
* to decompress
|
227
|
+
*/
|
228
|
+
VALUE bz_reader_init(int argc, VALUE *argv, VALUE obj) {
|
229
|
+
struct bz_file *bzf;
|
230
|
+
int small = 0;
|
231
|
+
VALUE a, b;
|
232
|
+
int internal = 0;
|
233
|
+
|
234
|
+
if (rb_scan_args(argc, argv, "11", &a, &b) == 2) {
|
235
|
+
small = RTEST(b);
|
236
|
+
}
|
237
|
+
rb_io_taint_check(a);
|
238
|
+
if (OBJ_TAINTED(a)) {
|
239
|
+
OBJ_TAINT(obj);
|
240
|
+
}
|
241
|
+
if (rb_respond_to(a, id_read)) {
|
242
|
+
if (TYPE(a) == T_FILE) {
|
243
|
+
#ifndef RUBY_19_COMPATIBILITY
|
244
|
+
OpenFile *fptr;
|
245
|
+
#else
|
246
|
+
rb_io_t *fptr;
|
247
|
+
#endif
|
248
|
+
|
249
|
+
GetOpenFile(a, fptr);
|
250
|
+
rb_io_check_readable(fptr);
|
251
|
+
} else if (rb_respond_to(a, id_closed)) {
|
252
|
+
VALUE iv = rb_funcall2(a, id_closed, 0, 0);
|
253
|
+
if (RTEST(iv)) {
|
254
|
+
rb_raise(rb_eArgError, "closed object");
|
255
|
+
}
|
256
|
+
}
|
257
|
+
} else {
|
258
|
+
struct bz_str *bzs;
|
259
|
+
VALUE res;
|
260
|
+
|
261
|
+
if (!rb_respond_to(a, id_str)) {
|
262
|
+
rb_raise(rb_eArgError, "first argument must respond to #read");
|
263
|
+
}
|
264
|
+
a = rb_funcall2(a, id_str, 0, 0);
|
265
|
+
if (TYPE(a) != T_STRING) {
|
266
|
+
rb_raise(rb_eArgError, "#to_str must return a String");
|
267
|
+
}
|
268
|
+
res = Data_Make_Struct(bz_cInternal, struct bz_str,
|
269
|
+
bz_str_mark, free, bzs);
|
270
|
+
bzs->str = a;
|
271
|
+
a = res;
|
272
|
+
internal = BZ2_RB_INTERNAL;
|
273
|
+
}
|
274
|
+
Data_Get_Struct(obj, struct bz_file, bzf);
|
275
|
+
bzf->io = a;
|
276
|
+
bzf->small = small;
|
277
|
+
bzf->flags |= internal;
|
278
|
+
return obj;
|
279
|
+
}
|
280
|
+
|
281
|
+
/*
|
282
|
+
* call-seq:
|
283
|
+
* read(len = nil)
|
284
|
+
*
|
285
|
+
* Read decompressed data from the stream.
|
286
|
+
*
|
287
|
+
* Bzip2::Reader.new(Bzip2.compress('ab')).read # => "ab"
|
288
|
+
* Bzip2::Reader.new(Bzip2.compress('ab')).read(1) # => "a"
|
289
|
+
*
|
290
|
+
* @return [String, nil] the decompressed data read or +nil+ if eoz has been
|
291
|
+
* reached
|
292
|
+
* @param [Integer] len the number of decompressed bytes which should be read.
|
293
|
+
* If nothing is specified, the entire stream is read
|
294
|
+
*/
|
295
|
+
VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj) {
|
296
|
+
struct bz_file *bzf;
|
297
|
+
VALUE res, length;
|
298
|
+
int total;
|
299
|
+
int n;
|
300
|
+
|
301
|
+
rb_scan_args(argc, argv, "01", &length);
|
302
|
+
if (NIL_P(length)) {
|
303
|
+
n = -1;
|
304
|
+
} else {
|
305
|
+
n = NUM2INT(length);
|
306
|
+
if (n < 0) {
|
307
|
+
rb_raise(rb_eArgError, "negative length %d given", n);
|
308
|
+
}
|
309
|
+
}
|
310
|
+
bzf = bz_get_bzf(obj);
|
311
|
+
if (!bzf) {
|
312
|
+
return Qnil;
|
313
|
+
}
|
314
|
+
res = rb_str_new(0, 0);
|
315
|
+
if (OBJ_TAINTED(obj)) {
|
316
|
+
OBJ_TAINT(res);
|
317
|
+
}
|
318
|
+
if (n == 0) {
|
319
|
+
free(bzf->buf);
|
320
|
+
return res;
|
321
|
+
}
|
322
|
+
while (1) {
|
323
|
+
total = bzf->bzs.avail_out;
|
324
|
+
if (n != -1 && (RSTRING_LEN(res) + total) >= n) {
|
325
|
+
n -= (int) RSTRING_LEN(res);
|
326
|
+
res = rb_str_cat(res, bzf->bzs.next_out, n);
|
327
|
+
bzf->bzs.next_out += n;
|
328
|
+
bzf->bzs.avail_out -= n;
|
329
|
+
free(bzf->buf);
|
330
|
+
return res;
|
331
|
+
}
|
332
|
+
if (total) {
|
333
|
+
res = rb_str_cat(res, bzf->bzs.next_out, total);
|
334
|
+
}
|
335
|
+
if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
|
336
|
+
free(bzf->buf);
|
337
|
+
return res;
|
338
|
+
}
|
339
|
+
}
|
340
|
+
return Qnil;
|
341
|
+
}
|
342
|
+
|
343
|
+
/*
 * Reads a single decompressed byte from the reader +obj+.
 *
 * Returns the byte as a value in the range 0..255, or EOF when no more data
 * is available. The byte is converted through unsigned char so that 0xFF is
 * not confused with EOF on platforms where char is signed.
 */
int bz_getc(VALUE obj) {
    VALUE length = INT2FIX(1);
    VALUE res = bz_reader_read(1, &length, obj);

    if (NIL_P(res) || RSTRING_LEN(res) == 0) {
        return EOF;
    }
    return (unsigned char) RSTRING_PTR(res)[0];
}
|
351
|
+
|
352
|
+
/*
|
353
|
+
* call-seq:
|
354
|
+
* ungetc(byte)
|
355
|
+
*
|
356
|
+
* "Ungets" a character/byte. This rewinds the stream by 1 character and inserts
|
357
|
+
* the given character into that position. The next read will return the given
|
358
|
+
* character as the first one read
|
359
|
+
*
|
360
|
+
* reader = Bzip2::Reader.new Bzip2.compress('abc')
|
361
|
+
* reader.getc # => 97
|
362
|
+
* reader.ungetc 97 # => nil
|
363
|
+
* reader.getc # => 97
|
364
|
+
* reader.ungetc 42 # => nil
|
365
|
+
* reader.getc # => 42
|
366
|
+
* reader.getc # => 98
|
367
|
+
* reader.getc # => 99
|
368
|
+
* reader.ungetc 100 # => nil
|
369
|
+
* reader.getc # => 100
|
370
|
+
*
|
371
|
+
* @param [Integer] byte the byte to 'unget'
|
372
|
+
* @return [nil] always
|
373
|
+
*/
|
374
|
+
VALUE bz_reader_ungetc(VALUE obj, VALUE a) {
|
375
|
+
struct bz_file *bzf;
|
376
|
+
int c = NUM2INT(a);
|
377
|
+
|
378
|
+
Get_BZ2(obj, bzf);
|
379
|
+
if (!bzf->buf) {
|
380
|
+
bz_raise(BZ_SEQUENCE_ERROR);
|
381
|
+
}
|
382
|
+
if (bzf->bzs.avail_out < bzf->buflen) {
|
383
|
+
bzf->bzs.next_out -= 1;
|
384
|
+
bzf->bzs.next_out[0] = c;
|
385
|
+
bzf->bzs.avail_out += 1;
|
386
|
+
} else {
|
387
|
+
bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + 2);
|
388
|
+
bzf->buf[bzf->buflen++] = c;
|
389
|
+
bzf->buf[bzf->buflen] = '\0';
|
390
|
+
bzf->bzs.next_out = bzf->buf;
|
391
|
+
bzf->bzs.avail_out = bzf->buflen;
|
392
|
+
}
|
393
|
+
return Qnil;
|
394
|
+
}
|
395
|
+
|
396
|
+
/*
|
397
|
+
* call-seq:
|
398
|
+
* ungets(str)
|
399
|
+
*
|
400
|
+
* Equivalently "unget" a string. When called on a string that was just read
|
401
|
+
* from the stream, this inserts the string back into the stream to br read
|
402
|
+
* again.
|
403
|
+
*
|
404
|
+
* When called with a string which hasn't been read from the stream, it does
|
405
|
+
* the same thing, and the next read line/data will start from the beginning
|
406
|
+
* of the given data and the continue on with the rest of the stream.
|
407
|
+
*
|
408
|
+
* reader = Bzip2::Reader.new Bzip2.compress("a\nb")
|
409
|
+
* reader.gets # => "a\n"
|
410
|
+
* reader.ungets "a\n" # => nil
|
411
|
+
* reader.gets # => "a\n"
|
412
|
+
* reader.ungets "foo" # => nil
|
413
|
+
* reader.gets # => "foob"
|
414
|
+
*
|
415
|
+
* @param [String] str the string to insert back into the stream
|
416
|
+
* @return [nil] always
|
417
|
+
*/
|
418
|
+
VALUE bz_reader_ungets(VALUE obj, VALUE a) {
|
419
|
+
struct bz_file *bzf;
|
420
|
+
|
421
|
+
Check_Type(a, T_STRING);
|
422
|
+
Get_BZ2(obj, bzf);
|
423
|
+
if (!bzf->buf) {
|
424
|
+
bz_raise(BZ_SEQUENCE_ERROR);
|
425
|
+
}
|
426
|
+
if ((bzf->bzs.avail_out + RSTRING_LEN(a)) < bzf->buflen) {
|
427
|
+
bzf->bzs.next_out -= RSTRING_LEN(a);
|
428
|
+
MEMCPY(bzf->bzs.next_out, RSTRING_PTR(a), char, RSTRING_LEN(a));
|
429
|
+
bzf->bzs.avail_out += (int) RSTRING_LEN(a);
|
430
|
+
} else {
|
431
|
+
bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + RSTRING_LEN(a) + 1);
|
432
|
+
MEMCPY(bzf->buf + bzf->buflen, RSTRING_PTR(a), char,RSTRING_LEN(a));
|
433
|
+
bzf->buflen += (int) RSTRING_LEN(a);
|
434
|
+
bzf->buf[bzf->buflen] = '\0';
|
435
|
+
bzf->bzs.next_out = bzf->buf;
|
436
|
+
bzf->bzs.avail_out = bzf->buflen;
|
437
|
+
}
|
438
|
+
return Qnil;
|
439
|
+
}
|
440
|
+
|
441
|
+
/*
 * Reads one "\n"-terminated line from the reader +obj+.
 *
 * Returns the line (tainted, including the newline when present) and bumps
 * the reader's line counter, or returns nil when the stream is exhausted.
 */
VALUE bz_reader_gets(VALUE obj) {
    struct bz_file *bzf = bz_get_bzf(obj);
    VALUE line;

    if (!bzf) {
        return Qnil;
    }
    line = bz_read_until(bzf, "\n", 1, 0);
    if (NIL_P(line)) {
        return line;
    }
    bzf->lineno++;
    OBJ_TAINT(line);
    return line;
}
|
455
|
+
|
456
|
+
/*
 * Shared implementation behind gets/readline/readlines/each_line.
 *
 * argc, argv - optional single argument: the separator String or nil
 * obj        - the reader
 * td         - caller-provided shift table with ASIZE entries
 * init       - non-zero when +td+ still has to be filled for the separator
 *
 * Separator handling:
 *   - nil reads the rest of the stream
 *   - the default separator or "\n" reads one line
 *   - "" selects paragraph mode: the separator is "\n\n" and runs of
 *     newlines before and after the paragraph are skipped
 *   - anything else is searched for literally
 *
 * Returns the data read (tainted), or nil at the end of the stream.
 */
VALUE bz_reader_gets_internal(int argc, VALUE *argv, VALUE obj, int *td, int init) {
    struct bz_file *bzf;
    VALUE rs, res;
    const char *rsptr;
    int rslen, rspara, *td1;

    rs = rb_rs;
    if (argc) {
        rb_scan_args(argc, argv, "1", &rs);
        if (!NIL_P(rs)) {
            Check_Type(rs, T_STRING);
        }
    }
    if (NIL_P(rs)) {
        return bz_reader_read(1, &rs, obj);
    }
    rslen = (int) RSTRING_LEN(rs);
    if (rs == rb_default_rs || (rslen == 1 && RSTRING_PTR(rs)[0] == '\n')) {
        return bz_reader_gets(obj);
    }

    if (rslen == 0) {
        rsptr = "\n\n";
        rslen = 2;
        rspara = 1;
    } else {
        rsptr = RSTRING_PTR(rs);
        rspara = 0;
    }

    bzf = bz_get_bzf(obj);
    if (!bzf) {
        return Qnil;
    }
    if (rspara) {
        bz_read_while(bzf, '\n');
    }
    td1 = 0;
    if (rslen != 1) {
        if (init) {
            int i;

            /* bytes that do not occur in the separator skip past it entirely */
            for (i = 0; i < ASIZE; i++) {
                td[i] = rslen + 1;
            }
            /*
             * Index by unsigned byte value: a plain char may be negative
             * and would write in front of the table.
             */
            for (i = 0; i < rslen; i++) {
                td[(unsigned char) rsptr[i]] = rslen - i;
            }
        }
        td1 = td;
    }

    res = bz_read_until(bzf, rsptr, rslen, td1);
    if (rspara) {
        bz_read_while(bzf, '\n');
    }

    if (!NIL_P(res)) {
        bzf->lineno++;
        OBJ_TAINT(res);
    }
    return res;
}
|
519
|
+
|
520
|
+
/*
|
521
|
+
* Specs were missing for this method originally and playing around with it
|
522
|
+
* gave some very odd results, so unless you know what you're doing, I wouldn't
|
523
|
+
* mess around with this...
|
524
|
+
*/
|
525
|
+
VALUE bz_reader_set_unused(VALUE obj, VALUE a) {
|
526
|
+
struct bz_file *bzf;
|
527
|
+
|
528
|
+
Check_Type(a, T_STRING);
|
529
|
+
Get_BZ2(obj, bzf);
|
530
|
+
if (!bzf->in) {
|
531
|
+
bzf->in = rb_str_new(RSTRING_PTR(a), RSTRING_LEN(a));
|
532
|
+
} else {
|
533
|
+
bzf->in = rb_str_cat(bzf->in, RSTRING_PTR(a), RSTRING_LEN(a));
|
534
|
+
}
|
535
|
+
bzf->bzs.next_in = RSTRING_PTR(bzf->in);
|
536
|
+
bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in);
|
537
|
+
return Qnil;
|
538
|
+
}
|
539
|
+
|
540
|
+
/*
|
541
|
+
* Reads one character from the stream, returning the byte read.
|
542
|
+
*
|
543
|
+
* reader = Bzip2::Reader.new Bzip2.compress('ab')
|
544
|
+
* reader.getc # => 97
|
545
|
+
* reader.getc # => 98
|
546
|
+
* reader.getc # => nil
|
547
|
+
*
|
548
|
+
* @return [Integer, nil] the byte value of the character read or +nil+ if eoz
|
549
|
+
* has been reached
|
550
|
+
*/
|
551
|
+
VALUE bz_reader_getc(VALUE obj) {
|
552
|
+
VALUE str;
|
553
|
+
VALUE len = INT2FIX(1);
|
554
|
+
|
555
|
+
str = bz_reader_read(1, &len, obj);
|
556
|
+
if (NIL_P(str) || RSTRING_LEN(str) == 0) {
|
557
|
+
return Qnil;
|
558
|
+
}
|
559
|
+
return INT2FIX(RSTRING_PTR(str)[0] & 0xff);
|
560
|
+
}
|
561
|
+
|
562
|
+
void bz_eoz_error() {
|
563
|
+
rb_raise(bz_eEOZError, "End of Zip component reached");
|
564
|
+
}
|
565
|
+
|
566
|
+
/*
|
567
|
+
* Performs the same as Bzip2::Reader#getc except Bzip2::EOZError is raised if
|
568
|
+
* eoz has been readhed
|
569
|
+
*
|
570
|
+
* @raise [Bzip2::EOZError] if eoz has been reached
|
571
|
+
*/
|
572
|
+
VALUE bz_reader_readchar(VALUE obj) {
|
573
|
+
VALUE res = bz_reader_getc(obj);
|
574
|
+
|
575
|
+
if (NIL_P(res)) {
|
576
|
+
bz_eoz_error();
|
577
|
+
}
|
578
|
+
return res;
|
579
|
+
}
|
580
|
+
|
581
|
+
/*
|
582
|
+
* call-seq:
|
583
|
+
* gets(sep = "\n")
|
584
|
+
*
|
585
|
+
* Reads a line from the stream until the separator is reached. This does not
|
586
|
+
* throw an exception, but rather returns nil if an eoz/eof error occurs
|
587
|
+
*
|
588
|
+
* reader = Bzip2::Reader.new Bzip2.compress("a\nb")
|
589
|
+
* reader.gets # => "a\n"
|
590
|
+
* reader.gets # => "b"
|
591
|
+
* reader.gets # => nil
|
592
|
+
*
|
593
|
+
* @return [String, nil] the read data or nil if eoz has been reached
|
594
|
+
* @see Bzip2::Reader#readline
|
595
|
+
*/
|
596
|
+
VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE obj) {
|
597
|
+
int td[ASIZE];
|
598
|
+
VALUE str = bz_reader_gets_internal(argc, argv, obj, td, Qtrue);
|
599
|
+
|
600
|
+
if (!NIL_P(str)) {
|
601
|
+
rb_lastline_set(str);
|
602
|
+
}
|
603
|
+
return str;
|
604
|
+
}
|
605
|
+
|
606
|
+
/*
|
607
|
+
* call-seq:
|
608
|
+
* readline(sep = "\n")
|
609
|
+
*
|
610
|
+
* Reads one line from the stream and returns it (including the separator)
|
611
|
+
*
|
612
|
+
* reader = Bzip2::Reader.new Bzip2.compress("a\nb")
|
613
|
+
* reader.readline # => "a\n"
|
614
|
+
* reader.readline # => "b"
|
615
|
+
* reader.readline # => raises Bzip2::EOZError
|
616
|
+
*
|
617
|
+
*
|
618
|
+
* @param [String] sep the newline separator character
|
619
|
+
* @return [String] the read line
|
620
|
+
* @see Bzip2::Reader.readlines
|
621
|
+
* @raise [Bzip2::EOZError] if the stream has reached its end
|
622
|
+
*/
|
623
|
+
VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj) {
|
624
|
+
VALUE res = bz_reader_gets_m(argc, argv, obj);
|
625
|
+
|
626
|
+
if (NIL_P(res)) {
|
627
|
+
bz_eoz_error();
|
628
|
+
}
|
629
|
+
return res;
|
630
|
+
}
|
631
|
+
|
632
|
+
/*
|
633
|
+
* call-seq:
|
634
|
+
* readlines(sep = "\n")
|
635
|
+
*
|
636
|
+
* Reads the lines of the files and returns the result as an array.
|
637
|
+
*
|
638
|
+
* If the stream has reached eoz, then an empty array is returned
|
639
|
+
*
|
640
|
+
* @param [String] sep the newline separator character
|
641
|
+
* @return [Array] an array of lines read
|
642
|
+
* @see Bzip2::Reader.readlines
|
643
|
+
*/
|
644
|
+
VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj) {
|
645
|
+
VALUE line, ary;
|
646
|
+
int td[ASIZE], in;
|
647
|
+
|
648
|
+
in = Qtrue;
|
649
|
+
ary = rb_ary_new();
|
650
|
+
while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
|
651
|
+
in = Qfalse;
|
652
|
+
rb_ary_push(ary, line);
|
653
|
+
}
|
654
|
+
return ary;
|
655
|
+
}
|
656
|
+
|
657
|
+
/*
|
658
|
+
* call-seq:
|
659
|
+
* each(sep = "\n", &block)
|
660
|
+
*
|
661
|
+
* Iterates over the lines of the stream.
|
662
|
+
*
|
663
|
+
* @param [String] sep the byte which separates lines
|
664
|
+
* @yieldparam [String] line the next line of the file (including the separator
|
665
|
+
* character)
|
666
|
+
* @see Bzip2::Reader.foreach
|
667
|
+
*/
|
668
|
+
VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj) {
|
669
|
+
VALUE line;
|
670
|
+
int td[ASIZE], in;
|
671
|
+
|
672
|
+
in = Qtrue;
|
673
|
+
while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) {
|
674
|
+
in = Qfalse;
|
675
|
+
rb_yield(line);
|
676
|
+
}
|
677
|
+
return obj;
|
678
|
+
}
|
679
|
+
|
680
|
+
/*
|
681
|
+
* call-seq:
|
682
|
+
* each_byte(&block)
|
683
|
+
*
|
684
|
+
* Iterates over the decompressed bytes of the file.
|
685
|
+
*
|
686
|
+
* Bzip2::Writer.open('file'){ |f| f << 'asdf' }
|
687
|
+
* reader = Bzip2::Reader.new File.open('file')
|
688
|
+
* reader.each_byte{ |b| puts "#{b} #{b.chr}" }
|
689
|
+
*
|
690
|
+
* # Output:
|
691
|
+
* # 97 a
|
692
|
+
* # 115 s
|
693
|
+
* # 100 d
|
694
|
+
* # 102 f
|
695
|
+
*
|
696
|
+
* @yieldparam [Integer] byte the decompressed bytes of the file
|
697
|
+
*/
|
698
|
+
VALUE bz_reader_each_byte(VALUE obj) {
|
699
|
+
int c;
|
700
|
+
|
701
|
+
while ((c = bz_getc(obj)) != EOF) {
|
702
|
+
rb_yield(INT2FIX(c & 0xff));
|
703
|
+
}
|
704
|
+
return obj;
|
705
|
+
}
|
706
|
+
|
707
|
+
/*
|
708
|
+
* Specs were missing for this method originally and playing around with it
|
709
|
+
* gave some very odd results, so unless you know what you're doing, I wouldn't
|
710
|
+
* mess around with this...
|
711
|
+
*/
|
712
|
+
VALUE bz_reader_unused(VALUE obj) {
|
713
|
+
struct bz_file *bzf;
|
714
|
+
VALUE res;
|
715
|
+
|
716
|
+
Get_BZ2(obj, bzf);
|
717
|
+
if (!bzf->in || bzf->state != BZ_STREAM_END) {
|
718
|
+
return Qnil;
|
719
|
+
}
|
720
|
+
if (bzf->bzs.avail_in) {
|
721
|
+
res = rb_tainted_str_new(bzf->bzs.next_in, bzf->bzs.avail_in);
|
722
|
+
bzf->bzs.avail_in = 0;
|
723
|
+
} else {
|
724
|
+
res = rb_tainted_str_new(0, 0);
|
725
|
+
}
|
726
|
+
return res;
|
727
|
+
}
|
728
|
+
|
729
|
+
/*
|
730
|
+
* Test whether the end of the bzip stream has been reached
|
731
|
+
*
|
732
|
+
* @return [Boolean] +true+ if the reader is at the end of the bz stream or
|
733
|
+
* +false+ otherwise
|
734
|
+
*/
|
735
|
+
VALUE bz_reader_eoz(VALUE obj) {
|
736
|
+
struct bz_file *bzf;
|
737
|
+
|
738
|
+
Get_BZ2(obj, bzf);
|
739
|
+
if (!bzf->in || !bzf->buf) {
|
740
|
+
return Qnil;
|
741
|
+
}
|
742
|
+
if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) {
|
743
|
+
return Qtrue;
|
744
|
+
}
|
745
|
+
return Qfalse;
|
746
|
+
}
|
747
|
+
|
748
|
+
/*
|
749
|
+
* Test whether the bzip stream has reached its end (see Bzip2::Reader#eoz?)
|
750
|
+
* and then tests that the undlerying IO has also reached an eof
|
751
|
+
*
|
752
|
+
* @return [Boolean] +true+ if the stream has reached or +false+ otherwise.
|
753
|
+
*/
|
754
|
+
VALUE bz_reader_eof(VALUE obj) {
|
755
|
+
struct bz_file *bzf;
|
756
|
+
VALUE res;
|
757
|
+
|
758
|
+
res = bz_reader_eoz(obj);
|
759
|
+
if (RTEST(res)) {
|
760
|
+
Get_BZ2(obj, bzf);
|
761
|
+
if (bzf->bzs.avail_in) {
|
762
|
+
res = Qfalse;
|
763
|
+
} else {
|
764
|
+
res = bz_reader_getc(obj);
|
765
|
+
if (NIL_P(res)) {
|
766
|
+
res = Qtrue;
|
767
|
+
} else {
|
768
|
+
bz_reader_ungetc(obj, res);
|
769
|
+
res = Qfalse;
|
770
|
+
}
|
771
|
+
}
|
772
|
+
}
|
773
|
+
return res;
|
774
|
+
}
|
775
|
+
|
776
|
+
/*
|
777
|
+
* Tests whether this reader has be closed.
|
778
|
+
*
|
779
|
+
* @return [Boolean] +true+ if it is or +false+ otherwise.
|
780
|
+
*/
|
781
|
+
VALUE bz_reader_closed(VALUE obj) {
|
782
|
+
struct bz_file *bzf;
|
783
|
+
|
784
|
+
Data_Get_Struct(obj, struct bz_file, bzf);
|
785
|
+
return RTEST(bzf->io)?Qfalse:Qtrue;
|
786
|
+
}
|
787
|
+
|
788
|
+
/*
|
789
|
+
* Closes this reader to disallow further reads.
|
790
|
+
*
|
791
|
+
* reader = Bzip2::Reader.new File.open('file')
|
792
|
+
* reader.close
|
793
|
+
*
|
794
|
+
* reader.closed? # => true
|
795
|
+
*
|
796
|
+
* @return [File] the io with which the reader was created.
|
797
|
+
* @raise [IOError] if the stream has already been closed
|
798
|
+
*/
|
799
|
+
VALUE bz_reader_close(VALUE obj) {
|
800
|
+
struct bz_file *bzf;
|
801
|
+
VALUE res;
|
802
|
+
|
803
|
+
Get_BZ2(obj, bzf);
|
804
|
+
if (bzf->buf) {
|
805
|
+
free(bzf->buf);
|
806
|
+
bzf->buf = 0;
|
807
|
+
}
|
808
|
+
if (bzf->state == BZ_OK) {
|
809
|
+
BZ2_bzDecompressEnd(&(bzf->bzs));
|
810
|
+
}
|
811
|
+
if (bzf->flags & BZ2_RB_CLOSE) {
|
812
|
+
int closed = 0;
|
813
|
+
if (rb_respond_to(bzf->io, id_closed)) {
|
814
|
+
VALUE iv = rb_funcall2(bzf->io, id_closed, 0, 0);
|
815
|
+
closed = RTEST(iv);
|
816
|
+
}
|
817
|
+
if (!closed && rb_respond_to(bzf->io, id_close)) {
|
818
|
+
rb_funcall2(bzf->io, id_close, 0, 0);
|
819
|
+
}
|
820
|
+
}
|
821
|
+
if (bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL)) {
|
822
|
+
res = Qnil;
|
823
|
+
} else {
|
824
|
+
res = bzf->io;
|
825
|
+
}
|
826
|
+
bzf->io = 0;
|
827
|
+
return res;
|
828
|
+
}
|
829
|
+
|
830
|
+
/*
|
831
|
+
* Originally undocument and had no sepcs. Appears to call Bzip2::Reader#read
|
832
|
+
* and then mark the stream as finished, but this didn't work for me...
|
833
|
+
*/
|
834
|
+
VALUE bz_reader_finish(VALUE obj) {
|
835
|
+
struct bz_file *bzf;
|
836
|
+
|
837
|
+
Get_BZ2(obj, bzf);
|
838
|
+
if (bzf->buf) {
|
839
|
+
rb_funcall2(obj, id_read, 0, 0);
|
840
|
+
free(bzf->buf);
|
841
|
+
}
|
842
|
+
bzf->buf = 0;
|
843
|
+
bzf->state = BZ_OK;
|
844
|
+
return Qnil;
|
845
|
+
}
|
846
|
+
|
847
|
+
/*
|
848
|
+
* Originally undocument and had no sepcs. Appears to work nearly the same
|
849
|
+
* as Bzip2::Reader#close...
|
850
|
+
*/
|
851
|
+
VALUE bz_reader_close_bang(VALUE obj) {
|
852
|
+
struct bz_file *bzf;
|
853
|
+
int closed;
|
854
|
+
|
855
|
+
Get_BZ2(obj, bzf);
|
856
|
+
closed = bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL);
|
857
|
+
bz_reader_close(obj);
|
858
|
+
if (!closed && rb_respond_to(bzf->io, id_close)) {
|
859
|
+
if (rb_respond_to(bzf->io, id_closed)) {
|
860
|
+
closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
|
861
|
+
}
|
862
|
+
if (!closed) {
|
863
|
+
rb_funcall2(bzf->io, id_close, 0, 0);
|
864
|
+
}
|
865
|
+
}
|
866
|
+
return Qnil;
|
867
|
+
}
|
868
|
+
|
869
|
+
struct foreach_arg {
|
870
|
+
int argc;
|
871
|
+
VALUE sep;
|
872
|
+
VALUE obj;
|
873
|
+
};
|
874
|
+
|
875
|
+
VALUE bz_reader_foreach_line(struct foreach_arg *arg) {
|
876
|
+
VALUE str;
|
877
|
+
int td[ASIZE], in;
|
878
|
+
|
879
|
+
in = Qtrue;
|
880
|
+
while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) {
|
881
|
+
in = Qfalse;
|
882
|
+
rb_yield(str);
|
883
|
+
}
|
884
|
+
return Qnil;
|
885
|
+
}
|
886
|
+
|
887
|
+
/*
|
888
|
+
* call-seq:
|
889
|
+
* foreach(filename, &block)
|
890
|
+
*
|
891
|
+
* Reads a bz2 compressed file and yields each line to the block
|
892
|
+
*
|
893
|
+
* Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
|
894
|
+
* Bzip2::Reader.foreach('file'){ |l| p l }
|
895
|
+
*
|
896
|
+
* # Output:
|
897
|
+
* # "a\n"
|
898
|
+
* # "b\n"
|
899
|
+
* # "c\n"
|
900
|
+
* # "\n"
|
901
|
+
* # "d"
|
902
|
+
*
|
903
|
+
* @param [String] filename the path to the file to open
|
904
|
+
* @yieldparam [String] each line of the file
|
905
|
+
*/
|
906
|
+
VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj) {
|
907
|
+
VALUE fname, sep;
|
908
|
+
struct foreach_arg arg;
|
909
|
+
struct bz_file *bzf;
|
910
|
+
|
911
|
+
if (!rb_block_given_p()) {
|
912
|
+
rb_raise(rb_eArgError, "call out of a block");
|
913
|
+
}
|
914
|
+
rb_scan_args(argc, argv, "11", &fname, &sep);
|
915
|
+
#ifdef SafeStringValue
|
916
|
+
SafeStringValue(fname);
|
917
|
+
#else
|
918
|
+
Check_SafeStr(fname);
|
919
|
+
#endif
|
920
|
+
arg.argc = argc - 1;
|
921
|
+
arg.sep = sep;
|
922
|
+
arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
|
923
|
+
if (NIL_P(arg.obj)) {
|
924
|
+
return Qnil;
|
925
|
+
}
|
926
|
+
arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
|
927
|
+
Data_Get_Struct(arg.obj, struct bz_file, bzf);
|
928
|
+
bzf->flags |= BZ2_RB_CLOSE;
|
929
|
+
return rb_ensure(bz_reader_foreach_line, (VALUE)&arg, bz_reader_close, arg.obj);
|
930
|
+
}
|
931
|
+
|
932
|
+
VALUE bz_reader_i_readlines(struct foreach_arg *arg) {
|
933
|
+
VALUE str, res;
|
934
|
+
int td[ASIZE], in;
|
935
|
+
|
936
|
+
in = Qtrue;
|
937
|
+
res = rb_ary_new();
|
938
|
+
while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) {
|
939
|
+
in = Qfalse;
|
940
|
+
rb_ary_push(res, str);
|
941
|
+
}
|
942
|
+
return res;
|
943
|
+
}
|
944
|
+
|
945
|
+
/*
|
946
|
+
* call-seq:
|
947
|
+
* readlines(filename, separator="\n")
|
948
|
+
*
|
949
|
+
* Opens the given bz2 compressed file for reading and decompresses the file,
|
950
|
+
* returning an array of the lines of the file. A line is denoted by the
|
951
|
+
* separator argument.
|
952
|
+
*
|
953
|
+
* Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
|
954
|
+
*
|
955
|
+
* Bzip2::Reader.readlines('file') # => ["a\n", "b\n", "c\n", "\n", "d"]
|
956
|
+
* Bzip2::Reader.readlines('file', 'c') # => ["a\nb\nc", "\n\nd"]
|
957
|
+
*
|
958
|
+
* @param [String] filename the path to the file to read
|
959
|
+
* @param [String] separator the character to denote a newline in the file
|
960
|
+
* @see Bzip2::Reader#readlines
|
961
|
+
* @return [Array] an array of lines for the file
|
962
|
+
* @raise [Bzip2::Error] if the file is not a valid bz2 compressed file
|
963
|
+
*/
|
964
|
+
VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj) {
|
965
|
+
VALUE fname, sep;
|
966
|
+
struct foreach_arg arg;
|
967
|
+
struct bz_file *bzf;
|
968
|
+
|
969
|
+
rb_scan_args(argc, argv, "11", &fname, &sep);
|
970
|
+
#ifdef SafeStringValue
|
971
|
+
SafeStringValue(fname);
|
972
|
+
#else
|
973
|
+
Check_SafeStr(fname);
|
974
|
+
#endif
|
975
|
+
arg.argc = argc - 1;
|
976
|
+
arg.sep = sep;
|
977
|
+
arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname);
|
978
|
+
if (NIL_P(arg.obj)) {
|
979
|
+
return Qnil;
|
980
|
+
}
|
981
|
+
arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj);
|
982
|
+
Data_Get_Struct(arg.obj, struct bz_file, bzf);
|
983
|
+
bzf->flags |= BZ2_RB_CLOSE;
|
984
|
+
return rb_ensure(bz_reader_i_readlines, (VALUE)&arg, bz_reader_close, arg.obj);
|
985
|
+
}
|
986
|
+
|
987
|
+
/*
|
988
|
+
* Returns the current line number that the stream is at. This number is based
|
989
|
+
* on the newline separator being "\n"
|
990
|
+
*
|
991
|
+
* reader = Bzip2::Reader.new Bzip2.compress("a\nb")
|
992
|
+
* reader.lineno # => 0
|
993
|
+
* reader.readline # => "a\n"
|
994
|
+
* reader.lineno # => 1
|
995
|
+
* reader.readline # => "b"
|
996
|
+
* reader.lineno # => 2
|
997
|
+
|
998
|
+
* @return [Integer] the current line number
|
999
|
+
*/
|
1000
|
+
VALUE bz_reader_lineno(VALUE obj) {
|
1001
|
+
struct bz_file *bzf;
|
1002
|
+
|
1003
|
+
Get_BZ2(obj, bzf);
|
1004
|
+
return INT2NUM(bzf->lineno);
|
1005
|
+
}
|
1006
|
+
|
1007
|
+
/*
|
1008
|
+
* call-seq:
|
1009
|
+
* lineno=(num)
|
1010
|
+
*
|
1011
|
+
* Sets the internal line number count that this stream should be set at
|
1012
|
+
*
|
1013
|
+
* reader = Bzip2::Reader.new Bzip2.compress("a\nb")
|
1014
|
+
* reader.lineno # => 0
|
1015
|
+
* reader.readline # => "a\n"
|
1016
|
+
* reader.lineno # => 1
|
1017
|
+
* reader.lineno = 0
|
1018
|
+
* reader.readline # => "b"
|
1019
|
+
* reader.lineno # => 1
|
1020
|
+
*
|
1021
|
+
* @note This does not actually rewind or move the stream forward
|
1022
|
+
* @param [Integer] lineno the line number which the stream should consider
|
1023
|
+
* being set at
|
1024
|
+
* @return [Integer] the line number provided
|
1025
|
+
*/
|
1026
|
+
VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno) {
|
1027
|
+
struct bz_file *bzf;
|
1028
|
+
|
1029
|
+
Get_BZ2(obj, bzf);
|
1030
|
+
bzf->lineno = NUM2INT(lineno);
|
1031
|
+
return lineno;
|
1032
|
+
}
|