fast-xml 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +145 -37
- data/ext/fastxml/ccan/build_assert/build_assert.h +40 -0
- data/ext/fastxml/ccan/check_type/check_type.h +63 -0
- data/ext/fastxml/ccan/container_of/container_of.h +142 -0
- data/ext/fastxml/ccan/list/list.h +773 -0
- data/ext/fastxml/ccan/str/str.h +16 -0
- data/ext/fastxml/fastxml.c +35 -2
- data/ext/fastxml/xh.c +19 -8
- data/ext/fastxml/xh.h +2 -1
- data/ext/fastxml/xh_config.h +3 -0
- data/ext/fastxml/xh_core.h +1 -5
- data/ext/fastxml/xh_log.h +37 -27
- data/ext/fastxml/xh_param.c +3 -11
- data/ext/fastxml/xh_param.h +1 -1
- data/ext/fastxml/xh_reader.c +528 -0
- data/ext/fastxml/xh_reader.h +43 -0
- data/ext/fastxml/xh_ruby_hash.h +384 -0
- data/ext/fastxml/xh_x2h.c +1002 -0
- data/ext/fastxml/xh_x2h.h +133 -0
- data/lib/fastxml/version.rb +1 -1
- metadata +13 -3
@@ -0,0 +1,528 @@
|
|
1
|
+
#include "xh_config.h"
|
2
|
+
#include "xh_core.h"
|
3
|
+
|
4
|
+
static void
|
5
|
+
xh_common_reader_init(xh_reader_t *reader, VALUE XH_UNUSED(input), xh_char_t *encoding, size_t buf_size)
|
6
|
+
{
|
7
|
+
reader->buf_size = buf_size;
|
8
|
+
|
9
|
+
if (encoding[0] != '\0')
|
10
|
+
reader->switch_encoding(reader, encoding, NULL, NULL);
|
11
|
+
}
|
12
|
+
|
13
|
+
static void
|
14
|
+
xh_common_reader_destroy(xh_reader_t *reader)
|
15
|
+
{
|
16
|
+
#ifdef XH_HAVE_ENCODER
|
17
|
+
xh_buffer_destroy(&reader->enc_buf);
|
18
|
+
if (reader->encoder != NULL)
|
19
|
+
xh_encoder_destroy(reader->encoder);
|
20
|
+
#endif
|
21
|
+
}
|
22
|
+
|
23
|
+
static void
|
24
|
+
xh_common_reader_switch_encoding(xh_reader_t *reader, xh_char_t *encoding, xh_char_t **buf, size_t *len)
|
25
|
+
{
|
26
|
+
xh_log_debug1("switch encoding to '%s'", encoding);
|
27
|
+
|
28
|
+
if (xh_strcasecmp(encoding, XH_INTERNAL_ENCODING) == 0) {
|
29
|
+
#ifdef XH_HAVE_ENCODER
|
30
|
+
if (reader->encoder != NULL) {
|
31
|
+
rb_raise(xh_parse_error_class, "Can't to switch encoding from %s to %s", reader->encoder->fromcode, encoding);
|
32
|
+
}
|
33
|
+
#endif
|
34
|
+
}
|
35
|
+
else {
|
36
|
+
#ifdef XH_HAVE_ENCODER
|
37
|
+
if (reader->encoder == NULL) {
|
38
|
+
/* create encoder */
|
39
|
+
xh_log_debug1("create a new encoder: %s", encoding);
|
40
|
+
|
41
|
+
reader->encoder = xh_encoder_create(XH_CHAR_CAST XH_INTERNAL_ENCODING, encoding);
|
42
|
+
if (reader->encoder == NULL) {
|
43
|
+
rb_raise(xh_parse_error_class, "Can't create encoder for '%s'", encoding);
|
44
|
+
}
|
45
|
+
|
46
|
+
xh_buffer_init(&reader->enc_buf, reader->buf_size);
|
47
|
+
|
48
|
+
if (len != NULL && *len > 0) {
|
49
|
+
reader->fake_read_pos = *buf;
|
50
|
+
reader->fake_read_len = *len;
|
51
|
+
*len = 0;
|
52
|
+
}
|
53
|
+
}
|
54
|
+
else if (xh_strcasecmp(encoding, reader->encoder->fromcode) != 0) {
|
55
|
+
rb_raise(xh_parse_error_class, "Can't to switch encoding from %s to %s", reader->encoder->fromcode, encoding);
|
56
|
+
}
|
57
|
+
#else
|
58
|
+
rb_raise(xh_parse_error_class, "Can't create encoder for '%s'", encoding);
|
59
|
+
#endif
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
static void
|
64
|
+
xh_string_reader_init(xh_reader_t *reader, VALUE input, xh_char_t *encoding, size_t buf_size)
|
65
|
+
{
|
66
|
+
size_t len;
|
67
|
+
xh_char_t *str;
|
68
|
+
|
69
|
+
str = XH_CHAR_CAST RSTRING_PTR(input);
|
70
|
+
len = RSTRING_LEN(input);
|
71
|
+
reader->str = str;
|
72
|
+
reader->len = len;
|
73
|
+
|
74
|
+
reader->main_buf.start = reader->main_buf.cur = str;
|
75
|
+
reader->main_buf.end = str + len;
|
76
|
+
|
77
|
+
xh_common_reader_init(reader, input, encoding, buf_size);
|
78
|
+
}
|
79
|
+
|
80
|
+
static size_t
|
81
|
+
xh_string_reader_read(xh_reader_t *reader, xh_char_t **buf, xh_char_t *XH_UNUSED(preserve), size_t *off)
|
82
|
+
{
|
83
|
+
size_t len;
|
84
|
+
xh_buffer_t *main_buf;
|
85
|
+
|
86
|
+
*off = 0;
|
87
|
+
main_buf = &reader->main_buf;
|
88
|
+
|
89
|
+
*buf = xh_buffer_pos(main_buf);
|
90
|
+
len = xh_buffer_avail(main_buf);
|
91
|
+
|
92
|
+
xh_buffer_seek_eof(main_buf);
|
93
|
+
|
94
|
+
return len;
|
95
|
+
}
|
96
|
+
|
97
|
+
#ifdef XH_HAVE_ENCODER
|
98
|
+
static size_t
|
99
|
+
xh_string_reader_read_with_encoding(xh_reader_t *reader, xh_char_t **buf, xh_char_t *preserve, size_t *off)
|
100
|
+
{
|
101
|
+
xh_char_t *old_buf_addr;
|
102
|
+
size_t src_left, dst_left;
|
103
|
+
xh_buffer_t *main_buf, *enc_buf;
|
104
|
+
|
105
|
+
*off = 0;
|
106
|
+
main_buf = &reader->main_buf;
|
107
|
+
enc_buf = &reader->enc_buf;
|
108
|
+
|
109
|
+
xh_log_debug4("enc_buf: %p[%.*s] len: %lu", enc_buf->start, enc_buf->cur - enc_buf->start, enc_buf->cur, enc_buf->cur - enc_buf->start);
|
110
|
+
|
111
|
+
xh_log_debug1("preserve data: %p", preserve);
|
112
|
+
if (preserve == NULL) {
|
113
|
+
xh_buffer_seek_top(enc_buf);
|
114
|
+
}
|
115
|
+
else {
|
116
|
+
*off = preserve - enc_buf->start;
|
117
|
+
xh_log_debug1("off: %lu", *off);
|
118
|
+
if (*off) {
|
119
|
+
xh_log_debug3("memmove dest: %p src %p size: %lu", enc_buf->start, preserve, enc_buf->end - preserve);
|
120
|
+
xh_memmove(enc_buf->start, preserve, enc_buf->end - preserve);
|
121
|
+
}
|
122
|
+
enc_buf->cur -= *off;
|
123
|
+
}
|
124
|
+
|
125
|
+
old_buf_addr = xh_buffer_start(enc_buf);
|
126
|
+
xh_buffer_grow50(enc_buf);
|
127
|
+
|
128
|
+
if (preserve != NULL && xh_buffer_start(enc_buf) != old_buf_addr) {
|
129
|
+
*off += old_buf_addr - xh_buffer_start(enc_buf);
|
130
|
+
}
|
131
|
+
|
132
|
+
*buf = xh_buffer_pos(enc_buf);
|
133
|
+
|
134
|
+
while (enc_buf->cur < enc_buf->end) {
|
135
|
+
if (reader->fake_read_pos != NULL) {
|
136
|
+
main_buf->cur = reader->fake_read_pos;
|
137
|
+
reader->fake_read_pos = NULL;
|
138
|
+
reader->fake_read_len = 0;
|
139
|
+
}
|
140
|
+
|
141
|
+
xh_log_debug2("main buf cur: %p end: %p", main_buf->cur, main_buf->end);
|
142
|
+
src_left = xh_buffer_avail(main_buf);
|
143
|
+
if (src_left == 0 && reader->encoder->state == XH_ENC_OK) {
|
144
|
+
if (main_buf->cur == main_buf->end)
|
145
|
+
break;
|
146
|
+
rb_raise(xh_parse_error_class, "Truncate char found");
|
147
|
+
}
|
148
|
+
|
149
|
+
dst_left = xh_buffer_avail(enc_buf);
|
150
|
+
|
151
|
+
xh_log_debug4("main_buf: %.*s src_left: %lu dst_left: %lu", src_left, main_buf->cur, src_left, dst_left);
|
152
|
+
|
153
|
+
xh_encoder_encode_string(reader->encoder, &main_buf->cur, &src_left, &enc_buf->cur, &dst_left);
|
154
|
+
|
155
|
+
xh_log_debug3("enc_buf: %.*s len: %lu", enc_buf->cur - enc_buf->start, enc_buf->start, enc_buf->cur - enc_buf->start);
|
156
|
+
|
157
|
+
switch (reader->encoder->state) {
|
158
|
+
case XH_ENC_TRUNCATED_CHAR_FOUND:
|
159
|
+
if (src_left == 0)
|
160
|
+
rb_raise(xh_parse_error_class, "Truncated char found but buffer is empty");
|
161
|
+
break;
|
162
|
+
case XH_ENC_BUFFER_OVERFLOW:
|
163
|
+
default:
|
164
|
+
goto DONE;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
|
168
|
+
DONE:
|
169
|
+
dst_left = enc_buf->cur - *buf;
|
170
|
+
xh_log_debug4("enc_buf: %p[%.*s] len: %lu", enc_buf->start, dst_left, enc_buf->cur, dst_left);
|
171
|
+
|
172
|
+
return dst_left;
|
173
|
+
}
|
174
|
+
#endif /* XH_HAVE_ENCODER */
|
175
|
+
|
176
|
+
static void
|
177
|
+
xh_string_reader_switch_encoding(xh_reader_t *reader, xh_char_t *encoding, xh_char_t **buf, size_t *len)
|
178
|
+
{
|
179
|
+
|
180
|
+
xh_common_reader_switch_encoding(reader, encoding, buf, len);
|
181
|
+
|
182
|
+
#ifdef XH_HAVE_ENCODER
|
183
|
+
reader->read = reader->encoder == NULL
|
184
|
+
? xh_string_reader_read
|
185
|
+
: xh_string_reader_read_with_encoding;
|
186
|
+
#endif
|
187
|
+
}
|
188
|
+
|
189
|
+
static void
|
190
|
+
xh_string_reader_destroy(xh_reader_t *reader)
|
191
|
+
{
|
192
|
+
xh_common_reader_destroy(reader);
|
193
|
+
}
|
194
|
+
|
195
|
+
#ifdef XH_HAVE_MMAP
|
196
|
+
static void
|
197
|
+
xh_mmaped_file_reader_init(xh_reader_t *reader, VALUE input, xh_char_t *encoding, size_t buf_size)
|
198
|
+
{
|
199
|
+
struct stat sb;
|
200
|
+
|
201
|
+
reader->file = XH_CHAR_CAST StringValueCStr(input);
|
202
|
+
|
203
|
+
xh_log_debug1("open file: %s", reader->file);
|
204
|
+
|
205
|
+
reader->fd = open((const char *) reader->file, O_RDONLY);
|
206
|
+
if (reader->fd == -1) {
|
207
|
+
rb_raise(xh_parse_error_class, "Can't open file '%s': %s", reader->file, strerror(errno));
|
208
|
+
}
|
209
|
+
|
210
|
+
if (fstat(reader->fd, &sb) == -1) {
|
211
|
+
rb_raise(xh_parse_error_class, "Can't get stat of file '%s': %s", reader->file, strerror(errno));
|
212
|
+
}
|
213
|
+
|
214
|
+
xh_log_debug1("file size: %lu", sb.st_size);
|
215
|
+
|
216
|
+
if (sb.st_size == 0) {
|
217
|
+
rb_raise(xh_parse_error_class, "File '%s' is empty", reader->file);
|
218
|
+
}
|
219
|
+
reader->len = sb.st_size;
|
220
|
+
|
221
|
+
#ifdef WIN32
|
222
|
+
reader->fh = (HANDLE) _get_osfhandle(reader->fd);
|
223
|
+
if (reader->fh == INVALID_HANDLE_VALUE) {
|
224
|
+
rb_raise(xh_parse_error_class, "Can't get file handle of file '%s'", reader->file);
|
225
|
+
}
|
226
|
+
|
227
|
+
xh_log_debug1("create mapping for file %s", reader->file);
|
228
|
+
reader->fm = CreateFileMapping(reader->fh, NULL, PAGE_READONLY, 0, 0, NULL);
|
229
|
+
if (reader->fm == NULL) {
|
230
|
+
rb_raise(xh_parse_error_class, "Can't create file mapping of file '%s'", reader->file);
|
231
|
+
}
|
232
|
+
|
233
|
+
xh_log_debug1("create map view for file %s", reader->file);
|
234
|
+
reader->str = XH_CHAR_CAST MapViewOfFile(reader->fm, FILE_MAP_READ, 0, 0, reader->len);
|
235
|
+
if (reader->str == NULL) {
|
236
|
+
rb_raise(xh_parse_error_class, "Can't create map view of file '%s'", reader->file);
|
237
|
+
}
|
238
|
+
#else
|
239
|
+
xh_log_debug1("mmap file %s", reader->file);
|
240
|
+
reader->str = XH_CHAR_CAST mmap((caddr_t) 0, reader->len, PROT_READ, MAP_PRIVATE, reader->fd, 0);
|
241
|
+
if ((caddr_t) reader->str == (caddr_t) (-1)) {
|
242
|
+
rb_raise(xh_parse_error_class, "Can't create map of file '%s': %s", reader->file, strerror(errno));
|
243
|
+
}
|
244
|
+
#endif
|
245
|
+
|
246
|
+
reader->main_buf.start = reader->main_buf.cur = reader->str;
|
247
|
+
reader->main_buf.end = reader->str + reader->len;
|
248
|
+
|
249
|
+
xh_common_reader_init(reader, input, encoding, buf_size);
|
250
|
+
}
|
251
|
+
|
252
|
+
static void
|
253
|
+
xh_mmaped_file_reader_destroy(xh_reader_t *reader)
|
254
|
+
{
|
255
|
+
xh_common_reader_destroy(reader);
|
256
|
+
|
257
|
+
if (reader->fd == -1) return;
|
258
|
+
|
259
|
+
#ifdef WIN32
|
260
|
+
xh_log_debug1("unmap view of file %s", reader->file);
|
261
|
+
UnmapViewOfFile(reader->str);
|
262
|
+
xh_log_debug1("close handle of file %s", reader->file);
|
263
|
+
CloseHandle(reader->fm);
|
264
|
+
#else
|
265
|
+
xh_log_debug1("munmap file %s", reader->file);
|
266
|
+
if (munmap(reader->str, reader->len) == -1) {
|
267
|
+
rb_raise(xh_parse_error_class, "Can't munmap file '%s': %s", reader->file, strerror(errno));
|
268
|
+
}
|
269
|
+
#endif
|
270
|
+
|
271
|
+
xh_log_debug1("close file %s", reader->file);
|
272
|
+
if (close(reader->fd) == -1) {
|
273
|
+
rb_raise(xh_parse_error_class, "Can't close file '%s': %s", reader->file, strerror(errno));
|
274
|
+
}
|
275
|
+
}
|
276
|
+
#else
|
277
|
+
static void
|
278
|
+
xh_file_reader_init(xh_reader_t *reader, VALUE input, xh_char_t *encoding, size_t buf_size)
|
279
|
+
{
|
280
|
+
reader->file = XH_CHAR_CAST SvPV_nolen(input);
|
281
|
+
|
282
|
+
xh_log_debug1("open file: %s", reader->file);
|
283
|
+
|
284
|
+
reader->fd = open((char *) reader->file, O_RDONLY);
|
285
|
+
if (reader->fd == -1) {
|
286
|
+
rb_raise(xh_parse_error_class, "Can't open file '%s': %s", reader->file, strerror(errno));
|
287
|
+
}
|
288
|
+
|
289
|
+
xh_buffer_init(&reader->main_buf, buf_size);
|
290
|
+
|
291
|
+
xh_common_reader_init(reader, input, encoding, buf_size);
|
292
|
+
}
|
293
|
+
|
294
|
+
static void
|
295
|
+
xh_file_reader_destroy(xh_reader_t *reader)
|
296
|
+
{
|
297
|
+
xh_common_reader_destroy(reader);
|
298
|
+
|
299
|
+
if (reader->main_buf.start != NULL)
|
300
|
+
free(reader->main_buf.start);
|
301
|
+
|
302
|
+
if (close(reader->fd) == -1) {
|
303
|
+
rb_raise(xh_parse_error_class, "Can't close file '%s': %s", reader->file, strerror(errno));
|
304
|
+
}
|
305
|
+
}
|
306
|
+
#endif /* XH_HAVE_MMAP */
|
307
|
+
|
308
|
+
static size_t
|
309
|
+
xh_file_reader_read(xh_reader_t *reader, xh_char_t **buf, xh_char_t *preserve, size_t *off)
|
310
|
+
{
|
311
|
+
xh_char_t *old_buf_addr;
|
312
|
+
size_t len;
|
313
|
+
xh_buffer_t *main_buf;
|
314
|
+
|
315
|
+
main_buf = &reader->main_buf;
|
316
|
+
*off = 0;
|
317
|
+
|
318
|
+
xh_log_debug1("read preserve: %p", preserve);
|
319
|
+
if (preserve == NULL) {
|
320
|
+
main_buf->cur = main_buf->start;
|
321
|
+
}
|
322
|
+
else {
|
323
|
+
*off = preserve - main_buf->start;
|
324
|
+
xh_log_debug1("off: %lu", *off);
|
325
|
+
if (*off) {
|
326
|
+
xh_log_debug3("memmove dest: %p src %p size: %lu", main_buf->start, preserve, main_buf->end - preserve);
|
327
|
+
xh_memmove(main_buf->start, preserve, main_buf->end - preserve);
|
328
|
+
}
|
329
|
+
main_buf->cur -= *off;
|
330
|
+
xh_log_debug1("read cur: %p", main_buf->cur);
|
331
|
+
}
|
332
|
+
|
333
|
+
old_buf_addr = main_buf->start;
|
334
|
+
|
335
|
+
xh_buffer_grow50(main_buf);
|
336
|
+
|
337
|
+
if (preserve != NULL && main_buf->start != old_buf_addr) {
|
338
|
+
*off += old_buf_addr - main_buf->start;
|
339
|
+
}
|
340
|
+
|
341
|
+
len = read(reader->fd, main_buf->cur, xh_buffer_avail(main_buf));
|
342
|
+
*buf = main_buf->cur;
|
343
|
+
if (len == (size_t) (-1)) {
|
344
|
+
rb_raise(xh_parse_error_class, "Failed to read file");
|
345
|
+
}
|
346
|
+
main_buf->cur += len;
|
347
|
+
|
348
|
+
return len;
|
349
|
+
}
|
350
|
+
|
351
|
+
#ifdef XH_HAVE_ENCODER
|
352
|
+
static size_t
|
353
|
+
xh_file_reader_read_with_encoding(xh_reader_t *reader, xh_char_t **buf, xh_char_t *preserve, size_t *off)
|
354
|
+
{
|
355
|
+
xh_char_t *old_buf_addr;
|
356
|
+
size_t src_left, dst_left;
|
357
|
+
xh_buffer_t *main_buf, *enc_buf;
|
358
|
+
|
359
|
+
*off = 0;
|
360
|
+
main_buf = &reader->main_buf;
|
361
|
+
enc_buf = &reader->enc_buf;
|
362
|
+
|
363
|
+
xh_log_debug4("enc_buf: %p[%.*s] len: %lu", enc_buf->start, enc_buf->cur - enc_buf->start, enc_buf->cur, enc_buf->cur - enc_buf->start);
|
364
|
+
|
365
|
+
xh_log_debug1("preserve data: %p", preserve);
|
366
|
+
if (preserve == NULL) {
|
367
|
+
xh_buffer_seek_top(enc_buf);
|
368
|
+
}
|
369
|
+
else {
|
370
|
+
*off = preserve - enc_buf->start;
|
371
|
+
xh_log_debug1("off: %lu", *off);
|
372
|
+
if (*off) {
|
373
|
+
xh_log_debug3("memmove dest: %p src %p size: %lu", enc_buf->start, preserve, enc_buf->end - preserve);
|
374
|
+
xh_memmove(enc_buf->start, preserve, enc_buf->end - preserve);
|
375
|
+
}
|
376
|
+
enc_buf->cur -= *off;
|
377
|
+
}
|
378
|
+
|
379
|
+
old_buf_addr = enc_buf->start;
|
380
|
+
xh_buffer_grow50(enc_buf);
|
381
|
+
|
382
|
+
if (preserve != NULL && enc_buf->start != old_buf_addr) {
|
383
|
+
*off += old_buf_addr - enc_buf->start;
|
384
|
+
}
|
385
|
+
|
386
|
+
*buf = xh_buffer_pos(enc_buf);
|
387
|
+
|
388
|
+
while (enc_buf->cur < enc_buf->end) {
|
389
|
+
xh_buffer_grow50(main_buf);
|
390
|
+
|
391
|
+
if (reader->fake_read_pos == NULL) {
|
392
|
+
src_left = read(reader->fd, xh_buffer_pos(main_buf), xh_buffer_avail(main_buf));
|
393
|
+
}
|
394
|
+
else {
|
395
|
+
main_buf->cur = reader->fake_read_pos;
|
396
|
+
src_left = reader->fake_read_len;
|
397
|
+
reader->fake_read_pos = NULL;
|
398
|
+
reader->fake_read_len = 0;
|
399
|
+
}
|
400
|
+
if (src_left == 0) {
|
401
|
+
if (main_buf->cur == main_buf->end)
|
402
|
+
break;
|
403
|
+
rb_raise(xh_parse_error_class, "Truncate char found");
|
404
|
+
}
|
405
|
+
if (src_left == (size_t) (-1))
|
406
|
+
rb_raise(xh_parse_error_class, "Failed to read file");
|
407
|
+
|
408
|
+
dst_left = xh_buffer_avail(enc_buf);
|
409
|
+
|
410
|
+
xh_log_debug4("main_buf: %.*s src_left: %lu dst_left: %lu", src_left, main_buf->cur, src_left, dst_left);
|
411
|
+
|
412
|
+
xh_encoder_encode_string(reader->encoder, &main_buf->cur, &src_left, &enc_buf->cur, &dst_left);
|
413
|
+
|
414
|
+
xh_log_debug3("enc_buf: %.*s len: %lu", enc_buf->cur - enc_buf->start, enc_buf->start, enc_buf->cur - enc_buf->start);
|
415
|
+
|
416
|
+
switch (reader->encoder->state) {
|
417
|
+
case XH_ENC_TRUNCATED_CHAR_FOUND:
|
418
|
+
if (src_left == 0)
|
419
|
+
rb_raise(xh_parse_error_class, "Truncated char found but buffer is empty");
|
420
|
+
xh_memmove(main_buf->start, main_buf->cur, src_left);
|
421
|
+
main_buf->cur = main_buf->start + src_left;
|
422
|
+
break;
|
423
|
+
case XH_ENC_BUFFER_OVERFLOW:
|
424
|
+
default:
|
425
|
+
xh_buffer_seek_top(main_buf);
|
426
|
+
goto DONE;
|
427
|
+
}
|
428
|
+
}
|
429
|
+
|
430
|
+
DONE:
|
431
|
+
dst_left = enc_buf->cur - *buf;
|
432
|
+
xh_log_debug4("enc_buf: %p[%.*s] len: %lu", enc_buf->start, dst_left, enc_buf->cur, dst_left);
|
433
|
+
|
434
|
+
return dst_left;
|
435
|
+
}
|
436
|
+
#endif /* XH_HAVE_ENCODER */
|
437
|
+
|
438
|
+
static void
|
439
|
+
xh_file_reader_switch_encoding(xh_reader_t *reader, xh_char_t *encoding, xh_char_t **buf, size_t *len)
|
440
|
+
{
|
441
|
+
xh_common_reader_switch_encoding(reader, encoding, buf, len);
|
442
|
+
|
443
|
+
#ifdef XH_HAVE_ENCODER
|
444
|
+
reader->read = reader->encoder == NULL
|
445
|
+
? xh_file_reader_read
|
446
|
+
: xh_file_reader_read_with_encoding;
|
447
|
+
#endif
|
448
|
+
}
|
449
|
+
|
450
|
+
static void
|
451
|
+
xh_ruby_io_reader_init(xh_reader_t *reader, VALUE input, xh_char_t *encoding, size_t buf_size)
|
452
|
+
{
|
453
|
+
reader->fd = RFILE(reader->ruby_io)->fptr->fd;
|
454
|
+
|
455
|
+
xh_buffer_init(&reader->main_buf, buf_size);
|
456
|
+
|
457
|
+
xh_common_reader_init(reader, input, encoding, buf_size);
|
458
|
+
}
|
459
|
+
|
460
|
+
static void
|
461
|
+
xh_ruby_io_reader_destroy(xh_reader_t *reader)
|
462
|
+
{
|
463
|
+
xh_common_reader_destroy(reader);
|
464
|
+
|
465
|
+
if (reader->main_buf.start != NULL)
|
466
|
+
free(reader->main_buf.start);
|
467
|
+
}
|
468
|
+
|
469
|
+
void
|
470
|
+
xh_reader_init(xh_reader_t *reader, VALUE input, xh_char_t *encoding, size_t buf_size)
|
471
|
+
{
|
472
|
+
size_t len;
|
473
|
+
xh_char_t *str;
|
474
|
+
|
475
|
+
if (RB_TYPE_P(input, T_STRING)) {
|
476
|
+
str = XH_CHAR_CAST RSTRING_PTR(input);
|
477
|
+
len = RSTRING_LEN(input);
|
478
|
+
if (len == 0)
|
479
|
+
rb_raise(xh_parse_error_class, "String is empty");
|
480
|
+
|
481
|
+
/* Parsing string */
|
482
|
+
if (xh_str_is_xml(str)) {
|
483
|
+
reader->type = XH_READER_STRING_TYPE;
|
484
|
+
reader->init = xh_string_reader_init;
|
485
|
+
reader->read = xh_string_reader_read;
|
486
|
+
reader->switch_encoding = xh_string_reader_switch_encoding;
|
487
|
+
reader->destroy = xh_string_reader_destroy;
|
488
|
+
}
|
489
|
+
/* Parsing file */
|
490
|
+
else {
|
491
|
+
#ifdef XH_HAVE_MMAP
|
492
|
+
reader->type = XH_READER_MMAPED_FILE_TYPE;
|
493
|
+
reader->init = xh_mmaped_file_reader_init;
|
494
|
+
reader->read = xh_string_reader_read;
|
495
|
+
reader->switch_encoding = xh_string_reader_switch_encoding;
|
496
|
+
reader->destroy = xh_mmaped_file_reader_destroy;
|
497
|
+
#else
|
498
|
+
reader->type = XH_READER_FILE_TYPE;
|
499
|
+
reader->init = xh_file_reader_init;
|
500
|
+
reader->read = xh_file_reader_read;
|
501
|
+
reader->switch_encoding = xh_file_reader_switch_encoding;
|
502
|
+
reader->destroy = xh_file_reader_destroy;
|
503
|
+
#endif
|
504
|
+
}
|
505
|
+
}
|
506
|
+
else {
|
507
|
+
if (!RB_TYPE_P(input, T_FILE))
|
508
|
+
rb_raise(xh_parse_error_class, "Can't use file handle as a Ruby IO handle");
|
509
|
+
|
510
|
+
/* Ruby IO handle */
|
511
|
+
xh_log_debug0("Ruby IO handle detected");
|
512
|
+
reader->ruby_io = input;
|
513
|
+
reader->type = XH_READER_FILE_TYPE;
|
514
|
+
reader->init = xh_ruby_io_reader_init;
|
515
|
+
reader->read = xh_file_reader_read;
|
516
|
+
reader->switch_encoding = xh_file_reader_switch_encoding;
|
517
|
+
reader->destroy = xh_ruby_io_reader_destroy;
|
518
|
+
}
|
519
|
+
|
520
|
+
reader->init(reader, input, encoding, buf_size);
|
521
|
+
}
|
522
|
+
|
523
|
+
void
|
524
|
+
xh_reader_destroy(xh_reader_t *reader)
|
525
|
+
{
|
526
|
+
if (reader->destroy != NULL)
|
527
|
+
reader->destroy(reader);
|
528
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#ifndef _XH_READER_H_
|
2
|
+
#define _XH_READER_H_
|
3
|
+
|
4
|
+
#include "xh_config.h"
|
5
|
+
#include "xh_core.h"
|
6
|
+
|
7
|
+
/* Identifies which reader implementation a reader instance uses. */
typedef enum {
    XH_READER_STRING_TYPE      = 0,  /* XML held in a Ruby String */
    XH_READER_FILE_TYPE        = 1,  /* data read through a file descriptor */
    XH_READER_MMAPED_FILE_TYPE = 2   /* file mapped into memory */
} xh_reader_type_t;
|
12
|
+
|
13
|
+
typedef struct _xh_reader_t xh_reader_t;
|
14
|
+
struct _xh_reader_t {
|
15
|
+
xh_reader_type_t type;
|
16
|
+
VALUE input;
|
17
|
+
xh_char_t *str;
|
18
|
+
size_t len;
|
19
|
+
xh_char_t *file;
|
20
|
+
int fd;
|
21
|
+
VALUE ruby_io;
|
22
|
+
#ifdef WIN32
|
23
|
+
HANDLE fm, fh;
|
24
|
+
#endif
|
25
|
+
#ifdef XH_HAVE_ENCODER
|
26
|
+
xh_encoder_t *encoder;
|
27
|
+
xh_buffer_t enc_buf;
|
28
|
+
#endif
|
29
|
+
xh_buffer_t main_buf;
|
30
|
+
xh_ruby_buffer_t ruby_buf;
|
31
|
+
xh_char_t *fake_read_pos;
|
32
|
+
size_t fake_read_len;
|
33
|
+
size_t buf_size;
|
34
|
+
void (*init) (xh_reader_t *reader, VALUE input, xh_char_t *encoding, size_t buf_size);
|
35
|
+
size_t (*read) (xh_reader_t *reader, xh_char_t **buf, xh_char_t *preserve, size_t *off);
|
36
|
+
void (*switch_encoding) (xh_reader_t *reader, xh_char_t *encoding, xh_char_t **buf, size_t *len);
|
37
|
+
void (*destroy) (xh_reader_t *reader);
|
38
|
+
};
|
39
|
+
|
40
|
+
/* Invokes the reader's destroy callback, if one is installed. */
void xh_reader_destroy(xh_reader_t *reader);
|
41
|
+
/*
 * Chooses a reader implementation for `input` (Ruby String or IO object),
 * installs its callbacks on `reader` and runs its init callback.
 */
void xh_reader_init(xh_reader_t *reader, VALUE input, xh_char_t *encoding, size_t buf_size);
|
42
|
+
|
43
|
+
#endif /* _XH_READER_H_ */
|