rbxl 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +154 -0
- data/Rakefile +5 -0
- data/ext/rbxl_native/extconf.rb +51 -0
- data/ext/rbxl_native/native.c +677 -0
- data/lib/rbxl/cell.rb +3 -0
- data/lib/rbxl/empty_cell.rb +13 -0
- data/lib/rbxl/errors.rb +7 -0
- data/lib/rbxl/native.rb +15 -0
- data/lib/rbxl/read_only_cell.rb +3 -0
- data/lib/rbxl/read_only_workbook.rb +153 -0
- data/lib/rbxl/read_only_worksheet.rb +501 -0
- data/lib/rbxl/row.rb +23 -0
- data/lib/rbxl/version.rb +3 -0
- data/lib/rbxl/write_only_cell.rb +10 -0
- data/lib/rbxl/write_only_workbook.rb +143 -0
- data/lib/rbxl/write_only_worksheet.rb +180 -0
- data/lib/rbxl.rb +33 -0
- metadata +97 -0
|
@@ -0,0 +1,677 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* rbxl_native - Optional C extension for rbxl
|
|
3
|
+
*
|
|
4
|
+
* Parses xlsx sheet XML via libxml2 SAX2 directly, bypassing
|
|
5
|
+
* Nokogiri's per-node Ruby object allocation overhead.
|
|
6
|
+
*
|
|
7
|
+
* Security considerations:
|
|
8
|
+
* - All buffers are dynamically allocated and grown as needed (no fixed limits)
|
|
9
|
+
* - Shared string index is bounds-checked
|
|
10
|
+
* - XML parser depth is limited to prevent XML bomb attacks
|
|
11
|
+
* - Parser context is cleaned up even when Ruby exceptions occur
|
|
12
|
+
* - All string inputs are validated
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
#include <ruby.h>
#include <ruby/encoding.h>
#include <libxml/parser.h>
#include <libxml/SAX2.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
21
|
+
|
|
22
|
+
static rb_encoding *enc_utf8;
|
|
23
|
+
|
|
24
|
+
static inline VALUE make_utf8_str(const char *ptr, long len)
|
|
25
|
+
{
|
|
26
|
+
VALUE s = rb_str_new(ptr, len);
|
|
27
|
+
rb_enc_associate(s, enc_utf8);
|
|
28
|
+
return s;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
#define INITIAL_BUF_CAP 256
|
|
32
|
+
#define MAX_XML_DEPTH 64
|
|
33
|
+
#define MAX_TOTAL_BYTES (512 * 1024 * 1024) /* 512 MB hard limit on accumulated text */
|
|
34
|
+
|
|
35
|
+
/* ------------------------------------------------------------------ */
|
|
36
|
+
/* Dynamic buffer */
|
|
37
|
+
/* ------------------------------------------------------------------ */
|
|
38
|
+
|
|
39
|
+
/* Growable byte buffer used for text accumulation and XML generation. */
typedef struct {
    char *data;  /* storage; NULL while nothing has been allocated */
    size_t len;  /* number of bytes currently in use */
    size_t cap;  /* number of bytes allocated at `data` */
} dynbuf;
|
|
44
|
+
|
|
45
|
+
/* Put `b` into the empty state: no storage, zero length, zero capacity. */
static void dynbuf_init(dynbuf *b)
{
    b->data = NULL;
    b->cap = b->len = 0;
}
|
|
51
|
+
|
|
52
|
+
/*
 * Release the storage owned by `b` (if any) and return it to the empty
 * state, so calling this twice on the same buffer is harmless.
 */
static void dynbuf_free(dynbuf *b)
{
    if (b->data != NULL) {
        xfree(b->data);
    }
    b->data = NULL;
    b->cap = 0;
    b->len = 0;
}
|
|
57
|
+
|
|
58
|
+
/*
 * Discard the contents of `b` while keeping its allocated storage, so the
 * next append can reuse the existing capacity.
 */
static void dynbuf_clear(dynbuf *b)
{
    b->len = 0;
}
|
|
62
|
+
|
|
63
|
+
/*
 * Append `n` bytes from `src` to `b`, growing the storage when needed.
 *
 * Returns 1 on success (n == 0 is a successful no-op) and 0 when the
 * resulting length would exceed MAX_TOTAL_BYTES; in that case the buffer is
 * left untouched. Capacity starts at INITIAL_BUF_CAP and doubles until the
 * request fits.
 *
 * The result of xrealloc is not NULL-checked: Ruby's allocator reports
 * out-of-memory by raising rather than by returning NULL.
 */
static int dynbuf_append(dynbuf *b, const char *src, size_t n)
{
    const size_t limit = (size_t)MAX_TOTAL_BYTES;

    if (n == 0) return 1;

    /* Compare against the remaining headroom instead of computing
     * b->len + n first: that sum can wrap around for a huge n and slip
     * past the limit check. */
    if (b->len > limit || n > limit - b->len) return 0;

    size_t needed = b->len + n;
    if (needed > b->cap) {
        size_t newcap = b->cap ? b->cap : INITIAL_BUF_CAP;
        while (newcap < needed) newcap *= 2;
        b->data = xrealloc(b->data, newcap);
        b->cap = newcap;
    }

    memcpy(b->data + b->len, src, n);
    b->len = needed;
    return 1;
}
|
|
79
|
+
|
|
80
|
+
/* ------------------------------------------------------------------ */
|
|
81
|
+
/* Parse context */
|
|
82
|
+
/* ------------------------------------------------------------------ */
|
|
83
|
+
|
|
84
|
+
typedef struct {
|
|
85
|
+
/* Row / cell counters */
|
|
86
|
+
int row_count;
|
|
87
|
+
int cell_count;
|
|
88
|
+
|
|
89
|
+
/* Nesting state */
|
|
90
|
+
int in_row;
|
|
91
|
+
int in_cell;
|
|
92
|
+
int collecting; /* currently inside <v> or <t> */
|
|
93
|
+
int is_v; /* distinguishes </v> from </t> */
|
|
94
|
+
int depth; /* current nesting depth */
|
|
95
|
+
|
|
96
|
+
/* Cell type attribute value ('s', 'b', 'n', ...) */
|
|
97
|
+
char cell_type;
|
|
98
|
+
int has_cell_type;
|
|
99
|
+
|
|
100
|
+
/* Buffers */
|
|
101
|
+
dynbuf text_buf; /* accumulates character data for current <v>/<t> */
|
|
102
|
+
dynbuf raw_buf; /* accumulated raw value for current cell */
|
|
103
|
+
int has_raw;
|
|
104
|
+
|
|
105
|
+
/* Cell coordinate (for full read mode) */
|
|
106
|
+
dynbuf cell_ref; /* "r" attribute of <c> */
|
|
107
|
+
|
|
108
|
+
/* Row index (for full read mode) */
|
|
109
|
+
int row_index;
|
|
110
|
+
|
|
111
|
+
/* Shared strings (Ruby Array, must be marked during GC) */
|
|
112
|
+
VALUE shared_strings;
|
|
113
|
+
long shared_strings_len;
|
|
114
|
+
|
|
115
|
+
/* Current row (Ruby Array) */
|
|
116
|
+
VALUE current_row;
|
|
117
|
+
|
|
118
|
+
/* Mode: 0 = values_only, 1 = full (ReadOnlyCell + Row) */
|
|
119
|
+
int full_mode;
|
|
120
|
+
|
|
121
|
+
/* Ruby classes for full mode (looked up once at init) */
|
|
122
|
+
VALUE cReadOnlyCell;
|
|
123
|
+
VALUE cRow;
|
|
124
|
+
|
|
125
|
+
/* Error flag — set if a callback wants to abort */
|
|
126
|
+
int error;
|
|
127
|
+
char error_msg[256];
|
|
128
|
+
} parse_ctx;
|
|
129
|
+
|
|
130
|
+
/* ------------------------------------------------------------------ */
|
|
131
|
+
/* Value coercion (mirrors Rbxl::ReadOnlyWorksheet#coerce_value) */
|
|
132
|
+
/* ------------------------------------------------------------------ */
|
|
133
|
+
|
|
134
|
+
/*
 * Convert the raw text gathered for the current cell into a Ruby value,
 * based on the cell's type character:
 *
 *   's'      shared string: raw is a decimal index into shared_strings; a
 *            malformed or out-of-range index yields the raw text instead
 *   'b'      boolean: "1" is true, anything else (or no value) is false
 *   'i'      inline string: raw text as a String ("" when empty)
 *   'n'/none numeric inference: optional '-', digits, at most one '.';
 *            anything else is returned as a String
 *   other    raw text as a String
 *
 * Returns nil when the cell has no value (and is not a boolean / inline
 * string). Integers that do not fit a C long become Ruby bignums.
 */
static VALUE coerce_value(parse_ctx *c)
{
    const int typed = c->has_cell_type;

    /* No <v>/<t> at all, or one with empty content */
    if (!c->has_raw || c->raw_buf.len == 0) {
        /* inlineStr with an empty <t></t> is "", not nil */
        if (c->has_raw && typed && c->cell_type == 'i') return make_utf8_str("", 0);
        if (typed && c->cell_type == 'b') return Qfalse;
        return Qnil;
    }

    const char *raw = c->raw_buf.data;
    size_t len = c->raw_buf.len;

    if (typed) {
        switch (c->cell_type) {
        case 's': { /* shared string index */
            long idx = 0;
            for (size_t i = 0; i < len; i++) {
                unsigned char ch = (unsigned char)raw[i];
                if (ch < '0' || ch > '9') {
                    /* malformed index: hand back the raw text */
                    return make_utf8_str(raw, (long)len);
                }
                int digit = ch - '0';
                /* Test before multiplying: signed overflow is undefined
                 * behaviour, so it cannot be detected after the fact. */
                if (idx > (LONG_MAX - digit) / 10) {
                    return make_utf8_str(raw, (long)len);
                }
                idx = idx * 10 + digit;
            }
            if (idx >= c->shared_strings_len) {
                /* out of bounds: hand back the raw text */
                return make_utf8_str(raw, (long)len);
            }
            return rb_ary_entry(c->shared_strings, idx);
        }
        case 'b': /* boolean */
            return (len == 1 && raw[0] == '1') ? Qtrue : Qfalse;
        case 'i': /* inlineStr: raw is the text content */
            return make_utf8_str(raw, (long)len);
        case 'n': /* explicit number: fall through to inference below */
            break;
        default:  /* any other type is treated as text */
            return make_utf8_str(raw, (long)len);
        }
    }

    /* Infer a numeric scalar: -?digits[.digits] with at least one digit */
    size_t start = 0;
    if (raw[0] == '-') {
        if (len == 1) return make_utf8_str(raw, (long)len);
        start = 1;
    }

    int has_dot = 0;
    int has_digit = 0;
    for (size_t i = start; i < len; i++) {
        unsigned char ch = (unsigned char)raw[i];
        if (ch >= '0' && ch <= '9') {
            has_digit = 1;
        } else if (ch == '.' && !has_dot) {
            has_dot = 1;
        } else {
            return make_utf8_str(raw, (long)len);
        }
    }
    if (!has_digit) return make_utf8_str(raw, (long)len);

    /* strtod/strtol need a NUL terminator. The append can be refused when
     * the buffer sits exactly at the size limit; the buffer is untouched in
     * that case, so fall back to the text form rather than parsing an
     * unterminated buffer. */
    if (!dynbuf_append(&c->raw_buf, "\0", 1)) {
        return make_utf8_str(raw, (long)len);
    }
    c->raw_buf.len--; /* the NUL is not part of the logical content */

    /* Re-read the pointer: the append may have moved the storage */
    const char *num = c->raw_buf.data;

    if (has_dot) {
        return DBL2NUM(strtod(num, NULL));
    }

    errno = 0;
    long parsed = strtol(num, NULL, 10);
    if (errno == ERANGE) {
        /* Too large for a long: let Ruby build a bignum instead of
         * silently saturating at LONG_MAX/LONG_MIN */
        return rb_cstr2inum(num, 10);
    }
    return LONG2NUM(parsed);
}
|
|
213
|
+
|
|
214
|
+
/* ------------------------------------------------------------------ */
|
|
215
|
+
/* SAX2 callbacks */
|
|
216
|
+
/* ------------------------------------------------------------------ */
|
|
217
|
+
|
|
218
|
+
static void on_start_element(void *ctx, const xmlChar *localname,
|
|
219
|
+
const xmlChar *prefix, const xmlChar *URI,
|
|
220
|
+
int nb_namespaces, const xmlChar **namespaces,
|
|
221
|
+
int nb_attributes, int nb_defaulted,
|
|
222
|
+
const xmlChar **attributes)
|
|
223
|
+
{
|
|
224
|
+
parse_ctx *c = (parse_ctx *)ctx;
|
|
225
|
+
(void)prefix; (void)URI; (void)nb_namespaces; (void)namespaces;
|
|
226
|
+
(void)nb_defaulted;
|
|
227
|
+
|
|
228
|
+
c->depth++;
|
|
229
|
+
if (c->depth > MAX_XML_DEPTH) {
|
|
230
|
+
c->error = 1;
|
|
231
|
+
snprintf(c->error_msg, sizeof(c->error_msg),
|
|
232
|
+
"XML depth exceeds limit (%d)", MAX_XML_DEPTH);
|
|
233
|
+
xmlStopParser(NULL); /* will be caught by parse loop */
|
|
234
|
+
return;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
const char *name = (const char *)localname;
|
|
238
|
+
|
|
239
|
+
if (name[0] == 'r' && name[1] == 'o' && name[2] == 'w' && name[3] == '\0') {
|
|
240
|
+
c->in_row = 1;
|
|
241
|
+
c->current_row = rb_ary_new();
|
|
242
|
+
if (c->full_mode) {
|
|
243
|
+
c->row_index = 0;
|
|
244
|
+
/* extract "r" attribute for row index */
|
|
245
|
+
for (int i = 0; i < nb_attributes; i++) {
|
|
246
|
+
const char *aname = (const char *)attributes[i * 5];
|
|
247
|
+
if (aname[0] == 'r' && aname[1] == '\0') {
|
|
248
|
+
const char *vstart = (const char *)attributes[i * 5 + 3];
|
|
249
|
+
const char *vend = (const char *)attributes[i * 5 + 4];
|
|
250
|
+
for (const char *p = vstart; p < vend; p++) {
|
|
251
|
+
c->row_index = c->row_index * 10 + (*p - '0');
|
|
252
|
+
}
|
|
253
|
+
break;
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
} else if (name[0] == 'c' && name[1] == '\0') {
|
|
258
|
+
c->in_cell = 1;
|
|
259
|
+
c->has_cell_type = 0;
|
|
260
|
+
c->has_raw = 0;
|
|
261
|
+
dynbuf_clear(&c->raw_buf);
|
|
262
|
+
if (c->full_mode) dynbuf_clear(&c->cell_ref);
|
|
263
|
+
/* extract attributes from the SAX2 attribute array */
|
|
264
|
+
for (int i = 0; i < nb_attributes; i++) {
|
|
265
|
+
const char *aname = (const char *)attributes[i * 5];
|
|
266
|
+
if (aname[0] == 't' && aname[1] == '\0') {
|
|
267
|
+
const char *vstart = (const char *)attributes[i * 5 + 3];
|
|
268
|
+
c->cell_type = vstart[0];
|
|
269
|
+
c->has_cell_type = 1;
|
|
270
|
+
} else if (c->full_mode && aname[0] == 'r' && aname[1] == '\0') {
|
|
271
|
+
const char *vstart = (const char *)attributes[i * 5 + 3];
|
|
272
|
+
const char *vend = (const char *)attributes[i * 5 + 4];
|
|
273
|
+
dynbuf_append(&c->cell_ref, vstart, (size_t)(vend - vstart));
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
} else if (name[0] == 'v' && name[1] == '\0') {
|
|
277
|
+
c->collecting = 1;
|
|
278
|
+
c->is_v = 1;
|
|
279
|
+
dynbuf_clear(&c->text_buf);
|
|
280
|
+
} else if (name[0] == 't' && name[1] == '\0') {
|
|
281
|
+
c->collecting = 1;
|
|
282
|
+
c->is_v = 0;
|
|
283
|
+
dynbuf_clear(&c->text_buf);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
static void on_end_element(void *ctx, const xmlChar *localname,
|
|
288
|
+
const xmlChar *prefix, const xmlChar *URI)
|
|
289
|
+
{
|
|
290
|
+
parse_ctx *c = (parse_ctx *)ctx;
|
|
291
|
+
(void)prefix; (void)URI;
|
|
292
|
+
|
|
293
|
+
if (c->collecting) {
|
|
294
|
+
if (c->is_v) {
|
|
295
|
+
/* end of <v> — copy text_buf to raw_buf */
|
|
296
|
+
dynbuf_clear(&c->raw_buf);
|
|
297
|
+
dynbuf_append(&c->raw_buf, c->text_buf.data, c->text_buf.len);
|
|
298
|
+
c->has_raw = 1;
|
|
299
|
+
c->collecting = 0;
|
|
300
|
+
} else {
|
|
301
|
+
/* end of <t> — append text_buf to raw_buf */
|
|
302
|
+
dynbuf_append(&c->raw_buf, c->text_buf.data, c->text_buf.len);
|
|
303
|
+
c->has_raw = 1;
|
|
304
|
+
c->collecting = 0;
|
|
305
|
+
}
|
|
306
|
+
} else {
|
|
307
|
+
const char *name = (const char *)localname;
|
|
308
|
+
if (name[0] == 'c' && name[1] == '\0') {
|
|
309
|
+
VALUE val = coerce_value(c);
|
|
310
|
+
if (c->full_mode) {
|
|
311
|
+
/* Build ReadOnlyCell.new(coordinate, value) */
|
|
312
|
+
VALUE coord;
|
|
313
|
+
if (c->cell_ref.len > 0) {
|
|
314
|
+
coord = make_utf8_str(c->cell_ref.data, (long)c->cell_ref.len);
|
|
315
|
+
} else {
|
|
316
|
+
coord = Qnil;
|
|
317
|
+
}
|
|
318
|
+
VALUE cell = rb_funcall(c->cReadOnlyCell, rb_intern("new"), 2, coord, val);
|
|
319
|
+
rb_ary_push(c->current_row, cell);
|
|
320
|
+
} else {
|
|
321
|
+
rb_ary_push(c->current_row, val);
|
|
322
|
+
}
|
|
323
|
+
c->in_cell = 0;
|
|
324
|
+
c->cell_count++;
|
|
325
|
+
} else if (name[0] == 'r' && name[1] == 'o' && name[2] == 'w' && name[3] == '\0') {
|
|
326
|
+
if (c->full_mode) {
|
|
327
|
+
/* Build Row.new(index: row_index, cells: cells) */
|
|
328
|
+
VALUE kwargs = rb_hash_new();
|
|
329
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("index")), INT2NUM(c->row_index));
|
|
330
|
+
rb_hash_aset(kwargs, ID2SYM(rb_intern("cells")), c->current_row);
|
|
331
|
+
VALUE argv[1] = { kwargs };
|
|
332
|
+
VALUE row = rb_funcallv_kw(c->cRow, rb_intern("new"), 1, argv, RB_PASS_KEYWORDS);
|
|
333
|
+
rb_yield(row);
|
|
334
|
+
} else {
|
|
335
|
+
rb_ary_freeze(c->current_row);
|
|
336
|
+
rb_yield(c->current_row);
|
|
337
|
+
}
|
|
338
|
+
c->current_row = Qnil;
|
|
339
|
+
c->in_row = 0;
|
|
340
|
+
c->row_count++;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
c->depth--;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
static void on_characters(void *ctx, const xmlChar *ch, int len)
|
|
348
|
+
{
|
|
349
|
+
parse_ctx *c = (parse_ctx *)ctx;
|
|
350
|
+
if (!c->collecting || len <= 0) return;
|
|
351
|
+
if (!dynbuf_append(&c->text_buf, (const char *)ch, (size_t)len)) {
|
|
352
|
+
c->error = 1;
|
|
353
|
+
snprintf(c->error_msg, sizeof(c->error_msg),
|
|
354
|
+
"cell text exceeds %d byte limit", MAX_TOTAL_BYTES);
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/* ------------------------------------------------------------------ */
|
|
359
|
+
/* Ensure-style cleanup wrapper */
|
|
360
|
+
/* ------------------------------------------------------------------ */
|
|
361
|
+
|
|
362
|
+
typedef struct {
|
|
363
|
+
parse_ctx *ctx;
|
|
364
|
+
xmlParserCtxtPtr parser;
|
|
365
|
+
const char *data;
|
|
366
|
+
long data_len;
|
|
367
|
+
} parse_args;
|
|
368
|
+
|
|
369
|
+
/*
 * rb_ensure body: push the whole XML buffer through the libxml2 parser.
 *
 * xmlParseChunk takes an int size, so the input is fed in bounded pieces
 * instead of casting data_len to int, which would truncate (or go negative)
 * for inputs larger than INT_MAX. The last piece is passed with the
 * terminate flag set.
 *
 * NOTE(review): the status returned by xmlParseChunk is still ignored, as
 * before, so malformed XML ends the row stream early without an error.
 * Confirm whether that should be reported.
 */
static VALUE do_parse(VALUE arg)
{
    enum { MAX_CHUNK = 1 << 30 };

    parse_args *a = (parse_args *)arg;
    const char *cursor = a->data;
    long remaining = a->data_len;

    while (remaining > MAX_CHUNK) {
        xmlParseChunk(a->parser, cursor, MAX_CHUNK, 0);
        cursor += MAX_CHUNK;
        remaining -= MAX_CHUNK;
    }
    xmlParseChunk(a->parser, cursor, (int)remaining, 1 /* terminate */);

    return Qnil;
}
|
|
377
|
+
|
|
378
|
+
/*
 * rb_ensure cleanup: free the libxml2 parser context (once) and release the
 * three buffers owned by the parse context.
 */
static VALUE cleanup_parse(VALUE arg)
{
    parse_args *a = (parse_args *)arg;
    parse_ctx *state = a->ctx;

    if (a->parser != NULL) {
        xmlFreeParserCtxt(a->parser);
        a->parser = NULL;
    }

    dynbuf_free(&state->text_buf);
    dynbuf_free(&state->raw_buf);
    dynbuf_free(&state->cell_ref);

    return Qnil;
}
|
|
390
|
+
|
|
391
|
+
/* ------------------------------------------------------------------ */
|
|
392
|
+
/* Common parse setup */
|
|
393
|
+
/* ------------------------------------------------------------------ */
|
|
394
|
+
|
|
395
|
+
/*
 * Parse `xml_str` with the SAX callbacks above, using `ctx` as callback
 * state, and return the number of rows handled as a Ruby Integer.
 *
 * Raises RuntimeError when the parser cannot be created or when a callback
 * recorded an error. The parser and the context's buffers are released via
 * rb_ensure, so they are freed even if the caller's block raises.
 */
static VALUE run_parse(parse_ctx *ctx, VALUE xml_str)
{
    /* Parse a frozen snapshot: rows are yielded to Ruby code while libxml2
     * still reads from the string's bytes, so the caller must not be able to
     * mutate (and thereby reallocate) the buffer mid-parse. Taken before the
     * parser exists so a failure here cannot leak it. */
    VALUE source = rb_str_new_frozen(xml_str);

    xmlSAXHandler handler;
    memset(&handler, 0, sizeof(handler));
    handler.initialized = XML_SAX2_MAGIC;
    handler.startElementNs = on_start_element;
    handler.endElementNs = on_end_element;
    handler.characters = on_characters;

    xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(
        &handler, ctx, NULL, 0, NULL);

    if (!parser) {
        rb_raise(rb_eRuntimeError, "failed to create libxml2 parser context");
    }

    /* XML_PARSE_NONET: no network access.
     * XML_PARSE_HUGE:  lift libxml2's built-in size limits; this file
     *                  enforces MAX_XML_DEPTH and MAX_TOTAL_BYTES itself.
     * XML_PARSE_NOENT is deliberately not set: it turns entity substitution
     * on, the opposite of limiting entity expansion. */
    xmlCtxtUseOptions(parser, XML_PARSE_NONET | XML_PARSE_HUGE);

    parse_args args = {
        .ctx = ctx,
        .parser = parser,
        .data = RSTRING_PTR(source),
        .data_len = RSTRING_LEN(source),
    };

    /* rb_ensure guarantees cleanup even if rb_yield raises */
    rb_ensure(do_parse, (VALUE)&args, cleanup_parse, (VALUE)&args);

    /* Keep the snapshot reachable for the GC for the whole parse */
    RB_GC_GUARD(source);

    if (ctx->error) {
        rb_raise(rb_eRuntimeError, "rbxl_native: %s", ctx->error_msg);
    }

    return INT2NUM(ctx->row_count);
}
|
|
426
|
+
|
|
427
|
+
/* ------------------------------------------------------------------ */
|
|
428
|
+
/* Ruby method: Rbxl::Native.parse_sheet(xml_string, shared_strings) */
|
|
429
|
+
/* ------------------------------------------------------------------ */
|
|
430
|
+
|
|
431
|
+
/*
 * Values-only parse: yields each row as a frozen Array of coerced values
 * and returns the row count. Raises TypeError unless xml_str is a String
 * and shared_strings is an Array.
 */
static VALUE rb_native_parse(VALUE self, VALUE xml_str, VALUE shared_strings)
{
    parse_ctx state;

    (void)self;
    Check_Type(xml_str, T_STRING);
    Check_Type(shared_strings, T_ARRAY);

    /* Zero everything first; only the non-zero fields are set below */
    memset(&state, 0, sizeof(state));
    dynbuf_init(&state.text_buf);
    dynbuf_init(&state.raw_buf);
    state.full_mode = 0;
    state.current_row = Qnil;
    state.shared_strings = shared_strings;
    state.shared_strings_len = RARRAY_LEN(shared_strings);

    return run_parse(&state, xml_str);
}
|
|
448
|
+
|
|
449
|
+
/* ------------------------------------------------------------------ */
|
|
450
|
+
/* Ruby method: Rbxl::Native.parse_sheet_full(xml_string, shared_strings) */
|
|
451
|
+
/* ------------------------------------------------------------------ */
|
|
452
|
+
|
|
453
|
+
/*
 * Full parse: yields each row as an Rbxl::Row whose cells are
 * Rbxl::ReadOnlyCell objects, and returns the row count. Raises TypeError
 * unless xml_str is a String and shared_strings is an Array.
 */
static VALUE rb_native_parse_full(VALUE self, VALUE xml_str, VALUE shared_strings)
{
    parse_ctx state;
    VALUE rbxl_module;

    (void)self;
    Check_Type(xml_str, T_STRING);
    Check_Type(shared_strings, T_ARRAY);

    rbxl_module = rb_const_get(rb_cObject, rb_intern("Rbxl"));

    /* Zero everything first; only the non-zero fields are set below */
    memset(&state, 0, sizeof(state));
    state.shared_strings = shared_strings;
    state.shared_strings_len = RARRAY_LEN(shared_strings);
    state.current_row = Qnil;
    state.full_mode = 1;

    /* Resolve the wrapper classes once, up front */
    state.cReadOnlyCell = rb_const_get(rbxl_module, rb_intern("ReadOnlyCell"));
    state.cRow = rb_const_get(rbxl_module, rb_intern("Row"));

    dynbuf_init(&state.text_buf);
    dynbuf_init(&state.raw_buf);
    dynbuf_init(&state.cell_ref);

    return run_parse(&state, xml_str);
}
|
|
475
|
+
|
|
476
|
+
/* ================================================================== */
|
|
477
|
+
/* Native writer — generate sheet XML from Ruby Array of Arrays */
|
|
478
|
+
/* ================================================================== */
|
|
479
|
+
|
|
480
|
+
/* Column name from 1-based index: 1→A, 26→Z, 27→AA, ... */
|
|
481
|
+
/*
 * Append the spreadsheet column letters for a 1-based column index
 * (1 -> A, 26 -> Z, 27 -> AA, ...). Nothing is appended for index <= 0.
 */
static void write_column_name(dynbuf *buf, int index)
{
    char letters[8]; /* filled from the back, least significant letter first */
    char *const tail = letters + sizeof(letters);
    char *head = tail;

    /* Bijective base-26: shift to 0-based before taking each digit */
    for (int rest = index; rest > 0; rest = (rest - 1) / 26) {
        *--head = (char)('A' + ((rest - 1) % 26));
    }

    dynbuf_append(buf, head, (size_t)(tail - head));
}
|
|
494
|
+
|
|
495
|
+
/* Append the decimal representation of `val` to `buf`. */
static void __attribute__((noinline)) write_int(dynbuf *buf, long val)
{
    char digits[32];
    const int written = snprintf(digits, sizeof digits, "%ld", val);

    dynbuf_append(buf, digits, (size_t)written);
}
|
|
501
|
+
|
|
502
|
+
/* XML-escape string and append to buf */
|
|
503
|
+
/*
 * Append the `slen` bytes at `str` to `buf`, replacing the XML special
 * characters & < > " with their entity references. Runs of ordinary bytes
 * between special characters are copied in one piece.
 */
static void write_escaped(dynbuf *buf, const char *str, long slen)
{
    const char *end = str + slen;
    const char *seg_start = str; /* start of the pending unescaped run */

    for (const char *p = str; p < end; p++) {
        const char *esc;
        size_t esc_len;

        switch (*p) {
        case '&': esc = "&amp;";  esc_len = 5; break;
        case '<': esc = "&lt;";   esc_len = 4; break;
        case '>': esc = "&gt;";   esc_len = 4; break;
        case '"': esc = "&quot;"; esc_len = 6; break;
        default:  continue; /* ordinary byte: stays in the pending run */
        }

        /* Flush the run before the special character, then the entity */
        if (p > seg_start) dynbuf_append(buf, seg_start, (size_t)(p - seg_start));
        dynbuf_append(buf, esc, esc_len);
        seg_start = p + 1;
    }

    if (seg_start < end) dynbuf_append(buf, seg_start, (size_t)(end - seg_start));
}
|
|
527
|
+
|
|
528
|
+
/* Write a single cell */
|
|
529
|
+
#define W(s) dynbuf_append(buf, s, sizeof(s) - 1)

/*
 * Append the decimal digits of a Ruby Integer. Fixnums take the fast C
 * path; anything larger is rendered through to_s so that bignums are written
 * instead of raising RangeError.
 */
static void write_integer_value(dynbuf *buf, VALUE num)
{
    if (FIXNUM_P(num)) {
        write_int(buf, FIX2LONG(num));
        return;
    }
    VALUE digits = rb_obj_as_string(num);
    dynbuf_append(buf, RSTRING_PTR(digits), (size_t)RSTRING_LEN(digits));
    RB_GC_GUARD(digits); /* keep the String alive while its bytes are read */
}

/*
 * Append everything that follows the cell's opening attributes: the type
 * attribute, the value element and the closing tag, chosen by the Ruby type
 * of `v` (nil, true/false, Integer, Float, anything else as inline text).
 *
 * With `allow_iso8601` set, objects responding to #iso8601 are rendered
 * through it; otherwise text comes from to_s. Every string conversion goes
 * through rb_obj_as_string so the result is guaranteed to be a String
 * before RSTRING_PTR is applied to it.
 */
static void write_cell_body(dynbuf *buf, VALUE v, int allow_iso8601)
{
    if (NIL_P(v)) {
        W("/>");
    } else if (v == Qtrue) {
        W(" t=\"b\"><v>1</v></c>");
    } else if (v == Qfalse) {
        W(" t=\"b\"><v>0</v></c>");
    } else if (RB_INTEGER_TYPE_P(v)) {
        W("><v>");
        write_integer_value(buf, v);
        W("</v></c>");
    } else if (RB_FLOAT_TYPE_P(v)) {
        W("><v>");
        /* Ruby's own float formatting, to match the pure-Ruby writer */
        VALUE fs = rb_obj_as_string(v);
        dynbuf_append(buf, RSTRING_PTR(fs), (size_t)RSTRING_LEN(fs));
        RB_GC_GUARD(fs);
        W("</v></c>");
    } else {
        /* String, Date, Time, ...: written as an escaped inline string */
        VALUE s;
        if (allow_iso8601 && rb_respond_to(v, rb_intern("iso8601"))) {
            s = rb_obj_as_string(rb_funcall(v, rb_intern("iso8601"), 0));
        } else {
            s = rb_obj_as_string(v);
        }
        W(" t=\"inlineStr\"><is><t>");
        write_escaped(buf, RSTRING_PTR(s), RSTRING_LEN(s));
        RB_GC_GUARD(s);
        W("</t></is></c>");
    }
}

/*
 * Append one <c> element for the cell at 1-based (col, row).
 *
 * `value` is either a plain Ruby value or a WriteOnlyCell; for the latter
 * the value and optional style id are read from the object and a non-nil
 * style id is emitted as the s="..." attribute.
 *
 * NOTE(review): values wrapped in a WriteOnlyCell are rendered with to_s
 * while plain values prefer #iso8601; that asymmetry is kept from the
 * original code. Confirm it matches the Ruby writer.
 */
static void write_cell(dynbuf *buf, int col, int row, VALUE value, VALUE cWriteOnlyCell)
{
    W("<c r=\"");
    write_column_name(buf, col);
    write_int(buf, row);

    if (RTEST(rb_obj_is_kind_of(value, cWriteOnlyCell))) {
        VALUE cell_value = rb_funcall(value, rb_intern("value"), 0);
        VALUE style_id = rb_funcall(value, rb_intern("style_id"), 0);

        W("\"");
        if (!NIL_P(style_id)) {
            W(" s=\"");
            write_int(buf, NUM2LONG(style_id));
            W("\"");
        }
        write_cell_body(buf, cell_value, 0);
        return;
    }

    W("\"");
    write_cell_body(buf, value, 1);
}

#undef W
|
|
603
|
+
|
|
604
|
+
/*
|
|
605
|
+
* Rbxl::Native.generate_sheet(rows) → XML string
|
|
606
|
+
*
|
|
607
|
+
* rows: Array of Arrays, each inner array is a row of cell values
|
|
608
|
+
*/
|
|
609
|
+
static VALUE rb_native_generate(VALUE self, VALUE rows)
|
|
610
|
+
{
|
|
611
|
+
(void)self;
|
|
612
|
+
Check_Type(rows, T_ARRAY);
|
|
613
|
+
|
|
614
|
+
VALUE mRbxl = rb_const_get(rb_cObject, rb_intern("Rbxl"));
|
|
615
|
+
VALUE cWriteOnlyCell = rb_const_get(mRbxl, rb_intern("WriteOnlyCell"));
|
|
616
|
+
|
|
617
|
+
long num_rows = RARRAY_LEN(rows);
|
|
618
|
+
|
|
619
|
+
/* Find max columns for dimension ref */
|
|
620
|
+
int max_cols = 1;
|
|
621
|
+
for (long i = 0; i < num_rows; i++) {
|
|
622
|
+
VALUE row = rb_ary_entry(rows, i);
|
|
623
|
+
int len = (int)RARRAY_LEN(row);
|
|
624
|
+
if (len > max_cols) max_cols = len;
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
dynbuf buf;
|
|
628
|
+
dynbuf_init(&buf);
|
|
629
|
+
|
|
630
|
+
#define W(s) dynbuf_append(&buf, s, sizeof(s) - 1)
|
|
631
|
+
|
|
632
|
+
W("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
|
|
633
|
+
"<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">\n"
|
|
634
|
+
" <dimension ref=\"A1:");
|
|
635
|
+
write_column_name(&buf, max_cols);
|
|
636
|
+
write_int(&buf, num_rows);
|
|
637
|
+
W("\"/>\n <sheetData>");
|
|
638
|
+
|
|
639
|
+
for (long i = 0; i < num_rows; i++) {
|
|
640
|
+
VALUE row = rb_ary_entry(rows, i);
|
|
641
|
+
Check_Type(row, T_ARRAY);
|
|
642
|
+
long row_num = i + 1;
|
|
643
|
+
long ncols = RARRAY_LEN(row);
|
|
644
|
+
|
|
645
|
+
W("<row r=\"");
|
|
646
|
+
write_int(&buf, row_num);
|
|
647
|
+
W("\">");
|
|
648
|
+
|
|
649
|
+
for (long j = 0; j < ncols; j++) {
|
|
650
|
+
write_cell(&buf, (int)(j + 1), (int)row_num, rb_ary_entry(row, j), cWriteOnlyCell);
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
W("</row>");
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
W("</sheetData>\n</worksheet>");
|
|
657
|
+
|
|
658
|
+
#undef W
|
|
659
|
+
|
|
660
|
+
VALUE result = make_utf8_str(buf.data, (long)buf.len);
|
|
661
|
+
dynbuf_free(&buf);
|
|
662
|
+
return result;
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
/* ------------------------------------------------------------------ */
|
|
666
|
+
/* Init */
|
|
667
|
+
/* ------------------------------------------------------------------ */
|
|
668
|
+
|
|
669
|
+
void Init_rbxl_native(void)
|
|
670
|
+
{
|
|
671
|
+
enc_utf8 = rb_utf8_encoding();
|
|
672
|
+
VALUE mRbxl = rb_define_module("Rbxl");
|
|
673
|
+
VALUE mNative = rb_define_module_under(mRbxl, "Native");
|
|
674
|
+
rb_define_module_function(mNative, "parse_sheet", rb_native_parse, 2);
|
|
675
|
+
rb_define_module_function(mNative, "parse_sheet_full", rb_native_parse_full, 2);
|
|
676
|
+
rb_define_module_function(mNative, "generate_sheet", rb_native_generate, 1);
|
|
677
|
+
}
|
data/lib/rbxl/cell.rb
ADDED