ox 2.14.3 → 2.14.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/ox/parse.c CHANGED
@@ -5,13 +5,16 @@
5
5
 
6
6
  #include <stdlib.h>
7
7
  #include <errno.h>
8
+ #include <stdbool.h>
8
9
  #include <stdio.h>
9
10
  #include <string.h>
11
+ #include <strings.h>
10
12
 
11
13
  #include "ruby.h"
12
14
  #include "ox.h"
13
15
  #include "err.h"
14
16
  #include "attr.h"
17
+ #include "intern.h"
15
18
  #include "helper.h"
16
19
  #include "special.h"
17
20
 
@@ -93,6 +96,30 @@ next_white(PInfo pi) {
93
96
  }
94
97
  }
95
98
 
99
// Normalizes line endings in buf in place: every "\r\n" pair and every lone
// '\r' is turned into a single '\n'. The rewritten string is never longer than
// the input and is always '\0' terminated. A NULL buf is ignored.
//
// Because "\r\n" pairs collapse to one character the string may shrink, so a
// caller that tracks the length separately has to recompute it afterwards.
static void fix_newlines(char *buf) {
    char *s;  // read cursor
    char *d;  // write cursor, never ahead of s

    if (NULL == buf) {
        return;
    }
    // Fast path: with no '\r' present there is nothing to rewrite.
    if (NULL == (s = strchr(buf, '\r'))) {
        return;
    }
    // Everything before the first '\r' is already where it belongs.
    d = s;
    for (; '\0' != *s; s++) {
        char c = *s;

        if ('\r' == c) {
            if ('\n' == *(s + 1)) {
                // Drop the '\r' of a "\r\n" pair; the '\n' is copied on the
                // next pass.
                continue;
            }
            c = '\n';  // lone '\r' becomes '\n'
        }
        // Always store through the write cursor. Writing the converted
        // character only to *s would leave a stale byte at *d once an earlier
        // "\r\n" pair has moved d behind s.
        *d++ = c;
    }
    *d = '\0';
}
122
+
96
123
  static void
97
124
  mark_pi_cb(void *ptr) {
98
125
  if (NULL != ptr) {
@@ -211,31 +238,11 @@ ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options
211
238
  return pi.obj;
212
239
  }
213
240
 
214
- static char*
215
- gather_content(const char *src, char *content, size_t len) {
216
- for (; 0 < len; src++, content++, len--) {
217
- switch (*src) {
218
- case '?':
219
- if ('>' == *(src + 1)) {
220
- *content = '\0';
221
- return (char*)(src + 1);
222
- }
223
- *content = *src;
224
- break;
225
- case '\0':
226
- return 0;
227
- default:
228
- *content = *src;
229
- break;
230
- }
231
- }
232
- return 0;
233
- }
234
-
235
241
  // Entered after the "<?" sequence. Ready to read the rest.
236
242
  static void
237
243
  read_instruction(PInfo pi) {
238
- char content[1024];
244
+ char content[256];
245
+ char *content_ptr;
239
246
  struct _attrStack attrs;
240
247
  char *attr_name;
241
248
  char *attr_value;
@@ -243,7 +250,8 @@ read_instruction(PInfo pi) {
243
250
  char *end;
244
251
  char c;
245
252
  char *cend;
246
- int attrs_ok = 1;
253
+ size_t size;
254
+ bool attrs_ok = true;
247
255
 
248
256
  *content = '\0';
249
257
  attr_stack_init(&attrs);
@@ -251,10 +259,33 @@ read_instruction(PInfo pi) {
251
259
  return;
252
260
  }
253
261
  end = pi->s;
254
- if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) {
255
- set_error(&pi->err, "processing instruction content too large or not terminated", pi->str, pi->s);
256
- return;
262
+ for (; true; pi->s++) {
263
+ switch (*pi->s) {
264
+ case '?':
265
+ if ('>' == *(pi->s + 1)) {
266
+ pi->s++;
267
+ goto DONE;
268
+ }
269
+ break;
270
+ case '\0':
271
+ set_error(&pi->err, "processing instruction not terminated", pi->str, pi->s);
272
+ return;
273
+ default:
274
+ break;
275
+ }
257
276
  }
277
+ DONE:
278
+ cend = pi->s;
279
+ size = cend - end - 1;
280
+ pi->s = end;
281
+ if (size < sizeof(content)) {
282
+ content_ptr = content;
283
+ } else {
284
+ content_ptr = ALLOC_N(char, size + 1);
285
+ }
286
+ memcpy(content_ptr, end, size);
287
+ content_ptr[size] = '\0';
288
+
258
289
  next_non_white(pi);
259
290
  c = *pi->s;
260
291
  *end = '\0'; // terminate name
@@ -274,7 +305,7 @@ read_instruction(PInfo pi) {
274
305
  end = pi->s;
275
306
  next_non_white(pi);
276
307
  if ('=' != *pi->s++) {
277
- attrs_ok = 0;
308
+ attrs_ok = false;
278
309
  break;
279
310
  }
280
311
  *end = '\0'; // terminate name
@@ -311,10 +342,13 @@ read_instruction(PInfo pi) {
311
342
  if (attrs_ok) {
312
343
  pi->pcb->instruct(pi, target, attrs.head, 0);
313
344
  } else {
314
- pi->pcb->instruct(pi, target, attrs.head, content);
345
+ pi->pcb->instruct(pi, target, attrs.head, content_ptr);
315
346
  }
316
347
  }
317
348
  attr_stack_cleanup(&attrs);
349
+ if (content_ptr != content) {
350
+ xfree(content_ptr);
351
+ }
318
352
  }
319
353
 
320
354
  static void
@@ -361,10 +395,10 @@ read_delimited(PInfo pi, char end) {
361
395
  // that. Ready to read the rest.
362
396
  static void
363
397
  read_doctype(PInfo pi) {
364
- char *docType;
398
+ char *doctype;
365
399
 
366
400
  next_non_white(pi);
367
- docType = pi->s;
401
+ doctype = pi->s;
368
402
  read_delimited(pi, '>');
369
403
  if (err_has(&pi->err)) {
370
404
  return;
@@ -373,7 +407,8 @@ read_doctype(PInfo pi) {
373
407
  *pi->s = '\0';
374
408
  pi->s++;
375
409
  if (0 != pi->pcb->add_doctype) {
376
- pi->pcb->add_doctype(pi, docType);
410
+ fix_newlines(doctype);
411
+ pi->pcb->add_doctype(pi, doctype);
377
412
  }
378
413
  }
379
414
 
@@ -409,6 +444,7 @@ read_comment(PInfo pi) {
409
444
  *end = '\0'; // in case the comment was blank
410
445
  pi->s = end + 3;
411
446
  if (0 != pi->pcb->add_comment) {
447
+ fix_newlines(comment);
412
448
  pi->pcb->add_comment(pi, comment);
413
449
  }
414
450
  }
@@ -769,9 +805,11 @@ read_text(PInfo pi) {
769
805
  }
770
806
  *b = '\0';
771
807
  if (0 != alloc_buf) {
808
+ fix_newlines(alloc_buf);
772
809
  pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
773
810
  xfree(alloc_buf);
774
811
  } else {
812
+ fix_newlines(buf);
775
813
  pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
776
814
  }
777
815
  }
@@ -838,9 +876,11 @@ read_reduced_text(PInfo pi) {
838
876
  }
839
877
  *b = '\0';
840
878
  if (0 != alloc_buf) {
879
+ fix_newlines(alloc_buf);
841
880
  pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
842
881
  xfree(alloc_buf);
843
882
  } else {
883
+ fix_newlines(buf);
844
884
  pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
845
885
  }
846
886
  }
@@ -899,6 +939,7 @@ read_cdata(PInfo pi) {
899
939
  *end = '\0';
900
940
  pi->s = end + 3;
901
941
  if (0 != pi->pcb->add_cdata) {
942
+ fix_newlines(start);
902
943
  pi->pcb->add_cdata(pi, start, end - start);
903
944
  }
904
945
  }