ox 2.14.3 → 2.14.7

Sign up to get free protection for your applications and to get access to all the features.
data/ext/ox/parse.c CHANGED
@@ -5,13 +5,16 @@
5
5
 
6
6
  #include <stdlib.h>
7
7
  #include <errno.h>
8
+ #include <stdbool.h>
8
9
  #include <stdio.h>
9
10
  #include <string.h>
11
+ #include <strings.h>
10
12
 
11
13
  #include "ruby.h"
12
14
  #include "ox.h"
13
15
  #include "err.h"
14
16
  #include "attr.h"
17
+ #include "intern.h"
15
18
  #include "helper.h"
16
19
  #include "special.h"
17
20
 
@@ -93,6 +96,30 @@ next_white(PInfo pi) {
93
96
  }
94
97
  }
95
98
 
99
// Normalizes line endings in place: every "\r\n" pair and every lone '\r'
// in buf becomes a single '\n'. The result is never longer than the input
// and is always NUL terminated. A NULL buf is ignored.
static void fix_newlines(char *buf) {
    char *s;
    char *d;

    // Fast path: nothing to rewrite when there is no carriage return.
    if (NULL == buf || NULL == strchr(buf, '\r')) {
        return;
    }
    s = buf;
    d = buf;
    for (; '\0' != *s; s++) {
        char c = *s;

        if ('\r' == c) {
            if ('\n' == *(s + 1)) {
                // Drop the '\r' of a "\r\n" pair; the '\n' is copied on the
                // next iteration.
                continue;
            }
            // A lone '\r' becomes '\n'. It must be stored at the write
            // position d, not at the read position s, or it is lost once
            // an earlier "\r\n" has shifted d behind s.
            c = '\n';
        }
        *d++ = c;
    }
    *d = '\0';
}
122
+
96
123
  static void
97
124
  mark_pi_cb(void *ptr) {
98
125
  if (NULL != ptr) {
@@ -211,31 +238,11 @@ ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options
211
238
  return pi.obj;
212
239
  }
213
240
 
214
- static char*
215
- gather_content(const char *src, char *content, size_t len) {
216
- for (; 0 < len; src++, content++, len--) {
217
- switch (*src) {
218
- case '?':
219
- if ('>' == *(src + 1)) {
220
- *content = '\0';
221
- return (char*)(src + 1);
222
- }
223
- *content = *src;
224
- break;
225
- case '\0':
226
- return 0;
227
- default:
228
- *content = *src;
229
- break;
230
- }
231
- }
232
- return 0;
233
- }
234
-
235
241
  // Entered after the "<?" sequence. Ready to read the rest.
236
242
  static void
237
243
  read_instruction(PInfo pi) {
238
- char content[1024];
244
+ char content[256];
245
+ char *content_ptr;
239
246
  struct _attrStack attrs;
240
247
  char *attr_name;
241
248
  char *attr_value;
@@ -243,7 +250,8 @@ read_instruction(PInfo pi) {
243
250
  char *end;
244
251
  char c;
245
252
  char *cend;
246
- int attrs_ok = 1;
253
+ size_t size;
254
+ bool attrs_ok = true;
247
255
 
248
256
  *content = '\0';
249
257
  attr_stack_init(&attrs);
@@ -251,10 +259,33 @@ read_instruction(PInfo pi) {
251
259
  return;
252
260
  }
253
261
  end = pi->s;
254
- if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) {
255
- set_error(&pi->err, "processing instruction content too large or not terminated", pi->str, pi->s);
256
- return;
262
+ for (; true; pi->s++) {
263
+ switch (*pi->s) {
264
+ case '?':
265
+ if ('>' == *(pi->s + 1)) {
266
+ pi->s++;
267
+ goto DONE;
268
+ }
269
+ break;
270
+ case '\0':
271
+ set_error(&pi->err, "processing instruction not terminated", pi->str, pi->s);
272
+ return;
273
+ default:
274
+ break;
275
+ }
257
276
  }
277
+ DONE:
278
+ cend = pi->s;
279
+ size = cend - end - 1;
280
+ pi->s = end;
281
+ if (size < sizeof(content)) {
282
+ content_ptr = content;
283
+ } else {
284
+ content_ptr = ALLOC_N(char, size + 1);
285
+ }
286
+ memcpy(content_ptr, end, size);
287
+ content_ptr[size] = '\0';
288
+
258
289
  next_non_white(pi);
259
290
  c = *pi->s;
260
291
  *end = '\0'; // terminate name
@@ -274,7 +305,7 @@ read_instruction(PInfo pi) {
274
305
  end = pi->s;
275
306
  next_non_white(pi);
276
307
  if ('=' != *pi->s++) {
277
- attrs_ok = 0;
308
+ attrs_ok = false;
278
309
  break;
279
310
  }
280
311
  *end = '\0'; // terminate name
@@ -311,10 +342,13 @@ read_instruction(PInfo pi) {
311
342
  if (attrs_ok) {
312
343
  pi->pcb->instruct(pi, target, attrs.head, 0);
313
344
  } else {
314
- pi->pcb->instruct(pi, target, attrs.head, content);
345
+ pi->pcb->instruct(pi, target, attrs.head, content_ptr);
315
346
  }
316
347
  }
317
348
  attr_stack_cleanup(&attrs);
349
+ if (content_ptr != content) {
350
+ xfree(content_ptr);
351
+ }
318
352
  }
319
353
 
320
354
  static void
@@ -361,10 +395,10 @@ read_delimited(PInfo pi, char end) {
361
395
  // that. Ready to read the rest.
362
396
  static void
363
397
  read_doctype(PInfo pi) {
364
- char *docType;
398
+ char *doctype;
365
399
 
366
400
  next_non_white(pi);
367
- docType = pi->s;
401
+ doctype = pi->s;
368
402
  read_delimited(pi, '>');
369
403
  if (err_has(&pi->err)) {
370
404
  return;
@@ -373,7 +407,8 @@ read_doctype(PInfo pi) {
373
407
  *pi->s = '\0';
374
408
  pi->s++;
375
409
  if (0 != pi->pcb->add_doctype) {
376
- pi->pcb->add_doctype(pi, docType);
410
+ fix_newlines(doctype);
411
+ pi->pcb->add_doctype(pi, doctype);
377
412
  }
378
413
  }
379
414
 
@@ -409,6 +444,7 @@ read_comment(PInfo pi) {
409
444
  *end = '\0'; // in case the comment was blank
410
445
  pi->s = end + 3;
411
446
  if (0 != pi->pcb->add_comment) {
447
+ fix_newlines(comment);
412
448
  pi->pcb->add_comment(pi, comment);
413
449
  }
414
450
  }
@@ -769,9 +805,11 @@ read_text(PInfo pi) {
769
805
  }
770
806
  *b = '\0';
771
807
  if (0 != alloc_buf) {
808
+ fix_newlines(alloc_buf);
772
809
  pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
773
810
  xfree(alloc_buf);
774
811
  } else {
812
+ fix_newlines(buf);
775
813
  pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
776
814
  }
777
815
  }
@@ -838,9 +876,11 @@ read_reduced_text(PInfo pi) {
838
876
  }
839
877
  *b = '\0';
840
878
  if (0 != alloc_buf) {
879
+ fix_newlines(alloc_buf);
841
880
  pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
842
881
  xfree(alloc_buf);
843
882
  } else {
883
+ fix_newlines(buf);
844
884
  pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
845
885
  }
846
886
  }
@@ -899,6 +939,7 @@ read_cdata(PInfo pi) {
899
939
  *end = '\0';
900
940
  pi->s = end + 3;
901
941
  if (0 != pi->pcb->add_cdata) {
942
+ fix_newlines(start);
902
943
  pi->pcb->add_cdata(pi, start, end - start);
903
944
  }
904
945
  }