ox 2.13.2 → 2.14.2

Sign up to get free protection for your applications and to get access to all the features.
data/ext/ox/ox.h CHANGED
@@ -16,20 +16,15 @@ extern "C" {
16
16
  #define RSTRING_NOT_MODIFIED
17
17
 
18
18
  #include "ruby.h"
19
- #if HAS_ENCODING_SUPPORT
19
+ #if HAVE_RB_ENC_ASSOCIATE
20
20
  #include "ruby/encoding.h"
21
21
  #endif
22
22
 
23
- #ifdef RUBINIUS_RUBY
24
- #undef T_COMPLEX
25
- enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
23
+ #if HAVE_RUBY_ST_H
24
+ #include "ruby/st.h"
26
25
  #else
27
- #if HAS_TOP_LEVEL_ST_H
28
- /* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. */
26
+ // Fallback when ruby/st.h is not available: use the top-level st.h. Only seen on Travis, and it seems to vary depending on the Travis machine picked up.
29
27
  #include "st.h"
30
- #else
31
- #include "ruby/st.h"
32
- #endif
33
28
  #endif
34
29
 
35
30
  #include "cache.h"
@@ -141,15 +136,14 @@ typedef struct _options {
141
136
  char convert_special;// boolean true or false
142
137
  char allow_invalid; // YesNo
143
138
  char no_empty; // boolean - no empty elements when dumping
139
+ char with_cdata; // boolean - hash_load should include cdata
144
140
  char inv_repl[12]; // max 10 valid characters, first character is the length
145
141
  char strip_ns[64]; // namespace to strip, \0 is no-strip, \* is all, else only matches
146
142
  struct _hints *html_hints; // html hints
147
143
  VALUE attr_key_mod;
148
144
  VALUE element_key_mod;
149
- #if HAS_ENCODING_SUPPORT
145
+ #if HAVE_RB_ENC_ASSOCIATE
150
146
  rb_encoding *rb_enc;
151
- #elif HAS_PRIVATE_ENCODING
152
- VALUE rb_enc;
153
147
  #else
154
148
  void *rb_enc;
155
149
  #endif
@@ -236,10 +230,8 @@ extern ID ox_tv_nsec_id;
236
230
  extern ID ox_tv_usec_id;
237
231
  extern ID ox_value_id;
238
232
 
239
- #if HAS_ENCODING_SUPPORT
233
+ #if HAVE_RB_ENC_ASSOCIATE
240
234
  extern rb_encoding *ox_utf8_encoding;
241
- #elif HAS_PRIVATE_ENCODING
242
- extern VALUE ox_utf8_encoding;
243
235
  #else
244
236
  extern void *ox_utf8_encoding;
245
237
  #endif
data/ext/ox/parse.c CHANGED
@@ -441,7 +441,6 @@ read_element(PInfo pi) {
441
441
  // empty element, no attributes and no children
442
442
  pi->s++;
443
443
  if ('>' != *pi->s) {
444
- /*printf("*** '%s' ***\n", pi->s); */
445
444
  attr_stack_cleanup(&attrs);
446
445
  set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
447
446
  return 0;
@@ -479,8 +478,8 @@ read_element(PInfo pi) {
479
478
  pi->s++;
480
479
  pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
481
480
  pi->pcb->end_element(pi, ename);
482
-
483
481
  attr_stack_cleanup(&attrs);
482
+
484
483
  return 0;
485
484
  case '>':
486
485
  /* has either children or a value */
@@ -1033,18 +1032,9 @@ read_coded_chars(PInfo pi, char *text) {
1033
1032
  } else {
1034
1033
  if (u <= 0x000000000000007FULL) {
1035
1034
  *text++ = (char)u;
1036
- #if HAS_PRIVATE_ENCODING
1037
- } else if (ox_utf8_encoding == pi->options->rb_enc ||
1038
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
1039
- #else
1040
1035
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
1041
- #endif
1042
1036
  text = ox_ucs_to_utf8_chars(text, u);
1043
- #if HAS_PRIVATE_ENCODING
1044
- } else if (Qnil == pi->options->rb_enc) {
1045
- #else
1046
1037
  } else if (0 == pi->options->rb_enc) {
1047
- #endif
1048
1038
  pi->options->rb_enc = ox_utf8_encoding;
1049
1039
  text = ox_ucs_to_utf8_chars(text, u);
1050
1040
  } else if (TolerantEffort == pi->options->effort) {
@@ -1110,19 +1100,10 @@ collapse_special(PInfo pi, char *str) {
1110
1100
  }
1111
1101
  if (u <= 0x000000000000007FULL) {
1112
1102
  *b++ = (char)u;
1113
- #if HAS_PRIVATE_ENCODING
1114
- } else if (ox_utf8_encoding == pi->options->rb_enc ||
1115
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
1116
- #else
1117
1103
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
1118
- #endif
1119
1104
  b = ox_ucs_to_utf8_chars(b, u);
1120
1105
  /* TBD support UTF-16 */
1121
- #if HAS_PRIVATE_ENCODING
1122
- } else if (Qnil == pi->options->rb_enc) {
1123
- #else
1124
1106
  } else if (0 == pi->options->rb_enc) {
1125
- #endif
1126
1107
  pi->options->rb_enc = ox_utf8_encoding;
1127
1108
  b = ox_ucs_to_utf8_chars(b, u);
1128
1109
  } else {
data/ext/ox/sax.c CHANGED
@@ -9,13 +9,16 @@
9
9
  #include <stdio.h>
10
10
  #include <strings.h>
11
11
  #include <sys/types.h>
12
- #if NEEDS_UIO
12
+ #if HAVE_SYS_UIO_H
13
13
  #include <sys/uio.h>
14
14
  #endif
15
15
  #include <unistd.h>
16
16
  #include <time.h>
17
17
 
18
18
  #include "ruby.h"
19
+ #if HAVE_RB_ENC_ASSOCIATE
20
+ #include "ruby/encoding.h"
21
+ #endif
19
22
  #include "ox.h"
20
23
  #include "sax.h"
21
24
  #include "sax_stack.h"
@@ -68,9 +71,9 @@ static VALUE protect_parse(VALUE drp) {
68
71
  return Qnil;
69
72
  }
70
73
 
71
- #if HAS_ENCODING_SUPPORT || HAS_PRIVATE_ENCODING
74
+ #if HAVE_RB_ENC_ASSOCIATE
72
75
  static int
73
- strIsAscii(const char *s) {
76
+ str_is_ascii(const char *s) {
74
77
  for (; '\0' != *s; s++) {
75
78
  if (*s < ' ' || '~' < *s) {
76
79
  return 0;
@@ -87,8 +90,8 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
87
90
 
88
91
  if (dr->options.symbolize) {
89
92
  if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
90
- #if HAS_ENCODING_SUPPORT
91
- if (0 != dr->encoding && !strIsAscii(str)) {
93
+ #if HAVE_RB_ENC_ASSOCIATE
94
+ if (0 != dr->encoding && !str_is_ascii(str)) {
92
95
  VALUE rstr = rb_str_new2(str);
93
96
 
94
97
  // TBD if sym can be pinned down then use this all the time
@@ -99,20 +102,6 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
99
102
  sym = ID2SYM(rb_intern(str));
100
103
  *slot = sym;
101
104
  }
102
- #elif HAS_PRIVATE_ENCODING
103
- if (Qnil != dr->encoding && !strIsAscii(str)) {
104
- VALUE rstr = rb_str_new2(str);
105
-
106
- rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
107
- sym = rb_funcall(rstr, ox_to_sym_id, 0);
108
- // Needed for Ruby 2.2 to get around the GC of symbols created
109
- // with to_sym which is needed for encoded symbols.
110
- rb_ary_push(ox_sym_bank, sym);
111
- *slot = Qundef;
112
- } else {
113
- sym = ID2SYM(rb_intern(str));
114
- *slot = sym;
115
- }
116
105
  #else
117
106
  sym = ID2SYM(rb_intern(str));
118
107
  *slot = sym;
@@ -120,14 +109,10 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
120
109
  }
121
110
  } else {
122
111
  sym = rb_str_new2(str);
123
- #if HAS_ENCODING_SUPPORT
112
+ #if HAVE_RB_ENC_ASSOCIATE
124
113
  if (0 != dr->encoding) {
125
114
  rb_enc_associate(sym, dr->encoding);
126
115
  }
127
- #elif HAS_PRIVATE_ENCODING
128
- if (Qnil != dr->encoding) {
129
- rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding);
130
- }
131
116
  #endif
132
117
  if (0 != strp) {
133
118
  *strp = StringValuePtr(sym);
@@ -182,7 +167,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
182
167
  dr->blocked = 0;
183
168
  dr->abort = false;
184
169
  has_init(&dr->has, handler);
185
- #if HAS_ENCODING_SUPPORT
170
+ #if HAVE_RB_ENC_FIND
186
171
  if ('\0' == *ox_default_options.encoding) {
187
172
  VALUE encoding;
188
173
 
@@ -196,18 +181,6 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
196
181
  } else {
197
182
  dr->encoding = rb_enc_find(ox_default_options.encoding);
198
183
  }
199
- #elif HAS_PRIVATE_ENCODING
200
- if ('\0' == *ox_default_options.encoding) {
201
- VALUE encoding;
202
-
203
- if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
204
- dr->encoding = encoding;
205
- } else {
206
- dr->encoding = Qnil;
207
- }
208
- } else {
209
- dr->encoding = rb_str_new2(ox_default_options.encoding);
210
- }
211
184
  #else
212
185
  dr->encoding = 0;
213
186
  #endif
@@ -255,9 +228,7 @@ skipBOM(SaxDrive dr) {
255
228
 
256
229
  if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
257
230
  if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
258
- #if HAS_ENCODING_SUPPORT
259
- dr->encoding = ox_utf8_encoding;
260
- #elif HAS_PRIVATE_ENCODING
231
+ #if HAVE_RB_ENC_FIND
261
232
  dr->encoding = ox_utf8_encoding;
262
233
  #else
263
234
  dr->encoding = UTF8_STR;
@@ -364,14 +335,10 @@ parse(SaxDrive dr) {
364
335
  off_t col = dr->buf.col - 1;
365
336
 
366
337
  args[0] = rb_str_new2("");
367
- #if HAS_ENCODING_SUPPORT
338
+ #if HAVE_RB_ENC_ASSOCIATE
368
339
  if (0 != dr->encoding) {
369
340
  rb_enc_associate(args[0], dr->encoding);
370
341
  }
371
- #elif HAS_PRIVATE_ENCODING
372
- if (Qnil != dr->encoding) {
373
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
374
- }
375
342
  #endif
376
343
  if (dr->has.pos) {
377
344
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -526,14 +493,10 @@ read_instruction(SaxDrive dr) {
526
493
  ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
527
494
  }
528
495
  args[0] = rb_str_new2(content);
529
- #if HAS_ENCODING_SUPPORT
496
+ #if HAVE_RB_ENC_ASSOCIATE
530
497
  if (0 != dr->encoding) {
531
498
  rb_enc_associate(args[0], dr->encoding);
532
499
  }
533
- #elif HAS_PRIVATE_ENCODING
534
- if (Qnil != dr->encoding) {
535
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
536
- }
537
500
  #endif
538
501
  if (dr->has.line) {
539
502
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
@@ -732,14 +695,10 @@ read_cdata(SaxDrive dr) {
732
695
  VALUE args[1];
733
696
 
734
697
  args[0] = rb_str_new2(dr->buf.str);
735
- #if HAS_ENCODING_SUPPORT
698
+ #if HAVE_RB_ENC_ASSOCIATE
736
699
  if (0 != dr->encoding) {
737
700
  rb_enc_associate(args[0], dr->encoding);
738
701
  }
739
- #elif HAS_PRIVATE_ENCODING
740
- if (Qnil != dr->encoding) {
741
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
742
- }
743
702
  #endif
744
703
  if (dr->has.pos) {
745
704
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -826,14 +785,10 @@ read_comment(SaxDrive dr) {
826
785
  (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
827
786
 
828
787
  args[0] = rb_str_new2(dr->buf.str);
829
- #if HAS_ENCODING_SUPPORT
788
+ #if HAVE_RB_ENC_ASSOCIATE
830
789
  if (0 != dr->encoding) {
831
790
  rb_enc_associate(args[0], dr->encoding);
832
791
  }
833
- #elif HAS_PRIVATE_ENCODING
834
- if (Qnil != dr->encoding) {
835
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
836
- }
837
792
  #endif
838
793
  if (dr->has.pos) {
839
794
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1158,14 +1113,10 @@ read_text(SaxDrive dr) {
1158
1113
  ((NoSkip == dr->options.skip && !isEnd) ||
1159
1114
  (OffSkip == dr->options.skip))) {
1160
1115
  args[0] = rb_str_new2(dr->buf.str);
1161
- #if HAS_ENCODING_SUPPORT
1116
+ #if HAVE_RB_ENC_ASSOCIATE
1162
1117
  if (0 != dr->encoding) {
1163
1118
  rb_enc_associate(args[0], dr->encoding);
1164
1119
  }
1165
- #elif HAS_PRIVATE_ENCODING
1166
- if (Qnil != dr->encoding) {
1167
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1168
- }
1169
1120
  #endif
1170
1121
  if (dr->has.pos) {
1171
1122
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1213,14 +1164,10 @@ read_text(SaxDrive dr) {
1213
1164
  break;
1214
1165
  }
1215
1166
  args[0] = rb_str_new2(dr->buf.str);
1216
- #if HAS_ENCODING_SUPPORT
1167
+ #if HAVE_RB_ENC_ASSOCIATE
1217
1168
  if (0 != dr->encoding) {
1218
1169
  rb_enc_associate(args[0], dr->encoding);
1219
1170
  }
1220
- #elif HAS_PRIVATE_ENCODING
1221
- if (Qnil != dr->encoding) {
1222
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1223
- }
1224
1171
  #endif
1225
1172
  if (dr->has.pos) {
1226
1173
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1299,14 +1246,10 @@ read_jump(SaxDrive dr, const char *pat) {
1299
1246
  // TBD check parent overlay
1300
1247
  if (dr->has.text && !dr->blocked) {
1301
1248
  args[0] = rb_str_new2(dr->buf.str);
1302
- #if HAS_ENCODING_SUPPORT
1249
+ #if HAVE_RB_ENC_ASSOCIATE
1303
1250
  if (0 != dr->encoding) {
1304
1251
  rb_enc_associate(args[0], dr->encoding);
1305
1252
  }
1306
- #elif HAS_PRIVATE_ENCODING
1307
- if (Qnil != dr->encoding) {
1308
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1309
- }
1310
1253
  #endif
1311
1254
  if (dr->has.pos) {
1312
1255
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1377,10 +1320,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1377
1320
  c = read_quoted_value(dr);
1378
1321
  attr_value = dr->buf.str;
1379
1322
  if (is_encoding) {
1380
- #if HAS_ENCODING_SUPPORT
1323
+ #if HAVE_RB_ENC_FIND
1381
1324
  dr->encoding = rb_enc_find(dr->buf.str);
1382
- #elif HAS_PRIVATE_ENCODING
1383
- dr->encoding = rb_str_new2(dr->buf.str);
1384
1325
  #else
1385
1326
  dr->encoding = dr->buf.str;
1386
1327
  #endif
@@ -1411,14 +1352,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1411
1352
  ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1412
1353
  }
1413
1354
  args[1] = rb_str_new2(attr_value);
1414
- #if HAS_ENCODING_SUPPORT
1355
+ #if HAVE_RB_ENC_ASSOCIATE
1415
1356
  if (0 != dr->encoding) {
1416
1357
  rb_enc_associate(args[1], dr->encoding);
1417
1358
  }
1418
- #elif HAS_PRIVATE_ENCODING
1419
- if (Qnil != dr->encoding) {
1420
- rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
1421
- }
1422
1359
  #endif
1423
1360
  if (dr->has.pos) {
1424
1361
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1614,19 +1551,12 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1614
1551
  }
1615
1552
  if (u <= 0x000000000000007FULL) {
1616
1553
  *b++ = (char)u;
1617
- #if HAS_ENCODING_SUPPORT
1554
+ #if HAVE_RB_ENC_FIND
1618
1555
  } else if (ox_utf8_encoding == dr->encoding) {
1619
1556
  b = ox_ucs_to_utf8_chars(b, u);
1620
1557
  } else if (0 == dr->encoding) {
1621
1558
  dr->encoding = ox_utf8_encoding;
1622
1559
  b = ox_ucs_to_utf8_chars(b, u);
1623
- #elif HAS_PRIVATE_ENCODING
1624
- } else if (ox_utf8_encoding == dr->encoding ||
1625
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(dr->encoding)))) {
1626
- b = ox_ucs_to_utf8_chars(b, u);
1627
- } else if (Qnil == dr->encoding) {
1628
- dr->encoding = ox_utf8_encoding;
1629
- b = ox_ucs_to_utf8_chars(b, u);
1630
1560
  #else
1631
1561
  } else if (0 == dr->encoding) {
1632
1562
  dr->encoding = UTF8_STR;
data/ext/ox/sax.h CHANGED
@@ -33,10 +33,8 @@ typedef struct _saxDrive {
33
33
  int blocked;
34
34
  bool abort;
35
35
  struct _has has;
36
- #if HAS_ENCODING_SUPPORT
36
+ #if HAVE_RB_ENC_ASSOCIATE
37
37
  rb_encoding *encoding;
38
- #elif HAS_PRIVATE_ENCODING
39
- VALUE encoding;
40
38
  #else
41
39
  const char *encoding;
42
40
  #endif
data/ext/ox/sax_as.c CHANGED
@@ -8,7 +8,7 @@
8
8
  #include <stdio.h>
9
9
  #include <strings.h>
10
10
  #include <sys/types.h>
11
- #if NEEDS_UIO
11
+ #if HAVE_SYS_UIO_H
12
12
  #include <sys/uio.h>
13
13
  #endif
14
14
  #include <unistd.h>
@@ -136,14 +136,10 @@ sax_value_as_s(VALUE self) {
136
136
  break;
137
137
  }
138
138
  rs = rb_str_new2(dr->buf.str);
139
- #if HAS_ENCODING_SUPPORT
139
+ #if HAVE_RB_ENC_ASSOCIATE
140
140
  if (0 != dr->encoding) {
141
141
  rb_enc_associate(rs, dr->encoding);
142
142
  }
143
- #elif HAS_PRIVATE_ENCODING
144
- if (Qnil != dr->encoding) {
145
- rb_funcall(rs, ox_force_encoding_id, 1, dr->encoding);
146
- }
147
143
  #endif
148
144
  return rs;
149
145
  }
data/ext/ox/sax_buf.c CHANGED
@@ -8,7 +8,7 @@
8
8
  #include <stdio.h>
9
9
  #include <strings.h>
10
10
  #include <sys/types.h>
11
- #if NEEDS_UIO
11
+ #if HAVE_SYS_UIO_H
12
12
  #include <sys/uio.h>
13
13
  #endif
14
14
  #include <unistd.h>
data/lib/ox/element.rb CHANGED
@@ -115,17 +115,8 @@ module Ox
115
115
  # matching name will be yielded to. If the cond is a Hash then the
116
116
  # keys-value pairs in the cond must match the child attribute values with
117
117
  # the same keys. Any other cond type will yield to nothing.
118
- def each(cond=nil)
119
- if cond.nil?
120
- nodes.each { |n| yield(n) }
121
- else
122
- cond = cond.to_s if cond.is_a?(Symbol)
123
- if cond.is_a?(String)
124
- nodes.each { |n| yield(n) if n.is_a?(Element) && cond == n.name }
125
- elsif cond.is_a?(Hash)
126
- nodes.each { |n| yield(n) if n.is_a?(Element) && n.attr_match(cond) }
127
- end
128
- end
118
+ def each(cond=nil, &block)
119
+ build_enumerator(cond).each(&block)
129
120
  end
130
121
 
131
122
  # Returns an array of Nodes or Strings that correspond to the locations
@@ -413,6 +404,24 @@ module Ox
413
404
 
414
405
  private
415
406
 
407
+ # Builds an enumerator for use in `#each` call
408
+ #
409
+ # - +cond+ [Hash, String, Symbol, nil] an element filter; a Symbol is converted to a String and matched against the element name
410
+ def build_enumerator(cond)
411
+ if cond.nil?
412
+ nodes.each
413
+ else
414
+ cond = cond.to_s if cond.is_a?(Symbol)
415
+ Enumerator.new do |yielder|
416
+ if cond.is_a?(String)
417
+ nodes.each { |n| yielder.yield(n) if n.is_a?(Element) && cond == n.name }
418
+ elsif cond.is_a?(Hash)
419
+ nodes.each { |n| yielder.yield(n) if n.is_a?(Element) && n.attr_match(cond) }
420
+ end
421
+ end
422
+ end
423
+ end
424
+
416
425
  # Removes recursively children for nodes and sub_nodes
417
426
  #
418
427
  # - +found+ [Array] An array of Ox::Element