json 2.9.1 → 2.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,6 +12,7 @@ typedef struct JSON_Generator_StateStruct {
12
12
  VALUE space_before;
13
13
  VALUE object_nl;
14
14
  VALUE array_nl;
15
+ VALUE as_json;
15
16
 
16
17
  long max_nesting;
17
18
  long depth;
@@ -27,11 +28,11 @@ typedef struct JSON_Generator_StateStruct {
27
28
  #define RB_UNLIKELY(cond) (cond)
28
29
  #endif
29
30
 
30
- static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
31
+ static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
31
32
 
32
33
  static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
33
- static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
34
- sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict;
34
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
35
+ sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
35
36
 
36
37
 
37
38
  #define GET_STATE_TO(self, state) \
@@ -68,6 +69,7 @@ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *da
68
69
  static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
69
70
  static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
70
71
  static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
72
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
71
73
 
72
74
  static int usascii_encindex, utf8_encindex, binary_encindex;
73
75
 
@@ -96,6 +98,75 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
98
  raise_generator_error_str(invalid_object, str);
97
99
  }
98
100
 
101
+ // 0 - single-byte char that doesn't need to be escaped.
102
+ // (x | 8) - char that needs to be escaped.
103
+ static const unsigned char CHAR_LENGTH_MASK = 7;
104
+ static const unsigned char ESCAPE_MASK = 8;
105
+
106
+ typedef struct _search_state {
107
+ const char *ptr;
108
+ const char *end;
109
+ const char *cursor;
110
+ FBuffer *buffer;
111
+ } search_state;
112
+
113
+ static inline void search_flush(search_state *search)
114
+ {
115
+ fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
116
+ search->cursor = search->ptr;
117
+ }
118
+
119
+ static const unsigned char escape_table_basic[256] = {
120
+ // ASCII Control Characters
121
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
122
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
123
+ // ASCII Characters
124
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
125
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
127
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
128
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130
+ };
131
+
132
+ static inline unsigned char search_escape_basic(search_state *search)
133
+ {
134
+ while (search->ptr < search->end) {
135
+ if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
136
+ search_flush(search);
137
+ return 1;
138
+ } else {
139
+ search->ptr++;
140
+ }
141
+ }
142
+ search_flush(search);
143
+ return 0;
144
+ }
145
+
146
+ static inline void escape_UTF8_char_basic(search_state *search) {
147
+ const unsigned char ch = (unsigned char)*search->ptr;
148
+ switch (ch) {
149
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
150
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
151
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
152
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
153
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
154
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
155
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
156
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
157
+ default: {
158
+ const char *hexdig = "0123456789abcdef";
159
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
160
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
161
+ scratch[5] = hexdig[ch & 0xf];
162
+ fbuffer_append(search->buffer, scratch, 6);
163
+ break;
164
+ }
165
+ }
166
+ search->ptr++;
167
+ search->cursor = search->ptr;
168
+ }
169
+
99
170
  /* Converts in_string to a JSON string (without the wrapping '"'
100
171
  * characters) in FBuffer out_buffer.
101
172
  *
@@ -106,282 +177,241 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
106
177
  *
107
178
  * - If out_ascii_only: non-ASCII characters (>0x7F)
108
179
  *
109
- * - If out_script_safe: forwardslash, line separator (U+2028), and
180
+ * - If script_safe: forwardslash (/), line separator (U+2028), and
110
181
  * paragraph separator (U+2029)
111
182
  *
112
183
  * Everything else (should be UTF-8) is just passed through and
113
184
  * appended to the result.
114
185
  */
115
- static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
186
+ static inline void convert_UTF8_to_JSON(search_state *search)
116
187
  {
117
- const char *hexdig = "0123456789abcdef";
118
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
119
-
120
- const char *ptr = RSTRING_PTR(str);
121
- unsigned long len = RSTRING_LEN(str);
122
-
123
- unsigned long beg = 0, pos = 0;
124
-
125
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
126
-
127
- while (pos < len) {
128
- unsigned char ch = ptr[pos];
129
- unsigned char ch_len = escape_table[ch];
130
- /* JSON encoding */
188
+ while (search_escape_basic(search)) {
189
+ escape_UTF8_char_basic(search);
190
+ }
191
+ }
131
192
 
132
- if (RB_UNLIKELY(ch_len)) {
133
- switch (ch_len) {
134
- case 1: {
135
- FLUSH_POS(1);
136
- switch (ch) {
137
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
138
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
139
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
140
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
141
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
142
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
143
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
144
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
145
- default: {
146
- scratch[2] = '0';
147
- scratch[3] = '0';
148
- scratch[4] = hexdig[(ch >> 4) & 0xf];
149
- scratch[5] = hexdig[ch & 0xf];
150
- fbuffer_append(out_buffer, scratch, 6);
151
- break;
152
- }
153
- }
193
+ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
194
+ const unsigned char ch = (unsigned char)*search->ptr;
195
+ switch (ch_len) {
196
+ case 1: {
197
+ switch (ch) {
198
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
199
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
200
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
201
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
202
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
203
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
204
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
205
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
206
+ default: {
207
+ const char *hexdig = "0123456789abcdef";
208
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
209
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
210
+ scratch[5] = hexdig[ch & 0xf];
211
+ fbuffer_append(search->buffer, scratch, 6);
154
212
  break;
155
213
  }
156
- case 3: {
157
- unsigned char b2 = ptr[pos + 1];
158
- if (RB_UNLIKELY(out_script_safe && ch == 0xE2 && b2 == 0x80)) {
159
- unsigned char b3 = ptr[pos + 2];
160
- if (b3 == 0xA8) {
161
- FLUSH_POS(3);
162
- fbuffer_append(out_buffer, "\\u2028", 6);
163
- break;
164
- } else if (b3 == 0xA9) {
165
- FLUSH_POS(3);
166
- fbuffer_append(out_buffer, "\\u2029", 6);
167
- break;
168
- }
169
- }
170
- // fallthrough
171
- }
172
- default:
173
- pos += ch_len;
174
- break;
175
214
  }
176
- } else {
177
- pos++;
215
+ break;
216
+ }
217
+ case 3: {
218
+ if (search->ptr[2] & 1) {
219
+ fbuffer_append(search->buffer, "\\u2029", 6);
220
+ } else {
221
+ fbuffer_append(search->buffer, "\\u2028", 6);
222
+ }
223
+ break;
178
224
  }
179
225
  }
180
- #undef FLUSH_POS
181
-
182
- if (beg < len) {
183
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
184
- }
185
-
186
- RB_GC_GUARD(str);
226
+ search->cursor = (search->ptr += ch_len);
187
227
  }
188
228
 
189
- static const char escape_table[256] = {
190
- // ASCII Control Characters
191
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
192
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
193
- // ASCII Characters
194
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
195
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
197
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
198
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
199
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
200
- // Continuation byte
201
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
202
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
203
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
204
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
205
- // First byte of a 2-byte code point
206
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
207
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
208
- // First byte of a 4-byte code point
209
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
210
- //First byte of a 4+byte code point
211
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
212
- };
213
-
214
- static const char script_safe_escape_table[256] = {
229
+ static const unsigned char script_safe_escape_table[256] = {
215
230
  // ASCII Control Characters
216
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
217
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
231
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
232
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
218
233
  // ASCII Characters
219
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
220
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
221
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
222
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
223
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
224
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
234
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
235
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
236
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
237
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
238
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
239
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225
240
  // Continuation byte
226
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
227
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
228
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
229
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
241
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
242
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
243
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
244
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
230
245
  // First byte of a 2-byte code point
231
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
232
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
233
- // First byte of a 4-byte code point
234
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
235
- //First byte of a 4+byte code point
236
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
246
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
247
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
248
+ // First byte of a 3-byte code point
249
+ 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029
250
+ // First byte of a 4+ byte code point
251
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
237
252
  };
238
253
 
239
- static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256])
254
+ static inline unsigned char search_script_safe_escape(search_state *search)
240
255
  {
241
- const char *hexdig = "0123456789abcdef";
242
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
256
+ while (search->ptr < search->end) {
257
+ unsigned char ch = (unsigned char)*search->ptr;
258
+ unsigned char ch_len = script_safe_escape_table[ch];
243
259
 
244
- const char *ptr = RSTRING_PTR(str);
245
- unsigned long len = RSTRING_LEN(str);
246
-
247
- unsigned long beg = 0, pos;
248
-
249
- for (pos = 0; pos < len;) {
250
- unsigned char ch = ptr[pos];
251
- /* JSON encoding */
252
- if (escape_table[ch]) {
253
- if (pos > beg) {
254
- fbuffer_append(out_buffer, &ptr[beg], pos - beg);
255
- }
256
-
257
- beg = pos + 1;
258
- switch (ch) {
259
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
260
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
261
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
262
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
263
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
264
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
265
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
266
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
267
- default:
268
- scratch[2] = '0';
269
- scratch[3] = '0';
270
- scratch[4] = hexdig[(ch >> 4) & 0xf];
271
- scratch[5] = hexdig[ch & 0xf];
272
- fbuffer_append(out_buffer, scratch, 6);
260
+ if (RB_UNLIKELY(ch_len)) {
261
+ if (ch_len & ESCAPE_MASK) {
262
+ if (RB_UNLIKELY(ch_len == 11)) {
263
+ const unsigned char *uptr = (const unsigned char *)search->ptr;
264
+ if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
265
+ search->ptr += 3;
266
+ continue;
267
+ }
268
+ }
269
+ search_flush(search);
270
+ return ch_len & CHAR_LENGTH_MASK;
271
+ } else {
272
+ search->ptr += ch_len;
273
273
  }
274
+ } else {
275
+ search->ptr++;
274
276
  }
275
-
276
- pos++;
277
277
  }
278
-
279
- if (beg < len) {
280
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
281
- }
282
-
283
- RB_GC_GUARD(str);
278
+ search_flush(search);
279
+ return 0;
284
280
  }
285
281
 
286
- static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
282
+ static void convert_UTF8_to_script_safe_JSON(search_state *search)
287
283
  {
288
- const char *hexdig = "0123456789abcdef";
289
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
290
-
291
- const char *ptr = RSTRING_PTR(str);
292
- unsigned long len = RSTRING_LEN(str);
293
-
294
- unsigned long beg = 0, pos = 0;
284
+ unsigned char ch_len;
285
+ while ((ch_len = search_script_safe_escape(search))) {
286
+ escape_UTF8_char(search, ch_len);
287
+ }
288
+ }
295
289
 
296
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
290
+ static const unsigned char ascii_only_escape_table[256] = {
291
+ // ASCII Control Characters
292
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
293
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
294
+ // ASCII Characters
295
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
296
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
297
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
298
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
299
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
301
+ // Continuation byte
302
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
303
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
304
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
305
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
306
+ // First byte of a 2-byte code point
307
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
308
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
309
+ // First byte of a 3-byte code point
310
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
311
+ // First byte of a 4+ byte code point
312
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
313
+ };
297
314
 
298
- while (pos < len) {
299
- unsigned char ch = ptr[pos];
315
+ static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
316
+ {
317
+ while (search->ptr < search->end) {
318
+ unsigned char ch = (unsigned char)*search->ptr;
300
319
  unsigned char ch_len = escape_table[ch];
301
320
 
302
321
  if (RB_UNLIKELY(ch_len)) {
303
- switch (ch_len) {
304
- case 1: {
305
- FLUSH_POS(1);
306
- switch (ch) {
307
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
308
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
309
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
310
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
311
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
312
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
313
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
314
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
315
- default: {
316
- scratch[2] = '0';
317
- scratch[3] = '0';
318
- scratch[4] = hexdig[(ch >> 4) & 0xf];
319
- scratch[5] = hexdig[ch & 0xf];
320
- fbuffer_append(out_buffer, scratch, 6);
321
- break;
322
- }
323
- }
322
+ search_flush(search);
323
+ return ch_len & CHAR_LENGTH_MASK;
324
+ } else {
325
+ search->ptr++;
326
+ }
327
+ }
328
+ search_flush(search);
329
+ return 0;
330
+ }
331
+
332
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
333
+ const unsigned char ch = (unsigned char)*search->ptr;
334
+ switch (ch_len) {
335
+ case 1: {
336
+ switch (ch) {
337
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
338
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
339
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
340
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
341
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
342
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
343
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
344
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
345
+ default: {
346
+ const char *hexdig = "0123456789abcdef";
347
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
348
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
349
+ scratch[5] = hexdig[ch & 0xf];
350
+ fbuffer_append(search->buffer, scratch, 6);
324
351
  break;
325
352
  }
326
- default: {
327
- uint32_t wchar = 0;
328
- switch(ch_len) {
329
- case 2:
330
- wchar = ptr[pos] & 0x1F;
331
- break;
332
- case 3:
333
- wchar = ptr[pos] & 0x0F;
334
- break;
335
- case 4:
336
- wchar = ptr[pos] & 0x07;
337
- break;
338
- }
353
+ }
354
+ break;
355
+ }
356
+ default: {
357
+ const char *hexdig = "0123456789abcdef";
358
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
339
359
 
340
- for (short i = 1; i < ch_len; i++) {
341
- wchar = (wchar << 6) | (ptr[pos+i] & 0x3F);
342
- }
360
+ uint32_t wchar = 0;
343
361
 
344
- FLUSH_POS(ch_len);
362
+ switch(ch_len) {
363
+ case 2:
364
+ wchar = ch & 0x1F;
365
+ break;
366
+ case 3:
367
+ wchar = ch & 0x0F;
368
+ break;
369
+ case 4:
370
+ wchar = ch & 0x07;
371
+ break;
372
+ }
345
373
 
346
- if (wchar <= 0xFFFF) {
347
- scratch[2] = hexdig[wchar >> 12];
348
- scratch[3] = hexdig[(wchar >> 8) & 0xf];
349
- scratch[4] = hexdig[(wchar >> 4) & 0xf];
350
- scratch[5] = hexdig[wchar & 0xf];
351
- fbuffer_append(out_buffer, scratch, 6);
352
- } else {
353
- uint16_t hi, lo;
354
- wchar -= 0x10000;
355
- hi = 0xD800 + (uint16_t)(wchar >> 10);
356
- lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
357
-
358
- scratch[2] = hexdig[hi >> 12];
359
- scratch[3] = hexdig[(hi >> 8) & 0xf];
360
- scratch[4] = hexdig[(hi >> 4) & 0xf];
361
- scratch[5] = hexdig[hi & 0xf];
362
-
363
- scratch[8] = hexdig[lo >> 12];
364
- scratch[9] = hexdig[(lo >> 8) & 0xf];
365
- scratch[10] = hexdig[(lo >> 4) & 0xf];
366
- scratch[11] = hexdig[lo & 0xf];
367
-
368
- fbuffer_append(out_buffer, scratch, 12);
369
- }
374
+ for (short i = 1; i < ch_len; i++) {
375
+ wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
376
+ }
370
377
 
371
- break;
372
- }
378
+ if (wchar <= 0xFFFF) {
379
+ scratch[2] = hexdig[wchar >> 12];
380
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
381
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
382
+ scratch[5] = hexdig[wchar & 0xf];
383
+ fbuffer_append(search->buffer, scratch, 6);
384
+ } else {
385
+ uint16_t hi, lo;
386
+ wchar -= 0x10000;
387
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
388
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
389
+
390
+ scratch[2] = hexdig[hi >> 12];
391
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
392
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
393
+ scratch[5] = hexdig[hi & 0xf];
394
+
395
+ scratch[8] = hexdig[lo >> 12];
396
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
397
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
398
+ scratch[11] = hexdig[lo & 0xf];
399
+
400
+ fbuffer_append(search->buffer, scratch, 12);
373
401
  }
374
- } else {
375
- pos++;
402
+
403
+ break;
376
404
  }
377
405
  }
378
- #undef FLUSH_POS
406
+ search->cursor = (search->ptr += ch_len);
407
+ }
379
408
 
380
- if (beg < len) {
381
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
409
+ static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
410
+ {
411
+ unsigned char ch_len;
412
+ while ((ch_len = search_ascii_only_escape(search, escape_table))) {
413
+ full_escape_UTF8_char(search, ch_len);
382
414
  }
383
-
384
- RB_GC_GUARD(str);
385
415
  }
386
416
 
387
417
  /*
@@ -674,6 +704,7 @@ static void State_mark(void *ptr)
674
704
  rb_gc_mark_movable(state->space_before);
675
705
  rb_gc_mark_movable(state->object_nl);
676
706
  rb_gc_mark_movable(state->array_nl);
707
+ rb_gc_mark_movable(state->as_json);
677
708
  }
678
709
 
679
710
  static void State_compact(void *ptr)
@@ -684,6 +715,7 @@ static void State_compact(void *ptr)
684
715
  state->space_before = rb_gc_location(state->space_before);
685
716
  state->object_nl = rb_gc_location(state->object_nl);
686
717
  state->array_nl = rb_gc_location(state->array_nl);
718
+ state->as_json = rb_gc_location(state->as_json);
687
719
  }
688
720
 
689
721
  static void State_free(void *ptr)
@@ -740,6 +772,7 @@ static void vstate_spill(struct generate_json_data *data)
740
772
  RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
741
773
  RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
742
774
  RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
775
+ RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
743
776
  }
744
777
 
745
778
  static inline VALUE vstate_get(struct generate_json_data *data)
@@ -808,15 +841,19 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
808
841
  return ST_CONTINUE;
809
842
  }
810
843
 
811
- static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
844
+ static inline long increase_depth(JSON_Generator_State *state)
812
845
  {
813
- long max_nesting = state->max_nesting;
814
846
  long depth = ++state->depth;
815
- int j;
816
-
817
- if (max_nesting != 0 && depth > max_nesting) {
847
+ if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
818
848
  rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
819
849
  }
850
+ return depth;
851
+ }
852
+
853
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
854
+ {
855
+ int j;
856
+ long depth = increase_depth(state);
820
857
 
821
858
  if (RHASH_SIZE(obj) == 0) {
822
859
  fbuffer_append(buffer, "{}", 2);
@@ -846,12 +883,8 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
846
883
 
847
884
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
848
885
  {
849
- long max_nesting = state->max_nesting;
850
- long depth = ++state->depth;
851
886
  int i, j;
852
- if (max_nesting != 0 && depth > max_nesting) {
853
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
854
- }
887
+ long depth = increase_depth(state);
855
888
 
856
889
  if (RARRAY_LEN(obj) == 0) {
857
890
  fbuffer_append(buffer, "[]", 2);
@@ -933,15 +966,22 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
933
966
 
934
967
  fbuffer_append_char(buffer, '"');
935
968
 
969
+ long len;
970
+ search_state search;
971
+ search.buffer = buffer;
972
+ RSTRING_GETMEM(obj, search.ptr, len);
973
+ search.cursor = search.ptr;
974
+ search.end = search.ptr + len;
975
+
936
976
  switch(rb_enc_str_coderange(obj)) {
937
977
  case ENC_CODERANGE_7BIT:
938
- convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
939
- break;
940
978
  case ENC_CODERANGE_VALID:
941
979
  if (RB_UNLIKELY(state->ascii_only)) {
942
- convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
980
+ convert_UTF8_to_ASCII_only_JSON(&search, state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
981
+ } else if (RB_UNLIKELY(state->script_safe)) {
982
+ convert_UTF8_to_script_safe_JSON(&search);
943
983
  } else {
944
- convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
984
+ convert_UTF8_to_JSON(&search);
945
985
  }
946
986
  break;
947
987
  default:
@@ -951,6 +991,29 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
951
991
  fbuffer_append_char(buffer, '"');
952
992
  }
953
993
 
994
+ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
995
+ {
996
+ VALUE tmp;
997
+ if (rb_respond_to(obj, i_to_json)) {
998
+ tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
999
+ Check_Type(tmp, T_STRING);
1000
+ fbuffer_append_str(buffer, tmp);
1001
+ } else {
1002
+ tmp = rb_funcall(obj, i_to_s, 0);
1003
+ Check_Type(tmp, T_STRING);
1004
+ generate_json_string(buffer, data, state, tmp);
1005
+ }
1006
+ }
1007
+
1008
+ static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1009
+ {
1010
+ if (state->strict) {
1011
+ generate_json_string(buffer, data, state, rb_sym2str(obj));
1012
+ } else {
1013
+ generate_json_fallback(buffer, data, state, obj);
1014
+ }
1015
+ }
1016
+
954
1017
  static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
955
1018
  {
956
1019
  fbuffer_append(buffer, "null", 4);
@@ -991,18 +1054,34 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
991
1054
  {
992
1055
  double value = RFLOAT_VALUE(obj);
993
1056
  char allow_nan = state->allow_nan;
994
- VALUE tmp = rb_funcall(obj, i_to_s, 0);
995
1057
  if (!allow_nan) {
996
1058
  if (isinf(value) || isnan(value)) {
997
- raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", tmp);
1059
+ if (state->strict && state->as_json) {
1060
+ VALUE casted_obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
1061
+ if (casted_obj != obj) {
1062
+ increase_depth(state);
1063
+ generate_json(buffer, data, state, casted_obj);
1064
+ state->depth--;
1065
+ return;
1066
+ }
1067
+ }
1068
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
998
1069
  }
999
1070
  }
1000
- fbuffer_append_str(buffer, tmp);
1071
+ fbuffer_append_str(buffer, rb_funcall(obj, i_to_s, 0));
1072
+ }
1073
+
1074
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1075
+ {
1076
+ VALUE fragment = RSTRUCT_GET(obj, 0);
1077
+ Check_Type(fragment, T_STRING);
1078
+ fbuffer_append_str(buffer, fragment);
1001
1079
  }
1002
1080
 
1003
1081
  static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1004
1082
  {
1005
- VALUE tmp;
1083
+ bool as_json_called = false;
1084
+ start:
1006
1085
  if (obj == Qnil) {
1007
1086
  generate_json_null(buffer, data, state, obj);
1008
1087
  } else if (obj == Qfalse) {
@@ -1014,6 +1093,8 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON
1014
1093
  generate_json_fixnum(buffer, data, state, obj);
1015
1094
  } else if (RB_FLONUM_P(obj)) {
1016
1095
  generate_json_float(buffer, data, state, obj);
1096
+ } else if (RB_STATIC_SYM_P(obj)) {
1097
+ generate_json_symbol(buffer, data, state, obj);
1017
1098
  } else {
1018
1099
  goto general;
1019
1100
  }
@@ -1035,22 +1116,29 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON
1035
1116
  if (klass != rb_cString) goto general;
1036
1117
  generate_json_string(buffer, data, state, obj);
1037
1118
  break;
1119
+ case T_SYMBOL:
1120
+ generate_json_symbol(buffer, data, state, obj);
1121
+ break;
1038
1122
  case T_FLOAT:
1039
1123
  if (klass != rb_cFloat) goto general;
1040
1124
  generate_json_float(buffer, data, state, obj);
1041
1125
  break;
1126
+ case T_STRUCT:
1127
+ if (klass != cFragment) goto general;
1128
+ generate_json_fragment(buffer, data, state, obj);
1129
+ break;
1042
1130
  default:
1043
1131
  general:
1044
1132
  if (state->strict) {
1045
- raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1046
- } else if (rb_respond_to(obj, i_to_json)) {
1047
- tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1048
- Check_Type(tmp, T_STRING);
1049
- fbuffer_append_str(buffer, tmp);
1133
+ if (RTEST(state->as_json) && !as_json_called) {
1134
+ obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
1135
+ as_json_called = true;
1136
+ goto start;
1137
+ } else {
1138
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1139
+ }
1050
1140
  } else {
1051
- tmp = rb_funcall(obj, i_to_s, 0);
1052
- Check_Type(tmp, T_STRING);
1053
- generate_json_string(buffer, data, state, tmp);
1141
+ generate_json_fallback(buffer, data, state, obj);
1054
1142
  }
1055
1143
  }
1056
1144
  }
@@ -1097,8 +1185,19 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1097
1185
  return fbuffer_finalize(&buffer);
1098
1186
  }
1099
1187
 
1100
- static VALUE cState_generate(VALUE self, VALUE obj, VALUE io)
1188
+ /* call-seq:
1189
+ * generate(obj) -> String
1190
+ * generate(obj, anIO) -> anIO
1191
+ *
1192
+ * Generates a valid JSON document from object +obj+ and returns the
1193
+ * result. If no valid JSON document can be created this method raises a
1194
+ * GeneratorError exception.
1195
+ */
1196
+ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1101
1197
  {
1198
+ rb_check_arity(argc, 1, 2);
1199
+ VALUE obj = argv[0];
1200
+ VALUE io = argc > 1 ? argv[1] : Qnil;
1102
1201
  VALUE result = cState_partial_generate(self, obj, generate_json, io);
1103
1202
  GET_STATE(self);
1104
1203
  (void)state;
@@ -1132,6 +1231,7 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
1132
1231
  objState->space_before = origState->space_before;
1133
1232
  objState->object_nl = origState->object_nl;
1134
1233
  objState->array_nl = origState->array_nl;
1234
+ objState->as_json = origState->as_json;
1135
1235
  return obj;
1136
1236
  }
1137
1237
 
@@ -1283,6 +1383,28 @@ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1283
1383
  return Qnil;
1284
1384
  }
1285
1385
 
1386
+ /*
1387
+ * call-seq: as_json()
1388
+ *
1389
+ * Returns the Proc that is called in strict mode to convert an object that has no native JSON representation.
1390
+ */
1391
+ static VALUE cState_as_json(VALUE self)
1392
+ {
1393
+ GET_STATE(self);
1394
+ return state->as_json;
1395
+ }
1396
+
1397
+ /*
1398
+ * call-seq: as_json=(as_json)
1399
+ *
1400
+ * Sets the Proc that is called in strict mode to convert an object that has no native JSON representation. The argument is converted with to_proc.
1401
+ */
1402
+ static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1403
+ {
1404
+ GET_STATE(self);
1405
+ RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1406
+ return Qnil;
1407
+ }
1286
1408
 
1287
1409
  /*
1288
1410
  * call-seq: check_circular?
@@ -1504,6 +1626,7 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1504
1626
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1505
1627
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1506
1628
  else if (key == sym_strict) { state->strict = RTEST(val); }
1629
+ else if (key == sym_as_json) { state->as_json = rb_convert_type(val, T_DATA, "Proc", "to_proc"); }
1507
1630
  return ST_CONTINUE;
1508
1631
  }
1509
1632
 
@@ -1564,6 +1687,10 @@ void Init_generator(void)
1564
1687
  rb_require("json/common");
1565
1688
 
1566
1689
  mJSON = rb_define_module("JSON");
1690
+
1691
+ rb_global_variable(&cFragment);
1692
+ cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
1693
+
1567
1694
  VALUE mExt = rb_define_module_under(mJSON, "Ext");
1568
1695
  VALUE mGenerator = rb_define_module_under(mExt, "Generator");
1569
1696
 
@@ -1591,6 +1718,8 @@ void Init_generator(void)
1591
1718
  rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
1592
1719
  rb_define_method(cState, "array_nl", cState_array_nl, 0);
1593
1720
  rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
1721
+ rb_define_method(cState, "as_json", cState_as_json, 0);
1722
+ rb_define_method(cState, "as_json=", cState_as_json_set, 1);
1594
1723
  rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
1595
1724
  rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
1596
1725
  rb_define_method(cState, "script_safe", cState_script_safe, 0);
@@ -1611,7 +1740,8 @@ void Init_generator(void)
1611
1740
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1612
1741
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1613
1742
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1614
- rb_define_private_method(cState, "_generate", cState_generate, 2);
1743
+ rb_define_method(cState, "generate", cState_generate, -1);
1744
+ rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
1615
1745
 
1616
1746
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
1617
1747
 
@@ -1682,6 +1812,7 @@ void Init_generator(void)
1682
1812
  sym_script_safe = ID2SYM(rb_intern("script_safe"));
1683
1813
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
1684
1814
  sym_strict = ID2SYM(rb_intern("strict"));
1815
+ sym_as_json = ID2SYM(rb_intern("as_json"));
1685
1816
 
1686
1817
  usascii_encindex = rb_usascii_encindex();
1687
1818
  utf8_encindex = rb_utf8_encindex();