json 2.9.1 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ typedef struct JSON_Generator_StateStruct {
12
12
  VALUE space_before;
13
13
  VALUE object_nl;
14
14
  VALUE array_nl;
15
+ VALUE as_json;
15
16
 
16
17
  long max_nesting;
17
18
  long depth;
@@ -27,11 +28,11 @@ typedef struct JSON_Generator_StateStruct {
27
28
  #define RB_UNLIKELY(cond) (cond)
28
29
  #endif
29
30
 
30
- static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
31
+ static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
31
32
 
32
33
  static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
33
- static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
34
- sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict;
34
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
35
+ sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
35
36
 
36
37
 
37
38
  #define GET_STATE_TO(self, state) \
@@ -68,6 +69,7 @@ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *da
68
69
  static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
69
70
  static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
70
71
  static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
72
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
71
73
 
72
74
  static int usascii_encindex, utf8_encindex, binary_encindex;
73
75
 
@@ -96,6 +98,75 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
98
  raise_generator_error_str(invalid_object, str);
97
99
  }
98
100
 
101
+ // 0 - single byte char that doesn't need to be escaped.
102
+ // (x | 8) - char that needs to be escaped.
103
+ static const unsigned char CHAR_LENGTH_MASK = 7;
104
+ static const unsigned char ESCAPE_MASK = 8;
105
+
106
+ typedef struct _search_state {
107
+ const char *ptr; // current scan position in the input bytes
108
+ const char *end; // scanning stops once ptr reaches this address
109
+ const char *cursor; // start of the input bytes not yet appended to buffer
110
+ FBuffer *buffer; // output buffer that receives the generated text
111
+ } search_state;
112
+
113
+ static inline void search_flush(search_state *search) // Appends the pending input bytes [cursor, ptr) to the output buffer.
114
+ {
115
+ fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
116
+ search->cursor = search->ptr; // everything before ptr has now been appended
117
+ }
118
+
119
+ static const unsigned char escape_table_basic[256] = { // nonzero = byte must be escaped; only 0x00-0x7F are listed, so the entries for 0x80-0xFF are implicitly 0
120
+ // ASCII Control Characters
121
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
122
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
123
+ // ASCII Characters
124
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
125
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
127
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
128
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130
+ };
131
+
132
+ static inline unsigned char search_escape_basic(search_state *search) // Advances ptr to the next byte flagged in escape_table_basic; returns 1 if one was found, 0 at end of input.
133
+ {
134
+ while (search->ptr < search->end) {
135
+ if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
136
+ search_flush(search); // append the unescaped run that precedes the flagged byte
137
+ return 1; // ptr is left on the byte that needs escaping
138
+ } else {
139
+ search->ptr++;
140
+ }
141
+ }
142
+ search_flush(search); // end of input: append whatever is still pending
143
+ return 0;
144
+ }
145
+
146
+ static inline void escape_UTF8_char_basic(search_state *search) { // Appends the JSON escape for the single byte at ptr, then moves ptr and cursor past it.
147
+ const unsigned char ch = (unsigned char)*search->ptr;
148
+ switch (ch) {
149
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
150
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
151
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break; // NOTE(review): '/' is not flagged in escape_table_basic, so this looks unreachable via search_escape_basic
152
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
153
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
154
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
155
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
156
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
157
+ default: { // any other byte is written as \u00XX with two lowercase hex digits
158
+ const char *hexdig = "0123456789abcdef";
159
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
160
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
161
+ scratch[5] = hexdig[ch & 0xf];
162
+ fbuffer_append(search->buffer, scratch, 6);
163
+ break;
164
+ }
165
+ }
166
+ search->ptr++;
167
+ search->cursor = search->ptr; // the escaped byte must not be appended again by the next flush
168
+ }
169
+
99
170
  /* Converts in_string to a JSON string (without the wrapping '"'
100
171
  * characters) in FBuffer out_buffer.
101
172
  *
@@ -106,282 +177,241 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
106
177
  *
107
178
  * - If out_ascii_only: non-ASCII characters (>0x7F)
108
179
  *
109
- * - If out_script_safe: forwardslash, line separator (U+2028), and
180
+ * - If script_safe: forwardslash (/), line separator (U+2028), and
110
181
  * paragraph separator (U+2029)
111
182
  *
112
183
  * Everything else (should be UTF-8) is just passed through and
113
184
  * appended to the result.
114
185
  */
115
- static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
186
+ static inline void convert_UTF8_to_JSON(search_state *search)
116
187
  {
117
- const char *hexdig = "0123456789abcdef";
118
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
119
-
120
- const char *ptr = RSTRING_PTR(str);
121
- unsigned long len = RSTRING_LEN(str);
122
-
123
- unsigned long beg = 0, pos = 0;
124
-
125
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
126
-
127
- while (pos < len) {
128
- unsigned char ch = ptr[pos];
129
- unsigned char ch_len = escape_table[ch];
130
- /* JSON encoding */
188
+ while (search_escape_basic(search)) {
189
+ escape_UTF8_char_basic(search);
190
+ }
191
+ }
131
192
 
132
- if (RB_UNLIKELY(ch_len)) {
133
- switch (ch_len) {
134
- case 1: {
135
- FLUSH_POS(1);
136
- switch (ch) {
137
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
138
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
139
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
140
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
141
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
142
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
143
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
144
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
145
- default: {
146
- scratch[2] = '0';
147
- scratch[3] = '0';
148
- scratch[4] = hexdig[(ch >> 4) & 0xf];
149
- scratch[5] = hexdig[ch & 0xf];
150
- fbuffer_append(out_buffer, scratch, 6);
151
- break;
152
- }
153
- }
193
+ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
194
+ const unsigned char ch = (unsigned char)*search->ptr;
195
+ switch (ch_len) {
196
+ case 1: {
197
+ switch (ch) {
198
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
199
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
200
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
201
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
202
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
203
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
204
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
205
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
206
+ default: {
207
+ const char *hexdig = "0123456789abcdef";
208
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
209
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
210
+ scratch[5] = hexdig[ch & 0xf];
211
+ fbuffer_append(search->buffer, scratch, 6);
154
212
  break;
155
213
  }
156
- case 3: {
157
- unsigned char b2 = ptr[pos + 1];
158
- if (RB_UNLIKELY(out_script_safe && ch == 0xE2 && b2 == 0x80)) {
159
- unsigned char b3 = ptr[pos + 2];
160
- if (b3 == 0xA8) {
161
- FLUSH_POS(3);
162
- fbuffer_append(out_buffer, "\\u2028", 6);
163
- break;
164
- } else if (b3 == 0xA9) {
165
- FLUSH_POS(3);
166
- fbuffer_append(out_buffer, "\\u2029", 6);
167
- break;
168
- }
169
- }
170
- // fallthrough
171
- }
172
- default:
173
- pos += ch_len;
174
- break;
175
214
  }
176
- } else {
177
- pos++;
215
+ break;
216
+ }
217
+ case 3: {
218
+ if (search->ptr[2] & 1) {
219
+ fbuffer_append(search->buffer, "\\u2029", 6);
220
+ } else {
221
+ fbuffer_append(search->buffer, "\\u2028", 6);
222
+ }
223
+ break;
178
224
  }
179
225
  }
180
- #undef FLUSH_POS
181
-
182
- if (beg < len) {
183
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
184
- }
185
-
186
- RB_GC_GUARD(str);
226
+ search->cursor = (search->ptr += ch_len);
187
227
  }
188
228
 
189
- static const char escape_table[256] = {
190
- // ASCII Control Characters
191
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
192
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
193
- // ASCII Characters
194
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
195
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
197
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
198
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
199
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
200
- // Continuation byte
201
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
202
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
203
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
204
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
205
- // First byte of a 2-byte code point
206
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
207
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
208
- // First byte of a 4-byte code point
209
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
210
- //First byte of a 4+byte code point
211
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
212
- };
213
-
214
- static const char script_safe_escape_table[256] = {
229
+ static const unsigned char script_safe_escape_table[256] = {
215
230
  // ASCII Control Characters
216
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
217
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
231
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
232
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
218
233
  // ASCII Characters
219
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
220
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
221
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
222
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
223
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
224
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
234
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
235
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
236
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
237
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
238
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
239
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225
240
  // Continuation byte
226
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
227
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
228
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
229
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
241
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
242
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
243
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
244
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
230
245
  // First byte of a 2-byte code point
231
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
232
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
233
- // First byte of a 4-byte code point
234
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
235
- //First byte of a 4+byte code point
236
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
246
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
247
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
248
+ // First byte of a 3-byte code point
249
+ 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029
250
+ //First byte of a 4+ byte code point
251
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
237
252
  };
238
253
 
239
- static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256])
254
+ static inline unsigned char search_script_safe_escape(search_state *search)
240
255
  {
241
- const char *hexdig = "0123456789abcdef";
242
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
256
+ while (search->ptr < search->end) {
257
+ unsigned char ch = (unsigned char)*search->ptr;
258
+ unsigned char ch_len = script_safe_escape_table[ch];
243
259
 
244
- const char *ptr = RSTRING_PTR(str);
245
- unsigned long len = RSTRING_LEN(str);
246
-
247
- unsigned long beg = 0, pos;
248
-
249
- for (pos = 0; pos < len;) {
250
- unsigned char ch = ptr[pos];
251
- /* JSON encoding */
252
- if (escape_table[ch]) {
253
- if (pos > beg) {
254
- fbuffer_append(out_buffer, &ptr[beg], pos - beg);
255
- }
256
-
257
- beg = pos + 1;
258
- switch (ch) {
259
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
260
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
261
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
262
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
263
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
264
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
265
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
266
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
267
- default:
268
- scratch[2] = '0';
269
- scratch[3] = '0';
270
- scratch[4] = hexdig[(ch >> 4) & 0xf];
271
- scratch[5] = hexdig[ch & 0xf];
272
- fbuffer_append(out_buffer, scratch, 6);
260
+ if (RB_UNLIKELY(ch_len)) {
261
+ if (ch_len & ESCAPE_MASK) {
262
+ if (RB_UNLIKELY(ch_len == 11)) {
263
+ const unsigned char *uptr = (const unsigned char *)search->ptr;
264
+ if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
265
+ search->ptr += 3;
266
+ continue;
267
+ }
268
+ }
269
+ search_flush(search);
270
+ return ch_len & CHAR_LENGTH_MASK;
271
+ } else {
272
+ search->ptr += ch_len;
273
273
  }
274
+ } else {
275
+ search->ptr++;
274
276
  }
275
-
276
- pos++;
277
277
  }
278
-
279
- if (beg < len) {
280
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
281
- }
282
-
283
- RB_GC_GUARD(str);
278
+ search_flush(search);
279
+ return 0;
284
280
  }
285
281
 
286
- static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
282
+ static void convert_UTF8_to_script_safe_JSON(search_state *search)
287
283
  {
288
- const char *hexdig = "0123456789abcdef";
289
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
290
-
291
- const char *ptr = RSTRING_PTR(str);
292
- unsigned long len = RSTRING_LEN(str);
293
-
294
- unsigned long beg = 0, pos = 0;
284
+ unsigned char ch_len;
285
+ while ((ch_len = search_script_safe_escape(search))) {
286
+ escape_UTF8_char(search, ch_len);
287
+ }
288
+ }
295
289
 
296
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
290
+ static const unsigned char ascii_only_escape_table[256] = { // any nonzero entry stops the ascii-only scan; the scanner returns the entry masked with CHAR_LENGTH_MASK as the byte length to escape
291
+ // ASCII Control Characters
292
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
293
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
294
+ // ASCII Characters
295
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
296
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
297
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
298
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
299
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
301
+ // Continuation byte
302
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
303
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
304
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
305
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
306
+ // First byte of a 2-byte code point
307
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
308
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
309
+ // First byte of a 3-byte code point
310
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
311
+ // First byte of a 4+ byte code point (the last two entries, 0xFE and 0xFF, are flagged 9 like the single-byte escapes)
312
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
313
+ };
297
314
 
298
- while (pos < len) {
299
- unsigned char ch = ptr[pos];
315
+ static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
316
+ {
317
+ while (search->ptr < search->end) {
318
+ unsigned char ch = (unsigned char)*search->ptr;
300
319
  unsigned char ch_len = escape_table[ch];
301
320
 
302
321
  if (RB_UNLIKELY(ch_len)) {
303
- switch (ch_len) {
304
- case 1: {
305
- FLUSH_POS(1);
306
- switch (ch) {
307
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
308
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
309
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
310
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
311
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
312
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
313
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
314
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
315
- default: {
316
- scratch[2] = '0';
317
- scratch[3] = '0';
318
- scratch[4] = hexdig[(ch >> 4) & 0xf];
319
- scratch[5] = hexdig[ch & 0xf];
320
- fbuffer_append(out_buffer, scratch, 6);
321
- break;
322
- }
323
- }
322
+ search_flush(search);
323
+ return ch_len & CHAR_LENGTH_MASK;
324
+ } else {
325
+ search->ptr++;
326
+ }
327
+ }
328
+ search_flush(search);
329
+ return 0;
330
+ }
331
+
332
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
333
+ const unsigned char ch = (unsigned char)*search->ptr;
334
+ switch (ch_len) {
335
+ case 1: {
336
+ switch (ch) {
337
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
338
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
339
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
340
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
341
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
342
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
343
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
344
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
345
+ default: {
346
+ const char *hexdig = "0123456789abcdef";
347
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
348
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
349
+ scratch[5] = hexdig[ch & 0xf];
350
+ fbuffer_append(search->buffer, scratch, 6);
324
351
  break;
325
352
  }
326
- default: {
327
- uint32_t wchar = 0;
328
- switch(ch_len) {
329
- case 2:
330
- wchar = ptr[pos] & 0x1F;
331
- break;
332
- case 3:
333
- wchar = ptr[pos] & 0x0F;
334
- break;
335
- case 4:
336
- wchar = ptr[pos] & 0x07;
337
- break;
338
- }
353
+ }
354
+ break;
355
+ }
356
+ default: {
357
+ const char *hexdig = "0123456789abcdef";
358
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
339
359
 
340
- for (short i = 1; i < ch_len; i++) {
341
- wchar = (wchar << 6) | (ptr[pos+i] & 0x3F);
342
- }
360
+ uint32_t wchar = 0;
343
361
 
344
- FLUSH_POS(ch_len);
362
+ switch(ch_len) {
363
+ case 2:
364
+ wchar = ch & 0x1F;
365
+ break;
366
+ case 3:
367
+ wchar = ch & 0x0F;
368
+ break;
369
+ case 4:
370
+ wchar = ch & 0x07;
371
+ break;
372
+ }
345
373
 
346
- if (wchar <= 0xFFFF) {
347
- scratch[2] = hexdig[wchar >> 12];
348
- scratch[3] = hexdig[(wchar >> 8) & 0xf];
349
- scratch[4] = hexdig[(wchar >> 4) & 0xf];
350
- scratch[5] = hexdig[wchar & 0xf];
351
- fbuffer_append(out_buffer, scratch, 6);
352
- } else {
353
- uint16_t hi, lo;
354
- wchar -= 0x10000;
355
- hi = 0xD800 + (uint16_t)(wchar >> 10);
356
- lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
357
-
358
- scratch[2] = hexdig[hi >> 12];
359
- scratch[3] = hexdig[(hi >> 8) & 0xf];
360
- scratch[4] = hexdig[(hi >> 4) & 0xf];
361
- scratch[5] = hexdig[hi & 0xf];
362
-
363
- scratch[8] = hexdig[lo >> 12];
364
- scratch[9] = hexdig[(lo >> 8) & 0xf];
365
- scratch[10] = hexdig[(lo >> 4) & 0xf];
366
- scratch[11] = hexdig[lo & 0xf];
367
-
368
- fbuffer_append(out_buffer, scratch, 12);
369
- }
374
+ for (short i = 1; i < ch_len; i++) {
375
+ wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
376
+ }
370
377
 
371
- break;
372
- }
378
+ if (wchar <= 0xFFFF) {
379
+ scratch[2] = hexdig[wchar >> 12];
380
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
381
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
382
+ scratch[5] = hexdig[wchar & 0xf];
383
+ fbuffer_append(search->buffer, scratch, 6);
384
+ } else {
385
+ uint16_t hi, lo;
386
+ wchar -= 0x10000;
387
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
388
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
389
+
390
+ scratch[2] = hexdig[hi >> 12];
391
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
392
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
393
+ scratch[5] = hexdig[hi & 0xf];
394
+
395
+ scratch[8] = hexdig[lo >> 12];
396
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
397
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
398
+ scratch[11] = hexdig[lo & 0xf];
399
+
400
+ fbuffer_append(search->buffer, scratch, 12);
373
401
  }
374
- } else {
375
- pos++;
402
+
403
+ break;
376
404
  }
377
405
  }
378
- #undef FLUSH_POS
406
+ search->cursor = (search->ptr += ch_len);
407
+ }
379
408
 
380
- if (beg < len) {
381
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
409
+ static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
410
+ {
411
+ unsigned char ch_len;
412
+ while ((ch_len = search_ascii_only_escape(search, escape_table))) {
413
+ full_escape_UTF8_char(search, ch_len);
382
414
  }
383
-
384
- RB_GC_GUARD(str);
385
415
  }
386
416
 
387
417
  /*
@@ -674,6 +704,7 @@ static void State_mark(void *ptr)
674
704
  rb_gc_mark_movable(state->space_before);
675
705
  rb_gc_mark_movable(state->object_nl);
676
706
  rb_gc_mark_movable(state->array_nl);
707
+ rb_gc_mark_movable(state->as_json);
677
708
  }
678
709
 
679
710
  static void State_compact(void *ptr)
@@ -684,6 +715,7 @@ static void State_compact(void *ptr)
684
715
  state->space_before = rb_gc_location(state->space_before);
685
716
  state->object_nl = rb_gc_location(state->object_nl);
686
717
  state->array_nl = rb_gc_location(state->array_nl);
718
+ state->as_json = rb_gc_location(state->as_json);
687
719
  }
688
720
 
689
721
  static void State_free(void *ptr)
@@ -740,6 +772,7 @@ static void vstate_spill(struct generate_json_data *data)
740
772
  RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
741
773
  RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
742
774
  RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
775
+ RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
743
776
  }
744
777
 
745
778
  static inline VALUE vstate_get(struct generate_json_data *data)
@@ -808,15 +841,19 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
808
841
  return ST_CONTINUE;
809
842
  }
810
843
 
811
- static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
844
+ static inline long increase_depth(JSON_Generator_State *state)
812
845
  {
813
- long max_nesting = state->max_nesting;
814
846
  long depth = ++state->depth;
815
- int j;
816
-
817
- if (max_nesting != 0 && depth > max_nesting) {
847
+ if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
818
848
  rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
819
849
  }
850
+ return depth;
851
+ }
852
+
853
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
854
+ {
855
+ int j;
856
+ long depth = increase_depth(state);
820
857
 
821
858
  if (RHASH_SIZE(obj) == 0) {
822
859
  fbuffer_append(buffer, "{}", 2);
@@ -846,12 +883,8 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
846
883
 
847
884
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
848
885
  {
849
- long max_nesting = state->max_nesting;
850
- long depth = ++state->depth;
851
886
  int i, j;
852
- if (max_nesting != 0 && depth > max_nesting) {
853
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
854
- }
887
+ long depth = increase_depth(state);
855
888
 
856
889
  if (RARRAY_LEN(obj) == 0) {
857
890
  fbuffer_append(buffer, "[]", 2);
@@ -933,15 +966,22 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
933
966
 
934
967
  fbuffer_append_char(buffer, '"');
935
968
 
969
+ long len;
970
+ search_state search;
971
+ search.buffer = buffer;
972
+ RSTRING_GETMEM(obj, search.ptr, len);
973
+ search.cursor = search.ptr;
974
+ search.end = search.ptr + len;
975
+
936
976
  switch(rb_enc_str_coderange(obj)) {
937
977
  case ENC_CODERANGE_7BIT:
938
- convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
939
- break;
940
978
  case ENC_CODERANGE_VALID:
941
979
  if (RB_UNLIKELY(state->ascii_only)) {
942
- convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
980
+ convert_UTF8_to_ASCII_only_JSON(&search, state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
981
+ } else if (RB_UNLIKELY(state->script_safe)) {
982
+ convert_UTF8_to_script_safe_JSON(&search);
943
983
  } else {
944
- convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
984
+ convert_UTF8_to_JSON(&search);
945
985
  }
946
986
  break;
947
987
  default:
@@ -951,6 +991,29 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
951
991
  fbuffer_append_char(buffer, '"');
952
992
  }
953
993
 
994
+ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) // Serializes obj through its own to_json when it responds to it, otherwise as the JSON string of obj.to_s.
995
+ {
996
+ VALUE tmp;
997
+ if (rb_respond_to(obj, i_to_json)) {
998
+ tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); // to_json receives the value returned by vstate_get(data)
999
+ Check_Type(tmp, T_STRING); // raises unless to_json returned a String
1000
+ fbuffer_append_str(buffer, tmp); // appended as-is; generate_json_string is not applied to it
1001
+ } else {
1002
+ tmp = rb_funcall(obj, i_to_s, 0);
1003
+ Check_Type(tmp, T_STRING); // raises unless to_s returned a String
1004
+ generate_json_string(buffer, data, state, tmp); // to_s output is emitted via the JSON string generator
1005
+ }
1006
+ }
1007
+
1008
+ static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) // Emits a Symbol: as a JSON string of its name in strict mode, through the generic fallback otherwise.
1009
+ {
1010
+ if (state->strict) {
1011
+ generate_json_string(buffer, data, state, rb_sym2str(obj)); // strict mode does not call to_json / to_s on the symbol
1012
+ } else {
1013
+ generate_json_fallback(buffer, data, state, obj);
1014
+ }
1015
+ }
1016
+
954
1017
  static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
955
1018
  {
956
1019
  fbuffer_append(buffer, "null", 4);
@@ -991,18 +1054,34 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
991
1054
  {
992
1055
  double value = RFLOAT_VALUE(obj);
993
1056
  char allow_nan = state->allow_nan;
994
- VALUE tmp = rb_funcall(obj, i_to_s, 0);
995
1057
  if (!allow_nan) {
996
1058
  if (isinf(value) || isnan(value)) {
997
- raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", tmp);
1059
+ if (state->strict && state->as_json) {
1060
+ VALUE casted_obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
1061
+ if (casted_obj != obj) {
1062
+ increase_depth(state);
1063
+ generate_json(buffer, data, state, casted_obj);
1064
+ state->depth--;
1065
+ return;
1066
+ }
1067
+ }
1068
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
998
1069
  }
999
1070
  }
1000
- fbuffer_append_str(buffer, tmp);
1071
+ fbuffer_append_str(buffer, rb_funcall(obj, i_to_s, 0));
1072
+ }
1073
+
1074
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) // Appends the first member of the struct obj to the buffer without passing it through the string escaper.
1075
+ {
1076
+ VALUE fragment = RSTRUCT_GET(obj, 0); // member 0 of the struct is what gets appended
1077
+ Check_Type(fragment, T_STRING); // raises unless that member is a String
1078
+ fbuffer_append_str(buffer, fragment);
1001
1079
  }
1002
1080
 
1003
1081
  static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1004
1082
  {
1005
- VALUE tmp;
1083
+ bool as_json_called = false;
1084
+ start:
1006
1085
  if (obj == Qnil) {
1007
1086
  generate_json_null(buffer, data, state, obj);
1008
1087
  } else if (obj == Qfalse) {
@@ -1014,6 +1093,8 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON
1014
1093
  generate_json_fixnum(buffer, data, state, obj);
1015
1094
  } else if (RB_FLONUM_P(obj)) {
1016
1095
  generate_json_float(buffer, data, state, obj);
1096
+ } else if (RB_STATIC_SYM_P(obj)) {
1097
+ generate_json_symbol(buffer, data, state, obj);
1017
1098
  } else {
1018
1099
  goto general;
1019
1100
  }
@@ -1035,22 +1116,29 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON
1035
1116
  if (klass != rb_cString) goto general;
1036
1117
  generate_json_string(buffer, data, state, obj);
1037
1118
  break;
1119
+ case T_SYMBOL:
1120
+ generate_json_symbol(buffer, data, state, obj);
1121
+ break;
1038
1122
  case T_FLOAT:
1039
1123
  if (klass != rb_cFloat) goto general;
1040
1124
  generate_json_float(buffer, data, state, obj);
1041
1125
  break;
1126
+ case T_STRUCT:
1127
+ if (klass != cFragment) goto general;
1128
+ generate_json_fragment(buffer, data, state, obj);
1129
+ break;
1042
1130
  default:
1043
1131
  general:
1044
1132
  if (state->strict) {
1045
- raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1046
- } else if (rb_respond_to(obj, i_to_json)) {
1047
- tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1048
- Check_Type(tmp, T_STRING);
1049
- fbuffer_append_str(buffer, tmp);
1133
+ if (RTEST(state->as_json) && !as_json_called) {
1134
+ obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
1135
+ as_json_called = true;
1136
+ goto start;
1137
+ } else {
1138
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1139
+ }
1050
1140
  } else {
1051
- tmp = rb_funcall(obj, i_to_s, 0);
1052
- Check_Type(tmp, T_STRING);
1053
- generate_json_string(buffer, data, state, tmp);
1141
+ generate_json_fallback(buffer, data, state, obj);
1054
1142
  }
1055
1143
  }
1056
1144
  }
@@ -1097,8 +1185,19 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1097
1185
  return fbuffer_finalize(&buffer);
1098
1186
  }
1099
1187
 
1100
- static VALUE cState_generate(VALUE self, VALUE obj, VALUE io)
1188
+ /* call-seq:
1189
+ * generate(obj) -> String
1190
+ * generate(obj, anIO) -> anIO
1191
+ *
1192
+ * Generates a valid JSON document from object +obj+ and returns the
1193
+ * result. If no valid JSON document can be created this method raises a
1194
+ * GeneratorError exception.
1195
+ */
1196
+ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1101
1197
  {
1198
+ rb_check_arity(argc, 1, 2);
1199
+ VALUE obj = argv[0];
1200
+ VALUE io = argc > 1 ? argv[1] : Qnil;
1102
1201
  VALUE result = cState_partial_generate(self, obj, generate_json, io);
1103
1202
  GET_STATE(self);
1104
1203
  (void)state;
@@ -1132,6 +1231,7 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
1132
1231
  objState->space_before = origState->space_before;
1133
1232
  objState->object_nl = origState->object_nl;
1134
1233
  objState->array_nl = origState->array_nl;
1234
+ objState->as_json = origState->as_json;
1135
1235
  return obj;
1136
1236
  }
1137
1237
 
@@ -1283,6 +1383,28 @@ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1283
1383
  return Qnil;
1284
1384
  }
1285
1385
 
1386
+ /*
1387
+ * call-seq: as_json()
1388
+ *
1389
+ * Returns the as_json Proc stored in this state, or a falsy value when none is configured. In strict mode the generator calls it to convert objects it cannot serialize natively.
1390
+ */
1391
+ static VALUE cState_as_json(VALUE self)
1392
+ {
1393
+ GET_STATE(self);
1394
+ return state->as_json;
1395
+ }
1396
+
1397
+ /*
1398
+ * call-seq: as_json=(as_json)
1399
+ *
1400
+ * Sets the as_json Proc of this state. The argument is converted to a Proc via to_proc. In strict mode the generator calls it to convert objects it cannot serialize natively.
1401
+ */
1402
+ static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1403
+ {
1404
+ GET_STATE(self);
1405
+ RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1406
+ return Qnil;
1407
+ }
1286
1408
 
1287
1409
  /*
1288
1410
  * call-seq: check_circular?
@@ -1504,6 +1626,7 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1504
1626
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1505
1627
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1506
1628
  else if (key == sym_strict) { state->strict = RTEST(val); }
1629
+ else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
1507
1630
  return ST_CONTINUE;
1508
1631
  }
1509
1632
 
@@ -1564,6 +1687,10 @@ void Init_generator(void)
1564
1687
  rb_require("json/common");
1565
1688
 
1566
1689
  mJSON = rb_define_module("JSON");
1690
+
1691
+ rb_global_variable(&cFragment);
1692
+ cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
1693
+
1567
1694
  VALUE mExt = rb_define_module_under(mJSON, "Ext");
1568
1695
  VALUE mGenerator = rb_define_module_under(mExt, "Generator");
1569
1696
 
@@ -1591,6 +1718,8 @@ void Init_generator(void)
1591
1718
  rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
1592
1719
  rb_define_method(cState, "array_nl", cState_array_nl, 0);
1593
1720
  rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
1721
+ rb_define_method(cState, "as_json", cState_as_json, 0);
1722
+ rb_define_method(cState, "as_json=", cState_as_json_set, 1);
1594
1723
  rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
1595
1724
  rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
1596
1725
  rb_define_method(cState, "script_safe", cState_script_safe, 0);
@@ -1611,7 +1740,8 @@ void Init_generator(void)
1611
1740
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1612
1741
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1613
1742
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1614
- rb_define_private_method(cState, "_generate", cState_generate, 2);
1743
+ rb_define_method(cState, "generate", cState_generate, -1);
1744
+ rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
1615
1745
 
1616
1746
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
1617
1747
 
@@ -1682,6 +1812,7 @@ void Init_generator(void)
1682
1812
  sym_script_safe = ID2SYM(rb_intern("script_safe"));
1683
1813
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
1684
1814
  sym_strict = ID2SYM(rb_intern("strict"));
1815
+ sym_as_json = ID2SYM(rb_intern("as_json"));
1685
1816
 
1686
1817
  usascii_encindex = rb_usascii_encindex();
1687
1818
  utf8_encindex = rb_utf8_encindex();