json 2.7.1 → 2.7.3.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,332 +1,306 @@
1
1
  #include "../fbuffer/fbuffer.h"
2
2
  #include "generator.h"
3
3
 
4
- static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
5
- mHash, mArray,
6
- #ifdef RUBY_INTEGER_UNIFICATION
7
- mInteger,
8
- #else
9
- mFixnum, mBignum,
4
+ #ifndef RB_UNLIKELY
5
+ #define RB_UNLIKELY(cond) (cond)
10
6
  #endif
11
- mFloat, mString, mString_Extend,
12
- mTrueClass, mFalseClass, mNilClass, eGeneratorError,
13
- eNestingError;
14
7
 
15
- static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
16
- i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
17
- i_pack, i_unpack, i_create_id, i_extend, i_key_p,
18
- i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
19
- i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
8
+ static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
20
9
 
21
- /*
22
- * Copyright 2001-2004 Unicode, Inc.
10
+ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
11
+
12
+ /* Converts in_string to a JSON string (without the wrapping '"'
13
+ * characters) in FBuffer out_buffer.
23
14
  *
24
- * Disclaimer
15
+ * Characters are JSON-escaped according to:
25
16
  *
26
- * This source code is provided as is by Unicode, Inc. No claims are
27
- * made as to fitness for any particular purpose. No warranties of any
28
- * kind are expressed or implied. The recipient agrees to determine
29
- * applicability of information provided. If this file has been
30
- * purchased on magnetic or optical media from Unicode, Inc., the
31
- * sole remedy for any claim will be exchange of defective media
32
- * within 90 days of receipt.
17
+ * - Always: ASCII control characters (0x00-0x1F), dquote, and
18
+ * backslash.
33
19
  *
34
- * Limitations on Rights to Redistribute This Code
20
+ * - If out_ascii_only: non-ASCII characters (>0x7F)
35
21
  *
36
- * Unicode, Inc. hereby grants the right to freely use the information
37
- * supplied in this file in the creation of products supporting the
38
- * Unicode Standard, and to make copies of this file in any form
39
- * for internal or external distribution as long as this notice
40
- * remains attached.
22
+ * - If out_script_safe: forwardslash, line separator (U+2028), and
23
+ * paragraph separator (U+2029)
24
+ *
25
+ * Everything else (should be UTF-8) is just passed through and
26
+ * appended to the result.
41
27
  */
28
+ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
29
+ {
30
+ const char *hexdig = "0123456789abcdef";
31
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
42
32
 
43
- /*
44
- * Index into the table below with the first byte of a UTF-8 sequence to
45
- * get the number of trailing bytes that are supposed to follow it.
46
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
47
- * left as-is for anyone who may want to do such conversion, which was
48
- * allowed in earlier algorithms.
49
- */
50
- static const char trailingBytesForUTF8[256] = {
51
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
59
- };
33
+ const char *ptr = RSTRING_PTR(str);
34
+ unsigned long len = RSTRING_LEN(str);
60
35
 
61
- /*
62
- * Magic values subtracted from a buffer value during UTF8 conversion.
63
- * This table contains as many values as there might be trailing bytes
64
- * in a UTF-8 sequence.
65
- */
66
- static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
67
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
36
+ unsigned long beg = 0, pos = 0;
68
37
 
69
- /*
70
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
71
- * This must be called with the length pre-determined by the first byte.
72
- * If not calling this from ConvertUTF8to*, then the length can be set by:
73
- * length = trailingBytesForUTF8[*source]+1;
74
- * and the sequence is illegal right away if there aren't that many bytes
75
- * available.
76
- * If presented with a length > 4, this returns 0. The Unicode
77
- * definition of UTF-8 goes up to 4-byte sequences.
78
- */
79
- static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
80
- {
81
- UTF8 a;
82
- const UTF8 *srcptr = source+length;
83
- switch (length) {
84
- default: return 0;
85
- /* Everything else falls through when "1"... */
86
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
87
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
88
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
89
-
90
- switch (*source) {
91
- /* no fall-through in this inner switch */
92
- case 0xE0: if (a < 0xA0) return 0; break;
93
- case 0xED: if (a > 0x9F) return 0; break;
94
- case 0xF0: if (a < 0x90) return 0; break;
95
- case 0xF4: if (a > 0x8F) return 0; break;
96
- default: if (a < 0x80) return 0;
38
+ #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
39
+
40
+ while (pos < len) {
41
+ unsigned char ch = ptr[pos];
42
+ unsigned char ch_len = escape_table[ch];
43
+ /* JSON encoding */
44
+
45
+ if (RB_UNLIKELY(ch_len)) {
46
+ switch (ch_len) {
47
+ case 0:
48
+ pos++;
49
+ break;
50
+ case 1: {
51
+ FLUSH_POS(1);
52
+ switch (ch) {
53
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
54
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
55
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
56
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
57
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
58
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
59
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
60
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
61
+ default: {
62
+ scratch[2] = hexdig[ch >> 12];
63
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
64
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
65
+ scratch[5] = hexdig[ch & 0xf];
66
+ fbuffer_append(out_buffer, scratch, 6);
67
+ break;
68
+ }
69
+ }
70
+ break;
71
+ }
72
+ case 3: {
73
+ unsigned char b2 = ptr[pos + 1];
74
+ if (RB_UNLIKELY(out_script_safe && b2 == 0x80)) {
75
+ unsigned char b3 = ptr[pos + 2];
76
+ if (b3 == 0xA8) {
77
+ FLUSH_POS(3);
78
+ fbuffer_append(out_buffer, "\\u2028", 6);
79
+ break;
80
+ } else if (b3 == 0xA9) {
81
+ FLUSH_POS(3);
82
+ fbuffer_append(out_buffer, "\\u2029", 6);
83
+ break;
84
+ }
97
85
  }
86
+ // fallthrough
87
+ }
88
+ default:
89
+ pos += ch_len;
90
+ break;
91
+ }
92
+ } else {
93
+ pos++;
94
+ }
95
+ }
96
+ #undef FLUSH_POS
98
97
 
99
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
98
+ if (beg < len) {
99
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
100
100
  }
101
- if (*source > 0xF4) return 0;
102
- return 1;
103
- }
104
101
 
105
- /* Escapes the UTF16 character and stores the result in the buffer buf. */
106
- static void unicode_escape(char *buf, UTF16 character)
107
- {
108
- const char *digits = "0123456789abcdef";
102
+ RB_GC_GUARD(str);
103
+ }
104
+
105
+ static const char escape_table[256] = {
106
+ // ASCII Control Characters
107
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
108
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
109
+ // ASCII Characters
110
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
111
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
112
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
113
+ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
114
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
115
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
116
+ // Continuation byte
117
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
118
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
119
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
120
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
121
+ // First byte of a 2-byte code point
122
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
123
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
124
+ // First byte of a 4-byte code point
125
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
126
+ //First byte of a 4+byte code point
127
+ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
128
+ };
109
129
 
110
- buf[2] = digits[character >> 12];
111
- buf[3] = digits[(character >> 8) & 0xf];
112
- buf[4] = digits[(character >> 4) & 0xf];
113
- buf[5] = digits[character & 0xf];
114
- }
130
+ static const char script_safe_escape_table[256] = {
131
+ // ASCII Control Characters
132
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
133
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
134
+ // ASCII Characters
135
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
136
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
137
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
138
+ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
139
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
140
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
141
+ // Continuation byte
142
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
143
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
144
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
145
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
146
+ // First byte of a 2-byte code point
147
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
148
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
149
+ // First byte of a 4-byte code point
150
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
151
+ //First byte of a 4+byte code point
152
+ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
153
+ };
115
154
 
116
- /* Escapes the UTF16 character and stores the result in the buffer buf, then
117
- * the buffer buf is appended to the FBuffer buffer. */
118
- static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
119
- character)
155
+ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256])
120
156
  {
121
- unicode_escape(buf, character);
122
- fbuffer_append(buffer, buf, 6);
157
+ const char *hexdig = "0123456789abcdef";
158
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
159
+
160
+ const char *ptr = RSTRING_PTR(str);
161
+ unsigned long len = RSTRING_LEN(str);
162
+
163
+ unsigned long beg = 0, pos;
164
+
165
+ for (pos = 0; pos < len;) {
166
+ unsigned char ch = ptr[pos];
167
+ /* JSON encoding */
168
+ if (escape_table[ch]) {
169
+ if (pos > beg) {
170
+ fbuffer_append(out_buffer, &ptr[beg], pos - beg);
171
+ }
172
+
173
+ beg = pos + 1;
174
+ switch (ch) {
175
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
176
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
177
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
178
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
179
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
180
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
181
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
182
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
183
+ default:
184
+ scratch[2] = hexdig[ch >> 12];
185
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
186
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
187
+ scratch[5] = hexdig[ch & 0xf];
188
+ fbuffer_append(out_buffer, scratch, 6);
189
+ }
190
+ }
191
+
192
+ pos++;
193
+ }
194
+
195
+ if (beg < len) {
196
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
197
+ }
198
+
199
+ RB_GC_GUARD(str);
123
200
  }
124
201
 
125
- /* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
126
- * and control characters are JSON escaped. */
127
- static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
202
+ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
128
203
  {
129
- const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
130
- const UTF8 *sourceEnd = source + RSTRING_LEN(string);
131
- char buf[6] = { '\\', 'u' };
204
+ const char *hexdig = "0123456789abcdef";
205
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
132
206
 
133
- while (source < sourceEnd) {
134
- UTF32 ch = 0;
135
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
136
- if (source + extraBytesToRead >= sourceEnd) {
137
- rb_raise(rb_path2class("JSON::GeneratorError"),
138
- "partial character in source, but hit end");
139
- }
140
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
141
- rb_raise(rb_path2class("JSON::GeneratorError"),
142
- "source sequence is illegal/malformed utf-8");
143
- }
144
- /*
145
- * The cases all fall through. See "Note A" below.
146
- */
147
- switch (extraBytesToRead) {
148
- case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
149
- case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
150
- case 3: ch += *source++; ch <<= 6;
151
- case 2: ch += *source++; ch <<= 6;
152
- case 1: ch += *source++; ch <<= 6;
153
- case 0: ch += *source++;
154
- }
155
- ch -= offsetsFromUTF8[extraBytesToRead];
156
-
157
- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
158
- /* UTF-16 surrogate values are illegal in UTF-32 */
159
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
160
- #if UNI_STRICT_CONVERSION
161
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
162
- rb_raise(rb_path2class("JSON::GeneratorError"),
163
- "source sequence is illegal/malformed utf-8");
164
- #else
165
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
166
- #endif
167
- } else {
168
- /* normal case */
169
- if (ch >= 0x20 && ch <= 0x7f) {
207
+ const char *ptr = RSTRING_PTR(str);
208
+ unsigned long len = RSTRING_LEN(str);
209
+
210
+ unsigned long beg = 0, pos = 0;
211
+
212
+ #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
213
+
214
+ while (pos < len) {
215
+ unsigned char ch = ptr[pos];
216
+ unsigned char ch_len = escape_table[ch];
217
+
218
+ if (RB_UNLIKELY(ch_len)) {
219
+ switch (ch_len) {
220
+ case 0:
221
+ pos++;
222
+ break;
223
+ case 1: {
224
+ FLUSH_POS(1);
170
225
  switch (ch) {
171
- case '\\':
172
- fbuffer_append(buffer, "\\\\", 2);
173
- break;
174
- case '"':
175
- fbuffer_append(buffer, "\\\"", 2);
176
- break;
177
- case '/':
178
- if(script_safe) {
179
- fbuffer_append(buffer, "\\/", 2);
180
- break;
181
- }
182
- default:
183
- fbuffer_append_char(buffer, (char)ch);
226
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
227
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
228
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
229
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
230
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
231
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
232
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
233
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
234
+ default: {
235
+ scratch[2] = hexdig[ch >> 12];
236
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
237
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
238
+ scratch[5] = hexdig[ch & 0xf];
239
+ fbuffer_append(out_buffer, scratch, 6);
184
240
  break;
241
+ }
185
242
  }
186
- } else {
187
- switch (ch) {
188
- case '\n':
189
- fbuffer_append(buffer, "\\n", 2);
190
- break;
191
- case '\r':
192
- fbuffer_append(buffer, "\\r", 2);
193
- break;
194
- case '\t':
195
- fbuffer_append(buffer, "\\t", 2);
196
- break;
197
- case '\f':
198
- fbuffer_append(buffer, "\\f", 2);
243
+ break;
244
+ }
245
+ default: {
246
+ uint32_t wchar = 0;
247
+ switch(ch_len) {
248
+ case 2:
249
+ wchar = ptr[pos] & 0x1F;
199
250
  break;
200
- case '\b':
201
- fbuffer_append(buffer, "\\b", 2);
251
+ case 3:
252
+ wchar = ptr[pos] & 0x0F;
202
253
  break;
203
- default:
204
- unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
254
+ case 4:
255
+ wchar = ptr[pos] & 0x07;
205
256
  break;
206
257
  }
207
- }
208
- }
209
- } else if (ch > UNI_MAX_UTF16) {
210
- #if UNI_STRICT_CONVERSION
211
- source -= (extraBytesToRead+1); /* return to the start */
212
- rb_raise(rb_path2class("JSON::GeneratorError"),
213
- "source sequence is illegal/malformed utf8");
214
- #else
215
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
216
- #endif
217
- } else {
218
- /* target is a character in range 0xFFFF - 0x10FFFF. */
219
- ch -= halfBase;
220
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
221
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
222
- }
223
- }
224
- RB_GC_GUARD(string);
225
- }
226
-
227
- /* Converts string to a JSON string in FBuffer buffer, where only the
228
- * characters required by the JSON standard are JSON escaped. The remaining
229
- * characters (should be UTF8) are just passed through and appended to the
230
- * result. */
231
- static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
232
- {
233
- const char *ptr = RSTRING_PTR(string), *p;
234
- unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
235
- const char *escape = NULL;
236
- int escape_len;
237
- unsigned char c;
238
- char buf[6] = { '\\', 'u' };
239
- int ascii_only = rb_enc_str_asciionly_p(string);
240
-
241
- for (start = 0, end = 0; end < len;) {
242
- p = ptr + end;
243
- c = (unsigned char) *p;
244
- if (c < 0x20) {
245
- switch (c) {
246
- case '\n':
247
- escape = "\\n";
248
- escape_len = 2;
249
- break;
250
- case '\r':
251
- escape = "\\r";
252
- escape_len = 2;
253
- break;
254
- case '\t':
255
- escape = "\\t";
256
- escape_len = 2;
257
- break;
258
- case '\f':
259
- escape = "\\f";
260
- escape_len = 2;
261
- break;
262
- case '\b':
263
- escape = "\\b";
264
- escape_len = 2;
265
- break;
266
- default:
267
- unicode_escape(buf, (UTF16) *p);
268
- escape = buf;
269
- escape_len = 6;
270
- break;
271
- }
272
- } else {
273
- switch (c) {
274
- case '\\':
275
- escape = "\\\\";
276
- escape_len = 2;
277
- break;
278
- case '"':
279
- escape = "\\\"";
280
- escape_len = 2;
281
- break;
282
- case '/':
283
- if(script_safe) {
284
- escape = "\\/";
285
- escape_len = 2;
286
- break;
258
+
259
+ for (short i = 1; i < ch_len; i++) {
260
+ wchar = (wchar << 6) | (ptr[pos+i] & 0x3F);
287
261
  }
288
- default:
289
- {
290
- unsigned short clen = 1;
291
- if (!ascii_only) {
292
- clen += trailingBytesForUTF8[c];
293
- if (end + clen > len) {
294
- rb_raise(rb_path2class("JSON::GeneratorError"),
295
- "partial character in source, but hit end");
296
- }
297
-
298
- if (script_safe && c == 0xE2) {
299
- unsigned char c2 = (unsigned char) *(p+1);
300
- unsigned char c3 = (unsigned char) *(p+2);
301
- if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
302
- fbuffer_append(buffer, ptr + start, end - start);
303
- start = end = (end + clen);
304
- if (c3 == 0xA8) {
305
- fbuffer_append(buffer, "\\u2028", 6);
306
- } else {
307
- fbuffer_append(buffer, "\\u2029", 6);
308
- }
309
- continue;
310
- }
311
- }
312
-
313
- if (!isLegalUTF8((UTF8 *) p, clen)) {
314
- rb_raise(rb_path2class("JSON::GeneratorError"),
315
- "source sequence is illegal/malformed utf-8");
316
- }
317
- }
318
- end += clen;
262
+
263
+ FLUSH_POS(ch_len);
264
+
265
+ if (wchar <= 0xFFFF) {
266
+ scratch[2] = hexdig[wchar >> 12];
267
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
268
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
269
+ scratch[5] = hexdig[wchar & 0xf];
270
+ fbuffer_append(out_buffer, scratch, 6);
271
+ } else {
272
+ uint16_t hi, lo;
273
+ wchar -= 0x10000;
274
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
275
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
276
+
277
+ scratch[2] = hexdig[hi >> 12];
278
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
279
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
280
+ scratch[5] = hexdig[hi & 0xf];
281
+
282
+ scratch[8] = hexdig[lo >> 12];
283
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
284
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
285
+ scratch[11] = hexdig[lo & 0xf];
286
+
287
+ fbuffer_append(out_buffer, scratch, 12);
319
288
  }
320
- continue;
289
+
321
290
  break;
291
+ }
322
292
  }
293
+ } else {
294
+ pos++;
323
295
  }
324
- fbuffer_append(buffer, ptr + start, end - start);
325
- fbuffer_append(buffer, escape, escape_len);
326
- start = ++end;
327
- escape = NULL;
328
296
  }
329
- fbuffer_append(buffer, ptr + start, end - start);
297
+ #undef FLUSH_POS
298
+
299
+ if (beg < len) {
300
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
301
+ }
302
+
303
+ RB_GC_GUARD(str);
330
304
  }
331
305
 
332
306
  static char *fstrndup(const char *ptr, unsigned long len) {
@@ -610,9 +584,6 @@ static void State_free(void *ptr)
610
584
  if (state->space_before) ruby_xfree(state->space_before);
611
585
  if (state->object_nl) ruby_xfree(state->object_nl);
612
586
  if (state->array_nl) ruby_xfree(state->array_nl);
613
- if (state->array_delim) fbuffer_free(state->array_delim);
614
- if (state->object_delim) fbuffer_free(state->object_delim);
615
- if (state->object_delim2) fbuffer_free(state->object_delim2);
616
587
  ruby_xfree(state);
617
588
  }
618
589
 
@@ -625,9 +596,6 @@ static size_t State_memsize(const void *ptr)
625
596
  if (state->space_before) size += state->space_before_len + 1;
626
597
  if (state->object_nl) size += state->object_nl_len + 1;
627
598
  if (state->array_nl) size += state->array_nl_len + 1;
628
- if (state->array_delim) size += FBUFFER_CAPA(state->array_delim);
629
- if (state->object_delim) size += FBUFFER_CAPA(state->object_delim);
630
- if (state->object_delim2) size += FBUFFER_CAPA(state->object_delim2);
631
599
  return size;
632
600
  }
633
601
 
@@ -636,194 +604,20 @@ static size_t State_memsize(const void *ptr)
636
604
  # define RUBY_TYPED_FROZEN_SHAREABLE 0
637
605
  #endif
638
606
 
639
- #ifdef NEW_TYPEDDATA_WRAPPER
640
607
  static const rb_data_type_t JSON_Generator_State_type = {
641
608
  "JSON/Generator/State",
642
609
  {NULL, State_free, State_memsize,},
643
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
644
610
  0, 0,
645
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
646
- #endif
611
+ RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
647
612
  };
648
- #endif
649
613
 
650
614
  static VALUE cState_s_allocate(VALUE klass)
651
615
  {
652
616
  JSON_Generator_State *state;
653
- return TypedData_Make_Struct(klass, JSON_Generator_State,
654
- &JSON_Generator_State_type, state);
655
- }
656
-
657
- /*
658
- * call-seq: configure(opts)
659
- *
660
- * Configure this State instance with the Hash _opts_, and return
661
- * itself.
662
- */
663
- static VALUE cState_configure(VALUE self, VALUE opts)
664
- {
665
- VALUE tmp;
666
- GET_STATE(self);
667
- tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash");
668
- if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
669
- opts = tmp;
670
- tmp = rb_hash_aref(opts, ID2SYM(i_indent));
671
- if (RTEST(tmp)) {
672
- unsigned long len;
673
- Check_Type(tmp, T_STRING);
674
- len = RSTRING_LEN(tmp);
675
- state->indent = fstrndup(RSTRING_PTR(tmp), len + 1);
676
- state->indent_len = len;
677
- }
678
- tmp = rb_hash_aref(opts, ID2SYM(i_space));
679
- if (RTEST(tmp)) {
680
- unsigned long len;
681
- Check_Type(tmp, T_STRING);
682
- len = RSTRING_LEN(tmp);
683
- state->space = fstrndup(RSTRING_PTR(tmp), len + 1);
684
- state->space_len = len;
685
- }
686
- tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
687
- if (RTEST(tmp)) {
688
- unsigned long len;
689
- Check_Type(tmp, T_STRING);
690
- len = RSTRING_LEN(tmp);
691
- state->space_before = fstrndup(RSTRING_PTR(tmp), len + 1);
692
- state->space_before_len = len;
693
- }
694
- tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
695
- if (RTEST(tmp)) {
696
- unsigned long len;
697
- Check_Type(tmp, T_STRING);
698
- len = RSTRING_LEN(tmp);
699
- state->array_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
700
- state->array_nl_len = len;
701
- }
702
- tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
703
- if (RTEST(tmp)) {
704
- unsigned long len;
705
- Check_Type(tmp, T_STRING);
706
- len = RSTRING_LEN(tmp);
707
- state->object_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
708
- state->object_nl_len = len;
709
- }
710
- tmp = ID2SYM(i_max_nesting);
617
+ VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
711
618
  state->max_nesting = 100;
712
- if (option_given_p(opts, tmp)) {
713
- VALUE max_nesting = rb_hash_aref(opts, tmp);
714
- if (RTEST(max_nesting)) {
715
- Check_Type(max_nesting, T_FIXNUM);
716
- state->max_nesting = FIX2LONG(max_nesting);
717
- } else {
718
- state->max_nesting = 0;
719
- }
720
- }
721
- tmp = ID2SYM(i_depth);
722
- state->depth = 0;
723
- if (option_given_p(opts, tmp)) {
724
- VALUE depth = rb_hash_aref(opts, tmp);
725
- if (RTEST(depth)) {
726
- Check_Type(depth, T_FIXNUM);
727
- state->depth = FIX2LONG(depth);
728
- } else {
729
- state->depth = 0;
730
- }
731
- }
732
- tmp = ID2SYM(i_buffer_initial_length);
733
- if (option_given_p(opts, tmp)) {
734
- VALUE buffer_initial_length = rb_hash_aref(opts, tmp);
735
- if (RTEST(buffer_initial_length)) {
736
- long initial_length;
737
- Check_Type(buffer_initial_length, T_FIXNUM);
738
- initial_length = FIX2LONG(buffer_initial_length);
739
- if (initial_length > 0) state->buffer_initial_length = initial_length;
740
- }
741
- }
742
- tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
743
- state->allow_nan = RTEST(tmp);
744
- tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
745
- state->ascii_only = RTEST(tmp);
746
- tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
747
- state->script_safe = RTEST(tmp);
748
- if (!state->script_safe) {
749
- tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
750
- state->script_safe = RTEST(tmp);
751
- }
752
- tmp = rb_hash_aref(opts, ID2SYM(i_strict));
753
- state->strict = RTEST(tmp);
754
- return self;
755
- }
756
-
757
- static void set_state_ivars(VALUE hash, VALUE state)
758
- {
759
- VALUE ivars = rb_obj_instance_variables(state);
760
- int i = 0;
761
- for (i = 0; i < RARRAY_LEN(ivars); i++) {
762
- VALUE key = rb_funcall(rb_ary_entry(ivars, i), i_to_s, 0);
763
- long key_len = RSTRING_LEN(key);
764
- VALUE value = rb_iv_get(state, StringValueCStr(key));
765
- rb_hash_aset(hash, rb_str_intern(rb_str_substr(key, 1, key_len - 1)), value);
766
- }
767
- }
768
-
769
- /*
770
- * call-seq: to_h
771
- *
772
- * Returns the configuration instance variables as a hash, that can be
773
- * passed to the configure method.
774
- */
775
- static VALUE cState_to_h(VALUE self)
776
- {
777
- VALUE result = rb_hash_new();
778
- GET_STATE(self);
779
- set_state_ivars(result, self);
780
- rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len));
781
- rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len));
782
- rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len));
783
- rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len));
784
- rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len));
785
- rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
786
- rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
787
- rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
788
- rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
789
- rb_hash_aset(result, ID2SYM(i_strict), state->strict ? Qtrue : Qfalse);
790
- rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
791
- rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
792
- return result;
793
- }
794
-
795
- /*
796
- * call-seq: [](name)
797
- *
798
- * Returns the value returned by method +name+.
799
- */
800
- static VALUE cState_aref(VALUE self, VALUE name)
801
- {
802
- name = rb_funcall(name, i_to_s, 0);
803
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) {
804
- return rb_funcall(self, i_send, 1, name);
805
- } else {
806
- return rb_attr_get(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)));
807
- }
808
- }
809
-
810
- /*
811
- * call-seq: []=(name, value)
812
- *
813
- * Sets the attribute name to value.
814
- */
815
- static VALUE cState_aset(VALUE self, VALUE name, VALUE value)
816
- {
817
- VALUE name_writer;
818
-
819
- name = rb_funcall(name, i_to_s, 0);
820
- name_writer = rb_str_cat2(rb_str_dup(name), "=");
821
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name_writer))) {
822
- return rb_funcall(self, i_send, 2, name_writer, value);
823
- } else {
824
- rb_ivar_set(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)), value);
825
- }
826
- return Qnil;
619
+ state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
620
+ return obj;
827
621
  }
828
622
 
829
623
  struct hash_foreach_arg {
@@ -841,39 +635,36 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
841
635
  JSON_Generator_State *state = arg->state;
842
636
  VALUE Vstate = arg->Vstate;
843
637
 
844
- char *object_nl = state->object_nl;
845
- long object_nl_len = state->object_nl_len;
846
- char *indent = state->indent;
847
- long indent_len = state->indent_len;
848
- char *delim = FBUFFER_PTR(state->object_delim);
849
- long delim_len = FBUFFER_LEN(state->object_delim);
850
- char *delim2 = FBUFFER_PTR(state->object_delim2);
851
- long delim2_len = FBUFFER_LEN(state->object_delim2);
852
638
  long depth = state->depth;
853
639
  int j;
854
- VALUE klass, key_to_s;
855
640
 
856
- if (arg->iter > 0) fbuffer_append(buffer, delim, delim_len);
857
- if (object_nl) {
858
- fbuffer_append(buffer, object_nl, object_nl_len);
641
+ if (arg->iter > 0) fbuffer_append_char(buffer, ',');
642
+ if (RB_UNLIKELY(state->object_nl)) {
643
+ fbuffer_append(buffer, state->object_nl, state->object_nl_len);
859
644
  }
860
- if (indent) {
645
+ if (RB_UNLIKELY(state->indent)) {
861
646
  for (j = 0; j < depth; j++) {
862
- fbuffer_append(buffer, indent, indent_len);
647
+ fbuffer_append(buffer, state->indent, state->indent_len);
863
648
  }
864
649
  }
865
650
 
866
- klass = CLASS_OF(key);
867
- if (klass == rb_cString) {
868
- key_to_s = key;
869
- } else if (klass == rb_cSymbol) {
870
- key_to_s = rb_id2str(SYM2ID(key));
871
- } else {
872
- key_to_s = rb_funcall(key, i_to_s, 0);
651
+ VALUE key_to_s;
652
+ switch(rb_type(key)) {
653
+ case T_STRING:
654
+ key_to_s = key;
655
+ break;
656
+ case T_SYMBOL:
657
+ key_to_s = rb_sym2str(key);
658
+ break;
659
+ default:
660
+ key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
661
+ break;
873
662
  }
874
- Check_Type(key_to_s, T_STRING);
875
- generate_json(buffer, Vstate, state, key_to_s);
876
- fbuffer_append(buffer, delim2, delim2_len);
663
+
664
+ generate_json_string(buffer, Vstate, state, key_to_s);
665
+ if (RB_UNLIKELY(state->space_before)) fbuffer_append(buffer, state->space_before, state->space_before_len);
666
+ fbuffer_append_char(buffer, ':');
667
+ if (RB_UNLIKELY(state->space)) fbuffer_append(buffer, state->space, state->space_len);
877
668
  generate_json(buffer, Vstate, state, val);
878
669
 
879
670
  arg->iter++;
@@ -882,17 +673,12 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
882
673
 
883
674
  static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
884
675
  {
885
- char *object_nl = state->object_nl;
886
- long object_nl_len = state->object_nl_len;
887
- char *indent = state->indent;
888
- long indent_len = state->indent_len;
889
676
  long max_nesting = state->max_nesting;
890
677
  long depth = ++state->depth;
891
678
  int j;
892
679
  struct hash_foreach_arg arg;
893
680
 
894
681
  if (max_nesting != 0 && depth > max_nesting) {
895
- fbuffer_free(buffer);
896
682
  rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
897
683
  }
898
684
  fbuffer_append_char(buffer, '{');
@@ -904,11 +690,11 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
904
690
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
905
691
 
906
692
  depth = --state->depth;
907
- if (object_nl) {
908
- fbuffer_append(buffer, object_nl, object_nl_len);
909
- if (indent) {
693
+ if (RB_UNLIKELY(state->object_nl)) {
694
+ fbuffer_append(buffer, state->object_nl, state->object_nl_len);
695
+ if (RB_UNLIKELY(state->indent)) {
910
696
  for (j = 0; j < depth; j++) {
911
- fbuffer_append(buffer, indent, indent_len);
697
+ fbuffer_append(buffer, state->indent, state->indent_len);
912
698
  }
913
699
  }
914
700
  }
@@ -917,63 +703,90 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
917
703
 
918
704
  static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
919
705
  {
920
- char *array_nl = state->array_nl;
921
- long array_nl_len = state->array_nl_len;
922
- char *indent = state->indent;
923
- long indent_len = state->indent_len;
924
706
  long max_nesting = state->max_nesting;
925
- char *delim = FBUFFER_PTR(state->array_delim);
926
- long delim_len = FBUFFER_LEN(state->array_delim);
927
707
  long depth = ++state->depth;
928
708
  int i, j;
929
709
  if (max_nesting != 0 && depth > max_nesting) {
930
- fbuffer_free(buffer);
931
710
  rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
932
711
  }
933
712
  fbuffer_append_char(buffer, '[');
934
- if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len);
713
+ if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
935
714
  for(i = 0; i < RARRAY_LEN(obj); i++) {
936
- if (i > 0) fbuffer_append(buffer, delim, delim_len);
937
- if (indent) {
715
+ if (i > 0) {
716
+ fbuffer_append_char(buffer, ',');
717
+ if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
718
+ }
719
+ if (RB_UNLIKELY(state->indent)) {
938
720
  for (j = 0; j < depth; j++) {
939
- fbuffer_append(buffer, indent, indent_len);
721
+ fbuffer_append(buffer, state->indent, state->indent_len);
940
722
  }
941
723
  }
942
- generate_json(buffer, Vstate, state, rb_ary_entry(obj, i));
724
+ generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i));
943
725
  }
944
726
  state->depth = --depth;
945
- if (array_nl) {
946
- fbuffer_append(buffer, array_nl, array_nl_len);
947
- if (indent) {
727
+ if (RB_UNLIKELY(state->array_nl)) {
728
+ fbuffer_append(buffer, state->array_nl, state->array_nl_len);
729
+ if (RB_UNLIKELY(state->indent)) {
948
730
  for (j = 0; j < depth; j++) {
949
- fbuffer_append(buffer, indent, indent_len);
731
+ fbuffer_append(buffer, state->indent, state->indent_len);
950
732
  }
951
733
  }
952
734
  }
953
735
  fbuffer_append_char(buffer, ']');
954
736
  }
955
737
 
956
- #ifdef HAVE_RUBY_ENCODING_H
957
- static int enc_utf8_compatible_p(rb_encoding *enc)
738
+ static int usascii_encindex, utf8_encindex, binary_encindex;
739
+
740
+ static inline int enc_utf8_compatible_p(int enc_idx)
958
741
  {
959
- if (enc == rb_usascii_encoding()) return 1;
960
- if (enc == rb_utf8_encoding()) return 1;
742
+ if (enc_idx == usascii_encindex) return 1;
743
+ if (enc_idx == utf8_encindex) return 1;
961
744
  return 0;
962
745
  }
963
- #endif
746
+
747
+ static inline VALUE ensure_valid_encoding(VALUE str)
748
+ {
749
+ int encindex = RB_ENCODING_GET(str);
750
+ VALUE utf8_string;
751
+ if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
752
+ if (encindex == binary_encindex) {
753
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
754
+ // TODO: Deprecate in 2.8.0
755
+ // TODO: Remove in 3.0.0
756
+ utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
757
+ switch (rb_enc_str_coderange(utf8_string)) {
758
+ case ENC_CODERANGE_7BIT:
759
+ case ENC_CODERANGE_VALID:
760
+ return utf8_string;
761
+ break;
762
+ }
763
+ }
764
+
765
+ str = rb_funcall(str, i_encode, 1, Encoding_UTF_8);
766
+ }
767
+ return str;
768
+ }
964
769
 
965
770
  static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
966
771
  {
772
+ obj = ensure_valid_encoding(obj);
773
+
967
774
  fbuffer_append_char(buffer, '"');
968
- #ifdef HAVE_RUBY_ENCODING_H
969
- if (!enc_utf8_compatible_p(rb_enc_get(obj))) {
970
- obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
971
- }
972
- #endif
973
- if (state->ascii_only) {
974
- convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
975
- } else {
976
- convert_UTF8_to_JSON(buffer, obj, state->script_safe);
775
+
776
+ switch(rb_enc_str_coderange(obj)) {
777
+ case ENC_CODERANGE_7BIT:
778
+ convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
779
+ break;
780
+ case ENC_CODERANGE_VALID:
781
+ if (RB_UNLIKELY(state->ascii_only)) {
782
+ convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
783
+ } else {
784
+ convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
785
+ }
786
+ break;
787
+ default:
788
+ rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
789
+ break;
977
790
  }
978
791
  fbuffer_append_char(buffer, '"');
979
792
  }
@@ -1020,11 +833,9 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
1020
833
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
1021
834
  if (!allow_nan) {
1022
835
  if (isinf(value)) {
1023
- fbuffer_free(buffer);
1024
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
836
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
1025
837
  } else if (isnan(value)) {
1026
- fbuffer_free(buffer);
1027
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
838
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
1028
839
  }
1029
840
  }
1030
841
  fbuffer_append_str(buffer, tmp);
@@ -1033,35 +844,56 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
1033
844
  static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1034
845
  {
1035
846
  VALUE tmp;
1036
- VALUE klass = CLASS_OF(obj);
1037
- if (klass == rb_cHash) {
1038
- generate_json_object(buffer, Vstate, state, obj);
1039
- } else if (klass == rb_cArray) {
1040
- generate_json_array(buffer, Vstate, state, obj);
1041
- } else if (klass == rb_cString) {
1042
- generate_json_string(buffer, Vstate, state, obj);
1043
- } else if (obj == Qnil) {
847
+ if (obj == Qnil) {
1044
848
  generate_json_null(buffer, Vstate, state, obj);
1045
849
  } else if (obj == Qfalse) {
1046
850
  generate_json_false(buffer, Vstate, state, obj);
1047
851
  } else if (obj == Qtrue) {
1048
852
  generate_json_true(buffer, Vstate, state, obj);
1049
- } else if (FIXNUM_P(obj)) {
1050
- generate_json_fixnum(buffer, Vstate, state, obj);
1051
- } else if (RB_TYPE_P(obj, T_BIGNUM)) {
1052
- generate_json_bignum(buffer, Vstate, state, obj);
1053
- } else if (klass == rb_cFloat) {
1054
- generate_json_float(buffer, Vstate, state, obj);
1055
- } else if (state->strict) {
1056
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(CLASS_OF(obj)));
1057
- } else if (rb_respond_to(obj, i_to_json)) {
1058
- tmp = rb_funcall(obj, i_to_json, 1, Vstate);
1059
- Check_Type(tmp, T_STRING);
1060
- fbuffer_append_str(buffer, tmp);
853
+ } else if (RB_SPECIAL_CONST_P(obj)) {
854
+ if (RB_FIXNUM_P(obj)) {
855
+ generate_json_fixnum(buffer, Vstate, state, obj);
856
+ } else if (RB_FLONUM_P(obj)) {
857
+ generate_json_float(buffer, Vstate, state, obj);
858
+ } else {
859
+ goto general;
860
+ }
1061
861
  } else {
1062
- tmp = rb_funcall(obj, i_to_s, 0);
1063
- Check_Type(tmp, T_STRING);
1064
- generate_json_string(buffer, Vstate, state, tmp);
862
+ VALUE klass = RBASIC_CLASS(obj);
863
+ switch (RB_BUILTIN_TYPE(obj)) {
864
+ case T_BIGNUM:
865
+ generate_json_bignum(buffer, Vstate, state, obj);
866
+ break;
867
+ case T_HASH:
868
+ if (klass != rb_cHash) goto general;
869
+ generate_json_object(buffer, Vstate, state, obj);
870
+ break;
871
+ case T_ARRAY:
872
+ if (klass != rb_cArray) goto general;
873
+ generate_json_array(buffer, Vstate, state, obj);
874
+ break;
875
+ case T_STRING:
876
+ if (klass != rb_cString) goto general;
877
+ generate_json_string(buffer, Vstate, state, obj);
878
+ break;
879
+ case T_FLOAT:
880
+ if (klass != rb_cFloat) goto general;
881
+ generate_json_float(buffer, Vstate, state, obj);
882
+ break;
883
+ default:
884
+ general:
885
+ if (state->strict) {
886
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
887
+ } else if (rb_respond_to(obj, i_to_json)) {
888
+ tmp = rb_funcall(obj, i_to_json, 1, Vstate);
889
+ Check_Type(tmp, T_STRING);
890
+ fbuffer_append_str(buffer, tmp);
891
+ } else {
892
+ tmp = rb_funcall(obj, i_to_s, 0);
893
+ Check_Type(tmp, T_STRING);
894
+ generate_json_string(buffer, Vstate, state, tmp);
895
+ }
896
+ }
1065
897
  }
1066
898
  }
1067
899
 
@@ -1071,36 +903,48 @@ static FBuffer *cState_prepare_buffer(VALUE self)
1071
903
  GET_STATE(self);
1072
904
  buffer = fbuffer_alloc(state->buffer_initial_length);
1073
905
 
1074
- if (state->object_delim) {
1075
- fbuffer_clear(state->object_delim);
1076
- } else {
1077
- state->object_delim = fbuffer_alloc(16);
1078
- }
1079
- fbuffer_append_char(state->object_delim, ',');
1080
- if (state->object_delim2) {
1081
- fbuffer_clear(state->object_delim2);
1082
- } else {
1083
- state->object_delim2 = fbuffer_alloc(16);
1084
- }
1085
- if (state->space_before) fbuffer_append(state->object_delim2, state->space_before, state->space_before_len);
1086
- fbuffer_append_char(state->object_delim2, ':');
1087
- if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len);
1088
-
1089
- if (state->array_delim) {
1090
- fbuffer_clear(state->array_delim);
1091
- } else {
1092
- state->array_delim = fbuffer_alloc(16);
1093
- }
1094
- fbuffer_append_char(state->array_delim, ',');
1095
- if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len);
1096
906
  return buffer;
1097
907
  }
1098
908
 
909
+ struct generate_json_data {
910
+ FBuffer *buffer;
911
+ VALUE vstate;
912
+ JSON_Generator_State *state;
913
+ VALUE obj;
914
+ };
915
+
916
+ static VALUE generate_json_try(VALUE d)
917
+ {
918
+ struct generate_json_data *data = (struct generate_json_data *)d;
919
+
920
+ generate_json(data->buffer, data->vstate, data->state, data->obj);
921
+
922
+ return Qnil;
923
+ }
924
+
925
+ static VALUE generate_json_rescue(VALUE d, VALUE exc)
926
+ {
927
+ struct generate_json_data *data = (struct generate_json_data *)d;
928
+ fbuffer_free(data->buffer);
929
+
930
+ rb_exc_raise(exc);
931
+
932
+ return Qundef;
933
+ }
934
+
1099
935
  static VALUE cState_partial_generate(VALUE self, VALUE obj)
1100
936
  {
1101
937
  FBuffer *buffer = cState_prepare_buffer(self);
1102
938
  GET_STATE(self);
1103
- generate_json(buffer, self, state, obj);
939
+
940
+ struct generate_json_data data = {
941
+ .buffer = buffer,
942
+ .vstate = self,
943
+ .state = state,
944
+ .obj = obj
945
+ };
946
+ rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
947
+
1104
948
  return fbuffer_to_s(buffer);
1105
949
  }
1106
950
 
@@ -1119,37 +963,6 @@ static VALUE cState_generate(VALUE self, VALUE obj)
1119
963
  return result;
1120
964
  }
1121
965
 
1122
- /*
1123
- * call-seq: new(opts = {})
1124
- *
1125
- * Instantiates a new State object, configured by _opts_.
1126
- *
1127
- * _opts_ can have the following keys:
1128
- *
1129
- * * *indent*: a string used to indent levels (default: ''),
1130
- * * *space*: a string that is put after, a : or , delimiter (default: ''),
1131
- * * *space_before*: a string that is put before a : pair delimiter (default: ''),
1132
- * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
1133
- * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
1134
- * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
1135
- * generated, otherwise an exception is thrown, if these values are
1136
- * encountered. This options defaults to false.
1137
- * * *ascii_only*: true if only ASCII characters should be generated. This
1138
- * option defaults to false.
1139
- * * *buffer_initial_length*: sets the initial length of the generator's
1140
- * internal buffer.
1141
- */
1142
- static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
1143
- {
1144
- VALUE opts;
1145
- GET_STATE(self);
1146
- state->max_nesting = 100;
1147
- state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1148
- rb_scan_args(argc, argv, "01", &opts);
1149
- if (!NIL_P(opts)) cState_configure(self, opts);
1150
- return self;
1151
- }
1152
-
1153
966
  /*
1154
967
  * call-seq: initialize_copy(orig)
1155
968
  *
@@ -1171,9 +984,6 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
1171
984
  objState->space_before = fstrndup(origState->space_before, origState->space_before_len);
1172
985
  objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len);
1173
986
  objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len);
1174
- if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim);
1175
- if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim);
1176
- if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2);
1177
987
  return obj;
1178
988
  }
1179
989
 
@@ -1412,7 +1222,8 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1412
1222
  {
1413
1223
  GET_STATE(self);
1414
1224
  Check_Type(depth, T_FIXNUM);
1415
- return state->max_nesting = FIX2LONG(depth);
1225
+ state->max_nesting = FIX2LONG(depth);
1226
+ return Qnil;
1416
1227
  }
1417
1228
 
1418
1229
  /*
@@ -1483,6 +1294,18 @@ static VALUE cState_allow_nan_p(VALUE self)
1483
1294
  return state->allow_nan ? Qtrue : Qfalse;
1484
1295
  }
1485
1296
 
1297
+ /*
1298
+ * call-seq: allow_nan=(enable)
1299
+ *
1300
+ * This sets whether or not to serialize NaN, Infinity, and -Infinity
1301
+ */
1302
+ static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1303
+ {
1304
+ GET_STATE(self);
1305
+ state->allow_nan = RTEST(enable);
1306
+ return Qnil;
1307
+ }
1308
+
1486
1309
  /*
1487
1310
  * call-seq: ascii_only?
1488
1311
  *
@@ -1495,6 +1318,18 @@ static VALUE cState_ascii_only_p(VALUE self)
1495
1318
  return state->ascii_only ? Qtrue : Qfalse;
1496
1319
  }
1497
1320
 
1321
+ /*
1322
+ * call-seq: ascii_only=(enable)
1323
+ *
1324
+ * This sets whether only ASCII characters should be generated.
1325
+ */
1326
+ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1327
+ {
1328
+ GET_STATE(self);
1329
+ state->ascii_only = RTEST(enable);
1330
+ return Qnil;
1331
+ }
1332
+
1498
1333
  /*
1499
1334
  * call-seq: depth
1500
1335
  *
@@ -1562,8 +1397,8 @@ void Init_generator(void)
1562
1397
  rb_require("json/common");
1563
1398
 
1564
1399
  mJSON = rb_define_module("JSON");
1565
- mExt = rb_define_module_under(mJSON, "Ext");
1566
- mGenerator = rb_define_module_under(mExt, "Generator");
1400
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1401
+ VALUE mGenerator = rb_define_module_under(mExt, "Generator");
1567
1402
 
1568
1403
  eGeneratorError = rb_path2class("JSON::GeneratorError");
1569
1404
  eNestingError = rb_path2class("JSON::NestingError");
@@ -1573,7 +1408,6 @@ void Init_generator(void)
1573
1408
  cState = rb_define_class_under(mGenerator, "State", rb_cObject);
1574
1409
  rb_define_alloc_func(cState, cState_s_allocate);
1575
1410
  rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
1576
- rb_define_method(cState, "initialize", cState_initialize, -1);
1577
1411
  rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
1578
1412
  rb_define_method(cState, "indent", cState_indent, 0);
1579
1413
  rb_define_method(cState, "indent=", cState_indent_set, 1);
@@ -1598,76 +1432,70 @@ void Init_generator(void)
1598
1432
  rb_define_method(cState, "strict=", cState_strict_set, 1);
1599
1433
  rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
1600
1434
  rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
1435
+ rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1);
1601
1436
  rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
1437
+ rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1);
1602
1438
  rb_define_method(cState, "depth", cState_depth, 0);
1603
1439
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1604
1440
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1605
1441
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1606
- rb_define_method(cState, "configure", cState_configure, 1);
1607
- rb_define_alias(cState, "merge", "configure");
1608
- rb_define_method(cState, "to_h", cState_to_h, 0);
1609
- rb_define_alias(cState, "to_hash", "to_h");
1610
- rb_define_method(cState, "[]", cState_aref, 1);
1611
- rb_define_method(cState, "[]=", cState_aset, 2);
1612
1442
  rb_define_method(cState, "generate", cState_generate, 1);
1613
1443
 
1614
- mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1615
- mObject = rb_define_module_under(mGeneratorMethods, "Object");
1444
+ VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1445
+
1446
+ VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object");
1616
1447
  rb_define_method(mObject, "to_json", mObject_to_json, -1);
1617
- mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1448
+
1449
+ VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1618
1450
  rb_define_method(mHash, "to_json", mHash_to_json, -1);
1619
- mArray = rb_define_module_under(mGeneratorMethods, "Array");
1451
+
1452
+ VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array");
1620
1453
  rb_define_method(mArray, "to_json", mArray_to_json, -1);
1454
+
1621
1455
  #ifdef RUBY_INTEGER_UNIFICATION
1622
- mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
1456
+ VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
1623
1457
  rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
1624
1458
  #else
1625
- mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1459
+ VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1626
1460
  rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
1627
- mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1461
+
1462
+ VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1628
1463
  rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
1629
1464
  #endif
1630
- mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1465
+ VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1631
1466
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
1632
- mString = rb_define_module_under(mGeneratorMethods, "String");
1467
+
1468
+ VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
1633
1469
  rb_define_singleton_method(mString, "included", mString_included_s, 1);
1634
1470
  rb_define_method(mString, "to_json", mString_to_json, -1);
1635
1471
  rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
1636
1472
  rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
1473
+
1637
1474
  mString_Extend = rb_define_module_under(mString, "Extend");
1638
1475
  rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
1639
- mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1476
+
1477
+ VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1640
1478
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
1641
- mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1479
+
1480
+ VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1642
1481
  rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
1643
- mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1482
+
1483
+ VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1644
1484
  rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
1645
1485
 
1486
+ rb_global_variable(&Encoding_UTF_8);
1487
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1488
+
1646
1489
  i_to_s = rb_intern("to_s");
1647
1490
  i_to_json = rb_intern("to_json");
1648
1491
  i_new = rb_intern("new");
1649
- i_indent = rb_intern("indent");
1650
- i_space = rb_intern("space");
1651
- i_space_before = rb_intern("space_before");
1652
- i_object_nl = rb_intern("object_nl");
1653
- i_array_nl = rb_intern("array_nl");
1654
- i_max_nesting = rb_intern("max_nesting");
1655
- i_script_safe = rb_intern("script_safe");
1656
- i_escape_slash = rb_intern("escape_slash");
1657
- i_strict = rb_intern("strict");
1658
- i_allow_nan = rb_intern("allow_nan");
1659
- i_ascii_only = rb_intern("ascii_only");
1660
- i_depth = rb_intern("depth");
1661
- i_buffer_initial_length = rb_intern("buffer_initial_length");
1662
1492
  i_pack = rb_intern("pack");
1663
1493
  i_unpack = rb_intern("unpack");
1664
1494
  i_create_id = rb_intern("create_id");
1665
1495
  i_extend = rb_intern("extend");
1666
- i_key_p = rb_intern("key?");
1667
- i_aref = rb_intern("[]");
1668
- i_send = rb_intern("__send__");
1669
- i_respond_to_p = rb_intern("respond_to?");
1670
- i_match = rb_intern("match");
1671
- i_keys = rb_intern("keys");
1672
- i_dup = rb_intern("dup");
1496
+ i_encode = rb_intern("encode");
1497
+
1498
+ usascii_encindex = rb_usascii_encindex();
1499
+ utf8_encindex = rb_utf8_encindex();
1500
+ binary_encindex = rb_ascii8bit_encindex();
1673
1501
  }