json 2.7.2 → 2.7.3.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,332 +1,306 @@
1
1
  #include "../fbuffer/fbuffer.h"
2
2
  #include "generator.h"
3
3
 
4
- static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
5
- mHash, mArray,
6
- #ifdef RUBY_INTEGER_UNIFICATION
7
- mInteger,
8
- #else
9
- mFixnum, mBignum,
4
+ #ifndef RB_UNLIKELY
5
+ #define RB_UNLIKELY(cond) (cond)
10
6
  #endif
11
- mFloat, mString, mString_Extend,
12
- mTrueClass, mFalseClass, mNilClass, eGeneratorError,
13
- eNestingError;
14
7
 
15
- static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
16
- i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
17
- i_pack, i_unpack, i_create_id, i_extend, i_key_p,
18
- i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
19
- i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
8
+ static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
20
9
 
21
- /*
22
- * Copyright 2001-2004 Unicode, Inc.
10
+ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
11
+
12
+ /* Converts str to a JSON string (without the wrapping '"'
13
+ * characters) in FBuffer out_buffer.
23
14
  *
24
- * Disclaimer
15
+ * Characters are JSON-escaped according to:
25
16
  *
26
- * This source code is provided as is by Unicode, Inc. No claims are
27
- * made as to fitness for any particular purpose. No warranties of any
28
- * kind are expressed or implied. The recipient agrees to determine
29
- * applicability of information provided. If this file has been
30
- * purchased on magnetic or optical media from Unicode, Inc., the
31
- * sole remedy for any claim will be exchange of defective media
32
- * within 90 days of receipt.
17
+ * - Always: ASCII control characters (0x00-0x1F), dquote, and
18
+ * backslash.
33
19
  *
34
- * Limitations on Rights to Redistribute This Code
20
+ * - Non-ASCII characters (>0x7F) are escaped only by convert_UTF8_to_ASCII_only_JSON, not here
35
21
  *
36
- * Unicode, Inc. hereby grants the right to freely use the information
37
- * supplied in this file in the creation of products supporting the
38
- * Unicode Standard, and to make copies of this file in any form
39
- * for internal or external distribution as long as this notice
40
- * remains attached.
22
+ * - If out_script_safe: forward slash, line separator (U+2028), and
23
+ * paragraph separator (U+2029)
24
+ *
25
+ * Everything else (should be UTF-8) is just passed through and
26
+ * appended to the result.
41
27
  */
28
+ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
29
+ {
30
+ const char *hexdig = "0123456789abcdef";
31
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
42
32
 
43
- /*
44
- * Index into the table below with the first byte of a UTF-8 sequence to
45
- * get the number of trailing bytes that are supposed to follow it.
46
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
47
- * left as-is for anyone who may want to do such conversion, which was
48
- * allowed in earlier algorithms.
49
- */
50
- static const char trailingBytesForUTF8[256] = {
51
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
59
- };
33
+ const char *ptr = RSTRING_PTR(str);
34
+ unsigned long len = RSTRING_LEN(str);
60
35
 
61
- /*
62
- * Magic values subtracted from a buffer value during UTF8 conversion.
63
- * This table contains as many values as there might be trailing bytes
64
- * in a UTF-8 sequence.
65
- */
66
- static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
67
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
36
+ unsigned long beg = 0, pos = 0;
68
37
 
69
- /*
70
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
71
- * This must be called with the length pre-determined by the first byte.
72
- * If not calling this from ConvertUTF8to*, then the length can be set by:
73
- * length = trailingBytesForUTF8[*source]+1;
74
- * and the sequence is illegal right away if there aren't that many bytes
75
- * available.
76
- * If presented with a length > 4, this returns 0. The Unicode
77
- * definition of UTF-8 goes up to 4-byte sequences.
78
- */
79
- static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
80
- {
81
- UTF8 a;
82
- const UTF8 *srcptr = source+length;
83
- switch (length) {
84
- default: return 0;
85
- /* Everything else falls through when "1"... */
86
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
87
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
88
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
89
-
90
- switch (*source) {
91
- /* no fall-through in this inner switch */
92
- case 0xE0: if (a < 0xA0) return 0; break;
93
- case 0xED: if (a > 0x9F) return 0; break;
94
- case 0xF0: if (a < 0x90) return 0; break;
95
- case 0xF4: if (a > 0x8F) return 0; break;
96
- default: if (a < 0x80) return 0;
38
+ #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
39
+
40
+ while (pos < len) {
41
+ unsigned char ch = ptr[pos];
42
+ unsigned char ch_len = escape_table[ch];
43
+ /* JSON encoding */
44
+
45
+ if (RB_UNLIKELY(ch_len)) {
46
+ switch (ch_len) {
47
+ case 0:
48
+ pos++;
49
+ break;
50
+ case 1: {
51
+ FLUSH_POS(1);
52
+ switch (ch) {
53
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
54
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
55
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
56
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
57
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
58
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
59
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
60
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
61
+ default: {
62
+ scratch[2] = hexdig[ch >> 12];
63
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
64
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
65
+ scratch[5] = hexdig[ch & 0xf];
66
+ fbuffer_append(out_buffer, scratch, 6);
67
+ break;
68
+ }
97
69
  }
70
+ break;
71
+ }
72
+ case 3: {
73
+ unsigned char b2 = ptr[pos + 1];
74
+ if (RB_UNLIKELY(out_script_safe && b2 == 0x80)) {
75
+ unsigned char b3 = ptr[pos + 2];
76
+ if (b3 == 0xA8) {
77
+ FLUSH_POS(3);
78
+ fbuffer_append(out_buffer, "\\u2028", 6);
79
+ break;
80
+ } else if (b3 == 0xA9) {
81
+ FLUSH_POS(3);
82
+ fbuffer_append(out_buffer, "\\u2029", 6);
83
+ break;
84
+ }
85
+ }
86
+ // fallthrough
87
+ }
88
+ default:
89
+ pos += ch_len;
90
+ break;
91
+ }
92
+ } else {
93
+ pos++;
94
+ }
95
+ }
96
+ #undef FLUSH_POS
98
97
 
99
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
98
+ if (beg < len) {
99
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
100
100
  }
101
- if (*source > 0xF4) return 0;
102
- return 1;
103
- }
104
101
 
105
- /* Escapes the UTF16 character and stores the result in the buffer buf. */
106
- static void unicode_escape(char *buf, UTF16 character)
107
- {
108
- const char *digits = "0123456789abcdef";
102
+ RB_GC_GUARD(str);
103
+ }
104
+
105
+ static const char escape_table[256] = {
106
+ // ASCII Control Characters
107
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
108
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
109
+ // ASCII Characters
110
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
111
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
112
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
113
+ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
114
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
115
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
116
+ // Continuation byte
117
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
118
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
119
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
120
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
121
+ // First byte of a 2-byte code point
122
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
123
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
124
+ // First byte of a 3-byte code point
125
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
126
+ // First byte of a 4+ byte code point (0xFE and 0xFF are treated as single bytes)
127
+ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
128
+ };
109
129
 
110
- buf[2] = digits[character >> 12];
111
- buf[3] = digits[(character >> 8) & 0xf];
112
- buf[4] = digits[(character >> 4) & 0xf];
113
- buf[5] = digits[character & 0xf];
114
- }
130
+ static const char script_safe_escape_table[256] = {
131
+ // ASCII Control Characters
132
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
133
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
134
+ // ASCII Characters
135
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
136
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
137
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
138
+ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
139
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
140
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
141
+ // Continuation byte
142
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
143
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
144
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
145
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
146
+ // First byte of a 2-byte code point
147
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
148
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
149
+ // First byte of a 3-byte code point
150
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
151
+ // First byte of a 4+ byte code point (0xFE and 0xFF are treated as single bytes)
152
+ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
153
+ };
115
154
 
116
- /* Escapes the UTF16 character and stores the result in the buffer buf, then
117
- * the buffer buf is appended to the FBuffer buffer. */
118
- static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
119
- character)
155
+ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256])
120
156
  {
121
- unicode_escape(buf, character);
122
- fbuffer_append(buffer, buf, 6);
157
+ const char *hexdig = "0123456789abcdef";
158
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
159
+
160
+ const char *ptr = RSTRING_PTR(str);
161
+ unsigned long len = RSTRING_LEN(str);
162
+
163
+ unsigned long beg = 0, pos;
164
+
165
+ for (pos = 0; pos < len;) {
166
+ unsigned char ch = ptr[pos];
167
+ /* JSON encoding */
168
+ if (escape_table[ch]) {
169
+ if (pos > beg) {
170
+ fbuffer_append(out_buffer, &ptr[beg], pos - beg);
171
+ }
172
+
173
+ beg = pos + 1;
174
+ switch (ch) {
175
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
176
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
177
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
178
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
179
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
180
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
181
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
182
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
183
+ default:
184
+ scratch[2] = hexdig[ch >> 12];
185
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
186
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
187
+ scratch[5] = hexdig[ch & 0xf];
188
+ fbuffer_append(out_buffer, scratch, 6);
189
+ }
190
+ }
191
+
192
+ pos++;
193
+ }
194
+
195
+ if (beg < len) {
196
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
197
+ }
198
+
199
+ RB_GC_GUARD(str);
123
200
  }
124
201
 
125
- /* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
126
- * and control characters are JSON escaped. */
127
- static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
202
+ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
128
203
  {
129
- const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
130
- const UTF8 *sourceEnd = source + RSTRING_LEN(string);
131
- char buf[6] = { '\\', 'u' };
204
+ const char *hexdig = "0123456789abcdef";
205
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
132
206
 
133
- while (source < sourceEnd) {
134
- UTF32 ch = 0;
135
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
136
- if (source + extraBytesToRead >= sourceEnd) {
137
- rb_raise(rb_path2class("JSON::GeneratorError"),
138
- "partial character in source, but hit end");
139
- }
140
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
141
- rb_raise(rb_path2class("JSON::GeneratorError"),
142
- "source sequence is illegal/malformed utf-8");
143
- }
144
- /*
145
- * The cases all fall through. See "Note A" below.
146
- */
147
- switch (extraBytesToRead) {
148
- case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
149
- case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
150
- case 3: ch += *source++; ch <<= 6;
151
- case 2: ch += *source++; ch <<= 6;
152
- case 1: ch += *source++; ch <<= 6;
153
- case 0: ch += *source++;
154
- }
155
- ch -= offsetsFromUTF8[extraBytesToRead];
156
-
157
- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
158
- /* UTF-16 surrogate values are illegal in UTF-32 */
159
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
160
- #if UNI_STRICT_CONVERSION
161
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
162
- rb_raise(rb_path2class("JSON::GeneratorError"),
163
- "source sequence is illegal/malformed utf-8");
164
- #else
165
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
166
- #endif
167
- } else {
168
- /* normal case */
169
- if (ch >= 0x20 && ch <= 0x7f) {
207
+ const char *ptr = RSTRING_PTR(str);
208
+ unsigned long len = RSTRING_LEN(str);
209
+
210
+ unsigned long beg = 0, pos = 0;
211
+
212
+ #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
213
+
214
+ while (pos < len) {
215
+ unsigned char ch = ptr[pos];
216
+ unsigned char ch_len = escape_table[ch];
217
+
218
+ if (RB_UNLIKELY(ch_len)) {
219
+ switch (ch_len) {
220
+ case 0:
221
+ pos++;
222
+ break;
223
+ case 1: {
224
+ FLUSH_POS(1);
170
225
  switch (ch) {
171
- case '\\':
172
- fbuffer_append(buffer, "\\\\", 2);
173
- break;
174
- case '"':
175
- fbuffer_append(buffer, "\\\"", 2);
176
- break;
177
- case '/':
178
- if(script_safe) {
179
- fbuffer_append(buffer, "\\/", 2);
180
- break;
181
- }
182
- default:
183
- fbuffer_append_char(buffer, (char)ch);
226
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
227
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
228
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
229
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
230
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
231
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
232
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
233
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
234
+ default: {
235
+ scratch[2] = hexdig[ch >> 12];
236
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
237
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
238
+ scratch[5] = hexdig[ch & 0xf];
239
+ fbuffer_append(out_buffer, scratch, 6);
184
240
  break;
241
+ }
185
242
  }
186
- } else {
187
- switch (ch) {
188
- case '\n':
189
- fbuffer_append(buffer, "\\n", 2);
190
- break;
191
- case '\r':
192
- fbuffer_append(buffer, "\\r", 2);
193
- break;
194
- case '\t':
195
- fbuffer_append(buffer, "\\t", 2);
196
- break;
197
- case '\f':
198
- fbuffer_append(buffer, "\\f", 2);
243
+ break;
244
+ }
245
+ default: {
246
+ uint32_t wchar = 0;
247
+ switch(ch_len) {
248
+ case 2:
249
+ wchar = ptr[pos] & 0x1F;
199
250
  break;
200
- case '\b':
201
- fbuffer_append(buffer, "\\b", 2);
251
+ case 3:
252
+ wchar = ptr[pos] & 0x0F;
202
253
  break;
203
- default:
204
- unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
254
+ case 4:
255
+ wchar = ptr[pos] & 0x07;
205
256
  break;
206
257
  }
207
- }
208
- }
209
- } else if (ch > UNI_MAX_UTF16) {
210
- #if UNI_STRICT_CONVERSION
211
- source -= (extraBytesToRead+1); /* return to the start */
212
- rb_raise(rb_path2class("JSON::GeneratorError"),
213
- "source sequence is illegal/malformed utf8");
214
- #else
215
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
216
- #endif
217
- } else {
218
- /* target is a character in range 0xFFFF - 0x10FFFF. */
219
- ch -= halfBase;
220
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
221
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
222
- }
223
- }
224
- RB_GC_GUARD(string);
225
- }
226
-
227
- /* Converts string to a JSON string in FBuffer buffer, where only the
228
- * characters required by the JSON standard are JSON escaped. The remaining
229
- * characters (should be UTF8) are just passed through and appended to the
230
- * result. */
231
- static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
232
- {
233
- const char *ptr = RSTRING_PTR(string), *p;
234
- unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
235
- const char *escape = NULL;
236
- int escape_len;
237
- unsigned char c;
238
- char buf[6] = { '\\', 'u' };
239
- int ascii_only = rb_enc_str_asciionly_p(string);
240
-
241
- for (start = 0, end = 0; end < len;) {
242
- p = ptr + end;
243
- c = (unsigned char) *p;
244
- if (c < 0x20) {
245
- switch (c) {
246
- case '\n':
247
- escape = "\\n";
248
- escape_len = 2;
249
- break;
250
- case '\r':
251
- escape = "\\r";
252
- escape_len = 2;
253
- break;
254
- case '\t':
255
- escape = "\\t";
256
- escape_len = 2;
257
- break;
258
- case '\f':
259
- escape = "\\f";
260
- escape_len = 2;
261
- break;
262
- case '\b':
263
- escape = "\\b";
264
- escape_len = 2;
265
- break;
266
- default:
267
- unicode_escape(buf, (UTF16) *p);
268
- escape = buf;
269
- escape_len = 6;
270
- break;
271
- }
272
- } else {
273
- switch (c) {
274
- case '\\':
275
- escape = "\\\\";
276
- escape_len = 2;
277
- break;
278
- case '"':
279
- escape = "\\\"";
280
- escape_len = 2;
281
- break;
282
- case '/':
283
- if(script_safe) {
284
- escape = "\\/";
285
- escape_len = 2;
286
- break;
258
+
259
+ for (short i = 1; i < ch_len; i++) {
260
+ wchar = (wchar << 6) | (ptr[pos+i] & 0x3F);
287
261
  }
288
- default:
289
- {
290
- unsigned short clen = 1;
291
- if (!ascii_only) {
292
- clen += trailingBytesForUTF8[c];
293
- if (end + clen > len) {
294
- rb_raise(rb_path2class("JSON::GeneratorError"),
295
- "partial character in source, but hit end");
296
- }
297
-
298
- if (script_safe && c == 0xE2) {
299
- unsigned char c2 = (unsigned char) *(p+1);
300
- unsigned char c3 = (unsigned char) *(p+2);
301
- if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
302
- fbuffer_append(buffer, ptr + start, end - start);
303
- start = end = (end + clen);
304
- if (c3 == 0xA8) {
305
- fbuffer_append(buffer, "\\u2028", 6);
306
- } else {
307
- fbuffer_append(buffer, "\\u2029", 6);
308
- }
309
- continue;
310
- }
311
- }
312
-
313
- if (!isLegalUTF8((UTF8 *) p, clen)) {
314
- rb_raise(rb_path2class("JSON::GeneratorError"),
315
- "source sequence is illegal/malformed utf-8");
316
- }
317
- }
318
- end += clen;
262
+
263
+ FLUSH_POS(ch_len);
264
+
265
+ if (wchar <= 0xFFFF) {
266
+ scratch[2] = hexdig[wchar >> 12];
267
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
268
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
269
+ scratch[5] = hexdig[wchar & 0xf];
270
+ fbuffer_append(out_buffer, scratch, 6);
271
+ } else {
272
+ uint16_t hi, lo;
273
+ wchar -= 0x10000;
274
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
275
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
276
+
277
+ scratch[2] = hexdig[hi >> 12];
278
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
279
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
280
+ scratch[5] = hexdig[hi & 0xf];
281
+
282
+ scratch[8] = hexdig[lo >> 12];
283
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
284
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
285
+ scratch[11] = hexdig[lo & 0xf];
286
+
287
+ fbuffer_append(out_buffer, scratch, 12);
319
288
  }
320
- continue;
289
+
321
290
  break;
291
+ }
322
292
  }
293
+ } else {
294
+ pos++;
323
295
  }
324
- fbuffer_append(buffer, ptr + start, end - start);
325
- fbuffer_append(buffer, escape, escape_len);
326
- start = ++end;
327
- escape = NULL;
328
296
  }
329
- fbuffer_append(buffer, ptr + start, end - start);
297
+ #undef FLUSH_POS
298
+
299
+ if (beg < len) {
300
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
301
+ }
302
+
303
+ RB_GC_GUARD(str);
330
304
  }
331
305
 
332
306
  static char *fstrndup(const char *ptr, unsigned long len) {
@@ -610,9 +584,6 @@ static void State_free(void *ptr)
610
584
  if (state->space_before) ruby_xfree(state->space_before);
611
585
  if (state->object_nl) ruby_xfree(state->object_nl);
612
586
  if (state->array_nl) ruby_xfree(state->array_nl);
613
- if (state->array_delim) fbuffer_free(state->array_delim);
614
- if (state->object_delim) fbuffer_free(state->object_delim);
615
- if (state->object_delim2) fbuffer_free(state->object_delim2);
616
587
  ruby_xfree(state);
617
588
  }
618
589
 
@@ -625,9 +596,6 @@ static size_t State_memsize(const void *ptr)
625
596
  if (state->space_before) size += state->space_before_len + 1;
626
597
  if (state->object_nl) size += state->object_nl_len + 1;
627
598
  if (state->array_nl) size += state->array_nl_len + 1;
628
- if (state->array_delim) size += FBUFFER_CAPA(state->array_delim);
629
- if (state->object_delim) size += FBUFFER_CAPA(state->object_delim);
630
- if (state->object_delim2) size += FBUFFER_CAPA(state->object_delim2);
631
599
  return size;
632
600
  }
633
601
 
@@ -636,194 +604,20 @@ static size_t State_memsize(const void *ptr)
636
604
  # define RUBY_TYPED_FROZEN_SHAREABLE 0
637
605
  #endif
638
606
 
639
- #ifdef NEW_TYPEDDATA_WRAPPER
640
607
  static const rb_data_type_t JSON_Generator_State_type = {
641
608
  "JSON/Generator/State",
642
609
  {NULL, State_free, State_memsize,},
643
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
644
610
  0, 0,
645
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
646
- #endif
611
+ RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
647
612
  };
648
- #endif
649
613
 
650
614
  static VALUE cState_s_allocate(VALUE klass)
651
615
  {
652
616
  JSON_Generator_State *state;
653
- return TypedData_Make_Struct(klass, JSON_Generator_State,
654
- &JSON_Generator_State_type, state);
655
- }
656
-
657
- /*
658
- * call-seq: configure(opts)
659
- *
660
- * Configure this State instance with the Hash _opts_, and return
661
- * itself.
662
- */
663
- static VALUE cState_configure(VALUE self, VALUE opts)
664
- {
665
- VALUE tmp;
666
- GET_STATE(self);
667
- tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash");
668
- if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
669
- opts = tmp;
670
- tmp = rb_hash_aref(opts, ID2SYM(i_indent));
671
- if (RTEST(tmp)) {
672
- unsigned long len;
673
- Check_Type(tmp, T_STRING);
674
- len = RSTRING_LEN(tmp);
675
- state->indent = fstrndup(RSTRING_PTR(tmp), len + 1);
676
- state->indent_len = len;
677
- }
678
- tmp = rb_hash_aref(opts, ID2SYM(i_space));
679
- if (RTEST(tmp)) {
680
- unsigned long len;
681
- Check_Type(tmp, T_STRING);
682
- len = RSTRING_LEN(tmp);
683
- state->space = fstrndup(RSTRING_PTR(tmp), len + 1);
684
- state->space_len = len;
685
- }
686
- tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
687
- if (RTEST(tmp)) {
688
- unsigned long len;
689
- Check_Type(tmp, T_STRING);
690
- len = RSTRING_LEN(tmp);
691
- state->space_before = fstrndup(RSTRING_PTR(tmp), len + 1);
692
- state->space_before_len = len;
693
- }
694
- tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
695
- if (RTEST(tmp)) {
696
- unsigned long len;
697
- Check_Type(tmp, T_STRING);
698
- len = RSTRING_LEN(tmp);
699
- state->array_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
700
- state->array_nl_len = len;
701
- }
702
- tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
703
- if (RTEST(tmp)) {
704
- unsigned long len;
705
- Check_Type(tmp, T_STRING);
706
- len = RSTRING_LEN(tmp);
707
- state->object_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
708
- state->object_nl_len = len;
709
- }
710
- tmp = ID2SYM(i_max_nesting);
617
+ VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
711
618
  state->max_nesting = 100;
712
- if (option_given_p(opts, tmp)) {
713
- VALUE max_nesting = rb_hash_aref(opts, tmp);
714
- if (RTEST(max_nesting)) {
715
- Check_Type(max_nesting, T_FIXNUM);
716
- state->max_nesting = FIX2LONG(max_nesting);
717
- } else {
718
- state->max_nesting = 0;
719
- }
720
- }
721
- tmp = ID2SYM(i_depth);
722
- state->depth = 0;
723
- if (option_given_p(opts, tmp)) {
724
- VALUE depth = rb_hash_aref(opts, tmp);
725
- if (RTEST(depth)) {
726
- Check_Type(depth, T_FIXNUM);
727
- state->depth = FIX2LONG(depth);
728
- } else {
729
- state->depth = 0;
730
- }
731
- }
732
- tmp = ID2SYM(i_buffer_initial_length);
733
- if (option_given_p(opts, tmp)) {
734
- VALUE buffer_initial_length = rb_hash_aref(opts, tmp);
735
- if (RTEST(buffer_initial_length)) {
736
- long initial_length;
737
- Check_Type(buffer_initial_length, T_FIXNUM);
738
- initial_length = FIX2LONG(buffer_initial_length);
739
- if (initial_length > 0) state->buffer_initial_length = initial_length;
740
- }
741
- }
742
- tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
743
- state->allow_nan = RTEST(tmp);
744
- tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
745
- state->ascii_only = RTEST(tmp);
746
- tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
747
- state->script_safe = RTEST(tmp);
748
- if (!state->script_safe) {
749
- tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
750
- state->script_safe = RTEST(tmp);
751
- }
752
- tmp = rb_hash_aref(opts, ID2SYM(i_strict));
753
- state->strict = RTEST(tmp);
754
- return self;
755
- }
756
-
757
- static void set_state_ivars(VALUE hash, VALUE state)
758
- {
759
- VALUE ivars = rb_obj_instance_variables(state);
760
- int i = 0;
761
- for (i = 0; i < RARRAY_LEN(ivars); i++) {
762
- VALUE key = rb_funcall(rb_ary_entry(ivars, i), i_to_s, 0);
763
- long key_len = RSTRING_LEN(key);
764
- VALUE value = rb_iv_get(state, StringValueCStr(key));
765
- rb_hash_aset(hash, rb_str_intern(rb_str_substr(key, 1, key_len - 1)), value);
766
- }
767
- }
768
-
769
- /*
770
- * call-seq: to_h
771
- *
772
- * Returns the configuration instance variables as a hash, that can be
773
- * passed to the configure method.
774
- */
775
- static VALUE cState_to_h(VALUE self)
776
- {
777
- VALUE result = rb_hash_new();
778
- GET_STATE(self);
779
- set_state_ivars(result, self);
780
- rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len));
781
- rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len));
782
- rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len));
783
- rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len));
784
- rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len));
785
- rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
786
- rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
787
- rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
788
- rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
789
- rb_hash_aset(result, ID2SYM(i_strict), state->strict ? Qtrue : Qfalse);
790
- rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
791
- rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
792
- return result;
793
- }
794
-
795
- /*
796
- * call-seq: [](name)
797
- *
798
- * Returns the value returned by method +name+.
799
- */
800
- static VALUE cState_aref(VALUE self, VALUE name)
801
- {
802
- name = rb_funcall(name, i_to_s, 0);
803
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) {
804
- return rb_funcall(self, i_send, 1, name);
805
- } else {
806
- return rb_attr_get(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)));
807
- }
808
- }
809
-
810
- /*
811
- * call-seq: []=(name, value)
812
- *
813
- * Sets the attribute name to value.
814
- */
815
- static VALUE cState_aset(VALUE self, VALUE name, VALUE value)
816
- {
817
- VALUE name_writer;
818
-
819
- name = rb_funcall(name, i_to_s, 0);
820
- name_writer = rb_str_cat2(rb_str_dup(name), "=");
821
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name_writer))) {
822
- return rb_funcall(self, i_send, 2, name_writer, value);
823
- } else {
824
- rb_ivar_set(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)), value);
825
- }
826
- return Qnil;
619
+ state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
620
+ return obj;
827
621
  }
828
622
 
829
623
  struct hash_foreach_arg {
@@ -841,39 +635,36 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
841
635
  JSON_Generator_State *state = arg->state;
842
636
  VALUE Vstate = arg->Vstate;
843
637
 
844
- char *object_nl = state->object_nl;
845
- long object_nl_len = state->object_nl_len;
846
- char *indent = state->indent;
847
- long indent_len = state->indent_len;
848
- char *delim = FBUFFER_PTR(state->object_delim);
849
- long delim_len = FBUFFER_LEN(state->object_delim);
850
- char *delim2 = FBUFFER_PTR(state->object_delim2);
851
- long delim2_len = FBUFFER_LEN(state->object_delim2);
852
638
  long depth = state->depth;
853
639
  int j;
854
- VALUE klass, key_to_s;
855
640
 
856
- if (arg->iter > 0) fbuffer_append(buffer, delim, delim_len);
857
- if (object_nl) {
858
- fbuffer_append(buffer, object_nl, object_nl_len);
641
+ if (arg->iter > 0) fbuffer_append_char(buffer, ',');
642
+ if (RB_UNLIKELY(state->object_nl)) {
643
+ fbuffer_append(buffer, state->object_nl, state->object_nl_len);
859
644
  }
860
- if (indent) {
645
+ if (RB_UNLIKELY(state->indent)) {
861
646
  for (j = 0; j < depth; j++) {
862
- fbuffer_append(buffer, indent, indent_len);
647
+ fbuffer_append(buffer, state->indent, state->indent_len);
863
648
  }
864
649
  }
865
650
 
866
- klass = CLASS_OF(key);
867
- if (klass == rb_cString) {
868
- key_to_s = key;
869
- } else if (klass == rb_cSymbol) {
870
- key_to_s = rb_sym2str(key);
871
- } else {
872
- key_to_s = rb_funcall(key, i_to_s, 0);
651
+ VALUE key_to_s;
652
+ switch(rb_type(key)) {
653
+ case T_STRING:
654
+ key_to_s = key;
655
+ break;
656
+ case T_SYMBOL:
657
+ key_to_s = rb_sym2str(key);
658
+ break;
659
+ default:
660
+ key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
661
+ break;
873
662
  }
874
- Check_Type(key_to_s, T_STRING);
875
- generate_json(buffer, Vstate, state, key_to_s);
876
- fbuffer_append(buffer, delim2, delim2_len);
663
+
664
+ generate_json_string(buffer, Vstate, state, key_to_s);
665
+ if (RB_UNLIKELY(state->space_before)) fbuffer_append(buffer, state->space_before, state->space_before_len);
666
+ fbuffer_append_char(buffer, ':');
667
+ if (RB_UNLIKELY(state->space)) fbuffer_append(buffer, state->space, state->space_len);
877
668
  generate_json(buffer, Vstate, state, val);
878
669
 
879
670
  arg->iter++;
@@ -882,10 +673,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
882
673
 
883
674
  static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
884
675
  {
885
- char *object_nl = state->object_nl;
886
- long object_nl_len = state->object_nl_len;
887
- char *indent = state->indent;
888
- long indent_len = state->indent_len;
889
676
  long max_nesting = state->max_nesting;
890
677
  long depth = ++state->depth;
891
678
  int j;
@@ -903,11 +690,11 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
903
690
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
904
691
 
905
692
  depth = --state->depth;
906
- if (object_nl) {
907
- fbuffer_append(buffer, object_nl, object_nl_len);
908
- if (indent) {
693
+ if (RB_UNLIKELY(state->object_nl)) {
694
+ fbuffer_append(buffer, state->object_nl, state->object_nl_len);
695
+ if (RB_UNLIKELY(state->indent)) {
909
696
  for (j = 0; j < depth; j++) {
910
- fbuffer_append(buffer, indent, indent_len);
697
+ fbuffer_append(buffer, state->indent, state->indent_len);
911
698
  }
912
699
  }
913
700
  }
@@ -916,62 +703,90 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
916
703
 
917
704
  static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
918
705
  {
919
- char *array_nl = state->array_nl;
920
- long array_nl_len = state->array_nl_len;
921
- char *indent = state->indent;
922
- long indent_len = state->indent_len;
923
706
  long max_nesting = state->max_nesting;
924
- char *delim = FBUFFER_PTR(state->array_delim);
925
- long delim_len = FBUFFER_LEN(state->array_delim);
926
707
  long depth = ++state->depth;
927
708
  int i, j;
928
709
  if (max_nesting != 0 && depth > max_nesting) {
929
710
  rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
930
711
  }
931
712
  fbuffer_append_char(buffer, '[');
932
- if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len);
713
+ if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
933
714
  for(i = 0; i < RARRAY_LEN(obj); i++) {
934
- if (i > 0) fbuffer_append(buffer, delim, delim_len);
935
- if (indent) {
715
+ if (i > 0) {
716
+ fbuffer_append_char(buffer, ',');
717
+ if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
718
+ }
719
+ if (RB_UNLIKELY(state->indent)) {
936
720
  for (j = 0; j < depth; j++) {
937
- fbuffer_append(buffer, indent, indent_len);
721
+ fbuffer_append(buffer, state->indent, state->indent_len);
938
722
  }
939
723
  }
940
- generate_json(buffer, Vstate, state, rb_ary_entry(obj, i));
724
+ generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i));
941
725
  }
942
726
  state->depth = --depth;
943
- if (array_nl) {
944
- fbuffer_append(buffer, array_nl, array_nl_len);
945
- if (indent) {
727
+ if (RB_UNLIKELY(state->array_nl)) {
728
+ fbuffer_append(buffer, state->array_nl, state->array_nl_len);
729
+ if (RB_UNLIKELY(state->indent)) {
946
730
  for (j = 0; j < depth; j++) {
947
- fbuffer_append(buffer, indent, indent_len);
731
+ fbuffer_append(buffer, state->indent, state->indent_len);
948
732
  }
949
733
  }
950
734
  }
951
735
  fbuffer_append_char(buffer, ']');
952
736
  }
953
737
 
954
- #ifdef HAVE_RUBY_ENCODING_H
955
- static int enc_utf8_compatible_p(rb_encoding *enc)
738
+ static int usascii_encindex, utf8_encindex, binary_encindex;
739
+
740
+ static inline int enc_utf8_compatible_p(int enc_idx)
956
741
  {
957
- if (enc == rb_usascii_encoding()) return 1;
958
- if (enc == rb_utf8_encoding()) return 1;
742
+ if (enc_idx == usascii_encindex) return 1;
743
+ if (enc_idx == utf8_encindex) return 1;
959
744
  return 0;
960
745
  }
961
- #endif
746
+
747
+ static inline VALUE ensure_valid_encoding(VALUE str)
748
+ {
749
+ int encindex = RB_ENCODING_GET(str);
750
+ VALUE utf8_string;
751
+ if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
752
+ if (encindex == binary_encindex) {
753
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
754
+ // TODO: Deprecate in 2.8.0
755
+ // TODO: Remove in 3.0.0
756
+ utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
757
+ switch (rb_enc_str_coderange(utf8_string)) {
758
+ case ENC_CODERANGE_7BIT:
759
+ case ENC_CODERANGE_VALID:
760
+ return utf8_string;
761
+ break;
762
+ }
763
+ }
764
+
765
+ str = rb_funcall(str, i_encode, 1, Encoding_UTF_8);
766
+ }
767
+ return str;
768
+ }
962
769
 
963
770
  static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
964
771
  {
772
+ obj = ensure_valid_encoding(obj);
773
+
965
774
  fbuffer_append_char(buffer, '"');
966
- #ifdef HAVE_RUBY_ENCODING_H
967
- if (!enc_utf8_compatible_p(rb_enc_get(obj))) {
968
- obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
969
- }
970
- #endif
971
- if (state->ascii_only) {
972
- convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
973
- } else {
974
- convert_UTF8_to_JSON(buffer, obj, state->script_safe);
775
+
776
+ switch(rb_enc_str_coderange(obj)) {
777
+ case ENC_CODERANGE_7BIT:
778
+ convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
779
+ break;
780
+ case ENC_CODERANGE_VALID:
781
+ if (RB_UNLIKELY(state->ascii_only)) {
782
+ convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
783
+ } else {
784
+ convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
785
+ }
786
+ break;
787
+ default:
788
+ rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
789
+ break;
975
790
  }
976
791
  fbuffer_append_char(buffer, '"');
977
792
  }
@@ -1018,9 +833,9 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
1018
833
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
1019
834
  if (!allow_nan) {
1020
835
  if (isinf(value)) {
1021
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
836
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
1022
837
  } else if (isnan(value)) {
1023
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
838
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
1024
839
  }
1025
840
  }
1026
841
  fbuffer_append_str(buffer, tmp);
@@ -1029,35 +844,56 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
1029
844
  static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1030
845
  {
1031
846
  VALUE tmp;
1032
- VALUE klass = CLASS_OF(obj);
1033
- if (klass == rb_cHash) {
1034
- generate_json_object(buffer, Vstate, state, obj);
1035
- } else if (klass == rb_cArray) {
1036
- generate_json_array(buffer, Vstate, state, obj);
1037
- } else if (klass == rb_cString) {
1038
- generate_json_string(buffer, Vstate, state, obj);
1039
- } else if (obj == Qnil) {
847
+ if (obj == Qnil) {
1040
848
  generate_json_null(buffer, Vstate, state, obj);
1041
849
  } else if (obj == Qfalse) {
1042
850
  generate_json_false(buffer, Vstate, state, obj);
1043
851
  } else if (obj == Qtrue) {
1044
852
  generate_json_true(buffer, Vstate, state, obj);
1045
- } else if (FIXNUM_P(obj)) {
1046
- generate_json_fixnum(buffer, Vstate, state, obj);
1047
- } else if (RB_TYPE_P(obj, T_BIGNUM)) {
1048
- generate_json_bignum(buffer, Vstate, state, obj);
1049
- } else if (klass == rb_cFloat) {
1050
- generate_json_float(buffer, Vstate, state, obj);
1051
- } else if (state->strict) {
1052
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(CLASS_OF(obj)));
1053
- } else if (rb_respond_to(obj, i_to_json)) {
1054
- tmp = rb_funcall(obj, i_to_json, 1, Vstate);
1055
- Check_Type(tmp, T_STRING);
1056
- fbuffer_append_str(buffer, tmp);
853
+ } else if (RB_SPECIAL_CONST_P(obj)) {
854
+ if (RB_FIXNUM_P(obj)) {
855
+ generate_json_fixnum(buffer, Vstate, state, obj);
856
+ } else if (RB_FLONUM_P(obj)) {
857
+ generate_json_float(buffer, Vstate, state, obj);
858
+ } else {
859
+ goto general;
860
+ }
1057
861
  } else {
1058
- tmp = rb_funcall(obj, i_to_s, 0);
1059
- Check_Type(tmp, T_STRING);
1060
- generate_json_string(buffer, Vstate, state, tmp);
862
+ VALUE klass = RBASIC_CLASS(obj);
863
+ switch (RB_BUILTIN_TYPE(obj)) {
864
+ case T_BIGNUM:
865
+ generate_json_bignum(buffer, Vstate, state, obj);
866
+ break;
867
+ case T_HASH:
868
+ if (klass != rb_cHash) goto general;
869
+ generate_json_object(buffer, Vstate, state, obj);
870
+ break;
871
+ case T_ARRAY:
872
+ if (klass != rb_cArray) goto general;
873
+ generate_json_array(buffer, Vstate, state, obj);
874
+ break;
875
+ case T_STRING:
876
+ if (klass != rb_cString) goto general;
877
+ generate_json_string(buffer, Vstate, state, obj);
878
+ break;
879
+ case T_FLOAT:
880
+ if (klass != rb_cFloat) goto general;
881
+ generate_json_float(buffer, Vstate, state, obj);
882
+ break;
883
+ default:
884
+ general:
885
+ if (state->strict) {
886
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
887
+ } else if (rb_respond_to(obj, i_to_json)) {
888
+ tmp = rb_funcall(obj, i_to_json, 1, Vstate);
889
+ Check_Type(tmp, T_STRING);
890
+ fbuffer_append_str(buffer, tmp);
891
+ } else {
892
+ tmp = rb_funcall(obj, i_to_s, 0);
893
+ Check_Type(tmp, T_STRING);
894
+ generate_json_string(buffer, Vstate, state, tmp);
895
+ }
896
+ }
1061
897
  }
1062
898
  }
1063
899
 
@@ -1067,28 +903,6 @@ static FBuffer *cState_prepare_buffer(VALUE self)
1067
903
  GET_STATE(self);
1068
904
  buffer = fbuffer_alloc(state->buffer_initial_length);
1069
905
 
1070
- if (state->object_delim) {
1071
- fbuffer_clear(state->object_delim);
1072
- } else {
1073
- state->object_delim = fbuffer_alloc(16);
1074
- }
1075
- fbuffer_append_char(state->object_delim, ',');
1076
- if (state->object_delim2) {
1077
- fbuffer_clear(state->object_delim2);
1078
- } else {
1079
- state->object_delim2 = fbuffer_alloc(16);
1080
- }
1081
- if (state->space_before) fbuffer_append(state->object_delim2, state->space_before, state->space_before_len);
1082
- fbuffer_append_char(state->object_delim2, ':');
1083
- if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len);
1084
-
1085
- if (state->array_delim) {
1086
- fbuffer_clear(state->array_delim);
1087
- } else {
1088
- state->array_delim = fbuffer_alloc(16);
1089
- }
1090
- fbuffer_append_char(state->array_delim, ',');
1091
- if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len);
1092
906
  return buffer;
1093
907
  }
1094
908
 
@@ -1149,37 +963,6 @@ static VALUE cState_generate(VALUE self, VALUE obj)
1149
963
  return result;
1150
964
  }
1151
965
 
1152
- /*
1153
- * call-seq: new(opts = {})
1154
- *
1155
- * Instantiates a new State object, configured by _opts_.
1156
- *
1157
- * _opts_ can have the following keys:
1158
- *
1159
- * * *indent*: a string used to indent levels (default: ''),
1160
- * * *space*: a string that is put after, a : or , delimiter (default: ''),
1161
- * * *space_before*: a string that is put before a : pair delimiter (default: ''),
1162
- * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
1163
- * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
1164
- * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
1165
- * generated, otherwise an exception is thrown, if these values are
1166
- * encountered. This options defaults to false.
1167
- * * *ascii_only*: true if only ASCII characters should be generated. This
1168
- * option defaults to false.
1169
- * * *buffer_initial_length*: sets the initial length of the generator's
1170
- * internal buffer.
1171
- */
1172
- static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
1173
- {
1174
- VALUE opts;
1175
- GET_STATE(self);
1176
- state->max_nesting = 100;
1177
- state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1178
- rb_scan_args(argc, argv, "01", &opts);
1179
- if (!NIL_P(opts)) cState_configure(self, opts);
1180
- return self;
1181
- }
1182
-
1183
966
  /*
1184
967
  * call-seq: initialize_copy(orig)
1185
968
  *
@@ -1201,9 +984,6 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
1201
984
  objState->space_before = fstrndup(origState->space_before, origState->space_before_len);
1202
985
  objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len);
1203
986
  objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len);
1204
- if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim);
1205
- if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim);
1206
- if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2);
1207
987
  return obj;
1208
988
  }
1209
989
 
@@ -1442,7 +1222,8 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1442
1222
  {
1443
1223
  GET_STATE(self);
1444
1224
  Check_Type(depth, T_FIXNUM);
1445
- return state->max_nesting = FIX2LONG(depth);
1225
+ state->max_nesting = FIX2LONG(depth);
1226
+ return Qnil;
1446
1227
  }
1447
1228
 
1448
1229
  /*
@@ -1513,6 +1294,18 @@ static VALUE cState_allow_nan_p(VALUE self)
1513
1294
  return state->allow_nan ? Qtrue : Qfalse;
1514
1295
  }
1515
1296
 
1297
+ /*
1298
+ * call-seq: allow_nan=(enable)
1299
+ *
1300
+ * This sets whether or not to serialize NaN, Infinity, and -Infinity
1301
+ */
1302
+ static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1303
+ {
1304
+ GET_STATE(self);
1305
+ state->allow_nan = RTEST(enable);
1306
+ return Qnil;
1307
+ }
1308
+
1516
1309
  /*
1517
1310
  * call-seq: ascii_only?
1518
1311
  *
@@ -1525,6 +1318,18 @@ static VALUE cState_ascii_only_p(VALUE self)
1525
1318
  return state->ascii_only ? Qtrue : Qfalse;
1526
1319
  }
1527
1320
 
1321
+ /*
1322
+ * call-seq: ascii_only=(enable)
1323
+ *
1324
+ * This sets whether only ASCII characters should be generated.
1325
+ */
1326
+ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1327
+ {
1328
+ GET_STATE(self);
1329
+ state->ascii_only = RTEST(enable);
1330
+ return Qnil;
1331
+ }
1332
+
1528
1333
  /*
1529
1334
  * call-seq: depth
1530
1335
  *
@@ -1592,8 +1397,8 @@ void Init_generator(void)
1592
1397
  rb_require("json/common");
1593
1398
 
1594
1399
  mJSON = rb_define_module("JSON");
1595
- mExt = rb_define_module_under(mJSON, "Ext");
1596
- mGenerator = rb_define_module_under(mExt, "Generator");
1400
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1401
+ VALUE mGenerator = rb_define_module_under(mExt, "Generator");
1597
1402
 
1598
1403
  eGeneratorError = rb_path2class("JSON::GeneratorError");
1599
1404
  eNestingError = rb_path2class("JSON::NestingError");
@@ -1603,7 +1408,6 @@ void Init_generator(void)
1603
1408
  cState = rb_define_class_under(mGenerator, "State", rb_cObject);
1604
1409
  rb_define_alloc_func(cState, cState_s_allocate);
1605
1410
  rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
1606
- rb_define_method(cState, "initialize", cState_initialize, -1);
1607
1411
  rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
1608
1412
  rb_define_method(cState, "indent", cState_indent, 0);
1609
1413
  rb_define_method(cState, "indent=", cState_indent_set, 1);
@@ -1628,76 +1432,70 @@ void Init_generator(void)
1628
1432
  rb_define_method(cState, "strict=", cState_strict_set, 1);
1629
1433
  rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
1630
1434
  rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
1435
+ rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1);
1631
1436
  rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
1437
+ rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1);
1632
1438
  rb_define_method(cState, "depth", cState_depth, 0);
1633
1439
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1634
1440
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1635
1441
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1636
- rb_define_method(cState, "configure", cState_configure, 1);
1637
- rb_define_alias(cState, "merge", "configure");
1638
- rb_define_method(cState, "to_h", cState_to_h, 0);
1639
- rb_define_alias(cState, "to_hash", "to_h");
1640
- rb_define_method(cState, "[]", cState_aref, 1);
1641
- rb_define_method(cState, "[]=", cState_aset, 2);
1642
1442
  rb_define_method(cState, "generate", cState_generate, 1);
1643
1443
 
1644
- mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1645
- mObject = rb_define_module_under(mGeneratorMethods, "Object");
1444
+ VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1445
+
1446
+ VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object");
1646
1447
  rb_define_method(mObject, "to_json", mObject_to_json, -1);
1647
- mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1448
+
1449
+ VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1648
1450
  rb_define_method(mHash, "to_json", mHash_to_json, -1);
1649
- mArray = rb_define_module_under(mGeneratorMethods, "Array");
1451
+
1452
+ VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array");
1650
1453
  rb_define_method(mArray, "to_json", mArray_to_json, -1);
1454
+
1651
1455
  #ifdef RUBY_INTEGER_UNIFICATION
1652
- mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
1456
+ VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
1653
1457
  rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
1654
1458
  #else
1655
- mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1459
+ VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1656
1460
  rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
1657
- mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1461
+
1462
+ VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1658
1463
  rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
1659
1464
  #endif
1660
- mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1465
+ VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1661
1466
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
1662
- mString = rb_define_module_under(mGeneratorMethods, "String");
1467
+
1468
+ VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
1663
1469
  rb_define_singleton_method(mString, "included", mString_included_s, 1);
1664
1470
  rb_define_method(mString, "to_json", mString_to_json, -1);
1665
1471
  rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
1666
1472
  rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
1473
+
1667
1474
  mString_Extend = rb_define_module_under(mString, "Extend");
1668
1475
  rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
1669
- mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1476
+
1477
+ VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1670
1478
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
1671
- mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1479
+
1480
+ VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1672
1481
  rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
1673
- mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1482
+
1483
+ VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1674
1484
  rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
1675
1485
 
1486
+ rb_global_variable(&Encoding_UTF_8);
1487
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1488
+
1676
1489
  i_to_s = rb_intern("to_s");
1677
1490
  i_to_json = rb_intern("to_json");
1678
1491
  i_new = rb_intern("new");
1679
- i_indent = rb_intern("indent");
1680
- i_space = rb_intern("space");
1681
- i_space_before = rb_intern("space_before");
1682
- i_object_nl = rb_intern("object_nl");
1683
- i_array_nl = rb_intern("array_nl");
1684
- i_max_nesting = rb_intern("max_nesting");
1685
- i_script_safe = rb_intern("script_safe");
1686
- i_escape_slash = rb_intern("escape_slash");
1687
- i_strict = rb_intern("strict");
1688
- i_allow_nan = rb_intern("allow_nan");
1689
- i_ascii_only = rb_intern("ascii_only");
1690
- i_depth = rb_intern("depth");
1691
- i_buffer_initial_length = rb_intern("buffer_initial_length");
1692
1492
  i_pack = rb_intern("pack");
1693
1493
  i_unpack = rb_intern("unpack");
1694
1494
  i_create_id = rb_intern("create_id");
1695
1495
  i_extend = rb_intern("extend");
1696
- i_key_p = rb_intern("key?");
1697
- i_aref = rb_intern("[]");
1698
- i_send = rb_intern("__send__");
1699
- i_respond_to_p = rb_intern("respond_to?");
1700
- i_match = rb_intern("match");
1701
- i_keys = rb_intern("keys");
1702
- i_dup = rb_intern("dup");
1496
+ i_encode = rb_intern("encode");
1497
+
1498
+ usascii_encindex = rb_usascii_encindex();
1499
+ utf8_encindex = rb_utf8_encindex();
1500
+ binary_encindex = rb_ascii8bit_encindex();
1703
1501
  }