json 2.7.2 → 2.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,332 +1,306 @@
1
1
  #include "../fbuffer/fbuffer.h"
2
2
  #include "generator.h"
3
3
 
4
- static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
5
- mHash, mArray,
6
- #ifdef RUBY_INTEGER_UNIFICATION
7
- mInteger,
8
- #else
9
- mFixnum, mBignum,
4
+ #ifndef RB_UNLIKELY
5
+ #define RB_UNLIKELY(cond) (cond)
10
6
  #endif
11
- mFloat, mString, mString_Extend,
12
- mTrueClass, mFalseClass, mNilClass, eGeneratorError,
13
- eNestingError;
14
7
 
15
- static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
16
- i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
17
- i_pack, i_unpack, i_create_id, i_extend, i_key_p,
18
- i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
19
- i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
8
+ static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
20
9
 
21
- /*
22
- * Copyright 2001-2004 Unicode, Inc.
10
+ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
11
+
12
+ /* Converts str to a JSON string (without the wrapping '"'
13
+ * characters) in FBuffer out_buffer.
23
14
  *
24
- * Disclaimer
15
+ * Characters are JSON-escaped according to:
25
16
  *
26
- * This source code is provided as is by Unicode, Inc. No claims are
27
- * made as to fitness for any particular purpose. No warranties of any
28
- * kind are expressed or implied. The recipient agrees to determine
29
- * applicability of information provided. If this file has been
30
- * purchased on magnetic or optical media from Unicode, Inc., the
31
- * sole remedy for any claim will be exchange of defective media
32
- * within 90 days of receipt.
17
+ * - Always: ASCII control characters (0x00-0x1F), dquote, and
18
+ * backslash.
33
19
  *
34
- * Limitations on Rights to Redistribute This Code
20
+ * - If out_ascii_only: non-ASCII characters (>0x7F)
35
21
  *
36
- * Unicode, Inc. hereby grants the right to freely use the information
37
- * supplied in this file in the creation of products supporting the
38
- * Unicode Standard, and to make copies of this file in any form
39
- * for internal or external distribution as long as this notice
40
- * remains attached.
22
+ * - If out_script_safe: forwardslash, line separator (U+2028), and
23
+ * paragraph separator (U+2029)
24
+ *
25
+ * Everything else (should be UTF-8) is just passed through and
26
+ * appended to the result.
41
27
  */
28
+ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
29
+ {
30
+ const char *hexdig = "0123456789abcdef";
31
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
42
32
 
43
- /*
44
- * Index into the table below with the first byte of a UTF-8 sequence to
45
- * get the number of trailing bytes that are supposed to follow it.
46
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
47
- * left as-is for anyone who may want to do such conversion, which was
48
- * allowed in earlier algorithms.
49
- */
50
- static const char trailingBytesForUTF8[256] = {
51
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
59
- };
33
+ const char *ptr = RSTRING_PTR(str);
34
+ unsigned long len = RSTRING_LEN(str);
60
35
 
61
- /*
62
- * Magic values subtracted from a buffer value during UTF8 conversion.
63
- * This table contains as many values as there might be trailing bytes
64
- * in a UTF-8 sequence.
65
- */
66
- static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
67
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
36
+ unsigned long beg = 0, pos = 0;
68
37
 
69
- /*
70
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
71
- * This must be called with the length pre-determined by the first byte.
72
- * If not calling this from ConvertUTF8to*, then the length can be set by:
73
- * length = trailingBytesForUTF8[*source]+1;
74
- * and the sequence is illegal right away if there aren't that many bytes
75
- * available.
76
- * If presented with a length > 4, this returns 0. The Unicode
77
- * definition of UTF-8 goes up to 4-byte sequences.
78
- */
79
- static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
80
- {
81
- UTF8 a;
82
- const UTF8 *srcptr = source+length;
83
- switch (length) {
84
- default: return 0;
85
- /* Everything else falls through when "1"... */
86
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
87
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
88
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
89
-
90
- switch (*source) {
91
- /* no fall-through in this inner switch */
92
- case 0xE0: if (a < 0xA0) return 0; break;
93
- case 0xED: if (a > 0x9F) return 0; break;
94
- case 0xF0: if (a < 0x90) return 0; break;
95
- case 0xF4: if (a > 0x8F) return 0; break;
96
- default: if (a < 0x80) return 0;
38
+ #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
39
+
40
+ while (pos < len) {
41
+ unsigned char ch = ptr[pos];
42
+ unsigned char ch_len = escape_table[ch];
43
+ /* JSON encoding */
44
+
45
+ if (RB_UNLIKELY(ch_len)) {
46
+ switch (ch_len) {
47
+ case 0:
48
+ pos++;
49
+ break;
50
+ case 1: {
51
+ FLUSH_POS(1);
52
+ switch (ch) {
53
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
54
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
55
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
56
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
57
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
58
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
59
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
60
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
61
+ default: {
62
+ scratch[2] = hexdig[ch >> 12];
63
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
64
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
65
+ scratch[5] = hexdig[ch & 0xf];
66
+ fbuffer_append(out_buffer, scratch, 6);
67
+ break;
68
+ }
97
69
  }
70
+ break;
71
+ }
72
+ case 3: {
73
+ unsigned char b2 = ptr[pos + 1];
74
+ if (RB_UNLIKELY(out_script_safe && b2 == 0x80)) {
75
+ unsigned char b3 = ptr[pos + 2];
76
+ if (b3 == 0xA8) {
77
+ FLUSH_POS(3);
78
+ fbuffer_append(out_buffer, "\\u2028", 6);
79
+ break;
80
+ } else if (b3 == 0xA9) {
81
+ FLUSH_POS(3);
82
+ fbuffer_append(out_buffer, "\\u2029", 6);
83
+ break;
84
+ }
85
+ }
86
+ // fallthrough
87
+ }
88
+ default:
89
+ pos += ch_len;
90
+ break;
91
+ }
92
+ } else {
93
+ pos++;
94
+ }
95
+ }
96
+ #undef FLUSH_POS
98
97
 
99
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
98
+ if (beg < len) {
99
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
100
100
  }
101
- if (*source > 0xF4) return 0;
102
- return 1;
103
- }
104
101
 
105
- /* Escapes the UTF16 character and stores the result in the buffer buf. */
106
- static void unicode_escape(char *buf, UTF16 character)
107
- {
108
- const char *digits = "0123456789abcdef";
102
+ RB_GC_GUARD(str);
103
+ }
104
+
105
+ static const char escape_table[256] = {
106
+ // ASCII Control Characters
107
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
108
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
109
+ // ASCII Characters
110
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
111
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
112
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
113
+ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
114
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
115
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
116
+ // Continuation byte
117
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
118
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
119
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
120
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
121
+ // First byte of a 2-byte code point
122
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
123
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
124
+ // First byte of a 4-byte code point
125
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
126
+ //First byte of a 4+byte code point
127
+ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
128
+ };
109
129
 
110
- buf[2] = digits[character >> 12];
111
- buf[3] = digits[(character >> 8) & 0xf];
112
- buf[4] = digits[(character >> 4) & 0xf];
113
- buf[5] = digits[character & 0xf];
114
- }
130
+ static const char script_safe_escape_table[256] = {
131
+ // ASCII Control Characters
132
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
133
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
134
+ // ASCII Characters
135
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
136
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
137
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
138
+ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
139
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
140
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
141
+ // Continuation byte
142
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
143
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
144
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
145
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
146
+ // First byte of a 2-byte code point
147
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
148
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
149
+ // First byte of a 4-byte code point
150
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
151
+ //First byte of a 4+byte code point
152
+ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
153
+ };
115
154
 
116
- /* Escapes the UTF16 character and stores the result in the buffer buf, then
117
- * the buffer buf is appended to the FBuffer buffer. */
118
- static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
119
- character)
155
+ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256])
120
156
  {
121
- unicode_escape(buf, character);
122
- fbuffer_append(buffer, buf, 6);
157
+ const char *hexdig = "0123456789abcdef";
158
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
159
+
160
+ const char *ptr = RSTRING_PTR(str);
161
+ unsigned long len = RSTRING_LEN(str);
162
+
163
+ unsigned long beg = 0, pos;
164
+
165
+ for (pos = 0; pos < len;) {
166
+ unsigned char ch = ptr[pos];
167
+ /* JSON encoding */
168
+ if (escape_table[ch]) {
169
+ if (pos > beg) {
170
+ fbuffer_append(out_buffer, &ptr[beg], pos - beg);
171
+ }
172
+
173
+ beg = pos + 1;
174
+ switch (ch) {
175
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
176
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
177
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
178
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
179
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
180
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
181
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
182
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
183
+ default:
184
+ scratch[2] = hexdig[ch >> 12];
185
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
186
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
187
+ scratch[5] = hexdig[ch & 0xf];
188
+ fbuffer_append(out_buffer, scratch, 6);
189
+ }
190
+ }
191
+
192
+ pos++;
193
+ }
194
+
195
+ if (beg < len) {
196
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
197
+ }
198
+
199
+ RB_GC_GUARD(str);
123
200
  }
124
201
 
125
- /* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
126
- * and control characters are JSON escaped. */
127
- static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
202
+ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
128
203
  {
129
- const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
130
- const UTF8 *sourceEnd = source + RSTRING_LEN(string);
131
- char buf[6] = { '\\', 'u' };
204
+ const char *hexdig = "0123456789abcdef";
205
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
132
206
 
133
- while (source < sourceEnd) {
134
- UTF32 ch = 0;
135
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
136
- if (source + extraBytesToRead >= sourceEnd) {
137
- rb_raise(rb_path2class("JSON::GeneratorError"),
138
- "partial character in source, but hit end");
139
- }
140
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
141
- rb_raise(rb_path2class("JSON::GeneratorError"),
142
- "source sequence is illegal/malformed utf-8");
143
- }
144
- /*
145
- * The cases all fall through. See "Note A" below.
146
- */
147
- switch (extraBytesToRead) {
148
- case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
149
- case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
150
- case 3: ch += *source++; ch <<= 6;
151
- case 2: ch += *source++; ch <<= 6;
152
- case 1: ch += *source++; ch <<= 6;
153
- case 0: ch += *source++;
154
- }
155
- ch -= offsetsFromUTF8[extraBytesToRead];
156
-
157
- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
158
- /* UTF-16 surrogate values are illegal in UTF-32 */
159
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
160
- #if UNI_STRICT_CONVERSION
161
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
162
- rb_raise(rb_path2class("JSON::GeneratorError"),
163
- "source sequence is illegal/malformed utf-8");
164
- #else
165
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
166
- #endif
167
- } else {
168
- /* normal case */
169
- if (ch >= 0x20 && ch <= 0x7f) {
207
+ const char *ptr = RSTRING_PTR(str);
208
+ unsigned long len = RSTRING_LEN(str);
209
+
210
+ unsigned long beg = 0, pos = 0;
211
+
212
+ #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
213
+
214
+ while (pos < len) {
215
+ unsigned char ch = ptr[pos];
216
+ unsigned char ch_len = escape_table[ch];
217
+
218
+ if (RB_UNLIKELY(ch_len)) {
219
+ switch (ch_len) {
220
+ case 0:
221
+ pos++;
222
+ break;
223
+ case 1: {
224
+ FLUSH_POS(1);
170
225
  switch (ch) {
171
- case '\\':
172
- fbuffer_append(buffer, "\\\\", 2);
173
- break;
174
- case '"':
175
- fbuffer_append(buffer, "\\\"", 2);
176
- break;
177
- case '/':
178
- if(script_safe) {
179
- fbuffer_append(buffer, "\\/", 2);
180
- break;
181
- }
182
- default:
183
- fbuffer_append_char(buffer, (char)ch);
226
+ case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
227
+ case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
228
+ case '/': fbuffer_append(out_buffer, "\\/", 2); break;
229
+ case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
230
+ case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
231
+ case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
232
+ case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
233
+ case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
234
+ default: {
235
+ scratch[2] = hexdig[ch >> 12];
236
+ scratch[3] = hexdig[(ch >> 8) & 0xf];
237
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
238
+ scratch[5] = hexdig[ch & 0xf];
239
+ fbuffer_append(out_buffer, scratch, 6);
184
240
  break;
241
+ }
185
242
  }
186
- } else {
187
- switch (ch) {
188
- case '\n':
189
- fbuffer_append(buffer, "\\n", 2);
190
- break;
191
- case '\r':
192
- fbuffer_append(buffer, "\\r", 2);
193
- break;
194
- case '\t':
195
- fbuffer_append(buffer, "\\t", 2);
196
- break;
197
- case '\f':
198
- fbuffer_append(buffer, "\\f", 2);
243
+ break;
244
+ }
245
+ default: {
246
+ uint32_t wchar = 0;
247
+ switch(ch_len) {
248
+ case 2:
249
+ wchar = ptr[pos] & 0x1F;
199
250
  break;
200
- case '\b':
201
- fbuffer_append(buffer, "\\b", 2);
251
+ case 3:
252
+ wchar = ptr[pos] & 0x0F;
202
253
  break;
203
- default:
204
- unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
254
+ case 4:
255
+ wchar = ptr[pos] & 0x07;
205
256
  break;
206
257
  }
207
- }
208
- }
209
- } else if (ch > UNI_MAX_UTF16) {
210
- #if UNI_STRICT_CONVERSION
211
- source -= (extraBytesToRead+1); /* return to the start */
212
- rb_raise(rb_path2class("JSON::GeneratorError"),
213
- "source sequence is illegal/malformed utf8");
214
- #else
215
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
216
- #endif
217
- } else {
218
- /* target is a character in range 0xFFFF - 0x10FFFF. */
219
- ch -= halfBase;
220
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
221
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
222
- }
223
- }
224
- RB_GC_GUARD(string);
225
- }
226
-
227
- /* Converts string to a JSON string in FBuffer buffer, where only the
228
- * characters required by the JSON standard are JSON escaped. The remaining
229
- * characters (should be UTF8) are just passed through and appended to the
230
- * result. */
231
- static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
232
- {
233
- const char *ptr = RSTRING_PTR(string), *p;
234
- unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
235
- const char *escape = NULL;
236
- int escape_len;
237
- unsigned char c;
238
- char buf[6] = { '\\', 'u' };
239
- int ascii_only = rb_enc_str_asciionly_p(string);
240
-
241
- for (start = 0, end = 0; end < len;) {
242
- p = ptr + end;
243
- c = (unsigned char) *p;
244
- if (c < 0x20) {
245
- switch (c) {
246
- case '\n':
247
- escape = "\\n";
248
- escape_len = 2;
249
- break;
250
- case '\r':
251
- escape = "\\r";
252
- escape_len = 2;
253
- break;
254
- case '\t':
255
- escape = "\\t";
256
- escape_len = 2;
257
- break;
258
- case '\f':
259
- escape = "\\f";
260
- escape_len = 2;
261
- break;
262
- case '\b':
263
- escape = "\\b";
264
- escape_len = 2;
265
- break;
266
- default:
267
- unicode_escape(buf, (UTF16) *p);
268
- escape = buf;
269
- escape_len = 6;
270
- break;
271
- }
272
- } else {
273
- switch (c) {
274
- case '\\':
275
- escape = "\\\\";
276
- escape_len = 2;
277
- break;
278
- case '"':
279
- escape = "\\\"";
280
- escape_len = 2;
281
- break;
282
- case '/':
283
- if(script_safe) {
284
- escape = "\\/";
285
- escape_len = 2;
286
- break;
258
+
259
+ for (short i = 1; i < ch_len; i++) {
260
+ wchar = (wchar << 6) | (ptr[pos+i] & 0x3F);
287
261
  }
288
- default:
289
- {
290
- unsigned short clen = 1;
291
- if (!ascii_only) {
292
- clen += trailingBytesForUTF8[c];
293
- if (end + clen > len) {
294
- rb_raise(rb_path2class("JSON::GeneratorError"),
295
- "partial character in source, but hit end");
296
- }
297
-
298
- if (script_safe && c == 0xE2) {
299
- unsigned char c2 = (unsigned char) *(p+1);
300
- unsigned char c3 = (unsigned char) *(p+2);
301
- if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
302
- fbuffer_append(buffer, ptr + start, end - start);
303
- start = end = (end + clen);
304
- if (c3 == 0xA8) {
305
- fbuffer_append(buffer, "\\u2028", 6);
306
- } else {
307
- fbuffer_append(buffer, "\\u2029", 6);
308
- }
309
- continue;
310
- }
311
- }
312
-
313
- if (!isLegalUTF8((UTF8 *) p, clen)) {
314
- rb_raise(rb_path2class("JSON::GeneratorError"),
315
- "source sequence is illegal/malformed utf-8");
316
- }
317
- }
318
- end += clen;
262
+
263
+ FLUSH_POS(ch_len);
264
+
265
+ if (wchar <= 0xFFFF) {
266
+ scratch[2] = hexdig[wchar >> 12];
267
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
268
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
269
+ scratch[5] = hexdig[wchar & 0xf];
270
+ fbuffer_append(out_buffer, scratch, 6);
271
+ } else {
272
+ uint16_t hi, lo;
273
+ wchar -= 0x10000;
274
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
275
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
276
+
277
+ scratch[2] = hexdig[hi >> 12];
278
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
279
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
280
+ scratch[5] = hexdig[hi & 0xf];
281
+
282
+ scratch[8] = hexdig[lo >> 12];
283
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
284
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
285
+ scratch[11] = hexdig[lo & 0xf];
286
+
287
+ fbuffer_append(out_buffer, scratch, 12);
319
288
  }
320
- continue;
289
+
321
290
  break;
291
+ }
322
292
  }
293
+ } else {
294
+ pos++;
323
295
  }
324
- fbuffer_append(buffer, ptr + start, end - start);
325
- fbuffer_append(buffer, escape, escape_len);
326
- start = ++end;
327
- escape = NULL;
328
296
  }
329
- fbuffer_append(buffer, ptr + start, end - start);
297
+ #undef FLUSH_POS
298
+
299
+ if (beg < len) {
300
+ fbuffer_append(out_buffer, &ptr[beg], len - beg);
301
+ }
302
+
303
+ RB_GC_GUARD(str);
330
304
  }
331
305
 
332
306
  static char *fstrndup(const char *ptr, unsigned long len) {
@@ -610,9 +584,6 @@ static void State_free(void *ptr)
610
584
  if (state->space_before) ruby_xfree(state->space_before);
611
585
  if (state->object_nl) ruby_xfree(state->object_nl);
612
586
  if (state->array_nl) ruby_xfree(state->array_nl);
613
- if (state->array_delim) fbuffer_free(state->array_delim);
614
- if (state->object_delim) fbuffer_free(state->object_delim);
615
- if (state->object_delim2) fbuffer_free(state->object_delim2);
616
587
  ruby_xfree(state);
617
588
  }
618
589
 
@@ -625,9 +596,6 @@ static size_t State_memsize(const void *ptr)
625
596
  if (state->space_before) size += state->space_before_len + 1;
626
597
  if (state->object_nl) size += state->object_nl_len + 1;
627
598
  if (state->array_nl) size += state->array_nl_len + 1;
628
- if (state->array_delim) size += FBUFFER_CAPA(state->array_delim);
629
- if (state->object_delim) size += FBUFFER_CAPA(state->object_delim);
630
- if (state->object_delim2) size += FBUFFER_CAPA(state->object_delim2);
631
599
  return size;
632
600
  }
633
601
 
@@ -636,194 +604,20 @@ static size_t State_memsize(const void *ptr)
636
604
  # define RUBY_TYPED_FROZEN_SHAREABLE 0
637
605
  #endif
638
606
 
639
- #ifdef NEW_TYPEDDATA_WRAPPER
640
607
  static const rb_data_type_t JSON_Generator_State_type = {
641
608
  "JSON/Generator/State",
642
609
  {NULL, State_free, State_memsize,},
643
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
644
610
  0, 0,
645
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
646
- #endif
611
+ RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
647
612
  };
648
- #endif
649
613
 
650
614
  static VALUE cState_s_allocate(VALUE klass)
651
615
  {
652
616
  JSON_Generator_State *state;
653
- return TypedData_Make_Struct(klass, JSON_Generator_State,
654
- &JSON_Generator_State_type, state);
655
- }
656
-
657
- /*
658
- * call-seq: configure(opts)
659
- *
660
- * Configure this State instance with the Hash _opts_, and return
661
- * itself.
662
- */
663
- static VALUE cState_configure(VALUE self, VALUE opts)
664
- {
665
- VALUE tmp;
666
- GET_STATE(self);
667
- tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash");
668
- if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
669
- opts = tmp;
670
- tmp = rb_hash_aref(opts, ID2SYM(i_indent));
671
- if (RTEST(tmp)) {
672
- unsigned long len;
673
- Check_Type(tmp, T_STRING);
674
- len = RSTRING_LEN(tmp);
675
- state->indent = fstrndup(RSTRING_PTR(tmp), len + 1);
676
- state->indent_len = len;
677
- }
678
- tmp = rb_hash_aref(opts, ID2SYM(i_space));
679
- if (RTEST(tmp)) {
680
- unsigned long len;
681
- Check_Type(tmp, T_STRING);
682
- len = RSTRING_LEN(tmp);
683
- state->space = fstrndup(RSTRING_PTR(tmp), len + 1);
684
- state->space_len = len;
685
- }
686
- tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
687
- if (RTEST(tmp)) {
688
- unsigned long len;
689
- Check_Type(tmp, T_STRING);
690
- len = RSTRING_LEN(tmp);
691
- state->space_before = fstrndup(RSTRING_PTR(tmp), len + 1);
692
- state->space_before_len = len;
693
- }
694
- tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
695
- if (RTEST(tmp)) {
696
- unsigned long len;
697
- Check_Type(tmp, T_STRING);
698
- len = RSTRING_LEN(tmp);
699
- state->array_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
700
- state->array_nl_len = len;
701
- }
702
- tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
703
- if (RTEST(tmp)) {
704
- unsigned long len;
705
- Check_Type(tmp, T_STRING);
706
- len = RSTRING_LEN(tmp);
707
- state->object_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
708
- state->object_nl_len = len;
709
- }
710
- tmp = ID2SYM(i_max_nesting);
617
+ VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
711
618
  state->max_nesting = 100;
712
- if (option_given_p(opts, tmp)) {
713
- VALUE max_nesting = rb_hash_aref(opts, tmp);
714
- if (RTEST(max_nesting)) {
715
- Check_Type(max_nesting, T_FIXNUM);
716
- state->max_nesting = FIX2LONG(max_nesting);
717
- } else {
718
- state->max_nesting = 0;
719
- }
720
- }
721
- tmp = ID2SYM(i_depth);
722
- state->depth = 0;
723
- if (option_given_p(opts, tmp)) {
724
- VALUE depth = rb_hash_aref(opts, tmp);
725
- if (RTEST(depth)) {
726
- Check_Type(depth, T_FIXNUM);
727
- state->depth = FIX2LONG(depth);
728
- } else {
729
- state->depth = 0;
730
- }
731
- }
732
- tmp = ID2SYM(i_buffer_initial_length);
733
- if (option_given_p(opts, tmp)) {
734
- VALUE buffer_initial_length = rb_hash_aref(opts, tmp);
735
- if (RTEST(buffer_initial_length)) {
736
- long initial_length;
737
- Check_Type(buffer_initial_length, T_FIXNUM);
738
- initial_length = FIX2LONG(buffer_initial_length);
739
- if (initial_length > 0) state->buffer_initial_length = initial_length;
740
- }
741
- }
742
- tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
743
- state->allow_nan = RTEST(tmp);
744
- tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
745
- state->ascii_only = RTEST(tmp);
746
- tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
747
- state->script_safe = RTEST(tmp);
748
- if (!state->script_safe) {
749
- tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
750
- state->script_safe = RTEST(tmp);
751
- }
752
- tmp = rb_hash_aref(opts, ID2SYM(i_strict));
753
- state->strict = RTEST(tmp);
754
- return self;
755
- }
756
-
757
- static void set_state_ivars(VALUE hash, VALUE state)
758
- {
759
- VALUE ivars = rb_obj_instance_variables(state);
760
- int i = 0;
761
- for (i = 0; i < RARRAY_LEN(ivars); i++) {
762
- VALUE key = rb_funcall(rb_ary_entry(ivars, i), i_to_s, 0);
763
- long key_len = RSTRING_LEN(key);
764
- VALUE value = rb_iv_get(state, StringValueCStr(key));
765
- rb_hash_aset(hash, rb_str_intern(rb_str_substr(key, 1, key_len - 1)), value);
766
- }
767
- }
768
-
769
- /*
770
- * call-seq: to_h
771
- *
772
- * Returns the configuration instance variables as a hash, that can be
773
- * passed to the configure method.
774
- */
775
- static VALUE cState_to_h(VALUE self)
776
- {
777
- VALUE result = rb_hash_new();
778
- GET_STATE(self);
779
- set_state_ivars(result, self);
780
- rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len));
781
- rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len));
782
- rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len));
783
- rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len));
784
- rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len));
785
- rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
786
- rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
787
- rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
788
- rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
789
- rb_hash_aset(result, ID2SYM(i_strict), state->strict ? Qtrue : Qfalse);
790
- rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
791
- rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
792
- return result;
793
- }
794
-
795
- /*
796
- * call-seq: [](name)
797
- *
798
- * Returns the value returned by method +name+.
799
- */
800
- static VALUE cState_aref(VALUE self, VALUE name)
801
- {
802
- name = rb_funcall(name, i_to_s, 0);
803
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) {
804
- return rb_funcall(self, i_send, 1, name);
805
- } else {
806
- return rb_attr_get(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)));
807
- }
808
- }
809
-
810
- /*
811
- * call-seq: []=(name, value)
812
- *
813
- * Sets the attribute name to value.
814
- */
815
- static VALUE cState_aset(VALUE self, VALUE name, VALUE value)
816
- {
817
- VALUE name_writer;
818
-
819
- name = rb_funcall(name, i_to_s, 0);
820
- name_writer = rb_str_cat2(rb_str_dup(name), "=");
821
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name_writer))) {
822
- return rb_funcall(self, i_send, 2, name_writer, value);
823
- } else {
824
- rb_ivar_set(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)), value);
825
- }
826
- return Qnil;
619
+ state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
620
+ return obj;
827
621
  }
828
622
 
829
623
  struct hash_foreach_arg {
@@ -841,39 +635,36 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
841
635
  JSON_Generator_State *state = arg->state;
842
636
  VALUE Vstate = arg->Vstate;
843
637
 
844
- char *object_nl = state->object_nl;
845
- long object_nl_len = state->object_nl_len;
846
- char *indent = state->indent;
847
- long indent_len = state->indent_len;
848
- char *delim = FBUFFER_PTR(state->object_delim);
849
- long delim_len = FBUFFER_LEN(state->object_delim);
850
- char *delim2 = FBUFFER_PTR(state->object_delim2);
851
- long delim2_len = FBUFFER_LEN(state->object_delim2);
852
638
  long depth = state->depth;
853
639
  int j;
854
- VALUE klass, key_to_s;
855
640
 
856
- if (arg->iter > 0) fbuffer_append(buffer, delim, delim_len);
857
- if (object_nl) {
858
- fbuffer_append(buffer, object_nl, object_nl_len);
641
+ if (arg->iter > 0) fbuffer_append_char(buffer, ',');
642
+ if (RB_UNLIKELY(state->object_nl)) {
643
+ fbuffer_append(buffer, state->object_nl, state->object_nl_len);
859
644
  }
860
- if (indent) {
645
+ if (RB_UNLIKELY(state->indent)) {
861
646
  for (j = 0; j < depth; j++) {
862
- fbuffer_append(buffer, indent, indent_len);
647
+ fbuffer_append(buffer, state->indent, state->indent_len);
863
648
  }
864
649
  }
865
650
 
866
- klass = CLASS_OF(key);
867
- if (klass == rb_cString) {
868
- key_to_s = key;
869
- } else if (klass == rb_cSymbol) {
870
- key_to_s = rb_sym2str(key);
871
- } else {
872
- key_to_s = rb_funcall(key, i_to_s, 0);
651
+ VALUE key_to_s;
652
+ switch(rb_type(key)) {
653
+ case T_STRING:
654
+ key_to_s = key;
655
+ break;
656
+ case T_SYMBOL:
657
+ key_to_s = rb_sym2str(key);
658
+ break;
659
+ default:
660
+ key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
661
+ break;
873
662
  }
874
- Check_Type(key_to_s, T_STRING);
875
- generate_json(buffer, Vstate, state, key_to_s);
876
- fbuffer_append(buffer, delim2, delim2_len);
663
+
664
+ generate_json_string(buffer, Vstate, state, key_to_s);
665
+ if (RB_UNLIKELY(state->space_before)) fbuffer_append(buffer, state->space_before, state->space_before_len);
666
+ fbuffer_append_char(buffer, ':');
667
+ if (RB_UNLIKELY(state->space)) fbuffer_append(buffer, state->space, state->space_len);
877
668
  generate_json(buffer, Vstate, state, val);
878
669
 
879
670
  arg->iter++;
@@ -882,10 +673,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
882
673
 
883
674
  static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
884
675
  {
885
- char *object_nl = state->object_nl;
886
- long object_nl_len = state->object_nl_len;
887
- char *indent = state->indent;
888
- long indent_len = state->indent_len;
889
676
  long max_nesting = state->max_nesting;
890
677
  long depth = ++state->depth;
891
678
  int j;
@@ -903,11 +690,11 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
903
690
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
904
691
 
905
692
  depth = --state->depth;
906
- if (object_nl) {
907
- fbuffer_append(buffer, object_nl, object_nl_len);
908
- if (indent) {
693
+ if (RB_UNLIKELY(state->object_nl)) {
694
+ fbuffer_append(buffer, state->object_nl, state->object_nl_len);
695
+ if (RB_UNLIKELY(state->indent)) {
909
696
  for (j = 0; j < depth; j++) {
910
- fbuffer_append(buffer, indent, indent_len);
697
+ fbuffer_append(buffer, state->indent, state->indent_len);
911
698
  }
912
699
  }
913
700
  }
@@ -916,62 +703,90 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
916
703
 
917
704
  static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
918
705
  {
919
- char *array_nl = state->array_nl;
920
- long array_nl_len = state->array_nl_len;
921
- char *indent = state->indent;
922
- long indent_len = state->indent_len;
923
706
  long max_nesting = state->max_nesting;
924
- char *delim = FBUFFER_PTR(state->array_delim);
925
- long delim_len = FBUFFER_LEN(state->array_delim);
926
707
  long depth = ++state->depth;
927
708
  int i, j;
928
709
  if (max_nesting != 0 && depth > max_nesting) {
929
710
  rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
930
711
  }
931
712
  fbuffer_append_char(buffer, '[');
932
- if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len);
713
+ if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
933
714
  for(i = 0; i < RARRAY_LEN(obj); i++) {
934
- if (i > 0) fbuffer_append(buffer, delim, delim_len);
935
- if (indent) {
715
+ if (i > 0) {
716
+ fbuffer_append_char(buffer, ',');
717
+ if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
718
+ }
719
+ if (RB_UNLIKELY(state->indent)) {
936
720
  for (j = 0; j < depth; j++) {
937
- fbuffer_append(buffer, indent, indent_len);
721
+ fbuffer_append(buffer, state->indent, state->indent_len);
938
722
  }
939
723
  }
940
- generate_json(buffer, Vstate, state, rb_ary_entry(obj, i));
724
+ generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i));
941
725
  }
942
726
  state->depth = --depth;
943
- if (array_nl) {
944
- fbuffer_append(buffer, array_nl, array_nl_len);
945
- if (indent) {
727
+ if (RB_UNLIKELY(state->array_nl)) {
728
+ fbuffer_append(buffer, state->array_nl, state->array_nl_len);
729
+ if (RB_UNLIKELY(state->indent)) {
946
730
  for (j = 0; j < depth; j++) {
947
- fbuffer_append(buffer, indent, indent_len);
731
+ fbuffer_append(buffer, state->indent, state->indent_len);
948
732
  }
949
733
  }
950
734
  }
951
735
  fbuffer_append_char(buffer, ']');
952
736
  }
953
737
 
954
- #ifdef HAVE_RUBY_ENCODING_H
955
- static int enc_utf8_compatible_p(rb_encoding *enc)
738
+ static int usascii_encindex, utf8_encindex, binary_encindex;
739
+
740
+ static inline int enc_utf8_compatible_p(int enc_idx)
956
741
  {
957
- if (enc == rb_usascii_encoding()) return 1;
958
- if (enc == rb_utf8_encoding()) return 1;
742
+ if (enc_idx == usascii_encindex) return 1;
743
+ if (enc_idx == utf8_encindex) return 1;
959
744
  return 0;
960
745
  }
961
- #endif
746
+
747
+ static inline VALUE ensure_valid_encoding(VALUE str)
748
+ {
749
+ int encindex = RB_ENCODING_GET(str);
750
+ VALUE utf8_string;
751
+ if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
752
+ if (encindex == binary_encindex) {
753
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
754
+ // TODO: Deprecate in 2.8.0
755
+ // TODO: Remove in 3.0.0
756
+ utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
757
+ switch (rb_enc_str_coderange(utf8_string)) {
758
+ case ENC_CODERANGE_7BIT:
759
+ case ENC_CODERANGE_VALID:
760
+ return utf8_string;
761
+ break;
762
+ }
763
+ }
764
+
765
+ str = rb_funcall(str, i_encode, 1, Encoding_UTF_8);
766
+ }
767
+ return str;
768
+ }
962
769
 
963
770
  static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
964
771
  {
772
+ obj = ensure_valid_encoding(obj);
773
+
965
774
  fbuffer_append_char(buffer, '"');
966
- #ifdef HAVE_RUBY_ENCODING_H
967
- if (!enc_utf8_compatible_p(rb_enc_get(obj))) {
968
- obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
969
- }
970
- #endif
971
- if (state->ascii_only) {
972
- convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
973
- } else {
974
- convert_UTF8_to_JSON(buffer, obj, state->script_safe);
775
+
776
+ switch(rb_enc_str_coderange(obj)) {
777
+ case ENC_CODERANGE_7BIT:
778
+ convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
779
+ break;
780
+ case ENC_CODERANGE_VALID:
781
+ if (RB_UNLIKELY(state->ascii_only)) {
782
+ convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
783
+ } else {
784
+ convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
785
+ }
786
+ break;
787
+ default:
788
+ rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
789
+ break;
975
790
  }
976
791
  fbuffer_append_char(buffer, '"');
977
792
  }
@@ -1018,9 +833,9 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
1018
833
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
1019
834
  if (!allow_nan) {
1020
835
  if (isinf(value)) {
1021
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
836
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
1022
837
  } else if (isnan(value)) {
1023
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
838
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
1024
839
  }
1025
840
  }
1026
841
  fbuffer_append_str(buffer, tmp);
@@ -1029,35 +844,56 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
1029
844
  static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1030
845
  {
1031
846
  VALUE tmp;
1032
- VALUE klass = CLASS_OF(obj);
1033
- if (klass == rb_cHash) {
1034
- generate_json_object(buffer, Vstate, state, obj);
1035
- } else if (klass == rb_cArray) {
1036
- generate_json_array(buffer, Vstate, state, obj);
1037
- } else if (klass == rb_cString) {
1038
- generate_json_string(buffer, Vstate, state, obj);
1039
- } else if (obj == Qnil) {
847
+ if (obj == Qnil) {
1040
848
  generate_json_null(buffer, Vstate, state, obj);
1041
849
  } else if (obj == Qfalse) {
1042
850
  generate_json_false(buffer, Vstate, state, obj);
1043
851
  } else if (obj == Qtrue) {
1044
852
  generate_json_true(buffer, Vstate, state, obj);
1045
- } else if (FIXNUM_P(obj)) {
1046
- generate_json_fixnum(buffer, Vstate, state, obj);
1047
- } else if (RB_TYPE_P(obj, T_BIGNUM)) {
1048
- generate_json_bignum(buffer, Vstate, state, obj);
1049
- } else if (klass == rb_cFloat) {
1050
- generate_json_float(buffer, Vstate, state, obj);
1051
- } else if (state->strict) {
1052
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(CLASS_OF(obj)));
1053
- } else if (rb_respond_to(obj, i_to_json)) {
1054
- tmp = rb_funcall(obj, i_to_json, 1, Vstate);
1055
- Check_Type(tmp, T_STRING);
1056
- fbuffer_append_str(buffer, tmp);
853
+ } else if (RB_SPECIAL_CONST_P(obj)) {
854
+ if (RB_FIXNUM_P(obj)) {
855
+ generate_json_fixnum(buffer, Vstate, state, obj);
856
+ } else if (RB_FLONUM_P(obj)) {
857
+ generate_json_float(buffer, Vstate, state, obj);
858
+ } else {
859
+ goto general;
860
+ }
1057
861
  } else {
1058
- tmp = rb_funcall(obj, i_to_s, 0);
1059
- Check_Type(tmp, T_STRING);
1060
- generate_json_string(buffer, Vstate, state, tmp);
862
+ VALUE klass = RBASIC_CLASS(obj);
863
+ switch (RB_BUILTIN_TYPE(obj)) {
864
+ case T_BIGNUM:
865
+ generate_json_bignum(buffer, Vstate, state, obj);
866
+ break;
867
+ case T_HASH:
868
+ if (klass != rb_cHash) goto general;
869
+ generate_json_object(buffer, Vstate, state, obj);
870
+ break;
871
+ case T_ARRAY:
872
+ if (klass != rb_cArray) goto general;
873
+ generate_json_array(buffer, Vstate, state, obj);
874
+ break;
875
+ case T_STRING:
876
+ if (klass != rb_cString) goto general;
877
+ generate_json_string(buffer, Vstate, state, obj);
878
+ break;
879
+ case T_FLOAT:
880
+ if (klass != rb_cFloat) goto general;
881
+ generate_json_float(buffer, Vstate, state, obj);
882
+ break;
883
+ default:
884
+ general:
885
+ if (state->strict) {
886
+ rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
887
+ } else if (rb_respond_to(obj, i_to_json)) {
888
+ tmp = rb_funcall(obj, i_to_json, 1, Vstate);
889
+ Check_Type(tmp, T_STRING);
890
+ fbuffer_append_str(buffer, tmp);
891
+ } else {
892
+ tmp = rb_funcall(obj, i_to_s, 0);
893
+ Check_Type(tmp, T_STRING);
894
+ generate_json_string(buffer, Vstate, state, tmp);
895
+ }
896
+ }
1061
897
  }
1062
898
  }
1063
899
 
@@ -1067,28 +903,6 @@ static FBuffer *cState_prepare_buffer(VALUE self)
1067
903
  GET_STATE(self);
1068
904
  buffer = fbuffer_alloc(state->buffer_initial_length);
1069
905
 
1070
- if (state->object_delim) {
1071
- fbuffer_clear(state->object_delim);
1072
- } else {
1073
- state->object_delim = fbuffer_alloc(16);
1074
- }
1075
- fbuffer_append_char(state->object_delim, ',');
1076
- if (state->object_delim2) {
1077
- fbuffer_clear(state->object_delim2);
1078
- } else {
1079
- state->object_delim2 = fbuffer_alloc(16);
1080
- }
1081
- if (state->space_before) fbuffer_append(state->object_delim2, state->space_before, state->space_before_len);
1082
- fbuffer_append_char(state->object_delim2, ':');
1083
- if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len);
1084
-
1085
- if (state->array_delim) {
1086
- fbuffer_clear(state->array_delim);
1087
- } else {
1088
- state->array_delim = fbuffer_alloc(16);
1089
- }
1090
- fbuffer_append_char(state->array_delim, ',');
1091
- if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len);
1092
906
  return buffer;
1093
907
  }
1094
908
 
@@ -1149,37 +963,6 @@ static VALUE cState_generate(VALUE self, VALUE obj)
1149
963
  return result;
1150
964
  }
1151
965
 
1152
- /*
1153
- * call-seq: new(opts = {})
1154
- *
1155
- * Instantiates a new State object, configured by _opts_.
1156
- *
1157
- * _opts_ can have the following keys:
1158
- *
1159
- * * *indent*: a string used to indent levels (default: ''),
1160
- * * *space*: a string that is put after, a : or , delimiter (default: ''),
1161
- * * *space_before*: a string that is put before a : pair delimiter (default: ''),
1162
- * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
1163
- * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
1164
- * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
1165
- * generated, otherwise an exception is thrown, if these values are
1166
- * encountered. This options defaults to false.
1167
- * * *ascii_only*: true if only ASCII characters should be generated. This
1168
- * option defaults to false.
1169
- * * *buffer_initial_length*: sets the initial length of the generator's
1170
- * internal buffer.
1171
- */
1172
- static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
1173
- {
1174
- VALUE opts;
1175
- GET_STATE(self);
1176
- state->max_nesting = 100;
1177
- state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1178
- rb_scan_args(argc, argv, "01", &opts);
1179
- if (!NIL_P(opts)) cState_configure(self, opts);
1180
- return self;
1181
- }
1182
-
1183
966
  /*
1184
967
  * call-seq: initialize_copy(orig)
1185
968
  *
@@ -1201,9 +984,6 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
1201
984
  objState->space_before = fstrndup(origState->space_before, origState->space_before_len);
1202
985
  objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len);
1203
986
  objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len);
1204
- if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim);
1205
- if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim);
1206
- if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2);
1207
987
  return obj;
1208
988
  }
1209
989
 
@@ -1442,7 +1222,8 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1442
1222
  {
1443
1223
  GET_STATE(self);
1444
1224
  Check_Type(depth, T_FIXNUM);
1445
- return state->max_nesting = FIX2LONG(depth);
1225
+ state->max_nesting = FIX2LONG(depth);
1226
+ return Qnil;
1446
1227
  }
1447
1228
 
1448
1229
  /*
@@ -1513,6 +1294,18 @@ static VALUE cState_allow_nan_p(VALUE self)
1513
1294
  return state->allow_nan ? Qtrue : Qfalse;
1514
1295
  }
1515
1296
 
1297
+ /*
1298
+ * call-seq: allow_nan=(enable)
1299
+ *
1300
+ * This sets whether or not to serialize NaN, Infinity, and -Infinity
1301
+ */
1302
+ static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1303
+ {
1304
+ GET_STATE(self);
1305
+ state->allow_nan = RTEST(enable);
1306
+ return Qnil;
1307
+ }
1308
+
1516
1309
  /*
1517
1310
  * call-seq: ascii_only?
1518
1311
  *
@@ -1525,6 +1318,18 @@ static VALUE cState_ascii_only_p(VALUE self)
1525
1318
  return state->ascii_only ? Qtrue : Qfalse;
1526
1319
  }
1527
1320
 
1321
+ /*
1322
+ * call-seq: ascii_only=(enable)
1323
+ *
1324
+ * This sets whether only ASCII characters should be generated.
1325
+ */
1326
+ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1327
+ {
1328
+ GET_STATE(self);
1329
+ state->ascii_only = RTEST(enable);
1330
+ return Qnil;
1331
+ }
1332
+
1528
1333
  /*
1529
1334
  * call-seq: depth
1530
1335
  *
@@ -1592,8 +1397,8 @@ void Init_generator(void)
1592
1397
  rb_require("json/common");
1593
1398
 
1594
1399
  mJSON = rb_define_module("JSON");
1595
- mExt = rb_define_module_under(mJSON, "Ext");
1596
- mGenerator = rb_define_module_under(mExt, "Generator");
1400
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1401
+ VALUE mGenerator = rb_define_module_under(mExt, "Generator");
1597
1402
 
1598
1403
  eGeneratorError = rb_path2class("JSON::GeneratorError");
1599
1404
  eNestingError = rb_path2class("JSON::NestingError");
@@ -1603,7 +1408,6 @@ void Init_generator(void)
1603
1408
  cState = rb_define_class_under(mGenerator, "State", rb_cObject);
1604
1409
  rb_define_alloc_func(cState, cState_s_allocate);
1605
1410
  rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
1606
- rb_define_method(cState, "initialize", cState_initialize, -1);
1607
1411
  rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
1608
1412
  rb_define_method(cState, "indent", cState_indent, 0);
1609
1413
  rb_define_method(cState, "indent=", cState_indent_set, 1);
@@ -1628,76 +1432,70 @@ void Init_generator(void)
1628
1432
  rb_define_method(cState, "strict=", cState_strict_set, 1);
1629
1433
  rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
1630
1434
  rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
1435
+ rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1);
1631
1436
  rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
1437
+ rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1);
1632
1438
  rb_define_method(cState, "depth", cState_depth, 0);
1633
1439
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1634
1440
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1635
1441
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1636
- rb_define_method(cState, "configure", cState_configure, 1);
1637
- rb_define_alias(cState, "merge", "configure");
1638
- rb_define_method(cState, "to_h", cState_to_h, 0);
1639
- rb_define_alias(cState, "to_hash", "to_h");
1640
- rb_define_method(cState, "[]", cState_aref, 1);
1641
- rb_define_method(cState, "[]=", cState_aset, 2);
1642
1442
  rb_define_method(cState, "generate", cState_generate, 1);
1643
1443
 
1644
- mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1645
- mObject = rb_define_module_under(mGeneratorMethods, "Object");
1444
+ VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1445
+
1446
+ VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object");
1646
1447
  rb_define_method(mObject, "to_json", mObject_to_json, -1);
1647
- mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1448
+
1449
+ VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1648
1450
  rb_define_method(mHash, "to_json", mHash_to_json, -1);
1649
- mArray = rb_define_module_under(mGeneratorMethods, "Array");
1451
+
1452
+ VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array");
1650
1453
  rb_define_method(mArray, "to_json", mArray_to_json, -1);
1454
+
1651
1455
  #ifdef RUBY_INTEGER_UNIFICATION
1652
- mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
1456
+ VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
1653
1457
  rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
1654
1458
  #else
1655
- mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1459
+ VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1656
1460
  rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
1657
- mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1461
+
1462
+ VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1658
1463
  rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
1659
1464
  #endif
1660
- mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1465
+ VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1661
1466
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
1662
- mString = rb_define_module_under(mGeneratorMethods, "String");
1467
+
1468
+ VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
1663
1469
  rb_define_singleton_method(mString, "included", mString_included_s, 1);
1664
1470
  rb_define_method(mString, "to_json", mString_to_json, -1);
1665
1471
  rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
1666
1472
  rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
1473
+
1667
1474
  mString_Extend = rb_define_module_under(mString, "Extend");
1668
1475
  rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
1669
- mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1476
+
1477
+ VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1670
1478
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
1671
- mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1479
+
1480
+ VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1672
1481
  rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
1673
- mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1482
+
1483
+ VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1674
1484
  rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
1675
1485
 
1486
+ rb_global_variable(&Encoding_UTF_8);
1487
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1488
+
1676
1489
  i_to_s = rb_intern("to_s");
1677
1490
  i_to_json = rb_intern("to_json");
1678
1491
  i_new = rb_intern("new");
1679
- i_indent = rb_intern("indent");
1680
- i_space = rb_intern("space");
1681
- i_space_before = rb_intern("space_before");
1682
- i_object_nl = rb_intern("object_nl");
1683
- i_array_nl = rb_intern("array_nl");
1684
- i_max_nesting = rb_intern("max_nesting");
1685
- i_script_safe = rb_intern("script_safe");
1686
- i_escape_slash = rb_intern("escape_slash");
1687
- i_strict = rb_intern("strict");
1688
- i_allow_nan = rb_intern("allow_nan");
1689
- i_ascii_only = rb_intern("ascii_only");
1690
- i_depth = rb_intern("depth");
1691
- i_buffer_initial_length = rb_intern("buffer_initial_length");
1692
1492
  i_pack = rb_intern("pack");
1693
1493
  i_unpack = rb_intern("unpack");
1694
1494
  i_create_id = rb_intern("create_id");
1695
1495
  i_extend = rb_intern("extend");
1696
- i_key_p = rb_intern("key?");
1697
- i_aref = rb_intern("[]");
1698
- i_send = rb_intern("__send__");
1699
- i_respond_to_p = rb_intern("respond_to?");
1700
- i_match = rb_intern("match");
1701
- i_keys = rb_intern("keys");
1702
- i_dup = rb_intern("dup");
1496
+ i_encode = rb_intern("encode");
1497
+
1498
+ usascii_encindex = rb_usascii_encindex();
1499
+ utf8_encindex = rb_utf8_encindex();
1500
+ binary_encindex = rb_ascii8bit_encindex();
1703
1501
  }