json_pure 1.0.0 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. data/CHANGES +155 -1
  2. data/COPYING +58 -0
  3. data/GPL +7 -7
  4. data/README +324 -45
  5. data/Rakefile +166 -124
  6. data/TODO +1 -1
  7. data/VERSION +1 -1
  8. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkComparison.log +52 -0
  9. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkExt#generator_fast-autocorrelation.dat +1000 -0
  10. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkExt#generator_fast.dat +1001 -0
  11. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkExt#generator_pretty-autocorrelation.dat +900 -0
  12. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkExt#generator_pretty.dat +901 -0
  13. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkExt#generator_safe-autocorrelation.dat +1000 -0
  14. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkExt#generator_safe.dat +1001 -0
  15. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkExt.log +261 -0
  16. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkPure#generator_fast-autocorrelation.dat +1000 -0
  17. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkPure#generator_fast.dat +1001 -0
  18. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkPure#generator_pretty-autocorrelation.dat +1000 -0
  19. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkPure#generator_pretty.dat +1001 -0
  20. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkPure#generator_safe-autocorrelation.dat +1000 -0
  21. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkPure#generator_safe.dat +1001 -0
  22. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkPure.log +262 -0
  23. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkRails#generator-autocorrelation.dat +1000 -0
  24. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkRails#generator.dat +1001 -0
  25. data/benchmarks/data-p4-3GHz-ruby18/GeneratorBenchmarkRails.log +82 -0
  26. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkComparison.log +34 -0
  27. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkExt#parser-autocorrelation.dat +900 -0
  28. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkExt#parser.dat +901 -0
  29. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkExt.log +81 -0
  30. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkPure#parser-autocorrelation.dat +1000 -0
  31. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkPure#parser.dat +1001 -0
  32. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkPure.log +82 -0
  33. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkRails#parser-autocorrelation.dat +1000 -0
  34. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkRails#parser.dat +1001 -0
  35. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkRails.log +82 -0
  36. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkYAML#parser-autocorrelation.dat +1000 -0
  37. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkYAML#parser.dat +1001 -0
  38. data/benchmarks/data-p4-3GHz-ruby18/ParserBenchmarkYAML.log +82 -0
  39. data/benchmarks/generator2_benchmark.rb +222 -0
  40. data/benchmarks/generator_benchmark.rb +224 -0
  41. data/benchmarks/ohai.json +1216 -0
  42. data/benchmarks/ohai.ruby +1 -0
  43. data/benchmarks/parser2_benchmark.rb +251 -0
  44. data/benchmarks/parser_benchmark.rb +259 -0
  45. data/bin/edit_json.rb +1 -3
  46. data/bin/prettify_json.rb +75 -0
  47. data/data/index.html +5 -4
  48. data/data/prototype.js +2764 -1095
  49. data/ext/json/ext/generator/extconf.rb +14 -3
  50. data/ext/json/ext/generator/generator.c +1022 -334
  51. data/ext/json/ext/generator/generator.h +197 -0
  52. data/ext/json/ext/parser/extconf.rb +9 -3
  53. data/ext/json/ext/parser/parser.c +961 -577
  54. data/ext/json/ext/parser/parser.h +71 -0
  55. data/ext/json/ext/parser/parser.rl +400 -123
  56. data/install.rb +0 -0
  57. data/lib/json/add/core.rb +148 -0
  58. data/lib/json/add/rails.rb +58 -0
  59. data/lib/json/common.rb +254 -47
  60. data/lib/json/editor.rb +236 -72
  61. data/lib/json/ext.rb +2 -0
  62. data/lib/json/pure/generator.rb +235 -117
  63. data/lib/json/pure/parser.rb +124 -25
  64. data/lib/json/pure.rb +5 -3
  65. data/lib/json/version.rb +1 -1
  66. data/lib/json.rb +2 -197
  67. data/tests/fixtures/fail18.json +1 -0
  68. data/tests/test_json.rb +181 -22
  69. data/tests/test_json_addition.rb +84 -16
  70. data/tests/test_json_encoding.rb +68 -0
  71. data/tests/test_json_fixtures.rb +9 -5
  72. data/tests/test_json_generate.rb +114 -14
  73. data/tests/test_json_rails.rb +144 -0
  74. data/tests/test_json_unicode.rb +35 -14
  75. data/tools/fuzz.rb +13 -7
  76. data/tools/server.rb +0 -1
  77. metadata +156 -122
  78. data/benchmarks/benchmark.txt +0 -133
  79. data/benchmarks/benchmark_generator.rb +0 -44
  80. data/benchmarks/benchmark_parser.rb +0 -22
  81. data/benchmarks/benchmark_rails.rb +0 -26
  82. data/ext/json/ext/generator/Makefile +0 -149
  83. data/ext/json/ext/generator/unicode.c +0 -184
  84. data/ext/json/ext/generator/unicode.h +0 -40
  85. data/ext/json/ext/parser/Makefile +0 -149
  86. data/ext/json/ext/parser/unicode.c +0 -156
  87. data/ext/json/ext/parser/unicode.h +0 -44
  88. data/tests/fixtures/pass18.json +0 -1
  89. data/tests/runner.rb +0 -24
  90. /data/tests/fixtures/{fail15.json → pass15.json} +0 -0
  91. /data/tests/fixtures/{fail16.json → pass16.json} +0 -0
  92. /data/tests/fixtures/{fail17.json → pass17.json} +0 -0
  93. /data/tests/fixtures/{fail26.json → pass26.json} +0 -0
@@ -1,37 +1,83 @@
1
- /* vim: set cin et sw=4 ts=4: */
1
+ #include "parser.h"
2
+
3
+ /* unicode */
4
+
5
+ static const char digit_values[256] = {
6
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
8
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
9
+ -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
10
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11
+ 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
14
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
15
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
16
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
17
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
18
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19
+ -1, -1, -1, -1, -1, -1, -1
20
+ };
21
+
22
+ static UTF32 unescape_unicode(const unsigned char *p)
23
+ {
24
+ char b;
25
+ UTF32 result = 0;
26
+ b = digit_values[p[0]];
27
+ if (b < 0) return UNI_REPLACEMENT_CHAR;
28
+ result = (result << 4) | b;
29
+ b = digit_values[p[1]];
30
+ result = (result << 4) | b;
31
+ if (b < 0) return UNI_REPLACEMENT_CHAR;
32
+ b = digit_values[p[2]];
33
+ result = (result << 4) | b;
34
+ if (b < 0) return UNI_REPLACEMENT_CHAR;
35
+ b = digit_values[p[3]];
36
+ result = (result << 4) | b;
37
+ if (b < 0) return UNI_REPLACEMENT_CHAR;
38
+ return result;
39
+ }
2
40
 
3
- #include "ruby.h"
4
- #include "re.h"
5
- #include "unicode.h"
41
+ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
42
+ {
43
+ int len = 1;
44
+ if (ch <= 0x7F) {
45
+ buf[0] = (char) ch;
46
+ } else if (ch <= 0x07FF) {
47
+ buf[0] = (char) ((ch >> 6) | 0xC0);
48
+ buf[1] = (char) ((ch & 0x3F) | 0x80);
49
+ len++;
50
+ } else if (ch <= 0xFFFF) {
51
+ buf[0] = (char) ((ch >> 12) | 0xE0);
52
+ buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
53
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
54
+ len += 2;
55
+ } else if (ch <= 0x1fffff) {
56
+ buf[0] =(char) ((ch >> 18) | 0xF0);
57
+ buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
58
+ buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
59
+ buf[3] =(char) ((ch & 0x3F) | 0x80);
60
+ len += 3;
61
+ } else {
62
+ buf[0] = '?';
63
+ }
64
+ return len;
65
+ }
6
66
 
7
- #ifndef swap16
8
- #define swap16(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF))
67
+ #ifdef HAVE_RUBY_ENCODING_H
68
+ static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
69
+ CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
70
+ static ID i_encoding, i_encode, i_encode_bang, i_force_encoding;
71
+ #else
72
+ static ID i_iconv;
9
73
  #endif
10
74
 
11
- #define EVIL 0x666
12
-
13
- static VALUE mJSON, mExt, cParser, eParserError;
14
-
15
- static ID i_json_creatable_p, i_json_create, i_create_id, i_chr;
16
-
17
- typedef struct JSON_ParserStruct {
18
- VALUE Vsource;
19
- char *source;
20
- long len;
21
- char *memo;
22
- VALUE create_id;
23
- } JSON_Parser;
24
-
25
- static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result);
26
- static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result);
27
- static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result);
28
- static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
29
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
30
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
75
+ static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
76
+ static VALUE CNaN, CInfinity, CMinusInfinity;
31
77
 
32
- #define GET_STRUCT \
33
- JSON_Parser *json; \
34
- Data_Get_Struct(self, JSON_Parser, json);
78
+ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
79
+ i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class,
80
+ i_array_class, i_key_p, i_deep_const_get;
35
81
 
36
82
  %%{
37
83
  machine JSON_common;
@@ -48,7 +94,10 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
48
94
  Vnull = 'null';
49
95
  Vfalse = 'false';
50
96
  Vtrue = 'true';
51
- begin_value = [nft"\-[{] | digit;
97
+ VNaN = 'NaN';
98
+ VInfinity = 'Infinity';
99
+ VMinusInfinity = '-Infinity';
100
+ begin_value = [nft"\-[{NI] | digit;
52
101
  begin_object = '{';
53
102
  end_object = '}';
54
103
  begin_array = '[';
@@ -68,7 +117,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
68
117
  VALUE v = Qnil;
69
118
  char *np = JSON_parse_value(json, fpc, pe, &v);
70
119
  if (np == NULL) {
71
- fbreak;
120
+ fhold; fbreak;
72
121
  } else {
73
122
  rb_hash_aset(*result, last_name, v);
74
123
  fexec np;
@@ -76,11 +125,14 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
76
125
  }
77
126
 
78
127
  action parse_name {
79
- char *np = JSON_parse_string(json, fpc, pe, &last_name);
80
- if (np == NULL) fbreak; else fexec np;
128
+ char *np;
129
+ json->parsing_name = 1;
130
+ np = JSON_parse_string(json, fpc, pe, &last_name);
131
+ json->parsing_name = 0;
132
+ if (np == NULL) { fhold; fbreak; } else fexec np;
81
133
  }
82
134
 
83
- action exit { fbreak; }
135
+ action exit { fhold; fbreak; }
84
136
 
85
137
  a_pair = ignore* begin_name >parse_name
86
138
  ignore* name_separator ignore*
@@ -95,17 +147,25 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
95
147
  {
96
148
  int cs = EVIL;
97
149
  VALUE last_name = Qnil;
98
- *result = rb_hash_new();
150
+ VALUE object_class = json->object_class;
151
+
152
+ if (json->max_nesting && json->current_nesting > json->max_nesting) {
153
+ rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
154
+ }
155
+
156
+ *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
99
157
 
100
158
  %% write init;
101
159
  %% write exec;
102
160
 
103
161
  if (cs >= JSON_object_first_final) {
104
- VALUE klassname = rb_hash_aref(*result, json->create_id);
105
- if (!NIL_P(klassname)) {
106
- VALUE klass = rb_path2class(StringValueCStr(klassname));
107
- if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) {
108
- *result = rb_funcall(klass, i_json_create, 1, *result);
162
+ if (RTEST(json->create_id)) {
163
+ VALUE klassname = rb_hash_aref(*result, json->create_id);
164
+ if (!NIL_P(klassname)) {
165
+ VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
166
+ if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) {
167
+ *result = rb_funcall(klass, i_json_create, 1, *result);
168
+ }
109
169
  }
110
170
  }
111
171
  return p + 1;
@@ -129,36 +189,67 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
129
189
  action parse_true {
130
190
  *result = Qtrue;
131
191
  }
192
+ action parse_nan {
193
+ if (json->allow_nan) {
194
+ *result = CNaN;
195
+ } else {
196
+ rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
197
+ }
198
+ }
199
+ action parse_infinity {
200
+ if (json->allow_nan) {
201
+ *result = CInfinity;
202
+ } else {
203
+ rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
204
+ }
205
+ }
132
206
  action parse_string {
133
207
  char *np = JSON_parse_string(json, fpc, pe, result);
134
- if (np == NULL) fbreak; else fexec np;
208
+ if (np == NULL) { fhold; fbreak; } else fexec np;
135
209
  }
136
210
 
137
211
  action parse_number {
138
212
  char *np;
213
+ if(pe > fpc + 9 && !strncmp(MinusInfinity, fpc, 9)) {
214
+ if (json->allow_nan) {
215
+ *result = CMinusInfinity;
216
+ fexec p + 10;
217
+ fhold; fbreak;
218
+ } else {
219
+ rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
220
+ }
221
+ }
139
222
  np = JSON_parse_float(json, fpc, pe, result);
140
223
  if (np != NULL) fexec np;
141
224
  np = JSON_parse_integer(json, fpc, pe, result);
142
225
  if (np != NULL) fexec np;
143
- fbreak;
226
+ fhold; fbreak;
144
227
  }
145
228
 
146
229
  action parse_array {
147
- char *np = JSON_parse_array(json, fpc, pe, result);
148
- if (np == NULL) fbreak; else fexec np;
230
+ char *np;
231
+ json->current_nesting++;
232
+ np = JSON_parse_array(json, fpc, pe, result);
233
+ json->current_nesting--;
234
+ if (np == NULL) { fhold; fbreak; } else fexec np;
149
235
  }
150
236
 
151
237
  action parse_object {
152
- char *np = JSON_parse_object(json, fpc, pe, result);
153
- if (np == NULL) fbreak; else fexec np;
238
+ char *np;
239
+ json->current_nesting++;
240
+ np = JSON_parse_object(json, fpc, pe, result);
241
+ json->current_nesting--;
242
+ if (np == NULL) { fhold; fbreak; } else fexec np;
154
243
  }
155
244
 
156
- action exit { fbreak; }
245
+ action exit { fhold; fbreak; }
157
246
 
158
247
  main := (
159
248
  Vnull @parse_null |
160
249
  Vfalse @parse_false |
161
250
  Vtrue @parse_true |
251
+ VNaN @parse_nan |
252
+ VInfinity @parse_infinity |
162
253
  begin_number >parse_number |
163
254
  begin_string >parse_string |
164
255
  begin_array >parse_array |
@@ -185,7 +276,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
185
276
 
186
277
  write data;
187
278
 
188
- action exit { fbreak; }
279
+ action exit { fhold; fbreak; }
189
280
 
190
281
  main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit);
191
282
  }%%
@@ -213,11 +304,11 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
213
304
 
214
305
  write data;
215
306
 
216
- action exit { fbreak; }
307
+ action exit { fhold; fbreak; }
217
308
 
218
309
  main := '-'? (
219
310
  (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
220
- | ([1-9][0-9]* ([Ee] [+\-]?[0-9]+))
311
+ | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
221
312
  ) (^[0-9Ee.\-] @exit );
222
313
  }%%
223
314
 
@@ -249,14 +340,14 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
249
340
  VALUE v = Qnil;
250
341
  char *np = JSON_parse_value(json, fpc, pe, &v);
251
342
  if (np == NULL) {
252
- fbreak;
343
+ fhold; fbreak;
253
344
  } else {
254
345
  rb_ary_push(*result, v);
255
346
  fexec np;
256
347
  }
257
348
  }
258
349
 
259
- action exit { fbreak; }
350
+ action exit { fhold; fbreak; }
260
351
 
261
352
  next_element = value_separator ignore* begin_value >parse_value;
262
353
 
@@ -269,7 +360,12 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
269
360
  static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
270
361
  {
271
362
  int cs = EVIL;
272
- *result = rb_ary_new();
363
+ VALUE array_class = json->array_class;
364
+
365
+ if (json->max_nesting && json->current_nesting > json->max_nesting) {
366
+ rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
367
+ }
368
+ *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
273
369
 
274
370
  %% write init;
275
371
  %% write exec;
@@ -277,59 +373,78 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
277
373
  if(cs >= JSON_array_first_final) {
278
374
  return p + 1;
279
375
  } else {
280
- rb_raise(eParserError, "unexpected token at '%s'", p);
376
+ rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
377
+ return NULL;
281
378
  }
282
379
  }
283
380
 
284
- static VALUE json_string_escape(char *p, char *pe)
381
+ static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
285
382
  {
286
- VALUE result = rb_str_buf_new(pe - p + 1);
287
-
288
- while (p < pe) {
289
- if (*p == '\\') {
290
- p++;
291
- if (p >= pe) return Qnil; /* raise an exception later, \ at end */
292
- switch (*p) {
383
+ char *p = string, *pe = string, *unescape;
384
+ int unescape_len;
385
+
386
+ while (pe < stringEnd) {
387
+ if (*pe == '\\') {
388
+ unescape = (char *) "?";
389
+ unescape_len = 1;
390
+ if (pe > p) rb_str_buf_cat(result, p, pe - p);
391
+ switch (*++pe) {
392
+ case 'n':
393
+ unescape = (char *) "\n";
394
+ break;
395
+ case 'r':
396
+ unescape = (char *) "\r";
397
+ break;
398
+ case 't':
399
+ unescape = (char *) "\t";
400
+ break;
293
401
  case '"':
402
+ unescape = (char *) "\"";
403
+ break;
294
404
  case '\\':
295
- rb_str_buf_cat(result, p, 1);
296
- p++;
405
+ unescape = (char *) "\\";
297
406
  break;
298
407
  case 'b':
299
- rb_str_buf_cat2(result, "\b");
300
- p++;
408
+ unescape = (char *) "\b";
301
409
  break;
302
410
  case 'f':
303
- rb_str_buf_cat2(result, "\f");
304
- p++;
305
- break;
306
- case 'n':
307
- rb_str_buf_cat2(result, "\n");
308
- p++;
309
- break;
310
- case 'r':
311
- rb_str_buf_cat2(result, "\r");
312
- p++;
313
- break;
314
- case 't':
315
- rb_str_buf_cat2(result, "\t");
316
- p++;
411
+ unescape = (char *) "\f";
317
412
  break;
318
413
  case 'u':
319
- if (p > pe - 4) {
414
+ if (pe > stringEnd - 4) {
320
415
  return Qnil;
321
416
  } else {
322
- p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
417
+ char buf[4];
418
+ UTF32 ch = unescape_unicode((unsigned char *) ++pe);
419
+ pe += 3;
420
+ if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
421
+ pe++;
422
+ if (pe > stringEnd - 6) return Qnil;
423
+ if (pe[0] == '\\' && pe[1] == 'u') {
424
+ UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
425
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
426
+ | (sur & 0x3FF));
427
+ pe += 5;
428
+ } else {
429
+ unescape = (char *) "?";
430
+ break;
431
+ }
432
+ }
433
+ unescape_len = convert_UTF32_to_UTF8(buf, ch);
434
+ unescape = buf;
323
435
  }
324
436
  break;
437
+ default:
438
+ p = pe;
439
+ continue;
325
440
  }
441
+ rb_str_buf_cat(result, unescape, unescape_len);
442
+ p = ++pe;
326
443
  } else {
327
- char *q = p;
328
- while (*q != '\\' && q < pe) q++;
329
- rb_str_buf_cat(result, p, q - p);
330
- p = q;
444
+ pe++;
331
445
  }
332
446
  }
447
+ rb_str_buf_cat(result, p, pe - p);
333
448
  return result;
334
449
  }
335
450
 
@@ -340,24 +455,33 @@ static VALUE json_string_escape(char *p, char *pe)
340
455
  write data;
341
456
 
342
457
  action parse_string {
343
- *result = json_string_escape(json->memo + 1, p);
344
- if (NIL_P(*result)) fbreak; else fexec p + 1;
345
- }
346
-
347
- action exit { fbreak; }
348
-
349
- main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4})* %parse_string) '"' @exit;
458
+ *result = json_string_unescape(*result, json->memo + 1, p);
459
+ if (NIL_P(*result)) {
460
+ fhold;
461
+ fbreak;
462
+ } else {
463
+ FORCE_UTF8(*result);
464
+ fexec p + 1;
465
+ }
466
+ }
467
+
468
+ action exit { fhold; fbreak; }
469
+
470
+ main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^(["\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
350
471
  }%%
351
472
 
352
473
  static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
353
474
  {
354
475
  int cs = EVIL;
355
476
 
356
- *result = rb_str_new("", 0);
477
+ *result = rb_str_buf_new(0);
357
478
  %% write init;
358
479
  json->memo = p;
359
480
  %% write exec;
360
481
 
482
+ if (json->symbolize_names && json->parsing_name) {
483
+ *result = rb_str_intern(*result);
484
+ }
361
485
  if (cs >= JSON_string_first_final) {
362
486
  return p + 1;
363
487
  } else {
@@ -374,13 +498,17 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
374
498
  include JSON_common;
375
499
 
376
500
  action parse_object {
377
- char *np = JSON_parse_object(json, fpc, pe, &result);
378
- if (np == NULL) fbreak; else fexec np;
501
+ char *np;
502
+ json->current_nesting = 1;
503
+ np = JSON_parse_object(json, fpc, pe, &result);
504
+ if (np == NULL) { fhold; fbreak; } else fexec np;
379
505
  }
380
506
 
381
507
  action parse_array {
382
- char *np = JSON_parse_array(json, fpc, pe, &result);
383
- if (np == NULL) fbreak; else fexec np;
508
+ char *np;
509
+ json->current_nesting = 1;
510
+ np = JSON_parse_array(json, fpc, pe, &result);
511
+ if (np == NULL) { fhold; fbreak; } else fexec np;
384
512
  }
385
513
 
386
514
  main := ignore* (
@@ -401,38 +529,156 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
401
529
  *
402
530
  */
403
531
 
404
- /*
405
- * call-seq: new(source)
406
- *
407
- * Creates a new JSON::Ext::Parser instance for the string _source_.
408
- */
409
- static VALUE cParser_initialize(VALUE self, VALUE source)
532
+ static VALUE convert_encoding(VALUE source)
410
533
  {
411
- char *ptr;
412
- long len;
413
- GET_STRUCT;
414
- source = StringValue(source);
415
- ptr = RSTRING(source)->ptr;
416
- len = RSTRING(source)->len;
534
+ char *ptr = RSTRING_PTR(source);
535
+ long len = RSTRING_LEN(source);
417
536
  if (len < 2) {
418
537
  rb_raise(eParserError, "A JSON text must at least contain two octets!");
419
538
  }
420
- /*
421
- Convert these?
539
+ #ifdef HAVE_RUBY_ENCODING_H
540
+ {
541
+ VALUE encoding = rb_funcall(source, i_encoding, 0);
542
+ if (encoding == CEncoding_ASCII_8BIT) {
543
+ if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
544
+ source = rb_str_dup(source);
545
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE);
546
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
547
+ } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
548
+ source = rb_str_dup(source);
549
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE);
550
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
551
+ } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
552
+ source = rb_str_dup(source);
553
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE);
554
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
555
+ } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
556
+ source = rb_str_dup(source);
557
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE);
558
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
559
+ } else {
560
+ FORCE_UTF8(source);
561
+ }
562
+ } else {
563
+ source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
564
+ }
565
+ }
566
+ #else
422
567
  if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
423
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
568
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
424
569
  } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
425
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
570
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
426
571
  } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
427
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
572
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
428
573
  } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
429
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
574
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
575
+ }
576
+ #endif
577
+ return source;
578
+ }
579
+
580
+ /*
581
+ * call-seq: new(source, opts => {})
582
+ *
583
+ * Creates a new JSON::Ext::Parser instance for the string _source_.
584
+ *
585
+ * Creates a new JSON::Ext::Parser instance for the string _source_.
586
+ *
587
+ * It will be configured by the _opts_ hash. _opts_ can have the following
588
+ * keys:
589
+ *
590
+ * _opts_ can have the following keys:
591
+ * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
592
+ * structures. Disable depth checking with :max_nesting => false|nil|0; it
593
+ * defaults to 19.
594
+ * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
595
+ * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
596
+ * false.
597
+ * * *symbolize_names*: If set to true, returns symbols for the names
598
+ * (keys) in a JSON object. Otherwise strings are returned, which is also
599
+ * the default.
600
+ * * *create_additions*: If set to false, the Parser doesn't create
601
+ * additions even if a matchin class and create_id was found. This option
602
+ * defaults to true.
603
+ * * *object_class*: Defaults to Hash
604
+ * * *array_class*: Defaults to Array
605
+ */
606
+ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
607
+ {
608
+ char *ptr;
609
+ long len;
610
+ VALUE source, opts;
611
+ GET_PARSER;
612
+ rb_scan_args(argc, argv, "11", &source, &opts);
613
+ source = convert_encoding(StringValue(source));
614
+ ptr = RSTRING_PTR(source);
615
+ len = RSTRING_LEN(source);
616
+ if (!NIL_P(opts)) {
617
+ opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
618
+ if (NIL_P(opts)) {
619
+ rb_raise(rb_eArgError, "opts needs to be like a hash");
620
+ } else {
621
+ VALUE tmp = ID2SYM(i_max_nesting);
622
+ if (option_given_p(opts, tmp)) {
623
+ VALUE max_nesting = rb_hash_aref(opts, tmp);
624
+ if (RTEST(max_nesting)) {
625
+ Check_Type(max_nesting, T_FIXNUM);
626
+ json->max_nesting = FIX2INT(max_nesting);
627
+ } else {
628
+ json->max_nesting = 0;
629
+ }
630
+ } else {
631
+ json->max_nesting = 19;
632
+ }
633
+ tmp = ID2SYM(i_allow_nan);
634
+ if (option_given_p(opts, tmp)) {
635
+ VALUE allow_nan = rb_hash_aref(opts, tmp);
636
+ json->allow_nan = RTEST(allow_nan) ? 1 : 0;
637
+ } else {
638
+ json->allow_nan = 0;
639
+ }
640
+ tmp = ID2SYM(i_symbolize_names);
641
+ if (option_given_p(opts, tmp)) {
642
+ VALUE symbolize_names = rb_hash_aref(opts, tmp);
643
+ json->symbolize_names = RTEST(symbolize_names) ? 1 : 0;
644
+ } else {
645
+ json->symbolize_names = 0;
646
+ }
647
+ tmp = ID2SYM(i_create_additions);
648
+ if (option_given_p(opts, tmp)) {
649
+ VALUE create_additions = rb_hash_aref(opts, tmp);
650
+ if (RTEST(create_additions)) {
651
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
652
+ } else {
653
+ json->create_id = Qnil;
654
+ }
655
+ } else {
656
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
657
+ }
658
+ tmp = ID2SYM(i_object_class);
659
+ if (option_given_p(opts, tmp)) {
660
+ json->object_class = rb_hash_aref(opts, tmp);
661
+ } else {
662
+ json->object_class = Qnil;
663
+ }
664
+ tmp = ID2SYM(i_array_class);
665
+ if (option_given_p(opts, tmp)) {
666
+ json->array_class = rb_hash_aref(opts, tmp);
667
+ } else {
668
+ json->array_class = Qnil;
669
+ }
670
+ }
671
+ } else {
672
+ json->max_nesting = 19;
673
+ json->allow_nan = 0;
674
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
675
+ json->object_class = Qnil;
676
+ json->array_class = Qnil;
430
677
  }
431
- */
678
+ json->current_nesting = 0;
432
679
  json->len = len;
433
680
  json->source = ptr;
434
681
  json->Vsource = source;
435
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
436
682
  return self;
437
683
  }
438
684
 
@@ -444,10 +690,10 @@ static VALUE cParser_initialize(VALUE self, VALUE source)
444
690
  */
445
691
  static VALUE cParser_parse(VALUE self)
446
692
  {
447
- GET_STRUCT;
448
693
  char *p, *pe;
449
694
  int cs = EVIL;
450
695
  VALUE result = Qnil;
696
+ GET_PARSER;
451
697
 
452
698
  %% write init;
453
699
  p = json->source;
@@ -457,7 +703,8 @@ static VALUE cParser_parse(VALUE self)
457
703
  if (cs >= JSON_first_final && p == pe) {
458
704
  return result;
459
705
  } else {
460
- rb_raise(eParserError, "unexpected token at '%s'", p);
706
+ rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
707
+ return Qnil;
461
708
  }
462
709
  }
463
710
 
@@ -472,11 +719,13 @@ static void JSON_mark(JSON_Parser *json)
472
719
  {
473
720
  rb_gc_mark_maybe(json->Vsource);
474
721
  rb_gc_mark_maybe(json->create_id);
722
+ rb_gc_mark_maybe(json->object_class);
723
+ rb_gc_mark_maybe(json->array_class);
475
724
  }
476
725
 
477
726
  static void JSON_free(JSON_Parser *json)
478
727
  {
479
- free(json);
728
+ ruby_xfree(json);
480
729
  }
481
730
 
482
731
  static VALUE cJSON_parser_s_allocate(VALUE klass)
@@ -493,23 +742,51 @@ static VALUE cJSON_parser_s_allocate(VALUE klass)
493
742
  */
494
743
  static VALUE cParser_source(VALUE self)
495
744
  {
496
- GET_STRUCT;
745
+ GET_PARSER;
497
746
  return rb_str_dup(json->Vsource);
498
747
  }
499
748
 
500
749
  void Init_parser()
501
750
  {
751
+ rb_require("json/common");
502
752
  mJSON = rb_define_module("JSON");
503
753
  mExt = rb_define_module_under(mJSON, "Ext");
504
754
  cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
505
755
  eParserError = rb_path2class("JSON::ParserError");
756
+ eNestingError = rb_path2class("JSON::NestingError");
506
757
  rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
507
- rb_define_method(cParser, "initialize", cParser_initialize, 1);
758
+ rb_define_method(cParser, "initialize", cParser_initialize, -1);
508
759
  rb_define_method(cParser, "parse", cParser_parse, 0);
509
760
  rb_define_method(cParser, "source", cParser_source, 0);
510
761
 
762
+ CNaN = rb_const_get(mJSON, rb_intern("NaN"));
763
+ CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
764
+ CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
765
+
511
766
  i_json_creatable_p = rb_intern("json_creatable?");
512
767
  i_json_create = rb_intern("json_create");
513
768
  i_create_id = rb_intern("create_id");
769
+ i_create_additions = rb_intern("create_additions");
514
770
  i_chr = rb_intern("chr");
771
+ i_max_nesting = rb_intern("max_nesting");
772
+ i_allow_nan = rb_intern("allow_nan");
773
+ i_symbolize_names = rb_intern("symbolize_names");
774
+ i_object_class = rb_intern("object_class");
775
+ i_array_class = rb_intern("array_class");
776
+ i_key_p = rb_intern("key?");
777
+ i_deep_const_get = rb_intern("deep_const_get");
778
+ #ifdef HAVE_RUBY_ENCODING_H
779
+ CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
780
+ CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
781
+ CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
782
+ CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
783
+ CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
784
+ CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
785
+ i_encoding = rb_intern("encoding");
786
+ i_encode = rb_intern("encode");
787
+ i_encode_bang = rb_intern("encode!");
788
+ i_force_encoding = rb_intern("force_encoding");
789
+ #else
790
+ i_iconv = rb_intern("iconv");
791
+ #endif
515
792
  }