json 0.4.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of json might be problematic. Click here for more details.

Files changed (74) hide show
  1. data/CHANGES +6 -1
  2. data/README +49 -7
  3. data/Rakefile +216 -52
  4. data/TODO +1 -0
  5. data/VERSION +1 -1
  6. data/benchmarks/benchmark.txt +133 -0
  7. data/benchmarks/benchmark_generator.rb +44 -0
  8. data/benchmarks/benchmark_parser.rb +22 -0
  9. data/benchmarks/benchmark_rails.rb +26 -0
  10. data/data/example.json +1 -0
  11. data/data/index.html +37 -0
  12. data/data/prototype.js +2515 -0
  13. data/ext/json/ext/generator/Makefile +149 -0
  14. data/ext/json/ext/generator/extconf.rb +9 -0
  15. data/ext/json/ext/generator/generator.c +729 -0
  16. data/ext/json/ext/generator/unicode.c +184 -0
  17. data/ext/json/ext/generator/unicode.h +40 -0
  18. data/ext/json/ext/parser/Makefile +149 -0
  19. data/ext/json/ext/parser/extconf.rb +9 -0
  20. data/ext/json/ext/parser/parser.c +1551 -0
  21. data/ext/json/ext/parser/parser.rl +515 -0
  22. data/ext/json/ext/parser/unicode.c +156 -0
  23. data/ext/json/ext/parser/unicode.h +44 -0
  24. data/install.rb +13 -8
  25. data/lib/json.rb +101 -614
  26. data/lib/json/common.rb +184 -0
  27. data/lib/json/editor.rb +19 -10
  28. data/lib/json/ext.rb +13 -0
  29. data/lib/json/pure.rb +75 -0
  30. data/lib/json/pure/generator.rb +321 -0
  31. data/lib/json/pure/parser.rb +210 -0
  32. data/lib/json/version.rb +8 -0
  33. data/tests/fixtures/fail1.json +1 -0
  34. data/tests/fixtures/fail10.json +1 -0
  35. data/tests/fixtures/fail11.json +1 -0
  36. data/tests/fixtures/fail12.json +1 -0
  37. data/tests/fixtures/fail13.json +1 -0
  38. data/tests/fixtures/fail14.json +1 -0
  39. data/tests/fixtures/fail15.json +1 -0
  40. data/tests/fixtures/fail16.json +1 -0
  41. data/tests/fixtures/fail17.json +1 -0
  42. data/tests/fixtures/fail19.json +1 -0
  43. data/tests/fixtures/fail2.json +1 -0
  44. data/tests/fixtures/fail20.json +1 -0
  45. data/tests/fixtures/fail21.json +1 -0
  46. data/tests/fixtures/fail22.json +1 -0
  47. data/tests/fixtures/fail23.json +1 -0
  48. data/tests/fixtures/fail24.json +1 -0
  49. data/tests/fixtures/fail25.json +1 -0
  50. data/tests/fixtures/fail26.json +1 -0
  51. data/tests/fixtures/fail27.json +2 -0
  52. data/tests/fixtures/fail28.json +2 -0
  53. data/tests/fixtures/fail3.json +1 -0
  54. data/tests/fixtures/fail4.json +1 -0
  55. data/tests/fixtures/fail5.json +1 -0
  56. data/tests/fixtures/fail6.json +1 -0
  57. data/tests/fixtures/fail7.json +1 -0
  58. data/tests/fixtures/fail8.json +1 -0
  59. data/tests/fixtures/fail9.json +1 -0
  60. data/tests/fixtures/pass1.json +56 -0
  61. data/tests/fixtures/pass18.json +1 -0
  62. data/tests/fixtures/pass2.json +1 -0
  63. data/tests/fixtures/pass3.json +6 -0
  64. data/tests/runner.rb +8 -2
  65. data/tests/test_json.rb +102 -154
  66. data/tests/test_json_addition.rb +94 -0
  67. data/tests/test_json_fixtures.rb +30 -0
  68. data/tests/test_json_generate.rb +81 -0
  69. data/tests/test_json_unicode.rb +55 -0
  70. data/tools/fuzz.rb +133 -0
  71. data/tools/server.rb +62 -0
  72. metadata +87 -10
  73. data/bla.json.tmp +0 -0
  74. data/lib/json.rb.orig +0 -708
@@ -0,0 +1,156 @@
1
+ /* vim: set cin et sw=4 ts=4: */
2
+
3
+ #include "unicode.h"
4
+
5
+ /*
6
+ * Copyright 2001-2004 Unicode, Inc.
7
+ *
8
+ * Disclaimer
9
+ *
10
+ * This source code is provided as is by Unicode, Inc. No claims are
11
+ * made as to fitness for any particular purpose. No warranties of any
12
+ * kind are expressed or implied. The recipient agrees to determine
13
+ * applicability of information provided. If this file has been
14
+ * purchased on magnetic or optical media from Unicode, Inc., the
15
+ * sole remedy for any claim will be exchange of defective media
16
+ * within 90 days of receipt.
17
+ *
18
+ * Limitations on Rights to Redistribute This Code
19
+ *
20
+ * Unicode, Inc. hereby grants the right to freely use the information
21
+ * supplied in this file in the creation of products supporting the
22
+ * Unicode Standard, and to make copies of this file in any form
23
+ * for internal or external distribution as long as this notice
24
+ * remains attached.
25
+ */
26
+
27
+ /*
28
+ * Index into the table below with the first byte of a UTF-8 sequence to
29
+ * get the number of trailing bytes that are supposed to follow it.
30
+ * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
31
+ * left as-is for anyone who may want to do such conversion, which was
32
+ * allowed in earlier algorithms.
33
+ */
34
+ static const char trailingBytesForUTF8[256] = {
35
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
36
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
37
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
38
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
39
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
40
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
41
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
42
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
43
+ };
44
+
45
+ /*
46
+ * Magic values subtracted from a buffer value during UTF8 conversion.
47
+ * This table contains as many values as there might be trailing bytes
48
+ * in a UTF-8 sequence.
49
+ */
50
+ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
51
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
52
+
53
+ /*
54
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
55
+ * into the first byte, depending on how many bytes follow. There are
56
+ * as many entries in this table as there are UTF-8 sequence types.
57
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
58
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
59
+ */
60
+ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
61
+
62
+ inline char *JSON_convert_UTF16_to_UTF8 (
63
+ VALUE buffer,
64
+ char *source,
65
+ char *sourceEnd,
66
+ ConversionFlags flags)
67
+ {
68
+ UTF16 *tmp, *tmpPtr, *tmpEnd;
69
+ char buf[5];
70
+ long n = 0, i;
71
+ char *p = source - 1;
72
+
73
+ while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') {
74
+ p += 6;
75
+ n++;
76
+ }
77
+ p = source + 1;
78
+ buf[4] = 0;
79
+ tmpPtr = tmp = ALLOC_N(UTF16, n);
80
+ tmpEnd = tmp + n;
81
+ for (i = 0; i < n; i++) {
82
+ buf[0] = *p++;
83
+ buf[1] = *p++;
84
+ buf[2] = *p++;
85
+ buf[3] = *p++;
86
+ tmpPtr[i] = strtol(buf, NULL, 16);
87
+ p += 2;
88
+ }
89
+
90
+ while (tmpPtr < tmpEnd) {
91
+ UTF32 ch;
92
+ unsigned short bytesToWrite = 0;
93
+ const UTF32 byteMask = 0xBF;
94
+ const UTF32 byteMark = 0x80;
95
+ ch = *tmpPtr++;
96
+ /* If we have a surrogate pair, convert to UTF32 first. */
97
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
98
+ /* If the 16 bits following the high surrogate are in the source
99
+ * buffer... */
100
+ if (tmpPtr < tmpEnd) {
101
+ UTF32 ch2 = *tmpPtr;
102
+ /* If it's a low surrogate, convert to UTF32. */
103
+ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
104
+ ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
105
+ + (ch2 - UNI_SUR_LOW_START) + halfBase;
106
+ ++tmpPtr;
107
+ } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
108
+ free(tmp);
109
+ rb_raise(rb_path2class("JSON::ParserError"),
110
+ "source sequence is illegal/malformed near %s", source);
111
+ }
112
+ } else { /* We don't have the 16 bits following the high surrogate. */
113
+ free(tmp);
114
+ rb_raise(rb_path2class("JSON::ParserError"),
115
+ "partial character in source, but hit end near %s", source);
116
+ break;
117
+ }
118
+ } else if (flags == strictConversion) {
119
+ /* UTF-16 surrogate values are illegal in UTF-32 */
120
+ if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
121
+ free(tmp);
122
+ rb_raise(rb_path2class("JSON::ParserError"),
123
+ "source sequence is illegal/malformed near %s", source);
124
+ }
125
+ }
126
+ /* Figure out how many bytes the result will require */
127
+ if (ch < (UTF32) 0x80) {
128
+ bytesToWrite = 1;
129
+ } else if (ch < (UTF32) 0x800) {
130
+ bytesToWrite = 2;
131
+ } else if (ch < (UTF32) 0x10000) {
132
+ bytesToWrite = 3;
133
+ } else if (ch < (UTF32) 0x110000) {
134
+ bytesToWrite = 4;
135
+ } else {
136
+ bytesToWrite = 3;
137
+ ch = UNI_REPLACEMENT_CHAR;
138
+ }
139
+
140
+ buf[0] = 0;
141
+ buf[1] = 0;
142
+ buf[2] = 0;
143
+ buf[3] = 0;
144
+ p = buf + bytesToWrite;
145
+ switch (bytesToWrite) { /* note: everything falls through. */
146
+ case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
147
+ case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
148
+ case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
149
+ case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]);
150
+ }
151
+ rb_str_buf_cat(buffer, p, bytesToWrite);
152
+ }
153
+ free(tmp);
154
+ source += 5 + (n - 1) * 6;
155
+ return source;
156
+ }
@@ -0,0 +1,44 @@
1
+
2
+ #ifndef _PARSER_UNICODE_H_
3
+ #define _PARSER_UNICODE_H_
4
+
5
+ #include "ruby.h"
6
+
7
+ typedef unsigned long UTF32; /* at least 32 bits */
8
+ typedef unsigned short UTF16; /* at least 16 bits */
9
+ typedef unsigned char UTF8; /* typically 8 bits */
10
+
11
+ #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
12
+ #define UNI_MAX_BMP (UTF32)0x0000FFFF
13
+ #define UNI_MAX_UTF16 (UTF32)0x0010FFFF
14
+ #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
15
+ #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
16
+
17
+ #define UNI_SUR_HIGH_START (UTF32)0xD800
18
+ #define UNI_SUR_HIGH_END (UTF32)0xDBFF
19
+ #define UNI_SUR_LOW_START (UTF32)0xDC00
20
+ #define UNI_SUR_LOW_END (UTF32)0xDFFF
21
+
22
+ static const int halfShift = 10; /* used for shifting by 10 bits */
23
+
24
+ static const UTF32 halfBase = 0x0010000UL;
25
+ static const UTF32 halfMask = 0x3FFUL;
26
+
27
+ typedef enum {
28
+ conversionOK = 0, /* conversion successful */
29
+ sourceExhausted, /* partial character in source, but hit end */
30
+ targetExhausted, /* insuff. room in target for conversion */
31
+ sourceIllegal /* source sequence is illegal/malformed */
32
+ } ConversionResult;
33
+
34
+ typedef enum {
35
+ strictConversion = 0,
36
+ lenientConversion
37
+ } ConversionFlags;
38
+
39
+ char *JSON_convert_UTF16_to_UTF8 (
40
+ VALUE buffer,
41
+ char *source,
42
+ char *sourceEnd,
43
+ ConversionFlags flags);
44
+ #endif
data/install.rb CHANGED
@@ -6,16 +6,21 @@ include FileUtils::Verbose
6
6
 
7
7
  include Config
8
8
 
9
- dest = CONFIG["bindir"]
9
+ bindir = CONFIG["bindir"]
10
10
  cd 'bin' do
11
11
  filename = 'edit_json.rb'
12
- install(filename, dest)
12
+ #install(filename, bindir)
13
13
  end
14
- dest = CONFIG["sitelibdir"]
14
+ sitelibdir = CONFIG["sitelibdir"]
15
15
  cd 'lib' do
16
- install('json.rb', dest)
17
- mkdir_p File.join(dest,'json')
18
- install(File.join('json', 'editor.rb'), File.join(dest,'json'))
19
- install(File.join('json', 'json.xpm'), File.join(dest,'json'))
16
+ install('json.rb', sitelibdir)
17
+ mkdir_p File.join(sitelibdir, 'json')
18
+ for file in Dir['json/**/*.{rb,xpm}']
19
+ d = File.join(sitelibdir, file)
20
+ mkdir_p File.dirname(d)
21
+ install(file, d)
22
+ end
23
+ install(File.join('json', 'editor.rb'), File.join(sitelibdir,'json'))
24
+ install(File.join('json', 'json.xpm'), File.join(sitelibdir,'json'))
20
25
  end
21
- # vim: set et sw=2 ts=2:
26
+ warn " *** Installed PURE ruby library."
@@ -1,7 +1,33 @@
1
- # = json - JSON library for Ruby
1
+ require 'json/common'
2
+ # = json - JSON for Ruby
2
3
  #
3
4
  # == Description
4
5
  #
6
+ # This is an implementation of the JSON specification according to RFC 4627
7
+ # (http://www.ietf.org/rfc/rfc4627.txt). Starting from version 1.0.0 on there
8
+ # will be two variants available:
9
+ #
10
+ # * A pure ruby variant, that relies on the iconv and the stringscan
11
+ # extensions, which are both part of the ruby standard library.
12
+ # * The quite a bit faster C extension variant, which is in parts implemented
13
+ # in C and comes with its own unicode conversion functions and a parser
14
+ # generated by the ragel state machine compiler
15
+ # (http://www.cs.queensu.ca/~thurston/ragel).
16
+ #
17
+ # Both variants of the JSON generator escape all non-ASCII and control
18
+ # characters with \uXXXX escape sequences, and support UTF-16 surrogate pairs
19
+ # in order to be able to generate the whole range of unicode code points. This
20
+ # means that generated JSON text is encoded as UTF-8 (because ASCII is a subset
21
+ # of UTF-8) and at the same time avoids decoding problems for receiving
22
+ # endpoints, that don't expect UTF-8 encoded texts. On the negative side this
23
+ # may lead to a bit longer strings than necessary.
24
+ #
25
+ # All strings, that are to be encoded as JSON strings, should be UTF-8 byte
26
+ # sequences on the Ruby side. To encode raw binary strings, that aren't UTF-8
27
+ # encoded, please use the to_json_raw_object method of String (which produces
28
+ # an object, that contains a byte array) and decode the result on the receiving
29
+ # endpoint.
30
+ #
5
31
  # == Author
6
32
  #
7
33
  # Florian Frank <mailto:flori@ping.de>
@@ -22,12 +48,61 @@
22
48
  #
23
49
  # * http://json.rubyforge.org
24
50
  #
51
+ # == Speed Comparisons
52
+ #
53
+ # I have created some benchmark results (see the benchmarks subdir of the
54
+ # package) for the JSON-Parser to estimate the speed up in the C extension:
55
+ #
56
+ # JSON::Pure::Parser:: 28.90 calls/second
57
+ # JSON::Ext::Parser:: 505.50 calls/second
58
+ #
59
+ # This is ca. <b>17.5</b> times the speed of the pure Ruby implementation.
60
+ #
61
+ # I have benchmarked the JSON-Generator as well. This generates a few more
62
+ # values, because there are different modes, that also influence the achieved
63
+ # speed:
64
+ #
65
+ # * JSON::Pure::Generator:
66
+ # generate:: 35.06 calls/second
67
+ # pretty_generate:: 34.00 calls/second
68
+ # fast_generate:: 41.06 calls/second
69
+ #
70
+ # * JSON::Ext::Generator:
71
+ # generate:: 492.11 calls/second
72
+ # pretty_generate:: 348.85 calls/second
73
+ # fast_generate:: 541.60 calls/second
74
+ #
75
+ # * Speedup Ext/Pure:
76
+ # generate safe:: 14.0 times
77
+ # generate pretty:: 10.3 times
78
+ # generate fast:: 13.2 times
79
+ #
80
+ # The rails framework includes a generator as well, but it seems to be rather
81
+ # slow: I measured only 23.87 calls/second which is slower than any of my pure
82
+ # generator results. Here is a comparison of the different speedups with the Rails
83
+ # measurement as the divisor:
84
+ #
85
+ # * Speedup Pure/Rails:
86
+ # generate safe:: 1.5 times
87
+ # generate pretty:: 1.4 times
88
+ # generate fast:: 1.7 times
89
+ #
90
+ # * Speedup Ext/Rails:
91
+ # generate safe:: 20.6 times
92
+ # generate pretty:: 14.6 times
93
+ # generate fast:: 22.7 times
94
+ #
95
+ # To achieve the fastest JSON text output, you can use the
96
+ # fast_generate/fast_unparse methods. Beware, that this will disable the
97
+ # checking for circular Ruby data structures, which may cause JSON to go into
98
+ # an infinite loop.
99
+ #
25
100
  # == Examples
26
101
  #
27
- # To create a JSON string from a ruby data structure, you
28
- # can call JSON.unparse (or JSON.generate) like that:
102
+ # To create a JSON text from a ruby data structure, you
103
+ # can call JSON.generate (or JSON.unparse) like that:
29
104
  #
30
- # json = JSON.unparse [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
105
+ # json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
31
106
  # # => "[1,2,{\"a\":3.141},false,true,null,\"4..10\"]"
32
107
  #
33
108
  # It's also possible to call the #to_json method directly.
@@ -35,8 +110,12 @@
35
110
  # json = [1, 2, {"a"=>3.141}, false, true, nil, 4..10].to_json
36
111
  # # => "[1,2,{\"a\":3.141},false,true,null,\"4..10\"]"
37
112
  #
38
- # To get back a ruby data structure, you have to call
39
- # JSON.parse on the JSON string:
113
+ # To create a valid JSON text you have to make sure, that the output is
114
+ # embedded in either a JSON array [] or a JSON object {}. The easiest way to do
115
+ # this, is by putting your values in a Ruby Array or Hash instance.
116
+ #
117
+ # To get back a ruby data structure from a JSON text, you have to call
118
+ # JSON.parse on it:
40
119
  #
41
120
  # JSON.parse json
42
121
  # # => [1, 2, {"a"=>3.141}, false, true, nil, "4..10"]
@@ -46,7 +125,7 @@
46
125
  # or arbitrary classes. In this case the json library falls back to call
47
126
  # Object#to_json, which is the same as #to_s.to_json.
48
127
  #
49
- # It's possible to extend JSON to support serialization of arbitray classes by
128
+ # It's possible to extend JSON to support serialization of arbitrary classes by
50
129
  # simply implementing a more specialized version of the #to_json method, that
51
130
  # should return a JSON object (a hash converted to JSON with #to_json)
52
131
  # like this (don't forget the *a for all the arguments):
@@ -54,7 +133,7 @@
54
133
  # class Range
55
134
  # def to_json(*a)
56
135
  # {
57
- # 'json_class' => self.class.name,
136
+ # 'json_class' => self.class.name, # = 'Range'
58
137
  # 'data' => [ first, last, exclude_end? ]
59
138
  # }.to_json(*a)
60
139
  # end
@@ -78,18 +157,18 @@
78
157
  #
79
158
  # Now it possible to serialize/deserialize ranges as well:
80
159
  #
81
- # json = JSON.unparse [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
160
+ # json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
82
161
  # # => "[1,2,{\"a\":3.141},false,true,null,{\"json_class\":\"Range\",\"data\":[4,10,false]}]"
83
162
  # JSON.parse json
84
163
  # # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10]
85
164
  #
86
- # JSON.unparse always creates the shortes possible string representation of a
165
+ # JSON.generate always creates the shortest possible string representation of a
87
166
  # ruby data structure in one line. This good for data storage or network
88
- # protocols, but not so good for humans to read. Fortunately there's
89
- # also JSON.pretty_unparse (or JSON.pretty_generate) that creates a more
167
+ # protocols, but not so good for humans to read. Fortunately there's also
168
+ # JSON.pretty_generate (or JSON.pretty_unparse) that creates a more
90
169
  # readable output:
91
170
  #
92
- # puts JSON.pretty_unparse([1, 2, {"a"=>3.141}, false, true, nil, 4..10])
171
+ # puts JSON.pretty_generate([1, 2, {"a"=>3.141}, false, true, nil, 4..10])
93
172
  # [
94
173
  # 1,
95
174
  # 2,
@@ -110,609 +189,17 @@
110
189
  # ]
111
190
  #
112
191
  # There are also the methods Kernel#j for unparse, and Kernel#jj for
113
- # pretty_unparse output to the console, that work analogous to Kernel#p and
114
- # Kernel#pp.
115
- #
116
-
117
- require 'strscan'
118
-
119
- # This module is the namespace for all the JSON related classes. It also
120
- # defines some module functions to expose a nicer API to users, instead
121
- # of using the parser and other classes directly.
192
+ # pretty_unparse output to the console, that work analogous to Core Ruby's p
193
+ # and the pp library's pp methods.
194
+ #
195
+ # The script tools/server.rb contains a small example if you want to test, how
196
+ # receiving a JSON object from a webrick server in your browser with the
197
+ # javascript prototype library (http://www.prototypejs.org) works.
198
+ #
122
199
  module JSON
123
- # The base exception for JSON errors.
124
- JSONError = Class.new StandardError
125
-
126
- # This exception is raise, if a parser error occurs.
127
- ParserError = Class.new JSONError
128
-
129
- # This exception is raise, if a unparser error occurs.
130
- UnparserError = Class.new JSONError
131
-
132
- # If a circular data structure is encountered while unparsing
133
- # this exception is raised.
134
- CircularDatastructure = Class.new UnparserError
135
-
136
- class << self
137
- # Switches on Unicode support, if _enable_ is _true_. Otherwise switches
138
- # Unicode support off.
139
- def support_unicode=(enable)
140
- @support_unicode = enable
141
- end
142
-
143
- # Returns _true_ if JSON supports unicode, otherwise _false_ is returned.
144
- #
145
- # If loading of the iconv library fails, or it doesn't support utf8/utf16
146
- # encoding, this will be set to false, as a fallback.
147
- def support_unicode?
148
- !!@support_unicode
149
- end
150
- end
151
- JSON.support_unicode = true # default, however it's possible to switch off
152
- # full unicode support, if non-ascii bytes should be
153
- # just passed through.
154
-
155
200
  begin
156
- require 'iconv'
157
- # An iconv instance to convert from UTF8 to UTF16 Big Endian.
158
- UTF16toUTF8 = Iconv.new('utf-8', 'utf-16be')
159
- # An iconv instance to convert from UTF16 Big Endian to UTF8.
160
- UTF8toUTF16 = Iconv.new('utf-16be', 'utf-8'); UTF8toUTF16.iconv('no bom')
161
- rescue Errno::EINVAL, Iconv::InvalidEncoding
162
- # Iconv doesn't support big endian utf-16. Let's try to hack this manually
163
- # into the converters.
164
- begin
165
- old_verbose = $VERBOSE
166
- $VERBOSE = nil
167
- # An iconv instance to convert from UTF8 to UTF16 Big Endian.
168
- UTF16toUTF8 = Iconv.new('utf-8', 'utf-16')
169
- # An iconv instance to convert from UTF16 Big Endian to UTF8.
170
- UTF8toUTF16 = Iconv.new('utf-16', 'utf-8'); UTF8toUTF16.iconv('no bom')
171
- if UTF8toUTF16.iconv("\xe2\x82\xac") == "\xac\x20"
172
- swapper = Class.new do
173
- def initialize(iconv)
174
- @iconv = iconv
175
- end
176
-
177
- def iconv(string)
178
- result = @iconv.iconv(string)
179
- JSON.swap!(result)
180
- end
181
- end
182
- UTF8toUTF16 = swapper.new(UTF8toUTF16)
183
- end
184
- if UTF16toUTF8.iconv("\xac\x20") == "\xe2\x82\xac"
185
- swapper = Class.new do
186
- def initialize(iconv)
187
- @iconv = iconv
188
- end
189
-
190
- def iconv(string)
191
- string = JSON.swap!(string.dup)
192
- @iconv.iconv(string)
193
- end
194
- end
195
- UTF16toUTF8 = swapper.new(UTF16toUTF8)
196
- end
197
- rescue Errno::EINVAL, Iconv::InvalidEncoding
198
- # Enforce disabling of unicode support, if iconv doesn't support
199
- # UTF8/UTF16 at all.
200
- JSON.support_unicode = false
201
- ensure
202
- $VERBOSE = old_verbose
203
- end
201
+ require 'json/ext'
204
202
  rescue LoadError
205
- # Enforce disabling of unicode support, if iconv doesn't exist.
206
- JSON.support_unicode = false
207
- end
208
-
209
- # Swap consecutive bytes in string in place.
210
- def self.swap!(string)
211
- 0.upto(string.size / 2) do |i|
212
- break unless string[2 * i + 1]
213
- string[2 * i], string[2 * i + 1] = string[2 * i + 1], string[2 * i]
214
- end
215
- string
216
- end
217
-
218
- # This class implements the JSON parser that is used to parse a JSON string
219
- # into a Ruby data structure.
220
- class Parser < StringScanner
221
- STRING = /"((?:[^"\\]|\\.)*)"/
222
- INTEGER = /-?(?:0|[1-9]\d*)/
223
- FLOAT = /-?(?:0|[1-9]\d*)\.(\d+)(?i:e[+-]?\d+)?/
224
- OBJECT_OPEN = /\{/
225
- OBJECT_CLOSE = /\}/
226
- ARRAY_OPEN = /\[/
227
- ARRAY_CLOSE = /\]/
228
- PAIR_DELIMITER = /:/
229
- COLLECTION_DELIMITER = /,/
230
- TRUE = /true/
231
- FALSE = /false/
232
- NULL = /null/
233
- IGNORE = %r(
234
- (?:
235
- //[^\n\r]*[\n\r]| # line comments
236
- /\* # c-style comments
237
- (?:
238
- [^*/]| # normal chars
239
- /[^*]| # slashes that do not start a nested comment
240
- \*[^/]| # asterisks that do not end this comment
241
- /(?=\*/) # single slash before this comment's end
242
- )*
243
- \*/ # the end of this comment
244
- |[ \t\r\n]+ # whitespaces: space, horicontal tab, lf, cr
245
- )+
246
- )mx
247
-
248
- UNPARSED = Object.new
249
-
250
- # Parses the current JSON string and returns the complete data structure
251
- # as a result.
252
- def parse
253
- reset
254
- obj = nil
255
- until eos?
256
- case
257
- when scan(OBJECT_OPEN)
258
- obj and raise ParserError, "source '#{peek(20)}' not in JSON!"
259
- obj = parse_object
260
- when scan(ARRAY_OPEN)
261
- obj and raise ParserError, "source '#{peek(20)}' not in JSON!"
262
- obj = parse_array
263
- when skip(IGNORE)
264
- ;
265
- else
266
- raise ParserError, "source '#{peek(20)}' not in JSON!"
267
- end
268
- end
269
- obj or raise ParserError, "source did not contain any JSON!"
270
- obj
271
- end
272
-
273
- private
274
-
275
- def parse_string
276
- if scan(STRING)
277
- return '' if self[1].empty?
278
- self[1].gsub(%r(\\(?:[\\bfnrt"/]|u([A-Fa-f\d]{4})))) do
279
- case $~[0]
280
- when '\\"' then '"'
281
- when '\\\\' then '\\'
282
- when '\\/' then '/'
283
- when '\\b' then "\b"
284
- when '\\f' then "\f"
285
- when '\\n' then "\n"
286
- when '\\r' then "\r"
287
- when '\\t' then "\t"
288
- else
289
- if JSON.support_unicode? and $KCODE == 'UTF8'
290
- JSON.utf16_to_utf8($~[1])
291
- else
292
- # if utf8 mode is switched off or unicode not supported, try to
293
- # transform unicode \u-notation to bytes directly:
294
- $~[1].to_i(16).chr
295
- end
296
- end
297
- end
298
- else
299
- UNPARSED
300
- end
301
- end
302
-
303
- def parse_value
304
- case
305
- when scan(FLOAT)
306
- Float(self[0].sub(/\.([eE])/, '.0\1'))
307
- when scan(INTEGER)
308
- Integer(self[0])
309
- when scan(TRUE)
310
- true
311
- when scan(FALSE)
312
- false
313
- when scan(NULL)
314
- nil
315
- when (string = parse_string) != UNPARSED
316
- string
317
- when scan(ARRAY_OPEN)
318
- parse_array
319
- when scan(OBJECT_OPEN)
320
- parse_object
321
- else
322
- UNPARSED
323
- end
324
- end
325
-
326
- def parse_array
327
- result = []
328
- until eos?
329
- case
330
- when (value = parse_value) != UNPARSED
331
- result << value
332
- skip(IGNORE)
333
- unless scan(COLLECTION_DELIMITER) or match?(ARRAY_CLOSE)
334
- raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!"
335
- end
336
- when scan(ARRAY_CLOSE)
337
- break
338
- when skip(IGNORE)
339
- ;
340
- else
341
- raise ParserError, "unexpected token in array at '#{peek(20)}'!"
342
- end
343
- end
344
- result
345
- end
346
-
347
- def parse_object
348
- result = {}
349
- until eos?
350
- case
351
- when (string = parse_string) != UNPARSED
352
- skip(IGNORE)
353
- unless scan(PAIR_DELIMITER)
354
- raise ParserError, "expected ':' in object at '#{peek(20)}'!"
355
- end
356
- skip(IGNORE)
357
- unless (value = parse_value).equal? UNPARSED
358
- result[string] = value
359
- skip(IGNORE)
360
- unless scan(COLLECTION_DELIMITER) or match?(OBJECT_CLOSE)
361
- raise ParserError,
362
- "expected ',' or '}' in object at '#{peek(20)}'!"
363
- end
364
- else
365
- raise ParserError, "expected value in object at '#{peek(20)}'!"
366
- end
367
- when scan(OBJECT_CLOSE)
368
- if klassname = result['json_class']
369
- klass = klassname.sub(/^:+/, '').split(/::/).inject(Object) do |p,k|
370
- p.const_get(k) rescue nil
371
- end
372
- break unless klass and klass.json_creatable?
373
- result = klass.json_create(result)
374
- end
375
- break
376
- when skip(IGNORE)
377
- ;
378
- else
379
- raise ParserError, "unexpected token in object at '#{peek(20)}'!"
380
- end
381
- end
382
- result
383
- end
384
- end
385
-
386
- # This class is used to create State instances, that are use to hold data
387
- # while unparsing a Ruby data structure into a JSON string.
388
- class State
389
- # Creates a State object from _opts_, which ought to be Hash to create a
390
- # new State instance configured by opts, something else to create an
391
- # unconfigured instance. If _opts_ is a State object, it is just returned.
392
- def self.from_state(opts)
393
- case opts
394
- when self
395
- opts
396
- when Hash
397
- new(opts)
398
- else
399
- new
400
- end
401
- end
402
-
403
- # Instantiates a new State object, configured by _opts_.
404
- def initialize(opts = {})
405
- @indent = opts[:indent] || ''
406
- @space = opts[:space] || ''
407
- @object_nl = opts[:object_nl] || ''
408
- @array_nl = opts[:array_nl] || ''
409
- @seen = {}
410
- end
411
-
412
- # This string is used to indent levels in the JSON string.
413
- attr_accessor :indent
414
-
415
- # This string is used to include a space between the tokens in a JSON
416
- # string.
417
- attr_accessor :space
418
-
419
- # This string is put at the end of a line that holds a JSON object (or
420
- # Hash).
421
- attr_accessor :object_nl
422
-
423
- # This string is put at the end of a line that holds a JSON array.
424
- attr_accessor :array_nl
425
-
426
- # Returns _true_, if _object_ was already seen during this Unparsing run.
427
- def seen?(object)
428
- @seen.key?(object.__id__)
429
- end
430
-
431
- # Remember _object_, to find out if it was already encountered (to find out
432
- # if a cyclic data structure is unparsed).
433
- def remember(object)
434
- @seen[object.__id__] = true
435
- end
436
-
437
- # Forget _object_ for this Unparsing run.
438
- def forget(object)
439
- @seen.delete object.__id__
440
- end
441
- end
442
-
443
- module_function
444
-
445
- # Convert _string_ from UTF8 encoding to UTF16 (big endian) encoding and
446
- # return it.
447
- def utf8_to_utf16(string)
448
- JSON::UTF8toUTF16.iconv(string).unpack('H*')[0]
449
- end
450
-
451
- # Convert _string_ from UTF16 (big endian) encoding to UTF8 encoding and
452
- # return it.
453
- def utf16_to_utf8(string)
454
- bytes = '' << string[0, 2].to_i(16) << string[2, 2].to_i(16)
455
- JSON::UTF16toUTF8.iconv(bytes)
456
- end
457
-
458
- # Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with
459
- # UTF16 big endian characters as \u????, and return it.
460
- def utf8_to_json(string)
461
- i, n, result = 0, string.size, ''
462
- while i < n
463
- char = string[i]
464
- case
465
- when char == ?\b then result << '\b'
466
- when char == ?\t then result << '\t'
467
- when char == ?\n then result << '\n'
468
- when char == ?\f then result << '\f'
469
- when char == ?\r then result << '\r'
470
- when char == ?" then result << '\"'
471
- when char == ?\\ then result << '\\\\'
472
- when char == ?/ then result << '\/'
473
- when char.between?(0x0, 0x1f) then result << "\\u%04x" % char
474
- when char.between?(0x20, 0x7f) then result << char
475
- when !(JSON.support_unicode? && $KCODE == 'UTF8')
476
- # if utf8 mode is switched off or unicode not supported, just pass
477
- # bytes through:
478
- result << char
479
- when char & 0xe0 == 0xc0
480
- result << '\u' << utf8_to_utf16(string[i, 2])
481
- i += 1
482
- when char & 0xf0 == 0xe0
483
- result << '\u' << utf8_to_utf16(string[i, 3])
484
- i += 2
485
- when char & 0xf8 == 0xf0
486
- result << '\u' << utf8_to_utf16(string[i, 4])
487
- i += 3
488
- when char & 0xfc == 0xf8
489
- result << '\u' << utf8_to_utf16(string[i, 5])
490
- i += 4
491
- when char & 0xfe == 0xfc
492
- result << '\u' << utf8_to_utf16(string[i, 6])
493
- i += 5
494
- else
495
- raise JSON::UnparserError, "Encountered unknown UTF-8 byte: %x!" % char
496
- end
497
- i += 1
498
- end
499
- result
500
- end
501
-
502
- # Parse the JSON string _source_ into a Ruby data structure and return it.
503
- def parse(source)
504
- Parser.new(source).parse
505
- end
506
-
507
- # Unparse the Ruby data structure _obj_ into a single line JSON string and
508
- # return it. _state_ is a JSON::State object, that can be used to configure
509
- # the output further.
510
- def unparse(obj, state = nil)
511
- obj.to_json(JSON::State.from_state(state))
512
- end
513
-
514
- alias generate unparse
515
-
516
- # Unparse the Ruby data structure _obj_ into a JSON string and return it.
517
- # The returned string is a prettier form of the string returned by #unparse.
518
- def pretty_unparse(obj)
519
- state = JSON::State.new(
520
- :indent => ' ',
521
- :space => ' ',
522
- :object_nl => "\n",
523
- :array_nl => "\n"
524
- )
525
- obj.to_json(state)
526
- end
527
-
528
- alias pretty_generate pretty_unparse
529
- end
530
-
531
- class Object
532
- # Converts this object to a string (calling #to_s), converts
533
- # it to a JSON string, and returns the result. This is a fallback, if no
534
- # special method #to_json was defined for some object.
535
- # _state_ is a JSON::State object, that can also be used
536
- # to configure the produced JSON string output further.
537
-
538
- def to_json(*) to_s.to_json end
539
- end
540
-
541
- class Hash
542
- # Returns a JSON string containing a JSON object, that is unparsed from
543
- # this Hash instance.
544
- # _state_ is a JSON::State object, that can also be used to configure the
545
- # produced JSON string output further.
546
- # _depth_ is used to find out nesting depth, to indent accordingly.
547
- def to_json(state = nil, depth = 0)
548
- state = JSON::State.from_state(state)
549
- json_check_circular(state) { json_transform(state, depth) }
550
- end
551
-
552
- private
553
-
554
- def json_check_circular(state)
555
- if state
556
- state.seen?(self) and raise JSON::CircularDatastructure,
557
- "circular data structures not supported!"
558
- state.remember self
559
- end
560
- yield
561
- ensure
562
- state and state.forget self
563
- end
564
-
565
- def json_shift(state, depth)
566
- state and not state.object_nl.empty? or return ''
567
- state.indent * depth
568
- end
569
-
570
- def json_transform(state, depth)
571
- delim = ','
572
- delim << state.object_nl if state
573
- result = '{'
574
- result << state.object_nl if state
575
- result << map { |key,value|
576
- json_shift(state, depth + 1) <<
577
- key.to_s.to_json(state, depth + 1) <<
578
- ':' << state.space << value.to_json(state, depth + 1)
579
- }.join(delim)
580
- result << state.object_nl if state
581
- result << json_shift(state, depth)
582
- result << '}'
583
- result
584
- end
585
- end
586
-
587
- class Array
588
- # Returns a JSON string containing a JSON array, that is unparsed from
589
- # this Array instance.
590
- # _state_ is a JSON::State object, that can also be used to configure the
591
- # produced JSON string output further.
592
- # _depth_ is used to find out nesting depth, to indent accordingly.
593
- def to_json(state = nil, depth = 0)
594
- state = JSON::State.from_state(state)
595
- json_check_circular(state) { json_transform(state, depth) }
596
- end
597
-
598
- private
599
-
600
- def json_check_circular(state)
601
- if state
602
- state.seen?(self) and raise JSON::CircularDatastructure,
603
- "circular data structures not supported!"
604
- state.remember self
605
- end
606
- yield
607
- ensure
608
- state and state.forget self
609
- end
610
-
611
- def json_shift(state, depth)
612
- state and not state.array_nl.empty? or return ''
613
- state.indent * depth
614
- end
615
-
616
- def json_transform(state, depth)
617
- delim = ','
618
- delim << state.array_nl if state
619
- result = '['
620
- result << state.array_nl if state
621
- result << map { |value|
622
- json_shift(state, depth + 1) << value.to_json(state, depth + 1)
623
- }.join(delim)
624
- result << state.array_nl if state
625
- result << json_shift(state, depth)
626
- result << ']'
627
- result
628
- end
629
- end
630
-
631
- class Integer
632
- # Returns a JSON string representation for this Integer number.
633
- def to_json(*) to_s end
634
- end
635
-
636
- class Float
637
- # Returns a JSON string representation for this Float number.
638
- def to_json(*) to_s end
639
- end
640
-
641
- class String
642
- # This string should be encoded with UTF-8 (if JSON unicode support is
643
- # enabled). A call to this method returns a JSON string
644
- # encoded with UTF16 big endian characters as \u????. If
645
- # JSON.support_unicode? is false only control characters are encoded this
646
- # way, all 8-bit bytes are just passed through.
647
- def to_json(*)
648
- '"' << JSON::utf8_to_json(self) << '"'
649
- end
650
-
651
- # Raw Strings are JSON Objects (the raw bytes are stored in an array for the
652
- # key "raw"). The Ruby String can be created by this class method.
653
- def self.json_create(o)
654
- o['raw'].pack('C*')
655
- end
656
-
657
- # This method creates a raw object, that can be nested into other data
658
- # structures and will be unparsed as a raw string.
659
- def to_json_raw_object
660
- {
661
- 'json_class' => self.class.name,
662
- 'raw' => self.unpack('C*'),
663
- }
664
- end
665
-
666
- # This method should be used, if you want to convert raw strings to JSON
667
- # instead of UTF-8 strings, e. g. binary data (and JSON Unicode support is
668
- # enabled).
669
- def to_json_raw(*args)
670
- to_json_raw_object.to_json(*args)
671
- end
672
- end
673
-
674
- class TrueClass
675
- # Returns a JSON string for true: 'true'.
676
- def to_json(*) to_s end
677
- end
678
-
679
- class FalseClass
680
- # Returns a JSON string for false: 'false'.
681
- def to_json(*) to_s end
682
- end
683
-
684
- class NilClass
685
- # Returns a JSON string for nil: 'null'.
686
- def to_json(*) 'null' end
687
- end
688
-
689
- module Kernel
690
- # Outputs _objs_ to STDOUT as JSON strings in the shortest form, that is in
691
- # one line.
692
- def j(*objs)
693
- objs.each do |obj|
694
- puts JSON::generate(obj)
695
- end
696
- nil
697
- end
698
-
699
- # Ouputs _objs_ to STDOUT as JSON strings in a pretty format, with
700
- # indentation and over many lines.
701
- def jj(*objs)
702
- objs.each do |obj|
703
- puts JSON::pretty_generate(obj)
704
- end
705
- nil
706
- end
707
- end
708
-
709
- class Class
710
- # Returns true, if this class can be used to create an instance
711
- # from a serialised JSON string. The class has to implement a class
712
- # method _json_create_ that expects a hash as first parameter, which includes
713
- # the required data.
714
- def json_creatable?
715
- respond_to?(:json_create)
203
+ require 'json/pure'
716
204
  end
717
205
  end
718
- # vim: set et sw=2 ts=2: