yajl-ruby 1.0.0-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of yajl-ruby might be problematic. Click here for more details.

Files changed (152) hide show
  1. data/.gitignore +12 -0
  2. data/.rspec +2 -0
  3. data/CHANGELOG.md +327 -0
  4. data/Gemfile +3 -0
  5. data/MIT-LICENSE +20 -0
  6. data/README.md +362 -0
  7. data/Rakefile +2 -0
  8. data/benchmark/encode.rb +72 -0
  9. data/benchmark/encode_json_and_marshal.rb +42 -0
  10. data/benchmark/encode_json_and_yaml.rb +53 -0
  11. data/benchmark/http.rb +32 -0
  12. data/benchmark/parse.rb +94 -0
  13. data/benchmark/parse_json_and_marshal.rb +50 -0
  14. data/benchmark/parse_json_and_yaml.rb +55 -0
  15. data/benchmark/parse_stream.rb +54 -0
  16. data/benchmark/subjects/item.json +1 -0
  17. data/benchmark/subjects/ohai.json +1216 -0
  18. data/benchmark/subjects/ohai.marshal_dump +0 -0
  19. data/benchmark/subjects/ohai.yml +975 -0
  20. data/benchmark/subjects/twitter_search.json +1 -0
  21. data/benchmark/subjects/twitter_stream.json +430 -0
  22. data/benchmark/subjects/unicode.json +1 -0
  23. data/examples/encoding/chunked_encoding.rb +27 -0
  24. data/examples/encoding/one_shot.rb +13 -0
  25. data/examples/encoding/to_an_io.rb +12 -0
  26. data/examples/http/twitter_search_api.rb +12 -0
  27. data/examples/http/twitter_stream_api.rb +26 -0
  28. data/examples/parsing/from_file.rb +14 -0
  29. data/examples/parsing/from_stdin.rb +9 -0
  30. data/examples/parsing/from_string.rb +13 -0
  31. data/ext/yajl/api/yajl_common.h +89 -0
  32. data/ext/yajl/api/yajl_gen.h +161 -0
  33. data/ext/yajl/api/yajl_parse.h +196 -0
  34. data/ext/yajl/api/yajl_version.h +23 -0
  35. data/ext/yajl/extconf.rb +7 -0
  36. data/ext/yajl/yajl.c +164 -0
  37. data/ext/yajl/yajl_alloc.c +65 -0
  38. data/ext/yajl/yajl_alloc.h +50 -0
  39. data/ext/yajl/yajl_buf.c +119 -0
  40. data/ext/yajl/yajl_buf.h +73 -0
  41. data/ext/yajl/yajl_bytestack.h +85 -0
  42. data/ext/yajl/yajl_encode.c +201 -0
  43. data/ext/yajl/yajl_encode.h +52 -0
  44. data/ext/yajl/yajl_ext.c +905 -0
  45. data/ext/yajl/yajl_ext.h +135 -0
  46. data/ext/yajl/yajl_gen.c +344 -0
  47. data/ext/yajl/yajl_lex.c +748 -0
  48. data/ext/yajl/yajl_lex.h +135 -0
  49. data/ext/yajl/yajl_parser.c +450 -0
  50. data/ext/yajl/yajl_parser.h +82 -0
  51. data/ext/yajl/yajl_version.c +7 -0
  52. data/lib/yajl.rb +75 -0
  53. data/lib/yajl/1.8/yajl.so +0 -0
  54. data/lib/yajl/1.9/yajl.so +0 -0
  55. data/lib/yajl/bzip2.rb +11 -0
  56. data/lib/yajl/bzip2/stream_reader.rb +31 -0
  57. data/lib/yajl/bzip2/stream_writer.rb +14 -0
  58. data/lib/yajl/deflate.rb +6 -0
  59. data/lib/yajl/deflate/stream_reader.rb +43 -0
  60. data/lib/yajl/deflate/stream_writer.rb +20 -0
  61. data/lib/yajl/gzip.rb +6 -0
  62. data/lib/yajl/gzip/stream_reader.rb +30 -0
  63. data/lib/yajl/gzip/stream_writer.rb +13 -0
  64. data/lib/yajl/http_stream.rb +212 -0
  65. data/lib/yajl/json_gem.rb +15 -0
  66. data/lib/yajl/json_gem/encoding.rb +51 -0
  67. data/lib/yajl/json_gem/parsing.rb +26 -0
  68. data/lib/yajl/version.rb +3 -0
  69. data/lib/yajl/yajl.rb +2 -0
  70. data/spec/encoding/encoding_spec.rb +271 -0
  71. data/spec/global/global_spec.rb +54 -0
  72. data/spec/http/fixtures/http.bzip2.dump +0 -0
  73. data/spec/http/fixtures/http.chunked.dump +11 -0
  74. data/spec/http/fixtures/http.deflate.dump +0 -0
  75. data/spec/http/fixtures/http.error.dump +12 -0
  76. data/spec/http/fixtures/http.gzip.dump +0 -0
  77. data/spec/http/fixtures/http.html.dump +1220 -0
  78. data/spec/http/fixtures/http.raw.dump +1226 -0
  79. data/spec/http/http_delete_spec.rb +98 -0
  80. data/spec/http/http_error_spec.rb +32 -0
  81. data/spec/http/http_get_spec.rb +109 -0
  82. data/spec/http/http_post_spec.rb +123 -0
  83. data/spec/http/http_put_spec.rb +105 -0
  84. data/spec/http/http_stream_options_spec.rb +27 -0
  85. data/spec/json_gem_compatibility/compatibility_spec.rb +203 -0
  86. data/spec/parsing/active_support_spec.rb +64 -0
  87. data/spec/parsing/chunked_spec.rb +96 -0
  88. data/spec/parsing/fixtures/fail.15.json +1 -0
  89. data/spec/parsing/fixtures/fail.16.json +1 -0
  90. data/spec/parsing/fixtures/fail.17.json +1 -0
  91. data/spec/parsing/fixtures/fail.26.json +1 -0
  92. data/spec/parsing/fixtures/fail11.json +1 -0
  93. data/spec/parsing/fixtures/fail12.json +1 -0
  94. data/spec/parsing/fixtures/fail13.json +1 -0
  95. data/spec/parsing/fixtures/fail14.json +1 -0
  96. data/spec/parsing/fixtures/fail19.json +1 -0
  97. data/spec/parsing/fixtures/fail20.json +1 -0
  98. data/spec/parsing/fixtures/fail21.json +1 -0
  99. data/spec/parsing/fixtures/fail22.json +1 -0
  100. data/spec/parsing/fixtures/fail23.json +1 -0
  101. data/spec/parsing/fixtures/fail24.json +1 -0
  102. data/spec/parsing/fixtures/fail25.json +1 -0
  103. data/spec/parsing/fixtures/fail27.json +2 -0
  104. data/spec/parsing/fixtures/fail28.json +2 -0
  105. data/spec/parsing/fixtures/fail3.json +1 -0
  106. data/spec/parsing/fixtures/fail4.json +1 -0
  107. data/spec/parsing/fixtures/fail5.json +1 -0
  108. data/spec/parsing/fixtures/fail6.json +1 -0
  109. data/spec/parsing/fixtures/fail9.json +1 -0
  110. data/spec/parsing/fixtures/pass.array.json +6 -0
  111. data/spec/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
  112. data/spec/parsing/fixtures/pass.contacts.json +1 -0
  113. data/spec/parsing/fixtures/pass.db100.xml.json +1 -0
  114. data/spec/parsing/fixtures/pass.db1000.xml.json +1 -0
  115. data/spec/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
  116. data/spec/parsing/fixtures/pass.deep_arrays.json +1 -0
  117. data/spec/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
  118. data/spec/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
  119. data/spec/parsing/fixtures/pass.doubles.json +1 -0
  120. data/spec/parsing/fixtures/pass.empty_array.json +1 -0
  121. data/spec/parsing/fixtures/pass.empty_string.json +1 -0
  122. data/spec/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
  123. data/spec/parsing/fixtures/pass.escaped_foobar.json +1 -0
  124. data/spec/parsing/fixtures/pass.item.json +1 -0
  125. data/spec/parsing/fixtures/pass.json-org-sample1.json +23 -0
  126. data/spec/parsing/fixtures/pass.json-org-sample2.json +11 -0
  127. data/spec/parsing/fixtures/pass.json-org-sample3.json +26 -0
  128. data/spec/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
  129. data/spec/parsing/fixtures/pass.json-org-sample4.json +89 -0
  130. data/spec/parsing/fixtures/pass.json-org-sample5.json +27 -0
  131. data/spec/parsing/fixtures/pass.map-spain.xml.json +1 -0
  132. data/spec/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
  133. data/spec/parsing/fixtures/pass.ns-soap.xml.json +1 -0
  134. data/spec/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
  135. data/spec/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
  136. data/spec/parsing/fixtures/pass.numbers-int-4k.json +11 -0
  137. data/spec/parsing/fixtures/pass.numbers-int-64k.json +154 -0
  138. data/spec/parsing/fixtures/pass.twitter-search.json +1 -0
  139. data/spec/parsing/fixtures/pass.twitter-search2.json +1 -0
  140. data/spec/parsing/fixtures/pass.unicode.json +3315 -0
  141. data/spec/parsing/fixtures/pass.yelp.json +1 -0
  142. data/spec/parsing/fixtures/pass1.json +56 -0
  143. data/spec/parsing/fixtures/pass2.json +1 -0
  144. data/spec/parsing/fixtures/pass3.json +6 -0
  145. data/spec/parsing/fixtures_spec.rb +40 -0
  146. data/spec/parsing/one_off_spec.rb +85 -0
  147. data/spec/rcov.opts +3 -0
  148. data/spec/spec_helper.rb +16 -0
  149. data/tasks/compile.rake +35 -0
  150. data/tasks/rspec.rake +16 -0
  151. data/yajl-ruby.gemspec +24 -0
  152. metadata +335 -0
@@ -0,0 +1,748 @@
1
+ /*
2
+ * Copyright 2010, Lloyd Hilaiel.
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions are
6
+ * met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in
13
+ * the documentation and/or other materials provided with the
14
+ * distribution.
15
+ *
16
+ * 3. Neither the name of Lloyd Hilaiel nor the names of its
17
+ * contributors may be used to endorse or promote products derived
18
+ * from this software without specific prior written permission.
19
+ *
20
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ * POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+ #include "yajl_lex.h"
34
+ #include "yajl_buf.h"
35
+
36
+ #include <stdlib.h>
37
+ #include <stdio.h>
38
+ #include <assert.h>
39
+ #include <string.h>
40
+
41
+ #ifdef YAJL_LEXER_DEBUG
42
+ static const char *
43
+ tokToStr(yajl_tok tok)
44
+ {
45
+ switch (tok) {
46
+ case yajl_tok_bool: return "bool";
47
+ case yajl_tok_colon: return "colon";
48
+ case yajl_tok_comma: return "comma";
49
+ case yajl_tok_eof: return "eof";
50
+ case yajl_tok_error: return "error";
51
+ case yajl_tok_left_brace: return "brace";
52
+ case yajl_tok_left_bracket: return "bracket";
53
+ case yajl_tok_null: return "null";
54
+ case yajl_tok_integer: return "integer";
55
+ case yajl_tok_double: return "double";
56
+ case yajl_tok_right_brace: return "brace";
57
+ case yajl_tok_right_bracket: return "bracket";
58
+ case yajl_tok_string: return "string";
59
+ case yajl_tok_string_with_escapes: return "string_with_escapes";
60
+ }
61
+ return "unknown";
62
+ }
63
+ #endif
64
+
65
+ /* Impact of the stream parsing feature on the lexer:
66
+ *
67
+ * YAJL supports stream parsing. That is, the ability to parse the first
68
+ * bits of a chunk of JSON before the last bits are available (still on
69
+ * the network or disk). This makes the lexer more complex. The
70
+ * responsibility of the lexer is to handle transparently the case where
71
+ * a chunk boundary falls in the middle of a token. This is
72
+ * accomplished via a buffer and a character reading abstraction.
73
+ *
74
+ * Overview of implementation
75
+ *
76
+ * When we lex to end of input string before end of token is hit, we
77
+ * copy all of the input text composing the token into our lexBuf.
78
+ *
79
+ * Every time we read a character, we do so through the readChar function.
80
+ * readChar's responsibility is to handle pulling all chars from the buffer
81
+ * before pulling chars from input text
82
+ */
83
+
84
+ struct yajl_lexer_t { /* state kept by one lexer instance between calls */
85
+ /* the overall line and char offset into the data.
+ * NOTE(review): within this file these are only zeroed (by
+ * yajl_lex_alloc / yajl_lex_realloc) and read back by
+ * yajl_lex_current_line / yajl_lex_current_char; nothing here
+ * advances them. */
86
+ unsigned int lineOff;
87
+ unsigned int charOff;
88
+
89
+ /* most recent lexical error; read back via yajl_lex_get_error() */
90
+ yajl_lex_error error;
91
+
92
+ /* an input buffer to handle the case where a token is spread over
93
+ * multiple chunks */
94
+ yajl_buf buf;
95
+
96
+ /* in the case where we have data in the lexBuf, bufOff holds
97
+ * the current offset into the lexBuf (advanced by readChar, rewound
+ * by unreadChar). */
98
+ unsigned int bufOff;
99
+
100
+ /* are we using the lex buf? set when a token hits end of input,
+ * cleared once a complete token has been handed out */
101
+ unsigned int bufInUse;
102
+
103
+ /* shall we allow comments? */
104
+ unsigned int allowComments;
105
+
106
+ /* shall we validate utf8 inside strings? */
107
+ unsigned int validateUTF8;
108
+
109
+ yajl_alloc_funcs * alloc; /* allocator given to yajl_lex_alloc(); used again by yajl_lex_free() */
110
+ };
111
+
112
/*
 * readChar(lxr, txt, off): fetch the next input byte.
 *
 * While the lexer's carry-over buffer is in use and still has unread
 * bytes, the byte comes from that buffer and lxr->bufOff is advanced;
 * otherwise it comes from txt[*off] and *off is advanced.
 *
 * Every use of the `lxr` argument is parenthesised so that any pointer
 * expression may be passed. The arguments are evaluated more than once,
 * so they must be free of side effects.
 */
#define readChar(lxr, txt, off)                                          \
    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) &&                     \
      (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ?                        \
     (*((const unsigned char *) yajl_buf_data((lxr)->buf) +              \
        ((lxr)->bufOff)++)) :                                            \
     ((txt)[(*(off))++]))

/*
 * unreadChar(lxr, off): step back over the byte most recently read.
 * If any byte of the current chunk has been consumed (*off > 0) the chunk
 * offset is rewound, otherwise the buffer offset is.
 */
#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
118
+
119
+ yajl_lexer
120
+ yajl_lex_alloc(yajl_alloc_funcs * alloc,
121
+ unsigned int allowComments, unsigned int validateUTF8)
122
+ {
123
+ yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
124
+ memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
125
+ lxr->buf = yajl_buf_alloc(alloc);
126
+ lxr->allowComments = allowComments;
127
+ lxr->validateUTF8 = validateUTF8;
128
+ lxr->alloc = alloc;
129
+ return lxr;
130
+ }
131
+
132
+ yajl_lexer
133
+ yajl_lex_realloc(yajl_lexer orig) {
134
+ orig->lineOff = 0;
135
+ orig->charOff = 0;
136
+ orig->error = yajl_lex_e_ok;
137
+ yajl_buf_clear(orig->buf);
138
+ orig->bufOff = 0;
139
+ orig->bufInUse = 0;
140
+ return orig;
141
+ }
142
+
143
+ void
144
+ yajl_lex_free(yajl_lexer lxr)
145
+ {
146
+ yajl_buf_free(lxr->buf);
147
+ YA_FREE(lxr->alloc, lxr);
148
+ return;
149
+ }
150
+
151
+ /* a lookup table, indexed by byte value, which lets us quickly
+ * determine three things about a character:
152
+ * VEC - valid escaped control char (may follow a backslash)
153
+ * IJC - invalid json char (may not appear bare inside a string)
154
+ * VHC - valid hex char (may appear in a \u escape)
155
+ * note. the solidus '/' may be escaped or not.
156
+ * note. the quote (0x22) and backslash (0x5c) carry both VEC and IJC:
+ * they are legal after a backslash but not bare inside a string.
+ * The flags are bit values so one entry can carry several of them.
157
+ */
158
+ #define VEC 1 /* valid escaped control char */
159
+ #define IJC 2 /* invalid json char */
160
+ #define VHC 4 /* valid hex char */
161
+ static const char charLookupTable[256] =
162
+ {
163
+ /*00*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
164
+ /*08*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
165
+ /*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
166
+ /*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
167
+
168
+ /*20*/ 0 , 0 , VEC|IJC, 0 , 0 , 0 , 0 , 0 ,
169
+ /*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC ,
170
+ /*30*/ VHC , VHC , VHC , VHC , VHC , VHC , VHC , VHC ,
171
+ /*38*/ VHC , VHC , 0 , 0 , 0 , 0 , 0 , 0 ,
172
+
173
+ /*40*/ 0 , VHC , VHC , VHC , VHC , VHC , VHC , 0 ,
174
+ /*48*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
175
+ /*50*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
176
+ /*58*/ 0 , 0 , 0 , 0 , VEC|IJC, 0 , 0 , 0 ,
177
+
178
+ /*60*/ 0 , VHC , VEC|VHC, VHC , VHC , VHC , VEC|VHC, 0 ,
179
+ /*68*/ 0 , 0 , 0 , 0 , 0 , 0 , VEC , 0 ,
180
+ /*70*/ 0 , 0 , VEC , 0 , VEC , 0 , 0 , 0 ,
181
+ /*78*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
182
+
183
+ /* include these (0x80-0xff, all zero) so we don't have to always check the range of the char */
184
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
185
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
186
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
187
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
188
+
189
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
190
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
191
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
192
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
193
+
194
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
195
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
196
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
197
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
198
+
199
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
200
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
201
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
202
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
203
+ };
204
+
205
+ /** process a variable length utf8 encoded codepoint.
206
+ *
207
+ * returns:
208
+ * yajl_tok_string - if valid utf8 char was parsed and offset was
209
+ * advanced
210
+ * yajl_tok_eof - if end of input was hit before validation could
211
+ * complete
212
+ * yajl_tok_error - if invalid utf8 was encountered
213
+ *
214
+ * NOTE: on error the offset will point to the first char of the
215
+ * invalid utf8 */
216
+ #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
217
+
218
+ static yajl_tok
219
+ yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
220
+ unsigned int jsonTextLen, unsigned int * offset,
221
+ unsigned char curChar)
222
+ {
223
+ if (curChar <= 0x7f) {
224
+ /* single byte */
225
+ return yajl_tok_string;
226
+ } else if ((curChar >> 5) == 0x6) {
227
+ /* two byte */
228
+ UTF8_CHECK_EOF;
229
+ curChar = readChar(lexer, jsonText, offset);
230
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
231
+ } else if ((curChar >> 4) == 0x0e) {
232
+ /* three byte */
233
+ UTF8_CHECK_EOF;
234
+ curChar = readChar(lexer, jsonText, offset);
235
+ if ((curChar >> 6) == 0x2) {
236
+ UTF8_CHECK_EOF;
237
+ curChar = readChar(lexer, jsonText, offset);
238
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
239
+ }
240
+ } else if ((curChar >> 3) == 0x1e) {
241
+ /* four byte */
242
+ UTF8_CHECK_EOF;
243
+ curChar = readChar(lexer, jsonText, offset);
244
+ if ((curChar >> 6) == 0x2) {
245
+ UTF8_CHECK_EOF;
246
+ curChar = readChar(lexer, jsonText, offset);
247
+ if ((curChar >> 6) == 0x2) {
248
+ UTF8_CHECK_EOF;
249
+ curChar = readChar(lexer, jsonText, offset);
250
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
251
+ }
252
+ }
253
+ }
254
+
255
+ return yajl_tok_error;
256
+ }
257
+
258
+ /* lex a string. input is the lexer, pointer to beginning of
259
+ * json text, and start of string (offset).
260
+ * a token is returned which has the following meanings:
261
+ * yajl_tok_string: lex of string was successful. offset points to
262
+ * terminating '"'.
263
+ * yajl_tok_eof: end of text was encountered before we could complete
264
+ * the lex.
265
+ * yajl_tok_error: embedded in the string were unallowable chars. offset
266
+ * points to the offending char
267
+ */
268
+ #define STR_CHECK_EOF \
269
+ if (*offset >= jsonTextLen) { \
270
+ tok = yajl_tok_eof; \
271
+ goto finish_string_lex; \
272
+ }
273
+
274
+ static yajl_tok
275
+ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
276
+ unsigned int jsonTextLen, unsigned int * offset)
277
+ {
278
+ yajl_tok tok = yajl_tok_error;
279
+ int hasEscapes = 0;
280
+
281
+ for (;;) {
282
+ unsigned char curChar;
283
+
284
+ STR_CHECK_EOF;
285
+
286
+ curChar = readChar(lexer, jsonText, offset);
287
+
288
+ /* quote terminates */
289
+ if (curChar == '"') {
290
+ tok = yajl_tok_string;
291
+ break;
292
+ }
293
+ /* backslash escapes a set of control chars, */
294
+ else if (curChar == '\\') {
295
+ hasEscapes = 1;
296
+ STR_CHECK_EOF;
297
+
298
+ /* special case \u */
299
+ curChar = readChar(lexer, jsonText, offset);
300
+ if (curChar == 'u') {
301
+ unsigned int i = 0;
302
+
303
+ for (i=0;i<4;i++) {
304
+ STR_CHECK_EOF;
305
+ curChar = readChar(lexer, jsonText, offset);
306
+ if (!(charLookupTable[curChar] & VHC)) {
307
+ /* back up to offending char */
308
+ unreadChar(lexer, offset);
309
+ lexer->error = yajl_lex_string_invalid_hex_char;
310
+ goto finish_string_lex;
311
+ }
312
+ }
313
+ } else if (!(charLookupTable[curChar] & VEC)) {
314
+ /* back up to offending char */
315
+ unreadChar(lexer, offset);
316
+ lexer->error = yajl_lex_string_invalid_escaped_char;
317
+ goto finish_string_lex;
318
+ }
319
+ }
320
+ /* when not validating UTF8 it's a simple table lookup to determine
321
+ * if the present character is invalid */
322
+ else if(charLookupTable[curChar] & IJC) {
323
+ /* back up to offending char */
324
+ unreadChar(lexer, offset);
325
+ lexer->error = yajl_lex_string_invalid_json_char;
326
+ goto finish_string_lex;
327
+ }
328
+ /* when in validate UTF8 mode we need to do some extra work */
329
+ else if (lexer->validateUTF8) {
330
+ yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
331
+ offset, curChar);
332
+
333
+ if (t == yajl_tok_eof) {
334
+ tok = yajl_tok_eof;
335
+ goto finish_string_lex;
336
+ } else if (t == yajl_tok_error) {
337
+ lexer->error = yajl_lex_string_invalid_utf8;
338
+ goto finish_string_lex;
339
+ }
340
+ }
341
+ /* accept it, and move on */
342
+ }
343
+ finish_string_lex:
344
+ /* tell our buddy, the parser, wether he needs to process this string
345
+ * again */
346
+ if (hasEscapes && tok == yajl_tok_string) {
347
+ tok = yajl_tok_string_with_escapes;
348
+ }
349
+
350
+ return tok;
351
+ }
352
+
353
+ #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
354
+
355
+ static yajl_tok
356
+ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
357
+ unsigned int jsonTextLen, unsigned int * offset)
358
+ {
359
+ /** XXX: numbers are the only entities in json that we must lex
360
+ * _beyond_ in order to know that they are complete. There
361
+ * is an ambiguous case for integers at EOF. */
362
+
363
+ unsigned char c;
364
+
365
+ yajl_tok tok = yajl_tok_integer;
366
+
367
+ RETURN_IF_EOF;
368
+ c = readChar(lexer, jsonText, offset);
369
+
370
+ /* optional leading minus */
371
+ if (c == '-') {
372
+ RETURN_IF_EOF;
373
+ c = readChar(lexer, jsonText, offset);
374
+ }
375
+
376
+ /* a single zero, or a series of integers */
377
+ if (c == '0') {
378
+ RETURN_IF_EOF;
379
+ c = readChar(lexer, jsonText, offset);
380
+ } else if (c >= '1' && c <= '9') {
381
+ do {
382
+ RETURN_IF_EOF;
383
+ c = readChar(lexer, jsonText, offset);
384
+ } while (c >= '0' && c <= '9');
385
+ } else {
386
+ unreadChar(lexer, offset);
387
+ lexer->error = yajl_lex_missing_integer_after_minus;
388
+ return yajl_tok_error;
389
+ }
390
+
391
+ /* optional fraction (indicates this is floating point) */
392
+ if (c == '.') {
393
+ int numRd = 0;
394
+
395
+ RETURN_IF_EOF;
396
+ c = readChar(lexer, jsonText, offset);
397
+
398
+ while (c >= '0' && c <= '9') {
399
+ numRd++;
400
+ RETURN_IF_EOF;
401
+ c = readChar(lexer, jsonText, offset);
402
+ }
403
+
404
+ if (!numRd) {
405
+ unreadChar(lexer, offset);
406
+ lexer->error = yajl_lex_missing_integer_after_decimal;
407
+ return yajl_tok_error;
408
+ }
409
+ tok = yajl_tok_double;
410
+ }
411
+
412
+ /* optional exponent (indicates this is floating point) */
413
+ if (c == 'e' || c == 'E') {
414
+ RETURN_IF_EOF;
415
+ c = readChar(lexer, jsonText, offset);
416
+
417
+ /* optional sign */
418
+ if (c == '+' || c == '-') {
419
+ RETURN_IF_EOF;
420
+ c = readChar(lexer, jsonText, offset);
421
+ }
422
+
423
+ if (c >= '0' && c <= '9') {
424
+ do {
425
+ RETURN_IF_EOF;
426
+ c = readChar(lexer, jsonText, offset);
427
+ } while (c >= '0' && c <= '9');
428
+ } else {
429
+ unreadChar(lexer, offset);
430
+ lexer->error = yajl_lex_missing_integer_after_exponent;
431
+ return yajl_tok_error;
432
+ }
433
+ tok = yajl_tok_double;
434
+ }
435
+
436
+ /* we always go "one too far" */
437
+ unreadChar(lexer, offset);
438
+
439
+ return tok;
440
+ }
441
+
442
+ static yajl_tok
443
+ yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
444
+ unsigned int jsonTextLen, unsigned int * offset)
445
+ {
446
+ unsigned char c;
447
+
448
+ yajl_tok tok = yajl_tok_comment;
449
+
450
+ RETURN_IF_EOF;
451
+ c = readChar(lexer, jsonText, offset);
452
+
453
+ /* either slash or star expected */
454
+ if (c == '/') {
455
+ /* now we throw away until end of line */
456
+ do {
457
+ RETURN_IF_EOF;
458
+ c = readChar(lexer, jsonText, offset);
459
+ } while (c != '\n');
460
+ } else if (c == '*') {
461
+ /* now we throw away until end of comment */
462
+ for (;;) {
463
+ RETURN_IF_EOF;
464
+ c = readChar(lexer, jsonText, offset);
465
+ if (c == '*') {
466
+ RETURN_IF_EOF;
467
+ c = readChar(lexer, jsonText, offset);
468
+ if (c == '/') {
469
+ break;
470
+ } else {
471
+ unreadChar(lexer, offset);
472
+ }
473
+ }
474
+ }
475
+ } else {
476
+ lexer->error = yajl_lex_invalid_char;
477
+ tok = yajl_tok_error;
478
+ }
479
+
480
+ return tok;
481
+ }
482
+
483
+ yajl_tok
484
+ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
485
+ unsigned int jsonTextLen, unsigned int * offset,
486
+ const unsigned char ** outBuf, unsigned int * outLen)
487
+ {
488
+ yajl_tok tok = yajl_tok_error;
489
+ unsigned char c;
490
+ unsigned int startOffset = *offset;
491
+
492
+ *outBuf = NULL;
493
+ *outLen = 0;
494
+
495
+ for (;;) {
496
+ assert(*offset <= jsonTextLen);
497
+
498
+ if (*offset >= jsonTextLen) {
499
+ tok = yajl_tok_eof;
500
+ goto lexed;
501
+ }
502
+
503
+ c = readChar(lexer, jsonText, offset);
504
+
505
+ switch (c) {
506
+ case '{':
507
+ tok = yajl_tok_left_bracket;
508
+ goto lexed;
509
+ case '}':
510
+ tok = yajl_tok_right_bracket;
511
+ goto lexed;
512
+ case '[':
513
+ tok = yajl_tok_left_brace;
514
+ goto lexed;
515
+ case ']':
516
+ tok = yajl_tok_right_brace;
517
+ goto lexed;
518
+ case ',':
519
+ tok = yajl_tok_comma;
520
+ goto lexed;
521
+ case ':':
522
+ tok = yajl_tok_colon;
523
+ goto lexed;
524
+ case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
525
+ startOffset++;
526
+ break;
527
+ case 't': {
528
+ const char * want = "rue";
529
+ do {
530
+ if (*offset >= jsonTextLen) {
531
+ tok = yajl_tok_eof;
532
+ goto lexed;
533
+ }
534
+ c = readChar(lexer, jsonText, offset);
535
+ if (c != *want) {
536
+ unreadChar(lexer, offset);
537
+ lexer->error = yajl_lex_invalid_string;
538
+ tok = yajl_tok_error;
539
+ goto lexed;
540
+ }
541
+ } while (*(++want));
542
+ tok = yajl_tok_bool;
543
+ goto lexed;
544
+ }
545
+ case 'f': {
546
+ const char * want = "alse";
547
+ do {
548
+ if (*offset >= jsonTextLen) {
549
+ tok = yajl_tok_eof;
550
+ goto lexed;
551
+ }
552
+ c = readChar(lexer, jsonText, offset);
553
+ if (c != *want) {
554
+ unreadChar(lexer, offset);
555
+ lexer->error = yajl_lex_invalid_string;
556
+ tok = yajl_tok_error;
557
+ goto lexed;
558
+ }
559
+ } while (*(++want));
560
+ tok = yajl_tok_bool;
561
+ goto lexed;
562
+ }
563
+ case 'n': {
564
+ const char * want = "ull";
565
+ do {
566
+ if (*offset >= jsonTextLen) {
567
+ tok = yajl_tok_eof;
568
+ goto lexed;
569
+ }
570
+ c = readChar(lexer, jsonText, offset);
571
+ if (c != *want) {
572
+ unreadChar(lexer, offset);
573
+ lexer->error = yajl_lex_invalid_string;
574
+ tok = yajl_tok_error;
575
+ goto lexed;
576
+ }
577
+ } while (*(++want));
578
+ tok = yajl_tok_null;
579
+ goto lexed;
580
+ }
581
+ case '"': {
582
+ tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
583
+ jsonTextLen, offset);
584
+ goto lexed;
585
+ }
586
+ case '-':
587
+ case '0': case '1': case '2': case '3': case '4':
588
+ case '5': case '6': case '7': case '8': case '9': {
589
+ /* integer parsing wants to start from the beginning */
590
+ unreadChar(lexer, offset);
591
+ tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
592
+ jsonTextLen, offset);
593
+ goto lexed;
594
+ }
595
+ case '/':
596
+ /* hey, look, a probable comment! If comments are disabled
597
+ * it's an error. */
598
+ if (!lexer->allowComments) {
599
+ unreadChar(lexer, offset);
600
+ lexer->error = yajl_lex_unallowed_comment;
601
+ tok = yajl_tok_error;
602
+ goto lexed;
603
+ }
604
+ /* if comments are enabled, then we should try to lex
605
+ * the thing. possible outcomes are
606
+ * - successful lex (tok_comment, which means continue),
607
+ * - malformed comment opening (slash not followed by
608
+ * '*' or '/') (tok_error)
609
+ * - eof hit. (tok_eof) */
610
+ tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
611
+ jsonTextLen, offset);
612
+ if (tok == yajl_tok_comment) {
613
+ /* "error" is silly, but that's the initial
614
+ * state of tok. guilty until proven innocent. */
615
+ tok = yajl_tok_error;
616
+ yajl_buf_clear(lexer->buf);
617
+ lexer->bufInUse = 0;
618
+ startOffset = *offset;
619
+ break;
620
+ }
621
+ /* hit error or eof, bail */
622
+ goto lexed;
623
+ default:
624
+ lexer->error = yajl_lex_invalid_char;
625
+ tok = yajl_tok_error;
626
+ goto lexed;
627
+ }
628
+ }
629
+
630
+
631
+ lexed:
632
+ /* need to append to buffer if the buffer is in use or
633
+ * if it's an EOF token */
634
+ if (tok == yajl_tok_eof || lexer->bufInUse) {
635
+ if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
636
+ lexer->bufInUse = 1;
637
+ yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
638
+ lexer->bufOff = 0;
639
+
640
+ if (tok != yajl_tok_eof) {
641
+ *outBuf = yajl_buf_data(lexer->buf);
642
+ *outLen = yajl_buf_len(lexer->buf);
643
+ lexer->bufInUse = 0;
644
+ }
645
+ } else if (tok != yajl_tok_error) {
646
+ *outBuf = jsonText + startOffset;
647
+ *outLen = *offset - startOffset;
648
+ }
649
+
650
+ /* special case for strings. skip the quotes. */
651
+ if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
652
+ {
653
+ assert(*outLen >= 2);
654
+ (*outBuf)++;
655
+ *outLen -= 2;
656
+ }
657
+
658
+
659
+ #ifdef YAJL_LEXER_DEBUG
660
+ if (tok == yajl_tok_error) {
661
+ printf("lexical error: %s\n",
662
+ yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
663
+ } else if (tok == yajl_tok_eof) {
664
+ printf("EOF hit\n");
665
+ } else {
666
+ printf("lexed %s: '", tokToStr(tok));
667
+ fwrite(*outBuf, 1, *outLen, stdout);
668
+ printf("'\n");
669
+ }
670
+ #endif
671
+
672
+ return tok;
673
+ }
674
+
675
+ const char *
676
+ yajl_lex_error_to_string(yajl_lex_error error)
677
+ {
678
+ switch (error) {
679
+ case yajl_lex_e_ok:
680
+ return "ok, no error";
681
+ case yajl_lex_string_invalid_utf8:
682
+ return "invalid bytes in UTF8 string.";
683
+ case yajl_lex_string_invalid_escaped_char:
684
+ return "inside a string, '\\' occurs before a character "
685
+ "which it may not.";
686
+ case yajl_lex_string_invalid_json_char:
687
+ return "invalid character inside string.";
688
+ case yajl_lex_string_invalid_hex_char:
689
+ return "invalid (non-hex) character occurs after '\\u' inside "
690
+ "string.";
691
+ case yajl_lex_invalid_char:
692
+ return "invalid char in json text.";
693
+ case yajl_lex_invalid_string:
694
+ return "invalid string in json text.";
695
+ case yajl_lex_missing_integer_after_exponent:
696
+ return "malformed number, a digit is required after the exponent.";
697
+ case yajl_lex_missing_integer_after_decimal:
698
+ return "malformed number, a digit is required after the "
699
+ "decimal point.";
700
+ case yajl_lex_missing_integer_after_minus:
701
+ return "malformed number, a digit is required after the "
702
+ "minus sign.";
703
+ case yajl_lex_unallowed_comment:
704
+ return "probable comment found in input text, comments are "
705
+ "not enabled.";
706
+ }
707
+ return "unknown error code";
708
+ }
709
+
710
+
711
+ /** allows access to more specific information about the lexical
712
+ * error when yajl_lex_lex returns yajl_tok_error. */
713
+ yajl_lex_error
714
+ yajl_lex_get_error(yajl_lexer lexer)
715
+ {
716
+ if (lexer == NULL) return (yajl_lex_error) -1;
717
+ return lexer->error;
718
+ }
719
+
720
+ unsigned int yajl_lex_current_line(yajl_lexer lexer)
721
+ {
722
+ return lexer->lineOff;
723
+ }
724
+
725
+ unsigned int yajl_lex_current_char(yajl_lexer lexer)
726
+ {
727
+ return lexer->charOff;
728
+ }
729
+
730
+ yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
731
+ unsigned int jsonTextLen, unsigned int offset)
732
+ {
733
+ const unsigned char * outBuf;
734
+ unsigned int outLen;
735
+ unsigned int bufLen = yajl_buf_len(lexer->buf);
736
+ unsigned int bufOff = lexer->bufOff;
737
+ unsigned int bufInUse = lexer->bufInUse;
738
+ yajl_tok tok;
739
+
740
+ tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
741
+ &outBuf, &outLen);
742
+
743
+ lexer->bufOff = bufOff;
744
+ lexer->bufInUse = bufInUse;
745
+ yajl_buf_truncate(lexer->buf, bufLen);
746
+
747
+ return tok;
748
+ }