yajl-ruby 1.0.0-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yajl-ruby might be problematic. Click here for more details.

Files changed (152) hide show
  1. data/.gitignore +12 -0
  2. data/.rspec +2 -0
  3. data/CHANGELOG.md +327 -0
  4. data/Gemfile +3 -0
  5. data/MIT-LICENSE +20 -0
  6. data/README.md +362 -0
  7. data/Rakefile +2 -0
  8. data/benchmark/encode.rb +72 -0
  9. data/benchmark/encode_json_and_marshal.rb +42 -0
  10. data/benchmark/encode_json_and_yaml.rb +53 -0
  11. data/benchmark/http.rb +32 -0
  12. data/benchmark/parse.rb +94 -0
  13. data/benchmark/parse_json_and_marshal.rb +50 -0
  14. data/benchmark/parse_json_and_yaml.rb +55 -0
  15. data/benchmark/parse_stream.rb +54 -0
  16. data/benchmark/subjects/item.json +1 -0
  17. data/benchmark/subjects/ohai.json +1216 -0
  18. data/benchmark/subjects/ohai.marshal_dump +0 -0
  19. data/benchmark/subjects/ohai.yml +975 -0
  20. data/benchmark/subjects/twitter_search.json +1 -0
  21. data/benchmark/subjects/twitter_stream.json +430 -0
  22. data/benchmark/subjects/unicode.json +1 -0
  23. data/examples/encoding/chunked_encoding.rb +27 -0
  24. data/examples/encoding/one_shot.rb +13 -0
  25. data/examples/encoding/to_an_io.rb +12 -0
  26. data/examples/http/twitter_search_api.rb +12 -0
  27. data/examples/http/twitter_stream_api.rb +26 -0
  28. data/examples/parsing/from_file.rb +14 -0
  29. data/examples/parsing/from_stdin.rb +9 -0
  30. data/examples/parsing/from_string.rb +13 -0
  31. data/ext/yajl/api/yajl_common.h +89 -0
  32. data/ext/yajl/api/yajl_gen.h +161 -0
  33. data/ext/yajl/api/yajl_parse.h +196 -0
  34. data/ext/yajl/api/yajl_version.h +23 -0
  35. data/ext/yajl/extconf.rb +7 -0
  36. data/ext/yajl/yajl.c +164 -0
  37. data/ext/yajl/yajl_alloc.c +65 -0
  38. data/ext/yajl/yajl_alloc.h +50 -0
  39. data/ext/yajl/yajl_buf.c +119 -0
  40. data/ext/yajl/yajl_buf.h +73 -0
  41. data/ext/yajl/yajl_bytestack.h +85 -0
  42. data/ext/yajl/yajl_encode.c +201 -0
  43. data/ext/yajl/yajl_encode.h +52 -0
  44. data/ext/yajl/yajl_ext.c +905 -0
  45. data/ext/yajl/yajl_ext.h +135 -0
  46. data/ext/yajl/yajl_gen.c +344 -0
  47. data/ext/yajl/yajl_lex.c +748 -0
  48. data/ext/yajl/yajl_lex.h +135 -0
  49. data/ext/yajl/yajl_parser.c +450 -0
  50. data/ext/yajl/yajl_parser.h +82 -0
  51. data/ext/yajl/yajl_version.c +7 -0
  52. data/lib/yajl.rb +75 -0
  53. data/lib/yajl/1.8/yajl.so +0 -0
  54. data/lib/yajl/1.9/yajl.so +0 -0
  55. data/lib/yajl/bzip2.rb +11 -0
  56. data/lib/yajl/bzip2/stream_reader.rb +31 -0
  57. data/lib/yajl/bzip2/stream_writer.rb +14 -0
  58. data/lib/yajl/deflate.rb +6 -0
  59. data/lib/yajl/deflate/stream_reader.rb +43 -0
  60. data/lib/yajl/deflate/stream_writer.rb +20 -0
  61. data/lib/yajl/gzip.rb +6 -0
  62. data/lib/yajl/gzip/stream_reader.rb +30 -0
  63. data/lib/yajl/gzip/stream_writer.rb +13 -0
  64. data/lib/yajl/http_stream.rb +212 -0
  65. data/lib/yajl/json_gem.rb +15 -0
  66. data/lib/yajl/json_gem/encoding.rb +51 -0
  67. data/lib/yajl/json_gem/parsing.rb +26 -0
  68. data/lib/yajl/version.rb +3 -0
  69. data/lib/yajl/yajl.rb +2 -0
  70. data/spec/encoding/encoding_spec.rb +271 -0
  71. data/spec/global/global_spec.rb +54 -0
  72. data/spec/http/fixtures/http.bzip2.dump +0 -0
  73. data/spec/http/fixtures/http.chunked.dump +11 -0
  74. data/spec/http/fixtures/http.deflate.dump +0 -0
  75. data/spec/http/fixtures/http.error.dump +12 -0
  76. data/spec/http/fixtures/http.gzip.dump +0 -0
  77. data/spec/http/fixtures/http.html.dump +1220 -0
  78. data/spec/http/fixtures/http.raw.dump +1226 -0
  79. data/spec/http/http_delete_spec.rb +98 -0
  80. data/spec/http/http_error_spec.rb +32 -0
  81. data/spec/http/http_get_spec.rb +109 -0
  82. data/spec/http/http_post_spec.rb +123 -0
  83. data/spec/http/http_put_spec.rb +105 -0
  84. data/spec/http/http_stream_options_spec.rb +27 -0
  85. data/spec/json_gem_compatibility/compatibility_spec.rb +203 -0
  86. data/spec/parsing/active_support_spec.rb +64 -0
  87. data/spec/parsing/chunked_spec.rb +96 -0
  88. data/spec/parsing/fixtures/fail.15.json +1 -0
  89. data/spec/parsing/fixtures/fail.16.json +1 -0
  90. data/spec/parsing/fixtures/fail.17.json +1 -0
  91. data/spec/parsing/fixtures/fail.26.json +1 -0
  92. data/spec/parsing/fixtures/fail11.json +1 -0
  93. data/spec/parsing/fixtures/fail12.json +1 -0
  94. data/spec/parsing/fixtures/fail13.json +1 -0
  95. data/spec/parsing/fixtures/fail14.json +1 -0
  96. data/spec/parsing/fixtures/fail19.json +1 -0
  97. data/spec/parsing/fixtures/fail20.json +1 -0
  98. data/spec/parsing/fixtures/fail21.json +1 -0
  99. data/spec/parsing/fixtures/fail22.json +1 -0
  100. data/spec/parsing/fixtures/fail23.json +1 -0
  101. data/spec/parsing/fixtures/fail24.json +1 -0
  102. data/spec/parsing/fixtures/fail25.json +1 -0
  103. data/spec/parsing/fixtures/fail27.json +2 -0
  104. data/spec/parsing/fixtures/fail28.json +2 -0
  105. data/spec/parsing/fixtures/fail3.json +1 -0
  106. data/spec/parsing/fixtures/fail4.json +1 -0
  107. data/spec/parsing/fixtures/fail5.json +1 -0
  108. data/spec/parsing/fixtures/fail6.json +1 -0
  109. data/spec/parsing/fixtures/fail9.json +1 -0
  110. data/spec/parsing/fixtures/pass.array.json +6 -0
  111. data/spec/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
  112. data/spec/parsing/fixtures/pass.contacts.json +1 -0
  113. data/spec/parsing/fixtures/pass.db100.xml.json +1 -0
  114. data/spec/parsing/fixtures/pass.db1000.xml.json +1 -0
  115. data/spec/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
  116. data/spec/parsing/fixtures/pass.deep_arrays.json +1 -0
  117. data/spec/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
  118. data/spec/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
  119. data/spec/parsing/fixtures/pass.doubles.json +1 -0
  120. data/spec/parsing/fixtures/pass.empty_array.json +1 -0
  121. data/spec/parsing/fixtures/pass.empty_string.json +1 -0
  122. data/spec/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
  123. data/spec/parsing/fixtures/pass.escaped_foobar.json +1 -0
  124. data/spec/parsing/fixtures/pass.item.json +1 -0
  125. data/spec/parsing/fixtures/pass.json-org-sample1.json +23 -0
  126. data/spec/parsing/fixtures/pass.json-org-sample2.json +11 -0
  127. data/spec/parsing/fixtures/pass.json-org-sample3.json +26 -0
  128. data/spec/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
  129. data/spec/parsing/fixtures/pass.json-org-sample4.json +89 -0
  130. data/spec/parsing/fixtures/pass.json-org-sample5.json +27 -0
  131. data/spec/parsing/fixtures/pass.map-spain.xml.json +1 -0
  132. data/spec/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
  133. data/spec/parsing/fixtures/pass.ns-soap.xml.json +1 -0
  134. data/spec/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
  135. data/spec/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
  136. data/spec/parsing/fixtures/pass.numbers-int-4k.json +11 -0
  137. data/spec/parsing/fixtures/pass.numbers-int-64k.json +154 -0
  138. data/spec/parsing/fixtures/pass.twitter-search.json +1 -0
  139. data/spec/parsing/fixtures/pass.twitter-search2.json +1 -0
  140. data/spec/parsing/fixtures/pass.unicode.json +3315 -0
  141. data/spec/parsing/fixtures/pass.yelp.json +1 -0
  142. data/spec/parsing/fixtures/pass1.json +56 -0
  143. data/spec/parsing/fixtures/pass2.json +1 -0
  144. data/spec/parsing/fixtures/pass3.json +6 -0
  145. data/spec/parsing/fixtures_spec.rb +40 -0
  146. data/spec/parsing/one_off_spec.rb +85 -0
  147. data/spec/rcov.opts +3 -0
  148. data/spec/spec_helper.rb +16 -0
  149. data/tasks/compile.rake +35 -0
  150. data/tasks/rspec.rake +16 -0
  151. data/yajl-ruby.gemspec +24 -0
  152. metadata +335 -0
@@ -0,0 +1,748 @@
1
+ /*
2
+ * Copyright 2010, Lloyd Hilaiel.
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions are
6
+ * met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in
13
+ * the documentation and/or other materials provided with the
14
+ * distribution.
15
+ *
16
+ * 3. Neither the name of Lloyd Hilaiel nor the names of its
17
+ * contributors may be used to endorse or promote products derived
18
+ * from this software without specific prior written permission.
19
+ *
20
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ * POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+ #include "yajl_lex.h"
34
+ #include "yajl_buf.h"
35
+
36
+ #include <stdlib.h>
37
+ #include <stdio.h>
38
+ #include <assert.h>
39
+ #include <string.h>
40
+
41
#ifdef YAJL_LEXER_DEBUG
/* Map a lexer token to a short label for the debug trace printed at the
 * end of yajl_lex_lex().  Only compiled when YAJL_LEXER_DEBUG is defined.
 *
 * In this lexer '{' / '}' are the *_bracket tokens and '[' / ']' are the
 * *_brace tokens; the left and right variants share one label.  A value
 * with no case below yields "unknown". */
static const char *
tokToStr(yajl_tok tok)
{
    switch (tok) {
        case yajl_tok_bool: return "bool";
        case yajl_tok_colon: return "colon";
        case yajl_tok_comma: return "comma";
        case yajl_tok_eof: return "eof";
        case yajl_tok_error: return "error";
        case yajl_tok_left_brace: return "brace";
        case yajl_tok_left_bracket: return "bracket";
        case yajl_tok_null: return "null";
        case yajl_tok_integer: return "integer";
        case yajl_tok_double: return "double";
        case yajl_tok_right_brace: return "brace";
        case yajl_tok_right_bracket: return "bracket";
        case yajl_tok_string: return "string";
        case yajl_tok_string_with_escapes: return "string_with_escapes";
        /* used internally by yajl_lex_comment(); previously unhandled */
        case yajl_tok_comment: return "comment";
    }
    return "unknown";
}
#endif
64
+
65
+ /* Impact of the stream parsing feature on the lexer:
66
+ *
67
+ * YAJL supports stream parsing. That is, the ability to parse the first
68
+ * bits of a chunk of JSON before the last bits are available (still on
69
+ * the network or disk). This makes the lexer more complex. The
70
+ * responsibility of the lexer is to handle transparently the case where
71
+ * a chunk boundary falls in the middle of a token. This is
72
+ * accomplished via a buffer and a character reading abstraction.
73
+ *
74
+ * Overview of implementation
75
+ *
76
+ * When we lex to end of input string before end of token is hit, we
77
+ * copy all of the input text composing the token into our lexBuf.
78
+ *
79
+ * Every time we read a character, we do so through the readChar function.
80
+ * readChar's responsibility is to handle pulling all chars from the buffer
81
+ * before pulling chars from input text
82
+ */
83
+
84
+ struct yajl_lexer_t {
85
+ /* the overal line and char offset into the data */
86
+ unsigned int lineOff;
87
+ unsigned int charOff;
88
+
89
+ /* error */
90
+ yajl_lex_error error;
91
+
92
+ /* a input buffer to handle the case where a token is spread over
93
+ * multiple chunks */
94
+ yajl_buf buf;
95
+
96
+ /* in the case where we have data in the lexBuf, bufOff holds
97
+ * the current offset into the lexBuf. */
98
+ unsigned int bufOff;
99
+
100
+ /* are we using the lex buf? */
101
+ unsigned int bufInUse;
102
+
103
+ /* shall we allow comments? */
104
+ unsigned int allowComments;
105
+
106
+ /* shall we validate utf8 inside strings? */
107
+ unsigned int validateUTF8;
108
+
109
+ yajl_alloc_funcs * alloc;
110
+ };
111
+
112
/* readChar(lxr, txt, off): fetch the next input byte and step past it.
 *
 * While the lexer's buffer is in use, is non-empty and has not been read
 * to its end, the byte comes from the buffer and (lxr)->bufOff is
 * advanced.  Otherwise the byte is (txt)[*off] and *off is advanced.
 * The macro does no bounds check on txt; callers test *off against the
 * text length first.
 *
 * Every use of the lxr argument is parenthesised so that any pointer
 * expression (for example &obj) may be passed. */
#define readChar(lxr, txt, off)                                           \
    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) &&                      \
      (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ?                         \
     (*((const unsigned char *) yajl_buf_data((lxr)->buf) +               \
        ((lxr)->bufOff)++)) :                                             \
     ((txt)[(*(off))++]))

/* unreadChar(lxr, off): step back one byte.  The text offset is
 * decremented when it is non-zero; otherwise the buffer read position
 * is decremented instead. */
#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
118
+
119
+ yajl_lexer
120
+ yajl_lex_alloc(yajl_alloc_funcs * alloc,
121
+ unsigned int allowComments, unsigned int validateUTF8)
122
+ {
123
+ yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
124
+ memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
125
+ lxr->buf = yajl_buf_alloc(alloc);
126
+ lxr->allowComments = allowComments;
127
+ lxr->validateUTF8 = validateUTF8;
128
+ lxr->alloc = alloc;
129
+ return lxr;
130
+ }
131
+
132
+ yajl_lexer
133
+ yajl_lex_realloc(yajl_lexer orig) {
134
+ orig->lineOff = 0;
135
+ orig->charOff = 0;
136
+ orig->error = yajl_lex_e_ok;
137
+ yajl_buf_clear(orig->buf);
138
+ orig->bufOff = 0;
139
+ orig->bufInUse = 0;
140
+ return orig;
141
+ }
142
+
143
+ void
144
+ yajl_lex_free(yajl_lexer lxr)
145
+ {
146
+ yajl_buf_free(lxr->buf);
147
+ YA_FREE(lxr->alloc, lxr);
148
+ return;
149
+ }
150
+
151
/* Per-byte classification table used while lexing strings.  Each entry
 * is a bit set built from:
 *   VEC - valid escaped control char: the byte may follow a backslash
 *         ('"', '/', '\\', 'b', 'f', 'n', 'r', 't')
 *   IJC - invalid json char: the byte may not appear bare inside a
 *         string (0x00-0x1f, '"' and '\\')
 *   VHC - valid hex char (0-9, A-F, a-f)
 * note. the solidus '/' may be escaped or not.
 * note. 'u' is not marked VEC; the \u escape is handled separately by
 *       the string lexer.
 */
#define VEC 1
#define IJC 2
#define VHC 4
static const char charLookupTable[256] =
{
    /* 0x00-0x0f */
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    /* 0x10-0x1f */
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    /* 0x20-0x2f: '"' at 0x22, '/' at 0x2f */
    0, 0, VEC|IJC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VEC,
    /* 0x30-0x3f: digits '0'-'9' */
    VHC, VHC, VHC, VHC, VHC, VHC, VHC, VHC, VHC, VHC, 0, 0, 0, 0, 0, 0,
    /* 0x40-0x4f: 'A'-'F' */
    0, VHC, VHC, VHC, VHC, VHC, VHC, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50-0x5f: '\\' at 0x5c */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VEC|IJC, 0, 0, 0,
    /* 0x60-0x6f: 'a'-'f', with 'b' and 'f' also escapes; 'n' at 0x6e */
    0, VHC, VEC|VHC, VHC, VHC, VHC, VEC|VHC, 0, 0, 0, 0, 0, 0, 0, VEC, 0,
    /* 0x70-0x7f: 'r' at 0x72, 't' at 0x74 */
    0, 0, VEC, 0, VEC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

    /* 0x80-0xff carry no flags.  The array is still declared with 256
     * entries so callers never have to range-check the byte; the
     * entries not listed here are zero-initialised by the language. */
};
204
+
205
+ /** process a variable length utf8 encoded codepoint.
206
+ *
207
+ * returns:
208
+ * yajl_tok_string - if valid utf8 char was parsed and offset was
209
+ * advanced
210
+ * yajl_tok_eof - if end of input was hit before validation could
211
+ * complete
212
+ * yajl_tok_error - if invalid utf8 was encountered
213
+ *
214
+ * NOTE: on error the offset will point to the first char of the
215
+ * invalid utf8 */
216
+ #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
217
+
218
+ static yajl_tok
219
+ yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
220
+ unsigned int jsonTextLen, unsigned int * offset,
221
+ unsigned char curChar)
222
+ {
223
+ if (curChar <= 0x7f) {
224
+ /* single byte */
225
+ return yajl_tok_string;
226
+ } else if ((curChar >> 5) == 0x6) {
227
+ /* two byte */
228
+ UTF8_CHECK_EOF;
229
+ curChar = readChar(lexer, jsonText, offset);
230
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
231
+ } else if ((curChar >> 4) == 0x0e) {
232
+ /* three byte */
233
+ UTF8_CHECK_EOF;
234
+ curChar = readChar(lexer, jsonText, offset);
235
+ if ((curChar >> 6) == 0x2) {
236
+ UTF8_CHECK_EOF;
237
+ curChar = readChar(lexer, jsonText, offset);
238
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
239
+ }
240
+ } else if ((curChar >> 3) == 0x1e) {
241
+ /* four byte */
242
+ UTF8_CHECK_EOF;
243
+ curChar = readChar(lexer, jsonText, offset);
244
+ if ((curChar >> 6) == 0x2) {
245
+ UTF8_CHECK_EOF;
246
+ curChar = readChar(lexer, jsonText, offset);
247
+ if ((curChar >> 6) == 0x2) {
248
+ UTF8_CHECK_EOF;
249
+ curChar = readChar(lexer, jsonText, offset);
250
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
251
+ }
252
+ }
253
+ }
254
+
255
+ return yajl_tok_error;
256
+ }
257
+
258
+ /* lex a string. input is the lexer, pointer to beginning of
259
+ * json text, and start of string (offset).
260
+ * a token is returned which has the following meanings:
261
+ * yajl_tok_string: lex of string was successful. offset points to
262
+ * terminating '"'.
263
+ * yajl_tok_eof: end of text was encountered before we could complete
264
+ * the lex.
265
+ * yajl_tok_error: embedded in the string were unallowable chars. offset
266
+ * points to the offending char
267
+ */
268
+ #define STR_CHECK_EOF \
269
+ if (*offset >= jsonTextLen) { \
270
+ tok = yajl_tok_eof; \
271
+ goto finish_string_lex; \
272
+ }
273
+
274
+ static yajl_tok
275
+ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
276
+ unsigned int jsonTextLen, unsigned int * offset)
277
+ {
278
+ yajl_tok tok = yajl_tok_error;
279
+ int hasEscapes = 0;
280
+
281
+ for (;;) {
282
+ unsigned char curChar;
283
+
284
+ STR_CHECK_EOF;
285
+
286
+ curChar = readChar(lexer, jsonText, offset);
287
+
288
+ /* quote terminates */
289
+ if (curChar == '"') {
290
+ tok = yajl_tok_string;
291
+ break;
292
+ }
293
+ /* backslash escapes a set of control chars, */
294
+ else if (curChar == '\\') {
295
+ hasEscapes = 1;
296
+ STR_CHECK_EOF;
297
+
298
+ /* special case \u */
299
+ curChar = readChar(lexer, jsonText, offset);
300
+ if (curChar == 'u') {
301
+ unsigned int i = 0;
302
+
303
+ for (i=0;i<4;i++) {
304
+ STR_CHECK_EOF;
305
+ curChar = readChar(lexer, jsonText, offset);
306
+ if (!(charLookupTable[curChar] & VHC)) {
307
+ /* back up to offending char */
308
+ unreadChar(lexer, offset);
309
+ lexer->error = yajl_lex_string_invalid_hex_char;
310
+ goto finish_string_lex;
311
+ }
312
+ }
313
+ } else if (!(charLookupTable[curChar] & VEC)) {
314
+ /* back up to offending char */
315
+ unreadChar(lexer, offset);
316
+ lexer->error = yajl_lex_string_invalid_escaped_char;
317
+ goto finish_string_lex;
318
+ }
319
+ }
320
+ /* when not validating UTF8 it's a simple table lookup to determine
321
+ * if the present character is invalid */
322
+ else if(charLookupTable[curChar] & IJC) {
323
+ /* back up to offending char */
324
+ unreadChar(lexer, offset);
325
+ lexer->error = yajl_lex_string_invalid_json_char;
326
+ goto finish_string_lex;
327
+ }
328
+ /* when in validate UTF8 mode we need to do some extra work */
329
+ else if (lexer->validateUTF8) {
330
+ yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
331
+ offset, curChar);
332
+
333
+ if (t == yajl_tok_eof) {
334
+ tok = yajl_tok_eof;
335
+ goto finish_string_lex;
336
+ } else if (t == yajl_tok_error) {
337
+ lexer->error = yajl_lex_string_invalid_utf8;
338
+ goto finish_string_lex;
339
+ }
340
+ }
341
+ /* accept it, and move on */
342
+ }
343
+ finish_string_lex:
344
+ /* tell our buddy, the parser, wether he needs to process this string
345
+ * again */
346
+ if (hasEscapes && tok == yajl_tok_string) {
347
+ tok = yajl_tok_string_with_escapes;
348
+ }
349
+
350
+ return tok;
351
+ }
352
+
353
+ #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
354
+
355
+ static yajl_tok
356
+ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
357
+ unsigned int jsonTextLen, unsigned int * offset)
358
+ {
359
+ /** XXX: numbers are the only entities in json that we must lex
360
+ * _beyond_ in order to know that they are complete. There
361
+ * is an ambiguous case for integers at EOF. */
362
+
363
+ unsigned char c;
364
+
365
+ yajl_tok tok = yajl_tok_integer;
366
+
367
+ RETURN_IF_EOF;
368
+ c = readChar(lexer, jsonText, offset);
369
+
370
+ /* optional leading minus */
371
+ if (c == '-') {
372
+ RETURN_IF_EOF;
373
+ c = readChar(lexer, jsonText, offset);
374
+ }
375
+
376
+ /* a single zero, or a series of integers */
377
+ if (c == '0') {
378
+ RETURN_IF_EOF;
379
+ c = readChar(lexer, jsonText, offset);
380
+ } else if (c >= '1' && c <= '9') {
381
+ do {
382
+ RETURN_IF_EOF;
383
+ c = readChar(lexer, jsonText, offset);
384
+ } while (c >= '0' && c <= '9');
385
+ } else {
386
+ unreadChar(lexer, offset);
387
+ lexer->error = yajl_lex_missing_integer_after_minus;
388
+ return yajl_tok_error;
389
+ }
390
+
391
+ /* optional fraction (indicates this is floating point) */
392
+ if (c == '.') {
393
+ int numRd = 0;
394
+
395
+ RETURN_IF_EOF;
396
+ c = readChar(lexer, jsonText, offset);
397
+
398
+ while (c >= '0' && c <= '9') {
399
+ numRd++;
400
+ RETURN_IF_EOF;
401
+ c = readChar(lexer, jsonText, offset);
402
+ }
403
+
404
+ if (!numRd) {
405
+ unreadChar(lexer, offset);
406
+ lexer->error = yajl_lex_missing_integer_after_decimal;
407
+ return yajl_tok_error;
408
+ }
409
+ tok = yajl_tok_double;
410
+ }
411
+
412
+ /* optional exponent (indicates this is floating point) */
413
+ if (c == 'e' || c == 'E') {
414
+ RETURN_IF_EOF;
415
+ c = readChar(lexer, jsonText, offset);
416
+
417
+ /* optional sign */
418
+ if (c == '+' || c == '-') {
419
+ RETURN_IF_EOF;
420
+ c = readChar(lexer, jsonText, offset);
421
+ }
422
+
423
+ if (c >= '0' && c <= '9') {
424
+ do {
425
+ RETURN_IF_EOF;
426
+ c = readChar(lexer, jsonText, offset);
427
+ } while (c >= '0' && c <= '9');
428
+ } else {
429
+ unreadChar(lexer, offset);
430
+ lexer->error = yajl_lex_missing_integer_after_exponent;
431
+ return yajl_tok_error;
432
+ }
433
+ tok = yajl_tok_double;
434
+ }
435
+
436
+ /* we always go "one too far" */
437
+ unreadChar(lexer, offset);
438
+
439
+ return tok;
440
+ }
441
+
442
+ static yajl_tok
443
+ yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
444
+ unsigned int jsonTextLen, unsigned int * offset)
445
+ {
446
+ unsigned char c;
447
+
448
+ yajl_tok tok = yajl_tok_comment;
449
+
450
+ RETURN_IF_EOF;
451
+ c = readChar(lexer, jsonText, offset);
452
+
453
+ /* either slash or star expected */
454
+ if (c == '/') {
455
+ /* now we throw away until end of line */
456
+ do {
457
+ RETURN_IF_EOF;
458
+ c = readChar(lexer, jsonText, offset);
459
+ } while (c != '\n');
460
+ } else if (c == '*') {
461
+ /* now we throw away until end of comment */
462
+ for (;;) {
463
+ RETURN_IF_EOF;
464
+ c = readChar(lexer, jsonText, offset);
465
+ if (c == '*') {
466
+ RETURN_IF_EOF;
467
+ c = readChar(lexer, jsonText, offset);
468
+ if (c == '/') {
469
+ break;
470
+ } else {
471
+ unreadChar(lexer, offset);
472
+ }
473
+ }
474
+ }
475
+ } else {
476
+ lexer->error = yajl_lex_invalid_char;
477
+ tok = yajl_tok_error;
478
+ }
479
+
480
+ return tok;
481
+ }
482
+
483
+ yajl_tok
484
+ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
485
+ unsigned int jsonTextLen, unsigned int * offset,
486
+ const unsigned char ** outBuf, unsigned int * outLen)
487
+ {
488
+ yajl_tok tok = yajl_tok_error;
489
+ unsigned char c;
490
+ unsigned int startOffset = *offset;
491
+
492
+ *outBuf = NULL;
493
+ *outLen = 0;
494
+
495
+ for (;;) {
496
+ assert(*offset <= jsonTextLen);
497
+
498
+ if (*offset >= jsonTextLen) {
499
+ tok = yajl_tok_eof;
500
+ goto lexed;
501
+ }
502
+
503
+ c = readChar(lexer, jsonText, offset);
504
+
505
+ switch (c) {
506
+ case '{':
507
+ tok = yajl_tok_left_bracket;
508
+ goto lexed;
509
+ case '}':
510
+ tok = yajl_tok_right_bracket;
511
+ goto lexed;
512
+ case '[':
513
+ tok = yajl_tok_left_brace;
514
+ goto lexed;
515
+ case ']':
516
+ tok = yajl_tok_right_brace;
517
+ goto lexed;
518
+ case ',':
519
+ tok = yajl_tok_comma;
520
+ goto lexed;
521
+ case ':':
522
+ tok = yajl_tok_colon;
523
+ goto lexed;
524
+ case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
525
+ startOffset++;
526
+ break;
527
+ case 't': {
528
+ const char * want = "rue";
529
+ do {
530
+ if (*offset >= jsonTextLen) {
531
+ tok = yajl_tok_eof;
532
+ goto lexed;
533
+ }
534
+ c = readChar(lexer, jsonText, offset);
535
+ if (c != *want) {
536
+ unreadChar(lexer, offset);
537
+ lexer->error = yajl_lex_invalid_string;
538
+ tok = yajl_tok_error;
539
+ goto lexed;
540
+ }
541
+ } while (*(++want));
542
+ tok = yajl_tok_bool;
543
+ goto lexed;
544
+ }
545
+ case 'f': {
546
+ const char * want = "alse";
547
+ do {
548
+ if (*offset >= jsonTextLen) {
549
+ tok = yajl_tok_eof;
550
+ goto lexed;
551
+ }
552
+ c = readChar(lexer, jsonText, offset);
553
+ if (c != *want) {
554
+ unreadChar(lexer, offset);
555
+ lexer->error = yajl_lex_invalid_string;
556
+ tok = yajl_tok_error;
557
+ goto lexed;
558
+ }
559
+ } while (*(++want));
560
+ tok = yajl_tok_bool;
561
+ goto lexed;
562
+ }
563
+ case 'n': {
564
+ const char * want = "ull";
565
+ do {
566
+ if (*offset >= jsonTextLen) {
567
+ tok = yajl_tok_eof;
568
+ goto lexed;
569
+ }
570
+ c = readChar(lexer, jsonText, offset);
571
+ if (c != *want) {
572
+ unreadChar(lexer, offset);
573
+ lexer->error = yajl_lex_invalid_string;
574
+ tok = yajl_tok_error;
575
+ goto lexed;
576
+ }
577
+ } while (*(++want));
578
+ tok = yajl_tok_null;
579
+ goto lexed;
580
+ }
581
+ case '"': {
582
+ tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
583
+ jsonTextLen, offset);
584
+ goto lexed;
585
+ }
586
+ case '-':
587
+ case '0': case '1': case '2': case '3': case '4':
588
+ case '5': case '6': case '7': case '8': case '9': {
589
+ /* integer parsing wants to start from the beginning */
590
+ unreadChar(lexer, offset);
591
+ tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
592
+ jsonTextLen, offset);
593
+ goto lexed;
594
+ }
595
+ case '/':
596
+ /* hey, look, a probable comment! If comments are disabled
597
+ * it's an error. */
598
+ if (!lexer->allowComments) {
599
+ unreadChar(lexer, offset);
600
+ lexer->error = yajl_lex_unallowed_comment;
601
+ tok = yajl_tok_error;
602
+ goto lexed;
603
+ }
604
+ /* if comments are enabled, then we should try to lex
605
+ * the thing. possible outcomes are
606
+ * - successful lex (tok_comment, which means continue),
607
+ * - malformed comment opening (slash not followed by
608
+ * '*' or '/') (tok_error)
609
+ * - eof hit. (tok_eof) */
610
+ tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
611
+ jsonTextLen, offset);
612
+ if (tok == yajl_tok_comment) {
613
+ /* "error" is silly, but that's the initial
614
+ * state of tok. guilty until proven innocent. */
615
+ tok = yajl_tok_error;
616
+ yajl_buf_clear(lexer->buf);
617
+ lexer->bufInUse = 0;
618
+ startOffset = *offset;
619
+ break;
620
+ }
621
+ /* hit error or eof, bail */
622
+ goto lexed;
623
+ default:
624
+ lexer->error = yajl_lex_invalid_char;
625
+ tok = yajl_tok_error;
626
+ goto lexed;
627
+ }
628
+ }
629
+
630
+
631
+ lexed:
632
+ /* need to append to buffer if the buffer is in use or
633
+ * if it's an EOF token */
634
+ if (tok == yajl_tok_eof || lexer->bufInUse) {
635
+ if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
636
+ lexer->bufInUse = 1;
637
+ yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
638
+ lexer->bufOff = 0;
639
+
640
+ if (tok != yajl_tok_eof) {
641
+ *outBuf = yajl_buf_data(lexer->buf);
642
+ *outLen = yajl_buf_len(lexer->buf);
643
+ lexer->bufInUse = 0;
644
+ }
645
+ } else if (tok != yajl_tok_error) {
646
+ *outBuf = jsonText + startOffset;
647
+ *outLen = *offset - startOffset;
648
+ }
649
+
650
+ /* special case for strings. skip the quotes. */
651
+ if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
652
+ {
653
+ assert(*outLen >= 2);
654
+ (*outBuf)++;
655
+ *outLen -= 2;
656
+ }
657
+
658
+
659
+ #ifdef YAJL_LEXER_DEBUG
660
+ if (tok == yajl_tok_error) {
661
+ printf("lexical error: %s\n",
662
+ yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
663
+ } else if (tok == yajl_tok_eof) {
664
+ printf("EOF hit\n");
665
+ } else {
666
+ printf("lexed %s: '", tokToStr(tok));
667
+ fwrite(*outBuf, 1, *outLen, stdout);
668
+ printf("'\n");
669
+ }
670
+ #endif
671
+
672
+ return tok;
673
+ }
674
+
675
+ const char *
676
+ yajl_lex_error_to_string(yajl_lex_error error)
677
+ {
678
+ switch (error) {
679
+ case yajl_lex_e_ok:
680
+ return "ok, no error";
681
+ case yajl_lex_string_invalid_utf8:
682
+ return "invalid bytes in UTF8 string.";
683
+ case yajl_lex_string_invalid_escaped_char:
684
+ return "inside a string, '\\' occurs before a character "
685
+ "which it may not.";
686
+ case yajl_lex_string_invalid_json_char:
687
+ return "invalid character inside string.";
688
+ case yajl_lex_string_invalid_hex_char:
689
+ return "invalid (non-hex) character occurs after '\\u' inside "
690
+ "string.";
691
+ case yajl_lex_invalid_char:
692
+ return "invalid char in json text.";
693
+ case yajl_lex_invalid_string:
694
+ return "invalid string in json text.";
695
+ case yajl_lex_missing_integer_after_exponent:
696
+ return "malformed number, a digit is required after the exponent.";
697
+ case yajl_lex_missing_integer_after_decimal:
698
+ return "malformed number, a digit is required after the "
699
+ "decimal point.";
700
+ case yajl_lex_missing_integer_after_minus:
701
+ return "malformed number, a digit is required after the "
702
+ "minus sign.";
703
+ case yajl_lex_unallowed_comment:
704
+ return "probable comment found in input text, comments are "
705
+ "not enabled.";
706
+ }
707
+ return "unknown error code";
708
+ }
709
+
710
+
711
+ /** allows access to more specific information about the lexical
712
+ * error when yajl_lex_lex returns yajl_tok_error. */
713
+ yajl_lex_error
714
+ yajl_lex_get_error(yajl_lexer lexer)
715
+ {
716
+ if (lexer == NULL) return (yajl_lex_error) -1;
717
+ return lexer->error;
718
+ }
719
+
720
+ unsigned int yajl_lex_current_line(yajl_lexer lexer)
721
+ {
722
+ return lexer->lineOff;
723
+ }
724
+
725
+ unsigned int yajl_lex_current_char(yajl_lexer lexer)
726
+ {
727
+ return lexer->charOff;
728
+ }
729
+
730
+ yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
731
+ unsigned int jsonTextLen, unsigned int offset)
732
+ {
733
+ const unsigned char * outBuf;
734
+ unsigned int outLen;
735
+ unsigned int bufLen = yajl_buf_len(lexer->buf);
736
+ unsigned int bufOff = lexer->bufOff;
737
+ unsigned int bufInUse = lexer->bufInUse;
738
+ yajl_tok tok;
739
+
740
+ tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
741
+ &outBuf, &outLen);
742
+
743
+ lexer->bufOff = bufOff;
744
+ lexer->bufInUse = bufInUse;
745
+ yajl_buf_truncate(lexer->buf, bufLen);
746
+
747
+ return tok;
748
+ }