yajl-ruby 1.0.0-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yajl-ruby might be problematic. Click here for more details.
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/CHANGELOG.md +327 -0
- data/Gemfile +3 -0
- data/MIT-LICENSE +20 -0
- data/README.md +362 -0
- data/Rakefile +2 -0
- data/benchmark/encode.rb +72 -0
- data/benchmark/encode_json_and_marshal.rb +42 -0
- data/benchmark/encode_json_and_yaml.rb +53 -0
- data/benchmark/http.rb +32 -0
- data/benchmark/parse.rb +94 -0
- data/benchmark/parse_json_and_marshal.rb +50 -0
- data/benchmark/parse_json_and_yaml.rb +55 -0
- data/benchmark/parse_stream.rb +54 -0
- data/benchmark/subjects/item.json +1 -0
- data/benchmark/subjects/ohai.json +1216 -0
- data/benchmark/subjects/ohai.marshal_dump +0 -0
- data/benchmark/subjects/ohai.yml +975 -0
- data/benchmark/subjects/twitter_search.json +1 -0
- data/benchmark/subjects/twitter_stream.json +430 -0
- data/benchmark/subjects/unicode.json +1 -0
- data/examples/encoding/chunked_encoding.rb +27 -0
- data/examples/encoding/one_shot.rb +13 -0
- data/examples/encoding/to_an_io.rb +12 -0
- data/examples/http/twitter_search_api.rb +12 -0
- data/examples/http/twitter_stream_api.rb +26 -0
- data/examples/parsing/from_file.rb +14 -0
- data/examples/parsing/from_stdin.rb +9 -0
- data/examples/parsing/from_string.rb +13 -0
- data/ext/yajl/api/yajl_common.h +89 -0
- data/ext/yajl/api/yajl_gen.h +161 -0
- data/ext/yajl/api/yajl_parse.h +196 -0
- data/ext/yajl/api/yajl_version.h +23 -0
- data/ext/yajl/extconf.rb +7 -0
- data/ext/yajl/yajl.c +164 -0
- data/ext/yajl/yajl_alloc.c +65 -0
- data/ext/yajl/yajl_alloc.h +50 -0
- data/ext/yajl/yajl_buf.c +119 -0
- data/ext/yajl/yajl_buf.h +73 -0
- data/ext/yajl/yajl_bytestack.h +85 -0
- data/ext/yajl/yajl_encode.c +201 -0
- data/ext/yajl/yajl_encode.h +52 -0
- data/ext/yajl/yajl_ext.c +905 -0
- data/ext/yajl/yajl_ext.h +135 -0
- data/ext/yajl/yajl_gen.c +344 -0
- data/ext/yajl/yajl_lex.c +748 -0
- data/ext/yajl/yajl_lex.h +135 -0
- data/ext/yajl/yajl_parser.c +450 -0
- data/ext/yajl/yajl_parser.h +82 -0
- data/ext/yajl/yajl_version.c +7 -0
- data/lib/yajl.rb +75 -0
- data/lib/yajl/1.8/yajl.so +0 -0
- data/lib/yajl/1.9/yajl.so +0 -0
- data/lib/yajl/bzip2.rb +11 -0
- data/lib/yajl/bzip2/stream_reader.rb +31 -0
- data/lib/yajl/bzip2/stream_writer.rb +14 -0
- data/lib/yajl/deflate.rb +6 -0
- data/lib/yajl/deflate/stream_reader.rb +43 -0
- data/lib/yajl/deflate/stream_writer.rb +20 -0
- data/lib/yajl/gzip.rb +6 -0
- data/lib/yajl/gzip/stream_reader.rb +30 -0
- data/lib/yajl/gzip/stream_writer.rb +13 -0
- data/lib/yajl/http_stream.rb +212 -0
- data/lib/yajl/json_gem.rb +15 -0
- data/lib/yajl/json_gem/encoding.rb +51 -0
- data/lib/yajl/json_gem/parsing.rb +26 -0
- data/lib/yajl/version.rb +3 -0
- data/lib/yajl/yajl.rb +2 -0
- data/spec/encoding/encoding_spec.rb +271 -0
- data/spec/global/global_spec.rb +54 -0
- data/spec/http/fixtures/http.bzip2.dump +0 -0
- data/spec/http/fixtures/http.chunked.dump +11 -0
- data/spec/http/fixtures/http.deflate.dump +0 -0
- data/spec/http/fixtures/http.error.dump +12 -0
- data/spec/http/fixtures/http.gzip.dump +0 -0
- data/spec/http/fixtures/http.html.dump +1220 -0
- data/spec/http/fixtures/http.raw.dump +1226 -0
- data/spec/http/http_delete_spec.rb +98 -0
- data/spec/http/http_error_spec.rb +32 -0
- data/spec/http/http_get_spec.rb +109 -0
- data/spec/http/http_post_spec.rb +123 -0
- data/spec/http/http_put_spec.rb +105 -0
- data/spec/http/http_stream_options_spec.rb +27 -0
- data/spec/json_gem_compatibility/compatibility_spec.rb +203 -0
- data/spec/parsing/active_support_spec.rb +64 -0
- data/spec/parsing/chunked_spec.rb +96 -0
- data/spec/parsing/fixtures/fail.15.json +1 -0
- data/spec/parsing/fixtures/fail.16.json +1 -0
- data/spec/parsing/fixtures/fail.17.json +1 -0
- data/spec/parsing/fixtures/fail.26.json +1 -0
- data/spec/parsing/fixtures/fail11.json +1 -0
- data/spec/parsing/fixtures/fail12.json +1 -0
- data/spec/parsing/fixtures/fail13.json +1 -0
- data/spec/parsing/fixtures/fail14.json +1 -0
- data/spec/parsing/fixtures/fail19.json +1 -0
- data/spec/parsing/fixtures/fail20.json +1 -0
- data/spec/parsing/fixtures/fail21.json +1 -0
- data/spec/parsing/fixtures/fail22.json +1 -0
- data/spec/parsing/fixtures/fail23.json +1 -0
- data/spec/parsing/fixtures/fail24.json +1 -0
- data/spec/parsing/fixtures/fail25.json +1 -0
- data/spec/parsing/fixtures/fail27.json +2 -0
- data/spec/parsing/fixtures/fail28.json +2 -0
- data/spec/parsing/fixtures/fail3.json +1 -0
- data/spec/parsing/fixtures/fail4.json +1 -0
- data/spec/parsing/fixtures/fail5.json +1 -0
- data/spec/parsing/fixtures/fail6.json +1 -0
- data/spec/parsing/fixtures/fail9.json +1 -0
- data/spec/parsing/fixtures/pass.array.json +6 -0
- data/spec/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
- data/spec/parsing/fixtures/pass.contacts.json +1 -0
- data/spec/parsing/fixtures/pass.db100.xml.json +1 -0
- data/spec/parsing/fixtures/pass.db1000.xml.json +1 -0
- data/spec/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
- data/spec/parsing/fixtures/pass.deep_arrays.json +1 -0
- data/spec/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
- data/spec/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
- data/spec/parsing/fixtures/pass.doubles.json +1 -0
- data/spec/parsing/fixtures/pass.empty_array.json +1 -0
- data/spec/parsing/fixtures/pass.empty_string.json +1 -0
- data/spec/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
- data/spec/parsing/fixtures/pass.escaped_foobar.json +1 -0
- data/spec/parsing/fixtures/pass.item.json +1 -0
- data/spec/parsing/fixtures/pass.json-org-sample1.json +23 -0
- data/spec/parsing/fixtures/pass.json-org-sample2.json +11 -0
- data/spec/parsing/fixtures/pass.json-org-sample3.json +26 -0
- data/spec/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
- data/spec/parsing/fixtures/pass.json-org-sample4.json +89 -0
- data/spec/parsing/fixtures/pass.json-org-sample5.json +27 -0
- data/spec/parsing/fixtures/pass.map-spain.xml.json +1 -0
- data/spec/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
- data/spec/parsing/fixtures/pass.ns-soap.xml.json +1 -0
- data/spec/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
- data/spec/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
- data/spec/parsing/fixtures/pass.numbers-int-4k.json +11 -0
- data/spec/parsing/fixtures/pass.numbers-int-64k.json +154 -0
- data/spec/parsing/fixtures/pass.twitter-search.json +1 -0
- data/spec/parsing/fixtures/pass.twitter-search2.json +1 -0
- data/spec/parsing/fixtures/pass.unicode.json +3315 -0
- data/spec/parsing/fixtures/pass.yelp.json +1 -0
- data/spec/parsing/fixtures/pass1.json +56 -0
- data/spec/parsing/fixtures/pass2.json +1 -0
- data/spec/parsing/fixtures/pass3.json +6 -0
- data/spec/parsing/fixtures_spec.rb +40 -0
- data/spec/parsing/one_off_spec.rb +85 -0
- data/spec/rcov.opts +3 -0
- data/spec/spec_helper.rb +16 -0
- data/tasks/compile.rake +35 -0
- data/tasks/rspec.rake +16 -0
- data/yajl-ruby.gemspec +24 -0
- metadata +335 -0
data/ext/yajl/yajl_lex.c
ADDED
@@ -0,0 +1,748 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright 2010, Lloyd Hilaiel.
|
3
|
+
*
|
4
|
+
* Redistribution and use in source and binary forms, with or without
|
5
|
+
* modification, are permitted provided that the following conditions are
|
6
|
+
* met:
|
7
|
+
*
|
8
|
+
* 1. Redistributions of source code must retain the above copyright
|
9
|
+
* notice, this list of conditions and the following disclaimer.
|
10
|
+
*
|
11
|
+
* 2. Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in
|
13
|
+
* the documentation and/or other materials provided with the
|
14
|
+
* distribution.
|
15
|
+
*
|
16
|
+
* 3. Neither the name of Lloyd Hilaiel nor the names of its
|
17
|
+
* contributors may be used to endorse or promote products derived
|
18
|
+
* from this software without specific prior written permission.
|
19
|
+
*
|
20
|
+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
21
|
+
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
22
|
+
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
23
|
+
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
24
|
+
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
25
|
+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
27
|
+
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
28
|
+
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
29
|
+
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
30
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
*/
|
32
|
+
|
33
|
+
#include "yajl_lex.h"
|
34
|
+
#include "yajl_buf.h"
|
35
|
+
|
36
|
+
#include <stdlib.h>
|
37
|
+
#include <stdio.h>
|
38
|
+
#include <assert.h>
|
39
|
+
#include <string.h>
|
40
|
+
|
41
|
+
#ifdef YAJL_LEXER_DEBUG
/* Map a token type to a short label for the debug trace printed at the
 * end of yajl_lex_lex.  The left and right variants of braces and
 * brackets share one label; a value with no case below yields "unknown". */
static const char *
tokToStr(yajl_tok tok)
{
    const char * label = "unknown";

    switch (tok) {
        case yajl_tok_bool:                label = "bool";                break;
        case yajl_tok_colon:               label = "colon";               break;
        case yajl_tok_comma:               label = "comma";               break;
        case yajl_tok_eof:                 label = "eof";                 break;
        case yajl_tok_error:               label = "error";               break;
        case yajl_tok_left_brace:          label = "brace";               break;
        case yajl_tok_left_bracket:        label = "bracket";             break;
        case yajl_tok_null:                label = "null";                break;
        case yajl_tok_integer:             label = "integer";             break;
        case yajl_tok_double:              label = "double";              break;
        case yajl_tok_right_brace:         label = "brace";               break;
        case yajl_tok_right_bracket:       label = "bracket";             break;
        case yajl_tok_string:              label = "string";              break;
        case yajl_tok_string_with_escapes: label = "string_with_escapes"; break;
    }

    return label;
}
#endif
|
64
|
+
|
65
|
+
/* Impact of the stream parsing feature on the lexer:
|
66
|
+
*
|
67
|
+
* YAJL supports stream parsing. That is, the ability to parse the first
|
68
|
+
* bits of a chunk of JSON before the last bits are available (still on
|
69
|
+
* the network or disk). This makes the lexer more complex. The
|
70
|
+
* responsibility of the lexer is to handle transparently the case where
|
71
|
+
* a chunk boundary falls in the middle of a token. This is
|
72
|
+
* accomplished via a buffer and a character reading abstraction.
|
73
|
+
*
|
74
|
+
* Overview of implementation
|
75
|
+
*
|
76
|
+
* When we lex to end of input string before end of token is hit, we
|
77
|
+
* copy all of the input text composing the token into our lexBuf.
|
78
|
+
*
|
79
|
+
* Every time we read a character, we do so through the readChar function.
|
80
|
+
* readChar's responsibility is to handle pulling all chars from the buffer
|
81
|
+
* before pulling chars from input text
|
82
|
+
*/
|
83
|
+
|
84
|
+
struct yajl_lexer_t {
|
85
|
+
/* the overal line and char offset into the data */
|
86
|
+
unsigned int lineOff;
|
87
|
+
unsigned int charOff;
|
88
|
+
|
89
|
+
/* error */
|
90
|
+
yajl_lex_error error;
|
91
|
+
|
92
|
+
/* a input buffer to handle the case where a token is spread over
|
93
|
+
* multiple chunks */
|
94
|
+
yajl_buf buf;
|
95
|
+
|
96
|
+
/* in the case where we have data in the lexBuf, bufOff holds
|
97
|
+
* the current offset into the lexBuf. */
|
98
|
+
unsigned int bufOff;
|
99
|
+
|
100
|
+
/* are we using the lex buf? */
|
101
|
+
unsigned int bufInUse;
|
102
|
+
|
103
|
+
/* shall we allow comments? */
|
104
|
+
unsigned int allowComments;
|
105
|
+
|
106
|
+
/* shall we validate utf8 inside strings? */
|
107
|
+
unsigned int validateUTF8;
|
108
|
+
|
109
|
+
yajl_alloc_funcs * alloc;
|
110
|
+
};
|
111
|
+
|
112
|
+
/* Fetch the next input byte.  While the lexer's buffer is in use and
 * bufOff has not reached its end, the byte comes from the buffer and
 * bufOff advances; otherwise it comes from txt and *off advances.  No
 * bounds check is made on txt: callers compare *off against the input
 * length before invoking this.  Every use of lxr is parenthesized so any
 * pointer expression may be passed, but lxr is evaluated more than once,
 * so it must be free of side effects. */
#define readChar(lxr, txt, off)                                          \
    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) &&                     \
      (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ?                        \
     (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \
     ((txt)[(*(off))++]))

/* Step back one byte: rewind *off when it is non-zero, otherwise rewind
 * the lexer's buffer offset. */
#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
|
118
|
+
|
119
|
+
yajl_lexer
|
120
|
+
yajl_lex_alloc(yajl_alloc_funcs * alloc,
|
121
|
+
unsigned int allowComments, unsigned int validateUTF8)
|
122
|
+
{
|
123
|
+
yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
|
124
|
+
memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
|
125
|
+
lxr->buf = yajl_buf_alloc(alloc);
|
126
|
+
lxr->allowComments = allowComments;
|
127
|
+
lxr->validateUTF8 = validateUTF8;
|
128
|
+
lxr->alloc = alloc;
|
129
|
+
return lxr;
|
130
|
+
}
|
131
|
+
|
132
|
+
yajl_lexer
|
133
|
+
yajl_lex_realloc(yajl_lexer orig) {
|
134
|
+
orig->lineOff = 0;
|
135
|
+
orig->charOff = 0;
|
136
|
+
orig->error = yajl_lex_e_ok;
|
137
|
+
yajl_buf_clear(orig->buf);
|
138
|
+
orig->bufOff = 0;
|
139
|
+
orig->bufInUse = 0;
|
140
|
+
return orig;
|
141
|
+
}
|
142
|
+
|
143
|
+
void
|
144
|
+
yajl_lex_free(yajl_lexer lxr)
|
145
|
+
{
|
146
|
+
yajl_buf_free(lxr->buf);
|
147
|
+
YA_FREE(lxr->alloc, lxr);
|
148
|
+
return;
|
149
|
+
}
|
150
|
+
|
151
|
+
/* a lookup table which lets us quickly determine three things:
 * VEC - valid escaped control char (may directly follow a backslash)
 * IJC - invalid json char (may not appear bare inside a string)
 * VHC - valid hex char
 * note. the solidus '/' may be escaped or not.
 * note. 'u' is not flagged VEC; the string lexer tests for it before
 *       consulting this table.
 */
#define VEC 1
#define IJC 2
#define VHC 4
static const char charLookupTable[256] =
{
/*00*/ IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
/*10*/ IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,

/*20*/ 0, 0, VEC|IJC, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, VEC,
/*30*/ VHC, VHC, VHC, VHC, VHC, VHC, VHC, VHC,    VHC, VHC, 0, 0, 0, 0, 0, 0,

/*40*/ 0, VHC, VHC, VHC, VHC, VHC, VHC, 0,    0, 0, 0, 0, 0, 0, 0, 0,
/*50*/ 0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, VEC|IJC, 0, 0, 0,

/*60*/ 0, VHC, VEC|VHC, VHC, VHC, VHC, VEC|VHC, 0,    0, 0, 0, 0, 0, 0, VEC, 0,
/*70*/ 0, 0, VEC, 0, VEC, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0

    /* entries 0x80 - 0xff carry no flags.  They are left to implicit
     * zero-initialization; the array is still 256 wide so we don't have
     * to always check the range of the char. */
};
|
204
|
+
|
205
|
+
/** process a variable length utf8 encoded codepoint.
|
206
|
+
*
|
207
|
+
* returns:
|
208
|
+
* yajl_tok_string - if valid utf8 char was parsed and offset was
|
209
|
+
* advanced
|
210
|
+
* yajl_tok_eof - if end of input was hit before validation could
|
211
|
+
* complete
|
212
|
+
* yajl_tok_error - if invalid utf8 was encountered
|
213
|
+
*
|
214
|
+
* NOTE: on error the offset will point to the first char of the
|
215
|
+
* invalid utf8 */
|
216
|
+
#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
|
217
|
+
|
218
|
+
static yajl_tok
|
219
|
+
yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
|
220
|
+
unsigned int jsonTextLen, unsigned int * offset,
|
221
|
+
unsigned char curChar)
|
222
|
+
{
|
223
|
+
if (curChar <= 0x7f) {
|
224
|
+
/* single byte */
|
225
|
+
return yajl_tok_string;
|
226
|
+
} else if ((curChar >> 5) == 0x6) {
|
227
|
+
/* two byte */
|
228
|
+
UTF8_CHECK_EOF;
|
229
|
+
curChar = readChar(lexer, jsonText, offset);
|
230
|
+
if ((curChar >> 6) == 0x2) return yajl_tok_string;
|
231
|
+
} else if ((curChar >> 4) == 0x0e) {
|
232
|
+
/* three byte */
|
233
|
+
UTF8_CHECK_EOF;
|
234
|
+
curChar = readChar(lexer, jsonText, offset);
|
235
|
+
if ((curChar >> 6) == 0x2) {
|
236
|
+
UTF8_CHECK_EOF;
|
237
|
+
curChar = readChar(lexer, jsonText, offset);
|
238
|
+
if ((curChar >> 6) == 0x2) return yajl_tok_string;
|
239
|
+
}
|
240
|
+
} else if ((curChar >> 3) == 0x1e) {
|
241
|
+
/* four byte */
|
242
|
+
UTF8_CHECK_EOF;
|
243
|
+
curChar = readChar(lexer, jsonText, offset);
|
244
|
+
if ((curChar >> 6) == 0x2) {
|
245
|
+
UTF8_CHECK_EOF;
|
246
|
+
curChar = readChar(lexer, jsonText, offset);
|
247
|
+
if ((curChar >> 6) == 0x2) {
|
248
|
+
UTF8_CHECK_EOF;
|
249
|
+
curChar = readChar(lexer, jsonText, offset);
|
250
|
+
if ((curChar >> 6) == 0x2) return yajl_tok_string;
|
251
|
+
}
|
252
|
+
}
|
253
|
+
}
|
254
|
+
|
255
|
+
return yajl_tok_error;
|
256
|
+
}
|
257
|
+
|
258
|
+
/* lex a string. input is the lexer, pointer to beginning of
|
259
|
+
* json text, and start of string (offset).
|
260
|
+
* a token is returned which has the following meanings:
|
261
|
+
* yajl_tok_string: lex of string was successful. offset points to
|
262
|
+
* terminating '"'.
|
263
|
+
* yajl_tok_eof: end of text was encountered before we could complete
|
264
|
+
* the lex.
|
265
|
+
* yajl_tok_error: embedded in the string were unallowable chars. offset
|
266
|
+
* points to the offending char
|
267
|
+
*/
|
268
|
+
#define STR_CHECK_EOF \
|
269
|
+
if (*offset >= jsonTextLen) { \
|
270
|
+
tok = yajl_tok_eof; \
|
271
|
+
goto finish_string_lex; \
|
272
|
+
}
|
273
|
+
|
274
|
+
static yajl_tok
|
275
|
+
yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
|
276
|
+
unsigned int jsonTextLen, unsigned int * offset)
|
277
|
+
{
|
278
|
+
yajl_tok tok = yajl_tok_error;
|
279
|
+
int hasEscapes = 0;
|
280
|
+
|
281
|
+
for (;;) {
|
282
|
+
unsigned char curChar;
|
283
|
+
|
284
|
+
STR_CHECK_EOF;
|
285
|
+
|
286
|
+
curChar = readChar(lexer, jsonText, offset);
|
287
|
+
|
288
|
+
/* quote terminates */
|
289
|
+
if (curChar == '"') {
|
290
|
+
tok = yajl_tok_string;
|
291
|
+
break;
|
292
|
+
}
|
293
|
+
/* backslash escapes a set of control chars, */
|
294
|
+
else if (curChar == '\\') {
|
295
|
+
hasEscapes = 1;
|
296
|
+
STR_CHECK_EOF;
|
297
|
+
|
298
|
+
/* special case \u */
|
299
|
+
curChar = readChar(lexer, jsonText, offset);
|
300
|
+
if (curChar == 'u') {
|
301
|
+
unsigned int i = 0;
|
302
|
+
|
303
|
+
for (i=0;i<4;i++) {
|
304
|
+
STR_CHECK_EOF;
|
305
|
+
curChar = readChar(lexer, jsonText, offset);
|
306
|
+
if (!(charLookupTable[curChar] & VHC)) {
|
307
|
+
/* back up to offending char */
|
308
|
+
unreadChar(lexer, offset);
|
309
|
+
lexer->error = yajl_lex_string_invalid_hex_char;
|
310
|
+
goto finish_string_lex;
|
311
|
+
}
|
312
|
+
}
|
313
|
+
} else if (!(charLookupTable[curChar] & VEC)) {
|
314
|
+
/* back up to offending char */
|
315
|
+
unreadChar(lexer, offset);
|
316
|
+
lexer->error = yajl_lex_string_invalid_escaped_char;
|
317
|
+
goto finish_string_lex;
|
318
|
+
}
|
319
|
+
}
|
320
|
+
/* when not validating UTF8 it's a simple table lookup to determine
|
321
|
+
* if the present character is invalid */
|
322
|
+
else if(charLookupTable[curChar] & IJC) {
|
323
|
+
/* back up to offending char */
|
324
|
+
unreadChar(lexer, offset);
|
325
|
+
lexer->error = yajl_lex_string_invalid_json_char;
|
326
|
+
goto finish_string_lex;
|
327
|
+
}
|
328
|
+
/* when in validate UTF8 mode we need to do some extra work */
|
329
|
+
else if (lexer->validateUTF8) {
|
330
|
+
yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
|
331
|
+
offset, curChar);
|
332
|
+
|
333
|
+
if (t == yajl_tok_eof) {
|
334
|
+
tok = yajl_tok_eof;
|
335
|
+
goto finish_string_lex;
|
336
|
+
} else if (t == yajl_tok_error) {
|
337
|
+
lexer->error = yajl_lex_string_invalid_utf8;
|
338
|
+
goto finish_string_lex;
|
339
|
+
}
|
340
|
+
}
|
341
|
+
/* accept it, and move on */
|
342
|
+
}
|
343
|
+
finish_string_lex:
|
344
|
+
/* tell our buddy, the parser, wether he needs to process this string
|
345
|
+
* again */
|
346
|
+
if (hasEscapes && tok == yajl_tok_string) {
|
347
|
+
tok = yajl_tok_string_with_escapes;
|
348
|
+
}
|
349
|
+
|
350
|
+
return tok;
|
351
|
+
}
|
352
|
+
|
353
|
+
#define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
|
354
|
+
|
355
|
+
static yajl_tok
|
356
|
+
yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
|
357
|
+
unsigned int jsonTextLen, unsigned int * offset)
|
358
|
+
{
|
359
|
+
/** XXX: numbers are the only entities in json that we must lex
|
360
|
+
* _beyond_ in order to know that they are complete. There
|
361
|
+
* is an ambiguous case for integers at EOF. */
|
362
|
+
|
363
|
+
unsigned char c;
|
364
|
+
|
365
|
+
yajl_tok tok = yajl_tok_integer;
|
366
|
+
|
367
|
+
RETURN_IF_EOF;
|
368
|
+
c = readChar(lexer, jsonText, offset);
|
369
|
+
|
370
|
+
/* optional leading minus */
|
371
|
+
if (c == '-') {
|
372
|
+
RETURN_IF_EOF;
|
373
|
+
c = readChar(lexer, jsonText, offset);
|
374
|
+
}
|
375
|
+
|
376
|
+
/* a single zero, or a series of integers */
|
377
|
+
if (c == '0') {
|
378
|
+
RETURN_IF_EOF;
|
379
|
+
c = readChar(lexer, jsonText, offset);
|
380
|
+
} else if (c >= '1' && c <= '9') {
|
381
|
+
do {
|
382
|
+
RETURN_IF_EOF;
|
383
|
+
c = readChar(lexer, jsonText, offset);
|
384
|
+
} while (c >= '0' && c <= '9');
|
385
|
+
} else {
|
386
|
+
unreadChar(lexer, offset);
|
387
|
+
lexer->error = yajl_lex_missing_integer_after_minus;
|
388
|
+
return yajl_tok_error;
|
389
|
+
}
|
390
|
+
|
391
|
+
/* optional fraction (indicates this is floating point) */
|
392
|
+
if (c == '.') {
|
393
|
+
int numRd = 0;
|
394
|
+
|
395
|
+
RETURN_IF_EOF;
|
396
|
+
c = readChar(lexer, jsonText, offset);
|
397
|
+
|
398
|
+
while (c >= '0' && c <= '9') {
|
399
|
+
numRd++;
|
400
|
+
RETURN_IF_EOF;
|
401
|
+
c = readChar(lexer, jsonText, offset);
|
402
|
+
}
|
403
|
+
|
404
|
+
if (!numRd) {
|
405
|
+
unreadChar(lexer, offset);
|
406
|
+
lexer->error = yajl_lex_missing_integer_after_decimal;
|
407
|
+
return yajl_tok_error;
|
408
|
+
}
|
409
|
+
tok = yajl_tok_double;
|
410
|
+
}
|
411
|
+
|
412
|
+
/* optional exponent (indicates this is floating point) */
|
413
|
+
if (c == 'e' || c == 'E') {
|
414
|
+
RETURN_IF_EOF;
|
415
|
+
c = readChar(lexer, jsonText, offset);
|
416
|
+
|
417
|
+
/* optional sign */
|
418
|
+
if (c == '+' || c == '-') {
|
419
|
+
RETURN_IF_EOF;
|
420
|
+
c = readChar(lexer, jsonText, offset);
|
421
|
+
}
|
422
|
+
|
423
|
+
if (c >= '0' && c <= '9') {
|
424
|
+
do {
|
425
|
+
RETURN_IF_EOF;
|
426
|
+
c = readChar(lexer, jsonText, offset);
|
427
|
+
} while (c >= '0' && c <= '9');
|
428
|
+
} else {
|
429
|
+
unreadChar(lexer, offset);
|
430
|
+
lexer->error = yajl_lex_missing_integer_after_exponent;
|
431
|
+
return yajl_tok_error;
|
432
|
+
}
|
433
|
+
tok = yajl_tok_double;
|
434
|
+
}
|
435
|
+
|
436
|
+
/* we always go "one too far" */
|
437
|
+
unreadChar(lexer, offset);
|
438
|
+
|
439
|
+
return tok;
|
440
|
+
}
|
441
|
+
|
442
|
+
static yajl_tok
|
443
|
+
yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
|
444
|
+
unsigned int jsonTextLen, unsigned int * offset)
|
445
|
+
{
|
446
|
+
unsigned char c;
|
447
|
+
|
448
|
+
yajl_tok tok = yajl_tok_comment;
|
449
|
+
|
450
|
+
RETURN_IF_EOF;
|
451
|
+
c = readChar(lexer, jsonText, offset);
|
452
|
+
|
453
|
+
/* either slash or star expected */
|
454
|
+
if (c == '/') {
|
455
|
+
/* now we throw away until end of line */
|
456
|
+
do {
|
457
|
+
RETURN_IF_EOF;
|
458
|
+
c = readChar(lexer, jsonText, offset);
|
459
|
+
} while (c != '\n');
|
460
|
+
} else if (c == '*') {
|
461
|
+
/* now we throw away until end of comment */
|
462
|
+
for (;;) {
|
463
|
+
RETURN_IF_EOF;
|
464
|
+
c = readChar(lexer, jsonText, offset);
|
465
|
+
if (c == '*') {
|
466
|
+
RETURN_IF_EOF;
|
467
|
+
c = readChar(lexer, jsonText, offset);
|
468
|
+
if (c == '/') {
|
469
|
+
break;
|
470
|
+
} else {
|
471
|
+
unreadChar(lexer, offset);
|
472
|
+
}
|
473
|
+
}
|
474
|
+
}
|
475
|
+
} else {
|
476
|
+
lexer->error = yajl_lex_invalid_char;
|
477
|
+
tok = yajl_tok_error;
|
478
|
+
}
|
479
|
+
|
480
|
+
return tok;
|
481
|
+
}
|
482
|
+
|
483
|
+
yajl_tok
|
484
|
+
yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
|
485
|
+
unsigned int jsonTextLen, unsigned int * offset,
|
486
|
+
const unsigned char ** outBuf, unsigned int * outLen)
|
487
|
+
{
|
488
|
+
yajl_tok tok = yajl_tok_error;
|
489
|
+
unsigned char c;
|
490
|
+
unsigned int startOffset = *offset;
|
491
|
+
|
492
|
+
*outBuf = NULL;
|
493
|
+
*outLen = 0;
|
494
|
+
|
495
|
+
for (;;) {
|
496
|
+
assert(*offset <= jsonTextLen);
|
497
|
+
|
498
|
+
if (*offset >= jsonTextLen) {
|
499
|
+
tok = yajl_tok_eof;
|
500
|
+
goto lexed;
|
501
|
+
}
|
502
|
+
|
503
|
+
c = readChar(lexer, jsonText, offset);
|
504
|
+
|
505
|
+
switch (c) {
|
506
|
+
case '{':
|
507
|
+
tok = yajl_tok_left_bracket;
|
508
|
+
goto lexed;
|
509
|
+
case '}':
|
510
|
+
tok = yajl_tok_right_bracket;
|
511
|
+
goto lexed;
|
512
|
+
case '[':
|
513
|
+
tok = yajl_tok_left_brace;
|
514
|
+
goto lexed;
|
515
|
+
case ']':
|
516
|
+
tok = yajl_tok_right_brace;
|
517
|
+
goto lexed;
|
518
|
+
case ',':
|
519
|
+
tok = yajl_tok_comma;
|
520
|
+
goto lexed;
|
521
|
+
case ':':
|
522
|
+
tok = yajl_tok_colon;
|
523
|
+
goto lexed;
|
524
|
+
case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
|
525
|
+
startOffset++;
|
526
|
+
break;
|
527
|
+
case 't': {
|
528
|
+
const char * want = "rue";
|
529
|
+
do {
|
530
|
+
if (*offset >= jsonTextLen) {
|
531
|
+
tok = yajl_tok_eof;
|
532
|
+
goto lexed;
|
533
|
+
}
|
534
|
+
c = readChar(lexer, jsonText, offset);
|
535
|
+
if (c != *want) {
|
536
|
+
unreadChar(lexer, offset);
|
537
|
+
lexer->error = yajl_lex_invalid_string;
|
538
|
+
tok = yajl_tok_error;
|
539
|
+
goto lexed;
|
540
|
+
}
|
541
|
+
} while (*(++want));
|
542
|
+
tok = yajl_tok_bool;
|
543
|
+
goto lexed;
|
544
|
+
}
|
545
|
+
case 'f': {
|
546
|
+
const char * want = "alse";
|
547
|
+
do {
|
548
|
+
if (*offset >= jsonTextLen) {
|
549
|
+
tok = yajl_tok_eof;
|
550
|
+
goto lexed;
|
551
|
+
}
|
552
|
+
c = readChar(lexer, jsonText, offset);
|
553
|
+
if (c != *want) {
|
554
|
+
unreadChar(lexer, offset);
|
555
|
+
lexer->error = yajl_lex_invalid_string;
|
556
|
+
tok = yajl_tok_error;
|
557
|
+
goto lexed;
|
558
|
+
}
|
559
|
+
} while (*(++want));
|
560
|
+
tok = yajl_tok_bool;
|
561
|
+
goto lexed;
|
562
|
+
}
|
563
|
+
case 'n': {
|
564
|
+
const char * want = "ull";
|
565
|
+
do {
|
566
|
+
if (*offset >= jsonTextLen) {
|
567
|
+
tok = yajl_tok_eof;
|
568
|
+
goto lexed;
|
569
|
+
}
|
570
|
+
c = readChar(lexer, jsonText, offset);
|
571
|
+
if (c != *want) {
|
572
|
+
unreadChar(lexer, offset);
|
573
|
+
lexer->error = yajl_lex_invalid_string;
|
574
|
+
tok = yajl_tok_error;
|
575
|
+
goto lexed;
|
576
|
+
}
|
577
|
+
} while (*(++want));
|
578
|
+
tok = yajl_tok_null;
|
579
|
+
goto lexed;
|
580
|
+
}
|
581
|
+
case '"': {
|
582
|
+
tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
|
583
|
+
jsonTextLen, offset);
|
584
|
+
goto lexed;
|
585
|
+
}
|
586
|
+
case '-':
|
587
|
+
case '0': case '1': case '2': case '3': case '4':
|
588
|
+
case '5': case '6': case '7': case '8': case '9': {
|
589
|
+
/* integer parsing wants to start from the beginning */
|
590
|
+
unreadChar(lexer, offset);
|
591
|
+
tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
|
592
|
+
jsonTextLen, offset);
|
593
|
+
goto lexed;
|
594
|
+
}
|
595
|
+
case '/':
|
596
|
+
/* hey, look, a probable comment! If comments are disabled
|
597
|
+
* it's an error. */
|
598
|
+
if (!lexer->allowComments) {
|
599
|
+
unreadChar(lexer, offset);
|
600
|
+
lexer->error = yajl_lex_unallowed_comment;
|
601
|
+
tok = yajl_tok_error;
|
602
|
+
goto lexed;
|
603
|
+
}
|
604
|
+
/* if comments are enabled, then we should try to lex
|
605
|
+
* the thing. possible outcomes are
|
606
|
+
* - successful lex (tok_comment, which means continue),
|
607
|
+
* - malformed comment opening (slash not followed by
|
608
|
+
* '*' or '/') (tok_error)
|
609
|
+
* - eof hit. (tok_eof) */
|
610
|
+
tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
|
611
|
+
jsonTextLen, offset);
|
612
|
+
if (tok == yajl_tok_comment) {
|
613
|
+
/* "error" is silly, but that's the initial
|
614
|
+
* state of tok. guilty until proven innocent. */
|
615
|
+
tok = yajl_tok_error;
|
616
|
+
yajl_buf_clear(lexer->buf);
|
617
|
+
lexer->bufInUse = 0;
|
618
|
+
startOffset = *offset;
|
619
|
+
break;
|
620
|
+
}
|
621
|
+
/* hit error or eof, bail */
|
622
|
+
goto lexed;
|
623
|
+
default:
|
624
|
+
lexer->error = yajl_lex_invalid_char;
|
625
|
+
tok = yajl_tok_error;
|
626
|
+
goto lexed;
|
627
|
+
}
|
628
|
+
}
|
629
|
+
|
630
|
+
|
631
|
+
lexed:
|
632
|
+
/* need to append to buffer if the buffer is in use or
|
633
|
+
* if it's an EOF token */
|
634
|
+
if (tok == yajl_tok_eof || lexer->bufInUse) {
|
635
|
+
if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
|
636
|
+
lexer->bufInUse = 1;
|
637
|
+
yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
|
638
|
+
lexer->bufOff = 0;
|
639
|
+
|
640
|
+
if (tok != yajl_tok_eof) {
|
641
|
+
*outBuf = yajl_buf_data(lexer->buf);
|
642
|
+
*outLen = yajl_buf_len(lexer->buf);
|
643
|
+
lexer->bufInUse = 0;
|
644
|
+
}
|
645
|
+
} else if (tok != yajl_tok_error) {
|
646
|
+
*outBuf = jsonText + startOffset;
|
647
|
+
*outLen = *offset - startOffset;
|
648
|
+
}
|
649
|
+
|
650
|
+
/* special case for strings. skip the quotes. */
|
651
|
+
if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
|
652
|
+
{
|
653
|
+
assert(*outLen >= 2);
|
654
|
+
(*outBuf)++;
|
655
|
+
*outLen -= 2;
|
656
|
+
}
|
657
|
+
|
658
|
+
|
659
|
+
#ifdef YAJL_LEXER_DEBUG
|
660
|
+
if (tok == yajl_tok_error) {
|
661
|
+
printf("lexical error: %s\n",
|
662
|
+
yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
|
663
|
+
} else if (tok == yajl_tok_eof) {
|
664
|
+
printf("EOF hit\n");
|
665
|
+
} else {
|
666
|
+
printf("lexed %s: '", tokToStr(tok));
|
667
|
+
fwrite(*outBuf, 1, *outLen, stdout);
|
668
|
+
printf("'\n");
|
669
|
+
}
|
670
|
+
#endif
|
671
|
+
|
672
|
+
return tok;
|
673
|
+
}
|
674
|
+
|
675
|
+
const char *
|
676
|
+
yajl_lex_error_to_string(yajl_lex_error error)
|
677
|
+
{
|
678
|
+
switch (error) {
|
679
|
+
case yajl_lex_e_ok:
|
680
|
+
return "ok, no error";
|
681
|
+
case yajl_lex_string_invalid_utf8:
|
682
|
+
return "invalid bytes in UTF8 string.";
|
683
|
+
case yajl_lex_string_invalid_escaped_char:
|
684
|
+
return "inside a string, '\\' occurs before a character "
|
685
|
+
"which it may not.";
|
686
|
+
case yajl_lex_string_invalid_json_char:
|
687
|
+
return "invalid character inside string.";
|
688
|
+
case yajl_lex_string_invalid_hex_char:
|
689
|
+
return "invalid (non-hex) character occurs after '\\u' inside "
|
690
|
+
"string.";
|
691
|
+
case yajl_lex_invalid_char:
|
692
|
+
return "invalid char in json text.";
|
693
|
+
case yajl_lex_invalid_string:
|
694
|
+
return "invalid string in json text.";
|
695
|
+
case yajl_lex_missing_integer_after_exponent:
|
696
|
+
return "malformed number, a digit is required after the exponent.";
|
697
|
+
case yajl_lex_missing_integer_after_decimal:
|
698
|
+
return "malformed number, a digit is required after the "
|
699
|
+
"decimal point.";
|
700
|
+
case yajl_lex_missing_integer_after_minus:
|
701
|
+
return "malformed number, a digit is required after the "
|
702
|
+
"minus sign.";
|
703
|
+
case yajl_lex_unallowed_comment:
|
704
|
+
return "probable comment found in input text, comments are "
|
705
|
+
"not enabled.";
|
706
|
+
}
|
707
|
+
return "unknown error code";
|
708
|
+
}
|
709
|
+
|
710
|
+
|
711
|
+
/** allows access to more specific information about the lexical
|
712
|
+
* error when yajl_lex_lex returns yajl_tok_error. */
|
713
|
+
yajl_lex_error
|
714
|
+
yajl_lex_get_error(yajl_lexer lexer)
|
715
|
+
{
|
716
|
+
if (lexer == NULL) return (yajl_lex_error) -1;
|
717
|
+
return lexer->error;
|
718
|
+
}
|
719
|
+
|
720
|
+
/* Report the lexer's line offset.  Nothing in this file advances
 * lineOff after it is zeroed, so as far as this file is concerned the
 * value is always 0. */
unsigned int yajl_lex_current_line(yajl_lexer lexer)
{
    const unsigned int line = lexer->lineOff;

    return line;
}
|
724
|
+
|
725
|
+
/* Report the lexer's character offset.  Nothing in this file advances
 * charOff after it is zeroed, so as far as this file is concerned the
 * value is always 0. */
unsigned int yajl_lex_current_char(yajl_lexer lexer)
{
    const unsigned int column = lexer->charOff;

    return column;
}
|
729
|
+
|
730
|
+
/* Report the type of the next token without consuming it.  offset is
 * passed by value, so the caller's read position is untouched, and the
 * lexer's buffer length, buffer offset and in-use flag are restored
 * after the lex.  lexer->error is not restored, so a peek that hits an
 * error leaves the error code set. */
yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
                       unsigned int jsonTextLen, unsigned int offset)
{
    /* snapshot the buffer state that yajl_lex_lex may modify */
    const unsigned int savedLen = yajl_buf_len(lexer->buf);
    const unsigned int savedOff = lexer->bufOff;
    const unsigned int savedInUse = lexer->bufInUse;

    /* the token text itself is of no interest here */
    const unsigned char * ignoredBuf;
    unsigned int ignoredLen;

    yajl_tok next = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
                                 &ignoredBuf, &ignoredLen);

    /* roll the buffer state back */
    lexer->bufOff = savedOff;
    lexer->bufInUse = savedInUse;
    yajl_buf_truncate(lexer->buf, savedLen);

    return next;
}
|