yajl-ruby 0.5.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of yajl-ruby might be problematic. Click here for more details.

Files changed (129) hide show
  1. data/.gitignore +5 -0
  2. data/CHANGELOG.md +164 -0
  3. data/MIT-LICENSE +20 -0
  4. data/README.rdoc +264 -0
  5. data/Rakefile +29 -0
  6. data/VERSION.yml +4 -0
  7. data/benchmark/encode.rb +46 -0
  8. data/benchmark/encode_json_and_marshal.rb +35 -0
  9. data/benchmark/encode_json_and_yaml.rb +47 -0
  10. data/benchmark/http.rb +30 -0
  11. data/benchmark/parse.rb +49 -0
  12. data/benchmark/parse_json_and_marshal.rb +47 -0
  13. data/benchmark/parse_json_and_yaml.rb +56 -0
  14. data/benchmark/parse_stream.rb +48 -0
  15. data/benchmark/subjects/contacts.json +1 -0
  16. data/benchmark/subjects/contacts.marshal_dump +0 -0
  17. data/benchmark/subjects/contacts.yml +114685 -0
  18. data/benchmark/subjects/item.json +1 -0
  19. data/benchmark/subjects/ohai.json +1216 -0
  20. data/benchmark/subjects/twitter_search.json +1 -0
  21. data/benchmark/subjects/twitter_stream.json +430 -0
  22. data/benchmark/subjects/unicode.json +1 -0
  23. data/examples/http/twitter_search_api.rb +15 -0
  24. data/examples/http/twitter_stream_api.rb +25 -0
  25. data/examples/parsing/from_file.rb +14 -0
  26. data/examples/parsing/from_stdin.rb +9 -0
  27. data/examples/parsing/from_string.rb +15 -0
  28. data/ext/api/yajl_common.h +85 -0
  29. data/ext/api/yajl_gen.h +123 -0
  30. data/ext/api/yajl_parse.h +182 -0
  31. data/ext/extconf.rb +8 -0
  32. data/ext/yajl.c +157 -0
  33. data/ext/yajl_alloc.c +65 -0
  34. data/ext/yajl_alloc.h +50 -0
  35. data/ext/yajl_buf.c +119 -0
  36. data/ext/yajl_buf.h +73 -0
  37. data/ext/yajl_bytestack.h +85 -0
  38. data/ext/yajl_encode.c +179 -0
  39. data/ext/yajl_encode.h +44 -0
  40. data/ext/yajl_ext.c +774 -0
  41. data/ext/yajl_ext.h +74 -0
  42. data/ext/yajl_gen.c +290 -0
  43. data/ext/yajl_lex.c +744 -0
  44. data/ext/yajl_lex.h +135 -0
  45. data/ext/yajl_parser.c +447 -0
  46. data/ext/yajl_parser.h +79 -0
  47. data/lib/yajl.rb +80 -0
  48. data/lib/yajl/bzip2.rb +11 -0
  49. data/lib/yajl/bzip2/stream_reader.rb +29 -0
  50. data/lib/yajl/bzip2/stream_writer.rb +15 -0
  51. data/lib/yajl/deflate.rb +6 -0
  52. data/lib/yajl/deflate/stream_reader.rb +37 -0
  53. data/lib/yajl/deflate/stream_writer.rb +21 -0
  54. data/lib/yajl/gzip.rb +6 -0
  55. data/lib/yajl/gzip/stream_reader.rb +28 -0
  56. data/lib/yajl/gzip/stream_writer.rb +14 -0
  57. data/lib/yajl/http_stream.rb +101 -0
  58. data/lib/yajl/json_gem.rb +69 -0
  59. data/spec/encoding/encoding_spec.rb +186 -0
  60. data/spec/http/fixtures/http.bzip2.dump +0 -0
  61. data/spec/http/fixtures/http.deflate.dump +0 -0
  62. data/spec/http/fixtures/http.gzip.dump +0 -0
  63. data/spec/http/fixtures/http.raw.dump +12 -0
  64. data/spec/http/http_spec.rb +94 -0
  65. data/spec/json_gem_compatibility/compatibility_spec.rb +170 -0
  66. data/spec/parsing/active_support_spec.rb +68 -0
  67. data/spec/parsing/chunked_spec.rb +98 -0
  68. data/spec/parsing/fixtures/fail.15.json +1 -0
  69. data/spec/parsing/fixtures/fail.16.json +1 -0
  70. data/spec/parsing/fixtures/fail.17.json +1 -0
  71. data/spec/parsing/fixtures/fail.26.json +1 -0
  72. data/spec/parsing/fixtures/fail11.json +1 -0
  73. data/spec/parsing/fixtures/fail12.json +1 -0
  74. data/spec/parsing/fixtures/fail13.json +1 -0
  75. data/spec/parsing/fixtures/fail14.json +1 -0
  76. data/spec/parsing/fixtures/fail19.json +1 -0
  77. data/spec/parsing/fixtures/fail20.json +1 -0
  78. data/spec/parsing/fixtures/fail21.json +1 -0
  79. data/spec/parsing/fixtures/fail22.json +1 -0
  80. data/spec/parsing/fixtures/fail23.json +1 -0
  81. data/spec/parsing/fixtures/fail24.json +1 -0
  82. data/spec/parsing/fixtures/fail25.json +1 -0
  83. data/spec/parsing/fixtures/fail27.json +2 -0
  84. data/spec/parsing/fixtures/fail28.json +2 -0
  85. data/spec/parsing/fixtures/fail3.json +1 -0
  86. data/spec/parsing/fixtures/fail4.json +1 -0
  87. data/spec/parsing/fixtures/fail5.json +1 -0
  88. data/spec/parsing/fixtures/fail6.json +1 -0
  89. data/spec/parsing/fixtures/fail9.json +1 -0
  90. data/spec/parsing/fixtures/pass.array.json +6 -0
  91. data/spec/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
  92. data/spec/parsing/fixtures/pass.contacts.json +1 -0
  93. data/spec/parsing/fixtures/pass.db100.xml.json +1 -0
  94. data/spec/parsing/fixtures/pass.db1000.xml.json +1 -0
  95. data/spec/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
  96. data/spec/parsing/fixtures/pass.deep_arrays.json +1 -0
  97. data/spec/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
  98. data/spec/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
  99. data/spec/parsing/fixtures/pass.doubles.json +1 -0
  100. data/spec/parsing/fixtures/pass.empty_array.json +1 -0
  101. data/spec/parsing/fixtures/pass.empty_string.json +1 -0
  102. data/spec/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
  103. data/spec/parsing/fixtures/pass.escaped_foobar.json +1 -0
  104. data/spec/parsing/fixtures/pass.item.json +1 -0
  105. data/spec/parsing/fixtures/pass.json-org-sample1.json +23 -0
  106. data/spec/parsing/fixtures/pass.json-org-sample2.json +11 -0
  107. data/spec/parsing/fixtures/pass.json-org-sample3.json +26 -0
  108. data/spec/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
  109. data/spec/parsing/fixtures/pass.json-org-sample4.json +89 -0
  110. data/spec/parsing/fixtures/pass.json-org-sample5.json +27 -0
  111. data/spec/parsing/fixtures/pass.map-spain.xml.json +1 -0
  112. data/spec/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
  113. data/spec/parsing/fixtures/pass.ns-soap.xml.json +1 -0
  114. data/spec/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
  115. data/spec/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
  116. data/spec/parsing/fixtures/pass.numbers-int-4k.json +11 -0
  117. data/spec/parsing/fixtures/pass.numbers-int-64k.json +154 -0
  118. data/spec/parsing/fixtures/pass.twitter-search.json +1 -0
  119. data/spec/parsing/fixtures/pass.twitter-search2.json +1 -0
  120. data/spec/parsing/fixtures/pass.unicode.json +3315 -0
  121. data/spec/parsing/fixtures/pass.yelp.json +1 -0
  122. data/spec/parsing/fixtures/pass1.json +56 -0
  123. data/spec/parsing/fixtures/pass2.json +1 -0
  124. data/spec/parsing/fixtures/pass3.json +6 -0
  125. data/spec/parsing/fixtures_spec.rb +45 -0
  126. data/spec/parsing/one_off_spec.rb +58 -0
  127. data/spec/spec_helper.rb +11 -0
  128. data/yajl-ruby.gemspec +176 -0
  129. metadata +196 -0
data/ext/yajl_ext.h ADDED
@@ -0,0 +1,74 @@
1
+ #include "api/yajl_parse.h"
2
+ #include "api/yajl_gen.h"
3
+ #include <ruby.h>
4
+
5
+ #define READ_BUFSIZE 8092
6
+ #define WRITE_BUFSIZE 8092
7
+
8
+ static VALUE cParseError, cEncodeError, mYajl, cParser, cEncoder;
9
+ static ID intern_io_read, intern_eof, intern_call, intern_keys, intern_to_s, intern_to_json,
10
+ sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_symbolize_keys;
11
+
12
+ #define GetParser(obj, sval) (sval = (struct yajl_parser_wrapper*)DATA_PTR(obj));
13
+ #define GetEncoder(obj, sval) (sval = (yajl_gen*)DATA_PTR(obj));
14
+
15
+ inline void yajl_check_and_fire_callback(void * ctx);
16
+ inline void yajl_set_static_value(void * ctx, VALUE val);
17
+ void yajl_encode_part(yajl_gen hand, VALUE obj, VALUE io);
18
+ void yajl_parse_chunk(const unsigned char * chunk, unsigned int len, yajl_handle parser);
19
+
20
+ static int yajl_found_null(void * ctx);
21
+ static int yajl_found_boolean(void * ctx, int boolean);
22
+ static int yajl_found_number(void * ctx, const char * numberVal, unsigned int numberLen);
23
+ static int yajl_found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
24
+ static int yajl_found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen);
25
+ static int yajl_found_start_hash(void * ctx);
26
+ static int yajl_found_end_hash(void * ctx);
27
+ static int yajl_found_start_array(void * ctx);
28
+ static int yajl_found_end_array(void * ctx);
29
+ static yajl_callbacks callbacks = {
30
+ yajl_found_null,
31
+ yajl_found_boolean,
32
+ NULL,
33
+ NULL,
34
+ yajl_found_number,
35
+ yajl_found_string,
36
+ yajl_found_start_hash,
37
+ yajl_found_hash_key,
38
+ yajl_found_end_hash,
39
+ yajl_found_start_array,
40
+ yajl_found_end_array
41
+ };
42
+
43
+ struct yajl_parser_wrapper {
44
+ VALUE builderStack;
45
+ VALUE parse_complete_callback;
46
+ int nestedArrayLevel;
47
+ int nestedHashLevel;
48
+ int objectsFound;
49
+ int symbolizeKeys;
50
+ yajl_handle parser;
51
+ };
52
+ static void yajl_parser_wrapper_free(void * wrapper);
53
+ static void yajl_parser_wrapper_mark(void * wrapper);
54
+
55
+ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE self);
56
+ static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self);
57
+ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self);
58
+ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk);
59
+ static VALUE rb_yajl_set_complete_cb(VALUE self, VALUE callback);
60
+
61
+ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass);
62
+ static VALUE rb_yajl_encoder_init(int argc, VALUE * argv, VALUE self);
63
+ static VALUE rb_yajl_encoder_encode(int argc, VALUE * argv, VALUE self);
64
+
65
+ static VALUE rb_yajl_json_ext_object_to_json(int argc, VALUE * argv, VALUE self);
66
+ static VALUE rb_yajl_json_ext_hash_to_json(int argc, VALUE * argv, VALUE self);
67
+ static VALUE rb_yajl_json_ext_array_to_json(int argc, VALUE * argv, VALUE self);
68
+ static VALUE rb_yajl_json_ext_fixnum_to_json(int argc, VALUE * argv, VALUE self);
69
+ static VALUE rb_yajl_json_ext_float_to_json(int argc, VALUE * argv, VALUE self);
70
+ static VALUE rb_yajl_json_ext_string_to_json(int argc, VALUE * argv, VALUE self);
71
+ static VALUE rb_yajl_json_ext_true_to_json(int argc, VALUE * argv, VALUE self);
72
+ static VALUE rb_yajl_json_ext_false_to_json(int argc, VALUE * argv, VALUE self);
73
+ static VALUE rb_yajl_json_ext_nil_to_json(int argc, VALUE * argv, VALUE self);
74
+ static VALUE rb_yajl_encoder_enable_json_gem_ext(VALUE klass);
data/ext/yajl_gen.c ADDED
@@ -0,0 +1,290 @@
1
+ /*
2
+ * Copyright 2007-2009, Lloyd Hilaiel.
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions are
6
+ * met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in
13
+ * the documentation and/or other materials provided with the
14
+ * distribution.
15
+ *
16
+ * 3. Neither the name of Lloyd Hilaiel nor the names of its
17
+ * contributors may be used to endorse or promote products derived
18
+ * from this software without specific prior written permission.
19
+ *
20
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ * POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+ #include "api/yajl_gen.h"
34
+ #include "yajl_buf.h"
35
+ #include "yajl_encode.h"
36
+
37
+ #include <stdlib.h>
38
+ #include <string.h>
39
+ #include <stdio.h>
40
+
41
+ typedef enum {
42
+ yajl_gen_start,
43
+ yajl_gen_map_start,
44
+ yajl_gen_map_key,
45
+ yajl_gen_map_val,
46
+ yajl_gen_array_start,
47
+ yajl_gen_in_array,
48
+ yajl_gen_complete,
49
+ yajl_gen_error
50
+ } yajl_gen_state;
51
+
52
+ struct yajl_gen_t
53
+ {
54
+ unsigned int depth;
55
+ unsigned int pretty;
56
+ const char * indentString;
57
+ yajl_gen_state state[YAJL_MAX_DEPTH];
58
+ yajl_buf buf;
59
+ /* memory allocation routines */
60
+ yajl_alloc_funcs alloc;
61
+ };
62
+ /* Allocate a generator. Returns NULL when a caller-supplied afs lacks malloc, realloc or free; a NULL afs selects the defaults from yajl_set_default_alloc_funcs(). */
63
+ yajl_gen
64
+ yajl_gen_alloc(const yajl_gen_config * config,
65
+ const yajl_alloc_funcs * afs)
66
+ {
67
+ yajl_gen g = NULL;
68
+ yajl_alloc_funcs afsBuffer; /* only filled in when afs == NULL */
69
+
70
+ /* first order of business is to set up memory allocation routines */
71
+ if (afs != NULL) {
72
+ if (afs->malloc == NULL || afs->realloc == NULL || afs->free == NULL)
73
+ {
74
+ return NULL;
75
+ }
76
+ } else {
77
+ yajl_set_default_alloc_funcs(&afsBuffer);
78
+ afs = &afsBuffer;
79
+ }
80
+
81
+ g = (yajl_gen) YA_MALLOC(afs, sizeof(struct yajl_gen_t)); /* NOTE(review): the result is passed to memset below without a NULL check */
82
+ memset((void *) g, 0, sizeof(struct yajl_gen_t)); /* zeroes depth, pretty, indentString and every state[] slot */
83
+ /* copy in pointers to allocation routines */
84
+ memcpy((void *) &(g->alloc), (void *) afs, sizeof(yajl_alloc_funcs)); /* copied by value, so g does not keep pointing at the local afsBuffer */
85
+
86
+ if (config) { /* with config == NULL, pretty stays 0 and indentString stays NULL from the memset */
87
+ g->pretty = config->beautify;
88
+ g->indentString = config->indentString ? config->indentString : " ";
89
+ }
90
+ g->buf = yajl_buf_alloc(&(g->alloc)); /* NOTE(review): the return value is not checked here */
91
+
92
+ return g;
93
+ }
94
+
95
+ void
96
+ yajl_gen_free(yajl_gen g)
97
+ {
98
+ yajl_buf_free(g->buf);
99
+ YA_FREE(&(g->alloc), g);
100
+ }
101
+
102
+ #define INSERT_SEP \
103
+ if (g->state[g->depth] == yajl_gen_map_key || \
104
+ g->state[g->depth] == yajl_gen_in_array) { \
105
+ yajl_buf_append(g->buf, ",", 1); \
106
+ if (g->pretty) yajl_buf_append(g->buf, "\n", 1); \
107
+ } else if (g->state[g->depth] == yajl_gen_map_val) { \
108
+ yajl_buf_append(g->buf, ":", 1); \
109
+ if (g->pretty) yajl_buf_append(g->buf, " ", 1); \
110
+ }
111
+
112
+ #define INSERT_WHITESPACE \
113
+ if (g->pretty) { \
114
+ if (g->state[g->depth] != yajl_gen_map_val) { \
115
+ unsigned int _i; \
116
+ for (_i=0;_i<g->depth;_i++) \
117
+ yajl_buf_append(g->buf, g->indentString, \
118
+ strlen(g->indentString)); \
119
+ } \
120
+ }
121
+ /* Return yajl_gen_keys_must_be_strings when the next item must be a map key: either right after a map was opened (yajl_gen_map_start) or after a completed value (yajl_gen_map_key). Used by every generator call that emits something other than a string. */
122
+ #define ENSURE_NOT_KEY \
123
+ if (g->state[g->depth] == yajl_gen_map_key || g->state[g->depth] == yajl_gen_map_start) { \
124
+ return yajl_gen_keys_must_be_strings; \
125
+ } \
126
+
127
+ /* check that we're not complete, or in error state. in a valid state
128
+ * to be generating */
129
+ #define ENSURE_VALID_STATE \
130
+ if (g->state[g->depth] == yajl_gen_error) { \
131
+ return yajl_gen_in_error_state;\
132
+ } else if (g->state[g->depth] == yajl_gen_complete) { \
133
+ return yajl_gen_generation_complete; \
134
+ }
135
+
136
+ #define INCREMENT_DEPTH \
137
+ if (++(g->depth) >= YAJL_MAX_DEPTH) return yajl_max_depth_exceeded;
138
+
139
+ #define APPENDED_ATOM \
140
+ switch (g->state[g->depth]) { \
141
+ case yajl_gen_map_start: \
142
+ case yajl_gen_map_key: \
143
+ g->state[g->depth] = yajl_gen_map_val; \
144
+ break; \
145
+ case yajl_gen_array_start: \
146
+ g->state[g->depth] = yajl_gen_in_array; \
147
+ break; \
148
+ case yajl_gen_map_val: \
149
+ g->state[g->depth] = yajl_gen_map_key; \
150
+ break; \
151
+ default: \
152
+ break; \
153
+ } \
154
+
155
+ #define FINAL_NEWLINE
156
+
157
+ yajl_gen_status
158
+ yajl_gen_integer(yajl_gen g, long int number)
159
+ {
160
+ char i[32];
161
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
162
+ sprintf(i, "%ld", number);
163
+ yajl_buf_append(g->buf, i, strlen(i));
164
+ APPENDED_ATOM;
165
+ FINAL_NEWLINE;
166
+ return yajl_gen_status_ok;
167
+ }
168
+ /* Append a double, formatted with "%g", as the next value. Returns early through ENSURE_VALID_STATE / ENSURE_NOT_KEY when the generator state does not allow a number here. */
169
+ yajl_gen_status
170
+ yajl_gen_double(yajl_gen g, double number)
171
+ {
172
+ char i[32];
173
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
174
+ sprintf(i, "%g", number); /* NOTE(review): "%g" keeps 6 significant digits by default, and NaN/infinity are printed as text that is not valid JSON */
175
+ yajl_buf_append(g->buf, i, strlen(i));
176
+ APPENDED_ATOM;
177
+ FINAL_NEWLINE;
178
+ return yajl_gen_status_ok;
179
+ }
180
+
181
+ yajl_gen_status
182
+ yajl_gen_number(yajl_gen g, const char * s, unsigned int l)
183
+ {
184
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
185
+ yajl_buf_append(g->buf, s, l);
186
+ APPENDED_ATOM;
187
+ FINAL_NEWLINE;
188
+ return yajl_gen_status_ok;
189
+ }
190
+
191
+ yajl_gen_status
192
+ yajl_gen_string(yajl_gen g, const unsigned char * str,
193
+ unsigned int len)
194
+ {
195
+ ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE;
196
+ yajl_buf_append(g->buf, "\"", 1);
197
+ yajl_string_encode(g->buf, str, len);
198
+ yajl_buf_append(g->buf, "\"", 1);
199
+ APPENDED_ATOM;
200
+ FINAL_NEWLINE;
201
+ return yajl_gen_status_ok;
202
+ }
203
+
204
+ yajl_gen_status
205
+ yajl_gen_null(yajl_gen g)
206
+ {
207
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
208
+ yajl_buf_append(g->buf, "null", strlen("null"));
209
+ APPENDED_ATOM;
210
+ FINAL_NEWLINE;
211
+ return yajl_gen_status_ok;
212
+ }
213
+
214
+ yajl_gen_status
215
+ yajl_gen_bool(yajl_gen g, int boolean)
216
+ {
217
+ const char * val = boolean ? "true" : "false";
218
+
219
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
220
+ yajl_buf_append(g->buf, val, strlen(val));
221
+ APPENDED_ATOM;
222
+ FINAL_NEWLINE;
223
+ return yajl_gen_status_ok;
224
+ }
225
+
226
+ yajl_gen_status
227
+ yajl_gen_map_open(yajl_gen g)
228
+ {
229
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
230
+ INCREMENT_DEPTH;
231
+
232
+ g->state[g->depth] = yajl_gen_map_start;
233
+ yajl_buf_append(g->buf, "{", 1);
234
+ if (g->pretty) yajl_buf_append(g->buf, "\n", 1);
235
+ FINAL_NEWLINE;
236
+ return yajl_gen_status_ok;
237
+ }
238
+ /* Close the current map: pop one nesting level, advance the enclosing container's state via APPENDED_ATOM, and append "}". */
239
+ yajl_gen_status
240
+ yajl_gen_map_close(yajl_gen g)
241
+ {
242
+ ENSURE_VALID_STATE;
243
+ (g->depth)--; /* NOTE(review): depth is unsigned and is not checked for 0 first, so an unmatched close wraps around */
244
+ if (g->pretty) yajl_buf_append(g->buf, "\n", 1);
245
+ APPENDED_ATOM; /* after the decrement this updates the parent's state: the closed map counts as one appended item there */
246
+ INSERT_WHITESPACE; /* indents to the parent's depth when pretty printing */
247
+ yajl_buf_append(g->buf, "}", 1);
248
+ FINAL_NEWLINE;
249
+ return yajl_gen_status_ok;
250
+ }
251
+
252
+ yajl_gen_status
253
+ yajl_gen_array_open(yajl_gen g)
254
+ {
255
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
256
+ INCREMENT_DEPTH;
257
+ g->state[g->depth] = yajl_gen_array_start;
258
+ yajl_buf_append(g->buf, "[", 1);
259
+ if (g->pretty) yajl_buf_append(g->buf, "\n", 1);
260
+ FINAL_NEWLINE;
261
+ return yajl_gen_status_ok;
262
+ }
263
+ /* Close the current array: pop one nesting level, advance the enclosing container's state via APPENDED_ATOM, and append "]". */
264
+ yajl_gen_status
265
+ yajl_gen_array_close(yajl_gen g)
266
+ {
267
+ ENSURE_VALID_STATE;
268
+ if (g->pretty) yajl_buf_append(g->buf, "\n", 1);
269
+ (g->depth)--; /* NOTE(review): depth is unsigned and is not checked for 0 first, so an unmatched close wraps around */
270
+ APPENDED_ATOM; /* after the decrement this updates the parent's state: the closed array counts as one appended item there */
271
+ INSERT_WHITESPACE; /* indents to the parent's depth when pretty printing */
272
+ yajl_buf_append(g->buf, "]", 1);
273
+ FINAL_NEWLINE;
274
+ return yajl_gen_status_ok;
275
+ }
276
+
277
+ yajl_gen_status
278
+ yajl_gen_get_buf(yajl_gen g, const unsigned char ** buf,
279
+ unsigned int * len)
280
+ {
281
+ *buf = yajl_buf_data(g->buf);
282
+ *len = yajl_buf_len(g->buf);
283
+ return yajl_gen_status_ok;
284
+ }
285
+
286
+ void
287
+ yajl_gen_clear(yajl_gen g)
288
+ {
289
+ yajl_buf_clear(g->buf);
290
+ }
data/ext/yajl_lex.c ADDED
@@ -0,0 +1,744 @@
1
+ /*
2
+ * Copyright 2007-2009, Lloyd Hilaiel.
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions are
6
+ * met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in
13
+ * the documentation and/or other materials provided with the
14
+ * distribution.
15
+ *
16
+ * 3. Neither the name of Lloyd Hilaiel nor the names of its
17
+ * contributors may be used to endorse or promote products derived
18
+ * from this software without specific prior written permission.
19
+ *
20
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ * POSSIBILITY OF SUCH DAMAGE.
31
+ */
32
+
33
+ #include "yajl_lex.h"
34
+ #include "yajl_buf.h"
35
+
36
+ #include <stdlib.h>
37
+ #include <stdio.h>
38
+ #include <assert.h>
39
+ #include <string.h>
40
+
41
+ #ifdef YAJL_LEXER_DEBUG
42
+ static const char *
43
+ tokToStr(yajl_tok tok)
44
+ {
45
+ switch (tok) {
46
+ case yajl_tok_bool: return "bool";
47
+ case yajl_tok_colon: return "colon";
48
+ case yajl_tok_comma: return "comma";
49
+ case yajl_tok_eof: return "eof";
50
+ case yajl_tok_error: return "error";
51
+ case yajl_tok_left_brace: return "brace";
52
+ case yajl_tok_left_bracket: return "bracket";
53
+ case yajl_tok_null: return "null";
54
+ case yajl_tok_integer: return "integer";
55
+ case yajl_tok_double: return "double";
56
+ case yajl_tok_right_brace: return "brace";
57
+ case yajl_tok_right_bracket: return "bracket";
58
+ case yajl_tok_string: return "string";
59
+ case yajl_tok_string_with_escapes: return "string_with_escapes";
60
+ }
61
+ return "unknown";
62
+ }
63
+ #endif
64
+
65
+ /* Impact of the stream parsing feature on the lexer:
66
+ *
67
+ * YAJL supports stream parsing. That is, the ability to parse the first
68
+ * bits of a chunk of JSON before the last bits are available (still on
69
+ * the network or disk). This makes the lexer more complex. The
70
+ * responsibility of the lexer is to handle transparently the case where
71
+ * a chunk boundary falls in the middle of a token. This is
72
+ * accomplished via a buffer and a character reading abstraction.
73
+ *
74
+ * Overview of implementation
75
+ *
76
+ * When we lex to end of input string before end of token is hit, we
77
+ * copy all of the input text composing the token into our lexBuf.
78
+ *
79
+ * Every time we read a character, we do so through the readChar function.
80
+ * readChar's responsibility is to handle pulling all chars from the buffer
81
+ * before pulling chars from input text
82
+ */
83
+
84
+ struct yajl_lexer_t {
85
+ /* the overall line and char offset into the data */
86
+ unsigned int lineOff;
87
+ unsigned int charOff;
88
+
89
+ /* last lexing error; the lex routines set it before returning yajl_tok_error */
90
+ yajl_lex_error error;
91
+
92
+ /* an input buffer to handle the case where a token is spread over
93
+ * multiple chunks */
94
+ yajl_buf buf;
95
+
96
+ /* in the case where we have data in the lexBuf, bufOff holds
97
+ * the current offset into the lexBuf. */
98
+ unsigned int bufOff;
99
+
100
+ /* are we using the lex buf? */
101
+ unsigned int bufInUse;
102
+
103
+ /* shall we allow comments? */
104
+ unsigned int allowComments;
105
+
106
+ /* shall we validate utf8 inside strings? */
107
+ unsigned int validateUTF8;
108
+
109
+ yajl_alloc_funcs * alloc; /* allocator handed to yajl_lex_alloc; stored by pointer, not copied */
110
+ };
111
+ /* readChar: yield the next byte, taking it from the lexer's buffer while bufInUse is set and bufOff is still inside it, otherwise from txt at *off; whichever offset was used is post-incremented. */
112
+ #define readChar(lxr, txt, off) \
113
+ (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ? \
114
+ (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \
115
+ ((txt)[(*(off))++]))
116
+ /* unreadChar: step back one byte; decrements *off when it is non-zero, otherwise the lexer's bufOff. */
117
+ #define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
118
+ /* Allocate a lexer with all offsets and the error field zeroed, an input buffer from yajl_buf_alloc(), and the given comment/UTF-8 flags. */
119
+ yajl_lexer
120
+ yajl_lex_alloc(yajl_alloc_funcs * alloc,
121
+ unsigned int allowComments, unsigned int validateUTF8)
122
+ {
123
+ yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t)); /* NOTE(review): the result is passed to memset below without a NULL check */
124
+ memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
125
+ lxr->buf = yajl_buf_alloc(alloc);
126
+ lxr->allowComments = allowComments;
127
+ lxr->validateUTF8 = validateUTF8;
128
+ lxr->alloc = alloc; /* pointer is kept, not copied: alloc must stay valid until yajl_lex_free() has used it */
129
+ return lxr;
130
+ }
131
+ /* Replace orig with a newly allocated lexer that reuses orig's allocator and flags; orig is freed and must not be used afterwards. */
132
+ yajl_lexer
133
+ yajl_lex_realloc(yajl_lexer orig) {
134
+ yajl_lexer newLxr = yajl_lex_alloc(orig->alloc, orig->allowComments, orig->validateUTF8); /* reads orig's fields, so this must happen before the free below */
135
+ yajl_lex_free(orig);
136
+ return newLxr;
137
+ }
138
+
139
+ void
140
+ yajl_lex_free(yajl_lexer lxr)
141
+ {
142
+ yajl_buf_free(lxr->buf);
143
+ YA_FREE(lxr->alloc, lxr);
144
+ return;
145
+ }
146
+
147
+ /* a lookup table which lets us quickly determine three things:
148
+ * VEC - valid escaped control char
149
+ * IJC - invalid json char
150
+ * VHC - valid hex char
151
+ * note. the solidus '/' may be escaped or not.
152
+ * note. the
153
+ */
154
+ #define VEC 1
155
+ #define IJC 2
156
+ #define VHC 4
157
+ static const char charLookupTable[256] =
158
+ {
159
+ /*00*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
160
+ /*08*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
161
+ /*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
162
+ /*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
163
+
164
+ /*20*/ 0 , 0 , VEC|IJC, 0 , 0 , 0 , 0 , 0 ,
165
+ /*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC ,
166
+ /*30*/ VHC , VHC , VHC , VHC , VHC , VHC , VHC , VHC ,
167
+ /*38*/ VHC , VHC , 0 , 0 , 0 , 0 , 0 , 0 ,
168
+
169
+ /*40*/ 0 , VHC , VHC , VHC , VHC , VHC , VHC , 0 ,
170
+ /*48*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
171
+ /*50*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
172
+ /*58*/ 0 , 0 , 0 , 0 , VEC|IJC, 0 , 0 , 0 ,
173
+
174
+ /*60*/ 0 , VHC , VEC|VHC, VHC , VHC , VHC , VEC|VHC, 0 ,
175
+ /*68*/ 0 , 0 , 0 , 0 , 0 , 0 , VEC , 0 ,
176
+ /*70*/ 0 , 0 , VEC , 0 , VEC , 0 , 0 , 0 ,
177
+ /*78*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
178
+
179
+ /* include these so we don't have to always check the range of the char */
180
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
181
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
182
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
183
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
184
+
185
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
186
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
187
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
188
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
189
+
190
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
191
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
192
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
193
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
194
+
195
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
196
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
197
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
198
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
199
+ };
200
+
201
+ /** process a variable length utf8 encoded codepoint.
202
+ *
203
+ * returns:
204
+ * yajl_tok_string - if valid utf8 char was parsed and offset was
205
+ * advanced
206
+ * yajl_tok_eof - if end of input was hit before validation could
207
+ * complete
208
+ * yajl_tok_error - if invalid utf8 was encountered
209
+ *
210
+ * NOTE: on error the offset will point to the first char of the
211
+ * invalid utf8 */
212
+ #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
213
+
214
+ static yajl_tok
215
+ yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
216
+ unsigned int jsonTextLen, unsigned int * offset,
217
+ unsigned char curChar)
218
+ {
219
+ if (curChar <= 0x7f) {
220
+ /* single byte */
221
+ return yajl_tok_string;
222
+ } else if ((curChar >> 5) == 0x6) {
223
+ /* two byte */
224
+ UTF8_CHECK_EOF;
225
+ curChar = readChar(lexer, jsonText, offset);
226
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
227
+ } else if ((curChar >> 4) == 0x0e) {
228
+ /* three byte */
229
+ UTF8_CHECK_EOF;
230
+ curChar = readChar(lexer, jsonText, offset);
231
+ if ((curChar >> 6) == 0x2) {
232
+ UTF8_CHECK_EOF;
233
+ curChar = readChar(lexer, jsonText, offset);
234
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
235
+ }
236
+ } else if ((curChar >> 3) == 0x1e) {
237
+ /* four byte */
238
+ UTF8_CHECK_EOF;
239
+ curChar = readChar(lexer, jsonText, offset);
240
+ if ((curChar >> 6) == 0x2) {
241
+ UTF8_CHECK_EOF;
242
+ curChar = readChar(lexer, jsonText, offset);
243
+ if ((curChar >> 6) == 0x2) {
244
+ UTF8_CHECK_EOF;
245
+ curChar = readChar(lexer, jsonText, offset);
246
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
247
+ }
248
+ }
249
+ }
250
+
251
+ return yajl_tok_error;
252
+ }
253
+
254
+ /* lex a string. input is the lexer, pointer to beginning of
255
+ * json text, and start of string (offset).
256
+ * a token is returned which has the following meanings:
257
+ * yajl_tok_string: lex of string was successful. offset points to
258
+ * terminating '"'.
259
+ * yajl_tok_eof: end of text was encountered before we could complete
260
+ * the lex.
261
+ * yajl_tok_error: embedded in the string were unallowable chars. offset
262
+ * points to the offending char
263
+ */
264
+ #define STR_CHECK_EOF \
265
+ if (*offset >= jsonTextLen) { \
266
+ tok = yajl_tok_eof; \
267
+ goto finish_string_lex; \
268
+ }
269
+
270
+ static yajl_tok
271
+ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
272
+ unsigned int jsonTextLen, unsigned int * offset)
273
+ {
274
+ yajl_tok tok = yajl_tok_error;
275
+ int hasEscapes = 0;
276
+
277
+ for (;;) {
278
+ unsigned char curChar;
279
+
280
+ STR_CHECK_EOF;
281
+
282
+ curChar = readChar(lexer, jsonText, offset);
283
+
284
+ /* quote terminates */
285
+ if (curChar == '"') {
286
+ tok = yajl_tok_string;
287
+ break;
288
+ }
289
+ /* backslash escapes a set of control chars, */
290
+ else if (curChar == '\\') {
291
+ hasEscapes = 1;
292
+ STR_CHECK_EOF;
293
+
294
+ /* special case \u */
295
+ curChar = readChar(lexer, jsonText, offset);
296
+ if (curChar == 'u') {
297
+ unsigned int i = 0;
298
+
299
+ for (i=0;i<4;i++) {
300
+ STR_CHECK_EOF;
301
+ curChar = readChar(lexer, jsonText, offset);
302
+ if (!(charLookupTable[curChar] & VHC)) {
303
+ /* back up to offending char */
304
+ unreadChar(lexer, offset);
305
+ lexer->error = yajl_lex_string_invalid_hex_char;
306
+ goto finish_string_lex;
307
+ }
308
+ }
309
+ } else if (!(charLookupTable[curChar] & VEC)) {
310
+ /* back up to offending char */
311
+ unreadChar(lexer, offset);
312
+ lexer->error = yajl_lex_string_invalid_escaped_char;
313
+ goto finish_string_lex;
314
+ }
315
+ }
316
+ /* when not validating UTF8 it's a simple table lookup to determine
317
+ * if the present character is invalid */
318
+ else if(charLookupTable[curChar] & IJC) {
319
+ /* back up to offending char */
320
+ unreadChar(lexer, offset);
321
+ lexer->error = yajl_lex_string_invalid_json_char;
322
+ goto finish_string_lex;
323
+ }
324
+ /* when in validate UTF8 mode we need to do some extra work */
325
+ else if (lexer->validateUTF8) {
326
+ yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
327
+ offset, curChar);
328
+
329
+ if (t == yajl_tok_eof) {
330
+ tok = yajl_tok_eof;
331
+ goto finish_string_lex;
332
+ } else if (t == yajl_tok_error) {
333
+ lexer->error = yajl_lex_string_invalid_utf8;
334
+ goto finish_string_lex;
335
+ }
336
+ }
337
+ /* accept it, and move on */
338
+ }
339
+ finish_string_lex:
340
+ /* tell our buddy, the parser, whether he needs to process this string
341
+ * again */
342
+ if (hasEscapes && tok == yajl_tok_string) {
343
+ tok = yajl_tok_string_with_escapes;
344
+ }
345
+
346
+ return tok;
347
+ }
348
+
349
+ #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
350
+
351
+ static yajl_tok
352
+ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
353
+ unsigned int jsonTextLen, unsigned int * offset)
354
+ {
355
+ /** XXX: numbers are the only entities in json that we must lex
356
+ * _beyond_ in order to know that they are complete. There
357
+ * is an ambiguous case for integers at EOF. */
358
+
359
+ unsigned char c;
360
+
361
+ yajl_tok tok = yajl_tok_integer;
362
+
363
+ RETURN_IF_EOF;
364
+ c = readChar(lexer, jsonText, offset);
365
+
366
+ /* optional leading minus */
367
+ if (c == '-') {
368
+ RETURN_IF_EOF;
369
+ c = readChar(lexer, jsonText, offset);
370
+ }
371
+
372
+ /* a single zero, or a series of integers */
373
+ if (c == '0') {
374
+ RETURN_IF_EOF;
375
+ c = readChar(lexer, jsonText, offset);
376
+ } else if (c >= '1' && c <= '9') {
377
+ do {
378
+ RETURN_IF_EOF;
379
+ c = readChar(lexer, jsonText, offset);
380
+ } while (c >= '0' && c <= '9');
381
+ } else {
382
+ unreadChar(lexer, offset);
383
+ lexer->error = yajl_lex_missing_integer_after_minus;
384
+ return yajl_tok_error;
385
+ }
386
+
387
+ /* optional fraction (indicates this is floating point) */
388
+ if (c == '.') {
389
+ int numRd = 0;
390
+
391
+ RETURN_IF_EOF;
392
+ c = readChar(lexer, jsonText, offset);
393
+
394
+ while (c >= '0' && c <= '9') {
395
+ numRd++;
396
+ RETURN_IF_EOF;
397
+ c = readChar(lexer, jsonText, offset);
398
+ }
399
+
400
+ if (!numRd) {
401
+ unreadChar(lexer, offset);
402
+ lexer->error = yajl_lex_missing_integer_after_decimal;
403
+ return yajl_tok_error;
404
+ }
405
+ tok = yajl_tok_double;
406
+ }
407
+
408
+ /* optional exponent (indicates this is floating point) */
409
+ if (c == 'e' || c == 'E') {
410
+ RETURN_IF_EOF;
411
+ c = readChar(lexer, jsonText, offset);
412
+
413
+ /* optional sign */
414
+ if (c == '+' || c == '-') {
415
+ RETURN_IF_EOF;
416
+ c = readChar(lexer, jsonText, offset);
417
+ }
418
+
419
+ if (c >= '0' && c <= '9') {
420
+ do {
421
+ RETURN_IF_EOF;
422
+ c = readChar(lexer, jsonText, offset);
423
+ } while (c >= '0' && c <= '9');
424
+ } else {
425
+ unreadChar(lexer, offset);
426
+ lexer->error = yajl_lex_missing_integer_after_exponent;
427
+ return yajl_tok_error;
428
+ }
429
+ tok = yajl_tok_double;
430
+ }
431
+
432
+ /* we always go "one too far" */
433
+ unreadChar(lexer, offset);
434
+
435
+ return tok;
436
+ }
437
+
438
+ static yajl_tok
439
+ yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
440
+ unsigned int jsonTextLen, unsigned int * offset)
441
+ {
442
+ unsigned char c;
443
+
444
+ yajl_tok tok = yajl_tok_comment;
445
+
446
+ RETURN_IF_EOF;
447
+ c = readChar(lexer, jsonText, offset);
448
+
449
+ /* either slash or star expected */
450
+ if (c == '/') {
451
+ /* now we throw away until end of line */
452
+ do {
453
+ RETURN_IF_EOF;
454
+ c = readChar(lexer, jsonText, offset);
455
+ } while (c != '\n');
456
+ } else if (c == '*') {
457
+ /* now we throw away until end of comment */
458
+ for (;;) {
459
+ RETURN_IF_EOF;
460
+ c = readChar(lexer, jsonText, offset);
461
+ if (c == '*') {
462
+ RETURN_IF_EOF;
463
+ c = readChar(lexer, jsonText, offset);
464
+ if (c == '/') {
465
+ break;
466
+ } else {
467
+ unreadChar(lexer, offset);
468
+ }
469
+ }
470
+ }
471
+ } else {
472
+ lexer->error = yajl_lex_invalid_char;
473
+ tok = yajl_tok_error;
474
+ }
475
+
476
+ return tok;
477
+ }
478
+
479
+ yajl_tok
480
+ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
481
+ unsigned int jsonTextLen, unsigned int * offset,
482
+ const unsigned char ** outBuf, unsigned int * outLen)
483
+ {
484
+ yajl_tok tok = yajl_tok_error;
485
+ unsigned char c;
486
+ unsigned int startOffset = *offset;
487
+
488
+ *outBuf = NULL;
489
+ *outLen = 0;
490
+
491
+ for (;;) {
492
+ assert(*offset <= jsonTextLen);
493
+
494
+ if (*offset >= jsonTextLen) {
495
+ tok = yajl_tok_eof;
496
+ goto lexed;
497
+ }
498
+
499
+ c = readChar(lexer, jsonText, offset);
500
+
501
+ switch (c) {
502
+ case '{':
503
+ tok = yajl_tok_left_bracket;
504
+ goto lexed;
505
+ case '}':
506
+ tok = yajl_tok_right_bracket;
507
+ goto lexed;
508
+ case '[':
509
+ tok = yajl_tok_left_brace;
510
+ goto lexed;
511
+ case ']':
512
+ tok = yajl_tok_right_brace;
513
+ goto lexed;
514
+ case ',':
515
+ tok = yajl_tok_comma;
516
+ goto lexed;
517
+ case ':':
518
+ tok = yajl_tok_colon;
519
+ goto lexed;
520
+ case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
521
+ startOffset++;
522
+ break;
523
+ case 't': {
524
+ const char * want = "rue";
525
+ do {
526
+ if (*offset >= jsonTextLen) {
527
+ tok = yajl_tok_eof;
528
+ goto lexed;
529
+ }
530
+ c = readChar(lexer, jsonText, offset);
531
+ if (c != *want) {
532
+ unreadChar(lexer, offset);
533
+ lexer->error = yajl_lex_invalid_string;
534
+ tok = yajl_tok_error;
535
+ goto lexed;
536
+ }
537
+ } while (*(++want));
538
+ tok = yajl_tok_bool;
539
+ goto lexed;
540
+ }
541
+ case 'f': {
542
+ const char * want = "alse";
543
+ do {
544
+ if (*offset >= jsonTextLen) {
545
+ tok = yajl_tok_eof;
546
+ goto lexed;
547
+ }
548
+ c = readChar(lexer, jsonText, offset);
549
+ if (c != *want) {
550
+ unreadChar(lexer, offset);
551
+ lexer->error = yajl_lex_invalid_string;
552
+ tok = yajl_tok_error;
553
+ goto lexed;
554
+ }
555
+ } while (*(++want));
556
+ tok = yajl_tok_bool;
557
+ goto lexed;
558
+ }
559
+ case 'n': {
560
+ const char * want = "ull";
561
+ do {
562
+ if (*offset >= jsonTextLen) {
563
+ tok = yajl_tok_eof;
564
+ goto lexed;
565
+ }
566
+ c = readChar(lexer, jsonText, offset);
567
+ if (c != *want) {
568
+ unreadChar(lexer, offset);
569
+ lexer->error = yajl_lex_invalid_string;
570
+ tok = yajl_tok_error;
571
+ goto lexed;
572
+ }
573
+ } while (*(++want));
574
+ tok = yajl_tok_null;
575
+ goto lexed;
576
+ }
577
+ case '"': {
578
+ tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
579
+ jsonTextLen, offset);
580
+ goto lexed;
581
+ }
582
+ case '-':
583
+ case '0': case '1': case '2': case '3': case '4':
584
+ case '5': case '6': case '7': case '8': case '9': {
585
+ /* integer parsing wants to start from the beginning */
586
+ unreadChar(lexer, offset);
587
+ tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
588
+ jsonTextLen, offset);
589
+ goto lexed;
590
+ }
591
+ case '/':
592
+ /* hey, look, a probable comment! If comments are disabled
593
+ * it's an error. */
594
+ if (!lexer->allowComments) {
595
+ unreadChar(lexer, offset);
596
+ lexer->error = yajl_lex_unallowed_comment;
597
+ tok = yajl_tok_error;
598
+ goto lexed;
599
+ }
600
+ /* if comments are enabled, then we should try to lex
601
+ * the thing. possible outcomes are
602
+ * - successful lex (tok_comment, which means continue),
603
+ * - malformed comment opening (slash not followed by
604
+ * '*' or '/') (tok_error)
605
+ * - eof hit. (tok_eof) */
606
+ tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
607
+ jsonTextLen, offset);
608
+ if (tok == yajl_tok_comment) {
609
+ /* "error" is silly, but that's the initial
610
+ * state of tok. guilty until proven innocent. */
611
+ tok = yajl_tok_error;
612
+ yajl_buf_clear(lexer->buf);
613
+ lexer->bufInUse = 0;
614
+ startOffset = *offset;
615
+ break;
616
+ }
617
+ /* hit error or eof, bail */
618
+ goto lexed;
619
+ default:
620
+ lexer->error = yajl_lex_invalid_char;
621
+ tok = yajl_tok_error;
622
+ goto lexed;
623
+ }
624
+ }
625
+
626
+
627
+ lexed:
628
+ /* need to append to buffer if the buffer is in use or
629
+ * if it's an EOF token */
630
+ if (tok == yajl_tok_eof || lexer->bufInUse) {
631
+ if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
632
+ lexer->bufInUse = 1;
633
+ yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
634
+ lexer->bufOff = 0;
635
+
636
+ if (tok != yajl_tok_eof) {
637
+ *outBuf = yajl_buf_data(lexer->buf);
638
+ *outLen = yajl_buf_len(lexer->buf);
639
+ lexer->bufInUse = 0;
640
+ }
641
+ } else if (tok != yajl_tok_error) {
642
+ *outBuf = jsonText + startOffset;
643
+ *outLen = *offset - startOffset;
644
+ }
645
+
646
+ /* special case for strings. skip the quotes. */
647
+ if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
648
+ {
649
+ assert(*outLen >= 2);
650
+ (*outBuf)++;
651
+ *outLen -= 2;
652
+ }
653
+
654
+
655
+ #ifdef YAJL_LEXER_DEBUG
656
+ if (tok == yajl_tok_error) {
657
+ printf("lexical error: %s\n",
658
+ yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
659
+ } else if (tok == yajl_tok_eof) {
660
+ printf("EOF hit\n");
661
+ } else {
662
+ printf("lexed %s: '", tokToStr(tok));
663
+ fwrite(*outBuf, 1, *outLen, stdout);
664
+ printf("'\n");
665
+ }
666
+ #endif
667
+
668
+ return tok;
669
+ }
670
+
671
+ const char *
672
+ yajl_lex_error_to_string(yajl_lex_error error)
673
+ {
674
+ switch (error) {
675
+ case yajl_lex_e_ok:
676
+ return "ok, no error";
677
+ case yajl_lex_string_invalid_utf8:
678
+ return "invalid bytes in UTF8 string.";
679
+ case yajl_lex_string_invalid_escaped_char:
680
+ return "inside a string, '\\' occurs before a character "
681
+ "which it may not.";
682
+ case yajl_lex_string_invalid_json_char:
683
+ return "invalid character inside string.";
684
+ case yajl_lex_string_invalid_hex_char:
685
+ return "invalid (non-hex) character occurs after '\\u' inside "
686
+ "string.";
687
+ case yajl_lex_invalid_char:
688
+ return "invalid char in json text.";
689
+ case yajl_lex_invalid_string:
690
+ return "invalid string in json text.";
691
+ case yajl_lex_missing_integer_after_exponent:
692
+ return "malformed number, a digit is required after the exponent.";
693
+ case yajl_lex_missing_integer_after_decimal:
694
+ return "malformed number, a digit is required after the "
695
+ "decimal point.";
696
+ case yajl_lex_missing_integer_after_minus:
697
+ return "malformed number, a digit is required after the "
698
+ "minus sign.";
699
+ case yajl_lex_unallowed_comment:
700
+ return "probable comment found in input text, comments are "
701
+ "not enabled.";
702
+ }
703
+ return "unknown error code";
704
+ }
705
+
706
+
707
+ /** allows access to more specific information about the lexical
708
+ * error when yajl_lex_lex returns yajl_tok_error. */
709
+ yajl_lex_error
710
+ yajl_lex_get_error(yajl_lexer lexer)
711
+ {
712
+ if (lexer == NULL) return (yajl_lex_error) -1;
713
+ return lexer->error;
714
+ }
715
+
716
+ unsigned int yajl_lex_current_line(yajl_lexer lexer)
717
+ {
718
+ return lexer->lineOff;
719
+ }
720
+
721
+ unsigned int yajl_lex_current_char(yajl_lexer lexer)
722
+ {
723
+ return lexer->charOff;
724
+ }
725
+
726
+ yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
727
+ unsigned int jsonTextLen, unsigned int offset)
728
+ {
729
+ const unsigned char * outBuf;
730
+ unsigned int outLen;
731
+ unsigned int bufLen = yajl_buf_len(lexer->buf);
732
+ unsigned int bufOff = lexer->bufOff;
733
+ unsigned int bufInUse = lexer->bufInUse;
734
+ yajl_tok tok;
735
+
736
+ tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
737
+ &outBuf, &outLen);
738
+
739
+ lexer->bufOff = bufOff;
740
+ lexer->bufInUse = bufInUse;
741
+ yajl_buf_truncate(lexer->buf, bufLen);
742
+
743
+ return tok;
744
+ }