ffi-yajl 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +201 -0
  3. data/README.md +17 -0
  4. data/Rakefile +62 -0
  5. data/bin/ffi-yajl-bench +36 -0
  6. data/ext/ffi_yajl/ext/encoder/encoder.c +240 -0
  7. data/ext/ffi_yajl/ext/encoder/extconf.rb +29 -0
  8. data/ext/ffi_yajl/ext/parser/extconf.rb +29 -0
  9. data/ext/ffi_yajl/ext/parser/parser.c +199 -0
  10. data/ext/libyajl2/extconf.rb +65 -0
  11. data/ext/libyajl2/vendored/.gitignore +3 -0
  12. data/ext/libyajl2/vendored/BUILDING +23 -0
  13. data/ext/libyajl2/vendored/BUILDING.win32 +27 -0
  14. data/ext/libyajl2/vendored/CMakeLists.txt +79 -0
  15. data/ext/libyajl2/vendored/COPYING +13 -0
  16. data/ext/libyajl2/vendored/ChangeLog +175 -0
  17. data/ext/libyajl2/vendored/README +74 -0
  18. data/ext/libyajl2/vendored/TODO +9 -0
  19. data/ext/libyajl2/vendored/YAJLDoc.cmake +26 -0
  20. data/ext/libyajl2/vendored/configure +79 -0
  21. data/ext/libyajl2/vendored/example/CMakeLists.txt +23 -0
  22. data/ext/libyajl2/vendored/example/README.md +7 -0
  23. data/ext/libyajl2/vendored/example/parse_config.c +69 -0
  24. data/ext/libyajl2/vendored/example/sample.config +101 -0
  25. data/ext/libyajl2/vendored/perf/CMakeLists.txt +23 -0
  26. data/ext/libyajl2/vendored/perf/documents.c +1418 -0
  27. data/ext/libyajl2/vendored/perf/documents.h +28 -0
  28. data/ext/libyajl2/vendored/perf/perftest.c +134 -0
  29. data/ext/libyajl2/vendored/reformatter/CMakeLists.txt +39 -0
  30. data/ext/libyajl2/vendored/reformatter/json_reformat.c +194 -0
  31. data/ext/libyajl2/vendored/src/CMakeLists.txt +86 -0
  32. data/ext/libyajl2/vendored/src/YAJL.dxy +1258 -0
  33. data/ext/libyajl2/vendored/src/api/yajl_common.h +75 -0
  34. data/ext/libyajl2/vendored/src/api/yajl_gen.h +157 -0
  35. data/ext/libyajl2/vendored/src/api/yajl_parse.h +226 -0
  36. data/ext/libyajl2/vendored/src/api/yajl_tree.h +185 -0
  37. data/ext/libyajl2/vendored/src/api/yajl_version.h.cmake +23 -0
  38. data/ext/libyajl2/vendored/src/yajl +33 -0
  39. data/ext/libyajl2/vendored/src/yajl.c +175 -0
  40. data/ext/libyajl2/vendored/src/yajl.pc.cmake +9 -0
  41. data/ext/libyajl2/vendored/src/yajl_alloc.c +52 -0
  42. data/ext/libyajl2/vendored/src/yajl_alloc.h +34 -0
  43. data/ext/libyajl2/vendored/src/yajl_buf.c +103 -0
  44. data/ext/libyajl2/vendored/src/yajl_buf.h +57 -0
  45. data/ext/libyajl2/vendored/src/yajl_bytestack.h +69 -0
  46. data/ext/libyajl2/vendored/src/yajl_encode.c +220 -0
  47. data/ext/libyajl2/vendored/src/yajl_encode.h +34 -0
  48. data/ext/libyajl2/vendored/src/yajl_gen.c +354 -0
  49. data/ext/libyajl2/vendored/src/yajl_lex.c +763 -0
  50. data/ext/libyajl2/vendored/src/yajl_lex.h +117 -0
  51. data/ext/libyajl2/vendored/src/yajl_parser.c +498 -0
  52. data/ext/libyajl2/vendored/src/yajl_parser.h +78 -0
  53. data/ext/libyajl2/vendored/src/yajl_tree.c +503 -0
  54. data/ext/libyajl2/vendored/src/yajl_version.c +7 -0
  55. data/ext/libyajl2/vendored/test/CMakeLists.txt +23 -0
  56. data/ext/libyajl2/vendored/test/cases/ac_difficult_json_c_test_case_with_comments.json +1 -0
  57. data/ext/libyajl2/vendored/test/cases/ac_difficult_json_c_test_case_with_comments.json.gold +36 -0
  58. data/ext/libyajl2/vendored/test/cases/ac_simple_with_comments.json +11 -0
  59. data/ext/libyajl2/vendored/test/cases/ac_simple_with_comments.json.gold +9 -0
  60. data/ext/libyajl2/vendored/test/cases/ag_false_then_garbage.json +1 -0
  61. data/ext/libyajl2/vendored/test/cases/ag_false_then_garbage.json.gold +2 -0
  62. data/ext/libyajl2/vendored/test/cases/ag_null_then_garbage.json +1 -0
  63. data/ext/libyajl2/vendored/test/cases/ag_null_then_garbage.json.gold +2 -0
  64. data/ext/libyajl2/vendored/test/cases/ag_true_then_garbage.json +1 -0
  65. data/ext/libyajl2/vendored/test/cases/ag_true_then_garbage.json.gold +2 -0
  66. data/ext/libyajl2/vendored/test/cases/am_eof.json +1 -0
  67. data/ext/libyajl2/vendored/test/cases/am_eof.json.gold +4 -0
  68. data/ext/libyajl2/vendored/test/cases/am_integers.json +1 -0
  69. data/ext/libyajl2/vendored/test/cases/am_integers.json.gold +3 -0
  70. data/ext/libyajl2/vendored/test/cases/am_multiple.json +3 -0
  71. data/ext/libyajl2/vendored/test/cases/am_multiple.json.gold +5 -0
  72. data/ext/libyajl2/vendored/test/cases/am_stuff.json +7 -0
  73. data/ext/libyajl2/vendored/test/cases/am_stuff.json.gold +14 -0
  74. data/ext/libyajl2/vendored/test/cases/ap_array_open.json +1 -0
  75. data/ext/libyajl2/vendored/test/cases/ap_array_open.json.gold +2 -0
  76. data/ext/libyajl2/vendored/test/cases/ap_eof_str.json +1 -0
  77. data/ext/libyajl2/vendored/test/cases/ap_eof_str.json.gold +1 -0
  78. data/ext/libyajl2/vendored/test/cases/ap_map_open.json +1 -0
  79. data/ext/libyajl2/vendored/test/cases/ap_map_open.json.gold +2 -0
  80. data/ext/libyajl2/vendored/test/cases/ap_partial_ok.json +1 -0
  81. data/ext/libyajl2/vendored/test/cases/ap_partial_ok.json.gold +4 -0
  82. data/ext/libyajl2/vendored/test/cases/array.json +6 -0
  83. data/ext/libyajl2/vendored/test/cases/array.json.gold +22 -0
  84. data/ext/libyajl2/vendored/test/cases/array_close.json +1 -0
  85. data/ext/libyajl2/vendored/test/cases/array_close.json.gold +2 -0
  86. data/ext/libyajl2/vendored/test/cases/bignums.json +1 -0
  87. data/ext/libyajl2/vendored/test/cases/bignums.json.gold +5 -0
  88. data/ext/libyajl2/vendored/test/cases/bogus_char.json +4 -0
  89. data/ext/libyajl2/vendored/test/cases/bogus_char.json.gold +10 -0
  90. data/ext/libyajl2/vendored/test/cases/codepoints_from_unicode_org.json +1 -0
  91. data/ext/libyajl2/vendored/test/cases/codepoints_from_unicode_org.json.gold +2 -0
  92. data/ext/libyajl2/vendored/test/cases/deep_arrays.json +1 -0
  93. data/ext/libyajl2/vendored/test/cases/deep_arrays.json.gold +2049 -0
  94. data/ext/libyajl2/vendored/test/cases/difficult_json_c_test_case.json +1 -0
  95. data/ext/libyajl2/vendored/test/cases/difficult_json_c_test_case.json.gold +36 -0
  96. data/ext/libyajl2/vendored/test/cases/doubles.json +1 -0
  97. data/ext/libyajl2/vendored/test/cases/doubles.json.gold +7 -0
  98. data/ext/libyajl2/vendored/test/cases/doubles_in_array.json +1 -0
  99. data/ext/libyajl2/vendored/test/cases/doubles_in_array.json.gold +8 -0
  100. data/ext/libyajl2/vendored/test/cases/empty_array.json +1 -0
  101. data/ext/libyajl2/vendored/test/cases/empty_array.json.gold +3 -0
  102. data/ext/libyajl2/vendored/test/cases/empty_string.json +1 -0
  103. data/ext/libyajl2/vendored/test/cases/empty_string.json.gold +2 -0
  104. data/ext/libyajl2/vendored/test/cases/escaped_bulgarian.json +4 -0
  105. data/ext/libyajl2/vendored/test/cases/escaped_bulgarian.json.gold +7 -0
  106. data/ext/libyajl2/vendored/test/cases/escaped_foobar.json +1 -0
  107. data/ext/libyajl2/vendored/test/cases/escaped_foobar.json.gold +2 -0
  108. data/ext/libyajl2/vendored/test/cases/false.json +1 -0
  109. data/ext/libyajl2/vendored/test/cases/false.json.gold +2 -0
  110. data/ext/libyajl2/vendored/test/cases/fg_false_then_garbage.json +1 -0
  111. data/ext/libyajl2/vendored/test/cases/fg_false_then_garbage.json.gold +3 -0
  112. data/ext/libyajl2/vendored/test/cases/fg_issue_7.json +1 -0
  113. data/ext/libyajl2/vendored/test/cases/fg_issue_7.json.gold +3 -0
  114. data/ext/libyajl2/vendored/test/cases/fg_null_then_garbage.json +1 -0
  115. data/ext/libyajl2/vendored/test/cases/fg_null_then_garbage.json.gold +3 -0
  116. data/ext/libyajl2/vendored/test/cases/fg_true_then_garbage.json +1 -0
  117. data/ext/libyajl2/vendored/test/cases/fg_true_then_garbage.json.gold +3 -0
  118. data/ext/libyajl2/vendored/test/cases/four_byte_utf8.json +2 -0
  119. data/ext/libyajl2/vendored/test/cases/four_byte_utf8.json.gold +5 -0
  120. data/ext/libyajl2/vendored/test/cases/high_overflow.json +1 -0
  121. data/ext/libyajl2/vendored/test/cases/high_overflow.json.gold +2 -0
  122. data/ext/libyajl2/vendored/test/cases/integers.json +3 -0
  123. data/ext/libyajl2/vendored/test/cases/integers.json.gold +14 -0
  124. data/ext/libyajl2/vendored/test/cases/invalid_utf8.json +1 -0
  125. data/ext/libyajl2/vendored/test/cases/invalid_utf8.json.gold +3 -0
  126. data/ext/libyajl2/vendored/test/cases/isolated_surrogate_marker.json +1 -0
  127. data/ext/libyajl2/vendored/test/cases/isolated_surrogate_marker.json.gold +2 -0
  128. data/ext/libyajl2/vendored/test/cases/leading_zero_in_number.json +1 -0
  129. data/ext/libyajl2/vendored/test/cases/leading_zero_in_number.json.gold +5 -0
  130. data/ext/libyajl2/vendored/test/cases/lonely_minus_sign.json +7 -0
  131. data/ext/libyajl2/vendored/test/cases/lonely_minus_sign.json.gold +9 -0
  132. data/ext/libyajl2/vendored/test/cases/lonely_number.json +1 -0
  133. data/ext/libyajl2/vendored/test/cases/lonely_number.json.gold +2 -0
  134. data/ext/libyajl2/vendored/test/cases/low_overflow.json +1 -0
  135. data/ext/libyajl2/vendored/test/cases/low_overflow.json.gold +2 -0
  136. data/ext/libyajl2/vendored/test/cases/map_close.json +1 -0
  137. data/ext/libyajl2/vendored/test/cases/map_close.json.gold +2 -0
  138. data/ext/libyajl2/vendored/test/cases/missing_integer_after_decimal_point.json +1 -0
  139. data/ext/libyajl2/vendored/test/cases/missing_integer_after_decimal_point.json.gold +2 -0
  140. data/ext/libyajl2/vendored/test/cases/missing_integer_after_exponent.json +1 -0
  141. data/ext/libyajl2/vendored/test/cases/missing_integer_after_exponent.json.gold +2 -0
  142. data/ext/libyajl2/vendored/test/cases/multiple.json +3 -0
  143. data/ext/libyajl2/vendored/test/cases/multiple.json.gold +4 -0
  144. data/ext/libyajl2/vendored/test/cases/non_utf8_char_in_string.json +1 -0
  145. data/ext/libyajl2/vendored/test/cases/non_utf8_char_in_string.json.gold +8 -0
  146. data/ext/libyajl2/vendored/test/cases/np_partial_bad.json +1 -0
  147. data/ext/libyajl2/vendored/test/cases/np_partial_bad.json.gold +5 -0
  148. data/ext/libyajl2/vendored/test/cases/null.json +1 -0
  149. data/ext/libyajl2/vendored/test/cases/null.json.gold +2 -0
  150. data/ext/libyajl2/vendored/test/cases/nulls_and_bools.json +5 -0
  151. data/ext/libyajl2/vendored/test/cases/nulls_and_bools.json.gold +9 -0
  152. data/ext/libyajl2/vendored/test/cases/simple.json +5 -0
  153. data/ext/libyajl2/vendored/test/cases/simple.json.gold +9 -0
  154. data/ext/libyajl2/vendored/test/cases/simple_with_comments.json +11 -0
  155. data/ext/libyajl2/vendored/test/cases/simple_with_comments.json.gold +5 -0
  156. data/ext/libyajl2/vendored/test/cases/string_invalid_escape.json +1 -0
  157. data/ext/libyajl2/vendored/test/cases/string_invalid_escape.json.gold +3 -0
  158. data/ext/libyajl2/vendored/test/cases/string_invalid_hex_char.json +1 -0
  159. data/ext/libyajl2/vendored/test/cases/string_invalid_hex_char.json.gold +2 -0
  160. data/ext/libyajl2/vendored/test/cases/string_with_escapes.json +3 -0
  161. data/ext/libyajl2/vendored/test/cases/string_with_escapes.json.gold +7 -0
  162. data/ext/libyajl2/vendored/test/cases/string_with_invalid_newline.json +2 -0
  163. data/ext/libyajl2/vendored/test/cases/string_with_invalid_newline.json.gold +2 -0
  164. data/ext/libyajl2/vendored/test/cases/three_byte_utf8.json +1 -0
  165. data/ext/libyajl2/vendored/test/cases/three_byte_utf8.json.gold +7 -0
  166. data/ext/libyajl2/vendored/test/cases/true.json +1 -0
  167. data/ext/libyajl2/vendored/test/cases/true.json.gold +2 -0
  168. data/ext/libyajl2/vendored/test/cases/unescaped_bulgarian.json +1 -0
  169. data/ext/libyajl2/vendored/test/cases/unescaped_bulgarian.json.gold +4 -0
  170. data/ext/libyajl2/vendored/test/cases/zerobyte.json +1 -0
  171. data/ext/libyajl2/vendored/test/cases/zerobyte.json.gold +0 -0
  172. data/ext/libyajl2/vendored/test/run_tests.sh +94 -0
  173. data/ext/libyajl2/vendored/test/yajl_test.c +281 -0
  174. data/ext/libyajl2/vendored/verify/CMakeLists.txt +39 -0
  175. data/ext/libyajl2/vendored/verify/json_verify.c +116 -0
  176. data/lib/ffi_yajl.rb +14 -0
  177. data/lib/ffi_yajl/benchmark.rb +7 -0
  178. data/lib/ffi_yajl/benchmark/MIT-LICENSE +20 -0
  179. data/lib/ffi_yajl/benchmark/encode.rb +135 -0
  180. data/lib/ffi_yajl/benchmark/encode_json_and_marshal.rb +42 -0
  181. data/lib/ffi_yajl/benchmark/encode_json_and_yaml.rb +53 -0
  182. data/lib/ffi_yajl/benchmark/encode_profile.rb +38 -0
  183. data/lib/ffi_yajl/benchmark/http.rb +32 -0
  184. data/lib/ffi_yajl/benchmark/parse.rb +133 -0
  185. data/lib/ffi_yajl/benchmark/parse_json_and_marshal.rb +50 -0
  186. data/lib/ffi_yajl/benchmark/parse_json_and_yaml.rb +55 -0
  187. data/lib/ffi_yajl/benchmark/parse_profile.rb +37 -0
  188. data/lib/ffi_yajl/benchmark/parse_profile_ruby_prof.rb +39 -0
  189. data/lib/ffi_yajl/benchmark/parse_stream.rb +54 -0
  190. data/lib/ffi_yajl/benchmark/subjects/item.json +1 -0
  191. data/lib/ffi_yajl/benchmark/subjects/ohai.json +1216 -0
  192. data/lib/ffi_yajl/benchmark/subjects/ohai.marshal_dump +0 -0
  193. data/lib/ffi_yajl/benchmark/subjects/ohai.yml +975 -0
  194. data/lib/ffi_yajl/benchmark/subjects/twitter_search.json +1 -0
  195. data/lib/ffi_yajl/benchmark/subjects/twitter_stream.json +430 -0
  196. data/lib/ffi_yajl/benchmark/subjects/unicode.json +1 -0
  197. data/lib/ffi_yajl/encoder.rb +53 -0
  198. data/lib/ffi_yajl/ext.rb +22 -0
  199. data/lib/ffi_yajl/ext/.keep +0 -0
  200. data/lib/ffi_yajl/ffi.rb +129 -0
  201. data/lib/ffi_yajl/ffi/encoder.rb +175 -0
  202. data/lib/ffi_yajl/ffi/parser.rb +145 -0
  203. data/lib/ffi_yajl/json_gem.rb +121 -0
  204. data/lib/ffi_yajl/parser.rb +23 -0
  205. data/lib/ffi_yajl/version.rb +3 -0
  206. data/lib/libyajl.so +0 -0
  207. data/lib/libyajl.so.2 +0 -0
  208. data/lib/libyajl.so.2.0.5 +0 -0
  209. data/lib/libyajl_s.a +0 -0
  210. data/spec/ffi_yajl/encoder_spec.rb +39 -0
  211. data/spec/ffi_yajl/json_gem_spec.rb +355 -0
  212. data/spec/ffi_yajl/parser_spec.rb +78 -0
  213. data/spec/spec_helper.rb +14 -0
  214. metadata +332 -0
@@ -0,0 +1,34 @@
1
+ /*
2
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
3
+ *
4
+ * Permission to use, copy, modify, and/or distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #ifndef __YAJL_ENCODE_H__
18
+ #define __YAJL_ENCODE_H__
19
+
20
+ #include "yajl_buf.h"
21
+ #include "api/yajl_gen.h"
22
+
23
+ void yajl_string_encode(const yajl_print_t printer,
24
+ void * ctx,
25
+ const unsigned char * str,
26
+ size_t length,
27
+ int escape_solidus);
28
+
29
+ void yajl_string_decode(yajl_buf buf, const unsigned char * str,
30
+ size_t length);
31
+
32
+ int yajl_string_validate_utf8(const unsigned char * s, size_t len);
33
+
34
+ #endif
@@ -0,0 +1,354 @@
1
+ /*
2
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
3
+ *
4
+ * Permission to use, copy, modify, and/or distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #include "api/yajl_gen.h"
18
+ #include "yajl_buf.h"
19
+ #include "yajl_encode.h"
20
+
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <stdio.h>
24
+ #include <math.h>
25
+ #include <stdarg.h>
26
+
27
/*
 * Generator state.  One value is kept per nesting level in
 * yajl_gen_t.state[]; the enumerator order (and therefore every numeric
 * value) is unchanged.
 */
typedef enum {
    /* zero value: what every level holds after yajl_gen_alloc's memset */
    yajl_gen_start,
    /* set by yajl_gen_map_open: inside a map, nothing emitted yet */
    yajl_gen_map_start,
    /* inside a map, a value was just emitted: the next token gets "," */
    yajl_gen_map_key,
    /* inside a map, a key was just emitted: the next token gets ":" */
    yajl_gen_map_val,
    /* set by yajl_gen_array_open: inside an array, nothing emitted yet */
    yajl_gen_array_start,
    /* inside an array with at least one element: the next token gets "," */
    yajl_gen_in_array,
    /* reached from yajl_gen_start once a value has been emitted */
    yajl_gen_complete,
    /* rejected by ENSURE_VALID_STATE with yajl_gen_in_error_state */
    yajl_gen_error
} yajl_gen_state;
37
+
38
+ struct yajl_gen_t
39
+ {
40
+ unsigned int flags;
41
+ unsigned int depth;
42
+ const char * indentString;
43
+ yajl_gen_state state[YAJL_MAX_DEPTH];
44
+ yajl_print_t print;
45
+ void * ctx; /* yajl_buf */
46
+ /* memory allocation routines */
47
+ yajl_alloc_funcs alloc;
48
+ };
49
+
50
+ int
51
+ yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...)
52
+ {
53
+ int rv = 1;
54
+ va_list ap;
55
+ va_start(ap, opt);
56
+
57
+ switch(opt) {
58
+ case yajl_gen_beautify:
59
+ case yajl_gen_validate_utf8:
60
+ case yajl_gen_escape_solidus:
61
+ if (va_arg(ap, int)) g->flags |= opt;
62
+ else g->flags &= ~opt;
63
+ break;
64
+ case yajl_gen_indent_string: {
65
+ const char *indent = va_arg(ap, const char *);
66
+ g->indentString = indent;
67
+ for (; *indent; indent++) {
68
+ if (*indent != '\n'
69
+ && *indent != '\v'
70
+ && *indent != '\f'
71
+ && *indent != '\t'
72
+ && *indent != '\r'
73
+ && *indent != ' ')
74
+ {
75
+ g->indentString = NULL;
76
+ rv = 0;
77
+ }
78
+ }
79
+ break;
80
+ }
81
+ case yajl_gen_print_callback:
82
+ yajl_buf_free(g->ctx);
83
+ g->print = va_arg(ap, const yajl_print_t);
84
+ g->ctx = va_arg(ap, void *);
85
+ break;
86
+ default:
87
+ rv = 0;
88
+ }
89
+
90
+ va_end(ap);
91
+
92
+ return rv;
93
+ }
94
+
95
+
96
+
97
+ yajl_gen
98
+ yajl_gen_alloc(const yajl_alloc_funcs * afs)
99
+ {
100
+ yajl_gen g = NULL;
101
+ yajl_alloc_funcs afsBuffer;
102
+
103
+ /* first order of business is to set up memory allocation routines */
104
+ if (afs != NULL) {
105
+ if (afs->malloc == NULL || afs->realloc == NULL || afs->free == NULL)
106
+ {
107
+ return NULL;
108
+ }
109
+ } else {
110
+ yajl_set_default_alloc_funcs(&afsBuffer);
111
+ afs = &afsBuffer;
112
+ }
113
+
114
+ g = (yajl_gen) YA_MALLOC(afs, sizeof(struct yajl_gen_t));
115
+ if (!g) return NULL;
116
+
117
+ memset((void *) g, 0, sizeof(struct yajl_gen_t));
118
+ /* copy in pointers to allocation routines */
119
+ memcpy((void *) &(g->alloc), (void *) afs, sizeof(yajl_alloc_funcs));
120
+
121
+ g->print = (yajl_print_t)&yajl_buf_append;
122
+ g->ctx = yajl_buf_alloc(&(g->alloc));
123
+ g->indentString = " ";
124
+
125
+ return g;
126
+ }
127
+
128
+ void
129
+ yajl_gen_free(yajl_gen g)
130
+ {
131
+ if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_free((yajl_buf)g->ctx);
132
+ YA_FREE(&(g->alloc), g);
133
+ }
134
+
135
/*
 * Helper macros shared by the yajl_gen_* emitters.  Each one expects a
 * local `g` (the generator); several return from the enclosing function,
 * so they may only be used in functions returning yajl_gen_status.  Every
 * macro expands to exactly one statement and must be followed by ';'.
 */

/* Emit the separator owed before the next token: "," after a map value or
 * an array element, ":" after a map key. */
#define INSERT_SEP                                                          \
    do {                                                                    \
        if (g->state[g->depth] == yajl_gen_map_key ||                       \
            g->state[g->depth] == yajl_gen_in_array) {                      \
            g->print(g->ctx, ",", 1);                                       \
            if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);  \
        } else if (g->state[g->depth] == yajl_gen_map_val) {                \
            g->print(g->ctx, ":", 1);                                       \
            if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, " ", 1);   \
        }                                                                   \
    } while (0)

/* When beautifying, indent by one indentString per nesting level; a map
 * value is not indented because it follows its key on the same line. */
#define INSERT_WHITESPACE                                                   \
    do {                                                                    \
        if ((g->flags & yajl_gen_beautify) &&                               \
            g->state[g->depth] != yajl_gen_map_val) {                       \
            unsigned int _i;                                                \
            for (_i = 0; _i < g->depth; _i++)                               \
                g->print(g->ctx,                                            \
                         g->indentString,                                   \
                         (unsigned int)strlen(g->indentString));            \
        }                                                                   \
    } while (0)

/* Reject a non-string token where a map key is required. */
#define ENSURE_NOT_KEY                                                      \
    do {                                                                    \
        if (g->state[g->depth] == yajl_gen_map_key ||                       \
            g->state[g->depth] == yajl_gen_map_start) {                     \
            return yajl_gen_keys_must_be_strings;                           \
        }                                                                   \
    } while (0)

/* check that we're not complete, or in error state.  in a valid state
 * to be generating */
#define ENSURE_VALID_STATE                                                  \
    do {                                                                    \
        if (g->state[g->depth] == yajl_gen_error) {                         \
            return yajl_gen_in_error_state;                                 \
        } else if (g->state[g->depth] == yajl_gen_complete) {               \
            return yajl_gen_generation_complete;                            \
        }                                                                   \
    } while (0)

/* Enter one nesting level.  The limit is tested before depth is changed
 * so that a refused open leaves depth a valid index into state[]. */
#define INCREMENT_DEPTH                                                     \
    do {                                                                    \
        if (g->depth + 1 >= YAJL_MAX_DEPTH)                                 \
            return yajl_max_depth_exceeded;                                 \
        ++(g->depth);                                                       \
    } while (0)

/* Leave one nesting level.  A close with nothing open is refused before
 * depth is touched (it used to wrap around), and the value returned is a
 * yajl_gen_status -- yajl_gen_error is a yajl_gen_state enumerator and
 * does not belong in a status return. */
#define DECREMENT_DEPTH                                                     \
    do {                                                                    \
        if (g->depth == 0)                                                  \
            return yajl_gen_generation_complete;                            \
        --(g->depth);                                                       \
    } while (0)

/* Advance the current level's state after a token has been emitted. */
#define APPENDED_ATOM                                                       \
    do {                                                                    \
        switch (g->state[g->depth]) {                                       \
            case yajl_gen_start:                                            \
                g->state[g->depth] = yajl_gen_complete;                     \
                break;                                                      \
            case yajl_gen_map_start:                                        \
            case yajl_gen_map_key:                                          \
                g->state[g->depth] = yajl_gen_map_val;                      \
                break;                                                      \
            case yajl_gen_array_start:                                      \
                g->state[g->depth] = yajl_gen_in_array;                     \
                break;                                                      \
            case yajl_gen_map_val:                                          \
                g->state[g->depth] = yajl_gen_map_key;                      \
                break;                                                      \
            default:                                                        \
                break;                                                      \
        }                                                                   \
    } while (0)

/* When beautifying, terminate a completed document with a newline. */
#define FINAL_NEWLINE                                                       \
    do {                                                                    \
        if ((g->flags & yajl_gen_beautify) &&                               \
            g->state[g->depth] == yajl_gen_complete)                        \
            g->print(g->ctx, "\n", 1);                                      \
    } while (0)
199
+
200
+ yajl_gen_status
201
+ yajl_gen_integer(yajl_gen g, long long int number)
202
+ {
203
+ char i[32];
204
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
205
+ sprintf(i, "%lld", number);
206
+ g->print(g->ctx, i, (unsigned int)strlen(i));
207
+ APPENDED_ATOM;
208
+ FINAL_NEWLINE;
209
+ return yajl_gen_status_ok;
210
+ }
211
+
212
+ #if defined(_WIN32) || defined(WIN32)
213
+ #include <float.h>
214
+ #define isnan _isnan
215
+ #define isinf !_finite
216
+ #endif
217
+
218
+ yajl_gen_status
219
+ yajl_gen_double(yajl_gen g, double number)
220
+ {
221
+ char i[32];
222
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY;
223
+ if (isnan(number) || isinf(number)) return yajl_gen_invalid_number;
224
+ INSERT_SEP; INSERT_WHITESPACE;
225
+ sprintf(i, "%.20g", number);
226
+ if (strspn(i, "0123456789-") == strlen(i)) {
227
+ strcat(i, ".0");
228
+ }
229
+ g->print(g->ctx, i, (unsigned int)strlen(i));
230
+ APPENDED_ATOM;
231
+ FINAL_NEWLINE;
232
+ return yajl_gen_status_ok;
233
+ }
234
+
235
+ yajl_gen_status
236
+ yajl_gen_number(yajl_gen g, const char * s, size_t l)
237
+ {
238
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
239
+ g->print(g->ctx, s, l);
240
+ APPENDED_ATOM;
241
+ FINAL_NEWLINE;
242
+ return yajl_gen_status_ok;
243
+ }
244
+
245
+ yajl_gen_status
246
+ yajl_gen_string(yajl_gen g, const unsigned char * str,
247
+ size_t len)
248
+ {
249
+ // if validation is enabled, check that the string is valid utf8
250
+ // XXX: This checking could be done a little faster, in the same pass as
251
+ // the string encoding
252
+ if (g->flags & yajl_gen_validate_utf8) {
253
+ if (!yajl_string_validate_utf8(str, len)) {
254
+ return yajl_gen_invalid_string;
255
+ }
256
+ }
257
+ ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE;
258
+ g->print(g->ctx, "\"", 1);
259
+ yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus);
260
+ g->print(g->ctx, "\"", 1);
261
+ APPENDED_ATOM;
262
+ FINAL_NEWLINE;
263
+ return yajl_gen_status_ok;
264
+ }
265
+
266
+ yajl_gen_status
267
+ yajl_gen_null(yajl_gen g)
268
+ {
269
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
270
+ g->print(g->ctx, "null", strlen("null"));
271
+ APPENDED_ATOM;
272
+ FINAL_NEWLINE;
273
+ return yajl_gen_status_ok;
274
+ }
275
+
276
+ yajl_gen_status
277
+ yajl_gen_bool(yajl_gen g, int boolean)
278
+ {
279
+ const char * val = boolean ? "true" : "false";
280
+
281
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
282
+ g->print(g->ctx, val, (unsigned int)strlen(val));
283
+ APPENDED_ATOM;
284
+ FINAL_NEWLINE;
285
+ return yajl_gen_status_ok;
286
+ }
287
+
288
+ yajl_gen_status
289
+ yajl_gen_map_open(yajl_gen g)
290
+ {
291
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
292
+ INCREMENT_DEPTH;
293
+
294
+ g->state[g->depth] = yajl_gen_map_start;
295
+ g->print(g->ctx, "{", 1);
296
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
297
+ FINAL_NEWLINE;
298
+ return yajl_gen_status_ok;
299
+ }
300
+
301
+ yajl_gen_status
302
+ yajl_gen_map_close(yajl_gen g)
303
+ {
304
+ ENSURE_VALID_STATE;
305
+ DECREMENT_DEPTH;
306
+
307
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
308
+ APPENDED_ATOM;
309
+ INSERT_WHITESPACE;
310
+ g->print(g->ctx, "}", 1);
311
+ FINAL_NEWLINE;
312
+ return yajl_gen_status_ok;
313
+ }
314
+
315
+ yajl_gen_status
316
+ yajl_gen_array_open(yajl_gen g)
317
+ {
318
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
319
+ INCREMENT_DEPTH;
320
+ g->state[g->depth] = yajl_gen_array_start;
321
+ g->print(g->ctx, "[", 1);
322
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
323
+ FINAL_NEWLINE;
324
+ return yajl_gen_status_ok;
325
+ }
326
+
327
+ yajl_gen_status
328
+ yajl_gen_array_close(yajl_gen g)
329
+ {
330
+ ENSURE_VALID_STATE;
331
+ DECREMENT_DEPTH;
332
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
333
+ APPENDED_ATOM;
334
+ INSERT_WHITESPACE;
335
+ g->print(g->ctx, "]", 1);
336
+ FINAL_NEWLINE;
337
+ return yajl_gen_status_ok;
338
+ }
339
+
340
+ yajl_gen_status
341
+ yajl_gen_get_buf(yajl_gen g, const unsigned char ** buf,
342
+ size_t * len)
343
+ {
344
+ if (g->print != (yajl_print_t)&yajl_buf_append) return yajl_gen_no_buf;
345
+ *buf = yajl_buf_data((yajl_buf)g->ctx);
346
+ *len = yajl_buf_len((yajl_buf)g->ctx);
347
+ return yajl_gen_status_ok;
348
+ }
349
+
350
+ void
351
+ yajl_gen_clear(yajl_gen g)
352
+ {
353
+ if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_clear((yajl_buf)g->ctx);
354
+ }
@@ -0,0 +1,763 @@
1
+ /*
2
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
3
+ *
4
+ * Permission to use, copy, modify, and/or distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #include "yajl_lex.h"
18
+ #include "yajl_buf.h"
19
+
20
+ #include <stdlib.h>
21
+ #include <stdio.h>
22
+ #include <assert.h>
23
+ #include <string.h>
24
+
25
+ #ifdef YAJL_LEXER_DEBUG
26
+ static const char *
27
+ tokToStr(yajl_tok tok)
28
+ {
29
+ switch (tok) {
30
+ case yajl_tok_bool: return "bool";
31
+ case yajl_tok_colon: return "colon";
32
+ case yajl_tok_comma: return "comma";
33
+ case yajl_tok_eof: return "eof";
34
+ case yajl_tok_error: return "error";
35
+ case yajl_tok_left_brace: return "brace";
36
+ case yajl_tok_left_bracket: return "bracket";
37
+ case yajl_tok_null: return "null";
38
+ case yajl_tok_integer: return "integer";
39
+ case yajl_tok_double: return "double";
40
+ case yajl_tok_right_brace: return "brace";
41
+ case yajl_tok_right_bracket: return "bracket";
42
+ case yajl_tok_string: return "string";
43
+ case yajl_tok_string_with_escapes: return "string_with_escapes";
44
+ }
45
+ return "unknown";
46
+ }
47
+ #endif
48
+
49
+ /* Impact of the stream parsing feature on the lexer:
50
+ *
51
+ * YAJL supports stream parsing. That is, the ability to parse the first
52
+ * bits of a chunk of JSON before the last bits are available (still on
53
+ * the network or disk). This makes the lexer more complex. The
54
+ * responsibility of the lexer is to handle transparently the case where
55
+ * a chunk boundary falls in the middle of a token. This is
56
+ * accomplished via a buffer and a character reading abstraction.
57
+ *
58
+ * Overview of implementation
59
+ *
60
+ * When we lex to end of input string before end of token is hit, we
61
+ * copy all of the input text composing the token into our lexBuf.
62
+ *
63
+ * Every time we read a character, we do so through the readChar function.
64
+ * readChar's responsibility is to handle pulling all chars from the buffer
65
+ * before pulling chars from input text
66
+ */
67
+
68
+ struct yajl_lexer_t {
69
+ /* the overal line and char offset into the data */
70
+ size_t lineOff;
71
+ size_t charOff;
72
+
73
+ /* error */
74
+ yajl_lex_error error;
75
+
76
+ /* a input buffer to handle the case where a token is spread over
77
+ * multiple chunks */
78
+ yajl_buf buf;
79
+
80
+ /* in the case where we have data in the lexBuf, bufOff holds
81
+ * the current offset into the lexBuf. */
82
+ size_t bufOff;
83
+
84
+ /* are we using the lex buf? */
85
+ unsigned int bufInUse;
86
+
87
+ /* shall we allow comments? */
88
+ unsigned int allowComments;
89
+
90
+ /* shall we validate utf8 inside strings? */
91
+ unsigned int validateUTF8;
92
+
93
+ yajl_alloc_funcs * alloc;
94
+ };
95
+
96
/* readChar: fetch the next input byte and advance.
 *
 * While the lexer's buffer is in use and still has unread bytes
 * (bufOff < buffer length) the byte comes from the buffer and bufOff is
 * advanced; otherwise it comes from txt[*off] and *off is advanced.
 * No bounds check is made on txt: callers must verify *off is inside the
 * input before invoking this.  Arguments are evaluated more than once,
 * so they must be free of side effects.
 *
 * Every use of the lxr parameter is parenthesized so that any pointer
 * expression can safely be passed. */
#define readChar(lxr, txt, off)                                          \
    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) &&                     \
      (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ?                        \
     (*((const unsigned char *) yajl_buf_data((lxr)->buf) +              \
        ((lxr)->bufOff)++)) :                                            \
     ((txt)[(*(off))++]))

/* unreadChar: step back one byte.  Decrements *off when it is non-zero,
 * otherwise decrements the lexer's buffer offset (the byte being undone
 * is then taken to have come from the buffer). */
#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
102
+
103
+ yajl_lexer
104
+ yajl_lex_alloc(yajl_alloc_funcs * alloc,
105
+ unsigned int allowComments, unsigned int validateUTF8)
106
+ {
107
+ yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
108
+ memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
109
+ lxr->buf = yajl_buf_alloc(alloc);
110
+ lxr->allowComments = allowComments;
111
+ lxr->validateUTF8 = validateUTF8;
112
+ lxr->alloc = alloc;
113
+ return lxr;
114
+ }
115
+
116
+ void
117
+ yajl_lex_free(yajl_lexer lxr)
118
+ {
119
+ yajl_buf_free(lxr->buf);
120
+ YA_FREE(lxr->alloc, lxr);
121
+ return;
122
+ }
123
+
124
+ /* a lookup table which lets us quickly determine five things:
125
+ * VEC - valid escaped control char
126
+ * note. the solidus '/' may be escaped or not.
127
+ * IJC - invalid json char
128
+ * VHC - valid hex char
129
+ * NFP - needs further processing (from a string scanning perspective)
130
+ * NUC - needs utf8 checking when enabled (from a string scanning perspective)
131
+ */
132
/* per-byte classification flags stored in charLookupTable */
#define VEC 0x01    /* valid character after a backslash */
#define IJC 0x02    /* invalid (unescaped) inside a JSON string */
#define VHC 0x04    /* valid hexadecimal digit */
#define NFP 0x08    /* string scanner must stop and inspect this byte */
#define NUC 0x10    /* needs UTF-8 checking when validation is enabled */

/* classification of every possible byte value, indexed by the byte */
static const char charLookupTable[256] =
{
    /* 0x00 - 0x1f: control characters, never allowed raw in a string */
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,

    /* 0x20 - 0x27: 0x22 is the double quote */
    0, 0, NFP|VEC|IJC, 0, 0, 0, 0, 0,
    /* 0x28 - 0x2f: 0x2f is the solidus */
    0, 0, 0, 0, 0, 0, 0, VEC,
    /* 0x30 - 0x37: digits 0-7 */
    VHC, VHC, VHC, VHC, VHC, VHC, VHC, VHC,
    /* 0x38 - 0x3f: digits 8-9 */
    VHC, VHC, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x47: letters A-F */
    0, VHC, VHC, VHC, VHC, VHC, VHC, 0,
    /* 0x48 - 0x4f */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 - 0x57 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x58 - 0x5f: 0x5c is the backslash */
    0, 0, 0, 0, NFP|VEC|IJC, 0, 0, 0,

    /* 0x60 - 0x67: letters a-f; b and f are also escape letters */
    0, VHC, VEC|VHC, VHC, VHC, VHC, VEC|VHC, 0,
    /* 0x68 - 0x6f: 0x6e is n */
    0, 0, 0, 0, 0, 0, VEC, 0,
    /* 0x70 - 0x77: 0x72 is r, 0x74 is t */
    0, 0, VEC, 0, VEC, 0, 0, 0,
    /* 0x78 - 0x7f */
    0, 0, 0, 0, 0, 0, 0, 0,

    /* 0x80 - 0x9f */
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,

    /* 0xa0 - 0xbf */
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,

    /* 0xc0 - 0xdf */
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,

    /* 0xe0 - 0xff */
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC,
    NUC, NUC, NUC, NUC, NUC, NUC, NUC, NUC
};
180
+
181
+ /** process a variable length utf8 encoded codepoint.
182
+ *
183
+ * returns:
184
+ * yajl_tok_string - if valid utf8 char was parsed and offset was
185
+ * advanced
186
+ * yajl_tok_eof - if end of input was hit before validation could
187
+ * complete
188
+ * yajl_tok_error - if invalid utf8 was encountered
189
+ *
190
+ * NOTE: on error the offset will point to the first char of the
191
+ * invalid utf8 */
192
+ #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
193
+
194
+ static yajl_tok
195
+ yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
196
+ size_t jsonTextLen, size_t * offset,
197
+ unsigned char curChar)
198
+ {
199
+ if (curChar <= 0x7f) {
200
+ /* single byte */
201
+ return yajl_tok_string;
202
+ } else if ((curChar >> 5) == 0x6) {
203
+ /* two byte */
204
+ UTF8_CHECK_EOF;
205
+ curChar = readChar(lexer, jsonText, offset);
206
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
207
+ } else if ((curChar >> 4) == 0x0e) {
208
+ /* three byte */
209
+ UTF8_CHECK_EOF;
210
+ curChar = readChar(lexer, jsonText, offset);
211
+ if ((curChar >> 6) == 0x2) {
212
+ UTF8_CHECK_EOF;
213
+ curChar = readChar(lexer, jsonText, offset);
214
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
215
+ }
216
+ } else if ((curChar >> 3) == 0x1e) {
217
+ /* four byte */
218
+ UTF8_CHECK_EOF;
219
+ curChar = readChar(lexer, jsonText, offset);
220
+ if ((curChar >> 6) == 0x2) {
221
+ UTF8_CHECK_EOF;
222
+ curChar = readChar(lexer, jsonText, offset);
223
+ if ((curChar >> 6) == 0x2) {
224
+ UTF8_CHECK_EOF;
225
+ curChar = readChar(lexer, jsonText, offset);
226
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
227
+ }
228
+ }
229
+ }
230
+
231
+ return yajl_tok_error;
232
+ }
233
+
234
+ /* lex a string. input is the lexer, pointer to beginning of
235
+ * json text, and start of string (offset).
236
+ * a token is returned which has the following meanings:
237
+ * yajl_tok_string: lex of string was successful. offset points to
238
+ * terminating '"'.
239
+ * yajl_tok_eof: end of text was encountered before we could complete
240
+ * the lex.
241
+ * yajl_tok_error: embedded in the string were unallowable chars. offset
242
+ * points to the offending char
243
+ */
244
+ #define STR_CHECK_EOF \
245
+ if (*offset >= jsonTextLen) { \
246
+ tok = yajl_tok_eof; \
247
+ goto finish_string_lex; \
248
+ }
249
+
250
+ /** scan a string for interesting characters that might need further
251
+ * review. return the number of chars that are uninteresting and can
252
+ * be skipped.
253
+ * (lth) hi world, any thoughts on how to make this routine faster? */
254
+ static size_t
255
+ yajl_string_scan(const unsigned char * buf, size_t len, int utf8check)
256
+ {
257
+ unsigned char mask = IJC|NFP|(utf8check ? NUC : 0);
258
+ size_t skip = 0;
259
+ while (skip < len && !(charLookupTable[*buf] & mask))
260
+ {
261
+ skip++;
262
+ buf++;
263
+ }
264
+ return skip;
265
+ }
266
+
267
+ static yajl_tok
268
+ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
269
+ size_t jsonTextLen, size_t * offset)
270
+ {
271
+ yajl_tok tok = yajl_tok_error;
272
+ int hasEscapes = 0;
273
+
274
+ for (;;) {
275
+ unsigned char curChar;
276
+
277
+ /* now jump into a faster scanning routine to skip as much
278
+ * of the buffers as possible */
279
+ {
280
+ const unsigned char * p;
281
+ size_t len;
282
+
283
+ if ((lexer->bufInUse && yajl_buf_len(lexer->buf) &&
284
+ lexer->bufOff < yajl_buf_len(lexer->buf)))
285
+ {
286
+ p = ((const unsigned char *) yajl_buf_data(lexer->buf) +
287
+ (lexer->bufOff));
288
+ len = yajl_buf_len(lexer->buf) - lexer->bufOff;
289
+ lexer->bufOff += yajl_string_scan(p, len, lexer->validateUTF8);
290
+ }
291
+ else if (*offset < jsonTextLen)
292
+ {
293
+ p = jsonText + *offset;
294
+ len = jsonTextLen - *offset;
295
+ *offset += yajl_string_scan(p, len, lexer->validateUTF8);
296
+ }
297
+ }
298
+
299
+ STR_CHECK_EOF;
300
+
301
+ curChar = readChar(lexer, jsonText, offset);
302
+
303
+ /* quote terminates */
304
+ if (curChar == '"') {
305
+ tok = yajl_tok_string;
306
+ break;
307
+ }
308
+ /* backslash escapes a set of control chars, */
309
+ else if (curChar == '\\') {
310
+ hasEscapes = 1;
311
+ STR_CHECK_EOF;
312
+
313
+ /* special case \u */
314
+ curChar = readChar(lexer, jsonText, offset);
315
+ if (curChar == 'u') {
316
+ unsigned int i = 0;
317
+
318
+ for (i=0;i<4;i++) {
319
+ STR_CHECK_EOF;
320
+ curChar = readChar(lexer, jsonText, offset);
321
+ if (!(charLookupTable[curChar] & VHC)) {
322
+ /* back up to offending char */
323
+ unreadChar(lexer, offset);
324
+ lexer->error = yajl_lex_string_invalid_hex_char;
325
+ goto finish_string_lex;
326
+ }
327
+ }
328
+ } else if (!(charLookupTable[curChar] & VEC)) {
329
+ /* back up to offending char */
330
+ unreadChar(lexer, offset);
331
+ lexer->error = yajl_lex_string_invalid_escaped_char;
332
+ goto finish_string_lex;
333
+ }
334
+ }
335
+ /* when not validating UTF8 it's a simple table lookup to determine
336
+ * if the present character is invalid */
337
+ else if(charLookupTable[curChar] & IJC) {
338
+ /* back up to offending char */
339
+ unreadChar(lexer, offset);
340
+ lexer->error = yajl_lex_string_invalid_json_char;
341
+ goto finish_string_lex;
342
+ }
343
+ /* when in validate UTF8 mode we need to do some extra work */
344
+ else if (lexer->validateUTF8) {
345
+ yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
346
+ offset, curChar);
347
+
348
+ if (t == yajl_tok_eof) {
349
+ tok = yajl_tok_eof;
350
+ goto finish_string_lex;
351
+ } else if (t == yajl_tok_error) {
352
+ lexer->error = yajl_lex_string_invalid_utf8;
353
+ goto finish_string_lex;
354
+ }
355
+ }
356
+ /* accept it, and move on */
357
+ }
358
+ finish_string_lex:
359
+ /* tell our buddy, the parser, wether he needs to process this string
360
+ * again */
361
+ if (hasEscapes && tok == yajl_tok_string) {
362
+ tok = yajl_tok_string_with_escapes;
363
+ }
364
+
365
+ return tok;
366
+ }
367
+
368
+ #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
369
+
370
+ static yajl_tok
371
+ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
372
+ size_t jsonTextLen, size_t * offset)
373
+ {
374
+ /** XXX: numbers are the only entities in json that we must lex
375
+ * _beyond_ in order to know that they are complete. There
376
+ * is an ambiguous case for integers at EOF. */
377
+
378
+ unsigned char c;
379
+
380
+ yajl_tok tok = yajl_tok_integer;
381
+
382
+ RETURN_IF_EOF;
383
+ c = readChar(lexer, jsonText, offset);
384
+
385
+ /* optional leading minus */
386
+ if (c == '-') {
387
+ RETURN_IF_EOF;
388
+ c = readChar(lexer, jsonText, offset);
389
+ }
390
+
391
+ /* a single zero, or a series of integers */
392
+ if (c == '0') {
393
+ RETURN_IF_EOF;
394
+ c = readChar(lexer, jsonText, offset);
395
+ } else if (c >= '1' && c <= '9') {
396
+ do {
397
+ RETURN_IF_EOF;
398
+ c = readChar(lexer, jsonText, offset);
399
+ } while (c >= '0' && c <= '9');
400
+ } else {
401
+ unreadChar(lexer, offset);
402
+ lexer->error = yajl_lex_missing_integer_after_minus;
403
+ return yajl_tok_error;
404
+ }
405
+
406
+ /* optional fraction (indicates this is floating point) */
407
+ if (c == '.') {
408
+ int numRd = 0;
409
+
410
+ RETURN_IF_EOF;
411
+ c = readChar(lexer, jsonText, offset);
412
+
413
+ while (c >= '0' && c <= '9') {
414
+ numRd++;
415
+ RETURN_IF_EOF;
416
+ c = readChar(lexer, jsonText, offset);
417
+ }
418
+
419
+ if (!numRd) {
420
+ unreadChar(lexer, offset);
421
+ lexer->error = yajl_lex_missing_integer_after_decimal;
422
+ return yajl_tok_error;
423
+ }
424
+ tok = yajl_tok_double;
425
+ }
426
+
427
+ /* optional exponent (indicates this is floating point) */
428
+ if (c == 'e' || c == 'E') {
429
+ RETURN_IF_EOF;
430
+ c = readChar(lexer, jsonText, offset);
431
+
432
+ /* optional sign */
433
+ if (c == '+' || c == '-') {
434
+ RETURN_IF_EOF;
435
+ c = readChar(lexer, jsonText, offset);
436
+ }
437
+
438
+ if (c >= '0' && c <= '9') {
439
+ do {
440
+ RETURN_IF_EOF;
441
+ c = readChar(lexer, jsonText, offset);
442
+ } while (c >= '0' && c <= '9');
443
+ } else {
444
+ unreadChar(lexer, offset);
445
+ lexer->error = yajl_lex_missing_integer_after_exponent;
446
+ return yajl_tok_error;
447
+ }
448
+ tok = yajl_tok_double;
449
+ }
450
+
451
+ /* we always go "one too far" */
452
+ unreadChar(lexer, offset);
453
+
454
+ return tok;
455
+ }
456
+
457
+ static yajl_tok
458
+ yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
459
+ size_t jsonTextLen, size_t * offset)
460
+ {
461
+ unsigned char c;
462
+
463
+ yajl_tok tok = yajl_tok_comment;
464
+
465
+ RETURN_IF_EOF;
466
+ c = readChar(lexer, jsonText, offset);
467
+
468
+ /* either slash or star expected */
469
+ if (c == '/') {
470
+ /* now we throw away until end of line */
471
+ do {
472
+ RETURN_IF_EOF;
473
+ c = readChar(lexer, jsonText, offset);
474
+ } while (c != '\n');
475
+ } else if (c == '*') {
476
+ /* now we throw away until end of comment */
477
+ for (;;) {
478
+ RETURN_IF_EOF;
479
+ c = readChar(lexer, jsonText, offset);
480
+ if (c == '*') {
481
+ RETURN_IF_EOF;
482
+ c = readChar(lexer, jsonText, offset);
483
+ if (c == '/') {
484
+ break;
485
+ } else {
486
+ unreadChar(lexer, offset);
487
+ }
488
+ }
489
+ }
490
+ } else {
491
+ lexer->error = yajl_lex_invalid_char;
492
+ tok = yajl_tok_error;
493
+ }
494
+
495
+ return tok;
496
+ }
497
+
498
+ yajl_tok
499
+ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
500
+ size_t jsonTextLen, size_t * offset,
501
+ const unsigned char ** outBuf, size_t * outLen)
502
+ {
503
+ yajl_tok tok = yajl_tok_error;
504
+ unsigned char c;
505
+ size_t startOffset = *offset;
506
+
507
+ *outBuf = NULL;
508
+ *outLen = 0;
509
+
510
+ for (;;) {
511
+ assert(*offset <= jsonTextLen);
512
+
513
+ if (*offset >= jsonTextLen) {
514
+ tok = yajl_tok_eof;
515
+ goto lexed;
516
+ }
517
+
518
+ c = readChar(lexer, jsonText, offset);
519
+
520
+ switch (c) {
521
+ case '{':
522
+ tok = yajl_tok_left_bracket;
523
+ goto lexed;
524
+ case '}':
525
+ tok = yajl_tok_right_bracket;
526
+ goto lexed;
527
+ case '[':
528
+ tok = yajl_tok_left_brace;
529
+ goto lexed;
530
+ case ']':
531
+ tok = yajl_tok_right_brace;
532
+ goto lexed;
533
+ case ',':
534
+ tok = yajl_tok_comma;
535
+ goto lexed;
536
+ case ':':
537
+ tok = yajl_tok_colon;
538
+ goto lexed;
539
+ case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
540
+ startOffset++;
541
+ break;
542
+ case 't': {
543
+ const char * want = "rue";
544
+ do {
545
+ if (*offset >= jsonTextLen) {
546
+ tok = yajl_tok_eof;
547
+ goto lexed;
548
+ }
549
+ c = readChar(lexer, jsonText, offset);
550
+ if (c != *want) {
551
+ unreadChar(lexer, offset);
552
+ lexer->error = yajl_lex_invalid_string;
553
+ tok = yajl_tok_error;
554
+ goto lexed;
555
+ }
556
+ } while (*(++want));
557
+ tok = yajl_tok_bool;
558
+ goto lexed;
559
+ }
560
+ case 'f': {
561
+ const char * want = "alse";
562
+ do {
563
+ if (*offset >= jsonTextLen) {
564
+ tok = yajl_tok_eof;
565
+ goto lexed;
566
+ }
567
+ c = readChar(lexer, jsonText, offset);
568
+ if (c != *want) {
569
+ unreadChar(lexer, offset);
570
+ lexer->error = yajl_lex_invalid_string;
571
+ tok = yajl_tok_error;
572
+ goto lexed;
573
+ }
574
+ } while (*(++want));
575
+ tok = yajl_tok_bool;
576
+ goto lexed;
577
+ }
578
+ case 'n': {
579
+ const char * want = "ull";
580
+ do {
581
+ if (*offset >= jsonTextLen) {
582
+ tok = yajl_tok_eof;
583
+ goto lexed;
584
+ }
585
+ c = readChar(lexer, jsonText, offset);
586
+ if (c != *want) {
587
+ unreadChar(lexer, offset);
588
+ lexer->error = yajl_lex_invalid_string;
589
+ tok = yajl_tok_error;
590
+ goto lexed;
591
+ }
592
+ } while (*(++want));
593
+ tok = yajl_tok_null;
594
+ goto lexed;
595
+ }
596
+ case '"': {
597
+ tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
598
+ jsonTextLen, offset);
599
+ goto lexed;
600
+ }
601
+ case '-':
602
+ case '0': case '1': case '2': case '3': case '4':
603
+ case '5': case '6': case '7': case '8': case '9': {
604
+ /* integer parsing wants to start from the beginning */
605
+ unreadChar(lexer, offset);
606
+ tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
607
+ jsonTextLen, offset);
608
+ goto lexed;
609
+ }
610
+ case '/':
611
+ /* hey, look, a probable comment! If comments are disabled
612
+ * it's an error. */
613
+ if (!lexer->allowComments) {
614
+ unreadChar(lexer, offset);
615
+ lexer->error = yajl_lex_unallowed_comment;
616
+ tok = yajl_tok_error;
617
+ goto lexed;
618
+ }
619
+ /* if comments are enabled, then we should try to lex
620
+ * the thing. possible outcomes are
621
+ * - successful lex (tok_comment, which means continue),
622
+ * - malformed comment opening (slash not followed by
623
+ * '*' or '/') (tok_error)
624
+ * - eof hit. (tok_eof) */
625
+ tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
626
+ jsonTextLen, offset);
627
+ if (tok == yajl_tok_comment) {
628
+ /* "error" is silly, but that's the initial
629
+ * state of tok. guilty until proven innocent. */
630
+ tok = yajl_tok_error;
631
+ yajl_buf_clear(lexer->buf);
632
+ lexer->bufInUse = 0;
633
+ startOffset = *offset;
634
+ break;
635
+ }
636
+ /* hit error or eof, bail */
637
+ goto lexed;
638
+ default:
639
+ lexer->error = yajl_lex_invalid_char;
640
+ tok = yajl_tok_error;
641
+ goto lexed;
642
+ }
643
+ }
644
+
645
+
646
+ lexed:
647
+ /* need to append to buffer if the buffer is in use or
648
+ * if it's an EOF token */
649
+ if (tok == yajl_tok_eof || lexer->bufInUse) {
650
+ if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
651
+ lexer->bufInUse = 1;
652
+ yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
653
+ lexer->bufOff = 0;
654
+
655
+ if (tok != yajl_tok_eof) {
656
+ *outBuf = yajl_buf_data(lexer->buf);
657
+ *outLen = yajl_buf_len(lexer->buf);
658
+ lexer->bufInUse = 0;
659
+ }
660
+ } else if (tok != yajl_tok_error) {
661
+ *outBuf = jsonText + startOffset;
662
+ *outLen = *offset - startOffset;
663
+ }
664
+
665
+ /* special case for strings. skip the quotes. */
666
+ if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
667
+ {
668
+ assert(*outLen >= 2);
669
+ (*outBuf)++;
670
+ *outLen -= 2;
671
+ }
672
+
673
+
674
+ #ifdef YAJL_LEXER_DEBUG
675
+ if (tok == yajl_tok_error) {
676
+ printf("lexical error: %s\n",
677
+ yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
678
+ } else if (tok == yajl_tok_eof) {
679
+ printf("EOF hit\n");
680
+ } else {
681
+ printf("lexed %s: '", tokToStr(tok));
682
+ fwrite(*outBuf, 1, *outLen, stdout);
683
+ printf("'\n");
684
+ }
685
+ #endif
686
+
687
+ return tok;
688
+ }
689
+
690
+ const char *
691
+ yajl_lex_error_to_string(yajl_lex_error error)
692
+ {
693
+ switch (error) {
694
+ case yajl_lex_e_ok:
695
+ return "ok, no error";
696
+ case yajl_lex_string_invalid_utf8:
697
+ return "invalid bytes in UTF8 string.";
698
+ case yajl_lex_string_invalid_escaped_char:
699
+ return "inside a string, '\\' occurs before a character "
700
+ "which it may not.";
701
+ case yajl_lex_string_invalid_json_char:
702
+ return "invalid character inside string.";
703
+ case yajl_lex_string_invalid_hex_char:
704
+ return "invalid (non-hex) character occurs after '\\u' inside "
705
+ "string.";
706
+ case yajl_lex_invalid_char:
707
+ return "invalid char in json text.";
708
+ case yajl_lex_invalid_string:
709
+ return "invalid string in json text.";
710
+ case yajl_lex_missing_integer_after_exponent:
711
+ return "malformed number, a digit is required after the exponent.";
712
+ case yajl_lex_missing_integer_after_decimal:
713
+ return "malformed number, a digit is required after the "
714
+ "decimal point.";
715
+ case yajl_lex_missing_integer_after_minus:
716
+ return "malformed number, a digit is required after the "
717
+ "minus sign.";
718
+ case yajl_lex_unallowed_comment:
719
+ return "probable comment found in input text, comments are "
720
+ "not enabled.";
721
+ }
722
+ return "unknown error code";
723
+ }
724
+
725
+
726
+ /** allows access to more specific information about the lexical
727
+ * error when yajl_lex_lex returns yajl_tok_error. */
728
+ yajl_lex_error
729
+ yajl_lex_get_error(yajl_lexer lexer)
730
+ {
731
+ if (lexer == NULL) return (yajl_lex_error) -1;
732
+ return lexer->error;
733
+ }
734
+
735
+ size_t yajl_lex_current_line(yajl_lexer lexer)
736
+ {
737
+ return lexer->lineOff;
738
+ }
739
+
740
+ size_t yajl_lex_current_char(yajl_lexer lexer)
741
+ {
742
+ return lexer->charOff;
743
+ }
744
+
745
+ yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
746
+ size_t jsonTextLen, size_t offset)
747
+ {
748
+ const unsigned char * outBuf;
749
+ size_t outLen;
750
+ size_t bufLen = yajl_buf_len(lexer->buf);
751
+ size_t bufOff = lexer->bufOff;
752
+ unsigned int bufInUse = lexer->bufInUse;
753
+ yajl_tok tok;
754
+
755
+ tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
756
+ &outBuf, &outLen);
757
+
758
+ lexer->bufOff = bufOff;
759
+ lexer->bufInUse = bufInUse;
760
+ yajl_buf_truncate(lexer->buf, bufLen);
761
+
762
+ return tok;
763
+ }