ffi-yajl 0.0.3-universal-java → 0.0.4-universal-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (182) hide show
  1. checksums.yaml +4 -4
  2. data/ext/ffi_yajl/ext/encoder/encoder.c +1 -1
  3. data/ext/libyajl2/vendored/.gitignore +3 -0
  4. data/ext/libyajl2/vendored/BUILDING +23 -0
  5. data/ext/libyajl2/vendored/BUILDING.win32 +27 -0
  6. data/ext/libyajl2/vendored/CMakeLists.txt +79 -0
  7. data/ext/libyajl2/vendored/COPYING +13 -0
  8. data/ext/libyajl2/vendored/ChangeLog +175 -0
  9. data/ext/libyajl2/vendored/README +74 -0
  10. data/ext/libyajl2/vendored/TODO +9 -0
  11. data/ext/libyajl2/vendored/YAJLDoc.cmake +26 -0
  12. data/ext/libyajl2/vendored/configure +79 -0
  13. data/ext/libyajl2/vendored/example/CMakeLists.txt +23 -0
  14. data/ext/libyajl2/vendored/example/README.md +7 -0
  15. data/ext/libyajl2/vendored/example/parse_config.c +69 -0
  16. data/ext/libyajl2/vendored/example/sample.config +101 -0
  17. data/ext/libyajl2/vendored/perf/CMakeLists.txt +23 -0
  18. data/ext/libyajl2/vendored/perf/documents.c +1418 -0
  19. data/ext/libyajl2/vendored/perf/documents.h +28 -0
  20. data/ext/libyajl2/vendored/perf/perftest.c +134 -0
  21. data/ext/libyajl2/vendored/reformatter/CMakeLists.txt +39 -0
  22. data/ext/libyajl2/vendored/reformatter/json_reformat.c +194 -0
  23. data/ext/libyajl2/vendored/src/CMakeLists.txt +86 -0
  24. data/ext/libyajl2/vendored/src/YAJL.dxy +1258 -0
  25. data/ext/libyajl2/vendored/src/api/yajl_common.h +75 -0
  26. data/ext/libyajl2/vendored/src/api/yajl_gen.h +157 -0
  27. data/ext/libyajl2/vendored/src/api/yajl_parse.h +226 -0
  28. data/ext/libyajl2/vendored/src/api/yajl_tree.h +185 -0
  29. data/ext/libyajl2/vendored/src/api/yajl_version.h.cmake +23 -0
  30. data/ext/libyajl2/vendored/src/yajl +33 -0
  31. data/ext/libyajl2/vendored/src/yajl.c +175 -0
  32. data/ext/libyajl2/vendored/src/yajl.pc.cmake +9 -0
  33. data/ext/libyajl2/vendored/src/yajl_alloc.c +52 -0
  34. data/ext/libyajl2/vendored/src/yajl_alloc.h +34 -0
  35. data/ext/libyajl2/vendored/src/yajl_buf.c +103 -0
  36. data/ext/libyajl2/vendored/src/yajl_buf.h +57 -0
  37. data/ext/libyajl2/vendored/src/yajl_bytestack.h +69 -0
  38. data/ext/libyajl2/vendored/src/yajl_encode.c +220 -0
  39. data/ext/libyajl2/vendored/src/yajl_encode.h +34 -0
  40. data/ext/libyajl2/vendored/src/yajl_gen.c +354 -0
  41. data/ext/libyajl2/vendored/src/yajl_lex.c +763 -0
  42. data/ext/libyajl2/vendored/src/yajl_lex.h +117 -0
  43. data/ext/libyajl2/vendored/src/yajl_parser.c +498 -0
  44. data/ext/libyajl2/vendored/src/yajl_parser.h +78 -0
  45. data/ext/libyajl2/vendored/src/yajl_tree.c +503 -0
  46. data/ext/libyajl2/vendored/src/yajl_version.c +7 -0
  47. data/ext/libyajl2/vendored/test/CMakeLists.txt +23 -0
  48. data/ext/libyajl2/vendored/test/cases/ac_difficult_json_c_test_case_with_comments.json +1 -0
  49. data/ext/libyajl2/vendored/test/cases/ac_difficult_json_c_test_case_with_comments.json.gold +36 -0
  50. data/ext/libyajl2/vendored/test/cases/ac_simple_with_comments.json +11 -0
  51. data/ext/libyajl2/vendored/test/cases/ac_simple_with_comments.json.gold +9 -0
  52. data/ext/libyajl2/vendored/test/cases/ag_false_then_garbage.json +1 -0
  53. data/ext/libyajl2/vendored/test/cases/ag_false_then_garbage.json.gold +2 -0
  54. data/ext/libyajl2/vendored/test/cases/ag_null_then_garbage.json +1 -0
  55. data/ext/libyajl2/vendored/test/cases/ag_null_then_garbage.json.gold +2 -0
  56. data/ext/libyajl2/vendored/test/cases/ag_true_then_garbage.json +1 -0
  57. data/ext/libyajl2/vendored/test/cases/ag_true_then_garbage.json.gold +2 -0
  58. data/ext/libyajl2/vendored/test/cases/am_eof.json +1 -0
  59. data/ext/libyajl2/vendored/test/cases/am_eof.json.gold +4 -0
  60. data/ext/libyajl2/vendored/test/cases/am_integers.json +1 -0
  61. data/ext/libyajl2/vendored/test/cases/am_integers.json.gold +3 -0
  62. data/ext/libyajl2/vendored/test/cases/am_multiple.json +3 -0
  63. data/ext/libyajl2/vendored/test/cases/am_multiple.json.gold +5 -0
  64. data/ext/libyajl2/vendored/test/cases/am_stuff.json +7 -0
  65. data/ext/libyajl2/vendored/test/cases/am_stuff.json.gold +14 -0
  66. data/ext/libyajl2/vendored/test/cases/ap_array_open.json +1 -0
  67. data/ext/libyajl2/vendored/test/cases/ap_array_open.json.gold +2 -0
  68. data/ext/libyajl2/vendored/test/cases/ap_eof_str.json +1 -0
  69. data/ext/libyajl2/vendored/test/cases/ap_eof_str.json.gold +1 -0
  70. data/ext/libyajl2/vendored/test/cases/ap_map_open.json +1 -0
  71. data/ext/libyajl2/vendored/test/cases/ap_map_open.json.gold +2 -0
  72. data/ext/libyajl2/vendored/test/cases/ap_partial_ok.json +1 -0
  73. data/ext/libyajl2/vendored/test/cases/ap_partial_ok.json.gold +4 -0
  74. data/ext/libyajl2/vendored/test/cases/array.json +6 -0
  75. data/ext/libyajl2/vendored/test/cases/array.json.gold +22 -0
  76. data/ext/libyajl2/vendored/test/cases/array_close.json +1 -0
  77. data/ext/libyajl2/vendored/test/cases/array_close.json.gold +2 -0
  78. data/ext/libyajl2/vendored/test/cases/bignums.json +1 -0
  79. data/ext/libyajl2/vendored/test/cases/bignums.json.gold +5 -0
  80. data/ext/libyajl2/vendored/test/cases/bogus_char.json +4 -0
  81. data/ext/libyajl2/vendored/test/cases/bogus_char.json.gold +10 -0
  82. data/ext/libyajl2/vendored/test/cases/codepoints_from_unicode_org.json +1 -0
  83. data/ext/libyajl2/vendored/test/cases/codepoints_from_unicode_org.json.gold +2 -0
  84. data/ext/libyajl2/vendored/test/cases/deep_arrays.json +1 -0
  85. data/ext/libyajl2/vendored/test/cases/deep_arrays.json.gold +2049 -0
  86. data/ext/libyajl2/vendored/test/cases/difficult_json_c_test_case.json +1 -0
  87. data/ext/libyajl2/vendored/test/cases/difficult_json_c_test_case.json.gold +36 -0
  88. data/ext/libyajl2/vendored/test/cases/doubles.json +1 -0
  89. data/ext/libyajl2/vendored/test/cases/doubles.json.gold +7 -0
  90. data/ext/libyajl2/vendored/test/cases/doubles_in_array.json +1 -0
  91. data/ext/libyajl2/vendored/test/cases/doubles_in_array.json.gold +8 -0
  92. data/ext/libyajl2/vendored/test/cases/empty_array.json +1 -0
  93. data/ext/libyajl2/vendored/test/cases/empty_array.json.gold +3 -0
  94. data/ext/libyajl2/vendored/test/cases/empty_string.json +1 -0
  95. data/ext/libyajl2/vendored/test/cases/empty_string.json.gold +2 -0
  96. data/ext/libyajl2/vendored/test/cases/escaped_bulgarian.json +4 -0
  97. data/ext/libyajl2/vendored/test/cases/escaped_bulgarian.json.gold +7 -0
  98. data/ext/libyajl2/vendored/test/cases/escaped_foobar.json +1 -0
  99. data/ext/libyajl2/vendored/test/cases/escaped_foobar.json.gold +2 -0
  100. data/ext/libyajl2/vendored/test/cases/false.json +1 -0
  101. data/ext/libyajl2/vendored/test/cases/false.json.gold +2 -0
  102. data/ext/libyajl2/vendored/test/cases/fg_false_then_garbage.json +1 -0
  103. data/ext/libyajl2/vendored/test/cases/fg_false_then_garbage.json.gold +3 -0
  104. data/ext/libyajl2/vendored/test/cases/fg_issue_7.json +1 -0
  105. data/ext/libyajl2/vendored/test/cases/fg_issue_7.json.gold +3 -0
  106. data/ext/libyajl2/vendored/test/cases/fg_null_then_garbage.json +1 -0
  107. data/ext/libyajl2/vendored/test/cases/fg_null_then_garbage.json.gold +3 -0
  108. data/ext/libyajl2/vendored/test/cases/fg_true_then_garbage.json +1 -0
  109. data/ext/libyajl2/vendored/test/cases/fg_true_then_garbage.json.gold +3 -0
  110. data/ext/libyajl2/vendored/test/cases/four_byte_utf8.json +2 -0
  111. data/ext/libyajl2/vendored/test/cases/four_byte_utf8.json.gold +5 -0
  112. data/ext/libyajl2/vendored/test/cases/high_overflow.json +1 -0
  113. data/ext/libyajl2/vendored/test/cases/high_overflow.json.gold +2 -0
  114. data/ext/libyajl2/vendored/test/cases/integers.json +3 -0
  115. data/ext/libyajl2/vendored/test/cases/integers.json.gold +14 -0
  116. data/ext/libyajl2/vendored/test/cases/invalid_utf8.json +1 -0
  117. data/ext/libyajl2/vendored/test/cases/invalid_utf8.json.gold +3 -0
  118. data/ext/libyajl2/vendored/test/cases/isolated_surrogate_marker.json +1 -0
  119. data/ext/libyajl2/vendored/test/cases/isolated_surrogate_marker.json.gold +2 -0
  120. data/ext/libyajl2/vendored/test/cases/leading_zero_in_number.json +1 -0
  121. data/ext/libyajl2/vendored/test/cases/leading_zero_in_number.json.gold +5 -0
  122. data/ext/libyajl2/vendored/test/cases/lonely_minus_sign.json +7 -0
  123. data/ext/libyajl2/vendored/test/cases/lonely_minus_sign.json.gold +9 -0
  124. data/ext/libyajl2/vendored/test/cases/lonely_number.json +1 -0
  125. data/ext/libyajl2/vendored/test/cases/lonely_number.json.gold +2 -0
  126. data/ext/libyajl2/vendored/test/cases/low_overflow.json +1 -0
  127. data/ext/libyajl2/vendored/test/cases/low_overflow.json.gold +2 -0
  128. data/ext/libyajl2/vendored/test/cases/map_close.json +1 -0
  129. data/ext/libyajl2/vendored/test/cases/map_close.json.gold +2 -0
  130. data/ext/libyajl2/vendored/test/cases/missing_integer_after_decimal_point.json +1 -0
  131. data/ext/libyajl2/vendored/test/cases/missing_integer_after_decimal_point.json.gold +2 -0
  132. data/ext/libyajl2/vendored/test/cases/missing_integer_after_exponent.json +1 -0
  133. data/ext/libyajl2/vendored/test/cases/missing_integer_after_exponent.json.gold +2 -0
  134. data/ext/libyajl2/vendored/test/cases/multiple.json +3 -0
  135. data/ext/libyajl2/vendored/test/cases/multiple.json.gold +4 -0
  136. data/ext/libyajl2/vendored/test/cases/non_utf8_char_in_string.json +1 -0
  137. data/ext/libyajl2/vendored/test/cases/non_utf8_char_in_string.json.gold +8 -0
  138. data/ext/libyajl2/vendored/test/cases/np_partial_bad.json +1 -0
  139. data/ext/libyajl2/vendored/test/cases/np_partial_bad.json.gold +5 -0
  140. data/ext/libyajl2/vendored/test/cases/null.json +1 -0
  141. data/ext/libyajl2/vendored/test/cases/null.json.gold +2 -0
  142. data/ext/libyajl2/vendored/test/cases/nulls_and_bools.json +5 -0
  143. data/ext/libyajl2/vendored/test/cases/nulls_and_bools.json.gold +9 -0
  144. data/ext/libyajl2/vendored/test/cases/simple.json +5 -0
  145. data/ext/libyajl2/vendored/test/cases/simple.json.gold +9 -0
  146. data/ext/libyajl2/vendored/test/cases/simple_with_comments.json +11 -0
  147. data/ext/libyajl2/vendored/test/cases/simple_with_comments.json.gold +5 -0
  148. data/ext/libyajl2/vendored/test/cases/string_invalid_escape.json +1 -0
  149. data/ext/libyajl2/vendored/test/cases/string_invalid_escape.json.gold +3 -0
  150. data/ext/libyajl2/vendored/test/cases/string_invalid_hex_char.json +1 -0
  151. data/ext/libyajl2/vendored/test/cases/string_invalid_hex_char.json.gold +2 -0
  152. data/ext/libyajl2/vendored/test/cases/string_with_escapes.json +3 -0
  153. data/ext/libyajl2/vendored/test/cases/string_with_escapes.json.gold +7 -0
  154. data/ext/libyajl2/vendored/test/cases/string_with_invalid_newline.json +2 -0
  155. data/ext/libyajl2/vendored/test/cases/string_with_invalid_newline.json.gold +2 -0
  156. data/ext/libyajl2/vendored/test/cases/three_byte_utf8.json +1 -0
  157. data/ext/libyajl2/vendored/test/cases/three_byte_utf8.json.gold +7 -0
  158. data/ext/libyajl2/vendored/test/cases/true.json +1 -0
  159. data/ext/libyajl2/vendored/test/cases/true.json.gold +2 -0
  160. data/ext/libyajl2/vendored/test/cases/unescaped_bulgarian.json +1 -0
  161. data/ext/libyajl2/vendored/test/cases/unescaped_bulgarian.json.gold +4 -0
  162. data/ext/libyajl2/vendored/test/cases/zerobyte.json +1 -0
  163. data/ext/libyajl2/vendored/test/cases/zerobyte.json.gold +0 -0
  164. data/ext/libyajl2/vendored/test/run_tests.sh +94 -0
  165. data/ext/libyajl2/vendored/test/yajl_test.c +281 -0
  166. data/ext/libyajl2/vendored/verify/CMakeLists.txt +39 -0
  167. data/ext/libyajl2/vendored/verify/json_verify.c +116 -0
  168. data/lib/ffi_yajl/version.rb +1 -1
  169. data/lib/libyajl.so +0 -0
  170. data/lib/libyajl.so.2 +0 -0
  171. data/lib/libyajl.so.2.0.5 +0 -0
  172. data/lib/libyajl_s.a +0 -0
  173. data/spec/ffi_yajl/encoder_spec.rb +6 -0
  174. metadata +171 -10
  175. data/ext/ffi_yajl/ext/encoder/encoder.bundle +0 -0
  176. data/ext/ffi_yajl/ext/encoder/encoder.o +0 -0
  177. data/ext/ffi_yajl/ext/parser/parser.bundle +0 -0
  178. data/ext/ffi_yajl/ext/parser/parser.o +0 -0
  179. data/ext/libyajl2/mkmf.log +0 -63
  180. data/lib/libyajl.2.0.1.dylib +0 -0
  181. data/lib/libyajl.2.dylib +0 -0
  182. data/lib/libyajl.dylib +0 -0
@@ -0,0 +1,34 @@
1
+ /*
2
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
3
+ *
4
+ * Permission to use, copy, modify, and/or distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #ifndef __YAJL_ENCODE_H__
18
+ #define __YAJL_ENCODE_H__
19
+
20
+ #include "yajl_buf.h"
21
+ #include "api/yajl_gen.h"
22
+
23
+ void yajl_string_encode(const yajl_print_t printer,
24
+ void * ctx,
25
+ const unsigned char * str,
26
+ size_t length,
27
+ int escape_solidus);
28
+
29
+ void yajl_string_decode(yajl_buf buf, const unsigned char * str,
30
+ size_t length);
31
+
32
+ int yajl_string_validate_utf8(const unsigned char * s, size_t len);
33
+
34
+ #endif
@@ -0,0 +1,354 @@
1
+ /*
2
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
3
+ *
4
+ * Permission to use, copy, modify, and/or distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #include "api/yajl_gen.h"
18
+ #include "yajl_buf.h"
19
+ #include "yajl_encode.h"
20
+
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <stdio.h>
24
+ #include <math.h>
25
+ #include <stdarg.h>
26
+
27
+ typedef enum {
28
+ yajl_gen_start,
29
+ yajl_gen_map_start,
30
+ yajl_gen_map_key,
31
+ yajl_gen_map_val,
32
+ yajl_gen_array_start,
33
+ yajl_gen_in_array,
34
+ yajl_gen_complete,
35
+ yajl_gen_error
36
+ } yajl_gen_state;
37
+
38
+ struct yajl_gen_t
39
+ {
40
+ unsigned int flags;
41
+ unsigned int depth;
42
+ const char * indentString;
43
+ yajl_gen_state state[YAJL_MAX_DEPTH];
44
+ yajl_print_t print;
45
+ void * ctx; /* yajl_buf */
46
+ /* memory allocation routines */
47
+ yajl_alloc_funcs alloc;
48
+ };
49
+
50
+ int
51
+ yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...)
52
+ {
53
+ int rv = 1;
54
+ va_list ap;
55
+ va_start(ap, opt);
56
+
57
+ switch(opt) {
58
+ case yajl_gen_beautify:
59
+ case yajl_gen_validate_utf8:
60
+ case yajl_gen_escape_solidus:
61
+ if (va_arg(ap, int)) g->flags |= opt;
62
+ else g->flags &= ~opt;
63
+ break;
64
+ case yajl_gen_indent_string: {
65
+ const char *indent = va_arg(ap, const char *);
66
+ g->indentString = indent;
67
+ for (; *indent; indent++) {
68
+ if (*indent != '\n'
69
+ && *indent != '\v'
70
+ && *indent != '\f'
71
+ && *indent != '\t'
72
+ && *indent != '\r'
73
+ && *indent != ' ')
74
+ {
75
+ g->indentString = NULL;
76
+ rv = 0;
77
+ }
78
+ }
79
+ break;
80
+ }
81
+ case yajl_gen_print_callback:
82
+ yajl_buf_free(g->ctx);
83
+ g->print = va_arg(ap, const yajl_print_t);
84
+ g->ctx = va_arg(ap, void *);
85
+ break;
86
+ default:
87
+ rv = 0;
88
+ }
89
+
90
+ va_end(ap);
91
+
92
+ return rv;
93
+ }
94
+
95
+
96
+
97
+ yajl_gen
98
+ yajl_gen_alloc(const yajl_alloc_funcs * afs)
99
+ {
100
+ yajl_gen g = NULL;
101
+ yajl_alloc_funcs afsBuffer;
102
+
103
+ /* first order of business is to set up memory allocation routines */
104
+ if (afs != NULL) {
105
+ if (afs->malloc == NULL || afs->realloc == NULL || afs->free == NULL)
106
+ {
107
+ return NULL;
108
+ }
109
+ } else {
110
+ yajl_set_default_alloc_funcs(&afsBuffer);
111
+ afs = &afsBuffer;
112
+ }
113
+
114
+ g = (yajl_gen) YA_MALLOC(afs, sizeof(struct yajl_gen_t));
115
+ if (!g) return NULL;
116
+
117
+ memset((void *) g, 0, sizeof(struct yajl_gen_t));
118
+ /* copy in pointers to allocation routines */
119
+ memcpy((void *) &(g->alloc), (void *) afs, sizeof(yajl_alloc_funcs));
120
+
121
+ g->print = (yajl_print_t)&yajl_buf_append;
122
+ g->ctx = yajl_buf_alloc(&(g->alloc));
123
+ g->indentString = " ";
124
+
125
+ return g;
126
+ }
127
+
128
+ void
129
+ yajl_gen_free(yajl_gen g)
130
+ {
131
+ if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_free((yajl_buf)g->ctx);
132
+ YA_FREE(&(g->alloc), g);
133
+ }
134
+
135
+ #define INSERT_SEP \
136
+ if (g->state[g->depth] == yajl_gen_map_key || \
137
+ g->state[g->depth] == yajl_gen_in_array) { \
138
+ g->print(g->ctx, ",", 1); \
139
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1); \
140
+ } else if (g->state[g->depth] == yajl_gen_map_val) { \
141
+ g->print(g->ctx, ":", 1); \
142
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, " ", 1); \
143
+ }
144
+
145
+ #define INSERT_WHITESPACE \
146
+ if ((g->flags & yajl_gen_beautify)) { \
147
+ if (g->state[g->depth] != yajl_gen_map_val) { \
148
+ unsigned int _i; \
149
+ for (_i=0;_i<g->depth;_i++) \
150
+ g->print(g->ctx, \
151
+ g->indentString, \
152
+ (unsigned int)strlen(g->indentString)); \
153
+ } \
154
+ }
155
+
156
+ #define ENSURE_NOT_KEY \
157
+ if (g->state[g->depth] == yajl_gen_map_key || \
158
+ g->state[g->depth] == yajl_gen_map_start) { \
159
+ return yajl_gen_keys_must_be_strings; \
160
+ } \
161
+
162
+ /* check that we're neither complete nor in an error state, i.e. that the
163
+ * generator is in a valid state to be generating */
164
+ #define ENSURE_VALID_STATE \
165
+ if (g->state[g->depth] == yajl_gen_error) { \
166
+ return yajl_gen_in_error_state;\
167
+ } else if (g->state[g->depth] == yajl_gen_complete) { \
168
+ return yajl_gen_generation_complete; \
169
+ }
170
+
171
+ #define INCREMENT_DEPTH \
172
+ if (++(g->depth) >= YAJL_MAX_DEPTH) return yajl_max_depth_exceeded;
173
+
174
+ #define DECREMENT_DEPTH \
175
+ if (--(g->depth) >= YAJL_MAX_DEPTH) return yajl_gen_error;
176
+
177
+ #define APPENDED_ATOM \
178
+ switch (g->state[g->depth]) { \
179
+ case yajl_gen_start: \
180
+ g->state[g->depth] = yajl_gen_complete; \
181
+ break; \
182
+ case yajl_gen_map_start: \
183
+ case yajl_gen_map_key: \
184
+ g->state[g->depth] = yajl_gen_map_val; \
185
+ break; \
186
+ case yajl_gen_array_start: \
187
+ g->state[g->depth] = yajl_gen_in_array; \
188
+ break; \
189
+ case yajl_gen_map_val: \
190
+ g->state[g->depth] = yajl_gen_map_key; \
191
+ break; \
192
+ default: \
193
+ break; \
194
+ } \
195
+
196
+ #define FINAL_NEWLINE \
197
+ if ((g->flags & yajl_gen_beautify) && g->state[g->depth] == yajl_gen_complete) \
198
+ g->print(g->ctx, "\n", 1);
199
+
200
+ yajl_gen_status
201
+ yajl_gen_integer(yajl_gen g, long long int number)
202
+ {
203
+ char i[32];
204
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
205
+ sprintf(i, "%lld", number);
206
+ g->print(g->ctx, i, (unsigned int)strlen(i));
207
+ APPENDED_ATOM;
208
+ FINAL_NEWLINE;
209
+ return yajl_gen_status_ok;
210
+ }
211
+
212
+ #if defined(_WIN32) || defined(WIN32)
213
+ #include <float.h>
214
+ #define isnan _isnan
215
+ #define isinf !_finite
216
+ #endif
217
+
218
+ yajl_gen_status
219
+ yajl_gen_double(yajl_gen g, double number)
220
+ {
221
+ char i[32];
222
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY;
223
+ if (isnan(number) || isinf(number)) return yajl_gen_invalid_number;
224
+ INSERT_SEP; INSERT_WHITESPACE;
225
+ sprintf(i, "%.20g", number);
226
+ if (strspn(i, "0123456789-") == strlen(i)) {
227
+ strcat(i, ".0");
228
+ }
229
+ g->print(g->ctx, i, (unsigned int)strlen(i));
230
+ APPENDED_ATOM;
231
+ FINAL_NEWLINE;
232
+ return yajl_gen_status_ok;
233
+ }
234
+
235
+ yajl_gen_status
236
+ yajl_gen_number(yajl_gen g, const char * s, size_t l)
237
+ {
238
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
239
+ g->print(g->ctx, s, l);
240
+ APPENDED_ATOM;
241
+ FINAL_NEWLINE;
242
+ return yajl_gen_status_ok;
243
+ }
244
+
245
+ yajl_gen_status
246
+ yajl_gen_string(yajl_gen g, const unsigned char * str,
247
+ size_t len)
248
+ {
249
+ // if validation is enabled, check that the string is valid utf8
250
+ // XXX: This checking could be done a little faster, in the same pass as
251
+ // the string encoding
252
+ if (g->flags & yajl_gen_validate_utf8) {
253
+ if (!yajl_string_validate_utf8(str, len)) {
254
+ return yajl_gen_invalid_string;
255
+ }
256
+ }
257
+ ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE;
258
+ g->print(g->ctx, "\"", 1);
259
+ yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus);
260
+ g->print(g->ctx, "\"", 1);
261
+ APPENDED_ATOM;
262
+ FINAL_NEWLINE;
263
+ return yajl_gen_status_ok;
264
+ }
265
+
266
+ yajl_gen_status
267
+ yajl_gen_null(yajl_gen g)
268
+ {
269
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
270
+ g->print(g->ctx, "null", strlen("null"));
271
+ APPENDED_ATOM;
272
+ FINAL_NEWLINE;
273
+ return yajl_gen_status_ok;
274
+ }
275
+
276
+ yajl_gen_status
277
+ yajl_gen_bool(yajl_gen g, int boolean)
278
+ {
279
+ const char * val = boolean ? "true" : "false";
280
+
281
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
282
+ g->print(g->ctx, val, (unsigned int)strlen(val));
283
+ APPENDED_ATOM;
284
+ FINAL_NEWLINE;
285
+ return yajl_gen_status_ok;
286
+ }
287
+
288
+ yajl_gen_status
289
+ yajl_gen_map_open(yajl_gen g)
290
+ {
291
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
292
+ INCREMENT_DEPTH;
293
+
294
+ g->state[g->depth] = yajl_gen_map_start;
295
+ g->print(g->ctx, "{", 1);
296
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
297
+ FINAL_NEWLINE;
298
+ return yajl_gen_status_ok;
299
+ }
300
+
301
+ yajl_gen_status
302
+ yajl_gen_map_close(yajl_gen g)
303
+ {
304
+ ENSURE_VALID_STATE;
305
+ DECREMENT_DEPTH;
306
+
307
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
308
+ APPENDED_ATOM;
309
+ INSERT_WHITESPACE;
310
+ g->print(g->ctx, "}", 1);
311
+ FINAL_NEWLINE;
312
+ return yajl_gen_status_ok;
313
+ }
314
+
315
+ yajl_gen_status
316
+ yajl_gen_array_open(yajl_gen g)
317
+ {
318
+ ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
319
+ INCREMENT_DEPTH;
320
+ g->state[g->depth] = yajl_gen_array_start;
321
+ g->print(g->ctx, "[", 1);
322
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
323
+ FINAL_NEWLINE;
324
+ return yajl_gen_status_ok;
325
+ }
326
+
327
+ yajl_gen_status
328
+ yajl_gen_array_close(yajl_gen g)
329
+ {
330
+ ENSURE_VALID_STATE;
331
+ DECREMENT_DEPTH;
332
+ if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
333
+ APPENDED_ATOM;
334
+ INSERT_WHITESPACE;
335
+ g->print(g->ctx, "]", 1);
336
+ FINAL_NEWLINE;
337
+ return yajl_gen_status_ok;
338
+ }
339
+
340
+ yajl_gen_status
341
+ yajl_gen_get_buf(yajl_gen g, const unsigned char ** buf,
342
+ size_t * len)
343
+ {
344
+ if (g->print != (yajl_print_t)&yajl_buf_append) return yajl_gen_no_buf;
345
+ *buf = yajl_buf_data((yajl_buf)g->ctx);
346
+ *len = yajl_buf_len((yajl_buf)g->ctx);
347
+ return yajl_gen_status_ok;
348
+ }
349
+
350
+ void
351
+ yajl_gen_clear(yajl_gen g)
352
+ {
353
+ if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_clear((yajl_buf)g->ctx);
354
+ }
@@ -0,0 +1,763 @@
1
+ /*
2
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
3
+ *
4
+ * Permission to use, copy, modify, and/or distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #include "yajl_lex.h"
18
+ #include "yajl_buf.h"
19
+
20
+ #include <stdlib.h>
21
+ #include <stdio.h>
22
+ #include <assert.h>
23
+ #include <string.h>
24
+
25
+ #ifdef YAJL_LEXER_DEBUG
26
+ static const char *
27
+ tokToStr(yajl_tok tok)
28
+ {
29
+ switch (tok) {
30
+ case yajl_tok_bool: return "bool";
31
+ case yajl_tok_colon: return "colon";
32
+ case yajl_tok_comma: return "comma";
33
+ case yajl_tok_eof: return "eof";
34
+ case yajl_tok_error: return "error";
35
+ case yajl_tok_left_brace: return "brace";
36
+ case yajl_tok_left_bracket: return "bracket";
37
+ case yajl_tok_null: return "null";
38
+ case yajl_tok_integer: return "integer";
39
+ case yajl_tok_double: return "double";
40
+ case yajl_tok_right_brace: return "brace";
41
+ case yajl_tok_right_bracket: return "bracket";
42
+ case yajl_tok_string: return "string";
43
+ case yajl_tok_string_with_escapes: return "string_with_escapes";
44
+ }
45
+ return "unknown";
46
+ }
47
+ #endif
48
+
49
+ /* Impact of the stream parsing feature on the lexer:
50
+ *
51
+ * YAJL supports stream parsing. That is, the ability to parse the first
52
+ * bits of a chunk of JSON before the last bits are available (still on
53
+ * the network or disk). This makes the lexer more complex. The
54
+ * responsibility of the lexer is to handle transparently the case where
55
+ * a chunk boundary falls in the middle of a token. This is
56
+ * accomplished via a buffer and a character reading abstraction.
57
+ *
58
+ * Overview of implementation
59
+ *
60
+ * When we lex to end of input string before end of token is hit, we
61
+ * copy all of the input text composing the token into our lexBuf.
62
+ *
63
+ * Every time we read a character, we do so through the readChar function.
64
+ * readChar's responsibility is to handle pulling all chars from the buffer
65
+ * before pulling chars from input text
66
+ */
67
+
68
+ struct yajl_lexer_t {
69
+ /* the overall line and char offset into the data */
70
+ size_t lineOff;
71
+ size_t charOff;
72
+
73
+ /* error */
74
+ yajl_lex_error error;
75
+
76
+ /* an input buffer to handle the case where a token is spread over
77
+ * multiple chunks */
78
+ yajl_buf buf;
79
+
80
+ /* in the case where we have data in the lexBuf, bufOff holds
81
+ * the current offset into the lexBuf. */
82
+ size_t bufOff;
83
+
84
+ /* are we using the lex buf? */
85
+ unsigned int bufInUse;
86
+
87
+ /* shall we allow comments? */
88
+ unsigned int allowComments;
89
+
90
+ /* shall we validate utf8 inside strings? */
91
+ unsigned int validateUTF8;
92
+
93
+ yajl_alloc_funcs * alloc;
94
+ };
95
+
96
+ #define readChar(lxr, txt, off) \
97
+ (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && lxr->bufOff < yajl_buf_len((lxr)->buf)) ? \
98
+ (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \
99
+ ((txt)[(*(off))++]))
100
+
101
+ #define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
102
+
103
+ yajl_lexer
104
+ yajl_lex_alloc(yajl_alloc_funcs * alloc,
105
+ unsigned int allowComments, unsigned int validateUTF8)
106
+ {
107
+ yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
108
+ memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
109
+ lxr->buf = yajl_buf_alloc(alloc);
110
+ lxr->allowComments = allowComments;
111
+ lxr->validateUTF8 = validateUTF8;
112
+ lxr->alloc = alloc;
113
+ return lxr;
114
+ }
115
+
116
+ void
117
+ yajl_lex_free(yajl_lexer lxr)
118
+ {
119
+ yajl_buf_free(lxr->buf);
120
+ YA_FREE(lxr->alloc, lxr);
121
+ return;
122
+ }
123
+
124
+ /* a lookup table which lets us quickly determine five things:
125
+ * VEC - valid escaped control char
126
+ * note. the solidus '/' may be escaped or not.
127
+ * IJC - invalid json char
128
+ * VHC - valid hex char
129
+ * NFP - needs further processing (from a string scanning perspective)
130
+ * NUC - needs utf8 checking when enabled (from a string scanning perspective)
131
+ */
132
+ #define VEC 0x01
133
+ #define IJC 0x02
134
+ #define VHC 0x04
135
+ #define NFP 0x08
136
+ #define NUC 0x10
137
+
138
+ static const char charLookupTable[256] =
139
+ {
140
+ /*00*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
141
+ /*08*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
142
+ /*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
143
+ /*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
144
+
145
+ /*20*/ 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 , 0 , 0 ,
146
+ /*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC ,
147
+ /*30*/ VHC , VHC , VHC , VHC , VHC , VHC , VHC , VHC ,
148
+ /*38*/ VHC , VHC , 0 , 0 , 0 , 0 , 0 , 0 ,
149
+
150
+ /*40*/ 0 , VHC , VHC , VHC , VHC , VHC , VHC , 0 ,
151
+ /*48*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
152
+ /*50*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
153
+ /*58*/ 0 , 0 , 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 ,
154
+
155
+ /*60*/ 0 , VHC , VEC|VHC, VHC , VHC , VHC , VEC|VHC, 0 ,
156
+ /*68*/ 0 , 0 , 0 , 0 , 0 , 0 , VEC , 0 ,
157
+ /*70*/ 0 , 0 , VEC , 0 , VEC , 0 , 0 , 0 ,
158
+ /*78*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
159
+
160
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
161
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
162
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
163
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
164
+
165
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
166
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
167
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
168
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
169
+
170
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
171
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
172
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
173
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
174
+
175
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
176
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
177
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC ,
178
+ NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC
179
+ };
180
+
181
+ /** process a variable length utf8 encoded codepoint.
182
+ *
183
+ * returns:
184
+ * yajl_tok_string - if valid utf8 char was parsed and offset was
185
+ * advanced
186
+ * yajl_tok_eof - if end of input was hit before validation could
187
+ * complete
188
+ * yajl_tok_error - if invalid utf8 was encountered
189
+ *
190
+ * NOTE: on error the offset will point to the first char of the
191
+ * invalid utf8 */
192
+ #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
193
+
194
+ static yajl_tok
195
+ yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
196
+ size_t jsonTextLen, size_t * offset,
197
+ unsigned char curChar)
198
+ {
199
+ if (curChar <= 0x7f) {
200
+ /* single byte */
201
+ return yajl_tok_string;
202
+ } else if ((curChar >> 5) == 0x6) {
203
+ /* two byte */
204
+ UTF8_CHECK_EOF;
205
+ curChar = readChar(lexer, jsonText, offset);
206
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
207
+ } else if ((curChar >> 4) == 0x0e) {
208
+ /* three byte */
209
+ UTF8_CHECK_EOF;
210
+ curChar = readChar(lexer, jsonText, offset);
211
+ if ((curChar >> 6) == 0x2) {
212
+ UTF8_CHECK_EOF;
213
+ curChar = readChar(lexer, jsonText, offset);
214
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
215
+ }
216
+ } else if ((curChar >> 3) == 0x1e) {
217
+ /* four byte */
218
+ UTF8_CHECK_EOF;
219
+ curChar = readChar(lexer, jsonText, offset);
220
+ if ((curChar >> 6) == 0x2) {
221
+ UTF8_CHECK_EOF;
222
+ curChar = readChar(lexer, jsonText, offset);
223
+ if ((curChar >> 6) == 0x2) {
224
+ UTF8_CHECK_EOF;
225
+ curChar = readChar(lexer, jsonText, offset);
226
+ if ((curChar >> 6) == 0x2) return yajl_tok_string;
227
+ }
228
+ }
229
+ }
230
+
231
+ return yajl_tok_error;
232
+ }
233
+
234
+ /* lex a string. input is the lexer, pointer to beginning of
235
+ * json text, and start of string (offset).
236
+ * a token is returned which has the following meanings:
237
+ * yajl_tok_string: lex of string was successful. offset points to
238
+ * terminating '"'.
239
+ * yajl_tok_eof: end of text was encountered before we could complete
240
+ * the lex.
241
+ * yajl_tok_error: embedded in the string were unallowable chars. offset
242
+ * points to the offending char
243
+ */
244
/* Used inside yajl_lex_string(): once *offset has reached jsonTextLen,
 * set tok to yajl_tok_eof and jump to the finish_string_lex label.
 * Relies on `offset`, `jsonTextLen`, `tok` and that label being in scope
 * at the point of use.  Wrapped in do/while(0) so the expansion is a
 * single statement and cannot capture a following `else`. */
#define STR_CHECK_EOF                           \
    do {                                        \
        if (*offset >= jsonTextLen) {           \
            tok = yajl_tok_eof;                 \
            goto finish_string_lex;             \
        }                                       \
    } while (0)
249
+
250
+ /** scan a string for interesting characters that might need further
251
+ * review. return the number of chars that are uninteresting and can
252
+ * be skipped.
253
+ * (lth) hi world, any thoughts on how to make this routine faster? */
254
+ static size_t
255
+ yajl_string_scan(const unsigned char * buf, size_t len, int utf8check)
256
+ {
257
+ unsigned char mask = IJC|NFP|(utf8check ? NUC : 0);
258
+ size_t skip = 0;
259
+ while (skip < len && !(charLookupTable[*buf] & mask))
260
+ {
261
+ skip++;
262
+ buf++;
263
+ }
264
+ return skip;
265
+ }
266
+
267
+ static yajl_tok
268
+ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
269
+ size_t jsonTextLen, size_t * offset)
270
+ {
271
+ yajl_tok tok = yajl_tok_error;
272
+ int hasEscapes = 0;
273
+
274
+ for (;;) {
275
+ unsigned char curChar;
276
+
277
+ /* now jump into a faster scanning routine to skip as much
278
+ * of the buffers as possible */
279
+ {
280
+ const unsigned char * p;
281
+ size_t len;
282
+
283
+ if ((lexer->bufInUse && yajl_buf_len(lexer->buf) &&
284
+ lexer->bufOff < yajl_buf_len(lexer->buf)))
285
+ {
286
+ p = ((const unsigned char *) yajl_buf_data(lexer->buf) +
287
+ (lexer->bufOff));
288
+ len = yajl_buf_len(lexer->buf) - lexer->bufOff;
289
+ lexer->bufOff += yajl_string_scan(p, len, lexer->validateUTF8);
290
+ }
291
+ else if (*offset < jsonTextLen)
292
+ {
293
+ p = jsonText + *offset;
294
+ len = jsonTextLen - *offset;
295
+ *offset += yajl_string_scan(p, len, lexer->validateUTF8);
296
+ }
297
+ }
298
+
299
+ STR_CHECK_EOF;
300
+
301
+ curChar = readChar(lexer, jsonText, offset);
302
+
303
+ /* quote terminates */
304
+ if (curChar == '"') {
305
+ tok = yajl_tok_string;
306
+ break;
307
+ }
308
+ /* backslash escapes a set of control chars, */
309
+ else if (curChar == '\\') {
310
+ hasEscapes = 1;
311
+ STR_CHECK_EOF;
312
+
313
+ /* special case \u */
314
+ curChar = readChar(lexer, jsonText, offset);
315
+ if (curChar == 'u') {
316
+ unsigned int i = 0;
317
+
318
+ for (i=0;i<4;i++) {
319
+ STR_CHECK_EOF;
320
+ curChar = readChar(lexer, jsonText, offset);
321
+ if (!(charLookupTable[curChar] & VHC)) {
322
+ /* back up to offending char */
323
+ unreadChar(lexer, offset);
324
+ lexer->error = yajl_lex_string_invalid_hex_char;
325
+ goto finish_string_lex;
326
+ }
327
+ }
328
+ } else if (!(charLookupTable[curChar] & VEC)) {
329
+ /* back up to offending char */
330
+ unreadChar(lexer, offset);
331
+ lexer->error = yajl_lex_string_invalid_escaped_char;
332
+ goto finish_string_lex;
333
+ }
334
+ }
335
+ /* when not validating UTF8 it's a simple table lookup to determine
336
+ * if the present character is invalid */
337
+ else if(charLookupTable[curChar] & IJC) {
338
+ /* back up to offending char */
339
+ unreadChar(lexer, offset);
340
+ lexer->error = yajl_lex_string_invalid_json_char;
341
+ goto finish_string_lex;
342
+ }
343
+ /* when in validate UTF8 mode we need to do some extra work */
344
+ else if (lexer->validateUTF8) {
345
+ yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
346
+ offset, curChar);
347
+
348
+ if (t == yajl_tok_eof) {
349
+ tok = yajl_tok_eof;
350
+ goto finish_string_lex;
351
+ } else if (t == yajl_tok_error) {
352
+ lexer->error = yajl_lex_string_invalid_utf8;
353
+ goto finish_string_lex;
354
+ }
355
+ }
356
+ /* accept it, and move on */
357
+ }
358
+ finish_string_lex:
359
+ /* tell our buddy, the parser, wether he needs to process this string
360
+ * again */
361
+ if (hasEscapes && tok == yajl_tok_string) {
362
+ tok = yajl_tok_string_with_escapes;
363
+ }
364
+
365
+ return tok;
366
+ }
367
+
368
/* Return yajl_tok_eof from the enclosing function once *offset has
 * reached jsonTextLen.  Relies on `offset` and `jsonTextLen` being in
 * scope at the point of use.  Wrapped in do/while(0) so the expansion is
 * a single statement and cannot capture a following `else`. */
#define RETURN_IF_EOF \
    do { if (*offset >= jsonTextLen) return yajl_tok_eof; } while (0)
369
+
370
+ static yajl_tok
371
+ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
372
+ size_t jsonTextLen, size_t * offset)
373
+ {
374
+ /** XXX: numbers are the only entities in json that we must lex
375
+ * _beyond_ in order to know that they are complete. There
376
+ * is an ambiguous case for integers at EOF. */
377
+
378
+ unsigned char c;
379
+
380
+ yajl_tok tok = yajl_tok_integer;
381
+
382
+ RETURN_IF_EOF;
383
+ c = readChar(lexer, jsonText, offset);
384
+
385
+ /* optional leading minus */
386
+ if (c == '-') {
387
+ RETURN_IF_EOF;
388
+ c = readChar(lexer, jsonText, offset);
389
+ }
390
+
391
+ /* a single zero, or a series of integers */
392
+ if (c == '0') {
393
+ RETURN_IF_EOF;
394
+ c = readChar(lexer, jsonText, offset);
395
+ } else if (c >= '1' && c <= '9') {
396
+ do {
397
+ RETURN_IF_EOF;
398
+ c = readChar(lexer, jsonText, offset);
399
+ } while (c >= '0' && c <= '9');
400
+ } else {
401
+ unreadChar(lexer, offset);
402
+ lexer->error = yajl_lex_missing_integer_after_minus;
403
+ return yajl_tok_error;
404
+ }
405
+
406
+ /* optional fraction (indicates this is floating point) */
407
+ if (c == '.') {
408
+ int numRd = 0;
409
+
410
+ RETURN_IF_EOF;
411
+ c = readChar(lexer, jsonText, offset);
412
+
413
+ while (c >= '0' && c <= '9') {
414
+ numRd++;
415
+ RETURN_IF_EOF;
416
+ c = readChar(lexer, jsonText, offset);
417
+ }
418
+
419
+ if (!numRd) {
420
+ unreadChar(lexer, offset);
421
+ lexer->error = yajl_lex_missing_integer_after_decimal;
422
+ return yajl_tok_error;
423
+ }
424
+ tok = yajl_tok_double;
425
+ }
426
+
427
+ /* optional exponent (indicates this is floating point) */
428
+ if (c == 'e' || c == 'E') {
429
+ RETURN_IF_EOF;
430
+ c = readChar(lexer, jsonText, offset);
431
+
432
+ /* optional sign */
433
+ if (c == '+' || c == '-') {
434
+ RETURN_IF_EOF;
435
+ c = readChar(lexer, jsonText, offset);
436
+ }
437
+
438
+ if (c >= '0' && c <= '9') {
439
+ do {
440
+ RETURN_IF_EOF;
441
+ c = readChar(lexer, jsonText, offset);
442
+ } while (c >= '0' && c <= '9');
443
+ } else {
444
+ unreadChar(lexer, offset);
445
+ lexer->error = yajl_lex_missing_integer_after_exponent;
446
+ return yajl_tok_error;
447
+ }
448
+ tok = yajl_tok_double;
449
+ }
450
+
451
+ /* we always go "one too far" */
452
+ unreadChar(lexer, offset);
453
+
454
+ return tok;
455
+ }
456
+
457
+ static yajl_tok
458
+ yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
459
+ size_t jsonTextLen, size_t * offset)
460
+ {
461
+ unsigned char c;
462
+
463
+ yajl_tok tok = yajl_tok_comment;
464
+
465
+ RETURN_IF_EOF;
466
+ c = readChar(lexer, jsonText, offset);
467
+
468
+ /* either slash or star expected */
469
+ if (c == '/') {
470
+ /* now we throw away until end of line */
471
+ do {
472
+ RETURN_IF_EOF;
473
+ c = readChar(lexer, jsonText, offset);
474
+ } while (c != '\n');
475
+ } else if (c == '*') {
476
+ /* now we throw away until end of comment */
477
+ for (;;) {
478
+ RETURN_IF_EOF;
479
+ c = readChar(lexer, jsonText, offset);
480
+ if (c == '*') {
481
+ RETURN_IF_EOF;
482
+ c = readChar(lexer, jsonText, offset);
483
+ if (c == '/') {
484
+ break;
485
+ } else {
486
+ unreadChar(lexer, offset);
487
+ }
488
+ }
489
+ }
490
+ } else {
491
+ lexer->error = yajl_lex_invalid_char;
492
+ tok = yajl_tok_error;
493
+ }
494
+
495
+ return tok;
496
+ }
497
+
498
+ yajl_tok
499
+ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
500
+ size_t jsonTextLen, size_t * offset,
501
+ const unsigned char ** outBuf, size_t * outLen)
502
+ {
503
+ yajl_tok tok = yajl_tok_error;
504
+ unsigned char c;
505
+ size_t startOffset = *offset;
506
+
507
+ *outBuf = NULL;
508
+ *outLen = 0;
509
+
510
+ for (;;) {
511
+ assert(*offset <= jsonTextLen);
512
+
513
+ if (*offset >= jsonTextLen) {
514
+ tok = yajl_tok_eof;
515
+ goto lexed;
516
+ }
517
+
518
+ c = readChar(lexer, jsonText, offset);
519
+
520
+ switch (c) {
521
+ case '{':
522
+ tok = yajl_tok_left_bracket;
523
+ goto lexed;
524
+ case '}':
525
+ tok = yajl_tok_right_bracket;
526
+ goto lexed;
527
+ case '[':
528
+ tok = yajl_tok_left_brace;
529
+ goto lexed;
530
+ case ']':
531
+ tok = yajl_tok_right_brace;
532
+ goto lexed;
533
+ case ',':
534
+ tok = yajl_tok_comma;
535
+ goto lexed;
536
+ case ':':
537
+ tok = yajl_tok_colon;
538
+ goto lexed;
539
+ case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
540
+ startOffset++;
541
+ break;
542
+ case 't': {
543
+ const char * want = "rue";
544
+ do {
545
+ if (*offset >= jsonTextLen) {
546
+ tok = yajl_tok_eof;
547
+ goto lexed;
548
+ }
549
+ c = readChar(lexer, jsonText, offset);
550
+ if (c != *want) {
551
+ unreadChar(lexer, offset);
552
+ lexer->error = yajl_lex_invalid_string;
553
+ tok = yajl_tok_error;
554
+ goto lexed;
555
+ }
556
+ } while (*(++want));
557
+ tok = yajl_tok_bool;
558
+ goto lexed;
559
+ }
560
+ case 'f': {
561
+ const char * want = "alse";
562
+ do {
563
+ if (*offset >= jsonTextLen) {
564
+ tok = yajl_tok_eof;
565
+ goto lexed;
566
+ }
567
+ c = readChar(lexer, jsonText, offset);
568
+ if (c != *want) {
569
+ unreadChar(lexer, offset);
570
+ lexer->error = yajl_lex_invalid_string;
571
+ tok = yajl_tok_error;
572
+ goto lexed;
573
+ }
574
+ } while (*(++want));
575
+ tok = yajl_tok_bool;
576
+ goto lexed;
577
+ }
578
+ case 'n': {
579
+ const char * want = "ull";
580
+ do {
581
+ if (*offset >= jsonTextLen) {
582
+ tok = yajl_tok_eof;
583
+ goto lexed;
584
+ }
585
+ c = readChar(lexer, jsonText, offset);
586
+ if (c != *want) {
587
+ unreadChar(lexer, offset);
588
+ lexer->error = yajl_lex_invalid_string;
589
+ tok = yajl_tok_error;
590
+ goto lexed;
591
+ }
592
+ } while (*(++want));
593
+ tok = yajl_tok_null;
594
+ goto lexed;
595
+ }
596
+ case '"': {
597
+ tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
598
+ jsonTextLen, offset);
599
+ goto lexed;
600
+ }
601
+ case '-':
602
+ case '0': case '1': case '2': case '3': case '4':
603
+ case '5': case '6': case '7': case '8': case '9': {
604
+ /* integer parsing wants to start from the beginning */
605
+ unreadChar(lexer, offset);
606
+ tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
607
+ jsonTextLen, offset);
608
+ goto lexed;
609
+ }
610
+ case '/':
611
+ /* hey, look, a probable comment! If comments are disabled
612
+ * it's an error. */
613
+ if (!lexer->allowComments) {
614
+ unreadChar(lexer, offset);
615
+ lexer->error = yajl_lex_unallowed_comment;
616
+ tok = yajl_tok_error;
617
+ goto lexed;
618
+ }
619
+ /* if comments are enabled, then we should try to lex
620
+ * the thing. possible outcomes are
621
+ * - successful lex (tok_comment, which means continue),
622
+ * - malformed comment opening (slash not followed by
623
+ * '*' or '/') (tok_error)
624
+ * - eof hit. (tok_eof) */
625
+ tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
626
+ jsonTextLen, offset);
627
+ if (tok == yajl_tok_comment) {
628
+ /* "error" is silly, but that's the initial
629
+ * state of tok. guilty until proven innocent. */
630
+ tok = yajl_tok_error;
631
+ yajl_buf_clear(lexer->buf);
632
+ lexer->bufInUse = 0;
633
+ startOffset = *offset;
634
+ break;
635
+ }
636
+ /* hit error or eof, bail */
637
+ goto lexed;
638
+ default:
639
+ lexer->error = yajl_lex_invalid_char;
640
+ tok = yajl_tok_error;
641
+ goto lexed;
642
+ }
643
+ }
644
+
645
+
646
+ lexed:
647
+ /* need to append to buffer if the buffer is in use or
648
+ * if it's an EOF token */
649
+ if (tok == yajl_tok_eof || lexer->bufInUse) {
650
+ if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
651
+ lexer->bufInUse = 1;
652
+ yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
653
+ lexer->bufOff = 0;
654
+
655
+ if (tok != yajl_tok_eof) {
656
+ *outBuf = yajl_buf_data(lexer->buf);
657
+ *outLen = yajl_buf_len(lexer->buf);
658
+ lexer->bufInUse = 0;
659
+ }
660
+ } else if (tok != yajl_tok_error) {
661
+ *outBuf = jsonText + startOffset;
662
+ *outLen = *offset - startOffset;
663
+ }
664
+
665
+ /* special case for strings. skip the quotes. */
666
+ if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
667
+ {
668
+ assert(*outLen >= 2);
669
+ (*outBuf)++;
670
+ *outLen -= 2;
671
+ }
672
+
673
+
674
+ #ifdef YAJL_LEXER_DEBUG
675
+ if (tok == yajl_tok_error) {
676
+ printf("lexical error: %s\n",
677
+ yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
678
+ } else if (tok == yajl_tok_eof) {
679
+ printf("EOF hit\n");
680
+ } else {
681
+ printf("lexed %s: '", tokToStr(tok));
682
+ fwrite(*outBuf, 1, *outLen, stdout);
683
+ printf("'\n");
684
+ }
685
+ #endif
686
+
687
+ return tok;
688
+ }
689
+
690
+ const char *
691
+ yajl_lex_error_to_string(yajl_lex_error error)
692
+ {
693
+ switch (error) {
694
+ case yajl_lex_e_ok:
695
+ return "ok, no error";
696
+ case yajl_lex_string_invalid_utf8:
697
+ return "invalid bytes in UTF8 string.";
698
+ case yajl_lex_string_invalid_escaped_char:
699
+ return "inside a string, '\\' occurs before a character "
700
+ "which it may not.";
701
+ case yajl_lex_string_invalid_json_char:
702
+ return "invalid character inside string.";
703
+ case yajl_lex_string_invalid_hex_char:
704
+ return "invalid (non-hex) character occurs after '\\u' inside "
705
+ "string.";
706
+ case yajl_lex_invalid_char:
707
+ return "invalid char in json text.";
708
+ case yajl_lex_invalid_string:
709
+ return "invalid string in json text.";
710
+ case yajl_lex_missing_integer_after_exponent:
711
+ return "malformed number, a digit is required after the exponent.";
712
+ case yajl_lex_missing_integer_after_decimal:
713
+ return "malformed number, a digit is required after the "
714
+ "decimal point.";
715
+ case yajl_lex_missing_integer_after_minus:
716
+ return "malformed number, a digit is required after the "
717
+ "minus sign.";
718
+ case yajl_lex_unallowed_comment:
719
+ return "probable comment found in input text, comments are "
720
+ "not enabled.";
721
+ }
722
+ return "unknown error code";
723
+ }
724
+
725
+
726
+ /** allows access to more specific information about the lexical
727
+ * error when yajl_lex_lex returns yajl_tok_error. */
728
+ yajl_lex_error
729
+ yajl_lex_get_error(yajl_lexer lexer)
730
+ {
731
+ if (lexer == NULL) return (yajl_lex_error) -1;
732
+ return lexer->error;
733
+ }
734
+
735
+ size_t yajl_lex_current_line(yajl_lexer lexer)
736
+ {
737
+ return lexer->lineOff;
738
+ }
739
+
740
+ size_t yajl_lex_current_char(yajl_lexer lexer)
741
+ {
742
+ return lexer->charOff;
743
+ }
744
+
745
+ yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
746
+ size_t jsonTextLen, size_t offset)
747
+ {
748
+ const unsigned char * outBuf;
749
+ size_t outLen;
750
+ size_t bufLen = yajl_buf_len(lexer->buf);
751
+ size_t bufOff = lexer->bufOff;
752
+ unsigned int bufInUse = lexer->bufInUse;
753
+ yajl_tok tok;
754
+
755
+ tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
756
+ &outBuf, &outLen);
757
+
758
+ lexer->bufOff = bufOff;
759
+ lexer->bufInUse = bufInUse;
760
+ yajl_buf_truncate(lexer->buf, bufLen);
761
+
762
+ return tok;
763
+ }