@port-labs/jq-node-bindings 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/.editorconfig +5 -0
  2. package/.jshintignore +1 -0
  3. package/.jshintrc +23 -0
  4. package/binding.gyp +56 -0
  5. package/configure +26 -0
  6. package/deps/jq/.gitattributes +2 -0
  7. package/deps/jq/.travis.yml +53 -0
  8. package/deps/jq/AUTHORS +73 -0
  9. package/deps/jq/COPYING +70 -0
  10. package/deps/jq/ChangeLog +1349 -0
  11. package/deps/jq/Makefile.am +198 -0
  12. package/deps/jq/NEWS +88 -0
  13. package/deps/jq/README.md +64 -0
  14. package/deps/jq/builtin.c +1684 -0
  15. package/deps/jq/builtin.h +10 -0
  16. package/deps/jq/bytecode.c +161 -0
  17. package/deps/jq/bytecode.h +92 -0
  18. package/deps/jq/compile-ios.sh +102 -0
  19. package/deps/jq/compile.c +1210 -0
  20. package/deps/jq/compile.h +101 -0
  21. package/deps/jq/config/m4/check-math-func.m4 +4 -0
  22. package/deps/jq/config/m4/find-func-no-libs.m4 +8 -0
  23. package/deps/jq/config/m4/find-func-no-libs2.m4 +62 -0
  24. package/deps/jq/config/m4/find-func.m4 +9 -0
  25. package/deps/jq/config/m4/misc.m4 +3 -0
  26. package/deps/jq/configure.ac +221 -0
  27. package/deps/jq/docs/Gemfile +7 -0
  28. package/deps/jq/docs/Gemfile.lock +63 -0
  29. package/deps/jq/docs/README.md +25 -0
  30. package/deps/jq/docs/Rakefile +145 -0
  31. package/deps/jq/docs/content/1.tutorial/default.yml +327 -0
  32. package/deps/jq/docs/content/2.download/default.yml +117 -0
  33. package/deps/jq/docs/content/3.manual/manual.yml +2878 -0
  34. package/deps/jq/docs/content/3.manual/v1.3/manual.yml +1270 -0
  35. package/deps/jq/docs/content/3.manual/v1.4/manual.yml +1672 -0
  36. package/deps/jq/docs/content/index/index.yml +51 -0
  37. package/deps/jq/docs/default_manpage.md +22 -0
  38. package/deps/jq/docs/public/.htaccess +28 -0
  39. package/deps/jq/docs/public/bootstrap/css/bootstrap-responsive.css +1058 -0
  40. package/deps/jq/docs/public/bootstrap/css/bootstrap-responsive.min.css +9 -0
  41. package/deps/jq/docs/public/bootstrap/css/bootstrap.css +5224 -0
  42. package/deps/jq/docs/public/bootstrap/css/bootstrap.min.css +9 -0
  43. package/deps/jq/docs/public/bootstrap/img/glyphicons-halflings-white.png +0 -0
  44. package/deps/jq/docs/public/bootstrap/img/glyphicons-halflings.png +0 -0
  45. package/deps/jq/docs/public/bootstrap/js/bootstrap.js +2027 -0
  46. package/deps/jq/docs/public/bootstrap/js/bootstrap.min.js +6 -0
  47. package/deps/jq/docs/public/css/base.scss +99 -0
  48. package/deps/jq/docs/public/jq.png +0 -0
  49. package/deps/jq/docs/public/robots.txt +2 -0
  50. package/deps/jq/docs/site.yml +18 -0
  51. package/deps/jq/docs/templates/default.liquid +34 -0
  52. package/deps/jq/docs/templates/index.liquid +60 -0
  53. package/deps/jq/docs/templates/manual.liquid +122 -0
  54. package/deps/jq/docs/templates/shared/_footer.liquid +5 -0
  55. package/deps/jq/docs/templates/shared/_head.liquid +12 -0
  56. package/deps/jq/docs/templates/shared/_header.liquid +26 -0
  57. package/deps/jq/exec_stack.h +112 -0
  58. package/deps/jq/execute.c +1155 -0
  59. package/deps/jq/inject_errors.c +112 -0
  60. package/deps/jq/jq.1.default +39 -0
  61. package/deps/jq/jq.1.prebuilt +3075 -0
  62. package/deps/jq/jq.h +60 -0
  63. package/deps/jq/jq.spec +70 -0
  64. package/deps/jq/jq_parser.h +9 -0
  65. package/deps/jq/jq_test.c +346 -0
  66. package/deps/jq/jv.c +1333 -0
  67. package/deps/jq/jv.h +240 -0
  68. package/deps/jq/jv_alloc.c +179 -0
  69. package/deps/jq/jv_alloc.h +27 -0
  70. package/deps/jq/jv_aux.c +619 -0
  71. package/deps/jq/jv_dtoa.c +4275 -0
  72. package/deps/jq/jv_dtoa.h +22 -0
  73. package/deps/jq/jv_file.c +49 -0
  74. package/deps/jq/jv_parse.c +852 -0
  75. package/deps/jq/jv_print.c +348 -0
  76. package/deps/jq/jv_unicode.c +96 -0
  77. package/deps/jq/jv_unicode.h +11 -0
  78. package/deps/jq/jv_utf8_tables.h +37 -0
  79. package/deps/jq/lexer.c +2442 -0
  80. package/deps/jq/lexer.h +362 -0
  81. package/deps/jq/lexer.l +184 -0
  82. package/deps/jq/libm.h +160 -0
  83. package/deps/jq/linker.c +393 -0
  84. package/deps/jq/linker.h +7 -0
  85. package/deps/jq/locfile.c +91 -0
  86. package/deps/jq/locfile.h +29 -0
  87. package/deps/jq/m4/ax_compare_version.m4 +177 -0
  88. package/deps/jq/m4/ax_prog_bison_version.m4 +68 -0
  89. package/deps/jq/main.c +566 -0
  90. package/deps/jq/opcode_list.h +44 -0
  91. package/deps/jq/parser.c +3914 -0
  92. package/deps/jq/parser.h +193 -0
  93. package/deps/jq/parser.y +923 -0
  94. package/deps/jq/scripts/crosscompile +42 -0
  95. package/deps/jq/scripts/gen_utf8_tables.py +32 -0
  96. package/deps/jq/scripts/version +5 -0
  97. package/deps/jq/setup.sh +33 -0
  98. package/deps/jq/tests/jq.test +1235 -0
  99. package/deps/jq/tests/jqtest +5 -0
  100. package/deps/jq/tests/mantest +7 -0
  101. package/deps/jq/tests/modules/.jq +5 -0
  102. package/deps/jq/tests/modules/a.jq +2 -0
  103. package/deps/jq/tests/modules/b/b.jq +2 -0
  104. package/deps/jq/tests/modules/c/c.jq +16 -0
  105. package/deps/jq/tests/modules/c/d.jq +1 -0
  106. package/deps/jq/tests/modules/data.json +4 -0
  107. package/deps/jq/tests/modules/lib/jq/e/e.jq +1 -0
  108. package/deps/jq/tests/modules/lib/jq/f.jq +1 -0
  109. package/deps/jq/tests/modules/syntaxerror/syntaxerror.jq +1 -0
  110. package/deps/jq/tests/modules/test_bind_order.jq +4 -0
  111. package/deps/jq/tests/modules/test_bind_order0.jq +1 -0
  112. package/deps/jq/tests/modules/test_bind_order1.jq +2 -0
  113. package/deps/jq/tests/modules/test_bind_order2.jq +2 -0
  114. package/deps/jq/tests/onig.supp +21 -0
  115. package/deps/jq/tests/onig.test +85 -0
  116. package/deps/jq/tests/onigtest +5 -0
  117. package/deps/jq/tests/setup +36 -0
  118. package/deps/jq/tests/shtest +205 -0
  119. package/deps/jq/tests/torture/input0.json +7 -0
  120. package/deps/jq/util.c +462 -0
  121. package/deps/jq/util.h +64 -0
  122. package/deps/jq.gyp +35 -0
  123. package/index.d.ts +3 -0
  124. package/jest.config.js +10 -0
  125. package/lib/index.js +14 -0
  126. package/package.json +48 -0
  127. package/reports/jest-port-api.xml +35 -0
  128. package/src/binding.cc +177 -0
  129. package/src/binding.h +13 -0
  130. package/test/santiy.test.js +122 -0
  131. package/util/configure.js +27 -0
@@ -0,0 +1,348 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <float.h>
4
+ #include <string.h>
5
+
6
+ #ifdef WIN32
7
+ #include <windows.h>
8
+ #include <io.h>
9
+ #include <fileapi.h>
10
+ #endif
11
+
12
+ #include "jv.h"
13
+ #include "jv_dtoa.h"
14
+ #include "jv_unicode.h"
15
+
16
/* ESC is the escape byte (octal 033) that starts each terminal escape sequence below. */
+ #define ESC "\033"
17
/* COL(c) pastes the string literal `c` between ESC "[" and "m"; `c` must be a string literal. */
+ #define COL(c) (ESC "[" c "m")
18
/* COLRESET is the "[0m" sequence; jv_dump_term emits it after a coloured value. */
+ #define COLRESET (ESC "[0m")
19
+
20
+ // Colour table. See https://en.wikipedia.org/wiki/ANSI_escape_code#Colors
21
+ // for how to choose these.
22
/* colour_kinds and colours are parallel arrays: colours[i] is the sequence
 * emitted for a value whose kind equals colour_kinds[i]. */
+ static const jv_kind colour_kinds[] =
23
+ {JV_KIND_NULL, JV_KIND_FALSE, JV_KIND_TRUE, JV_KIND_NUMBER,
24
+ JV_KIND_STRING, JV_KIND_ARRAY, JV_KIND_OBJECT};
25
+ static const char* const colours[] =
26
+ {COL("1;30"), COL("0;39"), COL("0;39"), COL("0;39"),
27
+ COL("0;32"), COL("1;39"), COL("1;39")};
28
/* FIELD_COLOUR is the sequence jv_dump_term emits before each object key. */
+ #define FIELD_COLOUR COL("34;1")
29
+
30
+ static void put_buf(const char *s, int len, FILE *fout, jv *strout, int is_tty) {
31
+ if (strout) {
32
+ *strout = jv_string_append_buf(*strout, s, len);
33
+ } else {
34
+ #ifdef WIN32
35
+ /* See util.h */
36
+ if (is_tty)
37
+ WriteFile((HANDLE)_get_osfhandle(fileno(fout)), s, len, NULL, NULL);
38
+ else
39
+ fwrite(s, 1, len, fout);
40
+ #else
41
+ fwrite(s, 1, len, fout);
42
+ #endif
43
+ }
44
+ }
45
+
46
+ static void put_char(char c, FILE* fout, jv* strout, int T) {
47
+ put_buf(&c, 1, fout, strout, T);
48
+ }
49
+
50
+ static void put_str(const char* s, FILE* fout, jv* strout, int T) {
51
+ put_buf(s, strlen(s), fout, strout, T);
52
+ }
53
+
54
+ static void put_indent(int n, int flags, FILE* fout, jv* strout, int T) {
55
+ if (flags & JV_PRINT_TAB) {
56
+ while (n--)
57
+ put_char('\t', fout, strout, T);
58
+ } else {
59
+ n *= ((flags & (JV_PRINT_SPACE0 | JV_PRINT_SPACE1 | JV_PRINT_SPACE2)) >> 8);
60
+ while (n--)
61
+ put_char(' ', fout, strout, T);
62
+ }
63
+ }
64
+
65
+ static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S, int T) {
66
+ assert(jv_get_kind(str) == JV_KIND_STRING);
67
+ const char* i = jv_string_value(str);
68
+ const char* end = i + jv_string_length_bytes(jv_copy(str));
69
+ const char* cstart;
70
+ int c = 0;
71
+ char buf[32];
72
+ put_char('"', F, S, T);
73
+ while ((i = jvp_utf8_next((cstart = i), end, &c))) {
74
+ assert(c != -1);
75
+ int unicode_escape = 0;
76
+ if (0x20 <= c && c <= 0x7E) {
77
+ // printable ASCII
78
+ if (c == '"' || c == '\\') {
79
+ put_char('\\', F, S, T);
80
+ }
81
+ put_char(c, F, S, T);
82
+ } else if (c < 0x20 || c == 0x7F) {
83
+ // ASCII control character
84
+ switch (c) {
85
+ case '\b':
86
+ put_char('\\', F, S, T);
87
+ put_char('b', F, S, T);
88
+ break;
89
+ case '\t':
90
+ put_char('\\', F, S, T);
91
+ put_char('t', F, S, T);
92
+ break;
93
+ case '\r':
94
+ put_char('\\', F, S, T);
95
+ put_char('r', F, S, T);
96
+ break;
97
+ case '\n':
98
+ put_char('\\', F, S, T);
99
+ put_char('n', F, S, T);
100
+ break;
101
+ case '\f':
102
+ put_char('\\', F, S, T);
103
+ put_char('f', F, S, T);
104
+ break;
105
+ default:
106
+ unicode_escape = 1;
107
+ break;
108
+ }
109
+ } else {
110
+ if (ascii_only) {
111
+ unicode_escape = 1;
112
+ } else {
113
+ put_buf(cstart, i - cstart, F, S, T);
114
+ }
115
+ }
116
+ if (unicode_escape) {
117
+ if (c <= 0xffff) {
118
+ sprintf(buf, "\\u%04x", c);
119
+ } else {
120
+ c -= 0x10000;
121
+ sprintf(buf, "\\u%04x\\u%04x",
122
+ 0xD800 | ((c & 0xffc00) >> 10),
123
+ 0xDC00 | (c & 0x003ff));
124
+ }
125
+ put_str(buf, F, S, T);
126
+ }
127
+ }
128
+ assert(c != -1);
129
+ put_char('"', F, S, T);
130
+ }
131
+
132
+ static void put_refcnt(struct dtoa_context* C, int refcnt, FILE *F, jv* S, int T){
133
+ char buf[JVP_DTOA_FMT_MAX_LEN];
134
+ put_char(' ', F, S, T);
135
+ put_char('(', F, S, T);
136
+ put_str(jvp_dtoa_fmt(C, buf, refcnt), F, S, T);
137
+ put_char(')', F, S, T);
138
+ }
139
+
140
+ static void jv_dump_term(struct dtoa_context* C, jv x, int flags, int indent, FILE* F, jv* S) {
141
+ char buf[JVP_DTOA_FMT_MAX_LEN];
142
+ const char* colour = 0;
143
+ double refcnt = (flags & JV_PRINT_REFCOUNT) ? jv_get_refcnt(x) - 1 : -1;
144
+ if (flags & JV_PRINT_COLOUR) {
145
+ for (unsigned i=0; i<sizeof(colour_kinds)/sizeof(colour_kinds[0]); i++) {
146
+ if (jv_get_kind(x) == colour_kinds[i]) {
147
+ colour = colours[i];
148
+ put_str(colour, F, S, flags & JV_PRINT_ISATTY);
149
+ break;
150
+ }
151
+ }
152
+ }
153
+ switch (jv_get_kind(x)) {
154
+ default:
155
+ case JV_KIND_INVALID:
156
+ if (flags & JV_PRINT_INVALID) {
157
+ jv msg = jv_invalid_get_msg(jv_copy(x));
158
+ if (jv_get_kind(msg) == JV_KIND_STRING) {
159
+ put_str("<invalid:", F, S, flags & JV_PRINT_ISATTY);
160
+ jvp_dump_string(msg, flags | JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
161
+ put_str(">", F, S, flags & JV_PRINT_ISATTY);
162
+ } else {
163
+ put_str("<invalid>", F, S, flags & JV_PRINT_ISATTY);
164
+ }
165
+ } else {
166
+ assert(0 && "Invalid value");
167
+ }
168
+ break;
169
+ case JV_KIND_NULL:
170
+ put_str("null", F, S, flags & JV_PRINT_ISATTY);
171
+ break;
172
+ case JV_KIND_FALSE:
173
+ put_str("false", F, S, flags & JV_PRINT_ISATTY);
174
+ break;
175
+ case JV_KIND_TRUE:
176
+ put_str("true", F, S, flags & JV_PRINT_ISATTY);
177
+ break;
178
+ case JV_KIND_NUMBER: {
179
+ double d = jv_number_value(x);
180
+ if (d != d) {
181
+ // JSON doesn't have NaN, so we'll render it as "null"
182
+ put_str("null", F, S, flags & JV_PRINT_ISATTY);
183
+ } else {
184
+ // Normalise infinities to something we can print in valid JSON
185
+ if (d > DBL_MAX) d = DBL_MAX;
186
+ if (d < -DBL_MAX) d = -DBL_MAX;
187
+ put_str(jvp_dtoa_fmt(C, buf, d), F, S, flags & JV_PRINT_ISATTY);
188
+ }
189
+ break;
190
+ }
191
+ case JV_KIND_STRING:
192
+ jvp_dump_string(x, flags & JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
193
+ if (flags & JV_PRINT_REFCOUNT)
194
+ put_refcnt(C, refcnt, F, S, flags & JV_PRINT_ISATTY);
195
+ break;
196
+ case JV_KIND_ARRAY: {
197
+ if (jv_array_length(jv_copy(x)) == 0) {
198
+ put_str("[]", F, S, flags & JV_PRINT_ISATTY);
199
+ break;
200
+ }
201
+ put_str("[", F, S, flags & JV_PRINT_ISATTY);
202
+ if (flags & JV_PRINT_PRETTY) {
203
+ put_char('\n', F, S, flags & JV_PRINT_ISATTY);
204
+ put_indent(indent + 1, flags, F, S, flags & JV_PRINT_ISATTY);
205
+ }
206
+ jv_array_foreach(x, i, elem) {
207
+ if (i!=0) {
208
+ if (flags & JV_PRINT_PRETTY) {
209
+ put_str(",\n", F, S, flags & JV_PRINT_ISATTY);
210
+ put_indent(indent + 1, flags, F, S, flags & JV_PRINT_ISATTY);
211
+ } else {
212
+ put_str(",", F, S, flags & JV_PRINT_ISATTY);
213
+ }
214
+ }
215
+ jv_dump_term(C, elem, flags, indent + 1, F, S);
216
+ if (colour) put_str(colour, F, S, flags & JV_PRINT_ISATTY);
217
+ }
218
+ if (flags & JV_PRINT_PRETTY) {
219
+ put_char('\n', F, S, flags & JV_PRINT_ISATTY);
220
+ put_indent(indent, flags, F, S, flags & JV_PRINT_ISATTY);
221
+ }
222
+ if (colour) put_str(colour, F, S, flags & JV_PRINT_ISATTY);
223
+ put_char(']', F, S, flags & JV_PRINT_ISATTY);
224
+ if (flags & JV_PRINT_REFCOUNT)
225
+ put_refcnt(C, refcnt, F, S, flags & JV_PRINT_ISATTY);
226
+ break;
227
+ }
228
+ case JV_KIND_OBJECT: {
229
+ if (jv_object_length(jv_copy(x)) == 0) {
230
+ put_str("{}", F, S, flags & JV_PRINT_ISATTY);
231
+ break;
232
+ }
233
+ put_char('{', F, S, flags & JV_PRINT_ISATTY);
234
+ if (flags & JV_PRINT_PRETTY) {
235
+ put_char('\n', F, S, flags & JV_PRINT_ISATTY);
236
+ put_indent(indent + 1, flags, F, S, flags & JV_PRINT_ISATTY);
237
+ }
238
+ int first = 1;
239
+ int i = 0;
240
+ jv keyset = jv_null();
241
+ while (1) {
242
+ jv key, value;
243
+ if (flags & JV_PRINT_SORTED) {
244
+ if (first) {
245
+ keyset = jv_keys(jv_copy(x));
246
+ i = 0;
247
+ } else {
248
+ i++;
249
+ }
250
+ if (i >= jv_array_length(jv_copy(keyset))) {
251
+ jv_free(keyset);
252
+ break;
253
+ }
254
+ key = jv_array_get(jv_copy(keyset), i);
255
+ value = jv_object_get(jv_copy(x), jv_copy(key));
256
+ } else {
257
+ if (first) {
258
+ i = jv_object_iter(x);
259
+ } else {
260
+ i = jv_object_iter_next(x, i);
261
+ }
262
+ if (!jv_object_iter_valid(x, i)) break;
263
+ key = jv_object_iter_key(x, i);
264
+ value = jv_object_iter_value(x, i);
265
+ }
266
+
267
+ if (!first) {
268
+ if (flags & JV_PRINT_PRETTY){
269
+ put_str(",\n", F, S, flags & JV_PRINT_ISATTY);
270
+ put_indent(indent + 1, flags, F, S, flags & JV_PRINT_ISATTY);
271
+ } else {
272
+ put_str(",", F, S, flags & JV_PRINT_ISATTY);
273
+ }
274
+ }
275
+ if (colour) put_str(COLRESET, F, S, flags & JV_PRINT_ISATTY);
276
+
277
+ first = 0;
278
+ if (colour) put_str(FIELD_COLOUR, F, S, flags & JV_PRINT_ISATTY);
279
+ jvp_dump_string(key, flags & JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
280
+ jv_free(key);
281
+ if (colour) put_str(COLRESET, F, S, flags & JV_PRINT_ISATTY);
282
+
283
+ if (colour) put_str(colour, F, S, flags & JV_PRINT_ISATTY);
284
+ put_str((flags & JV_PRINT_PRETTY) ? ": " : ":", F, S, flags & JV_PRINT_ISATTY);
285
+ if (colour) put_str(COLRESET, F, S, flags & JV_PRINT_ISATTY);
286
+
287
+ jv_dump_term(C, value, flags, indent + 1, F, S);
288
+ if (colour) put_str(colour, F, S, flags & JV_PRINT_ISATTY);
289
+ }
290
+ if (flags & JV_PRINT_PRETTY) {
291
+ put_char('\n', F, S, flags & JV_PRINT_ISATTY);
292
+ put_indent(indent, flags, F, S, flags & JV_PRINT_ISATTY);
293
+ }
294
+ if (colour) put_str(colour, F, S, flags & JV_PRINT_ISATTY);
295
+ put_char('}', F, S, flags & JV_PRINT_ISATTY);
296
+ if (flags & JV_PRINT_REFCOUNT)
297
+ put_refcnt(C, refcnt, F, S, flags & JV_PRINT_ISATTY);
298
+ }
299
+ }
300
+ jv_free(x);
301
+ if (colour) {
302
+ put_str(COLRESET, F, S, flags & JV_PRINT_ISATTY);
303
+ }
304
+ }
305
+
306
+ void jv_dumpf(jv x, FILE *f, int flags) {
307
+ struct dtoa_context C;
308
+ jvp_dtoa_context_init(&C);
309
+ jv_dump_term(&C, x, flags, 0, f, 0);
310
+ jvp_dtoa_context_free(&C);
311
+ }
312
+
313
+ void jv_dump(jv x, int flags) {
314
+ jv_dumpf(x, stdout, flags);
315
+ }
316
+
317
+ /* This one is nice for use in debuggers */
318
+ void jv_show(jv x, int flags) {
319
+ if (flags == -1)
320
+ flags = JV_PRINT_PRETTY | JV_PRINT_COLOUR | JV_PRINT_INDENT_FLAGS(2);
321
+ jv_dumpf(jv_copy(x), stderr, flags | JV_PRINT_INVALID);
322
+ fflush(stderr);
323
+ }
324
+
325
+ jv jv_dump_string(jv x, int flags) {
326
+ struct dtoa_context C;
327
+ jvp_dtoa_context_init(&C);
328
+ jv s = jv_string("");
329
+ jv_dump_term(&C, x, flags, 0, 0, &s);
330
+ jvp_dtoa_context_free(&C);
331
+ return s;
332
+ }
333
+
334
+ char *jv_dump_string_trunc(jv x, char *outbuf, size_t bufsize) {
335
+ x = jv_dump_string(x,0);
336
+ const char* p = jv_string_value(x);
337
+ const size_t len = strlen(p);
338
+ strncpy(outbuf, p, bufsize);
339
+ outbuf[bufsize - 1] = 0;
340
+ if (len > bufsize - 1 && bufsize >= 4) {
341
+ // Indicate truncation with '...'
342
+ outbuf[bufsize - 2]='.';
343
+ outbuf[bufsize - 3]='.';
344
+ outbuf[bufsize - 4]='.';
345
+ }
346
+ jv_free(x);
347
+ return outbuf;
348
+ }
@@ -0,0 +1,96 @@
1
+ #include <stdio.h>
2
+ #include <assert.h>
3
+ #include "jv_unicode.h"
4
+ #include "jv_utf8_tables.h"
5
+
6
+ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
7
+ assert(in <= end);
8
+ if (in == end) {
9
+ return 0;
10
+ }
11
+ int codepoint = -1;
12
+ unsigned char first = (unsigned char)in[0];
13
+ int length = utf8_coding_length[first];
14
+ if ((first & 0x80) == 0) {
15
+ /* Fast-path for ASCII */
16
+ codepoint = first;
17
+ length = 1;
18
+ } else if (length == 0 || length == UTF8_CONTINUATION_BYTE) {
19
+ /* Bad single byte - either an invalid byte or an out-of-place continuation byte */
20
+ length = 1;
21
+ } else if (in + length > end) {
22
+ /* String ends before UTF8 sequence ends */
23
+ length = end - in;
24
+ } else {
25
+ codepoint = ((unsigned)in[0]) & utf8_coding_bits[first];
26
+ for (int i=1; i<length; i++) {
27
+ unsigned ch = (unsigned char)in[i];
28
+ if (utf8_coding_length[ch] != UTF8_CONTINUATION_BYTE){
29
+ /* Invalid UTF8 sequence - not followed by the right number of continuation bytes */
30
+ codepoint = -1;
31
+ length = i;
32
+ break;
33
+ }
34
+ codepoint = (codepoint << 6) | (ch & 0x3f);
35
+ }
36
+ if (codepoint < utf8_first_codepoint[length]) {
37
+ /* Overlong UTF8 sequence */
38
+ codepoint = -1;
39
+ }
40
+ if (0xD800 <= codepoint && codepoint <= 0xDFFF) {
41
+ /* Surrogate codepoints can't be encoded in UTF8 */
42
+ codepoint = -1;
43
+ }
44
+ if (codepoint > 0x10FFFF) {
45
+ /* Outside Unicode range */
46
+ codepoint = -1;
47
+ }
48
+ }
49
+ assert(length > 0);
50
+ *codepoint_ret = codepoint;
51
+ return in + length;
52
+ }
53
+
54
/*
 * Return 1 if every sequence in [in, end) decodes to a valid code point,
 * 0 as soon as jvp_utf8_next reports an invalid one (-1).
 */
int jvp_utf8_is_valid(const char* in, const char* end) {
  int codepoint;
  const char* cursor = jvp_utf8_next(in, end, &codepoint);
  while (cursor) {
    if (codepoint == -1)
      return 0;
    cursor = jvp_utf8_next(cursor, end, &codepoint);
  }
  return 1;
}
61
+
62
/*
 * Return the number of bytes in the UTF-8 sequence introduced by the lead
 * byte `startchar`, judged from its high bits only:
 *   0xxxxxxx -> 1,  110xxxxx -> 2,  1110xxxx -> 3,  anything else -> 4.
 *
 * The previous test `(startchar & 0xC0) == 0xC0` matched every lead byte
 * >= 0xC0, so 3- and 4-byte lead bytes were reported as 2.  Each class is
 * now matched with a mask that includes its terminating 0 bit.
 * Continuation bytes (10xxxxxx) still fall through to 4, as before.
 */
int jvp_utf8_decode_length(char startchar) {
  if ((startchar & 0x80) == 0) return 1;          /* 0xxxxxxx */
  else if ((startchar & 0xE0) == 0xC0) return 2;  /* 110xxxxx */
  else if ((startchar & 0xF0) == 0xE0) return 3;  /* 1110xxxx */
  else return 4;
}
68
+
69
/*
 * Return how many bytes the UTF-8 encoding of `codepoint` occupies:
 * 1 up to 0x7F, 2 up to 0x7FF, 3 up to 0xFFFF, otherwise 4.
 */
int jvp_utf8_encode_length(int codepoint) {
  int length = 4;
  if (codepoint <= 0xFFFF) length = 3;
  if (codepoint <= 0x7FF) length = 2;
  if (codepoint <= 0x7F) length = 1;
  return length;
}
75
+
76
/*
 * Write the UTF-8 encoding of `codepoint` to `out` and return the number of
 * bytes written (1-4).  No terminator is added.
 * `codepoint` is asserted to lie in 0..0x10FFFF; `out` must have room for
 * the bytes written.
 */
int jvp_utf8_encode(int codepoint, char* out) {
  assert(codepoint >= 0 && codepoint <= 0x10FFFF);
  char* const start = out;

  if (codepoint <= 0x7F) {
    /* 0xxxxxxx */
    *out++ = codepoint;
  } else if (codepoint <= 0x7FF) {
    /* 110xxxxx 10xxxxxx */
    *out++ = 0xC0 | ((codepoint >> 6) & 0x1F);
    *out++ = 0x80 | (codepoint & 0x3F);
  } else if (codepoint <= 0xFFFF) {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    *out++ = 0xE0 | ((codepoint >> 12) & 0x0F);
    *out++ = 0x80 | ((codepoint >> 6) & 0x3F);
    *out++ = 0x80 | (codepoint & 0x3F);
  } else {
    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    *out++ = 0xF0 | ((codepoint >> 18) & 0x07);
    *out++ = 0x80 | ((codepoint >> 12) & 0x3F);
    *out++ = 0x80 | ((codepoint >> 6) & 0x3F);
    *out++ = 0x80 | (codepoint & 0x3F);
  }

  assert(out - start == jvp_utf8_encode_length(codepoint));
  return out - start;
}
@@ -0,0 +1,11 @@
1
+ #ifndef JV_UNICODE_H
2
+ #define JV_UNICODE_H
3
+
4
/* Decode one UTF-8 sequence from [in, end).  Returns a pointer past the bytes
 * consumed, or 0 when in == end.  *codepoint receives the code point, or -1
 * if the bytes are not a valid encoding. */
+ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint);
5
/* Returns 1 if every sequence in [in, end) decodes without error, else 0. */
+ int jvp_utf8_is_valid(const char* in, const char* end);
6
+
7
/* Returns a sequence length derived from the high bits of the lead byte
 * `startchar`; see jv_unicode.c for the exact mapping. */
+ int jvp_utf8_decode_length(char startchar);
8
+
9
/* Returns the number of bytes (1-4) needed to encode `codepoint`. */
+ int jvp_utf8_encode_length(int codepoint);
10
/* Writes the UTF-8 encoding of `codepoint` (asserted to be 0..0x10FFFF) to
 * `out` and returns the number of bytes written. */
+ int jvp_utf8_encode(int codepoint, char* out);
11
+ #endif
@@ -0,0 +1,37 @@
1
/* Marker stored in utf8_coding_length for bytes 0x80..0xBF (continuation bytes). */
+ #define UTF8_CONTINUATION_BYTE ((unsigned char)255)
2
/* Indexed by byte value (16 entries per row).  Gives the sequence length
 * when that byte is a lead byte: 1 for 0x00..0x7F, 2 for 0xC2..0xDF,
 * 3 for 0xE0..0xEF, 4 for 0xF0..0xF4.  0x80..0xBF hold
 * UTF8_CONTINUATION_BYTE; 0xC0, 0xC1 and 0xF5..0xFF hold 0, which
 * jvp_utf8_next treats as an invalid byte. */
+ static const unsigned char utf8_coding_length[] =
3
+ {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
4
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
5
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
6
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
7
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
8
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
9
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
10
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
11
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
12
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
13
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
14
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
15
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
16
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
17
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
18
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
19
/* Indexed by byte value (16 entries per row).  Gives the mask that
 * jvp_utf8_next applies to a lead byte to extract its payload bits:
 * 0x7f for 0x00..0x7F, 0x1f for 0xC2..0xDF, 0x0f for 0xE0..0xEF,
 * 0x07 for 0xF0..0xF4.  0x80..0xBF hold 0x3f; entries for the bytes whose
 * utf8_coding_length is 0 hold 0x00. */
+ static const unsigned char utf8_coding_bits[] =
20
+ {0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
21
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
22
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
23
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
24
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
25
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
26
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
27
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
28
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
29
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
30
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
31
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
32
+ 0x00, 0x00, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
33
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
34
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
35
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
36
/* Indexed by sequence length (0..4).  Gives the smallest code point that
 * jvp_utf8_next accepts for a sequence of that length; a smaller decoded
 * value is rejected as an overlong encoding. */
+ static const int utf8_first_codepoint[] =
37
+ {0x00, 0x00, 0x80, 0x800, 0x10000};