jsonsl 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1009 @@
1
+ /**
2
+ * JSON Simple/Stacked/Stateful Lexer.
3
+ * - Does not buffer data
4
+ * - Maintains state
5
+ * - Callback oriented
6
+ * - Lightweight and fast. One source file and one header file
7
+ *
8
+ * Copyright (C) 2012-2015 Mark Nunberg
9
+ * See included LICENSE file for license details.
10
+ */
11
+
12
+ #ifndef JSONSL_H_
13
+ #define JSONSL_H_
14
+
15
+ #include <ruby.h>
16
+ #define JSONSL_STATE_USER_FIELDS \
17
+ VALUE val; \
18
+ VALUE pkey;
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include <stddef.h>
23
+ #include <string.h>
24
+ #include <sys/types.h>
25
+ #include <wchar.h>
26
+
27
+ #ifdef __cplusplus
28
+ extern "C" {
29
+ #endif /* __cplusplus */
30
+
31
/*
 * Character types used by the lexer API.
 *
 * jsonsl_char_t is the element type of input buffers; jsonsl_uchar_t is its
 * companion type for unsigned access. Define JSONSL_USE_WCHAR to build on top
 * of wchar_t instead of char.
 */
#ifdef JSONSL_USE_WCHAR
/* A typedef names the existing type first and the new alias last. */
typedef wchar_t jsonsl_char_t;
/*
 * NOTE(review): C has no `unsigned wchar_t` (wchar_t is itself a typedef and
 * its signedness is implementation-defined), so the alias maps to wchar_t
 * directly. Confirm that the wide-character build does not depend on
 * jsonsl_uchar_t being unsigned.
 */
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
38
+
39
+ #ifdef JSONSL_PARSE_NAN
40
+ #define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN
41
+ #define JSONSL__INF_PROXY JSONSL_SPECIALf_INF
42
+ #else
43
+ #define JSONSL__NAN_PROXY 0
44
+ #define JSONSL__INF_PROXY 0
45
+ #endif
46
+
47
+ /* Stolen from http-parser.h, and possibly others */
48
+ #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
49
+ typedef __int8 int8_t;
50
+ typedef unsigned __int8 uint8_t;
51
+ typedef __int16 int16_t;
52
+ typedef unsigned __int16 uint16_t;
53
+ typedef __int32 int32_t;
54
+ typedef unsigned __int32 uint32_t;
55
+ typedef __int64 int64_t;
56
+ typedef unsigned __int64 uint64_t;
57
+ #if !defined(_MSC_VER) || _MSC_VER<1400
58
+ typedef unsigned int size_t;
59
+ typedef int ssize_t;
60
+ #endif
61
+ #else
62
+ #include <stdint.h>
63
+ #endif
64
+
65
+
66
+ #if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS))
67
+ #define JSONSL_STATE_GENERIC
68
+ #endif /* !defined JSONSL_STATE_GENERIC */
69
+
70
+ #ifdef JSONSL_STATE_GENERIC
71
+ #define JSONSL_STATE_USER_FIELDS
72
+ #endif /* JSONSL_STATE_GENERIC */
73
+
74
+ /* Additional fields for component object */
75
+ #ifndef JSONSL_JPR_COMPONENT_USER_FIELDS
76
+ #define JSONSL_JPR_COMPONENT_USER_FIELDS
77
+ #endif
78
+
79
+ #ifndef JSONSL_API
80
+ /**
81
+ * We require a /DJSONSL_DLL so that users already using this as a static
82
+ * or embedded library don't get confused
83
+ */
84
+ #if defined(_WIN32) && defined(JSONSL_DLL)
85
+ #define JSONSL_API __declspec(dllexport)
86
+ #else
87
+ #define JSONSL_API
88
+ #endif /* _WIN32 */
89
+
90
+ #endif /* !JSONSL_API */
91
+
92
+ #ifndef JSONSL_INLINE
93
+ #if defined(_MSC_VER)
94
+ #define JSONSL_INLINE __inline
95
+ #elif defined(__GNUC__)
96
+ #define JSONSL_INLINE __inline__
97
+ #else
98
+ #define JSONSL_INLINE inline
99
+ #endif /* _MSC_VER or __GNUC__ */
100
+ #endif /* JSONSL_INLINE */
101
+
102
+ #define JSONSL_MAX_LEVELS 512
103
+
104
+ struct jsonsl_st;
105
+ typedef struct jsonsl_st *jsonsl_t;
106
+
107
+ typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
108
+
109
+ /**
110
+ * This flag is true when AND'd against a type whose value
111
+ * must be in "quotes" i.e. T_HKEY and T_STRING
112
+ */
113
+ #define JSONSL_Tf_STRINGY 0xffff00
114
+
115
+ /**
116
+ * Constant representing the special JSON types.
117
+ * The values are special and aid in speed (the OBJECT and LIST
118
+ * values are the char literals of their openings).
119
+ *
120
+ * Their actual value is a character which attempts to resemble
121
+ * some mnemonic reference to the actual type.
122
+ *
123
+ * If new types are added, they must fit into the ASCII printable
124
+ * range (so they should be AND'd with 0x7f and yield something
125
+ * meaningful)
126
+ */
127
+ #define JSONSL_XTYPE \
128
+ X(STRING, '"'|JSONSL_Tf_STRINGY) \
129
+ X(HKEY, '#'|JSONSL_Tf_STRINGY) \
130
+ X(OBJECT, '{') \
131
+ X(LIST, '[') \
132
+ X(SPECIAL, '^') \
133
+ X(UESCAPE, 'u')
134
+ typedef enum {
135
+ #define X(o, c) \
136
+ JSONSL_T_##o = c,
137
+ JSONSL_XTYPE
138
+ JSONSL_T_UNKNOWN = '?',
139
+ /* Abstract 'root' object */
140
+ JSONSL_T_ROOT = 0
141
+ #undef X
142
+ } jsonsl_type_t;
143
+
144
+ /**
145
+ * Subtypes for T_SPECIAL. We define them as flags
146
+ * because more than one type can be applied to a
147
+ * given object.
148
+ */
149
+
150
+ #define JSONSL_XSPECIAL \
151
+ X(NONE, 0) \
152
+ X(SIGNED, 1<<0) \
153
+ X(UNSIGNED, 1<<1) \
154
+ X(TRUE, 1<<2) \
155
+ X(FALSE, 1<<3) \
156
+ X(NULL, 1<<4) \
157
+ X(FLOAT, 1<<5) \
158
+ X(EXPONENT, 1<<6) \
159
+ X(NONASCII, 1<<7) \
160
+ X(NAN, 1<<8) \
161
+ X(INF, 1<<9)
162
+ typedef enum {
163
+ #define X(o,b) \
164
+ JSONSL_SPECIALf_##o = b,
165
+ JSONSL_XSPECIAL
166
+ #undef X
167
+ /* Handy flags for checking */
168
+
169
+ JSONSL_SPECIALf_UNKNOWN = 1 << 10,
170
+
171
+ /** @private Private */
172
+ JSONSL_SPECIALf_ZERO = 1 << 11 | JSONSL_SPECIALf_UNSIGNED,
173
+ /** @private */
174
+ JSONSL_SPECIALf_DASH = 1 << 12,
175
+ /** @private */
176
+ JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF),
177
+ JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED),
178
+
179
+ /** Type is numeric */
180
+ JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED),
181
+
182
+ /** Type is a boolean */
183
+ JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),
184
+
185
+ /** Type is an "extended", not integral type (but numeric) */
186
+ JSONSL_SPECIALf_NUMNOINT =
187
+ (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN
188
+ |JSONSL_SPECIALf_INF)
189
+ } jsonsl_special_t;
190
+
191
+
192
+ /**
193
+ * These are the various types of stack (or other) events
194
+ * which will trigger a callback.
195
+ * Like the type constants, these are also mnemonic
196
+ */
197
+ #define JSONSL_XACTION \
198
+ X(PUSH, '+') \
199
+ X(POP, '-') \
200
+ X(UESCAPE, 'U') \
201
+ X(ERROR, '!')
202
+ typedef enum {
203
+ #define X(a,c) \
204
+ JSONSL_ACTION_##a = c,
205
+ JSONSL_XACTION
206
+ JSONSL_ACTION_UNKNOWN = '?'
207
+ #undef X
208
+ } jsonsl_action_t;
209
+
210
+
211
+ /**
212
+ * Various errors which may be thrown while parsing JSON
213
+ */
214
+ #define JSONSL_XERR \
215
+ /* Trailing garbage characters */ \
216
+ X(GARBAGE_TRAILING) \
217
+ /* We were expecting a 'special' (numeric, true, false, null) */ \
218
+ X(SPECIAL_EXPECTED) \
219
+ /* The 'special' value was incomplete */ \
220
+ X(SPECIAL_INCOMPLETE) \
221
+ /* Found a stray token */ \
222
+ X(STRAY_TOKEN) \
223
+ /* We were expecting a token before this one */ \
224
+ X(MISSING_TOKEN) \
225
+ /* Cannot insert because the container is not ready */ \
226
+ X(CANT_INSERT) \
227
+ /* Found a '\' outside a string */ \
228
+ X(ESCAPE_OUTSIDE_STRING) \
229
+ /* Found a ':' outside of a hash */ \
230
+ X(KEY_OUTSIDE_OBJECT) \
231
+ /* found a string outside of a container */ \
232
+ X(STRING_OUTSIDE_CONTAINER) \
233
+ /* Found a null byte in middle of string */ \
234
+ X(FOUND_NULL_BYTE) \
235
+ /* Current level exceeds limit specified in constructor */ \
236
+ X(LEVELS_EXCEEDED) \
237
+ /* Got a } as a result of an opening [ or vice versa */ \
238
+ X(BRACKET_MISMATCH) \
239
+ /* We expected a key, but got something else instead */ \
240
+ X(HKEY_EXPECTED) \
241
+ /* We got an illegal control character (bad whitespace or something) */ \
242
+ X(WEIRD_WHITESPACE) \
243
+ /* Found a \u-escape, but there were less than 4 following hex digits */ \
244
+ X(UESCAPE_TOOSHORT) \
245
+ /* Invalid two-character escape */ \
246
+ X(ESCAPE_INVALID) \
247
+ /* Trailing comma */ \
248
+ X(TRAILING_COMMA) \
249
+ /* An invalid number was passed in a numeric field */ \
250
+ X(INVALID_NUMBER) \
251
+ /* Value is missing for object */ \
252
+ X(VALUE_EXPECTED) \
253
+ /* The following are for JPR Stuff */ \
254
+ \
255
+ /* Found a literal '%' but it was only followed by a single valid hex digit */ \
256
+ X(PERCENT_BADHEX) \
257
+ /* jsonpointer URI is malformed '/' */ \
258
+ X(JPR_BADPATH) \
259
+ /* Duplicate slash */ \
260
+ X(JPR_DUPSLASH) \
261
+ /* No leading root */ \
262
+ X(JPR_NOROOT) \
263
+ /* Allocation failure */ \
264
+ X(ENOMEM) \
265
+ /* Invalid unicode codepoint detected (in case of escapes) */ \
266
+ X(INVALID_CODEPOINT)
267
+
268
+ typedef enum {
269
+ JSONSL_ERROR_SUCCESS = 0,
270
+ #define X(e) \
271
+ JSONSL_ERROR_##e,
272
+ JSONSL_XERR
273
+ #undef X
274
+ JSONSL_ERROR_GENERIC
275
+ } jsonsl_error_t;
276
+
277
+
278
+ /**
279
+ * A state is a single level of the stack.
280
+ * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
281
+ * will remain intact until the item is popped.
282
+ *
283
+ * As a result, it means a parent state object may be accessed from a child
284
+ * object, (the parents fields will all be valid). This allows a user to create
285
+ * an ad-hoc hierarchy on top of the JSON one.
286
+ *
287
+ */
288
+ struct jsonsl_state_st {
289
+ /**
290
+ * The JSON object type
291
+ */
292
+ unsigned type;
293
+
294
+ /** If this element is special, then its extended type is here */
295
+ unsigned special_flags;
296
+
297
+ /**
298
+ * The position (in terms of number of bytes since the first call to
299
+ * jsonsl_feed()) at which the state was first pushed. This includes
300
+ * opening tokens, if applicable.
301
+ *
302
+ * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
303
+ * be the position of the first quote.
304
+ *
305
+ * @see jsonsl_st::pos which contains the _current_ position and can be
306
+ * used during a POP callback to get the length of the element.
307
+ */
308
+ size_t pos_begin;
309
+
310
+ /**FIXME: This is redundant as the same information can be derived from
311
+ * jsonsl_st::pos at pop-time */
312
+ size_t pos_cur;
313
+
314
+ /**
315
+ * Level of recursion into nesting. This is mainly a convenience
316
+ * variable, as this can technically be deduced from the lexer's
317
+ * level parameter (though the logic is not that simple)
318
+ */
319
+ unsigned int level;
320
+
321
+
322
+ /**
323
+ * how many elements in the object/list.
324
+ * For objects (hashes), an element is either
325
+ * a key or a value. Thus for one complete pair,
326
+ * nelem will be 2.
327
+ *
328
+ * For special types, this will hold the numeric value (see JSONSL_NUMERIC_VALUE).
329
+ * This only holds true for values which are simple signed/unsigned
330
+ * numbers. Otherwise a special flag is set, and extra handling is not
331
+ * performed.
332
+ */
333
+ uint64_t nelem;
334
+
335
+
336
+
337
+ /*TODO: merge this and special_flags into a union */
338
+
339
+
340
+ /**
341
+ * Useful for an opening nest, this will prevent a callback from being
342
+ * invoked on this item or any of its children
343
+ */
344
+ int ignore_callback;
345
+
346
+ /**
347
+ * Counter which is incremented each time an escape ('\') is encountered.
348
+ * This is used internally for non-string types and should only be
349
+ * inspected by the user if the state actually represents a string
350
+ * type.
351
+ */
352
+ unsigned int nescapes;
353
+
354
+ /**
355
+ * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
356
+ * the macro expansion happens here.
357
+ *
358
+ * You can use these fields to store hierarchical or 'tagging' information
359
+ * for specific objects.
360
+ *
361
+ * See the documentation above for the lifetime of the state object (i.e.
362
+ * if the private data points to allocated memory, it should be freed
363
+ * when the object is popped, as the state object will be re-used)
364
+ */
365
+ #ifndef JSONSL_STATE_GENERIC
366
+ JSONSL_STATE_USER_FIELDS
367
+ #else
368
+
369
+ /**
370
+ * Otherwise, this is a simple void * pointer for anything you want
371
+ */
372
+ void *data;
373
+ #endif /* JSONSL_STATE_USER_FIELDS */
374
+ };
375
+
376
/**
 * Number of elements in a list.
 *
 * @param st Pointer to the state. Must be of type JSONSL_T_LIST.
 * @return the state's element counter (nelem).
 */
#define JSONSL_LIST_SIZE(st) ((st)->nelem)

/**
 * Number of key-value pairs in an object.
 *
 * Keys and values are counted as separate elements, so the element counter
 * is halved to obtain the number of pairs.
 *
 * @param st Pointer to the state. Must be of type JSONSL_T_OBJECT.
 * @return the number of key-value pairs in the object.
 */
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)

/**
 * Numeric value of a special.
 *
 * @param st Pointer to the state. Must be of type JSONSL_T_SPECIAL, and its
 * special_flags must have a JSONSL_SPECIALf_NUMERIC flag set.
 * @return the numeric value stored in the state's nelem field.
 */
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
395
+
396
+ /*
397
+ * So now we need some special structure for keeping the
398
+ * JPR info in sync. Preferably all in a single block
399
+ * of memory (there's no need for separate allocations).
400
+ * So we will define a 'table' with the following layout
401
+ *
402
+ * Level nPosbl JPR1_last JPR2_last JPR3_last
403
+ *
404
+ * 0 1 NOMATCH POSSIBLE POSSIBLE
405
+ * 1 0 NOMATCH NOMATCH COMPLETE
406
+ * [ table ends here because no further path is possible]
407
+ *
408
+ * Where the JPR..n corresponds to the number of JPRs
409
+ * requested, and nPosbl is a quick flag to determine
410
+ *
411
+ * the number of possibilities. In the future this might
412
+ * be made into a proper 'jump' table,
413
+ *
414
+ * Since we always mark JPRs from the higher levels descending
415
+ * into the lower ones, a prospective child match would first
416
+ * look at the parent table to check the possibilities, and then
417
+ * see which ones were possible..
418
+ *
419
+ * Thus, the size of this blob would be (and these are all ints here)
420
+ * nLevels * nJPR * 2.
421
+ *
422
+ * the 'Width' of the table would be nJPR*2, and the 'height' would be
423
+ * nlevels
424
+ */
425
+
426
+ /**
427
+ * This is called when a stack change occurs.
428
+ *
429
+ * @param jsn The lexer
430
+ * @param action The type of action, this can be PUSH or POP
431
+ * @param state A pointer to the stack currently affected by the action
432
+ * @param at A pointer to the position of the input buffer which triggered
433
+ * this action.
434
+ */
435
+ typedef void (*jsonsl_stack_callback)(
436
+ jsonsl_t jsn,
437
+ jsonsl_action_t action,
438
+ struct jsonsl_state_st* state,
439
+ const jsonsl_char_t *at);
440
+
441
+
442
+ /**
443
+ * This is called when an error is encountered.
444
+ * Sometimes it's possible to 'erase' characters (by replacing them
445
+ * with whitespace). If you think you have corrected the error, you
446
+ * can return a true value, in which case the parser will backtrack
447
+ * and try again.
448
+ *
449
+ * @param jsn The lexer
450
+ * @param error The error which was thrown
451
+ * @param state the current state
452
+ * @param at a pointer to the position of the input buffer which triggered
453
+ * the error. Note that this is not const, this is because you have the
454
+ * possibility of modifying the character in an attempt to correct the
455
+ * error
456
+ *
457
+ * @return zero to bail, nonzero to try again (this only makes sense if
458
+ * the input buffer has been modified by this callback)
459
+ */
460
+ typedef int (*jsonsl_error_callback)(
461
+ jsonsl_t jsn,
462
+ jsonsl_error_t error,
463
+ struct jsonsl_state_st* state,
464
+ jsonsl_char_t *at);
465
+
466
+ struct jsonsl_st {
467
+ /** Public, read-only */
468
+
469
+ /** This is the current level of the stack */
470
+ unsigned int level;
471
+
472
+ /** Flag set to indicate we should stop processing */
473
+ unsigned int stopfl;
474
+
475
+ /**
476
+ * This is the current position, relative to the beginning
477
+ * of the stream.
478
+ */
479
+ size_t pos;
480
+
481
+ /** This is the 'bytes' variable passed to feed() */
482
+ const jsonsl_char_t *base;
483
+
484
+ /** Callback invoked for PUSH actions */
485
+ jsonsl_stack_callback action_callback_PUSH;
486
+
487
+ /** Callback invoked for POP actions */
488
+ jsonsl_stack_callback action_callback_POP;
489
+
490
+ /** Default callback for any action, if neither PUSH or POP callbacks are defined */
491
+ jsonsl_stack_callback action_callback;
492
+
493
+ /**
494
+ * Do not invoke callbacks for objects deeper than this level.
495
+ * NOTE: This field establishes the lower bound for ignored callbacks,
496
+ * and is thus misnamed. `min_ignore_level` would actually make more
497
+ * sense, but we don't want to break API.
498
+ */
499
+ unsigned int max_callback_level;
500
+
501
+ /** The error callback. Invoked when an error happens. Should not be NULL */
502
+ jsonsl_error_callback error_callback;
503
+
504
+ /* these are boolean flags you can modify. You will be called
505
+ * about notification for each of these types if the corresponding
506
+ * variable is true.
507
+ */
508
+
509
+ /**
510
+ * @name Callback Booleans.
511
+ * These determine whether a callback is to be invoked for certain types of objects
512
+ * @{*/
513
+
514
+ /** Boolean flag to enable or disable the invocation for events on this type*/
515
+ int call_SPECIAL;
516
+ int call_OBJECT;
517
+ int call_LIST;
518
+ int call_STRING;
519
+ int call_HKEY;
520
+ /*@}*/
521
+
522
+ /**
523
+ * @name u-Escape handling
524
+ * Special handling for the \\u-f00d type sequences. These are meant
525
+ * to be translated back into the corresponding octet(s).
526
+ * A special callback (if set) is invoked with *at=='u'. An application
527
+ * may wish to temporarily suspend parsing and handle the 'u-' sequence
528
+ * internally (or not).
529
+ */
530
+
531
+ /*@{*/
532
+
533
+ /** Callback to be invoked for a u-escape */
534
+ jsonsl_stack_callback action_callback_UESCAPE;
535
+
536
+ /** Boolean flag, whether to invoke the callback */
537
+ int call_UESCAPE;
538
+
539
+ /** Boolean flag, whether we should return after encountering a u-escape:
540
+ * the callback is invoked and then we return if this is true
541
+ */
542
+ int return_UESCAPE;
543
+ /*@}*/
544
+
545
+ struct {
546
+ int allow_trailing_comma;
547
+ } options;
548
+
549
+ /** Put anything here */
550
+ void *data;
551
+
552
+ /*@{*/
553
+ /** Private */
554
+ int in_escape;
555
+ char expecting;
556
+ char tok_last;
557
+ int can_insert;
558
+ unsigned int levels_max;
559
+
560
+ #ifndef JSONSL_NO_JPR
561
+ size_t jpr_count;
562
+ jsonsl_jpr_t *jprs;
563
+
564
+ /* Root pointer for JPR matching information */
565
+ size_t *jpr_root;
566
+ #endif /* JSONSL_NO_JPR */
567
+ /*@}*/
568
+
569
+ /**
570
+ * This is the stack. Its upper bound is levels_max, or the
571
+ * nlevels argument passed to jsonsl_new. If you modify this structure,
572
+ * make sure that this member is last.
573
+ */
574
+ struct jsonsl_state_st stack[1];
575
+ };
576
+
577
+
578
+ /**
579
+ * Creates a new lexer object, with capacity for recursion up to nlevels
580
+ *
581
+ * @param nlevels maximum recursion depth
582
+ */
583
+ JSONSL_API
584
+ jsonsl_t jsonsl_new(int nlevels);
585
+
586
+ /**
587
+ * Feeds data into the lexer.
588
+ *
589
+ * @param jsn the lexer object
590
+ * @param bytes new data to be fed
591
+ * @param nbytes size of new data
592
+ */
593
+ JSONSL_API
594
+ void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
595
+
596
+ /**
597
+ * Resets the internal parser state. This does not free the parser
598
+ * but does clean it internally, so that the next time feed() is called,
599
+ * it will be treated as a new stream
600
+ *
601
+ * @param jsn the lexer
602
+ */
603
+ JSONSL_API
604
+ void jsonsl_reset(jsonsl_t jsn);
605
+
606
+ /**
607
+ * Frees the lexer, cleaning any allocated memory taken
608
+ *
609
+ * @param jsn the lexer
610
+ */
611
+ JSONSL_API
612
+ void jsonsl_destroy(jsonsl_t jsn);
613
+
614
+ /**
615
+ * Gets the 'parent' element, given the current one
616
+ *
617
+ * @param jsn the lexer
618
+ * @param cur the current nest, which should be a struct jsonsl_nest_st
619
+ */
620
+ static JSONSL_INLINE
621
+ struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
622
+ const struct jsonsl_state_st *state)
623
+ {
624
+ /* Don't complain about overriding array bounds */
625
+ if (state->level > 1) {
626
+ return jsn->stack + state->level - 1;
627
+ } else {
628
+ return NULL;
629
+ }
630
+ }
631
+
632
+ /**
633
+ * Gets the state of the last fully consumed child of this parent. This is
634
+ * only valid in the parent's POP callback.
635
+ *
636
+ * @param the lexer
637
+ * @return A pointer to the child.
638
+ */
639
+ static JSONSL_INLINE
640
+ struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
641
+ const struct jsonsl_state_st *parent)
642
+ {
643
+ return jsn->stack + (parent->level + 1);
644
+ }
645
+
646
+ /**Call to instruct the parser to stop parsing and return. This is valid
647
+ * only from within a callback */
648
+ static JSONSL_INLINE
649
+ void jsonsl_stop(jsonsl_t jsn)
650
+ {
651
+ jsn->stopfl = 1;
652
+ }
653
+
654
+ /**
655
+ * This enables receiving callbacks on all events. Doesn't do
656
+ * anything special but helps avoid some boilerplate.
657
+ * This does not touch the UESCAPE callbacks or flags.
658
+ */
659
+ static JSONSL_INLINE
660
+ void jsonsl_enable_all_callbacks(jsonsl_t jsn)
661
+ {
662
+ jsn->call_HKEY = 1;
663
+ jsn->call_STRING = 1;
664
+ jsn->call_OBJECT = 1;
665
+ jsn->call_SPECIAL = 1;
666
+ jsn->call_LIST = 1;
667
+ }
668
+
669
/**
 * A macro which evaluates to true if the given state object can have
 * children. This means a list type or an object type.
 *
 * @param state Pointer to a struct jsonsl_state_st. The argument is
 * parenthesized in the expansion, so any pointer expression (e.g. `p + 1`
 * or `&st`) is accepted. It is evaluated twice and therefore must not have
 * side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
    ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
675
+
676
+ /**
677
+ * These two functions, dump a string representation
678
+ * of the error or type, respectively. They will never
679
+ * return NULL
680
+ */
681
+ JSONSL_API
682
+ const char* jsonsl_strerror(jsonsl_error_t err);
683
+ JSONSL_API
684
+ const char* jsonsl_strtype(jsonsl_type_t jt);
685
+
686
+ /**
687
+ * Dumps global metrics to the screen. This is a noop unless
688
+ * jsonsl was compiled with JSONSL_USE_METRICS
689
+ */
690
+ JSONSL_API
691
+ void jsonsl_dump_global_metrics(void);
692
+
693
+ /* This macro just here for editors to do code folding */
694
+ #ifndef JSONSL_NO_JPR
695
+
696
+ /**
697
+ * @name JSON Pointer API
698
+ *
699
+ * JSONPointer API. This isn't really related to the lexer (at least not yet)
700
+ * JSONPointer provides an extremely simple specification for providing
701
+ * locations within JSON objects. We will extend it a bit and allow for
702
+ * providing 'wildcard' characters by which to be able to 'query' the stream.
703
+ *
704
+ * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
705
+ *
706
+ * Currently I'm implementing the 'single query' API which can only use a single
707
+ * query component. In the future I will integrate my yet-to-be-published
708
+ * Boyer-Moore-esque prefix searching implementation, in order to allow
709
+ * multiple paths to be merged into one for quick and efficient searching.
710
+ *
711
+ *
712
+ * JPR (as we'll refer to it within the source) can be used by splitting
713
+ * the components into multiple sections, and incrementally 'track' each
714
+ * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
715
+ * callback for an object, we will check to see whether the index matching
716
+ * the component corresponding to the current level contains a match
717
+ * for our path.
718
+ *
719
+ * In order to do this properly, a structure must be maintained within the
720
+ * parent indicating whether its children are possible matches. This flag
721
+ * will be 'inherited' by all children which may conform to the match
722
+ * specification, and discarded by all which do not (thereby eliminating
723
+ * their children from inheriting it).
724
+ *
725
+ * A successful match is a complete one. One can provide multiple paths with
726
+ * multiple levels of matches e.g.
727
+ * /foo/bar/baz/^/blah
728
+ *
729
+ * @{
730
+ */
731
+
732
+ /** The wildcard character */
733
+ #ifndef JSONSL_PATH_WILDCARD_CHAR
734
+ #define JSONSL_PATH_WILDCARD_CHAR '^'
735
+ #endif /* WILDCARD_CHAR */
736
+
737
/**
 * Result codes for path matching, as an X-macro list of
 * (name, value) pairs. Each entry becomes JSONSL_MATCH_<name>.
 */
#define JSONSL_XMATCH \
    X(COMPLETE, 1) \
    X(POSSIBLE, 0) \
    X(NOMATCH, -1) \
    X(TYPE_MISMATCH, -2)

typedef enum {
#define X(T, v) JSONSL_MATCH_##T = v,
    JSONSL_XMATCH
#undef X
    /*
     * NOTE(review): this enumerator has no explicit value, so it takes the
     * previous one plus one (-2 + 1 == -1) and compares equal to
     * JSONSL_MATCH_NOMATCH. Left unchanged for compatibility; confirm
     * whether a distinct value was intended.
     */
    JSONSL_MATCH_UNKNOWN
} jsonsl_jpr_match_t;
752
+
753
/**
 * Kinds of path component, listed in numeric order with explicit values.
 */
typedef enum {
    /* Special values */
    JSONSL_PATH_INVALID = -1,
    JSONSL_PATH_NONE = 0,

    /* Regular component types */
    JSONSL_PATH_STRING = 1,
    JSONSL_PATH_WILDCARD = 2,
    JSONSL_PATH_NUMERIC = 3,
    JSONSL_PATH_ROOT = 4
} jsonsl_jpr_type_t;
763
+
764
+ struct jsonsl_jpr_component_st {
765
+ /** The string the component points to */
766
+ char *pstr;
767
+ /** if this is a numeric type, the number is 'cached' here */
768
+ unsigned long idx;
769
+ /** The length of the string */
770
+ size_t len;
771
+ /** The type of component (NUMERIC or STRING) */
772
+ jsonsl_jpr_type_t ptype;
773
+
774
+ /** Set this to true to enforce type checking between dict keys and array
775
+ * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
776
+ * that an array index is actually a child of a dictionary. */
777
+ short is_arridx;
778
+
779
+ /* Extra fields (for more advanced searches. Default is empty) */
780
+ JSONSL_JPR_COMPONENT_USER_FIELDS
781
+ };
782
+
783
+ struct jsonsl_jpr_st {
784
+ /** Path components */
785
+ struct jsonsl_jpr_component_st *components;
786
+ size_t ncomponents;
787
+
788
+ /**Type of the match to be expected. If nonzero, will be compared against
789
+ * the actual type */
790
+ unsigned match_type;
791
+
792
+ /** Base of allocated string for components */
793
+ char *basestr;
794
+
795
+ /** The original match string. Useful for returning to the user */
796
+ char *orig;
797
+ size_t norig;
798
+ };
799
+
800
+ /**
801
+ * Create a new JPR object.
802
+ *
803
+ * @param path the JSONPointer path specification.
804
+ * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
805
+ * then more details will be in this variable.
806
+ *
807
+ * @return a new jsonsl_jpr_t object, or NULL on error.
808
+ */
809
+ JSONSL_API
810
+ jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
811
+
812
+ /**
813
+ * Destroy a JPR object
814
+ */
815
+ JSONSL_API
816
+ void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
817
+
818
+ /**
819
+ * Match a JSON object against a type and specific level
820
+ *
821
+ * @param jpr the JPR object
822
+ * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
823
+ * @param parent_level the level of the parent
824
+ * @param key the 'key' of the child. If the parent is an array, this should be
825
+ * empty.
826
+ * @param nkey - the length of the key. If the parent is an array (T_LIST), then
827
+ * this should be the current index.
828
+ *
829
+ * NOTE: The key of the child means any kind of associative data related to the
830
+ * element. Thus: <<< { "foo" : [ >>,
831
+ * the opening array's key is "foo".
832
+ *
833
+ * @return a status constant. This indicates whether a match was excluded, possible,
834
+ * or successful.
835
+ */
836
+ JSONSL_API
837
+ jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
838
+ unsigned int parent_type,
839
+ unsigned int parent_level,
840
+ const char *key, size_t nkey);
841
+
842
+ /**
843
+ * Alternate matching algorithm. This matching algorithm does not use
844
+ * JSONPointer but relies on a more structured searching mechanism. It
845
+ * assumes that there is a clear distinction between array indices and
846
+ * object keys. In this case, the jsonsl_jpr_component_st::ptype should
847
+ * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
848
+ * jsonsl_jpr_component_st::is_arridx field will be removed in a future
849
+ * version).
850
+ *
851
+ * @param jpr The path
852
+ * @param parent The parent structure. Can be NULL if this is the root object
853
+ * @param child The child structure. Should not be NULL
854
+ * @param key Object key, if an object
855
+ * @param nkey Length of object key
856
+ * @return Status constant if successful
857
+ *
858
+ * @note
859
+ * For successful matching, both the key and the path itself should be normalized
860
+ * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
861
+ * should currently be done in the application. Another version of this function
862
+ * may use a temporary buffer in such circumstances (allocated by the application).
863
+ *
864
+ * Since this function also checks the state of the child, it should only
865
+ * be called on PUSH callbacks, and not POP callbacks
866
+ */
867
+ JSONSL_API
868
+ jsonsl_jpr_match_t
869
+ jsonsl_path_match(jsonsl_jpr_t jpr,
870
+ const struct jsonsl_state_st *parent,
871
+ const struct jsonsl_state_st *child,
872
+ const char *key, size_t nkey);
873
+
874
+
875
+ /**
876
+ * Associate a set of JPR objects with a lexer instance.
877
+ * This should be called before the lexer has been fed any data (and
878
+ * behavior is undefined if you don't adhere to this).
879
+ *
880
+ * After using this function, you may subsequently call match_state() on
881
+ * given states (presumably from within the callbacks).
882
+ *
883
+ * Note that currently the first JPR is the quickest and comes
884
+ * pre-allocated with the state structure. Further JPR objects
885
+ * are chained.
886
+ *
887
+ * @param jsn The lexer
888
+ * @param jprs An array of jsonsl_jpr_t objects
889
+ * @param njprs How many elements in the jprs array.
890
+ */
891
+ JSONSL_API
892
+ void jsonsl_jpr_match_state_init(jsonsl_t jsn,
893
+ jsonsl_jpr_t *jprs,
894
+ size_t njprs);
895
+
896
+ /**
897
+ * This follows the same semantics as the normal match,
898
+ * except we infer parent and type information from the relevant state objects.
899
+ * The match status (for all possible JPR objects) is set in the *out parameter.
900
+ *
901
+ * If a match has succeeded, then its JPR object will be returned. In all other
902
+ * instances, NULL is returned;
903
+ *
904
+ * @param jsn The lexer
905
+ * @param state The jsonsl_state_st which is a candidate
906
+ * @param key The hash key (if applicable, can be NULL if parent is list)
907
+ * @param nkey Length of hash key (if applicable, can be zero if parent is list)
908
+ * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
909
+ * the match result
910
+ *
911
+ * @return If a match was completed in full, then the JPR object containing
912
+ * the matching path will be returned. Otherwise, the return is NULL (note, this
913
+ * does not mean matching has failed, it can still be part of the match: check
914
+ * the out parameter).
915
+ */
916
+ JSONSL_API
917
+ jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
918
+ struct jsonsl_state_st *state,
919
+ const char *key,
920
+ size_t nkey,
921
+ jsonsl_jpr_match_t *out);
922
+
923
+
924
+ /**
925
+ * Cleanup any memory allocated and any states set by
926
+ * match_state_init() and match_state()
927
+ * @param jsn The lexer
928
+ */
929
+ JSONSL_API
930
+ void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
931
+
932
+ /**
933
+ * Return a string representation of the match result returned by match()
934
+ */
935
+ JSONSL_API
936
+ const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
937
+
938
+ /* @}*/
939
+
940
+ /**
941
+ * Utility function to convert escape sequences into their original form.
942
+ *
943
+ * The decoders I've sampled do not seem to specify a standard behavior of what
944
+ * to escape/unescape.
945
+ *
946
+ * RFC 4627 Mandates only that the quote, backslash, and ASCII control
947
+ * characters (0x00-0x1f) be escaped. It is often common for applications
948
+ * to escape a '/' - however this may also be desired behavior. the JSON
949
+ * spec is not clear on this, and therefore jsonsl leaves it up to you.
950
+ *
951
+ * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
952
+ * true when dealing with 'u-escapes' which can be expressed perfectly fine
953
+ * as utf8. One use case for normalization is JPR string comparison, in which
954
+ * case two effectively equivalent strings may not match because one is using
955
+ * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
956
+ * an empty `toEscape` table, enabling only the `u` index.
957
+ *
958
+ * @param in The input string.
959
+ * @param out An allocated output (should be the same size as in)
960
+ * @param len the size of the buffer
961
+ * @param toEscape - A sparse array of characters to unescape. Characters
962
+ * which are not present in this array, e.g. toEscape['c'] == 0 will be
963
+ * ignored and passed to the output in their original form.
964
+ * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
965
+ * then this variable will have the SPECIALf_NONASCII flag on.
966
+ *
967
+ * @param err A pointer to an error variable. If an error occurs, it will be
968
+ * set in this variable
969
+ * @param errat If not null and an error occurs, this will be set to point
970
+ * to the position within the string at which the offending character was
971
+ * encountered.
972
+ *
973
+ * @return The effective size of the output buffer.
974
+ *
975
+ * @note
976
+ * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
977
+ * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
978
+ * which while still correct in some sense was confusing for many (especially
979
+ * considering that the inputs were variations of char).
980
+ *
981
+ * @note
982
+ * The output buffer will never be larger than the input buffer, since
983
+ * standard escape sequences (i.e. '\t') occupy two bytes in the source
984
+ * but only one byte (when unescaped) in the output. Likewise u-escapes
985
+ * (i.e. \uXXXX) will occupy six bytes in the source, but at the most
986
+ * two bytes when escaped.
987
+ */
988
+ JSONSL_API
989
+ size_t jsonsl_util_unescape_ex(const char *in,
990
+ char *out,
991
+ size_t len,
992
+ const int toEscape[128],
993
+ unsigned *oflags,
994
+ jsonsl_error_t *err,
995
+ const char **errat);
996
+
997
+ /**
998
+ * Convenience macro to avoid passing too many parameters
999
+ */
1000
+ #define jsonsl_util_unescape(in, out, len, toEscape, err) \
1001
+ jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
1002
+
1003
+ #endif /* JSONSL_NO_JPR */
1004
+
1005
+ #ifdef __cplusplus
1006
+ }
1007
+ #endif /* __cplusplus */
1008
+
1009
+ #endif /* JSONSL_H_ */