jsonsl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1009 @@
1
+ /**
2
+ * JSON Simple/Stacked/Stateful Lexer.
3
+ * - Does not buffer data
4
+ * - Maintains state
5
+ * - Callback oriented
6
+ * - Lightweight and fast. One source file and one header file
7
+ *
8
+ * Copyright (C) 2012-2015 Mark Nunberg
9
+ * See included LICENSE file for license details.
10
+ */
11
+
12
+ #ifndef JSONSL_H_
13
+ #define JSONSL_H_
14
+
15
+ #include <ruby.h>
16
+ #define JSONSL_STATE_USER_FIELDS \
17
+ VALUE val; \
18
+ VALUE pkey;
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include <stddef.h>
23
+ #include <string.h>
24
+ #include <sys/types.h>
25
+ #include <wchar.h>
26
+
27
+ #ifdef __cplusplus
28
+ extern "C" {
29
+ #endif /* __cplusplus */
30
+
31
+ #ifdef JSONSL_USE_WCHAR
32
+ typedef wchar_t jsonsl_char_t; /* typedef <existing type> <alias>; operands were reversed */
33
+ typedef wchar_t jsonsl_uchar_t; /* C has no 'unsigned wchar_t'; keep the same width as jsonsl_char_t */
34
+ #else
35
+ typedef char jsonsl_char_t;
36
+ typedef unsigned char jsonsl_uchar_t;
37
+ #endif /* JSONSL_USE_WCHAR */
38
+
39
+ #ifdef JSONSL_PARSE_NAN
40
+ #define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN
41
+ #define JSONSL__INF_PROXY JSONSL_SPECIALf_INF
42
+ #else
43
+ #define JSONSL__NAN_PROXY 0
44
+ #define JSONSL__INF_PROXY 0
45
+ #endif
46
+
47
+ /* Stolen from http-parser.h, and possibly others */
48
+ #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
49
+ typedef __int8 int8_t;
50
+ typedef unsigned __int8 uint8_t;
51
+ typedef __int16 int16_t;
52
+ typedef unsigned __int16 uint16_t;
53
+ typedef __int32 int32_t;
54
+ typedef unsigned __int32 uint32_t;
55
+ typedef __int64 int64_t;
56
+ typedef unsigned __int64 uint64_t;
57
+ #if !defined(_MSC_VER) || _MSC_VER<1400
58
+ typedef unsigned int size_t;
59
+ typedef int ssize_t;
60
+ #endif
61
+ #else
62
+ #include <stdint.h>
63
+ #endif
64
+
65
+
66
+ #if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS))
67
+ #define JSONSL_STATE_GENERIC
68
+ #endif /* !defined JSONSL_STATE_GENERIC */
69
+
70
+ #ifdef JSONSL_STATE_GENERIC
71
+ #define JSONSL_STATE_USER_FIELDS
72
+ #endif /* JSONSL_STATE_GENERIC */
73
+
74
+ /* Additional fields for component object */
75
+ #ifndef JSONSL_JPR_COMPONENT_USER_FIELDS
76
+ #define JSONSL_JPR_COMPONENT_USER_FIELDS
77
+ #endif
78
+
79
+ #ifndef JSONSL_API
80
+ /**
81
+ * We require a /DJSONSL_DLL so that users already using this as a static
82
+ * or embedded library don't get confused
83
+ */
84
+ #if defined(_WIN32) && defined(JSONSL_DLL)
85
+ #define JSONSL_API __declspec(dllexport)
86
+ #else
87
+ #define JSONSL_API
88
+ #endif /* _WIN32 */
89
+
90
+ #endif /* !JSONSL_API */
91
+
92
+ #ifndef JSONSL_INLINE
93
+ #if defined(_MSC_VER)
94
+ #define JSONSL_INLINE __inline
95
+ #elif defined(__GNUC__)
96
+ #define JSONSL_INLINE __inline__
97
+ #else
98
+ #define JSONSL_INLINE inline
99
+ #endif /* _MSC_VER or __GNUC__ */
100
+ #endif /* JSONSL_INLINE */
101
+
102
+ #define JSONSL_MAX_LEVELS 512
103
+
104
+ struct jsonsl_st;
105
+ typedef struct jsonsl_st *jsonsl_t;
106
+
107
+ typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
108
+
109
+ /**
110
+ * This flag is true when AND'd against a type whose value
111
+ * must be in "quotes" i.e. T_HKEY and T_STRING
112
+ */
113
+ #define JSONSL_Tf_STRINGY 0xffff00
114
+
115
+ /**
116
+ * Constant representing the special JSON types.
117
+ * The values are special and aid in speed (the OBJECT and LIST
118
+ * values are the char literals of their openings).
119
+ *
120
+ * Their actual value is a character which attempts to resemble
121
+ * some mnemonic reference to the actual type.
122
+ *
123
+ * If new types are added, they must fit into the ASCII printable
124
+ * range (so they should be AND'd with 0x7f and yield something
125
+ * meaningful)
126
+ */
127
+ #define JSONSL_XTYPE \
128
+ X(STRING, '"'|JSONSL_Tf_STRINGY) \
129
+ X(HKEY, '#'|JSONSL_Tf_STRINGY) \
130
+ X(OBJECT, '{') \
131
+ X(LIST, '[') \
132
+ X(SPECIAL, '^') \
133
+ X(UESCAPE, 'u')
134
+ typedef enum {
135
+ #define X(o, c) \
136
+ JSONSL_T_##o = c,
137
+ JSONSL_XTYPE
138
+ JSONSL_T_UNKNOWN = '?',
139
+ /* Abstract 'root' object */
140
+ JSONSL_T_ROOT = 0
141
+ #undef X
142
+ } jsonsl_type_t;
143
+
144
+ /**
145
+ * Subtypes for T_SPECIAL. We define them as flags
146
+ * because more than one type can be applied to a
147
+ * given object.
148
+ */
149
+
150
+ #define JSONSL_XSPECIAL \
151
+ X(NONE, 0) \
152
+ X(SIGNED, 1<<0) \
153
+ X(UNSIGNED, 1<<1) \
154
+ X(TRUE, 1<<2) \
155
+ X(FALSE, 1<<3) \
156
+ X(NULL, 1<<4) \
157
+ X(FLOAT, 1<<5) \
158
+ X(EXPONENT, 1<<6) \
159
+ X(NONASCII, 1<<7) \
160
+ X(NAN, 1<<8) \
161
+ X(INF, 1<<9)
162
+ typedef enum {
163
+ #define X(o,b) \
164
+ JSONSL_SPECIALf_##o = b,
165
+ JSONSL_XSPECIAL
166
+ #undef X
167
+ /* Handy flags for checking */
168
+
169
+ JSONSL_SPECIALf_UNKNOWN = 1 << 10,
170
+
171
+ /** @private Private */
172
+ JSONSL_SPECIALf_ZERO = 1 << 11 | JSONSL_SPECIALf_UNSIGNED,
173
+ /** @private */
174
+ JSONSL_SPECIALf_DASH = 1 << 12,
175
+ /** @private */
176
+ JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF),
177
+ JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED),
178
+
179
+ /** Type is numeric */
180
+ JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED),
181
+
182
+ /** Type is a boolean */
183
+ JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),
184
+
185
+ /** Type is an "extended", not integral type (but numeric) */
186
+ JSONSL_SPECIALf_NUMNOINT =
187
+ (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN
188
+ |JSONSL_SPECIALf_INF)
189
+ } jsonsl_special_t;
190
+
191
+
192
+ /**
193
+ * These are the various types of stack (or other) events
194
+ * which will trigger a callback.
195
+ * Like the type constants, these are also mnemonic
196
+ */
197
+ #define JSONSL_XACTION \
198
+ X(PUSH, '+') \
199
+ X(POP, '-') \
200
+ X(UESCAPE, 'U') \
201
+ X(ERROR, '!')
202
+ typedef enum {
203
+ #define X(a,c) \
204
+ JSONSL_ACTION_##a = c,
205
+ JSONSL_XACTION
206
+ JSONSL_ACTION_UNKNOWN = '?'
207
+ #undef X
208
+ } jsonsl_action_t;
209
+
210
+
211
+ /**
212
+ * Various errors which may be thrown while parsing JSON
213
+ */
214
+ #define JSONSL_XERR \
215
+ /* Trailing garbage characters */ \
216
+ X(GARBAGE_TRAILING) \
217
+ /* We were expecting a 'special' (numeric, true, false, null) */ \
218
+ X(SPECIAL_EXPECTED) \
219
+ /* The 'special' value was incomplete */ \
220
+ X(SPECIAL_INCOMPLETE) \
221
+ /* Found a stray token */ \
222
+ X(STRAY_TOKEN) \
223
+ /* We were expecting a token before this one */ \
224
+ X(MISSING_TOKEN) \
225
+ /* Cannot insert because the container is not ready */ \
226
+ X(CANT_INSERT) \
227
+ /* Found a '\' outside a string */ \
228
+ X(ESCAPE_OUTSIDE_STRING) \
229
+ /* Found a ':' outside of a hash */ \
230
+ X(KEY_OUTSIDE_OBJECT) \
231
+ /* found a string outside of a container */ \
232
+ X(STRING_OUTSIDE_CONTAINER) \
233
+ /* Found a null byte in middle of string */ \
234
+ X(FOUND_NULL_BYTE) \
235
+ /* Current level exceeds limit specified in constructor */ \
236
+ X(LEVELS_EXCEEDED) \
237
+ /* Got a } as a result of an opening [ or vice versa */ \
238
+ X(BRACKET_MISMATCH) \
239
+ /* We expected a key, but got something else instead */ \
240
+ X(HKEY_EXPECTED) \
241
+ /* We got an illegal control character (bad whitespace or something) */ \
242
+ X(WEIRD_WHITESPACE) \
243
+ /* Found a \u-escape, but there were less than 4 following hex digits */ \
244
+ X(UESCAPE_TOOSHORT) \
245
+ /* Invalid two-character escape */ \
246
+ X(ESCAPE_INVALID) \
247
+ /* Trailing comma */ \
248
+ X(TRAILING_COMMA) \
249
+ /* An invalid number was passed in a numeric field */ \
250
+ X(INVALID_NUMBER) \
251
+ /* Value is missing for object */ \
252
+ X(VALUE_EXPECTED) \
253
+ /* The following are for JPR Stuff */ \
254
+ \
255
+ /* Found a literal '%' but it was only followed by a single valid hex digit */ \
256
+ X(PERCENT_BADHEX) \
257
+ /* jsonpointer URI is malformed '/' */ \
258
+ X(JPR_BADPATH) \
259
+ /* Duplicate slash */ \
260
+ X(JPR_DUPSLASH) \
261
+ /* No leading root */ \
262
+ X(JPR_NOROOT) \
263
+ /* Allocation failure */ \
264
+ X(ENOMEM) \
265
+ /* Invalid unicode codepoint detected (in case of escapes) */ \
266
+ X(INVALID_CODEPOINT)
267
+
268
+ typedef enum {
269
+ JSONSL_ERROR_SUCCESS = 0,
270
+ #define X(e) \
271
+ JSONSL_ERROR_##e,
272
+ JSONSL_XERR
273
+ #undef X
274
+ JSONSL_ERROR_GENERIC
275
+ } jsonsl_error_t;
276
+
277
+
278
+ /**
279
+ * A state is a single level of the stack.
280
+ * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
281
+ * will remain intact until the item is popped.
282
+ *
283
+ * As a result, it means a parent state object may be accessed from a child
284
+ * object, (the parents fields will all be valid). This allows a user to create
285
+ * an ad-hoc hierarchy on top of the JSON one.
286
+ *
287
+ */
288
+ struct jsonsl_state_st {
289
+ /**
290
+ * The JSON object type
291
+ */
292
+ unsigned type;
293
+
294
+ /** If this element is special, then its extended type is here */
295
+ unsigned special_flags;
296
+
297
+ /**
298
+ * The position (in terms of number of bytes since the first call to
299
+ * jsonsl_feed()) at which the state was first pushed. This includes
300
+ * opening tokens, if applicable.
301
+ *
302
+ * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
303
+ * be the position of the first quote.
304
+ *
305
+ * @see jsonsl_st::pos which contains the _current_ position and can be
306
+ * used during a POP callback to get the length of the element.
307
+ */
308
+ size_t pos_begin;
309
+
310
+ /**FIXME: This is redundant as the same information can be derived from
311
+ * jsonsl_st::pos at pop-time */
312
+ size_t pos_cur;
313
+
314
+ /**
315
+ * Level of recursion into nesting. This is mainly a convenience
316
+ * variable, as this can technically be deduced from the lexer's
317
+ * level parameter (though the logic is not that simple)
318
+ */
319
+ unsigned int level;
320
+
321
+
322
+ /**
323
+ * how many elements in the object/list.
324
+ * For objects (hashes), an element is either
325
+ * a key or a value. Thus for one complete pair,
326
+ * nelem will be 2.
327
+ *
328
+ * For special types, this will hold the numeric value (see JSONSL_NUMERIC_VALUE).
329
+ * This only holds true for values which are simple signed/unsigned
330
+ * numbers. Otherwise a special flag is set, and extra handling is not
331
+ * performed.
332
+ */
333
+ uint64_t nelem;
334
+
335
+
336
+
337
+ /*TODO: merge this and special_flags into a union */
338
+
339
+
340
+ /**
341
+ * Useful for an opening nest, this will prevent a callback from being
342
+ * invoked on this item or any of its children
343
+ */
344
+ int ignore_callback;
345
+
346
+ /**
347
+ * Counter which is incremented each time an escape ('\') is encountered.
348
+ * This is used internally for non-string types and should only be
349
+ * inspected by the user if the state actually represents a string
350
+ * type.
351
+ */
352
+ unsigned int nescapes;
353
+
354
+ /**
355
+ * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
356
+ * the macro expansion happens here.
357
+ *
358
+ * You can use these fields to store hierarchical or 'tagging' information
359
+ * for specific objects.
360
+ *
361
+ * See the documentation above for the lifetime of the state object (i.e.
362
+ * if the private data points to allocated memory, it should be freed
363
+ * when the object is popped, as the state object will be re-used)
364
+ */
365
+ #ifndef JSONSL_STATE_GENERIC
366
+ JSONSL_STATE_USER_FIELDS
367
+ #else
368
+
369
+ /**
370
+ * Otherwise, this is a simple void * pointer for anything you want
371
+ */
372
+ void *data;
373
+ #endif /* JSONSL_STATE_USER_FIELDS */
374
+ };
375
+
376
+ /**Gets the number of elements in the list.
377
+ * @param st The state. Must be of type JSONSL_T_LIST
378
+ * @return number of elements in the list
379
+ */
380
+ #define JSONSL_LIST_SIZE(st) ((st)->nelem)
381
+
382
+ /**Gets the number of key-value pairs in an object
383
+ * @param st The state. Must be of type JSONSL_T_OBJECT
384
+ * @return the number of key-value pairs in the object
385
+ */
386
+ #define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)
387
+
388
+ /**Gets the numeric value.
389
+ * @param st The state. Must be of type JSONSL_T_SPECIAL and
390
+ * special_flags must have the JSONSL_SPECIALf_NUMERIC flag
391
+ * set.
392
+ * @return the numeric value of the state.
393
+ */
394
+ #define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
395
+
396
+ /*
397
+ * So now we need some special structure for keeping the
398
+ * JPR info in sync. Preferably all in a single block
399
+ * of memory (there's no need for separate allocations).
400
+ * So we will define a 'table' with the following layout
401
+ *
402
+ * Level nPosbl JPR1_last JPR2_last JPR3_last
403
+ *
404
+ * 0 1 NOMATCH POSSIBLE POSSIBLE
405
+ * 1 0 NOMATCH NOMATCH COMPLETE
406
+ * [ table ends here because no further path is possible]
407
+ *
408
+ * Where the JPR..n corresponds to the number of JPRs
409
+ * requested, and nPosbl is a quick flag to determine
410
+ *
411
+ * the number of possibilities. In the future this might
412
+ * be made into a proper 'jump' table,
413
+ *
414
+ * Since we always mark JPRs from the higher levels descending
415
+ * into the lower ones, a prospective child match would first
416
+ * look at the parent table to check the possibilities, and then
417
+ * see which ones were possible..
418
+ *
419
+ * Thus, the size of this blob would be (and these are all ints here)
420
+ * nLevels * nJPR * 2.
421
+ *
422
+ * the 'Width' of the table would be nJPR*2, and the 'height' would be
423
+ * nlevels
424
+ */
425
+
426
+ /**
427
+ * This is called when a stack change occurs.
428
+ *
429
+ * @param jsn The lexer
430
+ * @param action The type of action, this can be PUSH or POP
431
+ * @param state A pointer to the stack currently affected by the action
432
+ * @param at A pointer to the position of the input buffer which triggered
433
+ * this action.
434
+ */
435
+ typedef void (*jsonsl_stack_callback)(
436
+ jsonsl_t jsn,
437
+ jsonsl_action_t action,
438
+ struct jsonsl_state_st* state,
439
+ const jsonsl_char_t *at);
440
+
441
+
442
+ /**
443
+ * This is called when an error is encountered.
444
+ * Sometimes it's possible to 'erase' characters (by replacing them
445
+ * with whitespace). If you think you have corrected the error, you
446
+ * can return a true value, in which case the parser will backtrack
447
+ * and try again.
448
+ *
449
+ * @param jsn The lexer
450
+ * @param error The error which was thrown
451
+ * @param state the current state
452
+ * @param at a pointer to the position of the input buffer which triggered
453
+ * the error. Note that this is not const, this is because you have the
454
+ * possibility of modifying the character in an attempt to correct the
455
+ * error
456
+ *
457
+ * @return zero to bail, nonzero to try again (this only makes sense if
458
+ * the input buffer has been modified by this callback)
459
+ */
460
+ typedef int (*jsonsl_error_callback)(
461
+ jsonsl_t jsn,
462
+ jsonsl_error_t error,
463
+ struct jsonsl_state_st* state,
464
+ jsonsl_char_t *at);
465
+
466
+ struct jsonsl_st {
467
+ /** Public, read-only */
468
+
469
+ /** This is the current level of the stack */
470
+ unsigned int level;
471
+
472
+ /** Flag set to indicate we should stop processing */
473
+ unsigned int stopfl;
474
+
475
+ /**
476
+ * This is the current position, relative to the beginning
477
+ * of the stream.
478
+ */
479
+ size_t pos;
480
+
481
+ /** This is the 'bytes' variable passed to feed() */
482
+ const jsonsl_char_t *base;
483
+
484
+ /** Callback invoked for PUSH actions */
485
+ jsonsl_stack_callback action_callback_PUSH;
486
+
487
+ /** Callback invoked for POP actions */
488
+ jsonsl_stack_callback action_callback_POP;
489
+
490
+ /** Default callback for any action, if neither PUSH or POP callbacks are defined */
491
+ jsonsl_stack_callback action_callback;
492
+
493
+ /**
494
+ * Do not invoke callbacks for objects deeper than this level.
495
+ * NOTE: This field establishes the lower bound for ignored callbacks,
496
+ * and is thus misnamed. `min_ignore_level` would actually make more
497
+ * sense, but we don't want to break API.
498
+ */
499
+ unsigned int max_callback_level;
500
+
501
+ /** The error callback. Invoked when an error happens. Should not be NULL */
502
+ jsonsl_error_callback error_callback;
503
+
504
+ /* these are boolean flags you can modify. You will be called
505
+ * about notification for each of these types if the corresponding
506
+ * variable is true.
507
+ */
508
+
509
+ /**
510
+ * @name Callback Booleans.
511
+ * These determine whether a callback is to be invoked for certain types of objects
512
+ * @{*/
513
+
514
+ /** Boolean flag to enable or disable the invocation for events on this type*/
515
+ int call_SPECIAL;
516
+ int call_OBJECT;
517
+ int call_LIST;
518
+ int call_STRING;
519
+ int call_HKEY;
520
+ /*@}*/
521
+
522
+ /**
523
+ * @name u-Escape handling
524
+ * Special handling for the \\u-f00d type sequences. These are meant
525
+ * to be translated back into the corresponding octet(s).
526
+ * A special callback (if set) is invoked with *at=='u'. An application
527
+ * may wish to temporarily suspend parsing and handle the 'u-' sequence
528
+ * internally (or not).
529
+ */
530
+
531
+ /*@{*/
532
+
533
+ /** Callback to be invoked for a u-escape */
534
+ jsonsl_stack_callback action_callback_UESCAPE;
535
+
536
+ /** Boolean flag, whether to invoke the callback */
537
+ int call_UESCAPE;
538
+
539
+ /** Boolean flag, whether we should return after encountering a u-escape:
540
+ * the callback is invoked and then we return if this is true
541
+ */
542
+ int return_UESCAPE;
543
+ /*@}*/
544
+
545
+ struct {
546
+ int allow_trailing_comma;
547
+ } options;
548
+
549
+ /** Put anything here */
550
+ void *data;
551
+
552
+ /*@{*/
553
+ /** Private */
554
+ int in_escape;
555
+ char expecting;
556
+ char tok_last;
557
+ int can_insert;
558
+ unsigned int levels_max;
559
+
560
+ #ifndef JSONSL_NO_JPR
561
+ size_t jpr_count;
562
+ jsonsl_jpr_t *jprs;
563
+
564
+ /* Root pointer for JPR matching information */
565
+ size_t *jpr_root;
566
+ #endif /* JSONSL_NO_JPR */
567
+ /*@}*/
568
+
569
+ /**
570
+ * This is the stack. Its upper bound is levels_max, or the
571
+ * nlevels argument passed to jsonsl_new. If you modify this structure,
572
+ * make sure that this member is last.
573
+ */
574
+ struct jsonsl_state_st stack[1];
575
+ };
576
+
577
+
578
+ /**
579
+ * Creates a new lexer object, with capacity for recursion up to nlevels
580
+ *
581
+ * @param nlevels maximum recursion depth
582
+ */
583
+ JSONSL_API
584
+ jsonsl_t jsonsl_new(int nlevels);
585
+
586
+ /**
587
+ * Feeds data into the lexer.
588
+ *
589
+ * @param jsn the lexer object
590
+ * @param bytes new data to be fed
591
+ * @param nbytes size of new data
592
+ */
593
+ JSONSL_API
594
+ void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
595
+
596
+ /**
597
+ * Resets the internal parser state. This does not free the parser
598
+ * but does clean it internally, so that the next time feed() is called,
599
+ * it will be treated as a new stream
600
+ *
601
+ * @param jsn the lexer
602
+ */
603
+ JSONSL_API
604
+ void jsonsl_reset(jsonsl_t jsn);
605
+
606
+ /**
607
+ * Frees the lexer, cleaning any allocated memory taken
608
+ *
609
+ * @param jsn the lexer
610
+ */
611
+ JSONSL_API
612
+ void jsonsl_destroy(jsonsl_t jsn);
613
+
614
+ /**
615
+ * Looks up the state one level above the given one (its 'parent').
616
+ *
617
+ * @param jsn the lexer whose stack is consulted
618
+ * @param state the current state; only its level field is read
619
+ * @return the parent's stack slot, or NULL when state->level is 0 or 1
620
+ */
621
+ static JSONSL_INLINE struct jsonsl_state_st *
622
+ jsonsl_last_state(const jsonsl_t jsn, const struct jsonsl_state_st *state)
623
+ {
624
+ /* Levels 0 and 1 have nothing above them to hand back */
625
+ if (state->level <= 1) {
626
+ return NULL;
627
+ }
628
+ /* Plain pointer arithmetic: don't complain about overriding array bounds */
629
+ return jsn->stack + state->level - 1;
630
+ }
631
+
632
+ /**
633
+ * Fetches the state of the child most recently consumed by this parent.
634
+ * Only meaningful while inside the parent's POP callback.
635
+ * @param jsn the lexer
636
+ * @param parent the state whose last child is wanted
637
+ * @return A pointer to the child (stack slot parent->level + 1).
638
+ */
639
+ static JSONSL_INLINE struct jsonsl_state_st *
640
+ jsonsl_last_child(const jsonsl_t jsn, const struct jsonsl_state_st *parent)
641
+ {
642
+ const unsigned int child_level = parent->level + 1;
643
+ return jsn->stack + child_level;
644
+ }
645
+
646
+ /**Instructs the parser to stop parsing and return by setting jsn->stopfl.
647
+ * This is valid only from within a callback */
648
+ static JSONSL_INLINE
649
+ void jsonsl_stop(jsonsl_t jsn)
650
+ {
651
+ jsn->stopfl = 1;
652
+ }
653
+
654
+ /**
655
+ * Switches on the per-type callback flags for every JSON type
656
+ * (SPECIAL, OBJECT, LIST, STRING and HKEY). A convenience only;
657
+ * the UESCAPE callback and its flags are left exactly as they were.
658
+ */
659
+ static JSONSL_INLINE void jsonsl_enable_all_callbacks(jsonsl_t jsn)
660
+ {
661
+ /* Same order as the call_* fields in struct jsonsl_st */
662
+ jsn->call_SPECIAL = 1;
663
+ jsn->call_OBJECT = 1;
664
+ jsn->call_LIST = 1;
665
+ jsn->call_STRING = 1;
666
+ jsn->call_HKEY = 1;
667
+ }
668
+
669
+ /**
670
+ * Evaluates to true if the given state pointer refers to a type that can
671
+ * have children (object or list). NOTE: `state` may be evaluated twice.
672
+ */
673
+ #define JSONSL_STATE_IS_CONTAINER(state) \
674
+ ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
675
+
676
+ /**
677
+ * These two functions, dump a string representation
678
+ * of the error or type, respectively. They will never
679
+ * return NULL
680
+ */
681
+ JSONSL_API
682
+ const char* jsonsl_strerror(jsonsl_error_t err);
683
+ JSONSL_API
684
+ const char* jsonsl_strtype(jsonsl_type_t jt);
685
+
686
+ /**
687
+ * Dumps global metrics to the screen. This is a noop unless
688
+ * jsonsl was compiled with JSONSL_USE_METRICS
689
+ */
690
+ JSONSL_API
691
+ void jsonsl_dump_global_metrics(void);
692
+
693
+ /* This macro just here for editors to do code folding */
694
+ #ifndef JSONSL_NO_JPR
695
+
696
+ /**
697
+ * @name JSON Pointer API
698
+ *
699
+ * JSONPointer API. This isn't really related to the lexer (at least not yet)
700
+ * JSONPointer provides an extremely simple specification for providing
701
+ * locations within JSON objects. We will extend it a bit and allow for
702
+ * providing 'wildcard' characters by which to be able to 'query' the stream.
703
+ *
704
+ * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
705
+ *
706
+ * Currently I'm implementing the 'single query' API which can only use a single
707
+ * query component. In the future I will integrate my yet-to-be-published
708
+ * Boyer-Moore-esque prefix searching implementation, in order to allow
709
+ * multiple paths to be merged into one for quick and efficient searching.
710
+ *
711
+ *
712
+ * JPR (as we'll refer to it within the source) can be used by splitting
713
+ * the components into multiple sections, and incrementally 'track' each
714
+ * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
715
+ * callback for an object, we will check to see whether the index matching
716
+ * the component corresponding to the current level contains a match
717
+ * for our path.
718
+ *
719
+ * In order to do this properly, a structure must be maintained within the
720
+ * parent indicating whether its children are possible matches. This flag
721
+ * will be 'inherited' by all children which may conform to the match
722
+ * specification, and discarded by all which do not (thereby eliminating
723
+ * their children from inheriting it).
724
+ *
725
+ * A successful match is a complete one. One can provide multiple paths with
726
+ * multiple levels of matches e.g.
727
+ * /foo/bar/baz/^/blah
728
+ *
729
+ * @{
730
+ */
731
+
732
+ /** The wildcard character */
733
+ #ifndef JSONSL_PATH_WILDCARD_CHAR
734
+ #define JSONSL_PATH_WILDCARD_CHAR '^'
735
+ #endif /* WILDCARD_CHAR */
736
+
737
+ #define JSONSL_XMATCH \
738
+ X(COMPLETE,1) \
739
+ X(POSSIBLE,0) \
740
+ X(NOMATCH,-1) \
741
+ X(TYPE_MISMATCH, -2)
742
+
743
+ typedef enum {
744
+
745
+ #define X(T,v) \
746
+ JSONSL_MATCH_##T = v,
747
+ JSONSL_XMATCH
748
+
749
+ #undef X
750
+ JSONSL_MATCH_UNKNOWN
751
+ } jsonsl_jpr_match_t;
752
+
753
+ typedef enum {
754
+ JSONSL_PATH_STRING = 1,
755
+ JSONSL_PATH_WILDCARD,
756
+ JSONSL_PATH_NUMERIC,
757
+ JSONSL_PATH_ROOT,
758
+
759
+ /* Special */
760
+ JSONSL_PATH_INVALID = -1,
761
+ JSONSL_PATH_NONE = 0
762
+ } jsonsl_jpr_type_t;
763
+
764
+ struct jsonsl_jpr_component_st {
765
+ /** The string the component points to */
766
+ char *pstr;
767
+ /** if this is a numeric type, the number is 'cached' here */
768
+ unsigned long idx;
769
+ /** The length of the string */
770
+ size_t len;
771
+ /** The type of component (NUMERIC or STRING) */
772
+ jsonsl_jpr_type_t ptype;
773
+
774
+ /** Set this to true to enforce type checking between dict keys and array
775
+ * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
776
+ * that an array index is actually a child of a dictionary. */
777
+ short is_arridx;
778
+
779
+ /* Extra fields (for more advanced searches. Default is empty) */
780
+ JSONSL_JPR_COMPONENT_USER_FIELDS
781
+ };
782
+
783
+ struct jsonsl_jpr_st {
784
+ /** Path components */
785
+ struct jsonsl_jpr_component_st *components;
786
+ size_t ncomponents;
787
+
788
+ /**Type of the match to be expected. If nonzero, will be compared against
789
+ * the actual type */
790
+ unsigned match_type;
791
+
792
+ /** Base of allocated string for components */
793
+ char *basestr;
794
+
795
+ /** The original match string. Useful for returning to the user */
796
+ char *orig;
797
+ size_t norig;
798
+ };
799
+
800
+ /**
801
+ * Create a new JPR object.
802
+ *
803
+ * @param path the JSONPointer path specification.
804
+ * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
805
+ * then more details will be in this variable.
806
+ *
807
+ * @return a new jsonsl_jpr_t object, or NULL on error.
808
+ */
809
+ JSONSL_API
810
+ jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
811
+
812
+ /**
813
+ * Destroy a JPR object
814
+ */
815
+ JSONSL_API
816
+ void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
817
+
818
+ /**
819
+ * Match a JSON object against a type and specific level
820
+ *
821
+ * @param jpr the JPR object
822
+ * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
823
+ * @param parent_level the level of the parent
824
+ * @param key the 'key' of the child. If the parent is an array, this should be
825
+ * empty.
826
+ * @param nkey - the length of the key. If the parent is an array (T_LIST), then
827
+ * this should be the current index.
828
+ *
829
+ * NOTE: The key of the child means any kind of associative data related to the
830
+ * element. Thus: <<< { "foo" : [ >>,
831
+ * the opening array's key is "foo".
832
+ *
833
+ * @return a status constant. This indicates whether a match was excluded, possible,
834
+ * or successful.
835
+ */
836
+ JSONSL_API
837
+ jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
838
+ unsigned int parent_type,
839
+ unsigned int parent_level,
840
+ const char *key, size_t nkey);
841
+
842
+ /**
843
+ * Alternate matching algorithm. This matching algorithm does not use
844
+ * JSONPointer but relies on a more structured searching mechanism. It
845
+ * assumes that there is a clear distinction between array indices and
846
+ * object keys. In this case, the jsonsl_jpr_component_st::ptype should
847
+ * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
848
+ * jsonsl_jpr_component_st::is_arridx field will be removed in a future
849
+ * version).
850
+ *
851
+ * @param jpr The path
852
+ * @param parent The parent structure. Can be NULL if this is the root object
853
+ * @param child The child structure. Should not be NULL
854
+ * @param key Object key, if an object
855
+ * @param nkey Length of object key
856
+ * @return Status constant if successful
857
+ *
858
+ * @note
859
+ * For successful matching, both the key and the path itself should be normalized
860
+ * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
861
+ * should currently be done in the application. Another version of this function
862
+ * may use a temporary buffer in such circumstances (allocated by the application).
863
+ *
864
+ * Since this function also checks the state of the child, it should only
865
+ * be called on PUSH callbacks, and not POP callbacks
866
+ */
867
+ JSONSL_API
868
+ jsonsl_jpr_match_t
869
+ jsonsl_path_match(jsonsl_jpr_t jpr,
870
+ const struct jsonsl_state_st *parent,
871
+ const struct jsonsl_state_st *child,
872
+ const char *key, size_t nkey);
873
+
874
+
875
+ /**
876
+ * Associate a set of JPR objects with a lexer instance.
877
+ * This should be called before the lexer has been fed any data (and
878
+ * behavior is undefined if you don't adhere to this).
879
+ *
880
+ * After using this function, you may subsequently call match_state() on
881
+ * given states (presumably from within the callbacks).
882
+ *
883
+ * Note that currently the first JPR is the quickest and comes
884
+ * pre-allocated with the state structure. Further JPR objects
885
+ * are chained.
886
+ *
887
+ * @param jsn The lexer
888
+ * @param jprs An array of jsonsl_jpr_t objects
889
+ * @param njprs How many elements in the jprs array.
890
+ */
891
+ JSONSL_API
892
+ void jsonsl_jpr_match_state_init(jsonsl_t jsn,
893
+ jsonsl_jpr_t *jprs,
894
+ size_t njprs);
895
+
896
+ /**
897
+ * This follows the same semantics as the normal match,
898
+ * except we infer parent and type information from the relevant state objects.
899
+ * The match status (for all possible JPR objects) is set in the *out parameter.
900
+ *
901
+ * If a match has succeeded, then its JPR object will be returned. In all other
902
+ * instances, NULL is returned;
903
+ *
904
+ * @param jsn The lexer, previously set up with jsonsl_jpr_match_state_init()
905
+ * @param state The jsonsl_state_st which is a candidate
906
+ * @param key The hash key (if applicable, can be NULL if parent is list)
907
+ * @param nkey Length of hash key (if applicable, can be zero if parent is list)
908
+ * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
909
+ * the match result
910
+ *
911
+ * @return If a match was completed in full, then the JPR object containing
912
+ * the matching path will be returned. Otherwise, the return is NULL (note, this
913
+ * does not mean matching has failed, it can still be part of the match: check
914
+ * the out parameter).
915
+ */
916
+ JSONSL_API
917
+ jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
918
+ struct jsonsl_state_st *state,
919
+ const char *key,
920
+ size_t nkey,
921
+ jsonsl_jpr_match_t *out);
922
+
923
+
924
+ /**
925
+ * Cleanup any memory allocated and any states set by
926
+ * match_state_init() and match_state()
927
+ * @param jsn The lexer
928
+ */
929
+ JSONSL_API
930
+ void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
931
+
932
+ /**
933
+ * Return a string representation of the match result returned by match()
934
+ */
935
+ JSONSL_API
936
+ const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
937
+
938
+ /* @}*/
939
+
940
+ /**
941
+ * Utility function to convert escape sequences into their original form.
942
+ *
943
+ * The decoders I've sampled do not seem to specify a standard behavior of what
944
+ * to escape/unescape.
945
+ *
946
+ * RFC 4627 mandates only that the quote, backslash, and ASCII control
947
+ * characters (0x00-0x1f) be escaped. It is often common for applications
948
+ * to escape a '/' - however this may also be desired behavior. the JSON
949
+ * spec is not clear on this, and therefore jsonsl leaves it up to you.
950
+ *
951
+ * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
952
+ * true when dealing with 'u-escapes' which can be expressed perfectly fine
953
+ * as utf8. One use case for normalization is JPR string comparison, in which
954
+ * case two effectively equivalent strings may not match because one is using
955
+ * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
956
+ * an empty `toEscape` table, enabling only the `u` index.
957
+ *
958
+ * @param in The input string.
959
+ * @param out An allocated output (should be the same size as in)
960
+ * @param len the size of the buffer
961
+ * @param toEscape - A sparse array of characters to unescape. Characters
962
+ * which are not present in this array, e.g. toEscape['c'] == 0 will be
963
+ * ignored and passed to the output in their original form.
964
+ * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
965
+ * then this variable will have the SPECIALf_NONASCII flag on.
966
+ *
967
+ * @param err A pointer to an error variable. If an error occurs, it will be
968
+ * set in this variable
969
+ * @param errat If not null and an error occurs, this will be set to point
970
+ * to the position within the string at which the offending character was
971
+ * encountered.
972
+ *
973
+ * @return The effective size of the output buffer.
974
+ *
975
+ * @note
976
+ * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
977
+ * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
978
+ * which while still correct in some sense was confusing for many (especially
979
+ * considering that the inputs were variations of char).
980
+ *
981
+ * @note
982
+ * The output buffer will never be larger than the input buffer, since
983
+ * standard escape sequences (i.e. '\t') occupy two bytes in the source
984
+ * but only one byte (when unescaped) in the output. Likewise u-escapes
985
+ * (i.e. \uXXXX) will occupy six bytes in the source, but at the most
986
+ * three bytes (as UTF-8) when unescaped.
987
+ */
988
+ JSONSL_API
989
+ size_t jsonsl_util_unescape_ex(const char *in,
990
+ char *out,
991
+ size_t len,
992
+ const int toEscape[128],
993
+ unsigned *oflags,
994
+ jsonsl_error_t *err,
995
+ const char **errat);
996
+
997
+ /**
998
+ * Convenience macro to avoid passing too many parameters
999
+ */
1000
+ #define jsonsl_util_unescape(in, out, len, toEscape, err) \
1001
+ jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
1002
+
1003
+ #endif /* JSONSL_NO_JPR */
1004
+
1005
+ #ifdef __cplusplus
1006
+ }
1007
+ #endif /* __cplusplus */
1008
+
1009
+ #endif /* JSONSL_H_ */