RubyGems - libyajl2 - Versions diffs - 0.1.4 → 0.1.5 - Mend

libyajl2 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +4 -4
data/Rakefile +65 -10
data/ext/libyajl2/api/yajl_common.h +75 -0
data/ext/libyajl2/api/yajl_gen.h +167 -0
data/ext/libyajl2/api/yajl_parse.h +226 -0
data/ext/libyajl2/api/yajl_tree.h +186 -0
data/ext/libyajl2/api/yajl_version.h +23 -0
data/ext/libyajl2/extconf.rb +24 -177
data/ext/libyajl2/patches/000-mingw-gcc.patch +26 -0
data/ext/libyajl2/yajl/yajl_common.h +75 -0
data/ext/libyajl2/yajl/yajl_gen.h +167 -0
data/ext/libyajl2/yajl/yajl_parse.h +226 -0
data/ext/libyajl2/yajl/yajl_tree.h +186 -0
data/ext/libyajl2/yajl/yajl_version.h +23 -0
data/ext/libyajl2/yajl.c +175 -0
data/ext/libyajl2/yajl_alloc.c +52 -0
data/ext/libyajl2/yajl_alloc.h +34 -0
data/ext/libyajl2/yajl_buf.c +103 -0
data/ext/libyajl2/yajl_buf.h +57 -0
data/ext/libyajl2/yajl_bytestack.h +69 -0
data/ext/libyajl2/yajl_encode.c +220 -0
data/ext/libyajl2/yajl_encode.h +34 -0
data/ext/libyajl2/yajl_gen.c +362 -0
data/ext/libyajl2/yajl_lex.c +763 -0
data/ext/libyajl2/yajl_lex.h +117 -0
data/ext/libyajl2/yajl_parser.c +498 -0
data/ext/libyajl2/yajl_parser.h +78 -0
data/ext/libyajl2/yajl_tree.c +503 -0
data/ext/libyajl2/yajl_version.c +7 -0
data/lib/libyajl2/version.rb +1 -1
metadata +28 -3
data/ext/libyajl2/Makefile +0 -9

data/ext/libyajl2/yajl_lex.c ADDED Viewed

@@ -0,0 +1,763 @@
+/*
+ * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include "yajl_lex.h"
+#include "yajl_buf.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#ifdef YAJL_LEXER_DEBUG
+/* Debug-only helper: map a lexer token to a short static label.
+ * Both brace tokens share the label "brace" and both bracket tokens share
+ * "bracket"; any token value not listed below yields "unknown". */
+static const char *
+tokToStr(yajl_tok tok)
+{
+    const char * label = "unknown";
+    switch (tok) {
+        case yajl_tok_bool:
+            label = "bool";
+            break;
+        case yajl_tok_colon:
+            label = "colon";
+            break;
+        case yajl_tok_comma:
+            label = "comma";
+            break;
+        case yajl_tok_eof:
+            label = "eof";
+            break;
+        case yajl_tok_error:
+            label = "error";
+            break;
+        case yajl_tok_left_brace:
+        case yajl_tok_right_brace:
+            label = "brace";
+            break;
+        case yajl_tok_left_bracket:
+        case yajl_tok_right_bracket:
+            label = "bracket";
+            break;
+        case yajl_tok_null:
+            label = "null";
+            break;
+        case yajl_tok_integer:
+            label = "integer";
+            break;
+        case yajl_tok_double:
+            label = "double";
+            break;
+        case yajl_tok_string:
+            label = "string";
+            break;
+        case yajl_tok_string_with_escapes:
+            label = "string_with_escapes";
+            break;
+    }
+    return label;
+}
+#endif
+/* Impact of the stream parsing feature on the lexer:
+ *
+ * YAJL supports stream parsing.  That is, the ability to parse the first
+ * bits of a chunk of JSON before the last bits are available (still on
+ * the network or disk).  This makes the lexer more complex.  The
+ * responsibility of the lexer is to transparently handle the case where
+ * a chunk boundary falls in the middle of a token.  This is
+ * accomplished via a buffer and a character reading abstraction.
+ *
+ * Overview of implementation
+ *
+ * When we lex to end of input string before end of token is hit, we
+ * copy all of the input text composing the token into our lexBuf.
+ *
+ * Every time we read a character, we do so through the readChar function.
+ * readChar's responsibility is to handle pulling all chars from the buffer
+ * before pulling chars from input text
+ */
+struct yajl_lexer_t { /* lexer state; zero-filled and then populated by yajl_lex_alloc */
+    /* the overall line and char offset into the data, as reported by
+     * yajl_lex_current_line() and yajl_lex_current_char() */
+    size_t lineOff;
+    size_t charOff;
+    /* the most recent lexical error, retrievable via yajl_lex_get_error() */
+    yajl_lex_error error;
+    /* an input buffer to handle the case where a token is spread over
+     * multiple chunks */
+    yajl_buf buf;
+    /* in the case where we have data in the lexBuf, bufOff holds
+     * the current offset into the lexBuf. */
+    size_t bufOff;
+    /* are we using the lex buf? */
+    unsigned int bufInUse;
+    /* shall we allow comments? */
+    unsigned int allowComments;
+    /* shall we validate utf8 inside strings? */
+    unsigned int validateUTF8;
+    yajl_alloc_funcs * alloc; /* allocation routines handed to yajl_lex_alloc; yajl_lex_free releases the lexer through them */
+};
+/* readChar: fetch the next input byte and advance the matching cursor.
+ * While the lexer's buffer is in use and still holds unread bytes
+ * (bufOff is below the buffered length) the byte is taken from the buffer
+ * and bufOff is incremented; otherwise it is taken from txt and *off is
+ * incremented.  No bounds check is made on txt: callers compare *off
+ * against the text length before invoking the macro.  The lxr argument is
+ * expanded several times, so pass an expression without side effects.
+ * Every use of lxr is parenthesized so that any pointer expression may be
+ * passed safely. */
+#define readChar(lxr, txt, off)                      \
+    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ? \
+     (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \
+     ((txt)[(*(off))++]))
+/* unreadChar: step back by one byte.  Rewinds *off when it is non-zero,
+ * otherwise rewinds the lexer's buffer offset. */
+#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
+/* Allocate and initialise a lexer.
+ *
+ *   alloc         - allocation routines; used for the lexer itself, passed
+ *                   on to yajl_buf_alloc, and remembered for yajl_lex_free
+ *   allowComments - non-zero to accept comments in the input
+ *   validateUTF8  - non-zero to validate utf8 inside strings
+ *
+ * Returns the new lexer with all other state zeroed, or NULL when the
+ * allocator could not provide memory for it. */
+yajl_lexer
+yajl_lex_alloc(yajl_alloc_funcs * alloc,
+               unsigned int allowComments, unsigned int validateUTF8)
+{
+    yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
+    /* the allocation can fail; never hand a NULL pointer to memset */
+    if (lxr == NULL) {
+        return NULL;
+    }
+    memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
+    lxr->buf = yajl_buf_alloc(alloc);
+    lxr->allowComments = allowComments;
+    lxr->validateUTF8 = validateUTF8;
+    lxr->alloc = alloc;
+    return lxr;
+}
+/* Release a lexer created by yajl_lex_alloc: frees its token buffer and
+ * then the lexer itself through the allocation routines stored in it.
+ * Passing NULL is a no-op, mirroring free(NULL). */
+void
+yajl_lex_free(yajl_lexer lxr)
+{
+    if (lxr == NULL) {
+        return;
+    }
+    yajl_buf_free(lxr->buf);
+    YA_FREE(lxr->alloc, lxr);
+}
+/* a lookup table, indexed by byte value, which lets us quickly determine
+ * which of the following flags apply to a character:
+ * VEC - valid escaped control char
+ * note.  the solidus '/' may be escaped or not.
+ * IJC - invalid json char
+ * VHC - valid hex char
+ * NFP - needs further processing (from a string scanning perspective)
+ * NUC - needs utf8 checking when enabled (from a string scanning perspective)
+ * the flags are distinct bits and may be or-ed together in one entry.
+ */
+#define VEC 0x01
+#define IJC 0x02
+#define VHC 0x04
+#define NFP 0x08
+#define NUC 0x10
+static const char charLookupTable[256] =
+{
+/*00*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*08*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*10*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*18*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*20*/ 0      , 0      , NFP|VEC|IJC, 0      , 0      , 0      , 0      , 0      , /* 0x22 is the double quote */
+/*28*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , VEC    , /* 0x2f is the solidus */
+/*30*/ VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    ,
+/*38*/ VHC    , VHC    , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*40*/ 0      , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , 0      ,
+/*48*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*50*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*58*/ 0      , 0      , 0      , 0      , NFP|VEC|IJC, 0      , 0      , 0      , /* 0x5c is the backslash */
+/*60*/ 0      , VHC    , VEC|VHC, VHC    , VHC    , VHC    , VEC|VHC, 0      ,
+/*68*/ 0      , 0      , 0      , 0      , 0      , 0      , VEC    , 0      ,
+/*70*/ 0      , 0      , VEC    , 0      , VEC    , 0      , 0      , 0      ,
+/*78*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*80*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*88*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*90*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*98*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*a0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*a8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*b0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*b8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*c0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*c8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*d0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*d8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*e0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*e8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*f0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
+/*f8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC
+};
+/** validate one utf8 encoded codepoint whose lead byte is already in hand.
+ *
+ *  curChar is the lead byte; each continuation byte it calls for is
+ *  consumed through readChar.
+ *
+ *  returns:
+ *    yajl_tok_string - the sequence is well formed (a single byte at or
+ *                      below 0x7f, or a lead byte followed by the right
+ *                      number of 10xxxxxx continuation bytes)
+ *    yajl_tok_eof - the input text ran out before the sequence was
+ *                   complete
+ *    yajl_tok_error - the lead byte is not a valid 1-4 byte lead, or a
+ *                     continuation byte does not have the form 10xxxxxx
+ */
+#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
+static yajl_tok
+yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
+                   size_t jsonTextLen, size_t * offset,
+                   unsigned char curChar)
+{
+    unsigned int trailing;
+    /* classify the lead byte by its high bits to learn how many
+     * continuation bytes must follow */
+    if (curChar <= 0x7f) {
+        return yajl_tok_string;
+    } else if ((curChar & 0xe0) == 0xc0) {
+        trailing = 1;
+    } else if ((curChar & 0xf0) == 0xe0) {
+        trailing = 2;
+    } else if ((curChar & 0xf8) == 0xf0) {
+        trailing = 3;
+    } else {
+        return yajl_tok_error;
+    }
+    /* every continuation byte must look like 10xxxxxx */
+    while (trailing > 0) {
+        UTF8_CHECK_EOF;
+        curChar = readChar(lexer, jsonText, offset);
+        if ((curChar & 0xc0) != 0x80) {
+            return yajl_tok_error;
+        }
+        trailing--;
+    }
+    return yajl_tok_string;
+}
+/* lex a string.  input is the lexer, pointer to beginning of
+ * json text, and start of string (offset).
+ * a token is returned which has the following meanings:
+ * yajl_tok_string: lex of string was successful.  offset points to
+ *                  terminating '"'.
+ * yajl_tok_eof: end of text was encountered before we could complete
+ *               the lex.
+ * yajl_tok_error: embedded in the string were unallowable chars.  offset
+ *               points to the offending char
+ */
+#define STR_CHECK_EOF \
+if (*offset >= jsonTextLen) { \
+   tok = yajl_tok_eof; \
+   goto finish_string_lex; \
+}
+/** count the leading run of bytes in buf that need no closer look.
+ *  a byte ends the run when charLookupTable flags it IJC or NFP, or NUC
+ *  when utf8check is non-zero.  at most len bytes are examined; the
+ *  return value is the number of bytes that can be skipped. */
+static size_t
+yajl_string_scan(const unsigned char * buf, size_t len, int utf8check)
+{
+    const unsigned char interesting = IJC|NFP|(utf8check ? NUC : 0);
+    const unsigned char * cur = buf;
+    const unsigned char * const end = buf + len;
+    while (cur < end && (charLookupTable[*cur] & interesting) == 0)
+    {
+        cur++;
+    }
+    return (size_t) (cur - buf);
+}
+static yajl_tok
+yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
+                size_t jsonTextLen, size_t * offset)
+{   /* Lexes the remainder of a string; yajl_lex_lex calls this after it
+     * has consumed the opening quote.  Returns yajl_tok_string, or
+     * yajl_tok_string_with_escapes when a backslash was seen, once the
+     * closing quote has been read; yajl_tok_eof when the input text runs
+     * out first; yajl_tok_error with lexer->error set on an invalid
+     * escape, a non-hex digit in a unicode escape, an invalid character,
+     * or invalid utf8 (only checked when validateUTF8 is set). */
+    yajl_tok tok = yajl_tok_error;
+    int hasEscapes = 0; /* becomes 1 as soon as a backslash is read */
+    for (;;) {
+        unsigned char curChar;
+        /* now jump into a faster scanning routine to skip as much
+         * of the buffers as possible.  unread bytes in the lexer's own
+         * buffer are scanned first; only when there are none is the
+         * caller's text scanned. */
+        {
+            const unsigned char * p;
+            size_t len;
+            if ((lexer->bufInUse && yajl_buf_len(lexer->buf) &&
+                 lexer->bufOff < yajl_buf_len(lexer->buf)))
+            {
+                p = ((const unsigned char *) yajl_buf_data(lexer->buf) +
+                     (lexer->bufOff));
+                len = yajl_buf_len(lexer->buf) - lexer->bufOff;
+                lexer->bufOff += yajl_string_scan(p, len, lexer->validateUTF8);
+            }
+            else if (*offset < jsonTextLen)
+            {
+                p = jsonText + *offset;
+                len = jsonTextLen - *offset;
+                *offset += yajl_string_scan(p, len, lexer->validateUTF8);
+            }
+        }
+        STR_CHECK_EOF;
+        curChar = readChar(lexer, jsonText, offset);
+        /* quote terminates */
+        if (curChar == '"') {
+            tok = yajl_tok_string;
+            break;
+        }
+        /* backslash escapes a set of control chars, */
+        else if (curChar == '\\') {
+            hasEscapes = 1;
+            STR_CHECK_EOF;
+            /* special case \u */
+            curChar = readChar(lexer, jsonText, offset);
+            if (curChar == 'u') {
+                unsigned int i = 0;
+                for (i=0;i<4;i++) { /* exactly four hex digits must follow */
+                    STR_CHECK_EOF;
+                    curChar = readChar(lexer, jsonText, offset);
+                    if (!(charLookupTable[curChar] & VHC)) {
+                        /* back up to offending char */
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_string_invalid_hex_char;
+                        goto finish_string_lex;
+                    }
+                }
+            } else if (!(charLookupTable[curChar] & VEC)) {
+                /* back up to offending char */
+                unreadChar(lexer, offset);
+                lexer->error = yajl_lex_string_invalid_escaped_char;
+                goto finish_string_lex;
+            }
+        }
+        /* when not validating UTF8 it's a simple table lookup to determine
+         * if the present character is invalid */
+        else if(charLookupTable[curChar] & IJC) {
+            /* back up to offending char */
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_string_invalid_json_char;
+            goto finish_string_lex;
+        }
+        /* when in validate UTF8 mode we need to do some extra work */
+        else if (lexer->validateUTF8) {
+            yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
+                                            offset, curChar);
+            if (t == yajl_tok_eof) {
+                tok = yajl_tok_eof;
+                goto finish_string_lex;
+            } else if (t == yajl_tok_error) {
+                lexer->error = yajl_lex_string_invalid_utf8;
+                goto finish_string_lex;
+            }
+        }
+        /* accept it, and move on */
+    }
+  finish_string_lex:
+    /* tell our buddy, the parser, whether he needs to process this string
+     * again.  tok is still yajl_tok_error here on every error path. */
+    if (hasEscapes && tok == yajl_tok_string) {
+        tok = yajl_tok_string_with_escapes;
+    }
+    return tok;
+}
+/* return yajl_tok_eof from the enclosing function once the caller's text
+ * is used up; expects offset and jsonTextLen to be in scope. */
+#define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
+/* Lex a number: optional minus, integer part, optional fraction, optional
+ * exponent.  Returns yajl_tok_integer when neither fraction nor exponent
+ * is present, yajl_tok_double otherwise, yajl_tok_eof when the text ends
+ * before the number is known to be complete, and yajl_tok_error (with
+ * lexer->error set) when a required digit is missing.
+ *
+ * A number only ends when a character that cannot belong to it is seen,
+ * so the lexer always reads one character too many and puts it back. */
+static yajl_tok
+yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
+                size_t jsonTextLen, size_t * offset)
+{
+    yajl_tok kind = yajl_tok_integer;
+    unsigned char ch;
+    RETURN_IF_EOF;
+    ch = readChar(lexer, jsonText, offset);
+    /* a minus sign may come first */
+    if (ch == '-') {
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+    }
+    /* integer part: it has to open with a digit */
+    if (ch < '0' || ch > '9') {
+        unreadChar(lexer, offset);
+        lexer->error = yajl_lex_missing_integer_after_minus;
+        return yajl_tok_error;
+    }
+    if (ch == '0') {
+        /* a leading zero stands alone */
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+    } else {
+        /* 1-9 followed by any number of digits */
+        for (;;) {
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+            if (ch < '0' || ch > '9') {
+                break;
+            }
+        }
+    }
+    /* a fraction turns the token into a double */
+    if (ch == '.') {
+        int sawDigit = 0;
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+        while (ch >= '0' && ch <= '9') {
+            sawDigit = 1;
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+        }
+        if (!sawDigit) {
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_missing_integer_after_decimal;
+            return yajl_tok_error;
+        }
+        kind = yajl_tok_double;
+    }
+    /* so does an exponent */
+    if (ch == 'e' || ch == 'E') {
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+        /* the exponent may carry a sign */
+        if (ch == '+' || ch == '-') {
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+        }
+        if (ch < '0' || ch > '9') {
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_missing_integer_after_exponent;
+            return yajl_tok_error;
+        }
+        do {
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+        } while (ch >= '0' && ch <= '9');
+        kind = yajl_tok_double;
+    }
+    /* hand back the character that ended the number */
+    unreadChar(lexer, offset);
+    return kind;
+}
+/* Lex a comment whose opening slash has already been consumed by the
+ * caller.  A second slash starts a comment that runs through the next
+ * newline; a star starts a comment that runs through the closing
+ * star-slash pair.  Returns yajl_tok_comment once the comment has been
+ * skipped, yajl_tok_eof when the text ends inside it, and yajl_tok_error
+ * (lexer->error = yajl_lex_invalid_char) when neither opener follows. */
+static yajl_tok
+yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
+                 size_t jsonTextLen, size_t * offset)
+{
+    unsigned char ch;
+    RETURN_IF_EOF;
+    ch = readChar(lexer, jsonText, offset);
+    /* anything but a slash or a star is not a comment opener */
+    if (ch != '/' && ch != '*') {
+        lexer->error = yajl_lex_invalid_char;
+        return yajl_tok_error;
+    }
+    if (ch == '/') {
+        /* discard everything up to and including the newline */
+        for (;;) {
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+            if (ch == '\n') {
+                return yajl_tok_comment;
+            }
+        }
+    }
+    /* discard everything up to and including the closing star-slash */
+    for (;;) {
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+        if (ch != '*') {
+            continue;
+        }
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+        if (ch == '/') {
+            return yajl_tok_comment;
+        }
+        /* not the end: look at this character again, it may be a star */
+        unreadChar(lexer, offset);
+    }
+}
+/* Match the rest of a keyword literal (true, false, null) whose first
+ * character has already been consumed.  want holds the remaining
+ * characters and must not be empty.  Returns matched when every character
+ * agrees, yajl_tok_eof when the text ends first, and yajl_tok_error (with
+ * lexer->error = yajl_lex_invalid_string and the offending character
+ * pushed back) on the first mismatch. */
+static yajl_tok
+yajl_lex_keyword(yajl_lexer lexer, const unsigned char * jsonText,
+                 size_t jsonTextLen, size_t * offset,
+                 const char * want, yajl_tok matched)
+{
+    unsigned char c;
+    do {
+        if (*offset >= jsonTextLen) {
+            return yajl_tok_eof;
+        }
+        c = readChar(lexer, jsonText, offset);
+        if (c != *want) {
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_invalid_string;
+            return yajl_tok_error;
+        }
+    } while (*(++want));
+    return matched;
+}
+/* Lex the next token from jsonText, starting at *offset.
+ *
+ *   lexer       - lexer state; its buffer carries a token that was cut
+ *                 off at the end of an earlier chunk
+ *   jsonText    - the current chunk of input
+ *   jsonTextLen - number of bytes in jsonText
+ *   offset      - in/out cursor into jsonText, advanced as text is consumed
+ *   outBuf      - receives a pointer to the token's text (NULL on eof/error)
+ *   outLen      - receives the length of the token's text
+ *
+ * Returns the token type.  On yajl_tok_eof the text consumed from this
+ * chunk is appended to the lexer's buffer so that the token can be
+ * completed by a later call.  For string tokens outBuf/outLen exclude the
+ * surrounding quotes.  On yajl_tok_error the reason is stored in
+ * lexer->error. */
+yajl_tok
+yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
+             size_t jsonTextLen, size_t * offset,
+             const unsigned char ** outBuf, size_t * outLen)
+{
+    yajl_tok tok = yajl_tok_error;
+    unsigned char c;
+    size_t startOffset = *offset;
+    *outBuf = NULL;
+    *outLen = 0;
+    for (;;) {
+        assert(*offset <= jsonTextLen);
+        if (*offset >= jsonTextLen) {
+            tok = yajl_tok_eof;
+            goto lexed;
+        }
+        c = readChar(lexer, jsonText, offset);
+        switch (c) {
+            /* note the naming used by the token enum in this file: curly
+             * characters map to the "bracket" tokens and square characters
+             * to the "brace" tokens */
+            case '{':
+                tok = yajl_tok_left_bracket;
+                goto lexed;
+            case '}':
+                tok = yajl_tok_right_bracket;
+                goto lexed;
+            case '[':
+                tok = yajl_tok_left_brace;
+                goto lexed;
+            case ']':
+                tok = yajl_tok_right_brace;
+                goto lexed;
+            case ',':
+                tok = yajl_tok_comma;
+                goto lexed;
+            case ':':
+                tok = yajl_tok_colon;
+                goto lexed;
+            case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+                /* whitespace is skipped and is not part of any token */
+                startOffset++;
+                break;
+            case 't':
+                tok = yajl_lex_keyword(lexer, jsonText, jsonTextLen, offset,
+                                       "rue", yajl_tok_bool);
+                goto lexed;
+            case 'f':
+                tok = yajl_lex_keyword(lexer, jsonText, jsonTextLen, offset,
+                                       "alse", yajl_tok_bool);
+                goto lexed;
+            case 'n':
+                tok = yajl_lex_keyword(lexer, jsonText, jsonTextLen, offset,
+                                       "ull", yajl_tok_null);
+                goto lexed;
+            case '"': {
+                tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
+                                      jsonTextLen, offset);
+                goto lexed;
+            }
+            case '-':
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9': {
+                /* integer parsing wants to start from the beginning */
+                unreadChar(lexer, offset);
+                tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
+                                      jsonTextLen, offset);
+                goto lexed;
+            }
+            case '/':
+                /* hey, look, a probable comment!  If comments are disabled
+                 * it's an error. */
+                if (!lexer->allowComments) {
+                    unreadChar(lexer, offset);
+                    lexer->error = yajl_lex_unallowed_comment;
+                    tok = yajl_tok_error;
+                    goto lexed;
+                }
+                /* if comments are enabled, then we should try to lex
+                 * the thing.  possible outcomes are
+                 * - successful lex (tok_comment, which means continue),
+                 * - malformed comment opening (slash not followed by
+                 *   '*' or '/') (tok_error)
+                 * - eof hit. (tok_eof) */
+                tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
+                                       jsonTextLen, offset);
+                if (tok == yajl_tok_comment) {
+                    /* "error" is silly, but that's the initial
+                     * state of tok.  guilty until proven innocent. */
+                    tok = yajl_tok_error;
+                    yajl_buf_clear(lexer->buf);
+                    lexer->bufInUse = 0;
+                    startOffset = *offset;
+                    break;
+                }
+                /* hit error or eof, bail */
+                goto lexed;
+            default:
+                lexer->error = yajl_lex_invalid_char;
+                tok = yajl_tok_error;
+                goto lexed;
+        }
+    }
+  lexed:
+    /* need to append to buffer if the buffer is in use or
+     * if it's an EOF token */
+    if (tok == yajl_tok_eof || lexer->bufInUse) {
+        if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
+        lexer->bufInUse = 1;
+        yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
+        lexer->bufOff = 0;
+        if (tok != yajl_tok_eof) {
+            /* the token is complete: hand out the buffered text and stop
+             * reading from the buffer */
+            *outBuf = yajl_buf_data(lexer->buf);
+            *outLen = yajl_buf_len(lexer->buf);
+            lexer->bufInUse = 0;
+        }
+    } else if (tok != yajl_tok_error) {
+        /* the whole token lies inside the caller's text */
+        *outBuf = jsonText + startOffset;
+        *outLen = *offset - startOffset;
+    }
+    /* special case for strings. skip the quotes. */
+    if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
+    {
+        assert(*outLen >= 2);
+        (*outBuf)++;
+        *outLen -= 2;
+    }
+#ifdef YAJL_LEXER_DEBUG
+    if (tok == yajl_tok_error) {
+        printf("lexical error: %s\n",
+               yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
+    } else if (tok == yajl_tok_eof) {
+        printf("EOF hit\n");
+    } else {
+        printf("lexed %s: '", tokToStr(tok));
+        fwrite(*outBuf, 1, *outLen, stdout);
+        printf("'\n");
+    }
+#endif
+    return tok;
+}
+const char *
+yajl_lex_error_to_string(yajl_lex_error error)
+{
+    switch (error) {
+        case yajl_lex_e_ok:
+            return "ok, no error";
+        case yajl_lex_string_invalid_utf8:
+            return "invalid bytes in UTF8 string.";
+        case yajl_lex_string_invalid_escaped_char:
+            return "inside a string, '\\' occurs before a character "
+                   "which it may not.";
+        case yajl_lex_string_invalid_json_char:
+            return "invalid character inside string.";
+        case yajl_lex_string_invalid_hex_char:
+            return "invalid (non-hex) character occurs after '\\u' inside "
+                   "string.";
+        case yajl_lex_invalid_char:
+            return "invalid char in json text.";
+        case yajl_lex_invalid_string:
+            return "invalid string in json text.";
+        case yajl_lex_missing_integer_after_exponent:
+            return "malformed number, a digit is required after the exponent.";
+        case yajl_lex_missing_integer_after_decimal:
+            return "malformed number, a digit is required after the "
+                   "decimal point.";
+        case yajl_lex_missing_integer_after_minus:
+            return "malformed number, a digit is required after the "
+                   "minus sign.";
+        case yajl_lex_unallowed_comment:
+            return "probable comment found in input text, comments are "
+                   "not enabled.";
+    }
+    return "unknown error code";
+}
+/** allows access to more specific information about the lexical
+ *  error when yajl_lex_lex returns yajl_tok_error. */
+yajl_lex_error
+yajl_lex_get_error(yajl_lexer lexer)
+{
+    if (lexer == NULL) return (yajl_lex_error) -1;
+    return lexer->error;
+}
+/* Return the lexer's recorded line offset (its lineOff field).  A NULL
+ * lexer yields 0 rather than being dereferenced, in keeping with the NULL
+ * handling of yajl_lex_get_error. */
+size_t yajl_lex_current_line(yajl_lexer lexer)
+{
+    if (lexer == NULL) return 0;
+    return lexer->lineOff;
+}
+/* Return the lexer's recorded character offset (its charOff field).  A
+ * NULL lexer yields 0 rather than being dereferenced, in keeping with the
+ * NULL handling of yajl_lex_get_error. */
+size_t yajl_lex_current_char(yajl_lexer lexer)
+{
+    if (lexer == NULL) return 0;
+    return lexer->charOff;
+}
+/* Report the type of the next token without consuming it.  offset is
+ * taken by value, so the caller's cursor is untouched; the lexer's buffer
+ * length, buffer offset and in-use flag are captured before the lex and
+ * put back afterwards.  lexer->error is not among the restored fields. */
+yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
+                       size_t jsonTextLen, size_t offset)
+{
+    /* snapshot the buffer state that yajl_lex_lex may modify */
+    const size_t savedLen = yajl_buf_len(lexer->buf);
+    const size_t savedOff = lexer->bufOff;
+    const unsigned int savedInUse = lexer->bufInUse;
+    /* the token text itself is of no interest here */
+    const unsigned char * text;
+    size_t textLen;
+    const yajl_tok next = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
+                                       &text, &textLen);
+    /* roll the lexer back to where it was */
+    yajl_buf_truncate(lexer->buf, savedLen);
+    lexer->bufInUse = savedInUse;
+    lexer->bufOff = savedOff;
+    return next;
+}