RubyGems - benofsky-yajl-ruby - Versions diffs - 0.7.6 - Mend

benofsky-yajl-ruby 0.7.6

Files changed (143) hide show

data/.gitignore +9 -0
data/CHANGELOG.md +281 -0
data/MIT-LICENSE +20 -0
data/README.rdoc +320 -0
data/Rakefile +40 -0
data/VERSION.yml +5 -0
data/benchmark/encode.rb +58 -0
data/benchmark/encode_json_and_marshal.rb +42 -0
data/benchmark/encode_json_and_yaml.rb +53 -0
data/benchmark/http.rb +32 -0
data/benchmark/parse.rb +59 -0
data/benchmark/parse_json_and_marshal.rb +50 -0
data/benchmark/parse_json_and_yaml.rb +55 -0
data/benchmark/parse_stream.rb +54 -0
data/benchmark/subjects/item.json +1 -0
data/benchmark/subjects/ohai.json +1216 -0
data/benchmark/subjects/ohai.marshal_dump +0 -0
data/benchmark/subjects/ohai.yml +975 -0
data/benchmark/subjects/twitter_search.json +1 -0
data/benchmark/subjects/twitter_stream.json +430 -0
data/benchmark/subjects/unicode.json +1 -0
data/examples/encoding/chunked_encoding.rb +27 -0
data/examples/encoding/one_shot.rb +13 -0
data/examples/encoding/to_an_io.rb +12 -0
data/examples/http/twitter_search_api.rb +12 -0
data/examples/http/twitter_stream_api.rb +26 -0
data/examples/parsing/from_file.rb +14 -0
data/examples/parsing/from_stdin.rb +9 -0
data/examples/parsing/from_string.rb +13 -0
data/ext/api/yajl_common.h +85 -0
data/ext/api/yajl_gen.h +159 -0
data/ext/api/yajl_parse.h +196 -0
data/ext/extconf.rb +9 -0
data/ext/yajl.c +164 -0
data/ext/yajl_alloc.c +65 -0
data/ext/yajl_alloc.h +50 -0
data/ext/yajl_buf.c +119 -0
data/ext/yajl_buf.h +73 -0
data/ext/yajl_bytestack.h +85 -0
data/ext/yajl_encode.c +188 -0
data/ext/yajl_encode.h +50 -0
data/ext/yajl_ext.c +911 -0
data/ext/yajl_ext.h +128 -0
data/ext/yajl_gen.c +317 -0
data/ext/yajl_lex.c +747 -0
data/ext/yajl_lex.h +135 -0
data/ext/yajl_parser.c +450 -0
data/ext/yajl_parser.h +82 -0
data/lib/yajl/bzip2/stream_reader.rb +32 -0
data/lib/yajl/bzip2/stream_writer.rb +15 -0
data/lib/yajl/bzip2.rb +11 -0
data/lib/yajl/deflate/stream_reader.rb +44 -0
data/lib/yajl/deflate/stream_writer.rb +21 -0
data/lib/yajl/deflate.rb +6 -0
data/lib/yajl/gzip/stream_reader.rb +31 -0
data/lib/yajl/gzip/stream_writer.rb +14 -0
data/lib/yajl/gzip.rb +6 -0
data/lib/yajl/http_stream.rb +197 -0
data/lib/yajl/json_gem/encoding.rb +50 -0
data/lib/yajl/json_gem/parsing.rb +27 -0
data/lib/yajl/json_gem.rb +14 -0
data/lib/yajl.rb +93 -0
data/spec/encoding/encoding_spec.rb +234 -0
data/spec/global/global_spec.rb +55 -0
data/spec/http/fixtures/http.bzip2.dump +0 -0
data/spec/http/fixtures/http.chunked.dump +11 -0
data/spec/http/fixtures/http.deflate.dump +0 -0
data/spec/http/fixtures/http.error.dump +12 -0
data/spec/http/fixtures/http.gzip.dump +0 -0
data/spec/http/fixtures/http.html.dump +1220 -0
data/spec/http/fixtures/http.raw.dump +1226 -0
data/spec/http/http_delete_spec.rb +99 -0
data/spec/http/http_error_spec.rb +33 -0
data/spec/http/http_get_spec.rb +110 -0
data/spec/http/http_post_spec.rb +124 -0
data/spec/http/http_put_spec.rb +106 -0
data/spec/json_gem_compatibility/compatibility_spec.rb +203 -0
data/spec/parsing/active_support_spec.rb +64 -0
data/spec/parsing/chunked_spec.rb +98 -0
data/spec/parsing/fixtures/fail.15.json +1 -0
data/spec/parsing/fixtures/fail.16.json +1 -0
data/spec/parsing/fixtures/fail.17.json +1 -0
data/spec/parsing/fixtures/fail.26.json +1 -0
data/spec/parsing/fixtures/fail11.json +1 -0
data/spec/parsing/fixtures/fail12.json +1 -0
data/spec/parsing/fixtures/fail13.json +1 -0
data/spec/parsing/fixtures/fail14.json +1 -0
data/spec/parsing/fixtures/fail19.json +1 -0
data/spec/parsing/fixtures/fail20.json +1 -0
data/spec/parsing/fixtures/fail21.json +1 -0
data/spec/parsing/fixtures/fail22.json +1 -0
data/spec/parsing/fixtures/fail23.json +1 -0
data/spec/parsing/fixtures/fail24.json +1 -0
data/spec/parsing/fixtures/fail25.json +1 -0
data/spec/parsing/fixtures/fail27.json +2 -0
data/spec/parsing/fixtures/fail28.json +2 -0
data/spec/parsing/fixtures/fail3.json +1 -0
data/spec/parsing/fixtures/fail4.json +1 -0
data/spec/parsing/fixtures/fail5.json +1 -0
data/spec/parsing/fixtures/fail6.json +1 -0
data/spec/parsing/fixtures/fail9.json +1 -0
data/spec/parsing/fixtures/pass.array.json +6 -0
data/spec/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
data/spec/parsing/fixtures/pass.contacts.json +1 -0
data/spec/parsing/fixtures/pass.db100.xml.json +1 -0
data/spec/parsing/fixtures/pass.db1000.xml.json +1 -0
data/spec/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
data/spec/parsing/fixtures/pass.deep_arrays.json +1 -0
data/spec/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
data/spec/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
data/spec/parsing/fixtures/pass.doubles.json +1 -0
data/spec/parsing/fixtures/pass.empty_array.json +1 -0
data/spec/parsing/fixtures/pass.empty_string.json +1 -0
data/spec/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
data/spec/parsing/fixtures/pass.escaped_foobar.json +1 -0
data/spec/parsing/fixtures/pass.item.json +1 -0
data/spec/parsing/fixtures/pass.json-org-sample1.json +23 -0
data/spec/parsing/fixtures/pass.json-org-sample2.json +11 -0
data/spec/parsing/fixtures/pass.json-org-sample3.json +26 -0
data/spec/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
data/spec/parsing/fixtures/pass.json-org-sample4.json +89 -0
data/spec/parsing/fixtures/pass.json-org-sample5.json +27 -0
data/spec/parsing/fixtures/pass.map-spain.xml.json +1 -0
data/spec/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
data/spec/parsing/fixtures/pass.ns-soap.xml.json +1 -0
data/spec/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
data/spec/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
data/spec/parsing/fixtures/pass.numbers-int-4k.json +11 -0
data/spec/parsing/fixtures/pass.numbers-int-64k.json +154 -0
data/spec/parsing/fixtures/pass.twitter-search.json +1 -0
data/spec/parsing/fixtures/pass.twitter-search2.json +1 -0
data/spec/parsing/fixtures/pass.unicode.json +3315 -0
data/spec/parsing/fixtures/pass.yelp.json +1 -0
data/spec/parsing/fixtures/pass1.json +56 -0
data/spec/parsing/fixtures/pass2.json +1 -0
data/spec/parsing/fixtures/pass3.json +6 -0
data/spec/parsing/fixtures_spec.rb +41 -0
data/spec/parsing/one_off_spec.rb +81 -0
data/spec/rcov.opts +3 -0
data/spec/spec.opts +2 -0
data/spec/spec_helper.rb +16 -0
data/yajl-ruby.gemspec +203 -0
metadata +232 -0

data/ext/yajl_lex.c ADDED Viewed

@@ -0,0 +1,747 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "yajl_lex.h"
+#include "yajl_buf.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#ifdef YAJL_LEXER_DEBUG
+/* Map a token type to a short name for the YAJL_LEXER_DEBUG trace output.
+ * Left/right delimiter tokens get distinct names (named after their enum
+ * constants) so a trace line identifies the token unambiguously.  Any
+ * value without a case below yields "unknown". */
+static const char *
+tokToStr(yajl_tok tok)
+{
+    switch (tok) {
+        case yajl_tok_bool: return "bool";
+        case yajl_tok_colon: return "colon";
+        case yajl_tok_comma: return "comma";
+        case yajl_tok_eof: return "eof";
+        case yajl_tok_error: return "error";
+        case yajl_tok_left_brace: return "left_brace";
+        case yajl_tok_left_bracket: return "left_bracket";
+        case yajl_tok_null: return "null";
+        case yajl_tok_integer: return "integer";
+        case yajl_tok_double: return "double";
+        case yajl_tok_right_brace: return "right_brace";
+        case yajl_tok_right_bracket: return "right_bracket";
+        case yajl_tok_string: return "string";
+        case yajl_tok_string_with_escapes: return "string_with_escapes";
+        case yajl_tok_comment: return "comment";
+    }
+    return "unknown";
+}
+#endif
+/* Impact of the stream parsing feature on the lexer:
+ *
+ * YAJL supports stream parsing.  That is, the ability to parse the first
+ * bits of a chunk of JSON before the last bits are available (still on
+ * the network or disk).  This makes the lexer more complex.  The
+ * responsibility of the lexer is to handle transparently the case where
+ * a chunk boundary falls in the middle of a token.  This is
+ * accomplished via a buffer and a character reading abstraction.
+ *
+ * Overview of implementation
+ *
+ * When we lex to end of input string before end of token is hit, we
+ * copy all of the input text composing the token into our lexBuf.
+ *
+ * Every time we read a character, we do so through the readChar function.
+ * readChar's responsibility is to handle pulling all chars from the buffer
+ * before pulling chars from input text
+ */
+struct yajl_lexer_t { /* lexer state carried from one yajl_lex_lex call to the next */
+    /* the overall line and char offset into the data
+     * NOTE(review): zeroed at allocation; no code visible in this file
+     * advances them -- confirm before relying on the reported values */
+    unsigned int lineOff;
+    unsigned int charOff;
+    /* the most recent lexical error, read back via yajl_lex_get_error */
+    yajl_lex_error error;
+    /* an input buffer to handle the case where a token is spread over
+     * multiple chunks */
+    yajl_buf buf;
+    /* in the case where we have data in the lexBuf, bufOff holds
+     * the current offset into the lexBuf. */
+    unsigned int bufOff;
+    /* are we using the lex buf? (non-zero means readChar drains buf first) */
+    unsigned int bufInUse;
+    /* shall we allow comments? */
+    unsigned int allowComments;
+    /* shall we validate utf8 inside strings? */
+    unsigned int validateUTF8;
+    yajl_alloc_funcs * alloc; /* allocator stored at creation; used to free the lexer */
+};
+/* readChar: fetch the next input byte and advance past it.  While the lex
+ * buffer is in use and still holds unread bytes, the byte comes from the
+ * buffer (advancing lxr->bufOff); otherwise it comes from txt at *off
+ * (advancing *off).  Callers must bounds-check *off themselves.
+ * Every use of the lxr argument is parenthesized so that any pointer
+ * expression can be passed safely. */
+#define readChar(lxr, txt, off)                      \
+    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ? \
+     (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \
+     ((txt)[(*(off))++]))
+/* unreadChar: step back one byte.  If anything has been consumed from the
+ * current text chunk (*off > 0) the text offset is decremented; otherwise
+ * the byte must have come from the lex buffer, so bufOff is decremented. */
+#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
+/* Create a lexer.
+ *
+ *   alloc          allocation routines; stored in the lexer and used again
+ *                  when it is freed
+ *   allowComments  non-zero to accept comments in the input
+ *   validateUTF8   non-zero to validate UTF-8 sequences inside strings
+ *
+ * Returns the new, zero-initialized lexer, or NULL when the lexer or its
+ * token buffer could not be allocated. */
+yajl_lexer
+yajl_lex_alloc(yajl_alloc_funcs * alloc,
+               unsigned int allowComments, unsigned int validateUTF8)
+{
+    yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
+    /* never memset or dereference a failed allocation */
+    if (lxr == NULL) return NULL;
+    memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
+    lxr->buf = yajl_buf_alloc(alloc);
+    if (lxr->buf == NULL) {
+        /* don't hand back a half-built lexer */
+        YA_FREE(alloc, lxr);
+        return NULL;
+    }
+    lxr->allowComments = allowComments;
+    lxr->validateUTF8 = validateUTF8;
+    lxr->alloc = alloc;
+    return lxr;
+}
+/* Reset an existing lexer so it can be reused for a new document without
+ * reallocating: the token buffer is emptied, buffer bookkeeping is
+ * cleared, and both position counters (line and char) are zeroed.
+ * The allowComments/validateUTF8 settings and the allocator are kept.
+ * Returns the same lexer that was passed in. */
+yajl_lexer
+yajl_lex_realloc(yajl_lexer orig) {
+    yajl_buf_clear(orig->buf);
+    orig->bufInUse = 0;
+    orig->bufOff = 0;
+    orig->lineOff = 0;
+    /* was a second "lineOff = 0", which left charOff stale across reuse */
+    orig->charOff = 0;
+    return orig;
+}
+/* Destroy a lexer: release its token buffer first, then the lexer
+ * structure itself through the allocator it was created with. */
+void
+yajl_lex_free(yajl_lexer lxr)
+{
+    yajl_alloc_funcs * funcs = lxr->alloc;
+    yajl_buf_free(lxr->buf);
+    YA_FREE(funcs, lxr);
+}
+/* a lookup table which lets us quickly determine three things:
+ * VEC - valid escaped control char (may follow a backslash in a string)
+ * IJC - invalid json char (may not appear unescaped inside a string)
+ * VHC - valid hex char (may appear in a \u escape)
+ * note.  the solidus '/' may be escaped or not.
+ * note.  the table has 256 entries so that any unsigned char can index
+ *        it without a range check.
+ */
+#define VEC 1
+#define IJC 2
+#define VHC 4
+static const char charLookupTable[256] =
+{
+/*00*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*08*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*10*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*18*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*20*/ 0      , 0      , VEC|IJC, 0      , 0      , 0      , 0      , 0      ,
+/*28*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , VEC    ,
+/*30*/ VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    ,
+/*38*/ VHC    , VHC    , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*40*/ 0      , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , 0      ,
+/*48*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*50*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*58*/ 0      , 0      , 0      , 0      , VEC|IJC, 0      , 0      , 0      ,
+/*60*/ 0      , VHC    , VEC|VHC, VHC    , VHC    , VHC    , VEC|VHC, 0      ,
+/*68*/ 0      , 0      , 0      , 0      , 0      , 0      , VEC    , 0      ,
+/*70*/ 0      , 0      , VEC    , 0      , VEC    , 0      , 0      , 0      ,
+/*78*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/* include these so we don't have to always check the range of the char
+ * (bytes 0x80-0xff carry no flags) */
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0
+};
+/** Validate one UTF-8 encoded codepoint whose lead byte (curChar) has
+ *  already been read.
+ *
+ *  The lead byte determines how many continuation bytes (10xxxxxx) must
+ *  follow: none for a single-byte char, then one, two or three for the
+ *  two-, three- and four-byte forms.  Each continuation byte is read
+ *  through readChar, so the offset advances as bytes are checked.
+ *
+ *  returns:
+ *    yajl_tok_string - the lead byte and all continuation bytes are
+ *                      well formed
+ *    yajl_tok_eof - the text offset reached jsonTextLen before all
+ *                   continuation bytes could be read
+ *    yajl_tok_error - the lead byte is not a recognised lead byte, or a
+ *                     continuation byte is malformed
+ *
+ *  NOTE: nothing is unread on error; the offset is left just past the
+ *  last byte that was examined. */
+#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
+static yajl_tok
+yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
+                   unsigned int jsonTextLen, unsigned int * offset,
+                   unsigned char curChar)
+{
+    unsigned int trailing;
+
+    /* classify the lead byte */
+    if (curChar <= 0x7f) {
+        return yajl_tok_string;          /* 0xxxxxxx: single byte */
+    }
+    if ((curChar >> 5) == 0x6) {
+        trailing = 1;                    /* 110xxxxx: two byte */
+    } else if ((curChar >> 4) == 0x0e) {
+        trailing = 2;                    /* 1110xxxx: three byte */
+    } else if ((curChar >> 3) == 0x1e) {
+        trailing = 3;                    /* 11110xxx: four byte */
+    } else {
+        return yajl_tok_error;
+    }
+
+    /* every trailing byte must look like 10xxxxxx */
+    for (; trailing > 0; trailing--) {
+        UTF8_CHECK_EOF;
+        curChar = readChar(lexer, jsonText, offset);
+        if ((curChar >> 6) != 0x2) {
+            return yajl_tok_error;
+        }
+    }
+    return yajl_tok_string;
+}
+/* Lex the remainder of a JSON string.  The caller has already consumed
+ * the opening '"'; bytes are read one at a time until the closing quote.
+ *
+ * Returns:
+ * yajl_tok_string: the string was lexed successfully and contained no
+ *                  backslash escapes.  The closing '"' has been consumed.
+ * yajl_tok_string_with_escapes: as above, but at least one backslash
+ *                  escape was seen.
+ * yajl_tok_eof: the end of the text was reached before the closing quote.
+ * yajl_tok_error: the string contains a disallowed byte, a bad escape,
+ *               a non-hex digit after \u, or (when validating) invalid
+ *               UTF-8.  lexer->error says which.  For the first three the
+ *               offending byte is unread so the offset points at it.
+ */
+#define STR_CHECK_EOF \
+if (*offset >= jsonTextLen) { \
+   tok = yajl_tok_eof; \
+   goto finish_string_lex; \
+}
+static yajl_tok
+yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
+                unsigned int jsonTextLen, unsigned int * offset)
+{
+    yajl_tok tok = yajl_tok_error;
+    int sawEscape = 0;
+
+    for (;;) {
+        unsigned char ch;
+
+        STR_CHECK_EOF;
+        ch = readChar(lexer, jsonText, offset);
+
+        /* an unescaped quote ends the string */
+        if (ch == '"') {
+            tok = yajl_tok_string;
+            break;
+        }
+
+        /* backslash: either \uXXXX or one of the single-char escapes */
+        if (ch == '\\') {
+            unsigned int hexLeft;
+
+            sawEscape = 1;
+            STR_CHECK_EOF;
+            ch = readChar(lexer, jsonText, offset);
+
+            if (ch != 'u') {
+                if (charLookupTable[ch] & VEC) {
+                    continue;
+                }
+                /* back up to offending char */
+                unreadChar(lexer, offset);
+                lexer->error = yajl_lex_string_invalid_escaped_char;
+                goto finish_string_lex;
+            }
+
+            /* \u must be followed by exactly four hex digits */
+            for (hexLeft = 4; hexLeft > 0; hexLeft--) {
+                STR_CHECK_EOF;
+                ch = readChar(lexer, jsonText, offset);
+                if (charLookupTable[ch] & VHC) {
+                    continue;
+                }
+                /* back up to offending char */
+                unreadChar(lexer, offset);
+                lexer->error = yajl_lex_string_invalid_hex_char;
+                goto finish_string_lex;
+            }
+            continue;
+        }
+
+        /* bytes flagged IJC may never appear unescaped */
+        if (charLookupTable[ch] & IJC) {
+            /* back up to offending char */
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_string_invalid_json_char;
+            goto finish_string_lex;
+        }
+
+        /* optional extra work: check multi-byte sequences */
+        if (lexer->validateUTF8) {
+            yajl_tok utf8 = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
+                                               offset, ch);
+            if (utf8 == yajl_tok_eof) {
+                tok = yajl_tok_eof;
+                goto finish_string_lex;
+            }
+            if (utf8 == yajl_tok_error) {
+                lexer->error = yajl_lex_string_invalid_utf8;
+                goto finish_string_lex;
+            }
+        }
+        /* byte accepted; keep scanning */
+    }
+
+  finish_string_lex:
+    /* let the parser know whether the string still needs unescaping */
+    if (tok == yajl_tok_string && sawEscape) {
+        tok = yajl_tok_string_with_escapes;
+    }
+    return tok;
+}
+/* bail out with yajl_tok_eof once the text offset reaches the end */
+#define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
+/* Lex a JSON number starting at its first character ('-' or a digit).
+ *
+ * Returns yajl_tok_integer for a plain integer, yajl_tok_double when a
+ * fraction and/or exponent is present, yajl_tok_eof when the text ends
+ * before the number is known to be complete, and yajl_tok_error (with
+ * lexer->error set) when a required digit is missing.
+ *
+ * A number is only known to be finished once the character after it has
+ * been seen, so the lexer always reads one character too many and then
+ * unreads it. */
+static yajl_tok
+yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
+                unsigned int jsonTextLen, unsigned int * offset)
+{
+    unsigned char ch;
+    yajl_tok kind = yajl_tok_integer;
+
+    RETURN_IF_EOF;
+    ch = readChar(lexer, jsonText, offset);
+
+    /* sign: an optional leading '-' */
+    if (ch == '-') {
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+    }
+
+    /* integer part: a lone '0', or a run of digits starting with 1-9 */
+    if (ch < '0' || ch > '9') {
+        unreadChar(lexer, offset);
+        lexer->error = yajl_lex_missing_integer_after_minus;
+        return yajl_tok_error;
+    }
+    if (ch == '0') {
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+    } else {
+        for (;;) {
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+            if (ch < '0' || ch > '9') {
+                break;
+            }
+        }
+    }
+
+    /* fraction: '.' followed by at least one digit makes it a double */
+    if (ch == '.') {
+        int fracDigits = 0;
+
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+        while (ch >= '0' && ch <= '9') {
+            fracDigits++;
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+        }
+        if (fracDigits == 0) {
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_missing_integer_after_decimal;
+            return yajl_tok_error;
+        }
+        kind = yajl_tok_double;
+    }
+
+    /* exponent: 'e'/'E', optional sign, at least one digit */
+    if (ch == 'e' || ch == 'E') {
+        RETURN_IF_EOF;
+        ch = readChar(lexer, jsonText, offset);
+
+        if (ch == '+' || ch == '-') {
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+        }
+
+        if (ch < '0' || ch > '9') {
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_missing_integer_after_exponent;
+            return yajl_tok_error;
+        }
+        do {
+            RETURN_IF_EOF;
+            ch = readChar(lexer, jsonText, offset);
+        } while (ch >= '0' && ch <= '9');
+
+        kind = yajl_tok_double;
+    }
+
+    /* give back the lookahead character that ended the number */
+    unreadChar(lexer, offset);
+    return kind;
+}
+/* Lex a comment whose leading '/' has already been consumed.
+ *
+ * A second '/' starts a line comment that runs through the next '\n';
+ * a '*' starts a block comment that runs through the closing star-slash.
+ * Returns yajl_tok_comment once the comment has been skipped,
+ * yajl_tok_eof if the text ends first, and yajl_tok_error (with
+ * lexer->error set to yajl_lex_invalid_char) if the '/' is followed by
+ * anything else. */
+static yajl_tok
+yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
+                 unsigned int jsonTextLen, unsigned int * offset)
+{
+    unsigned char ch;
+    yajl_tok result = yajl_tok_comment;
+
+    RETURN_IF_EOF;
+    ch = readChar(lexer, jsonText, offset);
+
+    switch (ch) {
+        case '/':
+            /* line comment: discard through the newline */
+            do {
+                RETURN_IF_EOF;
+                ch = readChar(lexer, jsonText, offset);
+            } while (ch != '\n');
+            break;
+
+        case '*': {
+            /* block comment: discard through the terminator */
+            int closed = 0;
+            while (!closed) {
+                RETURN_IF_EOF;
+                ch = readChar(lexer, jsonText, offset);
+                if (ch != '*') {
+                    continue;
+                }
+                RETURN_IF_EOF;
+                ch = readChar(lexer, jsonText, offset);
+                if (ch == '/') {
+                    closed = 1;
+                } else {
+                    /* not the end; re-examine this char next time round */
+                    unreadChar(lexer, offset);
+                }
+            }
+            break;
+        }
+
+        default:
+            lexer->error = yajl_lex_invalid_char;
+            result = yajl_tok_error;
+            break;
+    }
+    return result;
+}
+yajl_tok
+yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
+             unsigned int jsonTextLen, unsigned int * offset,
+             const unsigned char ** outBuf, unsigned int * outLen)
+{   /* Lex the next token from jsonText, starting at *offset.
+     *
+     * Whitespace (and, when allowed, comments) before the token is
+     * skipped.  On return *offset has been advanced past whatever was
+     * consumed and the token type is returned:
+     *  - a real token: *outBuf / *outLen describe its text (for strings
+     *    the surrounding quotes are excluded);
+     *  - yajl_tok_eof: the chunk ended before a token was complete; the
+     *    partial text is saved in lexer->buf so a later call with the
+     *    next chunk can pick it up through readChar;
+     *  - yajl_tok_error: lexer->error holds the reason.
+     */
+    yajl_tok tok = yajl_tok_error;
+    unsigned char c;
+    unsigned int startOffset = *offset; /* where the current token begins in jsonText */
+    *outBuf = NULL;
+    *outLen = 0;
+    for (;;) {
+        assert(*offset <= jsonTextLen);
+        if (*offset >= jsonTextLen) {
+            tok = yajl_tok_eof;
+            goto lexed;
+        }
+        c = readChar(lexer, jsonText, offset);
+        switch (c) {
+            /* NOTE: token naming here is '{' '}' -> *_bracket and
+             * '[' ']' -> *_brace */
+            case '{':
+                tok = yajl_tok_left_bracket;
+                goto lexed;
+            case '}':
+                tok = yajl_tok_right_bracket;
+                goto lexed;
+            case '[':
+                tok = yajl_tok_left_brace;
+                goto lexed;
+            case ']':
+                tok = yajl_tok_right_brace;
+                goto lexed;
+            case ',':
+                tok = yajl_tok_comma;
+                goto lexed;
+            case ':':
+                tok = yajl_tok_colon;
+                goto lexed;
+            case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+                startOffset++; /* whitespace is not part of any token: move the token start along */
+                break;
+            case 't': { /* literal "true": match the remaining characters one by one */
+                const char * want = "rue";
+                do {
+                    if (*offset >= jsonTextLen) {
+                        tok = yajl_tok_eof;
+                        goto lexed;
+                    }
+                    c = readChar(lexer, jsonText, offset);
+                    if (c != *want) {
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_invalid_string;
+                        tok = yajl_tok_error;
+                        goto lexed;
+                    }
+                } while (*(++want));
+                tok = yajl_tok_bool;
+                goto lexed;
+            }
+            case 'f': { /* literal "false" */
+                const char * want = "alse";
+                do {
+                    if (*offset >= jsonTextLen) {
+                        tok = yajl_tok_eof;
+                        goto lexed;
+                    }
+                    c = readChar(lexer, jsonText, offset);
+                    if (c != *want) {
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_invalid_string;
+                        tok = yajl_tok_error;
+                        goto lexed;
+                    }
+                } while (*(++want));
+                tok = yajl_tok_bool;
+                goto lexed;
+            }
+            case 'n': { /* literal "null" */
+                const char * want = "ull";
+                do {
+                    if (*offset >= jsonTextLen) {
+                        tok = yajl_tok_eof;
+                        goto lexed;
+                    }
+                    c = readChar(lexer, jsonText, offset);
+                    if (c != *want) {
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_invalid_string;
+                        tok = yajl_tok_error;
+                        goto lexed;
+                    }
+                } while (*(++want));
+                tok = yajl_tok_null;
+                goto lexed;
+            }
+            case '"': { /* opening quote consumed; the string lexer handles the rest */
+                tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
+                                      jsonTextLen, offset);
+                goto lexed;
+            }
+            case '-':
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9': {
+                /* integer parsing wants to start from the beginning */
+                unreadChar(lexer, offset);
+                tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
+                                      jsonTextLen, offset);
+                goto lexed;
+            }
+            case '/':
+                /* hey, look, a probable comment!  If comments are disabled
+                 * it's an error. */
+                if (!lexer->allowComments) {
+                    unreadChar(lexer, offset);
+                    lexer->error = yajl_lex_unallowed_comment;
+                    tok = yajl_tok_error;
+                    goto lexed;
+                }
+                /* if comments are enabled, then we should try to lex
+                 * the thing.  possible outcomes are
+                 * - successful lex (tok_comment, which means continue),
+                 * - malformed comment opening (slash not followed by
+                 *   '*' or '/') (tok_error)
+                 * - eof hit. (tok_eof) */
+                tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
+                                       jsonTextLen, offset);
+                if (tok == yajl_tok_comment) {
+                    /* "error" is silly, but that's the initial
+                     * state of tok.  guilty until proven innocent. */
+                    tok = yajl_tok_error;
+                    yajl_buf_clear(lexer->buf); /* drop any buffered comment text */
+                    lexer->bufInUse = 0;
+                    startOffset = *offset; /* the next token starts after the comment */
+                    break;
+                }
+                /* hit error or eof, bail */
+                goto lexed;
+            default: /* note: unlike the other error paths, the offending char is not unread */
+                lexer->error = yajl_lex_invalid_char;
+                tok = yajl_tok_error;
+                goto lexed;
+        }
+    }
+  lexed:
+    /* need to append to buffer if the buffer is in use or
+     * if it's an EOF token */
+    if (tok == yajl_tok_eof || lexer->bufInUse) {
+        if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
+        lexer->bufInUse = 1;
+        yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
+        lexer->bufOff = 0; /* the next read from the buffer starts at its beginning */
+        if (tok != yajl_tok_eof) {
+            /* token finished using buffered text: hand out the buffer
+             * contents and stop reading from the buffer */
+            *outBuf = yajl_buf_data(lexer->buf);
+            *outLen = yajl_buf_len(lexer->buf);
+            lexer->bufInUse = 0;
+        }
+    } else if (tok != yajl_tok_error) {
+        /* token lies entirely within this chunk: point straight into jsonText */
+        *outBuf = jsonText + startOffset;
+        *outLen = *offset - startOffset;
+    }
+    /* special case for strings. skip the quotes. */
+    if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
+    {
+        assert(*outLen >= 2);
+        (*outBuf)++;
+        *outLen -= 2;
+    }
+#ifdef YAJL_LEXER_DEBUG
+    if (tok == yajl_tok_error) {
+        printf("lexical error: %s\n",
+               yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
+    } else if (tok == yajl_tok_eof) {
+        printf("EOF hit\n");
+    } else {
+        printf("lexed %s: '", tokToStr(tok));
+        fwrite(*outBuf, 1, *outLen, stdout);
+        printf("'\n");
+    }
+#endif
+    return tok;
+}
+const char *
+yajl_lex_error_to_string(yajl_lex_error error)
+{
+    switch (error) {
+        case yajl_lex_e_ok:
+            return "ok, no error";
+        case yajl_lex_string_invalid_utf8:
+            return "invalid bytes in UTF8 string.";
+        case yajl_lex_string_invalid_escaped_char:
+            return "inside a string, '\\' occurs before a character "
+                   "which it may not.";
+        case yajl_lex_string_invalid_json_char:
+            return "invalid character inside string.";
+        case yajl_lex_string_invalid_hex_char:
+            return "invalid (non-hex) character occurs after '\\u' inside "
+                   "string.";
+        case yajl_lex_invalid_char:
+            return "invalid char in json text.";
+        case yajl_lex_invalid_string:
+            return "invalid string in json text.";
+        case yajl_lex_missing_integer_after_exponent:
+            return "malformed number, a digit is required after the exponent.";
+        case yajl_lex_missing_integer_after_decimal:
+            return "malformed number, a digit is required after the "
+                   "decimal point.";
+        case yajl_lex_missing_integer_after_minus:
+            return "malformed number, a digit is required after the "
+                   "minus sign.";
+        case yajl_lex_unallowed_comment:
+            return "probable comment found in input text, comments are "
+                   "not enabled.";
+    }
+    return "unknown error code";
+}
+/** allows access to more specific information about the lexical
+ *  error when yajl_lex_lex returns yajl_tok_error. */
+yajl_lex_error
+yajl_lex_get_error(yajl_lexer lexer)
+{
+    if (lexer == NULL) return (yajl_lex_error) -1;
+    return lexer->error;
+}
+/* Report the line offset stored in the lexer.  The lexer must not be NULL. */
+unsigned int yajl_lex_current_line(yajl_lexer lexer)
+{
+    const unsigned int line = lexer->lineOff;
+    return line;
+}
+/* Report the character offset stored in the lexer.  The lexer must not be NULL. */
+unsigned int yajl_lex_current_char(yajl_lexer lexer)
+{
+    const unsigned int column = lexer->charOff;
+    return column;
+}
+/* Look at the next token without consuming it.
+ *
+ * offset is passed by value, so the caller's position is untouched.  The
+ * lexer's buffer state (length, read offset, in-use flag) is snapshotted
+ * before lexing and put back afterwards.  Only the token type is
+ * returned; the token text is discarded. */
+yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
+                       unsigned int jsonTextLen, unsigned int offset)
+{
+    const unsigned char * tokText;
+    unsigned int tokLen;
+    /* snapshot the buffer bookkeeping */
+    const unsigned int savedInUse = lexer->bufInUse;
+    const unsigned int savedOff = lexer->bufOff;
+    const unsigned int savedLen = yajl_buf_len(lexer->buf);
+    yajl_tok peeked = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
+                                   &tokText, &tokLen);
+
+    /* undo whatever the lex did to the buffer */
+    lexer->bufOff = savedOff;
+    lexer->bufInUse = savedInUse;
+    yajl_buf_truncate(lexer->buf, savedLen);
+    return peeked;
+}