RubyGems - mittens - Versions diffs - 0.1.0 - Mend

mittens 0.1.0

Files changed (137) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +3 -0
data/Gemfile +7 -0
data/LICENSE.txt +30 -0
data/README.md +62 -0
data/Rakefile +21 -0
data/ext/mittens/ext.c +96 -0
data/ext/mittens/extconf.rb +12 -0
data/lib/mittens/version.rb +3 -0
data/lib/mittens.rb +7 -0
data/mittens.gemspec +22 -0
data/vendor/snowball/.gitignore +26 -0
data/vendor/snowball/.travis.yml +112 -0
data/vendor/snowball/AUTHORS +27 -0
data/vendor/snowball/CONTRIBUTING.rst +216 -0
data/vendor/snowball/COPYING +29 -0
data/vendor/snowball/GNUmakefile +742 -0
data/vendor/snowball/NEWS +754 -0
data/vendor/snowball/README.rst +37 -0
data/vendor/snowball/ada/README.md +74 -0
data/vendor/snowball/ada/generate/generate.adb +83 -0
data/vendor/snowball/ada/generate.gpr +21 -0
data/vendor/snowball/ada/src/stemmer.adb +620 -0
data/vendor/snowball/ada/src/stemmer.ads +219 -0
data/vendor/snowball/ada/src/stemwords.adb +70 -0
data/vendor/snowball/ada/stemmer_config.gpr +83 -0
data/vendor/snowball/ada/stemwords.gpr +21 -0
data/vendor/snowball/algorithms/arabic.sbl +558 -0
data/vendor/snowball/algorithms/armenian.sbl +301 -0
data/vendor/snowball/algorithms/basque.sbl +149 -0
data/vendor/snowball/algorithms/catalan.sbl +202 -0
data/vendor/snowball/algorithms/danish.sbl +93 -0
data/vendor/snowball/algorithms/dutch.sbl +164 -0
data/vendor/snowball/algorithms/english.sbl +229 -0
data/vendor/snowball/algorithms/finnish.sbl +197 -0
data/vendor/snowball/algorithms/french.sbl +254 -0
data/vendor/snowball/algorithms/german.sbl +139 -0
data/vendor/snowball/algorithms/german2.sbl +145 -0
data/vendor/snowball/algorithms/greek.sbl +701 -0
data/vendor/snowball/algorithms/hindi.sbl +323 -0
data/vendor/snowball/algorithms/hungarian.sbl +241 -0
data/vendor/snowball/algorithms/indonesian.sbl +192 -0
data/vendor/snowball/algorithms/irish.sbl +149 -0
data/vendor/snowball/algorithms/italian.sbl +202 -0
data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
data/vendor/snowball/algorithms/lovins.sbl +208 -0
data/vendor/snowball/algorithms/nepali.sbl +92 -0
data/vendor/snowball/algorithms/norwegian.sbl +80 -0
data/vendor/snowball/algorithms/porter.sbl +139 -0
data/vendor/snowball/algorithms/portuguese.sbl +218 -0
data/vendor/snowball/algorithms/romanian.sbl +236 -0
data/vendor/snowball/algorithms/russian.sbl +221 -0
data/vendor/snowball/algorithms/serbian.sbl +2379 -0
data/vendor/snowball/algorithms/spanish.sbl +230 -0
data/vendor/snowball/algorithms/swedish.sbl +72 -0
data/vendor/snowball/algorithms/tamil.sbl +405 -0
data/vendor/snowball/algorithms/turkish.sbl +470 -0
data/vendor/snowball/algorithms/yiddish.sbl +460 -0
data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
data/vendor/snowball/charsets/cp850.sbl +130 -0
data/vendor/snowball/compiler/analyser.c +1547 -0
data/vendor/snowball/compiler/driver.c +615 -0
data/vendor/snowball/compiler/generator.c +1748 -0
data/vendor/snowball/compiler/generator_ada.c +1702 -0
data/vendor/snowball/compiler/generator_csharp.c +1322 -0
data/vendor/snowball/compiler/generator_go.c +1278 -0
data/vendor/snowball/compiler/generator_java.c +1313 -0
data/vendor/snowball/compiler/generator_js.c +1316 -0
data/vendor/snowball/compiler/generator_pascal.c +1387 -0
data/vendor/snowball/compiler/generator_python.c +1337 -0
data/vendor/snowball/compiler/generator_rust.c +1295 -0
data/vendor/snowball/compiler/header.h +418 -0
data/vendor/snowball/compiler/space.c +286 -0
data/vendor/snowball/compiler/syswords.h +86 -0
data/vendor/snowball/compiler/syswords2.h +13 -0
data/vendor/snowball/compiler/tokeniser.c +567 -0
data/vendor/snowball/csharp/.gitignore +8 -0
data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
data/vendor/snowball/csharp/Stemwords/App.config +6 -0
data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
data/vendor/snowball/doc/TODO +12 -0
data/vendor/snowball/doc/libstemmer_c_README +148 -0
data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
data/vendor/snowball/doc/libstemmer_java_README +67 -0
data/vendor/snowball/doc/libstemmer_js_README +48 -0
data/vendor/snowball/doc/libstemmer_python_README +113 -0
data/vendor/snowball/examples/stemwords.c +204 -0
data/vendor/snowball/go/README.md +55 -0
data/vendor/snowball/go/among.go +16 -0
data/vendor/snowball/go/env.go +403 -0
data/vendor/snowball/go/stemwords/generate.go +68 -0
data/vendor/snowball/go/stemwords/main.go +68 -0
data/vendor/snowball/go/util.go +34 -0
data/vendor/snowball/iconv.py +50 -0
data/vendor/snowball/include/libstemmer.h +78 -0
data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
data/vendor/snowball/javascript/base-stemmer.js +294 -0
data/vendor/snowball/javascript/stemwords.js +106 -0
data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
data/vendor/snowball/libstemmer/modules.txt +63 -0
data/vendor/snowball/libstemmer/test.c +34 -0
data/vendor/snowball/pascal/.gitignore +4 -0
data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
data/vendor/snowball/pascal/generate.pl +23 -0
data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
data/vendor/snowball/python/MANIFEST.in +7 -0
data/vendor/snowball/python/create_init.py +54 -0
data/vendor/snowball/python/setup.cfg +6 -0
data/vendor/snowball/python/setup.py +81 -0
data/vendor/snowball/python/snowballstemmer/among.py +13 -0
data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
data/vendor/snowball/python/stemwords.py +101 -0
data/vendor/snowball/python/testapp.py +28 -0
data/vendor/snowball/runtime/api.c +58 -0
data/vendor/snowball/runtime/api.h +32 -0
data/vendor/snowball/runtime/header.h +61 -0
data/vendor/snowball/runtime/utilities.c +513 -0
data/vendor/snowball/rust/Cargo.toml +7 -0
data/vendor/snowball/rust/build.rs +55 -0
data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
data/vendor/snowball/rust/src/main.rs +102 -0
data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
data/vendor/snowball/rust/src/snowball/among.rs +6 -0
data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
data/vendor/snowball/tests/stemtest.c +95 -0
metadata +178 -0

data/vendor/snowball/compiler/syswords.h ADDED Viewed

@@ -0,0 +1,86 @@
+/* Table of Snowball system words (operators and keywords), searched by
+ * find_word() in tokeniser.c.
+ *
+ * Entry 0 is a sentinel: its string is empty and its code field holds the
+ * number of entries in the table (82+1), which find_word() reads through
+ * vocab->code as the upper bound of its binary search.
+ *
+ * The remaining entries are ordered by length first and then by byte value,
+ * which is the ordering compare_words() implements.  c_insert appears twice:
+ * both "<+" and "insert" map to it.
+ */
+static const struct system_word vocab[82+1] = {
+  { 0, (const byte *)"", 82+1},
+  { 1, (const byte *)"$",             c_dollar },
+  { 1, (const byte *)"(",             c_bra },
+  { 1, (const byte *)")",             c_ket },
+  { 1, (const byte *)"*",             c_multiply },
+  { 1, (const byte *)"+",             c_plus },
+  { 1, (const byte *)"-",             c_minus },
+  { 1, (const byte *)"/",             c_divide },
+  { 1, (const byte *)"<",             c_ls },
+  { 1, (const byte *)"=",             c_assign },
+  { 1, (const byte *)">",             c_gr },
+  { 1, (const byte *)"?",             c_debug },
+  { 1, (const byte *)"[",             c_leftslice },
+  { 1, (const byte *)"]",             c_rightslice },
+  { 2, (const byte *)"!=",            c_ne },
+  { 2, (const byte *)"*=",            c_multiplyassign },
+  { 2, (const byte *)"+=",            c_plusassign },
+  { 2, (const byte *)"-=",            c_minusassign },
+  { 2, (const byte *)"->",            c_sliceto },
+  { 2, (const byte *)"/*",            c_comment2 },
+  { 2, (const byte *)"//",            c_comment1 },
+  { 2, (const byte *)"/=",            c_divideassign },
+  { 2, (const byte *)"<+",            c_insert },
+  { 2, (const byte *)"<-",            c_slicefrom },
+  { 2, (const byte *)"<=",            c_le },
+  { 2, (const byte *)"==",            c_eq },
+  { 2, (const byte *)"=>",            c_assignto },
+  { 2, (const byte *)">=",            c_ge },
+  { 2, (const byte *)"as",            c_as },
+  { 2, (const byte *)"do",            c_do },
+  { 2, (const byte *)"or",            c_or },
+  { 3, (const byte *)"and",           c_and },
+  { 3, (const byte *)"for",           c_for },
+  { 3, (const byte *)"get",           c_get },
+  { 3, (const byte *)"hex",           c_hex },
+  { 3, (const byte *)"hop",           c_hop },
+  { 3, (const byte *)"len",           c_len },
+  { 3, (const byte *)"non",           c_non },
+  { 3, (const byte *)"not",           c_not },
+  { 3, (const byte *)"set",           c_set },
+  { 3, (const byte *)"try",           c_try },
+  { 4, (const byte *)"fail",          c_fail },
+  { 4, (const byte *)"goto",          c_goto },
+  { 4, (const byte *)"loop",          c_loop },
+  { 4, (const byte *)"next",          c_next },
+  { 4, (const byte *)"size",          c_size },
+  { 4, (const byte *)"test",          c_test },
+  { 4, (const byte *)"true",          c_true },
+  { 5, (const byte *)"among",         c_among },
+  { 5, (const byte *)"false",         c_false },
+  { 5, (const byte *)"lenof",         c_lenof },
+  { 5, (const byte *)"limit",         c_limit },
+  { 5, (const byte *)"unset",         c_unset },
+  { 6, (const byte *)"atmark",        c_atmark },
+  { 6, (const byte *)"attach",        c_attach },
+  { 6, (const byte *)"cursor",        c_cursor },
+  { 6, (const byte *)"define",        c_define },
+  { 6, (const byte *)"delete",        c_delete },
+  { 6, (const byte *)"gopast",        c_gopast },
+  { 6, (const byte *)"insert",        c_insert },
+  { 6, (const byte *)"maxint",        c_maxint },
+  { 6, (const byte *)"minint",        c_minint },
+  { 6, (const byte *)"repeat",        c_repeat },
+  { 6, (const byte *)"sizeof",        c_sizeof },
+  { 6, (const byte *)"tomark",        c_tomark },
+  { 7, (const byte *)"atleast",       c_atleast },
+  { 7, (const byte *)"atlimit",       c_atlimit },
+  { 7, (const byte *)"decimal",       c_decimal },
+  { 7, (const byte *)"reverse",       c_reverse },
+  { 7, (const byte *)"setmark",       c_setmark },
+  { 7, (const byte *)"strings",       c_strings },
+  { 7, (const byte *)"tolimit",       c_tolimit },
+  { 8, (const byte *)"booleans",      c_booleans },
+  { 8, (const byte *)"integers",      c_integers },
+  { 8, (const byte *)"routines",      c_routines },
+  { 8, (const byte *)"setlimit",      c_setlimit },
+  { 9, (const byte *)"backwards",     c_backwards },
+  { 9, (const byte *)"externals",     c_externals },
+  { 9, (const byte *)"groupings",     c_groupings },
+  { 9, (const byte *)"stringdef",     c_stringdef },
+  { 9, (const byte *)"substring",     c_substring },
+ { 12, (const byte *)"backwardmode",  c_backwardmode },
+ { 13, (const byte *)"stringescapes", c_stringescapes }
+};

data/vendor/snowball/compiler/syswords2.h ADDED Viewed

@@ -0,0 +1,13 @@
+    /* Token codes for the system words, listed alphabetically.  The first is
+     * pinned to 4 and the rest follow consecutively.  This fragment has no
+     * enclosing declaration of its own, so it is presumably included inside
+     * an enum elsewhere - TODO confirm against header.h. */
+    c_among = 4, c_and, c_as, c_assign, c_assignto, c_atleast,
+    c_atlimit, c_atmark, c_attach, c_backwardmode, c_backwards,
+    c_booleans, c_bra, c_comment1, c_comment2, c_cursor, c_debug,
+    c_decimal, c_define, c_delete, c_divide, c_divideassign, c_do,
+    c_dollar, c_eq, c_externals, c_fail, c_false, c_for, c_ge, c_get,
+    c_gopast, c_goto, c_gr, c_groupings, c_hex, c_hop, c_insert,
+    c_integers, c_ket, c_le, c_leftslice, c_len, c_lenof, c_limit, c_loop,
+    c_ls, c_maxint, c_minint, c_minus, c_minusassign, c_multiply,
+    c_multiplyassign, c_ne, c_next, c_non, c_not, c_or, c_plus,
+    c_plusassign, c_repeat, c_reverse, c_rightslice, c_routines,
+    c_set, c_setlimit, c_setmark, c_size, c_sizeof, c_slicefrom,
+    c_sliceto, c_stringdef, c_stringescapes, c_strings, c_substring,
+    c_test, c_tolimit, c_tomark, c_true, c_try, c_unset,

data/vendor/snowball/compiler/tokeniser.c ADDED Viewed

@@ -0,0 +1,567 @@
+#include <stdio.h>   /* stderr etc */
+#include <stdlib.h>  /* malloc free */
+#include <string.h>  /* strlen */
+#include <ctype.h>   /* isalpha etc */
+#include "header.h"
+/* One entry of the system-word table (the vocab array in syswords.h). */
+struct system_word {
+    int s_size;      /* length of the system word, terminator not counted */
+    const byte * s;  /* pointer to the system word's text */
+    int code;        /* its token code; in entry 0, the table size instead */
+};
+/* ASCII collating assumed in syswords.h */
+#include "syswords.h"
+#define INITIAL_INPUT_BUFFER_SIZE 8192
+static int hex_to_num(int ch);
+/* Return the lesser of two ints (b when they are equal). */
+static int smaller(int a, int b) {
+    if (a < b) return a;
+    return b;
+}
+/* Read the whole of the named file into a newly created symbol buffer.
+ *
+ * Returns 0 if the file can't be opened; otherwise a buffer holding one
+ * symbol per character read, with SIZE() set to the number of characters.
+ */
+extern symbol * get_input(const char * filename) {
+    symbol * u;
+    int size = 0;
+    int ch;
+    FILE * input = fopen(filename, "r");
+    if (input == 0) return 0;
+
+    u = create_b(INITIAL_INPUT_BUFFER_SIZE);
+    while ((ch = getc(input)) != EOF) {
+        /* Grow the buffer before storing once it is full. */
+        if (size >= CAPACITY(u)) u = increase_capacity(u, size);
+        u[size++] = ch;
+    }
+    fclose(input);
+    SIZE(u) = size;
+    return u;
+}
+static void error(struct tokeniser * t, const char * s1, int n, symbol * p, const char * s2) {
+    if (t->error_count == 20) { fprintf(stderr, "... etc\n"); exit(1); }
+    fprintf(stderr, "%s:%d: ", t->file, t->line_number);
+    if (s1) fprintf(stderr, "%s", s1);
+    if (p) {
+        int i;
+        for (i = 0; i < n; i++) fprintf(stderr, "%c", p[i]);
+    }
+    if (s2) fprintf(stderr, "%s", s2);
+    fprintf(stderr, "\n");
+    t->error_count++;
+}
+static void error1(struct tokeniser * t, const char * s) {
+    error(t, s, 0,0, 0);
+}
+/* Report that the text ended unexpectedly after the construct named by s. */
+static void error2(struct tokeniser * t, const char * s) {
+    error(t, "unexpected end of text after ", 0, NULL, s);
+}
+/* Order the m-symbol word at p against the n-byte word at q.
+ *
+ * Words of different lengths compare by length alone; words of equal length
+ * compare by their first differing element.  Returns a negative, zero or
+ * positive value accordingly.
+ */
+static int compare_words(int m, symbol * p, int n, const byte * q) {
+    int k;
+    if (m != n) return m - n;
+    for (k = 0; k < n; k++) {
+        int diff = p[k] - q[k];
+        if (diff != 0) return diff;
+    }
+    return 0;
+}
+/* Look up the n-symbol word at p in the vocab table by binary search.
+ *
+ * Returns the matching entry's code, or -1 if the word isn't a system word.
+ * The search keeps the candidate in [lo, hi); hi starts at the table size
+ * stored in the sentinel entry vocab[0].
+ */
+static int find_word(int n, symbol * p) {
+    int lo = 0;
+    int hi = vocab->code;
+    do {
+        int mid = lo + (hi - lo)/2;
+        const struct system_word * w = &vocab[mid];
+        int diff = compare_words(n, p, w->s_size, w->s);
+        if (diff == 0) return w->code;
+        if (diff < 0) {
+            hi = mid;
+        } else {
+            lo = mid;
+        }
+    } while (hi - lo != 1);
+    return -1;
+}
+/* Convert the n decimal digit symbols at p to an int.  The caller must have
+ * checked that they are all digits; no overflow check is made. */
+static int get_number(int n, symbol * p) {
+    int value = 0;
+    int k = 0;
+    while (k < n) {
+        value = 10*value + p[k] - '0';
+        k++;
+    }
+    return value;
+}
+/* If the input at the current position starts with the string s, step past
+ * it and return true; otherwise leave the position alone and return false. */
+static int eq_s(struct tokeniser * t, const char * s) {
+    int len = strlen(s);
+    int k;
+    if (SIZE(t->p) - t->c < len) return false;
+    for (k = 0; k < len; k++) {
+        if (t->p[t->c + k] != s[k]) return false;
+    }
+    t->c += len;
+    return true;
+}
+/* Return true if ch is newline, carriage return, tab or space.  A newline
+ * also advances the tokeniser's line counter. */
+static int white_space(struct tokeniser * t, int ch) {
+    if (ch == '\n') {
+        t->line_number++;
+        return true;
+    }
+    if (ch == '\r' || ch == '\t' || ch == ' ') return true;
+    return false;
+}
+/* Search the stringdef list for a name equal to the n symbols at p.
+ * Returns the associated value, or 0 if there is no such name. */
+static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
+    struct m_pair * q = t->m_pairs;
+    while (q != 0) {
+        symbol * name = q->name;
+        if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) {
+            return q->value;
+        }
+        q = q->next;
+    }
+    return 0;
+}
+/* Read the body of a literal string into t->b.
+ *
+ * c is the index in t->p just after the opening quote.  t->b is emptied and
+ * characters are appended to it until the closing quote.  Text between the
+ * t->m_start and t->m_end characters is an insert: a name defined with
+ * stringdef, a lone quote or m_start character (which stands for itself), or
+ * a U+xxxx codepoint.  An insert that spans a newline and contains only
+ * white space contributes nothing.
+ *
+ * Returns the index just past the closing quote, or the index at which
+ * scanning stopped after an error was reported.
+ */
+static int read_literal_string(struct tokeniser * t, int c) {
+    symbol * p = t->p;
+    int ch;
+    SIZE(t->b) = 0;
+    while (true) {
+        if (c >= SIZE(p)) { error2(t, "'"); return c; }
+        ch = p[c];
+        if (ch == '\n') { error1(t, "string not terminated"); return c; }
+        c++;
+        if (ch == t->m_start) {
+            /* Inside insert characters. */
+            int c0 = c;
+            int newlines = false; /* no newlines as yet */
+            int black_found = false; /* no printing chars as yet */
+            while (true) {
+                if (c >= SIZE(p)) { error2(t, "'"); return c; }
+                ch = p[c]; c++;
+                if (ch == t->m_end) break;
+                if (!white_space(t, ch)) black_found = true;
+                if (ch == '\n') newlines = true;
+                if (newlines && black_found) {
+                    error1(t, "string not terminated");
+                    return c;
+                }
+            }
+            if (!newlines) {
+                int n = c - c0 - 1;    /* macro size */
+                int firstch = p[c0];
+                symbol * q = find_in_m(t, n, p + c0);
+                if (q == 0) {
+                    /* Not a stringdef name: try the built-in meanings. */
+                    if (n == 1 && (firstch == '\'' || firstch == t->m_start))
+                        t->b = add_to_b(t->b, 1, p + c0);
+                    else if (n >= 3 && firstch == 'U' && p[c0 + 1] == '+') {
+                        int codepoint = 0;
+                        int x;
+                        if (t->uplusmode == UPLUS_DEFINED) {
+                            /* See if found with xxxx upper-cased. */
+                            symbol * uc = create_b(n);
+                            int i;
+                            for (i = 0; i != n; ++i) {
+                                uc[i] = toupper(p[c0 + i]);
+                            }
+                            q = find_in_m(t, n, uc);
+                            lose_b(uc);
+                            if (q != 0) {
+                                t->b = add_to_b(t->b, SIZE(q), q);
+                                continue;
+                            }
+                            error1(t, "Some U+xxxx stringdefs seen but not this one");
+                        } else {
+                            t->uplusmode = UPLUS_UNICODE;
+                        }
+                        /* Parse the hex digits between "U+" and m_end.  A bad
+                         * digit is reported and parsing stops, but whatever
+                         * was accumulated so far is still emitted below. */
+                        for (x = c0 + 2; x != c - 1; ++x) {
+                            int hex = hex_to_num(p[x]);
+                            if (hex < 0) {
+                                error1(t, "Bad hex digit following U+");
+                                break;
+                            }
+                            codepoint = (codepoint << 4) | hex;
+                        }
+                        if (t->encoding == ENC_UTF8) {
+                            if (codepoint < 0 || codepoint > 0x01ffff) {
+                                error1(t, "character values exceed 0x01ffff");
+                            }
+                            /* Ensure there's enough space for a max length
+                             * UTF-8 sequence. */
+                            /* NOTE(review): only 3 symbols are reserved, yet
+                             * codepoints up to 0x01ffff are accepted above
+                             * and out-of-range ones are still passed on -
+                             * confirm put_utf8 never writes more than 3. */
+                            if (CAPACITY(t->b) < SIZE(t->b) + 3) {
+                                t->b = increase_capacity(t->b, 3);
+                            }
+                            SIZE(t->b) += put_utf8(codepoint, t->b + SIZE(t->b));
+                        } else {
+                            symbol sym;
+                            if (t->encoding == ENC_SINGLEBYTE) {
+                                /* Only ISO-8859-1 is handled this way - for
+                                 * other single-byte character sets you need
+                                 * stringdef all the U+xxxx codes you use
+                                 * like - e.g.:
+                                 *
+                                 * stringdef U+0171   hex 'FB'
+                                 */
+                                if (codepoint < 0 || codepoint > 0xff) {
+                                    error1(t, "character values exceed 256");
+                                }
+                            } else {
+                                if (codepoint < 0 || codepoint > 0xffff) {
+                                    error1(t, "character values exceed 64K");
+                                }
+                            }
+                            sym = codepoint;
+                            t->b = add_to_b(t->b, 1, &sym);
+                        }
+                    } else
+                        error(t, "string macro '", n, p + c0, "' undeclared");
+                } else
+                    t->b = add_to_b(t->b, SIZE(q), q);
+            }
+        } else {
+            /* An ordinary character: the closing quote ends the string. */
+            if (ch == '\'') return c;
+            if (ch < 0 || ch >= 0x80) {
+                if (t->encoding != ENC_WIDECHARS) {
+                    /* We don't really want people using non-ASCII literal
+                     * strings, but historically it's worked for single-byte
+                     * and UTF-8 if the source encoding matches what the
+                     * generated stemmer works in and it seems unfair to just
+                     * suddenly make this a hard error.
+                     */
+                    fprintf(stderr,
+                            "%s:%d: warning: Non-ASCII literal strings aren't "
+                            "portable - use stringdef instead\n",
+                            t->file, t->line_number);
+                } else {
+                    error1(t, "Non-ASCII literal strings aren't "
+                              "portable - use stringdef instead");
+                }
+            }
+            t->b = add_to_b(t->b, 1, p + c - 1);
+        }
+    }
+}
+/* Scan the next raw token from the input, starting at t->c.
+ *
+ * White space is skipped.  A word is looked up as a system word and becomes
+ * c_name (with its text in t->b) if it isn't one or its token is disabled; a
+ * run of digits becomes c_number (value in t->number); a quote starts a
+ * c_literalstring; anything else is matched against the one and two
+ * character system words, longest first.  Unrecognised characters are
+ * reported and skipped.
+ *
+ * Returns the token code with t->c moved past the token, or -1 at the end of
+ * the input.
+ */
+static int next_token(struct tokeniser * t) {
+    symbol * p = t->p;
+    int c = t->c;
+    int code = -1;
+    for (;;) {
+        int ch;
+        if (c >= SIZE(p)) {
+            t->c = c;
+            return -1;
+        }
+        ch = p[c];
+        if (white_space(t, ch)) {
+            c++;
+            continue;
+        }
+        if (isalpha(ch)) {
+            int start = c;
+            while (c < SIZE(p) && (isalnum(p[c]) || p[c] == '_')) c++;
+            code = find_word(c - start, p + start);
+            if (code < 0 || t->token_disabled[code]) {
+                t->b = move_to_b(t->b, c - start, p + start);
+                code = c_name;
+            }
+        } else if (isdigit(ch)) {
+            int start = c;
+            while (c < SIZE(p) && isdigit(p[c])) c++;
+            t->number = get_number(c - start, p + start);
+            code = c_number;
+        } else if (ch == '\'') {
+            c = read_literal_string(t, c + 1);
+            code = c_literalstring;
+        } else {
+            /* Try a two character operator before a one character one. */
+            int len;
+            for (len = smaller(2, SIZE(p) - c); len > 0; len--) {
+                code = find_word(len, p + c);
+                if (code >= 0) {
+                    c += len;
+                    break;
+                }
+            }
+        }
+        if (code >= 0) {
+            t->c = c;
+            return code;
+        }
+        error(t, "'", 1, p + c, "' unknown");
+        c++;
+    }
+}
+/* Return the character at the current position and step past it, or return
+ * -1 (without moving) if the input is exhausted. */
+static int next_char(struct tokeniser * t) {
+    int ch = -1;
+    if (t->c < SIZE(t->p)) {
+        ch = t->p[t->c];
+        t->c++;
+    }
+    return ch;
+}
+/* Return the next character that isn't white space, or -1 at the end of the
+ * input.  Newlines skipped on the way are counted by white_space(). */
+static int next_real_char(struct tokeniser * t) {
+    int ch;
+    do {
+        ch = next_char(t);
+    } while (white_space(t, ch));
+    return ch;
+}
+/* Read a white-space delimited run of characters (the name in a stringdef)
+ * into t->b2, skipping any white space in front of it.
+ *
+ * Reports "unexpected end of text after stringdef" and leaves t->b2 alone if
+ * the input ends before any non-space character is found.
+ */
+static void read_chars(struct tokeniser * t) {
+    int ch = next_real_char(t);
+    if (ch < 0) { error2(t, "stringdef"); return; }
+    {
+        int c0 = t->c-1;  /* index of the first character of the run */
+        int end;          /* index just past the last character of the run */
+        while (true) {
+            /* Note the position before reading: next_char() advances t->c
+             * for a white space terminator but not at the end of the input,
+             * so deriving the length from t->c afterwards would drop the
+             * last character when the run is ended by the end of input. */
+            end = t->c;
+            ch = next_char(t);
+            if (white_space(t, ch) || ch < 0) break;
+        }
+        t->b2 = move_to_b(t->b2, end - c0, t->p + c0);
+    }
+}
+/* Return the value of the decimal digit ch, or -1 if ch isn't one. */
+static int decimal_to_num(int ch) {
+    if (ch < '0' || ch > '9') return -1;
+    return ch - '0';
+}
+/* Return the value (0 to 15) of the hex digit ch, in either case, or -1 if
+ * ch isn't a hex digit. */
+static int hex_to_num(int ch) {
+    int value = -1;
+    if (ch >= '0' && ch <= '9') {
+        value = ch - '0';
+    } else if (ch >= 'a' && ch <= 'f') {
+        value = 10 + (ch - 'a');
+    } else if (ch >= 'A' && ch <= 'F') {
+        value = 10 + (ch - 'A');
+    }
+    return value;
+}
+/* Convert a string of space-separated numbers into the characters they
+ * denote, rewriting p in place and setting SIZE(p) to the new length.
+ *
+ * base is 10 for a decimal string; any other value is read as hex (the
+ * caller in this file passes 10 or 16).  Each number becomes one symbol, or
+ * a UTF-8 sequence from put_utf8() when the encoding is ENC_UTF8.
+ *
+ * On a bad digit or an out-of-range value an error is reported and the
+ * function returns at once, leaving SIZE(p) unchanged.
+ */
+static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
+    /* Largest value a character may have in the target encoding. */
+    const int limit = (t->encoding == ENC_SINGLEBYTE) ? 0xff : 0xffff;
+    int c = 0;  /* read position */
+    int d = 0;  /* write position */
+    while (true) {
+        while (c < SIZE(p) && p[c] == ' ') c++;
+        if (c == SIZE(p)) break;
+        {
+            int number = 0;
+            while (c != SIZE(p)) {
+                int ch = p[c];
+                if (ch == ' ') break;
+                if (base == 10) {
+                    ch = decimal_to_num(ch);
+                    if (ch < 0) {
+                        error1(t, "decimal string contains non-digits");
+                        return;
+                    }
+                } else {
+                    ch = hex_to_num(ch);
+                    if (ch < 0) {
+                        error1(t, "hex string contains non-hex characters");
+                        return;
+                    }
+                }
+                /* Stop accumulating once the value is past the limit: it is
+                 * going to be rejected below whatever follows, and carrying
+                 * on would overflow the int for a long enough digit string.
+                 * The remaining digits are still validated. */
+                if (number <= limit) number = base * number + ch;
+                c++;
+            }
+            if (number < 0 || number > limit) {
+                if (t->encoding == ENC_SINGLEBYTE) {
+                    error1(t, "character values exceed 256");
+                } else {
+                    error1(t, "character values exceed 64K");
+                }
+                return;
+            }
+            if (t->encoding == ENC_UTF8)
+                d += put_utf8(number, p + d);
+            else
+                p[d++] = number;
+        }
+    }
+    SIZE(p) = d;
+}
+/* Return the next token, dealing with the tokeniser-level directives here.
+ *
+ * If t->token_held is set, the flag is cleared and t->token is returned
+ * again.  Otherwise tokens are taken from next_token(); comments and the
+ * stringescapes, stringdef and get directives are processed in this function
+ * and not returned.  When the input from a get is exhausted, reading resumes
+ * in the input that was current before it.  For any other token,
+ * t->previous_token and t->token are updated and the code is returned; -1
+ * means end of text.
+ */
+extern int read_token(struct tokeniser * t) {
+    symbol * p = t->p;
+    int held = t->token_held;
+    t->token_held = false;
+    if (held) return t->token;
+    while (true) {
+        int code = next_token(t);
+        switch (code) {
+            case c_comment1: /*  slash-slash comment */
+                /* Skip to the newline, which is left for next_token(). */
+                while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
+                continue;
+            case c_comment2: /* slash-star comment */
+                while (true) {
+                    if (t->c >= SIZE(p)) {
+                        error1(t, "/* comment not terminated");
+                        t->token = -1;
+                        return -1;
+                    }
+                    if (p[t->c] == '\n') t->line_number++;
+                    if (eq_s(t, "*/")) break;
+                    t->c++;
+                }
+                continue;
+            case c_stringescapes: {
+                /* The next two non-space characters open and close inserts
+                 * inside literal strings. */
+                int ch1 = next_real_char(t);
+                int ch2 = next_real_char(t);
+                if (ch2 < 0) {
+                    error2(t, "stringescapes");
+                    continue;
+                }
+                if (ch1 == '\'') {
+                    error1(t, "first stringescape cannot be '");
+                    continue;
+                }
+                t->m_start = ch1;
+                t->m_end = ch2;
+                continue;
+            }
+            case c_stringdef: {
+                /* stringdef NAME [hex|decimal] 'STRING': the name goes to
+                 * t->b2 and the string to t->b. */
+                int base = 0;
+                read_chars(t);
+                code = read_token(t);
+                if (code == c_hex) { base = 16; code = read_token(t); } else
+                if (code == c_decimal) { base = 10; code = read_token(t); }
+                if (code != c_literalstring) {
+                    error1(t, "string omitted after stringdef");
+                    continue;
+                }
+                if (base > 0) convert_numeric_string(t, t->b, base);
+                {   NEW(m_pair, q);
+                    /* Push the new definition on the front of the list. */
+                    q->next = t->m_pairs;
+                    q->name = copy_b(t->b2);
+                    q->value = copy_b(t->b);
+                    t->m_pairs = q;
+                    /* Defining a U+xxxx name and using U+xxxx with its
+                     * implicit Unicode meaning are mutually exclusive. */
+                    if (t->uplusmode != UPLUS_DEFINED &&
+                        (SIZE(t->b2) >= 3 && t->b2[0] == 'U' && t->b2[1] == '+')) {
+                        if (t->uplusmode == UPLUS_UNICODE) {
+                            error1(t, "U+xxxx already used with implicit meaning");
+                        } else {
+                            t->uplusmode = UPLUS_DEFINED;
+                        }
+                    }
+                }
+                continue;
+            }
+            case c_get:
+                /* get 'FILE': switch to reading from the named file. */
+                code = read_token(t);
+                if (code != c_literalstring) {
+                    error1(t, "string omitted after get"); continue;
+                }
+                t->get_depth++;
+                if (t->get_depth > 10) {
+                    error1(t, "get directives go 10 deep. Looping?");
+                    exit(1);
+                }
+                {
+                    NEW(input, q);
+                    char * file = b_to_s(t->b);
+                    symbol * u = get_input(file);
+                    if (u == 0) {
+                        /* Not found as given: try it with each entry of
+                         * t->includes put in front, in list order. */
+                        struct include * r;
+                        for (r = t->includes; r; r = r->next) {
+                            symbol * b = copy_b(r->b);
+                            b = add_to_b(b, SIZE(t->b), t->b);
+                            free(file);
+                            file = b_to_s(b);
+                            u = get_input(file);
+                            lose_b(b);
+                            if (u != 0) break;
+                        }
+                    }
+                    if (u == 0) {
+                        error(t, "Can't get '", SIZE(t->b), t->b, "'");
+                        exit(1);
+                    }
+                    /* Save the current input state in q.  This presumably
+                     * relies on struct tokeniser starting with the members
+                     * of struct input - TODO confirm in header.h. */
+                    memmove(q, t, sizeof(struct input));
+                    t->next = q;
+                    t->p = u;
+                    t->c = 0;
+                    t->file = file;
+                    t->file_needs_freeing = true;
+                    t->line_number = 1;
+                }
+                p = t->p;
+                continue;
+            case -1:
+                if (t->next) {
+                    /* End of an included input: free its text and restore
+                     * the saved state of the input it was read from. */
+                    /* NOTE(review): the restore appears to overwrite t->file
+                     * without freeing the name allocated for the include -
+                     * confirm whether it is still referenced elsewhere. */
+                    lose_b(p);
+                    {
+                        struct input * q = t->next;
+                        memmove(t, q, sizeof(struct input)); p = t->p;
+                        FREE(q);
+                    }
+                    t->get_depth--;
+                    continue;
+                }
+                /* fall through */
+            default:
+                t->previous_token = t->token;
+                t->token = code;
+                return code;
+        }
+    }
+}
+/* Return a printable name for a token code: the text of the system word if
+ * the code belongs to one (the first table match wins), otherwise a fixed
+ * description, or "?" if the code isn't recognised. */
+extern const char * name_of_token(int code) {
+    const struct system_word * w;
+    const struct system_word * end = vocab + vocab->code;
+    /* Entry 0 is the sentinel, so the search starts at entry 1. */
+    for (w = vocab + 1; w < end; w++) {
+        if (w->code == code) return (const char *)w->s;
+    }
+    switch (code) {
+        case c_mathassign:
+            return "=";
+        case c_name:
+            return "name";
+        case c_number:
+            return "number";
+        case c_literalstring:
+            return "literal";
+        case c_neg:
+            return "neg";
+        case c_grouping:
+            return "grouping";
+        case c_call:
+            return "call";
+        case c_booltest:
+            return "Boolean test";
+        case -2:
+            return "start of text";
+        case -1:
+            return "end of text";
+        default:
+            return "?";
+    }
+}
+/* Mark the token with the given code as disabled, so that next_token()
+ * treats its word as an ordinary name.  code is not range checked. */
+extern void disable_token(struct tokeniser * t, int code) {
+    t->token_disabled[code] = 1;
+}
+/* Create a tokeniser reading the text p, with file as the name used in
+ * error messages.
+ *
+ * The file name is stored as given and not freed by close_tokeniser().
+ * Members not assigned here (such as encoding and includes) are left as
+ * NEW() provides them - presumably the caller sets them; TODO confirm.
+ */
+extern struct tokeniser * create_tokeniser(symbol * p, char * file) {
+    NEW(tokeniser, t);
+    t->next = 0;
+    t->p = p;
+    t->c = 0;
+    t->file = file;
+    t->file_needs_freeing = false;
+    t->line_number = 1;
+    t->b = create_b(0);
+    t->b2 = create_b(0);
+    /* No string escapes until a stringescapes directive sets them.  m_end is
+     * given a definite value too, so that it can never be read while
+     * indeterminate. */
+    t->m_start = -1;
+    t->m_end = -1;
+    t->m_pairs = 0;
+    t->get_depth = 0;
+    t->error_count = 0;
+    t->token_held = false;
+    t->token = -2;           /* -2 is "start of text" */
+    t->previous_token = -2;
+    t->uplusmode = UPLUS_NONE;
+    memset(t->token_disabled, 0, sizeof(t->token_disabled));
+    return t;
+}
+/* Free a tokeniser: its two work buffers, every stringdef pair with its
+ * name and value, any saved input records still on the stack, the file name
+ * if the tokeniser owns it, and finally the tokeniser itself. */
+extern void close_tokeniser(struct tokeniser * t) {
+    struct m_pair * pair;
+    struct input * saved;
+
+    lose_b(t->b);
+    lose_b(t->b2);
+
+    for (pair = t->m_pairs; pair != 0; ) {
+        struct m_pair * following = pair->next;
+        lose_b(pair->name);
+        lose_b(pair->value);
+        FREE(pair);
+        pair = following;
+    }
+
+    for (saved = t->next; saved != 0; ) {
+        struct input * following = saved->next;
+        FREE(saved);
+        saved = following;
+    }
+
+    if (t->file_needs_freeing) free(t->file);
+    FREE(t);
+}

data/vendor/snowball/csharp/.gitignore ADDED Viewed

@@ -0,0 +1,8 @@
+*.o
+*.suo
+*.user
+*.GhostDoc.xml
+bin/
+obj/
+TestResults/
+TestResult.xml

data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore ADDED Viewed

	@@ -0,0 +1 @@
1	+ *.generated.cs