RubyGems - mittens - Versions diffs - 0.2.0 → 0.3.0 - Mend

mittens 0.2.0 → 0.3.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/README.md +3 -3
data/lib/mittens/version.rb +1 -1
data/vendor/snowball/.github/workflows/ci.yml +216 -0
data/vendor/snowball/CONTRIBUTING.rst +111 -62
data/vendor/snowball/GNUmakefile +194 -136
data/vendor/snowball/NEWS +798 -3
data/vendor/snowball/README.rst +50 -1
data/vendor/snowball/ada/src/stemmer.adb +25 -13
data/vendor/snowball/ada/src/stemmer.ads +9 -9
data/vendor/snowball/ada/stemmer_config.gpr +7 -7
data/vendor/snowball/algorithms/basque.sbl +4 -19
data/vendor/snowball/algorithms/catalan.sbl +2 -9
data/vendor/snowball/algorithms/danish.sbl +1 -1
data/vendor/snowball/algorithms/dutch.sbl +284 -122
data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
data/vendor/snowball/algorithms/english.sbl +52 -37
data/vendor/snowball/algorithms/esperanto.sbl +157 -0
data/vendor/snowball/algorithms/estonian.sbl +269 -0
data/vendor/snowball/algorithms/finnish.sbl +2 -3
data/vendor/snowball/algorithms/french.sbl +42 -16
data/vendor/snowball/algorithms/german.sbl +35 -14
data/vendor/snowball/algorithms/greek.sbl +76 -76
data/vendor/snowball/algorithms/hungarian.sbl +8 -6
data/vendor/snowball/algorithms/indonesian.sbl +14 -8
data/vendor/snowball/algorithms/italian.sbl +11 -21
data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
data/vendor/snowball/algorithms/lovins.sbl +0 -1
data/vendor/snowball/algorithms/nepali.sbl +138 -37
data/vendor/snowball/algorithms/norwegian.sbl +19 -5
data/vendor/snowball/algorithms/porter.sbl +2 -2
data/vendor/snowball/algorithms/portuguese.sbl +9 -13
data/vendor/snowball/algorithms/romanian.sbl +17 -4
data/vendor/snowball/algorithms/serbian.sbl +467 -468
data/vendor/snowball/algorithms/spanish.sbl +5 -7
data/vendor/snowball/algorithms/swedish.sbl +60 -6
data/vendor/snowball/algorithms/tamil.sbl +207 -176
data/vendor/snowball/algorithms/turkish.sbl +461 -445
data/vendor/snowball/algorithms/yiddish.sbl +36 -38
data/vendor/snowball/compiler/analyser.c +445 -192
data/vendor/snowball/compiler/driver.c +109 -101
data/vendor/snowball/compiler/generator.c +853 -464
data/vendor/snowball/compiler/generator_ada.c +404 -366
data/vendor/snowball/compiler/generator_csharp.c +297 -260
data/vendor/snowball/compiler/generator_go.c +323 -254
data/vendor/snowball/compiler/generator_java.c +326 -252
data/vendor/snowball/compiler/generator_js.c +362 -252
data/vendor/snowball/compiler/generator_pascal.c +349 -197
data/vendor/snowball/compiler/generator_python.c +257 -240
data/vendor/snowball/compiler/generator_rust.c +423 -251
data/vendor/snowball/compiler/header.h +117 -71
data/vendor/snowball/compiler/space.c +137 -68
data/vendor/snowball/compiler/syswords.h +2 -2
data/vendor/snowball/compiler/tokeniser.c +125 -107
data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
data/vendor/snowball/csharp/Stemwords/App.config +2 -2
data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
data/vendor/snowball/doc/libstemmer_c_README +7 -4
data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
data/vendor/snowball/doc/libstemmer_java_README +12 -1
data/vendor/snowball/doc/libstemmer_js_README +6 -4
data/vendor/snowball/doc/libstemmer_python_README +9 -4
data/vendor/snowball/examples/stemwords.c +12 -12
data/vendor/snowball/go/env.go +107 -31
data/vendor/snowball/go/util.go +0 -4
data/vendor/snowball/include/libstemmer.h +4 -0
data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
data/vendor/snowball/javascript/base-stemmer.js +186 -2
data/vendor/snowball/javascript/stemwords.js +3 -6
data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
data/vendor/snowball/libstemmer/modules.txt +13 -10
data/vendor/snowball/libstemmer/test.c +1 -1
data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
data/vendor/snowball/pascal/generate.pl +13 -13
data/vendor/snowball/python/create_init.py +4 -1
data/vendor/snowball/python/setup.cfg +0 -3
data/vendor/snowball/python/setup.py +8 -3
data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
data/vendor/snowball/python/stemwords.py +8 -12
data/vendor/snowball/runtime/api.c +10 -5
data/vendor/snowball/runtime/header.h +10 -9
data/vendor/snowball/runtime/utilities.c +9 -9
data/vendor/snowball/rust/build.rs +1 -1
data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
data/vendor/snowball/tests/stemtest.c +7 -4
metadata +7 -7
data/vendor/snowball/.travis.yml +0 -112
data/vendor/snowball/algorithms/german2.sbl +0 -145
data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
data/vendor/snowball/compiler/syswords2.h +0 -13

data/vendor/snowball/compiler/analyser.c CHANGED Viewed

@@ -1,4 +1,5 @@
+#include <assert.h>
+#include <limits.h>  /* for INT_MAX */
 #include <stdio.h>   /* printf etc */
 #include <stdlib.h>  /* exit */
 #include <string.h>  /* memmove */
@@ -17,7 +18,7 @@ typedef enum {
     e_empty_among = 18,
     e_adjacent_bracketed_in_among = 19,
     e_substring_preceded_by_substring = 20,
-    /* For codes below here, tokeniser->b is printed before the error. */
+    /* For codes below here, tokeniser->s is printed before the error. */
     e_redeclared = 30,
     e_undeclared = 31,
     e_declared_as_different_mode = 32,
@@ -36,38 +37,38 @@ static struct node * C_style(struct analyser * a, const char * s, int token);
 static void print_node_(struct node * p, int n, const char * s) {
-    int i;
-    for (i = 0; i < n; i++) fputs(i == n - 1 ? s : "  ", stdout);
-    printf("%s ", name_of_token(p->type));
-    if (p->name) report_b(stdout, p->name->b);
+    printf("%*s%s", n * 2, s, name_of_token(p->type));
+    if (p->name) {
+        putchar(' ');
+        report_s(stdout, p->name->s);
+    }
     if (p->literalstring) {
-        printf("'");
+        printf(" '");
         report_b(stdout, p->literalstring);
         printf("'");
     } else if (p->type == c_number) {
-        printf("%d", p->number);
+        printf(" %d", p->number);
     }
     printf("\n");
     if (p->AE) print_node_(p->AE, n+1, "# ");
-    if (p->left) print_node_(p->left, n+1, "  ");
+    if (p->left) print_node_(p->left, n+1, "");
     if (p->aux) print_node_(p->aux, n+1, "@ ");
-    if (p->right) print_node_(p->right, n, "  ");
+    if (p->right) print_node_(p->right, n, "");
 }
 extern void print_program(struct analyser * a) {
-    print_node_(a->program, 0, "  ");
+    print_node_(a->program, 0, "");
 }
 static struct node * new_node(struct analyser * a, int type) {
     NEW(node, p);
     p->next = a->nodes; a->nodes = p;
-    p->left = 0;
-    p->right = 0;
-    p->aux = 0;
-    p->AE = 0;
-    p->name = 0;
-    p->literalstring = 0;
+    p->left = NULL;
+    p->right = NULL;
+    p->aux = NULL;
+    p->AE = NULL;
+    p->name = NULL;
+    p->literalstring = NULL;
     p->mode = a->mode;
     p->line_number = a->tokeniser->line_number;
     p->type = type;
@@ -78,7 +79,6 @@ static const char * name_of_mode(int n) {
     switch (n) {
         case m_backward: return "string backward";
         case m_forward:  return "string forward";
-    /*  case m_integer:  return "integer";  */
     }
     fprintf(stderr, "Invalid mode %d in name_of_mode()\n", n);
     exit(1);
@@ -86,6 +86,7 @@ static const char * name_of_mode(int n) {
 static const char * name_of_type(int n) {
     switch (n) {
+        case 'b': return "boolean";
         case 's': return "string";
         case 'i': return "integer";
         case 'r': return "routine";
@@ -117,9 +118,14 @@ static void count_error(struct analyser * a) {
 static void error2(struct analyser * a, error_code n, int x) {
     struct tokeniser * t = a->tokeniser;
+    if (n == e_unexpected_token && t->token_reported_as_unexpected) {
+        // Avoid duplicate errors if this token was already reported as
+        // unexpected and then held.
+        return;
+    }
     count_error(a);
     fprintf(stderr, "%s:%d: ", t->file, t->line_number);
-    if ((int)n >= (int)e_redeclared) report_b(stderr, t->b);
+    if ((int)n >= (int)e_redeclared) report_s(stderr, t->s);
     switch (n) {
         case e_token_omitted:
             fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
@@ -127,12 +133,14 @@ static void error2(struct analyser * a, error_code n, int x) {
             fprintf(stderr, "in among(...), ");
             /* fall through */
         case e_unexpected_token:
+            t->token_reported_as_unexpected = true;
             fprintf(stderr, "unexpected %s", name_of_token(t->token));
             if (t->token == c_number) fprintf(stderr, " %d", t->number);
             if (t->token == c_name) {
-                fprintf(stderr, " ");
-                report_b(stderr, t->b);
-            } break;
+                t->s[SIZE(t->s)] = 0;
+                fprintf(stderr, " %s", t->s);
+            }
+            break;
         case e_string_omitted:
             fprintf(stderr, "string omitted"); break;
@@ -179,9 +187,8 @@ static void error(struct analyser * a, error_code n) { error2(a, n, 0); }
 static void error4(struct analyser * a, struct name * q) {
     count_error(a);
-    fprintf(stderr, "%s:%d: ", a->tokeniser->file, q->used->line_number);
-    report_b(stderr, q->b);
-    fprintf(stderr, " undefined\n");
+    q->s[SIZE(q->s)] = 0;
+    fprintf(stderr, "%s:%d: %s undefined\n", a->tokeniser->file, q->used->line_number, q->s);
 }
 static void omission_error(struct analyser * a, int n) {
@@ -198,35 +205,33 @@ static int check_token(struct analyser * a, int code) {
 static int get_token(struct analyser * a, int code) {
     struct tokeniser * t = a->tokeniser;
     read_token(t);
-    {
-        int x = check_token(a, code);
-        if (!x) t->token_held = true;
-        return x;
-    }
+    int x = check_token(a, code);
+    if (!x) hold_token(t);
+    return x;
 }
 static struct name * look_for_name(struct analyser * a) {
-    symbol * q = a->tokeniser->b;
+    const byte * q = a->tokeniser->s;
     struct name * p;
     for (p = a->names; p; p = p->next) {
-        symbol * b = p->b;
+        byte * b = p->s;
         int n = SIZE(b);
-        if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
+        if (n == SIZE(q) && memcmp(q, b, n) == 0) {
             p->referenced = true;
             return p;
         }
     }
-    return 0;
+    return NULL;
 }
 static struct name * find_name(struct analyser * a) {
     struct name * p = look_for_name(a);
-    if (p == 0) error(a, e_undeclared);
+    if (p == NULL) error(a, e_undeclared);
     return p;
 }
 static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
-    if (p->mode < 0) p->mode = mode; else
+    if (p->mode == m_unknown) p->mode = mode; else
     if (p->mode != mode) error2(a, e_misused, mode);
 }
@@ -265,10 +270,8 @@ static void read_names(struct analyser * a, int type) {
                  * its special meaning, for compatibility with older versions
                  * of snowball.
                  */
-                static const symbol c_len_lit[] = {
-                    'l', 'e', 'n'
-                };
-                t->b = MOVE_TO_B(t->b, c_len_lit);
+                SIZE(t->s) = 0;
+                t->s = add_literal_to_s(t->s, "len");
                 goto handle_as_name;
             }
             case c_lenof: {
@@ -276,31 +279,29 @@ static void read_names(struct analyser * a, int type) {
                  * its special meaning, for compatibility with older versions
                  * of snowball.
                  */
-                static const symbol c_lenof_lit[] = {
-                    'l', 'e', 'n', 'o', 'f'
-                };
-                t->b = MOVE_TO_B(t->b, c_lenof_lit);
+                SIZE(t->s) = 0;
+                t->s = add_literal_to_s(t->s, "lenof");
                 goto handle_as_name;
             }
             case c_name:
 handle_as_name:
-                if (look_for_name(a) != 0) error(a, e_redeclared); else {
+                if (look_for_name(a) != NULL) error(a, e_redeclared); else {
                     NEW(name, p);
-                    p->b = copy_b(t->b);
+                    p->s = copy_s(t->s);
                     p->type = type;
-                    p->mode = -1; /* routines, externals */
+                    p->mode = m_unknown; /* used for routines, externals */
                     /* We defer assigning counts until after we've eliminated
                      * variables whose values are never used. */
                     p->count = -1;
                     p->referenced = false;
                     p->used_in_among = false;
-                    p->used = 0;
+                    p->used = NULL;
                     p->value_used = false;
                     p->initialised = false;
                     p->used_in_definition = false;
-                    p->local_to = 0;
-                    p->grouping = 0;
-                    p->definition = 0;
+                    p->local_to = NULL;
+                    p->grouping = NULL;
+                    p->definition = NULL;
                     p->declaration_line_number = t->line_number;
                     p->next = a->names;
                     a->names = p;
@@ -310,7 +311,7 @@ handle_as_name:
                 }
                 break;
             default:
-                if (!check_token(a, c_ket)) t->token_held = true;
+                if (!check_token(a, c_ket)) hold_token(t);
                 return;
         }
     }
@@ -325,7 +326,6 @@ static symbol * new_literalstring(struct analyser * a) {
 }
 static int read_AE_test(struct analyser * a) {
     struct tokeniser * t = a->tokeniser;
     switch (read_token(t)) {
         case c_assign: return c_mathassign;
@@ -335,11 +335,14 @@ static int read_AE_test(struct analyser * a) {
         case c_divideassign:
         case c_eq:
         case c_ne:
-        case c_gr:
+        case c_gt:
         case c_ge:
-        case c_ls:
+        case c_lt:
         case c_le: return t->token;
-        default: error(a, e_unexpected_token); t->token_held = true; return c_eq;
+        default:
+            error(a, e_unexpected_token);
+            hold_token(t);
+            return c_eq;
     }
 }
@@ -422,12 +425,16 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
         case c_number:
             p = new_node(a, c_number);
             p->number = t->number;
+            p->fixed_constant = true;
             break;
         case c_lenof:
         case c_sizeof: {
             int token = t->token;
             p = C_style(a, "S", token);
-            if (!p->literalstring) break;
+            if (!p->literalstring) {
+                if (p->name) p->name->value_used = true;
+                break;
+            }
             /* Replace lenof or sizeof on a literal string with a numeric
              * constant.
@@ -449,18 +456,19 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
             p->type = c_number;
             p->literalstring = NULL;
             p->number = result;
+            p->fixed_constant = (token == c_lenof);
             break;
         }
         default:
             error(a, e_unexpected_token);
-            t->token_held = true;
-            return 0;
+            hold_token(t);
+            return NULL;
     }
     while (true) {
         int token = read_token(t);
         int b = binding(token);
         if (binding(token) <= B) {
-            t->token_held = true;
+            hold_token(t);
             return p;
         }
         struct node * r = read_AE(a, assigned_to, b);
@@ -478,6 +486,11 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
                     q->number = p->number * r->number;
                     break;
                 case c_divide:
+                    if (r->number == 0) {
+                        fprintf(stderr, "%s:%d: Division by zero\n",
+                                t->file, t->line_number);
+                        exit(1);
+                    }
                     q->number = p->number / r->number;
                     break;
                 default:
@@ -485,10 +498,101 @@ static struct node * read_AE(struct analyser * a, struct name * assigned_to, int
                             name_of_token(token));
                     exit(1);
             }
+            q->fixed_constant = p->fixed_constant && r->fixed_constant;
+            q->line_number = p->line_number;
         } else {
-            q = new_node(a, token);
-            q->left = p;
-            q->right = r;
+            // Check for specific constant or no-op cases.
+            q = NULL;
+            switch (token) {
+                case c_plus:
+                    // 0 + r is r
+                    if (p->type == c_number && p->number == 0) {
+                        q = r;
+                        break;
+                    }
+                    // p + 0 is p
+                    if (r->type == c_number && r->number == 0) {
+                        q = p;
+                        break;
+                    }
+                    break;
+                case c_minus:
+                    // 0 - r is -r
+                    if (p->type == c_number && p->number == 0) {
+                        q = new_node(a, c_neg);
+                        q->right = r;
+                        break;
+                    }
+                    // p - 0 is p
+                    if (r->type == c_number && r->number == 0) {
+                        q = p;
+                        break;
+                    }
+                    break;
+                case c_multiply:
+                    // 0 * r is 0
+                    if (p->type == c_number && p->number == 0) {
+                        q = p;
+                        break;
+                    }
+                    // p * 0 is 0
+                    if (r->type == c_number && r->number == 0) {
+                        q = r;
+                        q->line_number = p->line_number;
+                        break;
+                    }
+                    // -1 * r is -r
+                    if (p->type == c_number && p->number == -1) {
+                        q = new_node(a, c_neg);
+                        q->right = r;
+                        q->line_number = p->line_number;
+                        break;
+                    }
+                    // p * -1 is -p
+                    if (r->type == c_number && r->number == -1) {
+                        q = new_node(a, c_neg);
+                        q->right = p;
+                        q->line_number = p->line_number;
+                        break;
+                    }
+                    // 1 * r is r
+                    if (p->type == c_number && p->number == 1) {
+                        q = r;
+                        q->line_number = p->line_number;
+                        break;
+                    }
+                    // p * 1 is p
+                    if (r->type == c_number && r->number == 1) {
+                        q = p;
+                        break;
+                    }
+                    break;
+                case c_divide:
+                    // p / 1 is p
+                    if (r->type == c_number && r->number == 1) {
+                        q = p;
+                        break;
+                    }
+                    // p / -1 is -p
+                    if (r->type == c_number && r->number == -1) {
+                        q = new_node(a, c_neg);
+                        q->right = p;
+                        q->line_number = p->line_number;
+                        break;
+                    }
+                    // p / 0 is an error!
+                    if (r->type == c_number && r->number == 0) {
+                        fprintf(stderr, "%s:%d: Division by zero\n",
+                                t->file, t->line_number);
+                        exit(1);
+                    }
+                    break;
+            }
+            if (!q) {
+                q = new_node(a, token);
+                q->left = p;
+                q->right = r;
+            }
         }
         p = q;
     }
@@ -503,30 +607,30 @@ static struct node * read_C_connection(struct analyser * a, struct node * q, int
         q = read_C(a);
         p_end->right = q; p_end = q;
     } while (read_token(t) == op);
-    t->token_held = true;
+    hold_token(t);
     return p;
 }
 static struct node * read_C_list(struct analyser * a) {
     struct tokeniser * t = a->tokeniser;
     struct node * p = new_node(a, c_bra);
-    struct node * p_end = 0;
+    struct node * p_end = NULL;
     while (true) {
         int token = read_token(t);
         if (token == c_ket) return p;
         if (token < 0) { omission_error(a, c_ket); return p; }
-        t->token_held = true;
+        hold_token(t);
         {
             struct node * q = read_C(a);
             while (true) {
                 token = read_token(t);
                 if (token != c_and && token != c_or) {
-                    t->token_held = true;
+                    hold_token(t);
                     break;
                 }
                 q = read_C_connection(a, q, token);
             }
-            if (p_end == 0) p->left = q; else p_end->right = q;
+            if (p_end == NULL) p->left = q; else p_end->right = q;
             p_end = q;
         }
     }
@@ -541,7 +645,7 @@ static struct node * C_style(struct analyser * a, const char * s, int token) {
         case 'D':
             p->aux = read_C(a); continue;
         case 'A':
-            p->AE = read_AE(a, 0, 0); continue;
+            p->AE = read_AE(a, NULL, 0); continue;
         case 'f':
             get_token(a, c_for); continue;
         case 'S':
@@ -632,11 +736,10 @@ static int compare_node(const struct node *p, const struct node *q) {
     PTR_NULL_CHECK(p->name, q->name);
     if (p->name) {
         int r;
-        if (SIZE(p->name->b) != SIZE(q->name->b)) {
-            return SIZE(p->name->b) - SIZE(q->name->b);
+        if (SIZE(p->name->s) != SIZE(q->name->s)) {
+            return SIZE(p->name->s) - SIZE(q->name->s);
         }
-        r = memcmp(p->name->b, q->name->b,
-                   SIZE(p->name->b) * sizeof(symbol));
+        r = memcmp(p->name->s, q->name->s, SIZE(p->name->s));
         if (r != 0) return r;
     }
@@ -654,29 +757,34 @@ static int compare_node(const struct node *p, const struct node *q) {
     return compare_node(p->right, q->right);
 }
-static void make_among(struct analyser * a, struct node * p, struct node * substring) {
+static struct node * make_among(struct analyser * a, struct node * p, struct node * substring) {
     NEW(among, x);
     NEWVEC(amongvec, v, p->number);
     struct node * q = p->left;
+    struct node * starter = NULL;
     struct amongvec * w0 = v;
     struct amongvec * w1 = v;
     int result = 1;
-    int direction = substring != 0 ? substring->mode : p->mode;
+    int direction = substring != NULL ? substring->mode : p->mode;
     int backward = direction == m_backward;
-    if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
+    if (a->amongs == NULL) a->amongs = x; else a->amongs_end->next = x;
     a->amongs_end = x;
-    x->next = 0;
+    x->next = NULL;
+    x->node = p;
     x->b = v;
     x->number = a->among_count++;
     x->function_count = 0;
-    x->starter = 0;
     x->nocommand_count = 0;
     x->amongvar_needed = false;
+    x->always_matches = false;
+    x->shortest_size = INT_MAX;
-    if (q->type == c_bra) { x->starter = q; q = q->right; }
+    if (q->type == c_bra) {
+        starter = q;
+        p->left = q = q->right;
+    }
     while (q) {
         if (q->type == c_literalstring) {
@@ -694,10 +802,15 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
                 check_routine_mode(a, function, direction);
                 x->function_count++;
             } else {
-                w1->function = 0;
+                w1->function = NULL;
+                if (w1->size == 0) {
+                    // This among contains the empty string without a gating
+                    // function so it will always match.
+                    x->always_matches = true;
+                }
             }
             w1++;
-        } else if (q->left == 0) {
+        } else if (q->left == NULL) {
             /* empty command: () */
             w0 = w1;
         } else {
@@ -732,7 +845,8 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
     x->command_count = result - 1;
     {
         NEWVEC(node*, commands, x->command_count);
-        memset(commands, 0, x->command_count * sizeof(struct node*));
+        for (int i = 0; i != x->command_count; ++i)
+            commands[i] = NULL;
         for (w0 = v; w0 < w1; w0++) {
             if (w0->result > 0) {
                 /* result == -1 when there's no command. */
@@ -757,6 +871,8 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
         int size = w0->size;
         struct amongvec * w;
+        if (size && size < x->shortest_size) x->shortest_size = size;
         for (w = w0 - 1; w >= v; w--) {
             if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
                 w0->i = w - v;  /* fill in index of longest substring */
@@ -782,16 +898,29 @@ static void make_among(struct analyser * a, struct node * p, struct node * subst
     x->literalstring_count = p->number;
     p->among = x;
-    x->substring = substring;
-    if (substring != 0) substring->among = x;
     if (x->command_count > 1 ||
-        (x->command_count == 1 && x->nocommand_count > 0) ||
-        x->starter != 0) {
+        (x->command_count == 1 && x->nocommand_count > 0)) {
         /* We need to set among_var rather than just checking if find_among*()
          * returns zero or not.
          */
         x->amongvar_needed = a->amongvar_needed = true;
     }
+    if (starter) {
+        starter->right = p;
+        if (substring) {
+            p = starter;
+        } else {
+            substring = new_node(a, c_substring);
+            substring->right = starter;
+            p = substring;
+        }
+    }
+    x->substring = substring;
+    if (substring != NULL) substring->among = x;
+    if (x->function_count > 0) ++a->among_with_function_count;
+    return p;
 }
 static int
@@ -805,11 +934,11 @@ is_just_true(struct node * q)
 static struct node * read_among(struct analyser * a) {
     struct tokeniser * t = a->tokeniser;
     struct node * p = new_node(a, c_among);
-    struct node * p_end = 0;
+    struct node * p_end = NULL;
     int previous_token = -1;
     struct node * substring = a->substring;
-    a->substring = 0;
+    a->substring = NULL;
     p->number = 0; /* counts the number of literals */
     if (!get_token(a, c_bra)) return p;
     while (true) {
@@ -822,8 +951,9 @@ static struct node * read_among(struct analyser * a) {
                     struct node * r = new_node(a, c_name);
                     name_to_node(a, r, 'r');
                     q->left = r;
+                } else {
+                    hold_token(t);
                 }
-                else t->token_held = true;
                 p->number++; break;
             case c_bra:
                 if (previous_token == c_bra) error(a, e_adjacent_bracketed_in_among);
@@ -832,7 +962,7 @@ static struct node * read_among(struct analyser * a) {
                     /* Convert anything equivalent to () to () so we handle it
                      * the same way.
                      */
-                    q->left = 0;
+                    q->left = NULL;
                 }
                 break;
             default:
@@ -841,19 +971,18 @@ static struct node * read_among(struct analyser * a) {
                 continue;
             case c_ket:
                 if (p->number == 0) error(a, e_empty_among);
-                if (t->error_count == 0) make_among(a, p, substring);
+                if (t->error_count == 0) p = make_among(a, p, substring);
                 return p;
         }
         previous_token = token;
-        if (p_end == 0) p->left = q; else p_end->right = q;
+        if (p_end == NULL) p->left = q; else p_end->right = q;
         p_end = q;
     }
 }
 static struct node * read_substring(struct analyser * a) {
     struct node * p = new_node(a, c_substring);
-    if (a->substring != 0) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
+    if (a->substring != NULL) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
     a->substring = p;
     return p;
 }
@@ -863,6 +992,10 @@ static void check_modifyable(struct analyser * a) {
 }
 static int ae_uses_name(struct node * p, struct name * q) {
+    if (!p) {
+        // AE is NULL after a syntax error, e.g. `$x = $y`
+        return 0;
+    }
     switch (p->type) {
         case c_name:
         case c_lenof:
@@ -925,13 +1058,88 @@ static struct node * read_C(struct analyser * a) {
         case c_fail:
         case c_test:
         case c_do:
-        case c_goto:
-        case c_gopast:
         case c_repeat:
             return C_style(a, "C", token);
-        case c_loop:
-        case c_atleast:
-            return C_style(a, "AC", token);
+        case c_goto:
+        case c_gopast: {
+            struct node * subcommand = read_C(a);
+            if (subcommand->type == c_grouping || subcommand->type == c_non) {
+                /* We synthesise special commands for "goto" or "gopast" when
+                 * used on a grouping or an inverted grouping - the movement of
+                 * c by the matching action is exactly what we want!
+                 *
+                 * Adding the tokens happens to give unique values (the code
+                 * would fail to compile if it didn't!)
+                 */
+                switch (token + subcommand->type) {
+                    case c_goto + c_grouping:
+                        subcommand->type = c_goto_grouping;
+                        break;
+                    case c_gopast + c_grouping:
+                        subcommand->type = c_gopast_grouping;
+                        break;
+                    case c_goto + c_non:
+                        subcommand->type = c_goto_non;
+                        break;
+                    case c_gopast + c_non:
+                        subcommand->type = c_gopast_non;
+                        break;
+                    default:
+                        fprintf(stderr, "Unexpected go/grouping combination: %s %s",
+                                name_of_token(token),
+                                name_of_token(subcommand->type));
+                        exit(1);
+                }
+                return subcommand;
+            }
+            struct node * p = new_node(a, token);
+            p->left = subcommand;
+            return p;
+        }
+        case c_loop: {
+            struct node * n = C_style(a, "AC", token);
+            // n->AE is NULL after a syntax error, e.g. `loop next`.
+            if (n->AE && n->AE->type == c_number) {
+                if (n->AE->number <= 0) {
+                    // `loop N C`, where N <= 0 is a no-op.
+                    if (n->AE->fixed_constant) {
+                        fprintf(stderr,
+                                "%s:%d: warning: loop %d C is a no-op\n",
+                                t->file, n->AE->line_number, n->AE->number);
+                    }
+                    n->AE = NULL;
+                    n->left = NULL;
+                    n->type = c_true;
+                } else if (n->AE->number == 1) {
+                    // `loop 1 C` -> `C`.
+                    if (n->AE->fixed_constant) {
+                        fprintf(stderr,
+                                "%s:%d: warning: loop 1 C is just C\n",
+                                t->file, n->AE->line_number);
+                    }
+                    n = n->left;
+                }
+            }
+            return n;
+        }
+        case c_atleast: {
+            struct node * n = C_style(a, "AC", token);
+            // n->AE is NULL after a syntax error, e.g. `loop next`.
+            if (n->AE && n->AE->type == c_number) {
+                if (n->AE->number <= 0) {
+                    // `atleast N C` where N <= 0 -> `repeat C`.
+                    if (n->AE->fixed_constant) {
+                        fprintf(stderr,
+                                "%s:%d: warning: atleast %d C is just repeat C\n",
+                                t->file, n->AE->line_number, n->AE->number);
+                    }
+                    n->AE = NULL;
+                    n->type = c_repeat;
+                }
+            }
+            return n;
+        }
         case c_setmark: {
             struct node * n = C_style(a, "i", token);
             if (n->name) n->name->initialised = true;
@@ -942,24 +1150,28 @@ static struct node * read_C(struct analyser * a) {
             return C_style(a, "A", token);
         case c_hop: {
             struct node * n = C_style(a, "A", token);
-            if (n->AE->type == c_number) {
-                if (n->AE->number < 0) {
+            // n->AE is NULL after a syntax error, e.g. `hop hop`.
+            if (n->AE && n->AE->type == c_number) {
+                if (n->AE->number == 1) {
+                    // Convert `hop 1` to `next`.
+                    n->AE = NULL;
+                    n->type = c_next;
+                } else if (n->AE->number == 0) {
+                    if (n->AE->fixed_constant) {
+                        fprintf(stderr,
+                                "%s:%d: warning: hop 0 is a no-op\n",
+                                t->file, n->AE->line_number);
+                    }
+                    n->AE = NULL;
+                    n->type = c_true;
+                } else if (n->AE->number < 0) {
                     fprintf(stderr,
                             "%s:%d: warning: hop %d now signals f (as was "
                             "always documented) rather than moving the cursor "
                             "in the opposite direction\n",
-                            a->tokeniser->file,
-                            n->AE->line_number,
-                            n->AE->number);
+                            t->file, n->AE->line_number, n->AE->number);
                     n->AE = NULL;
                     n->type = c_false;
-                } else if (n->AE->number == 0) {
-                    fprintf(stderr,
-                            "%s:%d: warning: hop 0 is a no-op\n",
-                            a->tokeniser->file,
-                            n->AE->line_number);
-                    n->AE = NULL;
-                    n->type = c_true;
                 }
             }
             return n;
@@ -978,10 +1190,16 @@ static struct node * read_C(struct analyser * a) {
             return new_node(a, token);
         case c_assignto:
         case c_sliceto: {
-            struct node *n;
             check_modifyable(a);
-            n = C_style(a, "s", token);
+            struct node *n = C_style(a, "s", token);
             if (n->name) n->name->initialised = true;
+            if (token == c_assignto) {
+                fprintf(stderr,
+                        "%s:%d: warning: Use of `=>` is not recommended, "
+                        "see https://snowballstem.org/compiler/snowman.html "
+                        "section 13.3 for details\n",
+                        t->file, n->line_number);
+            }
             return n;
         }
         case c_assign:
@@ -1003,29 +1221,28 @@ static struct node * read_C(struct analyser * a) {
             return n;
         }
         case c_dollar: {
-            struct tokeniser * t = a->tokeniser;
             read_token(t);
             if (t->token == c_bra) {
                 /* Handle newer $(AE REL_OP AE) syntax. */
-                struct node * n = read_AE(a, 0, 0);
+                struct node * n = read_AE(a, NULL, 0);
                 read_token(t);
-                int token = t->token;
+                token = t->token;
                 switch (token) {
                     case c_assign:
                         count_error(a);
                         fprintf(stderr, "%s:%d: Expected relational operator (did you mean '=='?)\n",
-				t->file, t->line_number);
+                                t->file, t->line_number);
                         /* Assume it was == to try to avoid an error avalanche. */
                         token = c_eq;
                         /* FALLTHRU */
                     case c_eq:
                     case c_ne:
-                    case c_gr:
+                    case c_gt:
                     case c_ge:
-                    case c_ls:
+                    case c_lt:
                     case c_le: {
                         struct node * lhs = n;
-                        struct node * rhs = read_AE(a, 0, 0);
+                        struct node * rhs = read_AE(a, NULL, 0);
                         if (lhs->type == c_number && rhs->type == c_number) {
                             // Evaluate constant numeric test expression.
                             int result;
@@ -1036,13 +1253,13 @@ static struct node * read_C(struct analyser * a) {
                                 case c_ne:
                                     result = (lhs->number != rhs->number);
                                     break;
-                                case c_gr:
+                                case c_gt:
                                     result = (lhs->number > rhs->number);
                                     break;
                                 case c_ge:
                                     result = (lhs->number >= rhs->number);
                                     break;
-                                case c_ls:
+                                case c_lt:
                                     result = (lhs->number < rhs->number);
                                     break;
                                 case c_le:
@@ -1064,7 +1281,7 @@ static struct node * read_C(struct analyser * a) {
                     }
                     default:
                         error(a, e_unexpected_token);
-                        t->token_held = true;
+                        hold_token(t);
                         break;
                 }
                 return n;
@@ -1104,9 +1321,9 @@ static struct node * read_C(struct analyser * a) {
                     switch (p->type) {
                         case c_eq:
                         case c_ne:
-                        case c_gr:
+                        case c_gt:
                         case c_ge:
-                        case c_ls:
+                        case c_lt:
                         case c_le:
                             p->left = new_node(a, c_name);
                             p->left->name = q;
@@ -1136,7 +1353,7 @@ static struct node * read_C(struct analyser * a) {
             }
             error(a, e_unexpected_token);
-            t->token_held = true;
+            hold_token(t);
             return new_node(a, c_dollar);
         }
         case c_name:
@@ -1181,7 +1398,7 @@ static struct node * read_C(struct analyser * a) {
             return read_literalstring(a);
         case c_among: return read_among(a);
         case c_substring: return read_substring(a);
-        default: error(a, e_unexpected_token); return 0;
+        default: error(a, e_unexpected_token); return NULL;
     }
 }
@@ -1189,28 +1406,30 @@ static int next_symbol(symbol * p, symbol * W, int utf8) {
     if (utf8) {
         int ch;
         int j = get_utf8(p, & ch);
-        W[0] = ch; return j;
+        *W = ch;
+        return j;
     } else {
-        W[0] = p[0]; return 1;
+        *W = *p;
+        return 1;
     }
 }
 static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
     int j = 0;
-    symbol W[1];
+    symbol W;
     int width;
     if (style == c_plus) {
         while (j < SIZE(q)) {
-            width = next_symbol(q + j, W, utf8);
-            p = add_to_b(p, 1, W);
+            width = next_symbol(q + j, &W, utf8);
+            p = add_symbol_to_b(p, W);
             j += width;
         }
     } else {
         while (j < SIZE(q)) {
             int i;
-            width = next_symbol(q + j, W, utf8);
+            width = next_symbol(q + j, &W, utf8);
             for (i = 0; i < SIZE(p); i++) {
-                if (p[i] == W[0]) {
+                if (p[i] == W) {
                     memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
                     SIZE(p)--;
                 }
@@ -1226,25 +1445,42 @@ static void read_define_grouping(struct analyser * a, struct name * q) {
     int style = c_plus;
     {
         NEW(grouping, p);
-        if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
+        if (a->groupings == NULL) a->groupings = p; else a->groupings_end->next = p;
         a->groupings_end = p;
-        if (q) q->grouping = p;
-        p->next = 0;
+        if (q) {
+            if (q->grouping != NULL) {
+                error(a, e_redefined);
+                FREE(q->grouping);
+            }
+            q->grouping = p;
+        }
+        p->next = NULL;
         p->name = q;
-        p->line_number = a->tokeniser->line_number;
+        p->line_number = t->line_number;
         p->b = create_b(0);
         while (true) {
             switch (read_token(t)) {
-                case c_name:
-                    {
-                        struct name * r = find_name(a);
-                        if (r) {
-                            check_name_type(a, r, 'g');
-                            p->b = alter_grouping(p->b, r->grouping->b, style, false);
-                            r->used_in_definition = true;
-                        }
+                case c_name: {
+                    struct name * r = find_name(a);
+                    if (!r) break;
+                    check_name_type(a, r, 'g');
+                    if (r == q) {
+                        count_error(a);
+                        r->s[SIZE(r->s)] = 0;
+                        fprintf(stderr, "%s:%d: %s defined in terms of itself\n",
+                                t->file, t->line_number, r->s);
+                    } else if (!r->grouping) {
+                        count_error(a);
+                        r->s[SIZE(r->s)] = 0;
+                        fprintf(stderr, "%s:%d: %s undefined\n",
+                                t->file, t->line_number, r->s);
+                    } else {
+                        p->b = alter_grouping(p->b, r->grouping->b, style, false);
                     }
+                    r->used_in_definition = true;
                     break;
+                }
                 case c_literalstring:
                     p->b = alter_grouping(p->b, t->b, style, (a->encoding == ENC_UTF8));
                     break;
@@ -1269,7 +1505,7 @@ static void read_define_grouping(struct analyser * a, struct name * q) {
             p->smallest_ch = min;
             if (min == 1<<16) error(a, e_empty_grouping);
         }
-        t->token_held = true; return;
+        hold_token(t);
     }
 }
@@ -1278,20 +1514,27 @@ static void read_define_routine(struct analyser * a, struct name * q) {
     a->amongvar_needed = false;
     if (q) {
         check_name_type(a, q, 'R');
-        if (q->definition != 0) error(a, e_redefined);
-        if (q->mode < 0) q->mode = a->mode; else
+        if (q->definition != NULL) error(a, e_redefined);
+        if (q->mode == m_unknown) q->mode = a->mode; else
         if (q->mode != a->mode) error2(a, e_declared_as_different_mode, q->mode);
     }
     p->name = q;
-    if (a->program == 0) a->program = p; else a->program_end->right = p;
+    if (a->program == NULL) a->program = p; else a->program_end->right = p;
     a->program_end = p;
     get_token(a, c_as);
     p->left = read_C(a);
     if (q) q->definition = p->left;
-    if (a->substring != 0) {
+    /* We should get a node with a NULL right pointer from read_C() for the
+     * routine's code.  We synthesise a "functionend" node there so
+     * optimisations such as dead code elimination and tail call optimisation
+     * can easily see where the function ends.
+     */
+    assert(p->left->right == NULL);
+    p->left->right = new_node(a, c_functionend);
+    if (a->substring != NULL) {
         error2(a, e_unresolved_substring, a->substring->line_number);
-        a->substring = 0;
+        a->substring = NULL;
     }
     p->amongvar_needed = a->amongvar_needed;
 }
@@ -1303,15 +1546,19 @@ static void read_define(struct analyser * a) {
         if (q) {
             type = q->type;
         } else {
-            /* No declaration, so sniff next token - if it is 'as' then parse
-             * as a routine, otherwise as a grouping.
+            /* No declaration so sniff next token - if it is a string or name
+             * we parse as a grouping, otherwise we parse as a routine.  This
+             * avoids an avalanche of further errors if `as` is missing from a
+             * routine definition.
              */
-            if (read_token(a->tokeniser) == c_as) {
-                type = t_routine;
-            } else {
-                type = t_grouping;
+            switch (peek_token(a->tokeniser)) {
+                case c_literalstring:
+                case c_name:
+                    type = t_grouping;
+                    break;
+                default:
+                    type = t_routine;
             }
-            a->tokeniser->token_held = true;
         }
         if (type == t_grouping) {
@@ -1396,10 +1643,10 @@ extern void read_program(struct analyser * a) {
         while (q) {
             switch (q->type) {
                 case t_external: case t_routine:
-                    if (q->used && q->definition == 0) error4(a, q);
+                    if (q->used && q->definition == NULL) error4(a, q);
                     break;
                 case t_grouping:
-                    if (q->used && q->grouping == 0) error4(a, q);
+                    if (q->used && q->grouping == NULL) error4(a, q);
                     break;
             }
             q = q->next;
@@ -1411,21 +1658,22 @@ extern void read_program(struct analyser * a) {
         struct name ** ptr = &(a->names);
         while (q) {
             if (!q->referenced) {
-                fprintf(stderr, "%s:%d: warning: %s '",
+                q->s[SIZE(q->s)] = 0;
+                fprintf(stderr, "%s:%d: warning: %s '%s' ",
                         a->tokeniser->file,
                         q->declaration_line_number,
-                        name_of_name_type(q->type));
-                report_b(stderr, q->b);
+                        name_of_name_type(q->type),
+                        q->s);
                 if (q->type == t_routine ||
                     q->type == t_external ||
                     q->type == t_grouping) {
-                    fprintf(stderr, "' declared but not defined\n");
+                    fprintf(stderr, "declared but not defined\n");
                 } else {
-                    fprintf(stderr, "' defined but not used\n");
-                    q = q->next;
-                    *ptr = q;
-                    continue;
+                    fprintf(stderr, "defined but not used\n");
                 }
+                q = q->next;
+                *ptr = q;
+                continue;
             } else if (q->type == t_routine || q->type == t_grouping) {
                 /* It's OK to define a grouping but only use it to define other
                  * groupings.
@@ -1437,29 +1685,32 @@ extern void read_program(struct analyser * a) {
                     } else {
                         line_num = q->grouping->line_number;
                     }
-                    fprintf(stderr, "%s:%d: warning: %s '",
+                    q->s[SIZE(q->s)] = 0;
+                    fprintf(stderr, "%s:%d: warning: %s '%s' defined but not used\n",
                             a->tokeniser->file,
                             line_num,
-                            name_of_name_type(q->type));
-                    report_b(stderr, q->b);
-                    fprintf(stderr, "' defined but not used\n");
+                            name_of_name_type(q->type),
+                            q->s);
+                    q = q->next;
+                    *ptr = q;
+                    continue;
                 }
             } else if (q->type == t_external) {
                 /* Unused is OK. */
             } else if (!q->initialised) {
-                fprintf(stderr, "%s:%d: warning: %s '",
+                q->s[SIZE(q->s)] = 0;
+                fprintf(stderr, "%s:%d: warning: %s '%s' is never initialised\n",
                         a->tokeniser->file,
                         q->declaration_line_number,
-                        name_of_name_type(q->type));
-                report_b(stderr, q->b);
-                fprintf(stderr, "' is never initialised\n");
+                        name_of_name_type(q->type),
+                        q->s);
             } else if (!q->value_used) {
-                fprintf(stderr, "%s:%d: warning: %s '",
+                q->s[SIZE(q->s)] = 0;
+                fprintf(stderr, "%s:%d: warning: %s '%s' is set but never used\n",
                         a->tokeniser->file,
                         q->declaration_line_number,
-                        name_of_name_type(q->type));
-                report_b(stderr, q->b);
-                fprintf(stderr, "' is set but never used\n");
+                        name_of_name_type(q->type),
+                        q->s);
                 remove_dead_assignments(a->program, q);
                 q = q->next;
                 *ptr = q;
@@ -1485,17 +1736,18 @@ extern void read_program(struct analyser * a) {
 extern struct analyser * create_analyser(struct tokeniser * t) {
     NEW(analyser, a);
     a->tokeniser = t;
-    a->nodes = 0;
-    a->names = 0;
-    a->literalstrings = 0;
-    a->program = 0;
-    a->amongs = 0;
+    a->nodes = NULL;
+    a->names = NULL;
+    a->literalstrings = NULL;
+    a->program = NULL;
+    a->amongs = NULL;
     a->among_count = 0;
-    a->groupings = 0;
+    a->among_with_function_count = 0;
+    a->groupings = NULL;
     a->mode = m_forward;
     a->modifyable = true;
     { int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
-    a->substring = 0;
+    a->substring = NULL;
     a->int_limits_used = false;
     return a;
 }
@@ -1513,7 +1765,8 @@ extern void close_analyser(struct analyser * a) {
         struct name * q = a->names;
         while (q) {
             struct name * q_next = q->next;
-            lose_b(q->b); FREE(q);
+            lose_s(q->s);
+            FREE(q);
             q = q_next;
         }
     }