mittens 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,204 @@
1
+ /* This is a simple program which uses libstemmer to provide a command
2
+ * line interface for stemming using any of the algorithms provided.
3
+ */
4
+
5
+ #include <stdio.h>
6
+ #include <stdlib.h> /* for malloc, free */
7
+ #include <string.h> /* for memmove */
8
+ #include <ctype.h> /* for isupper, tolower */
9
+
10
+ #include "libstemmer.h"
11
+
12
+ const char * progname;
13
+ static int pretty = 1;
14
+
15
+ static void
16
+ stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
17
+ {
18
+ #define INC 10
19
+ int lim = INC;
20
+ sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));
21
+
22
+ while (1) {
23
+ int ch = getc(f_in);
24
+ if (ch == EOF) {
25
+ free(b); return;
26
+ }
27
+ {
28
+ int i = 0;
29
+ int inlen = 0;
30
+ while (ch != '\n' && ch != EOF) {
31
+ if (i == lim) {
32
+ sb_symbol * newb;
33
+ newb = (sb_symbol *)
34
+ realloc(b, (lim + INC) * sizeof(sb_symbol));
35
+ if (newb == 0) goto error;
36
+ b = newb;
37
+ lim = lim + INC;
38
+ }
39
+ /* Update count of utf-8 characters. */
40
+ if (ch < 0x80 || ch > 0xBF) inlen += 1;
41
+ /* force lower case: */
42
+ ch = tolower(ch);
43
+
44
+ b[i] = ch;
45
+ i++;
46
+ ch = getc(f_in);
47
+ }
48
+
49
+ {
50
+ const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
51
+ if (stemmed == NULL)
52
+ {
53
+ fprintf(stderr, "Out of memory");
54
+ exit(1);
55
+ }
56
+
57
+ if (pretty == 1) {
58
+ fwrite(b, i, 1, f_out);
59
+ fputs(" -> ", f_out);
60
+ } else if (pretty == 2) {
61
+ fwrite(b, i, 1, f_out);
62
+ if (sb_stemmer_length(stemmer) > 0) {
63
+ int j;
64
+ if (inlen < 30) {
65
+ for (j = 30 - inlen; j > 0; j--)
66
+ fputs(" ", f_out);
67
+ } else {
68
+ fputs("\n", f_out);
69
+ for (j = 30; j > 0; j--)
70
+ fputs(" ", f_out);
71
+ }
72
+ }
73
+ }
74
+
75
+ fputs((const char *)stemmed, f_out);
76
+ putc('\n', f_out);
77
+ }
78
+ }
79
+ }
80
+ error:
81
+ if (b != 0) free(b);
82
+ return;
83
+ }
84
+
85
+ /** Display the command line syntax, and then exit.
86
+ * @param n The value to exit with.
87
+ */
88
+ static void
89
+ usage(int n)
90
+ {
91
+ printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
92
+ "\n"
93
+ "The input file consists of a list of words to be stemmed, one per\n"
94
+ "line. Words should be in lower case, but (for English) A-Z letters\n"
95
+ "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
96
+ "used.\n"
97
+ "\n"
98
+ "If -c is given, the argument is the character encoding of the input\n"
99
+ "and output files. If it is omitted, the UTF-8 encoding is used.\n"
100
+ "\n"
101
+ "If -p is given the output file consists of each word of the input\n"
102
+ "file followed by \"->\" followed by its stemmed equivalent.\n"
103
+ "If -p2 is given the output file is a two column layout containing\n"
104
+ "the input words in the first column and the stemmed equivalents in\n"
105
+ "the second column.\n"
106
+ "Otherwise, the output file consists of the stemmed words, one per\n"
107
+ "line.\n"
108
+ "\n"
109
+ "-h displays this help\n",
110
+ progname);
111
+ exit(n);
112
+ }
113
+
114
+ int
115
+ main(int argc, char * argv[])
116
+ {
117
+ const char * in = 0;
118
+ const char * out = 0;
119
+ FILE * f_in;
120
+ FILE * f_out;
121
+ struct sb_stemmer * stemmer;
122
+
123
+ const char * language = "english";
124
+ const char * charenc = NULL;
125
+
126
+ int i = 1;
127
+ pretty = 0;
128
+
129
+ progname = argv[0];
130
+
131
+ while (i < argc) {
132
+ const char * s = argv[i++];
133
+ if (s[0] == '-') {
134
+ if (strcmp(s, "-o") == 0) {
135
+ if (i >= argc) {
136
+ fprintf(stderr, "%s requires an argument\n", s);
137
+ exit(1);
138
+ }
139
+ out = argv[i++];
140
+ } else if (strcmp(s, "-i") == 0) {
141
+ if (i >= argc) {
142
+ fprintf(stderr, "%s requires an argument\n", s);
143
+ exit(1);
144
+ }
145
+ in = argv[i++];
146
+ } else if (strcmp(s, "-l") == 0) {
147
+ if (i >= argc) {
148
+ fprintf(stderr, "%s requires an argument\n", s);
149
+ exit(1);
150
+ }
151
+ language = argv[i++];
152
+ } else if (strcmp(s, "-c") == 0) {
153
+ if (i >= argc) {
154
+ fprintf(stderr, "%s requires an argument\n", s);
155
+ exit(1);
156
+ }
157
+ charenc = argv[i++];
158
+ } else if (strcmp(s, "-p2") == 0) {
159
+ pretty = 2;
160
+ } else if (strcmp(s, "-p") == 0) {
161
+ pretty = 1;
162
+ } else if (strcmp(s, "-h") == 0) {
163
+ usage(0);
164
+ } else {
165
+ fprintf(stderr, "option %s unknown\n", s);
166
+ usage(1);
167
+ }
168
+ } else {
169
+ fprintf(stderr, "unexpected parameter %s\n", s);
170
+ usage(1);
171
+ }
172
+ }
173
+
174
+ /* prepare the files */
175
+ f_in = (in == 0) ? stdin : fopen(in, "r");
176
+ if (f_in == 0) {
177
+ fprintf(stderr, "file %s not found\n", in);
178
+ exit(1);
179
+ }
180
+ f_out = (out == 0) ? stdout : fopen(out, "w");
181
+ if (f_out == 0) {
182
+ fprintf(stderr, "file %s cannot be opened\n", out);
183
+ exit(1);
184
+ }
185
+
186
+ /* do the stemming process: */
187
+ stemmer = sb_stemmer_new(language, charenc);
188
+ if (stemmer == 0) {
189
+ if (charenc == NULL) {
190
+ fprintf(stderr, "language `%s' not available for stemming\n", language);
191
+ exit(1);
192
+ } else {
193
+ fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
194
+ exit(1);
195
+ }
196
+ }
197
+ stem_file(stemmer, f_in, f_out);
198
+ sb_stemmer_delete(stemmer);
199
+
200
+ if (in != 0) (void) fclose(f_in);
201
+ if (out != 0) (void) fclose(f_out);
202
+
203
+ return 0;
204
+ }
@@ -0,0 +1,55 @@
1
+ # Go Target for Snowball
2
+
3
+ The initial implementation was built as a port of the Rust target. The initial focus has been on getting it to function and making it work correctly. No attempt has been made to beautify the implementation or the generated code, or to address performance issues.
4
+
5
+ ## Usage
6
+
7
+ To generate Go source for a Snowball algorithm:
8
+ ```
9
+ $ snowball path/to/algorithm.sbl -go -o algorithm
10
+ ```
11
+
12
+ ### Go specific options
13
+
14
+ `-gop[ackage]` the package name used in the generated Go file (defaults to `snowball`)
15
+
16
+ `-gor[untime]` the import path used for the Go Snowball runtime (defaults to `github.com/snowballstem/snowball/go`)
17
+
18
+ ## Code Organization
19
+
20
+ `compiler/generator_go.c` has the Go code generation logic
21
+
22
+ `go/` contains the default Go Snowball runtime support
23
+
24
+ `go/stemwords` contains the source for a Go version of the stemwords utility
25
+
26
+ `go/algorithms` is the location where the code generated by the makefile ends up
27
+
28
+ ## Using the Generated Stemmers
29
+
30
+ Assuming you generated a stemmer, put that code in a package which is imported by this code as `english`.
31
+
32
+ ```
33
+ env := snowball.NewEnv("beautiful")
34
+ english.Stem(env)
35
+ fmt.Printf("stemmed word is: %s", env.Current())
36
+ ```
37
+
38
+ NOTE: you can call `env.SetCurrent("new_word")` to reuse the env on subsequent calls to the stemmer.
39
+
40
+ ## Testing
41
+
42
+ Only the existing Snowball algorithms have been used for testing. This does not exercise all features of the language.
43
+
44
+ Run:
45
+
46
+ ```
47
+ $ make check_go
48
+ ```
49
+
50
+ An initial pass of fuzz-testing has been performed on the generated stemmers for the algorithms in this repo. Each ran for 5 minutes and used an initial corpus seeded with 10k words from the algorithm's snowballstem-data voc.txt file.
51
+
52
+ ## Known Limitations
53
+
54
+ - Code going through generate_dollar production has not been tested
55
+ - Code going through generate_debug production has not been tested
@@ -0,0 +1,16 @@
1
+ package snowball
2
+
3
+ import "fmt"
4
+
5
+ type AmongF func(env *Env, ctx interface{}) bool
6
+
7
+ type Among struct {
8
+ Str string
9
+ A int32
10
+ B int32
11
+ F AmongF
12
+ }
13
+
14
+ func (a *Among) String() string {
15
+ return fmt.Sprintf("str: `%s`, a: %d, b: %d, f: %p", a.Str, a.A, a.B, a.F)
16
+ }
@@ -0,0 +1,403 @@
1
+ package snowball
2
+
3
+ import (
4
+ "log"
5
+ "strings"
6
+ "unicode/utf8"
7
+ )
8
+
9
// Env represents the Snowball execution environment: the string being
// worked on plus the positions the stemmer keeps within it. All positions
// are byte offsets into the current string.
type Env struct {
	// current is the string being processed.
	current string
	// Cursor is the current position.
	Cursor int
	// Limit is the upper bound for forward processing.
	Limit int
	// LimitBackward is the lower bound for backward processing.
	LimitBackward int
	// Bra and Ket mark the start and end of the current slice.
	Bra int
	Ket int
}

// NewEnv creates a new Snowball execution environment on the provided
// string, with the cursor at the start and the slice covering all of it.
func NewEnv(val string) *Env {
	env := new(Env)
	env.SetCurrent(val)
	return env
}

// Current returns the string currently held by the environment.
func (env *Env) Current() string {
	return env.current
}

// SetCurrent replaces the string held by the environment and resets every
// position: Cursor, LimitBackward and Bra go to 0, Limit and Ket to the
// byte length of s.
func (env *Env) SetCurrent(s string) {
	end := len(s)
	*env = Env{
		current: s,
		Limit:   end,
		Ket:     end,
	}
}
43
+
44
+ func (env *Env) ReplaceS(bra, ket int, s string) int32 {
45
+ adjustment := int32(len(s)) - (int32(ket) - int32(bra))
46
+ result, _ := splitAt(env.current, bra)
47
+ rsplit := ket
48
+ if ket < bra {
49
+ rsplit = bra
50
+ }
51
+ _, rhs := splitAt(env.current, rsplit)
52
+ result += s
53
+ result += rhs
54
+
55
+ newLim := int32(env.Limit) + adjustment
56
+ env.Limit = int(newLim)
57
+
58
+ if env.Cursor >= ket {
59
+ newCur := int32(env.Cursor) + adjustment
60
+ env.Cursor = int(newCur)
61
+ } else if env.Cursor > bra {
62
+ env.Cursor = bra
63
+ }
64
+
65
+ env.current = result
66
+ return adjustment
67
+ }
68
+
69
+ func (env *Env) EqS(s string) bool {
70
+ if env.Cursor >= env.Limit {
71
+ return false
72
+ }
73
+
74
+ if strings.HasPrefix(env.current[env.Cursor:], s) {
75
+ env.Cursor += len(s)
76
+ for !onCharBoundary(env.current, env.Cursor) {
77
+ env.Cursor++
78
+ }
79
+ return true
80
+ }
81
+ return false
82
+ }
83
+
84
+ func (env *Env) EqSB(s string) bool {
85
+ if int32(env.Cursor)-int32(env.LimitBackward) < int32(len(s)) {
86
+ return false
87
+ } else if !onCharBoundary(env.current, env.Cursor-len(s)) ||
88
+ !strings.HasPrefix(env.current[env.Cursor-len(s):], s) {
89
+ return false
90
+ } else {
91
+ env.Cursor -= len(s)
92
+ return true
93
+ }
94
+ }
95
+
96
+ func (env *Env) SliceFrom(s string) bool {
97
+ bra, ket := env.Bra, env.Ket
98
+ env.ReplaceS(bra, ket, s)
99
+ return true
100
+ }
101
+
102
+ func (env *Env) NextChar() {
103
+ env.Cursor++
104
+ for !onCharBoundary(env.current, env.Cursor) {
105
+ env.Cursor++
106
+ }
107
+ }
108
+
109
+ func (env *Env) PrevChar() {
110
+ env.Cursor--
111
+ for !onCharBoundary(env.current, env.Cursor) {
112
+ env.Cursor--
113
+ }
114
+ }
115
+
116
+ func (env *Env) Hop(delta int32) bool {
117
+ res := env.Cursor
118
+ for delta > 0 {
119
+ delta--
120
+ if res >= env.Limit {
121
+ return false
122
+ }
123
+ res++
124
+ for res < env.Limit && !onCharBoundary(env.current, res) {
125
+ res++
126
+ }
127
+ }
128
+ env.Cursor = res
129
+ return true
130
+ }
131
+
132
+ func (env *Env) HopChecked(delta int32) bool {
133
+ return delta >= 0 && env.Hop(delta)
134
+ }
135
+
136
+ func (env *Env) HopBack(delta int32) bool {
137
+ res := env.Cursor
138
+ for delta > 0 {
139
+ delta--
140
+ if res <= env.LimitBackward {
141
+ return false
142
+ }
143
+ res--
144
+ for res > env.LimitBackward && !onCharBoundary(env.current, res) {
145
+ res--
146
+ }
147
+ }
148
+ env.Cursor = res
149
+ return true
150
+ }
151
+
152
+ func (env *Env) HopBackChecked(delta int32) bool {
153
+ return delta >= 0 && env.HopBack(delta)
154
+ }
155
+
156
+ func (env *Env) InGrouping(chars []byte, min, max int32) bool {
157
+ if env.Cursor >= env.Limit {
158
+ return false
159
+ }
160
+
161
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
162
+ if r != utf8.RuneError {
163
+ if r > max || r < min {
164
+ return false
165
+ }
166
+ r -= min
167
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
168
+ return false
169
+ }
170
+ env.NextChar()
171
+ return true
172
+ }
173
+ return false
174
+ }
175
+
176
+ func (env *Env) InGroupingB(chars []byte, min, max int32) bool {
177
+ if env.Cursor <= env.LimitBackward {
178
+ return false
179
+ }
180
+ env.PrevChar()
181
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
182
+ if r != utf8.RuneError {
183
+ env.NextChar()
184
+ if r > max || r < min {
185
+ return false
186
+ }
187
+ r -= min
188
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
189
+ return false
190
+ }
191
+ env.PrevChar()
192
+ return true
193
+ }
194
+ return false
195
+ }
196
+
197
+ func (env *Env) OutGrouping(chars []byte, min, max int32) bool {
198
+ if env.Cursor >= env.Limit {
199
+ return false
200
+ }
201
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
202
+ if r != utf8.RuneError {
203
+ if r > max || r < min {
204
+ env.NextChar()
205
+ return true
206
+ }
207
+ r -= min
208
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
209
+ env.NextChar()
210
+ return true
211
+ }
212
+ }
213
+ return false
214
+ }
215
+
216
+ func (env *Env) OutGroupingB(chars []byte, min, max int32) bool {
217
+ if env.Cursor <= env.LimitBackward {
218
+ return false
219
+ }
220
+ env.PrevChar()
221
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
222
+ if r != utf8.RuneError {
223
+ env.NextChar()
224
+ if r > max || r < min {
225
+ env.PrevChar()
226
+ return true
227
+ }
228
+ r -= min
229
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
230
+ env.PrevChar()
231
+ return true
232
+ }
233
+ }
234
+ return false
235
+ }
236
+
237
+ func (env *Env) SliceDel() bool {
238
+ return env.SliceFrom("")
239
+ }
240
+
241
+ func (env *Env) Insert(bra, ket int, s string) {
242
+ adjustment := env.ReplaceS(bra, ket, s)
243
+ if bra <= env.Bra {
244
+ env.Bra = int(int32(env.Bra) + adjustment)
245
+ }
246
+ if bra <= env.Ket {
247
+ env.Ket = int(int32(env.Ket) + adjustment)
248
+ }
249
+ }
250
+
251
+ func (env *Env) SliceTo() string {
252
+ return env.current[env.Bra:env.Ket]
253
+ }
254
+
255
+ func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 {
256
+ var i int32
257
+ j := int32(len(amongs))
258
+
259
+ c := env.Cursor
260
+ l := env.Limit
261
+
262
+ var commonI, commonJ int
263
+
264
+ firstKeyInspected := false
265
+ for {
266
+ k := i + ((j - i) >> 1)
267
+ var diff int32
268
+ common := min(commonI, commonJ)
269
+ w := amongs[k]
270
+ for lvar := common; lvar < len(w.Str); lvar++ {
271
+ if c+common == l {
272
+ diff--
273
+ break
274
+ }
275
+ diff = int32(env.current[c+common]) - int32(w.Str[lvar])
276
+ if diff != 0 {
277
+ break
278
+ }
279
+ common++
280
+ }
281
+ if diff < 0 {
282
+ j = k
283
+ commonJ = common
284
+ } else {
285
+ i = k
286
+ commonI = common
287
+ }
288
+ if j-i <= 1 {
289
+ if i > 0 {
290
+ break
291
+ }
292
+ if j == i {
293
+ break
294
+ }
295
+ if firstKeyInspected {
296
+ break
297
+ }
298
+ firstKeyInspected = true
299
+ }
300
+ }
301
+
302
+ for {
303
+ w := amongs[i]
304
+ if commonI >= len(w.Str) {
305
+ env.Cursor = c + len(w.Str)
306
+ if w.F != nil {
307
+ res := w.F(env, ctx)
308
+ env.Cursor = c + len(w.Str)
309
+ if res {
310
+ return w.B
311
+ }
312
+ } else {
313
+ return w.B
314
+ }
315
+ }
316
+ i = w.A
317
+ if i < 0 {
318
+ return 0
319
+ }
320
+ }
321
+ }
322
+
323
+ func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 {
324
+ var i int32
325
+ j := int32(len(amongs))
326
+
327
+ c := env.Cursor
328
+ lb := env.LimitBackward
329
+
330
+ var commonI, commonJ int
331
+
332
+ firstKeyInspected := false
333
+
334
+ for {
335
+ k := i + ((j - i) >> 1)
336
+ diff := int32(0)
337
+ common := min(commonI, commonJ)
338
+ w := amongs[k]
339
+ for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- {
340
+ if c-common == lb {
341
+ diff--
342
+ break
343
+ }
344
+ diff = int32(env.current[c-common-1]) - int32(w.Str[lvar])
345
+ if diff != 0 {
346
+ break
347
+ }
348
+ // Count up commons. But not one character but the byte width of that char
349
+ common++
350
+ }
351
+ if diff < 0 {
352
+ j = k
353
+ commonJ = common
354
+ } else {
355
+ i = k
356
+ commonI = common
357
+ }
358
+ if j-i <= 1 {
359
+ if i > 0 {
360
+ break
361
+ }
362
+ if j == i {
363
+ break
364
+ }
365
+ if firstKeyInspected {
366
+ break
367
+ }
368
+ firstKeyInspected = true
369
+ }
370
+ }
371
+ for {
372
+ w := amongs[i]
373
+ if commonI >= len(w.Str) {
374
+ env.Cursor = c - len(w.Str)
375
+ if w.F != nil {
376
+ res := w.F(env, ctx)
377
+ env.Cursor = c - len(w.Str)
378
+ if res {
379
+ return w.B
380
+ }
381
+ } else {
382
+ return w.B
383
+ }
384
+ }
385
+ i = w.A
386
+ if i < 0 {
387
+ return 0
388
+ }
389
+ }
390
+ }
391
+
392
+ func (env *Env) Debug(count, lineNumber int) {
393
+ log.Printf("snowball debug, count: %d, line: %d", count, lineNumber)
394
+ }
395
+
396
+ func (env *Env) Clone() *Env {
397
+ clone := *env
398
+ return &clone
399
+ }
400
+
401
+ func (env *Env) AssignTo() string {
402
+ return env.Current()
403
+ }