mittens 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/lib/mittens/version.rb +1 -1
- data/vendor/snowball/.github/workflows/ci.yml +216 -0
- data/vendor/snowball/CONTRIBUTING.rst +111 -62
- data/vendor/snowball/GNUmakefile +194 -136
- data/vendor/snowball/NEWS +798 -3
- data/vendor/snowball/README.rst +50 -1
- data/vendor/snowball/ada/src/stemmer.adb +25 -13
- data/vendor/snowball/ada/src/stemmer.ads +9 -9
- data/vendor/snowball/ada/stemmer_config.gpr +7 -7
- data/vendor/snowball/algorithms/basque.sbl +4 -19
- data/vendor/snowball/algorithms/catalan.sbl +2 -9
- data/vendor/snowball/algorithms/danish.sbl +1 -1
- data/vendor/snowball/algorithms/dutch.sbl +284 -122
- data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
- data/vendor/snowball/algorithms/english.sbl +52 -37
- data/vendor/snowball/algorithms/esperanto.sbl +157 -0
- data/vendor/snowball/algorithms/estonian.sbl +269 -0
- data/vendor/snowball/algorithms/finnish.sbl +2 -3
- data/vendor/snowball/algorithms/french.sbl +42 -16
- data/vendor/snowball/algorithms/german.sbl +35 -14
- data/vendor/snowball/algorithms/greek.sbl +76 -76
- data/vendor/snowball/algorithms/hungarian.sbl +8 -6
- data/vendor/snowball/algorithms/indonesian.sbl +14 -8
- data/vendor/snowball/algorithms/italian.sbl +11 -21
- data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
- data/vendor/snowball/algorithms/lovins.sbl +0 -1
- data/vendor/snowball/algorithms/nepali.sbl +138 -37
- data/vendor/snowball/algorithms/norwegian.sbl +19 -5
- data/vendor/snowball/algorithms/porter.sbl +2 -2
- data/vendor/snowball/algorithms/portuguese.sbl +9 -13
- data/vendor/snowball/algorithms/romanian.sbl +17 -4
- data/vendor/snowball/algorithms/serbian.sbl +467 -468
- data/vendor/snowball/algorithms/spanish.sbl +5 -7
- data/vendor/snowball/algorithms/swedish.sbl +60 -6
- data/vendor/snowball/algorithms/tamil.sbl +207 -176
- data/vendor/snowball/algorithms/turkish.sbl +461 -445
- data/vendor/snowball/algorithms/yiddish.sbl +36 -38
- data/vendor/snowball/compiler/analyser.c +445 -192
- data/vendor/snowball/compiler/driver.c +109 -101
- data/vendor/snowball/compiler/generator.c +853 -464
- data/vendor/snowball/compiler/generator_ada.c +404 -366
- data/vendor/snowball/compiler/generator_csharp.c +297 -260
- data/vendor/snowball/compiler/generator_go.c +323 -254
- data/vendor/snowball/compiler/generator_java.c +326 -252
- data/vendor/snowball/compiler/generator_js.c +362 -252
- data/vendor/snowball/compiler/generator_pascal.c +349 -197
- data/vendor/snowball/compiler/generator_python.c +257 -240
- data/vendor/snowball/compiler/generator_rust.c +423 -251
- data/vendor/snowball/compiler/header.h +117 -71
- data/vendor/snowball/compiler/space.c +137 -68
- data/vendor/snowball/compiler/syswords.h +2 -2
- data/vendor/snowball/compiler/tokeniser.c +125 -107
- data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
- data/vendor/snowball/csharp/Stemwords/App.config +2 -2
- data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
- data/vendor/snowball/doc/libstemmer_c_README +7 -4
- data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
- data/vendor/snowball/doc/libstemmer_java_README +12 -1
- data/vendor/snowball/doc/libstemmer_js_README +6 -4
- data/vendor/snowball/doc/libstemmer_python_README +9 -4
- data/vendor/snowball/examples/stemwords.c +12 -12
- data/vendor/snowball/go/env.go +107 -31
- data/vendor/snowball/go/util.go +0 -4
- data/vendor/snowball/include/libstemmer.h +4 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
- data/vendor/snowball/javascript/base-stemmer.js +186 -2
- data/vendor/snowball/javascript/stemwords.js +3 -6
- data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
- data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
- data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
- data/vendor/snowball/libstemmer/modules.txt +13 -10
- data/vendor/snowball/libstemmer/test.c +1 -1
- data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
- data/vendor/snowball/pascal/generate.pl +13 -13
- data/vendor/snowball/python/create_init.py +4 -1
- data/vendor/snowball/python/setup.cfg +0 -3
- data/vendor/snowball/python/setup.py +8 -3
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
- data/vendor/snowball/python/stemwords.py +8 -12
- data/vendor/snowball/runtime/api.c +10 -5
- data/vendor/snowball/runtime/header.h +10 -9
- data/vendor/snowball/runtime/utilities.c +9 -9
- data/vendor/snowball/rust/build.rs +1 -1
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
- data/vendor/snowball/tests/stemtest.c +7 -4
- metadata +7 -7
- data/vendor/snowball/.travis.yml +0 -112
- data/vendor/snowball/algorithms/german2.sbl +0 -145
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
- data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -1,38 +1,30 @@
|
|
1
1
|
|
2
2
|
package org.tartarus.snowball;
|
3
|
-
import java.lang.reflect.
|
3
|
+
import java.lang.reflect.UndeclaredThrowableException;
|
4
4
|
import java.io.Serializable;
|
5
|
+
import java.util.Arrays;
|
5
6
|
|
7
|
+
/**
|
8
|
+
* Base class for a snowball stemmer
|
9
|
+
*/
|
6
10
|
public class SnowballProgram implements Serializable {
|
7
11
|
protected SnowballProgram()
|
8
12
|
{
|
9
|
-
|
10
|
-
|
13
|
+
cursor = 0;
|
14
|
+
length = limit = 0;
|
15
|
+
limit_backward = 0;
|
16
|
+
bra = cursor;
|
17
|
+
ket = limit;
|
11
18
|
}
|
12
19
|
|
13
20
|
static final long serialVersionUID = 2016072500L;
|
14
21
|
|
15
|
-
private void init() {
|
16
|
-
cursor = 0;
|
17
|
-
limit = current.length();
|
18
|
-
limit_backward = 0;
|
19
|
-
bra = cursor;
|
20
|
-
ket = limit;
|
21
|
-
}
|
22
|
-
|
23
22
|
/**
|
24
23
|
* Set the current string.
|
25
24
|
*/
|
26
25
|
public void setCurrent(String value)
|
27
26
|
{
|
28
|
-
|
29
|
-
// the library keeps a reference to the buffer returned (for example,
|
30
|
-
// by converting it to a String in a way which doesn't force a copy),
|
31
|
-
// the buffer size will not decrease, and we will risk wasting a large
|
32
|
-
// amount of memory.
|
33
|
-
// Thanks to Wolfram Esser for spotting this problem.
|
34
|
-
current = new StringBuilder(value);
|
35
|
-
init();
|
27
|
+
setCurrent(value.toCharArray(), value.length());
|
36
28
|
}
|
37
29
|
|
38
30
|
/**
|
@@ -40,320 +32,414 @@ public class SnowballProgram implements Serializable {
|
|
40
32
|
*/
|
41
33
|
public String getCurrent()
|
42
34
|
{
|
43
|
-
return current
|
35
|
+
return new String(current, 0, length);
|
36
|
+
}
|
37
|
+
|
38
|
+
/**
|
39
|
+
* Set the current string.
|
40
|
+
* @param text character array containing input
|
41
|
+
* @param length valid length of text.
|
42
|
+
*/
|
43
|
+
public void setCurrent(char[] text, int length) {
|
44
|
+
current = text;
|
45
|
+
cursor = 0;
|
46
|
+
this.length = limit = length;
|
47
|
+
limit_backward = 0;
|
48
|
+
bra = cursor;
|
49
|
+
ket = limit;
|
50
|
+
}
|
51
|
+
|
52
|
+
/**
|
53
|
+
* Get the current buffer containing the stem.
|
54
|
+
* <p>
|
55
|
+
* NOTE: this may be a reference to a different character array than the
|
56
|
+
* one originally provided with setCurrent, in the exceptional case that
|
57
|
+
* stemming produced a longer intermediate or result string.
|
58
|
+
* </p>
|
59
|
+
* <p>
|
60
|
+
* It is necessary to use {@link #getCurrentBufferLength()} to determine
|
61
|
+
* the valid length of the returned buffer. For example, many words are
|
62
|
+
* stemmed simply by subtracting from the length to remove suffixes.
|
63
|
+
* </p>
|
64
|
+
* @see #getCurrentBufferLength()
|
65
|
+
*/
|
66
|
+
public char[] getCurrentBuffer() {
|
67
|
+
return current;
|
68
|
+
}
|
69
|
+
|
70
|
+
/**
|
71
|
+
* Get the valid length of the character array in
|
72
|
+
* {@link #getCurrentBuffer()}.
|
73
|
+
* @return valid length of the array.
|
74
|
+
*/
|
75
|
+
public int getCurrentBufferLength() {
|
76
|
+
return length;
|
44
77
|
}
|
45
78
|
|
46
79
|
// current string
|
47
|
-
|
80
|
+
private char[] current;
|
48
81
|
|
49
82
|
protected int cursor;
|
83
|
+
protected int length;
|
50
84
|
protected int limit;
|
51
85
|
protected int limit_backward;
|
52
86
|
protected int bra;
|
53
87
|
protected int ket;
|
54
88
|
|
55
89
|
public SnowballProgram(SnowballProgram other) {
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
90
|
+
current = other.current;
|
91
|
+
cursor = other.cursor;
|
92
|
+
length = other.length;
|
93
|
+
limit = other.limit;
|
94
|
+
limit_backward = other.limit_backward;
|
95
|
+
bra = other.bra;
|
96
|
+
ket = other.ket;
|
62
97
|
}
|
63
98
|
|
64
99
|
protected void copy_from(SnowballProgram other)
|
65
100
|
{
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
101
|
+
current = other.current;
|
102
|
+
cursor = other.cursor;
|
103
|
+
length = other.length;
|
104
|
+
limit = other.limit;
|
105
|
+
limit_backward = other.limit_backward;
|
106
|
+
bra = other.bra;
|
107
|
+
ket = other.ket;
|
108
|
+
}
|
109
|
+
|
110
|
+
protected boolean in_grouping(char[] s, int min, int max)
|
111
|
+
{
|
112
|
+
if (cursor >= limit) return false;
|
113
|
+
int ch = current[cursor];
|
114
|
+
if (ch > max || ch < min) return false;
|
115
|
+
ch -= min;
|
116
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
|
117
|
+
cursor++;
|
118
|
+
return true;
|
119
|
+
}
|
120
|
+
|
121
|
+
protected boolean go_in_grouping(char[] s, int min, int max)
|
122
|
+
{
|
123
|
+
while (cursor < limit) {
|
124
|
+
int ch = current[cursor];
|
125
|
+
if (ch > max || ch < min)
|
126
|
+
return true;
|
127
|
+
ch -= min;
|
128
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return true;
|
129
|
+
cursor++;
|
130
|
+
}
|
131
|
+
return false;
|
132
|
+
}
|
133
|
+
|
134
|
+
protected boolean in_grouping_b(char[] s, int min, int max)
|
135
|
+
{
|
136
|
+
if (cursor <= limit_backward) return false;
|
137
|
+
int ch = current[cursor - 1];
|
138
|
+
if (ch > max || ch < min) return false;
|
139
|
+
ch -= min;
|
140
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
|
141
|
+
cursor--;
|
142
|
+
return true;
|
72
143
|
}
|
73
144
|
|
74
|
-
protected boolean
|
145
|
+
protected boolean go_in_grouping_b(char[] s, int min, int max)
|
75
146
|
{
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
147
|
+
while (cursor > limit_backward) {
|
148
|
+
int ch = current[cursor - 1];
|
149
|
+
if (ch > max || ch < min) return true;
|
150
|
+
ch -= min;
|
151
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return true;
|
152
|
+
cursor--;
|
153
|
+
}
|
154
|
+
return false;
|
83
155
|
}
|
84
156
|
|
85
|
-
protected boolean
|
157
|
+
protected boolean out_grouping(char[] s, int min, int max)
|
86
158
|
{
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
159
|
+
if (cursor >= limit) return false;
|
160
|
+
int ch = current[cursor];
|
161
|
+
if (ch > max || ch < min) {
|
162
|
+
cursor++;
|
163
|
+
return true;
|
164
|
+
}
|
165
|
+
ch -= min;
|
166
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
|
167
|
+
cursor++;
|
168
|
+
return true;
|
169
|
+
}
|
170
|
+
return false;
|
94
171
|
}
|
95
172
|
|
96
|
-
protected boolean
|
173
|
+
protected boolean go_out_grouping(char[] s, int min, int max)
|
97
174
|
{
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
return false;
|
175
|
+
while (cursor < limit) {
|
176
|
+
int ch = current[cursor];
|
177
|
+
if (ch <= max && ch >= min) {
|
178
|
+
ch -= min;
|
179
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) {
|
180
|
+
return true;
|
181
|
+
}
|
182
|
+
}
|
183
|
+
cursor++;
|
184
|
+
}
|
185
|
+
return false;
|
110
186
|
}
|
111
187
|
|
112
|
-
protected boolean out_grouping_b(char
|
188
|
+
protected boolean out_grouping_b(char[] s, int min, int max)
|
113
189
|
{
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
190
|
+
if (cursor <= limit_backward) return false;
|
191
|
+
int ch = current[cursor - 1];
|
192
|
+
if (ch > max || ch < min) {
|
193
|
+
cursor--;
|
194
|
+
return true;
|
195
|
+
}
|
196
|
+
ch -= min;
|
197
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
|
198
|
+
cursor--;
|
199
|
+
return true;
|
200
|
+
}
|
201
|
+
return false;
|
202
|
+
}
|
203
|
+
|
204
|
+
protected boolean go_out_grouping_b(char[] s, int min, int max)
|
205
|
+
{
|
206
|
+
while (cursor > limit_backward) {
|
207
|
+
int ch = current[cursor - 1];
|
208
|
+
if (ch <= max && ch >= min) {
|
209
|
+
ch -= min;
|
210
|
+
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) != 0) {
|
211
|
+
return true;
|
212
|
+
}
|
213
|
+
}
|
214
|
+
cursor--;
|
215
|
+
}
|
216
|
+
return false;
|
126
217
|
}
|
127
218
|
|
128
219
|
protected boolean eq_s(CharSequence s)
|
129
220
|
{
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
221
|
+
if (limit - cursor < s.length()) return false;
|
222
|
+
int i;
|
223
|
+
for (i = 0; i != s.length(); i++) {
|
224
|
+
if (current[cursor + i] != s.charAt(i)) return false;
|
225
|
+
}
|
226
|
+
cursor += s.length();
|
227
|
+
return true;
|
137
228
|
}
|
138
229
|
|
139
230
|
protected boolean eq_s_b(CharSequence s)
|
140
231
|
{
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
232
|
+
if (cursor - limit_backward < s.length()) return false;
|
233
|
+
int i;
|
234
|
+
for (i = 0; i != s.length(); i++) {
|
235
|
+
if (current[cursor - s.length() + i] != s.charAt(i)) return false;
|
236
|
+
}
|
237
|
+
cursor -= s.length();
|
238
|
+
return true;
|
148
239
|
}
|
149
240
|
|
150
|
-
protected int find_among(Among v
|
241
|
+
protected int find_among(Among[] v)
|
151
242
|
{
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
i = w.substring_i;
|
217
|
-
if (i < 0) return 0;
|
218
|
-
}
|
243
|
+
int i = 0;
|
244
|
+
int j = v.length;
|
245
|
+
|
246
|
+
int c = cursor;
|
247
|
+
int l = limit;
|
248
|
+
|
249
|
+
int common_i = 0;
|
250
|
+
int common_j = 0;
|
251
|
+
|
252
|
+
boolean first_key_inspected = false;
|
253
|
+
|
254
|
+
while (true) {
|
255
|
+
int k = i + ((j - i) >> 1);
|
256
|
+
int diff = 0;
|
257
|
+
int common = common_i < common_j ? common_i : common_j; // smaller
|
258
|
+
Among w = v[k];
|
259
|
+
int i2;
|
260
|
+
for (i2 = common; i2 < w.s.length; i2++) {
|
261
|
+
if (c + common == l) {
|
262
|
+
diff = -1;
|
263
|
+
break;
|
264
|
+
}
|
265
|
+
diff = current[c + common] - w.s[i2];
|
266
|
+
if (diff != 0) break;
|
267
|
+
common++;
|
268
|
+
}
|
269
|
+
if (diff < 0) {
|
270
|
+
j = k;
|
271
|
+
common_j = common;
|
272
|
+
} else {
|
273
|
+
i = k;
|
274
|
+
common_i = common;
|
275
|
+
}
|
276
|
+
if (j - i <= 1) {
|
277
|
+
if (i > 0) break; // v->s has been inspected
|
278
|
+
if (j == i) break; // only one item in v
|
279
|
+
|
280
|
+
// - but now we need to go round once more to get
|
281
|
+
// v->s inspected. This looks messy, but is actually
|
282
|
+
// the optimal approach.
|
283
|
+
|
284
|
+
if (first_key_inspected) break;
|
285
|
+
first_key_inspected = true;
|
286
|
+
}
|
287
|
+
}
|
288
|
+
while (true) {
|
289
|
+
Among w = v[i];
|
290
|
+
if (common_i >= w.s.length) {
|
291
|
+
cursor = c + w.s.length;
|
292
|
+
if (w.method == null) return w.result;
|
293
|
+
boolean res = false;
|
294
|
+
try {
|
295
|
+
res = (boolean) w.method.invokeExact(this);
|
296
|
+
} catch (Error | RuntimeException e) {
|
297
|
+
throw e;
|
298
|
+
} catch (Throwable e) {
|
299
|
+
throw new UndeclaredThrowableException(e);
|
300
|
+
}
|
301
|
+
cursor = c + w.s.length;
|
302
|
+
if (res) return w.result;
|
303
|
+
}
|
304
|
+
i = w.substring_i;
|
305
|
+
if (i < 0) return 0;
|
306
|
+
}
|
219
307
|
}
|
220
308
|
|
221
309
|
// find_among_b is for backwards processing. Same comments apply
|
222
|
-
protected int find_among_b(Among v
|
310
|
+
protected int find_among_b(Among[] v)
|
223
311
|
{
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
i = w.substring_i;
|
285
|
-
if (i < 0) return 0;
|
286
|
-
}
|
312
|
+
int i = 0;
|
313
|
+
int j = v.length;
|
314
|
+
|
315
|
+
int c = cursor;
|
316
|
+
int lb = limit_backward;
|
317
|
+
|
318
|
+
int common_i = 0;
|
319
|
+
int common_j = 0;
|
320
|
+
|
321
|
+
boolean first_key_inspected = false;
|
322
|
+
|
323
|
+
while (true) {
|
324
|
+
int k = i + ((j - i) >> 1);
|
325
|
+
int diff = 0;
|
326
|
+
int common = common_i < common_j ? common_i : common_j;
|
327
|
+
Among w = v[k];
|
328
|
+
int i2;
|
329
|
+
for (i2 = w.s.length - 1 - common; i2 >= 0; i2--) {
|
330
|
+
if (c - common == lb) {
|
331
|
+
diff = -1;
|
332
|
+
break;
|
333
|
+
}
|
334
|
+
diff = current[c - 1 - common] - w.s[i2];
|
335
|
+
if (diff != 0) break;
|
336
|
+
common++;
|
337
|
+
}
|
338
|
+
if (diff < 0) {
|
339
|
+
j = k;
|
340
|
+
common_j = common;
|
341
|
+
} else {
|
342
|
+
i = k;
|
343
|
+
common_i = common;
|
344
|
+
}
|
345
|
+
if (j - i <= 1) {
|
346
|
+
if (i > 0) break;
|
347
|
+
if (j == i) break;
|
348
|
+
if (first_key_inspected) break;
|
349
|
+
first_key_inspected = true;
|
350
|
+
}
|
351
|
+
}
|
352
|
+
while (true) {
|
353
|
+
Among w = v[i];
|
354
|
+
if (common_i >= w.s.length) {
|
355
|
+
cursor = c - w.s.length;
|
356
|
+
if (w.method == null) return w.result;
|
357
|
+
|
358
|
+
boolean res = false;
|
359
|
+
try {
|
360
|
+
res = (boolean) w.method.invokeExact(this);
|
361
|
+
} catch (Error | RuntimeException e) {
|
362
|
+
throw e;
|
363
|
+
} catch (Throwable e) {
|
364
|
+
throw new UndeclaredThrowableException(e);
|
365
|
+
}
|
366
|
+
cursor = c - w.s.length;
|
367
|
+
if (res) return w.result;
|
368
|
+
}
|
369
|
+
i = w.substring_i;
|
370
|
+
if (i < 0) return 0;
|
371
|
+
}
|
287
372
|
}
|
288
373
|
|
289
374
|
/* to replace chars between c_bra and c_ket in current by the
|
290
375
|
* chars in s.
|
291
376
|
*/
|
292
|
-
protected int replace_s(int c_bra, int c_ket,
|
377
|
+
protected int replace_s(int c_bra, int c_ket, CharSequence s)
|
293
378
|
{
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
379
|
+
final int adjustment = s.length() - (c_ket - c_bra);
|
380
|
+
final int newLength = length + adjustment;
|
381
|
+
//resize if necessary
|
382
|
+
if (newLength > current.length) {
|
383
|
+
current = Arrays.copyOf(current, newLength);
|
384
|
+
}
|
385
|
+
// if the substring being replaced is longer or shorter than the
|
386
|
+
// replacement, need to shift things around
|
387
|
+
if (adjustment != 0 && c_ket < length) {
|
388
|
+
System.arraycopy(current, c_ket, current, c_bra + s.length(),
|
389
|
+
length - c_ket);
|
390
|
+
}
|
391
|
+
// insert the replacement text
|
392
|
+
// Note, faster is s.getChars(0, s.length(), current, c_bra);
|
393
|
+
// but would have to duplicate this method for both String and StringBuilder
|
394
|
+
for (int i = 0; i < s.length(); i++)
|
395
|
+
current[c_bra + i] = s.charAt(i);
|
396
|
+
|
397
|
+
length += adjustment;
|
398
|
+
limit += adjustment;
|
399
|
+
if (cursor >= c_ket) cursor += adjustment;
|
400
|
+
else if (cursor > c_bra) cursor = c_bra;
|
401
|
+
return adjustment;
|
300
402
|
}
|
301
403
|
|
302
404
|
protected void slice_check()
|
303
405
|
{
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
{
|
309
|
-
System.err.println("faulty slice operation");
|
310
|
-
// FIXME: report error somehow.
|
311
|
-
/*
|
312
|
-
fprintf(stderr, "faulty slice operation:\n");
|
313
|
-
debug(z, -1, 0);
|
314
|
-
exit(1);
|
315
|
-
*/
|
316
|
-
}
|
317
|
-
}
|
318
|
-
|
319
|
-
protected void slice_from(String s)
|
320
|
-
{
|
321
|
-
slice_check();
|
322
|
-
replace_s(bra, ket, s);
|
406
|
+
assert bra >= 0 : "bra=" + bra;
|
407
|
+
assert bra <= ket : "bra=" + bra + ",ket=" + ket;
|
408
|
+
assert limit <= length : "limit=" + limit + ",length=" + length;
|
409
|
+
assert ket <= limit : "ket=" + ket + ",limit=" + limit;
|
323
410
|
}
|
324
411
|
|
325
412
|
protected void slice_from(CharSequence s)
|
326
413
|
{
|
327
|
-
|
414
|
+
slice_check();
|
415
|
+
replace_s(bra, ket, s);
|
328
416
|
}
|
329
417
|
|
330
418
|
protected void slice_del()
|
331
419
|
{
|
332
|
-
|
333
|
-
}
|
334
|
-
|
335
|
-
protected void insert(int c_bra, int c_ket, String s)
|
336
|
-
{
|
337
|
-
int adjustment = replace_s(c_bra, c_ket, s);
|
338
|
-
if (c_bra <= bra) bra += adjustment;
|
339
|
-
if (c_bra <= ket) ket += adjustment;
|
420
|
+
slice_from("");
|
340
421
|
}
|
341
422
|
|
342
423
|
protected void insert(int c_bra, int c_ket, CharSequence s)
|
343
424
|
{
|
344
|
-
|
425
|
+
int adjustment = replace_s(c_bra, c_ket, s);
|
426
|
+
if (c_bra <= bra) bra += adjustment;
|
427
|
+
if (c_bra <= ket) ket += adjustment;
|
345
428
|
}
|
346
429
|
|
347
430
|
/* Copy the slice into the supplied StringBuilder */
|
348
431
|
protected void slice_to(StringBuilder s)
|
349
432
|
{
|
350
|
-
|
351
|
-
|
433
|
+
slice_check();
|
434
|
+
int len = ket - bra;
|
435
|
+
s.setLength(0);
|
436
|
+
s.append(current, bra, len);
|
352
437
|
}
|
353
438
|
|
354
439
|
protected void assign_to(StringBuilder s)
|
355
440
|
{
|
356
|
-
|
441
|
+
s.setLength(0);
|
442
|
+
s.append(current, 0, limit);
|
357
443
|
}
|
358
444
|
|
359
445
|
/*
|
@@ -378,4 +464,4 @@ extern void debug(struct SN_env * z, int number, int line_count)
|
|
378
464
|
}
|
379
465
|
*/
|
380
466
|
|
381
|
-
}
|
467
|
+
}
|