mittens 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile +1 -1
- data/README.md +3 -3
- data/Rakefile +2 -2
- data/ext/mittens/extconf.rb +3 -1
- data/lib/mittens/version.rb +1 -1
- data/vendor/snowball/.github/workflows/ci.yml +216 -0
- data/vendor/snowball/CONTRIBUTING.rst +111 -62
- data/vendor/snowball/GNUmakefile +194 -136
- data/vendor/snowball/NEWS +798 -3
- data/vendor/snowball/README.rst +50 -1
- data/vendor/snowball/ada/src/stemmer.adb +25 -13
- data/vendor/snowball/ada/src/stemmer.ads +9 -9
- data/vendor/snowball/ada/stemmer_config.gpr +7 -7
- data/vendor/snowball/algorithms/basque.sbl +4 -19
- data/vendor/snowball/algorithms/catalan.sbl +2 -9
- data/vendor/snowball/algorithms/danish.sbl +1 -1
- data/vendor/snowball/algorithms/dutch.sbl +284 -122
- data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
- data/vendor/snowball/algorithms/english.sbl +52 -37
- data/vendor/snowball/algorithms/esperanto.sbl +157 -0
- data/vendor/snowball/algorithms/estonian.sbl +269 -0
- data/vendor/snowball/algorithms/finnish.sbl +2 -3
- data/vendor/snowball/algorithms/french.sbl +42 -16
- data/vendor/snowball/algorithms/german.sbl +35 -14
- data/vendor/snowball/algorithms/greek.sbl +76 -76
- data/vendor/snowball/algorithms/hungarian.sbl +8 -6
- data/vendor/snowball/algorithms/indonesian.sbl +14 -8
- data/vendor/snowball/algorithms/italian.sbl +11 -21
- data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
- data/vendor/snowball/algorithms/lovins.sbl +0 -1
- data/vendor/snowball/algorithms/nepali.sbl +138 -37
- data/vendor/snowball/algorithms/norwegian.sbl +19 -5
- data/vendor/snowball/algorithms/porter.sbl +2 -2
- data/vendor/snowball/algorithms/portuguese.sbl +9 -13
- data/vendor/snowball/algorithms/romanian.sbl +17 -4
- data/vendor/snowball/algorithms/serbian.sbl +467 -468
- data/vendor/snowball/algorithms/spanish.sbl +5 -7
- data/vendor/snowball/algorithms/swedish.sbl +60 -6
- data/vendor/snowball/algorithms/tamil.sbl +207 -176
- data/vendor/snowball/algorithms/turkish.sbl +461 -445
- data/vendor/snowball/algorithms/yiddish.sbl +36 -38
- data/vendor/snowball/compiler/analyser.c +445 -192
- data/vendor/snowball/compiler/driver.c +109 -101
- data/vendor/snowball/compiler/generator.c +853 -464
- data/vendor/snowball/compiler/generator_ada.c +404 -366
- data/vendor/snowball/compiler/generator_csharp.c +297 -260
- data/vendor/snowball/compiler/generator_go.c +323 -254
- data/vendor/snowball/compiler/generator_java.c +326 -252
- data/vendor/snowball/compiler/generator_js.c +362 -252
- data/vendor/snowball/compiler/generator_pascal.c +349 -197
- data/vendor/snowball/compiler/generator_python.c +257 -240
- data/vendor/snowball/compiler/generator_rust.c +423 -251
- data/vendor/snowball/compiler/header.h +117 -71
- data/vendor/snowball/compiler/space.c +137 -68
- data/vendor/snowball/compiler/syswords.h +2 -2
- data/vendor/snowball/compiler/tokeniser.c +125 -107
- data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
- data/vendor/snowball/csharp/Stemwords/App.config +2 -2
- data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
- data/vendor/snowball/doc/libstemmer_c_README +7 -4
- data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
- data/vendor/snowball/doc/libstemmer_java_README +12 -1
- data/vendor/snowball/doc/libstemmer_js_README +6 -4
- data/vendor/snowball/doc/libstemmer_python_README +9 -4
- data/vendor/snowball/examples/stemwords.c +12 -12
- data/vendor/snowball/go/env.go +107 -31
- data/vendor/snowball/go/util.go +0 -4
- data/vendor/snowball/include/libstemmer.h +4 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
- data/vendor/snowball/javascript/base-stemmer.js +186 -2
- data/vendor/snowball/javascript/stemwords.js +3 -6
- data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
- data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
- data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
- data/vendor/snowball/libstemmer/modules.txt +13 -10
- data/vendor/snowball/libstemmer/test.c +1 -1
- data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
- data/vendor/snowball/pascal/generate.pl +13 -13
- data/vendor/snowball/python/create_init.py +4 -1
- data/vendor/snowball/python/setup.cfg +0 -3
- data/vendor/snowball/python/setup.py +8 -3
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
- data/vendor/snowball/python/stemwords.py +8 -12
- data/vendor/snowball/runtime/api.c +10 -5
- data/vendor/snowball/runtime/header.h +10 -9
- data/vendor/snowball/runtime/utilities.c +9 -9
- data/vendor/snowball/rust/build.rs +1 -1
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
- data/vendor/snowball/tests/stemtest.c +7 -4
- metadata +7 -7
- data/vendor/snowball/.travis.yml +0 -112
- data/vendor/snowball/algorithms/german2.sbl +0 -145
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
- data/vendor/snowball/compiler/syswords2.h +0 -13
|
@@ -189,27 +189,69 @@ impl<'a> SnowballEnv<'a> {
|
|
|
189
189
|
return false;
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
+
pub fn go_in_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
|
|
193
|
+
while self.cursor < self.limit {
|
|
194
|
+
if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
|
|
195
|
+
let mut ch = chr as u32; //codepoint as integer
|
|
196
|
+
if ch > max || ch < min {
|
|
197
|
+
return true;
|
|
198
|
+
}
|
|
199
|
+
ch -= min;
|
|
200
|
+
if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
|
|
201
|
+
return true;
|
|
202
|
+
}
|
|
203
|
+
self.next_char();
|
|
204
|
+
} else {
|
|
205
|
+
return false;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
return false;
|
|
209
|
+
}
|
|
210
|
+
|
|
192
211
|
pub fn in_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
|
|
193
212
|
if self.cursor <= self.limit_backward {
|
|
194
213
|
return false;
|
|
195
214
|
}
|
|
215
|
+
let c = self.cursor;
|
|
196
216
|
self.previous_char();
|
|
197
217
|
if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
|
|
198
218
|
let mut ch = chr as u32; //codepoint as integer
|
|
199
|
-
self.next_char();
|
|
200
219
|
if ch > max || ch < min {
|
|
220
|
+
self.cursor = c;
|
|
201
221
|
return false;
|
|
202
222
|
}
|
|
203
223
|
ch -= min;
|
|
204
224
|
if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
|
|
225
|
+
self.cursor = c;
|
|
205
226
|
return false;
|
|
206
227
|
}
|
|
207
|
-
self.previous_char();
|
|
208
228
|
return true;
|
|
209
229
|
}
|
|
210
230
|
return false;
|
|
211
231
|
}
|
|
212
232
|
|
|
233
|
+
pub fn go_in_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
|
|
234
|
+
while self.cursor > self.limit_backward {
|
|
235
|
+
let c = self.cursor;
|
|
236
|
+
self.previous_char();
|
|
237
|
+
if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
|
|
238
|
+
let mut ch = chr as u32; //codepoint as integer
|
|
239
|
+
if ch > max || ch < min {
|
|
240
|
+
self.cursor = c;
|
|
241
|
+
return true;
|
|
242
|
+
}
|
|
243
|
+
ch -= min;
|
|
244
|
+
if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
|
|
245
|
+
self.cursor = c;
|
|
246
|
+
return true;
|
|
247
|
+
}
|
|
248
|
+
} else {
|
|
249
|
+
return false;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
return false;
|
|
253
|
+
}
|
|
254
|
+
|
|
213
255
|
pub fn out_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
|
|
214
256
|
if self.cursor >= self.limit {
|
|
215
257
|
return false;
|
|
@@ -229,26 +271,62 @@ impl<'a> SnowballEnv<'a> {
|
|
|
229
271
|
return false;
|
|
230
272
|
}
|
|
231
273
|
|
|
274
|
+
pub fn go_out_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
|
|
275
|
+
while self.cursor < self.limit {
|
|
276
|
+
if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
|
|
277
|
+
let mut ch = chr as u32; //codepoint as integer
|
|
278
|
+
if ch <= max && ch >= min {
|
|
279
|
+
ch -= min;
|
|
280
|
+
if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) != 0 {
|
|
281
|
+
return true;
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
self.next_char();
|
|
285
|
+
} else {
|
|
286
|
+
return false;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
return false;
|
|
290
|
+
}
|
|
291
|
+
|
|
232
292
|
pub fn out_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
|
|
233
293
|
if self.cursor <= self.limit_backward {
|
|
234
294
|
return false;
|
|
235
295
|
}
|
|
296
|
+
let c = self.cursor;
|
|
236
297
|
self.previous_char();
|
|
237
298
|
if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
|
|
238
299
|
let mut ch = chr as u32; //codepoint as integer
|
|
239
|
-
self.next_char();
|
|
240
300
|
if ch > max || ch < min {
|
|
241
|
-
self.previous_char();
|
|
242
301
|
return true;
|
|
243
302
|
}
|
|
244
303
|
ch -= min;
|
|
245
304
|
if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
|
|
246
|
-
self.previous_char();
|
|
247
305
|
return true;
|
|
248
306
|
}
|
|
307
|
+
self.cursor = c;
|
|
249
308
|
}
|
|
250
309
|
return false;
|
|
310
|
+
}
|
|
251
311
|
|
|
312
|
+
pub fn go_out_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
|
|
313
|
+
while self.cursor > self.limit_backward {
|
|
314
|
+
let c = self.cursor;
|
|
315
|
+
self.previous_char();
|
|
316
|
+
if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
|
|
317
|
+
let mut ch = chr as u32; //codepoint as integer
|
|
318
|
+
if ch <= max && ch >= min {
|
|
319
|
+
ch -= min;
|
|
320
|
+
if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) != 0 {
|
|
321
|
+
self.cursor = c;
|
|
322
|
+
return true;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
} else {
|
|
326
|
+
return false;
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
return false;
|
|
252
330
|
}
|
|
253
331
|
|
|
254
332
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* line interface for stemming using any of the algorithms provided.
|
|
1
|
+
/* Unit tests for handling of cases the vocabularies don't cover.
|
|
3
2
|
*/
|
|
4
3
|
|
|
5
4
|
#include <stdio.h>
|
|
@@ -20,16 +19,20 @@ static const struct testcase {
|
|
|
20
19
|
/* Expected output string (0 means same as input) */
|
|
21
20
|
const char * expect;
|
|
22
21
|
} testcases[] = {
|
|
22
|
+
// Regression tests for C support code bug decoding 4 byte UTF-8 sequences.
|
|
23
|
+
// https://github.com/snowballstem/snowball/issues/138
|
|
23
24
|
{ "en", 0,
|
|
24
25
|
"a" EMOJI_FACE_THROWING_A_KISS "ing",
|
|
25
26
|
"a" EMOJI_FACE_THROWING_A_KISS "e" },
|
|
26
27
|
{ "en", 0, U_40079 "wing", 0 },
|
|
27
28
|
// The Finnish stemmer used to damage numbers ending with two or more of
|
|
28
|
-
// the same digit
|
|
29
|
+
// the same digit. Regression test, applied to all stemmers.
|
|
30
|
+
// https://github.com/snowballstem/snowball/issues/66
|
|
29
31
|
{ 0, 0, "2000", 0 },
|
|
30
32
|
{ 0, 0, "999", 0 },
|
|
31
33
|
{ 0, 0, "1000000000", 0 },
|
|
32
|
-
// The Danish stemmer used to damage a number at the end of a word
|
|
34
|
+
// The Danish stemmer used to damage a number at the end of a word.
|
|
35
|
+
// Regression test, applied to all stemmers.
|
|
33
36
|
// https://github.com/snowballstem/snowball/issues/81
|
|
34
37
|
{ 0, 0, "space1999", 0 },
|
|
35
38
|
{ 0, 0, "hal9000", 0 },
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mittens
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
email: andrew@ankane.org
|
|
13
13
|
executables: []
|
|
@@ -25,8 +25,8 @@ files:
|
|
|
25
25
|
- lib/mittens.rb
|
|
26
26
|
- lib/mittens/version.rb
|
|
27
27
|
- mittens.gemspec
|
|
28
|
+
- vendor/snowball/.github/workflows/ci.yml
|
|
28
29
|
- vendor/snowball/.gitignore
|
|
29
|
-
- vendor/snowball/.travis.yml
|
|
30
30
|
- vendor/snowball/AUTHORS
|
|
31
31
|
- vendor/snowball/CONTRIBUTING.rst
|
|
32
32
|
- vendor/snowball/COPYING
|
|
@@ -47,18 +47,19 @@ files:
|
|
|
47
47
|
- vendor/snowball/algorithms/catalan.sbl
|
|
48
48
|
- vendor/snowball/algorithms/danish.sbl
|
|
49
49
|
- vendor/snowball/algorithms/dutch.sbl
|
|
50
|
+
- vendor/snowball/algorithms/dutch_porter.sbl
|
|
50
51
|
- vendor/snowball/algorithms/english.sbl
|
|
52
|
+
- vendor/snowball/algorithms/esperanto.sbl
|
|
53
|
+
- vendor/snowball/algorithms/estonian.sbl
|
|
51
54
|
- vendor/snowball/algorithms/finnish.sbl
|
|
52
55
|
- vendor/snowball/algorithms/french.sbl
|
|
53
56
|
- vendor/snowball/algorithms/german.sbl
|
|
54
|
-
- vendor/snowball/algorithms/german2.sbl
|
|
55
57
|
- vendor/snowball/algorithms/greek.sbl
|
|
56
58
|
- vendor/snowball/algorithms/hindi.sbl
|
|
57
59
|
- vendor/snowball/algorithms/hungarian.sbl
|
|
58
60
|
- vendor/snowball/algorithms/indonesian.sbl
|
|
59
61
|
- vendor/snowball/algorithms/irish.sbl
|
|
60
62
|
- vendor/snowball/algorithms/italian.sbl
|
|
61
|
-
- vendor/snowball/algorithms/kraaij_pohlmann.sbl
|
|
62
63
|
- vendor/snowball/algorithms/lithuanian.sbl
|
|
63
64
|
- vendor/snowball/algorithms/lovins.sbl
|
|
64
65
|
- vendor/snowball/algorithms/nepali.sbl
|
|
@@ -90,7 +91,6 @@ files:
|
|
|
90
91
|
- vendor/snowball/compiler/header.h
|
|
91
92
|
- vendor/snowball/compiler/space.c
|
|
92
93
|
- vendor/snowball/compiler/syswords.h
|
|
93
|
-
- vendor/snowball/compiler/syswords2.h
|
|
94
94
|
- vendor/snowball/compiler/tokeniser.c
|
|
95
95
|
- vendor/snowball/csharp/.gitignore
|
|
96
96
|
- vendor/snowball/csharp/Snowball/Algorithms/.gitignore
|
|
@@ -168,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
168
168
|
- !ruby/object:Gem::Version
|
|
169
169
|
version: '0'
|
|
170
170
|
requirements: []
|
|
171
|
-
rubygems_version: 3.6.
|
|
171
|
+
rubygems_version: 3.6.9
|
|
172
172
|
specification_version: 4
|
|
173
173
|
summary: Stemming for Ruby, powered by Snowball
|
|
174
174
|
test_files: []
|
data/vendor/snowball/.travis.yml
DELETED
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
language: minimal
|
|
2
|
-
arch: arm64
|
|
3
|
-
dist: xenial
|
|
4
|
-
env:
|
|
5
|
-
global: MAKE=make
|
|
6
|
-
matrix:
|
|
7
|
-
include:
|
|
8
|
-
- name: "C distribution build"
|
|
9
|
-
language: c
|
|
10
|
-
dist: focal
|
|
11
|
-
compiler: gcc
|
|
12
|
-
env: CFLAGS_DIST_BUILD='-O2 -Wall -W -std=c90 -Wdeclaration-after-statement -Werror'
|
|
13
|
-
- name: "C distribution build (clang)"
|
|
14
|
-
language: c
|
|
15
|
-
dist: focal
|
|
16
|
-
compiler: clang
|
|
17
|
-
env: CFLAGS_DIST_BUILD='-O2 -Wall -W -std=c90 -Wdeclaration-after-statement -Werror'
|
|
18
|
-
- language: c
|
|
19
|
-
compiler: gcc
|
|
20
|
-
env: c_tests=y CFLAGS='-O2 -Wall -W -std=c99 -Werror'
|
|
21
|
-
- language: c
|
|
22
|
-
compiler: clang
|
|
23
|
-
env: c_tests=y CFLAGS='-O2 -Wall -W -std=c99 -Werror'
|
|
24
|
-
- language: java
|
|
25
|
-
env: JAVA=java JAVAC=javac
|
|
26
|
-
- language: go
|
|
27
|
-
go: "1.8"
|
|
28
|
-
env: GO=go
|
|
29
|
-
- language: go
|
|
30
|
-
dist: bionic
|
|
31
|
-
go: "1.17"
|
|
32
|
-
env: GO=go
|
|
33
|
-
- language: node_js
|
|
34
|
-
node_js: "node"
|
|
35
|
-
env: NODE=node
|
|
36
|
-
dist: focal # seems to be needed for working node
|
|
37
|
-
- language: rust
|
|
38
|
-
rust:
|
|
39
|
-
- stable
|
|
40
|
-
- beta
|
|
41
|
-
dist: bionic
|
|
42
|
-
env: RUST=rust
|
|
43
|
-
- language: csharp
|
|
44
|
-
arch: amd64 # csharp doesn't seem to work on arm64
|
|
45
|
-
env: MCS=mcs
|
|
46
|
-
dist: bionic
|
|
47
|
-
- name: Pascal
|
|
48
|
-
env: FPC=fpc
|
|
49
|
-
dist: bionic
|
|
50
|
-
addons:
|
|
51
|
-
apt:
|
|
52
|
-
packages:
|
|
53
|
-
- fpc
|
|
54
|
-
# The pure Python versions run slowly so we need to thin the testdata
|
|
55
|
-
# for languages such as Arabic where there's a lot, or else the build
|
|
56
|
-
# hits the travis time limit. With pypy, it's enough faster than we
|
|
57
|
-
# can run the full tests.
|
|
58
|
-
- language: python
|
|
59
|
-
python: "3.9"
|
|
60
|
-
env: PYTHON=python THIN_FACTOR=10
|
|
61
|
-
- language: python
|
|
62
|
-
python: "3.7"
|
|
63
|
-
env: PYTHON=python THIN_FACTOR=10
|
|
64
|
-
- language: python
|
|
65
|
-
python: "3.6"
|
|
66
|
-
env: PYTHON=python THIN_FACTOR=10
|
|
67
|
-
- language: python
|
|
68
|
-
python: "pypy3.7-7.3.5"
|
|
69
|
-
env: PYTHON=python
|
|
70
|
-
dist: bionic
|
|
71
|
-
- name: "Ada"
|
|
72
|
-
env: gprbuild=gprbuild
|
|
73
|
-
dist: bionic
|
|
74
|
-
addons:
|
|
75
|
-
apt:
|
|
76
|
-
packages:
|
|
77
|
-
- gnat
|
|
78
|
-
- gprbuild
|
|
79
|
-
- os: windows
|
|
80
|
-
language: c
|
|
81
|
-
env: c_tests=y MAKE=mingw32-make
|
|
82
|
-
- os: windows
|
|
83
|
-
language: go
|
|
84
|
-
env: GO=go MAKE=mingw32-make
|
|
85
|
-
|
|
86
|
-
before_install:
|
|
87
|
-
# Try to check out a branch of the same name from the snowball-data repo
|
|
88
|
-
# sibling of this snowball repo, so that PRs requiring changes to both can be
|
|
89
|
-
# CI tested easily.
|
|
90
|
-
#
|
|
91
|
-
# If that fails, just use the standard snowball-data repo's default branch.
|
|
92
|
-
- GH_BRANCH=${TRAVIS_PULL_REQUEST_BRANCH:-$TRAVIS_BRANCH}
|
|
93
|
-
- GH_REPO_SLUG=${TRAVIS_PULL_REQUEST_SLUG:-$TRAVIS_REPO_SLUG}
|
|
94
|
-
- GH_REPO_URL=https://github.com/${GH_REPO_SLUG%%/*}/snowball-data.git
|
|
95
|
-
- echo "Trying branch $GH_BRANCH from $GH_REPO_URL"
|
|
96
|
-
- git clone --depth=1 -b "$GH_BRANCH" "$GH_REPO_URL" || git clone --depth=1 https://github.com/snowballstem/snowball-data.git
|
|
97
|
-
|
|
98
|
-
script:
|
|
99
|
-
# Ensure CC is set for building the compiler in non-C builds.
|
|
100
|
-
- test -n "$CC" || export CC=gcc
|
|
101
|
-
- $MAKE CC="$CC"
|
|
102
|
-
- test -z "$CFLAGS_DIST_BUILD" || { pip install setuptools && $MAKE dist && mkdir tmp && cd tmp && tar xf ../dist/libstemmer_c-*.tar.gz && cd libstemmer_c-* && $MAKE CFLAGS="$CFLAGS_DIST_BUILD" ; }
|
|
103
|
-
- test -z "$c_tests" || $MAKE check CC="$CC" STEMMING_DATA=snowball-data
|
|
104
|
-
- test -z "$PYTHON" || $MAKE check_python python="$PYTHON" STEMMING_DATA=snowball-data
|
|
105
|
-
- test -z "$JAVA" -o -z "$JAVAC" || $MAKE check_java STEMMING_DATA=snowball-data
|
|
106
|
-
- test -z "$MCS" || $MAKE check_csharp MCS="$MCS" STEMMING_DATA=snowball-data
|
|
107
|
-
- test -z "$NODE" || $MAKE check_js STEMMING_DATA=snowball-data
|
|
108
|
-
- test -z "$RUST" || $MAKE check_rust STEMMING_DATA=snowball-data
|
|
109
|
-
- test -z "$RUST" || $MAKE check_rust STEMMING_DATA=snowball-data
|
|
110
|
-
- test -z "$GO" || $MAKE check_go STEMMING_DATA=snowball-data
|
|
111
|
-
- test -z "$FPC" || $MAKE check_pascal STEMMING_DATA=snowball-data
|
|
112
|
-
- test -z "$gprbuild" || $MAKE check_ada STEMMING_DATA=snowball-data
|
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
/*
|
|
3
|
-
Extra rule for -nisse ending added 11 Dec 2009
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
routines (
|
|
7
|
-
prelude postlude
|
|
8
|
-
mark_regions
|
|
9
|
-
R1 R2
|
|
10
|
-
standard_suffix
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
externals ( stem )
|
|
14
|
-
|
|
15
|
-
integers ( p1 p2 x )
|
|
16
|
-
|
|
17
|
-
groupings ( v s_ending st_ending )
|
|
18
|
-
|
|
19
|
-
stringescapes {}
|
|
20
|
-
|
|
21
|
-
/* special characters */
|
|
22
|
-
|
|
23
|
-
stringdef a" '{U+00E4}'
|
|
24
|
-
stringdef o" '{U+00F6}'
|
|
25
|
-
stringdef u" '{U+00FC}'
|
|
26
|
-
stringdef ss '{U+00DF}'
|
|
27
|
-
|
|
28
|
-
define v 'aeiouy{a"}{o"}{u"}'
|
|
29
|
-
|
|
30
|
-
define s_ending 'bdfghklmnrt'
|
|
31
|
-
define st_ending s_ending - 'r'
|
|
32
|
-
|
|
33
|
-
define prelude as (
|
|
34
|
-
|
|
35
|
-
test repeat goto (
|
|
36
|
-
v [('u'] v <- 'U') or
|
|
37
|
-
('y'] v <- 'Y')
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
repeat (
|
|
41
|
-
[substring] among(
|
|
42
|
-
'{ss}' (<- 'ss')
|
|
43
|
-
'ae' (<- '{a"}')
|
|
44
|
-
'oe' (<- '{o"}')
|
|
45
|
-
'ue' (<- '{u"}')
|
|
46
|
-
'qu' ()
|
|
47
|
-
'' (next)
|
|
48
|
-
)
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
define mark_regions as (
|
|
54
|
-
|
|
55
|
-
$p1 = limit
|
|
56
|
-
$p2 = limit
|
|
57
|
-
|
|
58
|
-
test(hop 3 setmark x)
|
|
59
|
-
|
|
60
|
-
gopast v gopast non-v setmark p1
|
|
61
|
-
try($p1 < x $p1 = x) // at least 3
|
|
62
|
-
gopast v gopast non-v setmark p2
|
|
63
|
-
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
define postlude as repeat (
|
|
67
|
-
|
|
68
|
-
[substring] among(
|
|
69
|
-
'Y' (<- 'y')
|
|
70
|
-
'U' (<- 'u')
|
|
71
|
-
'{a"}' (<- 'a')
|
|
72
|
-
'{o"}' (<- 'o')
|
|
73
|
-
'{u"}' (<- 'u')
|
|
74
|
-
'' (next)
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
backwardmode (
|
|
80
|
-
|
|
81
|
-
define R1 as $p1 <= cursor
|
|
82
|
-
define R2 as $p2 <= cursor
|
|
83
|
-
|
|
84
|
-
define standard_suffix as (
|
|
85
|
-
do (
|
|
86
|
-
[substring] R1 among(
|
|
87
|
-
'em' 'ern' 'er'
|
|
88
|
-
( delete
|
|
89
|
-
)
|
|
90
|
-
'e' 'en' 'es'
|
|
91
|
-
( delete
|
|
92
|
-
try (['s'] 'nis' delete)
|
|
93
|
-
)
|
|
94
|
-
's'
|
|
95
|
-
( s_ending delete
|
|
96
|
-
)
|
|
97
|
-
)
|
|
98
|
-
)
|
|
99
|
-
do (
|
|
100
|
-
[substring] R1 among(
|
|
101
|
-
'en' 'er' 'est'
|
|
102
|
-
( delete
|
|
103
|
-
)
|
|
104
|
-
'st'
|
|
105
|
-
( st_ending hop 3 delete
|
|
106
|
-
)
|
|
107
|
-
)
|
|
108
|
-
)
|
|
109
|
-
do (
|
|
110
|
-
[substring] R2 among(
|
|
111
|
-
'end' 'ung'
|
|
112
|
-
( delete
|
|
113
|
-
try (['ig'] not 'e' R2 delete)
|
|
114
|
-
)
|
|
115
|
-
'ig' 'ik' 'isch'
|
|
116
|
-
( not 'e' delete
|
|
117
|
-
)
|
|
118
|
-
'lich' 'heit'
|
|
119
|
-
( delete
|
|
120
|
-
try (
|
|
121
|
-
['er' or 'en'] R1 delete
|
|
122
|
-
)
|
|
123
|
-
)
|
|
124
|
-
'keit'
|
|
125
|
-
( delete
|
|
126
|
-
try (
|
|
127
|
-
[substring] R2 among(
|
|
128
|
-
'lich' 'ig'
|
|
129
|
-
( delete
|
|
130
|
-
)
|
|
131
|
-
)
|
|
132
|
-
)
|
|
133
|
-
)
|
|
134
|
-
)
|
|
135
|
-
)
|
|
136
|
-
)
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
define stem as (
|
|
140
|
-
do prelude
|
|
141
|
-
do mark_regions
|
|
142
|
-
backwards
|
|
143
|
-
do standard_suffix
|
|
144
|
-
do postlude
|
|
145
|
-
)
|