mittens 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/Gemfile +1 -1
  4. data/README.md +3 -3
  5. data/Rakefile +2 -2
  6. data/ext/mittens/extconf.rb +3 -1
  7. data/lib/mittens/version.rb +1 -1
  8. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  9. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  10. data/vendor/snowball/GNUmakefile +194 -136
  11. data/vendor/snowball/NEWS +798 -3
  12. data/vendor/snowball/README.rst +50 -1
  13. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  14. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  15. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  16. data/vendor/snowball/algorithms/basque.sbl +4 -19
  17. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  18. data/vendor/snowball/algorithms/danish.sbl +1 -1
  19. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  20. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  21. data/vendor/snowball/algorithms/english.sbl +52 -37
  22. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  23. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  24. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  25. data/vendor/snowball/algorithms/french.sbl +42 -16
  26. data/vendor/snowball/algorithms/german.sbl +35 -14
  27. data/vendor/snowball/algorithms/greek.sbl +76 -76
  28. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  29. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  30. data/vendor/snowball/algorithms/italian.sbl +11 -21
  31. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  32. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  33. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  34. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  35. data/vendor/snowball/algorithms/porter.sbl +2 -2
  36. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  37. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  38. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  39. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  40. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  41. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  42. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  43. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  44. data/vendor/snowball/compiler/analyser.c +445 -192
  45. data/vendor/snowball/compiler/driver.c +109 -101
  46. data/vendor/snowball/compiler/generator.c +853 -464
  47. data/vendor/snowball/compiler/generator_ada.c +404 -366
  48. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  49. data/vendor/snowball/compiler/generator_go.c +323 -254
  50. data/vendor/snowball/compiler/generator_java.c +326 -252
  51. data/vendor/snowball/compiler/generator_js.c +362 -252
  52. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  53. data/vendor/snowball/compiler/generator_python.c +257 -240
  54. data/vendor/snowball/compiler/generator_rust.c +423 -251
  55. data/vendor/snowball/compiler/header.h +117 -71
  56. data/vendor/snowball/compiler/space.c +137 -68
  57. data/vendor/snowball/compiler/syswords.h +2 -2
  58. data/vendor/snowball/compiler/tokeniser.c +125 -107
  59. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  60. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  61. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  62. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  63. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  64. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  65. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  66. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  67. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  68. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  69. data/vendor/snowball/examples/stemwords.c +12 -12
  70. data/vendor/snowball/go/env.go +107 -31
  71. data/vendor/snowball/go/util.go +0 -4
  72. data/vendor/snowball/include/libstemmer.h +4 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  74. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  75. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  76. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  77. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  78. data/vendor/snowball/javascript/stemwords.js +3 -6
  79. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  80. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  81. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  82. data/vendor/snowball/libstemmer/modules.txt +13 -10
  83. data/vendor/snowball/libstemmer/test.c +1 -1
  84. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  85. data/vendor/snowball/pascal/generate.pl +13 -13
  86. data/vendor/snowball/python/create_init.py +4 -1
  87. data/vendor/snowball/python/setup.cfg +0 -3
  88. data/vendor/snowball/python/setup.py +8 -3
  89. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  90. data/vendor/snowball/python/stemwords.py +8 -12
  91. data/vendor/snowball/runtime/api.c +10 -5
  92. data/vendor/snowball/runtime/header.h +10 -9
  93. data/vendor/snowball/runtime/utilities.c +9 -9
  94. data/vendor/snowball/rust/build.rs +1 -1
  95. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  96. data/vendor/snowball/tests/stemtest.c +7 -4
  97. metadata +7 -7
  98. data/vendor/snowball/.travis.yml +0 -112
  99. data/vendor/snowball/algorithms/german2.sbl +0 -145
  100. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  101. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -189,27 +189,69 @@ impl<'a> SnowballEnv<'a> {
189
189
  return false;
190
190
  }
191
191
 
192
+ pub fn go_in_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
193
+ while self.cursor < self.limit {
194
+ if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
195
+ let mut ch = chr as u32; //codepoint as integer
196
+ if ch > max || ch < min {
197
+ return true;
198
+ }
199
+ ch -= min;
200
+ if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
201
+ return true;
202
+ }
203
+ self.next_char();
204
+ } else {
205
+ return false;
206
+ }
207
+ }
208
+ return false;
209
+ }
210
+
192
211
  pub fn in_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
193
212
  if self.cursor <= self.limit_backward {
194
213
  return false;
195
214
  }
215
+ let c = self.cursor;
196
216
  self.previous_char();
197
217
  if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
198
218
  let mut ch = chr as u32; //codepoint as integer
199
- self.next_char();
200
219
  if ch > max || ch < min {
220
+ self.cursor = c;
201
221
  return false;
202
222
  }
203
223
  ch -= min;
204
224
  if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
225
+ self.cursor = c;
205
226
  return false;
206
227
  }
207
- self.previous_char();
208
228
  return true;
209
229
  }
210
230
  return false;
211
231
  }
212
232
 
233
+ pub fn go_in_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
234
+ while self.cursor > self.limit_backward {
235
+ let c = self.cursor;
236
+ self.previous_char();
237
+ if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
238
+ let mut ch = chr as u32; //codepoint as integer
239
+ if ch > max || ch < min {
240
+ self.cursor = c;
241
+ return true;
242
+ }
243
+ ch -= min;
244
+ if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
245
+ self.cursor = c;
246
+ return true;
247
+ }
248
+ } else {
249
+ return false;
250
+ }
251
+ }
252
+ return false;
253
+ }
254
+
213
255
  pub fn out_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
214
256
  if self.cursor >= self.limit {
215
257
  return false;
@@ -229,26 +271,62 @@ impl<'a> SnowballEnv<'a> {
229
271
  return false;
230
272
  }
231
273
 
274
+ pub fn go_out_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
275
+ while self.cursor < self.limit {
276
+ if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
277
+ let mut ch = chr as u32; //codepoint as integer
278
+ if ch <= max && ch >= min {
279
+ ch -= min;
280
+ if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) != 0 {
281
+ return true;
282
+ }
283
+ }
284
+ self.next_char();
285
+ } else {
286
+ return false;
287
+ }
288
+ }
289
+ return false;
290
+ }
291
+
232
292
  pub fn out_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
233
293
  if self.cursor <= self.limit_backward {
234
294
  return false;
235
295
  }
296
+ let c = self.cursor;
236
297
  self.previous_char();
237
298
  if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
238
299
  let mut ch = chr as u32; //codepoint as integer
239
- self.next_char();
240
300
  if ch > max || ch < min {
241
- self.previous_char();
242
301
  return true;
243
302
  }
244
303
  ch -= min;
245
304
  if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) == 0 {
246
- self.previous_char();
247
305
  return true;
248
306
  }
307
+ self.cursor = c;
249
308
  }
250
309
  return false;
310
+ }
251
311
 
312
+ pub fn go_out_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
313
+ while self.cursor > self.limit_backward {
314
+ let c = self.cursor;
315
+ self.previous_char();
316
+ if let Some(chr) = self.current[self.cursor as usize..].chars().next() {
317
+ let mut ch = chr as u32; //codepoint as integer
318
+ if ch <= max && ch >= min {
319
+ ch -= min;
320
+ if (chars[(ch >> 3) as usize] & (0x1 << (ch & 0x7))) != 0 {
321
+ self.cursor = c;
322
+ return true;
323
+ }
324
+ }
325
+ } else {
326
+ return false;
327
+ }
328
+ }
329
+ return false;
252
330
  }
253
331
 
254
332
 
@@ -1,5 +1,4 @@
1
- /* This is a simple program which uses libstemmer to provide a command
2
- * line interface for stemming using any of the algorithms provided.
1
+ /* Unit tests for handling of cases the vocabularies don't cover.
3
2
  */
4
3
 
5
4
  #include <stdio.h>
@@ -20,16 +19,20 @@ static const struct testcase {
20
19
  /* Expected output string (0 means same as input) */
21
20
  const char * expect;
22
21
  } testcases[] = {
22
+ // Regression tests for C support code bug decoding 4 byte UTF-8 sequences.
23
+ // https://github.com/snowballstem/snowball/issues/138
23
24
  { "en", 0,
24
25
  "a" EMOJI_FACE_THROWING_A_KISS "ing",
25
26
  "a" EMOJI_FACE_THROWING_A_KISS "e" },
26
27
  { "en", 0, U_40079 "wing", 0 },
27
28
  // The Finnish stemmer used to damage numbers ending with two or more of
28
- // the same digit: https://github.com/snowballstem/snowball/issues/66
29
+ // the same digit. Regression test, applied to all stemmers.
30
+ // https://github.com/snowballstem/snowball/issues/66
29
31
  { 0, 0, "2000", 0 },
30
32
  { 0, 0, "999", 0 },
31
33
  { 0, 0, "1000000000", 0 },
32
- // The Danish stemmer used to damage a number at the end of a word:
34
+ // The Danish stemmer used to damage a number at the end of a word.
35
+ // Regression test, applied to all stemmers.
33
36
  // https://github.com/snowballstem/snowball/issues/81
34
37
  { 0, 0, "space1999", 0 },
35
38
  { 0, 0, "hal9000", 0 },
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mittens
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-04-03 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  email: andrew@ankane.org
13
13
  executables: []
@@ -25,8 +25,8 @@ files:
25
25
  - lib/mittens.rb
26
26
  - lib/mittens/version.rb
27
27
  - mittens.gemspec
28
+ - vendor/snowball/.github/workflows/ci.yml
28
29
  - vendor/snowball/.gitignore
29
- - vendor/snowball/.travis.yml
30
30
  - vendor/snowball/AUTHORS
31
31
  - vendor/snowball/CONTRIBUTING.rst
32
32
  - vendor/snowball/COPYING
@@ -47,18 +47,19 @@ files:
47
47
  - vendor/snowball/algorithms/catalan.sbl
48
48
  - vendor/snowball/algorithms/danish.sbl
49
49
  - vendor/snowball/algorithms/dutch.sbl
50
+ - vendor/snowball/algorithms/dutch_porter.sbl
50
51
  - vendor/snowball/algorithms/english.sbl
52
+ - vendor/snowball/algorithms/esperanto.sbl
53
+ - vendor/snowball/algorithms/estonian.sbl
51
54
  - vendor/snowball/algorithms/finnish.sbl
52
55
  - vendor/snowball/algorithms/french.sbl
53
56
  - vendor/snowball/algorithms/german.sbl
54
- - vendor/snowball/algorithms/german2.sbl
55
57
  - vendor/snowball/algorithms/greek.sbl
56
58
  - vendor/snowball/algorithms/hindi.sbl
57
59
  - vendor/snowball/algorithms/hungarian.sbl
58
60
  - vendor/snowball/algorithms/indonesian.sbl
59
61
  - vendor/snowball/algorithms/irish.sbl
60
62
  - vendor/snowball/algorithms/italian.sbl
61
- - vendor/snowball/algorithms/kraaij_pohlmann.sbl
62
63
  - vendor/snowball/algorithms/lithuanian.sbl
63
64
  - vendor/snowball/algorithms/lovins.sbl
64
65
  - vendor/snowball/algorithms/nepali.sbl
@@ -90,7 +91,6 @@ files:
90
91
  - vendor/snowball/compiler/header.h
91
92
  - vendor/snowball/compiler/space.c
92
93
  - vendor/snowball/compiler/syswords.h
93
- - vendor/snowball/compiler/syswords2.h
94
94
  - vendor/snowball/compiler/tokeniser.c
95
95
  - vendor/snowball/csharp/.gitignore
96
96
  - vendor/snowball/csharp/Snowball/Algorithms/.gitignore
@@ -168,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
168
  - !ruby/object:Gem::Version
169
169
  version: '0'
170
170
  requirements: []
171
- rubygems_version: 3.6.2
171
+ rubygems_version: 3.6.9
172
172
  specification_version: 4
173
173
  summary: Stemming for Ruby, powered by Snowball
174
174
  test_files: []
@@ -1,112 +0,0 @@
1
- language: minimal
2
- arch: arm64
3
- dist: xenial
4
- env:
5
- global: MAKE=make
6
- matrix:
7
- include:
8
- - name: "C distribution build"
9
- language: c
10
- dist: focal
11
- compiler: gcc
12
- env: CFLAGS_DIST_BUILD='-O2 -Wall -W -std=c90 -Wdeclaration-after-statement -Werror'
13
- - name: "C distribution build (clang)"
14
- language: c
15
- dist: focal
16
- compiler: clang
17
- env: CFLAGS_DIST_BUILD='-O2 -Wall -W -std=c90 -Wdeclaration-after-statement -Werror'
18
- - language: c
19
- compiler: gcc
20
- env: c_tests=y CFLAGS='-O2 -Wall -W -std=c99 -Werror'
21
- - language: c
22
- compiler: clang
23
- env: c_tests=y CFLAGS='-O2 -Wall -W -std=c99 -Werror'
24
- - language: java
25
- env: JAVA=java JAVAC=javac
26
- - language: go
27
- go: "1.8"
28
- env: GO=go
29
- - language: go
30
- dist: bionic
31
- go: "1.17"
32
- env: GO=go
33
- - language: node_js
34
- node_js: "node"
35
- env: NODE=node
36
- dist: focal # seems to be needed for working node
37
- - language: rust
38
- rust:
39
- - stable
40
- - beta
41
- dist: bionic
42
- env: RUST=rust
43
- - language: csharp
44
- arch: amd64 # csharp doesn't seem to work on arm64
45
- env: MCS=mcs
46
- dist: bionic
47
- - name: Pascal
48
- env: FPC=fpc
49
- dist: bionic
50
- addons:
51
- apt:
52
- packages:
53
- - fpc
54
- # The pure Python versions run slowly so we need to thin the testdata
55
- # for languages such as Arabic where there's a lot, or else the build
56
- # hits the travis time limit. With pypy, it's enough faster than we
57
- # can run the full tests.
58
- - language: python
59
- python: "3.9"
60
- env: PYTHON=python THIN_FACTOR=10
61
- - language: python
62
- python: "3.7"
63
- env: PYTHON=python THIN_FACTOR=10
64
- - language: python
65
- python: "3.6"
66
- env: PYTHON=python THIN_FACTOR=10
67
- - language: python
68
- python: "pypy3.7-7.3.5"
69
- env: PYTHON=python
70
- dist: bionic
71
- - name: "Ada"
72
- env: gprbuild=gprbuild
73
- dist: bionic
74
- addons:
75
- apt:
76
- packages:
77
- - gnat
78
- - gprbuild
79
- - os: windows
80
- language: c
81
- env: c_tests=y MAKE=mingw32-make
82
- - os: windows
83
- language: go
84
- env: GO=go MAKE=mingw32-make
85
-
86
- before_install:
87
- # Try to check out a branch of the same name from the snowball-data repo
88
- # sibling of this snowball repo, so that PRs requiring changes to both can be
89
- # CI tested easily.
90
- #
91
- # If that fails, just use the standard snowball-data repo's default branch.
92
- - GH_BRANCH=${TRAVIS_PULL_REQUEST_BRANCH:-$TRAVIS_BRANCH}
93
- - GH_REPO_SLUG=${TRAVIS_PULL_REQUEST_SLUG:-$TRAVIS_REPO_SLUG}
94
- - GH_REPO_URL=https://github.com/${GH_REPO_SLUG%%/*}/snowball-data.git
95
- - echo "Trying branch $GH_BRANCH from $GH_REPO_URL"
96
- - git clone --depth=1 -b "$GH_BRANCH" "$GH_REPO_URL" || git clone --depth=1 https://github.com/snowballstem/snowball-data.git
97
-
98
- script:
99
- # Ensure CC is set for building the compiler in non-C builds.
100
- - test -n "$CC" || export CC=gcc
101
- - $MAKE CC="$CC"
102
- - test -z "$CFLAGS_DIST_BUILD" || { pip install setuptools && $MAKE dist && mkdir tmp && cd tmp && tar xf ../dist/libstemmer_c-*.tar.gz && cd libstemmer_c-* && $MAKE CFLAGS="$CFLAGS_DIST_BUILD" ; }
103
- - test -z "$c_tests" || $MAKE check CC="$CC" STEMMING_DATA=snowball-data
104
- - test -z "$PYTHON" || $MAKE check_python python="$PYTHON" STEMMING_DATA=snowball-data
105
- - test -z "$JAVA" -o -z "$JAVAC" || $MAKE check_java STEMMING_DATA=snowball-data
106
- - test -z "$MCS" || $MAKE check_csharp MCS="$MCS" STEMMING_DATA=snowball-data
107
- - test -z "$NODE" || $MAKE check_js STEMMING_DATA=snowball-data
108
- - test -z "$RUST" || $MAKE check_rust STEMMING_DATA=snowball-data
109
- - test -z "$RUST" || $MAKE check_rust STEMMING_DATA=snowball-data
110
- - test -z "$GO" || $MAKE check_go STEMMING_DATA=snowball-data
111
- - test -z "$FPC" || $MAKE check_pascal STEMMING_DATA=snowball-data
112
- - test -z "$gprbuild" || $MAKE check_ada STEMMING_DATA=snowball-data
@@ -1,145 +0,0 @@
1
-
2
- /*
3
- Extra rule for -nisse ending added 11 Dec 2009
4
- */
5
-
6
- routines (
7
- prelude postlude
8
- mark_regions
9
- R1 R2
10
- standard_suffix
11
- )
12
-
13
- externals ( stem )
14
-
15
- integers ( p1 p2 x )
16
-
17
- groupings ( v s_ending st_ending )
18
-
19
- stringescapes {}
20
-
21
- /* special characters */
22
-
23
- stringdef a" '{U+00E4}'
24
- stringdef o" '{U+00F6}'
25
- stringdef u" '{U+00FC}'
26
- stringdef ss '{U+00DF}'
27
-
28
- define v 'aeiouy{a"}{o"}{u"}'
29
-
30
- define s_ending 'bdfghklmnrt'
31
- define st_ending s_ending - 'r'
32
-
33
- define prelude as (
34
-
35
- test repeat goto (
36
- v [('u'] v <- 'U') or
37
- ('y'] v <- 'Y')
38
- )
39
-
40
- repeat (
41
- [substring] among(
42
- '{ss}' (<- 'ss')
43
- 'ae' (<- '{a"}')
44
- 'oe' (<- '{o"}')
45
- 'ue' (<- '{u"}')
46
- 'qu' ()
47
- '' (next)
48
- )
49
- )
50
-
51
- )
52
-
53
- define mark_regions as (
54
-
55
- $p1 = limit
56
- $p2 = limit
57
-
58
- test(hop 3 setmark x)
59
-
60
- gopast v gopast non-v setmark p1
61
- try($p1 < x $p1 = x) // at least 3
62
- gopast v gopast non-v setmark p2
63
-
64
- )
65
-
66
- define postlude as repeat (
67
-
68
- [substring] among(
69
- 'Y' (<- 'y')
70
- 'U' (<- 'u')
71
- '{a"}' (<- 'a')
72
- '{o"}' (<- 'o')
73
- '{u"}' (<- 'u')
74
- '' (next)
75
- )
76
-
77
- )
78
-
79
- backwardmode (
80
-
81
- define R1 as $p1 <= cursor
82
- define R2 as $p2 <= cursor
83
-
84
- define standard_suffix as (
85
- do (
86
- [substring] R1 among(
87
- 'em' 'ern' 'er'
88
- ( delete
89
- )
90
- 'e' 'en' 'es'
91
- ( delete
92
- try (['s'] 'nis' delete)
93
- )
94
- 's'
95
- ( s_ending delete
96
- )
97
- )
98
- )
99
- do (
100
- [substring] R1 among(
101
- 'en' 'er' 'est'
102
- ( delete
103
- )
104
- 'st'
105
- ( st_ending hop 3 delete
106
- )
107
- )
108
- )
109
- do (
110
- [substring] R2 among(
111
- 'end' 'ung'
112
- ( delete
113
- try (['ig'] not 'e' R2 delete)
114
- )
115
- 'ig' 'ik' 'isch'
116
- ( not 'e' delete
117
- )
118
- 'lich' 'heit'
119
- ( delete
120
- try (
121
- ['er' or 'en'] R1 delete
122
- )
123
- )
124
- 'keit'
125
- ( delete
126
- try (
127
- [substring] R2 among(
128
- 'lich' 'ig'
129
- ( delete
130
- )
131
- )
132
- )
133
- )
134
- )
135
- )
136
- )
137
- )
138
-
139
- define stem as (
140
- do prelude
141
- do mark_regions
142
- backwards
143
- do standard_suffix
144
- do postlude
145
- )