mittens 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/lib/mittens/version.rb +1 -1
- data/vendor/snowball/.github/workflows/ci.yml +216 -0
- data/vendor/snowball/CONTRIBUTING.rst +111 -62
- data/vendor/snowball/GNUmakefile +194 -136
- data/vendor/snowball/NEWS +798 -3
- data/vendor/snowball/README.rst +50 -1
- data/vendor/snowball/ada/src/stemmer.adb +25 -13
- data/vendor/snowball/ada/src/stemmer.ads +9 -9
- data/vendor/snowball/ada/stemmer_config.gpr +7 -7
- data/vendor/snowball/algorithms/basque.sbl +4 -19
- data/vendor/snowball/algorithms/catalan.sbl +2 -9
- data/vendor/snowball/algorithms/danish.sbl +1 -1
- data/vendor/snowball/algorithms/dutch.sbl +284 -122
- data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
- data/vendor/snowball/algorithms/english.sbl +52 -37
- data/vendor/snowball/algorithms/esperanto.sbl +157 -0
- data/vendor/snowball/algorithms/estonian.sbl +269 -0
- data/vendor/snowball/algorithms/finnish.sbl +2 -3
- data/vendor/snowball/algorithms/french.sbl +42 -16
- data/vendor/snowball/algorithms/german.sbl +35 -14
- data/vendor/snowball/algorithms/greek.sbl +76 -76
- data/vendor/snowball/algorithms/hungarian.sbl +8 -6
- data/vendor/snowball/algorithms/indonesian.sbl +14 -8
- data/vendor/snowball/algorithms/italian.sbl +11 -21
- data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
- data/vendor/snowball/algorithms/lovins.sbl +0 -1
- data/vendor/snowball/algorithms/nepali.sbl +138 -37
- data/vendor/snowball/algorithms/norwegian.sbl +19 -5
- data/vendor/snowball/algorithms/porter.sbl +2 -2
- data/vendor/snowball/algorithms/portuguese.sbl +9 -13
- data/vendor/snowball/algorithms/romanian.sbl +17 -4
- data/vendor/snowball/algorithms/serbian.sbl +467 -468
- data/vendor/snowball/algorithms/spanish.sbl +5 -7
- data/vendor/snowball/algorithms/swedish.sbl +60 -6
- data/vendor/snowball/algorithms/tamil.sbl +207 -176
- data/vendor/snowball/algorithms/turkish.sbl +461 -445
- data/vendor/snowball/algorithms/yiddish.sbl +36 -38
- data/vendor/snowball/compiler/analyser.c +445 -192
- data/vendor/snowball/compiler/driver.c +109 -101
- data/vendor/snowball/compiler/generator.c +853 -464
- data/vendor/snowball/compiler/generator_ada.c +404 -366
- data/vendor/snowball/compiler/generator_csharp.c +297 -260
- data/vendor/snowball/compiler/generator_go.c +323 -254
- data/vendor/snowball/compiler/generator_java.c +326 -252
- data/vendor/snowball/compiler/generator_js.c +362 -252
- data/vendor/snowball/compiler/generator_pascal.c +349 -197
- data/vendor/snowball/compiler/generator_python.c +257 -240
- data/vendor/snowball/compiler/generator_rust.c +423 -251
- data/vendor/snowball/compiler/header.h +117 -71
- data/vendor/snowball/compiler/space.c +137 -68
- data/vendor/snowball/compiler/syswords.h +2 -2
- data/vendor/snowball/compiler/tokeniser.c +125 -107
- data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
- data/vendor/snowball/csharp/Stemwords/App.config +2 -2
- data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
- data/vendor/snowball/doc/libstemmer_c_README +7 -4
- data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
- data/vendor/snowball/doc/libstemmer_java_README +12 -1
- data/vendor/snowball/doc/libstemmer_js_README +6 -4
- data/vendor/snowball/doc/libstemmer_python_README +9 -4
- data/vendor/snowball/examples/stemwords.c +12 -12
- data/vendor/snowball/go/env.go +107 -31
- data/vendor/snowball/go/util.go +0 -4
- data/vendor/snowball/include/libstemmer.h +4 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
- data/vendor/snowball/javascript/base-stemmer.js +186 -2
- data/vendor/snowball/javascript/stemwords.js +3 -6
- data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
- data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
- data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
- data/vendor/snowball/libstemmer/modules.txt +13 -10
- data/vendor/snowball/libstemmer/test.c +1 -1
- data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
- data/vendor/snowball/pascal/generate.pl +13 -13
- data/vendor/snowball/python/create_init.py +4 -1
- data/vendor/snowball/python/setup.cfg +0 -3
- data/vendor/snowball/python/setup.py +8 -3
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
- data/vendor/snowball/python/stemwords.py +8 -12
- data/vendor/snowball/runtime/api.c +10 -5
- data/vendor/snowball/runtime/header.h +10 -9
- data/vendor/snowball/runtime/utilities.c +9 -9
- data/vendor/snowball/rust/build.rs +1 -1
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
- data/vendor/snowball/tests/stemtest.c +7 -4
- metadata +7 -7
- data/vendor/snowball/.travis.yml +0 -112
- data/vendor/snowball/algorithms/german2.sbl +0 -145
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
- data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -28,9 +28,13 @@ Type
|
|
28
28
|
|
29
29
|
Protected
|
30
30
|
Function InGrouping(s : array of char; min, max : Integer) : Boolean;
|
31
|
+
Function GoInGrouping(s : array of char; min, max : Integer) : Boolean;
|
31
32
|
Function InGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
33
|
+
Function GoInGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
32
34
|
Function OutGrouping(s : array of char; min, max : Integer) : Boolean;
|
35
|
+
Function GoOutGrouping(s : array of char; min, max : Integer) : Boolean;
|
33
36
|
Function OutGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
37
|
+
Function GoOutGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
34
38
|
|
35
39
|
Function EqS(s_size : Integer; s : AnsiString) : Boolean;
|
36
40
|
Function EqSBk(s_size : Integer; s : AnsiString) : Boolean;
|
@@ -45,7 +49,7 @@ Type
|
|
45
49
|
Procedure SliceCheck;
|
46
50
|
Procedure SliceFrom(s : AnsiString);
|
47
51
|
|
48
|
-
Function ReplaceS(bra, ket : Integer; s : AnsiString) : Integer;
|
52
|
+
Function ReplaceS(bra, ket : Integer; s : AnsiString) : Integer;
|
49
53
|
Procedure Insert(bra, ket : Integer; s : AnsiString);
|
50
54
|
|
51
55
|
Function SliceTo : AnsiString;
|
@@ -89,6 +93,23 @@ Begin
|
|
89
93
|
Result := True;
|
90
94
|
End;
|
91
95
|
|
96
|
+
Function TSnowballProgram.GoInGrouping(s : array of char; min, max : Integer) : Boolean;
|
97
|
+
Var ch : Integer;
|
98
|
+
Begin
|
99
|
+
Result := True;
|
100
|
+
While (FCursor < FLimit) Do
|
101
|
+
Begin
|
102
|
+
ch := Ord(FCurrent[FCursor + 1]);
|
103
|
+
If (ch > max) Or (ch < min) Then Exit;
|
104
|
+
|
105
|
+
ch := ch - min;
|
106
|
+
If (Ord(s[ch Shr 3]) And Ord(1 Shl (ch And $7))) = 0 Then Exit;
|
107
|
+
|
108
|
+
Inc(FCursor);
|
109
|
+
End;
|
110
|
+
Result := False;
|
111
|
+
End;
|
112
|
+
|
92
113
|
Function TSnowballProgram.InGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
93
114
|
Var ch : Integer;
|
94
115
|
Begin
|
@@ -105,6 +126,23 @@ Begin
|
|
105
126
|
Result := True;
|
106
127
|
End;
|
107
128
|
|
129
|
+
Function TSnowballProgram.GoInGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
130
|
+
Var ch : Integer;
|
131
|
+
Begin
|
132
|
+
Result := True;
|
133
|
+
While (FCursor > FBkLimit) Do
|
134
|
+
Begin
|
135
|
+
ch := Ord(FCurrent[FCursor]);
|
136
|
+
If (ch > max) Or (ch < min) Then Exit;
|
137
|
+
|
138
|
+
ch := ch - min;
|
139
|
+
If (Ord(s[ch Shr 3]) And Ord(1 Shl (ch And $7))) = 0 Then Exit;
|
140
|
+
|
141
|
+
Dec(FCursor);
|
142
|
+
End;
|
143
|
+
Result := False;
|
144
|
+
End;
|
145
|
+
|
108
146
|
Function TSnowballProgram.OutGrouping(s : array of char; min, max : Integer) : Boolean;
|
109
147
|
Var ch : Integer;
|
110
148
|
Begin
|
@@ -128,6 +166,29 @@ Begin
|
|
128
166
|
End;
|
129
167
|
End;
|
130
168
|
|
169
|
+
Function TSnowballProgram.GoOutGrouping(s : array of char; min, max : Integer) : Boolean;
|
170
|
+
Var ch : Integer;
|
171
|
+
Begin
|
172
|
+
Result := True;
|
173
|
+
|
174
|
+
While (FCursor < FLimit) Do
|
175
|
+
Begin
|
176
|
+
ch := Ord(FCurrent[FCursor + 1]);
|
177
|
+
|
178
|
+
If (ch <= max) And (ch >= min) Then
|
179
|
+
Begin
|
180
|
+
ch := ch - min;
|
181
|
+
If (Ord(s[ch Shr 3]) And Ord(1 Shl (ch And $7))) <> 0 Then
|
182
|
+
Begin
|
183
|
+
Exit;
|
184
|
+
End;
|
185
|
+
End;
|
186
|
+
|
187
|
+
Inc(FCursor);
|
188
|
+
End;
|
189
|
+
Result := False;
|
190
|
+
End;
|
191
|
+
|
131
192
|
Function TSnowballProgram.OutGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
132
193
|
Var ch : Integer;
|
133
194
|
Begin
|
@@ -151,6 +212,27 @@ Begin
|
|
151
212
|
End;
|
152
213
|
End;
|
153
214
|
|
215
|
+
Function TSnowballProgram.GoOutGroupingBk(s : array of char; min, max : Integer) : Boolean;
|
216
|
+
Var ch : Integer;
|
217
|
+
Begin
|
218
|
+
Result := True;
|
219
|
+
|
220
|
+
While (FCursor > FBkLimit) Do
|
221
|
+
Begin
|
222
|
+
ch := Ord(FCurrent[FCursor]);
|
223
|
+
If (ch <= max) And (ch >= min) Then
|
224
|
+
Begin
|
225
|
+
ch := ch - min;
|
226
|
+
If (Ord(s[ch Shr 3]) And Ord(1 Shl (ch And $7))) <> 0 Then
|
227
|
+
Begin
|
228
|
+
Exit;
|
229
|
+
End;
|
230
|
+
End;
|
231
|
+
Dec(FCursor);
|
232
|
+
End;
|
233
|
+
Result := False;
|
234
|
+
End;
|
235
|
+
|
154
236
|
Function TSnowballProgram.EqS(s_size : Integer; s : AnsiString) : Boolean;
|
155
237
|
Var I : Integer;
|
156
238
|
Begin
|
@@ -262,7 +344,7 @@ Begin
|
|
262
344
|
If Not Assigned(w.Method) Then
|
263
345
|
Begin
|
264
346
|
Result := w.Result;
|
265
|
-
Exit;
|
347
|
+
Exit;
|
266
348
|
End;
|
267
349
|
|
268
350
|
res := w.Method;
|
@@ -7,17 +7,17 @@ use warnings;
|
|
7
7
|
my @sources = @ARGV;
|
8
8
|
|
9
9
|
while (defined(my $line = <STDIN>)) {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
10
|
+
if ($line =~ /\{\s*BEGIN TEMPLATE\s*\}/) {
|
11
|
+
my $template = '';
|
12
|
+
while (defined($line = <STDIN>) && $line !~ /\{\s*END TEMPLATE\s*\}/) {
|
13
|
+
$template .= $line;
|
14
|
+
}
|
15
|
+
foreach my $source(@sources) {
|
16
|
+
my $out = $template;
|
17
|
+
$out =~ s/%STEMMER%/$source/g;
|
18
|
+
print $out;
|
19
|
+
}
|
20
|
+
next;
|
21
|
+
}
|
22
|
+
print $line;
|
23
23
|
}
|
@@ -15,12 +15,15 @@ for pyscript in os.listdir(python_out_folder):
|
|
15
15
|
match = filematch.match(pyscript)
|
16
16
|
if (match):
|
17
17
|
langname = match.group(1)
|
18
|
-
titlecase = langname.title()
|
18
|
+
titlecase = re.sub(r"_", "", langname.title())
|
19
19
|
languages.append(" '%(lang)s': %(title)sStemmer," % {'lang': langname, 'title': titlecase})
|
20
20
|
imports.append('from .%(lang)s_stemmer import %(title)sStemmer' % {'lang': langname, 'title': titlecase})
|
21
21
|
imports.sort()
|
22
22
|
languages.sort()
|
23
23
|
|
24
|
+
if len(languages) == 0:
|
25
|
+
raise AssertionError('languages list is empty!')
|
26
|
+
|
24
27
|
src = '''__all__ = ('language', 'stemmer')
|
25
28
|
|
26
29
|
%(imports)s
|
@@ -3,7 +3,7 @@
|
|
3
3
|
from setuptools import setup
|
4
4
|
import re
|
5
5
|
|
6
|
-
SNOWBALL_VERSION = '
|
6
|
+
SNOWBALL_VERSION = '3.0.1'
|
7
7
|
|
8
8
|
n_stemmers = 0
|
9
9
|
|
@@ -42,8 +42,8 @@ classifiers = [
|
|
42
42
|
for lang in langs:
|
43
43
|
lang_titlecase = lang.title()
|
44
44
|
# Only classifiers listed in https://pypi.org/classifiers/ are allowed
|
45
|
-
|
46
|
-
|
45
|
+
# Remove them here or submit them to https://github.com/pypa/trove-classifiers
|
46
|
+
classifiers.append('Natural Language :: ' + lang_titlecase)
|
47
47
|
|
48
48
|
classifiers.extend([
|
49
49
|
'Operating System :: OS Independent',
|
@@ -52,6 +52,7 @@ classifiers.extend([
|
|
52
52
|
'Programming Language :: Python :: 2.6',
|
53
53
|
'Programming Language :: Python :: 2.7',
|
54
54
|
'Programming Language :: Python :: 3',
|
55
|
+
'Programming Language :: Python :: 3.3',
|
55
56
|
'Programming Language :: Python :: 3.4',
|
56
57
|
'Programming Language :: Python :: 3.5',
|
57
58
|
'Programming Language :: Python :: 3.6',
|
@@ -59,6 +60,9 @@ classifiers.extend([
|
|
59
60
|
'Programming Language :: Python :: 3.8',
|
60
61
|
'Programming Language :: Python :: 3.9',
|
61
62
|
'Programming Language :: Python :: 3.10',
|
63
|
+
'Programming Language :: Python :: 3.11',
|
64
|
+
'Programming Language :: Python :: 3.12',
|
65
|
+
'Programming Language :: Python :: 3.13',
|
62
66
|
'Programming Language :: Python :: Implementation :: CPython',
|
63
67
|
'Programming Language :: Python :: Implementation :: PyPy',
|
64
68
|
'Topic :: Database',
|
@@ -77,5 +81,6 @@ setup(name='snowballstemmer',
|
|
77
81
|
license="BSD-3-Clause",
|
78
82
|
packages=['snowballstemmer'],
|
79
83
|
package_dir={"snowballstemmer": "src/snowballstemmer"},
|
84
|
+
python_requires='!=3.0.*, !=3.1.*, !=3.2.*',
|
80
85
|
classifiers = classifiers
|
81
86
|
)
|
@@ -27,95 +27,63 @@ class BaseStemmer(object):
|
|
27
27
|
self.bra = other.bra
|
28
28
|
self.ket = other.ket
|
29
29
|
|
30
|
-
def in_grouping(self, s
|
30
|
+
def in_grouping(self, s):
|
31
31
|
if self.cursor >= self.limit:
|
32
32
|
return False
|
33
|
-
|
34
|
-
if ch > max or ch < min:
|
35
|
-
return False
|
36
|
-
ch -= min
|
37
|
-
if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
|
33
|
+
if self.current[self.cursor] not in s:
|
38
34
|
return False
|
39
35
|
self.cursor += 1
|
40
36
|
return True
|
41
37
|
|
42
|
-
def go_in_grouping(self, s
|
38
|
+
def go_in_grouping(self, s):
|
43
39
|
while self.cursor < self.limit:
|
44
|
-
|
45
|
-
if ch > max or ch < min:
|
46
|
-
return True
|
47
|
-
ch -= min
|
48
|
-
if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
|
40
|
+
if self.current[self.cursor] not in s:
|
49
41
|
return True
|
50
42
|
self.cursor += 1
|
51
43
|
return False
|
52
44
|
|
53
|
-
def in_grouping_b(self, s
|
45
|
+
def in_grouping_b(self, s):
|
54
46
|
if self.cursor <= self.limit_backward:
|
55
47
|
return False
|
56
|
-
|
57
|
-
if ch > max or ch < min:
|
58
|
-
return False
|
59
|
-
ch -= min
|
60
|
-
if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
|
48
|
+
if self.current[self.cursor - 1] not in s:
|
61
49
|
return False
|
62
50
|
self.cursor -= 1
|
63
51
|
return True
|
64
52
|
|
65
|
-
def go_in_grouping_b(self, s
|
53
|
+
def go_in_grouping_b(self, s):
|
66
54
|
while self.cursor > self.limit_backward:
|
67
|
-
|
68
|
-
if ch > max or ch < min:
|
69
|
-
return True
|
70
|
-
ch -= min
|
71
|
-
if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
|
55
|
+
if self.current[self.cursor - 1] not in s:
|
72
56
|
return True
|
73
57
|
self.cursor -= 1
|
74
58
|
return False
|
75
59
|
|
76
|
-
def out_grouping(self, s
|
60
|
+
def out_grouping(self, s):
|
77
61
|
if self.cursor >= self.limit:
|
78
62
|
return False
|
79
|
-
|
80
|
-
if ch > max or ch < min:
|
81
|
-
self.cursor += 1
|
82
|
-
return True
|
83
|
-
ch -= min
|
84
|
-
if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
|
63
|
+
if self.current[self.cursor] not in s:
|
85
64
|
self.cursor += 1
|
86
65
|
return True
|
87
66
|
return False
|
88
67
|
|
89
|
-
def go_out_grouping(self, s
|
68
|
+
def go_out_grouping(self, s):
|
90
69
|
while self.cursor < self.limit:
|
91
|
-
|
92
|
-
|
93
|
-
ch -= min
|
94
|
-
if (s[ch >> 3] & (0X1 << (ch & 0x7))):
|
95
|
-
return True
|
70
|
+
if self.current[self.cursor] in s:
|
71
|
+
return True
|
96
72
|
self.cursor += 1
|
97
73
|
return False
|
98
74
|
|
99
|
-
def out_grouping_b(self, s
|
75
|
+
def out_grouping_b(self, s):
|
100
76
|
if self.cursor <= self.limit_backward:
|
101
77
|
return False
|
102
|
-
|
103
|
-
if ch > max or ch < min:
|
104
|
-
self.cursor -= 1
|
105
|
-
return True
|
106
|
-
ch -= min
|
107
|
-
if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
|
78
|
+
if self.current[self.cursor - 1] not in s:
|
108
79
|
self.cursor -= 1
|
109
80
|
return True
|
110
81
|
return False
|
111
82
|
|
112
|
-
def go_out_grouping_b(self, s
|
83
|
+
def go_out_grouping_b(self, s):
|
113
84
|
while self.cursor > self.limit_backward:
|
114
|
-
|
115
|
-
|
116
|
-
ch -= min
|
117
|
-
if (s[ch >> 3] & (0X1 << (ch & 0x7))):
|
118
|
-
return True
|
85
|
+
if self.current[self.cursor - 1] in s:
|
86
|
+
return True
|
119
87
|
self.cursor -= 1
|
120
88
|
return False
|
121
89
|
|
@@ -183,8 +151,7 @@ class BaseStemmer(object):
|
|
183
151
|
self.cursor = c + len(w.s)
|
184
152
|
if w.method is None:
|
185
153
|
return w.result
|
186
|
-
|
187
|
-
res = method()
|
154
|
+
res = w.method(self)
|
188
155
|
self.cursor = c + len(w.s)
|
189
156
|
if res:
|
190
157
|
return w.result
|
@@ -241,8 +208,7 @@ class BaseStemmer(object):
|
|
241
208
|
self.cursor = c - len(w.s)
|
242
209
|
if w.method is None:
|
243
210
|
return w.result
|
244
|
-
|
245
|
-
res = method()
|
211
|
+
res = w.method(self)
|
246
212
|
self.cursor = c - len(w.s)
|
247
213
|
if res:
|
248
214
|
return w.result
|
@@ -16,7 +16,7 @@ and output files. If it is omitted, the UTF-8 encoding is used.
|
|
16
16
|
If -p is given the output file consists of each word of the input
|
17
17
|
file followed by \"->\" followed by its stemmed equivalent.
|
18
18
|
If -p2 is given the output file is a two column layout containing
|
19
|
-
the input words in the first column and the stemmed
|
19
|
+
the input words in the first column and the stemmed equivalents in
|
20
20
|
the second column.
|
21
21
|
|
22
22
|
Otherwise, the output file consists of the stemmed words, one per
|
@@ -36,8 +36,7 @@ def main():
|
|
36
36
|
language = 'English'
|
37
37
|
show_help = False
|
38
38
|
while len(argv):
|
39
|
-
arg = argv
|
40
|
-
argv = argv[1:]
|
39
|
+
arg = argv.pop(0)
|
41
40
|
if arg == '-h':
|
42
41
|
show_help = True
|
43
42
|
break
|
@@ -49,25 +48,22 @@ def main():
|
|
49
48
|
if len(argv) == 0:
|
50
49
|
show_help = True
|
51
50
|
break
|
52
|
-
language = argv
|
53
|
-
argv = argv[1:]
|
51
|
+
language = argv.pop(0)
|
54
52
|
elif arg == "-i":
|
55
53
|
if len(argv) == 0:
|
56
54
|
show_help = True
|
57
55
|
break
|
58
|
-
input = argv
|
59
|
-
argv = argv[1:]
|
56
|
+
input = argv.pop(0)
|
60
57
|
elif arg == "-o":
|
61
58
|
if len(argv) == 0:
|
62
59
|
show_help = True
|
63
60
|
break
|
64
|
-
output = argv
|
65
|
-
argv = argv[1:]
|
61
|
+
output = argv.pop(0)
|
66
62
|
elif arg == "-c":
|
67
63
|
if len(argv) == 0:
|
68
64
|
show_help = True
|
69
65
|
break
|
70
|
-
encoding = argv
|
66
|
+
encoding = argv.pop(0)
|
71
67
|
if show_help or input == '' or output == '':
|
72
68
|
usage()
|
73
69
|
else:
|
@@ -76,8 +72,8 @@ def main():
|
|
76
72
|
|
77
73
|
def stemming(lang, input, output, encoding, pretty):
|
78
74
|
stemmer = snowballstemmer.stemmer(lang)
|
79
|
-
with codecs.open(
|
80
|
-
with codecs.open(
|
75
|
+
with codecs.open(input, "r", encoding) as infile:
|
76
|
+
with codecs.open(output, "w", encoding) as outfile:
|
81
77
|
for original in infile.readlines():
|
82
78
|
original = original.strip()
|
83
79
|
# Convert only ASCII-letters to lowercase, to match C behavior
|
@@ -1,23 +1,28 @@
|
|
1
1
|
|
2
|
-
#include <stdlib.h> /* for calloc, free */
|
2
|
+
#include <stdlib.h> /* for malloc, calloc, free */
|
3
3
|
#include "header.h"
|
4
4
|
|
5
5
|
extern struct SN_env * SN_create_env(int S_size, int I_size)
|
6
6
|
{
|
7
|
-
|
7
|
+
static const struct SN_env default_SN_env = {};
|
8
|
+
struct SN_env * z = (struct SN_env *) malloc(sizeof(struct SN_env));
|
8
9
|
if (z == NULL) return NULL;
|
10
|
+
*z = default_SN_env;
|
9
11
|
z->p = create_s();
|
10
12
|
if (z->p == NULL) goto error;
|
11
13
|
if (S_size)
|
12
14
|
{
|
13
15
|
int i;
|
14
|
-
z->S = (symbol * *)
|
16
|
+
z->S = (symbol * *) malloc(S_size * sizeof(symbol *));
|
15
17
|
if (z->S == NULL) goto error;
|
16
18
|
|
17
19
|
for (i = 0; i < S_size; i++)
|
18
20
|
{
|
19
21
|
z->S[i] = create_s();
|
20
|
-
if (z->S[i] == NULL)
|
22
|
+
if (z->S[i] == NULL) {
|
23
|
+
S_size = i;
|
24
|
+
goto error;
|
25
|
+
}
|
21
26
|
}
|
22
27
|
}
|
23
28
|
|
@@ -36,7 +41,7 @@ error:
|
|
36
41
|
extern void SN_close_env(struct SN_env * z, int S_size)
|
37
42
|
{
|
38
43
|
if (z == NULL) return;
|
39
|
-
if (
|
44
|
+
if (z->S)
|
40
45
|
{
|
41
46
|
int i;
|
42
47
|
for (i = 0; i < S_size; i++)
|
@@ -1,11 +1,6 @@
|
|
1
1
|
|
2
|
-
#include <limits.h>
|
3
|
-
|
4
2
|
#include "api.h"
|
5
3
|
|
6
|
-
#define MAXINT INT_MAX
|
7
|
-
#define MININT INT_MIN
|
8
|
-
|
9
4
|
#define HEAD 2*sizeof(int)
|
10
5
|
|
11
6
|
#define SIZE(p) ((int *)(p))[-1]
|
@@ -13,10 +8,16 @@
|
|
13
8
|
#define CAPACITY(p) ((int *)(p))[-2]
|
14
9
|
|
15
10
|
struct among
|
16
|
-
{
|
17
|
-
|
18
|
-
int
|
19
|
-
|
11
|
+
{
|
12
|
+
/* Number of symbols in s. */
|
13
|
+
int s_size;
|
14
|
+
/* Search string. */
|
15
|
+
const symbol * s;
|
16
|
+
/* Delta of index to longest matching substring, or 0 if none. */
|
17
|
+
int substring_i;
|
18
|
+
/* Result of the lookup. */
|
19
|
+
int result;
|
20
|
+
/* Optional condition routine, or NULL if none. */
|
20
21
|
int (* function)(struct SN_env *);
|
21
22
|
};
|
22
23
|
|
@@ -282,19 +282,19 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
|
282
282
|
first_key_inspected = 1;
|
283
283
|
}
|
284
284
|
}
|
285
|
+
w = v + i;
|
285
286
|
while (1) {
|
286
|
-
w = v + i;
|
287
287
|
if (common_i >= w->s_size) {
|
288
288
|
z->c = c + w->s_size;
|
289
|
-
if (w->function ==
|
289
|
+
if (w->function == NULL) return w->result;
|
290
290
|
{
|
291
291
|
int res = w->function(z);
|
292
292
|
z->c = c + w->s_size;
|
293
293
|
if (res) return w->result;
|
294
294
|
}
|
295
295
|
}
|
296
|
-
|
297
|
-
|
296
|
+
if (!w->substring_i) return 0;
|
297
|
+
w += w->substring_i;
|
298
298
|
}
|
299
299
|
}
|
300
300
|
|
@@ -337,19 +337,19 @@ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
|
|
337
337
|
first_key_inspected = 1;
|
338
338
|
}
|
339
339
|
}
|
340
|
+
w = v + i;
|
340
341
|
while (1) {
|
341
|
-
w = v + i;
|
342
342
|
if (common_i >= w->s_size) {
|
343
343
|
z->c = c - w->s_size;
|
344
|
-
if (w->function ==
|
344
|
+
if (w->function == NULL) return w->result;
|
345
345
|
{
|
346
346
|
int res = w->function(z);
|
347
347
|
z->c = c - w->s_size;
|
348
348
|
if (res) return w->result;
|
349
349
|
}
|
350
350
|
}
|
351
|
-
|
352
|
-
|
351
|
+
if (!w->substring_i) return 0;
|
352
|
+
w += w->substring_i;
|
353
353
|
}
|
354
354
|
}
|
355
355
|
|
@@ -434,7 +434,7 @@ extern int slice_from_v(struct SN_env * z, const symbol * p) {
|
|
434
434
|
}
|
435
435
|
|
436
436
|
extern int slice_del(struct SN_env * z) {
|
437
|
-
return slice_from_s(z, 0,
|
437
|
+
return slice_from_s(z, 0, NULL);
|
438
438
|
}
|
439
439
|
|
440
440
|
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
|