mittens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,430 @@
1
+ unit SnowballProgram;
2
+
3
+ interface
4
+
5
Type
    { Optional per-entry callback. The among search calls it after a key
      has matched and only accepts the entry when it returns True. }
    TAmongHandler = Function : Boolean of Object;

Type
    { One entry of a table searched by FindAmong / FindAmongBk. }
    TAmong = record
        Str    : AnsiString;     // search string
        Index  : Integer;        // index to longest matching substring
                                 // (a negative value ends the search)
        Result : Integer;        // result of the lookup
        Method : TAmongHandler;  // method to use if substring matches
    End;
15
+
16
Type
    {$M+}
    { Runtime base class for stemmers: holds the word being processed plus
      cursor/limit/slice state, and offers the matching and slice-editing
      primitives implemented further down in this unit. Subclasses supply
      the actual algorithm by overriding stem. }
    TSnowballProgram = Class
    Protected
        { Working state. Positions are 0-based offsets into FCurrent, so
          the character following the cursor is FCurrent[FCursor + 1]. }
        FCurrent : AnsiString;
        FCursor  : Integer;
        FLimit   : Integer;
        FBkLimit : Integer;
        FBra     : Integer;
        FKet     : Integer;

        Procedure SetCurrent(Current: AnsiString);

    Protected
        { Character-class tests against a bitmap covering codes min..max }
        Function InGrouping(s : array of char; min, max : Integer) : Boolean;
        Function InGroupingBk(s : array of char; min, max : Integer) : Boolean;
        Function OutGrouping(s : array of char; min, max : Integer) : Boolean;
        Function OutGroupingBk(s : array of char; min, max : Integer) : Boolean;

        { Literal comparison at the cursor, forwards and backwards }
        Function EqS(s_size : Integer; s : AnsiString) : Boolean;
        Function EqSBk(s_size : Integer; s : AnsiString) : Boolean;

        { Same as EqS / EqSBk, comparing the whole of s }
        Function EqV(s : AnsiString) : Boolean;
        Function EqVBk(s : AnsiString) : Boolean;

        { Table lookup of the text at the cursor }
        Function FindAmong(v : array of TAmong; v_size : Integer) : Integer;
        Function FindAmongBk(v : array of TAmong; v_size : Integer) : Integer;

        { Editing of the slice FBra..FKet }
        Procedure SliceDel;
        Procedure SliceCheck;
        Procedure SliceFrom(s : AnsiString);

        { Editing of an arbitrary range bra..ket }
        Function ReplaceS(bra, ket : Integer; s : AnsiString) : Integer;
        Procedure Insert(bra, ket : Integer; s : AnsiString);

        { Extraction of text }
        Function SliceTo : AnsiString;
        Function AssignTo : AnsiString;

    Public
        { Set & Retrieve current string }
        Property Current: AnsiString Read FCurrent Write SetCurrent;

        { Method subclasses need to implement }
        Function stem : Boolean; Virtual; Abstract;
    End;
61
+
62
+ Implementation
63
+
64
+ Uses Math;
65
+
66
{ Property setter for Current: stores the new text and resets the cursor,
  both limits and the slice markers so that they span the whole string. }
Procedure TSnowballProgram.SetCurrent(Current: AnsiString);
Begin
    FCurrent := Current;

    // Everything that marks a start position goes back to offset 0 ...
    FCursor  := 0;
    FBkLimit := 0;
    FBra     := 0;

    // ... and everything that marks an end goes to the end of the text.
    FLimit := Length(Current);
    FKet   := FLimit;
End;
75
+
76
{ Forward test: if the character after the cursor has a code in min..max
  and its bit is set in the bitmap s, step the cursor over it and return
  True. Otherwise return False and leave the cursor alone. }
Function TSnowballProgram.InGrouping(s : array of char; min, max : Integer) : Boolean;
Var code, bit : Integer;
Begin
    Result := False;

    If FCursor < FLimit Then
    Begin
        code := Ord(FCurrent[FCursor + 1]);
        If (code >= min) And (code <= max) Then
        Begin
            // Bit number relative to min: byte = bit div 8, mask = bit mod 8.
            bit := code - min;
            If (Ord(s[bit Shr 3]) And Ord(1 Shl (bit And $7))) <> 0 Then
            Begin
                Inc(FCursor);
                Result := True;
            End;
        End;
    End;
End;
91
+
92
{ Backward test: if the character before the cursor has a code in min..max
  and its bit is set in the bitmap s, step the cursor back over it and
  return True. Otherwise return False and leave the cursor alone. }
Function TSnowballProgram.InGroupingBk(s : array of char; min, max : Integer) : Boolean;
Var code, bit : Integer;
Begin
    Result := False;

    If FCursor > FBkLimit Then
    Begin
        code := Ord(FCurrent[FCursor]);
        If (code >= min) And (code <= max) Then
        Begin
            // Bit number relative to min: byte = bit div 8, mask = bit mod 8.
            bit := code - min;
            If (Ord(s[bit Shr 3]) And Ord(1 Shl (bit And $7))) <> 0 Then
            Begin
                Dec(FCursor);
                Result := True;
            End;
        End;
    End;
End;
107
+
108
{ Forward test, complement of InGrouping: if the character after the cursor
  is outside min..max, or inside that range with its bit clear in the
  bitmap s, step the cursor over it and return True. Returns False at the
  limit or when the character belongs to the grouping. }
Function TSnowballProgram.OutGrouping(s : array of char; min, max : Integer) : Boolean;
Var code, bit : Integer;
    member : Boolean;
Begin
    Result := False;
    If FCursor >= FLimit Then Exit;

    code := Ord(FCurrent[FCursor + 1]);

    // Codes outside min..max have no bit in the bitmap and are never members.
    member := False;
    If (code >= min) And (code <= max) Then
    Begin
        bit := code - min;
        member := (Ord(s[bit Shr 3]) And Ord(1 Shl (bit And $7))) <> 0;
    End;

    If Not member Then
    Begin
        Inc(FCursor);
        Result := True;
    End;
End;
130
+
131
{ Backward test, complement of InGroupingBk: if the character before the
  cursor is outside min..max, or inside that range with its bit clear in
  the bitmap s, step the cursor back over it and return True. Returns False
  at the backward limit or when the character belongs to the grouping. }
Function TSnowballProgram.OutGroupingBk(s : array of char; min, max : Integer) : Boolean;
Var code, bit : Integer;
    member : Boolean;
Begin
    Result := False;
    If FCursor <= FBkLimit Then Exit;

    code := Ord(FCurrent[FCursor]);

    // Codes outside min..max have no bit in the bitmap and are never members.
    member := False;
    If (code >= min) And (code <= max) Then
    Begin
        bit := code - min;
        member := (Ord(s[bit Shr 3]) And Ord(1 Shl (bit And $7))) <> 0;
    End;

    If Not member Then
    Begin
        Dec(FCursor);
        Result := True;
    End;
End;
153
+
154
{ Compare the first s_size characters of s with the text following the
  cursor. On a full match the cursor moves past the matched text and the
  result is True; otherwise the cursor is untouched and the result False. }
Function TSnowballProgram.EqS(s_size : Integer; s : AnsiString) : Boolean;
Var k : Integer;
Begin
    Result := False;

    // Not enough text left before the limit.
    If s_size > (FLimit - FCursor) Then Exit;

    k := 1;
    While k <= s_size Do
    Begin
        If s[k] <> FCurrent[FCursor + k] Then Exit;
        Inc(k);
    End;

    Inc(FCursor, s_size);
    Result := True;
End;
168
+
169
{ Compare the first s_size characters of s with the text ending at the
  cursor. On a full match the cursor moves back to the start of the matched
  text and the result is True; otherwise the cursor is untouched and the
  result False. }
Function TSnowballProgram.EqSBk(s_size : Integer; s : AnsiString) : Boolean;
Var k, base : Integer;
Begin
    Result := False;

    // Not enough text between the backward limit and the cursor.
    If s_size > (FCursor - FBkLimit) Then Exit;

    // Offset of the candidate match, so FCurrent[base + k] pairs with s[k].
    base := FCursor - s_size;

    k := 1;
    While k <= s_size Do
    Begin
        If s[k] <> FCurrent[base + k] Then Exit;
        Inc(k);
    End;

    FCursor := base;
    Result := True;
End;
183
+
184
{ Forward comparison against the whole of s; see EqS. }
Function TSnowballProgram.EqV(s : AnsiString) : Boolean;
Var n : Integer;
Begin
    n := Length(s);
    Result := EqS(n, s);
End;
188
+
189
{ Backward comparison against the whole of s; see EqSBk. }
Function TSnowballProgram.EqVBk(s : AnsiString) : Boolean;
Var n : Integer;
Begin
    n := Length(s);
    Result := EqSBk(n, s);
End;
193
+
194
{ Look up the text following the cursor in the table v (v_size entries).

  Phase 1 is a binary search over v comparing each key with the text at the
  cursor. Phase 2 starts at the entry the search settled on and follows the
  Index links; the first entry whose key is fully matched (and whose Method,
  if assigned, returns True) wins: the cursor is placed just after that key
  and the entry's Result is returned. Returns 0 when the chain ends (a
  negative Index) without an accepted entry. }
Function TSnowballProgram.FindAmong(v : array of TAmong; v_size : Integer) : Integer;
Var lower, upper, mid : Integer;
    cur, lim : Integer;
    matchedLower, matchedUpper, matched, cmp, keyLen : Integer;
    firstKeyInspected, accepted : Boolean;
    item : TAmong;
Begin
    lower := 0;
    upper := v_size;

    cur := FCursor;
    lim := FLimit;

    // Number of leading characters known to agree with v[lower] / v[upper].
    matchedLower := 0;
    matchedUpper := 0;

    firstKeyInspected := False;

    { Phase 1: binary search }
    While True Do
    Begin
        mid := lower + ((upper - lower) Shr 1);
        cmp := 0;
        // Characters shared with both bounds need not be compared again.
        matched := Min(matchedLower, matchedUpper);
        item := v[mid];

        While matched < Length(item.Str) Do
        Begin
            If (cur + matched) = lim Then
            Begin
                // Ran out of text before the key ended.
                cmp := -1;
                Break;
            End;

            cmp := Ord(FCurrent[cur + matched + 1]) - Ord(item.Str[matched + 1]);
            If cmp <> 0 Then Break;

            Inc(matched);
        End;

        If cmp < 0 Then
        Begin
            upper := mid;
            matchedUpper := matched;
        End
        Else
        Begin
            lower := mid;
            matchedLower := matched;
        End;

        If (upper - lower) <= 1 Then
        Begin
            // Stop when entry 0 is not the candidate, when the table holds a
            // single entry, or once entry 0 has had its extra inspection.
            If (lower > 0) Or (upper = lower) Or firstKeyInspected Then Break;

            // Otherwise go round once more so that entry 0 gets compared.
            firstKeyInspected := True;
        End;
    End;

    { Phase 2: walk the Index chain from the candidate }
    While True Do
    Begin
        item := v[lower];
        keyLen := Length(item.Str);

        If matchedLower >= keyLen Then
        Begin
            FCursor := cur + keyLen;

            If Assigned(item.Method) Then
            Begin
                accepted := item.Method;
                // Put the cursor back after the key once the handler has run.
                FCursor := cur + keyLen;
            End
            Else
                accepted := True;

            If accepted Then
            Begin
                Result := item.Result;
                Exit;
            End;
        End;

        lower := item.Index;
        If lower < 0 Then
        Begin
            Result := 0;
            Exit;
        End;
    End;
End;
285
+
286
{ Backward counterpart of FindAmong: look up the text ending at the cursor
  in the table v (v_size entries), comparing keys from their last character
  towards their first.

  Phase 1 is a binary search over v. Phase 2 starts at the entry the search
  settled on and follows the Index links; the first entry whose key is fully
  matched (and whose Method, if assigned, returns True) wins: the cursor is
  placed at the start of the matched text and the entry's Result is
  returned. Returns 0 when the chain ends (a negative Index) without an
  accepted entry. }
Function TSnowballProgram.FindAmongBk(v : array of TAmong; v_size : Integer) : Integer;
Var lower, upper, mid : Integer;
    cur, backLim : Integer;
    matchedLower, matchedUpper, matched, cmp, keyLen : Integer;
    firstKeyInspected, accepted : Boolean;
    item : TAmong;
Begin
    lower := 0;
    upper := v_size;

    cur := FCursor;
    backLim := FBkLimit;

    // Number of trailing characters known to agree with v[lower] / v[upper].
    matchedLower := 0;
    matchedUpper := 0;

    firstKeyInspected := False;

    { Phase 1: binary search }
    While True Do
    Begin
        mid := lower + ((upper - lower) Shr 1);
        cmp := 0;
        // Characters shared with both bounds need not be compared again.
        matched := Min(matchedLower, matchedUpper);
        item := v[mid];

        While matched < Length(item.Str) Do
        Begin
            If (cur - matched) = backLim Then
            Begin
                // Ran out of text before the key was exhausted.
                cmp := -1;
                Break;
            End;

            // Key characters are consumed from the end of the key backwards.
            cmp := Ord(FCurrent[cur - matched]) - Ord(item.Str[Length(item.Str) - matched]);
            If cmp <> 0 Then Break;

            Inc(matched);
        End;

        If cmp < 0 Then
        Begin
            upper := mid;
            matchedUpper := matched;
        End
        Else
        Begin
            lower := mid;
            matchedLower := matched;
        End;

        If (upper - lower) <= 1 Then
        Begin
            // Stop when entry 0 is not the candidate, when the table holds a
            // single entry, or once entry 0 has had its extra inspection.
            If (lower > 0) Or (upper = lower) Or firstKeyInspected Then Break;

            // Otherwise go round once more so that entry 0 gets compared.
            firstKeyInspected := True;
        End;
    End;

    { Phase 2: walk the Index chain from the candidate }
    While True Do
    Begin
        item := v[lower];
        keyLen := Length(item.Str);

        If matchedLower >= keyLen Then
        Begin
            FCursor := cur - keyLen;

            If Assigned(item.Method) Then
            Begin
                accepted := item.Method;
                // Put the cursor back before the key once the handler has run.
                FCursor := cur - keyLen;
            End
            Else
                accepted := True;

            If accepted Then
            Begin
                Result := item.Result;
                Exit;
            End;
        End;

        lower := item.Index;
        If lower < 0 Then
        Begin
            Result := 0;
            Exit;
        End;
    End;
End;
372
+
373
{ Sanity check performed before slice operations: requires
  0 <= FBra <= FKet <= FLimit <= Length(FCurrent). If that does not hold,
  a message is printed and the program is halted. }
Procedure TSnowballProgram.SliceCheck;
Var consistent : Boolean;
Begin
    consistent := (FBra >= 0)
              And (FBra <= FKet)
              And (FKet <= FLimit)
              And (FLimit <= Length(FCurrent));

    If Not consistent Then
    Begin
        WriteLn('Faulty slice operation.');
        Halt;
    End;
End;
381
+
382
{ Delete the current slice FBra..FKet, i.e. replace it with the empty
  string after validating the slice markers. }
Procedure TSnowballProgram.SliceDel;
Begin
    SliceCheck;
    ReplaceS(FBra, FKet, '');
End;
386
+
387
{ Replace the characters between offsets bra and ket of FCurrent with s.
  FLimit is shifted by the change in length. The cursor is shifted too when
  it was at or after ket, or pulled back to bra when it was inside the
  replaced range. Returns the change in length (new minus old). }
Function TSnowballProgram.ReplaceS(bra, ket : Integer; s : AnsiString) : Integer;
Var removed : Integer;
Begin
    removed := ket - bra;
    Result := Length(s) - removed;

    // Offsets are 0-based, string indexes 1-based.
    Delete(FCurrent, bra + 1, removed);
    System.Insert(s, FCurrent, bra + 1);

    Inc(FLimit, Result);

    If FCursor >= ket Then
        Inc(FCursor, Result)
    Else If FCursor > bra Then
        FCursor := bra;
End;
404
+
405
{ Replace the range bra..ket with s (see ReplaceS) and shift the slice
  markers FBra / FKet by the change in length when they are at or after
  bra. }
Procedure TSnowballProgram.Insert(bra, ket : Integer; s : AnsiString);
Var delta : Integer;
Begin
    delta := ReplaceS(bra, ket, s);

    If FBra >= bra Then Inc(FBra, delta);
    If FKet >= bra Then Inc(FKet, delta);
End;
412
+
413
{ Return a copy of the current slice FBra..FKet after validating the
  slice markers. }
Function TSnowballProgram.SliceTo() : AnsiString;
Var count : Integer;
Begin
    SliceCheck;

    count := FKet - FBra;
    // FBra is a 0-based offset; Copy expects a 1-based index.
    Result := Copy(FCurrent, FBra + 1, count);
End;
418
+
419
{ Replace the current slice FBra..FKet with s after validating the slice
  markers. The length change reported by ReplaceS is not needed here. }
Procedure TSnowballProgram.SliceFrom(s : AnsiString);
Begin
    SliceCheck;
    ReplaceS(FBra, FKet, s);
End;
424
+
425
{ Return the text from the start of FCurrent up to the forward limit. }
Function TSnowballProgram.AssignTo() : AnsiString;
Var count : Integer;
Begin
    count := FLimit;
    Result := Copy(FCurrent, 1, count);
End;
429
+
430
+ End.
@@ -0,0 +1,23 @@
1
#!/usr/bin/env perl
use strict;
use warnings;

# Generate Pascal stemwords source.
#
# Reads a template on STDIN and writes the expanded source to STDOUT. The
# command-line arguments are the stemmer names. Every section enclosed by
# the begin/end template marker lines is emitted once per stemmer name with
# the %STEMMER% placeholder replaced by that name; the marker lines
# themselves are dropped. All other lines are copied through unchanged.

my @stemmers = @ARGV;

LINE:
while (defined(my $line = <STDIN>)) {
    # Ordinary line: pass it straight through.
    unless ($line =~ /\{\s*BEGIN TEMPLATE\s*\}/) {
        print $line;
        next LINE;
    }

    # Collect the section body up to (not including) the end marker, or
    # up to end of input if the marker is missing.
    my @section;
    while (defined($line = <STDIN>)) {
        last if $line =~ /\{\s*END TEMPLATE\s*\}/;
        push @section, $line;
    }
    my $template = join '', @section;

    # Emit one expanded copy of the section per stemmer.
    for my $stemmer (@stemmers) {
        (my $expanded = $template) =~ s/%STEMMER%/$stemmer/g;
        print $expanded;
    }
}
@@ -0,0 +1,78 @@
1
program stemwords;

// Command-line driver: reads words from standard input, one per line, and
// writes the stemmed form of each to standard output. The language is
// selected with "-l <name>" and defaults to english.
//
// This file is a template. The marker comments in the uses clause and in
// the language dispatch below delimit sections that the generator script
// repeats once per stemmer, so those lines must be kept exactly as they are.

{$ifdef windows}
{$APPTYPE CONSOLE}
{$endif}

uses
    SnowballProgram,
    { BEGIN TEMPLATE }
    %STEMMER%Stemmer in '%STEMMER%Stemmer.pas',
    { END TEMPLATE }
    SysUtils;

Var
    Stemmer  : TSnowballProgram;
    CurWord  : AnsiString;
    i        : Integer;
    language : AnsiString;

Const
    // Characters that terminate a word on input.
    Delimiters : Set Of Char = [#10, #13];

// Read characters from standard input into CurWord until a delimiter, a
// read error or end of input. Returns False only when the input was already
// exhausted on entry.
Function NextWord : Boolean;
Var Ch : Char;
Begin
    CurWord := '';

    Result := Not Eof;

    While Not Eof Do
    Begin
        Read(Ch);
        If (IOResult <> 0) Or (Ch In Delimiters) Then Break;
        CurWord := CurWord + Ch;
    End;
End;

begin
    language := 'english';

    // Option parsing: only "-l <language>" is understood.
    i := 0;
    while i < ParamCount do
    begin
        Inc(i);
        if ParamStr(i) <> '-l' then
        begin
            WriteLn('option '+ParamStr(i)+' unknown');
            Exit;
        end;
        Inc(i);
        language := ParamStr(i);
    end;

    // Language dispatch; the "if False" head lets every generated branch
    // start with "else if".
    if False then
    { BEGIN TEMPLATE }
    else if language = '%STEMMER%' then
        Stemmer := T%STEMMER%Stemmer.Create
    { END TEMPLATE }
    else
    begin
        WriteLn('Stemming language '+language+' unknown');
        Exit;
    end;

    Try
        While Not Eof Do
        Begin
            While NextWord Do
            Begin
                Stemmer.Current := CurWord;
                Stemmer.Stem;
                WriteLn(Stemmer.Current);
            End;
        End;
    Finally
        Stemmer.Free;
    End;
end.
@@ -0,0 +1,7 @@
1
+ include *.rst
2
+ include modules.txt
3
+ include setup.*
4
+ recursive-include src *.py
5
+ include MANIFEST.in
6
+ include COPYING
7
+ include NEWS
@@ -0,0 +1,54 @@
1
#!/usr/bin/env python
"""Generate ``__init__.py`` for the pure-Python ``snowballstemmer`` package.

Usage: create_init.py PYTHON_OUT_FOLDER

PYTHON_OUT_FOLDER is scanned for generated ``<language>_stemmer.py`` modules
and an ``__init__.py`` is written into the same folder.  The generated module
imports every stemmer class and exposes ``algorithms()`` and
``stemmer(lang)``, delegating to the ``Stemmer`` C extension (PyStemmer)
when it can be imported.
"""

import sys
import re
import os

# Generated stemmer modules are named "<language>_stemmer.py".
filematch = re.compile(r"(\w+)_stemmer\.py$")

# Source of the generated __init__.py.  It is expanded with the % operator,
# so literal percent signs meant for the generated code are doubled.
#
# Fixes relative to the previous template:
#   * __all__ named 'language', which the module never defined, so
#     "from snowballstemmer import *" failed; it now names 'algorithms'.
#   * algorithms() called Stemmer.language(); PyStemmer's function for
#     listing the available algorithms is Stemmer.algorithms().
INIT_TEMPLATE = '''__all__ = ('algorithms', 'stemmer')

%(imports)s

_languages = {
%(languages)s
}

try:
    import Stemmer
    cext_available = True
except ImportError:
    cext_available = False

def algorithms():
    if cext_available:
        return Stemmer.algorithms()
    else:
        return list(_languages.keys())

def stemmer(lang):
    if cext_available:
        return Stemmer.Stemmer(lang)
    if lang.lower() in _languages:
        return _languages[lang.lower()]()
    else:
        raise KeyError("Stemming algorithm '%%s' not found" %% lang)
'''


def build_init_source(filenames):
    """Return the text of ``__init__.py`` for the given directory listing.

    ``filenames`` is an iterable of file names; names that do not look like
    ``<language>_stemmer.py`` are ignored.  Import lines and language-table
    entries are emitted in sorted order so the output does not depend on
    the order of the listing.
    """
    imports = []
    languages = []
    for pyscript in filenames:
        match = filematch.match(pyscript)
        if not match:
            continue
        langname = match.group(1)
        names = {'lang': langname, 'title': langname.title()}
        languages.append("    '%(lang)s': %(title)sStemmer," % names)
        imports.append('from .%(lang)s_stemmer import %(title)sStemmer' % names)
    imports.sort()
    languages.sort()
    return INIT_TEMPLATE % {'imports': '\n'.join(imports),
                            'languages': '\n'.join(languages)}


def main(argv=None):
    """Write ``__init__.py`` into the folder named by the first argument."""
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.exit('usage: %s PYTHON_OUT_FOLDER' % os.path.basename(args[0]))
    python_out_folder = args[1]
    src = build_init_source(os.listdir(python_out_folder))
    with open(os.path.join(python_out_folder, '__init__.py'), 'w') as out:
        out.write(src)


if __name__ == '__main__':
    main()
@@ -0,0 +1,6 @@
1
+ [metadata]
2
+ long_description = file: README.rst
3
+ long_description_content_type = text/x-rst
4
+
5
+ [bdist_wheel]
6
+ universal=1
@@ -0,0 +1,81 @@
1
#!/usr/bin/env python
"""Packaging script for the pure-Python ``snowballstemmer`` distribution.

The list of stemmers is read from ``modules.txt`` in the current directory;
it drives both the package description and the "Natural Language" trove
classifiers.
"""

import re

from setuptools import setup

SNOWBALL_VERSION = '2.2.0'

# Names of primary languages, and a map from a language to its variants.
langs = []
variants = {}
# Every usable modules.txt entry counts as a stemmer, variants included.
n_stemmers = 0

with open('modules.txt') as fp:
    for line in fp:
        # Skip blank lines and comment lines.
        if len(line) <= 1 or line.startswith('#'):
            continue
        if line.endswith('\n'):
            line = line[:-1]
        tokens = re.split(r'\s+', line)
        if len(tokens) < 3:
            print("Bad modules.txt line: " + line)
            continue
        name, encs, codes = tokens[:3]
        n_stemmers += 1
        if len(tokens) > 3:
            # A fourth column names the language this entry is a variant of.
            variant_of = tokens[3]
            variants.setdefault(variant_of, []).append(name)
        else:
            langs.append(name)

desc = ('This package provides %d stemmers for %d languages generated from '
        'Snowball algorithms.' % (n_stemmers, len(langs)))

# Only classifiers listed in https://pypi.org/classifiers/ are allowed
UNLISTED_LANGUAGES = ('Armenian', 'Yiddish')

classifiers = [
    'Development Status :: 5 - Production/Stable',
    'Intended Audience :: Developers',
    'License :: OSI Approved :: BSD License',
]
classifiers += [
    'Natural Language :: ' + title
    for title in (lang.title() for lang in langs)
    if title not in UNLISTED_LANGUAGES
]
classifiers += [
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 2',
    'Programming Language :: Python :: 2.6',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'Programming Language :: Python :: 3.9',
    'Programming Language :: Python :: 3.10',
    'Programming Language :: Python :: Implementation :: CPython',
    'Programming Language :: Python :: Implementation :: PyPy',
    'Topic :: Database',
    'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
    'Topic :: Text Processing :: Indexing',
    'Topic :: Text Processing :: Linguistic',
]

setup(
    name='snowballstemmer',
    version=SNOWBALL_VERSION,
    description=desc,
    author='Snowball Developers',
    author_email='snowball-discuss@lists.tartarus.org',
    url='https://github.com/snowballstem/snowball',
    keywords="stemmer",
    license="BSD-3-Clause",
    packages=['snowballstemmer'],
    package_dir={"snowballstemmer": "src/snowballstemmer"},
    classifiers=classifiers,
)
@@ -0,0 +1,13 @@
1
+
2
class Among(object):
    """One entry of a stemmer's among (substring lookup) table."""

    def __init__(self, s, substring_i, result, method=None):
        """
        @ivar s search string
        @ivar substring_i index to longest matching substring
        @ivar result result of the lookup
        @ivar method method to use if substring matches
        """
        (self.s,
         self.substring_i,
         self.result,
         self.method) = (s, substring_i, result, method)