mittens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,620 @@
1
+ -----------------------------------------------------------------------
2
+ -- stemmer -- Multi-language stemmer with Snowball generator
3
+ -- Written by Stephane Carrez (Stephane.Carrez@gmail.com)
4
+ -- All rights reserved.
5
+ --
6
+ -- Redistribution and use in source and binary forms, with or without
7
+ -- modification, are permitted provided that the following conditions
8
+ -- are met:
9
+ --
10
+ -- 1. Redistributions of source code must retain the above copyright notice,
11
+ -- this list of conditions and the following disclaimer.
12
+ -- 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ -- this list of conditions and the following disclaimer in the documentation
14
+ -- and/or other materials provided with the distribution.
15
+ -- 3. Neither the name of the Snowball project nor the names of its contributors
16
+ -- may be used to endorse or promote products derived from this software
17
+ -- without specific prior written permission.
18
+ --
19
+ -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20
+ -- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ -- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ -- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23
+ -- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24
+ -- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
+ -- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26
+ -- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ -----------------------------------------------------------------------
30
+ with Interfaces;
31
+ package body Stemmer with SPARK_Mode is
32
+
33
+ subtype Byte is Interfaces.Unsigned_8;
34
+ use type Interfaces.Unsigned_8;
35
+
36
+ procedure Stem_Word (Context : in out Context_Type'Class;
37
+ Word : in String;
38
+ Result : out Boolean) is
39
+ begin
40
+ Context.P (1 .. Word'Length) := Word;
41
+ Context.C := 0;
42
+ Context.L := Word'Length;
43
+ Context.Lb := 0;
44
+ Stemmer.Stem (Context, Result);
45
+ end Stem_Word;
46
+
47
+ function Get_Result (Context : in Context_Type'Class) return String is
48
+ begin
49
+ return Context.P (1 .. Context.L);
50
+ end Get_Result;
51
+
52
+ function Eq_S (Context : in Context_Type'Class;
53
+ S : in String) return Char_Index is
54
+ begin
55
+ if Context.L - Context.C < S'Length then
56
+ return 0;
57
+ end if;
58
+ if Context.P (Context.C + 1 .. Context.C + S'Length) /= S then
59
+ return 0;
60
+ end if;
61
+ return S'Length;
62
+ end Eq_S;
63
+
64
+ function Eq_S_Backward (Context : in Context_Type'Class;
65
+ S : in String) return Char_Index is
66
+ begin
67
+ if Context.C - Context.Lb < S'Length then
68
+ return 0;
69
+ end if;
70
+ if Context.P (Context.C + 1 - S'Length .. Context.C) /= S then
71
+ return 0;
72
+ end if;
73
+ return S'Length;
74
+ end Eq_S_Backward;
75
+
76
+ function Length (Context : in Context_Type'Class) return Natural is
77
+ begin
78
+ return Context.L - Context.Lb;
79
+ end Length;
80
+
81
+ function Length_Utf8 (Context : in Context_Type'Class) return Natural is
82
+ Count : Natural := 0;
83
+ Pos : Positive := 1;
84
+ Val : Byte;
85
+ begin
86
+ while Pos <= Context.L loop
87
+ Val := Character'Pos (Context.P (Pos));
88
+ Pos := Pos + 1;
89
+ if Val >= 16#C0# or Val < 16#80# then
90
+ Count := Count + 1;
91
+ end if;
92
+ end loop;
93
+ return Count;
94
+ end Length_Utf8;
95
+
96
+ function Check_Among (Context : in Context_Type'Class;
97
+ Pos : in Char_Index;
98
+ Shift : in Natural;
99
+ Mask : in Mask_Type) return Boolean is
100
+ use Interfaces;
101
+ Val : constant Byte := Character'Pos (Context.P (Pos + 1));
102
+ begin
103
+ if Natural (Shift_Right (Val, 5)) /= Shift then
104
+ return True;
105
+ end if;
106
+ return (Shift_Right (Unsigned_64 (Mask), Natural (Val and 16#1f#)) and 1) = 0;
107
+ end Check_Among;
108
+
109
+ procedure Find_Among (Context : in out Context_Type'Class;
110
+ Amongs : in Among_Array_Type;
111
+ Pattern : in String;
112
+ Execute : access procedure
113
+ (Ctx : in out Context_Type'Class;
114
+ Operation : in Operation_Index;
115
+ Status : out Boolean);
116
+ Result : out Integer) is
117
+ I : Natural := Amongs'First;
118
+ J : Natural := Amongs'Last + 1;
119
+ Common_I : Natural := 0;
120
+ Common_J : Natural := 0;
121
+ First_Key_Inspected : Boolean := False;
122
+ C : constant Natural := Context.C;
123
+ L : constant Integer := Context.L;
124
+ begin
125
+ loop
126
+ declare
127
+ K : constant Natural := I + (J - I) / 2;
128
+ W : constant Among_Type := Amongs (K);
129
+ Common : Natural := (if Common_I < Common_J then Common_I else Common_J);
130
+ Diff : Integer := 0;
131
+ begin
132
+ for I2 in W.First + Common .. W.Last loop
133
+ if C + Common = L then
134
+ Diff := -1;
135
+ exit;
136
+ end if;
137
+ Diff := Character'Pos (Context.P (C + Common + 1))
138
+ - Character'Pos (Pattern (I2));
139
+ exit when Diff /= 0;
140
+ Common := Common + 1;
141
+ end loop;
142
+ if Diff < 0 then
143
+ J := K;
144
+ Common_J := Common;
145
+ else
146
+ I := K;
147
+ Common_I := Common;
148
+ end if;
149
+ end;
150
+ if J - I <= 1 then
151
+ exit when I > 0 or J = I or First_Key_Inspected;
152
+ First_Key_Inspected := True;
153
+ end if;
154
+ end loop;
155
+
156
+ loop
157
+ declare
158
+ W : constant Among_Type := Amongs (I);
159
+ Len : constant Natural := W.Last - W.First + 1;
160
+ Status : Boolean;
161
+ begin
162
+ if Common_I >= Len then
163
+ Context.C := C + Len;
164
+ if W.Operation = 0 then
165
+ Result := W.Result;
166
+ return;
167
+ end if;
168
+ Execute (Context, W.Operation, Status);
169
+ Context.C := C + Len;
170
+ if Status then
171
+ Result := W.Result;
172
+ return;
173
+ end if;
174
+ end if;
175
+ exit when W.Substring_I < 0;
176
+ I := W.Substring_I;
177
+ end;
178
+ end loop;
179
+ Result := 0;
180
+ end Find_Among;
181
+
182
+ procedure Find_Among_Backward (Context : in out Context_Type'Class;
183
+ Amongs : in Among_Array_Type;
184
+ Pattern : in String;
185
+ Execute : access procedure
186
+ (Ctx : in out Context_Type'Class;
187
+ Operation : in Operation_Index;
188
+ Status : out Boolean);
189
+ Result : out Integer) is
190
+ I : Natural := Amongs'First;
191
+ J : Natural := Amongs'Last + 1;
192
+ Common_I : Natural := 0;
193
+ Common_J : Natural := 0;
194
+ First_Key_Inspected : Boolean := False;
195
+ C : constant Integer := Context.C;
196
+ Lb : constant Integer := Context.Lb;
197
+ begin
198
+ loop
199
+ declare
200
+ K : constant Natural := I + (J - I) / 2;
201
+ W : constant Among_Type := Amongs (K);
202
+ Common : Natural := (if Common_I < Common_J then Common_I else Common_J);
203
+ Diff : Integer := 0;
204
+ begin
205
+ for I2 in reverse W.First .. W.Last - Common loop
206
+ if C - Common = Lb then
207
+ Diff := -1;
208
+ exit;
209
+ end if;
210
+ Diff := Character'Pos (Context.P (C - Common))
211
+ - Character'Pos (Pattern (I2));
212
+ exit when Diff /= 0;
213
+ Common := Common + 1;
214
+ end loop;
215
+ if Diff < 0 then
216
+ J := K;
217
+ Common_J := Common;
218
+ else
219
+ I := K;
220
+ Common_I := Common;
221
+ end if;
222
+ end;
223
+ if J - I <= 1 then
224
+ exit when I > 0 or J = I or First_Key_Inspected;
225
+ First_Key_Inspected := True;
226
+ end if;
227
+ end loop;
228
+
229
+ loop
230
+ declare
231
+ W : constant Among_Type := Amongs (I);
232
+ Len : constant Natural := W.Last - W.First + 1;
233
+ Status : Boolean;
234
+ begin
235
+ if Common_I >= Len then
236
+ Context.C := C - Len;
237
+ if W.Operation = 0 then
238
+ Result := W.Result;
239
+ return;
240
+ end if;
241
+ Execute (Context, W.Operation, Status);
242
+ Context.C := C - Len;
243
+ if Status then
244
+ Result := W.Result;
245
+ return;
246
+ end if;
247
+ end if;
248
+ exit when W.Substring_I < 0;
249
+ I := W.Substring_I;
250
+ end;
251
+ end loop;
252
+ Result := 0;
253
+ end Find_Among_Backward;
254
+
255
+ function Skip_Utf8 (Context : in Context_Type'Class) return Result_Index is
256
+ Pos : Char_Index := Context.C;
257
+ Val : Byte;
258
+ begin
259
+ if Pos >= Context.L then
260
+ return -1;
261
+ end if;
262
+ Pos := Pos + 1;
263
+ Val := Character'Pos (Context.P (Pos));
264
+ if Val >= 16#C0# then
265
+ while Pos < Context.L loop
266
+ Val := Character'Pos (Context.P (Pos + 1));
267
+ exit when Val >= 16#C0# or Val < 16#80#;
268
+ Pos := Pos + 1;
269
+ end loop;
270
+ end if;
271
+ return Pos;
272
+ end Skip_Utf8;
273
+
274
+ function Skip_Utf8 (Context : in Context_Type'Class;
275
+ N : in Integer) return Result_Index is
276
+ Pos : Char_Index := Context.C;
277
+ Val : Byte;
278
+ begin
279
+ if N < 0 then
280
+ return -1;
281
+ end if;
282
+ for I in 1 .. N loop
283
+ if Pos >= Context.L then
284
+ return -1;
285
+ end if;
286
+ Pos := Pos + 1;
287
+ Val := Character'Pos (Context.P (Pos));
288
+ if Val >= 16#C0# then
289
+ while Pos < Context.L loop
290
+ Val := Character'Pos (Context.P (Pos + 1));
291
+ exit when Val >= 16#C0# or Val < 16#80#;
292
+ Pos := Pos + 1;
293
+ end loop;
294
+ end if;
295
+ end loop;
296
+ return Pos;
297
+ end Skip_Utf8;
298
+
299
+ function Skip_Utf8_Backward (Context : in Context_Type'Class) return Result_Index is
300
+ Pos : Char_Index := Context.C;
301
+ Val : Byte;
302
+ begin
303
+ if Pos <= Context.Lb then
304
+ return -1;
305
+ end if;
306
+ Val := Character'Pos (Context.P (Pos));
307
+ Pos := Pos - 1;
308
+ if Val >= 16#80# then
309
+ while Pos > Context.Lb loop
310
+ Val := Character'Pos (Context.P (Pos + 1));
311
+ exit when Val >= 16#C0#;
312
+ Pos := Pos - 1;
313
+ end loop;
314
+ end if;
315
+ return Pos;
316
+ end Skip_Utf8_Backward;
317
+
318
+ function Skip_Utf8_Backward (Context : in Context_Type'Class;
319
+ N : in Integer) return Result_Index is
320
+ Pos : Char_Index := Context.C;
321
+ Val : Byte;
322
+ begin
323
+ if N < 0 then
324
+ return -1;
325
+ end if;
326
+ for I in 1 .. N loop
327
+ if Pos <= Context.Lb then
328
+ return -1;
329
+ end if;
330
+ Val := Character'Pos (Context.P (Pos));
331
+ Pos := Pos - 1;
332
+ if Val >= 16#80# then
333
+ while Pos > Context.Lb loop
334
+ Val := Character'Pos (Context.P (Pos + 1));
335
+ exit when Val >= 16#C0#;
336
+ Pos := Pos - 1;
337
+ end loop;
338
+ end if;
339
+ end loop;
340
+ return Pos;
341
+ end Skip_Utf8_Backward;
342
+
343
+ function Shift_Left (Value : in Utf8_Type;
344
+ Shift : in Natural) return Utf8_Type
345
+ is (Utf8_Type (Interfaces.Shift_Left (Interfaces.Unsigned_32 (Value), Shift)));
346
+
347
+ procedure Get_Utf8 (Context : in Context_Type'Class;
348
+ Value : out Utf8_Type;
349
+ Count : out Natural) is
350
+ B0, B1, B2, B3 : Byte;
351
+ begin
352
+ if Context.C >= Context.L then
353
+ Value := 0;
354
+ Count := 0;
355
+ return;
356
+ end if;
357
+ B0 := Character'Pos (Context.P (Context.C + 1));
358
+ if B0 < 16#C0# or Context.C + 1 >= Context.L then
359
+ Value := Utf8_Type (B0);
360
+ Count := 1;
361
+ return;
362
+ end if;
363
+ B1 := Character'Pos (Context.P (Context.C + 2)) and 16#3F#;
364
+ if B0 < 16#E0# or Context.C + 2 >= Context.L then
365
+ Value := Shift_Left (Utf8_Type (B0 and 16#1F#), 6) or Utf8_Type (B1);
366
+ Count := 2;
367
+ return;
368
+ end if;
369
+ B2 := Character'Pos (Context.P (Context.C + 3)) and 16#3F#;
370
+ if B0 < 16#F0# or Context.C + 3 >= Context.L then
371
+ Value := Shift_Left (Utf8_Type (B0 and 16#0F#), 12)
372
+ or Shift_Left (Utf8_Type (B1), 6) or Utf8_Type (B2);
373
+ Count := 3;
374
+ return;
375
+ end if;
376
+ B3 := Character'Pos (Context.P (Context.C + 4)) and 16#3F#;
377
+ Value := Shift_Left (Utf8_Type (B0 and 16#07#), 18)
378
+ or Shift_Left (Utf8_Type (B1), 12)
379
+ or Shift_Left (Utf8_Type (B2), 6) or Utf8_Type (B3);
380
+ Count := 4;
381
+ end Get_Utf8;
382
+
383
+ procedure Get_Utf8_Backward (Context : in Context_Type'Class;
384
+ Value : out Utf8_Type;
385
+ Count : out Natural) is
386
+ B0, B1, B2, B3 : Byte;
387
+ begin
388
+ if Context.C <= Context.Lb then
389
+ Value := 0;
390
+ Count := 0;
391
+ return;
392
+ end if;
393
+ B3 := Character'Pos (Context.P (Context.C));
394
+ if B3 < 16#80# or Context.C - 1 <= Context.Lb then
395
+ Value := Utf8_Type (B3);
396
+ Count := 1;
397
+ return;
398
+ end if;
399
+ B2 := Character'Pos (Context.P (Context.C - 1));
400
+ if B2 >= 16#C0# or Context.C - 2 <= Context.Lb then
401
+ B3 := B3 and 16#3F#;
402
+ Value := Shift_Left (Utf8_Type (B2 and 16#1F#), 6) or Utf8_Type (B3);
403
+ Count := 2;
404
+ return;
405
+ end if;
406
+ B1 := Character'Pos (Context.P (Context.C - 2));
407
+ if B1 >= 16#E0# or Context.C - 3 <= Context.Lb then
408
+ B3 := B3 and 16#3F#;
409
+ B2 := B2 and 16#3F#;
410
+ Value := Shift_Left (Utf8_Type (B1 and 16#0F#), 12)
411
+ or Shift_Left (Utf8_Type (B2), 6) or Utf8_Type (B3);
412
+ Count := 3;
413
+ return;
414
+ end if;
415
+ B0 := Character'Pos (Context.P (Context.C - 3));
416
+ B1 := B1 and 16#1F#;
417
+ B2 := B2 and 16#3F#;
418
+ B3 := B3 and 16#3F#;
419
+ Value := Shift_Left (Utf8_Type (B0 and 16#07#), 18)
420
+ or Shift_Left (Utf8_Type (B1), 12)
421
+ or Shift_Left (Utf8_Type (B2), 6) or Utf8_Type (B3);
422
+ Count := 4;
423
+ end Get_Utf8_Backward;
424
+
425
+ procedure Out_Grouping (Context : in out Context_Type'Class;
426
+ S : in Grouping_Array;
427
+ Min : in Utf8_Type;
428
+ Max : in Utf8_Type;
429
+ Repeat : in Boolean;
430
+ Result : out Result_Index) is
431
+ Ch : Utf8_Type;
432
+ Count : Natural;
433
+ begin
434
+ if Context.C >= Context.L then
435
+ Result := -1;
436
+ return;
437
+ end if;
438
+
439
+ loop
440
+ Get_Utf8 (Context, Ch, Count);
441
+ if Count = 0 then
442
+ Result := -1;
443
+ return;
444
+ end if;
445
+ if Ch <= Max and Ch >= Min then
446
+ Ch := Ch - Min;
447
+ if S (Ch) then
448
+ Result := Count;
449
+ return;
450
+ end if;
451
+ end if;
452
+ Context.C := Context.C + Count;
453
+ exit when not Repeat;
454
+ end loop;
455
+ Result := 0;
456
+ end Out_Grouping;
457
+
458
+ procedure Out_Grouping_Backward (Context : in out Context_Type'Class;
459
+ S : in Grouping_Array;
460
+ Min : in Utf8_Type;
461
+ Max : in Utf8_Type;
462
+ Repeat : in Boolean;
463
+ Result : out Result_Index) is
464
+ Ch : Utf8_Type;
465
+ Count : Natural;
466
+ begin
467
+ if Context.C = 0 then
468
+ Result := -1;
469
+ return;
470
+ end if;
471
+
472
+ loop
473
+ Get_Utf8_Backward (Context, Ch, Count);
474
+ if Count = 0 then
475
+ Result := -1;
476
+ return;
477
+ end if;
478
+ if Ch <= Max and Ch >= Min then
479
+ Ch := Ch - Min;
480
+ if S (Ch) then
481
+ Result := Count;
482
+ return;
483
+ end if;
484
+ end if;
485
+ Context.C := Context.C - Count;
486
+ exit when not Repeat;
487
+ end loop;
488
+ Result := 0;
489
+ end Out_Grouping_Backward;
490
+
491
+ procedure In_Grouping (Context : in out Context_Type'Class;
492
+ S : in Grouping_Array;
493
+ Min : in Utf8_Type;
494
+ Max : in Utf8_Type;
495
+ Repeat : in Boolean;
496
+ Result : out Result_Index) is
497
+ Ch : Utf8_Type;
498
+ Count : Natural;
499
+ begin
500
+ if Context.C >= Context.L then
501
+ Result := -1;
502
+ return;
503
+ end if;
504
+
505
+ loop
506
+ Get_Utf8 (Context, Ch, Count);
507
+ if Count = 0 then
508
+ Result := -1;
509
+ return;
510
+ end if;
511
+ if Ch > Max or Ch < Min then
512
+ Result := Count;
513
+ return;
514
+ end if;
515
+ Ch := Ch - Min;
516
+ if not S (Ch) then
517
+ Result := Count;
518
+ return;
519
+ end if;
520
+ Context.C := Context.C + Count;
521
+ exit when not Repeat;
522
+ end loop;
523
+ Result := 0;
524
+ end In_Grouping;
525
+
526
+ procedure In_Grouping_Backward (Context : in out Context_Type'Class;
527
+ S : in Grouping_Array;
528
+ Min : in Utf8_Type;
529
+ Max : in Utf8_Type;
530
+ Repeat : in Boolean;
531
+ Result : out Result_Index) is
532
+ Ch : Utf8_Type;
533
+ Count : Natural;
534
+ begin
535
+ if Context.C = 0 then
536
+ Result := -1;
537
+ return;
538
+ end if;
539
+
540
+ loop
541
+ Get_Utf8_Backward (Context, Ch, Count);
542
+ if Count = 0 then
543
+ Result := -1;
544
+ return;
545
+ end if;
546
+ if Ch > Max or Ch < Min then
547
+ Result := Count;
548
+ return;
549
+ end if;
550
+ Ch := Ch - Min;
551
+ if not S (Ch) then
552
+ Result := Count;
553
+ return;
554
+ end if;
555
+ Context.C := Context.C - Count;
556
+ exit when not Repeat;
557
+ end loop;
558
+ Result := 0;
559
+ end In_Grouping_Backward;
560
+
561
+ procedure Replace (Context : in out Context_Type'Class;
562
+ C_Bra : in Char_Index;
563
+ C_Ket : in Char_Index;
564
+ S : in String;
565
+ Adjustment : out Integer) is
566
+ begin
567
+ Adjustment := S'Length - (C_Ket - C_Bra);
568
+ if Adjustment > 0 then
569
+ Context.P (C_Bra + S'Length + 1 .. Context.Lb + Adjustment + 1)
570
+ := Context.P (C_Ket + 1 .. Context.Lb + 1);
571
+ end if;
572
+ if S'Length > 0 then
573
+ Context.P (C_Bra + 1 .. C_Bra + S'Length) := S;
574
+ end if;
575
+ if Adjustment < 0 then
576
+ Context.P (C_Bra + S'Length + 1 .. Context.L + Adjustment + 1)
577
+ := Context.P (C_Ket + 1 .. Context.L + 1);
578
+ end if;
579
+ Context.L := Context.L + Adjustment;
580
+ if Context.C >= C_Ket then
581
+ Context.C := Context.C + Adjustment;
582
+ elsif Context.C > C_Bra then
583
+ Context.C := C_Bra;
584
+ end if;
585
+ end Replace;
586
+
587
+ procedure Slice_Del (Context : in out Context_Type'Class) is
588
+ Result : Integer;
589
+ begin
590
+ Replace (Context, Context.Bra, Context.Ket, "", Result);
591
+ end Slice_Del;
592
+
593
+ procedure Slice_From (Context : in out Context_Type'Class;
594
+ Text : in String) is
595
+ Result : Integer;
596
+ begin
597
+ Replace (Context, Context.Bra, Context.Ket, Text, Result);
598
+ end Slice_From;
599
+
600
+ function Slice_To (Context : in Context_Type'Class) return String is
601
+ begin
602
+ return Context.P (Context.Bra + 1 .. Context.Ket);
603
+ end Slice_To;
604
+
605
+ procedure Insert (Context : in out Context_Type'Class;
606
+ C_Bra : in Char_Index;
607
+ C_Ket : in Char_Index;
608
+ S : in String) is
609
+ Result : Integer;
610
+ begin
611
+ Replace (Context, C_Bra, C_Ket, S, Result);
612
+ if C_Bra <= Context.Bra then
613
+ Context.Bra := Context.Bra + Result;
614
+ end if;
615
+ if C_Bra <= Context.Ket then
616
+ Context.Ket := Context.Ket + Result;
617
+ end if;
618
+ end Insert;
619
+
620
+ end Stemmer;