mittens 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
@@ -0,0 +1,219 @@
|
|
1
|
+
-----------------------------------------------------------------------
|
2
|
+
-- stemmer -- Multi-language stemmer with Snowball generator
|
3
|
+
-- Written by Stephane Carrez (Stephane.Carrez@gmail.com)
|
4
|
+
-- All rights reserved.
|
5
|
+
--
|
6
|
+
-- Redistribution and use in source and binary forms, with or without
|
7
|
+
-- modification, are permitted provided that the following conditions
|
8
|
+
-- are met:
|
9
|
+
--
|
10
|
+
-- 1. Redistributions of source code must retain the above copyright notice,
|
11
|
+
-- this list of conditions and the following disclaimer.
|
12
|
+
-- 2. Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
-- this list of conditions and the following disclaimer in the documentation
|
14
|
+
-- and/or other materials provided with the distribution.
|
15
|
+
-- 3. Neither the name of the Snowball project nor the names of its contributors
|
16
|
+
-- may be used to endorse or promote products derived from this software
|
17
|
+
-- without specific prior written permission.
|
18
|
+
--
|
19
|
+
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
20
|
+
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
21
|
+
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
+
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
23
|
+
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
24
|
+
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
25
|
+
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
26
|
+
-- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
28
|
+
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
-----------------------------------------------------------------------
|
30
|
+
package Stemmer with SPARK_Mode is

   pragma Preelaborate;

   --  Size of the word buffer held by a context.  Input words and results
   --  are required to be strictly shorter than this (see the contracts of
   --  Stem_Word and Get_Result).
   WORD_MAX_LENGTH : constant := 1024;

   --  Root type of every stemmer.  It is abstract: a concrete type must
   --  provide the Stem primitive.
   type Context_Type is abstract tagged private;

   --  Apply the stemming algorithm on the word initialized in the context.
   procedure Stem (Context : in out Context_Type;
                   Result  : out Boolean) is abstract;

   --  Stem the word and return True if it was reduced.
   --  The word must be strictly shorter than WORD_MAX_LENGTH.
   procedure Stem_Word (Context : in out Context_Type'Class;
                        Word    : in String;
                        Result  : out Boolean) with
     Global => null,
     Pre    => Word'Length < WORD_MAX_LENGTH;

   --  Get the stem or the input word unmodified.
   --  The returned string is strictly shorter than WORD_MAX_LENGTH.
   function Get_Result (Context : in Context_Type'Class) return String with
     Global => null,
     Post   => Get_Result'Result'Length < WORD_MAX_LENGTH;

private

   --  32-bit modular type of the Mask parameter of Check_Among.
   type Mask_Type is mod 2**32;

   --  A 32-bit character value that was read from a UTF-8 sequence.
   --  A modular type is used because shift and logical operations are necessary.
   type Utf8_Type is mod 2**32;

   --  Index of the Grouping_Array.  The index comes from the 32-bit character value
   --  minus a starting offset.  We don't expect large tables and we check against
   --  a maximum value.
   subtype Grouping_Index is Utf8_Type range 0 .. 16384;

   --  Packed Boolean table indexed by Grouping_Index.
   type Grouping_Array is array (Grouping_Index range <>) of Boolean with Pack;

   --  Index subtypes used by the among tables and the word buffer.
   --  Result_Index admits -1 in addition to the Char_Index positions.
   subtype Among_Index       is Natural range 0 .. 65535;
   subtype Among_Start_Index is Among_Index range 1 .. Among_Index'Last;
   subtype Operation_Index   is Natural range 0 .. 65535;
   subtype Result_Index      is Integer range -1 .. WORD_MAX_LENGTH - 1;
   subtype Char_Index        is Result_Index range 0 .. Result_Index'Last;

   --  One entry of an among table.
   --  NOTE(review): the meaning of each component is defined by the package
   --  body and the generated algorithms, which are not visible here.
   type Among_Type is record
      First       : Among_Start_Index;
      Last        : Among_Index;
      Substring_I : Integer;
      Result      : Integer;
      Operation   : Operation_Index;
   end record;

   type Among_Array_Type is array (Natural range <>) of Among_Type;

   --  String comparison helpers.  For a non-empty S the result is either 0
   --  or S'Length, as stated by the postconditions.
   function Eq_S (Context : in Context_Type'Class;
                  S       : in String) return Char_Index with
     Global => null,
     Pre    => S'Length > 0,
     Post   => Eq_S'Result = 0 or Eq_S'Result = S'Length;

   function Eq_S_Backward (Context : in Context_Type'Class;
                           S       : in String) return Char_Index with
     Global => null,
     Pre    => S'Length > 0,
     Post   => Eq_S_Backward'Result = 0 or Eq_S_Backward'Result = S'Length;

   --  Among lookups.  Both require a non-empty Pattern and a non-empty
   --  Amongs table.  Execute is an access-to-procedure taking the context,
   --  an Operation_Index and returning a Status.
   procedure Find_Among (Context : in out Context_Type'Class;
                         Amongs  : in Among_Array_Type;
                         Pattern : in String;
                         Execute : access procedure
                           (Ctx       : in out Context_Type'Class;
                            Operation : in Operation_Index;
                            Status    : out Boolean);
                         Result  : out Integer) with
     Global => null,
     Pre    => Pattern'Length > 0 and Amongs'Length > 0;

   procedure Find_Among_Backward (Context : in out Context_Type'Class;
                                  Amongs  : in Among_Array_Type;
                                  Pattern : in String;
                                  Execute : access procedure
                                    (Ctx       : in out Context_Type'Class;
                                     Operation : in Operation_Index;
                                     Status    : out Boolean);
                                  Result  : out Integer) with
     Global => null,
     Pre    => Pattern'Length > 0 and Amongs'Length > 0;

   --  UTF-8 skipping helpers, with and without a count N, in both directions.
   --  They return a Result_Index, whose range includes -1.
   function Skip_Utf8 (Context : in Context_Type'Class) return Result_Index with
     Global => null;

   function Skip_Utf8 (Context : in Context_Type'Class;
                       N       : in Integer) return Result_Index with
     Global => null;

   function Skip_Utf8_Backward (Context : in Context_Type'Class) return Result_Index with
     Global => null;

   function Skip_Utf8_Backward (Context : in Context_Type'Class;
                                N       : in Integer) return Result_Index with
     Global => null;

   --  UTF-8 read helpers: produce a character Value and a Count.
   procedure Get_Utf8 (Context : in Context_Type'Class;
                       Value   : out Utf8_Type;
                       Count   : out Natural);

   procedure Get_Utf8_Backward (Context : in Context_Type'Class;
                                Value   : out Utf8_Type;
                                Count   : out Natural);

   function Length (Context : in Context_Type'Class) return Natural;

   function Length_Utf8 (Context : in Context_Type'Class) return Natural;

   function Check_Among (Context : in Context_Type'Class;
                         Pos     : in Char_Index;
                         Shift   : in Natural;
                         Mask    : in Mask_Type) return Boolean;

   --  Grouping tests.  The four procedures share one profile: a grouping
   --  table S, the Min and Max character values, a Repeat flag and a
   --  Result_Index result.
   procedure Out_Grouping (Context : in out Context_Type'Class;
                           S       : in Grouping_Array;
                           Min     : in Utf8_Type;
                           Max     : in Utf8_Type;
                           Repeat  : in Boolean;
                           Result  : out Result_Index);

   procedure Out_Grouping_Backward (Context : in out Context_Type'Class;
                                    S       : in Grouping_Array;
                                    Min     : in Utf8_Type;
                                    Max     : in Utf8_Type;
                                    Repeat  : in Boolean;
                                    Result  : out Result_Index);

   procedure In_Grouping (Context : in out Context_Type'Class;
                          S       : in Grouping_Array;
                          Min     : in Utf8_Type;
                          Max     : in Utf8_Type;
                          Repeat  : in Boolean;
                          Result  : out Result_Index);

   procedure In_Grouping_Backward (Context : in out Context_Type'Class;
                                   S       : in Grouping_Array;
                                   Min     : in Utf8_Type;
                                   Max     : in Utf8_Type;
                                   Repeat  : in Boolean;
                                   Result  : out Result_Index);

   --  Buffer editing operations.  The preconditions require the edited
   --  range to be ordered and to lie between Context.Lb and Context.L.
   procedure Replace (Context    : in out Context_Type'Class;
                      C_Bra      : in Char_Index;
                      C_Ket      : in Char_Index;
                      S          : in String;
                      Adjustment : out Integer) with
     Global => null,
     Pre    => C_Bra >= Context.Lb and C_Ket >= C_Bra and C_Ket <= Context.L;

   procedure Slice_Del (Context : in out Context_Type'Class) with
     Global => null,
     Pre    => Context.Bra >= Context.Lb and Context.Ket >= Context.Bra
     and Context.Ket <= Context.L;

   --  In addition to the range checks, the precondition bounds the sizes
   --  involved against the capacity of the buffer Context.P.
   procedure Slice_From (Context : in out Context_Type'Class;
                         Text    : in String) with
     Global => null,
     Pre    => Context.Bra >= Context.Lb and Context.Ket >= Context.Bra
     and Context.Ket <= Context.L
     and Context.L - Context.Lb + Text'Length + Context.Ket - Context.Bra < Context.P'Length;

   function Slice_To (Context : in Context_Type'Class) return String;

   procedure Insert (Context : in out Context_Type'Class;
                     C_Bra   : in Char_Index;
                     C_Ket   : in Char_Index;
                     S       : in String) with
     Global => null,
     Pre    => C_Bra >= Context.Lb and C_Ket >= C_Bra and C_Ket <= Context.L;

   --  The context indexes follow the C paradigm: they start at 0 for the first character.
   --  This is necessary because several algorithms rely on this when they compare the
   --  cursor position ('C') or set up some markers from the cursor.
   --  All indexes default to 0; the buffer P has no default value.
   type Context_Type is abstract tagged record
      C   : Char_Index := 0;
      L   : Char_Index := 0;
      Lb  : Char_Index := 0;
      Bra : Char_Index := 0;
      Ket : Char_Index := 0;
      P   : String (1 .. WORD_MAX_LENGTH);
   end record;

end Stemmer;
|
@@ -0,0 +1,70 @@
|
|
1
|
+
with Ada.Text_IO;
|
2
|
+
with Ada.Command_Line;
|
3
|
+
with Stemmer.Factory;
|
4
|
+
--  Command line driver: stem every word of a text file.
--
--  Usage: stemwords <language> <input file> <output file>
--
--  Each input line is split on spaces and horizontal tabs.  Runs of
--  separators are copied verbatim to the output file and every other run of
--  characters is replaced by the result of Stemmer.Factory.Stem.  One output
--  line is written per input line.
--
--  When the argument count is wrong, the usage text is written to the
--  standard error stream and the exit status is set to Failure so that a
--  calling script can detect the misuse.
procedure Stemwords is

   use Stemmer.Factory;

   function Get_Language (Name : in String) return Language_Type;
   function Is_Space (C : in Character) return Boolean;

   --  Return True for the two word separators: space and horizontal tab.
   function Is_Space (C : in Character) return Boolean is
   begin
      return C = ' ' or C = ASCII.HT;
   end Is_Space;

   --  Map a language name to the Language_Type literal "L_" & Name.
   --  When no such literal exists, 'Value raises Constraint_Error: a message
   --  is printed and L_ENGLISH is used as a fallback.
   function Get_Language (Name : in String) return Language_Type is
   begin
      return Language_Type'Value ("L_" & Name);

   exception
      when Constraint_Error =>
         Ada.Text_IO.Put_Line ("Unsupported language: " & Name);
         return L_ENGLISH;

   end Get_Language;

   Count : constant Natural := Ada.Command_Line.Argument_Count;
begin
   if Count /= 3 then
      --  Report the misuse on standard error and through the exit status.
      Ada.Text_IO.Put_Line
        (Ada.Text_IO.Standard_Error,
         "Usage: stemwords <language> <input file> <output file>");
      Ada.Command_Line.Set_Exit_Status (Ada.Command_Line.Failure);
      return;
   end if;
   declare
      Lang     : constant Language_Type := Get_Language (Ada.Command_Line.Argument (1));
      Input    : constant String := Ada.Command_Line.Argument (2);
      Output   : constant String := Ada.Command_Line.Argument (3);
      Src_File : Ada.Text_IO.File_Type;
      Dst_File : Ada.Text_IO.File_Type;
   begin
      Ada.Text_IO.Open (Src_File, Ada.Text_IO.In_File, Input);
      Ada.Text_IO.Create (Dst_File, Ada.Text_IO.Out_File, Output);
      while not Ada.Text_IO.End_Of_File (Src_File) loop
         declare
            Line      : constant String := Ada.Text_IO.Get_Line (Src_File);
            Pos       : Positive := Line'First;
            Last_Pos  : Positive;
            Start_Pos : Positive;
         begin
            while Pos <= Line'Last loop
               --  Copy the run of separators in front of the next word.
               Last_Pos := Pos;
               while Pos <= Line'Last and then Is_Space (Line (Pos)) loop
                  Pos := Pos + 1;
               end loop;
               if Last_Pos < Pos then
                  Ada.Text_IO.Put (Dst_File, Line (Last_Pos .. Pos - 1));
               end if;

               --  Only separators were left on the line.
               exit when Pos > Line'Last;

               --  Collect the word and write its stem.
               Start_Pos := Pos;
               while Pos <= Line'Last and then not Is_Space (Line (Pos)) loop
                  Pos := Pos + 1;
               end loop;
               Ada.Text_IO.Put (Dst_File, Stemmer.Factory.Stem (Lang, Line (Start_Pos .. Pos - 1)));
            end loop;
            Ada.Text_IO.New_Line (Dst_File);
         end;
      end loop;
      Ada.Text_IO.Close (Src_File);
      Ada.Text_IO.Close (Dst_File);
   end;
end Stemwords;
|
@@ -0,0 +1,83 @@
|
|
1
|
+
--  Shared build settings.  The project is abstract and owns no sources; the
--  tool packages declared here can be renamed by other projects.
abstract project Stemmer_Config is

   for Source_Dirs use ();

   type Yes_No is ("yes", "no");
   type Library_Type_Type is ("relocatable", "static", "static-pic");
   type Build_Type is ("distrib", "debug", "optimize", "profile", "coverage");

   --  Build mode, taken from the BUILD external variable ("distrib" by default).
   Mode : Build_Type := external ("BUILD", "distrib");

   --  Value appended to the builder "-j" switch, taken from the PROCESSORS
   --  external variable ("1" by default).
   Processors := external ("PROCESSORS", "1");

   package Builder is
      case Mode is
         when "debug" =>
            for Default_Switches ("Ada") use ("-g", "-j" & Processors);

         when "distrib" | "optimize" | "profile" | "coverage" =>
            for Default_Switches ("Ada") use ("-g", "-O3", "-j" & Processors);

      end case;
   end Builder;

   package Compiler is
      warnings := ("-gnatwua");
      defaults := ("-gnat2012");

      case Mode is
         when "debug" =>
            for Default_Switches ("Ada") use defaults & warnings
               & ("-gnata", "-gnatVaMI", "-gnaty3abcefhiklmnprstxM99");

         when "coverage" =>
            --  Same switches as "debug" plus the coverage instrumentation.
            for Default_Switches ("Ada") use defaults & warnings
               & ("-gnata", "-gnatVaMI", "-gnaty3abcefhiklmnprstxM99",
                  "-fprofile-arcs", "-ftest-coverage");

         when "optimize" =>
            for Default_Switches ("Ada") use defaults & warnings
               & ("-gnatn", "-gnatp", "-fdata-sections", "-ffunction-sections");

         when "profile" =>
            for Default_Switches ("Ada") use defaults & warnings & ("-pg");

         when "distrib" =>
            --  This mode does not use the shared "warnings" list.
            for Default_Switches ("Ada") use defaults & ("-gnatafno", "-gnatVa", "-gnatwa");

      end case;
   end Compiler;

   package Binder is
      --  The same binder switch applies to every build mode.
      for Default_Switches ("Ada") use ("-E");
   end Binder;

   package Linker is
      case Mode is
         when "distrib" =>
            for Default_Switches ("Ada") use ("-s");

         when "optimize" =>
            for Default_Switches ("Ada") use ("-Wl,--gc-sections");

         when "profile" =>
            for Default_Switches ("Ada") use ("-pg");

         when "coverage" =>
            for Default_Switches ("ada") use ("-fprofile-arcs");

         when "debug" =>
            --  No specific linker switch for this mode.
            null;

      end case;
   end Linker;

   package Ide is
      for VCS_Kind use "git";
   end Ide;

end Stemmer_Config;
|
@@ -0,0 +1,21 @@
|
|
1
|
+
with "stemmer_config";
|
2
|
+
--  Project building the stemwords executable.  The tool settings are the
--  ones declared by the Stemmer_Config project.
project Stemwords is

   Mains := ("stemwords.adb");

   --  Sources are looked up in the "src" and "algorithms" directories.
   for Source_Dirs use ("src", "algorithms");
   for Main use Mains;

   --  Object and executable directories are derived from the corresponding
   --  attributes of Stemmer_Config.
   for Object_Dir use "./" & Stemmer_Config'Object_Dir & "/obj";
   for Exec_Dir use "./" & Stemmer_Config'Exec_Dir & "/bin";

   --  Reuse the shared tool packages unchanged.
   package Builder renames Stemmer_Config.Builder;
   package Compiler renames Stemmer_Config.Compiler;
   package Binder renames Stemmer_Config.Binder;
   package Linker renames Stemmer_Config.Linker;

end Stemwords;
|