mittens 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
-----------------------------------------------------------------------
|
|
2
|
+
-- stemmer -- Multi-language stemmer with Snowball generator
|
|
3
|
+
-- Written by Stephane Carrez (Stephane.Carrez@gmail.com)
|
|
4
|
+
-- All rights reserved.
|
|
5
|
+
--
|
|
6
|
+
-- Redistribution and use in source and binary forms, with or without
|
|
7
|
+
-- modification, are permitted provided that the following conditions
|
|
8
|
+
-- are met:
|
|
9
|
+
--
|
|
10
|
+
-- 1. Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
-- this list of conditions and the following disclaimer.
|
|
12
|
+
-- 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
-- this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
-- and/or other materials provided with the distribution.
|
|
15
|
+
-- 3. Neither the name of the Snowball project nor the names of its contributors
|
|
16
|
+
-- may be used to endorse or promote products derived from this software
|
|
17
|
+
-- without specific prior written permission.
|
|
18
|
+
--
|
|
19
|
+
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
20
|
+
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
21
|
+
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
23
|
+
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
24
|
+
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
25
|
+
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
26
|
+
-- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
27
|
+
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
28
|
+
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
|
+
-----------------------------------------------------------------------
|
|
30
|
+
package Stemmer with SPARK_Mode is

   pragma Preelaborate;

   --  Capacity of the internal word buffer (see Context_Type.P).  Words
   --  handed to Stem_Word must be strictly shorter than this value.
   WORD_MAX_LENGTH : constant := 1024;

   --  Root of the stemmer contexts.  Concrete stemmers derive from this
   --  type and provide the Stem operation.
   type Context_Type is abstract tagged private;

   --  Run the stemming algorithm on the word currently held by the context.
   procedure Stem (Context : in out Context_Type;
                   Result  : out Boolean) is abstract;

   --  Stem Word; Result is True when the word was reduced.
   procedure Stem_Word (Context : in out Context_Type'Class;
                        Word    : in String;
                        Result  : out Boolean) with
     Global => null,
     Pre    => Word'Length < WORD_MAX_LENGTH;

   --  Return the stem, or the input word when it was left unmodified.
   function Get_Result (Context : in Context_Type'Class) return String with
     Global => null,
     Post   => Get_Result'Result'Length < WORD_MAX_LENGTH;

private

   --  32-bit modular value used as the Mask argument of Check_Among.
   type Mask_Type is mod 2**32;

   --  A 32-bit character value decoded from a UTF-8 sequence.  The type is
   --  modular because shift and logical operations are required on it.
   type Utf8_Type is mod 2**32;

   --  Index into a Grouping_Array.  It is obtained from the 32-bit character
   --  value minus a starting offset.  Large tables are not expected, hence
   --  the explicit upper bound.
   subtype Grouping_Index is Utf8_Type range 0 .. 16384;

   type Grouping_Array is array (Grouping_Index range <>) of Boolean with Pack;

   subtype Among_Index       is Natural range 0 .. 65535;
   subtype Among_Start_Index is Among_Index range 1 .. Among_Index'Last;
   subtype Operation_Index   is Natural range 0 .. 65535;

   --  Result_Index admits -1 in addition to the valid buffer positions
   --  0 .. WORD_MAX_LENGTH - 1 covered by Char_Index.
   --  NOTE(review): -1 presumably reports a failure; confirm in the body.
   subtype Result_Index is Integer range -1 .. WORD_MAX_LENGTH - 1;
   subtype Char_Index   is Result_Index range 0 .. Result_Index'Last;

   --  One entry of an "among" table.
   --  NOTE(review): First/Last presumably delimit the entry's text inside
   --  the Pattern string given to Find_Among; confirm in the body.
   type Among_Type is record
      First       : Among_Start_Index;
      Last        : Among_Index;
      Substring_I : Integer;
      Result      : Integer;
      Operation   : Operation_Index;
   end record;

   type Among_Array_Type is array (Natural range <>) of Among_Type;

   --  Compare S against the context content.  The postcondition guarantees
   --  that the result is either 0 or S'Length.
   function Eq_S (Context : in Context_Type'Class;
                  S       : in String) return Char_Index with
     Global => null,
     Pre    => S'Length > 0,
     Post   => Eq_S'Result = 0 or else Eq_S'Result = S'Length;

   --  Backward counterpart of Eq_S, with the same contract.
   function Eq_S_Backward (Context : in Context_Type'Class;
                           S       : in String) return Char_Index with
     Global => null,
     Pre    => S'Length > 0,
     Post   => Eq_S_Backward'Result = 0
                 or else Eq_S_Backward'Result = S'Length;

   --  Search the Amongs table, whose texts are taken from Pattern, and
   --  report the outcome in Result.  Execute is the callback that receives
   --  an entry's Operation index.
   procedure Find_Among
     (Context : in out Context_Type'Class;
      Amongs  : in Among_Array_Type;
      Pattern : in String;
      Execute : access procedure
                  (Ctx       : in out Context_Type'Class;
                   Operation : in Operation_Index;
                   Status    : out Boolean);
      Result  : out Integer) with
     Global => null,
     Pre    => Pattern'Length > 0 and then Amongs'Length > 0;

   --  Backward counterpart of Find_Among, with the same contract.
   procedure Find_Among_Backward
     (Context : in out Context_Type'Class;
      Amongs  : in Among_Array_Type;
      Pattern : in String;
      Execute : access procedure
                  (Ctx       : in out Context_Type'Class;
                   Operation : in Operation_Index;
                   Status    : out Boolean);
      Result  : out Integer) with
     Global => null,
     Pre    => Pattern'Length > 0 and then Amongs'Length > 0;

   --  UTF-8 skipping helpers.  Each returns a Result_Index; the variants
   --  taking N skip by a count.
   function Skip_Utf8 (Context : in Context_Type'Class) return Result_Index with
     Global => null;

   function Skip_Utf8 (Context : in Context_Type'Class;
                       N       : in Integer) return Result_Index with
     Global => null;

   function Skip_Utf8_Backward
     (Context : in Context_Type'Class) return Result_Index with
     Global => null;

   function Skip_Utf8_Backward (Context : in Context_Type'Class;
                                N       : in Integer) return Result_Index with
     Global => null;

   --  Decode one character: Value receives the 32-bit character value and
   --  Count a Natural count.
   --  NOTE(review): Count is presumably the number of bytes consumed.
   procedure Get_Utf8 (Context : in Context_Type'Class;
                       Value   : out Utf8_Type;
                       Count   : out Natural);

   procedure Get_Utf8_Backward (Context : in Context_Type'Class;
                                Value   : out Utf8_Type;
                                Count   : out Natural);

   function Length (Context : in Context_Type'Class) return Natural;

   function Length_Utf8 (Context : in Context_Type'Class) return Natural;

   function Check_Among (Context : in Context_Type'Class;
                         Pos     : in Char_Index;
                         Shift   : in Natural;
                         Mask    : in Mask_Type) return Boolean;

   --  Grouping tests.  S is the packed Boolean table, Min and Max are
   --  character values and Result receives a Result_Index.
   procedure Out_Grouping (Context : in out Context_Type'Class;
                           S       : in Grouping_Array;
                           Min     : in Utf8_Type;
                           Max     : in Utf8_Type;
                           Repeat  : in Boolean;
                           Result  : out Result_Index);

   procedure Out_Grouping_Backward (Context : in out Context_Type'Class;
                                    S       : in Grouping_Array;
                                    Min     : in Utf8_Type;
                                    Max     : in Utf8_Type;
                                    Repeat  : in Boolean;
                                    Result  : out Result_Index);

   procedure In_Grouping (Context : in out Context_Type'Class;
                          S       : in Grouping_Array;
                          Min     : in Utf8_Type;
                          Max     : in Utf8_Type;
                          Repeat  : in Boolean;
                          Result  : out Result_Index);

   procedure In_Grouping_Backward (Context : in out Context_Type'Class;
                                   S       : in Grouping_Array;
                                   Min     : in Utf8_Type;
                                   Max     : in Utf8_Type;
                                   Repeat  : in Boolean;
                                   Result  : out Result_Index);

   --  Replace the region C_Bra .. C_Ket by S.  The region must lie within
   --  Context.Lb .. Context.L and must not be inverted.
   procedure Replace (Context    : in out Context_Type'Class;
                      C_Bra      : in Char_Index;
                      C_Ket      : in Char_Index;
                      S          : in String;
                      Adjustment : out Integer) with
     Global => null,
     Pre    => C_Bra >= Context.Lb
                 and then C_Ket >= C_Bra
                 and then C_Ket <= Context.L;

   --  Slice operations act on the region Context.Bra .. Context.Ket, which
   --  must lie within Context.Lb .. Context.L.
   procedure Slice_Del (Context : in out Context_Type'Class) with
     Global => null,
     Pre    => Context.Bra >= Context.Lb
                 and then Context.Ket >= Context.Bra
                 and then Context.Ket <= Context.L;

   --  In addition to the region constraint, the last term of the
   --  precondition bounds the sizes involved by the buffer length.
   procedure Slice_From (Context : in out Context_Type'Class;
                         Text    : in String) with
     Global => null,
     Pre    => Context.Bra >= Context.Lb
                 and then Context.Ket >= Context.Bra
                 and then Context.Ket <= Context.L
                 and then Context.L - Context.Lb + Text'Length
                            + Context.Ket - Context.Bra < Context.P'Length;

   function Slice_To (Context : in Context_Type'Class) return String;

   --  Same region constraint as Replace.
   procedure Insert (Context : in out Context_Type'Class;
                     C_Bra   : in Char_Index;
                     C_Ket   : in Char_Index;
                     S       : in String) with
     Global => null,
     Pre    => C_Bra >= Context.Lb
                 and then C_Ket >= C_Bra
                 and then C_Ket <= Context.L;

   --  The context indexes follow the C paradigm: they start at 0 for the
   --  first character.  This is necessary because several algorithms rely
   --  on it when they compare the cursor position ('C') or set up some
   --  markers from the cursor.
   type Context_Type is abstract tagged record
      C   : Char_Index := 0;
      L   : Char_Index := 0;
      Lb  : Char_Index := 0;
      Bra : Char_Index := 0;
      Ket : Char_Index := 0;
      P   : String (1 .. WORD_MAX_LENGTH);
   end record;

end Stemmer;
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
with Ada.Text_IO;
|
|
2
|
+
with Ada.Command_Line;
|
|
3
|
+
with Stemmer.Factory;
|
|
4
|
+
--  Command line driver: stemwords <language> <input file> <output file>
--
--  Reads the input file line by line, stems every run of non-blank
--  characters with the stemmer selected by <language> and writes the result
--  to the output file.  Runs of spaces and tabs are copied unchanged.
--
--  The exit status is set to Failure when the arguments are wrong or when
--  one of the files cannot be opened, created or written.
procedure Stemwords is

   use Stemmer.Factory;

   function Get_Language (Name : in String) return Language_Type;
   function Is_Space (C : in Character) return Boolean;

   --  True for the two separators recognized between words: space and tab.
   function Is_Space (C : in Character) return Boolean is
   begin
      return C = ' ' or C = ASCII.HT;
   end Is_Space;

   --  Map a language name to the Language_Type literal "L_<Name>".
   --  An unknown name is reported and L_ENGLISH is used instead.
   function Get_Language (Name : in String) return Language_Type is
   begin
      return Language_Type'Value ("L_" & Name);

   exception
      when Constraint_Error =>
         Ada.Text_IO.Put_Line ("Unsupported language: " & Name);
         return L_ENGLISH;

   end Get_Language;

   Count : constant Natural := Ada.Command_Line.Argument_Count;
begin
   if Count /= 3 then
      Ada.Text_IO.Put_Line ("Usage: stemwords <language> <input file> <output file>");
      --  A wrong invocation must not look like a success to the caller.
      Ada.Command_Line.Set_Exit_Status (Ada.Command_Line.Failure);
      return;
   end if;
   declare
      Lang     : constant Language_Type := Get_Language (Ada.Command_Line.Argument (1));
      Input    : constant String := Ada.Command_Line.Argument (2);
      Output   : constant String := Ada.Command_Line.Argument (3);
      Src_File : Ada.Text_IO.File_Type;
      Dst_File : Ada.Text_IO.File_Type;

      --  Close whichever of the two files is currently open.
      procedure Close_Files;

      procedure Close_Files is
      begin
         if Ada.Text_IO.Is_Open (Src_File) then
            Ada.Text_IO.Close (Src_File);
         end if;
         if Ada.Text_IO.Is_Open (Dst_File) then
            Ada.Text_IO.Close (Dst_File);
         end if;
      end Close_Files;

   begin
      Ada.Text_IO.Open (Src_File, Ada.Text_IO.In_File, Input);
      Ada.Text_IO.Create (Dst_File, Ada.Text_IO.Out_File, Output);
      while not Ada.Text_IO.End_Of_File (Src_File) loop
         declare
            Line      : constant String := Ada.Text_IO.Get_Line (Src_File);
            Pos       : Positive := Line'First;
            Last_Pos  : Positive;
            Start_Pos : Positive;
         begin
            while Pos <= Line'Last loop
               --  Copy the separators that precede the next word verbatim.
               Last_Pos := Pos;
               while Pos <= Line'Last and then Is_Space (Line (Pos)) loop
                  Pos := Pos + 1;
               end loop;
               if Last_Pos < Pos then
                  Ada.Text_IO.Put (Dst_File, Line (Last_Pos .. Pos - 1));
               end if;
               exit when Pos > Line'Last;

               --  Collect the word and emit its stem.
               Start_Pos := Pos;
               while Pos <= Line'Last and then not Is_Space (Line (Pos)) loop
                  Pos := Pos + 1;
               end loop;
               Ada.Text_IO.Put (Dst_File, Stemmer.Factory.Stem (Lang, Line (Start_Pos .. Pos - 1)));
            end loop;
            Ada.Text_IO.New_Line (Dst_File);
         end;
      end loop;
      Close_Files;

   exception
      when Ada.Text_IO.Name_Error | Ada.Text_IO.Use_Error =>
         --  The input is opened first: if it is not open, it is the one
         --  that failed; otherwise the problem is on the output side.
         if not Ada.Text_IO.Is_Open (Src_File) then
            Ada.Text_IO.Put_Line (Ada.Text_IO.Standard_Error,
                                  "stemwords: cannot open input file " & Input);
         else
            Ada.Text_IO.Put_Line (Ada.Text_IO.Standard_Error,
                                  "stemwords: cannot write output file " & Output);
         end if;
         Close_Files;
         Ada.Command_Line.Set_Exit_Status (Ada.Command_Line.Failure);

      when others =>
         --  Do not leak the open files, then let the error propagate.
         Close_Files;
         raise;
   end;
end Stemwords;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
--  Shared build settings, meant to be imported by the other projects.
--  It owns no sources; only the tool packages below are of interest.
--
--  Scenario variables:
--    BUILD       one of the Build_Type values (default "distrib")
--    PROCESSORS  value appended to the builder "-j" switch (default "1")
abstract project Stemmer_Config is

   for Source_Dirs use ();

   type Yes_No is ("yes", "no");

   type Library_Type_Type is ("relocatable", "static", "static-pic");

   type Build_Type is ("distrib", "debug", "optimize", "profile", "coverage");

   Mode : Build_Type := external ("BUILD", "distrib");

   Processors := external ("PROCESSORS", "1");

   package Builder is
      case Mode is
         when "debug" =>
            for Default_Switches ("Ada") use ("-g", "-j" & Processors);

         when others =>
            for Default_Switches ("Ada") use ("-g", "-O3", "-j" & Processors);

      end case;
   end Builder;

   package Compiler is
      --  Switch groups shared by several build modes.
      Warnings := ("-gnatwua");
      Defaults := ("-gnat2012");

      case Mode is
         when "distrib" =>
            for Default_Switches ("Ada") use Defaults & ("-gnatafno", "-gnatVa", "-gnatwa");

         when "debug" =>
            for Default_Switches ("Ada") use Defaults & Warnings
               & ("-gnata", "-gnatVaMI", "-gnaty3abcefhiklmnprstxM99");

         when "coverage" =>
            --  Same switches as "debug" plus the coverage instrumentation.
            for Default_Switches ("Ada") use Defaults & Warnings
               & ("-gnata", "-gnatVaMI", "-gnaty3abcefhiklmnprstxM99",
                  "-fprofile-arcs", "-ftest-coverage");

         when "optimize" =>
            for Default_Switches ("Ada") use Defaults & Warnings
               & ("-gnatn", "-gnatp", "-fdata-sections", "-ffunction-sections");

         when "profile" =>
            for Default_Switches ("Ada") use Defaults & Warnings & ("-pg");

      end case;
   end Compiler;

   package Binder is
      --  Every build mode uses the same binder switch.
      for Default_Switches ("Ada") use ("-E");
   end Binder;

   package Linker is
      case Mode is
         when "profile" =>
            for Default_Switches ("Ada") use ("-pg");

         when "distrib" =>
            for Default_Switches ("Ada") use ("-s");

         when "optimize" =>
            for Default_Switches ("Ada") use ("-Wl,--gc-sections");

         when "coverage" =>
            for Default_Switches ("Ada") use ("-fprofile-arcs");

         when others =>
            null;

      end case;
   end Linker;

   package Ide is
      for VCS_Kind use "git";
   end Ide;

end Stemmer_Config;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
with "stemmer_config";
|
|
2
|
+
--  Builds the "stemwords" executable from the sources found in "src" and
--  "algorithms".  All tool switches come from the Stemmer_Config project.
project Stemwords is

   for Source_Dirs use ("src", "algorithms");

   Mains := ("stemwords.adb");
   for Main use Mains;

   --  Objects and executables go to "obj" and "bin" below the directories
   --  configured in Stemmer_Config.
   for Object_Dir use "./" & Stemmer_Config'Object_Dir & "/obj";
   for Exec_Dir use "./" & Stemmer_Config'Exec_Dir & "/bin";

   --  Reuse the shared tool configuration unchanged.
   package Builder renames Stemmer_Config.Builder;
   package Compiler renames Stemmer_Config.Compiler;
   package Binder renames Stemmer_Config.Binder;
   package Linker renames Stemmer_Config.Linker;

end Stemwords;
|