mittens 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
@@ -0,0 +1,240 @@
|
|
1
|
+
strings ( ch )
|
2
|
+
integers ( p1 p2 )
|
3
|
+
booleans ( Y_found stemmed GE_removed )
|
4
|
+
|
5
|
+
routines (
|
6
|
+
|
7
|
+
R1 R2
|
8
|
+
C V VX
|
9
|
+
lengthen_V
|
10
|
+
Step_1 Step_2 Step_3 Step_4 Step_7
|
11
|
+
Step_6 Step_1c
|
12
|
+
Lose_prefix
|
13
|
+
Lose_infix
|
14
|
+
measure
|
15
|
+
)
|
16
|
+
|
17
|
+
externals ( stem )
|
18
|
+
|
19
|
+
groupings ( v v_WX AOU AIOU )
|
20
|
+
|
21
|
+
stringescapes {}
|
22
|
+
|
23
|
+
define v 'aeiouy'
|
24
|
+
define v_WX v + 'wx'
|
25
|
+
define AOU 'aou'
|
26
|
+
define AIOU 'aiou'
|
27
|
+
|
28
|
+
backwardmode (
|
29
|
+
|
30
|
+
// R1 / R2: region guards. Each succeeds only when its integer mark (p1 or p2,
// assigned in `measure`) is <= the current cursor position.
define R1 as ($p1 <= cursor)
|
31
|
+
define R2 as ($p2 <= cursor)
|
32
|
+
|
33
|
+
// Lookahead predicates; each body is wrapped in `test`, so the cursor is
// restored after the check.
//   V  - a character from grouping v, or the digraph 'ij'.
//   VX - the same check as V, made after first stepping over one character (`next`).
//   C  - the digraph 'ij' is not present, and a character outside grouping v is.
define V as test (v or 'ij')
|
33
|
+
define VX as test (next v or 'ij')
|
34
|
+
define C as test (not 'ij' non-v)
|
36
|
+
|
37
|
+
// lengthen_V: vowel doubling. The whole body is under `do`, so it never makes
// its caller fail. It first requires a character outside v_WX, then brackets
// either one vowel from AOU or an 'e' (each alternative carries its own
// lookahead `test`). The bracketed slice is copied into string ch and ch is
// inserted again, so that vowel ends up written twice.
define lengthen_V as do (
|
38
|
+
non-v_WX [ (AOU] test (non-v or atlimit)) or
|
39
|
+
('e'] test (non-v or atlimit
|
40
|
+
not AIOU
|
41
|
+
not (next AIOU non-v)))
|
42
|
+
->ch insert ch
|
43
|
+
)
|
44
|
+
|
45
|
+
define Step_1 as
|
46
|
+
(
|
47
|
+
[substring] among (
|
48
|
+
|
49
|
+
'{'}s' (delete)
|
50
|
+
's' (R1 not ('t' R1) C delete)
|
51
|
+
'ies' (R1 <-'ie')
|
52
|
+
'es'
|
53
|
+
(('ar' R1 C ] delete lengthen_V) or
|
54
|
+
('er' R1 C ] delete) or
|
55
|
+
(R1 C <-'e'))
|
56
|
+
|
57
|
+
'aus' (R1 V <-'au')
|
58
|
+
'en' (('hed' R1 ] <-'heid') or
|
59
|
+
('nd' delete) or
|
60
|
+
('d' R1 C ] delete) or
|
61
|
+
('i' or 'j' V delete) or
|
62
|
+
(R1 C delete lengthen_V))
|
63
|
+
'nde' (<-'nd')
|
64
|
+
)
|
65
|
+
)
|
66
|
+
|
67
|
+
define Step_2 as
|
68
|
+
(
|
69
|
+
[substring] among (
|
70
|
+
'je' (('{'}t' ] delete) or
|
71
|
+
('et' ] R1 C delete) or
|
72
|
+
('rnt' ] <-'rn') or
|
73
|
+
('t' ] R1 VX delete) or
|
74
|
+
('ink' ] <-'ing') or
|
75
|
+
('mp' ] <-'m') or
|
76
|
+
('{'}' ] R1 delete) or
|
77
|
+
(] R1 C delete))
|
78
|
+
'ge' (R1 <-'g')
|
79
|
+
'lijke'(R1 <-'lijk')
|
80
|
+
'ische'(R1 <-'isch')
|
81
|
+
'de' (R1 C delete)
|
82
|
+
'te' (R1 <-'t')
|
83
|
+
'se' (R1 <-'s')
|
84
|
+
're' (R1 <-'r')
|
85
|
+
'le' (R1 delete attach 'l' lengthen_V)
|
86
|
+
'ene' (R1 C delete attach 'en' lengthen_V)
|
87
|
+
'ieve' (R1 C <-'ief')
|
88
|
+
)
|
89
|
+
)
|
90
|
+
|
91
|
+
define Step_3 as
|
92
|
+
(
|
93
|
+
[substring] among (
|
94
|
+
'atie' (R1 <-'eer')
|
95
|
+
'iteit' (R1 delete lengthen_V)
|
96
|
+
'heid'
|
97
|
+
'sel'
|
98
|
+
'ster' (R1 delete)
|
99
|
+
'rder' (<-'r')
|
100
|
+
'ing'
|
101
|
+
'isme'
|
102
|
+
'erij' (R1 delete lengthen_V)
|
103
|
+
'arij' (R1 C <-'aar')
|
104
|
+
'fie' (R2 delete attach 'f' lengthen_V)
|
105
|
+
'gie' (R2 delete attach 'g' lengthen_V)
|
106
|
+
'tst' (R1 C <-'t')
|
107
|
+
'dst' (R1 C <-'d')
|
108
|
+
)
|
109
|
+
)
|
110
|
+
|
111
|
+
define Step_4 as
|
112
|
+
(
|
113
|
+
( [substring] among (
|
114
|
+
'ioneel' (R1 <-'ie')
|
115
|
+
'atief' (R1 <-'eer')
|
116
|
+
'baar' (R1 delete)
|
117
|
+
'naar' (R1 V <-'n')
|
118
|
+
'laar' (R1 V <-'l')
|
119
|
+
'raar' (R1 V <-'r')
|
120
|
+
'tant' (R1 <-'teer')
|
121
|
+
'lijker'
|
122
|
+
'lijkst' (R1 <-'lijk')
|
123
|
+
'achtig'
|
124
|
+
'achtiger'
|
125
|
+
'achtigst'(R1 delete)
|
126
|
+
'eriger'
|
127
|
+
'erigst'
|
128
|
+
'erig'
|
129
|
+
'end' (R1 C delete lengthen_V)
|
130
|
+
)
|
131
|
+
)
|
132
|
+
or
|
133
|
+
( [substring] among (
|
134
|
+
'iger'
|
135
|
+
'igst'
|
136
|
+
'ig' (R1 C delete lengthen_V)
|
137
|
+
)
|
138
|
+
)
|
139
|
+
)
|
140
|
+
|
141
|
+
// Step_7: rewrites a 'kt', 'ft' or 'pt' matched by `substring` to 'k', 'f' or
// 'p' respectively, i.e. drops the trailing 't'. No region condition is applied.
define Step_7 as
|
142
|
+
(
|
143
|
+
[substring] among (
|
144
|
+
'kt' (<-'k')
|
145
|
+
'ft' (<-'f')
|
146
|
+
'pt' (<-'p')
|
147
|
+
)
|
148
|
+
)
|
149
|
+
|
150
|
+
define Step_6 as
|
151
|
+
(
|
152
|
+
[substring] among (
|
153
|
+
'bb' (<-'b')
|
154
|
+
'cc' (<-'c')
|
155
|
+
'dd' (<-'d')
|
156
|
+
'ff' (<-'f')
|
157
|
+
'gg' (<-'g')
|
158
|
+
'hh' (<-'h')
|
159
|
+
'jj' (<-'j')
|
160
|
+
'kk' (<-'k')
|
161
|
+
'll' (<-'l')
|
162
|
+
'mm' (<-'m')
|
163
|
+
'nn' (<-'n')
|
164
|
+
'pp' (<-'p')
|
165
|
+
'qq' (<-'q')
|
166
|
+
'rr' (<-'r')
|
167
|
+
'ss' (<-'s')
|
168
|
+
'tt' (<-'t')
|
169
|
+
'vv' (<-'v')
|
170
|
+
'ww' (<-'w')
|
171
|
+
'xx' (<-'x')
|
172
|
+
'zz' (<-'z')
|
173
|
+
'v' (<-'f')
|
174
|
+
'z' (<-'s')
|
175
|
+
)
|
176
|
+
)
|
177
|
+
|
178
|
+
// Step_1c: deletes a matched 'd' or 't'. The leading (R1 C) inside `among` is
// the precondition shared by both entries. The 'd' is kept when the 'n' R1
// check succeeds, and the 't' is kept when the 'h' R1 check succeeds.
// Called from `stem` only when GE_removed is set.
define Step_1c as
|
179
|
+
(
|
180
|
+
[substring] among ( (R1 C)
|
181
|
+
'd' (not ('n' R1) delete)
|
182
|
+
't' (not ('h' R1) delete)
|
183
|
+
)
|
184
|
+
)
|
185
|
+
)
|
186
|
+
|
187
|
+
// Lose_prefix: brackets a 'ge' at the cursor as the slice, checks (via `test`,
// without moving) that three more characters can be hopped over, then requires
// that a vowel and, after it, a non-vowel can be reached. On success it sets
// GE_removed and deletes the bracketed 'ge'.
define Lose_prefix as (
|
188
|
+
['ge'] test hop 3 (goto v goto non-v)
|
189
|
+
set GE_removed
|
190
|
+
delete
|
191
|
+
)
|
192
|
+
|
193
|
+
// Lose_infix: like Lose_prefix, but for a 'ge' that is not at the starting
// position: it steps over one character with `next`, then uses `gopast` to
// find and bracket a later 'ge'. The same `hop 3` and vowel/non-vowel
// checks follow; on success it sets GE_removed and deletes the bracketed 'ge'.
define Lose_infix as (
|
194
|
+
next
|
195
|
+
gopast (['ge']) test hop 3 (goto v goto non-v)
|
196
|
+
set GE_removed
|
197
|
+
delete
|
198
|
+
)
|
199
|
+
|
200
|
+
// measure: computes the marks tested by R1 and R2. Both p1 and p2 start at
// `limit`. Inside `do` (so a failure leaves those defaults in place) it skips
// any non-vowels, consumes at least one vowel unit ('ij' or a character of v),
// requires a following non-vowel and marks p1 there; the same pattern,
// continued from that point, marks p2.
define measure as (
|
201
|
+
$p1 = limit
|
202
|
+
$p2 = limit
|
203
|
+
do(
|
204
|
+
repeat non-v atleast 1 ('ij' or v) non-v setmark p1
|
205
|
+
repeat non-v atleast 1 ('ij' or v) non-v setmark p2
|
206
|
+
)
|
207
|
+
|
208
|
+
)
|
209
|
+
// stem: external entry point of this stemmer. Sequence, as written below:
//   1. Clear Y_found and stemmed.
//   2. Rewrite a 'y' at the starting position, and every 'y' that follows a
//      character of v, to 'Y', recording that in Y_found.
//   3. Run `measure` to set p1/p2.
//   4. Working backwards, try Step_1 .. Step_4; each is under `do`, and
//      `stemmed` is set whenever a step succeeds.
//   5. Clear GE_removed, try Lose_prefix (re-running `measure` when it
//      succeeds), then run Step_1c backwards only if GE_removed was set.
//   6. Repeat stage 5 with Lose_infix.
//   7. Working backwards, try Step_7 (sets `stemmed`), then Step_6 only when
//      `stemmed` or GE_removed is set.
//   8. If Y_found, turn every 'Y' back into 'y'.
define stem as (
|
210
|
+
|
211
|
+
unset Y_found
|
212
|
+
unset stemmed
|
213
|
+
do ( ['y'] <-'Y' set Y_found )
|
214
|
+
do repeat(goto (v ['y'])<-'Y' set Y_found )
|
215
|
+
|
216
|
+
measure
|
217
|
+
|
218
|
+
backwards (
|
219
|
+
do (Step_1 set stemmed )
|
220
|
+
do (Step_2 set stemmed )
|
221
|
+
do (Step_3 set stemmed )
|
222
|
+
do (Step_4 set stemmed )
|
223
|
+
)
|
224
|
+
unset GE_removed
|
225
|
+
do (Lose_prefix and measure)
|
226
|
+
backwards (
|
227
|
+
do (GE_removed Step_1c)
|
228
|
+
)
|
229
|
+
unset GE_removed
|
230
|
+
do (Lose_infix and measure)
|
231
|
+
backwards (
|
232
|
+
do (GE_removed Step_1c)
|
233
|
+
)
|
234
|
+
backwards (
|
235
|
+
do (Step_7 set stemmed )
|
236
|
+
do (stemmed or GE_removed Step_6)
|
237
|
+
)
|
238
|
+
do(Y_found repeat(goto (['Y']) <-'y'))
|
239
|
+
)
|
240
|
+
|
@@ -0,0 +1,373 @@
|
|
1
|
+
externals ( stem )
|
2
|
+
|
3
|
+
// escape symbols for substituting lithuanian characters
|
4
|
+
stringescapes { }
|
5
|
+
|
6
|
+
/* Special characters in Unicode Latin Extended-A */
|
7
|
+
// ' nosine
|
8
|
+
stringdef a' '{U+0105}' // ą a + ogonek
|
9
|
+
stringdef e' '{U+0119}' // ę e + ogonek
|
10
|
+
stringdef i' '{U+012F}' // į i + ogonek
|
11
|
+
stringdef u' '{U+0173}' // ų u + ogonek
|
12
|
+
|
13
|
+
// . taskas
|
14
|
+
stringdef e. '{U+0117}' // ė e + dot
|
15
|
+
|
16
|
+
// - ilgoji
|
17
|
+
stringdef u- '{U+016B}' // ū u + macron
|
18
|
+
|
19
|
+
// * varnele
|
20
|
+
stringdef c* '{U+010D}' // č c + caron (haček)
|
21
|
+
stringdef s* '{U+0161}' // š s + caron (haček)
|
22
|
+
stringdef z* '{U+017E}' // ž z + caron (haček)
|
23
|
+
|
24
|
+
// [C](VC)^m[V|C]
|
25
|
+
// definitions of variables for
|
26
|
+
// p1 - position of m = 0
|
27
|
+
integers ( p1 )
|
28
|
+
|
29
|
+
// groupings
|
30
|
+
// v - lithuanian vowels
|
31
|
+
groupings ( v )
|
32
|
+
|
33
|
+
// v - all lithuanian vowels
|
34
|
+
define v 'aeiyou{a'}{e'}{i'}{u'}{e.}{u-}'
|
35
|
+
|
36
|
+
// all lithuanian stemmer routines: 4 steps
|
37
|
+
routines (
|
38
|
+
step2 R1 step1 fix_chdz fix_gd fix_conflicts
|
39
|
+
)
|
40
|
+
|
41
|
+
backwardmode (
|
42
|
+
|
43
|
+
// R1: succeeds only when the mark p1 (set in `stem`) is <= the current cursor position.
define R1 as $p1 <= cursor
|
44
|
+
define step1 as (
|
45
|
+
setlimit tomark p1 for ([substring]) R1 among (
|
46
|
+
// Daiktavardžiai (Nouns)
|
47
|
+
// I linksniuotė (declension I)
|
48
|
+
'as' 'ias' 'is' 'ys' // vyras, kelias, brolis, gaidys
|
49
|
+
'o' 'io' // vyro, kelio
|
50
|
+
'ui' 'iui' // vyrui, keliui
|
51
|
+
'{a'}' 'i{a'}' '{i'}' // vyrą, kelią, brolį
|
52
|
+
'u' 'iu' // vyru, keliu
|
53
|
+
'e' 'yje' // vyre, kelyje
|
54
|
+
'y' 'au' 'i' // kely, brolau, broli,
|
55
|
+
'an' // nusižengiman
|
56
|
+
|
57
|
+
'ai' 'iai' // vyrai, keliai
|
58
|
+
'{u'}' 'i{u'}' // vyrų, kelių
|
59
|
+
'ams' 'am' // vyrams, vyram
|
60
|
+
'iams' 'iam' // broliams, broliam
|
61
|
+
'us' 'ius' // vyrus, brolius
|
62
|
+
'ais' 'iais' // vyrais, keliais
|
63
|
+
'uose' 'iuose' 'uos' 'iuos' // vyruose, keliuose, vyruos, keliuos
|
64
|
+
'uosna' 'iuosna' // vyruosna, keliuosna
|
65
|
+
'ysna' // žutysna
|
66
|
+
|
67
|
+
'asis' 'aisi' // sukimasis, sukimaisi
|
68
|
+
'osi' '{u'}si' // sukimosi, sukimųsi
|
69
|
+
'uisi' // sukimuisi
|
70
|
+
'{a'}si' // sukimąsi
|
71
|
+
'usi' // sukimusi
|
72
|
+
'esi' // sukimesi
|
73
|
+
|
74
|
+
'uo' // mėnuo
|
75
|
+
|
76
|
+
|
77
|
+
// II linksniuote (declension II)
|
78
|
+
'a' 'ia' // galva, vysnia
|
79
|
+
'os' 'ios' // galvos, vysnios
|
80
|
+
'oj' 'oje' 'ioje' // galvoje, vysnioje
|
81
|
+
'osna' 'iosna' // galvosna, vyšniosna
|
82
|
+
'om' 'oms' 'ioms' // galvoms, vysnioms
|
83
|
+
'omis' 'iomis' // galvomis, vysniomis
|
84
|
+
'ose' 'iose' // galvose, vysniose
|
85
|
+
'on' 'ion' // galvon, vyšnion
|
86
|
+
|
87
|
+
|
88
|
+
// III linksniuote (declension III)
|
89
|
+
'{e.}' // gervė
|
90
|
+
'{e.}s' // gervės
|
91
|
+
'ei' // gervei
|
92
|
+
'{e'}' // gervę
|
93
|
+
'{e.}j' '{e.}je' // gervėj, gervėje
|
94
|
+
'{e.}ms' // gervėms
|
95
|
+
'es' // gerves
|
96
|
+
'{e.}mis' // gervėmis
|
97
|
+
'{e.}se' // gervėse
|
98
|
+
'{e.}sna' // gervėsna
|
99
|
+
'{e.}n' // žydaitėn
|
100
|
+
|
101
|
+
|
102
|
+
// IV linksniuote (declension IV)
|
103
|
+
'aus' 'iaus' // sūnaus, skaičiaus
|
104
|
+
'umi' 'iumi' // sūnumi, skaičiumi
|
105
|
+
'uje' 'iuje' // sūnuje, skaičiuje
|
106
|
+
'iau' // skaičiau
|
107
|
+
|
108
|
+
'{u-}s' // sūnūs
|
109
|
+
'ums' // sūnums
|
110
|
+
'umis' // sūnumis
|
111
|
+
'un' 'iun' // sūnun, administratoriun
|
112
|
+
|
113
|
+
|
114
|
+
// V linksniuote (declension V)
|
115
|
+
'ies' 'ens' 'enio' 'ers' // avies, vandens, sesers
|
116
|
+
'eniui' 'eriai' // vandeniui, seseriai
|
117
|
+
'en{i'}' 'er{i'}' // vandenį, seserį
|
118
|
+
'imi' 'eniu' 'erimi' 'eria' // avimi, vandeniu, seserimi, seseria
|
119
|
+
'enyje' 'eryje' // vandenyje, seseryje
|
120
|
+
'ie' 'enie' 'erie' // avie, vandenie, seserie
|
121
|
+
|
122
|
+
'enys' 'erys' // vandenys, seserys
|
123
|
+
// 'en{u'}' konfliktas su 'žandenų' 'antenų'
|
124
|
+
'er{u'}' // seserų
|
125
|
+
'ims' 'enims' 'erims' // avims, vandenims, seserims
|
126
|
+
'enis' // vandenis
|
127
|
+
'imis' // žebenkštimis
|
128
|
+
'enimis' // vandenimis
|
129
|
+
'yse' 'enyse' 'eryse' // avyse, vandenyse, seseryse
|
130
|
+
|
131
|
+
|
132
|
+
// Būdvardžiai (Adjectives)
|
133
|
+
// (i)a linksniuotė
|
134
|
+
'iem' 'iems' // geriem, geriems
|
135
|
+
'ame' 'iame' // naujame, mediniame
|
136
|
+
|
137
|
+
|
138
|
+
// Veiksmažodžiai (Verbs)
|
139
|
+
// Tiesioginė nuosaka (indicative mood)
|
140
|
+
// esamasis laikas (present tense)
|
141
|
+
// (i)a asmenuotė (declension (i)a)
|
142
|
+
'uosi' 'iuosi' // dirbuosi, traukiuosi
|
143
|
+
'iesi' // dirbiesi
|
144
|
+
'asi' 'iasi' // dirbasi, traukiasi
|
145
|
+
'am{e.}s' 'iam{e.}s' // dirbamės, traukiamės
|
146
|
+
'at' 'ate' 'iat' 'iate' // dirbat, dirbate, ariat, traukiate
|
147
|
+
'at{e.}s' 'iat{e.}s' // dirbatės, traukiatės
|
148
|
+
|
149
|
+
// i asmenuotė (declension i)
|
150
|
+
'isi' // tikisi
|
151
|
+
'im' // mylim
|
152
|
+
// 'ime' konfliktas su daiktavardžiu vietininku, pvz. 'gėrime'
|
153
|
+
'im{e.}s' // tikimės
|
154
|
+
'it' 'ite' // mylit, mylite, tikitės
|
155
|
+
// 'it{e.}s' konfliktas su priesaga ir dgs. vardininko galūne -ait-ės pvz. žydaitės
|
156
|
+
|
157
|
+
// o asmenuotė (declension o)
|
158
|
+
'ome' // mokome
|
159
|
+
'ot' 'ote' // mokot, mokote
|
160
|
+
|
161
|
+
// būtasis laikas
|
162
|
+
// o asmenuotė (declension o)
|
163
|
+
'{e.}jo' '{e.}josi' // tikėjo, tikėjosi
|
164
|
+
'ot{e.}s' // tikėjotės/bijotės
|
165
|
+
|
166
|
+
// ė asmenuotė (declension ė)
|
167
|
+
'eisi' // mokeisi
|
168
|
+
'{e.}si' // mokėsi
|
169
|
+
'{e.}m' '{e.}me' // mokėm, mokėme
|
170
|
+
'{e.}m{e.}s' // mokėmės
|
171
|
+
'{e.}t' '{e.}te' // mokėt, mokėte
|
172
|
+
'{e.}t{e.}s' // mokėtės
|
173
|
+
|
174
|
+
// būtasis dažninis laikas (frequentative past tense)
|
175
|
+
'ausi' // mokydavausi
|
176
|
+
'om{e.}s' // mokydavomės/bijomės
|
177
|
+
|
178
|
+
|
179
|
+
// būsimasis laikas (future tense)
|
180
|
+
'siu' 'siuosi' // dirbsiu, mokysiuosi
|
181
|
+
'si' 'siesi' // dirbsi, dirbsiesi
|
182
|
+
's' 'ysis' // dirbs, mokysis
|
183
|
+
'sim' 'sime' // dirbsim, dirbsime
|
184
|
+
'sit' 'site' // gersit, gersite
|
185
|
+
|
186
|
+
// tariamoji nuosaka (subjunctive mood)
|
187
|
+
'{c*}iau' '{c*}iausi' // dirbčiau
|
188
|
+
'tum' 'tumei' // dirbtum, dirbtumei
|
189
|
+
'tumeis' 'tumeisi' // mokytumeis, mokytumeisi
|
190
|
+
// 't{u'}' nes blogai batutų -> batų
|
191
|
+
't{u'}si' // mokytųsi
|
192
|
+
// 'tume' konfliktas su 'šventume'
|
193
|
+
'tum{e.}m' // dirbtumėm
|
194
|
+
'tum{e.}me' // dirbtumėme
|
195
|
+
'tum{e.}m{e.}s' // mokytumėmės
|
196
|
+
'tute' 'tum{e.}t' // dirbtute, dirbtumėt
|
197
|
+
'tum{e.}te' // dirbtumėte
|
198
|
+
'tum{e.}t{e.}s' // mokytumėtės
|
199
|
+
|
200
|
+
// liepiamoji nuosaka (imperative mood)
|
201
|
+
'k' 'ki' // dirbk, dirbki, mokykis
|
202
|
+
// 'kis' konfliktas viln-išk-is
|
203
|
+
// 'kime' konfliktas, nes pirkime
|
204
|
+
'kim{e.}s' // mokykimės
|
205
|
+
|
206
|
+
// bendratis (infinitive)
|
207
|
+
'uoti' 'iuoti' // meluoti, dygsniuoti
|
208
|
+
'auti' 'iauti' // draugauti, girtuokliauti
|
209
|
+
'oti' 'ioti' // dovanoti, meškerioti
|
210
|
+
'{e.}ti' // auklėti
|
211
|
+
'yti' // akyti
|
212
|
+
'inti' // auginti
|
213
|
+
'in{e.}ti' // blusinėti
|
214
|
+
'enti' // gyventi
|
215
|
+
'tel{e.}ti' // bumbtelėti
|
216
|
+
'ter{e.}ti' // bumbterėti
|
217
|
+
|
218
|
+
'ti' // skalbti
|
219
|
+
// 'tis' konfliktas, nes rytme-tis -> rytme
|
220
|
+
|
221
|
+
// dalyviai (participles)
|
222
|
+
'{a'}s' 'i{a'}s' '{i'}s' // dirbąs, žaidžiąs, gulįs
|
223
|
+
't{u'}s' // suktųs -> suk
|
224
|
+
'sim{e.}s' // suksimės
|
225
|
+
'sit{e.}s' // suksitės
|
226
|
+
'kite' // supkite
|
227
|
+
)
|
228
|
+
|
229
|
+
delete
|
230
|
+
)
|
231
|
+
|
232
|
+
define step2 as repeat (
|
233
|
+
setlimit tomark p1 for ([substring]) among (
|
234
|
+
// daiktavardziu priesagos (Noun suffixes)
|
235
|
+
|
236
|
+
// budvardziu priesagos (Adjective suffixes)
|
237
|
+
// 'in' // konfliktas su 'augintinis' ir 'akiniais' // lauk-in-is
|
238
|
+
'ing' // tvark-ing-as
|
239
|
+
'i{s*}k' // lenk-išk-as
|
240
|
+
'{e.}t' // dem-ėt-as
|
241
|
+
'ot' // garban-ot-as
|
242
|
+
'uot' 'iuot' // lang-uot-as, akin-iuot-as
|
243
|
+
// 'tin', nes augintinis // dirb-tin-is
|
244
|
+
// 'ut', nes batutas, degutas etc. // maž-ut-is
|
245
|
+
'yt' // maž-yt-is
|
246
|
+
'iuk' // maž-iuk-as
|
247
|
+
'iul' // maž-ul-is
|
248
|
+
'{e.}l' // maž-ėl-is
|
249
|
+
'yl' // maž-yl-is
|
250
|
+
'u{c*}iuk' // maž-učiuk-as
|
251
|
+
'uliuk' // maž-uliuk-as
|
252
|
+
'ut{e.}ait' // maž-utėlait-is
|
253
|
+
'ok' // did-ok-as
|
254
|
+
'iok' // višč-iok-as
|
255
|
+
'sv' '{s*}v' 'zgan' // sal-sv-as, pilk-šv-as, bal-zgan-as
|
256
|
+
'op' 'iop' // dvej-op-as, viener-iop-as
|
257
|
+
'ain' // apval-ain-as
|
258
|
+
'yk{s*}t' 'yk{s*}{c*}' // ten-ykšt-is, vakar-ykšč-ias
|
259
|
+
|
260
|
+
// laisniai
|
261
|
+
'esn' // did-esn-is
|
262
|
+
'aus' 'iaus' // nauj-aus-ias, ger-iaus-ias
|
263
|
+
|
264
|
+
// ivardziuotiniai budvardziai (Pronominal adjectives)
|
265
|
+
// vyriska gimine (Male gender)
|
266
|
+
'ias' // žaliasis
|
267
|
+
'oj' 'ioj' // gerojo, žaliojo
|
268
|
+
'aj' 'iaj' // gerajam, žaliajam
|
269
|
+
'{a'}j' 'i{a'}j' // garąjį, žaliąjį
|
270
|
+
'uoj' 'iuoj' // geruoju, žaliuoju
|
271
|
+
'iej' // gerieji
|
272
|
+
'{u'}j' 'i{u'}j' // gerųjų, žaliųjų
|
273
|
+
'ies' // geriesiems
|
274
|
+
'uos' 'iuos' // geruosius, žaliuosius
|
275
|
+
'ais' 'iais' // geraisiais, žaliaisiais
|
276
|
+
|
277
|
+
// moteriska gimine (Female gender)
|
278
|
+
'os' 'ios' // gerosios, žaliosios
|
279
|
+
'{a'}s' 'i{a'}s' // gerąsios, žaliąsias
|
280
|
+
|
281
|
+
// būtasis dažninis laikas (frequentative past tense)
|
282
|
+
'dav' // ei-dav-o
|
283
|
+
|
284
|
+
// dalyvių priesagos (participle suffixes)
|
285
|
+
'ant' 'iant'
|
286
|
+
'int' // tur-int-is
|
287
|
+
'{e.}j' // tur-ėj-o
|
288
|
+
'{e'}' //
|
289
|
+
'{e.}j{e'}'
|
290
|
+
'{e'}s' // dirb-ęs-is
|
291
|
+
|
292
|
+
'siant' // dirb-siant
|
293
|
+
|
294
|
+
// pusdalyviai (participle)
|
295
|
+
'dam' // bėg-dam-as
|
296
|
+
|
297
|
+
'auj' // ūkinink-auj-a
|
298
|
+
'jam'
|
299
|
+
'iau'
|
300
|
+
'am' // baiminim-ams-i
|
301
|
+
)
|
302
|
+
|
303
|
+
delete
|
304
|
+
)
|
305
|
+
|
306
|
+
define fix_conflicts as (
|
307
|
+
[substring] among (
|
308
|
+
// 'lietuvaite' -> 'lietuvaitė', konfliktas su 'myl-ite'
|
309
|
+
'aite' (<-'ait{e.}')
|
310
|
+
// 'lietuvaitės' -> 'lietuvaitė', konfliktas su 'myl-itės'
|
311
|
+
'ait{e.}s' (<-'ait{e.}')
|
312
|
+
|
313
|
+
// 'ūs-uotės' -> 'ūs-uotė', konfliktas su 'mokotės'
|
314
|
+
'uot{e.}s' (<-'uot{e.}')
|
315
|
+
// 'ūs-uote' -> 'ūs-uotė', konfliktas su 'mokote'
|
316
|
+
'uote' (<-'uot{e.}')
|
317
|
+
|
318
|
+
// 'žėrėjime' -> 'žėrėjimas', konfliktas su 'žais-ime'
|
319
|
+
'{e.}jime' (<-'{e.}jimas')
|
320
|
+
|
321
|
+
// 'žvilgesiu' -> 'žvilgesys', konfliktas su 'dirb-siu'
|
322
|
+
'esiu' (<-'esys')
|
323
|
+
// 'duobkasiu' -> 'duobkasys', konfliktas su 'pakasiu'
|
324
|
+
'asius' (<-'asys')
|
325
|
+
|
326
|
+
// 'žioravime' -> 'žioravimas', konfliktas su 'myl-ime'
|
327
|
+
'avime' (<-'avimas')
|
328
|
+
'ojime' (<-'ojimas')
|
329
|
+
|
330
|
+
// 'advokatės' -> 'advokatė', konfliktas su 'dirb-atės'
|
331
|
+
'okat{e.}s' (<-'okat{e.}')
|
332
|
+
// 'advokate' -> 'advokatė', konfliktas su 'dirb-ate'
|
333
|
+
'okate' (<-'okat{e.}')
|
334
|
+
)
|
335
|
+
)
|
336
|
+
|
337
|
+
// fix_chdz: replaces a matched '{c*}' (U+010D) with 't', or a matched
// 'd{z*}' ('d' + U+017E) with 'd'. `stem` runs it after step1 and again
// after step2.
define fix_chdz as (
|
338
|
+
[substring] among (
|
339
|
+
'{c*}' (<-'t')
|
340
|
+
'd{z*}' (<-'d')
|
341
|
+
)
|
342
|
+
)
|
343
|
+
|
344
|
+
// fix_gd: replaces a matched 'gd' with 'g'. The second rule below is
// commented out in the source and therefore inactive.
define fix_gd as (
|
345
|
+
[substring] among (
|
346
|
+
'gd' (<-'g')
|
347
|
+
// '{e.}k' (<-'{e.}g')
|
348
|
+
)
|
349
|
+
)
|
350
|
+
|
351
|
+
)
|
352
|
+
|
353
|
+
// stem: external entry point of the Lithuanian stemmer.
//   1. p1 defaults to `limit`.
//   2. Inside `do` (a failure keeps that default): optionally hop over a
//      leading 'a' when the word length exceeds 6, then go past the first
//      vowel and the first following non-vowel and mark p1 there.
//   3. Working backwards, run fix_conflicts, step1, fix_chdz, step2,
//      fix_chdz and fix_gd in that order; each is under `do`, so a routine
//      that fails does not stop the ones after it.
define stem as (
|
354
|
+
|
355
|
+
$p1 = limit
|
356
|
+
|
357
|
+
do (
|
358
|
+
// prefix 'a' in words longer than 6 letters, e.g. 'a-liejus'.
|
359
|
+
try (test 'a' $(len > 6) hop 1)
|
360
|
+
|
361
|
+
gopast v gopast non-v setmark p1
|
362
|
+
)
|
363
|
+
|
364
|
+
backwards (
|
365
|
+
do fix_conflicts
|
366
|
+
do step1
|
367
|
+
do fix_chdz
|
368
|
+
do step2
|
369
|
+
do fix_chdz
|
370
|
+
do fix_gd
|
371
|
+
)
|
372
|
+
|
373
|
+
)
|