mittens 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
/* *******************************************
|
|
2
|
+
* Stemmer for Yiddish language in YIVO script
|
|
3
|
+
*
|
|
4
|
+
* Author: Assaf Urieli
|
|
5
|
+
* Emails: assaf.urieli at gmail.com
|
|
6
|
+
* Version: 0.1 (15.05.2020)
|
|
7
|
+
*
|
|
8
|
+
********************************************* */
|
|
9
|
+
|
|
10
|
+
routines (
|
|
11
|
+
prelude
|
|
12
|
+
mark_regions
|
|
13
|
+
R1
|
|
14
|
+
R1plus3
|
|
15
|
+
standard_suffix
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
externals ( stem )
|
|
19
|
+
|
|
20
|
+
integers ( p1 x )
|
|
21
|
+
|
|
22
|
+
groupings ( vowel niked alefBeys consonant )
|
|
23
|
+
|
|
24
|
+
stringescapes {}
|
|
25
|
+
|
|
26
|
+
// AlefBeys
|
|
27
|
+
stringdef Alef '{U+05D0}'
|
|
28
|
+
stringdef Beys '{U+05D1}'
|
|
29
|
+
stringdef Giml '{U+05D2}'
|
|
30
|
+
stringdef Dalet '{U+05D3}'
|
|
31
|
+
stringdef Hey '{U+05D4}'
|
|
32
|
+
stringdef Vov '{U+05D5}'
|
|
33
|
+
stringdef Zayen '{U+05D6}'
|
|
34
|
+
stringdef Khes '{U+05D7}'
|
|
35
|
+
stringdef Tes '{U+05D8}'
|
|
36
|
+
stringdef Yud '{U+05D9}'
|
|
37
|
+
stringdef LangerKhof '{U+05DA}'
|
|
38
|
+
stringdef Khof '{U+05DB}'
|
|
39
|
+
stringdef Lamed '{U+05DC}'
|
|
40
|
+
stringdef ShlosMem '{U+05DD}'
|
|
41
|
+
stringdef Mem '{U+05DE}'
|
|
42
|
+
stringdef LangerNun '{U+05DF}'
|
|
43
|
+
stringdef Nun '{U+05E0}'
|
|
44
|
+
stringdef Samekh '{U+05E1}'
|
|
45
|
+
stringdef Ayen '{U+05E2}'
|
|
46
|
+
stringdef LangerFey '{U+05E3}'
|
|
47
|
+
stringdef Fey '{U+05E4}'
|
|
48
|
+
stringdef LangerTsadek '{U+05E5}'
|
|
49
|
+
stringdef Tsadek '{U+05E6}'
|
|
50
|
+
stringdef Kuf '{U+05E7}'
|
|
51
|
+
stringdef Reysh '{U+05E8}'
|
|
52
|
+
stringdef Shin '{U+05E9}'
|
|
53
|
+
stringdef Sof '{U+05EA}'
|
|
54
|
+
stringdef TsveyVovn '{U+05F0}'
|
|
55
|
+
stringdef VovYud '{U+05F1}'
|
|
56
|
+
stringdef TsveyYudn '{U+05F2}'
|
|
57
|
+
|
|
58
|
+
// Niked
|
|
59
|
+
stringdef Shvo '{U+05B0}'
|
|
60
|
+
stringdef Khirik '{U+05B4}'
|
|
61
|
+
stringdef Tseyre '{U+05B5}'
|
|
62
|
+
stringdef Segl '{U+05B6}'
|
|
63
|
+
stringdef ReducedSegl '{U+05B1}'
|
|
64
|
+
stringdef Pasekh '{U+05B7}'
|
|
65
|
+
stringdef ReducedPasekh '{U+05B2}'
|
|
66
|
+
stringdef Komets '{U+05B8}'
|
|
67
|
+
stringdef ReducedKomets '{U+05B3}'
|
|
68
|
+
stringdef Rafe '{U+05BF}'
|
|
69
|
+
stringdef SinDot '{U+05C2}'
|
|
70
|
+
stringdef ShinDot '{U+05C1}'
|
|
71
|
+
stringdef Khoylm '{U+05B9}'
|
|
72
|
+
stringdef Melupm '{U+05BC}'
|
|
73
|
+
stringdef Kubuts '{U+05BB}'
|
|
74
|
+
|
|
75
|
+
// Groupings
|
|
76
|
+
define niked '{Shvo}{Khirik}{Tseyre}{Segl}{ReducedSegl}{Pasekh}{ReducedPasekh}{Komets}{ReducedKomets}{SinDot}{ShinDot}{Khoylm}{Melupm}{Kubuts}{Rafe}'
|
|
77
|
+
define alefBeys '{Alef}{Beys}{Giml}{Dalet}{Hey}{Vov}{Zayen}{Khes}{Tes}{Yud}{LangerKhof}{Khof}{Lamed}{ShlosMem}{Mem}{LangerNun}{Nun}{Samekh}{Ayen}{LangerFey}{Fey}{LangerTsadek}{Tsadek}{Kuf}{Reysh}{Shin}{Sof}{TsveyVovn}{VovYud}{TsveyYudn}'
|
|
78
|
+
define vowel '{Alef}{Vov}{Yud}{Ayen}{VovYud}{TsveyYudn}'
|
|
79
|
+
// consonant: every character of the alefBeys grouping that is not in the vowel grouping
define consonant alefBeys - vowel
|
|
80
|
+
|
|
81
|
+
define prelude as (
|
|
82
|
+
do (
|
|
83
|
+
repeat goto (
|
|
84
|
+
[substring] among (
|
|
85
|
+
'{Vov}{Vov}' ( not '{Melupm}' <- '{TsveyVovn}' )
|
|
86
|
+
'{Vov}{Yud}' ( not '{Khirik}' <- '{VovYud}' )
|
|
87
|
+
'{Yud}{Yud}' ( not '{Khirik}' <- '{TsveyYudn}' )
|
|
88
|
+
'{LangerKhof}' ( <- '{Khof}')
|
|
89
|
+
'{ShlosMem}' ( <- '{Mem}' )
|
|
90
|
+
'{LangerNun}' ( <- '{Nun}' )
|
|
91
|
+
'{LangerFey}' ( <- '{Fey}' )
|
|
92
|
+
'{LangerTsadek}' ( <- '{Tsadek}' )
|
|
93
|
+
)
|
|
94
|
+
)
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
do (repeat goto ( [niked] delete ))
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
define mark_regions as (
|
|
101
|
+
$p1 = limit
|
|
102
|
+
|
|
103
|
+
(
|
|
104
|
+
try (
|
|
105
|
+
// Replace past participle ge- at start of word
|
|
106
|
+
// Unless word starts with gelt- or gebn-
|
|
107
|
+
['{Giml}{Ayen}']
|
|
108
|
+
not ('{Lamed}{Tes}' or '{Beys}{Nun}') <- 'GE'
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
try (
|
|
112
|
+
// skip verbal prefix
|
|
113
|
+
among(
|
|
114
|
+
// Free stressed: Adurkh-, Durkh-, Ahin-, Aher-, Avek-, Mit-, Antkegn-, Akegn-, Anider-, Arop-, Aroys-, Aroyf-, Arum-, Arayn-, Arunter-, Ariber-, Nokh-, Farbay-, Aheym-, Afir-, Faroys-, Funander-, Tsuzamen-, Tsunoyf-, Tsurik-
|
|
115
|
+
'{Alef}{Dalet}{Vov}{Reysh}{Khof}' '{Dalet}{Vov}{Reysh}{Khof}' '{Alef}{Hey}{Yud}{Nun}' '{Alef}{Hey}{Ayen}{Reysh}' '{Alef}{TsveyVovn}{Ayen}{Kuf}' '{Mem}{Yud}{Tes}' '{Alef}{Nun}{Tes}{Kuf}{Ayen}{Giml}{Nun}' '{Alef}{Kuf}{Ayen}{Giml}{Nun}' '{Alef}{Nun}{Yud}{Dalet}{Ayen}{Reysh}' '{Alef}{Reysh}{Alef}{Fey}' '{Alef}{Reysh}{VovYud}{Samekh}' '{Alef}{Reysh}{VovYud}{Fey}' '{Alef}{Reysh}{Vov}{Mem}' '{Alef}{Reysh}{TsveyYudn}{Nun}' '{Alef}{Reysh}{Vov}{Nun}{Tes}{Ayen}{Reysh}' '{Alef}{Reysh}{Yud}{Beys}{Ayen}{Reysh}' '{Nun}{Alef}{Khof}' '{Fey}{Alef}{Reysh}{Beys}{TsveyYudn}' '{Alef}{Hey}{TsveyYudn}{Mem}' '{Alef}{Fey}{Yud}{Reysh}' '{Fey}{Alef}{Reysh}{VovYud}{Samekh}' '{Fey}{Vov}{Nun}{Alef}{Nun}{Dalet}{Ayen}{Reysh}' '{Tsadek}{Vov}{Zayen}{Alef}{Mem}{Ayen}{Nun}' '{Tsadek}{Vov}{Nun}{VovYud}{Fey}' '{Tsadek}{Vov}{Reysh}{Yud}{Kuf}'
|
|
116
|
+
|
|
117
|
+
// Stressed: Oys-, Oyf-, Um-, Unter-, Iber-, Ayn-, On-, Op-, Bay-, For-, Tsu-.
|
|
118
|
+
'{Alef}{VovYud}{Samekh}' '{Alef}{VovYud}{Fey}' '{Alef}{Vov}{Mem}' '{Alef}{Vov}{Nun}{Tes}{Ayen}{Reysh}' '{Alef}{Yud}{Beys}{Ayen}{Reysh}' '{Alef}{TsveyYudn}{Nun}' '{Alef}{Nun}' '{Alef}{Fey}' '{Beys}{TsveyYudn}' '{Fey}{Alef}{Reysh}' '{Tsadek}{Vov}'
|
|
119
|
+
|
|
120
|
+
// Unstressed: Ant-, Ba-, Der-, Tse-. Far- already covered by For-. Ge- comes later.
|
|
121
|
+
'{Alef}{Nun}{Tes}' '{Beys}{Alef}' '{Dalet}{Ayen}{Reysh}' '{Tsadek}{Ayen}'
|
|
122
|
+
|
|
123
|
+
// If verbal prefix followed by Tsu- or Ge-, replace it
|
|
124
|
+
(
|
|
125
|
+
// Don't mark the TSU- prefix inside verbs like "oys-tsugn"
|
|
126
|
+
test (('{Tsadek}{Vov}{Giml}{Nun}' or '{Tsadek}{Vov}{Kuf}{Tes}' or '{Tsadek}{Vov}{Kuf}{Nun}') atlimit)
|
|
127
|
+
or
|
|
128
|
+
// Don't mark the GE- prefix inside verbs like "avek-gebn"
|
|
129
|
+
test ('{Giml}{Ayen}{Beys}{Nun}')
|
|
130
|
+
or
|
|
131
|
+
( ['{Giml}{Ayen}'] <- 'GE')
|
|
132
|
+
or
|
|
133
|
+
(['{Tsadek}{Vov}'] <- 'TSU')
|
|
134
|
+
)
|
|
135
|
+
)
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
test(hop 3 setmark x)
|
|
139
|
+
|
|
140
|
+
// We want to allow three-consonant Hebrew roots.
|
|
141
|
+
// To this end, we skip three-consonant combinations that exist in non-Hebraic Yiddish.
|
|
142
|
+
try (
|
|
143
|
+
among(
|
|
144
|
+
'{Shin}{Fey}{Reysh}' '{Shin}{Tes}{Reysh}' '{Shin}{Tes}{Shin}' '{Dalet}{Zayen}{Shin}'
|
|
145
|
+
( true )
|
|
146
|
+
)
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
// Either 3 consonants or the first non-vowel after a vowel
|
|
150
|
+
(
|
|
151
|
+
not (consonant consonant consonant setmark p1)
|
|
152
|
+
goto vowel repeat vowel setmark p1
|
|
153
|
+
)
|
|
154
|
+
try($p1 < x $p1 = x) // at least 3 past the prefix
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
backwardmode (
|
|
160
|
+
// R1 succeeds when the cursor is at or after the position stored in p1 by mark_regions
define R1 as $p1 <= cursor
|
|
161
|
+
// Like R1, but also allows the cursor to be outside R1 by the width of Giml Yud Samekh
|
|
162
|
+
define R1plus3 as $p1 <= cursor + sizeof '{Giml}{Yud}{Samekh}'
|
|
163
|
+
|
|
164
|
+
define standard_suffix as (
|
|
165
|
+
do (
|
|
166
|
+
[substring] among(
|
|
167
|
+
// Plural/adjective endings: -er, -ers, -e, -n, -s, -en, -ns, -eners, -em, -es
|
|
168
|
+
'{Ayen}{Reysh}{Samekh}' '{Ayen}{Nun}' '{Nun}{Samekh}' '{Ayen}{Nun}{Ayen}{Reysh}{Samekh}' '{Ayen}{Samekh}' '{Ayen}' '{Nun}' '{Samekh}' '{Ayen}{Mem}' '{Ayen}{Reysh}'
|
|
169
|
+
( R1 delete )
|
|
170
|
+
|
|
171
|
+
// Exception: don't delete noun endings -ie, like "agitatsie"
|
|
172
|
+
'{Yud}{Ayen}'
|
|
173
|
+
( true )
|
|
174
|
+
|
|
175
|
+
// -ies => ie
|
|
176
|
+
'{Yud}{Ayen}{Samekh}'
|
|
177
|
+
( R1 <- '{Yud}{Ayen}' )
|
|
178
|
+
|
|
179
|
+
// Plural/adjective endings: -enem, -ener, -ene, -ens
|
|
180
|
+
'{Ayen}{Nun}{Ayen}' '{Ayen}{Nun}{Ayen}{Mem}' '{Ayen}{Nun}{Ayen}{Reysh}' '{Ayen}{Nun}{Samekh}'
|
|
181
|
+
(R1 delete
|
|
182
|
+
[substring] among (
|
|
183
|
+
// -gegangen => -gey
|
|
184
|
+
'{Giml}{Alef}{Nun}{Giml}' (<- '{Giml}{TsveyYudn}')
|
|
185
|
+
// -genumen => -nem
|
|
186
|
+
'{Nun}{Vov}{Mem}' (<- '{Nun}{Ayen}{Mem}')
|
|
187
|
+
// -gemiten => -mayd
|
|
188
|
+
'{Mem}{Yud}{Tes}' (<- '{Mem}{TsveyYudn}{Dalet}')
|
|
189
|
+
// -gebiten => -bayt
|
|
190
|
+
'{Beys}{Yud}{Tes}' (<- '{Beys}{TsveyYudn}{Tes}')
|
|
191
|
+
// -gebisen => -bays
|
|
192
|
+
'{Beys}{Yud}{Samekh}' (<- '{Beys}{TsveyYudn}{Samekh}')
|
|
193
|
+
// -gevizen => -vayz
|
|
194
|
+
'{TsveyVovn}{Yud}{Zayen}' (<- '{TsveyVovn}{TsveyYudn}{Zayen}')
|
|
195
|
+
// -getriben => -trayb
|
|
196
|
+
'{Tes}{Reysh}{Yud}{Beys}' (<- '{Tes}{Reysh}{TsveyYudn}{Beys}')
|
|
197
|
+
// -geliten => -layt
|
|
198
|
+
'{Lamed}{Yud}{Tes}' (<- '{Lamed}{TsveyYudn}{Tes}')
|
|
199
|
+
// -gekliben => -klayb
|
|
200
|
+
'{Kuf}{Lamed}{Yud}{Beys}' (<- '{Kuf}{Lamed}{TsveyYudn}{Beys}')
|
|
201
|
+
// -geriben => -rayb
|
|
202
|
+
'{Reysh}{Yud}{Beys}' (<- '{Reysh}{TsveyYudn}{Beys}')
|
|
203
|
+
// -gerisen => -rays
|
|
204
|
+
'{Reysh}{Yud}{Samekh}' (<- '{Reysh}{TsveyYudn}{Samekh}')
|
|
205
|
+
// -geshvigen => -shvayg
|
|
206
|
+
'{Shin}{TsveyVovn}{Yud}{Giml}' (<- '{Shin}{TsveyVovn}{TsveyYudn}{Giml}')
|
|
207
|
+
// -geshmisen => -shmays
|
|
208
|
+
'{Shin}{Mem}{Yud}{Samekh}' (<- '{Shin}{Mem}{TsveyYudn}{Samekh}')
|
|
209
|
+
// -geshniten => -shnayd
|
|
210
|
+
'{Shin}{Nun}{Yud}{Tes}' (<- '{Shin}{Nun}{TsveyYudn}{Dalet}')
|
|
211
|
+
// -geshriben => -shrayb
|
|
212
|
+
'{Shin}{Reysh}{Yud}{Beys}' (<- '{Shin}{Reysh}{TsveyYudn}{Beys}')
|
|
213
|
+
// -gebunden => -bind
|
|
214
|
+
'{Beys}{Vov}{Nun}{Dalet}' (<- '{Beys}{Yud}{Nun}{Dalet}')
|
|
215
|
+
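// -gevuntshn => -vintsh (NOTE(review): pattern and replacement below contain no {Nun} before {Tes} - confirm this is intended)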
|
|
216
|
+
'{TsveyVovn}{Vov}{Tes}{Shin}' (<- '{TsveyVovn}{Yud}{Tes}{Shin}')
|
|
217
|
+
// -gezungen => -zing
|
|
218
|
+
'{Zayen}{Vov}{Nun}{Giml}' (<- '{Zayen}{Yud}{Nun}{Giml}')
|
|
219
|
+
// -getrunken => -trink
|
|
220
|
+
'{Tes}{Reysh}{Vov}{Nun}{Kuf}' (<- '{Tes}{Reysh}{Yud}{Nun}{Kuf}')
|
|
221
|
+
// -getsvungen => -tsving
|
|
222
|
+
'{Tsadek}{TsveyVovn}{Vov}{Nun}{Giml}' (<- '{Tsadek}{TsveyVovn}{Yud}{Nun}{Giml}')
|
|
223
|
+
// -geshlungen => -shling
|
|
224
|
+
'{Shin}{Lamed}{Vov}{Nun}{Giml}' (<- '{Shin}{Lamed}{Yud}{Nun}{Giml}')
|
|
225
|
+
// -geboygen => -beyg
|
|
226
|
+
'{Beys}{VovYud}{Giml}' (<- '{Beys}{TsveyYudn}{Giml}')
|
|
227
|
+
// -gehoyben => -heyb
|
|
228
|
+
'{Hey}{VovYud}{Beys}' (<- '{Hey}{TsveyYudn}{Beys}')
|
|
229
|
+
// -farloyren => -farlir
|
|
230
|
+
'{Fey}{Alef}{Reysh}{Lamed}{VovYud}{Reysh}' (<- '{Fey}{Alef}{Reysh}{Lamed}{Yud}{Reysh}')
|
|
231
|
+
// -shtanen => -shtey
|
|
232
|
+
'{Shin}{Tes}{Alef}{Nun}' (<- '{Shin}{Tes}{TsveyYudn}')
|
|
233
|
+
// -geshvoyrn => -shver
|
|
234
|
+
'{Shin}{TsveyVovn}{VovYud}{Reysh}' (<- '{Shin}{TsveyVovn}{Ayen}{Reysh}')
|
|
235
|
+
)
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
// Verb/past participle ending: -t
|
|
239
|
+
'{Tes}'
|
|
240
|
+
( R1 delete )
|
|
241
|
+
|
|
242
|
+
// As well as noun/adjectives ending in -tn, -te, -ter, -ts so that the "-t" doesn't differentiate
|
|
243
|
+
// Similarly for past participles: -tns, -tene, -tenem, -tener
|
|
244
|
+
// If the Tes was before R1, we try to perform the same action while leaving the Tes in place
|
|
245
|
+
'{Tes}{Nun}' '{Tes}{Ayen}' '{Tes}{Ayen}{Reysh}' '{Tes}{Samekh}'
|
|
246
|
+
'{Tes}{Nun}{Samekh}' '{Tes}{Ayen}{Nun}{Ayen}' '{Tes}{Ayen}{Nun}{Ayen}{Mem}' '{Tes}{Ayen}{Nun}{Ayen}{Reysh}'
|
|
247
|
+
( ((R1 delete) or ( <- '{Tes}'))
|
|
248
|
+
// -(ge)brakht => -breng
|
|
249
|
+
['{Beys}{Reysh}{Alef}{Khof}' try '{Giml}{Ayen}'] <- '{Beys}{Reysh}{Ayen}{Nun}{Giml}'
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
// Past participles: -et, -etn, -ets, -ete, -eter
|
|
253
|
+
'{Ayen}{Tes}' '{Ayen}{Tes}{Nun}' '{Ayen}{Tes}{Samekh}' '{Ayen}{Tes}{Ayen}' '{Ayen}{Tes}{Ayen}{Reysh}'
|
|
254
|
+
( R1 delete )
|
|
255
|
+
|
|
256
|
+
// -geyn shortened to -gey
|
|
257
|
+
'{Giml}{TsveyYudn}{Nun}'
|
|
258
|
+
( <- '{Giml}{TsveyYudn}')
|
|
259
|
+
|
|
260
|
+
// ##################### Long list of irregular past participles
|
|
261
|
+
// -(ge)gangen (shortened to -gangen after prefixes) => -gey
|
|
262
|
+
'{Giml}{Alef}{Nun}{Giml}{Ayen}{Nun}'
|
|
263
|
+
( <- '{Giml}{TsveyYudn}' )
|
|
264
|
+
|
|
265
|
+
// -(ge)numen (shortened to -numen after prefixes) => -nem
|
|
266
|
+
'{Nun}{Vov}{Mem}{Ayen}{Nun}'
|
|
267
|
+
(<- '{Nun}{Ayen}{Mem}' )
|
|
268
|
+
|
|
269
|
+
// -(ge)shribn (shortened to -shribn after prefixes) => -shrayb
|
|
270
|
+
'{Shin}{Reysh}{Yud}{Beys}{Nun}'
|
|
271
|
+
(<- '{Shin}{Reysh}{TsveyYudn}{Beys}' )
|
|
272
|
+
|
|
273
|
+
// -gemiten => -mayd
|
|
274
|
+
'GE{Mem}{Yud}{Tes}{Nun}'
|
|
275
|
+
(<- '{Mem}{TsveyYudn}{Dalet}')
|
|
276
|
+
|
|
277
|
+
// -gebiten => -bayt
|
|
278
|
+
'GE{Beys}{Yud}{Tes}{Nun}'
|
|
279
|
+
(<- '{Beys}{TsveyYudn}{Tes}')
|
|
280
|
+
|
|
281
|
+
// -gebisen => -bays
|
|
282
|
+
'GE{Beys}{Yud}{Samekh}{Nun}'
|
|
283
|
+
( <- '{Beys}{TsveyYudn}{Samekh}')
|
|
284
|
+
|
|
285
|
+
// -gevizen => -vayz
|
|
286
|
+
'{TsveyVovn}{Yud}{Zayen}{Nun}'
|
|
287
|
+
( <- '{TsveyVovn}{TsveyYudn}{Zayen}')
|
|
288
|
+
|
|
289
|
+
// -getriben => -trayb
|
|
290
|
+
'{Tes}{Reysh}{Yud}{Beys}{Nun}'
|
|
291
|
+
( <- '{Tes}{Reysh}{TsveyYudn}{Beys}')
|
|
292
|
+
|
|
293
|
+
// -geliten => -layt
|
|
294
|
+
'GE{Lamed}{Yud}{Tes}{Nun}'
|
|
295
|
+
( <- '{Lamed}{TsveyYudn}{Tes}')
|
|
296
|
+
|
|
297
|
+
// -gekliben => -klayb
|
|
298
|
+
'{Kuf}{Lamed}{Yud}{Beys}{Nun}'
|
|
299
|
+
( <- '{Kuf}{Lamed}{TsveyYudn}{Beys}')
|
|
300
|
+
|
|
301
|
+
// -geriben => -rayb
|
|
302
|
+
'{Reysh}{Yud}{Beys}{Nun}'
|
|
303
|
+
( <- '{Reysh}{TsveyYudn}{Beys}')
|
|
304
|
+
|
|
305
|
+
// -gerisen => -rays
|
|
306
|
+
'GE{Reysh}{Yud}{Samekh}{Nun}'
|
|
307
|
+
( <- '{Reysh}{TsveyYudn}{Samekh}')
|
|
308
|
+
|
|
309
|
+
// -geshvigen => -shvayg
|
|
310
|
+
'{Shin}{TsveyVovn}{Yud}{Giml}{Nun}'
|
|
311
|
+
( <- '{Shin}{TsveyVovn}{TsveyYudn}{Giml}')
|
|
312
|
+
|
|
313
|
+
// -geshmisen => -shmays
|
|
314
|
+
'{Shin}{Mem}{Yud}{Samekh}{Nun}'
|
|
315
|
+
( <- '{Shin}{Mem}{TsveyYudn}{Samekh}')
|
|
316
|
+
|
|
317
|
+
// -geshniten => -shnayd
|
|
318
|
+
'{Shin}{Nun}{Yud}{Tes}{Nun}'
|
|
319
|
+
( <- '{Shin}{Nun}{TsveyYudn}{Dalet}')
|
|
320
|
+
|
|
321
|
+
// -gebunden => -bind
|
|
322
|
+
'{Beys}{Vov}{Nun}{Dalet}{Nun}'
|
|
323
|
+
( <- '{Beys}{Yud}{Nun}{Dalet}')
|
|
324
|
+
|
|
325
|
+
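// -gevuntshn => -vintsh (NOTE(review): pattern and replacement below contain no {Nun} before {Tes} - confirm this is intended)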
|
|
326
|
+
'{TsveyVovn}{Vov}{Tes}{Shin}{Nun}'
|
|
327
|
+
( <- '{TsveyVovn}{Yud}{Tes}{Shin}')
|
|
328
|
+
|
|
329
|
+
// -gezungen => -zing
|
|
330
|
+
'{Zayen}{Vov}{Nun}{Giml}{Nun}'
|
|
331
|
+
( <- '{Zayen}{Yud}{Nun}{Giml}')
|
|
332
|
+
|
|
333
|
+
// -getrunken => -trink
|
|
334
|
+
'{Tes}{Reysh}{Vov}{Nun}{Kuf}{Nun}'
|
|
335
|
+
( <- '{Tes}{Reysh}{Yud}{Nun}{Kuf}')
|
|
336
|
+
|
|
337
|
+
// -getsvungen => -tsving
|
|
338
|
+
'{Tsadek}{TsveyVovn}{Vov}{Nun}{Giml}{Nun}'
|
|
339
|
+
( <- '{Tsadek}{TsveyVovn}{Yud}{Nun}{Giml}')
|
|
340
|
+
|
|
341
|
+
// -geshlungen => -shling
|
|
342
|
+
'{Shin}{Lamed}{Vov}{Nun}{Giml}{Nun}'
|
|
343
|
+
( <- '{Shin}{Lamed}{Yud}{Nun}{Giml}')
|
|
344
|
+
|
|
345
|
+
// -geboygen => -beyg
|
|
346
|
+
'{Beys}{VovYud}{Giml}{Nun}'
|
|
347
|
+
( <- '{Beys}{TsveyYudn}{Giml}')
|
|
348
|
+
|
|
349
|
+
// -gehoyben => -heyb
|
|
350
|
+
'{Hey}{VovYud}{Beys}{Nun}'
|
|
351
|
+
( <- '{Hey}{TsveyYudn}{Beys}')
|
|
352
|
+
|
|
353
|
+
// -farloyren => -farlir
|
|
354
|
+
'{Fey}{Alef}{Reysh}{Lamed}{VovYud}{Reysh}{Nun}'
|
|
355
|
+
( <- '{Fey}{Alef}{Reysh}{Lamed}{Yud}{Reysh}')
|
|
356
|
+
|
|
357
|
+
// -shtanen => -shtey
|
|
358
|
+
'{Shin}{Tes}{Alef}{Nun}{Ayen}{Nun}'
|
|
359
|
+
( <- '{Shin}{Tes}{TsveyYudn}')
|
|
360
|
+
|
|
361
|
+
// -geshvoyrn => -shver
|
|
362
|
+
'{Shin}{TsveyVovn}{VovYud}{Reysh}{Nun}'
|
|
363
|
+
( <- '{Shin}{TsveyVovn}{Ayen}{Reysh}')
|
|
364
|
+
|
|
365
|
+
// -(ge)brakht (shortened to -brakht after prefixes) => -breng
|
|
366
|
+
'{Beys}{Reysh}{Alef}{Khof}{Tes}'
|
|
367
|
+
(<- '{Beys}{Reysh}{Ayen}{Nun}{Giml}' )
|
|
368
|
+
|
|
369
|
+
// ###### End of irregular past participles
|
|
370
|
+
|
|
371
|
+
// Noun endings: -ung, -hayt, -kayt, -ikayt, -shaft
|
|
372
|
+
'{Vov}{Nun}{Giml}' '{Hey}{TsveyYudn}{Tes}' '{Kuf}{TsveyYudn}{Tes}' '{Yud}{Kuf}{TsveyYudn}{Tes}' '{Shin}{Alef}{Fey}{Tes}'
|
|
373
|
+
( R1 delete )
|
|
374
|
+
|
|
375
|
+
// Noun endings: -izm, -izmen
|
|
376
|
+
'{Yud}{Zayen}{Mem}' '{Yud}{Zayen}{Mem}{Ayen}{Nun}'
|
|
377
|
+
( R1 delete )
|
|
378
|
+
|
|
379
|
+
// Plural ending: -im
|
|
380
|
+
'{Yud}{Mem}'
|
|
381
|
+
( R1 delete )
|
|
382
|
+
|
|
383
|
+
// Plural ending: -os (Hebraic), replace with -h
|
|
384
|
+
'{Vov}{Sof}'
|
|
385
|
+
( R1 <- '{Hey}' )
|
|
386
|
+
|
|
387
|
+
// Diminutive endings: -elekh, -ele, -lekh, -eles, -elen
|
|
388
|
+
'{Ayen}{Lamed}{Ayen}{Khof}' '{Ayen}{Lamed}{Ayen}' '{Lamed}{Ayen}{Khof}' '{Ayen}{Lamed}{Ayen}{Samekh}' '{Ayen}{Lamed}{Ayen}{Nun}'
|
|
389
|
+
( R1 delete )
|
|
390
|
+
|
|
391
|
+
// Noun ending: -ist
|
|
392
|
+
'{Yud}{Samekh}{Tes}'
|
|
393
|
+
(
|
|
394
|
+
// Exceptions: -gist, -shist
|
|
395
|
+
( ('{Giml}' or '{Shin}') try (R1plus3 <- '{Yud}{Samekh}') )
|
|
396
|
+
or
|
|
397
|
+
( R1 delete )
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
// Noun ending: -istn
|
|
401
|
+
'{Yud}{Samekh}{Tes}{Nun}'
|
|
402
|
+
( R1 delete )
|
|
403
|
+
|
|
404
|
+
// Verb ending: -stu
|
|
405
|
+
'{Samekh}{Tes}{Vov}'
|
|
406
|
+
( R1 delete )
|
|
407
|
+
|
|
408
|
+
// Superlative ending: -ster, -ste, -stn
|
|
409
|
+
'{Samekh}{Tes}{Ayen}{Reysh}' '{Samekh}{Tes}{Ayen}' '{Samekh}{Tes}{Nun}'
|
|
410
|
+
( R1 delete )
|
|
411
|
+
|
|
412
|
+
// Ambiguous verb ending: -st
|
|
413
|
+
'{Samekh}{Tes}'
|
|
414
|
+
( R1 delete )
|
|
415
|
+
)
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
do (
|
|
419
|
+
[substring] among(
|
|
420
|
+
// Noun endings: -ung, -hayt, -kayt, -ikayt, -shaft
|
|
421
|
+
'{Vov}{Nun}{Giml}' '{Hey}{TsveyYudn}{Tes}' '{Kuf}{TsveyYudn}{Tes}' '{Yud}{Kuf}{TsveyYudn}{Tes}' '{Shin}{Alef}{Fey}{Tes}'
|
|
422
|
+
( R1 delete )
|
|
423
|
+
|
|
424
|
+
// Diminutive endings: -l
|
|
425
|
+
'{Lamed}'
|
|
426
|
+
( R1 consonant delete )
|
|
427
|
+
)
|
|
428
|
+
)
|
|
429
|
+
|
|
430
|
+
do (
|
|
431
|
+
[substring] among(
|
|
432
|
+
// Adjective endings: -ig, -ik, -ish, -nik, -dik
|
|
433
|
+
'{Yud}{Giml}' '{Yud}{Kuf}' '{Yud}{Shin}' '{Nun}{Yud}{Kuf}' '{Dalet}{Yud}{Kuf}'
|
|
434
|
+
( R1 delete )
|
|
435
|
+
|
|
436
|
+
// Exceptions to above: -blik, -glik
|
|
437
|
+
'{Beys}{Lamed}{Yud}{Kuf}' '{Giml}{Lamed}{Yud}{Kuf}'
|
|
438
|
+
( true )
|
|
439
|
+
|
|
440
|
+
// Present participle endings: -ndik
|
|
441
|
+
'{Nun}{Dalet}{Yud}{Kuf}'
|
|
442
|
+
( R1 delete )
|
|
443
|
+
|
|
444
|
+
// Present participle ending -endik: delete if after a -ng, -nk, -n, -m, consonant+l, or vowel.
|
|
445
|
+
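// Otherwise, delete just the -ndik part.
// NOTE(review): the action below is an unconditional "R1 delete" of the whole -endik ending; the conditional rule described above is not implemented here - confirm intent.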
|
|
446
|
+
'{Ayen}{Nun}{Dalet}{Yud}{Kuf}'
|
|
447
|
+
( R1 delete )
|
|
448
|
+
)
|
|
449
|
+
)
|
|
450
|
+
|
|
451
|
+
do (repeat goto ( ['GE' or 'TSU'] delete ))
|
|
452
|
+
)
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
define stem as (
|
|
456
|
+
do prelude
|
|
457
|
+
do mark_regions
|
|
458
|
+
backwards
|
|
459
|
+
do standard_suffix
|
|
460
|
+
)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
// ISO-8859-2 character mappings.
|
|
2
|
+
|
|
3
|
+
stringdef U+00A0 hex 'A0'
|
|
4
|
+
stringdef U+0104 hex 'A1'
|
|
5
|
+
stringdef U+02D8 hex 'A2'
|
|
6
|
+
stringdef U+0141 hex 'A3'
|
|
7
|
+
stringdef U+00A4 hex 'A4'
|
|
8
|
+
stringdef U+013D hex 'A5'
|
|
9
|
+
stringdef U+015A hex 'A6'
|
|
10
|
+
stringdef U+00A7 hex 'A7'
|
|
11
|
+
stringdef U+00A8 hex 'A8'
|
|
12
|
+
stringdef U+0160 hex 'A9'
|
|
13
|
+
stringdef U+015E hex 'AA'
|
|
14
|
+
stringdef U+0164 hex 'AB'
|
|
15
|
+
stringdef U+0179 hex 'AC'
|
|
16
|
+
stringdef U+00AD hex 'AD'
|
|
17
|
+
stringdef U+017D hex 'AE'
|
|
18
|
+
stringdef U+017B hex 'AF'
|
|
19
|
+
stringdef U+00B0 hex 'B0'
|
|
20
|
+
stringdef U+0105 hex 'B1'
|
|
21
|
+
stringdef U+02DB hex 'B2'
|
|
22
|
+
stringdef U+0142 hex 'B3'
|
|
23
|
+
stringdef U+00B4 hex 'B4'
|
|
24
|
+
stringdef U+013E hex 'B5'
|
|
25
|
+
stringdef U+015B hex 'B6'
|
|
26
|
+
stringdef U+02C7 hex 'B7'
|
|
27
|
+
stringdef U+00B8 hex 'B8'
|
|
28
|
+
stringdef U+0161 hex 'B9'
|
|
29
|
+
stringdef U+015F hex 'BA'
|
|
30
|
+
stringdef U+0165 hex 'BB'
|
|
31
|
+
stringdef U+017A hex 'BC'
|
|
32
|
+
stringdef U+02DD hex 'BD'
|
|
33
|
+
stringdef U+017E hex 'BE'
|
|
34
|
+
stringdef U+017C hex 'BF'
|
|
35
|
+
stringdef U+0154 hex 'C0'
|
|
36
|
+
stringdef U+00C1 hex 'C1'
|
|
37
|
+
stringdef U+00C2 hex 'C2'
|
|
38
|
+
stringdef U+0102 hex 'C3'
|
|
39
|
+
stringdef U+00C4 hex 'C4'
|
|
40
|
+
stringdef U+0139 hex 'C5'
|
|
41
|
+
stringdef U+0106 hex 'C6'
|
|
42
|
+
stringdef U+00C7 hex 'C7'
|
|
43
|
+
stringdef U+010C hex 'C8'
|
|
44
|
+
stringdef U+00C9 hex 'C9'
|
|
45
|
+
stringdef U+0118 hex 'CA'
|
|
46
|
+
stringdef U+00CB hex 'CB'
|
|
47
|
+
stringdef U+011A hex 'CC'
|
|
48
|
+
stringdef U+00CD hex 'CD'
|
|
49
|
+
stringdef U+00CE hex 'CE'
|
|
50
|
+
stringdef U+010E hex 'CF'
|
|
51
|
+
stringdef U+0110 hex 'D0'
|
|
52
|
+
stringdef U+0143 hex 'D1'
|
|
53
|
+
stringdef U+0147 hex 'D2'
|
|
54
|
+
stringdef U+00D3 hex 'D3'
|
|
55
|
+
stringdef U+00D4 hex 'D4'
|
|
56
|
+
stringdef U+0150 hex 'D5'
|
|
57
|
+
stringdef U+00D6 hex 'D6'
|
|
58
|
+
stringdef U+00D7 hex 'D7'
|
|
59
|
+
stringdef U+0158 hex 'D8'
|
|
60
|
+
stringdef U+016E hex 'D9'
|
|
61
|
+
stringdef U+00DA hex 'DA'
|
|
62
|
+
stringdef U+0170 hex 'DB'
|
|
63
|
+
stringdef U+00DC hex 'DC'
|
|
64
|
+
stringdef U+00DD hex 'DD'
|
|
65
|
+
stringdef U+0162 hex 'DE'
|
|
66
|
+
stringdef U+00DF hex 'DF'
|
|
67
|
+
stringdef U+0155 hex 'E0'
|
|
68
|
+
stringdef U+00E1 hex 'E1'
|
|
69
|
+
stringdef U+00E2 hex 'E2'
|
|
70
|
+
stringdef U+0103 hex 'E3'
|
|
71
|
+
stringdef U+00E4 hex 'E4'
|
|
72
|
+
stringdef U+013A hex 'E5'
|
|
73
|
+
stringdef U+0107 hex 'E6'
|
|
74
|
+
stringdef U+00E7 hex 'E7'
|
|
75
|
+
stringdef U+010D hex 'E8'
|
|
76
|
+
stringdef U+00E9 hex 'E9'
|
|
77
|
+
stringdef U+0119 hex 'EA'
|
|
78
|
+
stringdef U+00EB hex 'EB'
|
|
79
|
+
stringdef U+011B hex 'EC'
|
|
80
|
+
stringdef U+00ED hex 'ED'
|
|
81
|
+
stringdef U+00EE hex 'EE'
|
|
82
|
+
stringdef U+010F hex 'EF'
|
|
83
|
+
stringdef U+0111 hex 'F0'
|
|
84
|
+
stringdef U+0144 hex 'F1'
|
|
85
|
+
stringdef U+0148 hex 'F2'
|
|
86
|
+
stringdef U+00F3 hex 'F3'
|
|
87
|
+
stringdef U+00F4 hex 'F4'
|
|
88
|
+
stringdef U+0151 hex 'F5'
|
|
89
|
+
stringdef U+00F6 hex 'F6'
|
|
90
|
+
stringdef U+00F7 hex 'F7'
|
|
91
|
+
stringdef U+0159 hex 'F8'
|
|
92
|
+
stringdef U+016F hex 'F9'
|
|
93
|
+
stringdef U+00FA hex 'FA'
|
|
94
|
+
stringdef U+0171 hex 'FB'
|
|
95
|
+
stringdef U+00FC hex 'FC'
|
|
96
|
+
stringdef U+00FD hex 'FD'
|
|
97
|
+
stringdef U+0163 hex 'FE'
|
|
98
|
+
stringdef U+02D9 hex 'FF'
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// KOI8-R character mappings.
|
|
2
|
+
|
|
3
|
+
stringdef U+00A0 hex '9A'
|
|
4
|
+
stringdef U+00A9 hex 'BF'
|
|
5
|
+
stringdef U+00B0 hex '9C'
|
|
6
|
+
stringdef U+00B2 hex '9D'
|
|
7
|
+
stringdef U+00B7 hex '9E'
|
|
8
|
+
stringdef U+00F7 hex '9F'
|
|
9
|
+
stringdef U+0401 hex 'B3'
|
|
10
|
+
stringdef U+0410 hex 'E1'
|
|
11
|
+
stringdef U+0411 hex 'E2'
|
|
12
|
+
stringdef U+0412 hex 'F7'
|
|
13
|
+
stringdef U+0413 hex 'E7'
|
|
14
|
+
stringdef U+0414 hex 'E4'
|
|
15
|
+
stringdef U+0415 hex 'E5'
|
|
16
|
+
stringdef U+0416 hex 'F6'
|
|
17
|
+
stringdef U+0417 hex 'FA'
|
|
18
|
+
stringdef U+0418 hex 'E9'
|
|
19
|
+
stringdef U+0419 hex 'EA'
|
|
20
|
+
stringdef U+041A hex 'EB'
|
|
21
|
+
stringdef U+041B hex 'EC'
|
|
22
|
+
stringdef U+041C hex 'ED'
|
|
23
|
+
stringdef U+041D hex 'EE'
|
|
24
|
+
stringdef U+041E hex 'EF'
|
|
25
|
+
stringdef U+041F hex 'F0'
|
|
26
|
+
stringdef U+0420 hex 'F2'
|
|
27
|
+
stringdef U+0421 hex 'F3'
|
|
28
|
+
stringdef U+0422 hex 'F4'
|
|
29
|
+
stringdef U+0423 hex 'F5'
|
|
30
|
+
stringdef U+0424 hex 'E6'
|
|
31
|
+
stringdef U+0425 hex 'E8'
|
|
32
|
+
stringdef U+0426 hex 'E3'
|
|
33
|
+
stringdef U+0427 hex 'FE'
|
|
34
|
+
stringdef U+0428 hex 'FB'
|
|
35
|
+
stringdef U+0429 hex 'FD'
|
|
36
|
+
stringdef U+042A hex 'FF'
|
|
37
|
+
stringdef U+042B hex 'F9'
|
|
38
|
+
stringdef U+042C hex 'F8'
|
|
39
|
+
stringdef U+042D hex 'FC'
|
|
40
|
+
stringdef U+042E hex 'E0'
|
|
41
|
+
stringdef U+042F hex 'F1'
|
|
42
|
+
stringdef U+0430 hex 'C1'
|
|
43
|
+
stringdef U+0431 hex 'C2'
|
|
44
|
+
stringdef U+0432 hex 'D7'
|
|
45
|
+
stringdef U+0433 hex 'C7'
|
|
46
|
+
stringdef U+0434 hex 'C4'
|
|
47
|
+
stringdef U+0435 hex 'C5'
|
|
48
|
+
stringdef U+0436 hex 'D6'
|
|
49
|
+
stringdef U+0437 hex 'DA'
|
|
50
|
+
stringdef U+0438 hex 'C9'
|
|
51
|
+
stringdef U+0439 hex 'CA'
|
|
52
|
+
stringdef U+043A hex 'CB'
|
|
53
|
+
stringdef U+043B hex 'CC'
|
|
54
|
+
stringdef U+043C hex 'CD'
|
|
55
|
+
stringdef U+043D hex 'CE'
|
|
56
|
+
stringdef U+043E hex 'CF'
|
|
57
|
+
stringdef U+043F hex 'D0'
|
|
58
|
+
stringdef U+0440 hex 'D2'
|
|
59
|
+
stringdef U+0441 hex 'D3'
|
|
60
|
+
stringdef U+0442 hex 'D4'
|
|
61
|
+
stringdef U+0443 hex 'D5'
|
|
62
|
+
stringdef U+0444 hex 'C6'
|
|
63
|
+
stringdef U+0445 hex 'C8'
|
|
64
|
+
stringdef U+0446 hex 'C3'
|
|
65
|
+
stringdef U+0447 hex 'DE'
|
|
66
|
+
stringdef U+0448 hex 'DB'
|
|
67
|
+
stringdef U+0449 hex 'DD'
|
|
68
|
+
stringdef U+044A hex 'DF'
|
|
69
|
+
stringdef U+044B hex 'D9'
|
|
70
|
+
stringdef U+044C hex 'D8'
|
|
71
|
+
stringdef U+044D hex 'DC'
|
|
72
|
+
stringdef U+044E hex 'C0'
|
|
73
|
+
stringdef U+044F hex 'D1'
|
|
74
|
+
stringdef U+0451 hex 'A3'
|