language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20738
|
2
|
+
a 7004
|
3
|
+
e 5699
|
4
|
+
i 5321
|
5
|
+
s 4731
|
6
|
+
t 3769
|
7
|
+
l 3448
|
8
|
+
u 3446
|
9
|
+
n 2902
|
10
|
+
k 2584
|
11
|
+
d 2202
|
12
|
+
m 2043
|
13
|
+
a_ 1758
|
14
|
+
o 1684
|
15
|
+
r 1429
|
16
|
+
g 1174
|
17
|
+
v 1151
|
18
|
+
e_ 1139
|
19
|
+
i_ 1136
|
20
|
+
_k 1136
|
21
|
+
s_ 1077
|
22
|
+
h 1009
|
23
|
+
, 995
|
24
|
+
,_ 995
|
25
|
+
_t 953
|
26
|
+
p 953
|
27
|
+
j 940
|
28
|
+
� 900
|
29
|
+
is 896
|
30
|
+
st 851
|
31
|
+
se 841
|
32
|
+
_s 822
|
33
|
+
. 821
|
34
|
+
as 801
|
35
|
+
d_ 801
|
36
|
+
le 800
|
37
|
+
ta 794
|
38
|
+
in 793
|
39
|
+
_m 790
|
40
|
+
� 754
|
41
|
+
._ 753
|
42
|
+
t_ 746
|
43
|
+
ma 710
|
44
|
+
_p 680
|
45
|
+
si 677
|
46
|
+
_v 660
|
47
|
+
es 636
|
48
|
+
al 626
|
49
|
+
us 619
|
50
|
+
el 602
|
51
|
+
_o 596
|
52
|
+
_e 586
|
53
|
+
ja 580
|
54
|
+
_j 563
|
55
|
+
te 562
|
56
|
+
� 549
|
57
|
+
li 532
|
58
|
+
va 515
|
59
|
+
id 501
|
60
|
+
ol 498
|
61
|
+
tu 497
|
62
|
+
da 490
|
63
|
+
_n 480
|
64
|
+
ku 478
|
65
|
+
ud 459
|
66
|
+
nu 455
|
67
|
+
na 438
|
68
|
+
ei 432
|
69
|
+
ks 418
|
70
|
+
mi 411
|
71
|
+
ee 411
|
72
|
+
u_ 407
|
73
|
+
ka 400
|
74
|
+
n_ 394
|
75
|
+
b 394
|
76
|
+
ga 386
|
77
|
+
_l 384
|
78
|
+
_a 380
|
79
|
+
an 366
|
80
|
+
ja_ 365
|
81
|
+
et 358
|
82
|
+
me 358
|
83
|
+
l_ 350
|
84
|
+
at 348
|
85
|
+
la 341
|
86
|
+
ad 340
|
87
|
+
st_ 339
|
88
|
+
ne 336
|
89
|
+
ll 333
|
90
|
+
_ta 332
|
91
|
+
ra 330
|
92
|
+
_ja 328
|
93
|
+
ik 323
|
94
|
+
en 318
|
95
|
+
ni 308
|
96
|
+
ul 305
|
97
|
+
sa 302
|
98
|
+
_ol 302
|
99
|
+
nd 299
|
100
|
+
_ja_ 299
|
101
|
+
nud 296
|
102
|
+
ii 291
|
103
|
+
ko 286
|
104
|
+
_se 285
|
105
|
+
le_ 283
|
106
|
+
aa 281
|
107
|
+
is_ 281
|
108
|
+
gi 270
|
109
|
+
_te 269
|
110
|
+
ag 269
|
111
|
+
_va 268
|
112
|
+
_ku 267
|
113
|
+
ed 262
|
114
|
+
em 255
|
115
|
+
_mi 255
|
116
|
+
ma_ 247
|
117
|
+
ti 246
|
118
|
+
ri 245
|
119
|
+
_h 242
|
120
|
+
gu 239
|
121
|
+
id_ 238
|
122
|
+
ast 237
|
123
|
+
it 236
|
124
|
+
ga_ 236
|
125
|
+
un 232
|
126
|
+
de 230
|
127
|
+
ud_ 230
|
128
|
+
ha 230
|
129
|
+
ak 228
|
130
|
+
ah 228
|
131
|
+
uu 228
|
132
|
+
il 227
|
133
|
+
�i 226
|
134
|
+
as_ 223
|
135
|
+
ke 222
|
136
|
+
ar 220
|
137
|
+
a, 220
|
138
|
+
am 220
|
139
|
+
_ko 220
|
140
|
+
a,_ 220
|
141
|
+
_ka 220
|
142
|
+
ai 220
|
143
|
+
eg 216
|
144
|
+
sin 214
|
145
|
+
est 214
|
146
|
+
ui 214
|
147
|
+
he 214
|
148
|
+
ks_ 213
|
149
|
+
� 213
|
150
|
+
oo 213
|
151
|
+
ju 207
|
152
|
+
�r 205
|
153
|
+
ut 203
|
154
|
+
in_ 203
|
155
|
+
oli 201
|
156
|
+
ki 199
|
157
|
+
su 199
|
158
|
+
es_ 199
|
159
|
+
lt 198
|
160
|
+
ist 188
|
161
|
+
li_ 186
|
162
|
+
ea 186
|
163
|
+
vi 184
|
164
|
+
im 181
|
165
|
+
mu 181
|
166
|
+
se_ 180
|
167
|
+
ts 180
|
168
|
+
on 178
|
169
|
+
ise 178
|
170
|
+
ta_ 177
|
171
|
+
ek 176
|
172
|
+
_oli 176
|
173
|
+
sel 173
|
174
|
+
nud_ 173
|
175
|
+
_� 172
|
176
|
+
a. 171
|
177
|
+
nn 170
|
178
|
+
ema 169
|
179
|
+
ng 168
|
180
|
+
lu 168
|
181
|
+
ge 167
|
182
|
+
_si 166
|
183
|
+
_ei 165
|
184
|
+
_i 165
|
185
|
+
_ei_ 164
|
186
|
+
ei_ 164
|
187
|
+
_r 163
|
188
|
+
ole 161
|
189
|
+
pa 160
|
190
|
+
lle 160
|
191
|
+
a._ 160
|
192
|
+
ust 159
|
193
|
+
du 156
|
194
|
+
er 156
|
195
|
+
v� 153
|
196
|
+
da_ 153
|
197
|
+
min 152
|
198
|
+
et_ 151
|
199
|
+
d,_ 149
|
200
|
+
_M 149
|
201
|
+
ht 149
|
202
|
+
d, 149
|
203
|
+
M 149
|
204
|
+
kui 148
|
205
|
+
_et 147
|
206
|
+
K 147
|
207
|
+
_K 146
|
208
|
+
pe 145
|
209
|
+
gi_ 145
|
210
|
+
_v� 145
|
211
|
+
or 144
|
212
|
+
_tu 142
|
213
|
+
lt_ 141
|
214
|
+
_ma 141
|
215
|
+
asi 140
|
216
|
+
ve 139
|
217
|
+
us_ 138
|
218
|
+
ig 136
|
219
|
+
sin_ 136
|
220
|
+
ur 135
|
221
|
+
_ta_ 134
|
222
|
+
di 134
|
223
|
+
_et_ 134
|
224
|
+
s,_ 132
|
225
|
+
tas 132
|
226
|
+
s, 132
|
227
|
+
_kui 131
|
228
|
+
sk 131
|
229
|
+
re 130
|
230
|
+
po 129
|
231
|
+
oli_ 129
|
232
|
+
om 129
|
233
|
+
�i 128
|
234
|
+
inu 128
|
235
|
+
_na 128
|
236
|
+
_oli_ 128
|
237
|
+
_sa 128
|
238
|
+
aj 128
|
239
|
+
mis 127
|
240
|
+
ui_ 127
|
241
|
+
_me 127
|
242
|
+
_pa 126
|
243
|
+
tus 125
|
244
|
+
pi 125
|
245
|
+
te_ 124
|
246
|
+
�l 123
|
247
|
+
- 123
|
248
|
+
est_ 122
|
249
|
+
_on 121
|
250
|
+
kk 121
|
251
|
+
tt 120
|
252
|
+
aga 119
|
253
|
+
na_ 119
|
254
|
+
_T 119
|
255
|
+
T 119
|
256
|
+
b_ 118
|
257
|
+
al_ 118
|
258
|
+
sta 118
|
259
|
+
_mu 116
|
260
|
+
_ju 116
|
261
|
+
ida 116
|
262
|
+
aks 116
|
263
|
+
gu_ 116
|
264
|
+
_ni 116
|
265
|
+
s. 116
|
266
|
+
ad_ 116
|
267
|
+
_pe 114
|
268
|
+
eks 114
|
269
|
+
ev 114
|
270
|
+
end 113
|
271
|
+
s._ 113
|
272
|
+
use 111
|
273
|
+
�ra 111
|
274
|
+
_po 111
|
275
|
+
_min 110
|
276
|
+
S 110
|
277
|
+
aja 110
|
278
|
+
_la 110
|
279
|
+
ele 109
|
280
|
+
el_ 108
|
281
|
+
on_ 108
|
282
|
+
ab 108
|
283
|
+
_S 108
|
284
|
+
av 107
|
285
|
+
ing 107
|
286
|
+
kui_ 106
|
287
|
+
_on_ 106
|
288
|
+
au 104
|
289
|
+
ne_ 104
|
290
|
+
ti_ 104
|
291
|
+
ell 103
|
292
|
+
ae 101
|
293
|
+
k� 101
|
294
|
+
ed_ 100
|
295
|
+
_ke 99
|
296
|
+
ata 99
|
297
|
+
iis 99
|
298
|
+
! 98
|
299
|
+
!_ 98
|
300
|
+
sid 98
|
301
|
+
nda 98
|
302
|
+
eh 98
|
303
|
+
lle_ 97
|
304
|
+
pu 97
|
305
|
+
�� 97
|
306
|
+
v�i 97
|
307
|
+
ine 96
|
308
|
+
t, 96
|
309
|
+
e,_ 96
|
310
|
+
ale 96
|
311
|
+
_v�i 96
|
312
|
+
t,_ 96
|
313
|
+
e, 96
|
314
|
+
eda 96
|
315
|
+
uk 95
|
316
|
+
ast_ 95
|
317
|
+
ld 95
|
318
|
+
? 94
|
319
|
+
_kui_ 94
|
320
|
+
_sel 93
|
321
|
+
_k� 93
|
322
|
+
tul 93
|
323
|
+
ega 93
|
324
|
+
lg 92
|
325
|
+
sii 92
|
326
|
+
val 92
|
327
|
+
e. 92
|
328
|
+
_su 92
|
329
|
+
ug 92
|
330
|
+
oh 92
|
331
|
+
k� 92
|
332
|
+
d. 91
|
333
|
+
ee_ 91
|
334
|
+
see 91
|
335
|
+
e._ 91
|
336
|
+
�� 91
|
337
|
+
oma 91
|
338
|
+
_ole 90
|
339
|
+
ses 90
|
340
|
+
stu 90
|
341
|
+
�t 90
|
342
|
+
�� 90
|
343
|
+
_om 89
|
344
|
+
me_ 89
|
345
|
+
ot 89
|
346
|
+
d._ 89
|
347
|
+
_sii 88
|
348
|
+
to 88
|
349
|
+
_en 87
|
350
|
+
atu 87
|
351
|
+
?_ 87
|
352
|
+
A 86
|
353
|
+
J 86
|
354
|
+
pea 86
|
355
|
+
j� 85
|
356
|
+
_A 85
|
357
|
+
_see 85
|
358
|
+
ime 84
|
359
|
+
_pi 84
|
360
|
+
_ha 84
|
361
|
+
m� 84
|
362
|
+
n� 84
|
363
|
+
_J 84
|
364
|
+
les 84
|
365
|
+
ste 84
|
366
|
+
kas 84
|
367
|
+
_� 84
|
368
|
+
v� 83
|
369
|
+
E 83
|
370
|
+
p� 83
|
371
|
+
_ve 83
|
372
|
+
_E 83
|
373
|
+
eis 82
|
374
|
+
_j� 81
|
375
|
+
_pea 81
|
376
|
+
_m� 80
|
377
|
+
um 80
|
378
|
+
_k� 80
|
379
|
+
iku 80
|
380
|
+
�d 80
|
381
|
+
all 79
|
382
|
+
eid 79
|
383
|
+
ba 79
|
384
|
+
_v� 79
|
385
|
+
ina 78
|
386
|
+
lj 78
|
387
|
+
sid_ 78
|
388
|
+
hu 78
|
389
|
+
tun 78
|
390
|
+
l� 78
|
391
|
+
_oma 77
|
392
|
+
i,_ 77
|
393
|
+
i, 77
|
394
|
+
agu 77
|
395
|
+
uh 77
|
396
|
+
lm 76
|
397
|
+
ras 76
|
398
|
+
ss 76
|
399
|
+
k� 76
|
400
|
+
ees 76
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 19984
|
2
|
+
a 9133
|
3
|
+
i 8384
|
4
|
+
t 7797
|
5
|
+
e 6481
|
6
|
+
n 6431
|
7
|
+
s 5897
|
8
|
+
l 4504
|
9
|
+
o 4163
|
10
|
+
u 4106
|
11
|
+
k 4013
|
12
|
+
� 3354
|
13
|
+
n_ 2868
|
14
|
+
m 2569
|
15
|
+
a_ 1987
|
16
|
+
v 1905
|
17
|
+
r 1827
|
18
|
+
ta 1580
|
19
|
+
en 1553
|
20
|
+
is 1515
|
21
|
+
h 1508
|
22
|
+
y 1462
|
23
|
+
st 1390
|
24
|
+
in 1375
|
25
|
+
p 1342
|
26
|
+
j 1333
|
27
|
+
an 1139
|
28
|
+
si 1073
|
29
|
+
tt 1030
|
30
|
+
te 1008
|
31
|
+
en_ 982
|
32
|
+
_k 980
|
33
|
+
it 974
|
34
|
+
ll 947
|
35
|
+
aa 942
|
36
|
+
�_ 902
|
37
|
+
va 878
|
38
|
+
el 855
|
39
|
+
_t 851
|
40
|
+
ka 846
|
41
|
+
i_ 835
|
42
|
+
. 832
|
43
|
+
se 818
|
44
|
+
li 806
|
45
|
+
t� 804
|
46
|
+
oi 767
|
47
|
+
ai 744
|
48
|
+
._ 739
|
49
|
+
tu 734
|
50
|
+
_o 719
|
51
|
+
mi 715
|
52
|
+
al 703
|
53
|
+
on 684
|
54
|
+
d 681
|
55
|
+
_v 662
|
56
|
+
et 654
|
57
|
+
_j 641
|
58
|
+
t_ 635
|
59
|
+
ti 632
|
60
|
+
_m 628
|
61
|
+
_s 620
|
62
|
+
ja 616
|
63
|
+
ma 596
|
64
|
+
sa 595
|
65
|
+
la 582
|
66
|
+
ist 575
|
67
|
+
_e 565
|
68
|
+
to 565
|
69
|
+
ks 557
|
70
|
+
in_ 554
|
71
|
+
es 551
|
72
|
+
il 538
|
73
|
+
an_ 536
|
74
|
+
ki 527
|
75
|
+
, 525
|
76
|
+
ku 525
|
77
|
+
,_ 524
|
78
|
+
us 520
|
79
|
+
as 514
|
80
|
+
nt 512
|
81
|
+
ri 495
|
82
|
+
ke 494
|
83
|
+
at 491
|
84
|
+
_p 485
|
85
|
+
le 484
|
86
|
+
ik 483
|
87
|
+
ss 477
|
88
|
+
ut 469
|
89
|
+
� 469
|
90
|
+
sta 460
|
91
|
+
ee 459
|
92
|
+
uu 458
|
93
|
+
ol 457
|
94
|
+
ta_ 451
|
95
|
+
ne 445
|
96
|
+
�� 445
|
97
|
+
ei 443
|
98
|
+
uo 436
|
99
|
+
ko 433
|
100
|
+
un 430
|
101
|
+
lu 421
|
102
|
+
ii 420
|
103
|
+
e_ 418
|
104
|
+
nn 413
|
105
|
+
_h 412
|
106
|
+
ar 408
|
107
|
+
er 402
|
108
|
+
�n 396
|
109
|
+
ja_ 386
|
110
|
+
im 381
|
111
|
+
on_ 365
|
112
|
+
_va 363
|
113
|
+
aan 354
|
114
|
+
_a 352
|
115
|
+
me 350
|
116
|
+
ak 345
|
117
|
+
ssa 331
|
118
|
+
na 330
|
119
|
+
ie 329
|
120
|
+
pa 327
|
121
|
+
_ja 326
|
122
|
+
ia 325
|
123
|
+
t�_ 322
|
124
|
+
_l 319
|
125
|
+
vi 317
|
126
|
+
ise 316
|
127
|
+
tta 315
|
128
|
+
de 314
|
129
|
+
os 312
|
130
|
+
lli 309
|
131
|
+
_ja_ 304
|
132
|
+
jo 295
|
133
|
+
v� 290
|
134
|
+
su 289
|
135
|
+
au 287
|
136
|
+
lis 286
|
137
|
+
_on 285
|
138
|
+
s� 284
|
139
|
+
uk 280
|
140
|
+
am 280
|
141
|
+
ot 280
|
142
|
+
ty 275
|
143
|
+
ett 271
|
144
|
+
tt� 270
|
145
|
+
ni 269
|
146
|
+
l� 267
|
147
|
+
ksi 264
|
148
|
+
nk 264
|
149
|
+
ht 263
|
150
|
+
ul 261
|
151
|
+
ell 261
|
152
|
+
sa_ 259
|
153
|
+
ha 257
|
154
|
+
sen 257
|
155
|
+
a. 254
|
156
|
+
isi 253
|
157
|
+
ste 253
|
158
|
+
aan_ 252
|
159
|
+
_on_ 252
|
160
|
+
_ka 252
|
161
|
+
sk 251
|
162
|
+
kk 246
|
163
|
+
itt 245
|
164
|
+
ok 242
|
165
|
+
a._ 239
|
166
|
+
all 239
|
167
|
+
yt 239
|
168
|
+
m� 237
|
169
|
+
mu 237
|
170
|
+
av 237
|
171
|
+
_y 236
|
172
|
+
lla 233
|
173
|
+
taa 231
|
174
|
+
ais 231
|
175
|
+
een 230
|
176
|
+
K 230
|
177
|
+
lt 228
|
178
|
+
s_ 227
|
179
|
+
ast 227
|
180
|
+
iv 226
|
181
|
+
ssa_ 225
|
182
|
+
ra 225
|
183
|
+
- 223
|
184
|
+
kse 223
|
185
|
+
oit 220
|
186
|
+
om 220
|
187
|
+
T 219
|
188
|
+
_ku 218
|
189
|
+
�n_ 216
|
190
|
+
aa_ 214
|
191
|
+
at_ 214
|
192
|
+
tel 211
|
193
|
+
ui 210
|
194
|
+
si_ 208
|
195
|
+
rk 207
|
196
|
+
sta_ 207
|
197
|
+
_jo 203
|
198
|
+
k� 202
|
199
|
+
_K 201
|
200
|
+
est 200
|
201
|
+
em 200
|
202
|
+
he 199
|
203
|
+
_n 199
|
204
|
+
vo 198
|
205
|
+
_ta 196
|
206
|
+
eh 196
|
207
|
+
_ol 196
|
208
|
+
S 196
|
209
|
+
nta 196
|
210
|
+
_ko 194
|
211
|
+
je 194
|
212
|
+
st� 194
|
213
|
+
�r 193
|
214
|
+
ust 191
|
215
|
+
mis 191
|
216
|
+
ns 190
|
217
|
+
pu 189
|
218
|
+
nen 188
|
219
|
+
�t 188
|
220
|
+
toi 188
|
221
|
+
iin 187
|
222
|
+
ten 187
|
223
|
+
min 186
|
224
|
+
ista 185
|
225
|
+
hd 184
|
226
|
+
a, 184
|
227
|
+
a,_ 184
|
228
|
+
sen_ 183
|
229
|
+
E 182
|
230
|
+
lle 181
|
231
|
+
vat 179
|
232
|
+
ill 177
|
233
|
+
no 176
|
234
|
+
p� 176
|
235
|
+
lm 176
|
236
|
+
llis 175
|
237
|
+
n. 175
|
238
|
+
io 172
|
239
|
+
ine 171
|
240
|
+
n._ 170
|
241
|
+
pi 169
|
242
|
+
uks 168
|
243
|
+
ava 168
|
244
|
+
��n 166
|
245
|
+
nen_ 165
|
246
|
+
ah 165
|
247
|
+
_mu 164
|
248
|
+
tus 163
|
249
|
+
mm 162
|
250
|
+
_to 162
|
251
|
+
ek 160
|
252
|
+
int 159
|
253
|
+
_r 159
|
254
|
+
lin 158
|
255
|
+
oim 158
|
256
|
+
_T 158
|
257
|
+
A 158
|
258
|
+
imi 157
|
259
|
+
t� 157
|
260
|
+
la_ 157
|
261
|
+
j� 157
|
262
|
+
aj 156
|
263
|
+
yh 155
|
264
|
+
o_ 154
|
265
|
+
lo 154
|
266
|
+
oli 153
|
267
|
+
een_ 153
|
268
|
+
le_ 153
|
269
|
+
_si 153
|
270
|
+
g 152
|
271
|
+
aik 151
|
272
|
+
vat_ 150
|
273
|
+
L 149
|
274
|
+
ur 149
|
275
|
+
ti_ 149
|
276
|
+
sia 148
|
277
|
+
ite 147
|
278
|
+
inen 147
|
279
|
+
ain 146
|
280
|
+
sti 146
|
281
|
+
lla_ 146
|
282
|
+
ys 145
|
283
|
+
_mi 145
|
284
|
+
val 144
|
285
|
+
stu 144
|
286
|
+
�m 144
|
287
|
+
alli 143
|
288
|
+
pe 143
|
289
|
+
utt 142
|
290
|
+
et_ 141
|
291
|
+
_tu 141
|
292
|
+
eri 140
|
293
|
+
_E 140
|
294
|
+
: 140
|
295
|
+
nki 139
|
296
|
+
ir 139
|
297
|
+
ll� 138
|
298
|
+
up 138
|
299
|
+
�i 137
|
300
|
+
ama 137
|
301
|
+
_ha 135
|
302
|
+
id 135
|
303
|
+
_se 135
|
304
|
+
po 134
|
305
|
+
inen_ 134
|
306
|
+
tte 133
|
307
|
+
nna 133
|
308
|
+
ten_ 132
|
309
|
+
or 132
|
310
|
+
ts 131
|
311
|
+
n� 131
|
312
|
+
yk 131
|
313
|
+
�s 131
|
314
|
+
_S 130
|
315
|
+
ses 130
|
316
|
+
ve 130
|
317
|
+
ess 129
|
318
|
+
�l 129
|
319
|
+
ita 129
|
320
|
+
lai 129
|
321
|
+
H 129
|
322
|
+
van 127
|
323
|
+
�k 127
|
324
|
+
kin 127
|
325
|
+
N 127
|
326
|
+
_te 126
|
327
|
+
den 126
|
328
|
+
tee 126
|
329
|
+
P 126
|
330
|
+
kaa 126
|
331
|
+
iin_ 125
|
332
|
+
kun 125
|
333
|
+
ois 125
|
334
|
+
sit 125
|
335
|
+
oh 124
|
336
|
+
V 124
|
337
|
+
y� 124
|
338
|
+
�v 124
|
339
|
+
tav 124
|
340
|
+
voi 124
|
341
|
+
ia_ 123
|
342
|
+
I 123
|
343
|
+
oll 123
|
344
|
+
maa 122
|
345
|
+
ih 122
|
346
|
+
oj 122
|
347
|
+
rj 121
|
348
|
+
ro 121
|
349
|
+
ikk 120
|
350
|
+
so 120
|
351
|
+
oo 120
|
352
|
+
oimi 120
|
353
|
+
do 120
|
354
|
+
pp 119
|
355
|
+
M 119
|
356
|
+
_ei 118
|
357
|
+
toim 118
|
358
|
+
op 118
|
359
|
+
uut 118
|
360
|
+
tet 118
|
361
|
+
_i 118
|
362
|
+
_ma 117
|
363
|
+
vai 117
|
364
|
+
l�_ 116
|
365
|
+
u_ 116
|
366
|
+
sy 116
|
367
|
+
kau 116
|
368
|
+
utta 116
|
369
|
+
un_ 115
|
370
|
+
eu 115
|
371
|
+
ss� 115
|
372
|
+
tti 115
|
373
|
+
_sa 115
|
374
|
+
mp 114
|
375
|
+
eis 114
|
376
|
+
ka_ 112
|
377
|
+
ett� 112
|
378
|
+
taa_ 111
|
379
|
+
_et 111
|
380
|
+
hu 111
|
381
|
+
itu 111
|
382
|
+
suu 111
|
383
|
+
den_ 111
|
384
|
+
ksen 110
|
385
|
+
ap 110
|
386
|
+
_ke 110
|
387
|
+
uv 110
|
388
|
+
tam 110
|
389
|
+
yv 109
|
390
|
+
aup 109
|
391
|
+
st�_ 109
|
392
|
+
asta 109
|
393
|
+
�y 109
|
394
|
+
kan 108
|
395
|
+
nu 108
|
396
|
+
ukse 108
|
397
|
+
_toi 107
|
398
|
+
ien 107
|
399
|
+
hi 107
|
400
|
+
iss 107
|