language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20738
|
2
|
+
a 7004
|
3
|
+
e 5699
|
4
|
+
i 5321
|
5
|
+
s 4731
|
6
|
+
t 3769
|
7
|
+
l 3448
|
8
|
+
u 3446
|
9
|
+
n 2902
|
10
|
+
k 2584
|
11
|
+
d 2202
|
12
|
+
m 2043
|
13
|
+
a_ 1758
|
14
|
+
o 1684
|
15
|
+
r 1429
|
16
|
+
g 1174
|
17
|
+
v 1151
|
18
|
+
e_ 1139
|
19
|
+
i_ 1136
|
20
|
+
_k 1136
|
21
|
+
s_ 1077
|
22
|
+
h 1009
|
23
|
+
, 995
|
24
|
+
,_ 995
|
25
|
+
_t 953
|
26
|
+
p 953
|
27
|
+
j 940
|
28
|
+
� 900
|
29
|
+
is 896
|
30
|
+
st 851
|
31
|
+
se 841
|
32
|
+
_s 822
|
33
|
+
. 821
|
34
|
+
as 801
|
35
|
+
d_ 801
|
36
|
+
le 800
|
37
|
+
ta 794
|
38
|
+
in 793
|
39
|
+
_m 790
|
40
|
+
� 754
|
41
|
+
._ 753
|
42
|
+
t_ 746
|
43
|
+
ma 710
|
44
|
+
_p 680
|
45
|
+
si 677
|
46
|
+
_v 660
|
47
|
+
es 636
|
48
|
+
al 626
|
49
|
+
us 619
|
50
|
+
el 602
|
51
|
+
_o 596
|
52
|
+
_e 586
|
53
|
+
ja 580
|
54
|
+
_j 563
|
55
|
+
te 562
|
56
|
+
� 549
|
57
|
+
li 532
|
58
|
+
va 515
|
59
|
+
id 501
|
60
|
+
ol 498
|
61
|
+
tu 497
|
62
|
+
da 490
|
63
|
+
_n 480
|
64
|
+
ku 478
|
65
|
+
ud 459
|
66
|
+
nu 455
|
67
|
+
na 438
|
68
|
+
ei 432
|
69
|
+
ks 418
|
70
|
+
mi 411
|
71
|
+
ee 411
|
72
|
+
u_ 407
|
73
|
+
ka 400
|
74
|
+
n_ 394
|
75
|
+
b 394
|
76
|
+
ga 386
|
77
|
+
_l 384
|
78
|
+
_a 380
|
79
|
+
an 366
|
80
|
+
ja_ 365
|
81
|
+
et 358
|
82
|
+
me 358
|
83
|
+
l_ 350
|
84
|
+
at 348
|
85
|
+
la 341
|
86
|
+
ad 340
|
87
|
+
st_ 339
|
88
|
+
ne 336
|
89
|
+
ll 333
|
90
|
+
_ta 332
|
91
|
+
ra 330
|
92
|
+
_ja 328
|
93
|
+
ik 323
|
94
|
+
en 318
|
95
|
+
ni 308
|
96
|
+
ul 305
|
97
|
+
sa 302
|
98
|
+
_ol 302
|
99
|
+
nd 299
|
100
|
+
_ja_ 299
|
101
|
+
nud 296
|
102
|
+
ii 291
|
103
|
+
ko 286
|
104
|
+
_se 285
|
105
|
+
le_ 283
|
106
|
+
aa 281
|
107
|
+
is_ 281
|
108
|
+
gi 270
|
109
|
+
_te 269
|
110
|
+
ag 269
|
111
|
+
_va 268
|
112
|
+
_ku 267
|
113
|
+
ed 262
|
114
|
+
em 255
|
115
|
+
_mi 255
|
116
|
+
ma_ 247
|
117
|
+
ti 246
|
118
|
+
ri 245
|
119
|
+
_h 242
|
120
|
+
gu 239
|
121
|
+
id_ 238
|
122
|
+
ast 237
|
123
|
+
it 236
|
124
|
+
ga_ 236
|
125
|
+
un 232
|
126
|
+
de 230
|
127
|
+
ud_ 230
|
128
|
+
ha 230
|
129
|
+
ak 228
|
130
|
+
ah 228
|
131
|
+
uu 228
|
132
|
+
il 227
|
133
|
+
�i 226
|
134
|
+
as_ 223
|
135
|
+
ke 222
|
136
|
+
ar 220
|
137
|
+
a, 220
|
138
|
+
am 220
|
139
|
+
_ko 220
|
140
|
+
a,_ 220
|
141
|
+
_ka 220
|
142
|
+
ai 220
|
143
|
+
eg 216
|
144
|
+
sin 214
|
145
|
+
est 214
|
146
|
+
ui 214
|
147
|
+
he 214
|
148
|
+
ks_ 213
|
149
|
+
� 213
|
150
|
+
oo 213
|
151
|
+
ju 207
|
152
|
+
�r 205
|
153
|
+
ut 203
|
154
|
+
in_ 203
|
155
|
+
oli 201
|
156
|
+
ki 199
|
157
|
+
su 199
|
158
|
+
es_ 199
|
159
|
+
lt 198
|
160
|
+
ist 188
|
161
|
+
li_ 186
|
162
|
+
ea 186
|
163
|
+
vi 184
|
164
|
+
im 181
|
165
|
+
mu 181
|
166
|
+
se_ 180
|
167
|
+
ts 180
|
168
|
+
on 178
|
169
|
+
ise 178
|
170
|
+
ta_ 177
|
171
|
+
ek 176
|
172
|
+
_oli 176
|
173
|
+
sel 173
|
174
|
+
nud_ 173
|
175
|
+
_� 172
|
176
|
+
a. 171
|
177
|
+
nn 170
|
178
|
+
ema 169
|
179
|
+
ng 168
|
180
|
+
lu 168
|
181
|
+
ge 167
|
182
|
+
_si 166
|
183
|
+
_ei 165
|
184
|
+
_i 165
|
185
|
+
_ei_ 164
|
186
|
+
ei_ 164
|
187
|
+
_r 163
|
188
|
+
ole 161
|
189
|
+
pa 160
|
190
|
+
lle 160
|
191
|
+
a._ 160
|
192
|
+
ust 159
|
193
|
+
du 156
|
194
|
+
er 156
|
195
|
+
v� 153
|
196
|
+
da_ 153
|
197
|
+
min 152
|
198
|
+
et_ 151
|
199
|
+
d,_ 149
|
200
|
+
_M 149
|
201
|
+
ht 149
|
202
|
+
d, 149
|
203
|
+
M 149
|
204
|
+
kui 148
|
205
|
+
_et 147
|
206
|
+
K 147
|
207
|
+
_K 146
|
208
|
+
pe 145
|
209
|
+
gi_ 145
|
210
|
+
_v� 145
|
211
|
+
or 144
|
212
|
+
_tu 142
|
213
|
+
lt_ 141
|
214
|
+
_ma 141
|
215
|
+
asi 140
|
216
|
+
ve 139
|
217
|
+
us_ 138
|
218
|
+
ig 136
|
219
|
+
sin_ 136
|
220
|
+
ur 135
|
221
|
+
_ta_ 134
|
222
|
+
di 134
|
223
|
+
_et_ 134
|
224
|
+
s,_ 132
|
225
|
+
tas 132
|
226
|
+
s, 132
|
227
|
+
_kui 131
|
228
|
+
sk 131
|
229
|
+
re 130
|
230
|
+
po 129
|
231
|
+
oli_ 129
|
232
|
+
om 129
|
233
|
+
�i 128
|
234
|
+
inu 128
|
235
|
+
_na 128
|
236
|
+
_oli_ 128
|
237
|
+
_sa 128
|
238
|
+
aj 128
|
239
|
+
mis 127
|
240
|
+
ui_ 127
|
241
|
+
_me 127
|
242
|
+
_pa 126
|
243
|
+
tus 125
|
244
|
+
pi 125
|
245
|
+
te_ 124
|
246
|
+
�l 123
|
247
|
+
- 123
|
248
|
+
est_ 122
|
249
|
+
_on 121
|
250
|
+
kk 121
|
251
|
+
tt 120
|
252
|
+
aga 119
|
253
|
+
na_ 119
|
254
|
+
_T 119
|
255
|
+
T 119
|
256
|
+
b_ 118
|
257
|
+
al_ 118
|
258
|
+
sta 118
|
259
|
+
_mu 116
|
260
|
+
_ju 116
|
261
|
+
ida 116
|
262
|
+
aks 116
|
263
|
+
gu_ 116
|
264
|
+
_ni 116
|
265
|
+
s. 116
|
266
|
+
ad_ 116
|
267
|
+
_pe 114
|
268
|
+
eks 114
|
269
|
+
ev 114
|
270
|
+
end 113
|
271
|
+
s._ 113
|
272
|
+
use 111
|
273
|
+
�ra 111
|
274
|
+
_po 111
|
275
|
+
_min 110
|
276
|
+
S 110
|
277
|
+
aja 110
|
278
|
+
_la 110
|
279
|
+
ele 109
|
280
|
+
el_ 108
|
281
|
+
on_ 108
|
282
|
+
ab 108
|
283
|
+
_S 108
|
284
|
+
av 107
|
285
|
+
ing 107
|
286
|
+
kui_ 106
|
287
|
+
_on_ 106
|
288
|
+
au 104
|
289
|
+
ne_ 104
|
290
|
+
ti_ 104
|
291
|
+
ell 103
|
292
|
+
ae 101
|
293
|
+
k� 101
|
294
|
+
ed_ 100
|
295
|
+
_ke 99
|
296
|
+
ata 99
|
297
|
+
iis 99
|
298
|
+
! 98
|
299
|
+
!_ 98
|
300
|
+
sid 98
|
301
|
+
nda 98
|
302
|
+
eh 98
|
303
|
+
lle_ 97
|
304
|
+
pu 97
|
305
|
+
�� 97
|
306
|
+
v�i 97
|
307
|
+
ine 96
|
308
|
+
t, 96
|
309
|
+
e,_ 96
|
310
|
+
ale 96
|
311
|
+
_v�i 96
|
312
|
+
t,_ 96
|
313
|
+
e, 96
|
314
|
+
eda 96
|
315
|
+
uk 95
|
316
|
+
ast_ 95
|
317
|
+
ld 95
|
318
|
+
? 94
|
319
|
+
_kui_ 94
|
320
|
+
_sel 93
|
321
|
+
_k� 93
|
322
|
+
tul 93
|
323
|
+
ega 93
|
324
|
+
lg 92
|
325
|
+
sii 92
|
326
|
+
val 92
|
327
|
+
e. 92
|
328
|
+
_su 92
|
329
|
+
ug 92
|
330
|
+
oh 92
|
331
|
+
k� 92
|
332
|
+
d. 91
|
333
|
+
ee_ 91
|
334
|
+
see 91
|
335
|
+
e._ 91
|
336
|
+
�� 91
|
337
|
+
oma 91
|
338
|
+
_ole 90
|
339
|
+
ses 90
|
340
|
+
stu 90
|
341
|
+
�t 90
|
342
|
+
�� 90
|
343
|
+
_om 89
|
344
|
+
me_ 89
|
345
|
+
ot 89
|
346
|
+
d._ 89
|
347
|
+
_sii 88
|
348
|
+
to 88
|
349
|
+
_en 87
|
350
|
+
atu 87
|
351
|
+
?_ 87
|
352
|
+
A 86
|
353
|
+
J 86
|
354
|
+
pea 86
|
355
|
+
j� 85
|
356
|
+
_A 85
|
357
|
+
_see 85
|
358
|
+
ime 84
|
359
|
+
_pi 84
|
360
|
+
_ha 84
|
361
|
+
m� 84
|
362
|
+
n� 84
|
363
|
+
_J 84
|
364
|
+
les 84
|
365
|
+
ste 84
|
366
|
+
kas 84
|
367
|
+
_� 84
|
368
|
+
v� 83
|
369
|
+
E 83
|
370
|
+
p� 83
|
371
|
+
_ve 83
|
372
|
+
_E 83
|
373
|
+
eis 82
|
374
|
+
_j� 81
|
375
|
+
_pea 81
|
376
|
+
_m� 80
|
377
|
+
um 80
|
378
|
+
_k� 80
|
379
|
+
iku 80
|
380
|
+
�d 80
|
381
|
+
all 79
|
382
|
+
eid 79
|
383
|
+
ba 79
|
384
|
+
_v� 79
|
385
|
+
ina 78
|
386
|
+
lj 78
|
387
|
+
sid_ 78
|
388
|
+
hu 78
|
389
|
+
tun 78
|
390
|
+
l� 78
|
391
|
+
_oma 77
|
392
|
+
i,_ 77
|
393
|
+
i, 77
|
394
|
+
agu 77
|
395
|
+
uh 77
|
396
|
+
lm 76
|
397
|
+
ras 76
|
398
|
+
ss 76
|
399
|
+
k� 76
|
400
|
+
ees 76
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 19984
|
2
|
+
a 9133
|
3
|
+
i 8384
|
4
|
+
t 7797
|
5
|
+
e 6481
|
6
|
+
n 6431
|
7
|
+
s 5897
|
8
|
+
l 4504
|
9
|
+
o 4163
|
10
|
+
u 4106
|
11
|
+
k 4013
|
12
|
+
� 3354
|
13
|
+
n_ 2868
|
14
|
+
m 2569
|
15
|
+
a_ 1987
|
16
|
+
v 1905
|
17
|
+
r 1827
|
18
|
+
ta 1580
|
19
|
+
en 1553
|
20
|
+
is 1515
|
21
|
+
h 1508
|
22
|
+
y 1462
|
23
|
+
st 1390
|
24
|
+
in 1375
|
25
|
+
p 1342
|
26
|
+
j 1333
|
27
|
+
an 1139
|
28
|
+
si 1073
|
29
|
+
tt 1030
|
30
|
+
te 1008
|
31
|
+
en_ 982
|
32
|
+
_k 980
|
33
|
+
it 974
|
34
|
+
ll 947
|
35
|
+
aa 942
|
36
|
+
�_ 902
|
37
|
+
va 878
|
38
|
+
el 855
|
39
|
+
_t 851
|
40
|
+
ka 846
|
41
|
+
i_ 835
|
42
|
+
. 832
|
43
|
+
se 818
|
44
|
+
li 806
|
45
|
+
t� 804
|
46
|
+
oi 767
|
47
|
+
ai 744
|
48
|
+
._ 739
|
49
|
+
tu 734
|
50
|
+
_o 719
|
51
|
+
mi 715
|
52
|
+
al 703
|
53
|
+
on 684
|
54
|
+
d 681
|
55
|
+
_v 662
|
56
|
+
et 654
|
57
|
+
_j 641
|
58
|
+
t_ 635
|
59
|
+
ti 632
|
60
|
+
_m 628
|
61
|
+
_s 620
|
62
|
+
ja 616
|
63
|
+
ma 596
|
64
|
+
sa 595
|
65
|
+
la 582
|
66
|
+
ist 575
|
67
|
+
_e 565
|
68
|
+
to 565
|
69
|
+
ks 557
|
70
|
+
in_ 554
|
71
|
+
es 551
|
72
|
+
il 538
|
73
|
+
an_ 536
|
74
|
+
ki 527
|
75
|
+
, 525
|
76
|
+
ku 525
|
77
|
+
,_ 524
|
78
|
+
us 520
|
79
|
+
as 514
|
80
|
+
nt 512
|
81
|
+
ri 495
|
82
|
+
ke 494
|
83
|
+
at 491
|
84
|
+
_p 485
|
85
|
+
le 484
|
86
|
+
ik 483
|
87
|
+
ss 477
|
88
|
+
ut 469
|
89
|
+
� 469
|
90
|
+
sta 460
|
91
|
+
ee 459
|
92
|
+
uu 458
|
93
|
+
ol 457
|
94
|
+
ta_ 451
|
95
|
+
ne 445
|
96
|
+
�� 445
|
97
|
+
ei 443
|
98
|
+
uo 436
|
99
|
+
ko 433
|
100
|
+
un 430
|
101
|
+
lu 421
|
102
|
+
ii 420
|
103
|
+
e_ 418
|
104
|
+
nn 413
|
105
|
+
_h 412
|
106
|
+
ar 408
|
107
|
+
er 402
|
108
|
+
�n 396
|
109
|
+
ja_ 386
|
110
|
+
im 381
|
111
|
+
on_ 365
|
112
|
+
_va 363
|
113
|
+
aan 354
|
114
|
+
_a 352
|
115
|
+
me 350
|
116
|
+
ak 345
|
117
|
+
ssa 331
|
118
|
+
na 330
|
119
|
+
ie 329
|
120
|
+
pa 327
|
121
|
+
_ja 326
|
122
|
+
ia 325
|
123
|
+
t�_ 322
|
124
|
+
_l 319
|
125
|
+
vi 317
|
126
|
+
ise 316
|
127
|
+
tta 315
|
128
|
+
de 314
|
129
|
+
os 312
|
130
|
+
lli 309
|
131
|
+
_ja_ 304
|
132
|
+
jo 295
|
133
|
+
v� 290
|
134
|
+
su 289
|
135
|
+
au 287
|
136
|
+
lis 286
|
137
|
+
_on 285
|
138
|
+
s� 284
|
139
|
+
uk 280
|
140
|
+
am 280
|
141
|
+
ot 280
|
142
|
+
ty 275
|
143
|
+
ett 271
|
144
|
+
tt� 270
|
145
|
+
ni 269
|
146
|
+
l� 267
|
147
|
+
ksi 264
|
148
|
+
nk 264
|
149
|
+
ht 263
|
150
|
+
ul 261
|
151
|
+
ell 261
|
152
|
+
sa_ 259
|
153
|
+
ha 257
|
154
|
+
sen 257
|
155
|
+
a. 254
|
156
|
+
isi 253
|
157
|
+
ste 253
|
158
|
+
aan_ 252
|
159
|
+
_on_ 252
|
160
|
+
_ka 252
|
161
|
+
sk 251
|
162
|
+
kk 246
|
163
|
+
itt 245
|
164
|
+
ok 242
|
165
|
+
a._ 239
|
166
|
+
all 239
|
167
|
+
yt 239
|
168
|
+
m� 237
|
169
|
+
mu 237
|
170
|
+
av 237
|
171
|
+
_y 236
|
172
|
+
lla 233
|
173
|
+
taa 231
|
174
|
+
ais 231
|
175
|
+
een 230
|
176
|
+
K 230
|
177
|
+
lt 228
|
178
|
+
s_ 227
|
179
|
+
ast 227
|
180
|
+
iv 226
|
181
|
+
ssa_ 225
|
182
|
+
ra 225
|
183
|
+
- 223
|
184
|
+
kse 223
|
185
|
+
oit 220
|
186
|
+
om 220
|
187
|
+
T 219
|
188
|
+
_ku 218
|
189
|
+
�n_ 216
|
190
|
+
aa_ 214
|
191
|
+
at_ 214
|
192
|
+
tel 211
|
193
|
+
ui 210
|
194
|
+
si_ 208
|
195
|
+
rk 207
|
196
|
+
sta_ 207
|
197
|
+
_jo 203
|
198
|
+
k� 202
|
199
|
+
_K 201
|
200
|
+
est 200
|
201
|
+
em 200
|
202
|
+
he 199
|
203
|
+
_n 199
|
204
|
+
vo 198
|
205
|
+
_ta 196
|
206
|
+
eh 196
|
207
|
+
_ol 196
|
208
|
+
S 196
|
209
|
+
nta 196
|
210
|
+
_ko 194
|
211
|
+
je 194
|
212
|
+
st� 194
|
213
|
+
�r 193
|
214
|
+
ust 191
|
215
|
+
mis 191
|
216
|
+
ns 190
|
217
|
+
pu 189
|
218
|
+
nen 188
|
219
|
+
�t 188
|
220
|
+
toi 188
|
221
|
+
iin 187
|
222
|
+
ten 187
|
223
|
+
min 186
|
224
|
+
ista 185
|
225
|
+
hd 184
|
226
|
+
a, 184
|
227
|
+
a,_ 184
|
228
|
+
sen_ 183
|
229
|
+
E 182
|
230
|
+
lle 181
|
231
|
+
vat 179
|
232
|
+
ill 177
|
233
|
+
no 176
|
234
|
+
p� 176
|
235
|
+
lm 176
|
236
|
+
llis 175
|
237
|
+
n. 175
|
238
|
+
io 172
|
239
|
+
ine 171
|
240
|
+
n._ 170
|
241
|
+
pi 169
|
242
|
+
uks 168
|
243
|
+
ava 168
|
244
|
+
��n 166
|
245
|
+
nen_ 165
|
246
|
+
ah 165
|
247
|
+
_mu 164
|
248
|
+
tus 163
|
249
|
+
mm 162
|
250
|
+
_to 162
|
251
|
+
ek 160
|
252
|
+
int 159
|
253
|
+
_r 159
|
254
|
+
lin 158
|
255
|
+
oim 158
|
256
|
+
_T 158
|
257
|
+
A 158
|
258
|
+
imi 157
|
259
|
+
t� 157
|
260
|
+
la_ 157
|
261
|
+
j� 157
|
262
|
+
aj 156
|
263
|
+
yh 155
|
264
|
+
o_ 154
|
265
|
+
lo 154
|
266
|
+
oli 153
|
267
|
+
een_ 153
|
268
|
+
le_ 153
|
269
|
+
_si 153
|
270
|
+
g 152
|
271
|
+
aik 151
|
272
|
+
vat_ 150
|
273
|
+
L 149
|
274
|
+
ur 149
|
275
|
+
ti_ 149
|
276
|
+
sia 148
|
277
|
+
ite 147
|
278
|
+
inen 147
|
279
|
+
ain 146
|
280
|
+
sti 146
|
281
|
+
lla_ 146
|
282
|
+
ys 145
|
283
|
+
_mi 145
|
284
|
+
val 144
|
285
|
+
stu 144
|
286
|
+
�m 144
|
287
|
+
alli 143
|
288
|
+
pe 143
|
289
|
+
utt 142
|
290
|
+
et_ 141
|
291
|
+
_tu 141
|
292
|
+
eri 140
|
293
|
+
_E 140
|
294
|
+
: 140
|
295
|
+
nki 139
|
296
|
+
ir 139
|
297
|
+
ll� 138
|
298
|
+
up 138
|
299
|
+
�i 137
|
300
|
+
ama 137
|
301
|
+
_ha 135
|
302
|
+
id 135
|
303
|
+
_se 135
|
304
|
+
po 134
|
305
|
+
inen_ 134
|
306
|
+
tte 133
|
307
|
+
nna 133
|
308
|
+
ten_ 132
|
309
|
+
or 132
|
310
|
+
ts 131
|
311
|
+
n� 131
|
312
|
+
yk 131
|
313
|
+
�s 131
|
314
|
+
_S 130
|
315
|
+
ses 130
|
316
|
+
ve 130
|
317
|
+
ess 129
|
318
|
+
�l 129
|
319
|
+
ita 129
|
320
|
+
lai 129
|
321
|
+
H 129
|
322
|
+
van 127
|
323
|
+
�k 127
|
324
|
+
kin 127
|
325
|
+
N 127
|
326
|
+
_te 126
|
327
|
+
den 126
|
328
|
+
tee 126
|
329
|
+
P 126
|
330
|
+
kaa 126
|
331
|
+
iin_ 125
|
332
|
+
kun 125
|
333
|
+
ois 125
|
334
|
+
sit 125
|
335
|
+
oh 124
|
336
|
+
V 124
|
337
|
+
y� 124
|
338
|
+
�v 124
|
339
|
+
tav 124
|
340
|
+
voi 124
|
341
|
+
ia_ 123
|
342
|
+
I 123
|
343
|
+
oll 123
|
344
|
+
maa 122
|
345
|
+
ih 122
|
346
|
+
oj 122
|
347
|
+
rj 121
|
348
|
+
ro 121
|
349
|
+
ikk 120
|
350
|
+
so 120
|
351
|
+
oo 120
|
352
|
+
oimi 120
|
353
|
+
do 120
|
354
|
+
pp 119
|
355
|
+
M 119
|
356
|
+
_ei 118
|
357
|
+
toim 118
|
358
|
+
op 118
|
359
|
+
uut 118
|
360
|
+
tet 118
|
361
|
+
_i 118
|
362
|
+
_ma 117
|
363
|
+
vai 117
|
364
|
+
l�_ 116
|
365
|
+
u_ 116
|
366
|
+
sy 116
|
367
|
+
kau 116
|
368
|
+
utta 116
|
369
|
+
un_ 115
|
370
|
+
eu 115
|
371
|
+
ss� 115
|
372
|
+
tti 115
|
373
|
+
_sa 115
|
374
|
+
mp 114
|
375
|
+
eis 114
|
376
|
+
ka_ 112
|
377
|
+
ett� 112
|
378
|
+
taa_ 111
|
379
|
+
_et 111
|
380
|
+
hu 111
|
381
|
+
itu 111
|
382
|
+
suu 111
|
383
|
+
den_ 111
|
384
|
+
ksen 110
|
385
|
+
ap 110
|
386
|
+
_ke 110
|
387
|
+
uv 110
|
388
|
+
tam 110
|
389
|
+
yv 109
|
390
|
+
aup 109
|
391
|
+
st�_ 109
|
392
|
+
asta 109
|
393
|
+
�y 109
|
394
|
+
kan 108
|
395
|
+
nu 108
|
396
|
+
ukse 108
|
397
|
+
_toi 107
|
398
|
+
ien 107
|
399
|
+
hi 107
|
400
|
+
iss 107
|