language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20326
|
2
|
+
e 6617
|
3
|
+
t 4843
|
4
|
+
o 3834
|
5
|
+
n 3653
|
6
|
+
i 3602
|
7
|
+
a 3433
|
8
|
+
s 2945
|
9
|
+
r 2921
|
10
|
+
h 2507
|
11
|
+
e_ 2000
|
12
|
+
d 1816
|
13
|
+
_t 1785
|
14
|
+
c 1639
|
15
|
+
l 1635
|
16
|
+
th 1535
|
17
|
+
he 1351
|
18
|
+
_th 1333
|
19
|
+
u 1309
|
20
|
+
f 1253
|
21
|
+
m 1175
|
22
|
+
p 1151
|
23
|
+
_a 1145
|
24
|
+
the 1142
|
25
|
+
_the 1060
|
26
|
+
s_ 978
|
27
|
+
er 968
|
28
|
+
_o 967
|
29
|
+
he_ 928
|
30
|
+
d_ 888
|
31
|
+
t_ 885
|
32
|
+
the_ 844
|
33
|
+
_the_ 843
|
34
|
+
on 842
|
35
|
+
in 817
|
36
|
+
y 783
|
37
|
+
n_ 773
|
38
|
+
b 761
|
39
|
+
re 754
|
40
|
+
, 734
|
41
|
+
,_ 732
|
42
|
+
an 732
|
43
|
+
g 728
|
44
|
+
w 718
|
45
|
+
_i 707
|
46
|
+
en 676
|
47
|
+
f_ 599
|
48
|
+
y_ 595
|
49
|
+
of 594
|
50
|
+
_of 592
|
51
|
+
es 589
|
52
|
+
ti 587
|
53
|
+
v 580
|
54
|
+
_of_ 575
|
55
|
+
of_ 575
|
56
|
+
nd 568
|
57
|
+
at 549
|
58
|
+
r_ 540
|
59
|
+
_w 534
|
60
|
+
it 522
|
61
|
+
ed 496
|
62
|
+
_p 494
|
63
|
+
nt 485
|
64
|
+
_c 462
|
65
|
+
o_ 457
|
66
|
+
io 450
|
67
|
+
_an 439
|
68
|
+
te 432
|
69
|
+
or 425
|
70
|
+
_b 418
|
71
|
+
nd_ 407
|
72
|
+
to 406
|
73
|
+
st 402
|
74
|
+
is 401
|
75
|
+
_s 396
|
76
|
+
_in 389
|
77
|
+
ion 385
|
78
|
+
and 385
|
79
|
+
de 384
|
80
|
+
ve 382
|
81
|
+
ha 375
|
82
|
+
ar 366
|
83
|
+
_m 361
|
84
|
+
and_ 360
|
85
|
+
_and 360
|
86
|
+
_and_ 358
|
87
|
+
se 353
|
88
|
+
_to 347
|
89
|
+
me 346
|
90
|
+
to_ 344
|
91
|
+
ed_ 339
|
92
|
+
. 330
|
93
|
+
be 329
|
94
|
+
_f 329
|
95
|
+
._ 329
|
96
|
+
_to_ 320
|
97
|
+
co 317
|
98
|
+
ic 316
|
99
|
+
ns 308
|
100
|
+
al 307
|
101
|
+
le 304
|
102
|
+
ou 304
|
103
|
+
ce 293
|
104
|
+
ent 279
|
105
|
+
l_ 278
|
106
|
+
_co 277
|
107
|
+
tio 275
|
108
|
+
on_ 274
|
109
|
+
_d 274
|
110
|
+
tion 268
|
111
|
+
ri 266
|
112
|
+
_e 264
|
113
|
+
ng 253
|
114
|
+
hi 251
|
115
|
+
er_ 249
|
116
|
+
ea 246
|
117
|
+
as 245
|
118
|
+
_be 242
|
119
|
+
pe 242
|
120
|
+
h_ 234
|
121
|
+
_r 232
|
122
|
+
ec 227
|
123
|
+
ch 223
|
124
|
+
ro 222
|
125
|
+
ct 220
|
126
|
+
_h 219
|
127
|
+
pr 217
|
128
|
+
in_ 217
|
129
|
+
ne 214
|
130
|
+
ll 214
|
131
|
+
rt 213
|
132
|
+
s,_ 210
|
133
|
+
s, 210
|
134
|
+
li 209
|
135
|
+
ra 208
|
136
|
+
T 207
|
137
|
+
wh 204
|
138
|
+
a_ 203
|
139
|
+
ac 201
|
140
|
+
_wh 199
|
141
|
+
_n 196
|
142
|
+
ts 196
|
143
|
+
di 196
|
144
|
+
es_ 195
|
145
|
+
si 194
|
146
|
+
re_ 193
|
147
|
+
at_ 192
|
148
|
+
nc 192
|
149
|
+
ie 190
|
150
|
+
_a_ 188
|
151
|
+
_in_ 185
|
152
|
+
ing 184
|
153
|
+
us 182
|
154
|
+
_re 182
|
155
|
+
g_ 179
|
156
|
+
ng_ 178
|
157
|
+
op 178
|
158
|
+
con 177
|
159
|
+
tha 175
|
160
|
+
_l 174
|
161
|
+
_tha 174
|
162
|
+
ver 173
|
163
|
+
ma 173
|
164
|
+
ion_ 171
|
165
|
+
_con 171
|
166
|
+
ci 170
|
167
|
+
ons 170
|
168
|
+
_it 170
|
169
|
+
po 169
|
170
|
+
ere 168
|
171
|
+
is_ 167
|
172
|
+
ta 167
|
173
|
+
la 166
|
174
|
+
_pr 165
|
175
|
+
fo 164
|
176
|
+
ho 164
|
177
|
+
ir 162
|
178
|
+
ss 161
|
179
|
+
men 160
|
180
|
+
be_ 160
|
181
|
+
un 159
|
182
|
+
ty 159
|
183
|
+
_be_ 158
|
184
|
+
ing_ 157
|
185
|
+
om 156
|
186
|
+
ot 156
|
187
|
+
hat 155
|
188
|
+
ly 155
|
189
|
+
_g 155
|
190
|
+
em 153
|
191
|
+
_T 151
|
192
|
+
rs 150
|
193
|
+
mo 148
|
194
|
+
ch_ 148
|
195
|
+
wi 147
|
196
|
+
we 147
|
197
|
+
ad 147
|
198
|
+
ts_ 145
|
199
|
+
res 143
|
200
|
+
_wi 143
|
201
|
+
I 143
|
202
|
+
hat_ 142
|
203
|
+
ei 141
|
204
|
+
ly_ 141
|
205
|
+
ni 140
|
206
|
+
os 140
|
207
|
+
ca 139
|
208
|
+
ur 139
|
209
|
+
A 138
|
210
|
+
ut 138
|
211
|
+
that 138
|
212
|
+
_that 137
|
213
|
+
ati 137
|
214
|
+
_fo 137
|
215
|
+
st_ 137
|
216
|
+
il 136
|
217
|
+
or_ 136
|
218
|
+
for 136
|
219
|
+
pa 136
|
220
|
+
ul 135
|
221
|
+
ate 135
|
222
|
+
ter 134
|
223
|
+
it_ 134
|
224
|
+
nt_ 133
|
225
|
+
that_ 132
|
226
|
+
_ha 129
|
227
|
+
al_ 128
|
228
|
+
el 128
|
229
|
+
as_ 127
|
230
|
+
ll_ 127
|
231
|
+
_ma 125
|
232
|
+
no 124
|
233
|
+
ment 124
|
234
|
+
an_ 124
|
235
|
+
tion_ 122
|
236
|
+
su 122
|
237
|
+
bl 122
|
238
|
+
_de 122
|
239
|
+
nce 120
|
240
|
+
pl 120
|
241
|
+
fe 119
|
242
|
+
tr 118
|
243
|
+
so 118
|
244
|
+
int 115
|
245
|
+
ov 114
|
246
|
+
e, 114
|
247
|
+
e,_ 114
|
248
|
+
_u 113
|
249
|
+
ent_ 113
|
250
|
+
Th 113
|
251
|
+
her 113
|
252
|
+
j 112
|
253
|
+
atio 112
|
254
|
+
ation 112
|
255
|
+
_Th 111
|
256
|
+
le_ 110
|
257
|
+
ai 110
|
258
|
+
_it_ 110
|
259
|
+
_on 110
|
260
|
+
_for 109
|
261
|
+
ect 109
|
262
|
+
k 109
|
263
|
+
hic 108
|
264
|
+
est 108
|
265
|
+
der 107
|
266
|
+
tu 107
|
267
|
+
na 106
|
268
|
+
_by_ 106
|
269
|
+
by_ 106
|
270
|
+
E 106
|
271
|
+
by 106
|
272
|
+
_by 106
|
273
|
+
ve_ 106
|
274
|
+
_di 106
|
275
|
+
en_ 104
|
276
|
+
vi 104
|
277
|
+
m_ 103
|
278
|
+
_whi 102
|
279
|
+
iv 102
|
280
|
+
whi 102
|
281
|
+
ns_ 102
|
282
|
+
_A 101
|
283
|
+
ich 100
|
284
|
+
ge 100
|
285
|
+
pro 99
|
286
|
+
ess 99
|
287
|
+
_whic 99
|
288
|
+
ers 99
|
289
|
+
hich 99
|
290
|
+
ce_ 99
|
291
|
+
which 99
|
292
|
+
whic 99
|
293
|
+
all 98
|
294
|
+
ove 98
|
295
|
+
_is 98
|
296
|
+
ich_ 97
|
297
|
+
ee 97
|
298
|
+
hich_ 97
|
299
|
+
n,_ 96
|
300
|
+
n, 96
|
301
|
+
im 95
|
302
|
+
ir_ 94
|
303
|
+
hei 94
|
304
|
+
ions 94
|
305
|
+
sti 94
|
306
|
+
se_ 94
|
307
|
+
per 93
|
308
|
+
The 93
|
309
|
+
_pa 93
|
310
|
+
heir 93
|
311
|
+
id 93
|
312
|
+
eir 93
|
313
|
+
eir_ 93
|
314
|
+
ig 93
|
315
|
+
heir_ 93
|
316
|
+
_no 93
|
317
|
+
ev 93
|
318
|
+
era 92
|
319
|
+
_int 92
|
320
|
+
ted 91
|
321
|
+
_The 91
|
322
|
+
ies 91
|
323
|
+
art 91
|
324
|
+
thei 90
|
325
|
+
_ar 90
|
326
|
+
_thei 90
|
327
|
+
their 90
|
328
|
+
_pro 90
|
329
|
+
et 89
|
330
|
+
_pe 88
|
331
|
+
_mo 88
|
332
|
+
ther 88
|
333
|
+
x 87
|
334
|
+
gh 87
|
335
|
+
S 87
|
336
|
+
_is_ 87
|
337
|
+
ol 87
|
338
|
+
ty_ 87
|
339
|
+
_I 86
|
340
|
+
nde 86
|
341
|
+
am 86
|
342
|
+
rn 86
|
343
|
+
nte 86
|
344
|
+
mp 85
|
345
|
+
_su 84
|
346
|
+
_we 84
|
347
|
+
par 84
|
348
|
+
_v 84
|
349
|
+
pu 82
|
350
|
+
his 82
|
351
|
+
ow 82
|
352
|
+
mi 82
|
353
|
+
go 81
|
354
|
+
N 81
|
355
|
+
ue 81
|
356
|
+
ple 81
|
357
|
+
ep 80
|
358
|
+
ab 80
|
359
|
+
;_ 80
|
360
|
+
; 80
|
361
|
+
ex 80
|
362
|
+
ain 80
|
363
|
+
over 80
|
364
|
+
_un 79
|
365
|
+
q 79
|
366
|
+
qu 79
|
367
|
+
pp 79
|
368
|
+
ith 79
|
369
|
+
ry 79
|
370
|
+
_as 79
|
371
|
+
ber 79
|
372
|
+
ub 78
|
373
|
+
av 78
|
374
|
+
uc 78
|
375
|
+
s._ 77
|
376
|
+
s. 77
|
377
|
+
enc 77
|
378
|
+
are 77
|
379
|
+
iti 77
|
380
|
+
gr 76
|
381
|
+
his_ 76
|
382
|
+
ua 76
|
383
|
+
part 76
|
384
|
+
ff 75
|
385
|
+
eve 75
|
386
|
+
O 75
|
387
|
+
rea 74
|
388
|
+
ous 74
|
389
|
+
ia 74
|
390
|
+
The_ 73
|
391
|
+
ag 73
|
392
|
+
mb 73
|
393
|
+
_go 73
|
394
|
+
fa 72
|
395
|
+
on,_ 72
|
396
|
+
ern 72
|
397
|
+
t,_ 72
|
398
|
+
on, 72
|
399
|
+
t, 72
|
400
|
+
_me 71
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 57050
|
2
|
+
a 16035
|
3
|
+
i 12706
|
4
|
+
e 12227
|
5
|
+
o 12102
|
6
|
+
n 10393
|
7
|
+
s 8344
|
8
|
+
l 7707
|
9
|
+
r 7492
|
10
|
+
t 7134
|
11
|
+
k 5376
|
12
|
+
u 4558
|
13
|
+
j 3946
|
14
|
+
a_ 3875
|
15
|
+
m 3783
|
16
|
+
d 3710
|
17
|
+
p 3693
|
18
|
+
la 2840
|
19
|
+
s_ 2769
|
20
|
+
e_ 2751
|
21
|
+
. 2706
|
22
|
+
_l 2635
|
23
|
+
_k 2619
|
24
|
+
v 2531
|
25
|
+
n_ 2504
|
26
|
+
o_ 2444
|
27
|
+
i_ 2333
|
28
|
+
._ 2278
|
29
|
+
on 2238
|
30
|
+
, 2193
|
31
|
+
,_ 2182
|
32
|
+
_la 2100
|
33
|
+
en 2080
|
34
|
+
j_ 2050
|
35
|
+
as 2028
|
36
|
+
la_ 2012
|
37
|
+
ta 1956
|
38
|
+
_la_ 1907
|
39
|
+
an 1882
|
40
|
+
_p 1850
|
41
|
+
g 1831
|
42
|
+
_e 1791
|
43
|
+
_d 1778
|
44
|
+
is 1737
|
45
|
+
aj 1658
|
46
|
+
st 1635
|
47
|
+
_s 1575
|
48
|
+
c 1526
|
49
|
+
de 1517
|
50
|
+
oj 1498
|
51
|
+
er 1476
|
52
|
+
ti 1456
|
53
|
+
f 1443
|
54
|
+
_a 1442
|
55
|
+
b 1427
|
56
|
+
ro 1379
|
57
|
+
_m 1351
|
58
|
+
ra 1341
|
59
|
+
nt 1293
|
60
|
+
ka 1270
|
61
|
+
ri 1258
|
62
|
+
al 1249
|
63
|
+
as_ 1248
|
64
|
+
aj_ 1213
|
65
|
+
to 1209
|
66
|
+
_de 1203
|
67
|
+
_t 1200
|
68
|
+
te 1179
|
69
|
+
_n 1176
|
70
|
+
is_ 1171
|
71
|
+
in 1151
|
72
|
+
ko 1145
|
73
|
+
or 1114
|
74
|
+
es 1083
|
75
|
+
re 1034
|
76
|
+
ia 1029
|
77
|
+
li 1022
|
78
|
+
de_ 1016
|
79
|
+
_de_ 979
|
80
|
+
ar 974
|
81
|
+
_v 966
|
82
|
+
vi 942
|
83
|
+
lo 932
|
84
|
+
x 928
|
85
|
+
io 917
|
86
|
+
ne 855
|
87
|
+
no 848
|
88
|
+
ni 843
|
89
|
+
mi 835
|
90
|
+
ma 819
|
91
|
+
_ka 816
|
92
|
+
el 815
|
93
|
+
pr 771
|
94
|
+
z 744
|
95
|
+
un 734
|
96
|
+
l_ 732
|
97
|
+
po 730
|
98
|
+
_f 725
|
99
|
+
� 724
|
100
|
+
est 691
|
101
|
+
na 687
|
102
|
+
ki 679
|
103
|
+
kaj 676
|
104
|
+
si 665
|
105
|
+
u_ 663
|
106
|
+
kaj_ 660
|
107
|
+
" 654
|
108
|
+
tas 651
|
109
|
+
le 650
|
110
|
+
oj_ 648
|
111
|
+
_i 643
|
112
|
+
tr 642
|
113
|
+
_pr 630
|
114
|
+
_es 628
|
115
|
+
jn 626
|
116
|
+
pe 618
|
117
|
+
_kaj 616
|
118
|
+
ig 616
|
119
|
+
_kaj_ 611
|
120
|
+
do 608
|
121
|
+
sta 606
|
122
|
+
on_ 602
|
123
|
+
ek 602
|
124
|
+
ci 597
|
125
|
+
r_ 595
|
126
|
+
� 594
|
127
|
+
_r 593
|
128
|
+
il 592
|
129
|
+
_est 587
|
130
|
+
di 586
|
131
|
+
am 586
|
132
|
+
_mi 582
|
133
|
+
a� 578
|
134
|
+
_vi 577
|
135
|
+
mo 575
|
136
|
+
ant 565
|
137
|
+
_ne 562
|
138
|
+
en_ 561
|
139
|
+
o. 559
|
140
|
+
� 543
|
141
|
+
iu 538
|
142
|
+
o, 529
|
143
|
+
ur 527
|
144
|
+
o._ 527
|
145
|
+
om 525
|
146
|
+
o,_ 524
|
147
|
+
at 521
|
148
|
+
va 521
|
149
|
+
- 519
|
150
|
+
_en 518
|
151
|
+
: 513
|
152
|
+
:_ 512
|
153
|
+
_ti 500
|
154
|
+
M 496
|
155
|
+
h 488
|
156
|
+
nd 484
|
157
|
+
me 484
|
158
|
+
_al 481
|
159
|
+
_ko 479
|
160
|
+
ve 478
|
161
|
+
ie 478
|
162
|
+
_ki 473
|
163
|
+
it 473
|
164
|
+
L 466
|
165
|
+
_b 465
|
166
|
+
se 462
|
167
|
+
em 452
|
168
|
+
ol 450
|
169
|
+
nta 449
|
170
|
+
tu 448
|
171
|
+
ik 444
|
172
|
+
ov 443
|
173
|
+
da 443
|
174
|
+
_M 440
|
175
|
+
_po 439
|
176
|
+
tas_ 438
|
177
|
+
ne_ 437
|
178
|
+
et 437
|
179
|
+
_ma 436
|
180
|
+
_en_ 435
|
181
|
+
su 429
|
182
|
+
pl 426
|
183
|
+
_L 425
|
184
|
+
pa 420
|
185
|
+
_o 417
|
186
|
+
vo 408
|
187
|
+
an_ 407
|
188
|
+
ro_ 406
|
189
|
+
sti 406
|
190
|
+
nu 399
|
191
|
+
kon 396
|
192
|
+
stas 391
|
193
|
+
m_ 391
|
194
|
+
ir 388
|
195
|
+
n. 386
|
196
|
+
fa 386
|
197
|
+
jn_ 382
|
198
|
+
ku 382
|
199
|
+
os 376
|
200
|
+
ke 375
|
201
|
+
n, 375
|
202
|
+
esta 374
|
203
|
+
n,_ 372
|
204
|
+
_su 362
|
205
|
+
ta_ 362
|
206
|
+
stas_ 359
|
207
|
+
xi 359
|
208
|
+
Mi 358
|
209
|
+
_ne_ 356
|
210
|
+
al_ 355
|
211
|
+
nk 353
|
212
|
+
so 353
|
213
|
+
n._ 352
|
214
|
+
id 349
|
215
|
+
_g 348
|
216
|
+
estas 347
|
217
|
+
ga 346
|
218
|
+
_h 345
|
219
|
+
per 345
|
220
|
+
_Mi 340
|
221
|
+
ok 339
|
222
|
+
K 339
|
223
|
+
mp 337
|
224
|
+
_esta 337
|
225
|
+
s,_ 335
|
226
|
+
s, 335
|
227
|
+
_se 333
|
228
|
+
anta 332
|
229
|
+
ul 326
|
230
|
+
ran 325
|
231
|
+
_" 323
|
232
|
+
�_ 322
|
233
|
+
te_ 320
|
234
|
+
ak 320
|
235
|
+
a�_ 320
|
236
|
+
ed 320
|
237
|
+
rt 319
|
238
|
+
ojn 318
|
239
|
+
gi 318
|
240
|
+
_� 317
|
241
|
+
tis 316
|
242
|
+
gx 316
|
243
|
+
mal 316
|
244
|
+
ia_ 315
|
245
|
+
ks 310
|
246
|
+
_al_ 310
|
247
|
+
mi_ 309
|
248
|
+
S 309
|
249
|
+
lu 309
|
250
|
+
ns 308
|
251
|
+
kt 305
|
252
|
+
io_ 302
|
253
|
+
ent 300
|
254
|
+
? 300
|
255
|
+
_K 300
|
256
|
+
ec 300
|
257
|
+
el_ 299
|
258
|
+
_- 299
|
259
|
+
li_ 299
|
260
|
+
E 298
|
261
|
+
� 298
|
262
|
+
_li 297
|
263
|
+
fo 296
|
264
|
+
ter 296
|
265
|
+
_re 296
|
266
|
+
A 295
|
267
|
+
nto 294
|
268
|
+
vi_ 292
|
269
|
+
La 292
|
270
|
+
_mal 290
|
271
|
+
nte 288
|
272
|
+
sp 287
|
273
|
+
sa 287
|
274
|
+
_mi_ 279
|
275
|
+
ut 278
|
276
|
+
op 278
|
277
|
+
_ke 277
|
278
|
+
bo 277
|
279
|
+
ajn 276
|
280
|
+
un_ 276
|
281
|
+
T 274
|
282
|
+
to_ 272
|
283
|
+
-_ 272
|
284
|
+
bl 272
|
285
|
+
_an 271
|
286
|
+
_La 271
|
287
|
+
�i 269
|
288
|
+
_S 268
|
289
|
+
_pl 267
|
290
|
+
_fa 266
|
291
|
+
ni_ 266
|
292
|
+
La_ 265
|
293
|
+
_E 264
|
294
|
+
N 263
|
295
|
+
tis_ 263
|
296
|
+
_tr 263
|
297
|
+
' 262
|
298
|
+
! 262
|
299
|
+
_-_ 262
|
300
|
+
pro 261
|
301
|
+
iu_ 261
|
302
|
+
i� 261
|
303
|
+
nc 260
|
304
|
+
_si 259
|
305
|
+
du 257
|
306
|
+
_kon 256
|
307
|
+
ru 255
|
308
|
+
_vi_ 254
|
309
|
+
_j 253
|
310
|
+
ce 251
|
311
|
+
ke_ 249
|
312
|
+
ap 248
|
313
|
+
us 247
|
314
|
+
be 247
|
315
|
+
im 247
|
316
|
+
B 246
|
317
|
+
_ku 246
|
318
|
+
_La_ 246
|
319
|
+
tra 245
|
320
|
+
ad 245
|
321
|
+
uj 245
|
322
|
+
ac 245
|
323
|
+
ita 243
|
324
|
+
pre 242
|
325
|
+
_pro 242
|
326
|
+
co 241
|
327
|
+
rm 241
|
328
|
+
_ni 238
|
329
|
+
_pe 236
|
330
|
+
?_ 234
|
331
|
+
on. 234
|
332
|
+
toj 234
|
333
|
+
"_ 234
|
334
|
+
j. 234
|
335
|
+
_ke_ 233
|
336
|
+
s. 232
|
337
|
+
_A 231
|
338
|
+
av 230
|
339
|
+
ri_ 230
|
340
|
+
_el 229
|
341
|
+
por 229
|
342
|
+
` 224
|
343
|
+
ev 224
|
344
|
+
las 223
|
345
|
+
P 223
|
346
|
+
j._ 221
|
347
|
+
eni 220
|
348
|
+
_T 220
|
349
|
+
_B 219
|
350
|
+
j,_ 218
|
351
|
+
j, 218
|
352
|
+
era 217
|
353
|
+
_in 216
|
354
|
+
on._ 216
|
355
|
+
cx 216
|
356
|
+
_N 215
|
357
|
+
ion 215
|
358
|
+
ab 215
|
359
|
+
.. 214
|
360
|
+
) 213
|
361
|
+
fi 213
|
362
|
+
or_ 212
|
363
|
+
pri 212
|
364
|
+
s._ 212
|
365
|
+
_por 210
|
366
|
+
ez 210
|
367
|
+
in_ 210
|
368
|
+
am_ 209
|
369
|
+
on,_ 209
|
370
|
+
ll 209
|
371
|
+
�i 209
|
372
|
+
on, 209
|
373
|
+
_ve 208
|
374
|
+
ris 208
|
375
|
+
esti 208
|
376
|
+
!_ 207
|
377
|
+
men 206
|
378
|
+
vas 205
|
379
|
+
iel 204
|
380
|
+
taj 203
|
381
|
+
_c 201
|
382
|
+
aro 201
|
383
|
+
ank 200
|
384
|
+
_pri 200
|
385
|
+
jo 200
|
386
|
+
ja 200
|
387
|
+
ont 200
|
388
|
+
lt 199
|
389
|
+
_P 199
|
390
|
+
igi 199
|
391
|
+
_pa 197
|
392
|
+
oj. 197
|
393
|
+
( 196
|
394
|
+
au 195
|
395
|
+
oro 195
|
396
|
+
ng 195
|
397
|
+
_( 194
|
398
|
+
sto 194
|
399
|
+
ast 194
|
400
|
+
ag 193
|