language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20326
|
2
|
+
e 6617
|
3
|
+
t 4843
|
4
|
+
o 3834
|
5
|
+
n 3653
|
6
|
+
i 3602
|
7
|
+
a 3433
|
8
|
+
s 2945
|
9
|
+
r 2921
|
10
|
+
h 2507
|
11
|
+
e_ 2000
|
12
|
+
d 1816
|
13
|
+
_t 1785
|
14
|
+
c 1639
|
15
|
+
l 1635
|
16
|
+
th 1535
|
17
|
+
he 1351
|
18
|
+
_th 1333
|
19
|
+
u 1309
|
20
|
+
f 1253
|
21
|
+
m 1175
|
22
|
+
p 1151
|
23
|
+
_a 1145
|
24
|
+
the 1142
|
25
|
+
_the 1060
|
26
|
+
s_ 978
|
27
|
+
er 968
|
28
|
+
_o 967
|
29
|
+
he_ 928
|
30
|
+
d_ 888
|
31
|
+
t_ 885
|
32
|
+
the_ 844
|
33
|
+
_the_ 843
|
34
|
+
on 842
|
35
|
+
in 817
|
36
|
+
y 783
|
37
|
+
n_ 773
|
38
|
+
b 761
|
39
|
+
re 754
|
40
|
+
, 734
|
41
|
+
,_ 732
|
42
|
+
an 732
|
43
|
+
g 728
|
44
|
+
w 718
|
45
|
+
_i 707
|
46
|
+
en 676
|
47
|
+
f_ 599
|
48
|
+
y_ 595
|
49
|
+
of 594
|
50
|
+
_of 592
|
51
|
+
es 589
|
52
|
+
ti 587
|
53
|
+
v 580
|
54
|
+
_of_ 575
|
55
|
+
of_ 575
|
56
|
+
nd 568
|
57
|
+
at 549
|
58
|
+
r_ 540
|
59
|
+
_w 534
|
60
|
+
it 522
|
61
|
+
ed 496
|
62
|
+
_p 494
|
63
|
+
nt 485
|
64
|
+
_c 462
|
65
|
+
o_ 457
|
66
|
+
io 450
|
67
|
+
_an 439
|
68
|
+
te 432
|
69
|
+
or 425
|
70
|
+
_b 418
|
71
|
+
nd_ 407
|
72
|
+
to 406
|
73
|
+
st 402
|
74
|
+
is 401
|
75
|
+
_s 396
|
76
|
+
_in 389
|
77
|
+
ion 385
|
78
|
+
and 385
|
79
|
+
de 384
|
80
|
+
ve 382
|
81
|
+
ha 375
|
82
|
+
ar 366
|
83
|
+
_m 361
|
84
|
+
and_ 360
|
85
|
+
_and 360
|
86
|
+
_and_ 358
|
87
|
+
se 353
|
88
|
+
_to 347
|
89
|
+
me 346
|
90
|
+
to_ 344
|
91
|
+
ed_ 339
|
92
|
+
. 330
|
93
|
+
be 329
|
94
|
+
_f 329
|
95
|
+
._ 329
|
96
|
+
_to_ 320
|
97
|
+
co 317
|
98
|
+
ic 316
|
99
|
+
ns 308
|
100
|
+
al 307
|
101
|
+
le 304
|
102
|
+
ou 304
|
103
|
+
ce 293
|
104
|
+
ent 279
|
105
|
+
l_ 278
|
106
|
+
_co 277
|
107
|
+
tio 275
|
108
|
+
on_ 274
|
109
|
+
_d 274
|
110
|
+
tion 268
|
111
|
+
ri 266
|
112
|
+
_e 264
|
113
|
+
ng 253
|
114
|
+
hi 251
|
115
|
+
er_ 249
|
116
|
+
ea 246
|
117
|
+
as 245
|
118
|
+
_be 242
|
119
|
+
pe 242
|
120
|
+
h_ 234
|
121
|
+
_r 232
|
122
|
+
ec 227
|
123
|
+
ch 223
|
124
|
+
ro 222
|
125
|
+
ct 220
|
126
|
+
_h 219
|
127
|
+
pr 217
|
128
|
+
in_ 217
|
129
|
+
ne 214
|
130
|
+
ll 214
|
131
|
+
rt 213
|
132
|
+
s,_ 210
|
133
|
+
s, 210
|
134
|
+
li 209
|
135
|
+
ra 208
|
136
|
+
T 207
|
137
|
+
wh 204
|
138
|
+
a_ 203
|
139
|
+
ac 201
|
140
|
+
_wh 199
|
141
|
+
_n 196
|
142
|
+
ts 196
|
143
|
+
di 196
|
144
|
+
es_ 195
|
145
|
+
si 194
|
146
|
+
re_ 193
|
147
|
+
at_ 192
|
148
|
+
nc 192
|
149
|
+
ie 190
|
150
|
+
_a_ 188
|
151
|
+
_in_ 185
|
152
|
+
ing 184
|
153
|
+
us 182
|
154
|
+
_re 182
|
155
|
+
g_ 179
|
156
|
+
ng_ 178
|
157
|
+
op 178
|
158
|
+
con 177
|
159
|
+
tha 175
|
160
|
+
_l 174
|
161
|
+
_tha 174
|
162
|
+
ver 173
|
163
|
+
ma 173
|
164
|
+
ion_ 171
|
165
|
+
_con 171
|
166
|
+
ci 170
|
167
|
+
ons 170
|
168
|
+
_it 170
|
169
|
+
po 169
|
170
|
+
ere 168
|
171
|
+
is_ 167
|
172
|
+
ta 167
|
173
|
+
la 166
|
174
|
+
_pr 165
|
175
|
+
fo 164
|
176
|
+
ho 164
|
177
|
+
ir 162
|
178
|
+
ss 161
|
179
|
+
men 160
|
180
|
+
be_ 160
|
181
|
+
un 159
|
182
|
+
ty 159
|
183
|
+
_be_ 158
|
184
|
+
ing_ 157
|
185
|
+
om 156
|
186
|
+
ot 156
|
187
|
+
hat 155
|
188
|
+
ly 155
|
189
|
+
_g 155
|
190
|
+
em 153
|
191
|
+
_T 151
|
192
|
+
rs 150
|
193
|
+
mo 148
|
194
|
+
ch_ 148
|
195
|
+
wi 147
|
196
|
+
we 147
|
197
|
+
ad 147
|
198
|
+
ts_ 145
|
199
|
+
res 143
|
200
|
+
_wi 143
|
201
|
+
I 143
|
202
|
+
hat_ 142
|
203
|
+
ei 141
|
204
|
+
ly_ 141
|
205
|
+
ni 140
|
206
|
+
os 140
|
207
|
+
ca 139
|
208
|
+
ur 139
|
209
|
+
A 138
|
210
|
+
ut 138
|
211
|
+
that 138
|
212
|
+
_that 137
|
213
|
+
ati 137
|
214
|
+
_fo 137
|
215
|
+
st_ 137
|
216
|
+
il 136
|
217
|
+
or_ 136
|
218
|
+
for 136
|
219
|
+
pa 136
|
220
|
+
ul 135
|
221
|
+
ate 135
|
222
|
+
ter 134
|
223
|
+
it_ 134
|
224
|
+
nt_ 133
|
225
|
+
that_ 132
|
226
|
+
_ha 129
|
227
|
+
al_ 128
|
228
|
+
el 128
|
229
|
+
as_ 127
|
230
|
+
ll_ 127
|
231
|
+
_ma 125
|
232
|
+
no 124
|
233
|
+
ment 124
|
234
|
+
an_ 124
|
235
|
+
tion_ 122
|
236
|
+
su 122
|
237
|
+
bl 122
|
238
|
+
_de 122
|
239
|
+
nce 120
|
240
|
+
pl 120
|
241
|
+
fe 119
|
242
|
+
tr 118
|
243
|
+
so 118
|
244
|
+
int 115
|
245
|
+
ov 114
|
246
|
+
e, 114
|
247
|
+
e,_ 114
|
248
|
+
_u 113
|
249
|
+
ent_ 113
|
250
|
+
Th 113
|
251
|
+
her 113
|
252
|
+
j 112
|
253
|
+
atio 112
|
254
|
+
ation 112
|
255
|
+
_Th 111
|
256
|
+
le_ 110
|
257
|
+
ai 110
|
258
|
+
_it_ 110
|
259
|
+
_on 110
|
260
|
+
_for 109
|
261
|
+
ect 109
|
262
|
+
k 109
|
263
|
+
hic 108
|
264
|
+
est 108
|
265
|
+
der 107
|
266
|
+
tu 107
|
267
|
+
na 106
|
268
|
+
_by_ 106
|
269
|
+
by_ 106
|
270
|
+
E 106
|
271
|
+
by 106
|
272
|
+
_by 106
|
273
|
+
ve_ 106
|
274
|
+
_di 106
|
275
|
+
en_ 104
|
276
|
+
vi 104
|
277
|
+
m_ 103
|
278
|
+
_whi 102
|
279
|
+
iv 102
|
280
|
+
whi 102
|
281
|
+
ns_ 102
|
282
|
+
_A 101
|
283
|
+
ich 100
|
284
|
+
ge 100
|
285
|
+
pro 99
|
286
|
+
ess 99
|
287
|
+
_whic 99
|
288
|
+
ers 99
|
289
|
+
hich 99
|
290
|
+
ce_ 99
|
291
|
+
which 99
|
292
|
+
whic 99
|
293
|
+
all 98
|
294
|
+
ove 98
|
295
|
+
_is 98
|
296
|
+
ich_ 97
|
297
|
+
ee 97
|
298
|
+
hich_ 97
|
299
|
+
n,_ 96
|
300
|
+
n, 96
|
301
|
+
im 95
|
302
|
+
ir_ 94
|
303
|
+
hei 94
|
304
|
+
ions 94
|
305
|
+
sti 94
|
306
|
+
se_ 94
|
307
|
+
per 93
|
308
|
+
The 93
|
309
|
+
_pa 93
|
310
|
+
heir 93
|
311
|
+
id 93
|
312
|
+
eir 93
|
313
|
+
eir_ 93
|
314
|
+
ig 93
|
315
|
+
heir_ 93
|
316
|
+
_no 93
|
317
|
+
ev 93
|
318
|
+
era 92
|
319
|
+
_int 92
|
320
|
+
ted 91
|
321
|
+
_The 91
|
322
|
+
ies 91
|
323
|
+
art 91
|
324
|
+
thei 90
|
325
|
+
_ar 90
|
326
|
+
_thei 90
|
327
|
+
their 90
|
328
|
+
_pro 90
|
329
|
+
et 89
|
330
|
+
_pe 88
|
331
|
+
_mo 88
|
332
|
+
ther 88
|
333
|
+
x 87
|
334
|
+
gh 87
|
335
|
+
S 87
|
336
|
+
_is_ 87
|
337
|
+
ol 87
|
338
|
+
ty_ 87
|
339
|
+
_I 86
|
340
|
+
nde 86
|
341
|
+
am 86
|
342
|
+
rn 86
|
343
|
+
nte 86
|
344
|
+
mp 85
|
345
|
+
_su 84
|
346
|
+
_we 84
|
347
|
+
par 84
|
348
|
+
_v 84
|
349
|
+
pu 82
|
350
|
+
his 82
|
351
|
+
ow 82
|
352
|
+
mi 82
|
353
|
+
go 81
|
354
|
+
N 81
|
355
|
+
ue 81
|
356
|
+
ple 81
|
357
|
+
ep 80
|
358
|
+
ab 80
|
359
|
+
;_ 80
|
360
|
+
; 80
|
361
|
+
ex 80
|
362
|
+
ain 80
|
363
|
+
over 80
|
364
|
+
_un 79
|
365
|
+
q 79
|
366
|
+
qu 79
|
367
|
+
pp 79
|
368
|
+
ith 79
|
369
|
+
ry 79
|
370
|
+
_as 79
|
371
|
+
ber 79
|
372
|
+
ub 78
|
373
|
+
av 78
|
374
|
+
uc 78
|
375
|
+
s._ 77
|
376
|
+
s. 77
|
377
|
+
enc 77
|
378
|
+
are 77
|
379
|
+
iti 77
|
380
|
+
gr 76
|
381
|
+
his_ 76
|
382
|
+
ua 76
|
383
|
+
part 76
|
384
|
+
ff 75
|
385
|
+
eve 75
|
386
|
+
O 75
|
387
|
+
rea 74
|
388
|
+
ous 74
|
389
|
+
ia 74
|
390
|
+
The_ 73
|
391
|
+
ag 73
|
392
|
+
mb 73
|
393
|
+
_go 73
|
394
|
+
fa 72
|
395
|
+
on,_ 72
|
396
|
+
ern 72
|
397
|
+
t,_ 72
|
398
|
+
on, 72
|
399
|
+
t, 72
|
400
|
+
_me 71
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 57050
|
2
|
+
a 16035
|
3
|
+
i 12706
|
4
|
+
e 12227
|
5
|
+
o 12102
|
6
|
+
n 10393
|
7
|
+
s 8344
|
8
|
+
l 7707
|
9
|
+
r 7492
|
10
|
+
t 7134
|
11
|
+
k 5376
|
12
|
+
u 4558
|
13
|
+
j 3946
|
14
|
+
a_ 3875
|
15
|
+
m 3783
|
16
|
+
d 3710
|
17
|
+
p 3693
|
18
|
+
la 2840
|
19
|
+
s_ 2769
|
20
|
+
e_ 2751
|
21
|
+
. 2706
|
22
|
+
_l 2635
|
23
|
+
_k 2619
|
24
|
+
v 2531
|
25
|
+
n_ 2504
|
26
|
+
o_ 2444
|
27
|
+
i_ 2333
|
28
|
+
._ 2278
|
29
|
+
on 2238
|
30
|
+
, 2193
|
31
|
+
,_ 2182
|
32
|
+
_la 2100
|
33
|
+
en 2080
|
34
|
+
j_ 2050
|
35
|
+
as 2028
|
36
|
+
la_ 2012
|
37
|
+
ta 1956
|
38
|
+
_la_ 1907
|
39
|
+
an 1882
|
40
|
+
_p 1850
|
41
|
+
g 1831
|
42
|
+
_e 1791
|
43
|
+
_d 1778
|
44
|
+
is 1737
|
45
|
+
aj 1658
|
46
|
+
st 1635
|
47
|
+
_s 1575
|
48
|
+
c 1526
|
49
|
+
de 1517
|
50
|
+
oj 1498
|
51
|
+
er 1476
|
52
|
+
ti 1456
|
53
|
+
f 1443
|
54
|
+
_a 1442
|
55
|
+
b 1427
|
56
|
+
ro 1379
|
57
|
+
_m 1351
|
58
|
+
ra 1341
|
59
|
+
nt 1293
|
60
|
+
ka 1270
|
61
|
+
ri 1258
|
62
|
+
al 1249
|
63
|
+
as_ 1248
|
64
|
+
aj_ 1213
|
65
|
+
to 1209
|
66
|
+
_de 1203
|
67
|
+
_t 1200
|
68
|
+
te 1179
|
69
|
+
_n 1176
|
70
|
+
is_ 1171
|
71
|
+
in 1151
|
72
|
+
ko 1145
|
73
|
+
or 1114
|
74
|
+
es 1083
|
75
|
+
re 1034
|
76
|
+
ia 1029
|
77
|
+
li 1022
|
78
|
+
de_ 1016
|
79
|
+
_de_ 979
|
80
|
+
ar 974
|
81
|
+
_v 966
|
82
|
+
vi 942
|
83
|
+
lo 932
|
84
|
+
x 928
|
85
|
+
io 917
|
86
|
+
ne 855
|
87
|
+
no 848
|
88
|
+
ni 843
|
89
|
+
mi 835
|
90
|
+
ma 819
|
91
|
+
_ka 816
|
92
|
+
el 815
|
93
|
+
pr 771
|
94
|
+
z 744
|
95
|
+
un 734
|
96
|
+
l_ 732
|
97
|
+
po 730
|
98
|
+
_f 725
|
99
|
+
� 724
|
100
|
+
est 691
|
101
|
+
na 687
|
102
|
+
ki 679
|
103
|
+
kaj 676
|
104
|
+
si 665
|
105
|
+
u_ 663
|
106
|
+
kaj_ 660
|
107
|
+
" 654
|
108
|
+
tas 651
|
109
|
+
le 650
|
110
|
+
oj_ 648
|
111
|
+
_i 643
|
112
|
+
tr 642
|
113
|
+
_pr 630
|
114
|
+
_es 628
|
115
|
+
jn 626
|
116
|
+
pe 618
|
117
|
+
_kaj 616
|
118
|
+
ig 616
|
119
|
+
_kaj_ 611
|
120
|
+
do 608
|
121
|
+
sta 606
|
122
|
+
on_ 602
|
123
|
+
ek 602
|
124
|
+
ci 597
|
125
|
+
r_ 595
|
126
|
+
� 594
|
127
|
+
_r 593
|
128
|
+
il 592
|
129
|
+
_est 587
|
130
|
+
di 586
|
131
|
+
am 586
|
132
|
+
_mi 582
|
133
|
+
a� 578
|
134
|
+
_vi 577
|
135
|
+
mo 575
|
136
|
+
ant 565
|
137
|
+
_ne 562
|
138
|
+
en_ 561
|
139
|
+
o. 559
|
140
|
+
� 543
|
141
|
+
iu 538
|
142
|
+
o, 529
|
143
|
+
ur 527
|
144
|
+
o._ 527
|
145
|
+
om 525
|
146
|
+
o,_ 524
|
147
|
+
at 521
|
148
|
+
va 521
|
149
|
+
- 519
|
150
|
+
_en 518
|
151
|
+
: 513
|
152
|
+
:_ 512
|
153
|
+
_ti 500
|
154
|
+
M 496
|
155
|
+
h 488
|
156
|
+
nd 484
|
157
|
+
me 484
|
158
|
+
_al 481
|
159
|
+
_ko 479
|
160
|
+
ve 478
|
161
|
+
ie 478
|
162
|
+
_ki 473
|
163
|
+
it 473
|
164
|
+
L 466
|
165
|
+
_b 465
|
166
|
+
se 462
|
167
|
+
em 452
|
168
|
+
ol 450
|
169
|
+
nta 449
|
170
|
+
tu 448
|
171
|
+
ik 444
|
172
|
+
ov 443
|
173
|
+
da 443
|
174
|
+
_M 440
|
175
|
+
_po 439
|
176
|
+
tas_ 438
|
177
|
+
ne_ 437
|
178
|
+
et 437
|
179
|
+
_ma 436
|
180
|
+
_en_ 435
|
181
|
+
su 429
|
182
|
+
pl 426
|
183
|
+
_L 425
|
184
|
+
pa 420
|
185
|
+
_o 417
|
186
|
+
vo 408
|
187
|
+
an_ 407
|
188
|
+
ro_ 406
|
189
|
+
sti 406
|
190
|
+
nu 399
|
191
|
+
kon 396
|
192
|
+
stas 391
|
193
|
+
m_ 391
|
194
|
+
ir 388
|
195
|
+
n. 386
|
196
|
+
fa 386
|
197
|
+
jn_ 382
|
198
|
+
ku 382
|
199
|
+
os 376
|
200
|
+
ke 375
|
201
|
+
n, 375
|
202
|
+
esta 374
|
203
|
+
n,_ 372
|
204
|
+
_su 362
|
205
|
+
ta_ 362
|
206
|
+
stas_ 359
|
207
|
+
xi 359
|
208
|
+
Mi 358
|
209
|
+
_ne_ 356
|
210
|
+
al_ 355
|
211
|
+
nk 353
|
212
|
+
so 353
|
213
|
+
n._ 352
|
214
|
+
id 349
|
215
|
+
_g 348
|
216
|
+
estas 347
|
217
|
+
ga 346
|
218
|
+
_h 345
|
219
|
+
per 345
|
220
|
+
_Mi 340
|
221
|
+
ok 339
|
222
|
+
K 339
|
223
|
+
mp 337
|
224
|
+
_esta 337
|
225
|
+
s,_ 335
|
226
|
+
s, 335
|
227
|
+
_se 333
|
228
|
+
anta 332
|
229
|
+
ul 326
|
230
|
+
ran 325
|
231
|
+
_" 323
|
232
|
+
�_ 322
|
233
|
+
te_ 320
|
234
|
+
ak 320
|
235
|
+
a�_ 320
|
236
|
+
ed 320
|
237
|
+
rt 319
|
238
|
+
ojn 318
|
239
|
+
gi 318
|
240
|
+
_� 317
|
241
|
+
tis 316
|
242
|
+
gx 316
|
243
|
+
mal 316
|
244
|
+
ia_ 315
|
245
|
+
ks 310
|
246
|
+
_al_ 310
|
247
|
+
mi_ 309
|
248
|
+
S 309
|
249
|
+
lu 309
|
250
|
+
ns 308
|
251
|
+
kt 305
|
252
|
+
io_ 302
|
253
|
+
ent 300
|
254
|
+
? 300
|
255
|
+
_K 300
|
256
|
+
ec 300
|
257
|
+
el_ 299
|
258
|
+
_- 299
|
259
|
+
li_ 299
|
260
|
+
E 298
|
261
|
+
� 298
|
262
|
+
_li 297
|
263
|
+
fo 296
|
264
|
+
ter 296
|
265
|
+
_re 296
|
266
|
+
A 295
|
267
|
+
nto 294
|
268
|
+
vi_ 292
|
269
|
+
La 292
|
270
|
+
_mal 290
|
271
|
+
nte 288
|
272
|
+
sp 287
|
273
|
+
sa 287
|
274
|
+
_mi_ 279
|
275
|
+
ut 278
|
276
|
+
op 278
|
277
|
+
_ke 277
|
278
|
+
bo 277
|
279
|
+
ajn 276
|
280
|
+
un_ 276
|
281
|
+
T 274
|
282
|
+
to_ 272
|
283
|
+
-_ 272
|
284
|
+
bl 272
|
285
|
+
_an 271
|
286
|
+
_La 271
|
287
|
+
�i 269
|
288
|
+
_S 268
|
289
|
+
_pl 267
|
290
|
+
_fa 266
|
291
|
+
ni_ 266
|
292
|
+
La_ 265
|
293
|
+
_E 264
|
294
|
+
N 263
|
295
|
+
tis_ 263
|
296
|
+
_tr 263
|
297
|
+
' 262
|
298
|
+
! 262
|
299
|
+
_-_ 262
|
300
|
+
pro 261
|
301
|
+
iu_ 261
|
302
|
+
i� 261
|
303
|
+
nc 260
|
304
|
+
_si 259
|
305
|
+
du 257
|
306
|
+
_kon 256
|
307
|
+
ru 255
|
308
|
+
_vi_ 254
|
309
|
+
_j 253
|
310
|
+
ce 251
|
311
|
+
ke_ 249
|
312
|
+
ap 248
|
313
|
+
us 247
|
314
|
+
be 247
|
315
|
+
im 247
|
316
|
+
B 246
|
317
|
+
_ku 246
|
318
|
+
_La_ 246
|
319
|
+
tra 245
|
320
|
+
ad 245
|
321
|
+
uj 245
|
322
|
+
ac 245
|
323
|
+
ita 243
|
324
|
+
pre 242
|
325
|
+
_pro 242
|
326
|
+
co 241
|
327
|
+
rm 241
|
328
|
+
_ni 238
|
329
|
+
_pe 236
|
330
|
+
?_ 234
|
331
|
+
on. 234
|
332
|
+
toj 234
|
333
|
+
"_ 234
|
334
|
+
j. 234
|
335
|
+
_ke_ 233
|
336
|
+
s. 232
|
337
|
+
_A 231
|
338
|
+
av 230
|
339
|
+
ri_ 230
|
340
|
+
_el 229
|
341
|
+
por 229
|
342
|
+
` 224
|
343
|
+
ev 224
|
344
|
+
las 223
|
345
|
+
P 223
|
346
|
+
j._ 221
|
347
|
+
eni 220
|
348
|
+
_T 220
|
349
|
+
_B 219
|
350
|
+
j,_ 218
|
351
|
+
j, 218
|
352
|
+
era 217
|
353
|
+
_in 216
|
354
|
+
on._ 216
|
355
|
+
cx 216
|
356
|
+
_N 215
|
357
|
+
ion 215
|
358
|
+
ab 215
|
359
|
+
.. 214
|
360
|
+
) 213
|
361
|
+
fi 213
|
362
|
+
or_ 212
|
363
|
+
pri 212
|
364
|
+
s._ 212
|
365
|
+
_por 210
|
366
|
+
ez 210
|
367
|
+
in_ 210
|
368
|
+
am_ 209
|
369
|
+
on,_ 209
|
370
|
+
ll 209
|
371
|
+
�i 209
|
372
|
+
on, 209
|
373
|
+
_ve 208
|
374
|
+
ris 208
|
375
|
+
esti 208
|
376
|
+
!_ 207
|
377
|
+
men 206
|
378
|
+
vas 205
|
379
|
+
iel 204
|
380
|
+
taj 203
|
381
|
+
_c 201
|
382
|
+
aro 201
|
383
|
+
ank 200
|
384
|
+
_pri 200
|
385
|
+
jo 200
|
386
|
+
ja 200
|
387
|
+
ont 200
|
388
|
+
lt 199
|
389
|
+
_P 199
|
390
|
+
igi 199
|
391
|
+
_pa 197
|
392
|
+
oj. 197
|
393
|
+
( 196
|
394
|
+
au 195
|
395
|
+
oro 195
|
396
|
+
ng 195
|
397
|
+
_( 194
|
398
|
+
sto 194
|
399
|
+
ast 194
|
400
|
+
ag 193
|