language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 10406
|
2
|
+
a 2828
|
3
|
+
e 2676
|
4
|
+
i 2458
|
5
|
+
o 2418
|
6
|
+
n 1814
|
7
|
+
r 1484
|
8
|
+
v 1253
|
9
|
+
l 1248
|
10
|
+
s 1228
|
11
|
+
t 1172
|
12
|
+
j 1107
|
13
|
+
d 1085
|
14
|
+
k 911
|
15
|
+
p 880
|
16
|
+
a_ 823
|
17
|
+
m 763
|
18
|
+
i_ 681
|
19
|
+
e_ 678
|
20
|
+
_p 603
|
21
|
+
o_ 566
|
22
|
+
u 521
|
23
|
+
z 516
|
24
|
+
b 456
|
25
|
+
_s 435
|
26
|
+
je 434
|
27
|
+
, 416
|
28
|
+
,_ 411
|
29
|
+
ni 399
|
30
|
+
� 383
|
31
|
+
_v 372
|
32
|
+
_d 356
|
33
|
+
pr 355
|
34
|
+
g 345
|
35
|
+
ra 336
|
36
|
+
_n 332
|
37
|
+
st 323
|
38
|
+
an 313
|
39
|
+
po 303
|
40
|
+
re 301
|
41
|
+
na 295
|
42
|
+
h 287
|
43
|
+
ov 276
|
44
|
+
_pr 276
|
45
|
+
li 275
|
46
|
+
al 274
|
47
|
+
_z 270
|
48
|
+
je_ 259
|
49
|
+
la 255
|
50
|
+
� 253
|
51
|
+
ne 248
|
52
|
+
en 246
|
53
|
+
ko 244
|
54
|
+
in 237
|
55
|
+
c 234
|
56
|
+
ti 234
|
57
|
+
v_ 234
|
58
|
+
_po 232
|
59
|
+
no 230
|
60
|
+
ve 230
|
61
|
+
_k 227
|
62
|
+
_i 224
|
63
|
+
da 224
|
64
|
+
. 221
|
65
|
+
_j 221
|
66
|
+
ri 220
|
67
|
+
ja 216
|
68
|
+
_t 214
|
69
|
+
se 213
|
70
|
+
ed 212
|
71
|
+
._ 211
|
72
|
+
em 206
|
73
|
+
te 205
|
74
|
+
za 201
|
75
|
+
od 201
|
76
|
+
av 200
|
77
|
+
lo 196
|
78
|
+
nj 194
|
79
|
+
_o 194
|
80
|
+
_je 193
|
81
|
+
il 190
|
82
|
+
or 183
|
83
|
+
ka 181
|
84
|
+
sk 179
|
85
|
+
_b 178
|
86
|
+
_je_ 178
|
87
|
+
ih 178
|
88
|
+
n_ 177
|
89
|
+
_za 173
|
90
|
+
h_ 171
|
91
|
+
er 171
|
92
|
+
os 171
|
93
|
+
_na 168
|
94
|
+
va 168
|
95
|
+
ta 164
|
96
|
+
le 163
|
97
|
+
m_ 161
|
98
|
+
ev 157
|
99
|
+
ij 157
|
100
|
+
ar 157
|
101
|
+
do 155
|
102
|
+
to 155
|
103
|
+
� 154
|
104
|
+
A 153
|
105
|
+
el 150
|
106
|
+
_m 148
|
107
|
+
ro 147
|
108
|
+
ol 146
|
109
|
+
_v_ 145
|
110
|
+
aj 145
|
111
|
+
di 143
|
112
|
+
N 142
|
113
|
+
S 142
|
114
|
+
at 140
|
115
|
+
ih_ 139
|
116
|
+
ki 138
|
117
|
+
de 137
|
118
|
+
_in 135
|
119
|
+
vo 135
|
120
|
+
ga 134
|
121
|
+
me 131
|
122
|
+
in_ 129
|
123
|
+
vi 129
|
124
|
+
om 127
|
125
|
+
_in_ 125
|
126
|
+
et 124
|
127
|
+
pre 124
|
128
|
+
O 123
|
129
|
+
bi 120
|
130
|
+
I 119
|
131
|
+
da_ 117
|
132
|
+
ik 117
|
133
|
+
ma 115
|
134
|
+
E 114
|
135
|
+
so 113
|
136
|
+
bo 112
|
137
|
+
it 112
|
138
|
+
anj 112
|
139
|
+
eg 110
|
140
|
+
ni_ 109
|
141
|
+
mi 108
|
142
|
+
ke 108
|
143
|
+
na_ 108
|
144
|
+
u_ 108
|
145
|
+
lj 106
|
146
|
+
iz 105
|
147
|
+
ob 105
|
148
|
+
_da 103
|
149
|
+
li_ 103
|
150
|
+
is 103
|
151
|
+
im 102
|
152
|
+
red 102
|
153
|
+
_pre 102
|
154
|
+
dr 100
|
155
|
+
mo 99
|
156
|
+
P 99
|
157
|
+
_se 99
|
158
|
+
ji 98
|
159
|
+
r_ 97
|
160
|
+
ad 97
|
161
|
+
pri 97
|
162
|
+
K 97
|
163
|
+
_l 97
|
164
|
+
tr 95
|
165
|
+
pa 94
|
166
|
+
no_ 94
|
167
|
+
j_ 92
|
168
|
+
ki_ 91
|
169
|
+
ti_ 91
|
170
|
+
_pri 91
|
171
|
+
dn 89
|
172
|
+
_P 88
|
173
|
+
ej 88
|
174
|
+
_da_ 87
|
175
|
+
ne_ 86
|
176
|
+
ega 86
|
177
|
+
_r 86
|
178
|
+
_bi 86
|
179
|
+
l_ 86
|
180
|
+
em_ 86
|
181
|
+
go 86
|
182
|
+
" 85
|
183
|
+
sl 85
|
184
|
+
ek 84
|
185
|
+
ali 84
|
186
|
+
ove 84
|
187
|
+
a� 84
|
188
|
+
ak 84
|
189
|
+
ci 83
|
190
|
+
ga_ 83
|
191
|
+
ko_ 83
|
192
|
+
se_ 82
|
193
|
+
_S 82
|
194
|
+
jo 81
|
195
|
+
ot 81
|
196
|
+
ja_ 81
|
197
|
+
_so 80
|
198
|
+
lov 80
|
199
|
+
L 80
|
200
|
+
D 79
|
201
|
+
V 79
|
202
|
+
as 78
|
203
|
+
_do 78
|
204
|
+
am 78
|
205
|
+
nje 77
|
206
|
+
es 77
|
207
|
+
za_ 77
|
208
|
+
_pa 76
|
209
|
+
T 75
|
210
|
+
tu 75
|
211
|
+
_za_ 74
|
212
|
+
sti 74
|
213
|
+
_dr 74
|
214
|
+
la_ 74
|
215
|
+
_N 74
|
216
|
+
_de 74
|
217
|
+
ega_ 73
|
218
|
+
_ko 73
|
219
|
+
og 73
|
220
|
+
ns 72
|
221
|
+
�e 72
|
222
|
+
ds 72
|
223
|
+
_bo 71
|
224
|
+
ora 71
|
225
|
+
vn 71
|
226
|
+
ost 71
|
227
|
+
_ne 71
|
228
|
+
i� 70
|
229
|
+
ven 69
|
230
|
+
z_ 69
|
231
|
+
�i 69
|
232
|
+
_te 68
|
233
|
+
ce 68
|
234
|
+
_se_ 67
|
235
|
+
�a 67
|
236
|
+
o� 67
|
237
|
+
M 66
|
238
|
+
_u 66
|
239
|
+
un 65
|
240
|
+
ln 65
|
241
|
+
pos 64
|
242
|
+
ju 64
|
243
|
+
sta 64
|
244
|
+
op 64
|
245
|
+
di_ 63
|
246
|
+
ud 63
|
247
|
+
vs 63
|
248
|
+
t_ 62
|
249
|
+
nsk 62
|
250
|
+
tv 62
|
251
|
+
on 62
|
252
|
+
ski 62
|
253
|
+
R 62
|
254
|
+
pa_ 62
|
255
|
+
_ka 62
|
256
|
+
i, 61
|
257
|
+
so_ 61
|
258
|
+
_iz 60
|
259
|
+
_pa_ 60
|
260
|
+
s_ 60
|
261
|
+
i,_ 60
|
262
|
+
pro 59
|
263
|
+
del 59
|
264
|
+
rav 59
|
265
|
+
eni 59
|
266
|
+
oli 58
|
267
|
+
rj 58
|
268
|
+
e, 57
|
269
|
+
�e 57
|
270
|
+
ili 57
|
271
|
+
vr 57
|
272
|
+
d_ 57
|
273
|
+
_le 57
|
274
|
+
pred 57
|
275
|
+
jo_ 56
|
276
|
+
e,_ 56
|
277
|
+
nik 56
|
278
|
+
love 56
|
279
|
+
_pred 56
|
280
|
+
ske 56
|
281
|
+
er_ 55
|
282
|
+
str 55
|
283
|
+
�n 54
|
284
|
+
pra 54
|
285
|
+
J 54
|
286
|
+
_� 54
|
287
|
+
oven 53
|
288
|
+
_ra 53
|
289
|
+
tn 53
|
290
|
+
_na_ 53
|
291
|
+
_so_ 53
|
292
|
+
nih 53
|
293
|
+
loven 53
|
294
|
+
si 52
|
295
|
+
ke_ 52
|
296
|
+
_g 52
|
297
|
+
ic 52
|
298
|
+
udi 51
|
299
|
+
bi_ 51
|
300
|
+
eds 51
|
301
|
+
oj 51
|
302
|
+
ru 51
|
303
|
+
a, 51
|
304
|
+
_pro 50
|
305
|
+
_pos 50
|
306
|
+
nc 50
|
307
|
+
nih_ 50
|
308
|
+
�u 50
|
309
|
+
a,_ 50
|
310
|
+
_a 50
|
311
|
+
az 50
|
312
|
+
ok 50
|
313
|
+
B 50
|
314
|
+
let 49
|
315
|
+
udi_ 49
|
316
|
+
_od 49
|
317
|
+
_K 49
|
318
|
+
aj_ 48
|
319
|
+
_bi_ 48
|
320
|
+
_ve 48
|
321
|
+
ra� 48
|
322
|
+
o, 47
|
323
|
+
_tu 47
|
324
|
+
ija 47
|
325
|
+
ter 47
|
326
|
+
ist 47
|
327
|
+
Z 47
|
328
|
+
reds 46
|
329
|
+
nd 46
|
330
|
+
ali_ 46
|
331
|
+
A_ 46
|
332
|
+
iti 46
|
333
|
+
bil 46
|
334
|
+
_ob 46
|
335
|
+
o,_ 46
|
336
|
+
ati 46
|
337
|
+
tud 45
|
338
|
+
tudi 45
|
339
|
+
_ki 45
|
340
|
+
k_ 45
|
341
|
+
be 45
|
342
|
+
a� 45
|
343
|
+
ir 45
|
344
|
+
�a 45
|
345
|
+
do_ 45
|
346
|
+
sp 45
|
347
|
+
_ki_ 45
|
348
|
+
_st 45
|
349
|
+
ep 44
|
350
|
+
_del 44
|
351
|
+
tudi_ 44
|
352
|
+
r� 44
|
353
|
+
a�u 44
|
354
|
+
_ni 44
|
355
|
+
ah 43
|
356
|
+
ra�u 43
|
357
|
+
ra�un 43
|
358
|
+
i� 43
|
359
|
+
_mo 43
|
360
|
+
avn 43
|
361
|
+
_tud 43
|
362
|
+
�un 43
|
363
|
+
a�un 43
|
364
|
+
_tudi 43
|
365
|
+
_to 42
|
366
|
+
raz 42
|
367
|
+
kr 42
|
368
|
+
ova 42
|
369
|
+
_e 42
|
370
|
+
ogo 42
|
371
|
+
ani 42
|
372
|
+
_" 42
|
373
|
+
ev_ 42
|
374
|
+
br 42
|
375
|
+
eb 42
|
376
|
+
sa 42
|
377
|
+
mi_ 42
|
378
|
+
tem 42
|
379
|
+
ta_ 41
|
380
|
+
prav 41
|
381
|
+
i. 41
|
382
|
+
slov 41
|
383
|
+
ens 41
|
384
|
+
bo_ 41
|
385
|
+
�e 41
|
386
|
+
_T 41
|
387
|
+
_let 41
|
388
|
+
odo 41
|
389
|
+
slo 41
|
390
|
+
ensk 40
|
391
|
+
ka_ 40
|
392
|
+
neg 40
|
393
|
+
ez 40
|
394
|
+
nos 40
|
395
|
+
e� 40
|
396
|
+
_sl 40
|
397
|
+
_V 40
|
398
|
+
r�a 40
|
399
|
+
nega 40
|
400
|
+
ili_ 39
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 25044
|
2
|
+
e 7830
|
3
|
+
a 7437
|
4
|
+
o 5102
|
5
|
+
s 4394
|
6
|
+
n 4358
|
7
|
+
i 4065
|
8
|
+
r 3998
|
9
|
+
l 3634
|
10
|
+
d 3118
|
11
|
+
c 2931
|
12
|
+
t 2834
|
13
|
+
u 2316
|
14
|
+
a_ 2269
|
15
|
+
e_ 2211
|
16
|
+
s_ 1862
|
17
|
+
de 1679
|
18
|
+
p 1673
|
19
|
+
_d 1644
|
20
|
+
m 1447
|
21
|
+
_de 1443
|
22
|
+
n_ 1332
|
23
|
+
o_ 1301
|
24
|
+
en 1295
|
25
|
+
_e 1216
|
26
|
+
es 1177
|
27
|
+
_l 1132
|
28
|
+
de_ 1080
|
29
|
+
la 1060
|
30
|
+
os 1028
|
31
|
+
_de_ 1027
|
32
|
+
_p 963
|
33
|
+
l_ 910
|
34
|
+
ci 890
|
35
|
+
_c 866
|
36
|
+
_a 866
|
37
|
+
os_ 801
|
38
|
+
ar 777
|
39
|
+
er 775
|
40
|
+
as 768
|
41
|
+
ra 746
|
42
|
+
nt 736
|
43
|
+
_la 727
|
44
|
+
re 726
|
45
|
+
,_ 724
|
46
|
+
, 724
|
47
|
+
el 722
|
48
|
+
ta 708
|
49
|
+
ue 701
|
50
|
+
g 678
|
51
|
+
on 674
|
52
|
+
al 670
|
53
|
+
_s 666
|
54
|
+
co 653
|
55
|
+
b 637
|
56
|
+
an 622
|
57
|
+
v 616
|
58
|
+
la_ 616
|
59
|
+
or 612
|
60
|
+
te 599
|
61
|
+
st 596
|
62
|
+
el_ 580
|
63
|
+
_la_ 573
|
64
|
+
y 545
|
65
|
+
to 543
|
66
|
+
r_ 517
|
67
|
+
ad 512
|
68
|
+
� 511
|
69
|
+
do 504
|
70
|
+
ro 504
|
71
|
+
se 488
|
72
|
+
as_ 488
|
73
|
+
q 487
|
74
|
+
qu 487
|
75
|
+
. 479
|
76
|
+
._ 478
|
77
|
+
en_ 475
|
78
|
+
ca 460
|
79
|
+
in 459
|
80
|
+
un 456
|
81
|
+
_co 450
|
82
|
+
es_ 449
|
83
|
+
ic 449
|
84
|
+
_en 440
|
85
|
+
ac 440
|
86
|
+
que 439
|
87
|
+
na 439
|
88
|
+
lo 430
|
89
|
+
_m 430
|
90
|
+
f 429
|
91
|
+
ent 428
|
92
|
+
da 412
|
93
|
+
ue_ 411
|
94
|
+
po 405
|
95
|
+
le 399
|
96
|
+
_q 399
|
97
|
+
_qu 399
|
98
|
+
que_ 393
|
99
|
+
_que 388
|
100
|
+
ie 386
|
101
|
+
h 385
|
102
|
+
pa 382
|
103
|
+
y_ 371
|
104
|
+
ti 367
|
105
|
+
_que_ 365
|
106
|
+
_en_ 365
|
107
|
+
_y 361
|
108
|
+
tr 358
|
109
|
+
_el 353
|
110
|
+
ri 349
|
111
|
+
ia 342
|
112
|
+
_el_ 333
|
113
|
+
_se 330
|
114
|
+
i� 330
|
115
|
+
_y_ 330
|
116
|
+
io 329
|
117
|
+
pr 320
|
118
|
+
�n 317
|
119
|
+
ec 317
|
120
|
+
no 314
|
121
|
+
id 301
|
122
|
+
� 300
|
123
|
+
mi 299
|
124
|
+
_t 299
|
125
|
+
i�n 292
|
126
|
+
nte 292
|
127
|
+
me 286
|
128
|
+
aci 283
|
129
|
+
do_ 279
|
130
|
+
li 276
|
131
|
+
con 276
|
132
|
+
nd 273
|
133
|
+
est 272
|
134
|
+
ni 272
|
135
|
+
� 271
|
136
|
+
di 270
|
137
|
+
_es 268
|
138
|
+
_lo 267
|
139
|
+
ci� 265
|
140
|
+
ma 265
|
141
|
+
�n_ 264
|
142
|
+
_pr 263
|
143
|
+
_r 261
|
144
|
+
ci�n 255
|
145
|
+
z 254
|
146
|
+
ra_ 251
|
147
|
+
si 247
|
148
|
+
i�n_ 246
|
149
|
+
oc 245
|
150
|
+
nc 244
|
151
|
+
_u 244
|
152
|
+
_po 243
|
153
|
+
los 243
|
154
|
+
or_ 242
|
155
|
+
_con 241
|
156
|
+
is 239
|
157
|
+
del 238
|
158
|
+
_del 237
|
159
|
+
ado 236
|
160
|
+
se_ 233
|
161
|
+
_i 233
|
162
|
+
los_ 231
|
163
|
+
_re 231
|
164
|
+
por 229
|
165
|
+
_del_ 228
|
166
|
+
sta 228
|
167
|
+
del_ 228
|
168
|
+
al_ 228
|
169
|
+
ne 226
|
170
|
+
_h 226
|
171
|
+
cu 225
|
172
|
+
_n 225
|
173
|
+
_a_ 224
|
174
|
+
_v 224
|
175
|
+
_un 223
|
176
|
+
ce 222
|
177
|
+
so 220
|
178
|
+
ci�n_ 218
|
179
|
+
res 218
|
180
|
+
vi 217
|
181
|
+
om 216
|
182
|
+
te_ 212
|
183
|
+
_pa 211
|
184
|
+
ien 210
|
185
|
+
j 209
|
186
|
+
E 208
|
187
|
+
_los 207
|
188
|
+
_los_ 207
|
189
|
+
to_ 206
|
190
|
+
ol 204
|
191
|
+
it 203
|
192
|
+
am 202
|
193
|
+
aci� 201
|
194
|
+
rt 201
|
195
|
+
aci�n 201
|
196
|
+
pe 197
|
197
|
+
ha 190
|
198
|
+
_se_ 189
|
199
|
+
nto 188
|
200
|
+
_o 184
|
201
|
+
_E 184
|
202
|
+
on_ 184
|
203
|
+
sa 183
|
204
|
+
na_ 182
|
205
|
+
ta_ 181
|
206
|
+
su 180
|
207
|
+
cia 180
|
208
|
+
mo 180
|
209
|
+
ct 178
|
210
|
+
par 178
|
211
|
+
_f 177
|
212
|
+
_por 176
|
213
|
+
eg 172
|
214
|
+
_in 172
|
215
|
+
ur 170
|
216
|
+
L 168
|
217
|
+
ve 166
|
218
|
+
im 164
|
219
|
+
ga 163
|
220
|
+
_est 161
|
221
|
+
ar_ 161
|
222
|
+
ab 160
|
223
|
+
_L 159
|
224
|
+
tu 158
|
225
|
+
at 158
|
226
|
+
no_ 157
|
227
|
+
s, 157
|
228
|
+
s,_ 157
|
229
|
+
_por_ 156
|
230
|
+
por_ 156
|
231
|
+
las 156
|
232
|
+
ba 154
|
233
|
+
o,_ 154
|
234
|
+
o, 154
|
235
|
+
ento 151
|
236
|
+
et 150
|
237
|
+
C 150
|
238
|
+
_ha 149
|
239
|
+
A 149
|
240
|
+
tra 148
|
241
|
+
ient 148
|
242
|
+
_al 147
|
243
|
+
a,_ 146
|
244
|
+
ica 146
|
245
|
+
a, 146
|
246
|
+
pro 146
|
247
|
+
ado_ 145
|
248
|
+
ici 144
|
249
|
+
_ca 144
|
250
|
+
an_ 144
|
251
|
+
las_ 143
|
252
|
+
ara 143
|
253
|
+
nci 143
|
254
|
+
ente 142
|
255
|
+
� 142
|
256
|
+
rr 142
|
257
|
+
ir 142
|
258
|
+
da_ 141
|
259
|
+
em 141
|
260
|
+
ll 140
|
261
|
+
il 139
|
262
|
+
�a 138
|
263
|
+
iv 138
|
264
|
+
_su 138
|
265
|
+
_par 136
|
266
|
+
ul 136
|
267
|
+
ant 136
|
268
|
+
_A 135
|
269
|
+
mp 135
|
270
|
+
_las_ 134
|
271
|
+
_las 134
|
272
|
+
_C 134
|
273
|
+
_pro 133
|
274
|
+
men 132
|
275
|
+
P 132
|
276
|
+
des 131
|
277
|
+
com 130
|
278
|
+
ion 130
|
279
|
+
era 130
|
280
|
+
ed 129
|
281
|
+
ida 129
|
282
|
+
sp 128
|
283
|
+
gu 127
|
284
|
+
nte_ 127
|
285
|
+
ns 127
|
286
|
+
za 126
|
287
|
+
dos 125
|
288
|
+
M 125
|
289
|
+
cio 125
|
290
|
+
les 125
|
291
|
+
_P 124
|
292
|
+
bl 124
|
293
|
+
_com 122
|
294
|
+
s._ 122
|
295
|
+
s. 122
|
296
|
+
_M 121
|
297
|
+
ua 120
|
298
|
+
nta 120
|
299
|
+
mu 119
|
300
|
+
_no 118
|
301
|
+
dad 118
|
302
|
+
� 117
|
303
|
+
� 116
|
304
|
+
un_ 116
|
305
|
+
va 116
|
306
|
+
ist 116
|
307
|
+
nes 116
|
308
|
+
iento 115
|
309
|
+
one 114
|
310
|
+
ara_ 113
|
311
|
+
S 113
|
312
|
+
ada 113
|
313
|
+
_un_ 113
|
314
|
+
fi 111
|
315
|
+
pre 110
|
316
|
+
tos 110
|
317
|
+
ter 109
|
318
|
+
ot 109
|
319
|
+
esta 108
|
320
|
+
_me 107
|
321
|
+
ido 107
|
322
|
+
ob 107
|
323
|
+
_g 105
|
324
|
+
br 105
|
325
|
+
go 105
|
326
|
+
ea 104
|
327
|
+
nto_ 104
|
328
|
+
ona 103
|
329
|
+
pu 103
|
330
|
+
dos_ 103
|
331
|
+
tro 103
|
332
|
+
ier 103
|
333
|
+
para 102
|
334
|
+
ment 101
|
335
|
+
ag 101
|
336
|
+
ero 101
|
337
|
+
gr 101
|
338
|
+
rec 101
|
339
|
+
bi 101
|
340
|
+
ia_ 100
|
341
|
+
una 100
|
342
|
+
nic 99
|
343
|
+
ncia 99
|
344
|
+
�a_ 98
|
345
|
+
a._ 98
|
346
|
+
tos_ 98
|
347
|
+
a. 98
|
348
|
+
ran 98
|
349
|
+
lo_ 97
|
350
|
+
ones 97
|
351
|
+
rm 96
|
352
|
+
lu 96
|
353
|
+
ron 95
|
354
|
+
con_ 95
|
355
|
+
�_ 95
|
356
|
+
nes_ 95
|
357
|
+
_ci 95
|
358
|
+
ante 94
|
359
|
+
ch 94
|
360
|
+
_con_ 94
|
361
|
+
_para 94
|
362
|
+
ntr 93
|
363
|
+
una_ 93
|
364
|
+
para_ 93
|
365
|
+
mie 92
|
366
|
+
ico 92
|
367
|
+
fe 92
|
368
|
+
les_ 92
|
369
|
+
uc 92
|
370
|
+
ip 91
|
371
|
+
sto 91
|
372
|
+
_ma 91
|
373
|
+
ui 91
|
374
|
+
sta_ 91
|
375
|
+
_ve 90
|
376
|
+
cion 90
|
377
|
+
" 90
|
378
|
+
op 90
|
379
|
+
cal 89
|
380
|
+
_mu 89
|
381
|
+
_S 89
|
382
|
+
ro_ 89
|
383
|
+
_pe 88
|
384
|
+
ste 88
|
385
|
+
ras 88
|
386
|
+
pl 88
|
387
|
+
_una 88
|
388
|
+
_di 87
|
389
|
+
ento_ 86
|
390
|
+
ita 86
|
391
|
+
ione 85
|
392
|
+
ect 85
|
393
|
+
_una_ 85
|
394
|
+
mien 85
|
395
|
+
tan 85
|
396
|
+
du 84
|
397
|
+
den 84
|
398
|
+
ndo 84
|
399
|
+
per 84
|
400
|
+
eri 84
|