language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20674
|
2
|
+
a 6376
|
3
|
+
e 5815
|
4
|
+
i 5746
|
5
|
+
t 3396
|
6
|
+
r 3280
|
7
|
+
n 3103
|
8
|
+
u 2835
|
9
|
+
s 2611
|
10
|
+
c 2582
|
11
|
+
e_ 2235
|
12
|
+
l 2224
|
13
|
+
o 2149
|
14
|
+
a_ 1974
|
15
|
+
d 1629
|
16
|
+
m 1528
|
17
|
+
p 1410
|
18
|
+
i_ 1358
|
19
|
+
in 1308
|
20
|
+
_c 1167
|
21
|
+
_s 1118
|
22
|
+
_d 999
|
23
|
+
re 905
|
24
|
+
ar 898
|
25
|
+
, 791
|
26
|
+
,_ 786
|
27
|
+
_p 785
|
28
|
+
de 771
|
29
|
+
_a 754
|
30
|
+
te 687
|
31
|
+
_i 667
|
32
|
+
at 654
|
33
|
+
ti 645
|
34
|
+
ca 639
|
35
|
+
n_ 630
|
36
|
+
ta 617
|
37
|
+
si 614
|
38
|
+
_de 609
|
39
|
+
f 606
|
40
|
+
st 583
|
41
|
+
ri 581
|
42
|
+
u_ 567
|
43
|
+
nt 553
|
44
|
+
. 542
|
45
|
+
ra 540
|
46
|
+
_m 534
|
47
|
+
g 528
|
48
|
+
v 516
|
49
|
+
ul 516
|
50
|
+
de_ 513
|
51
|
+
_in 503
|
52
|
+
b 492
|
53
|
+
_de_ 474
|
54
|
+
._ 472
|
55
|
+
le 459
|
56
|
+
l_ 444
|
57
|
+
un 443
|
58
|
+
_si 440
|
59
|
+
es 437
|
60
|
+
tr 426
|
61
|
+
ea 420
|
62
|
+
t_ 412
|
63
|
+
ce 412
|
64
|
+
ma 407
|
65
|
+
cu 402
|
66
|
+
er 398
|
67
|
+
_ca 397
|
68
|
+
si_ 388
|
69
|
+
_f 387
|
70
|
+
_l 383
|
71
|
+
z 382
|
72
|
+
la 381
|
73
|
+
ne 370
|
74
|
+
sa 364
|
75
|
+
as 360
|
76
|
+
_e 357
|
77
|
+
in_ 356
|
78
|
+
an 352
|
79
|
+
it 351
|
80
|
+
te_ 346
|
81
|
+
or 345
|
82
|
+
el 345
|
83
|
+
ci 339
|
84
|
+
_si_ 333
|
85
|
+
_n 330
|
86
|
+
are 324
|
87
|
+
pe 319
|
88
|
+
re_ 317
|
89
|
+
al 310
|
90
|
+
_t 309
|
91
|
+
se 304
|
92
|
+
ic 295
|
93
|
+
ie 290
|
94
|
+
_u 290
|
95
|
+
ul_ 290
|
96
|
+
ni 289
|
97
|
+
int 285
|
98
|
+
_o 280
|
99
|
+
en 279
|
100
|
+
ta_ 279
|
101
|
+
ur 261
|
102
|
+
pa 256
|
103
|
+
co 255
|
104
|
+
_pe 254
|
105
|
+
ia 252
|
106
|
+
mi 251
|
107
|
+
pr 249
|
108
|
+
_ma 249
|
109
|
+
oa 249
|
110
|
+
me 246
|
111
|
+
lu 246
|
112
|
+
li 241
|
113
|
+
im 238
|
114
|
+
_in_ 237
|
115
|
+
da 237
|
116
|
+
na 237
|
117
|
+
_sa 235
|
118
|
+
ac 234
|
119
|
+
- 234
|
120
|
+
ii 232
|
121
|
+
est 231
|
122
|
+
r_ 231
|
123
|
+
h 230
|
124
|
+
_cu 230
|
125
|
+
le_ 229
|
126
|
+
ai 229
|
127
|
+
ca_ 227
|
128
|
+
il 226
|
129
|
+
ru 223
|
130
|
+
sc 223
|
131
|
+
_v 221
|
132
|
+
nu 220
|
133
|
+
tu 220
|
134
|
+
_un 220
|
135
|
+
nd 220
|
136
|
+
di 219
|
137
|
+
are_ 216
|
138
|
+
to 215
|
139
|
+
am 214
|
140
|
+
on 213
|
141
|
+
o_ 208
|
142
|
+
is 208
|
143
|
+
sa_ 203
|
144
|
+
la_ 200
|
145
|
+
_b 200
|
146
|
+
ste 195
|
147
|
+
et 194
|
148
|
+
ec 191
|
149
|
+
_r 186
|
150
|
+
car 185
|
151
|
+
ui 180
|
152
|
+
un_ 179
|
153
|
+
lo 178
|
154
|
+
cu_ 177
|
155
|
+
ei 176
|
156
|
+
e, 175
|
157
|
+
e,_ 175
|
158
|
+
pe_ 171
|
159
|
+
m_ 167
|
160
|
+
_la 166
|
161
|
+
a, 165
|
162
|
+
a,_ 164
|
163
|
+
_ce 164
|
164
|
+
rt 163
|
165
|
+
_co 163
|
166
|
+
ent 162
|
167
|
+
ro 162
|
168
|
+
ele 162
|
169
|
+
_pe_ 160
|
170
|
+
po 160
|
171
|
+
ea_ 159
|
172
|
+
" 158
|
173
|
+
ntr 158
|
174
|
+
_cu_ 158
|
175
|
+
_pr 157
|
176
|
+
ut 157
|
177
|
+
nc 156
|
178
|
+
ata 155
|
179
|
+
care 154
|
180
|
+
um 153
|
181
|
+
au 151
|
182
|
+
va 151
|
183
|
+
_o_ 150
|
184
|
+
_car 150
|
185
|
+
ii_ 145
|
186
|
+
ind 145
|
187
|
+
_un_ 144
|
188
|
+
os 144
|
189
|
+
ad 141
|
190
|
+
_la_ 140
|
191
|
+
I 140
|
192
|
+
este 138
|
193
|
+
ste_ 138
|
194
|
+
care_ 138
|
195
|
+
ir 137
|
196
|
+
ga 136
|
197
|
+
ap 136
|
198
|
+
ol 136
|
199
|
+
ra_ 136
|
200
|
+
_di 134
|
201
|
+
D 134
|
202
|
+
_care 133
|
203
|
+
se_ 133
|
204
|
+
om 133
|
205
|
+
ara 133
|
206
|
+
ati 133
|
207
|
+
fi 133
|
208
|
+
_sa_ 131
|
209
|
+
zi 130
|
210
|
+
vi 130
|
211
|
+
_ca_ 129
|
212
|
+
_se 128
|
213
|
+
_nu 128
|
214
|
+
ai_ 127
|
215
|
+
ch 127
|
216
|
+
pi 124
|
217
|
+
ve 123
|
218
|
+
fa 122
|
219
|
+
ot 121
|
220
|
+
_a_ 120
|
221
|
+
este_ 120
|
222
|
+
du 119
|
223
|
+
ine 119
|
224
|
+
s_ 118
|
225
|
+
fo 118
|
226
|
+
_ci 118
|
227
|
+
ui_ 118
|
228
|
+
ba 118
|
229
|
+
i, 117
|
230
|
+
i,_ 116
|
231
|
+
ne_ 115
|
232
|
+
us 115
|
233
|
+
_g 115
|
234
|
+
a. 115
|
235
|
+
fe 114
|
236
|
+
A 114
|
237
|
+
pu 114
|
238
|
+
ce_ 113
|
239
|
+
ar_ 113
|
240
|
+
_pa 113
|
241
|
+
oc 112
|
242
|
+
sta 112
|
243
|
+
lui 112
|
244
|
+
ns 112
|
245
|
+
em 112
|
246
|
+
' 112
|
247
|
+
oar 112
|
248
|
+
din 111
|
249
|
+
iu 111
|
250
|
+
_int 111
|
251
|
+
ate 111
|
252
|
+
mu 111
|
253
|
+
hi 110
|
254
|
+
ele_ 110
|
255
|
+
mp 109
|
256
|
+
_D 109
|
257
|
+
S 109
|
258
|
+
sti 108
|
259
|
+
bi 108
|
260
|
+
ata_ 107
|
261
|
+
ti_ 107
|
262
|
+
tra 107
|
263
|
+
C 107
|
264
|
+
c_ 106
|
265
|
+
tre 106
|
266
|
+
_al 105
|
267
|
+
rea 105
|
268
|
+
mai 105
|
269
|
+
j 104
|
270
|
+
a._ 104
|
271
|
+
gi 104
|
272
|
+
e. 103
|
273
|
+
d_ 103
|
274
|
+
_fa 103
|
275
|
+
E 102
|
276
|
+
mo 102
|
277
|
+
at_ 101
|
278
|
+
_e_ 101
|
279
|
+
nte 101
|
280
|
+
lt 101
|
281
|
+
sp 101
|
282
|
+
za 100
|
283
|
+
mai_ 100
|
284
|
+
su 99
|
285
|
+
na_ 98
|
286
|
+
tat 97
|
287
|
+
sin 97
|
288
|
+
ez 96
|
289
|
+
tru 96
|
290
|
+
e._ 96
|
291
|
+
ie_ 96
|
292
|
+
ia_ 96
|
293
|
+
_re 96
|
294
|
+
tul 96
|
295
|
+
_fo 96
|
296
|
+
ina 95
|
297
|
+
art 95
|
298
|
+
_C 95
|
299
|
+
no 95
|
300
|
+
nu_ 94
|
301
|
+
_es 94
|
302
|
+
_po 94
|
303
|
+
cr 94
|
304
|
+
inc 93
|
305
|
+
_da 92
|
306
|
+
_mai 92
|
307
|
+
lui_ 92
|
308
|
+
_din 92
|
309
|
+
_est 92
|
310
|
+
pre 91
|
311
|
+
_mai_ 91
|
312
|
+
io 91
|
313
|
+
chi 91
|
314
|
+
ge 90
|
315
|
+
pri 90
|
316
|
+
eu 90
|
317
|
+
uri 90
|
318
|
+
az 90
|
319
|
+
_nu_ 89
|
320
|
+
_me 89
|
321
|
+
ct 89
|
322
|
+
au_ 88
|
323
|
+
esc 88
|
324
|
+
ev 88
|
325
|
+
ei_ 88
|
326
|
+
min 87
|
327
|
+
ace 87
|
328
|
+
op 86
|
329
|
+
ng 86
|
330
|
+
ici 86
|
331
|
+
_lu 85
|
332
|
+
ari 85
|
333
|
+
_mi 84
|
334
|
+
ita 84
|
335
|
+
_S 84
|
336
|
+
_tr 84
|
337
|
+
ere 83
|
338
|
+
or_ 83
|
339
|
+
ast 83
|
340
|
+
ist 83
|
341
|
+
nt_ 83
|
342
|
+
_se_ 82
|
343
|
+
ou 82
|
344
|
+
tin 82
|
345
|
+
intr 82
|
346
|
+
con 82
|
347
|
+
do 81
|
348
|
+
_fi 81
|
349
|
+
str 81
|
350
|
+
am_ 80
|
351
|
+
rat 80
|
352
|
+
ru_ 80
|
353
|
+
ri_ 80
|
354
|
+
par 80
|
355
|
+
oi 80
|
356
|
+
uc 79
|
357
|
+
ze 79
|
358
|
+
pl 79
|
359
|
+
res 78
|
360
|
+
_ac 77
|
361
|
+
ulu 77
|
362
|
+
din_ 76
|
363
|
+
va_ 76
|
364
|
+
ada 76
|
365
|
+
ului 75
|
366
|
+
_con 75
|
367
|
+
id 75
|
368
|
+
inte 74
|
369
|
+
ile 73
|
370
|
+
cit 73
|
371
|
+
_din_ 73
|
372
|
+
lor 73
|
373
|
+
_" 72
|
374
|
+
ig 72
|
375
|
+
rin 72
|
376
|
+
da_ 72
|
377
|
+
_st 72
|
378
|
+
-_ 71
|
379
|
+
_- 71
|
380
|
+
it_ 71
|
381
|
+
ani 71
|
382
|
+
nd_ 71
|
383
|
+
ci_ 70
|
384
|
+
ag 70
|
385
|
+
eri 70
|
386
|
+
i. 70
|
387
|
+
tru_ 70
|
388
|
+
_ne 70
|
389
|
+
rm 70
|
390
|
+
P 69
|
391
|
+
_este 69
|
392
|
+
nta 69
|
393
|
+
bu 69
|
394
|
+
une 69
|
395
|
+
ma_ 69
|
396
|
+
nti 69
|
397
|
+
imp 68
|
398
|
+
_-_ 68
|
399
|
+
iv 68
|
400
|
+
ind_ 68
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 10888
|
2
|
+
a 3490
|
3
|
+
e 2268
|
4
|
+
i 2196
|
5
|
+
s 2169
|
6
|
+
n 1961
|
7
|
+
t 1555
|
8
|
+
r 1510
|
9
|
+
l 1281
|
10
|
+
u 1249
|
11
|
+
a_ 1155
|
12
|
+
c 1060
|
13
|
+
d 954
|
14
|
+
o 815
|
15
|
+
s_ 744
|
16
|
+
g 726
|
17
|
+
m 686
|
18
|
+
h 620
|
19
|
+
p 617
|
20
|
+
n_ 559
|
21
|
+
v 493
|
22
|
+
_s 480
|
23
|
+
er 476
|
24
|
+
ch 469
|
25
|
+
_d 457
|
26
|
+
in 455
|
27
|
+
da 418
|
28
|
+
_c 404
|
29
|
+
r_ 388
|
30
|
+
_e 377
|
31
|
+
. 358
|
32
|
+
_p 340
|
33
|
+
as 333
|
34
|
+
l_ 328
|
35
|
+
, 327
|
36
|
+
._ 320
|
37
|
+
_i 320
|
38
|
+
,_ 315
|
39
|
+
la 313
|
40
|
+
en 312
|
41
|
+
sc 310
|
42
|
+
an 301
|
43
|
+
ta 300
|
44
|
+
_da 285
|
45
|
+
f 282
|
46
|
+
_t 268
|
47
|
+
_a 262
|
48
|
+
nt 259
|
49
|
+
_m 252
|
50
|
+
un 248
|
51
|
+
ra 247
|
52
|
+
i_ 247
|
53
|
+
na 245
|
54
|
+
ma 245
|
55
|
+
ia 241
|
56
|
+
ar 234
|
57
|
+
sch 228
|
58
|
+
b 228
|
59
|
+
da_ 217
|
60
|
+
es 214
|
61
|
+
ai 213
|
62
|
+
st 212
|
63
|
+
' 209
|
64
|
+
e_ 208
|
65
|
+
as_ 208
|
66
|
+
er_ 200
|
67
|
+
t_ 199
|
68
|
+
re 199
|
69
|
+
_l 195
|
70
|
+
al 193
|
71
|
+
_n 193
|
72
|
+
el 192
|
73
|
+
tg 192
|
74
|
+
te 188
|
75
|
+
z 187
|
76
|
+
ha 180
|
77
|
+
_f 180
|
78
|
+
sa 174
|
79
|
+
_da_ 172
|
80
|
+
ve 169
|
81
|
+
ei 168
|
82
|
+
_v 165
|
83
|
+
at 165
|
84
|
+
ss 161
|
85
|
+
is 161
|
86
|
+
_ch 161
|
87
|
+
on 160
|
88
|
+
la_ 158
|
89
|
+
cu 158
|
90
|
+
ad 158
|
91
|
+
he 156
|
92
|
+
in_ 147
|
93
|
+
va 147
|
94
|
+
_in 146
|
95
|
+
gl 145
|
96
|
+
ns 141
|
97
|
+
ur 140
|
98
|
+
� 140
|
99
|
+
u_ 138
|
100
|
+
ts 138
|
101
|
+
pe 136
|
102
|
+
li 134
|
103
|
+
gi 133
|
104
|
+
et 133
|
105
|
+
de 132
|
106
|
+
ig 132
|
107
|
+
or 130
|
108
|
+
ti 129
|
109
|
+
il 127
|
110
|
+
d_ 126
|
111
|
+
che 123
|
112
|
+
ut 122
|
113
|
+
us 122
|
114
|
+
cha 121
|
115
|
+
di 120
|
116
|
+
ia_ 120
|
117
|
+
_b 118
|
118
|
+
_la 117
|
119
|
+
na_ 116
|
120
|
+
ain 115
|
121
|
+
per 115
|
122
|
+
to 115
|
123
|
+
_cu 113
|
124
|
+
_sc 112
|
125
|
+
se 111
|
126
|
+
ls 109
|
127
|
+
- 108
|
128
|
+
iu 108
|
129
|
+
ca 107
|
130
|
+
si 104
|
131
|
+
ir 102
|
132
|
+
rt 102
|
133
|
+
ie 102
|
134
|
+
_g 102
|
135
|
+
un_ 102
|
136
|
+
nd 101
|
137
|
+
av 101
|
138
|
+
ni 100
|
139
|
+
q 99
|
140
|
+
au 97
|
141
|
+
ls_ 97
|
142
|
+
ll 96
|
143
|
+
qu 96
|
144
|
+
_pe 96
|
145
|
+
le 96
|
146
|
+
rs 95
|
147
|
+
pa 95
|
148
|
+
ri 95
|
149
|
+
_ma 94
|
150
|
+
_per 93
|
151
|
+
am 93
|
152
|
+
eg 92
|
153
|
+
_r 92
|
154
|
+
gn 92
|
155
|
+
me 92
|
156
|
+
pi 91
|
157
|
+
an_ 91
|
158
|
+
en_ 91
|
159
|
+
ga 91
|
160
|
+
ent 90
|
161
|
+
hi 90
|
162
|
+
_e_ 90
|
163
|
+
it 89
|
164
|
+
ta_ 88
|
165
|
+
ter 87
|
166
|
+
ns_ 86
|
167
|
+
iv 86
|
168
|
+
igl 86
|
169
|
+
a. 85
|
170
|
+
em 85
|
171
|
+
I 85
|
172
|
+
chi 84
|
173
|
+
_en 84
|
174
|
+
int 84
|
175
|
+
ue 83
|
176
|
+
su 82
|
177
|
+
tt 82
|
178
|
+
a, 82
|
179
|
+
a,_ 82
|
180
|
+
o_ 82
|
181
|
+
_ve 82
|
182
|
+
a._ 82
|
183
|
+
_q 81
|
184
|
+
_qu 80
|
185
|
+
ge 80
|
186
|
+
" 80
|
187
|
+
_la_ 80
|
188
|
+
ar_ 80
|
189
|
+
vi 79
|
190
|
+
gl_ 79
|
191
|
+
tu 78
|
192
|
+
ng 78
|
193
|
+
ro 76
|
194
|
+
mi 76
|
195
|
+
sta 75
|
196
|
+
ed 75
|
197
|
+
lla 74
|
198
|
+
ei_ 73
|
199
|
+
_o 73
|
200
|
+
ic 73
|
201
|
+
el_ 73
|
202
|
+
_il 73
|
203
|
+
_in_ 72
|
204
|
+
g_ 72
|
205
|
+
pr 71
|
206
|
+
nu 70
|
207
|
+
ina 70
|
208
|
+
_h 69
|
209
|
+
scha 68
|
210
|
+
mai 68
|
211
|
+
pl 68
|
212
|
+
il_ 68
|
213
|
+
os 68
|
214
|
+
ha_ 68
|
215
|
+
be 67
|
216
|
+
uo 67
|
217
|
+
cun 67
|
218
|
+
ra_ 67
|
219
|
+
_pa 65
|
220
|
+
ts_ 64
|
221
|
+
s. 64
|
222
|
+
co 64
|
223
|
+
_u 64
|
224
|
+
fi 64
|
225
|
+
_I 63
|
226
|
+
sa_ 63
|
227
|
+
s._ 63
|
228
|
+
_re 62
|
229
|
+
�n 62
|
230
|
+
_nu 62
|
231
|
+
? 62
|
232
|
+
S 62
|
233
|
+
_se 61
|
234
|
+
no 61
|
235
|
+
nt_ 61
|
236
|
+
E 61
|
237
|
+
tr 61
|
238
|
+
mp 60
|
239
|
+
igl_ 60
|
240
|
+
_su 60
|
241
|
+
_st 60
|
242
|
+
ess 60
|
243
|
+
im 60
|
244
|
+
zi 59
|
245
|
+
?_ 59
|
246
|
+
nc 59
|
247
|
+
_E 58
|
248
|
+
_- 58
|
249
|
+
_cun 58
|
250
|
+
_te 58
|
251
|
+
_sa 58
|
252
|
+
ant 58
|
253
|
+
main 58
|
254
|
+
eu 57
|
255
|
+
ssa 57
|
256
|
+
-_ 57
|
257
|
+
iun 57
|
258
|
+
_-_ 57
|
259
|
+
aint 56
|
260
|
+
op 56
|
261
|
+
al_ 56
|
262
|
+
dal 56
|
263
|
+
j 56
|
264
|
+
ama 55
|
265
|
+
_tg 55
|
266
|
+
ua 55
|
267
|
+
M 55
|
268
|
+
per_ 55
|
269
|
+
tsc 54
|
270
|
+
nta 54
|
271
|
+
tsch 54
|
272
|
+
um 54
|
273
|
+
fa 54
|
274
|
+
za 54
|
275
|
+
_di 54
|
276
|
+
pia 54
|
277
|
+
_per_ 54
|
278
|
+
T 54
|
279
|
+
_M 53
|
280
|
+
ne 53
|
281
|
+
era 53
|
282
|
+
A 53
|
283
|
+
que 53
|
284
|
+
_dal 53
|
285
|
+
cr 53
|
286
|
+
_de 53
|
287
|
+
lla_ 53
|
288
|
+
_che 52
|
289
|
+
h' 52
|
290
|
+
_me 51
|
291
|
+
ot 51
|
292
|
+
_pr 51
|
293
|
+
_pl 51
|
294
|
+
_sch 51
|
295
|
+
ch' 50
|
296
|
+
_S 50
|
297
|
+
opi 50
|
298
|
+
ou 50
|
299
|
+
tta 50
|
300
|
+
mo 50
|
301
|
+
ada 50
|
302
|
+
ba 50
|
303
|
+
_an 49
|
304
|
+
top 49
|
305
|
+
id 49
|
306
|
+
� 49
|
307
|
+
he_ 49
|
308
|
+
C 49
|
309
|
+
va_ 49
|
310
|
+
uto 49
|
311
|
+
ins 48
|
312
|
+
topi 48
|
313
|
+
L 48
|
314
|
+
ir_ 48
|
315
|
+
ist 48
|
316
|
+
c_ 48
|
317
|
+
_il_ 48
|
318
|
+
P 48
|
319
|
+
ss_ 48
|
320
|
+
ag 47
|
321
|
+
_no 47
|
322
|
+
res 47
|
323
|
+
las 47
|
324
|
+
_vi 46
|
325
|
+
s, 46
|
326
|
+
schi 46
|
327
|
+
_en_ 46
|
328
|
+
tg_ 46
|
329
|
+
s,_ 46
|
330
|
+
_si 46
|
331
|
+
_que 45
|
332
|
+
_T 45
|
333
|
+
az 45
|
334
|
+
'i 45
|
335
|
+
cun_ 44
|
336
|
+
_fa 44
|
337
|
+
_mi 44
|
338
|
+
utop 44
|
339
|
+
utopi 44
|
340
|
+
sche 44
|
341
|
+
_C 44
|
342
|
+
ur_ 44
|
343
|
+
tge 44
|
344
|
+
po 44
|
345
|
+
es_ 44
|
346
|
+
x 44
|
347
|
+
nz 44
|
348
|
+
_L 44
|
349
|
+
_cun_ 44
|
350
|
+
man 44
|
351
|
+
_ch' 43
|
352
|
+
_fi 43
|
353
|
+
pu 43
|
354
|
+
ell 43
|
355
|
+
opia 43
|
356
|
+
_igl 43
|
357
|
+
_ig 43
|
358
|
+
sp 43
|
359
|
+
topia 43
|
360
|
+
ava 42
|
361
|
+
egn 42
|
362
|
+
che_ 42
|
363
|
+
on_ 42
|
364
|
+
ci 42
|
365
|
+
_P 42
|
366
|
+
ev 42
|
367
|
+
ond 41
|
368
|
+
_" 41
|
369
|
+
� 41
|
370
|
+
us_ 41
|
371
|
+
_ha 41
|
372
|
+
D 40
|
373
|
+
_co 40
|
374
|
+
etg 40
|
375
|
+
'e 40
|
376
|
+
las_ 40
|
377
|
+
est 40
|
378
|
+
ura 40
|
379
|
+
uel 39
|
380
|
+
ed_ 39
|
381
|
+
vo 39
|
382
|
+
gia 39
|
383
|
+
bu 39
|
384
|
+
mu 39
|
385
|
+
nn 39
|
386
|
+
gli 39
|
387
|
+
_A 39
|
388
|
+
lu 39
|
389
|
+
cha_ 39
|
390
|
+
ul 38
|
391
|
+
mal 38
|
392
|
+
_�n 38
|
393
|
+
ina_ 38
|
394
|
+
_bu 38
|
395
|
+
_ca 38
|
396
|
+
_� 38
|
397
|
+
uc 38
|
398
|
+
nts 38
|
399
|
+
tra 38
|
400
|
+
_tu 37
|