language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20674
|
2
|
+
a 6376
|
3
|
+
e 5815
|
4
|
+
i 5746
|
5
|
+
t 3396
|
6
|
+
r 3280
|
7
|
+
n 3103
|
8
|
+
u 2835
|
9
|
+
s 2611
|
10
|
+
c 2582
|
11
|
+
e_ 2235
|
12
|
+
l 2224
|
13
|
+
o 2149
|
14
|
+
a_ 1974
|
15
|
+
d 1629
|
16
|
+
m 1528
|
17
|
+
p 1410
|
18
|
+
i_ 1358
|
19
|
+
in 1308
|
20
|
+
_c 1167
|
21
|
+
_s 1118
|
22
|
+
_d 999
|
23
|
+
re 905
|
24
|
+
ar 898
|
25
|
+
, 791
|
26
|
+
,_ 786
|
27
|
+
_p 785
|
28
|
+
de 771
|
29
|
+
_a 754
|
30
|
+
te 687
|
31
|
+
_i 667
|
32
|
+
at 654
|
33
|
+
ti 645
|
34
|
+
ca 639
|
35
|
+
n_ 630
|
36
|
+
ta 617
|
37
|
+
si 614
|
38
|
+
_de 609
|
39
|
+
f 606
|
40
|
+
st 583
|
41
|
+
ri 581
|
42
|
+
u_ 567
|
43
|
+
nt 553
|
44
|
+
. 542
|
45
|
+
ra 540
|
46
|
+
_m 534
|
47
|
+
g 528
|
48
|
+
v 516
|
49
|
+
ul 516
|
50
|
+
de_ 513
|
51
|
+
_in 503
|
52
|
+
b 492
|
53
|
+
_de_ 474
|
54
|
+
._ 472
|
55
|
+
le 459
|
56
|
+
l_ 444
|
57
|
+
un 443
|
58
|
+
_si 440
|
59
|
+
es 437
|
60
|
+
tr 426
|
61
|
+
ea 420
|
62
|
+
t_ 412
|
63
|
+
ce 412
|
64
|
+
ma 407
|
65
|
+
cu 402
|
66
|
+
er 398
|
67
|
+
_ca 397
|
68
|
+
si_ 388
|
69
|
+
_f 387
|
70
|
+
_l 383
|
71
|
+
z 382
|
72
|
+
la 381
|
73
|
+
ne 370
|
74
|
+
sa 364
|
75
|
+
as 360
|
76
|
+
_e 357
|
77
|
+
in_ 356
|
78
|
+
an 352
|
79
|
+
it 351
|
80
|
+
te_ 346
|
81
|
+
or 345
|
82
|
+
el 345
|
83
|
+
ci 339
|
84
|
+
_si_ 333
|
85
|
+
_n 330
|
86
|
+
are 324
|
87
|
+
pe 319
|
88
|
+
re_ 317
|
89
|
+
al 310
|
90
|
+
_t 309
|
91
|
+
se 304
|
92
|
+
ic 295
|
93
|
+
ie 290
|
94
|
+
_u 290
|
95
|
+
ul_ 290
|
96
|
+
ni 289
|
97
|
+
int 285
|
98
|
+
_o 280
|
99
|
+
en 279
|
100
|
+
ta_ 279
|
101
|
+
ur 261
|
102
|
+
pa 256
|
103
|
+
co 255
|
104
|
+
_pe 254
|
105
|
+
ia 252
|
106
|
+
mi 251
|
107
|
+
pr 249
|
108
|
+
_ma 249
|
109
|
+
oa 249
|
110
|
+
me 246
|
111
|
+
lu 246
|
112
|
+
li 241
|
113
|
+
im 238
|
114
|
+
_in_ 237
|
115
|
+
da 237
|
116
|
+
na 237
|
117
|
+
_sa 235
|
118
|
+
ac 234
|
119
|
+
- 234
|
120
|
+
ii 232
|
121
|
+
est 231
|
122
|
+
r_ 231
|
123
|
+
h 230
|
124
|
+
_cu 230
|
125
|
+
le_ 229
|
126
|
+
ai 229
|
127
|
+
ca_ 227
|
128
|
+
il 226
|
129
|
+
ru 223
|
130
|
+
sc 223
|
131
|
+
_v 221
|
132
|
+
nu 220
|
133
|
+
tu 220
|
134
|
+
_un 220
|
135
|
+
nd 220
|
136
|
+
di 219
|
137
|
+
are_ 216
|
138
|
+
to 215
|
139
|
+
am 214
|
140
|
+
on 213
|
141
|
+
o_ 208
|
142
|
+
is 208
|
143
|
+
sa_ 203
|
144
|
+
la_ 200
|
145
|
+
_b 200
|
146
|
+
ste 195
|
147
|
+
et 194
|
148
|
+
ec 191
|
149
|
+
_r 186
|
150
|
+
car 185
|
151
|
+
ui 180
|
152
|
+
un_ 179
|
153
|
+
lo 178
|
154
|
+
cu_ 177
|
155
|
+
ei 176
|
156
|
+
e, 175
|
157
|
+
e,_ 175
|
158
|
+
pe_ 171
|
159
|
+
m_ 167
|
160
|
+
_la 166
|
161
|
+
a, 165
|
162
|
+
a,_ 164
|
163
|
+
_ce 164
|
164
|
+
rt 163
|
165
|
+
_co 163
|
166
|
+
ent 162
|
167
|
+
ro 162
|
168
|
+
ele 162
|
169
|
+
_pe_ 160
|
170
|
+
po 160
|
171
|
+
ea_ 159
|
172
|
+
" 158
|
173
|
+
ntr 158
|
174
|
+
_cu_ 158
|
175
|
+
_pr 157
|
176
|
+
ut 157
|
177
|
+
nc 156
|
178
|
+
ata 155
|
179
|
+
care 154
|
180
|
+
um 153
|
181
|
+
au 151
|
182
|
+
va 151
|
183
|
+
_o_ 150
|
184
|
+
_car 150
|
185
|
+
ii_ 145
|
186
|
+
ind 145
|
187
|
+
_un_ 144
|
188
|
+
os 144
|
189
|
+
ad 141
|
190
|
+
_la_ 140
|
191
|
+
I 140
|
192
|
+
este 138
|
193
|
+
ste_ 138
|
194
|
+
care_ 138
|
195
|
+
ir 137
|
196
|
+
ga 136
|
197
|
+
ap 136
|
198
|
+
ol 136
|
199
|
+
ra_ 136
|
200
|
+
_di 134
|
201
|
+
D 134
|
202
|
+
_care 133
|
203
|
+
se_ 133
|
204
|
+
om 133
|
205
|
+
ara 133
|
206
|
+
ati 133
|
207
|
+
fi 133
|
208
|
+
_sa_ 131
|
209
|
+
zi 130
|
210
|
+
vi 130
|
211
|
+
_ca_ 129
|
212
|
+
_se 128
|
213
|
+
_nu 128
|
214
|
+
ai_ 127
|
215
|
+
ch 127
|
216
|
+
pi 124
|
217
|
+
ve 123
|
218
|
+
fa 122
|
219
|
+
ot 121
|
220
|
+
_a_ 120
|
221
|
+
este_ 120
|
222
|
+
du 119
|
223
|
+
ine 119
|
224
|
+
s_ 118
|
225
|
+
fo 118
|
226
|
+
_ci 118
|
227
|
+
ui_ 118
|
228
|
+
ba 118
|
229
|
+
i, 117
|
230
|
+
i,_ 116
|
231
|
+
ne_ 115
|
232
|
+
us 115
|
233
|
+
_g 115
|
234
|
+
a. 115
|
235
|
+
fe 114
|
236
|
+
A 114
|
237
|
+
pu 114
|
238
|
+
ce_ 113
|
239
|
+
ar_ 113
|
240
|
+
_pa 113
|
241
|
+
oc 112
|
242
|
+
sta 112
|
243
|
+
lui 112
|
244
|
+
ns 112
|
245
|
+
em 112
|
246
|
+
' 112
|
247
|
+
oar 112
|
248
|
+
din 111
|
249
|
+
iu 111
|
250
|
+
_int 111
|
251
|
+
ate 111
|
252
|
+
mu 111
|
253
|
+
hi 110
|
254
|
+
ele_ 110
|
255
|
+
mp 109
|
256
|
+
_D 109
|
257
|
+
S 109
|
258
|
+
sti 108
|
259
|
+
bi 108
|
260
|
+
ata_ 107
|
261
|
+
ti_ 107
|
262
|
+
tra 107
|
263
|
+
C 107
|
264
|
+
c_ 106
|
265
|
+
tre 106
|
266
|
+
_al 105
|
267
|
+
rea 105
|
268
|
+
mai 105
|
269
|
+
j 104
|
270
|
+
a._ 104
|
271
|
+
gi 104
|
272
|
+
e. 103
|
273
|
+
d_ 103
|
274
|
+
_fa 103
|
275
|
+
E 102
|
276
|
+
mo 102
|
277
|
+
at_ 101
|
278
|
+
_e_ 101
|
279
|
+
nte 101
|
280
|
+
lt 101
|
281
|
+
sp 101
|
282
|
+
za 100
|
283
|
+
mai_ 100
|
284
|
+
su 99
|
285
|
+
na_ 98
|
286
|
+
tat 97
|
287
|
+
sin 97
|
288
|
+
ez 96
|
289
|
+
tru 96
|
290
|
+
e._ 96
|
291
|
+
ie_ 96
|
292
|
+
ia_ 96
|
293
|
+
_re 96
|
294
|
+
tul 96
|
295
|
+
_fo 96
|
296
|
+
ina 95
|
297
|
+
art 95
|
298
|
+
_C 95
|
299
|
+
no 95
|
300
|
+
nu_ 94
|
301
|
+
_es 94
|
302
|
+
_po 94
|
303
|
+
cr 94
|
304
|
+
inc 93
|
305
|
+
_da 92
|
306
|
+
_mai 92
|
307
|
+
lui_ 92
|
308
|
+
_din 92
|
309
|
+
_est 92
|
310
|
+
pre 91
|
311
|
+
_mai_ 91
|
312
|
+
io 91
|
313
|
+
chi 91
|
314
|
+
ge 90
|
315
|
+
pri 90
|
316
|
+
eu 90
|
317
|
+
uri 90
|
318
|
+
az 90
|
319
|
+
_nu_ 89
|
320
|
+
_me 89
|
321
|
+
ct 89
|
322
|
+
au_ 88
|
323
|
+
esc 88
|
324
|
+
ev 88
|
325
|
+
ei_ 88
|
326
|
+
min 87
|
327
|
+
ace 87
|
328
|
+
op 86
|
329
|
+
ng 86
|
330
|
+
ici 86
|
331
|
+
_lu 85
|
332
|
+
ari 85
|
333
|
+
_mi 84
|
334
|
+
ita 84
|
335
|
+
_S 84
|
336
|
+
_tr 84
|
337
|
+
ere 83
|
338
|
+
or_ 83
|
339
|
+
ast 83
|
340
|
+
ist 83
|
341
|
+
nt_ 83
|
342
|
+
_se_ 82
|
343
|
+
ou 82
|
344
|
+
tin 82
|
345
|
+
intr 82
|
346
|
+
con 82
|
347
|
+
do 81
|
348
|
+
_fi 81
|
349
|
+
str 81
|
350
|
+
am_ 80
|
351
|
+
rat 80
|
352
|
+
ru_ 80
|
353
|
+
ri_ 80
|
354
|
+
par 80
|
355
|
+
oi 80
|
356
|
+
uc 79
|
357
|
+
ze 79
|
358
|
+
pl 79
|
359
|
+
res 78
|
360
|
+
_ac 77
|
361
|
+
ulu 77
|
362
|
+
din_ 76
|
363
|
+
va_ 76
|
364
|
+
ada 76
|
365
|
+
ului 75
|
366
|
+
_con 75
|
367
|
+
id 75
|
368
|
+
inte 74
|
369
|
+
ile 73
|
370
|
+
cit 73
|
371
|
+
_din_ 73
|
372
|
+
lor 73
|
373
|
+
_" 72
|
374
|
+
ig 72
|
375
|
+
rin 72
|
376
|
+
da_ 72
|
377
|
+
_st 72
|
378
|
+
-_ 71
|
379
|
+
_- 71
|
380
|
+
it_ 71
|
381
|
+
ani 71
|
382
|
+
nd_ 71
|
383
|
+
ci_ 70
|
384
|
+
ag 70
|
385
|
+
eri 70
|
386
|
+
i. 70
|
387
|
+
tru_ 70
|
388
|
+
_ne 70
|
389
|
+
rm 70
|
390
|
+
P 69
|
391
|
+
_este 69
|
392
|
+
nta 69
|
393
|
+
bu 69
|
394
|
+
une 69
|
395
|
+
ma_ 69
|
396
|
+
nti 69
|
397
|
+
imp 68
|
398
|
+
_-_ 68
|
399
|
+
iv 68
|
400
|
+
ind_ 68
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 10888
|
2
|
+
a 3490
|
3
|
+
e 2268
|
4
|
+
i 2196
|
5
|
+
s 2169
|
6
|
+
n 1961
|
7
|
+
t 1555
|
8
|
+
r 1510
|
9
|
+
l 1281
|
10
|
+
u 1249
|
11
|
+
a_ 1155
|
12
|
+
c 1060
|
13
|
+
d 954
|
14
|
+
o 815
|
15
|
+
s_ 744
|
16
|
+
g 726
|
17
|
+
m 686
|
18
|
+
h 620
|
19
|
+
p 617
|
20
|
+
n_ 559
|
21
|
+
v 493
|
22
|
+
_s 480
|
23
|
+
er 476
|
24
|
+
ch 469
|
25
|
+
_d 457
|
26
|
+
in 455
|
27
|
+
da 418
|
28
|
+
_c 404
|
29
|
+
r_ 388
|
30
|
+
_e 377
|
31
|
+
. 358
|
32
|
+
_p 340
|
33
|
+
as 333
|
34
|
+
l_ 328
|
35
|
+
, 327
|
36
|
+
._ 320
|
37
|
+
_i 320
|
38
|
+
,_ 315
|
39
|
+
la 313
|
40
|
+
en 312
|
41
|
+
sc 310
|
42
|
+
an 301
|
43
|
+
ta 300
|
44
|
+
_da 285
|
45
|
+
f 282
|
46
|
+
_t 268
|
47
|
+
_a 262
|
48
|
+
nt 259
|
49
|
+
_m 252
|
50
|
+
un 248
|
51
|
+
ra 247
|
52
|
+
i_ 247
|
53
|
+
na 245
|
54
|
+
ma 245
|
55
|
+
ia 241
|
56
|
+
ar 234
|
57
|
+
sch 228
|
58
|
+
b 228
|
59
|
+
da_ 217
|
60
|
+
es 214
|
61
|
+
ai 213
|
62
|
+
st 212
|
63
|
+
' 209
|
64
|
+
e_ 208
|
65
|
+
as_ 208
|
66
|
+
er_ 200
|
67
|
+
t_ 199
|
68
|
+
re 199
|
69
|
+
_l 195
|
70
|
+
al 193
|
71
|
+
_n 193
|
72
|
+
el 192
|
73
|
+
tg 192
|
74
|
+
te 188
|
75
|
+
z 187
|
76
|
+
ha 180
|
77
|
+
_f 180
|
78
|
+
sa 174
|
79
|
+
_da_ 172
|
80
|
+
ve 169
|
81
|
+
ei 168
|
82
|
+
_v 165
|
83
|
+
at 165
|
84
|
+
ss 161
|
85
|
+
is 161
|
86
|
+
_ch 161
|
87
|
+
on 160
|
88
|
+
la_ 158
|
89
|
+
cu 158
|
90
|
+
ad 158
|
91
|
+
he 156
|
92
|
+
in_ 147
|
93
|
+
va 147
|
94
|
+
_in 146
|
95
|
+
gl 145
|
96
|
+
ns 141
|
97
|
+
ur 140
|
98
|
+
� 140
|
99
|
+
u_ 138
|
100
|
+
ts 138
|
101
|
+
pe 136
|
102
|
+
li 134
|
103
|
+
gi 133
|
104
|
+
et 133
|
105
|
+
de 132
|
106
|
+
ig 132
|
107
|
+
or 130
|
108
|
+
ti 129
|
109
|
+
il 127
|
110
|
+
d_ 126
|
111
|
+
che 123
|
112
|
+
ut 122
|
113
|
+
us 122
|
114
|
+
cha 121
|
115
|
+
di 120
|
116
|
+
ia_ 120
|
117
|
+
_b 118
|
118
|
+
_la 117
|
119
|
+
na_ 116
|
120
|
+
ain 115
|
121
|
+
per 115
|
122
|
+
to 115
|
123
|
+
_cu 113
|
124
|
+
_sc 112
|
125
|
+
se 111
|
126
|
+
ls 109
|
127
|
+
- 108
|
128
|
+
iu 108
|
129
|
+
ca 107
|
130
|
+
si 104
|
131
|
+
ir 102
|
132
|
+
rt 102
|
133
|
+
ie 102
|
134
|
+
_g 102
|
135
|
+
un_ 102
|
136
|
+
nd 101
|
137
|
+
av 101
|
138
|
+
ni 100
|
139
|
+
q 99
|
140
|
+
au 97
|
141
|
+
ls_ 97
|
142
|
+
ll 96
|
143
|
+
qu 96
|
144
|
+
_pe 96
|
145
|
+
le 96
|
146
|
+
rs 95
|
147
|
+
pa 95
|
148
|
+
ri 95
|
149
|
+
_ma 94
|
150
|
+
_per 93
|
151
|
+
am 93
|
152
|
+
eg 92
|
153
|
+
_r 92
|
154
|
+
gn 92
|
155
|
+
me 92
|
156
|
+
pi 91
|
157
|
+
an_ 91
|
158
|
+
en_ 91
|
159
|
+
ga 91
|
160
|
+
ent 90
|
161
|
+
hi 90
|
162
|
+
_e_ 90
|
163
|
+
it 89
|
164
|
+
ta_ 88
|
165
|
+
ter 87
|
166
|
+
ns_ 86
|
167
|
+
iv 86
|
168
|
+
igl 86
|
169
|
+
a. 85
|
170
|
+
em 85
|
171
|
+
I 85
|
172
|
+
chi 84
|
173
|
+
_en 84
|
174
|
+
int 84
|
175
|
+
ue 83
|
176
|
+
su 82
|
177
|
+
tt 82
|
178
|
+
a, 82
|
179
|
+
a,_ 82
|
180
|
+
o_ 82
|
181
|
+
_ve 82
|
182
|
+
a._ 82
|
183
|
+
_q 81
|
184
|
+
_qu 80
|
185
|
+
ge 80
|
186
|
+
" 80
|
187
|
+
_la_ 80
|
188
|
+
ar_ 80
|
189
|
+
vi 79
|
190
|
+
gl_ 79
|
191
|
+
tu 78
|
192
|
+
ng 78
|
193
|
+
ro 76
|
194
|
+
mi 76
|
195
|
+
sta 75
|
196
|
+
ed 75
|
197
|
+
lla 74
|
198
|
+
ei_ 73
|
199
|
+
_o 73
|
200
|
+
ic 73
|
201
|
+
el_ 73
|
202
|
+
_il 73
|
203
|
+
_in_ 72
|
204
|
+
g_ 72
|
205
|
+
pr 71
|
206
|
+
nu 70
|
207
|
+
ina 70
|
208
|
+
_h 69
|
209
|
+
scha 68
|
210
|
+
mai 68
|
211
|
+
pl 68
|
212
|
+
il_ 68
|
213
|
+
os 68
|
214
|
+
ha_ 68
|
215
|
+
be 67
|
216
|
+
uo 67
|
217
|
+
cun 67
|
218
|
+
ra_ 67
|
219
|
+
_pa 65
|
220
|
+
ts_ 64
|
221
|
+
s. 64
|
222
|
+
co 64
|
223
|
+
_u 64
|
224
|
+
fi 64
|
225
|
+
_I 63
|
226
|
+
sa_ 63
|
227
|
+
s._ 63
|
228
|
+
_re 62
|
229
|
+
�n 62
|
230
|
+
_nu 62
|
231
|
+
? 62
|
232
|
+
S 62
|
233
|
+
_se 61
|
234
|
+
no 61
|
235
|
+
nt_ 61
|
236
|
+
E 61
|
237
|
+
tr 61
|
238
|
+
mp 60
|
239
|
+
igl_ 60
|
240
|
+
_su 60
|
241
|
+
_st 60
|
242
|
+
ess 60
|
243
|
+
im 60
|
244
|
+
zi 59
|
245
|
+
?_ 59
|
246
|
+
nc 59
|
247
|
+
_E 58
|
248
|
+
_- 58
|
249
|
+
_cun 58
|
250
|
+
_te 58
|
251
|
+
_sa 58
|
252
|
+
ant 58
|
253
|
+
main 58
|
254
|
+
eu 57
|
255
|
+
ssa 57
|
256
|
+
-_ 57
|
257
|
+
iun 57
|
258
|
+
_-_ 57
|
259
|
+
aint 56
|
260
|
+
op 56
|
261
|
+
al_ 56
|
262
|
+
dal 56
|
263
|
+
j 56
|
264
|
+
ama 55
|
265
|
+
_tg 55
|
266
|
+
ua 55
|
267
|
+
M 55
|
268
|
+
per_ 55
|
269
|
+
tsc 54
|
270
|
+
nta 54
|
271
|
+
tsch 54
|
272
|
+
um 54
|
273
|
+
fa 54
|
274
|
+
za 54
|
275
|
+
_di 54
|
276
|
+
pia 54
|
277
|
+
_per_ 54
|
278
|
+
T 54
|
279
|
+
_M 53
|
280
|
+
ne 53
|
281
|
+
era 53
|
282
|
+
A 53
|
283
|
+
que 53
|
284
|
+
_dal 53
|
285
|
+
cr 53
|
286
|
+
_de 53
|
287
|
+
lla_ 53
|
288
|
+
_che 52
|
289
|
+
h' 52
|
290
|
+
_me 51
|
291
|
+
ot 51
|
292
|
+
_pr 51
|
293
|
+
_pl 51
|
294
|
+
_sch 51
|
295
|
+
ch' 50
|
296
|
+
_S 50
|
297
|
+
opi 50
|
298
|
+
ou 50
|
299
|
+
tta 50
|
300
|
+
mo 50
|
301
|
+
ada 50
|
302
|
+
ba 50
|
303
|
+
_an 49
|
304
|
+
top 49
|
305
|
+
id 49
|
306
|
+
� 49
|
307
|
+
he_ 49
|
308
|
+
C 49
|
309
|
+
va_ 49
|
310
|
+
uto 49
|
311
|
+
ins 48
|
312
|
+
topi 48
|
313
|
+
L 48
|
314
|
+
ir_ 48
|
315
|
+
ist 48
|
316
|
+
c_ 48
|
317
|
+
_il_ 48
|
318
|
+
P 48
|
319
|
+
ss_ 48
|
320
|
+
ag 47
|
321
|
+
_no 47
|
322
|
+
res 47
|
323
|
+
las 47
|
324
|
+
_vi 46
|
325
|
+
s, 46
|
326
|
+
schi 46
|
327
|
+
_en_ 46
|
328
|
+
tg_ 46
|
329
|
+
s,_ 46
|
330
|
+
_si 46
|
331
|
+
_que 45
|
332
|
+
_T 45
|
333
|
+
az 45
|
334
|
+
'i 45
|
335
|
+
cun_ 44
|
336
|
+
_fa 44
|
337
|
+
_mi 44
|
338
|
+
utop 44
|
339
|
+
utopi 44
|
340
|
+
sche 44
|
341
|
+
_C 44
|
342
|
+
ur_ 44
|
343
|
+
tge 44
|
344
|
+
po 44
|
345
|
+
es_ 44
|
346
|
+
x 44
|
347
|
+
nz 44
|
348
|
+
_L 44
|
349
|
+
_cun_ 44
|
350
|
+
man 44
|
351
|
+
_ch' 43
|
352
|
+
_fi 43
|
353
|
+
pu 43
|
354
|
+
ell 43
|
355
|
+
opia 43
|
356
|
+
_igl 43
|
357
|
+
_ig 43
|
358
|
+
sp 43
|
359
|
+
topia 43
|
360
|
+
ava 42
|
361
|
+
egn 42
|
362
|
+
che_ 42
|
363
|
+
on_ 42
|
364
|
+
ci 42
|
365
|
+
_P 42
|
366
|
+
ev 42
|
367
|
+
ond 41
|
368
|
+
_" 41
|
369
|
+
� 41
|
370
|
+
us_ 41
|
371
|
+
_ha 41
|
372
|
+
D 40
|
373
|
+
_co 40
|
374
|
+
etg 40
|
375
|
+
'e 40
|
376
|
+
las_ 40
|
377
|
+
est 40
|
378
|
+
ura 40
|
379
|
+
uel 39
|
380
|
+
ed_ 39
|
381
|
+
vo 39
|
382
|
+
gia 39
|
383
|
+
bu 39
|
384
|
+
mu 39
|
385
|
+
nn 39
|
386
|
+
gli 39
|
387
|
+
_A 39
|
388
|
+
lu 39
|
389
|
+
cha_ 39
|
390
|
+
ul 38
|
391
|
+
mal 38
|
392
|
+
_�n 38
|
393
|
+
ina_ 38
|
394
|
+
_bu 38
|
395
|
+
_ca 38
|
396
|
+
_� 38
|
397
|
+
uc 38
|
398
|
+
nts 38
|
399
|
+
tra 38
|
400
|
+
_tu 37
|