language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 8010
|
2
|
+
a 2622
|
3
|
+
i 1573
|
4
|
+
h 1334
|
5
|
+
n 1247
|
6
|
+
r 968
|
7
|
+
_a 847
|
8
|
+
e 830
|
9
|
+
s 817
|
10
|
+
t 747
|
11
|
+
l 639
|
12
|
+
c 636
|
13
|
+
g 598
|
14
|
+
o 590
|
15
|
+
d 554
|
16
|
+
n_ 501
|
17
|
+
a_ 487
|
18
|
+
m 432
|
19
|
+
an 415
|
20
|
+
u 413
|
21
|
+
b 379
|
22
|
+
h_ 352
|
23
|
+
ai 350
|
24
|
+
ch 350
|
25
|
+
ea 346
|
26
|
+
r_ 346
|
27
|
+
� 334
|
28
|
+
� 321
|
29
|
+
_s 309
|
30
|
+
� 306
|
31
|
+
in 281
|
32
|
+
ar 277
|
33
|
+
. 269
|
34
|
+
_d 266
|
35
|
+
s_ 254
|
36
|
+
ir 253
|
37
|
+
_b 250
|
38
|
+
f 250
|
39
|
+
an_ 246
|
40
|
+
, 241
|
41
|
+
,_ 226
|
42
|
+
ag 225
|
43
|
+
_an 221
|
44
|
+
bh 218
|
45
|
+
_c 216
|
46
|
+
._ 212
|
47
|
+
ac 210
|
48
|
+
ha 208
|
49
|
+
_a_ 201
|
50
|
+
" 199
|
51
|
+
_m 199
|
52
|
+
th 198
|
53
|
+
_t 190
|
54
|
+
ach 182
|
55
|
+
_ag 180
|
56
|
+
_an_ 179
|
57
|
+
�_ 176
|
58
|
+
_l 168
|
59
|
+
na 168
|
60
|
+
nn 160
|
61
|
+
e_ 159
|
62
|
+
ar_ 158
|
63
|
+
_g 157
|
64
|
+
� 156
|
65
|
+
_i 152
|
66
|
+
il 150
|
67
|
+
le 150
|
68
|
+
is 143
|
69
|
+
� 142
|
70
|
+
_bh 138
|
71
|
+
ei 138
|
72
|
+
g_ 135
|
73
|
+
_f 135
|
74
|
+
dh 135
|
75
|
+
l_ 126
|
76
|
+
t_ 125
|
77
|
+
ig 123
|
78
|
+
�_ 122
|
79
|
+
_n 120
|
80
|
+
gu 120
|
81
|
+
�_ 120
|
82
|
+
mh 118
|
83
|
+
id 117
|
84
|
+
ch_ 117
|
85
|
+
ad 116
|
86
|
+
he 114
|
87
|
+
ir_ 114
|
88
|
+
ra 109
|
89
|
+
o_ 109
|
90
|
+
ach_ 107
|
91
|
+
ia 105
|
92
|
+
_ar 105
|
93
|
+
us 104
|
94
|
+
ui 104
|
95
|
+
_" 101
|
96
|
+
us_ 100
|
97
|
+
T 99
|
98
|
+
am 99
|
99
|
+
ta 98
|
100
|
+
gus 98
|
101
|
+
gus_ 98
|
102
|
+
_le 97
|
103
|
+
gh 97
|
104
|
+
_ch 97
|
105
|
+
agus 94
|
106
|
+
agus_ 94
|
107
|
+
agu 94
|
108
|
+
�a 93
|
109
|
+
_agus 93
|
110
|
+
_agu 93
|
111
|
+
ean 93
|
112
|
+
na_ 92
|
113
|
+
d_ 92
|
114
|
+
�i 91
|
115
|
+
p 89
|
116
|
+
it 89
|
117
|
+
A 89
|
118
|
+
_ar_ 88
|
119
|
+
rt 86
|
120
|
+
al 85
|
121
|
+
oi 84
|
122
|
+
sa 84
|
123
|
+
"_ 82
|
124
|
+
hai 81
|
125
|
+
_r 79
|
126
|
+
nn_ 79
|
127
|
+
hu 79
|
128
|
+
as 79
|
129
|
+
�i 78
|
130
|
+
_T 78
|
131
|
+
ma 77
|
132
|
+
air 77
|
133
|
+
at 77
|
134
|
+
ann 76
|
135
|
+
B 76
|
136
|
+
s� 76
|
137
|
+
h� 75
|
138
|
+
igh 74
|
139
|
+
st 74
|
140
|
+
ga 73
|
141
|
+
go 71
|
142
|
+
ua 71
|
143
|
+
ne 71
|
144
|
+
la 71
|
145
|
+
- 71
|
146
|
+
de 71
|
147
|
+
te 71
|
148
|
+
re 70
|
149
|
+
inn 70
|
150
|
+
ith 69
|
151
|
+
eac 69
|
152
|
+
_s� 69
|
153
|
+
in_ 68
|
154
|
+
_go 68
|
155
|
+
hi 68
|
156
|
+
each 68
|
157
|
+
dh_ 68
|
158
|
+
si 67
|
159
|
+
ag_ 67
|
160
|
+
_go_ 66
|
161
|
+
hea 66
|
162
|
+
go_ 66
|
163
|
+
tha 64
|
164
|
+
om 64
|
165
|
+
_s�_ 63
|
166
|
+
s�_ 63
|
167
|
+
h�_ 63
|
168
|
+
on 62
|
169
|
+
se 61
|
170
|
+
�i 60
|
171
|
+
nt 60
|
172
|
+
C 60
|
173
|
+
D 59
|
174
|
+
i_ 58
|
175
|
+
_ag_ 58
|
176
|
+
is_ 58
|
177
|
+
�o 58
|
178
|
+
_de 57
|
179
|
+
_B 56
|
180
|
+
il_ 56
|
181
|
+
or 56
|
182
|
+
_th 54
|
183
|
+
ca 53
|
184
|
+
fa 53
|
185
|
+
amh 53
|
186
|
+
_A 53
|
187
|
+
le_ 52
|
188
|
+
? 52
|
189
|
+
S 51
|
190
|
+
io 51
|
191
|
+
_in 51
|
192
|
+
s� 51
|
193
|
+
li 51
|
194
|
+
rai 50
|
195
|
+
hf 50
|
196
|
+
ht 50
|
197
|
+
eo 50
|
198
|
+
sc 50
|
199
|
+
ri 49
|
200
|
+
: 49
|
201
|
+
igh_ 49
|
202
|
+
gh_ 49
|
203
|
+
_s� 49
|
204
|
+
:_ 49
|
205
|
+
h� 49
|
206
|
+
_D 49
|
207
|
+
be 49
|
208
|
+
aig 49
|
209
|
+
h� 48
|
210
|
+
oc 48
|
211
|
+
idh 48
|
212
|
+
rt_ 48
|
213
|
+
ho 47
|
214
|
+
os 47
|
215
|
+
ann_ 47
|
216
|
+
_C 46
|
217
|
+
! 46
|
218
|
+
Bh 46
|
219
|
+
bhf 45
|
220
|
+
_si 45
|
221
|
+
lt 45
|
222
|
+
_bhf 45
|
223
|
+
irt 45
|
224
|
+
ear 44
|
225
|
+
_na 44
|
226
|
+
ta_ 44
|
227
|
+
air_ 44
|
228
|
+
_p 44
|
229
|
+
im 44
|
230
|
+
aga 44
|
231
|
+
_ma 44
|
232
|
+
_S 44
|
233
|
+
aigh 43
|
234
|
+
�n 43
|
235
|
+
_dh 43
|
236
|
+
uai 43
|
237
|
+
ao 43
|
238
|
+
cht 43
|
239
|
+
ain 42
|
240
|
+
bhe 42
|
241
|
+
ait 42
|
242
|
+
fh 42
|
243
|
+
sa_ 41
|
244
|
+
m_ 41
|
245
|
+
adh 41
|
246
|
+
ile 41
|
247
|
+
_� 41
|
248
|
+
ail 41
|
249
|
+
eir 41
|
250
|
+
�i 41
|
251
|
+
_Bh 40
|
252
|
+
as_ 40
|
253
|
+
cha 40
|
254
|
+
idh_ 40
|
255
|
+
h�i 39
|
256
|
+
_i_ 39
|
257
|
+
bh_ 39
|
258
|
+
th_ 39
|
259
|
+
ad_ 39
|
260
|
+
och 39
|
261
|
+
mh_ 39
|
262
|
+
tr 39
|
263
|
+
rea 38
|
264
|
+
_se 38
|
265
|
+
ro 38
|
266
|
+
r� 38
|
267
|
+
hair 38
|
268
|
+
_is 38
|
269
|
+
uil 37
|
270
|
+
i� 37
|
271
|
+
�in 37
|
272
|
+
I 37
|
273
|
+
ll 37
|
274
|
+
m� 37
|
275
|
+
_be 36
|
276
|
+
ba 36
|
277
|
+
eann 36
|
278
|
+
t� 36
|
279
|
+
_o 36
|
280
|
+
M 36
|
281
|
+
aid 36
|
282
|
+
aith 36
|
283
|
+
ib 36
|
284
|
+
' 36
|
285
|
+
tea 36
|
286
|
+
_m� 35
|
287
|
+
chu 35
|
288
|
+
ibh 35
|
289
|
+
each_ 35
|
290
|
+
ean_ 34
|
291
|
+
irt_ 34
|
292
|
+
_na_ 34
|
293
|
+
N 34
|
294
|
+
ist 34
|
295
|
+
fu 34
|
296
|
+
mha 34
|
297
|
+
bea 34
|
298
|
+
h. 34
|
299
|
+
_bhe 34
|
300
|
+
l� 34
|
301
|
+
ic 34
|
302
|
+
_s�_ 33
|
303
|
+
eis 33
|
304
|
+
bh� 33
|
305
|
+
ni 33
|
306
|
+
h�a 33
|
307
|
+
_sa 33
|
308
|
+
ith_ 33
|
309
|
+
s�_ 33
|
310
|
+
har 33
|
311
|
+
_bh� 33
|
312
|
+
ig_ 32
|
313
|
+
ur 32
|
314
|
+
a� 32
|
315
|
+
hr 32
|
316
|
+
_am 32
|
317
|
+
_bh�_ 31
|
318
|
+
da 31
|
319
|
+
�ir 31
|
320
|
+
hfu 31
|
321
|
+
_chu 31
|
322
|
+
ol 31
|
323
|
+
ne_ 31
|
324
|
+
_fa 31
|
325
|
+
An 31
|
326
|
+
Bh�_ 31
|
327
|
+
Bh� 31
|
328
|
+
n, 31
|
329
|
+
_ac 31
|
330
|
+
bh�_ 31
|
331
|
+
_bhfu 30
|
332
|
+
_� 30
|
333
|
+
ana 30
|
334
|
+
_M 30
|
335
|
+
m�_ 30
|
336
|
+
_fh 30
|
337
|
+
aigh_ 30
|
338
|
+
bhfu 30
|
339
|
+
_m�_ 30
|
340
|
+
t� 29
|
341
|
+
_le_ 29
|
342
|
+
t�_ 29
|
343
|
+
hean 29
|
344
|
+
h�in 29
|
345
|
+
sin 29
|
346
|
+
eir_ 29
|
347
|
+
nne 29
|
348
|
+
c� 29
|
349
|
+
_aga 29
|
350
|
+
h._ 29
|
351
|
+
T� 29
|
352
|
+
ibh_ 29
|
353
|
+
iste 28
|
354
|
+
An_ 28
|
355
|
+
do 28
|
356
|
+
hui 28
|
357
|
+
fui 28
|
358
|
+
n� 28
|
359
|
+
ste 28
|
360
|
+
acht 28
|
361
|
+
n,_ 28
|
362
|
+
co 28
|
363
|
+
dea 28
|
364
|
+
ng 28
|
365
|
+
nach 28
|
366
|
+
id_ 28
|
367
|
+
hfui 28
|
368
|
+
.. 28
|
369
|
+
lei 28
|
370
|
+
nac 28
|
371
|
+
ce 27
|
372
|
+
a. 27
|
373
|
+
c_ 27
|
374
|
+
lea 27
|
375
|
+
hfuil 27
|
376
|
+
_Bh�_ 27
|
377
|
+
_bea 27
|
378
|
+
adh_ 27
|
379
|
+
di 27
|
380
|
+
fuil 27
|
381
|
+
." 27
|
382
|
+
T�_ 27
|
383
|
+
ha_ 27
|
384
|
+
�_ 27
|
385
|
+
uil_ 27
|
386
|
+
."_ 27
|
387
|
+
bhfui 27
|
388
|
+
_Bh� 27
|
389
|
+
�an 27
|
390
|
+
_do 27
|
391
|
+
lta 27
|
392
|
+
aoi 27
|
393
|
+
_lei 27
|
394
|
+
_mh 26
|
395
|
+
d� 26
|
396
|
+
fuil_ 26
|
397
|
+
eat 26
|
398
|
+
-_ 26
|
399
|
+
teac 26
|
400
|
+
ath 26
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 25028
|
2
|
+
a 7570
|
3
|
+
e 6477
|
4
|
+
i 5481
|
5
|
+
o 5104
|
6
|
+
l 3905
|
7
|
+
n 3866
|
8
|
+
r 3502
|
9
|
+
t 2934
|
10
|
+
c 2862
|
11
|
+
s 2862
|
12
|
+
a_ 2504
|
13
|
+
e_ 2404
|
14
|
+
d 2004
|
15
|
+
i_ 1749
|
16
|
+
o_ 1679
|
17
|
+
u 1650
|
18
|
+
v 1611
|
19
|
+
p 1561
|
20
|
+
m 1414
|
21
|
+
_c 1325
|
22
|
+
, 1192
|
23
|
+
,_ 1192
|
24
|
+
_s 1190
|
25
|
+
_d 1094
|
26
|
+
g 1067
|
27
|
+
an 925
|
28
|
+
er 915
|
29
|
+
_a 914
|
30
|
+
_p 895
|
31
|
+
la 858
|
32
|
+
_l 830
|
33
|
+
re 799
|
34
|
+
ar 769
|
35
|
+
h 762
|
36
|
+
no 753
|
37
|
+
co 726
|
38
|
+
va 698
|
39
|
+
_e 657
|
40
|
+
n_ 656
|
41
|
+
on 656
|
42
|
+
ra 653
|
43
|
+
to 651
|
44
|
+
f 638
|
45
|
+
di 638
|
46
|
+
_i 634
|
47
|
+
ch 634
|
48
|
+
ll 633
|
49
|
+
l_ 624
|
50
|
+
la_ 598
|
51
|
+
ta 593
|
52
|
+
el 576
|
53
|
+
in 567
|
54
|
+
_m 558
|
55
|
+
en 529
|
56
|
+
b 528
|
57
|
+
ri 525
|
58
|
+
_co 523
|
59
|
+
_n 523
|
60
|
+
_di 522
|
61
|
+
li 513
|
62
|
+
av 507
|
63
|
+
al 501
|
64
|
+
le 494
|
65
|
+
ia 492
|
66
|
+
se 484
|
67
|
+
ol 479
|
68
|
+
_f 477
|
69
|
+
or 477
|
70
|
+
te 469
|
71
|
+
_e_ 467
|
72
|
+
ve 454
|
73
|
+
at 449
|
74
|
+
de 447
|
75
|
+
. 443
|
76
|
+
ne 429
|
77
|
+
va_ 428
|
78
|
+
ca 426
|
79
|
+
._ 422
|
80
|
+
tt 422
|
81
|
+
re_ 415
|
82
|
+
nt 415
|
83
|
+
io 411
|
84
|
+
_v 407
|
85
|
+
pe 405
|
86
|
+
z 392
|
87
|
+
to_ 391
|
88
|
+
_ch 389
|
89
|
+
na 384
|
90
|
+
si 384
|
91
|
+
' 383
|
92
|
+
he 382
|
93
|
+
no_ 379
|
94
|
+
ci 374
|
95
|
+
_la 373
|
96
|
+
ro 371
|
97
|
+
_g 370
|
98
|
+
st 368
|
99
|
+
cc 366
|
100
|
+
he_ 362
|
101
|
+
di_ 362
|
102
|
+
ma 358
|
103
|
+
ev 354
|
104
|
+
che 354
|
105
|
+
es 352
|
106
|
+
me 352
|
107
|
+
pa 351
|
108
|
+
_t 349
|
109
|
+
ti 348
|
110
|
+
_di_ 347
|
111
|
+
ss 345
|
112
|
+
che_ 344
|
113
|
+
a,_ 337
|
114
|
+
a, 337
|
115
|
+
nd 335
|
116
|
+
o, 333
|
117
|
+
o,_ 333
|
118
|
+
ell 330
|
119
|
+
gl 323
|
120
|
+
sa 322
|
121
|
+
il 322
|
122
|
+
gli 321
|
123
|
+
da 318
|
124
|
+
as 318
|
125
|
+
do 314
|
126
|
+
_che 308
|
127
|
+
_che_ 306
|
128
|
+
eva 306
|
129
|
+
_la_ 300
|
130
|
+
lla 298
|
131
|
+
le_ 293
|
132
|
+
un 291
|
133
|
+
_pe 290
|
134
|
+
_de 288
|
135
|
+
q 283
|
136
|
+
qu 283
|
137
|
+
ava 280
|
138
|
+
po 277
|
139
|
+
on_ 275
|
140
|
+
r_ 273
|
141
|
+
li_ 273
|
142
|
+
_b 269
|
143
|
+
_il 268
|
144
|
+
_il_ 268
|
145
|
+
il_ 268
|
146
|
+
lo 267
|
147
|
+
om 263
|
148
|
+
e, 263
|
149
|
+
e,_ 263
|
150
|
+
ni 258
|
151
|
+
tr 258
|
152
|
+
so 255
|
153
|
+
ra_ 253
|
154
|
+
os 251
|
155
|
+
_in 249
|
156
|
+
_u 248
|
157
|
+
per 244
|
158
|
+
are 243
|
159
|
+
et 243
|
160
|
+
_se 240
|
161
|
+
ano 239
|
162
|
+
si_ 238
|
163
|
+
_ca 238
|
164
|
+
_qu 238
|
165
|
+
lla_ 238
|
166
|
+
_q 238
|
167
|
+
_a_ 236
|
168
|
+
ac 236
|
169
|
+
_r 234
|
170
|
+
ic 233
|
171
|
+
_no 232
|
172
|
+
ie 227
|
173
|
+
fa 227
|
174
|
+
hi 226
|
175
|
+
del 225
|
176
|
+
ua 222
|
177
|
+
_per 218
|
178
|
+
ce 218
|
179
|
+
_ma 216
|
180
|
+
sc 216
|
181
|
+
_del 215
|
182
|
+
mi 212
|
183
|
+
_un 208
|
184
|
+
chi 206
|
185
|
+
era 205
|
186
|
+
i, 205
|
187
|
+
i,_ 205
|
188
|
+
su 203
|
189
|
+
and 202
|
190
|
+
vo 202
|
191
|
+
_fa 201
|
192
|
+
eva_ 200
|
193
|
+
ano_ 199
|
194
|
+
gli_ 197
|
195
|
+
non 196
|
196
|
+
pi 196
|
197
|
+
vi 195
|
198
|
+
er_ 195
|
199
|
+
_al 194
|
200
|
+
se_ 193
|
201
|
+
_ne 192
|
202
|
+
_non 191
|
203
|
+
am 190
|
204
|
+
is 187
|
205
|
+
ava_ 187
|
206
|
+
_non_ 186
|
207
|
+
non_ 186
|
208
|
+
in_ 185
|
209
|
+
ent 185
|
210
|
+
_si 184
|
211
|
+
_pa 184
|
212
|
+
com 183
|
213
|
+
! 182
|
214
|
+
_le 182
|
215
|
+
_su 181
|
216
|
+
uo 181
|
217
|
+
el_ 180
|
218
|
+
!_ 180
|
219
|
+
l' 178
|
220
|
+
ue 177
|
221
|
+
te_ 177
|
222
|
+
_com 177
|
223
|
+
are_ 176
|
224
|
+
pr 176
|
225
|
+
_in_ 176
|
226
|
+
van 172
|
227
|
+
mo 172
|
228
|
+
ta_ 171
|
229
|
+
gn 167
|
230
|
+
ere 166
|
231
|
+
na_ 166
|
232
|
+
tto 163
|
233
|
+
it 161
|
234
|
+
_per_ 161
|
235
|
+
per_ 161
|
236
|
+
� 161
|
237
|
+
all 160
|
238
|
+
ess 159
|
239
|
+
ut 159
|
240
|
+
col 158
|
241
|
+
acc 157
|
242
|
+
gi 155
|
243
|
+
lo_ 154
|
244
|
+
oc 154
|
245
|
+
vano 153
|
246
|
+
io_ 153
|
247
|
+
_av 151
|
248
|
+
ndo 151
|
249
|
+
�_ 151
|
250
|
+
ato 149
|
251
|
+
ave 148
|
252
|
+
_st 147
|
253
|
+
me_ 147
|
254
|
+
'a 146
|
255
|
+
ia_ 144
|
256
|
+
con 143
|
257
|
+
mp 143
|
258
|
+
fi 142
|
259
|
+
ett 142
|
260
|
+
_si_ 141
|
261
|
+
_pi 140
|
262
|
+
era_ 140
|
263
|
+
ti_ 140
|
264
|
+
� 140
|
265
|
+
vano_ 140
|
266
|
+
_gl 139
|
267
|
+
qua 139
|
268
|
+
ella 139
|
269
|
+
sta 138
|
270
|
+
ome 137
|
271
|
+
S 137
|
272
|
+
_gli 137
|
273
|
+
_S 137
|
274
|
+
ad 136
|
275
|
+
_ve 134
|
276
|
+
ant 134
|
277
|
+
ne_ 134
|
278
|
+
�_ 133
|
279
|
+
sp 133
|
280
|
+
do_ 133
|
281
|
+
_po 132
|
282
|
+
ro_ 132
|
283
|
+
ov 132
|
284
|
+
_le_ 131
|
285
|
+
ella_ 130
|
286
|
+
sse 129
|
287
|
+
_con 128
|
288
|
+
ir 128
|
289
|
+
_vi 128
|
290
|
+
ig 127
|
291
|
+
_gli_ 127
|
292
|
+
_ave 127
|
293
|
+
vev 127
|
294
|
+
un_ 126
|
295
|
+
ot 126
|
296
|
+
veva 125
|
297
|
+
dell 125
|
298
|
+
que 125
|
299
|
+
a. 125
|
300
|
+
_o 125
|
301
|
+
a._ 124
|
302
|
+
tu 124
|
303
|
+
cia 123
|
304
|
+
za 123
|
305
|
+
_que 123
|
306
|
+
_da 121
|
307
|
+
par 121
|
308
|
+
_pr 120
|
309
|
+
cch 120
|
310
|
+
_dell 120
|
311
|
+
eg 119
|
312
|
+
_sa 119
|
313
|
+
o._ 119
|
314
|
+
o. 119
|
315
|
+
_col 118
|
316
|
+
lt 118
|
317
|
+
_un_ 118
|
318
|
+
rt 118
|
319
|
+
ur 117
|
320
|
+
_vo 117
|
321
|
+
_me 117
|
322
|
+
ome_ 117
|
323
|
+
L 116
|
324
|
+
ap 116
|
325
|
+
_L 116
|
326
|
+
zi 116
|
327
|
+
nto 116
|
328
|
+
og 115
|
329
|
+
_an 115
|
330
|
+
_so 115
|
331
|
+
em 114
|
332
|
+
ag 114
|
333
|
+
be 111
|
334
|
+
ni_ 111
|
335
|
+
im 110
|
336
|
+
cchi 110
|
337
|
+
ver 110
|
338
|
+
lle 109
|
339
|
+
nz 109
|
340
|
+
cci 109
|
341
|
+
_ri 109
|
342
|
+
nc 108
|
343
|
+
_er 108
|
344
|
+
come_ 107
|
345
|
+
come 107
|
346
|
+
aveva 107
|
347
|
+
ui 107
|
348
|
+
avev 107
|
349
|
+
tto_ 107
|
350
|
+
_come 106
|
351
|
+
ed 106
|
352
|
+
P 105
|
353
|
+
man 105
|
354
|
+
_P 105
|
355
|
+
rs 105
|
356
|
+
occ 104
|
357
|
+
ndo_ 103
|
358
|
+
ato_ 103
|
359
|
+
_qua 103
|
360
|
+
_era 103
|
361
|
+
ari 102
|
362
|
+
ba 100
|
363
|
+
_mo 100
|
364
|
+
nel 100
|
365
|
+
id 99
|
366
|
+
men 98
|
367
|
+
_fi 98
|
368
|
+
_all 98
|
369
|
+
rr 97
|
370
|
+
_do 97
|
371
|
+
_avev 97
|
372
|
+
att 97
|
373
|
+
l'a 96
|
374
|
+
ei 96
|
375
|
+
zz 96
|
376
|
+
; 96
|
377
|
+
vol 95
|
378
|
+
pp 95
|
379
|
+
tra 95
|
380
|
+
;_ 95
|
381
|
+
ere_ 94
|
382
|
+
lle_ 94
|
383
|
+
nda 94
|
384
|
+
utt 94
|
385
|
+
est 93
|
386
|
+
_nel 93
|
387
|
+
ul 92
|
388
|
+
ola 92
|
389
|
+
iv 92
|
390
|
+
ando 90
|
391
|
+
ale 90
|
392
|
+
lu 90
|
393
|
+
rn 90
|
394
|
+
e. 89
|
395
|
+
e._ 89
|
396
|
+
ll' 89
|
397
|
+
tta 88
|
398
|
+
nte 87
|
399
|
+
_l' 87
|
400
|
+
uel 87
|