language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 8010
|
2
|
+
a 2622
|
3
|
+
i 1573
|
4
|
+
h 1334
|
5
|
+
n 1247
|
6
|
+
r 968
|
7
|
+
_a 847
|
8
|
+
e 830
|
9
|
+
s 817
|
10
|
+
t 747
|
11
|
+
l 639
|
12
|
+
c 636
|
13
|
+
g 598
|
14
|
+
o 590
|
15
|
+
d 554
|
16
|
+
n_ 501
|
17
|
+
a_ 487
|
18
|
+
m 432
|
19
|
+
an 415
|
20
|
+
u 413
|
21
|
+
b 379
|
22
|
+
h_ 352
|
23
|
+
ai 350
|
24
|
+
ch 350
|
25
|
+
ea 346
|
26
|
+
r_ 346
|
27
|
+
� 334
|
28
|
+
� 321
|
29
|
+
_s 309
|
30
|
+
� 306
|
31
|
+
in 281
|
32
|
+
ar 277
|
33
|
+
. 269
|
34
|
+
_d 266
|
35
|
+
s_ 254
|
36
|
+
ir 253
|
37
|
+
_b 250
|
38
|
+
f 250
|
39
|
+
an_ 246
|
40
|
+
, 241
|
41
|
+
,_ 226
|
42
|
+
ag 225
|
43
|
+
_an 221
|
44
|
+
bh 218
|
45
|
+
_c 216
|
46
|
+
._ 212
|
47
|
+
ac 210
|
48
|
+
ha 208
|
49
|
+
_a_ 201
|
50
|
+
" 199
|
51
|
+
_m 199
|
52
|
+
th 198
|
53
|
+
_t 190
|
54
|
+
ach 182
|
55
|
+
_ag 180
|
56
|
+
_an_ 179
|
57
|
+
�_ 176
|
58
|
+
_l 168
|
59
|
+
na 168
|
60
|
+
nn 160
|
61
|
+
e_ 159
|
62
|
+
ar_ 158
|
63
|
+
_g 157
|
64
|
+
� 156
|
65
|
+
_i 152
|
66
|
+
il 150
|
67
|
+
le 150
|
68
|
+
is 143
|
69
|
+
� 142
|
70
|
+
_bh 138
|
71
|
+
ei 138
|
72
|
+
g_ 135
|
73
|
+
_f 135
|
74
|
+
dh 135
|
75
|
+
l_ 126
|
76
|
+
t_ 125
|
77
|
+
ig 123
|
78
|
+
�_ 122
|
79
|
+
_n 120
|
80
|
+
gu 120
|
81
|
+
�_ 120
|
82
|
+
mh 118
|
83
|
+
id 117
|
84
|
+
ch_ 117
|
85
|
+
ad 116
|
86
|
+
he 114
|
87
|
+
ir_ 114
|
88
|
+
ra 109
|
89
|
+
o_ 109
|
90
|
+
ach_ 107
|
91
|
+
ia 105
|
92
|
+
_ar 105
|
93
|
+
us 104
|
94
|
+
ui 104
|
95
|
+
_" 101
|
96
|
+
us_ 100
|
97
|
+
T 99
|
98
|
+
am 99
|
99
|
+
ta 98
|
100
|
+
gus 98
|
101
|
+
gus_ 98
|
102
|
+
_le 97
|
103
|
+
gh 97
|
104
|
+
_ch 97
|
105
|
+
agus 94
|
106
|
+
agus_ 94
|
107
|
+
agu 94
|
108
|
+
�a 93
|
109
|
+
_agus 93
|
110
|
+
_agu 93
|
111
|
+
ean 93
|
112
|
+
na_ 92
|
113
|
+
d_ 92
|
114
|
+
�i 91
|
115
|
+
p 89
|
116
|
+
it 89
|
117
|
+
A 89
|
118
|
+
_ar_ 88
|
119
|
+
rt 86
|
120
|
+
al 85
|
121
|
+
oi 84
|
122
|
+
sa 84
|
123
|
+
"_ 82
|
124
|
+
hai 81
|
125
|
+
_r 79
|
126
|
+
nn_ 79
|
127
|
+
hu 79
|
128
|
+
as 79
|
129
|
+
�i 78
|
130
|
+
_T 78
|
131
|
+
ma 77
|
132
|
+
air 77
|
133
|
+
at 77
|
134
|
+
ann 76
|
135
|
+
B 76
|
136
|
+
s� 76
|
137
|
+
h� 75
|
138
|
+
igh 74
|
139
|
+
st 74
|
140
|
+
ga 73
|
141
|
+
go 71
|
142
|
+
ua 71
|
143
|
+
ne 71
|
144
|
+
la 71
|
145
|
+
- 71
|
146
|
+
de 71
|
147
|
+
te 71
|
148
|
+
re 70
|
149
|
+
inn 70
|
150
|
+
ith 69
|
151
|
+
eac 69
|
152
|
+
_s� 69
|
153
|
+
in_ 68
|
154
|
+
_go 68
|
155
|
+
hi 68
|
156
|
+
each 68
|
157
|
+
dh_ 68
|
158
|
+
si 67
|
159
|
+
ag_ 67
|
160
|
+
_go_ 66
|
161
|
+
hea 66
|
162
|
+
go_ 66
|
163
|
+
tha 64
|
164
|
+
om 64
|
165
|
+
_s�_ 63
|
166
|
+
s�_ 63
|
167
|
+
h�_ 63
|
168
|
+
on 62
|
169
|
+
se 61
|
170
|
+
�i 60
|
171
|
+
nt 60
|
172
|
+
C 60
|
173
|
+
D 59
|
174
|
+
i_ 58
|
175
|
+
_ag_ 58
|
176
|
+
is_ 58
|
177
|
+
�o 58
|
178
|
+
_de 57
|
179
|
+
_B 56
|
180
|
+
il_ 56
|
181
|
+
or 56
|
182
|
+
_th 54
|
183
|
+
ca 53
|
184
|
+
fa 53
|
185
|
+
amh 53
|
186
|
+
_A 53
|
187
|
+
le_ 52
|
188
|
+
? 52
|
189
|
+
S 51
|
190
|
+
io 51
|
191
|
+
_in 51
|
192
|
+
s� 51
|
193
|
+
li 51
|
194
|
+
rai 50
|
195
|
+
hf 50
|
196
|
+
ht 50
|
197
|
+
eo 50
|
198
|
+
sc 50
|
199
|
+
ri 49
|
200
|
+
: 49
|
201
|
+
igh_ 49
|
202
|
+
gh_ 49
|
203
|
+
_s� 49
|
204
|
+
:_ 49
|
205
|
+
h� 49
|
206
|
+
_D 49
|
207
|
+
be 49
|
208
|
+
aig 49
|
209
|
+
h� 48
|
210
|
+
oc 48
|
211
|
+
idh 48
|
212
|
+
rt_ 48
|
213
|
+
ho 47
|
214
|
+
os 47
|
215
|
+
ann_ 47
|
216
|
+
_C 46
|
217
|
+
! 46
|
218
|
+
Bh 46
|
219
|
+
bhf 45
|
220
|
+
_si 45
|
221
|
+
lt 45
|
222
|
+
_bhf 45
|
223
|
+
irt 45
|
224
|
+
ear 44
|
225
|
+
_na 44
|
226
|
+
ta_ 44
|
227
|
+
air_ 44
|
228
|
+
_p 44
|
229
|
+
im 44
|
230
|
+
aga 44
|
231
|
+
_ma 44
|
232
|
+
_S 44
|
233
|
+
aigh 43
|
234
|
+
�n 43
|
235
|
+
_dh 43
|
236
|
+
uai 43
|
237
|
+
ao 43
|
238
|
+
cht 43
|
239
|
+
ain 42
|
240
|
+
bhe 42
|
241
|
+
ait 42
|
242
|
+
fh 42
|
243
|
+
sa_ 41
|
244
|
+
m_ 41
|
245
|
+
adh 41
|
246
|
+
ile 41
|
247
|
+
_� 41
|
248
|
+
ail 41
|
249
|
+
eir 41
|
250
|
+
�i 41
|
251
|
+
_Bh 40
|
252
|
+
as_ 40
|
253
|
+
cha 40
|
254
|
+
idh_ 40
|
255
|
+
h�i 39
|
256
|
+
_i_ 39
|
257
|
+
bh_ 39
|
258
|
+
th_ 39
|
259
|
+
ad_ 39
|
260
|
+
och 39
|
261
|
+
mh_ 39
|
262
|
+
tr 39
|
263
|
+
rea 38
|
264
|
+
_se 38
|
265
|
+
ro 38
|
266
|
+
r� 38
|
267
|
+
hair 38
|
268
|
+
_is 38
|
269
|
+
uil 37
|
270
|
+
i� 37
|
271
|
+
�in 37
|
272
|
+
I 37
|
273
|
+
ll 37
|
274
|
+
m� 37
|
275
|
+
_be 36
|
276
|
+
ba 36
|
277
|
+
eann 36
|
278
|
+
t� 36
|
279
|
+
_o 36
|
280
|
+
M 36
|
281
|
+
aid 36
|
282
|
+
aith 36
|
283
|
+
ib 36
|
284
|
+
' 36
|
285
|
+
tea 36
|
286
|
+
_m� 35
|
287
|
+
chu 35
|
288
|
+
ibh 35
|
289
|
+
each_ 35
|
290
|
+
ean_ 34
|
291
|
+
irt_ 34
|
292
|
+
_na_ 34
|
293
|
+
N 34
|
294
|
+
ist 34
|
295
|
+
fu 34
|
296
|
+
mha 34
|
297
|
+
bea 34
|
298
|
+
h. 34
|
299
|
+
_bhe 34
|
300
|
+
l� 34
|
301
|
+
ic 34
|
302
|
+
_s�_ 33
|
303
|
+
eis 33
|
304
|
+
bh� 33
|
305
|
+
ni 33
|
306
|
+
h�a 33
|
307
|
+
_sa 33
|
308
|
+
ith_ 33
|
309
|
+
s�_ 33
|
310
|
+
har 33
|
311
|
+
_bh� 33
|
312
|
+
ig_ 32
|
313
|
+
ur 32
|
314
|
+
a� 32
|
315
|
+
hr 32
|
316
|
+
_am 32
|
317
|
+
_bh�_ 31
|
318
|
+
da 31
|
319
|
+
�ir 31
|
320
|
+
hfu 31
|
321
|
+
_chu 31
|
322
|
+
ol 31
|
323
|
+
ne_ 31
|
324
|
+
_fa 31
|
325
|
+
An 31
|
326
|
+
Bh�_ 31
|
327
|
+
Bh� 31
|
328
|
+
n, 31
|
329
|
+
_ac 31
|
330
|
+
bh�_ 31
|
331
|
+
_bhfu 30
|
332
|
+
_� 30
|
333
|
+
ana 30
|
334
|
+
_M 30
|
335
|
+
m�_ 30
|
336
|
+
_fh 30
|
337
|
+
aigh_ 30
|
338
|
+
bhfu 30
|
339
|
+
_m�_ 30
|
340
|
+
t� 29
|
341
|
+
_le_ 29
|
342
|
+
t�_ 29
|
343
|
+
hean 29
|
344
|
+
h�in 29
|
345
|
+
sin 29
|
346
|
+
eir_ 29
|
347
|
+
nne 29
|
348
|
+
c� 29
|
349
|
+
_aga 29
|
350
|
+
h._ 29
|
351
|
+
T� 29
|
352
|
+
ibh_ 29
|
353
|
+
iste 28
|
354
|
+
An_ 28
|
355
|
+
do 28
|
356
|
+
hui 28
|
357
|
+
fui 28
|
358
|
+
n� 28
|
359
|
+
ste 28
|
360
|
+
acht 28
|
361
|
+
n,_ 28
|
362
|
+
co 28
|
363
|
+
dea 28
|
364
|
+
ng 28
|
365
|
+
nach 28
|
366
|
+
id_ 28
|
367
|
+
hfui 28
|
368
|
+
.. 28
|
369
|
+
lei 28
|
370
|
+
nac 28
|
371
|
+
ce 27
|
372
|
+
a. 27
|
373
|
+
c_ 27
|
374
|
+
lea 27
|
375
|
+
hfuil 27
|
376
|
+
_Bh�_ 27
|
377
|
+
_bea 27
|
378
|
+
adh_ 27
|
379
|
+
di 27
|
380
|
+
fuil 27
|
381
|
+
." 27
|
382
|
+
T�_ 27
|
383
|
+
ha_ 27
|
384
|
+
�_ 27
|
385
|
+
uil_ 27
|
386
|
+
."_ 27
|
387
|
+
bhfui 27
|
388
|
+
_Bh� 27
|
389
|
+
�an 27
|
390
|
+
_do 27
|
391
|
+
lta 27
|
392
|
+
aoi 27
|
393
|
+
_lei 27
|
394
|
+
_mh 26
|
395
|
+
d� 26
|
396
|
+
fuil_ 26
|
397
|
+
eat 26
|
398
|
+
-_ 26
|
399
|
+
teac 26
|
400
|
+
ath 26
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 25028
|
2
|
+
a 7570
|
3
|
+
e 6477
|
4
|
+
i 5481
|
5
|
+
o 5104
|
6
|
+
l 3905
|
7
|
+
n 3866
|
8
|
+
r 3502
|
9
|
+
t 2934
|
10
|
+
c 2862
|
11
|
+
s 2862
|
12
|
+
a_ 2504
|
13
|
+
e_ 2404
|
14
|
+
d 2004
|
15
|
+
i_ 1749
|
16
|
+
o_ 1679
|
17
|
+
u 1650
|
18
|
+
v 1611
|
19
|
+
p 1561
|
20
|
+
m 1414
|
21
|
+
_c 1325
|
22
|
+
, 1192
|
23
|
+
,_ 1192
|
24
|
+
_s 1190
|
25
|
+
_d 1094
|
26
|
+
g 1067
|
27
|
+
an 925
|
28
|
+
er 915
|
29
|
+
_a 914
|
30
|
+
_p 895
|
31
|
+
la 858
|
32
|
+
_l 830
|
33
|
+
re 799
|
34
|
+
ar 769
|
35
|
+
h 762
|
36
|
+
no 753
|
37
|
+
co 726
|
38
|
+
va 698
|
39
|
+
_e 657
|
40
|
+
n_ 656
|
41
|
+
on 656
|
42
|
+
ra 653
|
43
|
+
to 651
|
44
|
+
f 638
|
45
|
+
di 638
|
46
|
+
_i 634
|
47
|
+
ch 634
|
48
|
+
ll 633
|
49
|
+
l_ 624
|
50
|
+
la_ 598
|
51
|
+
ta 593
|
52
|
+
el 576
|
53
|
+
in 567
|
54
|
+
_m 558
|
55
|
+
en 529
|
56
|
+
b 528
|
57
|
+
ri 525
|
58
|
+
_co 523
|
59
|
+
_n 523
|
60
|
+
_di 522
|
61
|
+
li 513
|
62
|
+
av 507
|
63
|
+
al 501
|
64
|
+
le 494
|
65
|
+
ia 492
|
66
|
+
se 484
|
67
|
+
ol 479
|
68
|
+
_f 477
|
69
|
+
or 477
|
70
|
+
te 469
|
71
|
+
_e_ 467
|
72
|
+
ve 454
|
73
|
+
at 449
|
74
|
+
de 447
|
75
|
+
. 443
|
76
|
+
ne 429
|
77
|
+
va_ 428
|
78
|
+
ca 426
|
79
|
+
._ 422
|
80
|
+
tt 422
|
81
|
+
re_ 415
|
82
|
+
nt 415
|
83
|
+
io 411
|
84
|
+
_v 407
|
85
|
+
pe 405
|
86
|
+
z 392
|
87
|
+
to_ 391
|
88
|
+
_ch 389
|
89
|
+
na 384
|
90
|
+
si 384
|
91
|
+
' 383
|
92
|
+
he 382
|
93
|
+
no_ 379
|
94
|
+
ci 374
|
95
|
+
_la 373
|
96
|
+
ro 371
|
97
|
+
_g 370
|
98
|
+
st 368
|
99
|
+
cc 366
|
100
|
+
he_ 362
|
101
|
+
di_ 362
|
102
|
+
ma 358
|
103
|
+
ev 354
|
104
|
+
che 354
|
105
|
+
es 352
|
106
|
+
me 352
|
107
|
+
pa 351
|
108
|
+
_t 349
|
109
|
+
ti 348
|
110
|
+
_di_ 347
|
111
|
+
ss 345
|
112
|
+
che_ 344
|
113
|
+
a,_ 337
|
114
|
+
a, 337
|
115
|
+
nd 335
|
116
|
+
o, 333
|
117
|
+
o,_ 333
|
118
|
+
ell 330
|
119
|
+
gl 323
|
120
|
+
sa 322
|
121
|
+
il 322
|
122
|
+
gli 321
|
123
|
+
da 318
|
124
|
+
as 318
|
125
|
+
do 314
|
126
|
+
_che 308
|
127
|
+
_che_ 306
|
128
|
+
eva 306
|
129
|
+
_la_ 300
|
130
|
+
lla 298
|
131
|
+
le_ 293
|
132
|
+
un 291
|
133
|
+
_pe 290
|
134
|
+
_de 288
|
135
|
+
q 283
|
136
|
+
qu 283
|
137
|
+
ava 280
|
138
|
+
po 277
|
139
|
+
on_ 275
|
140
|
+
r_ 273
|
141
|
+
li_ 273
|
142
|
+
_b 269
|
143
|
+
_il 268
|
144
|
+
_il_ 268
|
145
|
+
il_ 268
|
146
|
+
lo 267
|
147
|
+
om 263
|
148
|
+
e, 263
|
149
|
+
e,_ 263
|
150
|
+
ni 258
|
151
|
+
tr 258
|
152
|
+
so 255
|
153
|
+
ra_ 253
|
154
|
+
os 251
|
155
|
+
_in 249
|
156
|
+
_u 248
|
157
|
+
per 244
|
158
|
+
are 243
|
159
|
+
et 243
|
160
|
+
_se 240
|
161
|
+
ano 239
|
162
|
+
si_ 238
|
163
|
+
_ca 238
|
164
|
+
_qu 238
|
165
|
+
lla_ 238
|
166
|
+
_q 238
|
167
|
+
_a_ 236
|
168
|
+
ac 236
|
169
|
+
_r 234
|
170
|
+
ic 233
|
171
|
+
_no 232
|
172
|
+
ie 227
|
173
|
+
fa 227
|
174
|
+
hi 226
|
175
|
+
del 225
|
176
|
+
ua 222
|
177
|
+
_per 218
|
178
|
+
ce 218
|
179
|
+
_ma 216
|
180
|
+
sc 216
|
181
|
+
_del 215
|
182
|
+
mi 212
|
183
|
+
_un 208
|
184
|
+
chi 206
|
185
|
+
era 205
|
186
|
+
i, 205
|
187
|
+
i,_ 205
|
188
|
+
su 203
|
189
|
+
and 202
|
190
|
+
vo 202
|
191
|
+
_fa 201
|
192
|
+
eva_ 200
|
193
|
+
ano_ 199
|
194
|
+
gli_ 197
|
195
|
+
non 196
|
196
|
+
pi 196
|
197
|
+
vi 195
|
198
|
+
er_ 195
|
199
|
+
_al 194
|
200
|
+
se_ 193
|
201
|
+
_ne 192
|
202
|
+
_non 191
|
203
|
+
am 190
|
204
|
+
is 187
|
205
|
+
ava_ 187
|
206
|
+
_non_ 186
|
207
|
+
non_ 186
|
208
|
+
in_ 185
|
209
|
+
ent 185
|
210
|
+
_si 184
|
211
|
+
_pa 184
|
212
|
+
com 183
|
213
|
+
! 182
|
214
|
+
_le 182
|
215
|
+
_su 181
|
216
|
+
uo 181
|
217
|
+
el_ 180
|
218
|
+
!_ 180
|
219
|
+
l' 178
|
220
|
+
ue 177
|
221
|
+
te_ 177
|
222
|
+
_com 177
|
223
|
+
are_ 176
|
224
|
+
pr 176
|
225
|
+
_in_ 176
|
226
|
+
van 172
|
227
|
+
mo 172
|
228
|
+
ta_ 171
|
229
|
+
gn 167
|
230
|
+
ere 166
|
231
|
+
na_ 166
|
232
|
+
tto 163
|
233
|
+
it 161
|
234
|
+
_per_ 161
|
235
|
+
per_ 161
|
236
|
+
� 161
|
237
|
+
all 160
|
238
|
+
ess 159
|
239
|
+
ut 159
|
240
|
+
col 158
|
241
|
+
acc 157
|
242
|
+
gi 155
|
243
|
+
lo_ 154
|
244
|
+
oc 154
|
245
|
+
vano 153
|
246
|
+
io_ 153
|
247
|
+
_av 151
|
248
|
+
ndo 151
|
249
|
+
�_ 151
|
250
|
+
ato 149
|
251
|
+
ave 148
|
252
|
+
_st 147
|
253
|
+
me_ 147
|
254
|
+
'a 146
|
255
|
+
ia_ 144
|
256
|
+
con 143
|
257
|
+
mp 143
|
258
|
+
fi 142
|
259
|
+
ett 142
|
260
|
+
_si_ 141
|
261
|
+
_pi 140
|
262
|
+
era_ 140
|
263
|
+
ti_ 140
|
264
|
+
� 140
|
265
|
+
vano_ 140
|
266
|
+
_gl 139
|
267
|
+
qua 139
|
268
|
+
ella 139
|
269
|
+
sta 138
|
270
|
+
ome 137
|
271
|
+
S 137
|
272
|
+
_gli 137
|
273
|
+
_S 137
|
274
|
+
ad 136
|
275
|
+
_ve 134
|
276
|
+
ant 134
|
277
|
+
ne_ 134
|
278
|
+
�_ 133
|
279
|
+
sp 133
|
280
|
+
do_ 133
|
281
|
+
_po 132
|
282
|
+
ro_ 132
|
283
|
+
ov 132
|
284
|
+
_le_ 131
|
285
|
+
ella_ 130
|
286
|
+
sse 129
|
287
|
+
_con 128
|
288
|
+
ir 128
|
289
|
+
_vi 128
|
290
|
+
ig 127
|
291
|
+
_gli_ 127
|
292
|
+
_ave 127
|
293
|
+
vev 127
|
294
|
+
un_ 126
|
295
|
+
ot 126
|
296
|
+
veva 125
|
297
|
+
dell 125
|
298
|
+
que 125
|
299
|
+
a. 125
|
300
|
+
_o 125
|
301
|
+
a._ 124
|
302
|
+
tu 124
|
303
|
+
cia 123
|
304
|
+
za 123
|
305
|
+
_que 123
|
306
|
+
_da 121
|
307
|
+
par 121
|
308
|
+
_pr 120
|
309
|
+
cch 120
|
310
|
+
_dell 120
|
311
|
+
eg 119
|
312
|
+
_sa 119
|
313
|
+
o._ 119
|
314
|
+
o. 119
|
315
|
+
_col 118
|
316
|
+
lt 118
|
317
|
+
_un_ 118
|
318
|
+
rt 118
|
319
|
+
ur 117
|
320
|
+
_vo 117
|
321
|
+
_me 117
|
322
|
+
ome_ 117
|
323
|
+
L 116
|
324
|
+
ap 116
|
325
|
+
_L 116
|
326
|
+
zi 116
|
327
|
+
nto 116
|
328
|
+
og 115
|
329
|
+
_an 115
|
330
|
+
_so 115
|
331
|
+
em 114
|
332
|
+
ag 114
|
333
|
+
be 111
|
334
|
+
ni_ 111
|
335
|
+
im 110
|
336
|
+
cchi 110
|
337
|
+
ver 110
|
338
|
+
lle 109
|
339
|
+
nz 109
|
340
|
+
cci 109
|
341
|
+
_ri 109
|
342
|
+
nc 108
|
343
|
+
_er 108
|
344
|
+
come_ 107
|
345
|
+
come 107
|
346
|
+
aveva 107
|
347
|
+
ui 107
|
348
|
+
avev 107
|
349
|
+
tto_ 107
|
350
|
+
_come 106
|
351
|
+
ed 106
|
352
|
+
P 105
|
353
|
+
man 105
|
354
|
+
_P 105
|
355
|
+
rs 105
|
356
|
+
occ 104
|
357
|
+
ndo_ 103
|
358
|
+
ato_ 103
|
359
|
+
_qua 103
|
360
|
+
_era 103
|
361
|
+
ari 102
|
362
|
+
ba 100
|
363
|
+
_mo 100
|
364
|
+
nel 100
|
365
|
+
id 99
|
366
|
+
men 98
|
367
|
+
_fi 98
|
368
|
+
_all 98
|
369
|
+
rr 97
|
370
|
+
_do 97
|
371
|
+
_avev 97
|
372
|
+
att 97
|
373
|
+
l'a 96
|
374
|
+
ei 96
|
375
|
+
zz 96
|
376
|
+
; 96
|
377
|
+
vol 95
|
378
|
+
pp 95
|
379
|
+
tra 95
|
380
|
+
;_ 95
|
381
|
+
ere_ 94
|
382
|
+
lle_ 94
|
383
|
+
nda 94
|
384
|
+
utt 94
|
385
|
+
est 93
|
386
|
+
_nel 93
|
387
|
+
ul 92
|
388
|
+
ola 92
|
389
|
+
iv 92
|
390
|
+
ando 90
|
391
|
+
ale 90
|
392
|
+
lu 90
|
393
|
+
rn 90
|
394
|
+
e. 89
|
395
|
+
e._ 89
|
396
|
+
ll' 89
|
397
|
+
tta 88
|
398
|
+
nte 87
|
399
|
+
_l' 87
|
400
|
+
uel 87
|