language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 16483
|
2
|
+
a 9342
|
3
|
+
i 5293
|
4
|
+
a_ 4071
|
5
|
+
u 2730
|
6
|
+
k 2609
|
7
|
+
n 2351
|
8
|
+
w 2076
|
9
|
+
m 1928
|
10
|
+
e 1866
|
11
|
+
h 1800
|
12
|
+
o 1775
|
13
|
+
wa 1743
|
14
|
+
l 1486
|
15
|
+
s 1419
|
16
|
+
i_ 1401
|
17
|
+
t 1399
|
18
|
+
_k 1306
|
19
|
+
y 1100
|
20
|
+
_w 969
|
21
|
+
li 945
|
22
|
+
wa_ 911
|
23
|
+
z 891
|
24
|
+
_wa 890
|
25
|
+
ka 834
|
26
|
+
ku 799
|
27
|
+
r 770
|
28
|
+
b 733
|
29
|
+
an 727
|
30
|
+
ma 723
|
31
|
+
o_ 711
|
32
|
+
_m 707
|
33
|
+
na 698
|
34
|
+
ya 675
|
35
|
+
ha 672
|
36
|
+
g 602
|
37
|
+
al 580
|
38
|
+
d 570
|
39
|
+
at 560
|
40
|
+
am 554
|
41
|
+
_n 549
|
42
|
+
_ku 549
|
43
|
+
ik 543
|
44
|
+
_h 515
|
45
|
+
ya_ 514
|
46
|
+
A 494
|
47
|
+
is 487
|
48
|
+
_y 485
|
49
|
+
hi 474
|
50
|
+
na_ 471
|
51
|
+
_ya 471
|
52
|
+
ta 468
|
53
|
+
sh 456
|
54
|
+
ali 449
|
55
|
+
j 426
|
56
|
+
u_ 423
|
57
|
+
ki 418
|
58
|
+
e_ 402
|
59
|
+
p 401
|
60
|
+
ti 401
|
61
|
+
_wa_ 399
|
62
|
+
f 392
|
63
|
+
_ya_ 390
|
64
|
+
ba 390
|
65
|
+
ri 385
|
66
|
+
ng 385
|
67
|
+
il 380
|
68
|
+
c 358
|
69
|
+
hu 356
|
70
|
+
_na 356
|
71
|
+
ni 355
|
72
|
+
za 354
|
73
|
+
zi 351
|
74
|
+
ia 344
|
75
|
+
_na_ 343
|
76
|
+
_a 336
|
77
|
+
in 327
|
78
|
+
_ma 326
|
79
|
+
ch 322
|
80
|
+
mb 317
|
81
|
+
ika 314
|
82
|
+
. 311
|
83
|
+
._ 309
|
84
|
+
_ka 307
|
85
|
+
as 306
|
86
|
+
ak 306
|
87
|
+
ati 301
|
88
|
+
, 300
|
89
|
+
ka_ 296
|
90
|
+
,_ 294
|
91
|
+
_u 292
|
92
|
+
kw 286
|
93
|
+
ili 278
|
94
|
+
K 274
|
95
|
+
en 271
|
96
|
+
si 266
|
97
|
+
_kw 262
|
98
|
+
la 261
|
99
|
+
ni_ 261
|
100
|
+
ma_ 261
|
101
|
+
_s 258
|
102
|
+
kwa 258
|
103
|
+
ar 256
|
104
|
+
ut 245
|
105
|
+
za_ 245
|
106
|
+
nd 242
|
107
|
+
mba 241
|
108
|
+
_kwa 239
|
109
|
+
_z 234
|
110
|
+
li_ 233
|
111
|
+
un 233
|
112
|
+
ny 230
|
113
|
+
it 229
|
114
|
+
se 229
|
115
|
+
yo 227
|
116
|
+
ia_ 222
|
117
|
+
M 221
|
118
|
+
sa 221
|
119
|
+
kat 217
|
120
|
+
_K 214
|
121
|
+
_i 213
|
122
|
+
ika_ 213
|
123
|
+
ana 212
|
124
|
+
ish 212
|
125
|
+
kati 206
|
126
|
+
_ha 204
|
127
|
+
on 201
|
128
|
+
ai 200
|
129
|
+
I 198
|
130
|
+
aa 196
|
131
|
+
um 195
|
132
|
+
im 190
|
133
|
+
v 188
|
134
|
+
mu 187
|
135
|
+
amb 187
|
136
|
+
sha 185
|
137
|
+
em 183
|
138
|
+
fa 181
|
139
|
+
zi_ 180
|
140
|
+
di 179
|
141
|
+
mi 178
|
142
|
+
_M 178
|
143
|
+
us 176
|
144
|
+
_ki 176
|
145
|
+
ha_ 175
|
146
|
+
iw 172
|
147
|
+
ama 172
|
148
|
+
_kat 168
|
149
|
+
_kati 168
|
150
|
+
_hi 166
|
151
|
+
_l 166
|
152
|
+
ra 166
|
153
|
+
kwa_ 165
|
154
|
+
la_ 164
|
155
|
+
W 164
|
156
|
+
ja 163
|
157
|
+
U 163
|
158
|
+
N 163
|
159
|
+
amba 161
|
160
|
+
ao 161
|
161
|
+
_za 160
|
162
|
+
ji 160
|
163
|
+
B 157
|
164
|
+
iwa 155
|
165
|
+
tik 155
|
166
|
+
wal 155
|
167
|
+
le 155
|
168
|
+
tika 154
|
169
|
+
ge 153
|
170
|
+
lis 153
|
171
|
+
tu 152
|
172
|
+
atika 152
|
173
|
+
to 152
|
174
|
+
atik 152
|
175
|
+
uw 152
|
176
|
+
_kwa_ 151
|
177
|
+
A_ 151
|
178
|
+
ke 150
|
179
|
+
S 147
|
180
|
+
tika_ 145
|
181
|
+
aj 145
|
182
|
+
we 144
|
183
|
+
cha 144
|
184
|
+
bi 141
|
185
|
+
az 140
|
186
|
+
er 139
|
187
|
+
ek 138
|
188
|
+
katik 138
|
189
|
+
ez 138
|
190
|
+
uwa 137
|
191
|
+
kut 135
|
192
|
+
_al 134
|
193
|
+
_B 134
|
194
|
+
ad 134
|
195
|
+
mu_ 133
|
196
|
+
_ali 133
|
197
|
+
rik 132
|
198
|
+
_W 131
|
199
|
+
ba_ 131
|
200
|
+
kuw 131
|
201
|
+
me 130
|
202
|
+
ali_ 128
|
203
|
+
kuwa 128
|
204
|
+
ema 127
|
205
|
+
wan 127
|
206
|
+
bu 126
|
207
|
+
sem 126
|
208
|
+
_A 125
|
209
|
+
ir 125
|
210
|
+
ata 125
|
211
|
+
iz 124
|
212
|
+
_hu 124
|
213
|
+
ay 124
|
214
|
+
ul 124
|
215
|
+
af 123
|
216
|
+
iki 122
|
217
|
+
ema_ 121
|
218
|
+
da 120
|
219
|
+
ti_ 120
|
220
|
+
sema 119
|
221
|
+
aka 118
|
222
|
+
sema_ 118
|
223
|
+
te 118
|
224
|
+
uz 117
|
225
|
+
yo_ 117
|
226
|
+
_v 117
|
227
|
+
io 116
|
228
|
+
iy 115
|
229
|
+
uta 115
|
230
|
+
ani 115
|
231
|
+
_wal 115
|
232
|
+
he 115
|
233
|
+
if 114
|
234
|
+
_la 114
|
235
|
+
ab 114
|
236
|
+
go 112
|
237
|
+
_za_ 111
|
238
|
+
ama_ 111
|
239
|
+
sa_ 111
|
240
|
+
pa 110
|
241
|
+
_t 110
|
242
|
+
zo 110
|
243
|
+
nge 110
|
244
|
+
wam 109
|
245
|
+
wali 108
|
246
|
+
ua 107
|
247
|
+
ur 106
|
248
|
+
_c 106
|
249
|
+
ise 105
|
250
|
+
_ch 105
|
251
|
+
isem 105
|
252
|
+
ho 105
|
253
|
+
ye 104
|
254
|
+
iyo 104
|
255
|
+
E 104
|
256
|
+
el 104
|
257
|
+
mo 103
|
258
|
+
ung 103
|
259
|
+
eri 103
|
260
|
+
_wali 103
|
261
|
+
_b 102
|
262
|
+
mba_ 102
|
263
|
+
ari 101
|
264
|
+
ita 101
|
265
|
+
isema 100
|
266
|
+
ot 99
|
267
|
+
_la_ 99
|
268
|
+
uk 99
|
269
|
+
ao_ 99
|
270
|
+
di_ 99
|
271
|
+
sha_ 99
|
272
|
+
ini 99
|
273
|
+
kuwa_ 98
|
274
|
+
uwa_ 98
|
275
|
+
ana_ 98
|
276
|
+
lise 98
|
277
|
+
lisem 98
|
278
|
+
uli 97
|
279
|
+
shi 97
|
280
|
+
ga 96
|
281
|
+
iwa_ 96
|
282
|
+
fu 96
|
283
|
+
T 96
|
284
|
+
R 95
|
285
|
+
_il 95
|
286
|
+
wak 94
|
287
|
+
aw 94
|
288
|
+
isha 94
|
289
|
+
ri_ 93
|
290
|
+
_am 93
|
291
|
+
ara 92
|
292
|
+
_cha 92
|
293
|
+
aji 92
|
294
|
+
_ili 91
|
295
|
+
ifa 91
|
296
|
+
O 90
|
297
|
+
_p 90
|
298
|
+
uh 90
|
299
|
+
iri 90
|
300
|
+
chi 90
|
301
|
+
asi 89
|
302
|
+
po 89
|
303
|
+
a. 89
|
304
|
+
ong 89
|
305
|
+
azi 88
|
306
|
+
_j 88
|
307
|
+
_kut 88
|
308
|
+
eny 88
|
309
|
+
nc 88
|
310
|
+
a._ 88
|
311
|
+
ko 87
|
312
|
+
uu 87
|
313
|
+
id 87
|
314
|
+
w_ 87
|
315
|
+
no 87
|
316
|
+
P 86
|
317
|
+
ah 86
|
318
|
+
ina 86
|
319
|
+
rika 86
|
320
|
+
_Bw 85
|
321
|
+
H 85
|
322
|
+
gu 85
|
323
|
+
uo 85
|
324
|
+
Bw_ 85
|
325
|
+
_Bw_ 85
|
326
|
+
_se 85
|
327
|
+
Bw 85
|
328
|
+
ib 84
|
329
|
+
_S 84
|
330
|
+
kam 84
|
331
|
+
hi_ 84
|
332
|
+
nya 84
|
333
|
+
si_ 83
|
334
|
+
a, 82
|
335
|
+
no_ 81
|
336
|
+
pi 81
|
337
|
+
ok 81
|
338
|
+
i. 81
|
339
|
+
ip 81
|
340
|
+
kwam 81
|
341
|
+
i._ 81
|
342
|
+
amba_ 80
|
343
|
+
dh 80
|
344
|
+
end 80
|
345
|
+
ani_ 80
|
346
|
+
a,_ 79
|
347
|
+
wamb 79
|
348
|
+
kwamb 79
|
349
|
+
_sh 79
|
350
|
+
eza 79
|
351
|
+
nz 79
|
352
|
+
wi 79
|
353
|
+
_kwam 79
|
354
|
+
wamba 79
|
355
|
+
alis 78
|
356
|
+
_kuw 78
|
357
|
+
ngo 78
|
358
|
+
ap 77
|
359
|
+
_N 77
|
360
|
+
any 77
|
361
|
+
ili_ 77
|
362
|
+
C 77
|
363
|
+
WA 76
|
364
|
+
vy 76
|
365
|
+
wana 76
|
366
|
+
_hiy 75
|
367
|
+
Wa 75
|
368
|
+
hiyo 75
|
369
|
+
nch 75
|
370
|
+
_hiyo 75
|
371
|
+
de 75
|
372
|
+
_kuwa 75
|
373
|
+
ing 75
|
374
|
+
hiy 75
|
375
|
+
vi 75
|
376
|
+
isha_ 74
|
377
|
+
es 74
|
378
|
+
atu 74
|
379
|
+
_Wa 74
|
380
|
+
nchi 74
|
381
|
+
aki 74
|
382
|
+
lim 73
|
383
|
+
da_ 73
|
384
|
+
ini_ 73
|
385
|
+
ash 73
|
386
|
+
ala 73
|
387
|
+
i, 73
|
388
|
+
ano 73
|
389
|
+
i,_ 72
|
390
|
+
_kam 71
|
391
|
+
_wan 71
|
392
|
+
ano_ 71
|
393
|
+
mw 71
|
394
|
+
nde 71
|
395
|
+
ji_ 71
|
396
|
+
ion 70
|
397
|
+
_amb 70
|
398
|
+
ndi 70
|
399
|
+
_Ka 70
|
400
|
+
eza_ 70
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 33494
|
2
|
+
e 8992
|
3
|
+
n 7900
|
4
|
+
t 7859
|
5
|
+
a 7781
|
6
|
+
r 7251
|
7
|
+
s 6435
|
8
|
+
i 5649
|
9
|
+
l 4541
|
10
|
+
d 4079
|
11
|
+
o 3724
|
12
|
+
m 3203
|
13
|
+
k 3058
|
14
|
+
g 2478
|
15
|
+
en 2403
|
16
|
+
n_ 2389
|
17
|
+
t_ 2073
|
18
|
+
de 1939
|
19
|
+
r_ 1910
|
20
|
+
v 1890
|
21
|
+
h 1789
|
22
|
+
u 1782
|
23
|
+
_s 1768
|
24
|
+
� 1724
|
25
|
+
er 1709
|
26
|
+
f 1597
|
27
|
+
en_ 1537
|
28
|
+
a_ 1526
|
29
|
+
an 1357
|
30
|
+
p 1320
|
31
|
+
et 1317
|
32
|
+
� 1278
|
33
|
+
� 1261
|
34
|
+
st 1236
|
35
|
+
ar 1226
|
36
|
+
c 1191
|
37
|
+
_d 1158
|
38
|
+
e_ 1116
|
39
|
+
in 1045
|
40
|
+
_f 1027
|
41
|
+
te 1000
|
42
|
+
b 997
|
43
|
+
_a 978
|
44
|
+
s_ 974
|
45
|
+
ra 958
|
46
|
+
. 956
|
47
|
+
tt 935
|
48
|
+
_i 898
|
49
|
+
_m 890
|
50
|
+
._ 886
|
51
|
+
ll 870
|
52
|
+
ta 844
|
53
|
+
_o 842
|
54
|
+
_e 839
|
55
|
+
nd 820
|
56
|
+
ti 804
|
57
|
+
sk 798
|
58
|
+
re 779
|
59
|
+
at 769
|
60
|
+
_de 754
|
61
|
+
om 743
|
62
|
+
m_ 739
|
63
|
+
�r 720
|
64
|
+
, 697
|
65
|
+
,_ 695
|
66
|
+
ng 686
|
67
|
+
li 673
|
68
|
+
ka 666
|
69
|
+
oc 662
|
70
|
+
_h 654
|
71
|
+
on 652
|
72
|
+
et_ 647
|
73
|
+
ch 645
|
74
|
+
ns 643
|
75
|
+
is 642
|
76
|
+
er_ 630
|
77
|
+
�r 625
|
78
|
+
_v 614
|
79
|
+
_t 614
|
80
|
+
ni 611
|
81
|
+
i_ 609
|
82
|
+
_oc 592
|
83
|
+
tt_ 587
|
84
|
+
na 586
|
85
|
+
y 586
|
86
|
+
la 579
|
87
|
+
_b 579
|
88
|
+
h_ 577
|
89
|
+
kt 575
|
90
|
+
ch_ 568
|
91
|
+
ig 564
|
92
|
+
f� 563
|
93
|
+
och 555
|
94
|
+
or 555
|
95
|
+
_och 554
|
96
|
+
och_ 554
|
97
|
+
_och_ 553
|
98
|
+
me 548
|
99
|
+
den 548
|
100
|
+
om_ 535
|
101
|
+
_i_ 531
|
102
|
+
d_ 530
|
103
|
+
j 529
|
104
|
+
ik 520
|
105
|
+
de_ 520
|
106
|
+
f�r 518
|
107
|
+
ge 498
|
108
|
+
ad 497
|
109
|
+
_k 491
|
110
|
+
_f� 487
|
111
|
+
ri 484
|
112
|
+
el 482
|
113
|
+
il 481
|
114
|
+
so 480
|
115
|
+
al 474
|
116
|
+
g_ 469
|
117
|
+
le 464
|
118
|
+
an_ 461
|
119
|
+
_f�r 447
|
120
|
+
si 437
|
121
|
+
ar_ 437
|
122
|
+
att 435
|
123
|
+
_p 434
|
124
|
+
es 420
|
125
|
+
ing 413
|
126
|
+
se 407
|
127
|
+
to 404
|
128
|
+
_u 403
|
129
|
+
_en 403
|
130
|
+
and 398
|
131
|
+
den_ 395
|
132
|
+
nde 393
|
133
|
+
nn 393
|
134
|
+
_l 391
|
135
|
+
�_ 391
|
136
|
+
D 385
|
137
|
+
�n 383
|
138
|
+
nt 382
|
139
|
+
l_ 381
|
140
|
+
tr 378
|
141
|
+
_D 372
|
142
|
+
va 370
|
143
|
+
am 369
|
144
|
+
sa 367
|
145
|
+
_so 365
|
146
|
+
ga 364
|
147
|
+
_en_ 361
|
148
|
+
�r_ 358
|
149
|
+
ck 357
|
150
|
+
av 354
|
151
|
+
v_ 351
|
152
|
+
ed 347
|
153
|
+
ma 346
|
154
|
+
da 346
|
155
|
+
som 346
|
156
|
+
rs 344
|
157
|
+
som_ 344
|
158
|
+
ve 342
|
159
|
+
ter 341
|
160
|
+
att_ 341
|
161
|
+
ha 338
|
162
|
+
ne 337
|
163
|
+
ut 335
|
164
|
+
as 332
|
165
|
+
ska 329
|
166
|
+
_at 327
|
167
|
+
_att 326
|
168
|
+
_som 324
|
169
|
+
_att_ 324
|
170
|
+
_som_ 323
|
171
|
+
vi 322
|
172
|
+
ikt 317
|
173
|
+
_av 316
|
174
|
+
det 316
|
175
|
+
_den 315
|
176
|
+
he 315
|
177
|
+
ss 314
|
178
|
+
un 307
|
179
|
+
ke 304
|
180
|
+
_g 303
|
181
|
+
us 302
|
182
|
+
di 302
|
183
|
+
_st 300
|
184
|
+
rn 297
|
185
|
+
_me 296
|
186
|
+
_� 295
|
187
|
+
ade 294
|
188
|
+
" 290
|
189
|
+
_ha 290
|
190
|
+
av_ 289
|
191
|
+
ill 288
|
192
|
+
_n 286
|
193
|
+
_in 279
|
194
|
+
io 275
|
195
|
+
_r 275
|
196
|
+
der 275
|
197
|
+
it 274
|
198
|
+
_av_ 274
|
199
|
+
sta 274
|
200
|
+
gen 272
|
201
|
+
isk 270
|
202
|
+
_ti 269
|
203
|
+
id 265
|
204
|
+
na_ 265
|
205
|
+
ns_ 264
|
206
|
+
ko 262
|
207
|
+
_den_ 261
|
208
|
+
ag 258
|
209
|
+
det_ 257
|
210
|
+
lig 257
|
211
|
+
era 256
|
212
|
+
ll_ 255
|
213
|
+
_det 252
|
214
|
+
_�r 251
|
215
|
+
be 249
|
216
|
+
_�r_ 248
|
217
|
+
ra_ 247
|
218
|
+
ion 244
|
219
|
+
- 241
|
220
|
+
pr 240
|
221
|
+
oni 233
|
222
|
+
til 231
|
223
|
+
ten 228
|
224
|
+
_si 225
|
225
|
+
k_ 222
|
226
|
+
p� 222
|
227
|
+
fr 221
|
228
|
+
ro 219
|
229
|
+
till 219
|
230
|
+
iv 216
|
231
|
+
ls 216
|
232
|
+
ande 215
|
233
|
+
�r_ 214
|
234
|
+
_det_ 213
|
235
|
+
�l 212
|
236
|
+
_p� 211
|
237
|
+
ts 210
|
238
|
+
ens 209
|
239
|
+
med 209
|
240
|
+
mm 208
|
241
|
+
rt 208
|
242
|
+
_till 208
|
243
|
+
_til 208
|
244
|
+
_va 207
|
245
|
+
_fr 205
|
246
|
+
_sk 205
|
247
|
+
var 205
|
248
|
+
nin 204
|
249
|
+
ning 203
|
250
|
+
ol 201
|
251
|
+
ka_ 200
|
252
|
+
lle 198
|
253
|
+
ett 198
|
254
|
+
rd 197
|
255
|
+
em 196
|
256
|
+
p�_ 195
|
257
|
+
x 195
|
258
|
+
rk 194
|
259
|
+
_ut 194
|
260
|
+
ste 194
|
261
|
+
ds 193
|
262
|
+
_vi 192
|
263
|
+
�r 192
|
264
|
+
S 192
|
265
|
+
nde_ 191
|
266
|
+
are 191
|
267
|
+
ver 190
|
268
|
+
_p�_ 190
|
269
|
+
nis 189
|
270
|
+
kr 189
|
271
|
+
_med 188
|
272
|
+
all 188
|
273
|
+
�n 187
|
274
|
+
nge 185
|
275
|
+
mo 184
|
276
|
+
os 183
|
277
|
+
ld 182
|
278
|
+
ade_ 181
|
279
|
+
_S 181
|
280
|
+
ed_ 180
|
281
|
+
r� 176
|
282
|
+
De 175
|
283
|
+
_- 175
|
284
|
+
kan 174
|
285
|
+
ta_ 173
|
286
|
+
ng_ 172
|
287
|
+
v� 171
|
288
|
+
f�r_ 170
|
289
|
+
ill_ 170
|
290
|
+
han 170
|
291
|
+
_De 170
|
292
|
+
pp 169
|
293
|
+
lt 169
|
294
|
+
sam 168
|
295
|
+
nte 167
|
296
|
+
ans 167
|
297
|
+
ton 166
|
298
|
+
ur 165
|
299
|
+
mi 165
|
300
|
+
ess 165
|
301
|
+
kl 164
|
302
|
+
ig_ 164
|
303
|
+
ks 164
|
304
|
+
as_ 163
|
305
|
+
und 163
|
306
|
+
men 162
|
307
|
+
med_ 161
|
308
|
+
_med_ 161
|
309
|
+
ak 161
|
310
|
+
Di 160
|
311
|
+
ot 159
|
312
|
+
rna 159
|
313
|
+
ul 159
|
314
|
+
_var 159
|
315
|
+
te_ 158
|
316
|
+
gen_ 158
|
317
|
+
het 157
|
318
|
+
kto 157
|
319
|
+
str 156
|
320
|
+
_Di 155
|
321
|
+
tad 155
|
322
|
+
lan 154
|
323
|
+
ga_ 154
|
324
|
+
iska 154
|
325
|
+
fa 154
|
326
|
+
fi 154
|
327
|
+
s� 154
|
328
|
+
Dikt 153
|
329
|
+
Dik 153
|
330
|
+
pe 153
|
331
|
+
ska_ 152
|
332
|
+
ja 152
|
333
|
+
H 151
|
334
|
+
res 151
|
335
|
+
ku 151
|
336
|
+
iu 150
|
337
|
+
ande_ 150
|
338
|
+
till_ 150
|
339
|
+
t. 150
|
340
|
+
ern 150
|
341
|
+
rm 149
|
342
|
+
_Dikt 149
|
343
|
+
_Dik 149
|
344
|
+
ie 149
|
345
|
+
bl 148
|
346
|
+
-_ 147
|
347
|
+
od 147
|
348
|
+
_H 147
|
349
|
+
n. 147
|
350
|
+
ist 147
|
351
|
+
_di 146
|
352
|
+
ius 146
|
353
|
+
_" 145
|
354
|
+
la_ 145
|
355
|
+
sl 145
|
356
|
+
man 145
|
357
|
+
ren 145
|
358
|
+
_f�r_ 145
|
359
|
+
toni 144
|
360
|
+
kton 144
|
361
|
+
n._ 144
|
362
|
+
ktoni 144
|
363
|
+
ikton 144
|
364
|
+
I 144
|
365
|
+
ikto 144
|
366
|
+
nius 143
|
367
|
+
ten_ 143
|
368
|
+
onius 143
|
369
|
+
oniu 143
|
370
|
+
toniu 143
|
371
|
+
ing_ 143
|
372
|
+
Dikto 143
|
373
|
+
niu 143
|
374
|
+
_ko 143
|
375
|
+
ic 142
|
376
|
+
_sa 142
|
377
|
+
_han 142
|
378
|
+
ett_ 142
|
379
|
+
sm 141
|
380
|
+
ba 141
|
381
|
+
M 141
|
382
|
+
gr 140
|
383
|
+
l� 140
|
384
|
+
ex 138
|
385
|
+
t._ 138
|
386
|
+
sp 137
|
387
|
+
lla 137
|
388
|
+
_et 137
|
389
|
+
_M 137
|
390
|
+
dr 137
|
391
|
+
r� 136
|
392
|
+
rad 136
|
393
|
+
ek 136
|
394
|
+
_be 135
|
395
|
+
tar 135
|
396
|
+
_-_ 135
|
397
|
+
_om 134
|
398
|
+
rl 134
|
399
|
+
E 134
|
400
|
+
m� 133
|