language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 7044
|
2
|
+
f 2698
|
3
|
+
] 1196
|
4
|
+
g 933
|
5
|
+
s 815
|
6
|
+
l 781
|
7
|
+
/ 698
|
8
|
+
; 662
|
9
|
+
k 645
|
10
|
+
d 635
|
11
|
+
]_ 630
|
12
|
+
f_ 611
|
13
|
+
sf 549
|
14
|
+
f] 541
|
15
|
+
n 526
|
16
|
+
t 460
|
17
|
+
o 412
|
18
|
+
j 399
|
19
|
+
f]_ 374
|
20
|
+
L 365
|
21
|
+
_; 364
|
22
|
+
x 356
|
23
|
+
_k 353
|
24
|
+
{ 337
|
25
|
+
b 323
|
26
|
+
u 321
|
27
|
+
' 303
|
28
|
+
df 299
|
29
|
+
sf] 290
|
30
|
+
_l 283
|
31
|
+
sf]_ 265
|
32
|
+
g] 245
|
33
|
+
G 232
|
34
|
+
e 231
|
35
|
+
| 227
|
36
|
+
a 222
|
37
|
+
/_ 222
|
38
|
+
L_ 211
|
39
|
+
_g 210
|
40
|
+
c 207
|
41
|
+
df_ 207
|
42
|
+
{_ 207
|
43
|
+
_c 201
|
44
|
+
g_ 201
|
45
|
+
kf 184
|
46
|
+
f/ 177
|
47
|
+
_u 171
|
48
|
+
/f 168
|
49
|
+
P 163
|
50
|
+
_/ 162
|
51
|
+
k| 161
|
52
|
+
_s 161
|
53
|
+
} 159
|
54
|
+
+ 158
|
55
|
+
h 151
|
56
|
+
fn 150
|
57
|
+
n] 145
|
58
|
+
of 141
|
59
|
+
tf 140
|
60
|
+
: 136
|
61
|
+
p 136
|
62
|
+
t_ 134
|
63
|
+
. 133
|
64
|
+
_k| 133
|
65
|
+
._ 133
|
66
|
+
_e 131
|
67
|
+
_d 131
|
68
|
+
_f 129
|
69
|
+
_._ 127
|
70
|
+
_. 127
|
71
|
+
n]_ 123
|
72
|
+
;f 119
|
73
|
+
lj 118
|
74
|
+
O 118
|
75
|
+
? 118
|
76
|
+
q 118
|
77
|
+
nf 118
|
78
|
+
y 116
|
79
|
+
_a 113
|
80
|
+
m 111
|
81
|
+
Ps 110
|
82
|
+
r 110
|
83
|
+
fg 106
|
84
|
+
w 105
|
85
|
+
sf_ 103
|
86
|
+
lg 102
|
87
|
+
g]_ 100
|
88
|
+
gf 94
|
89
|
+
_g] 91
|
90
|
+
Psf 90
|
91
|
+
x? 89
|
92
|
+
fd 88
|
93
|
+
fO 88
|
94
|
+
_lj 85
|
95
|
+
]s 85
|
96
|
+
z 85
|
97
|
+
s_ 84
|
98
|
+
_p 82
|
99
|
+
_sf 80
|
100
|
+
D 79
|
101
|
+
kl 78
|
102
|
+
_;f 78
|
103
|
+
f/_ 77
|
104
|
+
cf 77
|
105
|
+
}_ 76
|
106
|
+
Psf] 76
|
107
|
+
jf 76
|
108
|
+
, 75
|
109
|
+
fl 75
|
110
|
+
\ 75
|
111
|
+
_x 74
|
112
|
+
kfn 74
|
113
|
+
f{ 74
|
114
|
+
_/f 74
|
115
|
+
]k 73
|
116
|
+
of] 73
|
117
|
+
,_ 72
|
118
|
+
v 72
|
119
|
+
lt 72
|
120
|
+
_cf 72
|
121
|
+
_b 71
|
122
|
+
O{ 70
|
123
|
+
i 69
|
124
|
+
xf 68
|
125
|
+
]kf 68
|
126
|
+
_/_ 68
|
127
|
+
Psf]_ 68
|
128
|
+
_h 67
|
129
|
+
g]kfn 67
|
130
|
+
g]k 67
|
131
|
+
d_ 67
|
132
|
+
g]kf 67
|
133
|
+
O{_ 67
|
134
|
+
]kfn 67
|
135
|
+
_g]k 66
|
136
|
+
g' 66
|
137
|
+
_g]kf 66
|
138
|
+
o_ 64
|
139
|
+
" 64
|
140
|
+
;b 63
|
141
|
+
Gq 63
|
142
|
+
f+ 63
|
143
|
+
n_ 63
|
144
|
+
fk 62
|
145
|
+
sf/ 62
|
146
|
+
/L 62
|
147
|
+
_kl 62
|
148
|
+
I 61
|
149
|
+
T 61
|
150
|
+
/s 61
|
151
|
+
If 61
|
152
|
+
dG 60
|
153
|
+
_j 60
|
154
|
+
]sf 60
|
155
|
+
;_ 59
|
156
|
+
u_ 59
|
157
|
+
yf 59
|
158
|
+
dGq 58
|
159
|
+
fO{ 58
|
160
|
+
af 57
|
161
|
+
eP 57
|
162
|
+
fs 57
|
163
|
+
fO{_ 57
|
164
|
+
fj 56
|
165
|
+
f; 56
|
166
|
+
qL 56
|
167
|
+
l/ 55
|
168
|
+
\_ 55
|
169
|
+
;D 54
|
170
|
+
dGqL 54
|
171
|
+
GqL 54
|
172
|
+
;+ 53
|
173
|
+
g\ 53
|
174
|
+
/sf 52
|
175
|
+
nfO 51
|
176
|
+
_kf 51
|
177
|
+
ug 51
|
178
|
+
pg 51
|
179
|
+
:t 51
|
180
|
+
fp 50
|
181
|
+
_ug 50
|
182
|
+
u/ 50
|
183
|
+
lg_ 49
|
184
|
+
ePs 49
|
185
|
+
fdf 48
|
186
|
+
cl 48
|
187
|
+
nfO{_ 48
|
188
|
+
nfO{ 48
|
189
|
+
;/ 48
|
190
|
+
_cl 47
|
191
|
+
F 47
|
192
|
+
'/ 47
|
193
|
+
g\_ 47
|
194
|
+
_r 47
|
195
|
+
fo 47
|
196
|
+
_eP 47
|
197
|
+
]{ 46
|
198
|
+
Z 46
|
199
|
+
/L_ 46
|
200
|
+
_f_ 45
|
201
|
+
ePsf 45
|
202
|
+
gd 45
|
203
|
+
_;D 45
|
204
|
+
if 45
|
205
|
+
Gb 44
|
206
|
+
lb 44
|
207
|
+
Q 44
|
208
|
+
_n 44
|
209
|
+
fsf 44
|
210
|
+
]sf] 43
|
211
|
+
+; 43
|
212
|
+
nL 43
|
213
|
+
lx 43
|
214
|
+
_t 43
|
215
|
+
_: 42
|
216
|
+
+;b 42
|
217
|
+
_o 42
|
218
|
+
fdf_ 42
|
219
|
+
bf 42
|
220
|
+
hf 42
|
221
|
+
_lg 42
|
222
|
+
ePsf] 41
|
223
|
+
j_ 41
|
224
|
+
ln 41
|
225
|
+
ef 41
|
226
|
+
/] 41
|
227
|
+
_df 41
|
228
|
+
To 40
|
229
|
+
klg_ 40
|
230
|
+
_klg_ 40
|
231
|
+
]sf]_ 40
|
232
|
+
klg 40
|
233
|
+
_klg 40
|
234
|
+
_u/ 40
|
235
|
+
f{_ 40
|
236
|
+
_ePs 40
|
237
|
+
'g 40
|
238
|
+
Gg 39
|
239
|
+
:y 39
|
240
|
+
_g\ 39
|
241
|
+
f} 39
|
242
|
+
kf_ 39
|
243
|
+
:tf 38
|
244
|
+
]{_ 38
|
245
|
+
fpg 38
|
246
|
+
b' 38
|
247
|
+
hg 38
|
248
|
+
g]{ 38
|
249
|
+
jZ 38
|
250
|
+
_;+ 37
|
251
|
+
gs 37
|
252
|
+
wf 37
|
253
|
+
o{ 37
|
254
|
+
GqL_ 37
|
255
|
+
l_ 37
|
256
|
+
qL_ 37
|
257
|
+
dGqL_ 37
|
258
|
+
gf_ 37
|
259
|
+
]; 37
|
260
|
+
_ePsf 36
|
261
|
+
x' 36
|
262
|
+
?_ 36
|
263
|
+
! 36
|
264
|
+
fx 35
|
265
|
+
u| 34
|
266
|
+
;d 34
|
267
|
+
_lb 34
|
268
|
+
fn_ 34
|
269
|
+
_P 34
|
270
|
+
fg_ 34
|
271
|
+
= 34
|
272
|
+
bn 34
|
273
|
+
Zj 34
|
274
|
+
o; 34
|
275
|
+
g]{_ 34
|
276
|
+
_g\_ 34
|
277
|
+
kIf 33
|
278
|
+
dl 33
|
279
|
+
kI 33
|
280
|
+
ljZj 33
|
281
|
+
jZj 33
|
282
|
+
rf 33
|
283
|
+
+u 33
|
284
|
+
]d 33
|
285
|
+
ljZ 33
|
286
|
+
nL_ 32
|
287
|
+
f+; 32
|
288
|
+
fnL 32
|
289
|
+
gL 32
|
290
|
+
;/sf 32
|
291
|
+
tf_ 32
|
292
|
+
wfg 32
|
293
|
+
_;/ 32
|
294
|
+
Qm 32
|
295
|
+
fb 32
|
296
|
+
f;_ 32
|
297
|
+
ld 32
|
298
|
+
;/sf/ 32
|
299
|
+
/sf/ 32
|
300
|
+
_x' 32
|
301
|
+
;/s 32
|
302
|
+
;f+ 32
|
303
|
+
_;f+ 32
|
304
|
+
fu 32
|
305
|
+
x?_ 32
|
306
|
+
;f+; 31
|
307
|
+
_k|:t 31
|
308
|
+
s' 31
|
309
|
+
_kI 31
|
310
|
+
_k|: 31
|
311
|
+
of]_ 31
|
312
|
+
_kIf 31
|
313
|
+
_;f+; 31
|
314
|
+
]l 31
|
315
|
+
k|: 31
|
316
|
+
bg 31
|
317
|
+
_;/s 31
|
318
|
+
|: 31
|
319
|
+
f+;b 31
|
320
|
+
_;/sf 31
|
321
|
+
|:t 31
|
322
|
+
|] 31
|
323
|
+
k|:t 31
|
324
|
+
m_ 31
|
325
|
+
;f+;b 31
|
326
|
+
Jo 30
|
327
|
+
k|wf 30
|
328
|
+
|w 30
|
329
|
+
|wf 30
|
330
|
+
k|w 30
|
331
|
+
km 30
|
332
|
+
J 30
|
333
|
+
k|wfg 30
|
334
|
+
|wfg 30
|
335
|
+
]t 30
|
336
|
+
@ 30
|
337
|
+
;f] 30
|
338
|
+
b_ 29
|
339
|
+
> 29
|
340
|
+
kfn_ 29
|
341
|
+
fn] 29
|
342
|
+
gsf 29
|
343
|
+
sf+ 29
|
344
|
+
Zjf 29
|
345
|
+
]kfn_ 29
|
346
|
+
jif 29
|
347
|
+
ji 29
|
348
|
+
jf; 29
|
349
|
+
_k|w 29
|
350
|
+
fgd 29
|
351
|
+
_k|wf 29
|
352
|
+
Zjf; 29
|
353
|
+
ljZjf 28
|
354
|
+
jZjf; 28
|
355
|
+
]n 28
|
356
|
+
_z 28
|
357
|
+
_s' 28
|
358
|
+
_dG 28
|
359
|
+
|]; 28
|
360
|
+
_;d 28
|
361
|
+
jZjf 28
|
362
|
+
u|] 27
|
363
|
+
gdGq 27
|
364
|
+
gdG 27
|
365
|
+
?n 27
|
366
|
+
x'g 27
|
367
|
+
|wfgd 27
|
368
|
+
wfgd 27
|
369
|
+
gdGqL 27
|
370
|
+
+u|] 27
|
371
|
+
f+u 27
|
372
|
+
fgdG 27
|
373
|
+
u|]; 27
|
374
|
+
+u|]; 27
|
375
|
+
wfgdG 27
|
376
|
+
dfg 27
|
377
|
+
a} 27
|
378
|
+
_dGq 27
|
379
|
+
f+u| 27
|
380
|
+
fgdGq 27
|
381
|
+
sf+u| 27
|
382
|
+
_sf+u 27
|
383
|
+
f+u|] 27
|
384
|
+
+u| 27
|
385
|
+
sf+u 27
|
386
|
+
_sf+ 27
|
387
|
+
x?n 27
|
388
|
+
S 26
|
389
|
+
_To 26
|
390
|
+
]kfnL 26
|
391
|
+
_! 26
|
392
|
+
_T 26
|
393
|
+
ul 26
|
394
|
+
/f] 26
|
395
|
+
fsf] 26
|
396
|
+
af_ 26
|
397
|
+
;s 26
|
398
|
+
kfnL 26
|
399
|
+
|:tf 26
|
400
|
+
k|:tf 26
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 22970
|
2
|
+
e 6833
|
3
|
+
n 4206
|
4
|
+
r 3516
|
5
|
+
t 3112
|
6
|
+
a 2587
|
7
|
+
s 2440
|
8
|
+
i 2112
|
9
|
+
l 1901
|
10
|
+
o 1900
|
11
|
+
n_ 1875
|
12
|
+
r_ 1761
|
13
|
+
k 1713
|
14
|
+
g 1630
|
15
|
+
en 1615
|
16
|
+
m 1508
|
17
|
+
e_ 1450
|
18
|
+
d 1444
|
19
|
+
er 1436
|
20
|
+
h 1306
|
21
|
+
t_ 1300
|
22
|
+
_h 1180
|
23
|
+
_s 1148
|
24
|
+
er_ 1105
|
25
|
+
v 982
|
26
|
+
en_ 976
|
27
|
+
an 919
|
28
|
+
. 901
|
29
|
+
._ 791
|
30
|
+
_. 781
|
31
|
+
et 770
|
32
|
+
g_ 762
|
33
|
+
_._ 726
|
34
|
+
� 725
|
35
|
+
u 719
|
36
|
+
f 709
|
37
|
+
p 702
|
38
|
+
ha 682
|
39
|
+
_ha 672
|
40
|
+
de 657
|
41
|
+
te 651
|
42
|
+
_e 621
|
43
|
+
et_ 614
|
44
|
+
re 581
|
45
|
+
ne 565
|
46
|
+
_o 554
|
47
|
+
an_ 544
|
48
|
+
ke 534
|
49
|
+
_, 522
|
50
|
+
,_ 522
|
51
|
+
, 522
|
52
|
+
_,_ 522
|
53
|
+
_f 519
|
54
|
+
_m 515
|
55
|
+
or 503
|
56
|
+
_d 483
|
57
|
+
_i 480
|
58
|
+
�_ 479
|
59
|
+
se 476
|
60
|
+
m_ 469
|
61
|
+
nn 454
|
62
|
+
b 449
|
63
|
+
me 441
|
64
|
+
� 434
|
65
|
+
_a 413
|
66
|
+
st 404
|
67
|
+
_t 398
|
68
|
+
og 380
|
69
|
+
_v 377
|
70
|
+
_og 366
|
71
|
+
ar 364
|
72
|
+
el 364
|
73
|
+
le 361
|
74
|
+
i_ 356
|
75
|
+
om 353
|
76
|
+
og_ 351
|
77
|
+
_og_ 351
|
78
|
+
li 350
|
79
|
+
_k 346
|
80
|
+
_de 339
|
81
|
+
ge 339
|
82
|
+
han 337
|
83
|
+
y 333
|
84
|
+
_han 332
|
85
|
+
ve 330
|
86
|
+
kk 323
|
87
|
+
in 311
|
88
|
+
_b 307
|
89
|
+
fo 301
|
90
|
+
j 301
|
91
|
+
il 298
|
92
|
+
_H 291
|
93
|
+
H 291
|
94
|
+
han_ 288
|
95
|
+
_han_ 288
|
96
|
+
for 287
|
97
|
+
ik 281
|
98
|
+
l_ 278
|
99
|
+
kke 277
|
100
|
+
tt 276
|
101
|
+
ti 270
|
102
|
+
ne_ 270
|
103
|
+
d_ 269
|
104
|
+
ed 269
|
105
|
+
om_ 268
|
106
|
+
nne 266
|
107
|
+
_me 264
|
108
|
+
ng 257
|
109
|
+
_er 257
|
110
|
+
_fo 256
|
111
|
+
eg 256
|
112
|
+
_se 256
|
113
|
+
_g 256
|
114
|
+
un 255
|
115
|
+
ig 255
|
116
|
+
sk 253
|
117
|
+
_er_ 252
|
118
|
+
_p 252
|
119
|
+
_for 250
|
120
|
+
ke_ 249
|
121
|
+
_n 238
|
122
|
+
_l 233
|
123
|
+
al 232
|
124
|
+
�r 222
|
125
|
+
s_ 221
|
126
|
+
ar_ 215
|
127
|
+
at 214
|
128
|
+
_en 211
|
129
|
+
he 211
|
130
|
+
pe 209
|
131
|
+
_i_ 208
|
132
|
+
am 200
|
133
|
+
es 200
|
134
|
+
si 200
|
135
|
+
enn 197
|
136
|
+
det 195
|
137
|
+
or_ 193
|
138
|
+
vi 190
|
139
|
+
ns 189
|
140
|
+
ikk 188
|
141
|
+
det_ 185
|
142
|
+
so 185
|
143
|
+
un_ 183
|
144
|
+
il_ 181
|
145
|
+
nd 181
|
146
|
+
te_ 181
|
147
|
+
"_ 180
|
148
|
+
" 180
|
149
|
+
_"_ 180
|
150
|
+
_" 180
|
151
|
+
em 179
|
152
|
+
_ti 176
|
153
|
+
kke_ 176
|
154
|
+
lig 174
|
155
|
+
ten 174
|
156
|
+
Ha 173
|
157
|
+
_Ha 173
|
158
|
+
re_ 172
|
159
|
+
ikke 168
|
160
|
+
je 165
|
161
|
+
Han 165
|
162
|
+
ter 165
|
163
|
+
_Han 165
|
164
|
+
eg_ 164
|
165
|
+
p� 164
|
166
|
+
_p� 163
|
167
|
+
_si 163
|
168
|
+
_� 163
|
169
|
+
_Han_ 162
|
170
|
+
Han_ 162
|
171
|
+
p�_ 162
|
172
|
+
_p�_ 161
|
173
|
+
til 160
|
174
|
+
som 160
|
175
|
+
_so 159
|
176
|
+
den 159
|
177
|
+
_det 157
|
178
|
+
ed_ 155
|
179
|
+
ll 155
|
180
|
+
_ik 155
|
181
|
+
rt 155
|
182
|
+
som_ 153
|
183
|
+
ra 152
|
184
|
+
a_ 152
|
185
|
+
har 152
|
186
|
+
nt 152
|
187
|
+
de_ 152
|
188
|
+
tr 151
|
189
|
+
v_ 151
|
190
|
+
_har 151
|
191
|
+
ka 151
|
192
|
+
ig_ 150
|
193
|
+
_som 150
|
194
|
+
for_ 150
|
195
|
+
_som_ 150
|
196
|
+
_en_ 149
|
197
|
+
hu 149
|
198
|
+
_ikk 148
|
199
|
+
_ham 148
|
200
|
+
ham 148
|
201
|
+
ste 148
|
202
|
+
_det_ 148
|
203
|
+
_ikke 148
|
204
|
+
enne 148
|
205
|
+
ikke_ 148
|
206
|
+
har_ 147
|
207
|
+
nge 147
|
208
|
+
D 147
|
209
|
+
_har_ 147
|
210
|
+
_D 147
|
211
|
+
am_ 147
|
212
|
+
ere 147
|
213
|
+
ham_ 146
|
214
|
+
_ham_ 146
|
215
|
+
it 145
|
216
|
+
_he 144
|
217
|
+
_til 144
|
218
|
+
av 143
|
219
|
+
va 140
|
220
|
+
men 140
|
221
|
+
�r 140
|
222
|
+
_ve 140
|
223
|
+
_hu 139
|
224
|
+
ta 139
|
225
|
+
pen 137
|
226
|
+
sp 137
|
227
|
+
_st 135
|
228
|
+
tte 135
|
229
|
+
la 135
|
230
|
+
_E 133
|
231
|
+
E 133
|
232
|
+
den_ 130
|
233
|
+
is 130
|
234
|
+
til_ 128
|
235
|
+
_r 128
|
236
|
+
tt_ 128
|
237
|
+
�r_ 127
|
238
|
+
k_ 124
|
239
|
+
_�_ 124
|
240
|
+
ri 124
|
241
|
+
_til_ 124
|
242
|
+
at_ 123
|
243
|
+
ene 123
|
244
|
+
seg 123
|
245
|
+
_av 123
|
246
|
+
med 122
|
247
|
+
_vi 122
|
248
|
+
_seg 122
|
249
|
+
seg_ 121
|
250
|
+
_seg_ 121
|
251
|
+
_for_ 120
|
252
|
+
nne_ 120
|
253
|
+
ut 120
|
254
|
+
_u 119
|
255
|
+
mm 119
|
256
|
+
mme 119
|
257
|
+
De 118
|
258
|
+
_De 118
|
259
|
+
_at 118
|
260
|
+
_hun 117
|
261
|
+
hun 117
|
262
|
+
ko 117
|
263
|
+
be 116
|
264
|
+
_at_ 115
|
265
|
+
ter_ 115
|
266
|
+
pen_ 114
|
267
|
+
ker 113
|
268
|
+
hun_ 113
|
269
|
+
_hun_ 113
|
270
|
+
on 111
|
271
|
+
lig_ 111
|
272
|
+
.. 110
|
273
|
+
hen 107
|
274
|
+
_med 107
|
275
|
+
rs 106
|
276
|
+
ser 106
|
277
|
+
med_ 105
|
278
|
+
_men 104
|
279
|
+
_hen 104
|
280
|
+
_sk 104
|
281
|
+
_med_ 104
|
282
|
+
ak 103
|
283
|
+
ans 103
|
284
|
+
ker_ 102
|
285
|
+
av_ 101
|
286
|
+
_ka 101
|
287
|
+
no 100
|
288
|
+
ver 100
|
289
|
+
ler 99
|
290
|
+
J 99
|
291
|
+
spe 99
|
292
|
+
ten_ 99
|
293
|
+
_J 99
|
294
|
+
ene_ 98
|
295
|
+
ld 98
|
296
|
+
hv 98
|
297
|
+
_av_ 98
|
298
|
+
ger 97
|
299
|
+
ni 96
|
300
|
+
gen 96
|
301
|
+
ie 95
|
302
|
+
ser_ 94
|
303
|
+
_et 94
|
304
|
+
spen 94
|
305
|
+
_hv 94
|
306
|
+
men_ 93
|
307
|
+
Espe 92
|
308
|
+
Es 92
|
309
|
+
_Esp 92
|
310
|
+
_Es 92
|
311
|
+
_Espe 92
|
312
|
+
Esp 92
|
313
|
+
_al 92
|
314
|
+
Espen 92
|
315
|
+
lle 89
|
316
|
+
rem 89
|
317
|
+
id 89
|
318
|
+
f� 89
|
319
|
+
ei 88
|
320
|
+
inn 88
|
321
|
+
rd 88
|
322
|
+
enne_ 88
|
323
|
+
_henn 87
|
324
|
+
henne 87
|
325
|
+
henn 87
|
326
|
+
kt 86
|
327
|
+
spen_ 86
|
328
|
+
_om 86
|
329
|
+
ler_ 86
|
330
|
+
da 86
|
331
|
+
ett 86
|
332
|
+
itt 86
|
333
|
+
bl 85
|
334
|
+
to 85
|
335
|
+
_Je 84
|
336
|
+
ger_ 84
|
337
|
+
Je 84
|
338
|
+
� 84
|
339
|
+
ma 83
|
340
|
+
ing 83
|
341
|
+
�r 83
|
342
|
+
ns_ 83
|
343
|
+
eli 82
|
344
|
+
ang 82
|
345
|
+
_be 82
|
346
|
+
s� 82
|
347
|
+
_den 82
|
348
|
+
pp 81
|
349
|
+
rk 81
|
350
|
+
dr 81
|
351
|
+
oe 81
|
352
|
+
ss 81
|
353
|
+
_f� 80
|
354
|
+
ek 80
|
355
|
+
le_ 79
|
356
|
+
_no 79
|
357
|
+
kj 78
|
358
|
+
elig 78
|
359
|
+
nes 78
|
360
|
+
nn_ 77
|
361
|
+
nk 77
|
362
|
+
fr 77
|
363
|
+
sl 77
|
364
|
+
my 77
|
365
|
+
kan 77
|
366
|
+
s�_ 76
|
367
|
+
as 76
|
368
|
+
_om_ 76
|
369
|
+
_kan 75
|
370
|
+
_ko 75
|
371
|
+
_bl 73
|
372
|
+
Hu 73
|
373
|
+
nen 73
|
374
|
+
_Hu 73
|
375
|
+
eng 73
|
376
|
+
gj 73
|
377
|
+
rt_ 72
|
378
|
+
ge_ 72
|
379
|
+
ba 72
|
380
|
+
lv 71
|
381
|
+
rer 71
|
382
|
+
nde 71
|
383
|
+
ls 70
|
384
|
+
lo 70
|
385
|
+
ga 70
|
386
|
+
_noe 70
|
387
|
+
ro 70
|
388
|
+
_den_ 70
|
389
|
+
_ut 70
|
390
|
+
noe 70
|
391
|
+
Hun 69
|
392
|
+
Hun_ 69
|
393
|
+
_in 69
|
394
|
+
_Hun 69
|
395
|
+
_Hun_ 69
|
396
|
+
ren 68
|
397
|
+
�re 68
|
398
|
+
�r_ 68
|
399
|
+
sen 68
|
400
|
+
sa 67
|