language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 11688
|
2
|
+
e 3223
|
3
|
+
a 2469
|
4
|
+
t 2269
|
5
|
+
i 1928
|
6
|
+
n 1903
|
7
|
+
r 1414
|
8
|
+
o 1406
|
9
|
+
h 1369
|
10
|
+
s 1249
|
11
|
+
l 929
|
12
|
+
n_ 890
|
13
|
+
_t 862
|
14
|
+
_a 843
|
15
|
+
d 818
|
16
|
+
e_ 798
|
17
|
+
th 704
|
18
|
+
w 661
|
19
|
+
he 625
|
20
|
+
an 612
|
21
|
+
t_ 606
|
22
|
+
u 592
|
23
|
+
_th 575
|
24
|
+
c 508
|
25
|
+
s_ 471
|
26
|
+
the 470
|
27
|
+
, 469
|
28
|
+
- 458
|
29
|
+
in 455
|
30
|
+
m 445
|
31
|
+
,_ 440
|
32
|
+
b 434
|
33
|
+
g 429
|
34
|
+
er 409
|
35
|
+
ee 408
|
36
|
+
_the 407
|
37
|
+
k 402
|
38
|
+
an_ 402
|
39
|
+
f 385
|
40
|
+
_w 378
|
41
|
+
he_ 376
|
42
|
+
the_ 364
|
43
|
+
_an 362
|
44
|
+
_o 360
|
45
|
+
y 358
|
46
|
+
_the_ 354
|
47
|
+
_s 353
|
48
|
+
_an_ 342
|
49
|
+
a_ 335
|
50
|
+
r_ 327
|
51
|
+
_b 316
|
52
|
+
d_ 303
|
53
|
+
i_ 278
|
54
|
+
en 277
|
55
|
+
p 270
|
56
|
+
ei 245
|
57
|
+
A 236
|
58
|
+
wa 232
|
59
|
+
_A 231
|
60
|
+
re 229
|
61
|
+
in_ 229
|
62
|
+
ui 218
|
63
|
+
oo 217
|
64
|
+
le 217
|
65
|
+
ai 216
|
66
|
+
et 212
|
67
|
+
ti 209
|
68
|
+
it 209
|
69
|
+
_f 206
|
70
|
+
te 204
|
71
|
+
_a_ 203
|
72
|
+
_m 202
|
73
|
+
ha 200
|
74
|
+
as 193
|
75
|
+
on 188
|
76
|
+
at 184
|
77
|
+
_i 183
|
78
|
+
_wa 183
|
79
|
+
_c 182
|
80
|
+
o_ 180
|
81
|
+
or 178
|
82
|
+
_h 176
|
83
|
+
_g 169
|
84
|
+
ch 165
|
85
|
+
A_ 159
|
86
|
+
_l 158
|
87
|
+
_A_ 157
|
88
|
+
st 156
|
89
|
+
_d 155
|
90
|
+
_ti 148
|
91
|
+
. 148
|
92
|
+
._ 146
|
93
|
+
ke 144
|
94
|
+
ti_ 143
|
95
|
+
-- 143
|
96
|
+
_o_ 142
|
97
|
+
ow 142
|
98
|
+
--- 140
|
99
|
+
ed 138
|
100
|
+
---- 138
|
101
|
+
_r 137
|
102
|
+
as_ 137
|
103
|
+
y_ 136
|
104
|
+
er_ 136
|
105
|
+
----- 136
|
106
|
+
ir 135
|
107
|
+
aa 135
|
108
|
+
la 131
|
109
|
+
een 130
|
110
|
+
ae 129
|
111
|
+
_ti_ 128
|
112
|
+
ra 126
|
113
|
+
es 125
|
114
|
+
nd 124
|
115
|
+
de 120
|
116
|
+
h_ 120
|
117
|
+
ie 120
|
118
|
+
ar 119
|
119
|
+
ll 119
|
120
|
+
nt 118
|
121
|
+
ot 118
|
122
|
+
en_ 115
|
123
|
+
ma 115
|
124
|
+
eet 113
|
125
|
+
her 112
|
126
|
+
el 112
|
127
|
+
is 112
|
128
|
+
' 112
|
129
|
+
at_ 111
|
130
|
+
ic 109
|
131
|
+
se 108
|
132
|
+
or_ 106
|
133
|
+
wu 104
|
134
|
+
me 104
|
135
|
+
ne 103
|
136
|
+
fo 102
|
137
|
+
on_ 101
|
138
|
+
was 99
|
139
|
+
_was 98
|
140
|
+
et_ 98
|
141
|
+
ri 98
|
142
|
+
_e 97
|
143
|
+
_ma 97
|
144
|
+
v 97
|
145
|
+
_n 97
|
146
|
+
! 97
|
147
|
+
li 97
|
148
|
+
ht 93
|
149
|
+
hi 92
|
150
|
+
_wu 92
|
151
|
+
ng 91
|
152
|
+
ro 91
|
153
|
+
it_ 90
|
154
|
+
ck 90
|
155
|
+
_fo 90
|
156
|
+
tha 90
|
157
|
+
k_ 89
|
158
|
+
il 89
|
159
|
+
cht 86
|
160
|
+
eet_ 86
|
161
|
+
_p 86
|
162
|
+
we 86
|
163
|
+
_was_ 85
|
164
|
+
was_ 85
|
165
|
+
rt 84
|
166
|
+
ed_ 83
|
167
|
+
ter 83
|
168
|
+
id 83
|
169
|
+
ga 82
|
170
|
+
; 82
|
171
|
+
;_ 81
|
172
|
+
ther 79
|
173
|
+
tt 76
|
174
|
+
air 76
|
175
|
+
e, 75
|
176
|
+
un 75
|
177
|
+
ho 75
|
178
|
+
for 74
|
179
|
+
ge 74
|
180
|
+
_st 73
|
181
|
+
_y 72
|
182
|
+
_he 72
|
183
|
+
wh 71
|
184
|
+
_on 71
|
185
|
+
sh 70
|
186
|
+
z 70
|
187
|
+
e,_ 69
|
188
|
+
bi 68
|
189
|
+
_tha 68
|
190
|
+
wui 67
|
191
|
+
!_ 67
|
192
|
+
ad 67
|
193
|
+
een_ 66
|
194
|
+
l_ 66
|
195
|
+
ts 66
|
196
|
+
_for 66
|
197
|
+
n, 66
|
198
|
+
_wh 65
|
199
|
+
re_ 65
|
200
|
+
be 65
|
201
|
+
eh 64
|
202
|
+
hat 64
|
203
|
+
ns 64
|
204
|
+
br 64
|
205
|
+
g_ 64
|
206
|
+
ui_ 64
|
207
|
+
rr 64
|
208
|
+
wui_ 63
|
209
|
+
ni 63
|
210
|
+
_wui 62
|
211
|
+
ay 62
|
212
|
+
s, 62
|
213
|
+
pe 61
|
214
|
+
n,_ 61
|
215
|
+
bo 61
|
216
|
+
al 61
|
217
|
+
ye 61
|
218
|
+
_bi 60
|
219
|
+
oot 60
|
220
|
+
na 60
|
221
|
+
ang 60
|
222
|
+
s,_ 59
|
223
|
+
es_ 59
|
224
|
+
ill 58
|
225
|
+
that 58
|
226
|
+
_wui_ 58
|
227
|
+
nn 58
|
228
|
+
eh_ 58
|
229
|
+
oa 57
|
230
|
+
han 57
|
231
|
+
_that 56
|
232
|
+
_br 56
|
233
|
+
ca 56
|
234
|
+
_ga 56
|
235
|
+
ng_ 56
|
236
|
+
um 55
|
237
|
+
hat_ 55
|
238
|
+
oon 55
|
239
|
+
od 55
|
240
|
+
for_ 55
|
241
|
+
no 55
|
242
|
+
ree 55
|
243
|
+
_for_ 54
|
244
|
+
_le 54
|
245
|
+
ht_ 54
|
246
|
+
ot_ 54
|
247
|
+
_k 53
|
248
|
+
rd 53
|
249
|
+
ki 53
|
250
|
+
aw 53
|
251
|
+
nd_ 53
|
252
|
+
_on_ 53
|
253
|
+
_it 53
|
254
|
+
ik 53
|
255
|
+
t, 53
|
256
|
+
_be 52
|
257
|
+
that_ 52
|
258
|
+
ve 52
|
259
|
+
rn 52
|
260
|
+
's 51
|
261
|
+
au 51
|
262
|
+
co 51
|
263
|
+
ich 51
|
264
|
+
to 51
|
265
|
+
lo 51
|
266
|
+
t,_ 51
|
267
|
+
ea 51
|
268
|
+
tee 51
|
269
|
+
lan 50
|
270
|
+
fi 50
|
271
|
+
_at 50
|
272
|
+
am 50
|
273
|
+
_in 50
|
274
|
+
ere 50
|
275
|
+
ur 50
|
276
|
+
le_ 50
|
277
|
+
nt_ 49
|
278
|
+
's_ 49
|
279
|
+
hin 49
|
280
|
+
yi 49
|
281
|
+
hr 49
|
282
|
+
ts_ 49
|
283
|
+
_ca 48
|
284
|
+
" 48
|
285
|
+
ta 48
|
286
|
+
cht_ 48
|
287
|
+
-_ 48
|
288
|
+
_as 47
|
289
|
+
T 47
|
290
|
+
ang_ 47
|
291
|
+
lei 46
|
292
|
+
_ma_ 46
|
293
|
+
tr 46
|
294
|
+
_ro 46
|
295
|
+
fe 46
|
296
|
+
ma_ 46
|
297
|
+
icht 46
|
298
|
+
_as_ 46
|
299
|
+
der 46
|
300
|
+
cl 46
|
301
|
+
e- 45
|
302
|
+
n- 45
|
303
|
+
thr 45
|
304
|
+
ba 45
|
305
|
+
m_ 45
|
306
|
+
st_ 45
|
307
|
+
rt_ 45
|
308
|
+
_u 45
|
309
|
+
do 45
|
310
|
+
_T 45
|
311
|
+
im 44
|
312
|
+
_se 44
|
313
|
+
sk 44
|
314
|
+
_la 44
|
315
|
+
eik 44
|
316
|
+
bit 43
|
317
|
+
ike 43
|
318
|
+
B 43
|
319
|
+
kee 43
|
320
|
+
tte 43
|
321
|
+
di 43
|
322
|
+
eed 43
|
323
|
+
_B 42
|
324
|
+
_aa 42
|
325
|
+
her_ 42
|
326
|
+
da 42
|
327
|
+
ff 42
|
328
|
+
tu 42
|
329
|
+
ie_ 42
|
330
|
+
_cl 42
|
331
|
+
_ba 42
|
332
|
+
oot_ 42
|
333
|
+
bu 41
|
334
|
+
eike 41
|
335
|
+
oc 41
|
336
|
+
hu 41
|
337
|
+
_thr 41
|
338
|
+
ther_ 41
|
339
|
+
_co 41
|
340
|
+
aa_ 41
|
341
|
+
so 41
|
342
|
+
_me 41
|
343
|
+
H 41
|
344
|
+
_H 40
|
345
|
+
ke_ 40
|
346
|
+
ert 40
|
347
|
+
lu 40
|
348
|
+
ist 40
|
349
|
+
si 40
|
350
|
+
iz 40
|
351
|
+
ar_ 39
|
352
|
+
uc 39
|
353
|
+
thi 39
|
354
|
+
ad_ 39
|
355
|
+
ru 39
|
356
|
+
owe 39
|
357
|
+
gi 38
|
358
|
+
_bit 38
|
359
|
+
_do 38
|
360
|
+
int 38
|
361
|
+
bl 38
|
362
|
+
ld 38
|
363
|
+
_at_ 38
|
364
|
+
lt 38
|
365
|
+
ac 38
|
366
|
+
_ha 38
|
367
|
+
ae_ 38
|
368
|
+
rs 37
|
369
|
+
here 37
|
370
|
+
ei_ 37
|
371
|
+
han_ 37
|
372
|
+
p_ 37
|
373
|
+
is_ 37
|
374
|
+
eth 37
|
375
|
+
fa 37
|
376
|
+
_sk 37
|
377
|
+
ll_ 37
|
378
|
+
ss 36
|
379
|
+
bra 36
|
380
|
+
wha 36
|
381
|
+
gl 36
|
382
|
+
ck_ 36
|
383
|
+
pl 36
|
384
|
+
lin 36
|
385
|
+
ir_ 36
|
386
|
+
ab 36
|
387
|
+
_ther 36
|
388
|
+
_da 35
|
389
|
+
ce 35
|
390
|
+
rin 35
|
391
|
+
_oo 35
|
392
|
+
rl 35
|
393
|
+
wee 35
|
394
|
+
and 35
|
395
|
+
sa 35
|
396
|
+
_yi 35
|
397
|
+
_bra 35
|
398
|
+
'd 35
|
399
|
+
ds 35
|
400
|
+
_bo 35
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 12634
|
2
|
+
a 5353
|
3
|
+
h 3268
|
4
|
+
i 2898
|
5
|
+
n 2792
|
6
|
+
e 1651
|
7
|
+
r 1563
|
8
|
+
d 1455
|
9
|
+
_a 1425
|
10
|
+
c 1245
|
11
|
+
n_ 1236
|
12
|
+
s 1165
|
13
|
+
l 1152
|
14
|
+
an 1121
|
15
|
+
t 980
|
16
|
+
ai 979
|
17
|
+
g 962
|
18
|
+
u 905
|
19
|
+
ch 902
|
20
|
+
ha 836
|
21
|
+
h_ 833
|
22
|
+
a_ 829
|
23
|
+
ea 821
|
24
|
+
o 794
|
25
|
+
dh 726
|
26
|
+
an_ 711
|
27
|
+
b 639
|
28
|
+
m 585
|
29
|
+
na 514
|
30
|
+
nn 506
|
31
|
+
ac 498
|
32
|
+
r_ 495
|
33
|
+
s_ 482
|
34
|
+
ir 480
|
35
|
+
ach 466
|
36
|
+
id 458
|
37
|
+
_an 450
|
38
|
+
_c 427
|
39
|
+
th 403
|
40
|
+
� 388
|
41
|
+
he 383
|
42
|
+
in 379
|
43
|
+
bh 367
|
44
|
+
idh 358
|
45
|
+
ad 342
|
46
|
+
_n 341
|
47
|
+
il 332
|
48
|
+
nn_ 323
|
49
|
+
_t 322
|
50
|
+
_d 319
|
51
|
+
ar 317
|
52
|
+
e_ 311
|
53
|
+
dh_ 307
|
54
|
+
_an_ 303
|
55
|
+
_b 302
|
56
|
+
_na 294
|
57
|
+
air 289
|
58
|
+
ig 279
|
59
|
+
. 275
|
60
|
+
ir_ 272
|
61
|
+
ag 272
|
62
|
+
_ai 272
|
63
|
+
, 270
|
64
|
+
gu 269
|
65
|
+
,_ 269
|
66
|
+
._ 265
|
67
|
+
ean 264
|
68
|
+
ch_ 261
|
69
|
+
f 259
|
70
|
+
? 258
|
71
|
+
_s 255
|
72
|
+
ann 250
|
73
|
+
ra 241
|
74
|
+
ei 241
|
75
|
+
_a_ 241
|
76
|
+
ha_ 241
|
77
|
+
d_ 238
|
78
|
+
- 235
|
79
|
+
_m 231
|
80
|
+
gh 230
|
81
|
+
hea 228
|
82
|
+
le 226
|
83
|
+
_f 224
|
84
|
+
ui 223
|
85
|
+
is 223
|
86
|
+
as 218
|
87
|
+
adh 218
|
88
|
+
l_ 216
|
89
|
+
g_ 208
|
90
|
+
�i 207
|
91
|
+
� 207
|
92
|
+
hai 205
|
93
|
+
cha 205
|
94
|
+
air_ 204
|
95
|
+
na_ 201
|
96
|
+
inn 198
|
97
|
+
tha 190
|
98
|
+
C 189
|
99
|
+
G 188
|
100
|
+
ann_ 187
|
101
|
+
_ag 186
|
102
|
+
_air 186
|
103
|
+
eac 185
|
104
|
+
_g 185
|
105
|
+
_na_ 184
|
106
|
+
ach_ 184
|
107
|
+
_C 183
|
108
|
+
us 183
|
109
|
+
_ch 183
|
110
|
+
la 182
|
111
|
+
_G 182
|
112
|
+
each 181
|
113
|
+
us_ 178
|
114
|
+
al 178
|
115
|
+
gus 176
|
116
|
+
gus_ 176
|
117
|
+
_th 169
|
118
|
+
_air_ 168
|
119
|
+
_agus 167
|
120
|
+
agus_ 167
|
121
|
+
_agu 167
|
122
|
+
agus 167
|
123
|
+
agu 167
|
124
|
+
ta 164
|
125
|
+
aid 163
|
126
|
+
hi 163
|
127
|
+
hd 163
|
128
|
+
chd 160
|
129
|
+
T 157
|
130
|
+
A 156
|
131
|
+
ic 152
|
132
|
+
_T 152
|
133
|
+
adh_ 150
|
134
|
+
idh_ 148
|
135
|
+
mh 147
|
136
|
+
?_ 146
|
137
|
+
ar_ 145
|
138
|
+
oi 144
|
139
|
+
da 143
|
140
|
+
aidh 143
|
141
|
+
_bh 139
|
142
|
+
ean_ 138
|
143
|
+
sa 138
|
144
|
+
ig_ 138
|
145
|
+
_r 136
|
146
|
+
_A 134
|
147
|
+
� 134
|
148
|
+
te 134
|
149
|
+
achd 131
|
150
|
+
hu 131
|
151
|
+
_e 130
|
152
|
+
aig 130
|
153
|
+
_l 130
|
154
|
+
_ann 129
|
155
|
+
ain 127
|
156
|
+
ne 127
|
157
|
+
dhe 125
|
158
|
+
_dh 125
|
159
|
+
�id 123
|
160
|
+
o_ 121
|
161
|
+
hl 119
|
162
|
+
acha 119
|
163
|
+
ga 118
|
164
|
+
�idh 118
|
165
|
+
on 118
|
166
|
+
it 117
|
167
|
+
aidh_ 116
|
168
|
+
de 115
|
169
|
+
nan 115
|
170
|
+
ua 115
|
171
|
+
_ann_ 115
|
172
|
+
ich 115
|
173
|
+
il_ 114
|
174
|
+
m_ 114
|
175
|
+
eil 114
|
176
|
+
ri 112
|
177
|
+
at 112
|
178
|
+
ma 111
|
179
|
+
li 109
|
180
|
+
ao 109
|
181
|
+
re 109
|
182
|
+
inn_ 108
|
183
|
+
_tha 107
|
184
|
+
fh 106
|
185
|
+
as_ 106
|
186
|
+
bh_ 106
|
187
|
+
nan_ 103
|
188
|
+
lea 103
|
189
|
+
lt 103
|
190
|
+
S 103
|
191
|
+
a? 103
|
192
|
+
a?_ 102
|
193
|
+
io 102
|
194
|
+
E 101
|
195
|
+
am 101
|
196
|
+
' 100
|
197
|
+
_a? 100
|
198
|
+
igh 100
|
199
|
+
_a?_ 99
|
200
|
+
_gu 99
|
201
|
+
idhe 99
|
202
|
+
t_ 99
|
203
|
+
se 99
|
204
|
+
si 98
|
205
|
+
ba 97
|
206
|
+
� 97
|
207
|
+
tha_ 96
|
208
|
+
bha 95
|
209
|
+
B 94
|
210
|
+
is_ 94
|
211
|
+
u_ 94
|
212
|
+
_B 94
|
213
|
+
_i 93
|
214
|
+
ile 92
|
215
|
+
aic 91
|
216
|
+
hei 91
|
217
|
+
ia 90
|
218
|
+
ho 89
|
219
|
+
Th 88
|
220
|
+
ath 88
|
221
|
+
_Th 88
|
222
|
+
rt 87
|
223
|
+
ib 87
|
224
|
+
G�id 86
|
225
|
+
_G�i 86
|
226
|
+
_G� 86
|
227
|
+
G�i 86
|
228
|
+
�r 86
|
229
|
+
G� 86
|
230
|
+
G�idh 86
|
231
|
+
_G�id 86
|
232
|
+
had 85
|
233
|
+
ibh 85
|
234
|
+
_fh 85
|
235
|
+
p 84
|
236
|
+
ad_ 83
|
237
|
+
_? 83
|
238
|
+
_E 83
|
239
|
+
hd_ 82
|
240
|
+
dhea 82
|
241
|
+
chd_ 82
|
242
|
+
ear 81
|
243
|
+
ith 81
|
244
|
+
_tha_ 80
|
245
|
+
h- 79
|
246
|
+
eal 78
|
247
|
+
hean 78
|
248
|
+
sg 77
|
249
|
+
rea 77
|
250
|
+
_S 76
|
251
|
+
ais 75
|
252
|
+
ll 75
|
253
|
+
han 74
|
254
|
+
h� 74
|
255
|
+
achd_ 74
|
256
|
+
ead 74
|
257
|
+
idhea 73
|
258
|
+
am_ 72
|
259
|
+
dha 72
|
260
|
+
_nan 71
|
261
|
+
_nan_ 71
|
262
|
+
hadh 71
|
263
|
+
gh_ 71
|
264
|
+
ail 70
|
265
|
+
hui 70
|
266
|
+
Ch 69
|
267
|
+
eachd 69
|
268
|
+
h. 69
|
269
|
+
aich 69
|
270
|
+
hli 69
|
271
|
+
chai 69
|
272
|
+
om 68
|
273
|
+
fa 68
|
274
|
+
chad 68
|
275
|
+
I 67
|
276
|
+
h._ 67
|
277
|
+
_Ch 67
|
278
|
+
tea 67
|
279
|
+
nea 66
|
280
|
+
chadh 66
|
281
|
+
achad 66
|
282
|
+
rai 66
|
283
|
+
lig 66
|
284
|
+
haid 66
|
285
|
+
dea 66
|
286
|
+
rt_ 65
|
287
|
+
�r 65
|
288
|
+
dhl 65
|
289
|
+
ana 64
|
290
|
+
eann 64
|
291
|
+
Ei 64
|
292
|
+
le_ 64
|
293
|
+
hn 64
|
294
|
+
ilt 64
|
295
|
+
uid 64
|
296
|
+
_fa 63
|
297
|
+
_Tha 63
|
298
|
+
Tha 63
|
299
|
+
ob 63
|
300
|
+
_si 62
|
301
|
+
ro 62
|
302
|
+
cu 62
|
303
|
+
ainn 62
|
304
|
+
un 62
|
305
|
+
dhli 61
|
306
|
+
idhli 61
|
307
|
+
lean 61
|
308
|
+
idhl 61
|
309
|
+
�idhl 61
|
310
|
+
hlig 61
|
311
|
+
dhlig 61
|
312
|
+
in_ 60
|
313
|
+
_� 60
|
314
|
+
st 60
|
315
|
+
rr 60
|
316
|
+
_cu 60
|
317
|
+
hr 60
|
318
|
+
_aig 60
|
319
|
+
bhe 59
|
320
|
+
i_ 59
|
321
|
+
aigh 59
|
322
|
+
Tha_ 59
|
323
|
+
� 59
|
324
|
+
_ri 59
|
325
|
+
_Tha_ 59
|
326
|
+
lb 58
|
327
|
+
che 58
|
328
|
+
ran 58
|
329
|
+
nac 58
|
330
|
+
haidh 58
|
331
|
+
hadh_ 58
|
332
|
+
aig_ 58
|
333
|
+
Gh 58
|
334
|
+
ilea 58
|
335
|
+
_Gh 58
|
336
|
+
lte 58
|
337
|
+
_le 58
|
338
|
+
ru 58
|
339
|
+
�idhe 57
|
340
|
+
_I 57
|
341
|
+
ilte 57
|
342
|
+
eadh 57
|
343
|
+
M 56
|
344
|
+
hlig_ 56
|
345
|
+
L 56
|
346
|
+
chu 56
|
347
|
+
nach 56
|
348
|
+
_ma 56
|
349
|
+
lig_ 56
|
350
|
+
h,_ 55
|
351
|
+
th_ 55
|
352
|
+
ibh_ 55
|
353
|
+
_aig_ 55
|
354
|
+
D 55
|
355
|
+
atha 55
|
356
|
+
_Ei 55
|
357
|
+
h, 55
|
358
|
+
gu_ 54
|
359
|
+
_gu_ 54
|
360
|
+
im 54
|
361
|
+
eil_ 54
|
362
|
+
eu 53
|
363
|
+
_M 53
|
364
|
+
Al 53
|
365
|
+
irt 53
|
366
|
+
_L 53
|
367
|
+
iad 53
|
368
|
+
sea 52
|
369
|
+
lba 52
|
370
|
+
Alba 52
|
371
|
+
F 52
|
372
|
+
Alb 52
|
373
|
+
uai 52
|
374
|
+
ich_ 52
|
375
|
+
_F 51
|
376
|
+
ilean 51
|
377
|
+
has 51
|
378
|
+
tai 51
|
379
|
+
each_ 50
|
380
|
+
eacha 50
|
381
|
+
har 50
|
382
|
+
ni 50
|
383
|
+
_de 50
|
384
|
+
irt_ 50
|
385
|
+
n,_ 50
|
386
|
+
mha 50
|
387
|
+
n, 50
|
388
|
+
_e_ 50
|
389
|
+
ide 49
|
390
|
+
neach 49
|
391
|
+
neac 49
|
392
|
+
ur 49
|
393
|
+
rd 49
|
394
|
+
_h 49
|
395
|
+
hean_ 49
|
396
|
+
oc 49
|
397
|
+
e� 49
|
398
|
+
te_ 49
|
399
|
+
han_ 49
|
400
|
+
on_ 49
|