language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 11688
|
2
|
+
e 3223
|
3
|
+
a 2469
|
4
|
+
t 2269
|
5
|
+
i 1928
|
6
|
+
n 1903
|
7
|
+
r 1414
|
8
|
+
o 1406
|
9
|
+
h 1369
|
10
|
+
s 1249
|
11
|
+
l 929
|
12
|
+
n_ 890
|
13
|
+
_t 862
|
14
|
+
_a 843
|
15
|
+
d 818
|
16
|
+
e_ 798
|
17
|
+
th 704
|
18
|
+
w 661
|
19
|
+
he 625
|
20
|
+
an 612
|
21
|
+
t_ 606
|
22
|
+
u 592
|
23
|
+
_th 575
|
24
|
+
c 508
|
25
|
+
s_ 471
|
26
|
+
the 470
|
27
|
+
, 469
|
28
|
+
- 458
|
29
|
+
in 455
|
30
|
+
m 445
|
31
|
+
,_ 440
|
32
|
+
b 434
|
33
|
+
g 429
|
34
|
+
er 409
|
35
|
+
ee 408
|
36
|
+
_the 407
|
37
|
+
k 402
|
38
|
+
an_ 402
|
39
|
+
f 385
|
40
|
+
_w 378
|
41
|
+
he_ 376
|
42
|
+
the_ 364
|
43
|
+
_an 362
|
44
|
+
_o 360
|
45
|
+
y 358
|
46
|
+
_the_ 354
|
47
|
+
_s 353
|
48
|
+
_an_ 342
|
49
|
+
a_ 335
|
50
|
+
r_ 327
|
51
|
+
_b 316
|
52
|
+
d_ 303
|
53
|
+
i_ 278
|
54
|
+
en 277
|
55
|
+
p 270
|
56
|
+
ei 245
|
57
|
+
A 236
|
58
|
+
wa 232
|
59
|
+
_A 231
|
60
|
+
re 229
|
61
|
+
in_ 229
|
62
|
+
ui 218
|
63
|
+
oo 217
|
64
|
+
le 217
|
65
|
+
ai 216
|
66
|
+
et 212
|
67
|
+
ti 209
|
68
|
+
it 209
|
69
|
+
_f 206
|
70
|
+
te 204
|
71
|
+
_a_ 203
|
72
|
+
_m 202
|
73
|
+
ha 200
|
74
|
+
as 193
|
75
|
+
on 188
|
76
|
+
at 184
|
77
|
+
_i 183
|
78
|
+
_wa 183
|
79
|
+
_c 182
|
80
|
+
o_ 180
|
81
|
+
or 178
|
82
|
+
_h 176
|
83
|
+
_g 169
|
84
|
+
ch 165
|
85
|
+
A_ 159
|
86
|
+
_l 158
|
87
|
+
_A_ 157
|
88
|
+
st 156
|
89
|
+
_d 155
|
90
|
+
_ti 148
|
91
|
+
. 148
|
92
|
+
._ 146
|
93
|
+
ke 144
|
94
|
+
ti_ 143
|
95
|
+
-- 143
|
96
|
+
_o_ 142
|
97
|
+
ow 142
|
98
|
+
--- 140
|
99
|
+
ed 138
|
100
|
+
---- 138
|
101
|
+
_r 137
|
102
|
+
as_ 137
|
103
|
+
y_ 136
|
104
|
+
er_ 136
|
105
|
+
----- 136
|
106
|
+
ir 135
|
107
|
+
aa 135
|
108
|
+
la 131
|
109
|
+
een 130
|
110
|
+
ae 129
|
111
|
+
_ti_ 128
|
112
|
+
ra 126
|
113
|
+
es 125
|
114
|
+
nd 124
|
115
|
+
de 120
|
116
|
+
h_ 120
|
117
|
+
ie 120
|
118
|
+
ar 119
|
119
|
+
ll 119
|
120
|
+
nt 118
|
121
|
+
ot 118
|
122
|
+
en_ 115
|
123
|
+
ma 115
|
124
|
+
eet 113
|
125
|
+
her 112
|
126
|
+
el 112
|
127
|
+
is 112
|
128
|
+
' 112
|
129
|
+
at_ 111
|
130
|
+
ic 109
|
131
|
+
se 108
|
132
|
+
or_ 106
|
133
|
+
wu 104
|
134
|
+
me 104
|
135
|
+
ne 103
|
136
|
+
fo 102
|
137
|
+
on_ 101
|
138
|
+
was 99
|
139
|
+
_was 98
|
140
|
+
et_ 98
|
141
|
+
ri 98
|
142
|
+
_e 97
|
143
|
+
_ma 97
|
144
|
+
v 97
|
145
|
+
_n 97
|
146
|
+
! 97
|
147
|
+
li 97
|
148
|
+
ht 93
|
149
|
+
hi 92
|
150
|
+
_wu 92
|
151
|
+
ng 91
|
152
|
+
ro 91
|
153
|
+
it_ 90
|
154
|
+
ck 90
|
155
|
+
_fo 90
|
156
|
+
tha 90
|
157
|
+
k_ 89
|
158
|
+
il 89
|
159
|
+
cht 86
|
160
|
+
eet_ 86
|
161
|
+
_p 86
|
162
|
+
we 86
|
163
|
+
_was_ 85
|
164
|
+
was_ 85
|
165
|
+
rt 84
|
166
|
+
ed_ 83
|
167
|
+
ter 83
|
168
|
+
id 83
|
169
|
+
ga 82
|
170
|
+
; 82
|
171
|
+
;_ 81
|
172
|
+
ther 79
|
173
|
+
tt 76
|
174
|
+
air 76
|
175
|
+
e, 75
|
176
|
+
un 75
|
177
|
+
ho 75
|
178
|
+
for 74
|
179
|
+
ge 74
|
180
|
+
_st 73
|
181
|
+
_y 72
|
182
|
+
_he 72
|
183
|
+
wh 71
|
184
|
+
_on 71
|
185
|
+
sh 70
|
186
|
+
z 70
|
187
|
+
e,_ 69
|
188
|
+
bi 68
|
189
|
+
_tha 68
|
190
|
+
wui 67
|
191
|
+
!_ 67
|
192
|
+
ad 67
|
193
|
+
een_ 66
|
194
|
+
l_ 66
|
195
|
+
ts 66
|
196
|
+
_for 66
|
197
|
+
n, 66
|
198
|
+
_wh 65
|
199
|
+
re_ 65
|
200
|
+
be 65
|
201
|
+
eh 64
|
202
|
+
hat 64
|
203
|
+
ns 64
|
204
|
+
br 64
|
205
|
+
g_ 64
|
206
|
+
ui_ 64
|
207
|
+
rr 64
|
208
|
+
wui_ 63
|
209
|
+
ni 63
|
210
|
+
_wui 62
|
211
|
+
ay 62
|
212
|
+
s, 62
|
213
|
+
pe 61
|
214
|
+
n,_ 61
|
215
|
+
bo 61
|
216
|
+
al 61
|
217
|
+
ye 61
|
218
|
+
_bi 60
|
219
|
+
oot 60
|
220
|
+
na 60
|
221
|
+
ang 60
|
222
|
+
s,_ 59
|
223
|
+
es_ 59
|
224
|
+
ill 58
|
225
|
+
that 58
|
226
|
+
_wui_ 58
|
227
|
+
nn 58
|
228
|
+
eh_ 58
|
229
|
+
oa 57
|
230
|
+
han 57
|
231
|
+
_that 56
|
232
|
+
_br 56
|
233
|
+
ca 56
|
234
|
+
_ga 56
|
235
|
+
ng_ 56
|
236
|
+
um 55
|
237
|
+
hat_ 55
|
238
|
+
oon 55
|
239
|
+
od 55
|
240
|
+
for_ 55
|
241
|
+
no 55
|
242
|
+
ree 55
|
243
|
+
_for_ 54
|
244
|
+
_le 54
|
245
|
+
ht_ 54
|
246
|
+
ot_ 54
|
247
|
+
_k 53
|
248
|
+
rd 53
|
249
|
+
ki 53
|
250
|
+
aw 53
|
251
|
+
nd_ 53
|
252
|
+
_on_ 53
|
253
|
+
_it 53
|
254
|
+
ik 53
|
255
|
+
t, 53
|
256
|
+
_be 52
|
257
|
+
that_ 52
|
258
|
+
ve 52
|
259
|
+
rn 52
|
260
|
+
's 51
|
261
|
+
au 51
|
262
|
+
co 51
|
263
|
+
ich 51
|
264
|
+
to 51
|
265
|
+
lo 51
|
266
|
+
t,_ 51
|
267
|
+
ea 51
|
268
|
+
tee 51
|
269
|
+
lan 50
|
270
|
+
fi 50
|
271
|
+
_at 50
|
272
|
+
am 50
|
273
|
+
_in 50
|
274
|
+
ere 50
|
275
|
+
ur 50
|
276
|
+
le_ 50
|
277
|
+
nt_ 49
|
278
|
+
's_ 49
|
279
|
+
hin 49
|
280
|
+
yi 49
|
281
|
+
hr 49
|
282
|
+
ts_ 49
|
283
|
+
_ca 48
|
284
|
+
" 48
|
285
|
+
ta 48
|
286
|
+
cht_ 48
|
287
|
+
-_ 48
|
288
|
+
_as 47
|
289
|
+
T 47
|
290
|
+
ang_ 47
|
291
|
+
lei 46
|
292
|
+
_ma_ 46
|
293
|
+
tr 46
|
294
|
+
_ro 46
|
295
|
+
fe 46
|
296
|
+
ma_ 46
|
297
|
+
icht 46
|
298
|
+
_as_ 46
|
299
|
+
der 46
|
300
|
+
cl 46
|
301
|
+
e- 45
|
302
|
+
n- 45
|
303
|
+
thr 45
|
304
|
+
ba 45
|
305
|
+
m_ 45
|
306
|
+
st_ 45
|
307
|
+
rt_ 45
|
308
|
+
_u 45
|
309
|
+
do 45
|
310
|
+
_T 45
|
311
|
+
im 44
|
312
|
+
_se 44
|
313
|
+
sk 44
|
314
|
+
_la 44
|
315
|
+
eik 44
|
316
|
+
bit 43
|
317
|
+
ike 43
|
318
|
+
B 43
|
319
|
+
kee 43
|
320
|
+
tte 43
|
321
|
+
di 43
|
322
|
+
eed 43
|
323
|
+
_B 42
|
324
|
+
_aa 42
|
325
|
+
her_ 42
|
326
|
+
da 42
|
327
|
+
ff 42
|
328
|
+
tu 42
|
329
|
+
ie_ 42
|
330
|
+
_cl 42
|
331
|
+
_ba 42
|
332
|
+
oot_ 42
|
333
|
+
bu 41
|
334
|
+
eike 41
|
335
|
+
oc 41
|
336
|
+
hu 41
|
337
|
+
_thr 41
|
338
|
+
ther_ 41
|
339
|
+
_co 41
|
340
|
+
aa_ 41
|
341
|
+
so 41
|
342
|
+
_me 41
|
343
|
+
H 41
|
344
|
+
_H 40
|
345
|
+
ke_ 40
|
346
|
+
ert 40
|
347
|
+
lu 40
|
348
|
+
ist 40
|
349
|
+
si 40
|
350
|
+
iz 40
|
351
|
+
ar_ 39
|
352
|
+
uc 39
|
353
|
+
thi 39
|
354
|
+
ad_ 39
|
355
|
+
ru 39
|
356
|
+
owe 39
|
357
|
+
gi 38
|
358
|
+
_bit 38
|
359
|
+
_do 38
|
360
|
+
int 38
|
361
|
+
bl 38
|
362
|
+
ld 38
|
363
|
+
_at_ 38
|
364
|
+
lt 38
|
365
|
+
ac 38
|
366
|
+
_ha 38
|
367
|
+
ae_ 38
|
368
|
+
rs 37
|
369
|
+
here 37
|
370
|
+
ei_ 37
|
371
|
+
han_ 37
|
372
|
+
p_ 37
|
373
|
+
is_ 37
|
374
|
+
eth 37
|
375
|
+
fa 37
|
376
|
+
_sk 37
|
377
|
+
ll_ 37
|
378
|
+
ss 36
|
379
|
+
bra 36
|
380
|
+
wha 36
|
381
|
+
gl 36
|
382
|
+
ck_ 36
|
383
|
+
pl 36
|
384
|
+
lin 36
|
385
|
+
ir_ 36
|
386
|
+
ab 36
|
387
|
+
_ther 36
|
388
|
+
_da 35
|
389
|
+
ce 35
|
390
|
+
rin 35
|
391
|
+
_oo 35
|
392
|
+
rl 35
|
393
|
+
wee 35
|
394
|
+
and 35
|
395
|
+
sa 35
|
396
|
+
_yi 35
|
397
|
+
_bra 35
|
398
|
+
'd 35
|
399
|
+
ds 35
|
400
|
+
_bo 35
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 12634
|
2
|
+
a 5353
|
3
|
+
h 3268
|
4
|
+
i 2898
|
5
|
+
n 2792
|
6
|
+
e 1651
|
7
|
+
r 1563
|
8
|
+
d 1455
|
9
|
+
_a 1425
|
10
|
+
c 1245
|
11
|
+
n_ 1236
|
12
|
+
s 1165
|
13
|
+
l 1152
|
14
|
+
an 1121
|
15
|
+
t 980
|
16
|
+
ai 979
|
17
|
+
g 962
|
18
|
+
u 905
|
19
|
+
ch 902
|
20
|
+
ha 836
|
21
|
+
h_ 833
|
22
|
+
a_ 829
|
23
|
+
ea 821
|
24
|
+
o 794
|
25
|
+
dh 726
|
26
|
+
an_ 711
|
27
|
+
b 639
|
28
|
+
m 585
|
29
|
+
na 514
|
30
|
+
nn 506
|
31
|
+
ac 498
|
32
|
+
r_ 495
|
33
|
+
s_ 482
|
34
|
+
ir 480
|
35
|
+
ach 466
|
36
|
+
id 458
|
37
|
+
_an 450
|
38
|
+
_c 427
|
39
|
+
th 403
|
40
|
+
� 388
|
41
|
+
he 383
|
42
|
+
in 379
|
43
|
+
bh 367
|
44
|
+
idh 358
|
45
|
+
ad 342
|
46
|
+
_n 341
|
47
|
+
il 332
|
48
|
+
nn_ 323
|
49
|
+
_t 322
|
50
|
+
_d 319
|
51
|
+
ar 317
|
52
|
+
e_ 311
|
53
|
+
dh_ 307
|
54
|
+
_an_ 303
|
55
|
+
_b 302
|
56
|
+
_na 294
|
57
|
+
air 289
|
58
|
+
ig 279
|
59
|
+
. 275
|
60
|
+
ir_ 272
|
61
|
+
ag 272
|
62
|
+
_ai 272
|
63
|
+
, 270
|
64
|
+
gu 269
|
65
|
+
,_ 269
|
66
|
+
._ 265
|
67
|
+
ean 264
|
68
|
+
ch_ 261
|
69
|
+
f 259
|
70
|
+
? 258
|
71
|
+
_s 255
|
72
|
+
ann 250
|
73
|
+
ra 241
|
74
|
+
ei 241
|
75
|
+
_a_ 241
|
76
|
+
ha_ 241
|
77
|
+
d_ 238
|
78
|
+
- 235
|
79
|
+
_m 231
|
80
|
+
gh 230
|
81
|
+
hea 228
|
82
|
+
le 226
|
83
|
+
_f 224
|
84
|
+
ui 223
|
85
|
+
is 223
|
86
|
+
as 218
|
87
|
+
adh 218
|
88
|
+
l_ 216
|
89
|
+
g_ 208
|
90
|
+
�i 207
|
91
|
+
� 207
|
92
|
+
hai 205
|
93
|
+
cha 205
|
94
|
+
air_ 204
|
95
|
+
na_ 201
|
96
|
+
inn 198
|
97
|
+
tha 190
|
98
|
+
C 189
|
99
|
+
G 188
|
100
|
+
ann_ 187
|
101
|
+
_ag 186
|
102
|
+
_air 186
|
103
|
+
eac 185
|
104
|
+
_g 185
|
105
|
+
_na_ 184
|
106
|
+
ach_ 184
|
107
|
+
_C 183
|
108
|
+
us 183
|
109
|
+
_ch 183
|
110
|
+
la 182
|
111
|
+
_G 182
|
112
|
+
each 181
|
113
|
+
us_ 178
|
114
|
+
al 178
|
115
|
+
gus 176
|
116
|
+
gus_ 176
|
117
|
+
_th 169
|
118
|
+
_air_ 168
|
119
|
+
_agus 167
|
120
|
+
agus_ 167
|
121
|
+
_agu 167
|
122
|
+
agus 167
|
123
|
+
agu 167
|
124
|
+
ta 164
|
125
|
+
aid 163
|
126
|
+
hi 163
|
127
|
+
hd 163
|
128
|
+
chd 160
|
129
|
+
T 157
|
130
|
+
A 156
|
131
|
+
ic 152
|
132
|
+
_T 152
|
133
|
+
adh_ 150
|
134
|
+
idh_ 148
|
135
|
+
mh 147
|
136
|
+
?_ 146
|
137
|
+
ar_ 145
|
138
|
+
oi 144
|
139
|
+
da 143
|
140
|
+
aidh 143
|
141
|
+
_bh 139
|
142
|
+
ean_ 138
|
143
|
+
sa 138
|
144
|
+
ig_ 138
|
145
|
+
_r 136
|
146
|
+
_A 134
|
147
|
+
� 134
|
148
|
+
te 134
|
149
|
+
achd 131
|
150
|
+
hu 131
|
151
|
+
_e 130
|
152
|
+
aig 130
|
153
|
+
_l 130
|
154
|
+
_ann 129
|
155
|
+
ain 127
|
156
|
+
ne 127
|
157
|
+
dhe 125
|
158
|
+
_dh 125
|
159
|
+
�id 123
|
160
|
+
o_ 121
|
161
|
+
hl 119
|
162
|
+
acha 119
|
163
|
+
ga 118
|
164
|
+
�idh 118
|
165
|
+
on 118
|
166
|
+
it 117
|
167
|
+
aidh_ 116
|
168
|
+
de 115
|
169
|
+
nan 115
|
170
|
+
ua 115
|
171
|
+
_ann_ 115
|
172
|
+
ich 115
|
173
|
+
il_ 114
|
174
|
+
m_ 114
|
175
|
+
eil 114
|
176
|
+
ri 112
|
177
|
+
at 112
|
178
|
+
ma 111
|
179
|
+
li 109
|
180
|
+
ao 109
|
181
|
+
re 109
|
182
|
+
inn_ 108
|
183
|
+
_tha 107
|
184
|
+
fh 106
|
185
|
+
as_ 106
|
186
|
+
bh_ 106
|
187
|
+
nan_ 103
|
188
|
+
lea 103
|
189
|
+
lt 103
|
190
|
+
S 103
|
191
|
+
a? 103
|
192
|
+
a?_ 102
|
193
|
+
io 102
|
194
|
+
E 101
|
195
|
+
am 101
|
196
|
+
' 100
|
197
|
+
_a? 100
|
198
|
+
igh 100
|
199
|
+
_a?_ 99
|
200
|
+
_gu 99
|
201
|
+
idhe 99
|
202
|
+
t_ 99
|
203
|
+
se 99
|
204
|
+
si 98
|
205
|
+
ba 97
|
206
|
+
� 97
|
207
|
+
tha_ 96
|
208
|
+
bha 95
|
209
|
+
B 94
|
210
|
+
is_ 94
|
211
|
+
u_ 94
|
212
|
+
_B 94
|
213
|
+
_i 93
|
214
|
+
ile 92
|
215
|
+
aic 91
|
216
|
+
hei 91
|
217
|
+
ia 90
|
218
|
+
ho 89
|
219
|
+
Th 88
|
220
|
+
ath 88
|
221
|
+
_Th 88
|
222
|
+
rt 87
|
223
|
+
ib 87
|
224
|
+
G�id 86
|
225
|
+
_G�i 86
|
226
|
+
_G� 86
|
227
|
+
G�i 86
|
228
|
+
�r 86
|
229
|
+
G� 86
|
230
|
+
G�idh 86
|
231
|
+
_G�id 86
|
232
|
+
had 85
|
233
|
+
ibh 85
|
234
|
+
_fh 85
|
235
|
+
p 84
|
236
|
+
ad_ 83
|
237
|
+
_? 83
|
238
|
+
_E 83
|
239
|
+
hd_ 82
|
240
|
+
dhea 82
|
241
|
+
chd_ 82
|
242
|
+
ear 81
|
243
|
+
ith 81
|
244
|
+
_tha_ 80
|
245
|
+
h- 79
|
246
|
+
eal 78
|
247
|
+
hean 78
|
248
|
+
sg 77
|
249
|
+
rea 77
|
250
|
+
_S 76
|
251
|
+
ais 75
|
252
|
+
ll 75
|
253
|
+
han 74
|
254
|
+
h� 74
|
255
|
+
achd_ 74
|
256
|
+
ead 74
|
257
|
+
idhea 73
|
258
|
+
am_ 72
|
259
|
+
dha 72
|
260
|
+
_nan 71
|
261
|
+
_nan_ 71
|
262
|
+
hadh 71
|
263
|
+
gh_ 71
|
264
|
+
ail 70
|
265
|
+
hui 70
|
266
|
+
Ch 69
|
267
|
+
eachd 69
|
268
|
+
h. 69
|
269
|
+
aich 69
|
270
|
+
hli 69
|
271
|
+
chai 69
|
272
|
+
om 68
|
273
|
+
fa 68
|
274
|
+
chad 68
|
275
|
+
I 67
|
276
|
+
h._ 67
|
277
|
+
_Ch 67
|
278
|
+
tea 67
|
279
|
+
nea 66
|
280
|
+
chadh 66
|
281
|
+
achad 66
|
282
|
+
rai 66
|
283
|
+
lig 66
|
284
|
+
haid 66
|
285
|
+
dea 66
|
286
|
+
rt_ 65
|
287
|
+
�r 65
|
288
|
+
dhl 65
|
289
|
+
ana 64
|
290
|
+
eann 64
|
291
|
+
Ei 64
|
292
|
+
le_ 64
|
293
|
+
hn 64
|
294
|
+
ilt 64
|
295
|
+
uid 64
|
296
|
+
_fa 63
|
297
|
+
_Tha 63
|
298
|
+
Tha 63
|
299
|
+
ob 63
|
300
|
+
_si 62
|
301
|
+
ro 62
|
302
|
+
cu 62
|
303
|
+
ainn 62
|
304
|
+
un 62
|
305
|
+
dhli 61
|
306
|
+
idhli 61
|
307
|
+
lean 61
|
308
|
+
idhl 61
|
309
|
+
�idhl 61
|
310
|
+
hlig 61
|
311
|
+
dhlig 61
|
312
|
+
in_ 60
|
313
|
+
_� 60
|
314
|
+
st 60
|
315
|
+
rr 60
|
316
|
+
_cu 60
|
317
|
+
hr 60
|
318
|
+
_aig 60
|
319
|
+
bhe 59
|
320
|
+
i_ 59
|
321
|
+
aigh 59
|
322
|
+
Tha_ 59
|
323
|
+
� 59
|
324
|
+
_ri 59
|
325
|
+
_Tha_ 59
|
326
|
+
lb 58
|
327
|
+
che 58
|
328
|
+
ran 58
|
329
|
+
nac 58
|
330
|
+
haidh 58
|
331
|
+
hadh_ 58
|
332
|
+
aig_ 58
|
333
|
+
Gh 58
|
334
|
+
ilea 58
|
335
|
+
_Gh 58
|
336
|
+
lte 58
|
337
|
+
_le 58
|
338
|
+
ru 58
|
339
|
+
�idhe 57
|
340
|
+
_I 57
|
341
|
+
ilte 57
|
342
|
+
eadh 57
|
343
|
+
M 56
|
344
|
+
hlig_ 56
|
345
|
+
L 56
|
346
|
+
chu 56
|
347
|
+
nach 56
|
348
|
+
_ma 56
|
349
|
+
lig_ 56
|
350
|
+
h,_ 55
|
351
|
+
th_ 55
|
352
|
+
ibh_ 55
|
353
|
+
_aig_ 55
|
354
|
+
D 55
|
355
|
+
atha 55
|
356
|
+
_Ei 55
|
357
|
+
h, 55
|
358
|
+
gu_ 54
|
359
|
+
_gu_ 54
|
360
|
+
im 54
|
361
|
+
eil_ 54
|
362
|
+
eu 53
|
363
|
+
_M 53
|
364
|
+
Al 53
|
365
|
+
irt 53
|
366
|
+
_L 53
|
367
|
+
iad 53
|
368
|
+
sea 52
|
369
|
+
lba 52
|
370
|
+
Alba 52
|
371
|
+
F 52
|
372
|
+
Alb 52
|
373
|
+
uai 52
|
374
|
+
ich_ 52
|
375
|
+
_F 51
|
376
|
+
ilean 51
|
377
|
+
has 51
|
378
|
+
tai 51
|
379
|
+
each_ 50
|
380
|
+
eacha 50
|
381
|
+
har 50
|
382
|
+
ni 50
|
383
|
+
_de 50
|
384
|
+
irt_ 50
|
385
|
+
n,_ 50
|
386
|
+
mha 50
|
387
|
+
n, 50
|
388
|
+
_e_ 50
|
389
|
+
ide 49
|
390
|
+
neach 49
|
391
|
+
neac 49
|
392
|
+
ur 49
|
393
|
+
rd 49
|
394
|
+
_h 49
|
395
|
+
hean_ 49
|
396
|
+
oc 49
|
397
|
+
e� 49
|
398
|
+
te_ 49
|
399
|
+
han_ 49
|
400
|
+
on_ 49
|