langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,268 @@
1
+ {
2
+ "a": 0,
3
+ "i": 1,
4
+ "n": 2,
5
+ "a ": 3,
6
+ "i ": 4,
7
+ "m": 5,
8
+ "k": 6,
9
+ "ni": 7,
10
+ "u": 8,
11
+ "l": 9,
12
+ " m": 10,
13
+ " n": 11,
14
+ "s": 12,
15
+ "h": 13,
16
+ "z": 14,
17
+ "an": 15,
18
+ "wa": 16,
19
+ "ki": 17,
20
+ "w": 18,
21
+ "r": 19,
22
+ "o": 20,
23
+ "d": 21,
24
+ "ni ": 22,
25
+ "y": 23,
26
+ "li": 24,
27
+ "a n": 25,
28
+ "il": 26,
29
+ "ri": 27,
30
+ "ik": 28,
31
+ "ma": 29,
32
+ "ha": 30,
33
+ "ya": 31,
34
+ " k": 32,
35
+ "ya ": 33,
36
+ "t": 34,
37
+ "rik": 35,
38
+ "ili": 36,
39
+ "za": 37,
40
+ "zan": 38,
41
+ " ki": 39,
42
+ "ar": 40,
43
+ " l": 41,
44
+ "nza": 42,
45
+ "anz": 43,
46
+ "tan": 44,
47
+ " y": 45,
48
+ "g": 46,
49
+ "a m": 47,
50
+ " ma": 48,
51
+ "wa ": 49,
52
+ " ya": 50,
53
+ "hi": 51,
54
+ "i m": 52,
55
+ "na": 53,
56
+ " ni": 54,
57
+ "ia": 55,
58
+ "ia ": 56,
59
+ "nia": 57,
60
+ "nz": 58,
61
+ "ani": 59,
62
+ "ta": 60,
63
+ "as": 61,
64
+ "io": 62,
65
+ "gha": 63,
66
+ "ugh": 64,
67
+ "ha ": 65,
68
+ "di": 66,
69
+ "lug": 67,
70
+ "am": 68,
71
+ "a k": 69,
72
+ "kis": 70,
73
+ "isw": 71,
74
+ "swa": 72,
75
+ "wah": 73,
76
+ "i k": 74,
77
+ " lu": 75,
78
+ "i l": 76,
79
+ " wa": 77,
80
+ "iki": 78,
81
+ "ari": 79,
82
+ "har": 80,
83
+ "sha": 81,
84
+ "ash": 82,
85
+ "mas": 83,
86
+ "kin": 84,
87
+ " af": 85,
88
+ "afr": 86,
89
+ "fri": 87,
90
+ "i n": 88,
91
+ "ka ": 89,
92
+ "ika": 90,
93
+ "in": 91,
94
+ "i y": 92,
95
+ "a t": 93,
96
+ " ta": 94,
97
+ "ah": 95,
98
+ "sw": 96,
99
+ "is": 97,
100
+ "gh": 98,
101
+ "ug": 99,
102
+ "lu": 100,
103
+ "sh": 101,
104
+ "ka": 102,
105
+ "fr": 103,
106
+ "af": 104,
107
+ " a": 105,
108
+ "o ": 106,
109
+ " na": 107,
110
+ "na ": 108,
111
+ " mj": 109,
112
+ "j": 110,
113
+ "e": 111,
114
+ "f": 112,
115
+ "hil": 113,
116
+ "ahi": 114,
117
+ "li ": 115,
118
+ "la": 116,
119
+ " w": 117,
120
+ "u ": 118,
121
+ "ku": 119,
122
+ "mk": 120,
123
+ "ji": 121,
124
+ "mj": 122,
125
+ "mku": 123,
126
+ "do": 124,
127
+ " d": 125,
128
+ " mk": 126,
129
+ "ji ": 127,
130
+ "mji": 128,
131
+ " t": 129,
132
+ "mi": 130,
133
+ "a l": 131,
134
+ "a z": 132,
135
+ "c": 133,
136
+ "odo": 134,
137
+ "dom": 135,
138
+ "oma": 136,
139
+ "ma ": 137,
140
+ "uu ": 138,
141
+ "u w": 139,
142
+ "kuu": 140,
143
+ " za": 141,
144
+ "zai": 142,
145
+ "aid": 143,
146
+ "idi": 144,
147
+ "di ": 145,
148
+ "ina": 146,
149
+ "naz": 147,
150
+ "azu": 148,
151
+ "zun": 149,
152
+ "ung": 150,
153
+ "ngu": 151,
154
+ "gum": 152,
155
+ "umz": 153,
156
+ "mzw": 154,
157
+ "zwa": 155,
158
+ "mam": 156,
159
+ "ami": 157,
160
+ "mil": 158,
161
+ "lio": 159,
162
+ "ion": 160,
163
+ "oni": 161,
164
+ "a w": 162,
165
+ "wat": 163,
166
+ "atu": 164,
167
+ "tu ": 165,
168
+ "u a": 166,
169
+ " la": 167,
170
+ "lak": 168,
171
+ "aki": 169,
172
+ "ini": 170,
173
+ "i d": 171,
174
+ " da": 172,
175
+ "dar": 173,
176
+ "ar ": 174,
177
+ "r e": 175,
178
+ " es": 176,
179
+ "es ": 177,
180
+ "s s": 178,
181
+ " sa": 179,
182
+ "sal": 180,
183
+ "ala": 181,
184
+ "laa": 182,
185
+ "aam": 183,
186
+ "am ": 184,
187
+ "m n": 185,
188
+ " nd": 186,
189
+ "ndi": 187,
190
+ "dio": 188,
191
+ "io ": 189,
192
+ "o m": 190,
193
+ "kub": 191,
194
+ "ubw": 192,
195
+ "bwa": 193,
196
+ "da": 194,
197
+ "r ": 195,
198
+ " e": 196,
199
+ "es": 197,
200
+ "s ": 198,
201
+ " s": 199,
202
+ "sa": 200,
203
+ "al": 201,
204
+ "aa": 202,
205
+ "m ": 203,
206
+ "nd": 204,
207
+ "ub": 205,
208
+ "bw": 206,
209
+ " z": 207,
210
+ "ai": 208,
211
+ "id": 209,
212
+ "az": 210,
213
+ "zu": 211,
214
+ "b": 212,
215
+ "nc": 213,
216
+ "ch": 214,
217
+ " i": 215,
218
+ "iy": 216,
219
+ "yo": 217,
220
+ "ok": 218,
221
+ "ko": 219,
222
+ " r": 220,
223
+ "ra": 221,
224
+ "sm": 222,
225
+ "ke": 223,
226
+ "en": 224,
227
+ "ny": 225,
228
+ "od": 226,
229
+ "om": 227,
230
+ "uu": 228,
231
+ "ak": 229,
232
+ "oko": 230,
233
+ "ko ": 231,
234
+ "o a": 232,
235
+ "ki ": 233,
236
+ "a y": 234,
237
+ "a r": 235,
238
+ " ra": 236,
239
+ "ras": 237,
240
+ "asm": 238,
241
+ "smi": 239,
242
+ "mi ": 240,
243
+ " ke": 241,
244
+ "ken": 242,
245
+ "eny": 243,
246
+ "nya": 244,
247
+ "a d": 245,
248
+ " do": 246,
249
+ "dod": 247,
250
+ "un": 248,
251
+ "ng": 249,
252
+ "gu": 250,
253
+ "um": 251,
254
+ "mz": 252,
255
+ "zw": 253,
256
+ "on": 254,
257
+ "at": 255,
258
+ "tu": 256,
259
+ " nc": 257,
260
+ "nch": 258,
261
+ "chi": 259,
262
+ "hi ": 260,
263
+ "i i": 261,
264
+ " il": 262,
265
+ "liy": 263,
266
+ "iyo": 264,
267
+ "yok": 265
268
+ }
@@ -0,0 +1,235 @@
1
+ {
2
+ "்": 0,
3
+ "ி": 1,
4
+ "ம": 2,
5
+ "் ": 3,
6
+ "க": 4,
7
+ "ா": 5,
8
+ "ன": 6,
9
+ "த": 7,
10
+ "ந": 8,
11
+ "ு": 9,
12
+ "ன்": 10,
13
+ "ய": 11,
14
+ "ற": 12,
15
+ "ல": 13,
16
+ "ழ": 14,
17
+ " ம": 15,
18
+ "ிய": 16,
19
+ "யா": 17,
20
+ " த": 18,
21
+ "ம்": 19,
22
+ "ள": 20,
23
+ "ின்": 21,
24
+ "ன் ": 22,
25
+ "்க": 23,
26
+ "ும": 24,
27
+ "் ம": 25,
28
+ "ல்": 26,
29
+ "ில": 27,
30
+ "ியா": 28,
31
+ "ல் ": 29,
32
+ "்ற": 30,
33
+ "ை": 31,
34
+ "ில்": 32,
35
+ " ஒ": 33,
36
+ "ட": 34,
37
+ "ர": 35,
38
+ "ஒ": 36,
39
+ "ின": 37,
40
+ "ும்": 38,
41
+ "ம் ": 39,
42
+ "திய": 40,
43
+ "்தி": 41,
44
+ "யாவ": 42,
45
+ "ந்த": 43,
46
+ "இந்": 44,
47
+ "கர": 45,
48
+ "நக": 46,
49
+ "கு": 47,
50
+ "ாக": 48,
51
+ "றா": 49,
52
+ "ஒன": 50,
53
+ "ளி": 51,
54
+ "கள": 52,
55
+ "நகர": 53,
56
+ "கும": 54,
57
+ "ாகு": 55,
58
+ "றாக": 56,
59
+ "்றா": 57,
60
+ "ன்ற": 58,
61
+ "ஒன்": 59,
62
+ " ஒன": 60,
63
+ "் ஒ": 61,
64
+ "ளில": 62,
65
+ "களி": 63,
66
+ "ொழி": 64,
67
+ "மொழ": 65,
68
+ " மொ": 66,
69
+ "ிழ்": 67,
70
+ "மிழ": 68,
71
+ "தமி": 69,
72
+ " தம": 70,
73
+ "நாட": 71,
74
+ "ாவி": 72,
75
+ "்த": 73,
76
+ "தம": 74,
77
+ "மி": 75,
78
+ "இ": 76,
79
+ "தி": 77,
80
+ "ந்": 78,
81
+ "இந": 79,
82
+ "ொ": 80,
83
+ "உ": 81,
84
+ "வ": 82,
85
+ "ச": 83,
86
+ "ெ": 84,
87
+ "ாட": 85,
88
+ "ிழ": 86,
89
+ "ழ்": 87,
90
+ "மொ": 88,
91
+ "ொழ": 89,
92
+ "ழி": 90,
93
+ "நா": 91,
94
+ " உ": 92,
95
+ "வி": 93,
96
+ "கி": 94,
97
+ "ாவ": 95,
98
+ "ற்": 96,
99
+ " ந": 97,
100
+ "ு ": 98,
101
+ "ற்ற": 99,
102
+ "ென்": 100,
103
+ "சென": 101,
104
+ " செ": 102,
105
+ "ங": 103,
106
+ "ப": 104,
107
+ "் ச": 105,
108
+ " உல": 106,
109
+ "உலக": 107,
110
+ "லகி": 108,
111
+ "கின": 109,
112
+ "ெற": 110,
113
+ "தெ": 111,
114
+ "் ப": 112,
115
+ " பழ": 113,
116
+ "பழம": 114,
117
+ "ழமை": 115,
118
+ "மைய": 116,
119
+ "ையா": 117,
120
+ "யான": 118,
121
+ "ான ": 119,
122
+ "ன ம": 120,
123
+ "ழிக": 121,
124
+ "ிகள": 122,
125
+ "ா ": 123,
126
+ "்று": 124,
127
+ "றும": 125,
128
+ "் இ": 126,
129
+ " இந": 127,
130
+ "வின": 128,
131
+ " மு": 129,
132
+ "முக": 130,
133
+ "ுக்": 131,
134
+ "க்க": 132,
135
+ "்கி": 133,
136
+ "கிய": 134,
137
+ "ிய ": 135,
138
+ "ய ந": 136,
139
+ " நக": 137,
140
+ "கரங": 138,
141
+ "ரங்": 139,
142
+ "ங்க": 140,
143
+ "்கள": 141,
144
+ "ன்ன": 142,
145
+ "்னை": 143,
146
+ "னை ": 144,
147
+ "ை த": 145,
148
+ "ழ்ந": 146,
149
+ "்நா": 147,
150
+ "ாட்": 148,
151
+ "ட்ட": 149,
152
+ "்டி": 150,
153
+ "டின": 151,
154
+ "் த": 152,
155
+ " தல": 153,
156
+ "தலை": 154,
157
+ "லைந": 155,
158
+ "ைநக": 156,
159
+ "கரம": 157,
160
+ "ரம்": 158,
161
+ " மற": 159,
162
+ "மற்": 160,
163
+ "ட்": 161,
164
+ "்ட": 162,
165
+ "டி": 163,
166
+ "தல": 164,
167
+ "லை": 165,
168
+ "ைந": 166,
169
+ "ரம": 167,
170
+ "மற": 168,
171
+ "று": 169,
172
+ " இ": 170,
173
+ "மு": 171,
174
+ "ுக": 172,
175
+ "க்": 173,
176
+ "ய ": 174,
177
+ "ரங": 175,
178
+ "ங்": 176,
179
+ "டு": 177,
180
+ "யா ": 178,
181
+ " ப": 179,
182
+ "பழ": 180,
183
+ "ழம": 181,
184
+ "மை": 182,
185
+ "ைய": 183,
186
+ "ான": 184,
187
+ "ன ": 185,
188
+ "ிக": 186,
189
+ "லக": 187,
190
+ "உல": 188,
191
+ "ி ": 189,
192
+ " ச": 190,
193
+ "செ": 191,
194
+ "ென": 192,
195
+ "்ன": 193,
196
+ "னை": 194,
197
+ "ை ": 195,
198
+ "்ந": 196,
199
+ "்ள ": 197,
200
+ "ள ஒ": 198,
201
+ " ஒர": 199,
202
+ "ஒரு": 200,
203
+ "ரு ": 201,
204
+ "ு ந": 202,
205
+ " நா": 203,
206
+ "ள்": 204,
207
+ "ாடு": 205,
208
+ "டு ": 206,
209
+ "ு த": 207,
210
+ "உள": 208,
211
+ "ழ் ": 209,
212
+ "சி": 210,
213
+ "ாச": 211,
214
+ "கா": 212,
215
+ "ழி ": 213,
216
+ "ி உ": 214,
217
+ "ா த": 215,
218
+ " தெ": 216,
219
+ "தெற": 217,
220
+ "ெற்": 218,
221
+ "ற்க": 219,
222
+ "்கா": 220,
223
+ "காச": 221,
224
+ "ாசி": 222,
225
+ "சிய": 223,
226
+ "ரு": 224,
227
+ "ஒர": 225,
228
+ "வில": 226,
229
+ "ள ": 227,
230
+ "்ள": 228,
231
+ "் உ": 229,
232
+ " உள": 230,
233
+ "உள்": 231,
234
+ "ள்ள": 232
235
+ }