langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,255 @@
1
+ {
2
+ "ा": 0,
3
+ "र": 1,
4
+ "ी ": 2,
5
+ "ी": 3,
6
+ "ह": 4,
7
+ "क": 5,
8
+ "े": 6,
9
+ "ि": 7,
10
+ " ह": 8,
11
+ "भा": 9,
12
+ "भ": 10,
13
+ " है": 11,
14
+ "ं": 12,
15
+ "द": 13,
16
+ "ै": 14,
17
+ "है": 15,
18
+ " द": 16,
19
+ "त ": 17,
20
+ " भा": 18,
21
+ "ं ": 19,
22
+ "क ": 20,
23
+ "ा ": 21,
24
+ " ए": 22,
25
+ "ल": 23,
26
+ "ै ": 24,
27
+ "त": 25,
28
+ "न": 26,
29
+ " भ": 27,
30
+ "्": 28,
31
+ "स": 29,
32
+ "म": 30,
33
+ "है ": 31,
34
+ "ष": 32,
35
+ "ए": 33,
36
+ "रत ": 34,
37
+ "ारत": 35,
38
+ " म": 36,
39
+ "मे": 37,
40
+ "ें": 38,
41
+ "षा": 39,
42
+ " स": 40,
43
+ "भार": 41,
44
+ "एक": 42,
45
+ "ली ": 43,
46
+ "ाष": 44,
47
+ "र ": 45,
48
+ "ली": 46,
49
+ "े ": 47,
50
+ " मे": 48,
51
+ "में": 49,
52
+ "ें ": 50,
53
+ "श": 51,
54
+ "एक ": 52,
55
+ "ज": 53,
56
+ "ाषा": 54,
57
+ "रत": 55,
58
+ "ार": 56,
59
+ " एक": 57,
60
+ "ी भ": 58,
61
+ "ं स": 59,
62
+ "भाष": 60,
63
+ "ा म": 61,
64
+ "से": 62,
65
+ "और": 63,
66
+ " औ": 64,
67
+ "ाज": 65,
68
+ "ध": 66,
69
+ "ान": 67,
70
+ "या ": 68,
71
+ "से ": 69,
72
+ "िया": 70,
73
+ "और ": 71,
74
+ " और": 72,
75
+ "ै औ": 73,
76
+ "षा ": 74,
77
+ "त क": 75,
78
+ " की": 76,
79
+ "की ": 77,
80
+ "ी र": 78,
81
+ " रा": 79,
82
+ "राज": 80,
83
+ "िय": 81,
84
+ "ब": 82,
85
+ "ु": 83,
86
+ "या": 84,
87
+ "औ": 85,
88
+ "य": 86,
89
+ " क": 87,
90
+ "की": 88,
91
+ " र": 89,
92
+ "रा": 90,
93
+ "क प": 91,
94
+ "र ए": 92,
95
+ "ा भ": 93,
96
+ "ी ह": 94,
97
+ "नी ": 95,
98
+ "ानी": 96,
99
+ "धान": 97,
100
+ "श ह": 98,
101
+ "जधा": 99,
102
+ "ाजभ": 100,
103
+ "जभा": 101,
104
+ "ा ह": 102,
105
+ "ाजध": 103,
106
+ "्ली": 104,
107
+ "ल्ल": 105,
108
+ " दि": 106,
109
+ "र ह": 107,
110
+ "ै ह": 108,
111
+ "हर ": 109,
112
+ " हि": 110,
113
+ "हिं": 111,
114
+ "िंद": 112,
115
+ "ंदी": 113,
116
+ "दी ": 114,
117
+ "शहर": 115,
118
+ " शह": 116,
119
+ "ख श": 117,
120
+ "ुख ": 118,
121
+ "मुख": 119,
122
+ "रमु": 120,
123
+ "्रम": 121,
124
+ "प्र": 122,
125
+ " प्": 123,
126
+ " जा": 124,
127
+ "जान": 125,
128
+ "ई द": 126,
129
+ "ाने": 127,
130
+ "ने ": 128,
131
+ "े व": 129,
132
+ " वा": 130,
133
+ "वाल": 131,
134
+ "ाली": 132,
135
+ "षाओ": 133,
136
+ "नई ": 134,
137
+ " नई": 135,
138
+ "ाओं": 136,
139
+ "ओं ": 137,
140
+ "ं म": 138,
141
+ " से": 139,
142
+ "े ए": 140,
143
+ "ै न": 141,
144
+ "क ह": 142,
145
+ "र द": 143,
146
+ " दु": 144,
147
+ "दुन": 145,
148
+ "ुनि": 146,
149
+ "निय": 147,
150
+ " सब": 148,
151
+ "सबस": 149,
152
+ "बसे": 150,
153
+ "िल्": 151,
154
+ "े अ": 152,
155
+ " अध": 153,
156
+ "अधि": 154,
157
+ "धिक": 155,
158
+ "िक ": 156,
159
+ "क ब": 157,
160
+ " बो": 158,
161
+ "बोल": 159,
162
+ "ोली": 160,
163
+ "दिल": 161,
164
+ "ी ज": 162,
165
+ "ण": 163,
166
+ "श ": 164,
167
+ "हि": 165,
168
+ "िं": 166,
169
+ "ंद": 167,
170
+ "दी": 168,
171
+ "जभ": 169,
172
+ "दु": 170,
173
+ "ुन": 171,
174
+ "नि": 172,
175
+ "सब": 173,
176
+ "बस": 174,
177
+ " अ": 175,
178
+ "अध": 176,
179
+ "धि": 177,
180
+ "िक": 178,
181
+ " ब": 179,
182
+ "बो": 180,
183
+ "ोल": 181,
184
+ " ज": 182,
185
+ "जा": 183,
186
+ "ने": 184,
187
+ " व": 185,
188
+ "थ": 186,
189
+ "अ": 187,
190
+ "ो": 188,
191
+ "व": 189,
192
+ "ओ": 190,
193
+ "ई": 191,
194
+ "प": 192,
195
+ "ख": 193,
196
+ "दक": 194,
197
+ "क्": 195,
198
+ "्ष": 196,
199
+ "षि": 197,
200
+ "िण": 198,
201
+ "ण ": 199,
202
+ "एश": 200,
203
+ "शि": 201,
204
+ "स्": 202,
205
+ "्थ": 203,
206
+ "थि": 204,
207
+ "ित": 205,
208
+ "दे": 206,
209
+ "ेश": 207,
210
+ "वा": 208,
211
+ "हर": 209,
212
+ "त द": 210,
213
+ " दक": 211,
214
+ "दक्": 212,
215
+ "क्ष": 213,
216
+ "्षि": 214,
217
+ "षिण": 215,
218
+ "िण ": 216,
219
+ "ण ए": 217,
220
+ " एश": 218,
221
+ "एशि": 219,
222
+ "शिय": 220,
223
+ " स्": 221,
224
+ "स्थ": 222,
225
+ "्थि": 223,
226
+ "थित": 224,
227
+ "ित ": 225,
228
+ "त ए": 226,
229
+ "क द": 227,
230
+ " दे": 228,
231
+ "देश": 229,
232
+ "ेश ": 230,
233
+ "ाल": 231,
234
+ "ाओ": 232,
235
+ "ओं": 233,
236
+ " न": 234,
237
+ "नई": 235,
238
+ "ई ": 236,
239
+ "दि": 237,
240
+ "िल": 238,
241
+ "ल्": 239,
242
+ "्ल": 240,
243
+ "जध": 241,
244
+ "धा": 242,
245
+ "नी": 243,
246
+ " प": 244,
247
+ "प्": 245,
248
+ "्र": 246,
249
+ "रम": 247,
250
+ "मु": 248,
251
+ "ुख": 249,
252
+ "ख ": 250,
253
+ " श": 251,
254
+ "शह": 252
255
+ }
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "i": 1,
4
+ "j": 2,
5
+ "e": 3,
6
+ "o": 4,
7
+ "r": 5,
8
+ "t": 6,
9
+ "s": 7,
10
+ "v": 8,
11
+ "k": 9,
12
+ "n": 10,
13
+ "i ": 11,
14
+ " j": 12,
15
+ "je": 13,
16
+ "u": 14,
17
+ " je": 15,
18
+ "d": 16,
19
+ "l": 17,
20
+ "va": 18,
21
+ "at": 19,
22
+ "sk": 20,
23
+ "a ": 21,
24
+ "e ": 22,
25
+ "oj": 23,
26
+ "h": 24,
27
+ "g": 25,
28
+ "z": 26,
29
+ "j ": 27,
30
+ "ats": 28,
31
+ "hrv": 29,
32
+ "p": 30,
33
+ "rva": 31,
34
+ "vat": 32,
35
+ "ts": 33,
36
+ "tsk": 34,
37
+ "oj ": 35,
38
+ "je ": 36,
39
+ "hr": 37,
40
+ "rv": 38,
41
+ "st": 39,
42
+ " g": 40,
43
+ "ka ": 41,
44
+ "ur": 42,
45
+ " hr": 43,
46
+ " h": 44,
47
+ "ki": 45,
48
+ "gr": 46,
49
+ "d ": 47,
50
+ "ti": 48,
51
+ "po": 49,
52
+ "b": 50,
53
+ "av": 51,
54
+ "m": 52,
55
+ "ka": 53,
56
+ "na": 54,
57
+ " u ": 55,
58
+ "tur": 56,
59
+ "ij": 57,
60
+ " z": 58,
61
+ "noj": 59,
62
+ "ad ": 60,
63
+ " p": 61,
64
+ " i ": 62,
65
+ "ve": 63,
66
+ "č": 64,
67
+ "rad": 65,
68
+ "gra": 66,
69
+ "li": 67,
70
+ "ska": 68,
71
+ " gr": 69,
72
+ "i g": 70,
73
+ " i": 71,
74
+ "ni ": 72,
75
+ "ist": 73,
76
+ "a j": 74,
77
+ "ni": 75,
78
+ "ra": 76,
79
+ "ad": 77,
80
+ "lav": 78,
81
+ " d": 79,
82
+ "in": 80,
83
+ " u": 81,
84
+ "u ": 82,
85
+ " po": 83,
86
+ "is": 84,
87
+ "zik": 85,
88
+ "ezi": 86,
89
+ "jez": 87,
90
+ "no": 88,
91
+ "tu": 89,
92
+ "sti": 90,
93
+ "ski": 91,
94
+ "ez": 92,
95
+ "zi": 93,
96
+ "ik": 94,
97
+ "la": 95,
98
+ "i h": 96,
99
+ "tin": 97,
100
+ " o": 98,
101
+ " s": 99,
102
+ "ba": 100,
103
+ " ze": 101,
104
+ "u z": 102,
105
+ "i n": 103,
106
+ " na": 104,
107
+ "d u": 105,
108
+ "ći ": 106,
109
+ "naj": 107,
110
+ "eći": 108,
111
+ "već": 109,
112
+ "jve": 110,
113
+ "ajv": 111,
114
+ "ž": 112,
115
+ " sl": 113,
116
+ "sla": 114,
117
+ "ave": 115,
118
+ "ven": 116,
119
+ "ens": 117,
120
+ "nsk": 118,
121
+ "kih": 119,
122
+ "ih ": 120,
123
+ "h j": 121,
124
+ "ika": 122,
125
+ "a z": 123,
126
+ " za": 124,
127
+ "zag": 125,
128
+ "agr": 126,
129
+ "gre": 127,
130
+ "reb": 128,
131
+ "eb ": 129,
132
+ "b j": 130,
133
+ "e g": 131,
134
+ " gl": 132,
135
+ "gla": 133,
136
+ "avn": 134,
137
+ "vni": 135,
138
+ "d h": 136,
139
+ "ske": 137,
140
+ "ke ": 138,
141
+ "e i": 139,
142
+ "uri": 140,
143
+ " tu": 141,
144
+ "i t": 142,
145
+ "i i": 143,
146
+ "ini": 144,
147
+ "šti": 145,
148
+ "ašt": 146,
149
+ "baš": 147,
150
+ " ba": 148,
151
+ "j b": 149,
152
+ "rno": 150,
153
+ "urn": 151,
154
+ "ltu": 152,
155
+ "ult": 153,
156
+ "kul": 154,
157
+ " ku": 155,
158
+ "svo": 156,
159
+ "ama": 157,
160
+ "jam": 158,
161
+ "ija": 159,
162
+ "cij": 160,
163
+ "aci": 161,
164
+ "nac": 162,
165
+ "ina": 163,
166
+ "est": 164,
167
+ "des": 165,
168
+ " de": 166,
169
+ "m d": 167,
170
+ "im ": 168,
171
+ "kim": 169,
172
+ "čki": 170,
173
+ "ičk": 171,
174
+ "tič": 172,
175
+ "ris": 173,
176
+ " sv": 174,
177
+ "o s": 175,
178
+ "po ": 176,
179
+ "a p": 177,
180
+ "ta ": 178,
181
+ "ata": 179,
182
+ "nat": 180,
183
+ "zna": 181,
184
+ "ozn": 182,
185
+ "poz": 183,
186
+ "e p": 184,
187
+ "ji ": 185,
188
+ "lji": 186,
189
+ "mlj": 187,
190
+ "eml": 188,
191
+ "zem": 189,
192
+ "i k": 190,
193
+ "li ": 191,
194
+ "ali": 192,
195
+ "bal": 193,
196
+ "oba": 194,
197
+ " ob": 195,
198
+ "j o": 196,
199
+ "poj": 197,
200
+ "epo": 198,
201
+ "jep": 199,
202
+ "ije": 200,
203
+ "lij": 201,
204
+ " li": 202,
205
+ "j l": 203,
206
+ "joj": 204,
207
+ "ojo": 205,
208
+ "voj": 206,
209
+ "za": 207,
210
+ "ag": 208,
211
+ "re": 209,
212
+ "eb": 210,
213
+ "b ": 211,
214
+ "gl": 212,
215
+ "vn": 213,
216
+ "ke": 214,
217
+ " n": 215,
218
+ "aj": 216,
219
+ "jv": 217,
220
+ "eć": 218,
221
+ "ći": 219,
222
+ "ze": 220,
223
+ "em": 221,
224
+ "ml": 222,
225
+ "lj": 223,
226
+ "ji": 224,
227
+ "oz": 225,
228
+ "zn": 226,
229
+ "ta": 227,
230
+ "o ": 228,
231
+ "sv": 229,
232
+ "vo": 230,
233
+ "jo": 231,
234
+ " l": 232,
235
+ "ep": 233,
236
+ "ob": 234,
237
+ "al": 235,
238
+ "ć": 236,
239
+ "š": 237,
240
+ "c": 238,
241
+ "dr": 239,
242
+ "rž": 240,
243
+ "ža": 241,
244
+ "ju": 242,
245
+ "ug": 243,
246
+ "go": 244,
247
+ "oi": 245,
248
+ "to": 246,
249
+ "oč": 247,
250
+ "čn": 248,
251
+ " e": 249,
252
+ "eu": 250,
253
+ "ro": 251,
254
+ "op": 252,
255
+ "pi": 253,
256
+ "k ": 254,
257
+ "ed": 255,
258
+ "da": 256,
259
+ "an": 257,
260
+ "n ": 258,
261
+ "od": 259,
262
+ "sl": 260,
263
+ "en": 261,
264
+ "ns": 262,
265
+ "ih": 263,
266
+ "h ": 264,
267
+ " k": 265,
268
+ " ju": 266,
269
+ "jug": 267,
270
+ "ugo": 268,
271
+ "goi": 269,
272
+ "ois": 270,
273
+ "sto": 271,
274
+ "toč": 272,
275
+ "očn": 273,
276
+ "čno": 274,
277
+ "j e": 275,
278
+ " eu": 276,
279
+ "eur": 277,
280
+ "uro": 278,
281
+ "rop": 279,
282
+ "opi": 280,
283
+ "pi ": 281,
284
+ "ki ": 282,
285
+ "i j": 283,
286
+ "ik ": 284,
287
+ "k j": 285,
288
+ "e j": 286,
289
+ "jed": 287,
290
+ "eda": 288,
291
+ "dan": 289,
292
+ "an ": 290,
293
+ "n o": 291,
294
+ " od": 292,
295
+ "od ": 293,
296
+ "d s": 294,
297
+ "ku": 295,
298
+ "ul": 296,
299
+ "lt": 297,
300
+ "rn": 298,
301
+ " b": 299
302
+ }