langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,302 @@
1
+ {
2
+ "e": 0,
3
+ "s": 1,
4
+ "a": 2,
5
+ "o": 3,
6
+ "v": 4,
7
+ "n": 5,
8
+ "k": 6,
9
+ "j": 7,
10
+ "t": 8,
11
+ "r": 9,
12
+ "i": 10,
13
+ "l": 11,
14
+ "ov": 12,
15
+ "h": 13,
16
+ "e ": 14,
17
+ "sl": 15,
18
+ " j": 16,
19
+ "u": 17,
20
+ " s": 18,
21
+ "a ": 19,
22
+ "st": 20,
23
+ "je": 21,
24
+ "nsk": 22,
25
+ "slo": 23,
26
+ "o ": 24,
27
+ "sk": 25,
28
+ "ns": 26,
29
+ "lo": 27,
30
+ "lov": 28,
31
+ "m": 29,
32
+ "to": 30,
33
+ " sl": 31,
34
+ "ove": 32,
35
+ "ven": 33,
36
+ "ve": 34,
37
+ "en": 35,
38
+ "ens": 36,
39
+ " m": 37,
40
+ "ý": 38,
41
+ "je ": 39,
42
+ " k": 40,
43
+ " je": 41,
44
+ "ri": 42,
45
+ "v ": 43,
46
+ "aj": 44,
47
+ "ra": 45,
48
+ "z": 46,
49
+ "is": 47,
50
+ "ko": 48,
51
+ "sto": 49,
52
+ "es": 50,
53
+ " a": 51,
54
+ " me": 52,
55
+ "mes": 53,
56
+ "est": 54,
57
+ "d": 55,
58
+ "me": 56,
59
+ "e s": 57,
60
+ "ský": 58,
61
+ " h": 59,
62
+ "av": 60,
63
+ "la": 61,
64
+ "at": 62,
65
+ " b": 63,
66
+ "h ": 64,
67
+ "ó": 65,
68
+ "p": 66,
69
+ "sko": 67,
70
+ "e m": 68,
71
+ "ko ": 69,
72
+ " kr": 70,
73
+ "kra": 71,
74
+ "raj": 72,
75
+ "aji": 73,
76
+ "jin": 74,
77
+ " v ": 75,
78
+ "u ": 76,
79
+ " a ": 77,
80
+ "ist": 78,
81
+ "tú": 79,
82
+ "to ": 80,
83
+ "oh": 81,
84
+ "edn": 82,
85
+ "á ": 83,
86
+ "ch ": 84,
87
+ "ja": 85,
88
+ "b": 86,
89
+ "c": 87,
90
+ "kr": 88,
91
+ "ký": 89,
92
+ "ji": 90,
93
+ "in": 91,
94
+ "ur": 92,
95
+ "na": 93,
96
+ "y": 94,
97
+ " v": 95,
98
+ "o s": 96,
99
+ "ne": 97,
100
+ "dn": 98,
101
+ "ed": 99,
102
+ "ch": 100,
103
+ "ýc": 101,
104
+ "ú": 102,
105
+ " ja": 103,
106
+ "va": 104,
107
+ "jaz": 105,
108
+ "azy": 106,
109
+ "zyk": 107,
110
+ "á": 108,
111
+ "ých": 109,
112
+ "lav": 110,
113
+ "yk": 111,
114
+ "zy": 112,
115
+ "az": 113,
116
+ "ajv": 114,
117
+ "yko": 115,
118
+ "jvä": 116,
119
+ "é": 117,
120
+ "ov ": 118,
121
+ "naj": 119,
122
+ " na": 120,
123
+ "kov": 121,
124
+ "a n": 122,
125
+ "a j": 123,
126
+ "e h": 124,
127
+ "va ": 125,
128
+ "ava": 126,
129
+ " hl": 127,
130
+ "hla": 128,
131
+ "avn": 129,
132
+ "vné": 130,
133
+ "né ": 131,
134
+ "é m": 132,
135
+ "sla": 133,
136
+ "isl": 134,
137
+ "tis": 135,
138
+ "ati": 136,
139
+ "rat": 137,
140
+ "bra": 138,
141
+ " br": 139,
142
+ "ska": 140,
143
+ "ka ": 141,
144
+ "a a": 142,
145
+ "v b": 143,
146
+ "ult": 144,
147
+ "ltú": 145,
148
+ "túr": 146,
149
+ "úru": 147,
150
+ "ru ": 148,
151
+ "u k": 149,
152
+ " kt": 150,
153
+ "kto": 151,
154
+ "tor": 152,
155
+ "orá": 153,
156
+ "rá ": 154,
157
+ "á p": 155,
158
+ " pr": 156,
159
+ "pri": 157,
160
+ "riť": 158,
161
+ "iťa": 159,
162
+ "ťah": 160,
163
+ "ahu": 161,
164
+ "huj": 162,
165
+ "uje": 163,
166
+ " mn": 164,
167
+ "mno": 165,
168
+ "noh": 166,
169
+ "ohý": 167,
170
+ "hýc": 168,
171
+ "h t": 169,
172
+ " tu": 170,
173
+ "tur": 171,
174
+ "uri": 172,
175
+ "ris": 173,
176
+ "tov": 174,
177
+ "väč": 175,
178
+ "äčš": 176,
179
+ "čši": 177,
180
+ "šie": 178,
181
+ "ie ": 179,
182
+ "o v": 180,
183
+ "v k": 181,
184
+ "ine": 182,
185
+ "ne ": 183,
186
+ "o m": 184,
187
+ " má": 185,
188
+ "má ": 186,
189
+ "á b": 187,
190
+ " bo": 188,
191
+ "boh": 189,
192
+ "oha": 190,
193
+ "hat": 191,
194
+ "atú": 192,
195
+ "tú ": 193,
196
+ "ú h": 194,
197
+ " hi": 195,
198
+ "his": 196,
199
+ "stó": 197,
200
+ "tór": 198,
201
+ "óri": 199,
202
+ "riu": 200,
203
+ "iu ": 201,
204
+ "u a": 202,
205
+ "a k": 203,
206
+ " ku": 204,
207
+ "kul": 205,
208
+ "né": 206,
209
+ "é ": 207,
210
+ "ka": 208,
211
+ " n": 209,
212
+ "jv": 210,
213
+ "vä": 211,
214
+ "äč": 212,
215
+ "čš": 213,
216
+ "ši": 214,
217
+ "ie": 215,
218
+ "má": 216,
219
+ "bo": 217,
220
+ "ha": 218,
221
+ "ú ": 219,
222
+ "hi": 220,
223
+ "tó": 221,
224
+ "ór": 222,
225
+ "iu": 223,
226
+ "ku": 224,
227
+ "ul": 225,
228
+ "lt": 226,
229
+ "úr": 227,
230
+ "ru": 228,
231
+ "kt": 229,
232
+ "or": 230,
233
+ "ä": 231,
234
+ "č": 232,
235
+ "š": 233,
236
+ "ť": 234,
237
+ "tr": 235,
238
+ "re": 236,
239
+ "ej": 237,
240
+ "j ": 238,
241
+ " e": 239,
242
+ "eu": 240,
243
+ "ró": 241,
244
+ "óp": 242,
245
+ "pe": 243,
246
+ "ý ": 244,
247
+ "k ": 245,
248
+ "ný": 246,
249
+ "ým": 247,
250
+ "m ": 248,
251
+ " z": 249,
252
+ "zo": 250,
253
+ "an": 251,
254
+ "br": 252,
255
+ "ti": 253,
256
+ "hl": 254,
257
+ "vn": 255,
258
+ "ej ": 256,
259
+ "j e": 257,
260
+ " eu": 258,
261
+ "eur": 259,
262
+ "uró": 260,
263
+ "róp": 261,
264
+ "ópe": 262,
265
+ "pe ": 263,
266
+ "ký ": 264,
267
+ "ý j": 265,
268
+ "yk ": 266,
269
+ "k j": 267,
270
+ "e j": 268,
271
+ "jed": 269,
272
+ "dný": 270,
273
+ "ným": 271,
274
+ "ým ": 272,
275
+ "m z": 273,
276
+ " zo": 274,
277
+ "zo ": 275,
278
+ "ova": 276,
279
+ "van": 277,
280
+ "ans": 278,
281
+ "kýc": 279,
282
+ "h j": 280,
283
+ "rá": 281,
284
+ " p": 282,
285
+ "pr": 283,
286
+ "iť": 284,
287
+ "ťa": 285,
288
+ "ah": 286,
289
+ "hu": 287,
290
+ "uj": 288,
291
+ "mn": 289,
292
+ "no": 290,
293
+ "hý": 291,
294
+ " t": 292,
295
+ "tu": 293,
296
+ "o j": 294,
297
+ "e k": 295,
298
+ "ina": 296,
299
+ "na ": 297,
300
+ "a v": 298,
301
+ "v s": 299
302
+ }
@@ -0,0 +1,302 @@
1
+ {
2
+ "r": 0,
3
+ "s": 1,
4
+ "a": 2,
5
+ "i": 3,
6
+ "t": 4,
7
+ "n": 5,
8
+ "e": 6,
9
+ "o": 7,
10
+ "k": 8,
11
+ "r ": 9,
12
+ " s": 10,
13
+ "v": 11,
14
+ "ä": 12,
15
+ "a ": 13,
16
+ "u": 14,
17
+ "c": 15,
18
+ "h": 16,
19
+ "t ": 17,
20
+ "er": 18,
21
+ "oc": 19,
22
+ "l": 20,
23
+ "ur": 21,
24
+ "sve": 22,
25
+ " är": 23,
26
+ "är ": 24,
27
+ " oc": 25,
28
+ "in": 26,
29
+ "d": 27,
30
+ "r s": 28,
31
+ "är": 29,
32
+ " ä": 30,
33
+ "ri": 31,
34
+ " o": 32,
35
+ "sv": 33,
36
+ "ve": 34,
37
+ "et": 35,
38
+ "ur ": 36,
39
+ "st": 37,
40
+ "ck": 38,
41
+ "och": 39,
42
+ "ch ": 40,
43
+ "tu": 41,
44
+ "än": 42,
45
+ " si": 43,
46
+ "sin": 44,
47
+ "r o": 45,
48
+ "ch": 46,
49
+ "si": 47,
50
+ " k": 48,
51
+ "tur": 49,
52
+ "h ": 50,
53
+ " sv": 51,
54
+ "ge": 52,
55
+ "ig": 53,
56
+ "m": 54,
57
+ "ige": 55,
58
+ "ver": 56,
59
+ "eri": 57,
60
+ "rig": 58,
61
+ "or": 59,
62
+ "g": 60,
63
+ "to": 61,
64
+ "ner": 62,
65
+ "a s": 63,
66
+ " h": 64,
67
+ "ka ": 65,
68
+ "io": 66,
69
+ "one": 67,
70
+ "tio": 68,
71
+ "et ": 69,
72
+ "kä": 70,
73
+ "nt": 71,
74
+ " f": 72,
75
+ "fö": 73,
76
+ "ör": 74,
77
+ "at": 75,
78
+ "na": 76,
79
+ "ge ": 77,
80
+ "r k": 78,
81
+ " kä": 79,
82
+ "kän": 80,
83
+ "änt": 81,
84
+ "nt ": 82,
85
+ "t f": 83,
86
+ "e ä": 84,
87
+ " fö": 85,
88
+ "för": 86,
89
+ "ör ": 87,
90
+ "ra ": 88,
91
+ "ock": 89,
92
+ "in ": 90,
93
+ "va": 91,
94
+ "n ": 92,
95
+ "sto": 93,
96
+ "ti": 94,
97
+ "rk": 95,
98
+ " a": 96,
99
+ "s ": 97,
100
+ "ta": 98,
101
+ " t": 99,
102
+ "ka": 100,
103
+ "sk": 101,
104
+ "ra": 102,
105
+ "no": 103,
106
+ " n": 104,
107
+ " i": 105,
108
+ "d ": 106,
109
+ "la": 107,
110
+ " e": 108,
111
+ "e ": 109,
112
+ "ö": 110,
113
+ "f": 111,
114
+ "å": 112,
115
+ "p": 113,
116
+ "on": 114,
117
+ "ne": 115,
118
+ "nn": 116,
119
+ "is": 117,
120
+ " m": 118,
121
+ "ult": 119,
122
+ "kor": 120,
123
+ "kul": 121,
124
+ "or ": 122,
125
+ "ati": 123,
126
+ " st": 124,
127
+ "vat": 125,
128
+ "toc": 126,
129
+ "ktu": 127,
130
+ "ckh": 128,
131
+ "kho": 129,
132
+ "hol": 130,
133
+ "uvu": 131,
134
+ "olm": 132,
135
+ "ova": 133,
136
+ "lm ": 134,
137
+ "m ä": 135,
138
+ "ges": 136,
139
+ "es ": 137,
140
+ "s h": 138,
141
+ "huv": 139,
142
+ " hu": 140,
143
+ "il": 141,
144
+ "rka": 142,
145
+ "a t": 143,
146
+ " ti": 144,
147
+ "io ": 145,
148
+ "o m": 146,
149
+ " mi": 147,
150
+ "mil": 148,
151
+ "ilj": 149,
152
+ "ljo": 150,
153
+ "jon": 151,
154
+ "ion": 152,
155
+ "er ": 153,
156
+ "r m": 154,
157
+ " mä": 155,
158
+ "män": 156,
159
+ "änn": 157,
160
+ " ku": 158,
161
+ "nni": 159,
162
+ "nis": 160,
163
+ "isk": 161,
164
+ "sko": 162,
165
+ "ark": 163,
166
+ "n n": 164,
167
+ "å k": 165,
168
+ "så ": 166,
169
+ "kså": 167,
170
+ "cks": 168,
171
+ " ar": 169,
172
+ "a a": 170,
173
+ "ia ": 171,
174
+ "ria": 172,
175
+ "n v": 173,
176
+ " va": 174,
177
+ "kra": 175,
178
+ "vac": 176,
179
+ "ckr": 177,
180
+ "ori": 178,
181
+ "tor": 179,
182
+ "ist": 180,
183
+ "his": 181,
184
+ " hi": 182,
185
+ "ack": 183,
186
+ "ltu": 184,
187
+ "h h": 185,
188
+ "vud": 186,
189
+ "uds": 187,
190
+ "dst": 188,
191
+ "sta": 189,
192
+ "tad": 190,
193
+ "ad ": 191,
194
+ "d o": 192,
195
+ "nov": 193,
196
+ "ekt": 194,
197
+ "tek": 195,
198
+ "nno": 196,
199
+ "ite": 197,
200
+ "kit": 198,
201
+ "inn": 199,
202
+ "rki": 200,
203
+ " in": 201,
204
+ "a i": 202,
205
+ "na ": 203,
206
+ "ina": 204,
207
+ "h s": 205,
208
+ "atu": 206,
209
+ "h ä": 207,
210
+ "nat": 208,
211
+ " na": 209,
212
+ "lj": 210,
213
+ "jo": 211,
214
+ "mä": 212,
215
+ "ni": 213,
216
+ "ko": 214,
217
+ "kh": 215,
218
+ "ho": 216,
219
+ "ol": 217,
220
+ "lm": 218,
221
+ "m ": 219,
222
+ "es": 220,
223
+ "hu": 221,
224
+ "uv": 222,
225
+ "vu": 223,
226
+ "ud": 224,
227
+ "ds": 225,
228
+ "ad": 226,
229
+ " v": 227,
230
+ "ac": 228,
231
+ "kr": 229,
232
+ "ar": 230,
233
+ "ki": 231,
234
+ "it": 232,
235
+ "te": 233,
236
+ "ek": 234,
237
+ "kt": 235,
238
+ "ku": 236,
239
+ "opa": 237,
240
+ "j": 238,
241
+ "tt": 239,
242
+ " l": 240,
243
+ "an": 241,
244
+ "nd": 242,
245
+ "i ": 243,
246
+ "rr": 244,
247
+ "eu": 245,
248
+ "ro": 246,
249
+ "op": 247,
250
+ "pa": 248,
251
+ " d": 249,
252
+ "de": 250,
253
+ "en": 251,
254
+ "ns": 252,
255
+ "sp": 253,
256
+ "pr": 254,
257
+ "rå": 255,
258
+ "åk": 256,
259
+ "ke": 257,
260
+ "al": 258,
261
+ "as": 259,
262
+ "av": 260,
263
+ "v ": 261,
264
+ " c": 262,
265
+ "ci": 263,
266
+ "ir": 264,
267
+ "o ": 265,
268
+ "mi": 266,
269
+ "pa ": 267,
270
+ "a d": 268,
271
+ " de": 269,
272
+ "det": 270,
273
+ "t s": 271,
274
+ "ven": 272,
275
+ "ens": 273,
276
+ "nsk": 274,
277
+ "ska": 275,
278
+ " sp": 276,
279
+ "spr": 277,
280
+ "prå": 278,
281
+ "råk": 279,
282
+ "åke": 280,
283
+ "ket": 281,
284
+ "t t": 282,
285
+ " ta": 283,
286
+ "tal": 284,
287
+ "ala": 285,
288
+ "las": 286,
289
+ "as ": 287,
290
+ "s a": 288,
291
+ " av": 289,
292
+ "av ": 290,
293
+ "v c": 291,
294
+ " ci": 292,
295
+ "cir": 293,
296
+ "irk": 294,
297
+ "ul": 295,
298
+ "lt": 296,
299
+ "hi": 297,
300
+ "ia": 298,
301
+ "ks": 299
302
+ }