langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "e": 1,
4
+ "s": 2,
5
+ "o": 3,
6
+ "l": 4,
7
+ "d": 5,
8
+ "i": 6,
9
+ "a ": 7,
10
+ "u": 8,
11
+ "s ": 9,
12
+ "n": 10,
13
+ "t": 11,
14
+ "r": 12,
15
+ "es": 13,
16
+ " e": 14,
17
+ "e ": 15,
18
+ "p": 16,
19
+ "c": 17,
20
+ "l ": 18,
21
+ "es ": 19,
22
+ " es": 20,
23
+ "pa": 21,
24
+ "ad": 22,
25
+ "de": 23,
26
+ " d": 24,
27
+ "m": 25,
28
+ " de": 26,
29
+ "añ": 27,
30
+ "ñ": 28,
31
+ "de ": 29,
32
+ "on": 30,
33
+ " c": 31,
34
+ " m": 32,
35
+ "as": 33,
36
+ "un": 34,
37
+ "ro": 35,
38
+ "st": 36,
39
+ "as ": 37,
40
+ " l": 38,
41
+ "la": 39,
42
+ "da": 40,
43
+ "ri": 41,
44
+ "ca": 42,
45
+ "ra": 43,
46
+ "tr": 44,
47
+ "el ": 45,
48
+ "pañ": 46,
49
+ "spa": 47,
50
+ " a": 48,
51
+ "esp": 49,
52
+ "it": 50,
53
+ " s": 51,
54
+ "sp": 52,
55
+ "ur": 53,
56
+ "el": 54,
57
+ "al ": 55,
58
+ "ic": 56,
59
+ "ita": 57,
60
+ "e a": 58,
61
+ "su": 59,
62
+ "or": 60,
63
+ "ta": 61,
64
+ " la": 62,
65
+ "ci": 63,
66
+ "no": 64,
67
+ "n ": 65,
68
+ "í": 66,
69
+ "y ": 67,
70
+ " y": 68,
71
+ "al": 69,
72
+ "da ": 70,
73
+ "ña": 71,
74
+ "uro": 72,
75
+ "g": 73,
76
+ "e e": 74,
77
+ "ono": 75,
78
+ "tra": 76,
79
+ " su": 77,
80
+ " u": 78,
81
+ "s c": 79,
82
+ "h": 80,
83
+ "l e": 81,
84
+ "año": 82,
85
+ "is": 83,
86
+ " y ": 84,
87
+ "a y": 85,
88
+ " el": 86,
89
+ "te": 87,
90
+ "en": 88,
91
+ "o ": 89,
92
+ "do": 90,
93
+ "s d": 91,
94
+ "sit": 92,
95
+ "ño": 93,
96
+ "ada": 94,
97
+ "na": 95,
98
+ "ua": 96,
99
+ "tu": 97,
100
+ "si": 98,
101
+ "do ": 99,
102
+ " h": 100,
103
+ " ca": 101,
104
+ "id": 102,
105
+ "a c": 103,
106
+ " p": 104,
107
+ "aña": 105,
108
+ "ña ": 106,
109
+ "a e": 107,
110
+ "y": 108,
111
+ "s u": 109,
112
+ " un": 110,
113
+ "s l": 111,
114
+ "por": 112,
115
+ "su ": 113,
116
+ " po": 114,
117
+ "cul": 115,
118
+ "a p": 116,
119
+ "u r": 117,
120
+ "or ": 118,
121
+ " ri": 119,
122
+ " cu": 120,
123
+ "ca ": 121,
124
+ "ica": 122,
125
+ "ric": 123,
126
+ "r s": 124,
127
+ "l m": 125,
128
+ " mu": 126,
129
+ "mun": 127,
130
+ "und": 128,
131
+ "ndo": 129,
132
+ "o m": 130,
133
+ " ma": 131,
134
+ "mad": 132,
135
+ "adr": 133,
136
+ "dri": 134,
137
+ "rid": 135,
138
+ "id ": 136,
139
+ "d e": 137,
140
+ "la ": 138,
141
+ "cap": 139,
142
+ "api": 140,
143
+ "pit": 141,
144
+ "tal": 142,
145
+ "l d": 143,
146
+ "y e": 144,
147
+ " co": 145,
148
+ "con": 146,
149
+ "noc": 147,
150
+ "oci": 148,
151
+ "cid": 149,
152
+ "ida": 150,
153
+ "ult": 151,
154
+ "ion": 152,
155
+ "ona": 153,
156
+ "nal": 154,
157
+ "l q": 155,
158
+ " qu": 156,
159
+ "que": 157,
160
+ "ue ": 158,
161
+ " at": 159,
162
+ "atr": 160,
163
+ "rae": 161,
164
+ "ae ": 162,
165
+ " a ": 163,
166
+ "a m": 164,
167
+ " mi": 165,
168
+ "mil": 166,
169
+ "ill": 167,
170
+ "llo": 168,
171
+ "lon": 169,
172
+ "one": 170,
173
+ "nes": 171,
174
+ "e v": 172,
175
+ " vi": 173,
176
+ "vis": 174,
177
+ "isi": 175,
178
+ "tan": 176,
179
+ "ant": 177,
180
+ "nte": 178,
181
+ "tes": 179,
182
+ "cad": 180,
183
+ "a a": 181,
184
+ " añ": 182,
185
+ "ltu": 183,
186
+ "tur": 184,
187
+ "ura": 185,
188
+ "ra ": 186,
189
+ "a h": 187,
190
+ " hi": 188,
191
+ "his": 189,
192
+ "ist": 190,
193
+ "sto": 191,
194
+ "tor": 192,
195
+ "ori": 193,
196
+ "ria": 194,
197
+ "ia ": 195,
198
+ "y g": 196,
199
+ " ga": 197,
200
+ "gas": 198,
201
+ "ast": 199,
202
+ "str": 200,
203
+ "tro": 201,
204
+ "ron": 202,
205
+ "nom": 203,
206
+ "omí": 204,
207
+ "mía": 205,
208
+ "ía ": 206,
209
+ "a t": 207,
210
+ " tr": 208,
211
+ "rad": 209,
212
+ "adi": 210,
213
+ "dic": 211,
214
+ "ici": 212,
215
+ "cio": 213,
216
+ "oe": 214,
217
+ "lt": 215,
218
+ "hi": 216,
219
+ "to": 217,
220
+ "ia": 218,
221
+ " g": 219,
222
+ "ga": 220,
223
+ "om": 221,
224
+ "mí": 222,
225
+ "ía": 223,
226
+ " t": 224,
227
+ "di": 225,
228
+ "io": 226,
229
+ " q": 227,
230
+ "qu": 228,
231
+ "ue": 229,
232
+ "at": 230,
233
+ "ae": 231,
234
+ "mi": 232,
235
+ "il": 233,
236
+ "ll": 234,
237
+ "lo": 235,
238
+ "ne": 236,
239
+ " v": 237,
240
+ "vi": 238,
241
+ "an": 239,
242
+ "nt": 240,
243
+ "v": 241,
244
+ "q": 242,
245
+ "eu": 243,
246
+ "op": 244,
247
+ "ol": 245,
248
+ "le": 246,
249
+ "ng": 247,
250
+ "gu": 248,
251
+ "má": 249,
252
+ "ás": 250,
253
+ "ha": 251,
254
+ "ab": 252,
255
+ "bl": 253,
256
+ "mu": 254,
257
+ "nd": 255,
258
+ "ma": 256,
259
+ "dr": 257,
260
+ "ís": 258,
261
+ "d ": 259,
262
+ "aí": 260,
263
+ "ap": 261,
264
+ "pi": 262,
265
+ "co": 263,
266
+ "oc": 264,
267
+ "po": 265,
268
+ "r ": 266,
269
+ "u ": 267,
270
+ " r": 268,
271
+ "cu": 269,
272
+ "ul": 270,
273
+ "rop": 271,
274
+ "opa": 272,
275
+ "pa ": 273,
276
+ "ñol": 274,
277
+ "ol ": 275,
278
+ "una": 276,
279
+ "na ": 277,
280
+ "a d": 278,
281
+ "e l": 279,
282
+ "las": 280,
283
+ " le": 281,
284
+ "len": 282,
285
+ "eng": 283,
286
+ "ngu": 284,
287
+ "gua": 285,
288
+ "uas": 286,
289
+ "s m": 287,
290
+ " má": 288,
291
+ "más": 289,
292
+ "ás ": 290,
293
+ "s h": 291,
294
+ " ha": 292,
295
+ "hab": 293,
296
+ "abl": 294,
297
+ "bla": 295,
298
+ "lad": 296,
299
+ "das": 297,
300
+ "del": 298,
301
+ "b": 299
302
+ }
@@ -0,0 +1,289 @@
1
+ {
2
+ "e": 0,
3
+ "i": 1,
4
+ "u": 2,
5
+ "o": 3,
6
+ "l": 4,
7
+ "a": 5,
8
+ "s": 6,
9
+ "n": 7,
10
+ "t": 8,
11
+ "k": 9,
12
+ "r": 10,
13
+ "ee": 11,
14
+ "ri": 12,
15
+ "n ": 13,
16
+ "i ": 14,
17
+ " k": 15,
18
+ "st": 16,
19
+ "li": 17,
20
+ "al": 18,
21
+ "oo": 19,
22
+ "a ": 20,
23
+ "p": 21,
24
+ "on": 22,
25
+ " o": 23,
26
+ "m": 24,
27
+ "es": 25,
28
+ "j": 26,
29
+ "est": 27,
30
+ "ti ": 28,
31
+ "d": 29,
32
+ "sti": 30,
33
+ "ur": 31,
34
+ "tu": 32,
35
+ " e": 33,
36
+ "ja": 34,
37
+ "eel": 35,
38
+ "el": 36,
39
+ " p": 37,
40
+ "ees": 38,
41
+ "on ": 39,
42
+ "ti": 40,
43
+ " on": 41,
44
+ " t": 42,
45
+ "uri": 43,
46
+ "uu": 44,
47
+ "ku": 45,
48
+ "in": 46,
49
+ "nn": 47,
50
+ "is": 48,
51
+ "kee": 49,
52
+ " ke": 50,
53
+ " ee": 51,
54
+ "ja ": 52,
55
+ " ri": 53,
56
+ "lin": 54,
57
+ "inn": 55,
58
+ "nn ": 56,
59
+ " r": 57,
60
+ "ik": 58,
61
+ "s ": 59,
62
+ "ke": 60,
63
+ "om": 61,
64
+ "is ": 62,
65
+ " l": 63,
66
+ " tu": 64,
67
+ "un": 65,
68
+ "el ": 66,
69
+ "d ": 67,
70
+ "ka": 68,
71
+ "i k": 69,
72
+ "s e": 70,
73
+ "lo": 71,
74
+ "us": 72,
75
+ " m": 73,
76
+ "loo": 74,
77
+ "rii": 75,
78
+ "ii": 76,
79
+ "g": 77,
80
+ "i o": 78,
81
+ "n r": 79,
82
+ "me": 80,
83
+ "e ": 81,
84
+ " ja": 82,
85
+ "uur": 83,
86
+ "ri ": 84,
87
+ "a k": 85,
88
+ " s": 86,
89
+ "ul": 87,
90
+ "l ": 88,
91
+ "ta": 89,
92
+ " j": 90,
93
+ "ali": 91,
94
+ " ku": 92,
95
+ "pa": 93,
96
+ " pe": 94,
97
+ "rik": 95,
98
+ "ikk": 96,
99
+ "n t": 97,
100
+ "kka": 98,
101
+ "kal": 99,
102
+ "lik": 100,
103
+ "iku": 101,
104
+ " li": 102,
105
+ "suu": 103,
106
+ " su": 104,
107
+ "a s": 105,
108
+ "iig": 106,
109
+ "n j": 107,
110
+ "gis": 108,
111
+ "igi": 109,
112
+ "õ": 110,
113
+ "m l": 111,
114
+ "i p": 112,
115
+ "n e": 113,
116
+ "im ": 114,
117
+ "n o": 115,
118
+ "rim": 116,
119
+ "tun": 117,
120
+ "unt": 118,
121
+ "ntu": 119,
122
+ "tud": 120,
123
+ "ud ": 121,
124
+ "d o": 122,
125
+ "pea": 123,
126
+ " om": 124,
127
+ "oma": 125,
128
+ "ma ": 126,
129
+ "eal": 127,
130
+ "a r": 128,
131
+ "ku ": 129,
132
+ "ole": 130,
133
+ "les": 131,
134
+ "st ": 132,
135
+ "t m": 133,
136
+ " mi": 134,
137
+ "mis": 135,
138
+ "s m": 136,
139
+ " me": 137,
140
+ "mee": 138,
141
+ "eli": 139,
142
+ "lit": 140,
143
+ "ita": 141,
144
+ "tab": 142,
145
+ "ab ": 143,
146
+ "b p": 144,
147
+ " pa": 145,
148
+ "pal": 146,
149
+ "alj": 147,
150
+ "lju": 148,
151
+ "jus": 149,
152
+ "usi": 150,
153
+ "sid": 151,
154
+ "id ": 152,
155
+ "d t": 153,
156
+ "tur": 154,
157
+ "ris": 155,
158
+ "ist": 156,
159
+ "ste": 157,
160
+ "u a": 158,
161
+ " aj": 159,
162
+ "aja": 160,
163
+ "jal": 161,
164
+ "alo": 162,
165
+ "oo ": 163,
166
+ "o k": 164,
167
+ "kul": 165,
168
+ "ult": 166,
169
+ "ltu": 167,
170
+ "tuu": 168,
171
+ "i j": 169,
172
+ " ka": 170,
173
+ "kau": 171,
174
+ "aun": 172,
175
+ "uni": 173,
176
+ "ni ": 174,
177
+ "i l": 175,
178
+ " lo": 176,
179
+ "ood": 177,
180
+ "odu": 178,
181
+ "dus": 179,
182
+ "use": 180,
183
+ "se ": 181,
184
+ "e p": 182,
185
+ " po": 183,
186
+ "poo": 184,
187
+ "ool": 185,
188
+ "su": 186,
189
+ "im": 187,
190
+ "m ": 188,
191
+ "ig": 189,
192
+ "gi": 190,
193
+ "nt": 191,
194
+ "ud": 192,
195
+ "ma": 193,
196
+ "kk": 194,
197
+ "u ": 195,
198
+ " a": 196,
199
+ "aj": 197,
200
+ "o ": 198,
201
+ "lt": 199,
202
+ "au": 200,
203
+ "ni": 201,
204
+ "od": 202,
205
+ "du": 203,
206
+ "se": 204,
207
+ "po": 205,
208
+ "ol": 206,
209
+ "le": 207,
210
+ "t ": 208,
211
+ "mi": 209,
212
+ "it": 210,
213
+ "h": 211,
214
+ "v": 212,
215
+ "b": 213,
216
+ "k ": 214,
217
+ "põ": 215,
218
+ "õh": 216,
219
+ "hj": 217,
220
+ "eu": 218,
221
+ "ro": 219,
222
+ "op": 220,
223
+ "as": 221,
224
+ "so": 222,
225
+ " u": 223,
226
+ "ug": 224,
227
+ "gr": 225,
228
+ "lk": 226,
229
+ "ko": 227,
230
+ "nd": 228,
231
+ "da": 229,
232
+ "lu": 230,
233
+ "uv": 231,
234
+ "v ": 232,
235
+ "ll": 233,
236
+ "pe": 234,
237
+ "ea": 235,
238
+ "ab": 236,
239
+ "soo": 237,
240
+ "oom": 238,
241
+ "ome": 239,
242
+ "me ": 240,
243
+ "e u": 241,
244
+ " ug": 242,
245
+ "ugr": 243,
246
+ "gri": 244,
247
+ "elk": 245,
248
+ "lko": 246,
249
+ "kon": 247,
250
+ "ond": 248,
251
+ "nda": 249,
252
+ "da ": 250,
253
+ "kuu": 251,
254
+ "uul": 252,
255
+ "ulu": 253,
256
+ "luv": 254,
257
+ "uv ": 255,
258
+ "v k": 256,
259
+ "l t": 257,
260
+ " ta": 258,
261
+ "tal": 259,
262
+ "all": 260,
263
+ "lli": 261,
264
+ "b ": 262,
265
+ "lj": 263,
266
+ "ju": 264,
267
+ "si": 265,
268
+ "id": 266,
269
+ "te": 267,
270
+ "iik": 268,
271
+ "ik ": 269,
272
+ "k p": 270,
273
+ " põ": 271,
274
+ "põh": 272,
275
+ "õhj": 273,
276
+ "hja": 274,
277
+ "a e": 275,
278
+ " eu": 276,
279
+ "eur": 277,
280
+ "uro": 278,
281
+ "roo": 279,
282
+ "oop": 280,
283
+ "opa": 281,
284
+ "pas": 282,
285
+ "as ": 283,
286
+ "l o": 284,
287
+ "n s": 285,
288
+ " so": 286
289
+ }