langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "e": 1,
4
+ "r": 2,
5
+ "g": 3,
6
+ "l": 4,
7
+ "y": 5,
8
+ "s": 6,
9
+ "z": 7,
10
+ "á": 8,
11
+ "o": 9,
12
+ "n": 10,
13
+ "gy": 11,
14
+ "t": 12,
15
+ "el": 13,
16
+ "m": 14,
17
+ "u": 15,
18
+ "v": 16,
19
+ "or": 17,
20
+ "g ": 18,
21
+ "ag": 19,
22
+ " e": 20,
23
+ "k": 21,
24
+ " a": 22,
25
+ "a ": 23,
26
+ "agy": 24,
27
+ "y ": 25,
28
+ "eg": 26,
29
+ "ors": 27,
30
+ "ág ": 28,
31
+ "zág": 29,
32
+ "rsz": 30,
33
+ "ág": 31,
34
+ "zá": 32,
35
+ "szá": 33,
36
+ "sz": 34,
37
+ "rs": 35,
38
+ "ro": 36,
39
+ "é": 37,
40
+ "p": 38,
41
+ "i": 39,
42
+ "d": 40,
43
+ "b": 41,
44
+ "egy": 42,
45
+ " eg": 43,
46
+ "me": 44,
47
+ "ó": 45,
48
+ "ye": 46,
49
+ "yar": 47,
50
+ "gya": 48,
51
+ "mag": 49,
52
+ "ar": 50,
53
+ "ya": 51,
54
+ "ur": 52,
55
+ "mel": 53,
56
+ "ma": 54,
57
+ "st": 55,
58
+ "sa": 56,
59
+ "os": 57,
60
+ " é": 58,
61
+ "és": 59,
62
+ "s ": 60,
63
+ "vá": 61,
64
+ "ror": 62,
65
+ "aro": 63,
66
+ "le": 64,
67
+ "ár": 65,
68
+ " v": 66,
69
+ "az": 67,
70
+ "l ": 68,
71
+ " t": 69,
72
+ "a a": 70,
73
+ "és ": 71,
74
+ " és": 72,
75
+ "sa ": 73,
76
+ "osa": 74,
77
+ "ros": 75,
78
+ "áro": 76,
79
+ "vár": 77,
80
+ "ly ": 78,
81
+ "ely": 79,
82
+ "ame": 80,
83
+ " am": 81,
84
+ "lv ": 82,
85
+ "elv": 83,
86
+ "yel": 84,
87
+ "nye": 85,
88
+ " ny": 86,
89
+ "r n": 87,
90
+ " ma": 88,
91
+ " a ": 89,
92
+ " or": 90,
93
+ "róp": 91,
94
+ "uró": 92,
95
+ "eur": 93,
96
+ " eu": 94,
97
+ "gy ": 95,
98
+ "f": 96,
99
+ "óp": 97,
100
+ " o": 98,
101
+ " m": 99,
102
+ "r ": 100,
103
+ "am": 101,
104
+ "ly": 102,
105
+ " n": 103,
106
+ " k": 104,
107
+ "ny": 105,
108
+ "ró": 106,
109
+ "eu": 107,
110
+ "da": 108,
111
+ " f": 109,
112
+ "ö": 110,
113
+ "lv": 111,
114
+ "v ": 112,
115
+ "kö": 113,
116
+ "öz": 114,
117
+ "ú": 115,
118
+ "ővá": 116,
119
+ "ő": 117,
120
+ "a é": 118,
121
+ "ü": 119,
122
+ "s l": 120,
123
+ " le": 121,
124
+ "leg": 122,
125
+ "egn": 123,
126
+ "gna": 124,
127
+ "nag": 125,
128
+ "gyo": 126,
129
+ "yob": 127,
130
+ "obb": 128,
131
+ "bb ": 129,
132
+ "b v": 130,
133
+ " vá": 131,
134
+ " az": 132,
135
+ "val": 133,
136
+ "es": 134,
137
+ "edü": 135,
138
+ "dül": 136,
139
+ "ülá": 137,
140
+ "lál": 138,
141
+ "áll": 139,
142
+ "lló": 140,
143
+ "ló ": 141,
144
+ "ó e": 142,
145
+ "ópá": 143,
146
+ "páb": 144,
147
+ "ába": 145,
148
+ "ban": 146,
149
+ "an ": 147,
150
+ "n b": 148,
151
+ " bu": 149,
152
+ "bud": 150,
153
+ "uda": 151,
154
+ "dap": 152,
155
+ "ape": 153,
156
+ "pes": 154,
157
+ "est": 155,
158
+ "st ": 156,
159
+ "t m": 157,
160
+ "g f": 158,
161
+ " fő": 159,
162
+ "főv": 160,
163
+ "al ": 161,
164
+ "l r": 162,
165
+ " re": 163,
166
+ "ren": 164,
167
+ "end": 165,
168
+ "nde": 166,
169
+ "del": 167,
170
+ "elk": 168,
171
+ "lke": 169,
172
+ "kez": 170,
173
+ "ezi": 171,
174
+ "zik": 172,
175
+ "ik ": 173,
176
+ "k a": 174,
177
+ "y v": 175,
178
+ " vo": 176,
179
+ "von": 177,
180
+ "onz": 178,
181
+ "nzz": 179,
182
+ "zza": 180,
183
+ "za ": 181,
184
+ "a t": 182,
185
+ " tu": 183,
186
+ "tur": 184,
187
+ "uri": 185,
188
+ "ris": 186,
189
+ "ist": 187,
190
+ "stá": 188,
191
+ "ták": 189,
192
+ "áka": 190,
193
+ "kat": 191,
194
+ "az ": 192,
195
+ "z o": 193,
196
+ "g g": 194,
197
+ " ga": 195,
198
+ "gaz": 196,
199
+ "azd": 197,
200
+ "zda": 198,
201
+ "dag": 199,
202
+ "ag ": 200,
203
+ "g t": 201,
204
+ " tö": 202,
205
+ "tör": 203,
206
+ "ört": 204,
207
+ "rté": 205,
208
+ "tén": 206,
209
+ "éne": 207,
210
+ "nel": 208,
211
+ "ele": 209,
212
+ "lem": 210,
213
+ "emm": 211,
214
+ "mme": 212,
215
+ "el ": 213,
216
+ "l é": 214,
217
+ "s k": 215,
218
+ " ku": 216,
219
+ "kul": 217,
220
+ "ult": 218,
221
+ "ltú": 219,
222
+ "túr": 220,
223
+ "úrá": 221,
224
+ "ráv": 222,
225
+ "áva": 223,
226
+ "én": 224,
227
+ "ne": 225,
228
+ "em": 226,
229
+ "mm": 227,
230
+ "ó ": 228,
231
+ "ku": 229,
232
+ "ul": 230,
233
+ "lt": 231,
234
+ "tú": 232,
235
+ "úr": 233,
236
+ "rá": 234,
237
+ "áv": 235,
238
+ "va": 236,
239
+ "al": 237,
240
+ " r": 238,
241
+ "re": 239,
242
+ "en": 240,
243
+ "nd": 241,
244
+ "de": 242,
245
+ "lk": 243,
246
+ "ke": 244,
247
+ "ez": 245,
248
+ "zi": 246,
249
+ "ik": 247,
250
+ "k ": 248,
251
+ "vo": 249,
252
+ "on": 250,
253
+ "nz": 251,
254
+ "t ": 252,
255
+ "fő": 253,
256
+ "őv": 254,
257
+ "pe": 255,
258
+ "ap": 256,
259
+ "ud": 257,
260
+ "bu": 258,
261
+ " b": 259,
262
+ "n ": 260,
263
+ " l": 261,
264
+ "an": 262,
265
+ "gn": 263,
266
+ "na": 264,
267
+ "yo": 265,
268
+ "ob": 266,
269
+ "bb": 267,
270
+ "b ": 268,
271
+ "ba": 269,
272
+ "áb": 270,
273
+ "z ": 271,
274
+ " g": 272,
275
+ "ga": 273,
276
+ "zd": 274,
277
+ "pá": 275,
278
+ "tö": 276,
279
+ "ör": 277,
280
+ "rt": 278,
281
+ "té": 279,
282
+ "zz": 280,
283
+ "fi": 281,
284
+ "ópa": 282,
285
+ "pai": 283,
286
+ "ai ": 284,
287
+ "i o": 285,
288
+ "g a": 286,
289
+ "a m": 287,
290
+ "ar ": 288,
291
+ "i ": 289,
292
+ "ai": 290,
293
+ "pa": 291,
294
+ "v e": 292,
295
+ "y f": 293,
296
+ " fi": 294,
297
+ "fin": 295,
298
+ "inn": 296,
299
+ "nnu": 297,
300
+ "nug": 298,
301
+ "ugo": 299
302
+ }
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "i": 1,
4
+ "l": 2,
5
+ "a ": 3,
6
+ "e": 4,
7
+ "t": 5,
8
+ "s": 6,
9
+ "u": 7,
10
+ "n": 8,
11
+ "o": 9,
12
+ "r": 10,
13
+ "c": 11,
14
+ "e ": 12,
15
+ " l": 13,
16
+ "li": 14,
17
+ "ta": 15,
18
+ "al": 16,
19
+ "it": 17,
20
+ "d": 18,
21
+ "la ": 19,
22
+ " s": 20,
23
+ "tal": 21,
24
+ "l ": 22,
25
+ " la": 23,
26
+ "la": 24,
27
+ "m": 25,
28
+ "el": 26,
29
+ "p": 27,
30
+ "ua": 28,
31
+ " è ": 29,
32
+ " e": 30,
33
+ " c": 31,
34
+ " d": 32,
35
+ "ali": 33,
36
+ "è": 34,
37
+ "è ": 35,
38
+ " è": 36,
39
+ "ia": 37,
40
+ "ita": 38,
41
+ " i": 39,
42
+ "ll": 40,
43
+ "ca": 41,
44
+ "a c": 42,
45
+ "ri": 43,
46
+ " a": 44,
47
+ "le": 45,
48
+ "in": 46,
49
+ "ti": 47,
50
+ " it": 48,
51
+ "a è": 49,
52
+ "na": 50,
53
+ "ci": 51,
54
+ "ia ": 52,
55
+ "lia": 53,
56
+ "os": 54,
57
+ "a s": 55,
58
+ " p": 56,
59
+ "ua ": 57,
60
+ "z": 58,
61
+ "a l": 59,
62
+ "ell": 60,
63
+ "del": 61,
64
+ "de": 62,
65
+ " su": 63,
66
+ "su": 64,
67
+ " de": 65,
68
+ "l i": 66,
69
+ "i ": 67,
70
+ "io": 68,
71
+ "ina": 69,
72
+ " pe": 70,
73
+ "per": 71,
74
+ "nt": 72,
75
+ "r l": 73,
76
+ " r": 74,
77
+ "er ": 75,
78
+ "st": 76,
79
+ "sa": 77,
80
+ "a p": 78,
81
+ "sua": 79,
82
+ "sci": 80,
83
+ "na ": 81,
84
+ "a e": 82,
85
+ " e ": 83,
86
+ "ica": 84,
87
+ "sa ": 85,
88
+ "ll ": 86,
89
+ "osa": 87,
90
+ "le ": 88,
91
+ "ale": 89,
92
+ "r ": 90,
93
+ "to": 91,
94
+ "at": 92,
95
+ "tu": 93,
96
+ "si": 94,
97
+ "er": 95,
98
+ "pe": 96,
99
+ "sc": 97,
100
+ "pa": 98,
101
+ "on": 99,
102
+ "d ": 100,
103
+ " m": 101,
104
+ "an": 102,
105
+ "ic": 103,
106
+ "ur": 104,
107
+ "ro": 105,
108
+ "à r": 106,
109
+ " ro": 107,
110
+ "rom": 108,
111
+ "oni": 109,
112
+ "te ": 110,
113
+ "d è": 111,
114
+ "ed ": 112,
115
+ " ed": 113,
116
+ "e d": 114,
117
+ "ion": 115,
118
+ "lio": 116,
119
+ "ili": 117,
120
+ "oma": 118,
121
+ "ma ": 119,
122
+ "pit": 120,
123
+ "è l": 121,
124
+ "api": 122,
125
+ "cap": 123,
126
+ " ca": 124,
127
+ "mil": 125,
128
+ "sti": 126,
129
+ "ist": 127,
130
+ "ris": 128,
131
+ "uri": 129,
132
+ "tur": 130,
133
+ " tu": 131,
134
+ "i t": 132,
135
+ "a b": 133,
136
+ " be": 134,
137
+ "di ": 135,
138
+ "bel": 136,
139
+ "lle": 137,
140
+ "lez": 138,
141
+ "ezz": 139,
142
+ "zza": 140,
143
+ "za ": 141,
144
+ " di": 142,
145
+ "i d": 143,
146
+ "e m": 144,
147
+ "ni ": 145,
148
+ " mu": 146,
149
+ "mus": 147,
150
+ "usi": 148,
151
+ "sic": 149,
152
+ "cal": 150,
153
+ "lit": 151,
154
+ "ità": 152,
155
+ "tà ": 153,
156
+ "e r": 154,
157
+ " ri": 155,
158
+ "rin": 156,
159
+ " at": 157,
160
+ "nas": 158,
161
+ "asc": 159,
162
+ "cim": 160,
163
+ "ime": 161,
164
+ "men": 162,
165
+ "e a": 163,
166
+ "he ": 164,
167
+ "che": 165,
168
+ " ch": 166,
169
+ "ent": 167,
170
+ "ios": 168,
171
+ "nta": 169,
172
+ "e e": 170,
173
+ "e l": 171,
174
+ " cu": 172,
175
+ "cuc": 173,
176
+ "uci": 174,
177
+ "cin": 175,
178
+ "a d": 176,
179
+ "eli": 177,
180
+ "zio": 178,
181
+ "liz": 179,
182
+ "izi": 180,
183
+ "è f": 181,
184
+ " fa": 182,
185
+ "fam": 183,
186
+ "amo": 184,
187
+ "mos": 185,
188
+ " mi": 186,
189
+ "a m": 187,
190
+ " st": 188,
191
+ "sto": 189,
192
+ "tor": 190,
193
+ "ori": 191,
194
+ "ra ": 192,
195
+ "ira": 193,
196
+ "tir": 194,
197
+ "tti": 195,
198
+ "att": 196,
199
+ "ria": 197,
200
+ "a a": 198,
201
+ " an": 199,
202
+ "ant": 200,
203
+ "nti": 201,
204
+ "tic": 202,
205
+ "ca ": 203,
206
+ " l ": 204,
207
+ "l a": 205,
208
+ " ar": 206,
209
+ "art": 207,
210
+ "rte": 208,
211
+ "g": 209,
212
+ "us": 210,
213
+ "tà": 211,
214
+ "à ": 212,
215
+ "om": 213,
216
+ "ma": 214,
217
+ "ap": 215,
218
+ "pi": 216,
219
+ "ed": 217,
220
+ " f": 218,
221
+ "fa": 219,
222
+ "am": 220,
223
+ "mo": 221,
224
+ "or": 222,
225
+ "ar": 223,
226
+ "rt": 224,
227
+ "te": 225,
228
+ "as": 226,
229
+ "im": 227,
230
+ "me": 228,
231
+ "en": 229,
232
+ "cu": 230,
233
+ "uc": 231,
234
+ "iz": 232,
235
+ "zi": 233,
236
+ "ch": 234,
237
+ "he": 235,
238
+ "tt": 236,
239
+ "ir": 237,
240
+ "b": 238,
241
+ "à": 239,
242
+ "f": 240,
243
+ "h": 241,
244
+ " u": 242,
245
+ "un": 243,
246
+ "n ": 244,
247
+ "ae": 245,
248
+ "es": 246,
249
+ "se": 247,
250
+ "o ": 248,
251
+ " n": 249,
252
+ "ne": 250,
253
+ "ud": 251,
254
+ "eu": 252,
255
+ "op": 253,
256
+ "ng": 254,
257
+ "gu": 255,
258
+ "co": 256,
259
+ "no": 257,
260
+ "iu": 258,
261
+ "ut": 259,
262
+ " b": 260,
263
+ "be": 261,
264
+ "ez": 262,
265
+ "zz": 263,
266
+ "za": 264,
267
+ "mu": 265,
268
+ "ra": 266,
269
+ "sud": 267,
270
+ "ud ": 268,
271
+ "d d": 269,
272
+ "l e": 270,
273
+ " eu": 271,
274
+ "eur": 272,
275
+ "uro": 273,
276
+ "rop": 274,
277
+ "opa": 275,
278
+ "pa ": 276,
279
+ " li": 277,
280
+ "lin": 278,
281
+ "ing": 279,
282
+ "ngu": 280,
283
+ "gua": 281,
284
+ "a i": 282,
285
+ "ian": 283,
286
+ "ana": 284,
287
+ "è c": 285,
288
+ " co": 286,
289
+ "con": 287,
290
+ "ono": 288,
291
+ "nos": 289,
292
+ "osc": 290,
293
+ "ciu": 291,
294
+ "iut": 292,
295
+ "uta": 293,
296
+ "ta ": 294,
297
+ "mi": 295,
298
+ "il": 296,
299
+ "ni": 297,
300
+ "di": 298,
301
+ " t": 299
302
+ }