langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "o": 1,
4
+ "i": 2,
5
+ "s": 3,
6
+ "k": 4,
7
+ "t": 5,
8
+ "e": 6,
9
+ "w": 7,
10
+ "j": 8,
11
+ "r": 9,
12
+ "z": 10,
13
+ "y": 11,
14
+ "st": 12,
15
+ "m": 13,
16
+ "l": 14,
17
+ "p": 15,
18
+ "u": 16,
19
+ " j": 17,
20
+ "a ": 18,
21
+ "ol": 19,
22
+ "po": 20,
23
+ "sk": 21,
24
+ "je": 22,
25
+ " p": 23,
26
+ "m ": 24,
27
+ "ę": 25,
28
+ "wi": 26,
29
+ "pol": 27,
30
+ " w": 28,
31
+ " k": 29,
32
+ "w ": 30,
33
+ "ols": 31,
34
+ "lsk": 32,
35
+ " je": 33,
36
+ "i ": 34,
37
+ "ls": 35,
38
+ "c": 36,
39
+ " po": 37,
40
+ "zy": 38,
41
+ "ki": 39,
42
+ "ur": 40,
43
+ "ia": 41,
44
+ "ym ": 42,
45
+ "ym": 43,
46
+ "st ": 44,
47
+ "ski": 45,
48
+ "ą": 46,
49
+ "g": 47,
50
+ "es": 48,
51
+ "ó": 49,
52
+ "t ": 50,
53
+ "jes": 51,
54
+ "ra": 52,
55
+ "n": 53,
56
+ "ł": 54,
57
+ "est": 55,
58
+ "aj": 56,
59
+ " i ": 57,
60
+ "ię": 58,
61
+ " i": 59,
62
+ "ą ": 60,
63
+ "to": 61,
64
+ "ka ": 62,
65
+ "sz": 63,
66
+ "sto": 64,
67
+ "ska": 65,
68
+ "ęz": 66,
69
+ " s": 67,
70
+ "wa": 68,
71
+ "ic": 69,
72
+ " m": 70,
73
+ " w ": 71,
74
+ "m w": 72,
75
+ "a j": 73,
76
+ "u ": 74,
77
+ "nym": 75,
78
+ "tur": 76,
79
+ "ga": 77,
80
+ "at": 78,
81
+ "ę ": 79,
82
+ "em ": 80,
83
+ "tu": 81,
84
+ "raj": 82,
85
+ "kra": 83,
86
+ "tó": 84,
87
+ " kr": 85,
88
+ "ów ": 86,
89
+ "ka": 87,
90
+ "ję": 88,
91
+ "ęzy": 89,
92
+ "kr": 90,
93
+ "em": 91,
94
+ "zyk": 92,
95
+ "ło": 93,
96
+ "ow": 94,
97
+ "wia": 95,
98
+ "ki ": 96,
99
+ " z ": 97,
100
+ " ś": 98,
101
+ "ny": 99,
102
+ "ie": 100,
103
+ "ro": 101,
104
+ "h": 102,
105
+ "yk": 103,
106
+ "jęz": 104,
107
+ "d": 105,
108
+ " ję": 106,
109
+ "ów": 107,
110
+ "ś": 108,
111
+ "z ": 109,
112
+ " z": 110,
113
+ "ast": 111,
114
+ "ata": 112,
115
+ "zym": 113,
116
+ "szy": 114,
117
+ "ańs": 115,
118
+ "ńsk": 116,
119
+ "ksz": 117,
120
+ "ęks": 118,
121
+ "ias": 119,
122
+ "iań": 120,
123
+ "mia": 121,
124
+ " mi": 122,
125
+ "sło": 123,
126
+ "m m": 124,
127
+ "owi": 125,
128
+ "łow": 126,
129
+ "rsz": 127,
130
+ "sza": 128,
131
+ "zaw": 129,
132
+ "awa": 130,
133
+ "ars": 131,
134
+ "wa ": 132,
135
+ "t s": 133,
136
+ " st": 134,
137
+ "tol": 135,
138
+ "oli": 136,
139
+ "lic": 137,
140
+ "icą": 138,
141
+ "cą ": 139,
142
+ "ą p": 140,
143
+ "i i": 141,
144
+ "war": 142,
145
+ "i n": 143,
146
+ " na": 144,
147
+ " wa": 145,
148
+ "h w": 146,
149
+ "ch ": 147,
150
+ "naj": 148,
151
+ "ajw": 149,
152
+ "ich": 150,
153
+ "kic": 151,
154
+ "jwi": 152,
155
+ "wię": 153,
156
+ "ięk": 154,
157
+ "u t": 155,
158
+ "lu ": 156,
159
+ "elu": 157,
160
+ "iel": 158,
161
+ "wie": 159,
162
+ " wi": 160,
163
+ "a w": 161,
164
+ "ga ": 162,
165
+ "ąga": 163,
166
+ "iąg": 164,
167
+ "cią": 165,
168
+ "yci": 166,
169
+ "zyc": 167,
170
+ "rzy": 168,
171
+ "prz": 169,
172
+ " pr": 170,
173
+ "a p": 171,
174
+ "ra ": 172,
175
+ "iat": 173,
176
+ "świ": 174,
177
+ " św": 175,
178
+ "o ś": 176,
179
+ "go ": 177,
180
+ "ego": 178,
181
+ "łeg": 179,
182
+ "ałe": 180,
183
+ "cał": 181,
184
+ " ca": 182,
185
+ "z c": 183,
186
+ "w z": 184,
187
+ "tów": 185,
188
+ "stó": 186,
189
+ "yst": 187,
190
+ "rys": 188,
191
+ "ury": 189,
192
+ " tu": 190,
193
+ "óra": 191,
194
+ " hi": 192,
195
+ "ą h": 193,
196
+ "tą ": 194,
197
+ "atą": 195,
198
+ "gat": 196,
199
+ "oga": 197,
200
+ "bog": 198,
201
+ " bo": 199,
202
+ "a b": 200,
203
+ "ma ": 201,
204
+ " ma": 202,
205
+ "a m": 203,
206
+ "u p": 204,
207
+ "ju ": 205,
208
+ "aju": 206,
209
+ "w k": 207,
210
+ "tem": 208,
211
+ "ste": 209,
212
+ "tór": 210,
213
+ "któ": 211,
214
+ " kt": 212,
215
+ "ę k": 213,
216
+ "rę ": 214,
217
+ "urę": 215,
218
+ "ltu": 216,
219
+ "ult": 217,
220
+ "kul": 218,
221
+ " ku": 219,
222
+ "i k": 220,
223
+ "ę i": 221,
224
+ "ię ": 222,
225
+ "rię": 223,
226
+ "ori": 224,
227
+ "tor": 225,
228
+ "ist": 226,
229
+ "his": 227,
230
+ "ż": 228,
231
+ "aw": 229,
232
+ "li": 230,
233
+ "cą": 231,
234
+ " n": 232,
235
+ "na": 233,
236
+ "jw": 234,
237
+ "ęk": 235,
238
+ "ks": 236,
239
+ "mi": 237,
240
+ "as": 238,
241
+ "te": 239,
242
+ "ju": 240,
243
+ "ma": 241,
244
+ " b": 242,
245
+ "bo": 243,
246
+ "og": 244,
247
+ "tą": 245,
248
+ " h": 246,
249
+ "hi": 247,
250
+ "is": 248,
251
+ "or": 249,
252
+ "ri": 250,
253
+ "ku": 251,
254
+ "ul": 252,
255
+ "lt": 253,
256
+ "rę": 254,
257
+ "kt": 255,
258
+ "ór": 256,
259
+ "pr": 257,
260
+ "rz": 258,
261
+ "ń": 259,
262
+ "b": 260,
263
+ "oł": 261,
264
+ "oż": 262,
265
+ "żo": 263,
266
+ "on": 264,
267
+ " e": 265,
268
+ "eu": 266,
269
+ "op": 267,
270
+ "pi": 268,
271
+ "e ": 269,
272
+ "śr": 270,
273
+ "od": 271,
274
+ "dk": 272,
275
+ "ko": 273,
276
+ "we": 274,
277
+ "ej": 275,
278
+ "j ": 276,
279
+ "k ": 277,
280
+ "ed": 278,
281
+ "dn": 279,
282
+ "kó": 280,
283
+ "sł": 281,
284
+ "ań": 282,
285
+ "ńs": 283,
286
+ "ch": 284,
287
+ "h ": 285,
288
+ "ar": 286,
289
+ "rs": 287,
290
+ "za": 288,
291
+ "eur": 289,
292
+ "uro": 290,
293
+ "rop": 291,
294
+ "opi": 292,
295
+ "pie": 293,
296
+ "ie ": 294,
297
+ "e ś": 295,
298
+ " śr": 296,
299
+ "śro": 297,
300
+ "rod": 298,
301
+ "odk": 299
302
+ }
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "o": 1,
4
+ "u": 2,
5
+ "a ": 3,
6
+ "s": 4,
7
+ "t": 5,
8
+ "l": 6,
9
+ "e": 7,
10
+ "i": 8,
11
+ "r": 9,
12
+ "p": 10,
13
+ "m": 11,
14
+ "d": 12,
15
+ "g": 13,
16
+ "n": 14,
17
+ "c": 15,
18
+ "al": 16,
19
+ "o ": 17,
20
+ " p": 18,
21
+ "tu": 19,
22
+ "é": 20,
23
+ "é ": 21,
24
+ " é ": 22,
25
+ "tug": 23,
26
+ "rtu": 24,
27
+ "ort": 25,
28
+ "por": 26,
29
+ "s ": 27,
30
+ " c": 28,
31
+ "do": 29,
32
+ "ua": 30,
33
+ "gu": 31,
34
+ " é": 32,
35
+ "l ": 33,
36
+ " e": 34,
37
+ "ug": 35,
38
+ "rt": 36,
39
+ "or": 37,
40
+ "po": 38,
41
+ "e ": 39,
42
+ "í": 40,
43
+ " l": 41,
44
+ "da": 42,
45
+ " d": 43,
46
+ " s": 44,
47
+ "la": 45,
48
+ "ia": 46,
49
+ "al ": 47,
50
+ "do ": 48,
51
+ "da ": 49,
52
+ "a e": 50,
53
+ "ua ": 51,
54
+ "a p": 52,
55
+ " po": 53,
56
+ "pa": 54,
57
+ "m ": 55,
58
+ "ga": 56,
59
+ "it": 57,
60
+ "b": 58,
61
+ "f": 59,
62
+ "st": 60,
63
+ "es": 61,
64
+ " su": 62,
65
+ "ia ": 63,
66
+ "aís": 64,
67
+ "paí": 65,
68
+ " pa": 66,
69
+ "gal": 67,
70
+ "uga": 68,
71
+ "a o": 69,
72
+ "aí": 70,
73
+ "ís": 71,
74
+ " o": 72,
75
+ "om": 73,
76
+ "as": 74,
77
+ "ra": 75,
78
+ "ci": 76,
79
+ "on": 77,
80
+ "a c": 78,
81
+ "o l": 79,
82
+ " mu": 80,
83
+ "ala": 81,
84
+ "fal": 82,
85
+ " fa": 83,
86
+ "a é": 84,
87
+ "ugu": 85,
88
+ " e ": 86,
89
+ " co": 87,
90
+ "h": 88,
91
+ "gua": 89,
92
+ "ngu": 90,
93
+ "íng": 91,
94
+ "la ": 92,
95
+ "lín": 93,
96
+ " lí": 94,
97
+ " a ": 95,
98
+ "su": 96,
99
+ "no": 97,
100
+ "is": 98,
101
+ "mu": 99,
102
+ " m": 100,
103
+ "fa": 101,
104
+ " f": 102,
105
+ "ad": 103,
106
+ "ng": 104,
107
+ "ín": 105,
108
+ "si": 106,
109
+ "lí": 107,
110
+ " a": 108,
111
+ "co": 109,
112
+ "ro": 110,
113
+ "ta": 111,
114
+ "ur": 112,
115
+ " li": 113,
116
+ " de": 114,
117
+ "de ": 115,
118
+ "íse": 116,
119
+ "e p": 117,
120
+ "l e": 118,
121
+ "mb": 119,
122
+ "e é": 120,
123
+ "é c": 121,
124
+ "con": 122,
125
+ "onh": 123,
126
+ "sua": 124,
127
+ "nhe": 125,
128
+ "hec": 126,
129
+ "eci": 127,
130
+ "cid": 128,
131
+ "ida": 129,
132
+ " pe": 130,
133
+ "pel": 131,
134
+ "a s": 132,
135
+ "ela": 133,
136
+ "ud": 134,
137
+ "ndo": 135,
138
+ "lis": 136,
139
+ "isb": 137,
140
+ "sbo": 138,
141
+ "boa": 139,
142
+ "oa ": 140,
143
+ "é a": 141,
144
+ "und": 142,
145
+ "mun": 143,
146
+ "o m": 144,
147
+ "eu": 145,
148
+ " ca": 146,
149
+ " do": 147,
150
+ "s d": 148,
151
+ "cap": 149,
152
+ "es ": 150,
153
+ "api": 151,
154
+ "ses": 152,
155
+ "pit": 153,
156
+ "ita": 154,
157
+ "tal": 155,
158
+ "l d": 156,
159
+ " cu": 157,
160
+ "o b": 158,
161
+ " br": 159,
162
+ "bra": 160,
163
+ "ras": 161,
164
+ "asi": 162,
165
+ "sil": 163,
166
+ "il ": 164,
167
+ "l t": 165,
168
+ " ta": 166,
169
+ "tam": 167,
170
+ "amb": 168,
171
+ "mbé": 169,
172
+ "bém": 170,
173
+ "ém ": 171,
174
+ "m f": 172,
175
+ "guê": 173,
176
+ "uês": 174,
177
+ "ês ": 175,
178
+ "s c": 176,
179
+ "com": 177,
180
+ "omo": 178,
181
+ "mo ": 179,
182
+ " of": 180,
183
+ "ofi": 181,
184
+ "fic": 182,
185
+ "ici": 183,
186
+ "cia": 184,
187
+ "ial": 185,
188
+ "cul": 186,
189
+ "ult": 187,
190
+ "ltu": 188,
191
+ "tur": 189,
192
+ "ura": 190,
193
+ "ra ": 191,
194
+ "a h": 192,
195
+ " hi": 193,
196
+ "his": 194,
197
+ "ist": 195,
198
+ "stó": 196,
199
+ "tór": 197,
200
+ "óri": 198,
201
+ "ria": 199,
202
+ "oe": 200,
203
+ "e g": 201,
204
+ " ga": 202,
205
+ "gas": 203,
206
+ "ast": 204,
207
+ "str": 205,
208
+ "tro": 206,
209
+ "ron": 207,
210
+ "ono": 208,
211
+ "nom": 209,
212
+ "omi": 210,
213
+ "mia": 211,
214
+ "te": 212,
215
+ " o ": 213,
216
+ "bé": 214,
217
+ "ém": 215,
218
+ "uê": 216,
219
+ "ês": 217,
220
+ "mo": 218,
221
+ "of": 219,
222
+ "fi": 220,
223
+ "ic": 221,
224
+ "oa": 222,
225
+ "bo": 223,
226
+ "sb": 224,
227
+ "li": 225,
228
+ "nd": 226,
229
+ "um": 227,
230
+ "l é": 228,
231
+ " u": 229,
232
+ "é u": 230,
233
+ " um": 231,
234
+ "um ": 232,
235
+ "m p": 233,
236
+ "un": 234,
237
+ "se": 235,
238
+ "os": 236,
239
+ "ís ": 237,
240
+ "s s": 238,
241
+ "nh": 239,
242
+ "he": 240,
243
+ "ec": 241,
244
+ "de": 242,
245
+ "id": 243,
246
+ "pe": 244,
247
+ "el": 245,
248
+ "cu": 246,
249
+ "ul": 247,
250
+ "lt": 248,
251
+ " h": 249,
252
+ "hi": 250,
253
+ "tó": 251,
254
+ "ór": 252,
255
+ "ri": 253,
256
+ "pi": 254,
257
+ " g": 255,
258
+ "ap": 256,
259
+ "tr": 257,
260
+ "ca": 258,
261
+ "mi": 259,
262
+ " b": 260,
263
+ "br": 261,
264
+ "il": 262,
265
+ " t": 263,
266
+ "am": 264,
267
+ "rop": 265,
268
+ "opa": 266,
269
+ "pa ": 267,
270
+ "a a": 268,
271
+ "a l": 269,
272
+ "em": 270,
273
+ "sa": 271,
274
+ "ue": 272,
275
+ "gue": 273,
276
+ "ues": 274,
277
+ "esa": 275,
278
+ "sa ": 276,
279
+ "é f": 277,
280
+ " n": 278,
281
+ "op": 279,
282
+ "lad": 280,
283
+ "ada": 281,
284
+ " em": 282,
285
+ "em ": 283,
286
+ "m m": 284,
287
+ "mui": 285,
288
+ "uit": 286,
289
+ "ito": 287,
290
+ "tos": 288,
291
+ "os ": 289,
292
+ "s p": 290,
293
+ " si": 291,
294
+ "sit": 292,
295
+ "itu": 293,
296
+ "tua": 294,
297
+ "uad": 295,
298
+ "ado": 296,
299
+ "to": 297,
300
+ "o n": 298,
301
+ " no": 299
302
+ }