langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,302 @@
1
+ {
2
+ "e": 0,
3
+ "s": 1,
4
+ "k": 2,
5
+ "a": 3,
6
+ "r": 4,
7
+ "t": 5,
8
+ "u": 6,
9
+ "o": 7,
10
+ " j": 8,
11
+ "i": 9,
12
+ "j": 10,
13
+ "e ": 11,
14
+ "n": 12,
15
+ "v": 13,
16
+ "m": 14,
17
+ " je": 15,
18
+ "je": 16,
19
+ "á": 17,
20
+ "je ": 18,
21
+ "z": 19,
22
+ "h": 20,
23
+ " s": 21,
24
+ "l": 22,
25
+ "a ": 23,
26
+ "p": 24,
27
+ "sk": 25,
28
+ "st": 26,
29
+ "esk": 27,
30
+ "čes": 28,
31
+ "u ": 29,
32
+ "ou": 30,
33
+ " a": 31,
34
+ "ou ": 32,
35
+ "če": 33,
36
+ "es": 34,
37
+ "č": 35,
38
+ "í": 36,
39
+ "ý": 37,
40
+ "e s": 38,
41
+ " z": 39,
42
+ "ní": 40,
43
+ "í ": 41,
44
+ "a j": 42,
45
+ " če": 43,
46
+ "m ": 44,
47
+ "y": 45,
48
+ "c": 46,
49
+ "é": 47,
50
+ "d": 48,
51
+ " č": 49,
52
+ "ro": 50,
53
+ "é ": 51,
54
+ " a ": 52,
55
+ "tu": 53,
56
+ "ur": 54,
57
+ "em": 55,
58
+ " t": 56,
59
+ "hi": 57,
60
+ " k": 58,
61
+ "uro": 59,
62
+ "rou": 60,
63
+ "sv": 61,
64
+ "ám": 62,
65
+ "ná": 63,
66
+ "zn": 64,
67
+ "ké": 65,
68
+ "o ": 66,
69
+ "to": 67,
70
+ " h": 68,
71
+ "pu": 69,
72
+ " sv": 70,
73
+ "nám": 71,
74
+ "zná": 72,
75
+ " zn": 73,
76
+ "ké ": 74,
77
+ "sto": 75,
78
+ "zyk": 76,
79
+ "azy": 77,
80
+ "jaz": 78,
81
+ " ja": 79,
82
+ "ský": 80,
83
+ "ní ": 81,
84
+ "dní": 82,
85
+ "edn": 83,
86
+ " st": 84,
87
+ "tur": 85,
88
+ "lik": 86,
89
+ "bli": 87,
90
+ "ubl": 88,
91
+ "pub": 89,
92
+ "epu": 90,
93
+ "rep": 91,
94
+ " re": 92,
95
+ "ve": 93,
96
+ "ep": 94,
97
+ "ub": 95,
98
+ "re": 96,
99
+ " r": 97,
100
+ "á ": 98,
101
+ "ed": 99,
102
+ "b": 100,
103
+ "yk": 101,
104
+ "dn": 102,
105
+ "ě": 103,
106
+ "ik": 104,
107
+ "li": 105,
108
+ "ch": 106,
109
+ "bl": 107,
110
+ "ra": 108,
111
+ " p": 109,
112
+ "zy": 110,
113
+ "ja": 111,
114
+ "az": 112,
115
+ "ký": 113,
116
+ "iky": 114,
117
+ "ásn": 115,
118
+ "ky ": 116,
119
+ "y a": 117,
120
+ "e z": 118,
121
+ "rás": 119,
122
+ "krá": 120,
123
+ "ámá": 121,
124
+ " kr": 122,
125
+ "u k": 123,
126
+ "ů": 124,
127
+ "má ": 125,
128
+ "vou": 126,
129
+ "svo": 127,
130
+ "ká": 128,
131
+ "á s": 129,
132
+ "no": 130,
133
+ "ans": 131,
134
+ "nsk": 132,
135
+ "kýc": 133,
136
+ "ých": 134,
137
+ "ch ": 135,
138
+ "h j": 136,
139
+ "yků": 137,
140
+ "ků ": 138,
141
+ "ů p": 139,
142
+ " pr": 140,
143
+ "pra": 141,
144
+ "rah": 142,
145
+ "aha": 143,
146
+ "ha ": 144,
147
+ "e h": 145,
148
+ " hl": 146,
149
+ "hla": 147,
150
+ "lav": 148,
151
+ "avn": 149,
152
+ "vní": 150,
153
+ "í m": 151,
154
+ " mě": 152,
155
+ "měs": 153,
156
+ "ěst": 154,
157
+ "to ": 155,
158
+ "o č": 156,
159
+ "ské": 157,
160
+ "é r": 158,
161
+ "ko ": 159,
162
+ "o j": 160,
163
+ "e t": 161,
164
+ " ta": 162,
165
+ "tak": 163,
166
+ "aké": 164,
167
+ "é z": 165,
168
+ "ámé": 166,
169
+ "mé ": 167,
170
+ "é s": 168,
171
+ "svý": 169,
172
+ "vým": 170,
173
+ "ým ": 171,
174
+ "m p": 172,
175
+ " pi": 173,
176
+ "piv": 174,
177
+ "ive": 175,
178
+ "vem": 176,
179
+ "em ": 177,
180
+ "m a": 178,
181
+ "a t": 179,
182
+ " tr": 180,
183
+ "tra": 181,
184
+ "rad": 182,
185
+ "adi": 183,
186
+ "dic": 184,
187
+ "ice": 185,
188
+ "cem": 186,
189
+ "emi": 187,
190
+ "sno": 188,
191
+ "nou": 189,
192
+ "u a": 190,
193
+ " ar": 191,
194
+ "arc": 192,
195
+ "rch": 193,
196
+ "chi": 194,
197
+ "hit": 195,
198
+ "ite": 196,
199
+ "tek": 197,
200
+ "ekt": 198,
201
+ "ktu": 199,
202
+ "ř": 200,
203
+ "u h": 201,
204
+ " hi": 202,
205
+ "his": 203,
206
+ "ist": 204,
207
+ "tor": 205,
208
+ "ori": 206,
209
+ "rií": 207,
210
+ "ií ": 208,
211
+ "í a": 209,
212
+ "a k": 210,
213
+ " ku": 211,
214
+ "kul": 212,
215
+ "ult": 213,
216
+ "ltu": 214,
217
+ "u č": 215,
218
+ "sko": 216,
219
+ "ar": 217,
220
+ "rc": 218,
221
+ "ze": 219,
222
+ "it": 220,
223
+ "te": 221,
224
+ "ek": 222,
225
+ "kt": 223,
226
+ "ím": 224,
227
+ "k ": 225,
228
+ "is": 226,
229
+ "or": 227,
230
+ "ri": 228,
231
+ "ií": 229,
232
+ "ku": 230,
233
+ "ul": 231,
234
+ "lt": 232,
235
+ "ko": 233,
236
+ "ta": 234,
237
+ "ak": 235,
238
+ "mé": 236,
239
+ "vý": 237,
240
+ "ým": 238,
241
+ "pi": 239,
242
+ "iv": 240,
243
+ "tr": 241,
244
+ "ad": 242,
245
+ "t ": 243,
246
+ "ah": 244,
247
+ "ha": 245,
248
+ "pr": 246,
249
+ "hl": 247,
250
+ "la": 248,
251
+ "av": 249,
252
+ "vn": 250,
253
+ " m": 251,
254
+ "mě": 252,
255
+ "ěs": 253,
256
+ "ů ": 254,
257
+ "ků": 255,
258
+ "h ": 256,
259
+ "ky": 257,
260
+ "y ": 258,
261
+ "ýc": 259,
262
+ "ns": 260,
263
+ "an": 261,
264
+ "má": 262,
265
+ "va": 263,
266
+ "vo": 264,
267
+ "ov": 265,
268
+ "lo": 266,
269
+ "sl": 267,
270
+ "kr": 268,
271
+ "rá": 269,
272
+ "ás": 270,
273
+ "sn": 271,
274
+ "í e": 272,
275
+ " ev": 273,
276
+ "evr": 274,
277
+ "vro": 275,
278
+ "rop": 276,
279
+ "opě": 277,
280
+ "pě ": 278,
281
+ "ě č": 279,
282
+ "át": 280,
283
+ "tá": 281,
284
+ "ký ": 282,
285
+ "ý j": 283,
286
+ "ka": 284,
287
+ "yk ": 285,
288
+ "k j": 286,
289
+ "e j": 287,
290
+ "jed": 288,
291
+ "ním": 289,
292
+ "ím ": 290,
293
+ "m z": 291,
294
+ " ze": 292,
295
+ "ze ": 293,
296
+ " sl": 294,
297
+ "slo": 295,
298
+ "lov": 296,
299
+ "ova": 297,
300
+ "van": 298,
301
+ "di": 299
302
+ }
@@ -0,0 +1,302 @@
1
+ {
2
+ "e": 0,
3
+ "r": 1,
4
+ "s": 2,
5
+ "a": 3,
6
+ "n": 4,
7
+ "k": 5,
8
+ "t": 6,
9
+ "d": 7,
10
+ "i": 8,
11
+ "o": 9,
12
+ "r ": 10,
13
+ "l": 11,
14
+ "er": 12,
15
+ "er ": 13,
16
+ "m": 14,
17
+ " e": 15,
18
+ "rk": 16,
19
+ "t ": 17,
20
+ "an": 18,
21
+ "v": 19,
22
+ " s": 20,
23
+ "og": 21,
24
+ "g": 22,
25
+ "ke": 23,
26
+ " d": 24,
27
+ "dan": 25,
28
+ "or": 26,
29
+ "ark": 27,
30
+ " er": 28,
31
+ " k": 29,
32
+ "g ": 30,
33
+ "en": 31,
34
+ " o": 32,
35
+ "u": 33,
36
+ "ar": 34,
37
+ "og ": 35,
38
+ "da": 36,
39
+ " og": 37,
40
+ "f": 38,
41
+ "s ": 39,
42
+ "te": 40,
43
+ "it": 41,
44
+ " si": 42,
45
+ "r k": 43,
46
+ "si": 44,
47
+ "h": 45,
48
+ "sk": 46,
49
+ " da": 47,
50
+ "st": 48,
51
+ "et": 49,
52
+ "ur": 50,
53
+ "li": 51,
54
+ "anm": 52,
55
+ "ma": 53,
56
+ "nma": 54,
57
+ "nm": 55,
58
+ "mar": 56,
59
+ "nd": 57,
60
+ "n ": 58,
61
+ " ke": 59,
62
+ "tu": 60,
63
+ "r o": 61,
64
+ "ks ": 62,
65
+ "in": 63,
66
+ "fo": 64,
67
+ " f": 65,
68
+ " h": 66,
69
+ "dt": 67,
70
+ "ds": 68,
71
+ "ve": 69,
72
+ "k e": 70,
73
+ "rk ": 71,
74
+ "et ": 72,
75
+ "ken": 73,
76
+ "end": 74,
77
+ "ndt": 75,
78
+ "dt ": 76,
79
+ "t f": 77,
80
+ " fo": 78,
81
+ "for": 79,
82
+ "ske": 80,
83
+ "or ": 81,
84
+ "r s": 82,
85
+ "sin": 83,
86
+ "in ": 84,
87
+ "ite": 85,
88
+ "tur": 86,
89
+ "ur ": 87,
90
+ "a ": 88,
91
+ "al": 89,
92
+ " m": 90,
93
+ "ro": 91,
94
+ "ks": 92,
95
+ "de": 93,
96
+ "rd": 94,
97
+ "ek": 95,
98
+ "e ": 96,
99
+ " a": 97,
100
+ "es": 98,
101
+ "d ": 99,
102
+ "ta": 100,
103
+ "p": 101,
104
+ " l": 102,
105
+ "ne": 103,
106
+ "k ": 104,
107
+ "s h": 105,
108
+ "rks": 106,
109
+ "r d": 107,
110
+ "n e": 108,
111
+ "vn ": 109,
112
+ "avn": 110,
113
+ "hav": 111,
114
+ " se": 112,
115
+ " ho": 113,
116
+ "hov": 114,
117
+ "ove": 115,
118
+ "ved": 116,
119
+ "eds": 117,
120
+ "dst": 118,
121
+ "sta": 119,
122
+ "tad": 120,
123
+ "ad ": 121,
124
+ "d o": 122,
125
+ "kva": 123,
126
+ "skv": 124,
127
+ "sek": 125,
128
+ "eks": 126,
129
+ "s m": 127,
130
+ " mi": 128,
131
+ "mil": 129,
132
+ "ill": 130,
133
+ "lli": 131,
134
+ "lio": 132,
135
+ "ion": 133,
136
+ "one": 134,
137
+ "ner": 135,
138
+ "r m": 136,
139
+ " me": 137,
140
+ "men": 138,
141
+ "tet": 139,
142
+ "enn": 140,
143
+ "nne": 141,
144
+ "nes": 142,
145
+ "esk": 143,
146
+ "ker": 144,
147
+ "lit": 145,
148
+ "ali": 146,
149
+ " kø": 147,
150
+ "køb": 148,
151
+ "val": 149,
152
+ "øbe": 150,
153
+ "ben": 151,
154
+ "enh": 152,
155
+ "nha": 153,
156
+ "ltu": 154,
157
+ "rds": 155,
158
+ "ærd": 156,
159
+ "fær": 157,
160
+ "lfæ": 158,
161
+ "elf": 159,
162
+ "vel": 160,
163
+ "g h": 161,
164
+ " ve": 162,
165
+ "t v": 163,
166
+ " hi": 164,
167
+ "it ": 165,
168
+ "his": 166,
169
+ "ist": 167,
170
+ "sit": 168,
171
+ "å k": 169,
172
+ "sto": 170,
173
+ "tor": 171,
174
+ "ori": 172,
175
+ "rie": 173,
176
+ "ie ": 174,
177
+ "e d": 175,
178
+ "ogs": 176,
179
+ "gså": 177,
180
+ "så ": 178,
181
+ "g e": 179,
182
+ "vsk": 180,
183
+ "ivs": 181,
184
+ "liv": 182,
185
+ " li": 183,
186
+ "n l": 184,
187
+ "g s": 185,
188
+ "m o": 186,
189
+ "em ": 187,
190
+ "tem": 188,
191
+ "ste": 189,
192
+ "yst": 190,
193
+ "sys": 191,
194
+ "n a": 192,
195
+ " ar": 193,
196
+ "rki": 194,
197
+ "kit": 195,
198
+ "ssy": 196,
199
+ "tek": 197,
200
+ "ekt": 198,
201
+ "ktu": 199,
202
+ " ku": 200,
203
+ "kul": 201,
204
+ "ult": 202,
205
+ "dss": 203,
206
+ "on": 204,
207
+ "me": 205,
208
+ "nn": 206,
209
+ "kø": 207,
210
+ "øb": 208,
211
+ "be": 209,
212
+ "nh": 210,
213
+ "ha": 211,
214
+ "av": 212,
215
+ "vn": 213,
216
+ "ho": 214,
217
+ "ov": 215,
218
+ "ed": 216,
219
+ "ad": 217,
220
+ "ki": 218,
221
+ "kt": 219,
222
+ "ku": 220,
223
+ "ul": 221,
224
+ "lt": 222,
225
+ "hi": 223,
226
+ "is": 224,
227
+ "to": 225,
228
+ "ri": 226,
229
+ "ie": 227,
230
+ "gs": 228,
231
+ "så": 229,
232
+ "å ": 230,
233
+ " v": 231,
234
+ "el": 232,
235
+ "opa": 233,
236
+ "c": 234,
237
+ "ø": 235,
238
+ "b": 236,
239
+ "å": 237,
240
+ "æ": 238,
241
+ "y": 239,
242
+ "la": 240,
243
+ " i": 241,
244
+ "i ": 242,
245
+ " n": 243,
246
+ "no": 244,
247
+ "eu": 245,
248
+ "op": 246,
249
+ "pa": 247,
250
+ "ns": 248,
251
+ "sp": 249,
252
+ "pr": 250,
253
+ " t": 251,
254
+ "le": 252,
255
+ "af": 253,
256
+ "f ": 254,
257
+ " c": 255,
258
+ "ci": 256,
259
+ "ir": 257,
260
+ "ka": 258,
261
+ "se": 259,
262
+ "mi": 260,
263
+ "il": 261,
264
+ "ll": 262,
265
+ "io": 263,
266
+ "pa ": 264,
267
+ "a d": 265,
268
+ " de": 266,
269
+ "det": 267,
270
+ "t d": 268,
271
+ "ans": 269,
272
+ "nsk": 270,
273
+ "ke ": 271,
274
+ "e s": 272,
275
+ " sp": 273,
276
+ "spr": 274,
277
+ "pro": 275,
278
+ "rog": 276,
279
+ "g t": 277,
280
+ " ta": 278,
281
+ "tal": 279,
282
+ "ale": 280,
283
+ "les": 281,
284
+ "es ": 282,
285
+ "s a": 283,
286
+ " af": 284,
287
+ "af ": 285,
288
+ "f c": 286,
289
+ " ci": 287,
290
+ "cir": 288,
291
+ "irk": 289,
292
+ "rka": 290,
293
+ "ka ": 291,
294
+ "a s": 292,
295
+ "lf": 293,
296
+ "fæ": 294,
297
+ "ær": 295,
298
+ "ss": 296,
299
+ "sy": 297,
300
+ "ys": 298,
301
+ "em": 299
302
+ }