langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,294 @@
1
+ {
2
+ "i": 0,
3
+ "a": 1,
4
+ "u": 2,
5
+ "s": 3,
6
+ "t": 4,
7
+ "r": 5,
8
+ "l": 6,
9
+ "e": 7,
10
+ "a ": 8,
11
+ "v": 9,
12
+ "tu": 10,
13
+ "ie": 11,
14
+ "s ": 12,
15
+ "au": 13,
16
+ "ur": 14,
17
+ "y": 15,
18
+ "ia": 16,
19
+ "li": 17,
20
+ "n": 18,
21
+ "o": 19,
22
+ "k": 20,
23
+ "ų": 21,
24
+ "iet": 22,
25
+ "lie": 23,
26
+ "etu": 24,
27
+ "ri": 25,
28
+ "š": 26,
29
+ " t": 27,
30
+ "st": 28,
31
+ "al": 29,
32
+ "ra": 30,
33
+ "tuv": 31,
34
+ "ų ": 32,
35
+ "j": 33,
36
+ " k": 34,
37
+ "uv": 35,
38
+ "et": 36,
39
+ " i": 37,
40
+ "iau": 38,
41
+ "je ": 39,
42
+ "as": 40,
43
+ "je": 41,
44
+ "yra": 42,
45
+ " li": 43,
46
+ "us": 44,
47
+ " yr": 45,
48
+ "ra ": 46,
49
+ "e ": 47,
50
+ " l": 48,
51
+ "vi": 49,
52
+ "ą ": 50,
53
+ "is": 51,
54
+ " tu": 52,
55
+ " š": 53,
56
+ "yr": 54,
57
+ " y": 55,
58
+ "tur": 56,
59
+ "g": 57,
60
+ "d": 58,
61
+ "uri": 59,
62
+ "ą": 60,
63
+ " ku": 61,
64
+ "in": 62,
65
+ "ir": 63,
66
+ "r ": 64,
67
+ " d": 65,
68
+ "a y": 66,
69
+ "va ": 67,
70
+ "uva": 68,
71
+ "i ": 69,
72
+ "ku": 70,
73
+ "ist": 71,
74
+ "ą i": 72,
75
+ "i t": 73,
76
+ "ri ": 74,
77
+ "as ": 75,
78
+ "ir ": 76,
79
+ " ir": 77,
80
+ "tin": 78,
81
+ "yje": 79,
82
+ "lyj": 80,
83
+ "usi": 81,
84
+ "aus": 82,
85
+ " vi": 83,
86
+ "alb": 84,
87
+ "kal": 85,
88
+ " ka": 86,
89
+ "ų k": 87,
90
+ "ių ": 88,
91
+ "e l": 89,
92
+ "s š": 90,
93
+ "šal": 91,
94
+ " ša": 92,
95
+ "ių": 93,
96
+ "ka": 94,
97
+ "lb": 95,
98
+ " v": 96,
99
+ "en": 97,
100
+ " s": 98,
101
+ "ša": 99,
102
+ "va": 100,
103
+ "ni": 101,
104
+ "si": 102,
105
+ "b": 103,
106
+ "ul": 104,
107
+ "p": 105,
108
+ "ė": 106,
109
+ "ly": 107,
110
+ "yj": 108,
111
+ "ti": 109,
112
+ "os": 110,
113
+ " pa": 111,
114
+ "ost": 112,
115
+ "sti": 113,
116
+ " e": 114,
117
+ "inė": 115,
118
+ "stų": 116,
119
+ "ris": 117,
120
+ "nė ": 118,
121
+ "g t": 119,
122
+ "ė i": 120,
123
+ "ės": 121,
124
+ "rė": 122,
125
+ "r d": 123,
126
+ " di": 124,
127
+ "did": 125,
128
+ "idž": 126,
129
+ "dži": 127,
130
+ "žia": 128,
131
+ "sia": 129,
132
+ "ias": 130,
133
+ "ši": 131,
134
+ "pas": 132,
135
+ "asa": 133,
136
+ "sau": 134,
137
+ "aul": 135,
138
+ "uly": 136,
139
+ "ro": 137,
140
+ "eu": 138,
141
+ "e v": 139,
142
+ "vil": 140,
143
+ "iln": 141,
144
+ "lni": 142,
145
+ "niu": 143,
146
+ "ius": 144,
147
+ "us ": 145,
148
+ "s y": 146,
149
+ "a l": 147,
150
+ "uvo": 148,
151
+ "vos": 149,
152
+ "os ": 150,
153
+ "s s": 151,
154
+ " so": 152,
155
+ "sos": 153,
156
+ "s m": 154,
157
+ "kia": 155,
158
+ " is": 156,
159
+ "uki": 157,
160
+ "sto": 158,
161
+ "tor": 159,
162
+ "ori": 160,
163
+ "rij": 161,
164
+ "iją": 162,
165
+ "ją ": 163,
166
+ "r k": 164,
167
+ "auk": 165,
168
+ "kul": 166,
169
+ "rau": 167,
170
+ "ult": 168,
171
+ "ltū": 169,
172
+ "tūr": 170,
173
+ "ūrą": 171,
174
+ "tra": 172,
175
+ "rą ": 173,
176
+ "ą k": 174,
177
+ "kur": 175,
178
+ " tr": 176,
179
+ " mi": 177,
180
+ "mie": 178,
181
+ "ies": 179,
182
+ "est": 180,
183
+ "sta": 181,
184
+ "tas": 182,
185
+ "ug ": 183,
186
+ "aly": 184,
187
+ "a t": 185,
188
+ "aug": 186,
189
+ "dau": 187,
190
+ " da": 188,
191
+ "ū": 189,
192
+ "a d": 190,
193
+ "m": 191,
194
+ "ž": 192,
195
+ "urt": 193,
196
+ "rti": 194,
197
+ "ing": 195,
198
+ "ngą": 196,
199
+ "gą ": 197,
200
+ "ia ": 198,
201
+ "nė": 199,
202
+ "ūr": 200,
203
+ "rą": 201,
204
+ "tr": 202,
205
+ "uk": 203,
206
+ "ki": 204,
207
+ "da": 205,
208
+ "ug": 206,
209
+ "g ": 207,
210
+ "tų": 208,
211
+ "sa": 209,
212
+ "pa": 210,
213
+ " p": 211,
214
+ "bų": 212,
215
+ "vų": 213,
216
+ "yv": 214,
217
+ "gy": 215,
218
+ " g": 216,
219
+ "a š": 217,
220
+ "se": 218,
221
+ "ali": 219,
222
+ "lis": 220,
223
+ "is ": 221,
224
+ " ši": 222,
225
+ "ė ": 223,
226
+ "so": 224,
227
+ "vo": 225,
228
+ "di": 226,
229
+ "id": 227,
230
+ "dž": 228,
231
+ "ži": 229,
232
+ " m": 230,
233
+ "mi": 231,
234
+ "es": 232,
235
+ "ta": 233,
236
+ "iu": 234,
237
+ "ln": 235,
238
+ "il": 236,
239
+ "rt": 237,
240
+ "ng": 238,
241
+ "gą": 239,
242
+ "to": 240,
243
+ "or": 241,
244
+ "ij": 242,
245
+ "ją": 243,
246
+ "lt": 244,
247
+ "tū": 245,
248
+ "vie": 246,
249
+ "ien": 247,
250
+ "ena": 248,
251
+ "na ": 249,
252
+ "a i": 250,
253
+ " iš": 251,
254
+ "iš ": 252,
255
+ "š s": 253,
256
+ " se": 254,
257
+ "sen": 255,
258
+ "eni": 256,
259
+ "nia": 257,
260
+ "po": 258,
261
+ "op": 259,
262
+ "sių": 260,
263
+ "ų g": 261,
264
+ " gy": 262,
265
+ "gyv": 263,
266
+ "yvų": 264,
267
+ "vų ": 265,
268
+ "lbų": 266,
269
+ "bų ": 267,
270
+ "ų p": 268,
271
+ "šia": 269,
272
+ "š ": 270,
273
+ "aur": 271,
274
+ "urė": 272,
275
+ "rės": 273,
276
+ "ės ": 274,
277
+ "s e": 275,
278
+ " eu": 276,
279
+ "eur": 277,
280
+ "uro": 278,
281
+ "rop": 279,
282
+ "opo": 280,
283
+ "poj": 281,
284
+ "oje": 282,
285
+ "iš": 283,
286
+ "na": 284,
287
+ "uvi": 285,
288
+ "vių": 286,
289
+ "ba": 287,
290
+ "lba": 288,
291
+ "ba ": 289,
292
+ "a v": 290,
293
+ "oj": 291
294
+ }
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "i": 1,
4
+ "s": 2,
5
+ "t": 3,
6
+ "l": 4,
7
+ "v": 5,
8
+ "u": 6,
9
+ "r": 7,
10
+ "a ": 8,
11
+ "st": 9,
12
+ "e": 10,
13
+ "o": 11,
14
+ " v": 12,
15
+ "al": 13,
16
+ "ir": 14,
17
+ "r ": 15,
18
+ "va": 16,
19
+ "vi": 17,
20
+ "d": 18,
21
+ "ā": 19,
22
+ "p": 20,
23
+ "ie": 21,
24
+ "u ": 22,
25
+ "ls": 23,
26
+ "s ": 24,
27
+ "ta": 25,
28
+ " i": 26,
29
+ " l": 27,
30
+ "atv": 28,
31
+ "lat": 29,
32
+ "n": 30,
33
+ "a i": 31,
34
+ " ir": 32,
35
+ "ir ": 33,
36
+ " va": 34,
37
+ "k": 35,
38
+ "val": 36,
39
+ "la": 37,
40
+ "at": 38,
41
+ "tvi": 39,
42
+ "tv": 40,
43
+ "da": 41,
44
+ "ta ": 42,
45
+ "vij": 43,
46
+ "o ": 44,
47
+ "tu": 45,
48
+ "as": 46,
49
+ "ist": 47,
50
+ "pi": 48,
51
+ "ija": 49,
52
+ " p": 50,
53
+ " la": 51,
54
+ "is": 52,
55
+ "ij": 53,
56
+ "ja": 54,
57
+ "ē": 55,
58
+ "g": 56,
59
+ "ī": 57,
60
+ "b": 58,
61
+ "m": 59,
62
+ "z": 60,
63
+ "j": 61,
64
+ "u v": 62,
65
+ "alo": 63,
66
+ "n ": 64,
67
+ "vie": 65,
68
+ "tūr": 66,
69
+ " pi": 67,
70
+ " s": 68,
71
+ "sa": 69,
72
+ "stu": 70,
73
+ "to": 71,
74
+ "to ": 72,
75
+ "ri": 73,
76
+ "sta": 74,
77
+ " k": 75,
78
+ "tū": 76,
79
+ "ūr": 77,
80
+ "lst": 78,
81
+ "als": 79,
82
+ "ka": 80,
83
+ "ai": 81,
84
+ " d": 82,
85
+ "ū": 83,
86
+ "r v": 84,
87
+ "ja ": 85,
88
+ "us": 86,
89
+ "as ": 87,
90
+ "un": 88,
91
+ "lod": 89,
92
+ "ga": 90,
93
+ " da": 91,
94
+ "un ": 92,
95
+ "lt": 93,
96
+ "ba": 94,
97
+ " b": 95,
98
+ " un": 96,
99
+ "ēta": 97,
100
+ "pil": 98,
101
+ "od": 99,
102
+ "lo": 100,
103
+ "ils": 101,
104
+ "sēt": 102,
105
+ " ba": 103,
106
+ "lsē": 104,
107
+ "ā ": 105,
108
+ " u": 106,
109
+ "ēt": 107,
110
+ "sē": 108,
111
+ "il": 109,
112
+ "ais": 110,
113
+ "alv": 111,
114
+ " ga": 112,
115
+ "jas": 113,
116
+ "elā": 114,
117
+ "lāk": 115,
118
+ "ākā": 116,
119
+ "s g": 117,
120
+ "r p": 118,
121
+ "ī l": 119,
122
+ "spi": 120,
123
+ "a u": 121,
124
+ " pa": 122,
125
+ "asp": 123,
126
+ "kā ": 124,
127
+ "ā p": 125,
128
+ "gal": 126,
129
+ "vas": 127,
130
+ "a v": 128,
131
+ "n l": 129,
132
+ " li": 130,
133
+ "ama": 131,
134
+ "tam": 132,
135
+ "lie": 133,
136
+ "stī": 134,
137
+ "iel": 135,
138
+ "īst": 136,
139
+ "tī ": 137,
140
+ "zīs": 138,
141
+ "lva": 139,
142
+ "azī": 140,
143
+ "paz": 141,
144
+ "n s": 142,
145
+ " sk": 143,
146
+ "ska": 144,
147
+ "kai": 145,
148
+ "sto": 146,
149
+ "o d": 147,
150
+ "dab": 148,
151
+ "abu": 149,
152
+ "bu ": 150,
153
+ "u k": 151,
154
+ " ka": 152,
155
+ "kas": 153,
156
+ "s p": 154,
157
+ "pie": 155,
158
+ "ies": 156,
159
+ "esa": 157,
160
+ "sai": 158,
161
+ "a d": 159,
162
+ "dau": 160,
163
+ "aud": 161,
164
+ "udz": 162,
165
+ "dzu": 163,
166
+ "zus": 164,
167
+ "us ": 165,
168
+ "s t": 166,
169
+ " tū": 167,
170
+ "ūri": 168,
171
+ "ris": 169,
172
+ "tus": 170,
173
+ "ma ": 171,
174
+ "a a": 172,
175
+ " ar": 173,
176
+ "ar ": 174,
177
+ "r s": 175,
178
+ " sa": 176,
179
+ "sav": 177,
180
+ "avu": 178,
181
+ "vu ": 179,
182
+ "u b": 180,
183
+ "bag": 181,
184
+ "agā": 182,
185
+ "gāt": 183,
186
+ "āto": 184,
187
+ "o v": 185,
188
+ " vē": 186,
189
+ "vēs": 187,
190
+ "ēst": 188,
191
+ "tur": 189,
192
+ "uri": 190,
193
+ "ri ": 191,
194
+ "i k": 192,
195
+ " ku": 193,
196
+ "kul": 194,
197
+ "ult": 195,
198
+ "ltū": 196,
199
+ "ūru": 197,
200
+ "ru ": 198,
201
+ "u u": 199,
202
+ "ļ": 200,
203
+ "sp": 201,
204
+ "li": 202,
205
+ "el": 203,
206
+ "lā": 204,
207
+ "āk": 205,
208
+ "kā": 206,
209
+ "tī": 207,
210
+ "ī ": 208,
211
+ "pa": 209,
212
+ "az": 210,
213
+ "zī": 211,
214
+ "īs": 212,
215
+ "am": 213,
216
+ "ma": 214,
217
+ " a": 215,
218
+ "ar": 216,
219
+ "av": 217,
220
+ "vu": 218,
221
+ "ag": 219,
222
+ "gā": 220,
223
+ "āt": 221,
224
+ "vē": 222,
225
+ "ēs": 223,
226
+ "ur": 224,
227
+ "i ": 225,
228
+ "ku": 226,
229
+ "š": 227,
230
+ "ts": 228,
231
+ " z": 229,
232
+ "zi": 230,
233
+ "em": 231,
234
+ "me": 232,
235
+ "eļ": 233,
236
+ "ļe": 234,
237
+ "ei": 235,
238
+ "ro": 236,
239
+ "op": 237,
240
+ "pā": 238,
241
+ "eš": 239,
242
+ "šu": 240,
243
+ "en": 241,
244
+ "na": 242,
245
+ " n": 243,
246
+ "no": 244,
247
+ "dā": 245,
248
+ "ām": 246,
249
+ "m ": 247,
250
+ " r": 248,
251
+ "rī": 249,
252
+ "īg": 250,
253
+ " g": 251,
254
+ "lv": 252,
255
+ "ul": 253,
256
+ "ieš": 254,
257
+ "ešu": 255,
258
+ "šu ": 256,
259
+ "oda": 257,
260
+ "da ": 258,
261
+ " vi": 259,
262
+ "ien": 260,
263
+ "ena": 261,
264
+ "na ": 262,
265
+ "a n": 263,
266
+ " no": 264,
267
+ "no ": 265,
268
+ "o b": 266,
269
+ "bal": 267,
270
+ "alt": 268,
271
+ "ltu": 269,
272
+ "tu ": 270,
273
+ "odā": 271,
274
+ "dām": 272,
275
+ "ām ": 273,
276
+ "m r": 274,
277
+ " rī": 275,
278
+ "rīg": 276,
279
+ "īga": 277,
280
+ "ga ": 278,
281
+ "r l": 279,
282
+ "ru": 280,
283
+ "sk": 281,
284
+ "ab": 282,
285
+ "bu": 283,
286
+ "es": 284,
287
+ "au": 285,
288
+ "ud": 286,
289
+ "dz": 287,
290
+ "zu": 288,
291
+ " t": 289,
292
+ "sts": 290,
293
+ "ts ": 291,
294
+ "s z": 292,
295
+ " zi": 293,
296
+ "zie": 294,
297
+ "iem": 295,
298
+ "eme": 296,
299
+ "meļ": 297,
300
+ "eļe": 298,
301
+ "ļei": 299
302
+ }