langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,302 @@
1
+ {
2
+ "e": 0,
3
+ "a": 1,
4
+ "s": 2,
5
+ "n": 3,
6
+ "e ": 4,
7
+ "t": 5,
8
+ "u": 6,
9
+ "r": 7,
10
+ "o": 8,
11
+ "l": 9,
12
+ "i": 10,
13
+ " e": 11,
14
+ "c": 12,
15
+ "a ": 13,
16
+ "m": 14,
17
+ "p": 15,
18
+ "s ": 16,
19
+ "es": 17,
20
+ "st": 18,
21
+ "t ": 19,
22
+ "la": 20,
23
+ "d": 21,
24
+ " la": 22,
25
+ " p": 23,
26
+ " l": 24,
27
+ "f": 25,
28
+ "an": 26,
29
+ "la ": 27,
30
+ "pa": 28,
31
+ "ur": 29,
32
+ " pa": 30,
33
+ "st ": 31,
34
+ "est": 32,
35
+ " es": 33,
36
+ "on": 34,
37
+ "e e": 35,
38
+ "de": 36,
39
+ " d": 37,
40
+ " s": 38,
41
+ " c": 39,
42
+ "is": 40,
43
+ "en": 41,
44
+ "ue": 42,
45
+ "ran": 43,
46
+ "fra": 44,
47
+ " fr": 45,
48
+ "de ": 46,
49
+ " f": 47,
50
+ "fr": 48,
51
+ "ra": 49,
52
+ "e l": 50,
53
+ "om": 51,
54
+ "et": 52,
55
+ "a f": 53,
56
+ "ri": 54,
57
+ "mo": 55,
58
+ " m": 56,
59
+ "co": 57,
60
+ "our": 58,
61
+ "nu": 59,
62
+ "ur ": 60,
63
+ "ou": 61,
64
+ "r ": 62,
65
+ "sa": 63,
66
+ " sa": 64,
67
+ "sa ": 65,
68
+ "to": 66,
69
+ "me": 67,
70
+ "e d": 68,
71
+ "par": 69,
72
+ "s d": 70,
73
+ " de": 71,
74
+ "nom": 72,
75
+ "ue ": 73,
76
+ "le ": 74,
77
+ "ale": 75,
78
+ "tal": 76,
79
+ "ent": 77,
80
+ " mo": 78,
81
+ "mon": 79,
82
+ "e p": 80,
83
+ "a c": 81,
84
+ " et": 82,
85
+ "et ": 83,
86
+ "ys ": 84,
87
+ "ays": 85,
88
+ "pay": 86,
89
+ " co": 87,
90
+ "ce ": 88,
91
+ "nce": 89,
92
+ "anc": 90,
93
+ "ro": 91,
94
+ "eu": 92,
95
+ "tu": 93,
96
+ "nt": 94,
97
+ "ta": 95,
98
+ "al": 96,
99
+ "le": 97,
100
+ "it": 98,
101
+ "se": 99,
102
+ "ar": 100,
103
+ "ys": 101,
104
+ "ay": 102,
105
+ "n ": 103,
106
+ "ce": 104,
107
+ "nc": 105,
108
+ "y": 106,
109
+ "es ": 107,
110
+ "g": 108,
111
+ "no": 109,
112
+ "é": 110,
113
+ "re": 111,
114
+ "ari": 112,
115
+ "ris": 113,
116
+ "is ": 114,
117
+ "s e": 115,
118
+ "t l": 116,
119
+ "ns ": 117,
120
+ " ca": 118,
121
+ "ans": 119,
122
+ "dan": 120,
123
+ "cap": 121,
124
+ "api": 122,
125
+ "pit": 123,
126
+ "ita": 124,
127
+ " da": 125,
128
+ "a t": 126,
129
+ "t e": 127,
130
+ "t c": 128,
131
+ "me ": 129,
132
+ "omb": 130,
133
+ "eif": 131,
134
+ "mbr": 132,
135
+ "bre": 133,
136
+ "reu": 134,
137
+ "eux": 135,
138
+ "ux ": 136,
139
+ " ei": 137,
140
+ "iff": 138,
141
+ " no": 139,
142
+ "r e": 140,
143
+ "tou": 141,
144
+ "x p": 142,
145
+ "e n": 143,
146
+ " du": 144,
147
+ "du ": 145,
148
+ "u m": 146,
149
+ "ffe": 147,
150
+ "ond": 148,
151
+ "fel": 149,
152
+ "nde": 150,
153
+ " to": 151,
154
+ "mme": 152,
155
+ "ure": 153,
156
+ "re ": 154,
157
+ "e s": 155,
158
+ "a g": 156,
159
+ " ga": 157,
160
+ "gas": 158,
161
+ "ast": 159,
162
+ "str": 160,
163
+ "tro": 161,
164
+ "ron": 162,
165
+ "s h": 163,
166
+ "ts ": 164,
167
+ "nts": 165,
168
+ "ono": 166,
169
+ "men": 167,
170
+ "omi": 168,
171
+ "ume": 169,
172
+ "mie": 170,
173
+ "ie ": 171,
174
+ "t s": 172,
175
+ "num": 173,
176
+ " se": 174,
177
+ "onu": 175,
178
+ "s m": 176,
179
+ "ses": 177,
180
+ "omm": 178,
181
+ "con": 179,
182
+ "onn": 180,
183
+ "nnu": 181,
184
+ "com": 182,
185
+ "s c": 183,
186
+ "ues": 184,
187
+ "que": 185,
188
+ "iqu": 186,
189
+ "riq": 187,
190
+ "ori": 188,
191
+ "tor": 189,
192
+ "nue": 190,
193
+ " po": 191,
194
+ "pou": 192,
195
+ "sto": 193,
196
+ "ist": 194,
197
+ "r s": 195,
198
+ " cu": 196,
199
+ "cul": 197,
200
+ "his": 198,
201
+ "ult": 199,
202
+ "ltu": 200,
203
+ "tur": 201,
204
+ " hi": 202,
205
+ " n": 203,
206
+ "mb": 204,
207
+ "br": 205,
208
+ "ux": 206,
209
+ "x ": 207,
210
+ "du": 208,
211
+ "u ": 209,
212
+ "nd": 210,
213
+ "ca": 211,
214
+ "ap": 212,
215
+ "pi": 213,
216
+ "nn": 214,
217
+ "po": 215,
218
+ "cu": 216,
219
+ "ul": 217,
220
+ "lt": 218,
221
+ " g": 219,
222
+ "ga": 220,
223
+ "as": 221,
224
+ "tr": 222,
225
+ "mi": 223,
226
+ "ie": 224,
227
+ "um": 225,
228
+ "ts": 226,
229
+ " h": 227,
230
+ "hi": 228,
231
+ "or": 229,
232
+ "ç": 230,
233
+ "b": 231,
234
+ "x": 232,
235
+ "h": 233,
236
+ "q": 234,
237
+ " u": 235,
238
+ "un": 236,
239
+ "si": 237,
240
+ "ué": 238,
241
+ "é ": 239,
242
+ "op": 240,
243
+ "pe": 241,
244
+ " o": 242,
245
+ "oc": 243,
246
+ "cc": 244,
247
+ "ci": 245,
248
+ "id": 246,
249
+ "ng": 247,
250
+ "gu": 248,
251
+ "nç": 249,
252
+ "ça": 250,
253
+ "ai": 251,
254
+ "rl": 252,
255
+ "lé": 253,
256
+ "ée": 254,
257
+ "da": 255,
258
+ "ns": 256,
259
+ "ope": 257,
260
+ "pe ": 258,
261
+ "e o": 259,
262
+ " oc": 260,
263
+ "occ": 261,
264
+ "cci": 262,
265
+ "cid": 263,
266
+ "ide": 264,
267
+ "den": 265,
268
+ "nta": 266,
269
+ "a l": 267,
270
+ "lan": 268,
271
+ "ang": 269,
272
+ "ngu": 270,
273
+ "gue": 271,
274
+ "e f": 272,
275
+ "anç": 273,
276
+ "nça": 274,
277
+ "çai": 275,
278
+ "ais": 276,
279
+ "ise": 277,
280
+ "se ": 278,
281
+ "t p": 279,
282
+ "arl": 280,
283
+ "rlé": 281,
284
+ "lée": 282,
285
+ "ée ": 283,
286
+ "iq": 284,
287
+ "qu": 285,
288
+ "mm": 286,
289
+ " t": 287,
290
+ "ei": 288,
291
+ "if": 289,
292
+ "ff": 290,
293
+ "fe": 291,
294
+ "el": 292,
295
+ "t u": 293,
296
+ " un": 294,
297
+ "un ": 295,
298
+ "n p": 296,
299
+ "s s": 297,
300
+ " si": 298,
301
+ "sit": 299
302
+ }
@@ -0,0 +1,302 @@
1
+ {
2
+ "a": 0,
3
+ "a ": 1,
4
+ "n": 2,
5
+ "i": 3,
6
+ "m": 4,
7
+ "u": 5,
8
+ "n ": 6,
9
+ "an": 7,
10
+ "s": 8,
11
+ "d": 9,
12
+ " a": 10,
13
+ "ma": 11,
14
+ "da": 12,
15
+ "r": 13,
16
+ "k": 14,
17
+ "h": 15,
18
+ "b": 16,
19
+ " da": 17,
20
+ "e": 18,
21
+ "na": 19,
22
+ " d": 20,
23
+ "an ": 21,
24
+ "ya": 22,
25
+ "da ": 23,
26
+ "y": 24,
27
+ "ha": 25,
28
+ "g": 26,
29
+ "ir": 27,
30
+ " m": 28,
31
+ " ha": 29,
32
+ " s": 30,
33
+ " h": 31,
34
+ "c": 32,
35
+ "ka": 33,
36
+ "na ": 34,
37
+ "a a": 35,
38
+ "ma ": 36,
39
+ "a d": 37,
40
+ "in ": 38,
41
+ "su": 39,
42
+ "in": 40,
43
+ "e ": 41,
44
+ "sa": 42,
45
+ "j": 43,
46
+ " ma": 44,
47
+ "wa": 45,
48
+ " b": 46,
49
+ "w": 47,
50
+ "a s": 48,
51
+ "ba": 49,
52
+ "afi": 50,
53
+ "n a": 51,
54
+ "ad": 52,
55
+ "a k": 53,
56
+ "a c": 54,
57
+ "ga": 55,
58
+ "ab": 56,
59
+ " ya": 57,
60
+ "un": 58,
61
+ "ku": 59,
62
+ "ta": 60,
63
+ "a y": 61,
64
+ "ban": 62,
65
+ "i ": 63,
66
+ " y": 64,
67
+ "am": 65,
68
+ " c": 66,
69
+ "n h": 67,
70
+ "af": 68,
71
+ "as": 69,
72
+ " k": 70,
73
+ "fi": 71,
74
+ "ka ": 72,
75
+ "t": 73,
76
+ "sh": 74,
77
+ "a m": 75,
78
+ "f": 76,
79
+ "awa": 77,
80
+ " su": 78,
81
+ " ce": 79,
82
+ "sa ": 80,
83
+ "ada": 81,
84
+ "ya ": 82,
85
+ "iya": 83,
86
+ "uk": 84,
87
+ "ce ": 85,
88
+ "una": 86,
89
+ "sun": 87,
90
+ " a ": 88,
91
+ " na": 89,
92
+ "amm": 90,
93
+ "mma": 91,
94
+ "rka": 92,
95
+ "irk": 93,
96
+ " af": 94,
97
+ "fir": 95,
98
+ "l": 96,
99
+ " g": 97,
100
+ "ar": 98,
101
+ " n": 99,
102
+ "ag": 100,
103
+ "mm": 101,
104
+ "rk": 102,
105
+ " i": 103,
106
+ "iy": 104,
107
+ "aj": 105,
108
+ "ce": 106,
109
+ "ci": 107,
110
+ "aw": 108,
111
+ "aga": 109,
112
+ " ku": 110,
113
+ "kum": 111,
114
+ "uma": 112,
115
+ " mu": 113,
116
+ "mut": 114,
117
+ "uku": 115,
118
+ "gi": 116,
119
+ "rm": 117,
120
+ "ac": 118,
121
+ "usa": 119,
122
+ "aus": 120,
123
+ "hau": 121,
124
+ "en ": 122,
125
+ "she": 123,
126
+ "ars": 124,
127
+ "har": 125,
128
+ "a h": 126,
129
+ "rs": 127,
130
+ "he": 128,
131
+ " ad": 129,
132
+ "l a": 130,
133
+ "al ": 131,
134
+ " al": 132,
135
+ "wan": 133,
136
+ "rni": 134,
137
+ "irn": 135,
138
+ "bir": 136,
139
+ " bi": 137,
140
+ " ba": 138,
141
+ "e b": 139,
142
+ "ta ": 140,
143
+ "ita": 141,
144
+ " it": 142,
145
+ "n n": 143,
146
+ "o": 144,
147
+ "aba": 145,
148
+ "dab": 146,
149
+ " sh": 147,
150
+ "ana": 148,
151
+ "je": 149,
152
+ "wa ": 150,
153
+ "er": 151,
154
+ "yaw": 152,
155
+ "ri": 153,
156
+ "e d": 154,
157
+ "ane": 155,
158
+ "tan": 156,
159
+ "uta": 157,
160
+ "gir": 158,
161
+ " gi": 159,
162
+ "i g": 160,
163
+ "bu": 161,
164
+ "ja": 162,
165
+ "asa": 163,
166
+ "kas": 164,
167
+ " ka": 165,
168
+ "it": 166,
169
+ "riy": 167,
170
+ "eri": 168,
171
+ "jer": 169,
172
+ "aje": 170,
173
+ "naj": 171,
174
+ "bi": 172,
175
+ "u ": 173,
176
+ "rn": 174,
177
+ "l ": 175,
178
+ "al": 176,
179
+ "um": 177,
180
+ "mu": 178,
181
+ "ut": 179,
182
+ "yam": 180,
183
+ "cin": 181,
184
+ "ni": 182,
185
+ "mac": 183,
186
+ "ne": 184,
187
+ "aci": 185,
188
+ "us": 186,
189
+ "au": 187,
190
+ "en": 188,
191
+ "irm": 189,
192
+ "rma": 190,
193
+ "os ": 191,
194
+ "and": 192,
195
+ "dun": 193,
196
+ "mas": 194,
197
+ "jiy": 195,
198
+ "un ": 196,
199
+ "aji": 197,
200
+ "n g": 198,
201
+ " ga": 199,
202
+ "gar": 200,
203
+ "gaj": 201,
204
+ "rga": 202,
205
+ "arg": 203,
206
+ "s i": 204,
207
+ "ni ": 205,
208
+ "i m": 206,
209
+ "maf": 207,
210
+ "fi ": 208,
211
+ " j": 209,
212
+ "n j": 210,
213
+ " ja": 211,
214
+ "jam": 212,
215
+ "ama": 213,
216
+ "nen": 214,
217
+ "s ": 215,
218
+ "os": 216,
219
+ "go": 217,
220
+ "la": 218,
221
+ "adu": 219,
222
+ "nda": 220,
223
+ "suk": 221,
224
+ "uka": 222,
225
+ "had": 223,
226
+ " sa": 224,
227
+ "san": 225,
228
+ "a o": 226,
229
+ " o ": 227,
230
+ "o i": 228,
231
+ " in": 229,
232
+ "han": 230,
233
+ "ann": 231,
234
+ "nnu": 232,
235
+ "nu ": 233,
236
+ "u d": 234,
237
+ "a b": 235,
238
+ "asu": 236,
239
+ "su ": 237,
240
+ "u b": 238,
241
+ "n s": 239,
242
+ "sha": 240,
243
+ "ha ": 241,
244
+ " aw": 242,
245
+ "a w": 243,
246
+ "uwa": 244,
247
+ "kuw": 245,
248
+ "kuk": 246,
249
+ " wa": 247,
250
+ "wad": 248,
251
+ "buk": 249,
252
+ " l": 250,
253
+ " bu": 251,
254
+ "dan": 252,
255
+ "gos": 253,
256
+ "nu": 254,
257
+ "uw": 255,
258
+ "gan": 256,
259
+ "mag": 257,
260
+ "bb": 258,
261
+ "ne ": 259,
262
+ "uj": 260,
263
+ "hi": 261,
264
+ "e m": 262,
265
+ "mai": 263,
266
+ "nan": 264,
267
+ "ai ": 265,
268
+ "ai": 266,
269
+ "rsu": 267,
270
+ "yan": 268,
271
+ "nya": 269,
272
+ "any": 270,
273
+ "man": 271,
274
+ "n m": 272,
275
+ "kin": 273,
276
+ "iki": 274,
277
+ "cik": 275,
278
+ " ci": 276,
279
+ "ga ": 277,
280
+ "dag": 278,
281
+ "a n": 279,
282
+ "ny": 280,
283
+ "hen": 281,
284
+ "ki": 282,
285
+ "rsh": 283,
286
+ "ik": 284,
287
+ "ago": 285,
288
+ "lag": 286,
289
+ " la": 287,
290
+ "a l": 288,
291
+ " am": 289,
292
+ "nin": 290,
293
+ "du": 291,
294
+ "n b": 292,
295
+ "bba": 293,
296
+ "abb": 294,
297
+ "bab": 295,
298
+ "rg": 296,
299
+ "ji": 297,
300
+ " w": 298,
301
+ "nd": 299
302
+ }