langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,200 @@
1
+ {
2
+ "်": 0,
3
+ "ာ": 1,
4
+ "င": 2,
5
+ "သ": 3,
6
+ "မ": 4,
7
+ "ြ": 5,
8
+ "န": 6,
9
+ "ု": 7,
10
+ "ည်": 8,
11
+ "ည": 9,
12
+ "သည": 10,
13
+ "င်": 11,
14
+ "် ": 12,
15
+ "တ": 13,
16
+ "ိ": 14,
17
+ "ံ": 15,
18
+ "သည်": 16,
19
+ "ေ": 17,
20
+ "စ": 18,
21
+ "ည် ": 19,
22
+ "ို": 20,
23
+ "မြ": 21,
24
+ "င်င": 22,
25
+ "်ငံ": 23,
26
+ "ော": 24,
27
+ "စ်": 25,
28
+ "်သ": 26,
29
+ " မ": 27,
30
+ " မြ": 28,
31
+ "်သည": 29,
32
+ "မြန": 30,
33
+ "ြန်": 31,
34
+ "န်မ": 32,
35
+ "်မာ": 33,
36
+ "နို": 34,
37
+ "ိုင": 35,
38
+ "ုင်": 36,
39
+ "န်": 37,
40
+ "ငံ": 38,
41
+ "်င": 39,
42
+ "ုင": 40,
43
+ "ြန": 41,
44
+ "ရ": 42,
45
+ "မာ": 43,
46
+ "်မ": 44,
47
+ "နိ": 45,
48
+ "တော": 46,
49
+ "ာသ": 47,
50
+ "မာန": 48,
51
+ "ာနိ": 49,
52
+ "ဖ": 50,
53
+ "ြစ": 51,
54
+ "ာန": 52,
55
+ "ဖြ": 53,
56
+ "ှ": 54,
57
+ "် မ": 55,
58
+ "စ်သ": 56,
59
+ "ြစ်": 57,
60
+ "တေ": 58,
61
+ "ဖြစ": 59,
62
+ "ရှ": 60,
63
+ "း": 61,
64
+ "ဘာ": 62,
65
+ "ုံး": 63,
66
+ "သာ": 64,
67
+ "ံ ": 65,
68
+ "့": 66,
69
+ "ံး": 67,
70
+ "ုံ": 68,
71
+ "ငံ ": 69,
72
+ "အ": 70,
73
+ " န": 71,
74
+ "ာသာ": 72,
75
+ "ဘာသ": 73,
76
+ "ဘ": 74,
77
+ "့တေ": 75,
78
+ "ာ်": 76,
79
+ "့တ": 77,
80
+ "ော်": 78,
81
+ "ာ်ဖ": 79,
82
+ "ု့တ": 80,
83
+ "ို့": 81,
84
+ "င် ": 82,
85
+ "် တ": 83,
86
+ " တည": 84,
87
+ "တည်": 85,
88
+ "ည်ရ": 86,
89
+ "်ရှ": 87,
90
+ "ရှိ": 88,
91
+ "ှိသ": 89,
92
+ "ိသေ": 90,
93
+ "သော": 91,
94
+ "ော ": 92,
95
+ "ာ န": 93,
96
+ " နိ": 94,
97
+ "ငံတ": 95,
98
+ "ံတစ": 96,
99
+ "တစ်": 97,
100
+ "စ်ခ": 98,
101
+ "်ခု": 99,
102
+ "်ဖြ": 100,
103
+ "စကာ": 101,
104
+ "ကား": 102,
105
+ "ားဖ": 103,
106
+ "းဖြ": 104,
107
+ "် န": 105,
108
+ " နေ": 106,
109
+ "နေပ": 107,
110
+ "ေပြ": 108,
111
+ "ပြည": 109,
112
+ "ြည်": 110,
113
+ "ည်တ": 111,
114
+ "်တေ": 112,
115
+ "ာ်သ": 113,
116
+ "ံ မ": 114,
117
+ "မြိ": 115,
118
+ "ြို": 116,
119
+ "ခုဖ": 117,
120
+ "ုဖြ": 118,
121
+ "မာဘ": 119,
122
+ "ာဘာ": 120,
123
+ "သာသ": 121,
124
+ "ာသည": 122,
125
+ "ံ ရ": 123,
126
+ " ရု": 124,
127
+ "ရုံ": 125,
128
+ "ံးသ": 126,
129
+ "းသု": 127,
130
+ "သုံ": 128,
131
+ "ံးဘ": 129,
132
+ "းဘာ": 130,
133
+ "သာစ": 131,
134
+ "ာစက": 132,
135
+ "ွ": 133,
136
+ "တည": 134,
137
+ "်ရ": 135,
138
+ "ှိ": 136,
139
+ "ိသ": 137,
140
+ "သေ": 138,
141
+ "ာ ": 139,
142
+ "ံတ": 140,
143
+ "တစ": 141,
144
+ "်ခ": 142,
145
+ "ခု": 143,
146
+ "ုဖ": 144,
147
+ "ာဘ": 145,
148
+ " ရ": 146,
149
+ "ရု": 147,
150
+ "းသ": 148,
151
+ "သု": 149,
152
+ "ခ": 150,
153
+ "က": 151,
154
+ "ပ": 152,
155
+ "ံသ": 153,
156
+ " အ": 154,
157
+ "အရ": 155,
158
+ "ှေ": 156,
159
+ "ေ့": 157,
160
+ "ာင": 158,
161
+ "်အ": 159,
162
+ "အာ": 160,
163
+ "ာရ": 161,
164
+ "ှတ": 162,
165
+ "တွ": 163,
166
+ "ွင": 164,
167
+ " တ": 165,
168
+ "် အ": 166,
169
+ " အရ": 167,
170
+ "အရှ": 168,
171
+ "ရှေ": 169,
172
+ "ှေ့": 170,
173
+ "ေ့တ": 171,
174
+ "ောင": 172,
175
+ "ာင်": 173,
176
+ "င်အ": 174,
177
+ "်အာ": 175,
178
+ "အာရ": 176,
179
+ "ာရှ": 177,
180
+ "ရှတ": 178,
181
+ "ှတွ": 179,
182
+ "တွင": 180,
183
+ "ွင်": 181,
184
+ "းဘ": 182,
185
+ "ာစ": 183,
186
+ "စက": 184,
187
+ "ကာ": 185,
188
+ "ား": 186,
189
+ "းဖ": 187,
190
+ "နေ": 188,
191
+ "ေပ": 189,
192
+ "ပြ": 190,
193
+ "ြည": 191,
194
+ "်တ": 192,
195
+ "ြိ": 193,
196
+ "ု့": 194,
197
+ "်ဖ": 195,
198
+ "ငံသ": 196,
199
+ "ံသည": 197
200
+ }
@@ -0,0 +1,297 @@
1
+ {
2
+ "e": 0,
3
+ "r": 1,
4
+ "o": 2,
5
+ "n": 3,
6
+ "t": 4,
7
+ "s": 5,
8
+ "r ": 6,
9
+ "i": 7,
10
+ "or": 8,
11
+ "d": 9,
12
+ " o": 10,
13
+ "a": 11,
14
+ "k": 12,
15
+ "er ": 13,
16
+ "l": 14,
17
+ "er": 15,
18
+ "g": 16,
19
+ "t ": 17,
20
+ " e": 18,
21
+ "e ": 19,
22
+ "no": 20,
23
+ "u": 21,
24
+ "nor": 22,
25
+ " n": 23,
26
+ " s": 24,
27
+ "en": 25,
28
+ " f": 26,
29
+ "r o": 27,
30
+ " no": 28,
31
+ "og": 29,
32
+ "in": 30,
33
+ "j": 31,
34
+ " og": 32,
35
+ " er": 33,
36
+ "f": 34,
37
+ "m": 35,
38
+ "es": 36,
39
+ "og ": 37,
40
+ "nt": 38,
41
+ "ne": 39,
42
+ "st": 40,
43
+ "rge": 41,
44
+ "g ": 42,
45
+ "org": 43,
46
+ " k": 44,
47
+ "je": 45,
48
+ "si": 46,
49
+ " si": 47,
50
+ "et ": 48,
51
+ "sin": 49,
52
+ "nt ": 50,
53
+ "t f": 51,
54
+ "ur": 52,
55
+ "ent": 53,
56
+ "d ": 54,
57
+ "et": 55,
58
+ "ge": 56,
59
+ "ke": 57,
60
+ "v": 58,
61
+ "rg": 59,
62
+ "ur ": 60,
63
+ " h": 61,
64
+ "p": 62,
65
+ "tur": 63,
66
+ "e n": 64,
67
+ "å": 65,
68
+ "es ": 66,
69
+ "rd": 67,
70
+ "h": 68,
71
+ "kj": 69,
72
+ "fo": 70,
73
+ "ge ": 71,
74
+ "nd": 72,
75
+ "n ": 73,
76
+ "in ": 74,
77
+ "tu": 75,
78
+ "r s": 76,
79
+ "or ": 77,
80
+ "for": 78,
81
+ " fo": 79,
82
+ "ri": 80,
83
+ "jen": 81,
84
+ "kje": 82,
85
+ " kj": 83,
86
+ "r k": 84,
87
+ "e e": 85,
88
+ "ord": 86,
89
+ "de": 87,
90
+ "ta": 88,
91
+ "re": 89,
92
+ "tr": 90,
93
+ "sk": 91,
94
+ "s ": 92,
95
+ "ske": 93,
96
+ " m": 94,
97
+ "o e": 95,
98
+ "em ": 96,
99
+ "mtr": 97,
100
+ "omt": 98,
101
+ "men": 99,
102
+ " me": 100,
103
+ "m m": 101,
104
+ "r m": 102,
105
+ "ner": 103,
106
+ "one": 104,
107
+ "ion": 105,
108
+ "lio": 106,
109
+ "lli": 107,
110
+ "ill": 108,
111
+ "mil": 109,
112
+ " mi": 110,
113
+ "r n": 111,
114
+ "lo ": 112,
115
+ "slo": 113,
116
+ "ges": 114,
117
+ "s h": 115,
118
+ " ho": 116,
119
+ "osl": 117,
120
+ " os": 118,
121
+ "hov": 119,
122
+ " fe": 120,
123
+ "ker": 121,
124
+ "esk": 122,
125
+ "ren": 123,
126
+ "nes": 124,
127
+ "nne": 125,
128
+ "enn": 126,
129
+ "fem": 127,
130
+ "ove": 128,
131
+ "ved": 129,
132
+ "tre": 130,
133
+ "eds": 131,
134
+ "dst": 132,
135
+ "ie ": 133,
136
+ "ogs": 134,
137
+ "gså": 135,
138
+ "så ": 136,
139
+ "å k": 137,
140
+ "ine": 138,
141
+ "ne ": 139,
142
+ "e f": 140,
143
+ " fj": 141,
144
+ "fjo": 142,
145
+ "jor": 143,
146
+ "rde": 144,
147
+ "der": 145,
148
+ "g s": 146,
149
+ "n o": 147,
150
+ " ol": 148,
151
+ "olj": 149,
152
+ "lje": 150,
153
+ "jei": 151,
154
+ "ein": 152,
155
+ "ind": 153,
156
+ "ndu": 154,
157
+ "dus": 155,
158
+ "ust": 156,
159
+ "str": 157,
160
+ "tri": 158,
161
+ "sta": 159,
162
+ "tad": 160,
163
+ "ad ": 161,
164
+ "d o": 162,
165
+ "g e": 163,
166
+ "n v": 164,
167
+ " va": 165,
168
+ "vak": 166,
169
+ "akr": 167,
170
+ "kre": 168,
171
+ "re ": 169,
172
+ " na": 170,
173
+ "nat": 171,
174
+ "atu": 172,
175
+ " ku": 173,
176
+ "kul": 174,
177
+ "ult": 175,
178
+ "ltu": 176,
179
+ "g h": 177,
180
+ " hi": 178,
181
+ "his": 179,
182
+ "ist": 180,
183
+ "sto": 181,
184
+ "tor": 182,
185
+ "ori": 183,
186
+ "rie": 184,
187
+ " l": 185,
188
+ "il": 186,
189
+ "ll": 187,
190
+ "li": 188,
191
+ "io": 189,
192
+ "on": 190,
193
+ "me": 191,
194
+ "nn": 192,
195
+ "os": 193,
196
+ "sl": 194,
197
+ "lo": 195,
198
+ "o ": 196,
199
+ "ho": 197,
200
+ "ov": 198,
201
+ "ve": 199,
202
+ "ed": 200,
203
+ "ds": 201,
204
+ "ad": 202,
205
+ " v": 203,
206
+ "va": 204,
207
+ "ak": 205,
208
+ "kr": 206,
209
+ "na": 207,
210
+ "at": 208,
211
+ "ku": 209,
212
+ "ul": 210,
213
+ "lt": 211,
214
+ "hi": 212,
215
+ "la": 213,
216
+ "an": 214,
217
+ " i": 215,
218
+ "i ": 216,
219
+ "eu": 217,
220
+ "ro": 218,
221
+ "op": 219,
222
+ "pa": 220,
223
+ "a ": 221,
224
+ " d": 222,
225
+ "rs": 223,
226
+ "sp": 224,
227
+ "pr": 225,
228
+ "rå": 226,
229
+ "åk": 227,
230
+ " t": 228,
231
+ "al": 229,
232
+ "le": 230,
233
+ " a": 231,
234
+ "av": 232,
235
+ "v ": 233,
236
+ "om": 234,
237
+ "mt": 235,
238
+ "fe": 236,
239
+ "em": 237,
240
+ "m ": 238,
241
+ "mi": 239,
242
+ "is": 240,
243
+ "rop": 241,
244
+ "opa": 242,
245
+ "pa ": 243,
246
+ "a d": 244,
247
+ " de": 245,
248
+ "det": 246,
249
+ "t n": 247,
250
+ "ors": 248,
251
+ "rsk": 249,
252
+ "ke ": 250,
253
+ "e s": 251,
254
+ " sp": 252,
255
+ "spr": 253,
256
+ "prå": 254,
257
+ "råk": 255,
258
+ "åke": 256,
259
+ "ket": 257,
260
+ "t t": 258,
261
+ " ta": 259,
262
+ "tal": 260,
263
+ "ale": 261,
264
+ "les": 262,
265
+ "s a": 263,
266
+ " av": 264,
267
+ "av ": 265,
268
+ "v o": 266,
269
+ " om": 267,
270
+ "to": 268,
271
+ "ie": 269,
272
+ "gs": 270,
273
+ "så": 271,
274
+ "å ": 272,
275
+ "fj": 273,
276
+ "jo": 274,
277
+ "ol": 275,
278
+ "lj": 276,
279
+ "ei": 277,
280
+ "du": 278,
281
+ "us": 279,
282
+ "r e": 280,
283
+ " et": 281,
284
+ "t l": 282,
285
+ " la": 283,
286
+ "lan": 284,
287
+ "and": 285,
288
+ "nd ": 286,
289
+ "d i": 287,
290
+ " i ": 288,
291
+ "i n": 289,
292
+ "rd ": 290,
293
+ "d e": 291,
294
+ " eu": 292,
295
+ "eur": 293,
296
+ "uro": 294
297
+ }