langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,277 @@
1
+ {
2
+ "h": 0,
3
+ "n": 1,
4
+ "t": 2,
5
+ "a": 3,
6
+ "i": 4,
7
+ " n": 5,
8
+ "à": 6,
9
+ "c": 7,
10
+ "m": 8,
11
+ "v": 9,
12
+ "à ": 10,
13
+ " v": 11,
14
+ "l": 12,
15
+ "t ": 13,
16
+ "g": 14,
17
+ "m ": 15,
18
+ " l": 16,
19
+ " t": 17,
20
+ "iệ": 18,
21
+ "ệt": 19,
22
+ "vi": 20,
23
+ "na": 21,
24
+ "am": 22,
25
+ "ng": 23,
26
+ " c": 24,
27
+ "h ": 25,
28
+ "th": 26,
29
+ "việ": 27,
30
+ "iệt": 28,
31
+ "ệt ": 29,
32
+ " na": 30,
33
+ "nam": 31,
34
+ "am ": 32,
35
+ " th": 33,
36
+ "ệ": 34,
37
+ " h": 35,
38
+ " ph": 36,
39
+ "là ": 37,
40
+ "là": 38,
41
+ "t n": 39,
42
+ "p": 40,
43
+ " là": 41,
44
+ "nh": 42,
45
+ "a ": 43,
46
+ " p": 44,
47
+ "ph": 45,
48
+ " vi": 46,
49
+ "nh ": 47,
50
+ "ô": 48,
51
+ "ch": 49,
52
+ "ng ": 50,
53
+ "hà": 51,
54
+ "ố": 52,
55
+ "g ": 53,
56
+ "ủ": 54,
57
+ "n ": 55,
58
+ "hí": 56,
59
+ " s": 57,
60
+ "a v": 58,
61
+ "ủa ": 59,
62
+ "của": 60,
63
+ "đ": 61,
64
+ "hứ": 62,
65
+ "u": 63,
66
+ "củ": 64,
67
+ "ủa": 65,
68
+ "ộ": 66,
69
+ "i ": 67,
70
+ "à t": 68,
71
+ "và": 69,
72
+ "àn": 70,
73
+ "hố": 71,
74
+ "ố ": 72,
75
+ "thà": 73,
76
+ "à n": 74,
77
+ " ng": 75,
78
+ "hàn": 76,
79
+ "ó": 77,
80
+ "ành": 78,
81
+ "s": 79,
82
+ "h p": 80,
83
+ "ứ": 81,
84
+ " m": 82,
85
+ "phố": 83,
86
+ "hố ": 84,
87
+ " ch": 85,
88
+ "c ": 86,
89
+ " củ": 87,
90
+ "thứ": 88,
91
+ "í": 89,
92
+ "ôn": 90,
93
+ "đô": 91,
94
+ " đ": 92,
95
+ " và": 93,
96
+ "và ": 94,
97
+ " đô": 95,
98
+ "chí": 96,
99
+ "m v": 97,
100
+ "thủ": 98,
101
+ "à l": 99,
102
+ "hủ ": 100,
103
+ "ô c": 101,
104
+ "đô ": 102,
105
+ "ủ đ": 103,
106
+ "ến": 104,
107
+ "ngô": 105,
108
+ "gôn": 106,
109
+ "ôn ": 107,
110
+ "n n": 108,
111
+ "ngữ": 109,
112
+ "gữ ": 110,
113
+ "ữ c": 111,
114
+ "hín": 112,
115
+ "ính": 113,
116
+ "h t": 114,
117
+ "hức": 115,
118
+ "ức ": 116,
119
+ "c c": 117,
120
+ "m h": 118,
121
+ " hà": 119,
122
+ "hà ": 120,
123
+ " nộ": 121,
124
+ "nội": 122,
125
+ "ội ": 123,
126
+ "i l": 124,
127
+ "ố l": 125,
128
+ " có": 126,
129
+ "có ": 127,
130
+ "ó l": 128,
131
+ " lị": 129,
132
+ "lịc": 130,
133
+ "ịch": 131,
134
+ "ch ": 132,
135
+ "h s": 133,
136
+ " sử": 134,
137
+ "sử ": 135,
138
+ "ử v": 136,
139
+ "à v": 137,
140
+ " vă": 138,
141
+ "văn": 139,
142
+ "ăn ": 140,
143
+ "n h": 141,
144
+ " hó": 142,
145
+ "hóa": 143,
146
+ "óa ": 144,
147
+ "a p": 145,
148
+ "pho": 146,
149
+ "hon": 147,
150
+ "ong": 148,
151
+ "g p": 149,
152
+ "phú": 150,
153
+ " lớ": 151,
154
+ "lớn": 152,
155
+ "ớn ": 153,
156
+ "n t": 154,
157
+ "hứ ": 155,
158
+ "ứ h": 156,
159
+ " ha": 157,
160
+ "hai": 158,
161
+ "ai ": 159,
162
+ "i s": 160,
163
+ " sa": 161,
164
+ "sau": 162,
165
+ "au ": 163,
166
+ "u t": 164,
167
+ "ố h": 165,
168
+ " hồ": 166,
169
+ "hồ ": 167,
170
+ "ồ c": 168,
171
+ "hí ": 169,
172
+ "í m": 170,
173
+ " mi": 171,
174
+ "min": 172,
175
+ "inh": 173,
176
+ "h v": 174,
177
+ "m c": 175,
178
+ "ế": 176,
179
+ "gô": 177,
180
+ "gữ": 178,
181
+ "ữ ": 179,
182
+ "á": 180,
183
+ "ín": 181,
184
+ "ức": 182,
185
+ "nộ": 183,
186
+ "ội": 184,
187
+ "hủ": 185,
188
+ "ủ ": 186,
189
+ "ô ": 187,
190
+ "lớ": 188,
191
+ "ớn": 189,
192
+ "ứ ": 190,
193
+ "ha": 191,
194
+ "ai": 192,
195
+ "sa": 193,
196
+ "au": 194,
197
+ "u ": 195,
198
+ "hồ": 196,
199
+ "ồ ": 197,
200
+ "í ": 198,
201
+ "mi": 199,
202
+ "o": 200,
203
+ "ú": 201,
204
+ "ă": 202,
205
+ "ử": 203,
206
+ "ị": 204,
207
+ "ồ": 205,
208
+ "ớ": 206,
209
+ "mộ": 207,
210
+ "ột": 208,
211
+ " q": 209,
212
+ "qu": 210,
213
+ "uố": 211,
214
+ "ốc": 212,
215
+ " g": 213,
216
+ "gi": 214,
217
+ "ia": 215,
218
+ "nằ": 216,
219
+ "ằm": 217,
220
+ " ở": 218,
221
+ "ở ": 219,
222
+ " á": 220,
223
+ "á ": 221,
224
+ "ữ": 222,
225
+ "ti": 223,
226
+ "iế": 224,
227
+ "uốc": 225,
228
+ "ốc ": 226,
229
+ "c g": 227,
230
+ " gi": 228,
231
+ "gia": 229,
232
+ "ia ": 230,
233
+ "a n": 231,
234
+ " nằ": 232,
235
+ "nằm": 233,
236
+ "ằm ": 234,
237
+ "m ở": 235,
238
+ " ở ": 236,
239
+ "ở đ": 237,
240
+ "đôn": 238,
241
+ "ông": 239,
242
+ "g n": 240,
243
+ "m á": 241,
244
+ " á ": 242,
245
+ "á t": 243,
246
+ " ti": 244,
247
+ "tiế": 245,
248
+ "iến": 246,
249
+ "ếng": 247,
250
+ "g v": 248,
251
+ "t l": 249,
252
+ "in": 250,
253
+ "có": 251,
254
+ "ó ": 252,
255
+ "lị": 253,
256
+ "ịc": 254,
257
+ "sử": 255,
258
+ "ử ": 256,
259
+ "vă": 257,
260
+ "ăn": 258,
261
+ "hó": 259,
262
+ "óa": 260,
263
+ "ho": 261,
264
+ "on": 262,
265
+ "hú": 263,
266
+ "ở": 264,
267
+ "ằ": 265,
268
+ "q": 266,
269
+ "m l": 267,
270
+ "à m": 268,
271
+ " mộ": 269,
272
+ "một": 270,
273
+ "ột ": 271,
274
+ "t q": 272,
275
+ " qu": 273,
276
+ "quố": 274
277
+ }
@@ -0,0 +1,245 @@
1
+ {
2
+ "n": 0,
3
+ "í": 1,
4
+ "à": 2,
5
+ "ọ": 3,
6
+ "r": 4,
7
+ "è": 5,
8
+ "n ": 6,
9
+ "̀": 7,
10
+ "o": 8,
11
+ " è": 9,
12
+ " n": 10,
13
+ "rí": 11,
14
+ "ì": 12,
15
+ "ọ̀": 13,
16
+ "dè": 14,
17
+ "èd": 15,
18
+ "è ": 16,
19
+ "̀ ": 17,
20
+ "ír": 18,
21
+ "wọ": 19,
22
+ " èd": 20,
23
+ "j": 21,
24
+ "d": 22,
25
+ "w": 23,
26
+ "dè ": 24,
27
+ "ù": 25,
28
+ "èdè": 26,
29
+ "ẹ": 27,
30
+ "or": 28,
31
+ " o": 29,
32
+ "rù": 30,
33
+ "á": 31,
34
+ "írí": 32,
35
+ "í ": 33,
36
+ "l": 34,
37
+ "à ": 35,
38
+ "ọ̀ ": 36,
39
+ "̀ è": 37,
40
+ "ní ": 38,
41
+ " ní": 39,
42
+ "ní": 40,
43
+ "íà ": 41,
44
+ "ríà": 42,
45
+ " à": 43,
46
+ "nàì": 44,
47
+ "àìj": 45,
48
+ "ìjí": 46,
49
+ "jír": 47,
50
+ "nà": 48,
51
+ "àì": 49,
52
+ "ìj": 50,
53
+ "y": 51,
54
+ "jí": 52,
55
+ "íà": 53,
56
+ "a": 54,
57
+ "k": 55,
58
+ "s": 56,
59
+ "́": 57,
60
+ "an ": 58,
61
+ "kan": 59,
62
+ "í ì": 60,
63
+ "ò": 61,
64
+ " ìw": 62,
65
+ "ìwọ": 63,
66
+ "wọ̀": 64,
67
+ "b": 65,
68
+ "ẹ̀ ": 66,
69
+ "lẹ̀": 67,
70
+ "ílẹ": 68,
71
+ "ríl": 69,
72
+ "orí": 70,
73
+ " or": 71,
74
+ "ẹ́ ": 72,
75
+ "jẹ́": 73,
76
+ " jẹ": 74,
77
+ "t": 75,
78
+ "ń": 76,
79
+ "i": 77,
80
+ "p": 78,
81
+ "ú": 79,
82
+ "àn": 80,
83
+ "pọ̀": 81,
84
+ " nà": 82,
85
+ "ọ n": 83,
86
+ "sọ ": 84,
87
+ " sọ": 85,
88
+ "ń s": 86,
89
+ " ń ": 87,
90
+ "n ń": 88,
91
+ "ọn ": 89,
92
+ "wọn": 90,
93
+ "àwọ": 91,
94
+ " àw": 92,
95
+ " ọ̀": 93,
96
+ "bá ": 94,
97
+ "ùbá": 95,
98
+ "rùb": 96,
99
+ "orù": 97,
100
+ "yor": 98,
101
+ " yo": 99,
102
+ "è y": 100,
103
+ "à è": 101,
104
+ "ùn ": 102,
105
+ "rùn": 103,
106
+ "òrù": 104,
107
+ "oòr": 105,
108
+ " oò": 106,
109
+ "̀ o": 107,
110
+ "ẹ́": 108,
111
+ " ń": 109,
112
+ "ń ": 110,
113
+ " s": 111,
114
+ "sọ": 112,
115
+ "ọ ": 113,
116
+ "yo": 114,
117
+ "ka": 115,
118
+ "i ": 116,
119
+ "an": 117,
120
+ "ùb": 118,
121
+ "pọ": 119,
122
+ "bá": 120,
123
+ "á ": 121,
124
+ " y": 122,
125
+ "ẹ̀": 123,
126
+ " ọ": 124,
127
+ "́ ": 125,
128
+ "ọn": 126,
129
+ "íl": 127,
130
+ "ùn": 128,
131
+ "òr": 129,
132
+ "àw": 130,
133
+ "jẹ": 131,
134
+ "oò": 132,
135
+ " j": 133,
136
+ " ì": 134,
137
+ "ìw": 135,
138
+ "lẹ": 136,
139
+ "í n": 137,
140
+ " gú": 138,
141
+ " k": 139,
142
+ "́n ": 140,
143
+ "ọ́n": 141,
144
+ "wọ́": 142,
145
+ " wọ": 143,
146
+ "í w": 144,
147
+ "tí ": 145,
148
+ " tí": 146,
149
+ "è t": 147,
150
+ "n è": 148,
151
+ "a à": 149,
152
+ "ra ": 150,
153
+ "ára": 151,
154
+ "gúú": 152,
155
+ "úús": 153,
156
+ "úsù": 154,
157
+ "sù ": 155,
158
+ "ù n": 156,
159
+ "à à": 157,
160
+ " àt": 158,
161
+ "àti": 159,
162
+ "ti ": 160,
163
+ "i à": 161,
164
+ "n o": 162,
165
+ "è m": 163,
166
+ " mí": 164,
167
+ "mír": 165,
168
+ "írà": 166,
169
+ "ràn": 167,
170
+ "á n": 168,
171
+ " ni": 169,
172
+ "ni ": 170,
173
+ "i ọ": 171,
174
+ "ọ̀p": 172,
175
+ "̀pọ": 173,
176
+ "ọ̀l": 174,
177
+ "̀lọ": 175,
178
+ "lọp": 176,
179
+ "ọpọ": 177,
180
+ " èn": 178,
181
+ "ènì": 179,
182
+ "nìy": 180,
183
+ "ìyà": 181,
184
+ "yàn": 182,
185
+ "àn ": 183,
186
+ "n g": 184,
187
+ "è k": 185,
188
+ "tí": 186,
189
+ "f": 187,
190
+ " w": 188,
191
+ "ọ́": 189,
192
+ "́n": 190,
193
+ "́ o": 191,
194
+ "ni": 192,
195
+ "̀p": 193,
196
+ "à j": 194,
197
+ "̀l": 195,
198
+ "lọ": 196,
199
+ "ọp": 197,
200
+ "èn": 198,
201
+ "g": 199,
202
+ "nì": 200,
203
+ "rà": 201,
204
+ "mí": 202,
205
+ " m": 203,
206
+ "ti": 204,
207
+ "àt": 205,
208
+ "ù ": 206,
209
+ "sù": 207,
210
+ "ús": 208,
211
+ "úú": 209,
212
+ "gú": 210,
213
+ " g": 211,
214
+ "ìy": 212,
215
+ "yà": 213,
216
+ "lár": 214,
217
+ " lá": 215,
218
+ "n l": 216,
219
+ "̀ka": 217,
220
+ "ọ̀k": 218,
221
+ "́ ọ": 219,
222
+ "á j": 220,
223
+ " á": 221,
224
+ "áf": 222,
225
+ "fí": 223,
226
+ "ík": 224,
227
+ "kà": 225,
228
+ "kà ": 226,
229
+ "íkà": 227,
230
+ "rík": 228,
231
+ "fír": 229,
232
+ "áfí": 230,
233
+ " áf": 231,
234
+ "n á": 232,
235
+ "̀k": 233,
236
+ " l": 234,
237
+ "lá": 235,
238
+ "ár": 236,
239
+ "ra": 237,
240
+ "a ": 238,
241
+ "m": 239,
242
+ "n n": 240,
243
+ " t": 241,
244
+ " ka": 242
245
+ }