langdetect-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +24 -13
  3. data/langdetect-ruby.gemspec +1 -1
  4. data/lib/lingua_ruby/configuration.rb +4 -1
  5. data/lib/lingua_ruby/detector.rb +59 -1
  6. data/lib/lingua_ruby/profile_loader.rb +26 -6
  7. data/lib/lingua_ruby/profiles/am.json +193 -0
  8. data/lib/lingua_ruby/profiles/bg.json +290 -0
  9. data/lib/lingua_ruby/profiles/bn.json +211 -0
  10. data/lib/lingua_ruby/profiles/cs.json +302 -0
  11. data/lib/lingua_ruby/profiles/da.json +302 -0
  12. data/lib/lingua_ruby/profiles/de.json +302 -0
  13. data/lib/lingua_ruby/profiles/el.json +302 -0
  14. data/lib/lingua_ruby/profiles/es.json +302 -0
  15. data/lib/lingua_ruby/profiles/et.json +289 -0
  16. data/lib/lingua_ruby/profiles/fa.json +234 -0
  17. data/lib/lingua_ruby/profiles/fi.json +284 -0
  18. data/lib/lingua_ruby/profiles/fr.json +302 -0
  19. data/lib/lingua_ruby/profiles/ha.json +302 -0
  20. data/lib/lingua_ruby/profiles/hi.json +255 -0
  21. data/lib/lingua_ruby/profiles/hr.json +302 -0
  22. data/lib/lingua_ruby/profiles/hu.json +302 -0
  23. data/lib/lingua_ruby/profiles/it.json +302 -0
  24. data/lib/lingua_ruby/profiles/lt.json +294 -0
  25. data/lib/lingua_ruby/profiles/lv.json +302 -0
  26. data/lib/lingua_ruby/profiles/my.json +200 -0
  27. data/lib/lingua_ruby/profiles/no.json +297 -0
  28. data/lib/lingua_ruby/profiles/pl.json +302 -0
  29. data/lib/lingua_ruby/profiles/pt.json +302 -0
  30. data/lib/lingua_ruby/profiles/ro.json +302 -0
  31. data/lib/lingua_ruby/profiles/ru.json +297 -0
  32. data/lib/lingua_ruby/profiles/sk.json +302 -0
  33. data/lib/lingua_ruby/profiles/sv.json +302 -0
  34. data/lib/lingua_ruby/profiles/sw.json +268 -0
  35. data/lib/lingua_ruby/profiles/ta.json +235 -0
  36. data/lib/lingua_ruby/profiles/te.json +254 -0
  37. data/lib/lingua_ruby/profiles/th.json +251 -0
  38. data/lib/lingua_ruby/profiles/tl.json +302 -0
  39. data/lib/lingua_ruby/profiles/tr.json +302 -0
  40. data/lib/lingua_ruby/profiles/uk.json +302 -0
  41. data/lib/lingua_ruby/profiles/ur.json +232 -0
  42. data/lib/lingua_ruby/profiles/vi.json +277 -0
  43. data/lib/lingua_ruby/profiles/yo.json +245 -0
  44. data/lib/lingua_ruby/profiles/zu.json +302 -0
  45. data/lib/lingua_ruby/result.rb +13 -26
  46. data/lib/lingua_ruby/version.rb +1 -1
  47. data/lib/lingua_ruby.rb +4 -0
  48. metadata +41 -2
@@ -0,0 +1,254 @@
1
+ {
2
+ "ా": 0,
3
+ "ర": 1,
4
+ "్": 2,
5
+ "ు": 3,
6
+ "ల": 4,
7
+ "ద": 5,
8
+ "ం": 6,
9
+ "త": 7,
10
+ "ు ": 8,
11
+ "ి": 9,
12
+ "న": 10,
13
+ "రా": 11,
14
+ "్ర": 12,
15
+ "మ": 13,
16
+ "గ": 14,
17
+ " త": 15,
18
+ "ేశ": 16,
19
+ "దే": 17,
20
+ "ియ": 18,
21
+ " మ": 19,
22
+ "ే": 20,
23
+ "శ": 21,
24
+ "దేశ": 22,
25
+ "య": 23,
26
+ " తె": 24,
27
+ "ెల": 25,
28
+ "తె": 26,
29
+ "తెల": 27,
30
+ "ె": 28,
31
+ "ణ": 29,
32
+ "ష": 30,
33
+ "క": 31,
34
+ "ప్": 32,
35
+ "్ ": 33,
36
+ "మర": 34,
37
+ "రి": 35,
38
+ "యు": 36,
39
+ "లం": 37,
40
+ "ంగ": 38,
41
+ "గా": 39,
42
+ "ాష": 40,
43
+ "ాణ": 41,
44
+ "ణ ": 42,
45
+ " ర": 43,
46
+ " ఒక": 44,
47
+ "ఒక ": 45,
48
+ "లో ": 46,
49
+ "శం ": 47,
50
+ "ేశం": 48,
51
+ "ప్ర": 49,
52
+ " మర": 50,
53
+ "మరి": 51,
54
+ "రియ": 52,
55
+ "ియు": 53,
56
+ "యు ": 54,
57
+ "ెలం": 55,
58
+ "లంగ": 56,
59
+ "ంగా": 57,
60
+ "గాణ": 58,
61
+ "ాణ ": 59,
62
+ "ణ ర": 60,
63
+ " రా": 61,
64
+ "్ల": 62,
65
+ "ట్": 63,
66
+ "భ": 64,
67
+ "ో": 65,
68
+ "ఒ": 66,
69
+ "ధ": 67,
70
+ "ప": 68,
71
+ "ట": 69,
72
+ "భా": 70,
73
+ "ార": 71,
74
+ "శం": 72,
75
+ "ం ": 73,
76
+ " ద": 74,
77
+ "ాల": 75,
78
+ "లో": 76,
79
+ "ో ": 77,
80
+ "క ": 78,
81
+ "ఒక": 79,
82
+ " ఒ": 80,
83
+ "గరం": 81,
84
+ "హైద": 82,
85
+ "ఆంధ": 83,
86
+ "ంధ్": 84,
87
+ "ధ్ర": 85,
88
+ "్రప": 86,
89
+ "రప్": 87,
90
+ " హై": 88,
91
+ "్రద": 89,
92
+ "రదే": 90,
93
+ "ేశ్": 91,
94
+ "శ్ ": 92,
95
+ "్ మ": 93,
96
+ "ఖ న": 94,
97
+ "ుఖ ": 95,
98
+ "ముఖ": 96,
99
+ "రము": 97,
100
+ "్రమ": 98,
101
+ "ు త": 99,
102
+ "క ద": 100,
103
+ " దే": 101,
104
+ "ం త": 102,
105
+ "నగర": 103,
106
+ "ెలు": 104,
107
+ "లుగ": 105,
108
+ "ుగు": 106,
109
+ "గు ": 107,
110
+ "ు భ": 108,
111
+ " భా": 109,
112
+ "భాష": 110,
113
+ "ాషన": 111,
114
+ "షను": 112,
115
+ "ను ": 113,
116
+ "ు ఆ": 114,
117
+ " ఆం": 115,
118
+ " నగ": 116,
119
+ "ైదర": 117,
120
+ " మా": 118,
121
+ "మాట": 119,
122
+ "ని ": 120,
123
+ "ాని": 121,
124
+ "ధాన": 122,
125
+ "జధా": 123,
126
+ "ాజధ": 124,
127
+ "రాజ": 125,
128
+ "్ త": 126,
129
+ "ద్ ": 127,
130
+ "తార": 128,
131
+ "డతా": 129,
132
+ "ాడత": 130,
133
+ "ాద్": 131,
134
+ "ాట్": 132,
135
+ "ట్ల": 133,
136
+ "బాద": 134,
137
+ "్లా": 135,
138
+ "ాబా": 136,
139
+ "లాడ": 137,
140
+ " ప్": 138,
141
+ "దరా": 139,
142
+ "ు హ": 140,
143
+ "క ప": 141,
144
+ "ు ఒ": 142,
145
+ "రాబ": 143,
146
+ "రు ": 144,
147
+ "రాష": 145,
148
+ "ాష్": 146,
149
+ "ష్ట": 147,
150
+ "్ట్": 148,
151
+ "ట్ర": 149,
152
+ "్రా": 150,
153
+ "రాల": 151,
154
+ "ాల్": 152,
155
+ "ల్ల": 153,
156
+ "్లో": 154,
157
+ "ారు": 155,
158
+ "ో మ": 156,
159
+ "ి మ": 157,
160
+ "స": 158,
161
+ "న ": 159,
162
+ "లు": 160,
163
+ "ుగ": 161,
164
+ "గు": 162,
165
+ " భ": 163,
166
+ "షన": 164,
167
+ "ను": 165,
168
+ " ఆ": 166,
169
+ "ఆం": 167,
170
+ "ంధ": 168,
171
+ "ధ్": 169,
172
+ "రప": 170,
173
+ "రద": 171,
174
+ "శ్": 172,
175
+ "ష్": 173,
176
+ "్ట": 174,
177
+ "ల్": 175,
178
+ "మా": 176,
179
+ "ాట": 177,
180
+ "లా": 178,
181
+ "ాడ": 179,
182
+ "డత": 180,
183
+ "తా": 181,
184
+ "ఉ": 182,
185
+ "ఆ": 183,
186
+ "డ": 184,
187
+ "హ": 185,
188
+ "ై": 186,
189
+ "బ": 187,
190
+ "జ": 188,
191
+ "ఖ": 189,
192
+ "రత": 190,
193
+ "తద": 191,
194
+ "దక": 192,
195
+ "క్": 193,
196
+ "్ష": 194,
197
+ "షి": 195,
198
+ "ిణ": 196,
199
+ "ణా": 197,
200
+ "ాస": 198,
201
+ "సి": 199,
202
+ "యా": 200,
203
+ " ఉ": 201,
204
+ "ఉన": 202,
205
+ "న్": 203,
206
+ "్న": 204,
207
+ "రు": 205,
208
+ "భార": 206,
209
+ "ారత": 207,
210
+ "రతద": 208,
211
+ "తదే": 209,
212
+ "ం ద": 210,
213
+ " దక": 211,
214
+ "దక్": 212,
215
+ "క్ష": 213,
216
+ "్షి": 214,
217
+ "షిణ": 215,
218
+ "ిణా": 216,
219
+ "ణాస": 217,
220
+ "ాసి": 218,
221
+ "సియ": 219,
222
+ "ియా": 220,
223
+ "యాల": 221,
224
+ "ాలో": 222,
225
+ "ో ఉ": 223,
226
+ " ఉన": 224,
227
+ "ఉన్": 225,
228
+ "న్న": 226,
229
+ "్న ": 227,
230
+ "న ఒ": 228,
231
+ " హ": 229,
232
+ "హై": 230,
233
+ "ైద": 231,
234
+ "దర": 232,
235
+ "ాబ": 233,
236
+ "బా": 234,
237
+ "ాద": 235,
238
+ "ద్": 236,
239
+ "ాజ": 237,
240
+ "జధ": 238,
241
+ "ధా": 239,
242
+ "ాన": 240,
243
+ "ని": 241,
244
+ "ి ": 242,
245
+ " ప": 243,
246
+ "రమ": 244,
247
+ "ము": 245,
248
+ "ుఖ": 246,
249
+ "ఖ ": 247,
250
+ " న": 248,
251
+ "నగ": 249,
252
+ "గర": 250,
253
+ "రం": 251
254
+ }
@@ -0,0 +1,251 @@
1
+ {
2
+ "เ": 0,
3
+ "ท": 1,
4
+ "ป": 2,
5
+ "ร": 3,
6
+ "อ": 4,
7
+ "ง": 5,
8
+ "น": 6,
9
+ "ย": 7,
10
+ "า": 8,
11
+ "ะ": 9,
12
+ "ะเ": 10,
13
+ "เท": 11,
14
+ "ระเ": 12,
15
+ "ะเท": 13,
16
+ "เทศ": 14,
17
+ "ปร": 15,
18
+ "ประ": 16,
19
+ "ระ": 17,
20
+ "ทศ": 18,
21
+ "ศ": 19,
22
+ "่": 20,
23
+ "ี": 21,
24
+ "ไท": 22,
25
+ "ทย": 23,
26
+ "เป": 24,
27
+ "ป็": 25,
28
+ "็น": 26,
29
+ "ไทย": 27,
30
+ "ป็น": 28,
31
+ "เป็": 29,
32
+ "ไ": 30,
33
+ "็": 31,
34
+ "ใ": 32,
35
+ "อง": 33,
36
+ "ห": 34,
37
+ "ทศไ": 35,
38
+ "ม": 36,
39
+ "นเ": 37,
40
+ "ก": 38,
41
+ "ที่": 39,
42
+ "ศไ": 40,
43
+ "ี่": 41,
44
+ "ศไท": 42,
45
+ "ที": 43,
46
+ "ต": 44,
47
+ "มือ": 45,
48
+ "่ใ": 46,
49
+ "ใน": 47,
50
+ "ยเป": 48,
51
+ "ทยเ": 49,
52
+ "าร": 50,
53
+ "ษา": 51,
54
+ "าษ": 52,
55
+ "ือง": 53,
56
+ "ภา": 54,
57
+ "ภาษ": 55,
58
+ "ขอ": 56,
59
+ "็นเ": 57,
60
+ "ีย": 58,
61
+ "นเม": 59,
62
+ "เมื": 60,
63
+ "ช": 61,
64
+ "ว": 62,
65
+ "ภ": 63,
66
+ "ษ": 64,
67
+ "ข": 65,
68
+ "ุ": 66,
69
+ "้": 67,
70
+ "เม": 68,
71
+ "ื": 69,
72
+ "ล": 70,
73
+ "มื": 71,
74
+ "ือ": 72,
75
+ "ั": 73,
76
+ "ยเ": 74,
77
+ "นป": 75,
78
+ "ของ": 76,
79
+ "องป": 77,
80
+ "งปร": 78,
81
+ "าษา": 79,
82
+ "นปร": 80,
83
+ "งป": 81,
84
+ "ู": 82,
85
+ "ษาร": 83,
86
+ "ใต้": 84,
87
+ "ต้ ": 85,
88
+ "้ ภ": 86,
89
+ " ภา": 87,
90
+ "นภา": 88,
91
+ "็นภ": 89,
92
+ "าไท": 90,
93
+ "ษาไ": 91,
94
+ "ในเ": 92,
95
+ "นเอ": 93,
96
+ "เอเ": 94,
97
+ "อเช": 95,
98
+ "เชี": 96,
99
+ "ชีย": 97,
100
+ "ียต": 98,
101
+ "ยตะ": 99,
102
+ "ตะว": 100,
103
+ "ะวั": 101,
104
+ "วัน": 102,
105
+ "ันอ": 103,
106
+ "นออ": 104,
107
+ "ออก": 105,
108
+ "อกเ": 106,
109
+ "กเฉ": 107,
110
+ "เฉี": 108,
111
+ "ฉีย": 109,
112
+ "ียง": 110,
113
+ "ยงใ": 111,
114
+ "งใต": 112,
115
+ "งหล": 113,
116
+ "หลว": 114,
117
+ "ลวง": 115,
118
+ "วงข": 116,
119
+ "งขอ": 117,
120
+ "ทยแ": 118,
121
+ "ยแล": 119,
122
+ "และ": 120,
123
+ "ละเ": 121,
124
+ "ะเป": 122,
125
+ "องท": 123,
126
+ "งที": 124,
127
+ "ี่ใ": 125,
128
+ "่ให": 126,
129
+ "ใหญ": 127,
130
+ "หญ่": 128,
131
+ "ญ่ท": 129,
132
+ "่ที": 130,
133
+ "ี่ส": 131,
134
+ "่สุ": 132,
135
+ "สุด": 133,
136
+ "ุดใ": 134,
137
+ "ดใน": 135,
138
+ "ในป": 136,
139
+ "ารา": 137,
140
+ "ราช": 138,
141
+ "าชก": 139,
142
+ "ชกา": 140,
143
+ "การ": 141,
144
+ "ารข": 142,
145
+ "รขอ": 143,
146
+ "ทย ": 144,
147
+ "ย ก": 145,
148
+ " กร": 146,
149
+ "กรุ": 147,
150
+ "รุง": 148,
151
+ "ุงเ": 149,
152
+ "งเท": 150,
153
+ "เทพ": 151,
154
+ "ทพม": 152,
155
+ "พมห": 153,
156
+ "มหา": 154,
157
+ "หาน": 155,
158
+ "านค": 156,
159
+ "นคร": 157,
160
+ "ครเ": 158,
161
+ "รเป": 159,
162
+ "องห": 160,
163
+ "ะว": 161,
164
+ "วั": 162,
165
+ "ัน": 163,
166
+ "นอ": 164,
167
+ "ออ": 165,
168
+ "อก": 166,
169
+ "กเ": 167,
170
+ "เฉ": 168,
171
+ "ฉี": 169,
172
+ "ยง": 170,
173
+ "งใ": 171,
174
+ "ใต": 172,
175
+ "ต้": 173,
176
+ "้ ": 174,
177
+ " ภ": 175,
178
+ "าไ": 176,
179
+ "นภ": 177,
180
+ "รา": 178,
181
+ "าช": 179,
182
+ "ชก": 180,
183
+ "กา": 181,
184
+ "รข": 182,
185
+ "ฉ": 183,
186
+ "พ": 184,
187
+ "ค": 185,
188
+ "แ": 186,
189
+ "ญ": 187,
190
+ "ส": 188,
191
+ "ด": 189,
192
+ "ศท": 190,
193
+ "่ต": 191,
194
+ "ตั": 192,
195
+ "ั้": 193,
196
+ "้ง": 194,
197
+ "งอ": 195,
198
+ "อย": 196,
199
+ "ยู": 197,
200
+ "ู่": 198,
201
+ "เอ": 199,
202
+ "อเ": 200,
203
+ "เช": 201,
204
+ "ชี": 202,
205
+ "ยต": 203,
206
+ "ตะ": 204,
207
+ "งท": 205,
208
+ "ให": 206,
209
+ "หญ": 207,
210
+ "ญ่": 208,
211
+ "่ท": 209,
212
+ "่ส": 210,
213
+ "สุ": 211,
214
+ "ุด": 212,
215
+ "ดใ": 213,
216
+ "็นป": 214,
217
+ "ทศท": 215,
218
+ "ศที": 216,
219
+ "ี่ต": 217,
220
+ "่ตั": 218,
221
+ "ตั้": 219,
222
+ "ั้ง": 220,
223
+ "้งอ": 221,
224
+ "งอย": 222,
225
+ "อยู": 223,
226
+ "ยู่": 224,
227
+ "ู่ใ": 225,
228
+ "่ใน": 226,
229
+ "ย ": 227,
230
+ " ก": 228,
231
+ "กร": 229,
232
+ "รุ": 230,
233
+ "ุง": 231,
234
+ "งเ": 232,
235
+ "ทพ": 233,
236
+ "พม": 234,
237
+ "มห": 235,
238
+ "หา": 236,
239
+ "าน": 237,
240
+ "นค": 238,
241
+ "คร": 239,
242
+ "รเ": 240,
243
+ "งห": 241,
244
+ "หล": 242,
245
+ "ลว": 243,
246
+ "วง": 244,
247
+ "งข": 245,
248
+ "ยแ": 246,
249
+ "แล": 247,
250
+ "ละ": 248
251
+ }