mini_embed 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b44c5e93e9fc010a7c97e41f4fe4eaef3a5ee1033d1f8348c1d7a3f8d01d7d39
4
+ data.tar.gz: 61ca897bcf84b44822a15bc2ba9e37299567a797b26fd61ba987a4b7645c45b4
5
+ SHA512:
6
+ metadata.gz: ab91e23951d0ef745d37a41467f778de5dcc7bedf21445292d31a1b6286add8cd851ebed6520756689d56226395bac3ce20a4f4bac0066cff8fc8488f26dcff6
7
+ data.tar.gz: 75c2885a6a63dfbc12db9bd80848f4d9526b7e37a23697ae227d1099a29ff5f6a3ea264378e5b66ce8a0b56935ddaa27de513594dfb530d5702652a09c391c27
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # mini_embed
2
+
3
+ Fast, minimal GGUF embedding extractor for Ruby.
4
+
5
+ ## Installation
6
+
7
+ Add to your Gemfile:
8
+
9
+ ```ruby
10
+ gem 'mini_embed'
11
+ ```
12
+ Or install globally:
13
+
14
+ ```sh
15
+ gem install mini_embed
16
+ ```
17
+
18
+ ## Usage
19
+ ```ruby
20
+ require 'mini_embed'
21
+
22
+ model = MiniEmbed.new(model: 'path/to/model.gguf')
23
+ embeddings_bin = model.embeddings(text: "hello world") # => binary output
24
+ embeddings_array = embeddings_bin.unpack('f*') # => array of floats
25
+ puts embeddings_array.size # => model dimension
26
+ ```
27
+
28
+ ## Supported Quantizations
29
+
30
+ ```
31
+ F32, F16
32
+
33
+ Q4_0, Q4_1
34
+
35
+ Q5_0, Q5_1
36
+
37
+ Q8_0, Q8_1
38
+
39
+ Q2_K, Q3_K, Q4_K, Q5_K, Q6_K, Q8_K
40
+ ```
41
+
42
+ ## Building the Gem
43
+
44
+ From the `mini_embed/` directory:
45
+
46
+ ```bash
47
+ bundle install
48
+ bundle exec rake compile
49
+ ```
50
+
51
+
52
+ To build the gem file:
53
+
54
+ ```bash
55
+ gem build mini_embed.gemspec
56
+ ```
57
+
58
+ To install locally:
59
+
60
+ ```bash
61
+ gem install ./mini_embed-0.1.0.gem
62
+ ```
63
+ ## Using in a Rails project
64
+ Add to Gemfile:
65
+
66
+ ```ruby
67
+ gem 'mini_embed', path: '/path/to/mini_embed'
68
+ ```
69
+
70
+ Then `bundle install` and use as above.
71
+
72
+ ## License
73
+
74
+ MIT License. See [LICENSE](LICENSE).
@@ -0,0 +1,12 @@
1
# frozen_string_literal: true

require 'mkmf'

# The extension mmap()s GGUF model files, so these are hard requirements.
# Fail fast at gem-install time with a clear message instead of letting the
# C compile fail later with a cryptic missing-header/undeclared-function error.
abort 'mini_embed requires sys/mman.h'  unless have_header('sys/mman.h')
abort 'mini_embed requires stdint.h'    unless have_header('stdint.h')
abort 'mini_embed requires mmap(2)'     unless have_func('mmap')
abort 'mini_embed requires munmap(2)'   unless have_func('munmap')

# Create the Makefile
create_makefile('mini_embed/mini_embed')
@@ -0,0 +1,704 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <stdint.h>
4
+ #include <string.h>
5
+ #include <math.h>
6
+ #include <sys/mman.h>
7
+ #include <sys/stat.h>
8
+ #include <fcntl.h>
9
+ #include <unistd.h>
10
+ #include <ctype.h>
11
+ #include "ruby.h"
12
+
13
+ #define HASH_SIZE 131071
14
+ #define MAX_DIMS 4
15
+ #define GGUF_ALIGN 32
16
+
17
/* Subset of ggml/GGUF tensor data-type ids this extractor understands.
 * Values are the on-disk type ids from the GGUF tensor-info records and
 * must not be renumbered (note the gap: ids 4 and 5 are intentionally
 * absent here). */
enum ggml_type {
    GGML_TYPE_F32 = 0,
    GGML_TYPE_F16 = 1,
    GGML_TYPE_Q4_0 = 2,
    GGML_TYPE_Q4_1 = 3,
    GGML_TYPE_Q5_0 = 6,
    GGML_TYPE_Q5_1 = 7,
    GGML_TYPE_Q8_0 = 8,
    GGML_TYPE_Q8_1 = 9,
    GGML_TYPE_Q2_K = 10,
    GGML_TYPE_Q3_K = 11,
    GGML_TYPE_Q4_K = 12,
    GGML_TYPE_Q5_K = 13,
    GGML_TYPE_Q6_K = 14,
    GGML_TYPE_Q8_K = 15,
};
33
+
34
+ /* ------------------------------------------------------------------------- */
35
/* Advance *p by sz bytes iff at least sz bytes remain before end.
 * Returns 1 on success, 0 (leaving *p untouched) when the cursor would
 * run past end. The bound is checked with a pointer difference instead of
 * the original `*p + sz > end`, which is undefined behavior when the
 * addition overflows the address space (sz comes from untrusted file data). */
static int safe_advance(uint8_t **p, uint8_t *end, size_t sz) {
    if ((size_t)(end - *p) < sz) return 0;
    *p += sz;
    return 1;
}
40
+
41
/* Read a native-endian uint32 at the cursor, advancing it 4 bytes.
 * Returns 0 when fewer than 4 bytes remain (cursor left unchanged). */
static uint32_t rd32(uint8_t **p, uint8_t *end) {
    uint32_t value = 0;
    if (safe_advance(p, end, sizeof value))
        memcpy(&value, *p - sizeof value, sizeof value);
    return value;
}
47
+
48
/* Read a native-endian uint64 at the cursor, advancing it 8 bytes.
 * Returns 0 when fewer than 8 bytes remain (cursor left unchanged). */
static uint64_t rd64(uint8_t **p, uint8_t *end) {
    uint64_t value = 0;
    if (safe_advance(p, end, sizeof value))
        memcpy(&value, *p - sizeof value, sizeof value);
    return value;
}
54
+
55
/* Read one GGUF string (u64 byte length followed by the bytes) and return
 * a freshly malloc'd NUL-terminated copy, advancing the cursor past it.
 * Returns NULL on truncated input, zero length, lengths above 1 MiB, or
 * OOM. Note: once the 8-byte length has been consumed the cursor stays
 * advanced even on the failure paths. Caller frees the result. */
static char *rdstr(uint8_t **p, uint8_t *end) {
    if (end - *p < 8) return NULL;
    uint64_t n;
    memcpy(&n, *p, sizeof n);
    *p += sizeof n;
    if (n == 0 || n > (1 << 20)) return NULL;   /* sanity cap: 1 MiB */
    if ((uint64_t)(end - *p) < n) return NULL;
    char *out = malloc(n + 1);
    if (out == NULL) return NULL;
    memcpy(out, *p, n);
    out[n] = '\0';
    *p += n;
    return out;
}
69
+
70
+ static void align_to_32(uint8_t **p, uint8_t *end, uint8_t *base) {
71
+ size_t off = *p - base;
72
+ size_t aligned = (off + GGUF_ALIGN - 1) & ~(GGUF_ALIGN - 1);
73
+ if (base + aligned <= end)
74
+ *p = base + aligned;
75
+ }
76
+
77
+ /* ------------------------------------------------------------------------- */
78
/* Chained hash-table node mapping a token string to its vocabulary id. */
typedef struct HashNode {
    char *key;               /* token text; aliases an entry of EmbedModel.tokens, not owned */
    int id;                  /* vocabulary index of the token */
    struct HashNode *next;   /* next node in the same bucket's collision chain */
} HashNode;

/* Everything extracted from one GGUF file: the vocabulary, a token->id
 * hash index, and the token-embedding matrix (dequantized or mmapped). */
typedef struct {
    int vocab_size;      /* number of entries in tokens[] */
    int dim;             /* embedding dimension (floats per token) */
    char **tokens;       /* token strings, index == token id; owned */
    float *float_data;   /* dequantized copy; NULL when tensor_data points into the mapping */
    void *tensor_data;   /* the embedding matrix actually read at query time */
    int tensor_type;     /* ggml_type of the source tensor */
    void *mapped;        /* mmap(2) base of the model file */
    size_t mapped_size;  /* byte length of the mapping (for munmap) */
    HashNode **table;    /* HASH_SIZE buckets for token lookup */
} EmbedModel;

/* Per-Ruby-object wrapper handed to TypedData. */
typedef struct {
    EmbedModel *model;   /* NULL until #initialize succeeds */
} ruby_embedder;
99
+
100
+ static unsigned long hash(const char *s) {
101
+ unsigned long h = 5381;
102
+ int c;
103
+ while ((c = *s++)) h = ((h << 5) + h) + c;
104
+ return h % HASH_SIZE;
105
+ }
106
+
107
+ static void hset(EmbedModel *m, char *k, int id) {
108
+ unsigned long h = hash(k);
109
+ HashNode *n = malloc(sizeof(*n));
110
+ n->key = k;
111
+ n->id = id;
112
+ n->next = m->table[h];
113
+ m->table[h] = n;
114
+ }
115
+
116
+ static int hget(EmbedModel *m, const char *k) {
117
+ HashNode *n = m->table[hash(k)];
118
+ while (n) {
119
+ if (strcmp(n->key, k) == 0) return n->id;
120
+ n = n->next;
121
+ }
122
+ return -1;
123
+ }
124
+
125
+ /* ------------------------------------------------------------------------- */
126
/* mmap path read-only. On success returns the mapping base and stores its
 * length in *size; returns NULL on open/stat/mmap failure. Empty (and
 * negative-size) files are rejected explicitly — mmap of length 0 fails
 * with EINVAL anyway, and the guard keeps *size from being published for
 * an unusable mapping. The fd is closed in all cases (the mapping keeps
 * its own reference). */
static void *map_file(const char *path, size_t *size) {
    int fd = open(path, O_RDONLY);
    if (fd < 0) return NULL;
    struct stat st;
    if (fstat(fd, &st) != 0 || st.st_size <= 0) { close(fd); return NULL; }
    *size = (size_t)st.st_size;
    void *data = mmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);
    if (data == MAP_FAILED) return NULL;
    return data;
}
137
+
138
+ /* ------------------------------------------------------------------------- */
139
/* Convert an IEEE-754 binary16 bit pattern to float.
 * - subnormals: mant/1024 * 2^-14
 * - Inf/NaN (exp == 31): mapped to 0.0f, matching the previous behavior of
 *   neutralizing non-finite weights rather than propagating them
 * - normals: (1 + mant/1024) * 2^(exp-15)
 * The exponent scale is built by repeated multiplication: the original
 * `1 << (exp - 15)` shifted by a NEGATIVE count for exp < 15, which is
 * undefined behavior and produced garbage for all magnitudes below 1.0. */
static float fp16_to_fp32(uint16_t h) {
    const uint16_t sign = (h >> 15) & 1;
    const uint16_t exp = (h >> 10) & 0x1F;
    const uint16_t mant = h & 0x3FF;
    float val;
    if (exp == 0) {
        val = (mant / 1024.0f) * 6.103515625e-5f;   /* 2^-14 */
    } else if (exp == 31) {
        return 0.0f;
    } else {
        float scale = 1.0f;                         /* exact: powers of two */
        int e2 = (int)exp - 15;
        while (e2 > 0) { scale *= 2.0f; e2--; }
        while (e2 < 0) { scale *= 0.5f; e2++; }
        val = (1.0f + mant / 1024.0f) * scale;
    }
    return sign ? -val : val;
}
153
+
154
+ /* ------------------------------------------------------------------------- */
155
+ /* Block dequantization */
156
+
157
/* Dequantize k q4_0 values into y. Block layout as used by this file:
 * 34-byte stride = 4-byte float scale d, then 16 bytes of packed nibbles
 * (even index -> low nibble), remaining bytes unused.
 * NOTE(review): this does not match upstream ggml's 18-byte q4_0 block
 * (fp16 scale) — confirm against whatever produces these files.
 * The scale is loaded with memcpy: the original cast `(const float*)(x+i*34)`
 * is misaligned for every odd block and violates strict aliasing. */
static void dequantize_row_q4_0(const void *vx, float *y, int k) {
    const int nb = k / 32;                 /* 32 values per block */
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const uint8_t *blk = x + (size_t)i * 34;
        float d;
        memcpy(&d, blk, sizeof d);         /* unaligned-safe load */
        const uint8_t *q = blk + 4;
        for (int j = 0; j < 32; j++) {
            const int v = (q[j / 2] >> (4 * (j % 2))) & 0x0F;
            y[i * 32 + j] = (v - 8.0f) * d;
        }
    }
}
169
+
170
/* Dequantize k q4_1 values into y. Block layout as used by this file:
 * 36-byte stride = float scale d, float min m, 16 bytes of packed nibbles
 * (even index -> low nibble), remaining bytes unused.
 * NOTE(review): differs from upstream ggml's 20-byte q4_1 — confirm with
 * the producer. Scales are loaded via memcpy to avoid the misaligned /
 * aliasing float casts of the original. */
static void dequantize_row_q4_1(const void *vx, float *y, int k) {
    const int nb = k / 32;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const uint8_t *blk = x + (size_t)i * 36;
        float d, m;
        memcpy(&d, blk, sizeof d);
        memcpy(&m, blk + 4, sizeof m);
        const uint8_t *q = blk + 8;
        for (int j = 0; j < 32; j++) {
            const int v = (q[j / 2] >> (4 * (j % 2))) & 0x0F;
            y[i * 32 + j] = v * d + m;
        }
    }
}
183
+
184
/* Dequantize k q5_0 values into y. Block layout as used by this file:
 * 40-byte stride = float scale d, 4 bytes of per-value high bits, 16 bytes
 * of low nibbles (even index -> low nibble), remaining bytes unused.
 * NOTE(review): differs from upstream ggml's 22-byte q5_0 — confirm.
 * The scale is loaded with memcpy instead of the original misaligned /
 * aliasing float cast (the high-bit word already used memcpy). */
static void dequantize_row_q5_0(const void *vx, float *y, int k) {
    const int nb = k / 32;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const uint8_t *blk = x + (size_t)i * 40;
        float d;
        memcpy(&d, blk, sizeof d);
        uint32_t qh32;
        memcpy(&qh32, blk + 4, sizeof qh32);   /* bit j = 5th bit of value j */
        const uint8_t *ql = blk + 8;
        for (int j = 0; j < 32; j++) {
            const uint8_t hi = (qh32 >> j) & 1;
            const int v = ((ql[j / 2] >> (4 * (j % 2))) & 0x0F) | (hi << 4);
            y[i * 32 + j] = (v - 16.0f) * d;
        }
    }
}
200
+
201
/* Dequantize k q5_1 values into y. Block layout as used by this file:
 * 44-byte stride = float scale d, float min m, 4 bytes of high bits,
 * 16 bytes of low nibbles, remaining bytes unused.
 * NOTE(review): differs from upstream ggml's 24-byte q5_1 — confirm.
 * Scales loaded via memcpy (misaligned/aliasing casts removed). */
static void dequantize_row_q5_1(const void *vx, float *y, int k) {
    const int nb = k / 32;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const uint8_t *blk = x + (size_t)i * 44;
        float d, m;
        memcpy(&d, blk, sizeof d);
        memcpy(&m, blk + 4, sizeof m);
        uint32_t qh32;
        memcpy(&qh32, blk + 8, sizeof qh32);
        const uint8_t *ql = blk + 12;
        for (int j = 0; j < 32; j++) {
            const uint8_t hi = (qh32 >> j) & 1;
            const int v = ((ql[j / 2] >> (4 * (j % 2))) & 0x0F) | (hi << 4);
            y[i * 32 + j] = v * d + m;
        }
    }
}
218
+
219
/* Dequantize k q8_0 values into y. Block layout as used by this file:
 * 36-byte stride = 4-byte float scale d + 32 signed 8-bit quants (which is
 * internally consistent, unlike the 4-bit variants above).
 * The scale is loaded with memcpy: `(const float*)(x + i*36)` is misaligned
 * for every odd block and violates strict aliasing. */
static void dequantize_row_q8_0(const void *vx, float *y, int k) {
    const int nb = k / 32;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const uint8_t *blk = x + (size_t)i * 36;
        float d;
        memcpy(&d, blk, sizeof d);
        const int8_t *q = (const int8_t *)(blk + 4);
        for (int j = 0; j < 32; j++)
            y[i * 32 + j] = (float)q[j] * d;
    }
}
230
+
231
/* Dequantize k q8_1 values into y. Block layout as used by this file:
 * 40-byte stride = float scale d, float offset s, 32 signed 8-bit quants.
 * Scales loaded via memcpy instead of the original misaligned / aliasing
 * float casts. */
static void dequantize_row_q8_1(const void *vx, float *y, int k) {
    const int nb = k / 32;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const uint8_t *blk = x + (size_t)i * 40;
        float d, s;
        memcpy(&d, blk, sizeof d);
        memcpy(&s, blk + 4, sizeof s);
        const int8_t *q = (const int8_t *)(blk + 8);
        for (int j = 0; j < 32; j++)
            y[i * 32 + j] = (float)q[j] * d + s;
    }
}
243
+
244
+ /* K-quants */
245
+ static void dequantize_row_q2_K(const void *vx, float *y, int k) {
246
+ const int nb = k / 256;
247
+ const uint8_t *x = vx;
248
+ for (int i = 0; i < nb; i++) {
249
+ const float d = ((const float*)(x + i*336))[0];
250
+ const float m = ((const float*)(x + i*336))[1];
251
+ const uint8_t *q = x + i*336 + 8;
252
+ const uint8_t *scales = q + 64;
253
+ for (int j = 0; j < 256; j += 32) {
254
+ const uint8_t ls = scales[j/32] & 0xF;
255
+ const uint8_t ms = scales[j/32] >> 4;
256
+ for (int l = 0; l < 32; l++) {
257
+ const int v = (q[(j+l)/4] >> (2*((j+l)%4))) & 0x03;
258
+ const float dl = d * (ls - 32);
259
+ const float ml = m * (ms - 32);
260
+ y[i*256 + j + l] = v * dl + ml;
261
+ }
262
+ }
263
+ }
264
+ }
265
+
266
/* Dequantize k q3_K values (256 per super-block). Layout as used here:
 * 352-byte stride = float d, quant bytes at +4, scale bytes at +260, and
 * per-value high bits at +292.
 * NOTE(review): despite the q3 name the values are built as 4-bit nibbles
 * plus a high bit (5 bits total), ls1 is applied to the whole 64-value
 * group while ls2/ms only affect the additive term, and the layout does
 * not match upstream ggml's 110-byte q3_K. Left byte-identical; verify
 * against the producing tool. */
static void dequantize_row_q3_K(const void *vx, float *y, int k) {
    const int nb = k / 256;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const float d = ((const float*)(x + i*352))[0];
        const uint8_t *q = x + i*352 + 4;         /* packed nibbles */
        const uint8_t *scales = q + 256;          /* group scales */
        const uint8_t *h = scales + 32;           /* high-bit plane */
        for (int j = 0; j < 256; j += 64) {
            const uint8_t ls1 = scales[j/64] & 0x1F;
            const uint8_t ls2 = (scales[j/64] >> 4) | ((scales[j/64+1] & 0x0F) << 4);
            const uint8_t ms = scales[j/64+1] >> 4;
            for (int l = 0; l < 64; l++) {
                int v = (q[(j+l)/2] >> (4*((j+l)%2))) & 0x0F;
                const int bit = (h[(j+l)/8] >> ((j+l)%8)) & 1;
                v |= bit << 4;
                const float dl = d * (ls1 - 32);
                /* first half of the group uses ls2 for the offset, second half ms */
                const float ml = (l < 32) ? (ls2 - 32) * d : (ms - 32) * d;
                y[i*256 + j + l] = v * dl + ml;
            }
        }
    }
}
289
+
290
/* Dequantize k q4_K values (256 per super-block). Layout as used here:
 * 416-byte stride = float d, float m, 128 bytes of packed nibbles, then
 * one scale byte per 32 values (6-bit scale, 2-bit min selector).
 * NOTE(review): the (ls - 32) and (ms - 2) biases and the layout do not
 * match upstream ggml's 144-byte q4_K — confirm with the producer. */
static void dequantize_row_q4_K(const void *vx, float *y, int k) {
    const int nb = k / 256;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const float d = ((const float*)(x + i*416))[0];
        const float m = ((const float*)(x + i*416))[1];
        const uint8_t *q = x + i*416 + 8;
        const uint8_t *scales = q + 128;
        for (int j = 0; j < 256; j += 32) {
            const uint8_t ls = scales[j/32] & 0x3F;   /* low 6 bits */
            const uint8_t ms = scales[j/32] >> 6;     /* high 2 bits */
            for (int l = 0; l < 32; l++) {
                const int v = (q[(j+l)/2] >> (4*((j+l)%2))) & 0x0F;
                const float dl = d * (ls - 32);
                const float ml = m * (ms - 2);
                y[i*256 + j + l] = v * dl + ml;
            }
        }
    }
}
310
+
311
/* Dequantize k q5_K values (256 per super-block). Layout as used here:
 * 448-byte stride = float d, float m, 128 bytes of low nibbles, 32 bytes
 * of per-value high bits, then one scale byte per 32 values.
 * NOTE(review): layout and (ls - 32)/(ms - 2) biases differ from upstream
 * ggml's 176-byte q5_K — confirm with the producing tool. */
static void dequantize_row_q5_K(const void *vx, float *y, int k) {
    const int nb = k / 256;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const float d = ((const float*)(x + i*448))[0];
        const float m = ((const float*)(x + i*448))[1];
        const uint8_t *q = x + i*448 + 8;    /* low 4 bits of each value */
        const uint8_t *qh = q + 128;         /* 5th bit of each value */
        const uint8_t *scales = qh + 32;
        for (int j = 0; j < 256; j += 32) {
            const uint8_t ls = scales[j/32] & 0x3F;
            const uint8_t ms = scales[j/32] >> 6;
            for (int l = 0; l < 32; l++) {
                int v = (q[(j+l)/2] >> (4*((j+l)%2))) & 0x0F;
                const int bit = (qh[(j+l)/8] >> ((j+l)%8)) & 1;
                v |= bit << 4;
                const float dl = d * (ls - 32);
                const float ml = m * (ms - 2);
                y[i*256 + j + l] = v * dl + ml;
            }
        }
    }
}
334
+
335
/* Dequantize k q6_K values (256 per super-block). Layout as used here:
 * 480-byte stride = float d, 256 bytes... of which the low-nibble plane
 * uses 128 bytes at +4, a 2-bit high plane at +260, and one scale byte
 * per 64 values at +324. Each value = 4 low bits | 2 high bits.
 * NOTE(review): the scale byte is used UNSIGNED here (upstream q6_K uses
 * signed int8 scales) and the layout differs from upstream's 210-byte
 * block — confirm with the producing tool. */
static void dequantize_row_q6_K(const void *vx, float *y, int k) {
    const int nb = k / 256;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const float d = ((const float*)(x + i*480))[0];
        const uint8_t *q = x + i*480 + 4;
        const uint8_t *qh = q + 256;
        const uint8_t *scales = qh + 64;
        for (int j = 0; j < 256; j += 64) {
            const uint8_t ls = scales[j/64];
            for (int l = 0; l < 64; l++) {
                int v = (q[(j+l)/2] >> (4*((j+l)%2))) & 0x0F;
                const int bit = (qh[(j+l)/4] >> (2*((j+l)%4))) & 0x03;
                v |= bit << 4;
                y[i*256 + j + l] = v * d * (ls - 32);
            }
        }
    }
}
354
+
355
/* Dequantize k q8_K values (256 per super-block). Layout as used here:
 * 544-byte stride = float d, 256 signed 8-bit quants, then one scale byte
 * per 32 values; each value = q * d * ls.
 * NOTE(review): upstream ggml's q8_K (292 bytes) stores bsums, not
 * per-group scales, and this multiplies by an UNSIGNED scale byte —
 * confirm with the producing tool. */
static void dequantize_row_q8_K(const void *vx, float *y, int k) {
    const int nb = k / 256;
    const uint8_t *x = vx;
    for (int i = 0; i < nb; i++) {
        const float d = ((const float*)(x + i*544))[0];
        const int8_t *q = (const int8_t*)(x + i*544 + 4);
        const uint8_t *scales = (const uint8_t*)(q + 256);
        for (int j = 0; j < 256; j += 32) {
            const uint8_t ls = scales[j/32];
            for (int l = 0; l < 32; l++) {
                y[i*256 + j + l] = (float)q[j+l] * d * ls;
            }
        }
    }
}
370
+
371
+ /* ------------------------------------------------------------------------- */
372
+ static float* dequantize_tensor(const void *data, int type, int n_rows, int n_cols) {
373
+ if (type == GGML_TYPE_F32) {
374
+ float *out = malloc(n_rows * n_cols * sizeof(float));
375
+ if (!out) return NULL;
376
+ memcpy(out, data, n_rows * n_cols * sizeof(float));
377
+ return out;
378
+ }
379
+ if (type == GGML_TYPE_F16) {
380
+ float *out = malloc(n_rows * n_cols * sizeof(float));
381
+ if (!out) return NULL;
382
+ const uint16_t *in = data;
383
+ for (int i = 0; i < n_rows * n_cols; i++) {
384
+ out[i] = fp16_to_fp32(in[i]);
385
+ }
386
+ return out;
387
+ }
388
+
389
+ float *out = malloc(n_rows * n_cols * sizeof(float));
390
+ if (!out) return NULL;
391
+ const uint8_t *in = data;
392
+ size_t row_bytes = 0;
393
+ void (*dequant_func)(const void*, float*, int) = NULL;
394
+
395
+ switch (type) {
396
+ case GGML_TYPE_Q4_0: dequant_func = dequantize_row_q4_0; row_bytes = (n_cols / 32) * 34; break;
397
+ case GGML_TYPE_Q4_1: dequant_func = dequantize_row_q4_1; row_bytes = (n_cols / 32) * 36; break;
398
+ case GGML_TYPE_Q5_0: dequant_func = dequantize_row_q5_0; row_bytes = (n_cols / 32) * 40; break;
399
+ case GGML_TYPE_Q5_1: dequant_func = dequantize_row_q5_1; row_bytes = (n_cols / 32) * 44; break;
400
+ case GGML_TYPE_Q8_0: dequant_func = dequantize_row_q8_0; row_bytes = (n_cols / 32) * 36; break;
401
+ case GGML_TYPE_Q8_1: dequant_func = dequantize_row_q8_1; row_bytes = (n_cols / 32) * 40; break;
402
+ case GGML_TYPE_Q2_K: dequant_func = dequantize_row_q2_K; row_bytes = (n_cols / 256) * 336; break;
403
+ case GGML_TYPE_Q3_K: dequant_func = dequantize_row_q3_K; row_bytes = (n_cols / 256) * 352; break;
404
+ case GGML_TYPE_Q4_K: dequant_func = dequantize_row_q4_K; row_bytes = (n_cols / 256) * 416; break;
405
+ case GGML_TYPE_Q5_K: dequant_func = dequantize_row_q5_K; row_bytes = (n_cols / 256) * 448; break;
406
+ case GGML_TYPE_Q6_K: dequant_func = dequantize_row_q6_K; row_bytes = (n_cols / 256) * 480; break;
407
+ case GGML_TYPE_Q8_K: dequant_func = dequantize_row_q8_K; row_bytes = (n_cols / 256) * 544; break;
408
+ default:
409
+ free(out);
410
+ return NULL;
411
+ }
412
+
413
+ for (int r = 0; r < n_rows; r++) {
414
+ dequant_func(in + r * row_bytes, out + r * n_cols, n_cols);
415
+ }
416
+ return out;
417
+ }
418
+
419
+ /* ------------------------------------------------------------------------- */
420
/* Skip one GGUF metadata value of the given value-type id, advancing *p.
 * Returns 1 on success, 0 on malformed or unknown data.
 * Adds the GGUF v2+ 8-byte scalar types (10 uint64, 11 int64, 12 float64),
 * which the original rejected — making any model carrying such metadata
 * (most current GGUF files) fail to load. */
static int skip_value(uint8_t **p, uint8_t *end, uint32_t type) {
    switch (type) {
        case 0: case 1: case 7:            /* uint8 / int8 / bool */
            return safe_advance(p, end, 1);
        case 2: case 3:                    /* uint16 / int16 */
            return safe_advance(p, end, 2);
        case 4: case 5: case 6:            /* uint32 / int32 / float32 */
            return safe_advance(p, end, 4);
        case 10: case 11: case 12:         /* uint64 / int64 / float64 */
            return safe_advance(p, end, 8);
        case 8: {                          /* string: u64 length + bytes */
            uint64_t len = rd64(p, end);
            return safe_advance(p, end, len);
        }
        case 9: {                          /* array: u32 elem type + u64 count */
            uint32_t subtype = rd32(p, end);
            uint64_t n = rd64(p, end);
            for (uint64_t i = 0; i < n; i++)
                if (!skip_value(p, end, subtype)) return 0;
            return 1;
        }
        default: return 0;
    }
}
439
+
440
+ /* ------------------------------------------------------------------------- */
441
+ static void free_model_contents(EmbedModel *m) {
442
+ if (!m) return;
443
+ if (m->tokens) {
444
+ for (int i = 0; i < m->vocab_size; i++) free(m->tokens[i]);
445
+ free(m->tokens);
446
+ }
447
+ if (m->table) {
448
+ for (int i = 0; i < HASH_SIZE; i++) {
449
+ HashNode *n = m->table[i];
450
+ while (n) {
451
+ HashNode *next = n->next;
452
+ free(n);
453
+ n = next;
454
+ }
455
+ }
456
+ free(m->table);
457
+ }
458
+ if (m->float_data) free(m->float_data);
459
+ if (m->mapped) munmap(m->mapped, m->mapped_size);
460
+ free(m);
461
+ }
462
+
463
+ /* ------------------------------------------------------------------------- */
464
/* Return 1 iff all len bytes of s are printable characters (empty spans
 * count as printable). Bytes go through unsigned char, as <ctype.h>
 * requires. */
static int is_printable_string(const char *s, size_t len) {
    const unsigned char *u = (const unsigned char *)s;
    for (size_t i = 0; i < len; i++) {
        if (isprint(u[i]) == 0)
            return 0;
    }
    return 1;
}
469
+
470
+ /* Fallback: find the start of tensor info by scanning for a valid string */
471
/* Heuristic resynchronization: scan forward from cur for something that
 * looks like the first GGUF tensor-info record — a u64 string length in
 * (0, 256) followed by that many printable bytes — and return its start,
 * or NULL when nothing plausible is found before end. */
static uint8_t *find_tensor_info_start(uint8_t *cur, uint8_t *end) {
    for (uint8_t *pos = cur; pos + 8 < end; pos++) {
        uint64_t slen;
        memcpy(&slen, pos, sizeof slen);
        if (slen == 0 || slen >= 256) continue;
        if (pos + 8 + slen > end) continue;
        if (is_printable_string((char *)pos + 8, slen))
            return pos;
    }
    return NULL;
}
485
+
486
+ /* ------------------------------------------------------------------------- */
487
/* Load a GGUF file: mmap it, parse the metadata for the tokenizer vocab,
 * locate the token-embedding tensor, and dequantize it if needed.
 * Returns a fully populated EmbedModel (caller frees with
 * free_model_contents) or NULL on any parse/alloc failure.
 * NOTE(review): assumes the mapped file is at least 4 bytes (magic check
 * happens before any size test) — map_file should guarantee a non-empty
 * mapping. */
static EmbedModel *embed_load_gguf(const char *path) {
    size_t sz;
    uint8_t *base = map_file(path, &sz);
    if (!base) return NULL;
    uint8_t *cur = base;
    uint8_t *end = base + sz;

    /* Header: magic "GGUF", u32 version (ignored), tensor and KV counts. */
    if (memcmp(cur, "GGUF", 4) != 0) { munmap(base, sz); return NULL; }
    cur += 4;
    uint32_t version = rd32(&cur, end);
    (void)version;
    uint64_t n_tensors = rd64(&cur, end);
    uint64_t n_kv = rd64(&cur, end);

    EmbedModel *m = calloc(1, sizeof(*m));
    if (!m) { munmap(base, sz); return NULL; }
    m->mapped = base;          /* from here on, free_model_contents owns the mapping */
    m->mapped_size = sz;
    m->table = calloc(HASH_SIZE, sizeof(HashNode*));
    if (!m->table) { free_model_contents(m); return NULL; }

    /* ---------- Metadata ---------- */
    /* Walk all KV pairs; only the tokenizer vocabulary (an array of
     * strings, type 9/subtype 8) is kept — everything else is skipped. */
    int vocab_found = 0;
    for (uint64_t i = 0; i < n_kv; i++) {
        char *key = rdstr(&cur, end);
        if (!key) { free_model_contents(m); return NULL; }
        uint32_t type = rd32(&cur, end);

        if ((strcmp(key, "tokenizer.ggml.tokens") == 0 ||
             strcmp(key, "tokenizer.ggml.token_list") == 0) && type == 9) {
            uint32_t subtype = rd32(&cur, end);
            uint64_t n = rd64(&cur, end);
            if (subtype != 8) { free(key); free_model_contents(m); return NULL; }
            m->tokens = malloc(sizeof(char*) * n);
            if (!m->tokens) { free(key); free_model_contents(m); return NULL; }
            m->vocab_size = (int)n;
            for (uint64_t j = 0; j < n; j++) {
                char *tok = rdstr(&cur, end);
                /* Unreadable tokens become "" so indices stay aligned. */
                if (!tok) tok = strdup("");
                m->tokens[j] = tok;
                hset(m, tok, (int)j);
            }
            vocab_found = 1;
        } else {
            if (!skip_value(&cur, end, type)) {
                free(key); free_model_contents(m); return NULL;
            }
        }
        free(key);
    }

    if (!vocab_found) { free_model_contents(m); return NULL; }

    uint8_t *after_kv = cur;
    align_to_32(&cur, end, base);
    uint8_t *tensor_start = cur;

    /* ---------- Tensor info ---------- */
    /* Two attempts: first from the aligned position after the KV section,
     * then (if nothing was found) from a heuristically re-synced position. */
    int embd_found = 0;
    void *raw_tensor_data = NULL;
    int tensor_type = -1;
    uint64_t dim0 = 0, dim1 = 0;
    int need_transpose = 0;

    for (int attempt = 0; attempt < 2; attempt++) {
        cur = tensor_start;
        for (uint64_t i = 0; i < n_tensors; i++) {
            /* Each record: name, n_dims, dims[], type, offset. */
            char *name = rdstr(&cur, end);
            if (!name) break;
            uint32_t n_dims = rd32(&cur, end);
            uint64_t dims[MAX_DIMS] = {0};
            for (uint32_t d = 0; d < n_dims && d < MAX_DIMS; d++)
                dims[d] = rd64(&cur, end);
            uint32_t type = rd32(&cur, end);
            uint64_t offset = rd64(&cur, end);

            /* Known token-embedding tensor names across model families. */
            int is_token_embd = (strcmp(name, "token_embd.weight") == 0 ||
                                 strcmp(name, "embeddings.word_embeddings.weight") == 0 ||
                                 strcmp(name, "model.embed_tokens.weight") == 0);

            /* Fallback: any 2-D tensor whose name mentions "embd" and whose
             * shape contains the vocabulary size. */
            if (!is_token_embd && n_dims == 2 && m->vocab_size > 0) {
                if ((uint64_t)m->vocab_size == dims[0] && strstr(name, "embd") != NULL)
                    is_token_embd = 1;
                else if ((uint64_t)m->vocab_size == dims[1] && strstr(name, "embd") != NULL)
                    is_token_embd = 1;
            }

            if (!embd_found && is_token_embd) {
                if (n_dims < 2 || dims[1] == 0) { free(name); free_model_contents(m); return NULL; }
                dim0 = dims[0];
                dim1 = dims[1];
                /* Decide which axis is the vocabulary; the other is dim. */
                if (dim0 == (uint64_t)m->vocab_size) {
                    m->dim = (int)dim1;
                    need_transpose = 0;
                } else if (dim1 == (uint64_t)m->vocab_size) {
                    m->dim = (int)dim0;
                    need_transpose = 1;
                } else {
                    /* Neither axis matches: guess the smaller one is dim. */
                    m->dim = (dim0 < dim1) ? (int)dim0 : (int)dim1;
                    need_transpose = (dim0 > dim1) ? 1 : 0;
                }
                /* NOTE(review): standard GGUF tensor offsets are relative
                 * to the aligned data section, not the file base — this
                 * treats them as absolute file offsets; confirm against
                 * the producer of these files. */
                raw_tensor_data = base + offset;
                tensor_type = type;
                embd_found = 1;
            }
            free(name);
        }
        if (embd_found) break;
        if (attempt == 0) {
            tensor_start = find_tensor_info_start(after_kv, end);
            if (!tensor_start) break;
        }
    }

    if (!embd_found || m->dim == 0) {
        free_model_contents(m);
        return NULL;
    }

    /* Dequantize */
    /* F32 in row-major (vocab, dim) order is served straight from the
     * mapping; everything else is expanded into float_data.
     * NOTE(review): need_transpose only swaps which axis is treated as
     * rows — dequantize_tensor never physically transposes the data;
     * verify this is the intended semantics for transposed layouts. */
    if (tensor_type == GGML_TYPE_F32 && !need_transpose) {
        m->float_data = NULL;
        m->tensor_data = raw_tensor_data;
    } else {
        int n_rows = need_transpose ? (int)dim1 : (int)dim0;
        int n_cols = need_transpose ? (int)dim0 : (int)dim1;
        m->float_data = dequantize_tensor(raw_tensor_data, tensor_type, n_rows, n_cols);
        if (!m->float_data) {
            free_model_contents(m);
            return NULL;
        }
        m->tensor_data = m->float_data;
    }
    m->tensor_type = tensor_type;

    return m;
}
624
+
625
+ /* ------------------------------------------------------------------------- */
626
+ static void embed_text(EmbedModel *m, const char *txt, float *out) {
627
+ memset(out, 0, sizeof(float) * m->dim);
628
+ char *copy = strdup(txt);
629
+ if (!copy) return;
630
+
631
+ char *tok = strtok(copy, " ");
632
+ int used = 0;
633
+ const float *embd_matrix = m->tensor_data;
634
+
635
+ while (tok) {
636
+ int id = hget(m, tok);
637
+ if (id >= 0 && id < m->vocab_size) {
638
+ const float *vec = embd_matrix + id * m->dim;
639
+ for (int i = 0; i < m->dim; i++) out[i] += vec[i];
640
+ used++;
641
+ }
642
+ tok = strtok(NULL, " ");
643
+ }
644
+
645
+ if (used > 0) {
646
+ float inv = 1.0f / used;
647
+ for (int i = 0; i < m->dim; i++) out[i] *= inv;
648
+ }
649
+ free(copy);
650
+ }
651
+
652
+ /* ------------------------------------------------------------------------- */
653
+ static void rb_embedder_free(void *p) {
654
+ ruby_embedder *e = p;
655
+ if (!e) return;
656
+ if (e->model) free_model_contents(e->model);
657
+ free(e);
658
+ }
659
+
660
+ static size_t rb_embedder_memsize(const void *p) {
661
+ return sizeof(ruby_embedder);
662
+ }
663
+
664
/* TypedData descriptor: names the wrapped type for diagnostics, wires the
 * free/memsize callbacks (no mark: the struct holds no VALUEs), and lets
 * the GC free the struct immediately on sweep. */
static const rb_data_type_t ruby_embedder_type = {
    "MiniEmbed",
    {NULL, rb_embedder_free, rb_embedder_memsize, NULL},
    NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
};
669
+
670
+ static VALUE rb_embedder_alloc(VALUE klass) {
671
+ ruby_embedder *e = calloc(1, sizeof(*e));
672
+ return TypedData_Wrap_Struct(klass, &ruby_embedder_type, e);
673
+ }
674
+
675
+ static VALUE rb_embedder_initialize(VALUE self, VALUE opts) {
676
+ ruby_embedder *e;
677
+ TypedData_Get_Struct(self, ruby_embedder, &ruby_embedder_type, e);
678
+
679
+ VALUE path = rb_hash_aref(opts, ID2SYM(rb_intern("model")));
680
+ const char *cpath = StringValueCStr(path);
681
+ e->model = embed_load_gguf(cpath);
682
+ if (!e->model)
683
+ rb_raise(rb_eRuntimeError, "failed to load GGUF model");
684
+ return self;
685
+ }
686
+
687
+ static VALUE rb_embed(VALUE self, VALUE opts) {
688
+ ruby_embedder *e;
689
+ TypedData_Get_Struct(self, ruby_embedder, &ruby_embedder_type, e);
690
+
691
+ VALUE text = rb_hash_aref(opts, ID2SYM(rb_intern("text")));
692
+ const char *ctext = StringValueCStr(text);
693
+
694
+ VALUE out = rb_str_new(NULL, e->model->dim * sizeof(float));
695
+ embed_text(e->model, ctext, (float*)RSTRING_PTR(out));
696
+ return out;
697
+ }
698
+
699
/* Extension entry point, invoked by `require 'mini_embed/mini_embed'`.
 * Defines class MiniEmbed with #initialize(opts) and #embeddings(opts). */
void Init_mini_embed(void) {
    VALUE c = rb_define_class("MiniEmbed", rb_cObject);
    rb_define_alloc_func(c, rb_embedder_alloc);
    rb_define_method(c, "initialize", rb_embedder_initialize, 1);
    rb_define_method(c, "embeddings", rb_embed, 1);
}
data/lib/mini_embed.rb ADDED
@@ -0,0 +1,3 @@
1
# frozen_string_literal: true

# Loads the compiled C extension (ext/mini_embed), which defines MiniEmbed.
require 'mini_embed/mini_embed'
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mini_embed
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Makapoxa
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rake
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '13.0'
19
+ type: :development
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '13.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake-compiler
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.2'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.2'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rspec
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.0'
54
+ description: A minimal C extension to load GGUF models and compute token embeddings.
55
+ executables: []
56
+ extensions:
57
+ - ext/mini_embed/extconf.rb
58
+ extra_rdoc_files: []
59
+ files:
60
+ - README.md
61
+ - ext/mini_embed/extconf.rb
62
+ - ext/mini_embed/mini_embed.c
63
+ - lib/mini_embed.rb
64
+ homepage: https://github.com/Makapoxa/mini_embed
65
+ licenses:
66
+ - MIT
67
+ metadata:
68
+ rubygems_mfa_required: 'true'
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: 2.6.0
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.6.7
84
+ specification_version: 4
85
+ summary: Fast GGUF embedding extraction
86
+ test_files: []