tinybits 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a4826141cb2aae8ecab56241a3b5cec14a7c40911cabd7aae971bcbde3a8248c
4
+ data.tar.gz: a87cd61d1aa59343b3c81e8dfd38554cbf590e78b18b3866d98e16f3f4a86cb5
5
+ SHA512:
6
+ metadata.gz: d67489ba3c2e4bdeb4ba7a458d4c70a61e766f10dc77282190e82e8f2dcef46b8f8c5d4de4ee551711229d42366b84d45ad08666abf49e24550adb5d99a94787
7
+ data.tar.gz: 592ed8e9367d428a3bc76994c8bcb91ef77793fd0c3f12beed21e0f08b6d1b5410b04408ad0e391eddc55b1beefd836e1b96e8ff0d64b160d3e035df96cf7425
@@ -0,0 +1,5 @@
1
# Generates the Makefile used to build the tinybits native extension.
require "mkmf"

extension_name = 'tinybits_ext'

dir_config(extension_name)
create_makefile(extension_name)
@@ -0,0 +1,927 @@
1
+ /**
2
+ * TinyBits Amalgamated Header
3
+ * Generated on: Sat Apr 19 07:05:26 PM CEST 2025
4
+ */
5
+
6
+ #ifndef TINY_BITS_H
7
+ #define TINY_BITS_H
8
+
9
+ /* Begin common.h */
10
+
11
+
12
+ #include <stdint.h>
13
+ #include <stdlib.h>
14
+ #include <string.h>
15
+ #include <stddef.h> // for size_t
16
+ #include <math.h>
17
+
18
+
19
/* ---- Table sizes ---- */
#define TB_HASH_SIZE        128   /* number of bins in HashTable.bins */
#define TB_HASH_CACHE_SIZE  256   /* number of HashEntry slots allocated for the dedupe cache */
#define MAX_BYTES           9     /* NOTE(review): presumably the longest varint encoding - confirm */
#define TB_DDP_STR_LEN_MAX  128   /* NOTE(review): presumably the longest dedupable string - confirm */

/* ---- Leading tag bytes (the decoder tests them from the highest bit down) ---- */
#define TB_INT_TAG  0x80   /* integer */
#define TB_STR_TAG  0x40   /* string carrying its bytes inline */
#define TB_STR_LEN  0x1F   /* string length field; this value means "varint length follows" */
#define TB_REF_TAG  0x60   /* reference to a previously seen string */
#define TB_REF_LEN  0x1F   /* reference id field; this value means "varint id follows" */
#define TB_DBL_TAG  0x20   /* floating point family */
#define TB_PFP_TAG  0x20   /* non-negative scaled decimal, low nibble = decimal places */
#define TB_NFP_TAG  0x30   /* negative scaled decimal, low nibble = decimal places */
#define TB_NAN_TAG  0x2D   /* NaN */
#define TB_INF_TAG  0x3D   /* +infinity */
#define TB_NNF_TAG  0x2E   /* -infinity */
#define TB_F16_TAG  0x3E   /* defined but not produced by the packer in this header */
#define TB_F32_TAG  0x2F   /* defined but not produced by the packer in this header */
#define TB_F64_TAG  0x3F   /* raw 8-byte big-endian double */
#define TB_MAP_TAG  0x10   /* map */
#define TB_MAP_LEN  0x0F   /* map length field; this value means "varint length follows" */
#define TB_ARR_TAG  0x08   /* array */
#define TB_ARR_LEN  0x07   /* array length field; this value means "varint length follows" */
#define TB_SEP_TAG  0x05   /* defined but not used in this header */
#define TB_EXT_TAG  0x04   /* defined but not used in this header */
#define TB_BLB_TAG  0x03   /* blob */
#define TB_NIL_TAG  0x02   /* nil */
#define TB_TRU_TAG  0x01   /* true */
#define TB_FLS_TAG  0x00   /* false */

/* ---- Packer feature flags (bitmask stored in tiny_bits_packer.features) ---- */
#define TB_FEATURE_STRING_DEDUPE    0x01
#define TB_FEATURE_COMPRESS_FLOATS  0x02
52
+
53
/* Powers of ten, 10^0 through 10^12, indexed by exponent. */
static double powers[] = {
    1e0,  1e1,  1e2,  1e3,
    1e4,  1e5,  1e6,  1e7,
    1e8,  1e9,  1e10, 1e11,
    1e12
};
68
+
69
/* One remembered string in the packer's deduplication cache. */
typedef struct HashEntry {
    uint32_t hash;        /* value returned by fast_hash_32 for the string */
    uint32_t length;      /* byte length of the string */
    uint32_t offset;      /* where the string bytes start inside the packer buffer */
    uint32_t next_index;  /* 1-based cache index of the next entry in the same bin; 0 ends the chain */
} HashEntry;
75
+
76
+ typedef struct HashTable {
77
+ HashEntry* cache; // HASH_SIZE is 2048, use directly or define HASH_SIZE in header
78
+ uint32_t next_id;
79
+ uint32_t cache_size;
80
+ uint32_t cache_pos;
81
+ uint8_t bins[TB_HASH_SIZE];
82
+ } HashTable;
83
+
84
/*
 * Cheap 32-bit hash of a byte string.
 *
 * The upper 16 bits hold the length; the lower 16 bits are the first two
 * bytes XORed with the last two bytes. Only those (up to) four bytes are
 * read, so distinct strings frequently collide: callers must always confirm
 * a match by comparing the bytes.
 *
 * str: string bytes (need not be NUL-terminated)
 * len: number of bytes in str
 *
 * Lengths below 2 are handled without reading outside the string: an empty
 * string hashes to 0 and a one-byte string reuses its only byte.
 */
static inline uint32_t fast_hash_32(const char* str, uint16_t len) {
    const unsigned char *bytes = (const unsigned char *)str;
    uint32_t hash = (uint32_t)len << 16;

    if (len == 0) {
        return hash;
    }
    if (len == 1) {
        /* No second byte exists; str[1] and str[len-2] would be out of bounds. */
        hash |= (uint32_t)bytes[0] << 8;
        hash ^= (uint32_t)bytes[0];
        return hash;
    }

    hash |= ((uint32_t)bytes[0] << 8) | (uint32_t)bytes[1];
    hash ^= ((uint32_t)bytes[len - 2] << 8) | (uint32_t)bytes[len - 1];
    return hash;
}
90
+
91
/*
 * Write `value` to `buffer` as a variable-length integer and return the
 * number of bytes written (1 to 9).
 *
 *   0 .. 240        -> 1 byte : the value itself
 *   241 .. 2287     -> 2 bytes: prefix 241..248, then (value - 240) % 256
 *   2288 .. 67823   -> 3 bytes: prefix 249, then (value - 2288) big-endian
 *   larger          -> prefix 250..255, then the value itself in 3..8
 *                      big-endian bytes (the fewest that hold it)
 *
 * The caller must provide room for up to 9 bytes.
 */
static inline int encode_varint(uint64_t value, uint8_t* buffer) {
    if (value <= 240) {
        buffer[0] = (uint8_t)value;
        return 1;
    }

    if (value < 2288) {
        uint64_t rest = value - 240;
        buffer[0] = (uint8_t)(241 + rest / 256);
        buffer[1] = (uint8_t)(rest % 256);
        return 2;
    }

    if (value <= 67823) {
        uint64_t rest = value - 2288;
        buffer[0] = 249;
        buffer[1] = (uint8_t)(rest / 256);
        buffer[2] = (uint8_t)(rest % 256);
        return 3;
    }

    /* Find the smallest payload width (3..8 bytes) that can hold the value. */
    int payload = 3;
    while (payload < 8 && (value >> (8 * payload)) != 0) {
        payload++;
    }

    buffer[0] = (uint8_t)(247 + payload);   /* 250 for 3 bytes ... 255 for 8 bytes */
    for (int i = 0; i < payload; i++) {
        buffer[1 + i] = (uint8_t)(value >> (8 * (payload - 1 - i)));
    }
    return payload + 1;
}
160
+
161
/*
 * Cheap upper bound on the number of bytes encode_varint() writes for
 * `value`, used to reserve buffer space before encoding.
 *
 * Deliberately coarse: every value below 2^48 reports 7 even though most of
 * them encode in fewer bytes. The 8-byte boundary is 2^56, matching the point
 * where encode_varint() switches from 8 to 9 bytes (the previous 2^52
 * threshold over-reported values in [2^52, 2^56)).
 */
static inline int varint_size(uint64_t value){
    if (value < (1ULL << 48)) {
        return 7;
    }
    if (value < (1ULL << 56)) {
        return 8;
    }
    return 9;
}
169
+
170
/*
 * Decode one variable-length integer written by encode_varint().
 *
 * buffer: input bytes
 * size:   total number of valid bytes in buffer
 * pos:    in/out read offset; advanced past the decoded integer
 *
 * Returns the decoded value. The encoding has no spare value for errors, so
 * malformed input is reported through *pos only:
 *   - if *pos is already at or beyond size, 0 is returned and *pos is left
 *     unchanged;
 *   - if the integer is truncated (its payload runs past size), 0 is returned
 *     and *pos is set to size so that the caller sees end-of-input.
 * No byte at or beyond buffer[size] is ever read.
 */
static inline uint64_t decode_varint(const uint8_t* buffer, size_t size, size_t *pos) {
    size_t start = *pos;
    if (start >= size) {
        return 0;
    }

    uint8_t prefix = buffer[start];
    if (prefix <= 240) {
        *pos = start + 1;
        return prefix;
    }

    /* Number of bytes following the prefix: 1 for 241..248, 2 for 249, 3..8 for 250..255. */
    size_t payload;
    if (prefix <= 248) {
        payload = 1;
    } else if (prefix == 249) {
        payload = 2;
    } else {
        payload = (size_t)(prefix - 247);
    }

    if (payload > size - start - 1) {
        *pos = size;   /* truncated input */
        return 0;
    }

    const uint8_t *bytes = buffer + start + 1;
    *pos = start + 1 + payload;

    if (prefix <= 248) {
        return 240 + 256 * (uint64_t)(prefix - 241) + bytes[0];
    }
    if (prefix == 249) {
        return 2288 + 256 * (uint64_t)bytes[0] + bytes[1];
    }

    uint64_t value = 0;
    for (size_t i = 0; i < payload; i++) {
        value = (value << 8) | bytes[i];
    }
    return value;
}
220
+
221
/*
 * Equality-oriented memory compare.
 *
 * Returns 0 when the first `num` bytes of both regions are identical and a
 * non-zero value otherwise. For fewer than 32 bytes the result on mismatch
 * is always 1 (it does not indicate ordering); longer inputs are handed to
 * memcmp().
 */
static inline int fast_memcmp(const void *ptr1, const void *ptr2, size_t num) {
    if (num >= 32) {
        return memcmp(ptr1, ptr2, num);
    }

    const unsigned char *lhs = (const unsigned char *)ptr1;
    const unsigned char *rhs = (const unsigned char *)ptr2;
    size_t remaining = num;
    while (remaining-- > 0) {
        if (*lhs++ != *rhs++) {
            return 1;
        }
    }
    return 0;
}
233
+
234
/*
 * Byte-by-byte copy of `num` bytes from ptr2 to ptr1, meant for short
 * strings. Returns ptr1.
 */
static inline void *fast_memcpy(unsigned char *ptr1, const char *ptr2, size_t num) {
    unsigned char *dst = ptr1;
    const char *src = ptr2;
    size_t remaining = num;

    while (remaining-- > 0) {
        *dst++ = *src++;
    }
    return ptr1;
}
240
+
241
+ #include <immintrin.h>
242
+ #include <stddef.h>
243
+ #include <stdint.h>
244
+
245
/* Return the object representation of a double as a 64-bit unsigned integer. */
static inline uint64_t dtoi_bits(double d) {
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);
    return bits;
}
253
+
254
/* Reinterpret a 64-bit unsigned integer as the object representation of a double. */
static inline double itod_bits(uint64_t u) {
    double result;
    memcpy(&result, &u, sizeof result);
    return result;
}
262
+
263
/* Store `value` into buffer[0..7] in big-endian byte order. */
static inline void encode_uint64( uint64_t value, uint8_t *buffer) {
    /* Fill from the least significant byte (last slot) backwards. */
    for (int slot = 7; slot >= 0; slot--) {
        buffer[slot] = (uint8_t)(value & 0xFF);
        value >>= 8;
    }
}
273
+
274
/* Read a big-endian 64-bit unsigned integer from buffer[0..7]. */
static inline uint64_t decode_uint64(const uint8_t *buffer) {
    uint64_t value = 0;
    for (int slot = 0; slot < 8; slot++) {
        value = (value << 8) | (uint64_t)buffer[slot];
    }
    return value;
}
284
+
285
/*
 * True when x is a whole number that fits in a uint64_t.
 * The range test comes first because converting an out-of-range or NaN
 * double to an integer type is undefined behaviour.
 */
static inline int tb_is_whole_u64(double x) {
    if (!(x >= 0.0 && x < 18446744073709551616.0)) {   /* 2^64; also rejects NaN */
        return 0;
    }
    return x == (double)(uint64_t)x;
}

/*
 * Find the smallest number of decimal places p (0..12) such that
 * abs_val * 10^p is a whole number.
 *
 * abs_val: magnitude to examine (callers pass fabs() of the value)
 * scaled:  out; on success receives abs_val * 10^p
 *
 * Returns p, or -1 when no p in 0..12 yields a whole number representable as
 * uint64_t (including NaN, infinity and very large values). On -1, *scaled
 * holds abs_val * 10^12 and must not be used as an integer.
 *
 * The search probes 10^4, 10^8 and 10^12 first; only when one of those
 * succeeds are the three smaller exponents below it tried in ascending order.
 */
static inline int decimal_places_count(double abs_val, double *scaled) {
    static const double scale[13] = {
        1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0,
        10000000.0, 100000000.0, 1000000000.0, 10000000000.0,
        100000000000.0, 1000000000000.0
    };

    *scaled = abs_val;
    if (tb_is_whole_u64(abs_val)) {
        return 0;
    }

    for (int top = 4; top <= 12; top += 4) {
        double widest = abs_val * scale[top];
        *scaled = widest;
        if (!(tb_is_whole_u64(widest) && widest >= abs_val)) {
            continue;   /* not whole even at this precision: try the next group */
        }

        for (int places = top - 3; places < top; places++) {
            double candidate = abs_val * scale[places];
            *scaled = candidate;
            if (tb_is_whole_u64(candidate) && candidate >= abs_val) {
                return places;
            }
        }

        *scaled = widest;
        return top;
    }

    return -1;
}
331
+
332
+ /* End common.h */
333
+
334
+ /* Begin packer.h */
335
+
336
+
337
+ typedef struct tiny_bits_packer {
338
+ unsigned char *buffer; // Pointer to the allocated buffer
339
+ size_t capacity; // Total allocated size of the buffer
340
+ size_t current_pos; // Current position in the buffer (write position)
341
+ HashTable encode_table; // Add the hash table here
342
+ HashTable dictionary;
343
+ uint8_t features;
344
+ // Add any other encoder-specific state here if needed (e.g., string deduplication table later)
345
+ } tiny_bits_packer;
346
+
347
+ static inline unsigned char *tiny_bits_packer_ensure_capacity(tiny_bits_packer *encoder, size_t needed_size) {
348
+ if (!encoder) return NULL;
349
+
350
+ size_t available_space = encoder->capacity - encoder->current_pos;
351
+ if (needed_size > available_space) {
352
+ size_t new_capacity = encoder->capacity + needed_size + (encoder->capacity);
353
+ unsigned char *new_buffer = (unsigned char *)realloc(encoder->buffer, new_capacity);
354
+ if (!new_buffer) return NULL;
355
+ encoder->buffer = new_buffer;
356
+ encoder->capacity = new_capacity;
357
+ }
358
+ return encoder->buffer + encoder->current_pos;
359
+ }
360
+
361
+ tiny_bits_packer *tiny_bits_packer_create(size_t initial_capacity, uint8_t features) {
362
+ tiny_bits_packer *encoder = (tiny_bits_packer *)malloc(sizeof(tiny_bits_packer));
363
+ if (!encoder) return NULL;
364
+
365
+ encoder->buffer = (unsigned char *)malloc(initial_capacity);
366
+ if (!encoder->buffer) {
367
+ free(encoder);
368
+ return NULL;
369
+ }
370
+ encoder->capacity = initial_capacity;
371
+ encoder->current_pos = 0;
372
+ encoder->features = features;
373
+
374
+ // Only allocate hash table if deduplication is enabled
375
+ if (features & TB_FEATURE_STRING_DEDUPE) {
376
+ encoder->encode_table.cache = (HashEntry*)malloc(sizeof(HashEntry) * TB_HASH_CACHE_SIZE);
377
+ if (!encoder->encode_table.cache) {
378
+ //free(encoder->encode_table.buckets);
379
+ free(encoder->buffer);
380
+ free(encoder);
381
+ return NULL;
382
+ }
383
+ encoder->encode_table.cache_size = TB_HASH_CACHE_SIZE;
384
+ encoder->encode_table.cache_pos = 0;
385
+ encoder->encode_table.next_id = 0;
386
+ } else {
387
+ encoder->encode_table.cache = NULL;
388
+ encoder->encode_table.cache_size = 0;
389
+ encoder->encode_table.cache_pos = 0;
390
+ encoder->encode_table.next_id = 0;
391
+ }
392
+
393
+ return encoder;
394
+ }
395
+
396
+ inline void tiny_bits_packer_reset(tiny_bits_packer *encoder) {
397
+ if (!encoder) return;
398
+ encoder->current_pos = 0;
399
+ if (encoder->features & TB_FEATURE_STRING_DEDUPE) {
400
+ encoder->encode_table.next_id = 0;
401
+ encoder->encode_table.cache_pos = 0;
402
+ memset(encoder->encode_table.bins, 0, TB_HASH_SIZE * sizeof(uint8_t));
403
+ }
404
+
405
+ }
406
+
407
+ void tiny_bits_packer_destroy(tiny_bits_packer *encoder) {
408
+ if (!encoder) return;
409
+
410
+ if (encoder->features & TB_FEATURE_STRING_DEDUPE) {
411
+ free(encoder->encode_table.cache);
412
+ }
413
+ free(encoder->buffer);
414
+ free(encoder);
415
+ }
416
+
417
+ static inline int pack_arr(tiny_bits_packer *encoder, int arr_len){
418
+ int written = 0;
419
+ int needed_size;
420
+ uint8_t *buffer;
421
+
422
+ if(arr_len < TB_ARR_LEN){
423
+ needed_size = 1;
424
+ } else {
425
+ needed_size = 1 + varint_size((uint64_t)(arr_len - 7));
426
+ }
427
+ buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
428
+ if (!buffer) return 0; // Handle error
429
+
430
+ if(arr_len < TB_ARR_LEN){
431
+ buffer[0] = TB_ARR_TAG | arr_len;
432
+ written = 1;
433
+ } else {
434
+ buffer[0] = TB_ARR_TAG | TB_ARR_LEN;
435
+ written = 1;
436
+ written += encode_varint((uint64_t)(arr_len - TB_ARR_LEN), buffer + written);
437
+ }
438
+ encoder->current_pos += written;
439
+ return written;
440
+ }
441
+
442
+ static inline int pack_map(tiny_bits_packer *encoder, int map_len){
443
+ int written = 0;
444
+ int needed_size;
445
+ uint8_t *buffer;
446
+
447
+ if(map_len < TB_MAP_LEN){
448
+ needed_size = 1;
449
+ } else {
450
+ needed_size = 1 + varint_size((uint64_t)(map_len - 15));
451
+ }
452
+ buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
453
+ if (!buffer) return 0; // Handle error
454
+
455
+ if(map_len < TB_MAP_LEN){
456
+ buffer[0] = TB_MAP_TAG | map_len;
457
+ written = 1;
458
+ } else {
459
+ buffer[0] = TB_MAP_TAG | TB_MAP_LEN;
460
+ written = 1;
461
+ written += encode_varint((uint64_t)(map_len - TB_MAP_LEN), buffer + written);
462
+ }
463
+ encoder->current_pos += written;
464
+ return written;
465
+ }
466
+
467
+ static inline int pack_int(tiny_bits_packer *encoder, int64_t value){
468
+ int written = 0;
469
+ int needed_size = 10;
470
+ uint8_t *buffer;
471
+ buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
472
+ if (!buffer) return 0; // Handle error
473
+ //printf("value is %ld\n", value);
474
+
475
+ if (value >= 0 && value < 120) {
476
+ buffer[0] = (uint8_t)(TB_INT_TAG | value); // No continuation
477
+ //printf("value is %ld, wrote to buffer %x\n", value, buffer[0]);
478
+ encoder->current_pos += 1;
479
+ return 1;
480
+ } else if (value >= 120) {
481
+ buffer[0] = 248; // Tag for positive with continuation
482
+ value -= 120;
483
+ } else if (value > -7) {
484
+ buffer[0] = (uint8_t)(248 + (-value)); // No continuation
485
+ encoder->current_pos += 1;
486
+ return 1;
487
+ } else {
488
+ buffer[0] = 255; // Tag for negative with continuation
489
+ value = -(value + 7); // Store positive magnitude
490
+ }
491
+ // Encode continuation bytes in BER format (7 bits per byte)
492
+ written += encode_varint(value, buffer + 1) + 1 ;
493
+ encoder->current_pos += written;
494
+ return written;
495
+ }
496
+
497
+ static inline int pack_null(tiny_bits_packer *encoder){
498
+ int needed_size = 1;
499
+ uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
500
+ if (!buffer) return 0; // Handle error
501
+
502
+ buffer[0] = (uint8_t)TB_NIL_TAG;
503
+ encoder->current_pos += 1;
504
+ return 1;
505
+ }
506
+
507
+ static inline int pack_true(tiny_bits_packer *encoder){
508
+ int needed_size = 1;
509
+ uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
510
+ if (!buffer) return 0; // Handle error
511
+
512
+ buffer[0] = (uint8_t)TB_TRU_TAG;
513
+ encoder->current_pos += 1;
514
+ return 1;
515
+ }
516
+
517
+ static inline int pack_false(tiny_bits_packer *encoder){
518
+ int needed_size = 1;
519
+ uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
520
+ if (!buffer) return 0; // Handle error
521
+
522
+ buffer[0] = (uint8_t)TB_FLS_TAG;
523
+ encoder->current_pos += 1;
524
+ return 1;
525
+ }
526
+
527
+ static inline int pack_nan(tiny_bits_packer *encoder){
528
+ int needed_size = 1;
529
+ uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
530
+ if (!buffer) return 0; // Handle error
531
+
532
+ buffer[0] = (uint8_t)TB_NAN_TAG;
533
+ encoder->current_pos += 1;
534
+ return 1;
535
+ }
536
+
537
+ static inline int pack_infinity(tiny_bits_packer *encoder){
538
+ int needed_size = 1;
539
+ uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
540
+ if (!buffer) return 0; // Handle error
541
+
542
+ buffer[0] = (uint8_t)TB_INF_TAG;
543
+ encoder->current_pos += 1;
544
+ return 1;
545
+ }
546
+
547
+ static inline int pack_negative_infinity(tiny_bits_packer *encoder){
548
+ int needed_size = 1;
549
+ uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
550
+ if (!buffer) return 0; // Handle error
551
+
552
+ buffer[0] = (uint8_t)TB_NNF_TAG;
553
+ encoder->current_pos += 1;
554
+ return 1;
555
+ }
556
+
557
+ static inline int pack_str(tiny_bits_packer *encoder, char* str, uint32_t str_len) {
558
+ uint32_t id = 0;
559
+ int found = 0;
560
+ int written = 0;
561
+ int needed_size = 0;
562
+ uint8_t *buffer;
563
+ uint32_t hash_code = 0;
564
+ uint32_t hash = 0;
565
+ if ((encoder->features & TB_FEATURE_STRING_DEDUPE) && str_len >= 2 && str_len <= 128) {
566
+ hash_code = fast_hash_32(str, str_len);
567
+ hash = hash_code % TB_HASH_SIZE;
568
+ uint8_t index = encoder->encode_table.bins[hash];
569
+ while (index > 0) {
570
+ HashEntry entry = encoder->encode_table.cache[index - 1];
571
+ if (hash_code == entry.hash
572
+ && str_len == entry.length
573
+ && (str_len <= 4 || (fast_memcmp(str, encoder->buffer + entry.offset, str_len) == 0) )) {
574
+ id = index - 1;
575
+ found = 1;
576
+ break;
577
+ }
578
+ index = entry.next_index;
579
+ }
580
+ }
581
+
582
+ if (found) {
583
+ // Encode existing string ID
584
+ if (id < 31) {
585
+ needed_size = 1;
586
+ } else {
587
+ needed_size = 1 + varint_size(id - 31);
588
+ }
589
+ buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
590
+ if (!buffer) return 0;
591
+
592
+ if (id < TB_REF_LEN) {
593
+ buffer[0] = TB_REF_TAG | id;
594
+ written = 1;
595
+ } else {
596
+ buffer[0] = TB_REF_TAG | TB_REF_LEN;
597
+ written = 1;
598
+ written += encode_varint(id - TB_REF_LEN, buffer + written);
599
+ }
600
+ } else {
601
+ needed_size = 10 + str_len;
602
+ buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
603
+ if (!buffer) return 0;
604
+
605
+ if (str_len < TB_STR_LEN) {
606
+ buffer[0] = TB_STR_TAG | str_len;
607
+ written = 1;
608
+ fast_memcpy(buffer + written, str, str_len);
609
+ written += str_len;
610
+ } else {
611
+ buffer[0] = TB_STR_TAG | TB_STR_LEN;
612
+ written = 1;
613
+ written += encode_varint(str_len - TB_STR_LEN, buffer + written);
614
+ memcpy(buffer + written, str, str_len);
615
+ written += str_len;
616
+ }
617
+
618
+ if ((encoder->features & TB_FEATURE_STRING_DEDUPE)
619
+ && encoder->encode_table.cache_pos < TB_HASH_CACHE_SIZE
620
+ && str_len >= 2 && str_len <= 128){
621
+ HashEntry* new_entry = &encoder->encode_table.cache[encoder->encode_table.cache_pos++];
622
+ new_entry->hash = hash_code;
623
+ new_entry->length = str_len;
624
+ new_entry->offset = encoder->current_pos + written - str_len;
625
+ new_entry->next_index = encoder->encode_table.bins[hash];
626
+ encoder->encode_table.bins[hash] = encoder->encode_table.cache_pos;
627
+ }
628
+
629
+ }
630
+
631
+ encoder->current_pos += written;
632
+ return written;
633
+ }
634
+
635
+ static inline int pack_double(tiny_bits_packer *encoder, double val) {
636
+ int written = 0;
637
+ uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, 10);
638
+ if (!buffer) return 0;
639
+ // scaled varint encoding
640
+ if (encoder->features & TB_FEATURE_COMPRESS_FLOATS) {
641
+ double abs_val = fabs(val); ///val >= 0 ? val : -val;
642
+ double scaled; //= abs_val;
643
+ int multiplies = decimal_places_count(abs_val, &scaled);
644
+ if(multiplies >= 0){
645
+ uint64_t integer = (uint64_t)scaled;
646
+ if(integer < (1ULL << 48)) {
647
+ if (!buffer) return 0;
648
+ if(val >= 0){
649
+ buffer[0] = TB_PFP_TAG | (multiplies);
650
+ } else {
651
+ buffer[0] = TB_NFP_TAG | (multiplies);
652
+ }
653
+ written++;
654
+ written += encode_varint(integer, buffer + written);
655
+ encoder->current_pos += written;
656
+ return written;
657
+ }
658
+ }
659
+
660
+ }
661
+ // Fallback to raw double
662
+ buffer[0] = TB_F64_TAG;
663
+ written++;
664
+ encode_uint64(dtoi_bits(val), buffer + written);
665
+ written += 8;
666
+ encoder->current_pos += written;
667
+ return written;
668
+ }
669
+
670
+ static inline int pack_blob(tiny_bits_packer *encoder, const char* blob, int blob_size){
671
+ int written = 0;
672
+ int needed_size;
673
+ uint8_t *buffer;
674
+
675
+ needed_size = 1 + varint_size((uint64_t)blob_size) + blob_size;
676
+ buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
677
+ if (!buffer) return 0; // Handle error
678
+
679
+ buffer[0] = (uint8_t)TB_BLB_TAG;
680
+ written++;
681
+ written += encode_varint((uint64_t)blob_size, buffer + written);
682
+ memcpy(buffer + written, blob, blob_size);
683
+ written += blob_size;
684
+ encoder->current_pos += written;
685
+ return written;
686
+ }
687
+
688
+ /* End packer.h */
689
+
690
+ /* Begin unpacker.h */
691
+
692
+
693
+
694
// Result of decoding one value; tells which member of tiny_bits_value is set.
enum tiny_bits_type {
    TINY_BITS_ARRAY,     // `length` holds the element count
    TINY_BITS_MAP,       // `length` holds the number of key/value pairs
    TINY_BITS_INT,       // `int_val` holds the integer
    TINY_BITS_DOUBLE,    // `double_val` holds the number
    TINY_BITS_STR,       // `str_blob_val` describes the string bytes
    TINY_BITS_BLOB,      // `str_blob_val` describes the blob bytes
    TINY_BITS_TRUE,      // no payload
    TINY_BITS_FALSE,     // no payload
    TINY_BITS_NULL,      // no payload
    TINY_BITS_NAN,       // no payload
    TINY_BITS_INF,       // no payload
    TINY_BITS_N_INF,     // no payload
    TINY_BITS_EXT,       // no payload
    TINY_BITS_FINISHED,  // the read position reached the end of the buffer
    TINY_BITS_ERROR      // malformed input or invalid arguments
};
712
+
713
/* Payload of a decoded value; the active member is selected by the returned tiny_bits_type. */
typedef union tiny_bits_value {
    int64_t int_val;       /* TINY_BITS_INT */
    double double_val;     /* TINY_BITS_DOUBLE */
    size_t length;         /* TINY_BITS_ARRAY, TINY_BITS_MAP */
    struct {               /* TINY_BITS_STR, TINY_BITS_BLOB */
        const char *data;  /* first byte; points into the input buffer, not NUL-terminated */
        size_t length;     /* number of bytes at data */
        int32_t id;        /* strings only: >0 = reference to remembered string (id - 1),
                              <0 = newly remembered string, 0 = not remembered */
    } str_blob_val;
} tiny_bits_value;
723
+
724
+ typedef struct tiny_bits_unpacker {
725
+ const unsigned char *buffer; // Input buffer (read-only)
726
+ size_t size; // Total size of buffer
727
+ size_t current_pos; // Current read position
728
+ struct {
729
+ char *str; // Pointer to decompressed string data (owned by strings array)
730
+ size_t length; // Length of string
731
+ } *strings; // Array of decoded strings
732
+ size_t strings_size; // Capacity of strings array
733
+ size_t strings_count; // Number of strings stored
734
+ HashTable dictionary;
735
+ } tiny_bits_unpacker;
736
+
737
+ tiny_bits_unpacker *tiny_bits_unpacker_create(void) {
738
+
739
+ tiny_bits_unpacker *decoder = (tiny_bits_unpacker *)malloc(sizeof(tiny_bits_unpacker));
740
+ if (!decoder) return NULL;
741
+ // String array setup
742
+ decoder->strings_size = 8; // Initial capacity
743
+ decoder->strings = (void *)malloc(decoder->strings_size * sizeof(*decoder->strings));
744
+ if (!decoder->strings) {
745
+ free(decoder);
746
+ return NULL;
747
+ }
748
+ decoder->strings_count = 0;
749
+ return decoder;
750
+ }
751
+
752
+ void tiny_bits_unpacker_set_buffer(tiny_bits_unpacker *decoder, const unsigned char *buffer, size_t size) {
753
+ if (!decoder) return;
754
+ if (!buffer || size < 1) return;
755
+ decoder->buffer = buffer;
756
+ decoder->size = size;
757
+ decoder->current_pos = 0;
758
+ decoder->strings_count = 0;
759
+ }
760
+
761
+ static inline void tiny_bits_unpacker_reset(tiny_bits_unpacker *decoder) {
762
+ if (!decoder) return;
763
+ decoder->current_pos = 0;
764
+ decoder->strings_count = 0;
765
+ }
766
+
767
+ void tiny_bits_unpacker_destroy(tiny_bits_unpacker *decoder) {
768
+ if (!decoder) return;
769
+ if (decoder->strings) {
770
+ free(decoder->strings);
771
+ }
772
+ free(decoder);
773
+ }
774
+
775
+ static inline enum tiny_bits_type _unpack_int(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
776
+ size_t pos = decoder->current_pos;
777
+ if (tag < 248) { // Small positive (128-247)
778
+ value->int_val = tag - 128;
779
+ return TINY_BITS_INT;
780
+ } else if (tag == 248) { // Positive with continuation
781
+ uint64_t val = decode_varint(decoder->buffer, decoder->size, &pos);
782
+ value->int_val = val + 120;
783
+ decoder->current_pos = pos;
784
+ return TINY_BITS_INT;
785
+ } else if (tag > 248 && tag < 255) { // Small negative (248-254)
786
+ value->int_val = -(tag - 248);
787
+ return TINY_BITS_INT;
788
+ } else { // 255: Negative with continuation
789
+ uint64_t val = decode_varint(decoder->buffer, decoder->size, &pos);
790
+ value->int_val = -(val + 7);
791
+ decoder->current_pos = pos;
792
+ return TINY_BITS_INT;
793
+ }
794
+ }
795
+
796
+ static inline enum tiny_bits_type _unpack_arr(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
797
+ size_t pos = decoder->current_pos;
798
+ if (tag < 0b00001111) { // Small array (0-30)
799
+ value->length = tag & 0b00000111;
800
+ } else { // Large array
801
+ value->length = decode_varint(decoder->buffer, decoder->size, &pos) + 7;
802
+ decoder->current_pos = pos;
803
+ }
804
+ return TINY_BITS_ARRAY;
805
+ }
806
+
807
+ static inline enum tiny_bits_type _unpack_map(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
808
+ size_t pos = decoder->current_pos;
809
+ if (tag < 0x1F) { // Small map (0-14)
810
+ value->length = tag & 0x0F;
811
+ } else { // Large map
812
+ value->length = decode_varint(decoder->buffer, decoder->size, &pos) + 15;
813
+ decoder->current_pos = pos;
814
+ }
815
+ return TINY_BITS_MAP;
816
+ }
817
+
818
+ static inline enum tiny_bits_type _unpack_double(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
819
+ size_t pos = decoder->current_pos;
820
+ if (tag == TB_F64_TAG) { // Raw double
821
+ uint64_t number = decode_uint64(decoder->buffer + pos);
822
+ value->double_val = itod_bits(number);
823
+ decoder->current_pos += 8;
824
+ } else { // Compressed double
825
+ uint64_t number = decode_varint(decoder->buffer, decoder->size, &pos);
826
+ int order = (tag & 0x0F);
827
+ double fractional = (double)number / powers[order];
828
+ //fractional /= powers[order];
829
+ if(tag & 0x10) fractional = -fractional;
830
+ value->double_val = fractional;
831
+ decoder->current_pos = pos;
832
+ }
833
+ return TINY_BITS_DOUBLE;
834
+ }
835
+
836
+ static inline enum tiny_bits_type _unpack_blob(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
837
+ size_t pos = decoder->current_pos;
838
+ size_t len = decode_varint(decoder->buffer, decoder->size, &pos);
839
+ value->str_blob_val.data = (const char *)decoder->buffer + pos;
840
+ value->str_blob_val.length = len;
841
+ decoder->current_pos = pos + len;
842
+ return TINY_BITS_BLOB;
843
+ }
844
+
845
+ static inline enum tiny_bits_type _unpack_str(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
846
+ size_t pos = decoder->current_pos;
847
+ size_t len;
848
+ if (tag < 0x5F) { // Small string (0-30)
849
+ len = tag & 0x1F;
850
+ value->str_blob_val.data = (const char *)decoder->buffer + pos;
851
+ value->str_blob_val.length = len;
852
+ decoder->current_pos = pos + len;
853
+ } else if (tag == 0x5F) { // Large string
854
+ len = decode_varint(decoder->buffer, decoder->size, &pos) + 31;
855
+ value->str_blob_val.data = (const char *)decoder->buffer + pos;
856
+ value->str_blob_val.length = len;
857
+ decoder->current_pos = pos + len;
858
+ } else { // Deduplicated (small: < 0x7F, large: 0x7F)
859
+ uint32_t id = (tag < 0x7F) ? (tag & 0x1F) : decode_varint(decoder->buffer, decoder->size, &pos) + 31;
860
+ if (id >= decoder->strings_count) return TINY_BITS_ERROR;
861
+ len = decoder->strings[id].length;
862
+ value->str_blob_val.data = decoder->strings[id].str;
863
+ value->str_blob_val.length = len;
864
+ value->str_blob_val.id = id+1;
865
+ decoder->current_pos = pos; // Update pos after varint
866
+ return TINY_BITS_STR;
867
+ }
868
+ value->str_blob_val.id = 0;
869
+ // Handle new string (not deduplicated)
870
+ if(decoder->strings_count < TB_HASH_CACHE_SIZE){
871
+ if (decoder->strings_count >= decoder->strings_size) {
872
+ size_t new_size = decoder->strings_size * 2;
873
+ void *new_strings = realloc(decoder->strings, new_size * sizeof(*decoder->strings));
874
+ if (!new_strings) return TINY_BITS_ERROR;
875
+ decoder->strings = new_strings;
876
+ decoder->strings_size = new_size;
877
+ }
878
+
879
+ decoder->strings[decoder->strings_count].str = (char *)decoder->buffer + pos;
880
+ decoder->strings[decoder->strings_count].length = len;
881
+ decoder->strings_count++;
882
+ value->str_blob_val.id = -1 * decoder->strings_count;
883
+ }
884
+ return TINY_BITS_STR;
885
+ }
886
+
887
+ static inline enum tiny_bits_type unpack_value(tiny_bits_unpacker *decoder, tiny_bits_value *value) {
888
+ if (!decoder || !value || decoder->current_pos >= decoder->size) {
889
+ return (decoder && decoder->current_pos >= decoder->size) ? TINY_BITS_FINISHED : TINY_BITS_ERROR;
890
+ }
891
+
892
+ uint8_t tag = decoder->buffer[decoder->current_pos++];
893
+ //printf("found tag %X\n", tag);
894
+ // Dispatch based on tag
895
+ if ((tag & TB_INT_TAG) == TB_INT_TAG) { // Integers
896
+ return _unpack_int(decoder, tag, value);
897
+ } else if ((tag & TB_STR_TAG) == TB_STR_TAG) { // Strings
898
+ return _unpack_str(decoder, tag, value);
899
+ } else if (tag == TB_NIL_TAG) {
900
+ return TINY_BITS_NULL;
901
+ } else if (tag == TB_NAN_TAG) {
902
+ return TINY_BITS_NAN;
903
+ } else if (tag == TB_INF_TAG) {
904
+ return TINY_BITS_INF;
905
+ } else if (tag == TB_NNF_TAG) {
906
+ return TINY_BITS_N_INF;
907
+ } else if ((tag & TB_DBL_TAG) == TB_DBL_TAG) { // Doubles
908
+ return _unpack_double(decoder, tag, value);
909
+ } else if ((tag & TB_MAP_TAG) == TB_MAP_TAG) { // Maps
910
+ return _unpack_map(decoder, tag, value);
911
+ } else if ((tag & TB_ARR_TAG) == TB_ARR_TAG) { // Arrays
912
+ return _unpack_arr(decoder, tag, value);
913
+ } else if (tag == TB_BLB_TAG) { // Blob
914
+ return _unpack_blob(decoder, tag, value);
915
+ } else if (tag == TB_TRU_TAG) {
916
+ return TINY_BITS_TRUE;
917
+ } else if (tag == TB_FLS_TAG) {
918
+ return TINY_BITS_FALSE;
919
+ }
920
+ //printf("UNKOWN TAG\n");
921
+ return TINY_BITS_ERROR; // Unknown tag
922
+ }
923
+
924
+
925
+ /* End unpacker.h */
926
+
927
+ #endif /* TINY_BITS_H */
@@ -0,0 +1,315 @@
1
+ #include <ruby.h>
2
+ #include <ruby/encoding.h>
3
+
4
+ #include "tinybits.h"
5
+
6
+ // Handles for the TinyBits module and its Packer/Unpacker classes; assigned in Init_tinybits_ext
7
+ VALUE rb_mTinyBits;
8
+ VALUE rb_cPacker;
9
+ VALUE rb_cUnpacker;
10
+
11
+ // Forward declarations
12
+ static VALUE rb_packer_alloc(VALUE klass);
13
+ static VALUE rb_packer_init(VALUE self);
14
+ static VALUE rb_unpacker_alloc(VALUE klass);
15
+ static VALUE rb_unpacker_init(VALUE self);
16
+ static VALUE rb_pack(VALUE self, VALUE obj);
17
+ static VALUE rb_unpack(VALUE self, VALUE buffer);
18
+
19
+ // Packing context: its address is cast to VALUE and handed to rb_hash_foreach so the callback can reach the packer and report failure
20
+ typedef struct {
21
+ tiny_bits_packer* packer; // packer that receives every key and value
22
+ int error_occurred; // 0 for no error, 1 if callback encountered an error
23
+ } HashIterContext;
24
+
25
+ // Forward declaration for the recursive function
26
+ static int pack_ruby_object_recursive(tiny_bits_packer* packer, VALUE obj, VALUE context);
27
+
28
+ // Callback function for rb_hash_foreach
29
+ // It receives key, value, and the packer pointer (passed as user_data)
30
+ inline static int hash_foreach_callback(VALUE key, VALUE value, VALUE user_data) {
31
+ // Cast user_data back to our context struct pointer
32
+ HashIterContext* context = (HashIterContext*)user_data;
33
+
34
+ // Pack the key using the packer from the context
35
+ if (!pack_ruby_object_recursive(context->packer, key, user_data)) {
36
+ context->error_occurred = 1; // Signal error occurred
37
+ return ST_STOP; // Stop iteration
38
+ }
39
+ // Pack the value using the packer from the context
40
+ if (!pack_ruby_object_recursive(context->packer, value, user_data)) {
41
+ context->error_occurred = 1; // Signal error occurred
42
+ return ST_STOP; // Stop iteration
43
+ }
44
+
45
+ // If both succeeded, continue
46
+ return ST_CONTINUE;
47
+ }
48
+
49
+ // Native state wrapped by TinyBits::Packer objects
50
+ typedef struct {
51
+ tiny_bits_packer* packer; // NULL after rb_packer_alloc; created in rb_packer_init
52
+ } PackerData;
53
+
54
+ static void packer_free(void* data) {
55
+ PackerData* packer_data = (PackerData*)data;
56
+ if (packer_data->packer) {
57
+ tiny_bits_packer_destroy(packer_data->packer);
58
+ }
59
+ free(packer_data);
60
+ }
61
+
62
+ static size_t packer_memsize(const void* data) {
63
+ return sizeof(PackerData);
64
+ }
65
+
66
+ static const rb_data_type_t packer_data_type = {
67
+ "TinyBits::Packer",
68
+ {0, packer_free, packer_memsize,},
69
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
70
+ };
71
+
72
+ static VALUE rb_packer_alloc(VALUE klass) {
73
+ PackerData* packer_data = ALLOC(PackerData);
74
+ packer_data->packer = NULL;
75
+ return TypedData_Wrap_Struct(klass, &packer_data_type, packer_data);
76
+ }
77
+
78
+ static VALUE rb_packer_init(VALUE self) {
79
+ PackerData* packer_data;
80
+ TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
81
+
82
+ packer_data->packer = tiny_bits_packer_create(256, (TB_FEATURE_STRING_DEDUPE | TB_FEATURE_COMPRESS_FLOATS) | 0); // Initial capacity and features
83
+ if (!packer_data->packer) {
84
+ rb_raise(rb_eRuntimeError, "Failed to initialize packer");
85
+ }
86
+ return self;
87
+ }
88
+
89
+ // Optimized recursive packing function
90
+ static inline int pack_ruby_object_recursive(tiny_bits_packer* packer, VALUE obj, VALUE context) {
91
+ switch (TYPE(obj)) {
92
+ case T_STRING: {
93
+ return pack_str(packer, RSTRING_PTR(obj), RSTRING_LEN(obj));
94
+ }
95
+ case T_HASH: {
96
+ long len = RHASH_SIZE(obj);
97
+ int written = pack_map(packer, len);
98
+ if (written <= 0) return 0; // Error check based on tiny_bits API
99
+ rb_hash_foreach(obj, hash_foreach_callback, context);
100
+ return (((HashIterContext *)context)->error_occurred == 0);
101
+ }
102
+ case T_ARRAY: {
103
+ long len = RARRAY_LEN(obj);
104
+ int written = pack_arr(packer, len);
105
+ if (written <= 0) return 0; // Error check based on tiny_bits API
106
+ for (long i = 0; i < len; i++) {
107
+ if(!pack_ruby_object_recursive(packer, rb_ary_entry(obj, i), context)) return 0; // Propagate error
108
+ }
109
+ return 1; // Success
110
+ }
111
+ case T_FIXNUM: {
112
+ int64_t val = NUM2LONG(obj); // Assumes fits in int64_t
113
+ return pack_int(packer, val);
114
+ }
115
+ case T_FLOAT: {
116
+ double val = NUM2DBL(obj);
117
+ return pack_double(packer, val);
118
+ }
119
+ case T_NIL:
120
+ return pack_null(packer);
121
+ case T_TRUE:
122
+ return pack_true(packer);
123
+ case T_FALSE:
124
+ return pack_false(packer);
125
+ case T_SYMBOL: {
126
+ VALUE str = rb_sym2str(obj);
127
+ return pack_str(packer, RSTRING_PTR(str), RSTRING_LEN(str));
128
+ }
129
+ default:
130
+ rb_warn("Unsupported type encountered during packing: %s", rb_obj_classname(obj));
131
+ return 0;
132
+ }
133
+ }
134
+
135
+ // keeps the public API the same.
136
+ static VALUE rb_pack(VALUE self, VALUE obj) {
137
+ PackerData* packer_data;
138
+ TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
139
+
140
+
141
+ if (!packer_data->packer) {
142
+ rb_raise(rb_eRuntimeError, "Packer not initialized");
143
+ }
144
+
145
+ // Reset before packing (assuming this is efficient)
146
+ tiny_bits_packer_reset(packer_data->packer);
147
+
148
+ HashIterContext context;
149
+ context.packer = packer_data->packer; // Pass the current packer
150
+ context.error_occurred = 0; // Initialize error flag
151
+
152
+ // Call the optimized recursive function
153
+ if (!pack_ruby_object_recursive(packer_data->packer, obj, (VALUE)&context)) {
154
+ // Error occurred during packing (might be unsupported type or tiny_bits error)
155
+ rb_raise(rb_eRuntimeError, "Failed to pack object (unsupported type or packing error)");
156
+ }
157
+
158
+ VALUE result = rb_str_new((const char*)packer_data->packer->buffer, packer_data->packer->current_pos);
159
+ rb_obj_freeze(result);
160
+ return result;
161
+ }
162
+
163
+
164
+ // Native state wrapped by TinyBits::Unpacker objects
165
+ typedef struct {
166
+ tiny_bits_unpacker* unpacker; // NULL after rb_unpacker_alloc; created in rb_unpacker_init
167
+ size_t strings_index; // set to 0 in rb_unpacker_init
168
+ VALUE ruby_strings[TB_HASH_CACHE_SIZE]; // Ruby strings cached by rb_unpack_str, slot = string id - 1
169
+ } UnpackerData;
170
+
171
+ static void unpacker_free(void* data) {
172
+ UnpackerData* unpacker_data = (UnpackerData*)data;
173
+ if (unpacker_data->unpacker) {
174
+ tiny_bits_unpacker_destroy(unpacker_data->unpacker);
175
+ }
176
+ for (size_t i = 0; i < TB_HASH_CACHE_SIZE; i++) {
177
+ unpacker_data->ruby_strings[i] = Qnil;
178
+ }
179
+ free(unpacker_data);
180
+ }
181
+
182
+ static size_t unpacker_memsize(const void* data) {
183
+ return sizeof(UnpackerData);
184
+ }
185
+
186
+ static const rb_data_type_t unpacker_data_type = {
187
+ "TinyBits::Unpacker",
188
+ {0, unpacker_free, unpacker_memsize,},
189
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
190
+ };
191
+
192
+ static VALUE rb_unpacker_alloc(VALUE klass) {
193
+ UnpackerData* unpacker_data = ALLOC(UnpackerData);
194
+ unpacker_data->unpacker = NULL;
195
+ return TypedData_Wrap_Struct(klass, &unpacker_data_type, unpacker_data);
196
+ }
197
+
198
+ static VALUE rb_unpacker_init(VALUE self) {
199
+ UnpackerData* unpacker_data;
200
+ TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
201
+
202
+ unpacker_data->unpacker = tiny_bits_unpacker_create();
203
+ if (!unpacker_data->unpacker) {
204
+ rb_raise(rb_eRuntimeError, "Failed to initialize unpacker");
205
+ }
206
+ unpacker_data->strings_index = 0;
207
+ return self;
208
+ }
209
+
210
+ static inline VALUE rb_unpack_str(UnpackerData* unpacker_data, tiny_bits_value value, size_t interned){
211
+ int32_t id = value.str_blob_val.id;
212
+ if(id > 0)
213
+ return unpacker_data->ruby_strings[id-1];
214
+ else if(id <= 0){
215
+ VALUE str;
216
+ if(interned > 0){
217
+ str = rb_enc_interned_str(value.str_blob_val.data, value.str_blob_val.length, rb_utf8_encoding());
218
+ } else {
219
+ str = rb_utf8_str_new(value.str_blob_val.data, value.str_blob_val.length);
220
+ rb_obj_freeze(str);
221
+ }
222
+ if(id < 0){
223
+ unpacker_data->ruby_strings[abs(id)-1] = str;
224
+ }
225
+ return str;
226
+ }
227
+ return Qundef;
228
+ }
229
+
230
+ static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
231
+ tiny_bits_unpacker* unpacker = unpacker_data->unpacker;
232
+ tiny_bits_value value;
233
+ enum tiny_bits_type type = unpack_value(unpacker, &value);
234
+
235
+ if (type == TINY_BITS_ERROR) {
236
+ return Qundef; // Use Qundef as a sentinel for error (not nil)
237
+ }
238
+
239
+ switch (type) {
240
+ case TINY_BITS_STR: {
241
+ return rb_unpack_str(unpacker_data, value, interned);
242
+ }
243
+ case TINY_BITS_DOUBLE:
244
+ return DBL2NUM(value.double_val);
245
+ case TINY_BITS_INT:
246
+ return LONG2NUM(value.int_val);
247
+ case TINY_BITS_NULL:
248
+ return Qnil;
249
+ case TINY_BITS_TRUE:
250
+ return Qtrue;
251
+ case TINY_BITS_FALSE:
252
+ return Qfalse;
253
+ case TINY_BITS_ARRAY: {
254
+ VALUE arr = rb_ary_new_capa(value.length);
255
+ for (size_t i = 0; i < value.length; i++) {
256
+ VALUE element = unpack_ruby_object(unpacker_data, 0);
257
+ if (element == Qundef) return Qundef; // Error
258
+ rb_ary_push(arr, element);
259
+ }
260
+ return arr;
261
+ }
262
+ case TINY_BITS_MAP: {
263
+ VALUE hash = rb_hash_new_capa(value.length);
264
+ for (size_t i = 0; i < value.length; i++) {
265
+ VALUE key = unpack_ruby_object(unpacker_data, 1);
266
+ if (key == Qundef) return Qundef; // Error
267
+ VALUE val = unpack_ruby_object(unpacker_data, 0);
268
+ if (val == Qundef) return Qundef; // Error
269
+ rb_hash_aset(hash, key, val);
270
+ }
271
+ return hash;
272
+ }
273
+ case TINY_BITS_BLOB:
274
+ // For simplicity, treat blobs as strings (similar to strings)
275
+ VALUE blob = rb_str_new(value.str_blob_val.data, value.str_blob_val.length);
276
+ rb_obj_freeze(blob);
277
+ return blob;
278
+ default:
279
+ return Qundef; // Error
280
+ }
281
+ }
282
+
283
+ static VALUE rb_unpack(VALUE self, VALUE buffer) {
284
+ UnpackerData* unpacker_data;
285
+ TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
286
+
287
+ if (!unpacker_data->unpacker) {
288
+ rb_raise(rb_eRuntimeError, "Unpacker not initialized");
289
+ }
290
+
291
+ StringValue(buffer); // Ensure it's a string
292
+
293
+ tiny_bits_unpacker_set_buffer(unpacker_data->unpacker, (const unsigned char*)RSTRING_PTR(buffer), RSTRING_LEN(buffer));
294
+
295
+ VALUE result = unpack_ruby_object(unpacker_data, 0);
296
+ if (result == Qundef) {
297
+ rb_raise(rb_eRuntimeError, "Failed to unpack data");
298
+ }
299
+
300
+ return result;
301
+ }
302
+
303
+ void Init_tinybits_ext(void) {
304
+ rb_mTinyBits = rb_define_module("TinyBits");
305
+ rb_cPacker = rb_define_class_under(rb_mTinyBits, "Packer", rb_cObject);
306
+ rb_cUnpacker = rb_define_class_under(rb_mTinyBits, "Unpacker", rb_cObject);
307
+
308
+ rb_define_alloc_func(rb_cPacker, rb_packer_alloc);
309
+ rb_define_method(rb_cPacker, "initialize", rb_packer_init, 0);
310
+ rb_define_method(rb_cPacker, "pack", rb_pack, 1);
311
+
312
+ rb_define_alloc_func(rb_cUnpacker, rb_unpacker_alloc);
313
+ rb_define_method(rb_cUnpacker, "initialize", rb_unpacker_init, 0);
314
+ rb_define_method(rb_cUnpacker, "unpack", rb_unpack, 1);
315
+ }
@@ -0,0 +1,3 @@
1
+ module TinyBits
2
+ VERSION = '0.1.0'.freeze # version string of the gem
3
+ end
data/lib/tinybits.rb ADDED
@@ -0,0 +1,4 @@
1
+ require_relative './tinybits/version'
2
+ require 'tinybits_ext'
3
+
4
+
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tinybits
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Mohamed Hassan
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-04-20 00:00:00.000000000 Z
11
+ dependencies: []
12
+ description: TinyBits is a Ruby gem that wraps the TinyBits C serialization library,
13
+ offering Rubyists the power of serialization with integer/float compression and string
14
+ deduplication!
15
+ email: oldmoe@gamil.com
16
+ executables: []
17
+ extensions:
18
+ - ext/tinybits/extconf.rb
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ext/tinybits/extconf.rb
22
+ - ext/tinybits/tinybits.h
23
+ - ext/tinybits/tinybits_ext.c
24
+ - lib/tinybits.rb
25
+ - lib/tinybits/version.rb
26
+ homepage: https://github.com/oldmoe/tinybits-rb
27
+ licenses:
28
+ - MIT
29
+ metadata:
30
+ source_code_uri: https://github.com/oldmoe/tinybits-rb
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: 3.0.0
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubygems_version: 3.6.0.dev
46
+ specification_version: 4
47
+ summary: Very fast and compact serialization for Ruby!
48
+ test_files: []