tinybits 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/tinybits/extconf.rb +5 -0
- data/ext/tinybits/tinybits.h +927 -0
- data/ext/tinybits/tinybits_ext.c +315 -0
- data/lib/tinybits/version.rb +3 -0
- data/lib/tinybits.rb +4 -0
- metadata +48 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a4826141cb2aae8ecab56241a3b5cec14a7c40911cabd7aae971bcbde3a8248c
|
4
|
+
data.tar.gz: a87cd61d1aa59343b3c81e8dfd38554cbf590e78b18b3866d98e16f3f4a86cb5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d67489ba3c2e4bdeb4ba7a458d4c70a61e766f10dc77282190e82e8f2dcef46b8f8c5d4de4ee551711229d42366b84d45ad08666abf49e24550adb5d99a94787
|
7
|
+
data.tar.gz: 592ed8e9367d428a3bc76994c8bcb91ef77793fd0c3f12beed21e0f08b6d1b5410b04408ad0e391eddc55b1beefd836e1b96e8ff0d64b160d3e035df96cf7425
|
@@ -0,0 +1,927 @@
|
|
1
|
+
/**
|
2
|
+
* TinyBits Amalgamated Header
|
3
|
+
* Generated on: Sat Apr 19 07:05:26 PM CEST 2025
|
4
|
+
*/
|
5
|
+
|
6
|
+
#ifndef TINY_BITS_H
|
7
|
+
#define TINY_BITS_H
|
8
|
+
|
9
|
+
/* Begin common.h */
|
10
|
+
|
11
|
+
|
12
|
+
#include <stdint.h>
|
13
|
+
#include <stdlib.h>
|
14
|
+
#include <string.h>
|
15
|
+
#include <stddef.h> // for size_t
|
16
|
+
#include <math.h>
|
17
|
+
|
18
|
+
|
19
|
+
/* Number of hash bins in a HashTable (size of HashTable.bins). */
#define TB_HASH_SIZE 128
/* Number of HashEntry slots the packer allocates for string deduplication. */
#define TB_HASH_CACHE_SIZE 256
/* NOTE(review): presumably the longest varint encode_varint can emit (9 bytes) - confirm. */
#define MAX_BYTES 9
/* NOTE(review): presumably the longest string eligible for deduplication; pack_str uses a literal 128 - confirm. */
#define TB_DDP_STR_LEN_MAX 128

/* Wire tags. A tag is the first byte of every encoded value; several of them
 * carry a small length or id in their low bits (see the *_LEN masks). */
#define TB_INT_TAG 0x80   /* OR-ed with small non-negative integers by pack_int */
#define TB_STR_TAG 0x40   /* inline string; low bits hold the length */
#define TB_STR_LEN 0x1F   /* length mask / "length follows as varint" marker */
#define TB_REF_TAG 0x60   /* reference to a previously emitted string */
#define TB_REF_LEN 0x1F   /* id mask / "id follows as varint" marker */
#define TB_DBL_TAG 0x20
#define TB_PFP_TAG 0x20   /* non-negative scaled double; low bits hold the decimal places */
#define TB_NFP_TAG 0x30   /* negative scaled double; low bits hold the decimal places */
#define TB_NAN_TAG 0x2D
#define TB_INF_TAG 0x3D
#define TB_NNF_TAG 0x2E
#define TB_F16_TAG 0x3E
#define TB_F32_TAG 0x2F
#define TB_F64_TAG 0x3F   /* raw 8-byte big-endian IEEE double follows */
#define TB_MAP_TAG 0x10   /* map; low bits hold the pair count */
#define TB_MAP_LEN 0x0F   /* count mask / "count follows as varint" marker */
#define TB_ARR_TAG 0x08   /* array; low bits hold the element count */
#define TB_ARR_LEN 0x07   /* count mask / "count follows as varint" marker */
#define TB_SEP_TAG 0x05
#define TB_EXT_TAG 0x04
#define TB_BLB_TAG 0x03   /* blob; varint length then raw bytes follow */
#define TB_NIL_TAG 0x02
#define TB_TRU_TAG 0x01
#define TB_FLS_TAG 0x00

// Feature flags (from encoder)
#define TB_FEATURE_STRING_DEDUPE 0x01    /* enables the string reference table in pack_str */
#define TB_FEATURE_COMPRESS_FLOATS 0x02  /* enables scaled-integer encoding in pack_double */
|
52
|
+
|
53
|
+
/*
 * Powers of ten, indexed by decimal-place count (0..12).
 *
 * The unpacker divides a scaled integer by powers[order] to rebuild a
 * compressed double. The table is never written, so it is declared const:
 * this keeps it in read-only storage and turns accidental writes into
 * compile errors.
 */
static const double powers[] = {
    1.0,
    10.0,
    100.0,
    1000.0,
    10000.0,
    100000.0,
    1000000.0,
    10000000.0,
    100000000.0,
    1000000000.0,
    10000000000.0,
    100000000000.0,
    1000000000000.0
};
|
68
|
+
|
69
|
+
/*
 * One remembered string in the packer's deduplication cache.
 * Entries that fall into the same bin are chained through next_index.
 */
struct HashEntry {
    uint32_t hash;        /* full 32-bit value returned by fast_hash_32 */
    uint32_t length;      /* byte length of the string */
    uint32_t offset;      /* where the string bytes start in the packer's output buffer */
    uint32_t next_index;  /* 1-based cache index of the next entry in the bin; 0 ends the chain */
};

typedef struct HashEntry HashEntry;
|
75
|
+
|
76
|
+
typedef struct HashTable {
|
77
|
+
HashEntry* cache; // HASH_SIZE is 2048, use directly or define HASH_SIZE in header
|
78
|
+
uint32_t next_id;
|
79
|
+
uint32_t cache_size;
|
80
|
+
uint32_t cache_pos;
|
81
|
+
uint8_t bins[TB_HASH_SIZE];
|
82
|
+
} HashTable;
|
83
|
+
|
84
|
+
/*
 * Cheap 32-bit string hash: the length in the high 16 bits, and the first two
 * bytes XOR-ed with the last two bytes in the low 16 bits.
 *
 * Strings shorter than two bytes are handled explicitly so the function never
 * reads outside [str, str + len). Values for len >= 2 are unchanged.
 *
 * The hash is weak for short inputs: for len == 2 the first and last byte
 * pairs are the same bytes, so the low 16 bits cancel to zero. Callers must
 * always confirm a match by comparing the actual bytes.
 *
 * @param str  string bytes (need not be NUL-terminated; may be NULL when len is 0)
 * @param len  number of bytes in str
 * @return     the hash value; 0 for an empty string
 */
static inline uint32_t fast_hash_32(const char* str, uint16_t len) {
    const unsigned char *bytes = (const unsigned char *)str;
    uint32_t hash;

    if (len == 0) {
        return 0;
    }
    hash = (uint32_t)len << 16;
    if (len == 1) {
        return hash | bytes[0];
    }
    hash |= ((uint32_t)bytes[0] << 8) | bytes[1];
    hash ^= ((uint32_t)bytes[len - 2] << 8) | bytes[len - 1];
    return hash;
}
|
90
|
+
|
91
|
+
/*
 * Write `value` as a variable-length integer and return the bytes written (1..9).
 *
 * Layout, selected by the first byte:
 *   0..240    the value itself
 *   241..248  two bytes:   240 + 256 * (first - 241) + second
 *   249       three bytes: 2288 + 256 * second + third
 *   250..255  the next 3..8 bytes hold the value, most significant byte first
 *
 * `buffer` must have room for 9 bytes.
 */
static inline int encode_varint(uint64_t value, uint8_t* buffer) {
    int payload;
    int i;

    if (value <= 240) {
        buffer[0] = (uint8_t)value;
        return 1;
    }
    if (value < 2288) {
        uint64_t rest = value - 240;
        buffer[0] = (uint8_t)(241 + (rest >> 8));
        buffer[1] = (uint8_t)(rest & 0xFF);
        return 2;
    }
    if (value <= 67823) {
        uint64_t rest = value - 2288;
        buffer[0] = 249;
        buffer[1] = (uint8_t)(rest >> 8);
        buffer[2] = (uint8_t)(rest & 0xFF);
        return 3;
    }

    /* Smallest payload width, between 3 and 8 bytes, that holds the value. */
    payload = 3;
    while (payload < 8 && (value >> (8 * payload)) != 0) {
        payload++;
    }
    buffer[0] = (uint8_t)(247 + payload);
    for (i = 0; i < payload; i++) {
        buffer[1 + i] = (uint8_t)(value >> (8 * (payload - 1 - i)));
    }
    return payload + 1;
}
|
160
|
+
|
161
|
+
/*
 * Cheap upper bound on the number of bytes encode_varint writes for `value`.
 *
 * This is a capacity estimate, not an exact size: anything below 2^48 is
 * reported as 7 bytes. The 8-byte threshold is 2^56, the same boundary
 * encode_varint uses for its 8-byte form (it was 2^52, which needlessly
 * reported 9 for values in [2^52, 2^56)).
 *
 * @return 7, 8 or 9; always >= the real encoded size
 */
static inline int varint_size(uint64_t value){
    if (value < (1ULL << 48)) {
        return 7;
    } else if (value < (1ULL << 56)) {
        return 8;
    }
    return 9;
}
|
169
|
+
|
170
|
+
/*
 * Decode one variable-length integer written by encode_varint.
 *
 * @param buffer  start of the encoded data
 * @param size    total number of readable bytes in buffer
 * @param pos     in: offset of the varint's first byte; out: offset just past it
 * @return        the decoded value
 *
 * Every read is bounds-checked against `size`. If *pos is already at or past
 * the end, 0 is returned and *pos is left alone. If the varint is cut short,
 * 0 is returned and *pos is set to `size`, so the caller sees the input as
 * exhausted instead of reading past the buffer.
 */
static inline uint64_t decode_varint(const uint8_t* buffer, size_t size, size_t *pos) {
    size_t start = *pos;
    size_t extra;   /* bytes that follow the prefix byte */
    size_t i;
    const uint8_t *tail;
    uint64_t value;
    uint8_t prefix;

    if (start >= size) {
        return 0;
    }
    prefix = buffer[start];

    if (prefix <= 240) {
        extra = 0;
    } else if (prefix <= 248) {
        extra = 1;
    } else if (prefix == 249) {
        extra = 2;
    } else {
        extra = (size_t)prefix - 247;   /* 250..255 -> 3..8 big-endian bytes */
    }

    if (size - start - 1 < extra) {
        *pos = size;    /* truncated input */
        return 0;
    }

    tail = buffer + start + 1;
    if (extra == 0) {
        value = prefix;
    } else if (prefix <= 248) {
        value = 240 + 256 * (uint64_t)(prefix - 241) + tail[0];
    } else if (prefix == 249) {
        value = 2288 + 256 * (uint64_t)tail[0] + tail[1];
    } else {
        value = 0;
        for (i = 0; i < extra; i++) {
            value = (value << 8) | tail[i];
        }
    }

    *pos = start + 1 + extra;
    return value;
}
|
220
|
+
|
221
|
+
/*
 * Equality-oriented memory compare.
 *
 * Regions of 32 bytes or more are delegated to memcmp. Shorter regions are
 * compared byte by byte and report any difference as 1, so only "zero vs
 * non-zero" is meaningful to callers, not ordering.
 */
static inline int fast_memcmp(const void *ptr1, const void *ptr2, size_t num) {
    const unsigned char *lhs = (const unsigned char *)ptr1;
    const unsigned char *rhs = (const unsigned char *)ptr2;

    if (num >= 32) {
        return memcmp(ptr1, ptr2, num);
    }
    while (num-- > 0) {
        if (*lhs++ != *rhs++) {
            return 1;
        }
    }
    return 0;
}
|
233
|
+
|
234
|
+
/*
 * Forward byte-by-byte copy of `num` bytes from ptr2 to ptr1.
 * Returns ptr1.
 */
static inline void *fast_memcpy(unsigned char *ptr1, const char *ptr2, size_t num) {
    unsigned char *dst = ptr1;
    const char *src = ptr2;
    const char *end = ptr2 + num;

    while (src != end) {
        *dst++ = *src++;
    }
    return ptr1;
}
|
240
|
+
|
241
|
+
#include <immintrin.h>
|
242
|
+
#include <stddef.h>
|
243
|
+
#include <stdint.h>
|
244
|
+
|
245
|
+
/* Return the object representation of a double as a 64-bit unsigned integer. */
static inline uint64_t dtoi_bits(double d) {
    uint64_t bits;
    memcpy(&bits, &d, sizeof bits);
    return bits;
}
|
253
|
+
|
254
|
+
/* Reinterpret a 64-bit unsigned integer as the object representation of a double. */
static inline double itod_bits(uint64_t u) {
    double result;
    memcpy(&result, &u, sizeof result);
    return result;
}
|
262
|
+
|
263
|
+
/* Store `value` into buffer[0..7], most significant byte first. */
static inline void encode_uint64( uint64_t value, uint8_t *buffer) {
    int shift = 56;
    int i;

    for (i = 0; i < 8; i++, shift -= 8) {
        buffer[i] = (uint8_t)((value >> shift) & 0xFF);
    }
}
|
273
|
+
|
274
|
+
/* Read buffer[0..7] as a 64-bit unsigned integer, most significant byte first. */
static inline uint64_t decode_uint64(const uint8_t *buffer) {
    uint64_t result = 0;
    int i;

    for (i = 0; i < 8; i++) {
        result = (result << 8) | (uint64_t)buffer[i];
    }
    return result;
}
|
284
|
+
|
285
|
+
/*
 * Helper for decimal_places_count: true when `scaled` is an integer that a
 * uint64_t can hold and is not smaller than `abs_val`.
 *
 * The range test comes first because converting a double that is NaN,
 * infinite, negative or >= 2^64 to uint64_t is undefined behavior.
 */
static inline int tb_is_exact_u64(double scaled, double abs_val) {
    if (!(scaled >= 0.0 && scaled < 18446744073709551616.0 /* 2^64 */)) {
        return 0;
    }
    return scaled == (double)(uint64_t)scaled && scaled >= abs_val;
}

/*
 * Find the smallest number of decimal places p (0..12) for which
 * abs_val * 10^p is an exact integer.
 *
 * The search is tiered: 10^4, 10^8 and 10^12 are tried first as cheap
 * rejections, and only when a tier succeeds are its three smaller powers
 * tried in ascending order.
 *
 * @param abs_val  magnitude to analyse (the caller passes fabs(value))
 * @param scaled   out: abs_val * 10^p on success; on failure, the last
 *                 product tried (abs_val * 10^12)
 * @return         p in 0..12, or -1 when no p works. NaN, infinities and
 *                 products too large for a uint64_t all yield -1.
 */
static inline int decimal_places_count(double abs_val, double *scaled) {
    static const double tier_top[3] = {1e4, 1e8, 1e12};
    static const double tier_steps[3][3] = {
        {1e1, 1e2, 1e3},
        {1e5, 1e6, 1e7},
        {1e9, 1e10, 1e11}
    };
    int tier;
    int step;

    *scaled = abs_val;
    if (tb_is_exact_u64(*scaled, abs_val)) {
        return 0;
    }

    for (tier = 0; tier < 3; tier++) {
        double top = abs_val * tier_top[tier];

        *scaled = top;
        if (!tb_is_exact_u64(top, abs_val)) {
            continue;
        }
        for (step = 0; step < 3; step++) {
            *scaled = abs_val * tier_steps[tier][step];
            if (tb_is_exact_u64(*scaled, abs_val)) {
                return tier * 4 + step + 1;
            }
        }
        /* None of the smaller powers worked, so the tier's own power is minimal. */
        *scaled = top;
        return tier * 4 + 4;
    }
    return -1;
}
|
331
|
+
|
332
|
+
/* End common.h */
|
333
|
+
|
334
|
+
/* Begin packer.h */
|
335
|
+
|
336
|
+
|
337
|
+
typedef struct tiny_bits_packer {
|
338
|
+
unsigned char *buffer; // Pointer to the allocated buffer
|
339
|
+
size_t capacity; // Total allocated size of the buffer
|
340
|
+
size_t current_pos; // Current position in the buffer (write position)
|
341
|
+
HashTable encode_table; // Add the hash table here
|
342
|
+
HashTable dictionary;
|
343
|
+
uint8_t features;
|
344
|
+
// Add any other encoder-specific state here if needed (e.g., string deduplication table later)
|
345
|
+
} tiny_bits_packer;
|
346
|
+
|
347
|
+
/*
 * Make sure at least `needed_size` bytes can be written at the current
 * position, growing the buffer when necessary.
 *
 * On growth the new capacity is twice the old capacity plus needed_size.
 *
 * @return pointer to the current write position, or NULL when encoder is
 *         NULL or the reallocation fails (the old buffer is then untouched)
 */
static inline unsigned char *tiny_bits_packer_ensure_capacity(tiny_bits_packer *encoder, size_t needed_size) {
    if (encoder == NULL) {
        return NULL;
    }

    if (encoder->capacity - encoder->current_pos < needed_size) {
        size_t grown = 2 * encoder->capacity + needed_size;
        unsigned char *moved = (unsigned char *)realloc(encoder->buffer, grown);
        if (moved == NULL) {
            return NULL;
        }
        encoder->capacity = grown;
        encoder->buffer = moved;
    }
    return &encoder->buffer[encoder->current_pos];
}
|
360
|
+
|
361
|
+
tiny_bits_packer *tiny_bits_packer_create(size_t initial_capacity, uint8_t features) {
|
362
|
+
tiny_bits_packer *encoder = (tiny_bits_packer *)malloc(sizeof(tiny_bits_packer));
|
363
|
+
if (!encoder) return NULL;
|
364
|
+
|
365
|
+
encoder->buffer = (unsigned char *)malloc(initial_capacity);
|
366
|
+
if (!encoder->buffer) {
|
367
|
+
free(encoder);
|
368
|
+
return NULL;
|
369
|
+
}
|
370
|
+
encoder->capacity = initial_capacity;
|
371
|
+
encoder->current_pos = 0;
|
372
|
+
encoder->features = features;
|
373
|
+
|
374
|
+
// Only allocate hash table if deduplication is enabled
|
375
|
+
if (features & TB_FEATURE_STRING_DEDUPE) {
|
376
|
+
encoder->encode_table.cache = (HashEntry*)malloc(sizeof(HashEntry) * TB_HASH_CACHE_SIZE);
|
377
|
+
if (!encoder->encode_table.cache) {
|
378
|
+
//free(encoder->encode_table.buckets);
|
379
|
+
free(encoder->buffer);
|
380
|
+
free(encoder);
|
381
|
+
return NULL;
|
382
|
+
}
|
383
|
+
encoder->encode_table.cache_size = TB_HASH_CACHE_SIZE;
|
384
|
+
encoder->encode_table.cache_pos = 0;
|
385
|
+
encoder->encode_table.next_id = 0;
|
386
|
+
} else {
|
387
|
+
encoder->encode_table.cache = NULL;
|
388
|
+
encoder->encode_table.cache_size = 0;
|
389
|
+
encoder->encode_table.cache_pos = 0;
|
390
|
+
encoder->encode_table.next_id = 0;
|
391
|
+
}
|
392
|
+
|
393
|
+
return encoder;
|
394
|
+
}
|
395
|
+
|
396
|
+
/*
 * Rewind the packer so its buffer can be reused for a new message.
 *
 * The write position returns to 0 and, when string deduplication is enabled,
 * the reference table is emptied. The buffer itself is kept.
 *
 * Declared `static inline`: in C99/C11 a plain `inline` definition does not
 * emit an external definition, so any call the compiler chose not to inline
 * (for example at -O0) would fail to link.
 */
static inline void tiny_bits_packer_reset(tiny_bits_packer *encoder) {
    if (!encoder) return;
    encoder->current_pos = 0;
    if (encoder->features & TB_FEATURE_STRING_DEDUPE) {
        encoder->encode_table.next_id = 0;
        encoder->encode_table.cache_pos = 0;
        memset(encoder->encode_table.bins, 0, TB_HASH_SIZE * sizeof(uint8_t));
    }
}
|
406
|
+
|
407
|
+
/*
 * Release a packer created by tiny_bits_packer_create, including its output
 * buffer and, when deduplication is enabled, its entry cache.
 * Passing NULL is a no-op.
 */
void tiny_bits_packer_destroy(tiny_bits_packer *encoder) {
    if (encoder == NULL) {
        return;
    }

    if ((encoder->features & TB_FEATURE_STRING_DEDUPE) != 0) {
        free(encoder->encode_table.cache);
    }
    free(encoder->buffer);
    free(encoder);
}
|
416
|
+
|
417
|
+
/*
 * Emit an array header for `arr_len` elements.
 *
 * Counts below TB_ARR_LEN are folded into the tag byte. Larger counts use the
 * tag TB_ARR_TAG | TB_ARR_LEN followed by (arr_len - TB_ARR_LEN) as a varint.
 *
 * @return bytes written, or 0 if buffer space could not be obtained
 */
static inline int pack_arr(tiny_bits_packer *encoder, int arr_len){
    uint8_t *out;
    int total;

    if (arr_len < TB_ARR_LEN) {
        out = tiny_bits_packer_ensure_capacity(encoder, 1);
        if (out == NULL) return 0;
        out[0] = TB_ARR_TAG | arr_len;
        encoder->current_pos += 1;
        return 1;
    }

    out = tiny_bits_packer_ensure_capacity(encoder, 1 + varint_size((uint64_t)(arr_len - 7)));
    if (out == NULL) return 0;
    out[0] = TB_ARR_TAG | TB_ARR_LEN;
    total = 1 + encode_varint((uint64_t)(arr_len - TB_ARR_LEN), out + 1);
    encoder->current_pos += total;
    return total;
}
|
441
|
+
|
442
|
+
/*
 * Emit a map header for `map_len` key/value pairs.
 *
 * Counts below TB_MAP_LEN are folded into the tag byte. Larger counts use the
 * tag TB_MAP_TAG | TB_MAP_LEN followed by (map_len - TB_MAP_LEN) as a varint.
 *
 * @return bytes written, or 0 if buffer space could not be obtained
 */
static inline int pack_map(tiny_bits_packer *encoder, int map_len){
    uint8_t *out;
    int total;

    if (map_len < TB_MAP_LEN) {
        out = tiny_bits_packer_ensure_capacity(encoder, 1);
        if (out == NULL) return 0;
        out[0] = TB_MAP_TAG | map_len;
        encoder->current_pos += 1;
        return 1;
    }

    out = tiny_bits_packer_ensure_capacity(encoder, 1 + varint_size((uint64_t)(map_len - 15)));
    if (out == NULL) return 0;
    out[0] = TB_MAP_TAG | TB_MAP_LEN;
    total = 1 + encode_varint((uint64_t)(map_len - TB_MAP_LEN), out + 1);
    encoder->current_pos += total;
    return total;
}
|
466
|
+
|
467
|
+
/*
 * Emit a signed integer.
 *
 *   0..119     one byte: TB_INT_TAG | value
 *   -1..-6     one byte: 248 + (-value), i.e. 249..254
 *   >= 120     byte 248, then (value - 120) as a varint
 *   <= -7      byte 255, then -(value + 7) as a varint
 *
 * @return bytes written, or 0 if buffer space could not be obtained
 */
static inline int pack_int(tiny_bits_packer *encoder, int64_t value){
    uint8_t *out = tiny_bits_packer_ensure_capacity(encoder, 10);
    uint64_t magnitude;
    int total;

    if (out == NULL) return 0;

    /* Single-byte forms. */
    if (value > -7 && value < 120) {
        if (value >= 0) {
            out[0] = (uint8_t)(TB_INT_TAG | value);
        } else {
            out[0] = (uint8_t)(248 + (-value));
        }
        encoder->current_pos += 1;
        return 1;
    }

    /* Tag byte followed by the biased magnitude as a varint. */
    if (value >= 120) {
        out[0] = 248;
        magnitude = (uint64_t)(value - 120);
    } else {
        out[0] = 255;
        magnitude = (uint64_t)(-(value + 7));
    }
    total = 1 + encode_varint(magnitude, out + 1);
    encoder->current_pos += total;
    return total;
}
|
496
|
+
|
497
|
+
/*
 * Shared implementation for every value that is encoded as a bare tag byte.
 *
 * @return 1 (bytes written), or 0 if buffer space could not be obtained
 */
static inline int tb_pack_tag_byte(tiny_bits_packer *encoder, uint8_t tag) {
    uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, 1);
    if (!buffer) return 0;

    buffer[0] = tag;
    encoder->current_pos += 1;
    return 1;
}

/* Emit a nil value (TB_NIL_TAG). Returns 1 on success, 0 on allocation failure. */
static inline int pack_null(tiny_bits_packer *encoder){
    return tb_pack_tag_byte(encoder, (uint8_t)TB_NIL_TAG);
}

/* Emit boolean true (TB_TRU_TAG). Returns 1 on success, 0 on allocation failure. */
static inline int pack_true(tiny_bits_packer *encoder){
    return tb_pack_tag_byte(encoder, (uint8_t)TB_TRU_TAG);
}

/* Emit boolean false (TB_FLS_TAG). Returns 1 on success, 0 on allocation failure. */
static inline int pack_false(tiny_bits_packer *encoder){
    return tb_pack_tag_byte(encoder, (uint8_t)TB_FLS_TAG);
}

/* Emit a NaN marker (TB_NAN_TAG). Returns 1 on success, 0 on allocation failure. */
static inline int pack_nan(tiny_bits_packer *encoder){
    return tb_pack_tag_byte(encoder, (uint8_t)TB_NAN_TAG);
}

/* Emit a positive-infinity marker (TB_INF_TAG). Returns 1 on success, 0 on allocation failure. */
static inline int pack_infinity(tiny_bits_packer *encoder){
    return tb_pack_tag_byte(encoder, (uint8_t)TB_INF_TAG);
}

/* Emit a negative-infinity marker (TB_NNF_TAG). Returns 1 on success, 0 on allocation failure. */
static inline int pack_negative_infinity(tiny_bits_packer *encoder){
    return tb_pack_tag_byte(encoder, (uint8_t)TB_NNF_TAG);
}
|
556
|
+
|
557
|
+
/*
 * Emit a string, either inline or as a reference to an identical string
 * already written to this buffer.
 *
 * Inline form: TB_STR_TAG | len for len < TB_STR_LEN; otherwise
 * TB_STR_TAG | TB_STR_LEN, then (len - TB_STR_LEN) as a varint, then the bytes.
 * Reference form (needs TB_FEATURE_STRING_DEDUPE and a length of 2..128):
 * TB_REF_TAG | id for id < TB_REF_LEN; otherwise TB_REF_TAG | TB_REF_LEN,
 * then (id - TB_REF_LEN) as a varint.
 *
 * Fixes over the previous version:
 *  - A reference is only emitted after the bytes have been compared. The old
 *    shortcut trusted the hash for strings of up to 4 bytes, but fast_hash_32
 *    gives every 2-byte string the same value and collides easily for 3 and
 *    4 bytes, so distinct short strings were encoded as references to each
 *    other.
 *  - Bin heads are stored as a uint8_t holding index + 1, so only 255 cache
 *    slots are addressable. Registering a 256th entry stored 0 ("empty") in
 *    its bin and dropped that bin's whole chain; registration now stops at
 *    255 entries.
 *  - Size arithmetic uses size_t; NULL arguments are rejected.
 *
 * @param encoder  packer to write to
 * @param str      string bytes (need not be NUL-terminated)
 * @param str_len  number of bytes in str
 * @return bytes written, or 0 on invalid arguments or allocation failure
 */
static inline int pack_str(tiny_bits_packer *encoder, char* str, uint32_t str_len) {
    uint32_t id = 0;
    int found = 0;
    int written = 0;
    int dedupe;
    uint8_t *buffer;
    uint32_t hash_code = 0;
    uint32_t hash = 0;

    if (!encoder || (!str && str_len > 0)) return 0;

    dedupe = (encoder->features & TB_FEATURE_STRING_DEDUPE)
             && str_len >= 2 && str_len <= 128;

    if (dedupe) {
        hash_code = fast_hash_32(str, (uint16_t)str_len);
        hash = hash_code % TB_HASH_SIZE;
        /* Bin heads and chain links are 1-based cache indices; 0 ends the chain. */
        uint8_t index = encoder->encode_table.bins[hash];
        while (index > 0) {
            const HashEntry *entry = &encoder->encode_table.cache[index - 1];
            if (hash_code == entry->hash
                && str_len == entry->length
                && fast_memcmp(str, encoder->buffer + entry->offset, str_len) == 0) {
                id = (uint32_t)index - 1;
                found = 1;
                break;
            }
            index = (uint8_t)entry->next_index;
        }
    }

    if (found) {
        /* Encode a reference to the existing string. */
        size_t needed_size = (id < TB_REF_LEN) ? 1 : 1 + (size_t)varint_size(id - TB_REF_LEN);
        buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
        if (!buffer) return 0;

        if (id < TB_REF_LEN) {
            buffer[0] = TB_REF_TAG | id;
            written = 1;
        } else {
            buffer[0] = TB_REF_TAG | TB_REF_LEN;
            written = 1;
            written += encode_varint(id - TB_REF_LEN, buffer + written);
        }
    } else {
        /* Tag byte + worst-case varint + payload. */
        size_t needed_size = (size_t)str_len + 10;
        buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
        if (!buffer) return 0;

        if (str_len < TB_STR_LEN) {
            buffer[0] = TB_STR_TAG | str_len;
            written = 1;
            fast_memcpy(buffer + written, str, str_len);
            written += str_len;
        } else {
            buffer[0] = TB_STR_TAG | TB_STR_LEN;
            written = 1;
            written += encode_varint(str_len - TB_STR_LEN, buffer + written);
            memcpy(buffer + written, str, str_len);
            written += str_len;
        }

        /* Remember where the bytes live so later copies can reference them. */
        if (dedupe
            && encoder->encode_table.cache_pos < TB_HASH_CACHE_SIZE
            && encoder->encode_table.cache_pos < UINT8_MAX) {
            HashEntry* new_entry = &encoder->encode_table.cache[encoder->encode_table.cache_pos++];
            new_entry->hash = hash_code;
            new_entry->length = str_len;
            new_entry->offset = encoder->current_pos + written - str_len;
            new_entry->next_index = encoder->encode_table.bins[hash];
            encoder->encode_table.bins[hash] = (uint8_t)encoder->encode_table.cache_pos;
        }
    }

    encoder->current_pos += written;
    return written;
}
|
634
|
+
|
635
|
+
/*
 * Emit a double.
 *
 * With TB_FEATURE_COMPRESS_FLOATS, a value whose magnitude times 10^p
 * (p = 0..12) is an exact integer below 2^48 is written as one tag byte
 * (TB_PFP_TAG | p, or TB_NFP_TAG | p when the sign bit is set) followed by
 * that integer as a varint. Everything else is written as TB_F64_TAG followed
 * by the 8-byte big-endian bit pattern.
 *
 * Changes over the previous version:
 *  - The sign comes from signbit(), so -0.0 keeps its sign.
 *  - The compressed form is only used when integer / 10^p reproduces the
 *    magnitude exactly; otherwise the raw form is used.
 *  - Magnitudes >= 2^48 (and NaN/infinity) skip the decimal search: they can
 *    never yield an integer below 2^48.
 *  - The redundant second NULL check on `buffer` is gone.
 *
 * @return bytes written, or 0 if buffer space could not be obtained
 */
static inline int pack_double(tiny_bits_packer *encoder, double val) {
    int written = 0;
    uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, 10);
    if (!buffer) return 0;

    // scaled varint encoding
    if (encoder->features & TB_FEATURE_COMPRESS_FLOATS) {
        double abs_val = fabs(val);
        /* NaN fails this comparison and falls through to the raw encoding. */
        if (abs_val < 281474976710656.0 /* 2^48 */) {
            double scaled;
            int multiplies = decimal_places_count(abs_val, &scaled);
            if (multiplies >= 0) {
                uint64_t integer = (uint64_t)scaled;
                if (integer < (1ULL << 48)
                    && (double)integer / powers[multiplies] == abs_val) {
                    buffer[0] = (signbit(val) ? TB_NFP_TAG : TB_PFP_TAG) | (multiplies);
                    written++;
                    written += encode_varint(integer, buffer + written);
                    encoder->current_pos += written;
                    return written;
                }
            }
        }
    }

    // Fallback to raw double
    buffer[0] = TB_F64_TAG;
    written++;
    encode_uint64(dtoi_bits(val), buffer + written);
    written += 8;
    encoder->current_pos += written;
    return written;
}
|
669
|
+
|
670
|
+
/*
 * Emit a binary blob: TB_BLB_TAG, the byte count as a varint, then the bytes.
 *
 * A negative blob_size, or a NULL blob with a non-zero size, is rejected
 * instead of being handed to memcpy. The capacity request is computed in
 * size_t so it cannot overflow int.
 *
 * @param encoder    packer to write to
 * @param blob       bytes to copy (may be NULL only when blob_size is 0)
 * @param blob_size  number of bytes in blob
 * @return bytes written, or 0 on invalid arguments or allocation failure
 */
static inline int pack_blob(tiny_bits_packer *encoder, const char* blob, int blob_size){
    size_t payload;
    size_t written;
    uint8_t *buffer;

    if (blob_size < 0 || (blob == NULL && blob_size > 0)) return 0;
    payload = (size_t)blob_size;

    buffer = tiny_bits_packer_ensure_capacity(encoder, 1 + (size_t)varint_size((uint64_t)payload) + payload);
    if (!buffer) return 0; // Handle error

    buffer[0] = (uint8_t)TB_BLB_TAG;
    written = 1;
    written += (size_t)encode_varint((uint64_t)payload, buffer + written);
    if (payload > 0) {
        memcpy(buffer + written, blob, payload);
    }
    written += payload;
    encoder->current_pos += written;
    return (int)written;
}
|
687
|
+
|
688
|
+
/* End packer.h */
|
689
|
+
|
690
|
+
/* Begin unpacker.h */
|
691
|
+
|
692
|
+
|
693
|
+
|
694
|
+
// Decoder return types
|
695
|
+
/*
 * Result kinds reported by the unpacker. The comment on each constant names
 * the tiny_bits_value member that carries the payload, if any.
 */
enum tiny_bits_type {
    TINY_BITS_ARRAY = 0,  /* length: element count */
    TINY_BITS_MAP,        /* length: key/value pair count */
    TINY_BITS_INT,        /* int_val */
    TINY_BITS_DOUBLE,     /* double_val */
    TINY_BITS_STR,        /* str_blob_val: string bytes and byte length */
    TINY_BITS_BLOB,       /* str_blob_val: blob bytes and byte length */
    TINY_BITS_TRUE,       /* no payload */
    TINY_BITS_FALSE,      /* no payload */
    TINY_BITS_NULL,       /* no payload */
    TINY_BITS_NAN,        /* no payload */
    TINY_BITS_INF,        /* no payload */
    TINY_BITS_N_INF,      /* no payload */
    TINY_BITS_EXT,        /* no payload */
    TINY_BITS_FINISHED,   /* end of buffer reached */
    TINY_BITS_ERROR       /* input could not be parsed */
};
|
712
|
+
|
713
|
+
/*
 * Payload of one decoded item. Which member is valid depends on the
 * tiny_bits_type returned alongside it.
 */
union tiny_bits_value {
    int64_t int_val;       /* TINY_BITS_INT */
    double double_val;     /* TINY_BITS_DOUBLE */
    size_t length;         /* TINY_BITS_ARRAY, TINY_BITS_MAP */
    struct {               /* TINY_BITS_STR, TINY_BITS_BLOB */
        const char *data;  /* first byte of the string or blob */
        size_t length;     /* number of bytes at data */
        int32_t id;
    } str_blob_val;
};

typedef union tiny_bits_value tiny_bits_value;
|
723
|
+
|
724
|
+
typedef struct tiny_bits_unpacker {
|
725
|
+
const unsigned char *buffer; // Input buffer (read-only)
|
726
|
+
size_t size; // Total size of buffer
|
727
|
+
size_t current_pos; // Current read position
|
728
|
+
struct {
|
729
|
+
char *str; // Pointer to decompressed string data (owned by strings array)
|
730
|
+
size_t length; // Length of string
|
731
|
+
} *strings; // Array of decoded strings
|
732
|
+
size_t strings_size; // Capacity of strings array
|
733
|
+
size_t strings_count; // Number of strings stored
|
734
|
+
HashTable dictionary;
|
735
|
+
} tiny_bits_unpacker;
|
736
|
+
|
737
|
+
tiny_bits_unpacker *tiny_bits_unpacker_create(void) {
|
738
|
+
|
739
|
+
tiny_bits_unpacker *decoder = (tiny_bits_unpacker *)malloc(sizeof(tiny_bits_unpacker));
|
740
|
+
if (!decoder) return NULL;
|
741
|
+
// String array setup
|
742
|
+
decoder->strings_size = 8; // Initial capacity
|
743
|
+
decoder->strings = (void *)malloc(decoder->strings_size * sizeof(*decoder->strings));
|
744
|
+
if (!decoder->strings) {
|
745
|
+
free(decoder);
|
746
|
+
return NULL;
|
747
|
+
}
|
748
|
+
decoder->strings_count = 0;
|
749
|
+
return decoder;
|
750
|
+
}
|
751
|
+
|
752
|
+
/*
 * Point the unpacker at a new input buffer and rewind it.
 *
 * The buffer is borrowed, not copied. The call is ignored when decoder or
 * buffer is NULL or when size is 0; the previous state is then kept.
 */
void tiny_bits_unpacker_set_buffer(tiny_bits_unpacker *decoder, const unsigned char *buffer, size_t size) {
    if (decoder == NULL || buffer == NULL || size < 1) {
        return;
    }

    decoder->buffer = buffer;
    decoder->size = size;
    decoder->strings_count = 0;
    decoder->current_pos = 0;
}
|
760
|
+
|
761
|
+
/*
 * Rewind the unpacker to the start of its current buffer and forget the
 * strings decoded so far. Passing NULL is a no-op.
 */
static inline void tiny_bits_unpacker_reset(tiny_bits_unpacker *decoder) {
    if (decoder != NULL) {
        decoder->strings_count = 0;
        decoder->current_pos = 0;
    }
}
|
766
|
+
|
767
|
+
/*
 * Release an unpacker and its string table. The input buffer is borrowed and
 * is not freed. Passing NULL is a no-op.
 */
void tiny_bits_unpacker_destroy(tiny_bits_unpacker *decoder) {
    if (decoder == NULL) {
        return;
    }
    /* free(NULL) is a no-op, so the table pointer needs no separate check. */
    free(decoder->strings);
    free(decoder);
}
|
774
|
+
|
775
|
+
/*
 * Decode an integer whose tag byte has already been consumed.
 *
 *   tag < 248    value is tag - 128
 *   tag 249-254  value is -(tag - 248)
 *   tag 248      value is 120 + the varint that follows
 *   tag 255      value is -(7 + the varint that follows)
 *
 * Always reports TINY_BITS_INT and stores the result in value->int_val.
 */
static inline enum tiny_bits_type _unpack_int(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
    size_t cursor;
    uint64_t magnitude;

    /* Single-byte forms carry the value in the tag itself. */
    if (tag < 248) {
        value->int_val = tag - 128;
        return TINY_BITS_INT;
    }
    if (tag > 248 && tag < 255) {
        value->int_val = -(tag - 248);
        return TINY_BITS_INT;
    }

    /* 248 and 255 are followed by a biased magnitude stored as a varint. */
    cursor = decoder->current_pos;
    magnitude = decode_varint(decoder->buffer, decoder->size, &cursor);
    if (tag == 248) {
        value->int_val = magnitude + 120;
    } else {
        value->int_val = -(magnitude + 7);
    }
    decoder->current_pos = cursor;
    return TINY_BITS_INT;
}
|
795
|
+
|
796
|
+
/*
 * Decode an array header whose tag byte has already been consumed.
 *
 * Tags below TB_ARR_TAG | TB_ARR_LEN carry the element count (0..6) in their
 * low three bits. Otherwise the count is TB_ARR_LEN plus the varint that
 * follows. This mirrors pack_arr.
 *
 * The thresholds are spelled with the tag macros instead of `0b...` binary
 * literals, which are not part of ISO C before C23.
 *
 * Always reports TINY_BITS_ARRAY and stores the count in value->length.
 */
static inline enum tiny_bits_type _unpack_arr(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
    size_t pos = decoder->current_pos;
    if (tag < (TB_ARR_TAG | TB_ARR_LEN)) { // Small array (0-6)
        value->length = tag & TB_ARR_LEN;
    } else { // Large array
        value->length = decode_varint(decoder->buffer, decoder->size, &pos) + TB_ARR_LEN;
        decoder->current_pos = pos;
    }
    return TINY_BITS_ARRAY;
}
|
806
|
+
|
807
|
+
/*
 * Decode a map header whose tag byte has already been consumed.
 *
 * Tags below 0x1F carry the pair count (0..14) in their low four bits.
 * Otherwise the count is 15 plus the varint that follows.
 *
 * Always reports TINY_BITS_MAP and stores the count in value->length.
 */
static inline enum tiny_bits_type _unpack_map(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
    if (tag >= 0x1F) {
        /* Large map: count continues in a varint. */
        size_t cursor = decoder->current_pos;
        value->length = decode_varint(decoder->buffer, decoder->size, &cursor) + 15;
        decoder->current_pos = cursor;
        return TINY_BITS_MAP;
    }

    /* Small map: count lives in the tag. */
    value->length = tag & 0x0F;
    return TINY_BITS_MAP;
}
|
817
|
+
|
818
|
+
/*
 * Decode a double whose tag byte has already been consumed.
 *
 * TB_F64_TAG: the next 8 bytes are read with decode_uint64 and reinterpreted
 *             as a double by itod_bits.
 * otherwise:  a varint follows; it is divided by powers[tag & 0x0F] and the
 *             result is negated when bit 0x10 of the tag is set.
 *
 * Writes value->double_val and advances decoder->current_pos.
 * Returns TINY_BITS_DOUBLE, or TINY_BITS_ERROR when a raw double is truncated
 * (fewer than 8 bytes remain in the buffer).
 */
static inline enum tiny_bits_type _unpack_double(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
    size_t pos = decoder->current_pos;
    if (tag == TB_F64_TAG) { // Raw double
        // Refuse to read past the end of the buffer on truncated input.
        if (pos > decoder->size || decoder->size - pos < 8) {
            return TINY_BITS_ERROR;
        }
        uint64_t number = decode_uint64(decoder->buffer + pos);
        value->double_val = itod_bits(number);
        decoder->current_pos = pos + 8;
    } else { // Compressed double
        uint64_t number = decode_varint(decoder->buffer, decoder->size, &pos);
        // NOTE(review): order can be 0..15 here; confirm powers[] has at least
        // that many entries for every tag that can reach this branch.
        int order = (tag & 0x0F);
        double fractional = (double)number / powers[order];
        if (tag & 0x10) fractional = -fractional; // sign bit of the tag
        value->double_val = fractional;
        decoder->current_pos = pos;
    }
    return TINY_BITS_DOUBLE;
}
|
835
|
+
|
836
|
+
/*
 * Decode a blob whose tag byte has already been consumed.
 *
 * A varint length follows the tag, then that many payload bytes. On success
 * value->str_blob_val.data points into decoder->buffer (no copy is made),
 * value->str_blob_val.length holds the byte count, and decoder->current_pos
 * is advanced past the payload.
 *
 * Returns TINY_BITS_BLOB, or TINY_BITS_ERROR when the declared length does
 * not fit in the remaining buffer. `tag` is unused.
 */
static inline enum tiny_bits_type _unpack_blob(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
    (void)tag;
    size_t pos = decoder->current_pos;
    size_t len = decode_varint(decoder->buffer, decoder->size, &pos);

    // The length comes from the wire: never expose bytes beyond the buffer.
    // Written as a subtraction so that pos + len cannot overflow.
    if (pos > decoder->size || len > decoder->size - pos) {
        return TINY_BITS_ERROR;
    }

    value->str_blob_val.data = (const char *)decoder->buffer + pos;
    value->str_blob_val.length = len;
    decoder->current_pos = pos + len;
    return TINY_BITS_BLOB;
}
|
844
|
+
|
845
|
+
/*
 * Decode a string whose tag byte has already been consumed.
 *
 * Tag handling:
 *   tag <  0x5F   inline string, length in the low five bits of the tag
 *   tag == 0x5F   inline string, length = varint payload + 31
 *   tag >  0x5F   reference to a previously seen string; the index is in the
 *                 low five bits (tag < 0x7F) or is varint payload + 31
 *
 * Results are written to value->str_blob_val:
 *   data/length   the string bytes (pointing into decoder->buffer, or at the
 *                 table entry for a reference)
 *   id            index + 1 for a reference; -(table slot) when this call
 *                 added the string to the table; 0 when the table is full
 *
 * Returns TINY_BITS_STR, or TINY_BITS_ERROR for an out-of-range reference, an
 * inline string that does not fit in the remaining buffer, or a failed table
 * growth.
 */
static inline enum tiny_bits_type _unpack_str(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
    size_t pos = decoder->current_pos;
    size_t len;

    if (tag > 0x5F) { // Deduplicated (small: < 0x7F, large: 0x7F)
        // Range-check at full width before narrowing, so an oversized varint
        // cannot wrap into a valid-looking index.
        uint64_t ref = (tag < 0x7F)
            ? (uint64_t)(tag & 0x1F)
            : decode_varint(decoder->buffer, decoder->size, &pos) + 31;
        if (ref >= decoder->strings_count) return TINY_BITS_ERROR;
        uint32_t id = (uint32_t)ref;

        len = decoder->strings[id].length;
        value->str_blob_val.data = decoder->strings[id].str;
        value->str_blob_val.length = len;
        value->str_blob_val.id = id + 1;
        decoder->current_pos = pos; // Update pos after varint
        return TINY_BITS_STR;
    }

    if (tag < 0x5F) { // Small string: length stored in the tag
        len = tag & 0x1F;
    } else {          // tag == 0x5F: large string, length follows as a varint
        len = decode_varint(decoder->buffer, decoder->size, &pos) + 31;
    }

    // The length comes from the wire: reject strings that run past the end of
    // the buffer. Written as a subtraction so that pos + len cannot overflow.
    if (pos > decoder->size || len > decoder->size - pos) {
        return TINY_BITS_ERROR;
    }

    value->str_blob_val.data = (const char *)decoder->buffer + pos;
    value->str_blob_val.length = len;
    value->str_blob_val.id = 0;
    decoder->current_pos = pos + len;

    // Record the new string so later references can resolve to it.
    if (decoder->strings_count < TB_HASH_CACHE_SIZE) {
        if (decoder->strings_count >= decoder->strings_size) {
            // Double the table; fall back to a small fixed size if it was
            // empty so that realloc is never asked for zero bytes.
            size_t new_size = decoder->strings_size ? decoder->strings_size * 2 : 8;
            void *new_strings = realloc(decoder->strings, new_size * sizeof(*decoder->strings));
            if (!new_strings) return TINY_BITS_ERROR;
            decoder->strings = new_strings;
            decoder->strings_size = new_size;
        }

        decoder->strings[decoder->strings_count].str = (char *)decoder->buffer + pos;
        decoder->strings[decoder->strings_count].length = len;
        decoder->strings_count++;
        // A negative id tells the caller which slot was just filled.
        value->str_blob_val.id = -1 * decoder->strings_count;
    }
    return TINY_BITS_STR;
}
|
886
|
+
|
887
|
+
/*
 * Read the next value from the decoder.
 *
 * Consumes one tag byte and dispatches on it; the type-specific helpers fill
 * in `value` and consume any payload. Tags that carry no payload (nil, NaN,
 * +/-infinity, true, false) leave `value` untouched.
 *
 * Returns the type of the decoded value, TINY_BITS_FINISHED when the read
 * position has reached the end of the buffer, or TINY_BITS_ERROR for a NULL
 * argument or an unknown tag.
 */
static inline enum tiny_bits_type unpack_value(tiny_bits_unpacker *decoder, tiny_bits_value *value) {
    if (!decoder) return TINY_BITS_ERROR;
    if (decoder->current_pos >= decoder->size) return TINY_BITS_FINISHED;
    if (!value) return TINY_BITS_ERROR;

    uint8_t tag = decoder->buffer[decoder->current_pos++];

    // The order of these checks matters: the masks overlap, so the exact-match
    // tags must be tested before the wider families that contain them.
    if ((tag & TB_INT_TAG) == TB_INT_TAG) return _unpack_int(decoder, tag, value);
    if ((tag & TB_STR_TAG) == TB_STR_TAG) return _unpack_str(decoder, tag, value);

    if (tag == TB_NIL_TAG) return TINY_BITS_NULL;
    if (tag == TB_NAN_TAG) return TINY_BITS_NAN;
    if (tag == TB_INF_TAG) return TINY_BITS_INF;
    if (tag == TB_NNF_TAG) return TINY_BITS_N_INF;

    if ((tag & TB_DBL_TAG) == TB_DBL_TAG) return _unpack_double(decoder, tag, value);
    if ((tag & TB_MAP_TAG) == TB_MAP_TAG) return _unpack_map(decoder, tag, value);
    if ((tag & TB_ARR_TAG) == TB_ARR_TAG) return _unpack_arr(decoder, tag, value);

    if (tag == TB_BLB_TAG) return _unpack_blob(decoder, tag, value);
    if (tag == TB_TRU_TAG) return TINY_BITS_TRUE;
    if (tag == TB_FLS_TAG) return TINY_BITS_FALSE;

    return TINY_BITS_ERROR; // Unknown tag
}
|
923
|
+
|
924
|
+
|
925
|
+
/* End unpacker.h */
|
926
|
+
|
927
|
+
#endif /* TINY_BITS_H */
|
@@ -0,0 +1,315 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
3
|
+
|
4
|
+
#include "tinybits.h"
|
5
|
+
|
6
|
+
// Ruby module and classes
|
7
|
+
VALUE rb_mTinyBits;
|
8
|
+
VALUE rb_cPacker;
|
9
|
+
VALUE rb_cUnpacker;
|
10
|
+
|
11
|
+
// Forward declarations
|
12
|
+
static VALUE rb_packer_alloc(VALUE klass);
|
13
|
+
static VALUE rb_packer_init(VALUE self);
|
14
|
+
static VALUE rb_unpacker_alloc(VALUE klass);
|
15
|
+
static VALUE rb_unpacker_init(VALUE self);
|
16
|
+
static VALUE rb_pack(VALUE self, VALUE obj);
|
17
|
+
static VALUE rb_unpack(VALUE self, VALUE buffer);
|
18
|
+
|
19
|
+
// Structure to pass data and error status through rb_hash_foreach
|
20
|
+
typedef struct {
|
21
|
+
tiny_bits_packer* packer;
|
22
|
+
int error_occurred; // 0 for no error, 1 if callback encountered an error
|
23
|
+
} HashIterContext;
|
24
|
+
|
25
|
+
// Forward declaration for the recursive function
|
26
|
+
static int pack_ruby_object_recursive(tiny_bits_packer* packer, VALUE obj, VALUE context);
|
27
|
+
|
28
|
+
// Callback function for rb_hash_foreach
|
29
|
+
// It receives key, value, and the packer pointer (passed as user_data)
|
30
|
+
inline static int hash_foreach_callback(VALUE key, VALUE value, VALUE user_data) {
|
31
|
+
// Cast user_data back to our context struct pointer
|
32
|
+
HashIterContext* context = (HashIterContext*)user_data;
|
33
|
+
|
34
|
+
// Pack the key using the packer from the context
|
35
|
+
if (!pack_ruby_object_recursive(context->packer, key, user_data)) {
|
36
|
+
context->error_occurred = 1; // Signal error occurred
|
37
|
+
return ST_STOP; // Stop iteration
|
38
|
+
}
|
39
|
+
// Pack the value using the packer from the context
|
40
|
+
if (!pack_ruby_object_recursive(context->packer, value, user_data)) {
|
41
|
+
context->error_occurred = 1; // Signal error occurred
|
42
|
+
return ST_STOP; // Stop iteration
|
43
|
+
}
|
44
|
+
|
45
|
+
// If both succeeded, continue
|
46
|
+
return ST_CONTINUE;
|
47
|
+
}
|
48
|
+
|
49
|
+
// Packer structure
|
50
|
+
typedef struct {
|
51
|
+
tiny_bits_packer* packer;
|
52
|
+
} PackerData;
|
53
|
+
|
54
|
+
static void packer_free(void* data) {
|
55
|
+
PackerData* packer_data = (PackerData*)data;
|
56
|
+
if (packer_data->packer) {
|
57
|
+
tiny_bits_packer_destroy(packer_data->packer);
|
58
|
+
}
|
59
|
+
free(packer_data);
|
60
|
+
}
|
61
|
+
|
62
|
+
static size_t packer_memsize(const void* data) {
|
63
|
+
return sizeof(PackerData);
|
64
|
+
}
|
65
|
+
|
66
|
+
static const rb_data_type_t packer_data_type = {
|
67
|
+
"TinyBits::Packer",
|
68
|
+
{0, packer_free, packer_memsize,},
|
69
|
+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
|
70
|
+
};
|
71
|
+
|
72
|
+
// Allocator for TinyBits::Packer: wraps a PackerData whose native packer is
// still NULL; it is created later by rb_packer_init.
static VALUE rb_packer_alloc(VALUE klass) {
    PackerData *wrapper = ALLOC(PackerData);
    wrapper->packer = NULL;

    VALUE instance = TypedData_Wrap_Struct(klass, &packer_data_type, wrapper);
    return instance;
}
|
77
|
+
|
78
|
+
// TinyBits::Packer#initialize: creates the native packer with an initial
// capacity of 256 and the string-dedupe and float-compression features on.
// Raises RuntimeError if the native packer cannot be created.
static VALUE rb_packer_init(VALUE self) {
    PackerData* packer_data;
    TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);

    // initialize can be invoked more than once on the same object; release the
    // previous native packer instead of leaking it.
    if (packer_data->packer) {
        tiny_bits_packer_destroy(packer_data->packer);
        packer_data->packer = NULL;
    }

    packer_data->packer = tiny_bits_packer_create(256, TB_FEATURE_STRING_DEDUPE | TB_FEATURE_COMPRESS_FLOATS); // Initial capacity and features
    if (!packer_data->packer) {
        rb_raise(rb_eRuntimeError, "Failed to initialize packer");
    }
    return self;
}
|
88
|
+
|
89
|
+
// Optimized recursive packing function
|
90
|
+
static inline int pack_ruby_object_recursive(tiny_bits_packer* packer, VALUE obj, VALUE context) {
|
91
|
+
switch (TYPE(obj)) {
|
92
|
+
case T_STRING: {
|
93
|
+
return pack_str(packer, RSTRING_PTR(obj), RSTRING_LEN(obj));
|
94
|
+
}
|
95
|
+
case T_HASH: {
|
96
|
+
long len = RHASH_SIZE(obj);
|
97
|
+
int written = pack_map(packer, len);
|
98
|
+
if (written <= 0) return 0; // Error check based on tiny_bits API
|
99
|
+
rb_hash_foreach(obj, hash_foreach_callback, context);
|
100
|
+
return (((HashIterContext *)context)->error_occurred == 0);
|
101
|
+
}
|
102
|
+
case T_ARRAY: {
|
103
|
+
long len = RARRAY_LEN(obj);
|
104
|
+
int written = pack_arr(packer, len);
|
105
|
+
if (written <= 0) return 0; // Error check based on tiny_bits API
|
106
|
+
for (long i = 0; i < len; i++) {
|
107
|
+
if(!pack_ruby_object_recursive(packer, rb_ary_entry(obj, i), context)) return 0; // Propagate error
|
108
|
+
}
|
109
|
+
return 1; // Success
|
110
|
+
}
|
111
|
+
case T_FIXNUM: {
|
112
|
+
int64_t val = NUM2LONG(obj); // Assumes fits in int64_t
|
113
|
+
return pack_int(packer, val);
|
114
|
+
}
|
115
|
+
case T_FLOAT: {
|
116
|
+
double val = NUM2DBL(obj);
|
117
|
+
return pack_double(packer, val);
|
118
|
+
}
|
119
|
+
case T_NIL:
|
120
|
+
return pack_null(packer);
|
121
|
+
case T_TRUE:
|
122
|
+
return pack_true(packer);
|
123
|
+
case T_FALSE:
|
124
|
+
return pack_false(packer);
|
125
|
+
case T_SYMBOL: {
|
126
|
+
VALUE str = rb_sym2str(obj);
|
127
|
+
return pack_str(packer, RSTRING_PTR(str), RSTRING_LEN(str));
|
128
|
+
}
|
129
|
+
default:
|
130
|
+
rb_warn("Unsupported type encountered during packing: %s", rb_obj_classname(obj));
|
131
|
+
return 0;
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
// keeps the public API the same.
|
136
|
+
static VALUE rb_pack(VALUE self, VALUE obj) {
|
137
|
+
PackerData* packer_data;
|
138
|
+
TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
|
139
|
+
|
140
|
+
|
141
|
+
if (!packer_data->packer) {
|
142
|
+
rb_raise(rb_eRuntimeError, "Packer not initialized");
|
143
|
+
}
|
144
|
+
|
145
|
+
// Reset before packing (assuming this is efficient)
|
146
|
+
tiny_bits_packer_reset(packer_data->packer);
|
147
|
+
|
148
|
+
HashIterContext context;
|
149
|
+
context.packer = packer_data->packer; // Pass the current packer
|
150
|
+
context.error_occurred = 0; // Initialize error flag
|
151
|
+
|
152
|
+
// Call the optimized recursive function
|
153
|
+
if (!pack_ruby_object_recursive(packer_data->packer, obj, (VALUE)&context)) {
|
154
|
+
// Error occurred during packing (might be unsupported type or tiny_bits error)
|
155
|
+
rb_raise(rb_eRuntimeError, "Failed to pack object (unsupported type or packing error)");
|
156
|
+
}
|
157
|
+
|
158
|
+
VALUE result = rb_str_new((const char*)packer_data->packer->buffer, packer_data->packer->current_pos);
|
159
|
+
rb_obj_freeze(result);
|
160
|
+
return result;
|
161
|
+
}
|
162
|
+
|
163
|
+
|
164
|
+
// Unpacker structure
|
165
|
+
typedef struct {
|
166
|
+
tiny_bits_unpacker* unpacker;
|
167
|
+
size_t strings_index;
|
168
|
+
VALUE ruby_strings[TB_HASH_CACHE_SIZE];
|
169
|
+
} UnpackerData;
|
170
|
+
|
171
|
+
static void unpacker_free(void* data) {
|
172
|
+
UnpackerData* unpacker_data = (UnpackerData*)data;
|
173
|
+
if (unpacker_data->unpacker) {
|
174
|
+
tiny_bits_unpacker_destroy(unpacker_data->unpacker);
|
175
|
+
}
|
176
|
+
for (size_t i = 0; i < TB_HASH_CACHE_SIZE; i++) {
|
177
|
+
unpacker_data->ruby_strings[i] = Qnil;
|
178
|
+
}
|
179
|
+
free(unpacker_data);
|
180
|
+
}
|
181
|
+
|
182
|
+
static size_t unpacker_memsize(const void* data) {
|
183
|
+
return sizeof(UnpackerData);
|
184
|
+
}
|
185
|
+
|
186
|
+
static const rb_data_type_t unpacker_data_type = {
|
187
|
+
"TinyBits::Unpacker",
|
188
|
+
{0, unpacker_free, unpacker_memsize,},
|
189
|
+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
|
190
|
+
};
|
191
|
+
|
192
|
+
static VALUE rb_unpacker_alloc(VALUE klass) {
|
193
|
+
UnpackerData* unpacker_data = ALLOC(UnpackerData);
|
194
|
+
unpacker_data->unpacker = NULL;
|
195
|
+
return TypedData_Wrap_Struct(klass, &unpacker_data_type, unpacker_data);
|
196
|
+
}
|
197
|
+
|
198
|
+
// TinyBits::Unpacker#initialize: creates the native unpacker and resets
// strings_index. Raises RuntimeError if the native unpacker cannot be created.
static VALUE rb_unpacker_init(VALUE self) {
    UnpackerData* unpacker_data;
    TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);

    // initialize can be invoked more than once on the same object; release the
    // previous native unpacker instead of leaking it.
    if (unpacker_data->unpacker) {
        tiny_bits_unpacker_destroy(unpacker_data->unpacker);
        unpacker_data->unpacker = NULL;
    }

    unpacker_data->unpacker = tiny_bits_unpacker_create();
    if (!unpacker_data->unpacker) {
        rb_raise(rb_eRuntimeError, "Failed to initialize unpacker");
    }
    unpacker_data->strings_index = 0;
    return self;
}
|
209
|
+
|
210
|
+
// Converts a decoded string value into a frozen UTF-8 Ruby String.
//
// value.str_blob_val.id selects the behaviour:
//   id > 0   the string was seen before: return the cached Ruby string from
//            slot id - 1 without building a new one
//   id < 0   first occurrence: build the string and cache it in slot -id - 1
//   id == 0  build the string without caching it
//
// When `interned` is non-zero the string is obtained from
// rb_enc_interned_str; otherwise a fresh string is created and frozen.
static inline VALUE rb_unpack_str(UnpackerData* unpacker_data, tiny_bits_value value, size_t interned){
    int32_t id = value.str_blob_val.id;
    if (id > 0) {
        return unpacker_data->ruby_strings[id - 1];
    }

    VALUE str;
    if (interned == 0) {
        str = rb_utf8_str_new(value.str_blob_val.data, value.str_blob_val.length);
        rb_obj_freeze(str);
    } else {
        str = rb_enc_interned_str(value.str_blob_val.data, value.str_blob_val.length, rb_utf8_encoding());
    }

    if (id != 0) {
        // id is negative here: remember the string for later references.
        unpacker_data->ruby_strings[abs(id) - 1] = str;
    }
    return str;
}
|
229
|
+
|
230
|
+
// Decodes the next value from the unpacker and converts it, recursively for
// arrays and maps, into a Ruby object.
//
// `interned` is forwarded to rb_unpack_str for a string decoded at this
// level: map keys are requested interned (1), everything else is not (0).
//
// Returns the Ruby object, or Qundef as an error sentinel (nil is a valid
// decoded value). Qundef is returned for a decoder error, for an unexpected
// end of input, for an unknown type, and for a container whose declared
// length cannot fit in the remaining input.
static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
    tiny_bits_unpacker* unpacker = unpacker_data->unpacker;
    tiny_bits_value value;
    enum tiny_bits_type type = unpack_value(unpacker, &value);

    if (type == TINY_BITS_ERROR) {
        return Qundef; // Use Qundef as a sentinel for error (not nil)
    }

    switch (type) {
        case TINY_BITS_STR: {
            return rb_unpack_str(unpacker_data, value, interned);
        }
        case TINY_BITS_DOUBLE:
            return DBL2NUM(value.double_val);
        // The decoder reports the special float values as distinct types with
        // no payload; map them back to the corresponding Ruby Floats.
        case TINY_BITS_NAN:
            return DBL2NUM(NAN);
        case TINY_BITS_INF:
            return DBL2NUM(INFINITY);
        case TINY_BITS_N_INF:
            return DBL2NUM(-INFINITY);
        case TINY_BITS_INT:
            // LL2NUM keeps all 64 bits even where `long` is 32 bits wide.
            return LL2NUM(value.int_val);
        case TINY_BITS_NULL:
            return Qnil;
        case TINY_BITS_TRUE:
            return Qtrue;
        case TINY_BITS_FALSE:
            return Qfalse;
        case TINY_BITS_ARRAY: {
            // Every element consumes at least one tag byte, so a length larger
            // than the remaining input is invalid. Rejecting it up front keeps
            // untrusted input from requesting a huge preallocation.
            size_t remaining = (unpacker->current_pos < unpacker->size)
                ? unpacker->size - unpacker->current_pos
                : 0;
            if (value.length > remaining) return Qundef;

            VALUE arr = rb_ary_new_capa(value.length);
            for (size_t i = 0; i < value.length; i++) {
                VALUE element = unpack_ruby_object(unpacker_data, 0);
                if (element == Qundef) return Qundef; // Error
                rb_ary_push(arr, element);
            }
            return arr;
        }
        case TINY_BITS_MAP: {
            // Each entry is a key plus a value, i.e. at least two bytes.
            size_t remaining = (unpacker->current_pos < unpacker->size)
                ? unpacker->size - unpacker->current_pos
                : 0;
            if (value.length > remaining / 2) return Qundef;

            VALUE hash = rb_hash_new_capa(value.length);
            for (size_t i = 0; i < value.length; i++) {
                VALUE key = unpack_ruby_object(unpacker_data, 1);
                if (key == Qundef) return Qundef; // Error
                VALUE val = unpack_ruby_object(unpacker_data, 0);
                if (val == Qundef) return Qundef; // Error
                rb_hash_aset(hash, key, val);
            }
            return hash;
        }
        case TINY_BITS_BLOB: {
            // Blobs become frozen binary Ruby strings. The braces are needed
            // because a declaration may not directly follow a case label
            // before C23.
            VALUE blob = rb_str_new(value.str_blob_val.data, value.str_blob_val.length);
            rb_obj_freeze(blob);
            return blob;
        }
        default:
            return Qundef; // Error (includes TINY_BITS_FINISHED: input ended early)
    }
}
|
282
|
+
|
283
|
+
// TinyBits::Unpacker#unpack(buffer): decodes one value from the given String
// and returns it. Raises RuntimeError when the unpacker was never initialized
// or the data cannot be decoded; StringValue raises TypeError when buffer
// cannot be converted to a String.
static VALUE rb_unpack(VALUE self, VALUE buffer) {
    UnpackerData* unpacker_data;
    TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);

    if (!unpacker_data->unpacker) {
        rb_raise(rb_eRuntimeError, "Unpacker not initialized");
    }

    StringValue(buffer); // Ensure it's a string (may replace buffer via to_str)

    tiny_bits_unpacker_set_buffer(unpacker_data->unpacker, (const unsigned char*)RSTRING_PTR(buffer), RSTRING_LEN(buffer));

    VALUE result = unpack_ruby_object(unpacker_data, 0);

    // The decoder works on the raw pointer taken from `buffer` while Ruby
    // objects are being allocated. Keep the String visibly alive until
    // decoding is done so the GC cannot reclaim or move it in the meantime.
    RB_GC_GUARD(buffer);

    if (result == Qundef) {
        rb_raise(rb_eRuntimeError, "Failed to unpack data");
    }

    return result;
}
|
302
|
+
|
303
|
+
void Init_tinybits_ext(void) {
|
304
|
+
rb_mTinyBits = rb_define_module("TinyBits");
|
305
|
+
rb_cPacker = rb_define_class_under(rb_mTinyBits, "Packer", rb_cObject);
|
306
|
+
rb_cUnpacker = rb_define_class_under(rb_mTinyBits, "Unpacker", rb_cObject);
|
307
|
+
|
308
|
+
rb_define_alloc_func(rb_cPacker, rb_packer_alloc);
|
309
|
+
rb_define_method(rb_cPacker, "initialize", rb_packer_init, 0);
|
310
|
+
rb_define_method(rb_cPacker, "pack", rb_pack, 1);
|
311
|
+
|
312
|
+
rb_define_alloc_func(rb_cUnpacker, rb_unpacker_alloc);
|
313
|
+
rb_define_method(rb_cUnpacker, "initialize", rb_unpacker_init, 0);
|
314
|
+
rb_define_method(rb_cUnpacker, "unpack", rb_unpack, 1);
|
315
|
+
}
|
data/lib/tinybits.rb
ADDED
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tinybits
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Mohamed Hassan
|
8
|
+
bindir: bin
|
9
|
+
cert_chain: []
|
10
|
+
date: 2025-04-20 00:00:00.000000000 Z
|
11
|
+
dependencies: []
|
12
|
+
description: TinyBits is a Ruby gem that wraps the TinyBits C serialization library,
|
13
|
+
offering Rubyists the power of serialization with integer/float compression and string
|
14
|
+
deduplication!
|
15
|
+
email: oldmoe@gamil.com
|
16
|
+
executables: []
|
17
|
+
extensions:
|
18
|
+
- ext/tinybits/extconf.rb
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- ext/tinybits/extconf.rb
|
22
|
+
- ext/tinybits/tinybits.h
|
23
|
+
- ext/tinybits/tinybits_ext.c
|
24
|
+
- lib/tinybits.rb
|
25
|
+
- lib/tinybits/version.rb
|
26
|
+
homepage: https://github.com/oldmoe/tinybits-rb
|
27
|
+
licenses:
|
28
|
+
- MIT
|
29
|
+
metadata:
|
30
|
+
source_code_uri: https://github.com/oldmoe/tinybits-rb
|
31
|
+
rdoc_options: []
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: 3.0.0
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
requirements: []
|
45
|
+
rubygems_version: 3.6.0.dev
|
46
|
+
specification_version: 4
|
47
|
+
summary: Very fast and compact serialization for Ruby!
|
48
|
+
test_files: []
|