tinybits 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/tinybits/test_date.rb +37 -0
- data/ext/tinybits/tinybits.h +305 -77
- data/ext/tinybits/tinybits_ext.c +154 -1
- data/lib/tinybits/version.rb +1 -1
- data/lib/tinybits.rb +4 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51ab487694dc35eabde1bebda52a2726074ab6c80ef84ce2c845141c9732283c
|
4
|
+
data.tar.gz: 5c3b2c2eb52a15fb5e32d54c37d705fd41c9ec4759464519c599f83f517b9e2b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 65fd3c3a22a67d8a1a6a218644d4ed82171386202de0b09ce86c285501852cf40c71690820defbf811be66db29da6346d9e933878027377e1ac0c680c08cdf60
|
7
|
+
data.tar.gz: 6d389ea71dac6c417956d98dfe545f3baa015f2947ce968d9f8ec2f0dbe62ffdd5f841fb789658b00a69fe7975b8ed7788b1747a2867440fee10f0c19b05c243
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require './tinybits_ext'
|
2
|
+
|
3
|
+
packer = TinyBits::Packer.new
|
4
|
+
unpacker = TinyBits::Unpacker.new
|
5
|
+
|
6
|
+
t = [Time.now, nil, true, false]
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
puts packer.dump(t).bytesize
|
12
|
+
|
13
|
+
puts t
|
14
|
+
puts t2 = unpacker.unpack(packer.pack(t))
|
15
|
+
puts t == t2
|
16
|
+
|
17
|
+
|
18
|
+
objects = [{"abc": 123}, {"abc": [1, 2, "abc"]}, ["xyz", "abc", "xyz", 7.6] ]
|
19
|
+
|
20
|
+
puts "----------------"
|
21
|
+
|
22
|
+
packer.reset
|
23
|
+
|
24
|
+
objects.each do |obj|
|
25
|
+
puts packer << obj
|
26
|
+
end
|
27
|
+
|
28
|
+
buffer = packer.to_s
|
29
|
+
|
30
|
+
puts buffer.bytesize
|
31
|
+
|
32
|
+
unpacker.buffer = buffer
|
33
|
+
|
34
|
+
while(value = unpacker.pop)
|
35
|
+
pp value
|
36
|
+
puts "+++++++++++++++++++++++++"
|
37
|
+
end
|
data/ext/tinybits/tinybits.h
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/**
|
2
2
|
* TinyBits Amalgamated Header
|
3
|
-
* Generated on: Sat
|
3
|
+
* Generated on: Sat May 3 09:20:46 PM CEST 2025
|
4
4
|
*/
|
5
5
|
|
6
6
|
#ifndef TINY_BITS_H
|
@@ -8,43 +8,49 @@
|
|
8
8
|
|
9
9
|
/* Begin common.h */
|
10
10
|
|
11
|
-
|
12
11
|
#include <stdint.h>
|
13
12
|
#include <stdlib.h>
|
14
13
|
#include <string.h>
|
15
14
|
#include <stddef.h> // for size_t
|
16
15
|
#include <math.h>
|
17
|
-
|
16
|
+
#include <stdio.h>
|
18
17
|
|
19
18
|
#define TB_HASH_SIZE 128
|
20
19
|
#define TB_HASH_CACHE_SIZE 256
|
21
20
|
#define MAX_BYTES 9
|
22
21
|
#define TB_DDP_STR_LEN_MAX 128
|
23
22
|
|
24
|
-
|
25
|
-
#define
|
26
|
-
#define
|
27
|
-
#define
|
28
|
-
#define
|
29
|
-
#define
|
30
|
-
#define
|
31
|
-
#define
|
32
|
-
#define
|
33
|
-
#define
|
34
|
-
#define
|
35
|
-
#define
|
36
|
-
#define
|
37
|
-
#define
|
38
|
-
#define
|
39
|
-
#define
|
40
|
-
#define
|
41
|
-
#define
|
42
|
-
#define
|
43
|
-
#define
|
44
|
-
#define
|
45
|
-
#define
|
46
|
-
#define
|
47
|
-
|
23
|
+
// main tags
|
24
|
+
#define TB_INT_TAG 0x80 // +/- integer
|
25
|
+
#define TB_REF_TAG 0x60 // deduped string
|
26
|
+
#define TB_STR_TAG 0x40 // string
|
27
|
+
#define TB_DBL_TAG 0x20 // double value
|
28
|
+
#define TB_PFP_TAG 0x20 // + compressed double
|
29
|
+
#define TB_NFP_TAG 0x30 // - compressed double
|
30
|
+
#define TB_NAN_TAG 0x2D // NaN
|
31
|
+
#define TB_INF_TAG 0x3D // Infinity
|
32
|
+
#define TB_NNF_TAG 0x2E // -Infinity
|
33
|
+
#define TB_F16_TAG 0x3E // f16
|
34
|
+
#define TB_F32_TAG 0x2F // float (32bit)
|
35
|
+
#define TB_F64_TAG 0x3F // double (64bit)
|
36
|
+
#define TB_MAP_TAG 0x10 // map { key: value}
|
37
|
+
#define TB_ARR_TAG 0x08 // array [element1, element2]
|
38
|
+
#define TB_DTM_TAG 0x07 // datetime
|
39
|
+
#define TB_NXT_TAG 0x06 // native extensions (multibyte tags)
|
40
|
+
#define TB_SEP_TAG 0x05 // separator (for group deduplication)
|
41
|
+
#define TB_EXT_TAG 0x04 // extension (user extensions)
|
42
|
+
#define TB_BLB_TAG 0x03 // blob
|
43
|
+
#define TB_NIL_TAG 0x02 // NULL
|
44
|
+
#define TB_TRU_TAG 0x01 // TRUE
|
45
|
+
#define TB_FLS_TAG 0x00 // FALSE
|
46
|
+
|
47
|
+
// Length values (for string, map & array)
|
48
|
+
#define TB_STR_LEN 0x1F // max embedded string length
|
49
|
+
#define TB_REF_LEN 0x1F // max embedded reference id
|
50
|
+
#define TB_MAP_LEN 0x0F // max embedded map length
|
51
|
+
#define TB_ARR_LEN 0x07 // max embedded array length
|
52
|
+
|
53
|
+
// native extensions (TB_NXT_TAG)
|
48
54
|
|
49
55
|
// Feature flags (from encoder)
|
50
56
|
#define TB_FEATURE_STRING_DEDUPE 0x01
|
@@ -358,6 +364,13 @@ static inline unsigned char *tiny_bits_packer_ensure_capacity(tiny_bits_packer *
|
|
358
364
|
return encoder->buffer + encoder->current_pos;
|
359
365
|
}
|
360
366
|
|
367
|
+
/**
|
368
|
+
* @brief allocates and initializes a new packer
|
369
|
+
*
|
370
|
+
* @return pointer to new packer instance
|
371
|
+
*
|
372
|
+
* @note the returned packer object must be freed using tiny_bits_packer_destroy()
|
373
|
+
*/
|
361
374
|
tiny_bits_packer *tiny_bits_packer_create(size_t initial_capacity, uint8_t features) {
|
362
375
|
tiny_bits_packer *encoder = (tiny_bits_packer *)malloc(sizeof(tiny_bits_packer));
|
363
376
|
if (!encoder) return NULL;
|
@@ -393,6 +406,13 @@ tiny_bits_packer *tiny_bits_packer_create(size_t initial_capacity, uint8_t featu
|
|
393
406
|
return encoder;
|
394
407
|
}
|
395
408
|
|
409
|
+
/**
|
410
|
+
* @brief Resets internal data structure of the packer object
|
411
|
+
*
|
412
|
+
* @param encoder The packer instance
|
413
|
+
*
|
414
|
+
* @note This function allows for more efficient packing by reusing the same packer object
|
415
|
+
*/
|
396
416
|
inline void tiny_bits_packer_reset(tiny_bits_packer *encoder) {
|
397
417
|
if (!encoder) return;
|
398
418
|
encoder->current_pos = 0;
|
@@ -404,6 +424,12 @@ inline void tiny_bits_packer_reset(tiny_bits_packer *encoder) {
|
|
404
424
|
|
405
425
|
}
|
406
426
|
|
427
|
+
/**
|
428
|
+
* @brief Deallocate the packer object and its internal data structures
|
429
|
+
*
|
430
|
+
* @param encoder The packer instance
|
431
|
+
*
|
432
|
+
*/
|
407
433
|
void tiny_bits_packer_destroy(tiny_bits_packer *encoder) {
|
408
434
|
if (!encoder) return;
|
409
435
|
|
@@ -414,6 +440,15 @@ void tiny_bits_packer_destroy(tiny_bits_packer *encoder) {
|
|
414
440
|
free(encoder);
|
415
441
|
}
|
416
442
|
|
443
|
+
/**
|
444
|
+
* @brief Packs an array header into the buffer
|
445
|
+
*
|
446
|
+
* @param encoder Pointer to the packer instance
|
447
|
+
* @param arr_len Number of elements in the array
|
448
|
+
* @return Number of bytes written, or 0 on error
|
449
|
+
*
|
450
|
+
* @note This function only writes the array header, not the elements themselves
|
451
|
+
*/
|
417
452
|
static inline int pack_arr(tiny_bits_packer *encoder, int arr_len){
|
418
453
|
int written = 0;
|
419
454
|
int needed_size;
|
@@ -439,6 +474,15 @@ static inline int pack_arr(tiny_bits_packer *encoder, int arr_len){
|
|
439
474
|
return written;
|
440
475
|
}
|
441
476
|
|
477
|
+
/**
|
478
|
+
* @brief Packs a map header into the buffer
|
479
|
+
*
|
480
|
+
* @param encoder Pointer to the packer instance
|
481
|
+
* @param map_len Number of key-value pairs in the map
|
482
|
+
* @return Number of bytes written, or 0 on error
|
483
|
+
*
|
484
|
+
* @note This function only writes the map header, not the key-value pairs themselves
|
485
|
+
*/
|
442
486
|
static inline int pack_map(tiny_bits_packer *encoder, int map_len){
|
443
487
|
int written = 0;
|
444
488
|
int needed_size;
|
@@ -464,6 +508,15 @@ static inline int pack_map(tiny_bits_packer *encoder, int map_len){
|
|
464
508
|
return written;
|
465
509
|
}
|
466
510
|
|
511
|
+
/**
|
512
|
+
* @brief Packs an integer value into the buffer
|
513
|
+
*
|
514
|
+
* @param encoder Pointer to the packer instance
|
515
|
+
* @param value The integer value to pack
|
516
|
+
* @return Number of bytes written, or 0 on error
|
517
|
+
*
|
518
|
+
* @note Uses a compact representation for small values and SQLite4 like integer encoding for larger values
|
519
|
+
*/
|
467
520
|
static inline int pack_int(tiny_bits_packer *encoder, int64_t value){
|
468
521
|
int written = 0;
|
469
522
|
int needed_size = 10;
|
@@ -494,66 +547,95 @@ static inline int pack_int(tiny_bits_packer *encoder, int64_t value){
|
|
494
547
|
return written;
|
495
548
|
}
|
496
549
|
|
497
|
-
static inline int
|
498
|
-
|
499
|
-
uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
|
550
|
+
static inline int _pack_tag_only(tiny_bits_packer *encoder, uint8_t tag){
|
551
|
+
uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, 1);
|
500
552
|
if (!buffer) return 0; // Handle error
|
501
|
-
|
502
|
-
buffer[0] = (uint8_t)TB_NIL_TAG;
|
553
|
+
buffer[0] = tag;
|
503
554
|
encoder->current_pos += 1;
|
504
555
|
return 1;
|
556
|
+
|
505
557
|
}
|
506
558
|
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
559
|
+
/**
|
560
|
+
* @brief Packs a separator tag into the buffer
|
561
|
+
*
|
562
|
+
* @param encoder Pointer to the packer instance
|
563
|
+
* @return Number of bytes written, or 0 on error
|
564
|
+
*/
|
565
|
+
static inline int pack_separator(tiny_bits_packer *encoder){
|
566
|
+
return _pack_tag_only(encoder, (uint8_t)TB_SEP_TAG);
|
567
|
+
}
|
511
568
|
|
512
|
-
|
513
|
-
|
514
|
-
|
569
|
+
/**
|
570
|
+
* @brief Packs a NULL value into the buffer
|
571
|
+
*
|
572
|
+
* @param encoder Pointer to the packer instance
|
573
|
+
* @return Number of bytes written, or 0 on error
|
574
|
+
*/
|
575
|
+
static inline int pack_null(tiny_bits_packer *encoder){
|
576
|
+
return _pack_tag_only(encoder, (uint8_t)TB_NIL_TAG);
|
515
577
|
}
|
516
578
|
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
579
|
+
/**
|
580
|
+
* @brief Packs a TRUE boolean value into the buffer
|
581
|
+
*
|
582
|
+
* @param encoder Pointer to the packer instance
|
583
|
+
* @return Number of bytes written, or 0 on error
|
584
|
+
*/
|
585
|
+
static inline int pack_true(tiny_bits_packer *encoder){
|
586
|
+
return _pack_tag_only(encoder, (uint8_t)TB_TRU_TAG);
|
587
|
+
}
|
521
588
|
|
522
|
-
|
523
|
-
|
524
|
-
|
589
|
+
/**
|
590
|
+
* @brief Packs a FALSE boolean value into the buffer
|
591
|
+
*
|
592
|
+
* @param encoder Pointer to the packer instance
|
593
|
+
* @return Number of bytes written, or 0 on error
|
594
|
+
*/
|
595
|
+
static inline int pack_false(tiny_bits_packer *encoder){
|
596
|
+
return _pack_tag_only(encoder, (uint8_t)TB_FLS_TAG);
|
525
597
|
}
|
526
598
|
|
599
|
+
/**
|
600
|
+
* @brief Packs a NaN (Not a Number) value into the buffer
|
601
|
+
*
|
602
|
+
* @param encoder Pointer to the packer instance
|
603
|
+
* @return Number of bytes written, or 0 on error
|
604
|
+
*/
|
527
605
|
static inline int pack_nan(tiny_bits_packer *encoder){
|
528
|
-
|
529
|
-
uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
|
530
|
-
if (!buffer) return 0; // Handle error
|
531
|
-
|
532
|
-
buffer[0] = (uint8_t)TB_NAN_TAG;
|
533
|
-
encoder->current_pos += 1;
|
534
|
-
return 1;
|
606
|
+
return _pack_tag_only(encoder, (uint8_t)TB_NAN_TAG);
|
535
607
|
}
|
536
608
|
|
609
|
+
/**
|
610
|
+
* @brief Packs a positive infinity value into the buffer
|
611
|
+
*
|
612
|
+
* @param encoder Pointer to the packer instance
|
613
|
+
* @return Number of bytes written, or 0 on error
|
614
|
+
*/
|
537
615
|
static inline int pack_infinity(tiny_bits_packer *encoder){
|
538
|
-
|
539
|
-
uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
|
540
|
-
if (!buffer) return 0; // Handle error
|
541
|
-
|
542
|
-
buffer[0] = (uint8_t)TB_INF_TAG;
|
543
|
-
encoder->current_pos += 1;
|
544
|
-
return 1;
|
616
|
+
return _pack_tag_only(encoder, (uint8_t)TB_INF_TAG);
|
545
617
|
}
|
546
618
|
|
619
|
+
/**
|
620
|
+
* @brief Packs a negative infinity value into the buffer
|
621
|
+
*
|
622
|
+
* @param encoder Pointer to the packer instance
|
623
|
+
* @return Number of bytes written, or 0 on error
|
624
|
+
*/
|
547
625
|
static inline int pack_negative_infinity(tiny_bits_packer *encoder){
|
548
|
-
|
549
|
-
uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, needed_size);
|
550
|
-
if (!buffer) return 0; // Handle error
|
551
|
-
|
552
|
-
buffer[0] = (uint8_t)TB_NNF_TAG;
|
553
|
-
encoder->current_pos += 1;
|
554
|
-
return 1;
|
626
|
+
return _pack_tag_only(encoder, (uint8_t)TB_NNF_TAG);
|
555
627
|
}
|
556
628
|
|
629
|
+
/**
|
630
|
+
* @brief Packs a string into the buffer
|
631
|
+
*
|
632
|
+
* @param encoder Pointer to the packer instance
|
633
|
+
* @param str Pointer to the string data
|
634
|
+
* @param str_len Length of the string in bytes
|
635
|
+
* @return Number of bytes written, or 0 on error
|
636
|
+
*
|
637
|
+
* @note If string deduplication is enabled, this may store a reference to a previously stored string
|
638
|
+
*/
|
557
639
|
static inline int pack_str(tiny_bits_packer *encoder, char* str, uint32_t str_len) {
|
558
640
|
uint32_t id = 0;
|
559
641
|
int found = 0;
|
@@ -632,10 +714,29 @@ static inline int pack_str(tiny_bits_packer *encoder, char* str, uint32_t str_le
|
|
632
714
|
return written;
|
633
715
|
}
|
634
716
|
|
717
|
+
/**
|
718
|
+
* @brief Packs a double-precision floating point value into the buffer
|
719
|
+
*
|
720
|
+
* @param encoder Pointer to the packer instance
|
721
|
+
* @param val The double value to pack
|
722
|
+
* @return Number of bytes written, or 0 on error
|
723
|
+
*
|
724
|
+
* @note If TB_FEATURE_COMPRESS_FLOATS is enabled, this will use a more compact representation for some values
|
725
|
+
*/
|
635
726
|
static inline int pack_double(tiny_bits_packer *encoder, double val) {
|
636
727
|
int written = 0;
|
637
728
|
uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, 10);
|
638
729
|
if (!buffer) return 0;
|
730
|
+
if(isnan(val)){
|
731
|
+
return pack_nan(encoder);
|
732
|
+
}
|
733
|
+
if(isinf(val)){
|
734
|
+
if(val > 0){
|
735
|
+
return pack_infinity(encoder);
|
736
|
+
} else {
|
737
|
+
return pack_negative_infinity(encoder);
|
738
|
+
}
|
739
|
+
}
|
639
740
|
// scaled varint encoding
|
640
741
|
if (encoder->features & TB_FEATURE_COMPRESS_FLOATS) {
|
641
742
|
double abs_val = fabs(val); ///val >= 0 ? val : -val;
|
@@ -667,6 +768,37 @@ static inline int pack_double(tiny_bits_packer *encoder, double val) {
|
|
667
768
|
return written;
|
668
769
|
}
|
669
770
|
|
771
|
+
/**
|
772
|
+
* @brief Packs a unixtime double-precision floating point value, along with a time zone offset into the buffer
|
773
|
+
*
|
774
|
+
* @param encoder Pointer to the packer instance
|
775
|
+
* @param val The unixtime double value to pack
|
776
|
+
* @param offset The timezone offset (in seconds, positive or negative)
|
777
|
+
* @return Number of bytes written, or 0 on error
|
778
|
+
*
|
779
|
+
*/
|
780
|
+
static inline int pack_datetime(tiny_bits_packer *encoder, double val, int16_t offset) {
|
781
|
+
int written = 0;
|
782
|
+
uint8_t *buffer = tiny_bits_packer_ensure_capacity(encoder, 11);
|
783
|
+
if (!buffer) return 0;
|
784
|
+
buffer[0] = TB_DTM_TAG;
|
785
|
+
buffer[1] = (int8_t) ((offset % 86400) / (60*15)); // convert seconds to multiples of 15 minutes
|
786
|
+
written += 2;
|
787
|
+
encode_uint64(dtoi_bits(val), buffer + written);
|
788
|
+
written += 8;
|
789
|
+
encoder->current_pos += written;
|
790
|
+
//written += pack_double(encoder, val);
|
791
|
+
return written;
|
792
|
+
}
|
793
|
+
|
794
|
+
/**
|
795
|
+
* @brief Packs a binary blob (byte array) into the buffer
|
796
|
+
*
|
797
|
+
* @param encoder Pointer to the packer instance
|
798
|
+
* @param blob Pointer to the binary data
|
799
|
+
* @param blob_size Size of the binary data in bytes
|
800
|
+
* @return Number of bytes written, or 0 on error
|
801
|
+
*/
|
670
802
|
static inline int pack_blob(tiny_bits_packer *encoder, const char* blob, int blob_size){
|
671
803
|
int written = 0;
|
672
804
|
int needed_size;
|
@@ -697,19 +829,22 @@ enum tiny_bits_type {
|
|
697
829
|
TINY_BITS_MAP, // length: number of key-value pairs
|
698
830
|
TINY_BITS_INT, // int_val: integer value
|
699
831
|
TINY_BITS_DOUBLE, // double_val: double value
|
700
|
-
TINY_BITS_STR, // length: byte length of string
|
701
|
-
TINY_BITS_BLOB, // length: byte length of blob
|
832
|
+
TINY_BITS_STR, // str_blob_val.length: byte length of string, str_blob_val.data: pointer to string
|
833
|
+
TINY_BITS_BLOB, // str_blob_val.length: byte length of blob, str_blob_val.data: pointer to blob
|
702
834
|
TINY_BITS_TRUE, // No value
|
703
835
|
TINY_BITS_FALSE, // No value
|
704
836
|
TINY_BITS_NULL, // No value
|
705
|
-
TINY_BITS_NAN,
|
706
|
-
TINY_BITS_INF,
|
707
|
-
TINY_BITS_N_INF,
|
708
|
-
TINY_BITS_EXT,
|
837
|
+
TINY_BITS_NAN, // No value
|
838
|
+
TINY_BITS_INF, // No value
|
839
|
+
TINY_BITS_N_INF, // No value
|
840
|
+
TINY_BITS_EXT, // No value
|
841
|
+
TINY_BITS_SEP, // No value
|
709
842
|
TINY_BITS_FINISHED, // End of buffer
|
710
|
-
TINY_BITS_ERROR // Parsing error
|
843
|
+
TINY_BITS_ERROR, // Parsing error
|
844
|
+
TINY_BITS_DATETIME // datetime_val.unixtime: Unix time as a double, datetime_val.offset: UTC offset in seconds
|
711
845
|
};
|
712
846
|
|
847
|
+
// value union
|
713
848
|
typedef union tiny_bits_value {
|
714
849
|
int64_t int_val; // TINY_BITS_INT
|
715
850
|
double double_val; // TINY_BITS_DOUBLE
|
@@ -719,8 +854,13 @@ typedef union tiny_bits_value {
|
|
719
854
|
size_t length;
|
720
855
|
int32_t id;
|
721
856
|
} str_blob_val;
|
857
|
+
struct { // TINY_BITS_DATETIME
|
858
|
+
double unixtime;
|
859
|
+
size_t offset;
|
860
|
+
} datetime_val;
|
722
861
|
} tiny_bits_value;
|
723
862
|
|
863
|
+
// The unpacker data structure
|
724
864
|
typedef struct tiny_bits_unpacker {
|
725
865
|
const unsigned char *buffer; // Input buffer (read-only)
|
726
866
|
size_t size; // Total size of buffer
|
@@ -734,6 +874,13 @@ typedef struct tiny_bits_unpacker {
|
|
734
874
|
HashTable dictionary;
|
735
875
|
} tiny_bits_unpacker;
|
736
876
|
|
877
|
+
/**
|
878
|
+
* @brief allocates and initializes a new unpacker
|
879
|
+
*
|
880
|
+
* @return pointer to new unpacker instance
|
881
|
+
*
|
882
|
+
* @note the returned unpacker object must be freed using tiny_bits_unpacker_destroy()
|
883
|
+
*/
|
737
884
|
tiny_bits_unpacker *tiny_bits_unpacker_create(void) {
|
738
885
|
|
739
886
|
tiny_bits_unpacker *decoder = (tiny_bits_unpacker *)malloc(sizeof(tiny_bits_unpacker));
|
@@ -749,7 +896,18 @@ tiny_bits_unpacker *tiny_bits_unpacker_create(void) {
|
|
749
896
|
return decoder;
|
750
897
|
}
|
751
898
|
|
752
|
-
|
899
|
+
/**
|
900
|
+
* @brief Provides a buffer to the unpacker for unpacking
|
901
|
+
*
|
902
|
+
* @param decoder The unpacker instance
|
903
|
+
*
|
904
|
+
* @param buffer A pointer to the buffer
|
905
|
+
*
|
906
|
+
* @param size Size of the region to be unpacked
|
907
|
+
*
|
908
|
+
* @note This function implicitly resets the unpacker object so no need to call tiny_bits_unpacker_reset()
|
909
|
+
*/
|
910
|
+
static inline void tiny_bits_unpacker_set_buffer(tiny_bits_unpacker *decoder, const unsigned char *buffer, size_t size) {
|
753
911
|
if (!decoder) return;
|
754
912
|
if (!buffer || size < 1) return;
|
755
913
|
decoder->buffer = buffer;
|
@@ -758,12 +916,26 @@ void tiny_bits_unpacker_set_buffer(tiny_bits_unpacker *decoder, const unsigned c
|
|
758
916
|
decoder->strings_count = 0;
|
759
917
|
}
|
760
918
|
|
919
|
+
/**
|
920
|
+
* @brief Resets internal data structure of the unpacker object
|
921
|
+
*
|
922
|
+
* @param decoder The unpacker instance
|
923
|
+
*
|
924
|
+
* @note This function is useful if you want to operate on the same buffer again for some reason
|
925
|
+
*/
|
761
926
|
static inline void tiny_bits_unpacker_reset(tiny_bits_unpacker *decoder) {
|
762
927
|
if (!decoder) return;
|
763
928
|
decoder->current_pos = 0;
|
764
929
|
decoder->strings_count = 0;
|
765
930
|
}
|
766
931
|
|
932
|
+
|
933
|
+
/**
|
934
|
+
* @brief Deallocate the unpacker object and its internal data structures
|
935
|
+
*
|
936
|
+
* @param decoder The unpacker instance
|
937
|
+
*
|
938
|
+
*/
|
767
939
|
void tiny_bits_unpacker_destroy(tiny_bits_unpacker *decoder) {
|
768
940
|
if (!decoder) return;
|
769
941
|
if (decoder->strings) {
|
@@ -833,6 +1005,19 @@ static inline enum tiny_bits_type _unpack_double(tiny_bits_unpacker *decoder, ui
|
|
833
1005
|
return TINY_BITS_DOUBLE;
|
834
1006
|
}
|
835
1007
|
|
1008
|
+
static inline enum tiny_bits_type _unpack_datetime(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
|
1009
|
+
size_t pos = decoder->current_pos;
|
1010
|
+
value->datetime_val.offset = decoder->buffer[pos] * (60*15); // convert offset back to seconds (from multiples of 15 minutes)
|
1011
|
+
//uint8_t dbl_tag = decoder->buffer[decoder->current_pos++];
|
1012
|
+
//tiny_bits_value dbl_val;
|
1013
|
+
//_unpack_double(decoder, dbl_tag, &dbl_val);
|
1014
|
+
//value->datetime_val.unixtime = dbl_val.double_val;
|
1015
|
+
uint64_t unixtime = decode_uint64(decoder->buffer + pos + 1);
|
1016
|
+
value->datetime_val.unixtime = itod_bits(unixtime);
|
1017
|
+
decoder->current_pos += 9;
|
1018
|
+
return TINY_BITS_DATETIME;
|
1019
|
+
}
|
1020
|
+
|
836
1021
|
static inline enum tiny_bits_type _unpack_blob(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
|
837
1022
|
size_t pos = decoder->current_pos;
|
838
1023
|
size_t len = decode_varint(decoder->buffer, decoder->size, &pos);
|
@@ -884,13 +1069,51 @@ static inline enum tiny_bits_type _unpack_str(tiny_bits_unpacker *decoder, uint8
|
|
884
1069
|
return TINY_BITS_STR;
|
885
1070
|
}
|
886
1071
|
|
1072
|
+
/**
|
1073
|
+
* @brief Unpacks a value and returns its type while setting its value
|
1074
|
+
*
|
1075
|
+
* @param decoder The unpacker instance
|
1076
|
+
* @param[out] value A supplied tiny_bits_value instance
|
1077
|
+
*
|
1078
|
+
* @return enum tiny_bits_type
|
1079
|
+
*
|
1080
|
+
* This is the entry point to unpacking tinybits structures. You keep calling
|
1081
|
+
* this method repeatedly until it returns TINY_BITS_FINISHED when it reaches end of buffer
|
1082
|
+
* or until it returns TINY_BITS_ERROR when it stumbles on a malformed or unknown structure.
|
1083
|
+
*
|
1084
|
+
* TINY_BITS_SEP means the current object was fully unpacked, and that there is potentially another one
|
1085
|
+
* this is specifically for stream unpacking multiple objects one after the other as they are being received
|
1086
|
+
*
|
1087
|
+
* The location of the value you need in the value union will depend on the returned type as follows
|
1088
|
+
*
|
1089
|
+
* TINY_BITS_TRUE, TINY_BITS_FALSE, TINY_BITS_NULL, TINY_BITS_NAN, TINY_BITS_INF & TINY_BITS_N_INF all
|
1090
|
+
* don't set the value, the type itself is sufficient information for client code to reconstruct the value.
|
1091
|
+
*
|
1092
|
+
* TINY_BITS_INT sets value.int_val
|
1093
|
+
*
|
1094
|
+
* TINY_BITS_DOUBLE sets value.double_val
|
1095
|
+
*
|
1096
|
+
* TINY_BITS_ARRAY and TINY_BITS_MAP both set value.length, for TINY_BITS_ARRAY it means the number of entries,
|
1097
|
+
* for TINY_BITS_MAP it means the number of key/value pairs. You have to keep calling unpack_value() afterwards to
|
1098
|
+
* get all the members of the stored array/map. Please note that tinybits doesn't do size checks on the elements supplied
|
1099
|
+
* during packing of arrays/maps. It is the responsibility of client code to ensure a 3 element array actually packs 3 elements.
|
1100
|
+
*
|
1101
|
+
* TINY_BITS_STR & TINY_BITS_BLOB both set the value.str_blob_val struct, which has two members, data, a pointer to the string/blob in the buffer and
|
1102
|
+
* length. Since some returned strings might be deduplicated, they will return the same data pointer and length value for their other instances, there is also an id
|
1103
|
+
* value in the struct, which will be only set for strings. You can use it to quickly determine the state of the string as follows
|
1104
|
+
*
|
1105
|
+
* A positive value means the string is a duplicate of a previous string, specifically a duplicate of the (id-1)th unpacked, deduplicatable string
|
1106
|
+
*
|
1107
|
+
* A negative value means the string is not a duplicate but is deduplicatable
|
1108
|
+
*
|
1109
|
+
* A zero value means the string is not deduplicatable and no duplicates should be expected (this is a heuristic, as duplicates may still exist)
|
1110
|
+
*/
|
887
1111
|
static inline enum tiny_bits_type unpack_value(tiny_bits_unpacker *decoder, tiny_bits_value *value) {
|
888
1112
|
if (!decoder || !value || decoder->current_pos >= decoder->size) {
|
889
1113
|
return (decoder && decoder->current_pos >= decoder->size) ? TINY_BITS_FINISHED : TINY_BITS_ERROR;
|
890
1114
|
}
|
891
1115
|
|
892
1116
|
uint8_t tag = decoder->buffer[decoder->current_pos++];
|
893
|
-
//printf("found tag %X\n", tag);
|
894
1117
|
// Dispatch based on tag
|
895
1118
|
if ((tag & TB_INT_TAG) == TB_INT_TAG) { // Integers
|
896
1119
|
return _unpack_int(decoder, tag, value);
|
@@ -912,12 +1135,17 @@ static inline enum tiny_bits_type unpack_value(tiny_bits_unpacker *decoder, tiny
|
|
912
1135
|
return _unpack_arr(decoder, tag, value);
|
913
1136
|
} else if (tag == TB_BLB_TAG) { // Blob
|
914
1137
|
return _unpack_blob(decoder, tag, value);
|
1138
|
+
} else if (tag == TB_DTM_TAG) {
|
1139
|
+
return _unpack_datetime(decoder, tag, value);
|
1140
|
+
} else if (tag == TB_SEP_TAG) {
|
1141
|
+
return TINY_BITS_SEP;
|
1142
|
+
} else if (tag == TB_EXT_TAG) {
|
1143
|
+
return TINY_BITS_EXT;
|
915
1144
|
} else if (tag == TB_TRU_TAG) {
|
916
1145
|
return TINY_BITS_TRUE;
|
917
1146
|
} else if (tag == TB_FLS_TAG) {
|
918
1147
|
return TINY_BITS_FALSE;
|
919
1148
|
}
|
920
|
-
//printf("UNKOWN TAG\n");
|
921
1149
|
return TINY_BITS_ERROR; // Unknown tag
|
922
1150
|
}
|
923
1151
|
|
data/ext/tinybits/tinybits_ext.c
CHANGED
@@ -127,6 +127,11 @@ static inline int pack_ruby_object_recursive(tiny_bits_packer* packer, VALUE obj
|
|
127
127
|
return pack_str(packer, RSTRING_PTR(str), RSTRING_LEN(str));
|
128
128
|
}
|
129
129
|
default:
|
130
|
+
if(rb_obj_is_kind_of(obj, rb_cTime)){
|
131
|
+
double unixtime = NUM2DBL(rb_funcall(obj, rb_intern("to_f"), 0));
|
132
|
+
return pack_datetime(packer, unixtime, FIX2INT(rb_time_utc_offset(obj)));
|
133
|
+
}
|
134
|
+
//printf("Unsupported type encountered during packing: %s", rb_obj_classname(obj));
|
130
135
|
rb_warn("Unsupported type encountered during packing: %s", rb_obj_classname(obj));
|
131
136
|
return 0;
|
132
137
|
}
|
@@ -160,6 +165,65 @@ static VALUE rb_pack(VALUE self, VALUE obj) {
|
|
160
165
|
return result;
|
161
166
|
}
|
162
167
|
|
168
|
+
static VALUE rb_push(VALUE self, VALUE obj) {
|
169
|
+
PackerData* packer_data;
|
170
|
+
TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
|
171
|
+
|
172
|
+
if (!packer_data->packer) {
|
173
|
+
rb_raise(rb_eRuntimeError, "Packer not initialized");
|
174
|
+
}
|
175
|
+
|
176
|
+
size_t initial_pos = packer_data->packer->current_pos;
|
177
|
+
|
178
|
+
HashIterContext context;
|
179
|
+
context.packer = packer_data->packer; // Pass the current packer
|
180
|
+
context.error_occurred = 0; // Initialize error flag
|
181
|
+
|
182
|
+
|
183
|
+
if(initial_pos > 0){
|
184
|
+
if(!pack_separator(packer_data->packer)){
|
185
|
+
rb_raise(rb_eRuntimeError, "Failed to pack object (multi-object packing error)");
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
// Call the optimized recursive function
|
190
|
+
if (!pack_ruby_object_recursive(packer_data->packer, obj, (VALUE)&context)) {
|
191
|
+
// Error occurred during packing (might be unsupported type or tiny_bits error)
|
192
|
+
rb_raise(rb_eRuntimeError, "Failed to pack object (unsupported type or packing error)");
|
193
|
+
}
|
194
|
+
|
195
|
+
return INT2FIX(packer_data->packer->current_pos - initial_pos);
|
196
|
+
}
|
197
|
+
|
198
|
+
static VALUE rb_to_s(VALUE self){
|
199
|
+
PackerData* packer_data;
|
200
|
+
TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
|
201
|
+
|
202
|
+
if (!packer_data->packer) {
|
203
|
+
rb_raise(rb_eRuntimeError, "Packer not initialized");
|
204
|
+
}
|
205
|
+
|
206
|
+
if(packer_data->packer->current_pos == 0){
|
207
|
+
return rb_str_new("", 0);
|
208
|
+
}
|
209
|
+
VALUE result = rb_str_new((const char*)packer_data->packer->buffer, packer_data->packer->current_pos);
|
210
|
+
rb_obj_freeze(result);
|
211
|
+
return result;
|
212
|
+
}
|
213
|
+
|
214
|
+
static VALUE rb_reset(VALUE self){
|
215
|
+
PackerData* packer_data;
|
216
|
+
TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
|
217
|
+
|
218
|
+
|
219
|
+
if (!packer_data->packer) {
|
220
|
+
rb_raise(rb_eRuntimeError, "Packer not initialized");
|
221
|
+
}
|
222
|
+
|
223
|
+
// Reset before packing (assuming this is efficient)
|
224
|
+
tiny_bits_packer_reset(packer_data->packer);
|
225
|
+
return self;
|
226
|
+
}
|
163
227
|
|
164
228
|
// Unpacker structure
|
165
229
|
typedef struct {
|
@@ -271,10 +335,13 @@ static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
|
|
271
335
|
return hash;
|
272
336
|
}
|
273
337
|
case TINY_BITS_BLOB:
|
274
|
-
// For
|
338
|
+
// For simplicity, treat blobs as strings
|
275
339
|
VALUE blob = rb_str_new(value.str_blob_val.data, value.str_blob_val.length);
|
276
340
|
rb_obj_freeze(blob);
|
277
341
|
return blob;
|
342
|
+
case TINY_BITS_DATETIME:
|
343
|
+
VALUE time = rb_time_num_new(DBL2NUM(value.datetime_val.unixtime), INT2FIX(((int16_t)value.datetime_val.offset)));
|
344
|
+
return time;
|
278
345
|
default:
|
279
346
|
return Qundef; // Error
|
280
347
|
}
|
@@ -300,6 +367,81 @@ static VALUE rb_unpack(VALUE self, VALUE buffer) {
|
|
300
367
|
return result;
|
301
368
|
}
|
302
369
|
|
370
|
+
static VALUE rb_set_buffer(VALUE self, VALUE buffer){
|
371
|
+
UnpackerData* unpacker_data;
|
372
|
+
TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
|
373
|
+
|
374
|
+
if (!unpacker_data->unpacker) {
|
375
|
+
rb_raise(rb_eRuntimeError, "Unpacker not initialized");
|
376
|
+
}
|
377
|
+
|
378
|
+
StringValue(buffer); // Ensure it's a string
|
379
|
+
|
380
|
+
tiny_bits_unpacker_set_buffer(unpacker_data->unpacker, (const unsigned char*)RSTRING_PTR(buffer), RSTRING_LEN(buffer));
|
381
|
+
|
382
|
+
// set the buffer as an instance variable to maintain a reference to it
|
383
|
+
rb_iv_set(self, "@buffer", buffer);
|
384
|
+
|
385
|
+
return self;
|
386
|
+
}
|
387
|
+
|
388
|
+
static VALUE rb_pop(VALUE self) {
|
389
|
+
|
390
|
+
VALUE buffer = rb_iv_get(self, "@buffer");
|
391
|
+
if(buffer == Qnil){
|
392
|
+
rb_raise(rb_eRuntimeError, "No buffer is set");
|
393
|
+
}
|
394
|
+
|
395
|
+
UnpackerData* unpacker_data;
|
396
|
+
TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
|
397
|
+
|
398
|
+
if (!unpacker_data->unpacker) {
|
399
|
+
rb_raise(rb_eRuntimeError, "Unpacker not initialized");
|
400
|
+
}
|
401
|
+
|
402
|
+
tiny_bits_unpacker* unpacker = unpacker_data->unpacker;
|
403
|
+
tiny_bits_value value;
|
404
|
+
|
405
|
+
if(unpacker->current_pos >= unpacker->size - 1){
|
406
|
+
return Qnil;
|
407
|
+
}
|
408
|
+
|
409
|
+
if(unpacker->current_pos > 0){
|
410
|
+
enum tiny_bits_type type = unpack_value(unpacker, &value);
|
411
|
+
if(type != TINY_BITS_SEP){
|
412
|
+
rb_raise(rb_eRuntimeError, "Malformed multi-object buffer");
|
413
|
+
}
|
414
|
+
}
|
415
|
+
|
416
|
+
VALUE result = unpack_ruby_object(unpacker_data, 0);
|
417
|
+
if (result == Qundef) {
|
418
|
+
rb_raise(rb_eRuntimeError, "Failed to unpack data");
|
419
|
+
}
|
420
|
+
|
421
|
+
return result;
|
422
|
+
}
|
423
|
+
|
424
|
+
static VALUE rb_finished(VALUE self){
|
425
|
+
VALUE buffer = rb_iv_get(self, "@buffer");
|
426
|
+
if(buffer == Qnil){
|
427
|
+
rb_raise(rb_eRuntimeError, "No buffer is set");
|
428
|
+
}
|
429
|
+
UnpackerData* unpacker_data;
|
430
|
+
TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
|
431
|
+
|
432
|
+
if (!unpacker_data->unpacker) {
|
433
|
+
rb_raise(rb_eRuntimeError, "Unpacker not initialized");
|
434
|
+
}
|
435
|
+
|
436
|
+
tiny_bits_unpacker* unpacker = unpacker_data->unpacker;
|
437
|
+
|
438
|
+
if(unpacker->current_pos >= (unpacker->size - 1)){
|
439
|
+
return Qtrue;
|
440
|
+
}
|
441
|
+
|
442
|
+
return Qfalse;
|
443
|
+
}
|
444
|
+
|
303
445
|
void Init_tinybits_ext(void) {
|
304
446
|
rb_mTinyBits = rb_define_module("TinyBits");
|
305
447
|
rb_cPacker = rb_define_class_under(rb_mTinyBits, "Packer", rb_cObject);
|
@@ -308,8 +450,19 @@ void Init_tinybits_ext(void) {
|
|
308
450
|
rb_define_alloc_func(rb_cPacker, rb_packer_alloc);
|
309
451
|
rb_define_method(rb_cPacker, "initialize", rb_packer_init, 0);
|
310
452
|
rb_define_method(rb_cPacker, "pack", rb_pack, 1);
|
453
|
+
rb_define_alias(rb_cPacker, "encode", "pack");
|
454
|
+
rb_define_alias(rb_cPacker, "dump", "pack");
|
455
|
+
rb_define_method(rb_cPacker, "push", rb_push, 1);
|
456
|
+
rb_define_alias(rb_cPacker, "<<", "push");
|
457
|
+
rb_define_method(rb_cPacker, "to_s", rb_to_s, 0);
|
458
|
+
rb_define_method(rb_cPacker, "reset", rb_reset, 0);
|
311
459
|
|
312
460
|
rb_define_alloc_func(rb_cUnpacker, rb_unpacker_alloc);
|
313
461
|
rb_define_method(rb_cUnpacker, "initialize", rb_unpacker_init, 0);
|
314
462
|
rb_define_method(rb_cUnpacker, "unpack", rb_unpack, 1);
|
463
|
+
rb_define_alias(rb_cUnpacker, "load", "unpack");
|
464
|
+
rb_define_alias(rb_cUnpacker, "decode", "unpack");
|
465
|
+
rb_define_method(rb_cUnpacker, "buffer=", rb_set_buffer, 1);
|
466
|
+
rb_define_method(rb_cUnpacker, "pop", rb_pop, 0);
|
467
|
+
rb_define_method(rb_cUnpacker, "finished?", rb_finished, 0);
|
315
468
|
}
|
data/lib/tinybits/version.rb
CHANGED
data/lib/tinybits.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tinybits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mohamed Hassan
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-05-03 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: TinyBits is a Ruby gem that wraps the TinyBits C serialization library,
|
13
13
|
offering Rubyists the power of serialization with integer/float compression and string
|
@@ -19,6 +19,7 @@ extensions:
|
|
19
19
|
extra_rdoc_files: []
|
20
20
|
files:
|
21
21
|
- ext/tinybits/extconf.rb
|
22
|
+
- ext/tinybits/test_date.rb
|
22
23
|
- ext/tinybits/tinybits.h
|
23
24
|
- ext/tinybits/tinybits_ext.c
|
24
25
|
- lib/tinybits.rb
|