tinybits 0.3.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 340b92ea6c9e9782aa1b63c08c226d23bae56d1d5780dc46fb79f9c0a508b71b
4
- data.tar.gz: 7fdeb074fe4a24e061834e1084702ee97827698eaa46f907b7b0a756b7ea05be
3
+ metadata.gz: 9ce29d814129fc925832887a9a4d83efd6dff20a614aab92e2dc80d10197d5aa
4
+ data.tar.gz: 137f8181f9d0f4c4e3d6d5abe3cb3c0014de07149e08a6e1f4d6c134cbc5c003
5
5
  SHA512:
6
- metadata.gz: 0c286652db3de7817fbea5241f10221f666cc298121348dbe5718f9221c6d9ef4beed593d478851367b877bc3c66bbf1dcc359b4fb5997221790353bf1bbbcb9
7
- data.tar.gz: 9b67f07cb2c1b656aa7dc843fd8441760473ba8047af09cc220508ed9e604889ab213b2f4c69855c4dbe9114190836b5603c898d0c3c4f8ce9b68b2dd77539c4
6
+ metadata.gz: ea777d8a1c1230512e9865e6cababb992f45d70b6c1c8d03d65567c82bda4a514ea1759341decf4c2b5da6934c23d75bc4f01ca7682a32136557382ba368d256
7
+ data.tar.gz: f16d288249b7cdb434e23d80823a5c5816a7b3be3989f0d0c0ce2513c7255c7f3e1df06b48f1a57681d4ccb3678328f8aa207fcdd5cbe2289cf7372f647aa52a
@@ -1,5 +1,6 @@
1
1
  require 'mkmf'
2
2
 
3
+ $CFLAGS << " -O3 -march=native"
3
4
 
4
5
  dir_config('tinybits_ext')
5
6
  create_makefile('tinybits_ext')
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * TinyBits Amalgamated Header
3
- * Generated on: Sun May 4 03:12:10 AM CEST 2025
3
+ * Generated on: Tue May 13 11:39:13 PM CEST 2025
4
4
  */
5
5
 
6
6
  #ifndef TINY_BITS_H
@@ -89,8 +89,11 @@ typedef struct HashTable {
89
89
 
90
90
  static inline uint32_t fast_hash_32(const char* str, uint16_t len) {
91
91
  uint32_t hash = len;
92
- hash = (hash << 16) | (((unsigned char)str[0] << 8) | (unsigned char)str[1]);
93
- hash ^= (((unsigned char)str[len-2] << 24) | ((unsigned char)str[len-1] << 16));
92
+ hash = (hash << 24) |
93
+ ((unsigned char)str[0] << 16) |
94
+ ((unsigned char)str[1] << 8 ) |
95
+ ((unsigned char)str[len-1]);
96
+ //hash ^= (((unsigned char)str[len-2] << 24) | ((unsigned char)str[len-1] << 16));
94
97
  return hash;
95
98
  }
96
99
 
@@ -173,46 +176,111 @@ static inline int varint_size(uint64_t value){
173
176
  return 9;
174
177
  }
175
178
 
176
- static inline uint64_t decode_varint(const uint8_t* buffer, size_t size, size_t *pos) {
179
+ static inline int8_t decode_varint(const uint8_t* buffer, size_t size, size_t pos, uint64_t *value) {
180
+ if(pos >= size) return 0;
181
+ uint8_t prefix = buffer[pos];
182
+ if (prefix <= 240) {
183
+ *value = prefix;
184
+ return 1;
185
+ } else if (prefix >= 241 && prefix <= 248) {
186
+ if (pos + 1 >= size) return 0; // Not enough bytes
187
+ *value = 240 + 256 * (prefix - 241) + buffer[pos+1];
188
+ return 2;
189
+ } else if (prefix == 249){
190
+ if (pos + 2 >= size) return 0; // Not enough bytes
191
+ *value = 2288 + 256 * buffer[pos+1] + buffer[pos+2];
192
+ return 3;
193
+ } else if (prefix == 250){
194
+ if (pos + 3 >= size) return 0; // Not enough bytes
195
+ *value = ((uint64_t)buffer[pos+1] << 16) | ((uint64_t)buffer[pos+2] << 8) | buffer[pos+3];
196
+ return 4;
197
+ } else if (prefix == 251){
198
+ if (pos + 4 >= size) return 0; // Not enough bytes
199
+ *value = ((uint64_t)buffer[pos+1] << 24) | ((uint64_t)buffer[pos+2] << 16) |
200
+ ((uint64_t)buffer[pos+3] << 8) | buffer[pos+4];
201
+ return 5;
202
+ } else if (prefix == 252){
203
+ if (pos + 5 >= size) return 0; // Not enough bytes
204
+ *value = ((uint64_t)buffer[pos+1] << 32) | ((uint64_t)buffer[pos+2] << 24) |
205
+ ((uint64_t)buffer[pos+3] << 16) | ((uint64_t)buffer[pos+4] << 8) | buffer[pos+5];
206
+ return 6;
207
+ } else if (prefix == 253){
208
+ if (pos + 6 >= size) return 0; // Not enough bytes
209
+ *value = ((uint64_t)buffer[pos+1] << 40) | ((uint64_t)buffer[pos+2] << 32) |
210
+ ((uint64_t)buffer[pos+3] << 24) | ((uint64_t)buffer[pos+4] << 16) |
211
+ ((uint64_t)buffer[pos+5] << 8) | buffer[pos+6];
212
+ return 7;
213
+ } else if (prefix == 254){
214
+ if (pos + 7 >= size) return 0; // Not enough bytes
215
+ *value = ((uint64_t)buffer[pos+1] << 48) | ((uint64_t)buffer[pos+2] << 40) |
216
+ ((uint64_t)buffer[pos+3] << 32) | ((uint64_t)buffer[pos+4] << 24) |
217
+ ((uint64_t)buffer[pos+5] << 16) | ((uint64_t)buffer[pos+6] << 8) | buffer[pos+7];
218
+ return 8;
219
+ } else if (prefix == 255){
220
+ if (pos + 8 >= size) return 0; // Not enough bytes
221
+ *value = ((uint64_t)buffer[pos+1] << 56) | ((uint64_t)buffer[pos+2] << 48) |
222
+ ((uint64_t)buffer[pos+3] << 40) | ((uint64_t)buffer[pos+4] << 32) |
223
+ ((uint64_t)buffer[pos+5] << 24) | ((uint64_t)buffer[pos+6] << 16) |
224
+ ((uint64_t)buffer[pos+7] << 8) | buffer[pos+8];
225
+ return 9;
226
+ } else {
227
+ return 0;
228
+ }
229
+
230
+ }
231
+
232
+ static inline uint64_t decode_varint_old(const uint8_t* buffer, size_t size, size_t *pos) {
233
+ if (*pos >= size) {
234
+ return 0; // not enough buffer
235
+ }
236
+
177
237
  uint8_t prefix = buffer[*pos];
178
238
  if (prefix <= 240) {
179
239
  *pos += 1;
180
240
  return prefix;
181
241
  } else if (prefix >= 241 && prefix <= 248) {
242
+ if (*pos + 1 >= size) return 0; // Not enough bytes
182
243
  uint64_t value = 240 + 256 * (prefix - 241) + buffer[*pos+1];
183
244
  *pos += 2;
184
245
  return value;
185
246
  } else if (prefix == 249) {
247
+ if (*pos + 2 >= size) return 0; // Not enough bytes
186
248
  uint64_t value = 2288 + 256 * buffer[*pos+1] + buffer[*pos+2];
187
249
  *pos += 3;
188
250
  return value;
189
251
  } else if (prefix == 250) {
252
+ if (*pos + 3 >= size) return 0; // Not enough bytes
190
253
  uint64_t value = ((uint64_t)buffer[*pos+1] << 16) | ((uint64_t)buffer[*pos+2] << 8) | buffer[*pos+3];
191
254
  *pos += 4;
192
255
  return value;
193
256
  } else if (prefix == 251) {
257
+ if (*pos + 4 >= size) return 0; // Not enough bytes
194
258
  uint64_t value = ((uint64_t)buffer[*pos+1] << 24) | ((uint64_t)buffer[*pos+2] << 16) |
195
259
  ((uint64_t)buffer[*pos+3] << 8) | buffer[*pos+4];
196
260
  *pos += 5;
197
261
  return value;
198
262
  } else if (prefix == 252) {
263
+ if (*pos + 5 >= size) return 0; // Not enough bytes
199
264
  uint64_t value = ((uint64_t)buffer[*pos+1] << 32) | ((uint64_t)buffer[*pos+2] << 24) |
200
265
  ((uint64_t)buffer[*pos+3] << 16) | ((uint64_t)buffer[*pos+4] << 8) | buffer[*pos+5];
201
266
  *pos += 6;
202
267
  return value;
203
268
  } else if (prefix == 253) {
269
+ if (*pos + 6 >= size) return 0; // Not enough bytes
204
270
  uint64_t value = ((uint64_t)buffer[*pos+1] << 40) | ((uint64_t)buffer[*pos+2] << 32) |
205
271
  ((uint64_t)buffer[*pos+3] << 24) | ((uint64_t)buffer[*pos+4] << 16) |
206
272
  ((uint64_t)buffer[*pos+5] << 8) | buffer[*pos+6];
207
273
  *pos += 7;
208
274
  return value;
209
275
  } else if (prefix == 254) {
276
+ if (*pos + 7 >= size) return 0; // Not enough bytes
210
277
  uint64_t value = ((uint64_t)buffer[*pos+1] << 48) | ((uint64_t)buffer[*pos+2] << 40) |
211
278
  ((uint64_t)buffer[*pos+3] << 32) | ((uint64_t)buffer[*pos+4] << 24) |
212
279
  ((uint64_t)buffer[*pos+5] << 16) | ((uint64_t)buffer[*pos+6] << 8) | buffer[*pos+7];
213
280
  *pos += 8;
214
281
  return value;
215
282
  } else if (prefix == 255) {
283
+ if (*pos + 8 >= size) return 0; // Not enough bytes
216
284
  uint64_t value = ((uint64_t)buffer[*pos+1] << 56) | ((uint64_t)buffer[*pos+2] << 48) |
217
285
  ((uint64_t)buffer[*pos+3] << 40) | ((uint64_t)buffer[*pos+4] << 32) |
218
286
  ((uint64_t)buffer[*pos+5] << 24) | ((uint64_t)buffer[*pos+6] << 16) |
@@ -396,6 +464,7 @@ tiny_bits_packer *tiny_bits_packer_create(size_t initial_capacity, uint8_t featu
396
464
  encoder->encode_table.cache_size = TB_HASH_CACHE_SIZE;
397
465
  encoder->encode_table.cache_pos = 0;
398
466
  encoder->encode_table.next_id = 0;
467
+ memset(encoder->encode_table.bins, 0, TB_HASH_SIZE * sizeof(uint8_t));
399
468
  } else {
400
469
  encoder->encode_table.cache = NULL;
401
470
  encoder->encode_table.cache_size = 0;
@@ -652,7 +721,7 @@ static inline int pack_str(tiny_bits_packer *encoder, char* str, uint32_t str_le
652
721
  HashEntry entry = encoder->encode_table.cache[index - 1];
653
722
  if (hash_code == entry.hash
654
723
  && str_len == entry.length
655
- && (str_len <= 4 || (fast_memcmp(str, encoder->buffer + entry.offset, str_len) == 0) )) {
724
+ && fast_memcmp(str, encoder->buffer + entry.offset, str_len) == 0 ) {
656
725
  id = index - 1;
657
726
  found = 1;
658
727
  break;
@@ -950,17 +1019,23 @@ static inline enum tiny_bits_type _unpack_int(tiny_bits_unpacker *decoder, uint8
950
1019
  value->int_val = tag - 128;
951
1020
  return TINY_BITS_INT;
952
1021
  } else if (tag == 248) { // Positive with continuation
953
- uint64_t val = decode_varint(decoder->buffer, decoder->size, &pos);
1022
+ uint8_t read;
1023
+ uint64_t val;
1024
+ read = decode_varint(decoder->buffer, decoder->size, pos, &val);
1025
+ if(read == 0) return TINY_BITS_ERROR;
954
1026
  value->int_val = val + 120;
955
- decoder->current_pos = pos;
1027
+ decoder->current_pos += read;
956
1028
  return TINY_BITS_INT;
957
1029
  } else if (tag > 248 && tag < 255) { // Small negative (248-254)
958
1030
  value->int_val = -(tag - 248);
959
1031
  return TINY_BITS_INT;
960
1032
  } else { // 255: Negative with continuation
961
- uint64_t val = decode_varint(decoder->buffer, decoder->size, &pos);
1033
+ uint8_t read;
1034
+ uint64_t val;
1035
+ read = decode_varint(decoder->buffer, decoder->size, pos, &val);
1036
+ if(read == 0) return TINY_BITS_ERROR;
962
1037
  value->int_val = -(val + 7);
963
- decoder->current_pos = pos;
1038
+ decoder->current_pos += read;
964
1039
  return TINY_BITS_INT;
965
1040
  }
966
1041
  }
@@ -970,8 +1045,12 @@ static inline enum tiny_bits_type _unpack_arr(tiny_bits_unpacker *decoder, uint8
970
1045
  if (tag < 0b00001111) { // Small array (0-30)
971
1046
  value->length = tag & 0b00000111;
972
1047
  } else { // Large array
973
- value->length = decode_varint(decoder->buffer, decoder->size, &pos) + 7;
974
- decoder->current_pos = pos;
1048
+ uint8_t read;
1049
+ uint64_t val;
1050
+ read = decode_varint(decoder->buffer, decoder->size, pos, &val);
1051
+ if(read == 0) return TINY_BITS_ERROR;
1052
+ value->length = val + 7;
1053
+ decoder->current_pos += read;
975
1054
  }
976
1055
  return TINY_BITS_ARRAY;
977
1056
  }
@@ -981,8 +1060,12 @@ static inline enum tiny_bits_type _unpack_map(tiny_bits_unpacker *decoder, uint8
981
1060
  if (tag < 0x1F) { // Small map (0-14)
982
1061
  value->length = tag & 0x0F;
983
1062
  } else { // Large map
984
- value->length = decode_varint(decoder->buffer, decoder->size, &pos) + 15;
985
- decoder->current_pos = pos;
1063
+ uint8_t read;
1064
+ uint64_t val;
1065
+ read = decode_varint(decoder->buffer, decoder->size, pos, &val);
1066
+ if(read == 0) return TINY_BITS_ERROR;
1067
+ value->length = val + 15;
1068
+ decoder->current_pos += read;
986
1069
  }
987
1070
  return TINY_BITS_MAP;
988
1071
  }
@@ -990,28 +1073,28 @@ static inline enum tiny_bits_type _unpack_map(tiny_bits_unpacker *decoder, uint8
990
1073
  static inline enum tiny_bits_type _unpack_double(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
991
1074
  size_t pos = decoder->current_pos;
992
1075
  if (tag == TB_F64_TAG) { // Raw double
1076
+ if(pos + 8 > decoder->size) return TINY_BITS_ERROR;
993
1077
  uint64_t number = decode_uint64(decoder->buffer + pos);
994
1078
  value->double_val = itod_bits(number);
995
1079
  decoder->current_pos += 8;
996
1080
  } else { // Compressed double
997
- uint64_t number = decode_varint(decoder->buffer, decoder->size, &pos);
1081
+ uint8_t read;
1082
+ uint64_t number;
1083
+ read = decode_varint(decoder->buffer, decoder->size, pos, &number);
1084
+ if(read == 0) return TINY_BITS_ERROR;
998
1085
  int order = (tag & 0x0F);
999
1086
  double fractional = (double)number / powers[order];
1000
- //fractional /= powers[order];
1001
1087
  if(tag & 0x10) fractional = -fractional;
1002
1088
  value->double_val = fractional;
1003
- decoder->current_pos = pos;
1089
+ decoder->current_pos += read;
1004
1090
  }
1005
1091
  return TINY_BITS_DOUBLE;
1006
1092
  }
1007
1093
 
1008
1094
  static inline enum tiny_bits_type _unpack_datetime(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
1009
1095
  size_t pos = decoder->current_pos;
1096
+ if(pos + 8 > decoder->size) return TINY_BITS_ERROR;
1010
1097
  value->datetime_val.offset = decoder->buffer[pos] * (60*15); // convert offset back to seconds (from multiples of 15 minutes)
1011
- //uint8_t dbl_tag = decoder->buffer[decoder->current_pos++];
1012
- //tiny_bits_value dbl_val;
1013
- //_unpack_double(decoder, dbl_tag, &dbl_val);
1014
- //value->datetime_val.unixtime = dbl_val.double_val;
1015
1098
  uint64_t unixtime = decode_uint64(decoder->buffer + pos + 1);
1016
1099
  value->datetime_val.unixtime = itod_bits(unixtime);
1017
1100
  decoder->current_pos += 9;
@@ -1020,10 +1103,14 @@ static inline enum tiny_bits_type _unpack_datetime(tiny_bits_unpacker *decoder,
1020
1103
 
1021
1104
  static inline enum tiny_bits_type _unpack_blob(tiny_bits_unpacker *decoder, uint8_t tag, tiny_bits_value *value){
1022
1105
  size_t pos = decoder->current_pos;
1023
- size_t len = decode_varint(decoder->buffer, decoder->size, &pos);
1106
+ size_t len;
1107
+ size_t read;
1108
+ read = decode_varint(decoder->buffer, decoder->size, pos, &len);
1109
+ if(read == 0) return TINY_BITS_ERROR;
1110
+ if((pos + read + len) > decoder->size) return TINY_BITS_ERROR;
1024
1111
  value->str_blob_val.data = (const char *)decoder->buffer + pos;
1025
1112
  value->str_blob_val.length = len;
1026
- decoder->current_pos = pos + len;
1113
+ decoder->current_pos = pos + read + len;
1027
1114
  return TINY_BITS_BLOB;
1028
1115
  }
1029
1116
 
@@ -1032,22 +1119,35 @@ static inline enum tiny_bits_type _unpack_str(tiny_bits_unpacker *decoder, uint8
1032
1119
  size_t len;
1033
1120
  if (tag < 0x5F) { // Small string (0-30)
1034
1121
  len = tag & 0x1F;
1122
+ if(pos + len > decoder->size) return TINY_BITS_ERROR;
1035
1123
  value->str_blob_val.data = (const char *)decoder->buffer + pos;
1036
1124
  value->str_blob_val.length = len;
1037
- decoder->current_pos = pos + len;
1125
+ decoder->current_pos += len;
1038
1126
  } else if (tag == 0x5F) { // Large string
1039
- len = decode_varint(decoder->buffer, decoder->size, &pos) + 31;
1127
+ size_t read;
1128
+ read = decode_varint(decoder->buffer, decoder->size, pos, &len);
1129
+ if(read == 0) return TINY_BITS_ERROR;
1130
+ len += 31;
1131
+ if(pos + read + len > decoder->size) return TINY_BITS_ERROR;
1040
1132
  value->str_blob_val.data = (const char *)decoder->buffer + pos;
1041
1133
  value->str_blob_val.length = len;
1042
- decoder->current_pos = pos + len;
1134
+ decoder->current_pos += (read + len);
1043
1135
  } else { // Deduplicated (small: < 0x7F, large: 0x7F)
1044
- uint32_t id = (tag < 0x7F) ? (tag & 0x1F) : decode_varint(decoder->buffer, decoder->size, &pos) + 31;
1136
+ size_t id;
1137
+ size_t read;
1138
+ if(tag < 0x7F){
1139
+ id = tag & 0x1F;
1140
+ }else {
1141
+ read = decode_varint(decoder->buffer, decoder->size, pos, &id);
1142
+ if(read == 0) return TINY_BITS_ERROR;
1143
+ id += 31;
1144
+ decoder->current_pos += read; // Update pos after varint
1145
+ }
1045
1146
  if (id >= decoder->strings_count) return TINY_BITS_ERROR;
1046
1147
  len = decoder->strings[id].length;
1047
1148
  value->str_blob_val.data = decoder->strings[id].str;
1048
1149
  value->str_blob_val.length = len;
1049
- value->str_blob_val.id = id+1;
1050
- decoder->current_pos = pos; // Update pos after varint
1150
+ value->str_blob_val.id = id + 1;
1051
1151
  return TINY_BITS_STR;
1052
1152
  }
1053
1153
  value->str_blob_val.id = 0;
@@ -4,8 +4,23 @@
4
4
  #include "tinybits.h"
5
5
 
6
6
  // Ruby module and classes
7
+ /*
8
+ * Document-module: TinyBits
9
+ *
10
+ * A Ruby extension for fast binary serialization and deserialization of Ruby objects.
11
+ */
7
12
  VALUE rb_mTinyBits;
13
+ /*
14
+ * Document-class: TinyBits::Packer
15
+ *
16
+ * The Packer class handles serialization of Ruby objects to the TinyBits binary format.
17
+ */
8
18
  VALUE rb_cPacker;
19
+ /*
20
+ * Document-class: TinyBits::Unpacker
21
+ *
22
+ * The Unpacker class handles deserialization of TinyBits binary format to Ruby objects.
23
+ */
9
24
  VALUE rb_cUnpacker;
10
25
 
11
26
  // Forward declarations
@@ -75,6 +90,13 @@ static VALUE rb_packer_alloc(VALUE klass) {
75
90
  return TypedData_Wrap_Struct(klass, &packer_data_type, packer_data);
76
91
  }
77
92
 
93
+ /*
94
+ * Document-method: initialize
95
+ *
96
+ * Initializes a new Packer object
97
+ *
98
+ * @return [Packer] The initialized packer object.
99
+ */
78
100
  static VALUE rb_packer_init(VALUE self) {
79
101
  PackerData* packer_data;
80
102
  TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
@@ -141,7 +163,17 @@ static inline int pack_ruby_object_recursive(tiny_bits_packer* packer, VALUE obj
141
163
  }
142
164
  }
143
165
 
144
- // keeps the public API the same.
166
+ /*
167
+ * Document-method: pack
168
+ *
169
+ * Packs a Ruby object into a binary string.
170
+ * Supports Ruby types: String, Array, Hash, Integer, Float, nil, true, false, Symbol, and Time.
171
+ * Objects can implement a `to_tinybits` method to provide custom serialization.
172
+ *
173
+ * @param obj [Object] The Ruby object to pack.
174
+ * @return [String] The packed binary string (frozen).
175
+ * @raise [RuntimeError] If packing fails due to unsupported types or other errors.
176
+ */
145
177
  static VALUE rb_pack(VALUE self, VALUE obj) {
146
178
  PackerData* packer_data;
147
179
  TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
@@ -169,6 +201,16 @@ static VALUE rb_pack(VALUE self, VALUE obj) {
169
201
  return result;
170
202
  }
171
203
 
204
+ /*
205
+ * Document-method: push
206
+ *
207
+ * Appends a packed object to the current buffer.
208
+ * No separator is inserted between objects (the separator step is commented out).
209
+ *
210
+ * @param obj [Object] The Ruby object to append.
211
+ * @return [Integer] The number of bytes added to the buffer.
212
+ * @raise [RuntimeError] If packing fails.
213
+ */
172
214
  static VALUE rb_push(VALUE self, VALUE obj) {
173
215
  PackerData* packer_data;
174
216
  TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
@@ -183,13 +225,13 @@ static VALUE rb_push(VALUE self, VALUE obj) {
183
225
  context.packer = packer_data->packer; // Pass the current packer
184
226
  context.error_occurred = 0; // Initialize error flag
185
227
 
186
-
228
+ /*
187
229
  if(initial_pos > 0){
188
230
  if(!pack_separator(packer_data->packer)){
189
231
  rb_raise(rb_eRuntimeError, "Failed to pack object (multi-object packing error)");
190
232
  }
191
233
  }
192
-
234
+ */
193
235
  // Call the optimized recursive function
194
236
  if (!pack_ruby_object_recursive(packer_data->packer, obj, (VALUE)&context)) {
195
237
  // Error occurred during packing (might be unsupported type or tiny_bits error)
@@ -199,6 +241,13 @@ static VALUE rb_push(VALUE self, VALUE obj) {
199
241
  return INT2FIX(packer_data->packer->current_pos - initial_pos);
200
242
  }
201
243
 
244
+ /*
245
+ * Document-method: to_s
246
+ *
247
+ * Returns the current packed buffer as a string.
248
+ *
249
+ * @return [String] The current packed buffer contents (frozen).
250
+ */
202
251
  static VALUE rb_to_s(VALUE self){
203
252
  PackerData* packer_data;
204
253
  TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
@@ -215,6 +264,13 @@ static VALUE rb_to_s(VALUE self){
215
264
  return result;
216
265
  }
217
266
 
267
+ /*
268
+ * Document-method: reset
269
+ *
270
+ * Resets the packer's buffer to empty.
271
+ *
272
+ * @return [Packer] self
273
+ */
218
274
  static VALUE rb_reset(VALUE self){
219
275
  PackerData* packer_data;
220
276
  TypedData_Get_Struct(self, PackerData, &packer_data_type, packer_data);
@@ -232,8 +288,8 @@ static VALUE rb_reset(VALUE self){
232
288
  // Unpacker structure
233
289
  typedef struct {
234
290
  tiny_bits_unpacker* unpacker;
235
- size_t strings_index;
236
- VALUE ruby_strings[TB_HASH_CACHE_SIZE];
291
+ //size_t strings_index;
292
+ //VALUE ruby_strings[TB_HASH_CACHE_SIZE];
237
293
  } UnpackerData;
238
294
 
239
295
  static void unpacker_free(void* data) {
@@ -241,9 +297,9 @@ static void unpacker_free(void* data) {
241
297
  if (unpacker_data->unpacker) {
242
298
  tiny_bits_unpacker_destroy(unpacker_data->unpacker);
243
299
  }
244
- for (size_t i = 0; i < TB_HASH_CACHE_SIZE; i++) {
245
- unpacker_data->ruby_strings[i] = Qnil;
246
- }
300
+ //for (size_t i = 0; i < TB_HASH_CACHE_SIZE; i++) {
301
+ //unpacker_data->ruby_strings[i] = Qnil;
302
+ //}
247
303
  free(unpacker_data);
248
304
  }
249
305
 
@@ -263,6 +319,13 @@ static VALUE rb_unpacker_alloc(VALUE klass) {
263
319
  return TypedData_Wrap_Struct(klass, &unpacker_data_type, unpacker_data);
264
320
  }
265
321
 
322
+ /*
323
+ * Document-method: initialize
324
+ *
325
+ * Initializes a new Unpacker.
326
+ *
327
+ * @return [Unpacker] The initialized unpacker object.
328
+ */
266
329
  static VALUE rb_unpacker_init(VALUE self) {
267
330
  UnpackerData* unpacker_data;
268
331
  TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
@@ -271,14 +334,15 @@ static VALUE rb_unpacker_init(VALUE self) {
271
334
  if (!unpacker_data->unpacker) {
272
335
  rb_raise(rb_eRuntimeError, "Failed to initialize unpacker");
273
336
  }
274
- unpacker_data->strings_index = 0;
337
+ VALUE strings = rb_ary_new_capa(TB_HASH_CACHE_SIZE);
338
+ rb_iv_set(self, "@strings", strings);
275
339
  return self;
276
340
  }
277
341
 
278
- static inline VALUE rb_unpack_str(UnpackerData* unpacker_data, tiny_bits_value value, size_t interned){
342
+ static inline VALUE rb_unpack_str(VALUE strings, UnpackerData* unpacker_data, tiny_bits_value value, size_t interned){
279
343
  int32_t id = value.str_blob_val.id;
280
344
  if(id > 0)
281
- return unpacker_data->ruby_strings[id-1];
345
+ return rb_ary_entry(strings, id-1);
282
346
  else if(id <= 0){
283
347
  VALUE str;
284
348
  if(interned > 0){
@@ -288,14 +352,14 @@ static inline VALUE rb_unpack_str(UnpackerData* unpacker_data, tiny_bits_value v
288
352
  rb_obj_freeze(str);
289
353
  }
290
354
  if(id < 0){
291
- unpacker_data->ruby_strings[abs(id)-1] = str;
355
+ rb_ary_push(strings, str);
292
356
  }
293
357
  return str;
294
358
  }
295
359
  return Qundef;
296
360
  }
297
361
 
298
- static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
362
+ static VALUE unpack_ruby_object(VALUE strings, UnpackerData* unpacker_data, size_t interned) {
299
363
  tiny_bits_unpacker* unpacker = unpacker_data->unpacker;
300
364
  tiny_bits_value value;
301
365
  enum tiny_bits_type type = unpack_value(unpacker, &value);
@@ -306,7 +370,7 @@ static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
306
370
 
307
371
  switch (type) {
308
372
  case TINY_BITS_STR: {
309
- return rb_unpack_str(unpacker_data, value, interned);
373
+ return rb_unpack_str(strings, unpacker_data, value, interned);
310
374
  }
311
375
  case TINY_BITS_DOUBLE:
312
376
  return DBL2NUM(value.double_val);
@@ -321,7 +385,7 @@ static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
321
385
  case TINY_BITS_ARRAY: {
322
386
  VALUE arr = rb_ary_new_capa(value.length);
323
387
  for (size_t i = 0; i < value.length; i++) {
324
- VALUE element = unpack_ruby_object(unpacker_data, 0);
388
+ VALUE element = unpack_ruby_object(strings, unpacker_data, 0);
325
389
  if (element == Qundef) return Qundef; // Error
326
390
  rb_ary_push(arr, element);
327
391
  }
@@ -330,9 +394,9 @@ static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
330
394
  case TINY_BITS_MAP: {
331
395
  VALUE hash = rb_hash_new_capa(value.length);
332
396
  for (size_t i = 0; i < value.length; i++) {
333
- VALUE key = unpack_ruby_object(unpacker_data, 1);
397
+ VALUE key = unpack_ruby_object(strings, unpacker_data, 0);
334
398
  if (key == Qundef) return Qundef; // Error
335
- VALUE val = unpack_ruby_object(unpacker_data, 0);
399
+ VALUE val = unpack_ruby_object(strings, unpacker_data, 0);
336
400
  if (val == Qundef) return Qundef; // Error
337
401
  rb_hash_aset(hash, key, val);
338
402
  }
@@ -351,6 +415,15 @@ static VALUE unpack_ruby_object(UnpackerData* unpacker_data, size_t interned) {
351
415
  }
352
416
  }
353
417
 
418
+ /*
419
+ * Document-method: unpack
420
+ *
421
+ * Unpacks a binary string into a Ruby object.
422
+ *
423
+ * @param buffer [String] The binary string to unpack.
424
+ * @return [Object] The unpacked Ruby object.
425
+ * @raise [RuntimeError] If unpacking fails.
426
+ */
354
427
  static VALUE rb_unpack(VALUE self, VALUE buffer) {
355
428
  UnpackerData* unpacker_data;
356
429
  TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
@@ -363,14 +436,27 @@ static VALUE rb_unpack(VALUE self, VALUE buffer) {
363
436
 
364
437
  tiny_bits_unpacker_set_buffer(unpacker_data->unpacker, (const unsigned char*)RSTRING_PTR(buffer), RSTRING_LEN(buffer));
365
438
 
366
- VALUE result = unpack_ruby_object(unpacker_data, 0);
439
+ VALUE array = rb_iv_get(self, "@strings");
440
+ VALUE result = unpack_ruby_object(array, unpacker_data, 0);
441
+
367
442
  if (result == Qundef) {
368
443
  rb_raise(rb_eRuntimeError, "Failed to unpack data");
369
444
  }
370
445
 
446
+ rb_ary_clear(array);
447
+
371
448
  return result;
372
449
  }
373
450
 
451
+ /*
452
+ * Document-method: buffer=
453
+ *
454
+ * Sets the buffer for incremental unpacking.
455
+ *
456
+ * @param buffer [String] The binary buffer to unpack incrementally.
457
+ * @return [Unpacker] self
458
+ * @note Stores a reference to the buffer internally.
459
+ */
374
460
  static VALUE rb_set_buffer(VALUE self, VALUE buffer){
375
461
  UnpackerData* unpacker_data;
376
462
  TypedData_Get_Struct(self, UnpackerData, &unpacker_data_type, unpacker_data);
@@ -385,13 +471,25 @@ static VALUE rb_set_buffer(VALUE self, VALUE buffer){
385
471
 
386
472
  // set the buffer as an instance variable to maintain a reference to it
387
473
  rb_iv_set(self, "@buffer", buffer);
388
-
474
+ VALUE array = rb_iv_get(self, "@strings");
475
+ rb_ary_clear(array);
389
476
  return self;
390
477
  }
391
478
 
479
+ /*
480
+ * Document-method: pop
481
+ *
482
+ * Extracts the next object from the buffer.
483
+ *
484
+ * @return [Object, nil] The next unpacked object or nil if buffer is exhausted.
485
+ * @raise [RuntimeError] If no buffer is set or if unpacking fails.
486
+ * @note Requires #buffer= to be called first.
487
+ */
392
488
  static VALUE rb_pop(VALUE self) {
393
489
 
394
490
  VALUE buffer = rb_iv_get(self, "@buffer");
491
+ VALUE array = rb_iv_get(self, "@strings");
492
+
395
493
  if(buffer == Qnil){
396
494
  rb_raise(rb_eRuntimeError, "No buffer is set");
397
495
  }
@@ -404,27 +502,33 @@ static VALUE rb_pop(VALUE self) {
404
502
  }
405
503
 
406
504
  tiny_bits_unpacker* unpacker = unpacker_data->unpacker;
407
- tiny_bits_value value;
408
505
 
409
506
  if(unpacker->current_pos >= unpacker->size - 1){
507
+ rb_ary_clear(array);
410
508
  return Qnil;
411
509
  }
412
-
413
- if(unpacker->current_pos > 0){
414
- enum tiny_bits_type type = unpack_value(unpacker, &value);
415
- if(type != TINY_BITS_SEP){
416
- rb_raise(rb_eRuntimeError, "Malformed multi-object buffer");
417
- }
418
- }
419
-
420
- VALUE result = unpack_ruby_object(unpacker_data, 0);
510
+
511
+ VALUE result = unpack_ruby_object(array, unpacker_data, 0);
421
512
  if (result == Qundef) {
422
513
  rb_raise(rb_eRuntimeError, "Failed to unpack data");
423
514
  }
424
515
 
516
+ if(unpacker->current_pos >= (unpacker->size - 1)){
517
+ rb_ary_clear(array);
518
+ }
519
+
425
520
  return result;
426
521
  }
427
522
 
523
+ /*
524
+ * Document-method: finished?
525
+ *
526
+ * Checks if the buffer has been fully consumed.
527
+ *
528
+ * @return [Boolean] true if there are no more objects to unpack.
529
+ * @raise [RuntimeError] If no buffer is set.
530
+ * @note Requires #buffer= to be called first.
531
+ */
428
532
  static VALUE rb_finished(VALUE self){
429
533
  VALUE buffer = rb_iv_get(self, "@buffer");
430
534
  if(buffer == Qnil){
@@ -0,0 +1,57 @@
1
+ module TinyBits
2
+
3
+ class DPacker
4
+
5
+ # creates a new TinyBits::DPacker object
6
+ # @param dict [[String]] an array of strings to use as a dictionary (order is significant).
7
+ # @return [TinyBits::DPacker] the dpacker object.
8
+ # @raise [RuntimeError] if the dict is nil or if any member doesn't respond to to_s.
9
+ def initialize(dict)
10
+ @dict = dict.collect{|a| a.to_s }
11
+ @dict_set = false
12
+ @packer = Packer.new
13
+ end
14
+
15
+ def reset
16
+ @packer.reset
17
+ @dict_set = false
18
+ self
19
+ end
20
+
21
+ # packs an object to a binary string using the stored dictionary
22
+ # @param obj [Object] The Ruby object to pack.
23
+ # @return [String] The packed binary string (frozen).
24
+ # @raise [RuntimeError] If packing fails due to unsupported types or other errors.
25
+ def pack(object)
26
+ reset
27
+ self << object
28
+ to_s
29
+ end
30
+
31
+ # push an object to an incremental buffer
32
+ # @param obj [Object] The Ruby object to pack.
33
+ # @return [Integer] The number of bytes written to the buffer.
34
+ # @raise [RuntimeError] If packing fails due to unsupported types or other errors.
35
+ def <<(object)
36
+ if !@dict_set
37
+ #@packer.reset
38
+ @dict_size = @packer << @dict
39
+ @dict_set = true
40
+ end
41
+ res = @packer << object
42
+ res
43
+ end
44
+
45
+ # return the packed buffer
46
+ # @return [String] The packed buffer (minus the dictionary).
47
+ def to_s
48
+ res = @packer.to_s
49
+ if @dict_set
50
+ res[@dict_size, res.bytesize - @dict_size]
51
+ else
52
+ res
53
+ end
54
+ end
55
+
56
+ end
57
+ end
@@ -0,0 +1,35 @@
1
+ module TinyBits
2
+
3
+ class DUnpacker
4
+
5
+ def initialize(dict)
6
+ @dict = Packer.new.pack(dict.collect{|a| a.to_s })
7
+ @unpacker = Unpacker.new
8
+ @dict_popped = false
9
+ end
10
+
11
+ def unpack(buffer)
12
+ self.buffer = buffer
13
+ pop
14
+ end
15
+
16
+ def buffer=(buffer)
17
+ @unpacker.buffer = "#{@dict}#{buffer}"
18
+ @dict_popped = false
19
+ end
20
+
21
+ def pop
22
+ if !@dict_popped
23
+ @unpacker.pop
24
+ @dict_popped = true
25
+ end
26
+ @unpacker.pop
27
+ end
28
+
29
+ def finished?
30
+ @unpacker.finished?
31
+ end
32
+
33
+ end
34
+
35
+ end
@@ -1,3 +1,3 @@
1
1
  module TinyBits
2
- VERSION = '0.3.0'.freeze
2
+ VERSION = '0.6.0'.freeze
3
3
  end
data/lib/tinybits.rb CHANGED
@@ -1,7 +1,34 @@
1
1
  require_relative './tinybits/version'
2
2
  require 'tinybits_ext'
3
+ require_relative './tinybits/dpacker'
4
+ require_relative './tinybits/dunpacker'
3
5
 
4
6
  module TinyBits
5
- def self.pack(object) = Packer.new.pack(object)
6
- def self.unpack(buffer) = Unpacker.new.unpack(buffer)
7
+ # packs an object to a binary string
8
+ # @param obj [Object] The Ruby object to pack.
9
+ # @return [String] The packed binary string (frozen).
10
+ # @raise [RuntimeError] If packing fails due to unsupported types or other errors.
11
+ # this is a convenience interface, a better way is to instantiate a TinyBits::Packer
12
+ # object and use its #pack method
13
+ def self.pack(object, dict = nil)
14
+ if dict
15
+ DPacker.new(dict).pack(object)
16
+ else
17
+ Packer.new.pack(object)
18
+ end
19
+ end
20
+
21
+ # unpacks an object from a binary string
22
+ # @param buffer [String] The Ruby string holding the packed buffer.
23
+ # @return [Object] The unpacked Ruby Object (all strings within the object will be frozen).
24
+ # @raise [RuntimeError] If unpacking fails due to unsupported types or malformed data.
25
+ # this is a convenience interface, a better way is to instantiate a TinyBits::Unpacker
26
+ # object and use its #unpack method
27
+ def self.unpack(buffer, dict = nil)
28
+ if dict
29
+ DUnpacker.new(dict).unpack(buffer)
30
+ else
31
+ Unpacker.new.unpack(buffer)
32
+ end
33
+ end
7
34
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tinybits
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mohamed Hassan
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-05-04 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: TinyBits is a Ruby gem that wraps the TinyBits C serialization library,
13
13
  offering Rubyists the power of serialization with integer/float compression and string
@@ -19,10 +19,11 @@ extensions:
19
19
  extra_rdoc_files: []
20
20
  files:
21
21
  - ext/tinybits/extconf.rb
22
- - ext/tinybits/test_date.rb
23
22
  - ext/tinybits/tinybits.h
24
23
  - ext/tinybits/tinybits_ext.c
25
24
  - lib/tinybits.rb
25
+ - lib/tinybits/dpacker.rb
26
+ - lib/tinybits/dunpacker.rb
26
27
  - lib/tinybits/version.rb
27
28
  homepage: https://github.com/oldmoe/tinybits-rb
28
29
  licenses:
@@ -43,7 +44,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
43
44
  - !ruby/object:Gem::Version
44
45
  version: '0'
45
46
  requirements: []
46
- rubygems_version: 3.6.0.dev
47
+ rubygems_version: 3.6.9
47
48
  specification_version: 4
48
49
  summary: Very fast and compact serialization for Ruby!
49
50
  test_files: []
@@ -1,62 +0,0 @@
1
- require './tinybits_ext'
2
-
3
- packer = TinyBits::Packer.new
4
- unpacker = TinyBits::Unpacker.new
5
-
6
- t = [Time.now, nil, true, false]
7
-
8
-
9
-
10
- =begin
11
- puts packer.dump(t).bytesize
12
-
13
- puts t
14
- puts t2 = unpacker.unpack(packer.pack(t))
15
- puts t == t2
16
- =end
17
-
18
- class User
19
- def initialize(name:, title:)
20
- @name = name
21
- @title = title
22
- end
23
-
24
- def to_tinybits
25
- {"name" => @name, "title" => @title}
26
- end
27
- end
28
-
29
- objects = [{"abc": 123}, {"abc": [1, 2, "abc"]}, ["xyz", "abc", "xyz", 7.6] ]
30
-
31
- puts "----------------"
32
-
33
- #packer.reset
34
-
35
- objects.each do |obj|
36
- puts packer << obj
37
- end
38
-
39
- buffer = packer.to_s
40
-
41
- puts buffer.bytesize
42
-
43
- unpacker.buffer = buffer
44
-
45
-
46
- while(value = unpacker.pop)
47
- pp value
48
- puts "+++++++++++++++++++++++++"
49
- end
50
-
51
- packer.reset
52
-
53
- user = User.new(name: "Mohamed", title: "Father")
54
-
55
- data = { "user" => user, "tags" => ["user", "Father"] }
56
-
57
- pp user
58
- pp user.to_tinybits
59
- packed = packer.pack(data)
60
- puts packed.bytesize
61
- pp unpacker.unpack(packed)
62
-