twilic 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,512 @@
1
+ # frozen_string_literal: true
2
+
3
module Twilic
  module Core
    # Stateless helpers used by the protocol layer: column layout and codec
    # heuristics, byte-level run-length coding, template/message diffing and
    # encoded-size estimation.
    #
    # Every method is exposed through +module_function+, so it can be called as
    # +ProtocolHelpers.method_name(...)+.
    module ProtocolHelpers
      module_function

      # Chooses how the nulls of a single column are represented.
      #
      # @param values [Array] column values; each element responds to +kind+
      # @param present_bits [Array<Boolean>] one flag per row, true where a value is present
      # @return [Array] +[null_strategy, bitmap_or_nil, has_bitmap]+
      def column_null_strategy_local(values, present_bits)
        null_count = values.count { |v| v.kind == Model::ValueKind::NULL }
        return [Model::NullStrategy::ALL_PRESENT_ELIDED, nil, false] if null_count.zero?

        # Integer division: when nulls are at most a quarter of the rows the
        # bitmap is stored inverted (true marks a null).
        if null_count <= values.length / 4
          inverted = present_bits.map { |bit| !bit }
          return [Model::NullStrategy::INVERTED_PRESENCE_BITMAP, inverted, true]
        end
        [Model::NullStrategy::PRESENCE_BITMAP, present_bits.dup, true]
      end

      # Returns the values whose kind is not NULL, in their original order.
      def strip_nulls_local(values)
        values.reject { |v| v.kind == Model::ValueKind::NULL }
      end

      # Transposes row-oriented values into Model::Column objects.
      #
      # Rows shorter than the widest row are padded with +Model.null_value+.
      # The column index is used as the column's +field_id+.
      #
      # @param rows [Array<Array>] rows of values; each value responds to
      #   +clone_value+ and +kind+
      # @return [Array<Model::Column>, nil] nil when +rows+ is empty
      def rows_to_columns(rows)
        return nil if rows.empty?

        width = rows.map(&:length).max
        column_values = Array.new(width) { [] }
        column_presence = Array.new(width) { [] }
        rows.each do |row|
          width.times do |col|
            value = col < row.length ? row[col].clone_value : Model.null_value
            column_values[col] << value
            column_presence[col] << (value.kind != Model::ValueKind::NULL)
          end
        end
        Array.new(width) do |col|
          null_strategy, presence, has_presence = column_null_strategy_local(
            column_values[col], column_presence[col]
          )
          # Codec inference only sees the non-null values.
          codec, tvd = infer_column_codec_and_values(strip_nulls_local(column_values[col]))
          Model::Column.new(
            field_id: col, null_strategy: null_strategy, presence: presence,
            has_presence: has_presence, codec: codec, dictionary_id: nil, values: tvd
          )
        end
      end

      # Builds a Model::TypedVectorData with every payload list defaulting to
      # a fresh empty array, so callers only name the list they populate.
      def build_typed_vector_data(kind, bools: [], i64s: [], u64s: [], f64s: [], strings: [],
                                  binary: [], values: [])
        Model::TypedVectorData.new(
          kind: kind, bools: bools, i64s: i64s, u64s: u64s, f64s: f64s,
          strings: strings, binary: binary, values: values
        )
      end

      # Picks a vector codec and typed payload for a list of non-null values.
      #
      # Homogeneous I64 / U64 / F64 / BOOL / STRING lists get a typed payload and
      # a codec chosen by the matching +select_*_codec+ heuristic (BOOL always
      # uses DIRECT_BITPACK). Anything else is stored as cloned generic values
      # with the PLAIN codec.
      #
      # @param values [Array] values responding to +kind+ and the typed accessors
      # @return [Array] +[codec, Model::TypedVectorData]+
      def infer_column_codec_and_values(values)
        if values.empty?
          # The empty case carries nil (not []) as its generic value list.
          return [Model::VectorCodec::PLAIN,
                  build_typed_vector_data(Model::ElementType::VALUE, values: nil)]
        end

        kinds = values.map(&:kind)
        if kinds.all?(Model::ValueKind::I64)
          data = values.map(&:i64)
          return [select_integer_codec(data), typed_data_i64(data)]
        end
        if kinds.all?(Model::ValueKind::U64)
          data = values.map(&:u64)
          return [select_u64_codec(data), typed_data_u64(data)]
        end
        if kinds.all?(Model::ValueKind::F64)
          data = values.map(&:f64)
          return [select_float_codec(data), typed_data_f64(data)]
        end
        if kinds.all?(Model::ValueKind::BOOL)
          data = values.map(&:bool)
          return [Model::VectorCodec::DIRECT_BITPACK, typed_data_bool(data)]
        end
        if kinds.all?(Model::ValueKind::STRING)
          data = values.map(&:str)
          return [select_string_codec(data), typed_data_string(data)]
        end

        cloned = values.map(&:clone_value)
        [Model::VectorCodec::PLAIN, build_typed_vector_data(Model::ElementType::VALUE, values: cloned)]
      end

      # Wraps +data+ as an I64 typed vector.
      def typed_data_i64(data)
        build_typed_vector_data(Model::ElementType::I64, i64s: data)
      end

      # Wraps +data+ as a U64 typed vector.
      def typed_data_u64(data)
        build_typed_vector_data(Model::ElementType::U64, u64s: data)
      end

      # Wraps +data+ as an F64 typed vector.
      def typed_data_f64(data)
        build_typed_vector_data(Model::ElementType::F64, f64s: data)
      end

      # Wraps +data+ as a BOOL typed vector.
      def typed_data_bool(data)
        build_typed_vector_data(Model::ElementType::BOOL, bools: data)
      end

      # Wraps +data+ as a STRING typed vector.
      def typed_data_string(data)
        build_typed_vector_data(Model::ElementType::STRING, strings: data)
      end

      # Heuristically selects a codec for a list of signed integers.
      #
      # The checks run in a fixed order and the first match wins, so their
      # order is part of the behavior. Lists shorter than 4 always use PLAIN.
      #
      # @param values [Array<Integer>]
      # @return [Object] a Model::VectorCodec constant
      def select_integer_codec(values)
        return Model::VectorCodec::PLAIN if values.length < 4

        delta_vals = deltas(values)
        dd = deltas(delta_vals)
        # The first entry of dd is skipped when counting non-zero second-order deltas.
        non_zero_dd = dd.drop(1).count { |d| d != 0 }
        non_zero_ratio = dd.length > 1 ? non_zero_dd.to_f / (dd.length - 1) : 0.0
        delta_range_bits = bit_width_signed(delta_vals.min, delta_vals.max)
        return Model::VectorCodec::DELTA_DELTA_BITPACK if values.length >= 8 &&
                                                          (non_zero_ratio <= 0.25 || delta_range_bits <= 2)

        repeated_ratio, avg_run = run_stats(values)
        return Model::VectorCodec::RLE if repeated_ratio >= 0.5 && avg_run >= 3.0

        range_bits = bit_width_signed(values.min, values.max)
        return Model::VectorCodec::FOR_BITPACK if range_bits <= 60

        monotonic = values.each_cons(2).all? { |a, b| b >= a }
        return Model::VectorCodec::DELTA_FOR_BITPACK if values.length >= 8 && monotonic &&
                                                        delta_range_bits <= range_bits - 3

        max_abs_delta_bits = delta_vals.map { |v| bit_width_u64(abs64(v)) }.max
        return Model::VectorCodec::DELTA_BITPACK if max_abs_delta_bits <= 61

        max_bit_width = values.map { |v| bit_width_u64(abs64(v)) }.max
        return Model::VectorCodec::SIMPLE8B if values.length >= 8 && max_bit_width <= 16 && !monotonic
        return Model::VectorCodec::DIRECT_BITPACK if max_bit_width < 64

        Model::VectorCodec::PLAIN
      end

      # Heuristically selects a codec for a list of unsigned integers.
      #
      # When every value is at most 0x7FFFFFFFFFFFFFFF the decision is delegated
      # to +select_integer_codec+; otherwise a reduced set of checks is applied.
      #
      # @param values [Array<Integer>]
      # @return [Object] a Model::VectorCodec constant
      def select_u64_codec(values)
        if values.all? { |v| v <= 0x7FFFFFFFFFFFFFFF }
          return select_integer_codec(values.map { |v| v & 0x7FFFFFFFFFFFFFFF })
        end
        return Model::VectorCodec::DIRECT_BITPACK if values.length < 4

        repeated_ratio, avg_run = run_stats_u64(values)
        return Model::VectorCodec::RLE if repeated_ratio >= 0.5 && avg_run >= 3.0

        return Model::VectorCodec::FOR_BITPACK if bit_width_u64(values.max - values.min) <= 60

        max_width = values.map { |v| bit_width_u64(v) }.max
        return Model::VectorCodec::SIMPLE8B if values.length >= 8 && max_width <= 16
        return Model::VectorCodec::DIRECT_BITPACK if max_width < 64

        Model::VectorCodec::PLAIN
      end

      # Selects XOR_FLOAT when at most half of the entries differ (bitwise) from
      # their predecessor, otherwise PLAIN. Lists shorter than 4 use PLAIN.
      #
      # @param values [Array<Float>]
      # @return [Object] a Model::VectorCodec constant
      def select_float_codec(values)
        return Model::VectorCodec::PLAIN if values.length < 4

        # Compare the 64-bit little-endian double patterns rather than the floats themselves.
        bit_patterns = values.map { |v| [v].pack("E").unpack1("Q<") }
        changes = bit_patterns.each_cons(2).count { |before, after| before != after }
        changes * 2 <= values.length ? Model::VectorCodec::XOR_FLOAT : Model::VectorCodec::PLAIN
      end

      # Heuristically selects a codec for a list of strings.
      #
      # DICTIONARY wins when at most half of the entries are distinct;
      # PREFIX_DELTA wins when the summed byte prefix shared with the previous
      # entry exceeds two bytes per entry; otherwise PLAIN.
      #
      # @param values [Array<String>]
      # @return [Object] a Model::VectorCodec constant
      def select_string_codec(values)
        return Model::VectorCodec::PLAIN if values.empty?

        return Model::VectorCodec::DICTIONARY if values.uniq.length * 2 <= values.length

        prefix_gain = 0
        prev = ""
        values.each do |v|
          # common_prefix_len works on raw bytes, so no binary copies are needed.
          prefix_gain += common_prefix_len(prev, v)
          prev = v
        end
        return Model::VectorCodec::PREFIX_DELTA if prefix_gain > values.length * 2

        Model::VectorCodec::PLAIN
      end

      # Returns first-order differences; the first element is kept as-is.
      #
      # @example
      #   deltas([5, 7, 10]) #=> [5, 2, 3]
      def deltas(values)
        values.each_with_index.map { |value, i| i.zero? ? value : value - values[i - 1] }
      end

      # Computes run statistics over consecutive equal elements.
      #
      # @param values [Array]
      # @return [Array(Float, Float)] fraction of elements that sit in a run
      #   longer than one, and the mean run length; +[0.0, 0.0]+ for empty input
      def run_stats(values)
        return [0.0, 0.0] if values.empty?

        runs = values.chunk_while { |prev, cur| cur == prev }.map(&:length)
        repeated_items = runs.select { |r| r > 1 }.sum
        [repeated_items.to_f / values.length, runs.sum.to_f / runs.length]
      end

      # Same statistics as +run_stats+; kept as a separate entry point.
      def run_stats_u64(values)
        run_stats(values)
      end

      # Bit width of the absolute distance between +min+ and +max+.
      def bit_width_signed(min, max)
        range_val = max >= min ? max - min : min - max
        bit_width_u64(range_val)
      end

      # Number of binary digits needed for +v+; zero counts as one bit.
      def bit_width_u64(v)
        return 1 if v.zero?

        # Integer#bit_length equals the binary digit count for positive values.
        # Negative input keeps the legacy result (digit count plus the sign
        # character of the binary string).
        v.positive? ? v.bit_length : v.to_s(2).length
      end

      # Absolute value of +v+.
      def abs64(v)
        v.negative? ? -v : v
      end

      # Length in bytes of the longest common byte prefix of +a+ and +b+.
      def common_prefix_len(a, b)
        n = [a.bytesize, b.bytesize].min
        i = 0
        i += 1 while i < n && a.getbyte(i) == b.getbyte(i)
        i
      end

      # Run-length encodes the bytes of +input+ as (count, byte) pairs, with
      # each count in 1..255.
      #
      # The input is processed strictly byte-wise, so strings in any encoding
      # (including multi-byte UTF-8) are handled correctly.
      #
      # @param input [String]
      # @return [String, nil] binary-encoded payload, or nil for empty input
      def rle_encode_bytes(input)
        return nil if input.empty?

        out = String.new(encoding: Encoding::BINARY)
        total = input.bytesize
        i = 0
        while i < total
          byte = input.getbyte(i)
          j = i + 1
          # Extend the run while the byte repeats, capping it at 255 so the
          # count fits in a single byte.
          j += 1 while j < total && j - i < 255 && input.getbyte(j) == byte
          out << [j - i, byte].pack("CC")
          i = j
        end
        out
      end

      # Expands a payload of (count, byte) pairs back into raw bytes.
      #
      # @param input [String]
      # @return [String] binary-encoded decoded bytes
      # @raise whatever +Errors.invalid_data+ builds, when the payload ends in
      #   the middle of a pair
      def rle_decode_bytes(input)
        out = String.new(encoding: Encoding::BINARY)
        total = input.bytesize
        i = 0
        while i < total
          raise Errors.invalid_data("rle payload") if i + 1 >= total

          run = input.getbyte(i)
          byte = input.getbyte(i + 1)
          out << ([byte].pack("C") * run)
          i += 2
        end
        out
      end

      # Currently a pass-through: returns a binary-encoded copy of +input+.
      def control_bitpack_encode_bytes(input)
        input.b
      end

      # Currently a pass-through: returns a binary-encoded copy of +input+.
      def control_bitpack_decode_bytes(input)
        input.b
      end

      # Currently a pass-through: returns a copy of +input+.
      def control_huffman_encode_bytes(input)
        input.dup
      end

      # Currently a pass-through: returns a copy of +input+.
      def control_huffman_decode_bytes(input)
        input.dup
      end

      # Currently a pass-through: returns a copy of +input+.
      def control_fse_encode_bytes(input)
        input.dup
      end

      # Currently a pass-through: returns a copy of +input+.
      def control_fse_decode_bytes(input)
        input.dup
      end

      # Builds a Model::TemplateDescriptor from the field ids, null strategies
      # and codecs of +columns+.
      def template_descriptor_from_columns(template_id, columns)
        Model::TemplateDescriptor.new(
          template_id: template_id,
          field_ids: columns.map(&:field_id),
          null_strategies: columns.map(&:null_strategy),
          codecs: columns.map(&:codec)
        )
      end

      # Looks up the lowest template id whose field ids and null strategies
      # match +columns+ position by position. Codecs are not compared.
      #
      # @param templates [Hash] template id => descriptor
      # @return [Array] +[id, true]+ on a match, +[0, false]+ otherwise
      def find_template_id(templates, columns)
        templates.keys.sort.each do |id|
          t = templates[id]
          next if t.field_ids.length != columns.length

          ok = t.field_ids.each_with_index.all? do |fid, i|
            fid == columns[i].field_id && t.null_strategies[i] == columns[i].null_strategy
          end
          return [id, true] if ok
        end
        [0, false]
      end

      # Determines which columns of +current+ differ from +previous+.
      #
      # NOTE(review): a column counts as changed only when it has no
      # predecessor or its *estimated size* differs; columns with different
      # content but the same estimated size are reported as unchanged.
      # Confirm this is intended.
      #
      # @return [Array] +[mask, changed]+ where +mask[i]+ is true for changed
      #   columns and +changed+ lists those columns in order
      def diff_template_columns(previous, current)
        mask = Array.new(current.length, false)
        changed = []
        current.each_with_index do |col, i|
          if i >= previous.length || estimate_column_size(previous[i]) != estimate_column_size(col)
            mask[i] = true
            changed << col
          end
        end
        [mask, changed]
      end

      # Rebuilds a column list from +previous+ and the +changed+ columns.
      #
      # For every true bit the next entry of +changed+ is taken; for every
      # false bit the column at the same index of +previous+ is reused.
      #
      # @raise whatever +Errors.invalid_data+ builds, when +changed+ runs out
      #   or +previous+ has no column at a reused index
      def merge_template_columns(previous, changed_mask, changed)
        out = Array.new(changed_mask.length)
        idx = 0
        changed_mask.each_with_index do |bit, i|
          if bit
            raise Errors.invalid_data("template changed column count mismatch") if idx >= changed.length

            out[i] = changed[idx]
            idx += 1
          else
            raise Errors.invalid_data("template reference out of range") if i >= previous.length

            out[i] = previous[i]
          end
        end
        out
      end

      # Produces positional patch operations that turn +prev+ into +current+.
      #
      # Fields are compared by index: equal fields yield KEEP, differing
      # fields REPLACE_SCALAR, extra fields in +current+ INSERT_FIELD and
      # fields missing from +current+ DELETE_FIELD.
      #
      # @return [Array] +[operations, 0]+; the second element is always 0
      def diff_message(prev, current)
        a = message_fields(prev)
        b = message_fields(current)
        n = [a.length, b.length].max
        ops = []
        n.times do |i|
          if i < a.length && i < b.length
            if Model.equal(a[i], b[i])
              ops << Model::PatchOperation.new(field_id: i, opcode: Model::PatchOpcode::KEEP, value: nil)
            else
              ops << Model::PatchOperation.new(
                field_id: i, opcode: Model::PatchOpcode::REPLACE_SCALAR, value: b[i].clone_value
              )
            end
          elsif i < b.length
            ops << Model::PatchOperation.new(
              field_id: i, opcode: Model::PatchOpcode::INSERT_FIELD, value: b[i].clone_value
            )
          else
            ops << Model::PatchOperation.new(field_id: i, opcode: Model::PatchOpcode::DELETE_FIELD, value: nil)
          end
        end
        [ops, 0]
      end

      # Returns cloned field values of an ARRAY, MAP, SHAPED_OBJECT or
      # SCHEMA_OBJECT message; any other message kind yields an empty list.
      def message_fields(message)
        case message.kind
        when Model::MessageKind::ARRAY
          message.array.map(&:clone_value)
        when Model::MessageKind::MAP
          message.map.map { |e| e.value.clone_value }
        when Model::MessageKind::SHAPED_OBJECT
          message.shaped_object.values.map(&:clone_value)
        when Model::MessageKind::SCHEMA_OBJECT
          message.schema_object.fields.map(&:clone_value)
        else
          []
        end
      end

      # Builds a message of the same kind as +base+ carrying +fields+.
      #
      # MAP messages reuse the keys of +base+ by position; shaped and schema
      # objects reuse the ids and presence information of +base+.
      #
      # @raise whatever +Errors.invalid_data+ builds, when a MAP receives more
      #   fields than +base+ has entries or the message kind is unsupported
      def rebuild_message_like(base, fields)
        case base.kind
        when Model::MessageKind::ARRAY
          Model.message(kind: Model::MessageKind::ARRAY, array: fields)
        when Model::MessageKind::MAP
          entries = fields.each_with_index.map do |value, i|
            raise Errors.invalid_data("patch map shape mismatch") if i >= base.map.length

            Model::MessageMapEntry.new(key: base.map[i].key, value: value)
          end
          Model.message(kind: Model::MessageKind::MAP, map: entries)
        when Model::MessageKind::SHAPED_OBJECT
          s = base.shaped_object
          Model.message(kind: Model::MessageKind::SHAPED_OBJECT, shaped_object: Model::ShapedObjectMessage.new(
            shape_id: s.shape_id, presence: s.presence&.dup, has_presence: s.has_presence, values: fields
          ))
        when Model::MessageKind::SCHEMA_OBJECT
          s = base.schema_object
          Model.message(kind: Model::MessageKind::SCHEMA_OBJECT, schema_object: Model::SchemaObjectMessage.new(
            schema_id: s.schema_id, presence: s.presence&.dup, has_presence: s.has_presence, fields: fields
          ))
        else
          raise Errors.invalid_data("state patch reconstruction unsupported for this message kind")
        end
      end

      # Estimates the encoded size of +message+.
      # Message kinds without a dedicated branch are estimated at 16.
      def estimate_message_size(message)
        case message.kind
        when Model::MessageKind::SCALAR
          1 + estimate_value_size(message.scalar)
        when Model::MessageKind::ARRAY
          1 + varuint_size(message.array.length) + message.array.sum { |v| estimate_value_size(v) }
        when Model::MessageKind::MAP
          1 + varuint_size(message.map.length) +
            message.map.sum { |e| encoded_key_ref_size(e.key) + estimate_value_size(e.value) }
        when Model::MessageKind::STATE_PATCH
          sp = message.state_patch
          operations_size = sp.operations.sum do |op|
            varuint_size(op.field_id) + 2 + (op.value ? estimate_value_size(op.value) : 0)
          end
          1 + 2 + varuint_size(sp.operations.length) + operations_size
        else
          16
        end
      end

      # Estimates the encoded size of +column+ from its element type and
      # element count (or string sizes for STRING columns).
      def estimate_column_size(column)
        size = varuint_size(column.field_id) + 4
        case column.values.kind
        when Model::ElementType::BOOL
          size + column.values.bools.length / 8 + 2
        when Model::ElementType::I64
          size + column.values.i64s.length * 4
        when Model::ElementType::U64
          size + column.values.u64s.length * 4
        when Model::ElementType::F64
          size + column.values.f64s.length * 8
        when Model::ElementType::STRING
          size + column.values.strings.sum { |s| encoded_string_size(s) }
        else
          size
        end
      end

      # Estimates the encoded size of a single value, recursing into arrays
      # and maps. Unknown kinds are estimated at 1.
      def estimate_value_size(value)
        case value.kind
        when Model::ValueKind::NULL, Model::ValueKind::BOOL then 1
        when Model::ValueKind::I64 then 2 + smallest_u64_size(Wire.encode_zigzag(value.i64))
        when Model::ValueKind::U64 then 2 + smallest_u64_size(value.u64)
        when Model::ValueKind::F64 then 9
        when Model::ValueKind::STRING then 2 + encoded_string_size(value.str)
        when Model::ValueKind::BINARY then 1 + encoded_bytes_size(value.bin.bytesize)
        when Model::ValueKind::ARRAY
          1 + varuint_size(value.arr.length) + value.arr.sum { |v| estimate_value_size(v) }
        when Model::ValueKind::MAP
          1 + varuint_size(value.map.length) +
            value.map.sum { |e| encoded_string_size(e.key) + estimate_value_size(e.value) }
        else
          1
        end
      end

      # Size of a length-prefixed byte run: varuint length prefix plus payload.
      def encoded_bytes_size(length)
        varuint_size(length) + length
      end

      # Size of +value+ as a length-prefixed byte string.
      def encoded_string_size(value)
        encoded_bytes_size(value.bytesize)
      end

      # Size of a key reference: one byte plus a varuint for id keys,
      # a length-prefixed string for literal keys.
      def encoded_key_ref_size(key)
        if key.is_id
          1 + varuint_size(key.id)
        else
          encoded_string_size(key.literal)
        end
      end

      # Number of 7-bit groups needed to hold +value+ (at least 1).
      def varuint_size(value)
        sz = 1
        while value >= 0x80
          value >>= 7
          sz += 1
        end
        sz
      end

      # Smallest of 1, 2, 4 or 8 bytes whose unsigned range contains +value+.
      def smallest_u64_size(value)
        if value <= 0xFF then 1
        elsif value <= 0xFFFF then 2
        elsif value <= 0xFFFFFFFF then 4
        else 8
        end
      end

      # Resolves +key+ to its string form, returning nil when it is empty.
      def key_ref_field_identity(key, state)
        s = key_ref_string(key, state)
        s.empty? ? nil : s
      end

      # Resolves +key+ to a string: literal keys return their literal, id keys
      # are looked up in +state.key_table+ and yield "" when the lookup fails.
      def key_ref_string(key, state)
        return key.literal unless key.is_id

        s, ok = state.key_table.get_value(key.id)
        ok ? s : ""
      end
    end
  end
end