twilic 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2044 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "twilic/core/model"
4
+ require "twilic/core/wire"
5
+ require "twilic/core/codec"
6
+ require "twilic/core/session"
7
+ require "twilic/core/dictionary"
8
+ require "twilic/core/errors"
9
+ require "twilic/core/v2"
10
+
11
+ module Twilic
12
+ module Core
13
+ module Protocol
14
+
15
# Module-level convenience constructor.
#
# @return [TwilicCodec] a codec built without any options
def self.new_twilic_codec = TwilicCodec.new
18
+
19
# Module-level convenience constructor that forwards +options+ unchanged.
#
# @param options [Object] handed straight to TwilicCodec.new
# @return [TwilicCodec]
def self.twilic_codec_with_options(options) = TwilicCodec.new(options)
22
+
23
# One-byte tags that open every encoded value. write_value_with_field emits
# them and read_value_with_field dispatches on them.
TAG_NULL       = 0
TAG_BOOL_FALSE = 1 # booleans carry their value in the tag itself
TAG_BOOL_TRUE  = 2
TAG_I64        = 3
TAG_U64        = 4
TAG_F64        = 5
TAG_STRING     = 6
TAG_BINARY     = 7
TAG_ARRAY      = 8
TAG_MAP        = 9
33
+
34
+ class TwilicCodec
35
+ attr_accessor :state
36
+
37
# Creates the codec's session state and gives it fresh key, string and
# shape tables.
#
# @param options [Object, nil] forwarded to Session::MutableSessionState.new
#   when truthy; otherwise the state is built with no arguments
def initialize(options = nil)
  @state =
    if options
      Session::MutableSessionState.new(options)
    else
      Session::MutableSessionState.new
    end

  # The tables are always replaced, whatever the freshly built state held.
  @state.key_table = Session::MutableInternTable.new
  @state.string_table = Session::MutableInternTable.new
  @state.shape_table = Session::MutableShapeTable.new
end
43
+
44
# Class-level alias for +new+ with no options.
#
# @return [TwilicCodec]
def self.new_twilic_codec = new
47
+
48
# Class-level alias for +new(options)+.
#
# @param options [Object] forwarded to +new+
# @return [TwilicCodec]
def self.twilic_codec_with_options(options) = new(options)
51
+
52
# Serializes +message+ into a newly allocated string.
#
# All of the work is done by write_message, so any session-state updates it
# performs also happen here.
#
# @param message [Object] message accepted by write_message
# @return [String] the encoded bytes
def encode_message(message)
  "".dup.tap { |buffer| write_message(message, buffer) }
end
57
+
58
# Parses exactly one message from +bytes+ and updates the remembered
# "previous message" in the session state according to the message kind.
#
# * CONTROL: the remembered message is left alone.
# * STATE_PATCH: the patch is applied and the reconstructed result is
#   remembered. Failures while applying are swallowed unless
#   Errors.unknown_reference? or Errors.stateless_retry? recognises them.
# * TEMPLATE_BATCH: a clone is remembered only when nothing is remembered yet.
# * anything else: a clone of the decoded message is remembered.
#
# @param bytes [String] one encoded message
# @return [Object] the decoded message
# @raise the error built by Errors.invalid_data when bytes remain after the
#   message, plus anything read_message raises
def decode_message(bytes)
  reader = Wire::Reader.new(bytes)
  decoded = read_message(reader)
  raise Errors.invalid_data("trailing bytes in message") unless reader.eof?

  # Stores +body+ together with the size of the frame it came from.
  remember = lambda do |body|
    @state.previous_message = body
    @state.previous_message_size = bytes.bytesize
  end

  case decoded.kind
  when Model::MessageKind::CONTROL
    # control does not update previous message body
    nil
  when Model::MessageKind::STATE_PATCH
    begin
      patch = decoded.state_patch
      remember.call(apply_state_patch(patch.base_ref, patch.operations, patch.literals))
    rescue StandardError => e
      # Only reference/retry failures propagate; the rest are dropped on purpose.
      raise if Errors.unknown_reference?(e) || Errors.stateless_retry?(e)
    end
  when Model::MessageKind::TEMPLATE_BATCH
    remember.call(decoded.clone_message) if @state.previous_message.nil?
  else
    remember.call(decoded.clone_message)
  end

  decoded
end
89
+
90
# Encodes a plain value: picks a message representation for it, serializes
# that message, and remembers a clone of the message and the encoded size
# in the session state.
#
# @param value [Object] value understood by message_for_value
# @return [String] the encoded bytes
def encode_value(value)
  message = message_for_value(value)
  encode_message(message).tap do |wire|
    @state.previous_message = message.clone_message
    @state.previous_message_size = wire.bytesize
  end
end
97
+
98
# Decodes one message from +bytes+ and converts it back into a plain value.
#
# Only scalar, array, map, shaped-object and typed-vector messages can be
# turned into a value; every other kind raises.
#
# Session bookkeeping is left entirely to decode_message. This method used
# to overwrite @state.previous_message with a clone of the decoded message
# before inspecting its kind. For the supported kinds that repeated what
# decode_message had just done. For unsupported kinds (control, state
# patch, template batch, ...) it replaced the state decode_message had
# deliberately kept or reconstructed, and then raised anyway.
#
# @param bytes [String] one encoded message
# @return [Object] the decoded value
# @raise the error built by Errors.invalid_data for unsupported message
#   kinds, a reference error when a shaped object names an unknown shape id,
#   plus anything decode_message raises
def decode_value(bytes)
  msg = decode_message(bytes)

  case msg.kind
  when Model::MessageKind::SCALAR
    msg.scalar.clone_value
  when Model::MessageKind::ARRAY
    Model.array_value(msg.array)
  when Model::MessageKind::MAP
    Model.map_value(entries_to_map(msg.map, @state))
  when Model::MessageKind::SHAPED_OBJECT
    shaped = msg.shaped_object
    keys, ok = @state.shape_table.get_keys(shaped.shape_id)
    raise reference_error("shape_id", shaped.shape_id) unless ok

    Model.map_value(
      shape_values_to_map(keys, shaped.presence, shaped.has_presence, shaped.values)
    )
  when Model::MessageKind::TYPED_VECTOR
    typed_vector_to_value(msg.typed_vector)
  else
    raise Errors.invalid_data("decode_value expects scalar/array/map/vector message")
  end
end
127
+
128
# Raises the error for a reference that cannot be resolved. The error comes
# from Errors.stateless_retry_required when the session's unknown-reference
# policy is STATELESS_RETRY, and from Errors.unknown_reference otherwise.
#
# This method raises by itself and never returns, so a +raise+ written in
# front of a call to it never gets to run.
#
# @param kind [String] label of the reference type (for example "shape_id")
# @param id [Integer] the unresolved identifier
def reference_error(kind, id)
  policy = @state.options.unknown_reference_policy
  failure =
    if policy == Session::UnknownReferencePolicy::STATELESS_RETRY
      Errors.stateless_retry_required(kind, id)
    else
      Errors.unknown_reference(kind, id)
    end
  raise failure
end
134
+
135
# Delegates to the session's shape table to derive the lookup key for a
# list of field names.
#
# @param keys [Array] field names
# @return [Object] whatever the shape table's shape_key returns
def shape_key(keys) = @state.shape_table.shape_key(keys)
138
+
139
# Chooses the message representation used to encode +value+.
#
# * Arrays become a typed vector when try_make_typed_vector accepts them,
#   otherwise an ARRAY message holding cloned elements.
# * Maps become a shaped object when the shape table already has an id for
#   their keys and either this key set had no prior encode observation or it
#   has now been observed at least twice; otherwise a MAP message.
# * Everything else becomes a SCALAR message holding a clone.
#
# Each map passed in is recorded through observe_encode_shape_candidate.
#
# @param value [Object] value exposing +kind+ and the matching payload reader
# @return [Object] the message built through Model.message / helpers
def message_for_value(value)
  case value.kind
  when Model::ValueKind::ARRAY
    vector, packed = try_make_typed_vector(value.arr)
    if packed
      Model.message(kind: Model::MessageKind::TYPED_VECTOR, typed_vector: vector)
    else
      Model.message(kind: Model::MessageKind::ARRAY, array: value.arr.map(&:clone_value))
    end
  when Model::ValueKind::MAP
    field_names = value.map.map(&:key)
    # Must be sampled before the observation below records this key set.
    seen_before = @state.encode_shape_observations.key?(shape_key(field_names))
    sightings = observe_encode_shape_candidate(field_names)
    shape_id, registered = @state.shape_table.get_id(field_names)

    if registered && (!seen_before || sightings >= 2)
      shaped_message(shape_id, value.map)
    else
      map_message(value.map)
    end
  else
    Model.message(kind: Model::MessageKind::SCALAR, scalar: value.clone_value)
  end
end
160
+
161
# Builds a MAP message from value-level map entries.
#
# A key already present in the session key table is written as an id
# reference. An unknown key is registered in the table and written as a
# literal. Entry values are cloned.
#
# @param entries [Array] objects exposing +key+ and +value+
# @return [Object] MAP message
def map_message(entries)
  encoded = entries.map do |entry|
    name = entry.key
    known_id, known = @state.key_table.get_id(name)
    @state.key_table.register(name) unless known

    ref = known ? Model::KeyRef.id_ref(known_id) : Model::KeyRef.literal(name)
    Model::MessageMapEntry.new(key: ref, value: entry.value.clone_value)
  end

  Model.message(kind: Model::MessageKind::MAP, map: encoded)
end
175
+
176
# Builds a SHAPED_OBJECT message for +entries+ using the key order that the
# shape table stores for +shape_id+.
#
# Values are emitted in shape-key order, cloned, and only for keys that are
# present in +entries+. A presence bitmap is attached only when at least one
# shape key is missing; otherwise +presence+ is nil and +has_presence+ false.
#
# @param shape_id [Integer] id of a shape known to the shape table
# @param entries [Array] objects exposing +key+ and +value+
# @return [Object] SHAPED_OBJECT message
def shaped_message(shape_id, entries)
  keys, = @state.shape_table.get_keys(shape_id)
  by_name = entries.to_h { |entry| [entry.key, entry.value] }

  presence = keys.map { |key| by_name[key] ? true : false }
  values = keys.select { |key| by_name[key] }.map { |key| by_name[key].clone_value }
  complete = presence.all?

  shaped = Model::ShapedObjectMessage.new(
    shape_id: shape_id,
    values: values,
    has_presence: !complete,
    presence: complete ? nil : presence
  )
  Model.message(kind: Model::MessageKind::SHAPED_OBJECT, shaped_object: shaped)
end
203
+
204
# Tries to represent +values+ as a homogeneous typed vector.
#
# A vector is produced only when there are at least four values and all of
# them share one of the kinds BOOL, I64, U64, F64 or STRING. Bool vectors
# always use the DIRECT_BITPACK codec; the other kinds ask the matching
# select_*_codec helper to choose a codec for the extracted payloads.
#
# @param values [Array] values exposing +kind+ and the per-kind reader
#   (+bool+, +i64+, +u64+, +f64+ or +str+)
# @return [Array] +[typed_vector, true]+ on success, +[nil, false]+ otherwise
def try_make_typed_vector(values)
  return [nil, false] if values.length < 4

  # The first element decides the only kind the vector could have:
  # [value kind, element type, value reader, data slot, codec chooser].
  kind, element_type, reader, slot, choose_codec =
    case values.first.kind
    when Model::ValueKind::BOOL
      [Model::ValueKind::BOOL, Model::ElementType::BOOL, :bool, :bools,
       ->(_items) { Model::VectorCodec::DIRECT_BITPACK }]
    when Model::ValueKind::I64
      [Model::ValueKind::I64, Model::ElementType::I64, :i64, :i64s,
       ->(items) { select_integer_codec(items) }]
    when Model::ValueKind::U64
      [Model::ValueKind::U64, Model::ElementType::U64, :u64, :u64s,
       ->(items) { select_u64_codec(items) }]
    when Model::ValueKind::F64
      [Model::ValueKind::F64, Model::ElementType::F64, :f64, :f64s,
       ->(items) { select_float_codec(items) }]
    when Model::ValueKind::STRING
      [Model::ValueKind::STRING, Model::ElementType::STRING, :str, :strings,
       ->(items) { select_string_codec(items) }]
    else
      return [nil, false]
    end

  # Any element of another kind (supported or not) rules the vector out.
  return [nil, false] unless values.all? { |value| kind === value.kind }

  items = values.map(&reader)
  slots = { bools: [], i64s: [], u64s: [], f64s: [], strings: [], binary: [], values: [] }
  slots[slot] = items

  vector = Model::TypedVector.new(
    element_type: element_type,
    codec: choose_codec.call(items),
    data: Model::TypedVectorData.new(kind: element_type, **slots)
  )
  [vector, true]
end
351
+
352
# Appends the wire form of +message+ to +out+: the kind byte followed by a
# kind-specific payload.
#
# Writing is stateful. Besides whatever write_value / write_key_ref do to
# the session tables, this method itself
# * records the schema id as @state.last_schema_id when a schema object is
#   written with an explicit id whose schema is known, and
# * registers the payload of a base snapshot through
#   @state.register_base_snapshot.
#
# @param message [Object] message exposing +kind+ and the matching payload
# @param out [String] mutable buffer that receives the bytes
# @raise the error built by Errors.invalid_data for an unhandled kind
def write_message(message, out)
  # Every handled kind starts with its kind byte; an unhandled kind must
  # raise without having written anything, hence the deferred emit.
  put_kind = -> { out << message.kind.value.chr }

  case message.kind
  when Model::MessageKind::SCALAR
    put_kind.call
    write_value(message.scalar, out)
  when Model::MessageKind::ARRAY
    put_kind.call
    items = message.array
    Wire.encode_varuint(items.length, out)
    items.each { |item| write_value(item, out) }
  when Model::MessageKind::MAP
    put_kind.call
    pairs = message.map
    Wire.encode_varuint(pairs.length, out)
    pairs.each do |pair|
      write_key_ref(pair.key, out)
      write_value_with_field(pair.value, key_ref_field_identity(pair.key, @state), out)
    end
  when Model::MessageKind::SHAPED_OBJECT
    put_kind.call
    shaped = message.shaped_object
    payload = shaped.values
    Wire.encode_varuint(shaped.shape_id, out)
    write_presence(shaped.presence, shaped.has_presence, out)
    Wire.encode_varuint(payload.length, out)

    keys, known = @state.shape_table.get_keys(shaped.shape_id)
    if known
      # Without a bitmap every shape key counts as present.
      mask = shaped.has_presence ? shaped.presence : Array.new(keys.length, true)
      written = 0
      keys.each_with_index do |key, slot|
        next if slot < mask.length && !mask[slot]
        break if written >= payload.length

        write_value_with_field(payload[written], key, out)
        written += 1
      end
      # Values beyond the shape's keys are written without a field identity.
      payload.drop(written).each { |extra| write_value(extra, out) }
    else
      payload.each { |item| write_value(item, out) }
    end
  when Model::MessageKind::SCHEMA_OBJECT
    put_kind.call
    object = message.schema_object
    explicit_id = object.schema_id
    if explicit_id
      out << 1.chr
      Wire.encode_varuint(explicit_id, out)
    else
      out << 0.chr
    end
    write_presence(object.presence, object.has_presence, out)
    Wire.encode_varuint(object.fields.length, out)

    # Fall back to the most recently used schema when no id is given.
    lookup_id = explicit_id || @state.last_schema_id
    schema = lookup_id ? @state.schemas[lookup_id] : nil

    if schema
      out << 1.chr
      write_schema_fields(schema, object.presence, object.has_presence, object.fields, out)
      @state.last_schema_id = explicit_id if explicit_id
    else
      out << 0.chr
      object.fields.each { |field| write_value(field, out) }
    end
  when Model::MessageKind::TYPED_VECTOR
    put_kind.call
    write_typed_vector(message.typed_vector, out)
  when Model::MessageKind::ROW_BATCH
    put_kind.call
    rows = message.row_batch.rows
    Wire.encode_varuint(rows.length, out)
    rows.each do |row|
      Wire.encode_varuint(row.length, out)
      row.each { |cell| write_value(cell, out) }
    end
  when Model::MessageKind::COLUMN_BATCH
    put_kind.call
    batch = message.column_batch
    Wire.encode_varuint(batch.count, out)
    Wire.encode_varuint(batch.columns.length, out)
    batch.columns.each { |column| write_column(column, out) }
  when Model::MessageKind::CONTROL
    put_kind.call
    write_control(message.control, out)
  when Model::MessageKind::EXT
    put_kind.call
    Wire.encode_varuint(message.ext.ext_type, out)
    Wire.encode_bytes(message.ext.payload, out)
  when Model::MessageKind::STATE_PATCH
    put_kind.call
    patch = message.state_patch
    write_base_ref(patch.base_ref, out)
    Wire.encode_varuint(patch.operations.length, out)
    patch.operations.each do |op|
      Wire.encode_varuint(op.field_id, out)
      out << op.opcode.value.chr
      operand = op.value
      # A flag byte says whether an operand value follows.
      out << (operand ? 1 : 0).chr
      write_value(operand, out) if operand
    end
    Wire.encode_varuint(patch.literals.length, out)
    patch.literals.each { |literal| write_value(literal, out) }
  when Model::MessageKind::TEMPLATE_BATCH
    put_kind.call
    template = message.template_batch
    Wire.encode_varuint(template.template_id, out)
    Wire.encode_varuint(template.count, out)
    Wire.encode_bitmap(template.changed_column_mask, out)
    Wire.encode_varuint(template.columns.length, out)
    template.columns.each { |column| write_column(column, out) }
  when Model::MessageKind::CONTROL_STREAM
    put_kind.call
    stream = message.control_stream
    out << stream.codec.value.chr
    write_control_stream_payload(stream.codec, stream.payload, out)
  when Model::MessageKind::BASE_SNAPSHOT
    put_kind.call
    snapshot = message.base_snapshot
    Wire.encode_varuint(snapshot.base_id, out)
    Wire.encode_varuint(snapshot.schema_or_shape_ref, out)
    write_message(snapshot.payload, out)
    @state.register_base_snapshot(snapshot.base_id, snapshot.payload)
  else
    raise Errors.invalid_data("unsupported message kind")
  end
end
492
+
493
# Reads one message from +reader+: a kind byte, then the kind-specific
# payload, mirroring write_message.
#
# Reading is stateful. Besides whatever read_value / read_key_ref do to the
# session tables, this method itself
# * records decoded map key sets through observe_decode_shape_candidate,
# * updates @state.last_schema_id for schema objects,
# * stores merged template columns and a template descriptor for template
#   batches, and for batches with count >= 16 also replaces
#   @state.previous_message with an equivalent COLUMN_BATCH message, and
# * registers base snapshot payloads through @state.register_base_snapshot.
#
# @param reader [Wire::Reader] positioned at the kind byte
# @return [Object] the decoded message
# @raise the errors built by Errors.invalid_kind / Errors.invalid_data for
#   malformed input, and reference errors for unknown schema/template ids
def read_message(reader)
  kind_byte = reader.read_u8
  kind = Model::MessageKind.from_byte(kind_byte)
  raise Errors.invalid_kind(kind_byte) if kind.nil?

  case kind
  when Model::MessageKind::SCALAR
    Model.message(kind: Model::MessageKind::SCALAR, scalar: read_value(reader))
  when Model::MessageKind::ARRAY
    items = reader.read_varuint.times.map { read_value(reader) }
    Model.message(kind: Model::MessageKind::ARRAY, array: items)
  when Model::MessageKind::MAP
    pairs = reader.read_varuint.times.map do
      key_ref = read_key_ref(reader)
      Model::MessageMapEntry.new(
        key: key_ref,
        value: read_value_with_field(reader, key_ref_field_identity(key_ref, @state))
      )
    end
    observe_decode_shape_candidate(pairs.map { |pair| key_ref_string(pair.key, @state) })
    Model.message(kind: Model::MessageKind::MAP, map: pairs)
  when Model::MessageKind::SHAPED_OBJECT
    shape_id = reader.read_varuint
    presence, has_presence = read_presence(reader)
    value_count = reader.read_varuint
    keys, known = @state.shape_table.get_keys(shape_id)

    values = []
    if known
      # Without a bitmap every shape key counts as present.
      mask = has_presence ? presence : Array.new(keys.length, true)
      keys.each_with_index do |key, slot|
        next if slot < mask.length && !mask[slot]
        break if values.length >= value_count

        values << read_value_with_field(reader, key)
      end
      # Values beyond the shape's keys are read without a field identity.
      values << read_value(reader) while values.length < value_count
    else
      value_count.times { values << read_value(reader) }
    end

    Model.message(
      kind: Model::MessageKind::SHAPED_OBJECT,
      shaped_object: Model::ShapedObjectMessage.new(
        shape_id: shape_id, presence: presence, has_presence: has_presence, values: values
      )
    )
  when Model::MessageKind::SCHEMA_OBJECT
    schema_id = reader.read_u8 == 1 ? reader.read_varuint : nil
    presence, has_presence = read_presence(reader)
    field_count = reader.read_varuint

    if reader.read_u8 == 1
      # Schema-driven fields: fall back to the last schema when no id was sent.
      effective_id = schema_id || @state.last_schema_id
      raise Errors.invalid_data("schema object requires schema id in context") unless effective_id

      schema = @state.schemas[effective_id]
      raise reference_error("schema_id", effective_id) if schema.nil?

      fields = read_schema_fields(schema, presence, has_presence, field_count, reader)
      @state.last_schema_id = effective_id
    else
      fields = field_count.times.map { read_value(reader) }
      @state.last_schema_id = schema_id if schema_id
    end

    Model.message(
      kind: Model::MessageKind::SCHEMA_OBJECT,
      schema_object: Model::SchemaObjectMessage.new(
        schema_id: schema_id, presence: presence, has_presence: has_presence, fields: fields
      )
    )
  when Model::MessageKind::TYPED_VECTOR
    Model.message(
      kind: Model::MessageKind::TYPED_VECTOR,
      typed_vector: read_typed_vector(reader, nil, nil)
    )
  when Model::MessageKind::ROW_BATCH
    rows = reader.read_varuint.times.map do
      reader.read_varuint.times.map { read_value(reader) }
    end
    Model.message(
      kind: Model::MessageKind::ROW_BATCH,
      row_batch: Model::RowBatchMessage.new(rows: rows)
    )
  when Model::MessageKind::COLUMN_BATCH
    count = reader.read_varuint
    columns = reader.read_varuint.times.map { read_column(reader) }
    Model.message(
      kind: Model::MessageKind::COLUMN_BATCH,
      column_batch: Model::ColumnBatchMessage.new(count: count, columns: columns)
    )
  when Model::MessageKind::CONTROL
    Model.message(kind: Model::MessageKind::CONTROL, control: read_control(reader))
  when Model::MessageKind::EXT
    ext_type = reader.read_varuint
    payload = reader.read_bytes
    Model.message(
      kind: Model::MessageKind::EXT,
      ext: Model::ExtMessage.new(ext_type: ext_type, payload: payload)
    )
  when Model::MessageKind::STATE_PATCH
    base_ref = read_base_ref(reader)
    operations = reader.read_varuint.times.map do
      field_id = reader.read_varuint
      opcode = Model::PatchOpcode.from_byte(reader.read_u8)
      raise Errors.invalid_data("patch opcode") if opcode.nil?

      # A flag byte says whether an operand value follows.
      operand = reader.read_u8 == 1 ? read_value(reader) : nil
      Model::PatchOperation.new(field_id: field_id, opcode: opcode, value: operand)
    end
    literals = reader.read_varuint.times.map { read_value(reader) }
    Model.message(
      kind: Model::MessageKind::STATE_PATCH,
      state_patch: Model::StatePatchMessage.new(
        base_ref: base_ref, operations: operations, literals: literals
      )
    )
  when Model::MessageKind::TEMPLATE_BATCH
    template_id = reader.read_varuint
    count = reader.read_varuint
    mask = reader.read_bitmap
    changed_columns = reader.read_varuint.times.map { read_column(reader) }

    earlier = @state.template_columns[template_id]
    if earlier
      full_columns = merge_template_columns(earlier, mask, changed_columns)
    else
      # With no stored columns to merge into, every column must be present.
      raise reference_error("template_id", template_id) unless mask.all?

      full_columns = changed_columns
    end
    @state.template_columns[template_id] = full_columns
    @state.templates[template_id] = template_descriptor_from_columns(template_id, full_columns)

    if count >= 16
      @state.previous_message = Model.message(
        kind: Model::MessageKind::COLUMN_BATCH,
        column_batch: Model::ColumnBatchMessage.new(count: count, columns: full_columns)
      )
    end

    Model.message(
      kind: Model::MessageKind::TEMPLATE_BATCH,
      template_batch: Model::TemplateBatchMessage.new(
        template_id: template_id, count: count, changed_column_mask: mask, columns: changed_columns
      )
    )
  when Model::MessageKind::CONTROL_STREAM
    codec = Model::ControlStreamCodec.from_byte(reader.read_u8)
    raise Errors.invalid_data("control stream codec") if codec.nil?

    Model.message(
      kind: Model::MessageKind::CONTROL_STREAM,
      control_stream: Model::ControlStreamMessage.new(
        codec: codec, payload: read_control_stream_payload(codec, reader)
      )
    )
  when Model::MessageKind::BASE_SNAPSHOT
    base_id = reader.read_varuint
    schema_or_shape_ref = reader.read_varuint
    payload = read_message(reader)
    @state.register_base_snapshot(base_id, payload)
    Model.message(
      kind: Model::MessageKind::BASE_SNAPSHOT,
      base_snapshot: Model::BaseSnapshotMessage.new(
        base_id: base_id, schema_or_shape_ref: schema_or_shape_ref, payload: payload
      )
    )
  else
    raise Errors.invalid_data("unsupported message kind")
  end
end
697
+
698
# Writes +value+ with no field identity, so the inline-enum string mode is
# never considered.
#
# @param value [Object] value to encode
# @param out [String] mutable output buffer
def write_value(value, out) = write_value_with_field(value, nil, out)
701
+
702
# Appends one tagged value to +out+.
#
# Strings are written in the first mode that applies, in this order:
# 1. INLINE_ENUM - the field has an enum list in @state.field_enums that
#    contains the string; only the index is written.
# 2. EMPTY - the string is empty.
# 3. REF - the string is already in the session string table.
# 4. PREFIX_DELTA - best_prefix_base found a base sharing at least four
#    leading bytes and the string is longer than that prefix.
# 5. LITERAL - the full string.
# The last two modes register the string in the string table.
#
# A value whose kind matches none of the branches writes nothing.
#
# @param value [Object] value exposing +kind+ and the per-kind reader
# @param field_identity [Object, nil] key into @state.field_enums, or nil
# @param out [String] mutable output buffer
def write_value_with_field(value, field_identity, out)
  case value.kind
  when Model::ValueKind::NULL
    out << TAG_NULL.chr
  when Model::ValueKind::BOOL
    bool_tag = value.bool ? TAG_BOOL_TRUE : TAG_BOOL_FALSE
    out << bool_tag.chr
  when Model::ValueKind::I64
    out << TAG_I64.chr
    write_smallest_u64(Wire.encode_zigzag(value.i64), out)
  when Model::ValueKind::U64
    out << TAG_U64.chr
    write_smallest_u64(value.u64, out)
  when Model::ValueKind::F64
    out << TAG_F64.chr
    Wire.append_f64_le(out, value.f64)
  when Model::ValueKind::STRING
    out << TAG_STRING.chr
    text = value.str

    enum_values = field_identity.nil? ? nil : @state.field_enums[field_identity]
    enum_code = enum_values&.find_index(text)
    if enum_code
      out << Model::StringMode::INLINE_ENUM.value.chr
      Wire.encode_varuint(enum_code, out)
      return
    end

    if text.empty?
      out << Model::StringMode::EMPTY.value.chr
      return
    end

    known_id, known = @state.string_table.get_id(text)
    if known
      out << Model::StringMode::REF.value.chr
      Wire.encode_varuint(known_id, out)
      return
    end

    base_id, shared_len, has_base = best_prefix_base(text)
    if has_base && shared_len >= 4 && shared_len < text.bytesize
      out << Model::StringMode::PREFIX_DELTA.value.chr
      Wire.encode_varuint(base_id, out)
      Wire.encode_varuint(shared_len, out)
      # Only the bytes after the shared prefix travel on the wire.
      Wire.encode_string(text.byteslice(shared_len..), out)
      @state.string_table.register(text)
      return
    end

    out << Model::StringMode::LITERAL.value.chr
    Wire.encode_string(text, out)
    @state.string_table.register(text)
  when Model::ValueKind::BINARY
    out << TAG_BINARY.chr
    Wire.encode_bytes(value.bin, out)
  when Model::ValueKind::ARRAY
    out << TAG_ARRAY.chr
    elements = value.arr
    Wire.encode_varuint(elements.length, out)
    elements.each { |element| write_value(element, out) }
  when Model::ValueKind::MAP
    out << TAG_MAP.chr
    pairs = value.map
    Wire.encode_varuint(pairs.length, out)
    pairs.each do |pair|
      # Nested map keys are always written as literals; the key doubles as
      # the field identity for the value.
      write_key_ref(Model::KeyRef.literal(pair.key), out)
      write_value_with_field(pair.value, pair.key, out)
    end
  end
end
769
+
770
# Reads one tagged value with no field identity. An inline-enum string read
# this way is rejected by read_value_with_field.
#
# @param reader [Wire::Reader]
# @return [Object] the decoded value
def read_value(reader) = read_value_with_field(reader, nil)
773
+
774
# Reads one tagged value from +reader+, mirroring write_value_with_field.
#
# LITERAL and PREFIX_DELTA strings are registered in the session string
# table as they are decoded; REF and PREFIX_DELTA resolve ids against it.
#
# @param reader [Wire::Reader]
# @param field_identity [Object, nil] key into @state.field_enums, required
#   to decode an inline-enum string
# @return [Object] the decoded value
# @raise the errors built by Errors.invalid_tag / Errors.invalid_data for
#   malformed input, and reference errors for unknown string ids
def read_value_with_field(reader, field_identity)
  tag = reader.read_u8
  case tag
  when TAG_NULL
    Model.null_value
  when TAG_BOOL_FALSE
    Model.bool_value(false)
  when TAG_BOOL_TRUE
    Model.bool_value(true)
  when TAG_I64
    Model.i64_value(Wire.decode_zigzag(read_smallest_u64(reader)))
  when TAG_U64
    Model.u64_value(read_smallest_u64(reader))
  when TAG_F64
    Model.f64_value(Wire.read_f64_le(reader))
  when TAG_STRING
    mode = Model::StringMode.from_byte(reader.read_u8)
    raise Errors.invalid_data("string mode") if mode.nil?

    case mode
    when Model::StringMode::EMPTY
      Model.string_value("")
    when Model::StringMode::LITERAL
      text = reader.read_string
      @state.string_table.register(text)
      Model.string_value(text)
    when Model::StringMode::REF
      string_id = reader.read_varuint
      text, found = @state.string_table.get_value(string_id)
      raise reference_error("string_id", string_id) unless found

      Model.string_value(text)
    when Model::StringMode::PREFIX_DELTA
      base_id = reader.read_varuint
      shared_len = reader.read_varuint
      tail = reader.read_string
      base, found = @state.string_table.get_value(base_id)
      raise reference_error("string_id", base_id) unless found
      raise Errors.invalid_data("prefix delta length") if shared_len > base.bytesize

      # Rebuild the string from the shared prefix of the base plus the tail.
      text = base.byteslice(0, shared_len) + tail
      @state.string_table.register(text)
      Model.string_value(text)
    when Model::StringMode::INLINE_ENUM
      raise Errors.invalid_data("inline enum missing field identity") if field_identity.nil?

      choices = @state.field_enums[field_identity]
      raise Errors.invalid_data("inline enum unknown field") if choices.nil?

      code = reader.read_varuint
      raise Errors.invalid_data("inline enum code") if code >= choices.length

      Model.string_value(choices[code])
    end
  when TAG_BINARY
    Model.binary_value(reader.read_bytes)
  when TAG_ARRAY
    elements = reader.read_varuint.times.map { read_value(reader) }
    Model.array_value(elements)
  when TAG_MAP
    pairs = reader.read_varuint.times.map do
      # The key doubles as the field identity for its value.
      name = read_key_ref(reader).literal
      Model.entry(name, read_value_with_field(reader, name))
    end
    Model.map_value(pairs)
  else
    raise Errors.invalid_tag(tag)
  end
end
850
+
851
# Writes the present fields of a schema object, each encoded according to
# its schema field definition.
#
# The number of supplied values must equal the number of present schema
# fields. Previously only too few values were detected; surplus values were
# silently dropped even though the caller had already written
# +fields.length+ as the field count, producing a message that
# read_schema_fields rejects. The check now runs before anything is
# written, so a mismatch no longer leaves partially written fields behind.
#
# @param schema [Object] schema exposing +fields+
# @param presence [Array, nil] presence bitmap, if any
# @param has_presence [Boolean] whether +presence+ applies
# @param fields [Array] values for the present fields, in schema order
# @param out [String] mutable output buffer
# @raise the error built by Errors.invalid_data when the value count does
#   not match the number of present schema fields
def write_schema_fields(schema, presence, has_presence, fields, out)
  indices = Protocol.schema_present_field_indices(schema, presence, has_presence)
  raise Errors.invalid_data("schema fields length mismatch") unless indices.length == fields.length

  indices.each_with_index do |schema_idx, position|
    write_schema_field_value(schema.fields[schema_idx], fields[position], out)
  end
end
859
+
860
# Reads the present fields of a schema object in schema order.
#
# @param schema [Object] schema exposing +fields+
# @param presence [Array, nil] presence bitmap, if any
# @param has_presence [Boolean] whether +presence+ applies
# @param n [Integer] field count announced on the wire
# @param reader [Wire::Reader]
# @return [Array] decoded field values
# @raise the error built by Errors.invalid_data when +n+ differs from the
#   number of present schema fields
def read_schema_fields(schema, presence, has_presence, n, reader)
  present = Protocol.schema_present_field_indices(schema, presence, has_presence)
  raise Errors.invalid_data("schema fields length") if present.length != n

  present.map { |schema_idx| read_schema_field_value(schema.fields[schema_idx], reader) }
end
870
+
871
# Writes one schema field value after checking that its kind matches the
# field's normalized logical type.
#
# bool, i64/int64/int, u64/uint64/uint, f64/float64/float and string fields
# are type-checked; any other logical type is written unchecked. String
# fields are written with the field name as field identity so the
# inline-enum mode can apply.
#
# @param field [Object] schema field exposing +logical_type+ and +name+
# @param value [Object] value to encode
# @param out [String] mutable output buffer
# @raise the error built by Errors.invalid_data on a kind mismatch
def write_schema_field_value(field, value, out)
  # [required value kind, mismatch message, written with field identity?]
  required_kind, mismatch, with_identity =
    case Protocol.normalized_logical_type(field.logical_type)
    when "bool"
      [Model::ValueKind::BOOL, "schema bool field type mismatch", false]
    when "i64", "int64", "int"
      [Model::ValueKind::I64, "schema i64 field type mismatch", false]
    when "u64", "uint64", "uint"
      [Model::ValueKind::U64, "schema u64 field type mismatch", false]
    when "f64", "float64", "float"
      [Model::ValueKind::F64, "schema f64 field type mismatch", false]
    when "string"
      [Model::ValueKind::STRING, "schema string field type mismatch", true]
    end

  raise Errors.invalid_data(mismatch) if mismatch && !(value.kind == required_kind)

  if with_identity
    write_value_with_field(value, field.name, out)
  else
    write_value(value, out)
  end
end
897
+
898
# Reads one schema field value. String fields are read with the field name
# as field identity (needed for inline-enum strings); all other fields are
# read as plain values.
#
# @param field [Object] schema field exposing +logical_type+ and +name+
# @param reader [Wire::Reader]
# @return [Object] the decoded value
def read_schema_field_value(field, reader)
  string_field = Protocol.normalized_logical_type(field.logical_type) == "string"
  string_field ? read_value_with_field(reader, field.name) : read_value(reader)
end
904
+
905
# Writes a map key reference: mode byte 1 plus the id for id references,
# mode byte 0 plus the string for literals. Literal keys are registered in
# the session key table.
#
# @param key_ref [Object] key reference exposing +is_id+, +id+ and +literal+
# @param out [String] mutable output buffer
def write_key_ref(key_ref, out)
  if key_ref.is_id
    out << 1.chr
    Wire.encode_varuint(key_ref.id, out)
    nil
  else
    out << 0.chr
    Wire.encode_string(key_ref.literal, out)
    @state.key_table.register(key_ref.literal)
  end
end
915
+
916
# Reads a map key reference and always returns it as a literal key.
#
# Mode 1 resolves an id through the session key table; mode 0 reads the
# string and registers it in the key table.
#
# @param reader [Wire::Reader]
# @return [Object] literal key reference built by Model::KeyRef.literal
# @raise a reference error for an unknown key id, or the error built by
#   Errors.invalid_data for any other mode byte
def read_key_ref(reader)
  case reader.read_u8
  when 1
    key_id = reader.read_varuint
    name, found = @state.key_table.get_value(key_id)
    raise reference_error("key_id", key_id) unless found

    Model::KeyRef.literal(name)
  when 0
    name = reader.read_string
    @state.key_table.register(name)
    Model::KeyRef.literal(name)
  else
    raise Errors.invalid_data("key ref mode")
  end
end
931
+
932
# Writes the optional presence bitmap: a flag byte of 0 when there is none,
# otherwise a flag byte of 1 followed by the bitmap.
#
# @param presence [Array, nil] bitmap, only consulted when +has_presence+
# @param has_presence [Boolean] whether a bitmap is written
# @param out [String] mutable output buffer
def write_presence(presence, has_presence, out)
  if has_presence
    out << 1.chr
    Wire.encode_bitmap(presence, out)
  else
    out << 0.chr
    nil
  end
end
940
+
941
# Reads the optional presence bitmap written by write_presence.
#
# @param reader [Wire::Reader]
# @return [Array] +[nil, false]+ when the flag byte is 0, +[bitmap, true]+
#   when it is 1
# @raise the error built by Errors.invalid_data for any other flag byte
def read_presence(reader)
  case reader.read_u8
  when 0
    [nil, false]
  when 1
    [reader.read_bitmap, true]
  else
    raise Errors.invalid_data("presence flag")
  end
end
948
+
949
# Returns the element count of typed vector data, taken from the array that
# matches +data.kind+.
#
# @param data [Object] typed vector data exposing +kind+ and per-kind arrays
# @return [Integer] length of the matching array, or 0 for an unknown kind
def typed_vector_len(data)
  elements =
    case data.kind
    when Model::ElementType::BOOL then data.bools
    when Model::ElementType::I64 then data.i64s
    when Model::ElementType::U64 then data.u64s
    when Model::ElementType::F64 then data.f64s
    when Model::ElementType::STRING then data.strings
    when Model::ElementType::BINARY then data.binary
    when Model::ElementType::VALUE then data.values
    else return 0
    end

  elements.length
end
969
+
970
# Writes a typed vector: element type byte, element count, codec byte, then
# the payload in the encoding that matches the element type.
#
# BINARY and VALUE payloads repeat their element count before the elements.
#
# @param vector [Object] typed vector exposing +element_type+, +codec+ and
#   +data+
# @param out [String] mutable output buffer
# @raise the error built by Errors.invalid_data for an unknown element type
def write_typed_vector(vector, out)
  payload = vector.data
  codec = vector.codec

  out << vector.element_type.value.chr
  Wire.encode_varuint(typed_vector_len(payload), out)
  out << codec.value.chr

  case vector.element_type
  when Model::ElementType::BOOL
    Wire.encode_bitmap(payload.bools, out)
  when Model::ElementType::I64
    Codec.encode_i64_vector(payload.i64s, codec, out)
  when Model::ElementType::U64
    Codec.encode_u64_vector(payload.u64s, codec, out)
  when Model::ElementType::F64
    Codec.encode_f64_vector(payload.f64s, codec, out)
  when Model::ElementType::STRING
    write_string_vector(payload.strings, codec, out)
  when Model::ElementType::BINARY
    blobs = payload.binary
    Wire.encode_varuint(blobs.length, out)
    blobs.each { |blob| Wire.encode_bytes(blob, out) }
  when Model::ElementType::VALUE
    items = payload.values
    Wire.encode_varuint(items.length, out)
    items.each { |item| write_value(item, out) }
  else
    raise Errors.invalid_data("unsupported element type")
  end
end
995
+
996
# Decodes a typed vector written by write_typed_vector.
#
# @param reader wire reader positioned at the vector
# @param forced_element element type to assume; when nil the type byte is read from the wire
# @param expected_codec codec the caller requires, or nil to accept any
# @return [Model::TypedVector]
# @raise Errors.invalid_data on an unknown element type or codec, a codec
#   mismatch, or when the decoded element count differs from the declared one
def read_typed_vector(reader, forced_element, expected_codec)
  elem_type = forced_element
  if elem_type.nil?
    elem_type = Model::ElementType.from_byte(reader.read_u8)
    raise Errors.invalid_data("vector element type") if elem_type.nil?
  end

  expected_len = reader.read_varuint
  codec = Model::VectorCodec.from_byte(reader.read_u8)
  raise Errors.invalid_data("vector codec") if codec.nil?
  unless expected_codec.nil?
    raise Errors.invalid_data("column codec mismatch") if codec != expected_codec
  end

  data = Model::TypedVectorData.new(
    kind: elem_type, bools: [], i64s: [], u64s: [], f64s: [], strings: [], binary: [], values: []
  )
  types = Model::ElementType
  # Only the list matching the element type is filled; the others stay empty.
  decoded =
    case elem_type
    when types::BOOL then { bools: reader.read_bitmap }
    when types::I64 then { i64s: Codec.decode_i64_vector(reader, codec) }
    when types::U64 then { u64s: Codec.decode_u64_vector(reader, codec) }
    when types::F64 then { f64s: Codec.decode_f64_vector(reader, codec) }
    when types::STRING then { strings: read_string_vector(reader, codec) }
    when types::BINARY then { binary: reader.read_varuint.times.map { reader.read_bytes } }
    when types::VALUE then { values: reader.read_varuint.times.map { read_value(reader) } }
    else {}
    end
  data = data.with(**decoded) unless decoded.empty?
  raise Errors.invalid_data("typed vector length mismatch") if typed_vector_len(data) != expected_len

  Model::TypedVector.new(element_type: elem_type, codec: codec, data: data)
end
1041
+
1042
# Serialises one column: field id, null strategy (plus bitmap when the
# strategy uses one), codec, optional dictionary reference with its profile
# and payload, and finally the values, either as a trained-dictionary block
# (payload mode 1) or as a regular typed vector (payload mode 0).
#
# @param column object exposing field_id, null_strategy, has_presence, presence,
#   codec, dictionary_id and values
# @param out [String] output buffer, appended to in place
# @raise Errors.invalid_data("missing column presence bitmap") when a bitmap
#   strategy is selected but no bitmap is available
def write_column(column, out)
  Wire.encode_varuint(column.field_id, out)
  out << column.null_strategy.value.chr
  case column.null_strategy
  when Model::NullStrategy::PRESENCE_BITMAP, Model::NullStrategy::INVERTED_PRESENCE_BITMAP
    unless column.has_presence && !column.presence.nil?
      raise Errors.invalid_data("missing column presence bitmap")
    end
    Wire.encode_bitmap(column.presence, out)
  end
  out << column.codec.value.chr

  dict_id = column.dictionary_id
  if dict_id
    out << 1.chr
    Wire.encode_varuint(dict_id, out)
    payload = @state.dictionaries[dict_id]
    # The profile is only looked up (and written) when the payload is known.
    profile = payload ? @state.dictionary_profiles[dict_id] : nil
    if profile
      out << 1.chr
      Wire.encode_varuint(profile.version, out)
      Wire.encode_varuint(profile.hash, out)
      Wire.encode_varuint(profile.expires_at, out)
      out << dictionary_fallback_to_byte(profile.fallback).chr
      Wire.encode_bytes(payload, out)
    else
      out << 0.chr
    end
  else
    out << 0.chr
  end

  uses_string_dictionary =
    !dict_id.nil? &&
    column.values.kind == Model::ElementType::STRING &&
    (column.codec == Model::VectorCodec::DICTIONARY || column.codec == Model::VectorCodec::STRING_REF)
  dictionary_payload = uses_string_dictionary ? @state.dictionaries[dict_id] : nil

  trained_block = nil
  if dictionary_payload
    begin
      dictionary = Dictionary.decode_trained_dictionary_payload(dictionary_payload)
      block, ok = Dictionary.encode_trained_dictionary_block(column.values.strings, dictionary)
      trained_block = block if ok
    rescue StandardError
      # Best effort: any failure falls through to regular typed-vector encoding.
    end
  end

  if trained_block.nil?
    out << 0.chr
    typed = Model::TypedVector.new(
      element_type: column.values.kind,
      codec: column.codec,
      data: Model.clone_typed_vector_data(column.values)
    )
    write_typed_vector(typed, out)
  else
    out << 1.chr
    Wire.encode_bytes(trained_block, out)
    nil
  end
end
1105
+
1106
# Decodes one column written by write_column. A dictionary profile carried
# inline is verified against its declared hash and then stored in
# @state.dictionaries / @state.dictionary_profiles.
#
# @param reader wire reader positioned at the column
# @return [Model::Column]
# @raise Errors.invalid_data on unknown flags/strategies/codecs, a dictionary
#   hash mismatch, or a trained block without a usable dictionary codec
# @raise the error built by reference_error when a referenced dictionary is unknown
def read_column(reader)
  field_id = reader.read_varuint
  null_strategy = Model::NullStrategy.from_byte(reader.read_u8)
  raise Errors.invalid_data("null strategy") if null_strategy.nil?

  has_presence =
    case null_strategy
    when Model::NullStrategy::PRESENCE_BITMAP, Model::NullStrategy::INVERTED_PRESENCE_BITMAP then true
    else false
    end
  presence = has_presence ? reader.read_bitmap : nil

  codec = Model::VectorCodec.from_byte(reader.read_u8)
  raise Errors.invalid_data("column codec") if codec.nil?

  dictionary_id = nil
  dict_flag = reader.read_u8
  raise Errors.invalid_data("dictionary flag") unless [0, 1].include?(dict_flag)

  if dict_flag == 1
    dict_id = reader.read_varuint
    case reader.read_u8
    when 0
      # No inline profile: the dictionary must already be known to the session.
      raise reference_error("dict_id", dict_id) unless @state.dictionaries.key?(dict_id)
    when 1
      version = reader.read_varuint
      declared_hash = reader.read_varuint
      expires_at = reader.read_varuint
      fallback = Session::DictionaryFallback.from_byte(reader.read_u8)
      raise Errors.invalid_data("dictionary fallback") if fallback.nil?

      payload = reader.read_bytes
      if Dictionary.dictionary_payload_hash(payload) != declared_hash
        raise Errors.invalid_data("dictionary profile hash mismatch")
      end
      @state.dictionaries[dict_id] = payload
      @state.dictionary_profiles[dict_id] = Session::DictionaryProfile.new(
        version: version,
        hash: declared_hash,
        expires_at: expires_at,
        fallback: fallback
      )
    else
      raise Errors.invalid_data("dictionary profile flag")
    end
    dictionary_id = dict_id
  end

  values =
    case reader.read_u8
    when 0
      read_typed_vector(reader, nil, codec).data
    when 1
      raise Errors.invalid_data("trained dictionary block requires dict_id") if dictionary_id.nil?
      unless codec == Model::VectorCodec::DICTIONARY || codec == Model::VectorCodec::STRING_REF
        raise Errors.invalid_data("trained dictionary block requires string dictionary codec")
      end

      dictionary_payload = @state.dictionaries[dictionary_id]
      raise reference_error("dict_id", dictionary_id) if dictionary_payload.nil?

      dictionary = Dictionary.decode_trained_dictionary_payload(dictionary_payload)
      strings = Dictionary.decode_trained_dictionary_block(reader.read_bytes, dictionary)
      Model::TypedVectorData.new(
        kind: Model::ElementType::STRING,
        bools: [],
        i64s: [],
        u64s: [],
        f64s: [],
        strings: strings,
        binary: [],
        values: []
      )
    else
      raise Errors.invalid_data("column payload mode")
    end

  Model::Column.new(
    field_id: field_id,
    null_strategy: null_strategy,
    presence: presence,
    has_presence: has_presence,
    codec: codec,
    dictionary_id: dictionary_id,
    values: values
  )
end
1202
+
1203
# Serialises a control message and applies its effect to the local session
# state (@state), so the writer's tables change as the message is written.
#
# @param control object exposing opcode and the payload for that opcode
# @param out [String] output buffer, appended to in place
# @raise Errors.invalid_data when a required payload is missing or the opcode is unknown
def write_control(control, out)
  out << control.opcode.value.chr
  opcodes = Model::ControlOpcode
  case control.opcode
  when opcodes::REGISTER_KEYS
    Wire.encode_varuint(control.register_keys.length, out)
    control.register_keys.each do |key|
      Wire.encode_string(key, out)
      @state.key_table.register(key)
    end
  when opcodes::REGISTER_SHAPE
    raise Errors.invalid_data("register shape payload missing") if control.register_shape.nil?

    shape = control.register_shape
    Wire.encode_varuint(shape.shape_id, out)
    Wire.encode_varuint(shape.keys.length, out)
    # Each key ref is written, and its literal recorded as the shape's key name.
    key_names = shape.keys.map do |key_ref|
      write_key_ref(key_ref, out)
      key_ref.literal
    end
    @state.shape_table.register_with_id(control.register_shape.shape_id, key_names)
  when opcodes::REGISTER_STRINGS
    Wire.encode_varuint(control.register_strings.length, out)
    control.register_strings.each do |str|
      Wire.encode_string(str, out)
      @state.string_table.register(str)
    end
  when opcodes::PROMOTE_STRING_FIELD_TO_ENUM
    raise Errors.invalid_data("promote enum payload missing") if control.promote_string_field_to_enum.nil?

    promotion = control.promote_string_field_to_enum
    Wire.encode_string(promotion.field_identity, out)
    Wire.encode_varuint(promotion.values.length, out)
    promotion.values.each { |value| Wire.encode_string(value, out) }
    @state.field_enums[promotion.field_identity] = promotion.values.dup
  when opcodes::RESET_TABLES
    @state.reset_tables
  when opcodes::RESET_STATE
    @state.reset_state
  else
    raise Errors.invalid_data("control opcode")
  end
end
1245
+
1246
# Decodes a control message and applies its effect to the local session
# state (@state): key/string/shape registration, enum promotion, or a reset.
#
# Collections are grown one element at a time instead of being pre-sized from
# the count read off the wire, so a corrupt or hostile count cannot trigger a
# huge allocation before the reader runs out of input.
#
# @param reader wire reader positioned at the control opcode byte
# @return [Model::ControlMessage] message with the field for its opcode populated
# @raise Errors.invalid_data("control opcode") when the opcode byte is unknown
def read_control(reader)
  opcode = Model::ControlOpcode.from_byte(reader.read_u8)
  raise Errors.invalid_data("control opcode") if opcode.nil?

  msg = Model::ControlMessage.new(
    register_keys: [],
    register_shape: nil,
    register_strings: [],
    promote_string_field_to_enum: nil,
    reset_tables: false,
    reset_state: false,
    opcode: opcode
  )
  case opcode
  when Model::ControlOpcode::REGISTER_KEYS
    keys = []
    reader.read_varuint.times do
      key = reader.read_string
      keys << key
      @state.key_table.register(key)
    end
    msg = msg.with(register_keys: keys)
  when Model::ControlOpcode::REGISTER_SHAPE
    shape_id = reader.read_varuint
    key_refs = []
    reader.read_varuint.times { key_refs << read_key_ref(reader) }
    # The shape is registered under the literal of each key ref.
    @state.shape_table.register_with_id(shape_id, key_refs.map(&:literal))
    msg = msg.with(register_shape: Model::RegisterShapeControl.new(shape_id: shape_id, keys: key_refs))
  when Model::ControlOpcode::REGISTER_STRINGS
    strings = []
    reader.read_varuint.times do
      str = reader.read_string
      strings << str
      @state.string_table.register(str)
    end
    msg = msg.with(register_strings: strings)
  when Model::ControlOpcode::PROMOTE_STRING_FIELD_TO_ENUM
    field_identity = reader.read_string
    values = []
    reader.read_varuint.times { values << reader.read_string }
    # The state keeps its own copy so later changes to the message do not leak in.
    @state.field_enums[field_identity] = values.dup
    msg = msg.with(
      promote_string_field_to_enum: Model::PromoteEnumControl.new(
        field_identity: field_identity,
        values: values
      )
    )
  when Model::ControlOpcode::RESET_TABLES
    msg = msg.with(reset_tables: true)
    @state.reset_tables
  when Model::ControlOpcode::RESET_STATE
    msg = msg.with(reset_state: true)
    @state.reset_state
  end
  msg
end
1314
+
1315
# NOTE(review): the class already declares `attr_accessor :state` near its top,
# so this second declaration looks redundant - confirm before removing.
+ attr_accessor :state
1316
+
1317
# Writes a base reference: mode byte 0 for "previous message", or mode byte 1
# followed by the base id as a varuint.
#
# @param base_ref object exposing previous and base_id
# @param out [String] output buffer, appended to in place
# @return nil for a "previous" reference, otherwise the result of Wire.encode_varuint
def write_base_ref(base_ref, out)
  if base_ref.previous
    out << 0.chr
    nil
  else
    out << 1.chr
    Wire.encode_varuint(base_ref.base_id, out)
  end
end
1325
+
1326
# Reads a base reference written by write_base_ref.
#
# @param reader object responding to read_u8 and read_varuint
# @return Model::BaseRef.previous for mode 0, Model::BaseRef.id_ref(id) for mode 1
# @raise Errors.invalid_data("base ref") for any other mode byte
def read_base_ref(reader)
  mode = reader.read_u8
  return Model::BaseRef.previous if mode.zero?
  raise Errors.invalid_data("base ref") unless mode == 1

  Model::BaseRef.id_ref(reader.read_varuint)
end
1338
+
1339
# Encodes +payload+ with the selected control-stream codec and writes the
# result as a length-prefixed byte string via Wire.encode_bytes.
#
# An unknown codec is rejected with the same error the reader raises, instead
# of handing nil to Wire.encode_bytes.
#
# @param codec one of the Model::ControlStreamCodec values
# @param payload [String] bytes to encode
# @param out [String] output buffer, appended to in place
# @raise Errors.invalid_data("control stream codec") when the codec is not recognised
def write_control_stream_payload(codec, payload, out)
  encoded =
    case codec
    when Model::ControlStreamCodec::PLAIN
      payload.b.dup
    when Model::ControlStreamCodec::RLE
      rle_encode_bytes(payload)
    when Model::ControlStreamCodec::BITPACK
      control_bitpack_encode_bytes(payload)
    when Model::ControlStreamCodec::HUFFMAN
      control_huffman_encode_bytes(payload)
    when Model::ControlStreamCodec::FSE
      control_fse_encode_bytes(payload)
    else
      raise Errors.invalid_data("control stream codec")
    end
  Wire.encode_bytes(encoded, out)
end
1354
+
1355
# Reads a length-prefixed byte string and decodes it with the selected
# control-stream codec.
#
# @param codec one of the Model::ControlStreamCodec values
# @param reader object responding to read_bytes
# @return the decoded payload (the raw bytes for PLAIN)
# @raise Errors.invalid_data("control stream codec") when the codec is not recognised
def read_control_stream_payload(codec, reader)
  raw = reader.read_bytes
  codecs = Model::ControlStreamCodec
  case codec
  when codecs::PLAIN then raw
  when codecs::RLE then rle_decode_bytes(raw)
  when codecs::BITPACK then control_bitpack_decode_bytes(raw)
  when codecs::HUFFMAN then control_huffman_decode_bytes(raw)
  when codecs::FSE then control_fse_decode_bytes(raw)
  else raise Errors.invalid_data("control stream codec")
  end
end
1372
+
1373
# Finds the string-table entry sharing the longest byte prefix with +value+.
# On ties the entry with the lowest id wins, because only a strictly longer
# prefix replaces the current best.
#
# @param value [String] string to match against the session string table
# @return [Array] [id, prefix_length, true] for the best match, or
#   [0, 0, false] when no entry shares any prefix
def best_prefix_base(value)
  winner = nil
  longest = 0
  state.string_table.by_id.each_with_index do |candidate, id|
    shared = common_prefix_len(value.b, candidate.b)
    next unless shared > longest

    longest = shared
    winner = id
  end
  winner.nil? ? [0, 0, false] : [winner, longest, true]
end
1387
+
1388
# Writes a list of strings using the given vector codec.
#
# * DICTIONARY: unique strings in first-seen order, then per-element indexes
#   encoded as a DIRECT_BITPACK u64 vector.
# * STRING_REF: count, then one session string-table id per element
#   (unknown strings are registered on the fly).
# * PREFIX_DELTA: count, then per element the byte length shared with the
#   previous string followed by the remaining suffix.
# * any other codec: count followed by each string.
#
# @param values [Array<String>] strings to write
# @param codec vector codec selecting the layout
# @param out [String] output buffer, appended to in place
def write_string_vector(values, codec, out)
  case codec
  when Model::VectorCodec::DICTIONARY
    index_of = {}
    uniq = []
    refs = values.map do |v|
      index_of.fetch(v) do
        uniq << v
        index_of[v] = uniq.length - 1
      end
    end
    Wire.encode_varuint(uniq.length, out)
    uniq.each { |entry| Wire.encode_string(entry, out) }
    Codec.encode_u64_vector(refs, Model::VectorCodec::DIRECT_BITPACK, out)
  when Model::VectorCodec::STRING_REF
    Wire.encode_varuint(values.length, out)
    values.each do |v|
      id, known = state.string_table.get_id(v)
      id = state.string_table.register(v) unless known
      Wire.encode_varuint(id, out)
    end
  when Model::VectorCodec::PREFIX_DELTA
    Wire.encode_varuint(values.length, out)
    previous = ""
    values.each do |current|
      shared = common_prefix_len(previous.b, current.b)
      Wire.encode_varuint(shared, out)
      Wire.encode_string(current.byteslice(shared, current.bytesize - shared), out)
      previous = current
    end
  else
    Wire.encode_varuint(values.length, out)
    values.each { |v| Wire.encode_string(v, out) }
  end
end
1433
+
1434
# Reads a list of strings written by write_string_vector.
#
# Fixes relative to the previous version:
# * PREFIX_DELTA validated the shared-prefix length against the previous
#   string's character count, while the writer and byteslice work in bytes;
#   valid multi-byte data was rejected. The check now uses bytesize.
# * Result arrays are grown element by element rather than pre-sized from the
#   count read off the wire, so an oversized count fails on the first missing
#   element instead of on allocation.
#
# @param reader wire reader positioned at the string payload
# @param codec vector codec selecting the layout
# @return [Array<String>] decoded strings
# @raise Errors.invalid_data on an out-of-range dictionary index or prefix length
# @raise the error built by reference_error for an unknown string-table id
def read_string_vector(reader, codec)
  case codec
  when Model::VectorCodec::DICTIONARY
    dict = []
    reader.read_varuint.times { dict << reader.read_string }
    refs = Codec.decode_u64_vector(reader, Model::VectorCodec::DIRECT_BITPACK)
    refs.map do |ref|
      raise Errors.invalid_data("dictionary reference") if ref >= dict.length

      dict[ref]
    end
  when Model::VectorCodec::STRING_REF
    strings = []
    reader.read_varuint.times do
      id = reader.read_varuint
      s, ok = state.string_table.get_value(id)
      raise reference_error("string_id", id) unless ok

      strings << s
    end
    strings
  when Model::VectorCodec::PREFIX_DELTA
    strings = []
    prev = ""
    reader.read_varuint.times do
      prefix = reader.read_varuint
      suffix = reader.read_string
      # The prefix is a byte count, so compare against the byte size.
      raise Errors.invalid_data("prefix delta in string vector") if prefix > prev.bytesize

      prev = prev.byteslice(0, prefix) + suffix
      strings << prev
    end
    strings
  else
    strings = []
    reader.read_varuint.times { strings << reader.read_string }
    strings
  end
end
1483
+
1484
# Applies patch operations to a base message and returns the rebuilt message.
#
# The base is a clone of the previous message (when base_ref.previous is set)
# or the snapshot registered under base_ref.base_id. Operations address fields
# by index in the list returned by message_fields.
#
# Fix: replace/insert operations now reject a negative field index with the
# same "out of range" error used for too-large indexes. Previously a negative
# index silently wrote relative to the end of the field list, unlike the
# delete/append/truncate branches, which already rejected negatives.
#
# @param base_ref object exposing previous and base_id
# @param operations list of patch operations (field_id, opcode, value)
# @param literals currently unused by this method
# @return the message produced by rebuild_message_like(base, fields)
# @raise Errors.invalid_data on a malformed operation
# @raise the error built by reference_error when the base cannot be resolved
def apply_state_patch(base_ref, operations, literals)
  base =
    if base_ref.previous
      raise reference_error("previous", 0) unless state.previous_message

      state.previous_message.clone_message
    else
      snapshot, ok = state.get_base_snapshot(base_ref.base_id)
      raise reference_error("base_id", base_ref.base_id) unless ok

      snapshot
    end
  fields = message_fields(base)
  operations.each do |op|
    idx = op.field_id
    case op.opcode
    when Model::PatchOpcode::KEEP
      # no-op
    when Model::PatchOpcode::REPLACE_SCALAR,
         Model::PatchOpcode::REPLACE_VECTOR,
         Model::PatchOpcode::INSERT_FIELD,
         Model::PatchOpcode::STRING_REF,
         Model::PatchOpcode::PREFIX_DELTA
      raise Errors.invalid_data("patch operation missing value") if op.value.nil?
      raise Errors.invalid_data("patch field index out of range") if idx.negative? || idx > fields.length

      # An index one past the end appends; anything inside the list overwrites.
      if idx == fields.length
        fields << op.value.clone_value
      else
        fields[idx] = op.value.clone_value
      end
    when Model::PatchOpcode::DELETE_FIELD
      raise Errors.invalid_data("delete field index out of range") if idx.negative? || idx >= fields.length

      fields.delete_at(idx)
    when Model::PatchOpcode::APPEND_VECTOR
      if op.value.nil? || idx.negative? || idx >= fields.length
        raise Errors.invalid_data("append vector patch invalid")
      end
      if fields[idx].kind != Model::ValueKind::ARRAY || op.value.kind != Model::ValueKind::ARRAY
        raise Errors.invalid_data("append vector requires arrays")
      end

      fields[idx] = fields[idx].with(arr: fields[idx].arr + op.value.arr)
    when Model::PatchOpcode::TRUNCATE_VECTOR
      if op.value.nil? || idx.negative? || idx >= fields.length
        raise Errors.invalid_data("truncate vector patch invalid")
      end
      if fields[idx].kind != Model::ValueKind::ARRAY || op.value.kind != Model::ValueKind::U64
        raise Errors.invalid_data("truncate vector requires array and u64")
      end

      n = op.value.u64
      raise Errors.invalid_data("truncate length") if n.negative? || n > fields[idx].arr.length

      fields[idx] = fields[idx].with(arr: fields[idx].arr[0, n].dup)
    end
  end
  rebuild_message_like(base, fields)
end
1545
+
1546
# Records a sighting of +keys+ on the decode side and registers the shape once
# should_register_shape says so. Shapes that already have an id are left alone.
#
# @param keys list of key names describing the shape
def observe_decode_shape_candidate(keys)
  table = state.shape_table
  _id, already_known = table.get_id(keys)
  unless already_known
    sightings = state.shape_table.observe(keys)
    state.shape_table.register(keys) if should_register_shape(keys, sightings)
  end
end
1553
+
1554
# Decides whether a shape is worth registering.
#
# @param keys list of key names describing the shape
# @param observed_count [Integer] how many times the shape has been seen
# @return [Boolean] true for a non-empty key list seen at least twice
def should_register_shape(keys, observed_count)
  return false if keys.empty?

  observed_count >= 2
end
1557
+
1558
# Counts a sighting of +keys+ on the encode side, keyed by shape_key(keys), and
# registers the shape once should_register_shape says so.
#
# @param keys list of key names describing the shape
# @return [Integer] the updated sighting count for this shape
def observe_encode_shape_candidate(keys)
  sk = shape_key(keys)
  tallies = state.encode_shape_observations
  count = (tallies[sk] || 0) + 1
  tallies[sk] = count
  state.shape_table.register(keys) if should_register_shape(keys, count)
  count
end
1566
# Instance methods defined after this point in the class are private.
+ private
1567
+
1568
# Writes +value+ using the narrowest of 1, 2, 4 or 8 bytes: a width byte
# followed by the value in little-endian order.
#
# @param value [Integer] value to write
# @param out [String] output buffer, appended to in place
def write_smallest_u64(value, out)
  if value <= 0xFF
    out << 1.chr << value.chr
  elsif value <= 0xFFFFFFFF
    width = value <= 0xFFFF ? 2 : 4
    out << width.chr
    # Least-significant byte first.
    width.times { |i| out << ((value >> (8 * i)) & 0xFF).chr }
    out
  else
    out << 8.chr
    Wire.append_u64_le(out, value)
  end
end
1587
+
1588
# Reads a value written by write_smallest_u64: a width byte (1, 2, 4 or 8)
# followed by that many little-endian bytes.
#
# @param reader object responding to read_u8 and read_exact
# @return [Integer] decoded value
# @raise Errors.invalid_data("smallest u64 size") for any other width byte
def read_smallest_u64(reader)
  width = reader.read_u8
  case width
  when 1
    reader.read_u8
  when 2, 4
    raw = reader.read_exact(width)
    # Assemble little-endian: byte i contributes bits 8*i..8*i+7.
    (0...width).reduce(0) { |acc, i| acc | (raw.getbyte(i) << (8 * i)) }
  when 8
    Wire.read_u64_le(reader)
  else
    raise Errors.invalid_data("smallest u64 size")
  end
end
1605
+
1606
# Maps a dictionary fallback policy to its wire byte.
#
# @param fallback one of the Session::DictionaryFallback values
# @return [Integer] 0 for FAIL_FAST, 1 for STATELESS_RETRY
# @raise Errors.invalid_data("dictionary fallback") for any other value
def dictionary_fallback_to_byte(fallback)
  policies = Session::DictionaryFallback
  case fallback
  when policies::FAIL_FAST then 0
  when policies::STATELESS_RETRY then 1
  else raise Errors.invalid_data("dictionary fallback")
  end
end
1616
+ end
1617
# Stateful encoder built on a TwilicCodec. It chooses between full messages,
# state patches, row/column batches and template batches based on the codec's
# session options and what was sent before.
class SessionEncoder
  attr_reader :codec

  # @param options session options handed to Protocol.twilic_codec_with_options
  def initialize(options)
    @codec = Protocol.twilic_codec_with_options(options)
  end

  # Encodes +value+, preferring a state patch against the previous message
  # when patches are enabled, supported for this message kind, and estimated
  # to be smaller than the full message.
  #
  # @return the bytes produced by codec.encode_message
  def encode(value)
    msg = codec.message_for_value(value)
    session = codec.state
    patchable = session.options.enable_state_patch && session.previous_message &&
                supports_state_patch(session.previous_message, msg)
    if patchable
      patch_msg = patch_against_previous(msg)
      patch_size = encoded_size(patch_msg)
      full_size = encoded_size(msg)
      if patch_size < full_size
        begin
          return codec.encode_message(patch_msg)
        rescue StandardError
          # fall back to full message path
        end
      end
    end
    codec.encode_message(msg)
  end

  # Registers +schema+ with the session, then encodes +value+ (which must be a
  # map) as a schema object carrying the schema's fields in order, with a
  # presence bitmap when any field is missing.
  #
  # @raise Errors.invalid_data("encode_with_schema expects map value") for non-map values
  def encode_with_schema(schema, value)
    session = codec.state
    session.schemas[schema.schema_id] = schema
    session.last_schema_id = schema.schema_id
    schema.fields.each do |field|
      codec.state.field_enums[field.name] = field.enum_values.dup unless field.enum_values.empty?
    end
    raise Errors.invalid_data("encode_with_schema expects map value") unless value.kind == Model::ValueKind::MAP

    fields = []
    presence = schema.fields.map do |field|
      found = lookup_map_field(value, field.name)
      fields << found.clone_value if found
      found ? true : false
    end
    # The bitmap is only flagged as present when at least one field is absent.
    has_presence = presence.include?(false)
    msg = Model.message(
      kind: Model::MessageKind::SCHEMA_OBJECT,
      schema_object: Model::SchemaObjectMessage.new(
        schema_id: schema.schema_id, presence: presence, has_presence: has_presence, fields: fields
      )
    )
    codec.encode_message(msg)
  end

  # Encodes a list of values as one batch: a column batch for 16 or more
  # values, a row batch otherwise. A non-empty batch becomes the session's
  # previous message and is recorded as a base snapshot.
  def encode_batch(values)
    if values.empty?
      empty_batch = Model.message(
        kind: Model::MessageKind::ROW_BATCH,
        row_batch: Model::RowBatchMessage.new(rows: [])
      )
      return codec.encode_message(empty_batch)
    end

    msg =
      if values.length >= 16
        cols = columns_from_map_values(values) || rows_to_columns(rows_from_values(values))
        Dictionary.apply_dictionary_references(codec.state, cols) if codec.state.options.enable_trained_dictionary
        Model.message(
          kind: Model::MessageKind::COLUMN_BATCH,
          column_batch: Model::ColumnBatchMessage.new(count: values.length, columns: cols)
        )
      else
        Model.message(
          kind: Model::MessageKind::ROW_BATCH,
          row_batch: Model::RowBatchMessage.new(rows: rows_from_values(values))
        )
      end

    bytes = codec.encode_message(msg)
    codec.state.previous_message = msg
    codec.state.previous_message_size = bytes.bytesize
    record_full_message_as_base
    bytes
  end

  # Stores the session's previous message as a base snapshot under a freshly
  # allocated id. Does nothing when snapshots are disabled (max_base_snapshots
  # is zero) or there is no previous message.
  def record_full_message_as_base
    session = codec.state
    return if session.options.max_base_snapshots.zero?
    return if session.previous_message.nil?

    base_id = codec.state.allocate_base_id
    codec.state.register_base_snapshot(base_id, codec.state.previous_message)
  end

  # Encodes +value+ as a state patch against the previous message when one
  # exists, patching is supported for the message kind, and the patch is
  # estimated to be strictly smaller; otherwise encodes the full message.
  def encode_patch(value)
    msg = codec.message_for_value(value)
    previous = codec.state.previous_message
    return codec.encode_message(msg) if previous.nil? || !supports_state_patch(previous, msg)

    patch_msg = patch_against_previous(msg)
    if encoded_size(patch_msg) >= encoded_size(msg)
      codec.encode_message(msg)
    else
      codec.encode_message(patch_msg)
    end
  end

  # Encodes a small batch as a template batch when template batching is
  # enabled and all values share one map shape; otherwise defers to
  # encode_batch. The first batch for a template sends every column; later
  # batches send only the columns reported as changed by diff_template_columns.
  def encode_micro_batch(values)
    return encode_batch(values) if values.empty?
    unless codec.state.options.enable_template_batch && has_uniform_micro_batch_shape(values)
      return encode_batch(values)
    end

    columns = columns_from_map_values(values) || rows_to_columns(rows_from_values(values))
    Dictionary.apply_dictionary_references(codec.state, columns) if codec.state.options.enable_trained_dictionary
    template_id, known = find_template_id(codec.state.templates, columns)
    if known
      mask, sent_columns = diff_template_columns(codec.state.template_columns[template_id], columns)
      codec.state.template_columns[template_id] = columns
    else
      template_id = codec.state.allocate_template_id
      codec.state.templates[template_id] = template_descriptor_from_columns(template_id, columns)
      codec.state.template_columns[template_id] = columns
      mask = Array.new(columns.length, true)
      sent_columns = columns
    end
    msg = Model.message(
      kind: Model::MessageKind::TEMPLATE_BATCH,
      template_batch: Model::TemplateBatchMessage.new(
        template_id: template_id, count: values.length, changed_column_mask: mask, columns: sent_columns
      )
    )
    codec.encode_message(msg)
  end

  # Resets the codec's session state.
  def reset
    codec.state.reset_state
  end

  # Decodes +bytes+ through the underlying codec.
  def decode_message(bytes)
    codec.decode_message(bytes)
  end

  private

  # Builds a STATE_PATCH message describing +msg+ relative to the session's
  # previous message.
  def patch_against_previous(msg)
    ops, _literals = diff_message(codec.state.previous_message, msg)
    Model.message(
      kind: Model::MessageKind::STATE_PATCH,
      state_patch: Model::StatePatchMessage.new(
        base_ref: Model::BaseRef.previous, operations: ops, literals: []
      )
    )
  end
end
1779
+
1780
# Methods defined below this line become module functions of the enclosing
# module: callable on the module itself and private when mixed in.
+ module_function
1781
+
1782
# Looks up +key+ in a map value.
#
# @param value value object exposing kind and, for maps, a list of entries via +map+
# @param key key to search for
# @return a clone of the first matching entry's value, or nil when +value+ is
#   not a map or has no such key
def lookup_map_field(value, key)
  return nil unless value.kind == Model::ValueKind::MAP

  hit = value.map.find { |entry| entry.key == key }
  hit ? hit.value.clone_value : nil
end
1793
+
1794
# Lists the indexes of the schema fields that are present in a message.
#
# @param schema object exposing fields
# @param presence per-field presence flags (only read when has_presence)
# @param has_presence whether a presence bitmap accompanies the message
# @return [Array<Integer>] every field index when there is no bitmap,
#   otherwise the indexes whose presence flag is truthy
# @raise Errors.invalid_data("presence bitmap mismatch for schema") when the
#   bitmap length differs from the number of schema fields
def schema_present_field_indices(schema, presence, has_presence)
  return (0...schema.fields.length).to_a unless has_presence

  field_count = schema.fields.length
  raise Errors.invalid_data("presence bitmap mismatch for schema") if presence.length != field_count

  (0...field_count).select { |i| presence[i] }
end
1808
+
1809
# Normalises a logical type name: surrounding whitespace removed, lower-cased.
#
# @param raw [String] type name as supplied
# @return [String] normalised name
def normalized_logical_type(raw)
  trimmed = raw.strip
  trimmed.downcase
end
1812
+
1813
# Converts values into rows: an array value becomes a row of clones of its
# items, any other value becomes a single-cell row holding a clone of itself.
#
# @param values list of value objects
# @return [Array<Array>] one row per input value
def rows_from_values(values)
  values.map do |value|
    if value.kind == Model::ValueKind::ARRAY
      value.arr.map { |item| item.clone_value }
    else
      [value.clone_value]
    end
  end
end
1826
+
1827
# Chooses how a column records its nulls.
#
# * no nulls: ALL_PRESENT_ELIDED, no bitmap
# * nulls in at most a quarter of the values (integer division): an inverted
#   bitmap, i.e. each presence bit negated
# * otherwise: a copy of the presence bitmap
#
# @param values column values, nulls included
# @param present_bits per-row presence flags
# @return [Array] [null_strategy, bitmap_or_nil, has_bitmap]
def column_null_strategy(values, present_bits)
  null_count = values.count { |value| value.kind == Model::ValueKind::NULL }
  return [Model::NullStrategy::ALL_PRESENT_ELIDED, nil, false] if null_count.zero?

  if null_count <= values.length / 4
    [Model::NullStrategy::INVERTED_PRESENCE_BITMAP, present_bits.map { |bit| !bit }, true]
  else
    [Model::NullStrategy::PRESENCE_BITMAP, present_bits.dup, true]
  end
end
1845
+
1846
# Returns the values that are not null, in their original order.
#
# @param values list of value objects
# @return [Array] new list without the entries whose kind is NULL
def strip_nulls(values)
  values.reject { |value| value.kind == Model::ValueKind::NULL }
end
1853
+
1854
# Pivots a list of map values into columns, one per distinct key in
# first-seen order. Rows that lack a key contribute a null value and a false
# presence bit to that key's column.
#
# Fix: when a key first appears in a later row, the rows before it are now
# back-filled with Model.null_value and false presence bits. The previous code
# back-filled with nil, which made the later null counting and null stripping
# fail when they asked nil for its kind.
#
# @param values list of value objects
# @return [Array<Model::Column>, nil] columns with field_id equal to the
#   column's position, or nil when +values+ is empty or contains a non-map
def columns_from_map_values(values)
  return nil if values.empty?
  return nil unless values.all? { |value| value.kind == Model::ValueKind::MAP }

  key_index = {}
  column_values = []
  column_presence = []
  values.each_with_index do |row_value, row_idx|
    seen = Array.new(column_values.length, false)
    row_value.map.each do |entry|
      entry_value = entry.value.clone_value
      col_idx = key_index[entry.key]
      if col_idx.nil?
        col_idx = column_values.length
        key_index[entry.key] = col_idx
        # Earlier rows never had this key: mark them as absent nulls.
        column_values << Array.new(row_idx) { Model.null_value }
        column_presence << Array.new(row_idx, false)
        seen << false
      end
      column_values[col_idx] << entry_value
      column_presence[col_idx] << true
      seen[col_idx] = true
    end
    # Pad every column this row did not touch so all columns stay row-aligned.
    seen.each_with_index do |present, col_idx|
      next if present

      column_values[col_idx] << Model.null_value
      column_presence[col_idx] << false
    end
  end

  column_values.each_with_index.map do |col_values, field_id|
    null_strategy, presence, has_presence = column_null_strategy(col_values, column_presence[field_id])
    codec, tvd = infer_column_codec_and_values(strip_nulls(col_values))
    Model::Column.new(
      field_id: field_id,
      null_strategy: null_strategy,
      presence: presence,
      has_presence: has_presence,
      codec: codec,
      dictionary_id: nil,
      values: tvd
    )
  end
end
1907
+
1908
# Tells whether every value is a map with exactly the same keys in the same
# order as the first value.
#
# @param values list of value objects
# @return [Boolean] false for an empty list or when any value deviates
def has_uniform_micro_batch_shape(values)
  return false if values.empty?
  return false if values[0].kind != Model::ValueKind::MAP

  keys = values[0].map.map(&:key)
  values.drop(1).all? do |value|
    value.kind == Model::ValueKind::MAP &&
      value.map.length == keys.length &&
      keys.each_index.all? { |j| value.map[j].key == keys[j] }
  end
end
1922
+
1923
# Decides whether a shape is worth registering.
#
# @param keys list of key names describing the shape
# @param observed_count [Integer] how many times the shape has been seen
# @return [Boolean] true for a non-empty key list seen at least twice
def should_register_shape(keys, observed_count)
  return false if keys.empty?

  observed_count >= 2
end
1926
+
1927
# Tells whether +current+ can be expressed as a state patch over +base+: both
# must exist, share a message kind, and that kind must be MAP, SCHEMA_OBJECT,
# SHAPED_OBJECT or ARRAY.
#
# @param base previous message, or nil
# @param current message about to be sent, or nil
# @return [Boolean]
def supports_state_patch(base, current)
  return false if base.nil? || current.nil?
  return false unless base.kind == current.kind

  kinds = Model::MessageKind
  [kinds::MAP, kinds::SCHEMA_OBJECT, kinds::SHAPED_OBJECT, kinds::ARRAY].any? do |kind|
    base.kind == kind
  end
end
1934
+
1935
# Size used to compare candidate encodings of a message; delegates to
# estimate_message_size.
#
# @param message message to measure
# @return the value returned by estimate_message_size(message)
def encoded_size(message)
  estimated = estimate_message_size(message)
  estimated
end
1938
+
1939
# Converts a typed vector into an array value by wrapping each raw element
# with the Model constructor matching the vector's element type
# (bool, i64, u64, f64 or string).
#
# Any other element type yields an empty array value.
def typed_vector_to_value(vector)
  elements =
    case vector.element_type
    when Model::ElementType::BOOL
      bools = vector.data.bools
      Array.new(bools.length) { |index| Model.bool_value(bools[index]) }
    when Model::ElementType::I64
      i64s = vector.data.i64s
      Array.new(i64s.length) { |index| Model.i64_value(i64s[index]) }
    when Model::ElementType::U64
      u64s = vector.data.u64s
      Array.new(u64s.length) { |index| Model.u64_value(u64s[index]) }
    when Model::ElementType::F64
      f64s = vector.data.f64s
      Array.new(f64s.length) { |index| Model.f64_value(f64s[index]) }
    when Model::ElementType::STRING
      strings = vector.data.strings
      Array.new(strings.length) { |index| Model.string_value(strings[index]) }
    else
      # Unrecognised element types intentionally do not touch vector.data.
      []
    end
  Model.array_value(elements)
end
1965
+
1966
# Builds a list of Model::MapEntry objects from +entries+.
#
# Each entry's key reference is resolved to a string via +key_ref_string+,
# and its value is copied with +clone_value+. As a side effect, any resolved
# key that +state.key_table+ does not yet know (per +get_id+) is registered.
def entries_to_map(entries, state)
  entries.map do |entry|
    name = key_ref_string(entry.key, state)
    converted = Model::MapEntry.new(name, entry.value.clone_value)

    # Registration happens after the entry is built, once per unknown key.
    _, known = state.key_table.get_id(name)
    state.key_table.register(name) unless known

    converted
  end
end
1976
+
1977
# Resolves a key reference to its string form.
#
# A literal reference yields +key.literal+. An id reference is looked up in
# +state.key_table+; when the lookup reports no match, the empty string is
# returned instead.
def key_ref_string(key, state)
  return key.literal unless key.is_id

  text, found = state.key_table.get_value(key.id)
  found ? text : ""
end
1986
+
1987
# Resolves +key+ through +key_ref_string+ and returns the result, mapping the
# empty string to nil.
def key_ref_field_identity(key, state)
  resolved = key_ref_string(key, state)
  resolved == "" ? nil : resolved
end
1993
+
1994
# Pairs shape keys with their values and returns the resulting entries.
#
# +values+ holds one item per present key, in key order. When +has_presence+
# is set, a key whose slot in +presence+ is false is skipped without consuming
# a value; keys beyond the end of +presence+ are treated as present. Pairing
# stops as soon as +values+ is exhausted. Each value is copied with
# +clone_value+ before being wrapped by +Model.entry+.
def shape_values_to_map(keys, presence, has_presence, values)
  pairs = []
  keys.each_with_index do |key, position|
    absent = has_presence && position < presence.length && !presence[position]
    next if absent

    # One value is consumed per emitted entry, so the number of entries built
    # so far is also the index of the next unread value.
    cursor = pairs.length
    break if cursor >= values.length

    pairs << Model.entry(key, values[cursor].clone_value)
  end
  pairs
end
2006
+ end
2007
+ end
2008
+ end
2009
+
2010
+ require "twilic/core/protocol_helpers"
2011
+
2012
module Twilic
  module Core
    module Protocol
      # Mirror every singleton method of ProtocolHelpers onto Protocol as a
      # forwarding singleton method, leaving alone any name Protocol already
      # defines itself.
      own_singleton_names = singleton_methods(false)
      ProtocolHelpers.singleton_methods(false).each do |helper_name|
        next if own_singleton_names.include?(helper_name)

        define_singleton_method(helper_name) do |*args, **kwargs, &block|
          ProtocolHelpers.send(helper_name, *args, **kwargs, &block)
        end
      end

      # Gives +klass+ an instance method for each singleton method of
      # ProtocolHelpers, and then for each singleton method of Protocol.
      # Each generated method forwards its arguments and block to the module
      # it came from. Names for which +klass.method_defined?+ is already true
      # are skipped, so ProtocolHelpers wins when both modules offer a name.
      def self.delegate_helpers_to(klass)
        forwarding_sources = [
          [ProtocolHelpers, ProtocolHelpers.singleton_methods(false)],
          [Protocol, singleton_methods(false)]
        ]

        forwarding_sources.each do |target, method_names|
          method_names.each do |method_name|
            next if klass.method_defined?(method_name)

            klass.define_method(method_name) do |*args, **kwargs, &block|
              target.send(method_name, *args, **kwargs, &block)
            end
          end
        end
      end

      delegate_helpers_to(TwilicCodec)
      delegate_helpers_to(SessionEncoder)
    end
  end
end