twilic 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +18 -0
- data/.gitattributes +1 -0
- data/.gitignore +9 -0
- data/.markdownlint.jsonc +22 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +53 -0
- data/LICENSE +21 -0
- data/README.md +119 -0
- data/Rakefile +12 -0
- data/docs/CHANGELOG.md +31 -0
- data/docs/CONTRIBUTING.md +51 -0
- data/docs/SPEC-TEST-TRACEABILITY.md +87 -0
- data/lib/twilic/core/api.rb +30 -0
- data/lib/twilic/core/codec.rb +766 -0
- data/lib/twilic/core/dictionary.rb +236 -0
- data/lib/twilic/core/errors.rb +87 -0
- data/lib/twilic/core/interop_fixtures.rb +340 -0
- data/lib/twilic/core/model.rb +506 -0
- data/lib/twilic/core/protocol.rb +2044 -0
- data/lib/twilic/core/protocol_helpers.rb +512 -0
- data/lib/twilic/core/session.rb +461 -0
- data/lib/twilic/core/v2.rb +387 -0
- data/lib/twilic/core/wire.rb +158 -0
- data/lib/twilic/version.rb +5 -0
- data/lib/twilic.rb +147 -0
- data/package.json +14 -0
- data/pnpm-lock.yaml +723 -0
- data/twilic.gemspec +32 -0
- metadata +118 -0
|
@@ -0,0 +1,2044 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "twilic/core/model"
|
|
4
|
+
require "twilic/core/wire"
|
|
5
|
+
require "twilic/core/codec"
|
|
6
|
+
require "twilic/core/session"
|
|
7
|
+
require "twilic/core/dictionary"
|
|
8
|
+
require "twilic/core/errors"
|
|
9
|
+
require "twilic/core/v2"
|
|
10
|
+
|
|
11
|
+
module Twilic
|
|
12
|
+
module Core
|
|
13
|
+
module Protocol
|
|
14
|
+
|
|
15
|
+
# Module-level factory: returns a TwilicCodec built with no options.
def self.new_twilic_codec
  TwilicCodec.new
end
|
|
18
|
+
|
|
19
|
+
# Module-level factory: returns a TwilicCodec built from +options+.
#
# @param options passed through unchanged to TwilicCodec.new
def self.twilic_codec_with_options(options)
  TwilicCodec.new(options)
end
|
|
22
|
+
|
|
23
|
+
# One-byte tags written in front of every encoded value (see
# write_value_with_field / read_value_with_field).
TAG_NULL       = 0
TAG_BOOL_FALSE = 1 # booleans carry their payload in the tag itself
TAG_BOOL_TRUE  = 2
TAG_I64        = 3
TAG_U64        = 4
TAG_F64        = 5
TAG_STRING     = 6
TAG_BINARY     = 7
TAG_ARRAY      = 8
TAG_MAP        = 9
|
|
33
|
+
|
|
34
|
+
class TwilicCodec
|
|
35
|
+
attr_accessor :state
|
|
36
|
+
|
|
37
|
+
# Creates the codec's session state and installs fresh key, string and
# shape tables on it.
#
# @param options forwarded to Session::MutableSessionState.new when truthy;
#   when nil/false the state is built with no arguments
def initialize(options = nil)
  @state =
    if options
      Session::MutableSessionState.new(options)
    else
      Session::MutableSessionState.new
    end

  # The tables are always replaced with new instances, whatever the state
  # constructor put there.
  @state.key_table = Session::MutableInternTable.new
  @state.string_table = Session::MutableInternTable.new
  @state.shape_table = Session::MutableShapeTable.new
end
|
|
43
|
+
|
|
44
|
+
# Class-level factory: same as calling +new+ with no arguments.
def self.new_twilic_codec
  new
end
|
|
47
|
+
|
|
48
|
+
# Class-level factory: same as calling +new(options)+.
#
# @param options passed through unchanged to +new+
def self.twilic_codec_with_options(options)
  new(options)
end
|
|
51
|
+
|
|
52
|
+
# Serializes +message+ into a new mutable string via write_message.
#
# @param message the message to serialize
# @return [String] the buffer write_message appended to
def encode_message(message)
  (+"").tap { |buffer| write_message(message, buffer) }
end
|
|
57
|
+
|
|
58
|
+
# Parses exactly one message from +bytes+ and updates the session's
# "previous message" bookkeeping according to the message kind.
#
# @param bytes input handed to Wire::Reader.new; its bytesize is recorded as
#   the previous message size
# @return the message produced by read_message
# @raise Errors.invalid_data when input remains after the message
def decode_message(bytes)
  reader = Wire::Reader.new(bytes)
  decoded = read_message(reader)
  raise Errors.invalid_data("trailing bytes in message") unless reader.eof?

  # Records +snapshot+ as the previous message together with the input size.
  remember = lambda do |snapshot|
    @state.previous_message = snapshot
    @state.previous_message_size = bytes.bytesize
  end

  case decoded.kind
  when Model::MessageKind::CONTROL
    # control does not update previous message body
  when Model::MessageKind::STATE_PATCH
    begin
      patch = decoded.state_patch
      remember.call(apply_state_patch(patch.base_ref, patch.operations, patch.literals))
    rescue StandardError => e
      # Only unknown-reference / stateless-retry failures propagate; any
      # other failure while applying the patch is dropped and the previous
      # message is left as it was.
      raise e if Errors.unknown_reference?(e) || Errors.stateless_retry?(e)
    end
  when Model::MessageKind::TEMPLATE_BATCH
    # Stored only when nothing is recorded yet (read_message may already
    # have stored something for this kind).
    remember.call(decoded.clone_message) if @state.previous_message.nil?
  else
    remember.call(decoded.clone_message)
  end

  decoded
end
|
|
89
|
+
|
|
90
|
+
# Encodes a value: picks a message representation for it, serializes that
# message, and records a clone of it (plus the encoded size) as the
# session's previous message.
#
# @param value the value to encode
# @return [String] the encoded bytes
def encode_value(value)
  message = message_for_value(value)
  encode_message(message).tap do |encoded|
    @state.previous_message = message.clone_message
    @state.previous_message_size = encoded.bytesize
  end
end
|
|
97
|
+
|
|
98
|
+
# Decodes +bytes+ as a single message and converts it back into a value.
#
# Only scalar, array, map, shaped-object and typed-vector messages can be
# turned into a value; any other kind is rejected.
#
# Session bookkeeping ("previous message") is left entirely to
# decode_message. This method used to overwrite it unconditionally with a
# clone of the raw message, which undid decode_message's per-kind handling
# (control messages are not recorded, state patches record the
# reconstructed message) even for kinds that are rejected right after. For
# the kinds accepted here, decode_message has already stored the same clone.
#
# @param bytes encoded message
# @return the decoded value
# @raise Errors.invalid_data for message kinds that do not map to a value
# @raise the error built by reference_error when a shaped object names an
#   unknown shape id
def decode_value(bytes)
  msg = decode_message(bytes)

  case msg.kind
  when Model::MessageKind::SCALAR
    msg.scalar.clone_value
  when Model::MessageKind::ARRAY
    Model.array_value(msg.array)
  when Model::MessageKind::MAP
    Model.map_value(entries_to_map(msg.map, @state))
  when Model::MessageKind::SHAPED_OBJECT
    shaped = msg.shaped_object
    keys, ok = @state.shape_table.get_keys(shaped.shape_id)
    raise reference_error("shape_id", shaped.shape_id) unless ok

    Model.map_value(
      shape_values_to_map(keys, shaped.presence, shaped.has_presence, shaped.values)
    )
  when Model::MessageKind::TYPED_VECTOR
    typed_vector_to_value(msg.typed_vector)
  else
    raise Errors.invalid_data("decode_value expects scalar/array/map/vector message")
  end
end
|
|
127
|
+
|
|
128
|
+
# Raises the error for a reference (+kind+/+id+) that cannot be resolved.
#
# Which error is raised depends on the session's unknown-reference policy:
# STATELESS_RETRY selects Errors.stateless_retry_required, anything else
# selects Errors.unknown_reference.
#
# Note: this method raises itself and never returns, so a +raise+ written
# in front of a call to it is never reached.
#
# @param kind [String] label of the reference type (e.g. "shape_id")
# @param id the unresolved identifier
def reference_error(kind, id)
  policy = @state.options.unknown_reference_policy
  error =
    if policy == Session::UnknownReferencePolicy::STATELESS_RETRY
      Errors.stateless_retry_required(kind, id)
    else
      Errors.unknown_reference(kind, id)
    end
  raise error
end
|
|
134
|
+
|
|
135
|
+
# Delegates to the session shape table to compute the lookup key for a
# list of map keys.
#
# @param keys list of map keys
def shape_key(keys)
  table = @state.shape_table
  table.shape_key(keys)
end
|
|
138
|
+
|
|
139
|
+
# Chooses the message representation used to encode +value+.
#
# * Arrays become a TYPED_VECTOR message when try_make_typed_vector
#   succeeds, otherwise an ARRAY message holding cloned elements.
# * Maps become a shaped-object message when the shape table already has an
#   id for the key list AND either the key list had no recorded observation
#   before this call or the count returned by
#   observe_encode_shape_candidate is at least 2; otherwise a plain MAP
#   message.
# * Everything else becomes a SCALAR message holding a clone of the value.
#
# @param value the value to wrap
# @return the message built for it
def message_for_value(value)
  case value.kind
  when Model::ValueKind::ARRAY
    vector, vectorizable = try_make_typed_vector(value.arr)
    if vectorizable
      Model.message(kind: Model::MessageKind::TYPED_VECTOR, typed_vector: vector)
    else
      elements = value.arr.map(&:clone_value)
      Model.message(kind: Model::MessageKind::ARRAY, array: elements)
    end
  when Model::ValueKind::MAP
    entries = value.map
    keys = entries.map(&:key)
    # Must be sampled before observing: observing may create the record.
    seen_before = @state.encode_shape_observations.key?(shape_key(keys))
    sightings = observe_encode_shape_candidate(keys)
    shape_id, registered = @state.shape_table.get_id(keys)

    if registered && (!seen_before || sightings >= 2)
      shaped_message(shape_id, entries)
    else
      map_message(entries)
    end
  else
    Model.message(kind: Model::MessageKind::SCALAR, scalar: value.clone_value)
  end
end
|
|
160
|
+
|
|
161
|
+
# Builds a MAP message from map entries.
#
# A key already present in the session key table is emitted as an id
# reference; an unknown key is registered in the table and emitted as a
# literal. Entry values are cloned.
#
# @param entries map entries responding to +key+ and +value+
# @return the MAP message
def map_message(entries)
  encoded = []
  entries.each do |entry|
    name = entry.key
    id, known = @state.key_table.get_id(name)
    @state.key_table.register(name) unless known

    reference = known ? Model::KeyRef.id_ref(id) : Model::KeyRef.literal(name)
    encoded << Model::MessageMapEntry.new(key: reference, value: entry.value.clone_value)
  end
  Model.message(kind: Model::MessageKind::MAP, map: encoded)
end
|
|
175
|
+
|
|
176
|
+
# Builds a SHAPED_OBJECT message for +entries+ laid out in the key order of
# the shape registered under +shape_id+.
#
# Values are emitted only for shape keys that have an entry. When every
# shape key has one, no presence list is attached (has_presence is false
# and presence is nil); otherwise presence holds one boolean per shape key.
#
# @param shape_id id of a shape in the session shape table
# @param entries map entries responding to +key+ and +value+
# @return the SHAPED_OBJECT message
def shaped_message(shape_id, entries)
  keys, = @state.shape_table.get_keys(shape_id)

  # Later entries win when a key appears more than once.
  by_key = entries.each_with_object({}) { |entry, lookup| lookup[entry.key] = entry.value }

  slots = keys.map { |key| by_key[key] }
  presence = slots.map { |slot| slot ? true : false }
  values = slots.select { |slot| slot }.map(&:clone_value)
  complete = presence.all?

  shaped = Model::ShapedObjectMessage.new(
    shape_id: shape_id,
    values: values,
    has_presence: !complete,
    presence: complete ? nil : presence
  )
  Model.message(kind: Model::MessageKind::SHAPED_OBJECT, shaped_object: shaped)
end
|
|
203
|
+
|
|
204
|
+
# Tries to represent +values+ as a typed vector.
#
# Succeeds only when there are at least 4 values and every value has the
# same kind, which must be one of BOOL, I64, U64, F64 or STRING. Booleans
# always use the DIRECT_BITPACK codec; the other element types ask the
# matching select_*_codec helper.
#
# @param values [Array] values responding to +kind+ and the typed reader
#   for their kind (+bool+, +i64+, +u64+, +f64+, +str+)
# @return [Array(Model::TypedVector, true)] on success
# @return [Array(nil, false)] when the values cannot form a typed vector
def try_make_typed_vector(values)
  return [nil, false] if values.length < 4

  # Classify every value by the TypedVectorData column it would fill; any
  # kind outside the five supported ones aborts immediately.
  lanes = values.map do |value|
    case value.kind
    when Model::ValueKind::BOOL then :bools
    when Model::ValueKind::I64 then :i64s
    when Model::ValueKind::U64 then :u64s
    when Model::ValueKind::F64 then :f64s
    when Model::ValueKind::STRING then :strings
    else return [nil, false]
    end
  end

  lane = lanes.first
  return [nil, false] unless lanes.all? { |other| other == lane }

  element_type, codec, payload =
    case lane
    when :bools
      [Model::ElementType::BOOL, Model::VectorCodec::DIRECT_BITPACK, values.map(&:bool)]
    when :i64s
      ints = values.map(&:i64)
      [Model::ElementType::I64, select_integer_codec(ints), ints]
    when :u64s
      uints = values.map(&:u64)
      [Model::ElementType::U64, select_u64_codec(uints), uints]
    when :f64s
      floats = values.map(&:f64)
      [Model::ElementType::F64, select_float_codec(floats), floats]
    else
      strings = values.map(&:str)
      [Model::ElementType::STRING, select_string_codec(strings), strings]
    end

  # Every column is present; only the one for this element type is filled.
  columns = { bools: [], i64s: [], u64s: [], f64s: [], strings: [], binary: [], values: [] }
  columns[lane] = payload

  vector = Model::TypedVector.new(
    element_type: element_type,
    codec: codec,
    data: Model::TypedVectorData.new(kind: element_type, **columns)
  )
  [vector, true]
end
|
|
351
|
+
|
|
352
|
+
# Appends the wire form of +message+ to +out+.
#
# Every supported kind starts with one byte holding the kind's value,
# followed by a kind-specific body. Writing a schema object may update the
# session's last schema id, and writing a base snapshot registers the
# snapshot payload in the session state.
#
# @param message the message to serialize
# @param out [String] mutable buffer that is appended to
# @raise Errors.invalid_data for a message kind with no branch below
def write_message(message, out)
  kind = message.kind

  case kind
  when Model::MessageKind::SCALAR
    out << kind.value.chr
    write_value(message.scalar, out)
  when Model::MessageKind::ARRAY
    out << kind.value.chr
    items = message.array
    Wire.encode_varuint(items.length, out)
    items.each { |item| write_value(item, out) }
  when Model::MessageKind::MAP
    out << kind.value.chr
    pairs = message.map
    Wire.encode_varuint(pairs.length, out)
    pairs.each do |pair|
      write_key_ref(pair.key, out)
      identity = key_ref_field_identity(pair.key, @state)
      write_value_with_field(pair.value, identity, out)
    end
  when Model::MessageKind::SHAPED_OBJECT
    out << kind.value.chr
    shaped = message.shaped_object
    Wire.encode_varuint(shaped.shape_id, out)
    write_presence(shaped.presence, shaped.has_presence, out)
    Wire.encode_varuint(shaped.values.length, out)

    keys, known = @state.shape_table.get_keys(shaped.shape_id)
    cursor = 0
    if known
      # Without an explicit presence list every shape key counts as present.
      mask = shaped.has_presence ? shaped.presence : Array.new(keys.length, true)
      keys.each_with_index do |key, i|
        next if i < mask.length && !mask[i]
        break if cursor >= shaped.values.length

        # The shape key doubles as the field identity of the value.
        write_value_with_field(shaped.values[cursor], key, out)
        cursor += 1
      end
    end
    # Values not paired with a shape key (all of them when the shape is
    # unknown) are written without a field identity.
    shaped.values.drop(cursor).each { |leftover| write_value(leftover, out) }
  when Model::MessageKind::SCHEMA_OBJECT
    out << kind.value.chr
    object = message.schema_object
    declared_id = object.schema_id
    if declared_id
      out << 1.chr
      Wire.encode_varuint(declared_id, out)
    else
      out << 0.chr
    end
    write_presence(object.presence, object.has_presence, out)
    Wire.encode_varuint(object.fields.length, out)

    # Fall back to the last schema id used in this session when the message
    # does not carry one.
    lookup_id = declared_id || @state.last_schema_id
    schema = lookup_id ? @state.schemas[lookup_id] : nil

    if schema
      out << 1.chr
      write_schema_fields(schema, object.presence, object.has_presence, object.fields, out)
      @state.last_schema_id = declared_id if declared_id
    else
      out << 0.chr
      object.fields.each { |field| write_value(field, out) }
    end
  when Model::MessageKind::TYPED_VECTOR
    out << kind.value.chr
    write_typed_vector(message.typed_vector, out)
  when Model::MessageKind::ROW_BATCH
    out << kind.value.chr
    rows = message.row_batch.rows
    Wire.encode_varuint(rows.length, out)
    rows.each do |row|
      Wire.encode_varuint(row.length, out)
      row.each { |cell| write_value(cell, out) }
    end
  when Model::MessageKind::COLUMN_BATCH
    out << kind.value.chr
    batch = message.column_batch
    Wire.encode_varuint(batch.count, out)
    Wire.encode_varuint(batch.columns.length, out)
    batch.columns.each { |column| write_column(column, out) }
  when Model::MessageKind::CONTROL
    out << kind.value.chr
    write_control(message.control, out)
  when Model::MessageKind::EXT
    out << kind.value.chr
    Wire.encode_varuint(message.ext.ext_type, out)
    Wire.encode_bytes(message.ext.payload, out)
  when Model::MessageKind::STATE_PATCH
    out << kind.value.chr
    patch = message.state_patch
    write_base_ref(patch.base_ref, out)
    Wire.encode_varuint(patch.operations.length, out)
    patch.operations.each do |op|
      Wire.encode_varuint(op.field_id, out)
      out << op.opcode.value.chr
      # One flag byte says whether a value follows the opcode.
      if op.value
        out << 1.chr
        write_value(op.value, out)
      else
        out << 0.chr
      end
    end
    Wire.encode_varuint(patch.literals.length, out)
    patch.literals.each { |literal| write_value(literal, out) }
  when Model::MessageKind::TEMPLATE_BATCH
    out << kind.value.chr
    template = message.template_batch
    Wire.encode_varuint(template.template_id, out)
    Wire.encode_varuint(template.count, out)
    Wire.encode_bitmap(template.changed_column_mask, out)
    Wire.encode_varuint(template.columns.length, out)
    template.columns.each { |column| write_column(column, out) }
  when Model::MessageKind::CONTROL_STREAM
    out << kind.value.chr
    stream = message.control_stream
    out << stream.codec.value.chr
    write_control_stream_payload(stream.codec, stream.payload, out)
  when Model::MessageKind::BASE_SNAPSHOT
    out << kind.value.chr
    snapshot = message.base_snapshot
    Wire.encode_varuint(snapshot.base_id, out)
    Wire.encode_varuint(snapshot.schema_or_shape_ref, out)
    write_message(snapshot.payload, out)
    @state.register_base_snapshot(snapshot.base_id, snapshot.payload)
  else
    raise Errors.invalid_data("unsupported message kind")
  end
end
|
|
492
|
+
|
|
493
|
+
# Reads one message from +reader+: a kind byte followed by the
# kind-specific body.
#
# Reading can update session state: MAP messages are reported to
# observe_decode_shape_candidate, schema objects may update the last schema
# id, template batches update the stored template columns/descriptors (and,
# for counts of 16 or more, the previous message), and base snapshots are
# registered.
#
# @param reader reader positioned at the start of a message
# @return the decoded message
# @raise Errors.invalid_kind when the kind byte is not recognized
# @raise Errors.invalid_data for malformed bodies
def read_message(reader)
  kind_byte = reader.read_u8
  kind = Model::MessageKind.from_byte(kind_byte)
  raise Errors.invalid_kind(kind_byte) if kind.nil?

  # Reads +count+ plain values, one at a time.
  read_values = lambda do |count|
    collected = []
    count.times { collected << read_value(reader) }
    collected
  end

  case kind
  when Model::MessageKind::SCALAR
    scalar = read_value(reader)
    Model.message(kind: Model::MessageKind::SCALAR, scalar: scalar)
  when Model::MessageKind::ARRAY
    items = read_values.call(reader.read_varuint)
    Model.message(kind: Model::MessageKind::ARRAY, array: items)
  when Model::MessageKind::MAP
    entry_count = reader.read_varuint
    pairs = []
    entry_count.times do
      key_ref = read_key_ref(reader)
      identity = key_ref_field_identity(key_ref, @state)
      decoded = read_value_with_field(reader, identity)
      pairs << Model::MessageMapEntry.new(key: key_ref, value: decoded)
    end
    key_names = pairs.map { |pair| key_ref_string(pair.key, @state) }
    observe_decode_shape_candidate(key_names)
    Model.message(kind: Model::MessageKind::MAP, map: pairs)
  when Model::MessageKind::SHAPED_OBJECT
    shape_id = reader.read_varuint
    presence, has_presence = read_presence(reader)
    value_count = reader.read_varuint
    values = []
    keys, known = @state.shape_table.get_keys(shape_id)
    if known
      # Without an explicit presence list every shape key counts as present.
      mask = has_presence ? presence : Array.new(keys.length, true)
      keys.each_with_index do |key, i|
        next if i < mask.length && !mask[i]
        break if values.length >= value_count

        values << read_value_with_field(reader, key)
      end
    end
    # Values not paired with a shape key (all of them when the shape is
    # unknown) are read without a field identity.
    values << read_value(reader) while values.length < value_count
    Model.message(
      kind: Model::MessageKind::SHAPED_OBJECT,
      shaped_object: Model::ShapedObjectMessage.new(
        shape_id: shape_id, presence: presence, has_presence: has_presence, values: values
      )
    )
  when Model::MessageKind::SCHEMA_OBJECT
    schema_id = reader.read_u8 == 1 ? reader.read_varuint : nil
    presence, has_presence = read_presence(reader)
    field_count = reader.read_varuint
    mode = reader.read_u8
    if mode == 1
      # Schema-driven fields need a schema id: the explicit one, else the
      # last one used in this session.
      effective_id = schema_id || @state.last_schema_id ||
                     raise(Errors.invalid_data("schema object requires schema id in context"))
      schema = @state.schemas[effective_id]
      raise reference_error("schema_id", effective_id) if schema.nil?

      fields = read_schema_fields(schema, presence, has_presence, field_count, reader)
      @state.last_schema_id = effective_id
    else
      fields = read_values.call(field_count)
      @state.last_schema_id = schema_id if schema_id
    end
    Model.message(
      kind: Model::MessageKind::SCHEMA_OBJECT,
      schema_object: Model::SchemaObjectMessage.new(
        schema_id: schema_id, presence: presence, has_presence: has_presence, fields: fields
      )
    )
  when Model::MessageKind::TYPED_VECTOR
    vector = read_typed_vector(reader, nil, nil)
    Model.message(kind: Model::MessageKind::TYPED_VECTOR, typed_vector: vector)
  when Model::MessageKind::ROW_BATCH
    row_count = reader.read_varuint
    rows = []
    row_count.times { rows << read_values.call(reader.read_varuint) }
    Model.message(
      kind: Model::MessageKind::ROW_BATCH,
      row_batch: Model::RowBatchMessage.new(rows: rows)
    )
  when Model::MessageKind::COLUMN_BATCH
    count = reader.read_varuint
    column_count = reader.read_varuint
    columns = []
    column_count.times { columns << read_column(reader) }
    Model.message(
      kind: Model::MessageKind::COLUMN_BATCH,
      column_batch: Model::ColumnBatchMessage.new(count: count, columns: columns)
    )
  when Model::MessageKind::CONTROL
    control = read_control(reader)
    Model.message(kind: Model::MessageKind::CONTROL, control: control)
  when Model::MessageKind::EXT
    ext_type = reader.read_varuint
    payload = reader.read_bytes
    Model.message(
      kind: Model::MessageKind::EXT,
      ext: Model::ExtMessage.new(ext_type: ext_type, payload: payload)
    )
  when Model::MessageKind::STATE_PATCH
    base_ref = read_base_ref(reader)
    op_count = reader.read_varuint
    operations = []
    op_count.times do
      field_id = reader.read_varuint
      op_byte = reader.read_u8
      opcode = Model::PatchOpcode.from_byte(op_byte)
      raise Errors.invalid_data("patch opcode") if opcode.nil?

      # One flag byte says whether a value follows the opcode.
      operand = reader.read_u8 == 1 ? read_value(reader) : nil
      operations << Model::PatchOperation.new(field_id: field_id, opcode: opcode, value: operand)
    end
    literals = read_values.call(reader.read_varuint)
    Model.message(
      kind: Model::MessageKind::STATE_PATCH,
      state_patch: Model::StatePatchMessage.new(
        base_ref: base_ref, operations: operations, literals: literals
      )
    )
  when Model::MessageKind::TEMPLATE_BATCH
    template_id = reader.read_varuint
    count = reader.read_varuint
    mask = reader.read_bitmap
    changed_count = reader.read_varuint
    changed_cols = []
    changed_count.times { changed_cols << read_column(reader) }

    earlier = @state.template_columns[template_id]
    if earlier
      full_cols = merge_template_columns(earlier, mask, changed_cols)
    else
      # With no stored columns for this template, every mask bit must be
      # set; otherwise the template reference cannot be resolved.
      raise reference_error("template_id", template_id) unless mask.all?

      full_cols = changed_cols
    end
    @state.template_columns[template_id] = full_cols
    @state.templates[template_id] = template_descriptor_from_columns(template_id, full_cols)
    if count >= 16
      @state.previous_message = Model.message(
        kind: Model::MessageKind::COLUMN_BATCH,
        column_batch: Model::ColumnBatchMessage.new(count: count, columns: full_cols)
      )
    end
    Model.message(
      kind: Model::MessageKind::TEMPLATE_BATCH,
      template_batch: Model::TemplateBatchMessage.new(
        template_id: template_id, count: count, changed_column_mask: mask, columns: changed_cols
      )
    )
  when Model::MessageKind::CONTROL_STREAM
    codec_byte = reader.read_u8
    codec = Model::ControlStreamCodec.from_byte(codec_byte)
    raise Errors.invalid_data("control stream codec") if codec.nil?

    stream_payload = read_control_stream_payload(codec, reader)
    Model.message(
      kind: Model::MessageKind::CONTROL_STREAM,
      control_stream: Model::ControlStreamMessage.new(codec: codec, payload: stream_payload)
    )
  when Model::MessageKind::BASE_SNAPSHOT
    base_id = reader.read_varuint
    schema_or_shape_ref = reader.read_varuint
    inner = read_message(reader)
    @state.register_base_snapshot(base_id, inner)
    Model.message(
      kind: Model::MessageKind::BASE_SNAPSHOT,
      base_snapshot: Model::BaseSnapshotMessage.new(
        base_id: base_id,
        schema_or_shape_ref: schema_or_shape_ref,
        payload: inner
      )
    )
  else
    raise Errors.invalid_data("unsupported message kind")
  end
end
|
|
697
|
+
|
|
698
|
+
# Appends +value+ to +out+ with no field identity (so no inline-enum
# lookup is attempted for strings).
#
# @param value the value to serialize
# @param out [String] mutable buffer that is appended to
def write_value(value, out)
  no_field = nil
  write_value_with_field(value, no_field, out)
end
|
|
701
|
+
|
|
702
|
+
# Appends the tagged wire form of +value+ to +out+.
#
# Strings are written in the first mode that applies, in this order:
# inline enum (only when +field_identity+ has enum values registered and
# one equals the string), empty, reference to the session string table,
# prefix delta against a table entry, literal. The prefix-delta and literal
# modes also register the string in the table.
#
# A value kind with no branch below writes nothing.
#
# @param value the value to serialize
# @param field_identity identity of the field the value belongs to, or nil
# @param out [String] mutable buffer that is appended to
def write_value_with_field(value, field_identity, out)
  case value.kind
  when Model::ValueKind::NULL
    out << TAG_NULL.chr
  when Model::ValueKind::BOOL
    bool_tag = value.bool ? TAG_BOOL_TRUE : TAG_BOOL_FALSE
    out << bool_tag.chr
  when Model::ValueKind::I64
    out << TAG_I64.chr
    write_smallest_u64(Wire.encode_zigzag(value.i64), out)
  when Model::ValueKind::U64
    out << TAG_U64.chr
    write_smallest_u64(value.u64, out)
  when Model::ValueKind::F64
    out << TAG_F64.chr
    Wire.append_f64_le(out, value.f64)
  when Model::ValueKind::STRING
    out << TAG_STRING.chr

    enum_vals = field_identity.nil? ? nil : @state.field_enums[field_identity]
    unless enum_vals.nil?
      hit = enum_vals.each_with_index.find { |candidate, _position| candidate == value.str }
      if hit
        out << Model::StringMode::INLINE_ENUM.value.chr
        Wire.encode_varuint(hit.last, out)
        return
      end
    end

    if value.str.empty?
      out << Model::StringMode::EMPTY.value.chr
      return
    end

    known_id, known = @state.string_table.get_id(value.str)
    if known
      out << Model::StringMode::REF.value.chr
      Wire.encode_varuint(known_id, out)
      return
    end

    base_id, prefix_len, has_prefix = best_prefix_base(value.str)
    # Only use a shared prefix of at least 4 bytes that leaves a non-empty
    # suffix.
    if has_prefix && prefix_len >= 4 && prefix_len < value.str.bytesize
      out << Model::StringMode::PREFIX_DELTA.value.chr
      Wire.encode_varuint(base_id, out)
      Wire.encode_varuint(prefix_len, out)
      suffix_len = value.str.bytesize - prefix_len
      Wire.encode_string(value.str.byteslice(prefix_len, suffix_len), out)
      @state.string_table.register(value.str)
      return
    end

    out << Model::StringMode::LITERAL.value.chr
    Wire.encode_string(value.str, out)
    @state.string_table.register(value.str)
  when Model::ValueKind::BINARY
    out << TAG_BINARY.chr
    Wire.encode_bytes(value.bin, out)
  when Model::ValueKind::ARRAY
    out << TAG_ARRAY.chr
    Wire.encode_varuint(value.arr.length, out)
    value.arr.each { |element| write_value(element, out) }
  when Model::ValueKind::MAP
    out << TAG_MAP.chr
    Wire.encode_varuint(value.map.length, out)
    value.map.each do |pair|
      # Nested map keys are always written as literals; the key also serves
      # as the field identity of its value.
      write_key_ref(Model::KeyRef.literal(pair.key), out)
      write_value_with_field(pair.value, pair.key, out)
    end
  end
end
|
|
769
|
+
|
|
770
|
+
# Reads one tagged value from +reader+ with no field identity (an
# inline-enum string is therefore rejected).
#
# @param reader reader positioned at a value tag
# @return the decoded value
def read_value(reader)
  no_field = nil
  read_value_with_field(reader, no_field)
end
|
|
773
|
+
|
|
774
|
+
# Reads one tagged value from +reader+.
#
# Literal and prefix-delta strings are registered in the session string
# table as they are read; reference and prefix-delta strings are resolved
# against that table.
#
# @param reader reader positioned at a value tag
# @param field_identity identity of the field being read, or nil; needed to
#   resolve inline-enum strings
# @return the decoded value
# @raise Errors.invalid_tag for an unknown tag byte
# @raise Errors.invalid_data for an unknown string mode, an over-long
#   prefix length, or an unusable inline enum
def read_value_with_field(reader, field_identity)
  tag = reader.read_u8

  case tag
  when TAG_NULL then Model.null_value
  when TAG_BOOL_FALSE then Model.bool_value(false)
  when TAG_BOOL_TRUE then Model.bool_value(true)
  when TAG_I64
    zigzagged = read_smallest_u64(reader)
    Model.i64_value(Wire.decode_zigzag(zigzagged))
  when TAG_U64
    Model.u64_value(read_smallest_u64(reader))
  when TAG_F64
    Model.f64_value(Wire.read_f64_le(reader))
  when TAG_STRING
    mode_byte = reader.read_u8
    mode = Model::StringMode.from_byte(mode_byte)
    raise Errors.invalid_data("string mode") if mode.nil?

    case mode
    when Model::StringMode::EMPTY
      Model.string_value("")
    when Model::StringMode::LITERAL
      text = reader.read_string
      @state.string_table.register(text)
      Model.string_value(text)
    when Model::StringMode::REF
      ref_id = reader.read_varuint
      text, found = @state.string_table.get_value(ref_id)
      raise reference_error("string_id", ref_id) unless found

      Model.string_value(text)
    when Model::StringMode::PREFIX_DELTA
      base_id = reader.read_varuint
      prefix_len = reader.read_varuint
      suffix = reader.read_string
      base, found = @state.string_table.get_value(base_id)
      raise reference_error("string_id", base_id) unless found
      raise Errors.invalid_data("prefix delta length") if prefix_len > base.bytesize

      # Rebuild the string from the first prefix_len bytes of the base.
      text = base.byteslice(0, prefix_len) + suffix
      @state.string_table.register(text)
      Model.string_value(text)
    when Model::StringMode::INLINE_ENUM
      raise Errors.invalid_data("inline enum missing field identity") if field_identity.nil?

      enum_vals = @state.field_enums[field_identity]
      raise Errors.invalid_data("inline enum unknown field") if enum_vals.nil?

      code = reader.read_varuint
      raise Errors.invalid_data("inline enum code") if code >= enum_vals.length

      Model.string_value(enum_vals[code])
    end
  when TAG_BINARY
    Model.binary_value(reader.read_bytes)
  when TAG_ARRAY
    element_count = reader.read_varuint
    elements = []
    element_count.times { elements << read_value(reader) }
    Model.array_value(elements)
  when TAG_MAP
    pair_count = reader.read_varuint
    pairs = []
    pair_count.times do
      key_ref = read_key_ref(reader)
      # Uses the key reference's literal; the key also serves as the field
      # identity of its value.
      name = key_ref.literal
      pairs << Model.entry(name, read_value_with_field(reader, name))
    end
    Model.map_value(pairs)
  else
    raise Errors.invalid_tag(tag)
  end
end
|
|
850
|
+
|
|
851
|
+
# Writes the values of the schema fields that are marked present.
#
# +fields+ holds one value per present field, in the order returned by
# Protocol.schema_present_field_indices. Each value is written through
# write_schema_field_value using the matching schema field definition.
#
# Raises invalid_data("schema fields length mismatch") when the number of
# values differs from the number of present fields. The check runs before
# anything is written, and it rejects surplus values as well as missing ones:
# read_schema_fields requires an exact count, so silently dropping extras
# would hide a caller bug.
def write_schema_fields(schema, presence, has_presence, fields, out)
  indices = Protocol.schema_present_field_indices(schema, presence, has_presence)
  raise Errors.invalid_data("schema fields length mismatch") if indices.length != fields.length

  indices.each_with_index do |schema_idx, i|
    write_schema_field_value(schema.fields[schema_idx], fields[i], out)
  end
end
|
|
859
|
+
|
|
860
|
+
# Reads the values of the schema fields that are marked present.
#
# +n+ is the number of values the caller expects; it must equal the number of
# present fields computed by Protocol.schema_present_field_indices, otherwise
# invalid_data("schema fields length") is raised before anything is read.
# Returns the decoded values in present-field order.
def read_schema_fields(schema, presence, has_presence, n, reader)
  present = Protocol.schema_present_field_indices(schema, presence, has_presence)
  raise Errors.invalid_data("schema fields length") unless present.length == n

  present.map { |schema_idx| read_schema_field_value(schema.fields[schema_idx], reader) }
end
|
|
870
|
+
|
|
871
|
+
# Writes one schema field value after checking it against the field's
# normalized logical type.
#
# bool / i64 / u64 / f64 / string fields must carry a value of the matching
# kind, otherwise invalid_data("schema <type> field type mismatch") is raised.
# String fields are written with the field name as field identity; every
# other logical type, including unrecognised ones, goes through plain
# write_value without a kind check.
def write_schema_field_value(field, value, out)
  logical = Protocol.normalized_logical_type(field.logical_type)
  required_kind, label =
    case logical
    when "bool" then [Model::ValueKind::BOOL, "bool"]
    when "i64", "int64", "int" then [Model::ValueKind::I64, "i64"]
    when "u64", "uint64", "uint" then [Model::ValueKind::U64, "u64"]
    when "f64", "float64", "float" then [Model::ValueKind::F64, "f64"]
    when "string" then [Model::ValueKind::STRING, "string"]
    end

  # label is nil for logical types that carry no kind constraint.
  if label && !(value.kind == required_kind)
    raise Errors.invalid_data("schema #{label} field type mismatch")
  end

  if logical == "string"
    write_value_with_field(value, field.name, out)
  else
    write_value(value, out)
  end
end
|
|
897
|
+
|
|
898
|
+
# Reads one schema field value.
#
# String-typed fields are decoded with the field name as field identity;
# all other fields are decoded with plain read_value.
def read_schema_field_value(field, reader)
  string_field = Protocol.normalized_logical_type(field.logical_type) == "string"
  string_field ? read_value_with_field(reader, field.name) : read_value(reader)
end
|
|
904
|
+
|
|
905
|
+
# Writes a key reference.
#
# Id references are written as mode byte 1 followed by the varuint id.
# Literal references are written as mode byte 0 followed by the string, and
# the literal is then registered in @state.key_table.
def write_key_ref(key_ref, out)
  if key_ref.is_id
    out << 1.chr
    Wire.encode_varuint(key_ref.id, out)
    nil
  else
    out << 0.chr
    name = key_ref.literal
    Wire.encode_string(name, out)
    @state.key_table.register(name)
  end
end
|
|
915
|
+
|
|
916
|
+
# Reads a key reference and always returns it as a literal KeyRef.
#
# Mode 1 carries a key id, which is resolved through @state.key_table; an
# unknown id raises a "key_id" reference error. Mode 0 carries the key string
# itself, which is registered in the key table. Any other mode byte raises
# invalid_data("key ref mode").
def read_key_ref(reader)
  case reader.read_u8
  when 1
    key_id = reader.read_varuint
    name, found = @state.key_table.get_value(key_id)
    raise reference_error("key_id", key_id) unless found

    Model::KeyRef.literal(name)
  when 0
    name = reader.read_string
    @state.key_table.register(name)
    Model::KeyRef.literal(name)
  else
    raise Errors.invalid_data("key ref mode")
  end
end
|
|
931
|
+
|
|
932
|
+
# Writes the optional presence bitmap: a flag byte (0 = absent, 1 = present)
# followed, when present, by the bitmap encoded with Wire.encode_bitmap.
def write_presence(presence, has_presence, out)
  if has_presence
    out << 1.chr
    Wire.encode_bitmap(presence, out)
  else
    out << 0.chr
    nil
  end
end
|
|
940
|
+
|
|
941
|
+
# Reads the optional presence bitmap.
#
# Returns [bitmap, true] when the flag byte is 1 and [nil, false] when it is
# 0. Any other flag raises invalid_data("presence flag").
def read_presence(reader)
  case reader.read_u8
  when 0 then [nil, false]
  when 1 then [reader.read_bitmap, true]
  else raise Errors.invalid_data("presence flag")
  end
end
|
|
948
|
+
|
|
949
|
+
# Returns the number of elements stored in +data+ for its declared kind,
# or 0 when the kind is not one of the known element types.
def typed_vector_len(data)
  accessor =
    case data.kind
    when Model::ElementType::BOOL then :bools
    when Model::ElementType::I64 then :i64s
    when Model::ElementType::U64 then :u64s
    when Model::ElementType::F64 then :f64s
    when Model::ElementType::STRING then :strings
    when Model::ElementType::BINARY then :binary
    when Model::ElementType::VALUE then :values
    end
  accessor ? data.public_send(accessor).length : 0
end
|
|
969
|
+
|
|
970
|
+
# Writes a typed vector: element-type byte, varuint element count, codec byte,
# then the payload in the representation that belongs to the element type.
#
# Binary and value vectors repeat their element count in front of the
# individually encoded elements. An unknown element type raises
# invalid_data("unsupported element type") after the header has been written.
def write_typed_vector(vector, out)
  out << vector.element_type.value.chr
  payload = vector.data
  Wire.encode_varuint(typed_vector_len(payload), out)
  out << vector.codec.value.chr

  case vector.element_type
  when Model::ElementType::BOOL then Wire.encode_bitmap(payload.bools, out)
  when Model::ElementType::I64 then Codec.encode_i64_vector(payload.i64s, vector.codec, out)
  when Model::ElementType::U64 then Codec.encode_u64_vector(payload.u64s, vector.codec, out)
  when Model::ElementType::F64 then Codec.encode_f64_vector(payload.f64s, vector.codec, out)
  when Model::ElementType::STRING then write_string_vector(payload.strings, vector.codec, out)
  when Model::ElementType::BINARY
    blobs = payload.binary
    Wire.encode_varuint(blobs.length, out)
    blobs.each { |blob| Wire.encode_bytes(blob, out) }
  when Model::ElementType::VALUE
    items = payload.values
    Wire.encode_varuint(items.length, out)
    items.each { |item| write_value(item, out) }
  else
    raise Errors.invalid_data("unsupported element type")
  end
end
|
|
995
|
+
|
|
996
|
+
# Reads a typed vector and returns a Model::TypedVector.
#
# +forced_element+: when non-nil it is used as the element type and no
# element-type byte is read from the stream.
# +expected_codec+: when non-nil the codec byte read from the stream must
# match it, otherwise invalid_data("column codec mismatch") is raised.
#
# Also raises for an unknown element type or codec byte, and when the decoded
# payload does not contain the declared number of elements.
def read_typed_vector(reader, forced_element, expected_codec)
  elem_type = forced_element
  if elem_type.nil?
    elem_type = Model::ElementType.from_byte(reader.read_u8)
    raise Errors.invalid_data("vector element type") if elem_type.nil?
  end

  declared_len = reader.read_varuint
  codec = Model::VectorCodec.from_byte(reader.read_u8)
  raise Errors.invalid_data("vector codec") if codec.nil?
  raise Errors.invalid_data("column codec mismatch") unless expected_codec.nil? || codec == expected_codec

  empty = Model::TypedVectorData.new(
    kind: elem_type, bools: [], i64s: [], u64s: [], f64s: [], strings: [], binary: [], values: []
  )
  data =
    case elem_type
    when Model::ElementType::BOOL then empty.with(bools: reader.read_bitmap)
    when Model::ElementType::I64 then empty.with(i64s: Codec.decode_i64_vector(reader, codec))
    when Model::ElementType::U64 then empty.with(u64s: Codec.decode_u64_vector(reader, codec))
    when Model::ElementType::F64 then empty.with(f64s: Codec.decode_f64_vector(reader, codec))
    when Model::ElementType::STRING then empty.with(strings: read_string_vector(reader, codec))
    when Model::ElementType::BINARY
      count = reader.read_varuint
      empty.with(binary: count.times.map { reader.read_bytes })
    when Model::ElementType::VALUE
      count = reader.read_varuint
      empty.with(values: count.times.map { read_value(reader) })
    else
      # Unknown (forced) element type: keep the empty payload; the length
      # check below then only passes for a declared length of zero.
      empty
    end
  raise Errors.invalid_data("typed vector length mismatch") if typed_vector_len(data) != declared_len

  Model::TypedVector.new(element_type: elem_type, codec: codec, data: data)
end
|
|
1041
|
+
|
|
1042
|
+
# Writes one column.
#
# Layout, in order:
#   1. varuint field id and null-strategy byte;
#   2. the presence bitmap, for the two bitmap null strategies only (raises
#      invalid_data("missing column presence bitmap") when it is absent);
#   3. codec byte;
#   4. dictionary header: flag byte, and when a dictionary id is set the id
#      followed by a profile flag. The profile (version, hash, expiry,
#      fallback byte) and the dictionary payload are inlined only when both
#      are known to @state;
#   5. payload-mode byte: 1 + trained-dictionary block when the column is a
#      string column with a DICTIONARY/STRING_REF codec and the block could
#      be encoded, otherwise 0 + a regular typed vector.
def write_column(column, out)
  Wire.encode_varuint(column.field_id, out)
  out << column.null_strategy.value.chr
  case column.null_strategy
  when Model::NullStrategy::PRESENCE_BITMAP, Model::NullStrategy::INVERTED_PRESENCE_BITMAP
    unless column.has_presence && !column.presence.nil?
      raise Errors.invalid_data("missing column presence bitmap")
    end

    Wire.encode_bitmap(column.presence, out)
  end
  out << column.codec.value.chr

  dict_id = column.dictionary_id
  if dict_id
    out << 1.chr
    Wire.encode_varuint(dict_id, out)
    payload = @state.dictionaries[dict_id]
    # The profile is only looked up when the payload itself is known.
    profile = payload && @state.dictionary_profiles[dict_id]
    if profile
      out << 1.chr
      Wire.encode_varuint(profile.version, out)
      Wire.encode_varuint(profile.hash, out)
      Wire.encode_varuint(profile.expires_at, out)
      out << dictionary_fallback_to_byte(profile.fallback).chr
      Wire.encode_bytes(payload, out)
    else
      out << 0.chr
    end
  else
    out << 0.chr
  end

  trained_block = nil
  if !dict_id.nil? && column.values.kind == Model::ElementType::STRING &&
     (column.codec == Model::VectorCodec::DICTIONARY || column.codec == Model::VectorCodec::STRING_REF)
    dict_payload = @state.dictionaries[dict_id]
    if dict_payload
      begin
        trained = Dictionary.decode_trained_dictionary_payload(dict_payload)
        block, ok = Dictionary.encode_trained_dictionary_block(column.values.strings, trained)
        trained_block = block if ok
      rescue StandardError
        # Best effort: any failure falls back to the regular typed-vector
        # encoding below.
      end
    end
  end

  if trained_block.nil?
    out << 0.chr
    plain_vector = Model::TypedVector.new(
      element_type: column.values.kind,
      codec: column.codec,
      data: Model.clone_typed_vector_data(column.values)
    )
    write_typed_vector(plain_vector, out)
  else
    out << 1.chr
    Wire.encode_bytes(trained_block, out)
    nil
  end
end
|
|
1105
|
+
|
|
1106
|
+
# Reads one column and returns a Model::Column.
#
# Stream order: field id, null strategy (+ presence bitmap for the bitmap
# strategies), codec, dictionary header, payload mode, payload.
#
# Dictionary header: flag 0 means no dictionary. Flag 1 is followed by the
# dictionary id and a profile flag; profile flag 0 requires the id to be
# known to @state already, profile flag 1 carries version, hash, expiry,
# fallback and the payload, which are stored in @state after the payload
# hash has been verified.
#
# Payload mode 0 is a regular typed vector using the column codec. Payload
# mode 1 is a trained-dictionary block, which requires a dictionary id and a
# DICTIONARY or STRING_REF codec.
#
# Raises invalid_data / reference errors for every malformed flag, unknown
# enum byte, hash mismatch or unresolved dictionary id.
def read_column(reader)
  field_id = reader.read_varuint
  null_strategy = Model::NullStrategy.from_byte(reader.read_u8)
  raise Errors.invalid_data("null strategy") if null_strategy.nil?

  presence, has_presence =
    case null_strategy
    when Model::NullStrategy::PRESENCE_BITMAP, Model::NullStrategy::INVERTED_PRESENCE_BITMAP
      [reader.read_bitmap, true]
    else
      [nil, false]
    end

  codec = Model::VectorCodec.from_byte(reader.read_u8)
  raise Errors.invalid_data("column codec") if codec.nil?

  dictionary_id = nil
  dict_flag = reader.read_u8
  if dict_flag == 1
    dict_id = reader.read_varuint
    case reader.read_u8
    when 0
      raise reference_error("dict_id", dict_id) unless @state.dictionaries.key?(dict_id)
    when 1
      version = reader.read_varuint
      digest = reader.read_varuint
      expires_at = reader.read_varuint
      fallback = Session::DictionaryFallback.from_byte(reader.read_u8)
      raise Errors.invalid_data("dictionary fallback") if fallback.nil?

      inline_payload = reader.read_bytes
      unless Dictionary.dictionary_payload_hash(inline_payload) == digest
        raise Errors.invalid_data("dictionary profile hash mismatch")
      end

      @state.dictionaries[dict_id] = inline_payload
      @state.dictionary_profiles[dict_id] = Session::DictionaryProfile.new(
        version: version,
        hash: digest,
        expires_at: expires_at,
        fallback: fallback
      )
    else
      raise Errors.invalid_data("dictionary profile flag")
    end
    dictionary_id = dict_id
  elsif dict_flag != 0
    raise Errors.invalid_data("dictionary flag")
  end

  values =
    case reader.read_u8
    when 0
      read_typed_vector(reader, nil, codec).data
    when 1
      raise Errors.invalid_data("trained dictionary block requires dict_id") if dictionary_id.nil?
      unless codec == Model::VectorCodec::DICTIONARY || codec == Model::VectorCodec::STRING_REF
        raise Errors.invalid_data("trained dictionary block requires string dictionary codec")
      end

      stored_payload = @state.dictionaries[dictionary_id]
      raise reference_error("dict_id", dictionary_id) if stored_payload.nil?

      trained = Dictionary.decode_trained_dictionary_payload(stored_payload)
      decoded = Dictionary.decode_trained_dictionary_block(reader.read_bytes, trained)
      Model::TypedVectorData.new(
        kind: Model::ElementType::STRING,
        bools: [],
        i64s: [],
        u64s: [],
        f64s: [],
        strings: decoded,
        binary: [],
        values: []
      )
    else
      raise Errors.invalid_data("column payload mode")
    end

  Model::Column.new(
    field_id: field_id,
    null_strategy: null_strategy,
    presence: presence,
    has_presence: has_presence,
    codec: codec,
    dictionary_id: dictionary_id,
    values: values
  )
end
|
|
1202
|
+
|
|
1203
|
+
# Writes a control message and applies its effect to the local @state, so the
# encoder's tables change the same way read_control changes the decoder's.
#
# Per opcode:
# - REGISTER_KEYS / REGISTER_STRINGS: count followed by the strings; each is
#   registered in the key / string table.
# - REGISTER_SHAPE: shape id, key count and the key refs; the shape is then
#   registered under that id with the key *names*.
# - PROMOTE_STRING_FIELD_TO_ENUM: field identity and enum values; stored in
#   @state.field_enums.
# - RESET_TABLES / RESET_STATE: no payload; the state is reset.
#
# Raises invalid_data when a required payload is missing or the opcode is
# unknown, and a "key_id" reference error when a shape key is given by an id
# the key table does not know.
def write_control(control, out)
  out << control.opcode.value.chr
  case control.opcode
  when Model::ControlOpcode::REGISTER_KEYS
    Wire.encode_varuint(control.register_keys.length, out)
    control.register_keys.each do |key|
      Wire.encode_string(key, out)
      @state.key_table.register(key)
    end
  when Model::ControlOpcode::REGISTER_SHAPE
    shape = control.register_shape
    raise Errors.invalid_data("register shape payload missing") if shape.nil?

    Wire.encode_varuint(shape.shape_id, out)
    Wire.encode_varuint(shape.keys.length, out)
    key_names = shape.keys.map do |key_ref|
      name = key_ref.literal
      if key_ref.is_id
        # read_control resolves id references through the key table before
        # registering the shape; do the same here so both sides register the
        # same key names. NOTE(review): assumes an id KeyRef does not carry a
        # usable literal - confirm against Model::KeyRef.
        name, found = @state.key_table.get_value(key_ref.id)
        raise reference_error("key_id", key_ref.id) unless found
      end
      write_key_ref(key_ref, out)
      name
    end
    @state.shape_table.register_with_id(shape.shape_id, key_names)
  when Model::ControlOpcode::REGISTER_STRINGS
    Wire.encode_varuint(control.register_strings.length, out)
    control.register_strings.each do |str|
      Wire.encode_string(str, out)
      @state.string_table.register(str)
    end
  when Model::ControlOpcode::PROMOTE_STRING_FIELD_TO_ENUM
    promotion = control.promote_string_field_to_enum
    raise Errors.invalid_data("promote enum payload missing") if promotion.nil?

    Wire.encode_string(promotion.field_identity, out)
    Wire.encode_varuint(promotion.values.length, out)
    promotion.values.each { |value| Wire.encode_string(value, out) }
    # Store a copy so later changes to the message do not leak into state.
    @state.field_enums[promotion.field_identity] = promotion.values.dup
  when Model::ControlOpcode::RESET_TABLES
    @state.reset_tables
  when Model::ControlOpcode::RESET_STATE
    @state.reset_state
  else
    raise Errors.invalid_data("control opcode")
  end
end
|
|
1245
|
+
|
|
1246
|
+
# Reads a control message, applies its effect to @state and returns the
# decoded Model::ControlMessage.
#
# Per opcode:
# - REGISTER_KEYS / REGISTER_STRINGS: the strings are registered in the key /
#   string table as they are read.
# - REGISTER_SHAPE: key refs are read (and resolved to names by
#   read_key_ref), then the shape is registered under the transmitted id.
# - PROMOTE_STRING_FIELD_TO_ENUM: the enum values are stored in
#   @state.field_enums (as a copy of the list placed in the message).
# - RESET_TABLES / RESET_STATE: the corresponding flag is set on the message
#   and the state is reset.
#
# Element counts come straight from the wire, so the result arrays grow as
# elements are actually read instead of being preallocated from the count. A
# corrupt or hostile count then runs into the reader's own failure on
# exhausted input rather than a huge up-front allocation.
# NOTE(review): assumes the reader raises once its input is exhausted.
#
# Raises invalid_data("control opcode") for an unknown opcode byte.
def read_control(reader)
  opcode = Model::ControlOpcode.from_byte(reader.read_u8)
  raise Errors.invalid_data("control opcode") if opcode.nil?

  msg = Model::ControlMessage.new(
    register_keys: [],
    register_shape: nil,
    register_strings: [],
    promote_string_field_to_enum: nil,
    reset_tables: false,
    reset_state: false,
    opcode: opcode
  )
  case opcode
  when Model::ControlOpcode::REGISTER_KEYS
    keys = []
    reader.read_varuint.times do
      key = reader.read_string
      keys << key
      @state.key_table.register(key)
    end
    msg = msg.with(register_keys: keys)
  when Model::ControlOpcode::REGISTER_SHAPE
    shape_id = reader.read_varuint
    key_refs = []
    reader.read_varuint.times { key_refs << read_key_ref(reader) }
    @state.shape_table.register_with_id(shape_id, key_refs.map(&:literal))
    msg = msg.with(register_shape: Model::RegisterShapeControl.new(shape_id: shape_id, keys: key_refs))
  when Model::ControlOpcode::REGISTER_STRINGS
    strings = []
    reader.read_varuint.times do
      str = reader.read_string
      strings << str
      @state.string_table.register(str)
    end
    msg = msg.with(register_strings: strings)
  when Model::ControlOpcode::PROMOTE_STRING_FIELD_TO_ENUM
    field_identity = reader.read_string
    values = []
    reader.read_varuint.times { values << reader.read_string }
    @state.field_enums[field_identity] = values.dup
    msg = msg.with(
      promote_string_field_to_enum: Model::PromoteEnumControl.new(
        field_identity: field_identity,
        values: values
      )
    )
  when Model::ControlOpcode::RESET_TABLES
    msg = msg.with(reset_tables: true)
    @state.reset_tables
  when Model::ControlOpcode::RESET_STATE
    msg = msg.with(reset_state: true)
    @state.reset_state
  end
  msg
end
|
|
1314
|
+
|
|
1315
|
+
attr_accessor :state
|
|
1316
|
+
|
|
1317
|
+
# Writes a base reference: mode byte 0 for "previous message", or mode byte 1
# followed by the varuint base id.
def write_base_ref(base_ref, out)
  if base_ref.previous
    out << 0.chr
    nil
  else
    out << 1.chr
    Wire.encode_varuint(base_ref.base_id, out)
  end
end
|
|
1325
|
+
|
|
1326
|
+
# Reads a base reference.
#
# Mode 0 yields Model::BaseRef.previous, mode 1 yields an id reference built
# from the following varuint; any other mode raises invalid_data("base ref").
def read_base_ref(reader)
  mode = reader.read_u8
  return Model::BaseRef.previous if mode == 0
  return Model::BaseRef.id_ref(reader.read_varuint) if mode == 1

  raise Errors.invalid_data("base ref")
end
|
|
1338
|
+
|
|
1339
|
+
# Encodes +payload+ with the given control-stream codec and writes the result
# to +out+ as a length-prefixed byte string (via Wire.encode_bytes).
#
# PLAIN stores a binary copy of the payload; RLE, BITPACK, HUFFMAN and FSE
# delegate to the matching *_encode_bytes helper.
#
# Raises invalid_data("control stream codec") for an unknown codec, matching
# read_control_stream_payload, instead of handing nil to Wire.encode_bytes.
def write_control_stream_payload(codec, payload, out)
  encoded =
    case codec
    when Model::ControlStreamCodec::PLAIN
      # String#b already returns a fresh binary copy.
      payload.b
    when Model::ControlStreamCodec::RLE
      rle_encode_bytes(payload)
    when Model::ControlStreamCodec::BITPACK
      control_bitpack_encode_bytes(payload)
    when Model::ControlStreamCodec::HUFFMAN
      control_huffman_encode_bytes(payload)
    when Model::ControlStreamCodec::FSE
      control_fse_encode_bytes(payload)
    else
      raise Errors.invalid_data("control stream codec")
    end
  Wire.encode_bytes(encoded, out)
end
|
|
1354
|
+
|
|
1355
|
+
# Reads a length-prefixed control-stream payload and decodes it with +codec+.
#
# PLAIN returns the bytes as read; RLE, BITPACK, HUFFMAN and FSE delegate to
# the matching *_decode_bytes helper. An unknown codec raises
# invalid_data("control stream codec") after the bytes have been consumed.
def read_control_stream_payload(codec, reader)
  encoded = reader.read_bytes
  decoder =
    case codec
    when Model::ControlStreamCodec::PLAIN then nil
    when Model::ControlStreamCodec::RLE then :rle_decode_bytes
    when Model::ControlStreamCodec::BITPACK then :control_bitpack_decode_bytes
    when Model::ControlStreamCodec::HUFFMAN then :control_huffman_decode_bytes
    when Model::ControlStreamCodec::FSE then :control_fse_decode_bytes
    else raise Errors.invalid_data("control stream codec")
    end
  decoder ? send(decoder, encoded) : encoded
end
|
|
1372
|
+
|
|
1373
|
+
# Scans the string table for the entry sharing the longest byte prefix with
# +value+.
#
# Returns [id, prefix_len, true] for the first entry with the longest
# non-empty common prefix, or [0, 0, false] when no entry shares a prefix.
def best_prefix_base(value)
  winner_id = 0
  longest = 0
  state.string_table.by_id.each_with_index do |candidate, id|
    shared = common_prefix_len(value.b, candidate.b)
    # Strictly greater: on ties the earliest entry wins.
    next unless shared > longest

    longest = shared
    winner_id = id
  end
  longest.zero? ? [0, 0, false] : [winner_id, longest, true]
end
|
|
1387
|
+
|
|
1388
|
+
# Writes a vector of strings using the given vector codec.
#
# - DICTIONARY: the distinct strings in first-seen order, then one reference
#   per element encoded as a DIRECT_BITPACK u64 vector.
# - STRING_REF: element count, then one string-table id per element; strings
#   not yet in the table are registered first.
# - PREFIX_DELTA: element count, then for each element the number of leading
#   bytes shared with the previous element and the remaining suffix.
# - anything else: element count followed by the plain strings.
def write_string_vector(values, codec, out)
  case codec
  when Model::VectorCodec::DICTIONARY
    ids_by_string = {}
    # Hash insertion order doubles as the dictionary order.
    refs = values.map { |v| ids_by_string[v] ||= ids_by_string.length }
    distinct = ids_by_string.keys
    Wire.encode_varuint(distinct.length, out)
    distinct.each { |v| Wire.encode_string(v, out) }
    Codec.encode_u64_vector(refs, Model::VectorCodec::DIRECT_BITPACK, out)
  when Model::VectorCodec::STRING_REF
    Wire.encode_varuint(values.length, out)
    values.each do |v|
      id, known = state.string_table.get_id(v)
      id = state.string_table.register(v) unless known
      Wire.encode_varuint(id, out)
    end
  when Model::VectorCodec::PREFIX_DELTA
    Wire.encode_varuint(values.length, out)
    previous = ""
    values.each do |v|
      shared = common_prefix_len(previous.b, v.b)
      Wire.encode_varuint(shared, out)
      Wire.encode_string(v.byteslice(shared, v.bytesize - shared), out)
      previous = v
    end
  else
    Wire.encode_varuint(values.length, out)
    values.each { |v| Wire.encode_string(v, out) }
  end
end
|
|
1433
|
+
|
|
1434
|
+
# Reads a vector of strings written by write_string_vector and returns it as
# an array.
#
# - DICTIONARY: dictionary strings, then DIRECT_BITPACK references into it;
#   an out-of-range reference raises invalid_data("dictionary reference").
# - STRING_REF: one string-table id per element; an unknown id raises a
#   "string_id" reference error.
# - PREFIX_DELTA: per element a shared-prefix length and a suffix. The prefix
#   length is a byte count (the writer computes it on binary copies and the
#   prefix is cut with byteslice), so it is validated against the previous
#   string's bytesize. Comparing against the character length rejects valid
#   data as soon as the previous string contains multibyte characters.
# - anything else: plain strings.
#
# Element counts come from the wire, so result arrays grow as elements are
# read rather than being preallocated from an unvalidated count.
def read_string_vector(reader, codec)
  case codec
  when Model::VectorCodec::DICTIONARY
    table = []
    reader.read_varuint.times { table << reader.read_string }
    refs = Codec.decode_u64_vector(reader, Model::VectorCodec::DIRECT_BITPACK)
    refs.map do |ref|
      raise Errors.invalid_data("dictionary reference") if ref >= table.length

      table[ref]
    end
  when Model::VectorCodec::STRING_REF
    strings = []
    reader.read_varuint.times do
      id = reader.read_varuint
      text, found = state.string_table.get_value(id)
      raise reference_error("string_id", id) unless found

      strings << text
    end
    strings
  when Model::VectorCodec::PREFIX_DELTA
    strings = []
    previous = ""
    reader.read_varuint.times do
      shared = reader.read_varuint
      tail = reader.read_string
      raise Errors.invalid_data("prefix delta in string vector") if shared > previous.bytesize

      previous = previous.byteslice(0, shared) + tail
      strings << previous
    end
    strings
  else
    strings = []
    reader.read_varuint.times { strings << reader.read_string }
    strings
  end
end
|
|
1483
|
+
|
|
1484
|
+
# Applies patch +operations+ to a base message and returns the rebuilt
# message (via rebuild_message_like).
#
# The base is either a clone of state.previous_message or the snapshot that
# state.get_base_snapshot returns for base_ref.base_id; a missing base raises
# a reference error. +literals+ is accepted for interface compatibility and
# is not used.
# NOTE(review): the snapshot is used as returned, without cloning here -
# confirm that get_base_snapshot / message_fields hand out copies.
#
# Per opcode, with idx = op.field_id:
# - KEEP: nothing.
# - REPLACE_SCALAR / REPLACE_VECTOR / INSERT_FIELD / STRING_REF /
#   PREFIX_DELTA: replace field idx, or append when idx equals the current
#   field count.
# - DELETE_FIELD: remove field idx.
# - APPEND_VECTOR: concatenate op.value's array onto array field idx.
# - TRUNCATE_VECTOR: keep the first op.value.u64 elements of array field idx.
# Opcodes outside this list are ignored.
#
# Raises invalid_data for a missing value, an out-of-range index (negative
# indices included, for every opcode), a kind mismatch or a bad truncate
# length.
def apply_state_patch(base_ref, operations, literals)
  base =
    if base_ref.previous
      raise reference_error("previous", 0) unless state.previous_message

      state.previous_message.clone_message
    else
      snapshot, ok = state.get_base_snapshot(base_ref.base_id)
      raise reference_error("base_id", base_ref.base_id) unless ok

      snapshot
    end
  _ = literals
  fields = message_fields(base)
  operations.each do |op|
    idx = op.field_id
    case op.opcode
    when Model::PatchOpcode::KEEP
      # no-op
    when Model::PatchOpcode::REPLACE_SCALAR,
         Model::PatchOpcode::REPLACE_VECTOR,
         Model::PatchOpcode::INSERT_FIELD,
         Model::PatchOpcode::STRING_REF,
         Model::PatchOpcode::PREFIX_DELTA
      raise Errors.invalid_data("patch operation missing value") if op.value.nil?
      # A negative index would otherwise address fields from the end of the
      # array; reject it like the other opcodes do.
      raise Errors.invalid_data("patch field index out of range") if idx.negative? || idx > fields.length

      if idx == fields.length
        fields << op.value.clone_value
      else
        fields[idx] = op.value.clone_value
      end
    when Model::PatchOpcode::DELETE_FIELD
      raise Errors.invalid_data("delete field index out of range") if idx.negative? || idx >= fields.length

      fields.delete_at(idx)
    when Model::PatchOpcode::APPEND_VECTOR
      if op.value.nil? || idx.negative? || idx >= fields.length
        raise Errors.invalid_data("append vector patch invalid")
      end
      if fields[idx].kind != Model::ValueKind::ARRAY || op.value.kind != Model::ValueKind::ARRAY
        raise Errors.invalid_data("append vector requires arrays")
      end

      fields[idx] = fields[idx].with(arr: fields[idx].arr + op.value.arr)
    when Model::PatchOpcode::TRUNCATE_VECTOR
      if op.value.nil? || idx.negative? || idx >= fields.length
        raise Errors.invalid_data("truncate vector patch invalid")
      end
      if fields[idx].kind != Model::ValueKind::ARRAY || op.value.kind != Model::ValueKind::U64
        raise Errors.invalid_data("truncate vector requires array and u64")
      end

      keep = op.value.u64
      raise Errors.invalid_data("truncate length") if keep.negative? || keep > fields[idx].arr.length

      fields[idx] = fields[idx].with(arr: fields[idx].arr[0, keep].dup)
    end
  end
  rebuild_message_like(base, fields)
end
|
|
1545
|
+
|
|
1546
|
+
# Records that a map with the given key list was seen while decoding.
#
# Does nothing when the shape is already registered. Otherwise the
# observation is counted in the shape table, and the shape is registered once
# should_register_shape approves the resulting count.
def observe_decode_shape_candidate(keys)
  table = state.shape_table
  _id, known = table.get_id(keys)
  return if known

  seen = table.observe(keys)
  table.register(keys) if should_register_shape(keys, seen)
end
|
|
1553
|
+
|
|
1554
|
+
# Decides whether a key list deserves a registered shape: it must be
# non-empty and must have been observed at least twice.
def should_register_shape(keys, observed_count)
  return false if keys.empty?

  observed_count >= 2
end
|
|
1557
|
+
|
|
1558
|
+
# Counts one more encode-side observation of the given key list and returns
# the updated count.
#
# Observations are tallied in state.encode_shape_observations under
# shape_key(keys); the shape is registered whenever should_register_shape
# approves the new count.
def observe_encode_shape_candidate(keys)
  fingerprint = shape_key(keys)
  tally = state.encode_shape_observations
  count = (tally[fingerprint] || 0) + 1
  tally[fingerprint] = count
  state.shape_table.register(keys) if should_register_shape(keys, count)
  count
end
|
|
1566
|
+
private
|
|
1567
|
+
|
|
1568
|
+
# Writes +value+ using the narrowest of 1, 2, 4 or 8 bytes that can hold it:
# a width byte followed by the value in little-endian byte order.
# The 8-byte form is delegated to Wire.append_u64_le.
def write_smallest_u64(value, out)
  width =
    if value <= 0xFF then 1
    elsif value <= 0xFFFF then 2
    elsif value <= 0xFFFFFFFF then 4
    else 8
    end
  out << width.chr

  case width
  when 1
    out << value.chr
  when 8
    Wire.append_u64_le(out, value)
  else
    # Least significant byte first.
    width.times { |position| out << ((value >> (8 * position)) & 0xFF).chr }
    out
  end
end
|
|
1587
|
+
|
|
1588
|
+
# Reads an integer written by write_smallest_u64: a width byte (1, 2, 4 or 8)
# followed by that many little-endian bytes. The 8-byte form is delegated to
# Wire.read_u64_le. Any other width raises invalid_data("smallest u64 size").
def read_smallest_u64(reader)
  width = reader.read_u8
  case width
  when 1
    reader.read_u8
  when 2, 4
    # Little-endian: byte i contributes bits 8*i .. 8*i+7.
    reader.read_exact(width).bytes.each_with_index.sum { |byte, position| byte << (8 * position) }
  when 8
    Wire.read_u64_le(reader)
  else
    raise Errors.invalid_data("smallest u64 size")
  end
end
|
|
1605
|
+
|
|
1606
|
+
# Maps a dictionary fallback policy to its wire byte: FAIL_FAST is 0 and
# STATELESS_RETRY is 1. Anything else raises
# invalid_data("dictionary fallback").
def dictionary_fallback_to_byte(fallback)
  case fallback
  when Session::DictionaryFallback::FAIL_FAST then 0
  when Session::DictionaryFallback::STATELESS_RETRY then 1
  else raise Errors.invalid_data("dictionary fallback")
  end
end
|
|
1616
|
+
end
|
|
1617
|
+
class SessionEncoder
|
|
1618
|
+
attr_reader :codec
|
|
1619
|
+
|
|
1620
|
+
# Creates the encoder's codec from +options+ through
# Protocol.twilic_codec_with_options; it is exposed through the codec reader.
def initialize(options)
  @codec = Protocol.twilic_codec_with_options(options)
end
|
|
1623
|
+
|
|
1624
|
+
def encode(value)
|
|
1625
|
+
msg = codec.message_for_value(value)
|
|
1626
|
+
if codec.state.options.enable_state_patch && codec.state.previous_message &&
|
|
1627
|
+
supports_state_patch(codec.state.previous_message, msg)
|
|
1628
|
+
base_ref = Model::BaseRef.previous
|
|
1629
|
+
ops, _literals = diff_message(codec.state.previous_message, msg)
|
|
1630
|
+
patch_msg = Model.message(
|
|
1631
|
+
kind: Model::MessageKind::STATE_PATCH,
|
|
1632
|
+
state_patch: Model::StatePatchMessage.new(base_ref: base_ref, operations: ops, literals: [])
|
|
1633
|
+
)
|
|
1634
|
+
patch_size = encoded_size(patch_msg)
|
|
1635
|
+
full_size = encoded_size(msg)
|
|
1636
|
+
if patch_size < full_size
|
|
1637
|
+
begin
|
|
1638
|
+
return codec.encode_message(patch_msg)
|
|
1639
|
+
rescue StandardError
|
|
1640
|
+
# fall back to full message path
|
|
1641
|
+
end
|
|
1642
|
+
end
|
|
1643
|
+
end
|
|
1644
|
+
codec.encode_message(msg)
|
|
1645
|
+
end
|
|
1646
|
+
|
|
1647
|
+
def encode_with_schema(schema, value)
|
|
1648
|
+
codec.state.schemas[schema.schema_id] = schema
|
|
1649
|
+
codec.state.last_schema_id = schema.schema_id
|
|
1650
|
+
schema.fields.each do |field|
|
|
1651
|
+
next if field.enum_values.empty?
|
|
1652
|
+
|
|
1653
|
+
codec.state.field_enums[field.name] = field.enum_values.dup
|
|
1654
|
+
end
|
|
1655
|
+
raise Errors.invalid_data("encode_with_schema expects map value") unless value.kind == Model::ValueKind::MAP
|
|
1656
|
+
|
|
1657
|
+
presence = Array.new(schema.fields.length, false)
|
|
1658
|
+
fields = []
|
|
1659
|
+
has_presence = false
|
|
1660
|
+
schema.fields.each_with_index do |field, i|
|
|
1661
|
+
v = lookup_map_field(value, field.name)
|
|
1662
|
+
if v
|
|
1663
|
+
presence[i] = true
|
|
1664
|
+
fields << v.clone_value
|
|
1665
|
+
else
|
|
1666
|
+
presence[i] = false
|
|
1667
|
+
has_presence = true
|
|
1668
|
+
end
|
|
1669
|
+
end
|
|
1670
|
+
msg = Model.message(
|
|
1671
|
+
kind: Model::MessageKind::SCHEMA_OBJECT,
|
|
1672
|
+
schema_object: Model::SchemaObjectMessage.new(
|
|
1673
|
+
schema_id: schema.schema_id, presence: presence, has_presence: has_presence, fields: fields
|
|
1674
|
+
)
|
|
1675
|
+
)
|
|
1676
|
+
codec.encode_message(msg)
|
|
1677
|
+
end
|
|
1678
|
+
|
|
1679
|
+
def encode_batch(values)
|
|
1680
|
+
if values.empty?
|
|
1681
|
+
msg = Model.message(
|
|
1682
|
+
kind: Model::MessageKind::ROW_BATCH,
|
|
1683
|
+
row_batch: Model::RowBatchMessage.new(rows: [])
|
|
1684
|
+
)
|
|
1685
|
+
return codec.encode_message(msg)
|
|
1686
|
+
end
|
|
1687
|
+
|
|
1688
|
+
msg = nil
|
|
1689
|
+
if values.length >= 16
|
|
1690
|
+
cols = columns_from_map_values(values)
|
|
1691
|
+
cols = rows_to_columns(rows_from_values(values)) if cols.nil?
|
|
1692
|
+
Dictionary.apply_dictionary_references(codec.state, cols) if codec.state.options.enable_trained_dictionary
|
|
1693
|
+
msg = Model.message(
|
|
1694
|
+
kind: Model::MessageKind::COLUMN_BATCH,
|
|
1695
|
+
column_batch: Model::ColumnBatchMessage.new(count: values.length, columns: cols)
|
|
1696
|
+
)
|
|
1697
|
+
else
|
|
1698
|
+
msg = Model.message(
|
|
1699
|
+
kind: Model::MessageKind::ROW_BATCH,
|
|
1700
|
+
row_batch: Model::RowBatchMessage.new(rows: rows_from_values(values))
|
|
1701
|
+
)
|
|
1702
|
+
end
|
|
1703
|
+
|
|
1704
|
+
bytes = codec.encode_message(msg)
|
|
1705
|
+
codec.state.previous_message = msg
|
|
1706
|
+
size = bytes.bytesize
|
|
1707
|
+
codec.state.previous_message_size = size
|
|
1708
|
+
record_full_message_as_base
|
|
1709
|
+
bytes
|
|
1710
|
+
end
|
|
1711
|
+
|
|
1712
|
+
def record_full_message_as_base
|
|
1713
|
+
return if codec.state.options.max_base_snapshots.zero?
|
|
1714
|
+
return if codec.state.previous_message.nil?
|
|
1715
|
+
|
|
1716
|
+
base_id = codec.state.allocate_base_id
|
|
1717
|
+
codec.state.register_base_snapshot(base_id, codec.state.previous_message)
|
|
1718
|
+
end
|
|
1719
|
+
|
|
1720
|
+
def encode_patch(value)
|
|
1721
|
+
msg = codec.message_for_value(value)
|
|
1722
|
+
if codec.state.previous_message.nil? || !supports_state_patch(codec.state.previous_message, msg)
|
|
1723
|
+
return codec.encode_message(msg)
|
|
1724
|
+
end
|
|
1725
|
+
ops, _literals = diff_message(codec.state.previous_message, msg)
|
|
1726
|
+
patch_msg = Model.message(
|
|
1727
|
+
kind: Model::MessageKind::STATE_PATCH,
|
|
1728
|
+
state_patch: Model::StatePatchMessage.new(
|
|
1729
|
+
base_ref: Model::BaseRef.previous, operations: ops, literals: []
|
|
1730
|
+
)
|
|
1731
|
+
)
|
|
1732
|
+
return codec.encode_message(msg) if encoded_size(patch_msg) >= encoded_size(msg)
|
|
1733
|
+
|
|
1734
|
+
codec.encode_message(patch_msg)
|
|
1735
|
+
end
|
|
1736
|
+
|
|
1737
|
+
def encode_micro_batch(values)
|
|
1738
|
+
return encode_batch(values) if values.empty?
|
|
1739
|
+
if !codec.state.options.enable_template_batch || !has_uniform_micro_batch_shape(values)
|
|
1740
|
+
return encode_batch(values)
|
|
1741
|
+
end
|
|
1742
|
+
|
|
1743
|
+
columns = columns_from_map_values(values)
|
|
1744
|
+
columns = rows_to_columns(rows_from_values(values)) if columns.nil?
|
|
1745
|
+
Dictionary.apply_dictionary_references(codec.state, columns) if codec.state.options.enable_trained_dictionary
|
|
1746
|
+
template_id, ok = find_template_id(codec.state.templates, columns)
|
|
1747
|
+
unless ok
|
|
1748
|
+
template_id = codec.state.allocate_template_id
|
|
1749
|
+
codec.state.templates[template_id] = template_descriptor_from_columns(template_id, columns)
|
|
1750
|
+
codec.state.template_columns[template_id] = columns
|
|
1751
|
+
mask = Array.new(columns.length, true)
|
|
1752
|
+
msg = Model.message(
|
|
1753
|
+
kind: Model::MessageKind::TEMPLATE_BATCH,
|
|
1754
|
+
template_batch: Model::TemplateBatchMessage.new(
|
|
1755
|
+
template_id: template_id, count: values.length, changed_column_mask: mask, columns: columns
|
|
1756
|
+
)
|
|
1757
|
+
)
|
|
1758
|
+
return codec.encode_message(msg)
|
|
1759
|
+
end
|
|
1760
|
+
mask, changed_cols = diff_template_columns(codec.state.template_columns[template_id], columns)
|
|
1761
|
+
codec.state.template_columns[template_id] = columns
|
|
1762
|
+
msg = Model.message(
|
|
1763
|
+
kind: Model::MessageKind::TEMPLATE_BATCH,
|
|
1764
|
+
template_batch: Model::TemplateBatchMessage.new(
|
|
1765
|
+
template_id: template_id, count: values.length, changed_column_mask: mask, columns: changed_cols
|
|
1766
|
+
)
|
|
1767
|
+
)
|
|
1768
|
+
codec.encode_message(msg)
|
|
1769
|
+
end
|
|
1770
|
+
|
|
1771
|
+
def reset
|
|
1772
|
+
codec.state.reset_state
|
|
1773
|
+
end
|
|
1774
|
+
|
|
1775
|
+
def decode_message(bytes)
|
|
1776
|
+
codec.decode_message(bytes)
|
|
1777
|
+
end
|
|
1778
|
+
end
|
|
1779
|
+
|
|
1780
|
+
module_function
|
|
1781
|
+
|
|
1782
|
+
# Returns a clone of the value stored under +key+ in a MAP value, taking
# the first entry whose key matches.
#
# @return the cloned entry value, or nil when +value+ is not a MAP or no
#   entry has that key
def lookup_map_field(value, key)
  return nil unless value.kind == Model::ValueKind::MAP

  match = value.map.find { |entry| entry.key == key }
  match ? match.value.clone_value : nil
end
|
|
1793
|
+
|
|
1794
|
+
# Lists the indices of the schema fields that are present.
#
# Without a presence bitmap every field index is returned. With one, only
# the indices whose presence entry is truthy are returned.
#
# @raise the error built by Errors.invalid_data when the bitmap length
#   differs from the number of schema fields
def schema_present_field_indices(schema, presence, has_presence)
  return (0...schema.fields.length).to_a unless has_presence
  raise Errors.invalid_data("presence bitmap mismatch for schema") if presence.length != schema.fields.length

  (0...schema.fields.length).select { |index| presence[index] }
end
|
|
1808
|
+
|
|
1809
|
+
# Normalizes a logical type name by stripping surrounding whitespace and
# lower-casing it.
#
# @param raw [String]
# @return [String]
def normalized_logical_type(raw)
  trimmed = raw.strip
  trimmed.downcase
end
|
|
1812
|
+
|
|
1813
|
+
# Turns each value into a row of cloned cells: an ARRAY value becomes a row
# holding a clone of each element; any other value becomes a single-cell row.
#
# @return [Array<Array>] one row per input value, in order
def rows_from_values(values)
  values.map do |value|
    if value.kind == Model::ValueKind::ARRAY
      value.arr.map(&:clone_value)
    else
      [value.clone_value]
    end
  end
end
|
|
1826
|
+
|
|
1827
|
+
# Chooses how a column's nulls are represented.
#
# Returns a triple [strategy, presence, has_presence]:
# * no NULL values: ALL_PRESENT_ELIDED with no bitmap;
# * NULL count at most a quarter of the values (integer division):
#   INVERTED_PRESENCE_BITMAP with every presence bit negated;
# * otherwise: PRESENCE_BITMAP with a copy of +present_bits+.
def column_null_strategy(values, present_bits)
  null_total = values.count { |value| value.kind == Model::ValueKind::NULL }
  return [Model::NullStrategy::ALL_PRESENT_ELIDED, nil, false] if null_total.zero?
  return [Model::NullStrategy::PRESENCE_BITMAP, present_bits.dup, true] if null_total > values.length / 4

  [Model::NullStrategy::INVERTED_PRESENCE_BITMAP, present_bits.map { |bit| !bit }, true]
end
|
|
1845
|
+
|
|
1846
|
+
# Returns a new array holding the values whose kind is not NULL, in order.
def strip_nulls(values)
  values.reject { |value| value.kind == Model::ValueKind::NULL }
end
|
|
1853
|
+
|
|
1854
|
+
# Pivots a list of MAP values (rows) into one Model::Column per distinct
# key. Columns are numbered in order of first appearance of their key.
#
# Every column ends up with one slot per row: the cloned entry value where
# the row has the key, or Model.null_value where it does not. The null
# strategy comes from column_null_strategy, and the codec and typed values
# from infer_column_codec_and_values applied to the non-null values.
#
# @param values list of values, each expected to be a MAP
# @return [Array, nil] the columns, or nil when +values+ is empty or any
#   value is not a MAP
def columns_from_map_values(values)
  return nil if values.empty?
  return nil unless values.all? { |value| value.kind == Model::ValueKind::MAP }

  key_index = {}
  column_values = []
  column_presence = []
  values.each_with_index do |row_value, row_idx|
    present = Array.new(key_index.length, false)
    row_value.map.each do |entry|
      col_idx = key_index[entry.key]
      unless col_idx
        col_idx = key_index.length
        key_index[entry.key] = col_idx
        # A key first seen on a later row must be backfilled for the rows
        # already processed. Use real null values and false bits, not the
        # nil that a bare Array.new(n) produces, because the null-strategy
        # and null-stripping helpers call #kind on every slot.
        column_values << Array.new(row_idx) { Model.null_value }
        column_presence << Array.new(row_idx, false)
        present << false
      end
      column_values[col_idx] << entry.value.clone_value
      column_presence[col_idx] << true
      present[col_idx] = true
    end
    # Pad every known column that this row did not supply.
    present.each_with_index do |seen, col_idx|
      next if seen

      column_values[col_idx] << Model.null_value
      column_presence[col_idx] << false
    end
  end

  Array.new(key_index.length) do |field_id|
    col_values = column_values[field_id]
    null_strategy, presence, has_presence = column_null_strategy(col_values, column_presence[field_id])
    column_codec, typed_values = infer_column_codec_and_values(strip_nulls(col_values))
    Model::Column.new(
      field_id: field_id,
      null_strategy: null_strategy,
      presence: presence,
      has_presence: has_presence,
      codec: column_codec,
      dictionary_id: nil,
      values: typed_values
    )
  end
end
|
|
1907
|
+
|
|
1908
|
+
# Reports whether every value is a MAP with the same keys in the same
# order as the first value.
#
# @return [Boolean] false for an empty list or when the first value is not
#   a MAP
def has_uniform_micro_batch_shape(values)
  return false if values.empty?
  return false if values[0].kind != Model::ValueKind::MAP

  expected_keys = values[0].map.map(&:key)
  values.drop(1).all? do |candidate|
    candidate.kind == Model::ValueKind::MAP &&
      candidate.map.length == expected_keys.length &&
      expected_keys.each_with_index.all? { |key, position| candidate.map[position].key == key }
  end
end
|
|
1922
|
+
|
|
1923
|
+
# A shape is registered once it has at least one key and has been observed
# at least twice.
def should_register_shape(keys, observed_count)
  return false if keys.empty?

  observed_count >= 2
end
|
|
1926
|
+
|
|
1927
|
+
# Reports whether +current+ can be sent as a state patch against +base+:
# both messages must exist, share the same kind, and that kind must be
# MAP, SCHEMA_OBJECT, SHAPED_OBJECT or ARRAY.
def supports_state_patch(base, current)
  return false if base.nil? || current.nil?
  return false unless base.kind == current.kind

  [
    Model::MessageKind::MAP,
    Model::MessageKind::SCHEMA_OBJECT,
    Model::MessageKind::SHAPED_OBJECT,
    Model::MessageKind::ARRAY
  ].any? { |patchable| base.kind == patchable }
end
|
|
1934
|
+
|
|
1935
|
+
# Size used when comparing candidate messages; this is whatever
# estimate_message_size reports for +message+.
def encoded_size(message)
  size_estimate = estimate_message_size(message)
  size_estimate
end
|
|
1938
|
+
|
|
1939
|
+
# Converts a typed vector into an array value by wrapping each element
# with the Model constructor for the vector's element type (BOOL, I64,
# U64, F64 or STRING). Any other element type yields an empty array value.
def typed_vector_to_value(vector)
  items =
    case vector.element_type
    when Model::ElementType::BOOL
      vector.data.bools.map { |flag| Model.bool_value(flag) }
    when Model::ElementType::I64
      vector.data.i64s.map { |number| Model.i64_value(number) }
    when Model::ElementType::U64
      vector.data.u64s.map { |number| Model.u64_value(number) }
    when Model::ElementType::F64
      vector.data.f64s.map { |number| Model.f64_value(number) }
    when Model::ElementType::STRING
      vector.data.strings.map { |text| Model.string_value(text) }
    else
      []
    end
  Model.array_value(items)
end
|
|
1965
|
+
|
|
1966
|
+
# Resolves each entry's key reference to a string and builds a
# Model::MapEntry holding that key and a clone of the entry value. Keys
# that the state's key table does not know yet are registered as they are
# encountered.
#
# @return [Array] one Model::MapEntry per input entry, in order
def entries_to_map(entries, state)
  entries.map do |entry|
    name = key_ref_string(entry.key, state)
    mapped = Model::MapEntry.new(name, entry.value.clone_value)
    _id, known = state.key_table.get_id(name)
    state.key_table.register(name) unless known
    mapped
  end
end
|
|
1976
|
+
|
|
1977
|
+
# Resolves a key reference to its string form. A literal key returns its
# literal; an id key is looked up in the state's key table and yields ""
# when the id is unknown.
def key_ref_string(key, state)
  return key.literal unless key.is_id

  resolved, found = state.key_table.get_value(key.id)
  found ? resolved : ""
end
|
|
1986
|
+
|
|
1987
|
+
# Resolves a key reference via key_ref_string and returns the result, or
# nil when it resolves to the empty string.
def key_ref_field_identity(key, state)
  resolved = key_ref_string(key, state)
  resolved == "" ? nil : resolved
end
|
|
1993
|
+
|
|
1994
|
+
# Pairs shape keys with their values to rebuild map entries.
#
# When +has_presence+ is set, keys whose presence entry is false are
# skipped; keys beyond the end of the bitmap count as present. The
# remaining keys are matched with +values+ in order, stopping when the
# values run out.
#
# @return [Array] entries built with Model.entry(key, cloned value)
def shape_values_to_map(keys, presence, has_presence, values)
  included_keys = keys.each_with_index.reject do |_key, position|
    has_presence && position < presence.length && !presence[position]
  end.map(&:first)

  included_keys.first(values.length).each_with_index.map do |key, value_index|
    Model.entry(key, values[value_index].clone_value)
  end
end
|
|
2006
|
+
end
|
|
2007
|
+
end
|
|
2008
|
+
end
|
|
2009
|
+
|
|
2010
|
+
require "twilic/core/protocol_helpers"
|
|
2011
|
+
|
|
2012
|
+
module Twilic
|
|
2013
|
+
module Core
|
|
2014
|
+
module Protocol
|
|
2015
|
+
# Expose every ProtocolHelpers singleton method on this module as well,
# unless this module already defines a singleton method of that name.
(ProtocolHelpers.singleton_methods(false) - singleton_methods(false)).each do |helper_name|
  define_singleton_method(helper_name) do |*args, **kwargs, &block|
    ProtocolHelpers.send(helper_name, *args, **kwargs, &block)
  end
end
|
|
2022
|
+
|
|
2023
|
+
# Defines, on +klass+, an instance method for every singleton method of
# ProtocolHelpers and then of this module. Each generated method forwards
# its arguments, keyword arguments and block to the module it came from.
#
# A name is skipped when +klass+ already responds to it publicly (including
# inherited methods) or declares its own private method of that name, so
# delegation never replaces a class's own implementation. ProtocolHelpers
# is processed first and therefore wins when both modules share a name.
#
# @param klass [Class] class that receives the delegating methods
def self.delegate_helpers_to(klass)
  [ProtocolHelpers, self].each do |target|
    target.singleton_methods(false).each do |name|
      next if klass.method_defined?(name)
      # method_defined? only matches public and protected methods; also
      # honour private methods declared directly on the class.
      next if klass.private_instance_methods(false).include?(name)

      klass.define_method(name) do |*args, **kwargs, &block|
        target.send(name, *args, **kwargs, &block)
      end
    end
  end
end
|
|
2039
|
+
|
|
2040
|
+
# Give codec and session-encoder instances direct access to the helpers.
[TwilicCodec, SessionEncoder].each { |klass| delegate_helpers_to(klass) }
|
|
2042
|
+
end
|
|
2043
|
+
end
|
|
2044
|
+
end
|