unibuf 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +178 -330
- data/CODE_OF_CONDUCT.md +132 -0
- data/README.adoc +443 -254
- data/docs/CAPNPROTO.adoc +436 -0
- data/docs/FLATBUFFERS.adoc +430 -0
- data/docs/PROTOBUF.adoc +515 -0
- data/docs/TXTPROTO.adoc +369 -0
- data/lib/unibuf/commands/convert.rb +60 -2
- data/lib/unibuf/commands/schema.rb +68 -11
- data/lib/unibuf/errors.rb +23 -26
- data/lib/unibuf/models/capnproto/enum_definition.rb +72 -0
- data/lib/unibuf/models/capnproto/field_definition.rb +81 -0
- data/lib/unibuf/models/capnproto/interface_definition.rb +70 -0
- data/lib/unibuf/models/capnproto/method_definition.rb +81 -0
- data/lib/unibuf/models/capnproto/schema.rb +84 -0
- data/lib/unibuf/models/capnproto/struct_definition.rb +96 -0
- data/lib/unibuf/models/capnproto/union_definition.rb +62 -0
- data/lib/unibuf/models/flatbuffers/enum_definition.rb +69 -0
- data/lib/unibuf/models/flatbuffers/field_definition.rb +88 -0
- data/lib/unibuf/models/flatbuffers/schema.rb +102 -0
- data/lib/unibuf/models/flatbuffers/struct_definition.rb +70 -0
- data/lib/unibuf/models/flatbuffers/table_definition.rb +73 -0
- data/lib/unibuf/models/flatbuffers/union_definition.rb +60 -0
- data/lib/unibuf/models/message.rb +10 -0
- data/lib/unibuf/models/values/scalar_value.rb +2 -2
- data/lib/unibuf/parsers/binary/wire_format_parser.rb +199 -19
- data/lib/unibuf/parsers/capnproto/binary_parser.rb +267 -0
- data/lib/unibuf/parsers/capnproto/grammar.rb +272 -0
- data/lib/unibuf/parsers/capnproto/list_reader.rb +208 -0
- data/lib/unibuf/parsers/capnproto/pointer_decoder.rb +163 -0
- data/lib/unibuf/parsers/capnproto/processor.rb +348 -0
- data/lib/unibuf/parsers/capnproto/segment_reader.rb +131 -0
- data/lib/unibuf/parsers/capnproto/struct_reader.rb +199 -0
- data/lib/unibuf/parsers/flatbuffers/binary_parser.rb +325 -0
- data/lib/unibuf/parsers/flatbuffers/grammar.rb +235 -0
- data/lib/unibuf/parsers/flatbuffers/processor.rb +299 -0
- data/lib/unibuf/parsers/textproto/grammar.rb +1 -1
- data/lib/unibuf/parsers/textproto/processor.rb +10 -0
- data/lib/unibuf/serializers/binary_serializer.rb +218 -0
- data/lib/unibuf/serializers/capnproto/binary_serializer.rb +402 -0
- data/lib/unibuf/serializers/capnproto/list_writer.rb +199 -0
- data/lib/unibuf/serializers/capnproto/pointer_encoder.rb +118 -0
- data/lib/unibuf/serializers/capnproto/segment_builder.rb +124 -0
- data/lib/unibuf/serializers/capnproto/struct_writer.rb +139 -0
- data/lib/unibuf/serializers/flatbuffers/binary_serializer.rb +167 -0
- data/lib/unibuf/validators/type_validator.rb +1 -1
- data/lib/unibuf/version.rb +1 -1
- data/lib/unibuf.rb +27 -0
- metadata +36 -1
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "segment_reader"
|
|
4
|
+
require_relative "pointer_decoder"
|
|
5
|
+
require_relative "struct_reader"
|
|
6
|
+
|
|
7
|
+
module Unibuf
|
|
8
|
+
module Parsers
|
|
9
|
+
module Capnproto
|
|
10
|
+
# Reader for Cap'n Proto list data.
#
# A ListReader describes one list stored in a segment: where it starts
# (segment id + word offset), how its elements are encoded (element size
# code from PointerDecoder) and how many elements it holds. Elements can be
# read as primitives, as pointer words, or as inline-composite structs.
#
# All reads go through +segment_reader.read_word(segment_id, word_offset)+.
# The bit arithmetic below assumes that call returns an unsigned 64-bit
# Integer (TODO confirm against SegmentReader).
class ListReader
  attr_reader :segment_reader, :segment_id, :word_offset, :element_size,
              :element_count

  # Initialize list reader
  # @param segment_reader [SegmentReader] Segment reader (must respond to
  #   +read_word(segment_id, word_offset)+)
  # @param segment_id [Integer] Segment containing the list
  # @param word_offset [Integer] Word offset of list start
  # @param element_size [Integer] Element size code
  #   (one of the PointerDecoder::ELEMENT_SIZE_* constants)
  # @param element_count [Integer] Number of elements
  def initialize(segment_reader, segment_id, word_offset, element_size,
                 element_count)
    @segment_reader = segment_reader
    @segment_id = segment_id
    @word_offset = word_offset
    @element_size = element_size
    @element_count = element_count
  end

  # Get list length
  # @return [Integer]
  def length
    @element_count
  end
  alias size length

  # Read an element as a primitive value
  # @param index [Integer] Element index (0-based)
  # @param type [Symbol] Requested interpretation. Signed types (:int8,
  #   :int16, :int32, :int64) and floats (:float32, :float64) are converted
  #   when they match the list's element width; any other value yields the
  #   raw unsigned integer.
  # @return [Object] Element value (nil for void lists, true/false for
  #   bit lists)
  # @raise [ArgumentError] if index is negative or past the end of the list
  # @raise [RuntimeError] if the list holds pointers or inline composites
  def read_primitive(index, type = :uint64)
    ensure_in_bounds!(index)

    case @element_size
    when PointerDecoder::ELEMENT_SIZE_VOID
      nil
    when PointerDecoder::ELEMENT_SIZE_BIT
      read_bit(index)
    when PointerDecoder::ELEMENT_SIZE_BYTE
      read_byte(index, type)
    when PointerDecoder::ELEMENT_SIZE_TWO_BYTES
      read_two_bytes(index, type)
    when PointerDecoder::ELEMENT_SIZE_FOUR_BYTES
      read_four_bytes(index, type)
    when PointerDecoder::ELEMENT_SIZE_EIGHT_BYTES
      read_eight_bytes(index, type)
    else
      raise "Cannot read primitive from this list type"
    end
  end

  # Read an element as a pointer
  # @param index [Integer] Element index (0-based)
  # @return [Hash, nil] Decoded pointer, or nil when the pointer word is 0
  # @raise [ArgumentError] if index is negative or past the end of the list
  # @raise [RuntimeError] if the list's elements are not pointers
  def read_pointer(index)
    ensure_in_bounds!(index)
    raise "List elements are not pointers" unless @element_size == PointerDecoder::ELEMENT_SIZE_POINTER

    # Pointer lists store exactly one word per element.
    pointer_word = @segment_reader.read_word(@segment_id,
                                             @word_offset + index)
    return nil if pointer_word.zero?

    PointerDecoder.decode(pointer_word)
  end

  # Read an element as a struct
  #
  # Only inline-composite lists are supported.
  # @param index [Integer] Element index (0-based)
  # @return [StructReader] Struct reader
  # @raise [ArgumentError] if index is negative or past the end of the list
  # @raise [RuntimeError] if the list is not an inline-composite list or
  #   its tag word is not a struct pointer
  def read_struct(index)
    ensure_in_bounds!(index)

    unless @element_size == PointerDecoder::ELEMENT_SIZE_INLINE_COMPOSITE
      raise "List elements are not structs"
    end

    read_inline_composite_struct(index)
  end

  # Read text (UTF-8 string)
  #
  # The list's bytes are returned tagged as UTF-8, with a single trailing
  # NUL byte removed when present. An empty list yields an empty string.
  # @return [String]
  # @raise [RuntimeError] if the list is not a byte list
  def read_text
    raise "Not a text list" unless @element_size == PointerDecoder::ELEMENT_SIZE_BYTE

    bytes = raw_bytes
    # Remove null terminator. `&.` guards the zero-length list, where
    # `bytes.last` is nil and calling `zero?` on it would raise.
    bytes.pop if bytes.last&.zero?
    bytes.pack("C*").force_encoding("UTF-8")
  end

  # Read data (binary blob)
  # @return [String] the list's bytes, unmodified
  # @raise [RuntimeError] if the list is not a byte list
  def read_data
    raise "Not a data list" unless @element_size == PointerDecoder::ELEMENT_SIZE_BYTE

    raw_bytes.pack("C*")
  end

  private

  # Reject indexes outside 0...element_count. Negative indexes must be
  # rejected explicitly: Ruby's floor division would otherwise map them to
  # words located before the start of the list.
  def ensure_in_bounds!(index)
    return unless index.negative? || index >= @element_count

    raise ArgumentError, "Index out of bounds"
  end

  # All elements of a byte list as unsigned integers (0..255).
  def raw_bytes
    (0...@element_count).map { |i| read_byte(i, :uint8) }
  end

  # Bits are packed 64 per word, least-significant bit first.
  def read_bit(index)
    word_index = index / 64
    bit_index = index % 64
    word = @segment_reader.read_word(@segment_id,
                                     @word_offset + word_index)
    (word >> bit_index).allbits?(1)
  end

  # Bytes are packed 8 per word, lowest byte first.
  def read_byte(index, type)
    word_index = index / 8
    byte_index = index % 8
    word = @segment_reader.read_word(@segment_id,
                                     @word_offset + word_index)

    value = (word >> (byte_index * 8)) & 0xFF

    # Convert to signed if needed
    if type == :int8
      value >= 128 ? value - 256 : value
    else
      value
    end
  end

  # 16-bit values are packed 4 per word, lowest half-word first.
  def read_two_bytes(index, type)
    word_index = index / 4
    half_word_index = index % 4
    word = @segment_reader.read_word(@segment_id,
                                     @word_offset + word_index)

    value = (word >> (half_word_index * 16)) & 0xFFFF

    # Convert to signed if needed
    if type == :int16
      value >= 32768 ? value - 65536 : value
    else
      value
    end
  end

  # 32-bit values are packed 2 per word, lower half first.
  def read_four_bytes(index, type)
    word_index = index / 2
    dword_index = index % 2
    word = @segment_reader.read_word(@segment_id,
                                     @word_offset + word_index)

    value = (word >> (dword_index * 32)) & 0xFFFFFFFF

    case type
    when :int32
      value >= 2147483648 ? value - 4294967296 : value
    when :float32
      # Reinterpret the 32 bits as a float; pack and unpack both use the
      # native byte order, so the round trip is endian-neutral.
      [value].pack("L").unpack1("f")
    else
      value
    end
  end

  # 64-bit values occupy one word each.
  def read_eight_bytes(index, type)
    word = @segment_reader.read_word(@segment_id, @word_offset + index)

    case type
    when :int64
      word >= 9223372036854775808 ? word - 18446744073709551616 : word
    when :float64
      # Same native-order bit reinterpretation as the float32 case.
      [word].pack("Q").unpack1("d")
    else
      word
    end
  end

  # Build a StructReader for element +index+ of an inline-composite list.
  #
  # NOTE(review): in the Cap'n Proto encoding the list pointer's count
  # field of an inline-composite list is a word count and the real element
  # count lives in the tag word. This reader bounds-checks against
  # @element_count as given by the caller - confirm the caller passes the
  # tag's element count.
  def read_inline_composite_struct(index)
    # For inline composite, first word is a tag describing struct size
    tag_word = @segment_reader.read_word(@segment_id, @word_offset)
    tag = PointerDecoder.decode(tag_word)

    raise "Invalid inline composite tag" unless tag[:type] == :struct

    data_words = tag[:data_words]
    pointer_words = tag[:pointer_words]
    struct_size = data_words + pointer_words

    # Structs start after the tag
    struct_offset = @word_offset + 1 + (index * struct_size)

    StructReader.new(
      @segment_reader,
      @segment_id,
      struct_offset,
      data_words,
      pointer_words,
    )
  end
end
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Unibuf
|
|
4
|
+
module Parsers
|
|
5
|
+
module Capnproto
|
|
6
|
+
# Decoder for Cap'n Proto pointer words.
#
# A pointer is a single 64-bit word. Its two lowest bits select the pointer
# kind; the remaining bits are interpreted per kind and returned as a Hash.
class PointerDecoder
  # Pointer type constants (bits 0-1)
  POINTER_TYPE_STRUCT = 0
  POINTER_TYPE_LIST = 1
  POINTER_TYPE_FAR = 2
  POINTER_TYPE_OTHER = 3

  # List element size constants (bits 32-34 for list pointers)
  ELEMENT_SIZE_VOID = 0
  ELEMENT_SIZE_BIT = 1
  ELEMENT_SIZE_BYTE = 2
  ELEMENT_SIZE_TWO_BYTES = 3
  ELEMENT_SIZE_FOUR_BYTES = 4
  ELEMENT_SIZE_EIGHT_BYTES = 5
  ELEMENT_SIZE_POINTER = 6
  ELEMENT_SIZE_INLINE_COMPOSITE = 7

  # Symbolic names reported for each element size code.
  ELEMENT_SIZE_NAMES = {
    ELEMENT_SIZE_VOID => :void,
    ELEMENT_SIZE_BIT => :bit,
    ELEMENT_SIZE_BYTE => :byte,
    ELEMENT_SIZE_TWO_BYTES => :two_bytes,
    ELEMENT_SIZE_FOUR_BYTES => :four_bytes,
    ELEMENT_SIZE_EIGHT_BYTES => :eight_bytes,
    ELEMENT_SIZE_POINTER => :pointer,
    ELEMENT_SIZE_INLINE_COMPOSITE => :inline_composite,
  }.freeze
  private_constant :ELEMENT_SIZE_NAMES

  class << self
    # Decode a pointer word
    # @param word [Integer] 64-bit pointer word
    # @return [Hash] Decoded pointer information; the +:type+ key is one of
    #   :null, :struct, :list, :far or :capability
    def decode(word)
      if word.zero?
        null_pointer
      else
        kind = word & 0x3

        if kind == POINTER_TYPE_STRUCT
          decode_struct_pointer(word)
        elsif kind == POINTER_TYPE_LIST
          decode_list_pointer(word)
        elsif kind == POINTER_TYPE_FAR
          decode_far_pointer(word)
        elsif kind == POINTER_TYPE_OTHER
          decode_other_pointer(word)
        end
      end
    end

    # Check if pointer is null
    # @param word [Integer] 64-bit pointer word
    # @return [Boolean] true when every bit of the word is zero
    def null_pointer?(word)
      word.zero?
    end

    private

    # Hash returned for an all-zero pointer word.
    def null_pointer
      { type: :null, null: true }
    end

    # Bits 2-31 of a struct or list pointer, read as a signed 30-bit
    # word offset.
    def signed_offset(word)
      raw = (word >> 2) & 0x3FFFFFFF
      raw < 0x20000000 ? raw : raw - 0x40000000
    end

    # Decode struct pointer
    # Bits:
    #   0-1: Type = 0 (struct)
    #   2-31: Signed offset in words (30 bits)
    #   32-47: Data section size in words (16 bits)
    #   48-63: Pointer section size in words (16 bits)
    def decode_struct_pointer(word)
      upper = word >> 32

      {
        type: :struct,
        offset: signed_offset(word),
        data_words: upper & 0xFFFF,
        pointer_words: (upper >> 16) & 0xFFFF,
      }
    end

    # Decode list pointer
    # Bits:
    #   0-1: Type = 1 (list)
    #   2-31: Signed offset in words (30 bits)
    #   32-34: Element size (3 bits)
    #   35-63: Element count (29 bits)
    def decode_list_pointer(word)
      size_code = (word >> 32) & 0x7

      {
        type: :list,
        offset: signed_offset(word),
        element_size: size_code,
        element_count: (word >> 35) & 0x1FFFFFFF,
        element_size_name: element_size_name(size_code),
      }
    end

    # Decode far pointer
    # Bits:
    #   0-1: Type = 2 (far)
    #   2: Landing pad flag (0 = normal, 1 = double-far)
    #   3-31: Offset in words within target segment (29 bits)
    #   32-63: Target segment ID (32 bits)
    def decode_far_pointer(word)
      {
        type: :far,
        offset: (word >> 3) & 0x1FFFFFFF,
        segment_id: (word >> 32) & 0xFFFFFFFF,
        double_far: word[2] == 1,
      }
    end

    # Decode other pointer (capability)
    # Bits 32-63 carry the capability index.
    def decode_other_pointer(word)
      { type: :capability, index: (word >> 32) & 0xFFFFFFFF }
    end

    # Get element size name (:unknown for codes without a name)
    def element_size_name(size)
      ELEMENT_SIZE_NAMES.fetch(size, :unknown)
    end
  end
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|