unibuf 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +178 -330
- data/CODE_OF_CONDUCT.md +132 -0
- data/README.adoc +443 -254
- data/docs/CAPNPROTO.adoc +436 -0
- data/docs/FLATBUFFERS.adoc +430 -0
- data/docs/PROTOBUF.adoc +515 -0
- data/docs/TXTPROTO.adoc +369 -0
- data/lib/unibuf/commands/convert.rb +60 -2
- data/lib/unibuf/commands/schema.rb +68 -11
- data/lib/unibuf/errors.rb +23 -26
- data/lib/unibuf/models/capnproto/enum_definition.rb +72 -0
- data/lib/unibuf/models/capnproto/field_definition.rb +81 -0
- data/lib/unibuf/models/capnproto/interface_definition.rb +70 -0
- data/lib/unibuf/models/capnproto/method_definition.rb +81 -0
- data/lib/unibuf/models/capnproto/schema.rb +84 -0
- data/lib/unibuf/models/capnproto/struct_definition.rb +96 -0
- data/lib/unibuf/models/capnproto/union_definition.rb +62 -0
- data/lib/unibuf/models/flatbuffers/enum_definition.rb +69 -0
- data/lib/unibuf/models/flatbuffers/field_definition.rb +88 -0
- data/lib/unibuf/models/flatbuffers/schema.rb +102 -0
- data/lib/unibuf/models/flatbuffers/struct_definition.rb +70 -0
- data/lib/unibuf/models/flatbuffers/table_definition.rb +73 -0
- data/lib/unibuf/models/flatbuffers/union_definition.rb +60 -0
- data/lib/unibuf/models/message.rb +10 -0
- data/lib/unibuf/models/values/scalar_value.rb +2 -2
- data/lib/unibuf/parsers/binary/wire_format_parser.rb +199 -19
- data/lib/unibuf/parsers/capnproto/binary_parser.rb +267 -0
- data/lib/unibuf/parsers/capnproto/grammar.rb +272 -0
- data/lib/unibuf/parsers/capnproto/list_reader.rb +208 -0
- data/lib/unibuf/parsers/capnproto/pointer_decoder.rb +163 -0
- data/lib/unibuf/parsers/capnproto/processor.rb +348 -0
- data/lib/unibuf/parsers/capnproto/segment_reader.rb +131 -0
- data/lib/unibuf/parsers/capnproto/struct_reader.rb +199 -0
- data/lib/unibuf/parsers/flatbuffers/binary_parser.rb +325 -0
- data/lib/unibuf/parsers/flatbuffers/grammar.rb +235 -0
- data/lib/unibuf/parsers/flatbuffers/processor.rb +299 -0
- data/lib/unibuf/parsers/textproto/grammar.rb +1 -1
- data/lib/unibuf/parsers/textproto/processor.rb +10 -0
- data/lib/unibuf/serializers/binary_serializer.rb +218 -0
- data/lib/unibuf/serializers/capnproto/binary_serializer.rb +402 -0
- data/lib/unibuf/serializers/capnproto/list_writer.rb +199 -0
- data/lib/unibuf/serializers/capnproto/pointer_encoder.rb +118 -0
- data/lib/unibuf/serializers/capnproto/segment_builder.rb +124 -0
- data/lib/unibuf/serializers/capnproto/struct_writer.rb +139 -0
- data/lib/unibuf/serializers/flatbuffers/binary_serializer.rb +167 -0
- data/lib/unibuf/validators/type_validator.rb +1 -1
- data/lib/unibuf/version.rb +1 -1
- data/lib/unibuf.rb +27 -0
- metadata +36 -1
data/docs/CAPNPROTO.adoc
ADDED
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
= Cap'n Proto Support in Unibuf
|
|
2
|
+
|
|
3
|
+
:toc:
|
|
4
|
+
:toclevels: 3
|
|
5
|
+
|
|
6
|
+
== Purpose
|
|
7
|
+
|
|
8
|
+
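Unibuf provides schema parsing, binary parsing, and binary serialization for Cap'n Proto, a high-performance data interchange format designed for zero-copy serialization. Current coverage and known gaps are listed in the Limitations section.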
|
|
9
|
+
|
|
10
|
+
Features:
|
|
11
|
+
|
|
12
|
+
* Parse Cap'n Proto schema files (`.capnp`)
|
|
13
|
+
* Parse Cap'n Proto binary format
|
|
14
|
+
* Serialize to Cap'n Proto binary format
|
|
15
|
+
* Schema-driven validation
|
|
16
|
+
* Support for all Cap'n Proto schema constructs
|
|
17
|
+
* Round-trip serialization
|
|
18
|
+
|
|
19
|
+
== Cap'n Proto Overview
|
|
20
|
+
|
|
21
|
+
Cap'n Proto has the following key design characteristics:
|
|
22
|
+
|
|
23
|
+
Zero-copy serialization::
|
|
24
|
+
Data can be used directly from serialized form without parsing
|
|
25
|
+
|
|
26
|
+
Extremely fast::
|
|
27
|
+
No encoding/decoding step required
|
|
28
|
+
|
|
29
|
+
Schema evolution::
|
|
30
|
+
Backward and forward compatibility
|
|
31
|
+
|
|
32
|
+
RPC support::
|
|
33
|
+
Built-in RPC interface definitions
|
|
34
|
+
|
|
35
|
+
Word-aligned::
|
|
36
|
+
All data aligned to 8-byte boundaries
|
|
37
|
+
|
|
38
|
+
== Schema Parsing
|
|
39
|
+
|
|
40
|
+
=== General
|
|
41
|
+
|
|
42
|
+
Parse Cap'n Proto schema files to extract struct, enum, and interface definitions.
|
|
43
|
+
|
|
44
|
+
=== Parsing a schema file
|
|
45
|
+
|
|
46
|
+
[source,ruby]
|
|
47
|
+
----
|
|
48
|
+
require "unibuf"
|
|
49
|
+
|
|
50
|
+
# Parse Cap'n Proto schema
|
|
51
|
+
schema = Unibuf.parse_capnproto_schema("addressbook.capnp") # <1>
|
|
52
|
+
|
|
53
|
+
# Access schema information
|
|
54
|
+
puts "File ID: #{schema.file_id}" # <2>
|
|
55
|
+
puts "Structs: #{schema.struct_names.join(', ')}" # <3>
|
|
56
|
+
puts "Interfaces: #{schema.interface_names.join(', ')}" # <4>
|
|
57
|
+
----
|
|
58
|
+
<1> Parse `.capnp` schema file
|
|
59
|
+
<2> Get file ID
|
|
60
|
+
<3> List all struct names
|
|
61
|
+
<4> List all interface names
|
|
62
|
+
|
|
63
|
+
=== Schema structure
|
|
64
|
+
|
|
65
|
+
.Example Cap'n Proto schema
|
|
66
|
+
[source,capnp]
|
|
67
|
+
----
|
|
68
|
+
@0x9eb32e19f86ee174; # File ID
|
|
69
|
+
|
|
70
|
+
struct Person {
|
|
71
|
+
id @0 :UInt32; # Field with ordinal
|
|
72
|
+
name @1 :Text; # Text is a pointer type
|
|
73
|
+
email @2 :Text;
|
|
74
|
+
phones @3 :List(PhoneNumber); # Generic list type
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
enum Gender {
|
|
78
|
+
male @0;
|
|
79
|
+
female @1;
|
|
80
|
+
other @2;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
interface Calculator {
|
|
84
|
+
add @0 (a :Int32, b :Int32) -> (result :Int32); # RPC method
|
|
85
|
+
}
|
|
86
|
+
----
|
|
87
|
+
|
|
88
|
+
=== Accessing structs
|
|
89
|
+
|
|
90
|
+
[source,ruby]
|
|
91
|
+
----
|
|
92
|
+
# Find struct by name
|
|
93
|
+
person = schema.find_struct("Person") # <1>
|
|
94
|
+
|
|
95
|
+
# Access fields
|
|
96
|
+
person.fields.each do |field|
|
|
97
|
+
puts "#{field.name} @#{field.ordinal} :#{field.type}" # <2>
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Check field properties
|
|
101
|
+
id_field = person.find_field("id") # <3>
|
|
102
|
+
puts "Primitive? #{id_field.primitive_type?}" # <4>
|
|
103
|
+
puts "List? #{id_field.list_type?}" # <5>
|
|
104
|
+
----
|
|
105
|
+
<1> Find struct definition
|
|
106
|
+
<2> Print field with ordinal and type
|
|
107
|
+
<3> Find specific field
|
|
108
|
+
<4> Check if primitive type
|
|
109
|
+
<5> Check if list type
|
|
110
|
+
|
|
111
|
+
=== Accessing enums
|
|
112
|
+
|
|
113
|
+
[source,ruby]
|
|
114
|
+
----
|
|
115
|
+
# Find enum by name
|
|
116
|
+
gender = schema.find_enum("Gender") # <1>
|
|
117
|
+
|
|
118
|
+
# Access values
|
|
119
|
+
gender.values.each do |name, ordinal|
|
|
120
|
+
puts "#{name} = #{ordinal}" # <2>
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Query values
|
|
124
|
+
ordinal = gender.find_value("male") # <3>
|
|
125
|
+
name = gender.find_name_by_ordinal(0) # <4>
|
|
126
|
+
----
|
|
127
|
+
<1> Find enum definition
|
|
128
|
+
<2> Iterate through values
|
|
129
|
+
<3> Get ordinal by name
|
|
130
|
+
<4> Get name by ordinal
|
|
131
|
+
|
|
132
|
+
=== Accessing interfaces
|
|
133
|
+
|
|
134
|
+
[source,ruby]
|
|
135
|
+
----
|
|
136
|
+
# Find interface by name
|
|
137
|
+
calc = schema.find_interface("Calculator") # <1>
|
|
138
|
+
|
|
139
|
+
# Access methods
|
|
140
|
+
calc.methods.each do |method|
|
|
141
|
+
puts "#{method.name} @#{method.ordinal}" # <2>
|
|
142
|
+
puts " Params: #{method.param_names.join(', ')}" # <3>
|
|
143
|
+
puts " Results: #{method.result_names.join(', ')}" # <4>
|
|
144
|
+
end
|
|
145
|
+
----
|
|
146
|
+
<1> Find interface definition
|
|
147
|
+
<2> Print method with ordinal
|
|
148
|
+
<3> List parameter names
|
|
149
|
+
<4> List result names
|
|
150
|
+
|
|
151
|
+
== Binary Format
|
|
152
|
+
|
|
153
|
+
=== General
|
|
154
|
+
|
|
155
|
+
Cap'n Proto binary format uses segments, pointers, and word alignment for efficient zero-copy access.
|
|
156
|
+
|
|
157
|
+
=== Parsing binary data
|
|
158
|
+
|
|
159
|
+
[source,ruby]
|
|
160
|
+
----
|
|
161
|
+
require "unibuf"
|
|
162
|
+
|
|
163
|
+
# 1. Load schema
|
|
164
|
+
schema = Unibuf.parse_capnproto_schema("addressbook.capnp") # <1>
|
|
165
|
+
|
|
166
|
+
# 2. Create parser
|
|
167
|
+
parser = Unibuf::Parsers::Capnproto::BinaryParser.new(schema) # <2>
|
|
168
|
+
|
|
169
|
+
# 3. Parse binary data
|
|
170
|
+
data = parser.parse(binary_data, root_type: "Person") # <3>
|
|
171
|
+
|
|
172
|
+
# 4. Access fields
|
|
173
|
+
puts data[:id] # => 1 # <4>
|
|
174
|
+
puts data[:name] # => "Alice" # <5>
|
|
175
|
+
puts data[:email] # => "alice@example.com" # <6>
|
|
176
|
+
----
|
|
177
|
+
<1> Parse schema file
|
|
178
|
+
<2> Create binary parser
|
|
179
|
+
<3> Parse with root type
|
|
180
|
+
<4> Access numeric field
|
|
181
|
+
<5> Access text field
|
|
182
|
+
<6> Access another text field
|
|
183
|
+
|
|
184
|
+
=== Serializing to binary
|
|
185
|
+
|
|
186
|
+
[source,ruby]
|
|
187
|
+
----
|
|
188
|
+
# Create serializer
|
|
189
|
+
serializer = Unibuf::Serializers::Capnproto::BinarySerializer.new(schema) # <1>
|
|
190
|
+
|
|
191
|
+
# Prepare data
|
|
192
|
+
data = {
|
|
193
|
+
id: 1,
|
|
194
|
+
name: "Alice",
|
|
195
|
+
email: "alice@example.com",
|
|
196
|
+
phones: []
|
|
197
|
+
} # <2>
|
|
198
|
+
|
|
199
|
+
# Serialize
|
|
200
|
+
binary = serializer.serialize(data, root_type: "Person") # <3>
|
|
201
|
+
|
|
202
|
+
# Write to file
|
|
203
|
+
File.binwrite("person.capnp.bin", binary) # <4>
|
|
204
|
+
----
|
|
205
|
+
<1> Create serializer with schema
|
|
206
|
+
<2> Prepare data as hash
|
|
207
|
+
<3> Serialize with root type
|
|
208
|
+
<4> Write binary output
|
|
209
|
+
|
|
210
|
+
=== Round-trip serialization
|
|
211
|
+
|
|
212
|
+
[source,ruby]
|
|
213
|
+
----
|
|
214
|
+
# Parse original
|
|
215
|
+
original = parser.parse(binary_data, root_type: "Person") # <1>
|
|
216
|
+
|
|
217
|
+
# Serialize
|
|
218
|
+
output_binary = serializer.serialize(original, root_type: "Person") # <2>
|
|
219
|
+
|
|
220
|
+
# Parse again
|
|
221
|
+
reparsed = parser.parse(output_binary, root_type: "Person") # <3>
|
|
222
|
+
|
|
223
|
+
# Verify equivalence
|
|
224
|
+
puts original == reparsed # => true # <4>
|
|
225
|
+
----
|
|
226
|
+
<1> Parse original binary
|
|
227
|
+
<2> Serialize to binary
|
|
228
|
+
<3> Parse serialized output
|
|
229
|
+
<4> Verify semantic equivalence
|
|
230
|
+
|
|
231
|
+
== Binary Format Details
|
|
232
|
+
|
|
233
|
+
=== Segment structure
|
|
234
|
+
|
|
235
|
+
Cap'n Proto organizes data into segments:
|
|
236
|
+
|
|
237
|
+
Segment table::
|
|
238
|
+
Header containing segment count and sizes
|
|
239
|
+
|
|
240
|
+
Segments::
|
|
241
|
+
Contiguous memory blocks containing structs and lists
|
|
242
|
+
|
|
243
|
+
Word alignment::
|
|
244
|
+
All data aligned to 8-byte (64-bit) boundaries
|
|
245
|
+
|
|
246
|
+
=== Pointer encoding
|
|
247
|
+
|
|
248
|
+
Pointers are 64-bit words encoding:
|
|
249
|
+
|
|
250
|
+
Struct pointers (type 0)::
|
|
251
|
+
Offset, data word count, pointer word count
|
|
252
|
+
|
|
253
|
+
List pointers (type 1)::
|
|
254
|
+
Offset, element size, element count
|
|
255
|
+
|
|
256
|
+
Far pointers (type 2)::
|
|
257
|
+
Cross-segment references
|
|
258
|
+
|
|
259
|
+
Capability pointers (type 3)::
|
|
260
|
+
RPC capability references
|
|
261
|
+
|
|
262
|
+
=== Struct layout
|
|
263
|
+
|
|
264
|
+
Structs have two sections:
|
|
265
|
+
|
|
266
|
+
Data section::
|
|
267
|
+
Inline primitive values (bool, integers, floats)
|
|
268
|
+
|
|
269
|
+
Pointer section::
|
|
270
|
+
References to other structs, lists, text, data
|
|
271
|
+
|
|
272
|
+
== Architecture
|
|
273
|
+
|
|
274
|
+
=== Parser components
|
|
275
|
+
|
|
276
|
+
SegmentReader (`lib/unibuf/parsers/capnproto/segment_reader.rb`)::
|
|
277
|
+
Reads segments from binary data with word-aligned access
|
|
278
|
+
|
|
279
|
+
PointerDecoder (`lib/unibuf/parsers/capnproto/pointer_decoder.rb`)::
|
|
280
|
+
Decodes 64-bit pointer words into type, offset, and size information
|
|
281
|
+
|
|
282
|
+
StructReader (`lib/unibuf/parsers/capnproto/struct_reader.rb`)::
|
|
283
|
+
Reads struct data section (primitives) and pointer section (references)
|
|
284
|
+
|
|
285
|
+
ListReader (`lib/unibuf/parsers/capnproto/list_reader.rb`)::
|
|
286
|
+
Reads lists with various element sizes (bit, byte, pointer, composite)
|
|
287
|
+
|
|
288
|
+
BinaryParser (`lib/unibuf/parsers/capnproto/binary_parser.rb`)::
|
|
289
|
+
Coordinates all components for schema-driven parsing
|
|
290
|
+
|
|
291
|
+
=== Serializer components
|
|
292
|
+
|
|
293
|
+
SegmentBuilder (`lib/unibuf/serializers/capnproto/segment_builder.rb`)::
|
|
294
|
+
Allocates and builds segments with word alignment
|
|
295
|
+
|
|
296
|
+
PointerEncoder (`lib/unibuf/serializers/capnproto/pointer_encoder.rb`)::
|
|
297
|
+
Encodes pointer information into 64-bit words
|
|
298
|
+
|
|
299
|
+
StructWriter (`lib/unibuf/serializers/capnproto/struct_writer.rb`)::
|
|
300
|
+
Writes struct data section and pointer section
|
|
301
|
+
|
|
302
|
+
ListWriter (`lib/unibuf/serializers/capnproto/list_writer.rb`)::
|
|
303
|
+
Writes lists with various element sizes
|
|
304
|
+
|
|
305
|
+
BinarySerializer (`lib/unibuf/serializers/capnproto/binary_serializer.rb`)::
|
|
306
|
+
Coordinates all components for schema-driven serialization
|
|
307
|
+
|
|
308
|
+
=== Model classes
|
|
309
|
+
|
|
310
|
+
Schema (`lib/unibuf/models/capnproto/schema.rb`)::
|
|
311
|
+
Root schema with file ID, structs, enums, interfaces
|
|
312
|
+
|
|
313
|
+
StructDefinition (`lib/unibuf/models/capnproto/struct_definition.rb`)::
|
|
314
|
+
Struct type with fields, unions, nested types
|
|
315
|
+
|
|
316
|
+
FieldDefinition (`lib/unibuf/models/capnproto/field_definition.rb`)::
|
|
317
|
+
Field specification with ordinal, type, default value
|
|
318
|
+
|
|
319
|
+
EnumDefinition (`lib/unibuf/models/capnproto/enum_definition.rb`)::
|
|
320
|
+
Enum type with values and ordinals
|
|
321
|
+
|
|
322
|
+
InterfaceDefinition (`lib/unibuf/models/capnproto/interface_definition.rb`)::
|
|
323
|
+
RPC interface with methods
|
|
324
|
+
|
|
325
|
+
MethodDefinition (`lib/unibuf/models/capnproto/method_definition.rb`)::
|
|
326
|
+
RPC method with parameters and results
|
|
327
|
+
|
|
328
|
+
UnionDefinition (`lib/unibuf/models/capnproto/union_definition.rb`)::
|
|
329
|
+
Discriminated union within struct
|
|
330
|
+
|
|
331
|
+
== Command-Line Usage
|
|
332
|
+
|
|
333
|
+
=== Schema command
|
|
334
|
+
|
|
335
|
+
[source,shell]
|
|
336
|
+
----
|
|
337
|
+
# Parse and display Cap'n Proto schema
|
|
338
|
+
unibuf schema addressbook.capnp # <1>
|
|
339
|
+
|
|
340
|
+
# Output as JSON
|
|
341
|
+
unibuf schema addressbook.capnp --format json # <2>
|
|
342
|
+
|
|
343
|
+
# Output as YAML
|
|
344
|
+
unibuf schema addressbook.capnp --format yaml # <3>
|
|
345
|
+
----
|
|
346
|
+
<1> Display schema structure
|
|
347
|
+
<2> JSON output format
|
|
348
|
+
<3> YAML output format
|
|
349
|
+
|
|
350
|
+
== Testing
|
|
351
|
+
|
|
352
|
+
=== Test coverage
|
|
353
|
+
|
|
354
|
+
Cap'n Proto implementation includes:
|
|
355
|
+
|
|
356
|
+
Grammar tests (43 tests)::
|
|
357
|
+
All schema constructs, primitives, generics, annotations
|
|
358
|
+
|
|
359
|
+
Integration tests (9 tests)::
|
|
360
|
+
Real schema files, nested types, complex structures
|
|
361
|
+
|
|
362
|
+
Binary parser tests (14 tests)::
|
|
363
|
+
Segment reading, pointer decoding, struct/list reading
|
|
364
|
+
|
|
365
|
+
Binary serializer tests (15 tests)::
|
|
366
|
+
Segment building, pointer encoding, round-trip verification
|
|
367
|
+
|
|
368
|
+
**Total: 81 tests, 100% passing**
|
|
369
|
+
|
|
370
|
+
=== Running tests
|
|
371
|
+
|
|
372
|
+
[source,shell]
|
|
373
|
+
----
|
|
374
|
+
# Run all Cap'n Proto tests
|
|
375
|
+
bundle exec rspec spec/unibuf/parsers/capnproto/ spec/unibuf/serializers/capnproto/
|
|
376
|
+
|
|
377
|
+
# Run specific test suite
|
|
378
|
+
bundle exec rspec spec/unibuf/parsers/capnproto/grammar_spec.rb
|
|
379
|
+
----
|
|
380
|
+
|
|
381
|
+
== Implementation Notes
|
|
382
|
+
|
|
383
|
+
=== Design decisions
|
|
384
|
+
|
|
385
|
+
Manual pointer management::
|
|
386
|
+
Cap'n Proto's dynamic pointer-based format requires runtime offset calculation, making manual implementation more appropriate than declarative approaches
|
|
387
|
+
|
|
388
|
+
Symmetric architecture::
|
|
389
|
+
Reader and writer components mirror each other for consistency
|
|
390
|
+
|
|
391
|
+
Word-aligned access::
|
|
392
|
+
All operations respect 8-byte word boundaries per Cap'n Proto specification
|
|
393
|
+
|
|
394
|
+
=== Limitations
|
|
395
|
+
|
|
396
|
+
The current implementation supports:
|
|
397
|
+
|
|
398
|
+
* ✅ Single-segment binaries (most common case)
|
|
399
|
+
* ✅ All primitive types
|
|
400
|
+
* ✅ Text and Data types
|
|
401
|
+
* ✅ Lists of primitives
|
|
402
|
+
* ✅ Nested structs
|
|
403
|
+
* ✅ Enums
|
|
404
|
+
|
|
405
|
+
Future enhancements may add:

|
|
406
|
+
- Multi-segment binary support
|
|
407
|
+
- Far pointer optimization
|
|
408
|
+
- Packed encoding
|
|
409
|
+
- RPC runtime support
|
|
410
|
+
|
|
411
|
+
== References
|
|
412
|
+
|
|
413
|
+
Cap'n Proto official documentation::
|
|
414
|
+
https://capnproto.org/
|
|
415
|
+
|
|
416
|
+
Language specification::
|
|
417
|
+
https://capnproto.org/language.html
|
|
418
|
+
|
|
419
|
+
Encoding specification::
|
|
420
|
+
https://capnproto.org/encoding.html
|
|
421
|
+
|
|
422
|
+
Example schemas::
|
|
423
|
+
https://github.com/capnproto/capnproto/tree/master/c%2B%2B/samples
|
|
424
|
+
|
|
425
|
+
== Support
|
|
426
|
+
|
|
427
|
+
For issues, questions, or contributions related to Cap'n Proto support:
|
|
428
|
+
|
|
429
|
+
* GitHub Issues: https://github.com/lutaml/unibuf/issues
|
|
430
|
+
* Documentation: https://github.com/lutaml/unibuf/tree/main/docs
|
|
431
|
+
|
|
432
|
+
== Copyright and License
|
|
433
|
+
|
|
434
|
+
Copyright https://www.ribose.com[Ribose Inc.]
|
|
435
|
+
|
|
436
|
+
Licensed under the 3-clause BSD License.
|