unibuf 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +170 -200
  3. data/CODE_OF_CONDUCT.md +132 -0
  4. data/README.adoc +306 -114
  5. data/docs/CAPNPROTO.adoc +436 -0
  6. data/docs/FLATBUFFERS.adoc +430 -0
  7. data/docs/PROTOBUF.adoc +515 -0
  8. data/docs/TXTPROTO.adoc +369 -0
  9. data/lib/unibuf/commands/convert.rb +60 -2
  10. data/lib/unibuf/commands/schema.rb +68 -11
  11. data/lib/unibuf/errors.rb +23 -26
  12. data/lib/unibuf/models/capnproto/enum_definition.rb +72 -0
  13. data/lib/unibuf/models/capnproto/field_definition.rb +81 -0
  14. data/lib/unibuf/models/capnproto/interface_definition.rb +70 -0
  15. data/lib/unibuf/models/capnproto/method_definition.rb +81 -0
  16. data/lib/unibuf/models/capnproto/schema.rb +84 -0
  17. data/lib/unibuf/models/capnproto/struct_definition.rb +96 -0
  18. data/lib/unibuf/models/capnproto/union_definition.rb +62 -0
  19. data/lib/unibuf/models/flatbuffers/enum_definition.rb +69 -0
  20. data/lib/unibuf/models/flatbuffers/field_definition.rb +88 -0
  21. data/lib/unibuf/models/flatbuffers/schema.rb +102 -0
  22. data/lib/unibuf/models/flatbuffers/struct_definition.rb +70 -0
  23. data/lib/unibuf/models/flatbuffers/table_definition.rb +73 -0
  24. data/lib/unibuf/models/flatbuffers/union_definition.rb +60 -0
  25. data/lib/unibuf/models/message.rb +10 -0
  26. data/lib/unibuf/parsers/capnproto/binary_parser.rb +267 -0
  27. data/lib/unibuf/parsers/capnproto/grammar.rb +272 -0
  28. data/lib/unibuf/parsers/capnproto/list_reader.rb +208 -0
  29. data/lib/unibuf/parsers/capnproto/pointer_decoder.rb +163 -0
  30. data/lib/unibuf/parsers/capnproto/processor.rb +348 -0
  31. data/lib/unibuf/parsers/capnproto/segment_reader.rb +131 -0
  32. data/lib/unibuf/parsers/capnproto/struct_reader.rb +199 -0
  33. data/lib/unibuf/parsers/flatbuffers/binary_parser.rb +325 -0
  34. data/lib/unibuf/parsers/flatbuffers/grammar.rb +235 -0
  35. data/lib/unibuf/parsers/flatbuffers/processor.rb +299 -0
  36. data/lib/unibuf/serializers/binary_serializer.rb +218 -0
  37. data/lib/unibuf/serializers/capnproto/binary_serializer.rb +402 -0
  38. data/lib/unibuf/serializers/capnproto/list_writer.rb +199 -0
  39. data/lib/unibuf/serializers/capnproto/pointer_encoder.rb +118 -0
  40. data/lib/unibuf/serializers/capnproto/segment_builder.rb +124 -0
  41. data/lib/unibuf/serializers/capnproto/struct_writer.rb +139 -0
  42. data/lib/unibuf/serializers/flatbuffers/binary_serializer.rb +167 -0
  43. data/lib/unibuf/version.rb +1 -1
  44. data/lib/unibuf.rb +27 -0
  45. metadata +36 -1
@@ -0,0 +1,325 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+
5
+ module Unibuf
6
+ module Parsers
7
+ module Flatbuffers
8
+ # FlatBuffers binary format parser
9
+ # Reference: https://flatbuffers.dev/md__internals.html
10
+ #
11
+ # FlatBuffers uses offset-based format with vtables for efficient access
12
+ # - Root object offset at beginning of file
13
+ # - Tables have vtables for field lookup
14
+ # - Structs are inline (no vtable)
15
+ # - Strings and vectors are length-prefixed
16
class BinaryParser
  # Byte width and String#unpack1 directive for each FlatBuffers scalar.
  # All multi-byte values are little-endian.
  SCALAR_FORMATS = {
    "byte" => [1, "c"],
    "ubyte" => [1, "C"],
    "short" => [2, "s<"],
    "ushort" => [2, "S<"],
    "int" => [4, "l<"],
    "uint" => [4, "L<"],
    "long" => [8, "q<"],
    "ulong" => [8, "Q<"],
    "float" => [4, "e"],
    "double" => [8, "E"],
    "bool" => [1, "C"],
  }.freeze

  # Width of a uoffset32 / soffset32 / length prefix.
  OFFSET_SIZE = 4

  # A vtable starts with two uint16 values: vtable size and table size.
  VTABLE_HEADER_SIZE = 4

  attr_reader :schema

  # @param schema [Object] schema model; must respond to +root_type+,
  #   +find_table+ and +find_type+
  def initialize(schema)
    @schema = schema
  end

  # Parse FlatBuffers binary data
  #
  # The input string is never modified: the parser works on a
  # binary-encoded view of it.
  #
  # @param binary_data [String] Binary FlatBuffers data
  # @return [Hash, nil] Parsed root table keyed by field name, or nil
  #   when the root offset is zero
  # @raise [ArgumentError] if the data is nil or empty
  # @raise [ParseError] if the root type is unknown or the data is
  #   truncated / structurally invalid
  def parse(binary_data)
    raise ArgumentError, "Binary data cannot be nil" if binary_data.nil?
    raise ArgumentError, "Binary data cannot be empty" if binary_data.empty?

    @buffer = binary_data.b

    # The first 4 bytes hold the offset of the root table
    root_offset = read_uint32(0)

    root_table_def = schema.find_table(schema.root_type)
    unless root_table_def
      raise ParseError,
            "Root type '#{schema.root_type}' not found in schema"
    end

    read_table(root_offset, root_table_def)
  end

  # Read the file in binary mode and parse its content.
  #
  # @param path [String] path of the FlatBuffers file
  # @return [Hash, nil] see {#parse}
  def parse_file(path)
    parse(File.binread(path))
  end

  private

  # Read a table at the given absolute offset.
  #
  # Fields are matched to vtable slots by their index in
  # +table_def.fields+. Slots that are missing from the vtable or
  # contain 0 are treated as "field not present" and skipped.
  #
  # @return [Hash, nil] field name => value; nil when offset is 0
  def read_table(offset, table_def)
    return nil if offset.zero?

    # The table starts with a signed offset that is *subtracted* from
    # the table position to locate its vtable.
    vtable = read_vtable(offset - read_soffset32(offset))
    slots = vtable[:field_offsets]

    result = {}
    table_def.fields.each_with_index do |field_def, index|
      slot = slots[index]
      next if slot.nil? || slot.zero?

      # Slot values are relative to the start of the table
      value = read_field_value(offset + slot, field_def)
      result[field_def.name] = value unless value.nil?
    end
    result
  end

  # Read vtable structure
  #
  # @return [Hash] +:vtable_size+, +:object_size+ and +:field_offsets+
  #   (one uint16 per slot)
  # @raise [ParseError] if the declared size is smaller than the header
  def read_vtable(pos)
    vtable_size = read_uint16(pos)
    object_size = read_uint16(pos + 2)

    if vtable_size < VTABLE_HEADER_SIZE
      raise ParseError,
            "Invalid vtable size #{vtable_size} at offset #{pos}"
    end

    field_count = (vtable_size - VTABLE_HEADER_SIZE) / 2
    raw_slots = read_bytes(pos + VTABLE_HEADER_SIZE, field_count * 2)

    {
      vtable_size: vtable_size,
      object_size: object_size,
      field_offsets: raw_slots.unpack("S<*"),
    }
  end

  # Read field value based on type
  def read_field_value(pos, field_def)
    if field_def.vector?
      read_vector(pos, field_def)
    elsif field_def.scalar?
      read_scalar(pos, field_def.type)
    elsif field_def.type == "string"
      read_string(pos)
    else
      # User type (table, struct, enum)
      read_user_type(pos, field_def)
    end
  end

  # Read a vector whose uoffset32 is stored at +pos+.
  #
  # @return [Array] decoded elements
  # @raise [ParseError] if the declared length does not fit in the buffer
  def read_vector(pos, field_def)
    vector_pos = pos + read_uoffset32(pos)
    length = read_uint32(vector_pos)
    data_pos = vector_pos + OFFSET_SIZE

    element_type = field_def.vector_element_type
    stride = element_size(element_type)

    # Reject a forged length before allocating anything. The stride is
    # clamped to 1 so a zero-sized element cannot bypass the check.
    ensure_readable(data_pos, length * [stride, 1].max)

    Array.new(length) do |i|
      read_element(data_pos + (i * stride), element_type)
    end
  end

  # Read a single vector element located at +pos+.
  def read_element(pos, element_type)
    if SCALAR_FORMATS.key?(element_type)
      read_scalar(pos, element_type)
    elsif element_type == "string"
      # The element slot holds the uoffset; read_string dereferences it,
      # so the slot position itself must be passed (not the target).
      read_string(pos)
    else
      read_named_type(pos, element_type)
    end
  end

  # Size in bytes of one vector element of the given type.
  #
  # Scalars use their own width, strings and tables are stored as
  # 4-byte offsets, structs are stored inline with their padded size
  # and enums use the width of their underlying scalar type.
  def element_size(type)
    width, = SCALAR_FORMATS[type]
    return width if width
    return OFFSET_SIZE if type == "string"

    type_def = schema.find_type(type)
    case type_def
    when Models::Flatbuffers::StructDefinition
      struct_layout(type_def)[:size]
    when Models::Flatbuffers::EnumDefinition
      element_size(type_def.type)
    else
      OFFSET_SIZE
    end
  end

  # Read scalar value
  #
  # @raise [ParseError] for a type name that is not a known scalar
  def read_scalar(pos, type)
    size, directive = SCALAR_FORMATS.fetch(type) do
      raise ParseError, "Unknown scalar type: #{type}"
    end

    value = read_bytes(pos, size).unpack1(directive)
    type == "bool" ? value != 0 : value
  end

  # Read user-defined type (table, struct, enum)
  def read_user_type(pos, field_def)
    read_named_type(pos, field_def.type)
  end

  # Resolve +type_name+ in the schema and read a value of that type.
  #
  # Tables are reached through a uoffset32 stored at +pos+, structs are
  # read inline, and enums are read as their underlying scalar and
  # mapped to the value name when one is defined.
  #
  # @raise [ParseError] if the schema has no such table/struct/enum
  def read_named_type(pos, type_name)
    type_def = schema.find_type(type_name)

    case type_def
    when Models::Flatbuffers::TableDefinition
      read_table(pos + read_uoffset32(pos), type_def)
    when Models::Flatbuffers::StructDefinition
      read_struct(pos, type_def)
    when Models::Flatbuffers::EnumDefinition
      value = read_scalar(pos, type_def.type)
      type_def.find_name_by_value(value) || value
    else
      raise ParseError, "Unknown type: #{type_name}"
    end
  end

  # Read struct (inline, fixed-size)
  #
  # Each member is read at its aligned offset inside the struct, so
  # padding bytes between members are skipped.
  #
  # @return [Hash] field name => value
  def read_struct(pos, struct_def)
    member_offsets = struct_layout(struct_def)[:offsets]

    result = {}
    struct_def.fields.each_with_index do |field_def, index|
      member_pos = pos + member_offsets[index]
      type = field_def.type
      result[field_def.name] =
        if SCALAR_FORMATS.key?(type)
          read_scalar(member_pos, type)
        else
          # Nested struct or enum (validated by struct_layout)
          read_named_type(member_pos, type)
        end
    end
    result
  end

  # Get size of field for struct
  def field_size(field_def)
    inline_layout(field_def.type).first
  end

  # Memoized layout of a struct definition.
  #
  # @return [Hash] +:offsets+ (one per field), +:size+ (padded to the
  #   struct alignment) and +:align+
  def struct_layout(struct_def)
    @struct_layouts ||= {}.compare_by_identity
    @struct_layouts[struct_def] ||= compute_struct_layout(struct_def)
  end

  # Lay out struct members: every member is aligned to its own
  # alignment and the total size is rounded up to the largest member
  # alignment.
  def compute_struct_layout(struct_def)
    cursor = 0
    max_align = 1

    offsets = struct_def.fields.map do |field_def|
      size, align = inline_layout(field_def.type)
      max_align = align if align > max_align
      start = align_up(cursor, align)
      cursor = start + size
      start
    end

    { offsets: offsets, size: align_up(cursor, max_align), align: max_align }
  end

  # Size and alignment of a type stored inline inside a struct.
  #
  # @return [Array(Integer, Integer)] size and alignment in bytes
  # @raise [ParseError] if the type is not a scalar, struct or enum
  def inline_layout(type)
    width, = SCALAR_FORMATS[type]
    return [width, width] if width

    type_def = schema.find_type(type)
    case type_def
    when Models::Flatbuffers::StructDefinition
      layout = struct_layout(type_def)
      [layout[:size], layout[:align]]
    when Models::Flatbuffers::EnumDefinition
      inline_layout(type_def.type)
    else
      raise ParseError, "Type '#{type}' cannot be stored inline in a struct"
    end
  end

  # Round +value+ up to the next multiple of +alignment+.
  def align_up(value, alignment)
    (value + alignment - 1) / alignment * alignment
  end

  # Read the string whose uoffset32 is stored at +pos+.
  #
  # @return [String] the string bytes tagged as UTF-8 (not validated)
  def read_string(pos)
    string_pos = pos + read_uoffset32(pos)
    length = read_uint32(string_pos)
    read_bytes(string_pos + OFFSET_SIZE, length)
      .force_encoding(Encoding::UTF_8)
  end

  # Read methods for different integer types
  def read_int8(pos)
    read_bytes(pos, 1).unpack1("c")
  end

  def read_uint8(pos)
    read_bytes(pos, 1).unpack1("C")
  end

  def read_int16(pos)
    read_bytes(pos, 2).unpack1("s<")
  end

  def read_uint16(pos)
    read_bytes(pos, 2).unpack1("S<")
  end

  def read_int32(pos)
    read_bytes(pos, 4).unpack1("l<")
  end

  def read_uint32(pos)
    read_bytes(pos, 4).unpack1("L<")
  end

  def read_int64(pos)
    read_bytes(pos, 8).unpack1("q<")
  end

  def read_uint64(pos)
    read_bytes(pos, 8).unpack1("Q<")
  end

  def read_float(pos)
    read_bytes(pos, 4).unpack1("e")
  end

  def read_double(pos)
    read_bytes(pos, 8).unpack1("E")
  end

  # Read unsigned offset (uoffset32)
  def read_uoffset32(pos)
    read_uint32(pos)
  end

  # Read signed offset (soffset32)
  def read_soffset32(pos)
    read_int32(pos)
  end

  # Return exactly +length+ bytes starting at absolute offset +pos+.
  #
  # @raise [ParseError] if the range is not fully inside the buffer
  def read_bytes(pos, length)
    ensure_readable(pos, length)
    @buffer.byteslice(pos, length)
  end

  # Guard every access so corrupt input fails with ParseError rather
  # than a nil-related NoMethodError further down the line.
  def ensure_readable(pos, length)
    return if pos >= 0 && length >= 0 && pos + length <= @buffer.bytesize

    raise ParseError,
          "Unexpected end of data: cannot read #{length} byte(s) at " \
          "offset #{pos} (buffer is #{@buffer.bytesize} bytes)"
  end
end
323
+ end
324
+ end
325
+ end
@@ -0,0 +1,235 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parslet"
4
+
5
+ module Unibuf
6
+ module Parsers
7
+ module Flatbuffers
8
+ # Parslet grammar for parsing FlatBuffers schema definitions
9
+ # Reference: https://flatbuffers.dev/flatbuffers_grammar.html
10
class Grammar < Parslet::Parser
  # Built-in type keywords recognised by the :scalar_type rule.
  SCALAR_TYPES = %w[
    byte ubyte short ushort int uint long ulong float double bool string
  ].freeze

  # ===== Lexical Elements =====

  # Whitespace and comments
  rule(:space) { match['\s'].repeat(1) }
  rule(:space?) { space.maybe }
  rule(:newline) { str("\n") }

  # Comments (// and /* */)
  rule(:line_comment) do
    str("//") >> (newline.absent? >> any).repeat >> newline.maybe
  end
  rule(:block_comment) do
    str("/*") >> (str("*/").absent? >> any).repeat >> str("*/")
  end
  rule(:comment) { line_comment | block_comment }

  # Comments are accepted wherever whitespace is accepted
  rule(:whitespace) { (space | comment).repeat(1) }
  rule(:whitespace?) { (space | comment).repeat }

  # Identifiers
  rule(:letter) { match["a-zA-Z_"] }
  rule(:digit) { match["0-9"] }
  rule(:identifier) do
    (letter >> (letter | digit).repeat).as(:identifier)
  end

  # Zero-width check that the previous keyword is not merely the prefix
  # of a longer identifier. It consumes nothing and adds nothing to the
  # parse tree.
  rule(:word_end) { (letter | digit).absent? }

  # Strings (no escape sequences: content runs up to the next quote)
  rule(:string_content) { (str('"').absent? >> any).repeat }
  rule(:string_literal) do
    str('"') >> string_content.as(:string) >> str('"')
  end

  # Numbers (including negative)
  rule(:number) do
    (match["+-"].maybe >> digit.repeat(1) >>
      (str(".") >> digit.repeat(1)).maybe).as(:number)
  end

  # Boolean literals
  # The word boundary keeps identifiers such as `true_color` from being
  # split into the literal `true` plus trailing garbage.
  rule(:bool_literal) do
    (str("true") | str("false")).as(:bool) >> word_end
  end

  # ===== Syntax Elements =====

  # Namespace declaration: namespace com.example.game;
  rule(:namespace_stmt) do
    str("namespace") >> whitespace >>
      identifier.as(:namespace) >>
      (str(".") >> identifier).repeat >> whitespace? >> str(";")
  end

  # Include statement: include "other.fbs";
  rule(:include_stmt) do
    str("include") >> whitespace >>
      string_literal.as(:include) >> whitespace? >> str(";")
  end

  # Attribute: (id: 1, deprecated)
  rule(:attribute) do
    identifier.as(:name) >>
      (whitespace? >> str(":") >> whitespace? >>
        (number | bool_literal | string_literal | identifier).as(:value)).maybe
  end

  rule(:metadata) do
    str("(") >> whitespace? >>
      attribute.as(:attr) >>
      (whitespace? >> str(",") >> whitespace? >> attribute.as(:attr)).repeat >>
      whitespace? >> str(")")
  end

  # Scalar types
  # Without the word boundary, user types whose names merely start with
  # a keyword (`longitude`, `interval`, `bytes`, `int8`) would be cut
  # after the keyword and the enclosing field would fail to parse. The
  # lookahead sits outside the capture, so the result is still
  # { scalar_type: "<keyword>" }.
  rule(:scalar_type) do
    SCALAR_TYPES.map { |keyword| str(keyword) }.reduce(:|)
                .as(:scalar_type) >> word_end
  end

  # Vector type: [type]
  rule(:vector_type) do
    str("[") >> whitespace? >>
      (scalar_type | identifier.as(:user_type)) >>
      whitespace? >> str("]")
  end

  # Field type
  rule(:field_type) do
    vector_type.as(:vector) | scalar_type | identifier.as(:user_type)
  end

  # Default value
  rule(:default_value) do
    whitespace? >> str("=") >> whitespace? >>
      (number | bool_literal | string_literal | identifier.as(:enum_value)).as(:default)
  end

  # Table field: name: type = default (metadata);
  rule(:table_field) do
    identifier.as(:name) >> whitespace? >> str(":") >> whitespace? >>
      field_type.as(:type) >>
      default_value.maybe >>
      (whitespace? >> metadata.as(:metadata)).maybe >>
      whitespace? >> str(";")
  end

  # Table definition
  rule(:table_body) do
    (table_field.as(:field) | whitespace).repeat
  end

  rule(:table_def) do
    str("table") >> whitespace >>
      identifier.as(:table_name) >>
      (whitespace? >> metadata.as(:metadata)).maybe >>
      whitespace? >> str("{") >> whitespace? >>
      table_body.as(:body) >> whitespace? >>
      str("}")
  end

  # Struct definition (similar to table but fixed size; fields take no
  # default value)
  rule(:struct_field) do
    identifier.as(:name) >> whitespace? >> str(":") >> whitespace? >>
      field_type.as(:type) >>
      (whitespace? >> metadata.as(:metadata)).maybe >>
      whitespace? >> str(";")
  end

  rule(:struct_body) do
    (struct_field.as(:field) | whitespace).repeat
  end

  rule(:struct_def) do
    str("struct") >> whitespace >>
      identifier.as(:struct_name) >>
      (whitespace? >> metadata.as(:metadata)).maybe >>
      whitespace? >> str("{") >> whitespace? >>
      struct_body.as(:body) >> whitespace? >>
      str("}")
  end

  # Enum definition
  # Each value may carry an explicit number and an optional trailing
  # "," or ";" separator.
  rule(:enum_value) do
    identifier.as(:name) >>
      (whitespace? >> str("=") >> whitespace? >> number.as(:value)).maybe >>
      whitespace? >> (str(",") | str(";")).maybe
  end

  rule(:enum_type) do
    whitespace? >> str(":") >> whitespace? >> scalar_type.as(:enum_type)
  end

  rule(:enum_def) do
    str("enum") >> whitespace >>
      identifier.as(:enum_name) >>
      enum_type.maybe >>
      (whitespace? >> metadata.as(:metadata)).maybe >>
      whitespace? >> str("{") >> whitespace? >>
      (enum_value.as(:value) >> whitespace?).repeat.as(:values) >>
      whitespace? >> str("}")
  end

  # Union definition
  rule(:union_value) do
    identifier.as(:type) >> whitespace? >> (str(",") | str(";")).maybe
  end

  rule(:union_def) do
    str("union") >> whitespace >>
      identifier.as(:union_name) >>
      (whitespace? >> metadata.as(:metadata)).maybe >>
      whitespace? >> str("{") >> whitespace? >>
      (union_value.as(:type) >> whitespace?).repeat.as(:types) >>
      whitespace? >> str("}")
  end

  # Root type declaration: root_type Monster;
  rule(:root_type_stmt) do
    str("root_type") >> whitespace >>
      identifier.as(:root_type) >> whitespace? >> str(";")
  end

  # File identifier: file_identifier "ABCD";
  rule(:file_identifier_stmt) do
    str("file_identifier") >> whitespace >>
      string_literal.as(:file_identifier) >> whitespace? >> str(";")
  end

  # File extension: file_extension "dat";
  rule(:file_extension_stmt) do
    str("file_extension") >> whitespace >>
      string_literal.as(:file_extension) >> whitespace? >> str(";")
  end

  # Attribute declaration: attribute "id";
  rule(:attribute_decl) do
    str("attribute") >> whitespace >>
      string_literal.as(:attribute) >> whitespace? >> str(";")
  end

  # Top-level elements
  # Alternatives are tried in the order listed; the first match wins.
  rule(:fbs_element) do
    namespace_stmt.as(:namespace) |
      include_stmt.as(:include) |
      table_def.as(:table) |
      struct_def.as(:struct) |
      enum_def.as(:enum) |
      union_def.as(:union) |
      root_type_stmt.as(:root_type) |
      file_identifier_stmt.as(:file_identifier) |
      file_extension_stmt.as(:file_extension) |
      attribute_decl.as(:attribute_decl) |
      whitespace
  end

  # FlatBuffers schema file
  rule(:fbs_file) do
    whitespace? >> fbs_element.repeat >> whitespace?
  end

  root(:fbs_file)
end
233
+ end
234
+ end
235
+ end