unibuf 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +178 -330
  3. data/CODE_OF_CONDUCT.md +132 -0
  4. data/README.adoc +443 -254
  5. data/docs/CAPNPROTO.adoc +436 -0
  6. data/docs/FLATBUFFERS.adoc +430 -0
  7. data/docs/PROTOBUF.adoc +515 -0
  8. data/docs/TXTPROTO.adoc +369 -0
  9. data/lib/unibuf/commands/convert.rb +60 -2
  10. data/lib/unibuf/commands/schema.rb +68 -11
  11. data/lib/unibuf/errors.rb +23 -26
  12. data/lib/unibuf/models/capnproto/enum_definition.rb +72 -0
  13. data/lib/unibuf/models/capnproto/field_definition.rb +81 -0
  14. data/lib/unibuf/models/capnproto/interface_definition.rb +70 -0
  15. data/lib/unibuf/models/capnproto/method_definition.rb +81 -0
  16. data/lib/unibuf/models/capnproto/schema.rb +84 -0
  17. data/lib/unibuf/models/capnproto/struct_definition.rb +96 -0
  18. data/lib/unibuf/models/capnproto/union_definition.rb +62 -0
  19. data/lib/unibuf/models/flatbuffers/enum_definition.rb +69 -0
  20. data/lib/unibuf/models/flatbuffers/field_definition.rb +88 -0
  21. data/lib/unibuf/models/flatbuffers/schema.rb +102 -0
  22. data/lib/unibuf/models/flatbuffers/struct_definition.rb +70 -0
  23. data/lib/unibuf/models/flatbuffers/table_definition.rb +73 -0
  24. data/lib/unibuf/models/flatbuffers/union_definition.rb +60 -0
  25. data/lib/unibuf/models/message.rb +10 -0
  26. data/lib/unibuf/models/values/scalar_value.rb +2 -2
  27. data/lib/unibuf/parsers/binary/wire_format_parser.rb +199 -19
  28. data/lib/unibuf/parsers/capnproto/binary_parser.rb +267 -0
  29. data/lib/unibuf/parsers/capnproto/grammar.rb +272 -0
  30. data/lib/unibuf/parsers/capnproto/list_reader.rb +208 -0
  31. data/lib/unibuf/parsers/capnproto/pointer_decoder.rb +163 -0
  32. data/lib/unibuf/parsers/capnproto/processor.rb +348 -0
  33. data/lib/unibuf/parsers/capnproto/segment_reader.rb +131 -0
  34. data/lib/unibuf/parsers/capnproto/struct_reader.rb +199 -0
  35. data/lib/unibuf/parsers/flatbuffers/binary_parser.rb +325 -0
  36. data/lib/unibuf/parsers/flatbuffers/grammar.rb +235 -0
  37. data/lib/unibuf/parsers/flatbuffers/processor.rb +299 -0
  38. data/lib/unibuf/parsers/textproto/grammar.rb +1 -1
  39. data/lib/unibuf/parsers/textproto/processor.rb +10 -0
  40. data/lib/unibuf/serializers/binary_serializer.rb +218 -0
  41. data/lib/unibuf/serializers/capnproto/binary_serializer.rb +402 -0
  42. data/lib/unibuf/serializers/capnproto/list_writer.rb +199 -0
  43. data/lib/unibuf/serializers/capnproto/pointer_encoder.rb +118 -0
  44. data/lib/unibuf/serializers/capnproto/segment_builder.rb +124 -0
  45. data/lib/unibuf/serializers/capnproto/struct_writer.rb +139 -0
  46. data/lib/unibuf/serializers/flatbuffers/binary_serializer.rb +167 -0
  47. data/lib/unibuf/validators/type_validator.rb +1 -1
  48. data/lib/unibuf/version.rb +1 -1
  49. data/lib/unibuf.rb +27 -0
  50. metadata +36 -1
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "segment_reader"
4
+ require_relative "pointer_decoder"
5
+ require_relative "struct_reader"
6
+ require_relative "list_reader"
7
+
8
+ module Unibuf
9
+ module Parsers
10
+ module Capnproto
11
+ # Parser for Cap'n Proto binary format
12
+ # Coordinates segment reading, pointer following, and data extraction
13
+ class BinaryParser
14
+ attr_reader :schema, :segment_reader
15
+
16
# Build a parser bound to a Cap'n Proto schema.
# No segment reader exists yet; #parse creates one per call.
# @param schema [Models::Capnproto::Schema] Cap'n Proto schema
def initialize(schema)
  @segment_reader = nil
  @schema = schema
end
22
+
23
# Decode a Cap'n Proto message into a Hash keyed by field name.
#
# Word 0 of segment 0 is decoded as the root pointer, which must be a
# struct pointer. The struct it points to is then read field by field
# according to the schema definition of the root type.
#
# @param data [String] Binary data
# @param root_type [String, nil] Root struct type name; when nil the first
#   struct declared in the schema is used
# @return [Hash] Parsed data
# @raise [ParseError] if the root pointer is not a struct pointer, no root
#   type can be determined, or the root type is not found in the schema
def parse(data, root_type: nil)
  @segment_reader = SegmentReader.new(data)

  pointer = PointerDecoder.decode(@segment_reader.read_word(0, 0))
  raise ParseError, "Invalid root pointer" if pointer[:type] != :struct

  # The root pointer occupies word 0, so its target starts at 1 + offset.
  reader = StructReader.new(
    @segment_reader,
    0,
    1 + pointer[:offset],
    pointer[:data_words],
    pointer[:pointer_words],
  )

  type_name = root_type || @schema.structs.first&.name
  raise ParseError, "No root type specified" unless type_name

  definition = @schema.find_struct(type_name)
  raise ParseError, "Struct type not found: #{type_name}" unless definition

  parse_struct(reader, definition)
end
62
+
63
+ private
64
+
65
# Read every field declared on a struct definition.
# @param struct_reader [StructReader] Reader positioned on the struct
# @param struct_def [Models::Capnproto::StructDefinition] Struct definition
# @return [Hash] Field values keyed by the field name as a Symbol
def parse_struct(struct_reader, struct_def)
  struct_def.fields.each_with_object({}) do |field, parsed|
    parsed[field.name.to_sym] = parse_field(struct_reader, field, struct_def)
  end
end
79
+
80
# Dispatch a field to the reader that matches its kind.
# Kinds are checked in this order: primitive, list, Text/Data, user type.
# @param struct_reader [StructReader] Struct reader
# @param field [Models::Capnproto::FieldDefinition] Field definition
# @param struct_def [Models::Capnproto::StructDefinition] Parent struct definition
# @return [Object, nil] Field value; nil when the field matches no known kind
def parse_field(struct_reader, field, struct_def)
  return parse_primitive_field(struct_reader, field) if field.primitive_type?
  return parse_list_field(struct_reader, field, struct_def) if field.list_type?
  return parse_text_or_data_field(struct_reader, field, struct_def) if text_or_data_type?(field)

  parse_user_type_field(struct_reader, field, struct_def) if field.user_type?
end
96
+
97
# Read a primitive value from the struct's data section.
#
# The field ordinal is used as the slot number for the field's type. For
# types narrower than a word the ordinal is split into a word index
# (ordinal / divisor) and a position inside that word (ordinal % divisor).
# 64-bit types pass the ordinal directly as the word index.
#
# @param struct_reader [StructReader] Struct reader
# @param field [Models::Capnproto::FieldDefinition] Field definition
# @return [Object, nil] The decoded value; nil for "Void"; the field's
#   default value for any type name not handled here
def parse_primitive_field(struct_reader, field)
  slot = field.ordinal
  type = field.type

  # type name => [reader method, divisor applied to the ordinal]
  sub_word_readers = {
    "Bool" => [:read_bool, 64],
    "Int8" => [:read_int8, 8],
    "UInt8" => [:read_uint8, 8],
    "Int16" => [:read_int16, 4],
    "UInt16" => [:read_uint16, 4],
    "Int32" => [:read_int32, 2],
    "UInt32" => [:read_uint32, 2],
    "Float32" => [:read_float32, 2],
  }
  # type name => reader method taking only the word index
  full_word_readers = {
    "Int64" => :read_int64,
    "UInt64" => :read_uint64,
    "Float64" => :read_float64,
  }

  if sub_word_readers.key?(type)
    reader, divisor = sub_word_readers[type]
    struct_reader.public_send(reader, slot / divisor, slot % divisor)
  elsif full_word_readers.key?(type)
    struct_reader.public_send(full_word_readers[type], slot)
  elsif "Void" == type
    nil
  else
    field.default_value
  end
end
132
+
133
# Parse a list field by following its pointer and decoding each element.
#
# Element handling, by declared element type:
# * "Text" / "Data" - the whole list is returned via ListReader#read_text /
#   ListReader#read_data
# * primitive       - each element is read with ListReader#read_primitive
# * schema enum     - each element is read as UInt16 and mapped to the enum
#   value name (the raw number is kept when no name matches), the same way
#   enum-typed fields are decoded
# * schema struct   - each element is parsed recursively
# * anything else   - each element is nil
#
# @param struct_reader [StructReader] Reader of the struct that owns the field
# @param field [Models::Capnproto::FieldDefinition] List field definition
# @param struct_def [Models::Capnproto::StructDefinition] Parent struct definition
# @return [Array, Object, nil] Decoded list; nil when the pointer does not
#   resolve to a list
def parse_list_field(struct_reader, field, struct_def)
  # Get pointer index - count non-primitive fields before this one
  pointer_index = get_pointer_index(field, struct_def)

  target = struct_reader.follow_pointer(pointer_index)
  return nil unless target && target[:type] == :list

  list_reader = ListReader.new(
    @segment_reader,
    target[:segment_id],
    target[:word_offset],
    target[:element_size],
    target[:element_count],
  )

  element_type = field.element_type

  case element_type
  when "Text" then return list_reader.read_text
  when "Data" then return list_reader.read_data
  end

  indices = (0...list_reader.length)

  # The element type is the same for every element, so resolve it once
  # instead of on every iteration.
  if primitive_type?(element_type)
    type_symbol = type_to_symbol(element_type)
    return indices.map { |i| list_reader.read_primitive(i, type_symbol) }
  end

  enum_def = @schema.find_enum(element_type)
  if enum_def
    return indices.map do |i|
      value = list_reader.read_primitive(i, :uint16)
      enum_def.find_name_by_ordinal(value) || value
    end
  end

  element_struct_def = @schema.find_struct(element_type)
  indices.map do |i|
    parse_struct(list_reader.read_struct(i), element_struct_def) if element_struct_def
  end
end
173
+
174
# Parse a field whose type is declared in the schema (enum or struct).
#
# Enum fields are read as UInt16 from the data section, addressed by the
# field ordinal, and mapped to the enum value name; the raw number is
# returned when no name matches. Any other type is treated as a nested
# struct reached through the field's pointer slot.
#
# @param struct_reader [StructReader] Reader of the struct that owns the field
# @param field [Models::Capnproto::FieldDefinition] Field definition
# @param struct_def [Models::Capnproto::StructDefinition] Parent struct definition
# @return [Object, nil] Enum name or number, nested struct Hash, or nil when
#   the pointer is not a struct pointer or the struct type is unknown
def parse_user_type_field(struct_reader, field, struct_def)
  if (enum_def = @schema.find_enum(field.type))
    word, position = field.ordinal.divmod(4)
    raw = struct_reader.read_uint16(word, position)
    return enum_def.find_name_by_ordinal(raw) || raw
  end

  target = struct_reader.follow_pointer(get_pointer_index(field, struct_def))
  return nil if !target || target[:type] != :struct

  nested_reader = StructReader.new(
    @segment_reader,
    target[:segment_id],
    target[:word_offset],
    target[:data_words],
    target[:pointer_words],
  )

  nested_def = @schema.find_struct(field.type)
  nested_def ? parse_struct(nested_reader, nested_def) : nil
end
205
+
206
# Parse a Text or Data field. Both are reached through a list pointer.
# @param struct_reader [StructReader] Reader of the struct that owns the field
# @param field [Models::Capnproto::FieldDefinition] Field definition
# @param struct_def [Models::Capnproto::StructDefinition] Parent struct definition
# @return [Object, nil] Result of ListReader#read_text for "Text" fields and
#   ListReader#read_data otherwise; nil when the pointer is not a list pointer
def parse_text_or_data_field(struct_reader, field, struct_def)
  target = struct_reader.follow_pointer(get_pointer_index(field, struct_def))
  return nil if !target || target[:type] != :list

  reader = ListReader.new(
    @segment_reader,
    target[:segment_id],
    target[:word_offset],
    target[:element_size],
    target[:element_count],
  )

  field.type == "Text" ? reader.read_text : reader.read_data
end
228
+
229
# Compute the pointer-section slot used for a field: the number of
# non-primitive fields declared before it in the struct definition.
# When the field is not found in the definition, every non-primitive
# field is counted.
#
# NOTE(review): enum-typed fields are not primitive, so they are counted
# here, yet parse_user_type_field reads them from the data section rather
# than through a pointer. Confirm against the serializer whether enum
# fields are meant to occupy a pointer slot.
#
# @param field [Models::Capnproto::FieldDefinition] Field to locate
# @param struct_def [Models::Capnproto::StructDefinition] Parent struct definition
# @return [Integer] Zero-based pointer index
def get_pointer_index(field, struct_def)
  index = 0
  struct_def.fields.each do |candidate|
    break unless candidate != field

    index += 1 unless candidate.primitive_type?
  end
  index
end
236
+
237
# Tell whether a field's declared type is "Text" or "Data".
# @param field [Models::Capnproto::FieldDefinition] Field definition
# @return [Boolean]
def text_or_data_type?(field)
  case field.type
  when "Text", "Data" then true
  else false
  end
end
241
+
242
# Tell whether a type name is listed in FieldDefinition::PRIMITIVE_TYPES.
# @param type [String] Type name
# @return [Boolean]
def primitive_type?(type)
  known_primitives = Models::Capnproto::FieldDefinition::PRIMITIVE_TYPES
  known_primitives.member?(type)
end
246
+
247
# Map a primitive type name to the symbol passed to ListReader#read_primitive.
# Any name not listed here maps to :uint64.
# @param type [String] Primitive type name, e.g. "Int16"
# @return [Symbol]
def type_to_symbol(type)
  {
    "Int8" => :int8,
    "UInt8" => :uint8,
    "Int16" => :int16,
    "UInt16" => :uint16,
    "Int32" => :int32,
    "UInt32" => :uint32,
    "Int64" => :int64,
    "UInt64" => :uint64,
    "Float32" => :float32,
    "Float64" => :float64,
    "Bool" => :bool,
  }.fetch(type, :uint64)
end
264
+ end
265
+ end
266
+ end
267
+ end
@@ -0,0 +1,272 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parslet"
4
+
5
+ module Unibuf
6
+ module Parsers
7
+ module Capnproto
8
+ # Parslet grammar for parsing Cap'n Proto schema definitions
9
+ # Reference: https://capnproto.org/language.html
10
+ class Grammar < Parslet::Parser
11
# ===== Lexical Elements =====

# Whitespace and comments
rule(:space) { match['\s'].repeat(1) }
rule(:space?) { space.maybe }
rule(:newline) { str("\n") }

# Comments start with "#" and run to the end of the line (or end of input)
rule(:line_comment) do
  str("#") >> (newline.absent? >> any).repeat >> newline.maybe
end
rule(:comment) { line_comment }

# Any run of whitespace and comments; the "?" form also matches nothing
rule(:whitespace) { (space | comment).repeat(1) }
rule(:whitespace?) { (space | comment).repeat }

# Identifiers: a letter or underscore followed by letters, digits, underscores
rule(:letter) { match["a-zA-Z_"] }
rule(:digit) { match["0-9"] }
rule(:identifier) do
  (letter >> (letter | digit).repeat).as(:identifier)
end

# Strings
# A backslash consumes the character after it, so an escaped quote (\")
# does not terminate the literal. The :string capture is the raw source
# text between the quotes; escape sequences are not decoded by this rule.
rule(:string_escape) { str("\\") >> any }
rule(:string_content) do
  (string_escape | (str('"').absent? >> any)).repeat
end
rule(:string_literal) do
  str('"') >> string_content.as(:string) >> str('"')
end

# Numbers (including hex for file IDs)
rule(:hex_digit) { match["0-9a-fA-F"] }
rule(:hex_number) do
  str("0x") >> hex_digit.repeat(1)
end
# Optional sign, integer part, optional fractional part
rule(:decimal_number) do
  match["+-"].maybe >> digit.repeat(1) >>
    (str(".") >> digit.repeat(1)).maybe
end
rule(:number) { (hex_number | decimal_number).as(:number) }

# Boolean literals
# The negative lookahead stops "true"/"false" from matching the start of a
# longer identifier (e.g. "trueish"), which would otherwise make the
# enclosing ordered choice fail before it could try `identifier`.
rule(:bool_literal) do
  (str("true") | str("false")).as(:bool) >> (letter | digit).absent?
end
55
+
56
# ===== File-Level Elements =====

# File ID: "@", a hex number, then ";" (for example @0x...;)
rule(:file_id) { str("@") >> hex_number.as(:number) >> whitespace? >> str(";") }

# Import alias: using Foo = import "foo.capnp";
rule(:using_stmt) do
  str("using") >> whitespace >> identifier.as(:alias) >>
    whitespace? >> str("=") >>
    whitespace? >> str("import") >>
    whitespace >> string_literal.as(:import_path) >>
    whitespace? >> str(";")
end

# Parenthesised annotation argument: a number, boolean, string or identifier
rule(:annotation_value) do
  argument = number | bool_literal | string_literal | identifier

  str("(") >> whitespace? >> argument.as(:value) >> whitespace? >> str(")")
end

# Annotation: $annotation or $annotation(value)
rule(:annotation) { str("$") >> identifier.as(:annotation) >> annotation_value.maybe }
84
+
85
# ===== Type System =====

# Built-in type names.
# The negative lookahead requires the keyword to end at an identifier
# boundary. Without it, a user type such as TextMessage or Int8Array would
# match its built-in prefix ("Text", "Int8"); the ordered choice in
# `field_type` would then commit to that match and never try
# `identifier`, so the surrounding declaration would fail to parse.
rule(:primitive_type) do
  (str("Void") | str("Bool") |
   str("Int8") | str("Int16") | str("Int32") | str("Int64") |
   str("UInt8") | str("UInt16") | str("UInt32") | str("UInt64") |
   str("Float32") | str("Float64") |
   str("Text") | str("Data") |
   str("AnyPointer")).as(:primitive_type) >> (letter | digit).absent?
end

# Generic type: List(T), where T is any field type (lists may nest)
rule(:generic_type) do
  str("List") >> whitespace? >>
    str("(") >> whitespace? >>
    field_type.as(:element_type) >>
    whitespace? >> str(")")
end

# Field type. Alternatives are tried in order: List(...), then a built-in
# type name, then any identifier as a user-defined type.
rule(:field_type) do
  generic_type.as(:generic) |
    primitive_type |
    identifier.as(:user_type)
end
111
+
112
# ===== Struct Definition =====

# Field: name @ordinal :Type;  optionally followed by "= default" before
# the semicolon, where the default is a number, boolean or string literal
rule(:field_def) do
  default_clause = whitespace? >> str("=") >> whitespace? >>
                   (number | bool_literal | string_literal).as(:default)

  identifier.as(:name) >> whitespace? >>
    str("@") >> number.as(:ordinal) >> whitespace? >>
    str(":") >> whitespace? >>
    field_type.as(:type) >> default_clause.maybe >>
    whitespace? >> str(";")
end

# Contents of a union: fields mixed with whitespace/comments
rule(:union_body) { (field_def.as(:field) | whitespace).repeat }

# Union within struct: union { field1 @0 :Text; field2 @1 :Int32; }
rule(:union_def) do
  str("union") >> whitespace? >> str("{") >> whitespace? >>
    union_body.as(:fields) >> whitespace? >> str("}")
end

# Contents of a group: fields mixed with whitespace/comments
rule(:group_body) { (field_def.as(:field) | whitespace).repeat }

# Group as accepted by this grammar: name @ordinal :group { field @0 :Text; }
rule(:group_def) do
  identifier.as(:name) >> whitespace? >>
    str("@") >> number.as(:ordinal) >> whitespace? >>
    str(":group") >> whitespace? >> str("{") >> whitespace? >>
    group_body.as(:fields) >> whitespace? >> str("}")
end

# One item of a struct body. Alternatives are tried in the order listed,
# so a plain field is attempted before unions, groups and nested types.
rule(:struct_element) do
  field_def.as(:field) |
    union_def.as(:union) |
    group_def.as(:group) |
    struct_def.as(:nested_struct) |
    enum_def.as(:nested_enum) |
    interface_def.as(:nested_interface) |
    whitespace
end

# Struct body: zero or more struct elements
rule(:struct_body) { struct_element.repeat }

# Struct: optional leading annotations, then struct Name { body }
rule(:struct_def) do
  (annotation.as(:annotation) >> whitespace?).repeat >>
    str("struct") >> whitespace >> identifier.as(:struct_name) >>
    whitespace? >> str("{") >> whitespace? >>
    struct_body.as(:body) >> whitespace? >> str("}")
end
174
+
175
# ===== Enum Definition =====

# Enum value: name @ordinal;  (also consumes trailing whitespace/comments)
rule(:enum_value) do
  identifier.as(:name) >> whitespace? >>
    str("@") >> number.as(:ordinal) >> whitespace? >>
    str(";") >> whitespace?
end

# Enum: optional leading annotations, then enum Name { values }.
# At least one value is required.
rule(:enum_def) do
  (annotation.as(:annotation) >> whitespace?).repeat >>
    str("enum") >> whitespace >> identifier.as(:enum_name) >>
    whitespace? >> str("{") >> whitespace? >>
    enum_value.repeat(1).as(:values) >> whitespace? >> str("}")
end
192
+
193
# ===== Interface Definition (RPC) =====

# Method parameter: name :Type
rule(:param) do
  identifier.as(:name) >> whitespace? >> str(":") >> whitespace? >> field_type.as(:type)
end

# Comma-separated parameters; may be empty
rule(:param_list) do
  further_params = (whitespace? >> str(",") >> whitespace? >> param.as(:param)).repeat

  (param.as(:param) >> further_params).maybe
end

# Method definition: methodName @ordinal (params) -> (results);
# The "-> (results)" part is optional.
rule(:method_def) do
  result_clause = str("->") >> whitespace? >>
                  str("(") >> whitespace? >>
                  param_list.as(:results) >> whitespace? >>
                  str(")")

  identifier.as(:name) >> whitespace? >>
    str("@") >> number.as(:ordinal) >> whitespace? >>
    str("(") >> whitespace? >>
    param_list.as(:params) >> whitespace? >>
    str(")") >> whitespace? >>
    result_clause.maybe >> whitespace? >>
    str(";")
end

# Interface body: methods mixed with whitespace/comments
rule(:interface_body) { (method_def.as(:method) | whitespace).repeat }

# Interface: optional leading annotations, then interface Name { body }
rule(:interface_def) do
  (annotation.as(:annotation) >> whitespace?).repeat >>
    str("interface") >> whitespace >> identifier.as(:interface_name) >>
    whitespace? >> str("{") >> whitespace? >>
    interface_body.as(:body) >> whitespace? >> str("}")
end
234
+
235
# ===== Const Definition =====

# Constant value: a number, boolean, string, or a reference by identifier
rule(:const_value) { number | bool_literal | string_literal | identifier.as(:ref) }

# Constant: const name :Type = value;
rule(:const_def) do
  str("const") >> whitespace >> identifier.as(:name) >>
    whitespace? >> str(":") >>
    whitespace? >> field_type.as(:type) >>
    whitespace? >> str("=") >>
    whitespace? >> const_value.as(:value) >>
    whitespace? >> str(";")
end
250
+
251
# ===== Top-Level Elements =====

# One top-level item. Alternatives are tried in the order listed.
rule(:capnp_element) do
  file_id.as(:file_id) |
    using_stmt.as(:using) |
    const_def.as(:const) |
    struct_def.as(:struct) |
    enum_def.as(:enum) |
    interface_def.as(:interface) |
    whitespace
end

# Cap'n Proto file: any number of top-level items, with optional
# whitespace/comments before and after
rule(:capnp_file) { whitespace? >> capnp_element.repeat >> whitespace? }

# Parsing starts at the whole-file rule
root(:capnp_file)
269
+ end
270
+ end
271
+ end
272
+ end