unibuf 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +178 -330
  3. data/CODE_OF_CONDUCT.md +132 -0
  4. data/README.adoc +443 -254
  5. data/docs/CAPNPROTO.adoc +436 -0
  6. data/docs/FLATBUFFERS.adoc +430 -0
  7. data/docs/PROTOBUF.adoc +515 -0
  8. data/docs/TXTPROTO.adoc +369 -0
  9. data/lib/unibuf/commands/convert.rb +60 -2
  10. data/lib/unibuf/commands/schema.rb +68 -11
  11. data/lib/unibuf/errors.rb +23 -26
  12. data/lib/unibuf/models/capnproto/enum_definition.rb +72 -0
  13. data/lib/unibuf/models/capnproto/field_definition.rb +81 -0
  14. data/lib/unibuf/models/capnproto/interface_definition.rb +70 -0
  15. data/lib/unibuf/models/capnproto/method_definition.rb +81 -0
  16. data/lib/unibuf/models/capnproto/schema.rb +84 -0
  17. data/lib/unibuf/models/capnproto/struct_definition.rb +96 -0
  18. data/lib/unibuf/models/capnproto/union_definition.rb +62 -0
  19. data/lib/unibuf/models/flatbuffers/enum_definition.rb +69 -0
  20. data/lib/unibuf/models/flatbuffers/field_definition.rb +88 -0
  21. data/lib/unibuf/models/flatbuffers/schema.rb +102 -0
  22. data/lib/unibuf/models/flatbuffers/struct_definition.rb +70 -0
  23. data/lib/unibuf/models/flatbuffers/table_definition.rb +73 -0
  24. data/lib/unibuf/models/flatbuffers/union_definition.rb +60 -0
  25. data/lib/unibuf/models/message.rb +10 -0
  26. data/lib/unibuf/models/values/scalar_value.rb +2 -2
  27. data/lib/unibuf/parsers/binary/wire_format_parser.rb +199 -19
  28. data/lib/unibuf/parsers/capnproto/binary_parser.rb +267 -0
  29. data/lib/unibuf/parsers/capnproto/grammar.rb +272 -0
  30. data/lib/unibuf/parsers/capnproto/list_reader.rb +208 -0
  31. data/lib/unibuf/parsers/capnproto/pointer_decoder.rb +163 -0
  32. data/lib/unibuf/parsers/capnproto/processor.rb +348 -0
  33. data/lib/unibuf/parsers/capnproto/segment_reader.rb +131 -0
  34. data/lib/unibuf/parsers/capnproto/struct_reader.rb +199 -0
  35. data/lib/unibuf/parsers/flatbuffers/binary_parser.rb +325 -0
  36. data/lib/unibuf/parsers/flatbuffers/grammar.rb +235 -0
  37. data/lib/unibuf/parsers/flatbuffers/processor.rb +299 -0
  38. data/lib/unibuf/parsers/textproto/grammar.rb +1 -1
  39. data/lib/unibuf/parsers/textproto/processor.rb +10 -0
  40. data/lib/unibuf/serializers/binary_serializer.rb +218 -0
  41. data/lib/unibuf/serializers/capnproto/binary_serializer.rb +402 -0
  42. data/lib/unibuf/serializers/capnproto/list_writer.rb +199 -0
  43. data/lib/unibuf/serializers/capnproto/pointer_encoder.rb +118 -0
  44. data/lib/unibuf/serializers/capnproto/segment_builder.rb +124 -0
  45. data/lib/unibuf/serializers/capnproto/struct_writer.rb +139 -0
  46. data/lib/unibuf/serializers/flatbuffers/binary_serializer.rb +167 -0
  47. data/lib/unibuf/validators/type_validator.rb +1 -1
  48. data/lib/unibuf/version.rb +1 -1
  49. data/lib/unibuf.rb +27 -0
  50. metadata +36 -1
@@ -0,0 +1,299 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../models/flatbuffers/schema"
4
+ require_relative "../../models/flatbuffers/table_definition"
5
+ require_relative "../../models/flatbuffers/struct_definition"
6
+ require_relative "../../models/flatbuffers/field_definition"
7
+ require_relative "../../models/flatbuffers/enum_definition"
8
+ require_relative "../../models/flatbuffers/union_definition"
9
+
10
+ module Unibuf
11
+ module Parsers
12
+ module Flatbuffers
13
# Processor to transform a FlatBuffers AST into Schema models.
#
# The AST is expected to be a tree of Hash nodes (or Arrays of them) whose
# leaves respond to +to_s+ (presumably Parslet output -- confirm against
# the grammar). Wherever a list of nodes is expected, a lone Hash is
# accepted as a one-element list: +Array(hash)+ would otherwise explode it
# into +[key, value]+ pairs and the node would be lost or crash the lookup.
class Processor
  # Optionally signed hexadecimal integer literal, e.g. "0x1F" or "-0xff".
  HEX_LITERAL = /\A[-+]?0[xX]\h+\z/

  class << self
    # Build a schema model from the top-level declarations of the AST.
    #
    # @param ast [Array<Hash>, Hash, nil] parsed declarations
    # @return [Models::Flatbuffers::Schema] an empty schema when +ast+ is nil
    def process(ast)
      return Models::Flatbuffers::Schema.new unless ast

      elements = hash_nodes(ast)

      attributes = {
        namespace: extract_namespace(elements),
        includes: extract_includes(elements),
        tables: extract_tables(elements),
        structs: extract_structs(elements),
        enums: extract_enums(elements),
        unions: extract_unions(elements),
        root_type: extract_root_type(elements),
        file_identifier: extract_file_identifier(elements),
        file_extension: extract_file_extension(elements),
        attributes: extract_attributes(elements),
      }

      Models::Flatbuffers::Schema.new(attributes)
    end

    private

    # Coerce a node into a list without exploding a lone Hash into pairs.
    def wrap_nodes(node)
      node.is_a?(Hash) ? [node] : Array(node)
    end

    # List of the hash-like children of +node+; anything that cannot be
    # queried with +key?+ (for example a bare token) is ignored.
    def hash_nodes(node)
      wrap_nodes(node).select { |el| el.respond_to?(:key?) }
    end

    # Convert a numeric token to Integer or Float.
    # Hexadecimal literals and exponent notation are recognised; any
    # other text falls back to String#to_i (so unparsable text yields 0).
    def parse_number(token)
      text = token.to_s
      return text.hex if text.match?(HEX_LITERAL)

      text.match?(/[.eE]/) ? text.to_f : text.to_i
    end

    # @return [String, nil] dotted namespace, or nil when none is declared
    def extract_namespace(elements)
      ns_element = elements.find { |el| el.key?(:namespace) }
      return nil unless ns_element

      # Namespace is a list of identifiers, each either bare or nested
      # under another :namespace key.
      names = hash_nodes(ns_element[:namespace]).filter_map do |part|
        if part[:namespace]
          part[:namespace][:identifier].to_s
        elsif part[:identifier]
          part[:identifier].to_s
        end
      end

      names.join(".")
    end

    # @return [Array<String>] included file names, in declaration order
    def extract_includes(elements)
      elements.select { |el| el.key?(:include) }.map do |el|
        el[:include][:include][:string].to_s
      end
    end

    # @return [Array<Models::Flatbuffers::TableDefinition>]
    def extract_tables(elements)
      elements.select { |el| el.key?(:table) }.map do |el|
        process_table(el[:table])
      end
    end

    # @return [Array<Models::Flatbuffers::StructDefinition>]
    def extract_structs(elements)
      elements.select { |el| el.key?(:struct) }.map do |el|
        process_struct(el[:struct])
      end
    end

    # @return [Array<Models::Flatbuffers::EnumDefinition>]
    def extract_enums(elements)
      elements.select { |el| el.key?(:enum) }.map do |el|
        process_enum(el[:enum])
      end
    end

    # @return [Array<Models::Flatbuffers::UnionDefinition>]
    def extract_unions(elements)
      elements.select { |el| el.key?(:union) }.map do |el|
        process_union(el[:union])
      end
    end

    # @return [String, nil] name from the first root_type declaration
    def extract_root_type(elements)
      declaration = elements.find { |el| el.key?(:root_type) }
      declaration && declaration[:root_type][:root_type][:identifier].to_s
    end

    # @return [String, nil] value of the first file_identifier declaration
    def extract_file_identifier(elements)
      declaration = elements.find { |el| el.key?(:file_identifier) }
      declaration && declaration[:file_identifier][:file_identifier][:string].to_s
    end

    # @return [String, nil] value of the first file_extension declaration
    def extract_file_extension(elements)
      declaration = elements.find { |el| el.key?(:file_extension) }
      declaration && declaration[:file_extension][:file_extension][:string].to_s
    end

    # @return [Array<String>] names of the declared custom attributes
    def extract_attributes(elements)
      elements.select { |el| el.key?(:attribute_decl) }.map do |el|
        el[:attribute_decl][:attribute][:string].to_s
      end
    end

    # @param table_data [Hash] node with :table_name, :body and :metadata
    # @return [Models::Flatbuffers::TableDefinition]
    def process_table(table_data)
      Models::Flatbuffers::TableDefinition.new(
        name: table_data[:table_name][:identifier].to_s,
        fields: extract_table_fields(table_data[:body]),
        metadata: process_metadata(table_data[:metadata]),
      )
    end

    # @param struct_data [Hash] node with :struct_name, :body and :metadata
    # @return [Models::Flatbuffers::StructDefinition]
    def process_struct(struct_data)
      Models::Flatbuffers::StructDefinition.new(
        name: struct_data[:struct_name][:identifier].to_s,
        fields: extract_struct_fields(struct_data[:body]),
        metadata: process_metadata(struct_data[:metadata]),
      )
    end

    # Field definitions found in a table body.
    def extract_table_fields(body)
      extract_fields(body)
    end

    # Field definitions found in a struct body.
    def extract_struct_fields(body)
      extract_fields(body)
    end

    # Shared implementation: every hash-like body element carrying a
    # :field key becomes a FieldDefinition; a nil body yields [].
    def extract_fields(body)
      hash_nodes(body).select { |el| el.key?(:field) }.map do |el|
        process_field(el[:field])
      end
    end

    # @param field_data [Hash] node with :name, :type, :default, :metadata
    # @return [Models::Flatbuffers::FieldDefinition]
    def process_field(field_data)
      Models::Flatbuffers::FieldDefinition.new(
        name: field_data[:name][:identifier].to_s,
        type: process_field_type(field_data[:type]),
        default_value: process_default_value(field_data[:default]),
        metadata: process_metadata(field_data[:metadata]),
      )
    end

    # @return [String, Hash] the type name, or +{ vector: element_type }+
    #   for a vector type
    def process_field_type(type_data)
      return { vector: type_name(type_data[:vector]) } if type_data[:vector]

      type_name(type_data)
    end

    # Name of a scalar type, or of a user-defined type when the node has
    # no :scalar_type entry.
    def type_name(node)
      if node[:scalar_type]
        node[:scalar_type].to_s
      else
        node[:user_type][:identifier].to_s
      end
    end

    # @return [Integer, Float, Boolean, String, nil] literal default value;
    #   nil when no default is given or its kind is not recognised
    def process_default_value(default_data)
      return nil unless default_data

      if default_data[:number]
        parse_number(default_data[:number])
      elsif default_data[:bool]
        default_data[:bool].to_s == "true"
      elsif default_data[:string]
        default_data[:string].to_s
      elsif default_data[:enum_value]
        default_data[:enum_value][:identifier].to_s
      end
    end

    # @return [Hash{Symbol => Object}] attribute name => value; an
    #   attribute without a value maps to true
    def process_metadata(metadata_data)
      return {} unless metadata_data

      hash_nodes(metadata_data).each_with_object({}) do |node, result|
        next unless node.key?(:attr)

        attr = node[:attr]
        name = attr[:name][:identifier].to_s.to_sym
        result[name] = attr[:value] ? process_metadata_value(attr[:value]) : true
      end
    end

    # @return [Integer, Float, Boolean, String] literal attribute value;
    #   unrecognised nodes are stringified
    def process_metadata_value(value_data)
      if value_data[:number]
        parse_number(value_data[:number])
      elsif value_data[:bool]
        value_data[:bool].to_s == "true"
      elsif value_data[:string]
        value_data[:string].to_s
      elsif value_data[:identifier]
        value_data[:identifier].to_s
      else
        value_data.to_s
      end
    end

    # @param enum_data [Hash] node with :enum_name, :enum_type, :values
    #   and :metadata
    # @return [Models::Flatbuffers::EnumDefinition]
    def process_enum(enum_data)
      # Underlying type defaults to "int" when not declared.
      type = if enum_data[:enum_type]
               enum_data[:enum_type][:enum_type].to_s
             else
               "int"
             end

      values = {}
      last_value = -1
      hash_nodes(enum_data[:values]).each do |val_el|
        next unless val_el.key?(:value)

        val = val_el[:value]
        # A value without an explicit number continues from the previous one.
        val_num = if val[:value]
                    parse_number(val[:value][:number]).to_i
                  else
                    last_value + 1
                  end

        values[val[:name][:identifier].to_s] = val_num
        last_value = val_num
      end

      Models::Flatbuffers::EnumDefinition.new(
        name: enum_data[:enum_name][:identifier].to_s,
        type: type,
        values: values,
        metadata: process_metadata(enum_data[:metadata]),
      )
    end

    # @param union_data [Hash] node with :union_name, :types and :metadata
    # @return [Models::Flatbuffers::UnionDefinition]
    def process_union(union_data)
      types = hash_nodes(union_data[:types]).filter_map do |type_el|
        type_el[:type][:type][:identifier].to_s if type_el.key?(:type)
      end

      Models::Flatbuffers::UnionDefinition.new(
        name: union_data[:union_name][:identifier].to_s,
        types: types,
        metadata: process_metadata(union_data[:metadata]),
      )
    end
  end
end
297
+ end
298
+ end
299
+ end
@@ -72,7 +72,7 @@ module Unibuf
72
72
 
73
73
  # Numbers - with optional sign
74
74
  rule(:signed_number) do
75
- str("-") >> whitespace? >> (float_token | hex_int | oct_int | dec_int)
75
+ (str("-") >> whitespace? >> (float_token | hex_int | oct_int | dec_int)).as(:negative)
76
76
  end
77
77
  rule(:unsigned_number) { float_token | hex_int | oct_int | dec_int }
78
78
  rule(:number) { signed_number | unsigned_number }
@@ -60,6 +60,7 @@ module Unibuf
60
60
  end
61
61
 
62
62
  # Process a value (polymorphic)
63
+ # rubocop:disable Metrics/MethodLength
63
64
  def process_value(value)
64
65
  return nil unless value
65
66
  return value.to_s if value.is_a?(String)
@@ -74,6 +75,14 @@ module Unibuf
74
75
 
75
76
  return nil unless value.respond_to?(:[])
76
77
 
78
+ # Handle negative numbers
79
+ if value[:negative]
80
+ inner_value = process_value(value[:negative])
81
+ return -inner_value if inner_value.is_a?(Numeric)
82
+
83
+ return inner_value
84
+ end
85
+
77
86
  if value[:string]
78
87
  # Single string
79
88
  extract_and_unescape_string(value[:string])
@@ -103,6 +112,7 @@ module Unibuf
103
112
  value.to_s
104
113
  end
105
114
  end
115
+ # rubocop:enable Metrics/MethodLength
106
116
 
107
117
  # Extract and unescape a string token
108
118
  def extract_and_unescape_string(str_token)
@@ -0,0 +1,218 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Serializers
5
# Binary Protocol Buffer wire format serializer.
# Implements the Protocol Buffers binary encoding specification.
# Reference: https://protobuf.dev/programming-guides/encoding/
#
# Every intermediate buffer is kept in BINARY (ASCII-8BIT) encoding so that
# text payloads containing non-ASCII characters can be appended to tag and
# length bytes without an Encoding::CompatibilityError.
class BinarySerializer
  attr_reader :schema

  # Wire types (must match parser)
  WIRE_TYPE_VARINT = 0
  WIRE_TYPE_64BIT = 1
  WIRE_TYPE_LENGTH_DELIMITED = 2
  WIRE_TYPE_32BIT = 5

  # Mask that maps a negative Integer to its 64-bit two's complement form.
  UINT64_MASK = 0xFFFF_FFFF_FFFF_FFFF

  # @param schema [#messages, #find_message] schema used to look up
  #   message definitions by type name
  def initialize(schema)
    @schema = schema
  end

  # Serialize a Message to binary Protocol Buffer format
  # @param message [Models::Message] The message to serialize
  # @param message_type [String] The message type name from schema
  # @return [String] Binary data
  # @raise [ArgumentError] when the message is nil or no message
  #   definition can be resolved
  def serialize(message, message_type: nil)
    raise ArgumentError, "Message cannot be nil" if message.nil?

    # Find message definition
    msg_def = find_message_definition(message_type)
    unless msg_def
      raise ArgumentError,
            "Message type required or schema must have exactly one message"
    end

    # Serialize all fields
    serialize_fields(message, msg_def)
  end

  # Serialize +message+ and write the bytes to +path+ in binary mode.
  # Accepts the same arguments as #serialize plus the target path.
  def serialize_to_file(message, path, message_type: nil)
    binary_data = serialize(message, message_type: message_type)
    File.binwrite(path, binary_data)
  end

  private

  # Resolve the message definition: the schema's only message when no
  # type name is given, otherwise a lookup by name.
  def find_message_definition(type_name)
    return schema.messages.first if type_name.nil? && schema.messages.size == 1

    schema.find_message(type_name)
  end

  # Encode every field of +message+ that has a definition in +msg_def+,
  # in the order the fields appear on the message.
  # @return [String] binary-encoded concatenation of the fields
  def serialize_fields(message, msg_def)
    output = String.new(encoding: Encoding::BINARY)

    message.fields.each do |field|
      field_def = msg_def.fields.find { |fd| fd.name == field.name }
      next unless field_def # Skip unknown fields

      encoded_field = encode_field(field, field_def)
      output << encoded_field if encoded_field
    end

    output
  end

  # Encode a single field as tag followed by payload.
  # @return [String, nil] nil when no wire type applies
  def encode_field(field, field_def)
    wire_type = wire_type_for_field(field_def)
    return nil unless wire_type

    # Tag is (field_number << 3) | wire_type
    tag = (field_def.number << 3) | wire_type
    encode_varint(tag) << encode_field_value(field, field_def, wire_type)
  end

  # Map a declared field type to its wire type. Any type name that is not
  # a known scalar is assumed to be an embedded message.
  def wire_type_for_field(field_def)
    case field_def.type
    when "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64"
      WIRE_TYPE_VARINT
    when "fixed64", "sfixed64", "double"
      WIRE_TYPE_64BIT
    when "fixed32", "sfixed32", "float"
      WIRE_TYPE_32BIT
    else
      # "string", "bytes" and message types
      WIRE_TYPE_LENGTH_DELIMITED
    end
  end

  # Dispatch to the payload encoder for +wire_type+.
  # @raise [SerializationError] for a wire type this class does not write
  def encode_field_value(field, field_def, wire_type)
    case wire_type
    when WIRE_TYPE_VARINT
      encode_varint_value(field, field_def)
    when WIRE_TYPE_64BIT
      encode_64bit_value(field, field_def)
    when WIRE_TYPE_LENGTH_DELIMITED
      encode_length_delimited_value(field, field_def)
    when WIRE_TYPE_32BIT
      encode_32bit_value(field, field_def)
    else
      raise SerializationError, "Unsupported wire type: #{wire_type}"
    end
  end

  # Encode varint values: bools as 0/1, sint32/sint64 ZigZag-encoded,
  # every other type as a plain varint.
  def encode_varint_value(field, field_def)
    value = field.value

    case field_def.type
    when "bool"
      encode_varint(value ? 1 : 0)
    when "sint32"
      encode_varint(encode_zigzag_32(value))
    when "sint64"
      encode_varint(encode_zigzag_64(value))
    else
      encode_varint(value)
    end
  end

  # Encode 64-bit values (little-endian); unknown types are written as
  # unsigned integers.
  def encode_64bit_value(field, field_def)
    format = case field_def.type
             when "sfixed64" then "q<"
             when "double" then "E"
             else "Q<"
             end
    [field.value].pack(format)
  end

  # Encode 32-bit values (little-endian); unknown types are written as
  # unsigned integers.
  def encode_32bit_value(field, field_def)
    format = case field_def.type
             when "sfixed32" then "l<"
             when "float" then "e"
             else "L<"
             end
    [field.value].pack(format)
  end

  # Encode length-delimited values: strings and bytes are written with
  # their existing bytes (no transcoding); any other type is treated as an
  # embedded message and serialized recursively.
  # @raise [SerializationError] when the embedded message type is unknown
  def encode_length_delimited_value(field, field_def)
    case field_def.type
    when "string", "bytes"
      # String#b yields a binary copy, so appending to the length prefix
      # can never hit an encoding mismatch.
      length_prefixed(field.value.b)
    else
      # Embedded message
      nested_msg = field.as_message
      nested_msg_def = schema.find_message(field_def.type)
      unless nested_msg_def
        raise SerializationError,
              "Unknown message type: #{field_def.type}"
      end

      length_prefixed(serialize_fields(nested_msg, nested_msg_def))
    end
  end

  # Prefix binary +bytes+ with their length as a varint.
  def length_prefixed(bytes)
    encode_varint(bytes.bytesize) << bytes
  end

  # Encode variable-length integer
  # Values 0-127: 1 byte
  # Larger values: multiple bytes with continuation bit
  # Negative values are written as their 64-bit two's complement
  # (10 bytes), as the wire format requires for int32/int64.
  def encode_varint(value)
    remaining = value.negative? ? value & UINT64_MASK : value
    output = String.new(encoding: Encoding::BINARY)

    loop do
      byte = remaining & 0x7F
      remaining >>= 7
      if remaining.zero?
        output << byte
        break
      end
      output << (byte | 0x80) # continuation bit: more bytes follow
    end

    output
  end

  # Encode ZigZag for signed 32-bit integers
  # Maps signed integers to unsigned for efficient encoding
  def encode_zigzag_32(value)
    (value << 1) ^ (value >> 31)
  end

  # Encode ZigZag for signed 64-bit integers
  def encode_zigzag_64(value)
    (value << 1) ^ (value >> 63)
  end
end
217
+ end
218
+ end