unibuf 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +178 -330
  3. data/CODE_OF_CONDUCT.md +132 -0
  4. data/README.adoc +443 -254
  5. data/docs/CAPNPROTO.adoc +436 -0
  6. data/docs/FLATBUFFERS.adoc +430 -0
  7. data/docs/PROTOBUF.adoc +515 -0
  8. data/docs/TXTPROTO.adoc +369 -0
  9. data/lib/unibuf/commands/convert.rb +60 -2
  10. data/lib/unibuf/commands/schema.rb +68 -11
  11. data/lib/unibuf/errors.rb +23 -26
  12. data/lib/unibuf/models/capnproto/enum_definition.rb +72 -0
  13. data/lib/unibuf/models/capnproto/field_definition.rb +81 -0
  14. data/lib/unibuf/models/capnproto/interface_definition.rb +70 -0
  15. data/lib/unibuf/models/capnproto/method_definition.rb +81 -0
  16. data/lib/unibuf/models/capnproto/schema.rb +84 -0
  17. data/lib/unibuf/models/capnproto/struct_definition.rb +96 -0
  18. data/lib/unibuf/models/capnproto/union_definition.rb +62 -0
  19. data/lib/unibuf/models/flatbuffers/enum_definition.rb +69 -0
  20. data/lib/unibuf/models/flatbuffers/field_definition.rb +88 -0
  21. data/lib/unibuf/models/flatbuffers/schema.rb +102 -0
  22. data/lib/unibuf/models/flatbuffers/struct_definition.rb +70 -0
  23. data/lib/unibuf/models/flatbuffers/table_definition.rb +73 -0
  24. data/lib/unibuf/models/flatbuffers/union_definition.rb +60 -0
  25. data/lib/unibuf/models/message.rb +10 -0
  26. data/lib/unibuf/models/values/scalar_value.rb +2 -2
  27. data/lib/unibuf/parsers/binary/wire_format_parser.rb +199 -19
  28. data/lib/unibuf/parsers/capnproto/binary_parser.rb +267 -0
  29. data/lib/unibuf/parsers/capnproto/grammar.rb +272 -0
  30. data/lib/unibuf/parsers/capnproto/list_reader.rb +208 -0
  31. data/lib/unibuf/parsers/capnproto/pointer_decoder.rb +163 -0
  32. data/lib/unibuf/parsers/capnproto/processor.rb +348 -0
  33. data/lib/unibuf/parsers/capnproto/segment_reader.rb +131 -0
  34. data/lib/unibuf/parsers/capnproto/struct_reader.rb +199 -0
  35. data/lib/unibuf/parsers/flatbuffers/binary_parser.rb +325 -0
  36. data/lib/unibuf/parsers/flatbuffers/grammar.rb +235 -0
  37. data/lib/unibuf/parsers/flatbuffers/processor.rb +299 -0
  38. data/lib/unibuf/parsers/textproto/grammar.rb +1 -1
  39. data/lib/unibuf/parsers/textproto/processor.rb +10 -0
  40. data/lib/unibuf/serializers/binary_serializer.rb +218 -0
  41. data/lib/unibuf/serializers/capnproto/binary_serializer.rb +402 -0
  42. data/lib/unibuf/serializers/capnproto/list_writer.rb +199 -0
  43. data/lib/unibuf/serializers/capnproto/pointer_encoder.rb +118 -0
  44. data/lib/unibuf/serializers/capnproto/segment_builder.rb +124 -0
  45. data/lib/unibuf/serializers/capnproto/struct_writer.rb +139 -0
  46. data/lib/unibuf/serializers/flatbuffers/binary_serializer.rb +167 -0
  47. data/lib/unibuf/validators/type_validator.rb +1 -1
  48. data/lib/unibuf/version.rb +1 -1
  49. data/lib/unibuf.rb +27 -0
  50. metadata +36 -1
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Models
5
+ module Flatbuffers
6
# In-memory model of a FlatBuffers schema (.fbs file): its namespace,
# includes, type definitions and file-level declarations.
class Schema
  attr_reader :namespace, :includes, :tables, :structs, :enums, :unions,
              :root_type, :file_identifier, :file_extension, :attributes

  # @param attributes [Hash] schema properties; each one is read from its
  #   Symbol key first and falls back to the String key
  def initialize(attributes = {})
    read = ->(key) { attributes[key] || attributes[key.to_s] }

    @namespace = read.call(:namespace)
    @includes = Array(read.call(:includes))
    @tables = Array(read.call(:tables))
    @structs = Array(read.call(:structs))
    @enums = Array(read.call(:enums))
    @unions = Array(read.call(:unions))
    @root_type = read.call(:root_type)
    @file_identifier = read.call(:file_identifier)
    @file_extension = read.call(:file_extension)
    @attributes = Array(read.call(:attributes))
  end

  # --- Lookups: each returns the first definition with that name, or nil ---

  def find_table(name)
    tables.find { |table| table.name == name }
  end

  def find_struct(name)
    structs.find { |struct| struct.name == name }
  end

  def find_enum(name)
    enums.find { |enum| enum.name == name }
  end

  def find_union(name)
    unions.find { |union| union.name == name }
  end

  # Searches tables, structs, enums and unions, in that order.
  def find_type(name)
    %i[find_table find_struct find_enum find_union].each do |finder|
      found = public_send(finder, name)
      return found if found
    end
    nil
  end

  # --- Name listings ---

  def table_names
    tables.map { |table| table.name }
  end

  def struct_names
    structs.map { |struct| struct.name }
  end

  def enum_names
    enums.map { |enum| enum.name }
  end

  def union_names
    unions.map { |union| union.name }
  end

  # --- Validation ---

  # @return [Boolean] false when validate! raises ValidationError
  def valid?
    begin
      validate!
    rescue ValidationError
      return false
    end
    true
  end

  # Requires a root type that names one of the tables, then validates
  # every table, struct, enum and union.
  #
  # @return [true]
  # @raise [ValidationError] on the first problem found
  def validate!
    raise ValidationError, "Root type required" unless root_type
    raise ValidationError, "Root type '#{root_type}' not found" unless find_table(root_type)

    [tables, structs, enums, unions].each do |definitions|
      definitions.each { |definition| definition.validate! }
    end

    true
  end

  # @return [Hash] Symbol-keyed representation; definitions are converted
  #   with their own #to_h
  def to_h
    {
      namespace: namespace,
      includes: includes,
      tables: tables.map { |table| table.to_h },
      structs: structs.map { |struct| struct.to_h },
      enums: enums.map { |enum| enum.to_h },
      unions: unions.map { |union| union.to_h },
      root_type: root_type,
      file_identifier: file_identifier,
      file_extension: file_extension,
      attributes: attributes,
    }
  end
end
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Models
5
+ module Flatbuffers
6
# Model of a FlatBuffers struct definition.
# Structs are fixed-size and stored inline (no vtable).
class StructDefinition
  attr_reader :name, :fields, :metadata

  # @param attributes [Hash] :name, :fields and :metadata, each read from
  #   the Symbol key first and then the String key
  def initialize(attributes = {})
    read = ->(key) { attributes[key] || attributes[key.to_s] }

    @name = read.call(:name)
    @fields = Array(read.call(:fields))
    @metadata = read.call(:metadata) || {}
  end

  # @return [Object, nil] first field whose name equals +field_name+
  def find_field(field_name)
    fields.find { |field| field.name == field_name }
  end

  # @return [Array] names of all fields, in declaration order
  def field_names
    fields.map { |field| field.name }
  end

  # Structs are always fixed size.
  def fixed_size?
    true
  end

  # @return [Boolean] false when validate! raises ValidationError
  def valid?
    begin
      validate!
    rescue ValidationError
      return false
    end
    true
  end

  # Requires a name and at least one field, validates each field, and
  # rejects vector fields.
  #
  # @return [true]
  # @raise [ValidationError] on the first problem found
  def validate!
    raise ValidationError, "Struct name required" unless name
    raise ValidationError, "Struct must have at least one field" if fields.empty?

    fields.each do |field|
      field.validate!
      next unless field.vector?

      raise ValidationError,
            "Struct '#{name}' field '#{field.name}' cannot be a vector"
    end

    true
  end

  # @return [Hash] Symbol-keyed representation; fields use their own #to_h
  def to_h
    {
      name: name,
      fields: fields.map { |field| field.to_h },
      metadata: metadata,
    }
  end
end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Models
5
+ module Flatbuffers
6
# Model of a FlatBuffers table definition.
class TableDefinition
  attr_reader :name, :fields, :metadata

  # @param attributes [Hash] :name, :fields and :metadata, each read from
  #   the Symbol key first and then the String key
  def initialize(attributes = {})
    read = ->(key) { attributes[key] || attributes[key.to_s] }

    @name = read.call(:name)
    @fields = Array(read.call(:fields))
    @metadata = read.call(:metadata) || {}
  end

  # @return [Object, nil] first field whose name equals +field_name+
  def find_field(field_name)
    fields.find { |field| field.name == field_name }
  end

  # @return [Array] names of all fields, in declaration order
  def field_names
    fields.map { |field| field.name }
  end

  # @return [Boolean] whether the metadata contains +key+
  def has_metadata?(key)
    metadata.key?(key)
  end

  # @return [Object, nil] metadata value stored under +key+
  def get_metadata(key)
    metadata[key]
  end

  # @return [Boolean] true when at least one field reports vector?
  def has_vectors?
    fields.any? { |field| field.vector? }
  end

  # @return [Boolean] true when at least one field has type_kind :table
  def has_nested_tables?
    fields.any? { |field| field.type_kind == :table }
  end

  # @return [Boolean] false when validate! raises ValidationError
  def valid?
    begin
      validate!
    rescue ValidationError
      return false
    end
    true
  end

  # Requires a name and at least one field, then validates each field.
  #
  # @return [true]
  # @raise [ValidationError] on the first problem found
  def validate!
    raise ValidationError, "Table name required" unless name
    raise ValidationError, "Table must have at least one field" if fields.empty?

    fields.each { |field| field.validate! }

    true
  end

  # @return [Hash] Symbol-keyed representation; fields use their own #to_h
  def to_h
    {
      name: name,
      fields: fields.map { |field| field.to_h },
      metadata: metadata,
    }
  end
end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Models
5
+ module Flatbuffers
6
# Model of a FlatBuffers union definition.
# A union represents a choice between multiple table types.
class UnionDefinition
  attr_reader :name, :types, :metadata

  # @param attributes [Hash] :name, :types and :metadata, each read from
  #   the Symbol key first and then the String key
  def initialize(attributes = {})
    read = ->(key) { attributes[key] || attributes[key.to_s] }

    @name = read.call(:name)
    @types = Array(read.call(:types))
    @metadata = read.call(:metadata) || {}
  end

  # @return [Boolean] whether +type_name+ is one of the union's types
  def includes_type?(type_name)
    types.include?(type_name)
  end

  # @return [Integer] number of member types
  def type_count
    types.size
  end

  # @return [Boolean] false when validate! raises ValidationError
  def valid?
    begin
      validate!
    rescue ValidationError
      return false
    end
    true
  end

  # Requires a name, at least one type, and no repeated types.
  #
  # @return [true]
  # @raise [ValidationError] on the first problem found
  def validate!
    raise ValidationError, "Union name required" unless name
    raise ValidationError, "Union must have at least one type" if types.empty?

    distinct_count = types.uniq.size
    raise ValidationError, "Union '#{name}' has duplicate types" unless distinct_count == types.size

    true
  end

  # @return [Hash] Symbol-keyed representation
  def to_h
    {
      name: name,
      types: types,
      metadata: metadata,
    }
  end
end
58
+ end
59
+ end
60
+ end
@@ -182,6 +182,16 @@ module Unibuf
182
182
  lines.join("\n")
183
183
  end
184
184
 
185
# Encode this message in binary Protocol Buffer format.
# The serializer file is required only when this method is called.
#
# @param schema [Models::Schema] The schema defining the message structure
# @param message_type [String, nil] The message type name from schema
# @return [String] Binary data
def to_binary(schema:, message_type: nil)
  require_relative "../serializers/binary_serializer"

  Serializers::BinarySerializer
    .new(schema)
    .serialize(self, message_type: message_type)
end
194
+
185
195
  # Comparison
186
196
  def ==(other)
187
197
  return false unless other.is_a?(Message)
@@ -58,8 +58,8 @@ module Unibuf
58
58
  return raw_value if boolean?
59
59
  return true if string? && %w[true t 1].include?(raw_value.downcase)
60
60
  return false if string? && %w[false f 0].include?(raw_value.downcase)
61
- return true if raw_value == 1
62
- return false if raw_value.zero?
61
+ return true if integer? && raw_value == 1
62
+ return false if integer? && raw_value.zero?
63
63
 
64
64
  raise TypeCoercionError,
65
65
  "Cannot convert #{raw_value.class} to Boolean"
@@ -1,41 +1,221 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bindata"
4
+
3
5
  module Unibuf
4
6
  module Parsers
5
7
  module Binary
6
8
  # Binary Protocol Buffer wire format parser
7
- # Requires bindata gem for implementation
8
- #
9
- # TODO: Implement wire format parsing using bindata
9
+ # Implements Protocol Buffers binary encoding specification
10
10
  # Reference: https://protobuf.dev/programming-guides/encoding/
11
11
  class WireFormatParser
12
12
  attr_reader :schema
13
13
 
14
+ # Wire types
15
+ WIRE_TYPE_VARINT = 0
16
+ WIRE_TYPE_64BIT = 1
17
+ WIRE_TYPE_LENGTH_DELIMITED = 2
18
+ WIRE_TYPE_START_GROUP = 3 # Deprecated
19
+ WIRE_TYPE_END_GROUP = 4 # Deprecated
20
+ WIRE_TYPE_32BIT = 5
21
+
14
22
  def initialize(schema)
15
23
  @schema = schema
16
24
  end
17
25
 
18
- def parse(binary_data)
19
- raise NotImplementedError, <<~MSG
20
- Binary Protocol Buffer parsing not yet implemented.
26
# Parse binary Protocol Buffer data into a message model.
#
# @param binary_data [String] Binary wire-format data
# @param message_type [String, nil] Name of the message type to decode;
#   may be omitted when the schema defines exactly one message
# @return [Models::Message] Parsed message
# @raise [ArgumentError] if the data is nil or empty, or if no message
#   definition can be resolved
# @raise [ParseError] if the data ends unexpectedly
def parse(binary_data, message_type: nil)
  raise ArgumentError, "Binary data cannot be nil" if binary_data.nil?
  raise ArgumentError, "Binary data cannot be empty" if binary_data.empty?

  msg_def = find_message_definition(message_type)
  unless msg_def
    # A type name was supplied but is unknown: say so, instead of
    # claiming that no type name was given.
    if message_type
      raise ArgumentError,
            "Message type '#{message_type}' not found in schema"
    end

    raise ArgumentError,
          "Message type required or schema must have exactly one message"
  end

  begin
    fields = parse_fields(binary_data, msg_def)
    Models::Message.new("fields" => fields)
  rescue EOFError => e
    raise ParseError, "Unexpected end of data: #{e.message}"
  end
end
54
+
55
# Read the file at +path+ in binary mode and parse its contents.
#
# @param path [String] path of the file to read
# @param message_type [String, nil] passed through to #parse
# @return whatever #parse returns for the file contents
def parse_file(path, message_type: nil)
  raw = File.binread(path)
  parse(raw, message_type: message_type)
end
58
+
59
+ private
21
60
 
22
- This feature requires:
23
- 1. bindata gem integration
24
- 2. Wire format decoder
25
- 3. Schema-driven field extraction
26
- 4. Type deserialization
61
# Resolve the message definition to decode against.
#
# With no type name and exactly one message in the schema, that message
# is used; in every other case the schema is asked to look up +type_name+.
def find_message_definition(type_name)
  if type_name.nil? && schema.messages.size == 1
    schema.messages.first
  else
    schema.find_message(type_name)
  end
end
66
+
67
# Decode every field of one message from its wire-format bytes.
#
# Fields whose number is not declared in +msg_def+ are skipped. Their
# payload is consumed as well, so the next tag is read from the correct
# offset instead of from the middle of the skipped value.
#
# @param data [String] wire-format bytes of a single message
# @param msg_def [#find_field_by_number] message definition used to
#   resolve field numbers
# @return [Array<Hash>] one { "name" => ..., "value" => ... } entry per
#   recognised field, in wire order
# @raise [ParseError] if the data is truncated or an unknown field uses
#   an unsupported wire type
def parse_fields(data, msg_def)
  fields = []
  io = StringIO.new(data)
  io.set_encoding(Encoding::BINARY)

  until io.eof?
    begin
      # The tag packs the field number (upper bits) and wire type (low 3 bits)
      tag = read_varint(io)
      field_number = tag >> 3
      wire_type = tag & 0x7

      field_def = msg_def.find_field_by_number(field_number)
      unless field_def
        skip_unknown_field(io, wire_type)
        next
      end

      value = parse_field_value(io, wire_type, field_def)
      fields << { "name" => field_def.name, "value" => value }
    rescue EOFError => e
      raise ParseError, "Incomplete field data: #{e.message}"
    end
  end

  fields
end

# Consume the payload of a field that is not declared in the schema.
def skip_unknown_field(io, wire_type)
  case wire_type
  when WIRE_TYPE_VARINT
    read_varint(io)
  when WIRE_TYPE_64BIT
    skip_bytes(io, 8)
  when WIRE_TYPE_LENGTH_DELIMITED
    skip_bytes(io, read_varint(io))
  when WIRE_TYPE_32BIT
    skip_bytes(io, 4)
  else
    raise ParseError, "Unsupported wire type: #{wire_type}"
  end
end

# Advance +io+ by exactly +count+ bytes, failing if fewer are available.
def skip_bytes(io, count)
  skipped = io.read(count)
  return if skipped && skipped.bytesize == count

  raise ParseError, "Unexpected EOF skipping unknown field"
end
27
97
 
28
- Current implementation: Text format only
29
- Roadmap: Binary support in v2.0.0
98
# Decode one field value, choosing the reader that matches the wire type.
#
# @param io [IO] stream positioned just after the field tag
# @param wire_type [Integer] low three bits of the tag
# @param field_def [Object] definition of the field being read
# @raise [ParseError] for any wire type without a reader (e.g. groups)
def parse_field_value(io, wire_type, field_def)
  return parse_varint_value(io, field_def) if wire_type == WIRE_TYPE_VARINT
  return parse_64bit_value(io, field_def) if wire_type == WIRE_TYPE_64BIT
  return parse_length_delimited_value(io, field_def) if wire_type == WIRE_TYPE_LENGTH_DELIMITED
  return parse_32bit_value(io, field_def) if wire_type == WIRE_TYPE_32BIT

  raise ParseError, "Unsupported wire type: #{wire_type}"
end
112
+
113
# Read a varint and convert it according to the field's declared type.
#
# int32 and int64 values are carried on the wire as two's complement
# (negative numbers are sign-extended to 64 bits), so the raw unsigned
# varint is reinterpreted as a signed integer of the declared width.
# sint32/sint64 use ZigZag decoding. Every other type, including uint32
# and uint64, is returned as the raw unsigned value.
#
# @param io [IO] stream positioned at the start of the varint
# @param field_def [#type] definition of the field being read
# @return [Integer, Boolean] decoded value (Boolean for "bool")
def parse_varint_value(io, field_def)
  value = read_varint(io)

  case field_def.type
  when "bool"
    value != 0
  when "int32"
    signed_from_varint(value, 32)
  when "int64"
    signed_from_varint(value, 64)
  when "sint32"
    decode_zigzag_32(value)
  when "sint64"
    decode_zigzag_64(value)
  else
    value
  end
end

# Reinterpret the low +bits+ bits of an unsigned integer as two's complement.
def signed_from_varint(value, bits)
  truncated = value & ((1 << bits) - 1)
  truncated >= (1 << (bits - 1)) ? truncated - (1 << bits) : truncated
end
129
+
130
# Read eight bytes and unpack them as a little-endian 64-bit value.
#
# "sfixed64" is unpacked as a signed integer, "double" as a float, and
# every other type (including "fixed64") as an unsigned integer.
#
# @raise [ParseError] if fewer than eight bytes are available
def parse_64bit_value(io, field_def)
  raw = io.read(8)
  raise ParseError, "Unexpected EOF reading 64-bit value" if raw.nil? || raw.bytesize != 8

  directive = { "sfixed64" => "q<", "double" => "E" }.fetch(field_def.type, "Q<")
  raw.unpack1(directive)
end
148
+
149
# Read four bytes and unpack them as a little-endian 32-bit value.
#
# "sfixed32" is unpacked as a signed integer, "float" as a float, and
# every other type (including "fixed32") as an unsigned integer.
#
# @raise [ParseError] if fewer than four bytes are available
def parse_32bit_value(io, field_def)
  raw = io.read(4)
  raise ParseError, "Unexpected EOF reading 32-bit value" if raw.nil? || raw.bytesize != 4

  directive = { "sfixed32" => "l<", "float" => "e" }.fetch(field_def.type, "L<")
  raw.unpack1(directive)
end
167
+
168
# Read a length-prefixed payload and interpret it by field type.
#
# "string" payloads are tagged as UTF-8, "bytes" payloads are returned
# unchanged. Any other type name is looked up in the schema: when it names
# a message the payload is decoded as an embedded message, otherwise the
# raw bytes are returned.
#
# @return [String, Hash] the payload, or { "fields" => [...] } for an
#   embedded message
# @raise [ParseError] if fewer bytes than the declared length are available
def parse_length_delimited_value(io, field_def)
  size = read_varint(io)
  payload = io.read(size)
  if payload.nil? || payload.bytesize != size
    raise ParseError, "Unexpected EOF reading length-delimited value"
  end

  type_name = field_def.type
  return payload.force_encoding(Encoding::UTF_8) if type_name == "string"
  return payload if type_name == "bytes"

  nested_def = schema.find_message(type_name)
  return payload unless nested_def

  { "fields" => parse_fields(payload, nested_def) }
end
192
+
193
# Read one varint (variable-length integer) from +io+.
#
# Each byte contributes its low seven bits, least-significant group first;
# a clear high bit marks the final byte. At most ten bytes are read.
#
# @return [Integer] the decoded unsigned value
# @raise [ParseError] if the tenth byte still has its continuation bit
#   set, or the stream ends before the final byte
def read_varint(io)
  value = 0

  # Bit offsets 0, 7, ..., 63: one per permitted byte
  0.step(63, 7) do |shift|
    octet = io.readbyte
    value |= (octet & 0x7F) << shift
    return value if (octet & 0x80).zero?
  end

  raise ParseError, "Varint too long"
rescue EOFError => e
  raise ParseError, "Unexpected EOF reading varint: #{e.message}"
end
30
211
 
31
- For now, use text format:
32
- Unibuf.parse_textproto(text_content)
33
- Unibuf.parse_textproto_file("file.txtpb")
34
- MSG
212
+ # Decode ZigZag encoding for signed integers
213
+ def decode_zigzag_32(value)
214
+ (value >> 1) ^ -(value & 1)
35
215
  end
36
216
 
37
- def parse_file(path)
38
- parse(File.binread(path))
217
+ def decode_zigzag_64(value)
218
+ (value >> 1) ^ -(value & 1)
39
219
  end
40
220
  end
41
221
  end