unibuf 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,8 +58,8 @@ module Unibuf
58
58
  return raw_value if boolean?
59
59
  return true if string? && %w[true t 1].include?(raw_value.downcase)
60
60
  return false if string? && %w[false f 0].include?(raw_value.downcase)
61
- return true if raw_value == 1
62
- return false if raw_value.zero?
61
+ return true if integer? && raw_value == 1
62
+ return false if integer? && raw_value.zero?
63
63
 
64
64
  raise TypeCoercionError,
65
65
  "Cannot convert #{raw_value.class} to Boolean"
@@ -1,41 +1,221 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "bindata"
4
+
3
5
  module Unibuf
4
6
  module Parsers
5
7
  module Binary
6
8
  # Binary Protocol Buffer wire format parser
7
- # Requires bindata gem for implementation
8
- #
9
- # TODO: Implement wire format parsing using bindata
9
+ # Implements Protocol Buffers binary encoding specification
10
10
  # Reference: https://protobuf.dev/programming-guides/encoding/
11
11
  class WireFormatParser
12
12
  attr_reader :schema
13
13
 
14
+ # Wire types
15
+ WIRE_TYPE_VARINT = 0
16
+ WIRE_TYPE_64BIT = 1
17
+ WIRE_TYPE_LENGTH_DELIMITED = 2
18
+ WIRE_TYPE_START_GROUP = 3 # Deprecated
19
+ WIRE_TYPE_END_GROUP = 4 # Deprecated
20
+ WIRE_TYPE_32BIT = 5
21
+
14
22
  def initialize(schema)
15
23
  @schema = schema
16
24
  end
17
25
 
18
- def parse(binary_data)
19
- raise NotImplementedError, <<~MSG
20
- Binary Protocol Buffer parsing not yet implemented.
26
+ # Parse binary Protocol Buffer data
27
+ # @param binary_data [String] Binary data
28
+ # @return [Models::Message] Parsed message
29
+ def parse(binary_data, message_type: nil)
30
+ raise ArgumentError, "Binary data cannot be nil" if binary_data.nil?
31
+
32
+ if binary_data.empty?
33
+ raise ArgumentError,
34
+ "Binary data cannot be empty"
35
+ end
36
+
37
+ # Find message definition
38
+ msg_def = find_message_definition(message_type)
39
+ unless msg_def
40
+ raise ArgumentError,
41
+ "Message type required or schema must have exactly one message"
42
+ end
43
+
44
+ begin
45
+ # Parse fields from binary
46
+ fields = parse_fields(binary_data, msg_def)
47
+
48
+ # Build Message model
49
+ Models::Message.new("fields" => fields)
50
+ rescue EOFError => e
51
+ raise ParseError, "Unexpected end of data: #{e.message}"
52
+ end
53
+ end
54
+
55
+ def parse_file(path, message_type: nil)
56
+ parse(File.binread(path), message_type: message_type)
57
+ end
58
+
59
+ private
21
60
 
22
- This feature requires:
23
- 1. bindata gem integration
24
- 2. Wire format decoder
25
- 3. Schema-driven field extraction
26
- 4. Type deserialization
61
+ def find_message_definition(type_name)
62
+ return schema.messages.first if type_name.nil? && schema.messages.size == 1
63
+
64
+ schema.find_message(type_name)
65
+ end
66
+
67
+ def parse_fields(data, msg_def)
68
+ fields = []
69
+ io = StringIO.new(data)
70
+ io.set_encoding(Encoding::BINARY)
71
+
72
+ until io.eof?
73
+ begin
74
+ # Read field tag
75
+ tag = read_varint(io)
76
+ field_number = tag >> 3
77
+ wire_type = tag & 0x7
78
+
79
+ # Find field definition
80
+ field_def = msg_def.find_field_by_number(field_number)
81
+ next unless field_def # Skip unknown fields
82
+
83
+ # Parse field value based on wire type
84
+ value = parse_field_value(io, wire_type, field_def)
85
+
86
+ fields << {
87
+ "name" => field_def.name,
88
+ "value" => value,
89
+ }
90
+ rescue EOFError => e
91
+ raise ParseError, "Incomplete field data: #{e.message}"
92
+ end
93
+ end
94
+
95
+ fields
96
+ end
27
97
 
28
- Current implementation: Text format only
29
- Roadmap: Binary support in v2.0.0
98
+ def parse_field_value(io, wire_type, field_def)
99
+ case wire_type
100
+ when WIRE_TYPE_VARINT
101
+ parse_varint_value(io, field_def)
102
+ when WIRE_TYPE_64BIT
103
+ parse_64bit_value(io, field_def)
104
+ when WIRE_TYPE_LENGTH_DELIMITED
105
+ parse_length_delimited_value(io, field_def)
106
+ when WIRE_TYPE_32BIT
107
+ parse_32bit_value(io, field_def)
108
+ else
109
+ raise ParseError, "Unsupported wire type: #{wire_type}"
110
+ end
111
+ end
112
+
113
+ def parse_varint_value(io, field_def)
114
+ value = read_varint(io)
115
+
116
+ case field_def.type
117
+ when "bool"
118
+ value != 0
119
+ when "int32", "int64", "uint32", "uint64"
120
+ value
121
+ when "sint32"
122
+ decode_zigzag_32(value)
123
+ when "sint64"
124
+ decode_zigzag_64(value)
125
+ else
126
+ value
127
+ end
128
+ end
129
+
130
+ def parse_64bit_value(io, field_def)
131
+ bytes = io.read(8)
132
+ unless bytes && bytes.bytesize == 8
133
+ raise ParseError,
134
+ "Unexpected EOF reading 64-bit value"
135
+ end
136
+
137
+ case field_def.type
138
+ when "fixed64"
139
+ bytes.unpack1("Q<")
140
+ when "sfixed64"
141
+ bytes.unpack1("q<")
142
+ when "double"
143
+ bytes.unpack1("E")
144
+ else
145
+ bytes.unpack1("Q<")
146
+ end
147
+ end
148
+
149
+ def parse_32bit_value(io, field_def)
150
+ bytes = io.read(4)
151
+ unless bytes && bytes.bytesize == 4
152
+ raise ParseError,
153
+ "Unexpected EOF reading 32-bit value"
154
+ end
155
+
156
+ case field_def.type
157
+ when "fixed32"
158
+ bytes.unpack1("L<")
159
+ when "sfixed32"
160
+ bytes.unpack1("l<")
161
+ when "float"
162
+ bytes.unpack1("e")
163
+ else
164
+ bytes.unpack1("L<")
165
+ end
166
+ end
167
+
168
+ def parse_length_delimited_value(io, field_def)
169
+ length = read_varint(io)
170
+ bytes = io.read(length)
171
+ unless bytes && bytes.bytesize == length
172
+ raise ParseError,
173
+ "Unexpected EOF reading length-delimited value"
174
+ end
175
+
176
+ case field_def.type
177
+ when "string"
178
+ bytes.force_encoding(Encoding::UTF_8)
179
+ when "bytes"
180
+ bytes
181
+ else
182
+ # Embedded message
183
+ nested_msg_def = schema.find_message(field_def.type)
184
+ if nested_msg_def
185
+ nested_fields = parse_fields(bytes, nested_msg_def)
186
+ { "fields" => nested_fields }
187
+ else
188
+ bytes
189
+ end
190
+ end
191
+ end
192
+
193
+ # Read varint (variable-length integer)
194
+ def read_varint(io)
195
+ result = 0
196
+ shift = 0
197
+
198
+ loop do
199
+ byte = io.readbyte
200
+ result |= (byte & 0x7F) << shift
201
+ break if byte.nobits?(0x80)
202
+
203
+ shift += 7
204
+ raise ParseError, "Varint too long" if shift >= 64
205
+ end
206
+
207
+ result
208
+ rescue EOFError => e
209
+ raise ParseError, "Unexpected EOF reading varint: #{e.message}"
210
+ end
30
211
 
31
- For now, use text format:
32
- Unibuf.parse_textproto(text_content)
33
- Unibuf.parse_textproto_file("file.txtpb")
34
- MSG
212
+ # Decode ZigZag encoding for signed integers
213
+ def decode_zigzag_32(value)
214
+ (value >> 1) ^ -(value & 1)
35
215
  end
36
216
 
37
- def parse_file(path)
38
- parse(File.binread(path))
217
+ def decode_zigzag_64(value)
218
+ (value >> 1) ^ -(value & 1)
39
219
  end
40
220
  end
41
221
  end
@@ -72,7 +72,7 @@ module Unibuf
72
72
 
73
73
  # Numbers - with optional sign
74
74
  rule(:signed_number) do
75
- str("-") >> whitespace? >> (float_token | hex_int | oct_int | dec_int)
75
+ (str("-") >> whitespace? >> (float_token | hex_int | oct_int | dec_int)).as(:negative)
76
76
  end
77
77
  rule(:unsigned_number) { float_token | hex_int | oct_int | dec_int }
78
78
  rule(:number) { signed_number | unsigned_number }
@@ -60,6 +60,7 @@ module Unibuf
60
60
  end
61
61
 
62
62
  # Process a value (polymorphic)
63
+ # rubocop:disable Metrics/MethodLength
63
64
  def process_value(value)
64
65
  return nil unless value
65
66
  return value.to_s if value.is_a?(String)
@@ -74,6 +75,14 @@ module Unibuf
74
75
 
75
76
  return nil unless value.respond_to?(:[])
76
77
 
78
+ # Handle negative numbers
79
+ if value[:negative]
80
+ inner_value = process_value(value[:negative])
81
+ return -inner_value if inner_value.is_a?(Numeric)
82
+
83
+ return inner_value
84
+ end
85
+
77
86
  if value[:string]
78
87
  # Single string
79
88
  extract_and_unescape_string(value[:string])
@@ -103,6 +112,7 @@ module Unibuf
103
112
  value.to_s
104
113
  end
105
114
  end
115
+ # rubocop:enable Metrics/MethodLength
106
116
 
107
117
  # Extract and unescape a string token
108
118
  def extract_and_unescape_string(str_token)
@@ -61,7 +61,7 @@ module Unibuf
61
61
  def validate_message(message, schema = {})
62
62
  errors = []
63
63
 
64
- message.fields_array.each do |field|
64
+ Array(message.fields).each do |field|
65
65
  next unless schema.key?(field.name)
66
66
 
67
67
  expected_type = schema[field.name]
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Unibuf
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unibuf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ronald Tse