unibuf 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +133 -255
- data/README.adoc +217 -220
- data/lib/unibuf/models/values/scalar_value.rb +2 -2
- data/lib/unibuf/parsers/binary/wire_format_parser.rb +199 -19
- data/lib/unibuf/parsers/textproto/grammar.rb +1 -1
- data/lib/unibuf/parsers/textproto/processor.rb +10 -0
- data/lib/unibuf/validators/type_validator.rb +1 -1
- data/lib/unibuf/version.rb +1 -1
- metadata +1 -1
|
@@ -58,8 +58,8 @@ module Unibuf
|
|
|
58
58
|
return raw_value if boolean?
|
|
59
59
|
return true if string? && %w[true t 1].include?(raw_value.downcase)
|
|
60
60
|
return false if string? && %w[false f 0].include?(raw_value.downcase)
|
|
61
|
-
return true if raw_value == 1
|
|
62
|
-
return false if raw_value.zero?
|
|
61
|
+
return true if integer? && raw_value == 1
|
|
62
|
+
return false if integer? && raw_value.zero?
|
|
63
63
|
|
|
64
64
|
raise TypeCoercionError,
|
|
65
65
|
"Cannot convert #{raw_value.class} to Boolean"
|
|
@@ -1,41 +1,221 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "bindata"
|
|
4
|
+
|
|
3
5
|
module Unibuf
|
|
4
6
|
module Parsers
|
|
5
7
|
module Binary
|
|
6
8
|
# Binary Protocol Buffer wire format parser
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
# TODO: Implement wire format parsing using bindata
|
|
9
|
+
# Implements Protocol Buffers binary encoding specification
|
|
10
10
|
# Reference: https://protobuf.dev/programming-guides/encoding/
|
|
11
11
|
class WireFormatParser
  attr_reader :schema

  # Wire types
  WIRE_TYPE_VARINT = 0
  WIRE_TYPE_64BIT = 1
  WIRE_TYPE_LENGTH_DELIMITED = 2
  WIRE_TYPE_START_GROUP = 3 # Deprecated
  WIRE_TYPE_END_GROUP = 4 # Deprecated
  WIRE_TYPE_32BIT = 5

  # Bit masks used when narrowing decoded varints.
  UINT32_MASK = 0xFFFF_FFFF
  UINT64_MASK = 0xFFFF_FFFF_FFFF_FFFF

  # @param schema [#messages, #find_message] schema used to look up
  #   message definitions by name
  def initialize(schema)
    @schema = schema
  end

  # Parse binary Protocol Buffer data
  #
  # @param binary_data [String] Binary data
  # @param message_type [String, nil] name of the message to decode; may be
  #   omitted when the schema holds exactly one message
  # @return [Models::Message] Parsed message
  # @raise [ArgumentError] if the data is nil/empty or no message
  #   definition can be resolved
  # @raise [ParseError] if the data is truncated or malformed
  def parse(binary_data, message_type: nil)
    raise ArgumentError, "Binary data cannot be nil" if binary_data.nil?
    raise ArgumentError, "Binary data cannot be empty" if binary_data.empty?

    msg_def = find_message_definition(message_type)
    unless msg_def
      raise ArgumentError,
            "Message type required or schema must have exactly one message"
    end

    Models::Message.new("fields" => parse_fields(binary_data, msg_def))
  rescue EOFError => e
    # Safety net: the low-level readers already convert EOF to ParseError.
    raise ParseError, "Unexpected end of data: #{e.message}"
  end

  # Read +path+ in binary mode and parse its contents.
  #
  # @param path [String] file to read
  # @param message_type [String, nil] see {#parse}
  # @return [Models::Message] Parsed message
  def parse_file(path, message_type: nil)
    parse(File.binread(path), message_type: message_type)
  end

  private

  # Resolve the message definition to decode against. Falls back to the
  # schema's only message when no name is given.
  def find_message_definition(type_name)
    return schema.messages.first if type_name.nil? && schema.messages.size == 1

    schema.find_message(type_name)
  end

  # Decode every field in +data+ that +msg_def+ knows about.
  #
  # @param data [String] encoded message body
  # @param msg_def [#find_field_by_number] message definition
  # @return [Array<Hash>] one { "name" => ..., "value" => ... } per field
  def parse_fields(data, msg_def)
    fields = []
    # Read from a binary copy so the caller's string (possibly frozen or
    # tagged with another encoding) is never touched.
    io = StringIO.new(data.b)

    until io.eof?
      tag = read_varint(io)
      field_number = tag >> 3
      wire_type = tag & 0x7

      field_def = msg_def.find_field_by_number(field_number)
      unless field_def
        # An unknown field's payload must still be consumed; otherwise its
        # bytes would be misread as the next field tags.
        skip_field(io, wire_type)
        next
      end

      fields << {
        "name" => field_def.name,
        "value" => parse_field_value(io, wire_type, field_def),
      }
    end

    fields
  end

  # Dispatch on the wire type to the matching value decoder.
  def parse_field_value(io, wire_type, field_def)
    case wire_type
    when WIRE_TYPE_VARINT
      parse_varint_value(io, field_def)
    when WIRE_TYPE_64BIT
      parse_64bit_value(io, field_def)
    when WIRE_TYPE_LENGTH_DELIMITED
      parse_length_delimited_value(io, field_def)
    when WIRE_TYPE_32BIT
      parse_32bit_value(io, field_def)
    else
      raise ParseError, "Unsupported wire type: #{wire_type}"
    end
  end

  # Consume and discard the payload of a field that is not in the schema.
  def skip_field(io, wire_type)
    case wire_type
    when WIRE_TYPE_VARINT
      read_varint(io)
    when WIRE_TYPE_64BIT
      read_exact(io, 8, "64-bit value")
    when WIRE_TYPE_LENGTH_DELIMITED
      read_exact(io, read_varint(io), "length-delimited value")
    when WIRE_TYPE_32BIT
      read_exact(io, 4, "32-bit value")
    else
      raise ParseError, "Unsupported wire type: #{wire_type}"
    end
  end

  # Decode a varint payload according to the declared field type.
  def parse_varint_value(io, field_def)
    value = read_varint(io)

    case field_def.type
    when "bool"
      value != 0
    when "int32"
      # Negative int32 values are sign-extended to 64 bits on the wire;
      # keep the low 32 bits and reinterpret them as two's complement.
      to_signed(value & UINT32_MASK, 32)
    when "int64"
      to_signed(value, 64)
    when "sint32"
      decode_zigzag_32(value)
    when "sint64"
      decode_zigzag_64(value)
    else
      # uint32, uint64 and any other varint-encoded type: raw value.
      value
    end
  end

  # Decode an 8-byte little-endian payload.
  def parse_64bit_value(io, field_def)
    bytes = read_exact(io, 8, "64-bit value")

    case field_def.type
    when "sfixed64"
      bytes.unpack1("q<")
    when "double"
      bytes.unpack1("E")
    else
      # fixed64 and unrecognised types: unsigned 64-bit integer.
      bytes.unpack1("Q<")
    end
  end

  # Decode a 4-byte little-endian payload.
  def parse_32bit_value(io, field_def)
    bytes = read_exact(io, 4, "32-bit value")

    case field_def.type
    when "sfixed32"
      bytes.unpack1("l<")
    when "float"
      bytes.unpack1("e")
    else
      # fixed32 and unrecognised types: unsigned 32-bit integer.
      bytes.unpack1("L<")
    end
  end

  # Decode a length-prefixed payload: string, bytes, or embedded message.
  def parse_length_delimited_value(io, field_def)
    bytes = read_exact(io, read_varint(io), "length-delimited value")

    case field_def.type
    when "string"
      bytes.force_encoding(Encoding::UTF_8)
    when "bytes"
      bytes
    else
      # Embedded message; if the type is not a known message the raw
      # bytes are returned unchanged.
      nested_msg_def = schema.find_message(field_def.type)
      return bytes unless nested_msg_def

      { "fields" => parse_fields(bytes, nested_msg_def) }
    end
  end

  # Read exactly +count+ bytes or fail.
  #
  # @raise [ParseError] if fewer than +count+ bytes remain
  def read_exact(io, count, what)
    bytes = io.read(count)
    return bytes if bytes && bytes.bytesize == count

    raise ParseError, "Unexpected EOF reading #{what}"
  end

  # Read varint (variable-length integer)
  #
  # @return [Integer] unsigned value, at most 64 bits wide
  # @raise [ParseError] on truncated input or more than ten bytes
  def read_varint(io)
    result = 0
    shift = 0

    loop do
      byte = io.readbyte
      result |= (byte & 0x7F) << shift
      break if byte.nobits?(0x80)

      shift += 7
      raise ParseError, "Varint too long" if shift >= 64
    end

    # The tenth byte can carry bits above bit 63; discard them.
    result & UINT64_MASK
  rescue EOFError => e
    raise ParseError, "Unexpected EOF reading varint: #{e.message}"
  end

  # Reinterpret an unsigned +bits+-wide integer as two's complement.
  def to_signed(value, bits)
    value >= (1 << (bits - 1)) ? value - (1 << bits) : value
  end

  # Decode ZigZag encoding for signed integers
  def decode_zigzag_32(value)
    (value >> 1) ^ -(value & 1)
  end

  def decode_zigzag_64(value)
    (value >> 1) ^ -(value & 1)
  end
end
|
|
41
221
|
end
|
|
@@ -72,7 +72,7 @@ module Unibuf
|
|
|
72
72
|
|
|
73
73
|
# Numbers - with optional sign
|
|
74
74
|
rule(:signed_number) do
|
|
75
|
-
str("-") >> whitespace? >> (float_token | hex_int | oct_int | dec_int)
|
|
75
|
+
(str("-") >> whitespace? >> (float_token | hex_int | oct_int | dec_int)).as(:negative)
|
|
76
76
|
end
|
|
77
77
|
rule(:unsigned_number) { float_token | hex_int | oct_int | dec_int }
|
|
78
78
|
rule(:number) { signed_number | unsigned_number }
|
|
@@ -60,6 +60,7 @@ module Unibuf
|
|
|
60
60
|
end
|
|
61
61
|
|
|
62
62
|
# Process a value (polymorphic)
|
|
63
|
+
# rubocop:disable Metrics/MethodLength
|
|
63
64
|
def process_value(value)
|
|
64
65
|
return nil unless value
|
|
65
66
|
return value.to_s if value.is_a?(String)
|
|
@@ -74,6 +75,14 @@ module Unibuf
|
|
|
74
75
|
|
|
75
76
|
return nil unless value.respond_to?(:[])
|
|
76
77
|
|
|
78
|
+
# Handle negative numbers
|
|
79
|
+
if value[:negative]
|
|
80
|
+
inner_value = process_value(value[:negative])
|
|
81
|
+
return -inner_value if inner_value.is_a?(Numeric)
|
|
82
|
+
|
|
83
|
+
return inner_value
|
|
84
|
+
end
|
|
85
|
+
|
|
77
86
|
if value[:string]
|
|
78
87
|
# Single string
|
|
79
88
|
extract_and_unescape_string(value[:string])
|
|
@@ -103,6 +112,7 @@ module Unibuf
|
|
|
103
112
|
value.to_s
|
|
104
113
|
end
|
|
105
114
|
end
|
|
115
|
+
# rubocop:enable Metrics/MethodLength
|
|
106
116
|
|
|
107
117
|
# Extract and unescape a string token
|
|
108
118
|
def extract_and_unescape_string(str_token)
|
data/lib/unibuf/version.rb
CHANGED