unibuf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parslet"
4
+
5
+ module Unibuf
6
+ module Parsers
7
+ module Textproto
8
+ # Parslet grammar for Protocol Buffers text format following the official spec
9
+ # Reference: https://protobuf.dev/reference/protobuf/textformat-spec/
10
class Grammar < Parslet::Parser
  # Parslet grammar for the Protocol Buffers text format.
  # Reference: https://protobuf.dev/reference/protobuf/textformat-spec/
  #
  # Named captures produced by this grammar: :identifier, :string,
  # :integer, :float, :list, :message, :field_name, :field_value, :field.

  # ===== Lexical Elements =====

  # Characters
  rule(:newline) { str("\n") }
  rule(:letter) { match["a-zA-Z_"] }
  rule(:dec) { match["0-9"] }
  rule(:oct) { match["0-7"] }
  rule(:hex) { match["0-9a-fA-F"] }

  # Whitespace and comments. A comment starts with "#" or "//" and runs to
  # the end of the line (the trailing newline is optional so a comment may
  # end the input).
  rule(:comment) do
    (str("#") | str("//")) >> (newline.absent? >> any).repeat >> newline.maybe
  end
  rule(:whitespace) { match['\s'].repeat(1) | comment }
  rule(:whitespace?) { whitespace.repeat }

  # Identifiers: a letter or underscore followed by letters, digits or
  # underscores.
  rule(:ident) { letter >> (letter | dec).repeat }
  rule(:identifier) { ident.as(:identifier) }

  # String literals.
  # An escape is a backslash followed by a single-character escape, one to
  # three octal digits, or "x" plus one or two hex digits.
  rule(:escape) do
    str("\\") >> (
      str("a") | str("b") | str("f") | str("n") | str("r") |
      str("t") | str("v") | str("?") | str("\\") | str("'") |
      str('"') |
      (oct >> oct.maybe >> oct.maybe) |
      (str("x") >> hex >> hex.maybe)
    )
  end
  rule(:string_content) { (escape | (str('"').absent? >> any)).repeat }
  rule(:single_string) do
    str("'") >> (escape | (str("'").absent? >> any)).repeat >> str("'")
  end
  rule(:double_string) { str('"') >> string_content >> str('"') }
  # The captured :string slice still includes the surrounding quotes.
  rule(:string_part) { (single_string | double_string).as(:string) }

  # String = STRING, { STRING } - adjacent string literals concatenate.
  rule(:string_value) do
    string_part >> (whitespace? >> string_part).repeat
  end

  # Numeric literals (uncaptured building blocks)
  rule(:sign) { match["+-"] }
  rule(:dec_lit) { (str("0") | (match["1-9"] >> dec.repeat)) }
  rule(:oct_lit) { str("0") >> oct.repeat(1) }
  rule(:hex_lit) { str("0") >> match["Xx"] >> hex.repeat(1) }
  rule(:exp) { match["Ee"] >> sign.maybe >> dec.repeat(1) }
  rule(:float_lit) do
    (str(".") >> dec.repeat(1) >> exp.maybe) |
      (dec_lit >> str(".") >> dec.repeat >> exp.maybe) |
      (dec_lit >> exp)
  end
  # A float literal with an optional "f"/"F" suffix, or a decimal integer
  # that carries the suffix.
  rule(:float_body) do
    (float_lit >> match["Ff"].maybe) | (dec_lit >> match["Ff"])
  end

  # Captured unsigned numeric tokens
  rule(:dec_int) { dec_lit.as(:integer) }
  rule(:oct_int) { oct_lit.as(:integer) }
  rule(:hex_int) { hex_lit.as(:integer) }
  rule(:float_token) { float_body.as(:float) }

  # Numbers - with optional sign.
  # The "-" must sit INSIDE the named capture: when a Parslet sequence mixes
  # an unnamed string match with a named capture, only the capture survives
  # in the parse tree, so a sign matched outside `.as(...)` is silently
  # dropped and "-5" would come out as 5.
  # NOTE(review): whitespace between "-" and the digits is no longer
  # accepted, because the captured slice must stay directly convertible to
  # a number by the processor.
  rule(:signed_number) do
    (str("-") >> float_body).as(:float) |
      (str("-") >> (hex_lit | oct_lit | dec_lit)).as(:integer)
  end
  # Alternative order matters (PEG): floats first, then hex, then octal,
  # and plain decimal last.
  rule(:unsigned_number) { float_token | hex_int | oct_int | dec_int }
  rule(:number) { signed_number | unsigned_number }

  # ===== Syntax Elements =====

  # Scalar values (not message blocks)
  rule(:scalar_value) do
    string_value | number | identifier | scalar_list
  end

  # Lists: "[" value { "," value } "]". The list body is optional, so an
  # empty list is captured as { list: nil }.
  rule(:scalar_list) do
    str("[") >> whitespace? >>
      (scalar_value >> (whitespace? >> str(",") >> whitespace? >> scalar_value).repeat).maybe.as(:list) >>
      whitespace? >> str("]")
  end

  rule(:message_list) do
    str("[") >> whitespace? >>
      (message_value >> (whitespace? >> str(",") >> whitespace? >> message_value).repeat).maybe.as(:list) >>
      whitespace? >> str("]")
  end

  # Message value: { fields } or < fields >
  rule(:message_value) do
    ((str("{") >> whitespace? >> message >> whitespace? >> str("}")) |
      (str("<") >> whitespace? >> message >> whitespace? >> str(">"))).as(:message)
  end

  # Field names
  rule(:field_name) { identifier.as(:field_name) }

  # Fields - following official spec
  # ScalarField: field_name ":" scalar_value, with an optional trailing
  # ";" or "," separator.
  rule(:scalar_field) do
    field_name >>
      whitespace? >> str(":") >> whitespace? >>
      scalar_value.as(:field_value) >>
      whitespace? >> (str(";") | str(",")).maybe
  end

  # MessageField: field_name [":"] (message_value | message_list), with an
  # optional trailing ";" or "," separator.
  rule(:message_field) do
    field_name >>
      (whitespace? >> str(":")).maybe >> whitespace? >>
      (message_value | message_list).as(:field_value) >>
      whitespace? >> (str(";") | str(",")).maybe
  end

  # Any field type. message_field is tried before scalar_field.
  rule(:field) do
    whitespace? >> (message_field | scalar_field).as(:field) >> whitespace?
  end

  # Message = { Field }
  rule(:message) { field.repeat }

  # Document root
  rule(:document) { whitespace? >> message >> whitespace? }

  root(:document)
end
139
+ end
140
+ end
141
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "grammar"
4
+ require_relative "processor"
5
+ require_relative "../../models/message"
6
+
7
+ module Unibuf
8
+ module Parsers
9
+ module Textproto
10
+ # High-level parser for Protocol Buffers text format
11
+ # Combines Grammar (Parslet) and Processor (manual transformation)
12
class Parser
  # High-level parser for Protocol Buffers text format.
  # Runs the Parslet Grammar, hands the resulting tree to Processor, and
  # wraps the processed hash in a Models::Message.
  attr_reader :grammar

  def initialize
    @grammar = Grammar.new
  end

  # Parse textproto content from a string
  # @param content [String] The textproto content
  # @return [Unibuf::Models::Message] The parsed message
  # @raise [ArgumentError] if content is nil or empty
  # @raise [ParseError] if the grammar rejects the content
  def parse(content)
    raise ArgumentError, "Content cannot be nil" if content.nil?
    raise ArgumentError, "Content cannot be empty" if content.empty?

    begin
      # Step 1: Parse with Parslet grammar -> AST
      ast = grammar.parse(content)

      # Step 2: Transform AST with Processor -> Hash
      hash = Processor.process(ast)

      # Step 3: Create domain model from hash
      Models::Message.new(hash)
    rescue Parslet::ParseFailed => e
      raise ParseError, format_parse_error(e, content)
    end
  end

  # Parse textproto from a file
  # @param path [String] Path to the textproto file
  # @return [Unibuf::Models::Message] The parsed message
  # @raise [FileNotFoundError] if the file does not exist
  # @raise [FileReadError] for any other StandardError raised while reading
  #   or parsing
  def parse_file(path)
    unless File.exist?(path)
      raise FileNotFoundError,
            "File not found: #{path}"
    end

    begin
      content = File.read(path)
      parse(content)
    rescue Errno::ENOENT => e
      # The file can disappear between the exist? check and the read.
      raise FileNotFoundError, "Cannot read file: #{path} - #{e.message}"
    rescue Errno::EACCES => e
      raise FileReadError, "Permission denied: #{path} - #{e.message}"
    rescue StandardError => e
      # NOTE(review): this also converts the ParseError / ArgumentError
      # raised by #parse into FileReadError - confirm that is intended.
      raise FileReadError, "Error reading file: #{path} - #{e.message}"
    end
  end

  private

  # Build a multi-line message for a Parslet parse failure: a header with
  # the failure position, a few surrounding source lines with the failing
  # line marked by "=> ", a caret line, and Parslet's own cause text.
  # @param error [Parslet::ParseFailed] The failure raised by the grammar
  # @param content [String] The text that was being parsed
  # @return [String] The formatted message
  def format_parse_error(error, content)
    cause = error.parse_failure_cause
    # Ask for the position recorded on the cause. Called without an
    # argument, line_and_column reports wherever the source currently
    # stands, which is not the failure location.
    line_no, col_no = cause.source.line_and_column(cause.pos)
    lines = content.lines

    context = []
    context << "Parse error at line #{line_no}, column #{col_no}:"
    context << ""

    # Show context lines (0-based indexes into lines, clamped to the text)
    start_line = [line_no - 2, 0].max
    end_line = [line_no + 2, lines.size - 1].min

    (start_line..end_line).each do |i|
      prefix = i == line_no - 1 ? "=> " : " "
      context << "#{prefix}#{i + 1}: #{lines[i]}"
    end

    context << ""
    context << "#{' ' * (col_no + 7)}^"
    context << ""
    context << cause.to_s

    context.join("\n")
  end
end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Parsers
5
+ module Textproto
6
+ # Processor to transform Parslet AST to Ruby hashes
7
+ # Follows fontist pattern - manual transformation, not Parslet::Transform
8
class Processor
  # Transforms the Parslet parse tree into plain Ruby hashes.
  # Follows fontist pattern - manual transformation, not Parslet::Transform

  # Two-character escape sequences and the character each one stands for.
  # Octal (\NNN) and hex (\xNN) escapes are not listed and are left in the
  # string untouched.
  SIMPLE_ESCAPES = {
    "\\a" => "\a", "\\b" => "\b", "\\f" => "\f", "\\n" => "\n",
    "\\r" => "\r", "\\t" => "\t", "\\v" => "\v", "\\?" => "?",
    "\\\\" => "\\", "\\'" => "'", "\\\"" => "\""
  }.freeze

  class << self
    # Process the AST from the grammar into a normalized hash
    # @param ast [Hash, Array, nil] The Parslet AST
    # @return [Hash] { "fields" => [{ "name" => String, "value" => Object }] }
    def process(ast)
      { "fields" => normalize_fields(ast) }
    end

    private

    # Collect every { field: ... } node of a message body.
    # Anything that is not such a hash (nil, leftover text) is ignored.
    def normalize_fields(ast)
      # Array(hash) would split a hash into [key, value] pairs, so a lone
      # node is wrapped explicitly.
      nodes = ast.is_a?(Hash) ? [ast] : Array(ast)

      nodes
        .select { |node| node.is_a?(Hash) && node[:field] }
        .map { |node| process_field(node[:field]) }
    end

    # Process a single field node into a name/value pair
    def process_field(field_data)
      {
        "name" => extract_name(field_data[:field_name]),
        "value" => process_value(field_data[:field_value]),
      }
    end

    # Extract field name from either { identifier: ... } or a bare token
    def extract_name(name_data)
      ident = name_data[:identifier] if name_data.is_a?(Hash)
      (ident || name_data).to_s
    end

    # Process a value (polymorphic)
    # @return [String, Integer, Float, true, false, Hash, Array, nil]
    def process_value(value)
      case value
      when String then value.to_s
      when Array then process_array(value)
      when Hash then process_node(value)
      end
    end

    # An array of { string: ... } parts is one concatenated string value;
    # any other array is processed element by element.
    def process_array(parts)
      head = parts.first
      if head.is_a?(Hash) && head[:string]
        parts.map { |part| extract_and_unescape_string(part[:string]) }.join
      else
        parts.map { |part| process_value(part) }
      end
    end

    # Process a single captured node
    def process_node(node)
      if node[:string]
        extract_and_unescape_string(node[:string])
      elsif node[:integer]
        parse_integer(node[:integer])
      elsif node[:float]
        # to_f tolerates the optional "f" suffix and forms such as "1."
        node[:float].to_s.to_f
      elsif node[:identifier]
        process_identifier(node[:identifier].to_s)
      elsif node[:message]
        # Nested message
        { "fields" => normalize_fields(node[:message]) }
      elsif node.key?(:list)
        # key? rather than a truthiness test: an empty list arrives as
        # { list: nil } and must still become [].
        process_list(node[:list])
      else
        node.to_s
      end
    end

    # Convert an integer token, honouring the "0x"/"0X" (hex) and leading
    # "0" (octal) prefixes as well as a leading minus sign.
    def parse_integer(token)
      text = token.to_s
      Integer(text)
    rescue ArgumentError
      # Not expected for grammar-produced tokens; keep the lenient fallback.
      text.to_i
    end

    # Identifiers are booleans ("true"/"t"/"false"/"f", any case) or enum
    # value names, which are returned unchanged.
    def process_identifier(name)
      case name.downcase
      when "true", "t" then true
      when "false", "f" then false
      else name
      end
    end

    # Strip exactly one pair of matching surrounding quotes, then unescape.
    # Only one layer is removed so that "'x'" keeps its inner quotes.
    def extract_and_unescape_string(str_token)
      text = str_token.to_s
      quote = text[0]
      if text.size >= 2 && ['"', "'"].include?(quote) && text.end_with?(quote)
        text = text[1..-2]
      end
      unescape_string(text)
    end

    # Process a list body: nil (empty list), a single node, or an array
    def process_list(list)
      items = list.is_a?(Hash) ? [list] : Array(list)
      items.map { |item| process_value(item) }
    end

    # Replace escape sequences in one left-to-right pass. A single pass is
    # required: with chained replacements, an escaped backslash followed by
    # "n" would be misread as a newline escape.
    def unescape_string(str)
      str.gsub(/\\[abfnrtv?\\'"]/, SIMPLE_ESCAPES)
    end
  end
end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Validators
5
+ # Validates Protocol Buffer messages against Proto3 schemas
6
+ # Ensures textproto files conform to their schema definitions
7
class SchemaValidator
  # Checks a parsed textproto message against a schema object.
  # The schema is expected to respond to #messages and #find_message.
  attr_reader :schema

  def initialize(schema)
    @schema = schema
  end

  # Validate a message and raise when anything is wrong
  # @param message [Message] The textproto message
  # @param message_type [String] Expected message type name
  # @return [Boolean] true if valid
  # @raise [SchemaValidationError] listing every problem, one per line
  def validate!(message, message_type = nil)
    problems = validate(message, message_type)

    unless problems.empty?
      raise SchemaValidationError,
            "Schema validation failed:\n#{problems.join("\n")}"
    end

    true
  end

  # Validate a message and collect the problems instead of raising
  # @param message [Message] The textproto message
  # @param message_type [String] Expected message type name
  # @return [Array<String>] Field errors first, then missing required fields
  def validate(message, message_type = nil)
    definition = find_message_definition(message_type)
    return ["Unknown message type: #{message_type}"] unless definition

    field_problems = Array(message.fields).flat_map do |field|
      validate_field(field, definition)
    end

    field_problems + check_required_fields(message, definition)
  end

  private

  # With no type name and exactly one message in the schema, that message
  # is used; otherwise the lookup is delegated to the schema.
  def find_message_definition(type_name)
    lone_message = type_name.nil? && schema.messages.size == 1
    lone_message ? schema.messages.first : schema.find_message(type_name)
  end

  # Problems for one field: unknown name, rejected value, and (for message
  # fields whose type is known to the schema) the nested message's own
  # problems prefixed with the field name.
  def validate_field(field, msg_def)
    definition = msg_def.find_field(field.name)
    unless definition
      return ["Unknown field '#{field.name}' in message '#{msg_def.name}'"]
    end

    problems = []

    unless definition.valid_value?(field.value)
      problems << "Invalid value for field '#{field.name}': " \
                  "expected #{definition.type}, got #{field.value.class}"
    end

    return problems unless field.message_field? && definition.message_type?

    nested_message = field.as_message
    if schema.find_message(definition.type)
      validate(nested_message, definition.type).each do |nested_problem|
        problems << " #{field.name}.#{nested_problem}"
      end
    end

    problems
  end

  # In proto3 every field is optional by default, so only definitions that
  # report required? are checked for presence.
  def check_required_fields(message, msg_def)
    msg_def.fields.each_with_object([]) do |definition, missing|
      next unless definition.required?
      next if message.find_field(definition.name)

      missing << "Required field '#{definition.name}' missing in #{msg_def.name}"
    end
  end
end
109
+ end
110
+ end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unibuf
4
+ module Validators
5
+ # Validates field types and values
6
+ # Ensures type safety and Protocol Buffer compliance
7
class TypeValidator
  # Checks field values against Protocol Buffer scalar types.

  # Ruby classes accepted for each Protocol Buffer scalar type
  VALID_TYPES = {
    string: [String],
    int32: [Integer],
    int64: [Integer],
    uint32: [Integer],
    uint64: [Integer],
    sint32: [Integer],
    sint64: [Integer],
    fixed32: [Integer],
    fixed64: [Integer],
    sfixed32: [Integer],
    sfixed64: [Integer],
    float: [Float, Integer],
    double: [Float, Integer],
    bool: [TrueClass, FalseClass],
    bytes: [String],
  }.freeze

  class << self
    # Validate one field's value against a scalar type
    # @param field [Field] The field to validate (responds to name, value)
    # @param expected_type [Symbol] The expected Protocol Buffer type
    # @return [Boolean] true if valid
    # @raise [TypeValidationError] for an unknown type, a value of the wrong
    #   class, or an integer outside the type's range
    def validate_field(field, expected_type)
      # A nil value is accepted before the type is even looked up.
      return true if field.value.nil?

      allowed = VALID_TYPES[expected_type]
      raise TypeValidationError, "Unknown type '#{expected_type}'" unless allowed

      if allowed.none? { |klass| field.value.is_a?(klass) }
        raise TypeValidationError,
              "Field '#{field.name}' expected #{expected_type}, " \
              "got #{field.value.class}"
      end

      validate_numeric_range(field, expected_type) if numeric_type?(expected_type)

      true
    end

    # Validate every field of a message that the schema mentions
    # @param message [Message] The message to validate
    # @param schema [Hash] Maps field names to expected types
    # @return [Array<String>] One message per failing field, in field order
    def validate_message(message, schema = {})
      message.fields_array.each_with_object([]) do |field, problems|
        next unless schema.key?(field.name)

        begin
          validate_field(field, schema[field.name])
        rescue TypeValidationError => e
          problems << e.message
        end
      end
    end

    # Check if a type is numeric (every numeric type is signed or unsigned)
    def numeric_type?(type)
      signed_type?(type) || unsigned_type?(type)
    end

    # Check if a type is signed
    def signed_type?(type)
      case type
      when :int32, :int64, :sint32, :sint64, :sfixed32, :sfixed64,
           :float, :double
        true
      else
        false
      end
    end

    # Check if a type is unsigned
    def unsigned_type?(type)
      case type
      when :uint32, :uint64, :fixed32, :fixed64 then true
      else false
      end
    end

    private

    # Range-check integer types; float and double have no bounds here.
    def validate_numeric_range(field, expected_type)
      number = field.value
      return unless number.is_a?(Numeric)

      bounds =
        case expected_type
        when :int32, :sint32, :sfixed32 then [-2**31, (2**31) - 1]
        when :int64, :sint64, :sfixed64 then [-2**63, (2**63) - 1]
        when :uint32, :fixed32 then [0, (2**32) - 1]
        when :uint64, :fixed64 then [0, (2**64) - 1]
        end

      validate_range(field, number, *bounds) if bounds
    end

    # Raise unless min <= value <= max
    def validate_range(field, value, min, max)
      return if value.between?(min, max)

      raise TypeValidationError,
            "Field '#{field.name}' value #{value} out of range [#{min}, #{max}]"
    end
  end
end
121
+ end
122
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Unibuf
  # Release number of the unibuf gem.
  VERSION = "0.1.0"
end