RubyGems - unibuf - Versions diffs - 0.1.0 - Mend

unibuf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +16 -0
data/.rubocop_todo.yml +498 -0
data/README.adoc +490 -0
data/Rakefile +12 -0
data/exe/unibuf +7 -0
data/lib/unibuf/cli.rb +128 -0
data/lib/unibuf/commands/convert.rb +121 -0
data/lib/unibuf/commands/parse.rb +85 -0
data/lib/unibuf/commands/schema.rb +114 -0
data/lib/unibuf/commands/validate.rb +76 -0
data/lib/unibuf/errors.rb +36 -0
data/lib/unibuf/models/enum_definition.rb +78 -0
data/lib/unibuf/models/field.rb +159 -0
data/lib/unibuf/models/field_definition.rb +119 -0
data/lib/unibuf/models/message.rb +203 -0
data/lib/unibuf/models/message_definition.rb +102 -0
data/lib/unibuf/models/schema.rb +67 -0
data/lib/unibuf/models/values/base_value.rb +78 -0
data/lib/unibuf/models/values/list_value.rb +114 -0
data/lib/unibuf/models/values/map_value.rb +103 -0
data/lib/unibuf/models/values/message_value.rb +70 -0
data/lib/unibuf/models/values/scalar_value.rb +113 -0
data/lib/unibuf/parsers/binary/wire_format_parser.rb +43 -0
data/lib/unibuf/parsers/proto3/grammar.rb +149 -0
data/lib/unibuf/parsers/proto3/processor.rb +188 -0
data/lib/unibuf/parsers/textproto/grammar.rb +141 -0
data/lib/unibuf/parsers/textproto/parser.rb +92 -0
data/lib/unibuf/parsers/textproto/processor.rb +136 -0
data/lib/unibuf/validators/schema_validator.rb +110 -0
data/lib/unibuf/validators/type_validator.rb +122 -0
data/lib/unibuf/version.rb +5 -0
data/lib/unibuf.rb +207 -0
data/sig/unibuf.rbs +4 -0
metadata +139 -0

data/lib/unibuf/parsers/textproto/grammar.rb ADDED Viewed

@@ -0,0 +1,141 @@
+# frozen_string_literal: true
+require "parslet"
+module Unibuf
+  module Parsers
+    module Textproto
+      # Parslet grammar for Protocol Buffers text format following the official spec
+      # Reference: https://protobuf.dev/reference/protobuf/textformat-spec/
+      class Grammar < Parslet::Parser
+        # ===== Lexical Elements =====
+        # Characters
+        rule(:newline) { str("\n") }
+        rule(:letter) { match["a-zA-Z_"] }
+        rule(:dec) { match["0-9"] }
+        rule(:oct) { match["0-7"] }
+        rule(:hex) { match["0-9a-fA-F"] }
+        # Whitespace and comments (# or //)
+        rule(:comment) do
+          (str("#") | str("//")) >> (newline.absent? >> any).repeat >> newline.maybe
+        end
+        rule(:whitespace) { match['\s'].repeat(1) | comment }
+        rule(:whitespace?) { whitespace.repeat }
+        # Identifiers
+        rule(:ident) { letter >> (letter | dec).repeat }
+        rule(:identifier) { ident.as(:identifier) }
+        # String literals
+        rule(:escape) do
+          str("\\") >> (
+            str("a") | str("b") | str("f") | str("n") | str("r") |
+            str("t") | str("v") | str("?") | str("\\") | str("'") |
+            str('"') |
+            (oct >> oct.maybe >> oct.maybe) |
+            (str("x") >> hex >> hex.maybe)
+          )
+        end
+        rule(:string_content) { (escape | (str('"').absent? >> any)).repeat }
+        rule(:single_string) do
+          str("'") >> (escape | (str("'").absent? >> any)).repeat >> str("'")
+        end
+        rule(:double_string) { str('"') >> string_content >> str('"') }
+        rule(:string_part) { (single_string | double_string).as(:string) }
+        rule(:string_value) do
+          string_part >> (whitespace? >> string_part).repeat
+        end
+        # String = STRING, { STRING } - multiple strings concatenate
+        # Numeric literals
+        rule(:sign) { match["+-"] }
+        rule(:dec_lit) { (str("0") | (match["1-9"] >> dec.repeat)) }
+        rule(:exp) { match["Ee"] >> sign.maybe >> dec.repeat(1) }
+        rule(:float_lit) do
+          (str(".") >> dec.repeat(1) >> exp.maybe) |
+            (dec_lit >> str(".") >> dec.repeat >> exp.maybe) |
+            (dec_lit >> exp)
+        end
+        rule(:dec_int) { dec_lit.as(:integer) }
+        rule(:oct_int) { (str("0") >> oct.repeat(1)).as(:integer) }
+        rule(:hex_int) do
+          (str("0") >> match["Xx"] >> hex.repeat(1)).as(:integer)
+        end
+        rule(:float_token) do
+          ((float_lit >> match["Ff"].maybe) | (dec_lit >> match["Ff"])).as(:float)
+        end
+        # Numbers - with optional sign
+        rule(:signed_number) do
+          str("-") >> whitespace? >> (float_token | hex_int | oct_int | dec_int)
+        end
+        rule(:unsigned_number) { float_token | hex_int | oct_int | dec_int }
+        rule(:number) { signed_number | unsigned_number }
+        # ===== Syntax Elements =====
+        # Scalar values (not message blocks)
+        rule(:scalar_value) do
+          string_value | number | identifier | scalar_list
+        end
+        # Lists
+        rule(:scalar_list) do
+          str("[") >> whitespace? >>
+            (scalar_value >> (whitespace? >> str(",") >> whitespace? >> scalar_value).repeat).maybe.as(:list) >>
+            whitespace? >> str("]")
+        end
+        rule(:message_list) do
+          str("[") >> whitespace? >>
+            (message_value >> (whitespace? >> str(",") >> whitespace? >> message_value).repeat).maybe.as(:list) >>
+            whitespace? >> str("]")
+        end
+        # Message value: { fields } or < fields >
+        rule(:message_value) do
+          ((str("{") >> whitespace? >> message >> whitespace? >> str("}")) |
+           (str("<") >> whitespace? >> message >> whitespace? >> str(">"))).as(:message)
+        end
+        # Field names
+        rule(:field_name) { identifier.as(:field_name) }
+        # Fields - following official spec
+        # ScalarField: field_name ":" scalar_value
+        rule(:scalar_field) do
+          field_name >>
+            whitespace? >> str(":") >> whitespace? >>
+            scalar_value.as(:field_value) >>
+            whitespace? >> (str(";") | str(",")).maybe
+        end
+        # MessageField: field_name [":" ] (message_value | message_list)
+        rule(:message_field) do
+          field_name >>
+            (whitespace? >> str(":")).maybe >> whitespace? >>
+            (message_value | message_list).as(:field_value) >>
+            whitespace? >> (str(";") | str(",")).maybe
+        end
+        # Any field type
+        rule(:field) do
+          whitespace? >> (message_field | scalar_field).as(:field) >> whitespace?
+        end
+        # Message = { Field }
+        rule(:message) { field.repeat }
+        # Document root
+        rule(:document) { whitespace? >> message >> whitespace? }
+        root(:document)
+      end
+    end
+  end
+end

data/lib/unibuf/parsers/textproto/parser.rb ADDED Viewed

@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+require_relative "grammar"
+require_relative "processor"
+require_relative "../../models/message"
+module Unibuf
+  module Parsers
+    module Textproto
+      # High-level parser for Protocol Buffers text format.
+      # Runs the Parslet Grammar, normalizes the resulting tree with Processor
+      # and wraps the hash in a Models::Message.
+      class Parser
+        # The Grammar instance created in #initialize and used by #parse
+        attr_reader :grammar
+        def initialize
+          @grammar = Grammar.new
+        end
+        # Parse textproto content from a string
+        # @param content [String] The textproto content
+        # @return [Unibuf::Models::Message] The parsed message
+        # @raise [ArgumentError] if content is nil or empty
+        # @raise [ParseError] if the grammar rejects the content
+        def parse(content)
+          raise ArgumentError, "Content cannot be nil" if content.nil?
+          raise ArgumentError, "Content cannot be empty" if content.empty?
+          begin
+            # Grammar -> AST -> normalized hash -> domain model
+            ast = grammar.parse(content)
+            Models::Message.new(Processor.process(ast))
+          rescue Parslet::ParseFailed => e
+            raise ParseError, format_parse_error(e, content)
+          end
+        end
+        # Parse textproto from a file
+        # @param path [String] Path to the textproto file
+        # @return [Unibuf::Models::Message] The parsed message
+        # @raise [FileNotFoundError] if the file does not exist
+        # @raise [FileReadError] if the file cannot be read
+        # @raise [ParseError] if the file content is not valid textproto
+        def parse_file(path)
+          unless File.exist?(path)
+            raise FileNotFoundError,
+                  "File not found: #{path}"
+          end
+          begin
+            parse(File.read(path))
+          rescue ParseError
+            # A syntax error is not an I/O failure: re-raise it untouched so
+            # the catch-all below does not re-label it as FileReadError
+            raise
+          rescue Errno::ENOENT => e
+            # The file can vanish between the exist? check and the read
+            raise FileNotFoundError, "Cannot read file: #{path} - #{e.message}"
+          rescue Errno::EACCES => e
+            raise FileReadError, "Permission denied: #{path} - #{e.message}"
+          rescue StandardError => e
+            raise FileReadError, "Error reading file: #{path} - #{e.message}"
+          end
+        end
+        private
+        # Build a multi-line message for a Parslet failure: a header with the
+        # position, the source lines around it, a caret under the failing
+        # column, and Parslet's own description of the cause.
+        # @param error [Parslet::ParseFailed] The failure raised by the grammar
+        # @param content [String] The text that was being parsed
+        # @return [String] The formatted message
+        def format_parse_error(error, content)
+          cause = error.parse_failure_cause
+          # Pass the position stored on the cause; called without an argument,
+          # line_and_column falls back to the source's current cursor instead
+          # of the place where parsing failed
+          line_no, col_no = cause.source.line_and_column(cause.pos)
+          lines = content.lines
+          error_index = line_no - 1
+          # Show one line before the failing line and up to three after it
+          first_index = [error_index - 1, 0].max
+          last_index = [error_index + 3, lines.size - 1].min
+          report = ["Parse error at line #{line_no}, column #{col_no}:", ""]
+          (first_index..last_index).each do |i|
+            label = "#{i == error_index ? '=> ' : '   '}#{i + 1}: "
+            # chomp: the report is joined with "\n", so keep each entry to one line
+            report << "#{label}#{lines[i].chomp}"
+            next unless i == error_index
+            # Caret directly below the failing column (columns are 1-based)
+            report << "#{' ' * (label.length + col_no - 1)}^"
+          end
+          report << ""
+          report << cause.to_s
+          report.join("\n")
+        end
+      end
+    end
+  end
+end

data/lib/unibuf/parsers/textproto/processor.rb ADDED Viewed

@@ -0,0 +1,136 @@
+# frozen_string_literal: true
+module Unibuf
+  module Parsers
+    module Textproto
+      # Processor to transform Parslet AST to Ruby hashes
+      # Follows fontist pattern - manual transformation, not Parslet::Transform
+      class Processor
+        class << self
+          # Process the AST from the grammar into a normalized hash
+          # @param ast [Hash, Array] The Parslet AST
+          # @return [Hash] Normalized hash suitable for model construction
+          def process(ast)
+            return { "fields" => [] } if ast.nil? || ast.empty?
+            fields = normalize_fields(ast)
+            { "fields" => fields }
+          end
+          private
+          # Normalize fields from AST
+          def normalize_fields(ast)
+            return [] unless ast
+            fields_array = Array(ast)
+            normalized = []
+            fields_array.each do |item|
+              # Handle Parslet::Slice by converting to string first
+              next unless item.respond_to?(:key?) || item.respond_to?(:[])
+              # New grammar wraps everything in :field
+              if item.respond_to?(:[]) && item[:field]
+                field_data = item[:field]
+                normalized << process_field(field_data)
+              end
+            end
+            normalized
+          end
+          # Process a single field
+          def process_field(field_data)
+            name = extract_name(field_data[:field_name])
+            value = process_value(field_data[:field_value])
+            { "name" => name, "value" => value }
+          end
+          # Extract field name
+          def extract_name(name_data)
+            return name_data.to_s unless name_data.respond_to?(:[])
+            if name_data[:identifier]
+              name_data[:identifier].to_s
+            else
+              name_data.to_s
+            end
+          end
+          # Process a value (polymorphic)
+          def process_value(value)
+            return nil unless value
+            return value.to_s if value.is_a?(String)
+            # Check if it's an array of string parts (concatenated strings)
+            if value.is_a?(Array) && value.first.respond_to?(:[]) && value.first[:string]
+              # Multiple strings - concatenate them
+              return value.map do |part|
+                extract_and_unescape_string(part[:string])
+              end.join
+            end
+            return nil unless value.respond_to?(:[])
+            if value[:string]
+              # Single string
+              extract_and_unescape_string(value[:string])
+            elsif value[:integer]
+              value[:integer].to_s.to_i
+            elsif value[:float]
+              value[:float].to_s.to_f
+            elsif value[:identifier]
+              # Could be boolean or enum value
+              val = value[:identifier].to_s
+              case val.downcase
+              when "true", "t"
+                true
+              when "false", "f"
+                false
+              else
+                val # Enum value
+              end
+            elsif value[:message]
+              # Nested message
+              fields = normalize_fields(value[:message])
+              { "fields" => fields }
+            elsif value[:list]
+              # List of values
+              process_list(value[:list])
+            else
+              value.to_s
+            end
+          end
+          # Extract and unescape a string token
+          def extract_and_unescape_string(str_token)
+            str = str_token.to_s
+            # Remove surrounding quotes
+            str = str[1..-2] if str.start_with?('"') && str.end_with?('"')
+            str = str[1..-2] if str.start_with?("'") && str.end_with?("'")
+            unescape_string(str)
+          end
+          # Process a list of values
+          def process_list(list)
+            return [] unless list
+            Array(list).map { |item| process_value(item) }
+          end
+          # Unescape string content
+          def unescape_string(str)
+            str.gsub('\\n', "\n")
+              .gsub('\\t', "\t")
+              .gsub('\\r', "\r")
+              .gsub('\\"', '"')
+              .gsub("\\'", "'")
+              .gsub("\\\\", "\\")
+          end
+        end
+      end
+    end
+  end
+end

data/lib/unibuf/validators/schema_validator.rb ADDED Viewed

@@ -0,0 +1,110 @@
+# frozen_string_literal: true
+module Unibuf
+  module Validators
+    # Validates Protocol Buffer messages against Proto3 schemas
+    # Ensures textproto files conform to their schema definitions
+    class SchemaValidator
+      attr_reader :schema
+      def initialize(schema)
+        @schema = schema
+      end
+      # Validate a message against the schema
+      # @param message [Message] The textproto message
+      # @param message_type [String] Expected message type name
+      # @return [Boolean] true if valid
+      # @raise [SchemaValidationError] if invalid
+      def validate!(message, message_type = nil)
+        errors = validate(message, message_type)
+        return true if errors.empty?
+        raise SchemaValidationError,
+              "Schema validation failed:\n#{errors.join("\n")}"
+      end
+      # Validate and return errors
+      # @param message [Message] The textproto message
+      # @param message_type [String] Expected message type name
+      # @return [Array<String>] List of validation errors
+      def validate(message, message_type = nil)
+        errors = []
+        # Find message definition
+        msg_def = find_message_definition(message_type)
+        unless msg_def
+          return ["Unknown message type: #{message_type}"]
+        end
+        # Validate each field in the message using public fields
+        Array(message.fields).each do |field|
+          field_errors = validate_field(field, msg_def)
+          errors.concat(field_errors)
+        end
+        # Check for required fields
+        required_errors = check_required_fields(message, msg_def)
+        errors.concat(required_errors)
+        errors
+      end
+      private
+      def find_message_definition(type_name)
+        return schema.messages.first if type_name.nil? && schema.messages.size == 1
+        schema.find_message(type_name)
+      end
+      def validate_field(field, msg_def)
+        errors = []
+        # Check if field exists in schema
+        field_def = msg_def.find_field(field.name)
+        unless field_def
+          errors << "Unknown field '#{field.name}' in message '#{msg_def.name}'"
+          return errors
+        end
+        # Validate field value type
+        unless field_def.valid_value?(field.value)
+          errors << "Invalid value for field '#{field.name}': " \
+                    "expected #{field_def.type}, got #{field.value.class}"
+        end
+        # Validate nested messages recursively
+        if field.message_field? && field_def.message_type?
+          nested_msg = field.as_message
+          nested_def = schema.find_message(field_def.type)
+          if nested_def
+            nested_errors = validate(nested_msg, field_def.type)
+            errors.concat(nested_errors.map { |e| "  #{field.name}.#{e}" })
+          end
+        end
+        errors
+      end
+      def check_required_fields(message, msg_def)
+        errors = []
+        # In proto3, all fields are optional by default
+        # We only check required fields if explicitly marked
+        msg_def.fields.each do |field_def|
+          next unless field_def.required?
+          field = message.find_field(field_def.name)
+          unless field
+            errors << "Required field '#{field_def.name}' missing in #{msg_def.name}"
+          end
+        end
+        errors
+      end
+    end
+  end
+end

data/lib/unibuf/validators/type_validator.rb ADDED Viewed

@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+module Unibuf
+  module Validators
+    # Validates field types and values
+    # Ensures type safety and Protocol Buffer compliance
+    class TypeValidator
+      # Type mapping for Protocol Buffer types
+      VALID_TYPES = {
+        string: [String],
+        int32: [Integer],
+        int64: [Integer],
+        uint32: [Integer],
+        uint64: [Integer],
+        sint32: [Integer],
+        sint64: [Integer],
+        fixed32: [Integer],
+        fixed64: [Integer],
+        sfixed32: [Integer],
+        sfixed64: [Integer],
+        float: [Float, Integer],
+        double: [Float, Integer],
+        bool: [TrueClass, FalseClass],
+        bytes: [String],
+      }.freeze
+      class << self
+        # Validate a field's type
+        # @param field [Field] The field to validate
+        # @param expected_type [Symbol] The expected Protocol Buffer type
+        # @return [Boolean] true if valid
+        # @raise [TypeValidationError] if invalid
+        def validate_field(field, expected_type)
+          return true if field.value.nil? # Allow nil for optional fields
+          valid_classes = VALID_TYPES[expected_type]
+          unless valid_classes
+            raise TypeValidationError,
+                  "Unknown type '#{expected_type}'"
+          end
+          unless valid_classes.any? { |klass| field.value.is_a?(klass) }
+            raise TypeValidationError,
+                  "Field '#{field.name}' expected #{expected_type}, " \
+                  "got #{field.value.class}"
+          end
+          # Additional range validation for numeric types
+          if numeric_type?(expected_type)
+            validate_numeric_range(field,
+                                   expected_type)
+          end
+          true
+        end
+        # Validate all fields in a message
+        # @param message [Message] The message to validate
+        # @param schema [Hash] Type schema mapping field names to types
+        # @return [Array<String>] List of validation errors
+        def validate_message(message, schema = {})
+          errors = []
+          message.fields_array.each do |field|
+            next unless schema.key?(field.name)
+            expected_type = schema[field.name]
+            begin
+              validate_field(field, expected_type)
+            rescue TypeValidationError => e
+              errors << e.message
+            end
+          end
+          errors
+        end
+        # Check if a type is numeric
+        def numeric_type?(type)
+          %i[int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32
+             sfixed64 float double].include?(type)
+        end
+        # Check if a type is signed
+        def signed_type?(type)
+          %i[int32 int64 sint32 sint64 sfixed32 sfixed64 float
+             double].include?(type)
+        end
+        # Check if a type is unsigned
+        def unsigned_type?(type)
+          %i[uint32 uint64 fixed32 fixed64].include?(type)
+        end
+        private
+        def validate_numeric_range(field, expected_type)
+          value = field.value
+          return unless value.is_a?(Numeric)
+          case expected_type
+          when :int32, :sint32, :sfixed32
+            validate_range(field, value, -2**31, (2**31) - 1)
+          when :int64, :sint64, :sfixed64
+            validate_range(field, value, -2**63, (2**63) - 1)
+          when :uint32, :fixed32
+            validate_range(field, value, 0, (2**32) - 1)
+          when :uint64, :fixed64
+            validate_range(field, value, 0, (2**64) - 1)
+          end
+        end
+        def validate_range(field, value, min, max)
+          return if value.between?(min, max)
+          raise TypeValidationError,
+                "Field '#{field.name}' value #{value} out of range [#{min}, #{max}]"
+        end
+      end
+    end
+  end
+end

data/lib/unibuf/version.rb ADDED Viewed

@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+module Unibuf
+  # Version string of the unibuf gem
+  VERSION = "0.1.0"
+end