kumi-parser 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +120 -0
- data/README.md +38 -41
- data/lib/kumi/parser/base.rb +51 -0
- data/lib/kumi/parser/direct_parser.rb +502 -0
- data/lib/kumi/parser/errors.rb +40 -0
- data/lib/kumi/parser/smart_tokenizer.rb +287 -0
- data/lib/kumi/parser/syntax_validator.rb +3 -25
- data/lib/kumi/parser/text_parser.rb +19 -34
- data/lib/kumi/parser/token.rb +84 -0
- data/lib/kumi/parser/token_metadata.rb +370 -0
- data/lib/kumi/parser/version.rb +1 -1
- data/lib/kumi/text_parser.rb +40 -0
- data/lib/kumi/text_schema.rb +31 -0
- data/lib/kumi-parser.rb +1 -0
- metadata +10 -8
- data/lib/kumi/parser/analyzer_diagnostic_converter.rb +0 -84
- data/lib/kumi/parser/text_parser/editor_diagnostic.rb +0 -102
- data/lib/kumi/parser/text_parser/grammar.rb +0 -214
- data/lib/kumi/parser/text_parser/parser.rb +0 -168
- data/lib/kumi/parser/text_parser/transform.rb +0 -170
- data/lib/kumi/parser.rb +0 -8
- data/test_basic.rb +0 -44
# frozen_string_literal: true

require_relative 'token_metadata'

module Kumi
  module Parser
    # Context-aware tokenizer that produces tokens with embedded semantic
    # metadata (category, precedence, parser hints) taken from TOKEN_METADATA.
    #
    # Usage:
    #   tokens = SmartTokenizer.new(source, source_file: 'schema.kumi').tokenize
    #
    # The returned stream always ends with an :eof token. Malformed input
    # (unterminated string, bare '=' or '!', unknown character) raises a
    # TokenizerError carrying a source location.
    class SmartTokenizer
      # @param source [String] raw schema text to tokenize
      # @param source_file [String] file name recorded in token locations
      def initialize(source, source_file: '<input>')
        @source = source
        @source_file = source_file
        @pos = 0
        @line = 1
        @column = 1
        # Tracks nesting of :schema / :input blocks so identifiers can be
        # tagged with the context they appear in; :global is the base sentinel.
        @context_stack = [:global]
        @tokens = []
      end

      # Scans the whole source and returns the Array of Token objects,
      # terminated by an :eof token.
      def tokenize
        while @pos < @source.length
          skip_whitespace_except_newlines

          case current_char
          when nil then break
          when "\n" then handle_newline
          when '#' then consume_comment
          when '"' then consume_string
          when /\d/ then consume_number
          when /[a-zA-Z_]/ then consume_identifier_or_keyword
          when ':' then consume_symbol_or_colon
          else
            consume_operator_or_punctuation
          end
        end

        add_token(:eof, nil, {})
        @tokens
      end

      private

      # Character at the current scan position, or nil at end of input.
      def current_char
        return nil if @pos >= @source.length

        @source[@pos]
      end

      # Character +offset+ positions ahead, or nil past end of input.
      def peek_char(offset = 1)
        peek_pos = @pos + offset
        return nil if peek_pos >= @source.length

        @source[peek_pos]
      end

      # Moves past the current character, keeping @line/@column in sync.
      def advance
        if current_char == "\n"
          @line += 1
          @column = 1
        else
          @column += 1
        end
        @pos += 1
      end

      # Newlines are significant tokens, so only spaces, tabs and carriage
      # returns are skipped here.
      def skip_whitespace_except_newlines
        advance while current_char && current_char.match?(/[ \t\r]/)
      end

      def handle_newline
        add_token(:newline, "\n", Kumi::Parser::TOKEN_METADATA[:newline])
        advance
      end

      # Consumes "# ..." up to (not including) the trailing newline.
      # The token is located at the '#' character, not at the comment's end.
      def consume_comment
        start_line = @line
        start_column = @column
        advance # skip #

        comment_text = +''
        while current_char && current_char != "\n"
          comment_text << current_char
          advance
        end

        emit(:comment, comment_text, Kumi::Parser::TOKEN_METADATA[:comment],
             line: start_line, column: start_column)
      end

      # Consumes a double-quoted string, handling \n \t \r \\ \" escapes
      # (unknown escapes keep the raw character). Raises TokenizerError if
      # the closing quote is missing.
      def consume_string
        # Capture the opening position so the token points at the start of
        # the literal even when the string spans multiple lines.
        start_line = @line
        start_column = @column
        advance # skip opening quote

        string_content = +''
        while current_char && current_char != '"'
          if current_char == '\\'
            advance
            # Handle escape sequences
            case current_char
            when 'n' then string_content << "\n"
            when 't' then string_content << "\t"
            when 'r' then string_content << "\r"
            when '\\' then string_content << '\\'
            when '"' then string_content << '"'
            else
              string_content << current_char if current_char
            end
          else
            string_content << current_char
          end
          advance
        end

        raise_tokenizer_error('Unterminated string literal') if current_char != '"'

        advance # skip closing quote

        emit(:string, string_content, Kumi::Parser::TOKEN_METADATA[:string],
             line: start_line, column: start_column)
      end

      # Consumes an :integer or :float. Underscore digit separators are
      # accepted; a '.' only continues the number when followed by a digit,
      # so member access such as `1.foo` is not swallowed.
      def consume_number
        start_line = @line
        start_column = @column
        number_str = +''
        has_dot = false

        while current_char && (current_char.match?(/[0-9_]/) || (!has_dot && current_char == '.'))
          if current_char == '.'
            # Make sure next character is a digit to distinguish from member access
            break unless peek_char && peek_char.match?(/\d/)

            has_dot = true
          end
          number_str << current_char
          advance
        end

        token_type = has_dot ? :float : :integer
        emit(token_type, number_str, Kumi::Parser::TOKEN_METADATA[token_type],
             line: start_line, column: start_column)
      end

      # Consumes a word and classifies it as a keyword (per KEYWORDS) or as
      # an identifier tagged with the context it appears in.
      def consume_identifier_or_keyword
        start_line = @line
        start_column = @column
        identifier = consume_while { |c| c.match?(/[a-zA-Z0-9_]/) }

        keyword_type = Kumi::Parser::KEYWORDS[identifier]
        if keyword_type
          metadata = Kumi::Parser::TOKEN_METADATA[keyword_type].dup

          # Block-opening/closing keywords also maintain the context stack.
          case keyword_type
          when :schema, :input
            @context_stack.push(keyword_type)
            metadata[:opens_context] = keyword_type
          when :end
            # Never pop the :global sentinel; a stray `end` leaves
            # closes_context nil for the parser to diagnose.
            closed_context = @context_stack.pop if @context_stack.length > 1
            metadata[:closes_context] = closed_context
          end

          emit(keyword_type, identifier, metadata, line: start_line, column: start_column)
        else
          # It's an identifier - determine its role based on context
          metadata = Kumi::Parser::TOKEN_METADATA[:identifier].dup

          case current_context
          when :input
            metadata[:context] = :input_declaration
          when :schema
            metadata[:context] = :schema_body
          end

          emit(:identifier, identifier, metadata, line: start_line, column: start_column)
        end
      end

      # Consumes a symbol literal (:name) or, failing that, a bare colon.
      def consume_symbol_or_colon
        start_column = @column

        if peek_char && peek_char.match?(/[a-zA-Z_]/)
          # It's a symbol like :name
          advance # skip :
          symbol_name = consume_while { |c| c.match?(/[a-zA-Z0-9_]/) }
          emit(:symbol, symbol_name.to_sym, Kumi::Parser::TOKEN_METADATA[:symbol], column: start_column)
        else
          # It's just a colon
          emit(:colon, ':', Kumi::Parser::TOKEN_METADATA[:colon], column: start_column)
          advance
        end
      end

      # Consumes the comparison operators (==, !=, >, >=, <, <=) and any
      # single character listed in CHAR_TO_TOKEN. Bare '=' and '!' are
      # rejected with a hint, as are characters with no token mapping.
      def consume_operator_or_punctuation
        start_column = @column
        char = current_char

        case char
        when '='
          if peek_char == '='
            emit_two_char(:eq, '==', start_column)
          else
            raise_tokenizer_error("Unexpected '=' (did you mean '=='?)")
          end
        when '!'
          if peek_char == '='
            emit_two_char(:ne, '!=', start_column)
          else
            raise_tokenizer_error("Unexpected '!' (did you mean '!='?)")
          end
        when '>'
          if peek_char == '='
            emit_two_char(:gte, '>=', start_column)
          else
            advance
            emit(:gt, '>', Kumi::Parser::TOKEN_METADATA[:gt], column: start_column)
          end
        when '<'
          if peek_char == '='
            emit_two_char(:lte, '<=', start_column)
          else
            advance
            emit(:lt, '<', Kumi::Parser::TOKEN_METADATA[:lt], column: start_column)
          end
        else
          # Single character operators/punctuation
          token_type = CHAR_TO_TOKEN[char]
          raise_tokenizer_error("Unexpected character: #{char}") unless token_type

          emit(token_type, char, Kumi::Parser::TOKEN_METADATA[token_type].dup, column: start_column)
          advance
        end
      end

      # Emits a two-character operator token and consumes both characters.
      def emit_two_char(type, lexeme, start_column)
        advance
        advance
        emit(type, lexeme, Kumi::Parser::TOKEN_METADATA[type], column: start_column)
      end

      # Accumulates consecutive characters for which the block returns true.
      def consume_while(&block)
        result = +''
        while current_char && block.call(current_char)
          result << current_char
          advance
        end
        result
      end

      def current_context
        @context_stack.last
      end

      # Appends a token located at the CURRENT scan position (for tokens
      # with no multi-character lexeme, e.g. :newline and :eof).
      def add_token(type, value, metadata)
        emit(type, value, metadata)
      end

      # Appends a token at an explicit (line, column); defaults to the
      # current scan position.
      def emit(type, value, metadata, line: @line, column: @column)
        location = Kumi::Syntax::Location.new(file: @source_file, line: line, column: column)
        @tokens << Token.new(type, value, location, metadata)
      end

      # Raises TokenizerError pinned to the current position.
      # NOTE(review): this resolves via constant lookup in Kumi::Parser —
      # confirm errors.rb defines TokenizerError there; TextParser rescues
      # Kumi::Parser::Errors::TokenizerError, which should be the same class.
      def raise_tokenizer_error(message)
        location = Kumi::Syntax::Location.new(file: @source_file, line: @line, column: @column)
        raise TokenizerError.new(message, location: location)
      end
    end
  end
end
@@ -1,33 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative 'text_parser/parser'
|
4
|
-
require_relative 'text_parser/editor_diagnostic'
|
5
|
-
require_relative 'error_extractor'
|
6
|
-
|
7
3
|
module Kumi
|
8
4
|
module Parser
|
9
|
-
# Validates Kumi DSL syntax
|
5
|
+
# Validates Kumi DSL syntax using new parser
|
10
6
|
class SyntaxValidator
|
11
|
-
def initialize
|
12
|
-
@parser = TextParser::Parser.new
|
13
|
-
end
|
14
|
-
|
15
7
|
def validate(text, source_file: '<input>')
|
16
|
-
|
17
|
-
TextParser::DiagnosticCollection.new([])
|
18
|
-
rescue StandardError => e
|
19
|
-
# ErrorExtractor.extract returns a hash, convert it to an EditorDiagnostic
|
20
|
-
error_hash = ErrorExtractor.extract(e)
|
21
|
-
return TextParser::DiagnosticCollection.new([]) if error_hash.empty?
|
22
|
-
|
23
|
-
diagnostic = TextParser::EditorDiagnostic.new(
|
24
|
-
line: error_hash[:line],
|
25
|
-
column: error_hash[:column],
|
26
|
-
message: error_hash[:message],
|
27
|
-
severity: error_hash[:severity],
|
28
|
-
type: error_hash[:type]
|
29
|
-
)
|
30
|
-
TextParser::DiagnosticCollection.new([diagnostic])
|
8
|
+
Kumi::Parser::Base.validate(text, source_file: source_file)
|
31
9
|
end
|
32
10
|
|
33
11
|
def valid?(text, source_file: '<input>')
|
@@ -36,7 +14,7 @@ module Kumi
|
|
36
14
|
|
37
15
|
def first_error(text, source_file: '<input>')
|
38
16
|
diagnostics = validate(text, source_file: source_file)
|
39
|
-
diagnostics.empty? ? nil : diagnostics.
|
17
|
+
diagnostics.empty? ? nil : diagnostics.first[:message]
|
40
18
|
end
|
41
19
|
end
|
42
20
|
end
|
# frozen_string_literal: true

require_relative 'smart_tokenizer'
require_relative 'direct_parser'

module Kumi
  module Parser
    # Clean text parser focused on core parsing functionality:
    # tokenize with SmartTokenizer, then build the AST with DirectParser.
    module TextParser
      class << self
        # Parse text to AST.
        #
        # @param text [String] schema source
        # @param source_file [String] file name used in diagnostics
        # @raise [Kumi::Errors::SyntaxError] on any tokenizer/parser error
        def parse(text, source_file: '<input>')
          tokens = Kumi::Parser::SmartTokenizer.new(text, source_file: source_file).tokenize
          Kumi::Parser::DirectParser.new(tokens).parse
        rescue Kumi::Parser::Errors::ParseError, Kumi::Parser::Errors::TokenizerError => e
          # Convert parser errors to the expected SyntaxError for compatibility
          raise Kumi::Errors::SyntaxError, e.message
        end

        # Check if text is syntactically valid.
        def valid?(text, source_file: '<input>')
          parse(text, source_file: source_file)
          true
        rescue StandardError
          # Any failure (syntax or otherwise) means "not valid"; the error
          # itself is intentionally discarded here — use #validate for details.
          false
        end

        # Basic validation - returns diagnostics from SyntaxValidator.
        def validate(text, source_file: '<input>')
          validator.validate(text, source_file: source_file)
        end

        private

        # Lazily built, reused validator instance (the validator is
        # stateless per call, so sharing one avoids per-call allocation).
        def validator
          @validator ||= Kumi::Parser::SyntaxValidator.new
        end
      end
    end
  end
end
# frozen_string_literal: true

module Kumi
  module Parser
    # Token produced by SmartTokenizer. Besides the usual (type, value,
    # location) triple it carries a metadata Hash so the parser can query
    # semantic facts (category, precedence, hints) without a lookup table.
    class Token
      attr_reader :type, :value, :location, :metadata

      # @param type [Symbol] token kind, e.g. :identifier, :eq, :string
      # @param value [Object] lexeme or literal value
      # @param location [Object] source position (file/line/column)
      # @param metadata [Hash] semantic metadata (see TOKEN_METADATA)
      def initialize(type, value, location, metadata = {})
        @type = type
        @value = value
        @location = location
        @metadata = metadata
      end

      # -- Semantic predicates embedded in token --------------------------

      def keyword?
        @metadata[:category] == :keyword
      end

      def operator?
        @metadata[:category] == :operator
      end

      def literal?
        @metadata[:category] == :literal
      end

      def identifier?
        @metadata[:category] == :identifier
      end

      def punctuation?
        @metadata[:category] == :punctuation
      end

      # -- Operator precedence embedded in token --------------------------

      # Binding strength for precedence-climbing parsing; 0 for
      # non-operator tokens.
      def precedence
        @metadata[:precedence] || 0
      end

      def left_associative?
        @metadata[:associativity] == :left
      end

      def right_associative?
        @metadata[:associativity] == :right
      end

      # -- Parser hints embedded in token ---------------------------------

      def expects_block?
        @metadata[:expects_block] == true
      end

      def terminates_expression?
        @metadata[:terminates_expression] == true
      end

      def starts_expression?
        @metadata[:starts_expression] == true
      end

      # Direct AST construction hint (class to instantiate), or nil.
      def ast_class
        @metadata[:ast_class]
      end

      def to_s
        "#{@type}(#{@value.inspect}) at #{@location}"
      end

      def inspect
        to_s
      end

      # Equality deliberately ignores metadata: two tokens with the same
      # type, value and location are the same token regardless of hints.
      def ==(other)
        other.is_a?(Token) &&
          @type == other.type &&
          @value == other.value &&
          @location == other.location
      end

      # Keep eql?/hash consistent with == so tokens behave correctly as
      # Hash keys and in Set/uniq operations (metadata excluded, as in ==).
      alias eql? ==

      def hash
        [self.class, @type, @value, @location].hash
      end
    end
  end
end