RubyGems - kumi-parser - Versions diffs - 0.0.2 → 0.0.3 - Mend

kumi-parser 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/.rspec +3 -0
data/LICENSE +21 -0
data/README.md +76 -0
data/Rakefile +10 -0
data/examples/debug_text_parser.rb +41 -0
data/examples/debug_transform_rule.rb +26 -0
data/examples/text_parser_comprehensive_test.rb +333 -0
data/examples/text_parser_test_with_comments.rb +146 -0
data/kumi-parser.gemspec +45 -0
data/lib/kumi/parser/analyzer_diagnostic_converter.rb +84 -0
data/lib/kumi/parser/error_extractor.rb +89 -0
data/lib/kumi/parser/syntax_validator.rb +43 -0
data/lib/kumi/parser/text_parser/api.rb +60 -0
data/lib/kumi/parser/text_parser/editor_diagnostic.rb +102 -0
data/lib/kumi/parser/text_parser/grammar.rb +214 -0
data/lib/kumi/parser/text_parser/parser.rb +168 -0
data/lib/kumi/parser/text_parser/transform.rb +170 -0
data/lib/kumi/parser/text_parser.rb +53 -0
data/lib/kumi/parser/version.rb +7 -0
data/lib/kumi/parser.rb +8 -0
data/lib/kumi-parser.rb +18 -0
data/test_basic.rb +44 -0
metadata +24 -2

data/kumi-parser.gemspec ADDED Viewed

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+require_relative 'lib/kumi/parser/version'
+# Gem specification for kumi-parser; the version comes from Kumi::Parser::VERSION.
+Gem::Specification.new do |spec|
+  spec.name = 'kumi-parser'
+  spec.version = Kumi::Parser::VERSION
+  spec.authors = ['Kumi Team']
+  spec.email = ['dev@kumi.ai']
+  spec.summary = 'Text parser for Kumi'
+  spec.description = 'Allows Kumi schemas to be written as plain text with syntax validation and editor integration.'
+  spec.homepage = 'https://github.com/amuta/kumi-parser'
+  spec.license = 'MIT'
+  spec.required_ruby_version = '>= 3.0.0'
+  # Registry host and project links published as gem metadata.
+  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
+  spec.metadata['homepage_uri'] = spec.homepage
+  spec.metadata['source_code_uri'] = 'https://github.com/amuta/kumi-parser'
+  spec.metadata['changelog_uri'] = 'https://github.com/amuta/kumi-parser/blob/main/CHANGELOG.md'
+  # Specify which files should be added to the gem when it is released.
+  # The list is the NUL-separated output of `git ls-files -z`, evaluated from
+  # this file's directory, minus: this gemspec itself, anything whose path
+  # starts with one of the prefixes below, and any path ending in `.gem`.
+  spec.files = Dir.chdir(__dir__) do
+    `git ls-files -z`.split("\x0").reject do |f|
+      (File.expand_path(f) == __FILE__) ||
+        f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile]) ||
+        f.end_with?('.gem')
+    end
+  end
+  spec.bindir = 'exe'
+  # Every packaged file under exe/ is registered as an executable, by basename.
+  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
+  spec.require_paths = ['lib']
+  # Dependencies
+  spec.add_dependency 'kumi', '~> 0.0.7'
+  spec.add_dependency 'parslet', '~> 2.0'
+  spec.add_dependency 'zeitwerk', '~> 2.6'
+  # Development dependencies
+  spec.add_development_dependency 'bundler', '~> 2.0'
+  spec.add_development_dependency 'rake', '~> 13.0'
+  spec.add_development_dependency 'rspec', '~> 3.0'
+  spec.add_development_dependency 'rubocop', '~> 1.21'
+  spec.add_development_dependency 'simplecov', '~> 0.22'
+end

data/lib/kumi/parser/analyzer_diagnostic_converter.rb ADDED Viewed

@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+require_relative 'text_parser/editor_diagnostic'
+module Kumi
+  module Parser
+    # Converts analyzer errors to editor diagnostics
+    class AnalyzerDiagnosticConverter
+      def self.convert_errors(errors)
+        diagnostics = TextParser::DiagnosticCollection.new
+        errors.each do |error|
+          diagnostic = convert_single_error(error)
+          diagnostics << diagnostic if diagnostic
+        end
+        diagnostics
+      end
+      def self.convert_single_error(error)
+        # Handle legacy array format [location, message]
+        if error.is_a?(Array) && error.size == 2
+          location, message = error
+          line = location&.respond_to?(:line) ? location.line : 1
+          column = location&.respond_to?(:column) ? location.column : 1
+          return TextParser::EditorDiagnostic.new(
+            line: line,
+            column: column,
+            message: message.to_s,
+            severity: :error,
+            type: :semantic
+          )
+        end
+        # Handle regular error objects
+        if error&.respond_to?(:message)
+          line = error.respond_to?(:location) && error.location&.respond_to?(:line) ? error.location.line : 1
+          column = error.respond_to?(:location) && error.location&.respond_to?(:column) ? error.location.column : 1
+          # Extract error type and map to severity
+          error_type = error.respond_to?(:type) ? error.type : :semantic
+          severity = map_type_to_severity(error_type)
+          return TextParser::EditorDiagnostic.new(
+            line: line,
+            column: column,
+            message: error.message,
+            severity: severity,
+            type: error_type
+          )
+        end
+        # Handle unknown formats (strings, etc.)
+        return unless error
+        TextParser::EditorDiagnostic.new(
+          line: 1,
+          column: 1,
+          message: "Unknown analyzer error: #{error}",
+          severity: :error,
+          type: :semantic
+        )
+      end
+      def self.extract_location(location)
+        if location&.respond_to?(:line) && location.respond_to?(:column)
+          { line: location.line, column: location.column }
+        else
+          { line: 1, column: 1 }
+        end
+      end
+      def self.map_type_to_severity(type)
+        case type
+        when :warning then :warning
+        when :info then :info
+        when :hint then :hint
+        else :error
+        end
+      end
+    end
+  end
+end

data/lib/kumi/parser/error_extractor.rb ADDED Viewed

@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+module Kumi
+  module Parser
+    # Extracts errors from parslet parse failures
+    class ErrorExtractor
+      def self.extract(error)
+        # Basic error extraction from parslet parse failures
+        # This would typically parse the parslet error message
+        # and extract location information
+        return {} unless error.respond_to?(:message)
+        message = error.message
+        # Determine error type based on class
+        error_type = case error.class.name
+                     when /Syntax/ then :syntax
+                     else :runtime
+                     end
+        # Simple regex to extract line/column info
+        if match = message.match(/at line (\d+) char (\d+)/)
+          line = match[1].to_i
+          column = match[2].to_i
+        else
+          line = 1
+          column = 1
+        end
+        # Format message based on error type
+        formatted_message = if error_type == :syntax
+                              extract_user_friendly_message(message)
+                            else
+                              "#{error.class.name}: #{message}"
+                            end
+        {
+          message: formatted_message,
+          line: line,
+          column: column,
+          severity: :error,
+          type: error_type
+        }
+      end
+      def self.humanize_error_message(raw_message)
+        extract_user_friendly_message(raw_message)
+      end
+      def self.extract_user_friendly_message(raw_message)
+        # Clean up the message first - remove markers, location info, and extra whitespace
+        cleaned_message = raw_message.gsub(/^\s*`-\s*/, '').gsub(/ at line \d+ char \d+\.?/, '').strip
+        # Convert parslet's technical error messages to user-friendly ones
+        case cleaned_message
+        when /Expected ":", but got "(\w+)"/
+          "Missing ':' before symbol, but got \"#{::Regexp.last_match(1)}\""
+        when /Expected ":"/
+          "Missing ':' before symbol"
+        when /Expected "do", but got "(\w+)"/
+          "Missing 'do' keyword, but got \"#{::Regexp.last_match(1)}\""
+        when /Expected "do"/
+          "Missing 'do' keyword"
+        when /Expected "end", but got (.+)/
+          "Missing 'end' keyword, but got #{::Regexp.last_match(1)}"
+        when /Expected "end"/
+          "Missing 'end' keyword"
+        when /Expected "(\w+)", but got "(\w+)"/
+          "Missing '#{::Regexp.last_match(1)}' keyword, but got \"#{::Regexp.last_match(2)}\""
+        when /Expected '(\w+)'/
+          "Expected '#{::Regexp.last_match(1)}'"
+        when /Expected "([^"]+)", but got "([^"]+)"/
+          "Expected '#{::Regexp.last_match(1)}', but got \"#{::Regexp.last_match(2)}\""
+        when /Expected "(\w+)"/
+          "Missing '#{::Regexp.last_match(1)}' keyword"
+        when /Failed to match.*Premature end of input/m
+          'Failed to match - premature end of input'
+        when /Premature end of input/
+          "Unexpected end of file - missing 'end'?"
+        when /Failed to match/
+          'Failed to match sequence'
+        else
+          'Parse error'
+        end
+      end
+    end
+  end
+end

data/lib/kumi/parser/syntax_validator.rb ADDED Viewed

@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+require_relative 'text_parser/parser'
+require_relative 'text_parser/editor_diagnostic'
+require_relative 'error_extractor'
+module Kumi
+  module Parser
+    # Validates Kumi DSL syntax
+    class SyntaxValidator
+      def initialize
+        @parser = TextParser::Parser.new
+      end
+      def validate(text, source_file: '<input>')
+        @parser.parse(text, source_file: source_file)
+        TextParser::DiagnosticCollection.new([])
+      rescue StandardError => e
+        # ErrorExtractor.extract returns a hash, convert it to an EditorDiagnostic
+        error_hash = ErrorExtractor.extract(e)
+        return TextParser::DiagnosticCollection.new([]) if error_hash.empty?
+        diagnostic = TextParser::EditorDiagnostic.new(
+          line: error_hash[:line],
+          column: error_hash[:column],
+          message: error_hash[:message],
+          severity: error_hash[:severity],
+          type: error_hash[:type]
+        )
+        TextParser::DiagnosticCollection.new([diagnostic])
+      end
+      def valid?(text, source_file: '<input>')
+        validate(text, source_file: source_file).empty?
+      end
+      def first_error(text, source_file: '<input>')
+        diagnostics = validate(text, source_file: source_file)
+        diagnostics.empty? ? nil : diagnostics.to_a.first.message
+      end
+    end
+  end
+end

data/lib/kumi/parser/text_parser/api.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+require 'ostruct'
+module Kumi
+  module Parser
+    module TextParser
+      # Public API for TextParser
+      class Api
+        class << self
+          def parse(text, source_file: '<input>')
+            parser = Parser.new
+            parser.parse(text, source_file: source_file)
+          end
+          def validate(text, source_file: '<input>')
+            parse(text, source_file: source_file)
+            []
+          rescue StandardError => e
+            [create_diagnostic(e, source_file)]
+          end
+          def valid?(text, source_file: '<input>')
+            validate(text, source_file: source_file).empty?
+          end
+          def diagnostics_for_monaco(text, source_file: '<input>')
+            validate(text, source_file: source_file)
+          end
+          def diagnostics_for_codemirror(text, source_file: '<input>')
+            validate(text, source_file: source_file)
+          end
+          def diagnostics_as_json(text, source_file: '<input>')
+            validate(text, source_file: source_file).map(&:to_h)
+          end
+          def analyze(text, source_file: '<input>')
+            ast = parse(text, source_file: source_file)
+            { success: true, ast: ast, diagnostics: [] }
+          rescue StandardError => e
+            { success: false, ast: nil, diagnostics: [create_diagnostic(e, source_file)] }
+          end
+          private
+          def create_diagnostic(error, source_file)
+            OpenStruct.new(
+              line: 1,
+              column: 1,
+              message: error.message,
+              source_file: source_file
+            )
+          end
+        end
+      end
+    end
+  end
+end

data/lib/kumi/parser/text_parser/editor_diagnostic.rb ADDED Viewed

@@ -0,0 +1,102 @@
+# frozen_string_literal: true
+module Kumi
+  module Parser
+    module TextParser
+      # Simple diagnostic for online editors
+      class EditorDiagnostic
+        attr_reader :line, :column, :message, :severity, :type
+        def initialize(line:, column:, message:, severity: :error, type: :syntax)
+          @line = line
+          @column = column
+          @message = message
+          @severity = severity
+          @type = type
+        end
+        def to_monaco
+          {
+            startLineNumber: line,
+            startColumn: column,
+            endLineNumber: line,
+            endColumn: column + 1,
+            message: message,
+            severity: monaco_severity
+          }
+        end
+        def to_codemirror
+          {
+            from: (line - 1) * 1000 + (column - 1),
+            to: (line - 1) * 1000 + column,
+            message: message,
+            severity: severity.to_s
+          }
+        end
+        def to_h
+          {
+            line: line,
+            column: column,
+            message: message,
+            severity: severity.to_s,
+            type: type.to_s
+          }
+        end
+        def to_json(*args)
+          require 'json'
+          to_h.to_json(*args)
+        end
+        private
+        def monaco_severity
+          case severity
+          when :error then 8    # Monaco.MarkerSeverity.Error
+          when :warning then 4  # Monaco.MarkerSeverity.Warning
+          when :info then 2     # Monaco.MarkerSeverity.Info
+          else 8
+          end
+        end
+      end
+      # Collection of diagnostics
+      class DiagnosticCollection
+        def initialize(diagnostics = [])
+          @diagnostics = diagnostics
+        end
+        def <<(diagnostic)
+          @diagnostics << diagnostic
+        end
+        def empty?
+          @diagnostics.empty?
+        end
+        def count
+          @diagnostics.length
+        end
+        def to_monaco
+          @diagnostics.map(&:to_monaco)
+        end
+        def to_codemirror
+          @diagnostics.map(&:to_codemirror)
+        end
+        def to_json(*args)
+          require 'json'
+          @diagnostics.map(&:to_h).to_json(*args)
+        end
+        def to_a
+          @diagnostics
+        end
+      end
+    end
+  end
+end

data/lib/kumi/parser/text_parser/grammar.rb ADDED Viewed

@@ -0,0 +1,214 @@
+# frozen_string_literal: true
+require 'parslet'
+module Kumi
+  module Parser
+    module TextParser
+      # Parslet grammar with proper arithmetic operator precedence.
+      #
+      # The root rule is :schema: a `schema do ... end` wrapper around one input
+      # block followed by any number of value/trait declarations. Alternatives
+      # written with `|` are tried in the order written, so rule order inside a
+      # choice is significant.
+      class Grammar < Parslet::Parser
+        # Basic tokens
+        rule(:space) { match('\s').repeat(1) }
+        rule(:space?) { space.maybe }
+        rule(:newline?) { match('\n').maybe }
+        # Comments
+        # A comment runs from '#' to the end of the line (newline excluded).
+        rule(:comment) { str('#') >> match('[^\n]').repeat }
+        # ws: any run of whitespace and comments (may be empty).
+        rule(:ws) { (space | comment).repeat }
+        rule(:ws?) { ws.maybe }
+        # Identifiers and symbols
+        rule(:identifier) { match('[a-zA-Z_]') >> match('[a-zA-Z0-9_]').repeat }
+        # Only the identifier part is captured; the leading ':' is dropped.
+        rule(:symbol) { str(':') >> identifier.as(:symbol) }
+        # Literals
+        rule(:integer) { match('[0-9]').repeat(1) }
+        rule(:float) { integer >> str('.') >> match('[0-9]').repeat(1) }
+        # float is tried first so "1.5" is not split into an integer plus leftovers.
+        rule(:number) { float.as(:float) | integer.as(:integer) }
+        # Double-quoted string; everything up to the next '"' is captured verbatim
+        # (no escape sequences are recognised by this rule).
+        rule(:string_literal) do
+          str('"') >> (str('"').absent? >> any).repeat.as(:string) >> str('"')
+        end
+        rule(:boolean) { (str('true').as(:true) | str('false').as(:false)) }
+        rule(:literal) { number | string_literal | boolean }
+        # Keywords
+        # These are plain string matches; no word-boundary check is applied.
+        rule(:schema_kw) { str('schema') }
+        rule(:input_kw) { str('input') }
+        rule(:value_kw) { str('value') }
+        rule(:trait_kw) { str('trait') }
+        rule(:do_kw) { str('do') }
+        rule(:end_kw) { str('end') }
+        # Type keywords
+        rule(:type_name) do
+          str('integer') | str('float') | str('string') | str('boolean') | str('any')
+        end
+        # Operators (ordered by precedence, highest to lowest)
+        rule(:mult_op) { str('*').as(:multiply) | str('/').as(:divide) | str('%').as(:modulo) }
+        rule(:add_op) { str('+').as(:add) | str('-').as(:subtract) }
+        # Two-character comparison operators are listed before '>' and '<' so the
+        # longer forms win.
+        rule(:comp_op) do
+          str('>=').as(:>=) | str('<=').as(:<=) | str('==').as(:==) |
+            str('!=').as(:!=) | str('>').as(:>) | str('<').as(:<)
+        end
+        rule(:logical_and_op) { str('&').as(:and) }
+        rule(:logical_or_op) { str('|').as(:or) }
+        # Expressions with proper precedence (using left recursion elimination)
+        # NOTE(review): declaration_reference is tried before literal, and the
+        # identifier rule also matches the words `true` / `false`, so the boolean
+        # branch of literal looks unreachable from here - confirm how the
+        # transform treats a decl_ref named true/false.
+        rule(:primary_expr) do
+          str('(') >> ws? >> expression >> ws? >> str(')') |
+            function_call |
+            input_reference |
+            declaration_reference |
+            literal
+        end
+        # Function calls: fn(:name, arg1, arg2, ...)
+        # The function name is a symbol; each argument is introduced by a comma.
+        rule(:function_call) do
+          str('fn(') >> ws? >>
+            symbol.as(:fn_name) >>
+            (str(',') >> ws? >> expression).repeat(0).as(:args) >>
+            ws? >> str(')')
+        end
+        # Multiplication/Division (left-associative)
+        # Captured as a :left operand plus a flat :ops list of {op, right} pairs.
+        rule(:mult_expr) do
+          primary_expr.as(:left) >>
+            (space? >> mult_op.as(:op) >> space? >> primary_expr.as(:right)).repeat.as(:ops)
+        end
+        # Addition/Subtraction (left-associative)
+        rule(:add_expr) do
+          mult_expr.as(:left) >>
+            (space? >> add_op.as(:op) >> space? >> mult_expr.as(:right)).repeat.as(:ops)
+        end
+        # Comparison operators
+        # At most one comparison per expression level (`.maybe`, not `.repeat`).
+        rule(:comp_expr) do
+          add_expr.as(:left) >>
+            (space? >> comp_op.as(:op) >> space? >> add_expr.as(:right)).maybe.as(:comp)
+        end
+        # Logical AND (higher precedence than OR)
+        rule(:logical_and_expr) do
+          comp_expr.as(:left) >>
+            (space? >> logical_and_op.as(:op) >> space? >> comp_expr.as(:right)).repeat.as(:ops)
+        end
+        # Logical OR (lowest precedence)
+        rule(:logical_or_expr) do
+          logical_and_expr.as(:left) >>
+            (space? >> logical_or_op.as(:op) >> space? >> logical_and_expr.as(:right)).repeat.as(:ops)
+        end
+        rule(:expression) { logical_or_expr }
+        # Input references: input.field or input.field.subfield
+        rule(:input_reference) do
+          str('input.') >> input_path.as(:input_ref)
+        end
+        rule(:input_path) do
+          identifier >> (str('.') >> identifier).repeat
+        end
+        # Declaration references: just identifier
+        rule(:declaration_reference) do
+          identifier.as(:decl_ref)
+        end
+        # Input declarations
+        # The `array :name do ... end` form is tried before the simple typed form.
+        rule(:input_declaration) do
+          nested_array_declaration | simple_input_declaration
+        end
+        # e.g. `integer :age` or `string :status, domain: %w[active inactive]`
+        rule(:simple_input_declaration) do
+          ws? >> type_name.as(:type) >> space >> symbol.as(:name) >>
+            (str(',') >> ws? >> domain_spec).maybe.as(:domain) >> ws? >> newline?
+        end
+        # Array blocks contain further input declarations and may nest.
+        rule(:nested_array_declaration) do
+          ws? >> str('array') >> space >> symbol.as(:name) >> space >> do_kw >> ws? >> newline? >>
+            (ws? >> input_declaration >> ws?).repeat.as(:nested_fields) >>
+            ws? >> end_kw >> ws? >> newline?
+        end
+        rule(:domain_spec) do
+          str('domain:') >> ws? >> domain_value.as(:domain_value)
+        end
+        rule(:domain_value) do
+          # Ranges: 1..10, 0.0..100.0
+          # NOTE(review): the exclusive form 1...10 is not matched by range_value,
+          # which requires a number directly after '..'.
+          range_value |
+            # Word arrays: %w[active inactive]
+            word_array_value |
+            # String arrays: ["active", "inactive"]
+            string_array_value
+        end
+        rule(:range_value) do
+          (float | integer) >> str('..') >> (float | integer)
+        end
+        rule(:word_array_value) do
+          str('%w[') >> (identifier >> space?).repeat.as(:words) >> str(']')
+        end
+        rule(:string_array_value) do
+          str('[') >> space? >>
+            (string_literal >> (str(',') >> space? >> string_literal).repeat).maybe >>
+            space? >> str(']')
+        end
+        # Value declarations
+        # The cascade (`value :name do ... end`) form is tried before the
+        # single-expression form.
+        rule(:value_declaration) do
+          cascade_value_declaration | simple_value_declaration
+        end
+        rule(:simple_value_declaration) do
+          ws? >> value_kw.as(:type) >> space >> symbol.as(:name) >> str(',') >> ws? >>
+            expression.as(:expr) >> ws? >> newline?
+        end
+        rule(:cascade_value_declaration) do
+          ws? >> value_kw.as(:type) >> space >> symbol.as(:name) >> space >> do_kw >> ws? >> newline? >>
+            (ws? >> cascade_case >> ws?).repeat.as(:cases) >>
+            ws? >> end_kw >> ws? >> newline?
+        end
+        # A cascade case is either `on <identifier>, <expression>` or
+        # `base <expression>`.
+        rule(:cascade_case) do
+          (ws? >> str('on') >> space >> identifier.as(:condition) >> str(',') >> ws? >>
+           expression.as(:result) >> ws? >> newline?) |
+            (ws? >> str('base') >> space >> expression.as(:base_result) >> ws? >> newline?)
+        end
+        # Trait declarations
+        rule(:trait_declaration) do
+          ws? >> trait_kw.as(:type) >> space >> symbol.as(:name) >> str(',') >> ws? >>
+            expression.as(:expr) >> ws? >> newline?
+        end
+        # Input block
+        rule(:input_block) do
+          ws? >> input_kw >> space >> do_kw >> ws? >> newline? >>
+            (ws? >> input_declaration >> ws?).repeat.as(:declarations) >>
+            ws? >> end_kw >> ws? >> newline?
+        end
+        # Schema structure
+        # The input block is mandatory and must come first; value and trait
+        # declarations may then appear in any order.
+        rule(:schema_body) do
+          input_block.as(:input) >>
+            (ws? >> (value_declaration | trait_declaration) >> ws?).repeat.as(:declarations)
+        end
+        rule(:schema) do
+          ws? >> schema_kw >> space >> do_kw >> ws? >> newline? >>
+            schema_body >>
+            ws? >> end_kw >> ws?
+        end
+        root(:schema)
+      end
+    end
+  end
+end