RubyGems - kumi-parser - Versions diffs - 0.0.3 → 0.0.4 - Mend

kumi-parser 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

checksums.yaml +4 -4
data/CLAUDE.md +120 -0
data/README.md +38 -41
data/lib/kumi/parser/base.rb +51 -0
data/lib/kumi/parser/direct_parser.rb +502 -0
data/lib/kumi/parser/errors.rb +40 -0
data/lib/kumi/parser/smart_tokenizer.rb +287 -0
data/lib/kumi/parser/syntax_validator.rb +3 -25
data/lib/kumi/parser/text_parser.rb +19 -34
data/lib/kumi/parser/token.rb +84 -0
data/lib/kumi/parser/token_metadata.rb +370 -0
data/lib/kumi/parser/version.rb +1 -1
data/lib/kumi/text_parser.rb +40 -0
data/lib/kumi/text_schema.rb +31 -0
data/lib/kumi-parser.rb +1 -0
metadata +10 -8
data/lib/kumi/parser/analyzer_diagnostic_converter.rb +0 -84
data/lib/kumi/parser/text_parser/editor_diagnostic.rb +0 -102
data/lib/kumi/parser/text_parser/grammar.rb +0 -214
data/lib/kumi/parser/text_parser/parser.rb +0 -168
data/lib/kumi/parser/text_parser/transform.rb +0 -170
data/lib/kumi/parser.rb +0 -8
data/test_basic.rb +0 -44

data/lib/kumi/parser/token_metadata.rb ADDED Viewed

@@ -0,0 +1,370 @@
+# frozen_string_literal: true
+module Kumi
+  module Parser
+    # Token type constants; each value is a Symbol that also appears as a key of TOKEN_METADATA
+    module TokenType
+      # Literals
+      INTEGER = :integer
+      FLOAT = :float
+      STRING = :string
+      BOOLEAN = :boolean
+      # Identifiers and symbols
+      IDENTIFIER = :identifier
+      SYMBOL = :symbol           # :name
+      # Keywords
+      SCHEMA = :schema
+      INPUT = :input
+      VALUE = :value
+      TRAIT = :trait
+      DO = :do
+      END_KW = :end # presumably suffixed with _KW because END is a reserved word in Ruby
+      ON = :on
+      BASE = :base
+      # Type keywords (the trailing comment shows the source word that KEYWORDS maps to each type)
+      INTEGER_TYPE = :integer_type   # integer
+      FLOAT_TYPE = :float_type       # float
+      STRING_TYPE = :string_type     # string
+      BOOLEAN_TYPE = :boolean_type   # boolean
+      ANY_TYPE = :any_type           # any
+      ARRAY_TYPE = :array_type       # array
+      # Function keywords
+      FN = :fn
+      # Operators (listed from the highest to the lowest precedence value given in TOKEN_METADATA)
+      MULTIPLY = :multiply       # *
+      DIVIDE = :divide          # /
+      MODULO = :modulo          # %
+      ADD = :add                # +
+      SUBTRACT = :subtract      # -
+      GTE = :gte                # >=
+      LTE = :lte                # <=
+      GT = :gt                  # >
+      LT = :lt                  # <
+      EQ = :eq                  # ==
+      NE = :ne                  # !=
+      AND = :and                # &
+      OR = :or                  # |
+      # Punctuation
+      DOT = :dot                # .
+      COMMA = :comma            # ,
+      COLON = :colon            # :
+      LPAREN = :lparen          # (
+      RPAREN = :rparen          # )
+      LBRACKET = :lbracket      # [
+      RBRACKET = :rbracket      # ]
+      # Special
+      NEWLINE = :newline
+      EOF = :eof
+      COMMENT = :comment        # # comment
+    end
+    # Lookup table of parser hints keyed by token type Symbol (the same Symbols defined in TokenType)
+    TOKEN_METADATA = {
+      # Keywords with parsing hints
+      schema: {
+        category: :keyword,
+        expects_block: true,
+        block_terminator: :end
+      },
+      input: {
+        category: :keyword,
+        expects_block: true,
+        block_terminator: :end,
+        context: :input_declarations
+      },
+      value: {
+        category: :keyword,
+        expects_expression: true,
+        declaration_type: :value
+      },
+      trait: {
+        category: :keyword,
+        expects_expression: true,
+        declaration_type: :trait
+      },
+      do: {
+        category: :keyword,
+        block_opener: true
+      },
+      end: {
+        category: :keyword,
+        block_closer: true,
+        terminates_expression: true
+      },
+      on: {
+        category: :keyword,
+        cascade_keyword: true,
+        expects_condition: true
+      },
+      base: {
+        category: :keyword,
+        cascade_keyword: true,
+        is_base_case: true
+      },
+      # Type keywords (type_name is the bare type Symbol without the _type suffix)
+      integer_type: {
+        category: :type_keyword,
+        starts_declaration: true,
+        type_name: :integer
+      },
+      float_type: {
+        category: :type_keyword,
+        starts_declaration: true,
+        type_name: :float
+      },
+      string_type: {
+        category: :type_keyword,
+        starts_declaration: true,
+        type_name: :string
+      },
+      boolean_type: {
+        category: :type_keyword,
+        starts_declaration: true,
+        type_name: :boolean
+      },
+      any_type: {
+        category: :type_keyword,
+        starts_declaration: true,
+        type_name: :any
+      },
+      array_type: {
+        category: :type_keyword,
+        starts_declaration: true,
+        type_name: :array
+      },
+      # Function keyword
+      fn: {
+        category: :keyword,
+        function_keyword: true,
+        starts_expression: true
+      },
+      # Operators with precedence and associativity (values run from 6 for * / % down to 2 for |; presumably a higher number binds tighter - confirm against the parser)
+      multiply: {
+        category: :operator,
+        precedence: 6,
+        associativity: :left,
+        arity: :binary
+      },
+      divide: {
+        category: :operator,
+        precedence: 6,
+        associativity: :left,
+        arity: :binary
+      },
+      modulo: {
+        category: :operator,
+        precedence: 6,
+        associativity: :left,
+        arity: :binary
+      },
+      add: {
+        category: :operator,
+        precedence: 5,
+        associativity: :left,
+        arity: :binary
+      },
+      subtract: {
+        category: :operator,
+        precedence: 5,
+        associativity: :left,
+        arity: :binary
+      },
+      gte: {
+        category: :operator,
+        precedence: 4,
+        associativity: :left,
+        arity: :binary,
+        returns_boolean: true
+      },
+      lte: {
+        category: :operator,
+        precedence: 4,
+        associativity: :left,
+        arity: :binary,
+        returns_boolean: true
+      },
+      gt: {
+        category: :operator,
+        precedence: 4,
+        associativity: :left,
+        arity: :binary,
+        returns_boolean: true
+      },
+      lt: {
+        category: :operator,
+        precedence: 4,
+        associativity: :left,
+        arity: :binary,
+        returns_boolean: true
+      },
+      eq: {
+        category: :operator,
+        precedence: 4,
+        associativity: :left,
+        arity: :binary,
+        returns_boolean: true
+      },
+      ne: {
+        category: :operator,
+        precedence: 4,
+        associativity: :left,
+        arity: :binary,
+        returns_boolean: true
+      },
+      and: {
+        category: :operator,
+        precedence: 3,
+        associativity: :left,
+        arity: :binary,
+        requires_boolean: true
+      },
+      or: {
+        category: :operator,
+        precedence: 2,
+        associativity: :left,
+        arity: :binary,
+        requires_boolean: true
+      },
+      # Literals with type information (ast_class is stored as a String class name, not as a constant reference)
+      integer: {
+        category: :literal,
+        starts_expression: true,
+        ast_class: 'Kumi::Syntax::Literal'
+      },
+      float: {
+        category: :literal,
+        starts_expression: true,
+        ast_class: 'Kumi::Syntax::Literal'
+      },
+      string: {
+        category: :literal,
+        starts_expression: true,
+        ast_class: 'Kumi::Syntax::Literal'
+      },
+      boolean: {
+        category: :literal,
+        starts_expression: true,
+        ast_class: 'Kumi::Syntax::Literal'
+      },
+      # Identifiers and references
+      identifier: {
+        category: :identifier,
+        starts_expression: true,
+        can_be_reference: true
+      },
+      symbol: {
+        category: :identifier,
+        starts_expression: true,
+        is_declaration_name: true
+      },
+      # Punctuation with parser hints (group_closer names the token type that closes an opened group)
+      dot: {
+        category: :punctuation,
+        indicates_member_access: true
+      },
+      comma: {
+        category: :punctuation,
+        separates_items: true
+      },
+      colon: {
+        category: :punctuation,
+        indicates_symbol: true
+      },
+      lparen: {
+        category: :punctuation,
+        opens_group: true,
+        group_closer: :rparen,
+        starts_expression: true
+      },
+      rparen: {
+        category: :punctuation,
+        closes_group: true,
+        terminates_expression: true
+      },
+      lbracket: {
+        category: :punctuation,
+        opens_group: true,
+        group_closer: :rbracket,
+        starts_expression: true,
+        indicates_array: true
+      },
+      rbracket: {
+        category: :punctuation,
+        closes_group: true,
+        terminates_expression: true
+      },
+      # Special tokens (newline and comment both use category :whitespace)
+      newline: {
+        category: :whitespace,
+        separates_statements: true
+      },
+      eof: {
+        category: :special,
+        terminates_input: true
+      },
+      comment: {
+        category: :whitespace,
+        ignored_by_parser: true
+      }
+    }.freeze # shallow freeze: the nested per-token hashes are not themselves frozen
+    # Single-character to token type mappings; the comparison operators (>=, <=, >, <, ==, !=) have no entry here
+    CHAR_TO_TOKEN = {
+      '(' => :lparen,
+      ')' => :rparen,
+      '[' => :lbracket,
+      ']' => :rbracket,
+      ',' => :comma,
+      '.' => :dot,
+      ':' => :colon,
+      '+' => :add,
+      '-' => :subtract,
+      '*' => :multiply,
+      '/' => :divide,
+      '%' => :modulo,
+      '&' => :and,
+      '|' => :or
+    }.freeze
+    # Reserved word (String) to token type mapping; covers keywords, boolean literals and type names
+    KEYWORDS = {
+      'schema' => :schema,
+      'input' => :input,
+      'value' => :value,
+      'trait' => :trait,
+      'do' => :do,
+      'end' => :end,
+      'on' => :on,
+      'base' => :base,
+      'fn' => :fn,
+      'true' => :boolean, # both boolean words share the :boolean literal token type
+      'false' => :boolean,
+      'integer' => :integer_type,
+      'float' => :float_type,
+      'string' => :string_type,
+      'boolean' => :boolean_type,
+      'any' => :any_type,
+      'array' => :array_type
+    }.freeze
+    # Closer-to-opener mappings for error recovery (key: closing token type, value: its matching opening token type)
+    OPENER_FOR_CLOSER = {
+      rparen: :lparen,
+      rbracket: :lbracket
+    }.freeze
+  end
+end

data/lib/kumi/parser/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Kumi
   module Parser
-    VERSION = '0.0.3'
+    VERSION = '0.0.4'
   end
 end

data/lib/kumi/text_parser.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+require_relative 'parser/text_parser'
+module Kumi
+  # Top-level text parser facade with same interface as Ruby DSL; every method forwards its arguments unchanged to Parser::TextParser
+  module TextParser
+    extend self # makes the methods below callable directly on the module, e.g. TextParser.parse(text)
+    # Parse text schema and return the result of Parser::TextParser.parse (described as the AST; same interface as RubyParser::Dsl.build_syntax_tree)
+    def parse(text, source_file: '<input>')
+      Parser::TextParser.parse(text, source_file: source_file)
+    end
+    # Validate text schema; returns whatever Parser::TextParser.valid? returns (presumably a boolean)
+    def valid?(text, source_file: '<input>')
+      Parser::TextParser.valid?(text, source_file: source_file)
+    end
+    # Get validation diagnostics as produced by Parser::TextParser.validate
+    def validate(text, source_file: '<input>')
+      Parser::TextParser.validate(text, source_file: source_file)
+    end
+    # Get Monaco Editor format diagnostics (delegates to Parser::TextParser.diagnostics_for_monaco)
+    def diagnostics_for_monaco(text, source_file: '<input>')
+      Parser::TextParser.diagnostics_for_monaco(text, source_file: source_file)
+    end
+    # Get CodeMirror format diagnostics (delegates to Parser::TextParser.diagnostics_for_codemirror)
+    def diagnostics_for_codemirror(text, source_file: '<input>')
+      Parser::TextParser.diagnostics_for_codemirror(text, source_file: source_file)
+    end
+    # Get JSON format diagnostics (delegates to Parser::TextParser.diagnostics_as_json)
+    def diagnostics_as_json(text, source_file: '<input>')
+      Parser::TextParser.diagnostics_as_json(text, source_file: source_file)
+    end
+  end
+end

data/lib/kumi/text_schema.rb ADDED Viewed

@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+require 'kumi'
+require_relative 'text_parser'
+module Kumi
+  # Text-based schema that extends Kumi::Schema and builds a schema from text by parsing, analyzing and compiling it
+  class TextSchema
+    extend Kumi::Schema
+    # Create a schema from text using the same pipeline as Ruby DSL; returns an Inspector built from the syntax tree, analyzer result and compiled schema
+    def self.from_text(text, source_file: '<input>')
+      # Parse text to AST (same as RubyParser::Dsl.build_syntax_tree). NOTE(review): the three results are stored in class-level instance variables, so each from_text call overwrites the previous call's values - confirm this is intended
+      @__syntax_tree__ = Kumi::TextParser.parse(text, source_file: source_file).freeze
+      @__analyzer_result__ = Analyzer.analyze!(@__syntax_tree__).freeze
+      @__compiled_schema__ = Compiler.compile(@__syntax_tree__, analyzer: @__analyzer_result__).freeze
+      Inspector.new(@__syntax_tree__, @__analyzer_result__, @__compiled_schema__)
+    end
+    # Validate text schema (delegates to Kumi::TextParser.valid?; does not touch the stored class-level state)
+    def self.valid?(text, source_file: '<input>')
+      Kumi::TextParser.valid?(text, source_file: source_file)
+    end
+    # Get validation diagnostics (delegates to Kumi::TextParser.validate)
+    def self.validate(text, source_file: '<input>')
+      Kumi::TextParser.validate(text, source_file: source_file)
+    end
+  end
+end

data/lib/kumi-parser.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 require 'kumi'
+require 'kumi/syntax/node'
 require 'zeitwerk'
 require 'parslet'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: kumi-parser
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Kumi Team
@@ -130,6 +130,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".rspec"
+- CLAUDE.md
 - LICENSE
 - README.md
 - Rakefile
@@ -139,18 +140,19 @@ files:
 - examples/text_parser_test_with_comments.rb
 - kumi-parser.gemspec
 - lib/kumi-parser.rb
-- lib/kumi/parser.rb
-- lib/kumi/parser/analyzer_diagnostic_converter.rb
+- lib/kumi/parser/base.rb
+- lib/kumi/parser/direct_parser.rb
 - lib/kumi/parser/error_extractor.rb
+- lib/kumi/parser/errors.rb
+- lib/kumi/parser/smart_tokenizer.rb
 - lib/kumi/parser/syntax_validator.rb
 - lib/kumi/parser/text_parser.rb
 - lib/kumi/parser/text_parser/api.rb
-- lib/kumi/parser/text_parser/editor_diagnostic.rb
-- lib/kumi/parser/text_parser/grammar.rb
-- lib/kumi/parser/text_parser/parser.rb
-- lib/kumi/parser/text_parser/transform.rb
+- lib/kumi/parser/token.rb
+- lib/kumi/parser/token_metadata.rb
 - lib/kumi/parser/version.rb
-- test_basic.rb
+- lib/kumi/text_parser.rb
+- lib/kumi/text_schema.rb
 homepage: https://github.com/amuta/kumi-parser
 licenses:
 - MIT

data/lib/kumi/parser/analyzer_diagnostic_converter.rb DELETED Viewed

@@ -1,84 +0,0 @@
-# frozen_string_literal: true
-require_relative 'text_parser/editor_diagnostic'
-module Kumi
-  module Parser
-    # Converts analyzer errors (two-element arrays, objects responding to #message, or anything else non-nil) to editor diagnostics
-    class AnalyzerDiagnosticConverter
-      def self.convert_errors(errors) # returns a TextParser::DiagnosticCollection; errors that convert to nil are skipped
-        diagnostics = TextParser::DiagnosticCollection.new
-        errors.each do |error|
-          diagnostic = convert_single_error(error)
-          diagnostics << diagnostic if diagnostic
-        end
-        diagnostics
-      end
-      def self.convert_single_error(error) # returns one EditorDiagnostic, or nil when error is nil/false
-        # Handle legacy array format [location, message]; line and column default to 1 when the location does not provide them
-        if error.is_a?(Array) && error.size == 2
-          location, message = error
-          line = location&.respond_to?(:line) ? location.line : 1
-          column = location&.respond_to?(:column) ? location.column : 1
-          return TextParser::EditorDiagnostic.new(
-            line: line,
-            column: column,
-            message: message.to_s,
-            severity: :error,
-            type: :semantic
-          )
-        end
-        # Handle regular error objects (anything responding to #message); location and type are optional
-        if error&.respond_to?(:message)
-          line = error.respond_to?(:location) && error.location&.respond_to?(:line) ? error.location.line : 1
-          column = error.respond_to?(:location) && error.location&.respond_to?(:column) ? error.location.column : 1
-          # Extract error type (defaults to :semantic) and map to severity
-          error_type = error.respond_to?(:type) ? error.type : :semantic
-          severity = map_type_to_severity(error_type)
-          return TextParser::EditorDiagnostic.new(
-            line: line,
-            column: column,
-            message: error.message,
-            severity: severity,
-            type: error_type
-          )
-        end
-        # Handle unknown formats (strings, etc.); reported at line 1, column 1
-        return unless error
-        TextParser::EditorDiagnostic.new(
-          line: 1,
-          column: 1,
-          message: "Unknown analyzer error: #{error}",
-          severity: :error,
-          type: :semantic
-        )
-      end
-      def self.extract_location(location) # not called by the other methods in this class; returns { line:, column: }, falling back to 1/1
-        if location&.respond_to?(:line) && location.respond_to?(:column)
-          { line: location.line, column: location.column }
-        else
-          { line: 1, column: 1 }
-        end
-      end
-      def self.map_type_to_severity(type) # :warning, :info and :hint pass through unchanged; every other type becomes :error
-        case type
-        when :warning then :warning
-        when :info then :info
-        when :hint then :hint
-        else :error
-        end
-      end
-    end
-  end
-end

data/lib/kumi/parser/text_parser/editor_diagnostic.rb DELETED Viewed

@@ -1,102 +0,0 @@
-# frozen_string_literal: true
-module Kumi
-  module Parser
-    module TextParser
-      # Simple diagnostic for online editors: a line/column position, a message, a severity and a type, with converters to editor-specific hashes
-      class EditorDiagnostic
-        attr_reader :line, :column, :message, :severity, :type
-        def initialize(line:, column:, message:, severity: :error, type: :syntax)
-          @line = line
-          @column = column
-          @message = message
-          @severity = severity
-          @type = type
-        end
-        def to_monaco # marker hash covering a one-column-wide range on a single line
-          {
-            startLineNumber: line,
-            startColumn: column,
-            endLineNumber: line,
-            endColumn: column + 1,
-            message: message,
-            severity: monaco_severity
-          }
-        end
-        def to_codemirror # from/to are synthetic positions computed as (line - 1) * 1000 plus the column; NOTE(review): not real document offsets - confirm how consumers interpret them
-          {
-            from: (line - 1) * 1000 + (column - 1),
-            to: (line - 1) * 1000 + column,
-            message: message,
-            severity: severity.to_s
-          }
-        end
-        def to_h # plain hash with severity and type converted to strings
-          {
-            line: line,
-            column: column,
-            message: message,
-            severity: severity.to_s,
-            type: type.to_s
-          }
-        end
-        def to_json(*args)
-          require 'json' # loaded lazily, only when JSON output is requested
-          to_h.to_json(*args)
-        end
-        private
-        def monaco_severity
-          case severity
-          when :error then 8    # Monaco.MarkerSeverity.Error
-          when :warning then 4  # Monaco.MarkerSeverity.Warning
-          when :info then 2     # Monaco.MarkerSeverity.Info
-          else 8 # any other severity (including :hint) is reported with the Error value
-          end
-        end
-      end
-      # Collection of diagnostics: a thin wrapper around an Array with bulk converters to the editor formats
-      class DiagnosticCollection
-        def initialize(diagnostics = []) # the given array is stored as-is, not copied
-          @diagnostics = diagnostics
-        end
-        def <<(diagnostic)
-          @diagnostics << diagnostic
-        end
-        def empty?
-          @diagnostics.empty?
-        end
-        def count
-          @diagnostics.length
-        end
-        def to_monaco
-          @diagnostics.map(&:to_monaco)
-        end
-        def to_codemirror
-          @diagnostics.map(&:to_codemirror)
-        end
-        def to_json(*args) # serializes the array of each diagnostic's to_h
-          require 'json'
-          @diagnostics.map(&:to_h).to_json(*args)
-        end
-        def to_a # returns the internal array itself, so callers can mutate the collection through it
-          @diagnostics
-        end
-      end
-    end
-  end
-end