RubyGems - kumi-parser - Versions diffs - 0.0.33 → 0.1.0 - Mend

kumi-parser 0.0.33 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +4 -4
data/.rubocop.yml +41 -0
data/CHANGELOG.md +64 -0
data/CLAUDE.md +59 -120
data/README.md +28 -6
data/examples/parse_and_inspect.rb +34 -0
data/kumi-parser.gemspec +3 -4
data/lib/kumi/parser/grammar.rb +120 -0
data/lib/kumi/parser/lexer.rb +232 -0
data/lib/kumi/parser/parse_error.rb +52 -0
data/lib/kumi/parser/parser.rb +692 -0
data/lib/kumi/parser/source.rb +76 -0
data/lib/kumi/parser/text_parser.rb +37 -27
data/lib/kumi/parser/token.rb +10 -71
data/lib/kumi/parser/version.rb +1 -1
data/lib/kumi-parser.rb +9 -10
metadata +16 -37
data/examples/debug_text_parser.rb +0 -41
data/examples/debug_transform_rule.rb +0 -26
data/examples/text_parser_comprehensive_test.rb +0 -333
data/examples/text_parser_test_with_comments.rb +0 -146
data/lib/kumi/parser/base.rb +0 -51
data/lib/kumi/parser/direct_parser.rb +0 -698
data/lib/kumi/parser/error_extractor.rb +0 -89
data/lib/kumi/parser/errors.rb +0 -40
data/lib/kumi/parser/helpers.rb +0 -154
data/lib/kumi/parser/smart_tokenizer.rb +0 -373
data/lib/kumi/parser/syntax_validator.rb +0 -21
data/lib/kumi/parser/text_parser/api.rb +0 -60
data/lib/kumi/parser/token_constants.rb +0 -468
data/lib/kumi/text_parser.rb +0 -40
data/lib/kumi/text_schema.rb +0 -31

data/lib/kumi/parser/source.rb ADDED Viewed

@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+module Kumi
+  module Parser
+    # The text being parsed, plus the bookkeeping needed to turn a byte offset
+    # into a 1-based line/column and to render a caret-annotated code frame for
+    # error messages. Owning this here keeps location math in one place instead
+    # of being recomputed in the lexer and the parser.
+    class Source
+      attr_reader :text, :file
+      # @param text [String] the full source text being parsed
+      # @param file [String] name reported in locations (defaults to 'schema')
+      def initialize(text, file: 'schema')
+        @text = text
+        @file = file
+        @line_starts = compute_line_starts(text)
+      end
+      # 1-based [line, column] for a 0-based offset into `text`.
+      #
+      # Offsets are measured in the unit used by String#length / each_char
+      # (characters). NOTE(review): the surrounding docs call these byte
+      # offsets; the two only coincide for single-byte text — confirm which
+      # unit the lexer actually supplies.
+      #
+      # Out-of-range offsets are clamped into 0..text.length, so a negative
+      # offset can no longer index the line-start table from its end and
+      # produce a line of 0 or a column below 1.
+      def line_col(offset)
+        offset = offset.clamp(0, text.length)
+        line = upper_bound(@line_starts, offset) - 1
+        [line + 1, offset - @line_starts[line] + 1]
+      end
+      # Builds a Kumi::Syntax::Location (file, line, column) for `offset`.
+      def location(offset)
+        line, col = line_col(offset)
+        Kumi::Syntax::Location.new(file: file, line: line, column: col)
+      end
+      # A code frame showing `context` lines either side of the offending line,
+      # a '➤' marker on that line and a caret under the offending column, in
+      # the same shape kumi-core's text frontend already renders.
+      # Returns '' when the text is empty.
+      def code_frame(offset, context: 2)
+        lines = text.lines
+        return '' if lines.empty?
+        line, col = line_col(offset)
+        target = line - 1
+        # An offset at end-of-input after a trailing newline sits on a line
+        # that String#lines does not produce. Keep that (blank) line in range
+        # so the marker and caret are still drawn for end-of-input errors.
+        last = [lines.length - 1, target].max
+        from = [target - context, 0].max
+        to   = [target + context, last].min
+        (from..to).flat_map do |i|
+          row = format('%s %4d | %s', i == target ? '➤' : ' ', i + 1, lines[i].to_s.chomp)
+          i == target ? [row, format('       | %s^', ' ' * (col - 1))] : [row]
+        end.join("\n")
+      end
+      private
+      # Offsets at which each line begins: 0, plus the index just past every "\n".
+      def compute_line_starts(text)
+        starts = [0]
+        text.each_char.with_index { |ch, i| starts << i + 1 if ch == "\n" }
+        starts
+      end
+      # Count of entries in `starts` that are <= offset (equivalently, the index
+      # of the first start greater than offset). `starts` is ascending, so a
+      # binary search applies; line_col subtracts 1 to get the 0-based line.
+      def upper_bound(starts, offset)
+        starts.bsearch_index { |start| start > offset } || starts.length
+      end
+    end
+  end
+end

data/lib/kumi/parser/text_parser.rb CHANGED Viewed

@@ -1,38 +1,48 @@
 # frozen_string_literal: true
-require_relative 'smart_tokenizer'
-require_relative 'direct_parser'
-require_relative 'errors'
 module Kumi
   module Parser
+    # The public entry point. kumi-core calls `TextParser.parse(src, source_file:)`
+    # and nothing else; `valid?` and `validate` exist for editor/tooling use.
+    #
+    # Parse errors are raised as Kumi::Errors::SyntaxError carrying a short
+    # message and a Location object (so kumi-core's frontend can render its own
+    # frame without re-parsing the message). The message itself is the bare
+    # what/why, without location — the frame and `file:line:col` header are
+    # added by the presentation layer from the Location.
     module TextParser
-      # Clean text parser focused on core parsing functionality
+      module_function
-      class << self
-        # Parse text to AST
-        def parse(text, source_file: '<input>')
-          tokens = Kumi::Parser::SmartTokenizer.new(text, source_file: source_file).tokenize
-          Kumi::Parser::DirectParser.new(tokens).parse
-        rescue Kumi::Parser::Errors::ParseError, Kumi::Parser::Errors::TokenizerError => e
-          # Convert parser errors to the expected SyntaxError for compatibility
-          raise Kumi::Errors::SyntaxError, e.message
-        end
+      # Lexes and parses `text`, returning whatever Parser#parse produces.
+      # `source_file` is the name handed to Source for locations. A ParseError
+      # raised anywhere in that pipeline is re-raised as
+      # Kumi::Errors::SyntaxError built from its short_message and location.
+      def parse(text, source_file: 'schema')
+        begin
+          src = Source.new(text, file: source_file)
+          Parser.new(Lexer.new(src).tokenize, src).parse
+        rescue ParseError => err
+          raise Kumi::Errors::SyntaxError.new(err.short_message, err.location)
+        end
+      end
-        # Check if text is syntactically valid
-        def valid?(text, source_file: '<input>')
-          parse(text, source_file: source_file)
-          true
-        rescue StandardError => e
-          false
-        end
+      # True when `text` parses; false when parse raises
+      # Kumi::Errors::SyntaxError. Any other exception propagates.
+      def valid?(text, source_file: 'schema')
+        begin
+          parse(text, source_file: source_file)
+        rescue Kumi::Errors::SyntaxError
+          return false
+        end
+        true
+      end
-        # Basic validation - returns array of error hashes
-        def validate(text, source_file: '<input>')
-          # Use SyntaxValidator for proper diagnostic extraction
-          validator = Kumi::Parser::SyntaxValidator.new
-          validator.validate(text, source_file: source_file)
-        end
+      # Returns an array of diagnostic hashes (empty when valid) for editors.
+      # Runs the same lex/parse pipeline as `parse` but reports instead of
+      # raising: [] on success, or a one-element array holding a hash with
+      # :line, :column, :message, :severity and :type when a ParseError occurs.
+      def validate(text, source_file: 'schema')
+        src = Source.new(text, file: source_file)
+        Parser.new(Lexer.new(src).tokenize, src).parse
+        []
+      rescue ParseError => err
+        diagnostic = {
+          line: err.line,
+          column: err.column,
+          message: err.short_message,
+          severity: :error,
+          type: :syntax
+        }
+        [diagnostic]
       end
     end
   end

data/lib/kumi/parser/token.rb CHANGED Viewed

@@ -2,83 +2,22 @@
 module Kumi
   module Parser
-    # Token with embedded metadata for smart parsing
-    class Token
-      attr_reader :type, :value, :location, :metadata
-      def initialize(type, value, location, metadata = {})
-        @type = type
-        @value = value
-        @location = location
-        @metadata = metadata
-      end
-      # Semantic predicates embedded in token
-      def keyword?
-        @metadata[:category] == :keyword
-      end
-      def operator?
-        @metadata[:category] == :operator
-      end
-      def literal?
-        @metadata[:category] == :literal
-      end
-      def identifier?
-        @metadata[:category] == :identifier
-      end
-      def punctuation?
-        @metadata[:category] == :punctuation
-      end
-      # Operator precedence embedded in token
-      def precedence
-        @metadata[:precedence] || 0
-      end
-      def left_associative?
-        @metadata[:associativity] == :left
-      end
-      def right_associative?
-        @metadata[:associativity] == :right
-      end
-      # Parser hints embedded in token
-      def expects_block?
-        @metadata[:expects_block] == true
-      end
-      def terminates_expression?
-        @metadata[:terminates_expression] == true
-      end
-      def starts_expression?
-        @metadata[:starts_expression] == true
-      end
-      # Direct AST construction hint
-      def ast_class
-        @metadata[:ast_class]
-      end
+    # One lexical token. Unlike the old SmartTokenizer's metadata-bag tokens,
+    # this carries only a typed `kind`, its literal `value`, and the byte
+    # `offset` where it starts — enough to build a location and error frame on
+    # demand via Source.
+    #
+    # Everything the parser needs to know about a token's role (precedence,
+    # associativity, whether it's a type keyword, …) lives in the grammar
+    # tables in Grammar, keyed by `kind` — not duplicated onto every token.
+    Token = Struct.new(:kind, :value, :offset) do
+      # Renders as kind(value.inspect); the offset is not included.
       def to_s
-        "#{@type}(#{@value.inspect}) at #{@location}"
+        "#{kind}(#{value.inspect})"
       end
+      # Same rendering as to_s, so tokens print compactly in debug output.
       def inspect
         to_s
       end
-      def ==(other)
-        other.is_a?(Token) &&
-          @type == other.type &&
-          @value == other.value &&
-          @location == other.location
-      end
     end
   end
 end

data/lib/kumi/parser/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Kumi
   module Parser
-    VERSION = '0.0.33'
+    VERSION = '0.1.0'
   end
 end

data/lib/kumi-parser.rb CHANGED Viewed

@@ -2,20 +2,19 @@
 require 'kumi'
 require 'kumi/syntax/node'
-require 'zeitwerk'
-require 'parslet'
-loader = Zeitwerk::Loader.for_gem(warn_on_extra_files: false)
-loader.ignore("#{__dir__}/kumi-parser.rb")
-loader.ignore("#{__dir__}/kumi/parser/version.rb")
-loader.ignore("#{__dir__}/kumi/parser/token_constants.rb")
-loader.setup
 require_relative 'kumi/parser/version'
-require_relative 'kumi/parser/token_constants'
+require_relative 'kumi/parser/grammar'
+require_relative 'kumi/parser/source'
+require_relative 'kumi/parser/token'
+require_relative 'kumi/parser/parse_error'
+require_relative 'kumi/parser/lexer'
+require_relative 'kumi/parser/parser'
+require_relative 'kumi/parser/text_parser'
 module Kumi
+  # Text frontend for Kumi: lexes and parses `.kumi` schema source into
+  # kumi-core's Kumi::Syntax AST. See Kumi::Parser::TextParser for the API.
   module Parser
-    # Parser extension for Kumi DSL
   end
 end

metadata CHANGED Viewed

@@ -1,43 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: kumi-parser
 version: !ruby/object:Gem::Version
-  version: 0.0.33
+  version: 0.1.0
 platform: ruby
 authors:
 - Kumi Team
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2026-06-14 00:00:00.000000000 Z
+date: 2026-06-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: parslet
+  name: kumi
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '2.0'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.0'
-- !ruby/object:Gem::Dependency
-  name: zeitwerk
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.6'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '2.6'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -117,31 +103,24 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".rspec"
+- ".rubocop.yml"
 - ".ruby-version"
+- CHANGELOG.md
 - CLAUDE.md
 - LICENSE
 - README.md
 - Rakefile
-- examples/debug_text_parser.rb
-- examples/debug_transform_rule.rb
-- examples/text_parser_comprehensive_test.rb
-- examples/text_parser_test_with_comments.rb
+- examples/parse_and_inspect.rb
 - kumi-parser.gemspec
 - lib/kumi-parser.rb
-- lib/kumi/parser/base.rb
-- lib/kumi/parser/direct_parser.rb
-- lib/kumi/parser/error_extractor.rb
-- lib/kumi/parser/errors.rb
-- lib/kumi/parser/helpers.rb
-- lib/kumi/parser/smart_tokenizer.rb
-- lib/kumi/parser/syntax_validator.rb
+- lib/kumi/parser/grammar.rb
+- lib/kumi/parser/lexer.rb
+- lib/kumi/parser/parse_error.rb
+- lib/kumi/parser/parser.rb
+- lib/kumi/parser/source.rb
 - lib/kumi/parser/text_parser.rb
-- lib/kumi/parser/text_parser/api.rb
 - lib/kumi/parser/token.rb
-- lib/kumi/parser/token_constants.rb
 - lib/kumi/parser/version.rb
-- lib/kumi/text_parser.rb
-- lib/kumi/text_schema.rb
 homepage: https://github.com/amuta/kumi-parser
 licenses:
 - MIT
@@ -158,7 +137,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 3.0.0
+      version: 3.1.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="

data/examples/debug_text_parser.rb DELETED Viewed

@@ -1,41 +0,0 @@
-# Debug the text parser transform
-require_relative 'lib/kumi/text_parser'
-schema_text = <<~SCHEMA
-  schema do
-    input do
-      integer :age
-    end
-  #{'  '}
-    trait :adult, input.age >= 18
-    value :bonus, 100
-  end
-SCHEMA
-puts 'Debugging text parser...'
-begin
-  # Test just the grammar parsing first
-  grammar = Kumi::TextParser::Grammar.new
-  parse_tree = grammar.parse(schema_text)
-  puts 'Raw parse tree:'
-  puts parse_tree.inspect
-  puts
-  # Now test the transform
-  transform = Kumi::TextParser::Transform.new
-  ast = transform.apply(parse_tree)
-  puts 'Transformed AST:'
-  puts ast.inspect
-  puts
-  puts 'AST structure:'
-  puts "- Values: #{ast.values.count} - #{ast.values.map(&:name)}"
-  puts "- Traits: #{ast.traits.count} - #{ast.traits.map(&:name)}"
-rescue StandardError => e
-  puts "Error: #{e.message}"
-  puts e.backtrace.first(5)
-end

data/examples/debug_transform_rule.rb DELETED Viewed

@@ -1,26 +0,0 @@
-# Debug specific transform rule
-require_relative 'lib/kumi/text_parser'
-# Test just the trait parsing
-trait_text = 'trait :adult, input.age >= 18'
-grammar = Kumi::TextParser::Grammar.new
-transform = Kumi::TextParser::Transform.new
-begin
-  # Parse just the trait declaration
-  parse_result = grammar.trait_declaration.parse(trait_text)
-  puts 'Trait parse result:'
-  puts parse_result.inspect
-  puts
-  # Try to transform it
-  transformed = transform.apply(parse_result)
-  puts 'Transformed result:'
-  puts transformed.inspect
-  puts "Class: #{transformed.class}"
-rescue StandardError => e
-  puts "Error: #{e.message}"
-  puts e.backtrace.first(5)
-end