kumi-parser 0.0.33 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Parser
5
+ # The text being parsed, plus the bookkeeping needed to turn a byte offset
6
+ # into a 1-based line/column and to render a caret-annotated code frame for
7
+ # error messages. Owning this here keeps location math in one place instead
8
+ # of being recomputed in the lexer and the parser.
9
class Source
  # Left gutter of the caret row. It is exactly as wide as the
  # "<marker> <4-digit line> | " prefix that code_frame puts in front of every
  # source row, so the caret lands under the offending column.
  CARET_GUTTER = '       | '.freeze

  attr_reader :text, :file

  # @param text [String] the full source text
  # @param file [String] name reported in locations built from this source
  def initialize(text, file: 'schema')
    @text = text
    @file = file
    @line_starts = compute_line_starts(text)
  end

  # 1-based [line, column] for a 0-based offset into the text.
  #
  # Offsets outside the text are clamped to 0..text.length, so a negative
  # offset reports [1, 1] instead of indexing the line table from its end.
  #
  # NOTE(review): line starts are counted per character (see
  # compute_line_starts), while the surrounding comments call the offset a
  # byte offset. The two agree for ASCII-only text; confirm the lexer's
  # offsets are character-based before relying on this for multibyte input.
  #
  # @param offset [Integer]
  # @return [Array(Integer, Integer)]
  def line_col(offset)
    offset = offset.clamp(0, text.length)
    line = upper_bound(@line_starts, offset) - 1
    [line + 1, offset - @line_starts[line] + 1]
  end

  # Location object (file, line, column) for the given offset.
  def location(offset)
    line, col = line_col(offset)
    Kumi::Syntax::Location.new(file: file, line: line, column: col)
  end

  # Renders the line containing `offset` with up to `context` lines on each
  # side, marks it with '➤', and puts a caret row under the offending column.
  # Returns '' for empty text.
  #
  # @param offset [Integer]
  # @param context [Integer] number of lines shown before and after
  # @return [String]
  def code_frame(offset, context: 2)
    lines = text.lines
    return '' if lines.empty?

    line, col = line_col(offset)
    target = line - 1
    # When the text ends in a newline, an offset at EOF belongs to a final
    # empty line that String#lines does not list. Let the window reach it so
    # the marker and caret are still rendered (as an empty row).
    last = [lines.length - 1, target].max
    from = [target - context, 0].max
    to = [target + context, last].min

    (from..to).flat_map do |i|
      hit = i == target
      row = format('%s %4d | %s', hit ? '➤' : ' ', i + 1, lines[i].to_s.chomp)
      hit ? [row, "#{CARET_GUTTER}#{' ' * (col - 1)}^"] : [row]
    end.join("\n")
  end

  private

  # Offsets at which each line begins: 0, plus the position right after
  # every "\n". Positions are character indexes.
  def compute_line_starts(text)
    starts = [0]
    text.each_char.with_index { |ch, i| starts << i + 1 if ch == "\n" }
    starts
  end

  # Number of line starts that are <= offset, i.e. the index of the first
  # start strictly greater than offset (or starts.length when there is none).
  # Callers subtract 1 to get the 0-based line index.
  def upper_bound(starts, offset)
    starts.bsearch_index { |start| start > offset } || starts.length
  end
end
75
+ end
76
+ end
@@ -1,38 +1,48 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'smart_tokenizer'
4
- require_relative 'direct_parser'
5
- require_relative 'errors'
6
-
7
3
  module Kumi
8
4
  module Parser
5
+ # The public entry point. kumi-core calls `TextParser.parse(src, source_file:)`
6
+ # and nothing else; `valid?` and `validate` exist for editor/tooling use.
7
+ #
8
+ # Parse errors are raised as Kumi::Errors::SyntaxError carrying two things:
9
+ # the ParseError's short message (the bare what/why, with no location or
10
+ # code frame baked in) and a Location object, so kumi-core's frontend can
11
+ # render its own frame without re-parsing the message. The `file:line:col`
12
+ # header and the frame are added by the presentation layer from the
13
+ # Location.
9
14
  module TextParser
10
- # Clean text parser focused on core parsing functionality
15
+ module_function
11
16
 
12
- class << self
13
- # Parse text to AST
14
- def parse(text, source_file: '<input>')
15
- tokens = Kumi::Parser::SmartTokenizer.new(text, source_file: source_file).tokenize
16
- Kumi::Parser::DirectParser.new(tokens).parse
17
- rescue Kumi::Parser::Errors::ParseError, Kumi::Parser::Errors::TokenizerError => e
18
- # Convert parser errors to the expected SyntaxError for compatibility
19
- raise Kumi::Errors::SyntaxError, e.message
20
- end
17
# Lexes and parses `text`, returning whatever Parser#parse produces.
#
# @param text [String] schema source
# @param source_file [String] name passed to Source as its `file`
# @raise [Kumi::Errors::SyntaxError] built from a ParseError's short message
#   and location
def parse(text, source_file: 'schema')
  src = Source.new(text, file: source_file)
  Parser.new(Lexer.new(src).tokenize, src).parse
rescue ParseError => err
  raise Kumi::Errors::SyntaxError.new(err.short_message, err.location)
end
21
24
 
22
- # Check if text is syntactically valid
23
- def valid?(text, source_file: '<input>')
24
- parse(text, source_file: source_file)
25
- true
26
- rescue StandardError => e
27
- false
28
- end
25
# True when `parse` completes, false when it raises
# Kumi::Errors::SyntaxError. Any other exception propagates.
#
# @param text [String] schema source
# @param source_file [String] forwarded to `parse`
# @return [Boolean]
def valid?(text, source_file: 'schema')
  begin
    parse(text, source_file: source_file)
  rescue Kumi::Errors::SyntaxError
    return false
  end

  true
end
29
31
 
30
- # Basic validation - returns array of error hashes
31
- def validate(text, source_file: '<input>')
32
- # Use SyntaxValidator for proper diagnostic extraction
33
- validator = Kumi::Parser::SyntaxValidator.new
34
- validator.validate(text, source_file: source_file)
35
- end
32
+ # Returns an array of diagnostic hashes (empty when valid) for editors.
33
# Runs the lexer and parser over `text` and reports the outcome as a list of
# diagnostic hashes: empty on success, one entry when a ParseError is raised.
#
# @param text [String] schema source
# @param source_file [String] name passed to Source as its `file`
# @return [Array<Hash>] each hash has :line, :column, :message, :severity
#   and :type keys
def validate(text, source_file: 'schema')
  src = Source.new(text, file: source_file)
  Parser.new(Lexer.new(src).tokenize, src).parse
  []
rescue ParseError => err
  diagnostic = {
    line: err.line,
    column: err.column,
    message: err.short_message,
    severity: :error,
    type: :syntax
  }
  [diagnostic]
end
37
47
  end
38
48
  end
@@ -2,83 +2,22 @@
2
2
 
3
3
  module Kumi
4
4
  module Parser
5
- # Token with embedded metadata for smart parsing
6
- class Token
7
- attr_reader :type, :value, :location, :metadata
8
-
9
- def initialize(type, value, location, metadata = {})
10
- @type = type
11
- @value = value
12
- @location = location
13
- @metadata = metadata
14
- end
15
-
16
- # Semantic predicates embedded in token
17
- def keyword?
18
- @metadata[:category] == :keyword
19
- end
20
-
21
- def operator?
22
- @metadata[:category] == :operator
23
- end
24
-
25
- def literal?
26
- @metadata[:category] == :literal
27
- end
28
-
29
- def identifier?
30
- @metadata[:category] == :identifier
31
- end
32
-
33
- def punctuation?
34
- @metadata[:category] == :punctuation
35
- end
36
-
37
- # Operator precedence embedded in token
38
- def precedence
39
- @metadata[:precedence] || 0
40
- end
41
-
42
- def left_associative?
43
- @metadata[:associativity] == :left
44
- end
45
-
46
- def right_associative?
47
- @metadata[:associativity] == :right
48
- end
49
-
50
- # Parser hints embedded in token
51
- def expects_block?
52
- @metadata[:expects_block] == true
53
- end
54
-
55
- def terminates_expression?
56
- @metadata[:terminates_expression] == true
57
- end
58
-
59
- def starts_expression?
60
- @metadata[:starts_expression] == true
61
- end
62
-
63
- # Direct AST construction hint
64
- def ast_class
65
- @metadata[:ast_class]
66
- end
67
-
5
+ # One lexical token. Unlike the old SmartTokenizer's metadata-bag tokens,
6
+ # this carries only a typed `kind`, its literal `value`, and the byte
7
+ # `offset` where it starts — enough to build a location and error frame on
8
+ # demand via Source.
9
+ #
10
+ # Everything the parser needs to know about a token's role (precedence,
11
+ # associativity, whether it's a type keyword, …) lives in the grammar
12
+ # tables in Grammar, keyed by `kind` — not duplicated onto every token.
13
+ Token = Struct.new(:kind, :value, :offset) do
68
14
  def to_s
69
- "#{@type}(#{@value.inspect}) at #{@location}"
15
+ "#{kind}(#{value.inspect})"
70
16
  end
71
17
 
72
18
  def inspect
73
19
  to_s
74
20
  end
75
-
76
- def ==(other)
77
- other.is_a?(Token) &&
78
- @type == other.type &&
79
- @value == other.value &&
80
- @location == other.location
81
- end
82
21
  end
83
22
  end
84
23
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Kumi
4
4
  module Parser
5
- VERSION = '0.0.33'
5
+ VERSION = '0.1.0'
6
6
  end
7
7
  end
data/lib/kumi-parser.rb CHANGED
@@ -2,20 +2,19 @@
2
2
 
3
3
  require 'kumi'
4
4
  require 'kumi/syntax/node'
5
- require 'zeitwerk'
6
- require 'parslet'
7
-
8
- loader = Zeitwerk::Loader.for_gem(warn_on_extra_files: false)
9
- loader.ignore("#{__dir__}/kumi-parser.rb")
10
- loader.ignore("#{__dir__}/kumi/parser/version.rb")
11
- loader.ignore("#{__dir__}/kumi/parser/token_constants.rb")
12
- loader.setup
13
5
 
14
6
  require_relative 'kumi/parser/version'
15
- require_relative 'kumi/parser/token_constants'
7
+ require_relative 'kumi/parser/grammar'
8
+ require_relative 'kumi/parser/source'
9
+ require_relative 'kumi/parser/token'
10
+ require_relative 'kumi/parser/parse_error'
11
+ require_relative 'kumi/parser/lexer'
12
+ require_relative 'kumi/parser/parser'
13
+ require_relative 'kumi/parser/text_parser'
16
14
 
17
15
  module Kumi
16
+ # Text frontend for Kumi: lexes and parses `.kumi` schema source into
17
+ # kumi-core's Kumi::Syntax AST. See Kumi::Parser::TextParser for the API.
18
18
  module Parser
19
- # Parser extension for Kumi DSL
20
19
  end
21
20
  end
metadata CHANGED
@@ -1,43 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kumi-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.33
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kumi Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-06-14 00:00:00.000000000 Z
11
+ date: 2026-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: parslet
14
+ name: kumi
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '2.0'
19
+ version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '2.0'
27
- - !ruby/object:Gem::Dependency
28
- name: zeitwerk
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '2.6'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
24
+ - - ">="
39
25
  - !ruby/object:Gem::Version
40
- version: '2.6'
26
+ version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: bundler
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -117,31 +103,24 @@ extensions: []
117
103
  extra_rdoc_files: []
118
104
  files:
119
105
  - ".rspec"
106
+ - ".rubocop.yml"
120
107
  - ".ruby-version"
108
+ - CHANGELOG.md
121
109
  - CLAUDE.md
122
110
  - LICENSE
123
111
  - README.md
124
112
  - Rakefile
125
- - examples/debug_text_parser.rb
126
- - examples/debug_transform_rule.rb
127
- - examples/text_parser_comprehensive_test.rb
128
- - examples/text_parser_test_with_comments.rb
113
+ - examples/parse_and_inspect.rb
129
114
  - kumi-parser.gemspec
130
115
  - lib/kumi-parser.rb
131
- - lib/kumi/parser/base.rb
132
- - lib/kumi/parser/direct_parser.rb
133
- - lib/kumi/parser/error_extractor.rb
134
- - lib/kumi/parser/errors.rb
135
- - lib/kumi/parser/helpers.rb
136
- - lib/kumi/parser/smart_tokenizer.rb
137
- - lib/kumi/parser/syntax_validator.rb
116
+ - lib/kumi/parser/grammar.rb
117
+ - lib/kumi/parser/lexer.rb
118
+ - lib/kumi/parser/parse_error.rb
119
+ - lib/kumi/parser/parser.rb
120
+ - lib/kumi/parser/source.rb
138
121
  - lib/kumi/parser/text_parser.rb
139
- - lib/kumi/parser/text_parser/api.rb
140
122
  - lib/kumi/parser/token.rb
141
- - lib/kumi/parser/token_constants.rb
142
123
  - lib/kumi/parser/version.rb
143
- - lib/kumi/text_parser.rb
144
- - lib/kumi/text_schema.rb
145
124
  homepage: https://github.com/amuta/kumi-parser
146
125
  licenses:
147
126
  - MIT
@@ -158,7 +137,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
158
137
  requirements:
159
138
  - - ">="
160
139
  - !ruby/object:Gem::Version
161
- version: 3.0.0
140
+ version: 3.1.0
162
141
  required_rubygems_version: !ruby/object:Gem::Requirement
163
142
  requirements:
164
143
  - - ">="
@@ -1,41 +0,0 @@
1
- # Debug the text parser transform
2
-
3
- require_relative 'lib/kumi/text_parser'
4
-
5
- schema_text = <<~SCHEMA
6
- schema do
7
- input do
8
- integer :age
9
- end
10
- #{' '}
11
- trait :adult, input.age >= 18
12
- value :bonus, 100
13
- end
14
- SCHEMA
15
-
16
- puts 'Debugging text parser...'
17
-
18
- begin
19
- # Test just the grammar parsing first
20
- grammar = Kumi::TextParser::Grammar.new
21
- parse_tree = grammar.parse(schema_text)
22
-
23
- puts 'Raw parse tree:'
24
- puts parse_tree.inspect
25
- puts
26
-
27
- # Now test the transform
28
- transform = Kumi::TextParser::Transform.new
29
- ast = transform.apply(parse_tree)
30
-
31
- puts 'Transformed AST:'
32
- puts ast.inspect
33
- puts
34
-
35
- puts 'AST structure:'
36
- puts "- Values: #{ast.values.count} - #{ast.values.map(&:name)}"
37
- puts "- Traits: #{ast.traits.count} - #{ast.traits.map(&:name)}"
38
- rescue StandardError => e
39
- puts "Error: #{e.message}"
40
- puts e.backtrace.first(5)
41
- end
@@ -1,26 +0,0 @@
1
- # Debug specific transform rule
2
-
3
- require_relative 'lib/kumi/text_parser'
4
-
5
- # Test just the trait parsing
6
- trait_text = 'trait :adult, input.age >= 18'
7
-
8
- grammar = Kumi::TextParser::Grammar.new
9
- transform = Kumi::TextParser::Transform.new
10
-
11
- begin
12
- # Parse just the trait declaration
13
- parse_result = grammar.trait_declaration.parse(trait_text)
14
- puts 'Trait parse result:'
15
- puts parse_result.inspect
16
- puts
17
-
18
- # Try to transform it
19
- transformed = transform.apply(parse_result)
20
- puts 'Transformed result:'
21
- puts transformed.inspect
22
- puts "Class: #{transformed.class}"
23
- rescue StandardError => e
24
- puts "Error: #{e.message}"
25
- puts e.backtrace.first(5)
26
- end