kumi-parser 0.0.33 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +41 -0
- data/CHANGELOG.md +64 -0
- data/CLAUDE.md +59 -120
- data/README.md +28 -6
- data/examples/parse_and_inspect.rb +34 -0
- data/kumi-parser.gemspec +3 -4
- data/lib/kumi/parser/grammar.rb +120 -0
- data/lib/kumi/parser/lexer.rb +232 -0
- data/lib/kumi/parser/parse_error.rb +52 -0
- data/lib/kumi/parser/parser.rb +692 -0
- data/lib/kumi/parser/source.rb +76 -0
- data/lib/kumi/parser/text_parser.rb +37 -27
- data/lib/kumi/parser/token.rb +10 -71
- data/lib/kumi/parser/version.rb +1 -1
- data/lib/kumi-parser.rb +9 -10
- metadata +16 -37
- data/examples/debug_text_parser.rb +0 -41
- data/examples/debug_transform_rule.rb +0 -26
- data/examples/text_parser_comprehensive_test.rb +0 -333
- data/examples/text_parser_test_with_comments.rb +0 -146
- data/lib/kumi/parser/base.rb +0 -51
- data/lib/kumi/parser/direct_parser.rb +0 -698
- data/lib/kumi/parser/error_extractor.rb +0 -89
- data/lib/kumi/parser/errors.rb +0 -40
- data/lib/kumi/parser/helpers.rb +0 -154
- data/lib/kumi/parser/smart_tokenizer.rb +0 -373
- data/lib/kumi/parser/syntax_validator.rb +0 -21
- data/lib/kumi/parser/text_parser/api.rb +0 -60
- data/lib/kumi/parser/token_constants.rb +0 -468
- data/lib/kumi/text_parser.rb +0 -40
- data/lib/kumi/text_schema.rb +0 -31
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kumi
|
|
4
|
+
module Parser
|
|
5
|
+
# The text being parsed, plus the bookkeeping needed to turn a byte offset
|
|
6
|
+
# into a 1-based line/column and to render a caret-annotated code frame for
|
|
7
|
+
# error messages. Owning this here keeps location math in one place instead
|
|
8
|
+
# of being recomputed in the lexer and the parser.
|
|
9
|
+
class Source
  # The text being parsed, plus the bookkeeping needed to turn an offset into
  # a 1-based line/column and to render a caret-annotated code frame.
  #
  # NOTE(review): offsets are treated as *character* indexes here — line
  # starts come from `each_char.with_index` and the clamp uses `String#length`.
  # That equals a byte offset only for single-byte text; confirm the lexer
  # reports offsets in the same unit.
  attr_reader :text, :file

  # @param text [String] the full source text
  # @param file [String] label stored as-is and passed on to locations
  def initialize(text, file: 'schema')
    @text = text
    @file = file
    @line_starts = compute_line_starts(text)
  end

  # 1-based [line, column] for a 0-based offset into `text`.
  #
  # Out-of-range offsets are clamped to the text: anything past the end maps
  # to the end-of-text position, anything negative maps to [1, 1] (a negative
  # offset previously indexed the line table from the back and produced a
  # meaningless result).
  #
  # @param offset [Integer]
  # @return [Array(Integer, Integer)]
  def line_col(offset)
    offset = offset.clamp(0, text.length)
    line = upper_bound(@line_starts, offset) - 1
    [line + 1, offset - @line_starts[line] + 1]
  end

  # Wraps `line_col(offset)` and `file` in a Kumi::Syntax::Location.
  #
  # @param offset [Integer]
  def location(offset)
    line, col = line_col(offset)
    Kumi::Syntax::Location.new(file: file, line: line, column: col)
  end

  # Renders the offending line with up to `context` lines either side, a `➤`
  # marker on the offending line and a `^` under the offending column.
  #
  # @param offset [Integer] 0-based offset of the error
  # @param context [Integer] number of lines shown before and after
  # @return [String] rows joined with "\n"; '' when the text has no lines
  def code_frame(offset, context: 2)
    lines = text.lines
    return '' if lines.empty?

    line, col = line_col(offset)
    first = [line - 1 - context, 0].max
    # When the text ends with a newline, an end-of-input offset resolves to a
    # line one past the last physical line. Keep that (empty) line in range so
    # the marker and caret are still rendered instead of silently dropped.
    last = [line - 1 + context, [lines.length, line].max - 1].min

    out = []
    (first..last).each do |i|
      number = i + 1
      current = number == line
      gutter = format('%s %4d', current ? '➤' : ' ', number)
      out << "#{gutter} | #{lines[i].to_s.chomp}"
      # Pad the caret row with a blank gutter of the same width as the code
      # row so `^` lands under the reported column, whatever the digit count.
      out << "#{' ' * gutter.length} | #{' ' * (col - 1)}^" if current
    end
    out.join("\n")
  end

  private

  # Offsets at which each line begins: 0, plus the index just after every "\n".
  def compute_line_starts(text)
    starts = [0]
    text.each_char.with_index(1) do |ch, next_index|
      starts << next_index if ch == "\n"
    end
    starts
  end

  # Number of entries in the sorted `starts` that are <= offset, i.e. the
  # 1-based line containing `offset`; callers subtract 1 for a 0-based index.
  def upper_bound(starts, offset)
    starts.bsearch_index { |start| start > offset } || starts.length
  end
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -1,38 +1,48 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative 'smart_tokenizer'
|
|
4
|
-
require_relative 'direct_parser'
|
|
5
|
-
require_relative 'errors'
|
|
6
|
-
|
|
7
3
|
module Kumi
|
|
8
4
|
module Parser
|
|
5
|
+
# The public entry point. kumi-core calls `TextParser.parse(src, source_file:)`
|
|
6
|
+
# and nothing else; `valid?` and `validate` exist for editor/tooling use.
|
|
7
|
+
#
|
|
8
|
+
# Parse errors are raised as Kumi::Errors::SyntaxError carrying both a
|
|
9
|
+
# self-contained, framed message (so standalone callers get a useful string)
|
|
10
|
+
# and a Location object (so kumi-core's frontend can render its own frame
|
|
11
|
+
# without re-parsing the message). The message itself is the bare what/why,
|
|
12
|
+
# without location — the frame and `file:line:col` header are added by the
|
|
13
|
+
# presentation layer from the Location.
|
|
9
14
|
module TextParser
|
|
10
|
-
|
|
15
|
+
module_function
|
|
11
16
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
raise Kumi::Errors::SyntaxError, e.message
|
|
20
|
-
end
|
|
17
|
+
def parse(text, source_file: 'schema')
|
|
18
|
+
source = Source.new(text, file: source_file)
|
|
19
|
+
tokens = Lexer.new(source).tokenize
|
|
20
|
+
Parser.new(tokens, source).parse
|
|
21
|
+
rescue ParseError => e
|
|
22
|
+
raise Kumi::Errors::SyntaxError.new(e.short_message, e.location)
|
|
23
|
+
end
|
|
21
24
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
end
|
|
25
|
+
def valid?(text, source_file: 'schema')
|
|
26
|
+
parse(text, source_file: source_file)
|
|
27
|
+
true
|
|
28
|
+
rescue Kumi::Errors::SyntaxError
|
|
29
|
+
false
|
|
30
|
+
end
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
# Returns an array of diagnostic hashes (empty when valid) for editors.
|
|
33
|
+
def validate(text, source_file: 'schema')
|
|
34
|
+
source = Source.new(text, file: source_file)
|
|
35
|
+
tokens = Lexer.new(source).tokenize
|
|
36
|
+
Parser.new(tokens, source).parse
|
|
37
|
+
[]
|
|
38
|
+
rescue ParseError => e
|
|
39
|
+
[{
|
|
40
|
+
line: e.line,
|
|
41
|
+
column: e.column,
|
|
42
|
+
message: e.short_message,
|
|
43
|
+
severity: :error,
|
|
44
|
+
type: :syntax
|
|
45
|
+
}]
|
|
36
46
|
end
|
|
37
47
|
end
|
|
38
48
|
end
|
data/lib/kumi/parser/token.rb
CHANGED
|
@@ -2,83 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
module Kumi
|
|
4
4
|
module Parser
|
|
5
|
-
#
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
# Semantic predicates embedded in token
|
|
17
|
-
def keyword?
|
|
18
|
-
@metadata[:category] == :keyword
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def operator?
|
|
22
|
-
@metadata[:category] == :operator
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def literal?
|
|
26
|
-
@metadata[:category] == :literal
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def identifier?
|
|
30
|
-
@metadata[:category] == :identifier
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
def punctuation?
|
|
34
|
-
@metadata[:category] == :punctuation
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
# Operator precedence embedded in token
|
|
38
|
-
def precedence
|
|
39
|
-
@metadata[:precedence] || 0
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
def left_associative?
|
|
43
|
-
@metadata[:associativity] == :left
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
def right_associative?
|
|
47
|
-
@metadata[:associativity] == :right
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
# Parser hints embedded in token
|
|
51
|
-
def expects_block?
|
|
52
|
-
@metadata[:expects_block] == true
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def terminates_expression?
|
|
56
|
-
@metadata[:terminates_expression] == true
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
def starts_expression?
|
|
60
|
-
@metadata[:starts_expression] == true
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
# Direct AST construction hint
|
|
64
|
-
def ast_class
|
|
65
|
-
@metadata[:ast_class]
|
|
66
|
-
end
|
|
67
|
-
|
|
5
|
+
# One lexical token. Unlike the old SmartTokenizer's metadata-bag tokens,
|
|
6
|
+
# this carries only a typed `kind`, its literal `value`, and the byte
|
|
7
|
+
# `offset` where it starts — enough to build a location and error frame on
|
|
8
|
+
# demand via Source.
|
|
9
|
+
#
|
|
10
|
+
# Everything the parser needs to know about a token's role (precedence,
|
|
11
|
+
# associativity, whether it's a type keyword, …) lives in the grammar
|
|
12
|
+
# tables in Grammar, keyed by `kind` — not duplicated onto every token.
|
|
13
|
+
Token = Struct.new(:kind, :value, :offset) do
|
|
68
14
|
def to_s
|
|
69
|
-
"#{
|
|
15
|
+
"#{kind}(#{value.inspect})"
|
|
70
16
|
end
|
|
71
17
|
|
|
72
18
|
def inspect
|
|
73
19
|
to_s
|
|
74
20
|
end
|
|
75
|
-
|
|
76
|
-
def ==(other)
|
|
77
|
-
other.is_a?(Token) &&
|
|
78
|
-
@type == other.type &&
|
|
79
|
-
@value == other.value &&
|
|
80
|
-
@location == other.location
|
|
81
|
-
end
|
|
82
21
|
end
|
|
83
22
|
end
|
|
84
23
|
end
|
data/lib/kumi/parser/version.rb
CHANGED
data/lib/kumi-parser.rb
CHANGED
|
@@ -2,20 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
require 'kumi'
|
|
4
4
|
require 'kumi/syntax/node'
|
|
5
|
-
require 'zeitwerk'
|
|
6
|
-
require 'parslet'
|
|
7
|
-
|
|
8
|
-
loader = Zeitwerk::Loader.for_gem(warn_on_extra_files: false)
|
|
9
|
-
loader.ignore("#{__dir__}/kumi-parser.rb")
|
|
10
|
-
loader.ignore("#{__dir__}/kumi/parser/version.rb")
|
|
11
|
-
loader.ignore("#{__dir__}/kumi/parser/token_constants.rb")
|
|
12
|
-
loader.setup
|
|
13
5
|
|
|
14
6
|
require_relative 'kumi/parser/version'
|
|
15
|
-
require_relative 'kumi/parser/
|
|
7
|
+
require_relative 'kumi/parser/grammar'
|
|
8
|
+
require_relative 'kumi/parser/source'
|
|
9
|
+
require_relative 'kumi/parser/token'
|
|
10
|
+
require_relative 'kumi/parser/parse_error'
|
|
11
|
+
require_relative 'kumi/parser/lexer'
|
|
12
|
+
require_relative 'kumi/parser/parser'
|
|
13
|
+
require_relative 'kumi/parser/text_parser'
|
|
16
14
|
|
|
17
15
|
module Kumi
|
|
16
|
+
# Text frontend for Kumi: lexes and parses `.kumi` schema source into
|
|
17
|
+
# kumi-core's Kumi::Syntax AST. See Kumi::Parser::TextParser for the API.
|
|
18
18
|
module Parser
|
|
19
|
-
# Parser extension for Kumi DSL
|
|
20
19
|
end
|
|
21
20
|
end
|
metadata
CHANGED
|
@@ -1,43 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kumi-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0
|
|
4
|
+
version: 0.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kumi Team
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name:
|
|
14
|
+
name: kumi
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- - "
|
|
17
|
+
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
19
|
+
version: '0'
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- - "
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
version: '2.0'
|
|
27
|
-
- !ruby/object:Gem::Dependency
|
|
28
|
-
name: zeitwerk
|
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
|
30
|
-
requirements:
|
|
31
|
-
- - "~>"
|
|
32
|
-
- !ruby/object:Gem::Version
|
|
33
|
-
version: '2.6'
|
|
34
|
-
type: :runtime
|
|
35
|
-
prerelease: false
|
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
-
requirements:
|
|
38
|
-
- - "~>"
|
|
24
|
+
- - ">="
|
|
39
25
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '
|
|
26
|
+
version: '0'
|
|
41
27
|
- !ruby/object:Gem::Dependency
|
|
42
28
|
name: bundler
|
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -117,31 +103,24 @@ extensions: []
|
|
|
117
103
|
extra_rdoc_files: []
|
|
118
104
|
files:
|
|
119
105
|
- ".rspec"
|
|
106
|
+
- ".rubocop.yml"
|
|
120
107
|
- ".ruby-version"
|
|
108
|
+
- CHANGELOG.md
|
|
121
109
|
- CLAUDE.md
|
|
122
110
|
- LICENSE
|
|
123
111
|
- README.md
|
|
124
112
|
- Rakefile
|
|
125
|
-
- examples/
|
|
126
|
-
- examples/debug_transform_rule.rb
|
|
127
|
-
- examples/text_parser_comprehensive_test.rb
|
|
128
|
-
- examples/text_parser_test_with_comments.rb
|
|
113
|
+
- examples/parse_and_inspect.rb
|
|
129
114
|
- kumi-parser.gemspec
|
|
130
115
|
- lib/kumi-parser.rb
|
|
131
|
-
- lib/kumi/parser/
|
|
132
|
-
- lib/kumi/parser/
|
|
133
|
-
- lib/kumi/parser/
|
|
134
|
-
- lib/kumi/parser/
|
|
135
|
-
- lib/kumi/parser/
|
|
136
|
-
- lib/kumi/parser/smart_tokenizer.rb
|
|
137
|
-
- lib/kumi/parser/syntax_validator.rb
|
|
116
|
+
- lib/kumi/parser/grammar.rb
|
|
117
|
+
- lib/kumi/parser/lexer.rb
|
|
118
|
+
- lib/kumi/parser/parse_error.rb
|
|
119
|
+
- lib/kumi/parser/parser.rb
|
|
120
|
+
- lib/kumi/parser/source.rb
|
|
138
121
|
- lib/kumi/parser/text_parser.rb
|
|
139
|
-
- lib/kumi/parser/text_parser/api.rb
|
|
140
122
|
- lib/kumi/parser/token.rb
|
|
141
|
-
- lib/kumi/parser/token_constants.rb
|
|
142
123
|
- lib/kumi/parser/version.rb
|
|
143
|
-
- lib/kumi/text_parser.rb
|
|
144
|
-
- lib/kumi/text_schema.rb
|
|
145
124
|
homepage: https://github.com/amuta/kumi-parser
|
|
146
125
|
licenses:
|
|
147
126
|
- MIT
|
|
@@ -158,7 +137,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
158
137
|
requirements:
|
|
159
138
|
- - ">="
|
|
160
139
|
- !ruby/object:Gem::Version
|
|
161
|
-
version: 3.
|
|
140
|
+
version: 3.1.0
|
|
162
141
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
142
|
requirements:
|
|
164
143
|
- - ">="
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
# Debug the text parser transform
|
|
2
|
-
|
|
3
|
-
require_relative 'lib/kumi/text_parser'
|
|
4
|
-
|
|
5
|
-
schema_text = <<~SCHEMA
|
|
6
|
-
schema do
|
|
7
|
-
input do
|
|
8
|
-
integer :age
|
|
9
|
-
end
|
|
10
|
-
#{' '}
|
|
11
|
-
trait :adult, input.age >= 18
|
|
12
|
-
value :bonus, 100
|
|
13
|
-
end
|
|
14
|
-
SCHEMA
|
|
15
|
-
|
|
16
|
-
puts 'Debugging text parser...'
|
|
17
|
-
|
|
18
|
-
begin
|
|
19
|
-
# Test just the grammar parsing first
|
|
20
|
-
grammar = Kumi::TextParser::Grammar.new
|
|
21
|
-
parse_tree = grammar.parse(schema_text)
|
|
22
|
-
|
|
23
|
-
puts 'Raw parse tree:'
|
|
24
|
-
puts parse_tree.inspect
|
|
25
|
-
puts
|
|
26
|
-
|
|
27
|
-
# Now test the transform
|
|
28
|
-
transform = Kumi::TextParser::Transform.new
|
|
29
|
-
ast = transform.apply(parse_tree)
|
|
30
|
-
|
|
31
|
-
puts 'Transformed AST:'
|
|
32
|
-
puts ast.inspect
|
|
33
|
-
puts
|
|
34
|
-
|
|
35
|
-
puts 'AST structure:'
|
|
36
|
-
puts "- Values: #{ast.values.count} - #{ast.values.map(&:name)}"
|
|
37
|
-
puts "- Traits: #{ast.traits.count} - #{ast.traits.map(&:name)}"
|
|
38
|
-
rescue StandardError => e
|
|
39
|
-
puts "Error: #{e.message}"
|
|
40
|
-
puts e.backtrace.first(5)
|
|
41
|
-
end
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# Debug specific transform rule
|
|
2
|
-
|
|
3
|
-
require_relative 'lib/kumi/text_parser'
|
|
4
|
-
|
|
5
|
-
# Test just the trait parsing
|
|
6
|
-
trait_text = 'trait :adult, input.age >= 18'
|
|
7
|
-
|
|
8
|
-
grammar = Kumi::TextParser::Grammar.new
|
|
9
|
-
transform = Kumi::TextParser::Transform.new
|
|
10
|
-
|
|
11
|
-
begin
|
|
12
|
-
# Parse just the trait declaration
|
|
13
|
-
parse_result = grammar.trait_declaration.parse(trait_text)
|
|
14
|
-
puts 'Trait parse result:'
|
|
15
|
-
puts parse_result.inspect
|
|
16
|
-
puts
|
|
17
|
-
|
|
18
|
-
# Try to transform it
|
|
19
|
-
transformed = transform.apply(parse_result)
|
|
20
|
-
puts 'Transformed result:'
|
|
21
|
-
puts transformed.inspect
|
|
22
|
-
puts "Class: #{transformed.class}"
|
|
23
|
-
rescue StandardError => e
|
|
24
|
-
puts "Error: #{e.message}"
|
|
25
|
-
puts e.backtrace.first(5)
|
|
26
|
-
end
|