kumi-parser 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +120 -0
- data/README.md +38 -41
- data/lib/kumi/parser/base.rb +51 -0
- data/lib/kumi/parser/direct_parser.rb +502 -0
- data/lib/kumi/parser/errors.rb +40 -0
- data/lib/kumi/parser/smart_tokenizer.rb +287 -0
- data/lib/kumi/parser/syntax_validator.rb +3 -25
- data/lib/kumi/parser/text_parser.rb +19 -34
- data/lib/kumi/parser/token.rb +84 -0
- data/lib/kumi/parser/token_metadata.rb +370 -0
- data/lib/kumi/parser/version.rb +1 -1
- data/lib/kumi/text_parser.rb +40 -0
- data/lib/kumi/text_schema.rb +31 -0
- data/lib/kumi-parser.rb +1 -0
- metadata +10 -8
- data/lib/kumi/parser/analyzer_diagnostic_converter.rb +0 -84
- data/lib/kumi/parser/text_parser/editor_diagnostic.rb +0 -102
- data/lib/kumi/parser/text_parser/grammar.rb +0 -214
- data/lib/kumi/parser/text_parser/parser.rb +0 -168
- data/lib/kumi/parser/text_parser/transform.rb +0 -170
- data/lib/kumi/parser.rb +0 -8
- data/test_basic.rb +0 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0067c520152fae1c2285b2628f3b8b5d25eff145ae61780cf3d283310816c336
|
4
|
+
data.tar.gz: 4c604f36cd250a8d4672fef7c2acc667c7de498fe0ee088980bcbd8a744b8d5e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3d3342f43d5bb41b140f40d02d3eaac08abeb88509d4c4f4ce127e38ec3583e7566c68d8a96956d2877a2819b046663e2bf95aa246763bcf7556cf4df380934
|
7
|
+
data.tar.gz: f7891f6e388e224ebfd0da8d027d331c6794325e0d27715f8800f4aa9124dd3421fc36ca3afb1ac10e6f4e8c5db61f610a18e9db0a33d21c9292efbbd5fa45a7
|
data/CLAUDE.md
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
# Kumi Parser - Technical Context
|
2
|
+
|
3
|
+
## Current Architecture (January 2025)
|
4
|
+
|
5
|
+
## Key Files
|
6
|
+
|
7
|
+
- `lib/kumi/parser/smart_tokenizer.rb` - Tokenizer with context tracking
|
8
|
+
- `lib/kumi/parser/direct_parser.rb` - Parser implementation (renamed from direct_ast_parser.rb)
|
9
|
+
- `lib/kumi/parser/token_metadata.rb` - Token types and metadata
|
10
|
+
- `lib/kumi/parser/text_parser.rb` - Public API maintaining compatibility
|
11
|
+
- `lib/kumi/parser/base.rb` - Core parsing interface
|
12
|
+
- `lib/kumi/parser/syntax_validator.rb` - Validation with proper diagnostics
|
13
|
+
- `lib/kumi/parser/errors.rb` - Custom error types
|
14
|
+
|
15
|
+
## Important Syntax Rules
|
16
|
+
|
17
|
+
- **Functions**: `fn(:symbol, args...)` only (no dot notation like `fn.max()`)
|
18
|
+
- **Operators**: Standard precedence (*/% > +- > comparisons > & > |)
|
19
|
+
- **Array access**: Uses `array[index]` syntax (converted to `:at` function internally)
|
20
|
+
- **Equality**: `==` and `!=` operators (converted from `:eq`/`:ne` tokens)
|
21
|
+
- **Multi-line expressions**: Parser skips newlines within expressions
|
22
|
+
- **Cascade**: `value :name do ... on condition, result ... base result ... end`
|
23
|
+
- **Constants**: Text parser cannot resolve Ruby constants - use inline values
|
24
|
+
|
25
|
+
## AST Structure & Compatibility
|
26
|
+
|
27
|
+
All nodes from `Kumi::Syntax::*` (defined in main kumi gem):
|
28
|
+
- `Root(inputs, attributes, traits)`
|
29
|
+
- `InputDeclaration(name, domain, type, children)`
|
30
|
+
- `ValueDeclaration(name, expression)`
|
31
|
+
- `TraitDeclaration(name, expression)`
|
32
|
+
- `CallExpression(fn_name, args)`
|
33
|
+
- `InputReference(name)` / `InputElementReference(path)`
|
34
|
+
- `DeclarationReference(name)`
|
35
|
+
- `Literal(value)`
|
36
|
+
- `CascadeExpression(cases)` / `CaseExpression(condition, result)`
|
37
|
+
- `ArrayExpression(elements)`
|
38
|
+
|
39
|
+
**Ruby DSL Compatibility**:
|
40
|
+
- Cascade conditions: Simple trait references wrapped in `all?([trait])` function calls
|
41
|
+
- Array access: `[index]` becomes `CallExpression(:at, [array, index])`
|
42
|
+
- Operators: `:eq` → `:==`, `:ne` → `:!=` for consistency
|
43
|
+
- Constants: Ruby constants resolved to values in DSL, remain as `DeclarationReference` in text parser
|
44
|
+
|
45
|
+
## Debugging & Testing
|
46
|
+
|
47
|
+
**View AST structure**:
|
48
|
+
```ruby
|
49
|
+
ast = Kumi::Parser::TextParser.parse(schema)
|
50
|
+
puts Kumi::Support::SExpressionPrinter.print(ast)
|
51
|
+
# => (Root
|
52
|
+
# inputs: [(InputDeclaration :income :float)]
|
53
|
+
# attributes: [(ValueDeclaration :tax (CallExpression :+ ...))]
|
54
|
+
# traits: [(TraitDeclaration :adult (CallExpression :>= ...))])
|
55
|
+
```
|
56
|
+
|
57
|
+
**Quick validation test**:
|
58
|
+
```bash
|
59
|
+
ruby -r./lib/kumi/parser/text_parser -e "p Kumi::Parser::TextParser.valid?('schema do input do float :x end end')"
|
60
|
+
```
|
61
|
+
|
62
|
+
**Compare with Ruby DSL**:
|
63
|
+
```ruby
|
64
|
+
# Define schema in Ruby
|
65
|
+
module TestSchema
|
66
|
+
extend Kumi::Schema
|
67
|
+
schema do
|
68
|
+
input do
|
69
|
+
float :income
|
70
|
+
end
|
71
|
+
value :tax, fn(:calc, input.income)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Parse equivalent text
|
76
|
+
text_ast = Kumi::Parser::TextParser.parse(<<~KUMI)
|
77
|
+
schema do
|
78
|
+
input do
|
79
|
+
float :income
|
80
|
+
end
|
81
|
+
value :tax, fn(:calc, input.income)
|
82
|
+
end
|
83
|
+
KUMI
|
84
|
+
|
85
|
+
# Compare ASTs
|
86
|
+
ruby_ast = TestSchema.__syntax_tree__
|
87
|
+
text_ast == ruby_ast # Should be true
|
88
|
+
```
|
89
|
+
|
90
|
+
- Tax schema in `spec/kumi/parser/text_parser_example tax_schema_spec.rb` is canonical test
|
91
|
+
- Run all tests: `rspec spec/kumi/parser/`
|
92
|
+
- Integration tests: `rspec spec/kumi/parser/text_parser_integration_spec.rb`
|
93
|
+
|
94
|
+
## Error Handling & Validation
|
95
|
+
|
96
|
+
- **Parse errors**: `Kumi::Parser::Errors::ParseError` (internal) → `Kumi::Errors::SyntaxError` (public API)
|
97
|
+
- **Tokenizer errors**: `Kumi::Parser::Errors::TokenizerError` with location info
|
98
|
+
- **Diagnostics**: Use `SyntaxValidator` for detailed error reporting with line/column info
|
99
|
+
- **Location tracking**: All tokens and AST nodes include `Kumi::Syntax::Location(file, line, column)`
|
100
|
+
|
101
|
+
## Test Status (January 2025)
|
102
|
+
|
103
|
+
✅ **All specs passing**: 32 examples, 0 failures, 1 pending
|
104
|
+
- ✅ Syntax validation with proper diagnostics
|
105
|
+
- ✅ AST compatibility with Ruby DSL (when constants aren't used)
|
106
|
+
- ✅ Integration with analyzer and compiler
|
107
|
+
- ✅ End-to-end execution testing
|
108
|
+
- ✅ Error type compatibility
|
109
|
+
|
110
|
+
## Known Limitations
|
111
|
+
|
112
|
+
- **Ruby constants**: Text parser cannot resolve Ruby constants like `CONST_NAME` - use inline values instead
|
113
|
+
- **Domain specification**: Parsing not fully implemented
|
114
|
+
- **Diagnostic APIs**: Monaco/CodeMirror/JSON format methods not implemented
|
115
|
+
|
116
|
+
## Performance
|
117
|
+
|
118
|
+
- Tokenization: <1ms for typical schemas
|
119
|
+
- Parsing: ~4ms for complete tax schema (21 values, 4 traits)
|
120
|
+
- Direct AST construction eliminates transformation overhead
|
data/README.md
CHANGED
@@ -1,76 +1,73 @@
|
|
1
1
|
# Kumi::Parser
|
2
2
|
|
3
|
-
Text parser for [Kumi](https://github.com/amuta/kumi)
|
3
|
+
Text parser for [Kumi](https://github.com/amuta/kumi) schemas. Direct tokenizer → AST construction with ~4ms parse time.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
7
|
```ruby
|
10
8
|
gem 'kumi-parser'
|
11
9
|
```
|
12
10
|
|
13
|
-
And then execute:
|
14
|
-
|
15
|
-
$ bundle install
|
16
|
-
|
17
|
-
Or install it yourself as:
|
18
|
-
|
19
|
-
$ gem install kumi-parser
|
20
|
-
|
21
11
|
## Usage
|
22
12
|
|
23
|
-
### Basic Parsing
|
24
|
-
|
25
13
|
```ruby
|
26
14
|
require 'kumi-parser'
|
27
15
|
|
28
|
-
|
16
|
+
schema = <<~KUMI
|
29
17
|
schema do
|
30
18
|
input do
|
31
|
-
|
32
|
-
string :status
|
19
|
+
float :income
|
20
|
+
string :status
|
33
21
|
end
|
34
22
|
|
35
23
|
trait :adult, input.age >= 18
|
36
|
-
value :
|
24
|
+
value :tax, fn(:calculate_tax, input.income)
|
37
25
|
end
|
38
|
-
|
26
|
+
KUMI
|
39
27
|
|
40
|
-
# Parse
|
41
|
-
ast = Kumi::Parser::TextParser.parse(
|
28
|
+
# Parse to AST
|
29
|
+
ast = Kumi::Parser::TextParser.parse(schema)
|
42
30
|
|
43
|
-
# Validate
|
44
|
-
|
45
|
-
puts "Valid!" if diagnostics.empty?
|
31
|
+
# Validate
|
32
|
+
Kumi::Parser::TextParser.valid?(schema) # => true
|
46
33
|
```
|
47
34
|
|
48
|
-
|
35
|
+
## API
|
49
36
|
|
50
|
-
|
51
|
-
|
52
|
-
|
37
|
+
- `parse(text)` → AST
|
38
|
+
- `valid?(text)` → Boolean
|
39
|
+
- `validate(text)` → Array of error hashes
|
53
40
|
|
54
|
-
|
55
|
-
codemirror_diagnostics = Kumi::Parser::TextParser.diagnostics_for_codemirror(schema_text)
|
41
|
+
## Syntax
|
56
42
|
|
57
|
-
|
58
|
-
|
43
|
+
```
|
44
|
+
schema do
|
45
|
+
input do
|
46
|
+
<type> :<name>[, domain: <spec>]
|
47
|
+
end
|
48
|
+
|
49
|
+
trait :<name>, <expression>
|
50
|
+
|
51
|
+
value :<name>, <expression>
|
52
|
+
value :<name> do
|
53
|
+
on <condition>, <result>
|
54
|
+
base <result>
|
55
|
+
end
|
56
|
+
end
|
59
57
|
```
|
60
58
|
|
61
|
-
|
59
|
+
**Function calls**: `fn(:name, arg1, arg2, ...)`
|
60
|
+
**Operators**: `+` `-` `*` `/` `%` `>` `<` `>=` `<=` `==` `!=` `&` `|`
|
61
|
+
**References**: `input.field`, `value_name`, `array[index]`
|
62
62
|
|
63
|
-
|
64
|
-
- `validate(text)` - Validate syntax and return diagnostics
|
65
|
-
- `valid?(text)` - Quick validation check (returns boolean)
|
66
|
-
- `diagnostics_for_monaco(text)` - Get Monaco Editor format diagnostics
|
67
|
-
- `diagnostics_for_codemirror(text)` - Get CodeMirror format diagnostics
|
68
|
-
- `diagnostics_as_json(text)` - Get JSON format diagnostics
|
63
|
+
## Architecture
|
69
64
|
|
70
|
-
|
65
|
+
- `smart_tokenizer.rb` - Context-aware tokenization with embedded metadata
|
66
|
+
- `direct_parser.rb` - Recursive descent parser, direct AST construction
|
67
|
+
- `token_metadata.rb` - Token types, precedence, and semantic hints
|
71
68
|
|
72
|
-
|
69
|
+
See `docs/` for technical details.
|
73
70
|
|
74
71
|
## License
|
75
72
|
|
76
|
-
MIT
|
73
|
+
MIT
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'smart_tokenizer'
|
4
|
+
require_relative 'direct_parser'
|
5
|
+
require_relative 'errors'
|
6
|
+
|
7
|
+
module Kumi
  module Parser
    # Text parser using tokenizer + direct AST construction.
    #
    # All entry points are class-level. The source text is first turned into
    # tokens by SmartTokenizer and those tokens are then handed to
    # DirectParser, whose result is returned unchanged.
    class Base
      # Tokenizes and parses +source+.
      #
      # @param source [String] schema text to parse
      # @param source_file [String] name forwarded to the tokenizer
      # @return [Object] whatever DirectParser#parse returns
      # @raise [Errors::TokenizerError, Errors::ParseError] not rescued here;
      #   any error raised by the tokenizer or parser propagates to the caller
      def self.parse(source, source_file: '<input>')
        tokens = SmartTokenizer.new(source, source_file: source_file).tokenize
        Kumi::Parser::DirectParser.new(tokens).parse
      end

      # Reports whether +source+ parses without a tokenizer or parse error.
      #
      # @param source [String] schema text to check
      # @param source_file [String] name forwarded to the tokenizer
      # @return [Boolean] true when parsing succeeds, false when it raises
      #   Errors::TokenizerError or Errors::ParseError (other errors propagate)
      def self.valid?(source, source_file: '<input>')
        parse(source, source_file: source_file)
        true
      rescue Errors::TokenizerError, Errors::ParseError
        false
      end

      # Parses +source+ and converts a failure into a diagnostic list.
      #
      # @param source [String] schema text to check
      # @param source_file [String] name forwarded to the tokenizer
      # @return [Array<Hash>] empty when parsing succeeds; otherwise a single
      #   diagnostic hash, because parsing stops at the first raised error
      def self.validate(source, source_file: '<input>')
        parse(source, source_file: source_file)
        []
      rescue Errors::TokenizerError, Errors::ParseError => e
        [create_diagnostic(e, source_file)]
      end

      # Builds the diagnostic hash for a rescued error.
      #
      # The location is taken from the error's token when it is a ParseError
      # carrying one, otherwise from the error itself if it exposes
      # +location+. When neither is available, line and column default to 1.
      #
      # @param error [StandardError] the rescued tokenizer/parse error
      # @param source_file [String] accepted for signature compatibility;
      #   not used when building the hash
      # @return [Hash] with keys :line, :column, :message, :severity, :type
      def self.create_diagnostic(error, source_file)
        location =
          if error.is_a?(Errors::ParseError) && error.token
            error.token.location
          elsif error.respond_to?(:location)
            error.location
          end

        {
          line: location&.line || 1,
          column: location&.column || 1,
          message: error.message,
          severity: :error,
          type: :syntax
        }
      end
      # A bare `private` does not apply to methods defined with `def self.`,
      # so the helper has to be hidden explicitly.
      private_class_method :create_diagnostic
    end
  end
end
|