kumi-parser 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/kumi/parser/version'
4
+
5
# Gem specification for kumi-parser: identity, registry metadata, packaged
# files, and runtime/development dependencies.
Gem::Specification.new do |spec|
  # Identity
  spec.name = 'kumi-parser'
  spec.version = Kumi::Parser::VERSION
  spec.authors = ['Kumi Team']
  spec.email = ['dev@kumi.ai']

  # Description shown by the registry
  spec.summary = 'Text parser for Kumi'
  spec.description = 'Allows Kumi schemas to be written as plain text with syntax validation and editor integration.'
  spec.homepage = 'https://github.com/amuta/kumi-parser'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 3.0.0'

  # Registry metadata
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = 'https://github.com/amuta/kumi-parser'
  spec.metadata['changelog_uri'] = 'https://github.com/amuta/kumi-parser/blob/main/CHANGELOG.md'

  # Package every git-tracked file except this gemspec, development-only
  # paths, and previously built .gem archives.
  ignored_prefixes = %w[bin/ test/ spec/ features/ .git .github appveyor Gemfile]
  spec.files = Dir.chdir(__dir__) do
    tracked_paths = `git ls-files -z`.split("\x0")
    tracked_paths.reject do |path|
      next true if File.expand_path(path) == __FILE__
      next true if path.start_with?(*ignored_prefixes)

      path.end_with?('.gem')
    end
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{\Aexe/}) { |exe_path| File.basename(exe_path) }
  spec.require_paths = ['lib']

  # Runtime dependencies (kept as explicit calls so static gemspec tooling can read them)
  spec.add_dependency 'kumi', '~> 0.0.7'
  spec.add_dependency 'parslet', '~> 2.0'
  spec.add_dependency 'zeitwerk', '~> 2.6'

  # Development dependencies
  spec.add_development_dependency 'bundler', '~> 2.0'
  spec.add_development_dependency 'rake', '~> 13.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'rubocop', '~> 1.21'
  spec.add_development_dependency 'simplecov', '~> 0.22'
end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'text_parser/editor_diagnostic'
4
+
5
module Kumi
  module Parser
    # Converts analyzer errors to editor diagnostics.
    #
    # Three error shapes are recognised, in this order:
    # 1. legacy two-element arrays of the form [location, message];
    # 2. objects responding to #message, optionally also #location and #type;
    # 3. any other truthy value, reported as an "Unknown analyzer error".
    #
    # Whenever a line or column cannot be determined (no location, a location
    # without the accessor, or an accessor that returns nil) the position falls
    # back to 1, so every diagnostic carries a usable position.
    class AnalyzerDiagnosticConverter
      # Position used when the error carries no usable line/column.
      FALLBACK_POSITION = 1
      # Diagnostic type used when the error does not provide one.
      DEFAULT_TYPE = :semantic
      # Error types that map to a non-error severity; everything else is :error.
      SEVERITY_BY_TYPE = { warning: :warning, info: :info, hint: :hint }.freeze

      # Converts a list of analyzer errors into a diagnostic collection.
      #
      # @param errors [Enumerable, nil] analyzer errors in any supported shape
      # @return [TextParser::DiagnosticCollection] one diagnostic per convertible
      #   error; nil/false entries are skipped, and a nil list yields an empty collection
      def self.convert_errors(errors)
        diagnostics = TextParser::DiagnosticCollection.new
        return diagnostics if errors.nil?

        errors.each do |error|
          diagnostic = convert_single_error(error)
          diagnostics << diagnostic if diagnostic
        end
        diagnostics
      end

      # Converts one analyzer error into a diagnostic.
      #
      # @param error [Array, #message, Object, nil] the error to convert
      # @return [TextParser::EditorDiagnostic, nil] nil when error is nil or false
      def self.convert_single_error(error)
        return unless error

        # Legacy array format: [location, message]
        if error.is_a?(Array) && error.size == 2
          location, message = error
          return build_diagnostic(location, message.to_s, DEFAULT_TYPE)
        end

        # Regular error objects
        if error.respond_to?(:message)
          location = error.location if error.respond_to?(:location)
          # A missing or nil type is treated as the default type, not passed on as nil.
          error_type = (error.type if error.respond_to?(:type)) || DEFAULT_TYPE
          return build_diagnostic(location, error.message, error_type)
        end

        # Unknown formats (strings, etc.)
        build_diagnostic(nil, "Unknown analyzer error: #{error}", DEFAULT_TYPE)
      end

      # Returns the position of a location as a hash.
      #
      # @param location [#line, #column, nil]
      # @return [Hash{Symbol => Integer}] :line and :column; both fall back to 1
      #   unless the location responds to both accessors, and a nil value falls
      #   back to 1 individually
      def self.extract_location(location)
        if location.respond_to?(:line) && location.respond_to?(:column)
          { line: location.line || FALLBACK_POSITION, column: location.column || FALLBACK_POSITION }
        else
          { line: FALLBACK_POSITION, column: FALLBACK_POSITION }
        end
      end

      # Maps an error type to a diagnostic severity.
      #
      # @param type [Symbol, Object]
      # @return [Symbol] :warning, :info or :hint for those types, otherwise :error
      def self.map_type_to_severity(type)
        SEVERITY_BY_TYPE.fetch(type, :error)
      end

      # Builds a diagnostic whose severity is derived from its type.
      def self.build_diagnostic(location, message, type)
        TextParser::EditorDiagnostic.new(
          line: position_of(location, :line),
          column: position_of(location, :column),
          message: message,
          severity: map_type_to_severity(type),
          type: type
        )
      end
      private_class_method :build_diagnostic

      # Reads one coordinate from a location, falling back when it is absent or nil.
      def self.position_of(location, accessor)
        return FALLBACK_POSITION unless location.respond_to?(accessor)

        location.public_send(accessor) || FALLBACK_POSITION
      end
      private_class_method :position_of
    end
  end
end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Parser
    # Extracts errors from parslet parse failures.
    #
    # Position information is recovered from the "at line N char M" text inside
    # the error message; syntax errors additionally get their message rewritten
    # into a shorter, user-facing form.
    class ErrorExtractor
      # Captures the line and column of the first location mentioned in a message.
      LOCATION_PATTERN = /at line (\d+) char (\d+)/
      # Location text (with optional trailing period) removed before humanizing.
      LOCATION_SUFFIX = / at line \d+ char \d+\.?/
      # The "`- " marker that may start a line of the error message.
      TREE_MARKER = /^\s*`-\s*/

      # Builds a diagnostic hash from an error.
      #
      # @param error [#message, Object] the raised error
      # @return [Hash] empty when error does not respond to #message; otherwise
      #   :message, :line, :column, :severity (always :error) and :type
      #   (:syntax when the error's class name contains "Syntax", else :runtime)
      def self.extract(error)
        return {} unless error.respond_to?(:message)

        # Coerce so that a nil or non-String message cannot raise here.
        message = error.message.to_s
        class_name = error.class.name
        error_type = class_name.to_s.include?('Syntax') ? :syntax : :runtime
        line, column = locate(message)

        formatted_message =
          if error_type == :syntax
            extract_user_friendly_message(message)
          else
            "#{class_name}: #{message}"
          end

        {
          message: formatted_message,
          line: line,
          column: column,
          severity: :error,
          type: error_type
        }
      end

      # Alias-style entry point for extract_user_friendly_message.
      #
      # @param raw_message [String, nil]
      # @return [String]
      def self.humanize_error_message(raw_message)
        extract_user_friendly_message(raw_message)
      end

      # Rewrites a raw parse error message into a user-friendly one.
      #
      # The patterns are tried top to bottom, so the more specific
      # "Expected X, but got Y" forms must stay ahead of the bare "Expected X" forms.
      #
      # @param raw_message [String, nil]
      # @return [String] 'Parse error' when nothing matches (including nil/empty input)
      def self.extract_user_friendly_message(raw_message)
        # Remove markers, location info, and surrounding whitespace first.
        cleaned_message = raw_message.to_s.gsub(TREE_MARKER, '').gsub(LOCATION_SUFFIX, '').strip

        case cleaned_message
        when /Expected ":", but got "(\w+)"/
          "Missing ':' before symbol, but got \"#{::Regexp.last_match(1)}\""
        when /Expected ":"/
          "Missing ':' before symbol"
        when /Expected "do", but got "(\w+)"/
          "Missing 'do' keyword, but got \"#{::Regexp.last_match(1)}\""
        when /Expected "do"/
          "Missing 'do' keyword"
        when /Expected "end", but got (.+)/
          "Missing 'end' keyword, but got #{::Regexp.last_match(1)}"
        when /Expected "end"/
          "Missing 'end' keyword"
        when /Expected "(\w+)", but got "(\w+)"/
          "Missing '#{::Regexp.last_match(1)}' keyword, but got \"#{::Regexp.last_match(2)}\""
        when /Expected '(\w+)'/
          "Expected '#{::Regexp.last_match(1)}'"
        when /Expected "([^"]+)", but got "([^"]+)"/
          "Expected '#{::Regexp.last_match(1)}', but got \"#{::Regexp.last_match(2)}\""
        when /Expected "(\w+)"/
          "Missing '#{::Regexp.last_match(1)}' keyword"
        when /Failed to match.*Premature end of input/m
          'Failed to match - premature end of input'
        when /Premature end of input/
          "Unexpected end of file - missing 'end'?"
        when /Failed to match/
          'Failed to match sequence'
        else
          'Parse error'
        end
      end

      # Returns [line, column] from the first location in message, or [1, 1].
      def self.locate(message)
        found = LOCATION_PATTERN.match(message)
        found ? found.captures.map(&:to_i) : [1, 1]
      end
      private_class_method :locate
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'text_parser/parser'
4
+ require_relative 'text_parser/editor_diagnostic'
5
+ require_relative 'error_extractor'
6
+
7
module Kumi
  module Parser
    # Validates Kumi DSL syntax by running the text parser and turning any
    # failure into an editor diagnostic.
    class SyntaxValidator
      def initialize
        @parser = TextParser::Parser.new
      end

      # Parses the text and reports the outcome as diagnostics.
      #
      # @param text [String] schema source
      # @param source_file [String] name passed through to the parser
      # @return [TextParser::DiagnosticCollection] empty when parsing succeeds,
      #   otherwise exactly one diagnostic describing the failure
      def validate(text, source_file: '<input>')
        @parser.parse(text, source_file: source_file)
        TextParser::DiagnosticCollection.new([])
      rescue StandardError => e
        TextParser::DiagnosticCollection.new([diagnostic_for(e)])
      end

      # @return [Boolean] true when validate produces no diagnostics
      def valid?(text, source_file: '<input>')
        validate(text, source_file: source_file).empty?
      end

      # @return [String, nil] message of the first diagnostic, or nil when valid
      def first_error(text, source_file: '<input>')
        validate(text, source_file: source_file).to_a.first&.message
      end

      private

      # Converts the hash returned by ErrorExtractor.extract into a diagnostic.
      # Missing keys fall back to defaults so that a failed parse is never
      # reported as an empty (valid) result.
      def diagnostic_for(error)
        details = ErrorExtractor.extract(error)

        TextParser::EditorDiagnostic.new(
          line: details.fetch(:line, 1),
          column: details.fetch(:column, 1),
          message: details.fetch(:message) { error.message.to_s },
          severity: details.fetch(:severity, :error),
          type: details.fetch(:type, :syntax)
        )
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ostruct'
4
+
5
module Kumi
  module Parser
    module TextParser
      # Public API for TextParser.
      #
      # All methods are class-level. Failures raised by the parser are caught
      # (StandardError only) by the validate/analyze family and returned as
      # diagnostics; #parse itself lets them propagate.
      class Api
        # The "at line N char M" text looked for in error messages to position a diagnostic.
        LOCATION_PATTERN = /at line (\d+) char (\d+)/

        class << self
          # Parses the text with a fresh parser instance.
          #
          # @param text [String] schema source
          # @param source_file [String] name passed through to the parser
          # @return [Object] whatever Parser#parse returns
          def parse(text, source_file: '<input>')
            Parser.new.parse(text, source_file: source_file)
          end

          # @return [Array<OpenStruct>] empty when parsing succeeds, otherwise a
          #   single diagnostic with line, column, message and source_file
          def validate(text, source_file: '<input>')
            parse(text, source_file: source_file)
            []
          rescue StandardError => e
            [create_diagnostic(e, source_file)]
          end

          # @return [Boolean] true when validate returns no diagnostics
          def valid?(text, source_file: '<input>')
            validate(text, source_file: source_file).empty?
          end

          # Returns the same diagnostics as validate (no Monaco-specific conversion is applied here).
          def diagnostics_for_monaco(text, source_file: '<input>')
            validate(text, source_file: source_file)
          end

          # Returns the same diagnostics as validate (no CodeMirror-specific conversion is applied here).
          def diagnostics_for_codemirror(text, source_file: '<input>')
            validate(text, source_file: source_file)
          end

          # @return [Array<Hash>] the diagnostics from validate converted with #to_h
          def diagnostics_as_json(text, source_file: '<input>')
            validate(text, source_file: source_file).map(&:to_h)
          end

          # @return [Hash] :success, :ast (nil on failure) and :diagnostics
          def analyze(text, source_file: '<input>')
            ast = parse(text, source_file: source_file)
            { success: true, ast: ast, diagnostics: [] }
          rescue StandardError => e
            { success: false, ast: nil, diagnostics: [create_diagnostic(e, source_file)] }
          end

          private

          # Builds the diagnostic for a failure, positioned at the location
          # named in the error message when there is one.
          def create_diagnostic(error, source_file)
            line, column = position_from(error.message)

            OpenStruct.new(
              line: line,
              column: column,
              message: error.message,
              source_file: source_file
            )
          end

          # Returns [line, column] from the first location in message, or [1, 1].
          def position_from(message)
            found = LOCATION_PATTERN.match(message.to_s)
            found ? found.captures.map(&:to_i) : [1, 1]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Parser
    module TextParser
      # Simple diagnostic for online editors: a 1-based line/column position,
      # a message, a severity and a type, with converters for editor formats.
      class EditorDiagnostic
        # Monaco MarkerSeverity values keyed by severity name.
        MONACO_SEVERITIES = { 'error' => 8, 'warning' => 4, 'info' => 2, 'hint' => 1 }.freeze
        # Unknown severities are reported as errors.
        MONACO_DEFAULT_SEVERITY = 8

        attr_reader :line, :column, :message, :severity, :type

        # @param line [Integer]
        # @param column [Integer]
        # @param message [String]
        # @param severity [Symbol] defaults to :error
        # @param type [Symbol] defaults to :syntax
        def initialize(line:, column:, message:, severity: :error, type: :syntax)
          @line = line
          @column = column
          @message = message
          @severity = severity
          @type = type
        end

        # @return [Hash] marker hash spanning one column at the diagnostic position
        def to_monaco
          {
            startLineNumber: line,
            startColumn: column,
            endLineNumber: line,
            endColumn: column + 1,
            message: message,
            severity: monaco_severity
          }
        end

        # @return [Hash] :from, :to, :message and :severity
        #
        # NOTE(review): :from/:to are pseudo-offsets computed as
        # (line - 1) * 1000 + column, not real document offsets; presumably the
        # consumer is expected to remap them - confirm against the caller.
        def to_codemirror
          {
            from: (line - 1) * 1000 + (column - 1),
            to: (line - 1) * 1000 + column,
            message: message,
            severity: severity.to_s
          }
        end

        # @return [Hash] plain hash with severity and type as strings
        def to_h
          {
            line: line,
            column: column,
            message: message,
            severity: severity.to_s,
            type: type.to_s
          }
        end

        # @return [String] JSON encoding of #to_h
        def to_json(*args)
          require 'json' # loaded lazily so JSON is only needed when serializing
          to_h.to_json(*args)
        end

        private

        # Looks the severity up by name so both symbols and strings are accepted.
        def monaco_severity
          MONACO_SEVERITIES.fetch(severity.to_s, MONACO_DEFAULT_SEVERITY)
        end
      end

      # Collection of diagnostics. Wraps the array it is given (no copy is made).
      class DiagnosticCollection
        # @param diagnostics [Array] initial diagnostics
        def initialize(diagnostics = [])
          @diagnostics = diagnostics
        end

        # Appends a diagnostic.
        def <<(diagnostic)
          @diagnostics << diagnostic
        end

        # @return [Boolean]
        def empty?
          @diagnostics.empty?
        end

        # @return [Integer] number of diagnostics
        def count
          @diagnostics.length
        end

        # @return [Array<Hash>] each diagnostic converted with #to_monaco
        def to_monaco
          @diagnostics.map(&:to_monaco)
        end

        # @return [Array<Hash>] each diagnostic converted with #to_codemirror
        def to_codemirror
          @diagnostics.map(&:to_codemirror)
        end

        # @return [String] JSON array built from each diagnostic's #to_h
        def to_json(*args)
          require 'json' # loaded lazily so JSON is only needed when serializing
          @diagnostics.map(&:to_h).to_json(*args)
        end

        # @return [Array] the underlying array of diagnostics
        def to_a
          @diagnostics
        end
      end
    end
  end
end
@@ -0,0 +1,214 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'parslet'
4
+
5
+ module Kumi
6
+ module Parser
7
+ module TextParser
8
+ # Parslet grammar for the text DSL: a `schema do ... end` wrapper holding one `input do ... end` block followed by value/trait declarations. Expression rules are layered so that * / % bind tightest, then + -, then comparisons, then &, then |.
9
+ class Grammar < Parslet::Parser
10
+ # Basic tokens: whitespace helpers (note that \s also matches newlines)
11
+ rule(:space) { match('\s').repeat(1) } # one or more whitespace characters
12
+ rule(:space?) { space.maybe } # optional whitespace
13
+ rule(:newline?) { match('\n').maybe } # at most one "\n"
14
+
15
+ # Comments: a '#' followed by everything up to (not including) the end of the line
16
+ rule(:comment) { str('#') >> match('[^\n]').repeat }
17
+ rule(:ws) { (space | comment).repeat } # any run of whitespace and comments, possibly empty
18
+ rule(:ws?) { ws.maybe } # NOTE(review): ws can already match nothing, so .maybe looks redundant - confirm before simplifying
19
+
20
+ # Identifiers and symbols: an identifier starts with a letter or underscore; a symbol is ':' plus an identifier
21
+ rule(:identifier) { match('[a-zA-Z_]') >> match('[a-zA-Z0-9_]').repeat }
22
+ rule(:symbol) { str(':') >> identifier.as(:symbol) } # only the identifier part is captured, under :symbol
23
+
24
+ # Literals: unsigned integers and floats, double-quoted strings, booleans
25
+ rule(:integer) { match('[0-9]').repeat(1) } # one or more digits, no sign
26
+ rule(:float) { integer >> str('.') >> match('[0-9]').repeat(1) } # digits are required on both sides of the '.'
27
+ rule(:number) { float.as(:float) | integer.as(:integer) } # float is tried first so "1.5" is not read as integer 1
28
+ rule(:string_literal) do
29
+ str('"') >> (str('"').absent? >> any).repeat.as(:string) >> str('"') # everything up to the next '"'; there is no escape handling
30
+ end
31
+ rule(:boolean) { (str('true').as(:true) | str('false').as(:false)) }
32
+ rule(:literal) { number | string_literal | boolean }
33
+
34
+ # Keywords (plain string matches; no word-boundary check is applied here)
35
+ rule(:schema_kw) { str('schema') }
36
+ rule(:input_kw) { str('input') }
37
+ rule(:value_kw) { str('value') }
38
+ rule(:trait_kw) { str('trait') }
39
+ rule(:do_kw) { str('do') }
40
+ rule(:end_kw) { str('end') }
41
+
42
+ # Type keywords accepted by simple_input_declaration
43
+ rule(:type_name) do
44
+ str('integer') | str('float') | str('string') | str('boolean') | str('any')
45
+ end
46
+
47
+ # Operators (ordered by precedence, highest to lowest); each match is captured under the name given to .as
48
+ rule(:mult_op) { str('*').as(:multiply) | str('/').as(:divide) | str('%').as(:modulo) }
49
+ rule(:add_op) { str('+').as(:add) | str('-').as(:subtract) }
50
+ rule(:comp_op) do
51
+ str('>=').as(:>=) | str('<=').as(:<=) | str('==').as(:==) | # two-character operators come first so '>=' is not consumed as '>'
52
+ str('!=').as(:!=) | str('>').as(:>) | str('<').as(:<)
53
+ end
54
+ rule(:logical_and_op) { str('&').as(:and) }
55
+ rule(:logical_or_op) { str('|').as(:or) }
56
+
57
+ # Expressions with proper precedence (using left recursion elimination): each level below is `operand (op operand)*` built on the next-tighter level
58
+ rule(:primary_expr) do
59
+ str('(') >> ws? >> expression >> ws? >> str(')') | # parenthesised sub-expression
60
+ function_call |
61
+ input_reference |
62
+ declaration_reference | # NOTE(review): tried before literal, so `true`/`false` look like they match here as :decl_ref and never reach the boolean rule - confirm this is intended
63
+ literal
64
+ end
65
+
66
+ # Function calls: fn(:name, arg1, arg2, ...) - the text "fn(" is matched as one string, so no space is allowed before the parenthesis
67
+ rule(:function_call) do
68
+ str('fn(') >> ws? >>
69
+ symbol.as(:fn_name) >>
70
+ (str(',') >> ws? >> expression).repeat(0).as(:args) >> # zero or more arguments; whitespace is only consumed after each comma, not before it
71
+ ws? >> str(')')
72
+ end
73
+
74
+ # Multiplication/Division (left-associative): captured as :left plus a list of {op, right} pairs under :ops
75
+ rule(:mult_expr) do
76
+ primary_expr.as(:left) >>
77
+ (space? >> mult_op.as(:op) >> space? >> primary_expr.as(:right)).repeat.as(:ops)
78
+ end
79
+
80
+ # Addition/Subtraction (left-associative): same :left / :ops shape, with mult_expr operands
81
+ rule(:add_expr) do
82
+ mult_expr.as(:left) >>
83
+ (space? >> add_op.as(:op) >> space? >> mult_expr.as(:right)).repeat.as(:ops)
84
+ end
85
+
86
+ # Comparison operators: an add_expr optionally followed by one comparison, captured under :comp
87
+ rule(:comp_expr) do
88
+ add_expr.as(:left) >>
89
+ (space? >> comp_op.as(:op) >> space? >> add_expr.as(:right)).maybe.as(:comp) # .maybe, not .repeat: chains such as a < b < c are not matched here
90
+ end
91
+
92
+ # Logical AND (higher precedence than OR)
93
+ rule(:logical_and_expr) do
94
+ comp_expr.as(:left) >>
95
+ (space? >> logical_and_op.as(:op) >> space? >> comp_expr.as(:right)).repeat.as(:ops)
96
+ end
97
+
98
+ # Logical OR (lowest precedence)
99
+ rule(:logical_or_expr) do
100
+ logical_and_expr.as(:left) >>
101
+ (space? >> logical_or_op.as(:op) >> space? >> logical_and_expr.as(:right)).repeat.as(:ops)
102
+ end
103
+
104
+ rule(:expression) { logical_or_expr } # entry point for any expression: the loosest-binding level
105
+
106
+ # Input references: input.field or input.field.subfield (the dotted path after "input." is captured under :input_ref)
107
+ rule(:input_reference) do
108
+ str('input.') >> input_path.as(:input_ref)
109
+ end
110
+
111
+ rule(:input_path) do
112
+ identifier >> (str('.') >> identifier).repeat # one identifier plus any number of ".identifier" segments
113
+ end
114
+
115
+ # Declaration references: just identifier, captured under :decl_ref
116
+ rule(:declaration_reference) do
117
+ identifier.as(:decl_ref)
118
+ end
119
+
120
+ # Input declarations: either a nested `array :name do ... end` block or a single typed field
121
+ rule(:input_declaration) do
122
+ nested_array_declaration | simple_input_declaration # the array form is tried first
123
+ end
124
+
125
+ rule(:simple_input_declaration) do
126
+ ws? >> type_name.as(:type) >> space >> symbol.as(:name) >>
127
+ (str(',') >> ws? >> domain_spec).maybe.as(:domain) >> ws? >> newline? # optional ", domain: ..." suffix captured under :domain
128
+ end
129
+
130
+ rule(:nested_array_declaration) do
131
+ ws? >> str('array') >> space >> symbol.as(:name) >> space >> do_kw >> ws? >> newline? >>
132
+ (ws? >> input_declaration >> ws?).repeat.as(:nested_fields) >> # recursive: nested fields may themselves be array blocks
133
+ ws? >> end_kw >> ws? >> newline?
134
+ end
135
+
136
+ rule(:domain_spec) do
137
+ str('domain:') >> ws? >> domain_value.as(:domain_value)
138
+ end
139
+
140
+ rule(:domain_value) do
141
+ # Ranges: 1..10, 0.0..100.0 - NOTE(review): this comment previously also listed 1...10, but range_value only matches '..', so a three-dot range does not appear to be accepted - confirm
142
+ range_value |
143
+ # Word arrays: %w[active inactive]
144
+ word_array_value |
145
+ # String arrays: ["active", "inactive"]
146
+ string_array_value
147
+ end
148
+
149
+ rule(:range_value) do
150
+ (float | integer) >> str('..') >> (float | integer) # no whitespace is allowed around '..'
151
+ end
152
+
153
+ rule(:word_array_value) do
154
+ str('%w[') >> (identifier >> space?).repeat.as(:words) >> str(']') # words must be identifiers, separated by optional whitespace
155
+ end
156
+
157
+ rule(:string_array_value) do
158
+ str('[') >> space? >>
159
+ (string_literal >> (str(',') >> space? >> string_literal).repeat).maybe >> # comma-separated string literals; the list may be empty
160
+ space? >> str(']')
161
+ end
162
+
163
+ # Value declarations: either the cascade form (`value :name do ... end`) or the single-expression form (`value :name, expr`)
164
+ rule(:value_declaration) do
165
+ cascade_value_declaration | simple_value_declaration # the cascade form is tried first
166
+ end
167
+
168
+ rule(:simple_value_declaration) do
169
+ ws? >> value_kw.as(:type) >> space >> symbol.as(:name) >> str(',') >> ws? >>
170
+ expression.as(:expr) >> ws? >> newline?
171
+ end
172
+
173
+ rule(:cascade_value_declaration) do
174
+ ws? >> value_kw.as(:type) >> space >> symbol.as(:name) >> space >> do_kw >> ws? >> newline? >>
175
+ (ws? >> cascade_case >> ws?).repeat.as(:cases) >> # zero or more `on` / `base` lines
176
+ ws? >> end_kw >> ws? >> newline?
177
+ end
178
+
179
+ rule(:cascade_case) do
180
+ (ws? >> str('on') >> space >> identifier.as(:condition) >> str(',') >> ws? >> # `on <identifier>, <expression>`: the condition is a bare identifier
181
+ expression.as(:result) >> ws? >> newline?) |
182
+ (ws? >> str('base') >> space >> expression.as(:base_result) >> ws? >> newline?) # `base <expression>`
183
+ end
184
+
185
+ # Trait declarations: `trait :name, <expression>`
186
+ rule(:trait_declaration) do
187
+ ws? >> trait_kw.as(:type) >> space >> symbol.as(:name) >> str(',') >> ws? >>
188
+ expression.as(:expr) >> ws? >> newline?
189
+ end
190
+
191
+ # Input block: `input do ... end` containing zero or more input declarations
192
+ rule(:input_block) do
193
+ ws? >> input_kw >> space >> do_kw >> ws? >> newline? >>
194
+ (ws? >> input_declaration >> ws?).repeat.as(:declarations) >>
195
+ ws? >> end_kw >> ws? >> newline?
196
+ end
197
+
198
+ # Schema structure: the input block is required and must come first, followed by any mix of value and trait declarations
199
+ rule(:schema_body) do
200
+ input_block.as(:input) >>
201
+ (ws? >> (value_declaration | trait_declaration) >> ws?).repeat.as(:declarations)
202
+ end
203
+
204
+ rule(:schema) do
205
+ ws? >> schema_kw >> space >> do_kw >> ws? >> newline? >>
206
+ schema_body >>
207
+ ws? >> end_kw >> ws? # whitespace and comments may follow the closing `end`
208
+ end
209
+
210
+ root(:schema) # parsing starts at the schema rule
211
+ end
212
+ end
213
+ end
214
+ end