lexer_kit 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +157 -0
  4. data/exe/lexer_kit +7 -0
  5. data/ext/lexer_kit_rust/Cargo.toml +17 -0
  6. data/ext/lexer_kit_rust/extconf.rb +6 -0
  7. data/ext/lexer_kit_rust/src/deserializer.rs +213 -0
  8. data/ext/lexer_kit_rust/src/dfa.rs +217 -0
  9. data/ext/lexer_kit_rust/src/fast_stream.rs +468 -0
  10. data/ext/lexer_kit_rust/src/lib.rs +248 -0
  11. data/ext/lexer_kit_rust/src/opcodes.rs +718 -0
  12. data/ext/lexer_kit_rust/src/safety_test.rs +498 -0
  13. data/ext/lexer_kit_rust/src/trie.rs +206 -0
  14. data/ext/lexer_kit_rust/src/types.rs +319 -0
  15. data/ext/lexer_kit_rust/src/vm.rs +258 -0
  16. data/lib/lexer_kit/builder/compiler.rb +596 -0
  17. data/lib/lexer_kit/builder/conflict_detector.rb +209 -0
  18. data/lib/lexer_kit/builder/mode_def.rb +36 -0
  19. data/lib/lexer_kit/builder/token_def.rb +65 -0
  20. data/lib/lexer_kit/builder/validator.rb +84 -0
  21. data/lib/lexer_kit/builder.rb +230 -0
  22. data/lib/lexer_kit/cli/commands.rb +389 -0
  23. data/lib/lexer_kit/cli.rb +88 -0
  24. data/lib/lexer_kit/core/diagnostic.rb +103 -0
  25. data/lib/lexer_kit/core/source.rb +154 -0
  26. data/lib/lexer_kit/core/span.rb +80 -0
  27. data/lib/lexer_kit/core/token.rb +120 -0
  28. data/lib/lexer_kit/core.rb +13 -0
  29. data/lib/lexer_kit/debug/disassembler.rb +143 -0
  30. data/lib/lexer_kit/debug/visualizer.rb +203 -0
  31. data/lib/lexer_kit/debug.rb +11 -0
  32. data/lib/lexer_kit/dfa/byte_class_builder.rb +69 -0
  33. data/lib/lexer_kit/dfa/case_folding.rb +45 -0
  34. data/lib/lexer_kit/dfa/char_class_collector.rb +81 -0
  35. data/lib/lexer_kit/dfa/dfa_builder.rb +95 -0
  36. data/lib/lexer_kit/dfa/dfa_minimizer.rb +158 -0
  37. data/lib/lexer_kit/dfa/nfa.rb +304 -0
  38. data/lib/lexer_kit/dfa/regex_ast.rb +64 -0
  39. data/lib/lexer_kit/dfa/regex_parser.rb +385 -0
  40. data/lib/lexer_kit/dfa/utf8_range.rb +175 -0
  41. data/lib/lexer_kit/dfa/utf8_range_pattern.rb +17 -0
  42. data/lib/lexer_kit/dfa.rb +37 -0
  43. data/lib/lexer_kit/errors.rb +76 -0
  44. data/lib/lexer_kit/format/lkb1/decoder.rb +126 -0
  45. data/lib/lexer_kit/format/lkb1.rb +199 -0
  46. data/lib/lexer_kit/format/lkt1.rb +111 -0
  47. data/lib/lexer_kit/format.rb +19 -0
  48. data/lib/lexer_kit/ir/compiled_program.rb +228 -0
  49. data/lib/lexer_kit/ir/constant_pool.rb +107 -0
  50. data/lib/lexer_kit/ir/dfa_table.rb +125 -0
  51. data/lib/lexer_kit/ir/instruction.rb +50 -0
  52. data/lib/lexer_kit/ir/jump_table.rb +94 -0
  53. data/lib/lexer_kit/ir/keyword_table.rb +168 -0
  54. data/lib/lexer_kit/ir/opcode.rb +96 -0
  55. data/lib/lexer_kit/ir/serializer.rb +249 -0
  56. data/lib/lexer_kit/ir.rb +16 -0
  57. data/lib/lexer_kit/runner.rb +114 -0
  58. data/lib/lexer_kit/trie.rb +170 -0
  59. data/lib/lexer_kit/version.rb +5 -0
  60. data/lib/lexer_kit.rb +155 -0
  61. metadata +119 -0
@@ -0,0 +1,209 @@
1
# frozen_string_literal: true

module LexerKit
  class Builder
    # ConflictDetector analyzes token patterns to detect potential conflicts.
    # A conflict occurs when multiple patterns can match the same input.
    class ConflictDetector
      # One detected conflict between two token definitions.
      # type:        kind of conflict (:identical, :prefix, :literal_matches_regex, ...)
      # token1/2:    the token names involved
      # description: human-readable explanation for warning output
      Conflict = Struct.new(:type, :token1, :token2, :description, keyword_init: true)

      # @param builder [LexerKit::Builder] builder whose definitions are analyzed
      def initialize(builder)
        @builder = builder
        @conflicts = []
      end

      # Detect all conflicts in the lexer definition
      # @return [Array<Conflict>]
      def detect
        @conflicts = []

        @builder.mode_defs.each do |_mode_name, mode_def|
          detect_mode_conflicts(mode_def)
        end

        @conflicts
      end

      private

      # Check every unordered pair of tokens within a single mode.
      # Conflicts only matter within a mode, never across modes.
      def detect_mode_conflicts(mode_def)
        tokens = mode_def.tokens

        # Check each pair of tokens
        tokens.each_with_index do |token1, i|
          tokens[(i + 1)..].each do |token2|
            check_pair(token1, token2)
          end
        end
      end

      # Dispatch a pair to the appropriate checker based on pattern kinds.
      def check_pair(token1, token2)
        # Skip if both have the same name (same token defined multiple times is ok)
        return if token1.name == token2.name

        pattern1 = token1.pattern
        pattern2 = token2.pattern

        # BUGFIX: delimited tokens are created with a nil pattern (they are
        # matched by the delimiter scanner, not by a DFA pattern), yet they
        # are stored in ModeDef#tokens like any other token. Without this
        # guard a nil pattern falls through to check_regex_pair, where
        # DFA::RegexAST::Regex.parse(nil) raises outside any rescue and
        # crashes the whole detection pass.
        return if pattern1.nil? || pattern2.nil?

        if pattern1.is_a?(String) && pattern2.is_a?(String)
          check_literal_pair(token1, token2)
        elsif pattern1.is_a?(String) && pattern2.is_a?(Regexp)
          check_literal_regex_pair(token1, token2)
        elsif pattern1.is_a?(Regexp) && pattern2.is_a?(String)
          check_literal_regex_pair(token2, token1)
        else
          check_regex_pair(token1, token2)
        end
      end

      # Report identical literals and literal prefix overlaps.
      def check_literal_pair(token1, token2)
        lit1 = token1.pattern
        lit2 = token2.pattern

        if lit1 == lit2
          @conflicts << Conflict.new(
            type: :identical,
            token1: token1.name,
            token2: token2.name,
            description: "identical patterns '#{lit1}' (#{token1.name} wins)"
          )
        elsif lit1.start_with?(lit2)
          # lit2 is a prefix of lit1 - this is usually OK due to longest-match
          # But we should warn if they have the same first byte
          @conflicts << Conflict.new(
            type: :prefix,
            token1: token2.name,
            token2: token1.name,
            description: "'#{lit2}' is a prefix of '#{lit1}' (longest match: #{token1.name} preferred for '#{lit1}')"
          )
        elsif lit2.start_with?(lit1)
          @conflicts << Conflict.new(
            type: :prefix,
            token1: token1.name,
            token2: token2.name,
            description: "'#{lit1}' is a prefix of '#{lit2}' (longest match: #{token2.name} preferred for '#{lit2}')"
          )
        end
      end

      # Report a literal that is also matched by a regex token; the winner
      # is decided by definition order in the compiled lexer.
      def check_literal_regex_pair(literal_token, regex_token)
        literal = literal_token.pattern
        regex = regex_token.pattern

        # Check if the literal matches the regex
        return unless regex.match?(literal)

        @conflicts << Conflict.new(
          type: :literal_matches_regex,
          token1: literal_token.name,
          token2: regex_token.name,
          description: "'#{literal}' matches regex /#{regex.source}/ " \
                       "(definition order determines winner: #{literal_token.name} if defined first)"
        )
      end

      # Check two regex patterns for overlap. Cheap first-byte intersection
      # gates the expensive product-DFA search; if DFA construction fails we
      # degrade to a first-byte-overlap warning.
      def check_regex_pair(token1, token2)
        # For regex pairs, we need to check if they can match the same string
        # We do this by building a combined DFA and checking for states
        # that accept both patterns

        pattern1 = token1.pattern
        pattern2 = token2.pattern

        # Check first byte overlap
        regex1 = DFA::RegexAST::Regex.parse(pattern1)
        regex2 = DFA::RegexAST::Regex.parse(pattern2)
        first_bytes1 = DFA.first_byte_set(regex1)
        first_bytes2 = DFA.first_byte_set(regex2)
        common_bytes = first_bytes1 & first_bytes2

        return if common_bytes.empty?

        # Build combined DFA and check for multi-accept states
        begin
          overlapping_examples = find_overlapping_strings(pattern1, pattern2)

          if overlapping_examples.any?
            examples_str = overlapping_examples.first(3).map(&:inspect).join(", ")
            @conflicts << Conflict.new(
              type: :regex_overlap,
              token1: token1.name,
              token2: token2.name,
              description: "both patterns can match: #{examples_str} " \
                           "(definition order determines winner)"
            )
          end
        rescue StandardError
          # If DFA construction fails, just report potential overlap based on first bytes
          if common_bytes.size <= 10
            bytes_str = common_bytes.to_a.map { |b| b.chr.inspect }.join(", ")
            @conflicts << Conflict.new(
              type: :first_byte_overlap,
              token1: token1.name,
              token2: token2.name,
              description: "patterns share first bytes: #{bytes_str} (potential conflict)"
            )
          else
            @conflicts << Conflict.new(
              type: :first_byte_overlap,
              token1: token1.name,
              token2: token2.name,
              description: "patterns share #{common_bytes.size} first bytes (potential conflict)"
            )
          end
        end
      end

      # Find example strings that both patterns can match by walking the
      # product of the two DFAs breadth-first (bounded depth, <= 5 examples).
      def find_overlapping_strings(pattern1, pattern2)
        examples = []

        # Build DFA for each pattern
        regex1 = DFA::RegexAST::Regex.parse(pattern1)
        regex2 = DFA::RegexAST::Regex.parse(pattern2)
        dfa1 = DFA.compile_regex(regex1, 1)
        dfa2 = DFA.compile_regex(regex2, 2)

        # BFS to find accepting strings
        # State: [dfa1_state, dfa2_state, accumulated_string]
        # NOTE(review): state 1 appears to be the start state and state 0 the
        # dead state in this DFA encoding — confirm against DFA.compile_regex.
        queue = [[1, 1, ""]]
        visited = Set.new
        max_depth = 10

        while queue.any? && examples.size < 5
          dfa1_state, dfa2_state, str = queue.shift

          next if str.length > max_depth
          next if visited.include?([dfa1_state, dfa2_state])

          visited << [dfa1_state, dfa2_state]

          # Check if both DFAs accept at this point
          if dfa1.accept(dfa1_state) && dfa2.accept(dfa2_state) && !str.empty?
            examples << str
            next
          end

          # Explore transitions (ASCII bytes only)
          (0..127).each do |byte|
            next_state1 = dfa1.transition(dfa1_state, byte)
            next_state2 = dfa2.transition(dfa2_state, byte)

            # Both must have valid transitions
            next if next_state1.zero? || next_state2.zero?

            char = begin
              byte.chr
            rescue StandardError
              next
            end
            next unless char.valid_encoding?

            queue << [next_state1, next_state2, str + char]
          end
        end

        examples
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
# frozen_string_literal: true

module LexerKit
  class Builder
    # ModeDef represents a lexer mode definition.
    #
    # A mode collects the token definitions that are active while the lexer
    # is in that mode, plus an optional delimited-text token reference.
    class ModeDef
      attr_reader :name, :tokens, :location
      attr_accessor :delimited

      # @param name [Symbol] mode name
      # @param location [Thread::Backtrace::Location, nil] DSL call site, if any
      def initialize(name, location: nil)
        @name = name
        @tokens = []
        @delimited = nil
        @location = location
      end

      # Append a token definition to this mode.
      def add_token(token_def)
        @tokens << token_def
      end

      # Get all literal tokens (for SWITCH_BYTE optimization)
      def literal_tokens
        @tokens.select { |token| token.literal? }
      end

      # Get all regex tokens
      def regex_tokens
        @tokens.select { |token| token.regex? }
      end

      # Compact debug representation showing name and token count.
      def inspect
        format("#<ModeDef :%s tokens=%d>", @name, @tokens.size)
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
# frozen_string_literal: true

module LexerKit
  class Builder
    # TokenDef represents a token definition from the DSL.
    #
    # Holds the pattern (literal String, Regexp, or nil for delimited
    # tokens), mode-transition flags (push/pop), and compiler-assigned
    # attributes (inner_mode, token_id).
    class TokenDef
      attr_reader :name, :pattern, :skip, :push, :pop, :location, :meta, :delimited, :delimiter, :escape
      attr_accessor :inner_mode, :token_id

      # @param name [Symbol] token name
      # @param pattern [String, Regexp, nil] pattern (nil for delimited tokens)
      # @param skip [Boolean] suppress emission of this token
      # @param push [Symbol, nil] mode to push after a match
      # @param pop [Boolean] pop the mode stack after a match
      # @param delimited [Boolean] whether this is a delimited-text token
      # @param delimiter [String, nil] closing delimiter for delimited tokens
      # @param escape [String, nil] escape sequence preventing delimiter match
      # @param meta [Hash, nil] arbitrary user metadata
      # @param location [Thread::Backtrace::Location, nil] DSL call site
      def initialize(
        name:,
        pattern:,
        skip: false,
        push: nil,
        pop: false,
        delimited: false,
        delimiter: nil,
        escape: nil,
        meta: nil,
        location: nil
      )
        @name = name
        @pattern = pattern
        @skip = skip
        @push = push
        @pop = pop
        @delimited = delimited
        @delimiter = delimiter
        @escape = escape
        @meta = meta
        # Filled in later by the compiler.
        @inner_mode = nil
        @token_id = nil
        @location = location
      end

      # Check if pattern is a literal string
      def literal?
        @pattern.is_a?(String)
      end

      # Check if pattern is a regex
      def regex?
        # Short-circuit keeps the RegexAstProvider constant from being
        # resolved when the pattern is already a plain Regexp.
        return true if @pattern.is_a?(Regexp)

        @pattern.is_a?(LexerKit::RegexAstProvider)
      end

      # Check if this is a delimited token (like TEXT in templates)
      def delimited?
        @delimited
      end

      # Compact debug representation listing only the flags that are set.
      def inspect
        flags = []
        flags << @pattern.inspect if @pattern
        flags << "skip" if @skip
        flags << "push=#{@push}" if @push
        flags << "pop" if @pop
        flags << "delimited=#{@delimiter.inspect}" if @delimited
        flags << "escape=#{@escape.inspect}" if @escape
        flags << "meta=#{@meta.inspect}" if @meta
        (["#<TokenDef :#{@name}"] + flags + [">"]).join(" ")
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
# frozen_string_literal: true

module LexerKit
  class Builder
    # Validator checks builder definitions for errors before compilation.
    #
    # Currently verifies that every `push:` target names a defined mode and
    # that no token uses the reserved :INVALID name. Errors include a
    # did-you-mean suggestion based on edit distance.
    class Validator
      # @param builder [LexerKit::Builder] builder to validate
      def initialize(builder)
        @builder = builder
      end

      # Validate all definitions
      # @raise [LexerKit::BuildError] if validation fails
      def validate!
        validate_mode_references!
        validate_reserved_token_names!
      end

      private

      # :INVALID is emitted by the runtime for unmatched input, so user
      # definitions may not claim it.
      def validate_reserved_token_names!
        offender = @builder.token_defs.find { |td| td.name == :INVALID }
        return unless offender

        raise LexerKit::BuildError.from_location(offender.location, ":INVALID is reserved for error tokens")
      end

      # Every push: target must name a mode defined in the builder.
      def validate_mode_references!
        known_modes = @builder.mode_defs.keys.to_set

        @builder.token_defs.each do |td|
          target = td.push
          next if target.nil? || known_modes.include?(target)

          raise_undefined_mode_error(td, known_modes)
        end
      end

      # Raise a BuildError for an unknown mode, with a suggestion if a
      # similarly named mode exists.
      def raise_undefined_mode_error(token_def, defined_modes)
        mode_name = token_def.push
        suggestion = find_similar_mode(mode_name, defined_modes)

        message = "undefined mode :#{mode_name}"
        notes = suggestion ? ["did you mean :#{suggestion}?"] : nil

        raise LexerKit::BuildError.from_location(token_def.location, message, notes: notes)
      end

      # Return the closest candidate within an edit-distance threshold
      # (half the target length, at least 2), or nil if none qualifies.
      def find_similar_mode(target, candidates)
        needle = target.to_s
        limit = [needle.length / 2, 2].max

        best = nil
        best_distance = nil
        candidates.each do |candidate|
          distance = levenshtein(needle, candidate.to_s)
          next if distance > limit
          next if best_distance && distance >= best_distance

          best = candidate
          best_distance = distance
        end
        best
      end

      # Classic Levenshtein edit distance, computed with a rolling row
      # (O(len2) memory instead of a full matrix).
      def levenshtein(str1, str2)
        return str2.length if str1.empty?
        return str1.length if str2.empty?

        previous_row = (0..str2.length).to_a

        (1..str1.length).each do |i|
          current_row = [i]
          (1..str2.length).each do |j|
            substitution_cost = str1[i - 1] == str2[j - 1] ? 0 : 1
            current_row << [
              previous_row[j] + 1,                      # deletion
              current_row[j - 1] + 1,                   # insertion
              previous_row[j - 1] + substitution_cost   # substitution
            ].min
          end
          previous_row = current_row
        end

        previous_row[str2.length]
      end
    end
  end
end
@@ -0,0 +1,230 @@
1
# frozen_string_literal: true

require_relative "builder/token_def"
require_relative "builder/mode_def"
require_relative "builder/compiler"
require_relative "builder/conflict_detector"
require_relative "builder/validator"

module LexerKit
  # Builder provides the DSL for defining lexers.
  #
  # Token/mode definitions accumulate in @token_defs / @mode_defs and are
  # turned into IR by #compile. The DSL methods use caller_locations(1, 1)
  # to capture the user's call site for error reporting, so their stack
  # depth relative to user code must not change.
  class Builder
    # DSL methods for defining lexers.
    # These methods are available within the `LexerKit.build` block.
    module DSL
      # Define a token
      # @param name [Symbol] token name
      # @param pattern [String, Regexp] pattern to match
      # @param skip [Boolean] skip this token (don't emit)
      # @param push [Symbol] push mode after match
      # @param pop [Boolean] pop mode after match
      # @param meta [Hash] optional metadata for this token
      def token(name, pattern, skip: false, push: nil, pop: false, meta: nil)
        location = caller_locations(1, 1).first
        token_def = TokenDef.new(
          name: name,
          pattern: pattern,
          skip: skip,
          push: push,
          pop: pop,
          meta: meta,
          location: location
        )
        # Registered both in the current mode and in the flat global list.
        current_mode_def.add_token(token_def)
        @token_defs << token_def
      end

      # Define a keyword (matched after identifier, checked by lookup)
      # @param name [Symbol] keyword token name
      # @param value [String] keyword string
      def keyword(name, value)
        # Frozen so the hash key is stable even if the caller mutates value.
        @keywords[value.freeze] = name
      end

      # Define multiple keywords at once
      # @param names [Array<Symbol>] keyword values (upcased for token names)
      # @example
      #   define_keywords :if, :else, :while
      #   # equivalent to:
      #   #   keyword :IF, "if"
      #   #   keyword :ELSE, "else"
      #   #   keyword :WHILE, "while"
      def define_keywords(*names)
        names.each do |name|
          token_name = name.to_s.upcase.to_sym
          value = name.to_s.downcase
          keyword(token_name, value)
        end
      end

      # Define a mode
      # @param name [Symbol] mode name
      # @yield mode definition block
      def mode(name, &block)
        location = caller_locations(1, 1).first
        mode_def = ModeDef.new(name, location: location)
        @mode_defs[name] = mode_def

        # Save/restore @current_mode so nested mode blocks unwind correctly;
        # the block is instance_eval'd so DSL methods target this builder.
        old_mode = @current_mode
        @current_mode = name
        instance_eval(&block) if block
        @current_mode = old_mode
      end

      # Define a delimited section (for templates)
      # Scans text until the delimiter is found, then switches to inner mode.
      # The closing delimiter should be handled by a token in the inner mode with `pop: true`.
      #
      # @param name [Symbol] text token name
      # @param delimiter [String] delimiter that marks the end of text
      # @param escape [String, nil] escape sequence that prevents delimiter match
      #
      # @example
      #   delimited :TEXT, delimiter: "{["
      #   mode :tag do
      #     token :CLOSE, "]}", pop: true  # closing delimiter handled here
      #   end
      #
      # @example with escape
      #   delimited :TEXT, delimiter: "{[", escape: "{[{]}" do
      #     # {[{]} is treated as escaped delimiter and does not close TEXT
      #   end
      def delimited(name, delimiter: nil, escape: nil, pop: false, skip: false, &block)
        unless delimiter
          location = caller_locations(1, 1).first
          message = "delimited requires `delimiter:` parameter"
          raise LexerKit::BuildError.from_location(location, message)
        end
        location = caller_locations(1, 1).first

        token_def = add_delimited_token(
          name: name,
          delimiter: delimiter,
          escape: escape,
          pop: pop,
          skip: skip,
          location: location
        )

        # If block given, define inner mode
        return unless block

        inner_mode = :"#{name}_inner"
        mode(inner_mode, &block)
        token_def.inner_mode = inner_mode
      end

      # Define a scan-until section with open/close delimiters.
      #
      # Sugar for: a skipped OPEN token that pushes an internal mode whose
      # only member is a delimited token closing on `close` with pop: true.
      #
      # @param name [Symbol] token name for the scanned content
      # @param open [String] opening delimiter
      # @param close [String] closing delimiter
      # @param escape [String, nil] escape sequence that prevents close match
      # @param skip [Boolean] skip emitting the content token
      def scan_until(name, open: nil, close: nil, escape: nil, skip: false)
        location = caller_locations(1, 1).first
        unless open
          message = "scan_until requires `open:` parameter"
          raise LexerKit::BuildError.from_location(location, message)
        end
        unless close
          message = "scan_until requires `close:` parameter"
          raise LexerKit::BuildError.from_location(location, message)
        end

        # next_internal_mode_name bumps @internal_mode_counter, so the OPEN
        # token name below uses the same counter value as the mode name.
        mode_name = next_internal_mode_name(name)
        token :"#{name}_OPEN_#{@internal_mode_counter}", open, skip: true, push: mode_name

        mode(mode_name) do
          delimited(name, delimiter: close, escape: escape, pop: true, skip: skip)
        end
      end

      # Get or set the version
      # @param v [Integer, nil] version to set, or nil to get current
      # @return [Integer] current version
      def version(v = nil)
        if v.nil?
          @version
        else
          @version = v
        end
      end
    end

    include DSL

    attr_reader :token_defs, :mode_defs, :keywords

    # Error token is always :INVALID
    # @return [Symbol] :INVALID
    def error_token
      :INVALID
    end

    # Initialize with an implicit :default mode as the current mode.
    def initialize
      @token_defs = []
      @mode_defs = { default: ModeDef.new(:default, location: nil) }
      @current_mode = :default
      @keywords = {}
      @version = 1
      @internal_mode_counter = 0
    end

    # Compile to IR
    # @return [IR::CompiledProgram]
    def compile
      # Validation raises BuildError before any compilation work happens.
      Validator.new(self).validate!
      program = Compiler.new(self).compile
      program.load_native! if LexerKit.native?
      program
    end

    # Check for pattern conflicts
    # @return [Array<ConflictDetector::Conflict>]
    def check_conflicts
      ConflictDetector.new(self).detect
    end

    # Check for pattern conflicts and print warnings
    # @param io [IO] output stream (default: $stderr)
    # @return [Array<ConflictDetector::Conflict>]
    def check_conflicts!(io: $stderr)
      conflicts = check_conflicts
      conflicts.each do |conflict|
        io.puts "warning: #{conflict.token1} vs #{conflict.token2}: #{conflict.description}"
      end
      conflicts
    end

    private

    # ModeDef for the mode currently targeted by DSL calls.
    def current_mode_def
      @mode_defs[@current_mode]
    end

    # Generate unique internal mode name for scan_until etc.
    def next_internal_mode_name(base_name)
      @internal_mode_counter += 1
      :"__#{base_name}_#{@internal_mode_counter}"
    end

    # Internal helper for creating delimited token definitions.
    # Delimited tokens have a nil pattern; the mode also records the token
    # in its `delimited` slot so the compiler can find it directly.
    def add_delimited_token(name:, delimiter:, location:, escape: nil, pop: false, skip: false)
      token_def = TokenDef.new(
        name: name,
        pattern: nil,
        delimited: true,
        delimiter: delimiter,
        escape: escape,
        pop: pop,
        skip: skip,
        location: location
      )
      current_mode_def.add_token(token_def)
      current_mode_def.delimited = token_def
      @token_defs << token_def
      token_def
    end
  end
end