parsanol 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +546 -0
  3. data/Cargo.toml +9 -0
  4. data/HISTORY.txt +12 -0
  5. data/LICENSE +23 -0
  6. data/README.adoc +487 -0
  7. data/Rakefile +135 -0
  8. data/ext/parsanol_native/Cargo.toml +34 -0
  9. data/ext/parsanol_native/extconf.rb +15 -0
  10. data/ext/parsanol_native/src/lib.rs +17 -0
  11. data/lib/parsanol/ast_visitor.rb +122 -0
  12. data/lib/parsanol/atoms/alternative.rb +122 -0
  13. data/lib/parsanol/atoms/base.rb +202 -0
  14. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  15. data/lib/parsanol/atoms/capture.rb +38 -0
  16. data/lib/parsanol/atoms/context.rb +334 -0
  17. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  18. data/lib/parsanol/atoms/custom.rb +110 -0
  19. data/lib/parsanol/atoms/cut.rb +66 -0
  20. data/lib/parsanol/atoms/dsl.rb +96 -0
  21. data/lib/parsanol/atoms/dynamic.rb +39 -0
  22. data/lib/parsanol/atoms/entity.rb +75 -0
  23. data/lib/parsanol/atoms/ignored.rb +37 -0
  24. data/lib/parsanol/atoms/infix.rb +162 -0
  25. data/lib/parsanol/atoms/lookahead.rb +82 -0
  26. data/lib/parsanol/atoms/named.rb +74 -0
  27. data/lib/parsanol/atoms/re.rb +83 -0
  28. data/lib/parsanol/atoms/repetition.rb +259 -0
  29. data/lib/parsanol/atoms/scope.rb +35 -0
  30. data/lib/parsanol/atoms/sequence.rb +194 -0
  31. data/lib/parsanol/atoms/str.rb +103 -0
  32. data/lib/parsanol/atoms/visitor.rb +91 -0
  33. data/lib/parsanol/atoms.rb +46 -0
  34. data/lib/parsanol/buffer.rb +133 -0
  35. data/lib/parsanol/builder_callbacks.rb +353 -0
  36. data/lib/parsanol/cause.rb +122 -0
  37. data/lib/parsanol/context.rb +39 -0
  38. data/lib/parsanol/convenience.rb +36 -0
  39. data/lib/parsanol/edit_tracker.rb +111 -0
  40. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  41. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  42. data/lib/parsanol/error_reporter/tree.rb +63 -0
  43. data/lib/parsanol/error_reporter.rb +100 -0
  44. data/lib/parsanol/expression/treetop.rb +154 -0
  45. data/lib/parsanol/expression.rb +106 -0
  46. data/lib/parsanol/fast_mode.rb +149 -0
  47. data/lib/parsanol/first_set.rb +79 -0
  48. data/lib/parsanol/grammar_builder.rb +177 -0
  49. data/lib/parsanol/incremental_parser.rb +177 -0
  50. data/lib/parsanol/interval_tree.rb +217 -0
  51. data/lib/parsanol/lazy_result.rb +179 -0
  52. data/lib/parsanol/lexer.rb +144 -0
  53. data/lib/parsanol/mermaid.rb +139 -0
  54. data/lib/parsanol/native/parser.rb +612 -0
  55. data/lib/parsanol/native/serializer.rb +248 -0
  56. data/lib/parsanol/native/transformer.rb +435 -0
  57. data/lib/parsanol/native/types.rb +42 -0
  58. data/lib/parsanol/native.rb +217 -0
  59. data/lib/parsanol/optimizer.rb +85 -0
  60. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  61. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  62. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  63. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  64. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  65. data/lib/parsanol/options/ruby_transform.rb +107 -0
  66. data/lib/parsanol/options/serialized.rb +94 -0
  67. data/lib/parsanol/options/zero_copy.rb +128 -0
  68. data/lib/parsanol/options.rb +20 -0
  69. data/lib/parsanol/parallel.rb +133 -0
  70. data/lib/parsanol/parser.rb +182 -0
  71. data/lib/parsanol/parslet.rb +151 -0
  72. data/lib/parsanol/pattern/binding.rb +91 -0
  73. data/lib/parsanol/pattern.rb +159 -0
  74. data/lib/parsanol/pool.rb +219 -0
  75. data/lib/parsanol/pools/array_pool.rb +75 -0
  76. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  77. data/lib/parsanol/pools/position_pool.rb +92 -0
  78. data/lib/parsanol/pools/slice_pool.rb +64 -0
  79. data/lib/parsanol/position.rb +94 -0
  80. data/lib/parsanol/resettable.rb +29 -0
  81. data/lib/parsanol/result.rb +46 -0
  82. data/lib/parsanol/result_builder.rb +208 -0
  83. data/lib/parsanol/result_stream.rb +261 -0
  84. data/lib/parsanol/rig/rspec.rb +71 -0
  85. data/lib/parsanol/rope.rb +81 -0
  86. data/lib/parsanol/scope.rb +104 -0
  87. data/lib/parsanol/slice.rb +146 -0
  88. data/lib/parsanol/source/line_cache.rb +109 -0
  89. data/lib/parsanol/source.rb +180 -0
  90. data/lib/parsanol/source_location.rb +167 -0
  91. data/lib/parsanol/streaming_parser.rb +124 -0
  92. data/lib/parsanol/string_view.rb +195 -0
  93. data/lib/parsanol/transform.rb +226 -0
  94. data/lib/parsanol/version.rb +5 -0
  95. data/lib/parsanol/wasm/README.md +80 -0
  96. data/lib/parsanol/wasm/package.json +51 -0
  97. data/lib/parsanol/wasm/parsanol.js +252 -0
  98. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  99. data/lib/parsanol/wasm_parser.rb +240 -0
  100. data/lib/parsanol.rb +280 -0
  101. data/parsanol-ruby.gemspec +67 -0
  102. metadata +293 -0
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parses treetop-style expression strings and converts them to Parsanol atoms.
4
+ #
5
+ # This allows specifying parser rules as strings using treetop syntax instead
6
+ # of building atoms explicitly with the DSL.
7
+ #
8
+ # == Performance Note
9
+ #
10
+ # The expression parser is implemented in pure Ruby and is NOT accelerated by
11
+ # the Rust native extension. This is intentional and acceptable because:
12
+ #
13
+ # 1. Expression parsing happens at grammar definition time (once)
14
+ # 2. Expression strings are typically short (< 100 characters)
15
+ # 3. The resulting atoms can still be used with Rust-accelerated parsing
16
+ #
17
+ # If you need maximum performance for dynamically generated parsers, consider
18
+ # building atoms directly with the DSL (str, match, any, etc.) instead.
19
+ #
20
+ # == Syntax
21
+ #
22
+ # The treetop syntax supports:
23
+ #
24
+ # - Strings: 'hello' (single quotes)
25
+ # - Character classes: [a-z], [0-9]
26
+ # - Any character: .
27
+ # - Sequence: 'a' 'b' (concatenation)
28
+ # - Alternative: 'a' / 'b'
29
+ # - Optional: 'a' ? (space before ? required)
30
+ # - Zero or more: 'a' * (space before * required)
31
+ # - One or more: 'a' + (space before + required)
32
+ # - Repetition: 'a'{1,3}
33
+ # - Grouping: ('a' / 'b')+
34
+ #
35
+ # == Example
36
+ #
37
+ # # Using exp()
38
+ # rule(:word) { exp("'a' 'b' ?") }
39
+ #
40
+ # # Equivalent DSL:
41
+ # rule(:word) { str('a') >> str('b').maybe }
42
+ #
43
+ # == Result Usage
44
+ #
45
+ # The atoms produced by exp() can be used with Rust-accelerated parsing:
46
+ #
47
+ # atom = Parsanol.exp("'a' +")
48
+ #
49
+ # # Ruby parsing
50
+ # atom.parse('aaa')
51
+ #
52
+ # # Rust-accelerated parsing (if native extension available)
53
+ # Parsanol::Native.parse_with_grammar(atom, 'aaa')
54
+ #
55
+ module Parsanol
56
+ class Expression
57
+ include Parsanol
58
+
59
+ autoload :Treetop, 'parsanol/expression/treetop'
60
+
61
+ # Creates a parser atom from a treetop-style expression string.
62
+ #
63
+ # @param str [String] a treetop expression
64
+ # @param opts [Hash] options (:type => :treetop, default)
65
+ # @return [Parsanol::Expression] expression object (call #to_parslet for atom)
66
+ #
67
+ # @example
68
+ # expr = Parsanol::Expression.new("'a' 'b' ?")
69
+ # atom = expr.to_parslet
70
+ # atom.parse('a') # => "a"@0
71
+ #
72
+ def initialize(str, opts = {}, _context = self)
73
+ @type = opts[:type] || :treetop
74
+ @exp = str
75
+ @parslet = transform(parse(str))
76
+ end
77
+
78
+ # Transforms the parse tree into a parser atom.
79
+ #
80
+ # @param tree [Hash] parse tree from Treetop::Parser
81
+ # @return [Parsanol::Atoms::Base] parser atom
82
+ def transform(tree)
83
+ transform = Treetop::Transform.new
84
+ transform.apply(tree)
85
+ rescue StandardError
86
+ warn "Could not transform: #{tree.inspect}"
87
+ raise
88
+ end
89
+
90
+ # Parses the expression string and returns a parse tree.
91
+ #
92
+ # @param str [String] treetop expression
93
+ # @return [Hash] parse tree
94
+ def parse(str)
95
+ parser = Treetop::Parser.new
96
+ parser.parse(str)
97
+ end
98
+
99
+ # Returns the parser atom for this expression.
100
+ #
101
+ # @return [Parsanol::Atoms::Base] parser atom
102
+ def to_parslet
103
+ @parslet
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Fast mode patch for Parslet - matches vanilla parslet 2.0 behavior.
4
+ #
5
+ # For grammars with many small allocations (like EXPRESS), this is faster
6
+ # because the overhead of pool management exceeds the benefit.
7
+ #
8
+ # Usage:
9
+ # require 'parslet'
10
+ # require 'parsanol/fast_mode'
11
+ # # Now all parsing uses fast mode methods
12
+ #
13
+
14
+ module Parsanol
15
+ FAST_MODE = true
16
+
17
+ module Atoms
18
+ # Fast mode Context - matches vanilla parslet 2.0 simplicity
19
+ class Context
20
+ # Override try_with_cache with vanilla-like version (no eviction, no pooling)
21
+ def try_with_cache(obj, source, consume_all)
22
+ beg = source.bytepos
23
+
24
+ # Not in cache yet? Return early.
25
+ unless (entry = @cache[beg]&.[](obj.object_id))
26
+ result = obj.try(source, self, consume_all)
27
+
28
+ (@cache[beg] ||= {})[obj.object_id] = [result, source.bytepos - beg] if obj.cached?
29
+
30
+ return result
31
+ end
32
+
33
+ # Cache hit
34
+ result, advance = entry
35
+ source.bytepos = beg + advance
36
+ result
37
+ end
38
+ end
39
+
40
+ # Fast mode Sequence - direct array creation, no lazy evaluation
41
    class Sequence
      # Applies each child parslet in order and collects the values into a
      # [:sequence, ...] array that is handed to succ.
      #
      # Sequences of exactly one, two or three children have dedicated
      # branches; every other size (including zero) goes through the loop.
      # In all branches only the last child can receive a truthy consume_all.
      # The first child that fails ends the attempt with context.err, passing
      # that child's value as the single cause.
      #
      # @param source source object handed to each child's #apply
      # @param context context object handed to each child; receives #err on failure
      # @param consume_all flag applied to the last child only
      # @return result of succ(...) on success, result of context.err(...) otherwise
      def try(source, context, consume_all)
        parslets = @parslets

        case parslets.size
        when 1
          # Single child: it is also the last one, so it gets consume_all.
          success, value = parslets[0].apply(source, context, consume_all)
          success ? succ([:sequence, value]) : context.err(self, source, @error_msg, [value])
        when 2
          success, v1 = parslets[0].apply(source, context, false)
          return context.err(self, source, @error_msg, [v1]) unless success

          success, v2 = parslets[1].apply(source, context, consume_all)
          success ? succ([:sequence, v1, v2]) : context.err(self, source, @error_msg, [v2])
        when 3
          success, v1 = parslets[0].apply(source, context, false)
          return context.err(self, source, @error_msg, [v1]) unless success

          success, v2 = parslets[1].apply(source, context, false)
          return context.err(self, source, @error_msg, [v2]) unless success

          success, v3 = parslets[2].apply(source, context, consume_all)
          success ? succ([:sequence, v1, v2, v3]) : context.err(self, source, @error_msg, [v3])
        else
          # General case: same contract as above, driven by an index loop.
          result = [:sequence]
          last_idx = parslets.size - 1
          i = 0
          while i <= last_idx
            # consume_all is only meaningful for the final child.
            success, value = parslets[i].apply(source, context, consume_all && i == last_idx)
            return context.err(self, source, @error_msg, [value]) unless success

            result << value
            i += 1
          end
          succ(result)
        end
      end
    end
79
+
80
+ # Fast mode Repetition - direct array creation, no lazy evaluation
81
    class Repetition
      # Shared frozen result payload for an unmatched .maybe tagged :repetition,
      # so that path does not allocate a new array.
      EMPTY_REPETITION_ARRAY = [:repetition].freeze

      # Applies the wrapped parslet between @min and @max times and collects
      # the values into a [tag, ...] array that is handed to succ.
      #
      # Three paths exist:
      # 1. min 0 / max 1 (.maybe): one attempt, always succeeds; consume_all is
      #    not consulted on this path.
      # 2. min == max with max of 1, 2 or 3: unrolled attempts, the last of
      #    which receives consume_all; failures are reported with err_at at the
      #    current source.bytepos. An exact count of 0 matches no `when` and
      #    falls through to the general case.
      # 3. General case: loop until a failure or until max is reached, then
      #    check the minimum and, when consume_all is set, leftover input.
      #
      # @param source source object handed to the parslet's #apply
      # @param context context object; receives #err / #err_at on failure
      # @param consume_all whether remaining input after the repetition is an error
      # @return result of succ(...) on success, result of context.err / err_at otherwise
      def try(source, context, consume_all)
        parslet = @parslet
        min = @min
        max = @max
        tag = @tag

        # Fast path for .maybe
        if min.zero? && max == 1
          success, value = parslet.apply(source, context, false)
          return succ([tag, value]) if success

          # No match is still a success for an optional element.
          return succ(tag == :repetition ? EMPTY_REPETITION_ARRAY : [tag])
        end

        # Fast path for exact count
        if min == max && max && max <= 3
          case max
          when 1
            success, value = parslet.apply(source, context, consume_all)
            return success ? succ([tag, value]) : context.err_at(self, source, @error_msg, source.bytepos, [value])
          when 2
            success, v1 = parslet.apply(source, context, false)
            return context.err_at(self, source, @error_msg, source.bytepos, [v1]) unless success

            success, v2 = parslet.apply(source, context, consume_all)
            return success ? succ([tag, v1, v2]) : context.err_at(self, source, @error_msg, source.bytepos, [v2])
          when 3
            success, v1 = parslet.apply(source, context, false)
            return context.err_at(self, source, @error_msg, source.bytepos, [v1]) unless success

            success, v2 = parslet.apply(source, context, false)
            return context.err_at(self, source, @error_msg, source.bytepos, [v2]) unless success

            success, v3 = parslet.apply(source, context, consume_all)
            return success ? succ([tag, v1, v2, v3]) : context.err_at(self, source, @error_msg, source.bytepos, [v3])
          end
        end

        # General case
        start_pos = source.bytepos
        occ = 0
        result = [tag]
        break_on = nil

        loop do
          success, value = parslet.apply(source, context, false)
          # Remember the latest value; after a failed attempt it is used as
          # the cause of any error reported below.
          break_on = value
          break unless success

          occ += 1
          result << value
          # A nil max means "unbounded".
          break if max && occ >= max
        end

        if occ < min
          # Too few matches: rewind to where the repetition started.
          source.bytepos = start_pos
          return context.err_at(self, source, @error_msg, start_pos, [break_on])
        end

        return context.err(self, source, @unconsumed_msg, [break_on]) if consume_all && source.chars_left.positive?

        succ(result)
      end
    end
148
+ end
149
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ # FIRST Set Analysis for PEG Grammars
4
+ #
5
+ # FIRST sets help identify which terminals can appear at the beginning of
6
+ # a parse. This is essential for:
7
+ # 1. Automatic cut operator insertion (AC-FIRST algorithm)
8
+ # 2. Grammar analysis and optimization
9
+ # 3. Detecting ambiguous choices
10
+ #
11
+ # Reference: Mizushima et al. (2010) "Packrat Parsers Can Handle Practical
12
+ # Grammars in Mostly Constant Space"
13
+ #
14
+ module Parsanol
15
+ module FirstSet
16
+ # Sentinel value representing the empty string (ε)
17
+ EPSILON = :epsilon
18
+
19
+ # Compute the FIRST set for this parslet atom
20
+ # Returns a Set containing:
21
+ # - Terminal atoms (Str, Re) that can match first
22
+ # - EPSILON if the atom can match empty string
23
+ # - nil elements represent unknown/variable terminals (e.g., any)
24
+ #
25
+ # @return [Set] FIRST set containing terminal atoms or EPSILON
26
+ def first_set
27
+ @first_set ||= compute_first_set
28
+ end
29
+
30
+ # Clear cached FIRST set (useful after grammar modifications)
31
+ def clear_first_set_cache
32
+ @first_set = nil
33
+ end
34
+
35
+ protected
36
+
37
+ # Override in subclasses to compute FIRST set
38
+ # Default: conservative approximation (unknown)
39
+ def compute_first_set
40
+ Set.new([nil]) # nil = unknown terminal
41
+ end
42
+
43
+ # Class methods for FIRST set analysis
44
+ class << self
45
+ # Check if two FIRST sets are disjoint
46
+ # Two sets are disjoint if they have no common elements
47
+ # EPSILON is ignored when checking disjointness
48
+ #
49
+ # @param set1 [Set] First FIRST set
50
+ # @param set2 [Set] Second FIRST set
51
+ # @return [Boolean] true if sets are disjoint
52
+ def disjoint?(set1, set2)
53
+ # Remove EPSILON and nil from both sets for comparison
54
+ real_set1 = set1.reject { |x| x == EPSILON || x.nil? }
55
+ real_set2 = set2.reject { |x| x == EPSILON || x.nil? }
56
+
57
+ # If either set is empty (only EPSILON/nil), consider disjoint
58
+ return true if real_set1.empty? || real_set2.empty?
59
+
60
+ # Check if intersection is empty (using to_a for Opal compatibility)
61
+ (real_set1.to_a & real_set2.to_a).empty?
62
+ end
63
+
64
+ # Check if all FIRST sets in a collection are mutually disjoint
65
+ # This is critical for AC-FIRST algorithm - we can only insert
66
+ # cuts when all alternatives have non-overlapping FIRST sets
67
+ #
68
+ # @param sets [Array<Set>] Collection of FIRST sets
69
+ # @return [Boolean] true if all pairs are disjoint
70
+ def all_disjoint?(sets)
71
+ # Need at least 2 sets to check disjointness
72
+ return true if sets.length < 2
73
+
74
+ # Check all pairs
75
+ sets.combination(2).all? { |s1, s2| disjoint?(s1, s2) }
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::GrammarBuilder - Grammar Composition
4
+ #
5
+ # Build complex grammars by importing and composing smaller grammars.
6
+ # This enables reusable grammar modules.
7
+ #
8
+ # Usage:
9
+ # # Define reusable grammars
10
+ # expression_grammar = GrammarBuilder.new
11
+ # .rule("expr", str("a") | str("b"))
12
+ # .build
13
+ #
14
+ # type_grammar = GrammarBuilder.new
15
+ # .rule("type", str("int") | str("str"))
16
+ # .build
17
+ #
18
+ # # Compose into a new grammar
19
+ # combined = GrammarBuilder.new
20
+ # .import(expression_grammar, prefix: "expr")
21
+ # .import(type_grammar, prefix: "type")
22
+ # .rule("typed", seq([ref("expr:root"), str(":"), ref("type:root")]))
23
+ # .build
24
+ #
25
+ # Requires native extension for full functionality.
26
+
27
+ module Parsanol
28
+ class GrammarBuilder
29
+ # Create a new grammar builder
30
+ def initialize
31
+ @rules = {}
32
+ @imports = []
33
+ @root = nil
34
+ end
35
+
36
+ # Define a rule
37
+ #
38
+ # @param name [String, Symbol] Rule name
39
+ # @param parslet [Parsanol::Atoms::Base] Parslet atom
40
+ # @return [self] For chaining
41
+ def rule(name, parslet)
42
+ @rules[name.to_s] = parslet
43
+ self
44
+ end
45
+
46
+ # Get a rule for modification
47
+ #
48
+ # @param name [String, Symbol] Rule name
49
+ # @return [Parsanol::Atoms::Base, nil] The rule atom
50
+ def [](name)
51
+ @rules[name.to_s]
52
+ end
53
+
54
+ # Set the root rule
55
+ #
56
+ # @param name [String, Symbol] Root rule name
57
+ # @return [self] For chaining
58
+ def root(name)
59
+ @root = name.to_s
60
+ self
61
+ end
62
+
63
+ # Import another grammar with optional prefix
64
+ #
65
+ # @param grammar [GrammarBuilder, Hash] Grammar to import
66
+ # @param prefix [String, nil] Optional prefix for imported rules
67
+ # @return [self] For chaining
68
+ def import(grammar, prefix: nil)
69
+ grammar_data = case grammar
70
+ when GrammarBuilder
71
+ grammar.to_h
72
+ when Hash
73
+ grammar
74
+ else
75
+ raise ArgumentError, "Expected GrammarBuilder or Hash, got #{grammar.class}"
76
+ end
77
+
78
+ @imports << { grammar: grammar_data, prefix: prefix }
79
+ self
80
+ end
81
+
82
+ # Import with explicit rule mapping
83
+ #
84
+ # @param grammar [GrammarBuilder, Hash] Grammar to import
85
+ # @param prefix [String, nil] Optional prefix
86
+ # @param rules [Hash] Rule mapping {from_rule: to_rule}
87
+ # @return [self] For chaining
88
+ def import_with_rules(grammar, prefix: nil, rules: {})
89
+ grammar_data = case grammar
90
+ when GrammarBuilder
91
+ grammar.to_h
92
+ when Hash
93
+ grammar
94
+ else
95
+ raise ArgumentError, "Expected GrammarBuilder or Hash, got #{grammar.class}"
96
+ end
97
+
98
+ @imports << { grammar: grammar_data, prefix: prefix, rules: rules }
99
+ self
100
+ end
101
+
102
+ # Build the grammar
103
+ #
104
+ # @return [Hash] Grammar representation
105
+ def build
106
+ {
107
+ rules: @rules,
108
+ root: @root,
109
+ imports: @imports
110
+ }
111
+ end
112
+
113
+ # Convert to JSON for native parser
114
+ #
115
+ # @return [String] JSON representation
116
+ def to_json(*_args)
117
+ build.to_json
118
+ end
119
+
120
+ # Get as a Hash
121
+ #
122
+ # @return [Hash] Grammar representation
123
+ def to_h
124
+ build
125
+ end
126
+
127
+ # Reference another rule in this grammar
128
+ #
129
+ # @param name [String, Symbol] Rule name
130
+ # @return [Parsanol::Atoms::Entity] Entity referencing the rule
131
+ def ref(name)
132
+ Parsanol::Atoms::Entity.new(name)
133
+ end
134
+
135
+ # Reference the root of another grammar
136
+ #
137
+ # @param grammar_name [String] Name of the grammar (for prefixed imports)
138
+ # @return [Parsanol::Atoms::Entity] Entity referencing the root
139
+ def ref_root(grammar_name = nil)
140
+ if grammar_name
141
+ ref("#{grammar_name}:root")
142
+ else
143
+ ref('root')
144
+ end
145
+ end
146
+
147
+ class << self
148
+ # Create a grammar from a block
149
+ #
150
+ # @yield [GrammarBuilder] Builder to configure
151
+ # @return [Hash] Built grammar
152
+ def build(&block)
153
+ builder = new
154
+ builder.instance_eval(&block)
155
+ builder.build
156
+ end
157
+
158
+ # Import a grammar from JSON string
159
+ #
160
+ # @param json [String] JSON representation
161
+ # @return [Hash] Grammar representation
162
+ def from_json(json)
163
+ JSON.parse(json)
164
+ end
165
+ end
166
+ end
167
+
168
+ # Module methods for DSL
169
+ module GrammarBuilderDSL
170
+ # Create a new grammar builder
171
+ #
172
+ # @return [GrammarBuilder] New builder
173
+ def grammar(&block)
174
+ GrammarBuilder.build(&block)
175
+ end
176
+ end
177
+ end
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::IncrementalParser - Incremental Parser for Editor Integration
4
+ #
5
+ # Parse with support for incremental edits. This is useful for editor integration
6
+ # where the input changes frequently (e.g., as the user types).
7
+ #
8
+ # Usage:
9
+ # parser = Parsanol::IncrementalParser.new(grammar, initial_text)
10
+ #
11
+ # # When text changes
12
+ # parser.apply_edit(start: 5, deleted: 3, inserted: "new")
13
+ # result = parser.reparse
14
+ #
15
+ # Requires native extension for full functionality.
16
+
17
+ module Parsanol
18
+ # Represents an edit to apply to the input
19
+ class Edit
20
+ attr_reader :start, :deleted, :inserted
21
+
22
+ def initialize(start:, deleted:, inserted: '')
23
+ @start = start
24
+ @deleted = deleted
25
+ @inserted = inserted
26
+ end
27
+
28
+ # Get the old range that was replaced
29
+ def old_range
30
+ @start...(@start + @deleted)
31
+ end
32
+
33
+ # Check if this edit affects a specific position
34
+ def affects_position?(position)
35
+ position >= @start && position < @start + @deleted + @inserted.length
36
+ end
37
+
38
+ # Get the new position after this edit
39
+ def new_position
40
+ @start + @inserted.length
41
+ end
42
+
43
+ # Apply this edit to a string
44
+ def apply(input)
45
+ input[0...@start] + @inserted + input[(@start + @deleted)..]
46
+ end
47
+
48
+ def to_s
49
+ "Edit(#{@start}, +#{@inserted.length}, -#{@deleted})"
50
+ end
51
+
52
+ def ==(other)
53
+ return false unless other.is_a?(Edit)
54
+
55
+ @start == other.start && @deleted == other.deleted && @inserted == other.inserted
56
+ end
57
+ end
58
+
59
+ class IncrementalParser
60
+ # Create a new incremental parser
61
+ #
62
+ # @param grammar [Parsanol::Parser, Parsanol::Atoms::Base] Grammar to use
63
+ # @param initial_input [String] Initial input string
64
+ def initialize(grammar, initial_input = '')
65
+ @grammar = grammar
66
+ @input = initial_input
67
+
68
+ if Parsanol::Native.available?
69
+ grammar_json = Parsanol::Native.serialize_grammar(grammar.root)
70
+ @native_parser = Parsanol::Native.incremental_parser_new(grammar_json, initial_input)
71
+ else
72
+ @native_parser = nil
73
+ end
74
+
75
+ @edits = []
76
+ @cached_result = nil
77
+ end
78
+
79
+ # Apply an edit to the parser
80
+ #
81
+ # @param start [Integer] Start position of edit
82
+ # @param deleted [Integer] Number of characters deleted
83
+ # @param inserted [String] Text to insert
84
+ def apply_edit(start:, deleted:, inserted: '')
85
+ edit = Edit.new(start: start, deleted: deleted, inserted: inserted)
86
+ @edits << edit
87
+
88
+ # Update cached input
89
+ @input = edit.apply(@input)
90
+
91
+ # Invalidate cached result
92
+ @cached_result = nil
93
+
94
+ return unless @native_parser
95
+
96
+ Parsanol::Native.incremental_parser_apply_edit(@native_parser, start, deleted, inserted)
97
+ end
98
+
99
+ # Convenience method to apply multiple edits
100
+ #
101
+ # @param edits [Array<Hash>] Array of {start:, deleted:, inserted:} hashes
102
+ def apply_edits(edits)
103
+ edits.each do |edit_hash|
104
+ apply_edit(**edit_hash)
105
+ end
106
+ end
107
+
108
+ # Reparse with current input (or optional new input)
109
+ #
110
+ # @param new_input [String, nil] Optional new input (replaces current)
111
+ # @return [Object] Parse result
112
+ def reparse(new_input = nil)
113
+ if new_input
114
+ @input = new_input
115
+ @edits.clear
116
+ @cached_result = nil
117
+ end
118
+
119
+ return @cached_result if @cached_result
120
+
121
+ if @native_parser
122
+ @cached_result = Parsanol::Native.incremental_parser_reparse(@native_parser, @input)
123
+ else
124
+ # Pure Ruby fallback - reparse from scratch
125
+ root = @grammar.root
126
+ @cached_result = root.parse(@input)
127
+ end
128
+
129
+ @cached_result
130
+ end
131
+
132
+ # Invalidate a range (for external changes)
133
+ #
134
+ # @param start [Integer] Start position
135
+ # @param end_pos [Integer] End position
136
+ def invalidate_range(_start, _end_pos)
137
+ # Clear cached result if the invalidated range might affect it
138
+ @cached_result = nil
139
+
140
+ nil unless @native_parser
141
+ # Native implementation handles invalidation
142
+ end
143
+
144
+ # Get the current input
145
+ #
146
+ # @return [String] Current input
147
+ attr_reader :input
148
+
149
+ # Get all applied edits
150
+ #
151
+ # @return [Array<Edit>] Array of edits
152
+ def edits
153
+ @edits.dup
154
+ end
155
+
156
+ # Check if there are unapplied edits
157
+ #
158
+ # @return [Boolean] True if there are pending edits
159
+ def dirty?
160
+ @cached_result.nil? && !@edits.empty?
161
+ end
162
+
163
+ # Reset to initial state
164
+ #
165
+ # @param new_input [String, nil] Optional new initial input
166
+ def reset(new_input = nil)
167
+ @input = new_input || ''
168
+ @edits.clear
169
+ @cached_result = nil
170
+
171
+ return unless @native_parser && new_input
172
+
173
+ grammar_json = Parsanol::Native.serialize_grammar(@grammar.root)
174
+ @native_parser = Parsanol::Native.incremental_parser_new(grammar_json, @input)
175
+ end
176
+ end
177
+ end