parsanol 1.3.4 → 1.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +0 -2
  3. data/Rakefile +48 -48
  4. data/ext/parsanol_native/Cargo.toml +1 -2
  5. data/ext/parsanol_native/extconf.rb +4 -4
  6. data/lib/parsanol/ast_visitor.rb +1 -1
  7. data/lib/parsanol/atoms/alternative.rb +3 -2
  8. data/lib/parsanol/atoms/base.rb +12 -6
  9. data/lib/parsanol/atoms/can_flatten.rb +8 -8
  10. data/lib/parsanol/atoms/context.rb +23 -16
  11. data/lib/parsanol/atoms/custom.rb +2 -2
  12. data/lib/parsanol/atoms/dynamic.rb +1 -1
  13. data/lib/parsanol/atoms/infix.rb +10 -5
  14. data/lib/parsanol/atoms/lookahead.rb +7 -4
  15. data/lib/parsanol/atoms/re.rb +1 -1
  16. data/lib/parsanol/atoms/repetition.rb +29 -11
  17. data/lib/parsanol/atoms/sequence.rb +3 -2
  18. data/lib/parsanol/atoms/str.rb +9 -3
  19. data/lib/parsanol/atoms.rb +20 -20
  20. data/lib/parsanol/builder_callbacks.rb +2 -2
  21. data/lib/parsanol/cause.rb +2 -2
  22. data/lib/parsanol/context.rb +2 -2
  23. data/lib/parsanol/error_reporter.rb +5 -5
  24. data/lib/parsanol/expression/treetop.rb +17 -17
  25. data/lib/parsanol/expression.rb +1 -1
  26. data/lib/parsanol/fast_mode.rb +50 -12
  27. data/lib/parsanol/first_set.rb +1 -1
  28. data/lib/parsanol/grammar_builder.rb +10 -8
  29. data/lib/parsanol/incremental_parser.rb +13 -8
  30. data/lib/parsanol/interval_tree.rb +12 -3
  31. data/lib/parsanol/lazy_result.rb +2 -2
  32. data/lib/parsanol/mermaid.rb +12 -9
  33. data/lib/parsanol/native/batch_decoder.rb +13 -9
  34. data/lib/parsanol/native/dynamic.rb +7 -6
  35. data/lib/parsanol/native/parser.rb +12 -4
  36. data/lib/parsanol/native/serializer.rb +42 -42
  37. data/lib/parsanol/native/transformer.rb +55 -28
  38. data/lib/parsanol/native/types.rb +3 -3
  39. data/lib/parsanol/native.rb +60 -21
  40. data/lib/parsanol/optimizer.rb +6 -6
  41. data/lib/parsanol/optimizers/choice_optimizer.rb +1 -1
  42. data/lib/parsanol/optimizers/cut_inserter.rb +5 -2
  43. data/lib/parsanol/optimizers/lookahead_optimizer.rb +9 -3
  44. data/lib/parsanol/optimizers/quantifier_optimizer.rb +5 -5
  45. data/lib/parsanol/optimizers/sequence_optimizer.rb +1 -1
  46. data/lib/parsanol/options/zero_copy.rb +1 -1
  47. data/lib/parsanol/options.rb +1 -1
  48. data/lib/parsanol/parallel.rb +8 -13
  49. data/lib/parsanol/parser.rb +51 -13
  50. data/lib/parsanol/parslet.rb +7 -7
  51. data/lib/parsanol/pattern/binding.rb +1 -1
  52. data/lib/parsanol/pattern.rb +4 -1
  53. data/lib/parsanol/pool.rb +3 -3
  54. data/lib/parsanol/pools/buffer_pool.rb +2 -2
  55. data/lib/parsanol/pools/position_pool.rb +2 -2
  56. data/lib/parsanol/position.rb +1 -1
  57. data/lib/parsanol/result_builder.rb +4 -4
  58. data/lib/parsanol/result_stream.rb +10 -5
  59. data/lib/parsanol/slice.rb +11 -8
  60. data/lib/parsanol/source.rb +14 -9
  61. data/lib/parsanol/source_location.rb +1 -1
  62. data/lib/parsanol/streaming_parser.rb +3 -3
  63. data/lib/parsanol/string_view.rb +4 -1
  64. data/lib/parsanol/transform.rb +2 -2
  65. data/lib/parsanol/version.rb +1 -1
  66. data/lib/parsanol/wasm_parser.rb +1 -1
  67. data/lib/parsanol.rb +37 -39
  68. data/parsanol.gemspec +30 -30
  69. metadata +1 -1
@@ -132,7 +132,10 @@ module Parsanol
132
132
 
133
133
  # Check right subtree
134
134
  # Only search right if intervals starting there could overlap
135
- query_recursive(node.right, low, high, results) if node.right && node.low < high
135
+ if node.right && node.low < high
136
+ query_recursive(node.right, low, high,
137
+ results)
138
+ end
136
139
  end
137
140
 
138
141
  # Find exact interval match
@@ -154,10 +157,16 @@ module Parsanol
154
157
  return nil if node.nil?
155
158
 
156
159
  # Recursively delete from left subtree
157
- node.left = delete_overlapping_recursive(node.left, low, high, deleted) if node.left
160
+ if node.left
161
+ node.left = delete_overlapping_recursive(node.left, low, high,
162
+ deleted)
163
+ end
158
164
 
159
165
  # Recursively delete from right subtree
160
- node.right = delete_overlapping_recursive(node.right, low, high, deleted) if node.right
166
+ if node.right
167
+ node.right = delete_overlapping_recursive(node.right, low, high,
168
+ deleted)
169
+ end
161
170
 
162
171
  # Check if current node overlaps
163
172
  if node.low < high && low < node.high
@@ -85,10 +85,10 @@ module Parsanol
85
85
  # @yield [element] Each element
86
86
  # @return [Enumerator, self] Enumerator if no block, self otherwise
87
87
  #
88
- def each(&block)
88
+ def each(&)
89
89
  return to_enum(:each) unless block_given?
90
90
 
91
- to_a.each(&block)
91
+ to_a.each(&)
92
92
  self
93
93
  end
94
94
 
@@ -16,7 +16,7 @@ module Parsanol
16
16
  # Generates Mermaid diagram syntax from parser atoms.
17
17
  class MermaidBuilder
18
18
  def initialize
19
- @lines = ['graph TD']
19
+ @lines = ["graph TD"]
20
20
  @node_counter = 0
21
21
  @connections = []
22
22
  @seen_rules = Set.new
@@ -24,8 +24,8 @@ module Parsanol
24
24
 
25
25
  # Entry point for parser visualization
26
26
  def visit_parser(root_atom)
27
- add_node('Parser', 'root')
28
- traverse(root_atom, 'Parser')
27
+ add_node("Parser", "root")
28
+ traverse(root_atom, "Parser")
29
29
  finalize
30
30
  end
31
31
 
@@ -35,7 +35,7 @@ module Parsanol
35
35
 
36
36
  @seen_rules << rule_name
37
37
 
38
- node_id = add_node(rule_name.to_s.upcase, 'rule')
38
+ node_id = add_node(rule_name.to_s.upcase, "rule")
39
39
  connect(current_parent, node_id)
40
40
  traverse(rule_block.call, node_id)
41
41
  end
@@ -67,18 +67,18 @@ module Parsanol
67
67
 
68
68
  # Leaf nodes
69
69
  def visit_re(regexp)
70
- add_node("match(#{regexp.inspect})", 'terminal', style: 'ellipse')
70
+ add_node("match(#{regexp.inspect})", "terminal", style: "ellipse")
71
71
  end
72
72
 
73
73
  def visit_str(string)
74
- add_node("'#{string}'", 'terminal', style: 'ellipse')
74
+ add_node("'#{string}'", "terminal", style: "ellipse")
75
75
  end
76
76
 
77
77
  private
78
78
 
79
79
  attr_reader :current_parent
80
80
 
81
- def add_node(label, _shape_type = 'rect', _style = nil)
81
+ def add_node(label, _shape_type = "rect", _style = nil)
82
82
  @node_counter += 1
83
83
  node_id = "node_#{@node_counter}"
84
84
  @lines << " #{node_id}[\"#{escape_mermaid(label)}\"]"
@@ -97,7 +97,7 @@ module Parsanol
97
97
  @connections.each do |from, to|
98
98
  @lines << " #{from} --> #{to}"
99
99
  end
100
- @lines << ''
100
+ @lines << ""
101
101
  @lines.join("\n")
102
102
  end
103
103
 
@@ -125,7 +125,10 @@ module Parsanol
125
125
  def mermaid_for_rule(rule_name)
126
126
  builder = MermaidBuilder.new
127
127
  rule_method = method(rule_name)
128
- raise NotImplementedError, "Rule '#{rule_name}' not found" unless rule_method
128
+ unless rule_method
129
+ raise NotImplementedError,
130
+ "Rule '#{rule_name}' not found"
131
+ end
129
132
 
130
133
  rule_method.call.accept(builder)
131
134
  builder.output
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'parsanol/native/transformer'
3
+ require "parsanol/native/transformer"
4
4
 
5
5
  module Parsanol
6
6
  module Native
@@ -59,7 +59,7 @@ module Parsanol
59
59
  # @param slice_class [Class] The Slice class to use
60
60
  # @param grammar_atom [Parsanol::Atoms::Base] The grammar atom (unused, kept for API compat)
61
61
  # @return [Object] Transformed Ruby AST
62
- def decode_and_flatten(data, input, slice_class, grammar_atom)
62
+ def decode_and_flatten(data, input, slice_class, _grammar_atom)
63
63
  # Check if data is batch data (flat u64 array) or already a Ruby value
64
64
  if data.is_a?(Integer) || (data.is_a?(Array) && data.first.is_a?(Integer))
65
65
  # Batch data (flat u64 array) - decode first, then transform
@@ -84,11 +84,13 @@ module Parsanol
84
84
  case value
85
85
  when Array
86
86
  # Recursively process array elements
87
- processed = value.map { |v| join_consecutive_slices(v, slice_class, input) }
87
+ processed = value.map do |v|
88
+ join_consecutive_slices(v, slice_class, input)
89
+ end
88
90
 
89
91
  # Check if all non-nil elements are Slices
90
92
  non_nil = processed.compact
91
- if non_nil.all? { |v| v.is_a?(slice_class) }
93
+ if non_nil.all?(slice_class)
92
94
  # Check if slices are consecutive
93
95
  if slices_consecutive?(non_nil)
94
96
  # Join into single slice
@@ -129,7 +131,7 @@ module Parsanol
129
131
  last = slices.last
130
132
  total_length = last.offset + last.content.bytesize - first.offset
131
133
  content = input_bytes[first.offset, total_length]
132
- content = content.force_encoding('UTF-8') if content
134
+ content = content.force_encoding("UTF-8") if content
133
135
  slice_class.new(first.offset, content, input)
134
136
  end
135
137
 
@@ -156,7 +158,7 @@ module Parsanol
156
158
  bits = @data[@pos]
157
159
  @pos += 1
158
160
  # Convert IEEE 754 bits to float
159
- [bits].pack('Q').unpack1('D')
161
+ [bits].pack("Q").unpack1("D")
160
162
  when TAG_STRING
161
163
  offset = @data[@pos]
162
164
  length = @data[@pos + 1]
@@ -203,6 +205,7 @@ module Parsanol
203
205
 
204
206
  # Read key
205
207
  raise "Expected TAG_HASH_KEY, got #{tag}" unless tag == TAG_HASH_KEY
208
+
206
209
  @pos += 1
207
210
  key = decode_inline_string
208
211
 
@@ -228,22 +231,23 @@ module Parsanol
228
231
  def decode_inline_string_bytes(len)
229
232
  # Read u64 chunks
230
233
  chunks = (len + 7) / 8
231
- bytes = String.new(encoding: 'ASCII-8BIT', capacity: len)
234
+ bytes = String.new(encoding: "ASCII-8BIT", capacity: len)
232
235
  chunks.times do
233
236
  chunk = @data[@pos]
234
237
  @pos += 1
235
238
  8.times do |byte_idx|
236
239
  break if bytes.bytesize >= len
240
+
237
241
  bytes << ((chunk >> (byte_idx * 8)) & 0xFF)
238
242
  end
239
243
  end
240
244
 
241
- bytes.force_encoding('UTF-8')
245
+ bytes.force_encoding("UTF-8")
242
246
  end
243
247
 
244
248
  def create_slice(offset, length)
245
249
  content = @input_bytes[offset, length]
246
- content = content.force_encoding('UTF-8') if content
250
+ content = content.force_encoding("UTF-8") if content
247
251
  @slice_class.new(offset, content, @input)
248
252
  end
249
253
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'json'
3
+ require "json"
4
4
 
5
5
  module Parsanol
6
6
  module Native
@@ -49,13 +49,14 @@ module Parsanol
49
49
  #
50
50
  def register(block, description: nil)
51
51
  # Register with Rust FFI
52
- ffi_id = Native.register_callback(@next_id, description || "Ruby callback ##{@next_id}")
52
+ ffi_id = Native.register_callback(@next_id,
53
+ description || "Ruby callback ##{@next_id}")
53
54
 
54
55
  # Also keep a Ruby-side reference for GC safety
55
56
  @mutex.synchronize do
56
57
  @callbacks[ffi_id] = {
57
58
  block: block,
58
- description: description || "Ruby callback ##{ffi_id}"
59
+ description: description || "Ruby callback ##{ffi_id}",
59
60
  }
60
61
  end
61
62
 
@@ -136,7 +137,7 @@ module Parsanol
136
137
  ctx = DynamicContext.new(
137
138
  context[:input],
138
139
  context[:pos],
139
- context[:captures].transform_keys(&:to_sym)
140
+ context[:captures].transform_keys(&:to_sym),
140
141
  )
141
142
 
142
143
  # Call the block
@@ -207,7 +208,7 @@ module Parsanol
207
208
  # @return [String] The remaining input
208
209
  #
209
210
  def remaining
210
- @input[@pos..] || ''
211
+ @input[@pos..] || ""
211
212
  end
212
213
 
213
214
  # Check if at end of input
@@ -229,7 +230,7 @@ module Parsanol
229
230
  if length
230
231
  @input[@pos + start, length]
231
232
  else
232
- @input[@pos + start..]
233
+ @input[(@pos + start)..]
233
234
  end
234
235
  end
235
236
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'digest'
3
+ require "digest"
4
4
 
5
5
  module Parsanol
6
6
  module Native
@@ -16,7 +16,13 @@ module Parsanol
16
16
  return @cached_available unless @cached_available.nil?
17
17
 
18
18
  @cached_available = begin
19
- require 'parsanol/parsanol_native'
19
+ # Try versioned path first (released gem), then non-versioned (local dev)
20
+ ruby_version = RUBY_VERSION.split(".").take(2).join(".")
21
+ begin
22
+ require "parsanol/#{ruby_version}/parsanol_native"
23
+ rescue LoadError
24
+ require "parsanol/parsanol_native"
25
+ end
20
26
  Parsanol::Native.is_available
21
27
  rescue LoadError
22
28
  false
@@ -48,7 +54,7 @@ module Parsanol
48
54
  def cache_stats
49
55
  {
50
56
  hash_cache_size: GRAMMAR_HASH_CACHE.size,
51
- grammar_cache_size: GRAMMAR_CACHE.size
57
+ grammar_cache_size: GRAMMAR_CACHE.size,
52
58
  }
53
59
  end
54
60
 
@@ -64,6 +70,7 @@ module Parsanol
64
70
  if visited[obj_id]
65
71
  return [:cycle, atom.class.name]
66
72
  end
73
+
67
74
  visited[obj_id] = true
68
75
 
69
76
  case atom
@@ -83,7 +90,8 @@ module Parsanol
83
90
  when ::Parsanol::Atoms::Named
84
91
  [:named, atom.name.to_s, atom_structure(atom.parslet, visited)]
85
92
  when ::Parsanol::Atoms::Lookahead
86
- [:lookahead, atom.positive, atom_structure(atom.bound_parslet, visited)]
93
+ [:lookahead, atom.positive,
94
+ atom_structure(atom.bound_parslet, visited)]
87
95
  else
88
96
  [:unknown, atom.class.name]
89
97
  end
@@ -78,9 +78,9 @@ module Parsanol
78
78
 
79
79
  def serialize_str(atom)
80
80
  {
81
- 'Str' => {
82
- 'pattern' => atom.str
83
- }
81
+ "Str" => {
82
+ "pattern" => atom.str,
83
+ },
84
84
  }
85
85
  end
86
86
 
@@ -90,46 +90,46 @@ module Parsanol
90
90
  pattern = atom.match
91
91
  pattern = ::Regexp.last_match(1) if pattern =~ /^\(\?[-mix]*:(.+)\)$/
92
92
  {
93
- 'Re' => {
94
- 'pattern' => pattern
95
- }
93
+ "Re" => {
94
+ "pattern" => pattern,
95
+ },
96
96
  }
97
97
  end
98
98
 
99
99
  def serialize_sequence(atom)
100
100
  atom_ids = atom.parslets.map { |p| serialize_atom(p) }
101
101
  {
102
- 'Sequence' => {
103
- 'atoms' => atom_ids
104
- }
102
+ "Sequence" => {
103
+ "atoms" => atom_ids,
104
+ },
105
105
  }
106
106
  end
107
107
 
108
108
  def serialize_alternative(atom)
109
109
  atom_ids = atom.alternatives.map { |p| serialize_atom(p) }
110
110
  {
111
- 'Alternative' => {
112
- 'atoms' => atom_ids
113
- }
111
+ "Alternative" => {
112
+ "atoms" => atom_ids,
113
+ },
114
114
  }
115
115
  end
116
116
 
117
117
  def serialize_repetition(atom)
118
118
  {
119
- 'Repetition' => {
120
- 'atom' => serialize_atom(atom.parslet),
121
- 'min' => atom.min,
122
- 'max' => atom.max
123
- }
119
+ "Repetition" => {
120
+ "atom" => serialize_atom(atom.parslet),
121
+ "min" => atom.min,
122
+ "max" => atom.max,
123
+ },
124
124
  }
125
125
  end
126
126
 
127
127
  def serialize_named(atom)
128
128
  {
129
- 'Named' => {
130
- 'name' => atom.name.to_s,
131
- 'atom' => serialize_atom(atom.parslet)
132
- }
129
+ "Named" => {
130
+ "name" => atom.name.to_s,
131
+ "atom" => serialize_atom(atom.parslet),
132
+ },
133
133
  }
134
134
  end
135
135
 
@@ -169,7 +169,7 @@ module Parsanol
169
169
  serialize_named(parslet)
170
170
  when Parsanol::Atoms::Entity
171
171
  # Nested entity - just reference it via serialize_atom
172
- { 'Entity' => { 'atom' => serialize_atom(parslet) } }
172
+ { "Entity" => { "atom" => serialize_atom(parslet) } }
173
173
  when Parsanol::Atoms::Lookahead
174
174
  serialize_lookahead(parslet)
175
175
  else
@@ -181,9 +181,9 @@ module Parsanol
181
181
  else
182
182
  # If the entity's block returns nil, create a placeholder that will fail
183
183
  @atoms[atom_id] = {
184
- 'Str' => {
185
- 'pattern' => "\x00__UNIMPLEMENTED_ENTITY_#{atom.name}__"
186
- }
184
+ "Str" => {
185
+ "pattern" => "\x00__UNIMPLEMENTED_ENTITY_#{atom.name}__",
186
+ },
187
187
  }
188
188
  end
189
189
  atom_id
@@ -191,10 +191,10 @@ module Parsanol
191
191
 
192
192
  def serialize_lookahead(atom)
193
193
  {
194
- 'Lookahead' => {
195
- 'atom' => serialize_atom(atom.bound_parslet),
196
- 'positive' => atom.positive
197
- }
194
+ "Lookahead" => {
195
+ "atom" => serialize_atom(atom.bound_parslet),
196
+ "positive" => atom.positive,
197
+ },
198
198
  }
199
199
  end
200
200
 
@@ -202,10 +202,10 @@ module Parsanol
202
202
  # Capture stores matched text for later reference by Dynamic atoms.
203
203
  # Now properly serialized for native parser support (parsanol-rs 0.3.0+).
204
204
  {
205
- 'Capture' => {
206
- 'name' => atom.capture_key.to_s,
207
- 'atom' => serialize_atom(atom.inner_atom)
208
- }
205
+ "Capture" => {
206
+ "name" => atom.capture_key.to_s,
207
+ "atom" => serialize_atom(atom.inner_atom),
208
+ },
209
209
  }
210
210
  end
211
211
 
@@ -220,9 +220,9 @@ module Parsanol
220
220
  return serialize_unknown(atom) unless inner
221
221
 
222
222
  {
223
- 'Scope' => {
224
- 'atom' => serialize_atom(inner)
225
- }
223
+ "Scope" => {
224
+ "atom" => serialize_atom(inner),
225
+ },
226
226
  }
227
227
  end
228
228
 
@@ -232,9 +232,9 @@ module Parsanol
232
232
  callback_id = Parsanol::Native::Dynamic.register(atom.block)
233
233
 
234
234
  {
235
- 'Dynamic' => {
236
- 'callback_id' => callback_id
237
- }
235
+ "Dynamic" => {
236
+ "callback_id" => callback_id,
237
+ },
238
238
  }
239
239
  end
240
240
 
@@ -242,9 +242,9 @@ module Parsanol
242
242
  # For unsupported atom types, create a placeholder
243
243
  # This will cause a parse error at runtime
244
244
  {
245
- 'Str' => {
246
- 'pattern' => '' # Empty pattern that will never match
247
- }
245
+ "Str" => {
246
+ "pattern" => "", # Empty pattern that will never match
247
+ },
248
248
  }
249
249
  end
250
250
  end