parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,215 @@
1
module Parsanol
  # Interval tree implementation for GPeg-style incremental parsing.
  # Based on the GPeg paper: "Fast Incremental PEG Parsing" (Yedidia, SLE 2021)
  #
  # This data structure stores memoization results keyed by position
  # intervals [start, end) rather than single positions, enabling efficient
  # invalidation of changed regions.
  #
  # Implementation: a binary search tree ordered by interval start. Every
  # node additionally caches the largest end position found in its subtree
  # (+max+), which lets overlap searches skip subtrees that end before the
  # requested range begins.
  #
  # Performance characteristics (h = tree height, k = number of matches):
  # - Insert / exact query: O(h)
  # - Overlap query / overlap delete: only subtrees that can still contain
  #   an overlapping interval are visited
  #
  # The tree is NOT self-balancing: h is about log n for well-distributed
  # insertion orders but degrades to n when intervals are inserted in
  # ascending or descending start order. Insertion, overlap queries and
  # overlap deletion are recursive, so their recursion depth grows with h.
  class IntervalTree
    # A node in the interval tree.
    # Each node stores an interval [low, high) and its associated data.
    class Node
      attr_accessor :interval, :data, :max, :left, :right

      # @param low [Integer] Start position (inclusive)
      # @param high [Integer] End position (exclusive)
      # @param data [Object] Payload associated with the interval
      def initialize(low, high, data)
        @interval = [low, high] # [start, end) half-open interval
        @data = data
        @max = high # Maximum endpoint in the subtree rooted here
        @left = nil
        @right = nil
      end

      # @return [Integer] Start position (inclusive)
      def low
        @interval[0]
      end

      # @return [Integer] End position (exclusive)
      def high
        @interval[1]
      end
    end

    # @return [Integer] Number of intervals currently stored
    attr_reader :size

    def initialize
      @root = nil
      @size = 0
    end

    # Insert an interval with associated data.
    # Duplicate intervals are allowed; each insert adds a new entry.
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @param data [Object] Data to associate with this interval
    # @return [Integer] The new number of stored intervals
    def insert(low, high, data)
      @root = insert_recursive(@root, low, high, data)
      @size += 1
    end

    # Query for all intervals that overlap with [low, high).
    # An empty query range (low >= high) never matches anything.
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @return [Array<Object>] Data of overlapping intervals, ordered by
    #   interval start
    def query_overlapping(low, high)
      return [] if low >= high

      results = []
      query_recursive(@root, low, high, results)
      results
    end

    # Query for an exact interval match.
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @return [Object, nil] Data of the first exact match found on the
    #   search path, nil when there is none
    def query_exact(low, high)
      find_exact(@root, low, high)
    end

    # Delete all intervals that overlap with [low, high).
    #
    # Unlike #query_overlapping there is no special case for empty ranges:
    # a zero-width range [p, p) removes every interval that strictly
    # contains p (interval.low < p < interval.high).
    #
    # @param low [Integer] Start position (inclusive)
    # @param high [Integer] End position (exclusive)
    # @return [Array<Object>] Data of the deleted intervals
    def delete_overlapping(low, high)
      deleted = []
      @root = delete_overlapping_recursive(@root, low, high, deleted)
      @size -= deleted.size
      deleted
    end

    # Remove all intervals.
    def clear
      @root = nil
      @size = 0
    end

    # @return [Boolean] true when no interval is stored
    def empty?
      @root.nil?
    end

    private

    # Half-open overlap test: [a, b) and [c, d) overlap iff a < d and c < b.
    def overlaps?(node, low, high)
      node.low < high && low < node.high
    end

    # Recompute the cached subtree maximum of +node+ from its own interval
    # and the (already up to date) maxima of its children.
    def refresh_max(node)
      node.max = [node.high, node.left&.max, node.right&.max].compact.max
    end

    # Insert recursively, keeping the BST ordered by interval start.
    # Equal start positions go to the right subtree.
    def insert_recursive(node, low, high, data)
      return Node.new(low, high, data) if node.nil?

      if low < node.low
        node.left = insert_recursive(node.left, low, high, data)
      else
        node.right = insert_recursive(node.right, low, high, data)
      end

      # The new interval lives somewhere below this node, so the cached
      # maximum can only grow, and only up to the new end position.
      node.max = high if high > node.max
      node
    end

    # Collect the data of every interval overlapping [low, high) in
    # ascending order of interval start.
    def query_recursive(node, low, high, results)
      return if node.nil?
      # Nothing in this subtree ends after +low+, so nothing can overlap.
      return if node.max <= low

      query_recursive(node.left, low, high, results)
      results << node.data if overlaps?(node, low, high)

      # Everything to the right starts at or after node.low; skip it when
      # even this node already starts at or beyond the end of the range.
      query_recursive(node.right, low, high, results) if node.low < high
    end

    # Walk down the tree along the BST order looking for an identical
    # interval. Iterative so that lookups never deepen the call stack.
    def find_exact(node, low, high)
      while node
        return node.data if node.low == low && node.high == high

        node = low < node.low ? node.left : node.right
      end
      nil
    end

    # Remove every node overlapping [low, high) from the subtree rooted at
    # +node+, appending the removed data to +deleted+, and return the new
    # subtree root.
    def delete_overlapping_recursive(node, low, high, deleted)
      return nil if node.nil?
      # Same pruning as in query_recursive: no interval below ends after
      # +low+, so the whole subtree is kept untouched.
      return node if node.max <= low

      # Children are cleaned first, so when this node has to go its
      # in-order successor is guaranteed not to overlap the range.
      node.left = delete_overlapping_recursive(node.left, low, high, deleted)
      if node.low < high
        node.right = delete_overlapping_recursive(node.right, low, high, deleted)
      end

      if overlaps?(node, low, high)
        deleted << node.data

        return node.right if node.left.nil?
        return node.left if node.right.nil?

        # Two children: take over the in-order successor (the leftmost
        # node of the right subtree) and unlink it from there.
        successor = find_min(node.right)
        node.interval = successor.interval
        node.data = successor.data
        node.right = delete_min(node.right)
      end

      refresh_max(node)
      node
    end

    # Leftmost (smallest start) node of a non-empty subtree.
    def find_min(node)
      node = node.left while node.left
      node
    end

    # Unlink the leftmost node of a non-empty subtree and return the new
    # subtree root, refreshing cached maxima on the way back up.
    def delete_min(node)
      return node.right if node.left.nil?

      node.left = delete_min(node.left)
      refresh_max(node)
      node
    end
  end
end
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  # Lazy wrapper around Buffer that defers array materialization.
  #
  # LazyResult wraps a buffer and only asks it for an Array (via +to_a+)
  # when the elements are actually accessed. Results that are never read
  # (cache hits, backtracking, etc.) therefore never allocate that array.
  #
  # == Usage
  #
  #   lazy = LazyResult.new(buffer, context)
  #   # No array allocated yet
  #
  #   lazy.to_a  # Now array is materialized and cached
  #   lazy.to_a  # Returns cached array
  #
  # == Transparency
  #
  # LazyResult acts like an Array for most operations:
  # - any public Array method it does not define itself (map, select,
  #   first, push, ...) is forwarded to the materialized array
  # - +is_a?(Array)+ / +kind_of?(Array)+ answer true
  # - +==+ compares by content against arrays and other LazyResults
  #
  # Once materialized, the cached array is the single source of truth:
  # +size+ and +empty?+ read it instead of the buffer, so they stay
  # consistent with +to_a+ even after a forwarded mutator changed it.
  #
  class LazyResult
    # @return [Buffer] The underlying buffer
    attr_reader :buffer

    # @return [Context] The context passed at construction; this class only
    #   stores it and never calls into it
    attr_reader :context

    # @return [Array, nil] Cached materialized array, nil until first access
    attr_reader :materialized

    # Initialize a new LazyResult.
    #
    # @param buffer [Buffer] Buffer containing elements; must respond to
    #   +to_a+, +size+ and +empty?+
    # @param context [Context] Context for buffer management
    #
    def initialize(buffer, context)
      @buffer = buffer
      @context = context
      @materialized = nil
    end

    # Materialize to array (with caching).
    #
    # The first call asks the buffer for its array; later calls return the
    # very same object.
    #
    # @return [Array] Materialized array
    #
    def to_a
      @materialized ||= @buffer.to_a
    end

    # Element access (materializes if needed).
    #
    # Accepts everything Array#[] accepts: a single index, a range, or a
    # start/length pair.
    #
    # @param index [Array<Integer, Range>] Arguments forwarded to Array#[]
    # @return [Object, Array, nil] Whatever Array#[] returns for them
    #
    def [](*index)
      to_a[*index]
    end

    # Get number of elements without forcing materialization.
    #
    # @return [Integer] Number of elements
    #
    def size
      # After materialization the cached array may have been changed through
      # a delegated mutator, so it (not the buffer) is authoritative.
      @materialized ? @materialized.size : @buffer.size
    end

    alias length size

    # Check if empty without forcing materialization.
    #
    # @return [Boolean] true if no elements
    #
    def empty?
      @materialized ? @materialized.empty? : @buffer.empty?
    end

    # Iterate over elements (materializes if needed).
    #
    # @yield [element] Each element
    # @return [Enumerator, self] Enumerator if no block, self otherwise
    #
    def each(&block)
      return to_enum(:each) unless block_given?

      to_a.each(&block)
      self
    end

    # Report Array as one of this object's classes.
    #
    # Note that only +is_a?+ / +kind_of?+ are affected; checks based on
    # +Array#===+ (e.g. +case+/+when Array+) are not.
    #
    # @param other [Class] Class to check against
    # @return [Boolean] true for Array and for the real ancestors
    #
    def is_a?(other)
      other == Array || super
    end

    alias kind_of? is_a?

    # Delegate unknown methods to the materialized array.
    #
    # @param method [Symbol] Method name
    # @param args [Array] Arguments
    # @param block [Proc] Block if given
    # @return [Object] Result of method call
    #
    def method_missing(method, *args, &block)
      if to_a.respond_to?(method)
        to_a.public_send(method, *args, &block)
      else
        super
      end
    end

    # Make +respond_to?+ agree with +method_missing+.
    #
    # Object#respond_to? falls back to this hook for every method that is
    # not defined on LazyResult itself, so no separate +respond_to?+
    # override is needed. Answering requires the materialized array.
    #
    # @param method [Symbol] Method name
    # @param include_private [Boolean] Include private methods
    # @return [Boolean] true if method is supported
    #
    def respond_to_missing?(method, include_private = false)
      to_a.respond_to?(method, include_private) || super
    end

    # Compare with another object.
    # LazyResult compares equal to arrays with the same content.
    #
    # @param other [Object] Object to compare with
    # @return [Boolean] true if equal
    #
    def ==(other)
      if other.is_a?(LazyResult)
        to_a == other.to_a
      elsif other.is_a?(Array)
        to_a == other
      else
        super
      end
    end

    alias eql? ==

    # Hash code based on materialized array.
    #
    # @return [Integer] Hash code
    #
    def hash
      to_a.hash
    end

    # Inspect for debugging. Never forces materialization.
    #
    # @return [String] Inspection string
    #
    def inspect
      if @materialized
        "#<LazyResult:#{object_id} materialized=#{@materialized.inspect}>"
      else
        "#<LazyResult:#{object_id} buffer.size=#{@buffer.size}>"
      end
    end
  end
end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parsanol/native"
4
+
5
module Parsanol
  # Generic lexer for fast tokenization
  #
  # Create a lexer by subclassing and defining tokens:
  #
  #   class JsonLexer < Parsanol::Lexer
  #     token :string, /"[^"]*"/
  #     token :number, /-?[0-9]+(\.[0-9]+)?/
  #     token :true, /true/
  #     token :false, /false/
  #     token :null, /null/
  #     token :lbrace, /\{/
  #     token :rbrace, /\}/
  #     token :lbracket, /\[/
  #     token :rbracket, /\]/
  #     token :colon, /:/
  #     token :comma, /,/
  #
  #     ignore /\s+/
  #   end
  #
  #   lexer = JsonLexer.new
  #   tokens = lexer.tokenize('{"name": "test"}')
  #
  # Definitions are collected per class (subclasses start with a copy of
  # their parent's list). Matching itself is delegated to +Native+; only the
  # optional per-token value transforms run in Ruby.
  class Lexer
    # Token definition.
    #
    # +pattern+ holds the regex as a String, +transform+ an optional Proc
    # applied to the matched value, +ignore+ marks patterns to be skipped.
    Definition = Struct.new(:name, :pattern, :priority, :ignore, :transform)

    class << self
      # Define a token pattern
      #
      # @param name [Symbol] Token type name (stored as String)
      # @param pattern [Regexp] Pattern to match; a case-insensitive flag
      #   (+/.../i+) is preserved, other Regexp flags are not transferred
      # @param priority [Integer] Priority for conflict resolution (higher = preferred)
      # @param block [Proc] Optional block to transform the matched value
      def token(name, pattern, priority: 0, &block)
        add_definition(name.to_s, pattern, priority, false, block)
      end

      # Define patterns to ignore (e.g., whitespace, comments)
      #
      # @param pattern [Regexp] Pattern to ignore
      def ignore(pattern)
        add_definition("__ignore__", pattern, 0, true, nil)
      end

      # Define keywords (identifiers with higher priority)
      #
      # Each keyword becomes a token named after its upper-cased spelling
      # that matches the literal keyword text case-insensitively.
      #
      # @param keywords [Array<Symbol>] Keyword names
      # @param priority [Integer] Priority (default: 100)
      def keyword(*keywords, priority: 100)
        keywords.each do |kw|
          text = kw.to_s
          literal = Regexp.new(Regexp.escape(text), Regexp::IGNORECASE)
          add_definition(text.upcase, literal, priority, false, nil)
        end
      end

      # Get token definitions for this lexer class
      #
      # @return [Array<Definition>] Token definitions, in definition order
      def token_definitions
        @token_definitions ||= []
      end

      # Inherit token definitions from parent class.
      # The subclass receives its own (shallow) copy of the list, so tokens
      # added to the subclass never leak back into the parent.
      def inherited(subclass)
        super
        subclass.instance_variable_set(:@token_definitions, token_definitions.dup)
      end

      private

      # Append one definition.
      #
      # The Struct is built positionally on purpose: a plain Struct only
      # accepts keyword arguments from Ruby 3.2 on; older versions would put
      # the whole keyword hash into +name+ and leave every other field nil.
      def add_definition(name, regexp, priority, ignore, transform)
        token_definitions << Definition.new(name, pattern_source(regexp), priority, ignore, transform)
      end

      # Turn a Regexp into the pattern String stored in the definition.
      #
      # Regexp#source drops all flags, so case-insensitivity is carried over
      # as a leading inline +(?i)+ flag.
      # NOTE(review): assumes the native regex engine accepts inline flags
      # such as (?i) -- confirm against Native.create_lexer.
      def pattern_source(regexp)
        regexp.casefold? ? "(?i)#{regexp.source}" : regexp.source
      end
    end

    # Initialize the lexer.
    # The native lexer itself is created lazily on the first #tokenize call;
    # value transforms are snapshotted from the class definitions here.
    def initialize
      @lexer_id = nil
      @transforms = build_transforms
    end

    # Tokenize input string
    #
    # @param input [String] Input to tokenize
    # @return [Array<Hash>] Tokens as returned by Native.tokenize_with_lexer;
    #   for token types with a transform block the "value" entry is replaced
    #   by the block's result (on a copy of the token)
    def tokenize(input)
      ensure_lexer_created

      raw_tokens = Native.tokenize_with_lexer(@lexer_id, input)

      raw_tokens.map do |token|
        transform = @transforms[token["type"]]
        next token unless transform

        # Work on a copy so the token handed back by Native is not modified.
        converted = token.dup
        converted["value"] = transform.call(token["value"])
        converted
      end
    end

    private

    # Register this class's definitions with Native once and remember the
    # returned lexer id for subsequent #tokenize calls.
    def ensure_lexer_created
      return if @lexer_id

      definitions = self.class.token_definitions.map do |d|
        {
          "name" => d.name,
          "pattern" => d.pattern,
          "priority" => d.priority,
          "ignore" => d.ignore
        }
      end

      @lexer_id = Native.create_lexer(definitions)
    end

    # Map token name => transform Proc for every non-ignored definition that
    # was declared with a block. A later definition with the same name wins.
    def build_transforms
      self.class.token_definitions.each_with_object({}) do |d, transforms|
        transforms[d.name] = d.transform if d.transform && !d.ignore
      end
    end
  end
end