parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::StreamingParser - Streaming Parser for Large Inputs
4
+ #
5
+ # Parse large inputs in chunks without loading the entire input into memory.
6
+ # Useful for file parsing, network streams, or very large documents.
7
+ #
8
+ # Usage:
9
+ # parser = Parsanol::StreamingParser.new(json_grammar)
10
+ #
11
+ # File.open("large.json") do |f|
12
+ # parser.parse_stream(f) do |partial_result|
13
+ # # Process each complete element as it's parsed
14
+ # process_item(partial_result)
15
+ # end
16
+ # end
17
+ #
18
+ # Requires native extension for full functionality.
19
+
20
module Parsanol
  # Chunk-wise parser front end.
  #
  # Input is fed in with #add_chunk and results are pulled with #parse_chunk;
  # #parse_stream drives both from an IO-like object. All parsing is delegated
  # to Parsanol::Native. When the native extension is unavailable, chunks are
  # still appended to #buffer but #parse_chunk raises NotImplementedError.
  class StreamingParser
    # Default chunk size (4KB)
    DEFAULT_CHUNK_SIZE = 4096

    # @return [String] every chunk passed to #add_chunk since construction or
    #   the last #reset, concatenated (this Ruby-side copy is never trimmed)
    attr_reader :buffer

    # @return [Integer] default number of bytes #parse_stream reads per chunk
    attr_reader :chunk_size

    # Create a new streaming parser.
    #
    # @param grammar [Parsanol::Parser, Parsanol::Atoms::Base] Grammar to use;
    #   must respond to #root when the native extension is available
    # @param chunk_size [Integer] Size of chunks to read (default: 4096)
    def initialize(grammar, chunk_size: DEFAULT_CHUNK_SIZE)
      @grammar = grammar
      @chunk_size = chunk_size
      @native_parser = Parsanol::Native.available? ? build_native_parser : nil

      @buffer = String.new
      # Not read or advanced anywhere in this class.
      @position = 0
      # Results fetched by #enough_data? that #parse_chunk has not handed out.
      @pending = []
    end

    # Add a chunk of input.
    #
    # @param chunk [String] Input chunk to add
    # @return [Object] whatever the native add-chunk call returns (described
    #   by the original author as true when more chunks are needed); always
    #   false in the pure Ruby fallback
    def add_chunk(chunk)
      @buffer << chunk
      return false unless @native_parser

      Parsanol::Native.streaming_parser_add_chunk(@native_parser, chunk)
    end

    # Parse what we have so far.
    #
    # Results that #enough_data? already pulled from the native parser are
    # returned first, in order, so probing never loses a parsed element.
    #
    # @return [Object, nil] Parsed result or nil if need more data
    # @raise [NotImplementedError] when the native extension is unavailable
    def parse_chunk
      unless @native_parser
        raise NotImplementedError,
              "Streaming parser requires native extension for full functionality."
      end

      return @pending.shift unless @pending.empty?

      Parsanol::Native.streaming_parser_parse_chunk(@native_parser)
    end

    # Check if we have enough data to make progress.
    #
    # The only probe available here is the native parse call itself, which
    # hands out a result when one is ready. That result is queued for the
    # next #parse_chunk instead of being thrown away.
    #
    # @return [Boolean] True if parser can make progress
    def enough_data?
      return false unless @native_parser
      return true unless @pending.empty?

      probed = Parsanol::Native.streaming_parser_parse_chunk(@native_parser)
      return false if probed.nil?

      @pending << probed
      true
    end

    # Parse entire stream (yields partial results).
    #
    # Reads until io.read returns nil or an empty string. After each chunk,
    # #parse_chunk is called repeatedly until it returns a falsy value.
    #
    # @param io [IO, StringIO] Input source to read from
    # @param chunk_size [Integer] Size of chunks to read
    # @yield [Object] Each complete element as it's parsed
    # @return [Array] All parsed results
    # @raise [NotImplementedError] when the native extension is unavailable
    #   and the stream yields at least one non-empty chunk
    def parse_stream(io, chunk_size: @chunk_size)
      results = []

      loop do
        chunk = io.read(chunk_size)
        break if chunk.nil? || chunk.empty?

        add_chunk(chunk)

        while (result = parse_chunk)
          results << result
          yield result if block_given?
        end
      end

      results
    end

    # Reset the parser for reuse.
    #
    # Clears the buffer and any queued results. A fresh native parser is
    # built only if one existed before; the fallback stays in fallback mode.
    def reset
      @buffer = String.new
      @position = 0
      @pending = []

      @native_parser = build_native_parser if @native_parser
    end

    private

    # Serializes the grammar root and creates a native streaming parser for it.
    def build_native_parser
      grammar_json = Parsanol::Native.serialize_grammar(@grammar.root)
      Parsanol::Native.streaming_parser_new(grammar_json)
    end
  end
end
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  # Lightweight view onto a byte range of an existing String.
  #
  # A StringView stores a reference to the original string plus a byte offset
  # and byte length. No substring is created until #to_s is called, and the
  # result of #to_s is cached on the view.
  #
  # == Usage
  #
  #   view = StringView.new(input_string, offset: 10, length: 5)
  #   view.to_s        # Materializes (and caches) the substring
  #   view[0]          # One-byte string at that position in the view
  #   view.slice(1, 2) # New view on the same string, no substring created
  #
  # == Notes
  #
  # - Offsets and lengths are in bytes, not characters. #[] returns a single
  #   byte as a String, which for multi-byte characters is only part of the
  #   character.
  # - #== compares content against a String but identity/range against
  #   another StringView. #hash is derived from the underlying string's
  #   object_id, so a view and a String with equal content hash differently.
  #
  class StringView
    # @return [String] Original input string
    attr_reader :string

    # @return [Integer] Byte offset into string
    attr_reader :offset

    # @return [Integer] Length in bytes
    attr_reader :length

    # Initialize a new StringView.
    #
    # @param string [String] Original input string
    # @param offset [Integer] Byte offset (default: 0)
    # @param length [Integer] Length in bytes (default: the bytes remaining
    #   after offset, never less than 0)
    #
    def initialize(string, offset: 0, length: nil)
      @string = string
      @offset = offset
      # An offset past the end of the string must yield an empty view, not a
      # negative length.
      @length = length || [string.bytesize - offset, 0].max
      @materialized = nil
    end

    # Materialize to string (with caching).
    #
    # The first call slices the original string; later calls return the
    # cached object. If the range lies outside the string (byteslice returns
    # nil) an empty string with the source's encoding is returned instead, so
    # the result is always a String.
    #
    # @return [String] Materialized string
    #
    def to_s
      @materialized ||= @string.byteslice(@offset, @length) || @string.byteslice(0, 0)
    end

    # Get the byte at index as a one-byte String.
    #
    # Does not materialize the whole view, but does allocate the returned
    # one-byte string.
    #
    # @param index [Integer] Zero-based byte index relative to the view
    # @return [String, nil] One-byte string, or nil when index is negative or
    #   not below the view length
    #
    def [](index)
      return nil if index < 0 || index >= @length

      @string.byteslice(@offset + index, 1)
    end

    # Get byte size.
    #
    # @return [Integer] Length in bytes
    #
    def bytesize
      @length
    end

    # #length is already provided by the attr_reader above and returns the
    # same value, so only #size needs an alias.
    alias size bytesize

    # Check if empty.
    #
    # @return [Boolean] true if length is 0
    #
    def empty?
      @length == 0
    end

    # Compare with another object.
    #
    # - String: compares the materialized content.
    # - StringView: equal only when both view the very same String object
    #   (identity, not content) with the same offset and length.
    # - Anything else: falls back to the inherited #==.
    #
    # @param other [Object] Object to compare with
    # @return [Boolean] true if equal
    #
    def ==(other)
      case other
      when String
        to_s == other
      when StringView
        @string.equal?(other.string) &&
          @offset == other.offset &&
          @length == other.length
      else
        super
      end
    end

    alias eql? ==

    # Hash code for hashing.
    #
    # Built from the underlying string's object_id plus offset and length, so
    # computing it never materializes the view.
    #
    # @return [Integer] Hash code
    #
    def hash
      [@string.object_id, @offset, @length].hash
    end

    # Create substring view.
    #
    # Returns a new StringView over part of this view; only a new view object
    # is created. A negative start is treated as 0, and the length is cut to
    # what is available. A non-positive len or a start at/after the end gives
    # an empty view positioned at this view's offset.
    #
    # @param start [Integer] Start offset (relative to view)
    # @param len [Integer] Length
    # @return [StringView] New view of substring
    #
    def slice(start, len)
      return self.class.new(@string, offset: @offset, length: 0) if len <= 0 || start >= @length

      # Clamp start into [0, @length]
      clamped_start = [[start, 0].max, @length].min
      available = @length - clamped_start

      self.class.new(
        @string,
        offset: @offset + clamped_start,
        length: [len, available].min
      )
    end

    # Inspect for debugging.
    #
    # Includes the cached string when the view has been materialized.
    #
    # @return [String] Inspection string
    #
    def inspect
      if @materialized
        "#<StringView:#{object_id} @offset=#{@offset} @length=#{@length} cached=#{@materialized.inspect}>"
      else
        "#<StringView:#{object_id} @offset=#{@offset} @length=#{@length}>"
      end
    end

    # Re-point this view at a new string/range and drop the cached string.
    #
    # @param string [String] New string
    # @param offset [Integer] New offset
    # @param length [Integer] New length
    # @return [self]
    #
    def reset!(string, offset, length)
      @string = string
      @offset = offset
      @length = length
      @materialized = nil
      self
    end
  end
end
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
+
4
+ require 'parsanol/pattern'
5
+
6
+ # Transforms an expression tree into something else. The transformation
7
+ # performs a depth-first, post-order traversal of the expression tree. During
8
+ # that traversal, each time a rule matches a node, the node is replaced by the
9
+ # result of the block associated to the rule. Otherwise the node is accepted
10
+ # as is into the result tree.
11
+ #
12
+ # This is almost what you would generally do with a tree visitor, except that
13
+ # you can match several levels of the tree at once.
14
+ #
15
+ # As a consequence of this, the resulting tree will contain pieces of the
16
+ # original tree and new pieces. Most likely, you will want to transform the
17
+ # original tree wholly, so this isn't a problem.
18
+ #
19
+ # You will not be able to create a loop, given that each node will be replaced
20
+ # only once and then left alone. This means that the results of a replacement
21
+ # will not be acted upon.
22
+ #
23
+ # Example:
24
+ #
25
+ # class Example < Parsanol::Transform
26
+ # rule(:string => simple(:x)) { # (1)
27
+ # StringLiteral.new(x)
28
+ # }
29
+ # end
30
+ #
31
+ # A tree transform (Parsanol::Transform) is defined by a set of rules. Each
32
+ # rule can be defined by calling #rule with the pattern as argument. The block
33
+ # given will be called every time the rule matches somewhere in the tree given
34
+ # to #apply. It is passed a Hash containing all the variable bindings of this
35
+ # pattern match.
36
+ #
37
+ # In the above example, (1) illustrates a simple matching rule.
38
+ #
39
+ # Let's say you want to parse matching parentheses and distill a maximum nest
40
+ # depth. You would probably write a parser like the one in example/balanced-parens/parens.rb;
41
+ # here's the relevant part:
42
+ #
43
+ # rule(:balanced) {
44
+ # str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
45
+ # }
46
+ #
47
+ # If you now apply this to a string like '(())', you get an intermediate parse
48
+ # tree that looks like this:
49
+ #
50
+ # {
51
+ # l: '(',
52
+ # m: {
53
+ # l: '(',
54
+ # m: nil,
55
+ # r: ')'
56
+ # },
57
+ # r: ')'
58
+ # }
59
+ #
60
+ # This parse tree is good for debugging, but what we would really like to have
61
+ # is just the nesting depth. This transformation rule will produce that:
62
+ #
63
+ # rule(:l => '(', :m => simple(:x), :r => ')') {
64
+ # # innermost :m will contain nil
65
+ # x.nil? ? 1 : x+1
66
+ # }
67
+ #
68
+ # = Usage patterns
69
+ #
70
+ # There are four ways of using this class. The first one is very much
71
+ # recommended, followed by the second one for generality. The other ones are
72
+ # omitted here.
73
+ #
74
+ # Recommended usage is as follows:
75
+ #
76
+ # class MyTransformator < Parsanol::Transform
77
+ # rule(...) { ... }
78
+ # rule(...) { ... }
79
+ # # ...
80
+ # end
81
+ # MyTransformator.new.apply(tree)
82
+ #
83
+ # Alternatively, you can use the Transform class as follows:
84
+ #
85
+ # transform = Parsanol::Transform.new do
86
+ # rule(...) { ... }
87
+ # end
88
+ # transform.apply(tree)
89
+ #
90
+ # = Execution context
91
+ #
92
+ # The execution context of action blocks differs depending on the arity of
93
+ # said blocks. This can be confusing. It is however somewhat intentional. You
94
+ # should not create fat Transform descendants containing a lot of helper methods,
95
+ # instead keep your AST class construction in global scope or make it available
96
+ # through a factory. The following piece of code illustrates usage of global
97
+ # scope:
98
+ #
99
+ # transform = Parsanol::Transform.new do
100
+ # rule(...) { AstNode.new(a_variable) }
101
+ # rule(...) { Ast.node(a_variable) } # modules are nice
102
+ # end
103
+ # transform.apply(tree)
104
+ #
105
+ # And here's how you would use a class builder (a factory):
106
+ #
107
+ # transform = Parsanol::Transform.new do
108
+ # rule(...) { builder.add_node(a_variable) }
109
+ # rule(...) { |d| d[:builder].add_node(d[:a_variable]) }
110
+ # end
111
+ # transform.apply(tree, :builder => Builder.new)
112
+ #
113
+ # As you can see, Transform allows you to inject local context for your rule
114
+ # action blocks to use.
115
+ #
116
class Parsanol::Transform
  # FIXME: Maybe only part of it? Or maybe only include into constructor
  # context?
  include Parsanol

  class << self
    # FIXME: Only do this for subclasses?
    include Parsanol

    # Registers a class-level rule. The newest rule goes to the front of the
    # list so it is tried before rules registered earlier.
    #
    def rule(expression, &block)
      rules.unshift([Parsanol::Pattern.new(expression), block])
    end

    # The rule list owned by this class (created empty on first access).
    #
    def rules
      @__transform_rules ||= []
    end

    # Gives each subclass its own copy of the parent's rule list.
    #
    def inherited(subclass)
      super
      subclass.instance_variable_set(:@__transform_rules, rules.dup)
    end
  end

  # @param raise_on_unmatch when true, #transform_elt raises for a Hash that
  #   no rule matched instead of returning it unchanged
  # @param block optional block evaluated in the context of the new
  #   instance, typically to declare rules
  #
  def initialize(raise_on_unmatch=false, &block)
    @raise_on_unmatch = raise_on_unmatch
    @rules = []

    instance_eval(&block) if block
  end

  # Registers an instance-level rule made of an *expression pattern* and a
  # *transformation block*. As with class-level rules, the newest rule is put
  # in front of the older instance rules.
  #
  def rule(expression, &block)
    entry = [Parsanol::Pattern.new(expression), block]
    @rules.unshift(entry)
  end

  # Transforms a tree of Hashes, Arrays and leaf objects. Children are
  # transformed first (Hash values and Array elements, recursively); the
  # rebuilt node is then run through the rules itself.
  #
  # The optional context is handed to every pattern match, which makes extra
  # bindings available to the action blocks:
  #
  #   transform.apply(tree, document: document)
  #
  #   rule(...) { document.foo(bar) }             # Variant A
  #   rule(...) { |d| d[:document].foo(d[:bar]) } # Variant B
  #
  # @param obj PORO ast to transform
  # @param context start context to inject into the bindings.
  #
  def apply(obj, context=nil)
    rebuilt =
      if Hash === obj
        recurse_hash(obj, context)
      elsif Array === obj
        recurse_array(obj, context)
      else
        obj
      end

    transform_elt(rebuilt, context)
  end

  # Runs the action block for a successful match. A block of arity 1 is
  # called with the bindings as its argument; any other block is evaluated
  # via instance_eval on a Context built from the bindings. Returns nil when
  # no block is given.
  #
  #   t.call_on_match(:a => :b) { a }
  #   t.call_on_match(:a => :b) { |d| d[:a] }
  #
  def call_on_match(bindings, block)
    return unless block
    return block.call(bindings) if block.arity == 1

    Context.new(bindings).instance_eval(&block)
  end

  # All rules visible to this instance: the class's rules followed by the
  # rules defined on the instance itself.
  #
  def rules
    self.class.rules + @rules
  end

  # @api private
  #
  # Returns the result of the first rule whose pattern matches elt. With no
  # match, elt is returned as is, unless raise_on_unmatch was requested and
  # elt is a Hash, in which case NotImplementedError is raised.
  #
  def transform_elt(elt, context)
    rules.each do |pattern, block|
      bindings = pattern.match(elt, context)
      next unless bindings

      # Produces transformed value
      return call_on_match(bindings, block)
    end

    # No rule matched - element is not transformed
    return elt unless @raise_on_unmatch && elt.is_a?(Hash)

    elt_types = elt.transform_values(&:class)
    raise NotImplementedError, "Failed to match `#{elt_types.inspect}`"
  end

  # @api private
  #
  # Builds a new Hash with the same keys and transformed values.
  #
  def recurse_hash(hsh, ctx)
    hsh.each_with_object({}) do |(k, v), new_hsh|
      new_hsh[k] = apply(v, ctx)
    end
  end

  # @api private
  #
  # Builds a new Array of transformed elements.
  #
  def recurse_array(ary, ctx)
    ary.map { |elt| apply(elt, ctx) }
  end
end
266
+
267
+ require 'parsanol/context'
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Parsanol
  # Version string of this release.
  VERSION = '3.0.0'.freeze
end
@@ -0,0 +1,80 @@
1
+ # @parsanol/wasm
2
+
3
+ High-performance PEG parser using WebAssembly. Designed for use with Opal (Ruby in JavaScript) and general JavaScript applications.
4
+
5
+ ## Features
6
+
7
+ - **18-44x faster** than pure Ruby parser
8
+ - **99.5% fewer allocations**
9
+ - Works in browsers and Node.js
10
+ - Full TypeScript support
11
+ - Compatible with Opal
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ npm install @parsanol/wasm
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ### Browser/ESM
22
+
23
+ ```html
24
+ <script type="module">
25
+ import { initParsanol, ParsanolParser } from '@parsanol/wasm';
26
+
27
+ // Initialize WASM (call once)
28
+ await initParsanol();
29
+
30
+ // Create parser from grammar JSON
31
+ const grammar = {
32
+ atoms: [
33
+ { Str: { pattern: "hello" } }
34
+ ],
35
+ root: 0
36
+ };
37
+
38
+ const parser = new ParsanolParser(grammar);
39
+
40
+ // Parse input
41
+ const result = parser.parse('hello');
42
+ console.log(result); // "hello"
43
+ </script>
44
+ ```
45
+
46
+ ### Node.js
47
+
48
+ ```javascript
49
+ const { initParsanol, ParsanolParser } = require('@parsanol/wasm');
50
+
51
+ async function main() {
52
+ await initParsanol();
53
+
54
+ const parser = new ParsanolParser(grammarJson);
55
+ const result = parser.parse('input text');
56
+ console.log(result);
57
+ }
58
+
59
+ main();
60
+ ```
61
+
62
+ ### Opal (Ruby in Browser)
63
+
64
+ ```ruby
65
+ # First initialize WASM in JavaScript:
66
+ # Parsanol::WasmParser.init.then { puts "ready" }
67
+
68
+ require 'parsanol/wasm_parser'
69
+
70
+ grammar_json = {
71
+ atoms: [
72
+ { Str: { pattern: "hello" } }
73
+ ],
74
+ root: 0
75
+ }.to_json
76
+
77
+ parser = Parsanol::WasmParser.new(grammar_json)
78
+ result = parser.parse('hello')
79
+ puts result # => "hello"
80
+ ```