parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+ # A slice is a small part from the parse input. A slice mainly behaves like
3
+ # any other string, except that it remembers where it came from (offset in
4
+ # original input).
5
+ #
6
+ # == Extracting line and column
7
+ #
8
+ # Using the #line_and_column method, you can extract the line and column in
9
+ # the original input where this slice starts.
10
+ #
11
+ # Example:
12
+ # slice.line_and_column # => [1, 13]
13
+ # slice.offset # => 12
14
+ #
15
+ # == Likeness to strings
16
+ #
17
+ # Parsanol::Slice behaves in many ways like a Ruby String. This likeness
18
+ # however is not complete - many of the myriad of operations String supports
19
+ # are not yet in Slice. You can always extract the internal string instance by
20
+ # calling #to_s.
21
+ #
22
+ # These omissions are somewhat intentional. Rather than maintaining a full
23
+ # delegation, we opt for a partial emulation that gets the job done.
24
+ #
25
module Parsanol
  # A small part of the parse input. Behaves mostly like a String, but also
  # remembers the byte offset in the original input it was taken from.
  #
  # Nested inside `module Parsanol` (rather than `class Parsanol::Slice`) so
  # that the file can be loaded even when the namespace is not defined yet.
  class Slice
    attr_reader :str, :line_cache

    # Construct a slice using an integer byte position, a string, and an
    # optional line cache. The line cache should be able to answer to the
    # #line_and_column message.
    #
    # @param bytepos [Integer] Byte position in the original input
    # @param string [String] The slice content
    # @param line_cache [Object] Optional line cache for line/column info
    #
    def initialize(bytepos = 0, string = '', line_cache = nil)
      @bytepos = bytepos
      @str = string
      @line_cache = line_cache
    end

    # Reinitialize this slice with new values so the instance can be reused
    # (object pooling).
    #
    # @param bytepos [Integer] New byte position in the original input
    # @param string [String] New slice content
    # @param line_cache [Object] Optional line cache for line/column info
    # @return [self] Returns self for method chaining
    #
    def reset!(bytepos = 0, string = '', line_cache = nil)
      @bytepos = bytepos
      @str = string
      @line_cache = line_cache
      self
    end

    # Create a Slice from a rope-like object; the rope is converted with #to_s.
    #
    # @param rope [#to_s] The rope to convert
    # @param bytepos [Integer] Byte position in the input
    # @param line_cache [Object] Optional line cache for line/column info
    # @return [Parsanol::Slice] A new slice with the rope's content
    #
    def self.from_rope(rope, bytepos, line_cache = nil)
      new(bytepos, rope.to_s, line_cache)
    end

    # Returns the byte position of this slice in the original input.
    #
    def offset
      @bytepos
    end

    # Alias for offset - returns byte position.
    alias bytepos offset

    # Alias for offset. Note that this is the *byte* position; it equals the
    # character position only for single-byte (e.g. ASCII) input.
    alias charpos offset

    # Loose, content-only comparison against slices, strings, or anything
    # the underlying string knows how to compare itself with. The offset is
    # deliberately ignored here.
    #
    def ==(other)
      return str == other.str if other.is_a?(Parsanol::Slice)

      str == other
    end

    # Type-strict equality used for Hash key lookup.
    #
    # Must agree with #hash: objects that are eql? are required to have the
    # same hash value. Since #hash includes the offset, eql? has to compare
    # the offset as well; otherwise two "equal" slices at different positions
    # would land in different hash buckets and Hash/Set behavior would be
    # inconsistent.
    #
    def eql?(other)
      other.is_a?(Parsanol::Slice) &&
        str.eql?(other.str) &&
        offset.eql?(other.offset)
    end

    # Hash code for using Slices as hash keys. Combines content and offset so
    # slices differ from plain Strings and from slices at other positions.
    #
    def hash
      [str, offset].hash
    end

    # Match regular expressions against the slice content.
    #
    def match(regexp)
      str.match(regexp)
    end

    # Returns the slice's size in characters.
    #
    def size
      str.size
    end

    alias length size

    # Concatenate two slices; it is assumed that the second slice begins
    # where the first one ends. The offset (and line cache) of the resulting
    # slice is the same as the one of this slice.
    #
    def +(other)
      self.class.new(@bytepos, str + other.to_s, line_cache)
    end

    # Returns a <line, column> tuple referring to the original input, as
    # answered by the line cache for this slice's byte position.
    #
    # @raise [ArgumentError] when the slice was built without a line cache
    #
    def line_and_column
      unless line_cache
        raise ArgumentError, 'No line cache was given, cannot infer line and column.'
      end

      line_cache.line_and_column(@bytepos)
    end

    # Conversion operators -----------------------------------------------------

    # Returns the content as a String (converting with #to_s when the stored
    # content is not already a String).
    def to_str
      str.is_a?(String) ? str : str.to_s
    end
    alias to_s to_str

    def to_slice
      self
    end

    def to_sym
      str.to_sym
    end

    def to_i
      str.to_i
    end

    def to_f
      str.to_f
    end

    # Inspection & Debugging ---------------------------------------------------

    # Prints the slice as <code>"string"@offset</code>.
    def inspect
      "#{str.inspect}@#{offset}"
    end
  end
end
@@ -0,0 +1,99 @@
1
+
2
+
3
module Parsanol
  class Source
    # A cache for line start positions.
    #
    class LineCache
      def initialize
        # Stores line endings as a simple position number. The first line
        # always starts at 0; numbers beyond the biggest entry are on any
        # line > size, but probably make a scan to that position necessary.
        @line_ends = []
        @line_ends.extend RangeSearch
        @last_line_end = nil
      end

      # Returns a <line, column> tuple for the given input position. Input
      # position must be given as byte offset into original string; objects
      # responding to #bytepos are unwrapped first.
      #
      # @param pos [Integer, #bytepos] byte offset into the original input
      # @return [Array(Integer, Integer)] 1-based line and column
      #
      def line_and_column(pos)
        pos = pos.bytepos if pos.respond_to?(:bytepos)
        eol_idx = @line_ends.lbound(pos)

        if eol_idx
          # eol_idx points to the offset that ends the current line; the
          # previous entry (or 0 for the first line) is where it starts.
          line_start = eol_idx.positive? ? @line_ends[eol_idx - 1] : 0
          [eol_idx + 1, pos - line_start + 1]
        else
          # We're beyond the last line end that we know about. Pretend for
          # now that we're just on the last line.
          line_start = @line_ends.last || 0
          [@line_ends.size + 1, pos - line_start + 1]
        end
      end

      # Records the positions following every "\n" in +buf+, where +buf+
      # starts at byte offset +start_pos+ of the original input.
      #
      # @param start_pos [Integer] byte offset at which buf begins
      # @param buf [String, nil] text to scan; nil is ignored
      # @return [nil]
      #
      def scan_for_line_endings(start_pos, buf)
        return unless buf

        scanner = StringScanner.new(buf)
        return unless scanner.exist?(/\n/)

        # If we have already read part or all of buf, we already know about
        # line ends in that portion. Skip it instead of scanning it again.
        if @last_line_end && start_pos < @last_line_end
          already_scanned = @last_line_end - start_pos

          # The whole buffer lies inside the known range. Assigning a
          # position past the end of the buffer would raise RangeError, and
          # there is nothing new to find anyway.
          return if already_scanned >= scanner.string.bytesize

          scanner.pos = already_scanned
        end

        # Scan the string for line endings; store the positions of all
        # endings in @line_ends.
        while scanner.skip_until(/\n/)
          @last_line_end = start_pos + scanner.pos
          @line_ends << @last_line_end
        end
      end
    end

    # Mixin for arrays that implicitly give a number of ranges, where one
    # range begins where the other one ends. The array must be sorted in
    # ascending order.
    #
    # Example:
    #
    #   [10, 20, 30]
    #   # would describe [0, 10], (10, 20], (20, 30]
    #
    module RangeSearch
      # Midpoint between two indices. No longer used by #lbound, but kept so
      # that existing callers of this mixin keep working.
      #
      # NOTE: Jonathan Hinkle reported that when mathn is required, just
      # dividing and relying on the integer truncation is not enough.
      def find_mid(left, right)
        left + ((right - left) / 2).floor
      end

      # Scans the array for the first number that is > than bound. Returns
      # the index of that number, or nil when the array is empty or no
      # element is greater than bound.
      #
      def lbound(bound)
        # Find-minimum binary search from the standard library.
        bsearch_index { |range_end| range_end > bound }
      end
    end
  end
end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+ require 'strscan'
5
+
6
+ require 'parsanol/position'
7
+ require 'parsanol/source/line_cache'
8
+ require 'parsanol/pools/slice_pool'
9
+ require 'parsanol/pools/position_pool'
10
+
11
+ module Parsanol
12
+ # Wraps the input string for parslet.
13
+ #
14
class Source
  attr_reader :slice_pool, :position_pool

  # Wraps a string-like object for parsing.
  #
  # @param str [#to_str] the input to parse
  # @raise [ArgumentError] when str does not respond to #to_str
  #
  def initialize(str)
    unless str.respond_to?(:to_str)
      raise ArgumentError, "Must construct Source with a string like object."
    end

    # Convert once and use the same String for the scanner, the line cache
    # and position materialization.
    @source_str = str.to_str
    @str = StringScanner.new(@source_str)

    # maps 1 => /./m, 2 => /../m, etc...
    @re_cache = Hash.new { |cache, n| cache[n] = /(.|$){#{n}}/m }

    @line_cache = LineCache.new
    @line_cache.scan_for_line_endings(0, @source_str)

    # Phase 1.2: SlicePool for reducing GC pressure during parsing
    @slice_pool = Parsanol::Pools::SlicePool.new(size: 5000)

    # Phase 1.4: PositionPool for error reporting positions
    @position_pool = Parsanol::Pools::PositionPool.new(size: 1000)
  end

  # Checks if the given pattern matches at the current input position.
  #
  # @param pattern [Regexp] pattern to check for
  # @return [Integer, nil] result of StringScanner#match? - truthy (the match
  #   length) if the pattern matches at #pos, nil otherwise
  #
  def matches?(pattern)
    @str.match?(pattern)
  end
  alias match matches?

  # Consumes n characters from the input, returning them as a slice of the
  # input. The slice is acquired from the slice pool.
  #
  def consume(n)
    start = bytepos
    consumed = @str.scan(@re_cache[n])
    @slice_pool.acquire_with(start, consumed, @line_cache)
  end

  # Helper method to acquire a pooled slice.
  #
  # @param bytepos [Integer] Byte position in the input
  # @param str [String] The slice content
  # @return [Parsanol::Slice] A pooled slice ready for use
  #
  def slice(bytepos, str)
    @slice_pool.acquire_with(bytepos, str, @line_cache)
  end

  # Release a slice back to the pool.
  #
  # @param slice [Parsanol::Slice] The slice to release
  #
  def release_slice(slice)
    @slice_pool.release(slice)
  end

  # Returns how much input remains, as reported by StringScanner#rest_size.
  # NOTE(review): rest_size appears to count bytes, so for multibyte input
  # this is not a character count - confirm before relying on it as such.
  #
  def chars_left
    @str.rest_size
  end

  # Returns how many chars there are between current position and the
  # string given. If the string given doesn't occur in the source, then
  # the remaining chars (#chars_left) are returned.
  #
  # @return [Integer] count of chars until str or #chars_left
  #
  def chars_until(str)
    up_to_match = @str.check_until(Regexp.new(Regexp.escape(str)))
    return chars_left unless up_to_match

    up_to_match.size - str.size
  end

  # Phase 31: Scan forward to find the next occurrence of a character.
  # Returns the byte position of the next occurrence, or nil if not found.
  # Does not move the scanner position.
  #
  # @param char [String] single character to search for
  # @return [Integer, nil] byte position or nil if not found
  #
  def index_of_char(char)
    remainder = @str.rest
    char_idx = remainder.index(char)
    return nil unless char_idx

    # String#index yields a *character* index while the scanner position is
    # a *byte* offset; convert before adding so multibyte input is correct.
    @str.pos + remainder[0, char_idx].bytesize
  end

  # Position of the parse as a byte offset into the original string.
  #
  # @return [Integer] Current byte position in the input
  # @note Please be aware of encodings at this point.
  #
  def pos
    @str.pos
  end

  # Alias for pos - returns the current byte position.
  alias bytepos pos

  # Moves the scanner to byte position n. Out-of-range positions are
  # ignored on purpose (best effort): the position is simply left unchanged.
  #
  # @note Please be aware of encodings at this point.
  #
  def bytepos=(n)
    @str.pos = n
  rescue RangeError
  end

  # Returns a <line, column> tuple for the given position. If no position is
  # given, line/column information is returned for the current position
  # given by #pos.
  #
  def line_and_column(position = nil)
    @line_cache.line_and_column(position || bytepos)
  end

  # Creates a pooled Position object for the given byte position.
  # This is used for error reporting where Position objects are needed.
  #
  # @param bytepos [Integer] Byte position in source (defaults to current position)
  # @return [Parsanol::Position] A pooled Position object
  #
  def position(bytepos = nil)
    effective_pos = bytepos || self.bytepos

    # Character position = number of characters in the first effective_pos
    # bytes of the input.
    charpos = @source_str.byteslice(0, effective_pos).size

    @position_pool.acquire_with(
      string: @source_str,
      bytepos: effective_pos,
      charpos: charpos
    )
  end
end
171
+ end
@@ -0,0 +1,164 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsanol::SourceLocation - Source Location Tracking
4
+ #
5
+ # Track source positions (line, column, offset) through the parsing and
6
+ # transformation pipeline. This is useful for error reporting, IDE integration,
7
+ # and source mapping.
8
+ #
9
+ # Usage:
10
+ # # Parse with source tracking
11
+ # result = parser.parse_with_spans("hello world")
12
+ # tree = result.tree
13
+ # spans = result.spans
14
+ #
15
+ # # Access span for a node
16
+ # span = spans[node_id]
17
+ # puts "Matched at line #{span.start.line}, column #{span.start.column}"
18
+ #
19
+ # Requires native extension for full functionality.
20
+
21
+ module Parsanol
22
+ # Represents a position in source code
23
class SourcePosition
  attr_reader :offset, :line, :column

  # @param offset [Integer] offset into the source
  # @param line [Integer] line number
  # @param column [Integer] column number
  def initialize(offset:, line:, column:)
    @offset, @line, @column = offset, line, column
  end

  # Human readable form, e.g. "line 2, column 3 (offset 5)".
  def to_s
    "line #{@line}, column #{@column} (offset #{@offset})"
  end

  # Plain Hash with the three coordinates.
  def to_h
    { offset: @offset, line: @line, column: @column }
  end

  # Two positions are equal when the other object is a SourcePosition and
  # all three coordinates match.
  def ==(other)
    other.is_a?(SourcePosition) &&
      @offset == other.offset &&
      @line == other.line &&
      @column == other.column
  end

  # Hash-key equality; delegates to #==.
  def eql?(other)
    self == other
  end

  # Hash code derived from the three coordinates.
  def hash
    [@offset, @line, @column].hash
  end
end
53
+
54
+ # Represents a span in source code (from start to end position)
55
class SourceSpan
  attr_reader :start, :end

  # @param start_pos [SourcePosition, Hash] start position, or a Hash with
  #   :offset, :line and :column keys
  # @param end_pos [SourcePosition, Hash] end position, same forms as above
  def initialize(start_pos:, end_pos:)
    @start = start_pos.is_a?(SourcePosition) ? start_pos : SourcePosition.new(**start_pos)
    @end = end_pos.is_a?(SourcePosition) ? end_pos : SourcePosition.new(**end_pos)
  end

  # Create a span from byte offsets (computes line/column from input).
  #
  # @param input [String] the source text
  # @param start_offset [Integer] byte offset where the span starts
  # @param end_offset [Integer] byte offset where the span ends
  # @return [SourceSpan]
  def self.from_offsets(input, start_offset, end_offset)
    new(
      start_pos: compute_position(input, start_offset),
      end_pos: compute_position(input, end_offset)
    )
  end

  # Compute 1-based line and column for a byte offset into input.
  #
  # Offsets in this class are byte offsets (see #length and #extract), so
  # the walk over the input advances by each character's byte size rather
  # than by one per character; otherwise any multibyte character before the
  # offset would shift the reported line/column. The column counts
  # characters, not bytes.
  #
  # This stays public: the original file placed it under a bare `private`,
  # which never applies to `def self.` methods, so it has always been
  # callable from outside.
  #
  # @param input [String] the source text
  # @param offset [Integer] byte offset into input
  # @return [SourcePosition]
  def self.compute_position(input, offset)
    line = 1
    column = 1
    consumed_bytes = 0

    input.each_char do |char|
      break if consumed_bytes >= offset

      if char == "\n"
        line += 1
        column = 1
      else
        column += 1
      end

      consumed_bytes += char.bytesize
    end

    SourcePosition.new(offset: offset, line: line, column: column)
  end

  # Merge two spans (returns a new span covering both). Returns self when
  # other is nil.
  def merge(other)
    return self if other.nil?

    SourceSpan.new(
      start_pos: [@start, other.start].min_by(&:offset),
      end_pos: [@end, other.end].max_by(&:offset)
    )
  end

  # Check if this span overlaps with another (touching ends do not count).
  def overlaps?(other)
    return false if other.nil?

    @start.offset < other.end.offset && @end.offset > other.start.offset
  end

  # Check if this span is adjacent to another, i.e. one ends exactly where
  # the other starts.
  def adjacent?(other)
    return false if other.nil?

    @end.offset == other.start.offset || other.end.offset == @start.offset
  end

  # Check if a position (SourcePosition or raw offset) is within this span.
  # Both ends are inclusive.
  def contains?(position)
    offset = position.is_a?(SourcePosition) ? position.offset : position
    offset >= @start.offset && offset <= @end.offset
  end

  # Get the length of the span in bytes.
  def length
    @end.offset - @start.offset
  end

  # Extract the source text covered by this span from the input.
  def extract(input)
    input.byteslice(@start.offset, length)
  end

  def to_s
    "#{@start} - #{@end}"
  end

  def to_h
    { start: @start.to_h, end: @end.to_h }
  end

  def ==(other)
    return false unless other.is_a?(SourceSpan)

    @start == other.start && @end == other.end
  end

  # Hash-key equality, defined together with #hash so that equal spans can
  # be used interchangeably as Hash keys or Set members.
  def eql?(other)
    other.is_a?(SourceSpan) && @start.eql?(other.start) && @end.eql?(other.end)
  end

  # Hash code derived from both end points; consistent with #eql?.
  def hash
    [@start, @end].hash
  end
end
144
+
145
+ # Result wrapper for parse_with_spans
146
class ParseResultWithSpans
  attr_reader :tree, :spans

  # @param tree [Object] the parse tree
  # @param spans [Hash] spans keyed by node id
  def initialize(tree:, spans:)
    @tree, @spans = tree, spans
  end

  # Look up the span stored under the given node id.
  def span_for(node_id)
    @spans[node_id]
  end

  # Collect every stored span whose #contains? accepts the given offset.
  def spans_at(offset)
    candidates = @spans.values
    candidates.select { |candidate| candidate.contains?(offset) }
  end
end
164
+ end