parsanol 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of parsanol might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/HISTORY.txt +25 -0
- data/LICENSE +23 -0
- data/README.adoc +643 -0
- data/Rakefile +189 -0
- data/example/balanced-parens/basic.rb +42 -0
- data/example/balanced-parens/basic.rb.md +86 -0
- data/example/balanced-parens/parens.rb +42 -0
- data/example/balanced-parens/ruby_transform.rb +162 -0
- data/example/big.erb +73 -0
- data/example/boolean-algebra/basic.rb +70 -0
- data/example/boolean-algebra/basic.rb.md +108 -0
- data/example/boolean-algebra/ruby_transform.rb +263 -0
- data/example/calculator/basic.rb +153 -0
- data/example/calculator/basic.rb.md +120 -0
- data/example/calculator/pattern.rb +153 -0
- data/example/calculator/ruby_transform.rb +156 -0
- data/example/calculator/ruby_transform.rb.md +32 -0
- data/example/calculator/serialized.rb +257 -0
- data/example/calculator/serialized.rb.md +32 -0
- data/example/calculator/transform.rb +153 -0
- data/example/calculator/zero_copy.rb +269 -0
- data/example/calculator/zero_copy.rb.md +36 -0
- data/example/capture/basic.rb +49 -0
- data/example/capture/basic.rb.md +106 -0
- data/example/capture/example.json +39 -0
- data/example/comments/basic.rb +35 -0
- data/example/comments/basic.rb.md +110 -0
- data/example/csv/ruby_transform.rb +148 -0
- data/example/csv/ruby_transform.rb.md +131 -0
- data/example/csv/serialized.rb +201 -0
- data/example/csv/serialized.rb.md +31 -0
- data/example/csv/zero_copy.rb +276 -0
- data/example/csv/zero_copy.rb.md +36 -0
- data/example/custom_atoms/indent_atom.rb +79 -0
- data/example/deepest-errors/basic.rb +131 -0
- data/example/deepest-errors/basic.rb.md +152 -0
- data/example/documentation/basic.rb +18 -0
- data/example/documentation/basic.rb.md +97 -0
- data/example/email/basic.rb +55 -0
- data/example/email/basic.rb.md +102 -0
- data/example/email/ruby_transform.rb +106 -0
- data/example/empty/basic.rb +13 -0
- data/example/empty/basic.rb.md +73 -0
- data/example/empty/example.json +38 -0
- data/example/erb/basic.rb +47 -0
- data/example/erb/basic.rb.md +103 -0
- data/example/erb/optimized.rb +42 -0
- data/example/error-reporting/basic.rb +132 -0
- data/example/error-reporting/basic.rb.md +122 -0
- data/example/expression-evaluator/basic.rb +284 -0
- data/example/expression-evaluator/basic.rb.md +138 -0
- data/example/ini/basic.rb +154 -0
- data/example/ini/basic.rb.md +129 -0
- data/example/ini/ruby_transform.rb +154 -0
- data/example/ip-address/basic.rb +125 -0
- data/example/ip-address/basic.rb.md +139 -0
- data/example/iso-6709/basic.rb +231 -0
- data/example/iso-6709/basic.rb.md +143 -0
- data/example/iso-8601/basic.rb +275 -0
- data/example/iso-8601/basic.rb.md +149 -0
- data/example/json/basic.rb +128 -0
- data/example/json/basic.rb.md +121 -0
- data/example/json/pattern.rb +128 -0
- data/example/json/ruby_transform.rb +200 -0
- data/example/json/ruby_transform.rb.md +32 -0
- data/example/json/serialized.rb +233 -0
- data/example/json/serialized.rb.md +31 -0
- data/example/json/transform.rb +128 -0
- data/example/json/zero_copy.rb +316 -0
- data/example/json/zero_copy.rb.md +36 -0
- data/example/local/basic.rb +34 -0
- data/example/local/basic.rb.md +91 -0
- data/example/local/example.json +38 -0
- data/example/markdown/basic.rb +287 -0
- data/example/markdown/basic.rb.md +160 -0
- data/example/markup/basic.rb +173 -0
- data/example/markup/basic.rb.md +118 -0
- data/example/mathn/basic.rb +47 -0
- data/example/mathn/basic.rb.md +96 -0
- data/example/mathn/example.json +39 -0
- data/example/minilisp/basic.rb +94 -0
- data/example/minilisp/basic.rb.md +133 -0
- data/example/modularity/basic.rb +47 -0
- data/example/modularity/basic.rb.md +152 -0
- data/example/nested-errors/basic.rb +132 -0
- data/example/nested-errors/basic.rb.md +157 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/capture.out +3 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/optimized_erb.out +1 -0
- data/example/output/parens.out +8 -0
- data/example/output/prec_calc.out +5 -0
- data/example/output/readme.out +1 -0
- data/example/output/scopes.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/prec-calc/basic.rb +71 -0
- data/example/prec-calc/basic.rb.md +114 -0
- data/example/readme/basic.rb +30 -0
- data/example/readme/basic.rb.md +80 -0
- data/example/scopes/basic.rb +15 -0
- data/example/scopes/basic.rb.md +73 -0
- data/example/scopes/example.json +38 -0
- data/example/seasons/basic.rb +46 -0
- data/example/seasons/basic.rb.md +117 -0
- data/example/seasons/example.json +40 -0
- data/example/sentence/basic.rb +36 -0
- data/example/sentence/basic.rb.md +81 -0
- data/example/sexp/ruby_transform.rb +180 -0
- data/example/sexp/ruby_transform.rb.md +143 -0
- data/example/simple-xml/basic.rb +54 -0
- data/example/simple-xml/basic.rb.md +125 -0
- data/example/simple.lit +3 -0
- data/example/string-literal/basic.rb +77 -0
- data/example/string-literal/basic.rb.md +128 -0
- data/example/test.lit +4 -0
- data/example/toml/basic.rb +226 -0
- data/example/toml/basic.rb.md +173 -0
- data/example/url/basic.rb +219 -0
- data/example/url/basic.rb.md +142 -0
- data/example/url/ruby_transform.rb +219 -0
- data/example/yaml/basic.rb +216 -0
- data/example/yaml/basic.rb.md +148 -0
- data/ext/parsanol_native/extconf.rb +4 -0
- data/lib/parsanol/accelerator/application.rb +62 -0
- data/lib/parsanol/accelerator/engine.rb +112 -0
- data/lib/parsanol/accelerator.rb +162 -0
- data/lib/parsanol/ast_visitor.rb +122 -0
- data/lib/parsanol/atoms/alternative.rb +97 -0
- data/lib/parsanol/atoms/base.rb +214 -0
- data/lib/parsanol/atoms/can_flatten.rb +192 -0
- data/lib/parsanol/atoms/capture.rb +41 -0
- data/lib/parsanol/atoms/context.rb +351 -0
- data/lib/parsanol/atoms/context_optimized.rb +42 -0
- data/lib/parsanol/atoms/custom.rb +110 -0
- data/lib/parsanol/atoms/cut.rb +62 -0
- data/lib/parsanol/atoms/dsl.rb +130 -0
- data/lib/parsanol/atoms/dynamic.rb +33 -0
- data/lib/parsanol/atoms/entity.rb +55 -0
- data/lib/parsanol/atoms/ignored.rb +28 -0
- data/lib/parsanol/atoms/infix.rb +121 -0
- data/lib/parsanol/atoms/lookahead.rb +64 -0
- data/lib/parsanol/atoms/named.rb +50 -0
- data/lib/parsanol/atoms/re.rb +61 -0
- data/lib/parsanol/atoms/repetition.rb +241 -0
- data/lib/parsanol/atoms/scope.rb +28 -0
- data/lib/parsanol/atoms/sequence.rb +157 -0
- data/lib/parsanol/atoms/str.rb +90 -0
- data/lib/parsanol/atoms/visitor.rb +91 -0
- data/lib/parsanol/atoms.rb +36 -0
- data/lib/parsanol/buffer.rb +130 -0
- data/lib/parsanol/builder_callbacks.rb +353 -0
- data/lib/parsanol/cause.rb +101 -0
- data/lib/parsanol/context.rb +23 -0
- data/lib/parsanol/convenience.rb +35 -0
- data/lib/parsanol/edit_tracker.rb +107 -0
- data/lib/parsanol/error_reporter/contextual.rb +122 -0
- data/lib/parsanol/error_reporter/deepest.rb +106 -0
- data/lib/parsanol/error_reporter/tree.rb +68 -0
- data/lib/parsanol/error_reporter.rb +98 -0
- data/lib/parsanol/export.rb +163 -0
- data/lib/parsanol/expression/treetop.rb +94 -0
- data/lib/parsanol/expression.rb +51 -0
- data/lib/parsanol/fast_mode.rb +145 -0
- data/lib/parsanol/first_set.rb +75 -0
- data/lib/parsanol/grammar_builder.rb +177 -0
- data/lib/parsanol/graphviz.rb +97 -0
- data/lib/parsanol/incremental_parser.rb +179 -0
- data/lib/parsanol/interval_tree.rb +215 -0
- data/lib/parsanol/lazy_result.rb +178 -0
- data/lib/parsanol/lexer.rb +146 -0
- data/lib/parsanol/native/parser.rb +630 -0
- data/lib/parsanol/native/serializer.rb +245 -0
- data/lib/parsanol/native/transformer.rb +438 -0
- data/lib/parsanol/native/types.rb +41 -0
- data/lib/parsanol/native.rb +217 -0
- data/lib/parsanol/optimizer.rb +86 -0
- data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
- data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
- data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
- data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
- data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
- data/lib/parsanol/options/ruby_transform.rb +109 -0
- data/lib/parsanol/options/serialized.rb +94 -0
- data/lib/parsanol/options/zero_copy.rb +130 -0
- data/lib/parsanol/options.rb +20 -0
- data/lib/parsanol/parallel.rb +133 -0
- data/lib/parsanol/parsanol_native.bundle +0 -0
- data/lib/parsanol/parser.rb +151 -0
- data/lib/parsanol/parslet.rb +148 -0
- data/lib/parsanol/parslet_native.bundle +0 -0
- data/lib/parsanol/pattern/binding.rb +49 -0
- data/lib/parsanol/pattern.rb +115 -0
- data/lib/parsanol/pool.rb +220 -0
- data/lib/parsanol/pools/array_pool.rb +75 -0
- data/lib/parsanol/pools/buffer_pool.rb +173 -0
- data/lib/parsanol/pools/position_pool.rb +92 -0
- data/lib/parsanol/pools/slice_pool.rb +64 -0
- data/lib/parsanol/position.rb +89 -0
- data/lib/parsanol/result.rb +44 -0
- data/lib/parsanol/result_builder.rb +208 -0
- data/lib/parsanol/result_stream.rb +262 -0
- data/lib/parsanol/rig/rspec.rb +52 -0
- data/lib/parsanol/rope.rb +78 -0
- data/lib/parsanol/scope.rb +42 -0
- data/lib/parsanol/slice.rb +172 -0
- data/lib/parsanol/source/line_cache.rb +99 -0
- data/lib/parsanol/source.rb +171 -0
- data/lib/parsanol/source_location.rb +164 -0
- data/lib/parsanol/streaming_parser.rb +124 -0
- data/lib/parsanol/string_view.rb +192 -0
- data/lib/parsanol/transform.rb +267 -0
- data/lib/parsanol/version.rb +5 -0
- data/lib/parsanol/wasm/README.md +80 -0
- data/lib/parsanol/wasm/package.json +51 -0
- data/lib/parsanol/wasm/parsanol.js +252 -0
- data/lib/parsanol/wasm/parslet.d.ts +129 -0
- data/lib/parsanol/wasm_parser.rb +239 -0
- data/lib/parsanol.rb +408 -0
- data/parsanol-ruby.gemspec +56 -0
- data/spec/acceptance/examples_spec.rb +96 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/benchmark/comparative/runner_spec.rb +105 -0
- data/spec/integration/array_pooling_spec.rb +193 -0
- data/spec/integration/buffer_allocation_spec.rb +324 -0
- data/spec/integration/position_pooling_spec.rb +184 -0
- data/spec/integration/result_builder_spec.rb +282 -0
- data/spec/integration/rope_stringview_integration_spec.rb +188 -0
- data/spec/integration/slice_pooling_spec.rb +63 -0
- data/spec/integration/string_view_integration_spec.rb +125 -0
- data/spec/lexer_spec.rb +231 -0
- data/spec/parsanol/atom_results_spec.rb +39 -0
- data/spec/parsanol/atoms/alternative_spec.rb +26 -0
- data/spec/parsanol/atoms/base_spec.rb +127 -0
- data/spec/parsanol/atoms/capture_spec.rb +21 -0
- data/spec/parsanol/atoms/combinations_spec.rb +5 -0
- data/spec/parsanol/atoms/custom_spec.rb +79 -0
- data/spec/parsanol/atoms/dsl_spec.rb +7 -0
- data/spec/parsanol/atoms/entity_spec.rb +77 -0
- data/spec/parsanol/atoms/ignored_spec.rb +15 -0
- data/spec/parsanol/atoms/infix_spec.rb +5 -0
- data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
- data/spec/parsanol/atoms/named_spec.rb +4 -0
- data/spec/parsanol/atoms/re_spec.rb +14 -0
- data/spec/parsanol/atoms/repetition_spec.rb +24 -0
- data/spec/parsanol/atoms/scope_spec.rb +26 -0
- data/spec/parsanol/atoms/sequence_spec.rb +28 -0
- data/spec/parsanol/atoms/str_spec.rb +15 -0
- data/spec/parsanol/atoms/visitor_spec.rb +101 -0
- data/spec/parsanol/atoms_spec.rb +488 -0
- data/spec/parsanol/auto_optimize_spec.rb +334 -0
- data/spec/parsanol/buffer_spec.rb +219 -0
- data/spec/parsanol/builder_callbacks_spec.rb +377 -0
- data/spec/parsanol/choice_optimizer_spec.rb +231 -0
- data/spec/parsanol/convenience_spec.rb +54 -0
- data/spec/parsanol/cut_inserter_spec.rb +248 -0
- data/spec/parsanol/cut_spec.rb +66 -0
- data/spec/parsanol/edit_tracker_spec.rb +218 -0
- data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
- data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
- data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
- data/spec/parsanol/export_spec.rb +67 -0
- data/spec/parsanol/expression/treetop_spec.rb +75 -0
- data/spec/parsanol/first_set_spec.rb +298 -0
- data/spec/parsanol/interval_tree_spec.rb +205 -0
- data/spec/parsanol/lazy_result_spec.rb +288 -0
- data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
- data/spec/parsanol/minilisp.citrus +29 -0
- data/spec/parsanol/minilisp.tt +29 -0
- data/spec/parsanol/optimizer_spec.rb +459 -0
- data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
- data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
- data/spec/parsanol/options/serialized_spec.rb +69 -0
- data/spec/parsanol/options/zero_copy_spec.rb +230 -0
- data/spec/parsanol/parser_spec.rb +36 -0
- data/spec/parsanol/parslet_spec.rb +38 -0
- data/spec/parsanol/pattern_spec.rb +272 -0
- data/spec/parsanol/pool_spec.rb +392 -0
- data/spec/parsanol/pools/array_pool_spec.rb +356 -0
- data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
- data/spec/parsanol/pools/position_pool_spec.rb +118 -0
- data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
- data/spec/parsanol/position_spec.rb +14 -0
- data/spec/parsanol/result_builder_spec.rb +391 -0
- data/spec/parsanol/rig/rspec_spec.rb +54 -0
- data/spec/parsanol/rope_spec.rb +207 -0
- data/spec/parsanol/scope_spec.rb +45 -0
- data/spec/parsanol/slice_spec.rb +249 -0
- data/spec/parsanol/source/line_cache_spec.rb +74 -0
- data/spec/parsanol/source_spec.rb +207 -0
- data/spec/parsanol/string_view_spec.rb +345 -0
- data/spec/parsanol/transform/context_spec.rb +56 -0
- data/spec/parsanol/transform_spec.rb +183 -0
- data/spec/parsanol/tree_memoization_spec.rb +149 -0
- data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
- data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
- data/spec/parslet_compatibility_spec.rb +399 -0
- data/spec/parslet_imported/atom_spec.rb +93 -0
- data/spec/parslet_imported/combinator_spec.rb +161 -0
- data/spec/parslet_imported/spec_helper.rb +73 -0
- data/spec/performance/batch_parsing_benchmark.rb +129 -0
- data/spec/performance/complete_optimization_summary.rb +143 -0
- data/spec/performance/grammar_caching_analysis.rb +121 -0
- data/spec/performance/grammar_caching_benchmark.rb +80 -0
- data/spec/performance/native_benchmark_spec.rb +230 -0
- data/spec/performance/phase5_benchmark.rb +144 -0
- data/spec/performance/profiling_benchmark.rb +131 -0
- data/spec/performance/ruby_improvements_benchmark.rb +171 -0
- data/spec/performance_spec.rb +374 -0
- data/spec/spec_helper.rb +79 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- metadata +485 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ast_visitor'
|
|
4
|
+
|
|
5
|
+
module Parsanol
|
|
6
|
+
module Optimizers
|
|
7
|
+
# Optimizes repetition/quantifier patterns in the AST
|
|
8
|
+
# Follows visitor pattern for clean separation of concerns
|
|
9
|
+
#
|
|
10
|
+
# Transformations:
|
|
11
|
+
# - repeat(1,1) => unwrap (identity transformation)
|
|
12
|
+
# - repeat(0,1).repeat(0,1) => repeat(0,1) (idempotent)
|
|
13
|
+
# - repeat(n,n).repeat(m,m) => repeat(n*m,n*m) (multiply exact counts)
|
|
14
|
+
class QuantifierOptimizer < ASTVisitor
|
|
15
|
+
# Visit a repetition node and apply quantifier optimizations
|
|
16
|
+
# @param parslet [Parsanol::Atoms::Repetition] repetition to optimize
|
|
17
|
+
# @return [Parsanol::Atoms::Base] optimized parslet
|
|
18
|
+
def visit_repetition(parslet)
  # The wrapped atom is optimized before this node is inspected.
  child = visit(parslet.parslet)
  lower = parslet.min
  upper = parslet.max

  # repeat(1,1): the wrapper is dropped and the optimized child returned.
  return child if lower == 1 && upper == 1

  if child.is_a?(Parsanol::Atoms::Repetition)
    # repeat(0,1) around repeat(0,1) collapses to the inner repetition.
    outer_optional = lower == 0 && upper == 1
    inner_optional = child.min == 0 && child.max == 1
    return child if outer_optional && inner_optional

    # Two exact, bounded counts combine into one repetition of n*m.
    outer_exact = upper && lower == upper
    inner_exact = child.max && child.min == child.max
    if outer_exact && inner_exact
      total = lower * child.min
      return Parsanol::Atoms::Repetition.new(
        child.parslet, total, total, parslet.instance_variable_get(:@tag)
      )
    end
  end

  # Reuse the original node when the child came back untouched.
  return parslet if child.equal?(parslet.parslet)

  Parsanol::Atoms::Repetition.new(
    child, lower, upper, parslet.instance_variable_get(:@tag)
  )
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ast_visitor'
|
|
4
|
+
|
|
5
|
+
module Parsanol
|
|
6
|
+
module Optimizers
|
|
7
|
+
# Optimizes sequence patterns in the AST
|
|
8
|
+
# Follows visitor pattern for clean separation of concerns
|
|
9
|
+
#
|
|
10
|
+
# Transformations:
|
|
11
|
+
# - str('a') >> str('b') => str('ab') (merge adjacent strings)
|
|
12
|
+
# - (A >> B) >> C => A >> B >> C (flatten nested sequences)
|
|
13
|
+
# - Sequence(A) => A (unwrap single-element sequences)
|
|
14
|
+
class SequenceOptimizer < ASTVisitor
|
|
15
|
+
# Visit a sequence node and apply sequence optimizations
|
|
16
|
+
# @param parslet [Parsanol::Atoms::Sequence] sequence to optimize
|
|
17
|
+
# @return [Parsanol::Atoms::Base] optimized parslet
|
|
18
|
+
def visit_sequence(parslet)
  # Pipeline: optimize each child, splice nested sequences, merge string runs.
  optimized = merge_adjacent_strings(
    flatten_sequences(parslet.parslets.map { |child| visit(child) })
  )

  # A sequence of exactly one element is replaced by that element.
  return optimized.first if optimized.size == 1

  # Keep the original node when the child list is unchanged.
  optimized == parslet.parslets ? parslet : Parsanol::Atoms::Sequence.new(*optimized)
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
# Flatten nested sequences into a single level
|
|
42
|
+
# @param parslets [Array<Parsanol::Atoms::Base>] array of parslets
|
|
43
|
+
# @return [Array<Parsanol::Atoms::Base>] flattened array
|
|
44
|
+
def flatten_sequences(parslets)
  # Splice the children of each Sequence into the result (one level only);
  # every other atom is kept as-is, in order.
  parslets.flat_map do |node|
    node.is_a?(Parsanol::Atoms::Sequence) ? node.parslets : [node]
  end
end
|
|
55
|
+
|
|
56
|
+
# Merge adjacent Str atoms into single Str atoms
|
|
57
|
+
# @param parslets [Array<Parsanol::Atoms::Base>] array of parslets
|
|
58
|
+
# @return [Array<Parsanol::Atoms::Base>] array with merged strings
|
|
59
|
+
def merge_adjacent_strings(parslets)
  # Nothing can be adjacent in a list of fewer than two atoms.
  return parslets if parslets.size < 2

  str_class = Parsanol::Atoms::Str

  # Group maximal runs of consecutive Str atoms; every non-Str atom ends up
  # in a run of its own. Each multi-element run becomes one Str whose text
  # is the concatenation of the run, built with a single join. This avoids
  # relying on Rope#append mutating its receiver and on Parsanol::Rope
  # being loaded.
  parslets
    .chunk_while { |left, right| left.is_a?(str_class) && right.is_a?(str_class) }
    .flat_map do |run|
      # Single atoms (Str or not) are passed through as the same object.
      next run if run.size == 1

      [str_class.new(run.map(&:str).join)]
    end
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol::RubyTransform - Ruby Transform Mode (Parslet-Compatible)
|
|
4
|
+
#
|
|
5
|
+
# This is the default parsing mode that provides maximum flexibility.
|
|
6
|
+
# - Parsing can use Rust (if available) or pure Ruby
|
|
7
|
+
# - Transformation happens in Ruby using Parslet-style Transform class
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# class MyParser < Parsanol::Parser
|
|
11
|
+
# include Parsanol::RubyTransform
|
|
12
|
+
# rule(:number) { match('[0-9]').repeat(1).as(:int) }
|
|
13
|
+
# root(:number)
|
|
14
|
+
# end
|
|
15
|
+
#
|
|
16
|
+
# parser = MyParser.new
|
|
17
|
+
# tree = parser.parse("42") # Returns generic tree
|
|
18
|
+
# ast = transform.apply(tree) # Transform in Ruby
|
|
19
|
+
#
|
|
20
|
+
# To use Rust backend for parsing:
|
|
21
|
+
# class MyParser < Parsanol::Parser
|
|
22
|
+
# include Parsanol::RubyTransform
|
|
23
|
+
# parse_backend :rust # Falls back to pure Ruby at parse time if the native extension is unavailable; call use_rust_backend! to raise LoadError instead
|
|
24
|
+
# ...
|
|
25
|
+
# end
|
|
26
|
+
|
|
27
|
+
module Parsanol
|
|
28
|
+
module RubyTransform
|
|
29
|
+
def self.included(base)
  # Including this module also extends the including class with the
  # class-level configuration methods from ClassMethods.
  base.extend ClassMethods
end
|
|
32
|
+
|
|
33
|
+
module ClassMethods
  # Read the parsing backend, optionally assigning it first.
  # @param backend [Symbol, nil] :ruby or :rust; nil leaves the setting as is
  # @return [Symbol] backend in effect (:ruby when none has been set)
  def parse_backend(backend = nil)
    @parse_backend = backend if backend
    @parse_backend ||= :ruby
  end

  # Assign the parsing backend without any availability check.
  # @param backend [Symbol] :ruby or :rust
  def parse_backend=(backend)
    @parse_backend = backend
  end

  # Select the Rust backend, refusing when the native extension is missing.
  # @raise [LoadError] if Parsanol::Native.available? is falsy
  def use_rust_backend!
    native_ready = Parsanol::Native.available?
    unless native_ready
      message = "Rust backend requested but native extension not available. " \
                "Run `rake compile` to build the extension."
      raise LoadError, message
    end

    @parse_backend = :rust
  end

  # Select the pure Ruby backend (the default).
  def use_ruby_backend!
    @parse_backend = :ruby
  end
end
|
|
66
|
+
|
|
67
|
+
# Parse input and return generic tree
|
|
68
|
+
#
|
|
69
|
+
# @param input [String] The input string to parse
|
|
70
|
+
# @param options [Hash] Parse options
|
|
71
|
+
# @option options [Boolean] :consume_all (true) Consume entire input (passed through to the Ruby backend; the Rust path currently does not forward it)
|
|
72
|
+
# @return [Hash, Array, String, Parsanol::Slice] Parse tree
|
|
73
|
+
# @raise [Parsanol::ParseFailed] If parsing fails
|
|
74
|
+
def parse(input, options = {})
  # The Rust path is taken only when it is both selected on the class and
  # actually available; otherwise parsing goes through the Ruby path.
  rust_selected = self.class.parse_backend == :rust
  return parse_with_rust(input, options) if rust_selected && Parsanol::Native.available?

  parse_with_ruby(input, options)
end
|
|
81
|
+
|
|
82
|
+
# Parse and apply transform in one step
|
|
83
|
+
#
|
|
84
|
+
# @param input [String] The input string to parse
|
|
85
|
+
# @param transform [Parsanol::Transform] Transform to apply
|
|
86
|
+
# @param options [Hash] Parse options
|
|
87
|
+
# @return [Object] Transformed result
|
|
88
|
+
def parse_with_transform(input, transform, options = {})
  # Parse first, then hand the resulting tree straight to the transform.
  transform.apply(parse(input, options))
end
|
|
92
|
+
|
|
93
|
+
private
|
|
94
|
+
|
|
95
|
+
# Parse using Rust native extension
|
|
96
|
+
def parse_with_rust(input, options = {})
  # Delegates to the native parser, passing the grammar root and the input.
  #
  # NOTE(review): `options` is accepted for signature parity with
  # parse_with_ruby but is not forwarded to the native call, so
  # :consume_all has no effect on this path. The previously computed but
  # unused `consume_all` local was removed. Confirm whether
  # Parsanol::Native.parse_parslet_compatible can take such an option
  # before wiring it through.
  Parsanol::Native.parse_parslet_compatible(root, input)
end
|
|
102
|
+
|
|
103
|
+
# Parse using pure Ruby
|
|
104
|
+
def parse_with_ruby(input, options = {})
  # The grammar's root atom does the parsing; options pass through untouched.
  grammar = root
  grammar.parse(input, options)
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
# Parsanol::Serialized - Serialized Transform Mode (JSON Output)
|
|
6
|
+
#
|
|
7
|
+
# This mode provides cross-language compatibility through JSON serialization.
|
|
8
|
+
# - Parsing AND transformation happen in Rust for maximum performance
|
|
9
|
+
# - Output is a JSON string that can be deserialized to any format
|
|
10
|
+
# - REQUIRES native extension (will raise LoadError if not available)
|
|
11
|
+
#
|
|
12
|
+
# Usage:
|
|
13
|
+
# class MyParser < Parsanol::Parser
|
|
14
|
+
# include Parsanol::Serialized
|
|
15
|
+
# rule(:number) { match('[0-9]').repeat(1).as(:int) }
|
|
16
|
+
# root(:number)
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# parser = MyParser.new
|
|
20
|
+
# json = parser.parse_to_json("42") # Returns JSON string
|
|
21
|
+
# # => '{"int": "42"}'
|
|
22
|
+
#
|
|
23
|
+
# # With a deserializer class
|
|
24
|
+
# result = parser.parse_to_struct("42", MyDeserializer)
|
|
25
|
+
#
|
|
26
|
+
# Performance: Faster than RubyTransform because transform happens in Rust.
|
|
27
|
+
# Memory: Higher than ZeroCopy due to JSON serialization overhead.
|
|
28
|
+
|
|
29
|
+
module Parsanol
|
|
30
|
+
module Serialized
|
|
31
|
+
def self.included(base)
  # Including this module also extends the including class with the
  # class-level methods from ClassMethods.
  base.extend ClassMethods
end
|
|
34
|
+
|
|
35
|
+
module ClassMethods
  # Read the output schema, optionally assigning it first.
  #
  # @param schema [Hash, nil] schema definition; nil leaves the setting as is
  # @return [Hash] the stored schema ({} when none has been set)
  # @example
  #   output_schema(
  #     number: { type: :integer },
  #     binop: { type: :object, properties: [:left, :op, :right] }
  #   )
  def output_schema(schema = nil)
    return @output_schema = schema if schema

    @output_schema ||= {}
  end
end
|
|
50
|
+
|
|
51
|
+
# Parse input and return JSON string
|
|
52
|
+
#
|
|
53
|
+
# @param input [String] The input string to parse
|
|
54
|
+
# @return [String] JSON string representing the parse result
|
|
55
|
+
# @raise [LoadError] If native extension not available
|
|
56
|
+
# @raise [Parsanol::ParseFailed] If parsing fails
|
|
57
|
+
def parse_to_json(input)
  native = Parsanol::Native

  # This mode has no Ruby path: without the native extension it refuses.
  unless native.available?
    message = "Serialized mode requires native extension for JSON serialization. " \
              "Run `rake compile` or use Parsanol::RubyTransform for Ruby-only parsing."
    raise LoadError, message
  end

  # The grammar root is serialized, then passed to the native parser with the input.
  native.parse_to_json(native.serialize_grammar(root), input)
end
|
|
67
|
+
|
|
68
|
+
# Parse input and deserialize to a Ruby object
|
|
69
|
+
#
|
|
70
|
+
# @param input [String] The input string to parse
|
|
71
|
+
# @param deserializer_class [Class] Class with .from_json method
|
|
72
|
+
# @return [Object] Deserialized object
|
|
73
|
+
# @raise [LoadError] If native extension not available
|
|
74
|
+
# @raise [Parsanol::ParseFailed] If parsing fails
|
|
75
|
+
def parse_to_struct(input, deserializer_class)
  # The JSON string from parse_to_json is passed to the class's .from_json.
  deserializer_class.from_json(parse_to_json(input))
end
|
|
79
|
+
|
|
80
|
+
# Parse input and return Ruby Hash (parsed JSON)
|
|
81
|
+
#
|
|
82
|
+
# @param input [String] The input string to parse
|
|
83
|
+
# @return [Hash, Array] Ruby object from JSON
|
|
84
|
+
# @raise [LoadError] If native extension not available
|
|
85
|
+
# @raise [Parsanol::ParseFailed] If parsing fails
|
|
86
|
+
def parse(input, options = {})
  # `options` is accepted but not used by this method.
  JSON.parse(parse_to_json(input))
end
|
|
90
|
+
|
|
91
|
+
# Alias for consistency with other modes
|
|
92
|
+
alias parse_to_hash parse
|
|
93
|
+
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol::ZeroCopy - Zero-Copy Transform Mode (Direct FFI Object Construction)
|
|
4
|
+
#
|
|
5
|
+
# This mode provides MAXIMUM PERFORMANCE through zero-copy FFI.
|
|
6
|
+
# - Rust directly constructs Ruby objects via rb_class_new, rb_ivar_set
|
|
7
|
+
# - No serialization overhead whatsoever
|
|
8
|
+
# - REQUIRES native extension AND type mapping definitions
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# # Define Ruby classes that mirror Rust types
|
|
12
|
+
# module Calculator
|
|
13
|
+
# class Number < Expr
|
|
14
|
+
# attr_reader :value
|
|
15
|
+
# def initialize(value); @value = value; end
|
|
16
|
+
# def eval = @value
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# class BinOp < Expr
|
|
20
|
+
# attr_reader :left, :op, :right
|
|
21
|
+
# def eval; ...; end
|
|
22
|
+
# end
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
# class CalculatorParser < Parsanol::Parser
|
|
26
|
+
# include Parsanol::ZeroCopy
|
|
27
|
+
#
|
|
28
|
+
# rule(:number) { ... }
|
|
29
|
+
# root(:expression)
|
|
30
|
+
#
|
|
31
|
+
# # Type mapping (tells Rust which Ruby classes to construct)
|
|
32
|
+
# output_types(
|
|
33
|
+
# number: Calculator::Number,
|
|
34
|
+
# binop: Calculator::BinOp
|
|
35
|
+
# )
|
|
36
|
+
# end
|
|
37
|
+
#
|
|
38
|
+
# parser = CalculatorParser.new
|
|
39
|
+
# expr = parser.parse("42+8") # Returns Calculator::Number or BinOp DIRECTLY
|
|
40
|
+
# puts expr.eval # No transform needed!
|
|
41
|
+
#
|
|
42
|
+
# Performance: FASTEST mode (18-44x faster than pure Ruby)
|
|
43
|
+
# Memory: Lowest overhead (zero-copy, no serialization)
|
|
44
|
+
|
|
45
|
+
module Parsanol
|
|
46
|
+
module ZeroCopy
|
|
47
|
+
def self.included(base)
  # Including this module also extends the including class with the
  # type-mapping methods from ClassMethods.
  base.extend ClassMethods
end
|
|
50
|
+
|
|
51
|
+
module ClassMethods
  # Read the output type mapping, optionally replacing it first.
  #
  # A supplied mapping is copied and its keys are converted to symbols, so
  # later output_type calls never mutate the caller's hash, never fail on
  # a frozen one, and never create a String/Symbol duplicate of a rule.
  #
  # @param types [Hash, nil] mapping of rule names to Ruby classes;
  #   nil leaves the current mapping as is
  # @return [Hash] the stored mapping ({} when none has been set)
  # @example
  #   output_types(
  #     number: Calculator::Number,
  #     binop: Calculator::BinOp,
  #     expr: Calculator::Expr
  #   )
  def output_types(types = nil)
    @output_types = types.to_h.transform_keys(&:to_sym) if types
    @output_types ||= {}
  end

  # Add or replace the mapping for a single rule.
  #
  # @param rule_name [Symbol, String] name of the rule
  # @param ruby_class [Class] Ruby class to instantiate
  # @example
  #   output_type :number, Calculator::Number
  def output_type(rule_name, ruby_class)
    output_types[rule_name.to_sym] = ruby_class
  end

  # Build a copy of the mapping with String keys and String values.
  #
  # @return [Hash] rule names as strings mapped to class names; values
  #   that are not classes are converted with to_s
  def output_types_for_ffi
    output_types.transform_keys(&:to_s).transform_values do |klass|
      klass.is_a?(Class) ? klass.name : klass.to_s
    end
  end
end
|
|
88
|
+
|
|
89
|
+
# Parse input and return direct Ruby objects (no serialization)
|
|
90
|
+
#
|
|
91
|
+
# @param input [String] The input string to parse
|
|
92
|
+
# @param options [Hash] Parse options (ignored for zero-copy)
|
|
93
|
+
# @return [Object] Direct Ruby object (type depends on grammar)
|
|
94
|
+
# @raise [LoadError] If native extension not available
|
|
95
|
+
# @raise [Parsanol::ParseFailed] If parsing fails
|
|
96
|
+
def parse(input, options = {})
|
|
97
|
+
unless Parsanol::Native.available?
|
|
98
|
+
raise LoadError,
|
|
99
|
+
"ZeroCopy mode requires native extension for direct FFI object construction. " \
|
|
100
|
+
"Run `rake compile` to build the extension, or use " \
|
|
101
|
+
"Parsanol::RubyTransform for Ruby-only parsing."
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
grammar_json = Parsanol::Native.serialize_grammar(root)
|
|
105
|
+
type_map = self.class.output_types_for_ffi
|
|
106
|
+
|
|
107
|
+
if type_map.empty?
|
|
108
|
+
raise ArgumentError,
|
|
109
|
+
"ZeroCopy mode requires output_types to be defined. " \
|
|
110
|
+
"Add `output_types(number: MyNumberClass)` to your parser class."
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
Parsanol::Native.parse_to_objects(grammar_json, input, type_map)
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Parse with explicit type map override
|
|
117
|
+
#
|
|
118
|
+
# @param input [String] The input string to parse
|
|
119
|
+
# @param type_map [Hash] Override type mapping for this parse
|
|
120
|
+
# @return [Object] Direct Ruby object
|
|
121
|
+
def parse_with_types(input, type_map)
|
|
122
|
+
unless Parsanol::Native.available?
|
|
123
|
+
raise LoadError, "ZeroCopy mode requires native extension."
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
grammar_json = Parsanol::Native.serialize_grammar(root)
|
|
127
|
+
Parsanol::Native.parse_to_objects(grammar_json, input, type_map)
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Parsanol Transform Mode Options
|
|
4
|
+
#
|
|
5
|
+
# This module provides three transformation modes for parsing:
|
|
6
|
+
#
|
|
7
|
+
# 1. RubyTransform - Parse in Rust/Ruby, Transform in Ruby (default, most flexible)
|
|
8
|
+
# 2. Serialized - Parse + Transform in Rust, JSON output (requires native extension)
|
|
9
|
+
# 3. ZeroCopy - Direct FFI object construction (requires native extension, fastest)
|
|
10
|
+
#
|
|
11
|
+
# Usage:
|
|
12
|
+
# class MyParser < Parsanol::Parser
|
|
13
|
+
# include Parsanol::RubyTransform # or Serialized, or ZeroCopy
|
|
14
|
+
# rule(:number) { match('[0-9]').repeat(1).as(:int) }
|
|
15
|
+
# root(:number)
|
|
16
|
+
# end
|
|
17
|
+
|
|
18
|
+
require 'parsanol/options/ruby_transform'
|
|
19
|
+
require 'parsanol/options/serialized'
|
|
20
|
+
require 'parsanol/options/zero_copy'
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parsanol
  # Parallel parsing support for batch processing multiple inputs.
  # Uses rayon for linear speedup on multi-core systems.
  #
  # @example Parse multiple files in parallel
  #   grammar = MyParser.new.serialize_grammar
  #   inputs = Dir.glob("*.json").map { |f| File.read(f) }
  #
  #   results = Parsanol::Parallel.parse_batch(grammar, inputs)
  #   results.each_with_index do |result, i|
  #     case result
  #     when Hash then puts "File #{i} parsed: #{result}"
  #     when Parsanol::ParseFailed then puts "File #{i} failed: #{result.message}"
  #     end
  #   end
  #
  module Parallel
    # Configuration for parallel parsing.
    #
    # NOTE: +parse_batch+ currently forwards only +num_threads+ to the native
    # layer; +min_chunk_size+ is stored here but not consumed by this module.
    #
    # @example Configure with 8 threads
    #   config = Parsanol::Parallel::Config.new
    #     .with_num_threads(8)
    #     .with_min_chunk_size(50)
    #
    #   results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
    #
    class Config
      # @return [Integer, nil] Number of threads (nil = auto-detect based on CPU cores)
      attr_accessor :num_threads

      # @return [Integer] Minimum inputs per thread (default: 10)
      attr_accessor :min_chunk_size

      def initialize
        @num_threads = nil # Auto-detect
        @min_chunk_size = 10
      end

      # Set the number of threads to use.
      #
      # @param n [Integer] Number of threads
      # @return [Config] self for chaining
      def with_num_threads(n)
        @num_threads = n
        self
      end

      # Set the minimum chunk size per thread.
      #
      # @param size [Integer] Minimum inputs per thread
      # @return [Config] self for chaining
      def with_min_chunk_size(size)
        @min_chunk_size = size
        self
      end
    end

    class << self
      # Parse multiple inputs in parallel.
      #
      # When the native extension with parallel feature is available,
      # this uses rayon for parallel execution. Otherwise, falls back
      # to sequential parsing.
      #
      # @param grammar_json [String] JSON-serialized grammar
      # @param inputs [Array<String>] Array of input strings to parse
      # @param config [Config] Parallel configuration (optional)
      # @return [Array<Object>] Array of parse results in same order as inputs
      # @raise [LoadError] If the native extension is not available
      #
      # @example Basic usage
      #   results = Parsanol::Parallel.parse_batch(grammar, inputs)
      #
      # @example With configuration
      #   config = Parsanol::Parallel::Config.new.with_num_threads(4)
      #   results = Parsanol::Parallel.parse_batch(grammar, inputs, config: config)
      #
      def parse_batch(grammar_json, inputs, config: Config.new)
        unless Parsanol::Native.available?
          raise LoadError,
                "Parallel parsing requires native extension. " \
                "Run `rake compile` to build the extension."
        end

        # Feature-detect on the receiver and method that are actually invoked
        # below, so the check and the call cannot disagree.
        if Parsanol::Native.respond_to?(:parse_batch_parallel)
          Parsanol::Native.parse_batch_parallel(
            grammar_json,
            inputs,
            num_threads: config.num_threads
          )
        else
          # Fallback to sequential if parallel not available
          inputs.map { |input| Parsanol::Native.parse(grammar_json, input) }
        end
      end

      # Parse multiple inputs in parallel with transformation.
      #
      # Parsing is delegated to +parse_batch+; the transform is then applied
      # to every element of the returned array, one after another.
      #
      # @param grammar_json [String] JSON-serialized grammar
      # @param inputs [Array<String>] Array of input strings to parse
      # @param transform [Parsanol::Transform] Transform to apply to each result
      # @param config [Config] Parallel configuration (optional)
      # @return [Array<Object>] Array of transformed results
      #
      def parse_batch_with_transform(grammar_json, inputs, transform, config: Config.new)
        results = parse_batch(grammar_json, inputs, config: config)
        results.map { |result| transform.apply(result) }
      end

      # Get the number of available CPU cores for parallel processing.
      #
      # Falls back to 1 when the core count cannot be determined.
      #
      # @return [Integer] Number of available cores
      def available_cores
        # Deliberately lazy: `etc` is only needed when this helper is used.
        require 'etc'
        Etc.nprocessors
      rescue StandardError, LoadError
        # LoadError is not a StandardError, so it has to be listed explicitly
        # for a failed `require` to reach this fallback.
        1
      end

      # Estimate optimal number of threads for a given input size.
      #
      # @param input_count [Integer] Number of inputs to process
      # @return [Integer] Recommended number of threads (always at least 1)
      def optimal_threads(input_count)
        # Don't use more threads than inputs, but never recommend fewer than
        # one thread (e.g. for an empty batch).
        [[available_cores, input_count].min, 1].max
      end
    end
  end
end
|
|
Binary file
|