plurimath-parslet 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +284 -0
- data/LICENSE +23 -0
- data/README.adoc +454 -0
- data/Rakefile +71 -0
- data/lib/parslet/accelerator/application.rb +62 -0
- data/lib/parslet/accelerator/engine.rb +112 -0
- data/lib/parslet/accelerator.rb +162 -0
- data/lib/parslet/atoms/alternative.rb +53 -0
- data/lib/parslet/atoms/base.rb +157 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/capture.rb +38 -0
- data/lib/parslet/atoms/context.rb +103 -0
- data/lib/parslet/atoms/dsl.rb +112 -0
- data/lib/parslet/atoms/dynamic.rb +32 -0
- data/lib/parslet/atoms/entity.rb +45 -0
- data/lib/parslet/atoms/ignored.rb +26 -0
- data/lib/parslet/atoms/infix.rb +115 -0
- data/lib/parslet/atoms/lookahead.rb +52 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +41 -0
- data/lib/parslet/atoms/repetition.rb +87 -0
- data/lib/parslet/atoms/scope.rb +26 -0
- data/lib/parslet/atoms/sequence.rb +48 -0
- data/lib/parslet/atoms/str.rb +42 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/atoms.rb +34 -0
- data/lib/parslet/cause.rb +101 -0
- data/lib/parslet/context.rb +21 -0
- data/lib/parslet/convenience.rb +33 -0
- data/lib/parslet/error_reporter/contextual.rb +120 -0
- data/lib/parslet/error_reporter/deepest.rb +100 -0
- data/lib/parslet/error_reporter/tree.rb +63 -0
- data/lib/parslet/error_reporter.rb +8 -0
- data/lib/parslet/export.rb +163 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/graphviz.rb +97 -0
- data/lib/parslet/parser.rb +68 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/pattern.rb +113 -0
- data/lib/parslet/position.rb +21 -0
- data/lib/parslet/rig/rspec.rb +52 -0
- data/lib/parslet/scope.rb +42 -0
- data/lib/parslet/slice.rb +105 -0
- data/lib/parslet/source/line_cache.rb +99 -0
- data/lib/parslet/source.rb +96 -0
- data/lib/parslet/transform.rb +265 -0
- data/lib/parslet/version.rb +5 -0
- data/lib/parslet.rb +314 -0
- data/plurimath-parslet.gemspec +42 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/examples/boolean_algebra_spec.rb +257 -0
- data/spec/examples/calc_spec.rb +278 -0
- data/spec/examples/capture_spec.rb +137 -0
- data/spec/examples/comments_spec.rb +186 -0
- data/spec/examples/deepest_errors_spec.rb +420 -0
- data/spec/examples/documentation_spec.rb +205 -0
- data/spec/examples/email_parser_spec.rb +275 -0
- data/spec/examples/empty_spec.rb +37 -0
- data/spec/examples/erb_spec.rb +482 -0
- data/spec/examples/ip_address_spec.rb +153 -0
- data/spec/examples/json_spec.rb +413 -0
- data/spec/examples/local_spec.rb +302 -0
- data/spec/examples/mathn_spec.rb +151 -0
- data/spec/examples/minilisp_spec.rb +492 -0
- data/spec/examples/modularity_spec.rb +340 -0
- data/spec/examples/nested_errors_spec.rb +322 -0
- data/spec/examples/optimized_erb_spec.rb +299 -0
- data/spec/examples/parens_spec.rb +239 -0
- data/spec/examples/prec_calc_spec.rb +525 -0
- data/spec/examples/readme_spec.rb +228 -0
- data/spec/examples/scopes_spec.rb +187 -0
- data/spec/examples/seasons_spec.rb +196 -0
- data/spec/examples/sentence_spec.rb +119 -0
- data/spec/examples/simple_xml_spec.rb +250 -0
- data/spec/examples/string_parser_spec.rb +407 -0
- data/spec/fixtures/examples/boolean_algebra.rb +62 -0
- data/spec/fixtures/examples/calc.rb +86 -0
- data/spec/fixtures/examples/capture.rb +36 -0
- data/spec/fixtures/examples/comments.rb +22 -0
- data/spec/fixtures/examples/deepest_errors.rb +99 -0
- data/spec/fixtures/examples/documentation.rb +32 -0
- data/spec/fixtures/examples/email_parser.rb +42 -0
- data/spec/fixtures/examples/empty.rb +10 -0
- data/spec/fixtures/examples/erb.rb +39 -0
- data/spec/fixtures/examples/ip_address.rb +103 -0
- data/spec/fixtures/examples/json.rb +107 -0
- data/spec/fixtures/examples/local.rb +60 -0
- data/spec/fixtures/examples/mathn.rb +47 -0
- data/spec/fixtures/examples/minilisp.rb +75 -0
- data/spec/fixtures/examples/modularity.rb +60 -0
- data/spec/fixtures/examples/nested_errors.rb +95 -0
- data/spec/fixtures/examples/optimized_erb.rb +105 -0
- data/spec/fixtures/examples/parens.rb +25 -0
- data/spec/fixtures/examples/prec_calc.rb +71 -0
- data/spec/fixtures/examples/readme.rb +59 -0
- data/spec/fixtures/examples/scopes.rb +43 -0
- data/spec/fixtures/examples/seasons.rb +40 -0
- data/spec/fixtures/examples/sentence.rb +18 -0
- data/spec/fixtures/examples/simple_xml.rb +51 -0
- data/spec/fixtures/examples/string_parser.rb +77 -0
- data/spec/parslet/atom_results_spec.rb +39 -0
- data/spec/parslet/atoms/alternative_spec.rb +26 -0
- data/spec/parslet/atoms/base_spec.rb +127 -0
- data/spec/parslet/atoms/capture_spec.rb +21 -0
- data/spec/parslet/atoms/combinations_spec.rb +5 -0
- data/spec/parslet/atoms/dsl_spec.rb +7 -0
- data/spec/parslet/atoms/entity_spec.rb +77 -0
- data/spec/parslet/atoms/ignored_spec.rb +15 -0
- data/spec/parslet/atoms/infix_spec.rb +5 -0
- data/spec/parslet/atoms/lookahead_spec.rb +22 -0
- data/spec/parslet/atoms/named_spec.rb +4 -0
- data/spec/parslet/atoms/re_spec.rb +14 -0
- data/spec/parslet/atoms/repetition_spec.rb +24 -0
- data/spec/parslet/atoms/scope_spec.rb +26 -0
- data/spec/parslet/atoms/sequence_spec.rb +28 -0
- data/spec/parslet/atoms/str_spec.rb +15 -0
- data/spec/parslet/atoms/visitor_spec.rb +101 -0
- data/spec/parslet/atoms_spec.rb +488 -0
- data/spec/parslet/convenience_spec.rb +54 -0
- data/spec/parslet/error_reporter/contextual_spec.rb +118 -0
- data/spec/parslet/error_reporter/deepest_spec.rb +82 -0
- data/spec/parslet/error_reporter/tree_spec.rb +7 -0
- data/spec/parslet/export_spec.rb +40 -0
- data/spec/parslet/expression/treetop_spec.rb +74 -0
- data/spec/parslet/minilisp.citrus +29 -0
- data/spec/parslet/minilisp.tt +29 -0
- data/spec/parslet/parser_spec.rb +36 -0
- data/spec/parslet/parslet_spec.rb +38 -0
- data/spec/parslet/pattern_spec.rb +272 -0
- data/spec/parslet/position_spec.rb +14 -0
- data/spec/parslet/rig/rspec_spec.rb +54 -0
- data/spec/parslet/scope_spec.rb +45 -0
- data/spec/parslet/slice_spec.rb +186 -0
- data/spec/parslet/source/line_cache_spec.rb +74 -0
- data/spec/parslet/source_spec.rb +210 -0
- data/spec/parslet/transform/context_spec.rb +56 -0
- data/spec/parslet/transform_spec.rb +183 -0
- data/spec/spec_helper.rb +74 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- data/spec/support/parslet_matchers.rb +96 -0
- metadata +240 -0
@@ -0,0 +1,162 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Optimizes the parsers by pattern matching on the parser atoms and replacing
|
4
|
+
# matches with better versions. See the file qed/accelerators.md for a more
|
5
|
+
# in-depth description.
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
# quote = str('"')
|
9
|
+
# parser = quote >> (quote.absent? >> any).repeat >> quote
|
10
|
+
#
|
11
|
+
# A = Accelerator # for making what follows a bit shorter
|
12
|
+
# optimized_parser = A.apply(parser,
|
13
|
+
# A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
|
14
|
+
#
|
15
|
+
# optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
|
16
|
+
#
|
17
|
+
module Parslet::Accelerator
|
18
|
+
|
19
|
+
# An expression to match against a tree of parser atoms. Normally, an
|
20
|
+
# expression is produced by Parslet::Accelerator.any,
|
21
|
+
# Parslet::Accelerator.str or Parslet::Accelerator.re.
|
22
|
+
#
|
23
|
+
# Expressions can be chained much like parslet atoms can be:
|
24
|
+
#
|
25
|
+
# expr.repeat(1) # matching repetition
|
26
|
+
# expr.absent? # matching absent?
|
27
|
+
# expr.present? # matching present?
|
28
|
+
# expr1 >> expr2 # matching a sequence
|
29
|
+
# expr1 | expr2 # matching an alternation
|
30
|
+
#
|
31
|
+
# @see Parslet::Accelerator.str
|
32
|
+
# @see Parslet::Accelerator.re
|
33
|
+
# @see Parslet::Accelerator.any
|
34
|
+
#
|
35
|
+
# @see Parslet::Accelerator
|
36
|
+
#
|
37
|
+
# One node of a match pattern: a +type+ tag plus the arguments that go with
# it (sub-expressions, repetition bounds, names, ...).
class Expression
  attr_reader :type, :args

  # @param type [Symbol] tag naming the kind of node (:seq, :alt, :rep, ...)
  # @param args [Array] everything passed after the tag, stored as given
  def initialize(type, *args)
    @type, @args = type, args
  end

  # Pattern for a sequence: the receiver followed by +other_expr+.
  #
  # @return [Expression]
  def >>(other_expr)
    join_or_new(:seq, other_expr)
  end

  # Pattern for an alternation: the receiver or +other_expr+.
  #
  # @return [Expression]
  def |(other_expr)
    join_or_new(:alt, other_expr)
  end

  # Wraps the receiver in an :absent node.
  #
  # @return [Expression]
  def absent?
    Expression.new(:absent, self)
  end

  # Wraps the receiver in a :present node.
  #
  # @return [Expression]
  def present?
    Expression.new(:present, self)
  end

  # Wraps the receiver in a :rep node whose args are [min, max, receiver].
  #
  # @return [Expression]
  def repeat(min = 0, max = nil)
    Expression.new(:rep, min, max, self)
  end

  # Builds an :as node whose only argument is +name+. Note that the receiver
  # itself is not stored in the resulting node.
  #
  # @return [Expression]
  def as(name)
    Expression.new(:as, name)
  end

  # When the receiver already carries +tag+, +other_expr+ is appended to the
  # receiver's own args (in place) and the receiver is returned; otherwise a
  # new two-element node tagged +tag+ is created.
  #
  # @api private
  # @return [Expression]
  def join_or_new(tag, other_expr)
    return Expression.new(tag, self, other_expr) unless type == tag

    @args << other_expr
    self
  end
end
|
86
|
+
|
87
|
+
module_function
|
88
|
+
# Returns a match expression that will match `str` parslet atoms.
#
# @param variable [Object] first argument of the :str node (the module
#   example passes a symbol such as :x — presumably the name to bind)
# @param constraints [Array] extra arguments stored after +variable+
# @return [Parslet::Accelerator::Expression]
#
def str(variable, *constraints)
  node_args = [variable, *constraints]
  Expression.new(:str, *node_args)
end
|
95
|
+
|
96
|
+
# Returns a match expression that will match `match` parslet atoms.
#
# @param variable [Object] first argument of the :re node
# @param constraints [Array] extra arguments stored after +variable+
# @return [Parslet::Accelerator::Expression]
#
def re(variable, *constraints)
  node_args = [variable, *constraints]
  Expression.new(:re, *node_args)
end
|
103
|
+
|
104
|
+
# Returns a match expression that will match `any` parslet atoms. It is
# expressed as an :re node whose single argument is the string ".".
#
# @return [Parslet::Accelerator::Expression]
#
def any
  Expression.new(:re, ".")
end
|
111
|
+
|
112
|
+
# Checks whether +expr+ matches +atom+, using a fresh Engine for the attempt.
# On success the engine's bindings are returned; when the engine reports no
# match the result is nil.
#
# @param atom [Parslet::Atoms::Base] parslet atom to match against
# @param expr [Parslet::Accelerator::Expression] expression to match
# @return [nil, Hash] bindings for the match, nil on failure
#
def match(atom, expr)
  engine = Engine.new
  matched = engine.match(atom, expr)

  matched ? engine.bindings : nil
end
|
126
|
+
|
127
|
+
# Constructs an accelerator rule: a two-element array holding the matching
# expression and the block to run once that expression could be bound to a
# parser. Without a block the second element is nil.
#
# Example:
#   Accelerator.rule(Accelerator.any) { Parslet.match('.') }
#
# @param expression [Parslet::Accelerator::Expression] pattern of the rule
# @return [Array] [expression, action]
#
def rule(expression, &action)
  pair = [expression]
  pair << action
  pair
end
|
137
|
+
|
138
|
+
# Given a parslet atom and a set of rules, tries to match the rules
# recursively through the parslet atom. Once a rule could be matched, its
# action block will be called. The work is delegated to
# Application.new(atom, rules).call.
#
# Example:
#   quote = str('"')
#   parser = quote >> (quote.absent? >> any).repeat >> quote
#
#   A = Accelerator # for making what follows a bit shorter
#   optimized_parser = A.apply(parser,
#     A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
#
#   optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
#
# @param atom [Parslet::Atoms::Base] a parser to optimize
# @param rules [Array] rules produced by .rule
# @return [Parslet::Atoms::Base] optimized parser
#
def apply(atom, *rules)
  application = Application.new(atom, rules)
  application.call
end
|
159
|
+
end
|
160
|
+
|
161
|
+
require 'parslet/accelerator/engine'
|
162
|
+
require 'parslet/accelerator/application'
|
@@ -0,0 +1,53 @@
|
|
1
|
+
|
2
|
+
# Alternative during matching. Holds a list of parslets that are tried one
# after the other, in order. Fails only when every alternative fails.
#
# Example:
#
#   str('a') | str('b')   # matches either 'a' or 'b'
#
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  attr_reader :alternatives

  # Builds an Alternative from the given parslets, keeping their order. This
  # is what calling '|' on existing parslets produces:
  #
  #   str('a') | str('b')
  #
  def initialize(*alternatives)
    super()

    @alternatives = alternatives
  end

  #---
  # Rather than nesting Alternative inside Alternative, return one flat
  # Alternative holding all previous choices plus the new one. This reduces
  # the number of objects created.
  #+++
  def |(parslet)
    self.class.new(*@alternatives, parslet)
  end

  # Memoized failure message listing all alternatives.
  def error_msg
    @error_msg ||= "Expected one of #{alternatives.inspect}"
  end

  # Applies each alternative in turn and returns the first successful result
  # unchanged. The failure values of the alternatives tried so far are
  # collected; if none succeeds they are handed to context.err together with
  # #error_msg.
  def try(source, context, consume_all)
    errors = []

    alternatives.each do |alternative|
      outcome = alternative.apply(source, context, consume_all)
      success, value = outcome
      return outcome if success

      # Aggregate all errors
      errors << value
    end

    # If we reach this point, all alternatives have failed.
    context.err(self, source, error_msg, errors)
  end

  precedence ALTERNATE

  # Renders the alternatives separated by ' / '.
  def to_s_inner(prec)
    rendered = alternatives.map { |alternative| alternative.to_s(prec) }
    rendered.join(' / ')
  end
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
# Base class for all parslets, handles orchestration of calls and implements
|
2
|
+
# a lot of the operator and chaining methods.
|
3
|
+
#
|
4
|
+
# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
|
5
|
+
#
|
6
|
+
class Parslet::Atoms::Base
|
7
|
+
include Parslet::Atoms::Precedence
|
8
|
+
include Parslet::Atoms::DSL
|
9
|
+
include Parslet::Atoms::CanFlatten
|
10
|
+
|
11
|
+
# Parslet label as provided in grammar
|
12
|
+
attr_accessor :label
|
13
|
+
|
14
|
+
# Given a string or an IO object, this will attempt a parse of its contents
# and return a result. If the parse fails, a Parslet::ParseFailed exception
# will be thrown.
#
# Anything responding to #line_and_column is used as the source directly;
# everything else is wrapped in Parslet::Source.new. The parse is first run
# without an error reporter; only on failure is it repeated with one, from
# the same source position the first pass started at.
#
# @param io [String, Source] input for the parse process
# @option options [Parslet::ErrorReporter] :reporter error reporter to use,
#   defaults to Parslet::ErrorReporter::Tree
# @option options [Boolean] :prefix Should a prefix match be accepted?
#   (default: false)
# @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
#   tree
#
def parse(io, options={})
  source = io.respond_to?(:line_and_column) ? io : Parslet::Source.new(io)
  consume_all = !options[:prefix]

  # Remember where parsing starts. A source handed in by the caller may
  # already be advanced, and the error-reporting pass below must look at the
  # same input as the first pass did.
  start_pos = source.bytepos

  # Try to cheat. Assuming that we'll be able to parse the input, don't
  # run error reporting code.
  success, value = setup_and_apply(source, nil, consume_all)

  # If we didn't succeed the parse, raise an exception for the user.
  # Stack trace will be off, but the error tree should explain the reason
  # it failed.
  unless success
    # Cheating has not paid off. Now pay the cost: Rerun the parse,
    # gathering error information in the process.
    reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
    source.bytepos = start_pos
    success, value = setup_and_apply(source, reporter, consume_all)

    fail "Assertion failed: success was true when parsing with reporter" \
      if success

    # Value is a Parslet::Cause, which can be turned into an exception:
    value.raise

    fail "NEVER REACHED"
  end

  # assert: success is true

  # Extra input is now handled inline with the rest of the parsing. If
  # really we have success == true, prefix: false and still some input
  # is left dangling, that is a BUG.
  if consume_all && source.chars_left > 0
    fail "BUG: New error strategy should not reach this point."
  end

  flatten(value)
end
|
65
|
+
|
66
|
+
# Builds a fresh Parslet::Atoms::Context around +error_reporter+ and applies
# this atom to +source+ within it.
#
# @param source [Parslet::Source] input to parse
# @param error_reporter [Object, nil] reporter handed to the new context
# @param consume_all [Boolean] forwarded unchanged to #apply
# @return [<Boolean, Object>] Result of the parse. If the first member is
#   true, the parse has succeeded.
def setup_and_apply(source, error_reporter, consume_all)
  apply(source, Parslet::Atoms::Context.new(error_reporter), consume_all)
end
|
75
|
+
|
76
|
+
# Runs this parslet through context.try_with_cache. Success consumes input,
# failure rewinds the source to where it was before the attempt.
#
# @param source [Parslet::Source] source to read input from
# @param context [Parslet::Atoms::Context] context to use for the parsing
# @param consume_all [Boolean] true if the current parse must consume
#   all input by itself.
# @return [Array] the [success, value] pair from the context, or the result
#   of context.err_at when consume_all is set and input is left over
def apply(source, context, consume_all=false)
  start = source.bytepos

  result = context.try_with_cache(self, source, consume_all)
  success, = result

  unless success
    # The parse has failed. Rewind the input.
    source.bytepos = start
    return result
  end

  # Notify context
  context.succ(source)

  # Successful and nothing more demanded: keep the consumed input.
  return result unless consume_all && source.chars_left > 0

  # A consume_all parse that leaves input behind counts as an error. Grab
  # the position and up to 10 characters of the leftover for the message.
  offending_pos = source.pos
  offending_input = source.consume(10)

  # Rewind input (as happens always in error case)
  source.bytepos = start

  message = "Don't know what to do with #{offending_input.to_s.inspect}"
  context.err_at(self, source, message, offending_pos)
end
|
117
|
+
|
118
|
+
# Hook for the actual parsing behaviour; Atoms::Base subclasses are expected
# to override it. The base implementation only raises.
#
# @raise [NotImplementedError] always
#
def try(source, context, consume_all)
  message =
    "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  raise NotImplementedError, message
end
|
125
|
+
|
126
|
+
# Tells the packrat cache whether results of this atom may be stored. The
# default answer is always true; atoms that must not be cached override
# this method.
#
# @return [Boolean]
#
def cached?
  true
end
|
132
|
+
|
133
|
+
# Debug printing - in Treetop syntax.
#
# Class-level macro: defines an instance method #precedence on the receiving
# class that returns +prec+.
#
# @param prec [Object] value the generated #precedence method returns
def self.precedence(prec)
  define_method(:precedence) do
    prec
  end
end
|
138
|
+
precedence BASE
|
139
|
+
# Renders this atom for debug output. The label is used when one is set,
# otherwise the atom's own #to_s_inner rendering. The text is wrapped in
# parentheses when +outer_prec+ is lower than this atom's precedence.
#
# @param outer_prec [Object] precedence of the surrounding expression
# @return [String]
def to_s(outer_prec=OUTER)
  text = label || to_s_inner(precedence)

  outer_prec < precedence ? "(#{text})" : text
end
|
147
|
+
# Same text as #to_s rendered at the OUTER precedence level.
#
# @return [String]
def inspect
  to_s(OUTER)
end
|
150
|
+
private
|
151
|
+
|
152
|
+
# Wraps +result+ in the success form of a parse result: a two-element array
# whose first member is true.
#
# @param result [Object] value of the successful parse
# @return [Array] [true, result]
#
def succ(result)
  [true, result]
end
|
157
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
|
2
|
+
module Parslet::Atoms
|
3
|
+
# A series of helper functions that have the common topic of flattening
|
4
|
+
# result values into the intermediary tree that consists of Ruby Hashes and
|
5
|
+
# Arrays.
|
6
|
+
#
|
7
|
+
# This module has one main function, #flatten, that takes an annotated
|
8
|
+
# structure as input and returns the reduced form that users expect from
|
9
|
+
# Atom#parse.
|
10
|
+
#
|
11
|
+
# NOTE: Since all of these functions are just that, functions without
|
12
|
+
# side effects, they are in a module and not in a class. It's hard to draw
|
13
|
+
# the line sometimes, but this is beyond.
|
14
|
+
#
|
15
|
+
module CanFlatten
|
16
|
+
# Takes a mixed value coming out of a parslet and converts it to a return
# value for the user by dropping things and merging hashes.
#
# Anything that is not exactly an Array is returned untouched. An Array is
# read as [tag, *children]: the children are flattened first (always with
# named = false), then combined according to the tag (:sequence, :maybe or
# :repetition). Any other tag is a bug and raises.
#
# Named is set to true if this result will be embedded in a Hash result from
# naming something using <code>.as(...)</code>. It changes the folding
# semantics of repetition, and makes an empty :maybe yield nil instead of ''.
#
def flatten(value, named=false)
  # Passes through everything that isn't an array of things
  return value unless value.instance_of? Array

  # Split off the s-expression tag and flatten each remaining element
  tag, *tail = value
  children = tail.map { |child| flatten(child) }

  case tag
  when :sequence
    flatten_sequence(children)
  when :maybe
    head = children.first
    named ? head : (head || '')
  when :repetition
    flatten_repetition(children, named)
  else
    fail "BUG: Unknown tag #{tag.inspect}."
  end
end
|
45
|
+
|
46
|
+
# Lisp style fold left: the first element seeds the accumulator and the
# block combines it with each following element in order. An empty list
# folds to the empty string.
#
def foldl(list, &block)
  return '' if list.empty?

  seed = list.first
  remainder = list[1..-1]
  remainder.inject(seed, &block)
end
|
53
|
+
|
54
|
+
# Flatten results from a sequence of parslets: nil entries are dropped and
# the remaining elements are merged pairwise, left to right, via #merge_fold.
#
# @api private
#
def flatten_sequence(list)
  present = list.compact

  foldl(present) do |merged, element|
    merge_fold(merged, element)
  end
end
|
63
|
+
# Combines two already-flattened sequence results into one.
#
# * same class: hashes are merged (after warning about duplicate keys),
#   everything else is joined with +.
# * both string-like (respond to #to_str): the one that also responds to
#   #to_slice wins, the right-hand side being checked first.
# * exactly one string-like: the other value is returned.
# * otherwise a Hash is put into an array so it can be joined with the
#   other value.
#
# @api private
def merge_fold(l, r)
  # equal pairs: merge. ------------------------------------------------------
  if l.class == r.class
    return l + r unless l.is_a?(Hash)

    warn_about_duplicate_keys(l, r)
    return l.merge(r)
  end

  # unequal pairs: hoist to same level. --------------------------------------
  left_stringlike = l.respond_to?(:to_str)
  right_stringlike = r.respond_to?(:to_str)

  # Maybe classes are not equal, but both are stringlike?
  if left_stringlike && right_stringlike
    # if we're merging a String with a Slice, the slice wins.
    return r if r.respond_to?(:to_slice)
    return l if l.respond_to?(:to_slice)

    fail "NOTREACHED: What other stringlike classes are there?"
  end

  # special case: If one of them is a string/slice, the other is more important
  return l if right_stringlike
  return r if left_stringlike

  # otherwise just create an array for one of them to live in
  return l + [r] if r.class == Hash
  return [l] + r if l.class == Hash

  fail "Unhandled case when foldr'ing sequence."
end
|
96
|
+
|
97
|
+
# Flatten results from a repetition of a single parslet. named indicates
# whether the user has named the result or not. If the user has named
# the results, we want to leave an empty list alone - otherwise it is
# turned into an empty string.
#
# Precedence of the cases: hashes first (only the hashes are kept), then
# arrays (only the arrays are kept, flattened by one level), then plain
# concatenation of the non-nil elements.
#
# @api private
#
def flatten_repetition(list, named)
  # If keyed subtrees are in the array, we'll want to discard all
  # strings in between. To keep them, name them.
  hashes = list.select { |element| element.instance_of?(Hash) }
  return hashes unless hashes.empty?

  # If any arrays are nested in this array, flatten all arrays to this
  # level.
  arrays = list.select { |element| element.instance_of?(Array) }
  return arrays.flatten(1) unless arrays.empty?

  # Consistent handling of empty lists, when we act on a named result
  return [] if named && list.empty?

  # If there are only strings, concatenate them and return that.
  foldl(list.compact) { |joined, piece| joined + piece }
end
|
125
|
+
|
126
|
+
# That annoying warning 'Duplicate subtrees while merging result' comes
# from here. You should add more '.as(...)' names to your intermediary tree.
#
# Emits the warning (via Kernel#warn) only when +h1+ and +h2+ share at least
# one key; the shared keys are listed in the message.
#
def warn_about_duplicate_keys(h1, h2)
  shared = h1.keys & h2.keys
  return if shared.empty?

  warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values" \
       " of the latter will be kept. (keys: #{shared.inspect})"
end
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
|
2
|
+
# Stores the result of matching an atom against input in the #captures of
# the parse context. Doing so will allow you to pull parts of the ongoing
# parse out later and use them to match other pieces of input.
#
# Example:
#   # After this, context.captures[:an_a] returns 'a'
#   str('a').capture(:an_a)
#
#   # Capture and use of the capture: (matches either 'aa' or 'bb')
#   match['ab'].capture(:first) >>
#     dynamic { |src, ctx| str(ctx.captures[:first]) }
#
class Parslet::Atoms::Capture < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet [Parslet::Atoms::Base] atom whose result gets captured
  # @param name [#to_sym] key under which the result is stored
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet. On success its flattened value is stored in
  # context.captures under name.to_sym. The wrapped parslet's result is
  # returned unchanged either way.
  def apply(source, context, consume_all)
    result = parslet.apply(source, context, consume_all)
    success, value = result

    context.captures[name.to_sym] = flatten(value) if success

    result
  end

  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
end
|
38
|
+
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module Parslet::Atoms
|
2
|
+
# Helper class that implements a transient cache that maps position and
# parslet object to results. This is used for memoization in the packrat
# style.
#
# Also, error reporter is stored here and error reporting happens through
# this class. This makes the reporting pluggable.
#
class Context
  # Returns the current captures made on the input (see
  # Parslet::Atoms::Base#capture). Use as follows:
  #
  #   context.captures[:foobar] # => returns capture :foobar
  #
  attr_reader :captures

  # @param reporter [#err, #err_at, #succ, nil] Error reporter (leave empty
  #   for default reporter). With nil, the reporting methods below return
  #   nil in place of a reporter value.
  def initialize(reporter=Parslet::ErrorReporter::Tree.new)
    @cache = Hash.new { |by_pos, pos| by_pos[pos] = {} }
    @reporter = reporter
    @captures = Parslet::Scope.new
  end

  # Caches a parse answer for obj at source.pos. Applying the same parslet
  # at one position of input always yields the same result, unless the input
  # has changed.
  #
  # We need the entire source here so we can ask for how many characters
  # were consumed by a successful parse. Imitation of such a parse must
  # advance the input pos by the same amount of bytes.
  #
  def try_with_cache(obj, source, consume_all)
    start = source.bytepos
    cached = lookup(obj, start)

    if cached
      # The data we're skipping here has been read before (since it is in
      # the cache) PLUS the actual contents are not interesting anymore since
      # we know obj matches at start. So skip reading.
      result, advance = cached
      source.bytepos = start + advance
      return result
    end

    # Not in cache yet: run the atom and remember the outcome together with
    # the number of bytes it moved the source, if the atom allows caching.
    result = obj.try(source, self, consume_all)
    set(obj, start, [result, source.bytepos - start]) if obj.cached?
    result
  end

  # Report an error at a given position.
  # @see ErrorReporter
  #
  def err_at(*args)
    cause = @reporter ? @reporter.err_at(*args) : nil
    [false, cause]
  end

  # Report an error.
  # @see ErrorReporter
  #
  def err(*args)
    cause = @reporter ? @reporter.err(*args) : nil
    [false, cause]
  end

  # Report a successful parse.
  # @see ErrorReporter::Contextual
  #
  def succ(*args)
    value = @reporter ? @reporter.succ(*args) : nil
    [true, value]
  end

  # Starts a new scope. Use the #scope method of Parslet::Atoms::DSL
  # to call this. The captures are pushed before the block runs and popped
  # afterwards, also when the block raises.
  #
  def scope
    begin
      captures.push
      yield
    ensure
      captures.pop
    end
  end

  private

  # NOTE These methods use #object_id directly, since that seems to bring the
  # most performance benefit. This is a hot spot; going through
  # Atoms::Base#hash doesn't yield as much.
  #
  def lookup(obj, pos)
    @cache[pos][obj.object_id]
  end

  def set(obj, pos, val)
    @cache[pos][obj.object_id] = val
  end
end
|
103
|
+
end
|