wikipeg 4.0.2 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HISTORY.md +556 -0
- package/README.md +230 -12
- package/VERSION +1 -1
- package/bin/wikipeg +8 -4
- package/examples/css.pegphp +9 -8
- package/lib/compiler/asts.js +30 -10
- package/lib/compiler/charsets.js +306 -0
- package/lib/compiler/language/javascript.js +107 -33
- package/lib/compiler/language/php.js +193 -55
- package/lib/compiler/passes/analyze-always-match.js +141 -0
- package/lib/compiler/passes/analyze-first.js +245 -0
- package/lib/compiler/passes/ast-to-code.js +316 -100
- package/lib/compiler/passes/inline-simple-rules.js +96 -0
- package/lib/compiler/passes/optimize-character-class.js +147 -0
- package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
- package/lib/compiler/passes/remove-proxy-rules.js +7 -5
- package/lib/compiler/passes/report-infinite-loops.js +4 -1
- package/lib/compiler/passes/report-left-recursion.js +3 -4
- package/lib/compiler/passes/report-unknown-attributes.js +39 -0
- package/lib/compiler/passes/transform-common-lang.js +1 -1
- package/lib/compiler/traverser.js +1 -2
- package/lib/compiler/visitor.js +5 -7
- package/lib/compiler.js +24 -10
- package/lib/parser.js +2784 -3088
- package/lib/peg.js +7 -15
- package/lib/runtime/template.js +9 -1
- package/lib/utils/CaseFolding.txt +1654 -0
- package/lib/utils/arrays.js +0 -72
- package/lib/utils/casefold.js +697 -0
- package/lib/utils/objects.js +9 -39
- package/lib/utils/unicode.js +34 -0
- package/package.json +6 -4
- package/src/DefaultTracer.php +18 -18
- package/src/PEGParserBase.php +53 -28
- package/src/SyntaxError.php +4 -4
- package/src/Tracer.php +1 -1
- package/lib/compiler/opcodes.js +0 -54
package/README.md
CHANGED
|
@@ -57,7 +57,9 @@ want to use the parser in browser environment.
|
|
|
57
57
|
You can tweak the generated parser with several options:
|
|
58
58
|
|
|
59
59
|
* `--cache` — makes the parser cache results, avoiding exponential parsing
|
|
60
|
-
time in pathological cases but making the parser slower
|
|
60
|
+
time in pathological cases but making the parser slower. See the
|
|
61
|
+
`cache` option to `PEG.buildParser` and the [Caching](#caching)
|
|
62
|
+
section below.
|
|
61
63
|
* `--allowed-start-rules` — comma-separated list of rules the parser will be
|
|
62
64
|
allowed to start parsing from (default: the first rule in the grammar)
|
|
63
65
|
* `--plugin` — makes WikiPEG use a specified plugin (can be specified multiple
|
|
@@ -92,16 +94,49 @@ property with more details about the error.
|
|
|
92
94
|
You can tweak the generated parser by passing a second parameter with an options
|
|
93
95
|
object to `PEG.buildParser`. The following options are supported:
|
|
94
96
|
|
|
97
|
+
* `language` — if set to `"javascript"`, the method will generate parser
|
|
98
|
+
code in JavaScript; if set to `"php"`, it will generate parser code in PHP
|
|
99
|
+
(default: `"javascript"`)
|
|
95
100
|
* `cache` — if `true`, makes the parser cache results, avoiding exponential
|
|
96
101
|
parsing time in pathological cases but making the parser slower (default:
|
|
97
|
-
`false`)
|
|
102
|
+
`false`). See the [Caching](#caching) section below.
|
|
103
|
+
* `allowLoops` — if `true`, disables "infinite loop checking", which
|
|
104
|
+
looks for rules like `""*` which can match an infinite number of
|
|
105
|
+
times. Disabling this check can be helpful if it uncovers false
|
|
106
|
+
positives -- matches which can not be empty for reasons outside
|
|
107
|
+
its analysis.
|
|
108
|
+
* `allowUselessChoice` — if `true`, disables the check for rules
|
|
109
|
+
which "always match" but are not the last element in a choice.
|
|
110
|
+
* `caselessRestrict` — by default, WikiPEG uses the Unicode "Simple
|
|
111
|
+
Case Folding" algorithm to implement case-insensitive matching.
|
|
112
|
+
If `caselessRestrict` is true, the algorithm is modified to
|
|
113
|
+
prohibit case-insensitive matches between ASCII and non-ASCII
|
|
114
|
+
characters, in the same way that the PCRE CASELESS_RESTRICT
|
|
115
|
+
feature does.
|
|
116
|
+
* `commonLang` — if `true`, performs some simple modifications to
|
|
117
|
+
action clauses to make it possible to write test cases that work
|
|
118
|
+
in both JavaScript and PHP.
|
|
119
|
+
* `noAlwaysMatch` — if `true`, disables optimization of rules which
|
|
120
|
+
always match.
|
|
121
|
+
* `noInlining` — if `true`, disables inlining of simple character
|
|
122
|
+
classes and repeated character classes. This can be useful if you
|
|
123
|
+
are tracing execution or testing the parser and wish to see every
|
|
124
|
+
rule entry/exit, or need to explicitly manage caching. See
|
|
125
|
+
the [Caching](#caching) section below.
|
|
126
|
+
* `noOptimizeFirstSet` — if `true`, disables an optimization which
|
|
127
|
+
fails early if looking at the first character is sufficient to
|
|
128
|
+
determine that a rule can not match. This can affect failure
|
|
129
|
+
reporting, since we might be able to fail on a parent rule before
|
|
130
|
+
actually recursing into the child responsible.
|
|
131
|
+
* `cacheInitHook` and `cacheRuleHook` — functions to generate custom cache
|
|
132
|
+
control code
|
|
98
133
|
* `allowedStartRules` — rules the parser will be allowed to start parsing from
|
|
99
134
|
(default: the first rule in the grammar)
|
|
135
|
+
* `allowedStreamRules` — rules the parser will be allowed to start parsing from
|
|
136
|
+
in asynchronous mode
|
|
100
137
|
* `output` — if set to `"parser"`, the method will return generated parser
|
|
101
138
|
object; if set to `"source"`, it will return parser source code as a string
|
|
102
139
|
(default: `"parser"`)
|
|
103
|
-
* `optimize`— selects between optimizing the generated parser for parsing
|
|
104
|
-
speed (`"speed"`) or code size (`"size"`) (default: `"speed"`)
|
|
105
140
|
* `plugins` — plugins to use
|
|
106
141
|
|
|
107
142
|
Using the Parser
|
|
@@ -148,7 +183,7 @@ Let's look at example grammar that recognizes simple arithmetic expressions like
|
|
|
148
183
|
|
|
149
184
|
primary
|
|
150
185
|
= integer
|
|
151
|
-
/ "(" additive
|
|
186
|
+
/ "(" @additive ")"
|
|
152
187
|
|
|
153
188
|
integer "integer"
|
|
154
189
|
= digits:[0-9]+ { return parseInt(digits.join(""), 10); }
|
|
@@ -163,9 +198,12 @@ happens when the pattern matches successfully. A rule can also contain
|
|
|
163
198
|
`integer` rule has a human-readable name). The parsing starts at the first rule,
|
|
164
199
|
which is also called the *start rule*.
|
|
165
200
|
|
|
166
|
-
A rule name must be a JavaScript identifier. It is followed by an
|
|
167
|
-
(“=”) and a parsing expression. If the rule has
|
|
168
|
-
|
|
201
|
+
A rule name must be a JavaScript identifier. It is followed by an
|
|
202
|
+
equals sign (“=”) and a parsing expression. If the rule has additional
|
|
203
|
+
attributes, they are written between square brackets (“[” and “]”)
|
|
204
|
+
between the rule name and the equals sign; see the “Rule attribute
|
|
205
|
+
syntax” section below for more details.
|
|
206
|
+
|
|
169
207
|
Rules need to be separated only by whitespace (their beginning is easily
|
|
170
208
|
recognizable), but a semicolon (“;”) after the parsing expression is allowed.
|
|
171
209
|
|
|
@@ -197,7 +235,7 @@ using a simple initializer.
|
|
|
197
235
|
|
|
198
236
|
primary
|
|
199
237
|
= integer
|
|
200
|
-
/ "(" additive
|
|
238
|
+
/ "(" @additive ")"
|
|
201
239
|
|
|
202
240
|
integer "integer"
|
|
203
241
|
= digits:[0-9]+ { return makeInteger(digits); }
|
|
@@ -215,20 +253,27 @@ example:
|
|
|
215
253
|
containing matched part of the input.
|
|
216
254
|
* An expression matching repeated occurrence of some subexpression produces a
|
|
217
255
|
JavaScript array with all the matches.
|
|
256
|
+
* An expression matching a sequence of expressions produces a
|
|
257
|
+
JavaScript array with all the picked elements.
|
|
258
|
+
* If no matches are picked, all elements of the sequence will be
|
|
259
|
+
present in the array.
|
|
260
|
+
* If the pick operator (`@`) is used, only those elements which
|
|
261
|
+
are picked will be present. If only one element is picked, it
|
|
262
|
+
will be returned directly (not wrapped in a 1-element array).
|
|
218
263
|
|
|
219
264
|
The match results propagate through the rules when the rule names are used in
|
|
220
265
|
expressions, up to the start rule. The generated parser returns start rule's
|
|
221
266
|
match result when parsing is successful.
|
|
222
267
|
|
|
223
268
|
One special case of parser expression is a *parser action* — a piece of
|
|
224
|
-
JavaScript code inside curly braces (
|
|
225
|
-
some of the
|
|
269
|
+
JavaScript code inside curly braces (`{` and `}`) that takes match results of
|
|
270
|
+
some of the preceding expressions and returns a JavaScript value. This value
|
|
226
271
|
is considered match result of the preceding expression (in other words, the
|
|
227
272
|
parser action is a match result transformer).
|
|
228
273
|
|
|
229
274
|
In our arithmetic example, there are many parser actions. Consider the action
|
|
230
275
|
in expression `digits:[0-9]+ { return parseInt(digits.join(""), 10); }`. It
|
|
231
|
-
takes the match result of the expression [0-9]
|
|
276
|
+
takes the match result of the expression `[0-9]+`, which is an array of strings
|
|
232
277
|
containing digits, as its parameter. It joins the digits together to form a
|
|
233
278
|
number and converts it to a JavaScript `number` object.
|
|
234
279
|
|
|
@@ -366,6 +411,21 @@ can be accessed by action's JavaScript code.
|
|
|
366
411
|
#### *expression<sub>1</sub>* *expression<sub>2</sub>* ... *expression<sub>n</sub>*
|
|
367
412
|
|
|
368
413
|
Match a sequence of expressions and return their match results in an array.
|
|
414
|
+
Elements of the sequence can be picked by preceding them with the pick
|
|
415
|
+
operator (`@`), and only those elements will be returned in the array.
|
|
416
|
+
If only one element is picked, it is returned directly (not wrapped in
|
|
417
|
+
an array).
|
|
418
|
+
|
|
419
|
+
#### @ *expression*
|
|
420
|
+
|
|
421
|
+
Pick the specified expression in a sequence to return. See the
|
|
422
|
+
description of a sequence expression above.
|
|
423
|
+
|
|
424
|
+
Note that sequences with pick operators can be nested, for example:
|
|
425
|
+
|
|
426
|
+
foo = @"a" @("b" @"c" "d") "e"
|
|
427
|
+
|
|
428
|
+
will return `["a", "c"]` if it matches.
|
|
369
429
|
|
|
370
430
|
#### *expression* { *action* }
|
|
371
431
|
|
|
@@ -419,6 +479,88 @@ Try to match the first expression, if it does not succeed, try the second one,
|
|
|
419
479
|
etc. Return the match result of the first successfully matched expression. If no
|
|
420
480
|
expression matches, consider the match failed.
|
|
421
481
|
|
|
482
|
+
Rule attribute syntax
|
|
483
|
+
---------------------
|
|
484
|
+
WikiPEG supports attaching attributes to rules which can affect their
|
|
485
|
+
behavior. The syntax is:
|
|
486
|
+
|
|
487
|
+
rule1 [attr1, attr2=false, attr3="string", ...] = nonterminal1 ... ;
|
|
488
|
+
|
|
489
|
+
That is, attributes are comma-separated between square brackets
|
|
490
|
+
between the rule name and the equals sign. Attributes can have
|
|
491
|
+
boolean, string, or integer values. An attribute without a value
|
|
492
|
+
is treated as shorthand for setting it to boolean `true`.
|
|
493
|
+
|
|
494
|
+
The following attributes affect parsing:
|
|
495
|
+
|
|
496
|
+
#### [name="*rule name*"]
|
|
497
|
+
|
|
498
|
+
Provide a human-readable *rule name* for this rule. For example, this
|
|
499
|
+
production:
|
|
500
|
+
|
|
501
|
+
integer [name="simple number"] = [0-9]+
|
|
502
|
+
|
|
503
|
+
will produce an error message like:
|
|
504
|
+
|
|
505
|
+
Expected simple number but "a" found.
|
|
506
|
+
|
|
507
|
+
when parsing a non-number, referencing the human-readable name "simple
|
|
508
|
+
number". Without the human-readable name, WikiPEG uses a description
|
|
509
|
+
of the character class that failed to match:
|
|
510
|
+
|
|
511
|
+
Expected [0-9] but "a" found.
|
|
512
|
+
|
|
513
|
+
Aside from the content of error messages, providing a `name` attribute
|
|
514
|
+
also affects *where* errors are reported, preferring to report failure
|
|
515
|
+
at the named rule instead of inside it.
|
|
516
|
+
|
|
517
|
+
#### [inline] *or* [inline=true]
|
|
518
|
+
|
|
519
|
+
Forces inlining of the given rule, regardless of the status of the
|
|
520
|
+
`noInlining` option.
|
|
521
|
+
|
|
522
|
+
#### [inline=false]
|
|
523
|
+
|
|
524
|
+
Prevents inlining of the given rule.
|
|
525
|
+
|
|
526
|
+
#### [cache] *or* [cache=true]
|
|
527
|
+
|
|
528
|
+
Turns on caching for the given rule, regardless of the status of the
|
|
529
|
+
top-level `cache` option. This can be useful for enabling caching
|
|
530
|
+
only on a few rules while leaving it mostly disabled.
|
|
531
|
+
|
|
532
|
+
If caching is disabled in the top-level WikiPEG options but any rule
|
|
533
|
+
has this attribute set to `true`, then caching will be enabled but all
|
|
534
|
+
rules will default to `[cache=false]`.
|
|
535
|
+
|
|
536
|
+
If caching is enabled in the WikiPEG options, then `[cache]` is
|
|
537
|
+
effectively a no-op, since the default is to cache all rules.
|
|
538
|
+
|
|
539
|
+
#### [cache=false]
|
|
540
|
+
|
|
541
|
+
Turns off caching for the given rule, regardless of the status of the
|
|
542
|
+
top-level `cache` option. This can be useful for selectively disabling
|
|
543
|
+
caching on a few rules while leaving it mostly enabled.
|
|
544
|
+
|
|
545
|
+
If caching is disabled in the top-level WikiPEG options, this is
|
|
546
|
+
effectively a no-op.
|
|
547
|
+
|
|
548
|
+
If caching is enabled in the top-level WikiPEG options, this will
|
|
549
|
+
prevent the given rule from being cached.
|
|
550
|
+
|
|
551
|
+
#### [empty=false]
|
|
552
|
+
|
|
553
|
+
Marks a node as non-nullable; that is, asserts that it cannot match
|
|
554
|
+
the empty string -- usually because of some predicate expression in
|
|
555
|
+
the rule which is beyond WikiPEG's ability to analyze. This can
|
|
556
|
+
prevent false positives when WikiPEG checks for infinite loops.
|
|
557
|
+
|
|
558
|
+
#### [unreachable]
|
|
559
|
+
|
|
560
|
+
Marks a rule as unreachable. If the `allowUselessChoice` option is
|
|
561
|
+
false, this attribute permits a reference to the rule in a choice even
|
|
562
|
+
if a previous option in the choice appears to always match.
|
|
563
|
+
|
|
422
564
|
Rule parameter syntax
|
|
423
565
|
---------------------
|
|
424
566
|
|
|
@@ -493,6 +635,82 @@ In JS this will expose the reference parameter "r" as an object with r.set(),
|
|
|
493
635
|
r.get(). In PHP it will be a native reference such that {$r = 1;} will set
|
|
494
636
|
the value of the reference in the declaration scope.
|
|
495
637
|
|
|
638
|
+
Caching
|
|
639
|
+
-------
|
|
640
|
+
Note that caching makes PEG grammars behave somewhat differently from
|
|
641
|
+
recursive descent parsers. Consider the grammar:
|
|
642
|
+
|
|
643
|
+
start = "a" long_complicated_thing b
|
|
644
|
+
/ "a" long_complicated_thing c
|
|
645
|
+
/ "a" long_complicated_thing
|
|
646
|
+
|
|
647
|
+
// this could be any costly rule, but this is the simplest example
|
|
648
|
+
// which will take time proportional to the file length
|
|
649
|
+
long_complicated_thing = $[^]*
|
|
650
|
+
b = "b"
|
|
651
|
+
c = "c"
|
|
652
|
+
|
|
653
|
+
Without caching, the generated parser will match `"a"`, then scan the
|
|
654
|
+
entire length of the string matching `long_complicated_thing`, then
|
|
655
|
+
match the end-of-file to `"b"` and fail, return to the start of the
|
|
656
|
+
string and do it again (scanning the entire length of the string),
|
|
657
|
+
fail to match `"c"` and so on.
|
|
658
|
+
|
|
659
|
+
When caching is enabled, the second time we try to match
|
|
660
|
+
`long_complicated_thing` at position 2 in the string it will recognize
|
|
661
|
+
that it has tried exactly this parse before and return the previous
|
|
662
|
+
match from the cache. This takes constant time instead of time
|
|
663
|
+
proportional to the input string length. This can be quite
|
|
664
|
+
significant in a grammar that involves a lot of backtracking.
|
|
665
|
+
|
|
666
|
+
There are some caveats, however!
|
|
667
|
+
|
|
668
|
+
First, caching is relatively expensive, so it is only done at rule
|
|
669
|
+
boundaries, like `long_complicated_thing`, `b`, and `c` above. This
|
|
670
|
+
is a departure from a "theoretical" packrat parser.
|
|
671
|
+
|
|
672
|
+
Second, the memoization cache stores an entry for every nonterminal at
|
|
673
|
+
every position it is attempted *whether the result is success or
|
|
674
|
+
failure*. In our example we allocate memory for cache entries for "b"
|
|
675
|
+
and "c" even though they do not match. Writing rules which match
|
|
676
|
+
single characters can easily result in excessive memory use if care is
|
|
677
|
+
not taken.
|
|
678
|
+
|
|
679
|
+
Consider two alterations to our example above. First, consider inlining the
|
|
680
|
+
`long_complicated_thing` rule like so:
|
|
681
|
+
|
|
682
|
+
start = "a" $[^]* "b"
|
|
683
|
+
/ "a" $[^]* "c"
|
|
684
|
+
/ "a" $[^]*
|
|
685
|
+
|
|
686
|
+
The grammar would then match exactly the same strings as before, but
|
|
687
|
+
we would do no caching and each of the choice branches would scan to
|
|
688
|
+
the end of the string.
|
|
689
|
+
|
|
690
|
+
Alternatively, if we just moved the zero-or-more repetition operator
|
|
691
|
+
like so:
|
|
692
|
+
|
|
693
|
+
start = "a" $long_complicated_thing* b
|
|
694
|
+
/ "a" $long_complicated_thing* c
|
|
695
|
+
/ "a" $long_complicated_thing*
|
|
696
|
+
|
|
697
|
+
long_complicated_thing = [^]
|
|
698
|
+
b = "b"
|
|
699
|
+
c = "c"
|
|
700
|
+
|
|
701
|
+
Now not only have we broken caching (each choice will scan to the
|
|
702
|
+
end of the input string, matching `long_complicated_thing` as it goes),
|
|
703
|
+
we're also going to allocate a cache entry for every character in the
|
|
704
|
+
input string. This can cause ballooning memory requirements for what
|
|
705
|
+
look like simple inputs.
|
|
706
|
+
|
|
707
|
+
By default WikiPEG inlines "simple expressions", which are rules that
|
|
708
|
+
match simple literals, character classes, or repeated character
|
|
709
|
+
classes, possibly prefixed with the `$` operator. This is primarily
|
|
710
|
+
done to manage the memory cost of excessive caching of simple matches.
|
|
711
|
+
For more predictable caching, you may wish to use the `noInlining`
|
|
712
|
+
option.
|
|
713
|
+
|
|
496
714
|
Requirements
|
|
497
715
|
-------------
|
|
498
716
|
|
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
6.0.0
|
package/bin/wikipeg
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
"use strict";
|
|
4
4
|
|
|
5
|
-
var util = require("util");
|
|
6
5
|
var fs = require("fs");
|
|
7
6
|
var path = require("path");
|
|
8
7
|
var PEG = require("../lib/peg");
|
|
@@ -10,11 +9,11 @@ var PEG = require("../lib/peg");
|
|
|
10
9
|
/* Helpers */
|
|
11
10
|
|
|
12
11
|
function printVersion() {
|
|
13
|
-
|
|
12
|
+
console.log("WikiPEG " + PEG.VERSION);
|
|
14
13
|
}
|
|
15
14
|
|
|
16
15
|
function printHelp() {
|
|
17
|
-
|
|
16
|
+
console.log(`Usage: wikipeg [options] [--] [<input_file>] [<output_file>]
|
|
18
17
|
|
|
19
18
|
Generates a parser from the PEG grammar specified in the <input_file> and writes
|
|
20
19
|
it to the <output_file>.
|
|
@@ -66,7 +65,7 @@ function exitFailure() {
|
|
|
66
65
|
}
|
|
67
66
|
|
|
68
67
|
function abort(message) {
|
|
69
|
-
|
|
68
|
+
console.error(message);
|
|
70
69
|
exitFailure();
|
|
71
70
|
}
|
|
72
71
|
|
|
@@ -159,6 +158,11 @@ while (args.length > 0 && isOption(args[0])) {
|
|
|
159
158
|
options.cache = true;
|
|
160
159
|
break;
|
|
161
160
|
|
|
161
|
+
case "--precise-failure":
|
|
162
|
+
options.noInlining = true;
|
|
163
|
+
options.noOptimizeFirstSet = true;
|
|
164
|
+
break;
|
|
165
|
+
|
|
162
166
|
case '--allow-loops':
|
|
163
167
|
options.allowLoops = true;
|
|
164
168
|
break;
|
package/examples/css.pegphp
CHANGED
|
@@ -288,13 +288,16 @@ nmchar
|
|
|
288
288
|
/ nonascii
|
|
289
289
|
/ escape
|
|
290
290
|
|
|
291
|
+
nmchars
|
|
292
|
+
= $[_a-z0-9-]i+ / $[\x80-\uFFFF]+ / escape
|
|
293
|
+
|
|
291
294
|
string1
|
|
292
|
-
= '"' chars:([^\n\r\f\\"] / "\\" nl:nl { return ""; } / escape)* '"' {
|
|
295
|
+
= '"' chars:($[^\n\r\f\\"]+ / "\\" nl:nl { return ""; } / escape)* '"' {
|
|
293
296
|
return implode("", $chars);
|
|
294
297
|
}
|
|
295
298
|
|
|
296
299
|
string2
|
|
297
|
-
= "'" chars:([^\n\r\f\\'] / "\\" nl:nl { return ""; } / escape)* "'" {
|
|
300
|
+
= "'" chars:($[^\n\r\f\\']+ / "\\" nl:nl { return ""; } / escape)* "'" {
|
|
298
301
|
return implode("", $chars);
|
|
299
302
|
}
|
|
300
303
|
|
|
@@ -302,12 +305,10 @@ comment
|
|
|
302
305
|
= "/*" [^*]* "*"+ ([^/*] [^*]* "*"+)* "/"
|
|
303
306
|
|
|
304
307
|
ident
|
|
305
|
-
=
|
|
306
|
-
return $prefix . $start . implode("", $chars);
|
|
307
|
-
}
|
|
308
|
+
= $( "-"? nmstart nmchars* )
|
|
308
309
|
|
|
309
310
|
name
|
|
310
|
-
=
|
|
311
|
+
= $( nmchars+ )
|
|
311
312
|
|
|
312
313
|
num
|
|
313
314
|
= [+-]? ([0-9]+ / [0-9]* "." [0-9]+) ("e" [+-]? [0-9]+)? {
|
|
@@ -319,13 +320,13 @@ string
|
|
|
319
320
|
/ string2
|
|
320
321
|
|
|
321
322
|
url
|
|
322
|
-
= chars:([!#$%&*-\[\]-~] / nonascii / escape)* { return implode("", $chars); }
|
|
323
|
+
= chars:($[!#$%&*-\[\]-~]+ / nonascii / escape)* { return implode("", $chars); }
|
|
323
324
|
|
|
324
325
|
plain_ws
|
|
325
326
|
= [ \t\r\n\f]+
|
|
326
327
|
|
|
327
328
|
w
|
|
328
|
-
=
|
|
329
|
+
= [ \t\r\n\f]*
|
|
329
330
|
|
|
330
331
|
nl
|
|
331
332
|
= "\n"
|
package/lib/compiler/asts.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
|
|
3
|
-
var
|
|
4
|
-
visitor = require("./visitor");
|
|
3
|
+
var visitor = require("./visitor");
|
|
5
4
|
|
|
6
5
|
/* AST utilities. */
|
|
7
6
|
var asts = {
|
|
@@ -16,10 +15,19 @@ var asts = {
|
|
|
16
15
|
},
|
|
17
16
|
|
|
18
17
|
indexOfRule: function(ast, name) {
|
|
19
|
-
return
|
|
18
|
+
return ast.rules.findIndex((r) => r.name === name);
|
|
20
19
|
},
|
|
21
20
|
|
|
22
|
-
|
|
21
|
+
findRuleAttribute: function(rule, name) {
|
|
22
|
+
return (rule.attributes || []).find((attr) => attr.name === name);
|
|
23
|
+
},
|
|
24
|
+
|
|
25
|
+
getRuleAttributeValue: function(rule, name, defaultValue) {
|
|
26
|
+
let attr = asts.findRuleAttribute(rule, name);
|
|
27
|
+
return attr === undefined ? defaultValue : attr.value;
|
|
28
|
+
},
|
|
29
|
+
|
|
30
|
+
matchesEmpty: function(ast, node, wrapper) {
|
|
23
31
|
function matchesTrue() { return true; }
|
|
24
32
|
function matchesFalse() { return false; }
|
|
25
33
|
|
|
@@ -27,17 +35,25 @@ var asts = {
|
|
|
27
35
|
return matches(node.expression);
|
|
28
36
|
}
|
|
29
37
|
|
|
30
|
-
|
|
31
|
-
|
|
38
|
+
wrapper = wrapper || ( (f) => f );
|
|
39
|
+
var matches = wrapper(visitor.build({
|
|
40
|
+
rule: function(rule) {
|
|
41
|
+
// Allow explicit override
|
|
42
|
+
let empty = asts.getRuleAttributeValue(rule, 'empty');
|
|
43
|
+
if (empty === undefined) {
|
|
44
|
+
empty = matches(rule.expression);
|
|
45
|
+
}
|
|
46
|
+
return empty;
|
|
47
|
+
},
|
|
32
48
|
|
|
33
49
|
choice: function(node) {
|
|
34
|
-
return
|
|
50
|
+
return node.alternatives.some(matches);
|
|
35
51
|
},
|
|
36
52
|
|
|
37
53
|
action: matchesExpression,
|
|
38
54
|
|
|
39
55
|
sequence: function(node) {
|
|
40
|
-
return
|
|
56
|
+
return node.elements.every(matches);
|
|
41
57
|
},
|
|
42
58
|
|
|
43
59
|
labeled: matchesExpression,
|
|
@@ -50,6 +66,10 @@ var asts = {
|
|
|
50
66
|
semantic_and: matchesTrue,
|
|
51
67
|
semantic_not: matchesTrue,
|
|
52
68
|
|
|
69
|
+
parameter_and: matchesTrue,
|
|
70
|
+
parameter_not: matchesTrue,
|
|
71
|
+
labeled_param: matchesTrue,
|
|
72
|
+
|
|
53
73
|
rule_ref: function(node) {
|
|
54
74
|
return matches(asts.findRule(ast, node.name));
|
|
55
75
|
},
|
|
@@ -58,9 +78,9 @@ var asts = {
|
|
|
58
78
|
return node.value === "";
|
|
59
79
|
},
|
|
60
80
|
|
|
61
|
-
|
|
81
|
+
class: matchesFalse,
|
|
62
82
|
any: matchesFalse
|
|
63
|
-
});
|
|
83
|
+
}));
|
|
64
84
|
|
|
65
85
|
return matches(node);
|
|
66
86
|
}
|