wikipeg 4.0.2 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/HISTORY.md +556 -0
  2. package/README.md +230 -12
  3. package/VERSION +1 -1
  4. package/bin/wikipeg +8 -4
  5. package/examples/css.pegphp +9 -8
  6. package/lib/compiler/asts.js +30 -10
  7. package/lib/compiler/charsets.js +306 -0
  8. package/lib/compiler/language/javascript.js +107 -33
  9. package/lib/compiler/language/php.js +193 -55
  10. package/lib/compiler/passes/analyze-always-match.js +141 -0
  11. package/lib/compiler/passes/analyze-first.js +245 -0
  12. package/lib/compiler/passes/ast-to-code.js +316 -100
  13. package/lib/compiler/passes/inline-simple-rules.js +96 -0
  14. package/lib/compiler/passes/optimize-character-class.js +147 -0
  15. package/lib/compiler/passes/optimize-failure-reporting.js +65 -0
  16. package/lib/compiler/passes/remove-proxy-rules.js +7 -5
  17. package/lib/compiler/passes/report-infinite-loops.js +4 -1
  18. package/lib/compiler/passes/report-left-recursion.js +3 -4
  19. package/lib/compiler/passes/report-unknown-attributes.js +39 -0
  20. package/lib/compiler/passes/transform-common-lang.js +1 -1
  21. package/lib/compiler/traverser.js +1 -2
  22. package/lib/compiler/visitor.js +5 -7
  23. package/lib/compiler.js +24 -10
  24. package/lib/parser.js +2784 -3088
  25. package/lib/peg.js +7 -15
  26. package/lib/runtime/template.js +9 -1
  27. package/lib/utils/CaseFolding.txt +1654 -0
  28. package/lib/utils/arrays.js +0 -72
  29. package/lib/utils/casefold.js +697 -0
  30. package/lib/utils/objects.js +9 -39
  31. package/lib/utils/unicode.js +34 -0
  32. package/package.json +6 -4
  33. package/src/DefaultTracer.php +18 -18
  34. package/src/PEGParserBase.php +53 -28
  35. package/src/SyntaxError.php +4 -4
  36. package/src/Tracer.php +1 -1
  37. package/lib/compiler/opcodes.js +0 -54
package/lib/utils/objects.js CHANGED
@@ -2,53 +2,23 @@
2
2
 
3
3
  /* Object utilities. */
4
4
  var objects = {
5
- keys: function(object) {
6
- var result = [], key;
7
-
8
- for (key in object) {
9
- if (object.hasOwnProperty(key)) {
10
- result.push(key);
11
- }
12
- }
13
-
14
- return result;
15
- },
16
-
17
- values: function(object) {
18
- var result = [], key;
19
-
20
- for (key in object) {
21
- if (object.hasOwnProperty(key)) {
22
- result.push(object[key]);
23
- }
24
- }
25
-
26
- return result;
27
- },
28
-
29
5
  clone: function(object) {
30
- var result = {}, key;
6
+ var result = Object.create(null);
31
7
 
32
- for (key in object) {
33
- if (object.hasOwnProperty(key)) {
34
- result[key] = object[key];
35
- }
36
- }
8
+ Object.getOwnPropertyNames(object).forEach((key)=>{
9
+ result[key] = object[key];
10
+ });
37
11
 
38
12
  return result;
39
13
  },
40
14
 
41
15
  defaults: function(object, defaults) {
42
- var key;
43
-
44
- for (key in defaults) {
45
- if (defaults.hasOwnProperty(key)) {
46
- if (!(key in object)) {
47
- object[key] = defaults[key];
48
- }
16
+ Object.getOwnPropertyNames(defaults).forEach((key)=>{
17
+ if (!(key in object)) {
18
+ object[key] = defaults[key];
49
19
  }
50
- }
51
- }
20
+ });
21
+ },
52
22
  };
53
23
 
54
24
  module.exports = objects;
package/lib/utils/unicode.js ADDED
@@ -0,0 +1,34 @@
1
+ "use strict";
2
+
3
+ const fs = require("fs"),
4
+ casefold = require("./casefold");
5
+
6
+ // Raw case folding definitions from CaseFolding.txt
7
+ const caseFoldDefs = casefold.parse(
8
+ fs.readFileSync(__dirname + "/CaseFolding.txt", "utf-8")
9
+ );
10
+
11
+ // Maps from a character to its canonical "case folded" version
12
+ const simpleCaseFolding = [];
13
+
14
+ // Maps from a canonical "case folded" character to all characters
15
+ // which map to it.
16
+ const reverseSimpleCaseFolding = [];
17
+
18
+ // Compute simpleCaseFolding/reverseSimpleCaseFolding
19
+ for (const def of caseFoldDefs) {
20
+ if (def.status === 'C' || def.status === 'S') {
21
+ const mapped = def.mapped[0];
22
+ simpleCaseFolding[def.code] = mapped;
23
+ if (reverseSimpleCaseFolding[mapped] === undefined) {
24
+ reverseSimpleCaseFolding[mapped] = [mapped];
25
+ }
26
+ reverseSimpleCaseFolding[mapped].push(def.code);
27
+ }
28
+ }
29
+
30
+ module.exports = {
31
+ defs: caseFoldDefs,
32
+ simpleCaseFolding,
33
+ reverseSimpleCaseFolding,
34
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wikipeg",
3
- "version": "4.0.2",
3
+ "version": "6.0.0",
4
4
  "description": "Parser generator for JavaScript and PHP",
5
5
  "license": "MIT",
6
6
  "homepage": "https://gerrit.wikimedia.org/r/plugins/gitiles/wikipeg/",
@@ -16,7 +16,7 @@
16
16
  }
17
17
  ],
18
18
  "files": [
19
- "CHANGELOG.md",
19
+ "HISTORY.md",
20
20
  "LICENSE",
21
21
  "README.md",
22
22
  "VERSION",
@@ -25,6 +25,7 @@
25
25
  "examples/*.pegphp",
26
26
  "lib/**/*.js",
27
27
  "lib/**/*.php",
28
+ "lib/utils/CaseFolding.txt",
28
29
  "src/**/*.php",
29
30
  "tools/build-browser.js",
30
31
  "package.json"
@@ -33,14 +34,15 @@
33
34
  "bin": "bin/wikipeg",
34
35
  "scripts": {
35
36
  "eslint": "make eslint",
36
- "test": "make eslint && make test"
37
+ "test": "make eslint && make test",
38
+ "rebuild": "make parser"
37
39
  },
38
40
  "repository": {
39
41
  "type": "git",
40
42
  "url": "https://gerrit.wikimedia.org/r/wikipeg"
41
43
  },
42
44
  "devDependencies": {
43
- "eslint": "8.31.0",
45
+ "eslint": "8.57.0",
44
46
  "jasmine-node": "3.0.0"
45
47
  },
46
48
  "engines": {
package/src/DefaultTracer.php CHANGED
@@ -5,31 +5,31 @@ namespace Wikimedia\WikiPEG;
5
5
  use InvalidArgumentException;
6
6
 
7
7
  class DefaultTracer implements Tracer {
8
- private $indentLevel = 0;
8
+ protected int $indentLevel = 0;
9
9
 
10
- public function trace( $event ) {
10
+ public function trace( array $event ): void {
11
11
  switch ( $event['type'] ) {
12
- case 'rule.enter':
13
- $this->log( $event );
14
- $this->indentLevel++;
15
- break;
12
+ case 'rule.enter':
13
+ $this->log( $event );
14
+ $this->indentLevel++;
15
+ break;
16
16
 
17
- case 'rule.match':
18
- $this->indentLevel--;
19
- $this->log( $event );
20
- break;
17
+ case 'rule.match':
18
+ $this->indentLevel--;
19
+ $this->log( $event );
20
+ break;
21
21
 
22
- case 'rule.fail':
23
- $this->indentLevel--;
24
- $this->log( $event );
25
- break;
22
+ case 'rule.fail':
23
+ $this->indentLevel--;
24
+ $this->log( $event );
25
+ break;
26
26
 
27
- default:
28
- throw new InvalidArgumentException( "Invalid event type {$event['type']}" );
27
+ default:
28
+ throw new InvalidArgumentException( "Invalid event type {$event['type']}" );
29
29
  }
30
30
  }
31
31
 
32
- private function log( $event ) {
32
+ protected function log( array $event ) {
33
33
  print str_pad(
34
34
  '' . $event['location'],
35
35
  20
@@ -40,7 +40,7 @@ class DefaultTracer implements Tracer {
40
40
  . "\n";
41
41
  }
42
42
 
43
- private function formatArgs( $argMap ) {
43
+ protected function formatArgs( ?array $argMap ): string {
44
44
  if ( !$argMap ) {
45
45
  return '';
46
46
  }
package/src/PEGParserBase.php CHANGED
@@ -2,29 +2,31 @@
2
2
 
3
3
  namespace Wikimedia\WikiPEG;
4
4
 
5
+ use stdClass;
6
+
5
7
  abstract class PEGParserBase {
6
- protected static $FAILED;
7
- protected static $UNDEFINED;
8
- protected $currPos;
9
- protected $savedPos;
10
- protected $input;
11
- protected $inputLength;
12
- protected $options;
8
+ protected static ?stdClass $FAILED = null;
9
+ protected static ?stdClass $UNDEFINED = null;
10
+ protected int $currPos;
11
+ protected int $savedPos;
12
+ protected string $input;
13
+ protected int $inputLength;
14
+ protected array $options;
15
+ /** @var array */
13
16
  protected $cache;
14
17
 
15
18
  /** @var array<int,array{line:int,column:int,seenCR:bool}> */
16
- protected $posDetailsCache;
17
- protected $maxFailPos;
18
- protected $maxFailExpected;
19
+ protected array $posDetailsCache;
20
+ protected int $maxFailPos;
21
+ protected array $maxFailExpected;
19
22
 
20
23
  /** @var array Associative arrays of expectation info */
21
24
  protected $expectations;
22
25
 
23
26
  /** @var Expectation[] */
24
- private $expectationCache;
27
+ private array $expectationCache;
25
28
 
26
- /** @var Tracer */
27
- protected $tracer;
29
+ protected Tracer $tracer;
28
30
 
29
31
  public function __construct() {
30
32
  if ( !self::$FAILED ) {
@@ -35,7 +37,8 @@ abstract class PEGParserBase {
35
37
  }
36
38
  }
37
39
 
38
- protected function traceCall( $parseFunc, $name, $argNames, $args ) {
40
+ /** @return mixed */
41
+ protected function traceCall( callable $parseFunc, string $name, array $argNames, array $args ) {
39
42
  $argMap = [];
40
43
  foreach ( $args as $i => $argValue ) {
41
44
  $argMap[$argNames[$i]] = $argValue;
@@ -47,7 +50,7 @@ abstract class PEGParserBase {
47
50
  'location' => $this->computeLocation( $startPos, $startPos ),
48
51
  'args' => $argMap
49
52
  ] );
50
- $result = call_user_func_array( $parseFunc, $args );
53
+ $result = $parseFunc( ...$args );
51
54
  if ( $result !== self::$FAILED ) {
52
55
  $this->tracer->trace( [
53
56
  'type' => 'rule.match',
@@ -65,17 +68,18 @@ abstract class PEGParserBase {
65
68
  return $result;
66
69
  }
67
70
 
68
- protected function text() {
71
+ protected function text(): string {
69
72
  return substr( $this->input, $this->savedPos, $this->currPos - $this->savedPos );
70
73
  }
71
74
 
72
- protected function location() {
75
+ protected function location(): LocationRange {
73
76
  return $this->computeLocation( $this->savedPos, $this->currPos );
74
77
  }
75
78
 
76
79
  /**
77
80
  * @param string $description
78
81
  * @return never
82
+ * @throws SyntaxError
79
83
  */
80
84
  protected function expected( $description ) {
81
85
  throw $this->buildException(
@@ -89,6 +93,7 @@ abstract class PEGParserBase {
89
93
  /**
90
94
  * @param string $message
91
95
  * @return never
96
+ * @throws SyntaxError
92
97
  */
93
98
  protected function error( $message ) {
94
99
  throw $this->buildException(
@@ -99,7 +104,7 @@ abstract class PEGParserBase {
99
104
  );
100
105
  }
101
106
 
102
- public static function charAt( $s, $byteOffset ) {
107
+ public static function charAt( string $s, int $byteOffset ): string {
103
108
  if ( !isset( $s[$byteOffset] ) ) {
104
109
  return '';
105
110
  }
@@ -117,7 +122,7 @@ abstract class PEGParserBase {
117
122
  return $char;
118
123
  }
119
124
 
120
- public static function charsAt( $s, $byteOffset, $numChars ) {
125
+ public static function charsAt( string $s, int $byteOffset, int $numChars ): string {
121
126
  $ret = '';
122
127
  for ( $i = 0; $i < $numChars; $i++ ) {
123
128
  $ret .= self::consumeChar( $s, $byteOffset );
@@ -125,7 +130,7 @@ abstract class PEGParserBase {
125
130
  return $ret;
126
131
  }
127
132
 
128
- public static function consumeChar( $s, &$byteOffset ) {
133
+ public static function consumeChar( string $s, int &$byteOffset ): string {
129
134
  if ( !isset( $s[$byteOffset] ) ) {
130
135
  return '';
131
136
  }
@@ -143,6 +148,22 @@ abstract class PEGParserBase {
143
148
  return $char;
144
149
  }
145
150
 
151
+ public static function advanceChar( string $s, int &$byteOffset ): void {
152
+ if ( !isset( $s[$byteOffset] ) ) {
153
+ return;
154
+ }
155
+ $byteOffset += match ( ord( $s[$byteOffset] ) & 0xf0 ) {
156
+ default => 1,
157
+ 0xc0, 0xd0 => 2,
158
+ 0xe0 => 3,
159
+ 0xf0 => 4,
160
+ };
161
+ }
162
+
163
+ /**
164
+ * @param mixed $value
165
+ * @return mixed
166
+ */
146
167
  public static function &newRef( $value ) {
147
168
  return $value;
148
169
  }
@@ -165,7 +186,8 @@ abstract class PEGParserBase {
165
186
  while ( $p < $pos ) {
166
187
  $ch = self::charAt( $this->input, $p );
167
188
  if ( $ch === "\n" ) {
168
- if ( !$details['seenCR'] ) { $details['line']++;
189
+ if ( !$details['seenCR'] ) {
190
+ $details['line']++;
169
191
  }
170
192
  $details['column'] = 1;
171
193
  $details['seenCR'] = false;
@@ -185,7 +207,7 @@ abstract class PEGParserBase {
185
207
  return $details;
186
208
  }
187
209
 
188
- protected function computeLocation( $startPos, $endPos ) {
210
+ protected function computeLocation( int $startPos, int $endPos ): LocationRange {
189
211
  if ( $endPos > $this->inputLength ) {
190
212
  $endPos--;
191
213
  }
@@ -202,7 +224,7 @@ abstract class PEGParserBase {
202
224
  );
203
225
  }
204
226
 
205
- protected function fail( $expected ) {
227
+ protected function fail( int $expected ) {
206
228
  if ( $this->currPos < $this->maxFailPos ) {
207
229
  return;
208
230
  }
@@ -234,7 +256,7 @@ abstract class PEGParserBase {
234
256
  return $expanded;
235
257
  }
236
258
 
237
- private function buildMessage( $expected, $found ) {
259
+ private function buildMessage( array $expected, ?string $found ): string {
238
260
  $expectedDescs = [];
239
261
 
240
262
  foreach ( $expected as $info ) {
@@ -251,7 +273,9 @@ abstract class PEGParserBase {
251
273
  return "Expected " . $expectedDesc . " but " . $foundDesc . " found.";
252
274
  }
253
275
 
254
- protected function buildException( $message, $expected, $found, $location ) {
276
+ protected function buildException(
277
+ ?string $message, ?array $expected, ?string $found, LocationRange $location
278
+ ): SyntaxError {
255
279
  if ( $expected !== null ) {
256
280
  sort( $expected );
257
281
  $expected = array_unique( $expected );
@@ -271,7 +295,7 @@ abstract class PEGParserBase {
271
295
  );
272
296
  }
273
297
 
274
- protected function buildParseException() {
298
+ protected function buildParseException(): SyntaxError {
275
299
  $char = self::charAt( $this->input, $this->maxFailPos );
276
300
  return $this->buildException(
277
301
  null,
@@ -284,7 +308,7 @@ abstract class PEGParserBase {
284
308
  protected function initialize() {
285
309
  }
286
310
 
287
- protected function initInternal( $input, $options ) {
311
+ protected function initInternal( string $input, array $options ) {
288
312
  $this->currPos = 0;
289
313
  $this->savedPos = 0;
290
314
  $this->input = $input;
@@ -299,5 +323,6 @@ abstract class PEGParserBase {
299
323
  $this->initialize();
300
324
  }
301
325
 
302
- abstract public function parse( $input, $options = [] );
326
+ /** @return mixed */
327
+ abstract public function parse( string $input, array $options = [] );
303
328
  }
package/src/SyntaxError.php CHANGED
@@ -3,9 +3,9 @@
3
3
  namespace Wikimedia\WikiPEG;
4
4
 
5
5
  class SyntaxError extends \Exception implements \JsonSerializable {
6
- public $expected;
7
- public $found;
8
- public $location;
6
+ public array $expected;
7
+ public ?string $found;
8
+ public LocationRange $location;
9
9
 
10
10
  /**
11
11
  * @param string $message
@@ -13,7 +13,7 @@ class SyntaxError extends \Exception implements \JsonSerializable {
13
13
  * @param string|null $found
14
14
  * @param LocationRange $location
15
15
  */
16
- public function __construct( string $message, array $expected, $found, LocationRange $location ) {
16
+ public function __construct( string $message, array $expected, ?string $found, LocationRange $location ) {
17
17
  parent::__construct( $message );
18
18
  $this->expected = $expected;
19
19
  $this->found = $found;
package/src/Tracer.php CHANGED
@@ -3,5 +3,5 @@
3
3
  namespace Wikimedia\WikiPEG;
4
4
 
5
5
  interface Tracer {
6
- public function trace( $event );
6
+ public function trace( array $event ): void;
7
7
  }
package/lib/compiler/opcodes.js REMOVED
@@ -1,54 +0,0 @@
1
- "use strict";
2
-
3
- /* Bytecode instruction opcodes. */
4
- var opcodes = {
5
- /* Stack Manipulation */
6
-
7
- PUSH: 0, // PUSH c
8
- PUSH_UNDEFINED: 26, // PUSH_UNDEFINED
9
- PUSH_NULL: 27, // PUSH_NULL
10
- PUSH_FAILED: 28, // PUSH_FAILED
11
- PUSH_EMPTY_ARRAY: 29, // PUSH_EMPTY_ARRAY
12
- PUSH_CURR_POS: 1, // PUSH_CURR_POS
13
- POP: 2, // POP
14
- POP_CURR_POS: 3, // POP_CURR_POS
15
- POP_N: 4, // POP_N n
16
- NIP: 5, // NIP
17
- APPEND: 6, // APPEND
18
- WRAP: 7, // WRAP n
19
- TEXT: 8, // TEXT
20
-
21
- /* Conditions and Loops */
22
-
23
- IF: 9, // IF t, f
24
- IF_ERROR: 10, // IF_ERROR t, f
25
- IF_NOT_ERROR: 11, // IF_NOT_ERROR t, f
26
- WHILE_NOT_ERROR: 12, // WHILE_NOT_ERROR b
27
-
28
- /* Matching */
29
-
30
- MATCH_ANY: 13, // MATCH_ANY a, f, ...
31
- MATCH_STRING: 14, // MATCH_STRING s, a, f, ...
32
- MATCH_STRING_IC: 15, // MATCH_STRING_IC s, a, f, ...
33
- MATCH_REGEXP: 16, // MATCH_REGEXP r, a, f, ...
34
- ACCEPT_N: 17, // ACCEPT_N n
35
- ACCEPT_STRING: 18, // ACCEPT_STRING s
36
- FAIL: 19, // FAIL e
37
-
38
- /* Calls */
39
-
40
- LOAD_SAVED_POS: 20, // LOAD_SAVED_POS p
41
- UPDATE_SAVED_POS: 21, // UPDATE_SAVED_POS
42
- CALL: 22, // CALL f, n, pc, p1, p2, ..., pN
43
-
44
- /* Rules */
45
-
46
- RULE: 23, // RULE r
47
-
48
- /* Failure Reporting */
49
-
50
- SILENT_FAILS_ON: 24, // SILENT_FAILS_ON
51
- SILENT_FAILS_OFF: 25 // SILENT_FAILS_FF
52
- };
53
-
54
- module.exports = opcodes;