wikipeg 2.0.5 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,282 +1,303 @@
1
1
  <?php
2
2
 
3
- namespace WikiPEG;
3
+ namespace Wikimedia\WikiPEG;
4
4
 
5
5
  abstract class PEGParserBase {
6
- protected static $FAILED;
7
- protected $currPos;
8
- protected $savedPos;
9
- protected $input;
10
- protected $inputLength;
11
- protected $options;
12
- protected $cache;
13
-
14
- protected $posDetailsCache;
15
- protected $maxFailPos;
16
- protected $maxFailExpected;
17
-
18
- /** @var array Associative arrays of expectation info */
19
- protected $expectations;
20
-
21
- /** @var Expectation[] */
22
- private $expectationCache;
23
-
24
- /** @var Tracer */
25
- protected $tracer;
26
-
27
- public function __construct() {
28
- if (!self::$FAILED) {
29
- self::$FAILED = new \stdClass;
30
- }
31
- }
32
-
33
- protected function traceCall($parseFunc, $name, $argNames, $args) {
34
- $argMap = [];
35
- foreach ($args as $i => $argValue) {
36
- $argMap[$argNames[$i]] = $argValue;
37
- }
38
- $startPos = $this->currPos;
39
- $this->tracer->trace([
40
- 'type' => 'rule.enter',
41
- 'rule' => $name,
42
- 'location' => $this->computeLocation($startPos, $startPos),
43
- 'args' => $argMap
44
- ]);
45
- $result = call_user_func_array($parseFunc, $args);
46
- if ($result !== self::$FAILED) {
47
- $this->tracer->trace([
48
- 'type' => 'rule.match',
49
- 'rule' => $name,
50
- 'location' => $this->computeLocation($startPos, $this->currPos),
51
- ]);
52
- } else {
53
- $this->tracer->trace([
54
- 'type' => 'rule.fail',
55
- 'rule' => $name,
56
- 'result' => $result,
57
- 'location' => $this->computeLocation($startPos, $startPos)
58
- ]);
59
- }
60
- return $result;
61
- }
62
-
63
- protected function text() {
64
- return substr($this->input, $this->savedPos, $this->currPos - $this->savedPos);
65
- }
66
-
67
- protected function location() {
68
- return $this->computeLocation($this->savedPos, $this->currPos);
69
- }
70
-
71
- protected function expected($description) {
72
- throw $this->buildException(
73
- null,
74
- [['type' => "other", 'description' => $description]],
75
- $this->text(),
76
- $this->computeLocation($this->savedPos, $this->currPos)
77
- );
78
- }
79
-
80
- protected function error($message) {
81
- throw $this->buildException(
82
- $message,
83
- null,
84
- $this->text(),
85
- $this->computeLocation($this->savedPos, $this->currPos)
86
- );
87
- }
88
-
89
- public static function charAt($s, $byteOffset) {
90
- if (!isset($s[$byteOffset])) {
91
- return '';
92
- }
93
- $char = $s[$byteOffset];
94
- $byte1 = ord($char);
95
- if (($byte1 & 0xc0) === 0xc0) {
96
- $char .= $s[$byteOffset + 1];
97
- }
98
- if (($byte1 & 0xe0) === 0xe0) {
99
- $char .= $s[$byteOffset + 2];
100
- }
101
- if (($byte1 & 0xf0) === 0xf0) {
102
- $char .= $s[$byteOffset + 3];
103
- }
104
- return $char;
105
- }
106
-
107
- public static function charsAt($s, $byteOffset, $numChars) {
108
- $ret = '';
109
- for ($i = 0; $i < $numChars; $i++) {
110
- $ret .= self::consumeChar($s, $byteOffset);
111
- }
112
- return $ret;
113
- }
114
-
115
- public static function consumeChar($s, &$byteOffset) {
116
- if (!isset($s[$byteOffset])) {
117
- return '';
118
- }
119
- $char = $s[$byteOffset++];
120
- $byte1 = ord($char);
121
- if (($byte1 & 0xc0) === 0xc0) {
122
- $char .= $s[$byteOffset++];
123
- }
124
- if (($byte1 & 0xe0) === 0xe0) {
125
- $char .= $s[$byteOffset++];
126
- }
127
- if (($byte1 & 0xf0) === 0xf0) {
128
- $char .= $s[$byteOffset++];
129
- }
130
- return $char;
131
- }
132
-
133
- public static function &newRef($value) {
134
- return $value;
135
- }
136
-
137
- protected function computePosDetails($pos) {
138
- if (isset($this->posDetailsCache[$pos])) {
139
- return $this->posDetailsCache[$pos];
140
- }
141
- $p = $pos - 1;
142
- while (!isset($this->posDetailsCache[$p])) {
143
- $p--;
144
- }
145
-
146
- $details = $this->posDetailsCache[$p];
147
-
148
- while ($p < $pos) {
149
- $ch = self::charAt($this->input, $p);
150
- if ($ch === "\n") {
151
- if (!$details['seenCR']) { $details['line']++; }
152
- $details['column'] = 1;
153
- $details['seenCR'] = false;
154
- } else if ($ch === "\r" || $ch === "\u2028" || $ch === "\u2029") {
155
- $details['line']++;
156
- $details['column'] = 1;
157
- $details['seenCR'] = true;
158
- } else {
159
- $details['column']++;
160
- $details['seenCR'] = false;
161
- }
162
-
163
- $p++;
164
- }
165
-
166
- $this->posDetailsCache[$pos] = $details;
167
- return $details;
168
- }
169
-
170
- protected function computeLocation($startPos, $endPos) {
171
- if ($endPos > $this->inputLength) {
172
- $endPos--;
173
- }
174
- $startPosDetails = $this->computePosDetails($startPos);
175
- $endPosDetails = $this->computePosDetails($endPos);
176
-
177
- return new LocationRange(
178
- $startPos,
179
- $startPosDetails['line'],
180
- $startPosDetails['column'],
181
- $endPos,
182
- $endPosDetails['line'],
183
- $endPosDetails['column']
184
- );
185
- }
186
-
187
- protected function fail($expected) {
188
- if ($this->currPos < $this->maxFailPos) {
189
- return;
190
- }
191
-
192
- if ($this->currPos > $this->maxFailPos) {
193
- $this->maxFailPos = $this->currPos;
194
- $this->maxFailExpected = [];
195
- }
196
-
197
- $this->maxFailExpected[] = $expected;
198
- }
199
-
200
- private function expandExpectations($expected) {
201
- $expanded = [];
202
- foreach ($expected as $index) {
203
- if (is_int($index)) {
204
- if (!isset($this->expectationCache[$index])) {
205
- $this->expectationCache[$index] = new Expectation($this->expectations[$index]);
206
- }
207
- $expanded[] = $this->expectationCache[$index];
208
- } else {
209
- $expanded[] = new Expectation($index);
210
- }
211
- }
212
- return $expanded;
213
- }
214
-
215
- private function buildMessage($expected, $found) {
216
- $expectedDescs = [];
217
-
218
- foreach ($expected as $info) {
219
- $expectedDescs[] = $info->description;
220
- }
221
- $lastDesc = array_pop($expectedDescs);
222
- if ($expectedDescs) {
223
- $expectedDesc = implode(', ', $expectedDescs) . ' or ' . $lastDesc;
224
- } else {
225
- $expectedDesc = $lastDesc;
226
- }
227
- $foundDesc = $found ? json_encode($found) : "end of input";
228
-
229
- return "Expected " . $expectedDesc . " but " . $foundDesc . " found.";
230
- }
231
-
232
- protected function buildException($message, $expected, $found, $location) {
233
- if ($expected !== null) {
234
- sort($expected);
235
- $expected = array_unique($expected);
236
- $expandedExpected = $this->expandExpectations($expected);
237
- usort($expandedExpected, function ($a, $b) {
238
- return Expectation::compare($a, $b);
239
- });
240
- } else {
241
- $expandedExpected = null;
242
- }
243
-
244
-
245
- return new SyntaxError(
246
- $message !== null ? $message : $this->buildMessage($expandedExpected, $found),
247
- $expandedExpected,
248
- $found,
249
- $location
250
- );
251
- }
252
-
253
- protected function buildParseException() {
254
- $char = self::charAt($this->input, $this->maxFailPos);
255
- return $this->buildException(
256
- null,
257
- $this->maxFailExpected,
258
- $char === '' ? null : $char,
259
- $this->computeLocation($this->maxFailPos, $this->maxFailPos + 1)
260
- );
261
- }
262
-
263
- protected function initialize() {
264
- }
265
-
266
- protected function initInternal($input, $options) {
267
- $this->currPos = 0;
268
- $this->savedPos = 0;
269
- $this->input = $input;
270
- $this->inputLength = strlen($input);
271
- $this->options = $options;
272
- $this->cache = [];
273
- $this->posDetailsCache = [['line' => 1, 'column' => 1, 'seenCR' => false ]];
274
- $this->maxFailPos = 0;
275
- $this->maxFailExpected = [];
276
- $this->tracer = $options['tracer'] ?? new DefaultTracer;
277
-
278
- $this->initialize();
279
- }
280
-
281
- abstract function parse($input, $options = []);
6
+ protected static $FAILED;
7
+ protected static $UNDEFINED;
8
+ protected $currPos;
9
+ protected $savedPos;
10
+ protected $input;
11
+ protected $inputLength;
12
+ protected $options;
13
+ protected $cache;
14
+
15
+ /** @var array<int,array{line:int,column:int,seenCR:bool}> */
16
+ protected $posDetailsCache;
17
+ protected $maxFailPos;
18
+ protected $maxFailExpected;
19
+
20
+ /** @var array Associative arrays of expectation info */
21
+ protected $expectations;
22
+
23
+ /** @var Expectation[] */
24
+ private $expectationCache;
25
+
26
+ /** @var Tracer */
27
+ protected $tracer;
28
+
29
+ public function __construct() {
30
+ if ( !self::$FAILED ) {
31
+ self::$FAILED = new \stdClass;
32
+ }
33
+ if ( !self::$UNDEFINED ) {
34
+ self::$UNDEFINED = new \stdClass;
35
+ }
36
+ }
37
+
38
+ protected function traceCall( $parseFunc, $name, $argNames, $args ) {
39
+ $argMap = [];
40
+ foreach ( $args as $i => $argValue ) {
41
+ $argMap[$argNames[$i]] = $argValue;
42
+ }
43
+ $startPos = $this->currPos;
44
+ $this->tracer->trace( [
45
+ 'type' => 'rule.enter',
46
+ 'rule' => $name,
47
+ 'location' => $this->computeLocation( $startPos, $startPos ),
48
+ 'args' => $argMap
49
+ ] );
50
+ $result = call_user_func_array( $parseFunc, $args );
51
+ if ( $result !== self::$FAILED ) {
52
+ $this->tracer->trace( [
53
+ 'type' => 'rule.match',
54
+ 'rule' => $name,
55
+ 'location' => $this->computeLocation( $startPos, $this->currPos ),
56
+ ] );
57
+ } else {
58
+ $this->tracer->trace( [
59
+ 'type' => 'rule.fail',
60
+ 'rule' => $name,
61
+ 'result' => $result,
62
+ 'location' => $this->computeLocation( $startPos, $startPos )
63
+ ] );
64
+ }
65
+ return $result;
66
+ }
67
+
68
+ protected function text() {
69
+ return substr( $this->input, $this->savedPos, $this->currPos - $this->savedPos );
70
+ }
71
+
72
+ protected function location() {
73
+ return $this->computeLocation( $this->savedPos, $this->currPos );
74
+ }
75
+
76
+ /**
77
+ * @param string $description
78
+ * @return never
79
+ */
80
+ protected function expected( $description ) {
81
+ throw $this->buildException(
82
+ null,
83
+ [ [ 'type' => "other", 'description' => $description ] ],
84
+ $this->text(),
85
+ $this->computeLocation( $this->savedPos, $this->currPos )
86
+ );
87
+ }
88
+
89
+ /**
90
+ * @param string $message
91
+ * @return never
92
+ */
93
+ protected function error( $message ) {
94
+ throw $this->buildException(
95
+ $message,
96
+ null,
97
+ $this->text(),
98
+ $this->computeLocation( $this->savedPos, $this->currPos )
99
+ );
100
+ }
101
+
102
+ public static function charAt( $s, $byteOffset ) {
103
+ if ( !isset( $s[$byteOffset] ) ) {
104
+ return '';
105
+ }
106
+ $char = $s[$byteOffset];
107
+ $byte1 = ord( $char );
108
+ if ( ( $byte1 & 0xc0 ) === 0xc0 ) {
109
+ $char .= $s[$byteOffset + 1];
110
+ }
111
+ if ( ( $byte1 & 0xe0 ) === 0xe0 ) {
112
+ $char .= $s[$byteOffset + 2];
113
+ }
114
+ if ( ( $byte1 & 0xf0 ) === 0xf0 ) {
115
+ $char .= $s[$byteOffset + 3];
116
+ }
117
+ return $char;
118
+ }
119
+
120
+ public static function charsAt( $s, $byteOffset, $numChars ) {
121
+ $ret = '';
122
+ for ( $i = 0; $i < $numChars; $i++ ) {
123
+ $ret .= self::consumeChar( $s, $byteOffset );
124
+ }
125
+ return $ret;
126
+ }
127
+
128
+ public static function consumeChar( $s, &$byteOffset ) {
129
+ if ( !isset( $s[$byteOffset] ) ) {
130
+ return '';
131
+ }
132
+ $char = $s[$byteOffset++];
133
+ $byte1 = ord( $char );
134
+ if ( ( $byte1 & 0xc0 ) === 0xc0 ) {
135
+ $char .= $s[$byteOffset++];
136
+ }
137
+ if ( ( $byte1 & 0xe0 ) === 0xe0 ) {
138
+ $char .= $s[$byteOffset++];
139
+ }
140
+ if ( ( $byte1 & 0xf0 ) === 0xf0 ) {
141
+ $char .= $s[$byteOffset++];
142
+ }
143
+ return $char;
144
+ }
145
+
146
+ public static function &newRef( $value ) {
147
+ return $value;
148
+ }
149
+
150
+ /**
151
+ * @param int $pos
152
+ * @return array{line:int,column:int,seenCR:bool}
153
+ */
154
+ protected function computePosDetails( $pos ) {
155
+ if ( isset( $this->posDetailsCache[$pos] ) ) {
156
+ return $this->posDetailsCache[$pos];
157
+ }
158
+ $p = $pos - 1;
159
+ while ( !isset( $this->posDetailsCache[$p] ) ) {
160
+ $p--;
161
+ }
162
+
163
+ $details = $this->posDetailsCache[$p];
164
+
165
+ while ( $p < $pos ) {
166
+ $ch = self::charAt( $this->input, $p );
167
+ if ( $ch === "\n" ) {
168
+ if ( !$details['seenCR'] ) { $details['line']++;
169
+ }
170
+ $details['column'] = 1;
171
+ $details['seenCR'] = false;
172
+ } elseif ( $ch === "\r" || $ch === "\u2028" || $ch === "\u2029" ) {
173
+ $details['line']++;
174
+ $details['column'] = 1;
175
+ $details['seenCR'] = true;
176
+ } else {
177
+ $details['column']++;
178
+ $details['seenCR'] = false;
179
+ }
180
+
181
+ $p++;
182
+ }
183
+
184
+ $this->posDetailsCache[$pos] = $details;
185
+ return $details;
186
+ }
187
+
188
+ protected function computeLocation( $startPos, $endPos ) {
189
+ if ( $endPos > $this->inputLength ) {
190
+ $endPos--;
191
+ }
192
+ $startPosDetails = $this->computePosDetails( $startPos );
193
+ $endPosDetails = $this->computePosDetails( $endPos );
194
+
195
+ return new LocationRange(
196
+ $startPos,
197
+ $startPosDetails['line'],
198
+ $startPosDetails['column'],
199
+ $endPos,
200
+ $endPosDetails['line'],
201
+ $endPosDetails['column']
202
+ );
203
+ }
204
+
205
+ protected function fail( $expected ) {
206
+ if ( $this->currPos < $this->maxFailPos ) {
207
+ return;
208
+ }
209
+
210
+ if ( $this->currPos > $this->maxFailPos ) {
211
+ $this->maxFailPos = $this->currPos;
212
+ $this->maxFailExpected = [];
213
+ }
214
+
215
+ $this->maxFailExpected[] = $expected;
216
+ }
217
+
218
+ /**
219
+ * @param array<int|array{type:string,value?:?string,description:string}> $expected
220
+ * @return Expectation[]
221
+ */
222
+ private function expandExpectations( $expected ) {
223
+ $expanded = [];
224
+ foreach ( $expected as $index ) {
225
+ if ( is_int( $index ) ) {
226
+ if ( !isset( $this->expectationCache[$index] ) ) {
227
+ $this->expectationCache[$index] = new Expectation( $this->expectations[$index] );
228
+ }
229
+ $expanded[] = $this->expectationCache[$index];
230
+ } else {
231
+ $expanded[] = new Expectation( $index );
232
+ }
233
+ }
234
+ return $expanded;
235
+ }
236
+
237
+ private function buildMessage( $expected, $found ) {
238
+ $expectedDescs = [];
239
+
240
+ foreach ( $expected as $info ) {
241
+ $expectedDescs[] = $info->description;
242
+ }
243
+ $lastDesc = array_pop( $expectedDescs );
244
+ if ( $expectedDescs ) {
245
+ $expectedDesc = implode( ', ', $expectedDescs ) . ' or ' . $lastDesc;
246
+ } else {
247
+ $expectedDesc = $lastDesc;
248
+ }
249
+ $foundDesc = $found ? json_encode( $found ) : "end of input";
250
+
251
+ return "Expected " . $expectedDesc . " but " . $foundDesc . " found.";
252
+ }
253
+
254
+ protected function buildException( $message, $expected, $found, $location ) {
255
+ if ( $expected !== null ) {
256
+ sort( $expected );
257
+ $expected = array_unique( $expected );
258
+ $expandedExpected = $this->expandExpectations( $expected );
259
+ usort( $expandedExpected, static function ( $a, $b ) {
260
+ return Expectation::compare( $a, $b );
261
+ } );
262
+ } else {
263
+ $expandedExpected = [];
264
+ }
265
+
266
+ return new SyntaxError(
267
+ $message ?? $this->buildMessage( $expandedExpected, $found ),
268
+ $expandedExpected,
269
+ $found,
270
+ $location
271
+ );
272
+ }
273
+
274
+ protected function buildParseException() {
275
+ $char = self::charAt( $this->input, $this->maxFailPos );
276
+ return $this->buildException(
277
+ null,
278
+ $this->maxFailExpected,
279
+ $char === '' ? null : $char,
280
+ $this->computeLocation( $this->maxFailPos, $this->maxFailPos + 1 )
281
+ );
282
+ }
283
+
284
+ protected function initialize() {
285
+ }
286
+
287
+ protected function initInternal( $input, $options ) {
288
+ $this->currPos = 0;
289
+ $this->savedPos = 0;
290
+ $this->input = $input;
291
+ $this->inputLength = strlen( $input );
292
+ $this->options = $options;
293
+ $this->cache = [];
294
+ $this->posDetailsCache = [ [ 'line' => 1, 'column' => 1, 'seenCR' => false ] ];
295
+ $this->maxFailPos = 0;
296
+ $this->maxFailExpected = [];
297
+ $this->tracer = $options['tracer'] ?? new DefaultTracer;
298
+
299
+ $this->initialize();
300
+ }
301
+
302
+ abstract public function parse( $input, $options = [] );
282
303
  }
@@ -1,34 +1,37 @@
1
1
  <?php
2
2
 
3
- namespace WikiPEG;
3
+ namespace Wikimedia\WikiPEG;
4
4
 
5
5
  class SyntaxError extends \Exception implements \JsonSerializable {
6
- public $expected, $found, $location;
6
+ public $expected;
7
+ public $found;
8
+ public $location;
7
9
 
8
- /**
9
- * @param string $message
10
- * @param Expectation[] $expected
11
- * @param string|null $found
12
- * @param LocationRange $location
13
- */
14
- public function __construct(string $message, array $expected, $found, LocationRange $location) {
15
- parent::__construct( $message );
16
- $this->expected = $expected;
17
- $this->found = $found;
18
- $this->location = $location;
19
- }
10
+ /**
11
+ * @param string $message
12
+ * @param Expectation[] $expected
13
+ * @param string|null $found
14
+ * @param LocationRange $location
15
+ */
16
+ public function __construct( string $message, array $expected, $found, LocationRange $location ) {
17
+ parent::__construct( $message );
18
+ $this->expected = $expected;
19
+ $this->found = $found;
20
+ $this->location = $location;
21
+ }
20
22
 
21
- /**
22
- * JSON serialization similar to the JavaScript SyntaxError, for testing
23
- * @return array
24
- */
25
- public function jsonSerialize() {
26
- return [
27
- 'name' => 'SyntaxError',
28
- 'message' => $this->message,
29
- 'expected' => $this->expected,
30
- 'found' => $this->found,
31
- 'location' => $this->location
32
- ];
33
- }
23
+ /**
24
+ * JSON serialization similar to the JavaScript SyntaxError, for testing
25
+ * @return array
26
+ */
27
+ #[\ReturnTypeWillChange]
28
+ public function jsonSerialize(): array {
29
+ return [
30
+ 'name' => 'SyntaxError',
31
+ 'message' => $this->message,
32
+ 'expected' => $this->expected,
33
+ 'found' => $this->found,
34
+ 'location' => $this->location
35
+ ];
36
+ }
34
37
  }
package/src/Tracer.php CHANGED
@@ -1,7 +1,7 @@
1
1
  <?php
2
2
 
3
- namespace WikiPEG;
3
+ namespace Wikimedia\WikiPEG;
4
4
 
5
5
  interface Tracer {
6
- function trace($event);
6
+ public function trace( $event );
7
7
  }