porffor 0.57.25 → 0.57.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/rhemyn/parse.js DELETED
@@ -1,323 +0,0 @@
1
// Parser modes. `insideSet` is active between an opening `[` and its closing `]`;
// `none` applies everywhere else (top level and inside groups).
const State = {
  none: 0,
  insideSet: 1
};
5
-
6
// Repetition bounds for each quantifier character, stored as [ min ] when there
// is no upper bound and as [ min, max ] otherwise.
const Quantifiers = {
  '*': [ 0 ], // 0 or more
  '+': [ 1 ], // 1 or more
  '?': [ 0, 1 ], // 0 to 1
};
// The quantifier characters themselves, used by the parser for membership tests.
const QuantifierKeys = Object.keys(Quantifiers);
12
-
13
/**
 * Read the value of a `--name=value` command-line option.
 *
 * Uses `process.argv` when `process` exists, otherwise `Deno.args`.
 *
 * @param {string} name - Option name, without the leading dashes.
 * @param {*} def - Value returned when the option is not present.
 * @returns {*} Everything after the first `=` of the first matching argument
 *   (so values may themselves contain `=`), or `def` when there is no match.
 */
const getArg = (name, def) => {
  const argv = typeof process !== 'undefined' ? process.argv : Deno.args;
  const prefix = `--${name}=`;

  const arg = argv.find(x => x.startsWith(prefix));
  if (arg == null) return def;

  // Return the value, not the flag name: slicing by the prefix length keeps
  // any further `=` characters that belong to the value.
  return arg.slice(prefix.length);
};
19
-
20
- // full is spec-compliant but slower. not needed most of the time. (evil)
21
/**
 * Characters that `.` must NOT match, chosen by the `--regex-dot` option
 * (`fast` by default).
 *
 * @returns {string[]|undefined} The list for the selected mode; `undefined`
 *   when the option names a mode other than `full` or `fast`.
 */
const DotChars = () => {
  const variants = {
    full: [ '\n', '\r', '\u2028', '\u2029' ],
    fast: [ '\n', '\r' ]
  };

  const mode = getArg('regex-dot', 'fast');
  return variants[mode];
};
25
-
26
/**
 * Members of the `\w` class, chosen by the `--regex-word` option (`full` by
 * default). Two-element arrays are inclusive [ from, to ] ranges; plain
 * strings are single characters.
 *
 * @returns {Array|undefined} The list for the selected mode; `undefined` when
 *   the option names a mode other than `full` or `fast`.
 */
const WordChars = () => {
  const variants = {
    full: [ [ 'a', 'z' ], [ 'A', 'Z' ], [ '0', '9' ], '_' ],
    // folds the lone `_` into a `_`-`z` range, which also matches '`'
    fast: [ [ '_', 'z' ], [ 'A', 'Z' ], [ '0', '9' ] ]
  };

  const mode = getArg('regex-word', 'full');
  return variants[mode];
};
30
-
31
/**
 * Members of the `\s` class, chosen by the `--regex-ws` option (`fast` by
 * default).
 *
 * @returns {string[]|undefined} The list for the selected mode; `undefined`
 *   when the option names a mode other than `full` or `fast`.
 */
const WhitespaceChars = () => {
  const variants = {
    full: [ ' ', '\t', '\n', '\r', '\u2028', '\u2029' ],
    fast: [ ' ', '\t', '\n', '\r' ]
  };

  const mode = getArg('regex-ws', 'fast');
  return variants[mode];
};
35
-
36
/**
 * Build the metacharacter tables used by the parser.
 *
 * Each entry is a `[ matches, negated ]` pair: `matches` lists single
 * characters and [ from, to ] ranges, `negated` says whether the resulting
 * set is inverted.
 *
 * @returns {{ unescaped: object, escaped: object }} `unescaped` is keyed by
 *   the bare character (`.`), `escaped` by the letter following a backslash.
 */
const _Metachars = () => {
  const unescaped = {
    '.': [ DotChars(), true ] // dot: anything except the listed characters
  };

  const escaped = {
    d: [ [ [ '0', '9' ] ], false ], // digit
    D: [ [ [ '0', '9' ] ], true ], // not digit
    w: [ WordChars(), false ], // word
    W: [ WordChars(), true ], // not word
    s: [ WhitespaceChars(), false ], // whitespace
    S: [ WhitespaceChars(), true ] // not whitespace
  };

  return { unescaped, escaped };
};
49
-
50
// Single-letter escapes that stand for one literal character: the key is the
// letter following the backslash, the value is the character it produces.
const EscapeSequences = {
  f: '\f',
  n: '\n',
  r: '\r',
  t: '\t',
  v: '\v',
  '0': '\0'
};
58
-
59
// Matches a string containing a hexadecimal digit; the parser applies it to one
// character at a time when validating \xHH and \uHHHH escapes.
const HexDigit = /[0-9a-fA-F]/;
60
-
61
/**
 * Parse a regular-expression source string into a nested AST.
 *
 * Node shapes produced:
 *   { type: 'Expression', body }      root node (the return value)
 *   { type: 'Group', body }           `( ... )`
 *   { type: 'Set', body, negated }    `[ ... ]`, `.`, and class escapes outside a set
 *   { type: 'Character', char }       a single literal character
 *   { type: 'Range', from, to }       an inclusive range inside a set
 *
 * A node followed by `*`, `+` or `?` gets `quantifier` (an entry of
 * `Quantifiers`) and, if a further `?` follows, `lazy: true`. Set members
 * merged from a negated class escape (e.g. `\S` inside `[...]`) carry
 * `negated: true`. Characters the parser gives no special meaning to are
 * added as literal Character nodes.
 *
 * @param {string} str - Regular-expression source (without delimiters).
 * @returns {object} The root `Expression` node.
 * @throws {SyntaxError} On an unmatched `)`, an unterminated `(` or `[`, or a
 *   set range whose end sorts before its start.
 */
export default str => {
  const Metachars = _Metachars();

  const out = {
    type: 'Expression',
    body: []
  };
  const parents = [];
  let node = out;

  let state = State.none;
  let escape = false;

  // index of the `[` that opened the current set
  let setStart = -1;

  // Set members that were merged in from a class escape (\d, \w, \s, ...).
  // Such a member can never be the start of a range, so `-` after it is literal.
  const classMembers = new WeakSet();

  // Cursor and current character; shared with the helpers below, which are
  // created once instead of on every loop iteration.
  let i = 0;
  let c;

  const charNode = char => ({
    type: 'Character',
    char
  });

  const rangeNode = (from, to) => ({
    type: 'Range',
    from,
    to
  });

  const addChar = (char = c) => {
    node.body.push(charNode(char));
  };

  const addSet = (matches, negated = false) => {
    const body = matches.map(x => (x[1] ? rangeNode(x[0], x[1]) : charNode(x)));

    if (state === State.insideSet) {
      // already in a set: merge the members, marking each one as negated when
      // the class itself is negated
      for (const member of body) {
        if (negated) member.negated = true;
        classMembers.add(member);
      }

      node.body.push(...body);
      return;
    }

    node.body.push({
      type: 'Set',
      body,
      negated
    });
  };

  const addMetachar = meta => {
    const [ matches, negated = false ] = meta;
    return addSet(matches, negated);
  };

  // get next char and consume it
  const seek = (allowEscaped = true) => {
    const cNext = str[++i];

    if (cNext === '\\') return !allowEscaped ? undefined : [ str[++i], true ];
    return !allowEscaped ? cNext : [ cNext, false ];
  };

  // get next char without consuming
  const peek = (allowEscaped = true, offset = 0) => {
    const cNext = str[i + 1 + offset];

    if (cNext === '\\') return !allowEscaped ? undefined : [ str[i + 2 + offset], true ];
    return !allowEscaped ? cNext : [ cNext, false ];
  };

  // Try to read `digits` hex digits after the current char and add the
  // character they encode. Consumes nothing and returns false when a digit is
  // missing or invalid.
  const addHexEscape = digits => {
    for (let k = 0; k < digits; k++) {
      const digit = peek(false, k);
      if (digit == null || !HexDigit.test(digit)) return false;
    }

    let hex = '';
    for (let k = 0; k < digits; k++) hex += seek(false);

    addChar(String.fromCodePoint(parseInt(hex, 16)));
    return true;
  };

  for (i = 0; i < str.length; i++) {
    c = str[i];

    if (escape) {
      escape = false;

      if (EscapeSequences[c]) {
        addChar(EscapeSequences[c]);
        continue;
      }

      if (Metachars.escaped[c]) {
        addMetachar(Metachars.escaped[c]);
        continue;
      }

      if (c === 'c') {
        // \c (not [A-Za-z] ...) = literal \c...
        const next = peek(false);
        if (next == null || /[^a-zA-Z]/.test(next)) {
          addChar('\\');
          addChar('c');
          continue;
        }

        // \c[A-Za-z] = control character
        const code = seek(false).charCodeAt(0);
        addChar(String.fromCharCode(code % 32));
        continue;
      }

      if (c === 'x' || c === 'u') {
        // \xHH and \uHHHH; with missing or invalid digits the letter is literal
        if (!addHexEscape(c === 'x' ? 2 : 4)) addChar();
        continue;
      }

      // any other escaped char is literal
      addChar();
      continue;
    }

    if (c === '\\') {
      escape = true;
      continue;
    }

    if (state === State.none) {
      if (c === '[') {
        const set = {
          type: 'Set',
          body: [],
          negated: false
        };

        node.body.push(set);
        parents.push(node);
        node = set;

        state = State.insideSet;
        setStart = i;
        continue;
      }

      if (c === '(') {
        const group = {
          type: 'Group',
          body: []
        };

        node.body.push(group);
        parents.push(node);
        node = group;
        continue;
      }

      if (c === ')') {
        if (node.type !== 'Group') throw new SyntaxError('Unmatched closing parenthesis');

        node = parents.pop();
        continue;
      }

      if (QuantifierKeys.includes(c)) {
        const last = node.body.at(-1);
        if (!last) continue; // ignore, maybe lookahead

        last.quantifier = Quantifiers[c];

        // lazy modifier: consume the `?` so the next iteration does not
        // re-read it as a `?` quantifier and overwrite the one just set
        if (peek(false) === '?') {
          last.lazy = true;
          i++;
        }

        continue;
      }

      if (Metachars.unescaped[c]) {
        addMetachar(Metachars.unescaped[c]);
        continue;
      }

      addChar();
      continue;
    }

    // State.insideSet

    // `^` negates only when it directly follows the opening `[`; comparing
    // positions (rather than counting unescaped chars) keeps `[\d^]` positive
    if (c === '^' && i === setStart + 1) {
      node.negated = true;
      continue;
    }

    if (c === ']') {
      state = State.none;
      node = parents.pop();
      continue;
    }

    // range
    if (c === '-') {
      // start of set, or previous member is not a plain char (a range or a
      // class-escape member): just literal -
      const prev = node.body.at(-1);
      if (prev?.char == null || classMembers.has(prev)) {
        addChar(); // add -
        continue;
      }

      let [ to, escaped ] = seek();

      // end of set, just literal -
      if (to == null || (!escaped && to === ']')) {
        i--; // rollback seek

        addChar(); // add -
        continue;
      }

      // next char was escaped and a metachar, just literal -
      // (prev is still in the body, so nothing is lost)
      if (escaped && Metachars.escaped[to] != null) {
        i -= 2; // rollback seek

        addChar(); // add -
        continue;
      }

      // simple escapes as range end (\n, \t, ...) stand for their character;
      // \c, \x and \u escapes are not decoded here
      if (escaped && EscapeSequences[to]) to = EscapeSequences[to];

      const from = prev.char;
      if (to < from) throw new SyntaxError('Range out of order');

      // replace the start char with the range
      node.body.pop();
      node.body.push(rangeNode(from, to));
      continue;
    }

    addChar();
  }

  // still in a set by the end (checked first: the current node is then the
  // Set itself, which would otherwise be reported as an open parenthesis)
  if (state === State.insideSet) throw new SyntaxError('Unmatched opening square bracket');

  // still in a group by the end
  if (node.type !== 'Expression') throw new SyntaxError('Unmatched opening parenthesis');

  return out;
};
@@ -1,59 +0,0 @@
1
- import util from 'node:util';
2
-
3
- import parse from '../parse.js';
4
-
5
// Patterns to run through the parser. Only the keys are used (the loop that
// follows iterates over them); every value is an empty placeholder object.
const tests = {
  'a': {},
  'a(b)': {},
  'a(b(c))': {},
  'ab': {},
  '[ab]': {},
  '[a-z]': {},
  'a*': {},
  'a+': {},
  'a?': {},
  'a(b)+': {},
  '[^a]': {},
  '[a^]': {},
  '[^ab]': {},
  '.': {},

  // not range
  '[-]': {},
  '[0-]': {},
  '[-0]': {},
  '[\\s-\\S]': {},
  '[\\s-.]': {},

  '[\\S]': {},

  // control escapes: bare, invalid letter, valid letter
  '\\c': {},
  '\\c0': {},
  '\\cJ': {},

  // \x escapes: no digits, one digit, invalid digit, valid
  '\\x': {},
  '\\x0': {},
  '\\x0g': {},
  '\\x0a': {},

  // \u escapes: no digits, too few digits, invalid digit, valid
  '\\u': {},
  '\\u0': {},
  '\\u000': {},
  '\\u000g': {},
  '\\u000a': {},

  /*
  // email regexes
  // NOTE(review): disabled; presumably because they use syntax the parser has
  // no handling for (anchors, {m,n}, (?: groups) - confirm before enabling
  '^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$': {},

  // input type=email from HTML spec
  // https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)
  // simpler form
  '^[a-zA-Z0-9.!#$%&\'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$': {},
  // full/complex form
  '^[a-zA-Z0-9.!#$%&\'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$': {}*/
};
56
-
57
// Parse every test pattern and print it next to its fully expanded, colorized AST.
Object.keys(tests).forEach(pattern => {
  const ast = parse(pattern);
  console.log(pattern, util.inspect(ast, false, null, true));
});