easy-regex-lib 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,1037 @@
1
+ "use strict";
2
// CommonJS interop helpers: short aliases for the Object reflection built-ins
// used by the three functions below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines every key of `all` on `target` as an enumerable getter; the function
// stored under each key becomes the getter, so values are resolved lazily.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies the own property names of `from` (objects and functions only) onto
// `to` as getters that read through to `from`. Keys already present on `to`
// and the `except` key are skipped. Enumerability mirrors the source
// descriptor, defaulting to enumerable when no descriptor is found.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Creates a fresh object marked with `__esModule: true` and copies the
// properties of `mod` onto it via __copyProps.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
// src/index.ts
// Public export table. Each entry is a thunk, so __export installs it as a
// getter and the binding is only read when the property is accessed. This is
// what allows the table to reference names defined further down the file.
var index_exports = {};
__export(index_exports, {
  CompiledPattern: () => CompiledPattern,
  MatchBuilder: () => MatchBuilder,
  PATTERN_SCHEMA_VERSION: () => PATTERN_SCHEMA_VERSION,
  alt: () => alt,
  analyzePattern: () => analyzePattern,
  anyChar: () => anyChar,
  booleanLiteral: () => booleanLiteral,
  compile: () => compile,
  compilePattern: () => compilePattern,
  dash: () => dash,
  deserializePattern: () => deserializePattern,
  diagnose: () => diagnose,
  digit: () => digit,
  dot: () => dot,
  end: () => end,
  explainPattern: () => explainPattern,
  hexDigit: () => hexDigit,
  integer: () => integer,
  letter: () => letter,
  literal: () => literal,
  match: () => match,
  namedGroup: () => namedGroup,
  nonCapturing: () => nonCapturing,
  optimize: () => optimize,
  optional: () => optional,
  patternFromJsonString: () => patternFromJsonString,
  patternToJsonString: () => patternToJsonString,
  presets: () => presets,
  raw: () => raw,
  regex: () => regex,
  repeat: () => repeat,
  seq: () => seq,
  serializePattern: () => serializePattern,
  start: () => start,
  toRegExp: () => toRegExp,
  underscore: () => underscore,
  whitespace: () => whitespace,
  word: () => word,
  wordBoundary: () => wordBoundary
});
// Publish the table as the module's CommonJS exports (flagged __esModule).
module.exports = __toCommonJS(index_exports);
64
+
65
// src/ast.ts
// Version stamp written into serialized payloads by serializePattern and
// required to match exactly by deserializePattern.
var PATTERN_SCHEMA_VERSION = 1;
67
/**
 * Builds a sequence node from the given fragments.
 *
 * Children are normalized through flattenSequences first. An empty result
 * collapses to the empty literal and a single survivor is returned unwrapped.
 *
 * @param {...object} children Pattern nodes to concatenate.
 * @returns {object} A pattern node.
 */
function seq(...children) {
  const items = flattenSequences(children);
  switch (items.length) {
    case 0:
      return { kind: "Literal", value: "" };
    case 1:
      return items[0];
    default:
      return { kind: "Sequence", children: items };
  }
}
73
/**
 * Builds an alternation node from the given fragments.
 *
 * Children are normalized through flattenChoices first. An empty result
 * collapses to the empty literal and a single survivor is returned unwrapped.
 *
 * @param {...object} children Alternative pattern nodes.
 * @returns {object} A pattern node.
 */
function alt(...children) {
  const options = flattenChoices(children);
  switch (options.length) {
    case 0:
      return { kind: "Literal", value: "" };
    case 1:
      return options[0];
    default:
      return { kind: "Choice", children: options };
  }
}
79
/**
 * Quantifies `child` between `min` and `max` times.
 *
 * `{0,1}` is represented as an Optional node and `{1,1}` returns the child
 * unchanged; everything else becomes a Repeat node.
 *
 * @param {object} child Pattern node to repeat.
 * @param {number} min Minimum count; a non-negative integer.
 * @param {number} max Maximum count; an integer >= min, or
 *   Number.POSITIVE_INFINITY for "unbounded".
 * @param {boolean} [greedy=true] Whether the quantifier is greedy.
 * @returns {object} Optional node, Repeat node, or `child` itself.
 * @throws {RangeError} If the bounds cannot be expressed as a regex quantifier.
 */
function repeat(child, min, max, greedy = true) {
  // Without these checks an invalid bound is emitted verbatim (e.g. `x{-1,2}`),
  // which the engine silently treats as literal text when the `u` flag is off.
  if (!Number.isInteger(min) || min < 0) {
    throw new RangeError(`repeat: min must be a non-negative integer, got ${String(min)}`);
  }
  const unbounded = max === Number.POSITIVE_INFINITY;
  if (!unbounded && (!Number.isInteger(max) || max < min)) {
    throw new RangeError(`repeat: max must be an integer >= min or Infinity, got ${String(max)}`);
  }
  if (min === 0 && max === 1) return { kind: "Optional", child, greedy };
  if (min === 1 && max === 1) return child;
  return { kind: "Repeat", child, min, max, greedy };
}
84
/**
 * Marks `child` as matching zero or one time.
 *
 * @param {object} child Pattern node.
 * @param {boolean} [greedy=true] Whether the quantifier is greedy.
 * @returns {{kind: "Optional", child: object, greedy: boolean}}
 */
function optional(child, greedy = true) {
  const node = { kind: "Optional", child, greedy };
  return node;
}
87
/**
 * Wraps `child` in a named capture group node.
 *
 * @param {string} name Group name (not validated here).
 * @param {object} child Pattern node to capture.
 * @returns {{kind: "NamedGroup", name: string, child: object}}
 */
function namedGroup(name, child) {
  const node = { kind: "NamedGroup", name, child };
  return node;
}
90
/**
 * Wraps `child` in a non-capturing group node.
 *
 * @param {object} child Pattern node.
 * @returns {{kind: "NonCapturing", child: object}}
 */
function nonCapturing(child) {
  const node = { kind: "NonCapturing", child };
  return node;
}
93
/**
 * Creates a literal-text node; the value is stored as given.
 *
 * @param {string} value Text to match.
 * @returns {{kind: "Literal", value: string}}
 */
function literal(value) {
  const node = { kind: "Literal", value };
  return node;
}
96
/**
 * Normalizes a pattern tree by delegating to flattenAst.
 *
 * @param {object} pattern Root pattern node.
 * @returns {object} The normalized node.
 */
function optimize(pattern) {
  const normalized = flattenAst(pattern);
  return normalized;
}
99
/**
 * Normalizes the children of a sequence.
 *
 * Each child is optimized; empty literals are dropped and nested Sequence
 * nodes are spliced in place, with their own children optimized again.
 *
 * @param {object[]} children Raw sequence children.
 * @returns {object[]} Flattened children in original order.
 */
function flattenSequences(children) {
  return children.flatMap((child) => {
    const node = optimize(child);
    if (node.kind === "Literal" && node.value === "") return [];
    if (node.kind === "Sequence") return node.children.map(optimize);
    return [node];
  });
}
109
/**
 * Normalizes the children of an alternation.
 *
 * Each child is optimized and nested Choice nodes are spliced in place, with
 * their own children optimized again. The flattened list is then passed
 * through dedupeAdjacentDuplicates.
 *
 * @param {object[]} children Raw alternatives.
 * @returns {object[]} Flattened alternatives in original order.
 */
function flattenChoices(children) {
  const expanded = children.flatMap((child) => {
    const node = optimize(child);
    return node.kind === "Choice" ? node.children.map(optimize) : [node];
  });
  return dedupeAdjacentDuplicates(expanded);
}
118
/**
 * Removes an alternative when it is structurally identical to the one
 * immediately before it (`a|a|b` becomes `a|b`).
 *
 * The previous implementation returned its input untouched. Only adjacent
 * duplicates are removed, so the relative order of alternatives never changes.
 * Nodes that are, or contain, a RawRegex are never merged, because a raw
 * fragment may hold capturing groups whose numbering would shift.
 *
 * @param {object[]} children Alternatives in match order.
 * @returns {object[]} A new array without adjacent duplicates.
 */
function dedupeAdjacentDuplicates(children) {
  const kept = [];
  for (const node of children) {
    const previous = kept[kept.length - 1];
    if (kept.length > 0 && sameAstValue(previous, node)) continue;
    kept.push(node);
  }
  return kept;
}

/**
 * Structural equality for AST values. Function-valued properties (the
 * quantifier helpers attached to primitives) and undefined values are ignored.
 * Returns false as soon as a RawRegex node is involved.
 */
function sameAstValue(a, b) {
  if (typeof a !== "object" || a === null || typeof b !== "object" || b === null) {
    return a === b;
  }
  if (a.kind === "RawRegex" || b.kind === "RawRegex") return false;
  if (Array.isArray(a) !== Array.isArray(b)) return false;
  const dataKeys = (value) =>
    Object.keys(value)
      .filter((key) => typeof value[key] !== "function" && value[key] !== undefined)
      .sort();
  const keysA = dataKeys(a);
  const keysB = dataKeys(b);
  if (keysA.length !== keysB.length) return false;
  return keysA.every((key, i) => key === keysB[i] && sameAstValue(a[key], b[key]));
}
121
/**
 * Recursively normalizes a pattern node.
 *
 * Sequence and Choice nodes are rebuilt through seq/alt. Single-child
 * wrappers (Repeat, Optional, NamedGroup, NonCapturing) are shallow-copied
 * with a normalized child. Every other node is returned as-is.
 *
 * @param {object} node Pattern node.
 * @returns {object} Normalized node.
 */
function flattenAst(node) {
  const kind = node.kind;
  if (kind === "Sequence") return seq(...node.children);
  if (kind === "Choice") return alt(...node.children);
  const singleChildKinds = ["Repeat", "Optional", "NamedGroup", "NonCapturing"];
  if (singleChildKinds.includes(kind)) {
    return { ...node, child: flattenAst(node.child) };
  }
  return node;
}
142
+
143
+ // src/primitives.ts
144
/**
 * Attaches quantifier helper methods to a primitive node, mutating and
 * returning that same object. Every helper builds a new node around `base`
 * via repeat/optional.
 *
 * @param {object} base Primitive pattern node.
 * @returns {object} `base`, extended with exactly/atLeast/atMost/between/
 *   oneOrMore/zeroOrMore/maybe.
 */
function wrap(base) {
  const unbounded = Number.POSITIVE_INFINITY;
  return Object.assign(base, {
    exactly: (n) => repeat(base, n, n),
    atLeast: (n) => repeat(base, n, unbounded),
    atMost: (n) => repeat(base, 0, n),
    between: (min, max) => repeat(base, min, max),
    oneOrMore: () => repeat(base, 1, unbounded),
    zeroOrMore: () => repeat(base, 0, unbounded),
    maybe: () => optional(base)
  });
}
155
/**
 * Primitive matching any single character (compiled as `.`).
 *
 * @returns {object} Quantifiable Any node.
 */
function anyChar() {
  const node = { kind: "Any" };
  return wrap(node);
}
158
/**
 * Primitive matching a digit.
 *
 * @param {{unicode?: boolean}} [opts] `unicode` defaults to false.
 * @returns {object} Quantifiable Digit node.
 */
function digit(opts = {}) {
  const unicode = opts.unicode ?? false;
  return wrap({ kind: "Digit", unicode });
}
161
/**
 * Primitive matching a word character.
 *
 * @param {{unicode?: boolean}} [opts] `unicode` defaults to false.
 * @returns {object} Quantifiable Word node.
 */
function word(opts = {}) {
  const unicode = opts.unicode ?? false;
  return wrap({ kind: "Word", unicode });
}
164
/**
 * Primitive matching a whitespace character.
 *
 * @param {{unicode?: boolean}} [opts] `unicode` defaults to false.
 * @returns {object} Quantifiable Whitespace node.
 */
function whitespace(opts = {}) {
  const unicode = opts.unicode ?? false;
  return wrap({ kind: "Whitespace", unicode });
}
167
/**
 * Primitive matching a letter.
 *
 * @param {{case?: string, unicode?: boolean}} [opts] `case` defaults to
 *   "both" and `unicode` to false.
 * @returns {object} Quantifiable Letter node.
 */
function letter(opts = {}) {
  const letterCase = opts.case ?? "both";
  const unicode = opts.unicode ?? false;
  return wrap({ kind: "Letter", letterCase, unicode });
}
174
/**
 * Primitive matching a hexadecimal digit.
 *
 * The `uppercaseOnly` key is only present on the node when the caller
 * supplied it.
 *
 * @param {{uppercaseOnly?: boolean}} [opts]
 * @returns {object} Quantifiable HexDigit node.
 */
function hexDigit(opts = {}) {
  const node = { kind: "HexDigit" };
  if (opts.uppercaseOnly !== void 0) {
    node.uppercaseOnly = opts.uppercaseOnly;
  }
  return wrap(node);
}
178
/**
 * Start anchor (compiled as `^`).
 *
 * @returns {{kind: "Start"}}
 */
function start() {
  const anchor = { kind: "Start" };
  return anchor;
}
181
/**
 * End anchor (compiled as `$`).
 *
 * @returns {{kind: "End"}}
 */
function end() {
  const anchor = { kind: "End" };
  return anchor;
}
184
/**
 * Word-boundary assertion (compiled as `\b`).
 *
 * @returns {{kind: "WordBoundary"}}
 */
function wordBoundary() {
  const assertion = { kind: "WordBoundary" };
  return assertion;
}
187
/**
 * Embeds a raw regex source fragment, emitted verbatim by the compiler.
 *
 * The `flags` key is only present on the node when `flags` was supplied.
 *
 * @param {string} source Regex source text.
 * @param {object} [flags] Flag object merged into the compiled flags.
 * @returns {object} RawRegex node.
 */
function raw(source, flags) {
  if (flags === void 0) {
    return { kind: "RawRegex", source };
  }
  return { kind: "RawRegex", source, flags };
}
190
/**
 * Literal hyphen-minus (`-`).
 *
 * @returns {object} Literal node.
 */
function dash() {
  const node = literal("-");
  return node;
}
193
/**
 * Literal underscore (`_`).
 *
 * @returns {object} Literal node.
 */
function underscore() {
  const node = literal("_");
  return node;
}
196
/**
 * Literal full stop (`.`); it is escaped when compiled.
 *
 * @returns {object} Literal node.
 */
function dot() {
  const node = literal(".");
  return node;
}
199
/**
 * One or more digits (no sign handling).
 *
 * @returns {object} Pattern node built from digit().oneOrMore().
 */
function integer() {
  const singleDigit = digit();
  return singleDigit.oneOrMore();
}
202
/**
 * Alternation of the literals `true` and `false`, tried in that order.
 *
 * @returns {object} Choice node.
 */
function booleanLiteral() {
  const yes = literal("true");
  const no = literal("false");
  return alt(yes, no);
}
205
+
206
+ // src/compile.ts
207
// Characters that need a backslash to be matched literally outside a
// character class: \ ^ $ . | ? * + ( ) [ ] { }
var META = /* @__PURE__ */ new Set("\\^$.|?*+()[]{}");

/**
 * Escapes regex metacharacters so that `s` matches itself literally.
 *
 * @param {string} s Text to escape; iterated by code point.
 * @returns {string} Regex source matching `s` exactly.
 */
function escapeLiteral(s) {
  return Array.from(s, (ch) => (META.has(ch) ? `\\${ch}` : ch)).join("");
}
231
/**
 * Compiles a pattern tree to regex source and a flags string.
 *
 * The tree is optimized, flags are merged from `options.flags` and inferred
 * from the tree, and the tree is then emitted. The flags string is computed
 * after emission because emit may switch additional flags on.
 *
 * @param {object} root Root pattern node.
 * @param {{flags?: object, nonCapturing?: boolean}} [options]
 * @returns {{pattern: string, flags: string, warnings: object[]}}
 */
function compilePattern(root, options = {}) {
  const tree = optimize(root);
  const flagState = mergeFlagCtx(options.flags, tree);
  const context = {
    nonCapturing: options.nonCapturing ?? false,
    flags: flagState,
    warnings: []
  };
  const fragment = emit(tree, context);
  return {
    pattern: fragment.s,
    flags: stringifyFlags(flagState),
    warnings: context.warnings
  };
}
246
/**
 * Compiles a pattern tree and constructs a new RegExp from the result.
 *
 * @param {object} root Root pattern node.
 * @param {object} [options] Options forwarded to compilePattern.
 * @returns {RegExp}
 */
function toRegExp(root, options = {}) {
  const compiled = compilePattern(root, options);
  return new RegExp(compiled.pattern, compiled.flags);
}
250
/**
 * Creates a fresh flag context with every flag switched off.
 *
 * @returns {{ignoreCase: boolean, multiline: boolean, dotAll: boolean,
 *   unicode: boolean, global: boolean, sticky: boolean}}
 */
function defaultFlags() {
  const names = ["ignoreCase", "multiline", "dotAll", "unicode", "global", "sticky"];
  return Object.fromEntries(names.map((name) => [name, false]));
}
260
/**
 * Builds the flag context for one compilation: defaults first, then the
 * caller-supplied flags (if any), then flags inferred from the tree.
 *
 * @param {object|undefined} base Caller-supplied flags.
 * @param {object} ast Optimized pattern tree.
 * @returns {object} A new flag context.
 */
function mergeFlagCtx(base, ast) {
  const merged = defaultFlags();
  if (base) {
    applyFlags(merged, base);
  }
  inferFlags(ast, merged);
  return merged;
}
266
/**
 * Switches on in `f` every flag that is truthy in `p`. Flags are only ever
 * turned on; a falsy value in `p` never clears a flag already set in `f`.
 *
 * @param {object} f Flag context, mutated in place.
 * @param {object} p Partial flag object.
 * @returns {void}
 */
function applyFlags(f, p) {
  const names = ["ignoreCase", "multiline", "dotAll", "unicode", "global", "sticky"];
  for (const name of names) {
    if (p[name]) f[name] = true;
  }
}
274
/**
 * Derives flags from the tree: any unicode-enabled Letter/Digit/Word/
 * Whitespace node turns on `unicode`, and the flags of a RawRegex node are
 * merged in via applyFlags.
 *
 * @param {object} ast Pattern tree.
 * @param {object} f Flag context, mutated in place.
 * @returns {void}
 */
function inferFlags(ast, f) {
  const unicodeAwareKinds = new Set(["Letter", "Digit", "Word", "Whitespace"]);
  walk(ast, (n) => {
    if (unicodeAwareKinds.has(n.kind) && n.unicode) {
      f.unicode = true;
    }
    if (n.kind === "RawRegex" && n.flags) {
      applyFlags(f, n.flags);
    }
  });
}
283
/**
 * Pre-order traversal: calls `fn` on `p`, then on every descendant reachable
 * through `children` (Sequence, Choice) or `child` (Repeat, Optional,
 * NamedGroup, NonCapturing).
 *
 * @param {object} p Pattern node.
 * @param {(node: object) => void} fn Visitor.
 * @returns {void}
 */
function walk(p, fn) {
  fn(p);
  const kind = p.kind;
  if (kind === "Sequence" || kind === "Choice") {
    for (const c of p.children) walk(c, fn);
    return;
  }
  if (kind === "Repeat" || kind === "Optional" || kind === "NamedGroup" || kind === "NonCapturing") {
    walk(p.child, fn);
  }
}
308
/**
 * Serializes a flag context to a RegExp flags string, always in the order
 * g, i, m, s, u, y.
 *
 * @param {object} f Flag context.
 * @returns {string} For example "iu".
 */
function stringifyFlags(f) {
  const letters = [
    ["global", "g"],
    ["ignoreCase", "i"],
    ["multiline", "m"],
    ["dotAll", "s"],
    ["unicode", "u"],
    ["sticky", "y"]
  ];
  return letters
    .filter(([name]) => f[name])
    .map(([, flagChar]) => flagChar)
    .join("");
}
318
/**
 * Emits regex source for a pattern node.
 *
 * Returns a fragment `{ s, prec }` where `prec` is the binding strength of the
 * outermost construct in `s`: 0 = alternation, 1 = sequence, 2 = quantified,
 * 3 = atom. Parents use it (via wrapPrec) to decide whether the fragment needs
 * a non-capturing group.
 *
 * Side effects on `ctx`: may turn on `ctx.flags.unicode`, merges RawRegex
 * flags into `ctx.flags`, and pushes a warning for every RawRegex node.
 *
 * @param {object} node Pattern node.
 * @param {{nonCapturing: boolean, flags: object, warnings: object[]}} ctx
 * @returns {{s: string, prec: number}}
 * @throws {TypeError} If `node.kind` is not a known node kind.
 * @throws {Error} If a named group has an invalid identifier.
 */
function emit(node, ctx) {
  switch (node.kind) {
    case "Literal": {
      // Only a single code point behaves as an atom. Longer text is a sequence
      // (so a quantifier must not bind to just its last character), and the
      // empty string needs a group before it can be quantified at all.
      const isSingleChar = [...node.value].length === 1;
      return {
        s: escapeLiteral(node.value),
        prec: isSingleChar ? 3 /* Atom */ : 1 /* Seq */
      };
    }
    case "Any":
      return { s: ".", prec: 3 /* Atom */ };
    case "Digit": {
      if (node.unicode) {
        // `\d` is ASCII-only even with the `u` flag; Unicode decimal digits
        // need the Nd property escape, which in turn requires the `u` flag.
        ctx.flags.unicode = true;
        return { s: "\\p{Nd}", prec: 3 /* Atom */ };
      }
      return { s: "\\d", prec: 3 /* Atom */ };
    }
    case "Word":
      // NOTE(review): `node.unicode` is not reflected in the emitted class;
      // `\w` stays essentially ASCII under the `u` flag. Confirm intent.
      return { s: "\\w", prec: 3 /* Atom */ };
    case "Whitespace":
      return { s: "\\s", prec: 3 /* Atom */ };
    case "Letter": {
      if (node.unicode) {
        ctx.flags.unicode = true;
        if (node.letterCase === "lower") return { s: "\\p{Ll}", prec: 3 /* Atom */ };
        if (node.letterCase === "upper") return { s: "\\p{Lu}", prec: 3 /* Atom */ };
        return { s: "\\p{L}", prec: 3 /* Atom */ };
      }
      if (node.letterCase === "lower") return { s: "[a-z]", prec: 3 /* Atom */ };
      if (node.letterCase === "upper") return { s: "[A-Z]", prec: 3 /* Atom */ };
      return { s: "[A-Za-z]", prec: 3 /* Atom */ };
    }
    case "HexDigit":
      return {
        s: node.uppercaseOnly ? "[0-9A-F]" : "[0-9A-Fa-f]",
        prec: 3 /* Atom */
      };
    case "Start":
      return { s: "^", prec: 3 /* Atom */ };
    case "End":
      return { s: "$", prec: 3 /* Atom */ };
    case "WordBoundary":
      return { s: "\\b", prec: 3 /* Atom */ };
    case "RawRegex":
      ctx.warnings.push({
        code: "raw-regex",
        message: "Raw regex fragment embedded \u2014 review for ReDoS and engine-specific behavior."
      });
      if (node.flags) applyFlags(ctx.flags, node.flags);
      // NOTE(review): raw source is spliced in verbatim and treated as an
      // atom, so a fragment containing a top-level `|` or several terms is not
      // grouped automatically. Left unchanged for backward compatibility.
      return { s: node.source, prec: 3 /* Atom */ };
    case "Sequence": {
      if (node.children.length === 0) return { s: "", prec: 3 /* Atom */ };
      let s = "";
      for (const c of node.children) {
        const f = emit(c, ctx);
        s += wrapPrec(f, 1 /* Seq */).s;
      }
      return { s, prec: 1 /* Seq */ };
    }
    case "Choice": {
      const parts = node.children.map((c) => {
        const f = emit(c, ctx);
        return wrapPrec(f, 0 /* Alt */).s;
      });
      return { s: parts.join("|"), prec: 0 /* Alt */ };
    }
    case "Repeat": {
      const inner = emit(node.child, ctx);
      // A quantifier operand must be an atom. Grouping already-quantified
      // fragments avoids invalid output such as `\d+{2,3}`.
      const core = wrapPrec(inner, 3 /* Atom */);
      const q = quantifier(node.min, node.max, node.greedy);
      return { s: `${core.s}${q}`, prec: 2 /* Quant */ };
    }
    case "Optional": {
      const inner = emit(node.child, ctx);
      // Same rule as Repeat: without the group, `\d+` followed by `?` would
      // silently turn into the lazy quantifier `\d+?`.
      const core = wrapPrec(inner, 3 /* Atom */);
      return { s: `${core.s}?${node.greedy ? "" : "?"}`, prec: 2 /* Quant */ };
    }
    case "NamedGroup": {
      const inner = emit(node.child, ctx);
      const body = wrapPrec(inner, 1 /* Seq */);
      if (ctx.nonCapturing) {
        return { s: `(?:${body.s})`, prec: 3 /* Atom */ };
      }
      return { s: `(?<${escapeGroupName(node.name)}>${body.s})`, prec: 3 /* Atom */ };
    }
    case "NonCapturing": {
      const inner = emit(node.child, ctx);
      return { s: `(?:${inner.s})`, prec: 3 /* Atom */ };
    }
    default:
      // Previously the node itself was returned, which surfaced later as an
      // `undefined` pattern. Fail loudly instead.
      throw new TypeError(`Unknown pattern node kind: ${JSON.stringify(node.kind)}`);
  }
}

/**
 * Validates a capture-group name (ASCII identifier) and returns it unchanged.
 *
 * @param {string} name Group name.
 * @returns {string} The same name.
 * @throws {Error} If the name is not a valid identifier.
 */
function escapeGroupName(name) {
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(name)) {
    throw new Error(`Invalid named group identifier: ${JSON.stringify(name)}`);
  }
  return name;
}

/**
 * Renders the quantifier suffix for the given bounds, using the short forms
 * `*`, `+` and `?` where they apply. A trailing `?` marks a lazy quantifier.
 *
 * @param {number} min Minimum repetitions.
 * @param {number} max Maximum repetitions, or Number.POSITIVE_INFINITY.
 * @param {boolean} greedy Whether the quantifier is greedy.
 * @returns {string} Quantifier source, e.g. "{2,5}?".
 */
function quantifier(min, max, greedy) {
  const lazy = greedy ? "" : "?";
  if (min === 0 && max === Number.POSITIVE_INFINITY) return `*${lazy}`;
  if (min === 1 && max === Number.POSITIVE_INFINITY) return `+${lazy}`;
  if (min === 0 && max === 1) return `?${lazy}`;
  if (min === max) return `{${min}}${lazy}`;
  if (max === Number.POSITIVE_INFINITY) return `{${min},}${lazy}`;
  return `{${min},${max}}${lazy}`;
}

/**
 * Wraps a fragment in a non-capturing group when it binds more loosely than
 * the surrounding context requires.
 *
 * @param {{s: string, prec: number}} f Fragment.
 * @param {number} parent Minimum precedence the context needs.
 * @returns {{s: string, prec: number}} `f` itself, or a grouped atom.
 */
function wrapPrec(f, parent) {
  if (f.prec < parent) {
    return { s: `(?:${f.s})`, prec: 3 /* Atom */ };
  }
  return f;
}
428
+
429
+ // src/explain.ts
430
// Running clause number; explainPattern resets it to 0 at the start of a run.
var clauseCounter = 0;

/**
 * Returns the next clause id ("c1", "c2", ...), advancing the shared counter.
 *
 * @returns {string}
 */
function nextId() {
  return `c${++clauseCounter}`;
}
435
/**
 * Produces a plain-English explanation of a pattern.
 *
 * Resets the clause counter, so ids always start at "c1" for each call.
 *
 * @param {object} root Root pattern node.
 * @returns {{clauses: {id: string, text: string}[], summary: string}}
 *   `summary` is the clause texts joined by single spaces.
 */
function explainPattern(root) {
  clauseCounter = 0;
  const clauses = explainNode(optimize(root));
  const summary = clauses.map((clause) => clause.text).join(" ");
  return { clauses, summary };
}
444
/**
 * Explains a single node as a list of clauses.
 *
 * Sequences contribute one clause per child and NonCapturing is transparent.
 * Choice, Repeat, Optional and NamedGroup fold their children's texts into a
 * single clause. Children are explained first, so they receive lower ids than
 * the clause that wraps them.
 *
 * @param {object} node Pattern node.
 * @returns {{id: string, text: string}[]}
 */
function explainNode(node) {
  const clause = (text) => [{ id: nextId(), text }];
  const describe = (child) => explainNode(child).map((entry) => entry.text).join(" ");
  const kind = node.kind;

  if (kind === "Literal") {
    return clause(node.value === "" ? "Matches an empty fragment." : `Matches the literal ${quote(node.value)}.`);
  }
  if (kind === "Any") {
    return clause("Matches any single character (except line terminators unless dotAll).");
  }
  if (kind === "Digit") {
    return clause(node.unicode ? "Matches a Unicode digit." : "Matches an ASCII digit.");
  }
  if (kind === "Word") {
    return clause("Matches a word character.");
  }
  if (kind === "Whitespace") {
    return clause("Matches whitespace.");
  }
  if (kind === "Letter") {
    // Index 0 = lower, 1 = upper, 2 = any other letterCase value.
    const texts = node.unicode
      ? ["Matches a Unicode lowercase letter.", "Matches a Unicode uppercase letter.", "Matches a Unicode letter."]
      : ["Matches an ASCII lowercase letter.", "Matches an ASCII uppercase letter.", "Matches an ASCII letter."];
    let slot = 2;
    if (node.letterCase === "lower") slot = 0;
    else if (node.letterCase === "upper") slot = 1;
    return clause(texts[slot]);
  }
  if (kind === "HexDigit") {
    return clause(node.uppercaseOnly ? "Matches a hexadecimal digit (0-9A-F)." : "Matches a hexadecimal digit (0-9A-Fa-f).");
  }
  if (kind === "Start") {
    return clause("Must align with the start of the string (or line if multiline).");
  }
  if (kind === "End") {
    return clause("Must align with the end of the string (or line if multiline).");
  }
  if (kind === "WordBoundary") {
    return clause("Requires a word boundary.");
  }
  if (kind === "RawRegex") {
    return clause(`Uses a raw regex fragment: ${quote(node.source)}.`);
  }
  if (kind === "Sequence") {
    return node.children.flatMap((child) => explainNode(child));
  }
  if (kind === "Choice") {
    const parts = node.children.map((child) => describe(child));
    return clause(`Matches one of: ${parts.join(" OR ")}.`);
  }
  if (kind === "Repeat") {
    const inner = describe(node.child);
    const q = describeQuantifier(node.min, node.max, node.greedy);
    return clause(`${q} ${inner}`);
  }
  if (kind === "Optional") {
    const inner = describe(node.child);
    const laz = node.greedy ? "" : " (lazy)";
    return clause(`Optionally${laz}: ${inner}`);
  }
  if (kind === "NamedGroup") {
    const inner = describe(node.child);
    return clause(`Captures "${node.name}" as: ${inner}`);
  }
  if (kind === "NonCapturing") {
    return explainNode(node.child);
  }
  // Unknown kinds fall through and the node is handed back unchanged.
  return node;
}
528
/**
 * Renders a value as a double-quoted, JSON-escaped string for use in
 * explanation text.
 *
 * @param {string} s Text to quote.
 * @returns {string}
 */
function quote(s) {
  const quoted = JSON.stringify(s);
  return quoted;
}
531
/**
 * Describes repetition bounds in words, ending with a colon.
 *
 * Laziness is only mentioned for the `*`, `+` and `?` forms.
 *
 * @param {number} min Minimum repetitions.
 * @param {number} max Maximum repetitions, or Number.POSITIVE_INFINITY.
 * @param {boolean} greedy Whether the quantifier is greedy.
 * @returns {string}
 */
function describeQuantifier(min, max, greedy) {
  const unbounded = max === Number.POSITIVE_INFINITY;
  const lazyWord = greedy ? "" : "lazy ";
  if (unbounded && min === 0) return `Repeat ${lazyWord}zero or more times:`;
  if (unbounded && min === 1) return `Repeat ${lazyWord}one or more times:`;
  if (min === 0 && max === 1) {
    const lazySuffix = greedy ? "" : " (lazy)";
    return `At most once${lazySuffix}:`;
  }
  if (min === max) return `Repeat exactly ${min} times:`;
  if (unbounded) return `Repeat at least ${min} times:`;
  return `Repeat between ${min} and ${max} times:`;
}
540
+
541
+ // src/diagnose.ts
542
/**
 * Tests whether `input` matches the pattern as a whole and, if not, reports
 * where matching broke down.
 *
 * The compiled pattern is first run wrapped in `^(?:…)$`. On failure a
 * node-by-node simulation is used to locate the failing position.
 *
 * @param {object} root Root pattern node.
 * @param {string} input Text to test.
 * @param {object} [options] Compile options (flags, nonCapturing).
 * @returns {object} On success `{ ok: true, match, index, groups }`;
 *   otherwise `{ ok: false, index, message, expected }`.
 */
function diagnose(root, input, options = {}) {
  const ast = optimize(root);
  const { pattern, flags } = compilePattern(ast, options);
  const anchored = new RegExp(`^(?:${pattern})$`, flags);
  const m = anchored.exec(input);
  if (m) {
    const groups = m.groups ?? {};
    // Report where the match actually starts. It is 0 unless the multiline
    // flag lets `^` anchor at a later line, where a hard-coded 0 would
    // disagree with `match`.
    return { ok: true, match: m[0], index: m.index, groups };
  }
  const sim = simulate(ast, input, 0, options);
  if (sim.ok) {
    if (sim.end === input.length) {
      return {
        ok: false,
        index: 0,
        message: "Simulation matched the full input but the anchored RegExp did not \u2014 check lazy quantifiers, ambiguous alternatives, or raw fragments.",
        expected: explainPattern(ast).summary
      };
    }
    return {
      ok: false,
      index: sim.end,
      message: "The pattern matched only a prefix of the input.",
      expected: explainPattern(ast).summary
    };
  }
  return {
    ok: false,
    index: sim.at,
    message: "The pattern did not match.",
    expected: sim.expected
  };
}
575
/**
 * Approximates matching `node` against `input` from `pos`, walking the tree
 * left to right without backtracking, to find where a failed match broke down.
 *
 * Leaves are matched by compiling them and testing the remaining input
 * (matchPrefix). Lazy repeats consume only their minimum count.
 *
 * @param {object} node Pattern node.
 * @param {string} input Full input text.
 * @param {number} pos Offset into `input` to start from.
 * @param {object} options Compile options forwarded to matchPrefix.
 * @returns {{ok: true, end: number} | {ok: false, at: number, expected: string}}
 */
function simulate(node, input, pos, options) {
  switch (node.kind) {
    case "Sequence": {
      let p = pos;
      for (const child of node.children) {
        const r = simulate(child, input, p, options);
        if (!r.ok) return r;
        p = r.end;
      }
      return { ok: true, end: p };
    }
    case "NonCapturing":
      return simulate(node.child, input, pos, options);
    case "NamedGroup":
      return simulate(node.child, input, pos, options);
    case "Choice": {
      // First alternative that succeeds wins; otherwise report the failure of
      // the last alternative tried.
      let lastFail = null;
      for (const child of node.children) {
        const r = simulate(child, input, pos, options);
        if (r.ok) return r;
        lastFail = r;
      }
      return lastFail ?? {
        ok: false,
        at: pos,
        expected: explainPattern(node).summary
      };
    }
    case "Optional": {
      const tryMatch = matchPrefix(node.child, input, pos, options);
      if (!tryMatch) return { ok: true, end: pos };
      return { ok: true, end: pos + tryMatch.len };
    }
    case "Repeat": {
      let p = pos;
      let count = 0;
      while (count < node.min) {
        const m = matchPrefix(node.child, input, p, options);
        if (!m) {
          return {
            ok: false,
            at: p,
            expected: explainPattern(node.child).summary
          };
        }
        p += m.len;
        count += 1;
      }
      if (node.greedy) {
        while (count < node.max) {
          const m = matchPrefix(node.child, input, p, options);
          if (!m) break;
          // An empty match makes no progress. With an unbounded max the loop
          // would otherwise never end (e.g. repeating an optional child), and
          // stopping here leaves the resulting end position unchanged.
          if (m.len === 0) break;
          p += m.len;
          count += 1;
        }
      }
      return { ok: true, end: p };
    }
    default: {
      const m = matchPrefix(node, input, pos, options);
      if (!m) {
        return {
          ok: false,
          at: pos,
          expected: explainPattern(node).summary
        };
      }
      return { ok: true, end: pos + m.len };
    }
  }
}
646
/**
 * Tests whether `node` matches at exactly `pos` in `input`.
 *
 * The node is compiled with named groups demoted to non-capturing groups and
 * run, start-anchored, against the remainder of the input.
 *
 * @param {object} node Pattern node.
 * @param {string} input Full input text.
 * @param {number} pos Offset to match at.
 * @param {object} options Compile options.
 * @returns {{len: number} | null} Matched length, or null when there is no
 *   match at `pos`.
 */
function matchPrefix(node, input, pos, options) {
  const compiled = compilePattern(node, { ...options, nonCapturing: true });
  const prefixRe = new RegExp(`^(?:${compiled.pattern})`, compiled.flags);
  const rest = input.slice(pos);
  const found = prefixRe.exec(rest);
  // With the multiline flag `^` may also match after a line break, so the
  // match position still has to be checked.
  if (found && found.index === 0) {
    return { len: found[0].length };
  }
  return null;
}
657
+
658
+ // src/analyze.ts
659
/**
 * Static checks over a pattern tree.
 *
 * Reports every RawRegex node ("raw-regex", warn) and every Repeat whose
 * direct child is another Repeat, or an Optional while the outer repeat is
 * unbounded ("nested-quantifier", info).
 *
 * @param {object} root Root pattern node.
 * @returns {{severity: string, code: string, message: string}[]} Findings in
 *   traversal order.
 */
function analyzePattern(root) {
  const findings = [];
  const inspect = (n) => {
    if (n.kind === "RawRegex") {
      findings.push({
        severity: "warn",
        code: "raw-regex",
        message: "Raw regex fragments bypass semantic guarantees \u2014 audit for ReDoS."
      });
    }
    if (n.kind !== "Repeat") return;
    const innerKind = n.child.kind;
    const isNested =
      innerKind === "Repeat" ||
      (innerKind === "Optional" && n.max === Number.POSITIVE_INFINITY);
    if (isNested) {
      findings.push({
        severity: "info",
        code: "nested-quantifier",
        message: "Nested quantifiers can cause catastrophic backtracking \u2014 prefer possessive/atomic patterns when available."
      });
    }
  };
  walk2(optimize(root), inspect);
  return findings;
}
683
/**
 * Pre-order traversal used by the analyzer: visits `node`, then everything
 * reachable through `children` (Sequence, Choice) or `child` (Repeat,
 * Optional, NamedGroup, NonCapturing).
 *
 * @param {object} node Pattern node.
 * @param {(node: object) => void} fn Visitor.
 * @returns {void}
 */
function walk2(node, fn) {
  fn(node);
  const kind = node.kind;
  if (kind === "Sequence" || kind === "Choice") {
    for (const c of node.children) walk2(c, fn);
    return;
  }
  if (kind === "Repeat" || kind === "Optional" || kind === "NamedGroup" || kind === "NonCapturing") {
    walk2(node.child, fn);
  }
}
708
+
709
+ // src/serialize.ts
710
/**
 * Converts a pattern tree to a JSON-safe payload stamped with the current
 * schema version. The tree is optimized before conversion.
 *
 * @param {object} root Root pattern node.
 * @returns {{schemaVersion: number, pattern: object}}
 */
function serializePattern(root) {
  const pattern = toJson(optimize(root));
  return { schemaVersion: PATTERN_SCHEMA_VERSION, pattern };
}
716
/**
 * Rebuilds a pattern tree from a payload produced by serializePattern.
 *
 * @param {unknown} data Parsed payload.
 * @returns {object} Pattern node.
 * @throws {Error} If the payload is not an object, carries a different
 *   schemaVersion, or has no pattern.
 */
function deserializePattern(data) {
  const isObject = Boolean(data) && typeof data === "object";
  if (!isObject) {
    throw new Error("Invalid pattern payload.");
  }
  const { schemaVersion, pattern } = data;
  if (schemaVersion !== PATTERN_SCHEMA_VERSION) {
    throw new Error(`Unsupported schemaVersion: ${String(schemaVersion)}`);
  }
  if (!pattern) {
    throw new Error("Missing pattern.");
  }
  return fromJson(pattern);
}
725
/**
 * Serializes a pattern tree to JSON text.
 *
 * @param {object} root Root pattern node.
 * @param {number|string} [space] Indentation passed to JSON.stringify.
 * @returns {string}
 */
function patternToJsonString(root, space) {
  const payload = serializePattern(root);
  return JSON.stringify(payload, null, space);
}
728
/**
 * Parses JSON text and rebuilds the pattern tree it describes.
 *
 * @param {string} text JSON produced by patternToJsonString.
 * @returns {object} Pattern node.
 * @throws {SyntaxError} If `text` is not valid JSON.
 * @throws {Error} If the payload is rejected by deserializePattern.
 */
function patternFromJsonString(text) {
  const payload = JSON.parse(text);
  return deserializePattern(payload);
}
731
/**
 * Converts a pattern node to a plain JSON-safe object.
 *
 * Only the fields that belong to each node kind are copied, so helper methods
 * attached to primitives are dropped. An unbounded Repeat `max` is encoded as
 * the string "__inf__" because JSON cannot represent Infinity.
 *
 * @param {object} node Pattern node.
 * @returns {object} JSON-safe copy (unknown kinds are returned unchanged).
 */
function toJson(node) {
  const kind = node.kind;
  switch (kind) {
    case "Any":
    case "Start":
    case "End":
    case "WordBoundary":
      return { kind };
    case "Digit":
    case "Word":
    case "Whitespace":
      return { kind, unicode: node.unicode };
    case "Letter":
      return { kind, letterCase: node.letterCase, unicode: node.unicode };
    case "Literal":
      return { kind, value: node.value };
    case "HexDigit": {
      if (node.uppercaseOnly === void 0) return { kind };
      return { kind, uppercaseOnly: node.uppercaseOnly };
    }
    case "Sequence":
    case "Choice":
      return { kind, children: node.children.map(toJson) };
    case "Optional":
      return { kind, child: toJson(node.child), greedy: node.greedy };
    case "NamedGroup":
      return { kind, name: node.name, child: toJson(node.child) };
    case "NonCapturing":
      return { kind, child: toJson(node.child) };
    case "Repeat": {
      const child = toJson(node.child);
      const max = node.max === Number.POSITIVE_INFINITY ? "__inf__" : node.max;
      return { kind, child, min: node.min, max, greedy: node.greedy };
    }
    case "RawRegex": {
      const out = { kind, source: node.source };
      if (node.flags) out.flags = node.flags;
      if (node.trusted) out.trusted = node.trusted;
      return out;
    }
    default:
      return node;
  }
}
784
/**
 * Rebuilds a pattern node from its JSON form (the inverse of toJson).
 *
 * The string "__inf__" in a Repeat's `max` is decoded back to
 * Number.POSITIVE_INFINITY. Because the input may come from an external
 * source, malformed nodes are rejected instead of being passed through.
 *
 * @param {object} node JSON node.
 * @returns {object} Pattern node.
 * @throws {Error} If `node` is not an object, has an unknown `kind`, or a
 *   Sequence/Choice has no `children` array.
 */
function fromJson(node) {
  if (node === null || typeof node !== "object") {
    throw new Error(`Invalid pattern node: ${JSON.stringify(node)}`);
  }
  switch (node.kind) {
    case "Repeat":
      return {
        kind: "Repeat",
        child: fromJson(node.child),
        min: node.min,
        max: node.max === "__inf__" ? Number.POSITIVE_INFINITY : node.max,
        greedy: node.greedy
      };
    case "Sequence":
    case "Choice": {
      if (!Array.isArray(node.children)) {
        throw new Error(`Pattern node "${node.kind}" requires a children array.`);
      }
      return { kind: node.kind, children: node.children.map((child) => fromJson(child)) };
    }
    case "Optional":
      return { kind: "Optional", child: fromJson(node.child), greedy: node.greedy };
    case "NamedGroup":
      return { kind: "NamedGroup", name: node.name, child: fromJson(node.child) };
    case "NonCapturing":
      return { kind: "NonCapturing", child: fromJson(node.child) };
    case "Literal":
      return { kind: "Literal", value: node.value };
    case "Any":
      return { kind: "Any" };
    case "Digit":
      return { kind: "Digit", unicode: node.unicode };
    case "Word":
      return { kind: "Word", unicode: node.unicode };
    case "Whitespace":
      return { kind: "Whitespace", unicode: node.unicode };
    case "Letter":
      return {
        kind: "Letter",
        letterCase: node.letterCase,
        unicode: node.unicode
      };
    case "HexDigit":
      return node.uppercaseOnly === void 0 ? { kind: "HexDigit" } : { kind: "HexDigit", uppercaseOnly: node.uppercaseOnly };
    case "Start":
      return { kind: "Start" };
    case "End":
      return { kind: "End" };
    case "WordBoundary":
      return { kind: "WordBoundary" };
    case "RawRegex":
      return {
        kind: "RawRegex",
        source: node.source,
        ...node.flags ? { flags: node.flags } : {},
        ...node.trusted ? { trusted: node.trusted } : {}
      };
    default:
      // Previously the raw input object was returned, so unknown or kind-less
      // nodes survived deserialization and only failed later, far from the
      // cause.
      throw new Error(`Unknown pattern node kind: ${JSON.stringify(node.kind)}`);
  }
}
841
+
842
+ // src/compiled.ts
843
/**
 * Wraps a pattern tree in a CompiledPattern.
 *
 * @param {object} ast Root pattern node.
 * @param {object} [options] Compile options stored on the instance.
 * @returns {CompiledPattern}
 */
function compile(ast, options = {}) {
  const compiled = new CompiledPattern(ast, options);
  return compiled;
}
846
/**
 * A pattern tree paired with compile options, exposing the library's
 * operations as methods. Nothing is cached: every accessor and method
 * recompiles from `ast`, and `test`/`exec` run on a fresh RegExp each time.
 */
var CompiledPattern = class {
  ast;
  compileOpts;
  /**
   * @param {object} ast Root pattern node.
   * @param {object} [compileOpts] Options passed to every compilation.
   */
  constructor(ast, compileOpts = {}) {
    this.ast = ast;
    this.compileOpts = compileOpts;
  }
  /** Compiled regex source (body only). */
  get source() {
    const { pattern } = compilePattern(this.ast, this.compileOpts);
    return pattern;
  }
  /** Engine flags string, e.g. `"iu"`. */
  get flags() {
    const { flags } = compilePattern(this.ast, this.compileOpts);
    return flags;
  }
  /** Compiler warnings (for example raw-regex notices). */
  get warnings() {
    const { warnings } = compilePattern(this.ast, this.compileOpts);
    return warnings;
  }
  /** Builds a new RegExp from the current tree and options. */
  toRegExp() {
    return toRegExp(this.ast, this.compileOpts);
  }
  /** Runs `RegExp.prototype.test` on a freshly built RegExp. */
  test(input) {
    const re = this.toRegExp();
    return re.test(input);
  }
  /** Runs `RegExp.prototype.exec` on a freshly built RegExp. */
  exec(input) {
    const re = this.toRegExp();
    return re.exec(input);
  }
  /** Plain-English explanation of the pattern. */
  explain() {
    return explainPattern(this.ast);
  }
  /** Whole-input match diagnosis using the stored compile options. */
  diagnose(input) {
    return diagnose(this.ast, input, this.compileOpts);
  }
  /** Static analysis findings for the pattern. */
  analyze() {
    return analyzePattern(this.ast);
  }
  /** Versioned JSON payload; also picked up by JSON.stringify. */
  toJSON() {
    return serializePattern(this.ast);
  }
  /** JSON text of the versioned payload, optionally indented. */
  toJSONString(space) {
    return patternToJsonString(this.ast, space);
  }
};
890
+
891
+ // src/builder.ts
892
/**
 * Fluent builder that collects pattern fragments in order. Every appending
 * method returns the builder itself so calls can be chained; `build` joins
 * the fragments into one sequence.
 */
var MatchBuilder = class {
  defaults;
  parts = [];
  /**
   * @param {{flags?: object}} [defaults] Defaults whose `flags` are merged
   *   underneath the flags given to `compile`.
   */
  constructor(defaults = {}) {
    this.defaults = defaults;
  }
  /** Anchor start (`^`). */
  start() {
    const node = start();
    this.parts.push(node);
    return this;
  }
  /** Anchor end (`$`). */
  end() {
    const node = end();
    this.parts.push(node);
    return this;
  }
  /** Word boundary (`\b`). */
  boundary() {
    const node = wordBoundary();
    this.parts.push(node);
    return this;
  }
  /** Literal text segment (escaped on compile). */
  text(value) {
    const node = literal(value);
    this.parts.push(node);
    return this;
  }
  /** Alias of `text`. */
  literal(value) {
    return this.text(value);
  }
  /** Literal `-`. */
  dash() {
    const node = dash();
    this.parts.push(node);
    return this;
  }
  /** Append any composed pattern fragment. */
  take(fragment) {
    this.parts.push(fragment);
    return this;
  }
  /** A single ASCII digit. */
  digit() {
    const node = digit();
    this.parts.push(node);
    return this;
  }
  /** A single uppercase letter. */
  lettersUpper() {
    const node = letter({ case: "upper" });
    this.parts.push(node);
    return this;
  }
  /** A single lowercase letter. */
  lettersLower() {
    const node = letter({ case: "lower" });
    this.parts.push(node);
    return this;
  }
  /** Named capture group around `inner`. */
  named(name, inner) {
    const node = namedGroup(name, inner);
    this.parts.push(node);
    return this;
  }
  /** Joins the collected fragments into a single pattern node. */
  build() {
    return seq(...this.parts);
  }
  /**
   * Builds the pattern and wraps it in a CompiledPattern. Flags from
   * `options` override the builder's default flags key by key.
   */
  compile(options = {}) {
    const flags = { ...this.defaults.flags, ...options.flags };
    const merged = { ...options, flags };
    return new CompiledPattern(this.build(), merged);
  }
};
957
/**
 * Starts a new fluent builder.
 *
 * @param {object} [opts] Builder defaults; null/undefined become `{}`.
 * @returns {MatchBuilder}
 */
function match(opts) {
  const defaults = opts ?? {};
  return new MatchBuilder(defaults);
}
960
/**
 * Alias of `match`.
 *
 * @param {object} [opts] Builder defaults.
 * @returns {MatchBuilder}
 */
function regex(opts) {
  const builder = match(opts);
  return builder;
}
963
+
964
+ // src/presets.ts
965
// Ready-made, fully anchored patterns. Each call builds a fresh tree.
var presets = {
  /** RFC 4122 UUID (case-insensitive hex). Version nibble not enforced here. */
  uuid() {
    const hyphen = () => literal("-");
    const middleGroup = seq(hexDigit().exactly(4), hyphen());
    return seq(
      start(),
      hexDigit().exactly(8),
      hyphen(),
      repeat(middleGroup, 3, 3),
      hexDigit().exactly(12),
      end()
    );
  },
  /** Conservative ASCII slug: lowercase letters, digits, single dashes between segments. */
  slug() {
    const unbounded = Number.POSITIVE_INFINITY;
    const slugChar = alt(letter({ case: "lower" }), digit());
    const segment = repeat(slugChar, 1, unbounded);
    const dashedSegments = repeat(seq(dash(), segment), 0, unbounded);
    return seq(start(), segment, dashedSegments, end());
  },
  /**
   * Hex color with a leading `#`. Six digits are always accepted; three
   * digits unless `opts.short === false`; eight digits when `opts.alpha`.
   */
  hexColor(opts = {}) {
    const hashFollowedBy = (count) => seq(literal("#"), hexDigit().exactly(count));
    const modes = [];
    if (opts.short !== false) {
      modes.push(hashFollowedBy(3));
    }
    modes.push(hashFollowedBy(6));
    if (opts.alpha) {
      modes.push(hashFollowedBy(8));
    }
    const body = modes.length === 1 ? modes[0] : alt(...modes);
    return seq(start(), body, end());
  }
};
996
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard means this assignment never executes; it only spells out
// the export names as a plain object literal. The list mirrors the keys of
// the export table registered near the top of the file.
0 && (module.exports = {
  CompiledPattern,
  MatchBuilder,
  PATTERN_SCHEMA_VERSION,
  alt,
  analyzePattern,
  anyChar,
  booleanLiteral,
  compile,
  compilePattern,
  dash,
  deserializePattern,
  diagnose,
  digit,
  dot,
  end,
  explainPattern,
  hexDigit,
  integer,
  letter,
  literal,
  match,
  namedGroup,
  nonCapturing,
  optimize,
  optional,
  patternFromJsonString,
  patternToJsonString,
  presets,
  raw,
  regex,
  repeat,
  seq,
  serializePattern,
  start,
  toRegExp,
  underscore,
  whitespace,
  word,
  wordBoundary
});