novac 2.0.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +1574 -597
- package/bin/novac +468 -171
- package/bin/nvc +522 -0
- package/bin/nvml +78 -17
- package/demo.nv +0 -0
- package/demo_builtins.nv +0 -0
- package/demo_http.nv +0 -0
- package/examples/bf.nv +69 -0
- package/examples/math.nv +21 -0
- package/kits/birdAPI/kitdef.js +954 -0
- package/kits/kitRNG/kitdef.js +740 -0
- package/kits/kitSSH/kitdef.js +1272 -0
- package/kits/kitadb/kitdef.js +606 -0
- package/kits/kitai/kitdef.js +2185 -0
- package/kits/kitansi/kitdef.js +1402 -0
- package/kits/kitcanvas/kitdef.js +914 -0
- package/kits/kitclippy/kitdef.js +925 -0
- package/kits/kitformat/kitdef.js +1485 -0
- package/kits/kitgps/kitdef.js +1862 -0
- package/kits/kitlibproc/kitdef.js +3 -2
- package/kits/kitmatrix/ex.js +19 -0
- package/kits/kitmatrix/kitdef.js +960 -0
- package/kits/kitmorse/kitdef.js +229 -0
- package/kits/kitmpatch/kitdef.js +906 -0
- package/kits/kitnet/kitdef.js +1401 -0
- package/kits/kitnovacweb/README.md +1416 -143
- package/kits/kitnovacweb/kitdef.js +92 -2
- package/kits/kitnovacweb/nvml/executor.js +578 -176
- package/kits/kitnovacweb/nvml/index.js +2 -2
- package/kits/kitnovacweb/nvml/lexer.js +72 -69
- package/kits/kitnovacweb/nvml/parser.js +328 -159
- package/kits/kitnovacweb/nvml/renderer.js +770 -270
- package/kits/kitparse/kitdef.js +1688 -0
- package/kits/kitproto/kitdef.js +613 -0
- package/kits/kitqr/kitdef.js +637 -0
- package/kits/kitregex++/kitdef.js +1353 -0
- package/kits/kitrequire/kitdef.js +1599 -0
- package/kits/kitx11/kitdef.js +1 -0
- package/kits/kitx11/kitx11.js +2472 -0
- package/kits/kitx11/kitx11_conn.js +948 -0
- package/kits/kitx11/kitx11_worker.js +121 -0
- package/kits/libtea/kitdef.js +2691 -0
- package/kits/libterm/ex.js +285 -0
- package/kits/libterm/kitdef.js +1927 -0
- package/novac/LICENSE +21 -0
- package/novac/README.md +1823 -0
- package/novac/bin/novac +950 -0
- package/novac/bin/nvc +522 -0
- package/novac/bin/nvml +542 -0
- package/novac/demo.nv +245 -0
- package/novac/demo_builtins.nv +209 -0
- package/novac/demo_http.nv +62 -0
- package/novac/examples/bf.nv +69 -0
- package/novac/examples/math.nv +21 -0
- package/novac/kits/kitai/kitdef.js +2185 -0
- package/novac/kits/kitansi/kitdef.js +1402 -0
- package/novac/kits/kitformat/kitdef.js +1485 -0
- package/novac/kits/kitgps/kitdef.js +1862 -0
- package/novac/kits/kitlibfs/kitdef.js +231 -0
- package/{examples/example-project/nova_modules → novac/kits}/kitlibproc/kitdef.js +3 -2
- package/novac/kits/kitmatrix/ex.js +19 -0
- package/novac/kits/kitmatrix/kitdef.js +960 -0
- package/novac/kits/kitmpatch/kitdef.js +906 -0
- package/novac/kits/kitnovacweb/README.md +1572 -0
- package/novac/kits/kitnovacweb/demo.nv +12 -0
- package/novac/kits/kitnovacweb/demo.nvml +71 -0
- package/novac/kits/kitnovacweb/index.nova +12 -0
- package/novac/kits/kitnovacweb/kitdef.js +692 -0
- package/novac/kits/kitnovacweb/nova.kit.json +8 -0
- package/novac/kits/kitnovacweb/nvml/executor.js +739 -0
- package/novac/kits/kitnovacweb/nvml/index.js +67 -0
- package/novac/kits/kitnovacweb/nvml/lexer.js +263 -0
- package/novac/kits/kitnovacweb/nvml/parser.js +508 -0
- package/novac/kits/kitnovacweb/nvml/renderer.js +924 -0
- package/novac/kits/kitparse/kitdef.js +1688 -0
- package/novac/kits/kitregex++/kitdef.js +1353 -0
- package/novac/kits/kitrequire/kitdef.js +1599 -0
- package/novac/kits/kitx11/kitdef.js +1 -0
- package/novac/kits/kitx11/kitx11.js +2472 -0
- package/novac/kits/kitx11/kitx11_conn.js +948 -0
- package/novac/kits/kitx11/kitx11_worker.js +121 -0
- package/novac/kits/libtea/tf.js +2691 -0
- package/novac/kits/libterm/ex.js +285 -0
- package/novac/kits/libterm/kitdef.js +1927 -0
- package/novac/node_modules/chalk/license +9 -0
- package/novac/node_modules/chalk/package.json +83 -0
- package/novac/node_modules/chalk/readme.md +297 -0
- package/novac/node_modules/chalk/source/index.d.ts +325 -0
- package/novac/node_modules/chalk/source/index.js +225 -0
- package/novac/node_modules/chalk/source/utilities.js +33 -0
- package/novac/node_modules/chalk/source/vendor/ansi-styles/index.d.ts +236 -0
- package/novac/node_modules/chalk/source/vendor/ansi-styles/index.js +223 -0
- package/novac/node_modules/chalk/source/vendor/supports-color/browser.d.ts +1 -0
- package/novac/node_modules/chalk/source/vendor/supports-color/browser.js +34 -0
- package/novac/node_modules/chalk/source/vendor/supports-color/index.d.ts +55 -0
- package/novac/node_modules/chalk/source/vendor/supports-color/index.js +190 -0
- package/novac/node_modules/commander/LICENSE +22 -0
- package/novac/node_modules/commander/Readme.md +1176 -0
- package/novac/node_modules/commander/esm.mjs +16 -0
- package/novac/node_modules/commander/index.js +24 -0
- package/novac/node_modules/commander/lib/argument.js +150 -0
- package/novac/node_modules/commander/lib/command.js +2777 -0
- package/novac/node_modules/commander/lib/error.js +39 -0
- package/novac/node_modules/commander/lib/help.js +747 -0
- package/novac/node_modules/commander/lib/option.js +380 -0
- package/novac/node_modules/commander/lib/suggestSimilar.js +101 -0
- package/novac/node_modules/commander/package-support.json +19 -0
- package/novac/node_modules/commander/package.json +82 -0
- package/novac/node_modules/commander/typings/esm.d.mts +3 -0
- package/novac/node_modules/commander/typings/index.d.ts +1113 -0
- package/novac/node_modules/node-addon-api/LICENSE.md +9 -0
- package/novac/node_modules/node-addon-api/README.md +95 -0
- package/novac/node_modules/node-addon-api/common.gypi +21 -0
- package/novac/node_modules/node-addon-api/except.gypi +25 -0
- package/novac/node_modules/node-addon-api/index.js +14 -0
- package/novac/node_modules/node-addon-api/napi-inl.deprecated.h +186 -0
- package/novac/node_modules/node-addon-api/napi-inl.h +7165 -0
- package/novac/node_modules/node-addon-api/napi.h +3364 -0
- package/novac/node_modules/node-addon-api/node_addon_api.gyp +42 -0
- package/novac/node_modules/node-addon-api/node_api.gyp +9 -0
- package/novac/node_modules/node-addon-api/noexcept.gypi +26 -0
- package/novac/node_modules/node-addon-api/package-support.json +21 -0
- package/novac/node_modules/node-addon-api/package.json +480 -0
- package/novac/node_modules/node-addon-api/tools/README.md +73 -0
- package/novac/node_modules/node-addon-api/tools/check-napi.js +99 -0
- package/novac/node_modules/node-addon-api/tools/clang-format.js +71 -0
- package/novac/node_modules/node-addon-api/tools/conversion.js +301 -0
- package/novac/node_modules/serialize-javascript/LICENSE +27 -0
- package/novac/node_modules/serialize-javascript/README.md +149 -0
- package/novac/node_modules/serialize-javascript/index.js +297 -0
- package/novac/node_modules/serialize-javascript/package.json +33 -0
- package/novac/package.json +27 -0
- package/novac/scripts/update-bin.js +24 -0
- package/novac/src/core/bstd.js +1035 -0
- package/novac/src/core/config.js +155 -0
- package/novac/src/core/describe.js +187 -0
- package/novac/src/core/emitter.js +499 -0
- package/novac/src/core/error.js +86 -0
- package/novac/src/core/executor.js +5606 -0
- package/novac/src/core/formatter.js +686 -0
- package/novac/src/core/lexer.js +1026 -0
- package/novac/src/core/nova_builtins.js +717 -0
- package/novac/src/core/nova_thread_worker.js +166 -0
- package/novac/src/core/parser.js +2181 -0
- package/novac/src/core/types.js +112 -0
- package/novac/src/index.js +28 -0
- package/novac/src/runtime/stdlib.js +244 -0
- package/package.json +6 -3
- package/scripts/update-bin.js +0 -0
- package/src/core/bstd.js +838 -362
- package/src/core/executor.js +2578 -170
- package/src/core/lexer.js +502 -54
- package/src/core/nova_builtins.js +21 -3
- package/src/core/parser.js +413 -72
- package/src/core/types.js +30 -2
- package/src/index.js +0 -0
- package/examples/example-project/README.md +0 -3
- package/examples/example-project/src/main.nova +0 -3
- package/src/core/environment.js +0 -0
- /package/{examples/example-project/bin/example-project.nv → novac/node_modules/node-addon-api/nothing.c} +0 -0
|
@@ -0,0 +1,1353 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ============================================================
|
|
4
|
+
// kitregex++.js — Regex, but 3000% better.
|
|
5
|
+
// module.exports = { kitdef: { ...all exports } }
|
|
6
|
+
// ============================================================
|
|
7
|
+
|
|
8
|
+
// ────────────────────────────────────────────────────────────
|
|
9
|
+
// SECTION 1: INTERNAL HELPERS
|
|
10
|
+
// ────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
/**
 * Backslash-escape every RegExp metacharacter in `str` so the result matches
 * the input text literally when embedded in a pattern.
 *
 * Note: `-` is not escaped, so the result is meant for use outside of
 * character classes.
 *
 * @param {string} str - text to neutralise
 * @returns {string} the escaped text
 */
function escapeRegex(str) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaChars, (hit) => `\\${hit}`);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Normalise a pattern-like value into a RegExp.
 *
 * - RegExp instances are returned as-is (`flags` is NOT applied to them).
 * - KitPattern instances are compiled through their own `toRegex()`
 *   (`flags` is NOT applied to them either).
 * - Strings are treated as raw pattern source and compiled with `flags`.
 *
 * @param {RegExp|string|KitPattern} pat
 * @param {string} [flags=''] - only used when `pat` is a string
 * @returns {RegExp}
 * @throws {TypeError} when `pat` is none of the accepted kinds
 */
function _toRegex(pat, flags = '') {
  if (pat instanceof RegExp) {
    return pat;
  }
  if (pat instanceof KitPattern) {
    return pat.toRegex();
  }
  if (typeof pat !== 'string') {
    throw new TypeError('kitregex++: pattern must be RegExp, string, or KitPattern');
  }
  return new RegExp(pat, flags);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Combine any number of flag strings into one, keeping the first occurrence
 * of each flag character and dropping repeats.
 *
 * @param {...string} flagSets - flag strings to merge
 * @returns {string} merged flags in first-seen order
 */
function _mergeFlags(...flagSets) {
  const combined = flagSets.join('');
  const unique = new Set(combined.split(''));
  return Array.from(unique).join('');
}
|
|
38
|
+
|
|
39
|
+
// ────────────────────────────────────────────────────────────
|
|
40
|
+
// SECTION 2: PATTERN REGISTRY
|
|
41
|
+
// ────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
// Module-private store of named patterns: name -> { pattern, description }.
const _registry = new Map();
|
|
44
|
+
|
|
45
|
+
/**
 * Store a pattern in the registry under `name`, replacing any entry that is
 * already registered under the same name.
 *
 * @param {string} name - registry key
 * @param {RegExp|string|KitPattern} pattern - stored as given (not compiled)
 * @param {string} [description=''] - free-text note shown by listPatterns()
 */
function registerPattern(name, pattern, description = '') {
  const entry = { pattern, description };
  _registry.set(name, entry);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Look up a previously registered pattern.
 *
 * @param {string} name - registry key
 * @returns {RegExp|string|KitPattern} the pattern exactly as it was registered
 * @throws {Error} when nothing is registered under `name`
 */
function getPattern(name) {
  const found = _registry.get(name);
  if (found) {
    return found.pattern;
  }
  throw new Error(`kitregex++: no pattern registered as "${name}"`);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Summarise the registry: one `{ name, description }` record per registered
 * pattern, in registration order. The patterns themselves are not included.
 *
 * @returns {Array<{name: string, description: string}>}
 */
function listPatterns() {
  const summary = [];
  for (const [name, { description }] of _registry.entries()) {
    summary.push({ name, description });
  }
  return summary;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Remove a named pattern from the registry. Unknown names are ignored.
 *
 * @param {string} name - registry key to drop
 */
function unregisterPattern(name) {
  if (_registry.has(name)) {
    _registry.delete(name);
  }
}
|
|
81
|
+
|
|
82
|
+
// ────────────────────────────────────────────────────────────
|
|
83
|
+
// SECTION 3: BUILT-IN PATTERN PACKS
|
|
84
|
+
// ────────────────────────────────────────────────────────────
|
|
85
|
+
|
|
86
|
+
/**
 * Built-in pattern packs, accessed as Patterns.<pack>.<NAME>.
 *
 * Every entry is a ready-made RegExp object (not a source string); use its
 * `.source` when composing it into a larger pattern.
 *
 * Caution: entries declared with the `g` flag are shared instances and keep
 * `lastIndex` state between `.test()` / `.exec()` calls. Reset `lastIndex`
 * or build a fresh RegExp from `.source` before reusing one that way.
 */
const Patterns = {

  // ── Internet ───────────────────────────────────────────
  internet: {
    EMAIL: /^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$/,
    EMAIL_LOOSE: /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g,
    URL: /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)/,
    URL_LOOSE: /https?:\/\/[^\s"'>]+/g,
    URL_ANY: /(https?:\/\/|www\.)[^\s"'>]+/g,
    IPV4: /\b((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)\b/,
    IPV6: /([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}/,
    MAC: /\b([0-9a-fA-F]{2}[:\-]){5}[0-9a-fA-F]{2}\b/,
    DOMAIN: /\b([a-zA-Z0-9\-]+\.)+[a-zA-Z]{2,}\b/,
    SLUG: /^[a-z0-9]+(?:-[a-z0-9]+)*$/,
    PORT: /\b(6553[0-5]|655[0-2]\d|65[0-4]\d{2}|6[0-4]\d{3}|[1-5]\d{4}|[1-9]\d{1,3}|[1-9])\b/,
    QUERY_STRING: /\?([^#]*)/,
    HASH_FRAG: /#(.*)/,
  },

  // ── Dates & Times ─────────────────────────────────────
  datetime: {
    DATE_ISO: /\b\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])\b/,
    DATE_US: /\b(?:0?[1-9]|1[0-2])\/(?:0?[1-9]|[12]\d|3[01])\/\d{2,4}\b/,
    DATE_EU: /\b(?:0?[1-9]|[12]\d|3[01])\.(?:0?[1-9]|1[0-2])\.\d{2,4}\b/,
    TIME_24: /\b([01]?\d|2[0-3]):([0-5]\d)(?::([0-5]\d))?\b/,
    TIME_12: /\b(0?[1-9]|1[0-2]):([0-5]\d)(?::([0-5]\d))?\s*([AaPp][Mm])\b/,
    DATETIME_ISO: /\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])[T ]\d{2}:\d{2}(?::\d{2})?(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?/,
    UNIX_TS: /\b\d{10}(?:\d{3})?\b/,
    YEAR: /\b(19|20)\d{2}\b/,
    MONTH_NAME: /\b(January|February|March|April|May|June|July|August|September|October|November|December|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b/i,
    DURATION_ISO: /P(?:\d+Y)?(?:\d+M)?(?:\d+D)?(?:T(?:\d+H)?(?:\d+M)?(?:\d+S)?)?/,
  },

  // ── Numbers ───────────────────────────────────────────
  numbers: {
    INTEGER: /^-?\d+$/,
    FLOAT: /^-?\d+\.\d+$/,
    NUMBER: /^-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/,
    HEX: /^0x[0-9a-fA-F]+$/,
    OCTAL: /^0o[0-7]+$/,
    BINARY: /^0b[01]+$/,
    CURRENCY_VAL: /[+-]?[$€£¥]?\s?\d{1,3}(?:[,. ]\d{3})*(?:[.,]\d{2})?/,
    PERCENT: /\b\d+(?:\.\d+)?%/,
    SCIENTIFIC: /-?\d+(?:\.\d+)?[eE][+-]?\d+/,
    FRACTION: /\b\d+\/\d+\b/,
    RANGE: /\b\d+\s*[-–—]\s*\d+\b/,
  },

  // ── Identifiers / Code ────────────────────────────────
  code: {
    IDENTIFIER: /\b[a-zA-Z_$][a-zA-Z0-9_$]*\b/,
    CAMEL_CASE: /\b[a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*\b/,
    PASCAL_CASE: /\b[A-Z][a-zA-Z0-9]+\b/,
    SNAKE_CASE: /\b[a-z]+(_[a-z0-9]+)+\b/,
    KEBAB_CASE: /\b[a-z]+(-[a-z0-9]+)+\b/,
    CONST_CASE: /\b[A-Z][A-Z0-9]*(_[A-Z0-9]+)+\b/,
    HEX_COLOR: /#([0-9a-fA-F]{3}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})\b/,
    CSS_CLASS: /\.[a-zA-Z_-][a-zA-Z0-9_-]*/g,
    CSS_ID: /#[a-zA-Z_-][a-zA-Z0-9_-]*/g,
    HTML_TAG: /<\/?[a-zA-Z][a-zA-Z0-9]*(?:\s[^>]*)?\/?>/g,
    HTML_COMMENT: /<!--[\s\S]*?-->/g,
    JS_COMMENT_LINE: /\/\/[^\n]*/g,
    JS_COMMENT_BLOCK: /\/\*[\s\S]*?\*\//g,
    SEMVER: /\bv?(\d+)\.(\d+)\.(\d+)(?:-([a-zA-Z0-9.\-]+))?(?:\+([a-zA-Z0-9.\-]+))?\b/,
    UUID: /\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b/i,
    JWT: /\beyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\b/,
    TEMPLATE_LIT: /`[^`]*`/g,
    IMPORT_STMT: /import\s+(?:[\w*{},\s]+from\s+)?['"][^'"]+['"]/g,
    REQUIRE_STMT: /require\s*\(\s*['"][^'"]+['"]\s*\)/g,
  },

  // ── Personal / Contact ────────────────────────────────
  personal: {
    PHONE_US: /\b(?:\+1\s?)?(?:\(\d{3}\)|\d{3})[\s.\-]?\d{3}[\s.\-]?\d{4}\b/,
    PHONE_INTL: /\+\d{1,3}[\s\-]?\d{1,4}[\s\-]?\d{1,4}[\s\-]?\d{1,9}/,
    SSN: /\b\d{3}-\d{2}-\d{4}\b/,
    ZIP_US: /\b\d{5}(?:-\d{4})?\b/,
    ZIP_UK: /\b[A-Z]{1,2}\d[A-Z\d]?\s?\d[A-Z]{2}\b/i,
    POSTAL_CA: /\b[A-Z]\d[A-Z]\s?\d[A-Z]\d\b/i,
    CREDIT_CARD: /\b(?:4\d{12}(?:\d{3})?|5[1-5]\d{14}|3[47]\d{13}|3(?:0[0-5]|[68]\d)\d{11}|6(?:011|5\d{2})\d{12})\b/,
    USERNAME: /\b[a-zA-Z0-9_]{3,20}\b/,
    HASHTAG: /#[a-zA-Z]\w*/g,
    MENTION: /@[a-zA-Z0-9_.]+/g,
  },

  // ── Filesystem / Paths ────────────────────────────────
  fs: {
    FILE_EXT: /\.([a-zA-Z0-9]+)$/,
    FILEPATH_WIN: /[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*/,
    FILEPATH_UNIX: /(?:\/[^/\0]+)+\/?/,
    MIME_TYPE: /\b[a-z]+\/[a-z0-9\-+.]+\b/,
    ENV_VAR: /\$\{?[A-Z_][A-Z0-9_]*\}?/g,
  },

  // ── Text / Prose ──────────────────────────────────────
  text: {
    WORD: /\b\w+\b/g,
    SENTENCE: /[^.!?]*[.!?]+/g,
    PARAGRAPH: /[^\n]+(?:\n(?!\n)[^\n]+)*/g,
    WHITESPACE: /\s+/g,
    BLANK_LINE: /^\s*$/gm,
    REPEATED_WS: /[ \t]{2,}/g,
    QUOTED_SINGLE: /'[^'\\]*(?:\\.[^'\\]*)*'/g,
    // Mirrors QUOTED_SINGLE; the closing quote must follow the body directly
    // (a stray space before it previously made ordinary "…" strings unmatchable).
    QUOTED_DOUBLE: /"[^"\\]*(?:\\.[^"\\]*)*"/g,
    EMOJI: /[\u{1F300}-\u{1FFFF}]/gu,
    DIACRITICS: /[\u0300-\u036f]/g,
    NON_ASCII: /[^\x00-\x7F]/g,
    CONTROL_CHARS: /[\x00-\x1F\x7F]/g,
    NEWLINES: /\r?\n/g,
    CRLF: /\r\n/g,
  },
};
|
|
202
|
+
|
|
203
|
+
// Seed the registry with every built-in pattern under a "<pack>.<NAME>" key,
// e.g. "internet.EMAIL". Runs once when the module is loaded.
(function _autoRegister() {
  Object.entries(Patterns).forEach(([pack, entries]) => {
    Object.entries(entries).forEach(([name, pat]) => {
      registerPattern(`${pack}.${name}`, pat, `Built-in: ${pack} / ${name}`);
    });
  });
})();
|
|
211
|
+
|
|
212
|
+
// ────────────────────────────────────────────────────────────
|
|
213
|
+
// SECTION 4: FLAG PRESETS
|
|
214
|
+
// ────────────────────────────────────────────────────────────
|
|
215
|
+
|
|
216
|
+
/**
 * Named RegExp flag presets. Each key maps to the raw flag string that is
 * handed to the RegExp constructor; withFlags() and KitPattern#flags()
 * accept either a key from this table or a raw flag string.
 */
const Flags = {
  // No flags at all.
  DEFAULT: '',

  // Single flags.
  GLOBAL: 'g',
  MULTILINE: 'm',
  INSENSITIVE: 'i',
  DOTALL: 's',
  UNICODE: 'u',
  STICKY: 'y',

  // Common combinations.
  GM: 'gm',
  GI: 'gi',
  GIM: 'gim',
  GS: 'gs',
  GIMS: 'gims',
  GISU: 'gisu',
};
|
|
234
|
+
|
|
235
|
+
/**
 * Rebuild a pattern with exactly the given flags. Any flags the pattern
 * already carried are discarded, not merged.
 *
 * @param {RegExp|string|KitPattern} pattern - anything _toRegex() accepts
 * @param {string} flags - a key of Flags (e.g. 'GI') or a raw flag string
 * @returns {RegExp} a new RegExp with the pattern's source and the new flags
 */
function withFlags(pattern, flags) {
  const preset = Flags[flags];
  const resolvedFlags = preset === undefined ? flags : preset;
  const { source } = _toRegex(pattern);
  return new RegExp(source, resolvedFlags);
}
|
|
246
|
+
|
|
247
|
+
/**
 * Return a copy of `re` whose flags are the union of its current flags and
 * `flags`. The input regex is left untouched.
 *
 * @param {RegExp} re - regex to extend
 * @param {string} flags - raw flag characters to add
 * @returns {RegExp} a new RegExp with the merged flags
 */
function addFlags(re, flags) {
  const { source, flags: current } = re;
  const combined = _mergeFlags(current, flags);
  return new RegExp(source, combined);
}
|
|
256
|
+
|
|
257
|
+
/**
 * Return a copy of `re` without the listed flags. Flags that `re` does not
 * have are ignored; the input regex is left untouched.
 *
 * @param {RegExp} re - regex to strip flags from
 * @param {string} flags - raw flag characters to remove
 * @returns {RegExp} a new RegExp with the remaining flags
 */
function removeFlags(re, flags) {
  let kept = '';
  for (const flag of re.flags.split('')) {
    if (!flags.includes(flag)) {
      kept += flag;
    }
  }
  return new RegExp(re.source, kept);
}
|
|
267
|
+
|
|
268
|
+
// ────────────────────────────────────────────────────────────
|
|
269
|
+
// SECTION 5: BUILDER API (KitPattern)
|
|
270
|
+
// ────────────────────────────────────────────────────────────
|
|
271
|
+
|
|
272
|
+
/**
 * KitPattern — human-readable, chainable regex builder.
 *
 * Every method appends one fragment of regex source to an internal list and
 * returns `this`, so calls can be chained. Fragments are joined verbatim:
 * a quantifier method (optional(), oneOrMore(), …) therefore binds to the
 * last atom of what was appended before it, exactly as it would in a raw
 * regex (`.literal('ab').optional()` yields `ab?`). Wrap multi-character
 * content in group()/capture() to quantify it as a whole.
 *
 * @example
 * const pat = new KitPattern()
 *   .startAnchor()
 *   .literal('Hello')
 *   .whitespace()
 *   .capture(p => p.word().oneOrMore())
 *   .endAnchor()
 *   .flags('i')
 *   .toRegex();
 */
class KitPattern {
  constructor() {
    this._parts = []; // regex source fragments, in append order
    this._flags = ''; // raw flag string used by toRegex()
  }

  /**
   * Escape text for literal use INSIDE a character class.
   * Escapes the same metacharacters as escapeRegex() plus `-`, which would
   * otherwise form a range between its neighbours (`[a-z]`).
   * @param {*} text - coerced to string
   * @returns {string}
   */
  static _escapeClassMember(text) {
    return String(text).replace(/[.*+?^${}()|[\]\\-]/g, '\\$&');
  }

  /** Clone this builder (independent parts list, same flags). */
  clone() {
    const p = new KitPattern();
    p._parts = [...this._parts];
    p._flags = this._flags;
    return p;
  }

  // ── Anchors ─────────────────────────────────────────

  /** Append `^`. */
  startAnchor() { this._parts.push('^'); return this; }
  /** Append `$`. */
  endAnchor() { this._parts.push('$'); return this; }
  /** Append `\b`. */
  wordBoundary() { this._parts.push('\\b'); return this; }
  /** Append `\B`. */
  nonWordBoundary() { this._parts.push('\\B'); return this; }

  // ── Literals ─────────────────────────────────────────

  /** Match a literal string (auto-escaped). */
  literal(str) { this._parts.push(escapeRegex(str)); return this; }
  /** Raw regex fragment (not escaped). */
  raw(str) { this._parts.push(str); return this; }

  // ── Character classes ────────────────────────────────

  /** Append `\d`. */
  digit() { this._parts.push('\\d'); return this; }
  /** Append `\D`. */
  nonDigit() { this._parts.push('\\D'); return this; }
  /** Append `\w`. */
  word() { this._parts.push('\\w'); return this; }
  /** Append `\W`. */
  nonWord() { this._parts.push('\\W'); return this; }
  /** Append `\s`. */
  whitespace() { this._parts.push('\\s'); return this; }
  /** Append `\S`. */
  nonWhitespace() { this._parts.push('\\S'); return this; }
  /** Append `.`. */
  anyChar() { this._parts.push('.'); return this; }
  /** Append `\t`. */
  tab() { this._parts.push('\\t'); return this; }
  /** Append `\n`. */
  newline() { this._parts.push('\\n'); return this; }
  /** Append `\r`. */
  carriageReturn() { this._parts.push('\\r'); return this; }

  /**
   * Custom character class, e.g. charClass('a-z0-9_').
   * `chars` is inserted verbatim (ranges and escapes are the caller's job).
   * @param {string} chars
   * @param {boolean} [negate=false]
   */
  charClass(chars, negate = false) {
    this._parts.push(`[${negate ? '^' : ''}${chars}]`);
    return this;
  }

  /**
   * Any one of the given characters. Each argument is taken literally, so
   * `oneOf('a', '-', 'z')` matches exactly "a", "-" or "z" (never a range).
   * @param {...string} chars
   */
  oneOf(...chars) {
    const members = chars.map((c) => KitPattern._escapeClassMember(c)).join('');
    this._parts.push(`[${members}]`);
    return this;
  }

  /**
   * None of the given characters. Each argument is taken literally, see oneOf().
   * @param {...string} chars
   */
  noneOf(...chars) {
    const members = chars.map((c) => KitPattern._escapeClassMember(c)).join('');
    this._parts.push(`[^${members}]`);
    return this;
  }

  // ── Quantifiers ──────────────────────────────────────

  /** Append `?`. */
  optional() { this._parts.push('?'); return this; }
  /** Append `*` (or `*?` when lazy). */
  zeroOrMore(lazy = false) { this._parts.push(lazy ? '*?' : '*'); return this; }
  /** Append `+` (or `+?` when lazy). */
  oneOrMore(lazy = false) { this._parts.push(lazy ? '+?' : '+'); return this; }
  /** Append `{n}`. */
  exactly(n) { this._parts.push(`{${n}}`); return this; }
  /** Append `{n,}`. */
  atLeast(n) { this._parts.push(`{${n},}`); return this; }
  /** Append `{min,max}` (followed by `?` when lazy). */
  between(min, max, lazy = false) {
    this._parts.push(`{${min},${max}}${lazy ? '?' : ''}`);
    return this;
  }

  // ── Groups ────────────────────────────────────────────

  /**
   * Capturing group. Accepts a builder callback or a raw string.
   * @param {function(KitPattern):KitPattern|string} inner
   */
  capture(inner) {
    const src = _resolveInner(inner);
    this._parts.push(`(${src})`);
    return this;
  }

  /**
   * Named capturing group.
   * @param {string} name
   * @param {function(KitPattern):KitPattern|string} inner
   */
  namedCapture(name, inner) {
    const src = _resolveInner(inner);
    this._parts.push(`(?<${name}>${src})`);
    return this;
  }

  /**
   * Non-capturing group.
   * @param {function(KitPattern):KitPattern|string} inner
   */
  group(inner) {
    const src = _resolveInner(inner);
    this._parts.push(`(?:${src})`);
    return this;
  }

  /**
   * Alternation: match any of the given patterns, wrapped in one
   * non-capturing group. String alternatives are inserted as raw regex source.
   * @param {...(function|string|KitPattern|RegExp)} alts
   */
  either(...alts) {
    const src = alts.map((a) => _resolveInner(a)).join('|');
    this._parts.push(`(?:${src})`);
    return this;
  }

  // ── Lookahead / lookbehind ────────────────────────────

  /** Positive lookahead. */
  lookahead(inner) {
    this._parts.push(`(?=${_resolveInner(inner)})`);
    return this;
  }

  /** Negative lookahead. */
  negativeLookahead(inner) {
    this._parts.push(`(?!${_resolveInner(inner)})`);
    return this;
  }

  /** Positive lookbehind. */
  lookbehind(inner) {
    this._parts.push(`(?<=${_resolveInner(inner)})`);
    return this;
  }

  /** Negative lookbehind. */
  negativeLookbehind(inner) {
    this._parts.push(`(?<!${_resolveInner(inner)})`);
    return this;
  }

  // ── Backreferences ────────────────────────────────────

  /** Numeric backreference. */
  backref(n) { this._parts.push(`\\${n}`); return this; }
  /** Named backreference. */
  namedBackref(name) { this._parts.push(`\\k<${name}>`); return this; }

  // ── Composition ──────────────────────────────────────

  /**
   * Append another KitPattern, RegExp, or string.
   * Only the source is appended: the other pattern's flags are not merged,
   * and strings are inserted as raw regex source (not escaped).
   * @param {KitPattern|RegExp|string} other
   */
  append(other) {
    if (other instanceof KitPattern) {
      this._parts.push(...other._parts);
    } else if (other instanceof RegExp) {
      this._parts.push(other.source);
    } else {
      this._parts.push(String(other));
    }
    return this;
  }

  // ── Flags ─────────────────────────────────────────────

  /**
   * Set flags for the final regex, replacing any flags set earlier.
   * @param {string} flags - raw flags or key from Flags
   */
  flags(flags) {
    this._flags = Flags[flags] !== undefined ? Flags[flags] : flags;
    return this;
  }

  /**
   * Add flags without replacing existing.
   * @param {string} flags - raw flag characters
   */
  addFlags(flags) {
    this._flags = _mergeFlags(this._flags, flags);
    return this;
  }

  // ── Output ────────────────────────────────────────────

  /** Get the assembled source string. */
  get source() {
    return this._parts.join('');
  }

  /** Build and return the final RegExp. */
  toRegex() {
    return new RegExp(this.source, this._flags);
  }

  /** Regex-literal style rendering of the pattern: `/source/flags`. */
  describe() {
    return `/${this.source}/${this._flags}`;
  }

  toString() { return this.describe(); }
}
|
|
487
|
+
|
|
488
|
+
/**
 * Turn the "inner" argument accepted by the group/lookaround helpers into a
 * regex source string.
 *
 * - function: called with a fresh KitPattern; the source of the KitPattern it
 *   returns is used, or the fresh builder's own source when it returns
 *   anything else
 * - KitPattern / RegExp: their `.source`
 * - anything else: `String(inner)`, inserted as raw regex source
 *
 * @param {function(KitPattern):*|KitPattern|RegExp|*} inner
 * @returns {string}
 */
function _resolveInner(inner) {
  if (typeof inner === 'function') {
    const scratch = new KitPattern();
    const returned = inner(scratch);
    const built = returned instanceof KitPattern ? returned : scratch;
    return built.source;
  }
  const isPatternLike = inner instanceof KitPattern || inner instanceof RegExp;
  return isPatternLike ? inner.source : String(inner);
}
|
|
498
|
+
|
|
499
|
+
// ────────────────────────────────────────────────────────────
|
|
500
|
+
// SECTION 6: COMPOSITION HELPERS
|
|
501
|
+
// ────────────────────────────────────────────────────────────
|
|
502
|
+
|
|
503
|
+
/**
 * Concatenate multiple patterns into one.
 *
 * RegExp and KitPattern arguments contribute their `.source` (their own
 * flags are dropped); every other argument is converted with String() and
 * inserted as raw regex source.
 *
 * Flags: when MORE THAN ONE argument is given and the last one is a string
 * made up only of the characters `g i m s u y` (the empty string included),
 * it is taken as the flags for the result rather than as a pattern. A lone
 * argument is always treated as a pattern, so `concat('g')` yields `/g/`.
 *
 * @param {...(RegExp|string|KitPattern)} args - patterns, optionally followed by a flag string
 * @returns {RegExp}
 */
function concat(...args) {
  let flags = '';
  const pats = [...args];
  const last = pats[pats.length - 1];
  // Require at least one real pattern before the flags argument; otherwise a
  // single flag-looking pattern such as 'is' would be swallowed as flags.
  if (pats.length > 1 && typeof last === 'string' && /^[gimsuy]*$/.test(last)) {
    flags = pats.pop();
  }
  const source = pats.map((p) => {
    if (p instanceof KitPattern) return p.source;
    if (p instanceof RegExp) return p.source;
    return String(p);
  }).join('');
  return new RegExp(source, flags);
}
|
|
522
|
+
|
|
523
|
+
/**
 * Alternate: match any one of the given patterns.
 *
 * - RegExp arguments contribute `(?:source)` (their own flags are dropped).
 * - KitPattern arguments contribute their `.source` as-is.
 * - Every other argument is converted with String(), escaped, and matched
 *   literally — unlike concat(), which inserts strings as raw regex source.
 *
 * The alternatives are joined with `|` and the result is NOT wrapped in an
 * outer group; wrap it yourself before appending more pattern text to it.
 *
 * Flags: when MORE THAN ONE argument is given and the last one is a string
 * made up only of the characters `g i m s u y` (the empty string included),
 * it is taken as the flags for the result rather than as an alternative.
 * A lone argument is always treated as a pattern.
 *
 * @param {...(RegExp|string|KitPattern)} args - alternatives, optionally followed by a flag string
 * @returns {RegExp}
 */
function alternate(...args) {
  let flags = '';
  const pats = [...args];
  const last = pats[pats.length - 1];
  // Require at least one real alternative before the flags argument; otherwise
  // a single flag-looking word such as 'is' would be swallowed as flags.
  if (pats.length > 1 && typeof last === 'string' && /^[gimsuy]*$/.test(last)) {
    flags = pats.pop();
  }
  const source = pats.map((p) => {
    if (p instanceof KitPattern) return p.source;
    if (p instanceof RegExp) return `(?:${p.source})`;
    return `(?:${escapeRegex(String(p))})`;
  }).join('|');
  return new RegExp(source, flags);
}
|
|
542
|
+
|
|
543
|
+
/**
 * Wrap a pattern in a non-capturing group and attach a quantifier to it.
 *
 * @param {RegExp|string|KitPattern} pattern - resolved via _resolveInner()
 * @param {string} quantifier - appended verbatim, e.g. '+', '*', '?', '{2,5}'
 * @param {string} [flags=''] - flags for the resulting regex
 * @returns {RegExp}
 */
function quantify(pattern, quantifier, flags = '') {
  const grouped = `(?:${_resolveInner(pattern)})`;
  return new RegExp(grouped + quantifier, flags);
}
|
|
553
|
+
|
|
554
|
+
/**
 * Wrap a pattern in a capturing group.
 *
 * @param {RegExp|string|KitPattern} pattern - resolved via _resolveInner()
 * @param {string} [flags=''] - flags for the resulting regex
 * @returns {RegExp}
 */
function capture(pattern, flags = '') {
  const inner = _resolveInner(pattern);
  const wrapped = '(' + inner + ')';
  return new RegExp(wrapped, flags);
}
|
|
561
|
+
|
|
562
|
+
/**
 * Wrap a pattern in a named capturing group.
 *
 * @param {string} name - group name, inserted verbatim into `(?<name>…)`
 * @param {RegExp|string|KitPattern} pattern - resolved via _resolveInner()
 * @param {string} [flags=''] - flags for the resulting regex
 * @returns {RegExp}
 */
function namedCapture(name, pattern, flags = '') {
  const inner = _resolveInner(pattern);
  const wrapped = '(?<' + name + '>' + inner + ')';
  return new RegExp(wrapped, flags);
}
|
|
569
|
+
|
|
570
|
+
/**
 * Make a whole pattern optional by wrapping it in `(?:…)?`.
 *
 * @param {RegExp|string|KitPattern} pattern - resolved via _resolveInner()
 * @param {string} [flags=''] - flags for the resulting regex
 * @returns {RegExp}
 */
function optional(pattern, flags = '') {
  const inner = _resolveInner(pattern);
  const wrapped = '(?:' + inner + ')?';
  return new RegExp(wrapped, flags);
}
|
|
577
|
+
|
|
578
|
+
/**
 * Anchor a pattern at both ends: the result is `^(?:…)$`, so (without the
 * `m` flag) it only matches when the entire input matches the pattern.
 *
 * @param {RegExp|string|KitPattern} pattern - resolved via _resolveInner()
 * @param {string} [flags=''] - flags for the resulting regex
 * @returns {RegExp}
 */
function anchored(pattern, flags = '') {
  const inner = _resolveInner(pattern);
  const wrapped = '^(?:' + inner + ')$';
  return new RegExp(wrapped, flags);
}
|
|
585
|
+
|
|
586
|
+
/**
 * Surround a pattern with lookaround "fences" that must be present next to
 * the match but are not part of it.
 *
 * @param {RegExp|string|KitPattern} pattern - the core pattern
 * @param {object} [fences]
 * @param {RegExp|string|KitPattern} [fences.before] - must precede the match (positive lookbehind)
 * @param {RegExp|string|KitPattern} [fences.after] - must follow the match (positive lookahead)
 * @param {string} [flags=''] - flags for the resulting regex
 * @returns {RegExp}
 */
function fenced(pattern, fences = {}, flags = '') {
  const core = _resolveInner(pattern);
  // A falsy fence (missing, '', …) contributes nothing.
  const behind = fences.before ? `(?<=${_resolveInner(fences.before)})` : '';
  const ahead = fences.after ? `(?=${_resolveInner(fences.after)})` : '';
  return new RegExp(`${behind}${core}${ahead}`, flags);
}
|
|
599
|
+
|
|
600
|
+
// ────────────────────────────────────────────────────────────
|
|
601
|
+
// SECTION 7: EXTRACT / TRANSFORM / REPLACE
|
|
602
|
+
// ────────────────────────────────────────────────────────────
|
|
603
|
+
|
|
604
|
+
/**
 * Extract all matches from a string.
 *
 * Every result is `{ match }`, optionally extended with `groups`, `named`
 * and `index` depending on the options.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {object} [options]
 * @param {boolean} [options.groups=false] - include positional capture groups (only when the pattern has any)
 * @param {boolean} [options.named=false] - include a copy of the named groups (only when the pattern has any)
 * @param {boolean} [options.index=false] - include the start index of each match
 * @returns {Array<{ match: string, groups?: string[], named?: object, index?: number }>}
 */
function extract(input, pattern, options = {}) {
  const re = addFlags(_toRegex(pattern), 'g');
  const { groups = false, named = false, index = false } = options;
  const results = [];
  // matchAll advances past zero-length matches on its own; a bare exec() loop
  // would spin forever on patterns such as /x*/. The start offset comes from
  // m.index, so the 'd' (hasIndices) flag is not needed.
  for (const m of String(input).matchAll(re)) {
    const entry = { match: m[0] };
    if (groups && m.length > 1) entry.groups = m.slice(1);
    if (named && m.groups) entry.named = { ...m.groups };
    if (index) entry.index = m.index;
    results.push(entry);
  }
  return results;
}
|
|
628
|
+
|
|
629
|
+
/**
 * Extract and transform matches in one pass.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {function(match: string, groups: string[], named: object, index: number): *} transform
 *   called once per match; `named` is `{}` when the pattern has no named groups
 * @returns {Array} the values returned by `transform`, in match order
 */
function extractMap(input, pattern, transform) {
  const re = addFlags(_toRegex(pattern), 'g');
  // matchAll steps over zero-length matches, so patterns like /x*/ cannot
  // trap the loop the way a bare exec() loop would.
  return Array.from(String(input).matchAll(re), (m) =>
    transform(m[0], m.slice(1), m.groups || {}, m.index));
}
|
|
645
|
+
|
|
646
|
+
/**
 * Replace matches using a pattern plus a replacement string or callback.
 * A richer String.prototype.replace: the callback receives the capture
 * groups as an array and the named groups as an object.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {string|function(match, groups, named, index, input): string} replacer
 *   a string is handed to String.prototype.replace unchanged (so `$1`, `$&` …
 *   keep their special meaning); a function is called once per match
 * @param {object} [options]
 * @param {boolean} [options.global=true] - replace every match; `false` replaces only the first
 * @returns {string}
 */
function replace(input, pattern, replacer, options = {}) {
  const global = options.global !== false;
  let re = _toRegex(pattern);
  const hasGlobalFlag = re.flags.includes('g');
  if (global && !hasGlobalFlag) {
    re = addFlags(re, 'g');
  } else if (!global && hasGlobalFlag) {
    // Honor global:false even when the caller's pattern already carries 'g'.
    re = removeFlags(re, 'g');
  }

  if (typeof replacer === 'string') {
    return input.replace(re, replacer);
  }

  return input.replace(re, (match, ...rest) => {
    // Native callback arguments after `match` are: p1..pn, offset, string,
    // and - only when the pattern has named groups - a trailing groups object.
    const last = rest[rest.length - 1];
    const named = typeof last === 'object' && last !== null ? rest.pop() : {};
    rest.pop(); // the full subject string; the outer `input` is passed instead
    const index = rest.pop();
    return replacer(match, rest, named, index, input);
  });
}
|
|
672
|
+
|
|
673
|
+
/**
 * In-place transform built on replace().
 *
 * The transformer is called for every match; returning `undefined` leaves
 * that match untouched, any other value is stringified and substituted.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {function(match, groups, named, index): *} transformer
 * @returns {string}
 */
function transform(input, pattern, transformer) {
  const keepOrSubstitute = (match, groups, named, index) => {
    const produced = transformer(match, groups, named, index);
    if (produced === undefined) {
      return match;
    }
    return String(produced);
  };
  return replace(input, pattern, keepOrSubstitute);
}
|
|
683
|
+
|
|
684
|
+
/**
 * Split a string on a pattern, optionally keeping the delimiters.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {object} [options]
 * @param {boolean} [options.keepDelimiters=false] - interleave the matched
 *   delimiters with the pieces (relies on String.prototype.split emitting
 *   capture groups)
 * @param {number} [options.limit] - maximum number of array entries returned
 * @returns {string[]}
 */
function splitOn(input, pattern, options = {}) {
  const re = _toRegex(pattern);
  const { limit, keepDelimiters = false } = options;

  if (!keepDelimiters) {
    return limit !== undefined ? input.split(re, limit) : input.split(re);
  }

  // split() only emits delimiters that sit inside a capturing group. A source
  // that merely starts with "(" is not enough: "(?:", "(?=", "(?!", "(?<=" and
  // "(?<!" do not capture, so those sources are wrapped as well. Plain "(" and
  // named "(?<name>" groups are left as they are.
  const startsWithCapture = /^\((?!\?)|^\(\?<(?![=!])/.test(re.source);
  const source = startsWithCapture ? re.source : `(${re.source})`;
  // The 'g' flag has no effect on split(), so the original flags are reused.
  const parts = input.split(new RegExp(source, re.flags));
  return limit !== undefined ? parts.slice(0, limit) : parts;
}
|
|
705
|
+
|
|
706
|
+
/**
 * Locate the first match and report it in detail.
 *
 * The 'g' flag is removed before matching.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @returns {{ match: string, index: number, groups: string[], named: object }|null}
 *   `null` when nothing matches; `named` is `{}` when the pattern has no named groups
 */
function findFirst(input, pattern) {
  const hit = removeFlags(_toRegex(pattern), 'g').exec(input);
  if (!hit) {
    return null;
  }
  return {
    match: hit[0],
    index: hit.index,
    groups: hit.slice(1),
    named: hit.groups || {},
  };
}
|
|
718
|
+
|
|
719
|
+
/**
 * Locate every match and report each in detail.
 *
 * Shorthand for extract() with the `groups`, `named` and `index` options
 * all switched on.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @returns {Array<{ match: string, index: number, groups: string[], named: object }>}
 */
function findAll(input, pattern) {
  const everything = { groups: true, named: true, index: true };
  return extract(input, pattern, everything);
}
|
|
728
|
+
|
|
729
|
+
/**
 * Count how many times a pattern matches in a string.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @returns {number} number of matches found with the 'g' flag applied; 0 when none
 */
function countMatches(input, pattern) {
  const globalRe = addFlags(_toRegex(pattern), 'g');
  const hits = input.match(globalRe) || [];
  return hits.length;
}
|
|
736
|
+
|
|
737
|
+
/**
 * Test a string against a pattern.
 *
 * The pattern is resolved with _toRegex and checked with RegExp.prototype.test.
 * NOTE(review): nothing here anchors the pattern, so this only amounts to a
 * full-string match when the pattern itself carries `^…$` (see anchored()).
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @returns {boolean}
 */
function test(input, pattern) {
  const re = _toRegex(pattern);
  return re.test(input);
}
|
|
743
|
+
|
|
744
|
+
/**
 * Check whether a pattern occurs anywhere in a string.
 *
 * The 'g' flag is removed before testing.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @returns {boolean}
 */
function contains(input, pattern) {
  // addFlags with an empty flag string adds nothing; presumably it is used to
  // get a fresh copy of the regex - confirm against addFlags.
  const copy = addFlags(_toRegex(pattern), '');
  const nonGlobal = removeFlags(copy, 'g');
  return nonGlobal.test(input);
}
|
|
750
|
+
|
|
751
|
+
// ────────────────────────────────────────────────────────────
|
|
752
|
+
// SECTION 8: VALIDATION ENGINE
|
|
753
|
+
// ────────────────────────────────────────────────────────────
|
|
754
|
+
|
|
755
|
+
/**
|
|
756
|
+
* Validation rule: a named test with a custom error message.
|
|
757
|
+
* @typedef {{ pattern: RegExp|KitPattern, message: string, name?: string }} ValidationRule
|
|
758
|
+
*/
|
|
759
|
+
|
|
760
|
+
/**
 * Validate a string against one or more rules and report the outcome per rule.
 *
 * @param {string} input
 * @param {ValidationRule[]} rules - each rule is `{ pattern, message, name? }`
 * @returns {{ valid: boolean, errors: string[], results: Array<{name, passed, message}> }}
 *   `results` has one entry per rule (name falls back to the regex text,
 *   message is `null` for passing rules); `errors` holds the messages of the
 *   failing rules; `valid` is true when there are none.
 */
function validate(input, rules) {
  const results = rules.map((rule) => {
    const re = _toRegex(rule.pattern);
    // A rule regex carrying 'g' or 'y' remembers lastIndex between test()
    // calls; without a reset the same rule could pass and fail alternately.
    re.lastIndex = 0;
    const passed = re.test(input);
    re.lastIndex = 0;
    return {
      name: rule.name || re.toString(),
      passed,
      message: passed ? null : rule.message || `Failed: ${re}`,
    };
  });
  const errors = results.filter((r) => !r.passed).map((r) => r.message);
  return { valid: errors.length === 0, errors, results };
}
|
|
781
|
+
|
|
782
|
+
/**
 * Pre-built validators. Each one takes a string and returns the
 * `{ valid, errors, results }` object produced by validate().
 */
const Validators = {
  email: (s) => validate(s, [
    { name: 'format', pattern: Patterns.internet.EMAIL, message: 'Invalid email format' },
  ]),
  url: (s) => validate(s, [
    { name: 'format', pattern: Patterns.internet.URL, message: 'Invalid URL format' },
  ]),
  ipv4: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.internet.IPV4), message: 'Invalid IPv4 address' },
  ]),
  semver: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.code.SEMVER), message: 'Invalid semver (expected x.y.z)' },
  ]),
  uuid: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.code.UUID), message: 'Invalid UUID' },
  ]),
  dateISO: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.datetime.DATE_ISO), message: 'Invalid ISO date (expected YYYY-MM-DD)' },
  ]),
  hexColor: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.code.HEX_COLOR), message: 'Invalid hex color' },
  ]),
  phoneUS: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.personal.PHONE_US), message: 'Invalid US phone number' },
  ]),
  creditCard: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.personal.CREDIT_CARD), message: 'Invalid credit card number' },
  ]),
  slug: (s) => validate(s, [
    { name: 'format', pattern: anchored(Patterns.internet.SLUG), message: 'Invalid slug (lowercase letters, numbers, hyphens only)' },
  ]),

  /**
   * Password strength validator. Emits one rule per enabled requirement.
   *
   * @param {string} s
   * @param {object} [opts]
   * @param {number} [opts.minLength=8]
   * @param {boolean} [opts.requireUpper=true]
   * @param {boolean} [opts.requireLower=true]
   * @param {boolean} [opts.requireDigit=true]
   * @param {boolean} [opts.requireSpecial=true]
   * @returns {{ valid: boolean, errors: string[], results: Array }}
   */
  password: (s, opts = {}) => {
    // Destructuring defaults apply only to `undefined`, so an explicit
    // `false` or `0` from the caller is respected.
    const {
      minLength: min = 8,
      requireUpper: upper = true,
      requireLower: lower = true,
      requireDigit: digit = true,
      requireSpecial: special = true,
    } = opts;

    const rules = [
      // [\s\S] instead of "." so line breaks count towards the length;
      // ".{n,}" needs n consecutive non-newline characters.
      { name: 'minLength', pattern: new RegExp(`[\\s\\S]{${min},}`), message: `Must be at least ${min} characters` },
    ];
    if (upper) rules.push({ name: 'uppercase', pattern: /[A-Z]/, message: 'Must contain an uppercase letter' });
    if (lower) rules.push({ name: 'lowercase', pattern: /[a-z]/, message: 'Must contain a lowercase letter' });
    if (digit) rules.push({ name: 'digit', pattern: /\d/, message: 'Must contain a digit' });
    if (special) rules.push({ name: 'special', pattern: /[!@#$%^&*(),.?":{}|<>]/, message: 'Must contain a special character' });
    return validate(s, rules);
  },

  /**
   * Build a custom validator from a rules array.
   * @param {ValidationRule[]} rules
   * @returns {function(string): object} function that validates a string against `rules`
   */
  custom: (rules) => (s) => validate(s, rules),
};
|
|
848
|
+
|
|
849
|
+
// ────────────────────────────────────────────────────────────
|
|
850
|
+
// SECTION 9: TOKENIZER / LEXER
|
|
851
|
+
// ────────────────────────────────────────────────────────────
|
|
852
|
+
|
|
853
|
+
/**
|
|
854
|
+
* @typedef {{ type: string, pattern: RegExp|string|KitPattern, transform?: function }} TokenRule
|
|
855
|
+
*/
|
|
856
|
+
|
|
857
|
+
/**
 * Tokenize an input string using an ordered list of token rules.
 * At every position the rules are tried in order and the first one that
 * matches a non-empty prefix of the remaining input wins.
 *
 * @param {string} input
 * @param {TokenRule[]} rules
 * @param {object} [options]
 * @param {boolean} [options.skipUnknown=false] - drop unrecognized characters
 *   instead of emitting an `UNKNOWN` token for each
 * @param {boolean} [options.includePositions=false] - add 1-based `line`/`col`
 *   of the token start to every token
 * @returns {Array<{ type: string, value: string, raw: string, line?: number, col?: number }>}
 */
function tokenize(input, rules, options = {}) {
  const { skipUnknown = false, includePositions = false } = options;

  const compiled = rules.map((rule) => {
    const base = _toRegex(rule.pattern);
    // 'g' and 'y' make exec() depend on lastIndex; every exec below runs on
    // a fresh slice and must start at offset 0, so both flags are dropped.
    const flags = base.flags.replace(/[gy]/g, '');
    return { ...rule, re: new RegExp(`^(?:${base.source})`, flags) };
  });

  const tokens = [];
  let pos = 0;
  let line = 1;
  let col = 1;

  // Move the line/col cursor over consumed text.
  const advance = (text) => {
    for (const ch of text) {
      if (ch === '\n') {
        line += 1;
        col = 1;
      } else {
        col += 1;
      }
    }
  };

  while (pos < input.length) {
    const remaining = input.slice(pos);
    let matched = false;

    for (const rule of compiled) {
      const m = rule.re.exec(remaining);
      // Ignore empty matches (they would never advance `pos`) and, for
      // multiline rules, matches where "^" hit a later line start.
      if (!m || m.index !== 0 || m[0].length === 0) continue;

      const raw = m[0];
      const value = rule.transform ? rule.transform(raw, m.slice(1), m.groups || {}) : raw;
      const tok = { type: rule.type, value, raw };
      if (includePositions) {
        tok.line = line;
        tok.col = col;
      }
      advance(raw);
      tokens.push(tok);
      pos += raw.length;
      matched = true;
      break;
    }

    if (matched) continue;

    const ch = input[pos];
    if (!skipUnknown) {
      const tok = { type: 'UNKNOWN', value: ch, raw: ch };
      if (includePositions) {
        tok.line = line;
        tok.col = col;
      }
      tokens.push(tok);
    }
    // Skipped and unknown characters still move the cursor (including
    // newlines), otherwise later positions drift.
    advance(ch);
    pos += 1;
  }

  return tokens;
}
|
|
922
|
+
|
|
923
|
+
/**
 * Pre-built rule sets for tokenize(). Each entry is an ordered TokenRule
 * array; order matters because the first matching rule wins.
 */
const CodeLexer = {
  /**
   * JavaScript-like token rules. Comments and strings come first so their
   * contents are not split up; KEYWORD precedes IDENTIFIER so reserved words
   * are not reported as identifiers.
   */
  JS: [
    { type: 'COMMENT_BLOCK', pattern: /\/\*[\s\S]*?\*\// },
    { type: 'COMMENT_LINE', pattern: /\/\/[^\n]*/ },
    { type: 'STRING_DOUBLE', pattern: /"(?:[^"\\]|\\.)*"/ },
    { type: 'STRING_SINGLE', pattern: /'(?:[^'\\]|\\.)*'/ },
    { type: 'TEMPLATE', pattern: /`(?:[^`\\]|\\.)*`/ },
    { type: 'NUMBER', pattern: /0x[0-9a-fA-F]+|0b[01]+|0o[0-7]+|\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/ },
    { type: 'KEYWORD', pattern: /\b(?:var|let|const|function|return|if|else|for|while|do|switch|case|break|continue|new|delete|typeof|instanceof|in|of|class|extends|import|export|default|async|await|try|catch|finally|throw|void|null|undefined|true|false|this|super)\b/ },
    { type: 'IDENTIFIER', pattern: /[a-zA-Z_$][a-zA-Z0-9_$]*/ },
    { type: 'OPERATOR', pattern: /===|!==|=>|<=|>=|&&|\|\||[+\-*/%^&|~<>!=?:.]/ },
    { type: 'PUNCTUATION', pattern: /[{}[\]();,]/ },
    { type: 'WHITESPACE', pattern: /\s+/ },
  ],

  /**
   * Simple expression tokenizer (numbers, ops, parens, identifiers).
   * Note: NUMBER accepts a leading "-", so "3-2" lexes as NUMBER 3, NUMBER -2.
   */
  EXPR: [
    { type: 'NUMBER', pattern: /-?\d+(?:\.\d+)?/ },
    { type: 'IDENTIFIER', pattern: /[a-zA-Z_]\w*/ },
    { type: 'OPERATOR', pattern: /[+\-*/%^]/ },
    { type: 'LPAREN', pattern: /\(/ },
    { type: 'RPAREN', pattern: /\)/ },
    { type: 'COMMA', pattern: /,/ },
    { type: 'WHITESPACE', pattern: /\s+/ },
  ],

  /**
   * CSV tokenizer.
   * QUOTED accepts both backslash escapes and doubled quotes ("") inside a
   * quoted field; its transform strips the outer quotes and un-doubles "".
   * Without the `""` alternative a field such as "a""b" would be cut in two
   * and the un-doubling in the transform could never apply.
   */
  CSV: [
    { type: 'QUOTED', pattern: /"(?:[^"\\]|\\.|"")*"/, transform: v => v.slice(1, -1).replace(/""/g, '"') },
    { type: 'VALUE', pattern: /[^,\n\r"]+/ },
    { type: 'COMMA', pattern: /,/ },
    { type: 'NEWLINE', pattern: /\r?\n/ },
  ],
};
|
|
967
|
+
|
|
968
|
+
// ────────────────────────────────────────────────────────────
|
|
969
|
+
// SECTION 10: FUZZY / APPROXIMATE MATCHING
|
|
970
|
+
// ────────────────────────────────────────────────────────────
|
|
971
|
+
|
|
972
|
+
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions that turn one
 * into the other. Characters are compared as UTF-16 code units.
 *
 * Only two rows of the DP table are kept, so memory is proportional to the
 * shorter string rather than to the product of both lengths.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
function levenshtein(a, b) {
  if (a === b) return 0;

  // The distance is symmetric; lay the shorter string along the row.
  const [longer, shorter] = a.length >= b.length ? [a, b] : [b, a];
  const width = shorter.length;
  if (width === 0) return longer.length;

  let prev = Array.from({ length: width + 1 }, (_, j) => j);
  let curr = new Array(width + 1);

  for (let i = 1; i <= longer.length; i++) {
    curr[0] = i;
    for (let j = 1; j <= width; j++) {
      curr[j] = longer[i - 1] === shorter[j - 1]
        ? prev[j - 1]
        : 1 + Math.min(prev[j], curr[j - 1], prev[j - 1]);
    }
    [prev, curr] = [curr, prev];
  }
  // After the final swap the last computed row lives in `prev`.
  return prev[width];
}
|
|
992
|
+
|
|
993
|
+
/**
 * Normalized similarity of two strings, derived from the Levenshtein
 * distance: 1 means identical, 0 means every position differs.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} value in the range 0–1; two empty strings score 1
 */
function similarity(a, b) {
  const longest = Math.max(a.length, b.length);
  return longest === 0 ? 1 : 1 - levenshtein(a, b) / longest;
}
|
|
1004
|
+
|
|
1005
|
+
/**
 * Build a regex that tolerates up to `maxErrors` single-character edits
 * (deletion, substitution, insertion, adjacent transposition) of `word`.
 *
 * Strategy: expand the word into an alternation of edited variants. The
 * number of variants grows quickly with word length and `maxErrors`, so this
 * suits short words (up to ~30 chars); use fuzzySearch() for longer text.
 *
 * Variants are tracked as lists of regex atoms (an escaped literal, "." for a
 * substituted character, ".?" for an optional inserted character). Applying
 * further edits to atom lists - instead of to already-rendered regex source -
 * keeps wildcards and escape sequences intact when `maxErrors` is 2 or more.
 *
 * @param {string} word
 * @param {number} [maxErrors=1]
 * @param {string} [flags='i']
 * @returns {RegExp} unanchored regex of the form `(?:variant|variant|…)`,
 *   exact word first
 */
function fuzzyPattern(word, maxErrors = 1, flags = 'i') {
  if (maxErrors === 0) return new RegExp(escapeRegex(word), flags);

  const ANY_CHAR = '.';
  const OPTIONAL_CHAR = '.?';

  // All atom lists one edit away from `atoms`.
  const editsOf = (atoms) => {
    const out = [];
    const len = atoms.length;
    // Deletions
    for (let i = 0; i < len; i++) {
      out.push([...atoms.slice(0, i), ...atoms.slice(i + 1)]);
    }
    // Substitutions (any character in place of atom i)
    for (let i = 0; i < len; i++) {
      out.push([...atoms.slice(0, i), ANY_CHAR, ...atoms.slice(i + 1)]);
    }
    // Insertions (one optional extra character at position i)
    for (let i = 0; i <= len; i++) {
      out.push([...atoms.slice(0, i), OPTIONAL_CHAR, ...atoms.slice(i)]);
    }
    // Transpositions of neighbours
    for (let i = 0; i < len - 1; i++) {
      out.push([...atoms.slice(0, i), atoms[i + 1], atoms[i], ...atoms.slice(i + 2)]);
    }
    return out;
  };

  const exact = Array.from(String(word), (ch) => escapeRegex(ch));
  // Insertion-ordered and de-duplicated by rendered source.
  const variants = new Set([exact.join('')]);

  let frontier = [exact];
  for (let e = 0; e < maxErrors; e++) {
    const next = [];
    for (const atoms of frontier) {
      for (const edited of editsOf(atoms)) {
        const source = edited.join('');
        // A variant seen earlier has already been expanded with at least as
        // much edit budget left, so only new ones join the next frontier.
        if (!variants.has(source)) {
          variants.add(source);
          next.push(edited);
        }
      }
    }
    frontier = next;
  }

  return new RegExp(`(?:${[...variants].join('|')})`, flags);
}
|
|
1050
|
+
|
|
1051
|
+
/**
 * Fuzzy search: find all approximate occurrences of `query` in `text`.
 *
 * Slides windows of length `query.length ± maxErrors` over the text and keeps
 * every window whose case-insensitive Levenshtein distance to the query is
 * within bounds. Overlapping windows are all reported.
 *
 * @param {string} text
 * @param {string} query
 * @param {object} [options]
 * @param {number} [options.maxErrors=1] - max edit distance
 * @param {number} [options.minSimilarity] - additionally require this similarity (0-1)
 * @returns {Array<{ match: string, index: number, distance: number, similarity: number }>}
 *   sorted by distance, then by index; empty when `query` is empty
 */
function fuzzySearch(text, query, options = {}) {
  const { maxErrors: maxErr = 1, minSimilarity: minSim } = options;
  const qlen = query.length;
  // An empty query would "match" every substring of up to maxErrors
  // characters, which is never a useful answer.
  if (qlen === 0) return [];

  const needle = query.toLowerCase(); // loop-invariant
  const results = [];

  // Every (start, length) pair is visited exactly once, so no de-duplication
  // is required.
  for (let wlen = Math.max(1, qlen - maxErr); wlen <= qlen + maxErr; wlen++) {
    for (let i = 0; i <= text.length - wlen; i++) {
      const candidate = text.slice(i, i + wlen);
      const dist = levenshtein(candidate.toLowerCase(), needle);
      if (!(dist <= maxErr)) continue;

      const sim = 1 - dist / Math.max(candidate.length, qlen);
      if (minSim !== undefined && !(sim >= minSim)) continue;

      results.push({ match: candidate, index: i, distance: dist, similarity: parseFloat(sim.toFixed(4)) });
    }
  }

  results.sort((a, b) => a.distance - b.distance || a.index - b.index);
  return results;
}
|
|
1087
|
+
|
|
1088
|
+
/**
 * Fuzzy filter: keep the strings of a list that approximately match a query.
 *
 * Each item is compared as a whole (case-insensitively) against the query.
 * An item is kept when its edit distance is at most `maxErrors` OR its
 * similarity is at least `minSimilarity`.
 *
 * @param {string[]} list
 * @param {string} query
 * @param {object} [options]
 * @param {number} [options.maxErrors=1]
 * @param {number} [options.minSimilarity=0.5]
 * @returns {Array<{ item: string, distance: number, similarity: number }>}
 *   sorted by similarity (best first), then by distance
 */
function fuzzyFilter(list, query, options = {}) {
  const { maxErrors: maxErr = 1, minSimilarity: minSim = 0.5 } = options;
  const needle = query.toLowerCase(); // loop-invariant

  return list
    .map((item) => {
      const dist = levenshtein(item.toLowerCase(), needle);
      const longest = Math.max(item.length, query.length);
      // Two empty strings are identical; avoid 0/0 producing NaN.
      const sim = longest === 0 ? 1 : 1 - dist / longest;
      return { item, distance: dist, similarity: parseFloat(sim.toFixed(4)) };
    })
    .filter((r) => r.distance <= maxErr || r.similarity >= minSim)
    .sort((a, b) => b.similarity - a.similarity || a.distance - b.distance);
}
|
|
1109
|
+
|
|
1110
|
+
/**
 * Decorate the fuzzy matches of a query inside a text.
 *
 * Matches come from fuzzySearch(); they are taken in the order it returns
 * them and any match overlapping an already accepted one is dropped, so the
 * decorated ranges never overlap.
 *
 * @param {string} text
 * @param {string} query
 * @param {function(match: string): string} [highlighter] - defaults to wrapping in `**`
 * @param {object} [options] - forwarded to fuzzySearch()
 * @returns {string} the text with each accepted range passed through the highlighter
 */
function fuzzyHighlight(text, query, highlighter, options = {}) {
  const decorate = highlighter || ((m) => `**${m}**`);
  const hits = fuzzySearch(text, query, options);
  if (!hits.length) {
    return text;
  }

  // Greedily accept hits that do not intersect an earlier accepted range.
  const accepted = hits.reduce((kept, hit) => {
    const start = hit.index;
    const end = start + hit.match.length;
    const collides = kept.some((range) => start < range.end && end > range.start);
    if (!collides) {
      kept.push({ start, end });
    }
    return kept;
  }, []);
  accepted.sort((left, right) => left.start - right.start);

  // Stitch the output together left to right.
  let output = '';
  let cursor = 0;
  accepted.forEach(({ start, end }) => {
    output += text.slice(cursor, start) + decorate(text.slice(start, end));
    cursor = end;
  });
  return output + text.slice(cursor);
}
|
|
1142
|
+
|
|
1143
|
+
// ────────────────────────────────────────────────────────────
|
|
1144
|
+
// SECTION 11: UTILITY FUNCTIONS
|
|
1145
|
+
// ────────────────────────────────────────────────────────────
|
|
1146
|
+
|
|
1147
|
+
/**
 * Decorate every regex match in a string.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {function(match: string): string} [highlighter] - receives only the
 *   matched text; defaults to wrapping it in square brackets
 * @returns {string}
 */
function highlight(input, pattern, highlighter) {
  const decorate = highlighter || ((m) => `[${m}]`);
  const onMatch = (match) => decorate(match);
  return replace(input, pattern, onMatch);
}
|
|
1158
|
+
|
|
1159
|
+
/**
 * Replace every match with a mask.
 *
 * The mask is passed to replace() as a string replacer.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {string} [mask='[REDACTED]']
 * @returns {string}
 */
function redact(input, pattern, mask = '[REDACTED]') {
  const masked = replace(input, pattern, mask);
  return masked;
}
|
|
1169
|
+
|
|
1170
|
+
/**
 * Get the value of a named group from the first match.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {string} groupName
 * @returns {string|null} the captured text (possibly the empty string), or
 *   `null` when there is no match, the pattern has no named groups, or the
 *   group did not participate in the match
 */
function namedGroup(input, pattern, groupName) {
  const m = _toRegex(pattern).exec(input);
  // `??` rather than `||`: a group that matched the empty string is still a
  // real capture and must not be reported as missing.
  return m?.groups?.[groupName] ?? null;
}
|
|
1181
|
+
|
|
1182
|
+
/**
 * Explain a regex pattern in plain English (best-effort).
 *
 * The source is consumed left to right; at each step the first entry of the
 * token table that matches the head of the remaining source contributes a
 * phrase. Characters no entry recognizes are echoed in double quotes.
 *
 * @param {RegExp|string|KitPattern} pattern
 * @returns {string} an optional "Flags: …" line followed by a
 *   "Pattern: a → b → …" line, joined with a newline
 */
function explain(pattern) {
  const re = _toRegex(pattern);
  const src = re.source;
  const flags = re.flags;

  const flagDesc = {
    d: 'hasIndices (record match indices)',
    g: 'global (find all matches)',
    i: 'case-insensitive',
    m: 'multiline (^ and $ match line boundaries)',
    s: 'dotAll (. matches newlines)',
    u: 'unicode',
    v: 'unicodeSets',
    y: 'sticky (match from lastIndex)',
  };

  const parts = [];
  if (flags) {
    parts.push('Flags: ' + flags.split('').map(f => flagDesc[f] || f).join(', '));
  }

  // Ordered token table: more specific entries must precede the general ones
  // they overlap with (lazy quantifiers before greedy ones, lookbehinds
  // before named groups, special groups before the plain "(").
  const tokenMap = [
    [/^\^/,                  () => 'Start of string'],
    [/^\$/,                  () => 'End of string'],
    [/^\\b/,                 () => 'Word boundary'],
    [/^\\B/,                 () => 'Non-word boundary'],
    [/^\\d/,                 () => 'Any digit [0-9]'],
    [/^\\D/,                 () => 'Any non-digit'],
    [/^\\w/,                 () => 'Any word character [a-zA-Z0-9_]'],
    [/^\\W/,                 () => 'Any non-word character'],
    [/^\\s/,                 () => 'Any whitespace'],
    [/^\\S/,                 () => 'Any non-whitespace'],
    [/^\\n/,                 () => 'Newline'],
    [/^\\t/,                 () => 'Tab'],
    [/^\./,                  () => 'Any character (except newline)'],
    [/^\?\?/,                () => 'Zero or one (lazy)'],
    [/^\?/,                  () => 'Zero or one (optional)'],
    [/^\*\?/,                () => 'Zero or more (lazy)'],
    [/^\*/,                  () => 'Zero or more'],
    [/^\+\?/,                () => 'One or more (lazy)'],
    [/^\+/,                  () => 'One or more'],
    [/^\{(\d+),(\d+)\}\?/,   m => `Between ${m[1]} and ${m[2]} times (lazy)`],
    [/^\{(\d+),(\d+)\}/,     m => `Between ${m[1]} and ${m[2]} times`],
    [/^\{(\d+),\}/,          m => `At least ${m[1]} times`],
    [/^\{(\d+)\}/,           m => `Exactly ${m[1]} times`],
    // Lookbehinds also start with "(?<"; they are tested before the
    // named-group entry so "(?<=x)…>" is not mistaken for a group name.
    [/^\(\?<=/,              () => 'Positive lookbehind: ('],
    [/^\(\?<!/,              () => 'Negative lookbehind: ('],
    [/^\(\?<([^>]+)>/,       m => `Named capture group "${m[1]}"`],
    [/^\(\?:/,               () => 'Non-capturing group: ('],
    [/^\(\?=/,               () => 'Positive lookahead: ('],
    [/^\(\?!/,               () => 'Negative lookahead: ('],
    [/^\(/,                  () => 'Capturing group: ('],
    [/^\)/,                  () => 'End group: )'],
    [/^\|/,                  () => 'OR'],
    // Escaped characters (e.g. "\]") do not terminate the class.
    [/^\[(\^)?((?:\\.|[^\]\\])+)\]/, m => `Character class${m[1] ? ' NOT' : ''}: [${m[2]}]`],
    [/^\\(.)/,               m => `Literal "${m[1]}"`],
    [/^([a-zA-Z0-9 ])/,      m => `Literal "${m[1]}"`],
  ];

  const tokenParts = [];
  let remaining = src;
  // Every pass consumes at least one character (each table entry matches a
  // non-empty prefix, the fallback drops one character), so the loop ends.
  while (remaining.length) {
    let found = false;
    for (const [tokenRe, describe] of tokenMap) {
      const m = tokenRe.exec(remaining);
      if (m) {
        tokenParts.push(describe(m));
        remaining = remaining.slice(m[0].length);
        found = true;
        break;
      }
    }
    if (!found) {
      tokenParts.push(`"${remaining[0]}"`);
      remaining = remaining.slice(1);
    }
  }

  if (tokenParts.length) parts.push('Pattern: ' + tokenParts.join(' → '));
  return parts.join('\n');
}
|
|
1265
|
+
|
|
1266
|
+
/**
 * Benchmark: time how long it takes to find every match of a pattern in
 * `input`, repeated `iterations` times.
 *
 * @param {string} input
 * @param {RegExp|string|KitPattern} pattern
 * @param {number} [iterations=10000]
 * @returns {{ ms: number, perOpNs: number, opsPerSec: number }}
 *   total elapsed milliseconds, nanoseconds per iteration, and iterations
 *   per second (`Infinity` if the run was too fast to measure)
 */
function benchmark(input, pattern, iterations = 10000) {
  const re = addFlags(_toRegex(pattern), 'g');
  // Prefer the sub-millisecond clock where the runtime offers one; with
  // Date.now() short runs routinely measure as 0 ms.
  const hasPerf = typeof performance !== 'undefined' && typeof performance.now === 'function';
  const now = hasPerf ? () => performance.now() : () => Date.now();

  const start = now();
  for (let i = 0; i < iterations; i++) {
    re.lastIndex = 0;
    let m;
    while ((m = re.exec(input)) !== null) {
      // exec() does not move lastIndex after an empty match; step over it or
      // patterns like /a*/ would never finish.
      if (m[0] === '') re.lastIndex += 1;
    }
  }
  const ms = now() - start;

  const perOpNs = (ms * 1e6) / iterations;
  const opsPerSec = Math.round(1e9 / perOpNs);
  return {
    ms: parseFloat(ms.toFixed(3)),
    perOpNs: parseFloat(perOpNs.toFixed(2)),
    opsPerSec,
  };
}
|
|
1282
|
+
|
|
1283
|
+
// ────────────────────────────────────────────────────────────
|
|
1284
|
+
// EXPORTS
|
|
1285
|
+
// ────────────────────────────────────────────────────────────
|
|
1286
|
+
|
|
1287
|
+
module.exports = {
|
|
1288
|
+
kitdef: {
|
|
1289
|
+
// ── Utilities ───────────────────────────────────
|
|
1290
|
+
escapeRegex,
|
|
1291
|
+
withFlags,
|
|
1292
|
+
addFlags,
|
|
1293
|
+
removeFlags,
|
|
1294
|
+
Flags,
|
|
1295
|
+
|
|
1296
|
+
// ── Pattern Registry ────────────────────────────
|
|
1297
|
+
registerPattern,
|
|
1298
|
+
unregisterPattern,
|
|
1299
|
+
getPattern,
|
|
1300
|
+
listPatterns,
|
|
1301
|
+
|
|
1302
|
+
// ── Built-in Pattern Packs ──────────────────────
|
|
1303
|
+
Patterns,
|
|
1304
|
+
|
|
1305
|
+
// ── Builder API ─────────────────────────────────
|
|
1306
|
+
KitPattern,
|
|
1307
|
+
|
|
1308
|
+
// ── Composition ─────────────────────────────────
|
|
1309
|
+
concat,
|
|
1310
|
+
alternate,
|
|
1311
|
+
quantify,
|
|
1312
|
+
capture,
|
|
1313
|
+
namedCapture,
|
|
1314
|
+
optional,
|
|
1315
|
+
anchored,
|
|
1316
|
+
fenced,
|
|
1317
|
+
|
|
1318
|
+
// ── Extract / Transform / Replace ───────────────
|
|
1319
|
+
extract,
|
|
1320
|
+
extractMap,
|
|
1321
|
+
replace,
|
|
1322
|
+
transform,
|
|
1323
|
+
splitOn,
|
|
1324
|
+
findFirst,
|
|
1325
|
+
findAll,
|
|
1326
|
+
countMatches,
|
|
1327
|
+
test,
|
|
1328
|
+
contains,
|
|
1329
|
+
highlight,
|
|
1330
|
+
redact,
|
|
1331
|
+
namedGroup,
|
|
1332
|
+
|
|
1333
|
+
// ── Validation ──────────────────────────────────
|
|
1334
|
+
validate,
|
|
1335
|
+
Validators,
|
|
1336
|
+
|
|
1337
|
+
// ── Tokenizer / Lexer ───────────────────────────
|
|
1338
|
+
tokenize,
|
|
1339
|
+
CodeLexer,
|
|
1340
|
+
|
|
1341
|
+
// ── Fuzzy Matching ──────────────────────────────
|
|
1342
|
+
levenshtein,
|
|
1343
|
+
similarity,
|
|
1344
|
+
fuzzyPattern,
|
|
1345
|
+
fuzzySearch,
|
|
1346
|
+
fuzzyFilter,
|
|
1347
|
+
fuzzyHighlight,
|
|
1348
|
+
|
|
1349
|
+
// ── Dev Tools ───────────────────────────────────
|
|
1350
|
+
explain,
|
|
1351
|
+
benchmark,
|
|
1352
|
+
}
|
|
1353
|
+
};
|