@nkardaz/typography-rules 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +911 -0
- package/dist/api/blacklist.d.ts +72 -0
- package/dist/api/htmlNodes.d.ts +30 -0
- package/dist/api/index.d.ts +6 -0
- package/dist/api/newRule.d.ts +51 -0
- package/dist/api/registerRule.d.ts +27 -0
- package/dist/api/rulesInit.d.ts +49 -0
- package/dist/functions/chemNotation.d.ts +10 -0
- package/dist/functions/clearSpaces.d.ts +16 -0
- package/dist/functions/index.cjs +514 -0
- package/dist/functions/index.d.ts +8 -0
- package/dist/functions/index.mjs +491 -0
- package/dist/functions/rubyText.d.ts +11 -0
- package/dist/functions/runt.d.ts +3 -0
- package/dist/functions/smartNumberGrouping.d.ts +25 -0
- package/dist/functions/smartQuotes.d.ts +29 -0
- package/dist/functions/wrapWithTag.d.ts +42 -0
- package/dist/glyphs/index.cjs +737 -0
- package/dist/glyphs/index.d.ts +53 -0
- package/dist/glyphs/index.mjs +714 -0
- package/dist/glyphs/proto.d.ts +11 -0
- package/dist/glyphs/registry.d.ts +728 -0
- package/dist/glyphs/types.d.ts +151 -0
- package/dist/helpers/index.cjs +268 -0
- package/dist/helpers/index.d.ts +133 -0
- package/dist/helpers/index.mjs +245 -0
- package/dist/helpers/types.d.ts +71 -0
- package/dist/index.cjs +985 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.mjs +977 -0
- package/dist/style/index.d.ts +2 -0
- package/dist/style/main.css +16 -0
- package/dist/types.d.ts +223 -0
- package/dist/typography/aliases.d.ts +129 -0
- package/dist/typography/expressions/common.d.ts +29 -0
- package/dist/typography/expressions/en.d.ts +25 -0
- package/dist/typography/expressions/ru.d.ts +29 -0
- package/dist/typography/markup/common.d.ts +17 -0
- package/dist/typography/markup/en.d.ts +3 -0
- package/dist/typography/markup/index.d.ts +4 -0
- package/dist/typography/markup/ru.d.ts +3 -0
- package/dist/typography/sets/ang.d.ts +3 -0
- package/dist/typography/sets/common.d.ts +17 -0
- package/dist/typography/sets/en.d.ts +14 -0
- package/dist/typography/sets/index.d.ts +5 -0
- package/dist/typography/sets/ru.d.ts +16 -0
- package/dist/typography/store.d.ts +63 -0
- package/package.json +92 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/** A plain string-to-string map used as the base shape for all glyph dictionaries. */
|
|
2
|
+
export type GlyphStringMap = Record<string, string>;
|
|
3
|
+
/** Raw glyph key-value data passed into glyph set constructors. */
|
|
4
|
+
export type GlyphData = Record<string, string>;
|
|
5
|
+
/**
|
|
6
|
+
* A glyph set with typed key access and prototype utility methods.
|
|
7
|
+
*
|
|
8
|
+
* Combines the original data shape `T` with a set of shared methods
|
|
9
|
+
* attached via prototype (see `createCharacters`).
|
|
10
|
+
*
|
|
11
|
+
* @template T - Raw glyph dictionary shape
|
|
12
|
+
*/
|
|
13
|
+
export type GlyphSet<T extends GlyphData = GlyphData> = {
|
|
14
|
+
[K in keyof T]: T[K];
|
|
15
|
+
} & {
|
|
16
|
+
values(this: GlyphSet<T>): string[];
|
|
17
|
+
/**
|
|
18
|
+
* Joins all glyph values into a single string.
|
|
19
|
+
*
|
|
20
|
+
* @param joiner - Separator between values. Defaults to `'|'`.
|
|
21
|
+
* @returns Joined string of all values in the set.
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* DASHES.join() // '-|–|⸺|…'
|
|
25
|
+
* DASHES.join('') // '-–⸺…'
|
|
26
|
+
*/
|
|
27
|
+
join(joiner?: string): string;
|
|
28
|
+
/**
|
|
29
|
+
* Mutably inserts new entries into the glyph set.
|
|
30
|
+
*
|
|
31
|
+
* Note: TypeScript types are not updated after insertion.
|
|
32
|
+
* Use type casting if access to inserted keys is needed statically.
|
|
33
|
+
*
|
|
34
|
+
* @param entries - Key-value pairs to add to the set.
|
|
35
|
+
*
|
|
36
|
+
* @example
|
|
37
|
+
* DASHES.insert({ myDash: '\u2E1A' });
|
|
38
|
+
*/
|
|
39
|
+
insert(entries: GlyphData): void;
|
|
40
|
+
/**
|
|
41
|
+
* Checks whether a key exists in the glyph set.
|
|
42
|
+
*
|
|
43
|
+
* @param key - The key to look up.
|
|
44
|
+
* @returns `true` if the key is present, `false` otherwise.
|
|
45
|
+
*
|
|
46
|
+
* @example
|
|
47
|
+
* DASHES.hasKey('em') // true
|
|
48
|
+
* DASHES.hasKey('foo') // false
|
|
49
|
+
*/
|
|
50
|
+
hasKey(key: string): boolean;
|
|
51
|
+
/**
|
|
52
|
+
* Checks whether a value exists in the glyph set.
|
|
53
|
+
*
|
|
54
|
+
* @param value - The glyph value to search for.
|
|
55
|
+
* @returns `true` if the value is present, `false` otherwise.
|
|
56
|
+
*
|
|
57
|
+
* @example
|
|
58
|
+
* DASHES.hasValue('\u2014') // true
|
|
59
|
+
*/
|
|
60
|
+
hasValue(value: string): boolean;
|
|
61
|
+
/**
|
|
62
|
+
* Returns the key associated with a given glyph value.
|
|
63
|
+
*
|
|
64
|
+
* @param value - The glyph value to look up.
|
|
65
|
+
* @returns The corresponding key, or `undefined` if not found.
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* DASHES.findKey('\u2014') // 'em'
|
|
69
|
+
* DASHES.findKey('???') // undefined
|
|
70
|
+
*/
|
|
71
|
+
findKey(value: string): string | undefined;
|
|
72
|
+
/**
|
|
73
|
+
* Returns the values associated with given keys.
|
|
74
|
+
*
|
|
75
|
+
* @param keys - The keys to look up.
|
|
76
|
+
* @returns An array of values, or `undefined` if not found.
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* DASHES.find('em', 'en') // ['—', '–']
|
|
80
|
+
* DASHES.find('foo', 'bar') // undefined
|
|
81
|
+
*/
|
|
82
|
+
find(...keys: string[]): string[] | undefined;
|
|
83
|
+
};
|
|
84
|
+
/**
|
|
85
|
+
* Structural contract for glyph sets used in generic contexts.
|
|
86
|
+
*
|
|
87
|
+
* Used where the concrete type parameter `T` is not available -
|
|
88
|
+
* for example, as values inside `createCharacterSet` group maps.
|
|
89
|
+
*
|
|
90
|
+
* The index signature allows arbitrary string keys alongside
|
|
91
|
+
* the explicitly declared utility methods.
|
|
92
|
+
*/
|
|
93
|
+
export interface GlyphSetInterface {
|
|
94
|
+
[key: string]: string | ((joiner?: string) => string) | ((entries: GlyphData) => void) | ((key: string) => boolean) | ((value: string) => boolean) | ((value: string) => string | undefined) | ((...keys: string[]) => string[] | undefined);
|
|
95
|
+
values(): string[];
|
|
96
|
+
join(joiner?: string): string;
|
|
97
|
+
insert(entries: GlyphData): void;
|
|
98
|
+
hasKey(key: string): boolean;
|
|
99
|
+
hasValue(value: string): boolean;
|
|
100
|
+
findKey(value: string): string | undefined;
|
|
101
|
+
find(...keys: string[]): string[] | undefined;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Prototype interface for grouped character sets (e.g. `PUNCTUATION`).
|
|
105
|
+
*
|
|
106
|
+
* Provides locale-aware access to glyph groups via hierarchical resolution:
|
|
107
|
+
* `common[key]` is merged with `locale[key]`, locale values taking precedence.
|
|
108
|
+
*
|
|
109
|
+
* @template T - The full character set structure, optionally containing a `common` group
|
|
110
|
+
* and one or more locale groups.
|
|
111
|
+
*/
|
|
112
|
+
export interface ProtoSet<T extends {
|
|
113
|
+
common?: Record<string, GlyphSetInterface>;
|
|
114
|
+
}> {
|
|
115
|
+
/**
|
|
116
|
+
* Retrieves a merged glyph set for a given locale and group key.
|
|
117
|
+
*
|
|
118
|
+
* Merges the `common[key]` group with `locale[key]`, with locale
|
|
119
|
+
* values overriding common ones where keys collide.
|
|
120
|
+
*
|
|
121
|
+
* @param dataSet - Locale identifier (any key of `T` except `'common'`).
|
|
122
|
+
* @param key - Group key present in `common` and/or the target locale.
|
|
123
|
+
* @returns Merged glyph set for the requested locale and group.
|
|
124
|
+
*
|
|
125
|
+
* @example
|
|
126
|
+
* PUNCTUATION.get('en', 'leftSided') // common + en leftSided merged
|
|
127
|
+
* PUNCTUATION.get('ru', 'rightSided') // common + ru rightSided merged
|
|
128
|
+
*/
|
|
129
|
+
get<TDataSet extends Exclude<keyof T, 'common'>, TKey extends keyof (T['common'] & T[TDataSet])>(this: T & ProtoSet<T>, dataSet: TDataSet, key: TKey): GlyphSetInterface;
|
|
130
|
+
/**
|
|
131
|
+
* Returns all top-level group keys of the character set, including `'common'`.
|
|
132
|
+
*
|
|
133
|
+
* @returns Array of string keys present on the character set object.
|
|
134
|
+
*
|
|
135
|
+
* @example
|
|
136
|
+
* PUNCTUATION.getList() // ['common', 'ru', 'en', 'fr', 'is']
|
|
137
|
+
*/
|
|
138
|
+
getList(): (keyof T & string)[];
|
|
139
|
+
/**
|
|
140
|
+
* Checks whether a given group key exists in the character set.
|
|
141
|
+
*
|
|
142
|
+
* @param key - Group key to check.
|
|
143
|
+
* @returns `true` if the key is present, `false` otherwise.
|
|
144
|
+
*
|
|
145
|
+
* @example
|
|
146
|
+
* PUNCTUATION.hasKey('en') // true
|
|
147
|
+
* PUNCTUATION.hasKey('de') // false
|
|
148
|
+
*/
|
|
149
|
+
hasKey(key: keyof T & string): boolean;
|
|
150
|
+
}
|
|
151
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/helpers/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
NODE_MARKER: () => NODE_MARKER,
|
|
24
|
+
NODE_MARKER_REGEX: () => NODE_MARKER_REGEX,
|
|
25
|
+
PROTECTED_PATTERNS: () => PROTECTED_PATTERNS,
|
|
26
|
+
PROTECTION_MARKER: () => PROTECTION_MARKER,
|
|
27
|
+
PROTECTION_MARKER_REGEX: () => PROTECTION_MARKER_REGEX,
|
|
28
|
+
createPatterns: () => createPatterns,
|
|
29
|
+
joinNodes: () => joinNodes,
|
|
30
|
+
protect: () => protect,
|
|
31
|
+
splitNodes: () => splitNodes,
|
|
32
|
+
unprotect: () => unprotect
|
|
33
|
+
});
|
|
34
|
+
module.exports = __toCommonJS(index_exports);
|
|
35
|
+
|
|
36
|
+
// src/typography/aliases.ts
|
|
37
|
+
function createAlias(map) {
|
|
38
|
+
const normalized = Object.fromEntries(
|
|
39
|
+
Object.entries(map).map(([key, values]) => [
|
|
40
|
+
key.toLowerCase(),
|
|
41
|
+
values.map((v) => v.toLowerCase())
|
|
42
|
+
])
|
|
43
|
+
);
|
|
44
|
+
const methods = {
|
|
45
|
+
/**
|
|
46
|
+
* Checks whether an alias exists in the map,
|
|
47
|
+
* either as a root key or as an alternative name.
|
|
48
|
+
*
|
|
49
|
+
* @param alias - The alias to look up (case-insensitive).
|
|
50
|
+
* @returns `true` if the alias is found, `false` otherwise.
|
|
51
|
+
*
|
|
52
|
+
* @example
|
|
53
|
+
* ALIAS.has('English') // true
|
|
54
|
+
* ALIAS.has('fr') // false
|
|
55
|
+
*/
|
|
56
|
+
has(alias) {
|
|
57
|
+
const a = alias.toLowerCase();
|
|
58
|
+
if (a in normalized) return true;
|
|
59
|
+
return Object.values(normalized).some((vals) => vals.includes(a));
|
|
60
|
+
},
|
|
61
|
+
/**
|
|
62
|
+
* Resolves an alias to its root key.
|
|
63
|
+
*
|
|
64
|
+
* @param alias - The alias to resolve (case-insensitive).
|
|
65
|
+
* @returns The root key if found, `undefined` otherwise.
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* ALIAS.resolve('Russian') // 'ru'
|
|
69
|
+
* ALIAS.resolve('ru-RU') // 'ru'
|
|
70
|
+
* ALIAS.resolve('fr') // undefined
|
|
71
|
+
*/
|
|
72
|
+
resolve(alias) {
|
|
73
|
+
const a = alias.toLowerCase();
|
|
74
|
+
if (a in normalized) return a;
|
|
75
|
+
return Object.keys(normalized).find((k) => {
|
|
76
|
+
const values = normalized[k];
|
|
77
|
+
return Array.isArray(values) && values.includes(a);
|
|
78
|
+
});
|
|
79
|
+
},
|
|
80
|
+
/**
|
|
81
|
+
* Adds one or more alternative names to an existing or new root key.
|
|
82
|
+
* All values are automatically lowercased.
|
|
83
|
+
*
|
|
84
|
+
* @param root - The root key to add aliases to.
|
|
85
|
+
* @param aliases - One or more alternative names to register.
|
|
86
|
+
*
|
|
87
|
+
* @example
|
|
88
|
+
* ALIAS.push('ru', 'Рус', 'ру')
|
|
89
|
+
* ALIAS.ru // ['ru-ru', 'russian', 'русский', 'рус', 'ру']
|
|
90
|
+
*/
|
|
91
|
+
push(root, ...aliases) {
|
|
92
|
+
for (const alias of aliases) {
|
|
93
|
+
const a = alias.toLowerCase();
|
|
94
|
+
const r = root.toLowerCase();
|
|
95
|
+
if (a in normalized) {
|
|
96
|
+
normalized[a]?.push(r);
|
|
97
|
+
} else {
|
|
98
|
+
normalized[a] = [r];
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
/**
|
|
103
|
+
* Normalizes one or more strings to lowercase.
|
|
104
|
+
*
|
|
105
|
+
* @param alias - One or more strings to normalize.
|
|
106
|
+
* @returns An array of lowercased strings.
|
|
107
|
+
*
|
|
108
|
+
* @example
|
|
109
|
+
* ALIAS.normalize('English', 'RU-RU') // ['english', 'ru-ru']
|
|
110
|
+
*/
|
|
111
|
+
normalize(...alias) {
|
|
112
|
+
return alias.map((a) => a.toLowerCase());
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
return new Proxy(methods, {
|
|
116
|
+
get(target, prop) {
|
|
117
|
+
if (prop in target) return target[prop];
|
|
118
|
+
return normalized[prop];
|
|
119
|
+
},
|
|
120
|
+
has(_target, prop) {
|
|
121
|
+
return prop in normalized;
|
|
122
|
+
},
|
|
123
|
+
ownKeys() {
|
|
124
|
+
return [...Object.keys(normalized), ...Object.keys(methods)];
|
|
125
|
+
},
|
|
126
|
+
getOwnPropertyDescriptor(_target, prop) {
|
|
127
|
+
if (prop in normalized || prop in methods) {
|
|
128
|
+
return { enumerable: true, configurable: true };
|
|
129
|
+
}
|
|
130
|
+
return void 0;
|
|
131
|
+
}
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
var ALIAS = createAlias({
|
|
135
|
+
ru: ["ru-RU", "Russian", "\u0420\u0443\u0441\u0441\u043A\u0438\u0439"],
|
|
136
|
+
en: ["en-US", "English"],
|
|
137
|
+
ang: ["\xC6nglis\u010B", "\xC6nglisc", "Old English"]
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
// src/helpers/index.ts
|
|
141
|
+
// Private Use Area sentinel sequences: NODE_MARKER separates joined node
// values, PROTECTION_MARKER stands in for text captured by `protect`.
var NODE_MARKER = "\uE000\uEDFD\uF43E";
var PROTECTION_MARKER = "\uE001\uEDF1\uF111";
// Global matchers for the two sentinels, built from the literals above.
var [NODE_MARKER_REGEX, PROTECTION_MARKER_REGEX] = [NODE_MARKER, PROTECTION_MARKER].map(
  (marker) => new RegExp(marker, "g")
);
|
|
145
|
+
// Shared prototype for pattern registries. Named patterns live on each
// instance as own accessor properties; per-locale groups live in
// `localeRegistry`, keyed by the instance.
var patternProto = {
  /** Fresh RegExp instances for every own accessor-backed pattern, in key order. */
  get values() {
    const found = [];
    for (const name of Object.keys(this)) {
      const descriptor = Object.getOwnPropertyDescriptor(this, name);
      if (!descriptor?.get) continue;
      const pattern = this[name];
      if (pattern !== void 0) found.push(pattern);
    }
    return found;
  },
  /**
   * Builds one global RegExp that alternates over all base patterns and,
   * when `locale` resolves to a registered group, that group's patterns too.
   *
   * Only each pattern's `source` is used; the result always has just the
   * `g` flag, so per-pattern flags are not carried over.
   *
   * @param locale - Optional locale name or alias; resolved through `ALIAS`, falling back to the raw value.
   * @returns Combined global RegExp.
   */
  combined(locale) {
    const localeKey = locale ? ALIAS.resolve(locale) ?? locale : void 0;
    const asGroup = (rx) => `(${rx.source})`;
    const alternatives = this.values.map((rx) => asGroup(rx));
    const localeGroup = localeKey ? localeRegistry.get(this)?.get(localeKey) : void 0;
    if (localeGroup) {
      alternatives.push(...Object.values(localeGroup).map((rx) => asGroup(rx)));
    }
    return new RegExp(alternatives.join("|"), "g");
  },
  /**
   * Mutably adds entries: RegExp values become own getters returning a new
   * RegExp per access; any other value is stored as the locale group for
   * that key (replacing a previous group under the same key).
   *
   * @param patterns - Map of pattern names to RegExp, or locale keys to pattern groups.
   */
  insert(patterns) {
    const locales = localeRegistry.get(this) ?? /* @__PURE__ */ new Map();
    for (const [name, entry] of Object.entries(patterns)) {
      if (!(entry instanceof RegExp)) {
        locales.set(name, entry);
        localeRegistry.set(this, locales);
        continue;
      }
      const { source, flags } = entry;
      Object.defineProperty(this, name, {
        configurable: true,
        enumerable: true,
        get: () => new RegExp(source, flags)
      });
    }
  },
  /** Iterates the accessor-backed patterns lazily, in key order. */
  *[Symbol.iterator]() {
    for (const name of Object.keys(this)) {
      if (Object.getOwnPropertyDescriptor(this, name)?.get) yield this[name];
    }
  }
};
|
|
195
|
+
/**
 * Creates a pattern registry whose prototype is `patternProto`.
 *
 * RegExp entries are exposed as enumerable own getters that build a new
 * RegExp from the captured source and flags on every access. Every other
 * entry is recorded as a locale group for this registry in `localeRegistry`.
 *
 * @param patterns - Map of pattern names to RegExp, plus optional locale-key entries.
 * @returns The new registry object.
 */
function createPatterns(patterns) {
  const registry = Object.create(patternProto);
  const localeGroups = /* @__PURE__ */ new Map();
  Object.keys(patterns).forEach((name) => {
    const entry = patterns[name];
    if (!(entry instanceof RegExp)) {
      localeGroups.set(name, entry);
      return;
    }
    const { source, flags } = entry;
    Object.defineProperty(registry, name, {
      configurable: true,
      enumerable: true,
      get: () => new RegExp(source, flags)
    });
  });
  localeRegistry.set(registry, localeGroups);
  return registry;
}
// Per-registry locale groups, held weakly so they do not keep a registry alive.
var localeRegistry = /* @__PURE__ */ new WeakMap();
|
|
218
|
+
// Patterns whose matches `protect` swaps for PROTECTION_MARKER so that later
// text rules cannot alter them. Key order is the alternation order used by
// `combined()`.
var PROTECTED_PATTERNS = createPatterns({
  // Addresses and locations
  email: /[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+/g,
  url: /https?:\/\/[^\s]+/g,
  unixPath: /\/[a-zA-Z0-9._\-/]+\.[a-zA-Z0-9]+/g,
  windowsPath: /[A-Za-z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*/g,
  // Markup and code
  xmlTag: /<\/?[a-zA-Z][^>]*>/g,
  inlineCode: /`[^`\n]+`/g,
  blockCode: /```[\s\S]*?```/g,
  // Machine identifiers
  uuid: /\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b/gi,
  hash: /\b(?=[0-9a-f]*[a-f])[0-9a-f]{7,40}\b/gi,
  ipv4: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g,
  ipv6: /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g,
  mac: /\b(?:[0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}\b/g,
  version: /\bv?\d+\.\d+\.\d+(?:[-+][A-Za-z0-9.-]+)?\b/g,
  // Programming-related syntax
  selector: /[.#][A-Za-z_][\w-]*/g,
  cliOption: /--?[a-zA-Z][\w-]*/g,
  hashNumber: /(?<!\w)#[0-9]+\b/g,
  // Publishing identifiers
  isbn: /\b(?:97[89][- ]?)?(?:\d[- ]?){9}[\dX]\b/g,
  issn: /\b\d{4}-\d{3}[\dX]\b/g,
  doi: /\b10\.\d{4,9}\/[-._;()/:A-Za-z0-9]+\b/g,
  orcid: /\b\d{4}-\d{4}-\d{4}-\d{3}[\dX]\b/g,
  // Explicit escape block `[##(...)##]`. The capture is lazy so that several
  // blocks on one line are matched separately instead of as one span running
  // from the first opener to the last closer.
  protect: /\[##\((.+?)\)##\]/g,
  // Locale group for Russian; no extra patterns are registered here.
  ["ru"]: {}
});
|
|
242
|
+
/**
 * Replaces every NODE_MARKER sequence and every protected-pattern match in
 * `text` with PROTECTION_MARKER and records the replaced substrings.
 *
 * Matching runs in a single pass so `captured` is ordered by position in the
 * text. `unprotect` restores placeholders strictly in text order, so capturing
 * node markers and pattern matches in two separate passes would hand the
 * wrong original back whenever a pattern match precedes a node marker.
 *
 * @param text - Text to protect.
 * @param locale - Optional locale name or alias selecting extra locale patterns.
 * @returns Tuple of [protected text, captured substrings in text order].
 */
function protect(text, locale) {
  const key = locale ? ALIAS.resolve(locale) ?? locale : void 0;
  const captured = [];
  // Node marker first, then all protected patterns, as one alternation.
  const matcher = new RegExp(
    `${NODE_MARKER_REGEX.source}|${PROTECTED_PATTERNS.combined(key).source}`,
    "g"
  );
  const protectedText = text.replace(matcher, (match) => {
    captured.push(match);
    return PROTECTION_MARKER;
  });
  return [protectedText, captured];
}
|
|
255
|
+
/**
 * Puts captured substrings back in place of PROTECTION_MARKER occurrences,
 * consuming `captured` front to back (missing entries become an empty
 * string), then unwraps `[##(...)##]` blocks down to their content.
 *
 * @param text - Text containing protection markers.
 * @param captured - Substrings previously returned by `protect`; not mutated.
 * @returns Text with originals restored.
 */
function unprotect(text, captured) {
  let next = 0;
  const restored = text.replace(PROTECTION_MARKER_REGEX, () => captured[next++] ?? "");
  return restored.replace(PROTECTED_PATTERNS.protect, "$1");
}
|
|
259
|
+
/**
 * Concatenates the `value` of every node, separated by NODE_MARKER.
 *
 * @param nodes - Objects exposing a string `value`.
 * @returns Single string with NODE_MARKER between consecutive values.
 */
function joinNodes(nodes) {
  const pieces = nodes.map(({ value }) => value);
  return pieces.join(NODE_MARKER);
}
|
|
262
|
+
/**
 * Splits `text` on NODE_MARKER and writes each segment back into the node at
 * the same index.
 *
 * When the number of segments differs from the number of nodes (for example
 * a rule consumed or duplicated a marker) the segments can no longer be
 * attributed to nodes reliably, so no node is modified and the original
 * values are kept.
 *
 * @param text - Transformed string previously produced from `nodes`.
 * @param nodes - Nodes to update in place.
 */
function splitNodes(text, nodes) {
  const segments = text.split(NODE_MARKER);
  // A count mismatch means indices no longer line up; writing back would
  // shift text into the wrong nodes.
  if (segments.length !== nodes.length) return;
  nodes.forEach((node, i) => {
    node.value = segments[i];
  });
}
|
|
268
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import type { PatternData, PatternSet } from './types';
|
|
2
|
+
export type * from './types';
|
|
3
|
+
/**
|
|
4
|
+
* Private Use Area (PUA) marker sequence used to identify AST nodes.
|
|
5
|
+
*
|
|
6
|
+
* This marker is embedded into text during preprocessing stages
|
|
7
|
+
* to preserve structural boundaries through transformations.
|
|
8
|
+
*
|
|
9
|
+
* Must remain unique and never appear in natural text.
|
|
10
|
+
*/
|
|
11
|
+
export declare const NODE_MARKER = "\uE000\uEDFD\uF43E";
|
|
12
|
+
/**
|
|
13
|
+
* Private Use Area (PUA) marker sequence used for protected regions.
|
|
14
|
+
*
|
|
15
|
+
* Used as a temporary placeholder for substrings that must not be modified
|
|
16
|
+
* during typography or text-processing pipelines.
|
|
17
|
+
*
|
|
18
|
+
* Typically applied before regex-based transformations.
|
|
19
|
+
*/
|
|
20
|
+
export declare const PROTECTION_MARKER = "\uE001\uEDF1\uF111";
|
|
21
|
+
/**
|
|
22
|
+
* RegExp matcher for NODE_MARKER sequences.
|
|
23
|
+
*
|
|
24
|
+
* Used to locate and remove node markers during cleanup phase
|
|
25
|
+
* of text processing pipeline.
|
|
26
|
+
*/
|
|
27
|
+
export declare const NODE_MARKER_REGEX: RegExp;
|
|
28
|
+
/**
|
|
29
|
+
* RegExp matcher for PROTECTION_MARKER sequences.
|
|
30
|
+
*
|
|
31
|
+
* Used to detect protected regions in processed text
|
|
32
|
+
* and restore original content where necessary.
|
|
33
|
+
*/
|
|
34
|
+
export declare const PROTECTION_MARKER_REGEX: RegExp;
|
|
35
|
+
/**
|
|
36
|
+
* Creates a pattern registry from a map of named RegExp patterns.
|
|
37
|
+
*
|
|
38
|
+
* Each pattern is stored as a getter that returns a new `RegExp` instance
|
|
39
|
+
* on every access, ensuring `lastIndex` is always reset to `0` regardless
|
|
40
|
+
* of how the pattern was previously used.
|
|
41
|
+
*
|
|
42
|
+
* Prototype members (`values`, `combined`, `insert`, `Symbol.iterator`) are attached via
|
|
43
|
+
* `Object.create` and do not appear as enumerable keys on the result.
|
|
44
|
+
*
|
|
45
|
+
* @template T — Shape of the source pattern map
|
|
46
|
+
* @param patterns — Raw map of named RegExp patterns; non-RegExp entries are stored as per-locale pattern groups
|
|
47
|
+
* @returns A pattern registry with per-access instantiation and prototype utilities
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* const PATTERNS = createPatterns({
|
|
51
|
+
* email: /[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+/g,
|
|
52
|
+
* url: /https?:\/\/[^\s]+/g,
|
|
53
|
+
* });
|
|
54
|
+
*
|
|
55
|
+
* PATTERNS.email.test('user@example.com'); // always starts from lastIndex = 0
|
|
56
|
+
* [...PATTERNS]; // [RegExp, RegExp]
|
|
57
|
+
* PATTERNS.values; // [RegExp, RegExp]
|
|
58
|
+
*/
|
|
59
|
+
export declare function createPatterns<T extends PatternData>(patterns: T): PatternSet<T>;
|
|
60
|
+
/**
|
|
61
|
+
* Registry of protected regex patterns used in text preprocessing.
|
|
62
|
+
*
|
|
63
|
+
* These patterns are temporarily excluded from typography transformations
|
|
64
|
+
* by wrapping matches with protection markers.
|
|
65
|
+
*
|
|
66
|
+
* Includes:
|
|
67
|
+
* - URLs and emails
|
|
68
|
+
* - filesystem paths (Unix / Windows)
|
|
69
|
+
* - code blocks and inline code
|
|
70
|
+
* - identifiers (UUID, hashes, ORCID, DOI)
|
|
71
|
+
* - network addresses (IPv4, IPv6, MAC)
|
|
72
|
+
* - programming-related syntax (selectors, CLI flags, versions)
|
|
73
|
+
* - publishing identifiers (ISBN, ISSN)
|
|
74
|
+
*
|
|
75
|
+
* This system ensures that structured technical content
|
|
76
|
+
* is not corrupted by typographic transformations.
|
|
77
|
+
*/
|
|
78
|
+
export declare const PROTECTED_PATTERNS: PatternSet<{
|
|
79
|
+
email: RegExp;
|
|
80
|
+
url: RegExp;
|
|
81
|
+
unixPath: RegExp;
|
|
82
|
+
windowsPath: RegExp;
|
|
83
|
+
xmlTag: RegExp;
|
|
84
|
+
inlineCode: RegExp;
|
|
85
|
+
blockCode: RegExp;
|
|
86
|
+
uuid: RegExp;
|
|
87
|
+
hash: RegExp;
|
|
88
|
+
ipv4: RegExp;
|
|
89
|
+
ipv6: RegExp;
|
|
90
|
+
mac: RegExp;
|
|
91
|
+
version: RegExp;
|
|
92
|
+
selector: RegExp;
|
|
93
|
+
cliOption: RegExp;
|
|
94
|
+
hashNumber: RegExp;
|
|
95
|
+
isbn: RegExp;
|
|
96
|
+
issn: RegExp;
|
|
97
|
+
doi: RegExp;
|
|
98
|
+
orcid: RegExp;
|
|
99
|
+
protect: RegExp;
|
|
100
|
+
ru: {};
|
|
101
|
+
}>;
|
|
102
|
+
/**
|
|
103
|
+
* Replaces NODE_MARKER sequences and PROTECTED_PATTERNS matches
|
|
104
|
+
* with PROTECTION_MARKER, storing originals for later restoration.
|
|
105
|
+
*
|
|
106
|
+
* @returns Tuple of [protected text, captured matches]
|
|
107
|
+
*/
|
|
108
|
+
export declare function protect(text: string, locale?: string): [string, string[]];
|
|
109
|
+
/**
|
|
110
|
+
* Restores original strings previously captured by `protect` and strips `[##(…)##]` wrappers, keeping their content.
|
|
111
|
+
*/
|
|
112
|
+
export declare function unprotect(text: string, captured: string[]): string;
|
|
113
|
+
/**
|
|
114
|
+
* Joins an array of objects with a `value: string` field
|
|
115
|
+
* into a single string using NODE_MARKER as separator.
|
|
116
|
+
*
|
|
117
|
+
* Intended for combining sibling text nodes before rule application,
|
|
118
|
+
* so that rules can operate across node boundaries when needed.
|
|
119
|
+
*/
|
|
120
|
+
export declare function joinNodes<T extends {
|
|
121
|
+
value: string;
|
|
122
|
+
}>(nodes: T[]): string;
|
|
123
|
+
/**
|
|
124
|
+
* Splits a transformed string by NODE_MARKER and writes
|
|
125
|
+
* segments back into the corresponding nodes.
|
|
126
|
+
*
|
|
127
|
+
* If segment count doesn't match nodes count (e.g. a rule
|
|
128
|
+
* consumed a marker), original values are preserved as fallback.
|
|
129
|
+
*/
|
|
130
|
+
export declare function splitNodes<T extends {
|
|
131
|
+
value: string;
|
|
132
|
+
}>(text: string, nodes: T[]): void;
|
|
133
|
+
//# sourceMappingURL=index.d.ts.map
|