@nkardaz/typography-rules 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +911 -0
- package/dist/api/blacklist.d.ts +72 -0
- package/dist/api/htmlNodes.d.ts +30 -0
- package/dist/api/index.d.ts +6 -0
- package/dist/api/newRule.d.ts +51 -0
- package/dist/api/registerRule.d.ts +27 -0
- package/dist/api/rulesInit.d.ts +49 -0
- package/dist/functions/chemNotation.d.ts +10 -0
- package/dist/functions/clearSpaces.d.ts +16 -0
- package/dist/functions/index.cjs +514 -0
- package/dist/functions/index.d.ts +8 -0
- package/dist/functions/index.mjs +491 -0
- package/dist/functions/rubyText.d.ts +11 -0
- package/dist/functions/runt.d.ts +3 -0
- package/dist/functions/smartNumberGrouping.d.ts +25 -0
- package/dist/functions/smartQuotes.d.ts +29 -0
- package/dist/functions/wrapWithTag.d.ts +42 -0
- package/dist/glyphs/index.cjs +737 -0
- package/dist/glyphs/index.d.ts +53 -0
- package/dist/glyphs/index.mjs +714 -0
- package/dist/glyphs/proto.d.ts +11 -0
- package/dist/glyphs/registry.d.ts +728 -0
- package/dist/glyphs/types.d.ts +151 -0
- package/dist/helpers/index.cjs +268 -0
- package/dist/helpers/index.d.ts +133 -0
- package/dist/helpers/index.mjs +245 -0
- package/dist/helpers/types.d.ts +71 -0
- package/dist/index.cjs +985 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.mjs +977 -0
- package/dist/style/index.d.ts +2 -0
- package/dist/style/main.css +16 -0
- package/dist/types.d.ts +223 -0
- package/dist/typography/aliases.d.ts +129 -0
- package/dist/typography/expressions/common.d.ts +29 -0
- package/dist/typography/expressions/en.d.ts +25 -0
- package/dist/typography/expressions/ru.d.ts +29 -0
- package/dist/typography/markup/common.d.ts +17 -0
- package/dist/typography/markup/en.d.ts +3 -0
- package/dist/typography/markup/index.d.ts +4 -0
- package/dist/typography/markup/ru.d.ts +3 -0
- package/dist/typography/sets/ang.d.ts +3 -0
- package/dist/typography/sets/common.d.ts +17 -0
- package/dist/typography/sets/en.d.ts +14 -0
- package/dist/typography/sets/index.d.ts +5 -0
- package/dist/typography/sets/ru.d.ts +16 -0
- package/dist/typography/store.d.ts +63 -0
- package/package.json +92 -0
package/README.md
ADDED
|
@@ -0,0 +1,911 @@
|
|
|
1
|
+
# @nkardaz/typography-rules
|
|
2
|
+
|
|
3
|
+
A modular, locale-aware typography rules engine for transforming plain text into
|
|
4
|
+
typographically correct output. Ships with a glyph registry, smart text
|
|
5
|
+
functions, and a composable rule pipeline.
|
|
6
|
+
|
|
7
|
+
Used as a rules provider for typography plugins such as
|
|
8
|
+
[@nkardaz/typography-core](https://github.com/DemerNkardaz/typography-core) /
|
|
9
|
+
[@nkardaz/remark-typography](https://github.com/DemerNkardaz/remark-typography).
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm i @nkardaz/typography-rules
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
> **Requires Node.js ≥ 24.0.0**
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Package Exports
|
|
24
|
+
|
|
25
|
+
| Export path | Description |
|
|
26
|
+
| ----------------------------------- | ------------------------------------------------------------------------- |
|
|
27
|
+
| `@nkardaz/typography-rules` | Main entry — rules, store, types, functions |
|
|
28
|
+
| `@nkardaz/typography-rules/glyphs` | Glyph registries (DASHES, SPACES, PUNCTUATION, …) |
|
|
29
|
+
| `@nkardaz/typography-rules/helpers` | Text pipeline helpers (protect/unprotect, node markers, pattern registry) |
|
|
30
|
+
| `@nkardaz/typography-rules/functions` | Composable text-processing functions |
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
### Using default rules
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
import { initTypographyRules, initMarkupRules, getWeightedRules } from '@nkardaz/typography-rules';
|
|
40
|
+
|
|
41
|
+
// Register all built-in rule groups (common, ru, en, …)
|
|
42
|
+
initTypographyRules();
|
|
43
|
+
|
|
44
|
+
// Register all built-in rules for markup, e.g. [^text] → <sup>text</sup>
|
|
45
|
+
initMarkupRules();
|
|
46
|
+
|
|
47
|
+
// Or apply only a specific locale group
|
|
48
|
+
initTypographyRules('ru');
|
|
49
|
+
|
|
50
|
+
// Retrieve the merged, weight-sorted pipeline for a locale
|
|
51
|
+
const rules = getWeightedRules('ru'); // common + ru rules, sorted by weight
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Defining custom rules
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
import { newRule, registerRule } from '@nkardaz/typography-rules';
|
|
58
|
+
|
|
59
|
+
// Replace rule — static string substitution
|
|
60
|
+
registerRule('en', newRule('/english/copyright', /\(c\)/gi, '©'));
|
|
61
|
+
|
|
62
|
+
// Transform rule — dynamic replacement per match
|
|
63
|
+
registerRule(
|
|
64
|
+
'en',
|
|
65
|
+
newRule('/english/bracket-numbers', /\d+/g, (match) => `[${match[0]}]`)
|
|
66
|
+
);
|
|
67
|
+
|
|
68
|
+
// Function rule — full custom processing function
|
|
69
|
+
import { smartQuotes } from '@nkardaz/typography-rules/functions';
|
|
70
|
+
|
|
71
|
+
// Danish quotes: »Jeg husker, at hun sagde ›det her er vigtigt‹ i går.«
|
|
72
|
+
registerRule(
|
|
73
|
+
'da',
|
|
74
|
+
newRule('/danish/typography/quotes', smartQuotes, [
|
|
75
|
+
{ outer: ['»', '«'], inner: ['›', '‹'] },
|
|
76
|
+
])
|
|
77
|
+
);
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Registering multiple rules at once
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
import { newRule, registerRule } from '@nkardaz/typography-rules';
|
|
84
|
+
import { DASHES } from '@nkardaz/typography-rules/glyphs';
|
|
85
|
+
|
|
86
|
+
registerRule(
|
|
87
|
+
'en',
|
|
88
|
+
newRule('/english/em-dash', /--/g, DASHES.em),
|
|
89
|
+
newRule('/english/registered', /\(r\)/gi, '®'),
|
|
90
|
+
newRule('/english/trademark', /\(tm\)/gi, '™')
|
|
91
|
+
);
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Core API
|
|
97
|
+
|
|
98
|
+
### `newRule(label, rule, second?, weight?)`
|
|
99
|
+
|
|
100
|
+
Creates a typed typography rule object. Supports three overloads:
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
// 1. Replace rule
|
|
104
|
+
newRule('/my/rule/label', /--/g, '—');
|
|
105
|
+
|
|
106
|
+
// 2. Transform rule
|
|
107
|
+
newRule('/my/rule/label', /\d+/g, (match: RegExpExecArray) => `[${match[0]}]`);
|
|
108
|
+
|
|
109
|
+
// 3. Function rule
|
|
110
|
+
newRule('/my/rule/label', myFunction, ['arg1', 'arg2']);
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
| Parameter | Type | Description |
|
|
114
|
+
| --------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
|
|
115
|
+
| `label` | `string` | Unique rule identifier path, e.g. '/en/math/fractions'. Used by the blacklist system to enable/disable rules selectively |
|
|
116
|
+
| `rule` | `RegExp \| RuleFunction` | Pattern or processing function |
|
|
117
|
+
| `second` | `string \| transform fn \| args[]` | Replacement, transformer, or arguments |
|
|
118
|
+
| `weight` | `number` | Execution priority — lower values run first (default: `0`) |
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
### `registerRule(locale, ...rules)`
|
|
123
|
+
|
|
124
|
+
Registers one or more rules for a locale. Automatically invalidates the weighted
|
|
125
|
+
rule cache for that locale and `'common'`.
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
registerRule('common', newRule('/common/space/cleanup', /\s+/g, ' '));
|
|
129
|
+
registerRule('de', newRule('/deutsch/em-dash', /--/g, '—'), newRule('/deutsch/quotes', /"/g, '„'));
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
### `rulesBase(locale, base, label?, excludes?, ...rules)`
|
|
135
|
+
|
|
136
|
+
Registers rules for a locale, inheriting from an existing base locale. Useful
|
|
137
|
+
for defining locale variants that share most rules with a parent.
|
|
138
|
+
|
|
139
|
+
```typescript
|
|
140
|
+
rulesBase(
|
|
141
|
+
'fr-CA', // target locale
|
|
142
|
+
'fr', // inherit from French
|
|
143
|
+
{ expression: /^\/french\//, replacement: '/french-ca/' }, // rename labels
|
|
144
|
+
['/french/quotes/guillemets'], // exclude specific rules
|
|
145
|
+
newRule('/french-ca/extra', /…/g, '...') // add custom rules on top
|
|
146
|
+
);
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
| Parameter | Type | Description |
|
|
150
|
+
| ---------- | ----------------- | -------------------------------------------------------- |
|
|
151
|
+
| `locale` | `string` | Target locale to register rules for |
|
|
152
|
+
| `base` | `string` | Source locale to inherit rules from |
|
|
153
|
+
| `label` | `LabelTransform?` | Optional `{ expression, replacement }` to rewrite labels |
|
|
154
|
+
| `excludes` | `string[]?` | Rule labels from the base to skip |
|
|
155
|
+
| `...rules` | `Rule[]` | Additional rules appended after the inherited ones |
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
### `initTypographyRules(from?)`
|
|
160
|
+
|
|
161
|
+
Populates the global rule registry with the built-in default ruleset.
|
|
162
|
+
|
|
163
|
+
```typescript
|
|
164
|
+
initTypographyRules(); // All locales
|
|
165
|
+
initTypographyRules('en'); // English rules only
|
|
166
|
+
initTypographyRules('ru'); // Russian rules only
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
### `initMarkupRules(from?)`
|
|
172
|
+
|
|
173
|
+
Populates the global rule registry with built-in markup rules (superscript,
|
|
174
|
+
subscript, chemical notation, ruby text).
|
|
175
|
+
|
|
176
|
+
```typescript
|
|
177
|
+
initMarkupRules(); // All markup rule groups
|
|
178
|
+
initMarkupRules('common'); // Common markup rules only
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
### `getWeightedRules(locale)`
|
|
184
|
+
|
|
185
|
+
Returns a merged, weight-sorted rule pipeline for the given locale: `common`
|
|
186
|
+
rules + locale-specific rules, sorted ascending by `weight`.
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
const pipeline = getWeightedRules('en'); // Rule[]
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
### `resetTypographyRules()`
|
|
195
|
+
|
|
196
|
+
Clears all registered rules from the global registry and cache.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
### `rulesHas(locale)` / `rulesCount(locale)`
|
|
201
|
+
|
|
202
|
+
Utility functions for inspecting the rule registry:
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
rulesHas('en'); // boolean
|
|
206
|
+
rulesCount('en'); // number
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
### Rule Blacklist
|
|
212
|
+
|
|
213
|
+
A trie-based system for selectively disabling rules by their label path without
|
|
214
|
+
removing them from the registry. Supports hierarchical matching — disabling a
|
|
215
|
+
path prefix disables all rules nested under it.
|
|
216
|
+
|
|
217
|
+
```typescript
|
|
218
|
+
import {
|
|
219
|
+
disableRule,
|
|
220
|
+
enableRule,
|
|
221
|
+
toggleRule,
|
|
222
|
+
isRuleDisabled,
|
|
223
|
+
isGloballyDisabled,
|
|
224
|
+
clearBlacklist,
|
|
225
|
+
} from '@nkardaz/typography-rules';
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
#### `disableRule(rule)`
|
|
229
|
+
|
|
230
|
+
Disables a rule or an entire rule subtree by path prefix. The special value
|
|
231
|
+
`'*'` disables all rules globally.
|
|
232
|
+
|
|
233
|
+
```typescript
|
|
234
|
+
disableRule('/common/math/negative-number'); // disable one rule
|
|
235
|
+
disableRule('/english/ligatures'); // disable all ligature rules
|
|
236
|
+
disableRule('*'); // disable everything
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
#### `enableRule(rule)`
|
|
240
|
+
|
|
241
|
+
Re-enables a previously disabled rule. Clears the global flag if `'*'` is
|
|
242
|
+
passed.
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
enableRule('/english/ligatures/fi'); // re-enable a single rule
|
|
246
|
+
enableRule('*'); // lift global disable
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
#### `toggleRule(rule)`
|
|
250
|
+
|
|
251
|
+
Flips the disabled state of a rule — disables if enabled, enables if disabled.
|
|
252
|
+
|
|
253
|
+
```typescript
|
|
254
|
+
toggleRule('/common/typography/runt');
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
#### `isRuleDisabled(rule)`
|
|
258
|
+
|
|
259
|
+
Returns `true` if the rule is disabled either directly, via a parent prefix, or
|
|
260
|
+
globally.
|
|
261
|
+
|
|
262
|
+
```typescript
|
|
263
|
+
isRuleDisabled('/common/math/negative-number'); // boolean
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
#### `isGloballyDisabled()`
|
|
267
|
+
|
|
268
|
+
Returns `true` if all rules have been globally disabled via `disableRule('*')`.
|
|
269
|
+
|
|
270
|
+
```typescript
|
|
271
|
+
isGloballyDisabled(); // boolean
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
#### `clearBlacklist()`
|
|
275
|
+
|
|
276
|
+
Resets the entire blacklist — removes all disabled paths and clears the global
|
|
277
|
+
flag.
|
|
278
|
+
|
|
279
|
+
```typescript
|
|
280
|
+
clearBlacklist();
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
#### Label path conventions
|
|
284
|
+
|
|
285
|
+
Built-in rule labels follow a consistent hierarchy:
|
|
286
|
+
|
|
287
|
+
| Segment | Example | Meaning |
|
|
288
|
+
| -------------- | ----------------------------------------- | ---------------------- |
|
|
289
|
+
| `common` | `/common/math/…` | Applies to all locales |
|
|
290
|
+
| `english` | `/english/ligatures/…` | English-only rules |
|
|
291
|
+
| `russian` | `/russian/typography/…` | Russian-only rules |
|
|
292
|
+
| Second segment | `/common/space/…`, `/common/typography/…` | Rule category |
|
|
293
|
+
| Third segment | `/common/math/negative-number` | Specific case |
|
|
294
|
+
| Fourth segment | `/common/punctuation/dashes/em-dash` | Specific rule |
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## Built-in Functions
|
|
299
|
+
|
|
300
|
+
These are composable text-processing functions that can be used directly or
|
|
301
|
+
wrapped with `newRule`.
|
|
302
|
+
|
|
303
|
+
Must be imported from `@nkardaz/typography-rules/functions`.
|
|
304
|
+
|
|
305
|
+
### `smartQuotes(text, settings?)`
|
|
306
|
+
|
|
307
|
+
Converts straight quotes (`"` and `'`) into typographically correct
|
|
308
|
+
opening/closing quote pairs, with support for nested quotation levels and
|
|
309
|
+
apostrophe detection.
|
|
310
|
+
|
|
311
|
+
```typescript
|
|
312
|
+
import { smartQuotes } from '@nkardaz/typography-rules/functions';
|
|
313
|
+
|
|
314
|
+
smartQuotes('"Hello"'); // “Hello” (en defaults)
|
|
315
|
+
smartQuotes('"He said \'hi\'"'); // “He said ‘hi’”
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
**Settings:**
|
|
319
|
+
|
|
320
|
+
| Option | Type | Default | Description |
|
|
321
|
+
| ------- | ------------------ | ------------------ | -------------------------------- |
|
|
322
|
+
| `outer` | `[string, string]` | `[“, ”]` (English) | Opening and closing outer quotes |
|
|
323
|
+
| `inner` | `[string, string]` | `[‘, ’]` (English) | Opening and closing inner quotes |
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
### `smartNumberGrouping(text, settings?)`
|
|
328
|
+
|
|
329
|
+
Inserts symbols (e.g. `,`) as thousands separators into large numeric sequences
|
|
330
|
+
based on locale (uses `Intl.NumberFormat`).
|
|
331
|
+
|
|
332
|
+
```typescript
|
|
333
|
+
import { smartNumberGrouping } from '@nkardaz/typography-rules/functions';
|
|
334
|
+
|
|
335
|
+
smartNumberGrouping('Price: 1234567');
|
|
336
|
+
// “Price: 1,234,567”
|
|
337
|
+
|
|
338
|
+
smartNumberGrouping('Value: 1234567.891011', { locale: 'ru-RU' });
|
|
339
|
+
// “Value: 1 234 567,891011”
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
**Settings:**
|
|
343
|
+
|
|
344
|
+
| Option | Type | Default | Description |
|
|
345
|
+
| ----------- | -------- | --------- | ------------------------------------------------------ |
|
|
346
|
+
| `locale` | `string` | `'en-US'` | BCP 47 locale tag used by `Intl.NumberFormat` |
|
|
347
|
+
| `minLength` | `number` | `5` | Minimum integer digit count before grouping is applied |
|
|
348
|
+
|
|
349
|
+
---
|
|
350
|
+
|
|
351
|
+
### `clearSpaces(text, settings?)`
|
|
352
|
+
|
|
353
|
+
Collapses runs of two or more identical space characters into a single one. By
|
|
354
|
+
default targets non-breaking, hair, and thin spaces.
|
|
355
|
+
|
|
356
|
+
```typescript
|
|
357
|
+
import { clearSpaces } from '@nkardaz/typography-rules/functions';
|
|
358
|
+
|
|
359
|
+
clearSpaces('a\u00A0\u00A0b\u00A0\u00A0c'); // 'a\u00A0b\u00A0c' — repeated non-breaking spaces collapsed
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
**Settings:**
|
|
363
|
+
|
|
364
|
+
| Option | Type | Default | Description |
|
|
365
|
+
| -------- | ---------------------- | ----------------------- | ------------------------------- |
|
|
366
|
+
| `spaces` | `Spaces[] \| string[]` | `[noBreak, hair, thin]` | Space characters to deduplicate |
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
### `runt(text, settings?)`
|
|
371
|
+
|
|
372
|
+
Prevents typographic runts — single short words isolated at the end of a
|
|
373
|
+
paragraph — by replacing the preceding space with a non-breaking space. For
|
|
374
|
+
longer last words, also protects the penultimate word with
|
|
375
|
+
`white-space: nowrap`.
|
|
376
|
+
|
|
377
|
+
**Settings:**
|
|
378
|
+
|
|
379
|
+
| Option | Type | Default | Description |
|
|
380
|
+
| --------------- | ------------------ | ---------------- | -------------------------------------------------------------------- |
|
|
381
|
+
| `threshold` | `number` | `10` | Maximum character length of the last word to trigger runt protection |
|
|
382
|
+
| `space` | `Spaces \| string` | `SPACES.noBreak` | Replacement space character |
|
|
383
|
+
| `minLineLength` | `number` | `150` | Minimum text length required to apply runt protection at all |
|
|
384
|
+
|
|
385
|
+
---
|
|
386
|
+
|
|
387
|
+
### `wrapWithTag(text, settings?, tagSettings?)`
|
|
388
|
+
|
|
389
|
+
Wraps matched bracket-marker syntax into an HTML element node. Returns `Node[]`.
|
|
390
|
+
|
|
391
|
+
```typescript
|
|
392
|
+
import { wrapWithTag } from '@nkardaz/typography-rules/functions';
|
|
393
|
+
|
|
394
|
+
wrapWithTag('H[^2]O');
|
|
395
|
+
// → [text('H'), sup([text('2')]), text('O')]
|
|
396
|
+
|
|
397
|
+
wrapWithTag('H[*2]O', { marker: '*', tag: 'sup' });
|
|
398
|
+
// → [text('H'), sup([text('2')]), text('O')]
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
**Settings:**
|
|
402
|
+
|
|
403
|
+
| Option | Type | Default | Description |
|
|
404
|
+
| --------- | ------------------ | ------------ | ---------------------------------------- |
|
|
405
|
+
| `marker` | `string` | `'^'` | Character after opening bracket |
|
|
406
|
+
| `tag` | `string` | `'sup'` | HTML tag name for the wrapping element |
|
|
407
|
+
| `wrapper` | `[string, string]` | `['[', ']']` | Bracket pair delimiting the marked range |
|
|
408
|
+
|
|
409
|
+
**Tag settings:**
|
|
410
|
+
|
|
411
|
+
| Option | Type | Description |
|
|
412
|
+
| ----------- | ------------------------ | -------------------------- |
|
|
413
|
+
| `className` | `string` | CSS class on the element |
|
|
414
|
+
| `attrs` | `Record<string, string>` | Additional HTML attributes |
|
|
415
|
+
|
|
416
|
+
---
|
|
417
|
+
|
|
418
|
+
### `rubyText(text, settings?, tagSettings?)`
|
|
419
|
+
|
|
420
|
+
Parses ruby annotation syntax into a `<ruby>` node tree with `<rb>` / `<rt>`
|
|
421
|
+
pairs. Returns `Node[]`.
|
|
422
|
+
|
|
423
|
+
```typescript
|
|
424
|
+
import { rubyText } from '@nkardaz/typography-rules/functions';
|
|
425
|
+
|
|
426
|
+
rubyText('[:平安時代][:へいあんじだい]');
|
|
427
|
+
// → ruby → [ rb('平安時代'), rt('へいあんじだい') ]
|
|
428
|
+
|
|
429
|
+
// Multiple base|furigana pairs separated by |
|
|
430
|
+
rubyText('[:東|京][:とう|きょう]');
|
|
431
|
+
// → ruby → [ rb('東'), rt('とう'), rb('京'), rt('きょう') ]
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
**Settings:**
|
|
435
|
+
|
|
436
|
+
| Option | Type | Default | Description |
|
|
437
|
+
| --------- | ------------------ | ------------ | -------------------------------------------------------- |
|
|
438
|
+
| `marker` | `string` | `':'` | Character after opening bracket associated with the ruby |
|
|
439
|
+
| `wrapper` | `[string, string]` | `['[', ']']` | Bracket pair delimiting the ruby group |
|
|
440
|
+
|
|
441
|
+
**Tag settings:** same as `wrapWithTag`.
|
|
442
|
+
|
|
443
|
+
---
|
|
444
|
+
|
|
445
|
+
### `chemNotation(text, settings?, tagSettings?)`
|
|
446
|
+
|
|
447
|
+
Parses chemical notation syntax into MathML `<mmultiscripts>` node trees for
|
|
448
|
+
correct rendering of nuclear/chemical scripts on both sides of a base symbol.
|
|
449
|
+
Returns `Node[]`.
|
|
450
|
+
|
|
451
|
+
```typescript
|
|
452
|
+
import { chemNotation } from '@nkardaz/typography-rules/functions';
|
|
453
|
+
|
|
454
|
+
chemNotation('Вода [%H(_2)-O]');
|
|
455
|
+
chemNotation('[%(^14)(_6)C]');
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
**Syntax inside `[%…]`:**
|
|
459
|
+
|
|
460
|
+
| Notation | Meaning |
|
|
461
|
+
| ------------- | -------------------------------------------------- |
|
|
462
|
+
| `(_val)` | Subscript (lower index) |
|
|
463
|
+
| `(^val)` | Superscript (upper index) |
|
|
464
|
+
| Before base | Left-side prescripts (e.g. `(^14)C`) |
|
|
465
|
+
| After base | Right-side scripts (e.g. `C(_6)`) |
|
|
466
|
+
| `-` separator | Joins multiple parts in one block (e.g. `H(_2)-O`) |
|
|
467
|
+
|
|
468
|
+
**Settings:**
|
|
469
|
+
|
|
470
|
+
| Option | Type | Default | Description |
|
|
471
|
+
| --------- | ------------------ | ------------ | --------------------------------------------------- |
|
|
472
|
+
| `marker` | `string` | `'%'` | Character after opening bracket |
|
|
473
|
+
| `wrapper` | `[string, string]` | `['[', ']']` | Bracket pair delimiting the chemical notation block |
|
|
474
|
+
|
|
475
|
+
**Tag settings:** same as `wrapWithTag`.
|
|
476
|
+
|
|
477
|
+
**Examples:**
|
|
478
|
+
|
|
479
|
+
```
|
|
480
|
+
[%H(_2)-O] → H₂O
|
|
481
|
+
[%NH(_4)-ClO(_4)] → NH₄ClO₄
|
|
482
|
+
[%(^239)U] → ²³⁹U
|
|
483
|
+
[%(^14)(_6)C] → ¹⁴₆C
|
|
484
|
+
[%(^2)(_1)H(^7)(_5)] → ²₁H⁷₅ (left: 2,1 — right: 7,5)
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
> **Note on font styling:** MathML ignores `font-family` and `font-style`. Use a
|
|
488
|
+
> math-capable OpenType font (e.g. STIX Two Math, Latin Modern Math) via
|
|
489
|
+
> `@font-face` on the `math` element. To suppress automatic italicisation of
|
|
490
|
+
> `<mi>`, pass `attrs: { mathvariant: 'normal' }` via `tagSettings`.
|
|
491
|
+
|
|
492
|
+
---
|
|
493
|
+
|
|
494
|
+
## Glyphs
|
|
495
|
+
|
|
496
|
+
The `@nkardaz/typography-rules/glyphs` export provides typed, prototype-enhanced
|
|
497
|
+
glyph registries. All registries support shared utility methods.
|
|
498
|
+
|
|
499
|
+
### Available registries
|
|
500
|
+
|
|
501
|
+
| Export | Description |
|
|
502
|
+
| -------------- | ------------------------------------------------------------------------------- |
|
|
503
|
+
| `DASHES` | Em dash, en dash, soft hyphen, figure dash, non-breaking hyphen, etc. |
|
|
504
|
+
| `SPACES` | All Unicode space variants — non-breaking, thin, hair, narrow, zero-width, etc. |
|
|
505
|
+
| `PUNCTUATION` | Multi-locale quote characters, ellipsis, interrobang, and punctuation marks |
|
|
506
|
+
| `MATHS` | Minus sign (`−`), fraction slash (`⁄`) |
|
|
507
|
+
| `LIGATURES` | Typographic ligatures: fi, fl, ffi, ffl, Æ, Œ, etc. |
|
|
508
|
+
| `CHARACTERS` | Dagger, double dagger, numero (`№`), section sign (`§`), etc. |
|
|
509
|
+
| `TEMPERATURES` | Temperature unit symbols: ℃, ℉, K and text forms |
|
|
510
|
+
| `WALLET` | Currency symbols and ISO 4217 codes |
|
|
511
|
+
| `DIGITS` | ASCII digits and Unicode Roman numeral characters |
|
|
512
|
+
| `RANGES` | Character range strings for use in RegExp character classes |
|
|
513
|
+
|
|
514
|
+
### GlyphSet utility methods
|
|
515
|
+
|
|
516
|
+
All glyph sets expose the following methods:
|
|
517
|
+
|
|
518
|
+
```typescript
|
|
519
|
+
DASHES.join(); // '—|–|⸺|…' — joined string of all values
|
|
520
|
+
DASHES.join(''); // '—–⸺…'
|
|
521
|
+
DASHES.hasKey('em'); // true
|
|
522
|
+
DASHES.hasValue('—'); // true
|
|
523
|
+
DASHES.findKey('—'); // 'em'
|
|
524
|
+
DASHES.find('em', 'en'); // ['—', '–']
|
|
525
|
+
DASHES.insert({ myDash: '\u2E1A' }); // mutably extend the set
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
### PUNCTUATION locale access
|
|
529
|
+
|
|
530
|
+
```typescript
|
|
531
|
+
import { PUNCTUATION } from '@nkardaz/typography-rules/glyphs';
|
|
532
|
+
|
|
533
|
+
PUNCTUATION.get('ru', 'leftSided'); // common + ru leftSided merged
|
|
534
|
+
PUNCTUATION.get('en', 'rightSided'); // common + en rightSided merged
|
|
535
|
+
PUNCTUATION.getList(); // ['common', 'ru', 'en', 'fr', 'is']
|
|
536
|
+
PUNCTUATION.hasKey('de'); // false
|
|
537
|
+
```
|
|
538
|
+
|
|
539
|
+
**Supported locales in PUNCTUATION:**
|
|
540
|
+
|
|
541
|
+
| Locale | Outer quotes | Inner quotes |
|
|
542
|
+
| ------ | --------------- | ----------------- |
|
|
543
|
+
| `ru` | «…» | „…“ |
|
|
544
|
+
| `en` | “…” | ‘…’ |
|
|
545
|
+
| `fr` | «…» | ‹…› |
|
|
546
|
+
| `is` | „…“ | ‚…‘ |
|
|
547
|
+
|
|
548
|
+
---
|
|
549
|
+
|
|
550
|
+
## Aliases
|
|
551
|
+
|
|
552
|
+
The `@nkardaz/typography-rules` export provides an `ALIAS` utility for mapping various locale identifiers to a single root key. All keys and values are automatically normalized to lowercase, and lookups are case-insensitive.
|
|
553
|
+
|
|
554
|
+
### `createAlias(map)`
|
|
555
|
+
|
|
556
|
+
Creates a normalized alias map with utility methods.
|
|
557
|
+
|
|
558
|
+
```typescript
|
|
559
|
+
import { createAlias } from '@nkardaz/typography-rules';
|
|
560
|
+
|
|
561
|
+
const ALIAS = createAlias({
|
|
562
|
+
en: ['en-US', 'English'],
|
|
563
|
+
ru: ['ru-RU', 'Russian'],
|
|
564
|
+
});
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
| Method | Description |
|
|
568
|
+
| :--- | :--- |
|
|
569
|
+
| `has(alias)` | Checks if an alias exists as a root key or an alternative name. |
|
|
570
|
+
| `resolve(alias)` | Resolves an alias to its root key. |
|
|
571
|
+
| `push(root, ...aliases)` | Adds new alternative names to an existing or new root key. |
|
|
572
|
+
| `normalize(...alias)` | Helper to lowercase one or more strings. |
|
|
573
|
+
|
|
574
|
+
### Global `ALIAS`
|
|
575
|
+
|
|
576
|
+
A pre-configured instance used internally for supported locales:
|
|
577
|
+
|
|
578
|
+
```typescript
|
|
579
|
+
import { ALIAS } from '@nkardaz/typography-rules';
|
|
580
|
+
|
|
581
|
+
ALIAS.ru; // ['ru-ru', 'russian', 'русский']
|
|
582
|
+
ALIAS.resolve('Russian'); // 'ru'
|
|
583
|
+
ALIAS.has('Old English'); // true
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
---
|
|
587
|
+
|
|
588
|
+
## Helpers
|
|
589
|
+
|
|
590
|
+
The `@nkardaz/typography-rules/helpers` export provides utilities for safe text
|
|
591
|
+
pipeline construction.
|
|
592
|
+
|
|
593
|
+
### Protection system
|
|
594
|
+
|
|
595
|
+
Temporarily wraps structured content (URLs, emails, code, identifiers) in
|
|
596
|
+
protection markers before typography transformations, then restores originals
|
|
597
|
+
afterward.
|
|
598
|
+
|
|
599
|
+
```typescript
|
|
600
|
+
import { protect, unprotect } from '@nkardaz/typography-rules/helpers';
|
|
601
|
+
|
|
602
|
+
const [protectedText, captured] = protect(text);
|
|
603
|
+
// ... apply typography rules to `protectedText` ...
|
|
604
|
+
const processed = applyRules(protectedText); // your pipeline here
|
|
605
|
+
const result = unprotect(processed, captured);
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
**Protected patterns** (not modified by typography rules):
|
|
609
|
+
|
|
610
|
+
- Email addresses, URLs
|
|
611
|
+
- Unix and Windows file paths
|
|
612
|
+
- XML/HTML tags
|
|
613
|
+
- Inline and block code (backtick syntax)
|
|
614
|
+
- UUIDs, git hashes
|
|
615
|
+
- IPv4, IPv6, MAC addresses
|
|
616
|
+
- Version strings (`v1.2.3`, etc.)
|
|
617
|
+
- CSS selectors, CLI flags (`--option`)
|
|
618
|
+
- ISBN, ISSN, DOI, ORCID identifiers
|
|
619
|
+
- `[##(...)##]` — Special protected block that protects any text inside `()`.
|
|
620
|
+
|
|
621
|
+
### Pattern registry
|
|
622
|
+
|
|
623
|
+
```typescript
|
|
624
|
+
import { createPatterns, PROTECTED_PATTERNS } from '@nkardaz/typography-rules/helpers';
|
|
625
|
+
|
|
626
|
+
const PATTERNS = createPatterns({
|
|
627
|
+
email: /[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+/g,
|
|
628
|
+
url: /https?:\/\/[^\s]+/g,
|
|
629
|
+
});
|
|
630
|
+
|
|
631
|
+
PATTERNS.email; // fresh RegExp instance (lastIndex = 0) on every access
|
|
632
|
+
[...PATTERNS]; // [RegExp, RegExp]
|
|
633
|
+
PATTERNS.combined(); // single alternation RegExp
|
|
634
|
+
PATTERNS.insert({ ... }); // extend with new patterns
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
### Node markers
|
|
638
|
+
|
|
639
|
+
Used to join/split text nodes across boundaries during multi-node processing:
|
|
640
|
+
|
|
641
|
+
```typescript
|
|
642
|
+
import {
|
|
643
|
+
joinNodes,
|
|
644
|
+
splitNodes,
|
|
645
|
+
NODE_MARKER,
|
|
646
|
+
} from '@nkardaz/typography-rules/helpers';
|
|
647
|
+
|
|
648
|
+
const joined = joinNodes(nodes); // 'text1\uE000\uEDFD\uF43Etext2'
|
|
649
|
+
// ... apply rules to `joined` ...
|
|
650
|
+
splitNodes(processed, nodes); // writes segments back to nodes
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
---
|
|
654
|
+
|
|
655
|
+
## Default Rules Reference
|
|
656
|
+
|
|
657
|
+
### Common (applied to all locales)
|
|
658
|
+
|
|
659
|
+
#### Expressions
|
|
660
|
+
|
|
661
|
+
Shared named expression patterns used across common rules
|
|
662
|
+
(`typography/expressions/common.ts`):
|
|
663
|
+
|
|
664
|
+
| Name | Pattern description |
|
|
665
|
+
| -------------------------------- | ------------------------------------------------------------------------- |
|
|
666
|
+
| `plusMinus` | `+` followed by `-` or `−` |
|
|
667
|
+
| `minusPlus` | `-` or `−` followed by `+` |
|
|
668
|
+
| `sectionNumeral` | Section sign `§` followed by numeral(s) |
|
|
669
|
+
| `percentValue` | Number followed by `%`, `‰`, or `‱` |
|
|
670
|
+
| `numeralsRange` | Two digit sequences separated by `-` (e.g. `1-2`) |
|
|
671
|
+
| `ellipsisRange` | Number, then `–` or `−`, then number (e.g. `−2–3`) |
|
|
672
|
+
| `multipleEllipsis` | Two or more consecutive `…` |
|
|
673
|
+
| `walletSymbolBeforeValue` | Currency symbol followed by digits (e.g. `$100`) |
|
|
674
|
+
| `walletSymbolAfterValue` | Digits followed by currency symbol (e.g. `100$`) |
|
|
675
|
+
| `walletISOBeforeValue` | ISO currency code followed by digits (e.g. `USD 100`) |
|
|
676
|
+
| `walletISOAfterValue` | Digits followed by ISO currency code (e.g. `100 USD`) |
|
|
677
|
+
| `expressiveAposiopesis` | Expressive punctuation (`!`, `?`, `‽`, etc.) followed by dots or ellipsis |
|
|
678
|
+
| `backwardsExpressiveAposiopesis` | Dots or ellipsis followed by expressive punctuation |
|
|
679
|
+
| `temperature` | Digit followed by a temperature unit symbol (℃, ℉, K, etc.) |
|
|
680
|
+
|
|
681
|
+
#### Rules
|
|
682
|
+
|
|
683
|
+
| Label | Pattern / Trigger | Replacement | Description |
|
|
684
|
+
| -------------------------------------------- | ------------------------------------ | --------------------- | --------------------------------------------------------------------------- |
|
|
685
|
+
| `/common/space/cleanup/multiple` | Multiple identical special spaces | Single space | Collapses duplicate non-breaking, hair, and thin spaces via `clearSpaces` |
|
|
686
|
+
| `/common/space/cleanup/trim` | Leading / trailing whitespace | _(removed)_ | Trims surrounding whitespace from the processed text |
|
|
687
|
+
| `/common/number/negative` | Hyphen-minus before digit (`-123`) | `−123` | Replaces ASCII hyphen-minus with Unicode minus sign `−` |
|
|
688
|
+
| `/common/number/range/en-dash` | Digit range with hyphen (`1-2`) | `1–2` | Converts hyphen between two digit sequences into an en dash |
|
|
689
|
+
| `/common/number/range/ellipsis-on-negative` | Range with en dash or minus | `−2…3` | Converts numeric ranges using en dash or minus into ellipsis notation |
|
|
690
|
+
| `/common/number/dimension` | `NxN` or `NхN` (latin/cyrillic x) | `N×N` | Replaces dimension separator with multiplication sign × |
|
|
691
|
+
| `/common/number/multiply` | `N*N` | `N×N` | Replaces asterisk between numbers with multiplication sign × |
|
|
692
|
+
| `/common/number/fraction` | `N/N` | `N⁄N` 16⁄9 1000⁄7 | Replaces slash with fraction slash `⁄` |
|
|
693
|
+
| `/common/symbol/copyright` | `(c)` or `(с)` (latin/cyrillic) | `©` | Copyright symbol substitution |
|
|
694
|
+
| `/common/symbol/trademark` | `(tm)` or `(тм)` | `™` | Trademark symbol substitution |
|
|
695
|
+
| `/common/symbol/registered` | `(r)` | `®` | Registered trademark symbol substitution |
|
|
696
|
+
| `/common/symbol/section` | `(s)` | `§` | Section sign substitution |
|
|
697
|
+
| `/common/symbol/math/plus-minus` | `+-` or `+−` | `±` | Plus-minus sign substitution |
|
|
698
|
+
| `/common/symbol/math/minus-plus` | `-+` or `−+` | `∓` | Minus-plus sign substitution |
|
|
699
|
+
| `/common/punctuation/dashes/em-dash` | Double hyphen `--` | `—` | Replaces double hyphen-minus with a typographic em dash |
|
|
700
|
+
| `/common/punctuation/dots/overload` | Four or more consecutive dots `....` | `...` | Normalizes over-long dot sequences before ellipsis conversion |
|
|
701
|
+
| `/common/punctuation/dots/ellipsis` | Three dots `...` | `…` | Converts ASCII triple-dot into the Unicode ellipsis character `…` |
|
|
702
|
+
| `/common/punctuation/dots/ellipsis-overload` | Two or more consecutive `…` | `…` | Deduplicates repeated ellipsis characters |
|
|
703
|
+
| `/common/punctuation/apostrophe` | Straight apostrophe `'` | `’` | Replaces with Unicode right single quotation mark `’`, weight `200` |
|
|
704
|
+
| `/common/symbol/section/value` | `§` followed by numeral(s) | `§ <numeral>` | Adds narrow non-breaking space between section sign and numeral, weight `1` |
|
|
705
|
+
| `/common/typography/runt` | Short last word(s) in a paragraph | Preceding space → ` ` | Prevents typographic runts. Weight: `Infinity` — always runs last |
|
|
706
|
+
|
|
707
|
+
#### Markup Rules
|
|
708
|
+
|
|
709
|
+
| Label | Pattern / Trigger | Replacement | Description |
|
|
710
|
+
| --------------------------- | ------------------------------------- | ------------------ | ------------------------------------------------------------------------- |
|
|
711
|
+
| `/common/wraps/chem` | `[%…]` marker syntax | `<math>` node tree | Parses chemical notation into MathML `<mmultiscripts>` via `chemNotation` |
|
|
712
|
+
| `/common/wraps/ruby` (`?:`) | `[?:base\|…][?:annotation\|…]` syntax | `<ruby>` node tree | Alternate ruby style (`--alternate`), via `rubyText` |
|
|
713
|
+
| `/common/wraps/ruby` (`!:`) | `[!:base\|…][!:annotation\|…]` syntax | `<ruby>` node tree | Ruby annotation below the base (`--under`), via `rubyText` |
|
|
714
|
+
| `/common/wraps/ruby` (`:`) | `[:base\|…][:annotation\|…]` syntax | `<ruby>` node tree | Ruby annotation above the base (`--over`), via `rubyText` |
|
|
715
|
+
| `/common/wraps/sup` | `[^…]` marker syntax | `<sup>` node | Wraps bracket-marker content in a superscript element via `wrapWithTag` |
|
|
716
|
+
| `/common/wraps/sub` | `[_…]` marker syntax | `<sub>` node | Wraps bracket-marker content in a subscript element via `wrapWithTag` |
|
|
717
|
+
|
|
718
|
+
> **Note on markup rule order:** rules are registered with weight `Infinity` and
|
|
719
|
+
> applied in the order shown — `chem` first, ruby variants from most-specific
|
|
720
|
+
> to least-specific marker (to avoid conflicts), then `sup`/`sub` last.
|
|
721
|
+
|
|
722
|
+
---
|
|
723
|
+
|
|
724
|
+
### Russian (`ru`)
|
|
725
|
+
|
|
726
|
+
#### Expressions
|
|
727
|
+
|
|
728
|
+
Russian-specific named expression patterns (`typography/expressions/ru.ts`),
|
|
729
|
+
extending common expressions:
|
|
730
|
+
|
|
731
|
+
| Name | Pattern description |
|
|
732
|
+
| --------------------------- | ----------------------------------------------------------------------------- |
|
|
733
|
+
| `numeroNumeral` | Numero sign `№` followed by numeral(s) |
|
|
734
|
+
| `invalidPunctuationSpacing` | Space after left punctuation or before right punctuation (locale-aware) |
|
|
735
|
+
| `dialogEmDash` | Em dash `—` at the start of a line (dialogue opener) |
|
|
736
|
+
| `attributionEmDash` | Left punctuation, then `—`, then a word (attribution pattern) |
|
|
737
|
+
| `subjectPredicateEmDash` | Word `—` word (subject–predicate dash pattern) |
|
|
738
|
+
| `siUnitMul`, `siUnitDiv` | SI unit multiplication / division expressions (Cyrillic prefixes) |
|
|
739
|
+
| `siUnitBase` | Digit followed by a Russian SI unit |
|
|
740
|
+
| `siUnitPowAfterNum` | Digit, SI unit, then exponent digit |
|
|
741
|
+
| `siUnitPow` | SI unit followed by exponent digit (not preceded by digit) |
|
|
742
|
+
| `date` | Numeral followed by a Russian date abbreviation (в, г, мес, нед, дн, д, etc.) |
|
|
743
|
+
|
|
744
|
+
#### Rules
|
|
745
|
+
|
|
746
|
+
| Label | Pattern / Trigger | Replacement | Description |
|
|
747
|
+
| ------------------------------------------------------- | ----------------------------------------------------- | ------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
|
|
748
|
+
| `/russian/currency/wallet/symbol-flip` | Currency symbol before value (`$100`) | `100$` | Moves currency symbol after the value (Russian convention) |
|
|
749
|
+
| `/russian/currency/wallet/iso-flip` | ISO code before value (`USD 100`) | `100 USD` | Moves ISO code after the value |
|
|
750
|
+
| `/russian/currency/wallet/symbol-value` | Value then currency symbol (`100$`) | `100<NBSP>$` | Adds non-breaking space between value and currency symbol |
|
|
751
|
+
| `/russian/currency/wallet/iso-value` | Value then ISO code (`100 USD`) | `100<NBSP>USD` | Adds non-breaking space between value and ISO code |
|
|
752
|
+
| `/russian/currency/rub-to-symbol` | `рублей`, `руб.`, `р.` after digits | `N ₽` | Replaces Russian rouble word forms with `₽` symbol |
|
|
753
|
+
| `/russian/currency/eur-to-symbol` | `евро` after digits | `N €` | Replaces euro word with `€` symbol |
|
|
754
|
+
| `/russian/currency/usd-to-symbol` | `долларов`, `дол.` after digits | `N $` | Replaces dollar word forms with `$` symbol |
|
|
755
|
+
| `/russian/number/groups` | Large numbers (5+ digits) | `1 234 567` | Digit grouping via `smartNumberGrouping` with `locale: 'ru-RU'` |
|
|
756
|
+
| `/russian/number/normalize/dot->comma` | `N.N` decimal dot | `N,N` | Converts decimal dot to comma (Russian numeric standard) |
|
|
757
|
+
| `/russian/metric/si-unit/base` | Digit followed by SI unit | `N Unit` | Narrow non-breaking space between value and unit |
|
|
758
|
+
| `/russian/metric/si-unit/n*n-n` | SI unit multiplication (`м*с`) | `м·с` `Н·м/с` | Replaces `*` between SI units with middle dot `·` |
|
|
759
|
+
| `/russian/metric/si-unit/n-n*n` | SI unit division | `м·с` `Дж/Кл·с` | Same as above for division form |
|
|
760
|
+
| `/russian/metric/si-unit/pow-after-value` | `N Unit<exp>` | `N Unit<sup>exp</sup>` | Superscript exponent with narrow non-breaking space, weight `-1` |
|
|
761
|
+
| `/russian/metric/si-unit/pow` | `Unit<exp>` (no preceding number) | `Unit<sup>exp</sup>`<br/>`м³/(кг·с²)` | Superscript exponent, weight `-1` |
|
|
762
|
+
| `/russian/scientific/temperature/value` | `N ℃` / `N ℉` etc. | `N ℃` | Non-breaking space between temperature value and unit<br/>`°C` `°F` `K` `°D` `°L` `°N` `°W` `°Da` `°H` `°R` `°Ré` `°Rø` |
|
|
763
|
+
| `/russian/symbol/percent-like/value` | `N%` / `N‰` / `N‱` | `N<NBSP>%` | Non-breaking space between value and percent-like symbol |
|
|
764
|
+
| `/russian/symbol/numero/value` | `№` followed by numeral(s) | `№ <numeral>` | Narrow non-breaking space between numero sign and numeral |
|
|
765
|
+
| `/russian/number/division` | `N/N` | `N÷N` | Replaces slash between numbers with obelus `÷` |
|
|
766
|
+
| `/russian/number/division-times` | `N/*N` | `N⋇N` | Division-times operator substitution |
|
|
767
|
+
| `/russian/punctuation/dashes/dialog-em-dash` | `—` at line start | `—<NBSP>` | Non-breaking space after dialogue em dash |
|
|
768
|
+
| `/russian/punctuation/dashes/attribution-em-dash` | Right punctuation, `<SP>—<SP>`, word | Right punctuation, `<NBSP>—<NBSP>`, word | Non-breaking spaces around attribution dash |
|
|
769
|
+
| `/russian/punctuation/dashes/subject-predicate-em-dash` | word, `<SP>—<SP>`, word | word, `<NBSP>—<SP>`, word | Non-breaking space before subject–predicate dash, regular space after |
|
|
770
|
+
| `/russian/punctuation/quotes` | Straight quotes `"…"` | `«…»` / `„…“` | Russian typographic quotes via `smartQuotes`, weight `100` |
|
|
771
|
+
| `/russian/punctuation/dot-after-quote` | `.»` | `».` | Moves period outside closing guillemet, weight `1000` |
|
|
772
|
+
| `/russian/punctuation/dot-after-expression` | Expressive punctuation near dots<br/>`!...` `!…` etc. | Normalized form<br/>`!..` `?..` `‽..` | Normalizes aposiopesis punctuation patterns |
|
|
773
|
+
| `/russian/punctuation/invalid-spacing` | Space after `«` or before `»` | _(removed)_ | Removes invalid spaces around guillemets, weight `1000` |
|
|
774
|
+
| `/russian/compositions/initials` | Б. Ю. Александров etc. | Б. Ю. Александров<br/>Thin-space separated | Replaces regular spaces between initials and name with thin spaces (U+2009) |
|
|
775
|
+
| `/russian/text/conjunctions` | Short particles: `бы`, `же`, `ли` etc. | `<NBSP>particle` | Prevents particles from being orphaned at line start |
|
|
776
|
+
| `/russian/text/conjunctions` | Prepositions: `за`, `из`, `на`, `не` etc. | `preposition<NBSP>` | Prevents prepositions from being left alone at line end |
|
|
777
|
+
| `/russian/text/adress` | `мкр-н`, `дом`, `д.`, `ул.` etc. | With `<NBSP>` | Prevents address abbreviations from splitting across lines |
|
|
778
|
+
| `/russian/text/common-shorts` | `коп.`, `см.`, `рис.` etc. | With `<NBSP>` | Prevents common abbreviations from splitting |
|
|
779
|
+
| `/russian/text/organizations` | `АО`, `ООО`, `ПАО`, `НИИ` etc. | `ООО<NBSP>` | Non-breaking space after legal entity abbreviations |
|
|
780
|
+
| `/russian/text/dates` | `N в.` / `N г.` / `N мес.` etc. | `N<NBSP>в.` | Non-breaking space between numeral and date abbreviation |
|
|
781
|
+
| `/russian/text/millions` | `N тыс.` / `N млн.` / `N млрд.` | `N<NBSP>тыс.` | Non-breaking space before large-number abbreviations |
|
|
782
|
+
| `/russian/text/no-break-hyphen` | `кто-то`, `кое-что`, `ну-ка`, `всё-таки` etc. | With non-breaking hyphen | Replaces hyphens in fixed compound words/particles with non-breaking hyphen |
|
|
783
|
+
| `/russian/text/orphan-letters` | Single Cyrillic letter followed by space | `letter<NBSP>` | Prevents single-letter words from being orphaned at line end |
|
|
784
|
+
|
|
785
|
+
#### Markup Rules
|
|
786
|
+
|
|
787
|
+
_No locale-specific markup rules for `ru` currently._
|
|
788
|
+
|
|
789
|
+
---
|
|
790
|
+
|
|
791
|
+
### English (`en`)
|
|
792
|
+
|
|
793
|
+
#### Expressions
|
|
794
|
+
|
|
795
|
+
English-specific named expression patterns (`typography/expressions/en.ts`),
|
|
796
|
+
extending common expressions:
|
|
797
|
+
|
|
798
|
+
| Name | Pattern description |
|
|
799
|
+
| --------------------------- | -------------------------------------------------------------------- |
|
|
800
|
+
| `numberNumeral` | Number sign `#` followed by digits (e.g. `#42`) |
|
|
801
|
+
| `invalidPunctuationSpacing` | Space after left punctuation or before right punctuation (en-locale) |
|
|
802
|
+
| `siUnitMul`, `siUnitDiv` | SI unit multiplication / division expressions |
|
|
803
|
+
| `siUnitBase` | Digit followed by an SI unit |
|
|
804
|
+
| `siUnitPowAfterNum` | Digit, SI unit, then exponent digit |
|
|
805
|
+
| `siUnitPow` | SI unit followed by exponent digit (not preceded by digit) |
|
|
806
|
+
|
|
807
|
+
#### Rules
|
|
808
|
+
|
|
809
|
+
| Label | Pattern / Trigger | Replacement | Description |
|
|
810
|
+
| -------------------------------------------- | ------------------------------------- | ---------------------- | ---------------------------------------------------------------- |
|
|
811
|
+
| `/english/currency/wallet/symbol-flip` | Value then currency symbol (`100$`) | `$100` | Moves currency symbol before the value (English convention) |
|
|
812
|
+
| `/english/currency/wallet/iso-flip` | ISO code before value (`USD 100`) | `100 USD` | Moves ISO code after the value |
|
|
813
|
+
| `/english/currency/wallet/symbol-value` | Currency symbol before value (`$100`) | `$100` | Ensures no extra space between symbol and value |
|
|
814
|
+
| `/english/currency/wallet/iso-value` | Value then ISO code (`100 USD`) | `100<NBSP>USD` | Adds non-breaking space between value and ISO code |
|
|
815
|
+
| `/english/number/groups` | Large numbers (5+ digits) | `1,234,567` | Digit grouping via `smartNumberGrouping` with `locale: 'en-US'` |
|
|
816
|
+
| `/english/metric/si-unit/base` | Digit followed by SI unit | `N Unit` | Narrow non-breaking space between value and unit |
|
|
817
|
+
| `/english/metric/si-unit/n*n-n` | SI unit multiplication (`m*s`) | `m·s` `N·m/s` | Replaces `*` between SI units with middle dot `·` |
|
|
818
|
+
| `/english/metric/si-unit/n-n*n` | SI unit division | `m·s` `J/C·s` | Same as above for division form |
|
|
819
|
+
| `/english/metric/si-unit/pow-after-value` | `N Unit<exp>` | `N Unit<sup>exp</sup>` | Superscript exponent with narrow non-breaking space, weight `-1` |
|
|
820
|
+
| `/english/metric/si-unit/pow` | `Unit<exp>` (no preceding number) | `Unit<sup>exp</sup>` | Superscript exponent, weight `-1` |
|
|
821
|
+
| `/english/scientific/temperature/value` | `N ℃` / `N ℉` etc. | `N℃` | Removes space between temperature value and unit |
|
|
822
|
+
| `/english/symbol/percent-like/value` | `N%` / `N‰` / `N‱` | `N%` | Normalizes space between value and percent-like symbol |
|
|
823
|
+
| `/english/symbol/hash/value` | `#` followed by digits (`#42`) | `#42` | Normalizes space between number sign and numeral |
|
|
824
|
+
| `/english/number/division` | `N/N` | `N÷N` | Replaces slash between numbers with obelus `÷` |
|
|
825
|
+
| `/english/number/division-times` | `N/*N` | Division-times form | Division-times operator substitution |
|
|
826
|
+
| `/english/punctuation/quotes` | Straight quotes `"…"` / `'…'` | `“…”` / `‘…’` | US typographic quotes via `smartQuotes`, weight `100` |
|
|
827
|
+
| `/english/punctuation/dot-before-expression` | Expressive punctuation near dots | Normalized form | Normalizes aposiopesis punctuation patterns |
|
|
828
|
+
| `/english/punctuation/invalid-spacing` | Space after `“` or before `”` etc. | _(removed)_ | Removes invalid spaces around punctuation, weight `1000` |
|
|
829
|
+
| `/english/ligatures/fi` | `fi` | `ﬁ` | Typographic fi ligature |
|
|
830
|
+
| `/english/ligatures/fl` | `fl` | `ﬂ` | Typographic fl ligature |
|
|
831
|
+
| `/english/ligatures/ffi` | `ffi` | `ﬃ` | Typographic ffi ligature |
|
|
832
|
+
| `/english/ligatures/ffl` | `ffl` | `ﬄ` | Typographic ffl ligature |
|
|
833
|
+
|
|
834
|
+
#### Markup Rules
|
|
835
|
+
|
|
836
|
+
_No locale-specific markup rules for `en` currently._
|
|
837
|
+
|
|
838
|
+
---
|
|
839
|
+
|
|
840
|
+
### Old English / Ænglisċ (`ang`)
|
|
841
|
+
|
|
842
|
+
#### Rules
|
|
843
|
+
|
|
844
|
+
| Label | Pattern / Trigger | Replacement | Description |
|
|
845
|
+
| ---------------------- | --------------------------- | ----------- | ---------------------------------------------------------------------- |
|
|
846
|
+
| `/ænglisċ/articles/þe` | `The` / `the` / `Þe` / `þe` | `Þͤ` / `þͤ` | Replaces modern “the” with Old English thorn + combining letter e (`ͤ`) |
|
|
847
|
+
|
|
848
|
+
---
|
|
849
|
+
|
|
850
|
+
## Rule Weights
|
|
851
|
+
|
|
852
|
+
Rules are applied in ascending weight order. Rules with equal weight preserve
|
|
853
|
+
their registration order (stable sort).
|
|
854
|
+
|
|
855
|
+
| Weight | Meaning |
|
|
856
|
+
| ------------- | ------------------------------------------------------------------- |
|
|
857
|
+
| `-Infinity` | Always first — for rules that must run before all text transforms |
|
|
858
|
+
| `0` (default) | Standard priority |
|
|
859
|
+
| `< 0` | Applied before standard rules |
|
|
860
|
+
| `> 0` | Applied after standard rules |
|
|
861
|
+
| `100` | Early-stage — e.g. quote normalization before apostrophe processing |
|
|
862
|
+
| `200` | Late-stage — e.g. apostrophe normalization after quote processing |
|
|
863
|
+
| `Infinity` | Always last — e.g. `runt`, which must run after all text transforms |
|
|
864
|
+
|
|
865
|
+
---
|
|
866
|
+
|
|
867
|
+
### Node utilities
|
|
868
|
+
|
|
869
|
+
```typescript
|
|
870
|
+
import {
|
|
871
|
+
htmlNode,
|
|
872
|
+
renderNode,
|
|
873
|
+
renderNodes,
|
|
874
|
+
nodeToMdast,
|
|
875
|
+
} from '@nkardaz/typography-rules';
|
|
876
|
+
```
|
|
877
|
+
|
|
878
|
+
| Function | Signature | Description |
|
|
879
|
+
| ------------- | ------------------------------------------- | ------------------------------------------------------------------- |
|
|
880
|
+
| `htmlNode` | `(text, settings?) => Node[]` | Splits text into a mixed array of text and element nodes via RegExp |
|
|
881
|
+
| `renderNode` | `(node: Node) => string` | Serializes a single `Node` to an HTML string |
|
|
882
|
+
| `renderNodes` | `(nodes: Node[]) => string` | Serializes an array of `Node` to an HTML string |
|
|
883
|
+
| `nodeToMdast` | `(node: Node) => Text \| MdxJsxTextElement` | Converts an internal `Node` to an mdast-compatible AST node |
|
|
884
|
+
|
|
885
|
+
---
|
|
886
|
+
|
|
887
|
+
## TypeScript
|
|
888
|
+
|
|
889
|
+
The package is fully typed. Key exported types:
|
|
890
|
+
|
|
891
|
+
```typescript
|
|
892
|
+
import type {
|
|
893
|
+
Rule,
|
|
894
|
+
RegExpReplaceRule,
|
|
895
|
+
RegExpTransformRule,
|
|
896
|
+
FunctionRule,
|
|
897
|
+
RuleFunction,
|
|
898
|
+
Node,
|
|
899
|
+
TextNode,
|
|
900
|
+
ElementNode,
|
|
901
|
+
QuoteSettings,
|
|
902
|
+
NumberSpaceSettings,
|
|
903
|
+
ClearSpacesSettings,
|
|
904
|
+
RuntSettings,
|
|
905
|
+
HtmlNodeSettings,
|
|
906
|
+
WrapWithTagsSettings,
|
|
907
|
+
RubyTextSettings,
|
|
908
|
+
ChemNotationSettings,
|
|
909
|
+
TagSettings,
|
|
910
|
+
} from '@nkardaz/typography-rules';
|
|
911
|
+
```
|