punctilio 1.0.4 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -10
- package/dist/constants.d.ts +13 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +15 -1
- package/dist/constants.js.map +1 -1
- package/dist/dashes.d.ts +7 -51
- package/dist/dashes.d.ts.map +1 -1
- package/dist/dashes.js +66 -104
- package/dist/dashes.js.map +1 -1
- package/dist/index.d.ts +13 -38
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +25 -3
- package/dist/index.js.map +1 -1
- package/dist/quotes.d.ts +4 -31
- package/dist/quotes.d.ts.map +1 -1
- package/dist/quotes.js +4 -13
- package/dist/quotes.js.map +1 -1
- package/dist/symbols.d.ts +16 -158
- package/dist/symbols.d.ts.map +1 -1
- package/dist/symbols.js +55 -189
- package/dist/symbols.js.map +1 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +3 -1
- package/dist/utils.js.map +1 -1
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@ The best typography package for English.
|
|
|
6
6
|
import { transform } from 'punctilio'
|
|
7
7
|
|
|
8
8
|
transform('"It\'s a beautiful thing, the destruction of words..." -- 1984')
|
|
9
|
-
// → “It’s a beautiful thing, the destruction of words
|
|
9
|
+
// → “It’s a beautiful thing, the destruction of words…”—1984
|
|
10
10
|
```
|
|
11
11
|
|
|
12
12
|
[](https://github.com/alexander-turner/punctilio/actions/workflows/test.yml)
|
|
@@ -23,9 +23,9 @@ As far as I can tell, `punctilio` is the most reliable and feature-complete. I b
|
|
|
23
23
|
|
|
24
24
|
[^wrote]: While Claude is the number one contributor to this repository, that’s just because Claude has helped me port my existing code and add minor features. The core regular expressions (e.g. dashes, quotes, multiplication signs) are human-written. Those numerous commits don’t show in this repo’s history.
|
|
25
25
|
|
|
26
|
-
I tested `punctilio` 1.0.1 against [`smartypants`](https://www.npmjs.com/package/smartypants) 0.2.2, [`tipograph`](https://www.npmjs.com/package/tipograph) 0.7.4, [`smartquotes`](https://www.npmjs.com/package/smartquotes) 2.3.2, [`typograf`](https://www.npmjs.com/package/typograf) 7.6.0, and [`retext-smartypants`](https://www.npmjs.com/package/retext-smartypants) 6.2.0.[^python] These other packages have spotty feature coverage and inconsistent impact on text. For example, `smartypants` mishandles quotes after em dashes (though quite hard to see in GitHub
|
|
26
|
+
I tested `punctilio` 1.0.1 against [`smartypants`](https://www.npmjs.com/package/smartypants) 0.2.2, [`tipograph`](https://www.npmjs.com/package/tipograph) 0.7.4, [`smartquotes`](https://www.npmjs.com/package/smartquotes) 2.3.2, [`typograf`](https://www.npmjs.com/package/typograf) 7.6.0, and [`retext-smartypants`](https://www.npmjs.com/package/retext-smartypants) 6.2.0.[^python] These other packages have spotty feature coverage and inconsistent impact on text. For example, `smartypants` mishandles quotes after em dashes (though quite hard to see in GitHub’s font) and lacks multiplication sign support.
|
|
27
27
|
|
|
28
|
-
[^python]: The Python libraries I found were closely related to the JavaScript packages, so I don’t include Python
|
|
28
|
+
[^python]: The Python libraries I found were closely related to the JavaScript packages. I tested them and found similar scores, so I don’t include separate Python results.
|
|
29
29
|
|
|
30
30
|
| Input | `smartypants` | `punctilio` |
|
|
31
31
|
|:-----:|:-----------------:|:-------:|
|
|
@@ -64,7 +64,7 @@ My [`benchmark.mjs`](./benchmark.mjs) measures how well libraries handle a [wide
|
|
|
64
64
|
| Non-English quotes | „Hallo" (German) | ✗ | ✓ | ✗ | ✓ | ✗ |
|
|
65
65
|
| Non-breaking spaces | Chapter 1 | ✗ | ✗ | ✗ | ✓ | ✗ |
|
|
66
66
|
|
|
67
|
-
`typograf` uniquely inserts non-breaking spaces to prevent bad line breaks (e.g. before numbers, after colons). `punctilio`'s
|
|
67
|
+
`typograf` uniquely inserts non-breaking spaces to prevent bad line breaks (e.g. before numbers, after colons). I might add this to `punctilio` in the future. `punctilio`'s other missing feature is non-English quote support—feel free to make a pull request!
|
|
68
68
|
|
|
69
69
|
## Works with HTML DOMs via separation boundaries
|
|
70
70
|
|
|
@@ -82,6 +82,10 @@ transform(`"Wait${DEFAULT_SEPARATOR}"`)
|
|
|
82
82
|
|
|
83
83
|
To use this with a DOM, a walker tracks which text node each segment came from, inserts separators between them, transforms the combined string, then splits on separators to update each node. Use the `separator` option if `U+E000` conflicts with your content. For an example of how to integrate this functionality, see [my website’s code](https://github.com/alexander-turner/TurnTrout.com/blob/main/quartz/plugins/transformers/formatting_improvement_html.ts).
|
|
84
84
|
|
|
85
|
+
### Not for raw Markdown
|
|
86
|
+
|
|
87
|
+
`punctilio` transforms plain text or separator-flattened HTML—not raw Markdown.
|
|
88
|
+
|
|
85
89
|
## Options
|
|
86
90
|
|
|
87
91
|
`punctilio` doesn’t enable all transformations by default. Fractions and degrees tend to match too aggressively (perfectly applying the degree transformation requires semantic meaning). Superscript letters and punctuation ligatures have spotty font support—on GitHub, this README’s font doesn’t even support the example superscript! Furthermore, `ligatures = true` can change the meaning of text by collapsing question and exclamation marks.
|
|
@@ -91,11 +95,22 @@ transform(text, {
|
|
|
91
95
|
punctuationStyle: 'american' | 'british' | 'none', // default: 'american'
|
|
92
96
|
dashStyle: 'american' | 'british' | 'none', // default: 'american'
|
|
93
97
|
|
|
94
|
-
symbols: true,
|
|
95
|
-
collapseSpaces: true,
|
|
96
|
-
fractions: false,
|
|
97
|
-
degrees: false,
|
|
98
|
-
superscript: false,
|
|
99
|
-
ligatures: false,
|
|
98
|
+
symbols: true, // math, legal, arrows
|
|
99
|
+
collapseSpaces: true, // normalize whitespace
|
|
100
|
+
fractions: false, // 1/2 → ½
|
|
101
|
+
degrees: false, // 20 C → 20 °C
|
|
102
|
+
superscript: false, // 1st → 1ˢᵗ
|
|
103
|
+
ligatures: false, // ??? → ⁇, ?! → ⁈, !? → ⁉, !!! → !
|
|
104
|
+
checkIdempotency: true, // verify transform(transform(x)) === transform(x)
|
|
100
105
|
})
|
|
101
106
|
```
|
|
107
|
+
|
|
108
|
+
The `'american'` style follows the [Chicago Manual of Style](https://www.chicagomanualofstyle.org/):
|
|
109
|
+
- **Punctuation**: Periods and commas go inside quotation marks (“Hello,” she said.)
|
|
110
|
+
- **Dashes**: Unspaced em-dashes between words (word—word)
|
|
111
|
+
|
|
112
|
+
The `'british'` style follows [Oxford style](https://www.ox.ac.uk/sites/files/oxford/Style%20Guide%20quick%20reference%20A-Z.pdf):
|
|
113
|
+
- **Punctuation**: Periods and commas go outside quotation marks (“Hello”, she said.)
|
|
114
|
+
- **Dashes**: Spaced en-dashes between words (word – word)
|
|
115
|
+
|
|
116
|
+
`punctilio` is idempotent by design: `transform(transform(text))` always equals `transform(text)`. If performance is critical, set `checkIdempotency: false` to skip the verification pass.
|
package/dist/constants.d.ts
CHANGED
|
@@ -59,6 +59,19 @@ export declare const UNICODE_SYMBOLS: {
|
|
|
59
59
|
readonly EXCLAMATION_QUESTION: "⁉";
|
|
60
60
|
readonly DOUBLE_EXCLAMATION: "‼";
|
|
61
61
|
};
|
|
62
|
+
/**
|
|
63
|
+
* Character class pattern for Latin letters including European accented characters.
|
|
64
|
+
* Use inside regex character classes: `[${LATIN_LETTERS}]`
|
|
65
|
+
*
|
|
66
|
+
* Includes:
|
|
67
|
+
* - Basic Latin: A-Z, a-z
|
|
68
|
+
* - Latin-1 Supplement: À-Ö, Ø-ö, ø-ÿ (excludes × and ÷)
|
|
69
|
+
* - Latin Extended-A: Ā-ſ (U+0100-017F)
|
|
70
|
+
* - Latin Extended-B: ƀ-ɏ (U+0180-024F)
|
|
71
|
+
*
|
|
72
|
+
* Examples of covered characters: é, ñ, ü, ø, ą, ł, ș
|
|
73
|
+
*/
|
|
74
|
+
export declare const LATIN_LETTERS = "A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF\\u0100-\\u017F\\u0180-\\u024F";
|
|
62
75
|
/**
|
|
63
76
|
* Default separator character for text spanning HTML elements.
|
|
64
77
|
* Uses Unicode Private Use Area character U+E000.
|
package/dist/constants.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;
|
|
1
|
+
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAkDlB,CAAA;AAEV;;;;;;;;;;;GAWG;AACH,eAAO,MAAM,aAAa,sFAAsF,CAAA;AAEhH;;;GAGG;AACH,eAAO,MAAM,iBAAiB,WAAW,CAAA;AACzC,eAAO,MAAM,yBAAyB,QAAwC,CAAA;AAE9E;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,iBAAiB,CAAC,gBAAgB,EAAE,MAAM,GAAG,MAAM,CAElE;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,eAAe,CAAC,gBAAgB,EAAE,MAAM,GAAG,MAAM,CAEhE"}
|
package/dist/constants.js
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* @module constants
|
|
9
9
|
*/
|
|
10
|
+
import escapeStringRegexp from "escape-string-regexp";
|
|
10
11
|
/**
|
|
11
12
|
* Unicode symbols for typography transformations.
|
|
12
13
|
*/
|
|
@@ -61,12 +62,25 @@ export const UNICODE_SYMBOLS = {
|
|
|
61
62
|
EXCLAMATION_QUESTION: "\u2049", // ⁉
|
|
62
63
|
DOUBLE_EXCLAMATION: "\u203C", // ‼
|
|
63
64
|
};
|
|
65
|
+
/**
|
|
66
|
+
* Character class pattern for Latin letters including European accented characters.
|
|
67
|
+
* Use inside regex character classes: `[${LATIN_LETTERS}]`
|
|
68
|
+
*
|
|
69
|
+
* Includes:
|
|
70
|
+
* - Basic Latin: A-Z, a-z
|
|
71
|
+
* - Latin-1 Supplement: À-Ö, Ø-ö, ø-ÿ (excludes × and ÷)
|
|
72
|
+
* - Latin Extended-A: Ā-ſ (U+0100-017F)
|
|
73
|
+
* - Latin Extended-B: ƀ-ɏ (U+0180-024F)
|
|
74
|
+
*
|
|
75
|
+
* Examples of covered characters: é, ñ, ü, ø, ą, ł, ș
|
|
76
|
+
*/
|
|
77
|
+
export const LATIN_LETTERS = "A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF\\u0100-\\u017F\\u0180-\\u024F";
|
|
64
78
|
/**
|
|
65
79
|
* Default separator character for text spanning HTML elements.
|
|
66
80
|
* Uses Unicode Private Use Area character U+E000.
|
|
67
81
|
*/
|
|
68
82
|
export const DEFAULT_SEPARATOR = "\uE000";
|
|
69
|
-
export const ESCAPED_DEFAULT_SEPARATOR = DEFAULT_SEPARATOR
|
|
83
|
+
export const ESCAPED_DEFAULT_SEPARATOR = escapeStringRegexp(DEFAULT_SEPARATOR);
|
|
70
84
|
/**
|
|
71
85
|
* Creates a marker-aware word boundary pattern for the START of a match.
|
|
72
86
|
*
|
package/dist/constants.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG;IAC7B,QAAQ,EAAE,QAAQ;IAClB,cAAc,EAAE,QAAQ;IACxB,SAAS,EAAE,QAAQ;IACnB,UAAU,EAAE,QAAQ;IACpB,SAAS,EAAE,QAAQ;IACnB,UAAU,EAAE,QAAQ;IACpB,SAAS,EAAE,QAAQ;IACnB,MAAM,EAAE,QAAQ;IAChB,WAAW,EAAE,QAAQ;IACrB,UAAU,EAAE,QAAQ;IACpB,gBAAgB,EAAE,QAAQ;IAC1B,WAAW,EAAE,QAAQ;IACrB,UAAU,EAAE,QAAQ;IACpB,aAAa,EAAE,QAAQ;IACvB,KAAK,EAAE,QAAQ;IACf,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,OAAO,EAAE,QAAQ;IACjB,OAAO,EAAE,QAAQ;IACjB,KAAK,EAAE,QAAQ;IACf,iBAAiB,EAAE,QAAQ;IAC3B,kBAAkB,EAAE,QAAQ;IAC5B,iBAAiB,EAAE,QAAQ;IAC3B,kBAAkB,EAAE,QAAQ;IAC5B,IAAI,EAAE,QAAQ;IACd,+BAA+B;IAC/B,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,wBAAwB;IACxB,eAAe,EAAE,QAAQ,EAAE,IAAI;IAC/B,oBAAoB,EAAE,QAAQ,EAAE,IAAI;IACpC,oBAAoB,EAAE,QAAQ,EAAE,IAAI;IACpC,kBAAkB,EAAE,QAAQ,EAAE,IAAI;CAC1B,CAAA;AAEV;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,QAAQ,CAAA;AACzC,MAAM,CAAC,MAAM,yBAAyB,GAAG,
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,kBAAkB,MAAM,sBAAsB,CAAA;AAErD;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG;IAC7B,QAAQ,EAAE,QAAQ;IAClB,cAAc,EAAE,QAAQ;IACxB,SAAS,EAAE,QAAQ;IACnB,UAAU,EAAE,QAAQ;IACpB,SAAS,EAAE,QAAQ;IACnB,UAAU,EAAE,QAAQ;IACpB,SAAS,EAAE,QAAQ;IACnB,MAAM,EAAE,QAAQ;IAChB,WAAW,EAAE,QAAQ;IACrB,UAAU,EAAE,QAAQ;IACpB,gBAAgB,EAAE,QAAQ;IAC1B,WAAW,EAAE,QAAQ;IACrB,UAAU,EAAE,QAAQ;IACpB,aAAa,EAAE,QAAQ;IACvB,KAAK,EAAE,QAAQ;IACf,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,YAAY,EAAE,QAAQ;IACtB,OAAO,EAAE,QAAQ;IACjB,OAAO,EAAE,QAAQ;IACjB,KAAK,EAAE,QAAQ;IACf,iBAAiB,EAAE,QAAQ;IAC3B,kBAAkB,EAAE,QAAQ;IAC5B,iBAAiB,EAAE,QAAQ;IAC3B,kBAAkB,EAAE,QAAQ;IAC5B,IAAI,EAAE,QAAQ;IACd,+BAA+B;IAC/B,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,cAAc,EAAE,cAAc,EAAE,KAAK;IACrC,wBAAwB;IACxB,eAAe,EAAE,QAAQ,EAAE,IAAI;IAC/B,oBAAoB,EAAE,QAAQ,EAAE,IAAI;IACpC,oBAAoB,EAAE,QAAQ,EAAE,IAAI;IACpC,kBAAkB,EAAE,QAAQ,EAAE,IAAI;CAC1B,CAAA;AAEV;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,mFAAmF,CAAA;AAEhH;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,QAAQ,CAAA;AACzC,MAAM,CAAC,MAAM,yBAAyB,GAAG,kBAAkB,CAAC,iBAAiB,CAAC,CAAA;AAE9E;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,iBAAiB,CAAC,gBAAwB;IACxD,OAAO,UAAU,gBAAgB,OAAO,CAAA;AAC1C,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,eAAe,CAAC,gBAAwB;IACtD,OAAO,SAAS,gBAAgB,OAAO,CAAA;AACzC,CAAC"}
|
package/dist/dashes.d.ts
CHANGED
|
@@ -1,29 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Dash
|
|
3
|
-
*
|
|
4
|
-
* Converts hyphens and dashes to typographically correct em-dashes,
|
|
5
|
-
* en-dashes, and minus signs based on context.
|
|
2
|
+
* Dash transformation: hyphens → em-dashes, en-dashes, minus signs.
|
|
6
3
|
*/
|
|
7
4
|
export type DashStyle = "american" | "british" | "none";
|
|
8
5
|
export interface DashOptions {
|
|
9
|
-
/**
|
|
10
|
-
* A boundary marker character used when transforming text that spans
|
|
11
|
-
* multiple HTML elements. This character is treated as "transparent"
|
|
12
|
-
* in the regex patterns.
|
|
13
|
-
*
|
|
14
|
-
* Should be a character that doesn't appear in your text.
|
|
15
|
-
* Default: "\uE000" (Unicode Private Use Area)
|
|
16
|
-
*/
|
|
6
|
+
/** Boundary marker for HTML element boundaries. Default: "\uE000" */
|
|
17
7
|
separator?: string;
|
|
18
|
-
/**
|
|
19
|
-
* How to style parenthetical dashes.
|
|
20
|
-
*
|
|
21
|
-
* - `"american"` (default): Unspaced em dash (word—word)
|
|
22
|
-
* - `"british"`: Spaced en dash (word – word)
|
|
23
|
-
* - `"none"`: Don't convert parenthetical dashes
|
|
24
|
-
*
|
|
25
|
-
* Default: "american"
|
|
26
|
-
*/
|
|
8
|
+
/** "american" (unspaced em), "british" (spaced en), "none". Default: "american" */
|
|
27
9
|
dashStyle?: DashStyle;
|
|
28
10
|
}
|
|
29
11
|
/**
|
|
@@ -32,39 +14,13 @@ export interface DashOptions {
|
|
|
32
14
|
* in model names like "Llama-2-7B" where "2-7" should not become "2–7".
|
|
33
15
|
*/
|
|
34
16
|
export declare const numberRangeDisallowedPrefixes: readonly ["-", "–", "—", "−"];
|
|
35
|
-
/**
|
|
36
|
-
* List of month names (full and abbreviated) for date range detection
|
|
37
|
-
*/
|
|
38
17
|
export declare const months: string;
|
|
39
|
-
/**
|
|
40
|
-
* Replaces hyphens with en-dashes in number ranges.
|
|
41
|
-
* Uses marker-aware boundaries to avoid false matches when separators
|
|
42
|
-
* appear between word characters.
|
|
43
|
-
*
|
|
44
|
-
* Allows suffixes which are common in numerical ranges
|
|
45
|
-
* like "1-10x" (1x to 10x magnification).
|
|
46
|
-
*/
|
|
18
|
+
/** Convert number ranges to en-dash (e.g., "1-5" → "1–5"). */
|
|
47
19
|
export declare function enDashNumberRange(text: string, options?: DashOptions): string;
|
|
48
|
-
/**
|
|
49
|
-
* Replaces hyphens with en-dashes in month/date ranges.
|
|
50
|
-
* Supports formats like "January-March", "Jan-Mar", "February-April 2024",
|
|
51
|
-
* and "October 2012 - December 2014".
|
|
52
|
-
*
|
|
53
|
-
* Spacing around the en-dash is controlled by dashStyle:
|
|
54
|
-
* - "american" (default): No spaces (October 2012–December 2014)
|
|
55
|
-
* - "british": Spaced (October 2012 – December 2014)
|
|
56
|
-
* - "none": Preserve original spacing
|
|
57
|
-
*
|
|
58
|
-
* Uses marker-aware boundaries to avoid false matches when separators
|
|
59
|
-
* appear between word characters.
|
|
60
|
-
*/
|
|
20
|
+
/** Convert month ranges to en-dash (e.g., "January-March" → "January–March"). */
|
|
61
21
|
export declare function enDashDateRange(text: string, options?: DashOptions): string;
|
|
62
|
-
/**
|
|
63
|
-
* Replaces hyphens with proper minus signs (−) in numerical contexts.
|
|
64
|
-
*/
|
|
22
|
+
/** Convert hyphens to minus signs in numeric contexts (e.g., "-5" → "−5"). */
|
|
65
23
|
export declare function minusReplace(text: string, options?: DashOptions): string;
|
|
66
|
-
/**
|
|
67
|
-
* Comprehensive dash replacement for typographic correctness.
|
|
68
|
-
*/
|
|
24
|
+
/** Full dash transformation. */
|
|
69
25
|
export declare function hyphenReplace(text: string, options?: DashOptions): string;
|
|
70
26
|
//# sourceMappingURL=dashes.d.ts.map
|
package/dist/dashes.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dashes.d.ts","sourceRoot":"","sources":["../src/dashes.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"dashes.d.ts","sourceRoot":"","sources":["../src/dashes.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,MAAM,CAAA;AAEvD,MAAM,WAAW,WAAW;IAC1B,qEAAqE;IACrE,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,mFAAmF;IACnF,SAAS,CAAC,EAAE,SAAS,CAAA;CACtB;AAID;;;;GAIG;AACH,eAAO,MAAM,6BAA6B,+BAA0C,CAAA;AAEpF,eAAO,MAAM,MAAM,QAKR,CAAA;AAEX,8DAA8D;AAC9D,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,MAAM,CAqCjF;AAED,iFAAiF;AACjF,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,MAAM,CAc/E;AAED,8EAA8E;AAC9E,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,MAAM,CAO5E;AAwDD,gCAAgC;AAChC,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,MAAM,CAS7E"}
|
package/dist/dashes.js
CHANGED
|
@@ -1,146 +1,108 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Dash
|
|
3
|
-
*
|
|
4
|
-
* Converts hyphens and dashes to typographically correct em-dashes,
|
|
5
|
-
* en-dashes, and minus signs based on context.
|
|
6
|
-
*/
|
|
7
|
-
import { UNICODE_SYMBOLS, DEFAULT_SEPARATOR, ESCAPED_DEFAULT_SEPARATOR, wordBoundaryStart, wordBoundaryEnd } from "./constants.js";
|
|
8
|
-
/**
|
|
9
|
-
* Escapes special regex characters in a string.
|
|
2
|
+
* Dash transformation: hyphens → em-dashes, en-dashes, minus signs.
|
|
10
3
|
*/
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
}
|
|
14
|
-
const { EN_DASH, EM_DASH, MINUS } = UNICODE_SYMBOLS;
|
|
4
|
+
import escapeStringRegexp from "escape-string-regexp";
|
|
5
|
+
import { UNICODE_SYMBOLS, DEFAULT_SEPARATOR, ESCAPED_DEFAULT_SEPARATOR, LATIN_LETTERS, wordBoundaryStart, wordBoundaryEnd } from "./constants.js";
|
|
6
|
+
const { EN_DASH, EM_DASH, MINUS, LEFT_DOUBLE_QUOTE, RIGHT_DOUBLE_QUOTE } = UNICODE_SYMBOLS;
|
|
15
7
|
/**
|
|
16
8
|
* Characters that, when preceding a number, prevent it from being
|
|
17
9
|
* treated as the start of a number range. This prevents false positives
|
|
18
10
|
* in model names like "Llama-2-7B" where "2-7" should not become "2–7".
|
|
19
11
|
*/
|
|
20
12
|
export const numberRangeDisallowedPrefixes = ["-", EN_DASH, EM_DASH, MINUS];
|
|
21
|
-
/**
|
|
22
|
-
* List of month names (full and abbreviated) for date range detection
|
|
23
|
-
*/
|
|
24
13
|
export const months = [
|
|
25
14
|
"January", "February", "March", "April", "May", "June",
|
|
26
15
|
"July", "August", "September", "October", "November", "December",
|
|
27
16
|
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
|
28
17
|
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
|
|
29
18
|
].join("|");
|
|
30
|
-
/**
|
|
31
|
-
* Replaces hyphens with en-dashes in number ranges.
|
|
32
|
-
* Uses marker-aware boundaries to avoid false matches when separators
|
|
33
|
-
* appear between word characters.
|
|
34
|
-
*
|
|
35
|
-
* Allows suffixes which are common in numerical ranges
|
|
36
|
-
* like "1-10x" (1x to 10x magnification).
|
|
37
|
-
*/
|
|
19
|
+
/** Convert number ranges to en-dash (e.g., "1-5" → "1–5"). */
|
|
38
20
|
export function enDashNumberRange(text, options = {}) {
|
|
39
|
-
const chr = options.separator
|
|
40
|
-
? escapeRegex(options.separator)
|
|
41
|
-
: ESCAPED_DEFAULT_SEPARATOR;
|
|
21
|
+
const chr = options.separator ? escapeStringRegexp(options.separator) : ESCAPED_DEFAULT_SEPARATOR;
|
|
42
22
|
const wb = wordBoundaryStart(chr);
|
|
43
23
|
const wbe = wordBoundaryEnd(chr);
|
|
44
|
-
|
|
45
|
-
|
|
24
|
+
// Escape dash-like chars for lookbehind: prevents matching after dashes (e.g., Llama-2-7B)
|
|
25
|
+
const disallowed = numberRangeDisallowedPrefixes.map(c => c === "-" ? c : `\\u${c.charCodeAt(0).toString(16).padStart(4, "0")}`).join("");
|
|
26
|
+
// Positive ranges: 1-5, $100-$200, p.10-15
|
|
27
|
+
text = text.replace(new RegExp(`${wb}(?<![${disallowed}${LATIN_LETTERS}.])(?<start>(?:p\\.?|\\$)?\\d[\\d.,]*${chr}?)-(?<end>${chr}?\\$?\\d[\\d.,]*)(?!\\.\\d)(?<following>(?:${chr}?-${chr}?\\d+)*)(?<suffix>${chr}?[xKBTM])?${wbe}`, "g"), (match, start, end, following, suffix = "") => {
|
|
28
|
+
if (following)
|
|
29
|
+
return match;
|
|
30
|
+
const s = start.replace(new RegExp(chr, "g"), "");
|
|
31
|
+
const e = end.replace(new RegExp(chr, "g"), "");
|
|
32
|
+
if (/^(?:19|20)\d{2}$/.test(s) && /^(?:0[1-9]|1[0-2])$/.test(e))
|
|
33
|
+
return match;
|
|
34
|
+
return `${start}${EN_DASH}${end}${suffix || ""}`;
|
|
35
|
+
});
|
|
36
|
+
// Negative ranges: −5-5 → −5–5, −5--2 → −5–−2
|
|
37
|
+
// Separate regex because MINUS isn't a word char, so \b in ${wb} would match after it
|
|
38
|
+
text = text.replace(new RegExp(`(?<![${LATIN_LETTERS}])(?<start>${MINUS}\\d[\\d.,]*${chr}?)-(?<neg>-)?(?<end>${chr}?\\d[\\d.,]*)(?<following>(?:${chr}?-${chr}?\\d+)*)(?<suffix>${chr}?[xKBTM])?${wbe}`, "g"), (match, start, neg, end, following, suffix = "") => {
|
|
39
|
+
if (following)
|
|
40
|
+
return match;
|
|
41
|
+
return `${start}${EN_DASH}${neg ? MINUS : ""}${end}${suffix || ""}`;
|
|
42
|
+
});
|
|
43
|
+
return text;
|
|
46
44
|
}
|
|
47
|
-
/**
|
|
48
|
-
* Replaces hyphens with en-dashes in month/date ranges.
|
|
49
|
-
* Supports formats like "January-March", "Jan-Mar", "February-April 2024",
|
|
50
|
-
* and "October 2012 - December 2014".
|
|
51
|
-
*
|
|
52
|
-
* Spacing around the en-dash is controlled by dashStyle:
|
|
53
|
-
* - "american" (default): No spaces (October 2012–December 2014)
|
|
54
|
-
* - "british": Spaced (October 2012 – December 2014)
|
|
55
|
-
* - "none": Preserve original spacing
|
|
56
|
-
*
|
|
57
|
-
* Uses marker-aware boundaries to avoid false matches when separators
|
|
58
|
-
* appear between word characters.
|
|
59
|
-
*/
|
|
45
|
+
/** Convert month ranges to en-dash (e.g., "January-March" → "January–March"). */
|
|
60
46
|
export function enDashDateRange(text, options = {}) {
|
|
61
|
-
const chr = options.separator
|
|
62
|
-
? escapeRegex(options.separator)
|
|
63
|
-
: ESCAPED_DEFAULT_SEPARATOR;
|
|
47
|
+
const chr = options.separator ? escapeStringRegexp(options.separator) : ESCAPED_DEFAULT_SEPARATOR;
|
|
64
48
|
const dashStyle = options.dashStyle ?? "american";
|
|
65
49
|
const wb = wordBoundaryStart(chr);
|
|
66
50
|
const wbe = wordBoundaryEnd(chr);
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
const groups = args.at(-1);
|
|
72
|
-
const { startMonth, startYear = "", preSep, postSep, endMonth, endYear = "", preSpace, postSpace } = groups;
|
|
73
|
-
let pre, post;
|
|
74
|
-
if (dashStyle === "british") {
|
|
75
|
-
pre = " ";
|
|
76
|
-
post = " ";
|
|
77
|
-
}
|
|
78
|
-
else if (dashStyle === "none") {
|
|
79
|
-
pre = preSpace;
|
|
80
|
-
post = postSpace;
|
|
81
|
-
}
|
|
82
|
-
else {
|
|
83
|
-
// american (default)
|
|
84
|
-
pre = "";
|
|
85
|
-
post = "";
|
|
86
|
-
}
|
|
87
|
-
return `${startMonth}${startYear}${preSep}${pre}${EN_DASH}${post}${postSep}${endMonth}${endYear}`;
|
|
51
|
+
return text.replace(new RegExp(`${wb}(?<startMonth>${months})(?<startYear>${chr}? \\d{4})?(?<preSep>${chr}?)(?<preSpace> ?)-(?<postSpace> ?)(?<postSep>${chr}?)(?<endMonth>${months})(?<endYear> \\d{4})?${wbe}`, "g"), (...args) => {
|
|
52
|
+
const g = args.at(-1);
|
|
53
|
+
const [pre, post] = dashStyle === "british" ? [" ", " "] : dashStyle === "none" ? [g.preSpace, g.postSpace] : ["", ""];
|
|
54
|
+
return `${g.startMonth}${g.startYear || ""}${g.preSep}${pre}${EN_DASH}${post}${g.postSep}${g.endMonth}${g.endYear || ""}`;
|
|
88
55
|
});
|
|
89
56
|
}
|
|
90
|
-
/**
|
|
91
|
-
* Replaces hyphens with proper minus signs (−) in numerical contexts.
|
|
92
|
-
*/
|
|
57
|
+
/** Convert hyphens to minus signs in numeric contexts (e.g., "-5" → "−5"). */
|
|
93
58
|
export function minusReplace(text, options = {}) {
|
|
94
59
|
const chr = options.separator ?? DEFAULT_SEPARATOR;
|
|
95
|
-
|
|
96
|
-
return text.replaceAll(
|
|
60
|
+
// Match after: start of line, whitespace, (, separator, or quotes (straight or curly)
|
|
61
|
+
return text.replaceAll(new RegExp(`(?<before>^|[\\s\\("${chr}${LEFT_DOUBLE_QUOTE}${RIGHT_DOUBLE_QUOTE}])-(?<num>\\s?\\d*\\.?\\d+)`, "gm"), `$<before>${MINUS}$<num>`);
|
|
97
62
|
}
|
|
98
|
-
/**
|
|
63
|
+
/**
|
|
64
|
+
* Convert surrounded dashes to em/en dashes.
|
|
65
|
+
* Handles patterns like "word - word" → "word—word" (Chicago) or "word – word" (Oxford).
|
|
66
|
+
*/
|
|
99
67
|
function convertParentheticalDashes(text, sep, style) {
|
|
100
68
|
if (style === "none")
|
|
101
69
|
return text;
|
|
102
|
-
const
|
|
103
|
-
const
|
|
104
|
-
//
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
? `$<markerBefore> ${dash} $<markerAfter>`
|
|
115
|
-
: `$<markerBefore>${dash}$<markerAfter>`;
|
|
116
|
-
text = text.replace(multipleDashInWords, multiReplacement);
|
|
117
|
-
// Handle dashes at start of line
|
|
118
|
-
text = text.replace(new RegExp(`^(?<sepStart>${sep})?[-]+ `, "gm"), `$<sepStart>${dash} `);
|
|
70
|
+
const localizedDash = style === "british" ? EN_DASH : EM_DASH;
|
|
71
|
+
const maybeSpace = style === "british" ? " " : "";
|
|
72
|
+
// Convert spaced dashes: "word - word" or "word — word"
|
|
73
|
+
text = text.replace(new RegExp(`(?<=[^\\s]|^)(?:(?<sepBefore>${sep}?)[ ]+|(?<sepOnly>${sep}))[~${EN_DASH}${EM_DASH}-]+[ ]*(?<sepAfter>${sep}?)(?:[ ]+|$)`, "g"), `$<sepBefore>$<sepOnly>${maybeSpace}${localizedDash}${maybeSpace}$<sepAfter>`);
|
|
74
|
+
// Convert multiple dashes: "word--word" or "word---word"
|
|
75
|
+
text = text.replace(new RegExp(`(?<=[${LATIN_LETTERS}\\d])(?<sepBefore>${sep}?)[~${EN_DASH}${EM_DASH}-]{2,}(?<sepAfter>${sep}?)(?=[${LATIN_LETTERS} ])`, "g"), `$<sepBefore>${maybeSpace}${localizedDash}${maybeSpace}$<sepAfter>`);
|
|
76
|
+
// Convert dashes at start of line
|
|
77
|
+
text = text.replace(new RegExp(`^(?<leadingSep>${sep})?[-]+ `, "gm"), `$<leadingSep>${localizedDash} `);
|
|
78
|
+
// British: convert unspaced em-dashes to spaced en-dashes (word—word → word – word)
|
|
79
|
+
if (style === "british") {
|
|
80
|
+
text = text.replace(new RegExp(`(?<=[${LATIN_LETTERS}.!?'"])(?<sepBefore>${sep}?)${EM_DASH}(?<sepAfter>${sep}?)(?=[${LATIN_LETTERS}])`, "g"), `$<sepBefore>${maybeSpace}${localizedDash}${maybeSpace}$<sepAfter>`);
|
|
81
|
+
}
|
|
119
82
|
return text;
|
|
120
83
|
}
|
|
121
|
-
/**
|
|
84
|
+
/**
|
|
85
|
+
* Normalize em-dash spacing for Chicago style (American).
|
|
86
|
+
* Removes all spaces around em-dashes per Chicago Manual of Style.
|
|
87
|
+
*
|
|
88
|
+
* TODO: Handle interrupted-then-resumed speech within quotes, where Chicago
|
|
89
|
+
* allows a space after the dash: "Don't inter— Hey! Who threw that?"
|
|
90
|
+
*/
|
|
122
91
|
function normalizeEmDashSpacing(text, sep) {
|
|
123
|
-
|
|
124
|
-
text = text.replace(
|
|
125
|
-
//
|
|
126
|
-
|
|
127
|
-
text = text.replace(postQuote, `$<quote> $<markerBefore>${EM_DASH}$<markerAfter> `);
|
|
128
|
-
// Preserve space after em dash at start of line
|
|
129
|
-
const startOfLine = new RegExp(`^${spacesAroundEM.source}(?<after>[A-Z0-9])`, "gm");
|
|
130
|
-
text = text.replace(startOfLine, `$<markerBefore>${EM_DASH}$<markerAfter> $<after>`);
|
|
92
|
+
// Remove all spaces around em-dashes
|
|
93
|
+
text = text.replace(new RegExp(`(?<before>${sep}?)[ ]*${EM_DASH}[ ]*(?<after>${sep}?)`, "g"), `$<before>${EM_DASH}$<after>`);
|
|
94
|
+
// Preserve space after em-dash at start of line (e.g., attribution)
|
|
95
|
+
text = text.replace(new RegExp(`^(?<sep>${sep}?)${EM_DASH}(?<after>[A-Z0-9])`, "gm"), `$<sep>${EM_DASH} $<after>`);
|
|
131
96
|
return text;
|
|
132
97
|
}
|
|
133
|
-
/**
|
|
134
|
-
* Comprehensive dash replacement for typographic correctness.
|
|
135
|
-
*/
|
|
98
|
+
/** Full dash transformation. */
|
|
136
99
|
export function hyphenReplace(text, options = {}) {
|
|
137
100
|
const sep = options.separator ?? DEFAULT_SEPARATOR;
|
|
138
|
-
const
|
|
101
|
+
const style = options.dashStyle ?? "american";
|
|
139
102
|
text = minusReplace(text, options);
|
|
140
|
-
text = convertParentheticalDashes(text, sep,
|
|
141
|
-
if (
|
|
103
|
+
text = convertParentheticalDashes(text, sep, style);
|
|
104
|
+
if (style === "american")
|
|
142
105
|
text = normalizeEmDashSpacing(text, sep);
|
|
143
|
-
}
|
|
144
106
|
text = enDashNumberRange(text, options);
|
|
145
107
|
text = enDashDateRange(text, options);
|
|
146
108
|
return text;
|
package/dist/dashes.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dashes.js","sourceRoot":"","sources":["../src/dashes.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"dashes.js","sourceRoot":"","sources":["../src/dashes.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,kBAAkB,MAAM,sBAAsB,CAAA;AACrD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,aAAa,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAWjJ,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,GAAG,eAAe,CAAA;AAE1F;;;;GAIG;AACH,MAAM,CAAC,MAAM,6BAA6B,GAAG,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,CAAU,CAAA;AAEpF,MAAM,CAAC,MAAM,MAAM,GAAG;IACpB,SAAS,EAAE,UAAU,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM;IACtD,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU;IAChE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK;IACxC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK;CACzC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAEX,8DAA8D;AAC9D,MAAM,UAAU,iBAAiB,CAAC,IAAY,EAAE,UAAuB,EAAE;IACvE,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,kBAAkB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,yBAAyB,CAAA;IACjG,MAAM,EAAE,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAA;IACjC,MAAM,GAAG,GAAG,eAAe,CAAC,GAAG,CAAC,CAAA;IAEhC,2FAA2F;IAC3F,MAAM,UAAU,GAAG,6BAA6B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAEzI,2CAA2C;IAC3C,IAAI,GAAG,IAAI,CAAC,OAAO,CACjB,IAAI,MAAM,CACR,GAAG,EAAE,QAAQ,UAAU,GAAG,aAAa,wCAAwC,GAAG,aAAa,GAAG,8CAA8C,GAAG,KAAK,GAAG,qBAAqB,GAAG,aAAa,GAAG,EAAE,EACrM,GAAG,CACJ,EACD,CAAC,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,GAAG,EAAE,EAAE,EAAE;QAC5C,IAAI,SAAS;YAAE,OAAO,KAAK,CAAA;QAC3B,MAAM,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,CAAA;QACjD,MAAM,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,CAAA;QAC/C,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAA;QAC7E,OAAO,GAAG,KAAK,GAAG,OAAO,GAAG,GAAG,GAAG,MAAM,IAAI,EAAE,EAAE,CAAA;IAClD,CAAC,CACF,CAAA;IAED,8CAA8C;IAC9C,sFAAsF;IACtF,IAAI,GAAG,IAAI,CAAC,OAAO,CAC
jB,IAAI,MAAM,CACR,QAAQ,aAAa,cAAc,KAAK,cAAc,GAAG,uBAAuB,GAAG,gCAAgC,GAAG,KAAK,GAAG,qBAAqB,GAAG,aAAa,GAAG,EAAE,EACxK,GAAG,CACJ,EACD,CAAC,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,GAAG,EAAE,EAAE,EAAE;QACjD,IAAI,SAAS;YAAE,OAAO,KAAK,CAAA;QAC3B,OAAO,GAAG,KAAK,GAAG,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,GAAG,GAAG,MAAM,IAAI,EAAE,EAAE,CAAA;IACrE,CAAC,CACF,CAAA;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,iFAAiF;AACjF,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,UAAuB,EAAE;IACrE,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,kBAAkB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,yBAAyB,CAAA;IACjG,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,UAAU,CAAA;IACjD,MAAM,EAAE,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAA;IACjC,MAAM,GAAG,GAAG,eAAe,CAAC,GAAG,CAAC,CAAA;IAEhC,OAAO,IAAI,CAAC,OAAO,CACjB,IAAI,MAAM,CAAC,GAAG,EAAE,iBAAiB,MAAM,iBAAiB,GAAG,uBAAuB,GAAG,gDAAgD,GAAG,iBAAiB,MAAM,wBAAwB,GAAG,EAAE,EAAE,GAAG,CAAC,EAClM,CAAC,GAAG,IAAI,EAAE,EAAE;QACV,MAAM,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAA2B,CAAA;QAC/C,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAA;QACtH,OAAO,GAAG,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,SAAS,IAAI,EAAE,GAAG,CAAC,CAAC,MAAM,GAAG,GAAG,GAAG,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,OAAO,IAAI,EAAE,EAAE,CAAA;IAC3H,CAAC,CACF,CAAA;AACH,CAAC;AAED,8EAA8E;AAC9E,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,UAAuB,EAAE;IAClE,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,IAAI,iBAAiB,CAAA;IAClD,sFAAsF;IACtF,OAAO,IAAI,CAAC,UAAU,CACpB,IAAI,MAAM,CAAC,uBAAuB,GAAG,GAAG,iBAAiB,GAAG,kBAAkB,6BAA6B,EAAE,IAAI,CAAC,EAClH,YAAY,KAAK,QAAQ,CAC1B,CAAA;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,0BAA0B,CAAC,IAAY,EAAE,GAAW,EAAE,KAAgB;IAC7E,IAAI,KAAK,KAAK,MAAM;QAAE,OAAO,IAAI,CAAA;IACjC,MAAM,aAAa,GAAG,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAA;IAC7D,MAAM,UAAU,GAAG,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAA;IAEjD,wDAAwD;IACxD,IAAI,GAAG,IAAI,
CAAC,OAAO,CACjB,IAAI,MAAM,CAAC,gCAAgC,GAAG,qBAAqB,GAAG,OAAO,OAAO,GAAG,OAAO,sBAAsB,GAAG,cAAc,EAAE,GAAG,CAAC,EAC3I,yBAAyB,UAAU,GAAG,aAAa,GAAG,UAAU,aAAa,CAC9E,CAAA;IACD,yDAAyD;IACzD,IAAI,GAAG,IAAI,CAAC,OAAO,CACjB,IAAI,MAAM,CAAC,QAAQ,aAAa,qBAAqB,GAAG,OAAO,OAAO,GAAG,OAAO,qBAAqB,GAAG,SAAS,aAAa,KAAK,EAAE,GAAG,CAAC,EACzI,eAAe,UAAU,GAAG,aAAa,GAAG,UAAU,aAAa,CACpE,CAAA;IACD,kCAAkC;IAClC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,kBAAkB,GAAG,SAAS,EAAE,IAAI,CAAC,EAAE,gBAAgB,aAAa,GAAG,CAAC,CAAA;IACvG,oFAAoF;IACpF,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACxB,IAAI,GAAG,IAAI,CAAC,OAAO,CACjB,IAAI,MAAM,CAAC,QAAQ,aAAa,uBAAuB,GAAG,KAAK,OAAO,eAAe,GAAG,SAAS,aAAa,IAAI,EAAE,GAAG,CAAC,EACxH,eAAe,UAAU,GAAG,aAAa,GAAG,UAAU,aAAa,CACpE,CAAA;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED;;;;;;GAMG;AACH,SAAS,sBAAsB,CAAC,IAAY,EAAE,GAAW;IACvD,qCAAqC;IACrC,IAAI,GAAG,IAAI,CAAC,OAAO,CACjB,IAAI,MAAM,CAAC,aAAa,GAAG,SAAS,OAAO,gBAAgB,GAAG,IAAI,EAAE,GAAG,CAAC,EACxE,YAAY,OAAO,UAAU,CAC9B,CAAA;IAED,oEAAoE;IACpE,IAAI,GAAG,IAAI,CAAC,OAAO,CACjB,IAAI,MAAM,CAAC,WAAW,GAAG,KAAK,OAAO,oBAAoB,EAAE,IAAI,CAAC,EAChE,SAAS,OAAO,WAAW,CAC5B,CAAA;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,gCAAgC;AAChC,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,UAAuB,EAAE;IACnE,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,IAAI,iBAAiB,CAAA;IAClD,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,IAAI,UAAU,CAAA;IAC7C,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;IAClC,IAAI,GAAG,0BAA0B,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,CAAC,CAAA;IACnD,IAAI,KAAK,KAAK,UAAU;QAAE,IAAI,GAAG,sBAAsB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAA;IAClE,IAAI,GAAG,iBAAiB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;IACvC,IAAI,GAAG,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;IACrC,OAAO,IAAI,CAAA;AACb,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -11,7 +11,7 @@ export { niceQuotes, type QuoteOptions, type PunctuationStyle } from "./quotes.j
|
|
|
11
11
|
import type { PunctuationStyle } from "./quotes.js";
|
|
12
12
|
export { hyphenReplace, enDashNumberRange, enDashDateRange, minusReplace, months, numberRangeDisallowedPrefixes, type DashOptions, type DashStyle, } from "./dashes.js";
|
|
13
13
|
import type { DashStyle } from "./dashes.js";
|
|
14
|
-
export { ellipsis, multiplication, mathSymbols, legalSymbols, arrows, degrees, fractions, primeMarks, collapseSpaces,
|
|
14
|
+
export { ellipsis, multiplication, mathSymbols, legalSymbols, arrows, degrees, fractions, primeMarks, collapseSpaces, superscriptOrdinal, punctuationLigatures, symbolTransform, type SymbolOptions, } from "./symbols.js";
|
|
15
15
|
export interface TransformOptions {
|
|
16
16
|
/**
|
|
17
17
|
* A boundary marker character used when transforming text that spans
|
|
@@ -40,9 +40,9 @@ export interface TransformOptions {
|
|
|
40
40
|
/**
|
|
41
41
|
* How to handle punctuation placement around quotation marks.
|
|
42
42
|
*
|
|
43
|
-
* - `"american"` (default): Periods and commas go inside quotes
|
|
43
|
+
* - `"american"` (default): Chicago style. Periods and commas go inside quotes.
|
|
44
44
|
* Example: "Hello." and "Hello,"
|
|
45
|
-
* - `"british"`: Periods and commas go outside quotes
|
|
45
|
+
* - `"british"`: Oxford style. Periods and commas go outside quotes.
|
|
46
46
|
* Example: "Hello". and "Hello",
|
|
47
47
|
* - `"none"`: Don't modify punctuation placement
|
|
48
48
|
*
|
|
@@ -52,8 +52,8 @@ export interface TransformOptions {
|
|
|
52
52
|
/**
|
|
53
53
|
* How to style parenthetical dashes.
|
|
54
54
|
*
|
|
55
|
-
* - `"american"` (default): Unspaced em dash (word—word)
|
|
56
|
-
* - `"british"`: Spaced en dash (word – word)
|
|
55
|
+
* - `"american"` (default): Chicago style. Unspaced em dash (word—word)
|
|
56
|
+
* - `"british"`: Oxford style. Spaced en dash (word – word)
|
|
57
57
|
* - `"none"`: Don't convert parenthetical dashes
|
|
58
58
|
*
|
|
59
59
|
* Default: "american"
|
|
@@ -81,41 +81,16 @@ export interface TransformOptions {
|
|
|
81
81
|
* Default: false (poor font support)
|
|
82
82
|
*/
|
|
83
83
|
ligatures?: boolean;
|
|
84
|
+
/**
|
|
85
|
+
* Whether to verify that the transformation is idempotent (running twice
|
|
86
|
+
* produces the same result). When enabled, throws an error if the second
|
|
87
|
+
* pass produces a different result than the first.
|
|
88
|
+
*
|
|
89
|
+
* Default: true
|
|
90
|
+
*/
|
|
91
|
+
checkIdempotency?: boolean;
|
|
84
92
|
}
|
|
85
93
|
export { assertSeparatorCountPreserved, countSeparators } from "./utils.js";
|
|
86
94
|
export { DEFAULT_SEPARATOR } from "./constants.js";
|
|
87
|
-
/**
|
|
88
|
-
* Applies all typography transformations: smart quotes, proper dashes,
|
|
89
|
-
* and symbol improvements.
|
|
90
|
-
*
|
|
91
|
-
* This is a convenience function that applies transformations in sequence:
|
|
92
|
-
* 1. hyphenReplace (em-dashes, en-dashes, minus signs)
|
|
93
|
-
* 2. primeMarks (feet/inches, arcminutes/arcseconds)
|
|
94
|
-
* 3. niceQuotes (smart quotes)
|
|
95
|
-
* 4. symbolTransform (ellipses, multiplication, math symbols, legal symbols, arrows)
|
|
96
|
-
* 5. fractions (disabled by default)
|
|
97
|
-
* 6. degrees (disabled by default)
|
|
98
|
-
* 7. superscript (disabled by default)
|
|
99
|
-
* 8. ligatures (disabled by default)
|
|
100
|
-
* 9. collapseSpaces (collapses multiple spaces into one)
|
|
101
|
-
*
|
|
102
|
-
* @param text - The text to transform
|
|
103
|
-
* @param options - Configuration options
|
|
104
|
-
* @returns The text with all typography improvements applied
|
|
105
|
-
*
|
|
106
|
-
* @example
|
|
107
|
-
* ```ts
|
|
108
|
-
* import { transform } from 'punctilio'
|
|
109
|
-
*
|
|
110
|
-
* transform('"Hello," she said - "it\'s pages 1-5."')
|
|
111
|
-
* // → '"Hello," she said—"it's pages 1–5."'
|
|
112
|
-
*
|
|
113
|
-
* transform('Wait... 5x5 != 25 (c) 2024')
|
|
114
|
-
* // → 'Wait… 5×5 ≠ 25 © 2024'
|
|
115
|
-
*
|
|
116
|
-
* transform('Add 1/2 cup', { fractions: true })
|
|
117
|
-
* // → 'Add ½ cup'
|
|
118
|
-
* ```
|
|
119
|
-
*/
|
|
120
95
|
export declare function transform(text: string, options?: TransformOptions): string;
|
|
121
96
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,KAAK,YAAY,EAAE,KAAK,gBAAgB,EAAE,MAAM,aAAa,CAAA;AAClF,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAA;AACnD,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,eAAe,EACf,YAAY,EACZ,MAAM,EACN,6BAA6B,EAC7B,KAAK,WAAW,EAChB,KAAK,SAAS,GACf,MAAM,aAAa,CAAA;AACpB,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAA;AAC5C,OAAO,EACL,QAAQ,EACR,cAAc,EACd,WAAW,EACX,YAAY,EACZ,MAAM,EACN,OAAO,EACP,SAAS,EACT,UAAU,EACV,cAAc,EACd,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,KAAK,YAAY,EAAE,KAAK,gBAAgB,EAAE,MAAM,aAAa,CAAA;AAClF,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAA;AACnD,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,eAAe,EACf,YAAY,EACZ,MAAM,EACN,6BAA6B,EAC7B,KAAK,WAAW,EAChB,KAAK,SAAS,GACf,MAAM,aAAa,CAAA;AACpB,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAA;AAC5C,OAAO,EACL,QAAQ,EACR,cAAc,EACd,WAAW,EACX,YAAY,EACZ,MAAM,EACN,OAAO,EACP,SAAS,EACT,UAAU,EACV,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,eAAe,EACf,KAAK,aAAa,GACnB,MAAM,cAAc,CAAA;AAErB,MAAM,WAAW,gBAAgB;IAC/B;;;;;;;OAOG;IACH,SAAS,CAAC,EAAE,MAAM,CAAA;IAElB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;IAEjB;;;;;;;;OAQG;IACH,cAAc,CAAC,EAAE,OAAO,CAAA;IAExB;;;;;;;;;;OAUG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAA;IAEnC;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,SAAS,CAAA;IAErB;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,CAAA;IAEnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;IAEjB;;;;OAIG;IACH,WAAW,CAAC,EAAE,OAAO,CAAA;IAErB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAA;IAEnB;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,OAAO,CAAA;CAC3B;AAQD,OAAO,EAAE,6BAA6B,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AA+ClD,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,MAAM,CAkD9E"}
|