termaui 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +50 -0
  2. package/README.md +11 -3
  3. package/dist/terma-clusters.esm.js +84 -0
  4. package/dist/terma-clusters.js +123 -0
  5. package/dist/terma.esm.js +236 -22
  6. package/dist/terma.js +273 -20
  7. package/dist/termaui.css +115 -60
  8. package/dist/termaui.min.css +1 -1
  9. package/package.json +17 -6
  10. package/fonts/babelstone-tibetan-slim.woff2 +0 -0
  11. package/fonts/babelstone-tibetan.woff2 +0 -0
  12. package/fonts/ddc-rinzin.woff2 +0 -0
  13. package/fonts/ddc-uchen.woff2 +0 -0
  14. package/fonts/gangjie-drutsa.woff2 +0 -0
  15. package/fonts/gangjie-uchen.woff2 +0 -0
  16. package/fonts/jamyang-monlam-uchen.woff2 +0 -0
  17. package/fonts/joyig.woff2 +0 -0
  18. package/fonts/khampa-dedri-bechu.woff2 +0 -0
  19. package/fonts/khampa-dedri-chuyig.woff2 +0 -0
  20. package/fonts/khampa-dedri-drutsa.woff2 +0 -0
  21. package/fonts/misans-tibetan.woff2 +0 -0
  22. package/fonts/monlam-uni-dutsa1.woff2 +0 -0
  23. package/fonts/monlam-uni-dutsa2.woff2 +0 -0
  24. package/fonts/monlam-uni-ouchan1.woff2 +0 -0
  25. package/fonts/monlam-uni-ouchan2.woff2 +0 -0
  26. package/fonts/monlam-uni-ouchan3.woff2 +0 -0
  27. package/fonts/monlam-uni-ouchan4.woff2 +0 -0
  28. package/fonts/monlam-uni-ouchan5.woff2 +0 -0
  29. package/fonts/monlam-uni-paytsik.woff2 +0 -0
  30. package/fonts/monlam-uni-sans.woff2 +0 -0
  31. package/fonts/monlam-uni-tikrang.woff2 +0 -0
  32. package/fonts/monlam-uni-tiktong.woff2 +0 -0
  33. package/fonts/noto-sans-tibetan.woff2 +0 -0
  34. package/fonts/noto-serif-tibetan-black.woff2 +0 -0
  35. package/fonts/noto-serif-tibetan-bold.woff2 +0 -0
  36. package/fonts/noto-serif-tibetan-extrabold.woff2 +0 -0
  37. package/fonts/noto-serif-tibetan-extralight.woff2 +0 -0
  38. package/fonts/noto-serif-tibetan-light.woff2 +0 -0
  39. package/fonts/noto-serif-tibetan-medium.woff2 +0 -0
  40. package/fonts/noto-serif-tibetan-regular.woff2 +0 -0
  41. package/fonts/noto-serif-tibetan-semibold.woff2 +0 -0
  42. package/fonts/noto-serif-tibetan-thin.woff2 +0 -0
  43. package/fonts/panchen-tsukring.woff2 +0 -0
  44. package/fonts/qomolangma-betsu.woff2 +0 -0
  45. package/fonts/qomolangma-tsutong.woff2 +0 -0
  46. package/fonts/riwoche-dhodri-yigchen.woff2 +0 -0
  47. package/fonts/sadri-drutsa.woff2 +0 -0
  48. package/fonts/sadri-yigchen.woff2 +0 -0
  49. package/fonts/tibetan-machine-uni.woff2 +0 -0
package/LICENSE ADDED
@@ -0,0 +1,50 @@
1
+ Terma Foundry Dual License
2
+ Copyright (c) 2024 Thupten Chakrishar / Terma Foundry (https://termafoundry.com)
3
+
4
+ ================================================================================
5
+ FREE TIER — Personal, Educational, Religious & Non-Commercial Use
6
+ ================================================================================
7
+
8
+ Permission is hereby granted, free of charge, to any person or organization
9
+ obtaining a copy of this software and associated files ("termaUI"), to use,
10
+ copy, modify, and distribute it for the following purposes:
11
+
12
+ - Personal or hobby projects
13
+ - Educational or academic use
14
+ - Religious, cultural, or non-profit organizations
15
+ - Open-source projects (with attribution)
16
+
17
+ The above copyright notice and this permission notice must be included in all
18
+ copies or substantial portions of the software.
19
+
20
+ ================================================================================
21
+ COMMERCIAL LICENSE — Required for Commercial Use
22
+ ================================================================================
23
+
24
+ Commercial use of termaUI requires a separate written commercial license from
25
+ Terma Foundry. "Commercial use" includes, but is not limited to:
26
+
27
+ - Use in a product or service offered for sale or subscription
28
+ - Use by a for-profit organization in internal or external tools
29
+ - Use in any application or service intended to generate revenue
30
+ - SaaS platforms, commercial websites, or paid software products
31
+
32
+ To obtain a commercial license, contact: buddhistapps@gmail.com
33
+
34
+ ================================================================================
35
+ FONTS
36
+ ================================================================================
37
+
38
+ Fonts bundled with termaUI retain their original open licenses (SIL OFL,
39
+ GPL v2, or similar). These font licenses are independent of this software
40
+ license and are not affected by the dual-license terms above.
41
+
42
+ ================================================================================
43
+
44
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
45
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
46
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
47
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
48
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
49
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
50
+ SOFTWARE.
package/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![npm](https://img.shields.io/npm/v/termaui)](https://www.npmjs.com/package/termaui)
6
6
  [![License: Dual](https://img.shields.io/badge/License-Dual-yellow.svg)](LICENSE)
7
7
 
8
- [Documentation](https://termafoundry.com/termaui/docs/) · [Demo](https://termafoundry.com/termaui/) · [GitHub](https://github.com/termafoundry/termafoundry)
8
+ [Documentation](https://termafoundry.com/termaui/docs/) · [Demo](https://termafoundry.com/termaui/) · [GitHub](https://github.com/vajradog/TermaFoundry)
9
9
 
10
10
  ---
11
11
 
@@ -329,6 +329,14 @@ termaui/
329
329
 
330
330
  ## License
331
331
 
332
- MIT free for personal and commercial use.
332
+ termaUI is dual-licensed:
333
333
 
334
- Fonts are bundled under their respective open licenses (SIL OFL, GPL v2, open license). See [termafoundry.com/termaui](https://termafoundry.com/termaui/) for per-font license details.
334
+ **Free** for personal, educational, religious, and non-commercial open-source use.
335
+
336
+ **Commercial use** (products, services, for-profit organizations, or revenue-generating applications) requires a commercial license — contact [buddhistapps@gmail.com](mailto:buddhistapps@gmail.com).
337
+
338
+ Fonts bundled with termaUI retain their original open licenses (SIL OFL, GPL v2, or similar) and are not affected by this dual license. See [termafoundry.com/termaui](https://termafoundry.com/termaui/) for per-font details.
339
+
340
+ ---
341
+
342
+ *Created by [Thupten Chakrishar](https://chakrishar.com) · [Terma Foundry](https://termafoundry.com)*
@@ -0,0 +1,84 @@
1
+ /* ==========================================================================
2
+ terma-clusters.esm.js v0.2.0
3
+ ESM build of terma-clusters — named exports for bundlers (Vite, webpack,
4
+ Rollup, esbuild) and native ES module imports.
5
+
6
+ Usage:
7
+ import { isTibetanCombining, tibetanClusters } from 'termaui/dist/terma-clusters.esm.js';
8
+
9
+ const clusters = tibetanClusters("བོད་");
10
+ // → [{ text: "བོ", cpCount: 2 }, { text: "ད", cpCount: 1 }, { text: "་", cpCount: 1 }]
11
+ ========================================================================== */
12
+
13
/**
 * isTibetanCombining(code)
 *
 * Returns true if the given Unicode codepoint (integer) is a Tibetan
 * combining mark that must attach to a preceding base character.
 *
 * Covered codepoints:
 *   U+0F18–U+0F19   astrological signs (-khyud pa, sdong tshugs)
 *   U+0F35, U+0F37  ngas bzung marks
 *   U+0F39          tsa-phru lenition mark
 *   U+0F3E–U+0F3F   yar tshes / mar tshes
 *   U+0F71–U+0F84   vowel signs, anusvara (U+0F7E), visarga (U+0F7F), virama
 *   U+0F86–U+0F87   sign marks
 *   U+0F8D–U+0F8F   subjoined signs
 *   U+0F90–U+0FBC   subjoined consonants
 *   U+0FC6          padma gdan
 *
 * U+0F85 (paluta) and U+0F88–U+0F8C are not combining marks and are
 * deliberately excluded.
 *
 * @param {number} code - Unicode codepoint to classify.
 * @returns {boolean} true when the codepoint is a Tibetan combining mark.
 */
export function isTibetanCombining(code) {
  return (
    (code >= 0x0F71 && code <= 0x0F84) ||
    (code >= 0x0F86 && code <= 0x0F87) ||
    // U+0F8D–U+0F8F (subjoined signs) sit directly before the subjoined
    // consonants, so one range covers both groups.
    (code >= 0x0F8D && code <= 0x0FBC) ||
    code === 0x0F18 || code === 0x0F19 ||
    code === 0x0F35 || code === 0x0F37 || code === 0x0F39 ||
    code === 0x0F3E || code === 0x0F3F ||
    code === 0x0FC6
  );
}
38
+
39
/**
 * tibetanClusters(str)
 *
 * Groups the codepoints of a string into clusters: a codepoint starts a
 * new cluster unless it is a Tibetan combining mark (per
 * isTibetanCombining) and a cluster is already open, in which case it is
 * appended to that cluster. A combining mark at the very start of the
 * string therefore forms its own cluster.
 *
 * Example:
 *   tibetanClusters("བོད་")
 *   → [
 *       { text: "བོ", cpCount: 2 },  // base + naro vowel
 *       { text: "ད",  cpCount: 1 },  // standalone base
 *       { text: "་",  cpCount: 1 },  // tsheg — not combining
 *     ]
 *
 * @param {string} str - Text to split.
 * @returns {{text: string, cpCount: number}[]} Clusters in source order;
 *   cpCount is the number of Unicode codepoints in the cluster.
 */
export function tibetanClusters(str) {
  const groups = [];

  // Iterating a string yields whole codepoints, so surrogate pairs stay intact.
  for (const ch of str) {
    const attaches = groups.length > 0 && isTibetanCombining(ch.codePointAt(0));
    if (attaches) {
      groups[groups.length - 1].push(ch);
    } else {
      groups.push([ch]);
    }
  }

  return groups.map((parts) => ({ text: parts.join(''), cpCount: parts.length }));
}
83
+
84
+ export default { isTibetanCombining, tibetanClusters };
@@ -0,0 +1,123 @@
1
+ /* ==========================================================================
2
+ terma-clusters.js v0.2.0
3
+ Tibetan grapheme clustering utilities for termaUI.
4
+
5
+ Prevents dotted-circle artifacts (◌) when Tibetan text is split into
6
+ individual codepoints for per-character DOM rendering — typing tutors,
7
+ diff views, search highlights, animated text, syntax highlighters.
8
+
9
+ Root cause: Tibetan vowel signs and subjoined consonants are Unicode
10
+ combining marks. Placed in a DOM element without their base character,
11
+ fonts render them on a dotted-circle placeholder. Grapheme clustering
12
+ groups each base + its combining marks into a single DOM element.
13
+
14
+ Usage (browser — sets window.termaClusters global):
15
+ <script src="https://cdn.jsdelivr.net/npm/termaui/dist/terma-clusters.js"></script>
16
+ <script>
17
+ const clusters = termaClusters.tibetanClusters("བོད་");
18
+ // → [
19
+ // { text: "བོ", cpCount: 2 }, // base + naro vowel
20
+ // { text: "ད", cpCount: 1 }, // standalone base
21
+ // { text: "་", cpCount: 1 }, // tsheg (not combining)
22
+ // ]
23
+ </script>
24
+
25
+ Usage (bundler — ESM):
26
+ import { isTibetanCombining, tibetanClusters } from 'termaui/dist/terma-clusters.esm.js';
27
+ ========================================================================== */
28
+
29
const termaClusters = (() => {
  'use strict';

  /**
   * isTibetanCombining(code)
   *
   * Returns true if the given Unicode codepoint (integer) is a Tibetan
   * combining mark — a character that must attach to a preceding base
   * character to render correctly.
   *
   * Covered codepoints:
   *   U+0F18–U+0F19   astrological signs (-khyud pa, sdong tshugs)
   *   U+0F35, U+0F37  ngas bzung marks
   *   U+0F39          tsa-phru lenition mark
   *   U+0F3E–U+0F3F   yar tshes / mar tshes
   *   U+0F71–U+0F84   vowel signs, anusvara (U+0F7E), visarga (U+0F7F), virama
   *   U+0F86–U+0F87   sign marks
   *   U+0F8D–U+0F8F   subjoined signs
   *   U+0F90–U+0FBC   subjoined consonants
   *   U+0FC6          padma gdan
   *
   * @param {number} code - Unicode codepoint to classify.
   * @returns {boolean} true when the codepoint is a Tibetan combining mark.
   */
  function isTibetanCombining(code) {
    return (
      (code >= 0x0F71 && code <= 0x0F84) ||
      (code >= 0x0F86 && code <= 0x0F87) ||
      // U+0F8D–U+0F8F (subjoined signs) sit directly before the subjoined
      // consonants, so one range covers both groups.
      (code >= 0x0F8D && code <= 0x0FBC) ||
      code === 0x0F18 || code === 0x0F19 ||
      code === 0x0F35 || code === 0x0F37 || code === 0x0F39 ||
      code === 0x0F3E || code === 0x0F3F ||
      code === 0x0FC6
    );
  }

  /**
   * tibetanClusters(str)
   *
   * Splits a Tibetan string into grapheme clusters. Each cluster is one
   * base character followed by zero or more combining marks.
   *
   * Example:
   *   tibetanClusters("བོད་")
   *   → [
   *       { text: "བོ", cpCount: 2 },  // བ + ོ  (base + naro vowel)
   *       { text: "ད",  cpCount: 1 },  // standalone base
   *       { text: "་",  cpCount: 1 },  // tsheg — not a combining mark
   *     ]
   *
   * Wrap each cluster in a <span class="tr-cluster"> rather than wrapping
   * individual codepoints, to prevent dotted-circle artifacts:
   *
   *   BROKEN:   <span>བ</span><span>ོ</span>    → བ + ◌ོ
   *   CORRECT:  <span class="tr-cluster">བོ</span>  → བོ
   *
   * @param {string} str - Text to split.
   * @returns {{text: string, cpCount: number}[]} Clusters in source order;
   *   cpCount is the number of Unicode codepoints in the cluster.
   */
  function tibetanClusters(str) {
    const codepoints = [...str]; // spread handles surrogate pairs correctly
    const clusters = [];
    let currentText = '';
    let currentCount = 0;

    for (const cp of codepoints) {
      const code = cp.codePointAt(0);
      if (isTibetanCombining(code) && currentCount > 0) {
        // Attach combining mark to the current cluster
        currentText += cp;
        currentCount++;
      } else {
        // Flush previous cluster, start a new one
        if (currentCount > 0) {
          clusters.push({ text: currentText, cpCount: currentCount });
        }
        currentText = cp;
        currentCount = 1;
      }
    }

    if (currentCount > 0) {
      clusters.push({ text: currentText, cpCount: currentCount });
    }

    return clusters;
  }

  return { isTibetanCombining, tibetanClusters };
})();

// Expose as window.termaClusters in browser environments
if (typeof window !== 'undefined') {
  window.termaClusters = termaClusters;
}

// Export for module environments (CommonJS / Node)
if (typeof module !== 'undefined' && module.exports) {
  module.exports = termaClusters;
}
package/dist/terma.esm.js CHANGED
@@ -1,15 +1,19 @@
1
1
  /* ==========================================================================
2
- terma.js v0.1.0 — ESM Module
2
+ terma.js v0.2.0 — ESM Module
3
3
  Tibetan text processing utilities for termaUI.
4
- Handles line-breaking, justification, and punctuation protection
5
- that CSS alone cannot solve.
4
+ Handles line-breaking, justification, punctuation protection, and
5
+ grapheme clustering to prevent dotted-circle artifacts.
6
6
 
7
7
  Usage (bundler / import statement):
8
8
  import terma from 'termaui';
9
9
  terma.prepareAll();
10
10
 
11
11
  Named imports also work:
12
- import { prepare, prepareAll, normalize, normalizeAll } from 'termaui';
12
+ import { prepare, prepareAll, cluster, clusterAll, normalize, normalizeAll } from 'termaui';
13
+
14
+ Auto-clustering: if window.termaClusters is available (terma-clusters.js
15
+ loaded), prepare() automatically wraps combining marks into .tr-cluster
16
+ spans — no extra code required.
13
17
  ========================================================================== */
14
18
 
15
19
  'use strict';
@@ -23,6 +27,71 @@ const GTER_MA = '\u0F14'; // ༔ terma sign
23
27
  const VISARGA = '\u0F7F'; // ཿ visarga
24
28
  const ZWS = '\u200B'; // zero-width space (break opportunity)
25
29
  const WJ = '\u2060'; // word joiner (prevents break)
30
+ const TIBETAN_RE = /[\u0F00-\u0FFF]/;
31
+
32
+ // ── Head-mark alignment (Layer 2) ──────────────────────────
33
// Cache of measured ascent ratios keyed by the font-family string.
// A stored null means "could not be measured" and is also a cache hit.
const _ascentCache = {};

/**
 * Measures how high the glyph for U+0F42 rises above the baseline in the
 * given font, as a fraction of the font size (measured on a canvas at
 * 100px).
 *
 * @param {string} fontFamily - CSS font-family value to measure.
 * @returns {number|null} actualBoundingBoxAscent / 100, or null when no 2D
 *   canvas context is available or the TextMetrics object does not expose
 *   actualBoundingBoxAscent. Results (including null) are cached.
 */
function _measureAscentRatio(fontFamily) {
  if (_ascentCache[fontFamily] !== undefined) return _ascentCache[fontFamily];

  let ratio = null;
  // getContext('2d') may return null; treat that the same as "metric
  // unsupported" instead of throwing on ctx.font.
  const ctx = document.createElement('canvas').getContext('2d');
  if (ctx) {
    ctx.font = '100px ' + fontFamily;
    const metrics = ctx.measureText('\u0F42');
    if (typeof metrics.actualBoundingBoxAscent === 'number') {
      ratio = metrics.actualBoundingBoxAscent / 100;
    }
  }

  _ascentCache[fontFamily] = ratio;
  return ratio;
}
49
+
50
/**
 * Adjusts vertical alignment of sized spans inside `el` based on measured
 * font ascent ratios.
 *
 * Candidate spans are those whose class contains "tr-text-" or whose inline
 * style contains "font-size". For each candidate:
 *   - spans with class tr-mixed or tr-cluster are left untouched;
 *   - spans without any U+0F00–U+0FFF character get class tt-latin and
 *     baseline alignment;
 *   - spans whose computed font size is within 0.5px of the container's have
 *     their inline vertical-align / line-height cleared;
 *   - otherwise vertical-align is set to the pixel difference between the
 *     container's and the span's ascent height, and line-height to 1.
 *
 * Does nothing when `document` is undefined or the container font cannot be
 * measured. On completion, sets data-terma-aligned="true" on `el`.
 */
function _alignHeadMarks(el) {
  if (typeof document === 'undefined') return;

  const containerStyle = getComputedStyle(el);
  const containerSize = parseFloat(containerStyle.fontSize);
  const containerRatio = _measureAscentRatio(containerStyle.fontFamily);
  if (containerRatio === null) return;

  const candidates = el.querySelectorAll('span[class*="tr-text-"], span[style*="font-size"]');

  Array.prototype.forEach.call(candidates, (span) => {
    const classes = span.classList;
    if (classes.contains('tr-mixed') || classes.contains('tr-cluster')) return;

    // Non-Tibetan content: tag it and keep it on the baseline.
    if (!TIBETAN_RE.test(span.textContent)) {
      classes.add('tt-latin');
      span.style.verticalAlign = 'baseline';
      span.style.lineHeight = '';
      return;
    }
    classes.remove('tt-latin');

    const spanStyle = getComputedStyle(span);
    const spanSize = parseFloat(spanStyle.fontSize);

    // Same size as the container: clear any earlier inline adjustment.
    if (Math.abs(containerSize - spanSize) < 0.5) {
      span.style.verticalAlign = '';
      span.style.lineHeight = '';
      return;
    }

    const spanRatio = _measureAscentRatio(spanStyle.fontFamily);
    if (spanRatio === null) return;

    const shift = containerRatio * containerSize - spanRatio * spanSize;
    span.style.verticalAlign = shift.toFixed(2) + 'px';
    span.style.lineHeight = '1';
  });

  el.dataset.termaAligned = 'true';
}
26
95
 
27
96
  /**
28
97
  * Walk all text nodes inside an element.
@@ -37,49 +106,72 @@ function walkTextNodes(el, callback) {
37
106
  }
38
107
  }
39
108
 
109
/**
 * _clusterTextNodes(el)  [private]
 *
 * Walks all text nodes inside el that contain Tibetan text with
 * combining marks and replaces each with a series of
 * <span class="tr-cluster"> elements — one per grapheme cluster.
 *
 * Text nodes whose parent is already a .tr-cluster span are skipped, so
 * running this pass repeatedly on the same element does not nest spans.
 *
 * Requires window.termaClusters; callers check for it before calling.
 */
function _clusterTextNodes(el) {
  // Collect first: replacing nodes while the walker is live would disturb it.
  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
  const nodes = [];
  while (walker.nextNode()) nodes.push(walker.currentNode);

  for (const node of nodes) {
    const parent = node.parentNode;

    // Already wrapped by an earlier pass — wrapping again would nest spans.
    if (parent && parent.classList && parent.classList.contains('tr-cluster')) continue;

    const text = node.textContent;
    if (!text || !TIBETAN_RE.test(text)) continue;

    const clusters = window.termaClusters.tibetanClusters(text);
    // Nothing to protect when every cluster is a single codepoint.
    if (!clusters.some((c) => c.cpCount > 1)) continue;

    const frag = document.createDocumentFragment();
    for (const c of clusters) {
      const span = document.createElement('span');
      span.className = 'tr-cluster';
      span.textContent = c.text;
      frag.appendChild(span);
    }
    parent.replaceChild(frag, node);
  }
}
138
+
40
139
  /**
41
140
  * prepare(element)
42
141
  *
43
142
  * Processes Tibetan text inside the given element:
44
143
  *
45
- * 1. Replaces tsheg before any clause-ending mark with non-breaking tsheg (༌)
46
- * to prevent bad breaks. Covers:
47
- * - shad (། U+0F0D) — standard clause marker
48
- * - nyis-shad (༎ U+0F0E) — double clause marker
49
- * - gter-ma (༔ U+0F14) — terma / treasure-text sign
50
- * Example: ང་། → ང༌། | ར་༎ → ར༌༎ | ར་༔ → ར༌༔
144
+ * 0. Auto-clusters combining marks into .tr-cluster spans (if
145
+ * window.termaClusters is available) — runs after steps 1–3; prevents dotted-circle artifacts.
51
146
  *
52
- * 2. Wraps double-shad sequences (། །) with word-joiners so they
53
- * never split across lines.
147
+ * 1. Replaces tsheg before clause-ending marks with non-breaking tsheg (༌).
148
+ * Covers shad (།), nyis-shad (༎), gter-ma (༔).
54
149
  *
55
- * 3. Inserts zero-width spaces after tsheg (་) to give browsers
56
- * proper line-break opportunities — the #1 Tibetan rendering fix.
150
+ * 2. Wraps double-shad sequences (། །) with word-joiners.
57
151
  *
58
- * Call this after the DOM is ready. Safe to call multiple times —
59
- * already-processed elements are skipped.
152
+ * 3. Inserts zero-width spaces after tsheg for line-break opportunities.
153
+ *
154
+ * Safe to call multiple times — already-processed elements are skipped.
60
155
  */
61
156
  export function prepare(el) {
62
157
  if (!el) return;
63
158
  if (el.dataset.termaPrepared) return;
64
159
 
160
+ // Steps 1–3: text-node mutations first — regexes need tsheg and following
161
+ // punctuation in the same text node, which clustering would split apart.
65
162
  walkTextNodes(el, (node) => {
66
163
  let text = node.textContent;
67
164
 
68
- // Step 1: Replace tsheg before clause-ending marks with non-breaking tsheg.
69
- // Covers shad (།), nyis-shad (༎), and gter-ma (༔).
70
- // The captured group ($1) preserves whichever mark was present.
71
165
  text = text.replace(
72
166
  new RegExp(TSHEG + '([' + SHAD + NYIS_SHAD + GTER_MA + '])', 'g'),
73
167
  TSHEG_NONBREAK + '$1'
74
168
  );
75
169
 
76
- // Step 2: Protect double-shad from splitting
77
170
  text = text.replace(
78
171
  new RegExp(SHAD + ' ' + SHAD, 'g'),
79
172
  SHAD + WJ + ' ' + WJ + SHAD
80
173
  );
81
174
 
82
- // Step 3: Insert zero-width space after tsheg for line-break opportunities
83
175
  text = text.replace(
84
176
  new RegExp(TSHEG + '(?!' + ZWS + ')', 'g'),
85
177
  TSHEG + ZWS
@@ -90,6 +182,15 @@ export function prepare(el) {
90
182
  }
91
183
  });
92
184
 
185
+ // Step 4: auto-cluster after text fixes — splitting before would prevent
186
+ // the tsheg→non-breaking-tsheg regex from seeing adjacent characters.
187
+ if (typeof window !== 'undefined' && window.termaClusters) {
188
+ _clusterTextNodes(el);
189
+ }
190
+
191
+ // Step 5: head-mark alignment
192
+ _alignHeadMarks(el);
193
+
93
194
  el.dataset.termaPrepared = 'true';
94
195
  }
95
196
 
@@ -104,6 +205,74 @@ export function prepareAll(selector) {
104
205
  document.querySelectorAll(sel).forEach(prepare);
105
206
  }
106
207
 
208
/**
 * prepareEditable(element)
 *
 * Sets up an element for live Tibetan editing: runs prepare() once, then
 * registers an `input` listener that, 150 ms after the last input event,
 * clears the element's terma-prepared flag and runs prepare() again.
 *
 * Intended for contenteditable surfaces. Pair with
 *   [contenteditable][lang="bo"] { white-space: pre-wrap; }
 * so inserted zero-width spaces are not collapsed.
 *
 * Elements already marked with data-terma-editable are skipped, so calling
 * this repeatedly attaches only one listener.
 */
export function prepareEditable(el) {
  if (!el || el.dataset.termaEditable) return;

  prepare(el);

  let pendingTimer;
  const reprepare = () => {
    // prepare() skips elements flagged as prepared, so drop the flag first.
    delete el.dataset.termaPrepared;
    prepare(el);
  };

  el.addEventListener('input', () => {
    clearTimeout(pendingTimer);
    pendingTimer = setTimeout(reprepare, 150);
  });

  el.dataset.termaEditable = 'true';
}
237
+
238
/**
 * prepareAllEditables(selector?)
 *
 * Runs prepareEditable() on every element matching `selector`; when no
 * selector is given, on every [contenteditable][lang="bo"] element.
 */
export function prepareAllEditables(selector) {
  const targets = document.querySelectorAll(selector || '[contenteditable][lang="bo"]');
  targets.forEach(prepareEditable);
}
248
+
249
/**
 * cluster(element)
 *
 * Applies grapheme clustering to the Tibetan text nodes inside the
 * element and marks it with data-terma-clustered so a second call is a
 * no-op. Does nothing when window.termaClusters is not available.
 *
 * Note: prepare() runs the same clustering pass on its own when
 * terma-clusters.js is loaded.
 */
export function cluster(el) {
  if (!el) return;

  const clusterLib = typeof window !== 'undefined' && window.termaClusters;
  if (!clusterLib || el.dataset.termaClustered) return;

  _clusterTextNodes(el);
  el.dataset.termaClustered = 'true';
}
264
+
265
/**
 * clusterAll(selector?)
 *
 * Runs cluster() on every element matching `selector`; when no selector
 * is given, on every [lang="bo"] element.
 */
export function clusterAll(selector) {
  const targets = document.querySelectorAll(selector || '[lang="bo"]');
  targets.forEach(cluster);
}
275
+
107
276
  /**
108
277
  * normalize(element)
109
278
  *
@@ -134,6 +303,51 @@ export function normalizeAll(selector) {
134
303
  document.querySelectorAll(sel).forEach(normalize);
135
304
  }
136
305
 
137
- const terma = { prepare, prepareAll, normalize, normalizeAll };
306
/**
 * alignHeadMarks(element)
 *
 * Public entry point for head-mark alignment of sized spans inside the
 * element. A missing element is ignored.
 * Note: prepare() runs this alignment automatically.
 */
export function alignHeadMarks(el) {
  if (el) {
    _alignHeadMarks(el);
  }
}
316
+
317
/**
 * alignHeadMarksAll(selector?)
 *
 * Runs alignHeadMarks() on every element matching `selector`; when no
 * selector is given, on every [lang="bo"] element.
 */
export function alignHeadMarksAll(selector) {
  const targets = document.querySelectorAll(selector || '[lang="bo"]');
  targets.forEach(alignHeadMarks);
}
326
+
327
/**
 * measureAscentRatio(fontFamily)
 *
 * Public wrapper around the internal canvas-based measurement: returns
 * whatever it reports as the ascent ratio for the given font family.
 */
export function measureAscentRatio(fontFamily) {
  const ratio = _measureAscentRatio(fontFamily);
  return ratio;
}
335
+
336
// Once document.fonts.ready resolves, drop every cached ascent ratio and
// re-run alignment on each element previously marked data-terma-aligned.
if (typeof document !== 'undefined' && document.fonts) {
  document.fonts.ready.then(() => {
    for (const family of Object.keys(_ascentCache)) {
      delete _ascentCache[family];
    }
    document.querySelectorAll('[data-terma-aligned]').forEach((el) => {
      _alignHeadMarks(el);
    });
  });
}
346
+
347
+ const terma = {
348
+ prepare, prepareAll, prepareEditable, prepareAllEditables,
349
+ cluster, clusterAll, normalize, normalizeAll,
350
+ alignHeadMarks, alignHeadMarksAll, measureAscentRatio
351
+ };
138
352
 
139
353
  export default terma;