termaui 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +50 -0
  2. package/README.md +11 -3
  3. package/dist/terma-clusters.esm.js +84 -0
  4. package/dist/terma-clusters.js +123 -0
  5. package/dist/terma.esm.js +236 -22
  6. package/dist/terma.js +273 -20
  7. package/dist/termaui.css +115 -60
  8. package/dist/termaui.min.css +1 -1
  9. package/package.json +17 -6
  10. package/fonts/babelstone-tibetan-slim.woff2 +0 -0
  11. package/fonts/babelstone-tibetan.woff2 +0 -0
  12. package/fonts/ddc-rinzin.woff2 +0 -0
  13. package/fonts/ddc-uchen.woff2 +0 -0
  14. package/fonts/gangjie-drutsa.woff2 +0 -0
  15. package/fonts/gangjie-uchen.woff2 +0 -0
  16. package/fonts/jamyang-monlam-uchen.woff2 +0 -0
  17. package/fonts/joyig.woff2 +0 -0
  18. package/fonts/khampa-dedri-bechu.woff2 +0 -0
  19. package/fonts/khampa-dedri-chuyig.woff2 +0 -0
  20. package/fonts/khampa-dedri-drutsa.woff2 +0 -0
  21. package/fonts/misans-tibetan.woff2 +0 -0
  22. package/fonts/monlam-uni-dutsa1.woff2 +0 -0
  23. package/fonts/monlam-uni-dutsa2.woff2 +0 -0
  24. package/fonts/monlam-uni-ouchan1.woff2 +0 -0
  25. package/fonts/monlam-uni-ouchan2.woff2 +0 -0
  26. package/fonts/monlam-uni-ouchan3.woff2 +0 -0
  27. package/fonts/monlam-uni-ouchan4.woff2 +0 -0
  28. package/fonts/monlam-uni-ouchan5.woff2 +0 -0
  29. package/fonts/monlam-uni-paytsik.woff2 +0 -0
  30. package/fonts/monlam-uni-sans.woff2 +0 -0
  31. package/fonts/monlam-uni-tikrang.woff2 +0 -0
  32. package/fonts/monlam-uni-tiktong.woff2 +0 -0
  33. package/fonts/noto-sans-tibetan.woff2 +0 -0
  34. package/fonts/noto-serif-tibetan-black.woff2 +0 -0
  35. package/fonts/noto-serif-tibetan-bold.woff2 +0 -0
  36. package/fonts/noto-serif-tibetan-extrabold.woff2 +0 -0
  37. package/fonts/noto-serif-tibetan-extralight.woff2 +0 -0
  38. package/fonts/noto-serif-tibetan-light.woff2 +0 -0
  39. package/fonts/noto-serif-tibetan-medium.woff2 +0 -0
  40. package/fonts/noto-serif-tibetan-regular.woff2 +0 -0
  41. package/fonts/noto-serif-tibetan-semibold.woff2 +0 -0
  42. package/fonts/noto-serif-tibetan-thin.woff2 +0 -0
  43. package/fonts/panchen-tsukring.woff2 +0 -0
  44. package/fonts/qomolangma-betsu.woff2 +0 -0
  45. package/fonts/qomolangma-tsutong.woff2 +0 -0
  46. package/fonts/riwoche-dhodri-yigchen.woff2 +0 -0
  47. package/fonts/sadri-drutsa.woff2 +0 -0
  48. package/fonts/sadri-yigchen.woff2 +0 -0
  49. package/fonts/tibetan-machine-uni.woff2 +0 -0
package/dist/terma.js CHANGED
@@ -1,13 +1,17 @@
1
1
  /* ==========================================================================
2
- terma.js v0.1.0
2
+ terma.js v0.2.0
3
3
  Tibetan text processing utilities for termaUI.
4
- Handles line-breaking, justification, and punctuation protection
5
- that CSS alone cannot solve. Required for correct Tibetan line breaking.
4
+ Handles line-breaking, justification, punctuation protection, and
5
+ grapheme clustering to prevent dotted-circle artifacts.
6
6
 
7
7
  Usage (browser script tag — sets window.terma global):
8
8
  <script src="https://cdn.jsdelivr.net/npm/termaui/dist/terma.js"></script>
9
9
  <script>terma.prepareAll();</script>
10
10
 
11
+ Auto-clustering: if terma-clusters.js is also loaded, terma.prepare()
12
+ automatically wraps combining marks into .tr-cluster spans — no
13
+ extra code required.
14
+
11
15
  Usage (bundler — import from package):
12
16
  See terma.esm.js for ESM named/default exports.
13
17
  ========================================================================== */
@@ -24,6 +28,99 @@ const terma = (() => {
24
28
  const VISARGA = '\u0F7F'; // ཿ visarga
25
29
  const ZWS = '\u200B'; // zero-width space (break opportunity)
26
30
  const WJ = '\u2060'; // word joiner (prevents break)
31
+ const TIBETAN_RE = /[\u0F00-\u0FFF]/;
32
+
33
// ── Head-mark alignment (Layer 2) ──────────────────────────
// Ascent-ratio cache keyed by the raw font-family string. Created without a
// prototype so that a family named like an Object.prototype member (for
// example "constructor") is never mistaken for a cached entry.
const _ascentCache = Object.create(null);

/**
 * _measureAscentRatio(fontFamily)  [private]
 *
 * Measures how far the glyph U+0F42 (ག ga) rises above the baseline in the
 * given font, as a fraction of the font size, using Canvas TextMetrics
 * (actualBoundingBoxAscent at 100px, divided by 100).
 *
 * Results — including a null result — are cached per fontFamily string.
 *
 * @param {string} fontFamily  CSS font-family value to measure.
 * @returns {number|null}  The ascent ratio, or null when it cannot be
 *   measured: no DOM is available, the 2D canvas context cannot be created,
 *   or TextMetrics does not expose actualBoundingBoxAscent.
 */
function _measureAscentRatio(fontFamily) {
  const cached = _ascentCache[fontFamily];
  if (cached !== undefined) return cached;

  // No DOM (e.g. the module was imported outside a browser): report
  // "unmeasurable" instead of throwing. Deliberately not cached.
  if (typeof document === 'undefined') return null;

  // getContext() may return null when a 2D context cannot be created.
  const ctx = document.createElement('canvas').getContext('2d');
  if (!ctx) {
    _ascentCache[fontFamily] = null;
    return null;
  }

  // Measuring at 100px makes the ratio a simple division by 100.
  ctx.font = '100px ' + fontFamily;
  const metrics = ctx.measureText('\u0F42'); // ག ga — clear head mark

  const ratio = typeof metrics.actualBoundingBoxAscent === 'number'
    ? metrics.actualBoundingBoxAscent / 100
    : null; // old browsers without extended TextMetrics → CSS fallback only

  _ascentCache[fontFamily] = ratio;
  return ratio;
}
59
+
60
/**
 * _alignHeadMarks(el)  [private]
 *
 * Vertically aligns differently-sized Tibetan spans inside `el` so their
 * head marks line up with the container's.
 *
 * Candidates are span[class*="tr-text-"] and span[style*="font-size"].
 * For each candidate the offset is
 *
 *   containerRatio × containerSize − spanRatio × spanSize
 *
 * and is written to the span's inline vertical-align (in px) together with
 * line-height: 1. Ratios come from _measureAscentRatio().
 *
 * Left alone or reset:
 *   - .tr-mixed and .tr-cluster spans are skipped untouched;
 *   - spans with no Tibetan code point get class "tt-latin" and
 *     vertical-align: baseline;
 *   - spans within 0.5px of the container's font size have their inline
 *     vertical-align / line-height cleared.
 *
 * Marks the container with data-terma-aligned="true" when done. Returns
 * early (without marking) when there is no DOM or the container's ratio
 * cannot be measured.
 *
 * @param {HTMLElement} el  Container whose sized spans should be aligned.
 */
function _alignHeadMarks(el) {
  if (typeof document === 'undefined') return;

  const containerStyle = getComputedStyle(el);
  const containerSize = parseFloat(containerStyle.fontSize);
  const containerRatio = _measureAscentRatio(containerStyle.fontFamily);
  if (containerRatio === null) return; // no TextMetrics support → CSS fallback

  const sizedSpans = el.querySelectorAll(
    'span[class*="tr-text-"], span[style*="font-size"]'
  );

  for (const span of sizedSpans) {
    const classes = span.classList;

    // .tr-mixed has its own alignment model; .tr-cluster carries no size.
    if (classes.contains('tr-mixed') || classes.contains('tr-cluster')) continue;

    // No Tibetan code point → treat as Latin and keep it on the baseline.
    // NOTE(review): the 'tt-' prefix differs from the 'tr-' prefix used by
    // the other classes here — confirm against termaui.css.
    if (!TIBETAN_RE.test(span.textContent)) {
      classes.add('tt-latin');
      span.style.verticalAlign = 'baseline';
      span.style.lineHeight = '';
      continue;
    }
    classes.remove('tt-latin');

    const spanStyle = getComputedStyle(span);
    const spanSize = parseFloat(spanStyle.fontSize);

    // Effectively the same size as the container: clear any earlier offset.
    if (Math.abs(containerSize - spanSize) < 0.5) {
      span.style.verticalAlign = '';
      span.style.lineHeight = '';
      continue;
    }

    const spanRatio = _measureAscentRatio(spanStyle.fontFamily);
    if (spanRatio === null) continue;

    const offsetPx = containerRatio * containerSize - spanRatio * spanSize;
    span.style.verticalAlign = offsetPx.toFixed(2) + 'px';
    span.style.lineHeight = '1';
  }

  el.dataset.termaAligned = 'true';
}
27
124
 
28
125
  /**
29
126
  * Walk all text nodes inside an element.
@@ -38,11 +135,49 @@ const terma = (() => {
38
135
  }
39
136
  }
40
137
 
138
/**
 * _clusterTextNodes(el)  [private]
 *
 * Replaces every text node inside `el` that contains Tibetan text with
 * combining marks by a run of <span class="tr-cluster"> elements, one per
 * cluster returned by window.termaClusters.tibetanClusters().
 *
 * Text nodes with no Tibetan code point, or whose clusters are all single
 * code points, are left untouched.
 *
 * Safe to run repeatedly on the same element (prepareEditable() re-runs
 * prepare() after input):
 *   - a text node that is already the single cluster of a .tr-cluster span
 *     is skipped, so spans never nest deeper on each pass;
 *   - a .tr-cluster span whose only child now holds several clusters
 *     (e.g. the user typed inside it) is itself replaced by the new spans.
 *
 * Callers must ensure window.termaClusters is loaded.
 *
 * @param {HTMLElement} el  Root element whose text nodes are clustered.
 */
function _clusterTextNodes(el) {
  // Snapshot the text nodes first: the loop below mutates the tree.
  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
  const nodes = [];
  while (walker.nextNode()) nodes.push(walker.currentNode);

  for (const node of nodes) {
    const text = node.textContent;
    if (!text || !TIBETAN_RE.test(text)) continue;

    const clusters = window.termaClusters.tibetanClusters(text);
    // Skip if nothing to cluster (no combining marks present)
    if (!clusters.some((c) => c.cpCount > 1)) continue;

    const parent = node.parentNode;
    if (!parent) continue;

    const insideCluster = Boolean(
      parent.classList && parent.classList.contains('tr-cluster')
    );
    // Already wrapped by an earlier pass: re-wrapping would only nest spans.
    if (insideCluster && clusters.length === 1) continue;

    const frag = document.createDocumentFragment();
    for (const c of clusters) {
      const span = document.createElement('span');
      span.className = 'tr-cluster';
      span.textContent = c.text;
      frag.appendChild(span);
    }

    // A stale wrapper holding only this text node is replaced as a whole so
    // the new spans become its siblings instead of its children. The root
    // element itself is never replaced.
    const replaceWrapper =
      insideCluster &&
      parent !== el &&
      parent.childNodes.length === 1 &&
      Boolean(parent.parentNode);

    if (replaceWrapper) {
      parent.parentNode.replaceChild(frag, parent);
    } else {
      parent.replaceChild(frag, node);
    }
  }
}
171
+
41
172
  /**
42
173
  * prepare(element)
43
174
  *
44
175
  * Processes Tibetan text inside the given element:
45
176
  *
177
+ * 0. Auto-clusters combining marks into .tr-cluster spans (if
178
+ * terma-clusters.js is loaded) — prevents dotted-circle artifacts
179
+ * when elements are built from per-codepoint DOM operations.
180
+ *
46
181
  * 1. Replaces tsheg before any clause-ending mark with non-breaking tsheg (༌)
47
182
  * to prevent bad breaks. Covers:
48
183
  * - shad (། U+0F0D) — standard clause marker
@@ -63,26 +198,24 @@ const terma = (() => {
63
198
  if (!el) return;
64
199
  if (el.dataset.termaPrepared) return;
65
200
 
201
+ // Steps 1–3: text-node mutations first — regexes need tsheg and following
202
+ // punctuation in the same text node, which clustering would split apart.
66
203
  walkTextNodes(el, (node) => {
67
204
  let text = node.textContent;
68
205
 
69
206
  // Step 1: Replace tsheg before clause-ending marks with non-breaking tsheg.
70
- // Covers shad (།), nyis-shad (༎), and gter-ma (༔).
71
- // The captured group ($1) preserves whichever mark was present.
72
207
  text = text.replace(
73
208
  new RegExp(TSHEG + '([' + SHAD + NYIS_SHAD + GTER_MA + '])', 'g'),
74
209
  TSHEG_NONBREAK + '$1'
75
210
  );
76
211
 
77
212
  // Step 2: Protect double-shad from splitting
78
- // ། ། → །⁠ ⁠། (word-joiners around the space)
79
213
  text = text.replace(
80
214
  new RegExp(SHAD + ' ' + SHAD, 'g'),
81
215
  SHAD + WJ + ' ' + WJ + SHAD
82
216
  );
83
217
 
84
218
  // Step 3: Insert zero-width space after tsheg for line-break opportunities
85
- // But NOT after non-breaking tsheg (already protected above)
86
219
  text = text.replace(
87
220
  new RegExp(TSHEG + '(?!' + ZWS + ')', 'g'),
88
221
  TSHEG + ZWS
@@ -93,6 +226,16 @@ const terma = (() => {
93
226
  }
94
227
  });
95
228
 
229
+ // Step 4: auto-cluster after text fixes — splitting before would prevent
230
+ // the tsheg→non-breaking-tsheg regex from seeing adjacent characters.
231
+ if (typeof window !== 'undefined' && window.termaClusters) {
232
+ _clusterTextNodes(el);
233
+ }
234
+
235
+ // Step 5: head-mark alignment — pixel-perfect vertical alignment of
236
+ // differently-sized Tibetan spans using Canvas TextMetrics.
237
+ _alignHeadMarks(el);
238
+
96
239
  el.dataset.termaPrepared = 'true';
97
240
  }
98
241
 
@@ -108,22 +251,79 @@ const terma = (() => {
108
251
  }
109
252
 
110
253
  /**
111
- * normalize(element)
254
+ * prepareEditable(element)
112
255
  *
113
- * Applies Unicode NFC normalization to all text nodes inside the element.
256
+ * Prepares a contenteditable element for live Tibetan text entry.
257
+ * Calls prepare() immediately, then re-applies it on every input event
258
+ * so ZWS insertions survive further typing.
114
259
  *
115
- * Why this matters: The same Tibetan glyph can be encoded as different
116
- * Unicode sequences that look identical but are technically different strings.
117
- * For example, a composed "OM" character vs. built from individual parts.
118
- * If your database stores NFC and a user types NFD (or vice versa), searches
119
- * will silently fail. Normalizing to NFC at render time ensures consistent
120
- * string comparison.
260
+ * Use instead of prepare() for <div contenteditable> typing tutors,
261
+ * Tibetan note-taking UIs, or any live-edit Tibetan surface.
262
+ * Pair with [contenteditable][lang="bo"] { white-space: pre-wrap; }
263
+ * (included in termaui.css by default) to prevent ZWS collapse.
121
264
  *
122
- * Note: NFC covers most cases but does not reorder characters that were typed
123
- * in an incorrect logical order (a font/input-method problem). For wrong-order
124
- * stacks that show dotted circles, fix the source data or input method.
265
+ * Safe to call multiple times on the same element already-attached
266
+ * listeners are skipped.
267
+ */
268
function prepareEditable(el) {
  // Nothing to do for a missing element, or one that already has the
  // input listener attached (marked via data-terma-editable).
  if (!el || el.dataset.termaEditable) return;

  // Initial pass over the current content.
  prepare(el);

  // prepare() skips elements flagged as prepared, so the flag is cleared
  // before each re-run.
  const rerunPrepare = () => {
    delete el.dataset.termaPrepared;
    prepare(el);
  };

  // Debounce: re-run 150 ms after the most recent input event.
  let pendingTimer;
  el.addEventListener('input', () => {
    clearTimeout(pendingTimer);
    pendingTimer = setTimeout(rerunPrepare, 150);
  });

  el.dataset.termaEditable = 'true';
}
282
+
283
+ /**
284
+ * prepareAllEditables(selector?)
125
285
  *
126
- * Safe to call multiple times — skips already-normalized elements.
286
+ * Convenience: prepareEditable() on all [contenteditable][lang="bo"]
287
+ * elements, or all elements matching the given selector.
288
+ */
289
function prepareAllEditables(selector) {
  // Falls back to Tibetan contenteditable elements when no selector is given.
  const editables = document.querySelectorAll(
    selector || '[contenteditable][lang="bo"]'
  );
  for (const editable of editables) {
    prepareEditable(editable);
  }
}
293
+
294
/**
 * cluster(element)
 *
 * Wraps each Tibetan grapheme cluster inside the element in a
 * .tr-cluster span, without running the rest of the prepare() pipeline.
 *
 * Does nothing when the element is missing, when window.termaClusters
 * (terma-clusters.js) is not loaded, or when the element is already
 * marked with data-terma-clustered. Marks the element once clustered.
 *
 * Note: prepare() performs clustering on its own when terma-clusters.js
 * is loaded — call cluster() only when clustering is all you need.
 *
 * @param {HTMLElement} el  Element whose text nodes should be clustered.
 */
function cluster(el) {
  if (!el) return;

  const hasClusterer =
    typeof window !== 'undefined' && Boolean(window.termaClusters);
  if (!hasClusterer || el.dataset.termaClustered) return;

  _clusterTextNodes(el);
  el.dataset.termaClustered = 'true';
}
311
+
312
/**
 * clusterAll(selector?)
 *
 * Runs cluster() on every element matching the selector; defaults to
 * all [lang="bo"] elements when no selector is given.
 *
 * @param {string} [selector]  CSS selector for the elements to cluster.
 */
function clusterAll(selector) {
  const targets = document.querySelectorAll(selector || '[lang="bo"]');
  for (const target of targets) {
    cluster(target);
  }
}
322
+
323
+ /**
324
+ * normalize(element)
325
+ *
326
+ * Applies Unicode NFC normalization to all text nodes inside the element.
127
327
  */
128
328
  function normalize(el) {
129
329
  if (!el) return;
@@ -150,7 +350,60 @@ const terma = (() => {
150
350
  document.querySelectorAll(sel).forEach(normalize);
151
351
  }
152
352
 
153
- return { prepare, prepareAll, normalize, normalizeAll };
353
/**
 * alignHeadMarks(element)
 *
 * Public wrapper around the head-mark alignment pass: computes exact
 * vertical offsets (via Canvas TextMetrics) for sized spans inside the
 * element so all head marks (མགོ་ཅན) sit on the same line.
 *
 * prepare() already runs this pass. Call alignHeadMarks() directly only
 * to re-align after dynamic font or size changes without re-running the
 * full prepare() pipeline. A missing element is ignored.
 *
 * @param {HTMLElement} el  Container whose sized spans should be aligned.
 */
function alignHeadMarks(el) {
  if (el) {
    _alignHeadMarks(el);
  }
}
368
+
369
/**
 * alignHeadMarksAll(selector?)
 *
 * Runs alignHeadMarks() on every element matching the selector;
 * defaults to all [lang="bo"] elements when no selector is given.
 *
 * @param {string} [selector]  CSS selector for the containers to align.
 */
function alignHeadMarksAll(selector) {
  const containers = document.querySelectorAll(selector || '[lang="bo"]');
  for (const container of containers) {
    alignHeadMarks(container);
  }
}
379
+
380
/**
 * measureAscentRatio(fontFamily)
 *
 * Public accessor for the cached ascent-ratio measurement
 * (head-mark position / font-size) of a font family, obtained via
 * Canvas TextMetrics. Intended for callers building their own
 * alignment logic.
 *
 * @param {string} fontFamily  CSS font-family value to measure.
 * @returns {number|null}  Whatever _measureAscentRatio() yields for
 *   this family.
 */
function measureAscentRatio(fontFamily) {
  const ratio = _measureAscentRatio(fontFamily);
  return ratio;
}
390
+
391
// Once document.fonts.ready resolves, drop every cached ascent ratio and
// re-run alignment on all containers marked data-terma-aligned — ratios
// measured against a fallback font differ from those of the real webfont.
if (typeof document !== 'undefined' && document.fonts) {
  document.fonts.ready.then(() => {
    Object.keys(_ascentCache).forEach((family) => {
      delete _ascentCache[family];
    });

    document.querySelectorAll('[data-terma-aligned]').forEach((container) => {
      _alignHeadMarks(container);
    });
  });
}
401
+
402
+ return {
403
+ prepare, prepareAll, prepareEditable, prepareAllEditables,
404
+ cluster, clusterAll, normalize, normalizeAll,
405
+ alignHeadMarks, alignHeadMarksAll, measureAscentRatio
406
+ };
154
407
  })();
155
408
 
156
409
  // Export for module environments