utfu 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.editorconfig +10 -0
- package/.eslintrc +57 -0
- package/.github/workflows/ci.yml +28 -0
- package/.oxfmtrc.json +14 -0
- package/.oxlintrc.json +33 -0
- package/.vscode/settings.json +5 -0
- package/CHANGELOG.md +16 -0
- package/CONTRIBUTING.md +127 -0
- package/LICENSE +2 -2
- package/README.md +7 -4
- package/package.json +23 -42
- package/pnpm-workspace.yaml +2 -0
- package/scripts/verify-mappings.ts +98 -0
- package/scripts/write-dist-package.js +40 -0
- package/src/index.ts +35 -0
- package/src/mappings.ts +2826 -0
- package/src/types.d.ts +4 -0
- package/tests/dist-package.test.ts +21 -0
- package/tests/edge-cases.test.ts +26 -0
- package/tests/fixtures/utf8-debug.html +1025 -0
- package/tests/index.test.ts +31 -0
- package/tests/verify-mappings.test.ts +117 -0
- package/tsconfig.json +29 -0
- package/vitest.config.ts +9 -0
- package/dist-node/index.js +0 -2635
- package/dist-node/index.js.map +0 -1
- package/dist-src/index.js +0 -38
- package/dist-src/mappings.js +0 -2588
- package/dist-types/index.d.ts +0 -3
- package/dist-types/mappings.d.ts +0 -13
- package/dist-web/index.js +0 -2627
- package/dist-web/index.js.map +0 -1
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import * as he from 'he';
|
|
2
|
+
import { describe, expect, test } from 'vitest';
|
|
3
|
+
|
|
4
|
+
import { hex, htx, txt } from '../src/index';
|
|
5
|
+
import mappings from '../src/mappings.js';
|
|
6
|
+
|
|
7
|
+
// Registers one test per mapping entry for each public converter. The three
// loops are kept separate so tests are registered (and reported) grouped by
// converter: all `hex` cases, then all `htx` cases, then all `txt` cases.
describe('mappings', () => {
  // hex(): the mis-rendered sequence must become the entry's `utf8.hex` value.
  mappings.forEach(({ misrender, utf8 }: any) => {
    const garbled = misrender.chars;
    test(`hex: replace ${garbled} with ${utf8.hex}`, () => {
      expect(hex(garbled)).toBe(utf8.hex);
    });
  });

  // htx(): the mis-rendered sequence must become the `he`-encoded form of the
  // intended character. The expected value is computed once, at registration.
  mappings.forEach(({ misrender, utf8 }: any) => {
    const garbled = misrender.chars;
    const encoded = he.encode(utf8.chars);

    test(`htx: replace ${garbled} with ${encoded}`, () => {
      expect(htx(garbled)).toBe(encoded);
    });
  });

  // txt(): the mis-rendered sequence must become the intended character itself.
  mappings.forEach(({ misrender, utf8 }: any) => {
    const garbled = misrender.chars;

    test(`txt: replace ${garbled} with ${utf8.chars}`, () => {
      expect(txt(garbled)).toBe(utf8.chars);
    });
  });
});
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { existsSync, promises as fsPromises } from 'fs';
|
|
2
|
+
import * as he from 'he';
|
|
3
|
+
import { resolve } from 'path';
|
|
4
|
+
import { expect, test } from 'vitest';
|
|
5
|
+
import mappings from '../src/mappings.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Reduces a table-cell value to a canonical, comparable string.
 *
 * `null`/`undefined` become the empty string. Any other value is stringified,
 * HTML-entity decoded with `he.decode`, has non-breaking spaces turned into
 * plain spaces, has every whitespace run collapsed to a single space, and is
 * trimmed at both ends.
 *
 * @param s - Value to normalize; non-strings are converted with `String()`.
 * @returns The normalized text.
 */
function normalize(s: unknown): string {
  if (s === null || s === undefined) {
    return '';
  }

  return he
    .decode(String(s))
    .replace(/\u00A0/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
14
|
+
|
|
15
|
+
/**
 * Converts the code-point cell of a chart row into a canonical `U+XXXX` key.
 *
 * Accepted spellings (case-insensitive, embedded whitespace ignored):
 * `U+00C0`, `0x00C0`, bare hex `00C0`, a hexadecimal character reference
 * `&#xC0;`, and a decimal character reference `&#192;`.
 *
 * The value is parsed to a number and re-formatted, so redundant leading zeros
 * (`000C0`) yield the same key as the short form (`U+00C0`).
 *
 * @param rawCode - Text of the code-point cell.
 * @returns `U+` followed by at least four upper-case hex digits, or `null`
 *   when the text is not a recognised code point or exceeds U+10FFFF.
 */
function toCodePointKey(rawCode: string): string | null {
  const code = rawCode.toUpperCase().replace(/\s+/g, '');

  const hexForm =
    /^(?:U\+|0X)?([0-9A-F]{1,6})$/.exec(code) ?? /^&?#X([0-9A-F]{1,6});?$/.exec(code);
  // Decimal references must be read base-10; reading them as hex maps them to
  // the wrong code point.
  const decimalForm = hexForm ? null : /^&?#([0-9]{1,7});?$/.exec(code);

  let value: number;
  if (hexForm) value = parseInt(hexForm[1], 16);
  else if (decimalForm) value = parseInt(decimalForm[1], 10);
  else return null;

  // Nothing above U+10FFFF is a Unicode code point.
  if (value > 0x10ffff) return null;

  return 'U+' + value.toString(16).toUpperCase().padStart(4, '0');
}

/**
 * Extracts the code-point chart from an HTML document.
 *
 * Every `<tr>` is split into its `<td>`/`<th>` cells; nested tags are replaced
 * by spaces and the remaining text is passed through `normalize`. Rows with at
 * least four cells are interpreted as
 * `[code point, (unused), expected characters, mis-rendered characters, ...]`.
 * Rows whose first cell is not a recognisable code point (header rows, for
 * example) are skipped. If the same code point appears more than once, the
 * last row wins.
 *
 * @param html - Markup containing the chart table.
 * @returns Map from `U+XXXX` key to the row's expected and mis-rendered text.
 */
function parseChartFromHtml(html: string): Map<string, { expected: string; mis: string }> {
  // `\b` keeps `<tr`/`<td`/`<th` from matching longer tag names such as <thead>.
  const rowRe = /<tr\b[\s\S]*?<\/tr>/gi;
  const cellRe = /<t[dh]\b[^>]*>([\s\S]*?)<\/t[dh]>/gi;

  const map = new Map<string, { expected: string; mis: string }>();

  for (const [rowHtml] of html.matchAll(rowRe)) {
    const cols = Array.from(rowHtml.matchAll(cellRe), (cell) =>
      normalize(cell[1].replace(/<[^>]+>/g, ' ')),
    );
    if (cols.length < 4) continue;

    const key = toCodePointKey(cols[0]);
    if (key === null) continue;

    map.set(key, { expected: cols[2], mis: cols[3] });
  }

  return map;
}
|
|
59
|
+
|
|
60
|
+
// Cross-checks the package's mapping table against the i18nqa UTF-8 debug
// chart: every code point the chart lists (with a non-empty expected value)
// must exist in `mappings`, and its intended character must match the chart's.
test('mappings match i18nqa utf8-debug chart', async () => {
  const snapshotPath = resolve(__dirname, 'fixtures', 'utf8-debug.html');
  let html: string;
  if (existsSync(snapshotPath)) {
    // Prefer a checked-in snapshot in CI to avoid network fragility
    html = await fsPromises.readFile(snapshotPath, 'utf8');
  } else {
    const url = 'https://www.i18nqa.com/debug/utf8-debug.html';
    const res = await fetch(url);
    if (!res.ok) throw new Error(`Fetch failed: ${res.status}`);
    html = await res.text();
  }

  const chart = parseChartFromHtml(html);
  // An empty chart means the HTML could not be parsed. Without this guard the
  // loop below never runs and the test passes without checking anything.
  expect(chart.size).toBeGreaterThan(0);

  const mismatches: Array<Record<string, unknown>> = [];

  // Build a lookup of our mappings by codepoint (U+XXXX)
  const mappingByCp = new Map<string, any>();
  for (const entry of mappings) {
    const utfChar = entry.utf8.chars;
    const cp = utfChar && utfChar.length ? utfChar.codePointAt(0) : undefined;
    // Explicit undefined check: a truthiness test would drop code point 0.
    if (cp === undefined) continue;
    mappingByCp.set('U+' + cp.toString(16).toUpperCase().padStart(4, '0'), entry);
  }

  // Iterate the i18nqa chart and ensure we cover each listed codepoint
  for (const [cpStr, chartEntry] of chart) {
    const chartExpected = normalize(chartEntry.expected);
    // skip empty/placeholder expected values (e.g. rows that only show NBSP)
    if (!chartExpected) continue;

    const entry = mappingByCp.get(cpStr);
    if (entry === undefined) {
      mismatches.push({
        cp: cpStr,
        reason: 'chart-codepoint-missing-in-mappings',
        expected: chartEntry.expected,
        mis: chartEntry.mis,
      });
      continue;
    }

    const ourExpected = normalize(entry.utf8.chars);
    if (chartExpected !== ourExpected) {
      mismatches.push({
        cp: cpStr,
        reason: 'char-mismatch',
        expected: chartExpected,
        our: ourExpected,
        mis: entry.misrender.chars,
      });
    }
  }

  // Comparing against [] makes the assertion diff list every mismatch.
  expect(mismatches).toEqual([]);
}, 30_000);
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2020",
|
|
4
|
+
"module": "ES2020",
|
|
5
|
+
"moduleResolution": "node",
|
|
6
|
+
"declaration": true,
|
|
7
|
+
"declarationDir": "types",
|
|
8
|
+
"outDir": "lib",
|
|
9
|
+
"strict": true,
|
|
10
|
+
"types": [
|
|
11
|
+
"vitest/globals",
|
|
12
|
+
"node",
|
|
13
|
+
"he"
|
|
14
|
+
],
|
|
15
|
+
"esModuleInterop": true,
|
|
16
|
+
"allowJs": true,
|
|
17
|
+
"skipLibCheck": true,
|
|
18
|
+
"forceConsistentCasingInFileNames": true
|
|
19
|
+
},
|
|
20
|
+
"include": [
|
|
21
|
+
"src/**/*",
|
|
22
|
+
"scripts/**/*"
|
|
23
|
+
],
|
|
24
|
+
"exclude": [
|
|
25
|
+
"node_modules",
|
|
26
|
+
"coverage",
|
|
27
|
+
"dist"
|
|
28
|
+
]
|
|
29
|
+
}
|