@xterm/addon-unicode-graphemes 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -0
- package/README.md +22 -0
- package/lib/addon-unicode-graphemes.js +2 -0
- package/lib/addon-unicode-graphemes.js.map +1 -0
- package/lib/addon-unicode-graphemes.mjs +44 -0
- package/lib/addon-unicode-graphemes.mjs.map +7 -0
- package/package.json +29 -0
- package/src/UnicodeGraphemeProvider.ts +72 -0
- package/src/UnicodeGraphemesAddon.ts +38 -0
- package/src/third-party/UnicodeProperties.ts +147 -0
- package/src/third-party/tiny-inflate.ts +380 -0
- package/src/third-party/unicode-trie.ts +134 -0
- package/typings/addon-unicode-graphemes.d.ts +14 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import inflate from './tiny-inflate'
|
|
2
|
+
|
|
3
|
+
/** Shift size for getting the index-2 table offset. */
const SHIFT_2 = 5;

/** Shift size for getting the index-1 table offset (6 + 5 = 11). */
const SHIFT_1 = 6 + SHIFT_2;

/**
 * Difference between the two shift sizes; converts an index-2 offset
 * into an index-1 offset (6 = 11 - 5).
 */
const SHIFT_1_2 = SHIFT_1 - SHIFT_2;

/**
 * Number of index-1 entries for the BMP (32 = 0x20).
 * This part of the index-1 table is omitted from the serialized form.
 */
const OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;

/** Number of entries in an index-2 block (64 = 0x40). */
const INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;

/** Mask selecting the low bits that form the offset inside an index-2 block. */
const INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;

/**
 * Left shift applied to index array values.
 * Allows a larger data size with 16-bit index values, at the cost of
 * compactability, and requires data blocks to be aligned by DATA_GRANULARITY.
 */
const INDEX_SHIFT = 2;

/** Number of entries in a data block (32 = 0x20). */
const DATA_BLOCK_LENGTH = 1 << SHIFT_2;

/** Mask selecting the low bits that form the offset inside a data block. */
const DATA_MASK = DATA_BLOCK_LENGTH - 1;

/**
 * The index-2 section for U+D800..U+DBFF stores values for lead surrogate
 * code _units_, not code _points_. Values for lead surrogate code _points_
 * are indexed through the section that starts at this offset.
 */
const LSCP_INDEX_2_OFFSET = 0x10000 >> SHIFT_2;

/** Length of that section: 32 = 0x20 = 0x400 >> SHIFT_2 (there are 1024 = 0x400 lead surrogates). */
const LSCP_INDEX_2_LENGTH = 0x400 >> SHIFT_2;

/** Combined length of both BMP pieces (2080 = 0x820). */
const INDEX_2_BMP_LENGTH = LSCP_INDEX_2_OFFSET + LSCP_INDEX_2_LENGTH;

/**
 * The 2-byte UTF-8 version of the index-2 table follows at offset 2080 = 0x820.
 */
const UTF8_2B_INDEX_2_OFFSET = INDEX_2_BMP_LENGTH;

/**
 * Length 32 = 0x20 for lead bytes C0..DF, regardless of SHIFT_2.
 * (U+0800 is the first code point after 2-byte UTF-8.)
 */
const UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6;

/**
 * Offset of the index-1 table (2112 = 0x840), which is only used for
 * supplementary code points.
 *
 * The table has variable length, covering code points up to highStart, where
 * the last single-value range starts. Its maximum length is
 * 512 = 0x200 = 0x100000 >> SHIFT_1 (for the 0x100000 supplementary code
 * points U+10000..U+10ffff).
 *
 * The part of the index-2 table for supplementary code points starts after
 * this index-1 table. Both are omitted completely if there is only BMP data.
 */
const INDEX_1_OFFSET = UTF8_2B_INDEX_2_OFFSET + UTF8_2B_INDEX_2_LENGTH;

/** Alignment size of a data block; also the granularity for compaction. */
const DATA_GRANULARITY = 1 << INDEX_SHIFT;

/**
 * True when the first byte in memory of the 32-bit value 0x12345678 is 0x12,
 * i.e. the host stores the most significant byte first.
 */
const isBigEndian = ((): boolean => {
  const probe = new Uint32Array([0x12345678]);
  const firstByte = new Uint8Array(probe.buffer)[0];
  return firstByte === 0x12;
})();
|
|
66
|
+
|
|
67
|
+
/**
 * Read-only lookup table that maps a Unicode code point to a 32-bit value.
 *
 * The table is built from a serialized byte array: a 12-byte header followed
 * by a payload that is passed through `inflate` twice.
 */
class UnicodeTrie {
  /** Decoded table, viewed as 32-bit words (index and data share one array). */
  private data: Uint32Array;
  /** Code points at or above this value all resolve to one shared entry. */
  private highStart: number;
  /** Value returned for code points outside 0..0x10ffff. */
  private errorValue: number;

  /**
   * @param data Serialized trie. Bytes 0-11 hold three little-endian uint32
   *   values (highStart, errorValue, uncompressed length); the compressed
   *   payload starts at byte 12.
   */
  constructor(data: Uint8Array) {
    // Read the header through a view bounded to `data` itself. Passing only
    // `data.buffer` would ignore byteOffset and read the wrong bytes whenever
    // `data` is a view into a larger ArrayBuffer.
    const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
    this.highStart = view.getUint32(0, true);
    this.errorValue = view.getUint32(4, true);
    const uncompressedLength = view.getUint32(8, true);
    data = data.subarray(12);

    // The payload is inflated twice; each pass decodes into a fresh buffer
    // of the final uncompressed size.
    data = inflate(data, new Uint8Array(uncompressedLength));
    data = inflate(data, new Uint8Array(uncompressedLength));

    if (isBigEndian) {
      // The words are stored little-endian; reverse each group of four bytes
      // in place so the Uint32Array view below reads them correctly.
      const len = data.length;
      for (let i = 0; i < len; i += 4) {
        // Exchange data[i] and data[i + 3]:
        const outer = data[i];
        data[i] = data[i + 3];
        data[i + 3] = outer;
        // Exchange data[i + 1] and data[i + 2]:
        const inner = data[i + 1];
        data[i + 1] = data[i + 2];
        data[i + 2] = inner;
      }
    }

    // NOTE(review): this views the whole backing buffer, so it assumes the
    // array returned by `inflate` starts at offset 0 of that buffer — confirm
    // against tiny-inflate.
    this.data = new Uint32Array(data.buffer);
  }

  /**
   * Looks up the value stored for a code point.
   *
   * @param codePoint Code point to look up.
   * @returns The stored value, or `errorValue` when `codePoint` is negative
   *   or greater than 0x10ffff.
   */
  get(codePoint: number): number {
    let index: number;
    if ((codePoint < 0) || (codePoint > 0x10ffff)) {
      return this.errorValue;
    }

    if ((codePoint < 0xd800) || ((codePoint > 0xdbff) && (codePoint <= 0xffff))) {
      // Ordinary BMP code point, excluding leading surrogates.
      // BMP uses a single level lookup. BMP index starts at offset 0 in the index.
      // Data is stored in the index array itself.
      index = (this.data[codePoint >> SHIFT_2] << INDEX_SHIFT) + (codePoint & DATA_MASK);
      return this.data[index];
    }

    if (codePoint <= 0xffff) {
      // Lead surrogate code point (0xd800..0xdbff). A separate index section
      // is stored for lead surrogate code units and code points. The main
      // index has the code unit data; this function needs the code point data.
      index = (this.data[LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> SHIFT_2)] << INDEX_SHIFT) + (codePoint & DATA_MASK);
      return this.data[index];
    }

    if (codePoint < this.highStart) {
      // Supplemental code point, use two-level lookup.
      index = this.data[(INDEX_1_OFFSET - OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> SHIFT_1)];
      index = this.data[index + ((codePoint >> SHIFT_2) & INDEX_2_MASK)];
      index = (index << INDEX_SHIFT) + (codePoint & DATA_MASK);
      return this.data[index];
    }

    // Everything from highStart upward shares the entry located
    // DATA_GRANULARITY words before the end of the array.
    return this.data[this.data.length - DATA_GRANULARITY];
  }
}
|
|
133
|
+
|
|
134
|
+
export default UnicodeTrie
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) 2023 The xterm.js authors. All rights reserved.
|
|
3
|
+
* @license MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Terminal, ITerminalAddon } from '@xterm/xterm';
|
|
7
|
+
|
|
8
|
+
declare module '@xterm/addon-unicode-graphemes' {
  /**
   * Addon class exported by this module. It implements the `ITerminalAddon`
   * contract from `@xterm/xterm`.
   */
  export class UnicodeGraphemesAddon implements ITerminalAddon {
    /** Creates the addon; takes no arguments. */
    constructor();

    /**
     * Part of the `ITerminalAddon` contract.
     *
     * @param terminal The terminal instance passed to the addon.
     */
    activate(terminal: Terminal): void;

    /** Part of the `ITerminalAddon` contract. */
    dispose(): void;
  }
}
|