@chr33s/pdf-unicode-trie 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +8 -0
- package/README.md +80 -0
- package/dist/builder.d.ts +12 -0
- package/dist/builder.js +835 -0
- package/dist/builder.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/swap.d.ts +1 -0
- package/dist/swap.js +19 -0
- package/dist/swap.js.map +1 -0
- package/dist/unicode-trie.d.ts +26 -0
- package/dist/unicode-trie.js +110 -0
- package/dist/unicode-trie.js.map +1 -0
- package/package.json +37 -0
- package/src/builder.ts +1012 -0
- package/src/index.ts +4 -0
- package/src/swap.ts +21 -0
- package/src/unicode-trie.ts +148 -0
- package/test/builder.test.ts +151 -0
- package/test/swap.test.ts +36 -0
- package/test/trie.test.ts +125 -0
- package/test/unicode-trie.test.ts +258 -0
- package/tsconfig.json +9 -0
- package/tsconfig.typecheck.json +14 -0
- package/vitest.config.ts +8 -0
package/src/index.ts
ADDED
package/src/swap.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * True when the host stores multi-byte integers most-significant byte first.
 * Determined once at module load by writing a known 32-bit value and
 * inspecting the byte that ends up at the lowest address.
 */
const isBigEndian = ((): boolean => {
  const probe = new Uint32Array([0x12345678]);
  const lowestAddressByte = new Uint8Array(probe.buffer)[0];
  return lowestAddressByte === 0x12;
})();
|
|
2
|
+
|
|
3
|
+
/**
 * Exchange the bytes at positions `left` and `right` of `array` in place.
 */
const swap = (array: Uint8Array, left: number, right: number): void => {
  const temp = array[left];
  array[left] = array[right];
  array[right] = temp;
};

/**
 * Reverse the byte order of every complete 32-bit word in `array`, in place.
 *
 * Only whole 4-byte groups are touched. If the length is not a multiple of
 * four, the trailing 1-3 bytes are left unchanged: swapping them against
 * positions past the end of the array would read `undefined` and overwrite
 * the real bytes with zero.
 *
 * @param array Bytes to convert; mutated in place.
 */
const swap32 = (array: Uint8Array): void => {
  const { length } = array;
  // Index one past the last byte that belongs to a complete 32-bit word.
  const wholeWordsEnd = length - (length % 4);
  for (let index = 0; index < wholeWordsEnd; index += 4) {
    swap(array, index, index + 3);
    swap(array, index + 1, index + 2);
  }
};
|
|
16
|
+
|
|
17
|
+
/**
 * Convert a buffer of little-endian 32-bit words to the host's byte order,
 * in place. On little-endian hosts the bytes are already in host order and
 * nothing is changed; on big-endian hosts each word is byte-reversed.
 *
 * @param array Bytes holding little-endian 32-bit words; mutated in place.
 */
export const swap32LE = (array: Uint8Array): void => {
  if (!isBigEndian) {
    return;
  }

  swap32(array);
};
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import { inflate } from "@chr33s/pdf-common";
|
|
2
|
+
import { swap32LE } from "./swap.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Already-decoded trie contents, accepted by `UnicodeTrie.fromJSON`.
 */
export type UnicodeTrieJSON = {
  /** Flat table of 32-bit entries; an `Int32Array` is converted to `Uint32Array` on load. */
  data: Int32Array | Uint32Array;
  /** Lower bound of the high code point range that is not looked up through the index tables. */
  highStart: number;
  /** Value reported for inputs outside the valid code point range. */
  errorValue: number;
};

/**
 * Every source a trie can be created from: a decoded JSON-style object, or
 * raw bytes supplied as a buffer or as any view over one.
 */
export type UnicodeTrieInit = UnicodeTrieJSON | ArrayBufferView | ArrayBufferLike;
|
|
11
|
+
|
|
12
|
+
// Layout constants for the flat trie table. All values are derived from the
// two shift widths below, so the table geometry stays consistent.

/** Shift that turns a code point into its index-2 entry number. */
const SHIFT_2 = 5;
/** Shift that turns a code point into its index-1 entry number. */
const SHIFT_1 = 6 + SHIFT_2;
/** Difference between the two shifts; log2 of the index-2 block length. */
const SHIFT_1_2 = SHIFT_1 - SHIFT_2;

/** Number of index-1 entries that would cover the BMP and are left out of the table. */
const OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;

/** Number of entries in one index-2 block. */
const INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;
/** Mask selecting an entry within an index-2 block. */
const INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;

/** Index-2 entries are shifted left by this amount to obtain a data offset. */
const INDEX_SHIFT = 2;

/** Number of values in one data block. */
const DATA_BLOCK_LENGTH = 1 << SHIFT_2;
/** Mask selecting a value within a data block. */
const DATA_MASK = DATA_BLOCK_LENGTH - 1;

/** Start of the index-2 entries used for lead surrogate code units. */
const LSCP_INDEX_2_OFFSET = 0x10000 >> SHIFT_2;
/** Number of index-2 entries used for lead surrogate code units. */
const LSCP_INDEX_2_LENGTH = 0x400 >> SHIFT_2;

/** Total index-2 entries for the BMP plus the lead surrogate section. */
const INDEX_2_BMP_LENGTH = LSCP_INDEX_2_OFFSET + LSCP_INDEX_2_LENGTH;

/** Start of the index-2 section that follows the BMP section. */
const UTF8_2B_INDEX_2_OFFSET = INDEX_2_BMP_LENGTH;
/** Length of that section. */
const UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6;

/** Start of the index-1 table, which follows the sections above. */
const INDEX_1_OFFSET = UTF8_2B_INDEX_2_OFFSET + UTF8_2B_INDEX_2_LENGTH;

/** Alignment unit of data offsets; also the distance from the end of the table to the high-range value. */
const DATA_GRANULARITY = 1 << INDEX_SHIFT;
|
|
28
|
+
|
|
29
|
+
/**
 * Type guard that tells a decoded trie object apart from raw binary input.
 *
 * A value qualifies only if it is a non-null object that has all three of
 * `data`, `highStart` and `errorValue`, where `data` is a `Uint32Array` or
 * `Int32Array` and the other two are numbers.
 */
const isUnicodeTrieJSON = (value: UnicodeTrieInit): value is UnicodeTrieJSON => {
  if (value === null || typeof value !== "object") {
    return false;
  }

  const hasEveryKey = "data" in value && "highStart" in value && "errorValue" in value;
  if (!hasEveryKey) {
    return false;
  }

  const candidate = value as Partial<UnicodeTrieJSON>;
  if (!(candidate.data instanceof Uint32Array) && !(candidate.data instanceof Int32Array)) {
    return false;
  }
  if (typeof candidate.highStart !== "number") {
    return false;
  }
  return typeof candidate.errorValue === "number";
};
|
|
45
|
+
|
|
46
|
+
/**
 * Expose any supported binary input as a `Uint8Array` without copying.
 *
 * - A `Uint8Array` is returned unchanged.
 * - Any other view yields a byte view over exactly the same region of its buffer.
 * - A bare buffer (including a `SharedArrayBuffer`) is wrapped in full.
 */
const toUint8Array = (value: ArrayBufferLike | ArrayBufferView): Uint8Array => {
  if (value instanceof Uint8Array) {
    return value;
  }

  return ArrayBuffer.isView(value)
    ? new Uint8Array(value.buffer, value.byteOffset, value.byteLength)
    : new Uint8Array(value);
};
|
|
65
|
+
|
|
66
|
+
/**
 * Read-only lookup table from Unicode code points to 32-bit values, backed
 * by a single flat `Uint32Array` that holds both index entries and values.
 *
 * The constructor is private; build instances with {@link UnicodeTrie.fromJSON},
 * {@link UnicodeTrie.fromBuffer} or {@link UnicodeTrie.create}.
 */
class UnicodeTrie {
  /** Flat table consulted by {@link UnicodeTrie.get}. */
  readonly data: Uint32Array;
  /** Code points at or above this bound share one value stored near the end of `data`. */
  readonly highStart: number;
  /** Value returned by {@link UnicodeTrie.get} for inputs that are not valid code points. */
  readonly errorValue: number;

  private constructor(data: Uint32Array, highStart: number, errorValue: number) {
    this.data = data;
    this.highStart = highStart;
    this.errorValue = errorValue;
  }

  /**
   * Create a UnicodeTrie from a JSON object (synchronous).
   *
   * A `Uint32Array` is used as-is (not copied); an `Int32Array` is converted
   * into a new `Uint32Array`.
   *
   * @param source Decoded trie contents.
   * @returns A trie over the given data.
   */
  static fromJSON(source: UnicodeTrieJSON): UnicodeTrie {
    const typedData =
      source.data instanceof Uint32Array ? source.data : new Uint32Array(source.data);
    return new UnicodeTrie(typedData, source.highStart, source.errorValue);
  }

  /**
   * Create a UnicodeTrie from compressed binary data (async).
   *
   * Layout read here: a 12-byte header of little-endian 32-bit fields
   * (`highStart` at offset 0, `errorValue` at offset 4, an uncompressed
   * length at offset 8 that is not needed), followed by the payload, which
   * is inflated twice with "deflate-raw" and then interpreted as
   * little-endian 32-bit words.
   *
   * @param source Bytes of the serialized trie.
   * @returns A trie over the decoded data.
   * @throws RangeError if `source` is shorter than the 12-byte header.
   */
  static async fromBuffer(source: ArrayBufferLike | ArrayBufferView): Promise<UnicodeTrie> {
    const headerLength = 12;
    const bytes = toUint8Array(source);
    if (bytes.byteLength < headerLength) {
      // Fail with a descriptive message instead of an opaque DataView bounds error.
      throw new RangeError(
        `UnicodeTrie buffer is too short: expected at least ${headerLength} header bytes, got ${bytes.byteLength}`,
      );
    }

    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    const highStart = view.getUint32(0, true);
    const errorValue = view.getUint32(4, true);
    // uncompressedLength at offset 8 is not needed for async inflate

    let payload = bytes.subarray(headerLength);
    // The payload is deflated twice, so it is inflated twice.
    payload = await inflate(payload, "deflate-raw");
    payload = await inflate(payload, "deflate-raw");

    // A Uint32Array view must start on a 4-byte boundary. If the inflated
    // bytes sit at an unaligned offset inside a larger buffer, copy them
    // into a fresh buffer (which starts at offset 0).
    if (payload.byteOffset % Uint32Array.BYTES_PER_ELEMENT !== 0) {
      payload = new Uint8Array(payload);
    }

    // Convert little-endian words to host order (no-op on little-endian hosts).
    swap32LE(payload);

    const data = new Uint32Array(
      payload.buffer,
      payload.byteOffset,
      // Whole words only; any trailing partial word is ignored.
      Math.floor(payload.byteLength / Uint32Array.BYTES_PER_ELEMENT),
    );

    return new UnicodeTrie(data, highStart, errorValue);
  }

  /**
   * Create a UnicodeTrie from any supported source (async).
   *
   * Decoded JSON-style objects go through {@link UnicodeTrie.fromJSON};
   * everything else is treated as serialized bytes and goes through
   * {@link UnicodeTrie.fromBuffer}.
   *
   * @param source A decoded trie object or serialized trie bytes.
   */
  static async create(source: UnicodeTrieInit): Promise<UnicodeTrie> {
    if (isUnicodeTrieJSON(source)) {
      return UnicodeTrie.fromJSON(source);
    }
    return UnicodeTrie.fromBuffer(source);
  }

  /**
   * Look up the value stored for a code point.
   *
   * @param codePoint Code point to look up.
   * @returns The stored value, or `errorValue` when `codePoint` is negative,
   *   greater than 0x10FFFF, or NaN.
   */
  get(codePoint: number): number {
    // Written as a negated range test so that NaN, which fails every
    // comparison, is rejected along with out-of-range numbers.
    if (!(codePoint >= 0 && codePoint <= 0x10ffff)) {
      return this.errorValue;
    }

    // BMP code points other than lead surrogates: a single index-2 lookup.
    if (codePoint < 0xd800 || (codePoint > 0xdbff && codePoint <= 0xffff)) {
      const index = (this.data[codePoint >> SHIFT_2] << INDEX_SHIFT) + (codePoint & DATA_MASK);
      return this.data[index];
    }

    // Remaining BMP values are the lead surrogates 0xD800-0xDBFF, which use
    // their own index-2 section.
    if (codePoint <= 0xffff) {
      const index =
        (this.data[LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> SHIFT_2)] << INDEX_SHIFT) +
        (codePoint & DATA_MASK);
      return this.data[index];
    }

    // Supplementary code points below highStart: index-1, then index-2, then data.
    if (codePoint < this.highStart) {
      let index = this.data[INDEX_1_OFFSET - OMITTED_BMP_INDEX_1_LENGTH + (codePoint >> SHIFT_1)];
      index = this.data[index + ((codePoint >> SHIFT_2) & INDEX_2_MASK)];
      index = (index << INDEX_SHIFT) + (codePoint & DATA_MASK);
      return this.data[index];
    }

    // Everything from highStart upward shares the value stored near the end of the table.
    return this.data[this.data.length - DATA_GRANULARITY];
  }
}
|
|
147
|
+
|
|
148
|
+
export default UnicodeTrie;
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { describe, expect, test } from "vitest";
|
|
2
|
+
import UnicodeTrieBuilder from "../src/builder.js";
|
|
3
|
+
|
|
4
|
+
describe("UnicodeTrieBuilder", () => {
  // Anything the tests read values from: a builder or a frozen trie.
  type Lookup = { get(codePoint: number): unknown };

  // Builder whose initial value and error value are both zero.
  const zeroBuilder = (): UnicodeTrieBuilder => new UnicodeTrieBuilder(0, 0);

  // Check each [codePoint, expected] pair, in order, against `subject.get`.
  const expectLookups = (
    subject: Lookup,
    pairs: ReadonlyArray<readonly [number, number]>,
  ): void => {
    for (const [codePoint, expected] of pairs) {
      expect(subject.get(codePoint)).toBe(expected);
    }
  };

  describe("constructor", () => {
    test("should accept null initial value", () => {
      const subject = new UnicodeTrieBuilder(null, 0);
      expectLookups(subject, [[0x0100, 0]]);
    });

    test("should accept null error value", () => {
      const subject = new UnicodeTrieBuilder(0, null);
      expectLookups(subject, [[-1, 0]]);
    });

    test("should use default values when not provided", () => {
      const subject = new UnicodeTrieBuilder();
      expectLookups(subject, [
        [0x0100, 0],
        [-1, 0],
      ]);
    });
  });

  describe("set", () => {
    test("should set values for individual code points", () => {
      const subject = zeroBuilder();
      const assignments = [
        [0x0041, 1],
        [0x0042, 2],
        [0x0043, 3],
      ] as const;

      for (const [codePoint, value] of assignments) {
        subject.set(codePoint, value);
      }

      expectLookups(subject, assignments);
    });

    test("should overwrite previous values", () => {
      const subject = zeroBuilder();

      // Each write must be visible immediately and replace the previous one.
      for (const value of [100, 200]) {
        subject.set(0x0041, value);
        expect(subject.get(0x0041)).toBe(value);
      }
    });

    test("should handle supplementary plane code points", () => {
      const subject = zeroBuilder();
      const emoji = 0x1f600; // Emoji code point
      subject.set(emoji, 128512);

      expectLookups(subject, [[emoji, 128512]]);
    });
  });

  describe("setRange", () => {
    test("should set range without overwrite", () => {
      const subject = zeroBuilder();
      subject.set(0x50, 99);
      subject.setRange(0x40, 0x60, 50, false);

      expectLookups(subject, [
        [0x50, 99], // pre-existing value is kept
        [0x40, 50], // both ends of the range are filled
        [0x60, 50],
      ]);
    });

    test("should set range with overwrite", () => {
      const subject = zeroBuilder();
      subject.set(0x50, 99);
      subject.setRange(0x40, 0x60, 50, true);

      expectLookups(subject, [
        [0x50, 50], // pre-existing value is replaced
        [0x40, 50],
        [0x60, 50],
      ]);
    });

    test("should handle single code point range", () => {
      const subject = zeroBuilder();
      subject.setRange(0x100, 0x100, 42, true);

      expectLookups(subject, [
        [0x100, 42],
        [0x0ff, 0],
        [0x101, 0],
      ]);
    });

    test("should handle large ranges", () => {
      const subject = zeroBuilder();
      subject.setRange(0x0, 0xffff, 1, true);

      expectLookups(subject, [
        [0x0, 1],
        [0x8000, 1],
        [0xffff, 1],
        [0x10000, 0], // first code point beyond the range
      ]);
    });
  });

  describe("freeze", () => {
    test("should return compacted trie", () => {
      const subject = new UnicodeTrieBuilder(10, 20);
      subject.set(0x1000, 100);

      const frozen = subject.freeze();

      expectLookups(frozen, [
        [0x1000, 100],
        [0x1001, 10],
        [-1, 20],
      ]);
    });

    test("should return same values as builder", () => {
      const subject = new UnicodeTrieBuilder(5, 999);
      subject.setRange(0x100, 0x1ff, 50, true);
      subject.set(0x150, 75);

      const frozen = subject.freeze();

      let codePoint = 0x100;
      while (codePoint <= 0x1ff) {
        expect(frozen.get(codePoint)).toBe(subject.get(codePoint));
        codePoint += 1;
      }
    });
  });

  describe("toBuffer", () => {
    test("should produce buffer that can be loaded", async () => {
      const subject = new UnicodeTrieBuilder(1, 2);
      subject.set(0x5000, 500);

      const serialized = await subject.toBuffer();
      expect(serialized).toBeInstanceOf(Buffer);
      expect(serialized.length).toBeGreaterThan(12); // Header is 12 bytes

      // Header fields are little-endian 32-bit integers.
      const headerHighStart = serialized.readUInt32LE(0);
      const headerErrorValue = serialized.readUInt32LE(4);
      expect(headerErrorValue).toBe(2);
      expect(headerHighStart).toBeGreaterThan(0);
    });

    test("should produce valid compressed output", async () => {
      const subject = zeroBuilder();
      subject.setRange(0x0, 0xff, 1, true);

      const serialized = await subject.toBuffer();

      // Round-trip through the reader to verify the serialized form.
      const { default: UnicodeTrie } = await import("../src/index.js");
      const loaded = await UnicodeTrie.create(serialized);

      expectLookups(loaded, [
        [0x00, 1],
        [0xff, 1],
        [0x100, 0],
      ]);
    });
  });
});
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { describe, expect, test } from "vitest";
|
|
2
|
+
import { swap32LE } from "../src/swap.js";
|
|
3
|
+
|
|
4
|
+
describe("swap32LE", () => {
  // Host byte order, detected independently of the module under test.
  const hostIsBigEndian = new Uint8Array(new Uint16Array([0x0102]).buffer)[0] === 0x01;

  // Expected bytes after swap32LE for an input whose length is a multiple of
  // four: unchanged on little-endian hosts, each 4-byte word reversed on
  // big-endian hosts.
  const expectedHostOrder = (littleEndianBytes: readonly number[]): number[] => {
    if (!hostIsBigEndian) {
      return [...littleEndianBytes];
    }
    const swapped: number[] = [];
    for (let start = 0; start < littleEndianBytes.length; start += 4) {
      swapped.push(...littleEndianBytes.slice(start, start + 4).reverse());
    }
    return swapped;
  };

  test("should not modify array on little-endian systems", () => {
    // Most modern systems are little-endian, so the no-op path is the common
    // case; on a big-endian host the same assertion checks the reversed word.
    const original = [0x12, 0x34, 0x56, 0x78];
    const input = new Uint8Array(original);

    swap32LE(input);

    expect(input.length).toBe(4);
    expect(Array.from(input)).toEqual(expectedHostOrder(original));
  });

  test("should handle empty array", () => {
    const input = new Uint8Array([]);
    expect(() => swap32LE(input)).not.toThrow();
    expect(input.length).toBe(0);
  });

  test("should handle multiple 32-bit values", () => {
    const original = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
    const input = new Uint8Array(original);

    expect(() => swap32LE(input)).not.toThrow();

    expect(input.length).toBe(8);
    // Words are converted independently; bytes never cross a word boundary.
    expect(Array.from(input)).toEqual(expectedHostOrder(original));
  });

  test("should work with array that is multiple of 4 bytes", () => {
    const original = Array.from({ length: 12 }, (_, i) => i);
    const input = new Uint8Array(original);

    expect(() => swap32LE(input)).not.toThrow();

    expect(Array.from(input)).toEqual(expectedHostOrder(original));
  });
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { describe, expect, test } from "vitest";
|
|
2
|
+
import UnicodeTrieBuilder from "../src/builder.js";
|
|
3
|
+
import UnicodeTrie from "../src/unicode-trie.js";
|
|
4
|
+
|
|
5
|
+
describe("UnicodeTrie", () => {
  describe("get", () => {
    // These tests are fully synchronous (freeze() and get() return values
    // directly), so the callbacks are not declared async.
    test("should return errorValue for negative code points", () => {
      const builder = new UnicodeTrieBuilder(0, 999);
      const trie = builder.freeze();
      expect(trie.get(-1)).toBe(999);
      expect(trie.get(-100)).toBe(999);
    });

    test("should return errorValue for code points above 0x10FFFF", () => {
      const builder = new UnicodeTrieBuilder(0, 888);
      const trie = builder.freeze();
      expect(trie.get(0x110000)).toBe(888);
      expect(trie.get(0x200000)).toBe(888);
    });

    test("should handle BMP code points (0x0000-0xFFFF)", () => {
      const builder = new UnicodeTrieBuilder(0, 0);
      builder.set(0x0041, 65); // 'A'
      builder.set(0x0061, 97); // 'a'
      builder.set(0x0000, 1); // NUL
      builder.set(0xffff, 2); // Last BMP

      const trie = builder.freeze();
      expect(trie.get(0x0041)).toBe(65);
      expect(trie.get(0x0061)).toBe(97);
      expect(trie.get(0x0000)).toBe(1);
      expect(trie.get(0xffff)).toBe(2);
    });

    test("should handle lead surrogate range (0xD800-0xDBFF)", () => {
      const builder = new UnicodeTrieBuilder(0, 0);
      builder.set(0xd800, 100);
      builder.set(0xdbff, 101);

      const trie = builder.freeze();
      expect(trie.get(0xd800)).toBe(100);
      expect(trie.get(0xdbff)).toBe(101);
    });

    test("should handle supplementary code points (0x10000-0x10FFFF)", () => {
      const builder = new UnicodeTrieBuilder(0, 0);
      builder.set(0x10000, 200); // First supplementary
      builder.set(0x1f600, 201); // Emoji
      builder.set(0x10ffff, 202); // Last valid

      const trie = builder.freeze();
      expect(trie.get(0x10000)).toBe(200);
      expect(trie.get(0x1f600)).toBe(201);
      expect(trie.get(0x10ffff)).toBe(202);
    });

    test("should return initial value for unset code points", () => {
      const builder = new UnicodeTrieBuilder(42, 0);
      const trie = builder.freeze();

      expect(trie.get(0x0100)).toBe(42);
      expect(trie.get(0x5000)).toBe(42);
    });
  });

  describe("fromJSON", () => {
    test("should create trie from JSON object", () => {
      const builder = new UnicodeTrieBuilder(10, 666);
      builder.set(0x0041, 100);
      const frozen = builder.freeze();

      const json = {
        data: frozen.data,
        highStart: frozen.highStart,
        errorValue: frozen.errorValue,
      };

      const trie = UnicodeTrie.fromJSON(json);
      expect(trie.get(0x0041)).toBe(100);
      expect(trie.get(0x0042)).toBe(10);
      expect(trie.get(-1)).toBe(666);
    });

    test("should accept Int32Array as data", () => {
      const data = new Int32Array([1, 2, 3, 4]);
      const json = {
        data,
        highStart: 0x110000,
        errorValue: 0,
      };

      const trie = UnicodeTrie.fromJSON(json);
      expect(trie.data).toBeInstanceOf(Uint32Array);
      // The conversion must carry the element values over, not just the type.
      expect(Array.from(trie.data)).toEqual([1, 2, 3, 4]);
    });
  });

  describe("create", () => {
    test("should detect JSON format and use fromJSON", async () => {
      const builder = new UnicodeTrieBuilder(5, 500);
      builder.set(0x0100, 99);
      const frozen = builder.freeze();

      const json = {
        data: frozen.data,
        highStart: frozen.highStart,
        errorValue: frozen.errorValue,
      };

      const trie = await UnicodeTrie.create(json);
      expect(trie.get(0x0100)).toBe(99);
      expect(trie.get(-1)).toBe(500);
    });

    test("should detect buffer format and use fromBuffer", async () => {
      const builder = new UnicodeTrieBuilder(7, 700);
      builder.set(0x0200, 77);

      const buffer = await builder.toBuffer();
      const trie = await UnicodeTrie.create(buffer);

      expect(trie.get(0x0200)).toBe(77);
      expect(trie.get(-1)).toBe(700);
    });
  });
});
|