@chr33s/pdf-unicode-trie 5.0.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chr33s/pdf-unicode-trie",
3
- "version": "5.0.0",
3
+ "version": "5.0.1",
4
4
  "description": "Unicode Trie data structure for fast character metadata lookup, ported from ICU",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -1,151 +0,0 @@
1
- import { describe, expect, test } from "vitest";
2
- import UnicodeTrieBuilder from "../src/builder.js";
3
-
4
- describe("UnicodeTrieBuilder", () => {
5
- describe("constructor", () => {
6
- test("should accept null initial value", () => {
7
- const builder = new UnicodeTrieBuilder(null, 0);
8
- expect(builder.get(0x0100)).toBe(0);
9
- });
10
-
11
- test("should accept null error value", () => {
12
- const builder = new UnicodeTrieBuilder(0, null);
13
- expect(builder.get(-1)).toBe(0);
14
- });
15
-
16
- test("should use default values when not provided", () => {
17
- const builder = new UnicodeTrieBuilder();
18
- expect(builder.get(0x0100)).toBe(0);
19
- expect(builder.get(-1)).toBe(0);
20
- });
21
- });
22
-
23
- describe("set", () => {
24
- test("should set values for individual code points", () => {
25
- const builder = new UnicodeTrieBuilder(0, 0);
26
- builder.set(0x0041, 1);
27
- builder.set(0x0042, 2);
28
- builder.set(0x0043, 3);
29
-
30
- expect(builder.get(0x0041)).toBe(1);
31
- expect(builder.get(0x0042)).toBe(2);
32
- expect(builder.get(0x0043)).toBe(3);
33
- });
34
-
35
- test("should overwrite previous values", () => {
36
- const builder = new UnicodeTrieBuilder(0, 0);
37
- builder.set(0x0041, 100);
38
- expect(builder.get(0x0041)).toBe(100);
39
-
40
- builder.set(0x0041, 200);
41
- expect(builder.get(0x0041)).toBe(200);
42
- });
43
-
44
- test("should handle supplementary plane code points", () => {
45
- const builder = new UnicodeTrieBuilder(0, 0);
46
- builder.set(0x1f600, 128512); // Emoji code point
47
-
48
- expect(builder.get(0x1f600)).toBe(128512);
49
- });
50
- });
51
-
52
- describe("setRange", () => {
53
- test("should set range without overwrite", () => {
54
- const builder = new UnicodeTrieBuilder(0, 0);
55
- builder.set(0x50, 99);
56
- builder.setRange(0x40, 0x60, 50, false);
57
-
58
- // Existing value should not be overwritten
59
- expect(builder.get(0x50)).toBe(99);
60
- // Other values in range should be set
61
- expect(builder.get(0x40)).toBe(50);
62
- expect(builder.get(0x60)).toBe(50);
63
- });
64
-
65
- test("should set range with overwrite", () => {
66
- const builder = new UnicodeTrieBuilder(0, 0);
67
- builder.set(0x50, 99);
68
- builder.setRange(0x40, 0x60, 50, true);
69
-
70
- // Existing value should be overwritten
71
- expect(builder.get(0x50)).toBe(50);
72
- expect(builder.get(0x40)).toBe(50);
73
- expect(builder.get(0x60)).toBe(50);
74
- });
75
-
76
- test("should handle single code point range", () => {
77
- const builder = new UnicodeTrieBuilder(0, 0);
78
- builder.setRange(0x100, 0x100, 42, true);
79
-
80
- expect(builder.get(0x100)).toBe(42);
81
- expect(builder.get(0x0ff)).toBe(0);
82
- expect(builder.get(0x101)).toBe(0);
83
- });
84
-
85
- test("should handle large ranges", () => {
86
- const builder = new UnicodeTrieBuilder(0, 0);
87
- builder.setRange(0x0, 0xffff, 1, true);
88
-
89
- expect(builder.get(0x0)).toBe(1);
90
- expect(builder.get(0x8000)).toBe(1);
91
- expect(builder.get(0xffff)).toBe(1);
92
- expect(builder.get(0x10000)).toBe(0); // Beyond range
93
- });
94
- });
95
-
96
- describe("freeze", () => {
97
- test("should return compacted trie", () => {
98
- const builder = new UnicodeTrieBuilder(10, 20);
99
- builder.set(0x1000, 100);
100
-
101
- const trie = builder.freeze();
102
- expect(trie.get(0x1000)).toBe(100);
103
- expect(trie.get(0x1001)).toBe(10);
104
- expect(trie.get(-1)).toBe(20);
105
- });
106
-
107
- test("should return same values as builder", () => {
108
- const builder = new UnicodeTrieBuilder(5, 999);
109
- builder.setRange(0x100, 0x1ff, 50, true);
110
- builder.set(0x150, 75);
111
-
112
- const trie = builder.freeze();
113
-
114
- for (let cp = 0x100; cp <= 0x1ff; cp++) {
115
- expect(trie.get(cp)).toBe(builder.get(cp));
116
- }
117
- });
118
- });
119
-
120
- describe("toBuffer", () => {
121
- test("should produce buffer that can be loaded", async () => {
122
- const builder = new UnicodeTrieBuilder(1, 2);
123
- builder.set(0x5000, 500);
124
-
125
- const buffer = await builder.toBuffer();
126
- expect(buffer).toBeInstanceOf(Buffer);
127
- expect(buffer.length).toBeGreaterThan(12); // Header is 12 bytes
128
-
129
- // Verify header values
130
- const highStart = buffer.readUInt32LE(0);
131
- const errorValue = buffer.readUInt32LE(4);
132
- expect(errorValue).toBe(2);
133
- expect(highStart).toBeGreaterThan(0);
134
- });
135
-
136
- test("should produce valid compressed output", async () => {
137
- const builder = new UnicodeTrieBuilder(0, 0);
138
- builder.setRange(0x0, 0xff, 1, true);
139
-
140
- const buffer = await builder.toBuffer();
141
-
142
- // Import UnicodeTrie to verify
143
- const { default: UnicodeTrie } = await import("../src/index.js");
144
- const trie = await UnicodeTrie.create(buffer);
145
-
146
- expect(trie.get(0x00)).toBe(1);
147
- expect(trie.get(0xff)).toBe(1);
148
- expect(trie.get(0x100)).toBe(0);
149
- });
150
- });
151
- });
package/test/swap.test.ts DELETED
@@ -1,36 +0,0 @@
1
- import { describe, expect, test } from "vitest";
2
- import { swap32LE } from "../src/swap.js";
3
-
4
- describe("swap32LE", () => {
5
- test("should not modify array on little-endian systems", () => {
6
- // Most modern systems are little-endian, so this is the common case
7
- const input = new Uint8Array([0x12, 0x34, 0x56, 0x78]);
8
-
9
- // On little-endian systems, this should be a no-op
10
- // On big-endian systems, it will swap bytes
11
- swap32LE(input);
12
-
13
- // We check that the function runs without error
14
- // The actual byte order depends on the system endianness
15
- expect(input.length).toBe(4);
16
- });
17
-
18
- test("should handle empty array", () => {
19
- const input = new Uint8Array([]);
20
- expect(() => swap32LE(input)).not.toThrow();
21
- });
22
-
23
- test("should handle multiple 32-bit values", () => {
24
- const input = new Uint8Array([0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]);
25
- expect(() => swap32LE(input)).not.toThrow();
26
- expect(input.length).toBe(8);
27
- });
28
-
29
- test("should work with array that is multiple of 4 bytes", () => {
30
- const input = new Uint8Array(12);
31
- for (let i = 0; i < 12; i++) {
32
- input[i] = i;
33
- }
34
- expect(() => swap32LE(input)).not.toThrow();
35
- });
36
- });
package/test/trie.test.ts DELETED
@@ -1,125 +0,0 @@
1
- import { describe, expect, test } from "vitest";
2
- import UnicodeTrieBuilder from "../src/builder.js";
3
- import UnicodeTrie from "../src/unicode-trie.js";
4
-
5
- describe("UnicodeTrie", () => {
6
- describe("get", () => {
7
- test("should return errorValue for negative code points", async () => {
8
- const builder = new UnicodeTrieBuilder(0, 999);
9
- const trie = builder.freeze();
10
- expect(trie.get(-1)).toBe(999);
11
- expect(trie.get(-100)).toBe(999);
12
- });
13
-
14
- test("should return errorValue for code points above 0x10FFFF", async () => {
15
- const builder = new UnicodeTrieBuilder(0, 888);
16
- const trie = builder.freeze();
17
- expect(trie.get(0x110000)).toBe(888);
18
- expect(trie.get(0x200000)).toBe(888);
19
- });
20
-
21
- test("should handle BMP code points (0x0000-0xFFFF)", async () => {
22
- const builder = new UnicodeTrieBuilder(0, 0);
23
- builder.set(0x0041, 65); // 'A'
24
- builder.set(0x0061, 97); // 'a'
25
- builder.set(0x0000, 1); // NUL
26
- builder.set(0xffff, 2); // Last BMP
27
-
28
- const trie = builder.freeze();
29
- expect(trie.get(0x0041)).toBe(65);
30
- expect(trie.get(0x0061)).toBe(97);
31
- expect(trie.get(0x0000)).toBe(1);
32
- expect(trie.get(0xffff)).toBe(2);
33
- });
34
-
35
- test("should handle lead surrogate range (0xD800-0xDBFF)", async () => {
36
- const builder = new UnicodeTrieBuilder(0, 0);
37
- builder.set(0xd800, 100);
38
- builder.set(0xdbff, 101);
39
-
40
- const trie = builder.freeze();
41
- expect(trie.get(0xd800)).toBe(100);
42
- expect(trie.get(0xdbff)).toBe(101);
43
- });
44
-
45
- test("should handle supplementary code points (0x10000-0x10FFFF)", async () => {
46
- const builder = new UnicodeTrieBuilder(0, 0);
47
- builder.set(0x10000, 200); // First supplementary
48
- builder.set(0x1f600, 201); // Emoji
49
- builder.set(0x10ffff, 202); // Last valid
50
-
51
- const trie = builder.freeze();
52
- expect(trie.get(0x10000)).toBe(200);
53
- expect(trie.get(0x1f600)).toBe(201);
54
- expect(trie.get(0x10ffff)).toBe(202);
55
- });
56
-
57
- test("should return initial value for unset code points", async () => {
58
- const builder = new UnicodeTrieBuilder(42, 0);
59
- const trie = builder.freeze();
60
-
61
- expect(trie.get(0x0100)).toBe(42);
62
- expect(trie.get(0x5000)).toBe(42);
63
- });
64
- });
65
-
66
- describe("fromJSON", () => {
67
- test("should create trie from JSON object", async () => {
68
- const builder = new UnicodeTrieBuilder(10, 666);
69
- builder.set(0x0041, 100);
70
- const frozen = builder.freeze();
71
-
72
- const json = {
73
- data: frozen.data,
74
- highStart: frozen.highStart,
75
- errorValue: frozen.errorValue,
76
- };
77
-
78
- const trie = UnicodeTrie.fromJSON(json);
79
- expect(trie.get(0x0041)).toBe(100);
80
- expect(trie.get(0x0042)).toBe(10);
81
- expect(trie.get(-1)).toBe(666);
82
- });
83
-
84
- test("should accept Int32Array as data", () => {
85
- const data = new Int32Array([1, 2, 3, 4]);
86
- const json = {
87
- data,
88
- highStart: 0x110000,
89
- errorValue: 0,
90
- };
91
-
92
- const trie = UnicodeTrie.fromJSON(json);
93
- expect(trie.data).toBeInstanceOf(Uint32Array);
94
- });
95
- });
96
-
97
- describe("create", () => {
98
- test("should detect JSON format and use fromJSON", async () => {
99
- const builder = new UnicodeTrieBuilder(5, 500);
100
- builder.set(0x0100, 99);
101
- const frozen = builder.freeze();
102
-
103
- const json = {
104
- data: frozen.data,
105
- highStart: frozen.highStart,
106
- errorValue: frozen.errorValue,
107
- };
108
-
109
- const trie = await UnicodeTrie.create(json);
110
- expect(trie.get(0x0100)).toBe(99);
111
- expect(trie.get(-1)).toBe(500);
112
- });
113
-
114
- test("should detect buffer format and use fromBuffer", async () => {
115
- const builder = new UnicodeTrieBuilder(7, 700);
116
- builder.set(0x0200, 77);
117
-
118
- const buffer = await builder.toBuffer();
119
- const trie = await UnicodeTrie.create(buffer);
120
-
121
- expect(trie.get(0x0200)).toBe(77);
122
- expect(trie.get(-1)).toBe(700);
123
- });
124
- });
125
- });
@@ -1,258 +0,0 @@
1
- import { describe, expect, test } from "vitest";
2
-
3
- import UnicodeTrieBuilder from "../src/builder.js";
4
- import UnicodeTrie from "../src/index.js";
5
-
6
- describe("unicode trie", () => {
7
- test("set", () => {
8
- const trie = new UnicodeTrieBuilder(10, 666);
9
- trie.set(0x4567, 99);
10
- expect(trie.get(0x4566)).toBe(10);
11
- expect(trie.get(0x4567)).toBe(99);
12
- expect(trie.get(-1)).toBe(666);
13
- expect(trie.get(0x110000)).toBe(666);
14
- });
15
-
16
- test("set -> compacted trie", () => {
17
- const builder = new UnicodeTrieBuilder(10, 666);
18
- builder.set(0x4567, 99);
19
-
20
- const trie = builder.freeze();
21
- expect(trie.get(0x4566)).toBe(10);
22
- expect(trie.get(0x4567)).toBe(99);
23
- expect(trie.get(-1)).toBe(666);
24
- expect(trie.get(0x110000)).toBe(666);
25
- });
26
-
27
- test("setRange", () => {
28
- const trie = new UnicodeTrieBuilder(10, 666);
29
- trie.setRange(13, 6666, 7788, false);
30
- trie.setRange(6000, 7000, 9900, true);
31
-
32
- expect(trie.get(12)).toBe(10);
33
- expect(trie.get(13)).toBe(7788);
34
- expect(trie.get(5999)).toBe(7788);
35
- expect(trie.get(6000)).toBe(9900);
36
- expect(trie.get(7000)).toBe(9900);
37
- expect(trie.get(7001)).toBe(10);
38
- expect(trie.get(0x110000)).toBe(666);
39
- });
40
-
41
- test("setRange -> compacted trie", () => {
42
- const builder = new UnicodeTrieBuilder(10, 666);
43
- builder.setRange(13, 6666, 7788, false);
44
- builder.setRange(6000, 7000, 9900, true);
45
-
46
- const trie = builder.freeze();
47
- expect(trie.get(12)).toBe(10);
48
- expect(trie.get(13)).toBe(7788);
49
- expect(trie.get(5999)).toBe(7788);
50
- expect(trie.get(6000)).toBe(9900);
51
- expect(trie.get(7000)).toBe(9900);
52
- expect(trie.get(7001)).toBe(10);
53
- expect(trie.get(0x110000)).toBe(666);
54
- });
55
-
56
- test("toBuffer produces valid compressed output", async () => {
57
- const builder = new UnicodeTrieBuilder();
58
- builder.set(0x4567, 99);
59
-
60
- const buf = await builder.toBuffer();
61
- // Verify buffer has correct header format
62
- expect(buf.length).toBeGreaterThan(12);
63
- // Verify high start and error values in header
64
- expect(buf.readUInt32LE(0)).toBe(builder.freeze().highStart);
65
- expect(buf.readUInt32LE(4)).toBe(builder.freeze().errorValue);
66
- // Verify that the output can be loaded back as a valid trie
67
- const trie = await UnicodeTrie.create(buf);
68
- expect(trie.get(0x4567)).toBe(99);
69
- expect(trie.get(0x4566)).toBe(0);
70
- });
71
-
72
- test("should work with compressed serialization format", async () => {
73
- const builder = new UnicodeTrieBuilder(10, 666);
74
- builder.setRange(13, 6666, 7788, false);
75
- builder.setRange(6000, 7000, 9900, true);
76
-
77
- const buf = await builder.toBuffer();
78
- const trie = await UnicodeTrie.create(buf);
79
- expect(trie.get(12)).toBe(10);
80
- expect(trie.get(13)).toBe(7788);
81
- expect(trie.get(5999)).toBe(7788);
82
- expect(trie.get(6000)).toBe(9900);
83
- expect(trie.get(7000)).toBe(9900);
84
- expect(trie.get(7001)).toBe(10);
85
- expect(trie.get(0x110000)).toBe(666);
86
- });
87
-
88
- const rangeTests = [
89
- {
90
- ranges: [
91
- [0, 0, 0, 0],
92
- [0, 0x40, 0, 0],
93
- [0x40, 0xe7, 0x1234, 0],
94
- [0xe7, 0x3400, 0, 0],
95
- [0x3400, 0x9fa6, 0x6162, 0],
96
- [0x9fa6, 0xda9e, 0x3132, 0],
97
- [0xdada, 0xeeee, 0x87ff, 0],
98
- [0xeeee, 0x11111, 1, 0],
99
- [0x11111, 0x44444, 0x6162, 0],
100
- [0x44444, 0x60003, 0, 0],
101
- [0xf0003, 0xf0004, 0xf, 0],
102
- [0xf0004, 0xf0006, 0x10, 0],
103
- [0xf0006, 0xf0007, 0x11, 0],
104
- [0xf0007, 0xf0040, 0x12, 0],
105
- [0xf0040, 0x110000, 0, 0],
106
- ],
107
- check: [
108
- [0, 0],
109
- [0x40, 0],
110
- [0xe7, 0x1234],
111
- [0x3400, 0],
112
- [0x9fa6, 0x6162],
113
- [0xda9e, 0x3132],
114
- [0xdada, 0],
115
- [0xeeee, 0x87ff],
116
- [0x11111, 1],
117
- [0x44444, 0x6162],
118
- [0xf0003, 0],
119
- [0xf0004, 0xf],
120
- [0xf0006, 0x10],
121
- [0xf0007, 0x11],
122
- [0xf0040, 0x12],
123
- [0x110000, 0],
124
- ],
125
- },
126
- {
127
- ranges: [
128
- [0, 0, 0, 0],
129
- [0x21, 0x7f, 0x5555, 1],
130
- [0x2f800, 0x2fedc, 0x7a, 1],
131
- [0x72, 0xdd, 3, 1],
132
- [0xdd, 0xde, 4, 0],
133
- [0x201, 0x240, 6, 1],
134
- [0x241, 0x280, 6, 1],
135
- [0x281, 0x2c0, 6, 1],
136
- [0x2f987, 0x2fa98, 5, 1],
137
- [0x2f777, 0x2f883, 0, 1],
138
- [0x2f900, 0x2ffaa, 1, 0],
139
- [0x2ffaa, 0x2ffab, 2, 1],
140
- [0x2ffbb, 0x2ffc0, 7, 1],
141
- ],
142
- check: [
143
- [0, 0],
144
- [0x21, 0],
145
- [0x72, 0x5555],
146
- [0xdd, 3],
147
- [0xde, 4],
148
- [0x201, 0],
149
- [0x240, 6],
150
- [0x241, 0],
151
- [0x280, 6],
152
- [0x281, 0],
153
- [0x2c0, 6],
154
- [0x2f883, 0],
155
- [0x2f987, 0x7a],
156
- [0x2fa98, 5],
157
- [0x2fedc, 0x7a],
158
- [0x2ffaa, 1],
159
- [0x2ffab, 2],
160
- [0x2ffbb, 0],
161
- [0x2ffc0, 7],
162
- [0x110000, 0],
163
- ],
164
- },
165
- {
166
- ranges: [
167
- [0, 0, 9, 0],
168
- [0x31, 0xa4, 1, 0],
169
- [0x3400, 0x6789, 2, 0],
170
- [0x8000, 0x89ab, 9, 1],
171
- [0x9000, 0xa000, 4, 1],
172
- [0xabcd, 0xbcde, 3, 1],
173
- [0x55555, 0x110000, 6, 1],
174
- [0xcccc, 0x55555, 6, 1],
175
- ],
176
- check: [
177
- [0, 9],
178
- [0x31, 9],
179
- [0xa4, 1],
180
- [0x3400, 9],
181
- [0x6789, 2],
182
- [0x9000, 9],
183
- [0xa000, 4],
184
- [0xabcd, 9],
185
- [0xbcde, 3],
186
- [0xcccc, 9],
187
- [0x110000, 6],
188
- ],
189
- },
190
- {
191
- ranges: [[0, 0, 3, 0]],
192
- check: [
193
- [0, 3],
194
- [0x110000, 3],
195
- ],
196
- },
197
- {
198
- ranges: [
199
- [0, 0, 3, 0],
200
- [0, 0x110000, 5, 1],
201
- ],
202
- check: [
203
- [0, 3],
204
- [0x110000, 5],
205
- ],
206
- },
207
- ] as const;
208
-
209
- type TrieLike = { get: (codePoint: number) => number };
210
-
211
- const findMismatch = (trie: TrieLike, checks: readonly (readonly [number, number])[]) => {
212
- let start = 0;
213
- for (const [end, expected] of checks) {
214
- for (let codePoint = start; codePoint < end; codePoint++) {
215
- const actual = trie.get(codePoint);
216
- if (actual !== expected) {
217
- return { codePoint, expected, actual } as const;
218
- }
219
- }
220
- start = end;
221
- }
222
- return null;
223
- };
224
-
225
- test("should pass range tests", () => {
226
- for (const rangeTest of rangeTests) {
227
- let initialValue = 0;
228
- let errorValue = 0x0bad;
229
- let index = 0;
230
-
231
- if (rangeTest.ranges[index][1] < 0) {
232
- errorValue = rangeTest.ranges[index][2];
233
- index++;
234
- }
235
-
236
- initialValue = rangeTest.ranges[index++][2];
237
- const builder = new UnicodeTrieBuilder(initialValue, errorValue);
238
-
239
- for (const range of rangeTest.ranges.slice(index)) {
240
- builder.setRange(range[0], range[1] - 1, range[2], range[3] !== 0);
241
- }
242
-
243
- const frozen = builder.freeze();
244
-
245
- const builderMismatch = findMismatch(builder, rangeTest.check);
246
- const builderMessage = builderMismatch
247
- ? `builder mismatch at U+${builderMismatch.codePoint.toString(16)}: expected ${builderMismatch.expected}, got ${builderMismatch.actual}`
248
- : undefined;
249
- expect(builderMismatch, builderMessage).toBeNull();
250
-
251
- const frozenMismatch = findMismatch(frozen, rangeTest.check);
252
- const frozenMessage = frozenMismatch
253
- ? `frozen mismatch at U+${frozenMismatch.codePoint.toString(16)}: expected ${frozenMismatch.expected}, got ${frozenMismatch.actual}`
254
- : undefined;
255
- expect(frozenMismatch, frozenMessage).toBeNull();
256
- }
257
- });
258
- });
package/tsconfig.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "extends": "../../tsconfig.json",
3
- "compilerOptions": {
4
- "rootDir": "src",
5
- "outDir": "dist"
6
- },
7
- "include": ["src/**/*.ts"],
8
- "exclude": ["dist", "test", "scripts", "node_modules"]
9
- }
@@ -1,14 +0,0 @@
1
- {
2
- "extends": "./tsconfig.json",
3
- "compilerOptions": {
4
- "noEmit": true,
5
- "rootDir": "."
6
- },
7
- "include": [
8
- "**/*.ts",
9
- ],
10
- "exclude": [
11
- "dist",
12
- "node_modules"
13
- ]
14
- }
package/vitest.config.ts DELETED
@@ -1,8 +0,0 @@
1
- import { defineConfig } from "vitest/config";
2
-
3
- export default defineConfig({
4
- test: {
5
- environment: "node",
6
- include: ["test/**/*.test.ts"],
7
- },
8
- });