@chr33s/pdf-unicode-properties 5.0.0 → 5.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chr33s/pdf-unicode-properties",
3
- "version": "5.0.0",
3
+ "version": "5.0.1",
4
4
  "description": "Provides fast access to unicode character properties",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -1,149 +0,0 @@
1
- import codePoints from "@chr33s/pdf-codepoints";
2
- import { base64, deflate } from "@chr33s/pdf-common";
3
- import { builder as UnicodeTrieBuilder } from "@chr33s/pdf-unicode-trie";
4
- import { writeFile } from "node:fs/promises";
5
- import path from "node:path";
6
-
7
- type IndexLookup = Record<string, number>;
8
-
9
- const bits = (value: number): number => (value > 0 ? (Math.log2(value) + 1) | 0 : 0);
10
-
11
- const numericValue = (numeric?: string | null): number => {
12
- if (!numeric) {
13
- return 0;
14
- }
15
-
16
- const fractionMatch = numeric.match(/^(-?\d+)\/(\d+)$/);
17
- if (fractionMatch) {
18
- const numerator = parseInt(fractionMatch[1], 10);
19
- const denominator = parseInt(fractionMatch[2], 10);
20
- return ((numerator + 12) << 4) + (denominator - 1);
21
- }
22
-
23
- if (/^\d0+$/.test(numeric)) {
24
- const mantissa = parseInt(numeric[0]!, 10);
25
- const exponent = numeric.length - 1;
26
- return ((mantissa + 14) << 5) + (exponent - 2);
27
- }
28
-
29
- const value = parseInt(numeric, 10);
30
- if (value <= 50) {
31
- return 1 + value;
32
- }
33
-
34
- let mantissa = value;
35
- let exponent = 0;
36
- while (mantissa % 60 === 0) {
37
- mantissa /= 60;
38
- exponent += 1;
39
- }
40
-
41
- return ((mantissa + 0xbf) << 2) + (exponent - 1);
42
- };
43
-
44
- const addIndex = (
45
- lookup: IndexLookup,
46
- key: string | null | undefined,
47
- currentCount: number,
48
- ): number => {
49
- const normalizedKey = key ?? "";
50
- if (lookup[normalizedKey] == null) {
51
- lookup[normalizedKey] = currentCount;
52
- return currentCount + 1;
53
- }
54
- return currentCount;
55
- };
56
-
57
- const getIndex = (lookup: IndexLookup, key: string | null | undefined): number =>
58
- lookup[key ?? ""] ?? 0;
59
-
60
- const srcDir = path.resolve(process.cwd(), "src");
61
- const trieFilePath = path.join(srcDir, "trie.js");
62
- const dataFilePath = path.join(srcDir, "data.js");
63
-
64
- const categories: IndexLookup = Object.create(null);
65
- const combiningClasses: IndexLookup = Object.create(null);
66
- const scripts: IndexLookup = Object.create(null);
67
- const eaws: IndexLookup = Object.create(null);
68
-
69
- let categoryCount = 0;
70
- let combiningClassCount = 0;
71
- let scriptCount = 0;
72
- let eawCount = 0;
73
-
74
- const entries = Array.from(codePoints);
75
-
76
- for (const entry of entries) {
77
- if (!entry) {
78
- continue;
79
- }
80
-
81
- categoryCount = addIndex(categories, entry.category, categoryCount);
82
- combiningClassCount = addIndex(combiningClasses, entry.combiningClassName, combiningClassCount);
83
- scriptCount = addIndex(scripts, entry.script, scriptCount);
84
- eawCount = addIndex(eaws, entry.eastAsianWidth, eawCount);
85
- }
86
-
87
- const numberBits = 10;
88
- const combiningClassBits = bits(combiningClassCount - 1);
89
- const scriptBits = bits(scriptCount - 1);
90
- const eawBits = bits(eawCount - 1);
91
-
92
- const categoryShift = combiningClassBits + scriptBits + eawBits + numberBits;
93
- const combiningShift = scriptBits + eawBits + numberBits;
94
- const scriptShift = eawBits + numberBits;
95
- const eawShift = numberBits;
96
-
97
- const trie = new UnicodeTrieBuilder();
98
- for (const entry of entries) {
99
- if (!entry) {
100
- continue;
101
- }
102
-
103
- const category = getIndex(categories, entry.category);
104
- const combiningClass = getIndex(combiningClasses, entry.combiningClassName);
105
- const script = getIndex(scripts, entry.script);
106
- const eaw = getIndex(eaws, entry.eastAsianWidth);
107
- const numeric = numericValue(entry.numeric);
108
-
109
- const val =
110
- (category << categoryShift) |
111
- (combiningClass << combiningShift) |
112
- (script << scriptShift) |
113
- (eaw << eawShift) |
114
- numeric;
115
-
116
- trie.set(entry.code, val);
117
- }
118
-
119
- const trieBuffer = await trie.toBuffer();
120
- const compressedTrie = await deflate(trieBuffer);
121
- const triePayload = base64.encode(
122
- compressedTrie.buffer.slice(
123
- compressedTrie.byteOffset,
124
- compressedTrie.byteOffset + compressedTrie.byteLength,
125
- ) as ArrayBuffer,
126
- );
127
-
128
- const emitModule = (value: string): string =>
129
- `const payload = ${JSON.stringify(value)};\nexport default payload;\n`;
130
-
131
- await writeFile(trieFilePath, emitModule(triePayload));
132
-
133
- const encoder = new TextEncoder();
134
- const data = {
135
- categories: Object.keys(categories),
136
- combiningClasses: Object.keys(combiningClasses),
137
- scripts: Object.keys(scripts),
138
- eaw: Object.keys(eaws),
139
- };
140
-
141
- const dataBytes = encoder.encode(JSON.stringify(data));
142
- const compressedData = await deflate(dataBytes);
143
- const dataPayload = base64.encode(
144
- compressedData.buffer.slice(
145
- compressedData.byteOffset,
146
- compressedData.byteOffset + compressedData.byteLength,
147
- ) as ArrayBuffer,
148
- );
149
- await writeFile(dataFilePath, emitModule(dataPayload));
@@ -1,91 +0,0 @@
1
- import { beforeAll, describe, expect, test } from "vitest";
2
- import createUnicodeProperties, { type UnicodePropertiesAPI } from "../src/index.js";
3
-
4
- const code = (char: string) => char.charCodeAt(0);
5
-
6
- describe("unicode-properties", () => {
7
- let unicode: UnicodePropertiesAPI;
8
-
9
- beforeAll(async () => {
10
- unicode = await createUnicodeProperties();
11
- });
12
-
13
- test("getCategory", () => {
14
- expect(unicode.getCategory(code("2"))).toBe("Nd");
15
- expect(unicode.getCategory(code("x"))).toBe("Ll");
16
- });
17
-
18
- test("getCombiningClass", () => {
19
- expect(unicode.getCombiningClass(code("x"))).toBe("Not_Reordered");
20
- expect(unicode.getCombiningClass(code("́"))).toBe("Above");
21
- expect(unicode.getCombiningClass(code("ٕ"))).toBe("Below");
22
- expect(unicode.getCombiningClass(code("ٔ"))).toBe("Above");
23
- });
24
-
25
- test("getScript", () => {
26
- expect(unicode.getScript(code("x"))).toBe("Latin");
27
- expect(unicode.getScript(code("غ"))).toBe("Arabic");
28
- });
29
-
30
- test("getEastAsianWidth", () => {
31
- expect(unicode.getEastAsianWidth(code("x"))).toBe("Na");
32
- expect(unicode.getEastAsianWidth(code("杜"))).toBe("W");
33
- expect(unicode.getEastAsianWidth(code("Æ"))).toBe("A");
34
- });
35
-
36
- test("getNumericValue", () => {
37
- expect(unicode.getNumericValue(code("2"))).toBe(2);
38
- expect(unicode.getNumericValue(code("x"))).toBeNull();
39
- });
40
-
41
- test("isAlphabetic", () => {
42
- expect(unicode.isAlphabetic(code("x"))).toBe(true);
43
- expect(unicode.isAlphabetic(code("2"))).toBe(false);
44
- });
45
-
46
- test("isDigit", () => {
47
- expect(unicode.isDigit(code("x"))).toBe(false);
48
- expect(unicode.isDigit(code("2"))).toBe(true);
49
- });
50
-
51
- test("isPunctuation", () => {
52
- expect(unicode.isPunctuation(code("x"))).toBe(false);
53
- expect(unicode.isPunctuation(code("."))).toBe(true);
54
- });
55
-
56
- test("isLowerCase", () => {
57
- expect(unicode.isLowerCase(code("X"))).toBe(false);
58
- expect(unicode.isLowerCase(code("2"))).toBe(false);
59
- expect(unicode.isLowerCase(code("x"))).toBe(true);
60
- });
61
-
62
- test("isUpperCase", () => {
63
- expect(unicode.isUpperCase(code("X"))).toBe(true);
64
- expect(unicode.isUpperCase(code("2"))).toBe(false);
65
- expect(unicode.isUpperCase(code("x"))).toBe(false);
66
- });
67
-
68
- test("isTitleCase", () => {
69
- expect(unicode.isTitleCase(code("ǲ"))).toBe(true);
70
- expect(unicode.isTitleCase(code("2"))).toBe(false);
71
- expect(unicode.isTitleCase(code("x"))).toBe(false);
72
- });
73
-
74
- test("isWhiteSpace", () => {
75
- expect(unicode.isWhiteSpace(code(" "))).toBe(true);
76
- expect(unicode.isWhiteSpace(code("2"))).toBe(false);
77
- expect(unicode.isWhiteSpace(code("x"))).toBe(false);
78
- });
79
-
80
- test("isBaseForm", () => {
81
- expect(unicode.isBaseForm(code("2"))).toBe(true);
82
- expect(unicode.isBaseForm(code("x"))).toBe(true);
83
- expect(unicode.isBaseForm(code("́"))).toBe(false);
84
- });
85
-
86
- test("isMark", () => {
87
- expect(unicode.isMark(code("2"))).toBe(false);
88
- expect(unicode.isMark(code("x"))).toBe(false);
89
- expect(unicode.isMark(code("́"))).toBe(true);
90
- });
91
- });
package/tsconfig.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "extends": "../../tsconfig.json",
3
- "compilerOptions": {
4
- "allowJs": true,
5
- "rootDir": "src",
6
- "outDir": "dist"
7
- },
8
- "include": ["src/**/*.ts", "src/**/*.js"],
9
- "exclude": ["dist", "node_modules", "test"]
10
- }
@@ -1,16 +0,0 @@
1
- {
2
- "extends": "./tsconfig.json",
3
- "compilerOptions": {
4
- "noEmit": true,
5
- "rootDir": "."
6
- },
7
- "include": [
8
- "**/*.ts",
9
- ],
10
- "exclude": [
11
- "dist",
12
- "node_modules",
13
- "src/data.js",
14
- "src/trie.js"
15
- ]
16
- }
package/vitest.config.ts DELETED
@@ -1,12 +0,0 @@
1
- import { defineConfig } from "vitest/config";
2
-
3
- export default defineConfig({
4
- test: {
5
- environment: "node",
6
- include: ["test/**/*.test.ts"],
7
- coverage: {
8
- provider: "v8",
9
- reporter: ["text", "lcov"],
10
- },
11
- },
12
- });