lindera-wasm-web 2.3.4 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -136
- package/lindera_wasm.d.ts +109 -71
- package/lindera_wasm.js +189 -74
- package/lindera_wasm_bg.wasm +0 -0
- package/opfs.d.ts +84 -0
- package/opfs.js +358 -0
- package/package.json +16 -4
package/opfs.js
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OPFS (Origin Private File System) helper utilities for Lindera WASM.
|
|
3
|
+
*
|
|
4
|
+
* Provides functions to download, store, load, and manage Lindera dictionaries
|
|
5
|
+
* using the browser's Origin Private File System for persistent caching.
|
|
6
|
+
*
|
|
7
|
+
* @module opfs
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Base names of the files that make up a built Lindera dictionary.
 *
 * downloadDictionary() keeps only the archive entries whose base name appears
 * in this list, and rejects an archive that lacks any of them.
 */
const DICTIONARY_FILES = [
  "metadata.json",
  "dict.da",
  "dict.vals",
  "dict.wordsidx",
  "dict.words",
  "matrix.mtx",
  "char_def.bin",
  "unk.bin",
];
|
|
21
|
+
|
|
22
|
+
/**
 * Base directory path within OPFS for storing dictionaries, expressed as the
 * list of nested directory names starting at the OPFS root. Each dictionary
 * is addressed by appending its name to this path.
 */
const OPFS_BASE_PATH = ["lindera", "dictionaries"];
|
|
24
|
+
|
|
25
|
+
/**
 * Resolves a nested OPFS directory, creating every missing level on the way.
 *
 * Starts at the directory returned by `navigator.storage.getDirectory()` and
 * descends one segment at a time, in order.
 *
 * @param {string[]} pathSegments - Directory names, outermost first.
 * @returns {Promise<FileSystemDirectoryHandle>} Handle of the innermost
 *   directory (the OPFS root itself when `pathSegments` is empty).
 */
async function getDirectoryHandle(pathSegments) {
  const createIfMissing = { create: true };
  let handle = await navigator.storage.getDirectory();
  for (const childName of pathSegments) {
    handle = await handle.getDirectoryHandle(childName, createIfMissing);
  }
  return handle;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Resolves the OPFS directory that holds one dictionary's files.
 *
 * The directory is looked up at OPFS_BASE_PATH followed by `name`; because the
 * lookup goes through getDirectoryHandle(), missing levels are created.
 *
 * @param {string} name - The dictionary name (e.g., "ipadic").
 * @returns {Promise<FileSystemDirectoryHandle>} The dictionary directory handle.
 */
async function getDictionaryDir(name) {
  const dictionaryPath = [...OPFS_BASE_PATH, name];
  return getDirectoryHandle(dictionaryPath);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Inflates a raw-deflate byte sequence with the Web Streams API.
 *
 * @param {Uint8Array} compressed - Raw deflate data (no zlib/gzip wrapper).
 * @returns {Promise<Uint8Array>} The decompressed bytes.
 */
async function inflateRaw(compressed) {
  const stream = new DecompressionStream("deflate-raw");
  const writer = stream.writable.getWriter();
  const reader = stream.readable.getReader();

  // Feed the input while the output is drained below. A decoding failure is
  // reported through reader.read() as well, so mark this promise as handled
  // to avoid a second, unhandled rejection for the same error.
  const feeding = writer.write(compressed).then(() => writer.close());
  feeding.catch(() => {});

  const chunks = [];
  let totalLength = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    chunks.push(value);
    totalLength += value.length;
  }
  await feeding;

  const inflated = new Uint8Array(totalLength);
  let pos = 0;
  for (const chunk of chunks) {
    inflated.set(chunk, pos);
    pos += chunk.length;
  }
  return inflated;
}

/**
 * Extracts the file entries of a zip archive without external dependencies.
 *
 * The End of Central Directory record is located by scanning backwards from
 * the end of the buffer, then every Central Directory entry is visited. Sizes
 * are taken from the Central Directory and the data offset from each local
 * file header. Entries whose name ends with "/" (directories) are skipped.
 * Only the "stored" (0) and "deflate" (8) methods are supported; Zip64 and
 * encrypted archives are rejected explicitly.
 *
 * Stored entries are returned as views into `zipBuffer` (no copy).
 *
 * @param {ArrayBuffer} zipBuffer - The zip file contents.
 * @returns {Promise<Map<string, Uint8Array>>} Map of entry path to file contents.
 * @throws {Error} If the archive is malformed, truncated, uses an unsupported
 *   feature, or an entry's size does not match the Central Directory.
 */
async function extractZip(zipBuffer) {
  const EOCD_SIGNATURE = 0x06054b50;
  const CENTRAL_SIGNATURE = 0x02014b50;
  const LOCAL_SIGNATURE = 0x04034b50;
  const EOCD_MIN_SIZE = 22;

  const view = new DataView(zipBuffer);
  const bytes = new Uint8Array(zipBuffer);
  const entries = new Map();
  const decoder = new TextDecoder();

  // Find End of Central Directory record (search from end, because the record
  // may be followed by a variable-length archive comment).
  let eocdOffset = -1;
  for (let i = bytes.length - EOCD_MIN_SIZE; i >= 0; i--) {
    if (view.getUint32(i, true) === EOCD_SIGNATURE) {
      eocdOffset = i;
      break;
    }
  }
  if (eocdOffset === -1) {
    throw new Error("Invalid zip file: End of Central Directory not found");
  }

  const cdEntries = view.getUint16(eocdOffset + 10, true);
  const cdOffset = view.getUint32(eocdOffset + 16, true);

  // Saturated fields mean the real values live in a Zip64 record, which this
  // parser does not read.
  if (cdEntries === 0xffff || cdOffset === 0xffffffff) {
    throw new Error("Unsupported zip file: Zip64 archives are not supported");
  }

  // Parse Central Directory entries
  let offset = cdOffset;
  for (let i = 0; i < cdEntries; i++) {
    if (view.getUint32(offset, true) !== CENTRAL_SIGNATURE) {
      throw new Error(
        "Invalid zip file: bad Central Directory entry signature",
      );
    }

    const flags = view.getUint16(offset + 8, true);
    const compressionMethod = view.getUint16(offset + 10, true);
    const compressedSize = view.getUint32(offset + 20, true);
    const uncompressedSize = view.getUint32(offset + 24, true);
    const fileNameLength = view.getUint16(offset + 28, true);
    const extraFieldLength = view.getUint16(offset + 30, true);
    const commentLength = view.getUint16(offset + 32, true);
    const localHeaderOffset = view.getUint32(offset + 42, true);

    const fileName = decoder.decode(
      bytes.subarray(offset + 46, offset + 46 + fileNameLength),
    );

    // Skip directories
    if (!fileName.endsWith("/")) {
      if (
        compressedSize === 0xffffffff ||
        uncompressedSize === 0xffffffff ||
        localHeaderOffset === 0xffffffff
      ) {
        throw new Error(
          `Unsupported zip file: Zip64 entry ${fileName} is not supported`,
        );
      }
      // Bit 0 of the general purpose flags marks an encrypted entry.
      if ((flags & 0x0001) !== 0) {
        throw new Error(`Unsupported zip file: ${fileName} is encrypted`);
      }
      if (view.getUint32(localHeaderOffset, true) !== LOCAL_SIGNATURE) {
        throw new Error(
          `Invalid zip file: bad local file header signature for ${fileName}`,
        );
      }

      // The local header has its own name/extra lengths, which may differ from
      // the Central Directory's, so the data offset must be derived from it.
      const localFileNameLength = view.getUint16(localHeaderOffset + 26, true);
      const localExtraLength = view.getUint16(localHeaderOffset + 28, true);
      const dataOffset =
        localHeaderOffset + 30 + localFileNameLength + localExtraLength;

      const compressedData = bytes.subarray(
        dataOffset,
        dataOffset + compressedSize,
      );
      // subarray() clamps silently, so a short view means the data runs past
      // the end of the buffer.
      if (compressedData.length !== compressedSize) {
        throw new Error(`Invalid zip file: truncated data for ${fileName}`);
      }

      let fileData;
      if (compressionMethod === 0) {
        // Stored (no compression)
        fileData = compressedData;
      } else if (compressionMethod === 8) {
        // Deflate - use DecompressionStream
        fileData = await inflateRaw(compressedData);
      } else {
        throw new Error(
          `Unsupported compression method ${compressionMethod} for ${fileName}`,
        );
      }

      // Verify the final size for every method, not only for deflate.
      if (fileData.length !== uncompressedSize) {
        throw new Error(
          `Size mismatch for ${fileName}: expected ${uncompressedSize}, got ${fileData.length}`,
        );
      }

      entries.set(fileName, fileData);
    }

    // Move to next Central Directory entry
    offset += 46 + fileNameLength + extraFieldLength + commentLength;
  }

  return entries;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Reads a fetch Response body to completion, reporting progress per chunk.
 *
 * @param {Response} response - Response whose `body` stream is non-null.
 * @param {number|undefined} total - Expected byte count, if known.
 * @param {function} onProgress - Called with `{ phase, loaded, total }`.
 * @returns {Promise<ArrayBuffer>} The full body.
 */
async function readBodyWithProgress(response, total, onProgress) {
  const reader = response.body.getReader();
  const chunks = [];
  let loaded = 0;

  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    chunks.push(value);
    loaded += value.length;
    onProgress({ phase: "downloading", loaded, total });
  }

  const combined = new Uint8Array(loaded);
  let pos = 0;
  for (const chunk of chunks) {
    combined.set(chunk, pos);
    pos += chunk.length;
  }
  return combined.buffer;
}

/**
 * Writes one file into an OPFS directory, discarding the pending write if it fails.
 *
 * @param {FileSystemDirectoryHandle} dir - Target directory.
 * @param {string} fileName - Name of the file to create or overwrite.
 * @param {Uint8Array} data - File contents.
 * @returns {Promise<void>}
 */
async function writeDictionaryFile(dir, fileName, data) {
  const fileHandle = await dir.getFileHandle(fileName, { create: true });
  const writable = await fileHandle.createWritable();
  try {
    await writable.write(data);
    await writable.close();
  } catch (err) {
    // Release the stream; the original failure is the error worth reporting.
    await writable.abort().catch(() => {});
    throw err;
  }
}

/**
 * Downloads a dictionary archive, extracts it, and stores the files in OPFS.
 *
 * The archive should be a zip file containing the 8 dictionary files
 * (metadata.json, dict.da, dict.vals, dict.wordsidx, dict.words,
 * matrix.mtx, char_def.bin, unk.bin), optionally nested in a subdirectory.
 * Entries are matched by base name; if several entries share a base name,
 * the one listed last in the archive is used.
 *
 * Nothing is written to OPFS until the archive has been downloaded, extracted
 * and validated. If storing fails part-way, the dictionary directory is
 * removed (best effort) so that an incomplete dictionary is not left behind.
 *
 * @param {string} url - URL of the dictionary zip archive.
 * @param {string} name - Name to store the dictionary under (e.g., "ipadic").
 * @param {object} [options] - Optional settings.
 * @param {function} [options.onProgress] - Progress callback receiving
 *   `{ phase: string, loaded?: number, total?: number }`. Phases are
 *   "downloading", "extracting", "storing" and "complete". `total` comes from
 *   the `content-length` header and is `undefined` when that header is absent
 *   or not a number. Ignored unless it is a function.
 * @returns {Promise<void>}
 * @throws {Error} If download fails, archive is invalid, or required files are missing.
 */
export async function downloadDictionary(url, name, options = {}) {
  const { onProgress } = options ?? {};
  const report = typeof onProgress === "function" ? onProgress : undefined;

  // Download
  report?.({ phase: "downloading" });
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download dictionary: HTTP ${response.status}`);
  }

  const parsedLength = Number.parseInt(
    response.headers.get("content-length"),
    10,
  );
  const total = Number.isFinite(parsedLength) ? parsedLength : undefined;

  // Read response body with progress tracking when a callback was supplied
  // and the body is exposed as a stream; otherwise let fetch buffer it.
  const zipBuffer =
    report && response.body
      ? await readBodyWithProgress(response, total, report)
      : await response.arrayBuffer();

  // Extract
  report?.({ phase: "extracting" });
  const entries = await extractZip(zipBuffer);

  // Find dictionary files (may be nested in a subdirectory)
  const fileMap = new Map();
  for (const [path, data] of entries) {
    const baseName = path.split("/").pop();
    if (DICTIONARY_FILES.includes(baseName)) {
      fileMap.set(baseName, data);
    }
  }

  // Verify all required files are present
  const missing = DICTIONARY_FILES.filter((f) => !fileMap.has(f));
  if (missing.length > 0) {
    throw new Error(
      `Missing dictionary files in archive: ${missing.join(", ")}`,
    );
  }

  // Store in OPFS
  report?.({ phase: "storing" });
  const dir = await getDictionaryDir(name);
  try {
    for (const [fileName, data] of fileMap) {
      await writeDictionaryFile(dir, fileName, data);
    }
  } catch (err) {
    // A partially written dictionary would still be reported as present by a
    // directory-existence check, so remove it rather than leave it behind.
    try {
      const parent = await getDirectoryHandle(OPFS_BASE_PATH);
      await parent.removeEntry(name, { recursive: true });
    } catch {
      // Cleanup is best effort; surface the storage failure below.
    }
    throw err;
  }

  report?.({ phase: "complete" });
}
|
|
253
|
+
|
|
254
|
+
/**
 * Loads dictionary files from OPFS as an object of Uint8Arrays.
 *
 * The returned object has properties matching the file names expected
 * by `loadDictionaryFromBytes()`. The dictionary directory is looked up
 * without creating it, and the eight files are read concurrently.
 *
 * @param {string} name - The dictionary name (e.g., "ipadic").
 * @returns {Promise<DictionaryFiles>} Object containing the dictionary file data.
 * @throws {Error} If the dictionary directory, or one of its files, is not
 *   found in OPFS; the underlying error is attached as `cause`. Any other
 *   failure raised by the OPFS calls is rethrown unchanged.
 */
export async function loadDictionaryFiles(name) {
  // Result property -> file name inside the dictionary directory.
  const fileByField = {
    metadata: "metadata.json",
    dictDa: "dict.da",
    dictVals: "dict.vals",
    dictWordsIdx: "dict.wordsidx",
    dictWords: "dict.words",
    matrixMtx: "matrix.mtx",
    charDef: "char_def.bin",
    unk: "unk.bin",
  };

  let dir;
  try {
    dir = await navigator.storage.getDirectory();
    for (const segment of [...OPFS_BASE_PATH, name]) {
      dir = await dir.getDirectoryHandle(segment);
    }
  } catch (err) {
    // Only a missing entry (or a file where a directory was expected) means
    // "not downloaded yet"; anything else is a different problem and must not
    // be relabelled.
    if (err?.name !== "NotFoundError" && err?.name !== "TypeMismatchError") {
      throw err;
    }
    throw new Error(
      `Dictionary "${name}" not found in OPFS. Call downloadDictionary() first.`,
      { cause: err },
    );
  }

  const readFile = async (fileName) => {
    let fileHandle;
    try {
      fileHandle = await dir.getFileHandle(fileName);
    } catch (err) {
      if (err?.name !== "NotFoundError") throw err;
      throw new Error(
        `Dictionary "${name}" is incomplete in OPFS: missing "${fileName}". Call downloadDictionary() again.`,
        { cause: err },
      );
    }
    const file = await fileHandle.getFile();
    return new Uint8Array(await file.arrayBuffer());
  };

  const fields = Object.entries(fileByField);
  const contents = await Promise.all(
    fields.map(([, fileName]) => readFile(fileName)),
  );
  return Object.fromEntries(
    fields.map(([field], index) => [field, contents[index]]),
  );
}
|
|
298
|
+
|
|
299
|
+
/**
 * Deletes a stored dictionary, including everything inside its directory.
 *
 * The base directory is looked up without creating it, then the entry called
 * `name` is removed recursively.
 *
 * @param {string} name - The dictionary name to remove.
 * @returns {Promise<void>}
 * @throws {Error} Whatever the underlying OPFS calls reject with, e.g. when the
 *   base directory or the dictionary entry does not exist.
 */
export async function removeDictionary(name) {
  let parent = await navigator.storage.getDirectory();
  for (const part of OPFS_BASE_PATH) {
    parent = await parent.getDirectoryHandle(part);
  }
  const removal = { recursive: true };
  await parent.removeEntry(name, removal);
}
|
|
314
|
+
|
|
315
|
+
/**
 * Returns the names of the dictionaries currently stored in OPFS.
 *
 * Every subdirectory of the base directory counts as a dictionary; plain files
 * are ignored. Any failure while locating or enumerating the base directory
 * (typically because nothing has been stored yet) yields an empty list.
 *
 * @returns {Promise<string[]>} Array of dictionary names.
 */
export async function listDictionaries() {
  const found = [];
  try {
    let base = await navigator.storage.getDirectory();
    for (const part of OPFS_BASE_PATH) {
      base = await base.getDirectoryHandle(part);
    }

    for await (const [entryName, entry] of base.entries()) {
      if (entry.kind !== "directory") continue;
      found.push(entryName);
    }
  } catch {
    // Base directory doesn't exist yet - no dictionaries stored
    return [];
  }
  return found;
}
|
|
340
|
+
|
|
341
|
+
/**
 * Reports whether a directory for the given dictionary exists in OPFS.
 *
 * Only the presence of the directory is checked, not its contents. Any error
 * raised while walking the path is treated as "does not exist".
 *
 * @param {string} name - The dictionary name to check.
 * @returns {Promise<boolean>} True if the dictionary exists.
 */
export async function hasDictionary(name) {
  try {
    let handle = await navigator.storage.getDirectory();
    const dictionaryPath = [...OPFS_BASE_PATH, name];
    for (const part of dictionaryPath) {
      handle = await handle.getDirectoryHandle(part);
    }
  } catch {
    return false;
  }
  return true;
}
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "lindera-wasm-web",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"description": "Lindera WASM (
|
|
5
|
-
"version": "
|
|
4
|
+
"description": "Lindera WASM (web target)",
|
|
5
|
+
"version": "3.0.1",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,7 +11,9 @@
|
|
|
11
11
|
"files": [
|
|
12
12
|
"lindera_wasm_bg.wasm",
|
|
13
13
|
"lindera_wasm.js",
|
|
14
|
-
"lindera_wasm.d.ts"
|
|
14
|
+
"lindera_wasm.d.ts",
|
|
15
|
+
"opfs.js",
|
|
16
|
+
"opfs.d.ts"
|
|
15
17
|
],
|
|
16
18
|
"main": "lindera_wasm.js",
|
|
17
19
|
"homepage": "https://github.com/lindera/lindera",
|
|
@@ -25,5 +27,15 @@
|
|
|
25
27
|
"library",
|
|
26
28
|
"wasm",
|
|
27
29
|
"webassembly"
|
|
28
|
-
]
|
|
30
|
+
],
|
|
31
|
+
"exports": {
|
|
32
|
+
".": {
|
|
33
|
+
"types": "./lindera_wasm.d.ts",
|
|
34
|
+
"default": "./lindera_wasm.js"
|
|
35
|
+
},
|
|
36
|
+
"./opfs": {
|
|
37
|
+
"types": "./opfs.d.ts",
|
|
38
|
+
"default": "./opfs.js"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
29
41
|
}
|