@siglum/engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ctan.js ADDED
@@ -0,0 +1,818 @@
1
+ /**
2
+ * @module @siglum/engine/ctan
3
+ * CTAN package fetching and caching.
4
+ */
5
+
6
+ import { fileSystem } from '@siglum/filesystem';
7
+ import {
8
+ getPackageMeta,
9
+ savePackageMeta,
10
+ ensureTexliveMounted,
11
+ CTAN_CACHE_VERSION
12
+ } from './storage.js';
13
+
14
/**
 * File extensions probed, in this order, when resolving a file name to the
 * package that ships it. Needed because a package and its files can be named
 * differently (e.g., algorithm.sty lives in the "algorithms" package).
 */
const LATEX_FILE_EXTENSIONS = [
  '.sty',
  '.cls',
  '.def',
  '.clo',
  '.fd',
  '.cfg',
  '.tex',
];
17
+
18
// Lazy-loaded xzwasm state. xzwasm is a UMD bundle: once its script has run it
// is expected to expose itself as `self.xzwasm`.
let XzReadableStream = null;
let xzwasmUrl = './src/xzwasm.js'; // Default, can be overridden
let xzwasmLoading = null; // In-flight load, shared by concurrent callers

/**
 * Override the URL the xzwasm bundle is loaded from.
 * Only affects loads that start after this call.
 * @param {string} url - URL of the xzwasm UMD script
 */
function setXzwasmUrl(url) {
  xzwasmUrl = url;
}

/**
 * Load xzwasm on first use and return its `XzReadableStream` constructor.
 *
 * Concurrent callers share a single load, so the script is injected only once.
 * A failed load is not cached: the next call retries.
 *
 * @returns {Promise<Function>} The `XzReadableStream` constructor
 * @throws {Error} If the script cannot be loaded or does not define `self.xzwasm`
 */
async function loadXzwasm() {
  if (XzReadableStream) return XzReadableStream;
  if (xzwasmLoading) return xzwasmLoading;

  // Capture the URL so the error message matches the script actually requested
  // even if setXzwasmUrl() is called while the load is in flight.
  const url = xzwasmUrl;

  // Pick up the constructor from the global once the bundle has executed.
  const adoptGlobal = () => {
    const ctor = self.xzwasm?.XzReadableStream;
    if (ctor) XzReadableStream = ctor;
    return Boolean(ctor);
  };

  xzwasmLoading = new Promise((resolve, reject) => {
    const fail = () => reject(new Error('Failed to load xzwasm from ' + url));

    if (adoptGlobal()) {
      resolve(XzReadableStream);
      return;
    }

    if (typeof document === 'undefined') {
      // No DOM (e.g. a classic worker): there is no <script> tag to inject, so
      // fall back to the synchronous importScripts().
      try {
        importScripts(url);
      } catch (e) {
        fail();
        return;
      }
      if (adoptGlobal()) resolve(XzReadableStream);
      else fail();
      return;
    }

    // Load UMD module via script tag
    const script = document.createElement('script');
    script.src = url;
    script.onload = () => {
      // Reject instead of throwing inside the event handler, which would
      // leave this promise pending forever.
      if (adoptGlobal()) resolve(XzReadableStream);
      else fail();
    };
    script.onerror = fail;
    document.head.appendChild(script);
  }).finally(() => {
    // Success is remembered through XzReadableStream; clearing the in-flight
    // marker lets a failed load be retried.
    xzwasmLoading = null;
  });

  return xzwasmLoading;
}
46
+
47
/**
 * Parse an uncompressed TAR archive into a map of path -> file contents.
 *
 * Only non-empty regular files are returned. The path comes from, in order of
 * precedence: a preceding GNU long-name ('L') entry, a preceding pax extended
 * header ('x') `path` record, or the header's own prefix + name fields.
 * Returned contents are views into `tarData`, not copies.
 *
 * @param {Uint8Array} tarData - Raw TAR bytes
 * @returns {Map<string, Uint8Array>} Map of archive paths to file contents
 * @throws {RangeError} If a file's declared size runs past the end of the archive
 */
function parseTar(tarData) {
  const BLOCK_SIZE = 512;
  const TYPE_REGULAR_LEGACY = 0; // NUL type flag written by pre-POSIX tar
  const TYPE_REGULAR = 48; // '0'
  const TYPE_GNU_LONGNAME = 76; // 'L'
  const TYPE_PAX_EXTENDED = 120; // 'x'

  const files = new Map();
  const decoder = new TextDecoder();

  // Decode a NUL-terminated (or full-width) text field of the archive.
  const readField = (start, length) => {
    const field = tarData.subarray(start, start + length);
    const end = field.indexOf(0);
    return decoder.decode(end === -1 ? field : field.subarray(0, end));
  };

  // Extract the `path` record from a pax extended header payload.
  // Records look like "<len> <key>=<value>\n", where <len> is the byte length
  // of the whole record including the length digits and the newline.
  const readPaxPath = (records) => {
    let path = null;
    let pos = 0;
    while (pos < records.length) {
      const space = records.indexOf(32, pos);
      if (space === -1) break;
      const len = parseInt(decoder.decode(records.subarray(pos, space)), 10);
      if (!Number.isFinite(len) || len <= space - pos + 1 || pos + len > records.length) break;
      const record = decoder.decode(records.subarray(space + 1, pos + len - 1));
      const eq = record.indexOf('=');
      if (eq !== -1 && record.slice(0, eq) === 'path') path = record.slice(eq + 1);
      pos += len;
    }
    return path;
  };

  let overridePath = null; // Path announced by a preceding 'L' / 'x' entry
  let offset = 0;

  while (offset < tarData.length - BLOCK_SIZE) {
    // Check for zero block (end of archive)
    if (tarData.subarray(offset, offset + BLOCK_SIZE).every((byte) => byte === 0)) break;

    // Name is at bytes 0-99
    const name = readField(offset, 100);

    // Size is at bytes 124-135 (12 bytes, octal). NULs are mapped to spaces so
    // parseInt accepts both zero-padded and space-padded fields and stops at
    // the terminator.
    const sizeText = decoder
      .decode(tarData.subarray(offset + 124, offset + 136))
      .replace(/\0/g, ' ');
    const size = parseInt(sizeText, 8) || 0;

    // TypeFlag is at byte 156
    const typeFlag = tarData[offset + 156];

    // Prefix is at bytes 345-499 (USTAR format)
    const prefix = readField(offset + 345, 155);
    const headerPath = prefix ? prefix + '/' + name : name;

    offset += BLOCK_SIZE; // Move past header
    const dataEnd = offset + size;

    if (typeFlag === TYPE_GNU_LONGNAME) {
      // Payload is the full path of the NEXT entry
      overridePath = readField(offset, size) || null;
    } else if (typeFlag === TYPE_PAX_EXTENDED) {
      overridePath = readPaxPath(tarData.subarray(offset, dataEnd)) ?? overridePath;
    } else {
      const isRegular = typeFlag === TYPE_REGULAR_LEGACY || typeFlag === TYPE_REGULAR;
      if (isRegular && size > 0 && (overridePath || name)) {
        if (dataEnd > tarData.length) {
          throw new RangeError(
            `Truncated TAR archive: "${overridePath ?? headerPath}" declares ${size} bytes ` +
            `but only ${tarData.length - offset} remain`
          );
        }
        files.set(overridePath ?? headerPath, tarData.subarray(offset, dataEnd));
      }
      // A long-name / pax override applies to exactly one following entry
      overridePath = null;
    }

    // Move to next 512-byte boundary
    offset += Math.ceil(size / BLOCK_SIZE) * BLOCK_SIZE;
  }

  return files;
}
101
+
102
// Note: We use TexLive 2025 for ALL packages for version compatibility

/** Archive names resolved through the CTAN API, keyed by the requested package name. */
const packageNameCache = new Map();

/** File-to-package index; null until it has been loaded from the server. */
let fileToPackageIndex = null;

/** Promise of the index load once one has been started; null before that. */
let fileToPackageLoading = null;
110
+
111
+ /**
112
+ * @typedef {Object} CTANFetcherOptions
113
+ * @property {string} [proxyUrl] - CTAN proxy URL
114
+ * @property {string} [bundlesUrl] - Bundles URL
115
+ * @property {string} [xzwasmUrl] - XZ decompression WASM URL
116
+ * @property {(msg: string) => void} [onLog] - Logging callback
117
+ */
118
+
119
+ /**
120
+ * @typedef {Object} PackageResult
121
+ * @property {Map<string, Uint8Array>} files - Map of file paths to contents
122
+ * @property {string[]} dependencies - Package dependencies
123
+ * @property {boolean} [notFound] - True if package was not found
124
+ */
125
+
126
// File extensions kept when unpacking a TexLive archive.
const TEXLIVE_TEX_EXTENSIONS = ['.sty', '.cls', '.def', '.cfg', '.tex', '.fd', '.clo', '.ltx'];
const TEXLIVE_FONT_EXTENSIONS = ['.pfb', '.pfm', '.afm', '.tfm', '.vf', '.map', '.enc'];

/**
 * Map a path inside a TexLive archive onto the mounted /texlive tree.
 * Directory names are matched as whole path segments, so the "tex/" inside
 * "latex/" or "bibtex/" is never mistaken for the tex root.
 * @param {string} tarPath - Path of the file inside the archive
 * @param {string} packageName - Requested package; used for the fallback location
 * @returns {string} Target path under /texlive
 */
function resolveTexlivePath(tarPath, packageName) {
  // Index at which segment `dir` starts, or -1 if tarPath has no such segment.
  const segmentStart = (dir) => {
    if (tarPath.startsWith(`${dir}/`)) return 0;
    const idx = tarPath.indexOf(`/${dir}/`);
    return idx === -1 ? -1 : idx + 1;
  };

  const distIdx = segmentStart('texmf-dist');
  if (distIdx !== -1) return '/texlive/' + tarPath.substring(distIdx);

  const texIdx = segmentStart('tex');
  if (texIdx !== -1) return '/texlive/texmf-dist/' + tarPath.substring(texIdx);

  const fontsIdx = segmentStart('fonts');
  if (fontsIdx !== -1) return '/texlive/texmf-dist/' + tarPath.substring(fontsIdx);

  const fileName = tarPath.split('/').pop();
  return `/texlive/texmf-dist/tex/latex/${packageName}/${fileName}`;
}

/**
 * Read a stream to the end and concatenate its chunks.
 * @param {ReadableStream<Uint8Array>} stream - Stream to drain
 * @returns {Promise<{data: Uint8Array, chunkCount: number}>} Concatenated bytes and chunk count
 */
async function drainStream(stream) {
  const reader = stream.getReader();
  const chunks = [];
  let totalLen = 0;
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // Must copy chunk - xzwasm may reuse its internal buffer
    const chunk = new Uint8Array(value);
    chunks.push(chunk);
    totalLen += chunk.length;
  }

  const data = new Uint8Array(totalLen);
  let pos = 0;
  for (const chunk of chunks) {
    data.set(chunk, pos);
    pos += chunk.length;
  }
  return { data, chunkCount: chunks.length };
}

/**
 * Decode one file entry of a CTAN proxy response into bytes.
 * @param {{content: *, encoding?: string}} info - File entry from the proxy
 * @returns {Uint8Array} File contents
 */
function decodeCtanContent(info) {
  if (info.encoding === 'base64') {
    const binary = atob(info.content);
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return bytes;
  }
  if (typeof info.content === 'string') {
    return new TextEncoder().encode(info.content);
  }
  return new Uint8Array(info.content);
}

/**
 * List the packages that, per the file-to-package index, ship a file named
 * `<packageName><ext>` for one of the common LaTeX extensions.
 * Candidates equal to `packageName` are dropped and duplicates are reported once.
 * @param {CTANFetcher} fetcher - Fetcher whose index is consulted
 * @param {string} packageName - Requested package name
 * @returns {Promise<Array<{fileName: string, pkg: string}>>} Candidates in extension order
 */
async function listProvidingPackages(fetcher, packageName) {
  const seen = new Set();
  const candidates = [];
  for (const ext of LATEX_FILE_EXTENSIONS) {
    const fileName = packageName + ext;
    const pkg = await fetcher.lookupPackageForFile(fileName);
    if (pkg && pkg !== packageName && !seen.has(pkg)) {
      seen.add(pkg);
      candidates.push({ fileName, pkg });
    }
  }
  return candidates;
}

/**
 * Persist a "not found" marker so the package is not requested again.
 * @param {string} packageName - Package to mark
 * @returns {Promise<void>}
 */
async function saveNotFoundMarker(packageName) {
  await savePackageMeta(packageName, {
    notFound: true,
    cacheVersion: CTAN_CACHE_VERSION,
  });
}

/**
 * Fetches LaTeX packages from CTAN/TexLive archives.
 *
 * Fetched files are kept in an in-memory cache, written to the filesystem
 * cache, and recorded in per-package metadata so later requests can be served
 * without network access.
 */
export class CTANFetcher {
  /**
   * @param {CTANFetcherOptions} [options] - Fetcher options
   */
  constructor(options = {}) {
    this.proxyUrl = options.proxyUrl || 'http://localhost:8787';
    this.bundlesUrl = options.bundlesUrl || this.proxyUrl + '/bundles';
    this.mountedFiles = new Set();
    this.fileCache = new Map(); // Memory cache for file contents
    this.fetchCount = 0;
    this.onLog = options.onLog || (() => {});

    // Set xzwasm URL if provided
    if (options.xzwasmUrl) {
      setXzwasmUrl(options.xzwasmUrl);
    }
  }

  /**
   * Load file-to-package index (maps filename.sty → package-name).
   * The index is module-wide: it is fetched once and then reused. A failed
   * load leaves an empty index in place.
   * @returns {Promise<Object<string, string>>} Index mapping filenames to packages
   */
  async loadFileToPackageIndex() {
    if (fileToPackageIndex) return fileToPackageIndex;
    if (fileToPackageLoading) return fileToPackageLoading;

    fileToPackageLoading = (async () => {
      try {
        const response = await fetch(`${this.bundlesUrl}/file-to-package.json`);
        if (response.ok) {
          fileToPackageIndex = await response.json();
          this.onLog(`Loaded file-to-package index: ${Object.keys(fileToPackageIndex).length} entries`);
        } else {
          this.onLog(`Failed to load file-to-package index: ${response.status}`);
          fileToPackageIndex = {};
        }
      } catch (e) {
        this.onLog(`Error loading file-to-package index: ${e.message}`);
        fileToPackageIndex = {};
      }
      return fileToPackageIndex;
    })();

    return fileToPackageLoading;
  }

  /**
   * Look up package name for a file.
   * @param {string} fileName - File name (e.g., "lingmacros.sty")
   * @returns {Promise<string|null>} Package name or null
   */
  async lookupPackageForFile(fileName) {
    const index = await this.loadFileToPackageIndex();
    // Own properties only, so names such as "constructor" never resolve to
    // something inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(index, fileName)) return null;
    return index[fileName] || null;
  }

  /**
   * Get all cached file contents.
   * Only returns files that were loaded in this session (via fetchPackage).
   * @returns {Object<string, Uint8Array>} Map of file paths to contents
   */
  getCachedFiles() {
    return Object.fromEntries(this.fileCache);
  }

  /**
   * Load a package from cache.
   *
   * Returns null (so the caller fetches fresh) when there is no metadata, the
   * cache version differs, any listed file can no longer be read, or reading
   * the cache fails.
   * @param {string} packageName - Package name
   * @returns {Promise<PackageResult|null>} Package result or null if not cached
   */
  async loadPackageFromCache(packageName) {
    try {
      const meta = await getPackageMeta(packageName);
      if (!meta) {
        this.onLog(`[Cache] ${packageName}: no metadata found - will fetch fresh`);
        return null;
      }

      // Check cache version
      if (meta.cacheVersion !== CTAN_CACHE_VERSION) {
        this.onLog(`[Cache] ${packageName}: version mismatch (cached=${meta.cacheVersion}, current=${CTAN_CACHE_VERSION}) - will refetch`);
        return null;
      }
      this.onLog(`[Cache] ${packageName}: loading from cache (v${meta.cacheVersion}, source=${meta.source || 'unknown'})`);

      // Check if it's a "not found" marker
      if (meta.notFound) return { notFound: true };

      // Check memory cache first, then filesystem
      const listed = meta.files || [];
      const inMemory = [];
      const filesToLoad = [];
      for (const filePath of listed) {
        if (this.fileCache.has(filePath)) {
          inMemory.push([filePath, this.fileCache.get(filePath)]);
        } else {
          filesToLoad.push(filePath);
        }
      }

      // Load any missing files from filesystem cache
      let fromDisk = [];
      if (filesToLoad.length > 0) {
        await ensureTexliveMounted();
        fromDisk = await Promise.all(
          filesToLoad.map(async (filePath) => {
            const content = await fileSystem.readBinary(filePath).catch(() => null);
            return [filePath, content ? new Uint8Array(content) : null];
          })
        );

        // Metadata that points at files which are gone would otherwise yield a
        // partial (or empty) package that looks like a cache hit.
        const lost = fromDisk.filter(([, content]) => content === null);
        if (lost.length > 0) {
          this.onLog(`[Cache] ${packageName}: ${lost.length} cached file(s) missing - will refetch`);
          return null;
        }
      }

      const files = new Map();
      for (const [filePath, content] of inMemory) {
        files.set(filePath, content);
        this.mountedFiles.add(filePath);
      }
      for (const [filePath, content] of fromDisk) {
        files.set(filePath, content);
        this.mountedFiles.add(filePath);
        this.fileCache.set(filePath, content); // Cache in memory
      }

      return {
        files,
        dependencies: meta.dependencies || [],
      };
    } catch (e) {
      this.onLog(`[Cache] ${packageName}: cache read failed (${e?.message ?? e}) - will fetch fresh`);
      return null;
    }
  }

  /**
   * Fetch a package from CTAN/TexLive.
   * @param {string} packageName - Package name
   * @param {{tlYear?: number}} [options] - Options
   * @returns {Promise<PackageResult|null>} Package result or null if not found
   */
  async fetchPackage(packageName, options = {}) {
    const { tlYear } = options;
    const yearLabel = tlYear ? ` (TL${tlYear})` : '';
    this.onLog(`[FETCH] ${packageName}${yearLabel}: starting fetch`);

    // Skip cache for version-specific requests - we want a different version
    if (!tlYear) {
      // Check cache first
      const cached = await this.loadPackageFromCache(packageName);
      if (cached) {
        if (cached.notFound) {
          // Package itself doesn't exist - but maybe the file is in a different package
          // e.g., algorithm.sty is in the "algorithms" package
          const [provider] = await listProvidingPackages(this, packageName);
          if (provider) {
            this.onLog(`[FETCH] ${packageName}: not found, but ${provider.fileName} is in package "${provider.pkg}"`);
            return this.fetchPackage(provider.pkg, options);
          }
          this.onLog(`[FETCH] ${packageName}: marked as not found in cache`);
          return null;
        }
        this.onLog(`[FETCH] ${packageName}: loaded from cache`);
        return cached;
      }
    } else {
      this.onLog(`[FETCH] ${packageName}: skipping cache for TL${tlYear} request`);
    }

    this.onLog(`[FETCH] ${packageName}: not in cache, fetching from TexLive${yearLabel}...`);
    // Try TexLive first for version compatibility with our LaTeX 2022-11-01
    // (CTAN has latest versions that may require newer LaTeX)
    return this.fetchTexLivePackage(packageName, tlYear);
  }

  /**
   * Look up real TexLive archive name via CTAN API.
   * Successful lookups are remembered in the module-wide name cache.
   * @param {string} packageName - Requested package name
   * @returns {Promise<string>} Containing package name, or `packageName` when the lookup fails
   */
  async lookupTexLivePackageName(packageName) {
    // Check memory cache first
    if (packageNameCache.has(packageName)) {
      return packageNameCache.get(packageName);
    }

    try {
      // Query CTAN API for package info
      const response = await fetch(`${this.proxyUrl}/api/ctan-pkg/${encodeURIComponent(packageName)}`);
      if (!response.ok) return packageName;

      const data = await response.json();
      // If package is contained in another, use that
      const realName = data.contained_in || data.name || packageName;
      packageNameCache.set(packageName, realName);
      return realName;
    } catch (e) {
      return packageName;
    }
  }

  /**
   * Fetch from TexLive archive.
   *
   * Resolution order: the package name itself, then packages found via the
   * file-to-package index, then the container reported by the CTAN API. If
   * none yields an archive, or extraction fails, falls back to fetchCtanPackage.
   * @param {string} packageName - Package name
   * @param {number|null} [tlYear] - Optional year (2025, 2024, 2023) - if specified, goes directly to CTAN proxy
   * @returns {Promise<PackageResult|null>} Package result or null if not found
   */
  async fetchTexLivePackage(packageName, tlYear = null) {
    // If specific TL year requested, go directly to CTAN proxy (skip local archive)
    // This is used for version fallback when kernel incompatibility is detected
    if (tlYear) {
      this.onLog(`[TEXLIVE] Fetching ${packageName} from TL${tlYear} via CTAN proxy...`);
      return this.fetchCtanPackage(packageName, tlYear);
    }

    // Check cache first (same cache as CTAN)
    const cached = await this.loadPackageFromCache(packageName);
    if (cached) {
      if (cached.notFound) {
        this.onLog(`Package ${packageName} marked as not found in cache`);
        return null;
      }
      this.onLog(`Package ${packageName} loaded from cache (TexLive)`);
      return cached;
    }

    this.onLog(`[TEXLIVE] Fetching ${packageName} from TexLive 2025 via ${this.proxyUrl}...`);

    const texliveUrl = (pkg) => `${this.proxyUrl}/api/texlive/${encodeURIComponent(pkg)}`;
    let response = null;
    let texlivePkg = packageName;

    // Try direct package name first
    try {
      const url = texliveUrl(packageName);
      this.onLog(`[TEXLIVE] Trying URL: ${url}`);
      response = await fetch(url);
      this.onLog(`[TEXLIVE] Response status: ${response?.status}`);
    } catch (e) {
      this.onLog(`[TEXLIVE] Fetch error: ${e.message}`);
      response = null;
    }

    // If not found, look up in file-to-package index (try common extensions)
    if (!response || !response.ok) {
      for (const { fileName, pkg } of await listProvidingPackages(this, packageName)) {
        this.onLog(`${fileName} is in package "${pkg}", fetching...`);
        texlivePkg = pkg;
        try {
          response = await fetch(texliveUrl(pkg));
          if (response?.ok) break;
        } catch (e) {
          response = null;
        }
      }
    }

    // If still not found, try CTAN API lookup as fallback
    if (!response || !response.ok) {
      this.onLog(`Looking up package container via CTAN API...`);
      const realName = await this.lookupTexLivePackageName(packageName);
      if (realName !== packageName) {
        this.onLog(`${packageName} is in ${realName}, fetching...`);
        texlivePkg = realName;
        try {
          response = await fetch(texliveUrl(texlivePkg));
        } catch (e) {
          response = null;
        }
      }
    }

    try {
      if (!response || !response.ok) {
        this.onLog(`[TEXLIVE] FAILED for ${packageName} (status=${response?.status}), falling back to CTAN...`);
        // Fall back to CTAN for packages not in TexLive 2025
        return this.fetchCtanPackage(packageName);
      }
      this.onLog(`[TEXLIVE] SUCCESS for ${packageName}, extracting XZ archive...`);

      // Get XZ-compressed TAR
      let xzData = await response.arrayBuffer();
      this.onLog(`Downloaded ${(xzData.byteLength / 1024).toFixed(1)} KB, decompressing...`);

      // Load xzwasm and decompress XZ using streaming
      // Use Blob.stream() instead of Response(arrayBuffer).body to avoid
      // ArrayBuffer detachment issues when multiple decompressions run in parallel
      const XzStream = await loadXzwasm();
      const xzStream = new XzStream(new Blob([xzData]).stream());
      xzData = null; // Allow GC of original buffer

      const { data: tarData, chunkCount } = await drainStream(xzStream);
      this.onLog(`Decompressed ${chunkCount} chunks, total ${tarData.length} bytes`);

      // Parse TAR
      const tarFiles = parseTar(tarData);
      this.onLog(`Extracted ${tarFiles.size} files from TAR (keys: ${[...tarFiles.keys()].slice(0,3).join(', ')}...)`);

      // Log package version for debugging
      for (const [tarPath, content] of tarFiles) {
        if (tarPath.endsWith(`${packageName}.sty`)) {
          const text = new TextDecoder().decode(content.slice(0, 500));
          const versionMatch = text.match(/ProvidesPackage\{[^}]+\}\[([^\]]+)\]/);
          if (versionMatch) {
            this.onLog(`Package ${packageName} version: ${versionMatch[1]}`);
          }
          break;
        }
      }

      // Process files (similar to CTAN fetch)
      const files = new Map();
      const cacheWrites = [];

      await ensureTexliveMounted();

      for (const [tarPath, content] of tarFiles) {
        // Skip docs and source
        if (tarPath.includes('/doc/') || tarPath.startsWith('doc/')) continue;
        if (tarPath.includes('/source/') || tarPath.startsWith('source/')) continue;

        const ext = tarPath.substring(tarPath.lastIndexOf('.')).toLowerCase();
        if (!TEXLIVE_TEX_EXTENSIONS.includes(ext) && !TEXLIVE_FONT_EXTENSIONS.includes(ext)) continue;

        // Map to texlive path structure (tar paths may or may not have a leading slash)
        const targetPath = resolveTexlivePath(tarPath, packageName);

        // Copy out of the TAR buffer so the cached file does not keep the
        // whole archive alive.
        const fileData = new Uint8Array(content);
        files.set(targetPath, fileData);
        this.mountedFiles.add(targetPath);
        this.fileCache.set(targetPath, fileData);
        // Filesystem caching is best-effort; the in-memory copy stays usable
        cacheWrites.push(fileSystem.writeBinary(targetPath, fileData, { createParents: true }).catch(() => {}));
      }

      // Parallel filesystem cache writes
      await Promise.all(cacheWrites);

      this.onLog(`Processed ${files.size} TeX/font files from ${packageName}`);

      if (files.size === 0) {
        this.onLog(`No TeX files found in ${packageName}, marking as not found`);
        await saveNotFoundMarker(packageName);
        return null;
      }

      // Cache metadata under the requested package name
      const cacheEntry = {
        name: texlivePkg, // The actual package that provided the files
        files: [...files.keys()],
        dependencies: [],
        cacheVersion: CTAN_CACHE_VERSION,
        source: 'texlive-2025',
      };
      await savePackageMeta(packageName, cacheEntry);

      // Also cache under the resolved name if different (avoids duplicate fetches)
      if (texlivePkg !== packageName) {
        await savePackageMeta(texlivePkg, cacheEntry);
        this.onLog(`Cached under both "${packageName}" and "${texlivePkg}"`);
      }

      this.fetchCount++;
      return { files, dependencies: [] };
    } catch (e) {
      this.onLog(`[TEXLIVE] EXTRACTION ERROR for ${packageName}: ${e.message}`);
      this.onLog(`[TEXLIVE] Stack: ${e.stack?.split('\n').slice(0, 3).join(' | ')}`);
      this.onLog(`[TEXLIVE] Falling back to CTAN (WARNING: may have older version)...`);
      return this.fetchCtanPackage(packageName);
    }
  }

  /**
   * Fetch from CTAN proxy (fallback when TexLive doesn't have the package).
   *
   * A "not found" marker is persisted only when the proxy answers 404/410 or
   * returns an explicit error payload. Network failures, other HTTP errors and
   * processing exceptions return null without caching, so a transient outage
   * does not hide a package until the cache version changes.
   * @param {string} packageName - Package name
   * @param {number|null} [tlYear] - Optional TexLive year passed to the proxy
   * @returns {Promise<PackageResult|null>} Package result or null if unavailable
   */
  async fetchCtanPackage(packageName, tlYear = null) {
    const yearSuffix = tlYear ? `?tlYear=${encodeURIComponent(tlYear)}` : '';
    const yearLabel = tlYear ? ` (TL${tlYear})` : '';
    this.onLog(`[CTAN-FALLBACK] Fetching ${packageName}${yearLabel} from CTAN proxy...`);

    if (packageName === 'enumitem' && !tlYear) {
      this.onLog(`[CTAN-FALLBACK] *** WARNING: enumitem from CTAN may be v3.10 which has known bugs! ***`);
      this.onLog(`[CTAN-FALLBACK] *** TexLive 2025 should be used for enumitem v3.11 ***`);
    }

    const ctanUrl = (pkg) => `${this.proxyUrl}/api/fetch/${encodeURIComponent(pkg)}${yearSuffix}`;
    let response = null;
    let ctanPkg = packageName;

    // Try direct package name first
    try {
      response = await fetch(ctanUrl(packageName));
    } catch (e) {
      response = null;
    }

    // If not found, look up in file-to-package index (try common extensions)
    if (!response || !response.ok) {
      for (const { fileName, pkg } of await listProvidingPackages(this, packageName)) {
        this.onLog(`${fileName} is in package "${pkg}", fetching from CTAN${yearLabel}...`);
        ctanPkg = pkg;
        try {
          response = await fetch(ctanUrl(pkg));
          if (response?.ok) break;
        } catch (e) {
          response = null;
        }
      }
    }

    try {
      if (!response || !response.ok) {
        const status = response ? response.status : null;
        if (status === 404 || status === 410) {
          this.onLog(`CTAN package ${packageName} not found (${status})`);
          await saveNotFoundMarker(packageName);
        } else {
          // Likely transient (network error, 5xx, rate limit): leave the cache
          // untouched so a later request can succeed.
          this.onLog(`CTAN fetch for ${packageName} failed (${status ?? 'network error'}), not caching result`);
        }
        return null;
      }

      const data = await response.json();
      if (data.error) {
        this.onLog(`CTAN fetch failed: ${data.error}`);
        await saveNotFoundMarker(packageName);
        return null;
      }
      if (!data.files || typeof data.files !== 'object') {
        this.onLog(`CTAN fetch error: malformed response for ${packageName} (no files)`);
        return null;
      }

      // Process and cache files
      const files = new Map();
      const cacheWrites = [];
      await ensureTexliveMounted();
      for (const [path, info] of Object.entries(data.files)) {
        const content = decodeCtanContent(info);
        files.set(path, content);
        this.mountedFiles.add(path);
        this.fileCache.set(path, content);
        // Filesystem caching is best-effort; the in-memory copy stays usable
        cacheWrites.push(fileSystem.writeBinary(path, content, { createParents: true }).catch(() => {}));
      }

      // Parallel filesystem cache writes
      await Promise.all(cacheWrites);

      // Cache metadata under the requested package name
      const dependencies = data.dependencies || [];
      const cacheEntry = {
        name: ctanPkg, // The actual package that provided the files
        files: [...files.keys()],
        dependencies,
        cacheVersion: CTAN_CACHE_VERSION,
        source: 'ctan',
      };
      await savePackageMeta(packageName, cacheEntry);

      // Also cache under the resolved name if different (avoids duplicate fetches)
      if (ctanPkg !== packageName) {
        await savePackageMeta(ctanPkg, cacheEntry);
        this.onLog(`Cached under both "${packageName}" and "${ctanPkg}"`);
      }

      this.fetchCount++;
      return { files, dependencies };
    } catch (e) {
      // Do not persist a not-found marker here: the failure may be a bad
      // payload or a storage error rather than a missing package.
      this.onLog(`CTAN fetch error: ${e.message}`);
      return null;
    }
  }

  /**
   * Fetch a package together with its transitive dependencies.
   * @param {string} packageName - Package name
   * @param {Set<string>} [fetched] - Names already visited; guards against dependency cycles
   * @returns {Promise<Map<string, Uint8Array>>} All files of the package and its dependencies
   */
  async fetchWithDependencies(packageName, fetched = new Set()) {
    if (fetched.has(packageName)) return new Map();
    fetched.add(packageName);

    const result = await this.fetchPackage(packageName);
    if (!result) return new Map();

    const allFiles = new Map(result.files);

    // Fetch dependencies
    for (const dep of result.dependencies) {
      const depFiles = await this.fetchWithDependencies(dep, fetched);
      for (const [path, content] of depFiles) {
        allFiles.set(path, content);
      }
    }

    return allFiles;
  }

  /**
   * Batch fetch multiple packages in parallel.
   * Used for pre-fetching detected CTAN packages before compilation.
   * @param {string[]} packageNames - Package names to fetch
   * @param {Object} options
   * @param {number} options.concurrency - Max parallel fetches (default: 1, see note below);
   *   values below 1 or non-numeric values are treated as 1
   * @returns {Promise<{fetched: string[], failed: string[], skipped: string[]}>}
   */
  async batchFetchPackages(packageNames, options = {}) {
    // Concurrency limited to 1 because xzwasm's WASM module has shared internal state
    // that causes "detached ArrayBuffer" errors when multiple decompressions run in parallel.
    // HTTP fetches are still fast, and decompression is CPU-bound anyway, so serialization
    // doesn't significantly impact total time (~200ms for 10 packages).
    const { concurrency = 1 } = options;
    // A step below 1 would never advance the chunk loop.
    const step = Math.max(1, Math.floor(Number(concurrency)) || 1);

    const fetched = [];
    const failed = [];
    const skipped = [];

    // Deduplicate
    const uniquePackages = [...new Set(packageNames)];
    const toFetch = [];

    // Check cache first
    for (const pkgName of uniquePackages) {
      const cached = await this.loadPackageFromCache(pkgName);
      if (cached && cached.files && !cached.notFound) {
        // Already cached and has files - populate memory cache
        for (const [path, content] of cached.files) {
          this.fileCache.set(path, content);
        }
        skipped.push(pkgName);
      } else {
        toFetch.push(pkgName);
      }
    }

    if (toFetch.length === 0) {
      return { fetched, failed, skipped };
    }

    this.onLog(`[PRE-FETCH] Batch fetching ${toFetch.length} packages...`);

    // Fetch in chunks with concurrency limit
    for (let i = 0; i < toFetch.length; i += step) {
      const chunk = toFetch.slice(i, i + step);
      const results = await Promise.allSettled(
        chunk.map((pkgName) => this.fetchPackage(pkgName))
      );

      // allSettled preserves input order, so results[j] belongs to chunk[j]
      results.forEach((res, j) => {
        const pkgName = chunk[j];
        if (res.status === 'fulfilled' && res.value) {
          fetched.push(pkgName);
          return;
        }
        if (res.status === 'rejected') {
          this.onLog(`[PRE-FETCH] ${pkgName} failed: ${res.reason?.message ?? res.reason}`);
        }
        failed.push(pkgName);
      });
    }

    this.onLog(`[PRE-FETCH] Done: ${fetched.length} fetched, ${failed.length} failed, ${skipped.length} cached`);
    return { fetched, failed, skipped };
  }

  /**
   * Get list of all mounted file paths.
   * @returns {string[]} Array of file paths
   */
  getMountedFiles() {
    return [...this.mountedFiles];
  }

  /**
   * Get fetcher statistics.
   * @returns {{fetchCount: number, mountedFiles: number}} Stats object
   */
  getStats() {
    return {
      fetchCount: this.fetchCount,
      mountedFiles: this.mountedFiles.size,
    };
  }

  /**
   * Clear the mounted files set.
   */
  clearMountedFiles() {
    this.mountedFiles.clear();
  }
}
770
+
771
/**
 * Extract package name from a missing file path.
 * Handles special cases like EC/TC fonts (cm-super).
 *
 * Any leading directory is ignored. EC/TC font names are recognised with or
 * without a font-file extension (e.g. "ecrm1000" and "ecrm1000.tfm").
 * @param {string} filename - File name (e.g., "lingmacros.sty")
 * @returns {string} Package name
 */
export function getPackageFromFile(filename) {
  // Keep only the last path segment so a directory never leaks into the name
  const baseName = filename.split('/').pop();

  // Check for EC/TC fonts (cm-super)
  if (/^(ec|tc)[a-z]{2}\d+(\.(tfm|pfb|pfm|afm))?$/.test(baseName)) {
    return 'cm-super';
  }
  // Remove extension
  return baseName.replace(/\.(sty|cls|def|clo|fd|cfg|tex)$/, '');
}
785
+
786
/**
 * Decide whether a string can be used as a CTAN package name.
 *
 * A name is accepted when it is 2-50 characters long, consists only of ASCII
 * letters, digits, "_" and "-", and is not one of a few known false positives.
 * @param {string} name - Package name to validate
 * @returns {boolean} True if valid
 */
export function isValidPackageName(name) {
  const badLength = !name || name.length < 2 || name.length > 50;
  const hasIllegalChar = () => /[^a-zA-Z0-9_-]/.test(name);
  // Names that show up in logs/JS coercions but are never real packages
  const isFalsePositive = () =>
    ['document', 'texput', 'null', 'undefined', 'NaN'].includes(name);

  return !(badLength || hasIllegalChar() || isFalsePositive());
}
799
+
800
/**
 * Invalidate the cached metadata of one package (for version refresh).
 *
 * The stored entry is overwritten with an empty file list and cache version 0,
 * which is intended to fail the version check the next time the package is loaded.
 * @param {string} packageName - Package name to clear
 * @returns {Promise<boolean>} True if the marker was saved, false if saving failed
 */
export async function forceRefreshPackage(packageName) {
  const invalidatedEntry = {
    name: packageName,
    notFound: false,
    cacheVersion: 0, // Force version mismatch on next load
    files: [],
    clearedAt: Date.now(),
  };

  let saved = true;
  try {
    await savePackageMeta(packageName, invalidatedEntry);
  } catch {
    saved = false;
  }
  return saved;
}