@siglum/engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1164 @@
1
+ /**
2
+ * @module @siglum/engine/compiler
3
+ * Main SiglumCompiler class - orchestrates LaTeX compilation in the browser.
4
+ */
5
+
6
+ import { BundleManager, detectEngine, extractPreamble, hashPreamble } from './bundles.js';
7
+
8
+ /**
9
+ * @typedef {Object} SiglumCompilerOptions
10
+ * @property {string} [bundlesUrl] - URL to bundles directory
11
+ * @property {string} [wasmUrl] - URL to busytex.wasm
12
+ * @property {string} [jsUrl] - URL to busytex.js (derived from wasmUrl if not provided)
13
+ * @property {string|null} [workerUrl] - URL to worker.js or null for embedded worker
14
+ * @property {string} [ctanProxyUrl] - CTAN proxy URL
15
+ * @property {string} [xzwasmUrl] - XZ decompression WASM URL
16
+ * @property {(msg: string) => void} [onLog] - Logging callback
17
+ * @property {(stage: string, detail: string) => void} [onProgress] - Progress callback
18
+ * @property {boolean} [enableCtan] - Enable CTAN fetching (default: true if ctanProxyUrl provided)
19
+ * @property {boolean} [enableLazyFS] - Enable lazy filesystem (default: true)
20
+ * @property {boolean} [enableDocCache] - Enable document cache (default: true)
21
+ * @property {number} [maxRetries] - Max fetch retries per compile (default: 15)
22
+ * @property {boolean} [verbose] - Log TeX stdout (default: false)
23
+ * @property {string[]|Object<string, string[]>} [eagerBundles] - Bundles to load eagerly
24
+ */
25
+
26
+ /**
27
+ * @typedef {Object} CompileOptions
28
+ * @property {string} [engine] - 'pdflatex', 'xelatex', or 'lualatex'
29
+ * @property {boolean} [useCache] - Use document cache
30
+ * @property {Object<string, string|Uint8Array>} [additionalFiles] - Additional files for compilation
31
+ */
32
+
33
+ /**
34
+ * @typedef {Object} CompileResult
35
+ * @property {boolean} success - Whether compilation succeeded
36
+ * @property {Uint8Array} [pdf] - Compiled PDF bytes
37
+ * @property {boolean} [pdfIsShared] - True if PDF is in SharedArrayBuffer
38
+ * @property {Object|null} [syncTexData] - SyncTeX data for source mapping
39
+ * @property {Object} [stats] - Compilation statistics
40
+ * @property {string} [log] - TeX compilation log
41
+ * @property {string} [error] - Error message if failed
42
+ * @property {number} [exitCode] - TeX exit code if failed
43
+ * @property {boolean} [cached] - True if result was from cache
44
+ */
45
+ import { CTANFetcher } from './ctan.js';
46
+
47
+ // Module-level tracking to prevent multiple workers across all instances
48
+ let _globalActiveWorker = null;
49
+ let _globalWorkerId = 0;
50
+ import { fileSystem } from '@siglum/filesystem';
51
+ import {
52
+ getAuxCache,
53
+ saveAuxCache,
54
+ getCachedPdf,
55
+ saveCachedPdf,
56
+ hashDocument,
57
+ getFmtPath,
58
+ clearCTANCache,
59
+ getCompiledWasmModule,
60
+ saveWasmBytes,
61
+ saveWasmMemorySnapshot,
62
+ } from './storage.js';
63
+
64
+ // Ensure fmt-cache mount exists
65
+ let fmtCacheMounted = false;
66
/**
 * Mount the `/fmt-cache` directory, remembering success in the module-level
 * `fmtCacheMounted` flag so later calls skip the mount.
 *
 * @returns {Promise<boolean>} true if the mount is in place, false if mounting threw.
 */
async function ensureFmtCacheMount() {
  if (!fmtCacheMounted) {
    try {
      await fileSystem.mountAuto('/fmt-cache');
    } catch (mountError) {
      // Best effort: callers can still proceed without a format cache.
      console.warn('Failed to mount fmt-cache:', mountError);
      return false;
    }
    fmtCacheMounted = true;
  }
  return true;
}
77
+
78
+ /**
79
+ * Browser-based LaTeX compiler using WebAssembly.
80
+ * Handles bundle loading, CTAN fetching, and compilation orchestration.
81
+ */
82
+ export class SiglumCompiler {
83
+ /**
84
+ * @param {SiglumCompilerOptions} [options] - Compiler options
85
+ */
86
+ constructor(options = {}) {
87
+ this.bundlesUrl = options.bundlesUrl || 'packages/bundles';
88
+ this.wasmUrl = options.wasmUrl || 'busytex.wasm';
89
+ this.jsUrl = options.jsUrl || null; // Derived from wasmUrl if not provided
90
+ this.workerUrl = options.workerUrl || null; // Will use embedded worker if not provided
91
+ this.ctanProxyUrl = options.ctanProxyUrl || null;
92
+ this.xzwasmUrl = options.xzwasmUrl || './src/xzwasm.js';
93
+
94
+ this.bundleManager = new BundleManager({
95
+ bundleBase: this.bundlesUrl,
96
+ onLog: (msg) => this._log(msg),
97
+ });
98
+
99
+ this.ctanFetcher = new CTANFetcher({
100
+ proxyUrl: this.ctanProxyUrl,
101
+ xzwasmUrl: this.xzwasmUrl,
102
+ onLog: (msg) => this._log(msg),
103
+ });
104
+
105
+ this.worker = null;
106
+ this.workerReady = false;
107
+ this.wasmModule = null;
108
+ this._initWorkerPromise = null;
109
+ this.pendingCompile = null;
110
+ this.formatGenerationPromise = null;
111
+
112
+ this.onLog = options.onLog || (() => {});
113
+ this.onProgress = options.onProgress || (() => {});
114
+
115
+ // Options
116
+ // Enable CTAN if explicitly set, otherwise default to true only if ctanProxyUrl is provided
117
+ this.enableCtan = options.enableCtan ?? (this.ctanProxyUrl !== null);
118
+ this.enableLazyFS = options.enableLazyFS !== false;
119
+ this.enableDocCache = options.enableDocCache !== false;
120
+ this.maxRetries = options.maxRetries ?? 15; // Max CTAN/bundle fetch retries per compile
121
+ this.verbose = options.verbose ?? false; // Log TeX stdout (adds ~4000 postMessage calls)
122
+
123
+ // Eager bundle loading - bundles to load immediately instead of deferring
124
+ // Can be an array (applies to all engines) or object keyed by engine
125
+ // Example: ['cm-super'] or { pdflatex: ['cm-super'], xelatex: [] }
126
+ this.eagerBundles = options.eagerBundles || {};
127
+
128
+ // Range request coalescing - batch nearby requests to reduce HTTP overhead
129
+ this._pendingRangeRequests = new Map(); // bundleName -> [{requestId, start, end}]
130
+ this._rangeRequestTimer = null;
131
+ this._rangeRequestDebounceMs = 10; // Wait 10ms to batch requests
132
+ this._rangeCoalesceGapBytes = 64 * 1024; // Coalesce ranges within 64KB of each other
133
+ }
134
+
135
+ _log(msg) {
136
+ this.onLog(msg);
137
+ }
138
+
139
+ /**
140
+ * Get eager bundles for a specific engine.
141
+ * Eager bundles are loaded immediately instead of being deferred.
142
+ * @param {string} engine - The engine (pdflatex, xelatex, lualatex)
143
+ * @returns {string[]} List of bundle names to load eagerly
144
+ */
145
+ getEagerBundles(engine) {
146
+ if (Array.isArray(this.eagerBundles)) {
147
+ // Global list applies to all engines
148
+ return this.eagerBundles;
149
+ }
150
+ // Per-engine configuration
151
+ return this.eagerBundles[engine] || [];
152
+ }
153
+
154
+ /**
155
+ * Preload bundles into memory cache.
156
+ * Call this to eagerly load bundles before compilation.
157
+ * Useful for loading large deferred bundles (like cm-super) in advance.
158
+ *
159
+ * @example
160
+ * // Preload cm-super before first compile
161
+ * await compiler.preloadBundles(['cm-super']);
162
+ *
163
+ * @param {string[]} bundleNames - Bundles to preload
164
+ * @returns {Promise<{loaded: string[], failed: string[]}>}
165
+ */
166
+ async preloadBundles(bundleNames) {
167
+ await this.bundleManager.loadManifest();
168
+
169
+ const loaded = [];
170
+ const failed = [];
171
+
172
+ // Load bundles in parallel
173
+ const promises = bundleNames.map(async (bundleName) => {
174
+ try {
175
+ if (!this.bundleManager.bundleExists(bundleName)) {
176
+ this._log(`Preload: bundle ${bundleName} does not exist`);
177
+ failed.push(bundleName);
178
+ return;
179
+ }
180
+
181
+ const data = await this.bundleManager.loadBundle(bundleName);
182
+ if (data) {
183
+ this._log(`Preloaded bundle: ${bundleName} (${(data.byteLength / 1024 / 1024).toFixed(1)}MB)`);
184
+ loaded.push(bundleName);
185
+ } else {
186
+ failed.push(bundleName);
187
+ }
188
+ } catch (e) {
189
+ this._log(`Preload failed for ${bundleName}: ${e.message}`);
190
+ failed.push(bundleName);
191
+ }
192
+ });
193
+
194
+ await Promise.all(promises);
195
+ return { loaded, failed };
196
+ }
197
+
198
+ /**
199
+ * Pre-warm the compiler in the background.
200
+ * Call this early (e.g., on page load) to eliminate cold start latency.
201
+ * The promise resolves when initialization is complete, but you don't need to await it.
202
+ *
203
+ * @example
204
+ * // On app mount, before user starts typing
205
+ * const compiler = new SiglumCompiler(options);
206
+ * compiler.prewarm(); // Fire and forget - init happens in background
207
+ *
208
+ * // Later, when user wants to compile:
209
+ * await compiler.compile(source); // Already warmed up!
210
+ *
211
+ * @returns {Promise<void>} Resolves when initialization is complete
212
+ */
213
+ prewarm() {
214
+ // Return existing init promise if already warming/initialized
215
+ if (this._prewarmPromise) {
216
+ return this._prewarmPromise;
217
+ }
218
+
219
+ this._prewarmPromise = this.init().catch(e => {
220
+ this._log('Prewarm failed: ' + e.message);
221
+ // Reset so next prewarm/init can retry
222
+ this._prewarmPromise = null;
223
+ throw e;
224
+ });
225
+
226
+ return this._prewarmPromise;
227
+ }
228
+
229
+ /**
230
+ * Check if compiler is ready (initialized and warmed up)
231
+ * @returns {boolean}
232
+ */
233
+ isReady() {
234
+ return this.workerReady && this.wasmModule !== undefined;
235
+ }
236
+
237
+ /**
238
+ * Initialize the compiler. Loads WASM, manifests, and prepares the worker.
239
+ * @returns {Promise<void>}
240
+ */
241
+ async init() {
242
+ this._log('Initializing Siglum compiler...');
243
+
244
+ // Load manifests + WASM in parallel
245
+ await Promise.all([
246
+ this._loadManifests(),
247
+ this._loadWasm(),
248
+ ]);
249
+
250
+ // Worker init (required) + bundle preload (optional, don't fail if it errors)
251
+ await Promise.all([
252
+ this._initWorker(),
253
+ this.bundleManager.preloadEngine('pdflatex').catch(e => {
254
+ this._log('Bundle preload failed (will load on demand): ' + e.message);
255
+ }),
256
+ ]);
257
+
258
+ this._log('Compiler initialized');
259
+ }
260
+
261
+ async _loadManifests() {
262
+ await this.bundleManager.loadManifest();
263
+ await this.bundleManager.loadBundleDeps();
264
+ }
265
+
266
+ async _loadWasm() {
267
+ this._log('Loading WASM...');
268
+ const startTime = performance.now();
269
+
270
+ try {
271
+ // Try loading cached compiled module first (skips fetch + compile)
272
+ const cachedModule = await getCompiledWasmModule();
273
+ if (cachedModule) {
274
+ this.wasmModule = cachedModule;
275
+ const elapsed = (performance.now() - startTime).toFixed(0);
276
+ this._log('WASM loaded from cache in ' + elapsed + 'ms');
277
+ return;
278
+ }
279
+
280
+ // Fetch WASM as bytes (not streaming compile - we need bytes for caching)
281
+ const response = await fetch(this.wasmUrl);
282
+ const wasmBytes = new Uint8Array(await response.arrayBuffer());
283
+ const fetchElapsed = (performance.now() - startTime).toFixed(0);
284
+
285
+ // Compile from bytes
286
+ const compileStart = performance.now();
287
+ this.wasmModule = await WebAssembly.compile(wasmBytes);
288
+ const compileElapsed = (performance.now() - compileStart).toFixed(0);
289
+ this._log(`WASM fetched in ${fetchElapsed}ms, compiled in ${compileElapsed}ms`);
290
+
291
+ // Cache the bytes for future loads (Module can't be serialized to IndexedDB)
292
+ saveWasmBytes(wasmBytes).catch(() => {});
293
+ } catch (e) {
294
+ this._log('WASM load failed: ' + e.message);
295
+ throw e;
296
+ }
297
+ }
298
+
299
+ async _initWorker() {
300
+ if (this.worker) return;
301
+
302
+ // Prevent race condition: if init is already in progress, wait for it
303
+ if (this._initWorkerPromise) {
304
+ return this._initWorkerPromise;
305
+ }
306
+
307
+ this._initWorkerPromise = this._doInitWorker();
308
+ try {
309
+ await this._initWorkerPromise;
310
+ } finally {
311
+ this._initWorkerPromise = null;
312
+ }
313
+ }
314
+
315
+ async _doInitWorker() {
316
+ // Check for existing global worker - prevents duplicates across instances
317
+ if (_globalActiveWorker && _globalActiveWorker !== this.worker) {
318
+ console.warn('[SiglumCompiler] WARNING: Another worker already exists! Terminating old worker.');
319
+ _globalActiveWorker.terminate();
320
+ _globalActiveWorker = null;
321
+ }
322
+
323
+ // Get worker code - use external URL or read from src/worker.js
324
+ let workerUrl = this.workerUrl;
325
+ if (!workerUrl) {
326
+ // Fetch worker.js and create blob URL
327
+ const workerResponse = await fetch(new URL('./worker.js', import.meta.url));
328
+ const workerCode = await workerResponse.text();
329
+ const blob = new Blob([workerCode], { type: 'application/javascript' });
330
+ workerUrl = URL.createObjectURL(blob);
331
+ }
332
+
333
+ const workerId = ++_globalWorkerId;
334
+
335
+ this.worker = new Worker(workerUrl);
336
+ this.worker._workerId = workerId;
337
+ _globalActiveWorker = this.worker;
338
+
339
+ this.worker.onmessage = (e) => this._handleWorkerMessage(e);
340
+ this.worker.onerror = (e) => this._handleWorkerError(e);
341
+
342
+ // Get absolute URL for busytex.js - use jsUrl if provided, otherwise derive from wasmUrl
343
+ const wasmUrlObj = new URL(this.wasmUrl, window.location.href);
344
+ const busytexJsUrl = this.jsUrl
345
+ ? new URL(this.jsUrl, window.location.href).href
346
+ : new URL('busytex.js', wasmUrlObj.href).href;
347
+
348
+ // NOTE: Memory snapshots are DISABLED - pdfTeX's internal globals cause assertion
349
+ // failures when restored. Fast recompiles come from format caching (.fmt files).
350
+ const memorySnapshot = null;
351
+
352
+ // Send init message
353
+ return new Promise((resolve, reject) => {
354
+ const timeout = setTimeout(() => reject(new Error('Worker init timeout')), 30000);
355
+
356
+ const originalHandler = this.worker.onmessage;
357
+ this.worker.onmessage = (e) => {
358
+ if (e.data.type === 'ready') {
359
+ clearTimeout(timeout);
360
+ this.workerReady = true;
361
+ this.worker.onmessage = originalHandler;
362
+ this._log('Worker ready');
363
+ resolve();
364
+ } else {
365
+ originalHandler(e);
366
+ }
367
+ };
368
+
369
+ const initMsg = {
370
+ type: 'init',
371
+ wasmModule: this.wasmModule,
372
+ busytexJsUrl,
373
+ manifest: this.bundleManager.fileManifest,
374
+ packageMapData: this.bundleManager.packageMap,
375
+ bundleDepsData: this.bundleManager.bundleDeps,
376
+ bundleRegistryData: this.bundleManager.bundleRegistry ? [...this.bundleManager.bundleRegistry] : [],
377
+ verbose: this.verbose,
378
+ };
379
+
380
+ // Include memory snapshot if available (transfer for efficiency)
381
+ if (memorySnapshot) {
382
+ initMsg.memorySnapshot = memorySnapshot;
383
+ this.worker.postMessage(initMsg, [memorySnapshot]);
384
+ } else {
385
+ this.worker.postMessage(initMsg);
386
+ }
387
+ });
388
+ }
389
+
390
+ _handleWorkerMessage(e) {
391
+ const msg = e.data;
392
+
393
+ switch (msg.type) {
394
+ case 'log':
395
+ this._log(msg.message);
396
+ break;
397
+
398
+ case 'progress':
399
+ this.onProgress(msg.stage, msg.detail);
400
+ break;
401
+
402
+ case 'compile-response':
403
+ if (this.pendingCompile) {
404
+ this.pendingCompile.resolve(msg);
405
+ this.pendingCompile = null;
406
+ }
407
+ break;
408
+
409
+ case 'format-generate-response':
410
+ if (this.pendingFormat) {
411
+ this.pendingFormat.resolve(msg);
412
+ this.pendingFormat = null;
413
+ }
414
+ break;
415
+
416
+ case 'ctan-fetch-request':
417
+ this._handleCtanFetchRequest(msg);
418
+ break;
419
+
420
+ case 'bundle-fetch-request':
421
+ this._handleBundleFetchRequest(msg);
422
+ break;
423
+
424
+ case 'file-range-fetch-request':
425
+ this._queueRangeRequest(msg);
426
+ break;
427
+
428
+ case 'memory-snapshot':
429
+ // Worker sent memory snapshot after first successful compile - save to IndexedDB
430
+ this._handleMemorySnapshot(msg).catch(e => {
431
+ this._log('Failed to save memory snapshot: ' + e.message);
432
+ });
433
+ break;
434
+
435
+ default:
436
+ // Log unhandled message types for debugging
437
+ if (msg.type && !['log', 'progress', 'compile-response', 'format-generate-response'].includes(msg.type)) {
438
+ console.log('[Compiler] Unhandled message type:', msg.type);
439
+ }
440
+ }
441
+ }
442
+
443
+ _handleWorkerError(e) {
444
+ this._log('Worker error: ' + e.message);
445
+ if (this.pendingCompile) {
446
+ this.pendingCompile.reject(new Error('Worker error: ' + e.message));
447
+ this.pendingCompile = null;
448
+ }
449
+ this.workerReady = false;
450
+ // Terminate the worker before clearing the reference to avoid memory leak
451
+ if (this.worker) {
452
+ this.worker.terminate();
453
+ // Clear global reference if this is the active worker
454
+ if (_globalActiveWorker === this.worker) {
455
+ _globalActiveWorker = null;
456
+ }
457
+ }
458
+ this.worker = null;
459
+ }
460
+
461
+ async _handleCtanFetchRequest(msg) {
462
+ const { requestId, packageName, fileName, tlYear } = msg;
463
+
464
+ try {
465
+ // Look up the correct package for this file (e.g., bbm.sty → bbm-macros)
466
+ let actualPackage = packageName;
467
+ if (fileName) {
468
+ const mappedPackage = await this.ctanFetcher.lookupPackageForFile(fileName);
469
+ if (mappedPackage && mappedPackage !== packageName) {
470
+ this._log(`[CTAN-REQ] ${fileName} maps to package "${mappedPackage}" (not "${packageName}")`);
471
+ actualPackage = mappedPackage;
472
+ }
473
+ }
474
+
475
+ const yearLabel = tlYear ? ` (TL${tlYear})` : '';
476
+ this._log(`[CTAN-REQ] Worker requested package: ${actualPackage}${yearLabel}`);
477
+ // Only fetch this specific package, not dependencies
478
+ // Dependencies are resolved by the worker's retry loop - if a dependency
479
+ // is missing, the worker will request it specifically
480
+ const result = await this.ctanFetcher.fetchPackage(actualPackage, { tlYear });
481
+ if (result) {
482
+ this._log(`[CTAN-REQ] ${packageName}: got ${result.files?.size || 0} files`);
483
+ }
484
+
485
+ if (!result) {
486
+ this.worker.postMessage({
487
+ type: 'ctan-fetch-response',
488
+ requestId,
489
+ packageName,
490
+ success: false,
491
+ error: 'Package not found',
492
+ });
493
+ return;
494
+ }
495
+
496
+ this.worker.postMessage({
497
+ type: 'ctan-fetch-response',
498
+ requestId,
499
+ packageName,
500
+ success: true,
501
+ files: Object.fromEntries(result.files),
502
+ dependencies: result.dependencies || [],
503
+ });
504
+ } catch (e) {
505
+ this._log('CTAN fetch error: ' + e.message);
506
+ this.worker.postMessage({
507
+ type: 'ctan-fetch-response',
508
+ requestId,
509
+ packageName,
510
+ success: false,
511
+ error: e.message,
512
+ });
513
+ }
514
+ }
515
+
516
+ async _handleBundleFetchRequest(msg) {
517
+ const { requestId, bundleName } = msg;
518
+
519
+ try {
520
+ this._log('Worker requested bundle: ' + bundleName);
521
+
522
+ const bundleData = await this.bundleManager.loadBundle(bundleName);
523
+
524
+ // SharedArrayBuffer: send directly (automatically shared, no transfer list)
525
+ // ArrayBuffer: copy before transfer so original stays valid in cache
526
+ const isShared = typeof SharedArrayBuffer !== 'undefined' && bundleData instanceof SharedArrayBuffer;
527
+ if (isShared) {
528
+ this.worker.postMessage({
529
+ type: 'bundle-fetch-response',
530
+ requestId,
531
+ bundleName,
532
+ success: true,
533
+ bundleData,
534
+ });
535
+ } else {
536
+ const copy = bundleData.slice(0);
537
+ this.worker.postMessage({
538
+ type: 'bundle-fetch-response',
539
+ requestId,
540
+ bundleName,
541
+ success: true,
542
+ bundleData: copy,
543
+ }, [copy]);
544
+ }
545
+ } catch (e) {
546
+ this._log('Bundle fetch error: ' + e.message);
547
+ this.worker.postMessage({
548
+ type: 'bundle-fetch-response',
549
+ requestId,
550
+ bundleName,
551
+ success: false,
552
+ error: e.message,
553
+ });
554
+ }
555
+ }
556
+
557
+ /**
558
+ * Queue a range request for batching. Requests are coalesced and fetched
559
+ * together to reduce HTTP overhead.
560
+ */
561
+ _queueRangeRequest(msg) {
562
+ const { requestId, bundleName, start, end } = msg;
563
+
564
+ // Add to pending queue for this bundle
565
+ if (!this._pendingRangeRequests.has(bundleName)) {
566
+ this._pendingRangeRequests.set(bundleName, []);
567
+ }
568
+ this._pendingRangeRequests.get(bundleName).push({ requestId, start, end });
569
+
570
+ // Reset debounce timer
571
+ if (this._rangeRequestTimer) {
572
+ clearTimeout(this._rangeRequestTimer);
573
+ }
574
+
575
+ this._rangeRequestTimer = setTimeout(() => {
576
+ this._processRangeRequestBatch().catch(e => {
577
+ console.error('[Compiler] Range batch processing error:', e);
578
+ this._log('Error processing range batch: ' + e.message);
579
+ });
580
+ }, this._rangeRequestDebounceMs);
581
+ }
582
+
583
+ /**
584
+ * Process all pending range requests, coalescing nearby ranges.
585
+ */
586
+ async _processRangeRequestBatch() {
587
+ this._rangeRequestTimer = null;
588
+
589
+ // Process each bundle's requests
590
+ for (const [bundleName, requests] of this._pendingRangeRequests.entries()) {
591
+ if (requests.length === 0) continue;
592
+
593
+ // Coalesce ranges
594
+ const coalesced = this._coalesceRanges(requests);
595
+
596
+ this._log(`Range coalescing: ${requests.length} requests -> ${coalesced.length} fetches for ${bundleName}`);
597
+
598
+ // Fetch each coalesced range
599
+ for (const group of coalesced) {
600
+ await this._fetchCoalescedRange(bundleName, group);
601
+ }
602
+ }
603
+
604
+ // Clear processed requests
605
+ this._pendingRangeRequests.clear();
606
+ }
607
+
608
+ /**
609
+ * Coalesce nearby ranges to reduce HTTP requests.
610
+ * Returns groups of original requests that can be satisfied by a single fetch.
611
+ */
612
+ _coalesceRanges(requests) {
613
+ if (requests.length === 0) return [];
614
+ if (requests.length === 1) return [[requests[0]]];
615
+
616
+ // Sort by start position
617
+ const sorted = [...requests].sort((a, b) => a.start - b.start);
618
+
619
+ const groups = [];
620
+ let currentGroup = [sorted[0]];
621
+ let groupEnd = sorted[0].end;
622
+
623
+ for (let i = 1; i < sorted.length; i++) {
624
+ const req = sorted[i];
625
+
626
+ // If this range is within the gap threshold of the current group, merge
627
+ if (req.start <= groupEnd + this._rangeCoalesceGapBytes) {
628
+ currentGroup.push(req);
629
+ groupEnd = Math.max(groupEnd, req.end);
630
+ } else {
631
+ // Start a new group
632
+ groups.push(currentGroup);
633
+ currentGroup = [req];
634
+ groupEnd = req.end;
635
+ }
636
+ }
637
+
638
+ groups.push(currentGroup);
639
+ return groups;
640
+ }
641
+
642
+ /**
643
+ * Fetch a coalesced range and distribute data to original requesters.
644
+ */
645
+ async _fetchCoalescedRange(bundleName, group) {
646
+ // Calculate the overall range to fetch
647
+ const fetchStart = Math.min(...group.map(r => r.start));
648
+ const fetchEnd = Math.max(...group.map(r => r.end));
649
+
650
+ try {
651
+ const url = `${this.bundlesUrl}/${bundleName}.raw`;
652
+ const response = await fetch(url, {
653
+ headers: {
654
+ 'Range': `bytes=${fetchStart}-${fetchEnd - 1}`,
655
+ },
656
+ });
657
+
658
+ if (response.status !== 206 && response.status !== 200) {
659
+ throw new Error(`Range request failed with status ${response.status}`);
660
+ }
661
+
662
+ const fullData = new Uint8Array(await response.arrayBuffer());
663
+ this._log(`Fetched coalesced range [${fetchStart}:${fetchEnd}] = ${fullData.length} bytes`);
664
+
665
+ // Distribute data to each original requester
666
+ for (const req of group) {
667
+ const offset = req.start - fetchStart;
668
+ const length = req.end - req.start;
669
+ const data = fullData.slice(offset, offset + length);
670
+
671
+ this.worker.postMessage({
672
+ type: 'file-range-fetch-response',
673
+ requestId: req.requestId,
674
+ bundleName,
675
+ start: req.start,
676
+ end: req.end,
677
+ success: true,
678
+ data,
679
+ }, [data.buffer]);
680
+ }
681
+ } catch (e) {
682
+ this._log('Coalesced range fetch error: ' + e.message);
683
+
684
+ // Send error to all requesters in this group
685
+ for (const req of group) {
686
+ this.worker.postMessage({
687
+ type: 'file-range-fetch-response',
688
+ requestId: req.requestId,
689
+ bundleName,
690
+ start: req.start,
691
+ end: req.end,
692
+ success: false,
693
+ error: e.message,
694
+ });
695
+ }
696
+ }
697
+ }
698
+
699
+ async _handleMemorySnapshot(msg) {
700
+ // Save memory snapshot to persistent storage for future instant restore
701
+ const { snapshot, byteLength, isShared } = msg;
702
+ if (!snapshot || byteLength === 0) {
703
+ this._log('Memory snapshot is empty, skipping save');
704
+ return;
705
+ }
706
+
707
+ // For SharedArrayBuffer: create a regular copy for IndexedDB (can't store SAB)
708
+ // For transferred ArrayBuffer: wrap in Uint8Array view
709
+ let snapshotArray;
710
+ if (isShared) {
711
+ // Copy from SharedArrayBuffer to regular ArrayBuffer for IndexedDB
712
+ snapshotArray = new Uint8Array(byteLength);
713
+ snapshotArray.set(new Uint8Array(snapshot));
714
+ this._log(`Saving memory snapshot to cache (${(byteLength / 1024 / 1024).toFixed(1)}MB, from shared)...`);
715
+ } else {
716
+ snapshotArray = new Uint8Array(snapshot);
717
+ this._log(`Saving memory snapshot to cache (${(byteLength / 1024 / 1024).toFixed(1)}MB)...`);
718
+ }
719
+
720
+ const success = await saveWasmMemorySnapshot(snapshotArray, {
721
+ savedAt: Date.now(),
722
+ byteLength,
723
+ });
724
+
725
+ if (success) {
726
+ this._log('Memory snapshot saved');
727
+ } else {
728
+ this._log('Failed to save memory snapshot');
729
+ }
730
+ }
731
+
732
+ /**
733
+ * Compile LaTeX source to PDF.
734
+ * @param {string} source - LaTeX source code
735
+ * @param {CompileOptions} [options] - Compilation options
736
+ * @returns {Promise<CompileResult>} Compilation result with PDF or error
737
+ */
738
+ async compile(source, options = {}) {
739
+ // Wait for any pending format generation to complete before checking cache
740
+ // This ensures the format is available in cache for the current compile
741
+ if (this.formatGenerationPromise) {
742
+ this._log('Waiting for format generation to complete...');
743
+ await this.formatGenerationPromise.catch(() => {});
744
+ }
745
+
746
+ const engine = options.engine || detectEngine(source);
747
+ const useCache = this.enableDocCache && options.useCache !== false;
748
+
749
+ // Check document cache
750
+ if (useCache) {
751
+ const docHash = hashDocument(source);
752
+ const cached = await getCachedPdf(docHash, engine);
753
+ if (cached) {
754
+ this._log('Using cached PDF');
755
+ return {
756
+ success: true,
757
+ pdf: new Uint8Array(cached),
758
+ cached: true,
759
+ };
760
+ }
761
+ }
762
+
763
+ // Ensure worker is ready
764
+ if (!this.workerReady) {
765
+ await this._initWorker();
766
+ }
767
+
768
+ // Determine required bundles from direct \usepackage commands
769
+ const { bundles } = this.bundleManager.checkPackages(source, engine);
770
+
771
+ // Add eager bundles for this engine (these get loaded immediately instead of deferred)
772
+ const eagerBundles = this.getEagerBundles(engine);
773
+
774
+ // Pre-scan for CTAN packages and dependency bundles - batch fetch before compilation
775
+ let depBundles = [];
776
+ if (this.enableCtan) {
777
+ const additionalFilesMap = options.additionalFiles || {};
778
+ const { ctanPackages, additionalBundles } = this.bundleManager.prescanForCtanPackages(source, engine, additionalFilesMap);
779
+
780
+ // Add bundles needed for package dependencies (not detected from direct \usepackage)
781
+ if (additionalBundles && additionalBundles.length > 0) {
782
+ depBundles = additionalBundles;
783
+ }
784
+
785
+ if (ctanPackages.length > 0) {
786
+ this._log(`Pre-scan: ${ctanPackages.length} potential CTAN packages`);
787
+ const prescanStart = performance.now();
788
+
789
+ const { fetched, failed, skipped } = await this.ctanFetcher.batchFetchPackages(ctanPackages);
790
+
791
+ const elapsed = (performance.now() - prescanStart).toFixed(0);
792
+ if (fetched.length > 0 || skipped.length > 0) {
793
+ this._log(`Pre-fetch: ${fetched.length} new, ${skipped.length} cached, ${failed.length} not found (${elapsed}ms)`);
794
+ }
795
+ }
796
+ }
797
+
798
+ // Combine all bundles: direct packages + eager + dependency bundles
799
+ const allBundles = [...new Set([...bundles, ...eagerBundles, ...depBundles])];
800
+
801
+ // Log required bundles with optional extras
802
+ const extras = [];
803
+ if (eagerBundles.length > 0) extras.push('eager: ' + eagerBundles.join(', '));
804
+ if (depBundles.length > 0) extras.push('deps: ' + depBundles.join(', '));
805
+ if (extras.length > 0) {
806
+ this._log('Required bundles: ' + bundles.join(', ') + ' (+ ' + extras.join(', ') + ')');
807
+ } else {
808
+ this._log('Required bundles: ' + bundles.join(', '));
809
+ }
810
+
811
+ // Prepare bundle data for worker (SharedArrayBuffer for zero-copy, or regular ArrayBuffer with transfer)
812
+ this.onProgress('loading', 'Loading bundles...');
813
+
814
+ let bundleData = {};
815
+ let transferList = [];
816
+
817
+ // Load bundle blobs
818
+ const loadedBundles = await this.bundleManager.loadBundles(allBundles);
819
+ let totalBytes = 0;
820
+ let usingSharedArrayBuffer = false;
821
+
822
+ for (const [name, data] of Object.entries(loadedBundles)) {
823
+ if (data) {
824
+ // Check if data is SharedArrayBuffer (zero-copy) or regular ArrayBuffer (needs transfer)
825
+ const isShared = typeof SharedArrayBuffer !== 'undefined' && data instanceof SharedArrayBuffer;
826
+ if (isShared) {
827
+ // SharedArrayBuffer: no copy needed, worker reads same memory
828
+ bundleData[name] = data;
829
+ usingSharedArrayBuffer = true;
830
+ } else {
831
+ // Regular ArrayBuffer: copy for transfer (original stays in cache)
832
+ const copy = data.slice(0);
833
+ bundleData[name] = copy;
834
+ transferList.push(copy);
835
+ }
836
+ totalBytes += data.byteLength;
837
+ }
838
+ }
839
+ if (usingSharedArrayBuffer) {
840
+ this._log(`Sharing ${Object.keys(bundleData).length} bundles via SharedArrayBuffer (${(totalBytes/1024/1024).toFixed(1)}MB, zero-copy)`);
841
+ } else {
842
+ this._log(`Transferring ${Object.keys(bundleData).length} bundles (${(totalBytes/1024/1024).toFixed(1)}MB)`);
843
+ }
844
+
845
+ // Get CTAN files from memory cache (populated by previous fetches)
846
+ const ctanFiles = this.ctanFetcher.getCachedFiles();
847
+ const ctanFileCount = Object.keys(ctanFiles).length;
848
+ if (ctanFileCount > 0) {
849
+ this._log(`Passing ${ctanFileCount} cached CTAN files to worker`);
850
+ }
851
+
852
+ // Merge in any additional files provided by the user
853
+ const additionalFiles = options.additionalFiles || {};
854
+ for (const [filename, content] of Object.entries(additionalFiles)) {
855
+ // Convert string content to Uint8Array
856
+ const data = typeof content === 'string'
857
+ ? new TextEncoder().encode(content)
858
+ : content;
859
+ // Mount in current directory (will be found by TeX)
860
+ ctanFiles['/' + filename] = data;
861
+ }
862
+
863
+ // Check for cached format (in-memory first, then filesystem)
864
+ let cachedFormat = null;
865
+ const preamble = extractPreamble(source);
866
+ const preambleHash = hashPreamble(preamble);
867
+ const fmtKey = preambleHash + '_' + engine;
868
+
869
+ // Check in-memory cache first (fast path)
870
+ if (this._fmtMemCache?.key === fmtKey && this._fmtMemCache?.data?.buffer?.byteLength > 0) {
871
+ cachedFormat = { fmtName: fmtKey, fmtData: this._fmtMemCache.data };
872
+ this._log('Using cached format (memory)');
873
+ } else {
874
+ // Fall back to filesystem cache - path is deterministic from fmtKey
875
+ const fmtPath = '/' + getFmtPath(fmtKey);
876
+ await ensureFmtCacheMount();
877
+ const fmtData = await fileSystem.readBinary(fmtPath).catch(() => null);
878
+ if (fmtData && fmtData.byteLength > 0) {
879
+ // Cache in memory for subsequent compiles
880
+ this._fmtMemCache = { key: fmtKey, data: fmtData.slice() };
881
+ cachedFormat = { fmtName: fmtKey, fmtData: this._fmtMemCache.data };
882
+ this._log('Using cached format (filesystem)');
883
+ }
884
+ }
885
+
886
+ // Check for cached aux files (include format state in key to avoid mismatch)
887
+ const auxCacheKey = cachedFormat ? preambleHash + '_fmt' : preambleHash;
888
+ const auxCache = await getAuxCache(auxCacheKey);
889
+
890
+ // Send compile request
891
+ this.onProgress('compiling', 'Compiling...');
892
+ const compileId = crypto.randomUUID();
893
+
894
+ return new Promise((resolve, reject) => {
895
+ const timeout = setTimeout(() => {
896
+ if (this.pendingCompile) {
897
+ this.pendingCompile = null;
898
+ reject(new Error('Compilation timeout'));
899
+ }
900
+ }, 120000);
901
+
902
+ this.pendingCompile = {
903
+ resolve: async (result) => {
904
+ clearTimeout(timeout);
905
+
906
+ if (result.success) {
907
+ // Create Uint8Array view - works for both SharedArrayBuffer and ArrayBuffer
908
+ // Both are zero-copy views, just pointing to different backing memory
909
+ const pdfData = result.pdfData ? new Uint8Array(result.pdfData) : null;
910
+
911
+ // Cache the PDF (IndexedDB requires regular ArrayBuffer, not SharedArrayBuffer)
912
+ if (useCache && pdfData) {
913
+ const docHash = hashDocument(source);
914
+ const cacheBuffer = result.pdfDataIsShared
915
+ ? result.pdfData.slice(0) // Copy SharedArrayBuffer to regular ArrayBuffer
916
+ : result.pdfData; // Already regular ArrayBuffer
917
+ await saveCachedPdf(docHash, engine, cacheBuffer);
918
+ }
919
+
920
+ // Cache aux files (use same key that includes format state)
921
+ if (result.auxFilesToCache) {
922
+ await saveAuxCache(auxCacheKey, result.auxFilesToCache);
923
+ }
924
+
925
+ // Auto-generate format cache if no cached format was used
926
+ // Do this in background to not block the compile result
927
+ // Skip for xelatex - XeTeX can't dump formats with native fonts
928
+ if (!cachedFormat && preamble && engine !== 'xelatex') {
929
+ this.generateFormat(source, { engine }).then(async () => {
930
+ // Populate memory cache from the newly generated format
931
+ await ensureFmtCacheMount();
932
+ const data = await fileSystem.readBinary('/' + getFmtPath(fmtKey)).catch(() => null);
933
+ if (data) this._fmtMemCache = { key: fmtKey, data: new Uint8Array(data) };
934
+ }).catch(() => {}); // Silent fail for background task
935
+ }
936
+
937
+ resolve({
938
+ success: true,
939
+ pdf: pdfData,
940
+ pdfIsShared: result.pdfDataIsShared || false, // Pass flag to consumer
941
+ syncTexData: result.syncTexData || null, // SyncTeX data for source/PDF synchronization
942
+ stats: result.stats,
943
+ log: result.log,
944
+ });
945
+ } else {
946
+ resolve({
947
+ success: false,
948
+ error: result.error,
949
+ exitCode: result.exitCode,
950
+ log: result.log,
951
+ });
952
+ }
953
+ },
954
+ reject: (error) => {
955
+ clearTimeout(timeout);
956
+ reject(error);
957
+ },
958
+ };
959
+
960
+ this.worker.postMessage({
961
+ type: 'compile',
962
+ id: compileId,
963
+ source,
964
+ engine,
965
+ options: {
966
+ enableLazyFS: this.enableLazyFS,
967
+ enableCtan: this.enableCtan,
968
+ maxRetries: this.maxRetries,
969
+ verbose: this.verbose,
970
+ },
971
+ bundleData,
972
+ ctanFiles,
973
+ cachedFormat,
974
+ cachedAuxFiles: auxCache?.files || null,
975
+ // Deferred bundles minus any that are eagerly loaded
976
+ deferredBundleNames: (this.bundleManager.bundleDeps?.deferred || [])
977
+ .filter(b => !eagerBundles.includes(b)),
978
+ }, transferList);
979
+ });
980
+ }
981
+
982
+ /**
983
+ * Pre-generate a format file for faster subsequent compilations.
984
+ * @param {string} source - LaTeX source with preamble to cache
985
+ * @param {{engine?: string}} [options] - Options
986
+ * @returns {Promise<Uint8Array|null>} Format data or null if not supported
987
+ */
988
+ async generateFormat(source, options = {}) {
989
+ const engine = options.engine || 'pdflatex';
990
+
991
+ // XeTeX can't dump formats with native fonts (fontspec)
992
+ if (engine === 'xelatex') {
993
+ this._log('Format caching not supported for XeLaTeX (native fonts)');
994
+ return null;
995
+ }
996
+
997
+ const preamble = extractPreamble(source);
998
+
999
+ if (!preamble) {
1000
+ throw new Error('No preamble found in source');
1001
+ }
1002
+
1003
+ // Check cache - path is deterministic from fmtKey
1004
+ const preambleHash = hashPreamble(preamble);
1005
+ const fmtKey = preambleHash + '_' + engine;
1006
+ const fmtPath = '/' + getFmtPath(fmtKey);
1007
+ await ensureFmtCacheMount();
1008
+ const existingFmt = await fileSystem.readBinary(fmtPath).catch(() => null);
1009
+ if (existingFmt && existingFmt.byteLength > 0) {
1010
+ this._log('Format already cached');
1011
+ return new Uint8Array(existingFmt);
1012
+ }
1013
+
1014
+ // Ensure worker is ready
1015
+ if (!this.workerReady) {
1016
+ await this._initWorker();
1017
+ }
1018
+
1019
+ // Determine required bundles (same logic as compile)
1020
+ const { bundles } = this.bundleManager.checkPackages(source, engine);
1021
+
1022
+ // Get dependency bundles from prescan (e.g., utils for environ)
1023
+ let depBundles = [];
1024
+ if (this.enableCtan) {
1025
+ const { additionalBundles } = this.bundleManager.prescanForCtanPackages(source, engine, {});
1026
+ if (additionalBundles && additionalBundles.length > 0) {
1027
+ depBundles = additionalBundles;
1028
+ }
1029
+ }
1030
+
1031
+ const allBundles = [...new Set([...bundles, ...depBundles])];
1032
+ const bundleData = await this.bundleManager.loadBundles(allBundles);
1033
+
1034
+ // Get CTAN files from memory cache
1035
+ const ctanFiles = this.ctanFetcher.getCachedFiles();
1036
+
1037
+ this._log('Generating format file...');
1038
+ this.onProgress('format', 'Generating format...');
1039
+
1040
+ // Track this promise so compile() can wait for it
1041
+ this.formatGenerationPromise = new Promise((resolve, reject) => {
1042
+ const timeout = setTimeout(() => {
1043
+ if (this.pendingFormat) {
1044
+ this.pendingFormat = null;
1045
+ reject(new Error('Format generation timeout'));
1046
+ }
1047
+ }, 300000); // 5 minute timeout
1048
+
1049
+ this.pendingFormat = {
1050
+ resolve: (result) => {
1051
+ clearTimeout(timeout);
1052
+
1053
+ if (result.success) {
1054
+ const fmtData = new Uint8Array(result.formatData);
1055
+
1056
+ // Cache to filesystem, then resolve
1057
+ ensureFmtCacheMount().then(() => {
1058
+ return fileSystem.writeBinary(fmtPath, fmtData, { createParents: true });
1059
+ }).then(() => {
1060
+ this._log('Format generated and cached');
1061
+ resolve(fmtData);
1062
+ }).catch(e => {
1063
+ // Cache failed but format is still valid
1064
+ this._log('Warning: Failed to cache format: ' + e.message);
1065
+ resolve(fmtData);
1066
+ });
1067
+ } else {
1068
+ reject(new Error(result.error || 'Format generation failed'));
1069
+ }
1070
+ },
1071
+ reject: (error) => {
1072
+ clearTimeout(timeout);
1073
+ reject(error);
1074
+ },
1075
+ };
1076
+
1077
+ this.worker.postMessage({
1078
+ type: 'generate-format',
1079
+ id: crypto.randomUUID(),
1080
+ preambleContent: preamble,
1081
+ engine,
1082
+ manifest: this.bundleManager.fileManifest,
1083
+ packageMapData: this.bundleManager.packageMap,
1084
+ bundleDepsData: this.bundleManager.bundleDeps,
1085
+ bundleRegistryData: [...this.bundleManager.bundleRegistry],
1086
+ bundleData,
1087
+ ctanFiles,
1088
+ maxRetries: this.maxRetries,
1089
+ });
1090
+ }).finally(() => {
1091
+ this.formatGenerationPromise = null;
1092
+ });
1093
+
1094
+ return this.formatGenerationPromise;
1095
+ }
1096
+
1097
+ /**
1098
+ * Clear all caches (CTAN packages, mounted files).
1099
+ * @returns {Promise<void>}
1100
+ */
1101
+ async clearCache() {
1102
+ this._log('Clearing CTAN cache...');
1103
+ await clearCTANCache();
1104
+ this.ctanFetcher.clearMountedFiles();
1105
+ this._log('Cache cleared');
1106
+ }
1107
+
1108
+ /**
1109
+ * Get compiler statistics.
1110
+ * @returns {{bundles: Object, ctan: Object}} Statistics from bundle manager and CTAN fetcher
1111
+ */
1112
+ getStats() {
1113
+ return {
1114
+ bundles: this.bundleManager.getStats(),
1115
+ ctan: this.ctanFetcher.getStats(),
1116
+ };
1117
+ }
1118
+
1119
+ /**
1120
+ * Terminate the worker. Call unload() for full cleanup.
1121
+ */
1122
+ terminate() {
1123
+ if (this.worker) {
1124
+ this.worker.terminate();
1125
+ // Clear global reference if this is the active worker
1126
+ if (_globalActiveWorker === this.worker) {
1127
+ _globalActiveWorker = null;
1128
+ }
1129
+ this.worker = null;
1130
+ this.workerReady = false;
1131
+ }
1132
+ }
1133
+
1134
+ /**
1135
+ * Unload compiler to free memory. Clears RAM caches but keeps disk caches.
1136
+ * Call init() again to reinitialize.
1137
+ */
1138
+ unload() {
1139
+ this._log('Unloading compiler to free memory...');
1140
+
1141
+ // Terminate worker (frees WASM module, heap, worker bundle cache)
1142
+ this.terminate();
1143
+
1144
+ // Clear main thread caches
1145
+ this.bundleManager.clearCache();
1146
+ this.ctanFetcher.clearMountedFiles();
1147
+
1148
+ this._log('Compiler unloaded');
1149
+ }
1150
+
1151
+ /**
1152
+ * Check if compiler is currently loaded.
1153
+ * @returns {boolean}
1154
+ */
1155
+ isLoaded() {
1156
+ return this.worker !== null;
1157
+ }
1158
+ }
1159
+
1160
/**
 * Alias that exposes SiglumCompiler under the name BusyTeXCompiler for
 * backwards compatibility.
 * @type {typeof SiglumCompiler}
 */
export const BusyTeXCompiler = SiglumCompiler;