@siglum/engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/bundles.js ADDED
@@ -0,0 +1,545 @@
1
+ /**
2
+ * @module @siglum/engine/bundles
3
+ * Bundle loading and package resolution for LaTeX compilation.
4
+ */
5
+
6
+ import {
7
+ getBundleFromCache,
8
+ saveBundleToCache,
9
+ getManifestFromCache,
10
+ saveManifestToCache,
11
+ getManifestVersion,
12
+ saveManifestVersion,
13
+ MANIFEST_CACHE_VERSION,
14
+ } from './storage.js';
15
+
16
+ import { hashPreamble } from './hash.js';
17
+
18
// Feature flag: true when the SharedArrayBuffer global is defined in this
// runtime (browsers only expose it on cross-origin isolated pages, i.e. with
// COOP/COEP headers).
const sharedArrayBufferSupported = typeof SharedArrayBuffer !== 'undefined';

/**
 * Inflate a compressed payload with the platform's DecompressionStream.
 *
 * When SharedArrayBuffer is available the resulting bytes are copied once
 * into a SharedArrayBuffer, so the returned buffer can afterwards be handed
 * to workers without further copies.
 *
 * @param {ArrayBuffer} compressed - Payload bytes
 * @param {string} [format] - DecompressionStream format, or 'none' when the
 *   payload is already decompressed and must be passed through untouched
 * @returns {Promise<ArrayBuffer|SharedArrayBuffer>} Decompressed bytes
 */
async function decompress(compressed, format = 'gzip') {
  const inflate = () => {
    const inflated = new Blob([compressed])
      .stream()
      .pipeThrough(new DecompressionStream(format));
    return new Response(inflated).arrayBuffer();
  };

  const plain = format === 'none' ? compressed : await inflate();

  if (!sharedArrayBufferSupported) {
    return plain;
  }

  // One copy into shared memory.
  const sharedCopy = new SharedArrayBuffer(plain.byteLength);
  new Uint8Array(sharedCopy).set(new Uint8Array(plain));
  return sharedCopy;
}
44
+
45
+ /**
46
+ * @typedef {Object} BundleManagerOptions
47
+ * @property {string} [bundleBase] - Base URL for bundles
48
+ * @property {(msg: string) => void} [onLog] - Logging callback
49
+ * @property {(stage: string, detail: string) => void} [onProgress] - Progress callback
50
+ */
51
+
52
/**
 * Manages LaTeX package bundles - loading, caching, and resolution.
 *
 * State is filled lazily: `loadManifest()` populates `fileManifest`,
 * `bundleRegistry`, `packageMap` and `bundleDeps`; `loadBundleDeps()`
 * additionally populates the optional `packageDeps`. The synchronous
 * resolution methods only read that state, so they return engine-less /
 * empty results until the loaders have run.
 */
export class BundleManager {
  /**
   * @param {BundleManagerOptions} [options] - Manager options
   */
  constructor(options = {}) {
    this.bundleBase = options.bundleBase || 'packages/bundles';
    this.bundleCache = new Map(); // In-memory bundle cache (name -> buffer)
    this.fileManifest = null;
    this.packageMap = null;
    this.bundleDeps = null;
    this.packageDeps = null;
    this.bundleRegistry = null;
    this.bytesDownloaded = 0;
    this.cacheHitCount = 0;
    this.onLog = options.onLog || (() => {});
    this.onProgress = options.onProgress || (() => {});
  }

  /**
   * Compute a version hash for a bundle from its manifest entries.
   * The hash covers the sorted `path:size` pairs of every file assigned to
   * the bundle, so it changes when files are added, removed or resized.
   *
   * @param {string} bundleName - Bundle name
   * @returns {string|null} Signed 32-bit hash rendered in base 16 (may start
   *   with '-'; '0' for a bundle with no files), or null if no manifest is
   *   loaded yet
   */
  getBundleHash(bundleName) {
    if (!this.fileManifest) return null;

    const bundleFiles = Object.entries(this.fileManifest)
      .filter(([, info]) => info.bundle === bundleName)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([path, info]) => `${path}:${info.size}`)
      .join('|');

    // Classic "hash * 31 + char" string hash, truncated to 32 bits each step.
    let hash = 0;
    for (let i = 0; i < bundleFiles.length; i++) {
      hash = ((hash << 5) - hash) + bundleFiles.charCodeAt(i);
      hash |= 0; // Convert to 32bit integer
    }
    return hash.toString(16);
  }

  /**
   * Load bundle manifests from cache or server.
   *
   * The manager state is only updated once BOTH manifests are available, so
   * a failed load never leaves it half-initialised.
   *
   * @returns {Promise<Object>} File manifest
   * @throws {Error} If either manifest cannot be fetched (non-2xx response)
   */
  async loadManifest() {
    if (this.fileManifest) return this.fileManifest;

    // Check cache first; cached data is only trusted for the current version.
    const cachedVersion = await getManifestVersion();
    if (cachedVersion === MANIFEST_CACHE_VERSION) {
      const [manifest, bundlesData] = await Promise.all([
        getManifestFromCache('file-manifest'),
        getManifestFromCache('bundles'),
      ]);

      if (manifest && bundlesData) {
        this.onLog('Manifests loaded from cache');
        this.fileManifest = manifest;
        this._initFromBundlesData(bundlesData);
        return this.fileManifest;
      }
    }

    // Fetch fresh manifests
    const [manifestRes, bundlesRes] = await Promise.all([
      fetch(`${this.bundleBase}/file-manifest.json`),
      fetch(`${this.bundleBase}/bundles.json`),
    ]);
    if (!manifestRes.ok) {
      throw new Error(`Failed to load file-manifest.json: ${manifestRes.status}`);
    }
    if (!bundlesRes.ok) {
      throw new Error(`Failed to load bundles.json: ${bundlesRes.status}`);
    }

    const [fileManifest, bundlesData] = await Promise.all([
      manifestRes.json(),
      bundlesRes.json(),
    ]);

    // Assign fileManifest last: it doubles as the "already loaded" flag above.
    this._initFromBundlesData(bundlesData);
    this.fileManifest = fileManifest;

    // Save to cache (awaited so the cache is populated before returning).
    // Caching is best-effort: a failure is reported but never fatal.
    try {
      await Promise.all([
        saveManifestToCache('file-manifest', this.fileManifest),
        saveManifestToCache('bundles', bundlesData),
        saveManifestVersion(MANIFEST_CACHE_VERSION),
      ]);
      this.onLog('Manifests saved to cache');
    } catch (e) {
      this.onLog(`Manifest cache save failed: ${e?.message ?? e}`);
    }

    return this.fileManifest;
  }

  /**
   * Derive registry, package map and bundle dependency tables from the
   * parsed bundles.json payload.
   * @param {Object} bundlesData - Object with optional `bundles`, `packages`,
   *   `engines` and `deferred` members
   */
  _initFromBundlesData(bundlesData) {
    const bundleTable = bundlesData.bundles || {};

    // Bundle registry (set of bundle names)
    this.bundleRegistry = new Set(Object.keys(bundleTable));
    // Package name -> bundle name
    this.packageMap = bundlesData.packages || {};
    // Bundle deps (engines, bundle requires, deferred)
    this.bundleDeps = {
      engines: bundlesData.engines || {},
      bundles: {},
      deferred: bundlesData.deferred || [],
    };
    // Only bundles that actually require something get an entry.
    for (const [name, info] of Object.entries(bundleTable)) {
      if (info?.requires?.length > 0) {
        this.bundleDeps.bundles[name] = { requires: info.requires };
      }
    }
  }

  /**
   * Look up the declared dependencies of a package.
   *
   * Accepts both layouts of package-deps data: a nested
   * `{ packages: { name: [...] } }` object and a flat `{ name: [...] }` map.
   * Only own properties holding arrays are honoured, so names such as
   * `constructor` never resolve to inherited prototype members.
   *
   * @param {string} pkg - Package name
   * @returns {string[]} Dependency names (empty when unknown)
   */
  _getPackageDeps(pkg) {
    const data = this.packageDeps;
    if (!data || typeof data !== 'object') return [];

    const nested = data.packages;
    const isNested = nested && typeof nested === 'object' && !Array.isArray(nested);
    const table = isNested ? nested : data;

    if (!Object.prototype.hasOwnProperty.call(table, pkg)) return [];
    const deps = table[pkg];
    return Array.isArray(deps) ? deps : [];
  }

  /**
   * Split the argument of a package-loading command into clean names.
   * @param {string} list - Comma separated names, e.g. "amsmath, amssymb"
   * @returns {string[]} Trimmed, non-empty names
   */
  _splitPackageList(list) {
    return list.split(',').map((p) => p.trim()).filter((p) => p);
  }

  /**
   * Load bundle dependency information.
   * Also loads the optional package-deps.json (package-level dependencies)
   * the first time it is called; its absence is not an error.
   * @returns {Promise<Object>} Bundle dependencies
   */
  async loadBundleDeps() {
    // Ensure manifest is loaded first (sets bundleDeps)
    if (!this.bundleDeps) {
      await this.loadManifest();
    }

    // Package deps are tracked separately from bundleDeps, so this must be
    // checked even when bundleDeps was already available.
    if (this.packageDeps) return this.bundleDeps;

    const cachedVersion = await getManifestVersion();
    if (cachedVersion === MANIFEST_CACHE_VERSION) {
      const cachedDeps = await getManifestFromCache('package-deps');
      if (cachedDeps) {
        this.packageDeps = cachedDeps;
        return this.bundleDeps;
      }
    }

    try {
      const packageDepsRes = await fetch(`${this.bundleBase}/package-deps.json`);
      if (packageDepsRes?.ok) {
        this.packageDeps = await packageDepsRes.json();
        try {
          await saveManifestToCache('package-deps', this.packageDeps);
        } catch (e) {
          this.onLog(`package-deps cache save failed: ${e?.message ?? e}`);
        }
      }
    } catch (e) {
      // package-deps is optional: report and carry on without it.
      this.onLog(`package-deps.json unavailable: ${e?.message ?? e}`);
    }

    return this.bundleDeps;
  }

  /**
   * Check if a bundle exists in the registry.
   * @param {string} bundleName - Bundle name
   * @returns {boolean} False when unknown or when no registry is loaded
   */
  bundleExists(bundleName) {
    return this.bundleRegistry?.has(bundleName) ?? false;
  }

  /**
   * Resolve packages to their required bundles.
   *
   * The result contains, in insertion order: the engine's required bundles,
   * then for every package (and, transitively, every package it depends on)
   * its bundle plus that bundle's own required bundles. Only bundles present
   * in the registry are returned.
   *
   * @param {string[]} packages - Package names
   * @param {string} [engine] - Engine name
   * @returns {string[]} Bundle names
   */
  resolveBundles(packages, engine = 'xelatex') {
    const bundles = new Set();
    const resolved = new Set(); // Visited bundles and `pkg:`-prefixed packages

    // Engine-required bundles
    const engineDeps = this.bundleDeps?.engines?.[engine];
    if (engineDeps?.required) {
      for (const b of engineDeps.required) {
        if (this.bundleExists(b)) bundles.add(b);
      }
    }

    // Add a bundle and, recursively, the bundles it requires.
    const addBundle = (bundleName) => {
      if (resolved.has(bundleName)) return;
      resolved.add(bundleName);

      if (!this.bundleExists(bundleName)) return;
      bundles.add(bundleName);

      const bundleInfo = this.bundleDeps?.bundles?.[bundleName];
      if (bundleInfo?.requires) {
        for (const dep of bundleInfo.requires) {
          addBundle(dep);
        }
      }
    };

    // Add a package's bundle, then follow its package-level dependencies.
    const resolvePackage = (pkg) => {
      const key = `pkg:${pkg}`;
      if (resolved.has(key)) return;
      resolved.add(key);

      const bundleName = this.packageMap?.[pkg];
      if (typeof bundleName === 'string' && bundleName) {
        addBundle(bundleName);
      }

      for (const dep of this._getPackageDeps(pkg)) {
        resolvePackage(dep);
      }
    };

    for (const pkg of packages) {
      resolvePackage(pkg);
    }

    // Every entry was checked against the registry when it was added.
    return [...bundles];
  }

  /**
   * Extract packages from LaTeX source and resolve to bundles.
   * Recognises \usepackage, the first \documentclass and \RequirePackage
   * (each with an optional [options] argument).
   *
   * @param {string} source - LaTeX source
   * @param {string} [engine] - Engine name
   * @returns {{packages: string[], bundles: string[]}}
   */
  checkPackages(source, engine = 'xelatex') {
    const packages = new Set();

    // \usepackage[options]{pkg1,pkg2}
    const usePackageRegex = /\\usepackage(?:\[[^\]]*\])?\{([^}]+)\}/g;
    let match;
    while ((match = usePackageRegex.exec(source)) !== null) {
      for (const pkg of this._splitPackageList(match[1])) packages.add(pkg);
    }

    // \documentclass[options]{class}
    const docclassMatch = source.match(/\\documentclass(?:\[[^\]]*\])?\{([^}]+)\}/);
    const docclass = docclassMatch?.[1].trim();
    if (docclass) {
      packages.add(docclass);
    }

    // \RequirePackage[options]{pkg}
    const requireRegex = /\\RequirePackage(?:\[[^\]]*\])?\{([^}]+)\}/g;
    while ((match = requireRegex.exec(source)) !== null) {
      for (const pkg of this._splitPackageList(match[1])) packages.add(pkg);
    }

    const bundles = this.resolveBundles([...packages], engine);
    return { packages: [...packages], bundles };
  }

  /**
   * Pre-scan source to identify packages needing CTAN fetch.
   * Expands detected packages with known dependencies from packageDeps.
   * Scans additionalFiles (only entries whose name ends in `.tex`) for
   * multi-file documents.
   *
   * Note: the default engine here is 'pdflatex', unlike resolveBundles and
   * checkPackages which default to 'xelatex'.
   *
   * @param {string} source - Main LaTeX source
   * @param {string} engine - Engine (pdflatex, xelatex, etc.)
   * @param {Object} additionalFiles - Optional map of filename → content
   *   (string, or bytes decodable with TextDecoder)
   * @returns {{ bundledPackages: string[], ctanPackages: string[], additionalBundles: string[] }}
   *   bundledPackages: packages found in an existing bundle;
   *   ctanPackages: packages not in any bundle;
   *   additionalBundles: bundles holding a detected/dependency package that
   *   resolveBundles did not already select for the directly used packages
   */
  prescanForCtanPackages(source, engine = 'pdflatex', additionalFiles = {}) {
    const packages = new Set();

    const addName = (name) => {
      const trimmed = name.trim();
      if (trimmed) packages.add(trimmed);
    };

    // Collect every package/class named by a loading command in `text`.
    const scanSource = (text) => {
      // \usepackage[options]{pkg1,pkg2}
      const usePackageRegex = /\\usepackage(?:\[[^\]]*\])?\{([^}]+)\}/g;
      let match;
      while ((match = usePackageRegex.exec(text)) !== null) {
        for (const pkg of this._splitPackageList(match[1])) packages.add(pkg);
      }

      // \RequirePackage[options]{pkg} and \RequirePackageWithOptions{pkg}
      const requireRegex = /\\RequirePackage(?:WithOptions)?(?:\[[^\]]*\])?\{([^}]+)\}/g;
      while ((match = requireRegex.exec(text)) !== null) {
        for (const pkg of this._splitPackageList(match[1])) packages.add(pkg);
      }

      // \documentclass[options]{class}
      const docclassMatch = text.match(/\\documentclass(?:\[[^\]]*\])?\{([^}]+)\}/);
      if (docclassMatch) addName(docclassMatch[1]);

      // \LoadClass[options]{class} and \LoadClassWithOptions{class}
      const loadClassRegex = /\\LoadClass(?:WithOptions)?(?:\[[^\]]*\])?\{([^}]+)\}/g;
      while ((match = loadClassRegex.exec(text)) !== null) {
        addName(match[1]);
      }
    };

    // Scan main source
    scanSource(source);

    // Scan additional files (for multi-file documents); tolerate null.
    const texFiles = Object.entries(additionalFiles ?? {})
      .filter(([f]) => f.endsWith('.tex'));
    if (texFiles.length > 0) {
      const decoder = new TextDecoder(); // Reuse for all files
      for (const [, content] of texFiles) {
        scanSource(typeof content === 'string' ? content : decoder.decode(content));
      }
    }

    // Expand with known dependencies from packageDeps
    const expanded = new Set(packages);
    const visited = new Set();

    const expandDeps = (pkg) => {
      if (visited.has(pkg)) return;
      visited.add(pkg);

      for (const dep of this._getPackageDeps(pkg)) {
        // Skip obviously invalid entries (LaTeX syntax that leaked into deps)
        if (typeof dep !== 'string' || !dep || dep.startsWith('#') || dep.startsWith('\\')) continue;
        expanded.add(dep);
        expandDeps(dep);
      }
    };

    for (const pkg of packages) {
      expandDeps(pkg);
    }

    // Bundles selected for the packages the document names directly.
    const directBundles = new Set(this.resolveBundles([...packages], engine));

    const bundledPackages = [];
    const ctanPackages = [];
    const additionalBundles = new Set();

    for (const pkg of expanded) {
      const bundleName = this.packageMap?.[pkg];
      if (!bundleName || !this.bundleExists(bundleName)) {
        // Not in any known bundle: needs a CTAN fetch.
        ctanPackages.push(pkg);
        continue;
      }

      bundledPackages.push(pkg);
      if (!directBundles.has(bundleName)) {
        // Bundle exists but was not selected above.
        additionalBundles.add(bundleName);
      }
    }

    return { bundledPackages, ctanPackages, additionalBundles: [...additionalBundles] };
  }

  /**
   * Load a bundle by name: memory cache, then persistent cache, then network.
   * @param {string} bundleName - Bundle name
   * @returns {Promise<ArrayBuffer|SharedArrayBuffer>} Bundle data
   * @throws {Error} If the bundle has to be fetched and the response is not ok
   */
  async loadBundle(bundleName) {
    // Check memory cache
    if (this.bundleCache.has(bundleName)) {
      return this.bundleCache.get(bundleName);
    }

    // Check persistent cache
    const cached = await getBundleFromCache(bundleName);
    if (cached) {
      this.onLog(`  From cache: ${bundleName}`);
      this.bundleCache.set(bundleName, cached);
      this.cacheHitCount++;
      return cached;
    }

    // Fetch from server
    const url = `${this.bundleBase}/${bundleName}.data.gz`;
    this.onLog(`  Fetching: ${bundleName}`);

    const response = await fetch(url);
    if (!response.ok) throw new Error(`Failed to load ${bundleName}: ${response.status}`);

    const compressed = await response.arrayBuffer();
    this.bytesDownloaded += compressed.byteLength;

    // A Brotli Content-Encoding means the browser already decoded the body,
    // so the payload is passed through instead of being gunzipped.
    const contentEncoding = response.headers.get('Content-Encoding');
    const format = contentEncoding === 'br' ? 'none' : 'gzip';
    const decompressed = await decompress(compressed, format);
    this.bundleCache.set(bundleName, decompressed);

    // Save to cache in the background. The write is not awaited, so its
    // failure is handled here rather than becoming an unhandled rejection.
    Promise.resolve()
      .then(() => saveBundleToCache(bundleName, decompressed))
      .catch((e) => {
        this.onLog(`  Cache save failed for ${bundleName}: ${e?.message ?? e}`);
      });

    return decompressed;
  }

  /**
   * Load multiple bundles in parallel.
   * Bundles that fail to load are logged and omitted from the result.
   * @param {string[]} bundleNames - Bundle names
   * @returns {Promise<Object<string, ArrayBuffer|SharedArrayBuffer>>} Map of bundle data
   */
  async loadBundles(bundleNames) {
    const bundleData = {};
    await Promise.all(bundleNames.map(async (name) => {
      try {
        bundleData[name] = await this.loadBundle(name);
      } catch (e) {
        this.onLog(`Failed to load bundle ${name}: ${e.message}`);
      }
    }));
    return bundleData;
  }

  /**
   * Get bundle loading statistics.
   * @returns {{bytesDownloaded: number, cacheHits: number, bundlesCached: number}}
   */
  getStats() {
    return {
      bytesDownloaded: this.bytesDownloaded,
      cacheHits: this.cacheHitCount,
      bundlesCached: this.bundleCache.size,
    };
  }

  /**
   * Clear in-memory bundle cache to free RAM. Only the in-memory Map is
   * touched; nothing is removed from the persistent cache.
   */
  clearCache() {
    this.bundleCache.clear();
    this.onLog('Bundle memory cache cleared');
  }

  /**
   * Preload all required bundles for an engine.
   * Does nothing when the engine has no `required` list.
   * @param {string} [engine] - Engine name
   * @returns {Promise<void>}
   */
  async preloadEngine(engine = 'pdflatex') {
    await this.loadBundleDeps();
    const engineDeps = this.bundleDeps?.engines?.[engine];
    if (!engineDeps?.required) return;

    this.onLog(`Preloading ${engine} bundles...`);

    // loadBundles omits failures, so count what actually arrived.
    const loaded = await this.loadBundles(engineDeps.required);
    this.onLog(`Preload complete: ${Object.keys(loaded).length} bundles loaded`);
  }
}
513
+
514
/**
 * Detect the appropriate engine from LaTeX source.
 *
 * XeLaTeX is chosen when the source loads `fontspec` or `unicode-math`
 * through \usepackage or \RequirePackage (with or without an [options]
 * argument, alone or inside a comma-separated list), or when it uses one of
 * the \setmainfont / \setsansfont / \setmonofont commands. Everything else
 * falls back to pdfLaTeX. The scan is purely textual, so commented-out
 * commands are matched too.
 *
 * @param {string} source - LaTeX source
 * @returns {'pdflatex'|'xelatex'} Detected engine
 */
export function detectEngine(source) {
  const xelatexPackages = new Set(['fontspec', 'unicode-math']);
  const xelatexCommands = ['\\setmainfont', '\\setsansfont', '\\setmonofont'];

  // \usepackage[options]{a,b} / \RequirePackage[options]{a,b}
  const loadRegex = /\\(?:usepackage|RequirePackage)\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}/g;
  for (const match of source.matchAll(loadRegex)) {
    const names = match[1].split(',').map((name) => name.trim());
    if (names.some((name) => xelatexPackages.has(name))) {
      return 'xelatex';
    }
  }

  if (xelatexCommands.some((command) => source.includes(command))) {
    return 'xelatex';
  }

  // pdfLaTeX is default
  return 'pdflatex';
}
532
+
533
/**
 * Return the preamble of a LaTeX document, i.e. the text that precedes the
 * first literal `\begin{document}`.
 * @param {string} source - LaTeX source
 * @returns {string} Preamble content, or an empty string when the source
 *   contains no `\begin{document}`
 */
export function extractPreamble(source) {
  const marker = '\\begin{document}';
  const cut = source.indexOf(marker);
  return cut < 0 ? '' : source.slice(0, cut);
}
543
+
544
+ // Re-export hashPreamble from centralized hash module (BLAKE3-WASM)
545
+ export { hashPreamble } from './hash.js';