npm - @jacobknightley/fabric-format - Versions diffs - 0.0.6 → 0.0.8 - Mend

@jacobknightley/fabric-format 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/README.md +38 -163
package/dist/benchmarks/profile.d.ts +8 -0
package/dist/benchmarks/profile.js +68 -0
package/dist/benchmarks/run.d.ts +7 -0
package/dist/benchmarks/run.js +202 -0
package/dist/cell-formatter.d.ts +39 -0
package/dist/cell-formatter.js +93 -0
package/dist/cli.js +35 -10
package/dist/formatters/index.js +10 -0
package/dist/formatters/python/config.d.ts +21 -0
package/dist/formatters/python/config.js +128 -0
package/dist/formatters/python/index.d.ts +2 -0
package/dist/formatters/python/index.js +5 -0
package/dist/formatters/python/python-formatter.js +137 -11
package/dist/formatters/python/spark-sql-extractor.d.ts +80 -0
package/dist/formatters/python/spark-sql-extractor.js +297 -0
package/dist/formatters/python/spark-sql-formatter.d.ts +61 -0
package/dist/formatters/python/spark-sql-formatter.js +257 -0
package/dist/formatters/sparksql/fmt-detector.d.ts +2 -20
package/dist/formatters/sparksql/fmt-detector.js +1 -17
package/dist/formatters/sparksql/formatter.js +263 -235
package/dist/formatters/sparksql/formatting-context.d.ts +0 -4
package/dist/formatters/sparksql/formatting-context.js +30 -29
package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +1 -0
package/dist/formatters/sparksql/generated/SqlBaseLexer.js +1840 -1829
package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +32 -2
package/dist/formatters/sparksql/generated/SqlBaseParser.js +6746 -6514
package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +2 -0
package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +6 -0
package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +1 -0
package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +4 -0
package/dist/formatters/sparksql/generated/builtinFunctions.js +6 -0
package/dist/formatters/sparksql/index.d.ts +1 -1
package/dist/formatters/sparksql/newline-calculator.d.ts +1 -1
package/dist/formatters/sparksql/newline-calculator.js +2 -10
package/dist/formatters/sparksql/output-builder.d.ts +1 -0
package/dist/formatters/sparksql/output-builder.js +5 -4
package/dist/formatters/sparksql/output-helpers.js +1 -8
package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +1 -0
package/dist/formatters/sparksql/parse-tree-analyzer.js +137 -6
package/dist/formatters/sparksql/types.d.ts +6 -3
package/dist/formatters/types.d.ts +3 -1
package/dist/index.d.ts +1 -1
package/dist/index.js +1 -1
package/dist/notebook-formatter.js +47 -18
package/package.json +4 -1

package/README.md CHANGED Viewed

@@ -2,13 +2,6 @@
 A zero-config formatter for **Microsoft Fabric notebooks**.
-## Packages
-| Package                                      | Description                                |
-| -------------------------------------------- | ------------------------------------------ |
-| [@jacobknightley/fabric-format](./packages/core) | Core formatting library (npm package)      |
-| [fabric-format-chromium](./packages/chromium)    | Chrome/Edge extension for Fabric notebooks |
 ## Philosophy
 **Opinionated by design.** This formatter has one style, enforced everywhere, with no configuration options—and no plans to add any.
@@ -17,17 +10,28 @@ Built this for teams who want consistent notebook formatting without endless deb
 The focus is on clean, consistent output—not tailored experiences or nuanced edge cases.
+## Browser Extension
+Format Fabric notebooks directly in your browser with a single click.
+   ![ExtensionDemo](https://github.com/user-attachments/assets/30acd57f-0cd3-4edb-a0ae-f7db06ba1de1)
+1. Install the extension from [Edge Add-ons](https://microsoftedge.microsoft.com/addons/detail/fabric-format/pagkopelpfjaedelgckkbmcepekgheaj)
+    > Until Chrome is supported, download the [extension](https://github.com/jacobknightley/fabric-format/releases) and [load it unpacked](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked) in Chrome developer mode
+2. Open a notebook in Microsoft Fabric
+3. Click the ![Format button in Fabric notebook toolbar](assets/extension-format-button.png) button in the notebook toolbar
 ## CLI
+Format Fabric notebook-content files synced from a workspace in a repository.
-### Installation
 ```bash
+# install
 npm install -g @jacobknightley/fabric-format
-```
-### Usage
-```bash
 # format
 fabfmt format notebook.py                                # Format a single file
 fabfmt format ./src                                      # Format all files in directory
@@ -42,172 +46,43 @@ fabfmt check --type sparksql -i "select * from t"       # Check inline string
 echo "select * from t" | fabfmt check --type sparksql   # Check from stdin
 ```
-### Exit Codes
-| Code | Meaning                                          |
-| ---- | ------------------------------------------------ |
-| 0    | Success (format: no changes needed, check: pass) |
-| 1    | Failure (format: error occurred, check: changes needed) |
-| 2    | Usage error (invalid arguments, missing files)   |
-## Browser Extension
-Format Fabric notebooks directly in your browser with a single click.
-### Installation
-1. Download `fabric-format-chromium.zip` from the [latest release](https://github.com/jacobknightley/fabric-format/releases)
-2. Extract the zip file
-3. Load the unpacked extension in your browser:
-   - **Chrome:** [Install an unpacked extension](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked)
-   - **Edge:** [Sideload an extension](https://learn.microsoft.com/en-us/microsoft-edge/extensions-chromium/getting-started/extension-sideloading)
-> **Note:** Plan to eventually publish to the Chrome Web Store and Edge Add-ons.
-### Browser Compatibility
-| Browser | Version | Status |
-| ------- | ------- | ------ |
-| Chrome  | 88+     | ✅ Supported |
-| Edge    | 88+     | ✅ Supported |
-| Firefox | —       | ❌ Not supported (Manifest V3 only) |
-| Safari  | —       | ❌ Not supported |
-Requires a Chromium-based browser with Manifest V3 and WASM support.
-### Usage
-1. Open a notebook in Microsoft Fabric
-2. Click the **Format** button in the notebook toolbar
-   ![Format button in Fabric notebook toolbar](assets/extension-format-button.png)
-3. All cells in the notebook are formatted instantly
-## Supported File Types
+### Supported File Types
 - `.py` — Python notebooks
 - `.scala` — Scala notebooks
 - `.r` — R notebooks
 - `.sql` — SQL notebooks
-## Supported Languages
-- Spark SQL
-- Python
-> **Note:** All other language cells are preserved as-is.
+## Language Support
 ### Spark SQL
+The SQL formatter uses an ANTLR grammar to parse and reformat Spark SQL. All keywords, functions, and syntax are derived directly from the official Spark SQL grammar.
----
-Custom formatter built on [Apache Spark's official ANTLR grammar](https://github.com/apache/spark/tree/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser). If Spark supports the syntax, fabric-format formats it correctly.
-#### Style Overview
-| Element                | Formatting                 |
-| ---------------------- | -------------------------- |
-| Keywords               | `UPPERCASE`                |
-| Built-in functions     | `UPPERCASE()`              |
-| User-defined functions | `preserveCase()`           |
-| Identifiers            | `preserveCase`             |
-| Indentation            | 4 spaces                   |
-| Expression line width  | 140 characters (then wrap) |
-| Commas                 | Leading (comma-first)      |
-See [SQL_STYLE_GUIDE.md](./SQL_STYLE_GUIDE.md) for complete rules and examples.
-#### Format Directives
-##### `fmt: off`
-Skip formatting entirely—preserves original whitespace and casing. Applicable only to the statement directly after it.
-```sql
--- fmt: off
-select  Col_A,Col_B B,Col_C   from   t;
-select  Col_A,Col_B B,Col_C   from   t;
-```
-⬇️ Output
-```sql
--- fmt: off
-select  Col_A,Col_B B,Col_C   from   t;
-SELECT
-     Col_A
-    ,Col_B AS B
-    ,Col_C
-FROM t;
-```
-##### `fmt: inline`
-Suppress line wrapping for long expressions that are wrapped by default at 140 characters.
-```sql
-SELECT
-     conv(right(md5(upper(concat(coalesce(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS A-- fmt: inline
-    ,conv(right(md5(upper(concat(coalesce(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS B
-FROM t
-```
-⬇️ Output
-```sql
-SELECT
-     CONV(RIGHT(MD5(UPPER(CONCAT(COALESCE(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS A -- fmt: inline
-    ,CONV(
-         RIGHT(
-             MD5(UPPER(CONCAT(
-                 COALESCE(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn)
-                ,SomeOtherReallyLongColumnName
-            )))
-            ,16
-        )
-        ,16
-        ,-10
-    ) AS B
-FROM t
-```
-### Python
+See [SQL_STYLE_GUIDE.md](SQL_STYLE_GUIDE.md) for formatting rules.
----
+### Python / PySpark
+The Python formatter uses [Ruff](https://github.com/astral-sh/ruff) WASM for:
-Formatted via [Ruff](https://docs.astral.sh/ruff/) with sensible defaults:
+1. **Code formatting** — Consistent styling (line length 140, double quotes, trailing commas)
+2. **Safe lint auto-fixes** — Automatically applies safe fixes from ~60 Ruff rules
-- 140 character line width
-- 4-space indentation
-- Double quotes
-- PEP 8 compliant
+**Included lint auto-fixes:**
+- **Import sorting** (I001) — Organizes imports by standard library, third-party, local
+- **Modernization** (UP008, UP018, UP032) — Updates deprecated patterns to modern Python
+- **Simplifications** (SIM118, SIM201, SIM300) — `key in dict` instead of `key in dict.keys()`, etc.
+- **Bug fixes** (B009, B010) — Replace `getattr()`/`setattr()` calls that use a constant attribute name with direct attribute access
+- **Style** (E703, E711, F632) — Remove useless semicolons, use `is None`, etc.
+- **Ruff specific** (RUF005) — List concatenation with unpacking
-Magic commands (`%%sql`, `%run`, etc.) are preserved.
+**Explicitly excluded rules** (unsafe for notebooks):
+- **F401, F841** — Unused imports/variables may be used in other cells
+- **Rules that add imports** (RUF017, SIM105, etc.) — Can break cell execution order
+- **RET504** — Removing intermediate variables changes code structure
-#### Format Directives
+The lint fixes are applied automatically—no configuration needed.
-##### `fmt: off` / `fmt: on`
-Disable formatting for a block of code:
-```python
-# fmt: off
-matrix = [
-    1, 0, 0,
-    0, 1, 0,
-    0, 0, 1,
-]
-# fmt: on
-```
-##### `fmt: skip`
-Skip formatting for a single statement:
-```python
-result = some_function(a, b,    c,d,  e)  # fmt: skip
-```
-See [Ruff's documentation](https://docs.astral.sh/ruff/formatter/#format-suppression) for more details.
+## Documentation
+Find all documentation at [fabric-format wiki](https://github.com/JacobKnightley/fabric-format/wiki)

package/dist/benchmarks/profile.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+/**
+ * SQL Formatter Profiler
+ *
+ * Profiles the SQL formatter to identify performance bottlenecks.
+ * Run with: node --prof dist/benchmarks/profile.js
+ * Then: node --prof-process isolate-*.log > profile.txt
+ */
+export {};

package/dist/benchmarks/profile.js ADDED Viewed

@@ -0,0 +1,68 @@
+/**
+ * SQL Formatter Profiler
+ *
+ * Profiles the SQL formatter to identify performance bottlenecks.
+ * Run with: node --prof dist/benchmarks/profile.js
+ * Then: node --prof-process isolate-*.log > profile.txt
+ */
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { formatSql } from '../formatters/sparksql/index.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+/**
+ * Load a fixture file.
+ */
+function loadFixture(category, name) {
+    const fixturePath = path.join(__dirname, 'fixtures', category, name);
+    return fs.readFileSync(fixturePath, 'utf-8');
+}
+/**
+ * Run many iterations to get enough data for profiling.
+ */
+function profileSql(sql, iterations, label) {
+    console.log(`Profiling ${label}: ${iterations} iterations...`);
+    const start = performance.now();
+    for (let i = 0; i < iterations; i++) {
+        formatSql(sql);
+    }
+    const end = performance.now();
+    console.log(`  Completed in ${(end - start).toFixed(2)}ms`);
+    console.log(`  Average: ${((end - start) / iterations).toFixed(3)}ms per call`);
+}
+async function main() {
+    const args = process.argv.slice(2);
+    const iterations = Number.parseInt(args[0], 10) || 1000;
+    console.log('╔══════════════════════════════════════════════════════════════╗');
+    console.log('║            SQL Formatter Profiler                            ║');
+    console.log('╚══════════════════════════════════════════════════════════════╝');
+    console.log();
+    console.log('Run this script with --prof flag to generate V8 profile data:');
+    console.log('  node --prof dist/benchmarks/profile.js');
+    console.log();
+    console.log('Then process the log:');
+    console.log('  node --prof-process isolate-*.log > profile.txt');
+    console.log();
+    // Load fixtures
+    const smallSql = loadFixture('small', 'simple-select.sql');
+    const mediumSql = loadFixture('medium', 'cte.sql');
+    const largeSql = loadFixture('large', 'complex-analytics.sql');
+    // Warm up
+    console.log('Warming up...');
+    for (let i = 0; i < 10; i++) {
+        formatSql(smallSql);
+        formatSql(mediumSql);
+        formatSql(largeSql);
+    }
+    console.log();
+    console.log(`Running ${iterations} iterations per fixture...`);
+    console.log();
+    // Profile each size
+    profileSql(smallSql, iterations, 'small (simple-select.sql)');
+    profileSql(mediumSql, iterations, 'medium (cte.sql)');
+    profileSql(largeSql, Math.floor(iterations / 2), 'large (complex-analytics.sql)');
+    console.log();
+    console.log('Profile data collection complete.');
+}
+main().catch(console.error);

package/dist/benchmarks/run.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+/**
+ * SQL Formatter Benchmark Suite
+ *
+ * Measures performance of the SQL formatter across various query sizes and complexity levels.
+ * Outputs timing data for profiling and optimization work.
+ */
+export {};

package/dist/benchmarks/run.js ADDED Viewed

@@ -0,0 +1,202 @@
+/**
+ * SQL Formatter Benchmark Suite
+ *
+ * Measures performance of the SQL formatter across various query sizes and complexity levels.
+ * Outputs timing data for profiling and optimization work.
+ */
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { formatSql } from '../formatters/sparksql/index.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+/**
+ * Run a single benchmark with multiple iterations.
+ */
+function runBenchmark(name, category, sql, iterations) {
+    const times = [];
+    let output = '';
+    // Warm-up run (not counted)
+    formatSql(sql);
+    for (let i = 0; i < iterations; i++) {
+        const start = performance.now();
+        output = formatSql(sql);
+        const end = performance.now();
+        times.push(end - start);
+    }
+    const totalMs = times.reduce((a, b) => a + b, 0);
+    const avgMs = totalMs / iterations;
+    const minMs = Math.min(...times);
+    const maxMs = Math.max(...times);
+    return {
+        name,
+        category,
+        inputSize: sql.length,
+        outputSize: output.length,
+        iterations,
+        totalMs,
+        avgMs,
+        minMs,
+        maxMs,
+        opsPerSec: 1000 / avgMs,
+        changed: output !== sql,
+    };
+}
+/**
+ * Load all SQL files from a directory.
+ */
+function loadFixtures(dir) {
+    const fixtures = new Map();
+    if (!fs.existsSync(dir)) {
+        return fixtures;
+    }
+    const files = fs.readdirSync(dir);
+    for (const file of files) {
+        if (file.endsWith('.sql')) {
+            const content = fs.readFileSync(path.join(dir, file), 'utf-8');
+            fixtures.set(file, content);
+        }
+    }
+    return fixtures;
+}
+/**
+ * Format duration in a human-readable way.
+ */
+function formatDuration(ms) {
+    if (ms < 1) {
+        return `${(ms * 1000).toFixed(1)}µs`;
+    }
+    if (ms < 1000) {
+        return `${ms.toFixed(2)}ms`;
+    }
+    return `${(ms / 1000).toFixed(2)}s`;
+}
+/**
+ * Format a table row with aligned columns.
+ */
+function formatRow(cols, widths, align) {
+    return cols
+        .map((col, i) => {
+        const width = widths[i] || 20;
+        const a = align[i] || 'left';
+        return a === 'left' ? col.padEnd(width) : col.padStart(width);
+    })
+        .join(' │ ');
+}
+/**
+ * Run all benchmarks and output results.
+ */
+async function main() {
+    const args = process.argv.slice(2);
+    const iterations = Number.parseInt(args[0], 10) || 100;
+    const outputJson = args.includes('--json');
+    console.log('╔════════════════════════════════════════════════════════════════╗');
+    console.log('║           SQL Formatter Benchmark Suite                        ║');
+    console.log('╚════════════════════════════════════════════════════════════════╝');
+    console.log();
+    console.log(`Iterations per file: ${iterations}`);
+    console.log();
+    const fixturesDir = path.join(__dirname, 'fixtures');
+    const categories = ['small', 'medium', 'large'];
+    const results = [];
+    const summaries = [];
+    for (const category of categories) {
+        const categoryDir = path.join(fixturesDir, category);
+        const fixtures = loadFixtures(categoryDir);
+        if (fixtures.size === 0) {
+            console.log(`⚠️  No fixtures found in ${category}/`);
+            continue;
+        }
+        console.log(`\n━━━ ${category.toUpperCase()} QUERIES ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
+        const colWidths = [30, 10, 10, 12, 12, 10];
+        const colAlign = [
+            'left',
+            'right',
+            'right',
+            'right',
+            'right',
+            'right',
+        ];
+        console.log(formatRow(['File', 'Size', 'Avg', 'Min', 'Max', 'ops/sec'], colWidths, colAlign));
+        console.log('─'.repeat(90));
+        let categoryTime = 0;
+        let categoryChars = 0;
+        for (const [file, sql] of fixtures) {
+            const result = runBenchmark(file, category, sql, iterations);
+            results.push(result);
+            categoryTime += result.avgMs;
+            categoryChars += result.inputSize;
+            console.log(formatRow([
+                file,
+                `${result.inputSize}`,
+                formatDuration(result.avgMs),
+                formatDuration(result.minMs),
+                formatDuration(result.maxMs),
+                `${result.opsPerSec.toFixed(1)}`,
+            ], colWidths, colAlign));
+        }
+        summaries.push({
+            category,
+            fileCount: fixtures.size,
+            totalInputChars: categoryChars,
+            totalTimeMs: categoryTime,
+            avgTimePerFile: categoryTime / fixtures.size,
+            avgTimePerKilochar: (categoryTime / categoryChars) * 1000,
+        });
+    }
+    // Summary
+    console.log('\n╔════════════════════════════════════════════════════════════════╗');
+    console.log('║                        SUMMARY                                 ║');
+    console.log('╚════════════════════════════════════════════════════════════════╝');
+    console.log();
+    const summaryWidths = [10, 8, 12, 14, 14];
+    const summaryAlign = [
+        'left',
+        'right',
+        'right',
+        'right',
+        'right',
+    ];
+    console.log(formatRow(['Category', 'Files', 'Total Chars', 'Avg/File', 'ms/1K chars'], summaryWidths, summaryAlign));
+    console.log('─'.repeat(65));
+    for (const s of summaries) {
+        console.log(formatRow([
+            s.category,
+            `${s.fileCount}`,
+            `${s.totalInputChars}`,
+            formatDuration(s.avgTimePerFile),
+            s.avgTimePerKilochar.toFixed(3),
+        ], summaryWidths, summaryAlign));
+    }
+    const totalTime = summaries.reduce((a, s) => a + s.totalTimeMs, 0);
+    const totalChars = summaries.reduce((a, s) => a + s.totalInputChars, 0);
+    const totalFiles = summaries.reduce((a, s) => a + s.fileCount, 0);
+    console.log('─'.repeat(65));
+    console.log(formatRow([
+        'TOTAL',
+        `${totalFiles}`,
+        `${totalChars}`,
+        formatDuration(totalTime / totalFiles),
+        ((totalTime / totalChars) * 1000).toFixed(3),
+    ], summaryWidths, summaryAlign));
+    console.log();
+    console.log(`Total benchmark time: ${formatDuration(totalTime * iterations)}`);
+    if (outputJson) {
+        const jsonOutput = {
+            timestamp: new Date().toISOString(),
+            iterations,
+            results,
+            summaries,
+            totals: {
+                files: totalFiles,
+                chars: totalChars,
+                avgTimePerFile: totalTime / totalFiles,
+                msPerKilochar: (totalTime / totalChars) * 1000,
+            },
+        };
+        const jsonPath = path.join(__dirname, 'results.json');
+        fs.writeFileSync(jsonPath, JSON.stringify(jsonOutput, null, 2));
+        console.log(`\nResults saved to: ${jsonPath}`);
+    }
+}
+main().catch(console.error);

package/dist/cell-formatter.d.ts CHANGED Viewed

@@ -82,6 +82,45 @@ export declare function getPythonFormatterInitPromise(): Promise<void> | null;
  * This allows re-initialization with different options.
  */
 export declare function resetPythonFormatterState(): void;
+/**
+ * Detect which languages are present in notebook content.
+ * Uses fast regex patterns - does NOT parse the full notebook structure.
+ *
+ * This enables lazy initialization: only load formatters for languages actually used.
+ *
+ * @param content Raw notebook file content
+ * @returns Set of normalized language identifiers that need formatting
+ *          (for example 'sql' or 'python'); empty when no language is detected
+ *
+ * @example
+ * ```typescript
+ * const languages = detectLanguagesInContent(notebookContent);
+ * // languages might be Set { 'sql', 'python' }
+ * await initializeFormatters(languages);
+ * ```
+ */
+export declare function detectLanguagesInContent(content: string): Set<string>;
+/**
+ * Initialize formatters for the specified languages in parallel.
+ *
+ * This is the recommended way to initialize formatters when you know
+ * which languages you'll need. Unneeded formatters are not loaded.
+ *
+ * @param languages Set of language identifiers to initialize
+ *                  (typically the result of `detectLanguagesInContent`)
+ * @param options Optional WASM options for Python formatter
+ * @returns Promise that resolves once the requested formatters have finished initializing
+ *
+ * @example
+ * ```typescript
+ * // Scan files first
+ * const languages = detectLanguagesInContent(content);
+ *
+ * // Initialize only what's needed (parallel)
+ * await initializeFormatters(languages);
+ *
+ * // Now format cells
+ * formatCell(code, 'python');
+ * ```
+ */
+export declare function initializeFormatters(languages: Set<string>, options?: WasmInitOptions): Promise<void>;
 /**
  * Format a single cell's content based on its type.
  *

package/dist/cell-formatter.js CHANGED Viewed

@@ -125,6 +125,99 @@ export function resetPythonFormatterState() {
     resetPythonFormatter();
 }
 // ============================================================================
+// Language Detection for Lazy Initialization
+// ============================================================================
+/**
+ * Detect which languages are present in notebook content.
+ * Uses fast regex patterns - does NOT parse the full notebook structure.
+ *
+ * This enables lazy initialization: only load formatters for languages actually used.
+ *
+ * @param content Raw notebook file content
+ * @returns Set of language identifiers that need formatting
+ *
+ * @example
+ * ```typescript
+ * const languages = detectLanguagesInContent(notebookContent);
+ * // languages might be Set { 'sql', 'python' }
+ * await initializeFormatters(languages);
+ * ```
+ */
+export function detectLanguagesInContent(content) {
+    const languages = new Set();
+    // Pattern: "language": "xxx" in METADATA blocks
+    const languagePattern = /"language"\s*:\s*"(\w+)"/g;
+    let match = languagePattern.exec(content);
+    while (match !== null) {
+        const lang = match[1].toLowerCase();
+        // Normalize language names
+        if (lang === 'sparksql' || lang === 'sql') {
+            languages.add('sql');
+        }
+        else if (lang === 'python' || lang === 'pyspark') {
+            languages.add('python');
+        }
+        else if (lang === 'scala') {
+            languages.add('scala');
+        }
+        else if (lang === 'r') {
+            languages.add('r');
+        }
+        match = languagePattern.exec(content);
+    }
+    // Also check for MAGIC commands (in case metadata is missing)
+    if (content.includes('%%sql')) {
+        languages.add('sql');
+    }
+    if (content.includes('%%python') || content.includes('%%pyspark')) {
+        languages.add('python');
+    }
+    if (content.includes('%%scala')) {
+        languages.add('scala');
+    }
+    if (content.includes('%%r') || content.includes('%%R')) {
+        languages.add('r');
+    }
+    return languages;
+}
+/**
+ * Initialize formatters for the specified languages in parallel.
+ *
+ * This is the recommended way to initialize formatters when you know
+ * which languages you'll need. Unneeded formatters are not loaded.
+ *
+ * @param languages Set of language identifiers to initialize
+ * @param options Optional WASM options for Python formatter
+ *
+ * @example
+ * ```typescript
+ * // Scan files first
+ * const languages = detectLanguagesInContent(content);
+ *
+ * // Initialize only what's needed (parallel)
+ * await initializeFormatters(languages);
+ *
+ * // Now format cells
+ * formatCell(code, 'python');
+ * ```
+ */
+export async function initializeFormatters(languages, options) {
+    const promises = [];
+    // SQL formatter is synchronous (no init needed)
+    // Python formatter needs async WASM loading
+    if (languages.has('python') && !pythonFormatterReady) {
+        promises.push(initializePythonFormatter(options));
+    }
+    // Future: Add scala, r formatters here
+    // if (languages.has('scala')) {
+    //   promises.push(initializeScalaFormatter());
+    // }
+    // if (languages.has('r')) {
+    //   promises.push(initializeRFormatter());
+    // }
+    await Promise.all(promises);
+}
+// ============================================================================
 // Cell Formatting API
 // ============================================================================
 /**