@jacobknightley/fabric-format 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +38 -163
  2. package/dist/benchmarks/profile.d.ts +8 -0
  3. package/dist/benchmarks/profile.js +68 -0
  4. package/dist/benchmarks/run.d.ts +7 -0
  5. package/dist/benchmarks/run.js +202 -0
  6. package/dist/cell-formatter.d.ts +39 -0
  7. package/dist/cell-formatter.js +93 -0
  8. package/dist/cli.js +35 -10
  9. package/dist/formatters/index.js +10 -0
  10. package/dist/formatters/python/config.d.ts +21 -0
  11. package/dist/formatters/python/config.js +128 -0
  12. package/dist/formatters/python/index.d.ts +2 -0
  13. package/dist/formatters/python/index.js +5 -0
  14. package/dist/formatters/python/python-formatter.js +137 -11
  15. package/dist/formatters/python/spark-sql-extractor.d.ts +80 -0
  16. package/dist/formatters/python/spark-sql-extractor.js +297 -0
  17. package/dist/formatters/python/spark-sql-formatter.d.ts +61 -0
  18. package/dist/formatters/python/spark-sql-formatter.js +257 -0
  19. package/dist/formatters/sparksql/fmt-detector.d.ts +2 -20
  20. package/dist/formatters/sparksql/fmt-detector.js +1 -17
  21. package/dist/formatters/sparksql/formatter.js +263 -235
  22. package/dist/formatters/sparksql/formatting-context.d.ts +0 -4
  23. package/dist/formatters/sparksql/formatting-context.js +30 -29
  24. package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +1 -0
  25. package/dist/formatters/sparksql/generated/SqlBaseLexer.js +1840 -1829
  26. package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +32 -2
  27. package/dist/formatters/sparksql/generated/SqlBaseParser.js +6746 -6514
  28. package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +2 -0
  29. package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +6 -0
  30. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +1 -0
  31. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +4 -0
  32. package/dist/formatters/sparksql/generated/builtinFunctions.js +6 -0
  33. package/dist/formatters/sparksql/index.d.ts +1 -1
  34. package/dist/formatters/sparksql/newline-calculator.d.ts +1 -1
  35. package/dist/formatters/sparksql/newline-calculator.js +2 -10
  36. package/dist/formatters/sparksql/output-builder.d.ts +1 -0
  37. package/dist/formatters/sparksql/output-builder.js +5 -4
  38. package/dist/formatters/sparksql/output-helpers.js +1 -8
  39. package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +1 -0
  40. package/dist/formatters/sparksql/parse-tree-analyzer.js +137 -6
  41. package/dist/formatters/sparksql/types.d.ts +6 -3
  42. package/dist/formatters/types.d.ts +3 -1
  43. package/dist/index.d.ts +1 -1
  44. package/dist/index.js +1 -1
  45. package/dist/notebook-formatter.js +47 -18
  46. package/package.json +4 -1
package/README.md CHANGED
@@ -2,13 +2,6 @@
2
2
 
3
3
  A zero-config formatter for **Microsoft Fabric notebooks**.
4
4
 
5
- ## Packages
6
-
7
- | Package | Description |
8
- | -------------------------------------------- | ------------------------------------------ |
9
- | [@jacobknightley/fabric-format](./packages/core) | Core formatting library (npm package) |
10
- | [fabric-format-chromium](./packages/chromium) | Chrome/Edge extension for Fabric notebooks |
11
-
12
5
  ## Philosophy
13
6
 
14
7
  **Opinionated by design.** This formatter has one style, enforced everywhere, with no configuration options—and no plans to add any.
@@ -17,17 +10,28 @@ Built this for teams who want consistent notebook formatting without endless deb
17
10
 
18
11
  The focus is on clean, consistent output—not tailored experiences or nuanced edge cases.
19
12
 
13
+ ## Browser Extension
14
+
15
+ Format Fabric notebooks directly in your browser with a single click.
16
+
17
+ ![ExtensionDemo](https://github.com/user-attachments/assets/30acd57f-0cd3-4edb-a0ae-f7db06ba1de1)
18
+
19
+ 1. Install the Edge extension from [Edge Add-ons](https://microsoftedge.microsoft.com/addons/detail/fabric-format/pagkopelpfjaedelgckkbmcepekgheaj)
20
+ > Until Chrome is supported, download the [extension](https://github.com/jacobknightley/fabric-format/releases) and [load it unpacked](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked) in Chrome developer mode.
21
+ 2. Open a notebook in Microsoft Fabric
22
+ 3. Click the ![Format button in Fabric notebook toolbar](assets/extension-format-button.png) button in the notebook toolbar
23
+
24
+
25
+
26
+
20
27
  ## CLI
28
+ Format Fabric notebook-content files synced from a workspace in a repository.
21
29
 
22
- ### Installation
23
30
 
24
31
  ```bash
32
+ # install
25
33
  npm install -g @jacobknightley/fabric-format
26
- ```
27
34
 
28
- ### Usage
29
-
30
- ```bash
31
35
  # format
32
36
  fabfmt format notebook.py # Format a single file
33
37
  fabfmt format ./src # Format all files in directory
@@ -42,172 +46,43 @@ fabfmt check --type sparksql -i "select * from t" # Check inline string
42
46
  echo "select * from t" | fabfmt check --type sparksql # Check from stdin
43
47
  ```
44
48
 
45
- ### Exit Codes
46
-
47
- | Code | Meaning |
48
- | ---- | ------------------------------------------------ |
49
- | 0 | Success (format: no changes needed, check: pass) |
50
- | 1 | Failure (format: error occurred, check: changes needed) |
51
- | 2 | Usage error (invalid arguments, missing files) |
52
-
53
- ## Browser Extension
54
-
55
- Format Fabric notebooks directly in your browser with a single click.
56
-
57
- ### Installation
58
-
59
- 1. Download `fabric-format-chromium.zip` from the [latest release](https://github.com/jacobknightley/fabric-format/releases)
60
- 2. Extract the zip file
61
- 3. Load the unpacked extension in your browser:
62
- - **Chrome:** [Install an unpacked extension](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked)
63
- - **Edge:** [Sideload an extension](https://learn.microsoft.com/en-us/microsoft-edge/extensions-chromium/getting-started/extension-sideloading)
64
-
65
- > **Note:** Plan to eventually publish to the Chrome Web Store and Edge Add-ons.
66
-
67
- ### Browser Compatibility
68
-
69
- | Browser | Version | Status |
70
- | ------- | ------- | ------ |
71
- | Chrome | 88+ | ✅ Supported |
72
- | Edge | 88+ | ✅ Supported |
73
- | Firefox | — | ❌ Not supported (Manifest V3 only) |
74
- | Safari | — | ❌ Not supported |
75
-
76
- Requires a Chromium-based browser with Manifest V3 and WASM support.
77
-
78
- ### Usage
79
-
80
- 1. Open a notebook in Microsoft Fabric
81
- 2. Click the **Format** button in the notebook toolbar
82
-
83
- ![Format button in Fabric notebook toolbar](assets/extension-format-button.png)
84
-
85
- 3. All cells in the notebook are formatted instantly
86
49
 
87
- ## Supported File Types
50
+ ### Supported File Types
88
51
 
89
52
  - `.py` — Python notebooks
90
53
  - `.scala` — Scala notebooks
91
54
  - `.r` — R notebooks
92
55
  - `.sql` — SQL notebooks
93
56
 
94
- ## Supported Languages
95
57
 
96
- - Spark SQL
97
- - Python
98
-
99
- > **Note:** All other language cells are preserved as-is.
58
+ ## Language Support
100
59
 
101
60
  ### Spark SQL
61
+ The SQL formatter uses an ANTLR grammar to parse and reformat Spark SQL. All keywords, functions, and syntax are derived directly from the official Spark SQL grammar.
102
62
 
103
- ---
104
-
105
- Custom formatter built on [Apache Spark's official ANTLR grammar](https://github.com/apache/spark/tree/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser). If Spark supports the syntax, fabric-format formats it correctly.
106
-
107
- #### Style Overview
108
-
109
- | Element | Formatting |
110
- | ---------------------- | -------------------------- |
111
- | Keywords | `UPPERCASE` |
112
- | Built-in functions | `UPPERCASE()` |
113
- | User-defined functions | `preserveCase()` |
114
- | Identifiers | `preserveCase` |
115
- | Indentation | 4 spaces |
116
- | Expression line width | 140 characters (then wrap) |
117
- | Commas | Leading (comma-first) |
118
-
119
- See [SQL_STYLE_GUIDE.md](./SQL_STYLE_GUIDE.md) for complete rules and examples.
120
-
121
- #### Format Directives
122
-
123
- ##### `fmt: off`
124
-
125
- Skip formatting entirely—preserves original whitespace and casing. Applicable only to the statement directly after it.
126
-
127
- ```sql
128
- -- fmt: off
129
- select Col_A,Col_B B,Col_C from t;
130
- select Col_A,Col_B B,Col_C from t;
131
- ```
132
-
133
- ⬇️ Output
134
-
135
- ```sql
136
- -- fmt: off
137
- select Col_A,Col_B B,Col_C from t;
138
-
139
- SELECT
140
- Col_A
141
- ,Col_B AS B
142
- ,Col_C
143
- FROM t;
144
- ```
145
-
146
- ##### `fmt: inline`
147
-
148
- Suppress line wrapping for long expressions that are wrapped by default at 140 characters.
149
-
150
- ```sql
151
- SELECT
152
- conv(right(md5(upper(concat(coalesce(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS A-- fmt: inline
153
- ,conv(right(md5(upper(concat(coalesce(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS B
154
- FROM t
155
- ```
156
-
157
- ⬇️ Output
158
-
159
- ```sql
160
- SELECT
161
- CONV(RIGHT(MD5(UPPER(CONCAT(COALESCE(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS A -- fmt: inline
162
- ,CONV(
163
- RIGHT(
164
- MD5(UPPER(CONCAT(
165
- COALESCE(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn)
166
- ,SomeOtherReallyLongColumnName
167
- )))
168
- ,16
169
- )
170
- ,16
171
- ,-10
172
- ) AS B
173
- FROM t
174
- ```
175
-
176
- ### Python
63
+ See [SQL_STYLE_GUIDE.md](SQL_STYLE_GUIDE.md) for formatting rules.
177
64
 
178
- ---
65
+ ### Python / PySpark
66
+ The Python formatter uses [Ruff](https://github.com/astral-sh/ruff) WASM for:
179
67
 
180
- Formatted via [Ruff](https://docs.astral.sh/ruff/) with sensible defaults:
68
+ 1. **Code formatting** — Consistent styling (line length 140, double quotes, trailing commas)
69
+ 2. **Safe lint auto-fixes** — Automatically applies safe fixes from ~60 Ruff rules
181
70
 
182
- - 140 character line width
183
- - 4-space indentation
184
- - Double quotes
185
- - PEP 8 compliant
71
+ **Included lint auto-fixes:**
72
+ - **Import sorting** (I001) — Organizes imports by standard library, third-party, local
73
+ - **Modernization** (UP008, UP018, UP032) — Updates deprecated patterns to modern Python
74
+ - **Simplifications** (SIM118, SIM201, SIM300) — `key in dict` instead of `key in dict.keys()`, etc.
75
+ - **Bug fixes** (B009, B010) — Use `getattr()`/`setattr()` properly
76
+ - **Style** (E703, E711, F632) — Remove useless semicolons, use `is None`, etc.
77
+ - **Ruff specific** (RUF005) — List concatenation with unpacking
186
78
 
187
- Magic commands (`%%sql`, `%run`, etc.) are preserved.
79
+ **Explicitly excluded rules** (unsafe for notebooks):
80
+ - **F401, F841** — Unused imports/variables may be used in other cells
81
+ - **Rules that add imports** (RUF017, SIM105, etc.) — Can break cell execution order
82
+ - **RET504** — Removing intermediate variables changes code structure
188
83
 
189
- #### Format Directives
84
+ The lint fixes are applied automatically—no configuration needed.
190
85
 
191
- ##### `fmt: off` / `fmt: on`
192
-
193
- Disable formatting for a block of code:
194
-
195
- ```python
196
- # fmt: off
197
- matrix = [
198
- 1, 0, 0,
199
- 0, 1, 0,
200
- 0, 0, 1,
201
- ]
202
- # fmt: on
203
- ```
204
-
205
- ##### `fmt: skip`
206
-
207
- Skip formatting for a single statement:
208
-
209
- ```python
210
- result = some_function(a, b, c,d, e) # fmt: skip
211
- ```
212
86
 
213
- See [Ruff's documentation](https://docs.astral.sh/ruff/formatter/#format-suppression) for more details.
87
+ ## Documentation
88
+ Find all documentation in the [fabric-format wiki](https://github.com/JacobKnightley/fabric-format/wiki).
@@ -0,0 +1,8 @@
1
+ /**
2
+ * SQL Formatter Profiler
3
+ *
4
+ * Profiles the SQL formatter to identify performance bottlenecks.
5
+ * Run with: node --prof dist/benchmarks/profile.js
6
+ * Then: node --prof-process isolate-*.log > profile.txt
7
+ */
8
+ export {};
@@ -0,0 +1,68 @@
1
+ /**
2
+ * SQL Formatter Profiler
3
+ *
4
+ * Profiles the SQL formatter to identify performance bottlenecks.
5
+ * Run with: node --prof dist/benchmarks/profile.js
6
+ * Then: node --prof-process isolate-*.log > profile.txt
7
+ */
8
+ import * as fs from 'node:fs';
9
+ import * as path from 'node:path';
10
+ import { fileURLToPath } from 'node:url';
11
+ import { formatSql } from '../formatters/sparksql/index.js';
12
+ const __filename = fileURLToPath(import.meta.url);
13
+ const __dirname = path.dirname(__filename);
/**
 * Read one profiling fixture from disk.
 *
 * The file is resolved as `fixtures/<category>/<name>` relative to the
 * directory that contains this module.
 *
 * @param {string} category - Sub-directory of `fixtures` (for example `small`).
 * @param {string} name - File name of the fixture inside that sub-directory.
 * @returns {string} The fixture's content decoded as UTF-8.
 */
function loadFixture(category, name) {
    return fs.readFileSync(path.join(__dirname, 'fixtures', category, name), 'utf-8');
}
/**
 * Format the same SQL text many times so a V8 profile collects enough samples,
 * then log the total and per-call wall-clock time.
 *
 * @param {string} sql - SQL text passed to `formatSql` on every iteration.
 * @param {number} iterations - How many times to format `sql`.
 * @param {string} label - Human-readable name printed in the log output.
 * @returns {void}
 */
function profileSql(sql, iterations, label) {
    console.log(`Profiling ${label}: ${iterations} iterations...`);
    const start = performance.now();
    for (let i = 0; i < iterations; i++) {
        formatSql(sql);
    }
    const elapsedMs = performance.now() - start;
    console.log(` Completed in ${elapsedMs.toFixed(2)}ms`);
    if (iterations > 0) {
        console.log(` Average: ${(elapsedMs / iterations).toFixed(3)}ms per call`);
    }
    else {
        // Dividing by a zero (or negative / NaN) count would print "NaN" or a
        // meaningless negative average, so say explicitly that nothing ran.
        console.log(' Average: n/a (no iterations were run)');
    }
}
/**
 * Entry point of the profiler script.
 *
 * Reads the iteration count from the first command-line argument (default
 * 1000), prints usage hints for `node --prof`, loads one small, one medium and
 * one large fixture, warms the formatter up and then profiles each fixture.
 * The large fixture runs for half as many iterations as the other two.
 *
 * @returns {Promise<void>} Resolves once all three fixtures have been profiled.
 */
async function main() {
    const args = process.argv.slice(2);
    // Zero, negative or non-numeric counts would skip every profiling loop,
    // so all of them fall back to the default.
    const requested = Number.parseInt(args[0], 10);
    const iterations = requested > 0 ? requested : 1000;
    console.log('╔══════════════════════════════════════════════════════════════╗');
    console.log('║ SQL Formatter Profiler ║');
    console.log('╚══════════════════════════════════════════════════════════════╝');
    console.log();
    console.log('Run this script with --prof flag to generate V8 profile data:');
    console.log(' node --prof dist/benchmarks/profile.js');
    console.log();
    console.log('Then process the log:');
    console.log(' node --prof-process isolate-*.log > profile.txt');
    console.log();
    // Load fixtures
    const smallSql = loadFixture('small', 'simple-select.sql');
    const mediumSql = loadFixture('medium', 'cte.sql');
    const largeSql = loadFixture('large', 'complex-analytics.sql');
    // Warm up so the timed runs below are not dominated by first-call work.
    console.log('Warming up...');
    for (let i = 0; i < 10; i++) {
        formatSql(smallSql);
        formatSql(mediumSql);
        formatSql(largeSql);
    }
    console.log();
    console.log(`Running ${iterations} iterations per fixture...`);
    console.log();
    // Profile each size
    profileSql(smallSql, iterations, 'small (simple-select.sql)');
    profileSql(mediumSql, iterations, 'medium (cte.sql)');
    // The large fixture gets half the iterations, but never fewer than one
    // (a plain floor() yields 0 when only one iteration was requested).
    profileSql(largeSql, Math.max(1, Math.floor(iterations / 2)), 'large (complex-analytics.sql)');
    console.log();
    console.log('Profile data collection complete.');
}
main().catch((error) => {
    console.error(error);
    // Signal the failure to the calling shell instead of exiting with status 0.
    process.exitCode = 1;
});
@@ -0,0 +1,7 @@
1
+ /**
2
+ * SQL Formatter Benchmark Suite
3
+ *
4
+ * Measures performance of the SQL formatter across various query sizes and complexity levels.
5
+ * Outputs timing data for profiling and optimization work.
6
+ */
7
+ export {};
@@ -0,0 +1,202 @@
1
+ /**
2
+ * SQL Formatter Benchmark Suite
3
+ *
4
+ * Measures performance of the SQL formatter across various query sizes and complexity levels.
5
+ * Outputs timing data for profiling and optimization work.
6
+ */
7
+ import * as fs from 'node:fs';
8
+ import * as path from 'node:path';
9
+ import { fileURLToPath } from 'node:url';
10
+ import { formatSql } from '../formatters/sparksql/index.js';
11
+ const __filename = fileURLToPath(import.meta.url);
12
+ const __dirname = path.dirname(__filename);
/**
 * Time repeated formatting of one SQL text and summarise the samples.
 *
 * One untimed warm-up call is made first. Minimum, maximum and total are
 * accumulated while the samples are taken, so no per-sample array has to be
 * spread into `Math.min` / `Math.max` (which can exceed the engine's argument
 * limit for very large iteration counts).
 *
 * @param {string} name - Fixture name, copied into the result.
 * @param {string} category - Fixture category, copied into the result.
 * @param {string} sql - SQL text to format.
 * @param {number} iterations - Number of timed runs.
 * @returns {{name: string, category: string, inputSize: number, outputSize: number,
 *   iterations: number, totalMs: number, avgMs: number, minMs: number, maxMs: number,
 *   opsPerSec: number, changed: boolean}} Timing summary. When no timed run
 *   happens (`iterations` is not positive) all timing fields are 0 and the
 *   warm-up output is used for `outputSize` / `changed`.
 */
function runBenchmark(name, category, sql, iterations) {
    // Warm-up run (not counted); also the fallback output if nothing is timed.
    let output = formatSql(sql);
    let samples = 0;
    let totalMs = 0;
    let minMs = Number.POSITIVE_INFINITY;
    let maxMs = Number.NEGATIVE_INFINITY;
    for (let i = 0; i < iterations; i++) {
        const start = performance.now();
        output = formatSql(sql);
        const elapsed = performance.now() - start;
        totalMs += elapsed;
        if (elapsed < minMs) {
            minMs = elapsed;
        }
        if (elapsed > maxMs) {
            maxMs = elapsed;
        }
        samples++;
    }
    const measured = samples > 0;
    // Average over the runs actually performed so the statistics stay finite.
    const avgMs = measured ? totalMs / samples : 0;
    return {
        name,
        category,
        inputSize: sql.length,
        outputSize: output.length,
        iterations,
        totalMs,
        avgMs,
        minMs: measured ? minMs : 0,
        maxMs: measured ? maxMs : 0,
        opsPerSec: measured ? 1000 / avgMs : 0,
        changed: output !== sql,
    };
}
/**
 * Load every `.sql` file that sits directly inside a directory.
 *
 * Entries are visited in sorted name order so benchmark output is stable
 * regardless of the order the file system reports them in. Entries whose name
 * ends in `.sql` but which are not regular files (for example a directory) are
 * skipped instead of failing the whole run.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @returns {Map<string, string>} File name → UTF-8 content; empty when `dir`
 *   does not exist.
 */
function loadFixtures(dir) {
    const fixtures = new Map();
    if (!fs.existsSync(dir)) {
        return fixtures;
    }
    const sqlNames = fs
        .readdirSync(dir)
        .filter((file) => file.endsWith('.sql'))
        .sort();
    for (const file of sqlNames) {
        const fullPath = path.join(dir, file);
        // statSync follows symlinks, so linked fixture files are still loaded.
        if (!fs.statSync(fullPath).isFile()) {
            continue;
        }
        fixtures.set(file, fs.readFileSync(fullPath, 'utf-8'));
    }
    return fixtures;
}
/**
 * Render a duration given in milliseconds with a unit that fits its size.
 *
 * - below 1 ms: microseconds with one decimal (`500.0µs`)
 * - below 1000 ms: milliseconds with two decimals (`12.50ms`)
 * - otherwise: seconds with two decimals (`1.50s`)
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Human-readable duration, or `n/a` when `ms` is NaN or
 *   infinite (e.g. an average computed over zero files).
 */
function formatDuration(ms) {
    // Non-finite input would otherwise fall through and print "NaNs" / "Infinitys".
    if (!Number.isFinite(ms)) {
        return 'n/a';
    }
    if (ms < 1) {
        return `${(ms * 1000).toFixed(1)}µs`;
    }
    if (ms < 1000) {
        return `${ms.toFixed(2)}ms`;
    }
    return `${(ms / 1000).toFixed(2)}s`;
}
/**
 * Build one table row by padding each cell to its column width and joining
 * the cells with a vertical-bar separator.
 *
 * @param {Array<*>} cols - Cell values; each is converted with `String()` so
 *   numbers can be passed directly.
 * @param {number[]} [widths=[]] - Column widths; a missing or zero width
 *   falls back to 20.
 * @param {string[]} [align=[]] - Per-column alignment; `'left'` pads on the
 *   right, any other value pads on the left. Missing entries mean `'left'`.
 * @returns {string} The formatted row.
 */
function formatRow(cols, widths = [], align = []) {
    return cols
        .map((col, i) => {
            const text = String(col);
            const width = widths[i] || 20;
            const alignment = align[i] || 'left';
            return alignment === 'left' ? text.padEnd(width) : text.padStart(width);
        })
        .join(' │ ');
}
/**
 * Run all benchmarks and output results.
 *
 * Reads the iteration count from the first command-line argument (default
 * 100), benchmarks every `.sql` fixture found under `fixtures/small`,
 * `fixtures/medium` and `fixtures/large`, prints a per-file table and a
 * per-category summary, and writes `results.json` next to this module when
 * `--json` is passed.
 *
 * @returns {Promise<void>} Resolves when all output has been written.
 */
async function main() {
    const args = process.argv.slice(2);
    // Zero, negative or non-numeric counts cannot produce timings; use the default.
    const requested = Number.parseInt(args[0], 10);
    const iterations = requested > 0 ? requested : 100;
    const outputJson = args.includes('--json');
    // Averages over an empty set are reported as 0 rather than NaN, which
    // would print as "NaNs" and serialise to null in the JSON output.
    const perUnit = (total, count) => (count > 0 ? total / count : 0);
    console.log('╔════════════════════════════════════════════════════════════════╗');
    console.log('║ SQL Formatter Benchmark Suite ║');
    console.log('╚════════════════════════════════════════════════════════════════╝');
    console.log();
    console.log(`Iterations per file: ${iterations}`);
    console.log();
    const fixturesDir = path.join(__dirname, 'fixtures');
    const categories = ['small', 'medium', 'large'];
    const results = [];
    const summaries = [];
    for (const category of categories) {
        const categoryDir = path.join(fixturesDir, category);
        const fixtures = loadFixtures(categoryDir);
        if (fixtures.size === 0) {
            console.log(`⚠️ No fixtures found in ${category}/`);
            continue;
        }
        console.log(`\n━━━ ${category.toUpperCase()} QUERIES ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
        const colWidths = [30, 10, 10, 12, 12, 10];
        const colAlign = [
            'left',
            'right',
            'right',
            'right',
            'right',
            'right',
        ];
        console.log(formatRow(['File', 'Size', 'Avg', 'Min', 'Max', 'ops/sec'], colWidths, colAlign));
        console.log('─'.repeat(90));
        // Sum of per-file average times and of input sizes for this category.
        let categoryTime = 0;
        let categoryChars = 0;
        for (const [file, sql] of fixtures) {
            const result = runBenchmark(file, category, sql, iterations);
            results.push(result);
            categoryTime += result.avgMs;
            categoryChars += result.inputSize;
            console.log(formatRow([
                file,
                `${result.inputSize}`,
                formatDuration(result.avgMs),
                formatDuration(result.minMs),
                formatDuration(result.maxMs),
                `${result.opsPerSec.toFixed(1)}`,
            ], colWidths, colAlign));
        }
        summaries.push({
            category,
            fileCount: fixtures.size,
            totalInputChars: categoryChars,
            totalTimeMs: categoryTime,
            avgTimePerFile: perUnit(categoryTime, fixtures.size),
            // Guarded: a category made up of empty files has zero characters.
            avgTimePerKilochar: perUnit(categoryTime, categoryChars) * 1000,
        });
    }
    // Summary
    console.log('\n╔════════════════════════════════════════════════════════════════╗');
    console.log('║ SUMMARY ║');
    console.log('╚════════════════════════════════════════════════════════════════╝');
    console.log();
    const summaryWidths = [10, 8, 12, 14, 14];
    const summaryAlign = [
        'left',
        'right',
        'right',
        'right',
        'right',
    ];
    console.log(formatRow(['Category', 'Files', 'Total Chars', 'Avg/File', 'ms/1K chars'], summaryWidths, summaryAlign));
    console.log('─'.repeat(65));
    for (const s of summaries) {
        console.log(formatRow([
            s.category,
            `${s.fileCount}`,
            `${s.totalInputChars}`,
            formatDuration(s.avgTimePerFile),
            s.avgTimePerKilochar.toFixed(3),
        ], summaryWidths, summaryAlign));
    }
    const totalTime = summaries.reduce((a, s) => a + s.totalTimeMs, 0);
    const totalChars = summaries.reduce((a, s) => a + s.totalInputChars, 0);
    const totalFiles = summaries.reduce((a, s) => a + s.fileCount, 0);
    // Both totals are 0 when no fixtures were found at all.
    const avgTimePerFile = perUnit(totalTime, totalFiles);
    const msPerKilochar = perUnit(totalTime, totalChars) * 1000;
    console.log('─'.repeat(65));
    console.log(formatRow([
        'TOTAL',
        `${totalFiles}`,
        `${totalChars}`,
        formatDuration(avgTimePerFile),
        msPerKilochar.toFixed(3),
    ], summaryWidths, summaryAlign));
    console.log();
    // totalTime sums per-file averages, so multiply back by the run count.
    console.log(`Total benchmark time: ${formatDuration(totalTime * iterations)}`);
    if (outputJson) {
        const jsonOutput = {
            timestamp: new Date().toISOString(),
            iterations,
            results,
            summaries,
            totals: {
                files: totalFiles,
                chars: totalChars,
                avgTimePerFile,
                msPerKilochar,
            },
        };
        const jsonPath = path.join(__dirname, 'results.json');
        fs.writeFileSync(jsonPath, JSON.stringify(jsonOutput, null, 2));
        console.log(`\nResults saved to: ${jsonPath}`);
    }
}
main().catch((error) => {
    console.error(error);
    // Signal the failure to the calling shell instead of exiting with status 0.
    process.exitCode = 1;
});
@@ -82,6 +82,45 @@ export declare function getPythonFormatterInitPromise(): Promise<void> | null;
82
82
  * This allows re-initialization with different options.
83
83
  */
84
84
  export declare function resetPythonFormatterState(): void;
85
+ /**
86
+ * Detect which languages are present in notebook content.
87
+ * Scans `"language": "..."` metadata entries and `%%` magic commands with fast patterns - does NOT parse the full notebook structure.
88
+ *
89
+ * This enables lazy initialization: only load formatters for languages actually used.
90
+ *
91
+ * @param content Raw notebook file content
92
+ * @returns Set of normalized language identifiers found in the content ('sql', 'python', 'scala' and/or 'r')
93
+ *
94
+ * @example
95
+ * ```typescript
96
+ * const languages = detectLanguagesInContent(notebookContent);
97
+ * // languages might be Set { 'sql', 'python' }
98
+ * await initializeFormatters(languages);
99
+ * ```
100
+ */
101
+ export declare function detectLanguagesInContent(content: string): Set<string>;
102
+ /**
103
+ * Initialize formatters for the specified languages in parallel.
104
+ *
105
+ * This is the recommended way to initialize formatters when you know
106
+ * which languages you'll need. Unneeded formatters are not loaded.
107
+ *
108
+ * @param languages Set of language identifiers to initialize (currently only 'python' triggers loading; the SQL formatter needs no initialization)
109
+ * @param options Optional WASM options, forwarded to the Python formatter's initializer
110
+ *
111
+ * @example
112
+ * ```typescript
113
+ * // Scan files first
114
+ * const languages = detectLanguagesInContent(content);
115
+ *
116
+ * // Initialize only what's needed (parallel)
117
+ * await initializeFormatters(languages);
118
+ *
119
+ * // Now format cells
120
+ * formatCell(code, 'python');
121
+ * ```
122
+ */
123
+ export declare function initializeFormatters(languages: Set<string>, options?: WasmInitOptions): Promise<void>;
85
124
  /**
86
125
  * Format a single cell's content based on its type.
87
126
  *
@@ -125,6 +125,99 @@ export function resetPythonFormatterState() {
125
125
  resetPythonFormatter();
126
126
  }
127
127
  // ============================================================================
128
+ // Language Detection for Lazy Initialization
129
+ // ============================================================================
/**
 * Detect which languages are present in notebook content.
 * Uses fast regex patterns - does NOT parse the full notebook structure.
 *
 * Two sources are scanned:
 * 1. `"language": "xxx"` metadata entries (matched case-insensitively and
 *    normalized: `sparksql`/`sql` → `sql`, `python`/`pyspark` → `python`).
 * 2. `%%` magic commands, in case metadata is missing. A magic only counts
 *    when the language name is a complete word, so `%%run` is not mistaken
 *    for the R magic `%%r`.
 *
 * This enables lazy initialization: only load formatters for languages actually used.
 *
 * @param {string} content Raw notebook file content
 * @returns {Set<string>} Normalized language identifiers found in the content
 *   (`'sql'`, `'python'`, `'scala'`, `'r'`)
 *
 * @example
 * ```typescript
 * const languages = detectLanguagesInContent(notebookContent);
 * // languages might be Set { 'sql', 'python' }
 * await initializeFormatters(languages);
 * ```
 */
export function detectLanguagesInContent(content) {
    const languages = new Set();
    // Metadata spelling → normalized identifier; unknown names are ignored.
    const metadataAliases = new Map([
        ['sparksql', 'sql'],
        ['sql', 'sql'],
        ['python', 'python'],
        ['pyspark', 'python'],
        ['scala', 'scala'],
        ['r', 'r'],
    ]);
    // Pattern: "language": "xxx" in METADATA blocks
    for (const [, rawName] of content.matchAll(/"language"\s*:\s*"(\w+)"/g)) {
        const language = metadataAliases.get(rawName.toLowerCase());
        if (language !== undefined) {
            languages.add(language);
        }
    }
    // Also check for MAGIC commands (in case metadata is missing).
    // `\b` requires the magic name to end there, avoiding prefix false positives.
    const magicPatterns = [
        [/%%sql\b/, 'sql'],
        [/%%(?:python|pyspark)\b/, 'python'],
        [/%%scala\b/, 'scala'],
        [/%%[rR]\b/, 'r'],
    ];
    for (const [pattern, language] of magicPatterns) {
        if (pattern.test(content)) {
            languages.add(language);
        }
    }
    return languages;
}
/**
 * Start every formatter the given languages require and wait for all of them.
 *
 * Only the Python formatter has an asynchronous (WASM) start-up; it is kicked
 * off when `'python'` is requested and the formatter is not ready yet. The SQL
 * formatter is synchronous and needs no initialization. Languages without a
 * formatter are ignored.
 *
 * @param languages Set of language identifiers to initialize
 * @param options Optional WASM options for Python formatter
 *
 * @example
 * ```typescript
 * const languages = detectLanguagesInContent(content);
 * await initializeFormatters(languages);
 * formatCell(code, 'python');
 * ```
 */
export async function initializeFormatters(languages, options) {
    const pending = languages.has('python') && !pythonFormatterReady
        ? [initializePythonFormatter(options)]
        : [];
    // Future: Scala and R formatters would add their start-up promises to
    // `pending` here so they load in parallel with Python.
    await Promise.all(pending);
}
220
+ // ============================================================================
128
221
  // Cell Formatting API
129
222
  // ============================================================================
130
223
  /**