@jacobknightley/fabric-format 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -163
- package/dist/benchmarks/profile.d.ts +8 -0
- package/dist/benchmarks/profile.js +68 -0
- package/dist/benchmarks/run.d.ts +7 -0
- package/dist/benchmarks/run.js +202 -0
- package/dist/cell-formatter.d.ts +39 -0
- package/dist/cell-formatter.js +93 -0
- package/dist/cli.js +35 -10
- package/dist/formatters/index.js +10 -0
- package/dist/formatters/python/config.d.ts +21 -0
- package/dist/formatters/python/config.js +128 -0
- package/dist/formatters/python/index.d.ts +2 -0
- package/dist/formatters/python/index.js +5 -0
- package/dist/formatters/python/python-formatter.js +137 -11
- package/dist/formatters/python/spark-sql-extractor.d.ts +80 -0
- package/dist/formatters/python/spark-sql-extractor.js +297 -0
- package/dist/formatters/python/spark-sql-formatter.d.ts +61 -0
- package/dist/formatters/python/spark-sql-formatter.js +257 -0
- package/dist/formatters/sparksql/fmt-detector.d.ts +2 -20
- package/dist/formatters/sparksql/fmt-detector.js +1 -17
- package/dist/formatters/sparksql/formatter.js +263 -235
- package/dist/formatters/sparksql/formatting-context.d.ts +0 -4
- package/dist/formatters/sparksql/formatting-context.js +30 -29
- package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +1 -0
- package/dist/formatters/sparksql/generated/SqlBaseLexer.js +1840 -1829
- package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +32 -2
- package/dist/formatters/sparksql/generated/SqlBaseParser.js +6746 -6514
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +2 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +6 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +1 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +4 -0
- package/dist/formatters/sparksql/generated/builtinFunctions.js +6 -0
- package/dist/formatters/sparksql/index.d.ts +1 -1
- package/dist/formatters/sparksql/newline-calculator.d.ts +1 -1
- package/dist/formatters/sparksql/newline-calculator.js +2 -10
- package/dist/formatters/sparksql/output-builder.d.ts +1 -0
- package/dist/formatters/sparksql/output-builder.js +5 -4
- package/dist/formatters/sparksql/output-helpers.js +1 -8
- package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +1 -0
- package/dist/formatters/sparksql/parse-tree-analyzer.js +137 -6
- package/dist/formatters/sparksql/types.d.ts +6 -3
- package/dist/formatters/types.d.ts +3 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/notebook-formatter.js +47 -18
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -2,13 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
A zero-config formatter for **Microsoft Fabric notebooks**.
|
|
4
4
|
|
|
5
|
-
## Packages
|
|
6
|
-
|
|
7
|
-
| Package | Description |
|
|
8
|
-
| -------------------------------------------- | ------------------------------------------ |
|
|
9
|
-
| [@jacobknightley/fabric-format](./packages/core) | Core formatting library (npm package) |
|
|
10
|
-
| [fabric-format-chromium](./packages/chromium) | Chrome/Edge extension for Fabric notebooks |
|
|
11
|
-
|
|
12
5
|
## Philosophy
|
|
13
6
|
|
|
14
7
|
**Opinionated by design.** This formatter has one style, enforced everywhere, with no configuration options—and no plans to add any.
|
|
@@ -17,17 +10,28 @@ Built this for teams who want consistent notebook formatting without endless deb
|
|
|
17
10
|
|
|
18
11
|
The focus is on clean, consistent output—not tailored experiences or nuanced edge cases.
|
|
19
12
|
|
|
13
|
+
## Browser Extension
|
|
14
|
+
|
|
15
|
+
Format Fabric notebooks directly in your browser with a single click.
|
|
16
|
+
|
|
17
|
+

|
|
18
|
+
|
|
19
|
+
1. Install the Edge extension from [Edge Add-ons](https://microsoftedge.microsoft.com/addons/detail/fabric-format/pagkopelpfjaedelgckkbmcepekgheaj)
|
|
20
|
+
> Until Chrome is supported, download the [extension](https://github.com/jacobknightley/fabric-format/releases) and [load it unpacked](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked) in Chrome developer mode.
|
|
21
|
+
3. Open a notebook in Microsoft Fabric
|
|
22
|
+
4. Click the  button in the notebook toolbar
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
20
27
|
## CLI
|
|
28
|
+
Format Fabric notebook-content files synced from a workspace in a repository.
|
|
21
29
|
|
|
22
|
-
### Installation
|
|
23
30
|
|
|
24
31
|
```bash
|
|
32
|
+
# install
|
|
25
33
|
npm install -g @jacobknightley/fabric-format
|
|
26
|
-
```
|
|
27
34
|
|
|
28
|
-
### Usage
|
|
29
|
-
|
|
30
|
-
```bash
|
|
31
35
|
# format
|
|
32
36
|
fabfmt format notebook.py # Format a single file
|
|
33
37
|
fabfmt format ./src # Format all files in directory
|
|
@@ -42,172 +46,43 @@ fabfmt check --type sparksql -i "select * from t" # Check inline string
|
|
|
42
46
|
echo "select * from t" | fabfmt check --type sparksql # Check from stdin
|
|
43
47
|
```
|
|
44
48
|
|
|
45
|
-
### Exit Codes
|
|
46
|
-
|
|
47
|
-
| Code | Meaning |
|
|
48
|
-
| ---- | ------------------------------------------------ |
|
|
49
|
-
| 0 | Success (format: no changes needed, check: pass) |
|
|
50
|
-
| 1 | Failure (format: error occurred, check: changes needed) |
|
|
51
|
-
| 2 | Usage error (invalid arguments, missing files) |
|
|
52
|
-
|
|
53
|
-
## Browser Extension
|
|
54
|
-
|
|
55
|
-
Format Fabric notebooks directly in your browser with a single click.
|
|
56
|
-
|
|
57
|
-
### Installation
|
|
58
|
-
|
|
59
|
-
1. Download `fabric-format-chromium.zip` from the [latest release](https://github.com/jacobknightley/fabric-format/releases)
|
|
60
|
-
2. Extract the zip file
|
|
61
|
-
3. Load the unpacked extension in your browser:
|
|
62
|
-
- **Chrome:** [Install an unpacked extension](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked)
|
|
63
|
-
- **Edge:** [Sideload an extension](https://learn.microsoft.com/en-us/microsoft-edge/extensions-chromium/getting-started/extension-sideloading)
|
|
64
|
-
|
|
65
|
-
> **Note:** Plan to eventually publish to the Chrome Web Store and Edge Add-ons.
|
|
66
|
-
|
|
67
|
-
### Browser Compatibility
|
|
68
|
-
|
|
69
|
-
| Browser | Version | Status |
|
|
70
|
-
| ------- | ------- | ------ |
|
|
71
|
-
| Chrome | 88+ | ✅ Supported |
|
|
72
|
-
| Edge | 88+ | ✅ Supported |
|
|
73
|
-
| Firefox | — | ❌ Not supported (Manifest V3 only) |
|
|
74
|
-
| Safari | — | ❌ Not supported |
|
|
75
|
-
|
|
76
|
-
Requires a Chromium-based browser with Manifest V3 and WASM support.
|
|
77
|
-
|
|
78
|
-
### Usage
|
|
79
|
-
|
|
80
|
-
1. Open a notebook in Microsoft Fabric
|
|
81
|
-
2. Click the **Format** button in the notebook toolbar
|
|
82
|
-
|
|
83
|
-

|
|
84
|
-
|
|
85
|
-
3. All cells in the notebook are formatted instantly
|
|
86
49
|
|
|
87
|
-
|
|
50
|
+
### Supported File Types
|
|
88
51
|
|
|
89
52
|
- `.py` — Python notebooks
|
|
90
53
|
- `.scala` — Scala notebooks
|
|
91
54
|
- `.r` — R notebooks
|
|
92
55
|
- `.sql` — SQL notebooks
|
|
93
56
|
|
|
94
|
-
## Supported Languages
|
|
95
57
|
|
|
96
|
-
|
|
97
|
-
- Python
|
|
98
|
-
|
|
99
|
-
> **Note:** All other language cells are preserved as-is.
|
|
58
|
+
## Language Support
|
|
100
59
|
|
|
101
60
|
### Spark SQL
|
|
61
|
+
The SQL formatter uses an ANTLR grammar to parse and reformat Spark SQL. All keywords, functions, and syntax are derived directly from the official Spark SQL grammar.
|
|
102
62
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
Custom formatter built on [Apache Spark's official ANTLR grammar](https://github.com/apache/spark/tree/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser). If Spark supports the syntax, fabric-format formats it correctly.
|
|
106
|
-
|
|
107
|
-
#### Style Overview
|
|
108
|
-
|
|
109
|
-
| Element | Formatting |
|
|
110
|
-
| ---------------------- | -------------------------- |
|
|
111
|
-
| Keywords | `UPPERCASE` |
|
|
112
|
-
| Built-in functions | `UPPERCASE()` |
|
|
113
|
-
| User-defined functions | `preserveCase()` |
|
|
114
|
-
| Identifiers | `preserveCase` |
|
|
115
|
-
| Indentation | 4 spaces |
|
|
116
|
-
| Expression line width | 140 characters (then wrap) |
|
|
117
|
-
| Commas | Leading (comma-first) |
|
|
118
|
-
|
|
119
|
-
See [SQL_STYLE_GUIDE.md](./SQL_STYLE_GUIDE.md) for complete rules and examples.
|
|
120
|
-
|
|
121
|
-
#### Format Directives
|
|
122
|
-
|
|
123
|
-
##### `fmt: off`
|
|
124
|
-
|
|
125
|
-
Skip formatting entirely—preserves original whitespace and casing. Applicable only to the statement directly after it.
|
|
126
|
-
|
|
127
|
-
```sql
|
|
128
|
-
-- fmt: off
|
|
129
|
-
select Col_A,Col_B B,Col_C from t;
|
|
130
|
-
select Col_A,Col_B B,Col_C from t;
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
⬇️ Output
|
|
134
|
-
|
|
135
|
-
```sql
|
|
136
|
-
-- fmt: off
|
|
137
|
-
select Col_A,Col_B B,Col_C from t;
|
|
138
|
-
|
|
139
|
-
SELECT
|
|
140
|
-
Col_A
|
|
141
|
-
,Col_B AS B
|
|
142
|
-
,Col_C
|
|
143
|
-
FROM t;
|
|
144
|
-
```
|
|
145
|
-
|
|
146
|
-
##### `fmt: inline`
|
|
147
|
-
|
|
148
|
-
Suppress line wrapping for long expressions that are wrapped by default at 140 characters.
|
|
149
|
-
|
|
150
|
-
```sql
|
|
151
|
-
SELECT
|
|
152
|
-
conv(right(md5(upper(concat(coalesce(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS A-- fmt: inline
|
|
153
|
-
,conv(right(md5(upper(concat(coalesce(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS B
|
|
154
|
-
FROM t
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
⬇️ Output
|
|
158
|
-
|
|
159
|
-
```sql
|
|
160
|
-
SELECT
|
|
161
|
-
CONV(RIGHT(MD5(UPPER(CONCAT(COALESCE(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn), SomeOtherReallyLongColumnName))), 16), 16, -10) AS A -- fmt: inline
|
|
162
|
-
,CONV(
|
|
163
|
-
RIGHT(
|
|
164
|
-
MD5(UPPER(CONCAT(
|
|
165
|
-
COALESCE(VeryLongTable.VeryLongColumnName, AnotherLongAlias.AnotherLongColumn)
|
|
166
|
-
,SomeOtherReallyLongColumnName
|
|
167
|
-
)))
|
|
168
|
-
,16
|
|
169
|
-
)
|
|
170
|
-
,16
|
|
171
|
-
,-10
|
|
172
|
-
) AS B
|
|
173
|
-
FROM t
|
|
174
|
-
```
|
|
175
|
-
|
|
176
|
-
### Python
|
|
63
|
+
See [SQL_STYLE_GUIDE.md](SQL_STYLE_GUIDE.md) for formatting rules.
|
|
177
64
|
|
|
178
|
-
|
|
65
|
+
### Python / PySpark
|
|
66
|
+
The Python formatter uses [Ruff](https://github.com/astral-sh/ruff) WASM for:
|
|
179
67
|
|
|
180
|
-
|
|
68
|
+
1. **Code formatting** — Consistent styling (line length 140, double quotes, trailing commas)
|
|
69
|
+
2. **Safe lint auto-fixes** — Automatically applies safe fixes from ~60 Ruff rules
|
|
181
70
|
|
|
182
|
-
|
|
183
|
-
-
|
|
184
|
-
-
|
|
185
|
-
-
|
|
71
|
+
**Included lint auto-fixes:**
|
|
72
|
+
- **Import sorting** (I001) — Organizes imports by standard library, third-party, local
|
|
73
|
+
- **Modernization** (UP008, UP018, UP032) — Updates deprecated patterns to modern Python
|
|
74
|
+
- **Simplifications** (SIM118, SIM201, SIM300) — `key in dict` instead of `key in dict.keys()`, etc.
|
|
75
|
+
- **Bug fixes** (B009, B010) — Use `getattr()`/`setattr()` properly
|
|
76
|
+
- **Style** (E703, E711, F632) — Remove useless semicolons, use `is None`, etc.
|
|
77
|
+
- **Ruff specific** (RUF005) — List concatenation with unpacking
|
|
186
78
|
|
|
187
|
-
|
|
79
|
+
**Explicitly excluded rules** (unsafe for notebooks):
|
|
80
|
+
- **F401, F841** — Unused imports/variables may be used in other cells
|
|
81
|
+
- **Rules that add imports** (RUF017, SIM105, etc.) — Can break cell execution order
|
|
82
|
+
- **RET504** — Removing intermediate variables changes code structure
|
|
188
83
|
|
|
189
|
-
|
|
84
|
+
The lint fixes are applied automatically—no configuration needed.
|
|
190
85
|
|
|
191
|
-
##### `fmt: off` / `fmt: on`
|
|
192
|
-
|
|
193
|
-
Disable formatting for a block of code:
|
|
194
|
-
|
|
195
|
-
```python
|
|
196
|
-
# fmt: off
|
|
197
|
-
matrix = [
|
|
198
|
-
1, 0, 0,
|
|
199
|
-
0, 1, 0,
|
|
200
|
-
0, 0, 1,
|
|
201
|
-
]
|
|
202
|
-
# fmt: on
|
|
203
|
-
```
|
|
204
|
-
|
|
205
|
-
##### `fmt: skip`
|
|
206
|
-
|
|
207
|
-
Skip formatting for a single statement:
|
|
208
|
-
|
|
209
|
-
```python
|
|
210
|
-
result = some_function(a, b, c,d, e) # fmt: skip
|
|
211
|
-
```
|
|
212
86
|
|
|
213
|
-
|
|
87
|
+
## Documentation
|
|
88
|
+
Find all documentation in the [fabric-format wiki](https://github.com/JacobKnightley/fabric-format/wiki).
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQL Formatter Profiler
|
|
3
|
+
*
|
|
4
|
+
* Profiles the SQL formatter to identify performance bottlenecks.
|
|
5
|
+
* Run with: node --prof dist/benchmarks/profile.js
|
|
6
|
+
* Then: node --prof-process isolate-*.log > profile.txt
|
|
7
|
+
*/
|
|
8
|
+
import * as fs from 'node:fs';
|
|
9
|
+
import * as path from 'node:path';
|
|
10
|
+
import { fileURLToPath } from 'node:url';
|
|
11
|
+
import { formatSql } from '../formatters/sparksql/index.js';
|
|
12
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
13
|
+
const __dirname = path.dirname(__filename);
|
|
14
|
+
/**
 * Read one benchmark fixture from disk.
 *
 * @param {string} category - Sub-directory of `fixtures/` that holds the file.
 * @param {string} name - File name inside that sub-directory.
 * @returns {string} The file contents decoded as UTF-8.
 */
function loadFixture(category, name) {
    // Fixtures are resolved relative to this script's own directory.
    return fs.readFileSync(path.join(__dirname, 'fixtures', category, name), 'utf-8');
}
|
|
21
|
+
/**
 * Format the same SQL text repeatedly so a profiler has enough samples,
 * then print the total and per-call wall-clock time.
 *
 * @param {string} sql - SQL text passed to `formatSql` on every iteration.
 * @param {number} iterations - Number of times `sql` is formatted.
 * @param {string} label - Name shown in the progress output.
 */
function profileSql(sql, iterations, label) {
    console.log(`Profiling ${label}: ${iterations} iterations...`);
    const startedAt = performance.now();
    for (let run = 0; run < iterations; run++) {
        formatSql(sql);
    }
    const elapsedMs = performance.now() - startedAt;
    console.log(` Completed in ${elapsedMs.toFixed(2)}ms`);
    console.log(` Average: ${(elapsedMs / iterations).toFixed(3)}ms per call`);
}
|
|
34
|
+
/**
 * Entry point of the profiler script.
 *
 * Reads the iteration count from the first CLI argument (default 1000),
 * prints usage hints, loads the small/medium/large fixtures, warms the
 * formatter up, and then profiles each fixture. The large fixture runs
 * half as many iterations as the others, but always at least one, so a
 * small iteration count never skips it or prints a `NaN` average.
 *
 * @returns {Promise<void>} Resolves once all fixtures have been profiled.
 */
async function main() {
    const [iterationArg] = process.argv.slice(2);
    const iterations = Number.parseInt(iterationArg, 10) || 1000;
    console.log('╔══════════════════════════════════════════════════════════════╗');
    console.log('║ SQL Formatter Profiler ║');
    console.log('╚══════════════════════════════════════════════════════════════╝');
    console.log();
    console.log('Run this script with --prof flag to generate V8 profile data:');
    console.log(' node --prof dist/benchmarks/profile.js');
    console.log();
    console.log('Then process the log:');
    console.log(' node --prof-process isolate-*.log > profile.txt');
    console.log();
    // Load fixtures
    const smallSql = loadFixture('small', 'simple-select.sql');
    const mediumSql = loadFixture('medium', 'cte.sql');
    const largeSql = loadFixture('large', 'complex-analytics.sql');
    // Warm up so the measured runs hit already-compiled code paths
    console.log('Warming up...');
    for (let round = 0; round < 10; round++) {
        for (const sql of [smallSql, mediumSql, largeSql]) {
            formatSql(sql);
        }
    }
    console.log();
    console.log(`Running ${iterations} iterations per fixture...`);
    console.log();
    // The large fixture gets half the iterations; clamp to 1 so it is never skipped
    const largeIterations = Math.max(1, Math.floor(iterations / 2));
    profileSql(smallSql, iterations, 'small (simple-select.sql)');
    profileSql(mediumSql, iterations, 'medium (cte.sql)');
    profileSql(largeSql, largeIterations, 'large (complex-analytics.sql)');
    console.log();
    console.log('Profile data collection complete.');
}
|
|
68
|
+
main().catch(console.error);
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQL Formatter Benchmark Suite
|
|
3
|
+
*
|
|
4
|
+
* Measures performance of the SQL formatter across various query sizes and complexity levels.
|
|
5
|
+
* Outputs timing data for profiling and optimization work.
|
|
6
|
+
*/
|
|
7
|
+
import * as fs from 'node:fs';
|
|
8
|
+
import * as path from 'node:path';
|
|
9
|
+
import { fileURLToPath } from 'node:url';
|
|
10
|
+
import { formatSql } from '../formatters/sparksql/index.js';
|
|
11
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
12
|
+
const __dirname = path.dirname(__filename);
|
|
13
|
+
/**
 * Run a single benchmark with multiple iterations.
 *
 * The SQL is formatted once as an uncounted warm-up and then `iterations`
 * more times, each individually timed. Minimum, maximum and total are
 * tracked incrementally instead of spreading a per-iteration array into
 * `Math.min`/`Math.max`, which throws a `RangeError` once the iteration
 * count exceeds the engine's argument limit.
 *
 * @param {string} name - Fixture name reported in the result.
 * @param {string} category - Fixture category reported in the result.
 * @param {string} sql - SQL text to format.
 * @param {number} iterations - Number of timed runs.
 * @returns {object} Timing statistics in milliseconds, input/output sizes,
 *   operations per second, and whether formatting changed the text.
 */
function runBenchmark(name, category, sql, iterations) {
    // Warm-up run (not counted)
    formatSql(sql);
    let output = '';
    let totalMs = 0;
    let minMs = Infinity;
    let maxMs = -Infinity;
    for (let run = 0; run < iterations; run++) {
        const startedAt = performance.now();
        output = formatSql(sql);
        const elapsedMs = performance.now() - startedAt;
        totalMs += elapsedMs;
        if (elapsedMs < minMs) {
            minMs = elapsedMs;
        }
        if (elapsedMs > maxMs) {
            maxMs = elapsedMs;
        }
    }
    const avgMs = totalMs / iterations;
    return {
        name,
        category,
        inputSize: sql.length,
        outputSize: output.length,
        iterations,
        totalMs,
        avgMs,
        minMs,
        maxMs,
        opsPerSec: 1000 / avgMs,
        changed: output !== sql,
    };
}
|
|
45
|
+
/**
 * Load all SQL files from a directory.
 *
 * Only regular files (or symlinks to them) whose name ends in `.sql` are
 * read; a sub-directory that happens to be named `*.sql` is skipped rather
 * than crashing the run with `EISDIR`.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @returns {Map<string, string>} File name mapped to UTF-8 content; empty
 *   when the directory does not exist.
 */
function loadFixtures(dir) {
    const fixtures = new Map();
    if (!fs.existsSync(dir)) {
        return fixtures;
    }
    for (const file of fs.readdirSync(dir)) {
        if (!file.endsWith('.sql')) {
            continue;
        }
        const filePath = path.join(dir, file);
        // statSync follows symlinks, so linked fixture files are still picked up
        if (!fs.statSync(filePath).isFile()) {
            continue;
        }
        fixtures.set(file, fs.readFileSync(filePath, 'utf-8'));
    }
    return fixtures;
}
|
|
62
|
+
/**
 * Render a millisecond duration with a unit suited to its magnitude.
 *
 * Values below 1 ms are shown in microseconds (one decimal), values below
 * 1000 ms in milliseconds (two decimals), everything else in seconds
 * (two decimals).
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The duration followed by its unit suffix.
 */
function formatDuration(ms) {
    const [value, digits, unit] = ms < 1
        ? [ms * 1000, 1, 'µs']
        : ms < 1000
            ? [ms, 2, 'ms']
            : [ms / 1000, 2, 's'];
    return `${value.toFixed(digits)}${unit}`;
}
|
|
74
|
+
/**
 * Build one table row by padding each cell to its column width.
 *
 * @param {string[]} cols - Cell texts, one per column.
 * @param {number[]} widths - Column widths; a missing or zero width falls back to 20.
 * @param {string[]} align - Per-column alignment; `'left'` (also the fallback)
 *   pads on the right, any other value pads on the left.
 * @returns {string} The padded cells joined by a vertical-bar separator.
 */
function formatRow(cols, widths, align) {
    const padCell = (text, index) => {
        const width = widths[index] || 20;
        const side = align[index] || 'left';
        if (side === 'left') {
            return text.padEnd(width);
        }
        return text.padStart(width);
    };
    return cols.map(padCell).join(' │ ');
}
|
|
86
|
+
/**
 * Run all benchmarks and output results.
 *
 * CLI arguments: the first argument is the iteration count per file
 * (default 100); `--json` additionally writes the results to
 * `results.json` next to this script.
 *
 * Every averaged figure goes through a guarded division, so a run with no
 * fixtures (or only empty files) reports 0 instead of `NaN`/`Infinity` in
 * the summary table and in the JSON output.
 *
 * @returns {Promise<void>} Resolves when all output has been written.
 */
async function main() {
    const args = process.argv.slice(2);
    const iterations = Number.parseInt(args[0], 10) || 100;
    const outputJson = args.includes('--json');
    // Division that yields 0 rather than NaN/Infinity when the denominator is 0
    const safeRatio = (numerator, denominator) => (denominator > 0 ? numerator / denominator : 0);
    console.log('╔════════════════════════════════════════════════════════════════╗');
    console.log('║ SQL Formatter Benchmark Suite ║');
    console.log('╚════════════════════════════════════════════════════════════════╝');
    console.log();
    console.log(`Iterations per file: ${iterations}`);
    console.log();
    const fixturesDir = path.join(__dirname, 'fixtures');
    const categories = ['small', 'medium', 'large'];
    const results = [];
    const summaries = [];
    // Per-file table layout: first column left-aligned, numeric columns right-aligned
    const colWidths = [30, 10, 10, 12, 12, 10];
    const colAlign = ['left', 'right', 'right', 'right', 'right', 'right'];
    for (const category of categories) {
        const fixtures = loadFixtures(path.join(fixturesDir, category));
        if (fixtures.size === 0) {
            console.log(`⚠️ No fixtures found in ${category}/`);
            continue;
        }
        console.log(`\n━━━ ${category.toUpperCase()} QUERIES ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
        console.log(formatRow(['File', 'Size', 'Avg', 'Min', 'Max', 'ops/sec'], colWidths, colAlign));
        console.log('─'.repeat(90));
        let categoryTime = 0;
        let categoryChars = 0;
        for (const [file, sql] of fixtures) {
            const result = runBenchmark(file, category, sql, iterations);
            results.push(result);
            // Category totals are sums of per-file averages, not of raw run times
            categoryTime += result.avgMs;
            categoryChars += result.inputSize;
            const row = [
                file,
                `${result.inputSize}`,
                formatDuration(result.avgMs),
                formatDuration(result.minMs),
                formatDuration(result.maxMs),
                `${result.opsPerSec.toFixed(1)}`,
            ];
            console.log(formatRow(row, colWidths, colAlign));
        }
        summaries.push({
            category,
            fileCount: fixtures.size,
            totalInputChars: categoryChars,
            totalTimeMs: categoryTime,
            avgTimePerFile: safeRatio(categoryTime, fixtures.size),
            avgTimePerKilochar: safeRatio(categoryTime, categoryChars) * 1000,
        });
    }
    // Summary
    console.log('\n╔════════════════════════════════════════════════════════════════╗');
    console.log('║ SUMMARY ║');
    console.log('╚════════════════════════════════════════════════════════════════╝');
    console.log();
    const summaryWidths = [10, 8, 12, 14, 14];
    const summaryAlign = ['left', 'right', 'right', 'right', 'right'];
    console.log(formatRow(['Category', 'Files', 'Total Chars', 'Avg/File', 'ms/1K chars'], summaryWidths, summaryAlign));
    console.log('─'.repeat(65));
    let totalTime = 0;
    let totalChars = 0;
    let totalFiles = 0;
    for (const summary of summaries) {
        const row = [
            summary.category,
            `${summary.fileCount}`,
            `${summary.totalInputChars}`,
            formatDuration(summary.avgTimePerFile),
            summary.avgTimePerKilochar.toFixed(3),
        ];
        console.log(formatRow(row, summaryWidths, summaryAlign));
        totalTime += summary.totalTimeMs;
        totalChars += summary.totalInputChars;
        totalFiles += summary.fileCount;
    }
    const avgTimePerFile = safeRatio(totalTime, totalFiles);
    const msPerKilochar = safeRatio(totalTime, totalChars) * 1000;
    console.log('─'.repeat(65));
    console.log(formatRow([
        'TOTAL',
        `${totalFiles}`,
        `${totalChars}`,
        formatDuration(avgTimePerFile),
        msPerKilochar.toFixed(3),
    ], summaryWidths, summaryAlign));
    console.log();
    // totalTime sums per-file averages, so scale by the iteration count for wall time
    console.log(`Total benchmark time: ${formatDuration(totalTime * iterations)}`);
    if (outputJson) {
        const jsonOutput = {
            timestamp: new Date().toISOString(),
            iterations,
            results,
            summaries,
            totals: {
                files: totalFiles,
                chars: totalChars,
                avgTimePerFile,
                msPerKilochar,
            },
        };
        const jsonPath = path.join(__dirname, 'results.json');
        fs.writeFileSync(jsonPath, JSON.stringify(jsonOutput, null, 2));
        console.log(`\nResults saved to: ${jsonPath}`);
    }
}
|
|
202
|
+
main().catch(console.error);
|
package/dist/cell-formatter.d.ts
CHANGED
|
@@ -82,6 +82,45 @@ export declare function getPythonFormatterInitPromise(): Promise<void> | null;
|
|
|
82
82
|
* This allows re-initialization with different options.
|
|
83
83
|
*/
|
|
84
84
|
export declare function resetPythonFormatterState(): void;
|
|
85
|
+
/**
|
|
86
|
+
* Detect which languages are present in notebook content.
|
|
87
|
+
* Uses fast regex patterns - does NOT parse the full notebook structure.
|
|
88
|
+
*
|
|
89
|
+
* This enables lazy initialization: only load formatters for languages actually used.
|
|
90
|
+
*
|
|
91
|
+
* @param content Raw notebook file content
|
|
92
|
+
* @returns Set of language identifiers that need formatting
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* const languages = detectLanguagesInContent(notebookContent);
|
|
97
|
+
* // languages might be Set { 'sql', 'python' }
|
|
98
|
+
* await initializeFormatters(languages);
|
|
99
|
+
* ```
|
|
100
|
+
*/
|
|
101
|
+
export declare function detectLanguagesInContent(content: string): Set<string>;
|
|
102
|
+
/**
|
|
103
|
+
* Initialize formatters for the specified languages in parallel.
|
|
104
|
+
*
|
|
105
|
+
* This is the recommended way to initialize formatters when you know
|
|
106
|
+
* which languages you'll need. Unneeded formatters are not loaded.
|
|
107
|
+
*
|
|
108
|
+
* @param languages Set of language identifiers to initialize
|
|
109
|
+
* @param options Optional WASM options for Python formatter
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* // Scan files first
|
|
114
|
+
* const languages = detectLanguagesInContent(content);
|
|
115
|
+
*
|
|
116
|
+
* // Initialize only what's needed (parallel)
|
|
117
|
+
* await initializeFormatters(languages);
|
|
118
|
+
*
|
|
119
|
+
* // Now format cells
|
|
120
|
+
* formatCell(code, 'python');
|
|
121
|
+
* ```
|
|
122
|
+
*/
|
|
123
|
+
export declare function initializeFormatters(languages: Set<string>, options?: WasmInitOptions): Promise<void>;
|
|
85
124
|
/**
|
|
86
125
|
* Format a single cell's content based on its type.
|
|
87
126
|
*
|
package/dist/cell-formatter.js
CHANGED
|
@@ -125,6 +125,99 @@ export function resetPythonFormatterState() {
|
|
|
125
125
|
resetPythonFormatter();
|
|
126
126
|
}
|
|
127
127
|
// ============================================================================
|
|
128
|
+
// Language Detection for Lazy Initialization
|
|
129
|
+
// ============================================================================
|
|
130
|
+
/**
 * Work out which cell languages appear in raw notebook content.
 *
 * The content is only scanned with a regex and substring checks; the
 * notebook structure is not parsed. Two signals are used:
 *   1. `"language": "<name>"` entries (name matched case-insensitively), and
 *   2. `%%<name>` magic commands, as a fallback when metadata is missing.
 *
 * Recognised names are normalised to `'sql'`, `'python'`, `'scala'` or `'r'`;
 * anything else is ignored.
 *
 * @param content Raw notebook file content
 * @returns Set of normalised language identifiers found in the content
 *
 * @example
 * ```typescript
 * const languages = detectLanguagesInContent(notebookContent);
 * // languages might be Set { 'sql', 'python' }
 * await initializeFormatters(languages);
 * ```
 */
export function detectLanguagesInContent(content) {
    const detected = new Set();
    // Metadata spellings mapped to the identifier used by the formatters
    const metadataAliases = new Map([
        ['sparksql', 'sql'],
        ['sql', 'sql'],
        ['python', 'python'],
        ['pyspark', 'python'],
        ['scala', 'scala'],
        ['r', 'r'],
    ]);
    for (const [, rawName] of content.matchAll(/"language"\s*:\s*"(\w+)"/g)) {
        const canonical = metadataAliases.get(rawName.toLowerCase());
        if (canonical !== undefined) {
            detected.add(canonical);
        }
    }
    // Magic commands, checked in a fixed order so the Set's insertion order is stable
    const magicMarkers = [
        ['sql', ['%%sql']],
        ['python', ['%%python', '%%pyspark']],
        ['scala', ['%%scala']],
        ['r', ['%%r', '%%R']],
    ];
    for (const [canonical, markers] of magicMarkers) {
        if (markers.some((marker) => content.includes(marker))) {
            detected.add(canonical);
        }
    }
    return detected;
}
|
|
183
|
+
/**
 * Start every formatter that the given languages require and wait for all
 * of them together.
 *
 * Only the Python formatter has an initialization step here, and it is
 * skipped when `pythonFormatterReady` is already set. SQL needs no setup,
 * and no other language is initialized by this function.
 *
 * @param languages Set of language identifiers to initialize
 * @param options Optional WASM options passed to the Python formatter
 *
 * @example
 * ```typescript
 * const languages = detectLanguagesInContent(content);
 * await initializeFormatters(languages);
 * formatCell(code, 'python');
 * ```
 */
export async function initializeFormatters(languages, options) {
    const pending = [];
    const needsPython = languages.has('python') && !pythonFormatterReady;
    if (needsPython) {
        pending.push(initializePythonFormatter(options));
    }
    // Future scala/r formatters would push their init promises here so that
    // everything keeps loading in parallel.
    await Promise.all(pending);
}
|
|
220
|
+
// ============================================================================
|
|
128
221
|
// Cell Formatting API
|
|
129
222
|
// ============================================================================
|
|
130
223
|
/**
|