@jacobknightley/fabric-format 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -0
- package/dist/cell-formatter.d.ts +75 -0
- package/dist/cell-formatter.js +144 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +435 -0
- package/dist/formatters/index.d.ts +19 -0
- package/dist/formatters/index.js +76 -0
- package/dist/formatters/python/config.d.ts +33 -0
- package/dist/formatters/python/config.js +29 -0
- package/dist/formatters/python/index.d.ts +7 -0
- package/dist/formatters/python/index.js +13 -0
- package/dist/formatters/python/python-formatter.d.ts +51 -0
- package/dist/formatters/python/python-formatter.js +180 -0
- package/dist/formatters/sparksql/constants.d.ts +16 -0
- package/dist/formatters/sparksql/constants.js +16 -0
- package/dist/formatters/sparksql/fmt-detector.d.ts +65 -0
- package/dist/formatters/sparksql/fmt-detector.js +84 -0
- package/dist/formatters/sparksql/formatter.d.ts +24 -0
- package/dist/formatters/sparksql/formatter.js +1276 -0
- package/dist/formatters/sparksql/formatting-context.d.ts +154 -0
- package/dist/formatters/sparksql/formatting-context.js +363 -0
- package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +529 -0
- package/dist/formatters/sparksql/generated/SqlBaseLexer.js +2609 -0
- package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +8195 -0
- package/dist/formatters/sparksql/generated/SqlBaseParser.js +48793 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +910 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +2730 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +456 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +1822 -0
- package/dist/formatters/sparksql/generated/builtinFunctions.d.ts +8 -0
- package/dist/formatters/sparksql/generated/builtinFunctions.js +510 -0
- package/dist/formatters/sparksql/index.d.ts +11 -0
- package/dist/formatters/sparksql/index.js +22 -0
- package/dist/formatters/sparksql/output-builder.d.ts +89 -0
- package/dist/formatters/sparksql/output-builder.js +191 -0
- package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +264 -0
- package/dist/formatters/sparksql/parse-tree-analyzer.js +1956 -0
- package/dist/formatters/sparksql/sql-formatter.d.ts +25 -0
- package/dist/formatters/sparksql/sql-formatter.js +56 -0
- package/dist/formatters/sparksql/token-utils.d.ts +68 -0
- package/dist/formatters/sparksql/token-utils.js +155 -0
- package/dist/formatters/sparksql/types.d.ts +264 -0
- package/dist/formatters/sparksql/types.js +7 -0
- package/dist/formatters/types.d.ts +57 -0
- package/dist/formatters/types.js +7 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +41 -0
- package/dist/notebook-formatter.d.ts +107 -0
- package/dist/notebook-formatter.js +424 -0
- package/package.json +63 -0
package/dist/index.js
ADDED

@@ -0,0 +1,41 @@
/**
 * sparkfmt - Spark SQL & Python Formatter
 *
 * A unified formatter for Spark SQL and Python code, designed for
 * Microsoft Fabric notebooks and CI/CD pipelines.
 *
 * Architecture:
 * - formatters/sparksql/: Core SQL formatting (ANTLR grammar-driven)
 * - formatters/python/: Python formatting via Ruff WASM
 * - notebook-formatter.ts: Fabric notebook handling
 */
// ============================================================================
// SQL Formatter (Core API)
// ============================================================================
export { formatSql, needsFormatting } from './formatters/sparksql/index.js';
// ============================================================================
// Language Formatters (Extensible)
// ============================================================================
export {
// Registry
getFormatterRegistry, detectLanguage,
// SQL
SqlFormatter, getSqlFormatter, isSqlCode,
// Python
PythonFormatter, getPythonFormatter, isPythonCode, } from './formatters/index.js';
// ============================================================================
// Cell Formatter (Low-level API)
// ============================================================================
export { formatCell, formatCellSync, initializePythonFormatter, isPythonFormatterReady, } from './cell-formatter.js';
// ============================================================================
// Notebook Formatter (High-level API)
// ============================================================================
export { parseNotebook, formatNotebook, } from './notebook-formatter.js';
// ============================================================================
// Configuration (Python/Ruff)
// ============================================================================
export { DEFAULT_RUFF_CONFIG, RUFF_WASM_CONFIG, } from './formatters/python/index.js';
// ============================================================================
// Format Directives (Spark SQL)
// ============================================================================
export { hasFormatOff, detectCollapseDirectives, hasCollapseDirective } from './formatters/sparksql/index.js';
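The exports above define the package's public surface. A minimal consumer sketch of the core SQL API, assuming the names from the exports above; the exact signatures and return shapes are not shown in this diff:

```ts
// Sketch only: names come from dist/index.js above; signatures are assumed.
import { formatSql, needsFormatting } from '@jacobknightley/fabric-format';

const sql = 'select id,name from dbo.users where active=1';
if (needsFormatting(sql)) {
  // Assumed to return the reformatted SQL text.
  console.log(formatSql(sql));
}
```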
package/dist/notebook-formatter.d.ts
ADDED

@@ -0,0 +1,107 @@
/**
 * Notebook Formatter
 *
 * High-level API for parsing and formatting Microsoft Fabric notebook files.
 * Handles the notebook structure, cell extraction, and comment wrapper management.
 *
 * For low-level cell formatting (when you already know the cell type),
 * use the cell-formatter module directly.
 *
 * File formats:
 * - .py files (Python/PySpark) with `# MAGIC` prefix for Spark SQL cells
 * - .scala files (Scala/Spark) with `// MAGIC` prefix for Spark SQL cells
 * - .r files (R/SparkR) with `# MAGIC` prefix for Spark SQL cells
 * - .sql files (Spark SQL) with `-- MAGIC` prefix OR raw SQL
 *
 * Cell structure (Python example):
 * ```
 * # CELL ********************
 *
 * # MAGIC %%sql
 * # MAGIC SELECT *
 * # MAGIC FROM table
 *
 * # METADATA ********************
 * # META {
 * # META   "language": "sparksql",
 * # META   "language_group": "synapse_pyspark"
 * # META }
 * ```
 */
export { formatCell, formatCellSync, initializePythonFormatter, isPythonFormatterReady, type FormatCellResult, type CellType, } from './cell-formatter.js';
/** Language-specific comment prefixes */
interface LanguageConfig {
    fabricHeader: string;
    cellMarker: string;
    metadataMarker: string;
    magicPrefix: string;
    magicSqlCommand: string;
    emptyMagic: string;
    /** If true, cells can contain raw SQL without MAGIC prefix */
    supportsRawSql: boolean;
    /** Default cell language when no magic command present */
    defaultLanguage: string;
}
/** Represents a cell in a Fabric notebook */
export interface NotebookCell {
    /** Line index (0-based) where the cell content starts (after CELL marker) */
    contentStartLine: number;
    /** Line index (0-based) where the cell content ends (before METADATA) */
    contentEndLine: number;
    /** The extracted content (without MAGIC prefixes) */
    content: string;
    /** Original lines including MAGIC prefixes */
    originalLines: string[];
    /** Detected language of this cell (from METADATA) */
    language: string;
    /** Whether this is a MAGIC-prefixed cell */
    isMagicCell: boolean;
    /** Whether this is a raw cell (no MAGIC prefix) */
    isRawCell: boolean;
    /** The magic command used in the cell (e.g., 'sql', 'pyspark', 'configure'), or null if no magic */
    magicCommand: string | null;
}
/** Represents a parsed Fabric notebook */
export interface FabricNotebook {
    /** Whether this file is a Fabric notebook */
    isFabricNotebook: boolean;
    /** All detected cells */
    cells: NotebookCell[];
    /** Original file content split into lines */
    lines: string[];
    /** The detected language config */
    config: LanguageConfig | null;
}
/** Statistics from formatting operation */
export interface FormatStats {
    sparkSqlCellsFormatted: number;
    pythonCellsFormatted: number;
    cellsSkipped: number;
    errors: string[];
}
/**
 * Parse a Fabric notebook file into cells.
 * @param content The file content
 * @param fileExtension The file extension (e.g., '.py', '.sql', '.scala', '.r')
 */
export declare function parseNotebook(content: string, fileExtension: string): FabricNotebook;
/**
 * Format all cells in a Fabric notebook.
 *
 * This is the high-level API for formatting entire notebook files.
 * It parses the notebook structure, extracts cells, formats them using
 * the low-level formatCell API, and reassembles with proper comment wrappers.
 *
 * @param content The file content
 * @param fileExtension The file extension (e.g., '.py', '.sql', '.scala', '.r')
 * @param options Formatting options
 * @returns Object with formatted content and statistics
 */
export declare function formatNotebook(content: string, fileExtension: string, options?: {
    formatSql?: boolean;
    formatPython?: boolean;
    configPath?: string;
}): Promise<{
    content: string;
    stats: FormatStats;
}>;
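The declarations above make the high-level contract concrete: formatNotebook is async and returns both the new content and a FormatStats record. A hedged usage sketch:

```ts
import { formatNotebook } from '@jacobknightley/fabric-format';

// Illustrative: `source` holds the text of a Fabric .py notebook file.
declare const source: string;

const { content, stats } = await formatNotebook(source, '.py', {
  formatSql: true,
  formatPython: true,
});
console.log(`SQL cells formatted: ${stats.sparkSqlCellsFormatted}`);
console.log(`Python cells formatted: ${stats.pythonCellsFormatted}, skipped: ${stats.cellsSkipped}`);
if (stats.errors.length > 0) console.error(stats.errors.join('\n'));
```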
package/dist/notebook-formatter.js
ADDED

@@ -0,0 +1,424 @@
/**
 * Notebook Formatter
 *
 * High-level API for parsing and formatting Microsoft Fabric notebook files.
 * Handles the notebook structure, cell extraction, and comment wrapper management.
 *
 * For low-level cell formatting (when you already know the cell type),
 * use the cell-formatter module directly.
 *
 * File formats:
 * - .py files (Python/PySpark) with `# MAGIC` prefix for Spark SQL cells
 * - .scala files (Scala/Spark) with `// MAGIC` prefix for Spark SQL cells
 * - .r files (R/SparkR) with `# MAGIC` prefix for Spark SQL cells
 * - .sql files (Spark SQL) with `-- MAGIC` prefix OR raw SQL
 *
 * Cell structure (Python example):
 * ```
 * # CELL ********************
 *
 * # MAGIC %%sql
 * # MAGIC SELECT *
 * # MAGIC FROM table
 *
 * # METADATA ********************
 * # META {
 * # META   "language": "sparksql",
 * # META   "language_group": "synapse_pyspark"
 * # META }
 * ```
 */
import { formatCell, initializePythonFormatter, } from './cell-formatter.js';
// Re-export cell-formatter types and functions for convenience
export { formatCell, formatCellSync, initializePythonFormatter, isPythonFormatterReady, } from './cell-formatter.js';
const PYTHON_CONFIG = {
    fabricHeader: '# Fabric notebook source',
    cellMarker: '# CELL ********************',
    metadataMarker: '# METADATA ********************',
    magicPrefix: '# MAGIC ',
    magicSqlCommand: '# MAGIC %%sql',
    emptyMagic: '# MAGIC',
    supportsRawSql: false,
    defaultLanguage: 'python',
};
const SCALA_CONFIG = {
    fabricHeader: '// Fabric notebook source',
    cellMarker: '// CELL ********************',
    metadataMarker: '// METADATA ********************',
    magicPrefix: '// MAGIC ',
    magicSqlCommand: '// MAGIC %%sql',
    emptyMagic: '// MAGIC',
    supportsRawSql: false,
    defaultLanguage: 'scala',
};
const SPARKSQL_CONFIG = {
    fabricHeader: '-- Fabric notebook source',
    cellMarker: '-- CELL ********************',
    metadataMarker: '-- METADATA ********************',
    magicPrefix: '-- MAGIC ',
    magicSqlCommand: '-- MAGIC %%sql',
    emptyMagic: '-- MAGIC',
    supportsRawSql: true,
    defaultLanguage: 'sparksql',
};
const R_CONFIG = {
    ...PYTHON_CONFIG, // R uses same comment syntax as Python
    fabricHeader: '# Fabric notebook source', // Check if R has different header
    defaultLanguage: 'r',
};
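The four configs differ only in the comment leader and the raw-SQL policy (`supportsRawSql` is true only for .sql files). For orientation, an illustrative fixture matching PYTHON_CONFIG's markers; this is not taken from the package's tests:

```ts
// Illustrative fixture matching PYTHON_CONFIG above; not from the package.
const fixture = [
  '# Fabric notebook source',        // fabricHeader: must be the first line
  '',
  '# CELL ********************',     // cellMarker: opens a cell
  '',
  '# MAGIC %%sql',                   // magicSqlCommand
  '# MAGIC SELECT 1',                // magicPrefix + cell content
  '',
  '# METADATA ********************', // metadataMarker: closes the cell content
  '# META { "language": "sparksql" }',
].join('\n');
```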
// ============================================================================
// INTERNAL UTILITIES
// ============================================================================
/**
 * Detect line ending style.
 */
function detectLineEnding(content) {
    if (content.includes('\r\n'))
        return '\r\n';
    return '\n';
}
/**
 * Get language config based on file extension.
 * The file extension determines the default language and comment syntax.
 */
function getLanguageConfig(fileExtension) {
    const ext = fileExtension.toLowerCase();
    switch (ext) {
        case '.py':
            return PYTHON_CONFIG;
        case '.scala':
            return SCALA_CONFIG;
        case '.sql':
            return SPARKSQL_CONFIG;
        case '.r':
            return R_CONFIG;
        default:
            return null;
    }
}
/**
 * Validate that the file content is a Fabric notebook.
 * Checks that the first line matches the expected header for the config.
 */
function isFabricNotebookContent(firstLine, config) {
    return firstLine.trim().startsWith(config.fabricHeader);
}
/**
 * Extract the magic command from cell lines (e.g., 'sql', 'pyspark', 'configure').
 * Returns null if no magic command is present.
 */
function extractMagicCommand(lines, config) {
    for (const line of lines) {
        const trimmed = line.trim();
        // Look for magic command pattern: # MAGIC %%<command>
        const magicPrefix = config.magicPrefix + '%%';
        if (trimmed.startsWith(magicPrefix)) {
            // Extract command name (everything after %% until space or end)
            const afterPrefix = trimmed.slice(magicPrefix.length);
            const match = afterPrefix.match(/^(\w+)/);
            if (match) {
                return match[1].toLowerCase();
            }
        }
    }
    return null;
}
/**
 * Parse the METADATA block to extract the language.
 * Returns the language string or null if not found.
 */
function parseMetadataLanguage(lines, startLine, config) {
    // Find the METADATA marker
    let i = startLine;
    while (i < lines.length && lines[i].trim() !== config.metadataMarker) {
        i++;
    }
    if (i >= lines.length)
        return null;
    // Parse the META JSON block
    // Format: # META { ... "language": "sparksql" ... }
    const metaPrefix = config.magicPrefix.replace('MAGIC', 'META').trim();
    let jsonContent = '';
    i++; // Skip the METADATA marker
    while (i < lines.length) {
        const trimmed = lines[i].trim();
        if (trimmed === config.cellMarker || trimmed === config.metadataMarker) {
            break;
        }
        // Extract content after # META prefix
        if (trimmed.startsWith(metaPrefix)) {
            jsonContent += trimmed.slice(metaPrefix.length).trim();
        }
        i++;
    }
    // Try to parse and extract language
    try {
        const meta = JSON.parse(jsonContent);
        if (meta && typeof meta.language === 'string') {
            return meta.language;
        }
    }
    catch {
        // JSON parsing failed, ignore
    }
    return null;
}
/**
 * Detect the language of a cell based on magic commands.
 * @deprecated Use parseMetadataLanguage instead - kept for backward compatibility
 */
function detectCellLanguage(lines, config) {
    for (const line of lines) {
        const trimmed = line.trim();
        // Check for magic commands
        if (trimmed === config.magicSqlCommand || trimmed.startsWith(config.magicPrefix + '%%sql')) {
            return 'sparksql';
        }
        if (trimmed === config.magicPrefix + '%%python' || trimmed.startsWith(config.magicPrefix + '%%python ')) {
            return 'python';
        }
        if (trimmed === config.magicPrefix + '%%pyspark' || trimmed.startsWith(config.magicPrefix + '%%pyspark ')) {
            return 'python'; // PySpark is Python
        }
        if (trimmed === config.magicPrefix + '%%scala' || trimmed.startsWith(config.magicPrefix + '%%scala ')) {
            return 'scala';
        }
        if (trimmed === config.magicPrefix + '%%r' || trimmed === config.magicPrefix + '%%R') {
            return 'r';
        }
        // First non-empty, non-magic line determines if it's a raw cell
        if (trimmed !== '' && !trimmed.startsWith(config.magicPrefix.trim())) {
            return config.defaultLanguage;
        }
    }
    return config.defaultLanguage;
}
/**
 * Strip MAGIC prefix from a line.
 */
function stripMagicPrefix(line, config) {
    if (line.startsWith(config.magicPrefix)) {
        return line.slice(config.magicPrefix.length);
    }
    if (line.trim() === config.emptyMagic) {
        return '';
    }
    return line;
}
/**
 * Add MAGIC prefix to lines.
 */
function addMagicPrefix(content, config) {
    return content.split(/\r?\n/).map(line => {
        if (line === '')
            return config.emptyMagic;
        return config.magicPrefix + line;
    });
}
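stripMagicPrefix and addMagicPrefix are inverses of each other, with the bare `# MAGIC` marker standing in for empty lines. A self-contained restatement using PYTHON_CONFIG's values (the originals are module-internal, so this is a sketch, not the exported API):

```ts
// Standalone restatement of the two helpers above, hard-wired to '# MAGIC '.
const magicPrefix = '# MAGIC ';
const emptyMagic = '# MAGIC';

const strip = (line: string): string =>
  line.startsWith(magicPrefix) ? line.slice(magicPrefix.length)
  : line.trim() === emptyMagic ? ''   // bare '# MAGIC' marks an empty line
  : line;

const add = (content: string): string[] =>
  content.split(/\r?\n/).map(l => (l === '' ? emptyMagic : magicPrefix + l));

// Round trip: wrapping and then stripping returns the original lines.
const original = ['SELECT *', '', 'FROM t'];
const restored = add(original.join('\n')).map(strip);
console.log(restored.join('\n') === original.join('\n')); // true
```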
/**
 * Parse a Fabric notebook file into cells.
 * @param content The file content
 * @param fileExtension The file extension (e.g., '.py', '.sql', '.scala', '.r')
 */
export function parseNotebook(content, fileExtension) {
    const lines = content.split(/\r?\n/);
    const result = {
        isFabricNotebook: false,
        cells: [],
        lines,
        config: null,
    };
    if (lines.length === 0)
        return result;
    const config = getLanguageConfig(fileExtension);
    if (!config)
        return result;
    // Validate it's actually a Fabric notebook
    if (!isFabricNotebookContent(lines[0], config))
        return result;
    result.isFabricNotebook = true;
    result.config = config;
    let i = 0;
    while (i < lines.length) {
        if (lines[i].trim() === config.cellMarker) {
            const cellStartLine = i + 1;
            // Skip empty lines after CELL marker
            let j = cellStartLine;
            while (j < lines.length && lines[j].trim() === '') {
                j++;
            }
            // Find end of cell
            let cellEndLine = j;
            while (cellEndLine < lines.length) {
                const trimmed = lines[cellEndLine].trim();
                if (trimmed === config.metadataMarker || trimmed === config.cellMarker) {
                    break;
                }
                cellEndLine++;
            }
            // Back up past trailing empty lines
            let actualEndLine = cellEndLine - 1;
            while (actualEndLine >= j && lines[actualEndLine].trim() === '') {
                actualEndLine--;
            }
            if (actualEndLine >= j) {
                const originalLines = lines.slice(j, actualEndLine + 1);
                // Get language from METADATA block (authoritative source)
                const metadataLanguage = parseMetadataLanguage(lines, cellEndLine, config);
                // Map metadata language to our internal language names
                let language;
                if (metadataLanguage === 'sparksql') {
                    language = 'sparksql';
                }
                else if (metadataLanguage === 'python' || metadataLanguage === 'pyspark') {
                    language = 'python';
                }
                else if (metadataLanguage === 'scala') {
                    language = 'scala';
                }
                else if (metadataLanguage === 'r' || metadataLanguage === 'R') {
                    language = 'r';
                }
                else if (metadataLanguage) {
                    language = metadataLanguage;
                }
                else {
                    // Fallback to magic-based detection if no metadata
                    language = detectCellLanguage(originalLines, config);
                }
                // Extract magic command (e.g., 'sql', 'pyspark', 'configure')
                const magicCommand = extractMagicCommand(originalLines, config);
                // Check if it's a MAGIC cell
                const isMagicCell = originalLines.some(l => l.trim().startsWith(config.magicPrefix.trim()));
                // Extract content
                let content;
                let contentStartLine = j;
                if (isMagicCell) {
                    // Skip the magic command line (%%sql, %%python, etc.)
                    const magicCommandIndex = originalLines.findIndex(l => l.trim().startsWith(config.magicPrefix + '%%'));
                    if (magicCommandIndex >= 0) {
                        const contentLines = originalLines.slice(magicCommandIndex + 1);
                        content = contentLines.map(l => stripMagicPrefix(l, config)).join('\n');
                        contentStartLine = j + magicCommandIndex + 1;
                    }
                    else {
                        content = originalLines.map(l => stripMagicPrefix(l, config)).join('\n');
                    }
                }
                else {
                    content = originalLines.join('\n');
                }
                result.cells.push({
                    contentStartLine: contentStartLine,
                    contentEndLine: actualEndLine,
                    content,
                    originalLines,
                    language,
                    isMagicCell,
                    isRawCell: !isMagicCell,
                    magicCommand,
                });
            }
            i = cellEndLine;
            continue;
        }
        i++;
    }
    return result;
}
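Given a fixture like the one sketched earlier, parseNotebook returns one cell whose content has the MAGIC prefixes stripped. A hedged usage sketch:

```ts
import { parseNotebook } from '@jacobknightley/fabric-format';

// `fixture` as sketched above; any Fabric notebook source string works.
declare const fixture: string;

const nb = parseNotebook(fixture, '.py');
if (nb.isFabricNotebook) {
  for (const cell of nb.cells) {
    // For magic cells, `content` already has '# MAGIC ' stripped.
    console.log(cell.language, cell.magicCommand, cell.isRawCell, cell.content);
  }
}
```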
/**
 * Replace a cell's content in the file.
 */
function replaceCell(fileContent, cell, formattedContent, config, lineEnding) {
    const lines = fileContent.split(/\r?\n/);
    let newLines;
    if (cell.isMagicCell) {
        newLines = addMagicPrefix(formattedContent, config);
    }
    else {
        newLines = formattedContent.split(/\r?\n/);
    }
    const before = lines.slice(0, cell.contentStartLine);
    const after = lines.slice(cell.contentEndLine + 1);
    return [...before, ...newLines, ...after].join(lineEnding);
}
/**
 * Format all cells in a Fabric notebook.
 *
 * This is the high-level API for formatting entire notebook files.
 * It parses the notebook structure, extracts cells, formats them using
 * the low-level formatCell API, and reassembles with proper comment wrappers.
 *
 * @param content The file content
 * @param fileExtension The file extension (e.g., '.py', '.sql', '.scala', '.r')
 * @param options Formatting options
 * @returns Object with formatted content and statistics
 */
export async function formatNotebook(content, fileExtension, options) {
    const formatSparkSqlCells = options?.formatSql ?? true;
    const formatPythonCells = options?.formatPython ?? true;
    const stats = {
        sparkSqlCellsFormatted: 0,
        pythonCellsFormatted: 0,
        cellsSkipped: 0,
        errors: [],
    };
    const notebook = parseNotebook(content, fileExtension);
    if (!notebook.isFabricNotebook || notebook.cells.length === 0 || !notebook.config) {
        return { content, stats };
    }
    const lineEnding = detectLineEnding(content);
    // Initialize Python formatter if needed
    if (formatPythonCells) {
        try {
            await initializePythonFormatter();
        }
        catch (error) {
            stats.errors.push(`Python formatter init failed: ${error}`);
        }
    }
    // Process cells in reverse order (to preserve line numbers)
    let result = content;
    const cellsReversed = [...notebook.cells].reverse();
    for (const cell of cellsReversed) {
        // Determine if this cell should be formatted based on language and magic command
        // Spark SQL cells: format only if %%sql magic or no magic command
        // Python cells: format only if %%pyspark magic or no magic command
        const magicCmd = cell.magicCommand;
        const shouldFormatSparkSql = cell.language === 'sparksql' && formatSparkSqlCells &&
            (magicCmd === null || magicCmd === 'sql');
        const shouldFormatPython = cell.language === 'python' && formatPythonCells &&
            (magicCmd === null || magicCmd === 'pyspark');
        if (shouldFormatSparkSql) {
            // Format using low-level API (cell.content is already stripped of MAGIC prefixes)
            const formatResult = formatCell(cell.content, 'sparksql');
            if (formatResult.changed) {
                // replaceCell will add back MAGIC prefixes if needed
                result = replaceCell(result, cell, formatResult.formatted, notebook.config, lineEnding);
                stats.sparkSqlCellsFormatted++;
            }
            if (formatResult.error) {
                stats.errors.push(formatResult.error);
            }
        }
        else if (shouldFormatPython) {
            // Format using low-level API (cell.content is already stripped of MAGIC prefixes)
            const formatResult = formatCell(cell.content, 'python');
            if (formatResult.changed) {
                // replaceCell will add back MAGIC prefixes if needed
                result = replaceCell(result, cell, formatResult.formatted, notebook.config, lineEnding);
                stats.pythonCellsFormatted++;
            }
            if (formatResult.error) {
                stats.errors.push(formatResult.error);
            }
        }
        else {
            stats.cellsSkipped++;
        }
    }
    return { content: result, stats };
}
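Note the design choice in formatNotebook: cells are replaced back-to-front, because replacing a cell can change the file's total line count, and only the recorded line indices of cells before the edit point stay valid. A hypothetical one-file driver around it (the published dist/cli.js is the real entry point; this is just a sketch):

```ts
import { readFile, writeFile } from 'node:fs/promises';
import { extname } from 'node:path';
import { formatNotebook } from '@jacobknightley/fabric-format';

// Hypothetical driver: format one notebook file in place.
const path = process.argv[2];
const original = await readFile(path, 'utf8');
const { content, stats } = await formatNotebook(original, extname(path));
if (content !== original) {
  await writeFile(path, content);
}
console.log(stats);
```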
package/package.json
ADDED

@@ -0,0 +1,63 @@
{
  "name": "@jacobknightley/fabric-format",
  "version": "0.0.1",
  "type": "module",
  "description": "A fast, opinionated formatter for Microsoft Fabric notebooks with Spark SQL and Python support",
  "main": "dist/index.js",
  "exports": {
    ".": {
      "import": "./dist/index.js",
      "types": "./dist/index.d.ts"
    }
  },
  "types": "dist/index.d.ts",
  "bin": {
    "fabfmt": "dist/cli.js"
  },
  "files": [
    "dist/**/*.js",
    "dist/**/*.d.ts",
    "!dist/tests/**",
    "README.md"
  ],
  "keywords": [
    "fabric",
    "microsoft-fabric",
    "spark",
    "sql",
    "python",
    "pyspark",
    "formatter",
    "notebook",
    "delta-lake",
    "antlr",
    "ruff"
  ],
  "repository": {
    "type": "git",
    "url": "https://github.com/jacobknightley/fabric-format.git"
  },
  "homepage": "https://github.com/jacobknightley/fabric-format#readme",
  "bugs": {
    "url": "https://github.com/jacobknightley/fabric-format/issues"
  },
  "author": "Jacob Knightley",
  "license": "MIT",
  "scripts": {
    "build:antlr": "python .build/build_antlr_js.py",
    "build:ts": "tsc",
    "build:readme": "node -e \"require('fs').copyFileSync('../../README.md', './README.md')\"",
    "build": "npm run build:readme && npm run build:antlr && npm run build:ts",
    "test": "node dist/tests/index.js",
    "test:verbose": "node dist/tests/index.js --verbose",
    "format": "node dist/cli.js"
  },
  "devDependencies": {
    "@types/node": "^20.10.0",
    "typescript": "^5.3.0"
  },
  "dependencies": {
    "@astral-sh/ruff-wasm-web": "^0.14.10",
    "antlr4": "^4.13.2"
  }
}
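For consumers, the manifest declares an ESM-only package ("type": "module") with a single entry point resolved through the "exports" map, plus a `fabfmt` binary pointing at dist/cli.js. Importing the library looks like this, assuming installation from npm:

```ts
// Resolved via the "exports" map above; the package ships its own .d.ts types.
import { formatNotebook, parseNotebook, formatSql } from '@jacobknightley/fabric-format';
```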