@jacobknightley/fabric-format 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +196 -0
  2. package/dist/cell-formatter.d.ts +75 -0
  3. package/dist/cell-formatter.js +144 -0
  4. package/dist/cli.d.ts +2 -0
  5. package/dist/cli.js +435 -0
  6. package/dist/formatters/index.d.ts +19 -0
  7. package/dist/formatters/index.js +76 -0
  8. package/dist/formatters/python/config.d.ts +33 -0
  9. package/dist/formatters/python/config.js +29 -0
  10. package/dist/formatters/python/index.d.ts +7 -0
  11. package/dist/formatters/python/index.js +13 -0
  12. package/dist/formatters/python/python-formatter.d.ts +51 -0
  13. package/dist/formatters/python/python-formatter.js +180 -0
  14. package/dist/formatters/sparksql/constants.d.ts +16 -0
  15. package/dist/formatters/sparksql/constants.js +16 -0
  16. package/dist/formatters/sparksql/fmt-detector.d.ts +65 -0
  17. package/dist/formatters/sparksql/fmt-detector.js +84 -0
  18. package/dist/formatters/sparksql/formatter.d.ts +24 -0
  19. package/dist/formatters/sparksql/formatter.js +1276 -0
  20. package/dist/formatters/sparksql/formatting-context.d.ts +154 -0
  21. package/dist/formatters/sparksql/formatting-context.js +363 -0
  22. package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +529 -0
  23. package/dist/formatters/sparksql/generated/SqlBaseLexer.js +2609 -0
  24. package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +8195 -0
  25. package/dist/formatters/sparksql/generated/SqlBaseParser.js +48793 -0
  26. package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +910 -0
  27. package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +2730 -0
  28. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +456 -0
  29. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +1822 -0
  30. package/dist/formatters/sparksql/generated/builtinFunctions.d.ts +8 -0
  31. package/dist/formatters/sparksql/generated/builtinFunctions.js +510 -0
  32. package/dist/formatters/sparksql/index.d.ts +11 -0
  33. package/dist/formatters/sparksql/index.js +22 -0
  34. package/dist/formatters/sparksql/output-builder.d.ts +89 -0
  35. package/dist/formatters/sparksql/output-builder.js +191 -0
  36. package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +264 -0
  37. package/dist/formatters/sparksql/parse-tree-analyzer.js +1956 -0
  38. package/dist/formatters/sparksql/sql-formatter.d.ts +25 -0
  39. package/dist/formatters/sparksql/sql-formatter.js +56 -0
  40. package/dist/formatters/sparksql/token-utils.d.ts +68 -0
  41. package/dist/formatters/sparksql/token-utils.js +155 -0
  42. package/dist/formatters/sparksql/types.d.ts +264 -0
  43. package/dist/formatters/sparksql/types.js +7 -0
  44. package/dist/formatters/types.d.ts +57 -0
  45. package/dist/formatters/types.js +7 -0
  46. package/dist/index.d.ts +18 -0
  47. package/dist/index.js +41 -0
  48. package/dist/notebook-formatter.d.ts +107 -0
  49. package/dist/notebook-formatter.js +424 -0
  50. package/package.json +63 -0
@@ -0,0 +1,180 @@
1
+ /**
2
+ * Python Formatter
3
+ *
4
+ * Uses Ruff WASM to format Python/PySpark code.
5
+ * Handles Jupyter/IPython magic commands by preserving them.
6
+ */
7
+ import { RUFF_WASM_CONFIG } from './config.js';
8
+ // Dynamic import for ruff WASM (loaded on demand)
9
+ let ruffModule = null;
10
+ let workspace = null;
11
/**
 * Python formatter using Ruff WASM.
 *
 * Lifecycle: construct, `await initialize()`, then call `format()` or
 * `needsFormatting()`. The loaded Ruff module and its workspace are stored in
 * the module-level `ruffModule` / `workspace` bindings, not on the instance.
 */
export class PythonFormatter {
    language = 'python';
    displayName = 'Python (Ruff)';
    /** True once `initialize()` has completed successfully. */
    initialized = false;
    /** Message from the most recent failed `initialize()` attempt, or null. */
    initError = null;
    /** WASM initialization options handed to the constructor (may be undefined). */
    wasmOptions;
    /**
     * Create a new Python formatter.
     * @param options - Optional WASM initialization options for browser environments
     */
    constructor(options) {
        this.wasmOptions = options;
    }
    /**
     * @returns true when initialization has succeeded and no init error is recorded
     */
    isReady() {
        return this.initialized && !this.initError;
    }
    /**
     * Load the Ruff WASM module and create the shared workspace.
     *
     * Returns immediately if this instance is already initialized. A failed
     * attempt records the message in `initError` and may be retried.
     *
     * @throws Error prefixed with "Failed to initialize Python formatter: " on any failure
     */
    async initialize() {
        if (this.initialized)
            return;
        try {
            // Dynamic import of ruff WASM
            ruffModule = await import('@astral-sh/ruff-wasm-web');
            // Initialize WASM module - this must be called before using any classes
            // The default export is the init function that loads the .wasm binary
            if (this.wasmOptions?.wasmBinary) {
                // Use synchronous initialization with provided binary
                ruffModule.initSync({ module: this.wasmOptions.wasmBinary });
            }
            else if (this.wasmOptions?.wasmUrl) {
                // Use async initialization with provided URL
                await ruffModule.default({ module_or_path: this.wasmOptions.wasmUrl });
            }
            else {
                // Default: let ruff-wasm-web use import.meta.url to find the WASM file
                // This works in Node.js and ESM environments but may fail in bundled IIFE
                await ruffModule.default();
            }
            // Create workspace with config
            // Note: ruff WASM prints debug info to stdout during Workspace creation
            // We suppress this by temporarily replacing stdout.write (Node.js only)
            const hasProcess = typeof process !== 'undefined' && process.stdout?.write;
            const originalWrite = hasProcess ? process.stdout.write.bind(process.stdout) : null;
            if (originalWrite) {
                process.stdout.write = () => true; // Suppress output
            }
            try {
                workspace = new ruffModule.Workspace(RUFF_WASM_CONFIG, ruffModule.PositionEncoding.Utf32);
            }
            finally {
                if (originalWrite) {
                    process.stdout.write = originalWrite; // Restore output
                }
            }
            // A previous failed attempt leaves initError set; clear it so that a
            // successful retry actually makes isReady() return true.
            this.initError = null;
            this.initialized = true;
        }
        catch (error) {
            this.initError = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to initialize Python formatter: ${this.initError}`);
        }
    }
    /**
     * Format a cell of Python code, preserving Jupyter/IPython magics.
     *
     * Never throws: failures are reported through the `error` field and the
     * input is returned unchanged.
     *
     * @param code - Cell source text
     * @param options - Optional settings; `stripTrailingNewline` removes trailing newlines from the formatted Python
     * @returns `{ formatted, changed }`, plus `error` when not ready or when formatting fails
     */
    format(code, options) {
        if (!this.isReady() || !workspace) {
            return {
                formatted: code,
                changed: false,
                error: this.initError ?? 'Python formatter not initialized'
            };
        }
        try {
            // Check if the cell starts with a cell magic (%%magic)
            // %%pyspark and %%python contain Python code - format everything after the magic line
            // Other cell magics (%%sql, %%scala, %%r, %%sh, etc.) are not Python - return as-is
            const cellMagicMatch = code.match(/^(%%(\w+).*)\n?/);
            if (cellMagicMatch) {
                const magicLine = cellMagicMatch[1];
                const magicType = cellMagicMatch[2].toLowerCase();
                // Only format Python-based cell magics
                if (magicType === 'pyspark' || magicType === 'python') {
                    // Extract the code after the magic line
                    const codeAfterMagic = code.slice(cellMagicMatch[0].length);
                    if (!codeAfterMagic.trim()) {
                        return { formatted: code, changed: false };
                    }
                    // Format the Python code
                    let formatted = workspace.format(codeAfterMagic);
                    // Strip trailing newline if configured
                    if (options?.stripTrailingNewline) {
                        formatted = formatted.replace(/\n+$/, '');
                    }
                    // Recombine with magic line
                    const result = magicLine + '\n' + formatted;
                    return { formatted: result, changed: result !== code };
                }
                // Non-Python cell magics - return as-is
                return { formatted: code, changed: false };
            }
            // Handle line magics (%magic) at the start of lines
            const lines = code.split('\n');
            const magicPrefix = [];
            let pythonStartIndex = 0;
            // Collect leading line magics, comments and blank lines; these are kept
            // verbatim and are not passed to the formatter
            for (let i = 0; i < lines.length; i++) {
                const trimmed = lines[i].trim();
                if (trimmed.startsWith('%') || trimmed.startsWith('#') || trimmed === '') {
                    magicPrefix.push(lines[i]);
                    pythonStartIndex = i + 1;
                }
                else {
                    break;
                }
            }
            // If entire code is magics/comments, return as-is
            if (pythonStartIndex >= lines.length) {
                return { formatted: code, changed: false };
            }
            // Extract Python code to format
            const pythonCode = lines.slice(pythonStartIndex).join('\n');
            // Format the Python portion
            let formatted = workspace.format(pythonCode);
            // Post-processing: Strip trailing newline if configured
            if (options?.stripTrailingNewline) {
                formatted = formatted.replace(/\n+$/, '');
            }
            // Recombine with magic prefix
            if (magicPrefix.length > 0) {
                formatted = magicPrefix.join('\n') + '\n' + formatted;
            }
            const changed = formatted !== code;
            return { formatted, changed };
        }
        catch (error) {
            return {
                formatted: code,
                changed: false,
                error: error instanceof Error ? error.message : String(error)
            };
        }
    }
    /**
     * Report whether formatting would change the code.
     *
     * Runs a full `format()` pass; returns false when the formatter is not
     * ready or formatting fails, because `format()` reports `changed: false` then.
     *
     * @param code - Cell source text
     * @param options - Same options accepted by `format()`
     * @returns the `changed` flag of the format result
     */
    needsFormatting(code, options) {
        const result = this.format(code, options);
        return result.changed;
    }
}
157
/**
 * Report whether a cell/file type identifier denotes Python or PySpark.
 *
 * @param cellType - Type identifier to classify
 * @returns true only for the exact strings 'python' and 'pyspark'
 */
export function isPythonCode(cellType) {
    switch (cellType) {
        case 'python':
        case 'pyspark':
            return true;
        default:
            return false;
    }
}
163
+ /** Singleton instance */
164
+ let pythonFormatterInstance = null;
165
/**
 * Return the shared Python formatter, constructing it lazily on first use.
 * @param options - Optional WASM initialization options. Ignored when an instance already exists.
 */
export function getPythonFormatter(options) {
    if (pythonFormatterInstance) {
        return pythonFormatterInstance;
    }
    pythonFormatterInstance = new PythonFormatter(options);
    return pythonFormatterInstance;
}
175
/**
 * Drop the cached Python formatter so the next getPythonFormatter() call
 * constructs a fresh instance (for testing or reinitialization with different options).
 * Only the cached instance reference is cleared here.
 */
export function resetPythonFormatter() {
    pythonFormatterInstance = null;
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Formatting Constants
3
+ *
4
+ * Central location for configurable formatting thresholds and limits.
5
+ * These values control line-width based expansion decisions.
6
+ *
7
+ * Line width checks use: currentColumn + expressionSpan > threshold
8
+ * This ensures the FULL LINE (including indentation) stays under the limit,
9
+ * consistent with formatters like ruff, prettier, etc.
10
+ */
11
+ /**
12
+ * Maximum desired line width.
13
+ * Expressions are expanded to multiple lines if they would cause the
14
+ * full line (including indentation) to exceed this width.
15
+ */
16
+ export declare const MAX_LINE_WIDTH = 140;
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Formatting Constants
3
+ *
4
+ * Central location for configurable formatting thresholds and limits.
5
+ * These values control line-width based expansion decisions.
6
+ *
7
+ * Line width checks use: currentColumn + expressionSpan > threshold
8
+ * This ensures the FULL LINE (including indentation) stays under the limit,
9
+ * consistent with formatters like ruff, prettier, etc.
10
+ */
11
+ /**
12
+ * Maximum desired line width.
13
+ * Expressions are expanded to multiple lines if they would cause the
14
+ * full line (including indentation) to exceed this width.
15
+ */
16
+ export const MAX_LINE_WIDTH = 140;
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Format Directive Detection - Identifies formatting suppression directives
3
+ *
4
+ * Supports two types of format directives:
5
+ * 1. Statement-level: "-- fmt: off" or block comment at start of statement
6
+ * - Bypasses all formatting for the entire statement
7
+ * 2. Line-level inline: "-- fmt: inline" or block comment version
8
+ * - Suppresses multi-line expansion while keeping other formatting
9
+ */
10
+ /**
11
+ * A token range that should be forced inline (no expansion).
12
+ * Used for fmt:inline directive processing.
13
+ */
14
+ export interface ForceInlineRange {
15
+ /** Opening token index (e.g., LEFT_PAREN of function) */
16
+ openTokenIndex: number;
17
+ /** Closing token index (e.g., RIGHT_PAREN of function) */
18
+ closeTokenIndex: number;
19
+ }
20
+ /**
21
+ * Information about format directives in the SQL.
22
+ */
23
+ export interface FormatDirectiveInfo {
24
+ /** Set of 1-based line numbers with fmt:inline directives (legacy, for backward compat) */
25
+ collapsedLines: Set<number>;
26
+ /** Token ranges that should be forced inline (grammar-driven approach) */
27
+ forceInlineRanges: ForceInlineRange[];
28
+ }
29
+ /**
30
+ * Check if a statement starts with a fmt:off directive (full bypass).
31
+ *
32
+ * @param statement - The SQL statement to check
33
+ * @returns true if the statement should bypass formatting entirely
34
+ */
35
+ export declare function hasFormatOff(statement: string): boolean;
36
+ /**
37
+ * Detect all fmt:inline directives in a SQL string.
38
+ *
39
+ * @param sql - The SQL string to scan
40
+ * @returns FormatDirectiveInfo with line numbers that have inline directives
41
+ */
42
+ export declare function detectCollapseDirectives(sql: string): FormatDirectiveInfo;
43
+ /**
44
+ * Check if a specific line has an inline directive.
45
+ *
46
+ * @param formatDirectives - The FormatDirectiveInfo from detectCollapseDirectives
47
+ * @param lineNumber - 1-based line number to check
48
+ * @returns true if the line has fmt:inline
49
+ */
50
+ export declare function hasCollapseDirective(formatDirectives: FormatDirectiveInfo, lineNumber: number): boolean;
51
+ /**
52
+ * Check if a comment text contains a fmt:inline directive.
53
+ *
54
+ * @param commentText - The comment text to check (including -- or /* markers)
55
+ * @returns true if the comment contains fmt:inline
56
+ */
57
+ export declare function isFmtInlineComment(commentText: string): boolean;
58
+ /**
59
+ * Check if a token index falls within any force-inline range.
60
+ *
61
+ * @param tokenIndex - The token index to check
62
+ * @param ranges - Array of force-inline ranges
63
+ * @returns true if the token is within a force-inline range
64
+ */
65
+ export declare function isInForceInlineRange(tokenIndex: number, ranges: ForceInlineRange[]): boolean;
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Format Directive Detection - Identifies formatting suppression directives
3
+ *
4
+ * Supports two types of format directives:
5
+ * 1. Statement-level: "-- fmt: off" or block comment at start of statement
6
+ * - Bypasses all formatting for the entire statement
7
+ * 2. Line-level inline: "-- fmt: inline" or block comment version
8
+ * - Suppresses multi-line expansion while keeping other formatting
9
+ */
10
+ // ============================================================================
11
+ // REGEX PATTERNS
12
+ // ============================================================================
13
+ /**
14
+ * Pattern to detect statement-level fmt:off at the start of a statement.
15
+ * Matches: "-- fmt: off" or "-- fmt:off" or block comment version (case-insensitive)
+ * Only leading whitespace may precede the directive. The line-comment form must be
+ * followed by whitespace or the end of the input, so "-- fmt: offset" does not match.
16
+ */
17
+ const STATEMENT_OFF_PATTERN = /^\s*(?:--\s*fmt\s*:\s*off\s*$|--\s*fmt\s*:\s*off\s+|\/\*\s*fmt\s*:\s*off\s*\*\/)/i;
18
+ /**
19
+ * Pattern to detect line-level fmt:inline anywhere on a line.
20
+ * Matches: "-- fmt: inline" or "-- fmt:inline" or block comment version (case-insensitive)
+ * Unanchored: the directive may appear at any position in the tested text. The
+ * line-comment form has no trailing boundary, so "-- fmt: inlined" also matches.
21
+ */
22
+ const COLLAPSE_PATTERN = /(?:--\s*fmt\s*:\s*inline|\/\*\s*fmt\s*:\s*inline\s*\*\/)/i;
23
+ // ============================================================================
24
+ // PUBLIC API
25
+ // ============================================================================
26
/**
 * Decide whether a statement opts out of formatting via a leading fmt:off directive.
 *
 * @param statement - The SQL statement text to inspect
 * @returns true when STATEMENT_OFF_PATTERN matches, i.e. the statement should bypass formatting entirely
 */
export function hasFormatOff(statement) {
    const startsWithOffDirective = STATEMENT_OFF_PATTERN.test(statement);
    return startsWithOffDirective;
}
35
/**
 * Scan SQL text line by line for fmt:inline directives.
 *
 * @param sql - The SQL string to scan (lines are split on '\n')
 * @returns FormatDirectiveInfo whose `collapsedLines` holds the 1-based numbers of
 *          matching lines; `forceInlineRanges` is always returned empty here
 */
export function detectCollapseDirectives(sql) {
    const collapsedLines = new Set();
    sql.split('\n').forEach((lineText, zeroBasedIndex) => {
        if (COLLAPSE_PATTERN.test(lineText)) {
            // Callers address lines with 1-based numbers
            collapsedLines.add(zeroBasedIndex + 1);
        }
    });
    return { collapsedLines, forceInlineRanges: [] };
}
51
/**
 * Look up whether a given line was recorded as carrying an inline directive.
 *
 * @param formatDirectives - The FormatDirectiveInfo from detectCollapseDirectives
 * @param lineNumber - 1-based line number to check
 * @returns true if `collapsedLines` contains that line number
 */
export function hasCollapseDirective(formatDirectives, lineNumber) {
    const { collapsedLines } = formatDirectives;
    return collapsedLines.has(lineNumber);
}
61
/**
 * Test a single comment's text for a fmt:inline directive.
 *
 * @param commentText - The comment text to check, comment markers included
 * @returns true when COLLAPSE_PATTERN matches somewhere in the text
 */
export function isFmtInlineComment(commentText) {
    const containsInlineDirective = COLLAPSE_PATTERN.test(commentText);
    return containsInlineDirective;
}
70
/**
 * Determine whether a token lies inside at least one force-inline range.
 *
 * @param tokenIndex - The token index to check
 * @param ranges - Array of force-inline ranges
 * @returns true if some range satisfies open <= tokenIndex <= close (both ends inclusive)
 */
export function isInForceInlineRange(tokenIndex, ranges) {
    return ranges.some(({ openTokenIndex, closeTokenIndex }) => openTokenIndex <= tokenIndex && tokenIndex <= closeTokenIndex);
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Spark SQL Formatter - Main Entry Point
3
+ *
4
+ * This is the 100% grammar-driven SQL formatter for Apache Spark SQL.
5
+ * NO HARDCODED KEYWORD, FUNCTION, OR CLAUSE LISTS.
6
+ * Everything derived from ANTLR lexer symbolicNames and parse tree context.
7
+ *
8
+ * Architecture:
9
+ * - types.ts: TypeScript interfaces
10
+ * - token-utils.ts: Grammar-derived token detection
11
+ * - parse-tree-analyzer.ts: AST visitor that collects formatting context
12
+ * - formatting-context.ts: State management during formatting
13
+ * - output-builder.ts: Output construction with column tracking
14
+ * - formatter.ts (this file): Main orchestration
15
+ */
16
+ /**
17
+ * Format SQL - Main entry point.
18
+ * Handles magic commands, semicolon-separated statements, and formatting.
19
+ */
20
+ export declare function formatSql(sql: string): string;
21
+ /**
22
+ * Check if SQL needs formatting.
23
+ */
24
+ export declare function needsFormatting(sql: string): boolean;