@jacobknightley/fabric-format 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +196 -0
  2. package/dist/cell-formatter.d.ts +75 -0
  3. package/dist/cell-formatter.js +144 -0
  4. package/dist/cli.d.ts +2 -0
  5. package/dist/cli.js +435 -0
  6. package/dist/formatters/index.d.ts +19 -0
  7. package/dist/formatters/index.js +76 -0
  8. package/dist/formatters/python/config.d.ts +33 -0
  9. package/dist/formatters/python/config.js +29 -0
  10. package/dist/formatters/python/index.d.ts +7 -0
  11. package/dist/formatters/python/index.js +13 -0
  12. package/dist/formatters/python/python-formatter.d.ts +51 -0
  13. package/dist/formatters/python/python-formatter.js +180 -0
  14. package/dist/formatters/sparksql/constants.d.ts +16 -0
  15. package/dist/formatters/sparksql/constants.js +16 -0
  16. package/dist/formatters/sparksql/fmt-detector.d.ts +65 -0
  17. package/dist/formatters/sparksql/fmt-detector.js +84 -0
  18. package/dist/formatters/sparksql/formatter.d.ts +24 -0
  19. package/dist/formatters/sparksql/formatter.js +1276 -0
  20. package/dist/formatters/sparksql/formatting-context.d.ts +154 -0
  21. package/dist/formatters/sparksql/formatting-context.js +363 -0
  22. package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +529 -0
  23. package/dist/formatters/sparksql/generated/SqlBaseLexer.js +2609 -0
  24. package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +8195 -0
  25. package/dist/formatters/sparksql/generated/SqlBaseParser.js +48793 -0
  26. package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +910 -0
  27. package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +2730 -0
  28. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +456 -0
  29. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +1822 -0
  30. package/dist/formatters/sparksql/generated/builtinFunctions.d.ts +8 -0
  31. package/dist/formatters/sparksql/generated/builtinFunctions.js +510 -0
  32. package/dist/formatters/sparksql/index.d.ts +11 -0
  33. package/dist/formatters/sparksql/index.js +22 -0
  34. package/dist/formatters/sparksql/output-builder.d.ts +89 -0
  35. package/dist/formatters/sparksql/output-builder.js +191 -0
  36. package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +264 -0
  37. package/dist/formatters/sparksql/parse-tree-analyzer.js +1956 -0
  38. package/dist/formatters/sparksql/sql-formatter.d.ts +25 -0
  39. package/dist/formatters/sparksql/sql-formatter.js +56 -0
  40. package/dist/formatters/sparksql/token-utils.d.ts +68 -0
  41. package/dist/formatters/sparksql/token-utils.js +155 -0
  42. package/dist/formatters/sparksql/types.d.ts +264 -0
  43. package/dist/formatters/sparksql/types.js +7 -0
  44. package/dist/formatters/types.d.ts +57 -0
  45. package/dist/formatters/types.js +7 -0
  46. package/dist/index.d.ts +18 -0
  47. package/dist/index.js +41 -0
  48. package/dist/notebook-formatter.d.ts +107 -0
  49. package/dist/notebook-formatter.js +424 -0
  50. package/package.json +63 -0
@@ -0,0 +1,191 @@
1
+ /**
2
+ * Output Builder - Token Output and Column Tracking
3
+ *
4
+ * This module handles the construction of the formatted output string.
5
+ * It tracks column position for line-width decisions and provides
6
+ * utilities for spacing and newline insertion.
7
+ */
8
+ import { SqlBaseLexer } from './token-utils.js';
9
/**
 * Builds the formatted output string with column tracking.
 *
 * Text fragments are collected in `output` and joined on demand by
 * `toString()`. `currentColumn` holds the number of characters written
 * since the most recent '\n' (or since the start when no newline has
 * been written yet).
 */
export class OutputBuilder {
    output = [];
    currentColumn = 0;
    /**
     * Push text to output and update column tracking.
     *
     * Empty strings are ignored. Storing one would make it the "last
     * fragment", so getLastChar() would report '' even though earlier
     * output exists, and the spacing/newline helpers below would then
     * make the wrong decision (e.g. ensureNewline() adding a second '\n').
     *
     * @param {string} text Fragment to append.
     */
    push(text) {
        if (text === '') {
            return;
        }
        this.output.push(text);
        this.updateColumn(text);
    }
    /**
     * Get the current column position.
     *
     * @returns {number} Characters written since the last newline.
     */
    getColumn() {
        return this.currentColumn;
    }
    /**
     * Get the last character that was output.
     *
     * @returns {string} The final character written, or '' when nothing
     *   has been written yet.
     */
    getLastChar() {
        if (this.output.length === 0) {
            return '';
        }
        const lastStr = this.output[this.output.length - 1];
        return lastStr.charAt(lastStr.length - 1);
    }
    /**
     * Check if output is empty.
     *
     * @returns {boolean} True when no fragment has been stored.
     */
    isEmpty() {
        return this.output.length === 0;
    }
    /**
     * Check if the last output ends with a newline.
     *
     * @returns {boolean} True when the last written character is '\n'.
     */
    endsWithNewline() {
        return this.getLastChar() === '\n';
    }
    /**
     * Get the final formatted string.
     *
     * @returns {string} All fragments joined, with leading and trailing
     *   whitespace trimmed.
     */
    toString() {
        return this.output.join('').trim();
    }
    /**
     * Update column tracking based on text content.
     *
     * When the text contains a newline the column restarts after the last
     * one; otherwise the text length is added to the running column.
     *
     * @param {string} text Fragment that was just appended.
     */
    updateColumn(text) {
        const lastNewline = text.lastIndexOf('\n');
        if (lastNewline >= 0) {
            this.currentColumn = text.length - lastNewline - 1;
        }
        else {
            this.currentColumn += text.length;
        }
    }
    /**
     * Add a space if needed before the next token.
     *
     * No space is added at the very start, after an existing space or
     * newline, or directly after '(' or '['.
     */
    addSpaceIfNeeded() {
        const lastChar = this.getLastChar();
        if (lastChar !== '' && lastChar !== ' ' && lastChar !== '\n' &&
            lastChar !== '(' && lastChar !== '[') {
            this.push(' ');
        }
    }
    /**
     * Ensure we're at the start of a new line.
     *
     * Does nothing when the output is empty or already ends with '\n'.
     */
    ensureNewline() {
        if (!this.isEmpty() && !this.endsWithNewline()) {
            this.push('\n');
        }
    }
}
86
/**
 * Writes every pending comment to the builder.
 *
 * For each comment, in order:
 *  - if the comment had a blank line before it and output already exists,
 *    the builder is moved to a fresh line and one extra '\n' is written;
 *  - otherwise, when a leading space is wanted and output already exists,
 *    a single space is written unless the output ends in '\n' or ' ', or
 *    ends in '(' and the comment is not a line comment;
 *  - the comment text is written, followed by '\n' when it is a bracketed
 *    comment whose text contains a newline.
 * The caller's `addSpaceBefore` only governs the first comment; every later
 * comment is treated as wanting a leading space.
 *
 * @param builder Output sink exposing isEmpty/getLastChar/ensureNewline/push.
 * @param comments Pending comments ({ type, text, hadBlankLineBefore }).
 * @param addSpaceBefore Whether the first comment may be preceded by a space.
 * @returns {{outputAny: boolean, lastWasMultilineBlock: boolean}} Whether
 *   anything was written, and whether the final comment was a multi-line
 *   bracketed comment.
 */
export function outputComments(builder, comments, addSpaceBefore = true) {
    let endedWithMultilineBlock = false;
    if (comments.length === 0) {
        return { outputAny: false, lastWasMultilineBlock: endedWithMultilineBlock };
    }
    let wantLeadingSpace = addSpaceBefore;
    for (const entry of comments) {
        if (entry.hadBlankLineBefore && !builder.isEmpty()) {
            // Reproduce the blank line that preceded the comment in the input.
            builder.ensureNewline();
            builder.push('\n');
        }
        else if (wantLeadingSpace && !builder.isEmpty()) {
            const tail = builder.getLastChar();
            const isLineComment = entry.type === SqlBaseLexer.SIMPLE_COMMENT;
            const alreadySeparated = tail === '\n' || tail === ' ';
            // A line comment still gets its space right after an open paren.
            const blockedByOpenParen = tail === '(' && !isLineComment;
            if (!alreadySeparated && !blockedByOpenParen) {
                builder.push(' ');
            }
        }
        builder.push(entry.text);
        endedWithMultilineBlock = entry.type === SqlBaseLexer.BRACKETED_COMMENT &&
            entry.text.includes('\n');
        if (endedWithMultilineBlock) {
            // Whatever follows a multi-line block comment starts on its own line.
            builder.push('\n');
        }
        wantLeadingSpace = true;
    }
    return { outputAny: true, lastWasMultilineBlock: endedWithMultilineBlock };
}
123
/**
 * Decides whether the separating space before the current token is omitted.
 *
 * @param builder Output sink; only getLastChar() is consulted.
 * @param text Text of the token about to be written.
 * @param context Flags describing the previous/current token and position.
 * @returns A truthy value when no space should be written before `text`,
 *   a falsy value otherwise.
 */
export function shouldSkipSpace(builder, text, context) {
    const tail = builder.getLastChar();
    // Hex/binary literals are written as X'...' / B'...' (prefix is case-insensitive),
    // so the string literal must stay attached to its prefix.
    const prevUpper = context.prevTokenText.toUpperCase();
    const afterHexOrBinaryPrefix = (prevUpper === 'X' || prevUpper === 'B') &&
        context.currentTokenIsStringLiteral;
    // Two unary operators in a row (e.g. "- -5") must remain separated:
    // gluing them together would yield "--", which starts a line comment.
    if (context.prevTokenWasUnaryOperator && context.currentTokenIsUnaryOperator) {
        return false;
    }
    // A trailing '.' only counts as member access when the previous token was
    // a real DOT token, not the end of a decimal literal such as "1.".
    const afterMemberDot = tail === '.' && context.prevWasDotToken;
    // Complex types (ARRAY<INT>, MAP<STRING, INT>, STRUCT<a:INT>) take no
    // spaces around '<', '>' and ':'.
    const withinComplexType = context.complexTypeDepth > 0;
    const tokenIsAngle = text === '<' || text === '>';
    const tailIsAngle = tail === '<' || tail === '>';

    // The groups below are evaluated in the same order as a single flat chain.
    const attachesToPreviousOutput = tail === '(' || afterMemberDot || tail === '\n';
    const tokenBindsLeft = text === ')' || text === '.' || text === ',' || text === '::';
    const callParen = text === '(' &&
        (context.prevWasFunctionName || context.prevWasBuiltInFunctionKeyword);
    const contextSuppressesSpace = context.prevIsDoubleColon ||
        callParen ||
        context.isLateralViewComma ||
        context.justOutputCommaFirstStyle ||
        context.justOutputMultiArgFunctionNewline ||
        context.justOutputWindowNewline ||
        context.justOutputInListWrapNewline ||
        context.afterWhereKeyword ||
        context.afterHavingKeyword ||
        context.prevTokenWasUnaryOperator;
    const squareBracketAdjacent = tail === '[' || text === '[' || text === ']';
    const complexTypePunctuation = (withinComplexType && tokenIsAngle) ||
        (withinComplexType && tailIsAngle) ||
        (text === ':' && withinComplexType) ||
        (tail === ':' && withinComplexType);

    return attachesToPreviousOutput ||
        tokenBindsLeft ||
        contextSuppressesSpace ||
        squareBracketAdjacent ||
        afterHexOrBinaryPrefix ||
        complexTypePunctuation;
}
175
/**
 * Tells whether a space should follow a comma that was just written.
 *
 * @param builder Output sink; only getLastChar() is consulted.
 * @param insideParens Current parenthesis nesting depth.
 * @param justOutputCommaFirstStyle Whether the comma was written comma-first.
 * @returns {boolean} True only when the output ends in ',', the depth is
 *   greater than zero, and the comma was not written comma-first.
 */
export function shouldAddCommaSpace(builder, insideParens, justOutputCommaFirstStyle) {
    if (builder.getLastChar() !== ',') {
        return false;
    }
    if (!(insideParens > 0)) {
        return false;
    }
    return !justOutputCommaFirstStyle;
}
183
/**
 * Upper-cases hint names inside hint content while leaving arguments as-is.
 *
 * A hint name is an identifier ([a-zA-Z_][a-zA-Z0-9_]*) that is followed,
 * after optional whitespace, by '('. Whitespace between the name and the
 * paren is dropped in the result.
 * Example: "broadcast(t1), merge(t2)" → "BROADCAST(t1), MERGE(t2)"
 *
 * @param {string} content Raw hint content.
 * @returns {string} Content with every hint name upper-cased.
 */
export function formatHintContent(content) {
    const hintNameBeforeParen = /([a-zA-Z_][a-zA-Z0-9_]*)\s*(\()/g;
    const upperCaseName = (_whole, hintName, openParen) => `${hintName.toUpperCase()}${openParen}`;
    return content.replace(hintNameBeforeParen, upperCaseName);
}
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Parse Tree Analyzer - Collects Formatting Context from AST
3
+ *
4
+ * This visitor walks the ANTLR parse tree and collects information about:
5
+ * - Identifier tokens (preserve casing)
6
+ * - Function call tokens (uppercase)
7
+ * - Clause-starting tokens (newline before)
8
+ * - List item separators (commas in SELECT, GROUP BY, ORDER BY)
9
+ * - Condition separators (AND/OR in WHERE/HAVING)
10
+ * - Subquery boundaries
11
+ * - And many more context-specific positions
12
+ *
13
+ * This is 100% grammar-driven - no hardcoded keyword lists.
14
+ */
15
+ import SqlBaseParserVisitor from './generated/SqlBaseParserVisitor.js';
16
+ import type { AnalyzerResult, MultiArgFunctionInfo, WindowDefInfo, SimpleQueryInfo, PivotInfo, InListInfo } from './types.js';
17
+ /**
18
+ * Visitor (a SqlBaseParserVisitor subclass) that records formatting context while walking the parse tree.
19
+ * After visiting, call getResult() to obtain the collected AnalyzerResult.
20
+ */
21
+ export declare class ParseTreeAnalyzer extends SqlBaseParserVisitor {
22
+ identifierTokens: Set<number>; // NOTE(review): the Set<number>/Map<number, ...> fields presumably key on token indices — confirm in parse-tree-analyzer.js
23
+ functionCallTokens: Set<number>;
24
+ clauseStartTokens: Set<number>;
25
+ qualifiedNameTokens: Set<number>;
26
+ listItemCommas: Set<number>;
27
+ listFirstItems: Set<number>;
28
+ multiItemClauses: Set<number>;
29
+ conditionOperators: Set<number>;
30
+ multilineConditionClauses: Set<number>;
31
+ betweenAndTokens: Set<number>;
32
+ subqueryDepth: number;
33
+ tokenDepthMap: Map<number, number>;
34
+ subqueryOpenParens: Set<number>;
35
+ subqueryCloseParens: Set<number>;
36
+ setOperandParens: Set<number>;
37
+ aliasInsertPositions: Set<number>;
38
+ tableAliasAsTokens: Set<number>;
39
+ joinOnTokens: Set<number>;
40
+ cteCommas: Set<number>;
41
+ cteMainSelectTokens: Set<number>;
42
+ ddlColumnCommas: Set<number>;
43
+ ddlOpenParens: Set<number>;
44
+ ddlCloseParens: Set<number>;
45
+ ddlFirstColumn: Set<number>;
46
+ ddlMultiColumn: Set<number>;
47
+ valuesCommas: Set<number>;
48
+ valuesHasTuples: boolean;
49
+ setClauseCommas: Set<number>;
50
+ setKeywordToken: number; // a single number rather than a Set, unlike the neighbouring fields
51
+ multiWhenCaseTokens: Set<number>;
52
+ caseWhenTokens: Set<number>;
53
+ caseElseTokens: Set<number>;
54
+ caseEndTokens: Set<number>;
55
+ simpleCaseTokens: Set<number>;
56
+ simpleCaseValueEndTokens: Set<number>;
57
+ groupingAnalyticsParens: Set<number>;
58
+ private insideGroupingAnalytics; // private member: its type is not emitted in this declaration
59
+ exceptClauseTokens: Set<number>;
60
+ setConfigTokens: Set<number>;
61
+ mergeUsingTokens: Set<number>;
62
+ mergeOnTokens: Set<number>;
63
+ mergeWhenTokens: Set<number>;
64
+ lateralViewCommas: Set<number>;
65
+ groupByAllTokens: Set<number>;
66
+ multiArgFunctionInfo: Map<number, MultiArgFunctionInfo>;
67
+ windowDefInfo: Map<number, WindowDefInfo>;
68
+ pivotInfo: Map<number, PivotInfo>;
69
+ inListInfo: Map<number, InListInfo>;
70
+ simpleQueries: Map<number, SimpleQueryInfo>;
71
+ private currentSelectToken; // private member: its type is not emitted in this declaration
72
+ /**
73
+ * Return the complete analysis result (an AnalyzerResult); call this after visiting.
74
+ */
75
+ getResult(): AnalyzerResult;
76
+ visit(ctx: any): any; // every visit* signature in this declaration takes and returns 'any'
77
+ visitChildren(ctx: any): any;
78
+ visitIdentifier(ctx: any): any;
79
+ visitStrictIdentifier(ctx: any): any;
80
+ visitQuotedIdentifier(ctx: any): any;
81
+ visitBackQuotedIdentifier(ctx: any): any;
82
+ visitUnquotedIdentifier(ctx: any): any;
83
+ visitErrorCapturingIdentifier(ctx: any): any;
84
+ /**
85
+ * Visit qualified name (e.g., table.column, db.schema.table.column)
86
+ * GRAMMAR-DRIVEN: qualifiedName : identifier (DOT identifier)*
87
+ *
88
+ * Context-sensitive keyword handling: In qualified names, even tokens that are
89
+ * keywords (like USER, TABLE) should be treated as identifiers and preserve casing.
90
+ * This is because the grammar context (qualifiedName rule) makes them identifiers.
91
+ */
92
+ visitQualifiedName(ctx: any): any;
93
+ /**
94
+ * Visit dereference (field access like user.address, table.column)
95
+ * GRAMMAR-DRIVEN: base=primaryExpression DOT fieldName=identifier
96
+ *
97
+ * When a keyword like USER or TABLE appears before DOT, it should be treated
98
+ * as an identifier (table/column alias), not as a keyword.
99
+ * Similarly, keywords appearing as field names (like KEY, ORDER) should preserve casing.
100
+ */
101
+ visitDereference(ctx: any): any;
102
+ visitFunctionCall(ctx: any): any;
103
+ visitFunctionName(ctx: any): any;
104
+ visitFirst(ctx: any): any;
105
+ visitLast(ctx: any): any;
106
+ visitAny_value(ctx: any): any;
107
+ visitStruct(ctx: any): any;
108
+ visitExtract(ctx: any): any;
109
+ visitCast(ctx: any): any;
110
+ visitPosition(ctx: any): any;
111
+ visitTimestampadd(ctx: any): any;
112
+ visitTimestampdiff(ctx: any): any;
113
+ visitLateralView(ctx: any): any;
114
+ visitSearchedCase(ctx: any): any;
115
+ visitSimpleCase(ctx: any): any;
116
+ visitExceptClause(ctx: any): any;
117
+ visitFromClause(ctx: any): any;
118
+ /**
119
+ * Visit table alias context and mark AS tokens for suppression.
120
+ * Style guide says table aliases should NOT have AS keyword.
121
+ * Grammar: tableAlias: (AS? strictIdentifier identifierList?)?
122
+ */
123
+ visitTableAlias(ctx: any): any;
124
+ visitAggregationClause(ctx: any): any;
125
+ visitGroupingAnalytics(ctx: any): any;
126
+ visitQueryOrganization(ctx: any): any;
127
+ visitSortItem(ctx: any): any;
128
+ visitLimitClause(ctx: any): any;
129
+ visitJoinRelation(ctx: any): any;
130
+ visitWindowDef(ctx: any): any;
131
+ visitPivotClause(ctx: any): any;
132
+ visitUnpivotClause(ctx: any): any;
133
+ visitSetOperation(ctx: any): any;
134
+ visitSelectClause(ctx: any): any;
135
+ visitNamedExpression(ctx: any): any;
136
+ visitNamedExpressionSeq(ctx: any): any;
137
+ visitGroupByClause(ctx: any): any;
138
+ visitWhereClause(ctx: any): any;
139
+ visitHavingClause(ctx: any): any;
140
+ visitPredicate(ctx: any): any;
141
+ visitQuery(ctx: any): any;
142
+ private _findFirstSelectToken; // private helpers are declared by name only (no signature in this .d.ts)
143
+ visitCtes(ctx: any): any;
144
+ visitNamedQuery(ctx: any): any;
145
+ visitAliasedQuery(ctx: any): any;
146
+ visitExists(ctx: any): any;
147
+ visitSubqueryExpression(ctx: any): any;
148
+ visitSubquery(ctx: any): any;
149
+ visitCreateTableHeader(ctx: any): any;
150
+ visitCreateTable(ctx: any): any;
151
+ visitCreateUserDefinedFunction(ctx: any): any;
152
+ visitInsertInto(ctx: any): any;
153
+ visitInlineTable(ctx: any): any;
154
+ visitUpdateTable(ctx: any): any;
155
+ visitSetConfiguration(ctx: any): any;
156
+ visitResetConfiguration(ctx: any): any;
157
+ visitMergeIntoTable(ctx: any): any;
158
+ visitQuerySpecification(ctx: any): any;
159
+ visitRegularQuerySpecification(ctx: any): any;
160
+ private _visitQuerySpec;
161
+ /**
162
+ * Check if a context is inside a CREATE VIEW/TABLE statement at the top level.
163
+ * Queries inside these DDL statements should never be compacted.
164
+ */
165
+ private _isInsideCreateStatement;
166
+ /**
167
+ * Analyze if a query is simple enough to stay on one line.
168
+ * Simple query criteria:
169
+ * - SELECT has 1 item (including *, t.*)
170
+ * - FROM has 1 table (no JOINs)
171
+ * - WHERE has 0 or 1 condition (no AND/OR at top level)
172
+ * - GROUP BY / ORDER BY / HAVING are absent, or present only in single-item form
173
+ * - LIMIT/OFFSET is absent, or there is only a simple LIMIT
174
+ * - NOT inside a CREATE VIEW/TABLE statement (those always expand)
175
+ */
176
+ private _analyzeSimpleQuery;
177
+ /**
178
+ * Check if FROM clause contains any JOINs.
179
+ */
180
+ private _hasJoinInFromClause;
181
+ /**
182
+ * Check if FROM clause contains PIVOT or UNPIVOT with many items.
183
+ * Simple PIVOT with few items can stay compact.
184
+ */
185
+ private _hasPivotUnpivotInFromClause;
186
+ /**
187
+ * Check if SELECT clause has a single item (*, t.*, or one expression).
188
+ */
189
+ private _hasSingleSelectItem;
190
+ /**
191
+ * Check if a clause contains a CASE expression with multiple WHEN clauses.
192
+ * Such CASE expressions force expansion and make the query non-compact.
193
+ */
194
+ private _hasMultiWhenCase;
195
+ /**
196
+ * Check if WHERE/HAVING clause has multiple conditions (AND/OR at top level).
197
+ */
198
+ private _hasMultipleConditions;
199
+ /**
200
+ * Calculate the expected formatted span length of a context.
201
+ *
202
+ * This walks all tokens within the context and sums:
203
+ * - Each token's text length
204
+ * - One space between each pair of tokens (standard formatting)
205
+ *
206
+ * This gives an accurate estimate of the formatted output length.
207
+ *
208
+ * @param ctx The parse tree context
209
+ * @param forExpansion If true, returns Infinity for multi-line constructs
210
+ * to prevent already-expanded constructs from collapsing.
211
+ * If false, calculates actual span (for simple query detection).
212
+ */
213
+ private _calculateSpanLength;
214
+ /**
215
+ * Calculate normalized span length independent of input formatting.
216
+ * This sums up token text lengths + single spaces between tokens,
217
+ * giving a consistent "single-line" representation length.
218
+ *
219
+ * CRITICAL FOR IDEMPOTENCY: a span measured from character positions varies with
220
+ * how the input is formatted (line breaks, extra spaces), which causes different
221
+ * expansion decisions on subsequent passes; token text lengths do not vary that way.
222
+ * NOTE(review): the doc on _calculateSpanLength describes a token-length sum, not character positions — confirm which is current.
223
+ */
224
+ private _calculateNormalizedSpanLength;
225
+ private _collectMultiArgFunctionInfo;
226
+ private _collectWindowDefInfo;
227
+ /**
228
+ * Collect IN list information for potential wrapping.
229
+ * Structure: expr IN (value1, value2, value3, ...)
230
+ * We want to track the IN list so we can wrap it at max line width.
231
+ */
232
+ private _collectInListInfo;
233
+ /**
234
+ * Collect PIVOT/UNPIVOT clause information for potential expansion.
235
+ * Structure: PIVOT (aggregates FOR column IN (values))
236
+ */
237
+ private _collectPivotInfo;
238
+ private _analyzeCaseExpression;
239
+ private _findTokenInContext;
240
+ private _analyzeJoinConditions;
241
+ private _findOnToken;
242
+ private _findSubqueryContext;
243
+ private _markAllDescendantTokens;
244
+ private _markIdentifier;
245
+ private _markClauseStart;
246
+ private _markListContext;
247
+ private _markCommasInContext;
248
+ private _markListCommasExcludingGroupingAnalytics;
249
+ private _markGroupByAllToken;
250
+ private _findAllTokenInGroupByExpression;
251
+ private _analyzeConditionClause;
252
+ private _countConditionOperators;
253
+ private _scanForBetweenAnd;
254
+ private _markSubqueryParens;
255
+ private _markDdlColumnList;
256
+ private _markDdlCommasInContext;
257
+ private _markValuesCommas;
258
+ private _markSetClause;
259
+ private _markSetConfigTokens;
260
+ private _markResetConfigTokens;
261
+ private _markSetConfigTokensRecursive;
262
+ private _markMergeClauses;
263
+ private _markDepthForContext;
264
+ }