@jacobknightley/fabric-format 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -0
- package/dist/cell-formatter.d.ts +75 -0
- package/dist/cell-formatter.js +144 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +435 -0
- package/dist/formatters/index.d.ts +19 -0
- package/dist/formatters/index.js +76 -0
- package/dist/formatters/python/config.d.ts +33 -0
- package/dist/formatters/python/config.js +29 -0
- package/dist/formatters/python/index.d.ts +7 -0
- package/dist/formatters/python/index.js +13 -0
- package/dist/formatters/python/python-formatter.d.ts +51 -0
- package/dist/formatters/python/python-formatter.js +180 -0
- package/dist/formatters/sparksql/constants.d.ts +16 -0
- package/dist/formatters/sparksql/constants.js +16 -0
- package/dist/formatters/sparksql/fmt-detector.d.ts +65 -0
- package/dist/formatters/sparksql/fmt-detector.js +84 -0
- package/dist/formatters/sparksql/formatter.d.ts +24 -0
- package/dist/formatters/sparksql/formatter.js +1276 -0
- package/dist/formatters/sparksql/formatting-context.d.ts +154 -0
- package/dist/formatters/sparksql/formatting-context.js +363 -0
- package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +529 -0
- package/dist/formatters/sparksql/generated/SqlBaseLexer.js +2609 -0
- package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +8195 -0
- package/dist/formatters/sparksql/generated/SqlBaseParser.js +48793 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +910 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +2730 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +456 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +1822 -0
- package/dist/formatters/sparksql/generated/builtinFunctions.d.ts +8 -0
- package/dist/formatters/sparksql/generated/builtinFunctions.js +510 -0
- package/dist/formatters/sparksql/index.d.ts +11 -0
- package/dist/formatters/sparksql/index.js +22 -0
- package/dist/formatters/sparksql/output-builder.d.ts +89 -0
- package/dist/formatters/sparksql/output-builder.js +191 -0
- package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +264 -0
- package/dist/formatters/sparksql/parse-tree-analyzer.js +1956 -0
- package/dist/formatters/sparksql/sql-formatter.d.ts +25 -0
- package/dist/formatters/sparksql/sql-formatter.js +56 -0
- package/dist/formatters/sparksql/token-utils.d.ts +68 -0
- package/dist/formatters/sparksql/token-utils.js +155 -0
- package/dist/formatters/sparksql/types.d.ts +264 -0
- package/dist/formatters/sparksql/types.js +7 -0
- package/dist/formatters/types.d.ts +57 -0
- package/dist/formatters/types.js +7 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +41 -0
- package/dist/notebook-formatter.d.ts +107 -0
- package/dist/notebook-formatter.js +424 -0
- package/package.json +63 -0
|
@@ -0,0 +1,1276 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Spark SQL Formatter - Main Entry Point
|
|
3
|
+
*
|
|
4
|
+
* This is the 100% grammar-driven SQL formatter for Apache Spark SQL.
|
|
5
|
+
* NO HARDCODED KEYWORD, FUNCTION, OR CLAUSE LISTS.
|
|
6
|
+
* Everything derived from ANTLR lexer symbolicNames and parse tree context.
|
|
7
|
+
*
|
|
8
|
+
* Architecture:
|
|
9
|
+
* - types.ts: TypeScript interfaces
|
|
10
|
+
* - token-utils.ts: Grammar-derived token detection
|
|
11
|
+
* - parse-tree-analyzer.ts: AST visitor that collects formatting context
|
|
12
|
+
* - formatting-context.ts: State management during formatting
|
|
13
|
+
* - output-builder.ts: Output construction with column tracking
|
|
14
|
+
* - formatter.ts (this file): Main orchestration
|
|
15
|
+
*/
|
|
16
|
+
import antlr4 from 'antlr4';
|
|
17
|
+
// Internal modules
|
|
18
|
+
import { SqlBaseLexer, getSymbolicName, isKeywordToken, isFunctionLikeKeyword } from './token-utils.js';
|
|
19
|
+
// @ts-ignore - Generated ANTLR code
|
|
20
|
+
import SqlBaseParser from './generated/SqlBaseParser.js';
|
|
21
|
+
import { ParseTreeAnalyzer } from './parse-tree-analyzer.js';
|
|
22
|
+
import { createInitialState, ExpandedFunctionStack, CommentManager, indentCalc, isUnaryOperator, shouldExpandFunction, shouldExpandWindow, shouldExpandPivot } from './formatting-context.js';
|
|
23
|
+
import { OutputBuilder, outputComments, shouldSkipSpace, shouldAddCommaSpace, formatHintContent } from './output-builder.js';
|
|
24
|
+
import { SPARK_BUILTIN_FUNCTIONS } from './generated/builtinFunctions.js';
|
|
25
|
+
import { MAX_LINE_WIDTH } from './constants.js';
|
|
26
|
+
import { hasFormatOff, detectCollapseDirectives, isFmtInlineComment } from './fmt-detector.js';
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// PUBLIC API
|
|
29
|
+
// ============================================================================
|
|
30
|
+
/**
 * Format SQL - main entry point.
 *
 * Steps:
 *  1. Locate the first `%%sql` magic command (anywhere in the input). Text before
 *     it is kept verbatim; only the text after it is formatted.
 *  2. Split the SQL on statement-separating semicolons.
 *  3. Format each non-empty statement, except those marked fmt:off, which are
 *     passed through trimmed but otherwise untouched.
 *  4. Re-join with ";\n\n", keep a trailing semicolon if the input had one, and
 *     put the prefix and magic command back in front.
 *
 * Any exception results in the input being returned unchanged (best effort).
 *
 * @param {string} sql - Raw cell / statement text.
 * @returns {string} Formatted text, or the original input on failure.
 */
export function formatSql(sql) {
    try {
        // %%sql only - %sql is not valid in Fabric
        const magic = sql.match(/(%%sql)[ \t]*\n?/);
        const hasMagic = Boolean(magic) && magic.index !== undefined;
        const body = hasMagic
            ? sql.substring(magic.index + magic[0].length)
            : sql;
        const pieces = splitOnSemicolons(body)
            .map((raw) => raw.trim())
            .filter((stmt) => stmt.length > 0)
            // Statement-level fmt:off bypasses formatting entirely
            .map((stmt) => (hasFormatOff(stmt) ? stmt : formatSingleStatement(stmt)));
        // Preserve trailing semicolon if original had one
        const terminator = body.trimEnd().endsWith(';') ? ';' : '';
        const joined = pieces.join(';\n\n') + terminator;
        if (!hasMagic) {
            return joined;
        }
        // Restore prefix and magic command
        return sql.substring(0, magic.index) + magic[1] + '\n' + joined;
    }
    catch {
        return sql;
    }
}
|
|
77
|
+
/**
 * Report whether running the formatter would change the given SQL.
 *
 * @param {string} sql - SQL text to check.
 * @returns {boolean} true when formatSql(sql) differs from the input.
 */
export function needsFormatting(sql) {
    const formatted = formatSql(sql);
    return formatted !== sql;
}
|
|
83
|
+
// ============================================================================
|
|
84
|
+
// STATEMENT SPLITTING
|
|
85
|
+
// ============================================================================
|
|
86
|
+
/**
 * Split SQL on statement-separating semicolons.
 *
 * A semicolon is NOT treated as a separator when it appears inside:
 *  - a single- or double-quoted string literal,
 *  - a backtick-quoted identifier,
 *  - a `--` line comment,
 *  - a bracketed comment (nesting is tracked by depth).
 *
 * Quote characters inside comments are ignored, so an apostrophe in a comment
 * cannot hide later separators. Backslash escapes are honored everywhere except
 * inside comments and backtick identifiers (the character following a backslash
 * is copied through without being interpreted).
 *
 * @param {string} sql - SQL text possibly containing several statements.
 * @returns {string[]} The statements without their separating semicolons.
 *   Surrounding whitespace is preserved; whitespace-only pieces are dropped.
 */
function splitOnSemicolons(sql) {
    const statements = [];
    let current = '';
    // Active quote delimiter (', " or `), or null when outside any quoted region
    let quote = null;
    let inLineComment = false;
    let blockCommentDepth = 0;
    const flush = () => {
        if (current.trim().length > 0) {
            statements.push(current);
        }
        current = '';
    };
    for (let i = 0; i < sql.length; i++) {
        const ch = sql[i];
        const next = sql[i + 1];
        if (inLineComment) {
            current += ch;
            if (ch === '\n') {
                inLineComment = false;
            }
            continue;
        }
        if (blockCommentDepth > 0) {
            if (ch === '/' && next === '*') {
                current += '/*';
                blockCommentDepth++;
                i++;
            }
            else if (ch === '*' && next === '/') {
                current += '*/';
                blockCommentDepth--;
                i++;
            }
            else {
                current += ch;
            }
            continue;
        }
        // Backslash escape: copy the following character through uninterpreted.
        // Not applied inside backticks, where a backslash is an ordinary character.
        if (ch === '\\' && quote !== '`') {
            current += ch;
            if (next !== undefined) {
                current += next;
                i++;
            }
            continue;
        }
        if (quote !== null) {
            current += ch;
            if (ch === quote) {
                quote = null;
            }
            continue;
        }
        if (ch === '-' && next === '-') {
            current += '--';
            inLineComment = true;
            i++;
            continue;
        }
        if (ch === '/' && next === '*') {
            current += '/*';
            blockCommentDepth = 1;
            i++;
            continue;
        }
        if (ch === "'" || ch === '"' || ch === '`') {
            quote = ch;
            current += ch;
            continue;
        }
        if (ch === ';') {
            flush();
            continue;
        }
        current += ch;
    }
    flush();
    return statements;
}
|
|
130
|
+
// ============================================================================
|
|
131
|
+
// VARIABLE SUBSTITUTION HANDLING
|
|
132
|
+
// ============================================================================
|
|
133
|
+
/**
 * Spark SQL variable substitution pattern: ${variable_name}
 * Matches must survive formatting byte-for-byte.
 */
const VARIABLE_PATTERN = /\$\{([^}]+)\}/g;
/**
 * Swap every ${variable} occurrence for a numbered placeholder so the lexer
 * sees an ordinary identifier.
 *
 * Placeholders have the form _SPARKVAR_N_, where N is the zero-based position
 * of the occurrence in the input.
 *
 * @param {string} sql - SQL text that may contain ${...} substitutions.
 * @returns {{ sql: string, substitutions: Array<{ placeholder: string, original: string }> }}
 *   The rewritten SQL plus the list needed to undo the rewrite.
 */
function extractVariables(sql) {
    const substitutions = [];
    const rewritten = sql.replace(VARIABLE_PATTERN, (original) => {
        // The list length doubles as the running placeholder counter
        const placeholder = `_SPARKVAR_${substitutions.length}_`;
        substitutions.push({ placeholder, original });
        return placeholder;
    });
    return { sql: rewritten, substitutions };
}
|
|
155
|
+
/**
 * Restore original ${variable} patterns after formatting.
 *
 * Each placeholder's first occurrence is replaced by its original text. A
 * function replacer is used so the original text is inserted literally: with a
 * plain string replacement, sequences such as `$$`, `$&`, `` $` `` or `$'`
 * inside the variable text would be interpreted as replacement patterns and
 * corrupt the restored SQL.
 *
 * @param {string} sql - Formatted SQL containing placeholders.
 * @param {Array<{ placeholder: string, original: string }>} substitutions -
 *   Placeholder/original pairs, as produced by extractVariables.
 * @returns {string} SQL with every placeholder swapped back.
 */
function restoreVariables(sql, substitutions) {
    let result = sql;
    for (const { placeholder, original } of substitutions) {
        result = result.replace(placeholder, () => original);
    }
    return result;
}
|
|
165
|
+
// ============================================================================
|
|
166
|
+
// SINGLE STATEMENT FORMATTING
|
|
167
|
+
// ============================================================================
|
|
168
|
+
/**
 * Pre-normalize SQL to fix tokenization mismatches.
 *
 * The statement is lexed twice by the caller: once upper-cased and once as
 * written. Scientific notation with a lowercase exponent marker (1.23e10) does
 * not tokenize like its uppercase form (1.23E10), so the marker is upper-cased
 * here to keep the two token streams aligned.
 *
 * Only standalone numeric literals are rewritten. Text inside string literals,
 * backtick identifiers and comments, and digit runs that are the tail of an
 * identifier (e.g. col1e5), is left untouched so that user text is not altered.
 *
 * @param {string} sql - SQL text (variable placeholders already applied).
 * @returns {string} SQL with exponent markers in numeric literals upper-cased.
 */
function normalizeForTokenization(sql) {
    // Alternatives 1-5 consume regions that must be preserved verbatim
    // (quoted strings, backtick identifiers, line comments, block comments).
    // The last alternative is the numeric literal: mantissa, 'e', optional sign, exponent.
    // Examples: 1e10, 1.23e10, .5e-3, 1.e+5
    // The lookbehind rejects matches that continue an identifier or a dotted name.
    const pattern = /'(?:[^'\\]|\\[\s\S])*'|"(?:[^"\\]|\\[\s\S])*"|`[^`]*`|--[^\n]*|\/\*[\s\S]*?\*\/|(?<![\w.])(\d+(?:\.\d*)?|\.\d+)e([+-]?\d+)/gi;
    return sql.replace(pattern, (match, mantissa, exponent) => {
        if (mantissa === undefined) {
            // Protected region - return exactly what was matched
            return match;
        }
        return mantissa + 'E' + exponent;
    });
}
|
|
183
|
+
/**
 * Format one SQL statement (no trailing semicolon expected).
 *
 * Pipeline: protect ${variables} with placeholders, normalize exponent case,
 * lex + parse the upper-cased text (the grammar matches uppercase keywords),
 * analyze the parse tree, lex the normalized text again to recover the
 * as-written token texts, format, and finally restore the variables.
 *
 * @param {string} sql - A single statement.
 * @returns {string} The formatted statement, or the input unchanged when
 *   parsing or formatting throws.
 */
function formatSingleStatement(sql) {
    // Lex the given text into a fully buffered token stream
    const tokenize = (text) => {
        const stream = new antlr4.CommonTokenStream(new SqlBaseLexer(new antlr4.InputStream(text)));
        stream.fill();
        return stream;
    };
    try {
        const extracted = extractVariables(sql);
        const normalizedSql = normalizeForTokenization(extracted.sql);
        // Upper-cased stream drives parsing
        const upperStream = tokenize(normalizedSql.toUpperCase());
        const parser = new SqlBaseParser(upperStream);
        // @ts-ignore
        parser.removeErrorListeners?.();
        let tree;
        try {
            tree = parser.singleStatement();
        }
        catch {
            return sql;
        }
        const analyzer = new ParseTreeAnalyzer();
        analyzer.visit(tree);
        const analysis = analyzer.getResult();
        // Second lex over the normalized (not upper-cased) text supplies output texts;
        // normalization keeps it index-aligned with the upper-cased stream
        const originalStream = tokenize(normalizedSql);
        const directives = detectCollapseDirectives(normalizedSql);
        const output = formatTokens(upperStream.tokens, originalStream.tokens, analysis, directives);
        return restoreVariables(output, extracted.substitutions);
    }
    catch (e) {
        console.error('Formatter error:', e.message, e.stack);
        return sql;
    }
}
|
|
229
|
+
/**
|
|
230
|
+
* Format tokens using the analysis result.
|
|
231
|
+
*/
|
|
232
|
+
function formatTokens(tokenList, allOrigTokens, analysis, formatDirectives) {
|
|
233
|
+
const builder = new OutputBuilder();
|
|
234
|
+
const state = createInitialState();
|
|
235
|
+
const expandedFuncs = new ExpandedFunctionStack();
|
|
236
|
+
const comments = new CommentManager();
|
|
237
|
+
let currentExpandedWindow = null;
|
|
238
|
+
let currentExpandedPivot = null;
|
|
239
|
+
let lastProcessedIndex = -1;
|
|
240
|
+
// Populate force-inline ranges from fmt:inline comments (grammar-driven approach)
|
|
241
|
+
const forceInlineRanges = findForceInlineRanges(allOrigTokens, analysis);
|
|
242
|
+
formatDirectives.forceInlineRanges = forceInlineRanges;
|
|
243
|
+
let activeInList = null;
|
|
244
|
+
// Track which simple queries are actually compact (fit within line width)
|
|
245
|
+
const compactQueries = new Set();
|
|
246
|
+
for (const [selectToken, info] of analysis.simpleQueries) {
|
|
247
|
+
// For subqueries (depth > 0), apply tighter width constraint to account for
|
|
248
|
+
// surrounding context (CTE prefix, parentheses, outer query continuation).
|
|
249
|
+
// Typical overhead is 20-40 chars for "WITH name AS (" + ") SELECT ..."
|
|
250
|
+
const effectiveMaxWidth = info.depth > 0 ? MAX_LINE_WIDTH - 40 : MAX_LINE_WIDTH;
|
|
251
|
+
if (info.spanLength <= effectiveMaxWidth) {
|
|
252
|
+
compactQueries.add(selectToken);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
// Check if set operations should stay inline
|
|
256
|
+
// Only inline if: 1) there are set operation parens, 2) total length is short,
|
|
257
|
+
// 3) ALL queries in the set operation are simple (single-item SELECT)
|
|
258
|
+
let isShortSetOperation = false;
|
|
259
|
+
if (analysis.setOperandParens.size > 0) {
|
|
260
|
+
let estimatedQueryLength = 0;
|
|
261
|
+
for (const tok of tokenList) {
|
|
262
|
+
if (tok.type !== SqlBaseLexer.WS && tok.type !== antlr4.Token.EOF) {
|
|
263
|
+
estimatedQueryLength += (tok.text?.length || 0) + 1; // +1 for space
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
// Only allow inline if short AND no multi-item clauses exist
|
|
267
|
+
const hasMultiItemClause = analysis.multiItemClauses.size > 0;
|
|
268
|
+
isShortSetOperation = estimatedQueryLength <= MAX_LINE_WIDTH && !hasMultiItemClause;
|
|
269
|
+
}
|
|
270
|
+
// Check if VALUES statement should stay inline (simple values list)
|
|
271
|
+
// Only inline if: 1) has values commas, 2) total length is short, 3) NOT tuples (row format)
|
|
272
|
+
// VALUES 1, 2, 3 -> stays inline if short
|
|
273
|
+
// VALUES (1, 'a'), (2, 'b') -> always expands (has tuples)
|
|
274
|
+
let isShortValues = false;
|
|
275
|
+
if (analysis.valuesCommas.size > 0 && !analysis.valuesHasTuples) {
|
|
276
|
+
let estimatedQueryLength = 0;
|
|
277
|
+
for (const tok of tokenList) {
|
|
278
|
+
if (tok.type !== SqlBaseLexer.WS && tok.type !== antlr4.Token.EOF) {
|
|
279
|
+
estimatedQueryLength += (tok.text?.length || 0) + 1; // +1 for space
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
isShortValues = estimatedQueryLength <= MAX_LINE_WIDTH;
|
|
283
|
+
}
|
|
284
|
+
// Helper to find next non-WS token
|
|
285
|
+
const findNextNonWsTokenIndex = (startIdx) => {
|
|
286
|
+
for (let j = startIdx; j < tokenList.length; j++) {
|
|
287
|
+
const tok = tokenList[j];
|
|
288
|
+
if (tok.type !== SqlBaseLexer.WS &&
|
|
289
|
+
tok.type !== antlr4.Token.EOF &&
|
|
290
|
+
tok.type !== SqlBaseLexer.SIMPLE_COMMENT &&
|
|
291
|
+
tok.type !== SqlBaseLexer.BRACKETED_COMMENT) {
|
|
292
|
+
return j;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
return -1;
|
|
296
|
+
};
|
|
297
|
+
// Helper to collect comments from range
|
|
298
|
+
const collectComments = (startIdx, endIdx) => {
|
|
299
|
+
for (let j = startIdx; j < endIdx; j++) {
|
|
300
|
+
const hiddenToken = allOrigTokens[j];
|
|
301
|
+
if (hiddenToken && hiddenToken.channel === 1) {
|
|
302
|
+
if (hiddenToken.type === SqlBaseLexer.SIMPLE_COMMENT ||
|
|
303
|
+
hiddenToken.type === SqlBaseLexer.BRACKETED_COMMENT) {
|
|
304
|
+
const wasOnOwnLine = CommentManager.checkWasOnOwnLine(j, hiddenToken, allOrigTokens);
|
|
305
|
+
const hadBlankLineBefore = CommentManager.checkHadBlankLineBefore(j, allOrigTokens);
|
|
306
|
+
comments.add({ text: hiddenToken.text, type: hiddenToken.type, wasOnOwnLine, hadBlankLineBefore });
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
};
|
|
311
|
+
for (let i = 0; i < tokenList.length && i < allOrigTokens.length; i++) {
|
|
312
|
+
const token = tokenList[i];
|
|
313
|
+
const origToken = allOrigTokens[i];
|
|
314
|
+
if (token.type === antlr4.Token.EOF)
|
|
315
|
+
continue;
|
|
316
|
+
// Collect hidden tokens (comments)
|
|
317
|
+
const wasAlreadyProcessed = lastProcessedIndex >= i;
|
|
318
|
+
if (!wasAlreadyProcessed) {
|
|
319
|
+
collectComments(lastProcessedIndex + 1, i);
|
|
320
|
+
}
|
|
321
|
+
lastProcessedIndex = Math.max(lastProcessedIndex, i);
|
|
322
|
+
// Skip WS tokens
|
|
323
|
+
if (token.type === SqlBaseLexer.WS)
|
|
324
|
+
continue;
|
|
325
|
+
// Handle comment tokens directly
|
|
326
|
+
if (token.type === SqlBaseLexer.SIMPLE_COMMENT ||
|
|
327
|
+
token.type === SqlBaseLexer.BRACKETED_COMMENT) {
|
|
328
|
+
if (!wasAlreadyProcessed) {
|
|
329
|
+
const wasOnOwnLine = CommentManager.checkWasOnOwnLine(i, origToken, allOrigTokens);
|
|
330
|
+
const hadBlankLineBefore = CommentManager.checkHadBlankLineBefore(i, allOrigTokens);
|
|
331
|
+
comments.add({ text: origToken.text, type: token.type, wasOnOwnLine, hadBlankLineBefore });
|
|
332
|
+
}
|
|
333
|
+
continue;
|
|
334
|
+
}
|
|
335
|
+
const text = origToken.text;
|
|
336
|
+
const tokenType = token.type;
|
|
337
|
+
const tokenIndex = token.tokenIndex;
|
|
338
|
+
const symbolicName = getSymbolicName(tokenType);
|
|
339
|
+
// Handle hints
|
|
340
|
+
if (tokenType === SqlBaseLexer.HENT_START) {
|
|
341
|
+
builder.addSpaceIfNeeded();
|
|
342
|
+
state.insideHint = true;
|
|
343
|
+
state.hintContent = [];
|
|
344
|
+
builder.push('/*+');
|
|
345
|
+
continue;
|
|
346
|
+
}
|
|
347
|
+
if (state.insideHint) {
|
|
348
|
+
if (tokenType === SqlBaseLexer.HENT_END) {
|
|
349
|
+
const formatted = formatHintContent(state.hintContent.join(''));
|
|
350
|
+
builder.push(' ' + formatted + ' ');
|
|
351
|
+
builder.push('*/');
|
|
352
|
+
state.insideHint = false;
|
|
353
|
+
state.hintContent = [];
|
|
354
|
+
state.prevWasFunctionName = false;
|
|
355
|
+
continue;
|
|
356
|
+
}
|
|
357
|
+
else {
|
|
358
|
+
if (state.hintContent.length > 0) {
|
|
359
|
+
const lastElement = state.hintContent[state.hintContent.length - 1];
|
|
360
|
+
const needsSpace = lastElement !== '(' && lastElement !== ' ' &&
|
|
361
|
+
text !== ')' && text !== ',';
|
|
362
|
+
if (needsSpace)
|
|
363
|
+
state.hintContent.push(' ');
|
|
364
|
+
}
|
|
365
|
+
state.hintContent.push(text);
|
|
366
|
+
continue;
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
// Skip AS tokens in table alias context (style: table aliases have no AS)
|
|
370
|
+
if (analysis.tableAliasAsTokens.has(tokenIndex)) {
|
|
371
|
+
continue;
|
|
372
|
+
}
|
|
373
|
+
// Get context from analysis
|
|
374
|
+
const ctx = getTokenContext(tokenIndex, analysis);
|
|
375
|
+
// Compact query tracking: each subquery level is evaluated independently
|
|
376
|
+
// When we hit a SELECT, check if THAT query is compact and push to stack
|
|
377
|
+
const simpleQueryInfo = analysis.simpleQueries.get(tokenIndex);
|
|
378
|
+
if (symbolicName === 'SELECT' && ctx.isClauseStart) {
|
|
379
|
+
const isThisQueryCompact = compactQueries.has(tokenIndex);
|
|
380
|
+
// Push compact state for this query level
|
|
381
|
+
state.compactQueryStack.push({
|
|
382
|
+
isCompact: isThisQueryCompact,
|
|
383
|
+
depth: state.subqueryDepth
|
|
384
|
+
});
|
|
385
|
+
}
|
|
386
|
+
// Pop compact query state when we exit a subquery (depth decreases)
|
|
387
|
+
while (state.compactQueryStack.length > 0 &&
|
|
388
|
+
state.compactQueryStack[state.compactQueryStack.length - 1].depth > state.subqueryDepth) {
|
|
389
|
+
state.compactQueryStack.pop();
|
|
390
|
+
}
|
|
391
|
+
// Also pop on semicolon (statement end at depth 0)
|
|
392
|
+
if (text === ';' && state.subqueryDepth === 0 && state.compactQueryStack.length > 0) {
|
|
393
|
+
state.compactQueryStack.pop();
|
|
394
|
+
}
|
|
395
|
+
// Current query is compact if the top of the stack says so
|
|
396
|
+
const inCompactQuery = state.compactQueryStack.length > 0 &&
|
|
397
|
+
state.compactQueryStack[state.compactQueryStack.length - 1].isCompact;
|
|
398
|
+
// Get multi-arg function info
|
|
399
|
+
const multiArgFuncInfo = analysis.multiArgFunctionInfo.get(tokenIndex);
|
|
400
|
+
const windowDefInfo = analysis.windowDefInfo.get(tokenIndex);
|
|
401
|
+
const pivotInfoLookup = analysis.pivotInfo.get(tokenIndex);
|
|
402
|
+
// Check expanded function state
|
|
403
|
+
const currentFunc = expandedFuncs.current();
|
|
404
|
+
const isExpandedFunctionComma = expandedFuncs.isComma(tokenIndex);
|
|
405
|
+
const isExpandedFunctionCloseParen = expandedFuncs.isCloseParen(tokenIndex);
|
|
406
|
+
// Check expanded window state
|
|
407
|
+
const isExpandedWindowOrderBy = currentExpandedWindow?.orderByTokenIndex === tokenIndex;
|
|
408
|
+
const isExpandedWindowFrame = currentExpandedWindow?.windowFrameTokenIndex === tokenIndex;
|
|
409
|
+
const isExpandedWindowCloseParen = currentExpandedWindow?.closeParenIndex === tokenIndex;
|
|
410
|
+
// Check expanded pivot state
|
|
411
|
+
const isExpandedPivotAggregateComma = currentExpandedPivot?.aggregateCommaIndices.has(tokenIndex) ?? false;
|
|
412
|
+
const isExpandedPivotForKeyword = currentExpandedPivot?.forKeywordIndex === tokenIndex;
|
|
413
|
+
const isExpandedPivotInKeyword = currentExpandedPivot?.inKeywordIndex === tokenIndex;
|
|
414
|
+
// Don't use comma-first expansion for PIVOT IN lists - let IN list wrapping handle it
|
|
415
|
+
const isExpandedPivotInListComma = false; // Disabled - use IN list wrapping instead
|
|
416
|
+
const isExpandedPivotCloseParen = currentExpandedPivot?.closeParenIndex === tokenIndex;
|
|
417
|
+
// Detect unary operator
|
|
418
|
+
const currentTokenIsUnaryOperator = isUnaryOperator(text, state.prevTokenText, state.prevTokenType);
|
|
419
|
+
// Get next token type for lookahead (skip WS tokens)
|
|
420
|
+
let nextTokenType = null;
|
|
421
|
+
for (let j = i + 1; j < tokenList.length; j++) {
|
|
422
|
+
const nextToken = tokenList[j];
|
|
423
|
+
if (nextToken.type !== SqlBaseLexer.WS &&
|
|
424
|
+
nextToken.type !== SqlBaseLexer.SIMPLE_COMMENT &&
|
|
425
|
+
nextToken.type !== SqlBaseLexer.BRACKETED_COMMENT) {
|
|
426
|
+
nextTokenType = nextToken.type;
|
|
427
|
+
break;
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
// Determine output text
|
|
431
|
+
const outputText = determineOutputText(tokenIndex, tokenType, text, symbolicName, ctx, analysis, nextTokenType);
|
|
432
|
+
// Check for function-like keyword
|
|
433
|
+
const isBuiltInFunctionKeyword = isFunctionLikeKeyword(tokenType, text);
|
|
434
|
+
// Track function argument depth
|
|
435
|
+
if (text === '(' && (state.prevWasFunctionName || state.prevWasBuiltInFunctionKeyword)) {
|
|
436
|
+
state.insideFunctionArgs++;
|
|
437
|
+
}
|
|
438
|
+
else if (text === ')' && state.insideFunctionArgs > 0) {
|
|
439
|
+
state.insideFunctionArgs--;
|
|
440
|
+
}
|
|
441
|
+
// Track paren depth
|
|
442
|
+
if (text === '(')
|
|
443
|
+
state.insideParens++;
|
|
444
|
+
else if (text === ')' && state.insideParens > 0)
|
|
445
|
+
state.insideParens--;
|
|
446
|
+
// Track complex type depth for ARRAY<>, MAP<>, STRUCT<>
|
|
447
|
+
// These use < and > instead of () for type parameters
|
|
448
|
+
// Note: We increment depth BEFORE processing (for opening <) but decrement AFTER (for closing >)
|
|
449
|
+
const prevSymName = state.prevTokenType >= 0 ? getSymbolicName(state.prevTokenType) : null;
|
|
450
|
+
const prevWasComplexTypeKeyword = prevSymName === 'ARRAY' || prevSymName === 'MAP' || prevSymName === 'STRUCT';
|
|
451
|
+
const wasInsideComplexType = state.complexTypeDepth > 0;
|
|
452
|
+
if (text === '<' && (prevWasComplexTypeKeyword || state.complexTypeDepth > 0)) {
|
|
453
|
+
state.complexTypeDepth++;
|
|
454
|
+
}
|
|
455
|
+
// Store if we should decrement after output (for closing >)
|
|
456
|
+
const shouldDecrementComplexTypeAfter = text === '>' && state.complexTypeDepth > 0;
|
|
457
|
+
// Track IN list wrapping - check if we're entering an IN list
|
|
458
|
+
const inListInfo = analysis.inListInfo.get(tokenIndex);
|
|
459
|
+
// Check if we're exiting an IN list
|
|
460
|
+
if (activeInList && tokenIndex === activeInList.closeParenIndex) {
|
|
461
|
+
// Exiting the IN list
|
|
462
|
+
activeInList = null;
|
|
463
|
+
}
|
|
464
|
+
// Handle AS keyword insertion
|
|
465
|
+
if (analysis.aliasInsertPositions.has(tokenIndex)) {
|
|
466
|
+
builder.addSpaceIfNeeded();
|
|
467
|
+
builder.push('AS');
|
|
468
|
+
}
|
|
469
|
+
// Determine newlines and indent
|
|
470
|
+
const { needsNewline, indent } = determineNewlineAndIndent(tokenIndex, text, symbolicName, ctx, analysis, state, expandedFuncs, currentExpandedWindow, currentExpandedPivot, isExpandedFunctionComma, isExpandedFunctionCloseParen, isExpandedWindowOrderBy, isExpandedWindowFrame, isExpandedWindowCloseParen, isExpandedPivotAggregateComma, isExpandedPivotForKeyword, isExpandedPivotInKeyword, isExpandedPivotInListComma, isExpandedPivotCloseParen, inCompactQuery, isShortSetOperation, isShortValues);
|
|
471
|
+
// Handle list commas - look ahead for comments
|
|
472
|
+
if (ctx.isListComma && state.insideFunctionArgs === 0) {
|
|
473
|
+
const nextIdx = findNextNonWsTokenIndex(i + 1);
|
|
474
|
+
if (nextIdx > 0) {
|
|
475
|
+
collectComments(i + 1, nextIdx);
|
|
476
|
+
lastProcessedIndex = nextIdx - 1;
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
// Similar look-ahead for other comma types
|
|
480
|
+
if (ctx.isCteComma || ctx.isDdlComma || ctx.isValuesComma || ctx.isSetComma || isExpandedFunctionComma) {
|
|
481
|
+
const nextIdx = findNextNonWsTokenIndex(i + 1);
|
|
482
|
+
if (nextIdx > 0) {
|
|
483
|
+
collectComments(i + 1, nextIdx);
|
|
484
|
+
lastProcessedIndex = nextIdx - 1;
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
// Apply spacing/newlines
|
|
488
|
+
if (needsNewline) {
|
|
489
|
+
outputWithNewline(builder, comments, indent, state);
|
|
490
|
+
}
|
|
491
|
+
else {
|
|
492
|
+
outputWithoutNewline(builder, comments, text, symbolicName, state, currentTokenIsUnaryOperator, ctx.isLateralViewComma);
|
|
493
|
+
}
|
|
494
|
+
builder.push(outputText);
|
|
495
|
+
// Handle IN list wrapping: after outputting a comma in an IN list,
|
|
496
|
+
// check if the next item would exceed line width
|
|
497
|
+
if (activeInList && activeInList.commaIndices.has(tokenIndex) && text === ',') {
|
|
498
|
+
// Look ahead to estimate the length of the next item
|
|
499
|
+
const nextItemLength = estimateNextInListItemLength(tokenList, i, findNextNonWsTokenIndex, activeInList.closeParenIndex);
|
|
500
|
+
const currentCol = builder.getColumn();
|
|
501
|
+
// Add 1 for the space after comma
|
|
502
|
+
if (currentCol + 1 + nextItemLength > MAX_LINE_WIDTH) {
|
|
503
|
+
// Wrap to new line with indent
|
|
504
|
+
builder.push('\n' + ' '.repeat(activeInList.wrapIndent));
|
|
505
|
+
state.justOutputInListWrapNewline = true;
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
// Activate IN list tracking AFTER we push the opening paren
|
|
509
|
+
if (inListInfo && text === '(') {
|
|
510
|
+
let wrapIndent = builder.getColumn(); // Column right after the (
|
|
511
|
+
// If wrap indent exceeds 60% of line width, fall back to current indent + 4
|
|
512
|
+
const maxWrapIndent = Math.floor(MAX_LINE_WIDTH * 0.6); // 84 chars
|
|
513
|
+
if (wrapIndent > maxWrapIndent) {
|
|
514
|
+
// Find current line's base indent (position of first non-space on this line)
|
|
515
|
+
// Since we just pushed '(', go back to find the line start indent
|
|
516
|
+
const currentOutput = builder.toString();
|
|
517
|
+
const lastNewline = currentOutput.lastIndexOf('\n');
|
|
518
|
+
const lineStart = lastNewline >= 0 ? currentOutput.slice(lastNewline + 1) : currentOutput;
|
|
519
|
+
const baseIndentMatch = lineStart.match(/^(\s*)/);
|
|
520
|
+
const baseIndent = baseIndentMatch ? baseIndentMatch[1].length : 0;
|
|
521
|
+
wrapIndent = baseIndent + 4; // Fall back to base indent + 1 indent level
|
|
522
|
+
}
|
|
523
|
+
activeInList = {
|
|
524
|
+
wrapIndent,
|
|
525
|
+
closeParenIndex: inListInfo.closeParenIndex,
|
|
526
|
+
commaIndices: new Set(inListInfo.commaIndices),
|
|
527
|
+
};
|
|
528
|
+
}
|
|
529
|
+
// Handle multi-WHEN CASE newline after CASE or after value expression
|
|
530
|
+
// For searchedCase (CASE WHEN ...), newline goes after CASE
|
|
531
|
+
// For simpleCase (CASE x WHEN ...), newline goes after value expression
|
|
532
|
+
if (analysis.multiWhenCaseTokens.has(tokenIndex)) {
|
|
533
|
+
// Check if this CASE has a value expression (simpleCase)
|
|
534
|
+
// If so, we'll add the newline after the value, not here
|
|
535
|
+
const isSimpleCase = analysis.simpleCaseTokens?.has(tokenIndex);
|
|
536
|
+
if (!isSimpleCase) {
|
|
537
|
+
// searchedCase - newline right after CASE
|
|
538
|
+
builder.push('\n');
|
|
539
|
+
}
|
|
540
|
+
state.caseDepth++;
|
|
541
|
+
}
|
|
542
|
+
// For simpleCase, add newline after the value expression
|
|
543
|
+
if (analysis.simpleCaseValueEndTokens?.has(tokenIndex)) {
|
|
544
|
+
builder.push('\n');
|
|
545
|
+
}
|
|
546
|
+
// Track subquery depth changes
|
|
547
|
+
if (ctx.isSubqueryOpenParen)
|
|
548
|
+
state.subqueryDepth++;
|
|
549
|
+
else if (ctx.isSubqueryCloseParen && state.subqueryDepth > 0)
|
|
550
|
+
state.subqueryDepth--;
|
|
551
|
+
// Track DDL depth
|
|
552
|
+
if (ctx.isDdlOpenParen && ctx.isDdlMultiColumn) {
|
|
553
|
+
builder.push('\n' + ' '.repeat(state.subqueryDepth + 1));
|
|
554
|
+
state.ddlDepth++;
|
|
555
|
+
}
|
|
556
|
+
else if (ctx.isDdlCloseParen && state.ddlDepth > 0) {
|
|
557
|
+
state.ddlDepth--;
|
|
558
|
+
}
|
|
559
|
+
// Handle multi-arg function expansion
|
|
560
|
+
// Check if this token is force-inline (either line-based legacy or grammar-driven)
|
|
561
|
+
const tokenLine = allOrigTokens[i]?.line || 0;
|
|
562
|
+
const lineBasedForceCollapse = formatDirectives.collapsedLines.has(tokenLine);
|
|
563
|
+
const grammarBasedForceCollapse = isForceInlineOpen(tokenIndex, forceInlineRanges);
|
|
564
|
+
const forceCollapse = lineBasedForceCollapse || grammarBasedForceCollapse;
|
|
565
|
+
if (multiArgFuncInfo && !forceCollapse && shouldExpandFunction(builder.getColumn(), multiArgFuncInfo)) {
|
|
566
|
+
handleFunctionExpansion(builder, expandedFuncs, multiArgFuncInfo, tokenList, i, findNextNonWsTokenIndex, analysis, state);
|
|
567
|
+
}
|
|
568
|
+
// Handle window expansion (pass multiArgFunctionInfo to check nested function expansion)
|
|
569
|
+
if (windowDefInfo && !forceCollapse && shouldExpandWindow(builder.getColumn(), windowDefInfo, analysis.multiArgFunctionInfo)) {
|
|
570
|
+
currentExpandedWindow = {
|
|
571
|
+
closeParenIndex: windowDefInfo.closeParenIndex,
|
|
572
|
+
orderByTokenIndex: windowDefInfo.orderByTokenIndex,
|
|
573
|
+
windowFrameTokenIndex: windowDefInfo.windowFrameTokenIndex,
|
|
574
|
+
baseDepth: state.subqueryDepth
|
|
575
|
+
};
|
|
576
|
+
const newIndent = '\n' + ' '.repeat(indentCalc.getWindowContentIndent(state.subqueryDepth));
|
|
577
|
+
builder.push(newIndent);
|
|
578
|
+
state.justOutputWindowNewline = true;
|
|
579
|
+
}
|
|
580
|
+
// Handle PIVOT/UNPIVOT expansion
|
|
581
|
+
if (pivotInfoLookup && !forceCollapse && shouldExpandPivot(builder.getColumn(), pivotInfoLookup)) {
|
|
582
|
+
currentExpandedPivot = {
|
|
583
|
+
closeParenIndex: pivotInfoLookup.closeParenIndex,
|
|
584
|
+
aggregateCommaIndices: new Set(pivotInfoLookup.aggregateCommaIndices),
|
|
585
|
+
forKeywordIndex: pivotInfoLookup.forKeywordIndex,
|
|
586
|
+
inKeywordIndex: pivotInfoLookup.inKeywordIndex,
|
|
587
|
+
inListCommaIndices: new Set(pivotInfoLookup.inListCommaIndices),
|
|
588
|
+
depth: state.subqueryDepth,
|
|
589
|
+
openingColumn: builder.getColumn() - 1
|
|
590
|
+
};
|
|
591
|
+
// Output newline after opening paren
|
|
592
|
+
const pivotIndent = '\n' + ' '.repeat(indentCalc.getPivotContentIndent(state.subqueryDepth));
|
|
593
|
+
builder.push(pivotIndent);
|
|
594
|
+
state.justOutputPivotNewline = true;
|
|
595
|
+
}
|
|
596
|
+
// Pop expanded function on close paren
|
|
597
|
+
if (isExpandedFunctionCloseParen && !expandedFuncs.isEmpty()) {
|
|
598
|
+
expandedFuncs.pop();
|
|
599
|
+
}
|
|
600
|
+
// Clear expanded window on close paren
|
|
601
|
+
if (isExpandedWindowCloseParen && currentExpandedWindow) {
|
|
602
|
+
currentExpandedWindow = null;
|
|
603
|
+
}
|
|
604
|
+
// Clear expanded pivot on close paren
|
|
605
|
+
if (isExpandedPivotCloseParen && currentExpandedPivot) {
|
|
606
|
+
currentExpandedPivot = null;
|
|
607
|
+
}
|
|
608
|
+
// Reset flags
|
|
609
|
+
if (state.justOutputMultiArgFunctionNewline && text !== ',' && text !== '(') {
|
|
610
|
+
state.justOutputMultiArgFunctionNewline = false;
|
|
611
|
+
}
|
|
612
|
+
if (state.justOutputWindowNewline && text !== '(' && text !== ',') {
|
|
613
|
+
state.justOutputWindowNewline = false;
|
|
614
|
+
}
|
|
615
|
+
if (state.justOutputPivotNewline && text !== '(' && text !== ',') {
|
|
616
|
+
state.justOutputPivotNewline = false;
|
|
617
|
+
}
|
|
618
|
+
if (state.justOutputInListWrapNewline && text !== ',') {
|
|
619
|
+
state.justOutputInListWrapNewline = false;
|
|
620
|
+
}
|
|
621
|
+
if (state.justOutputCommaFirstStyle && text !== ',') {
|
|
622
|
+
state.justOutputCommaFirstStyle = false;
|
|
623
|
+
}
|
|
624
|
+
// Decrease CASE depth after END
|
|
625
|
+
if (analysis.caseEndTokens.has(tokenIndex) && state.caseDepth > 0) {
|
|
626
|
+
state.caseDepth--;
|
|
627
|
+
}
|
|
628
|
+
// Decrement complex type depth after outputting closing >
|
|
629
|
+
if (shouldDecrementComplexTypeAfter) {
|
|
630
|
+
state.complexTypeDepth--;
|
|
631
|
+
}
|
|
632
|
+
// Reset clause flags
|
|
633
|
+
updateClauseFlags(symbolicName, ctx, state);
|
|
634
|
+
// Check if this token is a partition transform function (followed by paren)
|
|
635
|
+
const partitionTransformFunctions = new Set([
|
|
636
|
+
'BUCKET', 'TRUNCATE',
|
|
637
|
+
'YEAR', 'YEARS', 'MONTH', 'MONTHS',
|
|
638
|
+
'DAY', 'DAYS', 'HOUR', 'HOURS',
|
|
639
|
+
]);
|
|
640
|
+
const isPartitionTransformFunc = partitionTransformFunctions.has(text.toUpperCase()) &&
|
|
641
|
+
nextTokenType !== null && getSymbolicName(nextTokenType) === 'LEFT_PAREN';
|
|
642
|
+
// Update previous token tracking
|
|
643
|
+
state.prevWasFunctionName = ctx.isFunctionCall || isPartitionTransformFunc;
|
|
644
|
+
state.prevWasBuiltInFunctionKeyword = isBuiltInFunctionKeyword;
|
|
645
|
+
state.isFirstNonWsToken = false;
|
|
646
|
+
state.prevTokenWasUnaryOperator = currentTokenIsUnaryOperator;
|
|
647
|
+
state.prevTokenText = text;
|
|
648
|
+
state.prevTokenType = tokenType;
|
|
649
|
+
}
|
|
650
|
+
// Output remaining comments
|
|
651
|
+
if (comments.hasPending()) {
|
|
652
|
+
outputComments(builder, comments.getPending());
|
|
653
|
+
}
|
|
654
|
+
return builder.toString();
|
|
655
|
+
}
|
|
656
|
+
// ============================================================================
|
|
657
|
+
// HELPER FUNCTIONS
|
|
658
|
+
// ============================================================================
|
|
659
|
+
/**
 * Tell whether a token index belongs to the commas of any recorded IN list.
 *
 * The newline logic uses this to keep IN-list commas out of the regular
 * list-comma handling.
 *
 * @param {number} tokenIndex - Token index to look up.
 * @param {object} analysis - Analysis result; `analysis.inListInfo` is iterated
 *   as `[key, info]` pairs and each `info.commaIndices` is searched.
 * @returns {boolean} `true` as soon as one IN list contains the index, else `false`.
 */
function isInListComma(tokenIndex, analysis) {
    for (const entry of analysis.inListInfo) {
        const { commaIndices } = entry[1];
        if (commaIndices.includes(tokenIndex)) {
            return true;
        }
    }
    return false;
}
|
|
671
|
+
/**
 * Collect the token ranges of expressions that carry a fmt:inline comment.
 *
 * For every comment token accepted by `isFmtInlineComment`, the scan walks
 * backwards to the nearest token that is neither whitespace nor a comment and
 * then records every multi-arg function, window definition and PIVOT/UNPIVOT
 * whose open..close span contains that token (a comment placed right after
 * the closing paren therefore matches as well). Each range is reported once.
 *
 * @param {Array} allOrigTokens - Full token stream, including whitespace and comments.
 * @param {object} analysis - Analysis result providing `multiArgFunctionInfo`,
 *   `windowDefInfo` and `pivotInfo`, each iterated as `[openIdx, info]` pairs.
 * @returns {Array<{openTokenIndex: number, closeTokenIndex: number}>} Ranges that must stay inline.
 */
function findForceInlineRanges(allOrigTokens, analysis) {
    const collected = [];
    const seenKeys = new Set(); // "open-close" keys already recorded
    const isCommentType = (type) => type === SqlBaseLexer.SIMPLE_COMMENT ||
        type === SqlBaseLexer.BRACKETED_COMMENT;
    const record = (openIdx, closeIdx) => {
        const key = `${openIdx}-${closeIdx}`;
        if (seenKeys.has(key)) {
            return;
        }
        seenKeys.add(key);
        collected.push({ openTokenIndex: openIdx, closeTokenIndex: closeIdx });
    };
    for (let i = 0; i < allOrigTokens.length; i++) {
        const candidate = allOrigTokens[i];
        if (!candidate) {
            continue;
        }
        if (!isCommentType(candidate.type) || !isFmtInlineComment(candidate.text || '')) {
            continue;
        }
        // Walk back to the closest token that is real SQL (not WS, not a comment).
        let anchorIdx = i - 1;
        for (; anchorIdx >= 0; anchorIdx--) {
            const earlier = allOrigTokens[anchorIdx];
            if (earlier && earlier.type !== SqlBaseLexer.WS && !isCommentType(earlier.type)) {
                break;
            }
        }
        // Same containment test for functions, then windows, then pivots.
        const lookups = [analysis.multiArgFunctionInfo, analysis.windowDefInfo, analysis.pivotInfo];
        for (const lookup of lookups) {
            for (const [openIdx, info] of lookup) {
                const closeIdx = info.closeParenIndex;
                const insideSpan = anchorIdx >= openIdx && anchorIdx <= closeIdx;
                if (insideSpan || anchorIdx === closeIdx) {
                    record(openIdx, closeIdx);
                }
            }
        }
    }
    return collected;
}
|
|
748
|
+
/**
 * Report whether some force-inline range starts at the given token.
 *
 * @param {number} tokenIndex - Token index to test.
 * @param {Array<{openTokenIndex: number}>} ranges - Force-inline ranges.
 * @returns {boolean} `true` when a range has `openTokenIndex === tokenIndex`.
 */
function isForceInlineOpen(tokenIndex, ranges) {
    const opensHere = ({ openTokenIndex }) => openTokenIndex === tokenIndex;
    return ranges.some(opensHere);
}
|
|
754
|
+
/**
 * Roughly measure the IN-list item that follows the current comma.
 *
 * Starting at the first token returned by `findNextNonWsTokenIndex(currentIndex + 1)`,
 * token text lengths are summed until the list's closing paren, an unmatched
 * right paren, or the next comma outside nested parentheses is reached.
 * Parentheses add only their own length; every other token adds its length
 * plus one for an assumed separating space.
 *
 * @param {Array} tokenList - Tokens indexed by the values `findNextNonWsTokenIndex` returns.
 * @param {number} currentIndex - Position of the current comma in `tokenList`.
 * @param {function(number): number} findNextNonWsTokenIndex - Returns the next position
 *   to visit; a negative value ends the scan.
 * @param {number} closeParenIndex - `tokenIndex` of the IN list's closing paren.
 * @returns {number} Estimated character length of the next item.
 */
function estimateNextInListItemLength(tokenList, currentIndex, findNextNonWsTokenIndex, closeParenIndex) {
    let estimated = 0;
    let nesting = 0;
    for (let pos = findNextNonWsTokenIndex(currentIndex + 1); pos >= 0 && pos < tokenList.length; pos = findNextNonWsTokenIndex(pos + 1)) {
        const current = tokenList[pos];
        const width = (current.text || '').length;
        const kind = SqlBaseLexer.symbolicNames[current.type];
        if (current.tokenIndex === closeParenIndex) {
            break; // end of the IN list itself
        }
        if (kind === 'LEFT_PAREN') {
            nesting += 1;
            estimated += width;
            continue;
        }
        if (kind === 'RIGHT_PAREN') {
            if (nesting <= 0) {
                break; // unmatched close paren: the list ends here
            }
            nesting -= 1;
            estimated += width;
            continue;
        }
        if (kind === 'COMMA' && nesting === 0) {
            break; // top-level comma terminates the item
        }
        estimated += width + 1; // +1: rough allowance for a space between tokens
    }
    return estimated;
}
|
|
798
|
+
/**
 * Build the per-token context flags from the analysis result.
 *
 * Every flag is a membership test of `tokenIndex` in one of the analysis
 * token sets, except `isSetKeyword`, which compares against the single
 * `analysis.setKeywordToken` index.
 *
 * @param {number} tokenIndex - Index of the token being formatted.
 * @param {object} analysis - Analysis result holding the token sets.
 * @returns {object} Boolean flags describing the token's grammatical role.
 */
function getTokenContext(tokenIndex, analysis) {
    const marked = (tokenSet) => tokenSet.has(tokenIndex);
    return {
        isInIdentifierContext: marked(analysis.identifierTokens),
        isInQualifiedName: marked(analysis.qualifiedNameTokens),
        isFunctionCall: marked(analysis.functionCallTokens),
        isClauseStart: marked(analysis.clauseStartTokens),
        isListComma: marked(analysis.listItemCommas),
        isConditionOperator: marked(analysis.conditionOperators),
        isBetweenAnd: marked(analysis.betweenAndTokens),
        isJoinOn: marked(analysis.joinOnTokens),
        isSubqueryOpenParen: marked(analysis.subqueryOpenParens),
        isSubqueryCloseParen: marked(analysis.subqueryCloseParens),
        isSetOperandParen: marked(analysis.setOperandParens),
        isCteComma: marked(analysis.cteCommas),
        isCteMainSelect: marked(analysis.cteMainSelectTokens),
        isDdlComma: marked(analysis.ddlColumnCommas),
        isDdlOpenParen: marked(analysis.ddlOpenParens),
        isDdlCloseParen: marked(analysis.ddlCloseParens),
        isDdlMultiColumn: marked(analysis.ddlMultiColumn),
        isValuesComma: marked(analysis.valuesCommas),
        isSetComma: marked(analysis.setClauseCommas),
        isSetKeyword: analysis.setKeywordToken === tokenIndex,
        isLateralViewComma: marked(analysis.lateralViewCommas),
        isMergeUsing: marked(analysis.mergeUsingTokens),
        isMergeOn: marked(analysis.mergeOnTokens),
        isMergeWhen: marked(analysis.mergeWhenTokens),
    };
}
|
|
829
|
+
/**
 * Structural keywords that are always uppercased, even in identifier contexts.
 * These are syntactic markers, not actual identifier names.
 * e.g., "LATERAL VIEW EXPLODE(arr) AS item" - AS is a keyword, not an identifier.
 * Matched against the token's symbolic name.
 */
const OUTPUT_TEXT_STRUCTURAL_KEYWORDS = new Set(['AS', 'ON', 'AND', 'OR', 'IN', 'FOR', 'USING']);
/**
 * Extension keywords that are always uppercased, even in identifier context.
 * Keywords not in Spark grammar (Delta Lake extensions).
 * Matched against the uppercased token text.
 */
const OUTPUT_TEXT_EXTENSION_KEYWORDS = new Set([
    // Spark SQL extensions not in grammar
    'SYSTEM', // SHOW SYSTEM FUNCTIONS
    'NOSCAN', // ANALYZE TABLE ... NOSCAN
    // Delta Lake keywords (none are in the Apache Spark grammar)
    'VACUUM', 'RETAIN',
    'RESTORE',
    'CLONE', 'SHALLOW', 'DEEP',
    'OPTIMIZE', 'ZORDER',
]);
/**
 * Partition transform function names: uppercased only when followed by '('.
 * e.g., "PARTITIONED BY (bucket(3, col))" - BUCKET uppercase
 * e.g., "SELECT year FROM t" - year keeps its casing (it's a column name)
 */
const OUTPUT_TEXT_PARTITION_TRANSFORMS = new Set([
    'BUCKET', 'TRUNCATE',
    'YEAR', 'YEARS', 'MONTH', 'MONTHS',
    'DAY', 'DAYS', 'HOUR', 'HOURS',
]);
/**
 * Determine the output text for a token (casing rules).
 *
 * Rules are tried in order and the first match wins:
 *   1. SET config tokens keep their casing.
 *   2. GROUP BY ALL tokens are uppercased.
 *   3. Function-call tokens are uppercased when built-in or a keyword, else preserved.
 *   4. Structural keywords (by symbolic name) are uppercased.
 *   5. Extension keywords (by text) are uppercased.
 *   6. Partition transform names are uppercased when the next token is '('.
 *   7. Tokens in identifier context keep their casing.
 *   8. Keyword tokens are uppercased; everything else is preserved.
 *
 * The lookup sets are module-level constants so they are not rebuilt for
 * every token.
 *
 * @param {number} tokenIndex - Index of the token in the stream.
 * @param {number} tokenType - Lexer token type.
 * @param {string} text - Original token text.
 * @param {string|null} symbolicName - Symbolic name of the token type.
 * @param {object} ctx - Token context flags (`isFunctionCall`, `isInIdentifierContext`).
 * @param {object} analysis - Analysis result (`setConfigTokens`, `groupByAllTokens`).
 * @param {number|null} nextTokenType - Type of the following token, or `null` if none.
 * @returns {string} The text to emit for the token.
 */
function determineOutputText(tokenIndex, tokenType, text, symbolicName, ctx, analysis, nextTokenType) {
    // SET config tokens - preserve casing
    if (analysis.setConfigTokens.has(tokenIndex)) {
        return text;
    }
    // GROUP BY ALL - uppercase
    if (analysis.groupByAllTokens.has(tokenIndex)) {
        return text.toUpperCase();
    }
    // Function call context
    if (ctx.isFunctionCall) {
        const funcLower = text.toLowerCase();
        const isBuiltIn = SPARK_BUILTIN_FUNCTIONS.has(funcLower) || isKeywordToken(tokenType, text);
        return isBuiltIn ? text.toUpperCase() : text;
    }
    // Structural keywords - uppercase even in identifier contexts
    if (symbolicName && OUTPUT_TEXT_STRUCTURAL_KEYWORDS.has(symbolicName)) {
        return text.toUpperCase();
    }
    // Extension keywords - uppercase even in identifier context
    const textUpper = text.toUpperCase();
    if (OUTPUT_TEXT_EXTENSION_KEYWORDS.has(textUpper)) {
        return textUpper;
    }
    // Partition transform functions: uppercase only when followed by '('.
    // These are grammar keywords but appear as transformName=identifier in grammar.
    // When used as column names (not followed by '('), they fall through to the
    // identifier/keyword rules below.
    if (OUTPUT_TEXT_PARTITION_TRANSFORMS.has(textUpper)) {
        const isFollowedByParen = nextTokenType !== null &&
            getSymbolicName(nextTokenType) === 'LEFT_PAREN';
        if (isFollowedByParen) {
            return textUpper;
        }
    }
    // Identifier context - preserve casing.
    // When a token is marked as identifier by the parse tree, the grammar is using
    // it as an identifier (column name, table name, etc.).
    if (ctx.isInIdentifierContext) {
        return text;
    }
    // Keyword - uppercase
    if (isKeywordToken(tokenType, text)) {
        return text.toUpperCase();
    }
    // Default - preserve
    return text;
}
|
|
903
|
+
/**
 * Determine if a newline and indent are needed before this token.
 *
 * The function is a sequence of independent rule checks. Each matching rule
 * sets `needsNewline = true` and overwrites `indent`, so when several rules
 * match the same token, the indent of the LAST matching rule is the one
 * returned. The order of the checks is therefore significant.
 *
 * Besides computing the result, the function mutates `state`: it sets the
 * `after*Keyword` flags when a clause keyword is seen, clears them when the
 * first item/condition of that clause is reached, maintains
 * `isFirstListItem` / `currentClauseIsMultiItem`, and raises
 * `justOutputCommaFirstStyle` for the comma rules.
 *
 * Note: the `text` and `isExpandedPivotInKeyword` parameters are not read in
 * this function body.
 *
 * @param tokenIndex Index of the token being placed.
 * @param text Token text (unused here).
 * @param symbolicName Symbolic name of the token type.
 * @param ctx Token context flags (see getTokenContext).
 * @param analysis Analysis result with token sets for clauses, CASE, EXCEPT, etc.
 * @param state Mutable formatting state (depths and "after keyword" flags).
 * @param expandedFuncs Stack of expanded functions; `current()` is consulted for its `depth`.
 * @param currentExpandedWindow Currently expanded window (uses `baseDepth`), or a falsy value.
 * @param currentExpandedPivot Currently expanded PIVOT/UNPIVOT (uses `depth`), or a falsy value.
 * @param isExpanded* Flags saying the token is the named part of an expanded construct.
 * @param inCompactQuery Suppresses clause-start and subquery-close newlines.
 * @param isShortSetOperation Suppresses clause-start and set-operand newlines.
 * @param isShortValues Suppresses VALUES expansion.
 * @returns `{ needsNewline, indent }`.
 */
function determineNewlineAndIndent(tokenIndex, text, symbolicName, ctx, analysis, state, expandedFuncs, currentExpandedWindow, currentExpandedPivot, isExpandedFunctionComma, isExpandedFunctionCloseParen, isExpandedWindowOrderBy, isExpandedWindowFrame, isExpandedWindowCloseParen, isExpandedPivotAggregateComma, isExpandedPivotForKeyword, isExpandedPivotInKeyword, isExpandedPivotInListComma, isExpandedPivotCloseParen, inCompactQuery, isShortSetOperation, isShortValues) {
    let needsNewline = false;
    let indent = '';
    // Computed once up front from the depths as they are on entry
    const baseIndent = indentCalc.getBaseIndent(state.subqueryDepth, state.ddlDepth);
    // Clause state updates
    // (one else-if chain: at most one of these keyword branches runs per token)
    if (symbolicName === 'SELECT' && ctx.isClauseStart) {
        state.afterSelectKeyword = true;
        state.isFirstListItem = true;
        state.currentClauseIsMultiItem = analysis.multiItemClauses.has(tokenIndex);
    }
    else if (symbolicName === 'GROUP' && ctx.isClauseStart) {
        state.afterGroupByKeyword = true;
        state.isFirstListItem = true;
        state.currentClauseIsMultiItem = analysis.multiItemClauses.has(tokenIndex);
    }
    else if (symbolicName === 'ORDER' && ctx.isClauseStart) {
        state.afterOrderByKeyword = true;
        state.isFirstListItem = true;
        state.currentClauseIsMultiItem = analysis.multiItemClauses.has(tokenIndex);
    }
    else if (symbolicName === 'WHERE' && ctx.isClauseStart) {
        // Only multiline condition clauses arm the "first condition" newline below
        if (analysis.multilineConditionClauses.has(tokenIndex)) {
            state.afterWhereKeyword = true;
        }
    }
    else if (symbolicName === 'HAVING' && ctx.isClauseStart) {
        if (analysis.multilineConditionClauses.has(tokenIndex)) {
            state.afterHavingKeyword = true;
        }
    }
    else if (symbolicName === 'ON' && ctx.isJoinOn && !state.isFirstNonWsToken) {
        needsNewline = true;
        indent = indentCalc.getOnClauseIndent(state.subqueryDepth, state.ddlDepth);
    }
    else if (symbolicName === 'SET' && ctx.isSetKeyword) {
        state.afterSetKeyword = true;
        state.isFirstListItem = true;
        state.currentClauseIsMultiItem = analysis.multiItemClauses.has(tokenIndex);
    }
    else if (symbolicName === 'VALUES') {
        state.afterValuesKeyword = true;
        state.isFirstListItem = true;
    }
    // CASE expression handling
    // Nested multi-WHEN CASE after THEN should go to new line with extra indent
    if (symbolicName === 'CASE' && analysis.multiWhenCaseTokens.has(tokenIndex) && state.prevTokenText === 'THEN') {
        needsNewline = true;
        // Nested CASE is indented 4 more than the current WHEN level
        // caseDepth represents how many multi-WHEN CASEs we're inside (after their CASE keyword)
        // So nested CASE indent = WHEN indent + 4 = base + 8 + (caseDepth-1)*4 + 4 = base + 8 + caseDepth*4
        const nestingOffset = state.caseDepth * 4;
        indent = indentCalc.getCaseWhenIndent(state.subqueryDepth, state.ddlDepth) + ' '.repeat(nestingOffset);
    }
    if (analysis.caseWhenTokens.has(tokenIndex)) {
        needsNewline = true;
        // WHEN/ELSE indent = base + 8 + (caseDepth-1)*4 for caseDepth >= 1
        const nestingOffset = state.caseDepth > 0 ? (state.caseDepth - 1) * 4 : 0;
        indent = indentCalc.getCaseWhenIndent(state.subqueryDepth, state.ddlDepth) + ' '.repeat(nestingOffset);
    }
    else if (analysis.caseElseTokens.has(tokenIndex)) {
        needsNewline = true;
        const nestingOffset = state.caseDepth > 0 ? (state.caseDepth - 1) * 4 : 0;
        indent = indentCalc.getCaseWhenIndent(state.subqueryDepth, state.ddlDepth) + ' '.repeat(nestingOffset);
    }
    else if (analysis.caseEndTokens.has(tokenIndex)) {
        needsNewline = true;
        // END aligns with its CASE, which is 3 less than WHEN (getCaseEndIndent vs getCaseWhenIndent)
        const nestingOffset = state.caseDepth > 0 ? (state.caseDepth - 1) * 4 : 0;
        indent = indentCalc.getCaseEndIndent(state.subqueryDepth, state.ddlDepth) + ' '.repeat(nestingOffset);
    }
    // MERGE clause handling
    if ((ctx.isMergeUsing || ctx.isMergeOn || ctx.isMergeWhen) && !state.isFirstNonWsToken) {
        needsNewline = true;
        indent = baseIndent;
    }
    // CTE main SELECT - always add newline after CTE block (per STYLE_GUIDE)
    // This takes precedence over compact query logic because the CTE body may have expanded
    if (ctx.isCteMainSelect && !state.isFirstNonWsToken) {
        needsNewline = true;
        indent = baseIndent;
    }
    // Clause start newline - SKIP if inside a compact query OR short set operation
    if (!state.isFirstNonWsToken && ctx.isClauseStart && !ctx.isInIdentifierContext && !inCompactQuery && !isShortSetOperation) {
        needsNewline = true;
        indent = baseIndent;
    }
    // Set operation operand parens - SKIP if short set operation
    if (ctx.isSetOperandParen && !state.isFirstNonWsToken && !isShortSetOperation) {
        needsNewline = true;
        indent = baseIndent;
    }
    // Subquery close paren - only add newline if NOT in a compact query
    if (ctx.isSubqueryCloseParen && !inCompactQuery) {
        needsNewline = true;
        // One level shallower than the current subquery depth
        indent = indentCalc.getBaseIndent(state.subqueryDepth - 1);
    }
    // DDL close paren
    if (ctx.isDdlCloseParen && state.ddlDepth > 0) {
        needsNewline = true;
        indent = ' '.repeat(state.subqueryDepth + state.ddlDepth - 1);
    }
    // Expanded function close paren
    if (isExpandedFunctionCloseParen && expandedFuncs.current()) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getExpandedFunctionCloseIndent(expandedFuncs.current().depth));
    }
    // Expanded window handling
    // Skip if we just output a window expansion newline (don't double-newline)
    if (isExpandedWindowOrderBy && currentExpandedWindow && !state.justOutputWindowNewline) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getWindowContentIndent(currentExpandedWindow.baseDepth));
    }
    if (isExpandedWindowFrame && currentExpandedWindow && !state.justOutputWindowNewline) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getWindowContentIndent(currentExpandedWindow.baseDepth));
    }
    if (isExpandedWindowCloseParen && currentExpandedWindow) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getWindowCloseIndent(currentExpandedWindow.baseDepth));
    }
    // Expanded PIVOT/UNPIVOT handling
    if (isExpandedPivotAggregateComma && currentExpandedPivot) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getPivotCommaIndent(currentExpandedPivot.depth));
        state.justOutputCommaFirstStyle = true;
    }
    if (isExpandedPivotForKeyword && currentExpandedPivot) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getPivotContentIndent(currentExpandedPivot.depth));
    }
    if (isExpandedPivotInListComma && currentExpandedPivot) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getPivotCommaIndent(currentExpandedPivot.depth) + 4); // Extra indent for IN list
        state.justOutputCommaFirstStyle = true;
    }
    if (isExpandedPivotCloseParen && currentExpandedPivot) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getPivotCloseIndent(currentExpandedPivot.depth));
    }
    // List comma handling - but NOT for IN list commas (those use wrap logic instead)
    // Also skip for commas inside complex types like MAP<STRING, INT>
    // Also skip for commas inside EXCEPT clause (column exclusion)
    const isExceptClauseToken = analysis.exceptClauseTokens.has(tokenIndex);
    if (ctx.isListComma && state.insideFunctionArgs === 0 && !isInListComma(tokenIndex, analysis) && state.complexTypeDepth === 0 && !isExceptClauseToken) {
        needsNewline = true;
        indent = indentCalc.getCommaIndent(state.subqueryDepth, state.ddlDepth);
        state.isFirstListItem = false;
        state.justOutputCommaFirstStyle = true;
    }
    // CTE comma
    if (ctx.isCteComma) {
        needsNewline = true;
        indent = '';
        state.justOutputCommaFirstStyle = true;
    }
    // DDL comma
    if (ctx.isDdlComma) {
        needsNewline = true;
        indent = indentCalc.getCommaIndent(state.subqueryDepth);
        state.justOutputCommaFirstStyle = true;
    }
    // VALUES comma - expand only if the VALUES statement is long
    if (ctx.isValuesComma && !isShortValues) {
        needsNewline = true;
        indent = baseIndent;
        state.justOutputCommaFirstStyle = true;
    }
    // SET comma
    if (ctx.isSetComma) {
        needsNewline = true;
        indent = indentCalc.getCommaIndent(state.subqueryDepth, state.ddlDepth);
        state.justOutputCommaFirstStyle = true;
    }
    // Expanded function comma
    if (isExpandedFunctionComma && expandedFuncs.current()) {
        needsNewline = true;
        indent = ' '.repeat(indentCalc.getExpandedFunctionCommaIndent(expandedFuncs.current().depth));
        state.justOutputCommaFirstStyle = true;
    }
    // Condition operator (AND/OR) - but not BETWEEN's AND
    if (ctx.isConditionOperator && !ctx.isBetweenAnd) {
        needsNewline = true;
        indent = indentCalc.getCommaIndent(state.subqueryDepth, state.ddlDepth);
    }
    // First list item after SELECT/GROUP BY/ORDER BY
    if (!ctx.isListComma && (state.afterSelectKeyword || state.afterGroupByKeyword || state.afterOrderByKeyword)) {
        // The clause keyword itself is never treated as the first item
        if (symbolicName !== 'SELECT' && symbolicName !== 'GROUP' && symbolicName !== 'ORDER') {
            if ((state.afterGroupByKeyword && symbolicName === 'BY') ||
                (state.afterOrderByKeyword && symbolicName === 'BY') ||
                symbolicName === 'DISTINCT') {
                // Skip BY or DISTINCT
            }
            else if (state.isFirstListItem && state.currentClauseIsMultiItem) {
                needsNewline = true;
                indent = indentCalc.getFirstItemIndent(state.subqueryDepth, state.ddlDepth);
                state.isFirstListItem = false;
            }
            else if (state.isFirstListItem) {
                // Single-item clause: consume the "first item" flag without a newline
                state.isFirstListItem = false;
            }
        }
    }
    // First assignment after SET
    if (!ctx.isSetComma && state.afterSetKeyword && symbolicName !== 'SET' && state.isFirstListItem) {
        if (state.currentClauseIsMultiItem) {
            needsNewline = true;
            indent = indentCalc.getFirstItemIndent(state.subqueryDepth, state.ddlDepth);
        }
        state.isFirstListItem = false;
        state.afterSetKeyword = false;
    }
    // First tuple after VALUES - expand only if the VALUES statement is long
    if (!ctx.isValuesComma && state.afterValuesKeyword && symbolicName !== 'VALUES' && state.isFirstListItem) {
        if (!isShortValues) {
            needsNewline = true;
            indent = baseIndent;
        }
        state.isFirstListItem = false;
        state.afterValuesKeyword = false;
    }
    // First condition after WHERE/HAVING
    if (!ctx.isConditionOperator && (state.afterWhereKeyword || state.afterHavingKeyword)) {
        if (symbolicName !== 'WHERE' && symbolicName !== 'HAVING') {
            needsNewline = true;
            indent = indentCalc.getCommaIndent(state.subqueryDepth, state.ddlDepth);
            state.afterWhereKeyword = false;
            state.afterHavingKeyword = false;
        }
    }
    return { needsNewline, indent };
}
|
|
1137
|
+
/**
 * Start a new line for the upcoming token, flushing pending comments around it.
 *
 * Order of output: inline comments (still on the current line), a newline via
 * `builder.ensureNewline()`, each own-line comment at `indent`, and finally
 * the indent for the token itself. The token text is not written here. The
 * pending comments are cleared afterwards.
 *
 * @param builder Output builder (`push`, `ensureNewline`, `isEmpty`).
 * @param comments Pending comment manager (`getInlineComments`, `getOwnLineComments`, `clear`).
 * @param indent Indent string; nothing is pushed for it when empty.
 * @param state Formatting state (not read in this function).
 */
function outputWithNewline(builder, comments, indent, state) {
    const trailing = comments.getInlineComments();
    const standalone = comments.getOwnLineComments();
    const emitIndent = () => {
        if (indent) {
            builder.push(indent);
        }
    };
    // Inline comments stay on the line that is about to end
    if (trailing.length > 0) {
        outputComments(builder, trailing);
    }
    builder.ensureNewline();
    for (const entry of standalone) {
        // Keep a blank line that preceded the comment in the original text
        const wantsBlankLine = entry.hadBlankLineBefore && !builder.isEmpty();
        if (wantsBlankLine) {
            builder.push('\n');
        }
        emitIndent();
        builder.push(entry.text);
        // Bracketed comments get a line break appended unless their text already ends with one
        const lacksLineBreak = entry.type === SqlBaseLexer.BRACKETED_COMMENT && !entry.text.endsWith('\n');
        if (lacksLineBreak) {
            builder.push('\n');
        }
    }
    emitIndent();
    comments.clear();
}
|
|
1167
|
+
/**
 * Prepare the output for a token that stays on the current line.
 *
 * Flushes any pending comments, then decides whether a single separating
 * space must precede the token. The token text itself is not written here.
 * Nothing more happens when the builder is still empty.
 *
 * @param builder Output builder (`push`, `isEmpty`, `getLastChar`).
 * @param comments Pending comment manager (`hasPending`, `getPending`, `clear`).
 * @param text Text of the token about to be written.
 * @param symbolicName Symbolic name of the token type.
 * @param state Formatting state holding the previous-token and "just output" flags.
 * @param currentTokenIsUnaryOperator Whether the current token is a unary operator.
 * @param isLateralViewComma Whether the token is a LATERAL VIEW comma (default `false`).
 */
function outputWithoutNewline(builder, comments, text, symbolicName, state, currentTokenIsUnaryOperator, isLateralViewComma = false) {
    if (comments.hasPending()) {
        outputComments(builder, comments.getPending(), !builder.isEmpty());
        comments.clear();
    }
    if (builder.isEmpty()) {
        return;
    }
    const trailingChar = builder.getLastChar();
    const prevIsDoubleColon = trailingChar === ':' && text !== ':';
    // Use the previous token's type so a real DOT (member access) is told apart from a decimal like "1."
    const prevSymbol = state.prevTokenType >= 0 ? getSymbolicName(state.prevTokenType) : null;
    const signOperators = ['-', '+', '~'];
    const spacingHints = {
        prevWasFunctionName: state.prevWasFunctionName,
        prevWasBuiltInFunctionKeyword: state.prevWasBuiltInFunctionKeyword,
        insideParens: state.insideParens,
        justOutputCommaFirstStyle: state.justOutputCommaFirstStyle,
        justOutputMultiArgFunctionNewline: state.justOutputMultiArgFunctionNewline,
        justOutputWindowNewline: state.justOutputWindowNewline,
        justOutputInListWrapNewline: state.justOutputInListWrapNewline,
        afterWhereKeyword: state.afterWhereKeyword,
        afterHavingKeyword: state.afterHavingKeyword,
        prevTokenWasUnaryOperator: state.prevTokenWasUnaryOperator &&
            signOperators.includes(state.prevTokenText),
        currentTokenIsUnaryOperator,
        isLateralViewComma,
        prevIsDoubleColon,
        prevTokenText: state.prevTokenText,
        currentTokenIsStringLiteral: symbolicName === 'STRING_LITERAL',
        prevWasDotToken: prevSymbol === 'DOT',
        complexTypeDepth: state.complexTypeDepth,
    };
    const skipSpace = shouldSkipSpace(builder, text, spacingHints);
    const needsCommaSpace = shouldAddCommaSpace(builder, state.insideParens, state.justOutputCommaFirstStyle);
    // A comma-space request overrides a "skip" decision
    if (needsCommaSpace || !skipSpace) {
        builder.push(' ');
    }
}
|
|
1207
|
+
/**
 * Record a multi-argument function call as expanded.
 *
 * An entry describing the call is pushed onto `expandedFuncs`. Unless the
 * first argument is itself a function call with multi-arg info (a "chained"
 * call), a newline plus the content indent is written to the builder and
 * `state.justOutputMultiArgFunctionNewline` is set.
 */
function handleFunctionExpansion(builder, expandedFuncs, funcInfo, tokenList, currentIndex, findNextNonWsTokenIndex, analysis, state) {
    const depth = expandedFuncs.depth;
    const isUsableIndex = (idx) => idx > 0 && idx < tokenList.length;
    // Chained-call detection: only attempted at odd depths. The next
    // non-whitespace token must be a function name, and the token after it
    // must have multi-arg function info registered.
    const detectChainedFirstArg = () => {
        if (depth % 2 !== 1) {
            return false;
        }
        const nameIdx = findNextNonWsTokenIndex(currentIndex + 1);
        if (!isUsableIndex(nameIdx)) {
            return false;
        }
        if (!analysis.functionCallTokens.has(tokenList[nameIdx].tokenIndex)) {
            return false;
        }
        const parenIdx = findNextNonWsTokenIndex(nameIdx + 1);
        if (!isUsableIndex(parenIdx)) {
            return false;
        }
        return Boolean(analysis.multiArgFunctionInfo.get(tokenList[parenIdx].tokenIndex));
    };
    const firstArgIsChainedFunc = detectChainedFirstArg();
    // STACK(count, alias1, col1, alias2, col2, ...): the count sits alone and
    // each (alias, col) pair shares a line. So the 2nd, 4th, 6th... commas
    // (0-based positions 1, 3, 5...) must not trigger a newline.
    let skipNewlineCommas;
    const commas = funcInfo.commaIndices;
    if (funcInfo.functionName === 'STACK' && commas.length >= 2) {
        skipNewlineCommas = new Set();
        let pos = 1;
        while (pos < commas.length) {
            skipNewlineCommas.add(commas[pos]);
            pos += 2;
        }
    }
    expandedFuncs.push({
        closeParenIndex: funcInfo.closeParenIndex,
        commaIndices: new Set(funcInfo.commaIndices),
        depth,
        openingColumn: builder.getColumn() - 1,
        firstArgIsChainedFunc,
        functionName: funcInfo.functionName,
        skipNewlineCommas,
    });
    // A chained first argument stays on the same line as the opening paren.
    if (firstArgIsChainedFunc) {
        return;
    }
    const contentIndent = indentCalc.getExpandedFunctionContentIndent(depth);
    builder.push(`\n${' '.repeat(contentIndent)}`);
    state.justOutputMultiArgFunctionNewline = true;
}
|
|
1260
|
+
/**
 * Clear the "just after clause keyword" flags once a token has been processed.
 *
 * Each of `afterSelectKeyword`, `afterGroupByKeyword` and
 * `afterOrderByKeyword` on `state` is reset to `false` when it is currently
 * set, the token is not one of that clause's own keywords, and the token is
 * not a list comma (`ctx.isListComma`).
 */
function updateClauseFlags(symbolicName, ctx, state) {
    const release = (flagName, keywordA, keywordB) => {
        // The clause's own keywords never clear its flag.
        if (symbolicName === keywordA || symbolicName === keywordB) {
            return;
        }
        // Nothing to clear, or a list comma is keeping the flag alive.
        if (!state[flagName] || ctx.isListComma) {
            return;
        }
        state[flagName] = false;
    };
    release('afterSelectKeyword', 'SELECT', 'DISTINCT');
    release('afterGroupByKeyword', 'GROUP', 'BY');
    release('afterOrderByKeyword', 'ORDER', 'BY');
}
|