@jacobknightley/fabric-format 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -0
- package/dist/cell-formatter.d.ts +75 -0
- package/dist/cell-formatter.js +144 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +435 -0
- package/dist/formatters/index.d.ts +19 -0
- package/dist/formatters/index.js +76 -0
- package/dist/formatters/python/config.d.ts +33 -0
- package/dist/formatters/python/config.js +29 -0
- package/dist/formatters/python/index.d.ts +7 -0
- package/dist/formatters/python/index.js +13 -0
- package/dist/formatters/python/python-formatter.d.ts +51 -0
- package/dist/formatters/python/python-formatter.js +180 -0
- package/dist/formatters/sparksql/constants.d.ts +16 -0
- package/dist/formatters/sparksql/constants.js +16 -0
- package/dist/formatters/sparksql/fmt-detector.d.ts +65 -0
- package/dist/formatters/sparksql/fmt-detector.js +84 -0
- package/dist/formatters/sparksql/formatter.d.ts +24 -0
- package/dist/formatters/sparksql/formatter.js +1276 -0
- package/dist/formatters/sparksql/formatting-context.d.ts +154 -0
- package/dist/formatters/sparksql/formatting-context.js +363 -0
- package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +529 -0
- package/dist/formatters/sparksql/generated/SqlBaseLexer.js +2609 -0
- package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +8195 -0
- package/dist/formatters/sparksql/generated/SqlBaseParser.js +48793 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +910 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +2730 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +456 -0
- package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +1822 -0
- package/dist/formatters/sparksql/generated/builtinFunctions.d.ts +8 -0
- package/dist/formatters/sparksql/generated/builtinFunctions.js +510 -0
- package/dist/formatters/sparksql/index.d.ts +11 -0
- package/dist/formatters/sparksql/index.js +22 -0
- package/dist/formatters/sparksql/output-builder.d.ts +89 -0
- package/dist/formatters/sparksql/output-builder.js +191 -0
- package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +264 -0
- package/dist/formatters/sparksql/parse-tree-analyzer.js +1956 -0
- package/dist/formatters/sparksql/sql-formatter.d.ts +25 -0
- package/dist/formatters/sparksql/sql-formatter.js +56 -0
- package/dist/formatters/sparksql/token-utils.d.ts +68 -0
- package/dist/formatters/sparksql/token-utils.js +155 -0
- package/dist/formatters/sparksql/types.d.ts +264 -0
- package/dist/formatters/sparksql/types.js +7 -0
- package/dist/formatters/types.d.ts +57 -0
- package/dist/formatters/types.js +7 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +41 -0
- package/dist/notebook-formatter.d.ts +107 -0
- package/dist/notebook-formatter.js +424 -0
- package/package.json +63 -0
|
@@ -0,0 +1,1956 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse Tree Analyzer - Collects Formatting Context from AST
|
|
3
|
+
*
|
|
4
|
+
* This visitor walks the ANTLR parse tree and collects information about:
|
|
5
|
+
* - Identifier tokens (preserve casing)
|
|
6
|
+
* - Function call tokens (uppercase)
|
|
7
|
+
* - Clause-starting tokens (newline before)
|
|
8
|
+
* - List item separators (commas in SELECT, GROUP BY, ORDER BY)
|
|
9
|
+
* - Condition separators (AND/OR in WHERE/HAVING)
|
|
10
|
+
* - Subquery boundaries
|
|
11
|
+
* - And many more context-specific positions
|
|
12
|
+
*
|
|
13
|
+
* This is 100% grammar-driven - no hardcoded keyword lists.
|
|
14
|
+
*/
|
|
15
|
+
// @ts-ignore - Generated ANTLR code
|
|
16
|
+
import SqlBaseLexer from './generated/SqlBaseLexer.js';
|
|
17
|
+
// @ts-ignore - Generated ANTLR code
|
|
18
|
+
import SqlBaseParser from './generated/SqlBaseParser.js';
|
|
19
|
+
// @ts-ignore - Generated ANTLR code
|
|
20
|
+
import SqlBaseParserVisitor from './generated/SqlBaseParserVisitor.js';
|
|
21
|
+
import { getTokenType } from './token-utils.js';
|
|
22
|
+
/**
|
|
23
|
+
* Visitor that collects context information from parse tree.
|
|
24
|
+
* After visiting, call getResult() to get the analysis.
|
|
25
|
+
*/
|
|
26
|
+
export class ParseTreeAnalyzer extends SqlBaseParserVisitor {
|
|
27
|
+
// ========== TOKEN POSITION SETS ==========
|
|
28
|
+
identifierTokens = new Set();
|
|
29
|
+
functionCallTokens = new Set();
|
|
30
|
+
clauseStartTokens = new Set();
|
|
31
|
+
qualifiedNameTokens = new Set(); // Tokens that are part of qualified names (t.column)
|
|
32
|
+
// List formatting
|
|
33
|
+
listItemCommas = new Set();
|
|
34
|
+
listFirstItems = new Set();
|
|
35
|
+
multiItemClauses = new Set();
|
|
36
|
+
// Condition formatting
|
|
37
|
+
conditionOperators = new Set();
|
|
38
|
+
multilineConditionClauses = new Set();
|
|
39
|
+
betweenAndTokens = new Set();
|
|
40
|
+
// Subquery tracking
|
|
41
|
+
subqueryDepth = 0;
|
|
42
|
+
tokenDepthMap = new Map();
|
|
43
|
+
subqueryOpenParens = new Set();
|
|
44
|
+
subqueryCloseParens = new Set();
|
|
45
|
+
setOperandParens = new Set();
|
|
46
|
+
// Alias handling
|
|
47
|
+
aliasInsertPositions = new Set();
|
|
48
|
+
tableAliasAsTokens = new Set(); // AS tokens in table alias context (to be suppressed)
|
|
49
|
+
// JOIN handling
|
|
50
|
+
joinOnTokens = new Set();
|
|
51
|
+
// CTE handling
|
|
52
|
+
cteCommas = new Set();
|
|
53
|
+
cteMainSelectTokens = new Set(); // SELECT tokens of main query after CTE block
|
|
54
|
+
// DDL handling
|
|
55
|
+
ddlColumnCommas = new Set();
|
|
56
|
+
ddlOpenParens = new Set();
|
|
57
|
+
ddlCloseParens = new Set();
|
|
58
|
+
ddlFirstColumn = new Set();
|
|
59
|
+
ddlMultiColumn = new Set();
|
|
60
|
+
// DML handling
|
|
61
|
+
valuesCommas = new Set();
|
|
62
|
+
valuesHasTuples = false; // true if VALUES contains tuples like (a, b), (c, d)
|
|
63
|
+
setClauseCommas = new Set();
|
|
64
|
+
setKeywordToken = -1;
|
|
65
|
+
// CASE expression handling
|
|
66
|
+
multiWhenCaseTokens = new Set();
|
|
67
|
+
caseWhenTokens = new Set();
|
|
68
|
+
caseElseTokens = new Set();
|
|
69
|
+
caseEndTokens = new Set();
|
|
70
|
+
simpleCaseTokens = new Set(); // CASE tokens that have value expressions (simpleCase)
|
|
71
|
+
simpleCaseValueEndTokens = new Set(); // Tokens after value in CASE x WHEN ...
|
|
72
|
+
// Grouping analytics
|
|
73
|
+
groupingAnalyticsParens = new Set();
|
|
74
|
+
insideGroupingAnalytics = false;
|
|
75
|
+
// EXCEPT clause (column exclusion in SELECT)
|
|
76
|
+
exceptClauseTokens = new Set(); // tokens inside EXCEPT (...) for column exclusion
|
|
77
|
+
// SET configuration
|
|
78
|
+
setConfigTokens = new Set();
|
|
79
|
+
// MERGE statement
|
|
80
|
+
mergeUsingTokens = new Set();
|
|
81
|
+
mergeOnTokens = new Set();
|
|
82
|
+
mergeWhenTokens = new Set();
|
|
83
|
+
// LATERAL VIEW
|
|
84
|
+
lateralViewCommas = new Set();
|
|
85
|
+
// GROUP BY ALL
|
|
86
|
+
groupByAllTokens = new Set();
|
|
87
|
+
// Multi-arg function expansion
|
|
88
|
+
multiArgFunctionInfo = new Map();
|
|
89
|
+
// Window definition expansion
|
|
90
|
+
windowDefInfo = new Map();
|
|
91
|
+
// PIVOT/UNPIVOT expansion
|
|
92
|
+
pivotInfo = new Map();
|
|
93
|
+
// IN list wrapping
|
|
94
|
+
inListInfo = new Map();
|
|
95
|
+
// Simple query compaction
|
|
96
|
+
simpleQueries = new Map();
|
|
97
|
+
// Internal state
|
|
98
|
+
currentSelectToken = -1;
|
|
99
|
+
// ========== PUBLIC API ==========
|
|
100
|
+
/**
|
|
101
|
+
* Get the complete analysis result after visiting.
|
|
102
|
+
*/
|
|
103
|
+
getResult() {
|
|
104
|
+
return {
|
|
105
|
+
identifierTokens: this.identifierTokens,
|
|
106
|
+
functionCallTokens: this.functionCallTokens,
|
|
107
|
+
clauseStartTokens: this.clauseStartTokens,
|
|
108
|
+
qualifiedNameTokens: this.qualifiedNameTokens,
|
|
109
|
+
listItemCommas: this.listItemCommas,
|
|
110
|
+
listFirstItems: this.listFirstItems,
|
|
111
|
+
multiItemClauses: this.multiItemClauses,
|
|
112
|
+
conditionOperators: this.conditionOperators,
|
|
113
|
+
multilineConditionClauses: this.multilineConditionClauses,
|
|
114
|
+
betweenAndTokens: this.betweenAndTokens,
|
|
115
|
+
tokenDepthMap: this.tokenDepthMap,
|
|
116
|
+
subqueryOpenParens: this.subqueryOpenParens,
|
|
117
|
+
subqueryCloseParens: this.subqueryCloseParens,
|
|
118
|
+
setOperandParens: this.setOperandParens,
|
|
119
|
+
aliasInsertPositions: this.aliasInsertPositions,
|
|
120
|
+
tableAliasAsTokens: this.tableAliasAsTokens,
|
|
121
|
+
joinOnTokens: this.joinOnTokens,
|
|
122
|
+
cteCommas: this.cteCommas,
|
|
123
|
+
cteMainSelectTokens: this.cteMainSelectTokens,
|
|
124
|
+
ddlColumnCommas: this.ddlColumnCommas,
|
|
125
|
+
ddlOpenParens: this.ddlOpenParens,
|
|
126
|
+
ddlCloseParens: this.ddlCloseParens,
|
|
127
|
+
ddlFirstColumn: this.ddlFirstColumn,
|
|
128
|
+
ddlMultiColumn: this.ddlMultiColumn,
|
|
129
|
+
valuesCommas: this.valuesCommas,
|
|
130
|
+
valuesHasTuples: this.valuesHasTuples,
|
|
131
|
+
setClauseCommas: this.setClauseCommas,
|
|
132
|
+
setKeywordToken: this.setKeywordToken,
|
|
133
|
+
multiWhenCaseTokens: this.multiWhenCaseTokens,
|
|
134
|
+
caseWhenTokens: this.caseWhenTokens,
|
|
135
|
+
caseElseTokens: this.caseElseTokens,
|
|
136
|
+
caseEndTokens: this.caseEndTokens,
|
|
137
|
+
simpleCaseTokens: this.simpleCaseTokens,
|
|
138
|
+
simpleCaseValueEndTokens: this.simpleCaseValueEndTokens,
|
|
139
|
+
groupingAnalyticsParens: this.groupingAnalyticsParens,
|
|
140
|
+
exceptClauseTokens: this.exceptClauseTokens,
|
|
141
|
+
setConfigTokens: this.setConfigTokens,
|
|
142
|
+
mergeUsingTokens: this.mergeUsingTokens,
|
|
143
|
+
mergeOnTokens: this.mergeOnTokens,
|
|
144
|
+
mergeWhenTokens: this.mergeWhenTokens,
|
|
145
|
+
lateralViewCommas: this.lateralViewCommas,
|
|
146
|
+
groupByAllTokens: this.groupByAllTokens,
|
|
147
|
+
multiArgFunctionInfo: this.multiArgFunctionInfo,
|
|
148
|
+
windowDefInfo: this.windowDefInfo,
|
|
149
|
+
pivotInfo: this.pivotInfo,
|
|
150
|
+
inListInfo: this.inListInfo,
|
|
151
|
+
simpleQueries: this.simpleQueries,
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
// ========== VISITOR INFRASTRUCTURE ==========
|
|
155
|
+
visit(ctx) {
|
|
156
|
+
if (!ctx)
|
|
157
|
+
return null;
|
|
158
|
+
return this.visitChildren(ctx);
|
|
159
|
+
}
|
|
160
|
+
visitChildren(ctx) {
|
|
161
|
+
if (!ctx?.children)
|
|
162
|
+
return null;
|
|
163
|
+
for (const child of ctx.children) {
|
|
164
|
+
if (child?.accept)
|
|
165
|
+
child.accept(this);
|
|
166
|
+
}
|
|
167
|
+
return null;
|
|
168
|
+
}
|
|
169
|
+
// ========== IDENTIFIER CONTEXTS ==========
|
|
170
|
+
visitIdentifier(ctx) {
|
|
171
|
+
this._markIdentifier(ctx);
|
|
172
|
+
return this.visitChildren(ctx);
|
|
173
|
+
}
|
|
174
|
+
visitStrictIdentifier(ctx) {
|
|
175
|
+
this._markIdentifier(ctx);
|
|
176
|
+
return this.visitChildren(ctx);
|
|
177
|
+
}
|
|
178
|
+
visitQuotedIdentifier(ctx) {
|
|
179
|
+
this._markIdentifier(ctx);
|
|
180
|
+
return this.visitChildren(ctx);
|
|
181
|
+
}
|
|
182
|
+
visitBackQuotedIdentifier(ctx) {
|
|
183
|
+
this._markIdentifier(ctx);
|
|
184
|
+
return this.visitChildren(ctx);
|
|
185
|
+
}
|
|
186
|
+
visitUnquotedIdentifier(ctx) {
|
|
187
|
+
this._markIdentifier(ctx);
|
|
188
|
+
return this.visitChildren(ctx);
|
|
189
|
+
}
|
|
190
|
+
visitErrorCapturingIdentifier(ctx) {
|
|
191
|
+
this._markIdentifier(ctx);
|
|
192
|
+
return this.visitChildren(ctx);
|
|
193
|
+
}
|
|
194
|
+
/**
|
|
195
|
+
* Visit qualified name (e.g., table.column, db.schema.table.column)
|
|
196
|
+
* GRAMMAR-DRIVEN: qualifiedName : identifier (DOT identifier)*
|
|
197
|
+
*
|
|
198
|
+
* Context-sensitive keyword handling: In qualified names, even tokens that are
|
|
199
|
+
* keywords (like USER, TABLE) should be treated as identifiers and preserve casing.
|
|
200
|
+
* This is because the grammar context (qualifiedName rule) makes them identifiers.
|
|
201
|
+
*/
|
|
202
|
+
visitQualifiedName(ctx) {
|
|
203
|
+
// Mark all tokens in the qualified name as identifiers, except DOT tokens
|
|
204
|
+
if (ctx.start && ctx.stop) {
|
|
205
|
+
for (let i = ctx.start.tokenIndex; i <= ctx.stop.tokenIndex; i++) {
|
|
206
|
+
this.identifierTokens.add(i);
|
|
207
|
+
this.qualifiedNameTokens.add(i); // Also track as qualified name
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
// Still visit children to handle nested contexts
|
|
211
|
+
return this.visitChildren(ctx);
|
|
212
|
+
}
|
|
213
|
+
/**
|
|
214
|
+
* Visit dereference (field access like user.address, table.column)
|
|
215
|
+
* GRAMMAR-DRIVEN: base=primaryExpression DOT fieldName=identifier
|
|
216
|
+
*
|
|
217
|
+
* When a keyword like USER or TABLE appears before DOT, it should be treated
|
|
218
|
+
* as an identifier (table/column alias), not as a keyword.
|
|
219
|
+
* Similarly, keywords appearing as field names (like KEY, ORDER) should preserve casing.
|
|
220
|
+
*/
|
|
221
|
+
visitDereference(ctx) {
|
|
222
|
+
// Mark the base token as an identifier when it's being dereferenced
|
|
223
|
+
// This handles cases like: user.address where USER is a keyword but should be preserved
|
|
224
|
+
if (ctx.base && ctx.base.start) {
|
|
225
|
+
// Mark the base expression tokens as identifiers
|
|
226
|
+
for (let i = ctx.base.start.tokenIndex; i <= (ctx.base.stop?.tokenIndex ?? ctx.base.start.tokenIndex); i++) {
|
|
227
|
+
this.identifierTokens.add(i);
|
|
228
|
+
this.qualifiedNameTokens.add(i); // Also track as qualified name
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
// Mark the field name (right side after dot) as an identifier
|
|
232
|
+
// This handles cases like: a.key, a.order where KEY, ORDER are keywords but used as column names
|
|
233
|
+
if (ctx.fieldName && ctx.fieldName.start) {
|
|
234
|
+
for (let i = ctx.fieldName.start.tokenIndex; i <= (ctx.fieldName.stop?.tokenIndex ?? ctx.fieldName.start.tokenIndex); i++) {
|
|
235
|
+
this.identifierTokens.add(i);
|
|
236
|
+
this.qualifiedNameTokens.add(i); // Also track as qualified name
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
return this.visitChildren(ctx);
|
|
240
|
+
}
|
|
241
|
+
// ========== FUNCTION CALL CONTEXTS ==========
|
|
242
|
+
visitFunctionCall(ctx) {
|
|
243
|
+
if (ctx.start) {
|
|
244
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
245
|
+
}
|
|
246
|
+
// Check for multi-arg functions
|
|
247
|
+
const args = ctx.argument;
|
|
248
|
+
if (args && args.length >= 2) {
|
|
249
|
+
this._collectMultiArgFunctionInfo(ctx, args.length);
|
|
250
|
+
}
|
|
251
|
+
return this.visitChildren(ctx);
|
|
252
|
+
}
|
|
253
|
+
visitFunctionName(ctx) {
|
|
254
|
+
if (ctx.start) {
|
|
255
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
256
|
+
}
|
|
257
|
+
return this.visitChildren(ctx);
|
|
258
|
+
}
|
|
259
|
+
visitFirst(ctx) {
|
|
260
|
+
if (ctx.start)
|
|
261
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
262
|
+
return this.visitChildren(ctx);
|
|
263
|
+
}
|
|
264
|
+
visitLast(ctx) {
|
|
265
|
+
if (ctx.start)
|
|
266
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
267
|
+
return this.visitChildren(ctx);
|
|
268
|
+
}
|
|
269
|
+
visitAny_value(ctx) {
|
|
270
|
+
if (ctx.start)
|
|
271
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
272
|
+
return this.visitChildren(ctx);
|
|
273
|
+
}
|
|
274
|
+
visitStruct(ctx) {
|
|
275
|
+
if (ctx.start)
|
|
276
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
277
|
+
return this.visitChildren(ctx);
|
|
278
|
+
}
|
|
279
|
+
visitExtract(ctx) {
|
|
280
|
+
if (ctx.start)
|
|
281
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
282
|
+
return this.visitChildren(ctx);
|
|
283
|
+
}
|
|
284
|
+
visitCast(ctx) {
|
|
285
|
+
if (ctx.start)
|
|
286
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
287
|
+
// Collect CAST as potentially expandable
|
|
288
|
+
if (ctx.children) {
|
|
289
|
+
let leftParenTokenIndex = null;
|
|
290
|
+
let leftParenCharStart = 0;
|
|
291
|
+
let rightParenTokenIndex = null;
|
|
292
|
+
for (const child of ctx.children) {
|
|
293
|
+
if (child.symbol) {
|
|
294
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
295
|
+
if (symName === 'LEFT_PAREN' && leftParenTokenIndex === null) {
|
|
296
|
+
leftParenTokenIndex = child.symbol.tokenIndex;
|
|
297
|
+
leftParenCharStart = child.symbol.start ?? 0;
|
|
298
|
+
}
|
|
299
|
+
else if (symName === 'RIGHT_PAREN') {
|
|
300
|
+
rightParenTokenIndex = child.symbol.tokenIndex;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
if (leftParenTokenIndex !== null && rightParenTokenIndex !== null) {
|
|
305
|
+
const spanLength = this._calculateNormalizedSpanLength(ctx);
|
|
306
|
+
this.multiArgFunctionInfo.set(leftParenTokenIndex, {
|
|
307
|
+
closeParenIndex: rightParenTokenIndex,
|
|
308
|
+
commaIndices: [],
|
|
309
|
+
spanLength: spanLength,
|
|
310
|
+
functionName: 'CAST',
|
|
311
|
+
charStart: leftParenCharStart
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
return this.visitChildren(ctx);
|
|
316
|
+
}
|
|
317
|
+
visitPosition(ctx) {
|
|
318
|
+
if (ctx.start)
|
|
319
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
320
|
+
return this.visitChildren(ctx);
|
|
321
|
+
}
|
|
322
|
+
visitTimestampadd(ctx) {
|
|
323
|
+
if (ctx.start)
|
|
324
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
325
|
+
return this.visitChildren(ctx);
|
|
326
|
+
}
|
|
327
|
+
visitTimestampdiff(ctx) {
|
|
328
|
+
if (ctx.start)
|
|
329
|
+
this.functionCallTokens.add(ctx.start.tokenIndex);
|
|
330
|
+
return this.visitChildren(ctx);
|
|
331
|
+
}
|
|
332
|
+
visitLateralView(ctx) {
|
|
333
|
+
if (ctx.children) {
|
|
334
|
+
let foundRightParen = false;
|
|
335
|
+
for (const child of ctx.children) {
|
|
336
|
+
if (child.ruleIndex !== undefined) {
|
|
337
|
+
const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
|
|
338
|
+
if (ruleName === 'qualifiedName' && child.start) {
|
|
339
|
+
this.functionCallTokens.add(child.start.tokenIndex);
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
if (child.symbol) {
|
|
343
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
344
|
+
if (symName === 'RIGHT_PAREN') {
|
|
345
|
+
foundRightParen = true;
|
|
346
|
+
}
|
|
347
|
+
if (foundRightParen && symName === 'COMMA') {
|
|
348
|
+
this.lateralViewCommas.add(child.symbol.tokenIndex);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
return this.visitChildren(ctx);
|
|
354
|
+
}
|
|
355
|
+
// ========== CASE EXPRESSION CONTEXTS ==========
|
|
356
|
+
visitSearchedCase(ctx) {
|
|
357
|
+
this._analyzeCaseExpression(ctx);
|
|
358
|
+
return this.visitChildren(ctx);
|
|
359
|
+
}
|
|
360
|
+
visitSimpleCase(ctx) {
|
|
361
|
+
this._analyzeCaseExpression(ctx);
|
|
362
|
+
return this.visitChildren(ctx);
|
|
363
|
+
}
|
|
364
|
+
// ========== CLAUSE-STARTING CONTEXTS ==========
|
|
365
|
+
visitExceptClause(ctx) {
|
|
366
|
+
// Mark all tokens inside EXCEPT (...) clause for column exclusion
|
|
367
|
+
// These tokens should not trigger expansion
|
|
368
|
+
this._markAllDescendantTokens(ctx, this.exceptClauseTokens);
|
|
369
|
+
return this.visitChildren(ctx);
|
|
370
|
+
}
|
|
371
|
+
visitFromClause(ctx) {
|
|
372
|
+
this._markClauseStart(ctx);
|
|
373
|
+
return this.visitChildren(ctx);
|
|
374
|
+
}
|
|
375
|
+
/**
|
|
376
|
+
* Visit table alias context and mark AS tokens for suppression.
|
|
377
|
+
* Style guide says table aliases should NOT have AS keyword.
|
|
378
|
+
* Grammar: tableAlias: (AS? strictIdentifier identifierList?)?
|
|
379
|
+
*/
|
|
380
|
+
visitTableAlias(ctx) {
|
|
381
|
+
// Check if this table alias has an AS keyword
|
|
382
|
+
if (ctx.AS && typeof ctx.AS === 'function') {
|
|
383
|
+
const asToken = ctx.AS();
|
|
384
|
+
if (asToken && asToken.symbol) {
|
|
385
|
+
this.tableAliasAsTokens.add(asToken.symbol.tokenIndex);
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
return this.visitChildren(ctx);
|
|
389
|
+
}
|
|
390
|
+
visitAggregationClause(ctx) {
|
|
391
|
+
this._markClauseStart(ctx);
|
|
392
|
+
this._markGroupByAllToken(ctx);
|
|
393
|
+
const commaCount = this._markListCommasExcludingGroupingAnalytics(ctx);
|
|
394
|
+
if (commaCount > 0 && ctx.start) {
|
|
395
|
+
let actualCommaCount = 0;
|
|
396
|
+
if (ctx.children) {
|
|
397
|
+
for (const child of ctx.children) {
|
|
398
|
+
if (child.symbol && child.symbol.type === getTokenType('COMMA')) {
|
|
399
|
+
if (this.listItemCommas.has(child.symbol.tokenIndex)) {
|
|
400
|
+
actualCommaCount++;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
if (actualCommaCount > 0) {
|
|
406
|
+
this.multiItemClauses.add(ctx.start.tokenIndex);
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
return this.visitChildren(ctx);
|
|
410
|
+
}
|
|
411
|
+
visitGroupingAnalytics(ctx) {
|
|
412
|
+
let isRollupOrCube = false;
|
|
413
|
+
if (ctx.children) {
|
|
414
|
+
for (const child of ctx.children) {
|
|
415
|
+
if (child.symbol) {
|
|
416
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
417
|
+
if (symName === 'ROLLUP' || symName === 'CUBE') {
|
|
418
|
+
isRollupOrCube = true;
|
|
419
|
+
}
|
|
420
|
+
else if (symName === 'LEFT_PAREN') {
|
|
421
|
+
this.groupingAnalyticsParens.add(child.symbol.tokenIndex);
|
|
422
|
+
if (isRollupOrCube) {
|
|
423
|
+
const parenIndex = child.symbol.tokenIndex;
|
|
424
|
+
for (const c of ctx.children) {
|
|
425
|
+
if (c.symbol) {
|
|
426
|
+
const sn = SqlBaseLexer.symbolicNames[c.symbol.type];
|
|
427
|
+
if ((sn === 'ROLLUP' || sn === 'CUBE') && c.symbol.tokenIndex < parenIndex) {
|
|
428
|
+
this.functionCallTokens.add(c.symbol.tokenIndex);
|
|
429
|
+
break;
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
break;
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
const wasInside = this.insideGroupingAnalytics;
|
|
440
|
+
this.insideGroupingAnalytics = true;
|
|
441
|
+
const result = this.visitChildren(ctx);
|
|
442
|
+
this.insideGroupingAnalytics = wasInside;
|
|
443
|
+
return result;
|
|
444
|
+
}
|
|
445
|
+
visitQueryOrganization(ctx) {
|
|
446
|
+
let orderTokenIndex = null;
|
|
447
|
+
if (ctx.children) {
|
|
448
|
+
for (const child of ctx.children) {
|
|
449
|
+
if (child.symbol) {
|
|
450
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
451
|
+
if (symName === 'ORDER') {
|
|
452
|
+
this.clauseStartTokens.add(child.symbol.tokenIndex);
|
|
453
|
+
orderTokenIndex = child.symbol.tokenIndex;
|
|
454
|
+
}
|
|
455
|
+
else if (symName === 'LIMIT') {
|
|
456
|
+
this.clauseStartTokens.add(child.symbol.tokenIndex);
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
const commaCount = this._markListCommasExcludingGroupingAnalytics(ctx);
|
|
462
|
+
if (commaCount > 0 && orderTokenIndex !== null) {
|
|
463
|
+
this.multiItemClauses.add(orderTokenIndex);
|
|
464
|
+
}
|
|
465
|
+
return this.visitChildren(ctx);
|
|
466
|
+
}
|
|
467
|
+
visitSortItem(ctx) {
|
|
468
|
+
return this.visitChildren(ctx);
|
|
469
|
+
}
|
|
470
|
+
visitLimitClause(ctx) {
|
|
471
|
+
this._markClauseStart(ctx);
|
|
472
|
+
return this.visitChildren(ctx);
|
|
473
|
+
}
|
|
474
|
+
visitJoinRelation(ctx) {
|
|
475
|
+
this._markClauseStart(ctx);
|
|
476
|
+
this._analyzeJoinConditions(ctx);
|
|
477
|
+
const onTokenIndex = this._findOnToken(ctx);
|
|
478
|
+
if (onTokenIndex !== -1) {
|
|
479
|
+
this.joinOnTokens.add(onTokenIndex);
|
|
480
|
+
}
|
|
481
|
+
return this.visitChildren(ctx);
|
|
482
|
+
}
|
|
483
|
+
visitWindowDef(ctx) {
|
|
484
|
+
// Visit children FIRST so nested functions are collected before we check them
|
|
485
|
+
const result = this.visitChildren(ctx);
|
|
486
|
+
this._collectWindowDefInfo(ctx);
|
|
487
|
+
return result;
|
|
488
|
+
}
|
|
489
|
+
// ========== PIVOT/UNPIVOT CONTEXTS ==========
|
|
490
|
+
visitPivotClause(ctx) {
|
|
491
|
+
this._collectPivotInfo(ctx, false);
|
|
492
|
+
return this.visitChildren(ctx);
|
|
493
|
+
}
|
|
494
|
+
visitUnpivotClause(ctx) {
|
|
495
|
+
this._collectPivotInfo(ctx, true);
|
|
496
|
+
return this.visitChildren(ctx);
|
|
497
|
+
}
|
|
498
|
+
visitSetOperation(ctx) {
|
|
499
|
+
if (ctx.children) {
|
|
500
|
+
let foundSetOperator = false;
|
|
501
|
+
for (const child of ctx.children) {
|
|
502
|
+
if (child.symbol) {
|
|
503
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
504
|
+
if (symName === 'UNION' || symName === 'EXCEPT' || symName === 'INTERSECT') {
|
|
505
|
+
this.clauseStartTokens.add(child.symbol.tokenIndex);
|
|
506
|
+
foundSetOperator = true;
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
else {
|
|
510
|
+
if (foundSetOperator) {
|
|
511
|
+
const subquery = this._findSubqueryContext(child);
|
|
512
|
+
if (subquery && subquery.start) {
|
|
513
|
+
this.setOperandParens.add(subquery.start.tokenIndex);
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
return this.visitChildren(ctx);
|
|
520
|
+
}
|
|
521
|
+
visitSelectClause(ctx) {
|
|
522
|
+
this._markClauseStart(ctx);
|
|
523
|
+
if (ctx.start) {
|
|
524
|
+
this.currentSelectToken = ctx.start.tokenIndex;
|
|
525
|
+
}
|
|
526
|
+
return this.visitChildren(ctx);
|
|
527
|
+
}
|
|
528
|
+
visitNamedExpression(ctx) {
|
|
529
|
+
const hasAlias = ctx.errorCapturingIdentifier && ctx.errorCapturingIdentifier();
|
|
530
|
+
const hasAS = ctx.AS && ctx.AS();
|
|
531
|
+
if (hasAlias && !hasAS) {
|
|
532
|
+
const expr = ctx.expression && ctx.expression();
|
|
533
|
+
const alias = ctx.errorCapturingIdentifier();
|
|
534
|
+
if (expr && expr.stop && alias && alias.start) {
|
|
535
|
+
const aliasIndex = alias.start.tokenIndex;
|
|
536
|
+
this.aliasInsertPositions.add(aliasIndex);
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
return this.visitChildren(ctx);
|
|
540
|
+
}
|
|
541
|
+
// ========== LIST CONTEXTS ==========
|
|
542
|
+
visitNamedExpressionSeq(ctx) {
|
|
543
|
+
const parentClass = ctx.parentCtx?.constructor?.name || '';
|
|
544
|
+
if (parentClass === 'PivotClauseContext' ||
|
|
545
|
+
parentClass === 'UnpivotClauseContext' ||
|
|
546
|
+
parentClass === 'LateralViewContext') {
|
|
547
|
+
return this.visitChildren(ctx);
|
|
548
|
+
}
|
|
549
|
+
const hasMultiple = this._markListContext(ctx);
|
|
550
|
+
if (hasMultiple && this.currentSelectToken >= 0) {
|
|
551
|
+
this.multiItemClauses.add(this.currentSelectToken);
|
|
552
|
+
}
|
|
553
|
+
return this.visitChildren(ctx);
|
|
554
|
+
}
|
|
555
|
+
visitGroupByClause(ctx) {
|
|
556
|
+
return this.visitChildren(ctx);
|
|
557
|
+
}
|
|
558
|
+
// ========== CONDITION CONTEXTS ==========
|
|
559
|
+
visitWhereClause(ctx) {
|
|
560
|
+
this._markClauseStart(ctx);
|
|
561
|
+
this._scanForBetweenAnd(ctx);
|
|
562
|
+
this._analyzeConditionClause(ctx);
|
|
563
|
+
return this.visitChildren(ctx);
|
|
564
|
+
}
|
|
565
|
+
visitHavingClause(ctx) {
|
|
566
|
+
this._markClauseStart(ctx);
|
|
567
|
+
this._scanForBetweenAnd(ctx);
|
|
568
|
+
this._analyzeConditionClause(ctx);
|
|
569
|
+
return this.visitChildren(ctx);
|
|
570
|
+
}
|
|
571
|
+
visitPredicate(ctx) {
|
|
572
|
+
if (ctx.children) {
|
|
573
|
+
let hasBetween = false;
|
|
574
|
+
let hasQuery = false;
|
|
575
|
+
for (const child of ctx.children) {
|
|
576
|
+
if (child.symbol) {
|
|
577
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
578
|
+
if (symName === 'BETWEEN') {
|
|
579
|
+
hasBetween = true;
|
|
580
|
+
}
|
|
581
|
+
else if (symName === 'AND' && hasBetween) {
|
|
582
|
+
this.betweenAndTokens.add(child.symbol.tokenIndex);
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
else if (child.ruleIndex !== undefined) {
|
|
586
|
+
const ruleName = child.constructor?.name;
|
|
587
|
+
if (ruleName === 'QueryContext') {
|
|
588
|
+
hasQuery = true;
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
if (hasQuery) {
|
|
593
|
+
this._markSubqueryParens(ctx);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
// Also collect IN list info for wrapping
|
|
597
|
+
this._collectInListInfo(ctx);
|
|
598
|
+
return this.visitChildren(ctx);
|
|
599
|
+
}
|
|
600
|
+
// ========== CTE CONTEXTS ==========
|
|
601
|
+
// Handle the top-level query rule: query = ctes? queryTerm queryOrganization
|
|
602
|
+
// This marks the main SELECT after CTE definitions
|
|
603
|
+
visitQuery(ctx) {
|
|
604
|
+
// Check if this query has CTEs
|
|
605
|
+
let hasCtes = false;
|
|
606
|
+
let queryTermChild = null;
|
|
607
|
+
if (ctx.children) {
|
|
608
|
+
for (const child of ctx.children) {
|
|
609
|
+
if (child.ruleIndex !== undefined) {
|
|
610
|
+
const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
|
|
611
|
+
if (ruleName === 'ctes') {
|
|
612
|
+
hasCtes = true;
|
|
613
|
+
}
|
|
614
|
+
else if (ruleName === 'queryTerm') {
|
|
615
|
+
queryTermChild = child;
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
// If CTEs exist, find and mark the first SELECT token of the main query
|
|
621
|
+
if (hasCtes && queryTermChild) {
|
|
622
|
+
const selectToken = this._findFirstSelectToken(queryTermChild);
|
|
623
|
+
if (selectToken !== null) {
|
|
624
|
+
this.cteMainSelectTokens.add(selectToken);
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
return this.visitChildren(ctx);
|
|
628
|
+
}
|
|
629
|
+
// Helper to find the first SELECT token in a queryTerm subtree
|
|
630
|
+
_findFirstSelectToken(ctx) {
|
|
631
|
+
if (!ctx)
|
|
632
|
+
return null;
|
|
633
|
+
// Check if this node has a SELECT token
|
|
634
|
+
if (ctx.symbol && ctx.symbol.type === getTokenType('SELECT')) {
|
|
635
|
+
return ctx.symbol.tokenIndex;
|
|
636
|
+
}
|
|
637
|
+
// Recurse into children
|
|
638
|
+
if (ctx.children) {
|
|
639
|
+
for (const child of ctx.children) {
|
|
640
|
+
const result = this._findFirstSelectToken(child);
|
|
641
|
+
if (result !== null)
|
|
642
|
+
return result;
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
return null;
|
|
646
|
+
}
|
|
647
|
+
visitCtes(ctx) {
|
|
648
|
+
this._markClauseStart(ctx);
|
|
649
|
+
if (ctx.children) {
|
|
650
|
+
for (const child of ctx.children) {
|
|
651
|
+
if (child.symbol && child.symbol.type === getTokenType('COMMA')) {
|
|
652
|
+
this.cteCommas.add(child.symbol.tokenIndex);
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
return this.visitChildren(ctx);
|
|
657
|
+
}
|
|
658
|
+
visitNamedQuery(ctx) {
|
|
659
|
+
// Increment depth for CTE body - it's effectively a subquery
|
|
660
|
+
this.subqueryDepth++;
|
|
661
|
+
if (ctx.children) {
|
|
662
|
+
for (const child of ctx.children) {
|
|
663
|
+
if (child.symbol) {
|
|
664
|
+
const tokenType = child.symbol.type;
|
|
665
|
+
if (tokenType === getTokenType('LEFT_PAREN')) {
|
|
666
|
+
this.subqueryOpenParens.add(child.symbol.tokenIndex);
|
|
667
|
+
}
|
|
668
|
+
else if (tokenType === getTokenType('RIGHT_PAREN')) {
|
|
669
|
+
this.subqueryCloseParens.add(child.symbol.tokenIndex);
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
const result = this.visitChildren(ctx);
|
|
675
|
+
this.subqueryDepth--;
|
|
676
|
+
return result;
|
|
677
|
+
}
|
|
678
|
+
// ========== SUBQUERY CONTEXTS ==========
|
|
679
|
+
visitAliasedQuery(ctx) {
|
|
680
|
+
this._markSubqueryParens(ctx);
|
|
681
|
+
return this.visitChildren(ctx);
|
|
682
|
+
}
|
|
683
|
+
visitExists(ctx) {
|
|
684
|
+
this._markSubqueryParens(ctx);
|
|
685
|
+
return this.visitChildren(ctx);
|
|
686
|
+
}
|
|
687
|
+
visitSubqueryExpression(ctx) {
|
|
688
|
+
this._markSubqueryParens(ctx);
|
|
689
|
+
return this.visitChildren(ctx);
|
|
690
|
+
}
|
|
691
|
+
visitSubquery(ctx) {
|
|
692
|
+
this._markSubqueryParens(ctx);
|
|
693
|
+
return this.visitChildren(ctx);
|
|
694
|
+
}
|
|
695
|
+
// ========== DDL CONTEXTS ==========
|
|
696
|
+
visitCreateTableHeader(ctx) {
|
|
697
|
+
return this.visitChildren(ctx);
|
|
698
|
+
}
|
|
699
|
+
visitCreateTable(ctx) {
|
|
700
|
+
this._markDdlColumnList(ctx);
|
|
701
|
+
return this.visitChildren(ctx);
|
|
702
|
+
}
|
|
703
|
+
visitCreateUserDefinedFunction(ctx) {
|
|
704
|
+
// Mark the function name (identifierReference) as a function call
|
|
705
|
+
// so there's no space before the opening paren: CREATE FUNCTION f(...) not f (...)
|
|
706
|
+
if (ctx.children) {
|
|
707
|
+
for (const child of ctx.children) {
|
|
708
|
+
if (child.ruleIndex !== undefined) {
|
|
709
|
+
const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
|
|
710
|
+
if (ruleName === 'identifierReference' && child.start) {
|
|
711
|
+
this.functionCallTokens.add(child.start.tokenIndex);
|
|
712
|
+
break; // Only mark the first one (function name)
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
return this.visitChildren(ctx);
|
|
718
|
+
}
|
|
719
|
+
// ========== DML CONTEXTS ==========
|
|
720
|
+
visitInsertInto(ctx) {
|
|
721
|
+
this._markValuesCommas(ctx);
|
|
722
|
+
return this.visitChildren(ctx);
|
|
723
|
+
}
|
|
724
|
+
visitInlineTable(ctx) {
|
|
725
|
+
this._markValuesCommas(ctx);
|
|
726
|
+
return this.visitChildren(ctx);
|
|
727
|
+
}
|
|
728
|
+
visitUpdateTable(ctx) {
|
|
729
|
+
const commaCount = this._markSetClause(ctx, false, 0);
|
|
730
|
+
if (commaCount > 0 && this.setKeywordToken >= 0) {
|
|
731
|
+
this.multiItemClauses.add(this.setKeywordToken);
|
|
732
|
+
}
|
|
733
|
+
return this.visitChildren(ctx);
|
|
734
|
+
}
|
|
735
|
+
// ========== SET CONFIGURATION ==========
|
|
736
|
+
visitSetConfiguration(ctx) {
|
|
737
|
+
this._markSetConfigTokens(ctx);
|
|
738
|
+
return this.visitChildren(ctx);
|
|
739
|
+
}
|
|
740
|
+
visitResetConfiguration(ctx) {
|
|
741
|
+
// GRAMMAR-DRIVEN: RESET .*?
|
|
742
|
+
// Mark all tokens after RESET as configuration tokens to preserve casing
|
|
743
|
+
this._markResetConfigTokens(ctx);
|
|
744
|
+
return this.visitChildren(ctx);
|
|
745
|
+
}
|
|
746
|
+
// ========== MERGE STATEMENT ==========
|
|
747
|
+
visitMergeIntoTable(ctx) {
|
|
748
|
+
this._markMergeClauses(ctx);
|
|
749
|
+
return this.visitChildren(ctx);
|
|
750
|
+
}
|
|
751
|
+
// ========== QUERY DEPTH TRACKING ==========
|
|
752
|
+
visitQuerySpecification(ctx) {
|
|
753
|
+
return this._visitQuerySpec(ctx);
|
|
754
|
+
}
|
|
755
|
+
visitRegularQuerySpecification(ctx) {
|
|
756
|
+
return this._visitQuerySpec(ctx);
|
|
757
|
+
}
|
|
758
|
+
_visitQuerySpec(ctx) {
|
|
759
|
+
const currentDepth = this.subqueryDepth;
|
|
760
|
+
this.subqueryDepth++;
|
|
761
|
+
this._markDepthForContext(ctx);
|
|
762
|
+
// Analyze if this query is simple enough to stay compact
|
|
763
|
+
this._analyzeSimpleQuery(ctx, currentDepth);
|
|
764
|
+
const result = this.visitChildren(ctx);
|
|
765
|
+
this.subqueryDepth--;
|
|
766
|
+
return result;
|
|
767
|
+
}
|
|
768
|
+
/**
|
|
769
|
+
* Check if a context is inside a CREATE VIEW/TABLE statement at the top level.
|
|
770
|
+
* Queries inside these DDL statements should never be compacted.
|
|
771
|
+
*/
|
|
772
|
+
_isInsideCreateStatement(ctx) {
|
|
773
|
+
let node = ctx?.parentCtx;
|
|
774
|
+
while (node) {
|
|
775
|
+
const className = node.constructor?.name || '';
|
|
776
|
+
// Check for CREATE VIEW variants
|
|
777
|
+
if (className === 'CreateViewContext' ||
|
|
778
|
+
className === 'CreateTempViewUsingContext') {
|
|
779
|
+
return true;
|
|
780
|
+
}
|
|
781
|
+
node = node.parentCtx;
|
|
782
|
+
}
|
|
783
|
+
return false;
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Analyze if a query is simple enough to stay on one line.
|
|
787
|
+
* Simple query criteria:
|
|
788
|
+
* - SELECT has 1 item (including *, t.*)
|
|
789
|
+
* - FROM has 1 table (no JOINs)
|
|
790
|
+
* - WHERE has 0 or 1 condition (no AND/OR at top level)
|
|
791
|
+
* - No GROUP BY, ORDER BY, HAVING, or single-item versions
|
|
792
|
+
* - No LIMIT/OFFSET or simple LIMIT
|
|
793
|
+
* - NOT inside a CREATE VIEW/TABLE statement (those always expand)
|
|
794
|
+
*/
|
|
795
|
+
_analyzeSimpleQuery(ctx, depth) {
|
|
796
|
+
if (!ctx || !ctx.children)
|
|
797
|
+
return;
|
|
798
|
+
// Never compact queries inside CREATE statements
|
|
799
|
+
if (depth === 0 && this._isInsideCreateStatement(ctx))
|
|
800
|
+
return;
|
|
801
|
+
let selectClause = null;
|
|
802
|
+
let fromClause = null;
|
|
803
|
+
let whereClause = null;
|
|
804
|
+
let hasJoin = false;
|
|
805
|
+
let hasGroupBy = false;
|
|
806
|
+
let hasOrderBy = false;
|
|
807
|
+
let hasHaving = false;
|
|
808
|
+
let hasLimit = false;
|
|
809
|
+
let selectToken = null;
|
|
810
|
+
// Scan children to find clauses
|
|
811
|
+
for (const child of ctx.children) {
|
|
812
|
+
if (!child)
|
|
813
|
+
continue;
|
|
814
|
+
const ruleName = child.ruleIndex !== undefined ? SqlBaseParser.ruleNames[child.ruleIndex] : null;
|
|
815
|
+
const className = child.constructor?.name || '';
|
|
816
|
+
if (className === 'SelectClauseContext' || ruleName === 'selectClause') {
|
|
817
|
+
selectClause = child;
|
|
818
|
+
if (child.start) {
|
|
819
|
+
selectToken = child.start;
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
else if (className === 'FromClauseContext' || ruleName === 'fromClause') {
|
|
823
|
+
fromClause = child;
|
|
824
|
+
// Check for JOINs in FROM clause
|
|
825
|
+
hasJoin = this._hasJoinInFromClause(child);
|
|
826
|
+
// Also check for PIVOT/UNPIVOT in FROM clause
|
|
827
|
+
if (this._hasPivotUnpivotInFromClause(child)) {
|
|
828
|
+
hasJoin = true; // Treat PIVOT/UNPIVOT like a JOIN for simplicity
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
else if (className === 'WhereClauseContext' || ruleName === 'whereClause') {
|
|
832
|
+
whereClause = child;
|
|
833
|
+
}
|
|
834
|
+
else if (className === 'AggregationClauseContext' || ruleName === 'aggregationClause') {
|
|
835
|
+
hasGroupBy = true;
|
|
836
|
+
}
|
|
837
|
+
else if (className === 'HavingClauseContext' || ruleName === 'havingClause') {
|
|
838
|
+
hasHaving = true;
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
// Check parent for ORDER BY / LIMIT (they're in queryOrganization, not querySpecification)
|
|
842
|
+
// For now, we'll handle this by checking if ORDER BY/LIMIT tokens are in our range
|
|
843
|
+
// Can't be simple if has JOINs
|
|
844
|
+
if (hasJoin)
|
|
845
|
+
return;
|
|
846
|
+
// Can't be simple if has GROUP BY or HAVING (for now - could relax for single-item)
|
|
847
|
+
if (hasGroupBy || hasHaving)
|
|
848
|
+
return;
|
|
849
|
+
// Check SELECT clause - must have single item
|
|
850
|
+
if (!selectClause || !this._hasSingleSelectItem(selectClause))
|
|
851
|
+
return;
|
|
852
|
+
// Check for multi-WHEN CASE expressions (which force expansion)
|
|
853
|
+
if (selectClause && this._hasMultiWhenCase(selectClause))
|
|
854
|
+
return;
|
|
855
|
+
// Check WHERE clause - must have 0 or 1 condition (no AND/OR)
|
|
856
|
+
if (whereClause && this._hasMultipleConditions(whereClause))
|
|
857
|
+
return;
|
|
858
|
+
// This query qualifies as simple
|
|
859
|
+
// Use forExpansion=false to get actual span regardless of input layout
|
|
860
|
+
if (selectToken) {
|
|
861
|
+
const spanLength = this._calculateSpanLength(ctx, false);
|
|
862
|
+
this.simpleQueries.set(selectToken.tokenIndex, {
|
|
863
|
+
selectTokenIndex: selectToken.tokenIndex,
|
|
864
|
+
spanLength: spanLength,
|
|
865
|
+
depth: depth,
|
|
866
|
+
});
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
/**
|
|
870
|
+
* Check if FROM clause contains any JOINs.
|
|
871
|
+
*/
|
|
872
|
+
_hasJoinInFromClause(fromClause) {
|
|
873
|
+
if (!fromClause || !fromClause.children)
|
|
874
|
+
return false;
|
|
875
|
+
const checkForJoin = (node) => {
|
|
876
|
+
if (!node)
|
|
877
|
+
return false;
|
|
878
|
+
const className = node.constructor?.name || '';
|
|
879
|
+
if (className === 'JoinRelationContext')
|
|
880
|
+
return true;
|
|
881
|
+
if (node.symbol) {
|
|
882
|
+
const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
|
|
883
|
+
if (symName === 'JOIN' || symName === 'CROSS' || symName === 'NATURAL') {
|
|
884
|
+
return true;
|
|
885
|
+
}
|
|
886
|
+
}
|
|
887
|
+
if (node.children) {
|
|
888
|
+
for (const child of node.children) {
|
|
889
|
+
if (checkForJoin(child))
|
|
890
|
+
return true;
|
|
891
|
+
}
|
|
892
|
+
}
|
|
893
|
+
return false;
|
|
894
|
+
};
|
|
895
|
+
return checkForJoin(fromClause);
|
|
896
|
+
}
|
|
897
|
+
/**
|
|
898
|
+
* Check if FROM clause contains PIVOT or UNPIVOT with many items.
|
|
899
|
+
* Simple PIVOT with few items can stay compact.
|
|
900
|
+
*/
|
|
901
|
+
_hasPivotUnpivotInFromClause(fromClause) {
|
|
902
|
+
if (!fromClause || !fromClause.children)
|
|
903
|
+
return false;
|
|
904
|
+
const checkForComplexPivot = (node) => {
|
|
905
|
+
if (!node)
|
|
906
|
+
return false;
|
|
907
|
+
const className = node.constructor?.name || '';
|
|
908
|
+
if (className === 'PivotClauseContext' || className === 'UnpivotClauseContext') {
|
|
909
|
+
// Count commas to estimate complexity
|
|
910
|
+
let commaCount = 0;
|
|
911
|
+
const countCommas = (n) => {
|
|
912
|
+
if (!n)
|
|
913
|
+
return;
|
|
914
|
+
if (n.symbol && n.symbol.type === getTokenType('COMMA')) {
|
|
915
|
+
commaCount++;
|
|
916
|
+
}
|
|
917
|
+
if (n.children) {
|
|
918
|
+
for (const c of n.children)
|
|
919
|
+
countCommas(c);
|
|
920
|
+
}
|
|
921
|
+
};
|
|
922
|
+
countCommas(node);
|
|
923
|
+
// If more than ~6 commas, it's complex (multiple aggregates + many IN items)
|
|
924
|
+
return commaCount > 6;
|
|
925
|
+
}
|
|
926
|
+
if (node.children) {
|
|
927
|
+
for (const child of node.children) {
|
|
928
|
+
if (checkForComplexPivot(child))
|
|
929
|
+
return true;
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
return false;
|
|
933
|
+
};
|
|
934
|
+
return checkForComplexPivot(fromClause);
|
|
935
|
+
}
|
|
936
|
+
/**
|
|
937
|
+
* Check if SELECT clause has a single item (*, t.*, or one expression).
|
|
938
|
+
*/
|
|
939
|
+
_hasSingleSelectItem(selectClause) {
|
|
940
|
+
if (!selectClause || !selectClause.children)
|
|
941
|
+
return false;
|
|
942
|
+
// Look for namedExpressionSeq
|
|
943
|
+
for (const child of selectClause.children) {
|
|
944
|
+
const className = child.constructor?.name || '';
|
|
945
|
+
if (className === 'NamedExpressionSeqContext') {
|
|
946
|
+
// Count items by looking for commas that are DIRECT children of namedExpressionSeq
|
|
947
|
+
// Commas inside function calls, type parameters, etc. should not be counted
|
|
948
|
+
let commaCount = 0;
|
|
949
|
+
if (child.children) {
|
|
950
|
+
for (const seqChild of child.children) {
|
|
951
|
+
if (seqChild.symbol && seqChild.symbol.type === getTokenType('COMMA')) {
|
|
952
|
+
commaCount++;
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
}
|
|
956
|
+
return commaCount === 0; // Single item means no commas
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
return true; // Default to true if no namedExpressionSeq (like SELECT *)
|
|
960
|
+
}
|
|
961
|
+
/**
|
|
962
|
+
* Check if a clause contains a CASE expression with multiple WHEN clauses.
|
|
963
|
+
* Such CASE expressions force expansion and make the query non-compact.
|
|
964
|
+
*/
|
|
965
|
+
_hasMultiWhenCase(clause) {
|
|
966
|
+
if (!clause)
|
|
967
|
+
return false;
|
|
968
|
+
const checkForMultiWhenCase = (node) => {
|
|
969
|
+
if (!node)
|
|
970
|
+
return false;
|
|
971
|
+
const className = node.constructor?.name || '';
|
|
972
|
+
// Check for simpleCase or searchedCase contexts
|
|
973
|
+
if (className === 'SimpleCaseContext' || className === 'SearchedCaseContext') {
|
|
974
|
+
// Count WHEN tokens
|
|
975
|
+
let whenCount = 0;
|
|
976
|
+
if (node.children) {
|
|
977
|
+
for (const child of node.children) {
|
|
978
|
+
if (child.symbol) {
|
|
979
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
980
|
+
if (symName === 'WHEN')
|
|
981
|
+
whenCount++;
|
|
982
|
+
}
|
|
983
|
+
// Also check whenClause contexts
|
|
984
|
+
const childClassName = child.constructor?.name || '';
|
|
985
|
+
if (childClassName === 'WhenClauseContext')
|
|
986
|
+
whenCount++;
|
|
987
|
+
}
|
|
988
|
+
}
|
|
989
|
+
if (whenCount > 1)
|
|
990
|
+
return true;
|
|
991
|
+
}
|
|
992
|
+
// Recurse into children
|
|
993
|
+
if (node.children) {
|
|
994
|
+
for (const child of node.children) {
|
|
995
|
+
if (checkForMultiWhenCase(child))
|
|
996
|
+
return true;
|
|
997
|
+
}
|
|
998
|
+
}
|
|
999
|
+
return false;
|
|
1000
|
+
};
|
|
1001
|
+
return checkForMultiWhenCase(clause);
|
|
1002
|
+
}
|
|
1003
|
+
/**
|
|
1004
|
+
* Check if WHERE/HAVING clause has multiple conditions (AND/OR at top level).
|
|
1005
|
+
*/
|
|
1006
|
+
_hasMultipleConditions(clause) {
|
|
1007
|
+
if (!clause || !clause.children)
|
|
1008
|
+
return false;
|
|
1009
|
+
// Find the predicated expression and check for AND/OR
|
|
1010
|
+
const checkForAndOr = (node, depth) => {
|
|
1011
|
+
if (!node)
|
|
1012
|
+
return false;
|
|
1013
|
+
if (depth > 3)
|
|
1014
|
+
return false; // Don't go too deep
|
|
1015
|
+
if (node.symbol) {
|
|
1016
|
+
const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
|
|
1017
|
+
if (symName === 'AND' || symName === 'OR') {
|
|
1018
|
+
return true;
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
// Check for logicalBinary rule which indicates AND/OR
|
|
1022
|
+
const className = node.constructor?.name || '';
|
|
1023
|
+
if (className === 'LogicalBinaryContext') {
|
|
1024
|
+
return true;
|
|
1025
|
+
}
|
|
1026
|
+
if (node.children) {
|
|
1027
|
+
for (const child of node.children) {
|
|
1028
|
+
if (checkForAndOr(child, depth + 1))
|
|
1029
|
+
return true;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
return false;
|
|
1033
|
+
};
|
|
1034
|
+
return checkForAndOr(clause, 0);
|
|
1035
|
+
}
|
|
1036
|
+
// ========== PRIVATE HELPER METHODS ==========
|
|
1037
|
+
/**
|
|
1038
|
+
* Calculate the expected formatted span length of a context.
|
|
1039
|
+
*
|
|
1040
|
+
* This walks all tokens within the context and sums:
|
|
1041
|
+
* - Each token's text length
|
|
1042
|
+
* - One space between each pair of tokens (standard formatting)
|
|
1043
|
+
*
|
|
1044
|
+
* This gives an accurate estimate of the formatted output length.
|
|
1045
|
+
*
|
|
1046
|
+
* @param ctx The parse tree context
|
|
1047
|
+
* @param forExpansion If true, returns Infinity for multi-line constructs
|
|
1048
|
+
* to prevent already-expanded constructs from collapsing.
|
|
1049
|
+
* If false, calculates actual span (for simple query detection).
|
|
1050
|
+
*/
|
|
1051
|
+
_calculateSpanLength(ctx, forExpansion = true) {
|
|
1052
|
+
if (!ctx || !ctx.start || !ctx.stop)
|
|
1053
|
+
return 0;
|
|
1054
|
+
// For expansion checking: if the construct spans multiple lines, return Infinity
|
|
1055
|
+
// This ensures idempotency: once expanded, it stays expanded
|
|
1056
|
+
// For simple query detection: we want the actual span regardless of input layout
|
|
1057
|
+
if (forExpansion) {
|
|
1058
|
+
const startLine = ctx.start.line;
|
|
1059
|
+
const stopLine = ctx.stop.line;
|
|
1060
|
+
if (startLine !== undefined && stopLine !== undefined && stopLine > startLine) {
|
|
1061
|
+
return Infinity;
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
// Collect all tokens within this context by walking the tree
|
|
1065
|
+
const tokens = [];
|
|
1066
|
+
const collectTokens = (node) => {
|
|
1067
|
+
if (!node)
|
|
1068
|
+
return;
|
|
1069
|
+
if (node.symbol) {
|
|
1070
|
+
// This is a terminal node (token)
|
|
1071
|
+
tokens.push(node.symbol.text || '');
|
|
1072
|
+
}
|
|
1073
|
+
else if (node.children) {
|
|
1074
|
+
for (const child of node.children) {
|
|
1075
|
+
collectTokens(child);
|
|
1076
|
+
}
|
|
1077
|
+
}
|
|
1078
|
+
};
|
|
1079
|
+
collectTokens(ctx);
|
|
1080
|
+
if (tokens.length === 0) {
|
|
1081
|
+
// Fallback to character-based
|
|
1082
|
+
const startIdx = ctx.start.start;
|
|
1083
|
+
const stopIdx = ctx.stop.stop;
|
|
1084
|
+
if (startIdx === undefined || stopIdx === undefined)
|
|
1085
|
+
return 0;
|
|
1086
|
+
return stopIdx - startIdx + 1;
|
|
1087
|
+
}
|
|
1088
|
+
// Sum token lengths + (n-1) spaces between tokens
|
|
1089
|
+
const tokenLengths = tokens.reduce((sum, t) => sum + t.length, 0);
|
|
1090
|
+
const spaceBetween = Math.max(0, tokens.length - 1);
|
|
1091
|
+
return tokenLengths + spaceBetween;
|
|
1092
|
+
}
|
|
1093
|
+
/**
|
|
1094
|
+
* Calculate normalized span length independent of input formatting.
|
|
1095
|
+
* This sums up token text lengths + single spaces between tokens,
|
|
1096
|
+
* giving a consistent "single-line" representation length.
|
|
1097
|
+
*
|
|
1098
|
+
* CRITICAL FOR IDEMPOTENCY: Using character positions (_calculateSpanLength)
|
|
1099
|
+
* varies based on how the input is formatted (line breaks, extra spaces).
|
|
1100
|
+
* This causes different expansion decisions on subsequent passes.
|
|
1101
|
+
* By using token text lengths, we get consistent results regardless of input formatting.
|
|
1102
|
+
*/
|
|
1103
|
+
_calculateNormalizedSpanLength(ctx) {
|
|
1104
|
+
if (!ctx || !ctx.start || !ctx.stop)
|
|
1105
|
+
return 0;
|
|
1106
|
+
// Walk through all tokens in the context and sum their text lengths
|
|
1107
|
+
let totalLength = 0;
|
|
1108
|
+
let tokenCount = 0;
|
|
1109
|
+
const collectTokens = (node) => {
|
|
1110
|
+
if (!node)
|
|
1111
|
+
return;
|
|
1112
|
+
// If this is a terminal (token), add its text length
|
|
1113
|
+
if (node.symbol) {
|
|
1114
|
+
const text = node.symbol.text;
|
|
1115
|
+
if (text) {
|
|
1116
|
+
totalLength += text.length;
|
|
1117
|
+
tokenCount++;
|
|
1118
|
+
}
|
|
1119
|
+
return;
|
|
1120
|
+
}
|
|
1121
|
+
// Recurse into children
|
|
1122
|
+
if (node.children) {
|
|
1123
|
+
for (const child of node.children) {
|
|
1124
|
+
collectTokens(child);
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
};
|
|
1128
|
+
collectTokens(ctx);
|
|
1129
|
+
// Add single space between each token (normalized spacing)
|
|
1130
|
+
if (tokenCount > 1) {
|
|
1131
|
+
totalLength += tokenCount - 1;
|
|
1132
|
+
}
|
|
1133
|
+
return totalLength;
|
|
1134
|
+
}
|
|
1135
|
+
_collectMultiArgFunctionInfo(ctx, argCount) {
|
|
1136
|
+
if (!ctx.children)
|
|
1137
|
+
return;
|
|
1138
|
+
let leftParenTokenIndex = null;
|
|
1139
|
+
let leftParenCharStart = 0;
|
|
1140
|
+
let rightParenTokenIndex = null;
|
|
1141
|
+
const commaTokenIndices = [];
|
|
1142
|
+
// Try to get function name from functionName child
|
|
1143
|
+
let functionName;
|
|
1144
|
+
if (ctx.functionName) {
|
|
1145
|
+
const fnCtx = ctx.functionName();
|
|
1146
|
+
if (fnCtx && fnCtx.getText) {
|
|
1147
|
+
functionName = fnCtx.getText().toUpperCase();
|
|
1148
|
+
}
|
|
1149
|
+
}
|
|
1150
|
+
for (const child of ctx.children) {
|
|
1151
|
+
if (child.symbol) {
|
|
1152
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1153
|
+
if (symName === 'LEFT_PAREN' && leftParenTokenIndex === null) {
|
|
1154
|
+
leftParenTokenIndex = child.symbol.tokenIndex;
|
|
1155
|
+
leftParenCharStart = child.symbol.start ?? 0;
|
|
1156
|
+
}
|
|
1157
|
+
else if (symName === 'RIGHT_PAREN') {
|
|
1158
|
+
rightParenTokenIndex = child.symbol.tokenIndex;
|
|
1159
|
+
break;
|
|
1160
|
+
}
|
|
1161
|
+
else if (symName === 'COMMA') {
|
|
1162
|
+
commaTokenIndices.push(child.symbol.tokenIndex);
|
|
1163
|
+
}
|
|
1164
|
+
}
|
|
1165
|
+
}
|
|
1166
|
+
if (leftParenTokenIndex !== null && rightParenTokenIndex !== null &&
|
|
1167
|
+
commaTokenIndices.length === argCount - 1) {
|
|
1168
|
+
const spanLength = this._calculateNormalizedSpanLength(ctx);
|
|
1169
|
+
this.multiArgFunctionInfo.set(leftParenTokenIndex, {
|
|
1170
|
+
closeParenIndex: rightParenTokenIndex,
|
|
1171
|
+
commaIndices: commaTokenIndices,
|
|
1172
|
+
spanLength: spanLength,
|
|
1173
|
+
functionName: functionName,
|
|
1174
|
+
charStart: leftParenCharStart
|
|
1175
|
+
});
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
_collectWindowDefInfo(ctx) {
|
|
1179
|
+
if (!ctx.children)
|
|
1180
|
+
return;
|
|
1181
|
+
let leftParenTokenIndex = null;
|
|
1182
|
+
let rightParenTokenIndex = null;
|
|
1183
|
+
let orderByTokenIndex = null;
|
|
1184
|
+
let windowFrameTokenIndex = null;
|
|
1185
|
+
// Get window's start character position for calculating relative offsets
|
|
1186
|
+
const windowStartChar = ctx.start?.start ?? 0;
|
|
1187
|
+
for (const child of ctx.children) {
|
|
1188
|
+
if (child.symbol) {
|
|
1189
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1190
|
+
if (symName === 'LEFT_PAREN' && leftParenTokenIndex === null) {
|
|
1191
|
+
leftParenTokenIndex = child.symbol.tokenIndex;
|
|
1192
|
+
}
|
|
1193
|
+
else if (symName === 'RIGHT_PAREN') {
|
|
1194
|
+
rightParenTokenIndex = child.symbol.tokenIndex;
|
|
1195
|
+
}
|
|
1196
|
+
else if (symName === 'ORDER' || symName === 'SORT') {
|
|
1197
|
+
orderByTokenIndex = child.symbol.tokenIndex;
|
|
1198
|
+
}
|
|
1199
|
+
}
|
|
1200
|
+
else if (child.ruleIndex !== undefined) {
|
|
1201
|
+
const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
|
|
1202
|
+
if (ruleName === 'windowFrame' && child.children?.[0]?.symbol) {
|
|
1203
|
+
windowFrameTokenIndex = child.children[0].symbol.tokenIndex;
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
}
|
|
1207
|
+
// Collect nested multi-arg functions with their relative character offsets
|
|
1208
|
+
const nestedFunctions = [];
|
|
1209
|
+
if (leftParenTokenIndex !== null && rightParenTokenIndex !== null) {
|
|
1210
|
+
for (const [funcIdx, funcInfo] of this.multiArgFunctionInfo) {
|
|
1211
|
+
if (funcIdx > leftParenTokenIndex && funcIdx < rightParenTokenIndex) {
|
|
1212
|
+
// Use the charStart from the function info to calculate relative offset
|
|
1213
|
+
const relativeOffset = funcInfo.charStart - windowStartChar;
|
|
1214
|
+
nestedFunctions.push({ funcIdx, relativeOffset });
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
if (leftParenTokenIndex !== null && rightParenTokenIndex !== null) {
|
|
1219
|
+
const spanLength = this._calculateNormalizedSpanLength(ctx);
|
|
1220
|
+
this.windowDefInfo.set(leftParenTokenIndex, {
|
|
1221
|
+
closeParenIndex: rightParenTokenIndex,
|
|
1222
|
+
orderByTokenIndex: orderByTokenIndex,
|
|
1223
|
+
windowFrameTokenIndex: windowFrameTokenIndex,
|
|
1224
|
+
spanLength: spanLength,
|
|
1225
|
+
nestedFunctions: nestedFunctions
|
|
1226
|
+
});
|
|
1227
|
+
}
|
|
1228
|
+
}
|
|
1229
|
+
/**
|
|
1230
|
+
* Collect IN list information for potential wrapping.
|
|
1231
|
+
* Structure: expr IN (value1, value2, value3, ...)
|
|
1232
|
+
* We want to track the IN list so we can wrap it at max line width.
|
|
1233
|
+
*/
|
|
1234
|
+
_collectInListInfo(ctx) {
|
|
1235
|
+
if (!ctx.children)
|
|
1236
|
+
return;
|
|
1237
|
+
// Check if this is an IN predicate (kind=IN)
|
|
1238
|
+
let isInPredicate = false;
|
|
1239
|
+
let inKeywordIndex = null;
|
|
1240
|
+
for (const child of ctx.children) {
|
|
1241
|
+
if (child.symbol) {
|
|
1242
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1243
|
+
if (symName === 'IN') {
|
|
1244
|
+
isInPredicate = true;
|
|
1245
|
+
inKeywordIndex = child.symbol.tokenIndex;
|
|
1246
|
+
break;
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
}
|
|
1250
|
+
if (!isInPredicate || inKeywordIndex === null)
|
|
1251
|
+
return;
|
|
1252
|
+
// Check if there's a subquery inside - if so, don't treat as IN list
|
|
1253
|
+
// Subquery IN: IN (SELECT ...)
|
|
1254
|
+
let hasSubquery = false;
|
|
1255
|
+
for (const child of ctx.children) {
|
|
1256
|
+
if (child.ruleIndex !== undefined) {
|
|
1257
|
+
const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
|
|
1258
|
+
if (ruleName === 'query') {
|
|
1259
|
+
hasSubquery = true;
|
|
1260
|
+
break;
|
|
1261
|
+
}
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
if (hasSubquery)
|
|
1265
|
+
return; // Don't track IN (SELECT ...) as an IN list
|
|
1266
|
+
// Now find the open paren, close paren, and commas using recursive walk
|
|
1267
|
+
let openParenIndex = null;
|
|
1268
|
+
let closeParenIndex = null;
|
|
1269
|
+
const commaIndices = [];
|
|
1270
|
+
let depth = 0;
|
|
1271
|
+
let foundOpenParen = false;
|
|
1272
|
+
const walkForTokens = (node) => {
|
|
1273
|
+
if (!node)
|
|
1274
|
+
return;
|
|
1275
|
+
if (node.symbol) {
|
|
1276
|
+
const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
|
|
1277
|
+
const tokenIndex = node.symbol.tokenIndex;
|
|
1278
|
+
if (tokenIndex <= inKeywordIndex)
|
|
1279
|
+
return; // Skip tokens before/at IN
|
|
1280
|
+
if (symName === 'LEFT_PAREN') {
|
|
1281
|
+
if (!foundOpenParen) {
|
|
1282
|
+
openParenIndex = tokenIndex;
|
|
1283
|
+
foundOpenParen = true;
|
|
1284
|
+
}
|
|
1285
|
+
else {
|
|
1286
|
+
depth++;
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
else if (symName === 'RIGHT_PAREN') {
|
|
1290
|
+
if (depth > 0) {
|
|
1291
|
+
depth--;
|
|
1292
|
+
}
|
|
1293
|
+
else if (foundOpenParen && closeParenIndex === null) {
|
|
1294
|
+
closeParenIndex = tokenIndex;
|
|
1295
|
+
return; // Found the closing paren, stop
|
|
1296
|
+
}
|
|
1297
|
+
}
|
|
1298
|
+
else if (symName === 'COMMA' && depth === 0 && foundOpenParen) {
|
|
1299
|
+
commaIndices.push(tokenIndex);
|
|
1300
|
+
}
|
|
1301
|
+
}
|
|
1302
|
+
if (node.children) {
|
|
1303
|
+
for (const child of node.children) {
|
|
1304
|
+
if (closeParenIndex !== null)
|
|
1305
|
+
return; // Stop if we found close paren
|
|
1306
|
+
walkForTokens(child);
|
|
1307
|
+
}
|
|
1308
|
+
}
|
|
1309
|
+
};
|
|
1310
|
+
walkForTokens(ctx);
|
|
1311
|
+
if (openParenIndex !== null && closeParenIndex !== null) {
|
|
1312
|
+
this.inListInfo.set(openParenIndex, {
|
|
1313
|
+
openParenIndex,
|
|
1314
|
+
closeParenIndex,
|
|
1315
|
+
commaIndices,
|
|
1316
|
+
isInPivot: false, // WHERE IN, not PIVOT IN
|
|
1317
|
+
});
|
|
1318
|
+
}
|
|
1319
|
+
}
|
|
1320
|
+
/**
|
|
1321
|
+
* Collect PIVOT/UNPIVOT clause information for potential expansion.
|
|
1322
|
+
* Structure: PIVOT (aggregates FOR column IN (values))
|
|
1323
|
+
*/
|
|
1324
|
+
_collectPivotInfo(ctx, isUnpivot) {
|
|
1325
|
+
if (!ctx.children)
|
|
1326
|
+
return;
|
|
1327
|
+
let openParenIndex = null;
|
|
1328
|
+
let closeParenIndex = null;
|
|
1329
|
+
let forKeywordIndex = null;
|
|
1330
|
+
let inKeywordIndex = null;
|
|
1331
|
+
let inListOpenParen = null;
|
|
1332
|
+
const aggregateCommaIndices = [];
|
|
1333
|
+
const inListCommaIndices = [];
|
|
1334
|
+
let foundFor = false;
|
|
1335
|
+
let foundIn = false;
|
|
1336
|
+
let inListDepth = 0; // Depth within IN list parens (0 = top level of IN list)
|
|
1337
|
+
// Walk through children to find structure
|
|
1338
|
+
const walkForTokens = (node) => {
|
|
1339
|
+
if (!node)
|
|
1340
|
+
return;
|
|
1341
|
+
if (node.symbol) {
|
|
1342
|
+
const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
|
|
1343
|
+
const tokenIndex = node.symbol.tokenIndex;
|
|
1344
|
+
if (symName === 'LEFT_PAREN') {
|
|
1345
|
+
if (openParenIndex === null) {
|
|
1346
|
+
// First paren is the PIVOT open paren
|
|
1347
|
+
openParenIndex = tokenIndex;
|
|
1348
|
+
}
|
|
1349
|
+
else if (foundIn && inListOpenParen === null) {
|
|
1350
|
+
// First paren after IN is the IN list open paren
|
|
1351
|
+
inListOpenParen = tokenIndex;
|
|
1352
|
+
}
|
|
1353
|
+
else if (foundIn) {
|
|
1354
|
+
// Nested paren within IN list items
|
|
1355
|
+
inListDepth++;
|
|
1356
|
+
}
|
|
1357
|
+
}
|
|
1358
|
+
else if (symName === 'RIGHT_PAREN') {
|
|
1359
|
+
if (foundIn && inListDepth > 0) {
|
|
1360
|
+
// Closing a nested paren within IN list
|
|
1361
|
+
inListDepth--;
|
|
1362
|
+
}
|
|
1363
|
+
else if (foundIn && inListOpenParen !== null) {
|
|
1364
|
+
// Closing the IN list paren - this is also close of PIVOT
|
|
1365
|
+
closeParenIndex = tokenIndex;
|
|
1366
|
+
}
|
|
1367
|
+
else {
|
|
1368
|
+
// Outer PIVOT close paren
|
|
1369
|
+
closeParenIndex = tokenIndex;
|
|
1370
|
+
}
|
|
1371
|
+
}
|
|
1372
|
+
else if (symName === 'FOR') {
|
|
1373
|
+
foundFor = true;
|
|
1374
|
+
forKeywordIndex = tokenIndex;
|
|
1375
|
+
}
|
|
1376
|
+
else if (symName === 'IN') {
|
|
1377
|
+
foundIn = true;
|
|
1378
|
+
inKeywordIndex = tokenIndex;
|
|
1379
|
+
}
|
|
1380
|
+
else if (symName === 'COMMA') {
|
|
1381
|
+
if (foundIn && inListOpenParen !== null && inListDepth === 0) {
|
|
1382
|
+
// Comma in IN list at top level
|
|
1383
|
+
inListCommaIndices.push(tokenIndex);
|
|
1384
|
+
}
|
|
1385
|
+
else if (!foundFor) {
|
|
1386
|
+
// Comma before FOR - aggregate list
|
|
1387
|
+
aggregateCommaIndices.push(tokenIndex);
|
|
1388
|
+
}
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1391
|
+
if (node.children) {
|
|
1392
|
+
for (const child of node.children) {
|
|
1393
|
+
walkForTokens(child);
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
};
|
|
1397
|
+
walkForTokens(ctx);
|
|
1398
|
+
if (openParenIndex !== null && closeParenIndex !== null) {
|
|
1399
|
+
const spanLength = this._calculateNormalizedSpanLength(ctx);
|
|
1400
|
+
this.pivotInfo.set(openParenIndex, {
|
|
1401
|
+
openParenIndex,
|
|
1402
|
+
closeParenIndex,
|
|
1403
|
+
aggregateCommaIndices,
|
|
1404
|
+
forKeywordIndex,
|
|
1405
|
+
inKeywordIndex,
|
|
1406
|
+
inListCommaIndices,
|
|
1407
|
+
spanLength,
|
|
1408
|
+
isUnpivot
|
|
1409
|
+
});
|
|
1410
|
+
// Also store the PIVOT IN list in inListInfo for consistent wrapping
|
|
1411
|
+
if (inListOpenParen !== null) {
|
|
1412
|
+
// Find the IN list close paren (it's one before the PIVOT close paren)
|
|
1413
|
+
// We need to find the actual IN list close paren
|
|
1414
|
+
let inListCloseParen = closeParenIndex; // Default to same as PIVOT close
|
|
1415
|
+
this.inListInfo.set(inListOpenParen, {
|
|
1416
|
+
openParenIndex: inListOpenParen,
|
|
1417
|
+
closeParenIndex: inListCloseParen,
|
|
1418
|
+
commaIndices: inListCommaIndices,
|
|
1419
|
+
isInPivot: true,
|
|
1420
|
+
});
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
}
|
|
1424
|
+
_analyzeCaseExpression(ctx) {
|
|
1425
|
+
if (!ctx.children)
|
|
1426
|
+
return;
|
|
1427
|
+
let whenCount = 0;
|
|
1428
|
+
let caseToken = null;
|
|
1429
|
+
let elseToken = null;
|
|
1430
|
+
let endToken = null;
|
|
1431
|
+
let valueExpression = null;
|
|
1432
|
+
const whenTokens = [];
|
|
1433
|
+
// Check if this is a simpleCase (has 'value' property) vs searchedCase
|
|
1434
|
+
// simpleCase: CASE value=expression whenClause+ ELSE? END
|
|
1435
|
+
// searchedCase: CASE whenClause+ ELSE? END
|
|
1436
|
+
const isSimpleCase = ctx.value !== undefined;
|
|
1437
|
+
if (isSimpleCase && ctx.value) {
|
|
1438
|
+
valueExpression = ctx.value;
|
|
1439
|
+
}
|
|
1440
|
+
for (const child of ctx.children) {
|
|
1441
|
+
if (child.symbol) {
|
|
1442
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1443
|
+
if (symName === 'CASE') {
|
|
1444
|
+
caseToken = child.symbol;
|
|
1445
|
+
}
|
|
1446
|
+
else if (symName === 'ELSE') {
|
|
1447
|
+
elseToken = child.symbol;
|
|
1448
|
+
}
|
|
1449
|
+
else if (symName === 'END') {
|
|
1450
|
+
endToken = child.symbol;
|
|
1451
|
+
}
|
|
1452
|
+
else if (symName === 'WHEN') {
|
|
1453
|
+
whenCount++;
|
|
1454
|
+
whenTokens.push(child.symbol);
|
|
1455
|
+
}
|
|
1456
|
+
}
|
|
1457
|
+
if (child.ruleIndex !== undefined) {
|
|
1458
|
+
const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
|
|
1459
|
+
if (ruleName === 'whenClause') {
|
|
1460
|
+
const whenToken = this._findTokenInContext(child, 'WHEN');
|
|
1461
|
+
if (whenToken && !whenTokens.find((t) => t.tokenIndex === whenToken.tokenIndex)) {
|
|
1462
|
+
whenCount++;
|
|
1463
|
+
whenTokens.push(whenToken);
|
|
1464
|
+
}
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1467
|
+
}
|
|
1468
|
+
if (whenCount > 1 && caseToken) {
|
|
1469
|
+
this.multiWhenCaseTokens.add(caseToken.tokenIndex);
|
|
1470
|
+
// For simpleCase with value, mark the CASE token and the position after value expression
|
|
1471
|
+
// So the newline goes after "CASE x" not after "CASE"
|
|
1472
|
+
if (isSimpleCase && valueExpression && valueExpression.stop) {
|
|
1473
|
+
this.simpleCaseTokens.add(caseToken.tokenIndex);
|
|
1474
|
+
this.simpleCaseValueEndTokens.add(valueExpression.stop.tokenIndex);
|
|
1475
|
+
}
|
|
1476
|
+
for (const whenToken of whenTokens) {
|
|
1477
|
+
this.caseWhenTokens.add(whenToken.tokenIndex);
|
|
1478
|
+
}
|
|
1479
|
+
if (elseToken) {
|
|
1480
|
+
this.caseElseTokens.add(elseToken.tokenIndex);
|
|
1481
|
+
}
|
|
1482
|
+
if (endToken) {
|
|
1483
|
+
this.caseEndTokens.add(endToken.tokenIndex);
|
|
1484
|
+
}
|
|
1485
|
+
if (this.currentSelectToken >= 0) {
|
|
1486
|
+
this.multiItemClauses.add(this.currentSelectToken);
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
}
|
|
1490
|
+
_findTokenInContext(ctx, symbolicName) {
|
|
1491
|
+
if (!ctx)
|
|
1492
|
+
return null;
|
|
1493
|
+
if (ctx.symbol) {
|
|
1494
|
+
const symName = SqlBaseLexer.symbolicNames[ctx.symbol.type];
|
|
1495
|
+
if (symName === symbolicName) {
|
|
1496
|
+
return ctx.symbol;
|
|
1497
|
+
}
|
|
1498
|
+
}
|
|
1499
|
+
if (ctx.children) {
|
|
1500
|
+
for (const child of ctx.children) {
|
|
1501
|
+
const found = this._findTokenInContext(child, symbolicName);
|
|
1502
|
+
if (found)
|
|
1503
|
+
return found;
|
|
1504
|
+
}
|
|
1505
|
+
}
|
|
1506
|
+
return null;
|
|
1507
|
+
}
|
|
1508
|
+
_analyzeJoinConditions(ctx) {
|
|
1509
|
+
const operators = this._countConditionOperators(ctx);
|
|
1510
|
+
if (operators > 0) {
|
|
1511
|
+
const onTokenIndex = this._findOnToken(ctx);
|
|
1512
|
+
if (onTokenIndex !== -1) {
|
|
1513
|
+
this.multilineConditionClauses.add(onTokenIndex);
|
|
1514
|
+
}
|
|
1515
|
+
}
|
|
1516
|
+
}
|
|
1517
|
+
_findOnToken(ctx) {
|
|
1518
|
+
if (!ctx)
|
|
1519
|
+
return -1;
|
|
1520
|
+
if (ctx.symbol) {
|
|
1521
|
+
const symName = SqlBaseLexer.symbolicNames[ctx.symbol.type];
|
|
1522
|
+
if (symName === 'ON') {
|
|
1523
|
+
return ctx.symbol.tokenIndex;
|
|
1524
|
+
}
|
|
1525
|
+
}
|
|
1526
|
+
if (ctx.children) {
|
|
1527
|
+
for (const child of ctx.children) {
|
|
1528
|
+
const result = this._findOnToken(child);
|
|
1529
|
+
if (result !== -1)
|
|
1530
|
+
return result;
|
|
1531
|
+
}
|
|
1532
|
+
}
|
|
1533
|
+
return -1;
|
|
1534
|
+
}
|
|
1535
|
+
_findSubqueryContext(ctx) {
|
|
1536
|
+
if (!ctx)
|
|
1537
|
+
return null;
|
|
1538
|
+
const className = ctx.constructor?.name || '';
|
|
1539
|
+
if (className === 'SubqueryContext')
|
|
1540
|
+
return ctx;
|
|
1541
|
+
if (ctx.children) {
|
|
1542
|
+
for (const child of ctx.children) {
|
|
1543
|
+
if (!child.symbol) {
|
|
1544
|
+
const found = this._findSubqueryContext(child);
|
|
1545
|
+
if (found)
|
|
1546
|
+
return found;
|
|
1547
|
+
}
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
1550
|
+
return null;
|
|
1551
|
+
}
|
|
1552
|
+
_markAllDescendantTokens(ctx, targetSet) {
|
|
1553
|
+
// Mark all tokens in this context and its descendants
|
|
1554
|
+
if (!ctx)
|
|
1555
|
+
return;
|
|
1556
|
+
if (ctx.symbol) {
|
|
1557
|
+
targetSet.add(ctx.symbol.tokenIndex);
|
|
1558
|
+
}
|
|
1559
|
+
else if (ctx.children) {
|
|
1560
|
+
for (const child of ctx.children) {
|
|
1561
|
+
this._markAllDescendantTokens(child, targetSet);
|
|
1562
|
+
}
|
|
1563
|
+
}
|
|
1564
|
+
}
|
|
1565
|
+
_markIdentifier(ctx) {
|
|
1566
|
+
if (ctx.start) {
|
|
1567
|
+
for (let i = ctx.start.tokenIndex; i <= (ctx.stop?.tokenIndex ?? ctx.start.tokenIndex); i++) {
|
|
1568
|
+
this.identifierTokens.add(i);
|
|
1569
|
+
}
|
|
1570
|
+
}
|
|
1571
|
+
}
|
|
1572
|
+
_markClauseStart(ctx) {
|
|
1573
|
+
if (ctx.start) {
|
|
1574
|
+
this.clauseStartTokens.add(ctx.start.tokenIndex);
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
_markListContext(ctx) {
|
|
1578
|
+
let hasCommas = false;
|
|
1579
|
+
if (ctx.children) {
|
|
1580
|
+
let isFirst = true;
|
|
1581
|
+
for (const child of ctx.children) {
|
|
1582
|
+
if (child.symbol) {
|
|
1583
|
+
const tokenType = child.symbol.type;
|
|
1584
|
+
if (tokenType === getTokenType('COMMA')) {
|
|
1585
|
+
this.listItemCommas.add(child.symbol.tokenIndex);
|
|
1586
|
+
hasCommas = true;
|
|
1587
|
+
}
|
|
1588
|
+
else if (isFirst && tokenType !== getTokenType('COMMA') && child.symbol.tokenIndex >= 0) {
|
|
1589
|
+
this.listFirstItems.add(child.symbol.tokenIndex);
|
|
1590
|
+
isFirst = false;
|
|
1591
|
+
}
|
|
1592
|
+
}
|
|
1593
|
+
else if (child.children) {
|
|
1594
|
+
this._markCommasInContext(child);
|
|
1595
|
+
}
|
|
1596
|
+
}
|
|
1597
|
+
}
|
|
1598
|
+
return hasCommas;
|
|
1599
|
+
}
|
|
1600
|
+
_markCommasInContext(ctx) {
|
|
1601
|
+
if (!ctx || !ctx.children)
|
|
1602
|
+
return;
|
|
1603
|
+
const className = ctx.constructor?.name || '';
|
|
1604
|
+
if (className === 'FunctionCallContext')
|
|
1605
|
+
return;
|
|
1606
|
+
if (className === 'PivotClauseContext' || className === 'UnpivotClauseContext')
|
|
1607
|
+
return;
|
|
1608
|
+
if (className === 'LateralViewContext')
|
|
1609
|
+
return;
|
|
1610
|
+
for (const child of ctx.children) {
|
|
1611
|
+
if (child.symbol) {
|
|
1612
|
+
if (child.symbol.type === getTokenType('COMMA')) {
|
|
1613
|
+
this.listItemCommas.add(child.symbol.tokenIndex);
|
|
1614
|
+
}
|
|
1615
|
+
}
|
|
1616
|
+
else if (child.children) {
|
|
1617
|
+
this._markCommasInContext(child);
|
|
1618
|
+
}
|
|
1619
|
+
}
|
|
1620
|
+
}
|
|
1621
|
+
_markListCommasExcludingGroupingAnalytics(ctx) {
|
|
1622
|
+
let count = 0;
|
|
1623
|
+
if (!ctx || !ctx.children)
|
|
1624
|
+
return 0;
|
|
1625
|
+
const isGroupingAnalytics = ctx.ruleIndex !== undefined &&
|
|
1626
|
+
SqlBaseParser.ruleNames[ctx.ruleIndex] === 'groupingAnalytics';
|
|
1627
|
+
for (const child of ctx.children) {
|
|
1628
|
+
if (child.symbol) {
|
|
1629
|
+
if (child.symbol.type === getTokenType('COMMA')) {
|
|
1630
|
+
if (!isGroupingAnalytics) {
|
|
1631
|
+
this.listItemCommas.add(child.symbol.tokenIndex);
|
|
1632
|
+
}
|
|
1633
|
+
count++;
|
|
1634
|
+
}
|
|
1635
|
+
}
|
|
1636
|
+
else if (child.ruleIndex !== undefined) {
|
|
1637
|
+
count += this._markListCommasExcludingGroupingAnalytics(child);
|
|
1638
|
+
}
|
|
1639
|
+
}
|
|
1640
|
+
return count;
|
|
1641
|
+
}
|
|
1642
|
+
_markGroupByAllToken(ctx) {
|
|
1643
|
+
if (!ctx || !ctx.children)
|
|
1644
|
+
return;
|
|
1645
|
+
let foundGroupBy = false;
|
|
1646
|
+
for (const child of ctx.children) {
|
|
1647
|
+
if (child.symbol) {
|
|
1648
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1649
|
+
if (symName === 'BY') {
|
|
1650
|
+
foundGroupBy = true;
|
|
1651
|
+
}
|
|
1652
|
+
else if (foundGroupBy && symName === 'ALL') {
|
|
1653
|
+
this.groupByAllTokens.add(child.symbol.tokenIndex);
|
|
1654
|
+
return;
|
|
1655
|
+
}
|
|
1656
|
+
}
|
|
1657
|
+
else if (foundGroupBy && child.ruleIndex !== undefined) {
|
|
1658
|
+
const allToken = this._findAllTokenInGroupByExpression(child);
|
|
1659
|
+
if (allToken) {
|
|
1660
|
+
this.groupByAllTokens.add(allToken.tokenIndex);
|
|
1661
|
+
return;
|
|
1662
|
+
}
|
|
1663
|
+
}
|
|
1664
|
+
}
|
|
1665
|
+
}
|
|
1666
|
+
_findAllTokenInGroupByExpression(ctx) {
|
|
1667
|
+
if (!ctx)
|
|
1668
|
+
return null;
|
|
1669
|
+
if (ctx.symbol) {
|
|
1670
|
+
const symName = SqlBaseLexer.symbolicNames[ctx.symbol.type];
|
|
1671
|
+
if (symName === 'ALL') {
|
|
1672
|
+
return ctx.symbol;
|
|
1673
|
+
}
|
|
1674
|
+
return null;
|
|
1675
|
+
}
|
|
1676
|
+
if (!ctx.children)
|
|
1677
|
+
return null;
|
|
1678
|
+
const ruleName = ctx.ruleIndex !== undefined ? SqlBaseParser.ruleNames[ctx.ruleIndex] : null;
|
|
1679
|
+
const identifierPathRules = new Set([
|
|
1680
|
+
'groupByClause', 'expression', 'booleanExpression', 'valueExpression',
|
|
1681
|
+
'primaryExpression', 'columnReference', 'identifier', 'strictIdentifier',
|
|
1682
|
+
'nonReserved', 'namedExpression'
|
|
1683
|
+
]);
|
|
1684
|
+
if (ruleName && identifierPathRules.has(ruleName)) {
|
|
1685
|
+
const meaningfulChildren = ctx.children.filter((c) => c.symbol || (c.ruleIndex !== undefined));
|
|
1686
|
+
if (meaningfulChildren.length === 1) {
|
|
1687
|
+
return this._findAllTokenInGroupByExpression(meaningfulChildren[0]);
|
|
1688
|
+
}
|
|
1689
|
+
}
|
|
1690
|
+
return null;
|
|
1691
|
+
}
|
|
1692
|
+
_analyzeConditionClause(ctx) {
|
|
1693
|
+
const operators = this._countConditionOperators(ctx);
|
|
1694
|
+
if (operators > 0) {
|
|
1695
|
+
if (ctx.start) {
|
|
1696
|
+
this.multilineConditionClauses.add(ctx.start.tokenIndex);
|
|
1697
|
+
}
|
|
1698
|
+
}
|
|
1699
|
+
}
|
|
1700
|
+
_countConditionOperators(ctx, parenDepth = 0) {
|
|
1701
|
+
let count = 0;
|
|
1702
|
+
if (!ctx)
|
|
1703
|
+
return count;
|
|
1704
|
+
if (ctx.children) {
|
|
1705
|
+
let currentParenDepth = parenDepth;
|
|
1706
|
+
for (const child of ctx.children) {
|
|
1707
|
+
if (child.symbol) {
|
|
1708
|
+
const symbolicName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1709
|
+
if (symbolicName === 'LEFT_PAREN') {
|
|
1710
|
+
currentParenDepth++;
|
|
1711
|
+
}
|
|
1712
|
+
else if (symbolicName === 'RIGHT_PAREN') {
|
|
1713
|
+
currentParenDepth--;
|
|
1714
|
+
}
|
|
1715
|
+
else if (symbolicName === 'AND' || symbolicName === 'OR') {
|
|
1716
|
+
if (!this.betweenAndTokens.has(child.symbol.tokenIndex) && currentParenDepth === 0) {
|
|
1717
|
+
count++;
|
|
1718
|
+
this.conditionOperators.add(child.symbol.tokenIndex);
|
|
1719
|
+
}
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1722
|
+
count += this._countConditionOperators(child, currentParenDepth);
|
|
1723
|
+
}
|
|
1724
|
+
}
|
|
1725
|
+
return count;
|
|
1726
|
+
}
|
|
1727
|
+
_scanForBetweenAnd(ctx) {
|
|
1728
|
+
if (!ctx)
|
|
1729
|
+
return;
|
|
1730
|
+
if (ctx.children) {
|
|
1731
|
+
let hasBetween = false;
|
|
1732
|
+
for (const child of ctx.children) {
|
|
1733
|
+
if (child.symbol) {
|
|
1734
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1735
|
+
if (symName === 'BETWEEN') {
|
|
1736
|
+
hasBetween = true;
|
|
1737
|
+
}
|
|
1738
|
+
else if (symName === 'AND' && hasBetween) {
|
|
1739
|
+
this.betweenAndTokens.add(child.symbol.tokenIndex);
|
|
1740
|
+
hasBetween = false;
|
|
1741
|
+
}
|
|
1742
|
+
}
|
|
1743
|
+
this._scanForBetweenAnd(child);
|
|
1744
|
+
}
|
|
1745
|
+
}
|
|
1746
|
+
}
|
|
1747
|
+
_markSubqueryParens(ctx) {
|
|
1748
|
+
if (ctx.children) {
|
|
1749
|
+
for (const child of ctx.children) {
|
|
1750
|
+
if (child.symbol) {
|
|
1751
|
+
const tokenType = child.symbol.type;
|
|
1752
|
+
if (tokenType === getTokenType('LEFT_PAREN')) {
|
|
1753
|
+
this.subqueryOpenParens.add(child.symbol.tokenIndex);
|
|
1754
|
+
}
|
|
1755
|
+
else if (tokenType === getTokenType('RIGHT_PAREN')) {
|
|
1756
|
+
this.subqueryCloseParens.add(child.symbol.tokenIndex);
|
|
1757
|
+
}
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
}
|
|
1761
|
+
}
|
|
1762
|
+
_markDdlColumnList(ctx) {
|
|
1763
|
+
if (!ctx || !ctx.children)
|
|
1764
|
+
return;
|
|
1765
|
+
let foundLeftParen = false;
|
|
1766
|
+
let leftParenIndex = -1;
|
|
1767
|
+
let commaCount = 0;
|
|
1768
|
+
for (const child of ctx.children) {
|
|
1769
|
+
if (child.symbol) {
|
|
1770
|
+
const tokenType = child.symbol.type;
|
|
1771
|
+
if (tokenType === getTokenType('LEFT_PAREN') && !foundLeftParen) {
|
|
1772
|
+
foundLeftParen = true;
|
|
1773
|
+
leftParenIndex = child.symbol.tokenIndex;
|
|
1774
|
+
this.ddlOpenParens.add(leftParenIndex);
|
|
1775
|
+
}
|
|
1776
|
+
else if (tokenType === getTokenType('RIGHT_PAREN') && foundLeftParen) {
|
|
1777
|
+
this.ddlCloseParens.add(child.symbol.tokenIndex);
|
|
1778
|
+
}
|
|
1779
|
+
else if (tokenType === getTokenType('COMMA') && foundLeftParen) {
|
|
1780
|
+
this.ddlColumnCommas.add(child.symbol.tokenIndex);
|
|
1781
|
+
commaCount++;
|
|
1782
|
+
}
|
|
1783
|
+
}
|
|
1784
|
+
else if (child.children && foundLeftParen) {
|
|
1785
|
+
commaCount += this._markDdlCommasInContext(child);
|
|
1786
|
+
}
|
|
1787
|
+
}
|
|
1788
|
+
if (commaCount > 0 && leftParenIndex >= 0) {
|
|
1789
|
+
this.ddlMultiColumn.add(leftParenIndex);
|
|
1790
|
+
}
|
|
1791
|
+
}
|
|
1792
|
+
_markDdlCommasInContext(ctx, angleDepth = 0) {
|
|
1793
|
+
if (!ctx || !ctx.children)
|
|
1794
|
+
return 0;
|
|
1795
|
+
let count = 0;
|
|
1796
|
+
for (const child of ctx.children) {
|
|
1797
|
+
if (child.symbol) {
|
|
1798
|
+
const tokenType = child.symbol.type;
|
|
1799
|
+
if (tokenType === getTokenType('LT')) {
|
|
1800
|
+
// Entering complex type like ARRAY<...> or MAP<...>
|
|
1801
|
+
angleDepth++;
|
|
1802
|
+
}
|
|
1803
|
+
else if (tokenType === getTokenType('GT')) {
|
|
1804
|
+
// Exiting complex type
|
|
1805
|
+
if (angleDepth > 0)
|
|
1806
|
+
angleDepth--;
|
|
1807
|
+
}
|
|
1808
|
+
else if (tokenType === getTokenType('COMMA') && angleDepth === 0) {
|
|
1809
|
+
// Only mark as DDL comma if not inside angle brackets (complex type)
|
|
1810
|
+
this.ddlColumnCommas.add(child.symbol.tokenIndex);
|
|
1811
|
+
count++;
|
|
1812
|
+
}
|
|
1813
|
+
}
|
|
1814
|
+
else if (child.children) {
|
|
1815
|
+
count += this._markDdlCommasInContext(child, angleDepth);
|
|
1816
|
+
}
|
|
1817
|
+
}
|
|
1818
|
+
return count;
|
|
1819
|
+
}
|
|
1820
|
+
_markValuesCommas(ctx, foundValues = false) {
|
|
1821
|
+
if (!ctx || !ctx.children)
|
|
1822
|
+
return;
|
|
1823
|
+
let parenDepth = 0;
|
|
1824
|
+
for (const child of ctx.children) {
|
|
1825
|
+
if (child.symbol) {
|
|
1826
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1827
|
+
const tokenType = child.symbol.type;
|
|
1828
|
+
if (symName === 'VALUES') {
|
|
1829
|
+
foundValues = true;
|
|
1830
|
+
}
|
|
1831
|
+
else if (foundValues && tokenType === getTokenType('LEFT_PAREN')) {
|
|
1832
|
+
parenDepth++;
|
|
1833
|
+
// If we see a paren right after VALUES, we have tuples
|
|
1834
|
+
if (parenDepth === 1) {
|
|
1835
|
+
this.valuesHasTuples = true;
|
|
1836
|
+
}
|
|
1837
|
+
}
|
|
1838
|
+
else if (foundValues && tokenType === getTokenType('RIGHT_PAREN')) {
|
|
1839
|
+
parenDepth--;
|
|
1840
|
+
}
|
|
1841
|
+
else if (foundValues && parenDepth === 0 && tokenType === getTokenType('COMMA')) {
|
|
1842
|
+
this.valuesCommas.add(child.symbol.tokenIndex);
|
|
1843
|
+
}
|
|
1844
|
+
}
|
|
1845
|
+
else if (child.children) {
|
|
1846
|
+
this._markValuesCommas(child, foundValues);
|
|
1847
|
+
}
|
|
1848
|
+
}
|
|
1849
|
+
}
|
|
1850
|
+
_markSetClause(ctx, foundSet, commaCount) {
|
|
1851
|
+
if (!ctx || !ctx.children)
|
|
1852
|
+
return commaCount;
|
|
1853
|
+
for (const child of ctx.children) {
|
|
1854
|
+
if (child.symbol) {
|
|
1855
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1856
|
+
if (symName === 'SET') {
|
|
1857
|
+
foundSet = true;
|
|
1858
|
+
this.setKeywordToken = child.symbol.tokenIndex;
|
|
1859
|
+
this.clauseStartTokens.add(child.symbol.tokenIndex);
|
|
1860
|
+
}
|
|
1861
|
+
else if (foundSet && child.symbol.type === getTokenType('COMMA')) {
|
|
1862
|
+
this.setClauseCommas.add(child.symbol.tokenIndex);
|
|
1863
|
+
commaCount++;
|
|
1864
|
+
}
|
|
1865
|
+
else if (foundSet && symName === 'WHERE') {
|
|
1866
|
+
return commaCount;
|
|
1867
|
+
}
|
|
1868
|
+
}
|
|
1869
|
+
else if (child.children) {
|
|
1870
|
+
commaCount = this._markSetClause(child, foundSet, commaCount);
|
|
1871
|
+
}
|
|
1872
|
+
}
|
|
1873
|
+
return commaCount;
|
|
1874
|
+
}
|
|
1875
|
+
_markSetConfigTokens(ctx) {
|
|
1876
|
+
if (!ctx || !ctx.children)
|
|
1877
|
+
return;
|
|
1878
|
+
let foundSet = false;
|
|
1879
|
+
for (const child of ctx.children) {
|
|
1880
|
+
if (child.symbol) {
|
|
1881
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1882
|
+
if (symName === 'SET') {
|
|
1883
|
+
foundSet = true;
|
|
1884
|
+
}
|
|
1885
|
+
else if (foundSet) {
|
|
1886
|
+
this.setConfigTokens.add(child.symbol.tokenIndex);
|
|
1887
|
+
}
|
|
1888
|
+
}
|
|
1889
|
+
else if (child.children && foundSet) {
|
|
1890
|
+
this._markSetConfigTokensRecursive(child);
|
|
1891
|
+
}
|
|
1892
|
+
}
|
|
1893
|
+
}
|
|
1894
|
+
_markResetConfigTokens(ctx) {
|
|
1895
|
+
// Similar to SET, mark all tokens after RESET keyword
|
|
1896
|
+
if (!ctx || !ctx.children)
|
|
1897
|
+
return;
|
|
1898
|
+
let foundReset = false;
|
|
1899
|
+
for (const child of ctx.children) {
|
|
1900
|
+
if (child.symbol) {
|
|
1901
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1902
|
+
if (symName === 'RESET') {
|
|
1903
|
+
foundReset = true;
|
|
1904
|
+
}
|
|
1905
|
+
else if (foundReset) {
|
|
1906
|
+
this.setConfigTokens.add(child.symbol.tokenIndex);
|
|
1907
|
+
}
|
|
1908
|
+
}
|
|
1909
|
+
else if (child.children && foundReset) {
|
|
1910
|
+
this._markSetConfigTokensRecursive(child);
|
|
1911
|
+
}
|
|
1912
|
+
}
|
|
1913
|
+
}
|
|
1914
|
+
_markSetConfigTokensRecursive(ctx) {
|
|
1915
|
+
if (!ctx)
|
|
1916
|
+
return;
|
|
1917
|
+
if (ctx.symbol) {
|
|
1918
|
+
this.setConfigTokens.add(ctx.symbol.tokenIndex);
|
|
1919
|
+
}
|
|
1920
|
+
if (ctx.children) {
|
|
1921
|
+
for (const child of ctx.children) {
|
|
1922
|
+
this._markSetConfigTokensRecursive(child);
|
|
1923
|
+
}
|
|
1924
|
+
}
|
|
1925
|
+
}
|
|
1926
|
+
_markMergeClauses(ctx) {
|
|
1927
|
+
if (!ctx || !ctx.children)
|
|
1928
|
+
return;
|
|
1929
|
+
for (const child of ctx.children) {
|
|
1930
|
+
if (child.symbol) {
|
|
1931
|
+
const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
|
|
1932
|
+
if (symName === 'USING') {
|
|
1933
|
+
this.mergeUsingTokens.add(child.symbol.tokenIndex);
|
|
1934
|
+
}
|
|
1935
|
+
else if (symName === 'ON') {
|
|
1936
|
+
this.mergeOnTokens.add(child.symbol.tokenIndex);
|
|
1937
|
+
}
|
|
1938
|
+
else if (symName === 'WHEN') {
|
|
1939
|
+
this.mergeWhenTokens.add(child.symbol.tokenIndex);
|
|
1940
|
+
}
|
|
1941
|
+
}
|
|
1942
|
+
else if (child.children) {
|
|
1943
|
+
this._markMergeClauses(child);
|
|
1944
|
+
}
|
|
1945
|
+
}
|
|
1946
|
+
}
|
|
1947
|
+
_markDepthForContext(ctx) {
|
|
1948
|
+
if (ctx.start && ctx.stop) {
|
|
1949
|
+
for (let i = ctx.start.tokenIndex; i <= ctx.stop.tokenIndex; i++) {
|
|
1950
|
+
if (!this.tokenDepthMap.has(i)) {
|
|
1951
|
+
this.tokenDepthMap.set(i, this.subqueryDepth);
|
|
1952
|
+
}
|
|
1953
|
+
}
|
|
1954
|
+
}
|
|
1955
|
+
}
|
|
1956
|
+
}
|