@jacobknightley/fabric-format 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +196 -0
  2. package/dist/cell-formatter.d.ts +75 -0
  3. package/dist/cell-formatter.js +144 -0
  4. package/dist/cli.d.ts +2 -0
  5. package/dist/cli.js +435 -0
  6. package/dist/formatters/index.d.ts +19 -0
  7. package/dist/formatters/index.js +76 -0
  8. package/dist/formatters/python/config.d.ts +33 -0
  9. package/dist/formatters/python/config.js +29 -0
  10. package/dist/formatters/python/index.d.ts +7 -0
  11. package/dist/formatters/python/index.js +13 -0
  12. package/dist/formatters/python/python-formatter.d.ts +51 -0
  13. package/dist/formatters/python/python-formatter.js +180 -0
  14. package/dist/formatters/sparksql/constants.d.ts +16 -0
  15. package/dist/formatters/sparksql/constants.js +16 -0
  16. package/dist/formatters/sparksql/fmt-detector.d.ts +65 -0
  17. package/dist/formatters/sparksql/fmt-detector.js +84 -0
  18. package/dist/formatters/sparksql/formatter.d.ts +24 -0
  19. package/dist/formatters/sparksql/formatter.js +1276 -0
  20. package/dist/formatters/sparksql/formatting-context.d.ts +154 -0
  21. package/dist/formatters/sparksql/formatting-context.js +363 -0
  22. package/dist/formatters/sparksql/generated/SqlBaseLexer.d.ts +529 -0
  23. package/dist/formatters/sparksql/generated/SqlBaseLexer.js +2609 -0
  24. package/dist/formatters/sparksql/generated/SqlBaseParser.d.ts +8195 -0
  25. package/dist/formatters/sparksql/generated/SqlBaseParser.js +48793 -0
  26. package/dist/formatters/sparksql/generated/SqlBaseParserListener.d.ts +910 -0
  27. package/dist/formatters/sparksql/generated/SqlBaseParserListener.js +2730 -0
  28. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.d.ts +456 -0
  29. package/dist/formatters/sparksql/generated/SqlBaseParserVisitor.js +1822 -0
  30. package/dist/formatters/sparksql/generated/builtinFunctions.d.ts +8 -0
  31. package/dist/formatters/sparksql/generated/builtinFunctions.js +510 -0
  32. package/dist/formatters/sparksql/index.d.ts +11 -0
  33. package/dist/formatters/sparksql/index.js +22 -0
  34. package/dist/formatters/sparksql/output-builder.d.ts +89 -0
  35. package/dist/formatters/sparksql/output-builder.js +191 -0
  36. package/dist/formatters/sparksql/parse-tree-analyzer.d.ts +264 -0
  37. package/dist/formatters/sparksql/parse-tree-analyzer.js +1956 -0
  38. package/dist/formatters/sparksql/sql-formatter.d.ts +25 -0
  39. package/dist/formatters/sparksql/sql-formatter.js +56 -0
  40. package/dist/formatters/sparksql/token-utils.d.ts +68 -0
  41. package/dist/formatters/sparksql/token-utils.js +155 -0
  42. package/dist/formatters/sparksql/types.d.ts +264 -0
  43. package/dist/formatters/sparksql/types.js +7 -0
  44. package/dist/formatters/types.d.ts +57 -0
  45. package/dist/formatters/types.js +7 -0
  46. package/dist/index.d.ts +18 -0
  47. package/dist/index.js +41 -0
  48. package/dist/notebook-formatter.d.ts +107 -0
  49. package/dist/notebook-formatter.js +424 -0
  50. package/package.json +63 -0
@@ -0,0 +1,1956 @@
1
+ /**
2
+ * Parse Tree Analyzer - Collects Formatting Context from AST
3
+ *
4
+ * This visitor walks the ANTLR parse tree and collects information about:
5
+ * - Identifier tokens (preserve casing)
6
+ * - Function call tokens (uppercase)
7
+ * - Clause-starting tokens (newline before)
8
+ * - List item separators (commas in SELECT, GROUP BY, ORDER BY)
9
+ * - Condition separators (AND/OR in WHERE/HAVING)
10
+ * - Subquery boundaries
11
+ * - And many more context-specific positions
12
+ *
13
+ * This is 100% grammar-driven - no hardcoded keyword lists.
14
+ */
15
+ // @ts-ignore - Generated ANTLR code
16
+ import SqlBaseLexer from './generated/SqlBaseLexer.js';
17
+ // @ts-ignore - Generated ANTLR code
18
+ import SqlBaseParser from './generated/SqlBaseParser.js';
19
+ // @ts-ignore - Generated ANTLR code
20
+ import SqlBaseParserVisitor from './generated/SqlBaseParserVisitor.js';
21
+ import { getTokenType } from './token-utils.js';
22
+ /**
23
+ * Visitor that collects context information from parse tree.
24
+ * After visiting, call getResult() to get the analysis.
25
+ */
26
+ export class ParseTreeAnalyzer extends SqlBaseParserVisitor {
27
+ // ========== TOKEN POSITION SETS ==========
28
+ identifierTokens = new Set();
29
+ functionCallTokens = new Set();
30
+ clauseStartTokens = new Set();
31
+ qualifiedNameTokens = new Set(); // Tokens that are part of qualified names (t.column)
32
+ // List formatting
33
+ listItemCommas = new Set();
34
+ listFirstItems = new Set();
35
+ multiItemClauses = new Set();
36
+ // Condition formatting
37
+ conditionOperators = new Set();
38
+ multilineConditionClauses = new Set();
39
+ betweenAndTokens = new Set();
40
+ // Subquery tracking
41
+ subqueryDepth = 0;
42
+ tokenDepthMap = new Map();
43
+ subqueryOpenParens = new Set();
44
+ subqueryCloseParens = new Set();
45
+ setOperandParens = new Set();
46
+ // Alias handling
47
+ aliasInsertPositions = new Set();
48
+ tableAliasAsTokens = new Set(); // AS tokens in table alias context (to be suppressed)
49
+ // JOIN handling
50
+ joinOnTokens = new Set();
51
+ // CTE handling
52
+ cteCommas = new Set();
53
+ cteMainSelectTokens = new Set(); // SELECT tokens of main query after CTE block
54
+ // DDL handling
55
+ ddlColumnCommas = new Set();
56
+ ddlOpenParens = new Set();
57
+ ddlCloseParens = new Set();
58
+ ddlFirstColumn = new Set();
59
+ ddlMultiColumn = new Set();
60
+ // DML handling
61
+ valuesCommas = new Set();
62
+ valuesHasTuples = false; // true if VALUES contains tuples like (a, b), (c, d)
63
+ setClauseCommas = new Set();
64
+ setKeywordToken = -1;
65
+ // CASE expression handling
66
+ multiWhenCaseTokens = new Set();
67
+ caseWhenTokens = new Set();
68
+ caseElseTokens = new Set();
69
+ caseEndTokens = new Set();
70
+ simpleCaseTokens = new Set(); // CASE tokens that have value expressions (simpleCase)
71
+ simpleCaseValueEndTokens = new Set(); // Tokens after value in CASE x WHEN ...
72
+ // Grouping analytics
73
+ groupingAnalyticsParens = new Set();
74
+ insideGroupingAnalytics = false;
75
+ // EXCEPT clause (column exclusion in SELECT)
76
+ exceptClauseTokens = new Set(); // tokens inside EXCEPT (...) for column exclusion
77
+ // SET configuration
78
+ setConfigTokens = new Set();
79
+ // MERGE statement
80
+ mergeUsingTokens = new Set();
81
+ mergeOnTokens = new Set();
82
+ mergeWhenTokens = new Set();
83
+ // LATERAL VIEW
84
+ lateralViewCommas = new Set();
85
+ // GROUP BY ALL
86
+ groupByAllTokens = new Set();
87
+ // Multi-arg function expansion
88
+ multiArgFunctionInfo = new Map();
89
+ // Window definition expansion
90
+ windowDefInfo = new Map();
91
+ // PIVOT/UNPIVOT expansion
92
+ pivotInfo = new Map();
93
+ // IN list wrapping
94
+ inListInfo = new Map();
95
+ // Simple query compaction
96
+ simpleQueries = new Map();
97
+ // Internal state
98
+ currentSelectToken = -1;
99
+ // ========== PUBLIC API ==========
100
+ /**
101
+ * Get the complete analysis result after visiting.
102
+ */
103
+ getResult() {
104
+ return {
105
+ identifierTokens: this.identifierTokens,
106
+ functionCallTokens: this.functionCallTokens,
107
+ clauseStartTokens: this.clauseStartTokens,
108
+ qualifiedNameTokens: this.qualifiedNameTokens,
109
+ listItemCommas: this.listItemCommas,
110
+ listFirstItems: this.listFirstItems,
111
+ multiItemClauses: this.multiItemClauses,
112
+ conditionOperators: this.conditionOperators,
113
+ multilineConditionClauses: this.multilineConditionClauses,
114
+ betweenAndTokens: this.betweenAndTokens,
115
+ tokenDepthMap: this.tokenDepthMap,
116
+ subqueryOpenParens: this.subqueryOpenParens,
117
+ subqueryCloseParens: this.subqueryCloseParens,
118
+ setOperandParens: this.setOperandParens,
119
+ aliasInsertPositions: this.aliasInsertPositions,
120
+ tableAliasAsTokens: this.tableAliasAsTokens,
121
+ joinOnTokens: this.joinOnTokens,
122
+ cteCommas: this.cteCommas,
123
+ cteMainSelectTokens: this.cteMainSelectTokens,
124
+ ddlColumnCommas: this.ddlColumnCommas,
125
+ ddlOpenParens: this.ddlOpenParens,
126
+ ddlCloseParens: this.ddlCloseParens,
127
+ ddlFirstColumn: this.ddlFirstColumn,
128
+ ddlMultiColumn: this.ddlMultiColumn,
129
+ valuesCommas: this.valuesCommas,
130
+ valuesHasTuples: this.valuesHasTuples,
131
+ setClauseCommas: this.setClauseCommas,
132
+ setKeywordToken: this.setKeywordToken,
133
+ multiWhenCaseTokens: this.multiWhenCaseTokens,
134
+ caseWhenTokens: this.caseWhenTokens,
135
+ caseElseTokens: this.caseElseTokens,
136
+ caseEndTokens: this.caseEndTokens,
137
+ simpleCaseTokens: this.simpleCaseTokens,
138
+ simpleCaseValueEndTokens: this.simpleCaseValueEndTokens,
139
+ groupingAnalyticsParens: this.groupingAnalyticsParens,
140
+ exceptClauseTokens: this.exceptClauseTokens,
141
+ setConfigTokens: this.setConfigTokens,
142
+ mergeUsingTokens: this.mergeUsingTokens,
143
+ mergeOnTokens: this.mergeOnTokens,
144
+ mergeWhenTokens: this.mergeWhenTokens,
145
+ lateralViewCommas: this.lateralViewCommas,
146
+ groupByAllTokens: this.groupByAllTokens,
147
+ multiArgFunctionInfo: this.multiArgFunctionInfo,
148
+ windowDefInfo: this.windowDefInfo,
149
+ pivotInfo: this.pivotInfo,
150
+ inListInfo: this.inListInfo,
151
+ simpleQueries: this.simpleQueries,
152
+ };
153
+ }
154
+ // ========== VISITOR INFRASTRUCTURE ==========
155
+ visit(ctx) {
156
+ if (!ctx)
157
+ return null;
158
+ return this.visitChildren(ctx);
159
+ }
160
+ visitChildren(ctx) {
161
+ if (!ctx?.children)
162
+ return null;
163
+ for (const child of ctx.children) {
164
+ if (child?.accept)
165
+ child.accept(this);
166
+ }
167
+ return null;
168
+ }
169
+ // ========== IDENTIFIER CONTEXTS ==========
170
+ visitIdentifier(ctx) {
171
+ this._markIdentifier(ctx);
172
+ return this.visitChildren(ctx);
173
+ }
174
+ visitStrictIdentifier(ctx) {
175
+ this._markIdentifier(ctx);
176
+ return this.visitChildren(ctx);
177
+ }
178
+ visitQuotedIdentifier(ctx) {
179
+ this._markIdentifier(ctx);
180
+ return this.visitChildren(ctx);
181
+ }
182
+ visitBackQuotedIdentifier(ctx) {
183
+ this._markIdentifier(ctx);
184
+ return this.visitChildren(ctx);
185
+ }
186
+ visitUnquotedIdentifier(ctx) {
187
+ this._markIdentifier(ctx);
188
+ return this.visitChildren(ctx);
189
+ }
190
+ visitErrorCapturingIdentifier(ctx) {
191
+ this._markIdentifier(ctx);
192
+ return this.visitChildren(ctx);
193
+ }
194
+ /**
195
+ * Visit qualified name (e.g., table.column, db.schema.table.column)
196
+ * GRAMMAR-DRIVEN: qualifiedName : identifier (DOT identifier)*
197
+ *
198
+ * Context-sensitive keyword handling: In qualified names, even tokens that are
199
+ * keywords (like USER, TABLE) should be treated as identifiers and preserve casing.
200
+ * This is because the grammar context (qualifiedName rule) makes them identifiers.
201
+ */
202
+ visitQualifiedName(ctx) {
203
+ // Mark all tokens in the qualified name as identifiers, except DOT tokens
204
+ if (ctx.start && ctx.stop) {
205
+ for (let i = ctx.start.tokenIndex; i <= ctx.stop.tokenIndex; i++) {
206
+ this.identifierTokens.add(i);
207
+ this.qualifiedNameTokens.add(i); // Also track as qualified name
208
+ }
209
+ }
210
+ // Still visit children to handle nested contexts
211
+ return this.visitChildren(ctx);
212
+ }
213
+ /**
214
+ * Visit dereference (field access like user.address, table.column)
215
+ * GRAMMAR-DRIVEN: base=primaryExpression DOT fieldName=identifier
216
+ *
217
+ * When a keyword like USER or TABLE appears before DOT, it should be treated
218
+ * as an identifier (table/column alias), not as a keyword.
219
+ * Similarly, keywords appearing as field names (like KEY, ORDER) should preserve casing.
220
+ */
221
+ visitDereference(ctx) {
222
+ // Mark the base token as an identifier when it's being dereferenced
223
+ // This handles cases like: user.address where USER is a keyword but should be preserved
224
+ if (ctx.base && ctx.base.start) {
225
+ // Mark the base expression tokens as identifiers
226
+ for (let i = ctx.base.start.tokenIndex; i <= (ctx.base.stop?.tokenIndex ?? ctx.base.start.tokenIndex); i++) {
227
+ this.identifierTokens.add(i);
228
+ this.qualifiedNameTokens.add(i); // Also track as qualified name
229
+ }
230
+ }
231
+ // Mark the field name (right side after dot) as an identifier
232
+ // This handles cases like: a.key, a.order where KEY, ORDER are keywords but used as column names
233
+ if (ctx.fieldName && ctx.fieldName.start) {
234
+ for (let i = ctx.fieldName.start.tokenIndex; i <= (ctx.fieldName.stop?.tokenIndex ?? ctx.fieldName.start.tokenIndex); i++) {
235
+ this.identifierTokens.add(i);
236
+ this.qualifiedNameTokens.add(i); // Also track as qualified name
237
+ }
238
+ }
239
+ return this.visitChildren(ctx);
240
+ }
241
+ // ========== FUNCTION CALL CONTEXTS ==========
242
+ visitFunctionCall(ctx) {
243
+ if (ctx.start) {
244
+ this.functionCallTokens.add(ctx.start.tokenIndex);
245
+ }
246
+ // Check for multi-arg functions
247
+ const args = ctx.argument;
248
+ if (args && args.length >= 2) {
249
+ this._collectMultiArgFunctionInfo(ctx, args.length);
250
+ }
251
+ return this.visitChildren(ctx);
252
+ }
253
+ visitFunctionName(ctx) {
254
+ if (ctx.start) {
255
+ this.functionCallTokens.add(ctx.start.tokenIndex);
256
+ }
257
+ return this.visitChildren(ctx);
258
+ }
259
+ visitFirst(ctx) {
260
+ if (ctx.start)
261
+ this.functionCallTokens.add(ctx.start.tokenIndex);
262
+ return this.visitChildren(ctx);
263
+ }
264
+ visitLast(ctx) {
265
+ if (ctx.start)
266
+ this.functionCallTokens.add(ctx.start.tokenIndex);
267
+ return this.visitChildren(ctx);
268
+ }
269
+ visitAny_value(ctx) {
270
+ if (ctx.start)
271
+ this.functionCallTokens.add(ctx.start.tokenIndex);
272
+ return this.visitChildren(ctx);
273
+ }
274
+ visitStruct(ctx) {
275
+ if (ctx.start)
276
+ this.functionCallTokens.add(ctx.start.tokenIndex);
277
+ return this.visitChildren(ctx);
278
+ }
279
+ visitExtract(ctx) {
280
+ if (ctx.start)
281
+ this.functionCallTokens.add(ctx.start.tokenIndex);
282
+ return this.visitChildren(ctx);
283
+ }
284
+ visitCast(ctx) {
285
+ if (ctx.start)
286
+ this.functionCallTokens.add(ctx.start.tokenIndex);
287
+ // Collect CAST as potentially expandable
288
+ if (ctx.children) {
289
+ let leftParenTokenIndex = null;
290
+ let leftParenCharStart = 0;
291
+ let rightParenTokenIndex = null;
292
+ for (const child of ctx.children) {
293
+ if (child.symbol) {
294
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
295
+ if (symName === 'LEFT_PAREN' && leftParenTokenIndex === null) {
296
+ leftParenTokenIndex = child.symbol.tokenIndex;
297
+ leftParenCharStart = child.symbol.start ?? 0;
298
+ }
299
+ else if (symName === 'RIGHT_PAREN') {
300
+ rightParenTokenIndex = child.symbol.tokenIndex;
301
+ }
302
+ }
303
+ }
304
+ if (leftParenTokenIndex !== null && rightParenTokenIndex !== null) {
305
+ const spanLength = this._calculateNormalizedSpanLength(ctx);
306
+ this.multiArgFunctionInfo.set(leftParenTokenIndex, {
307
+ closeParenIndex: rightParenTokenIndex,
308
+ commaIndices: [],
309
+ spanLength: spanLength,
310
+ functionName: 'CAST',
311
+ charStart: leftParenCharStart
312
+ });
313
+ }
314
+ }
315
+ return this.visitChildren(ctx);
316
+ }
317
+ visitPosition(ctx) {
318
+ if (ctx.start)
319
+ this.functionCallTokens.add(ctx.start.tokenIndex);
320
+ return this.visitChildren(ctx);
321
+ }
322
+ visitTimestampadd(ctx) {
323
+ if (ctx.start)
324
+ this.functionCallTokens.add(ctx.start.tokenIndex);
325
+ return this.visitChildren(ctx);
326
+ }
327
+ visitTimestampdiff(ctx) {
328
+ if (ctx.start)
329
+ this.functionCallTokens.add(ctx.start.tokenIndex);
330
+ return this.visitChildren(ctx);
331
+ }
332
+ visitLateralView(ctx) {
333
+ if (ctx.children) {
334
+ let foundRightParen = false;
335
+ for (const child of ctx.children) {
336
+ if (child.ruleIndex !== undefined) {
337
+ const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
338
+ if (ruleName === 'qualifiedName' && child.start) {
339
+ this.functionCallTokens.add(child.start.tokenIndex);
340
+ }
341
+ }
342
+ if (child.symbol) {
343
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
344
+ if (symName === 'RIGHT_PAREN') {
345
+ foundRightParen = true;
346
+ }
347
+ if (foundRightParen && symName === 'COMMA') {
348
+ this.lateralViewCommas.add(child.symbol.tokenIndex);
349
+ }
350
+ }
351
+ }
352
+ }
353
+ return this.visitChildren(ctx);
354
+ }
355
+ // ========== CASE EXPRESSION CONTEXTS ==========
356
+ visitSearchedCase(ctx) {
357
+ this._analyzeCaseExpression(ctx);
358
+ return this.visitChildren(ctx);
359
+ }
360
+ visitSimpleCase(ctx) {
361
+ this._analyzeCaseExpression(ctx);
362
+ return this.visitChildren(ctx);
363
+ }
364
+ // ========== CLAUSE-STARTING CONTEXTS ==========
365
+ visitExceptClause(ctx) {
366
+ // Mark all tokens inside EXCEPT (...) clause for column exclusion
367
+ // These tokens should not trigger expansion
368
+ this._markAllDescendantTokens(ctx, this.exceptClauseTokens);
369
+ return this.visitChildren(ctx);
370
+ }
371
+ visitFromClause(ctx) {
372
+ this._markClauseStart(ctx);
373
+ return this.visitChildren(ctx);
374
+ }
375
+ /**
376
+ * Visit table alias context and mark AS tokens for suppression.
377
+ * Style guide says table aliases should NOT have AS keyword.
378
+ * Grammar: tableAlias: (AS? strictIdentifier identifierList?)?
379
+ */
380
+ visitTableAlias(ctx) {
381
+ // Check if this table alias has an AS keyword
382
+ if (ctx.AS && typeof ctx.AS === 'function') {
383
+ const asToken = ctx.AS();
384
+ if (asToken && asToken.symbol) {
385
+ this.tableAliasAsTokens.add(asToken.symbol.tokenIndex);
386
+ }
387
+ }
388
+ return this.visitChildren(ctx);
389
+ }
390
+ visitAggregationClause(ctx) {
391
+ this._markClauseStart(ctx);
392
+ this._markGroupByAllToken(ctx);
393
+ const commaCount = this._markListCommasExcludingGroupingAnalytics(ctx);
394
+ if (commaCount > 0 && ctx.start) {
395
+ let actualCommaCount = 0;
396
+ if (ctx.children) {
397
+ for (const child of ctx.children) {
398
+ if (child.symbol && child.symbol.type === getTokenType('COMMA')) {
399
+ if (this.listItemCommas.has(child.symbol.tokenIndex)) {
400
+ actualCommaCount++;
401
+ }
402
+ }
403
+ }
404
+ }
405
+ if (actualCommaCount > 0) {
406
+ this.multiItemClauses.add(ctx.start.tokenIndex);
407
+ }
408
+ }
409
+ return this.visitChildren(ctx);
410
+ }
411
+ visitGroupingAnalytics(ctx) {
412
+ let isRollupOrCube = false;
413
+ if (ctx.children) {
414
+ for (const child of ctx.children) {
415
+ if (child.symbol) {
416
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
417
+ if (symName === 'ROLLUP' || symName === 'CUBE') {
418
+ isRollupOrCube = true;
419
+ }
420
+ else if (symName === 'LEFT_PAREN') {
421
+ this.groupingAnalyticsParens.add(child.symbol.tokenIndex);
422
+ if (isRollupOrCube) {
423
+ const parenIndex = child.symbol.tokenIndex;
424
+ for (const c of ctx.children) {
425
+ if (c.symbol) {
426
+ const sn = SqlBaseLexer.symbolicNames[c.symbol.type];
427
+ if ((sn === 'ROLLUP' || sn === 'CUBE') && c.symbol.tokenIndex < parenIndex) {
428
+ this.functionCallTokens.add(c.symbol.tokenIndex);
429
+ break;
430
+ }
431
+ }
432
+ }
433
+ }
434
+ break;
435
+ }
436
+ }
437
+ }
438
+ }
439
+ const wasInside = this.insideGroupingAnalytics;
440
+ this.insideGroupingAnalytics = true;
441
+ const result = this.visitChildren(ctx);
442
+ this.insideGroupingAnalytics = wasInside;
443
+ return result;
444
+ }
445
+ visitQueryOrganization(ctx) {
446
+ let orderTokenIndex = null;
447
+ if (ctx.children) {
448
+ for (const child of ctx.children) {
449
+ if (child.symbol) {
450
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
451
+ if (symName === 'ORDER') {
452
+ this.clauseStartTokens.add(child.symbol.tokenIndex);
453
+ orderTokenIndex = child.symbol.tokenIndex;
454
+ }
455
+ else if (symName === 'LIMIT') {
456
+ this.clauseStartTokens.add(child.symbol.tokenIndex);
457
+ }
458
+ }
459
+ }
460
+ }
461
+ const commaCount = this._markListCommasExcludingGroupingAnalytics(ctx);
462
+ if (commaCount > 0 && orderTokenIndex !== null) {
463
+ this.multiItemClauses.add(orderTokenIndex);
464
+ }
465
+ return this.visitChildren(ctx);
466
+ }
467
+ visitSortItem(ctx) {
468
+ return this.visitChildren(ctx);
469
+ }
470
+ visitLimitClause(ctx) {
471
+ this._markClauseStart(ctx);
472
+ return this.visitChildren(ctx);
473
+ }
474
+ visitJoinRelation(ctx) {
475
+ this._markClauseStart(ctx);
476
+ this._analyzeJoinConditions(ctx);
477
+ const onTokenIndex = this._findOnToken(ctx);
478
+ if (onTokenIndex !== -1) {
479
+ this.joinOnTokens.add(onTokenIndex);
480
+ }
481
+ return this.visitChildren(ctx);
482
+ }
483
+ visitWindowDef(ctx) {
484
+ // Visit children FIRST so nested functions are collected before we check them
485
+ const result = this.visitChildren(ctx);
486
+ this._collectWindowDefInfo(ctx);
487
+ return result;
488
+ }
489
+ // ========== PIVOT/UNPIVOT CONTEXTS ==========
490
+ visitPivotClause(ctx) {
491
+ this._collectPivotInfo(ctx, false);
492
+ return this.visitChildren(ctx);
493
+ }
494
+ visitUnpivotClause(ctx) {
495
+ this._collectPivotInfo(ctx, true);
496
+ return this.visitChildren(ctx);
497
+ }
498
+ visitSetOperation(ctx) {
499
+ if (ctx.children) {
500
+ let foundSetOperator = false;
501
+ for (const child of ctx.children) {
502
+ if (child.symbol) {
503
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
504
+ if (symName === 'UNION' || symName === 'EXCEPT' || symName === 'INTERSECT') {
505
+ this.clauseStartTokens.add(child.symbol.tokenIndex);
506
+ foundSetOperator = true;
507
+ }
508
+ }
509
+ else {
510
+ if (foundSetOperator) {
511
+ const subquery = this._findSubqueryContext(child);
512
+ if (subquery && subquery.start) {
513
+ this.setOperandParens.add(subquery.start.tokenIndex);
514
+ }
515
+ }
516
+ }
517
+ }
518
+ }
519
+ return this.visitChildren(ctx);
520
+ }
521
+ visitSelectClause(ctx) {
522
+ this._markClauseStart(ctx);
523
+ if (ctx.start) {
524
+ this.currentSelectToken = ctx.start.tokenIndex;
525
+ }
526
+ return this.visitChildren(ctx);
527
+ }
528
+ visitNamedExpression(ctx) {
529
+ const hasAlias = ctx.errorCapturingIdentifier && ctx.errorCapturingIdentifier();
530
+ const hasAS = ctx.AS && ctx.AS();
531
+ if (hasAlias && !hasAS) {
532
+ const expr = ctx.expression && ctx.expression();
533
+ const alias = ctx.errorCapturingIdentifier();
534
+ if (expr && expr.stop && alias && alias.start) {
535
+ const aliasIndex = alias.start.tokenIndex;
536
+ this.aliasInsertPositions.add(aliasIndex);
537
+ }
538
+ }
539
+ return this.visitChildren(ctx);
540
+ }
541
+ // ========== LIST CONTEXTS ==========
542
+ visitNamedExpressionSeq(ctx) {
543
+ const parentClass = ctx.parentCtx?.constructor?.name || '';
544
+ if (parentClass === 'PivotClauseContext' ||
545
+ parentClass === 'UnpivotClauseContext' ||
546
+ parentClass === 'LateralViewContext') {
547
+ return this.visitChildren(ctx);
548
+ }
549
+ const hasMultiple = this._markListContext(ctx);
550
+ if (hasMultiple && this.currentSelectToken >= 0) {
551
+ this.multiItemClauses.add(this.currentSelectToken);
552
+ }
553
+ return this.visitChildren(ctx);
554
+ }
555
+ visitGroupByClause(ctx) {
556
+ return this.visitChildren(ctx);
557
+ }
558
+ // ========== CONDITION CONTEXTS ==========
559
+ visitWhereClause(ctx) {
560
+ this._markClauseStart(ctx);
561
+ this._scanForBetweenAnd(ctx);
562
+ this._analyzeConditionClause(ctx);
563
+ return this.visitChildren(ctx);
564
+ }
565
+ visitHavingClause(ctx) {
566
+ this._markClauseStart(ctx);
567
+ this._scanForBetweenAnd(ctx);
568
+ this._analyzeConditionClause(ctx);
569
+ return this.visitChildren(ctx);
570
+ }
571
+ visitPredicate(ctx) {
572
+ if (ctx.children) {
573
+ let hasBetween = false;
574
+ let hasQuery = false;
575
+ for (const child of ctx.children) {
576
+ if (child.symbol) {
577
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
578
+ if (symName === 'BETWEEN') {
579
+ hasBetween = true;
580
+ }
581
+ else if (symName === 'AND' && hasBetween) {
582
+ this.betweenAndTokens.add(child.symbol.tokenIndex);
583
+ }
584
+ }
585
+ else if (child.ruleIndex !== undefined) {
586
+ const ruleName = child.constructor?.name;
587
+ if (ruleName === 'QueryContext') {
588
+ hasQuery = true;
589
+ }
590
+ }
591
+ }
592
+ if (hasQuery) {
593
+ this._markSubqueryParens(ctx);
594
+ }
595
+ }
596
+ // Also collect IN list info for wrapping
597
+ this._collectInListInfo(ctx);
598
+ return this.visitChildren(ctx);
599
+ }
600
+ // ========== CTE CONTEXTS ==========
601
+ // Handle the top-level query rule: query = ctes? queryTerm queryOrganization
602
+ // This marks the main SELECT after CTE definitions
603
+ visitQuery(ctx) {
604
+ // Check if this query has CTEs
605
+ let hasCtes = false;
606
+ let queryTermChild = null;
607
+ if (ctx.children) {
608
+ for (const child of ctx.children) {
609
+ if (child.ruleIndex !== undefined) {
610
+ const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
611
+ if (ruleName === 'ctes') {
612
+ hasCtes = true;
613
+ }
614
+ else if (ruleName === 'queryTerm') {
615
+ queryTermChild = child;
616
+ }
617
+ }
618
+ }
619
+ }
620
+ // If CTEs exist, find and mark the first SELECT token of the main query
621
+ if (hasCtes && queryTermChild) {
622
+ const selectToken = this._findFirstSelectToken(queryTermChild);
623
+ if (selectToken !== null) {
624
+ this.cteMainSelectTokens.add(selectToken);
625
+ }
626
+ }
627
+ return this.visitChildren(ctx);
628
+ }
629
+ // Helper to find the first SELECT token in a queryTerm subtree
630
+ _findFirstSelectToken(ctx) {
631
+ if (!ctx)
632
+ return null;
633
+ // Check if this node has a SELECT token
634
+ if (ctx.symbol && ctx.symbol.type === getTokenType('SELECT')) {
635
+ return ctx.symbol.tokenIndex;
636
+ }
637
+ // Recurse into children
638
+ if (ctx.children) {
639
+ for (const child of ctx.children) {
640
+ const result = this._findFirstSelectToken(child);
641
+ if (result !== null)
642
+ return result;
643
+ }
644
+ }
645
+ return null;
646
+ }
647
+ visitCtes(ctx) {
648
+ this._markClauseStart(ctx);
649
+ if (ctx.children) {
650
+ for (const child of ctx.children) {
651
+ if (child.symbol && child.symbol.type === getTokenType('COMMA')) {
652
+ this.cteCommas.add(child.symbol.tokenIndex);
653
+ }
654
+ }
655
+ }
656
+ return this.visitChildren(ctx);
657
+ }
658
+ visitNamedQuery(ctx) {
659
+ // Increment depth for CTE body - it's effectively a subquery
660
+ this.subqueryDepth++;
661
+ if (ctx.children) {
662
+ for (const child of ctx.children) {
663
+ if (child.symbol) {
664
+ const tokenType = child.symbol.type;
665
+ if (tokenType === getTokenType('LEFT_PAREN')) {
666
+ this.subqueryOpenParens.add(child.symbol.tokenIndex);
667
+ }
668
+ else if (tokenType === getTokenType('RIGHT_PAREN')) {
669
+ this.subqueryCloseParens.add(child.symbol.tokenIndex);
670
+ }
671
+ }
672
+ }
673
+ }
674
+ const result = this.visitChildren(ctx);
675
+ this.subqueryDepth--;
676
+ return result;
677
+ }
678
+ // ========== SUBQUERY CONTEXTS ==========
679
+ visitAliasedQuery(ctx) {
680
+ this._markSubqueryParens(ctx);
681
+ return this.visitChildren(ctx);
682
+ }
683
+ visitExists(ctx) {
684
+ this._markSubqueryParens(ctx);
685
+ return this.visitChildren(ctx);
686
+ }
687
+ visitSubqueryExpression(ctx) {
688
+ this._markSubqueryParens(ctx);
689
+ return this.visitChildren(ctx);
690
+ }
691
+ visitSubquery(ctx) {
692
+ this._markSubqueryParens(ctx);
693
+ return this.visitChildren(ctx);
694
+ }
695
+ // ========== DDL CONTEXTS ==========
696
+ visitCreateTableHeader(ctx) {
697
+ return this.visitChildren(ctx);
698
+ }
699
+ visitCreateTable(ctx) {
700
+ this._markDdlColumnList(ctx);
701
+ return this.visitChildren(ctx);
702
+ }
703
+ visitCreateUserDefinedFunction(ctx) {
704
+ // Mark the function name (identifierReference) as a function call
705
+ // so there's no space before the opening paren: CREATE FUNCTION f(...) not f (...)
706
+ if (ctx.children) {
707
+ for (const child of ctx.children) {
708
+ if (child.ruleIndex !== undefined) {
709
+ const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
710
+ if (ruleName === 'identifierReference' && child.start) {
711
+ this.functionCallTokens.add(child.start.tokenIndex);
712
+ break; // Only mark the first one (function name)
713
+ }
714
+ }
715
+ }
716
+ }
717
+ return this.visitChildren(ctx);
718
+ }
719
+ // ========== DML CONTEXTS ==========
720
+ visitInsertInto(ctx) {
721
+ this._markValuesCommas(ctx);
722
+ return this.visitChildren(ctx);
723
+ }
724
+ visitInlineTable(ctx) {
725
+ this._markValuesCommas(ctx);
726
+ return this.visitChildren(ctx);
727
+ }
728
+ visitUpdateTable(ctx) {
729
+ const commaCount = this._markSetClause(ctx, false, 0);
730
+ if (commaCount > 0 && this.setKeywordToken >= 0) {
731
+ this.multiItemClauses.add(this.setKeywordToken);
732
+ }
733
+ return this.visitChildren(ctx);
734
+ }
735
+ // ========== SET CONFIGURATION ==========
736
+ visitSetConfiguration(ctx) {
737
+ this._markSetConfigTokens(ctx);
738
+ return this.visitChildren(ctx);
739
+ }
740
+ visitResetConfiguration(ctx) {
741
+ // GRAMMAR-DRIVEN: RESET .*?
742
+ // Mark all tokens after RESET as configuration tokens to preserve casing
743
+ this._markResetConfigTokens(ctx);
744
+ return this.visitChildren(ctx);
745
+ }
746
+ // ========== MERGE STATEMENT ==========
747
+ visitMergeIntoTable(ctx) {
748
+ this._markMergeClauses(ctx);
749
+ return this.visitChildren(ctx);
750
+ }
751
+ // ========== QUERY DEPTH TRACKING ==========
752
+ visitQuerySpecification(ctx) {
753
+ return this._visitQuerySpec(ctx);
754
+ }
755
+ visitRegularQuerySpecification(ctx) {
756
+ return this._visitQuerySpec(ctx);
757
+ }
758
+ _visitQuerySpec(ctx) {
759
+ const currentDepth = this.subqueryDepth;
760
+ this.subqueryDepth++;
761
+ this._markDepthForContext(ctx);
762
+ // Analyze if this query is simple enough to stay compact
763
+ this._analyzeSimpleQuery(ctx, currentDepth);
764
+ const result = this.visitChildren(ctx);
765
+ this.subqueryDepth--;
766
+ return result;
767
+ }
768
+ /**
769
+ * Check if a context is inside a CREATE VIEW/TABLE statement at the top level.
770
+ * Queries inside these DDL statements should never be compacted.
771
+ */
772
+ _isInsideCreateStatement(ctx) {
773
+ let node = ctx?.parentCtx;
774
+ while (node) {
775
+ const className = node.constructor?.name || '';
776
+ // Check for CREATE VIEW variants
777
+ if (className === 'CreateViewContext' ||
778
+ className === 'CreateTempViewUsingContext') {
779
+ return true;
780
+ }
781
+ node = node.parentCtx;
782
+ }
783
+ return false;
784
+ }
785
+ /**
786
+ * Analyze if a query is simple enough to stay on one line.
787
+ * Simple query criteria:
788
+ * - SELECT has 1 item (including *, t.*)
789
+ * - FROM has 1 table (no JOINs)
790
+ * - WHERE has 0 or 1 condition (no AND/OR at top level)
791
+ * - No GROUP BY, ORDER BY, HAVING, or single-item versions
792
+ * - No LIMIT/OFFSET or simple LIMIT
793
+ * - NOT inside a CREATE VIEW/TABLE statement (those always expand)
794
+ */
795
+ _analyzeSimpleQuery(ctx, depth) {
796
+ if (!ctx || !ctx.children)
797
+ return;
798
+ // Never compact queries inside CREATE statements
799
+ if (depth === 0 && this._isInsideCreateStatement(ctx))
800
+ return;
801
+ let selectClause = null;
802
+ let fromClause = null;
803
+ let whereClause = null;
804
+ let hasJoin = false;
805
+ let hasGroupBy = false;
806
+ let hasOrderBy = false;
807
+ let hasHaving = false;
808
+ let hasLimit = false;
809
+ let selectToken = null;
810
+ // Scan children to find clauses
811
+ for (const child of ctx.children) {
812
+ if (!child)
813
+ continue;
814
+ const ruleName = child.ruleIndex !== undefined ? SqlBaseParser.ruleNames[child.ruleIndex] : null;
815
+ const className = child.constructor?.name || '';
816
+ if (className === 'SelectClauseContext' || ruleName === 'selectClause') {
817
+ selectClause = child;
818
+ if (child.start) {
819
+ selectToken = child.start;
820
+ }
821
+ }
822
+ else if (className === 'FromClauseContext' || ruleName === 'fromClause') {
823
+ fromClause = child;
824
+ // Check for JOINs in FROM clause
825
+ hasJoin = this._hasJoinInFromClause(child);
826
+ // Also check for PIVOT/UNPIVOT in FROM clause
827
+ if (this._hasPivotUnpivotInFromClause(child)) {
828
+ hasJoin = true; // Treat PIVOT/UNPIVOT like a JOIN for simplicity
829
+ }
830
+ }
831
+ else if (className === 'WhereClauseContext' || ruleName === 'whereClause') {
832
+ whereClause = child;
833
+ }
834
+ else if (className === 'AggregationClauseContext' || ruleName === 'aggregationClause') {
835
+ hasGroupBy = true;
836
+ }
837
+ else if (className === 'HavingClauseContext' || ruleName === 'havingClause') {
838
+ hasHaving = true;
839
+ }
840
+ }
841
+ // Check parent for ORDER BY / LIMIT (they're in queryOrganization, not querySpecification)
842
+ // For now, we'll handle this by checking if ORDER BY/LIMIT tokens are in our range
843
+ // Can't be simple if has JOINs
844
+ if (hasJoin)
845
+ return;
846
+ // Can't be simple if has GROUP BY or HAVING (for now - could relax for single-item)
847
+ if (hasGroupBy || hasHaving)
848
+ return;
849
+ // Check SELECT clause - must have single item
850
+ if (!selectClause || !this._hasSingleSelectItem(selectClause))
851
+ return;
852
+ // Check for multi-WHEN CASE expressions (which force expansion)
853
+ if (selectClause && this._hasMultiWhenCase(selectClause))
854
+ return;
855
+ // Check WHERE clause - must have 0 or 1 condition (no AND/OR)
856
+ if (whereClause && this._hasMultipleConditions(whereClause))
857
+ return;
858
+ // This query qualifies as simple
859
+ // Use forExpansion=false to get actual span regardless of input layout
860
+ if (selectToken) {
861
+ const spanLength = this._calculateSpanLength(ctx, false);
862
+ this.simpleQueries.set(selectToken.tokenIndex, {
863
+ selectTokenIndex: selectToken.tokenIndex,
864
+ spanLength: spanLength,
865
+ depth: depth,
866
+ });
867
+ }
868
+ }
869
+ /**
870
+ * Check if FROM clause contains any JOINs.
871
+ */
872
+ _hasJoinInFromClause(fromClause) {
873
+ if (!fromClause || !fromClause.children)
874
+ return false;
875
+ const checkForJoin = (node) => {
876
+ if (!node)
877
+ return false;
878
+ const className = node.constructor?.name || '';
879
+ if (className === 'JoinRelationContext')
880
+ return true;
881
+ if (node.symbol) {
882
+ const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
883
+ if (symName === 'JOIN' || symName === 'CROSS' || symName === 'NATURAL') {
884
+ return true;
885
+ }
886
+ }
887
+ if (node.children) {
888
+ for (const child of node.children) {
889
+ if (checkForJoin(child))
890
+ return true;
891
+ }
892
+ }
893
+ return false;
894
+ };
895
+ return checkForJoin(fromClause);
896
+ }
897
+ /**
898
+ * Check if FROM clause contains PIVOT or UNPIVOT with many items.
899
+ * Simple PIVOT with few items can stay compact.
900
+ */
901
+ _hasPivotUnpivotInFromClause(fromClause) {
902
+ if (!fromClause || !fromClause.children)
903
+ return false;
904
+ const checkForComplexPivot = (node) => {
905
+ if (!node)
906
+ return false;
907
+ const className = node.constructor?.name || '';
908
+ if (className === 'PivotClauseContext' || className === 'UnpivotClauseContext') {
909
+ // Count commas to estimate complexity
910
+ let commaCount = 0;
911
+ const countCommas = (n) => {
912
+ if (!n)
913
+ return;
914
+ if (n.symbol && n.symbol.type === getTokenType('COMMA')) {
915
+ commaCount++;
916
+ }
917
+ if (n.children) {
918
+ for (const c of n.children)
919
+ countCommas(c);
920
+ }
921
+ };
922
+ countCommas(node);
923
+ // If more than ~6 commas, it's complex (multiple aggregates + many IN items)
924
+ return commaCount > 6;
925
+ }
926
+ if (node.children) {
927
+ for (const child of node.children) {
928
+ if (checkForComplexPivot(child))
929
+ return true;
930
+ }
931
+ }
932
+ return false;
933
+ };
934
+ return checkForComplexPivot(fromClause);
935
+ }
936
+ /**
937
+ * Check if SELECT clause has a single item (*, t.*, or one expression).
938
+ */
939
+ _hasSingleSelectItem(selectClause) {
940
+ if (!selectClause || !selectClause.children)
941
+ return false;
942
+ // Look for namedExpressionSeq
943
+ for (const child of selectClause.children) {
944
+ const className = child.constructor?.name || '';
945
+ if (className === 'NamedExpressionSeqContext') {
946
+ // Count items by looking for commas that are DIRECT children of namedExpressionSeq
947
+ // Commas inside function calls, type parameters, etc. should not be counted
948
+ let commaCount = 0;
949
+ if (child.children) {
950
+ for (const seqChild of child.children) {
951
+ if (seqChild.symbol && seqChild.symbol.type === getTokenType('COMMA')) {
952
+ commaCount++;
953
+ }
954
+ }
955
+ }
956
+ return commaCount === 0; // Single item means no commas
957
+ }
958
+ }
959
+ return true; // Default to true if no namedExpressionSeq (like SELECT *)
960
+ }
961
+ /**
962
+ * Check if a clause contains a CASE expression with multiple WHEN clauses.
963
+ * Such CASE expressions force expansion and make the query non-compact.
964
+ */
965
+ _hasMultiWhenCase(clause) {
966
+ if (!clause)
967
+ return false;
968
+ const checkForMultiWhenCase = (node) => {
969
+ if (!node)
970
+ return false;
971
+ const className = node.constructor?.name || '';
972
+ // Check for simpleCase or searchedCase contexts
973
+ if (className === 'SimpleCaseContext' || className === 'SearchedCaseContext') {
974
+ // Count WHEN tokens
975
+ let whenCount = 0;
976
+ if (node.children) {
977
+ for (const child of node.children) {
978
+ if (child.symbol) {
979
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
980
+ if (symName === 'WHEN')
981
+ whenCount++;
982
+ }
983
+ // Also check whenClause contexts
984
+ const childClassName = child.constructor?.name || '';
985
+ if (childClassName === 'WhenClauseContext')
986
+ whenCount++;
987
+ }
988
+ }
989
+ if (whenCount > 1)
990
+ return true;
991
+ }
992
+ // Recurse into children
993
+ if (node.children) {
994
+ for (const child of node.children) {
995
+ if (checkForMultiWhenCase(child))
996
+ return true;
997
+ }
998
+ }
999
+ return false;
1000
+ };
1001
+ return checkForMultiWhenCase(clause);
1002
+ }
1003
+ /**
1004
+ * Check if WHERE/HAVING clause has multiple conditions (AND/OR at top level).
1005
+ */
1006
+ _hasMultipleConditions(clause) {
1007
+ if (!clause || !clause.children)
1008
+ return false;
1009
+ // Find the predicated expression and check for AND/OR
1010
+ const checkForAndOr = (node, depth) => {
1011
+ if (!node)
1012
+ return false;
1013
+ if (depth > 3)
1014
+ return false; // Don't go too deep
1015
+ if (node.symbol) {
1016
+ const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
1017
+ if (symName === 'AND' || symName === 'OR') {
1018
+ return true;
1019
+ }
1020
+ }
1021
+ // Check for logicalBinary rule which indicates AND/OR
1022
+ const className = node.constructor?.name || '';
1023
+ if (className === 'LogicalBinaryContext') {
1024
+ return true;
1025
+ }
1026
+ if (node.children) {
1027
+ for (const child of node.children) {
1028
+ if (checkForAndOr(child, depth + 1))
1029
+ return true;
1030
+ }
1031
+ }
1032
+ return false;
1033
+ };
1034
+ return checkForAndOr(clause, 0);
1035
+ }
1036
+ // ========== PRIVATE HELPER METHODS ==========
1037
+ /**
1038
+ * Calculate the expected formatted span length of a context.
1039
+ *
1040
+ * This walks all tokens within the context and sums:
1041
+ * - Each token's text length
1042
+ * - One space between each pair of tokens (standard formatting)
1043
+ *
1044
+ * This gives an accurate estimate of the formatted output length.
1045
+ *
1046
+ * @param ctx The parse tree context
1047
+ * @param forExpansion If true, returns Infinity for multi-line constructs
1048
+ * to prevent already-expanded constructs from collapsing.
1049
+ * If false, calculates actual span (for simple query detection).
1050
+ */
1051
+ _calculateSpanLength(ctx, forExpansion = true) {
1052
+ if (!ctx || !ctx.start || !ctx.stop)
1053
+ return 0;
1054
+ // For expansion checking: if the construct spans multiple lines, return Infinity
1055
+ // This ensures idempotency: once expanded, it stays expanded
1056
+ // For simple query detection: we want the actual span regardless of input layout
1057
+ if (forExpansion) {
1058
+ const startLine = ctx.start.line;
1059
+ const stopLine = ctx.stop.line;
1060
+ if (startLine !== undefined && stopLine !== undefined && stopLine > startLine) {
1061
+ return Infinity;
1062
+ }
1063
+ }
1064
+ // Collect all tokens within this context by walking the tree
1065
+ const tokens = [];
1066
+ const collectTokens = (node) => {
1067
+ if (!node)
1068
+ return;
1069
+ if (node.symbol) {
1070
+ // This is a terminal node (token)
1071
+ tokens.push(node.symbol.text || '');
1072
+ }
1073
+ else if (node.children) {
1074
+ for (const child of node.children) {
1075
+ collectTokens(child);
1076
+ }
1077
+ }
1078
+ };
1079
+ collectTokens(ctx);
1080
+ if (tokens.length === 0) {
1081
+ // Fallback to character-based
1082
+ const startIdx = ctx.start.start;
1083
+ const stopIdx = ctx.stop.stop;
1084
+ if (startIdx === undefined || stopIdx === undefined)
1085
+ return 0;
1086
+ return stopIdx - startIdx + 1;
1087
+ }
1088
+ // Sum token lengths + (n-1) spaces between tokens
1089
+ const tokenLengths = tokens.reduce((sum, t) => sum + t.length, 0);
1090
+ const spaceBetween = Math.max(0, tokens.length - 1);
1091
+ return tokenLengths + spaceBetween;
1092
+ }
1093
+ /**
1094
+ * Calculate normalized span length independent of input formatting.
1095
+ * This sums up token text lengths + single spaces between tokens,
1096
+ * giving a consistent "single-line" representation length.
1097
+ *
1098
+ * CRITICAL FOR IDEMPOTENCY: Using character positions (_calculateSpanLength)
1099
+ * varies based on how the input is formatted (line breaks, extra spaces).
1100
+ * This causes different expansion decisions on subsequent passes.
1101
+ * By using token text lengths, we get consistent results regardless of input formatting.
1102
+ */
1103
+ _calculateNormalizedSpanLength(ctx) {
1104
+ if (!ctx || !ctx.start || !ctx.stop)
1105
+ return 0;
1106
+ // Walk through all tokens in the context and sum their text lengths
1107
+ let totalLength = 0;
1108
+ let tokenCount = 0;
1109
+ const collectTokens = (node) => {
1110
+ if (!node)
1111
+ return;
1112
+ // If this is a terminal (token), add its text length
1113
+ if (node.symbol) {
1114
+ const text = node.symbol.text;
1115
+ if (text) {
1116
+ totalLength += text.length;
1117
+ tokenCount++;
1118
+ }
1119
+ return;
1120
+ }
1121
+ // Recurse into children
1122
+ if (node.children) {
1123
+ for (const child of node.children) {
1124
+ collectTokens(child);
1125
+ }
1126
+ }
1127
+ };
1128
+ collectTokens(ctx);
1129
+ // Add single space between each token (normalized spacing)
1130
+ if (tokenCount > 1) {
1131
+ totalLength += tokenCount - 1;
1132
+ }
1133
+ return totalLength;
1134
+ }
1135
+ _collectMultiArgFunctionInfo(ctx, argCount) {
1136
+ if (!ctx.children)
1137
+ return;
1138
+ let leftParenTokenIndex = null;
1139
+ let leftParenCharStart = 0;
1140
+ let rightParenTokenIndex = null;
1141
+ const commaTokenIndices = [];
1142
+ // Try to get function name from functionName child
1143
+ let functionName;
1144
+ if (ctx.functionName) {
1145
+ const fnCtx = ctx.functionName();
1146
+ if (fnCtx && fnCtx.getText) {
1147
+ functionName = fnCtx.getText().toUpperCase();
1148
+ }
1149
+ }
1150
+ for (const child of ctx.children) {
1151
+ if (child.symbol) {
1152
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1153
+ if (symName === 'LEFT_PAREN' && leftParenTokenIndex === null) {
1154
+ leftParenTokenIndex = child.symbol.tokenIndex;
1155
+ leftParenCharStart = child.symbol.start ?? 0;
1156
+ }
1157
+ else if (symName === 'RIGHT_PAREN') {
1158
+ rightParenTokenIndex = child.symbol.tokenIndex;
1159
+ break;
1160
+ }
1161
+ else if (symName === 'COMMA') {
1162
+ commaTokenIndices.push(child.symbol.tokenIndex);
1163
+ }
1164
+ }
1165
+ }
1166
+ if (leftParenTokenIndex !== null && rightParenTokenIndex !== null &&
1167
+ commaTokenIndices.length === argCount - 1) {
1168
+ const spanLength = this._calculateNormalizedSpanLength(ctx);
1169
+ this.multiArgFunctionInfo.set(leftParenTokenIndex, {
1170
+ closeParenIndex: rightParenTokenIndex,
1171
+ commaIndices: commaTokenIndices,
1172
+ spanLength: spanLength,
1173
+ functionName: functionName,
1174
+ charStart: leftParenCharStart
1175
+ });
1176
+ }
1177
+ }
1178
+ _collectWindowDefInfo(ctx) {
1179
+ if (!ctx.children)
1180
+ return;
1181
+ let leftParenTokenIndex = null;
1182
+ let rightParenTokenIndex = null;
1183
+ let orderByTokenIndex = null;
1184
+ let windowFrameTokenIndex = null;
1185
+ // Get window's start character position for calculating relative offsets
1186
+ const windowStartChar = ctx.start?.start ?? 0;
1187
+ for (const child of ctx.children) {
1188
+ if (child.symbol) {
1189
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1190
+ if (symName === 'LEFT_PAREN' && leftParenTokenIndex === null) {
1191
+ leftParenTokenIndex = child.symbol.tokenIndex;
1192
+ }
1193
+ else if (symName === 'RIGHT_PAREN') {
1194
+ rightParenTokenIndex = child.symbol.tokenIndex;
1195
+ }
1196
+ else if (symName === 'ORDER' || symName === 'SORT') {
1197
+ orderByTokenIndex = child.symbol.tokenIndex;
1198
+ }
1199
+ }
1200
+ else if (child.ruleIndex !== undefined) {
1201
+ const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
1202
+ if (ruleName === 'windowFrame' && child.children?.[0]?.symbol) {
1203
+ windowFrameTokenIndex = child.children[0].symbol.tokenIndex;
1204
+ }
1205
+ }
1206
+ }
1207
+ // Collect nested multi-arg functions with their relative character offsets
1208
+ const nestedFunctions = [];
1209
+ if (leftParenTokenIndex !== null && rightParenTokenIndex !== null) {
1210
+ for (const [funcIdx, funcInfo] of this.multiArgFunctionInfo) {
1211
+ if (funcIdx > leftParenTokenIndex && funcIdx < rightParenTokenIndex) {
1212
+ // Use the charStart from the function info to calculate relative offset
1213
+ const relativeOffset = funcInfo.charStart - windowStartChar;
1214
+ nestedFunctions.push({ funcIdx, relativeOffset });
1215
+ }
1216
+ }
1217
+ }
1218
+ if (leftParenTokenIndex !== null && rightParenTokenIndex !== null) {
1219
+ const spanLength = this._calculateNormalizedSpanLength(ctx);
1220
+ this.windowDefInfo.set(leftParenTokenIndex, {
1221
+ closeParenIndex: rightParenTokenIndex,
1222
+ orderByTokenIndex: orderByTokenIndex,
1223
+ windowFrameTokenIndex: windowFrameTokenIndex,
1224
+ spanLength: spanLength,
1225
+ nestedFunctions: nestedFunctions
1226
+ });
1227
+ }
1228
+ }
1229
+ /**
1230
+ * Collect IN list information for potential wrapping.
1231
+ * Structure: expr IN (value1, value2, value3, ...)
1232
+ * We want to track the IN list so we can wrap it at max line width.
1233
+ */
1234
+ _collectInListInfo(ctx) {
1235
+ if (!ctx.children)
1236
+ return;
1237
+ // Check if this is an IN predicate (kind=IN)
1238
+ let isInPredicate = false;
1239
+ let inKeywordIndex = null;
1240
+ for (const child of ctx.children) {
1241
+ if (child.symbol) {
1242
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1243
+ if (symName === 'IN') {
1244
+ isInPredicate = true;
1245
+ inKeywordIndex = child.symbol.tokenIndex;
1246
+ break;
1247
+ }
1248
+ }
1249
+ }
1250
+ if (!isInPredicate || inKeywordIndex === null)
1251
+ return;
1252
+ // Check if there's a subquery inside - if so, don't treat as IN list
1253
+ // Subquery IN: IN (SELECT ...)
1254
+ let hasSubquery = false;
1255
+ for (const child of ctx.children) {
1256
+ if (child.ruleIndex !== undefined) {
1257
+ const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
1258
+ if (ruleName === 'query') {
1259
+ hasSubquery = true;
1260
+ break;
1261
+ }
1262
+ }
1263
+ }
1264
+ if (hasSubquery)
1265
+ return; // Don't track IN (SELECT ...) as an IN list
1266
+ // Now find the open paren, close paren, and commas using recursive walk
1267
+ let openParenIndex = null;
1268
+ let closeParenIndex = null;
1269
+ const commaIndices = [];
1270
+ let depth = 0;
1271
+ let foundOpenParen = false;
1272
+ const walkForTokens = (node) => {
1273
+ if (!node)
1274
+ return;
1275
+ if (node.symbol) {
1276
+ const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
1277
+ const tokenIndex = node.symbol.tokenIndex;
1278
+ if (tokenIndex <= inKeywordIndex)
1279
+ return; // Skip tokens before/at IN
1280
+ if (symName === 'LEFT_PAREN') {
1281
+ if (!foundOpenParen) {
1282
+ openParenIndex = tokenIndex;
1283
+ foundOpenParen = true;
1284
+ }
1285
+ else {
1286
+ depth++;
1287
+ }
1288
+ }
1289
+ else if (symName === 'RIGHT_PAREN') {
1290
+ if (depth > 0) {
1291
+ depth--;
1292
+ }
1293
+ else if (foundOpenParen && closeParenIndex === null) {
1294
+ closeParenIndex = tokenIndex;
1295
+ return; // Found the closing paren, stop
1296
+ }
1297
+ }
1298
+ else if (symName === 'COMMA' && depth === 0 && foundOpenParen) {
1299
+ commaIndices.push(tokenIndex);
1300
+ }
1301
+ }
1302
+ if (node.children) {
1303
+ for (const child of node.children) {
1304
+ if (closeParenIndex !== null)
1305
+ return; // Stop if we found close paren
1306
+ walkForTokens(child);
1307
+ }
1308
+ }
1309
+ };
1310
+ walkForTokens(ctx);
1311
+ if (openParenIndex !== null && closeParenIndex !== null) {
1312
+ this.inListInfo.set(openParenIndex, {
1313
+ openParenIndex,
1314
+ closeParenIndex,
1315
+ commaIndices,
1316
+ isInPivot: false, // WHERE IN, not PIVOT IN
1317
+ });
1318
+ }
1319
+ }
1320
+ /**
1321
+ * Collect PIVOT/UNPIVOT clause information for potential expansion.
1322
+ * Structure: PIVOT (aggregates FOR column IN (values))
1323
+ */
1324
+ _collectPivotInfo(ctx, isUnpivot) {
1325
+ if (!ctx.children)
1326
+ return;
1327
+ let openParenIndex = null;
1328
+ let closeParenIndex = null;
1329
+ let forKeywordIndex = null;
1330
+ let inKeywordIndex = null;
1331
+ let inListOpenParen = null;
1332
+ const aggregateCommaIndices = [];
1333
+ const inListCommaIndices = [];
1334
+ let foundFor = false;
1335
+ let foundIn = false;
1336
+ let inListDepth = 0; // Depth within IN list parens (0 = top level of IN list)
1337
+ // Walk through children to find structure
1338
+ const walkForTokens = (node) => {
1339
+ if (!node)
1340
+ return;
1341
+ if (node.symbol) {
1342
+ const symName = SqlBaseLexer.symbolicNames[node.symbol.type];
1343
+ const tokenIndex = node.symbol.tokenIndex;
1344
+ if (symName === 'LEFT_PAREN') {
1345
+ if (openParenIndex === null) {
1346
+ // First paren is the PIVOT open paren
1347
+ openParenIndex = tokenIndex;
1348
+ }
1349
+ else if (foundIn && inListOpenParen === null) {
1350
+ // First paren after IN is the IN list open paren
1351
+ inListOpenParen = tokenIndex;
1352
+ }
1353
+ else if (foundIn) {
1354
+ // Nested paren within IN list items
1355
+ inListDepth++;
1356
+ }
1357
+ }
1358
+ else if (symName === 'RIGHT_PAREN') {
1359
+ if (foundIn && inListDepth > 0) {
1360
+ // Closing a nested paren within IN list
1361
+ inListDepth--;
1362
+ }
1363
+ else if (foundIn && inListOpenParen !== null) {
1364
+ // Closing the IN list paren - this is also close of PIVOT
1365
+ closeParenIndex = tokenIndex;
1366
+ }
1367
+ else {
1368
+ // Outer PIVOT close paren
1369
+ closeParenIndex = tokenIndex;
1370
+ }
1371
+ }
1372
+ else if (symName === 'FOR') {
1373
+ foundFor = true;
1374
+ forKeywordIndex = tokenIndex;
1375
+ }
1376
+ else if (symName === 'IN') {
1377
+ foundIn = true;
1378
+ inKeywordIndex = tokenIndex;
1379
+ }
1380
+ else if (symName === 'COMMA') {
1381
+ if (foundIn && inListOpenParen !== null && inListDepth === 0) {
1382
+ // Comma in IN list at top level
1383
+ inListCommaIndices.push(tokenIndex);
1384
+ }
1385
+ else if (!foundFor) {
1386
+ // Comma before FOR - aggregate list
1387
+ aggregateCommaIndices.push(tokenIndex);
1388
+ }
1389
+ }
1390
+ }
1391
+ if (node.children) {
1392
+ for (const child of node.children) {
1393
+ walkForTokens(child);
1394
+ }
1395
+ }
1396
+ };
1397
+ walkForTokens(ctx);
1398
+ if (openParenIndex !== null && closeParenIndex !== null) {
1399
+ const spanLength = this._calculateNormalizedSpanLength(ctx);
1400
+ this.pivotInfo.set(openParenIndex, {
1401
+ openParenIndex,
1402
+ closeParenIndex,
1403
+ aggregateCommaIndices,
1404
+ forKeywordIndex,
1405
+ inKeywordIndex,
1406
+ inListCommaIndices,
1407
+ spanLength,
1408
+ isUnpivot
1409
+ });
1410
+ // Also store the PIVOT IN list in inListInfo for consistent wrapping
1411
+ if (inListOpenParen !== null) {
1412
+ // Find the IN list close paren (it's one before the PIVOT close paren)
1413
+ // We need to find the actual IN list close paren
1414
+ let inListCloseParen = closeParenIndex; // Default to same as PIVOT close
1415
+ this.inListInfo.set(inListOpenParen, {
1416
+ openParenIndex: inListOpenParen,
1417
+ closeParenIndex: inListCloseParen,
1418
+ commaIndices: inListCommaIndices,
1419
+ isInPivot: true,
1420
+ });
1421
+ }
1422
+ }
1423
+ }
1424
+ _analyzeCaseExpression(ctx) {
1425
+ if (!ctx.children)
1426
+ return;
1427
+ let whenCount = 0;
1428
+ let caseToken = null;
1429
+ let elseToken = null;
1430
+ let endToken = null;
1431
+ let valueExpression = null;
1432
+ const whenTokens = [];
1433
+ // Check if this is a simpleCase (has 'value' property) vs searchedCase
1434
+ // simpleCase: CASE value=expression whenClause+ ELSE? END
1435
+ // searchedCase: CASE whenClause+ ELSE? END
1436
+ const isSimpleCase = ctx.value !== undefined;
1437
+ if (isSimpleCase && ctx.value) {
1438
+ valueExpression = ctx.value;
1439
+ }
1440
+ for (const child of ctx.children) {
1441
+ if (child.symbol) {
1442
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1443
+ if (symName === 'CASE') {
1444
+ caseToken = child.symbol;
1445
+ }
1446
+ else if (symName === 'ELSE') {
1447
+ elseToken = child.symbol;
1448
+ }
1449
+ else if (symName === 'END') {
1450
+ endToken = child.symbol;
1451
+ }
1452
+ else if (symName === 'WHEN') {
1453
+ whenCount++;
1454
+ whenTokens.push(child.symbol);
1455
+ }
1456
+ }
1457
+ if (child.ruleIndex !== undefined) {
1458
+ const ruleName = SqlBaseParser.ruleNames[child.ruleIndex];
1459
+ if (ruleName === 'whenClause') {
1460
+ const whenToken = this._findTokenInContext(child, 'WHEN');
1461
+ if (whenToken && !whenTokens.find((t) => t.tokenIndex === whenToken.tokenIndex)) {
1462
+ whenCount++;
1463
+ whenTokens.push(whenToken);
1464
+ }
1465
+ }
1466
+ }
1467
+ }
1468
+ if (whenCount > 1 && caseToken) {
1469
+ this.multiWhenCaseTokens.add(caseToken.tokenIndex);
1470
+ // For simpleCase with value, mark the CASE token and the position after value expression
1471
+ // So the newline goes after "CASE x" not after "CASE"
1472
+ if (isSimpleCase && valueExpression && valueExpression.stop) {
1473
+ this.simpleCaseTokens.add(caseToken.tokenIndex);
1474
+ this.simpleCaseValueEndTokens.add(valueExpression.stop.tokenIndex);
1475
+ }
1476
+ for (const whenToken of whenTokens) {
1477
+ this.caseWhenTokens.add(whenToken.tokenIndex);
1478
+ }
1479
+ if (elseToken) {
1480
+ this.caseElseTokens.add(elseToken.tokenIndex);
1481
+ }
1482
+ if (endToken) {
1483
+ this.caseEndTokens.add(endToken.tokenIndex);
1484
+ }
1485
+ if (this.currentSelectToken >= 0) {
1486
+ this.multiItemClauses.add(this.currentSelectToken);
1487
+ }
1488
+ }
1489
+ }
1490
+ _findTokenInContext(ctx, symbolicName) {
1491
+ if (!ctx)
1492
+ return null;
1493
+ if (ctx.symbol) {
1494
+ const symName = SqlBaseLexer.symbolicNames[ctx.symbol.type];
1495
+ if (symName === symbolicName) {
1496
+ return ctx.symbol;
1497
+ }
1498
+ }
1499
+ if (ctx.children) {
1500
+ for (const child of ctx.children) {
1501
+ const found = this._findTokenInContext(child, symbolicName);
1502
+ if (found)
1503
+ return found;
1504
+ }
1505
+ }
1506
+ return null;
1507
+ }
1508
+ _analyzeJoinConditions(ctx) {
1509
+ const operators = this._countConditionOperators(ctx);
1510
+ if (operators > 0) {
1511
+ const onTokenIndex = this._findOnToken(ctx);
1512
+ if (onTokenIndex !== -1) {
1513
+ this.multilineConditionClauses.add(onTokenIndex);
1514
+ }
1515
+ }
1516
+ }
1517
+ _findOnToken(ctx) {
1518
+ if (!ctx)
1519
+ return -1;
1520
+ if (ctx.symbol) {
1521
+ const symName = SqlBaseLexer.symbolicNames[ctx.symbol.type];
1522
+ if (symName === 'ON') {
1523
+ return ctx.symbol.tokenIndex;
1524
+ }
1525
+ }
1526
+ if (ctx.children) {
1527
+ for (const child of ctx.children) {
1528
+ const result = this._findOnToken(child);
1529
+ if (result !== -1)
1530
+ return result;
1531
+ }
1532
+ }
1533
+ return -1;
1534
+ }
1535
+ _findSubqueryContext(ctx) {
1536
+ if (!ctx)
1537
+ return null;
1538
+ const className = ctx.constructor?.name || '';
1539
+ if (className === 'SubqueryContext')
1540
+ return ctx;
1541
+ if (ctx.children) {
1542
+ for (const child of ctx.children) {
1543
+ if (!child.symbol) {
1544
+ const found = this._findSubqueryContext(child);
1545
+ if (found)
1546
+ return found;
1547
+ }
1548
+ }
1549
+ }
1550
+ return null;
1551
+ }
1552
+ _markAllDescendantTokens(ctx, targetSet) {
1553
+ // Mark all tokens in this context and its descendants
1554
+ if (!ctx)
1555
+ return;
1556
+ if (ctx.symbol) {
1557
+ targetSet.add(ctx.symbol.tokenIndex);
1558
+ }
1559
+ else if (ctx.children) {
1560
+ for (const child of ctx.children) {
1561
+ this._markAllDescendantTokens(child, targetSet);
1562
+ }
1563
+ }
1564
+ }
1565
+ _markIdentifier(ctx) {
1566
+ if (ctx.start) {
1567
+ for (let i = ctx.start.tokenIndex; i <= (ctx.stop?.tokenIndex ?? ctx.start.tokenIndex); i++) {
1568
+ this.identifierTokens.add(i);
1569
+ }
1570
+ }
1571
+ }
1572
+ _markClauseStart(ctx) {
1573
+ if (ctx.start) {
1574
+ this.clauseStartTokens.add(ctx.start.tokenIndex);
1575
+ }
1576
+ }
1577
+ _markListContext(ctx) {
1578
+ let hasCommas = false;
1579
+ if (ctx.children) {
1580
+ let isFirst = true;
1581
+ for (const child of ctx.children) {
1582
+ if (child.symbol) {
1583
+ const tokenType = child.symbol.type;
1584
+ if (tokenType === getTokenType('COMMA')) {
1585
+ this.listItemCommas.add(child.symbol.tokenIndex);
1586
+ hasCommas = true;
1587
+ }
1588
+ else if (isFirst && tokenType !== getTokenType('COMMA') && child.symbol.tokenIndex >= 0) {
1589
+ this.listFirstItems.add(child.symbol.tokenIndex);
1590
+ isFirst = false;
1591
+ }
1592
+ }
1593
+ else if (child.children) {
1594
+ this._markCommasInContext(child);
1595
+ }
1596
+ }
1597
+ }
1598
+ return hasCommas;
1599
+ }
1600
+ _markCommasInContext(ctx) {
1601
+ if (!ctx || !ctx.children)
1602
+ return;
1603
+ const className = ctx.constructor?.name || '';
1604
+ if (className === 'FunctionCallContext')
1605
+ return;
1606
+ if (className === 'PivotClauseContext' || className === 'UnpivotClauseContext')
1607
+ return;
1608
+ if (className === 'LateralViewContext')
1609
+ return;
1610
+ for (const child of ctx.children) {
1611
+ if (child.symbol) {
1612
+ if (child.symbol.type === getTokenType('COMMA')) {
1613
+ this.listItemCommas.add(child.symbol.tokenIndex);
1614
+ }
1615
+ }
1616
+ else if (child.children) {
1617
+ this._markCommasInContext(child);
1618
+ }
1619
+ }
1620
+ }
1621
+ _markListCommasExcludingGroupingAnalytics(ctx) {
1622
+ let count = 0;
1623
+ if (!ctx || !ctx.children)
1624
+ return 0;
1625
+ const isGroupingAnalytics = ctx.ruleIndex !== undefined &&
1626
+ SqlBaseParser.ruleNames[ctx.ruleIndex] === 'groupingAnalytics';
1627
+ for (const child of ctx.children) {
1628
+ if (child.symbol) {
1629
+ if (child.symbol.type === getTokenType('COMMA')) {
1630
+ if (!isGroupingAnalytics) {
1631
+ this.listItemCommas.add(child.symbol.tokenIndex);
1632
+ }
1633
+ count++;
1634
+ }
1635
+ }
1636
+ else if (child.ruleIndex !== undefined) {
1637
+ count += this._markListCommasExcludingGroupingAnalytics(child);
1638
+ }
1639
+ }
1640
+ return count;
1641
+ }
1642
+ _markGroupByAllToken(ctx) {
1643
+ if (!ctx || !ctx.children)
1644
+ return;
1645
+ let foundGroupBy = false;
1646
+ for (const child of ctx.children) {
1647
+ if (child.symbol) {
1648
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1649
+ if (symName === 'BY') {
1650
+ foundGroupBy = true;
1651
+ }
1652
+ else if (foundGroupBy && symName === 'ALL') {
1653
+ this.groupByAllTokens.add(child.symbol.tokenIndex);
1654
+ return;
1655
+ }
1656
+ }
1657
+ else if (foundGroupBy && child.ruleIndex !== undefined) {
1658
+ const allToken = this._findAllTokenInGroupByExpression(child);
1659
+ if (allToken) {
1660
+ this.groupByAllTokens.add(allToken.tokenIndex);
1661
+ return;
1662
+ }
1663
+ }
1664
+ }
1665
+ }
1666
+ _findAllTokenInGroupByExpression(ctx) {
1667
+ if (!ctx)
1668
+ return null;
1669
+ if (ctx.symbol) {
1670
+ const symName = SqlBaseLexer.symbolicNames[ctx.symbol.type];
1671
+ if (symName === 'ALL') {
1672
+ return ctx.symbol;
1673
+ }
1674
+ return null;
1675
+ }
1676
+ if (!ctx.children)
1677
+ return null;
1678
+ const ruleName = ctx.ruleIndex !== undefined ? SqlBaseParser.ruleNames[ctx.ruleIndex] : null;
1679
+ const identifierPathRules = new Set([
1680
+ 'groupByClause', 'expression', 'booleanExpression', 'valueExpression',
1681
+ 'primaryExpression', 'columnReference', 'identifier', 'strictIdentifier',
1682
+ 'nonReserved', 'namedExpression'
1683
+ ]);
1684
+ if (ruleName && identifierPathRules.has(ruleName)) {
1685
+ const meaningfulChildren = ctx.children.filter((c) => c.symbol || (c.ruleIndex !== undefined));
1686
+ if (meaningfulChildren.length === 1) {
1687
+ return this._findAllTokenInGroupByExpression(meaningfulChildren[0]);
1688
+ }
1689
+ }
1690
+ return null;
1691
+ }
1692
+ _analyzeConditionClause(ctx) {
1693
+ const operators = this._countConditionOperators(ctx);
1694
+ if (operators > 0) {
1695
+ if (ctx.start) {
1696
+ this.multilineConditionClauses.add(ctx.start.tokenIndex);
1697
+ }
1698
+ }
1699
+ }
1700
+ _countConditionOperators(ctx, parenDepth = 0) {
1701
+ let count = 0;
1702
+ if (!ctx)
1703
+ return count;
1704
+ if (ctx.children) {
1705
+ let currentParenDepth = parenDepth;
1706
+ for (const child of ctx.children) {
1707
+ if (child.symbol) {
1708
+ const symbolicName = SqlBaseLexer.symbolicNames[child.symbol.type];
1709
+ if (symbolicName === 'LEFT_PAREN') {
1710
+ currentParenDepth++;
1711
+ }
1712
+ else if (symbolicName === 'RIGHT_PAREN') {
1713
+ currentParenDepth--;
1714
+ }
1715
+ else if (symbolicName === 'AND' || symbolicName === 'OR') {
1716
+ if (!this.betweenAndTokens.has(child.symbol.tokenIndex) && currentParenDepth === 0) {
1717
+ count++;
1718
+ this.conditionOperators.add(child.symbol.tokenIndex);
1719
+ }
1720
+ }
1721
+ }
1722
+ count += this._countConditionOperators(child, currentParenDepth);
1723
+ }
1724
+ }
1725
+ return count;
1726
+ }
1727
+ _scanForBetweenAnd(ctx) {
1728
+ if (!ctx)
1729
+ return;
1730
+ if (ctx.children) {
1731
+ let hasBetween = false;
1732
+ for (const child of ctx.children) {
1733
+ if (child.symbol) {
1734
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1735
+ if (symName === 'BETWEEN') {
1736
+ hasBetween = true;
1737
+ }
1738
+ else if (symName === 'AND' && hasBetween) {
1739
+ this.betweenAndTokens.add(child.symbol.tokenIndex);
1740
+ hasBetween = false;
1741
+ }
1742
+ }
1743
+ this._scanForBetweenAnd(child);
1744
+ }
1745
+ }
1746
+ }
1747
+ _markSubqueryParens(ctx) {
1748
+ if (ctx.children) {
1749
+ for (const child of ctx.children) {
1750
+ if (child.symbol) {
1751
+ const tokenType = child.symbol.type;
1752
+ if (tokenType === getTokenType('LEFT_PAREN')) {
1753
+ this.subqueryOpenParens.add(child.symbol.tokenIndex);
1754
+ }
1755
+ else if (tokenType === getTokenType('RIGHT_PAREN')) {
1756
+ this.subqueryCloseParens.add(child.symbol.tokenIndex);
1757
+ }
1758
+ }
1759
+ }
1760
+ }
1761
+ }
1762
+ _markDdlColumnList(ctx) {
1763
+ if (!ctx || !ctx.children)
1764
+ return;
1765
+ let foundLeftParen = false;
1766
+ let leftParenIndex = -1;
1767
+ let commaCount = 0;
1768
+ for (const child of ctx.children) {
1769
+ if (child.symbol) {
1770
+ const tokenType = child.symbol.type;
1771
+ if (tokenType === getTokenType('LEFT_PAREN') && !foundLeftParen) {
1772
+ foundLeftParen = true;
1773
+ leftParenIndex = child.symbol.tokenIndex;
1774
+ this.ddlOpenParens.add(leftParenIndex);
1775
+ }
1776
+ else if (tokenType === getTokenType('RIGHT_PAREN') && foundLeftParen) {
1777
+ this.ddlCloseParens.add(child.symbol.tokenIndex);
1778
+ }
1779
+ else if (tokenType === getTokenType('COMMA') && foundLeftParen) {
1780
+ this.ddlColumnCommas.add(child.symbol.tokenIndex);
1781
+ commaCount++;
1782
+ }
1783
+ }
1784
+ else if (child.children && foundLeftParen) {
1785
+ commaCount += this._markDdlCommasInContext(child);
1786
+ }
1787
+ }
1788
+ if (commaCount > 0 && leftParenIndex >= 0) {
1789
+ this.ddlMultiColumn.add(leftParenIndex);
1790
+ }
1791
+ }
1792
+ _markDdlCommasInContext(ctx, angleDepth = 0) {
1793
+ if (!ctx || !ctx.children)
1794
+ return 0;
1795
+ let count = 0;
1796
+ for (const child of ctx.children) {
1797
+ if (child.symbol) {
1798
+ const tokenType = child.symbol.type;
1799
+ if (tokenType === getTokenType('LT')) {
1800
+ // Entering complex type like ARRAY<...> or MAP<...>
1801
+ angleDepth++;
1802
+ }
1803
+ else if (tokenType === getTokenType('GT')) {
1804
+ // Exiting complex type
1805
+ if (angleDepth > 0)
1806
+ angleDepth--;
1807
+ }
1808
+ else if (tokenType === getTokenType('COMMA') && angleDepth === 0) {
1809
+ // Only mark as DDL comma if not inside angle brackets (complex type)
1810
+ this.ddlColumnCommas.add(child.symbol.tokenIndex);
1811
+ count++;
1812
+ }
1813
+ }
1814
+ else if (child.children) {
1815
+ count += this._markDdlCommasInContext(child, angleDepth);
1816
+ }
1817
+ }
1818
+ return count;
1819
+ }
1820
+ _markValuesCommas(ctx, foundValues = false) {
1821
+ if (!ctx || !ctx.children)
1822
+ return;
1823
+ let parenDepth = 0;
1824
+ for (const child of ctx.children) {
1825
+ if (child.symbol) {
1826
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1827
+ const tokenType = child.symbol.type;
1828
+ if (symName === 'VALUES') {
1829
+ foundValues = true;
1830
+ }
1831
+ else if (foundValues && tokenType === getTokenType('LEFT_PAREN')) {
1832
+ parenDepth++;
1833
+ // If we see a paren right after VALUES, we have tuples
1834
+ if (parenDepth === 1) {
1835
+ this.valuesHasTuples = true;
1836
+ }
1837
+ }
1838
+ else if (foundValues && tokenType === getTokenType('RIGHT_PAREN')) {
1839
+ parenDepth--;
1840
+ }
1841
+ else if (foundValues && parenDepth === 0 && tokenType === getTokenType('COMMA')) {
1842
+ this.valuesCommas.add(child.symbol.tokenIndex);
1843
+ }
1844
+ }
1845
+ else if (child.children) {
1846
+ this._markValuesCommas(child, foundValues);
1847
+ }
1848
+ }
1849
+ }
1850
+ _markSetClause(ctx, foundSet, commaCount) {
1851
+ if (!ctx || !ctx.children)
1852
+ return commaCount;
1853
+ for (const child of ctx.children) {
1854
+ if (child.symbol) {
1855
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1856
+ if (symName === 'SET') {
1857
+ foundSet = true;
1858
+ this.setKeywordToken = child.symbol.tokenIndex;
1859
+ this.clauseStartTokens.add(child.symbol.tokenIndex);
1860
+ }
1861
+ else if (foundSet && child.symbol.type === getTokenType('COMMA')) {
1862
+ this.setClauseCommas.add(child.symbol.tokenIndex);
1863
+ commaCount++;
1864
+ }
1865
+ else if (foundSet && symName === 'WHERE') {
1866
+ return commaCount;
1867
+ }
1868
+ }
1869
+ else if (child.children) {
1870
+ commaCount = this._markSetClause(child, foundSet, commaCount);
1871
+ }
1872
+ }
1873
+ return commaCount;
1874
+ }
1875
+ _markSetConfigTokens(ctx) {
1876
+ if (!ctx || !ctx.children)
1877
+ return;
1878
+ let foundSet = false;
1879
+ for (const child of ctx.children) {
1880
+ if (child.symbol) {
1881
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1882
+ if (symName === 'SET') {
1883
+ foundSet = true;
1884
+ }
1885
+ else if (foundSet) {
1886
+ this.setConfigTokens.add(child.symbol.tokenIndex);
1887
+ }
1888
+ }
1889
+ else if (child.children && foundSet) {
1890
+ this._markSetConfigTokensRecursive(child);
1891
+ }
1892
+ }
1893
+ }
1894
+ _markResetConfigTokens(ctx) {
1895
+ // Similar to SET, mark all tokens after RESET keyword
1896
+ if (!ctx || !ctx.children)
1897
+ return;
1898
+ let foundReset = false;
1899
+ for (const child of ctx.children) {
1900
+ if (child.symbol) {
1901
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1902
+ if (symName === 'RESET') {
1903
+ foundReset = true;
1904
+ }
1905
+ else if (foundReset) {
1906
+ this.setConfigTokens.add(child.symbol.tokenIndex);
1907
+ }
1908
+ }
1909
+ else if (child.children && foundReset) {
1910
+ this._markSetConfigTokensRecursive(child);
1911
+ }
1912
+ }
1913
+ }
1914
+ _markSetConfigTokensRecursive(ctx) {
1915
+ if (!ctx)
1916
+ return;
1917
+ if (ctx.symbol) {
1918
+ this.setConfigTokens.add(ctx.symbol.tokenIndex);
1919
+ }
1920
+ if (ctx.children) {
1921
+ for (const child of ctx.children) {
1922
+ this._markSetConfigTokensRecursive(child);
1923
+ }
1924
+ }
1925
+ }
1926
+ _markMergeClauses(ctx) {
1927
+ if (!ctx || !ctx.children)
1928
+ return;
1929
+ for (const child of ctx.children) {
1930
+ if (child.symbol) {
1931
+ const symName = SqlBaseLexer.symbolicNames[child.symbol.type];
1932
+ if (symName === 'USING') {
1933
+ this.mergeUsingTokens.add(child.symbol.tokenIndex);
1934
+ }
1935
+ else if (symName === 'ON') {
1936
+ this.mergeOnTokens.add(child.symbol.tokenIndex);
1937
+ }
1938
+ else if (symName === 'WHEN') {
1939
+ this.mergeWhenTokens.add(child.symbol.tokenIndex);
1940
+ }
1941
+ }
1942
+ else if (child.children) {
1943
+ this._markMergeClauses(child);
1944
+ }
1945
+ }
1946
+ }
1947
+ _markDepthForContext(ctx) {
1948
+ if (ctx.start && ctx.stop) {
1949
+ for (let i = ctx.start.tokenIndex; i <= ctx.stop.tokenIndex; i++) {
1950
+ if (!this.tokenDepthMap.has(i)) {
1951
+ this.tokenDepthMap.set(i, this.subqueryDepth);
1952
+ }
1953
+ }
1954
+ }
1955
+ }
1956
+ }