tplm-lang 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +357 -0
  2. package/dist/compiler/grid-spec-builder.d.ts +30 -0
  3. package/dist/compiler/grid-spec-builder.js +1836 -0
  4. package/dist/compiler/index.d.ts +11 -0
  5. package/dist/compiler/index.js +13 -0
  6. package/dist/compiler/malloy-generator.d.ts +36 -0
  7. package/dist/compiler/malloy-generator.js +141 -0
  8. package/dist/compiler/multi-query-utils.d.ts +42 -0
  9. package/dist/compiler/multi-query-utils.js +185 -0
  10. package/dist/compiler/query-plan-generator.d.ts +77 -0
  11. package/dist/compiler/query-plan-generator.js +1456 -0
  12. package/dist/compiler/table-spec-builder.d.ts +11 -0
  13. package/dist/compiler/table-spec-builder.js +588 -0
  14. package/dist/compiler/table-spec.d.ts +434 -0
  15. package/dist/compiler/table-spec.js +274 -0
  16. package/dist/executor/index.d.ts +71 -0
  17. package/dist/executor/index.js +232 -0
  18. package/dist/index.d.ts +214 -0
  19. package/dist/index.js +220 -0
  20. package/dist/parser/ast.d.ts +253 -0
  21. package/dist/parser/ast.js +164 -0
  22. package/dist/parser/chevrotain-parser.d.ts +118 -0
  23. package/dist/parser/chevrotain-parser.js +1266 -0
  24. package/dist/parser/index.d.ts +30 -0
  25. package/dist/parser/index.js +36 -0
  26. package/dist/parser/parser.d.ts +4 -0
  27. package/dist/parser/parser.js +4354 -0
  28. package/dist/parser/prettifier.d.ts +14 -0
  29. package/dist/parser/prettifier.js +380 -0
  30. package/dist/renderer/grid-renderer.d.ts +19 -0
  31. package/dist/renderer/grid-renderer.js +541 -0
  32. package/dist/renderer/index.d.ts +4 -0
  33. package/dist/renderer/index.js +4 -0
  34. package/package.json +67 -0
  35. package/packages/parser/tpl.pegjs +568 -0
  36. package/packages/renderer/tpl-table.css +182 -0
@@ -0,0 +1,1456 @@
1
+ /**
2
+ * Query Plan Generator
3
+ *
4
+ * Generates tagged, deduplicated query specifications from a TableSpec.
5
+ *
6
+ * Key features:
7
+ * 1. Tags each query with its position in the axis trees (TreePath)
8
+ * 2. Deduplicates queries with identical structure
9
+ * 3. Handles ACROSS modifiers for cross-dimensional aggregation
10
+ * 4. Preserves all metadata (labels, formats, limits)
11
+ */
12
+ import { serializeTreePath, collectBranches, } from './table-spec.js';
13
+ import { escapeFieldName, buildAggExpression, buildPercentageAggExpression } from './multi-query-utils.js';
14
+ // ---
15
+ // ORDER-BY HELPERS
16
+ // ---
17
/**
 * Resolve the Malloy order_by field for a limit spec.
 *
 * Thin wrapper: pulls the optional `orderBy` off the limit and hands it,
 * together with the dimension name, to buildOrderByExpression.
 *
 * @param limit The limit spec (its `orderBy` may be absent)
 * @param dimensionName The dimension being limited (used as the fallback sort field)
 * @returns The Malloy order_by field expression
 */
function buildOrderByField(limit, dimensionName) {
    const { orderBy } = limit;
    return buildOrderByExpression(orderBy, dimensionName);
}
30
/**
 * Resolve the Malloy order_by field for an order spec (ordering without a limit).
 *
 * Thin wrapper: pulls the optional `orderBy` off the order spec and hands it,
 * together with the dimension name, to buildOrderByExpression.
 *
 * @param order The order spec (its `orderBy` may be absent)
 * @param dimensionName The dimension being ordered (used as the fallback sort field)
 * @returns The Malloy order_by field expression
 */
function buildOrderByFieldFromOrder(order, dimensionName) {
    const { orderBy } = order;
    return buildOrderByExpression(orderBy, dimensionName);
}
43
/**
 * Turn an orderBy value into the field expression used in order_by.
 * Shared by the limit and order helpers.
 *
 * Resolution rules:
 * - missing/empty orderBy       -> the escaped dimension name
 * - string orderBy              -> that field name, escaped
 * - `aggregateExpr` object      -> the aggregate's generated name
 * - `ratioExpr` object          -> the generated name of its numerator only
 *                                  (the ratio itself is not computed here)
 * - any other object            -> the escaped dimension name
 *
 * @param orderBy Optional string or expression object
 * @param dimensionName Dimension used when no usable orderBy is given
 * @returns The order_by field expression
 */
function buildOrderByExpression(orderBy, dimensionName) {
    if (orderBy && typeof orderBy !== 'string') {
        // Expression object: dispatch on its declared type.
        switch (orderBy.type) {
            case 'aggregateExpr':
                return buildAggregateOrderByName(orderBy);
            case 'ratioExpr':
                return buildAggregateOrderByName(orderBy.numerator);
            default:
                return escapeFieldName(dimensionName);
        }
    }
    // Either a plain field name, or nothing at all (fall back to the dimension).
    return escapeFieldName(orderBy ? orderBy : dimensionName);
}
70
/**
 * Derive the order_by name for an aggregate expression.
 * e.g., { field: 'births', function: 'sum' } -> 'births_sum' (then escaped)
 *
 * The function name is used verbatim; no renaming (such as mean/avg) is applied.
 *
 * @param expr Object exposing `field` and `function`
 * @returns The escaped `<field>_<function>` name
 */
function buildAggregateOrderByName(expr) {
    const { field, function: aggFunction } = expr;
    return escapeFieldName(`${field}_${aggFunction}`);
}
79
/**
 * Make sure the aggregate referenced by an orderBy expression is present
 * in the aggregates list.
 *
 * - Missing or string orderBy values reference no aggregate: the input list
 *   is returned as-is.
 * - For an `aggregateExpr` the expression itself is used; for a `ratioExpr`
 *   only its numerator is considered (and only if that is an `aggregateExpr`).
 * - If an entry named `<field>_<function>` already exists, the input list is
 *   returned as-is; otherwise a new list with the extra aggregate appended is
 *   returned. The input array is never mutated.
 *
 * @param orderByExpr Optional string or expression object
 * @param aggregates Current aggregate list
 * @returns The same list, or a copy with one aggregate appended
 */
function ensureOrderByAggregateInList(orderByExpr, aggregates) {
    if (!orderByExpr || typeof orderByExpr === 'string') {
        return aggregates;
    }
    // Pick the expression that may carry the aggregate.
    let candidate;
    switch (orderByExpr.type) {
        case 'aggregateExpr':
            candidate = orderByExpr;
            break;
        case 'ratioExpr':
            candidate = orderByExpr.numerator;
            break;
        default:
            return aggregates;
    }
    if (candidate.type !== 'aggregateExpr') {
        return aggregates;
    }
    const { field: measure, function: aggregation } = candidate;
    const name = `${measure}_${aggregation}`;
    const isMissing = aggregates.every(existing => existing.name !== name);
    if (!isMissing) {
        return aggregates;
    }
    // Added only so it can be ordered by; it carries no label and is not a percentage.
    return [
        ...aggregates,
        { name, measure, aggregation, label: undefined, isPercentage: false },
    ];
}
120
+ // ---
121
+ // MAIN GENERATOR FUNCTION
122
+ // ---
123
/**
 * Produce a query plan for a TableSpec.
 *
 * Pipeline:
 * 1. Build one raw query per (row branch, column branch) pair. An axis that
 *    is absent contributes a single empty path.
 * 2. Collapse raw queries that share a signature.
 * 3. Merge the survivors that share a row structure (column sibling optimization).
 *
 * @param spec The table spec; `rowAxis` and `colAxis` are optional
 * @returns `{ queries, pathToQuery, mergeOrder }` where `mergeOrder` lists the
 *          final query ids in plan order
 */
export function generateQueryPlan(spec) {
    const branchesOf = (axis) => (axis ? collectBranches(axis) : [[]]);
    const rowBranches = branchesOf(spec.rowAxis);
    const colBranches = branchesOf(spec.colAxis);
    // Stage 1: cartesian product of row and column branches.
    const rawQueries = [];
    for (const rowPath of rowBranches) {
        for (const colPath of colBranches) {
            const candidate = buildQueryFromPaths(spec, rowPath, colPath);
            if (!candidate) {
                continue;
            }
            rawQueries.push(candidate);
        }
    }
    // Stage 2: deduplicate by signature.
    const unique = deduplicateQueries(rawQueries);
    // Stage 3: fold column variants (e.g. COLS dim1 | dim2) into one query with several nests.
    const { queries, pathToQuery } = mergeColumnVariants(unique.queries, unique.pathToQuery);
    return {
        queries,
        pathToQuery,
        mergeOrder: queries.map(q => q.id),
    };
}
160
+ // ---
161
+ // QUERY BUILDING
162
+ // ---
163
/**
 * Assemble the raw (untagged) query description for one row path / column path pair.
 *
 * The result bundles the two paths, the groupings found along each, the
 * spec-wide aggregates, the total flags and labels for each axis, and the
 * signature later used for deduplication.
 *
 * @param spec The table spec (reads `rowAxis`, `colAxis`, `aggregates`)
 * @param rowPath Path through the row axis tree
 * @param colPath Path through the column axis tree
 * @returns The raw query object
 */
function buildQueryFromPaths(spec, rowPath, colPath) {
    const { rowAxis, colAxis, aggregates } = spec;
    const rowGroupings = extractGroupingsFromPath(rowAxis, rowPath);
    const colGroupings = extractGroupingsFromPath(colAxis, colPath);
    const isRowTotal = pathHasTotal(rowPath);
    const hasColTotal = pathHasTotal(colPath);
    // A total label is only looked up when the path actually contains a total.
    const totalLabel = (present, axis, path) => (present ? getTotalLabelFromPath(axis, path) : undefined);
    return {
        rowPath,
        colPath,
        rowGroupings,
        colGroupings,
        aggregates,
        isRowTotal,
        hasColTotal,
        rowTotalLabel: totalLabel(isRowTotal, rowAxis, rowPath),
        colTotalLabel: totalLabel(hasColTotal, colAxis, colPath),
        signature: buildQuerySignature(rowGroupings, colGroupings, aggregates, isRowTotal, hasColTotal),
    };
}
193
/**
 * Extract the GroupingInfo array for a path through an axis tree.
 *
 * Walks `tree` segment by segment:
 * - `dimension` segments must land on a dimension node of the same name;
 *   the node's grouping metadata is recorded and the walk moves to its child.
 * - `sibling` segments must land on a siblings node; the walk moves to
 *   `children[segment.index]`.
 * - `total` segments must land on a total node; no grouping is added and the
 *   walk moves to its child.
 * - `aggregate` segments add no grouping and do not move within the tree.
 *
 * If a segment does not match the current node (or has an unknown type), the
 * walk stops and the groupings collected so far are returned. Previously the
 * mismatch branches used `break`, which only left the `switch`; neither the
 * node nor the path index advanced, so the `while` loop never terminated.
 *
 * @param tree Root node of the axis tree, or a falsy value when the axis is absent
 * @param path Array of path segments (`{ type, name? , index? }`)
 * @returns Array of grouping objects in path order (empty when `tree` is falsy)
 */
function extractGroupingsFromPath(tree, path) {
    const groupings = [];
    if (!tree) {
        return groupings;
    }
    let currentNode = tree;
    let pathIndex = 0;
    while (currentNode && pathIndex < path.length) {
        const segment = path[pathIndex];
        switch (segment.type) {
            case 'dimension':
                if (currentNode.nodeType !== 'dimension' || currentNode.name !== segment.name) {
                    // Path doesn't match tree - stop instead of spinning forever
                    return groupings;
                }
                groupings.push({
                    dimension: currentNode.name,
                    label: currentNode.label,
                    suppressLabel: currentNode.suppressLabel,
                    limit: currentNode.limit,
                    order: currentNode.order,
                    acrossDimensions: currentNode.acrossDimensions,
                });
                currentNode = currentNode.child ?? null;
                break;
            case 'sibling':
                if (currentNode.nodeType !== 'siblings') {
                    return groupings;
                }
                currentNode = currentNode.children[segment.index] ?? null;
                break;
            case 'total':
                if (currentNode.nodeType !== 'total') {
                    return groupings;
                }
                // Total doesn't add a grouping - it collapses the parent
                currentNode = currentNode.child ?? null;
                break;
            case 'aggregate':
                // Aggregates don't add groupings and don't move within the tree
                break;
            default:
                // Unknown segment type: nothing sensible to do, so stop here
                return groupings;
        }
        pathIndex++;
    }
    return groupings;
}
251
/**
 * Report whether any segment of a path is a total segment.
 *
 * @param path Array of path segments
 * @returns true when at least one segment has type 'total'
 */
function pathHasTotal(path) {
    const noTotals = path.every(segment => segment.type !== 'total');
    return !noTotals;
}
257
/**
 * Find the label of the first total node reached while following a path.
 *
 * The walk is driven by the current node's type:
 * - a total node paired with a total segment ends the walk with that node's label;
 * - dimension nodes, and total nodes paired with a non-total segment, step to
 *   their child and consume one segment;
 * - siblings nodes step to `children[segment.index]` when the segment is a
 *   sibling segment, otherwise the walk is abandoned;
 * - aggregate nodes abandon the walk.
 *
 * @param tree Root node of the axis tree, or a falsy value when the axis is absent
 * @param path Array of path segments
 * @returns The total node's label, or undefined if no total node is reached
 */
function getTotalLabelFromPath(tree, path) {
    if (!tree) {
        return undefined;
    }
    let node = tree;
    let cursor = 0;
    while (node && cursor < path.length) {
        const segment = path[cursor];
        const kind = node.nodeType;
        if (kind === 'total' && segment.type === 'total') {
            return node.label;
        }
        if (kind === 'dimension' || kind === 'total') {
            node = node.child ?? null;
            cursor += 1;
        }
        else if (kind === 'siblings') {
            if (segment.type !== 'sibling') {
                // Segment cannot select a sibling: give up.
                node = null;
            }
            else {
                node = node.children[segment.index] ?? null;
                cursor += 1;
            }
        }
        else if (kind === 'aggregate') {
            node = null;
        }
    }
    return undefined;
}
296
/**
 * Compose the signature string used to detect duplicate queries.
 *
 * Layout: `R:<rows>|C:<cols>|A:<aggs>|T:<rowTotal><colTotal>`
 * - rows: groupings joined by `*`; each is the dimension, then `[<-?><count>]`
 *   when it has a limit (`-` for descending), then `<a,b>` when it has ACROSS
 *   dimensions. `TOTAL` when there are no row groupings.
 * - cols: same, without the ACROSS part. When empty: `TOTAL` if the column
 *   path has a total, otherwise `NONE`.
 * - aggs: aggregate names, sorted, comma-joined.
 * - flags: `1`/`0` for the row total flag, then for the column total flag.
 *
 * @param rowGroupings Row grouping objects
 * @param colGroupings Column grouping objects
 * @param aggregates Aggregate objects (only `name` is read)
 * @param isRowTotal Whether the row path contains a total
 * @param hasColTotal Whether the column path contains a total
 * @returns The signature string
 */
function buildQuerySignature(rowGroupings, colGroupings, aggregates, isRowTotal, hasColTotal) {
    const describe = (grouping, includeAcross) => {
        let suffix = '';
        if (grouping.limit) {
            const sign = grouping.limit.direction === 'desc' ? '-' : '';
            suffix += `[${sign}${grouping.limit.count}]`;
        }
        if (includeAcross && grouping.acrossDimensions) {
            suffix += `<${grouping.acrossDimensions.join(',')}>`;
        }
        return suffix ? grouping.dimension + suffix : grouping.dimension;
    };
    const flag = (value) => (value ? '1' : '0');
    const rowPart = rowGroupings.map(g => describe(g, true)).join('*');
    const colPart = colGroupings.map(g => describe(g, false)).join('*');
    const aggPart = aggregates.map(a => a.name).sort().join(',');
    return [
        `R:${rowPart || 'TOTAL'}`,
        `C:${colPart || (hasColTotal ? 'TOTAL' : 'NONE')}`,
        `A:${aggPart}`,
        `T:${flag(isRowTotal)}${flag(hasColTotal)}`,
    ].join('|');
}
335
/**
 * Collapse raw queries that share a signature.
 *
 * The first raw query seen for a signature becomes the tagged query for it and
 * receives the next sequential id (`q0`, `q1`, ...). Every raw query's
 * `rowPath::colPath` key is mapped to the id of the tagged query that
 * represents its signature.
 *
 * @param rawQueries Raw query objects
 * @returns `{ queries, pathToQuery }`: the unique tagged queries in first-seen
 *          order, and a Map from serialized path pair to query id
 */
function deduplicateQueries(rawQueries) {
    const bySignature = new Map();
    const pathToQuery = new Map();
    for (const raw of rawQueries) {
        let representative = bySignature.get(raw.signature);
        if (representative === undefined) {
            // First occurrence: ids are handed out in order of discovery.
            const { rowPath, colPath, rowGroupings, colGroupings, aggregates, isRowTotal, hasColTotal, rowTotalLabel, colTotalLabel, signature, } = raw;
            representative = {
                id: `q${bySignature.size}`,
                rowPath,
                colPath,
                rowGroupings,
                colGroupings,
                aggregates,
                isRowTotal,
                hasColTotal,
                rowTotalLabel,
                colTotalLabel,
                signature,
            };
            bySignature.set(raw.signature, representative);
        }
        const pathKey = `${serializeTreePath(raw.rowPath)}::${serializeTreePath(raw.colPath)}`;
        pathToQuery.set(pathKey, representative.id);
    }
    return {
        queries: [...bySignature.values()],
        pathToQuery,
    };
}
376
/**
 * Compose the row-only signature used to group queries for merging.
 *
 * Layout: `R:<rows>|A:<aggs>|RT:<0|1>` plus `|RTL:<label>` when the query has
 * a row total label.
 * - rows: groupings joined by `*`; each is the dimension, then `[<-?><count>]`
 *   when limited (`-` for descending), then `<a,b>` for ACROSS dimensions.
 *   `TOTAL` when there are no row groupings.
 * - aggs: aggregate names, sorted, comma-joined.
 *
 * @param query Query object (reads rowGroupings, aggregates, isRowTotal, rowTotalLabel)
 * @returns The row signature string
 */
function buildRowSignature(query) {
    const describe = (grouping) => {
        let suffix = '';
        if (grouping.limit) {
            const sign = grouping.limit.direction === 'desc' ? '-' : '';
            suffix += `[${sign}${grouping.limit.count}]`;
        }
        if (grouping.acrossDimensions) {
            suffix += `<${grouping.acrossDimensions.join(',')}>`;
        }
        return suffix ? grouping.dimension + suffix : grouping.dimension;
    };
    const rowPart = query.rowGroupings.map(describe).join('*');
    const aggPart = query.aggregates.map(a => a.name).sort().join(',');
    const pieces = [
        `R:${rowPart || 'TOTAL'}`,
        `A:${aggPart}`,
        `RT:${query.isRowTotal ? '1' : '0'}`,
    ];
    if (query.rowTotalLabel) {
        pieces.push(`RTL:${query.rowTotalLabel}`);
    }
    return pieces.join('|');
}
409
/**
 * Decide whether a query may take part in column-variant merging.
 *
 * A query is excluded when:
 * - any of its column groupings carries a limit, or
 * - it has a percentage aggregate with a denominator scope that is
 *   `'all'` (and the query has column groupings), `'rows'`, `'cols'`,
 *   or an array of dimensions.
 *
 * @param query Query object (reads colGroupings and aggregates)
 * @returns true when the query can be merged
 */
function canMergeQuery(query) {
    // Limited column groupings are handled by restructured builders that don't merge.
    for (const grouping of query.colGroupings) {
        if (grouping.limit) {
            return false;
        }
    }
    for (const agg of query.aggregates) {
        const scope = agg.isPercentage ? agg.denominatorScope : undefined;
        if (!scope) {
            continue;
        }
        const grandTotalAcrossColumns = scope === 'all' && query.colGroupings.length > 0;
        const axisScoped = scope === 'rows' || scope === 'cols';
        if (grandTotalAcrossColumns || axisScoped || Array.isArray(scope)) {
            // These percentages are computed from a flat query, so merging is off.
            return false;
        }
    }
    return true;
}
445
/**
 * Combine queries that differ only in their column groupings.
 *
 * Example: COLS gender | sector_label would otherwise produce two queries;
 * after merging there is a single query with two nests (by_gender,
 * by_sector_label), so one round-trip yields the data for both variants.
 *
 * Steps:
 * 1. Split the input into mergeable and non-mergeable queries.
 * 2. Bucket the mergeable ones by row signature.
 * 3. Emit one query per bucket (a copy with a fresh id for singletons, a
 *    merged query otherwise), followed by the non-mergeable queries.
 *    Ids are renumbered sequentially in emission order.
 * 4. Rebuild the path -> id map so every path points at the new id of the
 *    query that absorbed its old one.
 *
 * @param queries Tagged queries (each with an `id`)
 * @param pathToQuery Map from serialized path pair to the old query id
 * @returns `{ queries, pathToQuery }` with renumbered ids
 */
function mergeColumnVariants(queries, pathToQuery) {
    const mergeable = [];
    const nonMergeable = [];
    for (const query of queries) {
        (canMergeQuery(query) ? mergeable : nonMergeable).push(query);
    }
    // Bucket by row signature, keeping first-seen order of signatures.
    const rowGroups = new Map();
    for (const query of mergeable) {
        const sig = buildRowSignature(query);
        if (!rowGroups.has(sig)) {
            rowGroups.set(sig, []);
        }
        rowGroups.get(sig).push(query);
    }
    const mergedQueries = [];
    const newPathToQuery = new Map();
    let nextIndex = 0;
    // Point every path that used `oldId` at `newId`.
    const repoint = (oldId, newId) => {
        for (const [path, id] of pathToQuery.entries()) {
            if (id === oldId) {
                newPathToQuery.set(path, newId);
            }
        }
    };
    // Carry a query over unchanged apart from its id.
    const carryOver = (query) => {
        const newId = `q${nextIndex++}`;
        mergedQueries.push({ ...query, id: newId });
        repoint(query.id, newId);
    };
    for (const group of rowGroups.values()) {
        if (group.length === 1) {
            carryOver(group[0]);
            continue;
        }
        const merged = mergeQueryGroup(group, nextIndex++);
        mergedQueries.push(merged);
        for (const member of group) {
            repoint(member.id, merged.id);
        }
    }
    for (const query of nonMergeable) {
        carryOver(query);
    }
    return {
        queries: mergedQueries,
        pathToQuery: newPathToQuery,
    };
}
533
/**
 * Fold a group of queries sharing a row structure into a single query.
 *
 * The first query supplies every top-level field; each remaining query is
 * reduced to a column variant (`colGroupings`, `isTotal`, `totalLabel`,
 * `colPath`) and stored under `additionalColVariants`. The merged signature
 * is the primary's row signature followed by `|MERGED:` and the `+`-joined
 * column signature parts of all variants (primary first).
 *
 * @param group Queries to merge (first one is the primary)
 * @param index Numeric index used to form the merged query id (`q<index>`)
 * @returns The merged query
 */
function mergeQueryGroup(group, index) {
    const [primary, ...others] = group;
    const additionalColVariants = others.map(({ colGroupings, hasColTotal, colTotalLabel, colPath }) => ({
        colGroupings,
        isTotal: hasColTotal,
        totalLabel: colTotalLabel,
        colPath,
    }));
    // Column info of every variant is embedded in the signature for debugging.
    const colParts = [buildColSignaturePart(primary.colGroupings, primary.hasColTotal)];
    for (const variant of additionalColVariants) {
        colParts.push(buildColSignaturePart(variant.colGroupings, variant.isTotal));
    }
    const signature = `${buildRowSignature(primary)}|MERGED:${colParts.join('+')}`;
    const { rowPath, colPath, rowGroupings, colGroupings, aggregates, isRowTotal, hasColTotal, rowTotalLabel, colTotalLabel, } = primary;
    return {
        id: `q${index}`,
        rowPath,
        colPath, // Primary's col path
        rowGroupings,
        colGroupings,
        aggregates,
        isRowTotal,
        hasColTotal,
        rowTotalLabel,
        colTotalLabel,
        signature,
        additionalColVariants,
    };
}
570
/**
 * Describe one column variant for the merged signature.
 *
 * - No groupings: `ALL` when the variant is a total, otherwise `NONE`.
 * - Otherwise: groupings joined by `*`, each the dimension followed by
 *   `[<-?><count>]` when limited (`-` for descending).
 *
 * @param colGroupings Column grouping objects
 * @param isTotal Whether the variant is a column total
 * @returns The signature fragment
 */
function buildColSignaturePart(colGroupings, isTotal) {
    if (colGroupings.length === 0) {
        return isTotal ? 'ALL' : 'NONE';
    }
    const pieces = colGroupings.map(grouping => {
        if (!grouping.limit) {
            return grouping.dimension;
        }
        const sign = grouping.limit.direction === 'desc' ? '-' : '';
        return grouping.dimension + `[${sign}${grouping.limit.count}]`;
    });
    const joined = pieces.join('*');
    return joined || 'NONE';
}
585
+ // ---
586
+ // DEBUGGING
587
+ // ---
588
/**
 * Render a query plan as human-readable text for debugging.
 *
 * Output: a header with the query and path-mapping counts, then one section
 * per query (id, signature, serialized paths, groupings, aggregates, total
 * flags and any total labels), and finally a line reporting how many path
 * mappings exceed the number of distinct signatures, when that is positive.
 *
 * @param plan Query plan (`queries` array and `pathToQuery` Map)
 * @returns The report, lines joined with newlines
 */
export function printQueryPlan(plan) {
    const describeRow = (grouping) => {
        let text = grouping.dimension;
        if (grouping.limit) {
            const sign = grouping.limit.direction === 'desc' ? '-' : '';
            text += `[${sign}${grouping.limit.count}]`;
        }
        if (grouping.acrossDimensions) {
            text += ` ACROSS(${grouping.acrossDimensions.join(',')})`;
        }
        return text;
    };
    const report = [
        'QueryPlan:',
        ` Total queries: ${plan.queries.length}`,
        ` Path mappings: ${plan.pathToQuery.size}`,
        '',
    ];
    for (const query of plan.queries) {
        report.push(
            ` Query ${query.id}:`,
            ` signature: ${query.signature}`,
            ` rowPath: ${serializeTreePath(query.rowPath)}`,
            ` colPath: ${serializeTreePath(query.colPath)}`,
            ` rowGroupings: [${query.rowGroupings.map(g => describeRow(g)).join(', ')}]`,
            ` colGroupings: [${query.colGroupings.map(g => g.dimension).join(', ')}]`,
            ` aggregates: [${query.aggregates.map(a => a.name).join(', ')}]`,
            ` isRowTotal: ${query.isRowTotal}`,
            ` hasColTotal: ${query.hasColTotal}`
        );
        if (query.rowTotalLabel) {
            report.push(` rowTotalLabel: "${query.rowTotalLabel}"`);
        }
        if (query.colTotalLabel) {
            report.push(` colTotalLabel: "${query.colTotalLabel}"`);
        }
        report.push('');
    }
    // Dedup summary: path mappings beyond the number of distinct signatures.
    const distinctSignatures = new Set(plan.queries.map(q => q.signature)).size;
    const surplus = plan.pathToQuery.size - distinctSignatures;
    if (surplus > 0) {
        report.push(` Deduplicated: ${surplus} duplicate queries merged`);
    }
    return report.join('\n');
}
629
/**
 * Count the queries that would exist before any deduplication:
 * (number of row branches) x (number of column branches).
 * An absent axis counts as a single branch.
 *
 * @param spec The table spec (`rowAxis` and `colAxis` are optional)
 * @returns The raw query count
 */
export function countRawQueries(spec) {
    const branchCount = (axis) => (axis ? collectBranches(axis) : [[]]).length;
    const rows = branchCount(spec.rowAxis);
    const cols = branchCount(spec.colAxis);
    return rows * cols;
}
637
/**
 * Turn every query of a plan into a Malloy query specification.
 *
 * @param plan The query plan
 * @param sourceName The Malloy source name (e.g., 'names')
 * @param options Optional settings: `where` (WHERE clause) and `firstAxis`
 *                (defaults to 'row' when null/undefined)
 * @returns One object per query with `id`, `malloy`, `rowGroupings`,
 *          `colGroupings`, `axesInverted` and `isFlatQuery`
 */
export function generateMalloyQueries(plan, sourceName, options = {}) {
    const firstAxis = options.firstAxis ?? 'row';
    const toMalloySpec = (query) => {
        const { malloy, axesInverted, isFlatQuery } = buildMalloyFromSpec(query, sourceName, options.where, firstAxis);
        return {
            id: query.id,
            malloy,
            rowGroupings: query.rowGroupings,
            colGroupings: query.colGroupings,
            axesInverted,
            isFlatQuery,
        };
    };
    return plan.queries.map(query => toMalloySpec(query));
}
659
/**
 * Decide whether percentage aggregates force a flat (non-nested) query.
 *
 * Background: inside a nest, `all(agg, dim)` only sees dimensions of the
 * current scope; a flat query puts every dimension in one group_by.
 *
 * A flat query is required only when the query has column groupings and some
 * percentage aggregate has a denominator scope that is:
 * - `'all'`, `'rows'` or `'cols'`, or
 * - an array naming at least one dimension present in the row or column groupings.
 *
 * @param query Query object (reads aggregates, rowGroupings, colGroupings)
 * @returns true when a flat query is needed
 */
function needsFlatQueryForPercentage(query) {
    const mentionsGroupedDimension = (scope) => {
        const rowDims = new Set(query.rowGroupings.map(g => g.dimension));
        const colDims = new Set(query.colGroupings.map(g => g.dimension));
        for (const dim of scope) {
            if (colDims.has(dim) || rowDims.has(dim)) {
                return true;
            }
        }
        return false;
    };
    for (const agg of query.aggregates) {
        const scope = agg.isPercentage ? agg.denominatorScope : undefined;
        if (!scope) {
            continue;
        }
        // Without column groupings the nested structure is always sufficient.
        if (query.colGroupings.length === 0) {
            continue;
        }
        if (scope === 'all' || scope === 'rows' || scope === 'cols') {
            return true;
        }
        if (Array.isArray(scope) && mentionsGroupedDimension(scope)) {
            return true;
        }
    }
    return false;
}
701
/**
 * Build the Malloy text for one tagged query, choosing the query shape.
 *
 * Limit priority follows declaration order: the first-declared axis gets
 * global limits, limits on the second-declared axis apply per parent, and
 * nesting within an axis is always hierarchical (a * b[-3] = top 3 b per a).
 *
 * Shapes, tried in this order:
 * 1. Flat query - when percentage aggregates require it.
 * 2. Column-limit restructure (axes inverted) - when a column grouping has a
 *    plain limit (a limit without ACROSS dimensions), the query has row
 *    groupings, is not a row total, and either columns were declared first
 *    or the first row grouping has no plain limit.
 * 3. Row-limit restructure - when some row grouping has a plain limit and
 *    there is more than one row grouping.
 * 4. Standard query - rows outer, columns nested.
 *
 * @param query The tagged query spec
 * @param sourceName The Malloy source name
 * @param where Optional WHERE clause passed through to the builders
 * @param firstAxis Which axis was declared first ('col' gives columns priority)
 * @returns `{ malloy, axesInverted, isFlatQuery }`
 */
function buildMalloyFromSpec(query, sourceName, where, firstAxis) {
    if (needsFlatQueryForPercentage(query)) {
        return {
            malloy: buildFlatQuery(query, sourceName, where),
            axesInverted: false,
            isFlatQuery: true,
        };
    }
    // "Plain" limit: a limit on a grouping that has no ACROSS dimensions.
    const hasPlainLimit = (grouping) => {
        const across = grouping.acrossDimensions;
        return grouping.limit !== undefined && !(across && across.length > 0);
    };
    const limitedColIndex = query.colGroupings.findIndex(g => hasPlainLimit(g));
    const limitedRowIndex = query.rowGroupings.findIndex(g => hasPlainLimit(g));
    const colsHaveLimit = limitedColIndex !== -1;
    const rowsHaveLimit = limitedRowIndex !== -1;
    const firstRowHasLimit = limitedRowIndex === 0;
    const rowCount = query.rowGroupings.length;
    // Columns become the outer level when they have priority over the rows.
    const colsTakePriority = firstAxis === 'col' || !firstRowHasLimit;
    if (colsHaveLimit && rowCount > 0 && !query.isRowTotal && colsTakePriority) {
        return {
            malloy: buildRestructuredQueryForColLimit(query, sourceName, where, limitedColIndex),
            axesInverted: true,
            isFlatQuery: false,
        };
    }
    // Row hierarchy with a limit (state[-5] * gender, state * city[-3], ...).
    if (rowsHaveLimit && rowCount > 1) {
        return {
            malloy: buildRestructuredQueryForRowLimit(query, sourceName, where, limitedRowIndex),
            axesInverted: false, // Row dims stay as "rows" for rendering
            isFlatQuery: false,
        };
    }
    // Default: rows outer, cols nested.
    return {
        malloy: buildStandardQuery(query, sourceName, where),
        axesInverted: false,
        isFlatQuery: false,
    };
}
775
/**
 * Build the restructured query used when a column dimension has a limit that
 * must apply ACROSS the row dimensions rather than per row.
 *
 * Shape for COLS state * name[-3] ROWS year:
 * ```
 * group_by: state               // col dims before the limited one
 * nest: by_name is {            // the limited col dim
 *   group_by: name
 *   aggregate: births_sum
 *   nest: by_year is {          // row dims nested inside
 *     group_by: year
 *     aggregate: births_sum
 *   }
 *   order_by: births_sum desc   // limit applied here
 *   limit: 3
 * }
 * ```
 *
 * Column dimensions after the limited one, if any, are nested first (with the
 * row dimensions handed to that nest builder); otherwise the row dimensions
 * are nested directly.
 *
 * @param query The tagged query spec
 * @param sourceName The Malloy source name
 * @param where Optional WHERE clause
 * @param limitedColIndex Index of the column grouping with the limit
 * @returns The Malloy query text
 */
function buildRestructuredQueryForColLimit(query, sourceName, where, limitedColIndex) {
    const { colGroupings, rowGroupings, aggregates } = query;
    const limited = colGroupings[limitedColIndex];
    const outerCols = colGroupings.slice(0, limitedColIndex);
    const innerCols = colGroupings.slice(limitedColIndex + 1);
    // `label is field` when the grouping has a label different from its dimension.
    const groupByTerm = (grouping) => {
        const escaped = escapeFieldName(grouping.dimension);
        const relabelled = grouping.label && grouping.label !== grouping.dimension;
        return relabelled ? `\`${grouping.label}\` is ${escaped}` : escaped;
    };
    const out = [`run: ${sourceName} -> {`];
    if (where) {
        out.push(` where: ${where}`);
    }
    // Column dimensions BEFORE the limited one form the outer group_by.
    if (outerCols.length > 0) {
        const terms = outerCols.map(g => groupByTerm(g));
        out.push(` group_by:`);
        for (const term of terms) {
            out.push(` ${term}`);
        }
    }
    // The limited column dimension becomes a nest carrying the limit.
    const limitedTerm = groupByTerm(limited);
    out.push(` nest: by_${limited.dimension} is {`);
    out.push(` group_by: ${limitedTerm}`);
    // The aggregate being ordered by must exist at this level even if it is not displayed.
    const orderByTarget = limited.limit?.orderBy;
    const levelAggregates = orderByTarget
        ? ensureOrderByAggregateInList(orderByTarget, aggregates)
        : aggregates;
    out.push(...buildAggregateLines(levelAggregates, ' ', rowGroupings, colGroupings));
    if (innerCols.length > 0) {
        // Remaining col dims keep the column hierarchy; rows go inside them.
        out.push(...buildColNestForRestructured(innerCols, aggregates, rowGroupings, ' '));
    }
    else if (rowGroupings.length > 0) {
        out.push(...buildRowNestForRestructured(rowGroupings, aggregates, [], ' '));
    }
    // Ordering and limit close the nest of the limited dimension.
    const direction = limited.limit.direction === 'desc' ? 'desc' : 'asc';
    const orderField = buildOrderByField(limited.limit, limited.dimension);
    out.push(` order_by: ${orderField} ${direction}`, ` limit: ${limited.limit.count}`, ` }`, '}');
    return out.join('\n');
}
863
/**
 * Generate a query in which a limited row dimension is evaluated ACROSS
 * the column dimensions rather than once per column.
 *
 * Shape produced for `ROWS state * name[-3] COLS year`:
 * ```
 * group_by: state              // row dims that precede the limited one
 * nest: by_name is {           // the limited row dim
 *   group_by: name
 *   aggregate: births_sum
 *   nest: by_year is {         // column dims are nested inside
 *     group_by: year
 *     aggregate: births_sum
 *   }
 *   order_by: births_sum desc  // the limit is enforced at this level
 *   limit: 3
 * }
 * ```
 *
 * @param query Query description; `rowGroupings`, `colGroupings` and `aggregates` are read.
 * @param sourceName Source name placed after `run:`.
 * @param where Filter text; a `where:` line is emitted only when it is truthy.
 * @param limitedRowIndex Index into `query.rowGroupings` of the grouping that carries the limit.
 * @returns The query text, one clause per line, joined with `\n`.
 */
function buildRestructuredQueryForRowLimit(query, sourceName, where, limitedRowIndex) {
    const target = query.rowGroupings[limitedRowIndex];
    const leadingRows = query.rowGroupings.slice(0, limitedRowIndex);
    const trailingRows = query.rowGroupings.slice(limitedRowIndex + 1);
    // Renders "`label` is field" when the grouping is aliased, the bare escaped field otherwise.
    const describe = (grouping) => {
        const field = escapeFieldName(grouping.dimension);
        if (grouping.label && grouping.label !== grouping.dimension) {
            return `\`${grouping.label}\` is ${field}`;
        }
        return field;
    };
    const out = [];
    out.push(`run: ${sourceName} -> {`);
    if (where) {
        out.push(` where: ${where}`);
    }
    // Row dimensions ahead of the limited one share a single comma-separated group_by line.
    if (leadingRows.length > 0) {
        const entries = leadingRows.map((grouping) => describe(grouping));
        out.push(` group_by: ${entries.join(', ')}`);
    }
    // The limited dimension opens its own nest.
    const targetExpr = describe(target);
    out.push(` nest: by_${target.dimension} is {`);
    out.push(` group_by: ${targetExpr}`);
    // The aggregate used for ordering has to be present at this level even when it is not displayed.
    let orderingAggregates = query.aggregates;
    if (target.limit?.orderBy) {
        orderingAggregates = ensureOrderByAggregateInList(target.limit.orderBy, orderingAggregates);
    }
    out.push(...buildAggregateLines(orderingAggregates, ' ', query.rowGroupings, query.colGroupings));
    // Later row dims are nested first with the columns inside them, which keeps the
    // hierarchy strictly one dimension per level; with none left, columns are nested directly.
    if (trailingRows.length > 0) {
        out.push(...buildRowNestForRestructured(trailingRows, query.aggregates, query.colGroupings, ' '));
    } else if (query.colGroupings.length > 0) {
        out.push(...buildNestClauseWithIndent(query.colGroupings, query.aggregates, false, ' '));
    }
    // Ordering and limit close the limited dimension's nest.
    const direction = target.limit.direction === 'desc' ? 'desc' : 'asc';
    const orderField = buildOrderByField(target.limit, target.dimension);
    out.push(` order_by: ${orderField} ${direction}`);
    out.push(` limit: ${target.limit.count}`);
    out.push(` }`, '}');
    return out.join('\n');
}
948
/**
 * Emit the nest lines for a chain of row dimensions inside a restructured query.
 * Each row dimension is nested inside the previous one; the column groupings
 * are attached inside the innermost row dimension.
 *
 * @param rowGroupings Row groupings still to be nested; an empty list yields `[]`.
 * @param aggregates Aggregates listed at every level.
 * @param colGroupings Column groupings nested at the innermost row level (default `[]`).
 * @param baseIndent Prefix applied to the lines of this level.
 * @returns The generated lines, in order.
 */
function buildRowNestForRestructured(rowGroupings, aggregates, colGroupings = [], baseIndent = ' ') {
    if (rowGroupings.length === 0) {
        return [];
    }
    const current = rowGroupings[0];
    const deeperRows = rowGroupings.slice(1);
    const inner = `${baseIndent} `;
    const field = escapeFieldName(current.dimension);
    const aliased = current.label && current.label !== current.dimension;
    const out = [
        `${baseIndent}nest: by_${current.dimension} is {`,
        aliased
            ? `${inner}group_by: \`${current.label}\` is ${field}`
            : `${inner}group_by: ${field}`,
        `${inner}aggregate:`,
    ];
    for (const agg of aggregates) {
        const expression = buildAggExpression(agg.measure, agg.aggregation);
        const aggName = escapeFieldName(agg.name);
        out.push(`${baseIndent} ${aggName} is ${expression}`);
    }
    // Descend through the remaining rows; columns only appear once the rows are exhausted.
    if (deeperRows.length > 0) {
        out.push(...buildRowNestForRestructured(deeperRows, aggregates, colGroupings, inner));
    } else if (colGroupings.length > 0) {
        out.push(...buildNestClauseWithIndent(colGroupings, aggregates, false, inner));
    }
    // A limit takes precedence; an explicit order is only used when there is no limit.
    const { limit, order } = current;
    if (limit) {
        const direction = limit.direction === 'desc' ? 'desc' : 'asc';
        const orderField = buildOrderByField(limit, current.dimension);
        out.push(`${inner}order_by: ${orderField} ${direction}`);
        out.push(`${inner}limit: ${limit.count}`);
    } else if (order?.direction) {
        const orderField = buildOrderByFieldFromOrder(order, current.dimension);
        out.push(`${inner}order_by: ${orderField} ${order.direction}`);
    }
    out.push(`${baseIndent}}`);
    return out;
}
1001
/**
 * Emit the nest lines for a chain of column dimensions inside a restructured query.
 * Counterpart of buildRowNestForRestructured for column-first queries: each column
 * dimension is nested inside the previous one and the row groupings are attached
 * inside the innermost column dimension.
 *
 * @param colGroupings Column groupings still to be nested; an empty list yields `[]`.
 * @param aggregates Aggregates listed at every level.
 * @param rowGroupings Row groupings nested at the innermost column level (default `[]`).
 * @param baseIndent Prefix applied to the lines of this level.
 * @returns The generated lines, in order.
 */
function buildColNestForRestructured(colGroupings, aggregates, rowGroupings = [], baseIndent = ' ') {
    if (colGroupings.length === 0) {
        return [];
    }
    const head = colGroupings[0];
    const tail = colGroupings.slice(1);
    const childIndent = baseIndent + ' ';
    const field = escapeFieldName(head.dimension);
    const lines = [`${baseIndent}nest: by_${head.dimension} is {`];
    const groupTarget = head.label && head.label !== head.dimension
        ? `\`${head.label}\` is ${field}`
        : field;
    lines.push(`${baseIndent} group_by: ${groupTarget}`);
    lines.push(`${baseIndent} aggregate:`);
    for (const agg of aggregates) {
        const expression = buildAggExpression(agg.measure, agg.aggregation);
        const aggName = escapeFieldName(agg.name);
        lines.push(`${baseIndent} ${aggName} is ${expression}`);
    }
    // Descend through the remaining columns; rows only appear once the columns are exhausted.
    if (tail.length > 0) {
        lines.push(...buildColNestForRestructured(tail, aggregates, rowGroupings, childIndent));
    } else if (rowGroupings.length > 0) {
        lines.push(...buildRowNestForRestructured(rowGroupings, aggregates, [], childIndent));
    }
    // A limit takes precedence; an explicit order is only used when there is no limit.
    if (head.limit) {
        const direction = head.limit.direction === 'desc' ? 'desc' : 'asc';
        const orderField = buildOrderByField(head.limit, head.dimension);
        lines.push(`${baseIndent} order_by: ${orderField} ${direction}`);
        lines.push(`${baseIndent} limit: ${head.limit.count}`);
    } else if (head.order?.direction) {
        const orderField = buildOrderByFieldFromOrder(head.order, head.dimension);
        lines.push(`${baseIndent} order_by: ${orderField} ${head.order.direction}`);
    }
    lines.push(`${baseIndent}}`);
    return lines;
}
1055
/**
 * Emit nested `nest:` blocks for a list of groupings, starting at a caller-supplied indent.
 * Every level lists all aggregates; each further grouping is nested inside the previous one.
 *
 * @param colGroupings Groupings to nest, outermost first; an empty list yields `[]`.
 * @param aggregates Aggregates listed at every level.
 * @param skipLimits When truthy, a grouping's limit is not emitted (an explicit order still is).
 * @param baseIndent Prefix for the outermost level; deeper levels extend it.
 * @returns The generated lines, in order.
 */
function buildNestClauseWithIndent(colGroupings, aggregates, skipLimits, baseIndent) {
    if (colGroupings.length === 0) {
        return [];
    }
    // Renders the first pending grouping and recurses into the rest.
    const renderFrom = (pending, depth) => {
        if (pending.length === 0) {
            return [];
        }
        const group = pending[0];
        const deeper = pending.slice(1);
        const pad = baseIndent + ' '.repeat(depth);
        const out = [`${pad}nest: by_${group.dimension} is {`];
        const field = escapeFieldName(group.dimension);
        const aliased = group.label && group.label !== group.dimension;
        out.push(aliased
            ? `${pad} group_by: \`${group.label}\` is ${field}`
            : `${pad} group_by: ${field}`);
        out.push(`${pad} aggregate:`);
        for (const agg of aggregates) {
            const expression = buildAggExpression(agg.measure, agg.aggregation);
            const aggName = escapeFieldName(agg.name);
            out.push(`${pad} ${aggName} is ${expression}`);
        }
        if (deeper.length > 0) {
            out.push(...renderFrom(deeper, depth + 1));
        }
        // Limit (unless suppressed) wins over a plain explicit order.
        if (!skipLimits && group.limit) {
            const direction = group.limit.direction === 'desc' ? 'desc' : 'asc';
            const orderField = buildOrderByField(group.limit, group.dimension);
            out.push(`${pad} order_by: ${orderField} ${direction}`);
            out.push(`${pad} limit: ${group.limit.count}`);
        } else if (group.order?.direction) {
            const orderField = buildOrderByFieldFromOrder(group.order, group.dimension);
            out.push(`${pad} order_by: ${orderField} ${group.order.direction}`);
        }
        out.push(`${pad}}`);
        return out;
    };
    return renderFrom(colGroupings, 0);
}
1106
/**
 * Generate a standard (non-restructured) query: row groupings in the outer
 * group_by, column groupings as nests.
 *
 * Merged queries (those carrying `additionalColVariants`) produce several nests
 * and/or an outer aggregate inside one query:
 *
 *   COLS gender | sector_label  → two nests: by_gender, by_sector_label
 *   COLS education | ALL        → one nest (by_education) + outer aggregate
 *
 * @param query Query description; row/column groupings, aggregates and column variants are read.
 * @param sourceName Source name placed after `run:`.
 * @param where Filter text; a `where:` line is emitted only when it is truthy.
 * @returns The query text, one clause per line, joined with `\n`.
 */
function buildStandardQuery(query, sourceName, where) {
    const out = [`run: ${sourceName} -> {`];
    if (where) {
        out.push(` where: ${where}`);
    }
    // Outer group_by: every row grouping, aliased when it has a distinct label.
    const rowExprs = query.rowGroupings.map((grouping) => {
        const field = escapeFieldName(grouping.dimension);
        return grouping.label && grouping.label !== grouping.dimension
            ? `\`${grouping.label}\` is ${field}`
            : field;
    });
    if (rowExprs.length > 0) {
        out.push(` group_by: ${rowExprs.join(', ')}`);
    }
    // Row-level limits/orders need an aggregate at the outer level when columns are nested.
    const limitedRow = query.rowGroupings.find((grouping) => grouping.limit);
    const rowOrderedByExpr = query.rowGroupings.find((grouping) => grouping.order?.orderBy);
    const variants = collectAllColVariants(query);
    const anyNestedVariant = variants.some((variant) => variant.colGroupings.length > 0);
    const orderingNeedsOuterAggregate = (limitedRow || rowOrderedByExpr) && anyNestedVariant;
    // A total variant (no column groupings, flagged as total) also reads the outer aggregate.
    const anyTotalVariant = variants.some((variant) => variant.colGroupings.length === 0 && variant.isTotal);
    // Make sure the aggregate referenced by the ordering is part of the outer list.
    let outerAggregates = query.aggregates;
    if (limitedRow?.limit?.orderBy) {
        outerAggregates = ensureOrderByAggregateInList(limitedRow.limit.orderBy, outerAggregates);
    } else if (rowOrderedByExpr?.order?.orderBy) {
        outerAggregates = ensureOrderByAggregateInList(rowOrderedByExpr.order.orderBy, outerAggregates);
    }
    // Outer aggregate is emitted for row-only queries, for ordering, or for a total variant.
    if (!anyNestedVariant || orderingNeedsOuterAggregate || anyTotalVariant) {
        out.push(...buildAggregateLines(outerAggregates, ' ', query.rowGroupings, query.colGroupings));
    }
    // One nest per column variant that actually has groupings.
    for (const variant of variants) {
        if (variant.colGroupings.length === 0) {
            continue;
        }
        out.push(...buildNestClause(variant.colGroupings, query.aggregates, false, query.rowGroupings));
    }
    if (limitedRow) {
        const { limit, dimension } = limitedRow;
        const direction = limit.direction === 'desc' ? 'desc' : 'asc';
        const orderField = buildOrderByField(limit, dimension);
        out.push(` order_by: ${orderField} ${direction}`);
        out.push(` limit: ${limit.count}`);
    } else if (rowExprs.length > 0) {
        // An explicit order may exist without a limit.
        const explicitlyOrdered = query.rowGroupings.find((grouping) => grouping.order?.direction);
        if (explicitlyOrdered && explicitlyOrdered.order) {
            const orderField = buildOrderByFieldFromOrder(explicitlyOrdered.order, explicitlyOrdered.dimension);
            out.push(` order_by: ${orderField} ${explicitlyOrdered.order.direction}`);
        }
        // With no explicit limit, a large explicit one is written so that every row
        // combination comes back (original author's note: Malloy otherwise defaults to 10 rows).
        out.push(` limit: 100000`);
    }
    out.push('}');
    return out.join('\n');
}
1194
/**
 * List every column variant of a query: the primary one first, followed by any
 * entries of `query.additionalColVariants`.
 *
 * The primary variant is assembled from the query's own `colGroupings`,
 * `hasColTotal`, `colTotalLabel` and `colPath`, so simple and merged queries
 * can be processed through the same array.
 *
 * @param query Query description.
 * @returns A new array of column variants.
 */
function collectAllColVariants(query) {
    const { colGroupings, hasColTotal, colTotalLabel, colPath } = query;
    const primaryVariant = {
        colGroupings,
        isTotal: hasColTotal,
        totalLabel: colTotalLabel,
        colPath,
    };
    // Merged queries carry extra variants; plain queries have only the primary one.
    return query.additionalColVariants
        ? [primaryVariant, ...query.additionalColVariants]
        : [primaryVariant];
}
1215
/**
 * Generate a flat query: every row and column dimension goes into one group_by.
 *
 * Used when percentage aggregates need to reference dimensions across scopes;
 * with all dimensions in a single group_by they can all be named in all()
 * expressions. The query metadata still records which dimensions are rows and
 * which are columns, so the result can be laid out as a crosstab afterwards.
 *
 * @param query Query description; `rowGroupings`, `colGroupings` and `aggregates` are read.
 * @param sourceName Source name placed after `run:`.
 * @param where Filter text; a `where:` line is emitted only when it is truthy.
 * @returns The query text, one clause per line, joined with `\n`.
 */
function buildFlatQuery(query, sourceName, where) {
    const out = [`run: ${sourceName} -> {`];
    if (where) {
        out.push(` where: ${where}`);
    }
    // Renders "`label` is field" when the grouping is aliased, the bare escaped field otherwise.
    const describe = (grouping) => {
        const field = escapeFieldName(grouping.dimension);
        return grouping.label && grouping.label !== grouping.dimension
            ? `\`${grouping.label}\` is ${field}`
            : field;
    };
    // Rows first, then columns, all in the same group_by.
    const dims = [
        ...query.rowGroupings.map((grouping) => describe(grouping)),
        ...query.colGroupings.map((grouping) => describe(grouping)),
    ];
    if (dims.length > 0) {
        out.push(` group_by: ${dims.join(', ')}`);
    }
    // Simplified limit handling: only the first limit found is honoured, rows before columns.
    const rowLimited = query.rowGroupings.find((grouping) => grouping.limit);
    const colLimited = query.colGroupings.find((grouping) => grouping.limit);
    const limited = rowLimited ?? colLimited;
    // The aggregate used for ordering must be listed even when it is not displayed.
    const aggregatesForQuery = limited?.limit?.orderBy
        ? ensureOrderByAggregateInList(limited.limit.orderBy, query.aggregates)
        : query.aggregates;
    out.push(...buildAggregateLines(aggregatesForQuery, ' ', query.rowGroupings, query.colGroupings));
    if (limited?.limit) {
        const direction = limited.limit.direction === 'desc' ? 'desc' : 'asc';
        const orderField = buildOrderByField(limited.limit, limited.dimension);
        out.push(` order_by: ${orderField} ${direction}`);
        // Flat queries use a limit 100 times the requested count.
        out.push(` limit: ${limited.limit.count * 100}`);
    } else {
        out.push(` limit: 100000`);
    }
    out.push('}');
    return out.join('\n');
}
1277
/**
 * Map each dimension name to the name it has in the query output: the label
 * when the grouping has one that differs from the dimension, otherwise the
 * dimension itself. all() expressions have to refer to these output names.
 *
 * Row groupings are processed before column groupings, so a column grouping
 * with the same dimension overwrites the row entry.
 *
 * @param rowGroupings Row groupings (default `[]`).
 * @param colGroupings Column groupings (default `[]`).
 * @returns A Map from dimension name to output name.
 */
function buildDimToOutputNameMap(rowGroupings = [], colGroupings = []) {
    const outputNames = new Map();
    const register = ({ dimension, label }) => {
        const useLabel = label && label !== dimension;
        outputNames.set(dimension, useLabel ? label : dimension);
    };
    for (const grouping of rowGroupings) {
        register(grouping);
    }
    for (const grouping of colGroupings) {
        register(grouping);
    }
    return outputNames;
}
1292
/**
 * Produce an `aggregate:` header followed by one "<name> is <expression>" line
 * per aggregate. Percentage aggregates that carry a denominator scope are
 * rendered through buildPercentageAggExpression; all others through
 * buildAggExpression.
 *
 * @param aggregates The aggregates to render; an empty list yields `[]`.
 * @param indent Prefix placed before every generated line.
 * @param rowGroupings Row groupings (dimension names and label mapping for percentages).
 * @param colGroupings Column groupings (dimension names and label mapping for percentages).
 * @returns The generated lines, header first.
 */
function buildAggregateLines(aggregates, indent, rowGroupings = [], colGroupings = []) {
    if (aggregates.length === 0) {
        return [];
    }
    const rowDims = rowGroupings.map((grouping) => grouping.dimension);
    const colDims = colGroupings.map((grouping) => grouping.dimension);
    const outputNames = buildDimToOutputNameMap(rowGroupings, colGroupings);
    // Picks the percentage form only when both the flag and the scope are set.
    const toExpression = (agg) => {
        if (agg.isPercentage && agg.denominatorScope) {
            return buildPercentageAggExpression(agg.measure, agg.aggregation, agg.denominatorScope, rowDims, colDims, outputNames);
        }
        return buildAggExpression(agg.measure, agg.aggregation);
    };
    const out = [`${indent}aggregate:`];
    for (const agg of aggregates) {
        const expression = toExpression(agg);
        const aggName = escapeFieldName(agg.name);
        out.push(`${indent} ${aggName} is ${expression}`);
    }
    return out;
}
1322
/**
 * Emit the nested `nest:` blocks that implement column pivots.
 *
 * Aggregates are listed at the innermost level (cell values) and at any level
 * whose limit or expression-based order needs them for `order_by`.
 *
 * @param colGroupings Column groupings to nest, outermost first; an empty list yields `[]`.
 * @param aggregates Aggregates to compute.
 * @param skipLimits When true, limits are not emitted (used for total queries).
 * @param rowGroupings Row groupings (for percentage calculations and label mapping).
 * @returns The generated lines, in order.
 */
function buildNestClause(colGroupings, aggregates, skipLimits = false, rowGroupings = []) {
    if (colGroupings.length === 0) {
        return [];
    }
    const unit = ' ';
    const pad = (depth) => unit.repeat(depth);
    const rowDims = rowGroupings.map((grouping) => grouping.dimension);
    const colDims = colGroupings.map((grouping) => grouping.dimension);
    const outputNames = buildDimToOutputNameMap(rowGroupings, colGroupings);
    // Expression for one aggregate; percentages with a scope use the percentage builder.
    const toExpression = (agg) => (agg.isPercentage && agg.denominatorScope
        ? buildPercentageAggExpression(agg.measure, agg.aggregation, agg.denominatorScope, rowDims, colDims, outputNames)
        : buildAggExpression(agg.measure, agg.aggregation));
    // "aggregate:" header at `depth`, one entry per aggregate one level deeper.
    const aggregateBlock = (list, depth) => {
        const block = [`${pad(depth)}aggregate:`];
        for (const agg of list) {
            const expression = toExpression(agg);
            const aggName = escapeFieldName(agg.name);
            block.push(`${pad(depth + 1)}${aggName} is ${expression}`);
        }
        return block;
    };
    // Returns `list`, extended with the aggregate an orderBy expression refers to
    // when that aggregate is not already present. Strings and empty values are ignored.
    const withOrderAggregate = (orderByExpr, list) => {
        if (!orderByExpr || typeof orderByExpr === 'string') {
            return list;
        }
        const pickAggregate = (node) => (node.type === 'aggregateExpr'
            ? { measure: node.field, aggregation: node.function }
            : null);
        let found;
        switch (orderByExpr.type) {
            case 'aggregateExpr':
                found = pickAggregate(orderByExpr);
                break;
            case 'ratioExpr':
                // A ratio is represented by its numerator aggregate.
                found = pickAggregate(orderByExpr.numerator);
                break;
            default:
                found = null;
        }
        if (!found) {
            return list;
        }
        const aggName = `${found.measure}_${found.aggregation}`;
        if (list.some((existing) => existing.name === aggName)) {
            return list;
        }
        return [
            ...list,
            {
                name: aggName,
                measure: found.measure,
                aggregation: found.aggregation,
                label: undefined,
                isPercentage: false,
            },
        ];
    };
    // Renders the first grouping of `groupings` and recurses into the rest.
    const renderLevel = (groupings, depth) => {
        if (groupings.length === 0) {
            // Nothing left to group by: only the cell-value aggregates remain.
            return aggregateBlock(aggregates, depth);
        }
        const group = groupings[0];
        const field = escapeFieldName(group.dimension);
        const deeper = groupings.slice(1);
        const atLeaf = deeper.length === 0;
        const limitActive = group.limit && !skipLimits;
        // An order given as an expression (not a plain field name) needs aggregates at this level.
        const orderUsesExpression = group.order?.orderBy && typeof group.order.orderBy !== 'string';
        let levelAggregates = aggregates;
        if (group.limit?.orderBy) {
            levelAggregates = withOrderAggregate(group.limit.orderBy, levelAggregates);
        } else if (group.order?.orderBy) {
            levelAggregates = withOrderAggregate(group.order.orderBy, levelAggregates);
        }
        const out = [`${pad(depth)}nest: by_${group.dimension} is {`];
        const groupTarget = group.label && group.label !== group.dimension
            ? `\`${group.label}\` is ${field}`
            : field;
        out.push(`${pad(depth + 1)}group_by: ${groupTarget}`);
        if (limitActive || orderUsesExpression || atLeaf) {
            out.push(...aggregateBlock(levelAggregates, depth + 1));
        }
        if (limitActive) {
            const direction = group.limit.direction === 'desc' ? 'desc' : 'asc';
            const orderField = buildOrderByField(group.limit, group.dimension);
            out.push(`${pad(depth + 1)}order_by: ${orderField} ${direction}`);
            out.push(`${pad(depth + 1)}limit: ${group.limit.count}`);
        } else if (group.order?.direction) {
            // Explicit order without an active limit: order_by only.
            const orderField = buildOrderByFieldFromOrder(group.order, group.dimension);
            out.push(`${pad(depth + 1)}order_by: ${orderField} ${group.order.direction}`);
        }
        // The leaf already carries its aggregates, so recursion stops there.
        if (!atLeaf) {
            out.push(...renderLevel(deeper, depth + 1));
        }
        out.push(`${pad(depth)}}`);
        return out;
    };
    return renderLevel(colGroupings, 1);
}