coalesce-transform-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +304 -0
  3. package/dist/cache-dir.d.ts +26 -0
  4. package/dist/cache-dir.js +106 -0
  5. package/dist/client.d.ts +25 -0
  6. package/dist/client.js +212 -0
  7. package/dist/coalesce/api/environments.d.ts +20 -0
  8. package/dist/coalesce/api/environments.js +15 -0
  9. package/dist/coalesce/api/git-accounts.d.ts +21 -0
  10. package/dist/coalesce/api/git-accounts.js +21 -0
  11. package/dist/coalesce/api/jobs.d.ts +25 -0
  12. package/dist/coalesce/api/jobs.js +21 -0
  13. package/dist/coalesce/api/nodes.d.ts +29 -0
  14. package/dist/coalesce/api/nodes.js +33 -0
  15. package/dist/coalesce/api/projects.d.ts +22 -0
  16. package/dist/coalesce/api/projects.js +25 -0
  17. package/dist/coalesce/api/runs.d.ts +19 -0
  18. package/dist/coalesce/api/runs.js +34 -0
  19. package/dist/coalesce/api/subgraphs.d.ts +20 -0
  20. package/dist/coalesce/api/subgraphs.js +17 -0
  21. package/dist/coalesce/api/users.d.ts +30 -0
  22. package/dist/coalesce/api/users.js +31 -0
  23. package/dist/coalesce/types.d.ts +298 -0
  24. package/dist/coalesce/types.js +746 -0
  25. package/dist/generated/.gitkeep +0 -0
  26. package/dist/generated/node-type-corpus.json +42656 -0
  27. package/dist/index.d.ts +2 -0
  28. package/dist/index.js +10 -0
  29. package/dist/mcp/cache.d.ts +3 -0
  30. package/dist/mcp/cache.js +137 -0
  31. package/dist/mcp/environments.d.ts +3 -0
  32. package/dist/mcp/environments.js +61 -0
  33. package/dist/mcp/git-accounts.d.ts +3 -0
  34. package/dist/mcp/git-accounts.js +70 -0
  35. package/dist/mcp/jobs.d.ts +3 -0
  36. package/dist/mcp/jobs.js +77 -0
  37. package/dist/mcp/node-type-corpus.d.ts +3 -0
  38. package/dist/mcp/node-type-corpus.js +173 -0
  39. package/dist/mcp/nodes.d.ts +3 -0
  40. package/dist/mcp/nodes.js +341 -0
  41. package/dist/mcp/pipelines.d.ts +3 -0
  42. package/dist/mcp/pipelines.js +342 -0
  43. package/dist/mcp/projects.d.ts +3 -0
  44. package/dist/mcp/projects.js +70 -0
  45. package/dist/mcp/repo-node-types.d.ts +135 -0
  46. package/dist/mcp/repo-node-types.js +387 -0
  47. package/dist/mcp/runs.d.ts +3 -0
  48. package/dist/mcp/runs.js +92 -0
  49. package/dist/mcp/subgraphs.d.ts +3 -0
  50. package/dist/mcp/subgraphs.js +60 -0
  51. package/dist/mcp/users.d.ts +3 -0
  52. package/dist/mcp/users.js +107 -0
  53. package/dist/prompts/index.d.ts +2 -0
  54. package/dist/prompts/index.js +58 -0
  55. package/dist/resources/context/aggregation-patterns.md +145 -0
  56. package/dist/resources/context/data-engineering-principles.md +183 -0
  57. package/dist/resources/context/hydrated-metadata.md +92 -0
  58. package/dist/resources/context/id-discovery.md +64 -0
  59. package/dist/resources/context/intelligent-node-configuration.md +162 -0
  60. package/dist/resources/context/node-creation-decision-tree.md +156 -0
  61. package/dist/resources/context/node-operations.md +316 -0
  62. package/dist/resources/context/node-payloads.md +114 -0
  63. package/dist/resources/context/node-type-corpus.md +166 -0
  64. package/dist/resources/context/node-type-selection-guide.md +96 -0
  65. package/dist/resources/context/overview.md +135 -0
  66. package/dist/resources/context/pipeline-workflows.md +355 -0
  67. package/dist/resources/context/run-operations.md +55 -0
  68. package/dist/resources/context/sql-bigquery.md +41 -0
  69. package/dist/resources/context/sql-databricks.md +40 -0
  70. package/dist/resources/context/sql-platform-selection.md +70 -0
  71. package/dist/resources/context/sql-snowflake.md +43 -0
  72. package/dist/resources/context/storage-mappings.md +49 -0
  73. package/dist/resources/context/tool-usage.md +98 -0
  74. package/dist/resources/index.d.ts +5 -0
  75. package/dist/resources/index.js +254 -0
  76. package/dist/schemas/node-payloads.d.ts +5019 -0
  77. package/dist/schemas/node-payloads.js +147 -0
  78. package/dist/server.d.ts +7 -0
  79. package/dist/server.js +63 -0
  80. package/dist/services/cache/snapshots.d.ts +108 -0
  81. package/dist/services/cache/snapshots.js +275 -0
  82. package/dist/services/config/context-analyzer.d.ts +14 -0
  83. package/dist/services/config/context-analyzer.js +76 -0
  84. package/dist/services/config/field-classifier.d.ts +23 -0
  85. package/dist/services/config/field-classifier.js +47 -0
  86. package/dist/services/config/intelligent.d.ts +55 -0
  87. package/dist/services/config/intelligent.js +306 -0
  88. package/dist/services/config/rules.d.ts +6 -0
  89. package/dist/services/config/rules.js +44 -0
  90. package/dist/services/config/schema-resolver.d.ts +18 -0
  91. package/dist/services/config/schema-resolver.js +80 -0
  92. package/dist/services/corpus/loader.d.ts +56 -0
  93. package/dist/services/corpus/loader.js +25 -0
  94. package/dist/services/corpus/search.d.ts +49 -0
  95. package/dist/services/corpus/search.js +69 -0
  96. package/dist/services/corpus/templates.d.ts +4 -0
  97. package/dist/services/corpus/templates.js +11 -0
  98. package/dist/services/pipelines/execution.d.ts +20 -0
  99. package/dist/services/pipelines/execution.js +290 -0
  100. package/dist/services/pipelines/node-type-intent.d.ts +96 -0
  101. package/dist/services/pipelines/node-type-intent.js +356 -0
  102. package/dist/services/pipelines/node-type-selection.d.ts +66 -0
  103. package/dist/services/pipelines/node-type-selection.js +758 -0
  104. package/dist/services/pipelines/planning.d.ts +543 -0
  105. package/dist/services/pipelines/planning.js +1839 -0
  106. package/dist/services/policies/sql-override.d.ts +7 -0
  107. package/dist/services/policies/sql-override.js +109 -0
  108. package/dist/services/repo/operations.d.ts +6 -0
  109. package/dist/services/repo/operations.js +10 -0
  110. package/dist/services/repo/parser.d.ts +70 -0
  111. package/dist/services/repo/parser.js +365 -0
  112. package/dist/services/repo/path.d.ts +2 -0
  113. package/dist/services/repo/path.js +58 -0
  114. package/dist/services/templates/nodes.d.ts +50 -0
  115. package/dist/services/templates/nodes.js +336 -0
  116. package/dist/services/workspace/analysis.d.ts +56 -0
  117. package/dist/services/workspace/analysis.js +151 -0
  118. package/dist/services/workspace/mutations.d.ts +150 -0
  119. package/dist/services/workspace/mutations.js +1718 -0
  120. package/dist/utils.d.ts +5 -0
  121. package/dist/utils.js +7 -0
  122. package/dist/workflows/get-environment-overview.d.ts +9 -0
  123. package/dist/workflows/get-environment-overview.js +23 -0
  124. package/dist/workflows/get-run-details.d.ts +10 -0
  125. package/dist/workflows/get-run-details.js +28 -0
  126. package/dist/workflows/progress.d.ts +20 -0
  127. package/dist/workflows/progress.js +54 -0
  128. package/dist/workflows/retry-and-wait.d.ts +13 -0
  129. package/dist/workflows/retry-and-wait.js +139 -0
  130. package/dist/workflows/run-and-wait.d.ts +13 -0
  131. package/dist/workflows/run-and-wait.js +141 -0
  132. package/dist/workflows/run-status.d.ts +10 -0
  133. package/dist/workflows/run-status.js +27 -0
  134. package/package.json +34 -0
@@ -0,0 +1,1718 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { getWorkspaceNode, setWorkspaceNode, createWorkspaceNode, } from "../../coalesce/api/nodes.js";
3
+ import { fetchAllWorkspaceNodes } from "../cache/snapshots.js";
4
+ import { assertNoSqlOverridePayload } from "../policies/sql-override.js";
5
+ import { completeNodeConfiguration } from "../config/intelligent.js";
6
+ import { isPlainObject } from "../../utils.js";
7
+ import { selectPipelineNodeType } from "../pipelines/node-type-selection.js";
8
+ import { detectSpecializedPatternPenalty } from "../pipelines/node-type-intent.js";
9
/**
 * Checks a requested nodeType against the ranked node types returned by
 * selectPipelineNodeType for the workspace (and optional repo).
 *
 * @param client  API client forwarded to listWorkspaceNodeTypes.
 * @param params  Reads workspaceID, nodeType, predecessorCount, repoPath and goal.
 * @returns A summary of the ranking (requested type, top-ranked type, strategy,
 *          up to five considered types, and an optional `warning` when the
 *          requested type is not ranked first), or null when validation itself
 *          failed for a reason that is not a hard block.
 * @throws Error when the selection warnings flag the type as excluded for
 *         relying on raw SQL override, or when detectSpecializedPatternPenalty
 *         reports a penalty for the requested type.
 */
async function validateNodeTypeChoice(client, params) {
    try {
        const { nodeTypes, counts } = await listWorkspaceNodeTypes(client, {
            workspaceID: params.workspaceID,
        });
        const { warnings, selection } = selectPipelineNodeType({
            explicitNodeType: params.nodeType,
            sourceCount: params.predecessorCount,
            workspaceNodeTypes: nodeTypes,
            workspaceNodeTypeCounts: counts,
            repoPath: params.repoPath,
            goal: params.goal,
        });
        // Hard block: the selector reports excluded types through its warnings list.
        const exclusionWarning = warnings.find((w) => w.includes("is excluded because it relies on raw SQL override"));
        if (exclusionWarning) {
            throw new Error(exclusionWarning);
        }
        const ranked = selection.consideredNodeTypes;
        const topRanked = ranked[0] ?? null;
        const isTopRanked = topRanked?.nodeType === params.nodeType;
        const validation = {
            requestedNodeType: params.nodeType,
            topRankedNodeType: topRanked?.nodeType ?? null,
            isTopRanked,
            strategy: selection.strategy,
            consideredNodeTypes: ranked.slice(0, 5).map(({ nodeType, displayName, score, reasons }) => ({
                nodeType,
                displayName,
                score,
                reasons,
            })),
        };
        if (topRanked && !isTopRanked) {
            validation.warning = [
                `Requested nodeType "${params.nodeType}" is not the top-ranked type. `,
                `Consider using "${topRanked.nodeType}" (score: ${topRanked.score}) instead. `,
                `Call plan-pipeline first to discover and rank all available node types.`,
            ].join("");
        }
        // Pattern detection works on human-readable names, so the raw nodeType ID
        // (e.g., "65") is combined with the matched candidate's display/short name.
        const matchedCandidate = ranked.find((c) => c.nodeType === params.nodeType);
        const candidateSignals = [
            params.nodeType,
            matchedCandidate?.displayName ?? "",
            matchedCandidate?.shortName ?? "",
        ].join(" ");
        const specializedPenalty = detectSpecializedPatternPenalty(candidateSignals, params.goal ?? "");
        if (!specializedPenalty) {
            return validation;
        }
        // Hard block: specialized types need the goal to ask for them explicitly.
        const patternName = specializedPenalty.reason.split(" pattern")[0] ?? "Specialized";
        throw new Error([
            `Cannot create node with nodeType "${params.nodeType}" (${matchedCandidate?.displayName ?? "unknown"}): `,
            `${specializedPenalty.reason}. `,
            `${patternName} types require an explicit use case (e.g., "${patternName.toLowerCase()}" in the goal). `,
            `For standard batch ETL, staging, joins, and aggregations, use a general-purpose type `,
            `(Stage, Work, View, Dimension, Fact). `,
            `Call plan-pipeline to discover the correct nodeType for your use case.`,
        ].join(""));
    }
    catch (error) {
        // Only the two hard-block errors propagate; anything else means validation
        // could not run, and node creation is allowed to proceed.
        const isHardBlock = error instanceof Error &&
            (error.message.includes("is excluded") || error.message.startsWith("Cannot create node"));
        if (isHardBlock) {
            throw error;
        }
        return null;
    }
}
86
/**
 * Rejects attempts to create a node whose type is "Source".
 *
 * Source nodes are read-only data definitions created via the Coalesce UI,
 * not downstream processing nodes; agents often confuse "Source" with "Stage".
 *
 * @param nodeType  Requested node type; anything up to and including the last
 *                  ":::" is ignored and the comparison is case-insensitive.
 * @throws Error when the remaining type name is exactly "source".
 */
function assertNotSourceNodeType(nodeType) {
    const baseType = nodeType.toLowerCase().replace(/.*:::/, "");
    if (baseType !== "source") {
        return;
    }
    const message = [
        `Cannot create a node with nodeType "Source". Source nodes are read-only data `,
        `definitions — they represent external tables and are created via the Coalesce UI, `,
        `not via the API. You probably want "Stage" for a staging/transform node. `,
        `Call plan-pipeline to discover the correct nodeType.`,
    ].join("");
    throw new Error(message);
}
100
/**
 * Recursively merges `changes` into a copy of `current`.
 *
 * - Arrays in `changes` replace the current value wholesale.
 * - When both sides are plain objects, keys are merged one level at a time.
 * - Any other value in `changes` replaces the current value.
 *
 * A "__proto__" key in `changes` is skipped. Such a key can be an own property
 * when the payload comes from JSON.parse, and assigning it through
 * `merged[key] = ...` would swap the merged object's prototype instead of
 * adding a field.
 *
 * @param current  Existing value (not mutated at the level being merged).
 * @param changes  Requested changes.
 * @returns The merged value.
 */
function mergeWorkspaceNodeChanges(current, changes) {
    if (Array.isArray(changes) || !isPlainObject(current) || !isPlainObject(changes)) {
        return changes;
    }
    const merged = { ...current };
    for (const [key, value] of Object.entries(changes)) {
        if (key === "__proto__") {
            continue;
        }
        merged[key] = mergeWorkspaceNodeChanges(current[key], value);
    }
    return merged;
}
113
/**
 * Propagates a node rename into `metadata.sourceMapping` entries.
 *
 * Nothing changes (and `merged` is returned as-is) when the changes carry no
 * string name, when the changes set `metadata.sourceMapping` themselves, or
 * when the merged node has no sourceMapping array.
 *
 * Otherwise an entry is renamed when its name equals the node's previous
 * non-blank name, or — if the node had no previous name — when it is the only
 * entry in the mapping.
 *
 * @param current  Node before the update.
 * @param merged   Node after merging the changes.
 * @param changes  Requested changes.
 * @returns `merged`, or a copy of it with an updated sourceMapping.
 */
function syncNodeNameIntoMetadataSourceMapping(current, merged, changes) {
    if (typeof changes.name !== "string") {
        return merged;
    }
    if (isPlainObject(changes.metadata) && "sourceMapping" in changes.metadata) {
        return merged;
    }
    const metadata = isPlainObject(merged.metadata) ? merged.metadata : undefined;
    if (!metadata || !Array.isArray(metadata.sourceMapping)) {
        return merged;
    }
    const hadName = typeof current.name === "string" && current.name.trim().length > 0;
    const previousName = hadName ? current.name : null;
    const renameLoneEntry = !hadName && metadata.sourceMapping.length === 1;
    const sourceMapping = metadata.sourceMapping.map((entry) => {
        if (!isPlainObject(entry)) {
            return entry;
        }
        const matchesPreviousName = previousName !== null && entry.name === previousName;
        if (matchesPreviousName || renameLoneEntry) {
            return { ...entry, name: changes.name };
        }
        return entry;
    });
    return {
        ...merged,
        metadata: { ...metadata, sourceMapping },
    };
}
150
/**
 * Builds the request body for updating a workspace node from the node's
 * current state plus a set of requested changes.
 *
 * Steps, in order: merge the changes, sync a rename into the source mapping,
 * validate nodeType/materializationType compatibility, strip metadata down to
 * the fields cleanMetadata allows, and fill in API-required fields.
 * Node name casing is left exactly as provided.
 *
 * @param current  Current node as returned by the API; must be a plain object.
 * @param changes  Requested changes.
 * @returns The body to send.
 * @throws Error when the current node, the merged result, or the synchronized
 *         result is not a plain object, or when validation rejects the
 *         nodeType/materializationType combination.
 */
export function buildUpdatedWorkspaceNodeBody(current, changes) {
    const requirePlainObject = (value, message) => {
        if (!isPlainObject(value)) {
            throw new Error(message);
        }
        return value;
    };
    requirePlainObject(current, "Workspace node response was not an object");
    const merged = requirePlainObject(mergeWorkspaceNodeChanges(current, changes), "Merged workspace node update was not an object");
    const body = requirePlainObject(syncNodeNameIntoMetadataSourceMapping(current, merged, changes), "Synchronized workspace node update was not an object");
    validateNodeTypeMaterializationCompatibility(body.nodeType, body.materializationType);
    // The GET response can carry metadata fields the update endpoint rejects.
    if (isPlainObject(body.metadata)) {
        body.metadata = cleanMetadata(body.metadata);
    }
    // Restores table, overrideSQL, columnIDs and other required column fields.
    ensureRequiredApiFields(current, body);
    return body;
}
175
/**
 * Counts the entries in `node.metadata.columns`.
 *
 * @returns The array length, or 0 when metadata is not a plain object or
 *          columns is not an array.
 */
function getNodeColumnCount(node) {
    if (!isPlainObject(node.metadata)) {
        return 0;
    }
    const { columns } = node.metadata;
    if (!Array.isArray(columns)) {
        return 0;
    }
    return columns.length;
}
179
/**
 * Counts the entries in `node.storageLocations`.
 *
 * @returns The array length, or 0 when the field is not an array.
 */
function getNodeStorageLocationCount(node) {
    const locations = node.storageLocations;
    if (!Array.isArray(locations)) {
        return 0;
    }
    return locations.length;
}
182
/**
 * Counts the own enumerable keys of `node.config`.
 *
 * @returns The key count, or 0 when config is not a plain object.
 */
function getNodeConfigKeyCount(node) {
    const { config } = node;
    if (!isPlainObject(config)) {
        return 0;
    }
    return Object.keys(config).length;
}
185
/**
 * Extracts the node name requested in a change set.
 *
 * @returns `changes.name` unchanged (not trimmed) when it is a string with
 *          non-whitespace content; otherwise undefined.
 */
function getRequestedNodeName(changes) {
    const { name } = changes;
    if (typeof name !== "string") {
        return undefined;
    }
    if (name.trim().length === 0) {
        return undefined;
    }
    return name;
}
190
/**
 * Lists the column names present in `changes.metadata.columns`.
 *
 * Entries that are not plain objects, or whose name is not a non-blank string,
 * are skipped. Names are returned as given, in their original order.
 *
 * @returns An array of names; empty when no columns array was requested.
 */
function getRequestedColumnNames(changes) {
    const columns = isPlainObject(changes.metadata) ? changes.metadata.columns : undefined;
    if (!Array.isArray(columns)) {
        return [];
    }
    const hasUsableName = (column) => isPlainObject(column) &&
        typeof column.name === "string" &&
        column.name.trim().length > 0;
    return columns.filter(hasUsableName).map((column) => column.name);
}
203
/**
 * Returns the `config` object requested in a change set.
 *
 * @returns `changes.config` when it is a plain object; otherwise undefined.
 */
function getRequestedConfig(changes) {
    const { config } = changes;
    if (isPlainObject(config)) {
        return config;
    }
    return undefined;
}
206
/**
 * Picks the location-related fields ("database", "schema", "locationName")
 * that are own properties of the change set.
 *
 * A key is included whenever it is present, even if its value is undefined
 * or null.
 *
 * @returns A new object holding only the location keys found on `changes`.
 */
function getRequestedLocationFields(changes) {
    const locationKeys = ["database", "schema", "locationName"];
    const presentEntries = locationKeys
        .filter((key) => Object.hasOwn(changes, key))
        .map((key) => [key, changes[key]]);
    return Object.fromEntries(presentEntries);
}
215
/**
 * Lists the names of a node's columns from `node.metadata.columns`.
 *
 * Columns that are not plain objects or have a non-string name are skipped;
 * blank names are kept.
 *
 * @returns Column names in order; empty when there is no columns array.
 */
function getNodeColumnNames(node) {
    const columns = isPlainObject(node.metadata) ? node.metadata.columns : undefined;
    if (!Array.isArray(columns)) {
        return [];
    }
    const names = [];
    for (const column of columns) {
        if (isPlainObject(column) && typeof column.name === "string") {
            names.push(column.name);
        }
    }
    return names;
}
227
/**
 * Collects every `dependencies[].nodeName` found under
 * `node.metadata.sourceMapping`.
 *
 * Mappings or dependencies with an unexpected shape are skipped. Duplicates
 * are not removed and encounter order is kept.
 *
 * @returns Dependency node names; empty when there is no sourceMapping array.
 */
function getNodeDependencyNames(node) {
    const sourceMapping = isPlainObject(node.metadata) ? node.metadata.sourceMapping : undefined;
    if (!Array.isArray(sourceMapping)) {
        return [];
    }
    const names = [];
    for (const mapping of sourceMapping) {
        if (!isPlainObject(mapping) || !Array.isArray(mapping.dependencies)) {
            continue;
        }
        for (const dependency of mapping.dependencies) {
            if (isPlainObject(dependency) && typeof dependency.nodeName === "string") {
                names.push(dependency.nodeName);
            }
        }
    }
    return names;
}
244
/**
 * Determines which of the given predecessor node IDs are referenced by the
 * node's columns (via `columns[].sources[].columnReferences[].nodeID`).
 *
 * @param node                Node whose column sources are inspected.
 * @param predecessorNodeIDs  Candidate predecessor IDs.
 * @returns The referenced candidates, in the order of `predecessorNodeIDs`;
 *          empty when the node has no columns array.
 */
function getReferencedPredecessorNodeIDs(node, predecessorNodeIDs) {
    const candidates = new Set(predecessorNodeIDs);
    const columns = isPlainObject(node.metadata) ? node.metadata.columns : undefined;
    if (!Array.isArray(columns)) {
        return [];
    }
    const columnReferences = columns
        .filter((column) => isPlainObject(column) && Array.isArray(column.sources))
        .flatMap((column) => column.sources)
        .filter((source) => isPlainObject(source) && Array.isArray(source.columnReferences))
        .flatMap((source) => source.columnReferences);
    const referenced = new Set();
    for (const reference of columnReferences) {
        const isCandidate = isPlainObject(reference) &&
            typeof reference.nodeID === "string" &&
            candidates.has(reference.nodeID);
        if (isCandidate) {
            referenced.add(reference.nodeID);
        }
    }
    return predecessorNodeIDs.filter((nodeID) => referenced.has(nodeID));
}
268
/**
 * Produces the comparison key for a column name: surrounding whitespace is
 * removed and the result is upper-cased.
 */
function normalizeColumnName(name) {
    const trimmed = name.trim();
    return trimmed.toUpperCase();
}
271
/**
 * Summarizes a predecessor node for reporting.
 *
 * @param requestedNodeID  ID the caller asked for (reported as `nodeID`).
 * @param node             The fetched node.
 * @returns `{ nodeID, nodeName, columnCount, columnNames }`, where nodeName is
 *          null when the node's name is not a string.
 */
function buildPredecessorSummary(requestedNodeID, node) {
    const nodeName = typeof node.name === "string" ? node.name : null;
    const columnCount = getNodeColumnCount(node);
    const columnNames = getNodeColumnNames(node);
    return { nodeID: requestedNodeID, nodeName, columnCount, columnNames };
}
279
/**
 * Metadata fields accepted by the Coalesce PUT API.
 * The GET response may carry extra read-only fields (e.g., appliedNodeTests,
 * cteString, materializationOption) that the PUT schema rejects as additional
 * properties.
 */
const VALID_METADATA_FIELDS = new Set(["columns", "sourceMapping", "enabledColumnTestIDs"]);
/**
 * Returns a copy of `metadata` containing only the fields listed in
 * VALID_METADATA_FIELDS.
 *
 * @returns A new object; empty when `metadata` is not a plain object.
 */
function cleanMetadata(metadata) {
    if (!isPlainObject(metadata)) {
        return {};
    }
    const allowedEntries = Object.entries(metadata).filter(([field]) => VALID_METADATA_FIELDS.has(field));
    return Object.fromEntries(allowedEntries);
}
301
/**
 * Carries columnID, sources, columnReference and placement over from the
 * node's existing columns onto the columns of the update body.
 *
 * Columns supplied by an agent typically lack these linkage fields, so they
 * are restored by matching column names (trimmed, case-insensitive). Fields
 * the new column already provides are left alone. Mutates the columns of
 * `mergedMetadata` in place.
 *
 * @param currentMetadata  Metadata of the existing node.
 * @param mergedMetadata   Metadata of the body being built.
 */
function preserveColumnLinkage(currentMetadata, mergedMetadata) {
    const previousColumns = currentMetadata.columns;
    const nextColumns = mergedMetadata.columns;
    if (!Array.isArray(previousColumns) || !Array.isArray(nextColumns)) {
        return;
    }
    // Later duplicates of the same normalized name overwrite earlier ones.
    const previousByName = new Map();
    for (const previous of previousColumns) {
        if (isPlainObject(previous) && typeof previous.name === "string") {
            previousByName.set(normalizeColumnName(previous.name), previous);
        }
    }
    if (previousByName.size === 0) {
        return;
    }
    for (const column of nextColumns) {
        if (!isPlainObject(column) || typeof column.name !== "string") {
            continue;
        }
        const previous = previousByName.get(normalizeColumnName(column.name));
        if (!previous) {
            continue;
        }
        // Keep the existing ID unless the new column brings its own.
        if (!column.columnID && typeof previous.columnID === "string") {
            column.columnID = previous.columnID;
        }
        // Source linkage ties the column back to its predecessor node; inherit a
        // deep copy so the existing node's data is never edited through it.
        if (!Array.isArray(column.sources) && Array.isArray(previous.sources)) {
            const inheritedSources = structuredClone(previous.sources);
            column.sources = inheritedSources;
            // The Coalesce UI reads transforms from sources[0].transform rather
            // than the top-level field, so a real (non-passthrough) transform is
            // copied into every inherited source.
            const { transform } = column;
            const hasRealTransform = typeof transform === "string" &&
                transform.trim().length > 0 &&
                !isPassthroughTransform(transform, column.name);
            if (hasRealTransform) {
                inheritedSources
                    .filter((source) => isPlainObject(source))
                    .forEach((source) => {
                        source.transform = transform;
                    });
            }
        }
        if (!isPlainObject(column.columnReference) && isPlainObject(previous.columnReference)) {
            column.columnReference = previous.columnReference;
        }
        // Placement is column-ordering metadata.
        if (column.placement === undefined && previous.placement !== undefined) {
            column.placement = previous.placement;
        }
    }
}
365
/**
 * Tells whether a column transform merely selects the column itself, with no
 * actual transformation. Matching is case-insensitive.
 *
 * Recognized passthrough forms:
 *   (blank transform)
 *   COLUMN_NAME
 *   "COLUMN_NAME"
 *   "ALIAS"."COLUMN_NAME"
 *   {{ ref('NODE', 'SOURCE') }}."COLUMN_NAME"
 *
 * @param transform   Transform expression to inspect.
 * @param columnName  Name of the column the transform belongs to.
 * @returns true for a passthrough, false otherwise.
 */
function isPassthroughTransform(transform, columnName) {
    const expression = transform.trim();
    if (expression.length === 0) {
        return true;
    }
    const target = columnName.trim().toUpperCase();
    const upperExpression = expression.toUpperCase();
    // Bare or quoted column name.
    if (upperExpression === target || upperExpression === `"${target}"`) {
        return true;
    }
    // Qualified forms: a single quoted alias, or a {{ ref(...) }} call, followed
    // by the quoted column name (captured in group 1).
    const qualifiedPatterns = [
        /^"[^"]+"\s*\.\s*"([^"]+)"$/i,
        /^\{\{\s*ref\s*\([^)]*\)\s*\}\}\s*\.\s*"([^"]+)"$/i,
    ];
    return qualifiedPatterns.some((pattern) => {
        const match = expression.match(pattern);
        return match !== null && match[1].toUpperCase() === target;
    });
}
398
/**
 * Fills in the fields the Coalesce PUT endpoint requires on the update body,
 * taking values from the current node where possible. Mutates `body`.
 *
 * - `table`: kept if non-blank, else the current node's table, else the name.
 * - `overrideSQL`: always overwritten with the current node's value when the
 *   current node has the field — the agent is never allowed to set it.
 * - columns: escaped quotes in transforms are unescaped, linkage is preserved
 *   from the current columns, required column fields get defaults, passthrough
 *   transforms are dropped, and unsupported properties are removed.
 * - `metadata.enabledColumnTestIDs`: defaulted from the current node or [].
 *
 * @param current  Node as it currently exists.
 * @param body     Update body being prepared.
 */
function ensureRequiredApiFields(current, body) {
    const isBlank = (value) => !value || (typeof value === "string" && value.trim().length === 0);
    // 'table' is required by the PUT API: prefer the current node's value, then
    // fall back to the node name.
    if (isBlank(body.table)) {
        if (typeof current.table === "string" && current.table.trim().length > 0) {
            body.table = current.table;
        }
        else {
            let fallbackName = "";
            if (typeof body.name === "string") {
                fallbackName = body.name;
            }
            else if (typeof current.name === "string") {
                fallbackName = current.name;
            }
            if (fallbackName.length > 0) {
                body.table = fallbackName;
            }
        }
    }
    if ("overrideSQL" in current) {
        body.overrideSQL = current.overrideSQL;
    }
    const currentMetadata = isPlainObject(current.metadata) ? current.metadata : undefined;
    const bodyMetadata = isPlainObject(body.metadata) ? body.metadata : undefined;
    const listBodyColumns = () => (bodyMetadata && Array.isArray(bodyMetadata.columns) ? bodyMetadata.columns : []);
    // Unescape \" in transforms BEFORE linkage is preserved, so the cleaned
    // transform is what gets propagated into sources[*].transform.
    for (const column of listBodyColumns()) {
        if (isPlainObject(column) && typeof column.transform === "string" && column.transform.includes("\\")) {
            column.transform = column.transform.replace(/\\"/g, '"');
        }
    }
    if (currentMetadata && bodyMetadata) {
        preserveColumnLinkage(currentMetadata, bodyMetadata);
    }
    // dataType fallback lookup, keyed by upper-cased current column name.
    const knownDataTypes = new Map();
    const currentColumns = currentMetadata && Array.isArray(currentMetadata.columns) ? currentMetadata.columns : [];
    if (bodyMetadata && Array.isArray(bodyMetadata.columns)) {
        for (const existing of currentColumns) {
            if (isPlainObject(existing) && typeof existing.name === "string" && typeof existing.dataType === "string") {
                knownDataTypes.set(existing.name.toUpperCase(), existing.dataType);
            }
        }
    }
    const unsupportedColumnKeys = ["primaryKey", "foreignKey", "unique", "index"];
    for (const column of listBodyColumns()) {
        if (!isPlainObject(column)) {
            continue;
        }
        // Required: dataType (current node's type for the same name, else VARCHAR).
        if (isBlank(column.dataType)) {
            const lookupKey = typeof column.name === "string" ? column.name.toUpperCase() : "";
            column.dataType = knownDataTypes.get(lookupKey) ?? "VARCHAR";
        }
        // Required: nullable and description.
        if (!("nullable" in column)) {
            column.nullable = true;
        }
        if (!("description" in column)) {
            column.description = "";
        }
        // Required: columnID on every column.
        if (!column.columnID) {
            column.columnID = randomUUID();
        }
        // A transform that only names the column itself is removed so Coalesce
        // auto-populates the source mapping.
        if (typeof column.transform === "string" &&
            typeof column.name === "string" &&
            isPassthroughTransform(column.transform, column.name)) {
            delete column.transform;
        }
        // Computed columns (transform but no sources) get a synthetic source,
        // because the UI reads the transform from sources[0].transform.
        const needsSyntheticSource = typeof column.transform === "string" &&
            column.transform.trim().length > 0 &&
            !Array.isArray(column.sources);
        if (needsSyntheticSource) {
            column.sources = [{ transform: column.transform, columnReferences: [] }];
        }
        // Properties that are not valid Coalesce column fields.
        for (const key of unsupportedColumnKeys) {
            delete column[key];
        }
    }
    // Required on metadata: enabledColumnTestIDs.
    if (bodyMetadata && !Array.isArray(bodyMetadata.enabledColumnTestIDs)) {
        const inherited = currentMetadata && Array.isArray(currentMetadata.enabledColumnTestIDs)
            ? currentMetadata.enabledColumnTestIDs
            : [];
        bodyMetadata.enabledColumnTestIDs = inherited;
    }
}
502
/**
 * Rejects the one incompatible pairing of nodeType and materializationType:
 * a "View" node materialized as a "table" (compared case-insensitively).
 *
 * Validation is skipped when either argument is not a string. Every other
 * combination is accepted.
 *
 * @throws Error with an actionable message for View + table.
 */
function validateNodeTypeMaterializationCompatibility(nodeType, materializationType) {
    const bothAreStrings = typeof nodeType === "string" && typeof materializationType === "string";
    if (!bothAreStrings) {
        return;
    }
    const isViewNode = nodeType.toLowerCase() === "view";
    const wantsTable = materializationType.toLowerCase() === "table";
    if (!(isViewNode && wantsTable)) {
        return;
    }
    throw new Error([
        `Invalid configuration: nodeType "View" cannot use materializationType "table". `,
        `Either keep materializationType as "view" OR change nodeType to a table-capable type like "Dimension", "Fact", "Stage", or "Work".`,
    ].join(""));
}
520
/**
 * Builds one join suggestion for every unordered pair of predecessors, listing
 * the columns the two share.
 *
 * Column names are compared through normalizeColumnName; when a predecessor
 * has several names with the same normalized form, the first one is used.
 * Common columns are sorted by normalized name. A pair with no shared columns
 * still yields a suggestion with an empty `commonColumns` list.
 *
 * @param predecessors  Summaries exposing nodeID, nodeName and columnNames.
 * @returns Suggestions in pair order (0-1, 0-2, ..., 1-2, ...).
 */
function buildJoinSuggestions(predecessors) {
    // Maps normalized name -> first original spelling.
    const indexColumnNames = (columnNames) => {
        const byNormalizedName = new Map();
        for (const columnName of columnNames) {
            const key = normalizeColumnName(columnName);
            if (!byNormalizedName.has(key)) {
                byNormalizedName.set(key, columnName);
            }
        }
        return byNormalizedName;
    };
    const suggestions = [];
    for (let leftIndex = 0; leftIndex < predecessors.length; leftIndex += 1) {
        for (let rightIndex = leftIndex + 1; rightIndex < predecessors.length; rightIndex += 1) {
            const left = predecessors[leftIndex];
            const right = predecessors[rightIndex];
            const leftColumns = indexColumnNames(left.columnNames);
            const rightColumns = indexColumnNames(right.columnNames);
            const commonColumns = [...leftColumns.entries()]
                .filter(([normalizedName]) => rightColumns.get(normalizedName))
                .map(([normalizedName, leftColumnName]) => ({
                    normalizedName,
                    leftColumnName,
                    rightColumnName: rightColumns.get(normalizedName),
                }));
            commonColumns.sort((a, b) => a.normalizedName.localeCompare(b.normalizedName));
            suggestions.push({
                leftPredecessorNodeID: left.nodeID,
                leftPredecessorName: left.nodeName,
                rightPredecessorNodeID: right.nodeID,
                rightPredecessorName: right.nodeName,
                commonColumns,
            });
        }
    }
    return suggestions;
}
563
/**
 * Render join suggestions as a FROM clause plus one JOIN ... ON clause per suggestion.
 *
 * The FROM table is the left predecessor of the first suggestion. Each ON
 * condition qualifies its left-hand column with the left predecessor of the
 * suggestion it came from. Previously every condition was qualified with the
 * FROM table, so a suggestion pairing B with C produced conditions that
 * referenced B's column through table A.
 *
 * Note: a suggestion whose `commonColumns` is empty still yields a JOIN with
 * an empty ON list; callers that need runnable SQL should filter those out.
 *
 * @param {Array<object>} joinSuggestions - entries shaped like the output of buildJoinSuggestions
 * @param {string} [joinType="INNER JOIN"] - keyword placed before each joined table
 * @returns {{fromClause: string, joinClauses: Array<object>, fullSQL: string}}
 *   empty strings / empty array when there are no suggestions
 */
function generateJoinSQL(joinSuggestions, joinType = "INNER JOIN") {
    if (joinSuggestions.length === 0) {
        return {
            fromClause: "",
            joinClauses: [],
            fullSQL: "",
        };
    }
    const quoteIdentifier = (name) => `"${name}"`;
    const fromClause = `FROM ${quoteIdentifier(joinSuggestions[0].leftPredecessorName || "LEFT_TABLE")}`;
    const joinClauses = [];
    const sqlParts = [fromClause];
    for (const suggestion of joinSuggestions) {
        // Use this suggestion's own left table so the column is attributed to
        // the predecessor that actually owns it.
        const leftAlias = quoteIdentifier(suggestion.leftPredecessorName || "LEFT_TABLE");
        const rightTableName = suggestion.rightPredecessorName || "RIGHT_TABLE";
        const rightAlias = quoteIdentifier(rightTableName);
        const onConditions = suggestion.commonColumns.map((col) => `${leftAlias}."${col.leftColumnName}" = ${rightAlias}."${col.rightColumnName}"`);
        joinClauses.push({
            type: joinType,
            rightTable: rightTableName,
            rightTableAlias: rightAlias,
            onConditions,
        });
        sqlParts.push([
            `${joinType} ${rightAlias}`,
            ` ON ${onConditions.join("\n AND ")}`,
        ].join("\n"));
    }
    return {
        fromClause,
        joinClauses,
        fullSQL: sqlParts.join("\n"),
    };
}
600
/**
 * Guess a column datatype from the text of its transform expression.
 *
 * This is a substring heuristic, not a SQL parser: the transform is
 * upper-cased and checked against an ordered list of rules, and the first
 * rule whose fragments all occur in the text decides the result.
 *
 * @param {string} transform - SQL expression for the column
 * @returns {string|undefined} inferred datatype, or undefined when no rule matches
 */
function inferDatatype(transform) {
    const sql = transform.toUpperCase();
    // [fragments that must ALL be present, resulting datatype] - order matters.
    const rules = [
        // Date/time functions are checked before MIN/MAX.
        [["DATEDIFF("], "NUMBER"],
        [["DATEADD("], "DATE"],
        [["CURRENT_DATE"], "DATE"],
        [["CURRENT_TIMESTAMP"], "TIMESTAMP_NTZ(9)"],
        // Aggregates.
        [["COUNT(DISTINCT"], "NUMBER"],
        [["COUNT("], "NUMBER"],
        [["SUM("], "NUMBER(38,4)"],
        [["AVG("], "NUMBER(38,4)"],
        [["STDDEV("], "NUMBER(38,4)"],
        [["VARIANCE("], "NUMBER(38,4)"],
        // MIN/MAX only resolve when the text hints at a timestamp or date.
        [["MIN(", "_TS"], "TIMESTAMP_NTZ(9)"],
        [["MAX(", "_TS"], "TIMESTAMP_NTZ(9)"],
        [["MIN(", "_DATE"], "DATE"],
        [["MAX(", "_DATE"], "DATE"],
        // String functions.
        [["CONCAT("], "VARCHAR"],
        [["UPPER("], "VARCHAR"],
        [["LOWER("], "VARCHAR"],
        [["TRIM("], "VARCHAR"],
        [["SUBSTR("], "VARCHAR"],
        [["LEFT("], "VARCHAR"],
        [["RIGHT("], "VARCHAR"],
        // CASE expressions are treated as text.
        [["CASE"], "VARCHAR"],
        // Window ranking functions.
        [["ROW_NUMBER()"], "NUMBER"],
        [["RANK()"], "NUMBER"],
        [["DENSE_RANK()"], "NUMBER"],
    ];
    for (const [fragments, datatype] of rules) {
        if (fragments.every((fragment) => sql.includes(fragment))) {
            return datatype;
        }
    }
    return undefined;
}
660
/**
 * Split columns into aggregate-like expressions and plain expressions, and
 * derive the GROUP BY clause implied by that split.
 *
 * Detection is a case-insensitive substring test on each column's transform.
 * Window functions are placed in the same bucket as aggregates, so they are
 * never added to GROUP BY.
 *
 * @param {Array<{name: string, transform: string}>} columns
 * @returns {object} groupByColumns (transform texts), aggregateColumns
 *   ({name, transform}), hasAggregates, groupByClause ("" when not needed),
 *   and validation {valid, errors}
 */
function analyzeColumnsForGroupBy(columns) {
    // Aggregate call prefixes followed by window function markers.
    const nonGroupingMarkers = [
        "COUNT(",
        "SUM(",
        "AVG(",
        "MIN(",
        "MAX(",
        "STDDEV(",
        "VARIANCE(",
        "LISTAGG(",
        "ARRAY_AGG(",
        "ROW_NUMBER()",
        "RANK()",
        "DENSE_RANK()",
        "LEAD(",
        "LAG(",
        "FIRST_VALUE(",
        "LAST_VALUE(",
    ];
    const groupByColumns = [];
    const aggregateColumns = [];
    for (const { name, transform } of columns) {
        const normalized = transform.toUpperCase();
        if (nonGroupingMarkers.some((marker) => normalized.includes(marker))) {
            aggregateColumns.push({ name, transform });
        }
        else {
            // Plain expression: must be listed in GROUP BY.
            groupByColumns.push(transform);
        }
    }
    const hasAggregates = aggregateColumns.length > 0;
    const errors = [];
    // Several columns, all of them aggregate-like, is reported as invalid.
    if (hasAggregates && groupByColumns.length === 0 && columns.length > 1) {
        errors.push("Query has aggregate functions but no GROUP BY columns. All non-aggregate columns must be in GROUP BY.");
    }
    const needsGroupBy = hasAggregates && groupByColumns.length > 0;
    return {
        groupByColumns,
        aggregateColumns,
        hasAggregates,
        groupByClause: needsGroupBy ? `GROUP BY ${groupByColumns.join(", ")}` : "",
        validation: {
            valid: errors.length === 0,
            errors,
        },
    };
}
717
/**
 * Apply partial changes to a workspace node: read the current node, build the
 * updated body from it and the changes, then write the full body back.
 *
 * @param {object} client - API client passed through to the node helpers
 * @param {{workspaceID: string, nodeID: string, changes: object}} params
 * @returns {Promise<*>} the result of the write call
 * @throws {Error} when the changes fail the SQL-override check or the fetched node is not a plain object
 */
export async function updateWorkspaceNode(client, params) {
    assertNoSqlOverridePayload(params.changes, "update-workspace-node changes");
    const existingNode = await getWorkspaceNode(client, params);
    if (!isPlainObject(existingNode)) {
        throw new Error("Workspace node response was not an object");
    }
    const body = buildUpdatedWorkspaceNodeBody(existingNode, params.changes);
    const { workspaceID, nodeID } = params;
    return setWorkspaceNode(client, { workspaceID, nodeID, body });
}
730
/**
 * Replace a node's column list, optionally merging extra changes and
 * appending a WHERE condition to the node's join condition.
 *
 * `params.columns` always wins over any columns supplied through
 * `params.additionalChanges`, because the column change is merged last.
 *
 * @param {object} client - API client passed through to the node helpers
 * @param {{workspaceID: string, nodeID: string, columns: Array, additionalChanges?: object, whereCondition?: string}} params
 * @returns {Promise<*>} the result of the write call
 * @throws {Error} when additionalChanges fails the SQL-override check, tries to set
 *   metadata.sourceMapping / metadata.customSQL, or the fetched node is not a plain object
 */
export async function replaceWorkspaceNodeColumns(client, params) {
    const extraChanges = params.additionalChanges;
    if (extraChanges) {
        assertNoSqlOverridePayload(extraChanges, "replace-workspace-node-columns additionalChanges");
        // sourceMapping/customSQL must go through the dedicated join tools instead.
        const extraMetadata = extraChanges.metadata;
        const touchesJoinSetup = isPlainObject(extraMetadata) &&
            ("sourceMapping" in extraMetadata || "customSQL" in extraMetadata);
        if (touchesJoinSetup) {
            throw new Error("replace-workspace-node-columns additionalChanges cannot set sourceMapping or customSQL. " +
                "Use the joinCondition parameter to set WHERE filters, apply-join-condition for join setup, " +
                "or convert-join-to-aggregation for GROUP BY patterns.");
        }
    }
    const existingNode = await getWorkspaceNode(client, params);
    if (!isPlainObject(existingNode)) {
        throw new Error("Workspace node response was not an object");
    }
    // Overlay the columns last so they take precedence over additionalChanges.
    const columnOverlay = { metadata: { columns: params.columns } };
    const effectiveChanges = extraChanges
        ? mergeWorkspaceNodeChanges(extraChanges, columnOverlay)
        : columnOverlay;
    const body = buildUpdatedWorkspaceNodeBody(existingNode, effectiveChanges);
    const where = params.whereCondition;
    if (typeof where === "string" && where.length > 0) {
        appendWhereToJoinCondition(body, where);
    }
    return setWorkspaceNode(client, {
        workspaceID: params.workspaceID,
        nodeID: params.nodeID,
        body,
    });
}
769
/**
 * Add a WHERE filter to the joinCondition of the first sourceMapping entry.
 *
 * The body is modified in place. Nothing happens when the body has no
 * metadata object, no sourceMapping entries, a non-object first entry, or
 * when the condition is empty after cleanup.
 *
 * Cleanup: backslash-escaped double quotes are unescaped and a leading
 * "WHERE" keyword is dropped. The filter is then attached as:
 *   - "WHERE <cond>" when there is no existing joinCondition text,
 *   - "<existing>\n AND <cond>" when the existing text already contains WHERE,
 *   - "<existing>\nWHERE <cond>" otherwise.
 */
function appendWhereToJoinCondition(body, whereCondition) {
    if (!isPlainObject(body.metadata)) {
        return;
    }
    const mappings = body.metadata.sourceMapping;
    if (!Array.isArray(mappings) || mappings.length === 0) {
        return;
    }
    const primaryMapping = mappings[0];
    if (!isPlainObject(primaryMapping)) {
        return;
    }
    // Work on a copy of the join object; it is assigned back at the end.
    const joinCopy = isPlainObject(primaryMapping.join) ? { ...primaryMapping.join } : {};
    const currentCondition = typeof joinCopy.joinCondition === "string"
        ? joinCopy.joinCondition.trim()
        : "";
    const predicate = whereCondition
        .replace(/\\"/g, '"') // agents sometimes over-escape quotes
        .replace(/^\s*WHERE\s+/i, "") // tolerate a caller-supplied WHERE keyword
        .trim();
    if (!predicate) {
        return;
    }
    let combined;
    if (!currentCondition) {
        combined = `WHERE ${predicate}`;
    }
    else if (/\bWHERE\b/i.test(currentCondition)) {
        combined = `${currentCondition}\n AND ${predicate}`;
    }
    else {
        combined = `${currentCondition}\nWHERE ${predicate}`;
    }
    joinCopy.joinCondition = combined;
    primaryMapping.join = joinCopy;
}
807
/**
 * Combine `params.changes` with the top-level convenience fields into one
 * changes object.
 *
 * Starts from a shallow copy of `params.changes` (or an empty object) and
 * merges each of name, description, storageLocations, config and metadata,
 * in that order, when the field is not undefined.
 *
 * @param {object} params
 * @returns {object} merged changes
 */
function buildScratchNodeChanges(params) {
    const overrideFields = ["name", "description", "storageLocations", "config", "metadata"];
    let combined = params.changes ? { ...params.changes } : {};
    for (const field of overrideFields) {
        const value = params[field];
        if (value !== undefined) {
            combined = mergeWorkspaceNodeChanges(combined, { [field]: value });
        }
    }
    return combined;
}
834
/**
 * Compare a saved scratch node against what was requested and report whether
 * the requested completion level is met.
 *
 * Completion rules:
 *   - "created": always satisfied.
 *   - "named": name check (when required), requested location fields, and
 *     storage locations (when requested) must hold.
 *   - "configured": everything for "named", plus requested columns and
 *     requested config values.
 *
 * @param {object} node - the saved node body
 * @param {string} completionLevel - "created", "named" or "configured"
 * @param {{changes: object, storageLocations?: *}} requested
 * @returns {object} a flat report of the individual checks and `completionSatisfied`
 */
function buildScratchNodeValidation(node, completionLevel, requested) {
    const { changes } = requested;
    // Name: satisfied when nothing was requested or the saved name matches exactly.
    const requestedName = getRequestedNodeName(changes);
    const requestedNameSatisfied = requestedName === undefined || node.name === requestedName;
    // Columns: compared by normalized name; with no request, any column counts.
    const requestedColumnNames = getRequestedColumnNames(changes);
    const savedColumnKeys = new Set(getNodeColumnNames(node).map((name) => normalizeColumnName(name)));
    let requestedColumnsSatisfied;
    if (requestedColumnNames.length === 0) {
        requestedColumnsSatisfied = getNodeColumnCount(node) > 0;
    }
    else {
        requestedColumnsSatisfied = requestedColumnNames.every((name) => savedColumnKeys.has(normalizeColumnName(name)));
    }
    // Config: each requested key must be identical (Object.is) on the saved node.
    const requestedConfig = getRequestedConfig(changes);
    const configPresent = isPlainObject(node.config);
    let requestedConfigSatisfied;
    if (requestedConfig === undefined) {
        requestedConfigSatisfied = configPresent;
    }
    else {
        requestedConfigSatisfied = Object.entries(requestedConfig).every(([key, value]) => configPresent && Object.is(node.config[key], value));
    }
    // Location fields are read straight off the node's top level.
    const requestedLocationFields = getRequestedLocationFields(changes);
    const requestedLocationSatisfied = Object.entries(requestedLocationFields).every(([key, value]) => Object.is(node[key], value));
    const nameSet = typeof node.name === "string" && node.name.trim().length > 0;
    const storageLocationCount = getNodeStorageLocationCount(node);
    const storageLocationsSet = storageLocationCount > 0;
    const columnCount = getNodeColumnCount(node);
    const configKeyCount = getNodeConfigKeyCount(node);
    const nameRequired = completionLevel !== "created" || requestedName !== undefined;
    const storageLocationsRequired = requested.storageLocations !== undefined;
    // Checks shared by the "named" and "configured" levels.
    const nameCheckPassed = !nameRequired || (requestedName ? requestedNameSatisfied : nameSet);
    const namedLevelPassed = nameCheckPassed &&
        requestedLocationSatisfied &&
        (!storageLocationsRequired || storageLocationsSet);
    let completionSatisfied;
    switch (completionLevel) {
        case "named":
            completionSatisfied = namedLevelPassed;
            break;
        case "configured":
            completionSatisfied = namedLevelPassed && requestedColumnsSatisfied && requestedConfigSatisfied;
            break;
        default:
            completionSatisfied = true;
    }
    return {
        requestedCompletionLevel: completionLevel,
        completionSatisfied,
        nameRequired,
        nameSet,
        requestedName: requestedName ?? null,
        requestedNameSatisfied,
        requestedLocationKeys: Object.keys(requestedLocationFields),
        requestedLocationSatisfied,
        storageLocationsRequired,
        storageLocationCount,
        storageLocationsSet,
        columnCount,
        configPresent,
        configKeyCount,
        requestedColumnCount: requestedColumnNames.length,
        requestedColumnNames,
        requestedColumnsSatisfied,
        requestedConfigKeys: requestedConfig ? Object.keys(requestedConfig) : [],
        requestedConfigSatisfied,
    };
}
893
/**
 * Guard for the "configured" completion level: the changes must carry a node
 * name and at least one requested column.
 *
 * @param {object} changes - merged scratch-node changes
 * @throws {Error} naming every missing piece ("name", "metadata.columns")
 */
function assertConfiguredScratchInput(changes) {
    const hasName = Boolean(getRequestedNodeName(changes));
    const hasColumns = getRequestedColumnNames(changes).length !== 0;
    const missing = [
        hasName ? null : "name",
        hasColumns ? null : "metadata.columns",
    ].filter((entry) => entry !== null);
    if (missing.length === 0) {
        return;
    }
    throw new Error(`Configured scratch node creation requires ${missing.join(" and ")}. Provide them explicitly or lower completionLevel to "named" or "created".`);
}
907
/**
 * Produce follow-up guidance for a node that was created without predecessors.
 *
 * @param {string} nodeType - node type identifier used to infer the node family
 * @param {object} node - the saved node body
 * @returns {string[]} ordered list of next-step hints; always ends with the verification hint
 */
function buildScratchNodeNextSteps(nodeType, node) {
    const family = inferNodeTypeFamily(nodeType);
    const hints = [];
    // Suggest a rename when the name is missing, equals the type, or looks like PREFIX_123.
    const nodeName = typeof node.name === "string" ? node.name : "";
    const looksUnnamed = !nodeName || nodeName === nodeType || /^[A-Z]+_\d+$/.test(nodeName);
    if (looksUnnamed) {
        hints.push(`Name this node following conventions: ${suggestNamingConvention(family)}`);
    }
    // No predecessors means nothing was inherited, so columns may be absent.
    const columnList = isPlainObject(node.metadata) && Array.isArray(node.metadata.columns)
        ? node.metadata.columns
        : [];
    if (columnList.length === 0) {
        hints.push("This node has no columns. Add columns using replace-workspace-node-columns or update-workspace-node.");
    }
    const isFact = family === "fact";
    const isDimension = family === "dimension";
    if (isFact || isDimension) {
        const label = isFact ? "Fact" : "Dimension";
        hints.push(`Verify materialization: ${label} nodes should typically materialize as tables, not views. ` +
            "Check that materializationType is 'table' in the config.");
        if (isDimension) {
            hints.push("For dimensions: identify the business key (natural key from the source system) and mark it isBusinessKey = true. " +
                "If this is a slowly changing dimension (SCD Type 2), ensure START_DATE/END_DATE/IS_CURRENT columns exist.");
        }
    }
    hints.push("Verify the node: call get-workspace-node to confirm columns and config are correct before proceeding.");
    return hints;
}
934
/**
 * Create a workspace node without predecessors, apply the requested changes,
 * validate the saved node against the requested completion level, and then
 * attempt automatic config completion.
 *
 * Flow: validate input -> validate node type choice -> create -> fetch ->
 * (apply changes and re-fetch, when there are any) -> validate -> config
 * completion (best-effort; a failure is reported in the result, not thrown).
 *
 * @param {object} client - API client passed through to the node helpers
 * @param {object} params - workspaceID, nodeType, optional completionLevel
 *   (defaults to "configured"), repoPath, goal, storageLocations, and the
 *   fields consumed by buildScratchNodeChanges
 * @returns {Promise<object>} node, validation, nextSteps, plus either
 *   configCompletion, configCompletionSkipped, or warning
 * @throws {Error} on invalid input, unexpected API responses, or when the
 *   "configured" level is requested but not met by the saved node
 */
export async function createWorkspaceNodeFromScratch(client, params) {
    assertNotSourceNodeType(params.nodeType);
    const completionLevel = params.completionLevel ?? "configured";
    const isConfiguredLevel = completionLevel === "configured";
    const scratchChanges = buildScratchNodeChanges(params);
    assertNoSqlOverridePayload(scratchChanges, "create-workspace-node-from-scratch changes");
    if (isConfiguredLevel) {
        assertConfiguredScratchInput(scratchChanges);
    }
    const { workspaceID, nodeType, repoPath } = params;
    // Throws when the type is excluded or a specialized pattern is detected
    // without matching context.
    const nodeTypeValidation = await validateNodeTypeChoice(client, {
        workspaceID,
        nodeType,
        predecessorCount: 0,
        repoPath,
        goal: params.goal,
    });
    // Only present in results when the validation produced something truthy.
    const nodeTypeValidationField = nodeTypeValidation ? { nodeTypeValidation } : {};
    const created = await createWorkspaceNode(client, { workspaceID, nodeType });
    if (!isPlainObject(created) || typeof created.id !== "string") {
        throw new Error("Workspace node creation did not return a node ID");
    }
    const nodeID = created.id;
    const fetchNode = () => getWorkspaceNode(client, { workspaceID, nodeID });
    const createdNode = await fetchNode();
    if (!isPlainObject(createdNode)) {
        throw new Error("Created workspace node response was not an object");
    }
    let finalNode = createdNode;
    if (Object.keys(scratchChanges).length > 0) {
        await setWorkspaceNode(client, {
            workspaceID,
            nodeID,
            body: buildUpdatedWorkspaceNodeBody(createdNode, scratchChanges),
        });
        // Re-read so validation sees what was actually saved.
        finalNode = await fetchNode();
    }
    if (!isPlainObject(finalNode)) {
        throw new Error("Final workspace node response was not an object");
    }
    const validation = buildScratchNodeValidation(finalNode, completionLevel, {
        changes: scratchChanges,
        storageLocations: params.storageLocations,
    });
    const nextSteps = buildScratchNodeNextSteps(nodeType, finalNode);
    if (!validation.completionSatisfied) {
        if (isConfiguredLevel) {
            throw new Error(`Workspace node ${nodeID} was created, but configured scratch validation failed. ` +
                `Check name, metadata.columns, and config values on the saved node body. ` +
                `To clean up, use delete-workspace-node with nodeID "${nodeID}".`);
        }
        return {
            node: finalNode,
            validation,
            nextSteps,
            warning: "Workspace node was created, but the requested scratch completion level was not fully satisfied. Review the node body and provide any missing name, storageLocations, metadata.columns, or config fields.",
            ...nodeTypeValidationField,
        };
    }
    // Best-effort: any failure here falls through to the "skipped" result.
    try {
        const configCompletion = await completeNodeConfiguration(client, {
            workspaceID,
            nodeID,
            repoPath,
        });
        return {
            node: configCompletion.node,
            validation,
            nextSteps,
            configCompletion,
            ...nodeTypeValidationField,
        };
    }
    catch {
        return {
            node: finalNode,
            validation,
            nextSteps,
            configCompletionSkipped: "Config completion failed — call complete-node-configuration with repoPath after creation to apply node type config and column-level attributes.",
            ...nodeTypeValidationField,
        };
    }
}
1025
/**
 * Classify a node type identifier into a coarse family.
 *
 * The identifier is lower-cased and everything up to the last ":::" is
 * dropped. Patterns are tried in order and the first hit wins; abbreviations
 * (dim, fct, sat, stg, ...) only count when delimited by "-", "_" or the
 * string boundaries. Anything unrecognized is treated as "stage".
 *
 * @param {string} nodeType
 * @returns {string} one of dimension, fact, hub, satellite, link, view, work, stage
 */
function inferNodeTypeFamily(nodeType) {
    const bareType = nodeType.toLowerCase().replace(/.*:::/, "");
    const familyPatterns = [
        ["dimension", /dimension|(?:^|[-_])dim(?:$|[-_])/],
        ["fact", /fact|(?:^|[-_])fct(?:$|[-_])/],
        ["hub", /(?:^|[-_])hub(?:$|[-_])/],
        ["satellite", /satellite|(?:^|[-_])sat(?:$|[-_])/],
        ["link", /(?:^|[-_])link(?:$|[-_])/],
        ["view", /(?:^|[-_])view(?:$|[-_])/],
        ["work", /(?:^|[-_])work(?:$|[-_])/],
        ["stage", /stage|(?:^|[-_])stg(?:$|[-_])|persistent/],
    ];
    const hit = familyPatterns.find(([, pattern]) => pattern.test(bareType));
    return hit ? hit[0] : "stage";
}
1045
/**
 * Return the naming-convention hint for a node family.
 *
 * The lookup uses a Map so that only the families listed here can match.
 * With a plain object, keys such as "constructor" or "toString" resolved to
 * inherited Object.prototype members and were returned instead of the
 * fallback text.
 *
 * @param {string} family - family name as produced by inferNodeTypeFamily
 * @returns {string} convention hint, or a generic fallback for unknown families
 */
function suggestNamingConvention(family) {
    const conventions = new Map([
        ["stage", "STG_<SOURCE_NAME> (e.g., STG_CUSTOMERS, STG_ORDERS)"],
        ["dimension", "DIM_<ENTITY> (e.g., DIM_CUSTOMER, DIM_PRODUCT)"],
        ["fact", "FACT_<BUSINESS_PROCESS> or FCT_<BUSINESS_PROCESS> (e.g., FACT_SALES, FACT_CLV)"],
        ["view", "V_<PURPOSE> or INT_<PURPOSE> (e.g., V_CUSTOMER_ORDERS)"],
        ["work", "INT_<PURPOSE> or WRK_<PURPOSE> (e.g., INT_ORDER_ENRICHMENT)"],
        ["hub", "HUB_<BUSINESS_KEY> (e.g., HUB_CUSTOMER)"],
        ["satellite", "SAT_<HUB>_<CONTEXT> (e.g., SAT_CUSTOMER_DETAILS)"],
        ["link", "LNK_<RELATIONSHIP> (e.g., LNK_CUSTOMER_ORDER)"],
    ]);
    return conventions.get(family) ?? "Use a descriptive, layer-appropriate name";
}
1058
/**
 * Produce follow-up guidance for a node that was created from predecessors.
 *
 * Hints are emitted in a fixed order: naming, join setup (multiple
 * predecessors only), family-specific advice for facts and dimensions,
 * column review (single predecessor only), and a final verification hint.
 *
 * @param {number} predecessorCount - number of predecessor nodes requested
 * @param {string} nodeType - node type identifier used to infer the node family
 * @param {Array<{commonColumns: Array}>} joinSuggestions - pairwise join suggestions
 * @param {object} node - the created node body
 * @returns {string[]} ordered list of next-step hints
 */
function buildPostCreationNextSteps(predecessorCount, nodeType, joinSuggestions, node) {
    const family = inferNodeTypeFamily(nodeType);
    const hints = [];
    // Suggest a rename when the name is missing, equals the type, or looks like PREFIX_123.
    const nodeName = typeof node.name === "string" ? node.name : "";
    const looksUnnamed = !nodeName || nodeName === nodeType || /^[A-Z]+_\d+$/.test(nodeName);
    if (looksUnnamed) {
        hints.push(`Name this node following conventions: ${suggestNamingConvention(family)}`);
    }
    const isMultiPredecessor = predecessorCount > 1;
    if (isMultiPredecessor) {
        hints.push("REQUIRED: Set up the join condition. This multi-predecessor node needs a FROM/JOIN/ON clause in the joinCondition. " +
            "Review joinSuggestions above to identify join columns, then either:" +
            "\n - Call convert-join-to-aggregation (for GROUP BY / aggregation use cases)" +
            "\n - Call apply-join-condition (for row-level joins — auto-generates FROM/JOIN/ON with {{ ref() }} syntax)");
        const anySharedColumns = joinSuggestions.some((suggestion) => suggestion.commonColumns.length > 0);
        hints.push(anySharedColumns
            ? "Verify join columns: joinSuggestions shows common column names between predecessors. " +
                "Confirm these are the correct join keys (business keys, not surrogate keys). " +
                "Choose the right join type: INNER JOIN (only matching rows), LEFT JOIN (keep all rows from first table), " +
                "FULL OUTER JOIN (keep all rows from both)."
            : "WARNING: No common columns found between predecessors. You may need a cross join, " +
                "or the join columns have different names. Verify the correct join keys with the user.");
    }
    const isFact = family === "fact";
    const isDimension = family === "dimension";
    if (isFact || isDimension) {
        const label = isFact ? "Fact" : "Dimension";
        hints.push(`Verify materialization: ${label} nodes should typically materialize as tables, not views. ` +
            "Check that materializationType is 'table' in the config.");
        if (isFact && isMultiPredecessor) {
            hints.push("For fact tables: define the grain (the set of columns that uniquely identify each row). " +
                "These grain columns become your GROUP BY columns in convert-join-to-aggregation. " +
                "Mark them as isBusinessKey = true.");
        }
        if (isDimension) {
            hints.push("For dimensions: identify the business key (natural key from the source system) and mark it isBusinessKey = true. " +
                "If this is a slowly changing dimension (SCD Type 2), ensure START_DATE/END_DATE/IS_CURRENT columns exist.");
        }
    }
    if (predecessorCount === 1) {
        hints.push("Review auto-populated columns. Remove columns you don't need and add transforms where appropriate. " +
            "Columns without transforms are pass-throughs (inherited as-is from the predecessor).");
    }
    hints.push("Verify the node: call get-workspace-node to confirm columns, config, and join condition are correct before proceeding to downstream nodes.");
    return hints;
}
1107
/**
 * Create a workspace node from one or more predecessors and, in the same
 * call, optionally apply changes, replace columns (with a WHERE filter), or
 * convert the node into an aggregation.
 *
 * Flow: validate params -> validate node type and fetch predecessors in
 * parallel -> create -> fetch -> check that columns were auto-populated
 * (returns early with a warning when they were not) -> apply `changes` ->
 * aggregation path OR column replacement -> config completion (best-effort;
 * a failure is reported in the result, not thrown).
 *
 * @param {object} client - API client passed through to the node helpers
 * @param {object} params - workspaceID, nodeType, predecessorNodeIDs, and
 *   optional changes, columns, whereCondition, groupByColumns, aggregates,
 *   joinType, repoPath, goal
 * @returns {Promise<object>} node, predecessors, joinSuggestions, validation,
 *   nextSteps, plus path-specific fields
 * @throws {Error} on conflicting parameters or unexpected API responses
 */
export async function createWorkspaceNodeFromPredecessor(client, params) {
    assertNotSourceNodeType(params.nodeType);
    if (params.changes) {
        assertNoSqlOverridePayload(params.changes, "create-workspace-node-from-predecessor changes");
    }
    // Parameter combinations that cannot be honored together.
    const { columns, groupByColumns, aggregates, whereCondition } = params;
    if (columns && (groupByColumns || aggregates)) {
        throw new Error("Cannot provide both 'columns' and 'groupByColumns'/'aggregates'. " +
            "Use 'columns' for column replacement, or 'groupByColumns'+'aggregates' for aggregation.");
    }
    if (aggregates && !groupByColumns) {
        throw new Error("'aggregates' requires 'groupByColumns' to be provided.");
    }
    if (groupByColumns && !aggregates) {
        throw new Error("'groupByColumns' requires 'aggregates' to be provided.");
    }
    if (whereCondition && groupByColumns) {
        throw new Error("'whereCondition' cannot be combined with 'groupByColumns'/'aggregates'. " +
            "For aggregation nodes, WHERE/HAVING filters should be applied via a separate update-workspace-node call.");
    }
    const { workspaceID, nodeType, predecessorNodeIDs, repoPath } = params;
    const predecessorCount = predecessorNodeIDs.length;
    // Summarize one predecessor; rejects when the API response is not an object.
    const loadPredecessor = async (nodeID) => {
        const predecessor = await getWorkspaceNode(client, { workspaceID, nodeID });
        if (!isPlainObject(predecessor)) {
            throw new Error(`Predecessor node response was not an object for nodeID ${nodeID}`);
        }
        return buildPredecessorSummary(nodeID, predecessor);
    };
    // Node type validation (which may throw) runs alongside the predecessor fetches.
    const [nodeTypeValidation, predecessorNodes] = await Promise.all([
        validateNodeTypeChoice(client, {
            workspaceID,
            nodeType,
            predecessorCount,
            repoPath,
            goal: params.goal,
        }),
        Promise.all(predecessorNodeIDs.map(loadPredecessor)),
    ]);
    // Only present in results when the validation produced something truthy.
    const nodeTypeValidationField = nodeTypeValidation ? { nodeTypeValidation } : {};
    const joinSuggestions = buildJoinSuggestions(predecessorNodes);
    const created = await createWorkspaceNode(client, {
        workspaceID,
        nodeType,
        predecessorNodeIDs,
    });
    if (!isPlainObject(created) || typeof created.id !== "string") {
        throw new Error("Workspace node creation did not return a node ID");
    }
    const nodeID = created.id;
    const createdNode = await getWorkspaceNode(client, { workspaceID, nodeID });
    if (!isPlainObject(createdNode)) {
        throw new Error("Created workspace node response was not an object");
    }
    const referencedPredecessorNodeIDs = getReferencedPredecessorNodeIDs(createdNode, predecessorNodeIDs);
    const allPredecessorsRepresented = referencedPredecessorNodeIDs.length === predecessorCount;
    const columnCount = getNodeColumnCount(createdNode);
    // One predecessor: any reference is enough. Several: every one must be referenced.
    const predecessorsCovered = predecessorCount === 1
        ? referencedPredecessorNodeIDs.length > 0
        : allPredecessorsRepresented;
    const dependencyNames = getNodeDependencyNames(createdNode);
    const validation = {
        autoPopulatedColumns: columnCount > 0 && predecessorsCovered,
        allPredecessorsRepresented,
        columnCount,
        dependencyCount: dependencyNames.length,
        dependencyNames,
        predecessorNodeIDs,
        referencedPredecessorNodeIDs,
    };
    const nextSteps = buildPostCreationNextSteps(predecessorCount, nodeType, joinSuggestions, createdNode);
    if (!validation.autoPopulatedColumns) {
        // Stop here: later steps assume the columns were inherited.
        const warning = predecessorCount > 1
            ? "Workspace node was created from predecessor(s), but columns were not auto-populated from all requested predecessors. Review the suggested join columns and verify the node in Coalesce before proceeding."
            : "Workspace node was created from predecessor(s), but columns were not auto-populated. Verify the node in Coalesce before proceeding.";
        return {
            node: createdNode,
            predecessors: predecessorNodes,
            joinSuggestions,
            validation,
            warning,
            nextSteps,
            ...nodeTypeValidationField,
        };
    }
    const hasExplicitChanges = Boolean(params.changes && Object.keys(params.changes).length > 0);
    if (hasExplicitChanges) {
        await setWorkspaceNode(client, {
            workspaceID,
            nodeID,
            body: buildUpdatedWorkspaceNodeBody(createdNode, params.changes),
        });
    }
    // Aggregation path: convertJoinToAggregation is called with repoPath and its
    // configCompletion / configCompletionSkipped fields are forwarded.
    if (groupByColumns && aggregates) {
        const aggResult = await convertJoinToAggregation(client, {
            workspaceID,
            nodeID,
            groupByColumns,
            aggregates,
            joinType: params.joinType,
            repoPath,
        });
        return {
            node: aggResult.node,
            predecessors: predecessorNodes,
            joinSuggestions,
            validation,
            joinSQL: aggResult.joinSQL,
            groupByAnalysis: aggResult.groupByAnalysis,
            aggregationValidation: aggResult.validation,
            ...(aggResult.configCompletion ? { configCompletion: aggResult.configCompletion } : {}),
            ...(aggResult.configCompletionSkipped ? { configCompletionSkipped: aggResult.configCompletionSkipped } : {}),
            nextSteps,
            ...nodeTypeValidationField,
        };
    }
    // Column replacement path (the WHERE filter is only applied together with columns).
    if (columns) {
        await replaceWorkspaceNodeColumns(client, {
            workspaceID,
            nodeID,
            columns,
            whereCondition,
        });
    }
    // Best-effort: any failure here falls through to the "skipped" result.
    try {
        const configCompletion = await completeNodeConfiguration(client, {
            workspaceID,
            nodeID,
            repoPath,
        });
        return {
            node: configCompletion.node,
            predecessors: predecessorNodes,
            joinSuggestions,
            validation,
            configCompletion,
            nextSteps,
            ...nodeTypeValidationField,
        };
    }
    catch {
        // The node was modified after the first fetch only if changes or columns were applied.
        const nodeWasModified = hasExplicitChanges || columns;
        const latestNode = nodeWasModified
            ? await getWorkspaceNode(client, { workspaceID, nodeID })
            : createdNode;
        return {
            node: latestNode,
            predecessors: predecessorNodes,
            joinSuggestions,
            validation,
            configCompletionSkipped: "Config completion failed — call complete-node-configuration with repoPath after creation to apply node type config and column-level attributes.",
            nextSteps,
            ...nodeTypeValidationField,
        };
    }
}
1272
/**
 * Collect the distinct predecessor node IDs referenced by a node's metadata.
 *
 * Primary source: the values of every sourceMapping entry's `aliases` map
 * (non-empty strings only). Fallback, used only when the aliases yield
 * nothing: the `nodeID` of each column's sources[].columnReferences[] entry.
 *
 * sourceMapping.dependencies[] is not consulted; per the original notes it
 * carries nodeName/locationName but no nodeID.
 *
 * @param {object} metadata - node metadata (sourceMapping and columns are optional)
 * @returns {string[]} unique node IDs in first-seen order
 */
function extractPredecessorNodeIDs(metadata) {
    const collected = new Set();
    const mappings = Array.isArray(metadata.sourceMapping) ? metadata.sourceMapping : [];
    mappings
        .filter((mapping) => isPlainObject(mapping) && isPlainObject(mapping.aliases))
        .flatMap((mapping) => Object.values(mapping.aliases))
        .filter((candidate) => typeof candidate === "string" && candidate.length > 0)
        .forEach((nodeID) => collected.add(nodeID));
    // Fallback: walk columns -> sources -> columnReferences.
    if (collected.size === 0 && Array.isArray(metadata.columns)) {
        const references = metadata.columns
            .filter((column) => isPlainObject(column) && Array.isArray(column.sources))
            .flatMap((column) => column.sources)
            .filter((source) => isPlainObject(source) && Array.isArray(source.columnReferences))
            .flatMap((source) => source.columnReferences);
        for (const reference of references) {
            if (isPlainObject(reference) && typeof reference.nodeID === "string") {
                collected.add(reference.nodeID);
            }
        }
    }
    return [...collected];
}
1313
/**
 * Rewrite an existing workspace node as an aggregation over its predecessors.
 *
 * Steps, in order:
 *  1. Fetch the node and resolve its predecessor IDs.
 *  2. Unless `params.maintainJoins === false`, fetch every predecessor and
 *     generate FROM/JOIN SQL (`{{ ref() }}` form when at least two predecessors
 *     expose name + locationName, bare-name form otherwise).
 *  3. Build one pass-through column per GROUP BY expression and one column per
 *     aggregate, then replace the node's columns with them.
 *  4. Write the generated JOIN SQL and/or GROUP BY clause into
 *     `join.joinCondition` of the first sourceMapping entry.
 *  5. Run config completion as a best-effort final step.
 *
 * @param client - API client handed through to the workspace helpers.
 * @param {object} params
 * @param params.workspaceID - Workspace that owns the node.
 * @param params.nodeID - Node to convert.
 * @param {string[]} params.groupByColumns - GROUP BY expressions; the column
 *   name is the last dot-separated segment with double quotes removed.
 * @param {Array<{name: string, function: string, expression: string, description?: string}>} params.aggregates
 *   Aggregate columns, rendered as `function(expression)`.
 * @param {boolean} [params.maintainJoins] - Set to `false` to skip predecessor
 *   fetching and join generation (default: joins are maintained).
 * @param {string} [params.joinType] - Join keyword, defaults to "INNER JOIN".
 * @param [params.repoPath] - Forwarded to config completion.
 * @returns {Promise<object>} `{ node, joinSQL, groupByAnalysis, validation }`
 *   plus `configCompletion` on success, or `configCompletionSkipped` and
 *   `configCompletionError` when config completion failed.
 * @throws {Error} When the fetched node is not a plain object.
 */
export async function convertJoinToAggregation(client, params) {
    const joinType = params.joinType || "INNER JOIN";
    // "SRC"."REGION" -> REGION; falls back to the raw expression when nothing is left.
    const toColumnName = (expression) => expression.split(".").pop()?.replace(/"/g, "") || expression;
    // Get the current node to analyze predecessors
    const current = await getWorkspaceNode(client, {
        workspaceID: params.workspaceID,
        nodeID: params.nodeID,
    });
    if (!isPlainObject(current)) {
        throw new Error("Node response was not an object");
    }
    const metadata = isPlainObject(current.metadata) ? current.metadata : {};
    const predecessorNodeIDs = extractPredecessorNodeIDs(metadata);
    // Fetch predecessor nodes to build join suggestions and ref info
    const predecessorNodes = [];
    const predecessorRefInfos = [];
    const shouldMaintainJoins = params.maintainJoins !== false; // default true
    if (shouldMaintainJoins && predecessorNodeIDs.length > 0) {
        const fetched = await Promise.all(predecessorNodeIDs.map(async (predecessorID) => ({
            nodeID: predecessorID,
            node: await getWorkspaceNode(client, { workspaceID: params.workspaceID, nodeID: predecessorID }),
        })));
        for (const { nodeID, node: predecessor } of fetched) {
            if (!isPlainObject(predecessor)) {
                continue;
            }
            predecessorNodes.push(buildPredecessorSummary(nodeID, predecessor));
            const refInfo = extractPredecessorRefInfo(nodeID, predecessor);
            if (refInfo) {
                predecessorRefInfos.push(refInfo);
            }
        }
    }
    // Generate JOIN SQL with {{ ref() }} syntax if maintaining joins
    const joinSuggestions = buildJoinSuggestions(predecessorNodes);
    let joinSQL;
    if (predecessorRefInfos.length >= 2) {
        joinSQL = generateRefJoinSQL(predecessorRefInfos, joinSuggestions, joinType);
    }
    else {
        // Fallback: predecessors missing locationName — use bare-name join SQL
        const bareJoin = generateJoinSQL(joinSuggestions, joinType);
        joinSQL = {
            fromClause: bareJoin.fromClause,
            joinClauses: bareJoin.joinClauses.map((jc) => `${jc.type} ${jc.rightTableAlias}\n ON ${jc.onConditions.join("\n AND ")}`),
            fullSQL: bareJoin.fullSQL,
            // Only claim a missing locationName when predecessors were actually
            // inspected; with maintainJoins === false nothing was fetched.
            warnings: shouldMaintainJoins && predecessorNodeIDs.length >= 2
                ? ["Predecessors are missing locationName — generated join uses bare table names instead of {{ ref() }} syntax. Set locationName on predecessor nodes for proper Coalesce references."]
                : [],
        };
    }
    // Build a lookup map of existing column datatypes from the current node
    // so GROUP BY pass-through columns can inherit their predecessor's dataType
    const existingColumns = Array.isArray(metadata.columns) ? metadata.columns : [];
    const existingDataTypeByName = new Map();
    for (const col of existingColumns) {
        if (isPlainObject(col) && typeof col.name === "string" && typeof col.dataType === "string") {
            existingDataTypeByName.set(normalizeColumnName(col.name), col.dataType);
        }
    }
    // Build columns: group by columns + aggregates
    const columns = [];
    for (const groupByCol of params.groupByColumns) {
        const colName = toColumnName(groupByCol);
        columns.push({
            name: colName,
            transform: groupByCol,
            dataType: inferDatatype(groupByCol)
                ?? existingDataTypeByName.get(normalizeColumnName(colName))
                ?? "VARCHAR",
        });
    }
    for (const agg of params.aggregates) {
        const transform = `${agg.function}(${agg.expression})`;
        columns.push({
            name: agg.name,
            transform,
            dataType: inferDatatype(transform) ?? "VARCHAR",
            description: agg.description,
        });
    }
    // Analyze GROUP BY requirements
    const groupByAnalysis = analyzeColumnsForGroupBy(columns);
    const warnings = [];
    if (!groupByAnalysis.validation.valid) {
        warnings.push(...groupByAnalysis.validation.errors);
    }
    const validation = {
        valid: groupByAnalysis.validation.valid && warnings.length === 0,
        warnings,
    };
    // Derive business key and change tracking column names
    // Business key = GROUP BY columns (dimensions)
    // Change tracking = aggregate columns (measures that change over time)
    const businessKeyColumnNames = new Set(params.groupByColumns.map(toColumnName));
    const changeTrackingColumnNames = new Set(params.aggregates.map((agg) => agg.name));
    // Convert columns to metadata format with column-level attributes
    // columnSelector attributes (isBusinessKey, isChangeTracking) are set directly
    // on each column object — this is how Coalesce node type definitions work
    const metadataColumns = columns.map((col) => {
        const metadataCol = {
            name: col.name,
            dataType: col.dataType ?? "VARCHAR",
            transform: col.transform,
            nullable: true,
        };
        if (col.description) {
            metadataCol.description = col.description;
        }
        if (businessKeyColumnNames.has(col.name)) {
            metadataCol.isBusinessKey = true;
        }
        if (changeTrackingColumnNames.has(col.name)) {
            metadataCol.isChangeTracking = true;
        }
        return metadataCol;
    });
    // Replace columns with aggregation columns
    const updated = await replaceWorkspaceNodeColumns(client, {
        workspaceID: params.workspaceID,
        nodeID: params.nodeID,
        columns: metadataColumns,
    });
    // Write the generated JOIN SQL and/or GROUP BY directly to the node's sourceMapping.
    // Re-fetch the node to get fresh sourceMapping after column replacement.
    let sourceMappingWritten = false;
    const hasJoinSQL = joinSQL.fullSQL.length > 0;
    const hasGroupBy = groupByAnalysis.groupByColumns.length > 0;
    if (hasJoinSQL || hasGroupBy) {
        const freshNode = await getWorkspaceNode(client, {
            workspaceID: params.workspaceID,
            nodeID: params.nodeID,
        });
        const freshMetadata = isPlainObject(freshNode) && isPlainObject(freshNode.metadata)
            ? freshNode.metadata
            : {};
        const freshSourceMapping = Array.isArray(freshMetadata.sourceMapping)
            ? freshMetadata.sourceMapping
            : [];
        const firstEntry = freshSourceMapping.find(isPlainObject);
        if (!firstEntry) {
            joinSQL.warnings.push("Could not write joinCondition — node has no sourceMapping entries. " +
                "The generated SQL is returned but was not persisted to the node.");
        }
        else {
            const existingJoin = isPlainObject(firstEntry.join) ? firstEntry.join : {};
            const existingJoinCondition = typeof existingJoin.joinCondition === "string"
                ? existingJoin.joinCondition.trim()
                : "";
            const groupByClause = hasGroupBy
                ? `\nGROUP BY ${groupByAnalysis.groupByColumns.join(", ")}`
                : "";
            let fullJoinCondition;
            if (hasJoinSQL) {
                // Multi-predecessor: use generated FROM/JOIN + GROUP BY
                fullJoinCondition = joinSQL.fullSQL + groupByClause;
            }
            else {
                // Single-predecessor aggregation: append GROUP BY to existing joinCondition
                // (hasGroupBy is necessarily true on this branch).
                fullJoinCondition = existingJoinCondition.length > 0
                    ? existingJoinCondition + groupByClause
                    : groupByClause.trim();
            }
            const updatedSourceMapping = freshSourceMapping.map((entry) => entry === firstEntry
                ? { ...firstEntry, join: { ...existingJoin, joinCondition: fullJoinCondition } }
                : entry);
            await updateWorkspaceNode(client, {
                workspaceID: params.workspaceID,
                nodeID: params.nodeID,
                changes: {
                    metadata: {
                        sourceMapping: updatedSourceMapping,
                    },
                },
            });
            sourceMappingWritten = true;
        }
    }
    // Complete configuration with intelligent rules (best-effort)
    try {
        const configCompletion = await completeNodeConfiguration(client, {
            workspaceID: params.workspaceID,
            nodeID: params.nodeID,
            repoPath: params.repoPath,
        });
        return {
            node: configCompletion.node,
            joinSQL,
            groupByAnalysis,
            validation,
            configCompletion,
        };
    }
    catch (error) {
        // `updated` predates the joinCondition write, so re-fetch when the
        // sourceMapping was changed; otherwise the caller would get a stale node.
        let node = updated;
        if (sourceMappingWritten) {
            try {
                node = await getWorkspaceNode(client, {
                    workspaceID: params.workspaceID,
                    nodeID: params.nodeID,
                });
            }
            catch {
                // Best-effort: keep the post-column-replacement snapshot.
            }
        }
        return {
            node,
            joinSQL,
            groupByAnalysis,
            validation,
            configCompletionSkipped: "Config completion failed — call complete-node-configuration with repoPath after creation to apply node type config and column-level attributes.",
            // Surface the reason instead of discarding it.
            configCompletionError: error instanceof Error ? error.message : String(error),
        };
    }
}
1518
/**
 * Describe a predecessor node by ID, name, location and column names.
 *
 * @param nodeID - ID the predecessor was fetched with; copied into the result.
 * @param {object} node - Predecessor node; `name` and `locationName` are read.
 * @returns {{nodeID: *, nodeName: string, locationName: string, columnNames: *}|null}
 *   The descriptor, or null unless both `name` and `locationName` are
 *   non-empty strings.
 */
function extractPredecessorRefInfo(nodeID, node) {
    const isNonEmptyString = (value) => typeof value === "string" && value !== "";
    if (!isNonEmptyString(node.name) || !isNonEmptyString(node.locationName)) {
        return null;
    }
    return {
        nodeID,
        nodeName: node.name,
        locationName: node.locationName,
        columnNames: getNodeColumnNames(node),
    };
}
1530
/**
 * Build FROM/JOIN SQL whose tables are written as
 * `{{ ref('<locationName>', '<nodeName>') }} "<nodeName>"`.
 *
 * The first predecessor becomes the FROM table. Each suggestion whose right
 * side resolves (by node ID, else by case-insensitive name) to a predecessor
 * that has not been handled yet produces one JOIN clause. ON conditions come
 * from matching `joinColumnOverrides` when present, otherwise from the
 * suggestion's `commonColumns`; with neither, a warning is recorded instead.
 *
 * @param {Array<{nodeID: *, nodeName: string, locationName: string}>} predecessors
 * @param {Iterable<object>} joinSuggestions
 * @param {string} joinType - Join keyword placed in front of every clause.
 * @param {Array<object>} [joinColumnOverrides] - Explicit join keys; each entry
 *   names `leftPredecessor`/`rightPredecessor` (name or ID) and
 *   `leftColumn`/`rightColumn`.
 * @returns {{fromClause: string, joinClauses: string[], fullSQL: string, warnings: string[]}}
 */
function generateRefJoinSQL(predecessors, joinSuggestions, joinType, joinColumnOverrides) {
    if (predecessors.length === 0) {
        return { fromClause: "", joinClauses: [], fullSQL: "", warnings: [] };
    }
    const refExpression = (pred) => `{{ ref('${pred.locationName}', '${pred.nodeName}') }} "${pred.nodeName}"`;
    const anchor = predecessors[0];
    const fromClause = `FROM ${refExpression(anchor)}`;
    // Index predecessors by ID and by upper-cased name for right-side lookup.
    const byID = new Map();
    const byUpperName = new Map();
    for (const pred of predecessors) {
        byID.set(pred.nodeID, pred);
        byUpperName.set(pred.nodeName.toUpperCase(), pred);
    }
    const handled = new Set([anchor.nodeID]);
    const notes = [];
    const joins = [];
    const overrides = joinColumnOverrides ?? [];
    for (const suggestion of joinSuggestions) {
        const leftName = suggestion.leftPredecessorName;
        const target = byID.get(suggestion.rightPredecessorNodeID)
            ?? byUpperName.get((suggestion.rightPredecessorName ?? "").toUpperCase());
        // Unknown right side, or a predecessor already taken by an earlier pair.
        if (!target || handled.has(target.nodeID)) {
            continue;
        }
        handled.add(target.nodeID);
        const pairOverrides = overrides.filter((o) => {
            const leftMatches = o.leftPredecessor === leftName
                || o.leftPredecessor === suggestion.leftPredecessorNodeID;
            const rightMatches = o.rightPredecessor === suggestion.rightPredecessorName
                || o.rightPredecessor === suggestion.rightPredecessorNodeID;
            return leftMatches && rightMatches;
        });
        let columnPairs;
        if (pairOverrides.length > 0) {
            columnPairs = pairOverrides.map((o) => [o.leftColumn, o.rightColumn]);
        }
        else if (suggestion.commonColumns.length > 0) {
            columnPairs = suggestion.commonColumns.map((col) => [col.leftColumnName, col.rightColumnName]);
        }
        else {
            notes.push(`No common columns between "${leftName}" and "${target.nodeName}". ` +
                `Provide joinColumnOverrides to specify the join keys explicitly.`);
            continue;
        }
        const onConditions = columnPairs.map(([leftColumn, rightColumn]) => `"${leftName}"."${leftColumn}" = "${target.nodeName}"."${rightColumn}"`);
        joins.push(`${joinType} ${refExpression(target)}\n ON ${onConditions.join("\n AND ")}`);
    }
    // Warn about predecessors that never appeared as a resolvable right side.
    predecessors
        .filter((pred) => !handled.has(pred.nodeID))
        .forEach((pred) => {
            notes.push(`Predecessor "${pred.nodeName}" was not included in any join. ` +
                `It has no common columns with other predecessors. Provide joinColumnOverrides to specify the join keys.`);
        });
    return {
        fromClause,
        joinClauses: joins,
        fullSQL: [fromClause, ...joins].join("\n"),
        warnings: notes,
    };
}
1581
/**
 * Generate a FROM/JOIN (+ optional WHERE / QUALIFY) joinCondition for a node
 * with two or more predecessors and persist it on the node.
 *
 * The joinCondition is written to `join.joinCondition` of the first plain-object
 * entry in the node's `metadata.sourceMapping`; other `join` keys and other
 * entries are preserved. When the node has no such entry the SQL is still
 * returned, together with a warning that it was not persisted.
 *
 * @param client - API client handed through to the workspace helpers.
 * @param {object} params
 * @param params.workspaceID - Workspace that owns the node.
 * @param params.nodeID - Node whose joinCondition is generated.
 * @param {string} [params.joinType] - Join keyword, defaults to "INNER JOIN".
 * @param {Array<object>} [params.joinColumnOverrides] - Explicit join keys,
 *   forwarded to generateRefJoinSQL.
 * @param {string} [params.whereClause] - Filter, with or without a leading
 *   WHERE keyword. Blank / whitespace-only values are ignored.
 * @param {string} [params.qualifyClause] - QUALIFY expression, with or without
 *   the leading keyword. Blank / whitespace-only values are ignored.
 * @returns {Promise<{joinCondition: string, joinSuggestions: *, predecessors: object[], warnings: string[]}>}
 * @throws {Error} When the node response is not an object, the node has fewer
 *   than two predecessors, or fewer than two predecessors expose both a name
 *   and a locationName.
 */
export async function applyJoinCondition(client, params) {
    const joinType = params.joinType ?? "INNER JOIN";
    // Normalize an optional clause: null when absent or blank, otherwise the
    // trimmed text with the keyword prepended unless it already leads the text.
    const toClause = (clause, keyword, keywordPattern) => {
        if (!clause) {
            return null;
        }
        const trimmed = clause.trim();
        if (trimmed.length === 0) {
            // A whitespace-only clause would otherwise emit a dangling keyword.
            return null;
        }
        return keywordPattern.test(trimmed) ? trimmed : `${keyword} ${trimmed}`;
    };
    // Fetch the node to extract predecessors
    const current = await getWorkspaceNode(client, {
        workspaceID: params.workspaceID,
        nodeID: params.nodeID,
    });
    if (!isPlainObject(current)) {
        throw new Error("Node response was not an object");
    }
    const metadata = isPlainObject(current.metadata) ? current.metadata : {};
    const sourceMapping = Array.isArray(metadata.sourceMapping) ? metadata.sourceMapping : [];
    const predecessorNodeIDs = extractPredecessorNodeIDs(metadata);
    if (predecessorNodeIDs.length < 2) {
        throw new Error("apply-join-condition requires a node with 2+ predecessors. " +
            `This node has ${predecessorNodeIDs.length} predecessor(s). ` +
            "For single-predecessor nodes, set the joinCondition directly via update-workspace-node.");
    }
    // Fetch all predecessors in parallel to get their names, locationNames, and columns
    const predecessorRefInfos = [];
    const predecessorSummaries = [];
    const warnings = [];
    const fetchedPredecessors = await Promise.all(predecessorNodeIDs.map(async (nodeID) => ({
        nodeID,
        node: await getWorkspaceNode(client, { workspaceID: params.workspaceID, nodeID }),
    })));
    for (const { nodeID, node: predecessor } of fetchedPredecessors) {
        if (!isPlainObject(predecessor)) {
            warnings.push(`Could not fetch predecessor ${nodeID}`);
            continue;
        }
        const refInfo = extractPredecessorRefInfo(nodeID, predecessor);
        if (!refInfo) {
            const name = typeof predecessor.name === "string" ? predecessor.name : nodeID;
            warnings.push(`Predecessor "${name}" is missing locationName. ` +
                `Set it in the Coalesce UI or via update-workspace-node before applying joins.`);
            continue;
        }
        predecessorRefInfos.push(refInfo);
        predecessorSummaries.push(buildPredecessorSummary(nodeID, predecessor));
    }
    if (predecessorRefInfos.length < 2) {
        throw new Error("Could not resolve 2+ predecessors with valid name and locationName. " +
            "Ensure all predecessor nodes have a locationName set.");
    }
    // Build join suggestions from common columns
    const joinSuggestions = buildJoinSuggestions(predecessorSummaries);
    // Generate FROM/JOIN/ON with {{ ref() }} syntax
    const joinResult = generateRefJoinSQL(predecessorRefInfos, joinSuggestions, joinType, params.joinColumnOverrides);
    warnings.push(...joinResult.warnings);
    // Build full joinCondition: FROM/JOIN + WHERE + QUALIFY
    const parts = [joinResult.fullSQL];
    const whereStr = toClause(params.whereClause, "WHERE", /^where\b/i);
    if (whereStr !== null) {
        parts.push(whereStr);
    }
    const qualifyStr = toClause(params.qualifyClause, "QUALIFY", /^qualify\b/i);
    if (qualifyStr !== null) {
        parts.push(qualifyStr);
    }
    const fullJoinCondition = parts.join("\n");
    // Write to node's sourceMapping
    const firstEntry = sourceMapping.find(isPlainObject);
    if (!firstEntry) {
        warnings.push("Could not write joinCondition — node has no sourceMapping entries. " +
            "The generated SQL is returned but was not persisted to the node.");
    }
    else {
        const updatedSourceMapping = sourceMapping.map((entry) => entry === firstEntry
            ? {
                ...firstEntry,
                join: {
                    ...(isPlainObject(firstEntry.join) ? firstEntry.join : {}),
                    joinCondition: fullJoinCondition,
                },
            }
            : entry);
        await updateWorkspaceNode(client, {
            workspaceID: params.workspaceID,
            nodeID: params.nodeID,
            changes: {
                metadata: {
                    sourceMapping: updatedSourceMapping,
                },
            },
        });
    }
    return {
        joinCondition: fullJoinCondition,
        joinSuggestions,
        predecessors: predecessorRefInfos.map((p) => ({
            nodeID: p.nodeID,
            nodeName: p.nodeName,
            locationName: p.locationName,
            columnCount: p.columnNames.length,
        })),
        warnings,
    };
}
1686
/**
 * Returns the distinct node types observed in existing workspace nodes.
 * This is intentionally observation-based and should not be treated as a true
 * installed-type registry for the workspace.
 *
 * Only plain-object nodes with a non-empty string `nodeType` are counted.
 *
 * @param client - API client handed through to fetchAllWorkspaceNodes.
 * @param {object} params
 * @param params.workspaceID - Workspace whose nodes are listed.
 * @returns {Promise<{workspaceID: *, basis: "observed_nodes", nodeTypes: string[], counts: Object<string, number>, total: number}>}
 *   `nodeTypes` is ordered by descending count; `total` is the number of
 *   counted nodes.
 */
export async function listWorkspaceNodeTypes(client, params) {
    const { workspaceID } = params;
    const nodes = await fetchAllWorkspaceNodes(client, {
        workspaceID,
        detail: false,
    });
    // Tally in a Map: a plain object would read inherited members for node
    // types named like Object.prototype properties (e.g. "constructor").
    const tally = new Map();
    let total = 0;
    for (const node of nodes.items) {
        if (!isPlainObject(node)) {
            continue;
        }
        const { nodeType } = node;
        if (typeof nodeType === "string" && nodeType.length > 0) {
            tally.set(nodeType, (tally.get(nodeType) ?? 0) + 1);
            total++;
        }
    }
    // Object.fromEntries defines own data properties, so every key is safe.
    const counts = Object.fromEntries(tally);
    const nodeTypes = Object.keys(counts).sort((a, b) => tally.get(b) - tally.get(a));
    return {
        workspaceID,
        basis: "observed_nodes",
        nodeTypes,
        counts,
        total,
    };
}