coalesce-transform-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +304 -0
  3. package/dist/cache-dir.d.ts +26 -0
  4. package/dist/cache-dir.js +106 -0
  5. package/dist/client.d.ts +25 -0
  6. package/dist/client.js +212 -0
  7. package/dist/coalesce/api/environments.d.ts +20 -0
  8. package/dist/coalesce/api/environments.js +15 -0
  9. package/dist/coalesce/api/git-accounts.d.ts +21 -0
  10. package/dist/coalesce/api/git-accounts.js +21 -0
  11. package/dist/coalesce/api/jobs.d.ts +25 -0
  12. package/dist/coalesce/api/jobs.js +21 -0
  13. package/dist/coalesce/api/nodes.d.ts +29 -0
  14. package/dist/coalesce/api/nodes.js +33 -0
  15. package/dist/coalesce/api/projects.d.ts +22 -0
  16. package/dist/coalesce/api/projects.js +25 -0
  17. package/dist/coalesce/api/runs.d.ts +19 -0
  18. package/dist/coalesce/api/runs.js +34 -0
  19. package/dist/coalesce/api/subgraphs.d.ts +20 -0
  20. package/dist/coalesce/api/subgraphs.js +17 -0
  21. package/dist/coalesce/api/users.d.ts +30 -0
  22. package/dist/coalesce/api/users.js +31 -0
  23. package/dist/coalesce/types.d.ts +298 -0
  24. package/dist/coalesce/types.js +746 -0
  25. package/dist/generated/.gitkeep +0 -0
  26. package/dist/generated/node-type-corpus.json +42656 -0
  27. package/dist/index.d.ts +2 -0
  28. package/dist/index.js +10 -0
  29. package/dist/mcp/cache.d.ts +3 -0
  30. package/dist/mcp/cache.js +137 -0
  31. package/dist/mcp/environments.d.ts +3 -0
  32. package/dist/mcp/environments.js +61 -0
  33. package/dist/mcp/git-accounts.d.ts +3 -0
  34. package/dist/mcp/git-accounts.js +70 -0
  35. package/dist/mcp/jobs.d.ts +3 -0
  36. package/dist/mcp/jobs.js +77 -0
  37. package/dist/mcp/node-type-corpus.d.ts +3 -0
  38. package/dist/mcp/node-type-corpus.js +173 -0
  39. package/dist/mcp/nodes.d.ts +3 -0
  40. package/dist/mcp/nodes.js +341 -0
  41. package/dist/mcp/pipelines.d.ts +3 -0
  42. package/dist/mcp/pipelines.js +342 -0
  43. package/dist/mcp/projects.d.ts +3 -0
  44. package/dist/mcp/projects.js +70 -0
  45. package/dist/mcp/repo-node-types.d.ts +135 -0
  46. package/dist/mcp/repo-node-types.js +387 -0
  47. package/dist/mcp/runs.d.ts +3 -0
  48. package/dist/mcp/runs.js +92 -0
  49. package/dist/mcp/subgraphs.d.ts +3 -0
  50. package/dist/mcp/subgraphs.js +60 -0
  51. package/dist/mcp/users.d.ts +3 -0
  52. package/dist/mcp/users.js +107 -0
  53. package/dist/prompts/index.d.ts +2 -0
  54. package/dist/prompts/index.js +58 -0
  55. package/dist/resources/context/aggregation-patterns.md +145 -0
  56. package/dist/resources/context/data-engineering-principles.md +183 -0
  57. package/dist/resources/context/hydrated-metadata.md +92 -0
  58. package/dist/resources/context/id-discovery.md +64 -0
  59. package/dist/resources/context/intelligent-node-configuration.md +162 -0
  60. package/dist/resources/context/node-creation-decision-tree.md +156 -0
  61. package/dist/resources/context/node-operations.md +316 -0
  62. package/dist/resources/context/node-payloads.md +114 -0
  63. package/dist/resources/context/node-type-corpus.md +166 -0
  64. package/dist/resources/context/node-type-selection-guide.md +96 -0
  65. package/dist/resources/context/overview.md +135 -0
  66. package/dist/resources/context/pipeline-workflows.md +355 -0
  67. package/dist/resources/context/run-operations.md +55 -0
  68. package/dist/resources/context/sql-bigquery.md +41 -0
  69. package/dist/resources/context/sql-databricks.md +40 -0
  70. package/dist/resources/context/sql-platform-selection.md +70 -0
  71. package/dist/resources/context/sql-snowflake.md +43 -0
  72. package/dist/resources/context/storage-mappings.md +49 -0
  73. package/dist/resources/context/tool-usage.md +98 -0
  74. package/dist/resources/index.d.ts +5 -0
  75. package/dist/resources/index.js +254 -0
  76. package/dist/schemas/node-payloads.d.ts +5019 -0
  77. package/dist/schemas/node-payloads.js +147 -0
  78. package/dist/server.d.ts +7 -0
  79. package/dist/server.js +63 -0
  80. package/dist/services/cache/snapshots.d.ts +108 -0
  81. package/dist/services/cache/snapshots.js +275 -0
  82. package/dist/services/config/context-analyzer.d.ts +14 -0
  83. package/dist/services/config/context-analyzer.js +76 -0
  84. package/dist/services/config/field-classifier.d.ts +23 -0
  85. package/dist/services/config/field-classifier.js +47 -0
  86. package/dist/services/config/intelligent.d.ts +55 -0
  87. package/dist/services/config/intelligent.js +306 -0
  88. package/dist/services/config/rules.d.ts +6 -0
  89. package/dist/services/config/rules.js +44 -0
  90. package/dist/services/config/schema-resolver.d.ts +18 -0
  91. package/dist/services/config/schema-resolver.js +80 -0
  92. package/dist/services/corpus/loader.d.ts +56 -0
  93. package/dist/services/corpus/loader.js +25 -0
  94. package/dist/services/corpus/search.d.ts +49 -0
  95. package/dist/services/corpus/search.js +69 -0
  96. package/dist/services/corpus/templates.d.ts +4 -0
  97. package/dist/services/corpus/templates.js +11 -0
  98. package/dist/services/pipelines/execution.d.ts +20 -0
  99. package/dist/services/pipelines/execution.js +290 -0
  100. package/dist/services/pipelines/node-type-intent.d.ts +96 -0
  101. package/dist/services/pipelines/node-type-intent.js +356 -0
  102. package/dist/services/pipelines/node-type-selection.d.ts +66 -0
  103. package/dist/services/pipelines/node-type-selection.js +758 -0
  104. package/dist/services/pipelines/planning.d.ts +543 -0
  105. package/dist/services/pipelines/planning.js +1839 -0
  106. package/dist/services/policies/sql-override.d.ts +7 -0
  107. package/dist/services/policies/sql-override.js +109 -0
  108. package/dist/services/repo/operations.d.ts +6 -0
  109. package/dist/services/repo/operations.js +10 -0
  110. package/dist/services/repo/parser.d.ts +70 -0
  111. package/dist/services/repo/parser.js +365 -0
  112. package/dist/services/repo/path.d.ts +2 -0
  113. package/dist/services/repo/path.js +58 -0
  114. package/dist/services/templates/nodes.d.ts +50 -0
  115. package/dist/services/templates/nodes.js +336 -0
  116. package/dist/services/workspace/analysis.d.ts +56 -0
  117. package/dist/services/workspace/analysis.js +151 -0
  118. package/dist/services/workspace/mutations.d.ts +150 -0
  119. package/dist/services/workspace/mutations.js +1718 -0
  120. package/dist/utils.d.ts +5 -0
  121. package/dist/utils.js +7 -0
  122. package/dist/workflows/get-environment-overview.d.ts +9 -0
  123. package/dist/workflows/get-environment-overview.js +23 -0
  124. package/dist/workflows/get-run-details.d.ts +10 -0
  125. package/dist/workflows/get-run-details.js +28 -0
  126. package/dist/workflows/progress.d.ts +20 -0
  127. package/dist/workflows/progress.js +54 -0
  128. package/dist/workflows/retry-and-wait.d.ts +13 -0
  129. package/dist/workflows/retry-and-wait.js +139 -0
  130. package/dist/workflows/run-and-wait.d.ts +13 -0
  131. package/dist/workflows/run-and-wait.js +141 -0
  132. package/dist/workflows/run-status.d.ts +10 -0
  133. package/dist/workflows/run-status.js +27 -0
  134. package/package.json +34 -0
@@ -0,0 +1,162 @@
1
+ # Intelligent Node Configuration
2
+
3
+ ## Overview
4
+
5
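+ The intelligent node configuration system automatically completes required and contextual config fields, wherever they can be determined, based on node type schemas and node analysis. Required fields it cannot determine are reported in `configReview.missingRequired`.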
6
+
7
+ ## Tools
8
+
9
+ ### `complete-node-configuration`
10
+
11
+ Standalone tool that completes config for any workspace node.
12
+
13
+ **Input:**
14
+ - `workspaceID`: Workspace containing the node
15
+ - `nodeID`: Node to configure
16
+ - `repoPath`: (Optional) Path to committed repo for accurate schema resolution
17
+
18
+ **Process:**
19
+ 1. Fetches current node
20
+ 2. Resolves node type schema (repo → corpus)
21
+ 3. Analyzes node context (columns, sources, joins)
22
+ 4. Classifies config fields (required, conditional, optional, contextual)
23
+ 5. Applies intelligence rules
24
+ 6. Updates node with complete config
25
+
26
+ **Output:**
27
+
28
+ - `node`: Updated node with complete config
29
+ - `configChanges`: What was changed (required, contextual, defaults, preserved)
30
+ - `configReview`: Status summary of the node's configuration
31
+   - `status`: `complete` | `needs_attention` | `incomplete`
32
+   - `summary`: Human-readable status description
33
+   - `missingRequired`: Required fields or columnSelectors still unset
34
+   - `warnings`: Issues needing manual review (e.g., missing business keys on dimension/fact nodes)
35
+   - `suggestions`: Optional improvements (e.g., change tracking, materialization changes)
36
+ - `columnAttributeChanges`: Column-level attributes applied (isBusinessKey, isChangeTracking)
37
+ - `reasoning`: Why each decision was made
38
+
39
+ ### `convert-join-to-aggregation`
40
+
41
+ Enhanced with automatic config completion. After transformation, automatically calls `complete-node-configuration` to fill all remaining config fields.
42
+
43
+ **New output field:** `configCompletion` with changes and analysis
44
+
45
+ ## Intelligence Rules
46
+
47
+ ### Multi-Source Strategy
48
+
49
+ **Trigger:** Node has multiple sources
50
+
51
+ **Action:**
52
+
53
+ - If aggregates present: `insertStrategy: "UNION"` (deduplication)
54
+ - If no aggregates: `insertStrategy: "UNION ALL"` (performance)
55
+
56
+ ### Aggregation Compatibility
57
+
58
+ **Trigger:** Node has aggregate columns (COUNT, SUM, AVG, etc.)
59
+
60
+ **Action:**
61
+
62
+ - `selectDistinct: false` (incompatible with aggregates)
63
+
64
+ ### View Materialization
65
+
66
+ **Trigger:** Node materialized as a view without aggregates
67
+
68
+ **Action:**
69
+
70
+ - `selectDistinct: false` (default; set to true only if deduplication is needed)
71
+
72
+ ### Table Materialization
73
+
74
+ **Trigger:** Node materialized as a table
75
+
76
+ **Action:**
77
+
78
+ - `truncateBefore: false` (safe default to preserve existing data)
79
+
80
+ ### Timestamp Column Detection
81
+
82
+ **Trigger:** Columns matching `*_TS`, `*_DATE`, `*_TIMESTAMP` patterns
83
+
84
+ **Action:**
85
+
86
+ - Documents candidates in `detectedPatterns.candidateColumns`
87
+ - Does NOT auto-enable `lastModifiedComparison` (user choice)
88
+ - If a table has NO timestamp/date columns, a reasoning note suggests adding audit columns
89
+
90
+ ### Type 2 SCD Detection
91
+
92
+ **Trigger:** Columns include START_DATE/EFFECTIVE_DATE, END_DATE/EXPIRY_DATE, and IS_CURRENT/CURRENT_FLAG
93
+
94
+ **Action:**
95
+
96
+ - Documents detection in reasoning
97
+ - Does NOT auto-enable SCD config (requires verification)
98
+
99
+ ## Schema Resolution
100
+
101
+ Priority order:
102
+ 1. **Repo-backed** (if `repoPath` provided) - most accurate
103
+ 2. **Corpus** (fallback) - standard Coalesce node types
104
+ 3. **Error** (if neither available)
105
+
106
+ ## Preservation Rules
107
+
108
+ **Never overwrite:**
109
+ - Existing non-null config values (except required fields)
110
+ - User-set fields take precedence
111
+
112
+ **Always set:**
113
+ - Required fields (even if overwriting)
114
+ - Aggregation-specific fields from transformation
115
+
116
+ **Smart merge:**
117
+ - If field is empty/null, set it
118
+ - If field is default and context suggests better value, update it
119
+
120
+ ## Usage Examples
121
+
122
+ ### Complete Existing Node
123
+
124
+ ```typescript
125
+ await completeNodeConfiguration(client, {
126
+ workspaceID: "ws-123",
127
+ nodeID: "dim-customers",
128
+ repoPath: "/path/to/repo"
129
+ });
130
+ ```
131
+
132
+ ### Transform with Auto-Config
133
+
134
+ ```typescript
135
+ await convertJoinToAggregation(client, {
136
+ workspaceID: "ws-123",
137
+ nodeID: "fact-orders",
138
+ groupByColumns: ['"ORDERS"."CUSTOMER_ID"'],
139
+ aggregates: [
140
+ { name: "TOTAL", function: "COUNT", expression: "*" }
141
+ ],
142
+ maintainJoins: true,
143
+ repoPath: "/path/to/repo"
144
+ });
145
+
146
+ // Returns fully transformed AND configured node
147
+ ```
148
+
149
+ ## Troubleshooting
150
+
151
+ **"Cannot resolve node type schema":**
152
+ - Ensure package is committed to repo or available in corpus
153
+ - Check nodeType format (may need package prefix like "PackageName:::ID")
154
+
155
+ **"Required field could not be auto-determined":**
156
+ - Some required fields need manual input
157
+ - Check `configReview.missingRequired` for details
158
+
159
+ **Config not as expected:**
160
+ - Check `configChanges.preserved` — may have existing values that weren't overwritten
161
+ - Check `configReview.warnings` and `configReview.suggestions` for actionable guidance
162
+ - Verify node context (sources, columns, materialization type)
@@ -0,0 +1,156 @@
1
+ # Node Creation Decision Tree
2
+
3
+ Use this resource before creating or heavily editing workspace nodes.
4
+
5
+ ## Before You Start
6
+
7
+ **Always check observed node types first:**
8
+
9
+ 1. Call `list-workspace-node-types` to discover which node types are already observed in current workspace nodes
10
+ 2. Do not treat that scan as a true installed-type registry; if the desired node type is unobserved, confirm installation in the Coalesce UI before proceeding
11
+ 3. If a local committed repo is available, install the package in Coalesce, commit the workspace branch, update the local clone, then use repo-aware tools with explicit `repoPath` or the `COALESCE_REPO_PATH` fallback
12
+ 4. If the repo does not contain the committed definition, consult `coalesce://context/node-type-corpus` for fallback patterns and metadata structure
13
+
14
+ ---
15
+
16
+ ## Choose the Right Tool
17
+
18
+ ### Step 1: Always plan first to discover the correct node type
19
+
20
+ **Before creating any node**, call `plan-pipeline` to discover and rank available node types from the repo:
21
+
22
+ ```javascript
23
+ plan-pipeline({
24
+ workspaceID,
25
+ goal: "stage truck data",
26
+ sourceNodeIDs: ["source-id-1"],
27
+ repoPath: "/path/to/repo" // or rely on COALESCE_REPO_PATH
28
+ })
29
+ ```
30
+
31
+ The planner scans the repo for all committed node type definitions, scores them against your use case, and returns `nodeTypeSelection.consideredNodeTypes` with the best match. **Use the `nodeType` from the plan** when calling `create-workspace-node-from-predecessor` — do not guess node types like "Stage" or "View".
32
+
33
+ If the user provides SQL, pass it directly:
34
+
35
+ ```javascript
36
+ plan-pipeline({ workspaceID, sql: "<USER-PROVIDED SQL HERE>" })
37
+ ```
38
+
39
+ Do NOT author SQL yourself to pass to these tools.
40
+
41
+ ### Step 2: Create the node with the planned node type
42
+
43
+ Use `create-workspace-node-from-predecessor` with the node type from the plan:
44
+
45
+ ```javascript
46
+ create-workspace-node-from-predecessor({
47
+ workspaceID,
48
+ nodeType: "base-nodes:::Stage", // from plan-pipeline result
49
+ predecessorNodeIDs: ["source-id-1"],
50
+ changes: { name: "STG_CUSTOMER" }
51
+ })
52
+ ```
53
+
54
+ Good fit:
55
+ - stage from a source
56
+ - join from multiple upstream nodes
57
+ - transform node that should inherit upstream columns
58
+
59
+ After the call:
60
+ - inspect `validation`
61
+ - inspect `joinSuggestions`
62
+ - inspect `nodeTypeValidation.warning` — if present, the node type may be wrong for this use case
63
+ - stop if `nodeTypeValidation.warning` is present
64
+ - for validation field details, see `coalesce://context/node-payloads`
65
+
66
+ For aggregations, follow up with `convert-join-to-aggregation`; for row-level joins, follow up with `apply-join-condition`.
67
+
68
+ **IMPORTANT — Column handling in `changes`:**
69
+
70
+ When creating from predecessor, columns are **auto-populated** with proper source linkage (`sources`, `columnReference`). Only include columns in `changes.metadata.columns` that have **actual transforms** (UPPER, CAST, CASE, aggregation, derived expressions). Do NOT include passthrough columns — they already exist with correct source references.
71
+
72
+ If the user's SQL has 10 columns but only 3 have transforms, create the node first (columns auto-populate), then use `replace-workspace-node-columns` to set `transform` on those 3 columns and add any new derived columns. Because that tool replaces the whole column array, send all columns and omit `transform` on the passthroughs. This preserves source linkage for all passthrough columns.
73
+
74
+ ### If the user provides SQL to convert
75
+
76
+ - prefer `plan-pipeline` with the user's SQL to preview the plan first
77
+ - prefer `create-pipeline-from-sql` with the user's SQL for one-step conversion
78
+ - prefer `create-pipeline-from-plan` when a plan has already been reviewed and approved
79
+
80
+ Do NOT author SQL yourself to pass to these tools.
81
+
82
+ ### If the node already exists and only part of it should change
83
+
84
+ Use `update-workspace-node`.
85
+
86
+ Good fit:
87
+ - descriptions
88
+ - config updates
89
+ - top-level location fields
90
+ - full replacement of `metadata.columns`
91
+
92
+ ### If you intentionally want to replace the full node body
93
+
94
+ Use `set-workspace-node`.
95
+
96
+ Only do this when you already have the exact full node body to persist.
97
+
98
+ ## Multi-Predecessor and Join Requests
99
+
100
+ When the user wants a join:
101
+
102
+ 1. Read the predecessor nodes if you need more context.
103
+ 2. Use `create-workspace-node-from-predecessor`.
104
+ 3. Inspect `joinSuggestions` for common column names.
105
+ 4. Confirm `validation.allPredecessorsRepresented` and inspect `validation.predecessorCoverage` before assuming the join node is ready.
106
+
107
+ **CRITICAL: The node is NOT complete after step 2.** Multi-predecessor nodes are created with columns but NO join condition. You MUST complete the join setup:
108
+
109
+ 5. **Review `nextSteps`** in the response — it contains context-aware guidance for your specific node.
110
+ 6. **Set up the join condition** by calling one of:
111
+ - `convert-join-to-aggregation` — for GROUP BY / fact table / aggregation use cases
112
+ - `apply-join-condition` — for row-level joins (auto-generates FROM/JOIN/ON with `{{ ref() }}` syntax)
113
+ - `update-workspace-node` — to set joinCondition manually when you need full control
114
+ 7. **Verify the join** — call `get-workspace-node` to confirm the joinCondition is set and columns are correct.
115
+
116
+ **Join type selection:**
117
+
118
+ | Scenario | Join Type | When to Use |
119
+ |----------|-----------|-------------|
120
+ | Every record must exist in both tables | `INNER JOIN` | Matching orders to known customers |
121
+ | Keep all from primary, allow nulls from secondary | `LEFT JOIN` | All customers, even those with no orders |
122
+ | Keep all from both, allow nulls on either side | `FULL OUTER JOIN` | Reconciliation between two systems |
123
+
124
+ **Join key verification:**
125
+ - Use **business keys** (e.g., CUSTOMER_ID, ORDER_NUMBER), not surrogate keys
126
+ - Confirm at least one side of the join is unique on the join key to avoid fan-out (row multiplication)
127
+ - If join keys have different names across predecessors (e.g., `CUST_ID` vs `CUSTOMER_ID`), the agent must map them explicitly
128
+
129
+ ## Node Configuration Is Automatic
130
+
131
+ When you use `create-workspace-node-from-predecessor` or `create-workspace-node-from-scratch` with `repoPath`, node configuration is completed automatically:
132
+
133
+ - Node-level config defaults are applied from the node type definition
134
+ - Column-level attributes (`isBusinessKey`, `isChangeTracking`, etc.) are inferred and set
135
+ - The `configCompletion` field in the response shows exactly what was applied
136
+
137
+ If `repoPath` is not provided or config completion fails, the response includes `configCompletionSkipped` — call `complete-node-configuration` with `repoPath` to retry.
138
+
139
+ **Always use `create-workspace-node-from-predecessor` or `create-workspace-node-from-scratch`** — they handle validation and config completion automatically.
140
+
141
+ ## Storage and SQL Follow-Up
142
+
143
+ After creation:
144
+
145
+ 1. Determine the SQL platform with `coalesce://context/sql-platform-selection`.
146
+ 2. Verify storage and `{{ ref(...) }}` assumptions with `coalesce://context/storage-mappings`.
147
+ 3. For payload-heavy edits, use:
148
+ - `coalesce://context/node-payloads`
149
+ - `coalesce://context/hydrated-metadata`
150
+
151
+ ## Related Resources
152
+
153
+ - `coalesce://context/pipeline-workflows`
154
+ - `coalesce://context/node-operations`
155
+ - `coalesce://context/data-engineering-principles`
156
+ - `coalesce://context/node-payloads`
@@ -0,0 +1,316 @@
1
+ # Node Operations
2
+
3
+ How to edit existing workspace nodes: join conditions, columns, config, renames, and SQL conversion.
4
+
5
+ ## Applying Join Conditions
6
+
7
+ When `create-workspace-node-from-predecessor` returns `joinSuggestions` for a multi-predecessor node, you MUST set the join condition — without it the node fails at compile time.
8
+
9
+ > **Platform note**: Examples use Snowflake double-quote syntax (`"TABLE"."COLUMN"`). For Databricks, use backticks; for BigQuery, use backticks with a project.dataset prefix. Determine the platform first via `coalesce://context/sql-platform-selection`.
10
+
11
+ **Option A — Automatic (recommended for aggregations):**
12
+
13
+ Call `convert-join-to-aggregation` with `maintainJoins: true`. It reads predecessors, finds common columns, and generates the full JOIN ON clause automatically.
14
+
15
+ **Option B — Automatic (recommended for row-level joins):**
16
+
17
+ Call `apply-join-condition` — it reads predecessors, finds common columns, generates FROM/JOIN/ON with `{{ ref() }}` syntax, and writes the joinCondition to the node automatically:
18
+
19
+ ```javascript
20
+ apply-join-condition({
21
+ workspaceID, nodeID: "join-node-id",
22
+ joinType: "LEFT JOIN" // defaults to INNER JOIN
23
+ })
24
+ ```
25
+
26
+ For mismatched column names across predecessors:
27
+
28
+ ```javascript
29
+ apply-join-condition({
30
+ workspaceID, nodeID: "join-node-id",
31
+ joinType: "LEFT JOIN",
32
+ joinColumnOverrides: [{
33
+ leftPredecessor: "STG_CUSTOMER",
34
+ rightPredecessor: "STG_ORDERS",
35
+ leftColumn: "CUST_ID",
36
+ rightColumn: "CUSTOMER_ID"
37
+ }]
38
+ })
39
+ ```
40
+
41
+ **Option C — Manual (when you need full control):**
42
+
43
+ Read each predecessor to get its `locationName`, then build the join condition manually:
44
+
45
+ ```javascript
46
+ update-workspace-node({
47
+ workspaceID, nodeID: "join-node-id",
48
+ changes: {
49
+ metadata: {
50
+ sourceMapping: [{
51
+ name: "JOIN_NODE_NAME",
52
+ dependencies: [
53
+ { locationName: "STAGING", nodeName: "STG_CUSTOMER" },
54
+ { locationName: "STAGING", nodeName: "STG_ORDERS" }
55
+ ],
56
+ join: {
57
+ joinCondition: 'FROM {{ ref(\'STAGING\', \'STG_CUSTOMER\') }} "STG_CUSTOMER"\nJOIN {{ ref(\'STAGING\', \'STG_ORDERS\') }} "STG_ORDERS"\n ON "STG_CUSTOMER"."CUSTOMER_ID" = "STG_ORDERS"."CUSTOMER_ID"'
58
+ },
59
+ customSQL: { customSQL: "" }, aliases: {}, noLinkRefs: []
60
+ }]
61
+ }
62
+ }
63
+ })
64
+ ```
65
+
66
+ Always read `locationName` from `get-workspace-node` — never hardcode it.
67
+
68
+ **3+ table joins**: Chain JOIN clauses in the same joinCondition string. All dependencies go in one sourceMapping entry. Use `joinSuggestions`, which returns common columns for each predecessor pair.
69
+
70
+ **Changing join type**: Read the existing sourceMapping, modify the JOIN keyword (e.g., `JOIN` -> `LEFT JOIN`), and write it back using the read-modify-write pattern.
71
+
72
+ ## Column Operations
73
+
74
+ ### Replacing All Columns
75
+
76
+ Use `replace-workspace-node-columns`:
77
+
78
+ ```javascript
79
+ replace-workspace-node-columns({
80
+ workspaceID, nodeID: "node-id",
81
+ columns: [
82
+ { name: "CUSTOMER_ID" }, // passthrough — omit transform
83
+ { name: "TOTAL_ORDERS", transform: 'COUNT(DISTINCT "STG_ORDERS"."ORDER_ID")' }
84
+ ]
85
+ })
86
+ ```
87
+
88
+ ### Adding a Column
89
+
90
+ `metadata.columns` is a full-replacement array. Read the current columns, append the new one, send the full array:
91
+
92
+ ```javascript
93
+ replace-workspace-node-columns({
94
+ workspaceID, nodeID: "node-id",
95
+ columns: [...existingColumns, { name: "DISCOUNT_AMOUNT", transform: '"STG_ORDERS"."ORDER_TOTAL" * 0.1' }]
96
+ })
97
+ ```
98
+
99
+ Only include `transform` on the new column if it has an actual transformation. Passthrough columns from `existingColumns` already have their transforms set by Coalesce.
100
+
101
+ ### Resetting Columns to Match a Predecessor
102
+
103
+ Build passthrough columns from the predecessor — omit `transform` since these are all passthroughs:
104
+
105
+ ```javascript
106
+ const resetColumns = predecessorColumns.map(col => ({
107
+ name: col.name, dataType: col.dataType
108
+ }));
109
+ replace-workspace-node-columns({ workspaceID, nodeID: "node-id", columns: resetColumns })
110
+ ```
111
+
112
+ Do NOT copy raw column objects — their `columnReference`, `sources`, and `columnID` belong to the predecessor.
113
+
114
+ ### Custom Column Transforms
115
+
116
+ The `transform` field is the SQL expression for the column's SELECT clause.
117
+
118
+ **Passthrough columns**: If a column has no transformation (just passes through from the predecessor), **omit the `transform` field entirely**. Coalesce auto-populates passthrough transforms. Only specify `transform` when you are applying an actual transformation (UPPER, CAST, CASE, arithmetic, aggregation, etc.).
119
+
120
+ **Use hardcoded aliases, not `{{ ref() }}`**: In the `transform` field, use the table alias directly (e.g., `"STG_ORDERS"."PRICE"`). Do NOT use `{{ ref() }}` syntax in transforms — that syntax is for `joinCondition` only. The alias comes from the node's `joinCondition` (e.g., `FROM {{ ref('STAGING', 'STG_ORDERS') }} "STG_ORDERS"` means the alias is `"STG_ORDERS"`).
121
+
122
+ **Finding the correct alias**: Read the node and inspect `metadata.sourceMapping[].join.joinCondition`. If it contains `FROM {{ ref('RAW', 'CUSTOMER') }} "CUSTOMER"`, the alias is `"CUSTOMER"` (the source name), NOT the current node's name.
123
+
124
+ Common patterns:
125
+
126
+ ```javascript
127
+ // Passthrough — omit transform entirely
128
+ { name: "CUSTOMER_ID" }
129
+ // Computed
130
+ { name: "LINE_TOTAL", transform: '"STG_ORDERS"."PRICE" * "STG_ORDERS"."QUANTITY"', dataType: "NUMBER(38,4)" }
131
+ // Type cast
132
+ { name: "ORDER_DATE", transform: 'CAST("STG_ORDERS"."ORDER_DATE_STR" AS DATE)', dataType: "DATE" }
133
+ // Conditional
134
+ { name: "ORDER_STATUS", transform: 'CASE WHEN "STG_ORDERS"."IS_CANCELLED" = TRUE THEN \'CANCELLED\' ELSE \'ACTIVE\' END', dataType: "VARCHAR" }
135
+ // String transform
136
+ { name: "CITY", transform: 'UPPER("STG_LOCATION"."CITY")' }
137
+ ```
138
+
139
+ **Aggregate vs scalar**: If a column uses an aggregate function, the joinCondition MUST include GROUP BY. Use `convert-join-to-aggregation` or add GROUP BY manually. Scalar transforms (CASE, CAST, arithmetic) work without GROUP BY.
140
+
141
+ ### Bulk Column Operations
142
+
143
+ When renaming columns across a pipeline:
144
+
145
+ 1. List all nodes and find affected ones
146
+ 2. Map the dependency chain (upstream rename breaks downstream transforms)
147
+ 3. Update in dependency order — rename in the upstream node first, then fix downstream `transform` references and `joinCondition` ON clauses
148
+
149
+ IMPORTANT: Coalesce does NOT cascade column renames. You must update every downstream reference manually.
150
+
151
+ ## Adding WHERE, QUALIFY, or GROUP BY
152
+
153
+ These go in the joinCondition, not in column transforms. Always use read-modify-write:
154
+
155
+ ```javascript
156
+ // 1. Read current sourceMapping
157
+ get-workspace-node({ workspaceID, nodeID: "node-id" })
158
+
159
+ // 2. Append clause to existing joinCondition string
160
+ // e.g., add WHERE: existingJoinCondition + '\nWHERE "STG_ORDERS"."ORDER_DATE" >= \'2024-01-01\''
161
+ // e.g., add QUALIFY: existingJoinCondition + '\nQUALIFY ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...) = 1'
162
+
163
+ // 3. Write back complete sourceMapping
164
+ update-workspace-node({
165
+ workspaceID, nodeID: "node-id",
166
+ changes: { metadata: { sourceMapping: [modifiedSourceMapping] } }
167
+ })
168
+ ```
169
+
170
+ Preserve all existing fields (name, dependencies, aliases, customSQL, noLinkRefs) — only modify `join.joinCondition`.
171
+
172
+ ## Column-Level Attributes (columnSelector)
173
+
174
+ Some node type config items are column-level, not node-level. They have `"type": "columnSelector"` in the node type definition.
175
+
176
+ **How to discover:** Look up the definition with `get-repo-node-type-definition`. Find items with `"type": "columnSelector"` and note the `attributeName`.
177
+
178
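+ **How to set:** (`metadata.columns` is a full-replacement array — read the current columns, set the attribute on the relevant ones, and send the full array back)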
179
+
180
+ ```javascript
181
+ update-workspace-node({
182
+ workspaceID, nodeID: "node-id",
183
+ changes: {
184
+ metadata: {
185
+ columns: [
186
+ { name: "CUSTOMER_ID", isBusinessKey: true, ... },
187
+ { name: "ORDER_TOTAL", isChangeTracking: true, ... }
188
+ ]
189
+ }
190
+ }
191
+ })
192
+ ```
193
+
194
+ Common attributes:
195
+
196
+ | Node Type | attributeName | Purpose |
197
+ |-----------|--------------|---------|
198
+ | Dimension | `isBusinessKey` | Natural key column(s) |
199
+ | Dimension | `isChangeTracking` | Columns monitored for SCD Type 2 |
200
+ | Persistent Stage | `isBusinessKey` | Record uniqueness key |
201
+ | Persistent Stage | `isChangeTracking` | Columns to detect changes |
202
+ | Fact | `isBusinessKey` | Degenerate dimension / merge key |
203
+
204
+ Always look up actual attribute names from the node type definition — they vary by package.
205
+
206
+ ## Common Config Fields
207
+
208
+ Set via `update-workspace-node({ changes: { config: { ... } } })`:
209
+
210
+ | Field | Type | Default | Description |
211
+ |-------|------|---------|-------------|
212
+ | `truncateBefore` | boolean | true (Stage) | Truncate before insert (full reload) |
213
+ | `testsEnabled` | boolean | true (Stage) | Enable data quality tests |
214
+ | `insertStrategy` | string | "INSERT" | Multi-source: "INSERT", "UNION", "UNION ALL" |
215
+ | `selectDistinct` | boolean | false | Apply DISTINCT to SELECT |
216
+ | `groupByAll` | boolean | false | GROUP BY ALL (mutually exclusive with selectDistinct) |
217
+ | `preSQL` | string | "" | SQL before main insert |
218
+ | `postSQL` | string | "" | SQL after main insert |
219
+
220
+ For node-type-specific fields, use `get-repo-node-type-definition` or inspect the definition in the repo under `nodeTypes/`.
221
+
222
+ ## Adding a Predecessor to an Existing Node
223
+
224
+ 1. Read the existing node to get current sourceMapping and columns
225
+ 2. Read the new predecessor to get its columns and `locationName`
226
+ 3. Update sourceMapping with the new dependency and extend joinCondition:
227
+
228
+ ```javascript
229
+ update-workspace-node({
230
+ workspaceID, nodeID: "existing-join-id",
231
+ changes: {
232
+ metadata: {
233
+ sourceMapping: [{
234
+ ...existingSourceMapping,
235
+ dependencies: [...existingDependencies, { locationName: "STAGING", nodeName: "STG_RETURNS" }],
236
+ join: {
237
+ joinCondition: existingJoinCondition + '\nJOIN {{ ref(\'STAGING\', \'STG_RETURNS\') }} "STG_RETURNS"\n ON "STG_ORDERS"."ORDER_ID" = "STG_RETURNS"."ORDER_ID"'
238
+ }
239
+ }]
240
+ }
241
+ }
242
+ })
243
+ ```
244
+
245
+ 4. Add columns from the new predecessor using `replace-workspace-node-columns`
246
+
247
+ Note: This updates metadata but does NOT create the DAG edge. The DAG link is established at creation time. The node may need to be recreated with the full set of `predecessorNodeIDs` if a true DAG predecessor is needed.
248
+
249
+ ## Rename Safety
250
+
251
+ Renaming via `update-workspace-node({ changes: { name: "NEW_NAME" } })` updates the node itself (including its sourceMapping entry name), but does NOT update downstream nodes. After renaming:
252
+
253
+ 1. Find downstream nodes whose `metadata.sourceMapping[].dependencies[].nodeName` references the old name
254
+ 2. Update each downstream node's dependencies `nodeName` and `{{ ref() }}` calls in joinCondition
255
+ 3. Update column transforms referencing the old name as a table alias
256
+
257
+ The same cascade applies to location changes.
258
+
259
+ ## Duplicate Node Names
260
+
261
+ Nodes in the same storage location must have distinct names — duplicate names make `{{ ref() }}` ambiguous. Nodes in different locations CAN share a name.
262
+
263
+ ## SQL-to-Graph Conversion
264
+
265
+ When a user pastes SQL for conversion into Coalesce nodes:
266
+
267
+ ### SQL with CTEs
268
+
269
+ Create each CTE as a separate node bottom-up. Use `View` or `Work` for intermediate CTEs, the target type for the final query.
270
+
271
+ ### Raw SQL (no ref syntax)
272
+
273
+ 1. Identify table names in the SQL
274
+ 2. Match to workspace nodes via `list-workspace-nodes` (case-insensitive)
275
+ 3. Get each node's `locationName` via `get-workspace-node`
276
+ 4. Pass the user's exact SQL unchanged to `plan-pipeline` or `create-pipeline-from-sql`
277
+ 5. Do NOT rewrite table names into `{{ ref('LOCATION', 'NODE') }}` syntax for these pipeline tools — the planner resolves references automatically
278
+
279
+ ### Large Queries (many columns)
280
+
281
+ Create the node first with `create-workspace-node-from-predecessor`, then set columns with `replace-workspace-node-columns`. Always send the full column array — arrays are replaced, not merged.
282
+
283
+ ## Debugging Incorrect Data
284
+
285
+ When output data looks wrong after a successful run:
286
+
287
+ 1. Read the node — check column `transform` expressions, joinCondition (join type, ON conditions), and dependencies
288
+ 2. Common issues:
289
+ - **Values too high**: Missing/incorrect JOIN ON (cartesian product), or LEFT JOIN that should be INNER
290
+ - **Missing rows**: INNER JOIN filtering unmatched rows, or overly restrictive WHERE
291
+ - **Duplicates**: Missing DISTINCT, wrong GROUP BY, or fan-out from one-to-many join
292
+ - **NULLs**: LEFT JOIN on non-matching column, or missing COALESCE
293
+ 3. Compare with predecessors to verify column names and data types match
294
+
295
+ ## Exploring a Workspace
296
+
297
+ - **Summary**: `analyze-workspace-patterns` — package adoption, layers, methodology
298
+ - **All nodes**: `list-workspace-nodes` with `detail: true`
299
+ - **Specific node**: `get-workspace-node` — full body with columns, config, sourceMapping
300
+ - **Node types**: `list-workspace-node-types` — distinct types observed
301
+ - **Projects**: `list-projects` with `includeWorkspaces: true`
302
+ - **Environments**: `list-environments` — deployment targets (DEV, QA, PROD)
303
+
304
+ **Large workspaces (100+ nodes)**: Use `list-workspace-nodes` WITHOUT `detail` first, find target by name, then `get-workspace-node` on its ID. Use `cache-workspace-nodes` for repeated searches.
305
+
306
+ **Tracing lineage**: Read the node, find predecessors in `metadata.sourceMapping[].dependencies`, recurse until you reach nodes with no dependencies. For column-level lineage, inspect `metadata.columns[].sources[].columnReferences`.
307
+
308
+ **Downstream impact**: List workspace nodes and check which reference the target node in their dependencies. Recurse for the full impact chain.
309
+
310
+ ## Related Resources
311
+
312
+ - `coalesce://context/pipeline-workflows` — building pipelines end-to-end
313
+ - `coalesce://context/node-creation-decision-tree` — which tool to use
314
+ - `coalesce://context/aggregation-patterns` — GROUP BY, datatype inference
315
+ - `coalesce://context/node-payloads` — full node body editing
316
+ - `coalesce://context/hydrated-metadata` — advanced metadata structures