coalesce-transform-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +304 -0
- package/dist/cache-dir.d.ts +26 -0
- package/dist/cache-dir.js +106 -0
- package/dist/client.d.ts +25 -0
- package/dist/client.js +212 -0
- package/dist/coalesce/api/environments.d.ts +20 -0
- package/dist/coalesce/api/environments.js +15 -0
- package/dist/coalesce/api/git-accounts.d.ts +21 -0
- package/dist/coalesce/api/git-accounts.js +21 -0
- package/dist/coalesce/api/jobs.d.ts +25 -0
- package/dist/coalesce/api/jobs.js +21 -0
- package/dist/coalesce/api/nodes.d.ts +29 -0
- package/dist/coalesce/api/nodes.js +33 -0
- package/dist/coalesce/api/projects.d.ts +22 -0
- package/dist/coalesce/api/projects.js +25 -0
- package/dist/coalesce/api/runs.d.ts +19 -0
- package/dist/coalesce/api/runs.js +34 -0
- package/dist/coalesce/api/subgraphs.d.ts +20 -0
- package/dist/coalesce/api/subgraphs.js +17 -0
- package/dist/coalesce/api/users.d.ts +30 -0
- package/dist/coalesce/api/users.js +31 -0
- package/dist/coalesce/types.d.ts +298 -0
- package/dist/coalesce/types.js +746 -0
- package/dist/generated/.gitkeep +0 -0
- package/dist/generated/node-type-corpus.json +42656 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +10 -0
- package/dist/mcp/cache.d.ts +3 -0
- package/dist/mcp/cache.js +137 -0
- package/dist/mcp/environments.d.ts +3 -0
- package/dist/mcp/environments.js +61 -0
- package/dist/mcp/git-accounts.d.ts +3 -0
- package/dist/mcp/git-accounts.js +70 -0
- package/dist/mcp/jobs.d.ts +3 -0
- package/dist/mcp/jobs.js +77 -0
- package/dist/mcp/node-type-corpus.d.ts +3 -0
- package/dist/mcp/node-type-corpus.js +173 -0
- package/dist/mcp/nodes.d.ts +3 -0
- package/dist/mcp/nodes.js +341 -0
- package/dist/mcp/pipelines.d.ts +3 -0
- package/dist/mcp/pipelines.js +342 -0
- package/dist/mcp/projects.d.ts +3 -0
- package/dist/mcp/projects.js +70 -0
- package/dist/mcp/repo-node-types.d.ts +135 -0
- package/dist/mcp/repo-node-types.js +387 -0
- package/dist/mcp/runs.d.ts +3 -0
- package/dist/mcp/runs.js +92 -0
- package/dist/mcp/subgraphs.d.ts +3 -0
- package/dist/mcp/subgraphs.js +60 -0
- package/dist/mcp/users.d.ts +3 -0
- package/dist/mcp/users.js +107 -0
- package/dist/prompts/index.d.ts +2 -0
- package/dist/prompts/index.js +58 -0
- package/dist/resources/context/aggregation-patterns.md +145 -0
- package/dist/resources/context/data-engineering-principles.md +183 -0
- package/dist/resources/context/hydrated-metadata.md +92 -0
- package/dist/resources/context/id-discovery.md +64 -0
- package/dist/resources/context/intelligent-node-configuration.md +162 -0
- package/dist/resources/context/node-creation-decision-tree.md +156 -0
- package/dist/resources/context/node-operations.md +316 -0
- package/dist/resources/context/node-payloads.md +114 -0
- package/dist/resources/context/node-type-corpus.md +166 -0
- package/dist/resources/context/node-type-selection-guide.md +96 -0
- package/dist/resources/context/overview.md +135 -0
- package/dist/resources/context/pipeline-workflows.md +355 -0
- package/dist/resources/context/run-operations.md +55 -0
- package/dist/resources/context/sql-bigquery.md +41 -0
- package/dist/resources/context/sql-databricks.md +40 -0
- package/dist/resources/context/sql-platform-selection.md +70 -0
- package/dist/resources/context/sql-snowflake.md +43 -0
- package/dist/resources/context/storage-mappings.md +49 -0
- package/dist/resources/context/tool-usage.md +98 -0
- package/dist/resources/index.d.ts +5 -0
- package/dist/resources/index.js +254 -0
- package/dist/schemas/node-payloads.d.ts +5019 -0
- package/dist/schemas/node-payloads.js +147 -0
- package/dist/server.d.ts +7 -0
- package/dist/server.js +63 -0
- package/dist/services/cache/snapshots.d.ts +108 -0
- package/dist/services/cache/snapshots.js +275 -0
- package/dist/services/config/context-analyzer.d.ts +14 -0
- package/dist/services/config/context-analyzer.js +76 -0
- package/dist/services/config/field-classifier.d.ts +23 -0
- package/dist/services/config/field-classifier.js +47 -0
- package/dist/services/config/intelligent.d.ts +55 -0
- package/dist/services/config/intelligent.js +306 -0
- package/dist/services/config/rules.d.ts +6 -0
- package/dist/services/config/rules.js +44 -0
- package/dist/services/config/schema-resolver.d.ts +18 -0
- package/dist/services/config/schema-resolver.js +80 -0
- package/dist/services/corpus/loader.d.ts +56 -0
- package/dist/services/corpus/loader.js +25 -0
- package/dist/services/corpus/search.d.ts +49 -0
- package/dist/services/corpus/search.js +69 -0
- package/dist/services/corpus/templates.d.ts +4 -0
- package/dist/services/corpus/templates.js +11 -0
- package/dist/services/pipelines/execution.d.ts +20 -0
- package/dist/services/pipelines/execution.js +290 -0
- package/dist/services/pipelines/node-type-intent.d.ts +96 -0
- package/dist/services/pipelines/node-type-intent.js +356 -0
- package/dist/services/pipelines/node-type-selection.d.ts +66 -0
- package/dist/services/pipelines/node-type-selection.js +758 -0
- package/dist/services/pipelines/planning.d.ts +543 -0
- package/dist/services/pipelines/planning.js +1839 -0
- package/dist/services/policies/sql-override.d.ts +7 -0
- package/dist/services/policies/sql-override.js +109 -0
- package/dist/services/repo/operations.d.ts +6 -0
- package/dist/services/repo/operations.js +10 -0
- package/dist/services/repo/parser.d.ts +70 -0
- package/dist/services/repo/parser.js +365 -0
- package/dist/services/repo/path.d.ts +2 -0
- package/dist/services/repo/path.js +58 -0
- package/dist/services/templates/nodes.d.ts +50 -0
- package/dist/services/templates/nodes.js +336 -0
- package/dist/services/workspace/analysis.d.ts +56 -0
- package/dist/services/workspace/analysis.js +151 -0
- package/dist/services/workspace/mutations.d.ts +150 -0
- package/dist/services/workspace/mutations.js +1718 -0
- package/dist/utils.d.ts +5 -0
- package/dist/utils.js +7 -0
- package/dist/workflows/get-environment-overview.d.ts +9 -0
- package/dist/workflows/get-environment-overview.js +23 -0
- package/dist/workflows/get-run-details.d.ts +10 -0
- package/dist/workflows/get-run-details.js +28 -0
- package/dist/workflows/progress.d.ts +20 -0
- package/dist/workflows/progress.js +54 -0
- package/dist/workflows/retry-and-wait.d.ts +13 -0
- package/dist/workflows/retry-and-wait.js +139 -0
- package/dist/workflows/run-and-wait.d.ts +13 -0
- package/dist/workflows/run-and-wait.js +141 -0
- package/dist/workflows/run-status.d.ts +10 -0
- package/dist/workflows/run-status.js +27 -0
- package/package.json +34 -0
|
@@ -0,0 +1,758 @@
|
|
|
1
|
+
import { parseRepo, resolveRepoNodeType, } from "../repo/parser.js";
|
|
2
|
+
import { resolveOptionalRepoPathInput } from "../repo/path.js";
|
|
3
|
+
import { buildSetWorkspaceNodeTemplateFromDefinition } from "../templates/nodes.js";
|
|
4
|
+
import { isPlainObject } from "../../utils.js";
|
|
5
|
+
import { NODE_TYPE_INTENT, hasAntiSignal, detectSpecializedPatternPenalty, detectSpecializedPatternMatch } from "./node-type-intent.js";
|
|
6
|
+
/**
 * Narrow an arbitrary value to a string.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {string | null} The value itself when it is a string, otherwise null.
 */
function getString(value) {
    if (typeof value !== "string") {
        return null;
    }
    return value;
}
|
|
9
|
+
/**
 * Locale-aware comparator for two strings, suitable for Array.prototype.sort.
 *
 * Digit runs are compared numerically ("a2" sorts before "a10") and the
 * comparison uses the "case" sensitivity level of localeCompare.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} Negative, zero, or positive per localeCompare.
 */
function compareStrings(left, right) {
    const collationOptions = { numeric: true, sensitivity: "case" };
    return left.localeCompare(right, undefined, collationOptions);
}
|
|
15
|
+
/**
 * Strip the prefix from a ":::"-qualified node type.
 *
 * @param {string} nodeType - Either a bare ID or "<prefix>:::<id>".
 * @returns {string} Everything after the first ":::" delimiter, or the input
 *   unchanged when no delimiter is present.
 */
function nodeTypeID(nodeType) {
    const delimiter = ":::";
    const at = nodeType.indexOf(delimiter);
    if (at === -1) {
        return nodeType;
    }
    return nodeType.slice(at + delimiter.length);
}
|
|
19
|
+
/**
 * Bare node type IDs that are never offered as selection candidates because
 * they are not valid pipeline transform types.
 */
const EXCLUDED_NODE_TYPE_IDS = new Set(["SQL", "Source"]);
/**
 * Report whether a node type is on the exclusion list.
 *
 * @param {string} nodeType - Bare or ":::"-qualified node type.
 * @returns {boolean} True when the bare ID is in EXCLUDED_NODE_TYPE_IDS.
 */
function isExcludedNodeTypeID(nodeType) {
    const bareID = nodeTypeID(nodeType);
    return EXCLUDED_NODE_TYPE_IDS.has(bareID);
}
|
|
24
|
+
/**
 * Report whether a resolved node type declares the "sql" input mode.
 *
 * The flag is looked up on the outer definition first and then on the node
 * definition (when one exists).
 *
 * @param {object} resolution - Resolution carrying `nodeTypeRecord`.
 * @returns {boolean} True when either definition sets inputMode to "sql".
 */
function isExcludedByInputMode(resolution) {
    const { outerDefinition, nodeDefinition } = resolution.nodeTypeRecord;
    if (outerDefinition.inputMode === "sql") {
        return true;
    }
    // A missing node definition, or a non-"sql" inputMode on it, is not excluded.
    return Boolean(nodeDefinition) && nodeDefinition.inputMode === "sql";
}
|
|
35
|
+
/**
 * Report whether a resolved node type is flagged as disabled.
 *
 * @param {object} resolution - Resolution carrying `nodeTypeRecord.outerDefinition`.
 * @returns {boolean} True only when `isDisabled` is exactly `true`.
 */
function isDisabledNodeType(resolution) {
    const { isDisabled } = resolution.nodeTypeRecord.outerDefinition;
    return isDisabled === true;
}
|
|
38
|
+
/**
 * Decide whether two node type references denote the same node type.
 *
 * @param {string} left
 * @param {string} right
 * @returns {boolean} True when the references are identical, or when they
 *   share the same bare ID once any ":::" prefix is stripped.
 */
function matchesNodeTypeIdentity(left, right) {
    if (left === right) {
        return true;
    }
    return nodeTypeID(left) === nodeTypeID(right);
}
|
|
41
|
+
/**
 * Resolve every unambiguous node type found in a parsed repo.
 *
 * Direct IDs are resolved only when exactly one definition is registered for
 * the ID. Package definitions are resolved as "<alias>:::<definitionID>" only
 * when exactly one package is registered for the alias.
 *
 * @param {object} parsedRepo - Parsed repo exposing `nodeTypesByID`,
 *   `packages`, and `packagesByAlias`.
 * @returns {Array<object>} Resolutions ordered by `resolvedNodeType`.
 */
function collectRepoResolutions(parsedRepo) {
    const collected = [];
    for (const [id, matches] of parsedRepo.nodeTypesByID.entries()) {
        if (matches.length === 1) {
            collected.push(resolveRepoNodeType(parsedRepo, id));
        }
    }
    for (const packageRecord of parsedRepo.packages) {
        const { alias } = packageRecord;
        const sameAlias = parsedRepo.packagesByAlias.get(alias) ?? [];
        if (sameAlias.length === 1) {
            for (const definitionID of packageRecord.resolvedDefinitionIDs) {
                const qualified = `${alias}:::${definitionID}`;
                collected.push(resolveRepoNodeType(parsedRepo, qualified));
            }
        }
    }
    collected.sort((a, b) => compareStrings(a.resolvedNodeType, b.resolvedNodeType));
    return collected;
}
|
|
60
|
+
/**
 * Ordered family detectors used by inferFamily; the first matching entry wins.
 *
 * Full words must be delimited by start/end of text, whitespace, "_" or "-".
 * Abbreviations must be delimited by any character other than a lowercase
 * letter or digit (the text is lowercased before matching), so "stg_orders",
 * "stg-orders" and "pkg:::stg" all count.
 */
const FAMILY_SIGNAL_PATTERNS = [
    // "persistent stage" must be tested before "stage"; the separator may be
    // whitespace, "_" or "-" so "persistent-stage" is not mistaken for "stage".
    ["persistent-stage", /(^|[\s_-])persistent[\s_-]*stage([\s_-]|$)|persistentstage/u],
    ["stage", /(^|[\s_-])stage([\s_-]|$)|(^|[^a-z0-9])stg([^a-z0-9]|$)/u],
    ["view", /(^|[\s_-])view([\s_-]|$)|(^|[^a-z0-9])vw([^a-z0-9]|$)/u],
    ["work", /(^|[\s_-])work([\s_-]|$)|(^|[^a-z0-9])c?wrk([^a-z0-9]|$)/u],
    ["dimension", /(^|[\s_-])dimension([\s_-]|$)|(^|[^a-z0-9])dim([^a-z0-9]|$)/u],
    ["fact", /(^|[\s_-])fact([\s_-]|$)|(^|[^a-z0-9])fct([^a-z0-9]|$)/u],
    ["hub", /(^|[\s_-])hub([\s_-]|$)/u],
    ["satellite", /(^|[\s_-])satellite([\s_-]|$)|(^|[\s_-])sat([\s_-]|$)/u],
    ["link", /(^|[\s_-])link([\s_-]|$)/u],
];
/**
 * Infer the node type family from a list of textual signals (node type ID,
 * directory name, display name, ...).
 *
 * Blank signals are dropped, the rest are joined with spaces and lowercased,
 * and the result is tested against FAMILY_SIGNAL_PATTERNS in order.
 *
 * Compared with the previous implementation this accepts every input it
 * accepted before, plus:
 *   - "persistent-stage" / "persistent_stage", which used to be misclassified
 *     as "stage" because only whitespace was allowed between the two words;
 *   - underscore-delimited abbreviations such as "stg_orders" or
 *     "dim_customer", which used to return "unknown" because `\b` treats "_"
 *     as a word character.
 *
 * @param {string[]} signals - Candidate strings describing the node type.
 * @returns {string} One of "persistent-stage", "stage", "view", "work",
 *   "dimension", "fact", "hub", "satellite", "link", or "unknown".
 */
export function inferFamily(signals) {
    const combined = signals
        .filter((value) => value.trim().length > 0)
        .join(" ")
        .toLowerCase();
    for (const [family, pattern] of FAMILY_SIGNAL_PATTERNS) {
        if (pattern.test(combined)) {
            return family;
        }
    }
    return "unknown";
}
|
|
94
|
+
/**
 * Flatten the config items of a node definition into a single list.
 *
 * `config` is treated as a list of groups, each with an `items` list. Groups
 * and items that fail `isPlainObject`, and groups whose `items` is not an
 * array, contribute nothing.
 *
 * @param {object} nodeDefinition - Node definition, possibly without `config`.
 * @returns {object[]} All plain-object items across all plain-object groups.
 */
function getDefinitionConfigItems(nodeDefinition) {
    const { config } = nodeDefinition;
    if (!Array.isArray(config)) {
        return [];
    }
    return config.filter(isPlainObject).flatMap((group) => {
        const { items } = group;
        return Array.isArray(items) ? items.filter(isPlainObject) : [];
    });
}
|
|
100
|
+
/**
 * Inspect a node definition's config items for two kinds of findings.
 *
 * - semanticSignals: labels that mention modeling concepts (business key,
 *   surrogate, SCD, hash, hub, fact, ...).
 * - missingDefaultFields: labels of items that have no own `default`
 *   property, ignoring the materializationSelector, multisourceToggle and
 *   overrideSQLToggle item types.
 *
 * An item's label is its attributeName, else displayName, else type, else
 * "unknown". Both result lists are de-duplicated and sorted.
 *
 * @param {object | null | undefined} nodeDefinition
 * @returns {{ semanticSignals: string[], missingDefaultFields: string[] }}
 *   Empty lists when no definition is given.
 */
function analyzeDefinition(nodeDefinition) {
    if (!nodeDefinition) {
        return { semanticSignals: [], missingDefaultFields: [] };
    }
    const semanticLabel = /(business.?key|surrogate|scd|effective|current.?flag|grain|hash|hub|satellite|link|fact|dimension|merge.?key)/u;
    const typesWithoutDefault = ["materializationSelector", "multisourceToggle", "overrideSQLToggle"];
    const semantic = new Set();
    const withoutDefault = new Set();
    getDefinitionConfigItems(nodeDefinition).forEach((item) => {
        const label = getString(item.attributeName) ??
            getString(item.displayName) ??
            getString(item.type) ??
            "unknown";
        const itemType = getString(item.type) ?? "";
        if (semanticLabel.test(label.toLowerCase())) {
            semantic.add(label);
        }
        const declaresDefault = Object.prototype.hasOwnProperty.call(item, "default");
        if (!declaresDefault && !typesWithoutDefault.includes(itemType)) {
            withoutDefault.add(label);
        }
    });
    return {
        semanticSignals: [...semantic].sort(compareStrings),
        missingDefaultFields: [...withoutDefault].sort(compareStrings),
    };
}
|
|
132
|
+
/**
 * Report whether a family is one of the four auto-executable families:
 * stage, persistent-stage, view, or work.
 *
 * @param {string} family
 * @returns {boolean}
 */
function isAutoExecutableFamily(family) {
    const autoExecutableFamilies = ["stage", "persistent-stage", "view", "work"];
    return autoExecutableFamilies.includes(family);
}
|
|
138
|
+
/**
 * Families that belong to each selection category, keyed by category name.
 */
const CATEGORY_FAMILIES = {
    "general-purpose": [
        "stage",
        "work",
    ],
    view: [
        "view",
    ],
    persistent: [
        "persistent-stage",
    ],
    dimensional: [
        "dimension",
        "fact",
    ],
    "data-vault": [
        "hub",
        "satellite",
        "link",
    ],
};
|
|
145
|
+
/**
 * Derive the desired node type category and family ranking from the request.
 *
 * Resolution order (first hit wins):
 *   1. Explicit intent in the goal/target text: data vault, then dimensional
 *      modeling, then persistent stage / CDC, then view (view only when the
 *      request has neither a join nor a GROUP BY).
 *   2. A strong signal for a specialized family (persistent, dimensional,
 *      data-vault, view — in that order) as defined by NODE_TYPE_INTENT.
 *   3. General-purpose: stage and work are interchangeable, so whichever the
 *      workspace already uses more is listed first; ties go to stage.
 *
 * @param {object} context - Selection context (goal, targetName, sourceCount,
 *   hasJoin, hasGroupBy, workspaceNodeTypeCounts).
 * @returns {{ desiredFamilies: string[], category: string,
 *   dimensionalModeling: boolean, multiSource: boolean }}
 */
function buildUseCaseContext(context) {
    const freeText = [context.goal, context.targetName].filter(Boolean).join(" ").toLowerCase();
    const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(freeText));
    const multiSource = context.sourceCount > 1;
    // Each flag needs explicit wording — structural hints such as GROUP BY are not enough.
    const dimensionalModeling = matchesAny([
        /\bdimension(al)?\s+model/u,
        /\bstar\s+schema\b/u,
        /\bsnowflake\s+schema\b/u,
    ]);
    const dataVaultIntent = matchesAny([/\bdata\s*vault\b/u]);
    const persistentIntent = matchesAny([
        /\bpersistent\s*stage\b/u,
        /\bcdc\b/u,
        /\bchange\s*track/u,
    ]);
    const viewIntent = matchesAny([
        /\bview\b/u,
        /\bno\s+materialization/u,
        /\bvirtual\s+table/u,
    ]);
    const describe = (desiredFamilies, category) => ({
        desiredFamilies,
        category,
        dimensionalModeling,
        multiSource,
    });
    // 1. Explicit intent from the goal text.
    if (dataVaultIntent) {
        return describe(["hub", "satellite", "link"], "data-vault");
    }
    if (dimensionalModeling) {
        return describe(["dimension", "fact"], "dimensional");
    }
    if (persistentIntent) {
        return describe(["persistent-stage", "stage"], "persistent");
    }
    if (viewIntent && !context.hasJoin && !context.hasGroupBy) {
        return describe(["view", "stage"], "view");
    }
    // 2. Strong signals, checked for specialized families only (stage/work are
    //    general-purpose regardless of name). The target name is placed first
    //    in the probed text, ahead of the goal + target free text.
    const targetName = (context.targetName ?? "").toLowerCase();
    const combinedText = `${targetName} ${freeText}`;
    const specializedSignalChecks = [
        { category: "persistent", families: ["persistent-stage"] },
        { category: "dimensional", families: ["dimension", "fact"] },
        { category: "data-vault", families: ["hub", "satellite", "link"] },
        { category: "view", families: ["view"] },
    ];
    for (const check of specializedSignalChecks) {
        for (const family of check.families) {
            if (NODE_TYPE_INTENT[family].strongSignals.test(combinedText)) {
                return describe(check.families, check.category);
            }
        }
    }
    // 3. General-purpose: tally workspace usage per family to break the
    //    stage/work tie; stage wins when work is not strictly ahead.
    let stageUsage = 0;
    let workUsage = 0;
    for (const [nodeType, count] of Object.entries(context.workspaceNodeTypeCounts ?? {})) {
        const family = inferFamily([nodeType]);
        if (family === "stage") {
            stageUsage = stageUsage + count;
        }
        else if (family === "work") {
            workUsage = workUsage + count;
        }
    }
    const preferWork = workUsage > stageUsage;
    return describe(preferWork ? ["work", "stage", "view"] : ["stage", "work", "view"], "general-purpose");
}
|
|
221
|
+
/**
 * Score how well a candidate's family fits the desired use case.
 *
 * General-purpose category: stage and work both score 120 (+20 when the
 * candidate is already observed in the workspace), view scores 60, anything
 * else 0. Specialized categories: the first desired family scores 120, other
 * desired families 60, stage/work act as a 25-point fallback, anything else 0.
 *
 * @param {object} candidate - Candidate with `family` and `observedInWorkspace`.
 * @param {{ desiredFamilies: string[], category: string }} useCase
 * @returns {{ score: number, reasons: string[] }}
 */
function familyScore(candidate, useCase) {
    const { desiredFamilies, category } = useCase;
    const { family } = candidate;
    const noMatch = () => ({ score: 0, reasons: [] });
    if (desiredFamilies.length === 0) {
        return noMatch();
    }
    const isStageOrWork = family === "stage" || family === "work";
    if (category === "general-purpose") {
        if (isStageOrWork) {
            // Stage and work are interchangeable here; established workspace
            // usage is the tiebreaker.
            const reasons = [`general-purpose ${family} node — fits standard transforms`];
            let score = 120;
            if (candidate.observedInWorkspace) {
                score += 20;
                reasons.push("preferred — already used in this workspace");
            }
            return { score, reasons };
        }
        if (family === "view") {
            return { score: 60, reasons: ["view is acceptable for general-purpose transforms"] };
        }
        return noMatch();
    }
    // Specialized categories rank by position in the desired list.
    const rank = desiredFamilies.indexOf(family);
    if (rank !== -1) {
        return {
            score: rank === 0 ? 120 : 60,
            reasons: [`matches the ${category} category (${family})`],
        };
    }
    if (isStageOrWork) {
        return { score: 25, reasons: [`general-purpose fallback for ${category} category`] };
    }
    return noMatch();
}
|
|
267
|
+
/**
 * Apply every contextual scoring rule to a candidate node type.
 *
 * Starts from the candidate's base score and reasons, then applies in order:
 * explicit-override match, family fit, multisource / dimensional /
 * single-source bonuses, anti-signal and semantic-config penalties,
 * specialized-pattern check, data-vault package exclusion, non-base package
 * exclusion, and finally the clone / unknown-family / auto-executable /
 * missing-default / semantic-signal / template-warning adjustments.
 *
 * A score of -Infinity marks the candidate as not applicable. The two hard
 * exclusions return immediately; a specialized-pattern mismatch does not, so
 * later reasons are still recorded for it.
 *
 * @param {object} candidate - Candidate produced by the candidate builders.
 * @param {object} context - Selection context (goal, targetName,
 *   explicitNodeType, ...).
 * @returns {object} Copy of the candidate with the final `score` and
 *   de-duplicated `reasons`.
 */
function scoreCandidate(candidate, context) {
    const reasons = [...candidate.reasons];
    let score = candidate.score;
    const useCase = buildUseCaseContext(context);
    const { family } = candidate;
    const isStageOrWork = family === "stage" || family === "work";
    // Shift the score and record why in one step.
    const adjust = (delta, reason) => {
        score += delta;
        reasons.push(reason);
    };
    // Single exit shape: candidate copy, current score, unique reasons.
    const finish = () => ({ ...candidate, score, reasons: [...new Set(reasons)] });
    // Explicit override: exact match > same bare ID > everything else penalized.
    if (context.explicitNodeType) {
        if (candidate.nodeType === context.explicitNodeType) {
            adjust(1000, "matches the explicit targetNodeType override");
        }
        else if (matchesNodeTypeIdentity(candidate.nodeType, context.explicitNodeType)) {
            adjust(900, "matches the explicit targetNodeType ID");
        }
        else {
            score -= 200;
        }
    }
    const familyMatch = familyScore(candidate, useCase);
    score += familyMatch.score;
    reasons.push(...familyMatch.reasons);
    if (useCase.multiSource && isAutoExecutableFamily(family)) {
        adjust(15, "fits a multisource projection workflow");
    }
    if (useCase.dimensionalModeling && (family === "dimension" || family === "fact")) {
        adjust(40, `${family} is designed for dimensional modeling with business keys`);
    }
    if (!useCase.multiSource && isStageOrWork) {
        adjust(10, "general-purpose node for single-source transforms");
    }
    // Anti-signals keep specialized families from winning generic transforms.
    const contextText = [context.goal, context.targetName].filter(Boolean).join(" ");
    if (contextText.length > 0 && hasAntiSignal(family, contextText)) {
        adjust(-30, `context suggests this is not a ${family} use case`);
    }
    // Families needing semantic config are penalized without dimensional intent.
    const intent = NODE_TYPE_INTENT[family];
    if (intent.requiresSemanticConfig && !useCase.dimensionalModeling) {
        adjust(-15, `${family} requires semantic config (business keys, SCD) — no dimensional modeling intent detected`);
    }
    // Specialized patterns are binary: a reported penalty marks the candidate
    // not applicable; otherwise a positive context match earns a bonus.
    const candidateSignals = [candidate.nodeType, candidate.displayName ?? "", candidate.shortName ?? ""].join(" ");
    const specializedResult = detectSpecializedPatternPenalty(candidateSignals, contextText);
    if (specializedResult) {
        score = -Infinity;
        reasons.push(`not applicable: ${specializedResult.reason}`);
    }
    else {
        const positiveMatch = detectSpecializedPatternMatch(candidateSignals, contextText);
        if (positiveMatch) {
            adjust(50, `context explicitly requests ${positiveMatch} pattern`);
        }
    }
    // Hard exclusion: candidates from a data vault package (outside the
    // hub/satellite/link families) are dropped when there is neither
    // dimensional-modeling nor data vault intent in the context.
    const inDataVaultFamily = family === "hub" || family === "satellite" || family === "link";
    if (!useCase.dimensionalModeling && !inDataVaultFamily) {
        const fromDataVaultPackage = (candidate.packageAlias && /data.vault/iu.test(candidate.packageAlias)) ||
            /data.vault/iu.test(candidate.nodeType);
        if (fromDataVaultPackage && !/\bdata\s*vault\b/iu.test(contextText)) {
            score = -Infinity;
            reasons.push("data vault package type excluded — no data vault intent in context");
            return finish();
        }
    }
    // General-purpose selections: base-package repo types get a bonus; other
    // repo types are excluded unless the workspace already uses them.
    if (useCase.category === "general-purpose" && candidate.source === "repo") {
        const isBasePackage = candidate.packageAlias && /base.node.type/iu.test(candidate.packageAlias);
        if (isBasePackage) {
            adjust(15, "from base node type package — preferred default");
        }
        else if (!candidate.observedInWorkspace) {
            score = -Infinity;
            const source = candidate.packageAlias
                ? `non-base package "${candidate.packageAlias}"`
                : `custom repo type "${candidate.nodeType}"`;
            reasons.push(`excluded: ${source} — not observed in workspace. Use base node types or workspace-established types.`);
            return finish();
        }
    }
    // "Copy of ..." definitions are user clones; prefer the original unless a
    // type was explicitly requested.
    const isClone = candidate.displayName && /\bcopy\s+of\b/iu.test(candidate.displayName);
    if (isClone && !context.explicitNodeType) {
        adjust(-30, `"${candidate.displayName}" is a cloned type — prefer the original`);
    }
    // Unclassifiable families should not beat known general-purpose types.
    if (family === "unknown" && !context.explicitNodeType) {
        adjust(-50, "unknown node type family — cannot verify suitability for this use case");
    }
    if (candidate.autoExecutable) {
        adjust(25, "supports template-based automatic creation");
    }
    else {
        adjust(-25, "likely needs extra semantic configuration before automatic creation");
    }
    const { missingDefaultFields, semanticSignals, templateWarnings } = candidate;
    if (missingDefaultFields.length > 0) {
        adjust(-(missingDefaultFields.length * 8), `has config fields without defaults: ${missingDefaultFields.join(", ")}`);
    }
    if (semanticSignals.length > 0) {
        adjust(-(semanticSignals.length * 6), `exposes semantic config signals: ${semanticSignals.join(", ")}`);
    }
    if (templateWarnings.length > 0) {
        // Template warnings cost 3 points each, capped at three warnings.
        score -= Math.min(templateWarnings.length, 3) * 3;
    }
    return finish();
}
|
|
408
|
+
/**
 * Build an unscored-by-context candidate from a repo node type resolution.
 *
 * When the resolution has a node definition, a workspace node template is
 * generated from it and contributes the display/short names, warnings and
 * inferred defaults. The base score is
 * `usageCount * 20 + workspaceUsageCount * 10 + (template generated ? 5 : 0)`.
 *
 * @param {object} resolution - Result of resolving a repo node type.
 * @param {string[]} workspaceNodeTypes - Node types seen in the workspace.
 * @param {Record<string, number>} workspaceNodeTypeCounts - Usage count per
 *   workspace node type.
 * @returns {object} Candidate with `source: "repo"`; `packageAlias` is set
 *   only for package resolutions.
 */
function buildRepoCandidate(resolution, workspaceNodeTypes, workspaceNodeTypeCounts) {
    const { nodeTypeRecord, resolvedNodeType, usageCount, resolutionKind } = resolution;
    const { nodeDefinition } = nodeTypeRecord;
    const generated = nodeDefinition
        ? buildSetWorkspaceNodeTemplateFromDefinition(nodeDefinition, { nodeType: resolvedNodeType })
        : undefined;
    const capitalizedName = generated?.definitionSummary.capitalized;
    const displayName = getString(nodeTypeRecord.outerDefinition.name) ?? capitalizedName ?? null;
    const shortName = generated?.definitionSummary.short ?? null;
    const familySignals = [
        resolvedNodeType,
        nodeTypeRecord.dirName,
        displayName ?? "",
        shortName ?? "",
        capitalizedName ?? "",
    ].filter((value) => value.length > 0);
    const family = inferFamily(familySignals);
    const insights = analyzeDefinition(nodeDefinition);
    const isSameType = (nodeType) => matchesNodeTypeIdentity(nodeType, resolvedNodeType);
    const observedInWorkspace = workspaceNodeTypes.some((nodeType) => isSameType(nodeType));
    // Sum the recorded counts of every workspace type that matches this one.
    let workspaceUsageCount = 0;
    for (const nodeType of workspaceNodeTypes) {
        if (isSameType(nodeType)) {
            workspaceUsageCount = workspaceUsageCount + (workspaceNodeTypeCounts[nodeType] ?? 0);
        }
    }
    // Unknown families qualify only with a generated template and no semantic
    // signals or missing defaults.
    const familyAllowsTemplate = isAutoExecutableFamily(family) ||
        (family === "unknown" &&
            insights.semanticSignals.length === 0 &&
            insights.missingDefaultFields.length === 0 &&
            !!generated);
    // A "does not map cleanly" template warning always disqualifies.
    const autoExecutable = familyAllowsTemplate &&
        !generated?.warnings.some((warning) => warning.includes("does not map cleanly"));
    const reasons = [];
    if (usageCount > 0) {
        reasons.push(`used ${usageCount} time(s) in committed nodes/`);
    }
    if (observedInWorkspace) {
        reasons.push("already observed in current workspace nodes");
    }
    if (displayName) {
        reasons.push(`definition resolves to ${displayName}`);
    }
    const templateDefaults = generated
        ? {
            inferredTopLevelFields: generated.inferredTopLevelFields,
            inferredConfig: generated.inferredConfig,
        }
        : undefined;
    const packageFields = resolutionKind === "package"
        ? { packageAlias: resolution.packageAlias }
        : {};
    return {
        nodeType: resolvedNodeType,
        displayName,
        shortName,
        family,
        usageCount,
        workspaceUsageCount,
        observedInWorkspace,
        autoExecutable,
        semanticSignals: insights.semanticSignals,
        missingDefaultFields: insights.missingDefaultFields,
        templateWarnings: generated?.warnings ?? [],
        templateDefaults,
        score: usageCount * 20 + workspaceUsageCount * 10 + (generated ? 5 : 0),
        reasons,
        source: "repo",
        resolutionKind,
        ...packageFields,
    };
}
|
|
474
|
+
/**
 * Build a candidate for a node type known only from workspace usage.
 *
 * The family is inferred from the node type string alone, and the base score
 * is ten points per recorded workspace use.
 *
 * @param {string} nodeType - Node type observed in the workspace.
 * @param {Record<string, number>} workspaceNodeTypeCounts - Usage count per
 *   workspace node type.
 * @returns {object} Candidate with `source: "workspace"`.
 */
function buildWorkspaceCandidate(nodeType, workspaceNodeTypeCounts) {
    const family = inferFamily([nodeType]);
    const workspaceUsageCount = workspaceNodeTypeCounts[nodeType] ?? 0;
    const candidate = {
        nodeType,
        displayName: nodeType,
        shortName: null,
        family,
        usageCount: 0,
        workspaceUsageCount,
        observedInWorkspace: true,
        autoExecutable: isAutoExecutableFamily(family),
        semanticSignals: [],
        missingDefaultFields: [],
        templateWarnings: [],
        score: workspaceUsageCount * 10,
        reasons: ["observed in current workspace nodes"],
        source: "workspace",
    };
    return candidate;
}
|
|
493
|
+
/**
 * Return a new array of the candidates ordered from highest to lowest score.
 * The input array is left untouched.
 *
 * @param {Array<{ score: number }>} candidates
 * @returns {Array<{ score: number }>} Sorted copy.
 */
function rankCandidates(candidates) {
    const ranked = Array.from(candidates);
    ranked.sort((first, second) => second.score - first.score);
    return ranked;
}
|
|
499
|
+
/**
 * Challenge a candidate against the intent corpus and collect every objection.
 * An empty result means the candidate passed.
 *
 * Checks, in the order their messages are emitted:
 *  1. The family's anti-signal pattern matches the context.
 *  2. The candidate looks like a specialized pattern the context did not ask for.
 *  3. The family requires semantic config but the context shows neither
 *     dimensional-modeling wording nor the family's own strong signal.
 *  4. A different family's strong signal matches the context (stage and work
 *     are treated as interchangeable and never challenge each other).
 *  5. A doNotUseWhen entry and the context both mention a known anti-pattern.
 *  6. The candidate comes from a non-base package, is not observed in the
 *     workspace, and the context reads as general-purpose work.
 *  7. The display name marks the type as a "Copy of" clone.
 *
 * @param {object} candidate - Candidate under review; reads `family`,
 *   `nodeType`, `displayName`, `shortName`, `packageAlias`, `observedInWorkspace`.
 * @param {object} context - Selection context; accepted but not read here.
 * @param {string} contextText - Free-text description of the request.
 * @returns {string[]} Challenge reasons, possibly empty.
 */
function challengeCandidate(candidate, context, contextText) {
    const objections = [];
    const ownIntent = NODE_TYPE_INTENT[candidate.family];
    const loweredContext = contextText.toLowerCase();

    // 1. Anti-signals: the intent doc says this family should NOT be used for this context
    const { antiSignals } = ownIntent;
    if (antiSignals !== null && antiSignals.test(loweredContext)) {
        objections.push(`${candidate.family} anti-signal matched — intent says not for this context`);
    }

    // 2. Specialized pattern penalty: candidate is a specialized type but context doesn't request it
    const identityText = [candidate.nodeType, candidate.displayName, candidate.shortName]
        .map((part) => part ?? "")
        .join(" ");
    const penalty = detectSpecializedPatternPenalty(identityText, contextText);
    if (penalty) {
        objections.push(penalty.reason);
    }

    // 3. Requires semantic config (business keys, SCD) but context has no dimensional modeling intent
    if (ownIntent.requiresSemanticConfig) {
        const dimensionalPatterns = [
            /\bdimension(al)?\s+model/u,
            /\bstar\s+schema\b/u,
            /\bsnowflake\s+schema\b/u,
            /\bdata\s*vault\b/u,
        ];
        const mentionsDimensionalModeling = dimensionalPatterns.some((pattern) => pattern.test(loweredContext));
        // A strong signal for THIS family (e.g. name starts with dim_) also counts as intent.
        const matchesOwnStrongSignal = ownIntent.strongSignals.test(loweredContext);
        if (!(mentionsDimensionalModeling || matchesOwnStrongSignal)) {
            objections.push(`${candidate.family} requires semantic config (business keys, SCD) but no dimensional modeling intent detected`);
        }
    }

    // 4. Another family has a strong signal match but this candidate is a different CATEGORY.
    //    Stage and work are interchangeable, so neither is challenged for the other.
    const interchangeableFamilies = new Set(["stage", "work"]);
    const orderedFamilies = [
        "persistent-stage", "dimension", "fact", "hub", "satellite", "link",
        "view", "work", "stage",
    ];
    // Only the first conflicting family is reported; one mismatch is enough.
    const conflictingFamily = orderedFamilies.find((otherFamily) => {
        if (otherFamily === candidate.family) {
            return false;
        }
        if (interchangeableFamilies.has(otherFamily) && interchangeableFamilies.has(candidate.family)) {
            return false;
        }
        return NODE_TYPE_INTENT[otherFamily].strongSignals.test(loweredContext);
    });
    if (conflictingFamily !== undefined) {
        objections.push(`context has a strong signal for ${conflictingFamily} but candidate is ${candidate.family}`);
    }

    // 5. doNotUseWhen: a rule fires when the guidance text and the context both match it.
    const antiPatternRules = [
        {
            inGuidance: /cte\s+decomposition/u,
            inContext: /cte\s+decomposition/u,
            describe: () => `intent says do not use ${candidate.family} for CTE decomposition`,
        },
        {
            inGuidance: /batch\s+etl/u,
            inContext: /batch\s+etl/u,
            describe: () => `intent says do not use ${candidate.family} for batch ETL`,
        },
        {
            inGuidance: /general.purpose|simple\s+stag/u,
            inContext: /general|simple|basic/u,
            describe: () => `intent says do not use ${candidate.family} for general-purpose transforms`,
        },
    ];
    for (const guidance of ownIntent.doNotUseWhen) {
        const loweredGuidance = guidance.toLowerCase();
        // Entries of 10 characters or fewer are too generic to act on.
        if (loweredGuidance.length <= 10) {
            continue;
        }
        for (const rule of antiPatternRules) {
            if (rule.inGuidance.test(loweredGuidance) && rule.inContext.test(loweredContext)) {
                objections.push(rule.describe());
            }
        }
    }

    // 6. Package-level challenge: non-base packages should not be picked for
    //    general-purpose context unless the type is already used in the workspace.
    const alias = candidate.packageAlias;
    if (alias && !candidate.observedInWorkspace) {
        const isBasePackage = /base.node.type/iu.test(alias);
        if (!isBasePackage && /batch\s+etl|staging|transform|general/iu.test(loweredContext)) {
            objections.push(`from specialized package "${candidate.packageAlias}" — not appropriate for general-purpose transforms`);
        }
    }

    // 7. "Copy of" types should be challenged in favor of originals
    const shownName = candidate.displayName;
    if (shownName && /\bcopy\s+of\b/iu.test(shownName)) {
        objections.push(`"${candidate.displayName}" is a cloned type — original should be preferred`);
    }

    return objections;
}
|
|
594
|
+
/**
 * Choose the node type to use for a pipeline step.
 *
 * Flow:
 *  1. Optionally parse the repo at `context.repoPath`; a parse failure becomes
 *     a warning rather than an error.
 *  2. Build scored candidates from repo resolutions, then from workspace node
 *     types not already covered. Excluded types are remembered and skipped.
 *  3. Rank, then challenge the top candidate for up to two rounds; a challenged
 *     candidate has its score set to -Infinity and the list is re-ranked.
 *  4. Apply `context.explicitNodeType` when it is allowed: prefer a matching
 *     candidate, and synthesize one only if nothing else was selected.
 *  5. Derive a confidence level from the score gap to the next-best candidate.
 *
 * @param {object} context - Selection context; reads `workspaceNodeTypes`,
 *   `workspaceNodeTypeCounts`, `repoPath`, `goal`, `targetName`, `explicitNodeType`.
 * @returns {{selectedCandidate: (object|null), selection: object, warnings: string[]}}
 *   The chosen candidate (or null), a selection summary, and collected warnings.
 */
export function selectPipelineNodeType(context) {
    const diagnostics = [];
    const observedTypes = context.workspaceNodeTypes ?? [];
    const observedCounts = context.workspaceNodeTypeCounts ?? {};
    const requestedRepoPath = resolveOptionalRepoPathInput(context.repoPath);

    let repo;
    if (requestedRepoPath) {
        try {
            repo = parseRepo(requestedRepoPath);
        }
        catch (parseError) {
            const message = parseError instanceof Error
                ? parseError.message
                : `Repo-backed planning could not parse ${requestedRepoPath}.`;
            diagnostics.push(message);
        }
    }

    // --- Candidate collection -------------------------------------------------
    const pool = [];
    const handledTypes = new Set();
    const excludedTypes = new Set();
    const repoResolutions = repo ? collectRepoResolutions(repo) : [];
    for (const resolution of repoResolutions) {
        const rejected = isExcludedNodeTypeID(resolution.resolvedNodeType) ||
            isExcludedByInputMode(resolution) ||
            isDisabledNodeType(resolution);
        if (rejected) {
            // Remember the exclusion so the workspace pass cannot re-add the type.
            excludedTypes.add(resolution.resolvedNodeType);
            handledTypes.add(resolution.resolvedNodeType);
            continue;
        }
        const repoCandidate = buildRepoCandidate(resolution, observedTypes, observedCounts);
        if (!handledTypes.has(repoCandidate.nodeType)) {
            handledTypes.add(repoCandidate.nodeType);
            pool.push(scoreCandidate(repoCandidate, context));
        }
    }

    // The exclusion set is final from here on, so snapshot it once.
    const excludedList = [...excludedTypes];
    for (const nodeType of observedTypes) {
        const skip = handledTypes.has(nodeType) ||
            isExcludedNodeTypeID(nodeType) ||
            excludedList.some((excluded) => matchesNodeTypeIdentity(excluded, nodeType));
        if (!skip) {
            handledTypes.add(nodeType);
            pool.push(scoreCandidate(buildWorkspaceCandidate(nodeType, observedCounts), context));
        }
    }

    // === DELIBERATIVE SELECTION: Match → Rank → Challenge → Repeat (twice) ===
    // At most two challenge rounds; the intent checks decide whether the
    // current top candidate is appropriate for the stated goal.
    const contextText = [context.goal, context.targetName].filter(Boolean).join(" ");
    let ranked = rankCandidates(pool);
    const roundLog = [];
    for (let round = 1; round <= 2; round += 1) {
        const leader = ranked[0];
        if (!leader) {
            break;
        }
        const objections = challengeCandidate(leader, context, contextText);
        if (objections.length === 0) {
            roundLog.push(`Round ${round}: "${leader.nodeType}" (${leader.displayName ?? leader.family}) passed challenge`);
            break; // Candidate passed — no need for another round
        }
        roundLog.push(`Round ${round}: challenged "${leader.nodeType}" (${leader.displayName ?? leader.family}) — ${objections.join("; ")}`);
        // Disqualify the leader and re-rank so the next candidate moves up.
        leader.score = -Infinity;
        for (const objection of objections) {
            leader.reasons.push(`CHALLENGED: ${objection}`);
        }
        ranked = rankCandidates(ranked);
    }

    let chosen = ranked[0] ?? null;
    // A disqualified leader means everything ahead of any survivor was rejected.
    if (chosen?.score === -Infinity) {
        chosen = ranked.find((entry) => entry.score > -Infinity) ?? null;
    }

    let strategy;
    if (repo) {
        strategy = "repo-ranked";
    }
    else if (observedTypes.length > 0) {
        strategy = "workspace-ranked";
    }
    else {
        strategy = "fallback";
    }

    // --- Explicit override ----------------------------------------------------
    const explicitType = context.explicitNodeType;
    let explicitIsExcluded = false;
    if (explicitType) {
        explicitIsExcluded = isExcludedNodeTypeID(explicitType) ||
            excludedTypes.has(explicitType) ||
            excludedList.some((excluded) => matchesNodeTypeIdentity(excluded, explicitType));
        if (explicitIsExcluded) {
            diagnostics.push(`targetNodeType "${context.explicitNodeType}" is excluded because it relies on raw SQL override, which is disallowed in this project. Use a declarative node type (Stage, View, Dimension, Fact, etc.) instead.`);
        }
        else {
            // Exact identifier match wins over a looser identity match.
            const exactMatch = ranked.find((entry) => entry.nodeType === explicitType);
            const explicitMatch = exactMatch ??
                ranked.find((entry) => matchesNodeTypeIdentity(entry.nodeType, explicitType)) ??
                null;
            if (explicitMatch) {
                chosen = explicitMatch;
                strategy = "explicit";
            }
            else {
                diagnostics.push(`targetNodeType ${context.explicitNodeType} could not be matched to repo-backed or observed workspace node types.`);
            }
        }
    }
    if (!chosen && explicitType && !explicitIsExcluded) {
        // Nothing else to pick: synthesize a candidate from the explicit type.
        const explicitFamily = inferFamily([explicitType]);
        chosen = {
            ...buildWorkspaceCandidate(explicitType, observedCounts),
            family: explicitFamily,
            observedInWorkspace: false,
            reasons: ["provided as an explicit targetNodeType override"],
            score: 500,
        };
    }

    for (const entry of roundLog) {
        diagnostics.push(entry);
    }

    // --- Confidence -----------------------------------------------------------
    const runnerUp = ranked.find((entry) => entry.nodeType !== chosen?.nodeType && entry.score > -Infinity) ?? null;
    const gap = chosen ? chosen.score - (runnerUp?.score ?? 0) : 0;
    let confidence = "low";
    if (chosen) {
        if (chosen.autoExecutable && gap >= 40) {
            confidence = "high";
        }
        else if (gap >= 15) {
            confidence = "medium";
        }
    }

    // --- Result assembly ------------------------------------------------------
    const supportedNodeTypes = ranked
        .filter((entry) => entry.autoExecutable)
        .map((entry) => entry.nodeType)
        .slice(0, 10);
    const consideredNodeTypes = ranked.slice(0, 10).map(({ nodeType, displayName, shortName, family, usageCount, workspaceUsageCount, observedInWorkspace, autoExecutable, score, reasons, }) => ({
        nodeType,
        displayName,
        shortName,
        family,
        usageCount,
        workspaceUsageCount,
        observedInWorkspace,
        autoExecutable,
        score,
        reasons,
    }));
    const selection = {
        strategy,
        selectedNodeType: chosen?.nodeType ?? null,
        selectedDisplayName: chosen?.displayName ?? null,
        selectedShortName: chosen?.shortName ?? null,
        selectedFamily: chosen?.family ?? null,
        confidence,
        autoExecutable: chosen?.autoExecutable ?? false,
        supportedNodeTypes,
        repoPath: requestedRepoPath ?? null,
        resolvedRepoPath: repo?.summary.resolvedRepoPath ?? null,
        repoWarnings: repo?.summary.warnings ?? [],
        workspaceObservedNodeTypes: observedTypes,
        consideredNodeTypes,
    };

    let selectedCandidate = null;
    if (chosen) {
        const { nodeType, displayName, shortName, family, autoExecutable, semanticSignals, missingDefaultFields, templateWarnings, templateDefaults, } = chosen;
        selectedCandidate = {
            nodeType,
            displayName,
            shortName,
            family,
            autoExecutable,
            semanticSignals,
            missingDefaultFields,
            templateWarnings,
            templateDefaults,
        };
    }
    return {
        selectedCandidate,
        selection,
        warnings: diagnostics,
    };
}
|