coalesce-transform-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +304 -0
- package/dist/cache-dir.d.ts +26 -0
- package/dist/cache-dir.js +106 -0
- package/dist/client.d.ts +25 -0
- package/dist/client.js +212 -0
- package/dist/coalesce/api/environments.d.ts +20 -0
- package/dist/coalesce/api/environments.js +15 -0
- package/dist/coalesce/api/git-accounts.d.ts +21 -0
- package/dist/coalesce/api/git-accounts.js +21 -0
- package/dist/coalesce/api/jobs.d.ts +25 -0
- package/dist/coalesce/api/jobs.js +21 -0
- package/dist/coalesce/api/nodes.d.ts +29 -0
- package/dist/coalesce/api/nodes.js +33 -0
- package/dist/coalesce/api/projects.d.ts +22 -0
- package/dist/coalesce/api/projects.js +25 -0
- package/dist/coalesce/api/runs.d.ts +19 -0
- package/dist/coalesce/api/runs.js +34 -0
- package/dist/coalesce/api/subgraphs.d.ts +20 -0
- package/dist/coalesce/api/subgraphs.js +17 -0
- package/dist/coalesce/api/users.d.ts +30 -0
- package/dist/coalesce/api/users.js +31 -0
- package/dist/coalesce/types.d.ts +298 -0
- package/dist/coalesce/types.js +746 -0
- package/dist/generated/.gitkeep +0 -0
- package/dist/generated/node-type-corpus.json +42656 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +10 -0
- package/dist/mcp/cache.d.ts +3 -0
- package/dist/mcp/cache.js +137 -0
- package/dist/mcp/environments.d.ts +3 -0
- package/dist/mcp/environments.js +61 -0
- package/dist/mcp/git-accounts.d.ts +3 -0
- package/dist/mcp/git-accounts.js +70 -0
- package/dist/mcp/jobs.d.ts +3 -0
- package/dist/mcp/jobs.js +77 -0
- package/dist/mcp/node-type-corpus.d.ts +3 -0
- package/dist/mcp/node-type-corpus.js +173 -0
- package/dist/mcp/nodes.d.ts +3 -0
- package/dist/mcp/nodes.js +341 -0
- package/dist/mcp/pipelines.d.ts +3 -0
- package/dist/mcp/pipelines.js +342 -0
- package/dist/mcp/projects.d.ts +3 -0
- package/dist/mcp/projects.js +70 -0
- package/dist/mcp/repo-node-types.d.ts +135 -0
- package/dist/mcp/repo-node-types.js +387 -0
- package/dist/mcp/runs.d.ts +3 -0
- package/dist/mcp/runs.js +92 -0
- package/dist/mcp/subgraphs.d.ts +3 -0
- package/dist/mcp/subgraphs.js +60 -0
- package/dist/mcp/users.d.ts +3 -0
- package/dist/mcp/users.js +107 -0
- package/dist/prompts/index.d.ts +2 -0
- package/dist/prompts/index.js +58 -0
- package/dist/resources/context/aggregation-patterns.md +145 -0
- package/dist/resources/context/data-engineering-principles.md +183 -0
- package/dist/resources/context/hydrated-metadata.md +92 -0
- package/dist/resources/context/id-discovery.md +64 -0
- package/dist/resources/context/intelligent-node-configuration.md +162 -0
- package/dist/resources/context/node-creation-decision-tree.md +156 -0
- package/dist/resources/context/node-operations.md +316 -0
- package/dist/resources/context/node-payloads.md +114 -0
- package/dist/resources/context/node-type-corpus.md +166 -0
- package/dist/resources/context/node-type-selection-guide.md +96 -0
- package/dist/resources/context/overview.md +135 -0
- package/dist/resources/context/pipeline-workflows.md +355 -0
- package/dist/resources/context/run-operations.md +55 -0
- package/dist/resources/context/sql-bigquery.md +41 -0
- package/dist/resources/context/sql-databricks.md +40 -0
- package/dist/resources/context/sql-platform-selection.md +70 -0
- package/dist/resources/context/sql-snowflake.md +43 -0
- package/dist/resources/context/storage-mappings.md +49 -0
- package/dist/resources/context/tool-usage.md +98 -0
- package/dist/resources/index.d.ts +5 -0
- package/dist/resources/index.js +254 -0
- package/dist/schemas/node-payloads.d.ts +5019 -0
- package/dist/schemas/node-payloads.js +147 -0
- package/dist/server.d.ts +7 -0
- package/dist/server.js +63 -0
- package/dist/services/cache/snapshots.d.ts +108 -0
- package/dist/services/cache/snapshots.js +275 -0
- package/dist/services/config/context-analyzer.d.ts +14 -0
- package/dist/services/config/context-analyzer.js +76 -0
- package/dist/services/config/field-classifier.d.ts +23 -0
- package/dist/services/config/field-classifier.js +47 -0
- package/dist/services/config/intelligent.d.ts +55 -0
- package/dist/services/config/intelligent.js +306 -0
- package/dist/services/config/rules.d.ts +6 -0
- package/dist/services/config/rules.js +44 -0
- package/dist/services/config/schema-resolver.d.ts +18 -0
- package/dist/services/config/schema-resolver.js +80 -0
- package/dist/services/corpus/loader.d.ts +56 -0
- package/dist/services/corpus/loader.js +25 -0
- package/dist/services/corpus/search.d.ts +49 -0
- package/dist/services/corpus/search.js +69 -0
- package/dist/services/corpus/templates.d.ts +4 -0
- package/dist/services/corpus/templates.js +11 -0
- package/dist/services/pipelines/execution.d.ts +20 -0
- package/dist/services/pipelines/execution.js +290 -0
- package/dist/services/pipelines/node-type-intent.d.ts +96 -0
- package/dist/services/pipelines/node-type-intent.js +356 -0
- package/dist/services/pipelines/node-type-selection.d.ts +66 -0
- package/dist/services/pipelines/node-type-selection.js +758 -0
- package/dist/services/pipelines/planning.d.ts +543 -0
- package/dist/services/pipelines/planning.js +1839 -0
- package/dist/services/policies/sql-override.d.ts +7 -0
- package/dist/services/policies/sql-override.js +109 -0
- package/dist/services/repo/operations.d.ts +6 -0
- package/dist/services/repo/operations.js +10 -0
- package/dist/services/repo/parser.d.ts +70 -0
- package/dist/services/repo/parser.js +365 -0
- package/dist/services/repo/path.d.ts +2 -0
- package/dist/services/repo/path.js +58 -0
- package/dist/services/templates/nodes.d.ts +50 -0
- package/dist/services/templates/nodes.js +336 -0
- package/dist/services/workspace/analysis.d.ts +56 -0
- package/dist/services/workspace/analysis.js +151 -0
- package/dist/services/workspace/mutations.d.ts +150 -0
- package/dist/services/workspace/mutations.js +1718 -0
- package/dist/utils.d.ts +5 -0
- package/dist/utils.js +7 -0
- package/dist/workflows/get-environment-overview.d.ts +9 -0
- package/dist/workflows/get-environment-overview.js +23 -0
- package/dist/workflows/get-run-details.d.ts +10 -0
- package/dist/workflows/get-run-details.js +28 -0
- package/dist/workflows/progress.d.ts +20 -0
- package/dist/workflows/progress.js +54 -0
- package/dist/workflows/retry-and-wait.d.ts +13 -0
- package/dist/workflows/retry-and-wait.js +139 -0
- package/dist/workflows/run-and-wait.d.ts +13 -0
- package/dist/workflows/run-and-wait.js +141 -0
- package/dist/workflows/run-status.d.ts +10 -0
- package/dist/workflows/run-status.js +27 -0
- package/package.json +34 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
 * Derives heuristic facts about a workspace node from its metadata and config.
 *
 * Reads `node.metadata.sourceMapping`, `node.metadata.columns` and
 * `node.config.materializationType`. Missing or non-array values are treated as
 * empty. Column entries that are not objects (for example `null`) are ignored,
 * so a malformed column list cannot cause a TypeError.
 *
 * @param {object} node - Node payload; only `metadata` and `config` are read.
 * @returns {object} Flags (`hasMultipleSources`, `hasAggregates`,
 *   `hasTimestampColumns`, `hasType2Pattern`), `materializationType`
 *   ("view" when the configured value contains "view", otherwise "table"),
 *   and `columnPatterns` listing the matching column names.
 */
export function analyzeNodeContext(node) {
    const metadata = node.metadata ?? {};
    const sourceMapping = Array.isArray(metadata.sourceMapping) ? metadata.sourceMapping : [];
    // Keep only object entries: the checks below dereference `col.name` / `col.transform`.
    const columns = Array.isArray(metadata.columns)
        ? metadata.columns.filter((col) => col !== null && typeof col === "object")
        : [];

    // Detect multiple sources from sourceMapping dependencies
    const totalDependencies = sourceMapping.reduce((count, mapping) => {
        return count + (Array.isArray(mapping?.dependencies) ? mapping.dependencies.length : 0);
    }, 0);

    const colName = (col) => (typeof col.name === "string" ? col.name : "");
    const namesMatching = (test) => columns.map(colName).filter(test);
    const anyNameMatches = (pattern) => columns.some((col) => pattern.test(colName(col)));

    // Detect aggregates
    const aggregatePattern = /\b(COUNT|SUM|AVG|MIN|MAX|STDDEV|VARIANCE|LISTAGG|ARRAY_AGG)\s*\(/i;
    const isAggregate = (col) => aggregatePattern.test(typeof col.transform === "string" ? col.transform : "");
    const hasAggregates = columns.some(isAggregate);

    // Detect timestamp columns
    const timestampPattern = /_TS$|_TIMESTAMP$|TIMESTAMP_/i;
    const datePattern = /_DATE$|_DT$|DATE_/i;
    const timestamps = namesMatching((name) => timestampPattern.test(name));
    const dates = namesMatching((name) => datePattern.test(name));
    const hasTimestampColumns = timestamps.length > 0 || dates.length > 0;

    // Detect Type 2 SCD pattern (START_DATE, END_DATE, IS_CURRENT)
    const hasType2Pattern = anyNameMatches(/START_DATE|EFFECTIVE_DATE/i) &&
        anyNameMatches(/END_DATE|EXPIRY_DATE/i) &&
        anyNameMatches(/IS_CURRENT|CURRENT_FLAG/i);

    // Detect business key candidates: columns with ID/KEY/CODE patterns
    const businessKeys = namesMatching((name) => /_(ID|KEY|CODE|NUM)$/i.test(name) || /^(ID|KEY|CODE)_/i.test(name));

    // Detect change tracking candidates: non-key, non-aggregate, non-system columns
    const systemColumnPattern = /^(SYS_|DW_|ETL_|LOAD_|CREATED_|UPDATED_|MODIFIED_|INSERT_|UPDATE_)/i;
    const businessKeySet = new Set(businessKeys);
    const changeTrackingCandidates = columns
        .filter((col) => {
            const name = colName(col);
            return name.length > 0 &&
                !businessKeySet.has(name) &&
                !systemColumnPattern.test(name) &&
                !isAggregate(col) &&
                col.isBusinessKey !== true;
        })
        .map(colName);

    // Read actual materialization type from node config
    const config = node.config ?? {};
    const rawMaterialization = typeof config.materializationType === "string"
        ? config.materializationType.toLowerCase()
        : "";
    const materializationType = rawMaterialization.includes("view") ? "view" : "table";

    return {
        hasMultipleSources: totalDependencies > 1,
        hasAggregates,
        hasTimestampColumns,
        hasType2Pattern,
        materializationType,
        columnPatterns: {
            timestamps,
            dates,
            businessKeys,
            changeTrackingCandidates,
        },
    };
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/** One item of a node type's config group, as consumed by `classifyConfigFields`. */
export interface ConfigItem {
    /** Config key; items without it are skipped by the classifier. */
    attributeName?: string;
    /** Item type; `"columnSelector"` marks a column-level attribute. */
    type: string;
    /** `true` marks the item required; a string marks it conditionally required. */
    isRequired?: string | boolean;
    /** Default value, if the item declares one. */
    default?: unknown;
    enableIf?: string;
    displayName?: string;
}
|
|
9
|
+
/** Attribute names of a node type's config items, bucketed by how they must be filled. */
export interface ClassifiedFields {
    /** Items with `isRequired === true`. */
    required: string[];
    /** Items whose `isRequired` is a string. */
    conditionalRequired: string[];
    /** Non-required items that declare a default. */
    optionalWithDefaults: string[];
    /** Everything else. */
    contextual: string[];
    /** Items of type `"columnSelector"` (column-level attributes). */
    columnSelectors: {
        attributeName: string;
        displayName: undefined | string;
        isRequired: boolean;
    }[];
}
|
|
20
|
+
/** Buckets every named item of the given config groups into a `ClassifiedFields` result. */
export declare function classifyConfigFields(
    config: { groupName: string; items: ConfigItem[] }[],
): ClassifiedFields;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Buckets the items of a node type's config groups by how they must be filled.
 *
 * - `type === "columnSelector"` items go to `columnSelectors` (column-level attributes).
 * - `isRequired === true` goes to `required`.
 * - a string `isRequired` goes to `conditionalRequired`.
 * - a defined `default` goes to `optionalWithDefaults`.
 * - anything else goes to `contextual`.
 *
 * Items without an `attributeName` are skipped. Malformed input (a non-array
 * `config`, a group without an `items` array, or a null item) is skipped
 * rather than raising a TypeError.
 *
 * @param {Array<{groupName: string, items: Array<object>}>} config - Config groups.
 * @returns {{required: string[], conditionalRequired: string[], optionalWithDefaults: string[],
 *   contextual: string[], columnSelectors: Array<{attributeName: string,
 *   displayName: (string|undefined), isRequired: boolean}>}}
 */
export function classifyConfigFields(config) {
    const required = [];
    const conditionalRequired = [];
    const optionalWithDefaults = [];
    const contextual = [];
    const columnSelectors = [];
    const groups = Array.isArray(config) ? config : [];
    for (const group of groups) {
        const items = Array.isArray(group?.items) ? group.items : [];
        for (const item of items) {
            // Skip malformed items and items without attributeName
            if (!item?.attributeName) {
                continue;
            }
            const { attributeName } = item;
            // columnSelector items are column-level attributes, not node-level config
            if (item.type === "columnSelector") {
                columnSelectors.push({
                    attributeName,
                    displayName: item.displayName,
                    isRequired: item.isRequired === true,
                });
                continue;
            }
            if (item.isRequired === true) {
                required.push(attributeName);
            }
            else if (typeof item.isRequired === "string") {
                conditionalRequired.push(attributeName);
            }
            else if (item.default !== undefined) {
                optionalWithDefaults.push(attributeName);
            }
            else {
                contextual.push(attributeName);
            }
        }
    }
    return {
        required,
        conditionalRequired,
        optionalWithDefaults,
        contextual,
        columnSelectors,
    };
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import type { CoalesceClient } from "../../client.js";
|
|
2
|
+
/** Outcome of reviewing a node's configuration after completion. */
export interface ConfigReview {
    /** `"incomplete"` when required entries are missing, `"needs_attention"` when only warnings exist. */
    status: "incomplete" | "needs_attention" | "complete";
    /** One-line human-readable description of the status. */
    summary: string;
    /** Required field names (and `columnSelector:<name>` entries) that are still unset. */
    missingRequired: string[];
    warnings: string[];
    suggestions: string[];
}
|
|
9
|
+
/** Everything `completeNodeConfiguration` reports back about one node. */
export interface ConfigCompletionResult {
    /** The node after any update (or as fetched when nothing changed). */
    node: unknown;
    /** Where the node type schema was resolved from. */
    schemaSource: "corpus" | "repo";
    /** Schema fields bucketed by how they must be filled. */
    classification: {
        required: string[];
        conditionalRequired: string[];
        optionalWithDefaults: string[];
        contextual: string[];
        columnSelectors: {
            attributeName: string;
            displayName: undefined | string;
            isRequired: boolean;
        }[];
    };
    /** Heuristic facts detected on the node. */
    context: {
        hasMultipleSources: boolean;
        hasAggregates: boolean;
        hasTimestampColumns: boolean;
        hasType2Pattern: boolean;
        materializationType: "view" | "table";
    };
    /** Existing config merged with defaults, required defaults and contextual suggestions. */
    appliedConfig: Record<string, unknown>;
    /** Config values grouped by the reason they were set or kept. */
    configChanges: {
        required: Record<string, unknown>;
        contextual: Record<string, unknown>;
        preserved: Record<string, unknown>;
        defaults: Record<string, unknown>;
    };
    /** Column-level attribute values that were changed, plus the reasoning per selector. */
    columnAttributeChanges: {
        applied: {
            columnName: string;
            attribute: string;
            value: boolean;
        }[];
        reasoning: string[];
    };
    /** Reasoning emitted by the intelligence rules. */
    reasoning: string[];
    detectedPatterns: {
        candidateColumns: string[];
    };
    configReview: ConfigReview;
}
|
|
51
|
+
/**
 * Completes the configuration of one workspace node and reports what was done.
 * `repoPath` is optional and is forwarded to schema resolution.
 */
export declare function completeNodeConfiguration(
    client: CoalesceClient,
    params: { workspaceID: string; nodeID: string; repoPath?: string },
): Promise<ConfigCompletionResult>;
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
import { getWorkspaceNode } from "../../coalesce/api/nodes.js";
|
|
2
|
+
import { resolveNodeTypeSchema } from "./schema-resolver.js";
|
|
3
|
+
import { analyzeNodeContext } from "./context-analyzer.js";
|
|
4
|
+
import { classifyConfigFields } from "./field-classifier.js";
|
|
5
|
+
import { applyIntelligenceRules } from "./rules.js";
|
|
6
|
+
import { updateWorkspaceNode } from "../workspace/mutations.js";
|
|
7
|
+
import { isPlainObject } from "../../utils.js";
|
|
8
|
+
import { NODE_TYPE_INTENT } from "../pipelines/node-type-intent.js";
|
|
9
|
+
import { inferFamily } from "../pipelines/node-type-selection.js";
|
|
10
|
+
/**
 * Returns the plain-object entries of `node.metadata.columns`.
 * Yields an empty array when `metadata` is not a plain object or `columns` is not an array.
 */
function getNodeMetadataColumns(node) {
    if (!isPlainObject(node.metadata)) {
        return [];
    }
    const candidate = node.metadata["columns"];
    return Array.isArray(candidate) ? candidate.filter(isPlainObject) : [];
}
|
|
18
|
+
/**
 * Decides which columns should carry a given columnSelector attribute.
 *
 * Columns already flagged with the attribute always win and are preserved as-is.
 * Otherwise `isBusinessKey` and `isChangeTracking` fall back to the candidate
 * names in `context.columnPatterns`; any other attribute gets no automatic assignment.
 *
 * @returns {{assignments: Map<string, boolean>, reasoning: string}} Column name → true,
 *   plus a sentence explaining the decision.
 */
function inferColumnSelectorAssignments(attributeName, columns, context) {
    const assignments = new Map();
    const flagged = columns.filter((column) => column[attributeName] === true);
    // Copies the already-flagged columns (those with a string name) into the result.
    const keepFlagged = () => {
        flagged.forEach((column) => {
            if (typeof column.name === "string") {
                assignments.set(column.name, true);
            }
        });
    };
    const preservedNote = `Preserved ${flagged.length} existing ${attributeName} column(s)`;

    const isKnownSelector = attributeName === "isBusinessKey" || attributeName === "isChangeTracking";
    if (!isKnownSelector) {
        // For unknown columnSelector attributes, preserve existing values only
        keepFlagged();
        const reasoning = flagged.length > 0
            ? preservedNote
            : `Unknown columnSelector '${attributeName}' — skipped automatic assignment. Set manually if needed.`;
        return { assignments, reasoning };
    }

    if (flagged.length > 0) {
        keepFlagged();
        return { assignments, reasoning: preservedNote };
    }

    // Nothing flagged yet: fall back to the candidates detected from column names.
    const forBusinessKey = attributeName === "isBusinessKey";
    const candidateNames = forBusinessKey
        ? context.columnPatterns.businessKeys
        : context.columnPatterns.changeTrackingCandidates;
    candidateNames.forEach((candidateName) => assignments.set(candidateName, true));

    const joinedNames = [...assignments.keys()].join(", ");
    if (forBusinessKey) {
        return {
            assignments,
            reasoning: assignments.size > 0
                ? `Inferred isBusinessKey from ID/KEY/CODE column name patterns: ${joinedNames}`
                : "No business key candidates detected — set isBusinessKey manually on the appropriate column(s)",
        };
    }
    return {
        assignments,
        reasoning: assignments.size > 0
            ? `Inferred isChangeTracking for non-key columns: ${joinedNames}`
            : "No change tracking candidates detected",
    };
}
|
|
82
|
+
/** Resolves the family of a single node type by handing `inferFamily` a one-element list. */
function inferFamilyFromNodeType(nodeType) {
    const singleCandidate = [nodeType];
    return inferFamily(singleCandidate);
}
|
|
85
|
+
/**
 * Build a config review that summarizes the state of the node's configuration
 * based on the node type's intent, what fields were filled, and what's still missing.
 *
 * @param {string} nodeType - Node type used to look up the family intent.
 * @param {object} appliedConfig - Final merged node config.
 * @param {object} classification - Result of `classifyConfigFields`.
 * @param {object} context - Result of `analyzeNodeContext`.
 * @param {Array<{columnName: string, attribute: string, value: boolean}>} columnAttributeApplied -
 *   Column attribute assignments to treat as set.
 * @returns {{status: string, summary: string, missingRequired: string[],
 *   warnings: string[], suggestions: string[]}}
 */
function buildConfigReview(nodeType, appliedConfig, classification, context, columnAttributeApplied) {
    const missingRequired = [];
    const warnings = [];
    const suggestions = [];
    const hasSelector = (attribute) => classification.columnSelectors.some((s) => s.attributeName === attribute);
    const hasAssignment = (attribute) => columnAttributeApplied.some((a) => a.attribute === attribute && a.value);

    // Check for missing required fields that weren't filled
    for (const fieldName of classification.required) {
        if (!(fieldName in appliedConfig) || appliedConfig[fieldName] === undefined) {
            missingRequired.push(fieldName);
        }
    }

    // Intent-aware checks based on node type family.
    // A family without an intent entry skips these checks instead of throwing.
    const family = inferFamilyFromNodeType(nodeType);
    const intent = NODE_TYPE_INTENT[family];
    if (intent?.requiresSemanticConfig) {
        // Node types that require semantic config need business keys
        if (hasSelector("isBusinessKey") &&
            !hasAssignment("isBusinessKey") &&
            context.columnPatterns.businessKeys.length === 0) {
            warnings.push(`${family} nodes require business keys but none were detected or set. ` +
                `Set isBusinessKey: true on the appropriate column(s) via replace-workspace-node-columns.`);
        }
        // Change tracking selector present but nothing set or detected
        if (hasSelector("isChangeTracking") &&
            !hasAssignment("isChangeTracking") &&
            context.columnPatterns.changeTrackingCandidates.length === 0) {
            suggestions.push(`${family} supports change tracking but no isChangeTracking columns were detected. ` +
                `If CDC is needed, set isChangeTracking: true on mutable columns.`);
        }
    }

    // Check for required columnSelectors that have no assignments
    for (const selector of classification.columnSelectors) {
        if (selector.isRequired && !hasAssignment(selector.attributeName)) {
            missingRequired.push(`columnSelector:${selector.attributeName}`);
        }
    }

    // Materialization-specific suggestions
    if (context.materializationType === "view" && context.hasAggregates) {
        suggestions.push("This view contains aggregations that recalculate on every query. " +
            "Consider table materialization if performance is important.");
    }

    // Determine overall status and summary: missing required beats warnings
    let status;
    let summary;
    if (missingRequired.length > 0) {
        status = "incomplete";
        summary = `Config is incomplete — ${missingRequired.length} required field(s) missing: ${missingRequired.join(", ")}.`;
    }
    else if (warnings.length > 0) {
        status = "needs_attention";
        summary = `Config has ${warnings.length} warning(s) that may need manual review.`;
    }
    else {
        status = "complete";
        summary = `Config is complete. All required fields are set.`;
        if (suggestions.length > 0) {
            summary += ` ${suggestions.length} optional suggestion(s) available.`;
        }
    }
    return { status, summary, missingRequired, warnings, suggestions };
}
|
|
162
|
+
/**
 * Fetches a workspace node, fills in config values and column-level attributes
 * from its node type schema and detected context, saves any changes, and
 * returns a report of what was applied together with a config review.
 *
 * @param {object} client - Client handed to `getWorkspaceNode` / `updateWorkspaceNode`.
 * @param {{workspaceID: string, nodeID: string, repoPath?: string}} params
 * @returns {Promise<object>} Completion result (node, classification, context,
 *   config changes, column attribute changes, reasoning and review).
 * @throws {Error} When the node response is not an object or has no string `nodeType`.
 */
export async function completeNodeConfiguration(client, params) {
    // Step 1: Fetch node
    const node = await getWorkspaceNode(client, {
        workspaceID: params.workspaceID,
        nodeID: params.nodeID,
    });
    if (!isPlainObject(node)) {
        throw new Error("Node response was not an object");
    }
    const nodeType = typeof node.nodeType === "string" ? node.nodeType : "";
    if (!nodeType) {
        throw new Error("Node has no nodeType");
    }
    // Step 2: Resolve schema
    const schemaResolution = await resolveNodeTypeSchema(nodeType, params.repoPath);
    // Step 3: Analyze context
    const context = analyzeNodeContext(node);
    // Step 4: Classify fields (separates columnSelector items)
    const classification = classifyConfigFields(schemaResolution.schema.config);
    // Step 5: Apply intelligence rules (get suggestions)
    const rulesResult = applyIntelligenceRules(context);
    // Step 6: Build config changes from rules and schema defaults
    const existingConfig = isPlainObject(node.config) ? node.config : {};
    const requiredChanges = {};
    const contextualChanges = {};
    const preservedFields = {};
    const defaultChanges = {};
    // Index schema defaults once; when an attribute appears more than once the last default wins.
    const schemaDefaults = new Map();
    for (const group of schemaResolution.schema.config) {
        for (const item of group.items) {
            if (item.default !== undefined) {
                schemaDefaults.set(item.attributeName, item.default);
            }
        }
    }
    // Apply required field defaults from schema; keep values that are already set
    for (const fieldName of classification.required) {
        if (fieldName in existingConfig) {
            preservedFields[fieldName] = existingConfig[fieldName];
        }
        else if (schemaDefaults.has(fieldName)) {
            requiredChanges[fieldName] = schemaDefaults.get(fieldName);
        }
    }
    // Apply schema defaults for optional fields that aren't set yet
    for (const fieldName of classification.optionalWithDefaults) {
        if (!(fieldName in existingConfig) && schemaDefaults.has(fieldName)) {
            defaultChanges[fieldName] = schemaDefaults.get(fieldName);
        }
    }
    // Apply contextual suggestions from rules
    for (const [key, value] of Object.entries(rulesResult.suggestions)) {
        if (classification.contextual.includes(key) || classification.optionalWithDefaults.includes(key)) {
            contextualChanges[key] = value;
        }
    }
    // Merge all config changes (contextual overrides defaults when both apply)
    const appliedConfig = {
        ...existingConfig,
        ...defaultChanges,
        ...requiredChanges,
        ...contextualChanges,
    };
    // Step 7: Handle columnSelector attributes (column-level)
    const columns = getNodeMetadataColumns(node);
    const columnAttributeApplied = [];
    const columnAttributeReasoning = [];
    // Every assignment that holds after this step, whether it was just written or
    // was already present on the column. The review must see these, otherwise
    // columns flagged before this call would be reported as missing.
    const columnAttributeEffective = [];
    let columnsModified = false;
    for (const selector of classification.columnSelectors) {
        const { assignments, reasoning } = inferColumnSelectorAssignments(selector.attributeName, columns, context);
        columnAttributeReasoning.push(`${selector.attributeName}: ${reasoning}`);
        for (const [colName, value] of assignments) {
            const col = columns.find((c) => typeof c.name === "string" && c.name === colName);
            if (!col) {
                continue;
            }
            const entry = {
                columnName: colName,
                attribute: selector.attributeName,
                value,
            };
            columnAttributeEffective.push(entry);
            if (col[selector.attributeName] !== value) {
                col[selector.attributeName] = value;
                columnsModified = true;
                columnAttributeApplied.push(entry);
            }
        }
    }
    // Step 8: Update node with config changes and/or column attribute changes
    const hasConfigChanges = Object.keys(requiredChanges).length > 0 ||
        Object.keys(contextualChanges).length > 0 ||
        Object.keys(defaultChanges).length > 0;
    let updatedNode = node;
    if (hasConfigChanges || columnsModified) {
        const changes = {};
        if (hasConfigChanges) {
            changes.config = appliedConfig;
        }
        if (columnsModified) {
            changes.metadata = { columns };
        }
        updatedNode = await updateWorkspaceNode(client, {
            workspaceID: params.workspaceID,
            nodeID: params.nodeID,
            changes,
        });
    }
    // Step 9: Detect candidate columns for reporting
    const candidateColumns = columns
        .filter((col) => {
            const name = typeof col.name === "string" ? col.name : "";
            return /_(ID|KEY|CODE|NUM)$/i.test(name) || /^(ID|KEY|CODE)_/i.test(name);
        })
        .flatMap((col) => (typeof col.name === "string" ? [col.name] : []));
    // Step 10: Build config review — summarizes what's set, what's missing, and what needs attention
    const configReview = buildConfigReview(nodeType, appliedConfig, classification, context, columnAttributeEffective);
    return {
        node: updatedNode,
        schemaSource: schemaResolution.source,
        classification,
        context: {
            hasMultipleSources: context.hasMultipleSources,
            hasAggregates: context.hasAggregates,
            hasTimestampColumns: context.hasTimestampColumns,
            hasType2Pattern: context.hasType2Pattern,
            materializationType: context.materializationType,
        },
        appliedConfig,
        configChanges: {
            required: requiredChanges,
            contextual: contextualChanges,
            preserved: preservedFields,
            defaults: defaultChanges,
        },
        columnAttributeChanges: {
            applied: columnAttributeApplied,
            reasoning: columnAttributeReasoning,
        },
        reasoning: rulesResult.reasoning,
        detectedPatterns: {
            candidateColumns,
        },
        configReview,
    };
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Turns the detected node context into suggested config values plus the
 * reasoning behind each suggestion. Some rules only add a reasoning note.
 *
 * @param {object} context - Flags and `materializationType` from `analyzeNodeContext`.
 * @returns {{suggestions: object, reasoning: string[]}}
 */
export function applyIntelligenceRules(context) {
    const suggestions = {};
    const reasoning = [];
    const isTable = context.materializationType === "table";
    const isView = context.materializationType === "view";
    const aggregated = context.hasAggregates;

    // Multi-source: UNION when aggregates are present, UNION ALL otherwise
    if (context.hasMultipleSources) {
        suggestions.insertStrategy = aggregated ? "UNION" : "UNION ALL";
        reasoning.push(aggregated
            ? "Multi-source node with aggregates suggests UNION to avoid duplicate aggregated rows"
            : "Multi-source node without aggregates suggests UNION ALL for better performance");
    }
    // Aggregates: turn SELECT DISTINCT off
    if (aggregated) {
        suggestions.selectDistinct = false;
        reasoning.push("Aggregates are incompatible with SELECT DISTINCT; suggests selectDistinct: false");
    }
    // Table materialization: do not truncate before load
    if (isTable) {
        suggestions.truncateBefore = false;
        reasoning.push("Table materialization suggests truncateBefore: false to preserve existing data");
    }
    // View without aggregates: selectDistinct is also suggested as false
    if (isView && !aggregated) {
        suggestions.selectDistinct = false;
        reasoning.push("View without aggregates — selectDistinct defaults to false; set to true only if deduplication is needed");
    }
    // Type 2 SCD pattern: reasoning note only, no suggestion
    if (context.hasType2Pattern) {
        reasoning.push("Type 2 SCD pattern detected (START_DATE + END_DATE + IS_CURRENT columns). " +
            "Verify that the node type's SCD config is set appropriately.");
    }
    // Table without timestamp/date columns: reasoning note only
    if (isTable && !context.hasTimestampColumns) {
        reasoning.push("No timestamp or date columns detected. Consider adding audit columns " +
            "(e.g., DW_LOAD_TS, DW_UPDATE_TS) for data lineage tracking.");
    }
    return { suggestions, reasoning };
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Normalized shape of a node type definition's `config` section:
 * a list of named groups, each holding a list of config items.
 */
export interface NodeTypeSchema {
    config: {
        /** Name of the config group. */
        groupName: string;
        /** Config items that belong to this group. */
        items: {
            /** Attribute name of the item, when the definition provides one. */
            attributeName?: string;
            /** Item type identifier as given in the definition. */
            type: string;
            /** Required flag; either a boolean or a string value. */
            isRequired?: boolean | string;
            /** Default value as given in the definition (untyped). */
            default?: unknown;
            /** `enableIf` string from the definition, when present. */
            enableIf?: string;
            /** Display name of the item, when present. */
            displayName?: string;
        }[];
    }[];
}
|
|
14
|
+
/**
 * Result of resolving a node type schema, tagged with where it came from.
 */
export interface SchemaResolution {
    /** Origin of the schema: the repo or the corpus. */
    source: "repo" | "corpus";
    /** The resolved, normalized schema. */
    schema: NodeTypeSchema;
}
|
|
18
|
+
/**
 * Resolve the config schema for a node type.
 *
 * @param nodeType - Node type name to look up.
 * @param repoPath - Optional repo path to try before the corpus.
 * @returns A promise for the schema together with its source ("repo" or "corpus").
 */
export declare function resolveNodeTypeSchema(nodeType: string, repoPath?: string): Promise<SchemaResolution>;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { getRepoNodeTypeDefinition } from "../repo/operations.js";
|
|
2
|
+
import { loadNodeTypeCorpusSnapshot } from "../corpus/loader.js";
|
|
3
|
+
import { searchNodeTypeCorpusVariants } from "../corpus/search.js";
|
|
4
|
+
import { resolveOptionalRepoPathInput } from "../repo/path.js";
|
|
5
|
+
import { isPlainObject } from "../../utils.js";
|
|
6
|
+
/**
 * Reduce a node type name to its normalized family key: trimmed, lowercased,
 * and stripped of every character outside `a-z` and `0-9`.
 *
 * @param {string} nodeType - Raw node type name.
 * @returns {string} The normalized key (empty when nothing alphanumeric remains).
 */
function normalizeNodeTypeFamily(nodeType) {
    const lowered = nodeType.trim().toLowerCase();
    // Keep only the alphanumeric runs and glue them back together.
    const keptRuns = lowered.match(/[a-z0-9]+/g) ?? [];
    return keptRuns.join("");
}
|
|
9
|
+
/**
 * Convert a raw node definition into the normalized `{ config }` schema.
 *
 * Groups that are not plain objects or lack an `items` array are dropped, and
 * so are items that are not plain objects or lack a string `type`. Optional
 * item fields with an unexpected type are emitted as `undefined`.
 *
 * @param {unknown} nodeDefinition - Raw definition expected to carry a `config` array.
 * @param {string} sourceLabel - Label prefixed to the error message (e.g. "Repo").
 * @returns {{ config: Array<{ groupName: string, items: object[] }> }} Normalized schema.
 * @throws {Error} When `nodeDefinition` is not a plain object with a `config` array.
 */
function parseNodeTypeSchema(nodeDefinition, sourceLabel) {
    const hasConfigArray = isPlainObject(nodeDefinition) && Array.isArray(nodeDefinition.config);
    if (!hasConfigArray) {
        throw new Error(`${sourceLabel} node definition does not contain a config array`);
    }

    // Narrow a value to a string, or undefined for anything else.
    const asString = (value) => (typeof value === "string" ? value : undefined);

    const config = [];
    for (const rawGroup of nodeDefinition.config) {
        if (!(isPlainObject(rawGroup) && Array.isArray(rawGroup.items))) {
            continue;
        }

        const items = [];
        for (const rawItem of rawGroup.items) {
            if (!(isPlainObject(rawItem) && typeof rawItem.type === "string")) {
                continue;
            }
            const requiredKind = typeof rawItem.isRequired;
            const isRequired = requiredKind === "boolean" || requiredKind === "string"
                ? rawItem.isRequired
                : undefined;
            items.push({
                attributeName: asString(rawItem.attributeName),
                type: rawItem.type,
                isRequired,
                default: rawItem.default,
                enableIf: asString(rawItem.enableIf),
                displayName: asString(rawItem.displayName),
            });
        }

        // Groups without a string name fall back to "Config".
        const groupName = asString(rawGroup.groupName) ?? "Config";
        config.push({ groupName, items });
    }
    return { config };
}
|
|
39
|
+
/**
 * Resolve the config schema for a node type, preferring the repo definition
 * and falling back to the bundled corpus.
 *
 * The repo is only consulted when `resolveOptionalRepoPathInput` yields a
 * path. Any repo failure (lookup or parsing) is remembered and the corpus is
 * tried next; if the corpus also fails, the thrown error reports both reasons.
 *
 * @param {string} nodeType - Node type name to resolve.
 * @param {string} [repoPath] - Optional repo path; passed through
 *   `resolveOptionalRepoPathInput` before use.
 * @returns {Promise<{ source: "repo" | "corpus", schema: object }>} The parsed
 *   schema and where it came from.
 * @throws {Error} When neither the repo nor the corpus yields a parseable
 *   definition. The corpus failure is attached as `cause`.
 */
export async function resolveNodeTypeSchema(nodeType, repoPath) {
    // Resolve repoPath with COALESCE_REPO_PATH env var fallback
    const resolvedRepoPath = resolveOptionalRepoPathInput(repoPath);
    const describeError = (err) => (err instanceof Error ? err.message : String(err));

    // Try repo first if path provided
    let repoError;
    if (resolvedRepoPath) {
        try {
            const def = await getRepoNodeTypeDefinition(resolvedRepoPath, nodeType);
            return {
                source: "repo",
                schema: parseNodeTypeSchema(def.nodeDefinition, "Repo"),
            };
        }
        catch (error) {
            // Keep the reason instead of discarding it, then fall through to corpus.
            repoError = error;
        }
    }

    // Try corpus fallback
    try {
        const snapshot = loadNodeTypeCorpusSnapshot();
        const normalizedFamily = normalizeNodeTypeFamily(nodeType);
        const result = searchNodeTypeCorpusVariants(snapshot, {
            normalizedFamily,
            supportStatus: "supported",
            limit: 1,
        });
        if (result.matches.length === 0) {
            throw new Error(`No supported corpus variant found for normalized family '${normalizedFamily}'`);
        }
        const [bestMatch] = result.matches;
        const variant = snapshot.variants.find((v) => v.variantKey === bestMatch.variantKey);
        if (!variant?.nodeDefinition) {
            throw new Error(`Corpus variant ${bestMatch.variantKey} has no parseable definition`);
        }
        return {
            source: "corpus",
            schema: parseNodeTypeSchema(variant.nodeDefinition, "Corpus"),
        };
    }
    catch (error) {
        // Without a repo path the original wording is kept; with one, surface
        // why the repo attempt failed so the caller can act on it.
        const repoDetail = resolvedRepoPath
            ? ` (${describeError(repoError)})`
            : " (no repoPath provided, COALESCE_REPO_PATH not set)";
        throw new Error(`Cannot resolve node type schema for '${nodeType}'. ` +
            `Repo resolution failed${repoDetail} and corpus lookup failed: ${describeError(error)}`, { cause: error });
    }
}
|