coalesce-transform-mcp 0.3.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -3
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +6 -2
- package/dist/client.js.map +1 -1
- package/dist/coalesce/api/environments.d.ts +0 -12
- package/dist/coalesce/api/environments.d.ts.map +1 -1
- package/dist/coalesce/api/environments.js +0 -4
- package/dist/coalesce/api/environments.js.map +1 -1
- package/dist/coalesce/api/jobs.d.ts +3 -5
- package/dist/coalesce/api/jobs.d.ts.map +1 -1
- package/dist/coalesce/api/jobs.js +3 -6
- package/dist/coalesce/api/jobs.js.map +1 -1
- package/dist/coalesce/api/nodes.d.ts +3 -3
- package/dist/coalesce/api/nodes.d.ts.map +1 -1
- package/dist/coalesce/api/nodes.js +6 -4
- package/dist/coalesce/api/nodes.js.map +1 -1
- package/dist/coalesce/api/runs.d.ts.map +1 -1
- package/dist/coalesce/api/runs.js +11 -1
- package/dist/coalesce/api/runs.js.map +1 -1
- package/dist/coalesce/api/scan.d.ts +14 -0
- package/dist/coalesce/api/scan.d.ts.map +1 -0
- package/dist/coalesce/api/scan.js +64 -0
- package/dist/coalesce/api/scan.js.map +1 -0
- package/dist/coalesce/api/subgraphs.d.ts +3 -2
- package/dist/coalesce/api/subgraphs.d.ts.map +1 -1
- package/dist/coalesce/api/subgraphs.js +3 -2
- package/dist/coalesce/api/subgraphs.js.map +1 -1
- package/dist/coalesce/run-schemas.d.ts.map +1 -1
- package/dist/coalesce/run-schemas.js +26 -16
- package/dist/coalesce/run-schemas.js.map +1 -1
- package/dist/coalesce/tool-response.d.ts +1 -13
- package/dist/coalesce/tool-response.d.ts.map +1 -1
- package/dist/coalesce/tool-response.js +20 -6
- package/dist/coalesce/tool-response.js.map +1 -1
- package/dist/coalesce/tool-schemas.d.ts +1 -2
- package/dist/coalesce/tool-schemas.d.ts.map +1 -1
- package/dist/coalesce/tool-schemas.js +368 -5
- package/dist/coalesce/tool-schemas.js.map +1 -1
- package/dist/coalesce/types.d.ts +8 -0
- package/dist/coalesce/types.d.ts.map +1 -1
- package/dist/coalesce/types.js +3 -1
- package/dist/coalesce/types.js.map +1 -1
- package/dist/constants.d.ts +18 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +21 -0
- package/dist/constants.js.map +1 -0
- package/dist/mcp/cache.d.ts +2 -1
- package/dist/mcp/cache.d.ts.map +1 -1
- package/dist/mcp/cache.js +122 -138
- package/dist/mcp/cache.js.map +1 -1
- package/dist/mcp/environments.d.ts +2 -1
- package/dist/mcp/environments.d.ts.map +1 -1
- package/dist/mcp/environments.js +56 -112
- package/dist/mcp/environments.js.map +1 -1
- package/dist/mcp/git-accounts.d.ts +2 -1
- package/dist/mcp/git-accounts.d.ts.map +1 -1
- package/dist/mcp/git-accounts.js +74 -96
- package/dist/mcp/git-accounts.js.map +1 -1
- package/dist/mcp/jobs.d.ts +2 -1
- package/dist/mcp/jobs.d.ts.map +1 -1
- package/dist/mcp/jobs.js +68 -122
- package/dist/mcp/jobs.js.map +1 -1
- package/dist/mcp/lineage.d.ts +5 -0
- package/dist/mcp/lineage.d.ts.map +1 -0
- package/dist/mcp/lineage.js +410 -0
- package/dist/mcp/lineage.js.map +1 -0
- package/dist/mcp/node-type-corpus.d.ts +2 -1
- package/dist/mcp/node-type-corpus.d.ts.map +1 -1
- package/dist/mcp/node-type-corpus.js +148 -151
- package/dist/mcp/node-type-corpus.js.map +1 -1
- package/dist/mcp/nodes.d.ts +2 -1
- package/dist/mcp/nodes.d.ts.map +1 -1
- package/dist/mcp/nodes.js +358 -464
- package/dist/mcp/nodes.js.map +1 -1
- package/dist/mcp/pipelines.d.ts +2 -1
- package/dist/mcp/pipelines.d.ts.map +1 -1
- package/dist/mcp/pipelines.js +514 -314
- package/dist/mcp/pipelines.js.map +1 -1
- package/dist/mcp/projects.d.ts +2 -1
- package/dist/mcp/projects.d.ts.map +1 -1
- package/dist/mcp/projects.js +66 -100
- package/dist/mcp/projects.js.map +1 -1
- package/dist/mcp/repo-node-types.d.ts +2 -1
- package/dist/mcp/repo-node-types.d.ts.map +1 -1
- package/dist/mcp/repo-node-types.js +92 -121
- package/dist/mcp/repo-node-types.js.map +1 -1
- package/dist/mcp/runs.d.ts +3 -2
- package/dist/mcp/runs.d.ts.map +1 -1
- package/dist/mcp/runs.js +93 -148
- package/dist/mcp/runs.js.map +1 -1
- package/dist/mcp/skills.d.ts +13 -0
- package/dist/mcp/skills.d.ts.map +1 -0
- package/dist/mcp/skills.js +85 -0
- package/dist/mcp/skills.js.map +1 -0
- package/dist/mcp/subgraphs.d.ts +2 -1
- package/dist/mcp/subgraphs.d.ts.map +1 -1
- package/dist/mcp/subgraphs.js +61 -98
- package/dist/mcp/subgraphs.js.map +1 -1
- package/dist/mcp/tool-helpers.d.ts +37 -0
- package/dist/mcp/tool-helpers.d.ts.map +1 -0
- package/dist/mcp/tool-helpers.js +82 -0
- package/dist/mcp/tool-helpers.js.map +1 -0
- package/dist/mcp/users.d.ts +2 -1
- package/dist/mcp/users.d.ts.map +1 -1
- package/dist/mcp/users.js +92 -145
- package/dist/mcp/users.js.map +1 -1
- package/dist/mcp/workshop.d.ts +2 -1
- package/dist/mcp/workshop.d.ts.map +1 -1
- package/dist/mcp/workshop.js +66 -101
- package/dist/mcp/workshop.js.map +1 -1
- package/dist/mcp/workspaces.d.ts +2 -1
- package/dist/mcp/workspaces.d.ts.map +1 -1
- package/dist/mcp/workspaces.js +19 -34
- package/dist/mcp/workspaces.js.map +1 -1
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +85 -0
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/context/pipeline-workshop-guide.md +1 -1
- package/dist/resources/context/tool-usage.md +7 -0
- package/dist/resources/index.d.ts +13 -0
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +105 -5
- package/dist/resources/index.js.map +1 -1
- package/dist/schemas/node-payloads.d.ts +2 -2
- package/dist/server.d.ts +2 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +158 -41
- package/dist/server.js.map +1 -1
- package/dist/services/cache/snapshots.d.ts.map +1 -1
- package/dist/services/cache/snapshots.js +9 -5
- package/dist/services/cache/snapshots.js.map +1 -1
- package/dist/services/config/schema-resolver.d.ts.map +1 -1
- package/dist/services/config/schema-resolver.js +3 -6
- package/dist/services/config/schema-resolver.js.map +1 -1
- package/dist/services/lineage/lineage-cache.d.ts +53 -0
- package/dist/services/lineage/lineage-cache.d.ts.map +1 -0
- package/dist/services/lineage/lineage-cache.js +335 -0
- package/dist/services/lineage/lineage-cache.js.map +1 -0
- package/dist/services/lineage/lineage-documentation.d.ts +29 -0
- package/dist/services/lineage/lineage-documentation.d.ts.map +1 -0
- package/dist/services/lineage/lineage-documentation.js +80 -0
- package/dist/services/lineage/lineage-documentation.js.map +1 -0
- package/dist/services/lineage/lineage-propagation.d.ts +47 -0
- package/dist/services/lineage/lineage-propagation.d.ts.map +1 -0
- package/dist/services/lineage/lineage-propagation.js +176 -0
- package/dist/services/lineage/lineage-propagation.js.map +1 -0
- package/dist/services/lineage/lineage-search.d.ts +33 -0
- package/dist/services/lineage/lineage-search.d.ts.map +1 -0
- package/dist/services/lineage/lineage-search.js +133 -0
- package/dist/services/lineage/lineage-search.js.map +1 -0
- package/dist/services/lineage/lineage-traversal.d.ts +34 -0
- package/dist/services/lineage/lineage-traversal.d.ts.map +1 -0
- package/dist/services/lineage/lineage-traversal.js +283 -0
- package/dist/services/lineage/lineage-traversal.js.map +1 -0
- package/dist/services/pipelines/clause-extraction.d.ts +3 -0
- package/dist/services/pipelines/clause-extraction.d.ts.map +1 -0
- package/dist/services/pipelines/clause-extraction.js +27 -0
- package/dist/services/pipelines/clause-extraction.js.map +1 -0
- package/dist/services/pipelines/column-helpers.d.ts +8 -0
- package/dist/services/pipelines/column-helpers.d.ts.map +1 -0
- package/dist/services/pipelines/column-helpers.js +125 -0
- package/dist/services/pipelines/column-helpers.js.map +1 -0
- package/dist/services/pipelines/cte-parsing.d.ts +29 -0
- package/dist/services/pipelines/cte-parsing.d.ts.map +1 -0
- package/dist/services/pipelines/cte-parsing.js +160 -0
- package/dist/services/pipelines/cte-parsing.js.map +1 -0
- package/dist/services/pipelines/cte-planning.d.ts +22 -0
- package/dist/services/pipelines/cte-planning.d.ts.map +1 -0
- package/dist/services/pipelines/cte-planning.js +206 -0
- package/dist/services/pipelines/cte-planning.js.map +1 -0
- package/dist/services/pipelines/execution.d.ts.map +1 -1
- package/dist/services/pipelines/execution.js +0 -1
- package/dist/services/pipelines/execution.js.map +1 -1
- package/dist/services/pipelines/intent-parsing.d.ts +24 -0
- package/dist/services/pipelines/intent-parsing.d.ts.map +1 -0
- package/dist/services/pipelines/intent-parsing.js +245 -0
- package/dist/services/pipelines/intent-parsing.js.map +1 -0
- package/dist/services/pipelines/intent-resolution.d.ts +24 -0
- package/dist/services/pipelines/intent-resolution.d.ts.map +1 -0
- package/dist/services/pipelines/intent-resolution.js +141 -0
- package/dist/services/pipelines/intent-resolution.js.map +1 -0
- package/dist/services/pipelines/intent.d.ts +4 -45
- package/dist/services/pipelines/intent.d.ts.map +1 -1
- package/dist/services/pipelines/intent.js +14 -408
- package/dist/services/pipelines/intent.js.map +1 -1
- package/dist/services/pipelines/node-type-candidates.d.ts +6 -0
- package/dist/services/pipelines/node-type-candidates.d.ts.map +1 -0
- package/dist/services/pipelines/node-type-candidates.js +165 -0
- package/dist/services/pipelines/node-type-candidates.js.map +1 -0
- package/dist/services/pipelines/node-type-intent.d.ts +1 -5
- package/dist/services/pipelines/node-type-intent.d.ts.map +1 -1
- package/dist/services/pipelines/node-type-intent.js +1 -5
- package/dist/services/pipelines/node-type-intent.js.map +1 -1
- package/dist/services/pipelines/node-type-scoring.d.ts +13 -0
- package/dist/services/pipelines/node-type-scoring.d.ts.map +1 -0
- package/dist/services/pipelines/node-type-scoring.js +322 -0
- package/dist/services/pipelines/node-type-scoring.js.map +1 -0
- package/dist/services/pipelines/node-type-selection.d.ts +22 -2
- package/dist/services/pipelines/node-type-selection.d.ts.map +1 -1
- package/dist/services/pipelines/node-type-selection.js +16 -538
- package/dist/services/pipelines/node-type-selection.js.map +1 -1
- package/dist/services/pipelines/plan-builder.d.ts +33 -0
- package/dist/services/pipelines/plan-builder.d.ts.map +1 -0
- package/dist/services/pipelines/plan-builder.js +224 -0
- package/dist/services/pipelines/plan-builder.js.map +1 -0
- package/dist/services/pipelines/planning-types.d.ts +543 -0
- package/dist/services/pipelines/planning-types.d.ts.map +1 -0
- package/dist/services/pipelines/planning-types.js +85 -0
- package/dist/services/pipelines/planning-types.js.map +1 -0
- package/dist/services/pipelines/planning.d.ts +8 -537
- package/dist/services/pipelines/planning.d.ts.map +1 -1
- package/dist/services/pipelines/planning.js +10 -1956
- package/dist/services/pipelines/planning.js.map +1 -1
- package/dist/services/pipelines/review.d.ts.map +1 -1
- package/dist/services/pipelines/review.js +3 -8
- package/dist/services/pipelines/review.js.map +1 -1
- package/dist/services/pipelines/select-parsing.d.ts +7 -0
- package/dist/services/pipelines/select-parsing.d.ts.map +1 -0
- package/dist/services/pipelines/select-parsing.js +185 -0
- package/dist/services/pipelines/select-parsing.js.map +1 -0
- package/dist/services/pipelines/source-parsing.d.ts +8 -0
- package/dist/services/pipelines/source-parsing.d.ts.map +1 -0
- package/dist/services/pipelines/source-parsing.js +151 -0
- package/dist/services/pipelines/source-parsing.js.map +1 -0
- package/dist/services/pipelines/sql-parsing.d.ts +8 -0
- package/dist/services/pipelines/sql-parsing.d.ts.map +1 -0
- package/dist/services/pipelines/sql-parsing.js +9 -0
- package/dist/services/pipelines/sql-parsing.js.map +1 -0
- package/dist/services/pipelines/sql-tokenizer.d.ts +42 -0
- package/dist/services/pipelines/sql-tokenizer.d.ts.map +1 -0
- package/dist/services/pipelines/sql-tokenizer.js +493 -0
- package/dist/services/pipelines/sql-tokenizer.js.map +1 -0
- package/dist/services/pipelines/sql-utils.d.ts +30 -0
- package/dist/services/pipelines/sql-utils.d.ts.map +1 -0
- package/dist/services/pipelines/sql-utils.js +62 -0
- package/dist/services/pipelines/sql-utils.js.map +1 -0
- package/dist/services/pipelines/workshop.d.ts.map +1 -1
- package/dist/services/pipelines/workshop.js +53 -25
- package/dist/services/pipelines/workshop.js.map +1 -1
- package/dist/services/pipelines/workspace-resolution.d.ts +18 -0
- package/dist/services/pipelines/workspace-resolution.d.ts.map +1 -0
- package/dist/services/pipelines/workspace-resolution.js +279 -0
- package/dist/services/pipelines/workspace-resolution.js.map +1 -0
- package/dist/services/runs/diagnostics.d.ts.map +1 -1
- package/dist/services/runs/diagnostics.js +3 -8
- package/dist/services/runs/diagnostics.js.map +1 -1
- package/dist/services/shared/elicitation.d.ts +14 -0
- package/dist/services/shared/elicitation.d.ts.map +1 -0
- package/dist/services/shared/elicitation.js +56 -0
- package/dist/services/shared/elicitation.js.map +1 -0
- package/dist/services/workspace/node-creation.d.ts.map +1 -1
- package/dist/services/workspace/node-creation.js +5 -1
- package/dist/services/workspace/node-creation.js.map +1 -1
- package/dist/services/workspace/node-update-helpers.d.ts.map +1 -1
- package/dist/services/workspace/node-update-helpers.js +3 -8
- package/dist/services/workspace/node-update-helpers.js.map +1 -1
- package/dist/utils.d.ts +11 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +20 -1
- package/dist/utils.js.map +1 -1
- package/dist/workflows/get-environment-health.d.ts +49 -0
- package/dist/workflows/get-environment-health.d.ts.map +1 -0
- package/dist/workflows/get-environment-health.js +310 -0
- package/dist/workflows/get-environment-health.js.map +1 -0
- package/dist/workflows/get-environment-overview.d.ts +2 -1
- package/dist/workflows/get-environment-overview.d.ts.map +1 -1
- package/dist/workflows/get-environment-overview.js +13 -19
- package/dist/workflows/get-environment-overview.js.map +1 -1
- package/dist/workflows/get-run-details.d.ts +2 -2
- package/dist/workflows/get-run-details.d.ts.map +1 -1
- package/dist/workflows/get-run-details.js +14 -19
- package/dist/workflows/get-run-details.js.map +1 -1
- package/dist/workflows/retry-and-wait.d.ts.map +1 -1
- package/dist/workflows/retry-and-wait.js +3 -2
- package/dist/workflows/retry-and-wait.js.map +1 -1
- package/dist/workflows/run-and-wait.d.ts.map +1 -1
- package/dist/workflows/run-and-wait.js +3 -2
- package/dist/workflows/run-and-wait.js.map +1 -1
- package/package.json +2 -2
|
@@ -1,1857 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
expression: z.string(),
|
|
11
|
-
outputName: z.string().nullable(),
|
|
12
|
-
sourceNodeAlias: z.string().nullable(),
|
|
13
|
-
sourceNodeName: z.string().nullable(),
|
|
14
|
-
sourceNodeID: z.string().nullable(),
|
|
15
|
-
sourceColumnName: z.string().nullable(),
|
|
16
|
-
kind: z.enum(["column", "expression"]),
|
|
17
|
-
supported: z.boolean(),
|
|
18
|
-
reason: z.string().optional(),
|
|
19
|
-
})
|
|
20
|
-
.strict();
|
|
21
|
-
const PlannedPipelineNodeSchema = z
|
|
22
|
-
.object({
|
|
23
|
-
planNodeID: z.string(),
|
|
24
|
-
name: z.string(),
|
|
25
|
-
nodeType: z.string(),
|
|
26
|
-
nodeTypeFamily: z
|
|
27
|
-
.enum(PIPELINE_NODE_TYPE_FAMILIES)
|
|
28
|
-
.nullable()
|
|
29
|
-
.optional(),
|
|
30
|
-
predecessorNodeIDs: z.array(z.string()),
|
|
31
|
-
predecessorPlanNodeIDs: z.array(z.string()),
|
|
32
|
-
predecessorNodeNames: z.array(z.string()),
|
|
33
|
-
description: z.string().nullable(),
|
|
34
|
-
sql: z.string().nullable(),
|
|
35
|
-
selectItems: z.array(PlannedSelectItemSchema),
|
|
36
|
-
outputColumnNames: z.array(z.string()),
|
|
37
|
-
configOverrides: NodeConfigInputSchema,
|
|
38
|
-
sourceRefs: z.array(z
|
|
39
|
-
.object({
|
|
40
|
-
locationName: z.string(),
|
|
41
|
-
nodeName: z.string(),
|
|
42
|
-
alias: z.string().nullable(),
|
|
43
|
-
nodeID: z.string().nullable(),
|
|
44
|
-
})
|
|
45
|
-
.strict()),
|
|
46
|
-
joinCondition: z.string().nullable(),
|
|
47
|
-
location: z
|
|
48
|
-
.object({
|
|
49
|
-
locationName: z.string().optional(),
|
|
50
|
-
database: z.string().optional(),
|
|
51
|
-
schema: z.string().optional(),
|
|
52
|
-
})
|
|
53
|
-
.strict(),
|
|
54
|
-
requiresFullSetNode: z.boolean(),
|
|
55
|
-
templateDefaults: z
|
|
56
|
-
.object({
|
|
57
|
-
inferredTopLevelFields: z.record(z.unknown()),
|
|
58
|
-
inferredConfig: NodeConfigInputSchema,
|
|
59
|
-
})
|
|
60
|
-
.strict()
|
|
61
|
-
.optional(),
|
|
62
|
-
})
|
|
63
|
-
.strict();
|
|
64
|
-
export const PipelinePlanSchema = z
|
|
65
|
-
.object({
|
|
66
|
-
version: z.literal(1),
|
|
67
|
-
intent: z.enum(["sql", "goal"]),
|
|
68
|
-
status: z.enum(["ready", "needs_clarification"]),
|
|
69
|
-
workspaceID: z.string(),
|
|
70
|
-
platform: z.string().nullable(),
|
|
71
|
-
goal: z.string().nullable(),
|
|
72
|
-
sql: z.string().nullable(),
|
|
73
|
-
nodes: z.array(PlannedPipelineNodeSchema),
|
|
74
|
-
assumptions: z.array(z.string()),
|
|
75
|
-
openQuestions: z.array(z.string()),
|
|
76
|
-
warnings: z.array(z.string()),
|
|
77
|
-
supportedNodeTypes: z.array(z.string()),
|
|
78
|
-
nodeTypeSelection: z.record(z.unknown()).optional(),
|
|
79
|
-
cteNodeSummary: z.array(z.record(z.unknown())).optional(),
|
|
80
|
-
STOP_AND_CONFIRM: z.string().optional(),
|
|
81
|
-
})
|
|
82
|
-
.strict();
|
|
83
|
-
const WORKSPACE_NODE_PAGE_LIMIT = 200;
|
|
84
|
-
export const DEFAULT_STAGE_CONFIG = {
|
|
85
|
-
postSQL: "",
|
|
86
|
-
preSQL: "",
|
|
87
|
-
testsEnabled: true,
|
|
88
|
-
};
|
|
89
|
-
export function normalizeSqlIdentifier(identifier) {
|
|
90
|
-
return identifier.trim().replace(/^["`[]|["`\]]$/g, "").toUpperCase();
|
|
91
|
-
}
|
|
92
|
-
export function deepClone(value) {
|
|
93
|
-
return JSON.parse(JSON.stringify(value));
|
|
94
|
-
}
|
|
95
|
-
export function normalizeWhitespace(value) {
|
|
96
|
-
return value.replace(/\s+/g, " ").trim();
|
|
97
|
-
}
|
|
98
|
-
export function buildSourceDependencyKey(locationName, nodeName) {
|
|
99
|
-
return `${normalizeSqlIdentifier(locationName ?? "")}::${normalizeSqlIdentifier(nodeName)}`;
|
|
100
|
-
}
|
|
101
|
-
export function getUniqueSourceDependencies(sourceRefs) {
|
|
102
|
-
const seen = new Set();
|
|
103
|
-
const dependencies = [];
|
|
104
|
-
for (const ref of sourceRefs) {
|
|
105
|
-
const key = buildSourceDependencyKey(ref.locationName, ref.nodeName);
|
|
106
|
-
if (seen.has(key)) {
|
|
107
|
-
continue;
|
|
108
|
-
}
|
|
109
|
-
seen.add(key);
|
|
110
|
-
dependencies.push({
|
|
111
|
-
locationName: ref.locationName,
|
|
112
|
-
nodeName: ref.nodeName,
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
return dependencies;
|
|
116
|
-
}
|
|
117
|
-
function isIdentifierChar(char) {
|
|
118
|
-
return !!char && /[A-Za-z0-9_$]/.test(char);
|
|
119
|
-
}
|
|
120
|
-
function stripIdentifierQuotes(identifier) {
|
|
121
|
-
const trimmed = identifier.trim();
|
|
122
|
-
if ((trimmed.startsWith('"') && trimmed.endsWith('"')) ||
|
|
123
|
-
(trimmed.startsWith("`") && trimmed.endsWith("`")) ||
|
|
124
|
-
(trimmed.startsWith("[") && trimmed.endsWith("]"))) {
|
|
125
|
-
return trimmed.slice(1, -1);
|
|
126
|
-
}
|
|
127
|
-
return trimmed;
|
|
128
|
-
}
|
|
129
|
-
function findTopLevelKeywordIndex(sql, keyword, startIndex = 0) {
|
|
130
|
-
const lowerKeyword = keyword.toLowerCase();
|
|
131
|
-
let parenDepth = 0;
|
|
132
|
-
let inSingleQuote = false;
|
|
133
|
-
let inDoubleQuote = false;
|
|
134
|
-
let inBacktick = false;
|
|
135
|
-
let inBracket = false;
|
|
136
|
-
let inLineComment = false;
|
|
137
|
-
let inBlockComment = false;
|
|
138
|
-
for (let index = startIndex; index < sql.length; index += 1) {
|
|
139
|
-
const char = sql[index];
|
|
140
|
-
const next = sql[index + 1];
|
|
141
|
-
if (inLineComment) {
|
|
142
|
-
if (char === "\n") {
|
|
143
|
-
inLineComment = false;
|
|
144
|
-
}
|
|
145
|
-
continue;
|
|
146
|
-
}
|
|
147
|
-
if (inBlockComment) {
|
|
148
|
-
if (char === "*" && next === "/") {
|
|
149
|
-
inBlockComment = false;
|
|
150
|
-
index += 1;
|
|
151
|
-
}
|
|
152
|
-
continue;
|
|
153
|
-
}
|
|
154
|
-
if (inSingleQuote) {
|
|
155
|
-
if (char === "'" && next === "'") {
|
|
156
|
-
index += 1;
|
|
157
|
-
}
|
|
158
|
-
else if (char === "'") {
|
|
159
|
-
inSingleQuote = false;
|
|
160
|
-
}
|
|
161
|
-
continue;
|
|
162
|
-
}
|
|
163
|
-
if (inDoubleQuote) {
|
|
164
|
-
if (char === '"') {
|
|
165
|
-
inDoubleQuote = false;
|
|
166
|
-
}
|
|
167
|
-
continue;
|
|
168
|
-
}
|
|
169
|
-
if (inBacktick) {
|
|
170
|
-
if (char === "`") {
|
|
171
|
-
inBacktick = false;
|
|
172
|
-
}
|
|
173
|
-
continue;
|
|
174
|
-
}
|
|
175
|
-
if (inBracket) {
|
|
176
|
-
if (char === "]") {
|
|
177
|
-
inBracket = false;
|
|
178
|
-
}
|
|
179
|
-
continue;
|
|
180
|
-
}
|
|
181
|
-
if (char === "'") {
|
|
182
|
-
inSingleQuote = true;
|
|
183
|
-
continue;
|
|
184
|
-
}
|
|
185
|
-
if (char === '"') {
|
|
186
|
-
inDoubleQuote = true;
|
|
187
|
-
continue;
|
|
188
|
-
}
|
|
189
|
-
if (char === "`") {
|
|
190
|
-
inBacktick = true;
|
|
191
|
-
continue;
|
|
192
|
-
}
|
|
193
|
-
if (char === "[") {
|
|
194
|
-
inBracket = true;
|
|
195
|
-
continue;
|
|
196
|
-
}
|
|
197
|
-
if (char === "-" && next === "-") {
|
|
198
|
-
inLineComment = true;
|
|
199
|
-
index += 1;
|
|
200
|
-
continue;
|
|
201
|
-
}
|
|
202
|
-
if (char === "/" && next === "*") {
|
|
203
|
-
inBlockComment = true;
|
|
204
|
-
index += 1;
|
|
205
|
-
continue;
|
|
206
|
-
}
|
|
207
|
-
if (char === "(") {
|
|
208
|
-
parenDepth += 1;
|
|
209
|
-
continue;
|
|
210
|
-
}
|
|
211
|
-
if (char === ")" && parenDepth > 0) {
|
|
212
|
-
parenDepth -= 1;
|
|
213
|
-
continue;
|
|
214
|
-
}
|
|
215
|
-
if (parenDepth !== 0) {
|
|
216
|
-
continue;
|
|
217
|
-
}
|
|
218
|
-
if (sql.slice(index, index + lowerKeyword.length).toLowerCase() === lowerKeyword &&
|
|
219
|
-
!isIdentifierChar(sql[index - 1]) &&
|
|
220
|
-
!isIdentifierChar(sql[index + lowerKeyword.length])) {
|
|
221
|
-
return index;
|
|
222
|
-
}
|
|
223
|
-
}
|
|
224
|
-
return -1;
|
|
225
|
-
}
|
|
226
|
-
/**
|
|
227
|
-
* Iterates through a SQL string character-by-character, tracking quoting and
|
|
228
|
-
* parenthesis depth. For each top-level (unquoted, depth-0) character the
|
|
229
|
-
* callback receives the character, its index, and the current paren depth.
|
|
230
|
-
* The callback returns `true` to continue or `false` to stop early.
|
|
231
|
-
*
|
|
232
|
-
* The scanner handles: single-quoted strings (with '' escapes), double-quoted
|
|
233
|
-
* identifiers, backtick-quoted identifiers, bracket-quoted identifiers, block
|
|
234
|
-
* comments, and line comments.
|
|
235
|
-
*/
|
|
236
|
-
function scanTopLevel(value, callback) {
|
|
237
|
-
let parenDepth = 0;
|
|
238
|
-
let inSingleQuote = false;
|
|
239
|
-
let inDoubleQuote = false;
|
|
240
|
-
let inBacktick = false;
|
|
241
|
-
let inBracket = false;
|
|
242
|
-
let inLineComment = false;
|
|
243
|
-
let inBlockComment = false;
|
|
244
|
-
for (let index = 0; index < value.length; index += 1) {
|
|
245
|
-
const char = value[index];
|
|
246
|
-
const next = value[index + 1];
|
|
247
|
-
if (inLineComment) {
|
|
248
|
-
if (char === "\n") {
|
|
249
|
-
inLineComment = false;
|
|
250
|
-
}
|
|
251
|
-
continue;
|
|
252
|
-
}
|
|
253
|
-
if (inBlockComment) {
|
|
254
|
-
if (char === "*" && next === "/") {
|
|
255
|
-
inBlockComment = false;
|
|
256
|
-
index += 1;
|
|
257
|
-
}
|
|
258
|
-
continue;
|
|
259
|
-
}
|
|
260
|
-
if (inSingleQuote) {
|
|
261
|
-
if (char === "'" && next === "'") {
|
|
262
|
-
index += 1;
|
|
263
|
-
}
|
|
264
|
-
else if (char === "'") {
|
|
265
|
-
inSingleQuote = false;
|
|
266
|
-
}
|
|
267
|
-
continue;
|
|
268
|
-
}
|
|
269
|
-
if (inDoubleQuote) {
|
|
270
|
-
if (char === '"')
|
|
271
|
-
inDoubleQuote = false;
|
|
272
|
-
continue;
|
|
273
|
-
}
|
|
274
|
-
if (inBacktick) {
|
|
275
|
-
if (char === "`")
|
|
276
|
-
inBacktick = false;
|
|
277
|
-
continue;
|
|
278
|
-
}
|
|
279
|
-
if (inBracket) {
|
|
280
|
-
if (char === "]")
|
|
281
|
-
inBracket = false;
|
|
282
|
-
continue;
|
|
283
|
-
}
|
|
284
|
-
if (char === "'") {
|
|
285
|
-
inSingleQuote = true;
|
|
286
|
-
continue;
|
|
287
|
-
}
|
|
288
|
-
if (char === '"') {
|
|
289
|
-
inDoubleQuote = true;
|
|
290
|
-
continue;
|
|
291
|
-
}
|
|
292
|
-
if (char === "`") {
|
|
293
|
-
inBacktick = true;
|
|
294
|
-
continue;
|
|
295
|
-
}
|
|
296
|
-
if (char === "[") {
|
|
297
|
-
inBracket = true;
|
|
298
|
-
continue;
|
|
299
|
-
}
|
|
300
|
-
if (char === "-" && next === "-") {
|
|
301
|
-
inLineComment = true;
|
|
302
|
-
index += 1;
|
|
303
|
-
continue;
|
|
304
|
-
}
|
|
305
|
-
if (char === "/" && next === "*") {
|
|
306
|
-
inBlockComment = true;
|
|
307
|
-
index += 1;
|
|
308
|
-
continue;
|
|
309
|
-
}
|
|
310
|
-
if (char === "(") {
|
|
311
|
-
parenDepth += 1;
|
|
312
|
-
continue;
|
|
313
|
-
}
|
|
314
|
-
if (char === ")" && parenDepth > 0) {
|
|
315
|
-
parenDepth -= 1;
|
|
316
|
-
continue;
|
|
317
|
-
}
|
|
318
|
-
if (!callback(char, index, parenDepth)) {
|
|
319
|
-
return;
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
function splitTopLevel(value, delimiter) {
|
|
324
|
-
const parts = [];
|
|
325
|
-
let start = 0;
|
|
326
|
-
scanTopLevel(value, (char, index, parenDepth) => {
|
|
327
|
-
if (char === delimiter && parenDepth === 0) {
|
|
328
|
-
parts.push(value.slice(start, index).trim());
|
|
329
|
-
start = index + 1;
|
|
330
|
-
}
|
|
331
|
-
return true;
|
|
332
|
-
});
|
|
333
|
-
const tail = value.slice(start).trim();
|
|
334
|
-
if (tail.length > 0) {
|
|
335
|
-
parts.push(tail);
|
|
336
|
-
}
|
|
337
|
-
return parts;
|
|
338
|
-
}
|
|
339
|
-
function tokenizeTopLevelWhitespace(value) {
|
|
340
|
-
const parts = [];
|
|
341
|
-
let tokenStart = null;
|
|
342
|
-
let tokenEnd = 0;
|
|
343
|
-
let tokenText = "";
|
|
344
|
-
let parenDepth = 0;
|
|
345
|
-
let inSingleQuote = false;
|
|
346
|
-
let inDoubleQuote = false;
|
|
347
|
-
let inBacktick = false;
|
|
348
|
-
let inBracket = false;
|
|
349
|
-
let inLineComment = false;
|
|
350
|
-
let inBlockComment = false;
|
|
351
|
-
const appendChar = (char, index) => {
|
|
352
|
-
if (tokenStart === null) {
|
|
353
|
-
tokenStart = index;
|
|
354
|
-
}
|
|
355
|
-
tokenText += char;
|
|
356
|
-
tokenEnd = index + 1;
|
|
357
|
-
};
|
|
358
|
-
const flushToken = () => {
|
|
359
|
-
if (tokenStart === null || tokenText.length === 0) {
|
|
360
|
-
tokenStart = null;
|
|
361
|
-
tokenEnd = 0;
|
|
362
|
-
tokenText = "";
|
|
363
|
-
return;
|
|
364
|
-
}
|
|
365
|
-
parts.push({
|
|
366
|
-
text: tokenText,
|
|
367
|
-
start: tokenStart,
|
|
368
|
-
end: tokenEnd,
|
|
369
|
-
});
|
|
370
|
-
tokenStart = null;
|
|
371
|
-
tokenEnd = 0;
|
|
372
|
-
tokenText = "";
|
|
373
|
-
};
|
|
374
|
-
for (let index = 0; index < value.length; index += 1) {
|
|
375
|
-
const char = value[index];
|
|
376
|
-
const next = value[index + 1];
|
|
377
|
-
if (inLineComment) {
|
|
378
|
-
if (char === "\n") {
|
|
379
|
-
inLineComment = false;
|
|
380
|
-
}
|
|
381
|
-
continue;
|
|
382
|
-
}
|
|
383
|
-
if (inBlockComment) {
|
|
384
|
-
if (char === "*" && next === "/") {
|
|
385
|
-
inBlockComment = false;
|
|
386
|
-
index += 1;
|
|
387
|
-
}
|
|
388
|
-
continue;
|
|
389
|
-
}
|
|
390
|
-
if (inSingleQuote) {
|
|
391
|
-
appendChar(char, index);
|
|
392
|
-
if (char === "'" && next === "'") {
|
|
393
|
-
appendChar(next, index + 1);
|
|
394
|
-
index += 1;
|
|
395
|
-
}
|
|
396
|
-
else if (char === "'") {
|
|
397
|
-
inSingleQuote = false;
|
|
398
|
-
}
|
|
399
|
-
continue;
|
|
400
|
-
}
|
|
401
|
-
if (inDoubleQuote) {
|
|
402
|
-
appendChar(char, index);
|
|
403
|
-
if (char === '"') {
|
|
404
|
-
inDoubleQuote = false;
|
|
405
|
-
}
|
|
406
|
-
continue;
|
|
407
|
-
}
|
|
408
|
-
if (inBacktick) {
|
|
409
|
-
appendChar(char, index);
|
|
410
|
-
if (char === "`") {
|
|
411
|
-
inBacktick = false;
|
|
412
|
-
}
|
|
413
|
-
continue;
|
|
414
|
-
}
|
|
415
|
-
if (inBracket) {
|
|
416
|
-
appendChar(char, index);
|
|
417
|
-
if (char === "]") {
|
|
418
|
-
inBracket = false;
|
|
419
|
-
}
|
|
420
|
-
continue;
|
|
421
|
-
}
|
|
422
|
-
if (char === "-" && next === "-" && parenDepth === 0) {
|
|
423
|
-
flushToken();
|
|
424
|
-
inLineComment = true;
|
|
425
|
-
index += 1;
|
|
426
|
-
continue;
|
|
427
|
-
}
|
|
428
|
-
if (char === "/" && next === "*" && parenDepth === 0) {
|
|
429
|
-
flushToken();
|
|
430
|
-
inBlockComment = true;
|
|
431
|
-
index += 1;
|
|
432
|
-
continue;
|
|
433
|
-
}
|
|
434
|
-
if (/\s/u.test(char) && parenDepth === 0) {
|
|
435
|
-
flushToken();
|
|
436
|
-
continue;
|
|
437
|
-
}
|
|
438
|
-
if (char === "'") {
|
|
439
|
-
appendChar(char, index);
|
|
440
|
-
inSingleQuote = true;
|
|
441
|
-
continue;
|
|
442
|
-
}
|
|
443
|
-
if (char === '"') {
|
|
444
|
-
appendChar(char, index);
|
|
445
|
-
inDoubleQuote = true;
|
|
446
|
-
continue;
|
|
447
|
-
}
|
|
448
|
-
if (char === "`") {
|
|
449
|
-
appendChar(char, index);
|
|
450
|
-
inBacktick = true;
|
|
451
|
-
continue;
|
|
452
|
-
}
|
|
453
|
-
if (char === "[") {
|
|
454
|
-
appendChar(char, index);
|
|
455
|
-
inBracket = true;
|
|
456
|
-
continue;
|
|
457
|
-
}
|
|
458
|
-
if (char === "(") {
|
|
459
|
-
appendChar(char, index);
|
|
460
|
-
parenDepth += 1;
|
|
461
|
-
continue;
|
|
462
|
-
}
|
|
463
|
-
if (char === ")" && parenDepth > 0) {
|
|
464
|
-
parenDepth -= 1;
|
|
465
|
-
appendChar(char, index);
|
|
466
|
-
continue;
|
|
467
|
-
}
|
|
468
|
-
appendChar(char, index);
|
|
469
|
-
}
|
|
470
|
-
flushToken();
|
|
471
|
-
return parts;
|
|
472
|
-
}
|
|
473
|
-
function splitTopLevelWhitespace(value) {
|
|
474
|
-
return tokenizeTopLevelWhitespace(value).map((part) => part.text);
|
|
475
|
-
}
|
|
476
|
-
function skipSqlTrivia(value, index) {
|
|
477
|
-
let nextIndex = index;
|
|
478
|
-
while (nextIndex < value.length) {
|
|
479
|
-
if (/\s/u.test(value[nextIndex] ?? "")) {
|
|
480
|
-
nextIndex += 1;
|
|
481
|
-
continue;
|
|
482
|
-
}
|
|
483
|
-
if (value[nextIndex] === "-" && value[nextIndex + 1] === "-") {
|
|
484
|
-
nextIndex += 2;
|
|
485
|
-
while (nextIndex < value.length && value[nextIndex] !== "\n") {
|
|
486
|
-
nextIndex += 1;
|
|
487
|
-
}
|
|
488
|
-
continue;
|
|
489
|
-
}
|
|
490
|
-
if (value[nextIndex] === "/" && value[nextIndex + 1] === "*") {
|
|
491
|
-
const blockEnd = value.indexOf("*/", nextIndex + 2);
|
|
492
|
-
nextIndex = blockEnd >= 0 ? blockEnd + 2 : value.length;
|
|
493
|
-
continue;
|
|
494
|
-
}
|
|
495
|
-
break;
|
|
496
|
-
}
|
|
497
|
-
return nextIndex;
|
|
498
|
-
}
|
|
499
|
-
function matchesKeywordAt(value, index, keyword) {
|
|
500
|
-
return (value.slice(index, index + keyword.length).toLowerCase() === keyword &&
|
|
501
|
-
!isIdentifierChar(value[index - 1]) &&
|
|
502
|
-
!isIdentifierChar(value[index + keyword.length]));
|
|
503
|
-
}
|
|
504
|
-
function extractSelectClause(sql) {
|
|
505
|
-
const selectIndex = findTopLevelKeywordIndex(sql, "select");
|
|
506
|
-
if (selectIndex < 0) {
|
|
507
|
-
return null;
|
|
508
|
-
}
|
|
509
|
-
const fromIndex = findTopLevelKeywordIndex(sql, "from", selectIndex + 6);
|
|
510
|
-
if (fromIndex < 0) {
|
|
511
|
-
return null;
|
|
512
|
-
}
|
|
513
|
-
return sql.slice(selectIndex + 6, fromIndex).trim();
|
|
514
|
-
}
|
|
515
|
-
/**
 * Return everything from the `from` keyword (inclusive) to the end of the
 * statement, trimmed and with trailing semicolons removed.
 *
 * @param {string} sql - SQL statement.
 * @returns {string|null} The FROM clause text, or null when `select` or a
 *   following `from` is not found by findTopLevelKeywordIndex.
 */
function extractFromClause(sql) {
    const selectAt = findTopLevelKeywordIndex(sql, "select");
    // Only look for FROM when a SELECT was found; 6 === "select".length.
    const fromAt = selectAt < 0 ? -1 : findTopLevelKeywordIndex(sql, "from", selectAt + 6);
    if (fromAt < 0) {
        return null;
    }
    const tail = sql.slice(fromAt).trim();
    return tail.replace(/;+\s*$/u, "");
}
|
|
529
|
-
/**
 * Lower-case keywords that end the relation currently being captured while
 * scanning a FROM clause.
 */
const SOURCE_SEGMENT_TERMINATORS = [
    // Join introducers and modifiers.
    "join",
    "left",
    "right",
    "inner",
    "full",
    "cross",
    "natural",
    "lateral",
    // Join predicates.
    "on",
    "using",
    // Clauses that follow the FROM list.
    "where",
    "group",
    "order",
    "having",
    "limit",
    "qualify",
    // Set operators and trailing clauses.
    "union",
    "intersect",
    "except",
    "window",
    "fetch",
];
|
|
536
|
-
/**
 * Find which entry of SOURCE_SEGMENT_TERMINATORS, if any, starts at `index`.
 *
 * @param {string} value - Text being scanned.
 * @param {number} index - Position to test.
 * @returns {string|null} The first matching keyword in list order, or null.
 */
function findTerminatorKeyword(value, index) {
    const hit = SOURCE_SEGMENT_TERMINATORS.find((candidate) => matchesKeywordAt(value, index, candidate));
    return hit ?? null;
}
|
|
544
|
-
/**
 * Walk a FROM clause with scanTopLevel and collect the text of each relation
 * that appears outside parentheses (after `from`, after `join`, or after a
 * comma).
 *
 * @param {string} fromClause - FROM clause text, starting at the `from` keyword.
 * @returns {{text: string, relationStart: number, relationEnd: number}[]}
 *   One entry per captured relation; offsets index into `fromClause` and
 *   exclude trailing whitespace.
 */
function extractTopLevelSourceSegments(fromClause) {
    const collected = [];
    // Offset where the relation being captured begins; null while not capturing.
    let openAt = null;
    const beginAfter = (index) => {
        openAt = skipSqlTrivia(fromClause, index);
    };
    const closeSegment = (stopIndex) => {
        if (openAt === null) {
            return;
        }
        let end = stopIndex;
        while (end > openAt && /\s/u.test(fromClause[end - 1] ?? "")) {
            end -= 1;
        }
        if (end <= openAt) {
            return;
        }
        collected.push({
            text: fromClause.slice(openAt, end),
            relationStart: openAt,
            relationEnd: end,
        });
    };
    scanTopLevel(fromClause, (char, index, parenDepth) => {
        // Anything nested in parentheses is ignored.
        if (parenDepth !== 0) {
            return true;
        }
        if (openAt === null) {
            // Not capturing: wait for something that introduces a relation.
            if (matchesKeywordAt(fromClause, index, "from") ||
                matchesKeywordAt(fromClause, index, "join")) {
                beginAfter(index + 4);
            }
            else if (char === ",") {
                beginAfter(index + 1);
            }
            return true;
        }
        if (char === ",") {
            closeSegment(index);
            beginAfter(index + 1);
            return true;
        }
        const terminator = findTerminatorKeyword(fromClause, index);
        if (terminator) {
            closeSegment(index);
            if (terminator === "join") {
                // A bare JOIN immediately starts the next relation.
                beginAfter(index + terminator.length);
            }
            else {
                openAt = null;
            }
        }
        return true;
    });
    closeSegment(fromClause.length);
    return collected;
}
|
|
597
|
-
/**
 * Check whether `token` is a single identifier in one of the accepted
 * spellings: bare (`name`), double-quoted, backtick-quoted or bracketed.
 * Quoted forms must contain at least one character.
 *
 * @param {string} token - Candidate identifier text.
 * @returns {boolean} True when the whole token matches one accepted form.
 */
function isSupportedIdentifierToken(token) {
    const acceptedForms = [
        /^[A-Za-z_][\w$]*$/u,
        /^"[^"]+"$/u,
        /^`[^`]+`$/u,
        /^\[[^\]]+\]$/u,
    ];
    return acceptedForms.some((form) => form.test(token));
}
|
|
603
|
-
/**
 * Interpret one FROM-clause segment as a source reference.
 *
 * Two relation spellings are recognised: a `{{ ref('LOCATION', 'NODE') }}`
 * template and a dotted identifier path (`db.schema.table`). Tokens after
 * the relation are read as an optional alias (`AS alias` or bare `alias`).
 *
 * @param {{text: string, relationStart: number}} segment - Segment text and
 *   the offset of that text within the FROM clause.
 * @returns {object|null} A ref descriptor (`nodeID` is always null here), or
 *   null when the segment is empty, starts with `(`, has an unclosed `{{`,
 *   or is not a supported identifier path.
 */
function parseSqlSourceSegment(segment) {
    const source = segment.text;
    const firstMeaningful = skipSqlTrivia(source, 0);
    if (firstMeaningful >= source.length) {
        return null;
    }
    let relationText;
    let tokenStart;
    let tokenEnd;
    let trailingTokens;
    if (source.startsWith("{{", firstMeaningful)) {
        // Template relation: take everything up to the first closing braces.
        const closeAt = source.indexOf("}}", firstMeaningful);
        if (closeAt < 0) {
            return null;
        }
        tokenStart = firstMeaningful;
        tokenEnd = closeAt + 2;
        relationText = source.slice(tokenStart, tokenEnd).trim();
        trailingTokens = tokenizeTopLevelWhitespace(source.slice(tokenEnd)).map((token) => token.text);
    }
    else {
        const tokens = tokenizeTopLevelWhitespace(source);
        if (tokens.length === 0) {
            return null;
        }
        const [head, ...rest] = tokens;
        relationText = head.text;
        tokenStart = head.start;
        tokenEnd = head.end;
        trailingTokens = rest.map((token) => token.text);
    }
    const [firstExtra, secondExtra] = trailingTokens;
    let alias = null;
    if (firstExtra?.toLowerCase() === "as") {
        alias = secondExtra ? stripIdentifierQuotes(secondExtra) : null;
    }
    else if (firstExtra) {
        alias = stripIdentifierQuotes(firstExtra);
    }
    // Offsets are reported relative to the FROM clause, not the segment.
    const position = {
        relationStart: segment.relationStart + tokenStart,
        relationEnd: segment.relationStart + tokenEnd,
    };
    const refMatch = relationText.match(/^\{\{\s*ref\(\s*(['"])([^'"]+)\1\s*,\s*(['"])([^'"]+)\3\s*\)\s*\}\}$/iu);
    if (refMatch) {
        const [, , refLocation, , refNode] = refMatch;
        return {
            locationName: refLocation ?? "",
            nodeName: refNode ?? "",
            alias,
            nodeID: null,
            sourceStyle: "coalesce_ref",
            locationCandidates: refLocation ? [refLocation] : [],
            ...position,
        };
    }
    if (relationText.startsWith("(")) {
        return null;
    }
    const parts = splitTopLevel(relationText, ".").map((part) => part.trim());
    const everyPartValid = parts.length !== 0 &&
        parts.every((part) => part.length !== 0 && isSupportedIdentifierToken(part));
    if (!everyPartValid) {
        return null;
    }
    const normalizedParts = parts.map(stripIdentifierQuotes);
    return {
        locationName: "",
        nodeName: normalizedParts[normalizedParts.length - 1] ?? "",
        alias,
        nodeID: null,
        sourceStyle: "table_name",
        // Qualifiers closest to the table name come first.
        locationCandidates: normalizedParts.slice(0, -1).reverse(),
        ...position,
    };
}
|
|
672
|
-
/**
 * Extract the FROM clause of `sql` and parse every top-level source segment
 * in it, dropping segments that parseSqlSourceSegment rejects.
 *
 * @param {string} sql - SQL statement.
 * @returns {{fromClause: string, refs: object[]}} The clause text ("" when
 *   none was found) and the parsed refs.
 */
function parseSqlSourceRefs(sql) {
    const fromClause = extractFromClause(sql);
    if (!fromClause) {
        return { fromClause: "", refs: [] };
    }
    const refs = [];
    for (const segment of extractTopLevelSourceSegments(fromClause)) {
        const parsed = parseSqlSourceSegment(segment);
        if (parsed !== null) {
            refs.push(parsed);
        }
    }
    return { fromClause, refs };
}
|
|
682
|
-
/**
 * Split one select-list item into its expression and output alias.
 *
 * Recognised forms, in priority order:
 *   1. `<expression> AS <alias>` (case-insensitive `AS`);
 *   2. `<expression> <alias>` with a bare alias, accepted only when the
 *      expression contains `.` or `(` and the alias is not a trailing SQL
 *      keyword (`END`, `NULL`, `TRUE`, `FALSE`).
 * The alias may be a bare identifier or quoted with `"`, backticks or `[]`.
 *
 * The item is trimmed before matching and `.` is allowed to span newlines,
 * so multi-line expressions (e.g. a formatted `CASE ... END AS x`) and items
 * with surrounding whitespace have their alias detected.
 *
 * @param {string} rawItem - One item of a SELECT list.
 * @returns {{expression: string, outputName: string|null}} The trimmed
 *   expression and the unquoted alias, or `outputName: null` when no alias
 *   was recognised.
 */
function splitExpressionAlias(rawItem) {
    const item = rawItem.trim();
    const asMatch = item.match(/^(.*?)\s+AS\s+([A-Za-z_][\w$]*|"[^"]+"|`[^`]+`|\[[^\]]+\])$/is);
    if (asMatch) {
        return {
            expression: asMatch[1].trim(),
            outputName: stripIdentifierQuotes(asMatch[2]),
        };
    }
    const bareAliasMatch = item.match(/^(.*?)\s+([A-Za-z_][\w$]*|"[^"]+"|`[^`]+`|\[[^\]]+\])$/s);
    if (bareAliasMatch) {
        const candidateExpression = bareAliasMatch[1].trim();
        const candidateAlias = bareAliasMatch[2];
        // A trailing keyword is part of the expression (`CASE ... END`,
        // `x IS NULL`), never an alias. Quoted aliases keep their quotes here
        // and therefore never collide with this list.
        const trailingKeywords = ["end", "null", "true", "false"];
        const isTrailingKeyword = trailingKeywords.includes(candidateAlias.toLowerCase());
        const looksLikeExpression = candidateExpression.includes(".") || candidateExpression.includes("(");
        if (!isTrailingKeyword && looksLikeExpression) {
            return {
                expression: candidateExpression,
                outputName: stripIdentifierQuotes(candidateAlias),
            };
        }
    }
    return {
        expression: item,
        outputName: null,
    };
}
|
|
705
|
-
/**
 * Parse an expression that is nothing more than a (possibly qualified)
 * column reference such as `col` or `alias.col`.
 *
 * @param {string} expression - Select-item expression.
 * @returns {{sourceNodeAlias: string|null, sourceColumnName: string}|null}
 *   The unquoted column name and the unquoted qualifier directly before it
 *   (null when unqualified); null for `*` or anything that is not a dotted
 *   path of supported identifiers.
 */
function parseDirectColumnExpression(expression) {
    const text = expression.trim();
    if (text === "*") {
        return null;
    }
    const segments = splitTopLevel(text, ".").map((segment) => segment.trim());
    const allValid = segments.length !== 0 &&
        segments.every((segment) => segment.length !== 0 && isSupportedIdentifierToken(segment));
    if (!allValid) {
        return null;
    }
    const lastIndex = segments.length - 1;
    const sourceNodeAlias = segments.length >= 2
        ? stripIdentifierQuotes(segments[lastIndex - 1] ?? "")
        : null;
    const sourceColumnName = stripIdentifierQuotes(segments[lastIndex] ?? "");
    return { sourceNodeAlias, sourceColumnName };
}
|
|
720
|
-
/**
 * Parse a wildcard projection: `*` or `<qualifier>.*`.
 *
 * @param {string} expression - Select-item expression.
 * @returns {{sourceNodeAlias: string|null}|null} `{ sourceNodeAlias: null }`
 *   for a bare `*`, the unquoted qualifier directly before `*` for a
 *   qualified wildcard, or null when the expression is not a wildcard.
 */
function parseWildcardExpression(expression) {
    const text = expression.trim();
    if (text === "*") {
        return { sourceNodeAlias: null };
    }
    const segments = splitTopLevel(text, ".").map((segment) => segment.trim());
    if (segments.length < 2 || segments[segments.length - 1] !== "*") {
        return null;
    }
    const qualifiers = segments.slice(0, -1);
    const qualifiersValid = qualifiers.every((segment) => segment.length !== 0 && isSupportedIdentifierToken(segment));
    if (!qualifiersValid) {
        return null;
    }
    return {
        sourceNodeAlias: stripIdentifierQuotes(segments[segments.length - 2] ?? ""),
    };
}
|
|
735
|
-
/**
 * Render a list of values as a single comma-separated string.
 *
 * @param {Array} values - Values to render.
 * @returns {string} The values joined with ", ".
 */
function listToQuestion(values) {
    const separator = ", ";
    return values.join(separator);
}
|
|
738
|
-
/**
 * Classify every item of the SELECT list of `sql` against the already parsed
 * source refs.
 *
 * Each item becomes exactly one descriptor with the fields `expression`,
 * `outputName`, `sourceNodeAlias`, `sourceNodeName`, `sourceNodeID`,
 * `sourceColumnName`, `kind`, `supported` and, when unsupported, `reason`.
 * Wildcards are emitted with `sourceColumnName: "*"`; they are expanded
 * later, once predecessor nodes have been fetched.
 *
 * @param {string} sql - SQL statement.
 * @param {object[]} refs - Source refs parsed from the FROM clause.
 * @returns {{refs: object[], selectItems: object[], warnings: string[]}}
 */
function parseSqlSelectItems(sql, refs) {
    const warnings = [];
    // Refs are addressable by alias when one exists, otherwise by node name.
    const refByQualifier = new Map();
    for (const ref of refs) {
        refByQualifier.set(normalizeSqlIdentifier(ref.alias ?? ref.nodeName), ref);
    }
    const selectClause = extractSelectClause(sql);
    if (!selectClause) {
        return {
            refs,
            selectItems: [],
            warnings: ["Could not find a top-level SELECT ... FROM clause in the SQL."],
        };
    }
    // Baseline descriptor (an unsupported, unresolved expression). Overrides
    // replace fields in place, so key order is the same for every item.
    const makeItem = (expression, overrides) => ({
        expression,
        outputName: null,
        sourceNodeAlias: null,
        sourceNodeName: null,
        sourceNodeID: null,
        sourceColumnName: null,
        kind: "expression",
        supported: false,
        ...overrides,
    });
    const lookupRef = (qualifier) => refByQualifier.get(normalizeSqlIdentifier(qualifier)) ?? null;
    const describeWildcard = (expression, wildcard) => {
        const qualifier = wildcard.sourceNodeAlias;
        if (qualifier === null && refs.length !== 1) {
            return makeItem(expression, {
                reason: "Unqualified * is only supported when exactly one predecessor ref is present.",
            });
        }
        const ref = qualifier === null ? refs[0] ?? null : lookupRef(qualifier);
        if (!ref) {
            return makeItem(expression, {
                sourceNodeAlias: qualifier,
                reason: "Wildcard source alias could not be resolved to a predecessor ref.",
            });
        }
        return makeItem(expression, {
            sourceNodeAlias: qualifier ?? ref.alias ?? ref.nodeName,
            sourceNodeName: ref.nodeName,
            sourceNodeID: ref.nodeID,
            sourceColumnName: "*",
            supported: true,
        });
    };
    const describeColumn = (expression, outputName, column) => {
        const qualifier = column.sourceNodeAlias;
        let ref;
        if (qualifier !== null) {
            ref = lookupRef(qualifier);
        }
        else {
            // An unqualified column is unambiguous only with a single source.
            ref = refs.length === 1 ? refs[0] ?? null : null;
        }
        const shared = {
            outputName: outputName ?? column.sourceColumnName,
            sourceColumnName: column.sourceColumnName,
            kind: "column",
        };
        if (!ref) {
            return makeItem(expression, {
                ...shared,
                sourceNodeAlias: qualifier,
                reason: qualifier === null
                    ? "Unqualified columns are only supported when exactly one predecessor ref is present."
                    : `The source alias ${qualifier} did not match a predecessor ref.`,
            });
        }
        return makeItem(expression, {
            ...shared,
            sourceNodeAlias: qualifier ?? ref.alias ?? ref.nodeName,
            sourceNodeName: ref.nodeName,
            sourceNodeID: ref.nodeID,
            supported: true,
        });
    };
    const rawItems = splitTopLevel(selectClause, ",");
    const selectItems = [];
    for (const rawItem of rawItems) {
        const { expression, outputName } = splitExpressionAlias(rawItem);
        const wildcard = parseWildcardExpression(expression);
        if (wildcard) {
            selectItems.push(describeWildcard(expression, wildcard));
            continue;
        }
        const column = parseDirectColumnExpression(expression);
        if (column) {
            selectItems.push(describeColumn(expression, outputName, column));
            continue;
        }
        // Anything else is a computed expression; it needs an alias to be usable.
        selectItems.push(outputName === null
            ? makeItem(expression, {
                reason: "Computed expressions require an alias (e.g., CASE ... END AS column_name)",
            })
            : makeItem(expression, { outputName, supported: true }));
    }
    if (selectItems.length === 0) {
        warnings.push("The SQL SELECT clause did not produce any supported projected columns.");
    }
    return { refs, selectItems, warnings };
}
|
|
870
|
-
/**
 * Fetch every node of a workspace by following the `next` cursor returned by
 * listWorkspaceNodes until no cursor is returned.
 *
 * Entries without a string `id` and string `name` are skipped. `nodeType`
 * and `locationName` are copied when they are strings and set to null
 * otherwise.
 *
 * @param {object} client - API client passed through to listWorkspaceNodes.
 * @param {string} workspaceID - Workspace to list.
 * @returns {Promise<{id: string, name: string, nodeType: string|null, locationName: string|null}[]>}
 * @throws {Error} When a page is not a plain object, or when the API returns
 *   a cursor that was already seen (which would otherwise loop forever).
 */
async function listAllWorkspaceNodes(client, workspaceID) {
    const collected = [];
    const visitedCursors = new Set();
    let cursor;
    do {
        const page = await listWorkspaceNodes(client, {
            workspaceID,
            limit: WORKSPACE_NODE_PAGE_LIMIT,
            orderBy: "id",
            ...(cursor ? { startingFrom: cursor } : {}),
        });
        if (!isPlainObject(page)) {
            throw new Error("Workspace node list response was not an object");
        }
        const rows = Array.isArray(page.data) ? page.data : [];
        for (const row of rows) {
            const usable = isPlainObject(row) && typeof row.id === "string" && typeof row.name === "string";
            if (!usable) {
                continue;
            }
            collected.push({
                id: row.id,
                name: row.name,
                nodeType: typeof row.nodeType === "string" ? row.nodeType : null,
                locationName: typeof row.locationName === "string" ? row.locationName : null,
            });
        }
        // Accept a non-blank string cursor as-is, or a numeric cursor as text.
        const rawNext = page.next;
        let upcoming;
        if (typeof rawNext === "string" && rawNext.trim().length > 0) {
            upcoming = rawNext;
        }
        else if (typeof rawNext === "number") {
            upcoming = String(rawNext);
        }
        if (upcoming) {
            if (visitedCursors.has(upcoming)) {
                throw new Error(`Workspace node pagination repeated cursor ${upcoming}`);
            }
            visitedCursors.add(upcoming);
        }
        cursor = upcoming;
    } while (cursor);
    return collected;
}
|
|
914
|
-
/**
 * Read a node's `locationName`, treating missing, non-string and blank
 * values as absent.
 *
 * @param {object} node - Node object.
 * @returns {string|null} The location name exactly as stored (not trimmed),
 *   or null.
 */
function getNodeLocationName(node) {
    const { locationName } = node;
    const usable = typeof locationName === "string" && locationName.trim().length > 0;
    return usable ? locationName : null;
}
|
|
920
|
-
/**
 * Resolve parsed SQL source refs to concrete workspace nodes.
 *
 * Phase 1 matches each ref to workspace nodes by normalized name, using the
 * ref's location hints (and, when several nodes share the name, the full
 * node bodies) to pick one. Phase 2 fetches every resolved node, verifies
 * the location of `{{ ref() }}`-style sources, and records the body.
 *
 * Mutates the given refs in place: sets `nodeID` and fills an empty
 * `locationName`. Anything that cannot be resolved unambiguously is
 * reported through `openQuestions` instead of throwing.
 *
 * @param {object} client - API client.
 * @param {string} workspaceID - Workspace to search.
 * @param {object[]} refs - Refs produced by the SQL parser.
 * @returns {Promise<{refs: object[], openQuestions: string[], warnings: string[], predecessorNodes: Object<string, object>}>}
 *   `predecessorNodes` maps node ID to the fetched node body.
 */
async function resolveSqlRefsToWorkspaceNodes(client, workspaceID, refs) {
    const warnings = [];
    const openQuestions = [];
    const predecessorNodes = {};
    const outcome = { refs, openQuestions, warnings, predecessorNodes };
    if (refs.length === 0) {
        openQuestions.push("Which upstream Coalesce node(s) should this pipeline build from? Use a top-level FROM/JOIN that names existing workspace nodes (raw table names or {{ ref('LOCATION', 'NODE') }} syntax), or provide sourceNodeIDs.");
        return outcome;
    }
    // Index workspace nodes by normalized name; several nodes may share one.
    const nodesByName = new Map();
    for (const node of await listAllWorkspaceNodes(client, workspaceID)) {
        const key = normalizeSqlIdentifier(node.name);
        const bucket = nodesByName.get(key);
        if (bucket) {
            bucket.push(node);
        }
        else {
            nodesByName.set(key, [node]);
        }
    }
    // Bind a ref to a listed node and inherit its location when the ref has none.
    const adoptListedNode = (ref, entry) => {
        ref.nodeID = entry?.id ?? null;
        if (!ref.locationName && entry?.locationName) {
            ref.locationName = entry.locationName;
        }
    };
    const ambiguousMatch = (ref) => `Multiple workspace nodes matched the SQL source ${ref.nodeName}. Resolve the exact node before creation.`;
    for (const ref of refs) {
        const candidates = nodesByName.get(normalizeSqlIdentifier(ref.nodeName)) ?? [];
        if (candidates.length === 0) {
            openQuestions.push(`Could not resolve the SQL source ${ref.nodeName} to a workspace node ID in workspace ${workspaceID}.`);
            continue;
        }
        const locationHints = [
            ...(ref.locationName ? [ref.locationName] : []),
            ...ref.locationCandidates,
        ].map(normalizeSqlIdentifier);
        const hinted = locationHints.length === 0
            ? []
            : candidates.filter((entry) => entry.locationName &&
                locationHints.includes(normalizeSqlIdentifier(entry.locationName)));
        if (hinted.length === 1) {
            adoptListedNode(ref, hinted[0]);
            continue;
        }
        if (hinted.length > 1) {
            openQuestions.push(ambiguousMatch(ref));
            continue;
        }
        if (candidates.length === 1) {
            adoptListedNode(ref, candidates[0]);
            continue;
        }
        // Several same-named nodes and the listing could not disambiguate:
        // fetch each full node body and compare its location instead.
        const detailed = await Promise.all(candidates.map(async (match) => {
            const node = await getWorkspaceNode(client, {
                workspaceID,
                nodeID: match.id,
            });
            return { match, node: isPlainObject(node) ? node : null };
        }));
        const located = locationHints.length === 0
            ? []
            : detailed.filter(({ node }) => {
                const location = node ? getNodeLocationName(node) : null;
                return Boolean(location) && locationHints.includes(normalizeSqlIdentifier(location));
            });
        if (located.length === 1) {
            const [winner] = located;
            ref.nodeID = winner?.match.id ?? null;
            if (!ref.locationName) {
                ref.locationName = getNodeLocationName(winner?.node ?? {}) ?? "";
            }
        }
        else if (located.length > 1) {
            openQuestions.push(ambiguousMatch(ref));
        }
        else if (ref.sourceStyle === "coalesce_ref" && ref.locationName) {
            openQuestions.push(`Workspace nodes named ${ref.nodeName} were found, but none matched the requested location ${ref.locationName}.`);
        }
        else {
            openQuestions.push(`Multiple workspace nodes named ${ref.nodeName} were found. Qualify the SQL source more clearly or provide sourceNodeIDs before creation.`);
        }
    }
    for (const ref of refs) {
        if (!ref.nodeID) {
            continue;
        }
        const predecessor = await getWorkspaceNode(client, {
            workspaceID,
            nodeID: ref.nodeID,
        });
        if (!isPlainObject(predecessor)) {
            warnings.push(`Resolved predecessor ${ref.nodeName} did not return an object body.`);
            continue;
        }
        const actualLocation = getNodeLocationName(predecessor);
        const locationConflict = ref.sourceStyle === "coalesce_ref" &&
            actualLocation &&
            normalizeSqlIdentifier(actualLocation) !== normalizeSqlIdentifier(ref.locationName);
        if (locationConflict) {
            // An explicit ref() location that disagrees with the node undoes the match.
            ref.nodeID = null;
            openQuestions.push(`Resolved node ${ref.nodeName} is in location ${actualLocation}, not the requested location ${ref.locationName}.`);
            continue;
        }
        if (!ref.locationName && actualLocation) {
            ref.locationName = actualLocation;
        }
        predecessorNodes[ref.nodeID] = predecessor;
    }
    return outcome;
}
|
|
1031
|
-
/**
 * Produce the FROM clause of `sql` with every plain table-name source that
 * has a known location rewritten to `{{ ref('LOCATION', 'NODE') }}`.
 *
 * Replacements are applied from the highest offset to the lowest so that
 * earlier `relationStart`/`relationEnd` offsets stay valid.
 *
 * @param {string} sql - SQL statement.
 * @param {object[]} refs - Refs whose offsets index into the FROM clause.
 * @returns {string|null} The rewritten clause, or null when `sql` has no
 *   FROM clause.
 */
function buildJoinConditionFromSql(sql, refs) {
    const fromClause = extractFromClause(sql);
    if (!fromClause) {
        return null;
    }
    const rewritable = refs
        .filter((candidate) => candidate.sourceStyle === "table_name" && candidate.locationName)
        .sort((left, right) => right.relationStart - left.relationStart);
    return rewritable.reduce((text, ref) => {
        const replacement = `{{ ref('${ref.locationName}', '${ref.nodeName}') }}`;
        return text.slice(0, ref.relationStart) + replacement + text.slice(ref.relationEnd);
    }, fromClause);
}
|
|
1048
|
-
/**
 * List the column names declared in `node.metadata.columns`.
 *
 * @param {object} node - Node body.
 * @returns {string[]} Names of the column entries that are plain objects
 *   with a string `name`, in order; empty when `metadata` is not a plain
 *   object or `columns` is not an array.
 */
export function getColumnNamesFromNode(node) {
    const columns = isPlainObject(node.metadata) ? node.metadata.columns : undefined;
    if (!Array.isArray(columns)) {
        return [];
    }
    return columns
        .filter((column) => isPlainObject(column) && typeof column.name === "string")
        .map((column) => column.name);
}
|
|
1060
|
-
/**
 * Build one supported pass-through column item per column of a source node,
 * qualifying each expression with the source node's name.
 *
 * @param {string} sourceNodeID - ID recorded on every item.
 * @param {string} sourceNodeName - Name used as both alias and node name.
 * @param {object} node - Node body whose columns are read.
 * @returns {object[]} Select-item descriptors, one per column.
 */
function buildSelectItemsFromSourceNode(sourceNodeID, sourceNodeName, node) {
    const items = [];
    for (const columnName of getColumnNamesFromNode(node)) {
        items.push({
            expression: `${sourceNodeName}.${columnName}`,
            outputName: columnName,
            sourceNodeAlias: sourceNodeName,
            sourceNodeName,
            sourceNodeID,
            sourceColumnName: columnName,
            kind: "column",
            supported: true,
        });
    }
    return items;
}
|
|
1072
|
-
/**
 * Fetch the given source nodes one at a time and describe each as a ref.
 *
 * Nodes that cannot be read or that lack a usable name are skipped and
 * reported in `openQuestions`. A node without a location is still kept,
 * with the placeholder location "UNKNOWN_LOCATION" and an open question.
 *
 * @param {object} client - API client.
 * @param {string} workspaceID - Workspace containing the nodes.
 * @param {string[]} sourceNodeIDs - IDs to fetch, in order.
 * @returns {Promise<{sourceRefs: object[], predecessorNodes: Object<string, object>, openQuestions: string[], warnings: string[]}>}
 *   `warnings` is always returned empty by this function.
 */
async function getSourceNodesByID(client, workspaceID, sourceNodeIDs) {
    const outcome = {
        sourceRefs: [],
        predecessorNodes: {},
        openQuestions: [],
        warnings: [],
    };
    for (const sourceNodeID of sourceNodeIDs) {
        const node = await getWorkspaceNode(client, {
            workspaceID,
            nodeID: sourceNodeID,
        });
        if (!isPlainObject(node)) {
            outcome.openQuestions.push(`Could not read source node ${sourceNodeID} in workspace ${workspaceID}.`);
            continue;
        }
        const hasUsableName = typeof node.name === "string" && node.name.trim().length !== 0;
        if (!hasUsableName) {
            outcome.openQuestions.push(`Source node ${sourceNodeID} does not have a usable name.`);
            continue;
        }
        const locationName = getNodeLocationName(node);
        if (!locationName) {
            outcome.openQuestions.push(`Source node ${node.name} does not expose locationName. Clarify the Coalesce location before generating ref() SQL for this pipeline.`);
        }
        outcome.predecessorNodes[sourceNodeID] = node;
        outcome.sourceRefs.push({
            locationName: locationName ?? "UNKNOWN_LOCATION",
            nodeName: node.name,
            alias: node.name,
            nodeID: sourceNodeID,
        });
    }
    return outcome;
}
|
|
1109
|
-
/**
 * Replace each supported wildcard item (`sourceColumnName === "*"`) with one
 * column item per column of the predecessor node it refers to.
 *
 * Non-wildcard and already-unsupported items pass through untouched. A
 * wildcard that cannot be expanded is kept as a single item marked
 * unsupported with an explanatory `reason`.
 *
 * @param {object[]} selectItems - Items produced by the SELECT parser.
 * @param {object[]} refs - Source refs (resolved `nodeID` expected).
 * @param {Object<string, object>} predecessorNodes - Node bodies by node ID.
 * @returns {object[]} A new array of items; the input is not modified.
 */
function expandWildcardSelectItems(selectItems, refs, predecessorNodes) {
    const rejected = (item, reason) => [{ ...item, supported: false, reason }];
    const expandOne = (item) => {
        if (item.sourceColumnName !== "*" || !item.supported) {
            return [item];
        }
        // Prefer the node ID recorded on the item; fall back to alias/name.
        const ref = (item.sourceNodeID
            ? refs.find((candidate) => candidate.nodeID === item.sourceNodeID)
            : refs.find((candidate) => normalizeSqlIdentifier(candidate.alias ?? candidate.nodeName) ===
                normalizeSqlIdentifier(item.sourceNodeAlias ?? ""))) ?? null;
        if (!ref?.nodeID) {
            return rejected(item, "Wildcard source could not be resolved to a concrete predecessor node.");
        }
        const predecessor = predecessorNodes[ref.nodeID];
        if (!predecessor) {
            return rejected(item, "Wildcard source predecessor body was not available for column expansion.");
        }
        const columnNames = getColumnNamesFromNode(predecessor);
        if (columnNames.length === 0) {
            return rejected(item, "Wildcard source predecessor has no columns to expand.");
        }
        const qualifier = item.sourceNodeAlias;
        const hasQualifier = Boolean(qualifier) && qualifier.length > 0;
        return columnNames.map((columnName) => ({
            expression: hasQualifier ? `${qualifier}.${columnName}` : columnName,
            outputName: columnName,
            sourceNodeAlias: item.sourceNodeAlias,
            sourceNodeName: item.sourceNodeName,
            sourceNodeID: ref.nodeID,
            sourceColumnName: columnName,
            kind: "column",
            supported: true,
        }));
    };
    const expanded = [];
    for (const item of selectItems) {
        expanded.push(...expandOne(item));
    }
    return expanded;
}
|
|
1163
|
-
/**
 * Choose the name prefix for a generated node.
 *
 * A non-blank `shortName` wins: it is trimmed, upper-cased, and every run of
 * characters other than A-Z/0-9 is replaced by a single underscore.
 * Otherwise the prefix is looked up from the node type family, defaulting to
 * "NODE" for unknown families.
 *
 * @param {string} nodeTypeFamily - Family key such as "stage" or "dimension".
 * @param {string} [shortName] - Optional explicit prefix.
 * @returns {string} The prefix, without a trailing separator.
 */
function buildDefaultNodePrefix(nodeTypeFamily, shortName) {
    const explicit = shortName ? shortName.trim() : "";
    if (explicit.length > 0) {
        return explicit.toUpperCase().replace(/[^A-Z0-9]+/g, "_");
    }
    const prefixByFamily = new Map([
        ["stage", "STG"],
        ["persistent-stage", "PSTG"],
        ["view", "VW"],
        ["work", "WRK"],
        ["dimension", "DIM"],
        ["fact", "FACT"],
        ["hub", "HUB"],
        ["satellite", "SAT"],
        ["link", "LNK"],
    ]);
    return prefixByFamily.get(nodeTypeFamily) ?? "NODE";
}
|
|
1190
|
-
/**
 * Derive the name for a generated node.
 *
 * An explicit, non-blank `targetName` is returned trimmed and otherwise
 * unchanged. Without one, the name is `<prefix>_<source>` in upper case,
 * where `<source>` is the first ref's node name with a leading layer prefix
 * (SRC, STG, DIM, FACT, FCT, INT, WORK, VW) removed and runs of underscores
 * collapsed.
 *
 * The layer prefix is only removed when it is followed by `_` or `-`, so
 * ordinary names that merely begin with the same letters (INTEREST_RATES,
 * WORKORDERS, FACT) are left intact.
 *
 * @param {string|undefined|null} targetName - Caller-supplied name, if any.
 * @param {{nodeName: string}[]} refs - Source refs; only the first is used.
 * @param {string} nodeTypeFamily - Passed to buildDefaultNodePrefix.
 * @param {string} [shortName] - Passed to buildDefaultNodePrefix.
 * @returns {string} The node name; `<prefix>_NEW_PIPELINE` when there is no
 *   ref or the ref's name is nothing but a layer prefix.
 */
function buildDefaultNodeName(targetName, refs, nodeTypeFamily, shortName) {
    if (targetName && targetName.trim().length > 0) {
        return targetName.trim();
    }
    const prefix = buildDefaultNodePrefix(nodeTypeFamily, shortName);
    const firstRef = refs[0];
    if (!firstRef) {
        return `${prefix}_NEW_PIPELINE`;
    }
    const stripped = firstRef.nodeName.replace(/^(?:SRC|STG|DIM|FACT|FCT|INT|WORK|VW)[_-]/i, "");
    if (stripped.length === 0) {
        // The source name was only a layer prefix (e.g. "STG_"); nothing
        // meaningful remains to build a name from.
        return `${prefix}_NEW_PIPELINE`;
    }
    return `${prefix}_${stripped}`.toUpperCase().replace(/__+/g, "_");
}
|
|
1202
|
-
/**
 * Decide whether a requested node type appears among the observed ones.
 *
 * Types match when they are identical, or when the parts after the first
 * `:::` separator are equal; a type without `:::` is compared whole.
 *
 * @param {string} requestedNodeType - Type the caller wants to use.
 * @param {string[]} observedNodeTypes - Types seen on existing nodes.
 * @returns {boolean} True when any observed type matches.
 */
function matchesObservedNodeType(requestedNodeType, observedNodeTypes) {
    const separator = ":::";
    const bareID = (nodeType) => nodeType.includes(separator)
        ? nodeType.split(separator)[1] ?? nodeType
        : nodeType;
    const requestedID = bareID(requestedNodeType);
    return observedNodeTypes.some((observed) => observed === requestedNodeType || bareID(observed) === requestedID);
}
|
|
1214
|
-
/**
 * Fetch the node types observed in a workspace, degrading to an empty
 * inventory plus a warning when the lookup fails.
 *
 * @param {object} client - API client.
 * @param {string} workspaceID - Workspace to inspect.
 * @returns {Promise<{nodeTypes: string[], counts: object, total: number, warnings: string[]}>}
 * @throws {CoalesceApiError} Re-thrown unchanged when its status is 401,
 *   403, 500 or 503; every other failure is converted into a warning.
 */
async function getWorkspaceNodeTypeInventory(client, workspaceID) {
    try {
        const listing = await listWorkspaceNodeTypes(client, { workspaceID });
        // Shaping stays inside the try so that an unusable result is also
        // reported as a warning rather than thrown.
        const nodeTypes = listing.nodeTypes ?? [];
        const counts = listing.counts ?? {};
        const total = listing.total ?? 0;
        return { nodeTypes, counts, total, warnings: [] };
    }
    catch (error) {
        // These statuses are treated as a broken session and must propagate.
        const propagatedStatuses = [401, 403, 500, 503];
        const mustPropagate = error instanceof CoalesceApiError && propagatedStatuses.includes(error.status);
        if (mustPropagate) {
            throw error;
        }
        const reason = error instanceof Error ? error.message : String(error);
        const warning = `Observed workspace node types could not be fetched for workspace ${workspaceID} (${reason}). Node type selection will use defaults — use list_workspace_node_types or cache_workspace_nodes to confirm installation before execution.`;
        return {
            nodeTypes: [],
            counts: {},
            total: 0,
            warnings: [warning],
        };
    }
}
|
|
1241
|
-
/**
 * Check the node types used by a plan against the workspace inventory.
 *
 * Mutates `plan`: inventory warnings are always appended to `plan.warnings`.
 * When the inventory is non-empty and any planned or requested type was not
 * observed, a further warning is appended and `plan.status` is set to
 * "needs_clarification". An empty inventory (`total === 0`) skips the check.
 *
 * @param {{warnings: string[], nodes?: object[], status?: string}} plan - Plan to validate.
 * @param {{nodeTypes: string[], total: number, warnings: string[]}} inventory - Observed types.
 * @param {string} [requestedNodeType] - Extra type explicitly requested by the caller.
 * @returns {void}
 */
function applyWorkspaceNodeTypeValidation(plan, inventory, requestedNodeType) {
    plan.warnings.push(...inventory.warnings);
    if (inventory.total === 0) {
        return;
    }
    // A Set keeps first-seen order while dropping duplicate types.
    const typesToCheck = new Set();
    for (const node of plan.nodes ?? []) {
        const candidate = node.nodeType;
        if (typeof candidate === "string" && candidate.length > 0) {
            typesToCheck.add(candidate);
        }
    }
    if (requestedNodeType && requestedNodeType.trim().length > 0) {
        typesToCheck.add(requestedNodeType);
    }
    const missingTypes = [...typesToCheck].filter((nodeType) => !matchesObservedNodeType(nodeType, inventory.nodeTypes));
    if (missingTypes.length === 0) {
        return;
    }
    plan.warnings.push(`The following node types were not observed in current workspace nodes: ${missingTypes.join(", ")}. This observation is based on existing nodes, not a true installed-type registry. Confirm installation in Coalesce before creating nodes of these types.`);
    plan.status = "needs_clarification";
}
|
|
1258
|
-
/**
 * Assemble a single-node pipeline plan from an already parsed SQL statement.
 *
 * The node type comes from the ranked selection, then `params.targetNodeType`,
 * then "Stage". Warnings are appended to the caller-supplied `warnings` array
 * (it is mutated), while open questions are collected on a copy of
 * `openQuestions`. The plan is "ready" only when the selected type is
 * auto-executable (or no type was ranked), at least one predecessor node ID
 * was resolved, at least one supported named projection exists, and there are
 * no unsupported projections, parse warnings or open questions.
 *
 * @param params - planning parameters (sql, workspaceID, selectedNodeType, nodeTypeSelection, ...)
 * @param parseResult - parsed SQL exposing `selectItems`, `refs` and `warnings`
 * @param predecessorNodes - nodes passed through to wildcard expansion
 * @param openQuestions - questions gathered so far (not mutated)
 * @param warnings - warnings gathered so far (mutated)
 * @returns the plan object
 */
function buildPlanFromSql(params, parseResult, predecessorNodes, openQuestions, warnings) {
    const chosenType = params.selectedNodeType;
    const nodeType = chosenType?.nodeType ?? params.targetNodeType ?? "Stage";
    const planOpenQuestions = [...openQuestions];
    if (!chosenType) {
        warnings.push(`No ranked node type candidate was available, so planning fell back to ${nodeType}.`);
    }
    else if (!chosenType.autoExecutable) {
        warnings.push(`Planner selected node type ${nodeType}, but it likely needs additional semantic configuration before automatic creation.`);
        const { semanticSignals, missingDefaultFields } = chosenType;
        if (semanticSignals.length > 0) {
            planOpenQuestions.push(`Confirm the required configuration for ${nodeType}: ${semanticSignals.join(", ")}.`);
        }
        if (missingDefaultFields.length > 0) {
            planOpenQuestions.push(`Provide values for ${nodeType} config fields without defaults: ${missingDefaultFields.join(", ")}.`);
        }
    }
    const expandedSelectItems = expandWildcardSelectItems(parseResult.selectItems, parseResult.refs, predecessorNodes);
    // Every unsupported projection is reported, with its own reason when it has one.
    const unsupportedItems = expandedSelectItems.filter((item) => !item.supported);
    for (const item of unsupportedItems) {
        const detail = item.reason ? item.reason : "unsupported SQL projection in v1";
        warnings.push(`${item.expression}: ${detail}`);
    }
    const supportedOutputColumnCount = expandedSelectItems.filter((item) => item.supported && item.outputName).length;
    const missingTopLevelSelect = parseResult.warnings.some((warning) => warning.includes("Could not find a top-level SELECT ... FROM clause"));
    if (missingTopLevelSelect) {
        planOpenQuestions.push("Provide a top-level SELECT ... FROM query using direct column projections before creating this pipeline.");
    }
    else if (supportedOutputColumnCount === 0) {
        planOpenQuestions.push("Specify at least one supported projected column before creating this pipeline.");
    }
    const predecessorNodeIDs = uniqueInOrder(parseResult.refs.map((ref) => ref.nodeID).filter(Boolean));
    const predecessorNodeNames = parseResult.refs.map((ref) => ref.nodeName);
    const ready = [
        chosenType?.autoExecutable ?? true,
        predecessorNodeIDs.length > 0,
        supportedOutputColumnCount > 0,
        unsupportedItems.length === 0,
        parseResult.warnings.length === 0,
        planOpenQuestions.length === 0,
    ].every(Boolean);
    const name = buildDefaultNodeName(params.targetName, parseResult.refs, chosenType?.family ?? null, chosenType?.shortName ?? null);
    const planNode = {
        planNodeID: "node-1",
        name,
        nodeType,
        nodeTypeFamily: chosenType?.family ?? null,
        predecessorNodeIDs,
        predecessorPlanNodeIDs: [],
        predecessorNodeNames,
        description: params.description ?? null,
        sql: params.sql,
        selectItems: expandedSelectItems,
        outputColumnNames: expandedSelectItems
            .filter((item) => item.outputName)
            .map((item) => item.outputName),
        configOverrides: params.configOverrides ? deepClone(params.configOverrides) : {},
        sourceRefs: parseResult.refs.map(({ locationName, nodeName, alias, nodeID }) => ({
            locationName,
            nodeName,
            alias,
            nodeID,
        })),
        joinCondition: buildJoinConditionFromSql(params.sql, parseResult.refs),
        location: params.location ?? {},
        requiresFullSetNode: true,
    };
    // templateDefaults is only present when the selected type provides it.
    if (chosenType?.templateDefaults) {
        planNode.templateDefaults = chosenType.templateDefaults;
    }
    const selection = params.nodeTypeSelection;
    return {
        version: 1,
        intent: "sql",
        status: ready ? "ready" : "needs_clarification",
        workspaceID: params.workspaceID,
        platform: null,
        goal: params.goal ?? null,
        sql: params.sql,
        nodes: [planNode],
        assumptions: [
            `Planner ${selection.strategy} selected ${nodeType} from repo/workspace candidates.`,
            "The generated plan uses create_workspace_node_from_predecessor followed by set_workspace_node when the selected type is projection-capable.",
        ],
        openQuestions: planOpenQuestions,
        warnings: [...parseResult.warnings, ...warnings],
        supportedNodeTypes: selection.supportedNodeTypes.length > 0
            ? selection.supportedNodeTypes
            : [nodeType],
        nodeTypeSelection: selection,
    };
}
|
|
1347
|
-
/**
 * Extract CTEs with their bodies from SQL.
 *
 * The statement must start with a top-level WITH keyword; otherwise an empty
 * array is returned. Each `name AS ( ... )` header is matched with a regex and
 * its body is delimited by `findClosingParen`, so parentheses inside string
 * literals, quoted identifiers and comments do not end the body early.
 *
 * @param sql - full SQL statement
 * @returns one entry per CTE: `{ name, body, columns, whereClause, sourceTable,
 *   hasGroupBy, hasJoin }`, with `name` upper-cased and stripped of identifier
 *   quotes. A CTE whose parentheses never balance is recorded with an empty
 *   body and scanning stops there.
 */
function extractCtes(sql) {
    const trimmed = sql.trim();
    // Check for a leading WITH keyword using quoting-aware search.
    const withIdx = findTopLevelKeywordIndex(trimmed, "WITH");
    if (withIdx !== 0) {
        return [];
    }
    const ctes = [];
    let cursor = withIdx + 4; // skip past "WITH"
    while (cursor < trimmed.length) {
        // Skip whitespace and commas between CTEs.
        const leadingMatch = trimmed.slice(cursor).match(/^[\s,]+/);
        if (leadingMatch) {
            cursor += leadingMatch[0].length;
        }
        if (cursor >= trimmed.length) {
            break;
        }
        // Match "identifier AS (" where the identifier is unquoted,
        // double-quoted, backtick-quoted or bracket-quoted.
        const headerMatch = trimmed.slice(cursor).match(/^([A-Za-z_][\w$]*|"[^"]+"|`[^`]+`|\[[^\]]+\])\s+AS\s*\(/i);
        if (!headerMatch) {
            break; // No more CTE headers: the rest is the final SELECT.
        }
        const name = stripIdentifierQuotes(headerMatch[1]).toUpperCase();
        const bodyStart = cursor + headerMatch[0].length;
        // Single balanced-paren scan per CTE; the body is sliced from its result.
        const closeIdx = findClosingParen(trimmed, bodyStart);
        if (closeIdx < 0) {
            // Unbalanced parentheses: record the name and give up on the rest.
            ctes.push({ name, body: "", columns: [], whereClause: null, sourceTable: null, hasGroupBy: false, hasJoin: false });
            break;
        }
        const body = trimmed.slice(bodyStart, closeIdx).trim();
        ctes.push({
            name,
            body,
            columns: parseCteColumns(body),
            whereClause: extractCteWhereClause(body),
            sourceTable: extractCteSourceTable(body),
            hasGroupBy: findTopLevelKeywordIndex(body, "GROUP") >= 0,
            hasJoin: findTopLevelKeywordIndex(body, "JOIN") >= 0,
        });
        // Move the cursor past the closing paren.
        cursor = closeIdx + 1;
    }
    return ctes;
}
|
|
1398
|
-
/**
 * Locate the ')' that balances an already consumed '('.
 *
 * `startIndex` is the position immediately after the opening parenthesis.
 * The scan ignores parentheses inside single-quoted strings (with `''` as an
 * escaped quote), double-quoted, backtick-quoted and bracket-quoted
 * identifiers, `--` line comments and block comments.
 *
 * @param sql - text to scan
 * @param startIndex - index of the first character after the opening '('
 * @returns index of the matching ')' or -1 when the parentheses never balance
 */
function findClosingParen(sql, startIndex) {
    // Exactly one lexical context is active at a time, so a single state
    // variable is enough to track it.
    let state = "code";
    let depth = 1;
    for (let i = startIndex; i < sql.length; i++) {
        const ch = sql[i];
        const next = sql[i + 1];
        switch (state) {
            case "lineComment":
                if (ch === "\n") {
                    state = "code";
                }
                break;
            case "blockComment":
                if (ch === "*" && next === "/") {
                    state = "code";
                    i++; // consume the '/'
                }
                break;
            case "singleQuote":
                if (ch === "'") {
                    if (next === "'") {
                        i++; // doubled quote is an escaped quote, stay in the string
                    }
                    else {
                        state = "code";
                    }
                }
                break;
            case "doubleQuote":
                if (ch === '"') {
                    state = "code";
                }
                break;
            case "backtick":
                if (ch === "`") {
                    state = "code";
                }
                break;
            case "bracket":
                if (ch === "]") {
                    state = "code";
                }
                break;
            default:
                if (ch === "'") {
                    state = "singleQuote";
                }
                else if (ch === '"') {
                    state = "doubleQuote";
                }
                else if (ch === "`") {
                    state = "backtick";
                }
                else if (ch === "[") {
                    state = "bracket";
                }
                else if (ch === "-" && next === "-") {
                    state = "lineComment";
                    i++; // consume the second '-'
                }
                else if (ch === "/" && next === "*") {
                    state = "blockComment";
                    i++; // consume the '*'
                }
                else if (ch === "(") {
                    depth++;
                }
                else if (ch === ")") {
                    depth--;
                    if (depth === 0) {
                        return i;
                    }
                }
        }
    }
    return -1;
}
|
|
1491
|
-
/**
 * Return the trimmed text enclosed by a balanced pair of parentheses.
 *
 * @param sql - text to scan
 * @param startIndex - index of the first character after the opening '('
 * @returns the enclosed text, or null when no balancing ')' is found
 */
function extractParenBody(sql, startIndex) {
    const closingAt = findClosingParen(sql, startIndex);
    return closingAt < 0 ? null : sql.slice(startIndex, closingAt).trim();
}
|
|
1501
|
-
/**
 * Turn the SELECT list of a CTE body into column descriptors.
 *
 * A lone `*` projection is resolved by recursing into a `FROM (subquery)`
 * when one exists; otherwise no columns are reported. Wildcard items and
 * items for which neither an alias nor a trailing column name can be
 * determined are dropped.
 *
 * @param body - SQL text of the CTE body
 * @returns array of `{ outputName, expression, isTransform }` with
 *   `outputName` upper-cased
 */
function parseCteColumns(body) {
    const selectClause = extractSelectClause(body);
    if (!selectClause) {
        return [];
    }
    const rawItems = splitTopLevel(selectClause, ",");
    // "SELECT * FROM (subquery)": the real projections live in the subquery.
    if (rawItems.length === 1 && rawItems[0].trim() === "*") {
        const subqueryBody = extractSubqueryFromFrom(body);
        return subqueryBody ? parseCteColumns(subqueryBody) : [];
    }
    return rawItems.flatMap((rawItem) => {
        const { expression, outputName } = splitExpressionAlias(rawItem);
        const trimmedExpr = expression.trim();
        // Wildcards (`*`, `alias.*`) carry no column name of their own.
        if (trimmedExpr === "*" || trimmedExpr.endsWith(".*")) {
            return [];
        }
        const bareColName = extractBareColumnName(trimmedExpr)?.toUpperCase() ?? null;
        const colName = outputName?.toUpperCase() ?? bareColName;
        if (!colName) {
            return [];
        }
        // A rename (alias differs from the source column name) is treated as a
        // transform so that the expression is carried along with the new name.
        const isRename = outputName !== null && bareColName !== null && outputName.toUpperCase() !== bareColName;
        return [{
                outputName: colName,
                expression: trimmedExpr,
                isTransform: isRename || !isSimpleColumnRef(trimmedExpr),
            }];
    });
}
|
|
1544
|
-
/**
 * Pull the SQL of a parenthesised subquery that directly follows the first
 * top-level FROM keyword.
 *
 * @param sql - SQL text to inspect
 * @returns the text inside the parentheses, or null when there is no
 *   top-level FROM, the FROM target does not start with '(', or the
 *   parentheses never balance
 */
function extractSubqueryFromFrom(sql) {
    const fromIndex = findTopLevelKeywordIndex(sql, "from");
    if (fromIndex < 0) {
        return null;
    }
    // 4 === "from".length
    const fromTarget = sql.slice(fromIndex + 4).trimStart();
    return fromTarget.startsWith("(") ? extractParenBody(fromTarget, 1) : null;
}
|
|
1557
|
-
/**
 * Tell whether an expression is a plain column reference that needs no
 * transform: one identifier, or two identifiers joined by a dot, where each
 * identifier is either unquoted or double-quoted.
 * Examples: `col`, `"col"`, `table.col`, `table."col"`, `"table"."col"`.
 *
 * @param expr - expression text (surrounding whitespace is ignored)
 * @returns true for a simple reference, false otherwise
 */
function isSimpleColumnRef(expr) {
    // One identifier part: unquoted name or double-quoted name.
    const identifierPart = String.raw`(?:[A-Za-z_][\w$]*|"[^"]+")`;
    const simpleReference = new RegExp(`^${identifierPart}(?:\\.${identifierPart})?$`);
    return simpleReference.test(expr.trim());
}
|
|
1565
|
-
/**
 * Return the last identifier of an expression with identifier quotes removed,
 * e.g. the `col` of `table.col` or of `col`.
 *
 * The pattern is anchored only at the end of the trimmed text, so an
 * expression that is not a simple reference but ends in an identifier also
 * yields that trailing identifier.
 *
 * @param expr - expression text
 * @returns the unquoted trailing identifier, or null when the text does not
 *   end in an identifier
 */
function extractBareColumnName(expr) {
    const trailing = /(?:.*\.)?([A-Za-z_][\w$]*|"[^"]+")$/.exec(expr.trim());
    const identifier = trailing?.[1];
    return identifier ? stripIdentifierQuotes(identifier) : null;
}
|
|
1574
|
-
/**
 * Return the text of the top-level WHERE clause of a CTE body.
 *
 * The clause runs from just after WHERE up to the earliest following
 * top-level GROUP, ORDER, HAVING, LIMIT or QUALIFY keyword, or to the end of
 * the body when none of them follows.
 *
 * @param body - SQL text of the CTE body
 * @returns the trimmed clause text, or null when there is no top-level WHERE
 *   or the clause is empty
 */
function extractCteWhereClause(body) {
    const whereIdx = findTopLevelKeywordIndex(body, "WHERE");
    if (whereIdx < 0) {
        return null;
    }
    const clauseStart = whereIdx + "WHERE".length;
    // The earliest terminator wins; with none present the clause ends with the body.
    const clauseEnd = ["GROUP", "ORDER", "HAVING", "LIMIT", "QUALIFY"]
        .map((keyword) => findTopLevelKeywordIndex(body, keyword, clauseStart))
        .filter((position) => position >= 0)
        .reduce((earliest, position) => Math.min(earliest, position), body.length);
    const clause = body.slice(clauseStart, clauseEnd).trim();
    return clause || null;
}
|
|
1595
|
-
/** Upper-case names of the SQL functions treated as aggregates. */
const AGGREGATE_FUNCTIONS = new Set([
    // basic aggregates
    "COUNT",
    "SUM",
    "AVG",
    "MIN",
    "MAX",
    // collection / statistical aggregates
    "LISTAGG",
    "ARRAY_AGG",
    "MEDIAN",
    "MODE",
    "STDDEV",
    "VARIANCE",
    "ANY_VALUE",
    // conditional aggregates
    "COUNT_IF",
    "SUM_IF",
    "AVG_IF",
    // approximate aggregates
    "APPROX_COUNT_DISTINCT",
    "HLL",
]);
/**
 * Case-insensitive membership test against AGGREGATE_FUNCTIONS.
 *
 * @param name - function name as written in the SQL
 * @returns true when the upper-cased name is a known aggregate function
 */
function isAggregateFn(name) {
    const normalized = name.toUpperCase();
    return AGGREGATE_FUNCTIONS.has(normalized);
}
|
|
1605
|
-
/**
 * Read the name that follows the first top-level FROM keyword of a CTE body.
 *
 * Only an unquoted, optionally dot-qualified identifier at the start of the
 * FROM target is recognised.
 *
 * @param body - SQL text of the CTE body
 * @returns the upper-cased table name, or null when there is no top-level
 *   FROM or the FROM target does not start with such an identifier
 */
function extractCteSourceTable(body) {
    const fromIdx = findTopLevelKeywordIndex(body, "FROM");
    if (fromIdx < 0) {
        return null;
    }
    // 4 === "FROM".length
    const fromTarget = body.slice(fromIdx + 4).trimStart();
    const leadingName = /^([A-Za-z_][\w$.]*(?:\.[A-Za-z_][\w$]*)*)/.exec(fromTarget);
    return leadingName?.[1]?.toUpperCase() ?? null;
}
|
|
1617
|
-
/**
 * Map a parsed CTE to the pattern key used to choose its node type.
 * GROUP BY takes precedence over JOIN.
 *
 * @param cte - parsed CTE exposing `hasGroupBy` and `hasJoin`
 * @returns "aggregation", "multiSource" or "staging"
 */
function classifyCtePattern(cte) {
    return cte.hasGroupBy
        ? "aggregation"
        : cte.hasJoin
            ? "multiSource"
            : "staging";
}
|
|
1627
|
-
/**
 * Render the markdown-style instruction block for one CTE: heading, node
 * type, source table, how to create the node, column transforms, passthrough
 * columns, the full column list, the WHERE filter and a JOIN note.
 *
 * @param cte - parsed CTE (`name`, `sourceTable`, `columns`, `whereClause`,
 *   `hasGroupBy`, `hasJoin`)
 * @param nodeType - node type string to print for this CTE
 * @returns the instruction lines joined with "\n"
 */
function buildCteNodeInstruction(cte, nodeType) {
    // Split the columns once into transformed and untouched ones.
    const transformed = [];
    const untouchedNames = [];
    for (const column of cte.columns) {
        if (column.isTransform) {
            transformed.push(column);
        }
        else {
            untouchedNames.push(column.outputName);
        }
    }
    const hasColumns = cte.columns.length > 0;
    let creationHint = null;
    if (cte.hasGroupBy) {
        creationHint = `- AGGREGATION NODE: pass groupByColumns + aggregates directly to create_workspace_node_from_predecessor (single call)`;
    }
    else if (hasColumns) {
        creationHint = `- Pass columns array + whereCondition directly to create_workspace_node_from_predecessor (single call)`;
    }
    const sections = [
        `## ${cte.name}`,
        `- nodeType: "${nodeType}"`,
        cte.sourceTable ? `- source: ${cte.sourceTable}` : null,
        creationHint,
        ...(transformed.length > 0
            ? [`- Column transforms:`, ...transformed.map((column) => ` - ${column.outputName}: ${column.expression}`)]
            : []),
        untouchedNames.length > 0 ? `- Passthrough columns: ${untouchedNames.join(", ")}` : null,
        hasColumns
            ? `- ONLY keep these ${cte.columns.length} columns: ${cte.columns.map((column) => column.outputName).join(", ")}`
            : null,
        cte.whereClause
            ? `- WHERE filter (pass as whereCondition — do NOT construct {{ ref() }}): ${cte.whereClause}`
            : null,
        cte.hasJoin ? `- Has JOIN — use apply_join_condition or update_workspace_node for join setup` : null,
    ];
    // Sections that do not apply were left as null above.
    return sections.filter((line) => line !== null).join("\n");
}
|
|
1666
|
-
/**
 * When the user's SQL contains CTEs, return a plan that instructs the agent
 * to break each CTE into a separate Coalesce node using the declarative tools.
 * CTEs are not supported in Coalesce, so each CTE should be its own node.
 *
 * The plan always has status "needs_clarification" and an empty `nodes`
 * array; the per-CTE data (node type, column transforms, WHERE clause, source
 * table, dependencies and ready-to-pass tool parameters) is carried in
 * `cteNodeSummary` and in the text of `openQuestions`.
 *
 * @param params - planning parameters (`workspaceID`, `goal`, `sql`)
 * @param ctes - parsed CTEs as produced by extractCtes
 * @param nodeTypeSelections - node type selections keyed by `staging`,
 *   `multiSource` and `aggregation`
 * @returns the plan object
 */
function buildCtePlan(params, ctes, nodeTypeSelections) {
    const stagingType = nodeTypeSelections.staging.selectedNodeType ?? "Stage";
    const multiSourceType = nodeTypeSelections.multiSource.selectedNodeType ?? stagingType;
    const aggregationType = nodeTypeSelections.aggregation.selectedNodeType ?? stagingType;
    const typeMap = {
        staging: stagingType,
        multiSource: multiSourceType,
        aggregation: aggregationType,
    };
    // CTE names must match as whole identifiers. A plain substring test would
    // report ORDERS as a dependency of any body that merely mentions
    // ORDERS_CLEAN or ORDERS_ID. Lookarounds are used instead of \b so that
    // names starting or ending with a non-word character (e.g. "$") still match.
    const nameMatchers = ctes.map((cte) => ({
        name: cte.name,
        matcher: new RegExp(`(?<![\\w$])${escapeRegExp(cte.name)}(?![\\w$])`),
    }));
    // Resolve pattern, node type and dependencies once per CTE; both the
    // instruction text and the structured summary are derived from this.
    const analyzed = ctes.map((cte) => {
        const pattern = classifyCtePattern(cte);
        // CTE names are upper-cased, so compare against the upper-cased body.
        const upperBody = cte.body.toUpperCase();
        const dependsOn = nameMatchers
            .filter(({ name, matcher }) => name !== cte.name && matcher.test(upperBody))
            .map(({ name }) => name);
        return { cte, pattern, nodeType: typeMap[pattern], dependsOn };
    });
    // Build per-CTE instructions
    const cteInstructions = analyzed.map(({ cte, nodeType }) => buildCteNodeInstruction(cte, nodeType));
    // CTEs that reference another CTE (pipeline dependency)
    const cteDependencies = analyzed
        .filter(({ dependsOn }) => dependsOn.length > 0)
        .map(({ cte, dependsOn }) => `${cte.name} depends on: ${dependsOn.join(", ")}`);
    // Detect the final SELECT after all CTEs
    const cteNameSet = new Set(ctes.map((c) => c.name));
    const finalSelectNote = extractFinalSelectFromCteQuery(params.sql ?? "", cteNameSet);
    const allTransformCount = ctes.reduce((sum, cte) => sum + cte.columns.filter((c) => c.isTransform).length, 0);
    const allFilterCount = ctes.filter((c) => c.whereClause).length;
    // Build structured per-CTE summary for easy agent consumption.
    // Includes columnsParam / groupByColumnsParam / aggregatesParam for single-call creation.
    const cteNodeSummary = analyzed.map(({ cte, pattern, nodeType, dependsOn }) => {
        const transforms = cte.columns.filter((c) => c.isTransform);
        const summary = {
            name: cte.name,
            nodeType,
            pattern,
            sourceTable: cte.sourceTable,
            columnCount: cte.columns.length,
            transforms: transforms.map((c) => ({ column: c.outputName, expression: c.expression })),
            passthroughColumns: cte.columns.filter((c) => !c.isTransform).map((c) => c.outputName),
            whereFilter: cte.whereClause,
            hasGroupBy: cte.hasGroupBy,
            hasJoin: cte.hasJoin,
            dependsOn,
        };
        // Add structured params for single-call creation
        if (cte.hasGroupBy && cte.columns.length > 0) {
            // GROUP BY CTEs: split columns into group-by (passthrough) and aggregates (transforms with agg functions)
            const groupByCols = [];
            const aggCols = [];
            for (const col of cte.columns) {
                const aggMatch = col.expression.match(/^(\w+)\s*\((.*)\)$/s);
                if (col.isTransform && aggMatch && isAggregateFn(aggMatch[1])) {
                    aggCols.push({
                        name: col.outputName,
                        function: aggMatch[1].toUpperCase(),
                        expression: aggMatch[2].trim(),
                    });
                }
                else {
                    // Non-aggregate columns in a GROUP BY CTE are the GROUP BY dimensions
                    groupByCols.push(col.expression);
                }
            }
            // Both halves are required; otherwise no structured params are offered.
            if (groupByCols.length > 0 && aggCols.length > 0) {
                summary.groupByColumnsParam = groupByCols;
                summary.aggregatesParam = aggCols;
            }
        }
        else if (cte.columns.length > 0 && !cte.hasJoin) {
            // Only set columnsParam for single-source CTEs where expressions can be passed directly.
            // Multi-source JOIN CTEs have SQL aliases (soh.*, sl.*) that don't map to Coalesce node names —
            // the agent must translate these to "NODE_NAME"."COLUMN" format.
            summary.columnsParam = cte.columns.map((c) => ({
                name: c.outputName,
                ...(c.isTransform ? { transform: c.expression } : {}),
            }));
        }
        return summary;
    });
    return {
        version: 1,
        intent: "sql",
        status: "needs_clarification",
        STOP_AND_CONFIRM: `STOP. Present the pipeline summary to the user in a table format and ask for confirmation BEFORE creating any nodes. For EACH node in cteNodeSummary, display: name, the EXACT nodeType string (e.g. "Coalesce-Base-Node-Types:::Stage"), pattern, transforms, and whereFilter. Use the cteNodeSummary array — do NOT paraphrase or simplify the nodeType values. Do NOT proceed until the user explicitly approves.`,
        workspaceID: params.workspaceID,
        platform: null,
        goal: params.goal ?? null,
        sql: params.sql ?? null,
        nodes: [],
        cteNodeSummary,
        assumptions: [
            `Parsed ${ctes.length} CTEs with ${allTransformCount} column transforms and ${allFilterCount} WHERE filters.`,
            `Staging and aggregation CTEs: 1 call per node. Multi-source JOIN CTEs: 2 calls (create + apply_join_condition).`,
        ],
        openQuestions: [
            `STOP: Present this pipeline summary to the user and ask "Should I proceed with creating these ${ctes.length} nodes?" Do NOT create nodes until the user confirms.`,
            `This SQL uses CTEs (WITH ... AS), which Coalesce does not support as a single node. Each CTE must become a separate node.`,
            `--- PER-CTE INSTRUCTIONS ---\n\n${cteInstructions.join("\n\n")}`,
            ...(cteDependencies.length > 0
                ? [`CTE dependencies (create in order):\n${cteDependencies.map((d) => ` - ${d}`).join("\n")}`]
                : []),
            ...(finalSelectNote ? [finalSelectNote] : []),
            `Node type guidance (do NOT use list_workspace_node_types):\n` +
                `- Staging CTEs (single-source): nodeType "${stagingType}"\n` +
                `- Join/transform CTEs (multi-source): nodeType "${multiSourceType}"\n` +
                `- Aggregation CTEs (GROUP BY): nodeType "${aggregationType}"`,
            `Workflow per CTE:\n` +
                `create_workspace_node_from_predecessor accepts columns, whereCondition, groupByColumns, and aggregates directly:\n` +
                `- For staging/transform CTEs (single-source): 1 call — pass columns (from cteNodeSummary.columnsParam) + whereCondition\n` +
                `- For GROUP BY CTEs: 1 call — pass groupByColumns (from cteNodeSummary.groupByColumnsParam) + aggregates (from cteNodeSummary.aggregatesParam)\n` +
                `- For multi-source JOIN CTEs: 2 calls — first create_workspace_node_from_predecessor with columns + whereCondition, then apply_join_condition to set up FROM/JOIN/ON\n` +
                `- Do NOT construct {{ ref() }} syntax — the FROM clause and joins are auto-generated\n` +
                `- Pass repoPath to each call for automatic config completion`,
        ],
        warnings: [
            `SQL contains ${ctes.length} CTEs: ${ctes.map((c) => c.name).join(", ")}. Each must be a separate Coalesce node.` +
                (allTransformCount > 0 ? ` ${allTransformCount} column transforms detected.` : ``),
        ],
        supportedNodeTypes: nodeTypeSelections.staging.supportedNodeTypes.length > 0
            ? nodeTypeSelections.staging.supportedNodeTypes
            : [stagingType],
        nodeTypeSelection: nodeTypeSelections.staging,
    };
}
|
|
1806
|
-
/**
|
|
1807
|
-
* Extract information about the final SELECT after all CTEs.
|
|
1808
|
-
*/
|
|
1809
|
-
/**
 * Backslash-escape every regular-expression metacharacter in a string so the
 * result can be embedded in a RegExp constructor and match the text literally
 * (for example a `$` inside a CTE name).
 *
 * @param value - text to escape
 * @returns the escaped text
 */
function escapeRegExp(value) {
    return value.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
|
|
1816
|
-
/**
 * Describe the final SELECT that follows the CTE definitions.
 *
 * The last top-level SELECT keyword (outside parentheses, quotes and
 * comments as reported by scanTopLevel) marks the start of the final query.
 *
 * @param sql - full SQL statement
 * @param cteNames - iterable of CTE names to look for in the final query
 * @returns null when there is no top-level SELECT or the final query
 *   references none of the CTEs; otherwise a note for the agent: either that
 *   a bare `SELECT * FROM <cte>` is redundant, or which CTEs the final node
 *   depends on together with the first 500 characters of the query
 */
function extractFinalSelectFromCteQuery(sql, cteNames) {
    // Find the last top-level SELECT using quoting-aware scanning.
    const trimmed = sql.trim();
    let lastSelectIdx = -1;
    scanTopLevel(trimmed, (_char, index, parenDepth) => {
        if (parenDepth === 0 &&
            trimmed.slice(index, index + 6).toUpperCase() === "SELECT" &&
            !isIdentifierChar(trimmed[index - 1]) &&
            !isIdentifierChar(trimmed[index + 6])) {
            lastSelectIdx = index;
        }
        return true;
    });
    if (lastSelectIdx < 0) {
        return null;
    }
    const finalSelect = trimmed.slice(lastSelectIdx).trim();
    // Check which CTEs the final SELECT references. Names are escaped for safe
    // regex use and delimited with lookarounds rather than \b: \b needs a word
    // character on one side, so a name that starts or ends with "$" (or any
    // other non-word character) would never match.
    const referencedCtes = [...cteNames].filter((name) => new RegExp(`(?<![\\w$])${escapeRegExp(name)}(?![\\w$])`, "i").test(finalSelect));
    if (referencedCtes.length === 0) {
        return null;
    }
    // Check if the final SELECT is just `SELECT * FROM single_cte`, which is redundant.
    const selectStarFromOne = referencedCtes.length === 1 &&
        /^SELECT\s+\*\s+FROM\s+\w+\s*;?\s*$/i.test(finalSelect);
    if (selectStarFromOne) {
        return (`Final SELECT is just \`SELECT * FROM ${referencedCtes[0]}\` — this is redundant. ` +
            `The last CTE node (${referencedCtes[0]}) already represents the final output. ` +
            `Do NOT create an additional node for this.`);
    }
    return (`Final output query references: ${referencedCtes.join(", ")}. ` +
        `Create a final node with these as predecessors. ` +
        `The final SELECT is:\n${finalSelect.slice(0, 500)}${finalSelect.length > 500 ? "..." : ""}`);
}
|
|
1
|
+
// Re-exports for backward compatibility
|
|
2
|
+
export { PipelinePlanSchema, DEFAULT_STAGE_CONFIG } from "./planning-types.js";
|
|
3
|
+
export { normalizeSqlIdentifier, deepClone, normalizeWhitespace, buildSourceDependencyKey, getUniqueSourceDependencies, parseSqlSourceRefs, parseSqlSelectItems, extractCtes, escapeRegExp } from "./sql-parsing.js";
|
|
4
|
+
export { getColumnNamesFromNode, getNodeColumnArray, getColumnSourceNodeIDs, findMatchingBaseColumn, renameSourceMappingEntries, buildStageSourceMappingFromPlan } from "./column-helpers.js";
|
|
5
|
+
export { getWorkspaceNodeTypeInventory } from "./workspace-resolution.js";
|
|
6
|
+
import { extractCtes, parseSqlSourceRefs, parseSqlSelectItems, buildCtePlan, deepClone, } from "./sql-parsing.js";
|
|
7
|
+
import { resolveSqlRefsToWorkspaceNodes, getSourceNodesByID, buildSelectItemsFromSourceNode, buildDefaultNodeName, buildDefaultNodePrefix, buildPlanFromSql, applyWorkspaceNodeTypeValidation, getWorkspaceNodeTypeInventory as getInventory, } from "./workspace-resolution.js";
|
|
8
|
+
import { selectPipelineNodeType } from "./node-type-selection.js";
|
|
9
|
+
import { uniqueInOrder } from "../../utils.js";
|
|
1848
10
|
export async function planPipeline(client, params) {
|
|
1849
11
|
const location = {
|
|
1850
12
|
...(params.locationName ? { locationName: params.locationName } : {}),
|
|
1851
13
|
...(params.database ? { database: params.database } : {}),
|
|
1852
14
|
...(params.schema ? { schema: params.schema } : {}),
|
|
1853
15
|
};
|
|
1854
|
-
const workspaceNodeTypeInventory = await
|
|
16
|
+
const workspaceNodeTypeInventory = await getInventory(client, params.workspaceID);
|
|
1855
17
|
if (params.sql && params.sql.trim().length > 0) {
|
|
1856
18
|
// Detect CTEs — Coalesce does not support CTEs. Each CTE should be a separate node.
|
|
1857
19
|
const ctes = extractCtes(params.sql);
|
|
@@ -2122,112 +284,4 @@ export async function planPipeline(client, params) {
|
|
|
2122
284
|
applyWorkspaceNodeTypeValidation(plan, workspaceNodeTypeInventory, params.targetNodeType);
|
|
2123
285
|
return plan;
|
|
2124
286
|
}
|
|
2125
|
-
/**
 * Return the plain-object entries of `node.metadata.columns`.
 *
 * @param node - node object that may carry a `metadata.columns` array
 * @returns the column objects, or an empty array when `metadata` is not a
 *   plain object or `columns` is not an array
 */
export function getNodeColumnArray(node) {
    const columns = isPlainObject(node.metadata) ? node.metadata.columns : undefined;
    return Array.isArray(columns) ? columns.filter(isPlainObject) : [];
}
|
|
2132
|
-
export function getColumnSourceNodeIDs(column) {
|
|
2133
|
-
if (!Array.isArray(column.sources)) {
|
|
2134
|
-
return [];
|
|
2135
|
-
}
|
|
2136
|
-
const ids = new Set();
|
|
2137
|
-
for (const source of column.sources) {
|
|
2138
|
-
if (!isPlainObject(source) || !Array.isArray(source.columnReferences)) {
|
|
2139
|
-
continue;
|
|
2140
|
-
}
|
|
2141
|
-
for (const ref of source.columnReferences) {
|
|
2142
|
-
if (isPlainObject(ref) && typeof ref.nodeID === "string") {
|
|
2143
|
-
ids.add(ref.nodeID);
|
|
2144
|
-
}
|
|
2145
|
-
}
|
|
2146
|
-
}
|
|
2147
|
-
return Array.from(ids);
|
|
2148
|
-
}
|
|
2149
|
-
export function findMatchingBaseColumn(node, selectItem) {
|
|
2150
|
-
const normalizedTargetName = normalizeSqlIdentifier(selectItem.sourceColumnName ?? "");
|
|
2151
|
-
for (const column of getNodeColumnArray(node)) {
|
|
2152
|
-
if (typeof column.name !== "string" ||
|
|
2153
|
-
normalizeSqlIdentifier(column.name) !== normalizedTargetName) {
|
|
2154
|
-
continue;
|
|
2155
|
-
}
|
|
2156
|
-
const sourceNodeIDs = getColumnSourceNodeIDs(column);
|
|
2157
|
-
if (selectItem.sourceNodeID && sourceNodeIDs.includes(selectItem.sourceNodeID)) {
|
|
2158
|
-
return deepClone(column);
|
|
2159
|
-
}
|
|
2160
|
-
if (!selectItem.sourceNodeID) {
|
|
2161
|
-
return deepClone(column);
|
|
2162
|
-
}
|
|
2163
|
-
}
|
|
2164
|
-
return null;
|
|
2165
|
-
}
|
|
2166
|
-
export function renameSourceMappingEntries(node, newName) {
|
|
2167
|
-
const metadata = isPlainObject(node.metadata) ? node.metadata : undefined;
|
|
2168
|
-
if (!metadata || !Array.isArray(metadata.sourceMapping)) {
|
|
2169
|
-
return node;
|
|
2170
|
-
}
|
|
2171
|
-
const previousName = typeof node.name === "string" && node.name.trim().length > 0 ? node.name : null;
|
|
2172
|
-
const updateSingleUnnamedMapping = previousName === null && metadata.sourceMapping.length === 1;
|
|
2173
|
-
return {
|
|
2174
|
-
...node,
|
|
2175
|
-
metadata: {
|
|
2176
|
-
...metadata,
|
|
2177
|
-
sourceMapping: metadata.sourceMapping.map((entry) => {
|
|
2178
|
-
if (!isPlainObject(entry)) {
|
|
2179
|
-
return entry;
|
|
2180
|
-
}
|
|
2181
|
-
const shouldRename = (previousName !== null && entry.name === previousName) ||
|
|
2182
|
-
updateSingleUnnamedMapping;
|
|
2183
|
-
if (!shouldRename) {
|
|
2184
|
-
return entry;
|
|
2185
|
-
}
|
|
2186
|
-
return {
|
|
2187
|
-
...entry,
|
|
2188
|
-
name: newName,
|
|
2189
|
-
};
|
|
2190
|
-
}),
|
|
2191
|
-
},
|
|
2192
|
-
};
|
|
2193
|
-
}
|
|
2194
|
-
export function buildStageSourceMappingFromPlan(currentNode, nodePlan) {
|
|
2195
|
-
const metadata = isPlainObject(currentNode.metadata) ? currentNode.metadata : undefined;
|
|
2196
|
-
const existingEntry = metadata && Array.isArray(metadata.sourceMapping)
|
|
2197
|
-
? metadata.sourceMapping.find(isPlainObject)
|
|
2198
|
-
: undefined;
|
|
2199
|
-
const aliases = {};
|
|
2200
|
-
for (const ref of nodePlan.sourceRefs) {
|
|
2201
|
-
if (!ref.nodeID) {
|
|
2202
|
-
continue;
|
|
2203
|
-
}
|
|
2204
|
-
const alias = ref.alias ?? ref.nodeName;
|
|
2205
|
-
if (nodePlan.sourceRefs.length > 1 || ref.alias) {
|
|
2206
|
-
aliases[alias] = ref.nodeID;
|
|
2207
|
-
}
|
|
2208
|
-
}
|
|
2209
|
-
return [
|
|
2210
|
-
{
|
|
2211
|
-
...(isPlainObject(existingEntry) ? existingEntry : {}),
|
|
2212
|
-
aliases,
|
|
2213
|
-
customSQL: {
|
|
2214
|
-
...(isPlainObject(existingEntry) && isPlainObject(existingEntry.customSQL)
|
|
2215
|
-
? existingEntry.customSQL
|
|
2216
|
-
: {}),
|
|
2217
|
-
customSQL: "",
|
|
2218
|
-
},
|
|
2219
|
-
dependencies: getUniqueSourceDependencies(nodePlan.sourceRefs),
|
|
2220
|
-
join: {
|
|
2221
|
-
...(isPlainObject(existingEntry) && isPlainObject(existingEntry.join)
|
|
2222
|
-
? existingEntry.join
|
|
2223
|
-
: {}),
|
|
2224
|
-
joinCondition: nodePlan.joinCondition ?? "",
|
|
2225
|
-
},
|
|
2226
|
-
name: nodePlan.name,
|
|
2227
|
-
noLinkRefs: isPlainObject(existingEntry) && Array.isArray(existingEntry.noLinkRefs)
|
|
2228
|
-
? existingEntry.noLinkRefs
|
|
2229
|
-
: [],
|
|
2230
|
-
},
|
|
2231
|
-
];
|
|
2232
|
-
}
|
|
2233
287
|
//# sourceMappingURL=planning.js.map
|