coalesce-transform-mcp 0.3.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/README.md +74 -3
  2. package/dist/client.d.ts.map +1 -1
  3. package/dist/client.js +6 -2
  4. package/dist/client.js.map +1 -1
  5. package/dist/coalesce/api/environments.d.ts +0 -12
  6. package/dist/coalesce/api/environments.d.ts.map +1 -1
  7. package/dist/coalesce/api/environments.js +0 -4
  8. package/dist/coalesce/api/environments.js.map +1 -1
  9. package/dist/coalesce/api/jobs.d.ts +3 -5
  10. package/dist/coalesce/api/jobs.d.ts.map +1 -1
  11. package/dist/coalesce/api/jobs.js +3 -6
  12. package/dist/coalesce/api/jobs.js.map +1 -1
  13. package/dist/coalesce/api/nodes.d.ts +3 -3
  14. package/dist/coalesce/api/nodes.d.ts.map +1 -1
  15. package/dist/coalesce/api/nodes.js +6 -4
  16. package/dist/coalesce/api/nodes.js.map +1 -1
  17. package/dist/coalesce/api/runs.d.ts.map +1 -1
  18. package/dist/coalesce/api/runs.js +11 -1
  19. package/dist/coalesce/api/runs.js.map +1 -1
  20. package/dist/coalesce/api/scan.d.ts +14 -0
  21. package/dist/coalesce/api/scan.d.ts.map +1 -0
  22. package/dist/coalesce/api/scan.js +64 -0
  23. package/dist/coalesce/api/scan.js.map +1 -0
  24. package/dist/coalesce/api/subgraphs.d.ts +3 -2
  25. package/dist/coalesce/api/subgraphs.d.ts.map +1 -1
  26. package/dist/coalesce/api/subgraphs.js +3 -2
  27. package/dist/coalesce/api/subgraphs.js.map +1 -1
  28. package/dist/coalesce/run-schemas.d.ts.map +1 -1
  29. package/dist/coalesce/run-schemas.js +26 -16
  30. package/dist/coalesce/run-schemas.js.map +1 -1
  31. package/dist/coalesce/tool-response.d.ts +1 -13
  32. package/dist/coalesce/tool-response.d.ts.map +1 -1
  33. package/dist/coalesce/tool-response.js +20 -6
  34. package/dist/coalesce/tool-response.js.map +1 -1
  35. package/dist/coalesce/tool-schemas.d.ts +1 -2
  36. package/dist/coalesce/tool-schemas.d.ts.map +1 -1
  37. package/dist/coalesce/tool-schemas.js +368 -5
  38. package/dist/coalesce/tool-schemas.js.map +1 -1
  39. package/dist/coalesce/types.d.ts +8 -0
  40. package/dist/coalesce/types.d.ts.map +1 -1
  41. package/dist/coalesce/types.js +3 -1
  42. package/dist/coalesce/types.js.map +1 -1
  43. package/dist/constants.d.ts +18 -0
  44. package/dist/constants.d.ts.map +1 -0
  45. package/dist/constants.js +21 -0
  46. package/dist/constants.js.map +1 -0
  47. package/dist/mcp/cache.d.ts +2 -1
  48. package/dist/mcp/cache.d.ts.map +1 -1
  49. package/dist/mcp/cache.js +122 -138
  50. package/dist/mcp/cache.js.map +1 -1
  51. package/dist/mcp/environments.d.ts +2 -1
  52. package/dist/mcp/environments.d.ts.map +1 -1
  53. package/dist/mcp/environments.js +56 -112
  54. package/dist/mcp/environments.js.map +1 -1
  55. package/dist/mcp/git-accounts.d.ts +2 -1
  56. package/dist/mcp/git-accounts.d.ts.map +1 -1
  57. package/dist/mcp/git-accounts.js +74 -96
  58. package/dist/mcp/git-accounts.js.map +1 -1
  59. package/dist/mcp/jobs.d.ts +2 -1
  60. package/dist/mcp/jobs.d.ts.map +1 -1
  61. package/dist/mcp/jobs.js +68 -122
  62. package/dist/mcp/jobs.js.map +1 -1
  63. package/dist/mcp/lineage.d.ts +5 -0
  64. package/dist/mcp/lineage.d.ts.map +1 -0
  65. package/dist/mcp/lineage.js +410 -0
  66. package/dist/mcp/lineage.js.map +1 -0
  67. package/dist/mcp/node-type-corpus.d.ts +2 -1
  68. package/dist/mcp/node-type-corpus.d.ts.map +1 -1
  69. package/dist/mcp/node-type-corpus.js +148 -151
  70. package/dist/mcp/node-type-corpus.js.map +1 -1
  71. package/dist/mcp/nodes.d.ts +2 -1
  72. package/dist/mcp/nodes.d.ts.map +1 -1
  73. package/dist/mcp/nodes.js +358 -464
  74. package/dist/mcp/nodes.js.map +1 -1
  75. package/dist/mcp/pipelines.d.ts +2 -1
  76. package/dist/mcp/pipelines.d.ts.map +1 -1
  77. package/dist/mcp/pipelines.js +514 -314
  78. package/dist/mcp/pipelines.js.map +1 -1
  79. package/dist/mcp/projects.d.ts +2 -1
  80. package/dist/mcp/projects.d.ts.map +1 -1
  81. package/dist/mcp/projects.js +66 -100
  82. package/dist/mcp/projects.js.map +1 -1
  83. package/dist/mcp/repo-node-types.d.ts +2 -1
  84. package/dist/mcp/repo-node-types.d.ts.map +1 -1
  85. package/dist/mcp/repo-node-types.js +92 -121
  86. package/dist/mcp/repo-node-types.js.map +1 -1
  87. package/dist/mcp/runs.d.ts +3 -2
  88. package/dist/mcp/runs.d.ts.map +1 -1
  89. package/dist/mcp/runs.js +93 -148
  90. package/dist/mcp/runs.js.map +1 -1
  91. package/dist/mcp/skills.d.ts +13 -0
  92. package/dist/mcp/skills.d.ts.map +1 -0
  93. package/dist/mcp/skills.js +85 -0
  94. package/dist/mcp/skills.js.map +1 -0
  95. package/dist/mcp/subgraphs.d.ts +2 -1
  96. package/dist/mcp/subgraphs.d.ts.map +1 -1
  97. package/dist/mcp/subgraphs.js +61 -98
  98. package/dist/mcp/subgraphs.js.map +1 -1
  99. package/dist/mcp/tool-helpers.d.ts +37 -0
  100. package/dist/mcp/tool-helpers.d.ts.map +1 -0
  101. package/dist/mcp/tool-helpers.js +82 -0
  102. package/dist/mcp/tool-helpers.js.map +1 -0
  103. package/dist/mcp/users.d.ts +2 -1
  104. package/dist/mcp/users.d.ts.map +1 -1
  105. package/dist/mcp/users.js +92 -145
  106. package/dist/mcp/users.js.map +1 -1
  107. package/dist/mcp/workshop.d.ts +2 -1
  108. package/dist/mcp/workshop.d.ts.map +1 -1
  109. package/dist/mcp/workshop.js +66 -101
  110. package/dist/mcp/workshop.js.map +1 -1
  111. package/dist/mcp/workspaces.d.ts +2 -1
  112. package/dist/mcp/workspaces.d.ts.map +1 -1
  113. package/dist/mcp/workspaces.js +19 -34
  114. package/dist/mcp/workspaces.js.map +1 -1
  115. package/dist/prompts/index.d.ts.map +1 -1
  116. package/dist/prompts/index.js +85 -0
  117. package/dist/prompts/index.js.map +1 -1
  118. package/dist/resources/context/pipeline-workshop-guide.md +1 -1
  119. package/dist/resources/context/tool-usage.md +7 -0
  120. package/dist/resources/index.d.ts +13 -0
  121. package/dist/resources/index.d.ts.map +1 -1
  122. package/dist/resources/index.js +105 -5
  123. package/dist/resources/index.js.map +1 -1
  124. package/dist/schemas/node-payloads.d.ts +2 -2
  125. package/dist/server.d.ts +2 -1
  126. package/dist/server.d.ts.map +1 -1
  127. package/dist/server.js +158 -41
  128. package/dist/server.js.map +1 -1
  129. package/dist/services/cache/snapshots.d.ts.map +1 -1
  130. package/dist/services/cache/snapshots.js +9 -5
  131. package/dist/services/cache/snapshots.js.map +1 -1
  132. package/dist/services/config/schema-resolver.d.ts.map +1 -1
  133. package/dist/services/config/schema-resolver.js +3 -6
  134. package/dist/services/config/schema-resolver.js.map +1 -1
  135. package/dist/services/lineage/lineage-cache.d.ts +53 -0
  136. package/dist/services/lineage/lineage-cache.d.ts.map +1 -0
  137. package/dist/services/lineage/lineage-cache.js +335 -0
  138. package/dist/services/lineage/lineage-cache.js.map +1 -0
  139. package/dist/services/lineage/lineage-documentation.d.ts +29 -0
  140. package/dist/services/lineage/lineage-documentation.d.ts.map +1 -0
  141. package/dist/services/lineage/lineage-documentation.js +80 -0
  142. package/dist/services/lineage/lineage-documentation.js.map +1 -0
  143. package/dist/services/lineage/lineage-propagation.d.ts +47 -0
  144. package/dist/services/lineage/lineage-propagation.d.ts.map +1 -0
  145. package/dist/services/lineage/lineage-propagation.js +176 -0
  146. package/dist/services/lineage/lineage-propagation.js.map +1 -0
  147. package/dist/services/lineage/lineage-search.d.ts +33 -0
  148. package/dist/services/lineage/lineage-search.d.ts.map +1 -0
  149. package/dist/services/lineage/lineage-search.js +133 -0
  150. package/dist/services/lineage/lineage-search.js.map +1 -0
  151. package/dist/services/lineage/lineage-traversal.d.ts +34 -0
  152. package/dist/services/lineage/lineage-traversal.d.ts.map +1 -0
  153. package/dist/services/lineage/lineage-traversal.js +283 -0
  154. package/dist/services/lineage/lineage-traversal.js.map +1 -0
  155. package/dist/services/pipelines/clause-extraction.d.ts +3 -0
  156. package/dist/services/pipelines/clause-extraction.d.ts.map +1 -0
  157. package/dist/services/pipelines/clause-extraction.js +27 -0
  158. package/dist/services/pipelines/clause-extraction.js.map +1 -0
  159. package/dist/services/pipelines/column-helpers.d.ts +8 -0
  160. package/dist/services/pipelines/column-helpers.d.ts.map +1 -0
  161. package/dist/services/pipelines/column-helpers.js +125 -0
  162. package/dist/services/pipelines/column-helpers.js.map +1 -0
  163. package/dist/services/pipelines/cte-parsing.d.ts +29 -0
  164. package/dist/services/pipelines/cte-parsing.d.ts.map +1 -0
  165. package/dist/services/pipelines/cte-parsing.js +160 -0
  166. package/dist/services/pipelines/cte-parsing.js.map +1 -0
  167. package/dist/services/pipelines/cte-planning.d.ts +22 -0
  168. package/dist/services/pipelines/cte-planning.d.ts.map +1 -0
  169. package/dist/services/pipelines/cte-planning.js +206 -0
  170. package/dist/services/pipelines/cte-planning.js.map +1 -0
  171. package/dist/services/pipelines/execution.d.ts.map +1 -1
  172. package/dist/services/pipelines/execution.js +0 -1
  173. package/dist/services/pipelines/execution.js.map +1 -1
  174. package/dist/services/pipelines/intent-parsing.d.ts +24 -0
  175. package/dist/services/pipelines/intent-parsing.d.ts.map +1 -0
  176. package/dist/services/pipelines/intent-parsing.js +245 -0
  177. package/dist/services/pipelines/intent-parsing.js.map +1 -0
  178. package/dist/services/pipelines/intent-resolution.d.ts +24 -0
  179. package/dist/services/pipelines/intent-resolution.d.ts.map +1 -0
  180. package/dist/services/pipelines/intent-resolution.js +141 -0
  181. package/dist/services/pipelines/intent-resolution.js.map +1 -0
  182. package/dist/services/pipelines/intent.d.ts +4 -45
  183. package/dist/services/pipelines/intent.d.ts.map +1 -1
  184. package/dist/services/pipelines/intent.js +14 -408
  185. package/dist/services/pipelines/intent.js.map +1 -1
  186. package/dist/services/pipelines/node-type-candidates.d.ts +6 -0
  187. package/dist/services/pipelines/node-type-candidates.d.ts.map +1 -0
  188. package/dist/services/pipelines/node-type-candidates.js +165 -0
  189. package/dist/services/pipelines/node-type-candidates.js.map +1 -0
  190. package/dist/services/pipelines/node-type-intent.d.ts +1 -5
  191. package/dist/services/pipelines/node-type-intent.d.ts.map +1 -1
  192. package/dist/services/pipelines/node-type-intent.js +1 -5
  193. package/dist/services/pipelines/node-type-intent.js.map +1 -1
  194. package/dist/services/pipelines/node-type-scoring.d.ts +13 -0
  195. package/dist/services/pipelines/node-type-scoring.d.ts.map +1 -0
  196. package/dist/services/pipelines/node-type-scoring.js +322 -0
  197. package/dist/services/pipelines/node-type-scoring.js.map +1 -0
  198. package/dist/services/pipelines/node-type-selection.d.ts +22 -2
  199. package/dist/services/pipelines/node-type-selection.d.ts.map +1 -1
  200. package/dist/services/pipelines/node-type-selection.js +16 -538
  201. package/dist/services/pipelines/node-type-selection.js.map +1 -1
  202. package/dist/services/pipelines/plan-builder.d.ts +33 -0
  203. package/dist/services/pipelines/plan-builder.d.ts.map +1 -0
  204. package/dist/services/pipelines/plan-builder.js +224 -0
  205. package/dist/services/pipelines/plan-builder.js.map +1 -0
  206. package/dist/services/pipelines/planning-types.d.ts +543 -0
  207. package/dist/services/pipelines/planning-types.d.ts.map +1 -0
  208. package/dist/services/pipelines/planning-types.js +85 -0
  209. package/dist/services/pipelines/planning-types.js.map +1 -0
  210. package/dist/services/pipelines/planning.d.ts +8 -537
  211. package/dist/services/pipelines/planning.d.ts.map +1 -1
  212. package/dist/services/pipelines/planning.js +10 -1956
  213. package/dist/services/pipelines/planning.js.map +1 -1
  214. package/dist/services/pipelines/review.d.ts.map +1 -1
  215. package/dist/services/pipelines/review.js +3 -8
  216. package/dist/services/pipelines/review.js.map +1 -1
  217. package/dist/services/pipelines/select-parsing.d.ts +7 -0
  218. package/dist/services/pipelines/select-parsing.d.ts.map +1 -0
  219. package/dist/services/pipelines/select-parsing.js +185 -0
  220. package/dist/services/pipelines/select-parsing.js.map +1 -0
  221. package/dist/services/pipelines/source-parsing.d.ts +8 -0
  222. package/dist/services/pipelines/source-parsing.d.ts.map +1 -0
  223. package/dist/services/pipelines/source-parsing.js +151 -0
  224. package/dist/services/pipelines/source-parsing.js.map +1 -0
  225. package/dist/services/pipelines/sql-parsing.d.ts +8 -0
  226. package/dist/services/pipelines/sql-parsing.d.ts.map +1 -0
  227. package/dist/services/pipelines/sql-parsing.js +9 -0
  228. package/dist/services/pipelines/sql-parsing.js.map +1 -0
  229. package/dist/services/pipelines/sql-tokenizer.d.ts +42 -0
  230. package/dist/services/pipelines/sql-tokenizer.d.ts.map +1 -0
  231. package/dist/services/pipelines/sql-tokenizer.js +493 -0
  232. package/dist/services/pipelines/sql-tokenizer.js.map +1 -0
  233. package/dist/services/pipelines/sql-utils.d.ts +30 -0
  234. package/dist/services/pipelines/sql-utils.d.ts.map +1 -0
  235. package/dist/services/pipelines/sql-utils.js +62 -0
  236. package/dist/services/pipelines/sql-utils.js.map +1 -0
  237. package/dist/services/pipelines/workshop.d.ts.map +1 -1
  238. package/dist/services/pipelines/workshop.js +53 -25
  239. package/dist/services/pipelines/workshop.js.map +1 -1
  240. package/dist/services/pipelines/workspace-resolution.d.ts +18 -0
  241. package/dist/services/pipelines/workspace-resolution.d.ts.map +1 -0
  242. package/dist/services/pipelines/workspace-resolution.js +279 -0
  243. package/dist/services/pipelines/workspace-resolution.js.map +1 -0
  244. package/dist/services/runs/diagnostics.d.ts.map +1 -1
  245. package/dist/services/runs/diagnostics.js +3 -8
  246. package/dist/services/runs/diagnostics.js.map +1 -1
  247. package/dist/services/shared/elicitation.d.ts +14 -0
  248. package/dist/services/shared/elicitation.d.ts.map +1 -0
  249. package/dist/services/shared/elicitation.js +56 -0
  250. package/dist/services/shared/elicitation.js.map +1 -0
  251. package/dist/services/workspace/node-creation.d.ts.map +1 -1
  252. package/dist/services/workspace/node-creation.js +5 -1
  253. package/dist/services/workspace/node-creation.js.map +1 -1
  254. package/dist/services/workspace/node-update-helpers.d.ts.map +1 -1
  255. package/dist/services/workspace/node-update-helpers.js +3 -8
  256. package/dist/services/workspace/node-update-helpers.js.map +1 -1
  257. package/dist/utils.d.ts +11 -0
  258. package/dist/utils.d.ts.map +1 -1
  259. package/dist/utils.js +20 -1
  260. package/dist/utils.js.map +1 -1
  261. package/dist/workflows/get-environment-health.d.ts +49 -0
  262. package/dist/workflows/get-environment-health.d.ts.map +1 -0
  263. package/dist/workflows/get-environment-health.js +310 -0
  264. package/dist/workflows/get-environment-health.js.map +1 -0
  265. package/dist/workflows/get-environment-overview.d.ts +2 -1
  266. package/dist/workflows/get-environment-overview.d.ts.map +1 -1
  267. package/dist/workflows/get-environment-overview.js +13 -19
  268. package/dist/workflows/get-environment-overview.js.map +1 -1
  269. package/dist/workflows/get-run-details.d.ts +2 -2
  270. package/dist/workflows/get-run-details.d.ts.map +1 -1
  271. package/dist/workflows/get-run-details.js +14 -19
  272. package/dist/workflows/get-run-details.js.map +1 -1
  273. package/dist/workflows/retry-and-wait.d.ts.map +1 -1
  274. package/dist/workflows/retry-and-wait.js +3 -2
  275. package/dist/workflows/retry-and-wait.js.map +1 -1
  276. package/dist/workflows/run-and-wait.d.ts.map +1 -1
  277. package/dist/workflows/run-and-wait.js +3 -2
  278. package/dist/workflows/run-and-wait.js.map +1 -1
  279. package/package.json +2 -2
@@ -1,1857 +1,19 @@
1
- import { z } from "zod";
2
- import { CoalesceApiError } from "../../client.js";
3
- import { getWorkspaceNode, listWorkspaceNodes, } from "../../coalesce/api/nodes.js";
4
- import { listWorkspaceNodeTypes } from "../workspace/mutations.js";
5
- import { isPlainObject, uniqueInOrder } from "../../utils.js";
6
- import { NodeConfigInputSchema } from "../../schemas/node-payloads.js";
7
- import { selectPipelineNodeType, PIPELINE_NODE_TYPE_FAMILIES, } from "./node-type-selection.js";
8
- const PlannedSelectItemSchema = z
9
- .object({
10
- expression: z.string(),
11
- outputName: z.string().nullable(),
12
- sourceNodeAlias: z.string().nullable(),
13
- sourceNodeName: z.string().nullable(),
14
- sourceNodeID: z.string().nullable(),
15
- sourceColumnName: z.string().nullable(),
16
- kind: z.enum(["column", "expression"]),
17
- supported: z.boolean(),
18
- reason: z.string().optional(),
19
- })
20
- .strict();
21
- const PlannedPipelineNodeSchema = z
22
- .object({
23
- planNodeID: z.string(),
24
- name: z.string(),
25
- nodeType: z.string(),
26
- nodeTypeFamily: z
27
- .enum(PIPELINE_NODE_TYPE_FAMILIES)
28
- .nullable()
29
- .optional(),
30
- predecessorNodeIDs: z.array(z.string()),
31
- predecessorPlanNodeIDs: z.array(z.string()),
32
- predecessorNodeNames: z.array(z.string()),
33
- description: z.string().nullable(),
34
- sql: z.string().nullable(),
35
- selectItems: z.array(PlannedSelectItemSchema),
36
- outputColumnNames: z.array(z.string()),
37
- configOverrides: NodeConfigInputSchema,
38
- sourceRefs: z.array(z
39
- .object({
40
- locationName: z.string(),
41
- nodeName: z.string(),
42
- alias: z.string().nullable(),
43
- nodeID: z.string().nullable(),
44
- })
45
- .strict()),
46
- joinCondition: z.string().nullable(),
47
- location: z
48
- .object({
49
- locationName: z.string().optional(),
50
- database: z.string().optional(),
51
- schema: z.string().optional(),
52
- })
53
- .strict(),
54
- requiresFullSetNode: z.boolean(),
55
- templateDefaults: z
56
- .object({
57
- inferredTopLevelFields: z.record(z.unknown()),
58
- inferredConfig: NodeConfigInputSchema,
59
- })
60
- .strict()
61
- .optional(),
62
- })
63
- .strict();
64
- export const PipelinePlanSchema = z
65
- .object({
66
- version: z.literal(1),
67
- intent: z.enum(["sql", "goal"]),
68
- status: z.enum(["ready", "needs_clarification"]),
69
- workspaceID: z.string(),
70
- platform: z.string().nullable(),
71
- goal: z.string().nullable(),
72
- sql: z.string().nullable(),
73
- nodes: z.array(PlannedPipelineNodeSchema),
74
- assumptions: z.array(z.string()),
75
- openQuestions: z.array(z.string()),
76
- warnings: z.array(z.string()),
77
- supportedNodeTypes: z.array(z.string()),
78
- nodeTypeSelection: z.record(z.unknown()).optional(),
79
- cteNodeSummary: z.array(z.record(z.unknown())).optional(),
80
- STOP_AND_CONFIRM: z.string().optional(),
81
- })
82
- .strict();
83
- const WORKSPACE_NODE_PAGE_LIMIT = 200;
84
- export const DEFAULT_STAGE_CONFIG = {
85
- postSQL: "",
86
- preSQL: "",
87
- testsEnabled: true,
88
- };
89
- export function normalizeSqlIdentifier(identifier) {
90
- return identifier.trim().replace(/^["`[]|["`\]]$/g, "").toUpperCase();
91
- }
92
- export function deepClone(value) {
93
- return JSON.parse(JSON.stringify(value));
94
- }
95
- export function normalizeWhitespace(value) {
96
- return value.replace(/\s+/g, " ").trim();
97
- }
98
- export function buildSourceDependencyKey(locationName, nodeName) {
99
- return `${normalizeSqlIdentifier(locationName ?? "")}::${normalizeSqlIdentifier(nodeName)}`;
100
- }
101
- export function getUniqueSourceDependencies(sourceRefs) {
102
- const seen = new Set();
103
- const dependencies = [];
104
- for (const ref of sourceRefs) {
105
- const key = buildSourceDependencyKey(ref.locationName, ref.nodeName);
106
- if (seen.has(key)) {
107
- continue;
108
- }
109
- seen.add(key);
110
- dependencies.push({
111
- locationName: ref.locationName,
112
- nodeName: ref.nodeName,
113
- });
114
- }
115
- return dependencies;
116
- }
117
- function isIdentifierChar(char) {
118
- return !!char && /[A-Za-z0-9_$]/.test(char);
119
- }
120
- function stripIdentifierQuotes(identifier) {
121
- const trimmed = identifier.trim();
122
- if ((trimmed.startsWith('"') && trimmed.endsWith('"')) ||
123
- (trimmed.startsWith("`") && trimmed.endsWith("`")) ||
124
- (trimmed.startsWith("[") && trimmed.endsWith("]"))) {
125
- return trimmed.slice(1, -1);
126
- }
127
- return trimmed;
128
- }
129
- function findTopLevelKeywordIndex(sql, keyword, startIndex = 0) {
130
- const lowerKeyword = keyword.toLowerCase();
131
- let parenDepth = 0;
132
- let inSingleQuote = false;
133
- let inDoubleQuote = false;
134
- let inBacktick = false;
135
- let inBracket = false;
136
- let inLineComment = false;
137
- let inBlockComment = false;
138
- for (let index = startIndex; index < sql.length; index += 1) {
139
- const char = sql[index];
140
- const next = sql[index + 1];
141
- if (inLineComment) {
142
- if (char === "\n") {
143
- inLineComment = false;
144
- }
145
- continue;
146
- }
147
- if (inBlockComment) {
148
- if (char === "*" && next === "/") {
149
- inBlockComment = false;
150
- index += 1;
151
- }
152
- continue;
153
- }
154
- if (inSingleQuote) {
155
- if (char === "'" && next === "'") {
156
- index += 1;
157
- }
158
- else if (char === "'") {
159
- inSingleQuote = false;
160
- }
161
- continue;
162
- }
163
- if (inDoubleQuote) {
164
- if (char === '"') {
165
- inDoubleQuote = false;
166
- }
167
- continue;
168
- }
169
- if (inBacktick) {
170
- if (char === "`") {
171
- inBacktick = false;
172
- }
173
- continue;
174
- }
175
- if (inBracket) {
176
- if (char === "]") {
177
- inBracket = false;
178
- }
179
- continue;
180
- }
181
- if (char === "'") {
182
- inSingleQuote = true;
183
- continue;
184
- }
185
- if (char === '"') {
186
- inDoubleQuote = true;
187
- continue;
188
- }
189
- if (char === "`") {
190
- inBacktick = true;
191
- continue;
192
- }
193
- if (char === "[") {
194
- inBracket = true;
195
- continue;
196
- }
197
- if (char === "-" && next === "-") {
198
- inLineComment = true;
199
- index += 1;
200
- continue;
201
- }
202
- if (char === "/" && next === "*") {
203
- inBlockComment = true;
204
- index += 1;
205
- continue;
206
- }
207
- if (char === "(") {
208
- parenDepth += 1;
209
- continue;
210
- }
211
- if (char === ")" && parenDepth > 0) {
212
- parenDepth -= 1;
213
- continue;
214
- }
215
- if (parenDepth !== 0) {
216
- continue;
217
- }
218
- if (sql.slice(index, index + lowerKeyword.length).toLowerCase() === lowerKeyword &&
219
- !isIdentifierChar(sql[index - 1]) &&
220
- !isIdentifierChar(sql[index + lowerKeyword.length])) {
221
- return index;
222
- }
223
- }
224
- return -1;
225
- }
226
- /**
227
- * Iterates through a SQL string character-by-character, tracking quoting and
228
- * parenthesis depth. For each top-level (unquoted, depth-0) character the
229
- * callback receives the character, its index, and the current paren depth.
230
- * The callback returns `true` to continue or `false` to stop early.
231
- *
232
- * The scanner handles: single-quoted strings (with '' escapes), double-quoted
233
- * identifiers, backtick-quoted identifiers, bracket-quoted identifiers, block
234
- * comments, and line comments.
235
- */
236
- function scanTopLevel(value, callback) {
237
- let parenDepth = 0;
238
- let inSingleQuote = false;
239
- let inDoubleQuote = false;
240
- let inBacktick = false;
241
- let inBracket = false;
242
- let inLineComment = false;
243
- let inBlockComment = false;
244
- for (let index = 0; index < value.length; index += 1) {
245
- const char = value[index];
246
- const next = value[index + 1];
247
- if (inLineComment) {
248
- if (char === "\n") {
249
- inLineComment = false;
250
- }
251
- continue;
252
- }
253
- if (inBlockComment) {
254
- if (char === "*" && next === "/") {
255
- inBlockComment = false;
256
- index += 1;
257
- }
258
- continue;
259
- }
260
- if (inSingleQuote) {
261
- if (char === "'" && next === "'") {
262
- index += 1;
263
- }
264
- else if (char === "'") {
265
- inSingleQuote = false;
266
- }
267
- continue;
268
- }
269
- if (inDoubleQuote) {
270
- if (char === '"')
271
- inDoubleQuote = false;
272
- continue;
273
- }
274
- if (inBacktick) {
275
- if (char === "`")
276
- inBacktick = false;
277
- continue;
278
- }
279
- if (inBracket) {
280
- if (char === "]")
281
- inBracket = false;
282
- continue;
283
- }
284
- if (char === "'") {
285
- inSingleQuote = true;
286
- continue;
287
- }
288
- if (char === '"') {
289
- inDoubleQuote = true;
290
- continue;
291
- }
292
- if (char === "`") {
293
- inBacktick = true;
294
- continue;
295
- }
296
- if (char === "[") {
297
- inBracket = true;
298
- continue;
299
- }
300
- if (char === "-" && next === "-") {
301
- inLineComment = true;
302
- index += 1;
303
- continue;
304
- }
305
- if (char === "/" && next === "*") {
306
- inBlockComment = true;
307
- index += 1;
308
- continue;
309
- }
310
- if (char === "(") {
311
- parenDepth += 1;
312
- continue;
313
- }
314
- if (char === ")" && parenDepth > 0) {
315
- parenDepth -= 1;
316
- continue;
317
- }
318
- if (!callback(char, index, parenDepth)) {
319
- return;
320
- }
321
- }
322
- }
323
- function splitTopLevel(value, delimiter) {
324
- const parts = [];
325
- let start = 0;
326
- scanTopLevel(value, (char, index, parenDepth) => {
327
- if (char === delimiter && parenDepth === 0) {
328
- parts.push(value.slice(start, index).trim());
329
- start = index + 1;
330
- }
331
- return true;
332
- });
333
- const tail = value.slice(start).trim();
334
- if (tail.length > 0) {
335
- parts.push(tail);
336
- }
337
- return parts;
338
- }
339
- function tokenizeTopLevelWhitespace(value) {
340
- const parts = [];
341
- let tokenStart = null;
342
- let tokenEnd = 0;
343
- let tokenText = "";
344
- let parenDepth = 0;
345
- let inSingleQuote = false;
346
- let inDoubleQuote = false;
347
- let inBacktick = false;
348
- let inBracket = false;
349
- let inLineComment = false;
350
- let inBlockComment = false;
351
- const appendChar = (char, index) => {
352
- if (tokenStart === null) {
353
- tokenStart = index;
354
- }
355
- tokenText += char;
356
- tokenEnd = index + 1;
357
- };
358
- const flushToken = () => {
359
- if (tokenStart === null || tokenText.length === 0) {
360
- tokenStart = null;
361
- tokenEnd = 0;
362
- tokenText = "";
363
- return;
364
- }
365
- parts.push({
366
- text: tokenText,
367
- start: tokenStart,
368
- end: tokenEnd,
369
- });
370
- tokenStart = null;
371
- tokenEnd = 0;
372
- tokenText = "";
373
- };
374
- for (let index = 0; index < value.length; index += 1) {
375
- const char = value[index];
376
- const next = value[index + 1];
377
- if (inLineComment) {
378
- if (char === "\n") {
379
- inLineComment = false;
380
- }
381
- continue;
382
- }
383
- if (inBlockComment) {
384
- if (char === "*" && next === "/") {
385
- inBlockComment = false;
386
- index += 1;
387
- }
388
- continue;
389
- }
390
- if (inSingleQuote) {
391
- appendChar(char, index);
392
- if (char === "'" && next === "'") {
393
- appendChar(next, index + 1);
394
- index += 1;
395
- }
396
- else if (char === "'") {
397
- inSingleQuote = false;
398
- }
399
- continue;
400
- }
401
- if (inDoubleQuote) {
402
- appendChar(char, index);
403
- if (char === '"') {
404
- inDoubleQuote = false;
405
- }
406
- continue;
407
- }
408
- if (inBacktick) {
409
- appendChar(char, index);
410
- if (char === "`") {
411
- inBacktick = false;
412
- }
413
- continue;
414
- }
415
- if (inBracket) {
416
- appendChar(char, index);
417
- if (char === "]") {
418
- inBracket = false;
419
- }
420
- continue;
421
- }
422
- if (char === "-" && next === "-" && parenDepth === 0) {
423
- flushToken();
424
- inLineComment = true;
425
- index += 1;
426
- continue;
427
- }
428
- if (char === "/" && next === "*" && parenDepth === 0) {
429
- flushToken();
430
- inBlockComment = true;
431
- index += 1;
432
- continue;
433
- }
434
- if (/\s/u.test(char) && parenDepth === 0) {
435
- flushToken();
436
- continue;
437
- }
438
- if (char === "'") {
439
- appendChar(char, index);
440
- inSingleQuote = true;
441
- continue;
442
- }
443
- if (char === '"') {
444
- appendChar(char, index);
445
- inDoubleQuote = true;
446
- continue;
447
- }
448
- if (char === "`") {
449
- appendChar(char, index);
450
- inBacktick = true;
451
- continue;
452
- }
453
- if (char === "[") {
454
- appendChar(char, index);
455
- inBracket = true;
456
- continue;
457
- }
458
- if (char === "(") {
459
- appendChar(char, index);
460
- parenDepth += 1;
461
- continue;
462
- }
463
- if (char === ")" && parenDepth > 0) {
464
- parenDepth -= 1;
465
- appendChar(char, index);
466
- continue;
467
- }
468
- appendChar(char, index);
469
- }
470
- flushToken();
471
- return parts;
472
- }
473
- function splitTopLevelWhitespace(value) {
474
- return tokenizeTopLevelWhitespace(value).map((part) => part.text);
475
- }
476
- function skipSqlTrivia(value, index) {
477
- let nextIndex = index;
478
- while (nextIndex < value.length) {
479
- if (/\s/u.test(value[nextIndex] ?? "")) {
480
- nextIndex += 1;
481
- continue;
482
- }
483
- if (value[nextIndex] === "-" && value[nextIndex + 1] === "-") {
484
- nextIndex += 2;
485
- while (nextIndex < value.length && value[nextIndex] !== "\n") {
486
- nextIndex += 1;
487
- }
488
- continue;
489
- }
490
- if (value[nextIndex] === "/" && value[nextIndex + 1] === "*") {
491
- const blockEnd = value.indexOf("*/", nextIndex + 2);
492
- nextIndex = blockEnd >= 0 ? blockEnd + 2 : value.length;
493
- continue;
494
- }
495
- break;
496
- }
497
- return nextIndex;
498
- }
499
- function matchesKeywordAt(value, index, keyword) {
500
- return (value.slice(index, index + keyword.length).toLowerCase() === keyword &&
501
- !isIdentifierChar(value[index - 1]) &&
502
- !isIdentifierChar(value[index + keyword.length]));
503
- }
504
- function extractSelectClause(sql) {
505
- const selectIndex = findTopLevelKeywordIndex(sql, "select");
506
- if (selectIndex < 0) {
507
- return null;
508
- }
509
- const fromIndex = findTopLevelKeywordIndex(sql, "from", selectIndex + 6);
510
- if (fromIndex < 0) {
511
- return null;
512
- }
513
- return sql.slice(selectIndex + 6, fromIndex).trim();
514
- }
515
- function extractFromClause(sql) {
516
- const selectIndex = findTopLevelKeywordIndex(sql, "select");
517
- if (selectIndex < 0) {
518
- return null;
519
- }
520
- const fromIndex = findTopLevelKeywordIndex(sql, "from", selectIndex + 6);
521
- if (fromIndex < 0) {
522
- return null;
523
- }
524
- return sql
525
- .slice(fromIndex)
526
- .trim()
527
- .replace(/;+\s*$/u, "");
528
- }
529
- /** Keywords that terminate a source segment in a FROM clause. */
530
- const SOURCE_SEGMENT_TERMINATORS = [
531
- "join", "left", "right", "inner", "full", "cross", "natural", "lateral",
532
- "on", "using",
533
- "where", "group", "order", "having", "limit", "qualify",
534
- "union", "intersect", "except", "window", "fetch",
535
- ];
536
- function findTerminatorKeyword(value, index) {
537
- for (const keyword of SOURCE_SEGMENT_TERMINATORS) {
538
- if (matchesKeywordAt(value, index, keyword)) {
539
- return keyword;
540
- }
541
- }
542
- return null;
543
- }
544
/**
 * Walk a FROM clause with scanTopLevel and collect the text of each relation
 * that appears at parenthesis depth zero after FROM, JOIN, or a comma.
 *
 * @param {string} fromClause - Text beginning at the FROM keyword.
 * @returns {Array<{text: string, relationStart: number, relationEnd: number}>}
 *   One entry per captured relation; the offsets index into `fromClause`.
 */
function extractTopLevelSourceSegments(fromClause) {
    const found = [];
    // Offset where the relation being captured begins; null while not capturing.
    let openAt = null;
    const beginAfter = (offset) => {
        openAt = skipSqlTrivia(fromClause, offset);
    };
    const closeSegment = (stopAt) => {
        if (openAt === null) {
            return;
        }
        // Back up over trailing whitespace so the recorded span is tight.
        let end = stopAt;
        while (end > openAt && /\s/u.test(fromClause[end - 1] ?? "")) {
            end -= 1;
        }
        if (end <= openAt) {
            return;
        }
        found.push({
            text: fromClause.slice(openAt, end),
            relationStart: openAt,
            relationEnd: end,
        });
    };
    scanTopLevel(fromClause, (char, index, parenDepth) => {
        if (parenDepth !== 0) {
            return true;
        }
        if (openAt === null) {
            // Not capturing: wait for something that introduces a relation.
            if (matchesKeywordAt(fromClause, index, "from") ||
                matchesKeywordAt(fromClause, index, "join")) {
                beginAfter(index + 4);
            }
            else if (char === ",") {
                beginAfter(index + 1);
            }
            return true;
        }
        if (char === ",") {
            closeSegment(index);
            beginAfter(index + 1);
            return true;
        }
        const terminator = findTerminatorKeyword(fromClause, index);
        if (terminator) {
            closeSegment(index);
            if (terminator === "join") {
                // A bare JOIN both ends the previous relation and starts the next.
                beginAfter(index + terminator.length);
            }
            else {
                openAt = null;
            }
        }
        return true;
    });
    // Flush a relation that runs to the end of the clause.
    closeSegment(fromClause.length);
    return found;
}
597
/**
 * Decide whether a token is a single identifier in one of the accepted
 * spellings: bare (letters, digits, `_`, `$`, not starting with a digit),
 * double-quoted, backtick-quoted, or square-bracketed.
 *
 * @param {string} token - Candidate identifier text.
 * @returns {boolean} True when the whole token matches one accepted spelling.
 */
function isSupportedIdentifierToken(token) {
    const acceptedShapes = [
        /^[A-Za-z_][\w$]*$/u,
        /^"[^"]+"$/u,
        /^`[^`]+`$/u,
        /^\[[^\]]+\]$/u,
    ];
    return acceptedShapes.some((shape) => shape.test(token));
}
603
/**
 * Interpret one captured FROM/JOIN segment as a source reference.
 *
 * Two spellings are recognised: a `{{ ref('LOCATION', 'NODE') }}` template
 * and a dotted identifier path such as `db.schema.table`. Anything else
 * (including a parenthesised relation) yields null.
 *
 * @param {{text: string, relationStart: number}} segment - Segment produced by
 *   extractTopLevelSourceSegments.
 * @returns {object | null} Reference descriptor with locationName, nodeName,
 *   alias, nodeID (always null here), sourceStyle, locationCandidates and the
 *   relationStart/relationEnd offsets of the relation token, or null.
 */
function parseSqlSourceSegment(segment) {
    const { text } = segment;
    const relationOffset = skipSqlTrivia(text, 0);
    if (relationOffset >= text.length) {
        return null;
    }
    let relationText;
    let tokenStart;
    let tokenEnd;
    let trailingWords;
    if (text.startsWith("{{", relationOffset)) {
        // Template form: the relation is everything up to the closing braces.
        const closeAt = text.indexOf("}}", relationOffset);
        if (closeAt < 0) {
            return null;
        }
        tokenStart = relationOffset;
        tokenEnd = closeAt + 2;
        relationText = text.slice(tokenStart, tokenEnd).trim();
        trailingWords = tokenizeTopLevelWhitespace(text.slice(tokenEnd)).map((token) => token.text);
    }
    else {
        const tokens = tokenizeTopLevelWhitespace(text);
        if (tokens.length === 0) {
            return null;
        }
        const head = tokens[0];
        relationText = head.text;
        tokenStart = head.start;
        tokenEnd = head.end;
        trailingWords = tokens.slice(1).map((token) => token.text);
    }
    // The alias is the word after an optional AS.
    const aliasWord = trailingWords[0]?.toLowerCase() === "as" ? trailingWords[1] : trailingWords[0];
    const alias = aliasWord ? stripIdentifierQuotes(aliasWord) : null;
    const span = {
        relationStart: segment.relationStart + tokenStart,
        relationEnd: segment.relationStart + tokenEnd,
    };
    const refMatch = relationText.match(/^\{\{\s*ref\(\s*(['"])([^'"]+)\1\s*,\s*(['"])([^'"]+)\3\s*\)\s*\}\}$/iu);
    if (refMatch) {
        const location = refMatch[2];
        return {
            locationName: location ?? "",
            nodeName: refMatch[4] ?? "",
            alias,
            nodeID: null,
            sourceStyle: "coalesce_ref",
            locationCandidates: location ? [location] : [],
            ...span,
        };
    }
    if (relationText.startsWith("(")) {
        return null;
    }
    const pathParts = splitTopLevel(relationText, ".").map((part) => part.trim());
    const pathIsValid = pathParts.length > 0 &&
        pathParts.every((part) => part.length > 0 && isSupportedIdentifierToken(part));
    if (!pathIsValid) {
        return null;
    }
    const names = pathParts.map(stripIdentifierQuotes);
    return {
        locationName: "",
        nodeName: names[names.length - 1] ?? "",
        alias,
        nodeID: null,
        sourceStyle: "table_name",
        // Qualifiers nearest the table name come first.
        locationCandidates: names.slice(0, -1).reverse(),
        ...span,
    };
}
672
/**
 * Extract the FROM clause of a statement and parse every captured source
 * segment in it into a reference descriptor.
 *
 * @param {string} sql - SQL statement to inspect.
 * @returns {{fromClause: string, refs: object[]}} The FROM clause text (empty
 *   string when none was found) and the segments that parsed successfully.
 */
function parseSqlSourceRefs(sql) {
    const fromClause = extractFromClause(sql);
    if (!fromClause) {
        return { fromClause: "", refs: [] };
    }
    const refs = [];
    for (const segment of extractTopLevelSourceSegments(fromClause)) {
        const parsed = parseSqlSourceSegment(segment);
        if (parsed !== null) {
            refs.push(parsed);
        }
    }
    return { fromClause, refs };
}
682
/**
 * Split one SELECT-list item into its expression and its output alias.
 *
 * Recognised forms, tried in order:
 *   1. `<expression> AS <identifier>`
 *   2. `<expression> <identifier>` (bare alias) — accepted only when the
 *      expression contains `.` or `(`, so a lone two-word item is not split.
 *
 * The item is trimmed before matching and the `AS` pattern uses the dotAll
 * flag. Without that, `.` stops at a line break and `$` fails on trailing
 * whitespace, so an aliased expression that spans lines (a typical
 * `CASE ... END AS name`) was reported as having no alias.
 *
 * The bare-alias pattern deliberately stays single-line: its heuristic is
 * loose, and letting it cross line breaks would let the closing keyword of a
 * multi-line expression be taken for an alias.
 *
 * @param {string} rawItem - One comma-separated item from a SELECT list.
 * @returns {{expression: string, outputName: string | null}} The trimmed
 *   expression and the unquoted alias, or null when no alias was recognised.
 */
function splitExpressionAlias(rawItem) {
    const item = rawItem.trim();
    const asMatch = item.match(/^(.*?)(?:\s+AS\s+)([A-Za-z_][\w$]*|"[^"]+"|`[^`]+`|\[[^\]]+\])$/is);
    if (asMatch) {
        return {
            expression: asMatch[1]?.trim() ?? item,
            outputName: stripIdentifierQuotes(asMatch[2] ?? ""),
        };
    }
    const bareAliasMatch = item.match(/^(.*?)(?:\s+)([A-Za-z_][\w$]*|"[^"]+"|`[^`]+`|\[[^\]]+\])$/);
    if (bareAliasMatch) {
        const candidateExpression = bareAliasMatch[1]?.trim() ?? item;
        if (candidateExpression.includes(".") || candidateExpression.includes("(")) {
            return {
                expression: candidateExpression,
                outputName: stripIdentifierQuotes(bareAliasMatch[2] ?? ""),
            };
        }
    }
    return {
        expression: item,
        outputName: null,
    };
}
705
/**
 * Interpret an expression as a plain (optionally qualified) column reference.
 *
 * @param {string} expression - Expression text from a SELECT item.
 * @returns {{sourceNodeAlias: string | null, sourceColumnName: string} | null}
 *   The last path part as the column and the part before it (if any) as the
 *   alias, both unquoted; null for `*` or for anything that is not a dotted
 *   path of supported identifiers.
 */
function parseDirectColumnExpression(expression) {
    const text = expression.trim();
    if (text === "*") {
        return null;
    }
    const pathParts = splitTopLevel(text, ".").map((part) => part.trim());
    const pathIsValid = pathParts.length > 0 &&
        pathParts.every((part) => part.length > 0 && isSupportedIdentifierToken(part));
    if (!pathIsValid) {
        return null;
    }
    const lastIndex = pathParts.length - 1;
    const qualifier = pathParts.length >= 2
        ? stripIdentifierQuotes(pathParts[lastIndex - 1] ?? "")
        : null;
    return {
        sourceNodeAlias: qualifier,
        sourceColumnName: stripIdentifierQuotes(pathParts[lastIndex] ?? ""),
    };
}
720
/**
 * Interpret an expression as a wildcard projection (`*` or `alias.*`).
 *
 * @param {string} expression - Expression text from a SELECT item.
 * @returns {{sourceNodeAlias: string | null} | null} `{ sourceNodeAlias: null }`
 *   for a bare `*`; the unquoted qualifier immediately before `.*` for a
 *   qualified wildcard; null when the expression is not a wildcard.
 */
function parseWildcardExpression(expression) {
    const text = expression.trim();
    if (text === "*") {
        return { sourceNodeAlias: null };
    }
    const pathParts = splitTopLevel(text, ".").map((part) => part.trim());
    if (pathParts.length < 2 || pathParts[pathParts.length - 1] !== "*") {
        return null;
    }
    const qualifiers = pathParts.slice(0, -1);
    const qualifiersAreValid = qualifiers.every((part) => part.length > 0 && isSupportedIdentifierToken(part));
    if (!qualifiersAreValid) {
        return null;
    }
    return {
        sourceNodeAlias: stripIdentifierQuotes(qualifiers[qualifiers.length - 1] ?? ""),
    };
}
735
/**
 * Render a list of values as a single comma-separated string.
 *
 * @param {Array<unknown>} values - Items to join.
 * @returns {string} The items separated by ", ".
 */
function listToQuestion(values) {
    const separator = ", ";
    return values.join(separator);
}
738
/**
 * Parse the SELECT list of a statement into projection descriptors and tie
 * each one to the source reference it reads from.
 *
 * Each item is classified as a wildcard, a direct column, or a computed
 * expression. Items that cannot be handled are kept in the result with
 * `supported: false` and a `reason`.
 *
 * @param {string} sql - SQL statement to inspect.
 * @param {object[]} refs - Source references parsed from the FROM clause.
 * @returns {{refs: object[], selectItems: object[], warnings: string[]}}
 */
function parseSqlSelectItems(sql, refs) {
    const warnings = [];
    // Index refs by alias, falling back to the node name when there is no alias.
    const refsByAlias = new Map(refs.map((ref) => [normalizeSqlIdentifier(ref.alias ?? ref.nodeName), ref]));
    const selectClause = extractSelectClause(sql);
    if (!selectClause) {
        return {
            refs,
            selectItems: [],
            warnings: ["Could not find a top-level SELECT ... FROM clause in the SQL."],
        };
    }
    const findRefByAlias = (alias) => refsByAlias.get(normalizeSqlIdentifier(alias)) ?? null;
    const toSelectItem = (rawItem) => {
        const { expression, outputName } = splitExpressionAlias(rawItem);
        const wildcard = parseWildcardExpression(expression);
        if (wildcard) {
            const qualifier = wildcard.sourceNodeAlias;
            if (qualifier === null && refs.length !== 1) {
                return {
                    expression,
                    outputName: null,
                    sourceNodeAlias: null,
                    sourceNodeName: null,
                    sourceNodeID: null,
                    sourceColumnName: null,
                    kind: "expression",
                    supported: false,
                    reason: "Unqualified * is only supported when exactly one predecessor ref is present.",
                };
            }
            const starRef = qualifier === null ? refs[0] ?? null : findRefByAlias(qualifier);
            if (!starRef) {
                return {
                    expression,
                    outputName: null,
                    sourceNodeAlias: qualifier,
                    sourceNodeName: null,
                    sourceNodeID: null,
                    sourceColumnName: null,
                    kind: "expression",
                    supported: false,
                    reason: "Wildcard source alias could not be resolved to a predecessor ref.",
                };
            }
            // Kept as a "*" placeholder; expansion happens once predecessor nodes are loaded.
            return {
                expression,
                outputName: null,
                sourceNodeAlias: qualifier ?? starRef.alias ?? starRef.nodeName,
                sourceNodeName: starRef.nodeName,
                sourceNodeID: starRef.nodeID,
                sourceColumnName: "*",
                kind: "expression",
                supported: true,
            };
        }
        const directColumn = parseDirectColumnExpression(expression);
        if (!directColumn) {
            // Computed expression: usable only when it names its output column.
            if (outputName === null) {
                return {
                    expression,
                    outputName: null,
                    sourceNodeAlias: null,
                    sourceNodeName: null,
                    sourceNodeID: null,
                    sourceColumnName: null,
                    kind: "expression",
                    supported: false,
                    reason: "Computed expressions require an alias (e.g., CASE ... END AS column_name)",
                };
            }
            return {
                expression,
                outputName,
                sourceNodeAlias: null,
                sourceNodeName: null,
                sourceNodeID: null,
                sourceColumnName: null,
                kind: "expression",
                supported: true,
            };
        }
        const { sourceNodeAlias, sourceColumnName } = directColumn;
        let columnRef;
        if (sourceNodeAlias !== null) {
            columnRef = findRefByAlias(sourceNodeAlias);
        }
        else {
            // An unqualified column is unambiguous only with a single source.
            columnRef = refs.length === 1 ? refs[0] ?? null : null;
        }
        if (!columnRef) {
            return {
                expression,
                outputName: outputName ?? sourceColumnName,
                sourceNodeAlias,
                sourceNodeName: null,
                sourceNodeID: null,
                sourceColumnName,
                kind: "column",
                supported: false,
                reason: sourceNodeAlias === null
                    ? "Unqualified columns are only supported when exactly one predecessor ref is present."
                    : `The source alias ${sourceNodeAlias} did not match a predecessor ref.`,
            };
        }
        return {
            expression,
            outputName: outputName ?? sourceColumnName,
            sourceNodeAlias: sourceNodeAlias ?? columnRef.alias ?? columnRef.nodeName,
            sourceNodeName: columnRef.nodeName,
            sourceNodeID: columnRef.nodeID,
            sourceColumnName,
            kind: "column",
            supported: true,
        };
    };
    const selectItems = splitTopLevel(selectClause, ",").map((rawItem) => toSelectItem(rawItem));
    if (selectItems.length === 0) {
        warnings.push("The SQL SELECT clause did not produce any supported projected columns.");
    }
    return { refs, selectItems, warnings };
}
870
/**
 * Page through listWorkspaceNodes until no further cursor is returned and
 * collect a summary of every node that has a string id and name.
 *
 * @param {object} client - API client forwarded to listWorkspaceNodes.
 * @param {string} workspaceID - Workspace whose nodes are listed.
 * @returns {Promise<Array<{id: string, name: string, nodeType: string | null, locationName: string | null}>>}
 * @throws {Error} When a page is not a plain object, or when the service
 *   hands back a cursor that was already returned earlier (which would
 *   otherwise loop forever).
 */
async function listAllWorkspaceNodes(client, workspaceID) {
    const collected = [];
    const cursorsSeen = new Set();
    let cursor;
    do {
        const page = await listWorkspaceNodes(client, {
            workspaceID,
            limit: WORKSPACE_NODE_PAGE_LIMIT,
            orderBy: "id",
            ...(cursor ? { startingFrom: cursor } : {}),
        });
        if (!isPlainObject(page)) {
            throw new Error("Workspace node list response was not an object");
        }
        const rows = Array.isArray(page.data) ? page.data : [];
        for (const row of rows) {
            const isUsable = isPlainObject(row) && typeof row.id === "string" && typeof row.name === "string";
            if (!isUsable) {
                continue;
            }
            collected.push({
                id: row.id,
                name: row.name,
                nodeType: typeof row.nodeType === "string" ? row.nodeType : null,
                locationName: typeof row.locationName === "string" ? row.locationName : null,
            });
        }
        // Accept a non-blank string cursor or a numeric one; anything else ends paging.
        let upcoming;
        if (typeof page.next === "string" && page.next.trim().length > 0) {
            upcoming = page.next;
        }
        else if (typeof page.next === "number") {
            upcoming = String(page.next);
        }
        if (upcoming) {
            if (cursorsSeen.has(upcoming)) {
                throw new Error(`Workspace node pagination repeated cursor ${upcoming}`);
            }
            cursorsSeen.add(upcoming);
        }
        cursor = upcoming;
    } while (cursor);
    return collected;
}
914
/**
 * Read a node's `locationName` when it is a string with at least one
 * non-whitespace character.
 *
 * @param {object} node - Node-like object.
 * @returns {string | null} The location name exactly as stored (not trimmed), or null.
 */
function getNodeLocationName(node) {
    const { locationName } = node;
    const isUsable = typeof locationName === "string" && locationName.trim().length > 0;
    return isUsable ? locationName : null;
}
920
/**
 * Resolve parsed SQL source references to concrete workspace nodes.
 *
 * Pass 1 matches each ref to listed workspace nodes by normalized name and
 * narrows by location hints; when several same-named nodes remain, their
 * full bodies are fetched to compare locations. Pass 2 fetches the body of
 * every resolved node, rejects `{{ ref() }}` sources whose location
 * disagrees with the node's, and records the body.
 *
 * The entries of `refs` are updated in place (`nodeID`, `locationName`).
 *
 * @param {object} client - API client forwarded to the workspace helpers.
 * @param {string} workspaceID - Workspace to search.
 * @param {object[]} refs - References produced by the SQL source parser.
 * @returns {Promise<{refs: object[], openQuestions: string[], warnings: string[], predecessorNodes: Record<string, object>}>}
 */
async function resolveSqlRefsToWorkspaceNodes(client, workspaceID, refs) {
    const warnings = [];
    const openQuestions = [];
    const predecessorNodes = {};
    if (refs.length === 0) {
        openQuestions.push("Which upstream Coalesce node(s) should this pipeline build from? Use a top-level FROM/JOIN that names existing workspace nodes (raw table names or {{ ref('LOCATION', 'NODE') }} syntax), or provide sourceNodeIDs.");
        return { refs, openQuestions, warnings, predecessorNodes };
    }
    // Group listed nodes by normalized name so each ref is a single lookup.
    const nodesByName = new Map();
    for (const node of await listAllWorkspaceNodes(client, workspaceID)) {
        const key = normalizeSqlIdentifier(node.name);
        const bucket = nodesByName.get(key);
        if (bucket) {
            bucket.push(node);
        }
        else {
            nodesByName.set(key, [node]);
        }
    }
    // Bind a ref to a listed node, adopting its location when the ref has none.
    const bindToListedNode = (ref, listed) => {
        ref.nodeID = listed?.id ?? null;
        if (!ref.locationName && listed?.locationName) {
            ref.locationName = listed.locationName;
        }
    };
    for (const ref of refs) {
        const matches = nodesByName.get(normalizeSqlIdentifier(ref.nodeName)) ?? [];
        if (matches.length === 0) {
            openQuestions.push(`Could not resolve the SQL source ${ref.nodeName} to a workspace node ID in workspace ${workspaceID}.`);
            continue;
        }
        const locationHints = [
            ...(ref.locationName ? [ref.locationName] : []),
            ...ref.locationCandidates,
        ].map(normalizeSqlIdentifier);
        const hasHints = locationHints.length > 0;
        const ambiguousMessage = `Multiple workspace nodes matched the SQL source ${ref.nodeName}. Resolve the exact node before creation.`;
        const hintedMatches = hasHints
            ? matches.filter((entry) => entry.locationName &&
                locationHints.includes(normalizeSqlIdentifier(entry.locationName)))
            : [];
        if (hintedMatches.length === 1) {
            bindToListedNode(ref, hintedMatches[0]);
            continue;
        }
        if (hintedMatches.length > 1) {
            openQuestions.push(ambiguousMessage);
            continue;
        }
        if (matches.length === 1) {
            bindToListedNode(ref, matches[0]);
            continue;
        }
        // Several same-named nodes and the listing could not tell them apart:
        // fetch each body and compare its location against the hints.
        const detailedMatches = await Promise.all(matches.map(async (match) => {
            const body = await getWorkspaceNode(client, {
                workspaceID,
                nodeID: match.id,
            });
            return {
                match,
                node: isPlainObject(body) ? body : null,
            };
        }));
        const exactLocationMatches = hasHints
            ? detailedMatches.filter(({ node }) => {
                const location = node ? getNodeLocationName(node) : null;
                return Boolean(location) && locationHints.includes(normalizeSqlIdentifier(location));
            })
            : [];
        if (exactLocationMatches.length === 1) {
            const winner = exactLocationMatches[0];
            ref.nodeID = winner?.match.id ?? null;
            if (!ref.locationName) {
                ref.locationName = getNodeLocationName(winner?.node ?? {}) ?? "";
            }
            continue;
        }
        if (exactLocationMatches.length > 1) {
            openQuestions.push(ambiguousMessage);
            continue;
        }
        if (ref.sourceStyle === "coalesce_ref" && ref.locationName) {
            openQuestions.push(`Workspace nodes named ${ref.nodeName} were found, but none matched the requested location ${ref.locationName}.`);
            continue;
        }
        openQuestions.push(`Multiple workspace nodes named ${ref.nodeName} were found. Qualify the SQL source more clearly or provide sourceNodeIDs before creation.`);
    }
    for (const ref of refs) {
        if (!ref.nodeID) {
            continue;
        }
        const predecessor = await getWorkspaceNode(client, {
            workspaceID,
            nodeID: ref.nodeID,
        });
        if (!isPlainObject(predecessor)) {
            warnings.push(`Resolved predecessor ${ref.nodeName} did not return an object body.`);
            continue;
        }
        const actualLocation = getNodeLocationName(predecessor);
        const locationDisagrees = ref.sourceStyle === "coalesce_ref" &&
            actualLocation &&
            normalizeSqlIdentifier(actualLocation) !== normalizeSqlIdentifier(ref.locationName);
        if (locationDisagrees) {
            // An explicit ref() names its location; a mismatch un-resolves the ref.
            ref.nodeID = null;
            openQuestions.push(`Resolved node ${ref.nodeName} is in location ${actualLocation}, not the requested location ${ref.locationName}.`);
            continue;
        }
        if (!ref.locationName && actualLocation) {
            ref.locationName = actualLocation;
        }
        predecessorNodes[ref.nodeID] = predecessor;
    }
    return { refs, openQuestions, warnings, predecessorNodes };
}
1031
/**
 * Produce the FROM clause of a statement with every resolved plain-table
 * source rewritten as `{{ ref('LOCATION', 'NODE') }}`.
 *
 * @param {string} sql - SQL statement to inspect.
 * @param {object[]} refs - Source references carrying relationStart/relationEnd
 *   offsets into the FROM clause.
 * @returns {string | null} The rewritten FROM clause, or null when the
 *   statement has no FROM clause.
 */
function buildJoinConditionFromSql(sql, refs) {
    const fromClause = extractFromClause(sql);
    if (!fromClause) {
        return null;
    }
    const rewritable = refs.filter((candidate) => candidate.sourceStyle === "table_name" && candidate.locationName);
    // Substitute from the rightmost relation backwards so earlier offsets stay valid.
    rewritable.sort((left, right) => right.relationStart - left.relationStart);
    return rewritable.reduce((text, ref) => {
        const replacement = `{{ ref('${ref.locationName}', '${ref.nodeName}') }}`;
        return text.slice(0, ref.relationStart) + replacement + text.slice(ref.relationEnd);
    }, fromClause);
}
1048
/**
 * List the column names found under `node.metadata.columns`.
 *
 * @param {object} node - Node body.
 * @returns {string[]} Names of the column entries that are plain objects with
 *   a string `name`; empty when metadata or its columns array is missing.
 */
export function getColumnNamesFromNode(node) {
    const columns = isPlainObject(node.metadata) ? node.metadata.columns : undefined;
    if (!Array.isArray(columns)) {
        return [];
    }
    const names = [];
    for (const column of columns) {
        if (isPlainObject(column) && typeof column.name === "string") {
            names.push(column.name);
        }
    }
    return names;
}
1060
/**
 * Build one pass-through column projection per column of a source node.
 *
 * @param {string} sourceNodeID - ID of the source node.
 * @param {string} sourceNodeName - Name of the source node; also used as its alias.
 * @param {object} node - Source node body whose columns are read.
 * @returns {object[]} Supported "column" select items, one per column name.
 */
function buildSelectItemsFromSourceNode(sourceNodeID, sourceNodeName, node) {
    const items = [];
    for (const columnName of getColumnNamesFromNode(node)) {
        items.push({
            expression: `${sourceNodeName}.${columnName}`,
            outputName: columnName,
            sourceNodeAlias: sourceNodeName,
            sourceNodeName,
            sourceNodeID,
            sourceColumnName: columnName,
            kind: "column",
            supported: true,
        });
    }
    return items;
}
1072
/**
 * Fetch each requested source node and describe it as a source reference.
 *
 * Nodes that cannot be read or have no usable name are skipped with an open
 * question. A node without a location is still included, with the
 * placeholder location "UNKNOWN_LOCATION" and an open question.
 *
 * @param {object} client - API client forwarded to getWorkspaceNode.
 * @param {string} workspaceID - Workspace containing the nodes.
 * @param {string[]} sourceNodeIDs - IDs to fetch, processed one at a time in order.
 * @returns {Promise<{sourceRefs: object[], predecessorNodes: Record<string, object>, openQuestions: string[], warnings: string[]}>}
 */
async function getSourceNodesByID(client, workspaceID, sourceNodeIDs) {
    const outcome = {
        sourceRefs: [],
        predecessorNodes: {},
        openQuestions: [],
        warnings: [],
    };
    for (const sourceNodeID of sourceNodeIDs) {
        const node = await getWorkspaceNode(client, {
            workspaceID,
            nodeID: sourceNodeID,
        });
        if (!isPlainObject(node)) {
            outcome.openQuestions.push(`Could not read source node ${sourceNodeID} in workspace ${workspaceID}.`);
            continue;
        }
        const hasName = typeof node.name === "string" && node.name.trim().length > 0;
        if (!hasName) {
            outcome.openQuestions.push(`Source node ${sourceNodeID} does not have a usable name.`);
            continue;
        }
        const locationName = getNodeLocationName(node);
        if (!locationName) {
            outcome.openQuestions.push(`Source node ${node.name} does not expose locationName. Clarify the Coalesce location before generating ref() SQL for this pipeline.`);
        }
        outcome.predecessorNodes[sourceNodeID] = node;
        outcome.sourceRefs.push({
            locationName: locationName ?? "UNKNOWN_LOCATION",
            nodeName: node.name,
            alias: node.name,
            nodeID: sourceNodeID,
        });
    }
    return outcome;
}
1109
/**
 * Replace each supported wildcard select item with one column item per
 * column of the predecessor it refers to. Other items pass through as-is.
 *
 * A wildcard that cannot be expanded is kept but marked `supported: false`
 * with a reason.
 *
 * @param {object[]} selectItems - Items from the SELECT list parser.
 * @param {object[]} refs - Resolved source references.
 * @param {Record<string, object>} predecessorNodes - Node bodies keyed by node ID.
 * @returns {object[]} The select items with wildcards expanded.
 */
function expandWildcardSelectItems(selectItems, refs, predecessorNodes) {
    const reject = (item, reason) => ({ ...item, supported: false, reason });
    return selectItems.flatMap((item) => {
        const isExpandableWildcard = item.sourceColumnName === "*" && item.supported;
        if (!isExpandableWildcard) {
            return [item];
        }
        // Prefer matching by node ID; otherwise match by alias or node name.
        const ref = item.sourceNodeID
            ? refs.find((candidate) => candidate.nodeID === item.sourceNodeID) ?? null
            : refs.find((candidate) => normalizeSqlIdentifier(candidate.alias ?? candidate.nodeName) ===
                normalizeSqlIdentifier(item.sourceNodeAlias ?? "")) ?? null;
        if (!ref?.nodeID) {
            return [reject(item, "Wildcard source could not be resolved to a concrete predecessor node.")];
        }
        const predecessor = predecessorNodes[ref.nodeID];
        if (!predecessor) {
            return [reject(item, "Wildcard source predecessor body was not available for column expansion.")];
        }
        const columnNames = getColumnNamesFromNode(predecessor);
        if (columnNames.length === 0) {
            return [reject(item, "Wildcard source predecessor has no columns to expand.")];
        }
        const alias = item.sourceNodeAlias;
        const hasAlias = Boolean(alias) && alias.length > 0;
        return columnNames.map((columnName) => ({
            expression: hasAlias ? `${alias}.${columnName}` : columnName,
            outputName: columnName,
            sourceNodeAlias: alias,
            sourceNodeName: item.sourceNodeName,
            sourceNodeID: ref.nodeID,
            sourceColumnName: columnName,
            kind: "column",
            supported: true,
        }));
    });
}
1163
/** Default name prefix for each known node type family. */
const DEFAULT_NODE_PREFIX_BY_FAMILY = new Map([
    ["stage", "STG"],
    ["persistent-stage", "PSTG"],
    ["view", "VW"],
    ["work", "WRK"],
    ["dimension", "DIM"],
    ["fact", "FACT"],
    ["hub", "HUB"],
    ["satellite", "SAT"],
    ["link", "LNK"],
]);
/**
 * Choose the prefix used when generating a node name.
 *
 * A non-blank `shortName` wins: it is trimmed, upper-cased, and every run of
 * characters other than A-Z and 0-9 becomes a single underscore. Otherwise
 * the prefix comes from the node type family, falling back to "NODE".
 *
 * @param {string | null | undefined} nodeTypeFamily - Family of the selected node type.
 * @param {string | null | undefined} shortName - Optional short name of the node type.
 * @returns {string} The prefix.
 */
function buildDefaultNodePrefix(nodeTypeFamily, shortName) {
    const trimmedShortName = shortName ? shortName.trim() : "";
    if (trimmedShortName.length > 0) {
        return trimmedShortName.toUpperCase().replace(/[^A-Z0-9]+/g, "_");
    }
    return DEFAULT_NODE_PREFIX_BY_FAMILY.get(nodeTypeFamily) ?? "NODE";
}
1190
/**
 * Choose a name for the node being planned.
 *
 * A non-blank `targetName` is used as given (trimmed). Otherwise the name is
 * the default prefix joined to the first source's name with one leading
 * SRC/STG/DIM/FACT/FCT/INT/WORK/VW marker removed, upper-cased, with runs of
 * underscores collapsed. With no sources the name is `<prefix>_NEW_PIPELINE`.
 *
 * @param {string | null | undefined} targetName - Caller-requested name.
 * @param {object[]} refs - Source references; only the first is consulted.
 * @param {string | null} nodeTypeFamily - Family of the selected node type.
 * @param {string | null} shortName - Short name of the selected node type.
 * @returns {string} The node name.
 */
function buildDefaultNodeName(targetName, refs, nodeTypeFamily, shortName) {
    const requested = targetName ? targetName.trim() : "";
    if (requested.length > 0) {
        return requested;
    }
    const prefix = buildDefaultNodePrefix(nodeTypeFamily, shortName);
    const firstRef = refs[0];
    if (!firstRef) {
        return `${prefix}_NEW_PIPELINE`;
    }
    const baseName = firstRef.nodeName.replace(/^(SRC[_-]?|STG[_-]?|DIM[_-]?|FACT[_-]?|FCT[_-]?|INT[_-]?|WORK[_-]?|VW[_-]?)/i, "");
    const combined = `${prefix}_${baseName}`;
    return combined.toUpperCase().replace(/__+/g, "_");
}
1202
/**
 * Check whether a requested node type appears among the observed ones.
 *
 * Two types match when they are identical strings, or when their IDs are
 * equal. For a string containing ":::" the ID is the piece after the first
 * ":::"; otherwise the whole string is the ID.
 *
 * @param {string} requestedNodeType - Node type being asked for.
 * @param {string[]} observedNodeTypes - Node types seen in the workspace.
 * @returns {boolean} True when any observed type matches.
 */
function matchesObservedNodeType(requestedNodeType, observedNodeTypes) {
    const separator = ":::";
    const idOf = (nodeType) => nodeType.includes(separator)
        ? nodeType.split(separator)[1] ?? nodeType
        : nodeType;
    const requestedID = idOf(requestedNodeType);
    return observedNodeTypes.some((observed) => observed === requestedNodeType || idOf(observed) === requestedID);
}
1214
/**
 * Fetch the node types observed in a workspace, degrading to an empty
 * inventory with a warning when the lookup fails.
 *
 * @param {object} client - API client forwarded to listWorkspaceNodeTypes.
 * @param {string} workspaceID - Workspace to inspect.
 * @returns {Promise<{nodeTypes: string[], counts: object, total: number, warnings: string[]}>}
 * @throws {CoalesceApiError} Re-thrown when its status is 401, 403, 500 or 503.
 */
async function getWorkspaceNodeTypeInventory(client, workspaceID) {
    try {
        const { nodeTypes, counts, total } = await listWorkspaceNodeTypes(client, { workspaceID });
        return {
            nodeTypes: nodeTypes ?? [],
            counts: counts ?? {},
            total: total ?? 0,
            warnings: [],
        };
    }
    catch (error) {
        // These statuses are treated as a broken session and are not recoverable here.
        const fatalStatuses = [401, 403, 500, 503];
        const isFatal = error instanceof CoalesceApiError && fatalStatuses.includes(error.status);
        if (isFatal) {
            throw error;
        }
        const reason = error instanceof Error ? error.message : String(error);
        const warning = `Observed workspace node types could not be fetched for workspace ${workspaceID} (${reason}). ` +
            `Node type selection will use defaults — use list_workspace_node_types or cache_workspace_nodes to confirm installation before execution.`;
        return {
            nodeTypes: [],
            counts: {},
            total: 0,
            warnings: [warning],
        };
    }
}
1241
/**
 * Fold workspace node type observations into a plan.
 *
 * Inventory warnings are always appended. When the inventory is non-empty,
 * any node type used by the plan (or explicitly requested) that was not
 * observed adds a warning and sets the plan to "needs_clarification".
 *
 * @param {object} plan - Plan to update in place (`warnings`, `status`).
 * @param {{nodeTypes: string[], total: number, warnings: string[]}} inventory -
 *   Result of the workspace node type lookup.
 * @param {string | null | undefined} requestedNodeType - Node type the caller asked for.
 * @returns {void}
 */
function applyWorkspaceNodeTypeValidation(plan, inventory, requestedNodeType) {
    plan.warnings.push(...inventory.warnings);
    if (inventory.total === 0) {
        return;
    }
    // A Set keeps first-seen order while dropping repeats.
    const typesToCheck = new Set();
    for (const node of plan.nodes ?? []) {
        const { nodeType } = node;
        if (typeof nodeType === "string" && nodeType.length > 0) {
            typesToCheck.add(nodeType);
        }
    }
    if (requestedNodeType && requestedNodeType.trim().length > 0) {
        typesToCheck.add(requestedNodeType);
    }
    const missingTypes = [...typesToCheck].filter((nodeType) => !matchesObservedNodeType(nodeType, inventory.nodeTypes));
    if (missingTypes.length === 0) {
        return;
    }
    plan.warnings.push(`The following node types were not observed in current workspace nodes: ${missingTypes.join(", ")}. This observation is based on existing nodes, not a true installed-type registry. Confirm installation in Coalesce before creating nodes of these types.`);
    plan.status = "needs_clarification";
}
1258
/**
 * Assemble a single-node pipeline plan from parsed SQL.
 *
 * The plan is "ready" only when the selected node type is auto-executable
 * (or none was selected), at least one predecessor resolved, at least one
 * supported output column exists, and there are no unsupported items,
 * parser warnings, or open questions. Otherwise it is "needs_clarification".
 *
 * Note: messages are appended to the `warnings` array passed in.
 *
 * @param {object} params - Planning request (workspaceID, sql, goal, targetName,
 *   selectedNodeType, nodeTypeSelection, and related options).
 * @param {{refs: object[], selectItems: object[], warnings: string[]}} parseResult -
 *   Output of the SQL parsers.
 * @param {Record<string, object>} predecessorNodes - Node bodies keyed by node ID.
 * @param {string[]} openQuestions - Questions gathered so far (copied, not mutated).
 * @param {string[]} warnings - Warnings gathered so far (mutated).
 * @returns {object} The plan.
 */
function buildPlanFromSql(params, parseResult, predecessorNodes, openQuestions, warnings) {
    const selected = params.selectedNodeType;
    const nodeType = selected?.nodeType ?? params.targetNodeType ?? "Stage";
    const planOpenQuestions = [...openQuestions];
    if (!selected) {
        warnings.push(`No ranked node type candidate was available, so planning fell back to ${nodeType}.`);
    }
    else if (!selected.autoExecutable) {
        warnings.push(`Planner selected node type ${nodeType}, but it likely needs additional semantic configuration before automatic creation.`);
        if (selected.semanticSignals.length > 0) {
            planOpenQuestions.push(`Confirm the required configuration for ${nodeType}: ${selected.semanticSignals.join(", ")}.`);
        }
        if (selected.missingDefaultFields.length > 0) {
            planOpenQuestions.push(`Provide values for ${nodeType} config fields without defaults: ${selected.missingDefaultFields.join(", ")}.`);
        }
    }
    const expandedSelectItems = expandWildcardSelectItems(parseResult.selectItems, parseResult.refs, predecessorNodes);
    const unsupportedItems = expandedSelectItems.filter((item) => !item.supported);
    for (const item of unsupportedItems) {
        const detail = item.reason ? item.reason : "unsupported SQL projection in v1";
        warnings.push(`${item.expression}: ${detail}`);
    }
    const supportedOutputColumnCount = expandedSelectItems.filter((item) => item.supported && item.outputName).length;
    const selectClauseMissing = parseResult.warnings.some((warning) => warning.includes("Could not find a top-level SELECT ... FROM clause"));
    if (selectClauseMissing) {
        planOpenQuestions.push("Provide a top-level SELECT ... FROM query using direct column projections before creating this pipeline.");
    }
    else if (supportedOutputColumnCount === 0) {
        planOpenQuestions.push("Specify at least one supported projected column before creating this pipeline.");
    }
    const predecessorNodeIDs = uniqueInOrder(parseResult.refs.flatMap((ref) => ref.nodeID ? [ref.nodeID] : []));
    const predecessorNodeNames = parseResult.refs.map((ref) => ref.nodeName);
    const ready = (selected?.autoExecutable ?? true) &&
        predecessorNodeIDs.length > 0 &&
        supportedOutputColumnCount > 0 &&
        unsupportedItems.length === 0 &&
        parseResult.warnings.length === 0 &&
        planOpenQuestions.length === 0;
    const family = selected?.family ?? null;
    const name = buildDefaultNodeName(params.targetName, parseResult.refs, family, selected?.shortName ?? null);
    const planNode = {
        planNodeID: "node-1",
        name,
        nodeType,
        nodeTypeFamily: family,
        predecessorNodeIDs,
        predecessorPlanNodeIDs: [],
        predecessorNodeNames,
        description: params.description ?? null,
        sql: params.sql,
        selectItems: expandedSelectItems,
        outputColumnNames: expandedSelectItems.flatMap((item) => item.outputName ? [item.outputName] : []),
        configOverrides: params.configOverrides ? deepClone(params.configOverrides) : {},
        sourceRefs: parseResult.refs.map(({ locationName, nodeName, alias, nodeID }) => ({
            locationName,
            nodeName,
            alias,
            nodeID,
        })),
        joinCondition: buildJoinConditionFromSql(params.sql, parseResult.refs),
        location: params.location ?? {},
        requiresFullSetNode: true,
        ...(selected?.templateDefaults
            ? { templateDefaults: selected.templateDefaults }
            : {}),
    };
    const { supportedNodeTypes } = params.nodeTypeSelection;
    return {
        version: 1,
        intent: "sql",
        status: ready ? "ready" : "needs_clarification",
        workspaceID: params.workspaceID,
        platform: null,
        goal: params.goal ?? null,
        sql: params.sql,
        nodes: [planNode],
        assumptions: [
            `Planner ${params.nodeTypeSelection.strategy} selected ${nodeType} from repo/workspace candidates.`,
            "The generated plan uses create_workspace_node_from_predecessor followed by set_workspace_node when the selected type is projection-capable.",
        ],
        openQuestions: planOpenQuestions,
        warnings: [...parseResult.warnings, ...warnings],
        supportedNodeTypes: supportedNodeTypes.length > 0 ? supportedNodeTypes : [nodeType],
        nodeTypeSelection: params.nodeTypeSelection,
    };
}
1347
/**
 * Extract the common table expressions of a statement that begins with WITH.
 *
 * After the WITH keyword the scanner repeatedly skips whitespace and commas,
 * matches a `name AS (` header (bare, double-quoted, backtick-quoted or
 * bracketed name), and uses findClosingParen to find where that CTE's body
 * ends. Scanning stops at the first position that is not a CTE header, which
 * is taken to be the final query.
 *
 * The body span comes from findClosingParen alone. An earlier
 * extractParenBody call whose result was shadowed and never read has been
 * removed.
 *
 * @param {string} sql - SQL statement to inspect.
 * @returns {Array<{name: string, body: string, columns: Array, whereClause: (string|null), sourceTable: (string|null), hasGroupBy: boolean, hasJoin: boolean}>}
 *   One entry per CTE in source order, with `name` upper-cased. Empty when
 *   the statement does not start with WITH. A CTE whose parentheses never
 *   close is reported with an empty body and ends the scan.
 */
function extractCtes(sql) {
    const trimmed = sql.trim();
    // The statement must start with WITH for there to be any CTEs.
    const withIdx = findTopLevelKeywordIndex(trimmed, "WITH");
    if (withIdx !== 0) {
        return [];
    }
    const ctes = [];
    let cursor = withIdx + 4; // skip past "WITH"
    while (cursor < trimmed.length) {
        // Skip whitespace and commas between CTEs.
        const leadingMatch = trimmed.slice(cursor).match(/^[\s,]+/);
        if (leadingMatch) {
            cursor += leadingMatch[0].length;
        }
        if (cursor >= trimmed.length) {
            break;
        }
        const headerMatch = trimmed.slice(cursor).match(/^([A-Za-z_][\w$]*|"[^"]+"|`[^`]+`|\[[^\]]+\])\s+AS\s*\(/i);
        if (!headerMatch) {
            break; // No more CTE headers — the rest is the final SELECT.
        }
        const name = stripIdentifierQuotes(headerMatch[1]).toUpperCase();
        const bodyStart = cursor + headerMatch[0].length;
        const closeIdx = findClosingParen(trimmed, bodyStart);
        if (closeIdx < 0) {
            // Unbalanced parentheses: record the name and stop scanning.
            ctes.push({ name, body: "", columns: [], whereClause: null, sourceTable: null, hasGroupBy: false, hasJoin: false });
            break;
        }
        const body = trimmed.slice(bodyStart, closeIdx).trim();
        const columns = parseCteColumns(body);
        const whereClause = extractCteWhereClause(body);
        const sourceTable = extractCteSourceTable(body);
        const hasGroupBy = findTopLevelKeywordIndex(body, "GROUP") >= 0;
        const hasJoin = findTopLevelKeywordIndex(body, "JOIN") >= 0;
        ctes.push({ name, body, columns, whereClause, sourceTable, hasGroupBy, hasJoin });
        // Continue after the closing parenthesis.
        cursor = closeIdx + 1;
    }
    return ctes;
}
1398
- /**
1399
- * Find the index of the closing parenthesis that balances the opening one.
1400
- * `startIndex` should be the position right after the opening '('.
1401
- * Returns the index of the closing ')' or -1 if unbalanced.
1402
- *
1403
- * Handles all SQL quoting contexts: single-quoted strings, double-quoted
1404
- * identifiers, backtick-quoted identifiers, bracket-quoted identifiers,
1405
- * line comments (`--`), and block comments.
1406
- */
1407
- function findClosingParen(sql, startIndex) {
1408
- let depth = 1;
1409
- let inSingleQuote = false;
1410
- let inDoubleQuote = false;
1411
- let inBacktick = false;
1412
- let inBracket = false;
1413
- let inLineComment = false;
1414
- let inBlockComment = false;
1415
- for (let i = startIndex; i < sql.length; i++) {
1416
- const ch = sql[i];
1417
- const next = sql[i + 1];
1418
- if (inLineComment) {
1419
- if (ch === "\n")
1420
- inLineComment = false;
1421
- continue;
1422
- }
1423
- if (inBlockComment) {
1424
- if (ch === "*" && next === "/") {
1425
- inBlockComment = false;
1426
- i++;
1427
- }
1428
- continue;
1429
- }
1430
- if (inSingleQuote) {
1431
- if (ch === "'" && next === "'") {
1432
- i++;
1433
- }
1434
- else if (ch === "'") {
1435
- inSingleQuote = false;
1436
- }
1437
- continue;
1438
- }
1439
- if (inDoubleQuote) {
1440
- if (ch === '"')
1441
- inDoubleQuote = false;
1442
- continue;
1443
- }
1444
- if (inBacktick) {
1445
- if (ch === "`")
1446
- inBacktick = false;
1447
- continue;
1448
- }
1449
- if (inBracket) {
1450
- if (ch === "]")
1451
- inBracket = false;
1452
- continue;
1453
- }
1454
- if (ch === "'") {
1455
- inSingleQuote = true;
1456
- continue;
1457
- }
1458
- if (ch === '"') {
1459
- inDoubleQuote = true;
1460
- continue;
1461
- }
1462
- if (ch === "`") {
1463
- inBacktick = true;
1464
- continue;
1465
- }
1466
- if (ch === "[") {
1467
- inBracket = true;
1468
- continue;
1469
- }
1470
- if (ch === "-" && next === "-") {
1471
- inLineComment = true;
1472
- i++;
1473
- continue;
1474
- }
1475
- if (ch === "/" && next === "*") {
1476
- inBlockComment = true;
1477
- i++;
1478
- continue;
1479
- }
1480
- if (ch === "(") {
1481
- depth++;
1482
- }
1483
- else if (ch === ")") {
1484
- depth--;
1485
- if (depth === 0)
1486
- return i;
1487
- }
1488
- }
1489
- return -1;
1490
- }
1491
- /**
1492
- * Extract the body between balanced parentheses.
1493
- * `startIndex` should be the position right after the opening '('.
1494
- */
1495
- function extractParenBody(sql, startIndex) {
1496
- const closeIdx = findClosingParen(sql, startIndex);
1497
- if (closeIdx < 0)
1498
- return null;
1499
- return sql.slice(startIndex, closeIdx).trim();
1500
- }
1501
- /**
1502
- * Parse a CTE body's SELECT list into columns with transform detection.
1503
- *
1504
- * Handles `SELECT * FROM (subquery) WHERE ...` by recursing into the subquery.
1505
- */
1506
- function parseCteColumns(body) {
1507
- const selectClause = extractSelectClause(body);
1508
- if (!selectClause)
1509
- return [];
1510
- const rawItems = splitTopLevel(selectClause, ",");
1511
- // Detect "SELECT * FROM (subquery)" — recurse into the subquery
1512
- if (rawItems.length === 1 && /^\*$/.test(rawItems[0].trim())) {
1513
- const subqueryBody = extractSubqueryFromFrom(body);
1514
- if (subqueryBody) {
1515
- return parseCteColumns(subqueryBody);
1516
- }
1517
- return [];
1518
- }
1519
- const columns = [];
1520
- for (const rawItem of rawItems) {
1521
- const { expression, outputName } = splitExpressionAlias(rawItem);
1522
- const trimmedExpr = expression.trim();
1523
- // Skip wildcards
1524
- if (/^\*$/.test(trimmedExpr) || /\.\*$/.test(trimmedExpr))
1525
- continue;
1526
- const bareColName = extractBareColumnName(trimmedExpr)?.toUpperCase() ?? null;
1527
- const colName = (outputName?.toUpperCase() ?? bareColName);
1528
- if (!colName)
1529
- continue;
1530
- // Detect transforms: anything that isn't a simple column reference,
1531
- // OR a column rename (AS alias differs from the source column name).
1532
- // Renames need a transform so preserveColumnLinkage can match by the NEW name
1533
- // and propagate the expression into sources[*].transform.
1534
- const isRename = outputName !== null && bareColName !== null && outputName.toUpperCase() !== bareColName;
1535
- const isTransform = !isSimpleColumnRef(trimmedExpr) || isRename;
1536
- columns.push({
1537
- outputName: colName,
1538
- expression: trimmedExpr,
1539
- isTransform,
1540
- });
1541
- }
1542
- return columns;
1543
- }
1544
- /**
1545
- * Extract the subquery body from `FROM (subquery)`.
1546
- * Returns the SQL inside the parentheses, or null if FROM doesn't start with a subquery.
1547
- */
1548
- function extractSubqueryFromFrom(sql) {
1549
- const fromIndex = findTopLevelKeywordIndex(sql, "from");
1550
- if (fromIndex < 0)
1551
- return null;
1552
- const afterFrom = sql.slice(fromIndex + 4).trimStart();
1553
- if (!afterFrom.startsWith("("))
1554
- return null;
1555
- return extractParenBody(afterFrom, 1);
1556
- }
1557
- /**
1558
- * Check if an expression is a simple column reference (no transform needed).
1559
- * Simple: `col`, `"col"`, `table.col`, `table."col"`, `"table"."col"`
1560
- */
1561
- function isSimpleColumnRef(expr) {
1562
- // Simple: identifier or qualified identifier (with optional quotes)
1563
- return /^(?:[A-Za-z_][\w$]*|"[^"]+")(?:\.(?:[A-Za-z_][\w$]*|"[^"]+"))?$/.test(expr.trim());
1564
- }
1565
- /**
1566
- * Extract a bare column name from a simple reference like `table.col` or `col`.
1567
- */
1568
- function extractBareColumnName(expr) {
1569
- const match = expr.trim().match(/(?:.*\.)?([A-Za-z_][\w$]*|"[^"]+")$/);
1570
- if (!match?.[1])
1571
- return null;
1572
- return stripIdentifierQuotes(match[1]);
1573
- }
1574
- /**
1575
- * Extract WHERE clause from a CTE body (ignoring subqueries).
1576
- * Uses quoting-aware keyword search to avoid matching inside strings or comments.
1577
- */
1578
- function extractCteWhereClause(body) {
1579
- const whereIdx = findTopLevelKeywordIndex(body, "WHERE");
1580
- if (whereIdx < 0)
1581
- return null;
1582
- const afterWhere = whereIdx + 5; // "WHERE".length
1583
- // Find the first clause terminator after WHERE
1584
- const terminators = ["GROUP", "ORDER", "HAVING", "LIMIT", "QUALIFY"];
1585
- let endIdx = body.length;
1586
- for (const kw of terminators) {
1587
- const idx = findTopLevelKeywordIndex(body, kw, afterWhere);
1588
- if (idx >= 0 && idx < endIdx) {
1589
- endIdx = idx;
1590
- }
1591
- }
1592
- const clause = body.slice(afterWhere, endIdx).trim();
1593
- return clause || null;
1594
- }
1595
- const AGGREGATE_FUNCTIONS = new Set([
1596
- "COUNT", "SUM", "AVG", "MIN", "MAX",
1597
- "LISTAGG", "ARRAY_AGG", "MEDIAN", "MODE",
1598
- "STDDEV", "VARIANCE", "ANY_VALUE",
1599
- "COUNT_IF", "SUM_IF", "AVG_IF",
1600
- "APPROX_COUNT_DISTINCT", "HLL",
1601
- ]);
1602
- function isAggregateFn(name) {
1603
- return AGGREGATE_FUNCTIONS.has(name.toUpperCase());
1604
- }
1605
- /**
1606
- * Extract the main source table from a CTE body's FROM clause.
1607
- * Uses quoting-aware keyword search to avoid matching FROM inside strings or comments.
1608
- */
1609
- function extractCteSourceTable(body) {
1610
- const fromIdx = findTopLevelKeywordIndex(body, "FROM");
1611
- if (fromIdx < 0)
1612
- return null;
1613
- const afterFrom = body.slice(fromIdx + 4).trimStart();
1614
- const tableMatch = afterFrom.match(/^([A-Za-z_][\w$.]*(?:\.[A-Za-z_][\w$]*)*)/);
1615
- return tableMatch?.[1]?.toUpperCase() ?? null;
1616
- }
1617
- /**
1618
- * Classify a CTE's pattern to pick the right node type.
1619
- */
1620
- function classifyCtePattern(cte) {
1621
- if (cte.hasGroupBy)
1622
- return "aggregation";
1623
- if (cte.hasJoin)
1624
- return "multiSource";
1625
- return "staging";
1626
- }
1627
- /**
1628
- * Build a per-CTE instruction block that tells the agent exactly what transforms
1629
- * and filters to apply for this CTE.
1630
- */
1631
- function buildCteNodeInstruction(cte, nodeType) {
1632
- const lines = [];
1633
- lines.push(`## ${cte.name}`);
1634
- lines.push(`- nodeType: "${nodeType}"`);
1635
- if (cte.sourceTable) {
1636
- lines.push(`- source: ${cte.sourceTable}`);
1637
- }
1638
- const transforms = cte.columns.filter((c) => c.isTransform);
1639
- const passthroughCols = cte.columns.filter((c) => !c.isTransform);
1640
- if (cte.hasGroupBy) {
1641
- lines.push(`- AGGREGATION NODE: pass groupByColumns + aggregates directly to create_workspace_node_from_predecessor (single call)`);
1642
- }
1643
- else if (cte.columns.length > 0) {
1644
- lines.push(`- Pass columns array + whereCondition directly to create_workspace_node_from_predecessor (single call)`);
1645
- }
1646
- if (transforms.length > 0) {
1647
- lines.push(`- Column transforms:`);
1648
- for (const col of transforms) {
1649
- lines.push(` - ${col.outputName}: ${col.expression}`);
1650
- }
1651
- }
1652
- if (passthroughCols.length > 0) {
1653
- lines.push(`- Passthrough columns: ${passthroughCols.map((c) => c.outputName).join(", ")}`);
1654
- }
1655
- if (cte.columns.length > 0) {
1656
- lines.push(`- ONLY keep these ${cte.columns.length} columns: ${cte.columns.map((c) => c.outputName).join(", ")}`);
1657
- }
1658
- if (cte.whereClause) {
1659
- lines.push(`- WHERE filter (pass as whereCondition — do NOT construct {{ ref() }}): ${cte.whereClause}`);
1660
- }
1661
- if (cte.hasJoin) {
1662
- lines.push(`- Has JOIN — use apply_join_condition or update_workspace_node for join setup`);
1663
- }
1664
- return lines.join("\n");
1665
- }
1666
- /**
1667
- * When the user's SQL contains CTEs, return a plan that instructs the agent
1668
- * to break each CTE into a separate Coalesce node using the declarative tools.
1669
- * CTEs are not supported in Coalesce — each CTE should be its own node.
1670
- *
1671
- * The plan includes per-CTE structured data: column transforms, WHERE clauses,
1672
- * source tables, and which columns to keep/remove.
1673
- */
1674
- function buildCtePlan(params, ctes, nodeTypeSelections) {
1675
- const stagingType = nodeTypeSelections.staging.selectedNodeType ?? "Stage";
1676
- const multiSourceType = nodeTypeSelections.multiSource.selectedNodeType ?? stagingType;
1677
- const aggregationType = nodeTypeSelections.aggregation.selectedNodeType ?? stagingType;
1678
- const typeMap = {
1679
- staging: stagingType,
1680
- multiSource: multiSourceType,
1681
- aggregation: aggregationType,
1682
- };
1683
- // Build per-CTE instructions
1684
- const cteInstructions = [];
1685
- for (const cte of ctes) {
1686
- const pattern = classifyCtePattern(cte);
1687
- const nodeType = typeMap[pattern];
1688
- cteInstructions.push(buildCteNodeInstruction(cte, nodeType));
1689
- }
1690
- // Detect if any CTE references another CTE (pipeline dependency)
1691
- const cteNameSet = new Set(ctes.map((c) => c.name));
1692
- const cteDependencies = [];
1693
- for (const cte of ctes) {
1694
- const deps = ctes
1695
- .filter((other) => other.name !== cte.name && cte.body.toUpperCase().includes(other.name))
1696
- .map((other) => other.name);
1697
- if (deps.length > 0) {
1698
- cteDependencies.push(`${cte.name} depends on: ${deps.join(", ")}`);
1699
- }
1700
- }
1701
- // Detect the final SELECT after all CTEs
1702
- const finalSelectNote = extractFinalSelectFromCteQuery(params.sql ?? "", cteNameSet);
1703
- const allTransformCount = ctes.reduce((sum, cte) => sum + cte.columns.filter((c) => c.isTransform).length, 0);
1704
- const allFilterCount = ctes.filter((c) => c.whereClause).length;
1705
- // Build structured per-CTE summary for easy agent consumption
1706
- // Includes columnsParam / groupByColumnsParam / aggregatesParam for single-call creation
1707
- const cteNodeSummary = ctes.map((cte) => {
1708
- const pattern = classifyCtePattern(cte);
1709
- const nodeType = typeMap[pattern];
1710
- const transforms = cte.columns.filter((c) => c.isTransform);
1711
- const summary = {
1712
- name: cte.name,
1713
- nodeType,
1714
- pattern,
1715
- sourceTable: cte.sourceTable,
1716
- columnCount: cte.columns.length,
1717
- transforms: transforms.map((c) => ({ column: c.outputName, expression: c.expression })),
1718
- passthroughColumns: cte.columns.filter((c) => !c.isTransform).map((c) => c.outputName),
1719
- whereFilter: cte.whereClause,
1720
- hasGroupBy: cte.hasGroupBy,
1721
- hasJoin: cte.hasJoin,
1722
- dependsOn: ctes
1723
- .filter((other) => other.name !== cte.name && cte.body.toUpperCase().includes(other.name))
1724
- .map((other) => other.name),
1725
- };
1726
- // Add structured params for single-call creation
1727
- if (cte.hasGroupBy && cte.columns.length > 0) {
1728
- // GROUP BY CTEs: split columns into group-by (passthrough) and aggregates (transforms with agg functions)
1729
- const groupByCols = [];
1730
- const aggCols = [];
1731
- for (const col of cte.columns) {
1732
- const aggMatch = col.expression.match(/^(\w+)\s*\((.*)\)$/s);
1733
- if (col.isTransform && aggMatch && isAggregateFn(aggMatch[1])) {
1734
- aggCols.push({
1735
- name: col.outputName,
1736
- function: aggMatch[1].toUpperCase(),
1737
- expression: aggMatch[2].trim(),
1738
- });
1739
- }
1740
- else {
1741
- // Non-aggregate columns in a GROUP BY CTE are the GROUP BY dimensions
1742
- groupByCols.push(col.expression);
1743
- }
1744
- }
1745
- if (groupByCols.length > 0 && aggCols.length > 0) {
1746
- summary.groupByColumnsParam = groupByCols;
1747
- summary.aggregatesParam = aggCols;
1748
- }
1749
- }
1750
- else if (cte.columns.length > 0 && !cte.hasJoin) {
1751
- // Only set columnsParam for single-source CTEs where expressions can be passed directly.
1752
- // Multi-source JOIN CTEs have SQL aliases (soh.*, sl.*) that don't map to Coalesce node names —
1753
- // the agent must translate these to "NODE_NAME"."COLUMN" format.
1754
- summary.columnsParam = cte.columns.map((c) => ({
1755
- name: c.outputName,
1756
- ...(c.isTransform ? { transform: c.expression } : {}),
1757
- }));
1758
- }
1759
- return summary;
1760
- });
1761
- return {
1762
- version: 1,
1763
- intent: "sql",
1764
- status: "needs_clarification",
1765
- STOP_AND_CONFIRM: `STOP. Present the pipeline summary to the user in a table format and ask for confirmation BEFORE creating any nodes. For EACH node in cteNodeSummary, display: name, the EXACT nodeType string (e.g. "Coalesce-Base-Node-Types:::Stage"), pattern, transforms, and whereFilter. Use the cteNodeSummary array — do NOT paraphrase or simplify the nodeType values. Do NOT proceed until the user explicitly approves.`,
1766
- workspaceID: params.workspaceID,
1767
- platform: null,
1768
- goal: params.goal ?? null,
1769
- sql: params.sql ?? null,
1770
- nodes: [],
1771
- cteNodeSummary,
1772
- assumptions: [
1773
- `Parsed ${ctes.length} CTEs with ${allTransformCount} column transforms and ${allFilterCount} WHERE filters.`,
1774
- `Staging and aggregation CTEs: 1 call per node. Multi-source JOIN CTEs: 2 calls (create + apply_join_condition).`,
1775
- ],
1776
- openQuestions: [
1777
- `STOP: Present this pipeline summary to the user and ask "Should I proceed with creating these ${ctes.length} nodes?" Do NOT create nodes until the user confirms.`,
1778
- `This SQL uses CTEs (WITH ... AS), which Coalesce does not support as a single node. Each CTE must become a separate node.`,
1779
- `--- PER-CTE INSTRUCTIONS ---\n\n${cteInstructions.join("\n\n")}`,
1780
- ...(cteDependencies.length > 0
1781
- ? [`CTE dependencies (create in order):\n${cteDependencies.map((d) => ` - ${d}`).join("\n")}`]
1782
- : []),
1783
- ...(finalSelectNote ? [finalSelectNote] : []),
1784
- `Node type guidance (do NOT use list_workspace_node_types):\n` +
1785
- `- Staging CTEs (single-source): nodeType "${stagingType}"\n` +
1786
- `- Join/transform CTEs (multi-source): nodeType "${multiSourceType}"\n` +
1787
- `- Aggregation CTEs (GROUP BY): nodeType "${aggregationType}"`,
1788
- `Workflow per CTE:\n` +
1789
- `create_workspace_node_from_predecessor accepts columns, whereCondition, groupByColumns, and aggregates directly:\n` +
1790
- `- For staging/transform CTEs (single-source): 1 call — pass columns (from cteNodeSummary.columnsParam) + whereCondition\n` +
1791
- `- For GROUP BY CTEs: 1 call — pass groupByColumns (from cteNodeSummary.groupByColumnsParam) + aggregates (from cteNodeSummary.aggregatesParam)\n` +
1792
- `- For multi-source JOIN CTEs: 2 calls — first create_workspace_node_from_predecessor with columns + whereCondition, then apply_join_condition to set up FROM/JOIN/ON\n` +
1793
- `- Do NOT construct {{ ref() }} syntax — the FROM clause and joins are auto-generated\n` +
1794
- `- Pass repoPath to each call for automatic config completion`,
1795
- ],
1796
- warnings: [
1797
- `SQL contains ${ctes.length} CTEs: ${ctes.map((c) => c.name).join(", ")}. Each must be a separate Coalesce node.` +
1798
- (allTransformCount > 0 ? ` ${allTransformCount} column transforms detected.` : ``),
1799
- ],
1800
- supportedNodeTypes: nodeTypeSelections.staging.supportedNodeTypes.length > 0
1801
- ? nodeTypeSelections.staging.supportedNodeTypes
1802
- : [stagingType],
1803
- nodeTypeSelection: nodeTypeSelections.staging,
1804
- };
1805
- }
1806
- /**
1807
- * Extract information about the final SELECT after all CTEs.
1808
- */
1809
- /**
1810
- * Escape a string for use in a RegExp constructor, ensuring special characters
1811
- * like `$` in CTE names are treated as literals.
1812
- */
1813
- function escapeRegExp(value) {
1814
- return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1815
- }
1816
- function extractFinalSelectFromCteQuery(sql, cteNames) {
1817
- // Find the last top-level SELECT using quoting-aware scanning.
1818
- const trimmed = sql.trim();
1819
- let lastSelectIdx = -1;
1820
- scanTopLevel(trimmed, (_char, index, parenDepth) => {
1821
- if (parenDepth === 0 &&
1822
- trimmed.slice(index, index + 6).toUpperCase() === "SELECT" &&
1823
- !isIdentifierChar(trimmed[index - 1]) &&
1824
- !isIdentifierChar(trimmed[index + 6])) {
1825
- lastSelectIdx = index;
1826
- }
1827
- return true;
1828
- });
1829
- if (lastSelectIdx < 0)
1830
- return null;
1831
- const finalSelect = trimmed.slice(lastSelectIdx).trim();
1832
- // Check which CTEs the final SELECT references (escape names for safe regex)
1833
- const referencedCtes = [...cteNames].filter((name) => new RegExp(`\\b${escapeRegExp(name)}\\b`, "i").test(finalSelect));
1834
- if (referencedCtes.length === 0)
1835
- return null;
1836
- // Check if the final SELECT is just `SELECT * FROM single_cte` — redundant
1837
- const selectStarFromOne = referencedCtes.length === 1 &&
1838
- /^SELECT\s+\*\s+FROM\s+\w+\s*;?\s*$/i.test(finalSelect);
1839
- if (selectStarFromOne) {
1840
- return (`Final SELECT is just \`SELECT * FROM ${referencedCtes[0]}\` — this is redundant. ` +
1841
- `The last CTE node (${referencedCtes[0]}) already represents the final output. ` +
1842
- `Do NOT create an additional node for this.`);
1843
- }
1844
- return (`Final output query references: ${referencedCtes.join(", ")}. ` +
1845
- `Create a final node with these as predecessors. ` +
1846
- `The final SELECT is:\n${finalSelect.slice(0, 500)}${finalSelect.length > 500 ? "..." : ""}`);
1847
- }
1
+ // Re-exports for backward compatibility
2
+ export { PipelinePlanSchema, DEFAULT_STAGE_CONFIG } from "./planning-types.js";
3
+ export { normalizeSqlIdentifier, deepClone, normalizeWhitespace, buildSourceDependencyKey, getUniqueSourceDependencies, parseSqlSourceRefs, parseSqlSelectItems, extractCtes, escapeRegExp } from "./sql-parsing.js";
4
+ export { getColumnNamesFromNode, getNodeColumnArray, getColumnSourceNodeIDs, findMatchingBaseColumn, renameSourceMappingEntries, buildStageSourceMappingFromPlan } from "./column-helpers.js";
5
+ export { getWorkspaceNodeTypeInventory } from "./workspace-resolution.js";
6
+ import { extractCtes, parseSqlSourceRefs, parseSqlSelectItems, buildCtePlan, deepClone, } from "./sql-parsing.js";
7
+ import { resolveSqlRefsToWorkspaceNodes, getSourceNodesByID, buildSelectItemsFromSourceNode, buildDefaultNodeName, buildDefaultNodePrefix, buildPlanFromSql, applyWorkspaceNodeTypeValidation, getWorkspaceNodeTypeInventory as getInventory, } from "./workspace-resolution.js";
8
+ import { selectPipelineNodeType } from "./node-type-selection.js";
9
+ import { uniqueInOrder } from "../../utils.js";
1848
10
  export async function planPipeline(client, params) {
1849
11
  const location = {
1850
12
  ...(params.locationName ? { locationName: params.locationName } : {}),
1851
13
  ...(params.database ? { database: params.database } : {}),
1852
14
  ...(params.schema ? { schema: params.schema } : {}),
1853
15
  };
1854
- const workspaceNodeTypeInventory = await getWorkspaceNodeTypeInventory(client, params.workspaceID);
16
+ const workspaceNodeTypeInventory = await getInventory(client, params.workspaceID);
1855
17
  if (params.sql && params.sql.trim().length > 0) {
1856
18
  // Detect CTEs — Coalesce does not support CTEs. Each CTE should be a separate node.
1857
19
  const ctes = extractCtes(params.sql);
@@ -2122,112 +284,4 @@ export async function planPipeline(client, params) {
2122
284
  applyWorkspaceNodeTypeValidation(plan, workspaceNodeTypeInventory, params.targetNodeType);
2123
285
  return plan;
2124
286
  }
2125
- export function getNodeColumnArray(node) {
2126
- const metadata = isPlainObject(node.metadata) ? node.metadata : undefined;
2127
- if (!Array.isArray(metadata?.columns)) {
2128
- return [];
2129
- }
2130
- return metadata.columns.filter(isPlainObject);
2131
- }
2132
- export function getColumnSourceNodeIDs(column) {
2133
- if (!Array.isArray(column.sources)) {
2134
- return [];
2135
- }
2136
- const ids = new Set();
2137
- for (const source of column.sources) {
2138
- if (!isPlainObject(source) || !Array.isArray(source.columnReferences)) {
2139
- continue;
2140
- }
2141
- for (const ref of source.columnReferences) {
2142
- if (isPlainObject(ref) && typeof ref.nodeID === "string") {
2143
- ids.add(ref.nodeID);
2144
- }
2145
- }
2146
- }
2147
- return Array.from(ids);
2148
- }
2149
- export function findMatchingBaseColumn(node, selectItem) {
2150
- const normalizedTargetName = normalizeSqlIdentifier(selectItem.sourceColumnName ?? "");
2151
- for (const column of getNodeColumnArray(node)) {
2152
- if (typeof column.name !== "string" ||
2153
- normalizeSqlIdentifier(column.name) !== normalizedTargetName) {
2154
- continue;
2155
- }
2156
- const sourceNodeIDs = getColumnSourceNodeIDs(column);
2157
- if (selectItem.sourceNodeID && sourceNodeIDs.includes(selectItem.sourceNodeID)) {
2158
- return deepClone(column);
2159
- }
2160
- if (!selectItem.sourceNodeID) {
2161
- return deepClone(column);
2162
- }
2163
- }
2164
- return null;
2165
- }
2166
- export function renameSourceMappingEntries(node, newName) {
2167
- const metadata = isPlainObject(node.metadata) ? node.metadata : undefined;
2168
- if (!metadata || !Array.isArray(metadata.sourceMapping)) {
2169
- return node;
2170
- }
2171
- const previousName = typeof node.name === "string" && node.name.trim().length > 0 ? node.name : null;
2172
- const updateSingleUnnamedMapping = previousName === null && metadata.sourceMapping.length === 1;
2173
- return {
2174
- ...node,
2175
- metadata: {
2176
- ...metadata,
2177
- sourceMapping: metadata.sourceMapping.map((entry) => {
2178
- if (!isPlainObject(entry)) {
2179
- return entry;
2180
- }
2181
- const shouldRename = (previousName !== null && entry.name === previousName) ||
2182
- updateSingleUnnamedMapping;
2183
- if (!shouldRename) {
2184
- return entry;
2185
- }
2186
- return {
2187
- ...entry,
2188
- name: newName,
2189
- };
2190
- }),
2191
- },
2192
- };
2193
- }
2194
- export function buildStageSourceMappingFromPlan(currentNode, nodePlan) {
2195
- const metadata = isPlainObject(currentNode.metadata) ? currentNode.metadata : undefined;
2196
- const existingEntry = metadata && Array.isArray(metadata.sourceMapping)
2197
- ? metadata.sourceMapping.find(isPlainObject)
2198
- : undefined;
2199
- const aliases = {};
2200
- for (const ref of nodePlan.sourceRefs) {
2201
- if (!ref.nodeID) {
2202
- continue;
2203
- }
2204
- const alias = ref.alias ?? ref.nodeName;
2205
- if (nodePlan.sourceRefs.length > 1 || ref.alias) {
2206
- aliases[alias] = ref.nodeID;
2207
- }
2208
- }
2209
- return [
2210
- {
2211
- ...(isPlainObject(existingEntry) ? existingEntry : {}),
2212
- aliases,
2213
- customSQL: {
2214
- ...(isPlainObject(existingEntry) && isPlainObject(existingEntry.customSQL)
2215
- ? existingEntry.customSQL
2216
- : {}),
2217
- customSQL: "",
2218
- },
2219
- dependencies: getUniqueSourceDependencies(nodePlan.sourceRefs),
2220
- join: {
2221
- ...(isPlainObject(existingEntry) && isPlainObject(existingEntry.join)
2222
- ? existingEntry.join
2223
- : {}),
2224
- joinCondition: nodePlan.joinCondition ?? "",
2225
- },
2226
- name: nodePlan.name,
2227
- noLinkRefs: isPlainObject(existingEntry) && Array.isArray(existingEntry.noLinkRefs)
2228
- ? existingEntry.noLinkRefs
2229
- : [],
2230
- },
2231
- ];
2232
- }
2233
287
  //# sourceMappingURL=planning.js.map