coalesce-transform-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +304 -0
- package/dist/cache-dir.d.ts +26 -0
- package/dist/cache-dir.js +106 -0
- package/dist/client.d.ts +25 -0
- package/dist/client.js +212 -0
- package/dist/coalesce/api/environments.d.ts +20 -0
- package/dist/coalesce/api/environments.js +15 -0
- package/dist/coalesce/api/git-accounts.d.ts +21 -0
- package/dist/coalesce/api/git-accounts.js +21 -0
- package/dist/coalesce/api/jobs.d.ts +25 -0
- package/dist/coalesce/api/jobs.js +21 -0
- package/dist/coalesce/api/nodes.d.ts +29 -0
- package/dist/coalesce/api/nodes.js +33 -0
- package/dist/coalesce/api/projects.d.ts +22 -0
- package/dist/coalesce/api/projects.js +25 -0
- package/dist/coalesce/api/runs.d.ts +19 -0
- package/dist/coalesce/api/runs.js +34 -0
- package/dist/coalesce/api/subgraphs.d.ts +20 -0
- package/dist/coalesce/api/subgraphs.js +17 -0
- package/dist/coalesce/api/users.d.ts +30 -0
- package/dist/coalesce/api/users.js +31 -0
- package/dist/coalesce/types.d.ts +298 -0
- package/dist/coalesce/types.js +746 -0
- package/dist/generated/.gitkeep +0 -0
- package/dist/generated/node-type-corpus.json +42656 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +10 -0
- package/dist/mcp/cache.d.ts +3 -0
- package/dist/mcp/cache.js +137 -0
- package/dist/mcp/environments.d.ts +3 -0
- package/dist/mcp/environments.js +61 -0
- package/dist/mcp/git-accounts.d.ts +3 -0
- package/dist/mcp/git-accounts.js +70 -0
- package/dist/mcp/jobs.d.ts +3 -0
- package/dist/mcp/jobs.js +77 -0
- package/dist/mcp/node-type-corpus.d.ts +3 -0
- package/dist/mcp/node-type-corpus.js +173 -0
- package/dist/mcp/nodes.d.ts +3 -0
- package/dist/mcp/nodes.js +341 -0
- package/dist/mcp/pipelines.d.ts +3 -0
- package/dist/mcp/pipelines.js +342 -0
- package/dist/mcp/projects.d.ts +3 -0
- package/dist/mcp/projects.js +70 -0
- package/dist/mcp/repo-node-types.d.ts +135 -0
- package/dist/mcp/repo-node-types.js +387 -0
- package/dist/mcp/runs.d.ts +3 -0
- package/dist/mcp/runs.js +92 -0
- package/dist/mcp/subgraphs.d.ts +3 -0
- package/dist/mcp/subgraphs.js +60 -0
- package/dist/mcp/users.d.ts +3 -0
- package/dist/mcp/users.js +107 -0
- package/dist/prompts/index.d.ts +2 -0
- package/dist/prompts/index.js +58 -0
- package/dist/resources/context/aggregation-patterns.md +145 -0
- package/dist/resources/context/data-engineering-principles.md +183 -0
- package/dist/resources/context/hydrated-metadata.md +92 -0
- package/dist/resources/context/id-discovery.md +64 -0
- package/dist/resources/context/intelligent-node-configuration.md +162 -0
- package/dist/resources/context/node-creation-decision-tree.md +156 -0
- package/dist/resources/context/node-operations.md +316 -0
- package/dist/resources/context/node-payloads.md +114 -0
- package/dist/resources/context/node-type-corpus.md +166 -0
- package/dist/resources/context/node-type-selection-guide.md +96 -0
- package/dist/resources/context/overview.md +135 -0
- package/dist/resources/context/pipeline-workflows.md +355 -0
- package/dist/resources/context/run-operations.md +55 -0
- package/dist/resources/context/sql-bigquery.md +41 -0
- package/dist/resources/context/sql-databricks.md +40 -0
- package/dist/resources/context/sql-platform-selection.md +70 -0
- package/dist/resources/context/sql-snowflake.md +43 -0
- package/dist/resources/context/storage-mappings.md +49 -0
- package/dist/resources/context/tool-usage.md +98 -0
- package/dist/resources/index.d.ts +5 -0
- package/dist/resources/index.js +254 -0
- package/dist/schemas/node-payloads.d.ts +5019 -0
- package/dist/schemas/node-payloads.js +147 -0
- package/dist/server.d.ts +7 -0
- package/dist/server.js +63 -0
- package/dist/services/cache/snapshots.d.ts +108 -0
- package/dist/services/cache/snapshots.js +275 -0
- package/dist/services/config/context-analyzer.d.ts +14 -0
- package/dist/services/config/context-analyzer.js +76 -0
- package/dist/services/config/field-classifier.d.ts +23 -0
- package/dist/services/config/field-classifier.js +47 -0
- package/dist/services/config/intelligent.d.ts +55 -0
- package/dist/services/config/intelligent.js +306 -0
- package/dist/services/config/rules.d.ts +6 -0
- package/dist/services/config/rules.js +44 -0
- package/dist/services/config/schema-resolver.d.ts +18 -0
- package/dist/services/config/schema-resolver.js +80 -0
- package/dist/services/corpus/loader.d.ts +56 -0
- package/dist/services/corpus/loader.js +25 -0
- package/dist/services/corpus/search.d.ts +49 -0
- package/dist/services/corpus/search.js +69 -0
- package/dist/services/corpus/templates.d.ts +4 -0
- package/dist/services/corpus/templates.js +11 -0
- package/dist/services/pipelines/execution.d.ts +20 -0
- package/dist/services/pipelines/execution.js +290 -0
- package/dist/services/pipelines/node-type-intent.d.ts +96 -0
- package/dist/services/pipelines/node-type-intent.js +356 -0
- package/dist/services/pipelines/node-type-selection.d.ts +66 -0
- package/dist/services/pipelines/node-type-selection.js +758 -0
- package/dist/services/pipelines/planning.d.ts +543 -0
- package/dist/services/pipelines/planning.js +1839 -0
- package/dist/services/policies/sql-override.d.ts +7 -0
- package/dist/services/policies/sql-override.js +109 -0
- package/dist/services/repo/operations.d.ts +6 -0
- package/dist/services/repo/operations.js +10 -0
- package/dist/services/repo/parser.d.ts +70 -0
- package/dist/services/repo/parser.js +365 -0
- package/dist/services/repo/path.d.ts +2 -0
- package/dist/services/repo/path.js +58 -0
- package/dist/services/templates/nodes.d.ts +50 -0
- package/dist/services/templates/nodes.js +336 -0
- package/dist/services/workspace/analysis.d.ts +56 -0
- package/dist/services/workspace/analysis.js +151 -0
- package/dist/services/workspace/mutations.d.ts +150 -0
- package/dist/services/workspace/mutations.js +1718 -0
- package/dist/utils.d.ts +5 -0
- package/dist/utils.js +7 -0
- package/dist/workflows/get-environment-overview.d.ts +9 -0
- package/dist/workflows/get-environment-overview.js +23 -0
- package/dist/workflows/get-run-details.d.ts +10 -0
- package/dist/workflows/get-run-details.js +28 -0
- package/dist/workflows/progress.d.ts +20 -0
- package/dist/workflows/progress.js +54 -0
- package/dist/workflows/retry-and-wait.d.ts +13 -0
- package/dist/workflows/retry-and-wait.js +139 -0
- package/dist/workflows/run-and-wait.d.ts +13 -0
- package/dist/workflows/run-and-wait.js +141 -0
- package/dist/workflows/run-status.d.ts +10 -0
- package/dist/workflows/run-status.js +27 -0
- package/package.json +34 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Node Type Intent Corpus
|
|
3
|
+
*
|
|
4
|
+
* Authoritative reference for what each Coalesce node type family is designed for,
|
|
5
|
+
* derived from official Coalesce node type package READMEs.
|
|
6
|
+
*
|
|
7
|
+
* Used by the scoring logic to make informed decisions about which node type
|
|
8
|
+
* fits a given use case — instead of relying on name heuristics alone.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Intent corpus indexed by family.
 *
 * Every key is a node type family name and every value describes that family:
 *   - family: the family name; repeats the entry's own key.
 *   - purpose: short prose description of what the family is for.
 *   - materialization: materialization kinds listed for the family.
 *   - useWhen / doNotUseWhen: human-readable guidance strings.
 *   - requiresSemanticConfig / supportsMultiSource: boolean flags
 *     (NOTE(review): their consumers are outside this file — confirm the
 *     exact meaning against the scoring code).
 *   - strongSignals: RegExp that hasStrongSignal() tests against lower-cased text.
 *   - antiSignals: RegExp that hasAntiSignal() tests against lower-cased text,
 *     or null when the family declares no anti-signals.
 *
 * The patterns are written in lower case because both helpers lower-case
 * their input before testing it.
 *
 * Source: Coalesce node type package READMEs
 *   - Coalesce-Base-Node-Types
 *   - Coalesce-Base-Node-Types---Advanced-Deploy
 *   - Dynamic-Table-Nodes
 *   - Incremental-Nodes
 *   - Materialized-View-Node
 *   - functional-node-types
 *   - create-alter-node-types
 */
export const NODE_TYPE_INTENT = {
    // --- General-purpose families -------------------------------------------
    stage: {
        family: "stage",
        purpose: "General-purpose intermediate staging for raw data processing before transformation. The default workhorse node type.",
        materialization: ["table", "view"],
        useWhen: [
            "Single-source SELECT/WHERE transforms",
            "Column renames, type casts, filters",
            "Landing raw data into a staging layer",
            "Any general-purpose transformation without special requirements",
            "GROUP BY aggregations (Stage/Work handle these natively)",
            "Multi-source joins (Stage/Work both support joins via sourceMapping)",
        ],
        doNotUseWhen: [
            "You need change tracking / CDC (use Persistent Stage)",
            "You need SCD Type 1/2 with business keys (use Dimension)",
            "You need no materialization at all (use View)",
        ],
        requiresSemanticConfig: false,
        supportsMultiSource: true,
        // The separate `stage[_-]` alternative is needed because `_` is a word
        // character, so `\bstage\b` alone does not match prefixes like "stage_x".
        strongSignals: /\bstag(e|ing)\b|\bstg[_-]|\bstage[_-]|\bbronze\b|\blanding\b|\bsilver\b/u,
        antiSignals: null,
    },
    work: {
        family: "work",
        purpose: "Intermediary processing node for joins, transforms, and multi-source operations. Interchangeable with Stage for most patterns.",
        materialization: ["table", "view"],
        useWhen: [
            "Multi-source joins (INNER, LEFT, RIGHT, FULL OUTER)",
            "Intermediate transforms between staging and mart layers",
            "UNION / UNION ALL operations",
            "Any general-purpose transformation",
            "GROUP BY aggregations",
        ],
        doNotUseWhen: [
            "You need change tracking / CDC (use Persistent Stage)",
            "You need SCD Type 1/2 with business keys (use Dimension)",
        ],
        requiresSemanticConfig: false,
        supportsMultiSource: true,
        strongSignals: /\bwork\b|\bwork[_-]|\bwrk[_-]|\bintermediate\b|\btransform\b/u,
        antiSignals: null,
    },
    view: {
        family: "view",
        purpose: "Virtual table with no physical materialization. Query recalculates on every access. Good for lightweight transforms or cost savings.",
        materialization: ["view"],
        useWhen: [
            "No materialization needed (virtual table)",
            "Lightweight transforms or simple projections",
            "Secure views for data access control",
            "Cost optimization when re-computation is acceptable",
        ],
        doNotUseWhen: [
            "Performance-critical queries that run frequently",
            "Large aggregations that are expensive to recompute",
            "You need persistent storage for downstream consumers",
        ],
        requiresSemanticConfig: false,
        supportsMultiSource: true,
        strongSignals: /\bview\b|\bview[_-]|\bvw[_-]|\bsecure\s*view\b/u,
        antiSignals: null,
    },
    // --- Families flagged requiresSemanticConfig: true -----------------------
    "persistent-stage": {
        family: "persistent-stage",
        purpose: "Maintain data persistence across execution cycles with change tracking. Supports business keys and Type 1/Type 2 CDC.",
        materialization: ["table"],
        useWhen: [
            "Change data capture (CDC) is required",
            "Track historical changes using business keys",
            "Type 1 or Type 2 slowly changing data",
            "Data persistence across multiple execution cycles",
        ],
        doNotUseWhen: [
            "Simple staging without change tracking — use Stage/Work",
            "General-purpose transforms — use Stage/Work",
            "No business key is defined",
            "Batch ETL where you just need TRUNCATE+INSERT",
        ],
        requiresSemanticConfig: true,
        supportsMultiSource: true,
        strongSignals: /\bpersistent\s*stage\b|\bcdc\b|\bchange\s*track/u,
        antiSignals: /\bstaging\s+layer\b|\bgeneral\b|\bsimple\b/u,
    },
    dimension: {
        family: "dimension",
        purpose: "Store descriptive business context (customers, products, locations). Requires business keys. Supports SCD Type 1/2, zero key records.",
        materialization: ["table", "view"],
        useWhen: [
            "Building a dimensional model (star/snowflake schema)",
            "Descriptive entity tables (customers, products, locations, employees)",
            "SCD Type 1 or Type 2 tracking on business entities",
            "Node name explicitly starts with dim_ or dimension_",
        ],
        doNotUseWhen: [
            "Just doing a GROUP BY — that's a transform, use Stage/Work",
            "No business key is defined",
            "Generic data processing or staging",
            "SQL has aggregation functions but no dimensional modeling intent",
            "CTE decomposition — CTEs become Stage/Work nodes, not Dimensions",
        ],
        requiresSemanticConfig: true,
        supportsMultiSource: true,
        strongSignals: /\bdimension\b|\bdimension[_-]|\bdim[_-]|\bscd\b|\bslowly\s*changing/u,
        antiSignals: /\bstaging\b|\btransform\b|\bintermediate\b/u,
    },
    fact: {
        family: "fact",
        purpose: "Aggregate measures and numerical business data (sales, costs, profits). Requires business keys. Part of dimensional modeling.",
        materialization: ["table", "view"],
        useWhen: [
            "Building a fact table in a dimensional model",
            "Storing business measures (revenue, quantity, cost)",
            "Node name explicitly starts with fct_ or fact_",
            "Grain-level transactional data with foreign keys to dimensions",
        ],
        doNotUseWhen: [
            "Just doing a GROUP BY or SUM — that's a transform, use Stage/Work",
            "No business key or grain is defined",
            "Generic aggregation or intermediate processing",
            "CTE decomposition — aggregation CTEs become Stage/Work, not Facts",
        ],
        requiresSemanticConfig: true,
        supportsMultiSource: true,
        // The last alternative only fires when "measure" is followed later on
        // the same line by "dimensional" (`.` does not cross newlines here).
        strongSignals: /\bfact\b|\bfact[_-]|\bfct[_-]|\bgrain\b|\bmeasure\b.*\bdimensional\b/u,
        antiSignals: /\bstaging\b|\btransform\b|\bintermediate\b/u,
    },
    // --- Data Vault families -------------------------------------------------
    hub: {
        family: "hub",
        purpose: "Data Vault hub entity. Stores unique business keys for core business concepts.",
        materialization: ["table"],
        useWhen: [
            "Building a Data Vault model",
            "Hub entity with unique business keys",
        ],
        doNotUseWhen: [
            "Not building a Data Vault",
            "General-purpose transforms",
        ],
        requiresSemanticConfig: true,
        supportsMultiSource: false,
        strongSignals: /\bhub\b|\bhub[_-]|\bdata\s*vault\b/u,
        antiSignals: null,
    },
    satellite: {
        family: "satellite",
        purpose: "Data Vault satellite. Stores descriptive attributes and change history for a hub.",
        materialization: ["table"],
        useWhen: [
            "Building a Data Vault model",
            "Satellite with descriptive attributes linked to a hub",
        ],
        doNotUseWhen: [
            "Not building a Data Vault",
            "General-purpose transforms",
        ],
        requiresSemanticConfig: true,
        supportsMultiSource: false,
        strongSignals: /\bsatellite\b|\bsat[_-]/u,
        antiSignals: null,
    },
    link: {
        family: "link",
        purpose: "Data Vault link. Stores relationships between two or more hubs.",
        materialization: ["table"],
        useWhen: [
            "Building a Data Vault model",
            "Link entity connecting multiple hubs",
        ],
        doNotUseWhen: [
            "Not building a Data Vault",
            "General-purpose joins (use Work/Stage)",
        ],
        requiresSemanticConfig: true,
        supportsMultiSource: true,
        // Unlike hub/satellite there is no bare `\blink\b` alternative; only the
        // prefixed form or "data vault ... link" counts as a strong signal.
        strongSignals: /\blink[_-]|\bdata\s*vault.*link/u,
        antiSignals: null,
    },
    // --- Fallback entry ------------------------------------------------------
    unknown: {
        family: "unknown",
        purpose: "Unrecognized node type family. May be a custom or specialized type.",
        materialization: ["table", "view"],
        useWhen: [],
        doNotUseWhen: [
            "A known family matches the use case",
        ],
        requiresSemanticConfig: false,
        supportsMultiSource: false,
        strongSignals: /(?!)/u, // never matches
        antiSignals: null,
    },
};
|
|
214
|
+
/**
|
|
215
|
+
* Specialized materialization patterns that cross-cut families.
|
|
216
|
+
*
|
|
217
|
+
* These detect node types with specialized materialization behavior
|
|
218
|
+
* (Dynamic Tables, Incremental Loads, Materialized Views, etc.)
|
|
219
|
+
* and penalize them when the context doesn't explicitly call for that pattern.
|
|
220
|
+
*
|
|
221
|
+
* Source: Coalesce node type package READMEs
|
|
222
|
+
*/
|
|
223
|
+
export const SPECIALIZED_PATTERNS = [
|
|
224
|
+
{
|
|
225
|
+
name: "Dynamic Table",
|
|
226
|
+
detect: /dynamic\s*table|dt[_\s-]/u,
|
|
227
|
+
contextRequired: /dynamic\s*table|auto[\s-]*refresh|continuous[\s-]*refresh|near[\s-]*real[\s-]*time|low[\s-]*latency/u,
|
|
228
|
+
penalty: 50,
|
|
229
|
+
purpose: "Snowflake Dynamic Tables with declarative orchestration and automatic lag-based refresh. Snowflake manages the refresh DAG — no manual scheduling needed.",
|
|
230
|
+
doNotUseWhen: [
|
|
231
|
+
"Batch ETL — scheduled runs where you control when data refreshes",
|
|
232
|
+
"Cost-sensitive workloads — DTs incur continuous compute for refresh monitoring",
|
|
233
|
+
"One-time or ad-hoc transforms — no ongoing refresh needed",
|
|
234
|
+
"Standard staging/transform pipelines — use Stage/Work with table materialization",
|
|
235
|
+
"CTE decomposition — CTEs are batch patterns, not streaming",
|
|
236
|
+
],
|
|
237
|
+
},
|
|
238
|
+
{
|
|
239
|
+
name: "Incremental Load",
|
|
240
|
+
detect: /incremental\s*load|looped\s*load|grouped\s*incremental/u,
|
|
241
|
+
contextRequired: /incremental|high[\s-]*water[\s-]*mark|append[\s-]*only|delta[\s-]*load/u,
|
|
242
|
+
penalty: 50,
|
|
243
|
+
purpose: "Process only new/modified records by comparing against a persistent table using high-water mark tracking. For large tables where full refresh is too expensive.",
|
|
244
|
+
doNotUseWhen: [
|
|
245
|
+
"Full refresh is acceptable (most staging tables)",
|
|
246
|
+
"Source is small enough for TRUNCATE+INSERT",
|
|
247
|
+
"CTE decomposition — CTEs represent full-refresh batch logic",
|
|
248
|
+
],
|
|
249
|
+
},
|
|
250
|
+
{
|
|
251
|
+
name: "Deferred Merge",
|
|
252
|
+
detect: /deferred\s*merge|append\s*stream|delta\s*stream/u,
|
|
253
|
+
contextRequired: /deferred\s*merge|stream|merge\s*task|high[\s-]*frequency\s*ingestion/u,
|
|
254
|
+
penalty: 50,
|
|
255
|
+
purpose: "Capture incremental changes via Snowflake Streams with scheduled merge tasks. For high-frequency ingestion where immediate merge is too expensive.",
|
|
256
|
+
doNotUseWhen: [
|
|
257
|
+
"Batch ETL with scheduled full or incremental loads",
|
|
258
|
+
"Standard staging/transform pipelines",
|
|
259
|
+
"CTE decomposition",
|
|
260
|
+
],
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
name: "Materialized View",
|
|
264
|
+
detect: /materialized\s*view/u,
|
|
265
|
+
contextRequired: /materialized\s*view|pre[\s-]*compute|expensive\s*aggregat/u,
|
|
266
|
+
penalty: 40,
|
|
267
|
+
purpose: "Snowflake Materialized Views — pre-computed query results that auto-refresh when base data changes. Single-source only. No GROUP BY ALL.",
|
|
268
|
+
doNotUseWhen: [
|
|
269
|
+
"Multi-source joins (materialized views are single-source only)",
|
|
270
|
+
"Standard transforms — use Stage/Work",
|
|
271
|
+
"Views as source (not supported)",
|
|
272
|
+
],
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
name: "Task/DAG",
|
|
276
|
+
detect: /\btask\b|dag\s*root/u,
|
|
277
|
+
contextRequired: /\btask\b|\bdag\b|\bschedul(e|ing)\b|\bcron\b/u,
|
|
278
|
+
penalty: 50,
|
|
279
|
+
purpose: "Snowflake Tasks for scheduled or DAG-based orchestration. Creates task objects, not tables.",
|
|
280
|
+
doNotUseWhen: [
|
|
281
|
+
"Building data transformation nodes",
|
|
282
|
+
"Standard staging/transform pipelines",
|
|
283
|
+
],
|
|
284
|
+
},
|
|
285
|
+
{
|
|
286
|
+
name: "Data Quality",
|
|
287
|
+
detect: /data\s*quality|dmf|data\s*profil/u,
|
|
288
|
+
contextRequired: /data\s*quality|dmf|profil|metric\s*function/u,
|
|
289
|
+
penalty: 50,
|
|
290
|
+
purpose: "Data quality monitoring (DMF) or statistical profiling. Creates monitoring metadata, not transformation tables.",
|
|
291
|
+
doNotUseWhen: [
|
|
292
|
+
"Building data transformation pipelines",
|
|
293
|
+
"Standard staging/transform/mart patterns",
|
|
294
|
+
],
|
|
295
|
+
},
|
|
296
|
+
{
|
|
297
|
+
name: "Functional (Date/Time Dimension, Pivot, etc.)",
|
|
298
|
+
detect: /\bpivot\b|\bunpivot\b|match[\s_]recognize|recursive\s*cte|date\s*dimension|time\s*dimension/u,
|
|
299
|
+
contextRequired: /\bpivot\b|\bunpivot\b|match[\s_]recognize|recursive|date\s*dim|time\s*dim|calendar/u,
|
|
300
|
+
penalty: 40,
|
|
301
|
+
purpose: "Specialized transformation patterns: Pivot/Unpivot rows↔columns, Match Recognize for pattern detection, Recursive CTE for hierarchies, Date/Time Dimension generators.",
|
|
302
|
+
doNotUseWhen: [
|
|
303
|
+
"Standard SELECT/WHERE/JOIN transforms",
|
|
304
|
+
"CTE decomposition (unless the CTE itself is a PIVOT/UNPIVOT)",
|
|
305
|
+
],
|
|
306
|
+
},
|
|
307
|
+
];
|
|
308
|
+
/**
 * Report whether `text` trips the anti-signal pattern of the given family.
 *
 * A family whose `antiSignals` is `null` never reports an anti-signal.
 * The text is lower-cased before it is tested.
 *
 * @param family Key into NODE_TYPE_INTENT.
 * @param text Free text to test.
 * @returns `true` when the family has an anti-signal pattern and it matches.
 */
export function hasAntiSignal(family, text) {
    const { antiSignals } = NODE_TYPE_INTENT[family];
    if (antiSignals === null) {
        return false;
    }
    return antiSignals.test(text.toLowerCase());
}
|
|
316
|
+
/**
 * Report whether `text` matches the strong-signal pattern of the given family.
 *
 * The text is lower-cased before it is tested.
 *
 * @param family Key into NODE_TYPE_INTENT.
 * @param text Free text to test.
 * @returns `true` when the family's `strongSignals` pattern matches.
 */
export function hasStrongSignal(family, text) {
    const { strongSignals } = NODE_TYPE_INTENT[family];
    const normalized = text.toLowerCase();
    return strongSignals.test(normalized);
}
|
|
322
|
+
/**
 * Find the first specialized pattern that the candidate matches while the
 * context does NOT request it, and report the penalty to apply.
 *
 * Both inputs are lower-cased before testing. Patterns are examined in
 * SPECIALIZED_PATTERNS order and only the first offending one is reported.
 *
 * @param candidateSignals String tested against each pattern's `detect` regex.
 * @param contextText String tested against each pattern's `contextRequired` regex.
 * @returns `{ penalty, reason }` for the first detected-but-unrequested
 *   pattern, or `null` when there is none.
 */
export function detectSpecializedPatternPenalty(candidateSignals, contextText) {
    const candidate = candidateSignals.toLowerCase();
    const context = contextText.toLowerCase();
    const unrequested = SPECIALIZED_PATTERNS.find((entry) => {
        if (!entry.detect.test(candidate)) {
            return false;
        }
        return !entry.contextRequired.test(context);
    });
    if (unrequested === undefined) {
        return null;
    }
    // Quote the pattern's first piece of guidance, with a generic fallback.
    const advice = unrequested.doNotUseWhen[0] ?? "use standard node types instead";
    return {
        penalty: unrequested.penalty,
        reason: `${unrequested.name} pattern not requested — ${advice}`,
    };
}
|
|
343
|
+
/**
 * Find the first specialized pattern that the candidate matches AND that the
 * context explicitly requests.
 *
 * Both inputs are lower-cased before testing. Patterns are examined in
 * SPECIALIZED_PATTERNS order.
 *
 * @param candidateSignals String tested against each pattern's `detect` regex.
 * @param contextText String tested against each pattern's `contextRequired` regex.
 * @returns The `name` of the first pattern satisfying both tests, or `null`.
 */
export function detectSpecializedPatternMatch(candidateSignals, contextText) {
    const candidate = candidateSignals.toLowerCase();
    const context = contextText.toLowerCase();
    const requested = SPECIALIZED_PATTERNS.find((entry) => entry.detect.test(candidate) && entry.contextRequired.test(context));
    return requested === undefined ? null : requested.name;
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
 * Two open-ended key/value maps carried on a selected candidate as its
 * optional `templateDefaults`.
 *
 * NOTE(review): the field names suggest values inferred from a node type
 * template (top-level node fields vs. config entries) — confirm against the
 * implementation, which is not part of this declaration file.
 */
export type PipelineTemplateDefaults = {
    inferredTopLevelFields: Record<string, unknown>;
    inferredConfig: Record<string, unknown>;
};
|
|
5
|
+
/**
 * Closed set of node type family identifiers.
 *
 * Includes the catch-all member `"unknown"` alongside the named families.
 */
export type PipelineNodeTypeFamily = "stage" | "persistent-stage" | "view" | "work" | "dimension" | "fact" | "hub" | "satellite" | "link" | "unknown";
|
|
6
|
+
/**
 * One entry of `PipelineNodeTypeSelection.consideredNodeTypes`: a node type
 * together with its family, usage figures, a numeric score and the list of
 * reason strings attached to it.
 *
 * `displayName` and `shortName` are nullable; every other field is required.
 *
 * NOTE(review): how `score` is computed and what distinguishes `usageCount`
 * from `workspaceUsageCount` is defined by the implementation, not visible here.
 */
export type PipelineNodeTypeSelectionCandidate = {
    nodeType: string;
    displayName: string | null;
    shortName: string | null;
    family: PipelineNodeTypeFamily;
    usageCount: number;
    workspaceUsageCount: number;
    observedInWorkspace: boolean;
    autoExecutable: boolean;
    score: number;
    reasons: string[];
};
|
|
18
|
+
/**
 * Summary of a node type selection, exposed as the `selection` field of
 * `PipelineNodeTypeSelectionResult`.
 *
 * - `strategy` is one of four literal tags: "explicit", "repo-ranked",
 *   "workspace-ranked" or "fallback".
 * - All `selected*` fields are nullable, so a selection may carry no chosen
 *   node type.
 * - `confidence` is a three-level literal: "high", "medium" or "low".
 * - `consideredNodeTypes` lists the candidates as
 *   `PipelineNodeTypeSelectionCandidate` records.
 *
 * NOTE(review): the exact conditions under which each strategy/confidence
 * value is produced live in the implementation — confirm there.
 */
export type PipelineNodeTypeSelection = {
    strategy: "explicit" | "repo-ranked" | "workspace-ranked" | "fallback";
    selectedNodeType: string | null;
    selectedDisplayName: string | null;
    selectedShortName: string | null;
    selectedFamily: PipelineNodeTypeFamily | null;
    confidence: "high" | "medium" | "low";
    autoExecutable: boolean;
    supportedNodeTypes: string[];
    repoPath: string | null;
    resolvedRepoPath: string | null;
    repoWarnings: string[];
    workspaceObservedNodeTypes: string[];
    consideredNodeTypes: PipelineNodeTypeSelectionCandidate[];
};
|
|
33
|
+
/**
 * Return type of `selectPipelineNodeType`.
 *
 * - `selectedCandidate` is `null` when no candidate is attached; otherwise it
 *   describes the chosen node type plus string lists of semantic signals,
 *   missing default fields and template warnings, and optionally the
 *   `templateDefaults` maps.
 * - `selection` is the accompanying `PipelineNodeTypeSelection` summary.
 * - `warnings` is a list of warning strings for the overall result.
 */
export type PipelineNodeTypeSelectionResult = {
    selectedCandidate: {
        nodeType: string;
        displayName: string | null;
        shortName: string | null;
        family: PipelineNodeTypeFamily;
        autoExecutable: boolean;
        semanticSignals: string[];
        missingDefaultFields: string[];
        templateWarnings: string[];
        templateDefaults?: PipelineTemplateDefaults;
    } | null;
    selection: PipelineNodeTypeSelection;
    warnings: string[];
};
|
|
48
|
+
/**
 * Input accepted by `selectPipelineNodeType`.
 *
 * Module-private (not exported). `sourceCount` is the only required field;
 * everything else is optional.
 *
 * NOTE(review): how `goal`, `targetName` and `sql` are combined for matching
 * is decided by the implementation, which this declaration does not show.
 */
type PipelineNodeTypeSelectionContext = {
    explicitNodeType?: string;
    goal?: string;
    targetName?: string;
    sql?: string;
    sourceCount: number;
    workspaceNodeTypes?: string[];
    workspaceNodeTypeCounts?: Record<string, number>;
    repoPath?: string;
    /** Structural hint: does the SQL/transform contain JOINs? */
    hasJoin?: boolean;
    /** Structural hint: does the SQL/transform contain GROUP BY? */
    hasGroupBy?: boolean;
    /** Structural hint: are business keys explicitly defined? */
    hasBusinessKeys?: boolean;
};
|
|
64
|
+
/**
 * Map a list of signal strings to a single `PipelineNodeTypeFamily`.
 *
 * NOTE(review): the matching rules are in the implementation file; this
 * declaration only fixes the signature.
 *
 * @param signals Strings describing a node type.
 * @returns One member of `PipelineNodeTypeFamily`.
 */
export declare function inferFamily(signals: string[]): PipelineNodeTypeFamily;
|
|
65
|
+
/**
 * Select a node type for the given context.
 *
 * @param context Selection inputs; only `sourceCount` is mandatory.
 * @returns The selection summary, an optional selected candidate (may be
 *   `null`) and a list of warnings.
 */
export declare function selectPipelineNodeType(context: PipelineNodeTypeSelectionContext): PipelineNodeTypeSelectionResult;
|
|
66
|
+
export {};
|