datahub-agent-context 1.3.1.10rc1__py3-none-any.whl → 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datahub_agent_context/__init__.py +11 -3
- datahub_agent_context/_version.py +1 -1
- datahub_agent_context/cli.py +152 -0
- datahub_agent_context/context.py +47 -34
- datahub_agent_context/langchain_tools/builder.py +6 -4
- datahub_agent_context/mcp_tools/base.py +6 -3
- datahub_agent_context/mcp_tools/save_document.py +634 -0
- datahub_agent_context/snowflake/__init__.py +0 -0
- datahub_agent_context/snowflake/generate_udfs.py +306 -0
- datahub_agent_context/snowflake/generators/__init__.py +21 -0
- datahub_agent_context/snowflake/generators/configuration.py +104 -0
- datahub_agent_context/snowflake/generators/cortex_agent.py +725 -0
- datahub_agent_context/snowflake/generators/network_rules.py +53 -0
- datahub_agent_context/snowflake/generators/stored_procedure.py +87 -0
- datahub_agent_context/snowflake/snowflake.py +662 -0
- datahub_agent_context/snowflake/udfs/__init__.py +1 -0
- datahub_agent_context/snowflake/udfs/add_glossary_terms.py +61 -0
- datahub_agent_context/snowflake/udfs/add_owners.py +59 -0
- datahub_agent_context/snowflake/udfs/add_structured_properties.py +57 -0
- datahub_agent_context/snowflake/udfs/add_tags.py +61 -0
- datahub_agent_context/snowflake/udfs/base.py +45 -0
- datahub_agent_context/snowflake/udfs/get_dataset_queries.py +68 -0
- datahub_agent_context/snowflake/udfs/get_entities.py +47 -0
- datahub_agent_context/snowflake/udfs/get_lineage.py +61 -0
- datahub_agent_context/snowflake/udfs/get_lineage_paths_between.py +69 -0
- datahub_agent_context/snowflake/udfs/get_me.py +51 -0
- datahub_agent_context/snowflake/udfs/grep_documents.py +70 -0
- datahub_agent_context/snowflake/udfs/list_schema_fields.py +80 -0
- datahub_agent_context/snowflake/udfs/remove_domains.py +45 -0
- datahub_agent_context/snowflake/udfs/remove_glossary_terms.py +57 -0
- datahub_agent_context/snowflake/udfs/remove_owners.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_structured_properties.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_tags.py +57 -0
- datahub_agent_context/snowflake/udfs/search_datahub.py +71 -0
- datahub_agent_context/snowflake/udfs/search_documents.py +58 -0
- datahub_agent_context/snowflake/udfs/set_domains.py +55 -0
- datahub_agent_context/snowflake/udfs/update_description.py +60 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA +21 -14
- datahub_agent_context-1.4.0rc2.dist-info/RECORD +66 -0
- datahub_agent_context-1.3.1.10rc1.dist-info/RECORD +0 -34
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/WHEEL +0 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,725 @@
|
|
|
1
|
+
"""Generate Snowflake Cortex Agent SQL."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _column_paths(scope):
    """Return the shared ``column_paths`` parameter row with a scope-specific default note."""
    return (
        "column_paths",
        "string",
        f"JSON array of column names. Default: null ({scope})",
    )


# Parameter rows are (name, JSON type, description). Every declared parameter is
# also listed under ``required`` in the generated input schema.
_ENTITY_URNS = ("entity_urns", "string", "JSON array of entity URNs")

# Tool groups are (comment in ``tools``, comment in ``tool_resources``, tools);
# each tool is (name, description, parameter rows).
_READ_TOOL_GROUPS = (
    (
        "Core Search & Discovery Tools",
        "Search & Discovery",
        (
            (
                "search_datahub",
                "Search DataHub for entities (datasets, dashboards, etc.). Use /q prefix for structured queries. Returns URNs, names, descriptions, and metadata.",
                (
                    ("search_query", "string", "Search query (e.g., 'customer', '/q user+transaction')"),
                    ("entity_type", "string", "Entity type filter (e.g., 'dataset', 'tag', etc.). Default: null (all entity types)"),
                ),
            ),
            (
                "get_entities",
                "Get detailed entity information including schema, tags, owners, lineage summary. Use URN from search results.",
                (("entity_urn", "string", "Entity URN from search results"),),
            ),
            (
                "list_schema_fields",
                "List schema fields with filtering and pagination. Useful for large schemas or finding specific columns.",
                (
                    ("dataset_urn", "string", "Dataset URN"),
                    ("keywords", "string", "Keywords to filter fields (single string or JSON array). Default: null (no filtering)"),
                    ("limit", "number", "Max fields to return. Default: 100"),
                ),
            ),
        ),
    ),
    (
        "Lineage Tools",
        "Lineage",
        (
            (
                "get_lineage",
                "Get upstream or downstream lineage for entities or columns. Returns lineage graph with metadata.",
                (
                    ("urn", "string", "Entity URN"),
                    ("column_name", "string", "Column name for column-level lineage. Default: null (entity-level lineage)"),
                    ("upstream", "number", "1 for upstream, 0 for downstream. Default: 1"),
                    ("max_hops", "number", "Max hops (1-3+). Default: 1"),
                    ("max_results", "number", "Max results. Default: 30"),
                ),
            ),
            (
                "get_lineage_paths_between",
                "Get detailed transformation paths between two entities/columns. Shows intermediate steps and queries.",
                (
                    ("source_urn", "string", "Source dataset URN"),
                    ("target_urn", "string", "Target dataset URN"),
                    ("source_column", "string", "Source column name. Default: null (dataset-level lineage)"),
                    ("target_column", "string", "Target column name. Default: null (dataset-level lineage)"),
                ),
            ),
        ),
    ),
    (
        "Query Analysis Tools",
        "Query Analysis",
        (
            (
                "get_dataset_queries",
                "Get SQL queries that use a dataset/column. Filter by MANUAL (user queries) or SYSTEM (BI tools).",
                (
                    ("urn", "string", "Dataset URN"),
                    ("column_name", "string", "Column name to filter queries. Default: null (queries for all columns)"),
                    ("source", "string", "'MANUAL', 'SYSTEM', or null for both. Default: null"),
                    ("count", "number", "Number of queries. Default: 10"),
                ),
            ),
        ),
    ),
    (
        "Document Search Tools",
        "Documents",
        (
            (
                "search_documents",
                "Search organization documents (runbooks, FAQs, knowledge articles from Notion, Confluence, etc.).",
                (
                    ("search_query", "string", "Search query"),
                    ("num_results", "number", "Max results. Default: 10"),
                ),
            ),
            (
                "grep_documents",
                "Search within document content using regex patterns. Use after search_documents to find specific content.",
                (
                    ("urns", "string", "JSON array of document URNs"),
                    ("pattern", "string", "Regex pattern to search for"),
                    ("context_chars", "number", "Context characters. Default: 200"),
                    ("max_matches_per_doc", "number", "Max matches per document. Default: 5"),
                ),
            ),
        ),
    ),
)

_MUTATION_TOOL_GROUPS = (
    (
        "Tag Management Tools",
        "Tags",
        (
            (
                "add_tags",
                "Add tags to entities or columns. Confirm with user first before making changes.",
                (
                    ("tag_urns", "string", "JSON array of tag URNs (e.g., '[\"urn:li:tag:PII\"]')"),
                    _ENTITY_URNS,
                    _column_paths("entity-level tagging"),
                ),
            ),
            (
                "remove_tags",
                "Remove tags from entities or columns. Confirm with user first.",
                (
                    ("tag_urns", "string", "JSON array of tag URNs to remove"),
                    _ENTITY_URNS,
                    _column_paths("entity-level tag removal"),
                ),
            ),
        ),
    ),
    (
        "Description Management",
        "Descriptions",
        (
            (
                "update_description",
                "Update entity/column descriptions. Operations: 'replace', 'append', 'remove'. Confirm with user first.",
                (
                    ("entity_urn", "string", "Entity URN"),
                    ("operation", "string", "'replace', 'append', or 'remove'"),
                    ("description", "string", "Description text. Default: null (not needed for 'remove' operation)"),
                    ("column_path", "string", "Column name. Default: null (entity-level description)"),
                ),
            ),
        ),
    ),
    (
        "Domain Management",
        "Domains",
        (
            (
                "set_domains",
                "Assign a domain to entities. Confirm with user first.",
                (
                    ("domain_urn", "string", "Domain URN (e.g., 'urn:li:domain:marketing')"),
                    _ENTITY_URNS,
                ),
            ),
            (
                "remove_domains",
                "Remove domain assignments from entities. Confirm with user first.",
                (_ENTITY_URNS,),
            ),
        ),
    ),
    (
        "Owner Management",
        "Owners",
        (
            (
                "add_owners",
                "Add owners to entities. Confirm with user first.",
                (
                    ("owner_urns", "string", "JSON array of owner URNs (CorpUser or CorpGroup)"),
                    _ENTITY_URNS,
                    ("ownership_type_urn", "string", "Ownership type URN. Default: null (uses default ownership type)"),
                ),
            ),
            (
                "remove_owners",
                "Remove owners from entities. Confirm with user first.",
                (
                    ("owner_urns", "string", "JSON array of owner URNs"),
                    _ENTITY_URNS,
                    ("ownership_type_urn", "string", "Ownership type URN. Default: null (removes all ownership types)"),
                ),
            ),
        ),
    ),
    (
        "Glossary Term Management",
        "Glossary Terms",
        (
            (
                "add_glossary_terms",
                "Add glossary terms to entities or columns. Confirm with user first.",
                (
                    ("term_urns", "string", "JSON array of glossary term URNs"),
                    _ENTITY_URNS,
                    _column_paths("entity-level glossary terms"),
                ),
            ),
            (
                "remove_glossary_terms",
                "Remove glossary terms from entities or columns. Confirm with user first.",
                (
                    ("term_urns", "string", "JSON array of glossary term URNs"),
                    _ENTITY_URNS,
                    _column_paths("entity-level glossary terms"),
                ),
            ),
        ),
    ),
    (
        "Structured Property Management",
        "Structured Properties",
        (
            (
                "add_structured_properties",
                "Add structured properties to entities or columns. Confirm with user first.",
                (
                    # Single braces: this text is never passed through str.format / an f-string.
                    ("property_values", "string", "JSON array of {propertyUrn, value} objects"),
                    _ENTITY_URNS,
                    _column_paths("entity-level structured properties"),
                ),
            ),
            (
                "remove_structured_properties",
                "Remove structured properties from entities or columns. Confirm with user first.",
                (
                    ("property_urns", "string", "JSON array of property URNs to remove"),
                    _ENTITY_URNS,
                    _column_paths("entity-level structured properties"),
                ),
            ),
        ),
    ),
)

# Always emitted last, after any mutation tools.
_TRAILING_TOOL_GROUPS = (
    (
        "User Info",
        "User Info",
        (
            (
                "get_me",
                "Get information about the authenticated user (profile, groups, privileges). This tool takes no parameters.",
                (),
            ),
        ),
    ),
    (
        "SQL Executor",
        "SQL Executor",
        (
            (
                "SqlExecutor",
                "Execute SELECT SQL queries and return results. Use after generating SQL from DataHub metadata.",
                (("SQL_TEXT", "string", "SELECT SQL query (must start with SELECT)"),),
            ),
        ),
    ),
)

# Tools whose backing object is not a function named after the tool:
# tool name -> (resource type, object name).
_RESOURCE_OVERRIDES = {"SqlExecutor": ("procedure", "EXECUTE_DYNAMIC_SQL")}

# (question, answer, requires mutation tools)
_SAMPLE_QUESTIONS = (
    ("What tables contain customer data?", "I'll search DataHub for datasets related to customer data.", False),
    ("Show me the lineage for the sales_monthly table", "I'll retrieve the lineage information for the sales_monthly table.", False),
    ("Tag all PII datasets in the finance domain", "I'll search for datasets in the finance domain and add PII tags to them.", True),
    ("What queries use the users table?", "I'll retrieve the SQL queries that reference the users table.", False),
    ("Add a description to the revenue column", "I'll update the description for the revenue column.", True),
    ("Who owns the analytics datasets?", "I'll search for analytics datasets and show their ownership information.", False),
)


def _yaml_quote(text):
    """Return *text* as a double-quoted YAML scalar (JSON string syntax)."""
    import json

    return json.dumps(text, ensure_ascii=False)


def _sql_literal(text):
    """Escape *text* for use inside a single-quoted SQL string constant."""
    # Backslashes first, so the doubling does not touch escapes added afterwards.
    return text.replace("\\", "\\\\").replace("'", "''")


def _indent(text, prefix):
    """Prefix every non-empty line of *text* with *prefix*."""
    return "\n".join(prefix + line if line else line for line in text.split("\n"))


def _render_tool_specs(groups):
    """Render the entries of the ``tools:`` list for the given tool groups."""
    lines = []
    for tools_label, _resources_label, tools in groups:
        lines.append(f"  # {tools_label}")
        for name, description, params in tools:
            lines += [
                "  - tool_spec:",
                '      type: "generic"',
                f"      name: {_yaml_quote(name)}",
                f"      description: {_yaml_quote(description)}",
                "      input_schema:",
                '        type: "object"',
            ]
            if params:
                lines.append("        properties:")
                for param_name, param_type, param_description in params:
                    lines += [
                        f"          {param_name}:",
                        f"            type: {_yaml_quote(param_type)}",
                        f"            description: {_yaml_quote(param_description)}",
                    ]
                required = ", ".join(param[0] for param in params)
                lines.append(f"        required: [{required}]")
            else:
                lines.append("        properties: {}")
            lines.append("")
    return "\n".join(lines)


def _render_tool_resources(groups, warehouse, namespace):
    """Render the entries of the ``tool_resources:`` mapping for the given tool groups."""
    lines = []
    for _tools_label, resources_label, tools in groups:
        lines.append(f"  # {resources_label}")
        for name, _description, _params in tools:
            resource_type, object_name = _RESOURCE_OVERRIDES.get(
                name, ("function", name.upper())
            )
            lines += [
                f"  {name}:",
                f'    type: "{resource_type}"',
                "    execution_environment:",
                '      type: "warehouse"',
                f"      warehouse: {warehouse}",
                f"    identifier: {namespace}.{object_name}",
                "",
            ]
    return "\n".join(lines)


def generate_cortex_agent_sql(
    agent_name: str,
    agent_display_name: str,
    agent_color: str,
    sf_warehouse: str | None,
    sf_database: str | None,
    sf_schema: str | None,
    include_mutations: bool = True,
) -> str:
    """Generate the SQL script that creates a Cortex Agent wired to the DataHub tools.

    The script switches to the database/schema/warehouse named by the
    ``$SF_DATABASE`` / ``$SF_SCHEMA`` / ``$SF_WAREHOUSE`` session variables,
    creates (or replaces) the agent from a YAML specification, grants usage to
    ``$SF_ROLE``, and finishes with a status ``SELECT``.

    Args:
        agent_name: Agent identifier; inserted verbatim into the ``CREATE`` /
            ``GRANT`` / ``DESCRIBE`` statements.
        agent_display_name: Display name stored in the agent ``PROFILE`` JSON.
        agent_color: Color stored in the agent ``PROFILE`` JSON.
        sf_warehouse: Warehouse named in every tool resource. ``None`` (or an
            empty string) emits the literal placeholder ``MY_WAREHOUSE``.
        sf_database: Database holding the tool objects. ``None`` (or an empty
            string) emits the literal placeholder ``MY_DATABASE``.
        sf_schema: Schema holding the tool objects. ``None`` (or an empty
            string) emits the literal placeholder ``MY_SCHEMA``.
        include_mutations: Whether to include the mutation/write tools and the
            matching instructions and sample questions (default: True).

    Returns:
        The complete SQL script as a single string.
    """
    import json

    # The placeholders are written literally into the specification; they are
    # not SQL variable references.
    warehouse = sf_warehouse or "MY_WAREHOUSE"
    database = sf_database or "MY_DATABASE"
    schema = sf_schema or "MY_SCHEMA"

    groups = _READ_TOOL_GROUPS
    if include_mutations:
        groups = groups + _MUTATION_TOOL_GROUPS
    groups = groups + _TRAILING_TOOL_GROUPS

    # The advertised count covers the DataHub functions only, not the SQL
    # executor procedure.
    datahub_tool_count = sum(
        1
        for _tools_label, _resources_label, tools in groups
        for tool in tools
        if tool[0] not in _RESOURCE_OVERRIDES
    )

    if include_mutations:
        access_label = "read + write"
        query_description = " and manage metadata"
        comment_suffix = " and metadata management"
        next_steps_scope = "SQL generation and metadata management"
    else:
        access_label = "read-only"
        query_description = ""
        comment_suffix = ""
        next_steps_scope = "SQL generation and metadata exploration"
    tool_count_note = f"{datahub_tool_count} tools ({access_label})"
    status_note = f"{datahub_tool_count} DataHub tools ({access_label})"

    # --- instructions.response ---------------------------------------------
    capabilities = [
        "Find and query data (search, schema exploration, SQL generation)",
        "Understand data lineage and relationships",
    ]
    if include_mutations:
        capabilities.append(
            "Manage metadata (tags, descriptions, owners, domains, glossary terms)"
        )
    capabilities.append("Search documentation and runbooks")

    response_lines = [
        "You are a comprehensive data assistant with access to DataHub metadata.",
        "You can help users:",
    ]
    response_lines += [
        f"{number}. {item}" for number, item in enumerate(capabilities, start=1)
    ]
    response_lines += [
        "",
        "Always use DataHub tools before generating SQL to ensure accuracy.",
    ]
    if include_mutations:
        response_lines.append(
            "When managing metadata, confirm changes with the user first."
        )

    # --- instructions.orchestration ----------------------------------------
    orchestration_lines = [
        "For data queries:",
        "1. Use search_datahub to find relevant datasets",
        "2. Use get_entities or list_schema_fields for schema details",
        "3. Generate SQL based on actual schema",
        "4. Execute using SqlExecutor",
        "",
        "For lineage questions:",
        "1. Use get_lineage to explore upstream/downstream dependencies",
        "2. Use get_lineage_paths_between for detailed transformation chains",
    ]
    if include_mutations:
        orchestration_lines += [
            "",
            "For metadata management:",
            "1. Search for entities first to get URNs",
            "2. Use appropriate tools (add_tags, update_description, etc.)",
            "3. Confirm changes were successful",
        ]

    # --- instructions.system -----------------------------------------------
    system_lines = [
        "You have comprehensive access to DataHub including:",
        "- Search and discovery (search_datahub, search_documents)",
        "- Schema exploration (get_entities, list_schema_fields)",
        "- Lineage analysis (get_lineage, get_lineage_paths_between)",
        "- Query patterns (get_dataset_queries)",
    ]
    if include_mutations:
        system_lines.append(
            "- Metadata management (tags, descriptions, owners, domains, glossary terms)"
        )
    system_lines.append("- User information (get_me)")

    # --- instructions.sample_questions -------------------------------------
    sample_lines = []
    for question, answer, needs_mutations in _SAMPLE_QUESTIONS:
        if needs_mutations and not include_mutations:
            continue
        sample_lines += [
            f"    - question: {_yaml_quote(question)}",
            f"      answer: {_yaml_quote(answer)}",
        ]

    block_indent = "    "  # content of a ``key: |`` block nested under ``instructions``
    specification = "\n".join(
        [
            "models:",
            "  orchestration: auto",
            "",
            "orchestration:",
            "  budget:",
            "    seconds: 60",
            "    tokens: 32000",
            "",
            "instructions:",
            "  response: |",
            _indent("\n".join(response_lines), block_indent),
            "",
            "  orchestration: |",
            _indent("\n".join(orchestration_lines), block_indent),
            "",
            "  system: |",
            _indent("\n".join(system_lines), block_indent),
            "",
            "  sample_questions:",
            "\n".join(sample_lines),
            "",
            "tools:",
            _render_tool_specs(groups),
            "tool_resources:",
            _render_tool_resources(groups, warehouse, f"{database}.{schema}"),
        ]
    ).rstrip("\n")

    # Serialize the profile as JSON, then escape it for the SQL string constant,
    # so quotes or backslashes in the display name cannot break the statement.
    profile = _sql_literal(
        json.dumps(
            {"display_name": agent_display_name, "color": agent_color},
            ensure_ascii=False,
        )
    )

    return f"""-- ============================================================================
-- Step 4: Create Cortex Agent with DataHub Tools
-- ============================================================================
-- This creates a Snowflake Cortex Agent that uses DataHub metadata
-- to generate accurate SQL queries{query_description}
--
-- Prerequisites:
-- - Run 00_configuration.sql first to set variables
-- - Run 01_network_rules.sql to set up network access
-- - Run 02_datahub_udfs.sql to create DataHub UDFs ({tool_count_note})
-- - Run 03_stored_procedure.sql to create EXECUTE_DYNAMIC_SQL
-- ============================================================================

USE DATABASE IDENTIFIER($SF_DATABASE);
USE SCHEMA IDENTIFIER($SF_SCHEMA);
USE WAREHOUSE IDENTIFIER($SF_WAREHOUSE);

CREATE OR REPLACE AGENT {agent_name}
  COMMENT = 'Agent that uses DataHub metadata for SQL generation{comment_suffix}'
  PROFILE = '{profile}'
  FROM SPECIFICATION
$$
{specification}
$$;

-- Grant usage to the specified role
GRANT USAGE ON AGENT {agent_name} TO ROLE IDENTIFIER($SF_ROLE);

-- Verify the agent was created
DESCRIBE AGENT {agent_name};

SELECT
    'Agent created successfully with {status_note}!' AS status,
    '{_sql_literal(agent_name)}' AS agent_name,
    'You can now use this agent in Snowflake Intelligence UI for {next_steps_scope}' AS next_steps;
"""
|