datahub-agent-context 1.3.1.10rc1__py3-none-any.whl → 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. datahub_agent_context/__init__.py +11 -3
  2. datahub_agent_context/_version.py +1 -1
  3. datahub_agent_context/cli.py +152 -0
  4. datahub_agent_context/context.py +47 -34
  5. datahub_agent_context/langchain_tools/builder.py +6 -4
  6. datahub_agent_context/mcp_tools/base.py +6 -3
  7. datahub_agent_context/mcp_tools/save_document.py +634 -0
  8. datahub_agent_context/snowflake/__init__.py +0 -0
  9. datahub_agent_context/snowflake/generate_udfs.py +306 -0
  10. datahub_agent_context/snowflake/generators/__init__.py +21 -0
  11. datahub_agent_context/snowflake/generators/configuration.py +104 -0
  12. datahub_agent_context/snowflake/generators/cortex_agent.py +725 -0
  13. datahub_agent_context/snowflake/generators/network_rules.py +53 -0
  14. datahub_agent_context/snowflake/generators/stored_procedure.py +87 -0
  15. datahub_agent_context/snowflake/snowflake.py +662 -0
  16. datahub_agent_context/snowflake/udfs/__init__.py +1 -0
  17. datahub_agent_context/snowflake/udfs/add_glossary_terms.py +61 -0
  18. datahub_agent_context/snowflake/udfs/add_owners.py +59 -0
  19. datahub_agent_context/snowflake/udfs/add_structured_properties.py +57 -0
  20. datahub_agent_context/snowflake/udfs/add_tags.py +61 -0
  21. datahub_agent_context/snowflake/udfs/base.py +45 -0
  22. datahub_agent_context/snowflake/udfs/get_dataset_queries.py +68 -0
  23. datahub_agent_context/snowflake/udfs/get_entities.py +47 -0
  24. datahub_agent_context/snowflake/udfs/get_lineage.py +61 -0
  25. datahub_agent_context/snowflake/udfs/get_lineage_paths_between.py +69 -0
  26. datahub_agent_context/snowflake/udfs/get_me.py +51 -0
  27. datahub_agent_context/snowflake/udfs/grep_documents.py +70 -0
  28. datahub_agent_context/snowflake/udfs/list_schema_fields.py +80 -0
  29. datahub_agent_context/snowflake/udfs/remove_domains.py +45 -0
  30. datahub_agent_context/snowflake/udfs/remove_glossary_terms.py +57 -0
  31. datahub_agent_context/snowflake/udfs/remove_owners.py +56 -0
  32. datahub_agent_context/snowflake/udfs/remove_structured_properties.py +56 -0
  33. datahub_agent_context/snowflake/udfs/remove_tags.py +57 -0
  34. datahub_agent_context/snowflake/udfs/search_datahub.py +71 -0
  35. datahub_agent_context/snowflake/udfs/search_documents.py +58 -0
  36. datahub_agent_context/snowflake/udfs/set_domains.py +55 -0
  37. datahub_agent_context/snowflake/udfs/update_description.py +60 -0
  38. {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA +21 -14
  39. datahub_agent_context-1.4.0rc2.dist-info/RECORD +66 -0
  40. datahub_agent_context-1.3.1.10rc1.dist-info/RECORD +0 -34
  41. {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/WHEEL +0 -0
  42. {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,725 @@
1
+ """Generate Snowflake Cortex Agent SQL."""
2
+
3
+
4
+ def generate_cortex_agent_sql(
5
+ agent_name: str,
6
+ agent_display_name: str,
7
+ agent_color: str,
8
+ sf_warehouse: str | None,
9
+ sf_database: str | None,
10
+ sf_schema: str | None,
11
+ include_mutations: bool = True,
12
+ ) -> str:
13
+ """Generate Cortex Agent SQL that uses configuration variables with DataHub tools.
14
+
15
+ Args:
16
+ agent_name: Agent name
17
+ agent_display_name: Agent display name
18
+ agent_color: Agent color
19
+ sf_warehouse: Snowflake warehouse name (uses placeholder if None)
20
+ sf_database: Snowflake database name (uses placeholder if None)
21
+ sf_schema: Snowflake schema name (uses placeholder if None)
22
+ include_mutations: Whether to include mutation/write tools (default: True)
23
+ """
24
+ # Use placeholders for None values - these will be set via SQL variables at runtime
25
+ warehouse = sf_warehouse or "MY_WAREHOUSE"
26
+ database = sf_database or "MY_DATABASE"
27
+ schema = sf_schema or "MY_SCHEMA"
28
+
29
+ # Build instructions based on whether mutations are enabled
30
+ if include_mutations:
31
+ capabilities = """1. Find and query data (search, schema exploration, SQL generation)
32
+ 2. Understand data lineage and relationships
33
+ 3. Manage metadata (tags, descriptions, owners, domains, glossary terms)
34
+ 4. Search documentation and runbooks"""
35
+
36
+ system_capabilities = """- Search and discovery (search_datahub, search_documents)
37
+ - Schema exploration (get_entities, list_schema_fields)
38
+ - Lineage analysis (get_lineage, get_lineage_paths_between)
39
+ - Query patterns (get_dataset_queries)
40
+ - Metadata management (tags, descriptions, owners, domains, glossary terms)
41
+ - User information (get_me)"""
42
+
43
+ orchestration_guidance = """For data queries:
44
+ 1. Use search_datahub to find relevant datasets
45
+ 2. Use get_entities or list_schema_fields for schema details
46
+ 3. Generate SQL based on actual schema
47
+ 4. Execute using SqlExecutor
48
+
49
+ For lineage questions:
50
+ 1. Use get_lineage to explore upstream/downstream dependencies
51
+ 2. Use get_lineage_paths_between for detailed transformation chains
52
+
53
+ For metadata management:
54
+ 1. Search for entities first to get URNs
55
+ 2. Use appropriate tools (add_tags, update_description, etc.)
56
+ 3. Confirm changes were successful"""
57
+
58
+ metadata_note = """Always use DataHub tools before generating SQL to ensure accuracy.
59
+ When managing metadata, confirm changes with the user first."""
60
+ else:
61
+ capabilities = """1. Find and query data (search, schema exploration, SQL generation)
62
+ 2. Understand data lineage and relationships
63
+ 3. Search documentation and runbooks"""
64
+
65
+ system_capabilities = """- Search and discovery (search_datahub, search_documents)
66
+ - Schema exploration (get_entities, list_schema_fields)
67
+ - Lineage analysis (get_lineage, get_lineage_paths_between)
68
+ - Query patterns (get_dataset_queries)
69
+ - User information (get_me)"""
70
+
71
+ orchestration_guidance = """For data queries:
72
+ 1. Use search_datahub to find relevant datasets
73
+ 2. Use get_entities or list_schema_fields for schema details
74
+ 3. Generate SQL based on actual schema
75
+ 4. Execute using SqlExecutor
76
+
77
+ For lineage questions:
78
+ 1. Use get_lineage to explore upstream/downstream dependencies
79
+ 2. Use get_lineage_paths_between for detailed transformation chains"""
80
+
81
+ metadata_note = (
82
+ "Always use DataHub tools before generating SQL to ensure accuracy."
83
+ )
84
+
85
+ # Build mutation tools section if enabled
86
+ mutation_tools = (
87
+ """
88
+ # Tag Management Tools
89
+ - tool_spec:
90
+ type: "generic"
91
+ name: "add_tags"
92
+ description: "Add tags to entities or columns. Confirm with user first before making changes."
93
+ input_schema:
94
+ type: "object"
95
+ properties:
96
+ tag_urns:
97
+ type: "string"
98
+ description: "JSON array of tag URNs (e.g., '[\\\"urn:li:tag:PII\\\"]')"
99
+ entity_urns:
100
+ type: "string"
101
+ description: "JSON array of entity URNs"
102
+ column_paths:
103
+ type: "string"
104
+ description: "JSON array of column names. Default: null (entity-level tagging)"
105
+ required: [tag_urns, entity_urns, column_paths]
106
+
107
+ - tool_spec:
108
+ type: "generic"
109
+ name: "remove_tags"
110
+ description: "Remove tags from entities or columns. Confirm with user first."
111
+ input_schema:
112
+ type: "object"
113
+ properties:
114
+ tag_urns:
115
+ type: "string"
116
+ description: "JSON array of tag URNs to remove"
117
+ entity_urns:
118
+ type: "string"
119
+ description: "JSON array of entity URNs"
120
+ column_paths:
121
+ type: "string"
122
+ description: "JSON array of column names. Default: null (entity-level tag removal)"
123
+ required: [tag_urns, entity_urns, column_paths]
124
+
125
+ # Description Management
126
+ - tool_spec:
127
+ type: "generic"
128
+ name: "update_description"
129
+ description: "Update entity/column descriptions. Operations: 'replace', 'append', 'remove'. Confirm with user first."
130
+ input_schema:
131
+ type: "object"
132
+ properties:
133
+ entity_urn:
134
+ type: "string"
135
+ description: "Entity URN"
136
+ operation:
137
+ type: "string"
138
+ description: "'replace', 'append', or 'remove'"
139
+ description:
140
+ type: "string"
141
+ description: "Description text. Default: null (not needed for 'remove' operation)"
142
+ column_path:
143
+ type: "string"
144
+ description: "Column name. Default: null (entity-level description)"
145
+ required: [entity_urn, operation, description, column_path]
146
+
147
+ # Domain Management
148
+ - tool_spec:
149
+ type: "generic"
150
+ name: "set_domains"
151
+ description: "Assign a domain to entities. Confirm with user first."
152
+ input_schema:
153
+ type: "object"
154
+ properties:
155
+ domain_urn:
156
+ type: "string"
157
+ description: "Domain URN (e.g., 'urn:li:domain:marketing')"
158
+ entity_urns:
159
+ type: "string"
160
+ description: "JSON array of entity URNs"
161
+ required: [domain_urn, entity_urns]
162
+
163
+ - tool_spec:
164
+ type: "generic"
165
+ name: "remove_domains"
166
+ description: "Remove domain assignments from entities. Confirm with user first."
167
+ input_schema:
168
+ type: "object"
169
+ properties:
170
+ entity_urns:
171
+ type: "string"
172
+ description: "JSON array of entity URNs"
173
+ required: [entity_urns]
174
+
175
+ # Owner Management
176
+ - tool_spec:
177
+ type: "generic"
178
+ name: "add_owners"
179
+ description: "Add owners to entities. Confirm with user first."
180
+ input_schema:
181
+ type: "object"
182
+ properties:
183
+ owner_urns:
184
+ type: "string"
185
+ description: "JSON array of owner URNs (CorpUser or CorpGroup)"
186
+ entity_urns:
187
+ type: "string"
188
+ description: "JSON array of entity URNs"
189
+ ownership_type_urn:
190
+ type: "string"
191
+ description: "Ownership type URN. Default: null (uses default ownership type)"
192
+ required: [owner_urns, entity_urns, ownership_type_urn]
193
+
194
+ - tool_spec:
195
+ type: "generic"
196
+ name: "remove_owners"
197
+ description: "Remove owners from entities. Confirm with user first."
198
+ input_schema:
199
+ type: "object"
200
+ properties:
201
+ owner_urns:
202
+ type: "string"
203
+ description: "JSON array of owner URNs"
204
+ entity_urns:
205
+ type: "string"
206
+ description: "JSON array of entity URNs"
207
+ ownership_type_urn:
208
+ type: "string"
209
+ description: "Ownership type URN. Default: null (removes all ownership types)"
210
+ required: [owner_urns, entity_urns, ownership_type_urn]
211
+
212
+ # Glossary Term Management
213
+ - tool_spec:
214
+ type: "generic"
215
+ name: "add_glossary_terms"
216
+ description: "Add glossary terms to entities or columns. Confirm with user first."
217
+ input_schema:
218
+ type: "object"
219
+ properties:
220
+ term_urns:
221
+ type: "string"
222
+ description: "JSON array of glossary term URNs"
223
+ entity_urns:
224
+ type: "string"
225
+ description: "JSON array of entity URNs"
226
+ column_paths:
227
+ type: "string"
228
+ description: "JSON array of column names. Default: null (entity-level glossary terms)"
229
+ required: [term_urns, entity_urns, column_paths]
230
+
231
+ - tool_spec:
232
+ type: "generic"
233
+ name: "remove_glossary_terms"
234
+ description: "Remove glossary terms from entities or columns. Confirm with user first."
235
+ input_schema:
236
+ type: "object"
237
+ properties:
238
+ term_urns:
239
+ type: "string"
240
+ description: "JSON array of glossary term URNs"
241
+ entity_urns:
242
+ type: "string"
243
+ description: "JSON array of entity URNs"
244
+ column_paths:
245
+ type: "string"
246
+ description: "JSON array of column names. Default: null (entity-level glossary terms)"
247
+ required: [term_urns, entity_urns, column_paths]
248
+
249
+ # Structured Property Management
250
+ - tool_spec:
251
+ type: "generic"
252
+ name: "add_structured_properties"
253
+ description: "Add structured properties to entities or columns. Confirm with user first."
254
+ input_schema:
255
+ type: "object"
256
+ properties:
257
+ property_values:
258
+ type: "string"
259
+ description: "JSON array of {{propertyUrn, value}} objects"
260
+ entity_urns:
261
+ type: "string"
262
+ description: "JSON array of entity URNs"
263
+ column_paths:
264
+ type: "string"
265
+ description: "JSON array of column names. Default: null (entity-level structured properties)"
266
+ required: [property_values, entity_urns, column_paths]
267
+
268
+ - tool_spec:
269
+ type: "generic"
270
+ name: "remove_structured_properties"
271
+ description: "Remove structured properties from entities or columns. Confirm with user first."
272
+ input_schema:
273
+ type: "object"
274
+ properties:
275
+ property_urns:
276
+ type: "string"
277
+ description: "JSON array of property URNs to remove"
278
+ entity_urns:
279
+ type: "string"
280
+ description: "JSON array of entity URNs"
281
+ column_paths:
282
+ type: "string"
283
+ description: "JSON array of column names. Default: null (entity-level structured properties)"
284
+ required: [property_urns, entity_urns, column_paths]
285
+ """
286
+ if include_mutations
287
+ else ""
288
+ )
289
+
290
+ # Build mutation tool resources section if enabled
291
+ mutation_tool_resources = (
292
+ f"""
293
+ # Tags
294
+ add_tags:
295
+ type: "function"
296
+ execution_environment:
297
+ type: "warehouse"
298
+ warehouse: {warehouse}
299
+ identifier: {database}.{schema}.ADD_TAGS
300
+
301
+ remove_tags:
302
+ type: "function"
303
+ execution_environment:
304
+ type: "warehouse"
305
+ warehouse: {warehouse}
306
+ identifier: {database}.{schema}.REMOVE_TAGS
307
+
308
+ # Descriptions
309
+ update_description:
310
+ type: "function"
311
+ execution_environment:
312
+ type: "warehouse"
313
+ warehouse: {warehouse}
314
+ identifier: {database}.{schema}.UPDATE_DESCRIPTION
315
+
316
+ # Domains
317
+ set_domains:
318
+ type: "function"
319
+ execution_environment:
320
+ type: "warehouse"
321
+ warehouse: {warehouse}
322
+ identifier: {database}.{schema}.SET_DOMAINS
323
+
324
+ remove_domains:
325
+ type: "function"
326
+ execution_environment:
327
+ type: "warehouse"
328
+ warehouse: {warehouse}
329
+ identifier: {database}.{schema}.REMOVE_DOMAINS
330
+
331
+ # Owners
332
+ add_owners:
333
+ type: "function"
334
+ execution_environment:
335
+ type: "warehouse"
336
+ warehouse: {warehouse}
337
+ identifier: {database}.{schema}.ADD_OWNERS
338
+
339
+ remove_owners:
340
+ type: "function"
341
+ execution_environment:
342
+ type: "warehouse"
343
+ warehouse: {warehouse}
344
+ identifier: {database}.{schema}.REMOVE_OWNERS
345
+
346
+ # Glossary Terms
347
+ add_glossary_terms:
348
+ type: "function"
349
+ execution_environment:
350
+ type: "warehouse"
351
+ warehouse: {warehouse}
352
+ identifier: {database}.{schema}.ADD_GLOSSARY_TERMS
353
+
354
+ remove_glossary_terms:
355
+ type: "function"
356
+ execution_environment:
357
+ type: "warehouse"
358
+ warehouse: {warehouse}
359
+ identifier: {database}.{schema}.REMOVE_GLOSSARY_TERMS
360
+
361
+ # Structured Properties
362
+ add_structured_properties:
363
+ type: "function"
364
+ execution_environment:
365
+ type: "warehouse"
366
+ warehouse: {warehouse}
367
+ identifier: {database}.{schema}.ADD_STRUCTURED_PROPERTIES
368
+
369
+ remove_structured_properties:
370
+ type: "function"
371
+ execution_environment:
372
+ type: "warehouse"
373
+ warehouse: {warehouse}
374
+ identifier: {database}.{schema}.REMOVE_STRUCTURED_PROPERTIES
375
+ """
376
+ if include_mutations
377
+ else ""
378
+ )
379
+
380
+ tool_count_note = (
381
+ "20 tools (read + write)" if include_mutations else "9 tools (read-only)"
382
+ )
383
+ query_description = " and manage metadata" if include_mutations else ""
384
+ comment_suffix = " and metadata management" if include_mutations else ""
385
+
386
+ # Build sample questions based on whether mutations are enabled
387
+ sample_questions_with_mutations = '''
388
+ - question: "What tables contain customer data?"
389
+ answer: "I'll search DataHub for datasets related to customer data."
390
+ - question: "Show me the lineage for the sales_monthly table"
391
+ answer: "I'll retrieve the lineage information for the sales_monthly table."
392
+ - question: "Tag all PII datasets in the finance domain"
393
+ answer: "I'll search for datasets in the finance domain and add PII tags to them."
394
+ - question: "What queries use the users table?"
395
+ answer: "I'll retrieve the SQL queries that reference the users table."
396
+ - question: "Add a description to the revenue column"
397
+ answer: "I'll update the description for the revenue column."
398
+ - question: "Who owns the analytics datasets?"
399
+ answer: "I'll search for analytics datasets and show their ownership information."'''
400
+
401
+ sample_questions_readonly = '''
402
+ - question: "What tables contain customer data?"
403
+ answer: "I'll search DataHub for datasets related to customer data."
404
+ - question: "Show me the lineage for the sales_monthly table"
405
+ answer: "I'll retrieve the lineage information for the sales_monthly table."
406
+ - question: "What queries use the users table?"
407
+ answer: "I'll retrieve the SQL queries that reference the users table."
408
+ - question: "Who owns the analytics datasets?"
409
+ answer: "I'll search for analytics datasets and show their ownership information."'''
410
+
411
+ sample_questions = (
412
+ sample_questions_with_mutations
413
+ if include_mutations
414
+ else sample_questions_readonly
415
+ )
416
+ return f"""-- ============================================================================
417
+ -- Step 4: Create Cortex Agent with DataHub Tools
418
+ -- ============================================================================
419
+ -- This creates a Snowflake Cortex Agent that uses DataHub metadata
420
+ -- to generate accurate SQL queries{query_description}
421
+ --
422
+ -- Prerequisites:
423
+ -- - Run 00_configuration.sql first to set variables
424
+ -- - Run 01_network_rules.sql to set up network access
425
+ -- - Run 02_datahub_udfs.sql to create DataHub UDFs ({tool_count_note})
426
+ -- - Run 03_stored_procedure.sql to create EXECUTE_DYNAMIC_SQL
427
+ -- ============================================================================
428
+
429
+ USE DATABASE IDENTIFIER($SF_DATABASE);
430
+ USE SCHEMA IDENTIFIER($SF_SCHEMA);
431
+ USE WAREHOUSE IDENTIFIER($SF_WAREHOUSE);
432
+
433
+ CREATE OR REPLACE AGENT {agent_name}
434
+ COMMENT = 'Agent that uses DataHub metadata for SQL generation{comment_suffix}'
435
+ PROFILE = '{{"display_name": "{agent_display_name}", "color": "{agent_color}"}}'
436
+ FROM SPECIFICATION
437
+ $$
438
+ models:
439
+ orchestration: auto
440
+
441
+ orchestration:
442
+ budget:
443
+ seconds: 60
444
+ tokens: 32000
445
+
446
+ instructions:
447
+ response: |
448
+ You are a comprehensive data assistant with access to DataHub metadata.
449
+ You can help users:
450
+ {capabilities}
451
+
452
+ {metadata_note}
453
+
454
+ orchestration: |
455
+ {orchestration_guidance}
456
+
457
+ system: |
458
+ You have comprehensive access to DataHub including:
459
+ {system_capabilities}
460
+
461
+ sample_questions:{sample_questions}
462
+
463
+ tools:
464
+ # Core Search & Discovery Tools
465
+ - tool_spec:
466
+ type: "generic"
467
+ name: "search_datahub"
468
+ description: "Search DataHub for entities (datasets, dashboards, etc.). Use /q prefix for structured queries. Returns URNs, names, descriptions, and metadata."
469
+ input_schema:
470
+ type: "object"
471
+ properties:
472
+ search_query:
473
+ type: "string"
474
+ description: "Search query (e.g., 'customer', '/q user+transaction')"
475
+ entity_type:
476
+ type: "string"
477
+ description: "Entity type filter (e.g., 'dataset', 'tag', etc.). Default: null (all entity types)"
478
+ required: [search_query, entity_type]
479
+
480
+ - tool_spec:
481
+ type: "generic"
482
+ name: "get_entities"
483
+ description: "Get detailed entity information including schema, tags, owners, lineage summary. Use URN from search results."
484
+ input_schema:
485
+ type: "object"
486
+ properties:
487
+ entity_urn:
488
+ type: "string"
489
+ description: "Entity URN from search results"
490
+ required: [entity_urn]
491
+
492
+ - tool_spec:
493
+ type: "generic"
494
+ name: "list_schema_fields"
495
+ description: "List schema fields with filtering and pagination. Useful for large schemas or finding specific columns."
496
+ input_schema:
497
+ type: "object"
498
+ properties:
499
+ dataset_urn:
500
+ type: "string"
501
+ description: "Dataset URN"
502
+ keywords:
503
+ type: "string"
504
+ description: "Keywords to filter fields (single string or JSON array). Default: null (no filtering)"
505
+ limit:
506
+ type: "number"
507
+ description: "Max fields to return. Default: 100"
508
+ required: [dataset_urn, keywords, limit]
509
+
510
+ # Lineage Tools
511
+ - tool_spec:
512
+ type: "generic"
513
+ name: "get_lineage"
514
+ description: "Get upstream or downstream lineage for entities or columns. Returns lineage graph with metadata."
515
+ input_schema:
516
+ type: "object"
517
+ properties:
518
+ urn:
519
+ type: "string"
520
+ description: "Entity URN"
521
+ column_name:
522
+ type: "string"
523
+ description: "Column name for column-level lineage. Default: null (entity-level lineage)"
524
+ upstream:
525
+ type: "number"
526
+ description: "1 for upstream, 0 for downstream. Default: 1"
527
+ max_hops:
528
+ type: "number"
529
+ description: "Max hops (1-3+). Default: 1"
530
+ max_results:
531
+ type: "number"
532
+ description: "Max results. Default: 30"
533
+ required: [urn, column_name, upstream, max_hops, max_results]
534
+
535
+ - tool_spec:
536
+ type: "generic"
537
+ name: "get_lineage_paths_between"
538
+ description: "Get detailed transformation paths between two entities/columns. Shows intermediate steps and queries."
539
+ input_schema:
540
+ type: "object"
541
+ properties:
542
+ source_urn:
543
+ type: "string"
544
+ description: "Source dataset URN"
545
+ target_urn:
546
+ type: "string"
547
+ description: "Target dataset URN"
548
+ source_column:
549
+ type: "string"
550
+ description: "Source column name. Default: null (dataset-level lineage)"
551
+ target_column:
552
+ type: "string"
553
+ description: "Target column name. Default: null (dataset-level lineage)"
554
+ required: [source_urn, target_urn, source_column, target_column]
555
+
556
+ # Query Analysis Tools
557
+ - tool_spec:
558
+ type: "generic"
559
+ name: "get_dataset_queries"
560
+ description: "Get SQL queries that use a dataset/column. Filter by MANUAL (user queries) or SYSTEM (BI tools)."
561
+ input_schema:
562
+ type: "object"
563
+ properties:
564
+ urn:
565
+ type: "string"
566
+ description: "Dataset URN"
567
+ column_name:
568
+ type: "string"
569
+ description: "Column name to filter queries. Default: null (queries for all columns)"
570
+ source:
571
+ type: "string"
572
+ description: "'MANUAL', 'SYSTEM', or null for both. Default: null"
573
+ count:
574
+ type: "number"
575
+ description: "Number of queries. Default: 10"
576
+ required: [urn, column_name, source, count]
577
+
578
+ # Document Search Tools
579
+ - tool_spec:
580
+ type: "generic"
581
+ name: "search_documents"
582
+ description: "Search organization documents (runbooks, FAQs, knowledge articles from Notion, Confluence, etc.)."
583
+ input_schema:
584
+ type: "object"
585
+ properties:
586
+ search_query:
587
+ type: "string"
588
+ description: "Search query"
589
+ num_results:
590
+ type: "number"
591
+ description: "Max results. Default: 10"
592
+ required: [search_query, num_results]
593
+
594
+ - tool_spec:
595
+ type: "generic"
596
+ name: "grep_documents"
597
+ description: "Search within document content using regex patterns. Use after search_documents to find specific content."
598
+ input_schema:
599
+ type: "object"
600
+ properties:
601
+ urns:
602
+ type: "string"
603
+ description: "JSON array of document URNs"
604
+ pattern:
605
+ type: "string"
606
+ description: "Regex pattern to search for"
607
+ context_chars:
608
+ type: "number"
609
+ description: "Context characters. Default: 200"
610
+ max_matches_per_doc:
611
+ type: "number"
612
+ description: "Max matches per document. Default: 5"
613
+ required: [urns, pattern, context_chars, max_matches_per_doc]
614
+ {mutation_tools}
615
+ # User Info
616
+ - tool_spec:
617
+ type: "generic"
618
+ name: "get_me"
619
+ description: "Get information about the authenticated user (profile, groups, privileges). This tool takes no parameters."
620
+ input_schema:
621
+ type: "object"
622
+ properties: {{}}
623
+
624
+ # SQL Executor
625
+ - tool_spec:
626
+ type: "generic"
627
+ name: "SqlExecutor"
628
+ description: "Execute SELECT SQL queries and return results. Use after generating SQL from DataHub metadata."
629
+ input_schema:
630
+ type: "object"
631
+ properties:
632
+ SQL_TEXT:
633
+ type: "string"
634
+ description: "SELECT SQL query (must start with SELECT)"
635
+ required: [SQL_TEXT]
636
+
637
+ tool_resources:
638
+ # Search & Discovery
639
+ search_datahub:
640
+ type: "function"
641
+ execution_environment:
642
+ type: "warehouse"
643
+ warehouse: {warehouse}
644
+ identifier: {database}.{schema}.SEARCH_DATAHUB
645
+
646
+ get_entities:
647
+ type: "function"
648
+ execution_environment:
649
+ type: "warehouse"
650
+ warehouse: {warehouse}
651
+ identifier: {database}.{schema}.GET_ENTITIES
652
+
653
+ list_schema_fields:
654
+ type: "function"
655
+ execution_environment:
656
+ type: "warehouse"
657
+ warehouse: {warehouse}
658
+ identifier: {database}.{schema}.LIST_SCHEMA_FIELDS
659
+
660
+ # Lineage
661
+ get_lineage:
662
+ type: "function"
663
+ execution_environment:
664
+ type: "warehouse"
665
+ warehouse: {warehouse}
666
+ identifier: {database}.{schema}.GET_LINEAGE
667
+
668
+ get_lineage_paths_between:
669
+ type: "function"
670
+ execution_environment:
671
+ type: "warehouse"
672
+ warehouse: {warehouse}
673
+ identifier: {database}.{schema}.GET_LINEAGE_PATHS_BETWEEN
674
+
675
+ # Query Analysis
676
+ get_dataset_queries:
677
+ type: "function"
678
+ execution_environment:
679
+ type: "warehouse"
680
+ warehouse: {warehouse}
681
+ identifier: {database}.{schema}.GET_DATASET_QUERIES
682
+
683
+ # Documents
684
+ search_documents:
685
+ type: "function"
686
+ execution_environment:
687
+ type: "warehouse"
688
+ warehouse: {warehouse}
689
+ identifier: {database}.{schema}.SEARCH_DOCUMENTS
690
+
691
+ grep_documents:
692
+ type: "function"
693
+ execution_environment:
694
+ type: "warehouse"
695
+ warehouse: {warehouse}
696
+ identifier: {database}.{schema}.GREP_DOCUMENTS
697
+ {mutation_tool_resources}
698
+ # User Info
699
+ get_me:
700
+ type: "function"
701
+ execution_environment:
702
+ type: "warehouse"
703
+ warehouse: {warehouse}
704
+ identifier: {database}.{schema}.GET_ME
705
+
706
+ # SQL Executor
707
+ SqlExecutor:
708
+ type: "procedure"
709
+ execution_environment:
710
+ type: "warehouse"
711
+ warehouse: {warehouse}
712
+ identifier: {database}.{schema}.EXECUTE_DYNAMIC_SQL
713
+ $$;
714
+
715
+ -- Grant usage to the specified role
716
+ GRANT USAGE ON AGENT {agent_name} TO ROLE IDENTIFIER($SF_ROLE);
717
+
718
+ -- Verify the agent was created
719
+ DESCRIBE AGENT {agent_name};
720
+
721
+ SELECT
722
+ 'Agent created successfully with {"20 DataHub tools (read + write)" if include_mutations else "9 DataHub tools (read-only)"}!' AS status,
723
+ '{agent_name}' AS agent_name,
724
+ 'You can now use this agent in Snowflake Intelligence UI for {"SQL generation and metadata management" if include_mutations else "SQL generation and metadata exploration"}' AS next_steps;
725
+ """