datahub-agent-context 1.3.1.10rc1__py3-none-any.whl → 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datahub_agent_context/__init__.py +11 -3
- datahub_agent_context/_version.py +1 -1
- datahub_agent_context/cli.py +152 -0
- datahub_agent_context/context.py +47 -34
- datahub_agent_context/langchain_tools/builder.py +6 -4
- datahub_agent_context/mcp_tools/base.py +6 -3
- datahub_agent_context/mcp_tools/save_document.py +634 -0
- datahub_agent_context/snowflake/__init__.py +0 -0
- datahub_agent_context/snowflake/generate_udfs.py +306 -0
- datahub_agent_context/snowflake/generators/__init__.py +21 -0
- datahub_agent_context/snowflake/generators/configuration.py +104 -0
- datahub_agent_context/snowflake/generators/cortex_agent.py +725 -0
- datahub_agent_context/snowflake/generators/network_rules.py +53 -0
- datahub_agent_context/snowflake/generators/stored_procedure.py +87 -0
- datahub_agent_context/snowflake/snowflake.py +662 -0
- datahub_agent_context/snowflake/udfs/__init__.py +1 -0
- datahub_agent_context/snowflake/udfs/add_glossary_terms.py +61 -0
- datahub_agent_context/snowflake/udfs/add_owners.py +59 -0
- datahub_agent_context/snowflake/udfs/add_structured_properties.py +57 -0
- datahub_agent_context/snowflake/udfs/add_tags.py +61 -0
- datahub_agent_context/snowflake/udfs/base.py +45 -0
- datahub_agent_context/snowflake/udfs/get_dataset_queries.py +68 -0
- datahub_agent_context/snowflake/udfs/get_entities.py +47 -0
- datahub_agent_context/snowflake/udfs/get_lineage.py +61 -0
- datahub_agent_context/snowflake/udfs/get_lineage_paths_between.py +69 -0
- datahub_agent_context/snowflake/udfs/get_me.py +51 -0
- datahub_agent_context/snowflake/udfs/grep_documents.py +70 -0
- datahub_agent_context/snowflake/udfs/list_schema_fields.py +80 -0
- datahub_agent_context/snowflake/udfs/remove_domains.py +45 -0
- datahub_agent_context/snowflake/udfs/remove_glossary_terms.py +57 -0
- datahub_agent_context/snowflake/udfs/remove_owners.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_structured_properties.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_tags.py +57 -0
- datahub_agent_context/snowflake/udfs/search_datahub.py +71 -0
- datahub_agent_context/snowflake/udfs/search_documents.py +58 -0
- datahub_agent_context/snowflake/udfs/set_domains.py +55 -0
- datahub_agent_context/snowflake/udfs/update_description.py +60 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA +21 -14
- datahub_agent_context-1.4.0rc2.dist-info/RECORD +66 -0
- datahub_agent_context-1.3.1.10rc1.dist-info/RECORD +0 -34
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/WHEEL +0 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,662 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from datahub_agent_context.snowflake.generate_udfs import (
|
|
8
|
+
generate_all_udfs,
|
|
9
|
+
generate_datahub_udfs_sql,
|
|
10
|
+
)
|
|
11
|
+
from datahub_agent_context.snowflake.generators import (
|
|
12
|
+
generate_configuration_sql,
|
|
13
|
+
generate_cortex_agent_sql,
|
|
14
|
+
generate_network_rules_sql,
|
|
15
|
+
generate_stored_procedure_sql,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def extract_domain_from_url(datahub_url: str) -> str:
    """Extract the network location of a DataHub URL for network rules.

    The URL is parsed with ``urllib.parse.urlsplit`` so that a query string,
    fragment or ``user:password@`` prefix can never end up in the result.
    An explicit port is kept (``host:port``).

    Args:
        datahub_url: DataHub instance URL (e.g., https://example.datahubproject.io)

    Returns:
        Domain without protocol, credentials, path, query or fragment
        (e.g., example.datahubproject.io)

    Raises:
        ValueError: If the URL is invalid or missing required components
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    if not datahub_url or not isinstance(datahub_url, str):
        raise ValueError("DataHub URL must be a non-empty string")

    # Tolerate surrounding whitespace from copy/paste or shell quoting.
    datahub_url = datahub_url.strip()

    # Only absolute http(s) URLs are accepted.
    if not datahub_url.startswith(("http://", "https://")):
        raise ValueError(
            f"DataHub URL must start with http:// or https://, got: {datahub_url}"
        )

    # netloc is everything between "//" and the first "/", "?" or "#";
    # rpartition drops an optional "user:password@" prefix.
    domain = urlsplit(datahub_url).netloc.rpartition("@")[2]

    if not domain:
        raise ValueError(f"Could not extract domain from URL: {datahub_url}")

    # Basic domain validation - must have at least one dot or be localhost
    if "." not in domain and not domain.startswith("localhost"):
        raise ValueError(
            f"Invalid domain format (must contain at least one dot or be localhost): {domain}"
        )

    return domain
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def build_connection_params(
|
|
63
|
+
sf_account: str | None,
|
|
64
|
+
sf_user: str | None,
|
|
65
|
+
sf_role: str | None,
|
|
66
|
+
sf_warehouse: str | None,
|
|
67
|
+
sf_password: str | None,
|
|
68
|
+
sf_authenticator: str,
|
|
69
|
+
) -> dict[str, Any]:
|
|
70
|
+
"""Build Snowflake connection parameters based on authentication type.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
sf_account: Snowflake account identifier
|
|
74
|
+
sf_user: Snowflake user name
|
|
75
|
+
sf_role: Snowflake role
|
|
76
|
+
sf_warehouse: Snowflake warehouse name
|
|
77
|
+
sf_password: Snowflake password (for password auth)
|
|
78
|
+
sf_authenticator: Authentication method (snowflake, externalbrowser, oauth)
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
Dictionary of connection parameters for snowflake.connector.connect()
|
|
82
|
+
"""
|
|
83
|
+
connection_params: dict[str, Any] = {}
|
|
84
|
+
|
|
85
|
+
if sf_user:
|
|
86
|
+
connection_params["user"] = sf_user
|
|
87
|
+
if sf_account:
|
|
88
|
+
connection_params["account"] = sf_account
|
|
89
|
+
if sf_role:
|
|
90
|
+
connection_params["role"] = sf_role
|
|
91
|
+
if sf_warehouse:
|
|
92
|
+
connection_params["warehouse"] = sf_warehouse
|
|
93
|
+
|
|
94
|
+
if sf_authenticator == "snowflake":
|
|
95
|
+
if sf_password:
|
|
96
|
+
connection_params["password"] = sf_password
|
|
97
|
+
elif sf_authenticator == "externalbrowser":
|
|
98
|
+
connection_params["authenticator"] = "externalbrowser"
|
|
99
|
+
elif sf_authenticator == "oauth":
|
|
100
|
+
connection_params["authenticator"] = "oauth"
|
|
101
|
+
|
|
102
|
+
return connection_params
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def generate_all_sql_scripts(
    sf_account: str | None,
    sf_user: str | None,
    sf_role: str | None,
    sf_warehouse: str | None,
    sf_database: str | None,
    sf_schema: str | None,
    datahub_url: str,
    datahub_token: str,
    agent_name: str,
    agent_display_name: str,
    agent_color: str,
    enable_mutations: bool,
    execute_mode: bool,
) -> list[tuple[str, str]]:
    """Build every SQL script needed to set up the Snowflake agent.

    Args:
        sf_account: Snowflake account identifier
        sf_user: Snowflake user name
        sf_role: Snowflake role
        sf_warehouse: Snowflake warehouse name
        sf_database: Snowflake database name
        sf_schema: Snowflake schema name
        datahub_url: DataHub instance URL
        datahub_token: DataHub Personal Access Token
        agent_name: Agent name in Snowflake
        agent_display_name: Agent display name in Snowflake UI
        agent_color: Agent color in Snowflake UI
        enable_mutations: Include mutation/write tools
        execute_mode: Whether to include actual tokens in SQL (for execution)

    Returns:
        List of (script_name, script_content) tuples in execution order
    """
    # Keyword groups shared by the configuration and Cortex agent generators.
    location_kwargs = {
        "sf_warehouse": sf_warehouse,
        "sf_database": sf_database,
        "sf_schema": sf_schema,
    }
    agent_kwargs = {
        "agent_name": agent_name,
        "agent_display_name": agent_display_name,
        "agent_color": agent_color,
    }

    configuration = generate_configuration_sql(
        sf_account=sf_account,
        sf_user=sf_user,
        sf_role=sf_role,
        datahub_url=datahub_url,
        datahub_token=datahub_token,
        execute=execute_mode,
        **location_kwargs,
        **agent_kwargs,
    )
    network_rules = generate_network_rules_sql(extract_domain_from_url(datahub_url))
    udfs = generate_datahub_udfs_sql(include_mutations=enable_mutations)
    stored_procedure = generate_stored_procedure_sql()
    cortex_agent = generate_cortex_agent_sql(
        include_mutations=enable_mutations,
        **agent_kwargs,
        **location_kwargs,
    )

    # The numeric prefixes encode the order the scripts must be run in.
    file_names = (
        "00_configuration.sql",
        "01_network_rules.sql",
        "02_datahub_udfs.sql",
        "03_stored_procedure.sql",
        "04_cortex_agent.sql",
    )
    contents = (configuration, network_rules, udfs, stored_procedure, cortex_agent)
    return list(zip(file_names, contents))
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def write_sql_files_to_disk(
    output_path: Path,
    scripts: list[tuple[str, str]],
    enable_mutations: bool,
) -> None:
    """Write generated SQL scripts to disk and report each file written.

    Files are written as UTF-8 regardless of the platform's default encoding,
    so non-ASCII characters in the generated SQL are stored consistently.

    Args:
        output_path: Directory to write SQL files to (created if missing)
        scripts: List of (script_name, script_content) tuples
        enable_mutations: Whether mutations are enabled (for messaging)
    """
    # Safe to repeat when the caller has already created the directory.
    output_path.mkdir(parents=True, exist_ok=True)

    for script_name, script_content in scripts:
        file_path = output_path / script_name
        file_path.write_text(script_content, encoding="utf-8")

        if script_name == "02_datahub_udfs.sql":
            # The UDF script gets a richer message including the UDF count.
            udf_count = len(generate_all_udfs(include_mutations=enable_mutations))
            mutation_note = " (read + write)" if enable_mutations else " (read-only)"
            click.echo(f"✓ Generated {file_path} - {udf_count} UDFs{mutation_note}")
        else:
            click.echo(f"✓ Generated {file_path}")
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def execute_sql_scripts_in_snowflake(
    connection: Any,
    scripts: list[tuple[str, str]],
) -> bool:
    """Run the given SQL scripts against Snowflake one after another.

    Execution stops at the first script that fails; later scripts are not
    attempted.

    Args:
        connection: Snowflake connection object
        scripts: List of (script_name, script_content) tuples to execute

    Returns:
        True if all scripts executed successfully, False otherwise
    """
    for name, sql_text in scripts:
        click.echo(f"\n📝 Executing {name}...")
        if not execute_sql_in_snowflake(connection, sql_text, name):
            click.echo(f" ✗ {name} failed", err=True)
            return False
        click.echo(f" ✓ {name} completed successfully")

    return True
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def execute_sql_in_snowflake(
    connection: Any,
    sql_content: str,
    script_name: str,
) -> bool:
    """Execute SQL content in Snowflake using execute_string.

    The SQL text itself is never written to the log: in execute mode the
    scripts can embed the real DataHub token, so only the script name and the
    error are reported on failure.

    Args:
        connection: Snowflake connection object
        sql_content: SQL script content to execute (can contain multiple statements)
        script_name: Name of the script (for logging)

    Returns:
        True if successful, False otherwise
    """
    try:
        click.echo(f" Executing {script_name}...")

        statement_count = 0

        # execute_string handles multi-statement SQL and yields one cursor
        # per statement.
        cursors = connection.execute_string(sql_content, remove_comments=True)

        for statement_count, cursor in enumerate(cursors, start=1):
            try:
                # Only statements with a result set have a description.
                if cursor.description:
                    try:
                        results = cursor.fetchall()
                        if results:
                            click.echo(
                                f" Statement {statement_count}: {len(results)} row(s) returned"
                            )
                    except Exception as e:
                        # Some statements don't return results, that's okay
                        logger.debug(
                            "No results for statement %s: %s", statement_count, e
                        )
            finally:
                # Always release the cursor, even if reporting raised.
                cursor.close()

        click.echo(f" ✓ Executed {statement_count} statement(s) successfully")
        return True

    except Exception as e:
        click.echo(f"✗ Error executing {script_name}: {e}", err=True)
        # Deliberately do not log sql_content: it may contain credentials.
        logger.error("Error executing %s: %s", script_name, e)
        return False
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def auto_detect_snowflake_params(
|
|
287
|
+
connection: Any,
|
|
288
|
+
sf_account: str | None,
|
|
289
|
+
sf_user: str | None,
|
|
290
|
+
sf_role: str | None,
|
|
291
|
+
sf_warehouse: str | None,
|
|
292
|
+
sf_database: str | None,
|
|
293
|
+
sf_schema: str | None,
|
|
294
|
+
) -> tuple[str, str, str, str, str, str]:
|
|
295
|
+
"""Auto-detect Snowflake connection parameters from an active connection.
|
|
296
|
+
|
|
297
|
+
Args:
|
|
298
|
+
connection: Active Snowflake connection object
|
|
299
|
+
sf_account: Account (auto-detected if None)
|
|
300
|
+
sf_user: User (auto-detected if None)
|
|
301
|
+
sf_role: Role (auto-detected if None)
|
|
302
|
+
sf_warehouse: Warehouse (auto-detected if None)
|
|
303
|
+
sf_database: Database (auto-detected if None)
|
|
304
|
+
sf_schema: Schema (auto-detected if None)
|
|
305
|
+
|
|
306
|
+
Returns:
|
|
307
|
+
Tuple of (account, user, role, warehouse, database, schema)
|
|
308
|
+
|
|
309
|
+
Raises:
|
|
310
|
+
ValueError: If required parameters cannot be auto-detected
|
|
311
|
+
"""
|
|
312
|
+
cursor = connection.cursor()
|
|
313
|
+
try:
|
|
314
|
+
if not sf_account:
|
|
315
|
+
cursor.execute("SELECT CURRENT_ACCOUNT()")
|
|
316
|
+
sf_account = cursor.fetchone()[0]
|
|
317
|
+
click.echo(f" Auto-detected account: {sf_account}")
|
|
318
|
+
|
|
319
|
+
if not sf_user:
|
|
320
|
+
cursor.execute("SELECT CURRENT_USER()")
|
|
321
|
+
sf_user = cursor.fetchone()[0]
|
|
322
|
+
click.echo(f" Auto-detected user: {sf_user}")
|
|
323
|
+
|
|
324
|
+
if not sf_role:
|
|
325
|
+
cursor.execute("SELECT CURRENT_ROLE()")
|
|
326
|
+
sf_role = cursor.fetchone()[0]
|
|
327
|
+
click.echo(f" Auto-detected role: {sf_role}")
|
|
328
|
+
|
|
329
|
+
if not sf_warehouse:
|
|
330
|
+
cursor.execute("SELECT CURRENT_WAREHOUSE()")
|
|
331
|
+
result = cursor.fetchone()
|
|
332
|
+
if result and result[0]:
|
|
333
|
+
sf_warehouse = result[0]
|
|
334
|
+
click.echo(f" Auto-detected warehouse: {sf_warehouse}")
|
|
335
|
+
else:
|
|
336
|
+
raise ValueError(
|
|
337
|
+
"No warehouse is currently in use. Please specify --sf-warehouse"
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
if not sf_database:
|
|
341
|
+
cursor.execute("SELECT CURRENT_DATABASE()")
|
|
342
|
+
result = cursor.fetchone()
|
|
343
|
+
if result and result[0]:
|
|
344
|
+
sf_database = result[0]
|
|
345
|
+
click.echo(f" Auto-detected database: {sf_database}")
|
|
346
|
+
else:
|
|
347
|
+
raise ValueError(
|
|
348
|
+
"No database is currently in use. Please specify --sf-database"
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
if not sf_schema:
|
|
352
|
+
cursor.execute("SELECT CURRENT_SCHEMA()")
|
|
353
|
+
result = cursor.fetchone()
|
|
354
|
+
if result and result[0]:
|
|
355
|
+
sf_schema = result[0]
|
|
356
|
+
click.echo(f" Auto-detected schema: {sf_schema}")
|
|
357
|
+
else:
|
|
358
|
+
raise ValueError(
|
|
359
|
+
"No schema is currently in use. Please specify --sf-schema"
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
return sf_account, sf_user, sf_role, sf_warehouse, sf_database, sf_schema
|
|
363
|
+
finally:
|
|
364
|
+
cursor.close()
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
@click.command()
@click.option(
    "--sf-account",
    default=None,
    help="Snowflake account identifier (auto-detected if not provided)",
)
@click.option(
    "--sf-user",
    default=None,
    help="Snowflake user name (auto-detected if not provided)",
)
@click.option(
    "--sf-role", default=None, help="Snowflake role (auto-detected if not provided)"
)
@click.option(
    "--sf-warehouse",
    default=None,
    help="Snowflake warehouse name (auto-detected if not provided)",
)
@click.option(
    "--sf-database",
    default=None,
    help="Snowflake database name (auto-detected if not provided)",
)
@click.option(
    "--sf-schema",
    default=None,
    help="Snowflake schema name (auto-detected if not provided)",
)
@click.option("--datahub-url", required=True, help="DataHub instance URL")
@click.option("--datahub-token", required=True, help="DataHub Personal Access Token")
@click.option(
    "--agent-name", default="DATAHUB_SQL_AGENT", help="Agent name in Snowflake"
)
@click.option(
    "--agent-display-name",
    default="DataHub SQL Assistant",
    help="Agent display name in Snowflake UI",
)
@click.option(
    "--agent-color",
    default="blue",
    help="Agent color in Snowflake UI",
    type=click.Choice(["blue", "red", "green", "yellow", "purple", "orange"]),
)
@click.option(
    "--output-dir",
    default="./snowflake_setup",
    help="Output directory for generated SQL files",
)
@click.option(
    "--execute",
    is_flag=True,
    default=False,
    help="Connect to Snowflake and execute the SQL scripts directly",
)
@click.option(
    "--sf-password",
    help="Snowflake password (required if --execute is used with password authentication)",
)
@click.option(
    "--sf-authenticator",
    default="snowflake",
    type=click.Choice(["snowflake", "externalbrowser", "oauth"], case_sensitive=False),
    help="Authentication method: 'snowflake' (password), 'externalbrowser' (SSO), or 'oauth' (token-based). Default: snowflake",
)
@click.option(
    "--enable-mutations/--no-enable-mutations",
    default=True,
    help="Include mutation/write tools (tags, descriptions, owners, etc.). Default: enabled",
)
def create_snowflake_agent(
    sf_account: str | None,
    sf_user: str | None,
    sf_role: str | None,
    sf_warehouse: str | None,
    sf_database: str | None,
    sf_schema: str | None,
    datahub_url: str,
    datahub_token: str,
    agent_name: str,
    agent_display_name: str,
    agent_color: str,
    output_dir: str,
    execute: bool,
    sf_password: str | None,
    sf_authenticator: str,
    enable_mutations: bool,
) -> None:
    """Create a Snowflake agent on DataHub by generating SQL setup scripts.

    This command generates all necessary SQL files to set up a Snowflake Cortex Agent
    with DataHub integration using the datahub-agent-context package.
    """
    click.echo(
        "Generating Snowflake agent setup SQL files with datahub-agent-context..."
    )
    logger.info("Snowflake agent creation initiated")

    # Validate the URL before touching the filesystem so a typo is reported as
    # a normal CLI usage error instead of a traceback, and no empty output
    # directory is left behind.
    try:
        extract_domain_from_url(datahub_url)
    except ValueError as e:
        raise click.BadParameter(str(e), param_hint="--datahub-url") from e

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Generate all SQL scripts for file output (without token exposure)
    scripts = generate_all_sql_scripts(
        sf_account=sf_account,
        sf_user=sf_user,
        sf_role=sf_role,
        sf_warehouse=sf_warehouse,
        sf_database=sf_database,
        sf_schema=sf_schema,
        datahub_url=datahub_url,
        datahub_token=datahub_token,
        agent_name=agent_name,
        agent_display_name=agent_display_name,
        agent_color=agent_color,
        enable_mutations=enable_mutations,
        execute_mode=False,
    )

    # Write scripts to disk
    write_sql_files_to_disk(output_path, scripts, enable_mutations)
    click.echo(f"\n✅ Snowflake agent setup files generated in: {output_path}")

    # Execute SQL scripts if --execute flag is set; otherwise explain how to
    # run the generated files by hand.
    if execute:
        _execute_mode(
            sf_account=sf_account,
            sf_user=sf_user,
            sf_role=sf_role,
            sf_warehouse=sf_warehouse,
            sf_database=sf_database,
            sf_schema=sf_schema,
            datahub_url=datahub_url,
            datahub_token=datahub_token,
            agent_name=agent_name,
            agent_display_name=agent_display_name,
            agent_color=agent_color,
            sf_password=sf_password,
            sf_authenticator=sf_authenticator,
            enable_mutations=enable_mutations,
        )
    else:
        _show_manual_instructions()

    logger.info(f"Snowflake agent setup files generated in: {output_path}")
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _execute_mode(
    sf_account: str | None,
    sf_user: str | None,
    sf_role: str | None,
    sf_warehouse: str | None,
    sf_database: str | None,
    sf_schema: str | None,
    datahub_url: str,
    datahub_token: str,
    agent_name: str,
    agent_display_name: str,
    agent_color: str,
    sf_password: str | None,
    sf_authenticator: str,
    enable_mutations: bool,
) -> None:
    """Execute SQL scripts directly in Snowflake.

    Connects with the given credentials, fills in any missing Snowflake
    parameters from the live session, regenerates the scripts in execute mode
    and runs them in order. Problems are reported on stderr rather than
    raised. Once opened, the connection is always closed, including when
    script generation or execution raises.
    """
    if sf_authenticator == "snowflake" and not sf_password:
        click.echo(
            "\n✗ Error: --sf-password is required when using --execute with password authentication (--sf-authenticator=snowflake)",
            err=True,
        )
        logger.error("Password required for snowflake authenticator")
        return

    click.echo("\n🔄 Connecting to Snowflake and executing setup scripts...")
    if sf_authenticator == "externalbrowser":
        click.echo(
            " Using SSO authentication - your browser will open for authentication..."
        )

    # The connector is imported lazily so it is only needed with --execute.
    try:
        import snowflake.connector
    except ImportError:
        click.echo(
            "\n✗ Error: snowflake-connector-python package is not installed",
            err=True,
        )
        click.echo("Install it with: pip install snowflake-connector-python", err=True)
        logger.error("snowflake-connector-python not installed")
        return

    try:
        click.echo(" Connecting to Snowflake...")

        connection_params = build_connection_params(
            sf_account=sf_account,
            sf_user=sf_user,
            sf_role=sf_role,
            sf_warehouse=sf_warehouse,
            sf_password=sf_password,
            sf_authenticator=sf_authenticator,
        )

        if sf_authenticator == "oauth":
            click.echo(
                " Note: OAuth authentication requires additional token configuration",
                err=True,
            )

        conn = snowflake.connector.connect(**connection_params)
        try:
            click.echo(" ✓ Connected successfully")

            # Auto-detect values from the Snowflake connection if not provided
            try:
                (
                    sf_account,
                    sf_user,
                    sf_role,
                    sf_warehouse,
                    sf_database,
                    sf_schema,
                ) = auto_detect_snowflake_params(
                    conn,
                    sf_account,
                    sf_user,
                    sf_role,
                    sf_warehouse,
                    sf_database,
                    sf_schema,
                )
            except ValueError as e:
                click.echo(f" ✗ Error: {e}", err=True)
                return

            # Regenerate SQL with the detected values for execute mode (with actual token)
            scripts = generate_all_sql_scripts(
                sf_account=sf_account,
                sf_user=sf_user,
                sf_role=sf_role,
                sf_warehouse=sf_warehouse,
                sf_database=sf_database,
                sf_schema=sf_schema,
                datahub_url=datahub_url,
                datahub_token=datahub_token,
                agent_name=agent_name,
                agent_display_name=agent_display_name,
                agent_color=agent_color,
                enable_mutations=enable_mutations,
                execute_mode=True,
            )

            # Execute scripts in order
            all_success = execute_sql_scripts_in_snowflake(conn, scripts)
        finally:
            # Runs on success, on the early return above, and on any error.
            conn.close()

        if all_success:
            click.echo("\n✅ All scripts executed successfully!")
            click.echo(
                f"\nYour DataHub agent '{agent_name}' is now ready to use in Snowflake Intelligence UI"
            )
        else:
            click.echo(
                "\n⚠️ Some scripts failed. Check the errors above and review the generated SQL files.",
                err=True,
            )

    except Exception as e:
        click.echo(f"\n✗ Error connecting to Snowflake: {e}", err=True)
        logger.error(f"Snowflake connection error: {e}")
        click.echo("\nYou can still manually run the generated SQL files in Snowflake.")
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
def _show_manual_instructions() -> None:
    """Print the follow-up steps for running the generated SQL files by hand."""
    instruction_lines = (
        "\nNext steps:",
        "1. Review the generated SQL files",
        "2. Run them in order:",
        " a. 00_configuration.sql - Set up configuration variables",
        " b. 01_network_rules.sql - Create network rules and access integration",
        " c. 02_datahub_udfs.sql - Create DataHub API UDFs",
        " d. 03_stored_procedure.sql - Create SQL execution procedure",
        " e. 04_cortex_agent.sql - Create the Cortex Agent",
        "3. Test your agent in Snowflake Intelligence UI",
        "\nNote: The UDFs use Snowflake secrets for secure credential storage.",
        "\nTip: Use --execute flag to automatically run these scripts in Snowflake",
        " For SSO authentication, use: --execute --sf-authenticator=externalbrowser",
    )
    # One echo per entry, in the listed order.
    for line in instruction_lines:
        click.echo(line)
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
# Script entry point: when the module is run directly (not imported),
# invoke the click command defined above.
if __name__ == "__main__":
    create_snowflake_agent()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Snowflake UDF generators for DataHub integration."""
|