datahub-agent-context 1.3.1.10rc1__tar.gz → 1.4.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/PKG-INFO +18 -12
  2. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/setup.py +8 -0
  3. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/_version.py +1 -1
  4. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/cli.py +152 -0
  5. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/base.py +6 -3
  6. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/__init__.py +0 -0
  7. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/generate_udfs.py +306 -0
  8. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/generators/__init__.py +21 -0
  9. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/generators/configuration.py +104 -0
  10. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/generators/cortex_agent.py +725 -0
  11. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/generators/network_rules.py +53 -0
  12. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/generators/stored_procedure.py +87 -0
  13. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/snowflake.py +662 -0
  14. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/__init__.py +1 -0
  15. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/add_glossary_terms.py +61 -0
  16. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/add_owners.py +59 -0
  17. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/add_structured_properties.py +57 -0
  18. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/add_tags.py +61 -0
  19. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/base.py +45 -0
  20. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/get_dataset_queries.py +68 -0
  21. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/get_entities.py +47 -0
  22. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/get_lineage.py +61 -0
  23. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/get_lineage_paths_between.py +69 -0
  24. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/get_me.py +51 -0
  25. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/grep_documents.py +70 -0
  26. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/list_schema_fields.py +80 -0
  27. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/remove_domains.py +45 -0
  28. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/remove_glossary_terms.py +57 -0
  29. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/remove_owners.py +56 -0
  30. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/remove_structured_properties.py +56 -0
  31. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/remove_tags.py +57 -0
  32. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/search_datahub.py +71 -0
  33. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/search_documents.py +58 -0
  34. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/set_domains.py +55 -0
  35. datahub_agent_context-1.4.0rc1/src/datahub_agent_context/snowflake/udfs/update_description.py +60 -0
  36. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context.egg-info/PKG-INFO +18 -12
  37. datahub_agent_context-1.4.0rc1/src/datahub_agent_context.egg-info/SOURCES.txt +71 -0
  38. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context.egg-info/requires.txt +18 -11
  39. datahub_agent_context-1.3.1.10rc1/src/datahub_agent_context.egg-info/SOURCES.txt +0 -40
  40. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/README.md +0 -0
  41. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/pyproject.toml +0 -0
  42. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/setup.cfg +0 -0
  43. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/__init__.py +0 -0
  44. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/context.py +0 -0
  45. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/langchain_tools/__init__.py +0 -0
  46. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/langchain_tools/builder.py +0 -0
  47. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/__init__.py +0 -0
  48. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/_token_estimator.py +0 -0
  49. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/descriptions.py +0 -0
  50. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/documents.py +0 -0
  51. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/domains.py +0 -0
  52. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/entities.py +0 -0
  53. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/get_me.py +0 -0
  54. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/__init__.py +0 -0
  55. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/document_search.gql +0 -0
  56. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/document_semantic_search.gql +0 -0
  57. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/entity_details.gql +0 -0
  58. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/queries.gql +0 -0
  59. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/query_entity.gql +0 -0
  60. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/read_documents.gql +0 -0
  61. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/gql/search.gql +0 -0
  62. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/helpers.py +0 -0
  63. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/lineage.py +0 -0
  64. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/owners.py +0 -0
  65. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/queries.py +0 -0
  66. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/search.py +0 -0
  67. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/structured_properties.py +0 -0
  68. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/tags.py +0 -0
  69. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/mcp_tools/terms.py +0 -0
  70. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context/py.typed +0 -0
  71. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context.egg-info/dependency_links.txt +0 -0
  72. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context.egg-info/not-zip-safe +0 -0
  73. {datahub_agent_context-1.3.1.10rc1 → datahub_agent_context-1.4.0rc1}/src/datahub_agent_context.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datahub-agent-context
3
- Version: 1.3.1.10rc1
3
+ Version: 1.4.0rc1
4
4
  Summary: DataHub Agent Context - MCP Tools for AI Agents
5
5
  Home-page: https://datahub.io/
6
6
  License: Apache License 2.0
@@ -28,27 +28,33 @@ Classifier: Environment :: MacOS X
28
28
  Classifier: Topic :: Software Development
29
29
  Requires-Python: >=3.9
30
30
  Description-Content-Type: text/markdown
31
- Requires-Dist: jmespath<2.0.0,>=1.0.0
32
- Requires-Dist: cachetools<7.0.0,>=5.0.0
33
- Requires-Dist: acryl-datahub==1.3.1.10rc1
34
- Requires-Dist: pydantic<3.0.0,>=2.0.0
35
31
  Requires-Dist: json-repair<1.0.0,>=0.25.0
32
+ Requires-Dist: jmespath<2.0.0,>=1.0.0
33
+ Requires-Dist: httpcore<2.0,>=1.0.9
36
34
  Requires-Dist: h11<1.0,>=0.16
35
+ Requires-Dist: acryl-datahub==1.4.0rc1
36
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
37
+ Requires-Dist: cachetools<7.0.0,>=5.0.0
37
38
  Requires-Dist: google-re2<2.0,>=1.0
38
- Requires-Dist: httpcore<2.0,>=1.0.9
39
39
  Provides-Extra: dev
40
- Requires-Dist: types-jmespath<2.0.0,>=1.0.0; extra == "dev"
41
- Requires-Dist: types-PyYAML<7.0.0,>=6.0.0; extra == "dev"
42
- Requires-Dist: mypy==1.17.1; extra == "dev"
43
- Requires-Dist: types-toml<1.0.0,>=0.10.0; extra == "dev"
40
+ Requires-Dist: snowflake-connector-python<4.0.0,>=3.0.0; extra == "dev"
44
41
  Requires-Dist: types-cachetools<7.0.0,>=5.0.0; extra == "dev"
45
- Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
46
- Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
42
+ Requires-Dist: types-toml<1.0.0,>=0.10.0; extra == "dev"
47
43
  Requires-Dist: types-requests<3.0.0,>=2.0.0; extra == "dev"
44
+ Requires-Dist: click<9.0.0,>=8.0.0; extra == "dev"
48
45
  Requires-Dist: tox<5.0.0,>=4.0.0; extra == "dev"
46
+ Requires-Dist: types-jmespath<2.0.0,>=1.0.0; extra == "dev"
49
47
  Requires-Dist: ruff==0.11.7; extra == "dev"
48
+ Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "dev"
49
+ Requires-Dist: mypy==1.17.1; extra == "dev"
50
+ Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
51
+ Requires-Dist: types-PyYAML<7.0.0,>=6.0.0; extra == "dev"
52
+ Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
50
53
  Provides-Extra: langchain
51
54
  Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "langchain"
55
+ Provides-Extra: snowflake
56
+ Requires-Dist: snowflake-connector-python<4.0.0,>=3.0.0; extra == "snowflake"
57
+ Requires-Dist: click<9.0.0,>=8.0.0; extra == "snowflake"
52
58
  Dynamic: classifier
53
59
  Dynamic: description
54
60
  Dynamic: description-content-type
@@ -69,9 +69,16 @@ langchain_requirements = {
69
69
  "langchain-core>=1.2.7,<2.0.0",
70
70
  }
71
71
 
72
+ snowflake_requirements = {
73
+ "click>=8.0.0,<9.0.0",
74
+ "snowflake-connector-python>=3.0.0,<4.0.0",
75
+ }
76
+
72
77
  dev_requirements = {
73
78
  *lint_requirements,
74
79
  *mypy_stubs,
80
+ *snowflake_requirements,
81
+ *langchain_requirements,
75
82
  "pytest>=8.3.4,<9.0.0",
76
83
  "pytest-cov>=2.8.0,<7.0.0",
77
84
  "tox>=4.0.0,<5.0.0",
@@ -123,5 +130,6 @@ setuptools.setup(
123
130
  extras_require={
124
131
  "dev": list(dev_requirements),
125
132
  "langchain": list(langchain_requirements),
133
+ "snowflake": list(snowflake_requirements),
126
134
  },
127
135
  )
@@ -13,4 +13,4 @@
13
13
  # limitations under the License.
14
14
 
15
15
  __package_name__ = "datahub-agent-context"
16
- __version__ = "1.3.1.10rc1"
16
+ __version__ = "1.4.0rc1"
@@ -0,0 +1,152 @@
1
+ import logging
2
+
3
+ import click
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
+ @click.group()
9
+ def agent() -> None:
10
+ """Helper commands for Creating and managing Agent on DataHub."""
11
+ pass
12
+
13
+
14
+ @agent.group()
15
+ def create() -> None:
16
+ """Create an agent on DataHub."""
17
+ pass
18
+
19
+
20
+ @create.command(name="snowflake")
21
+ @click.option(
22
+ "--sf-account",
23
+ default=None,
24
+ help="Snowflake account identifier (auto-detected if not provided)",
25
+ )
26
+ @click.option(
27
+ "--sf-user",
28
+ default=None,
29
+ help="Snowflake user name (auto-detected if not provided)",
30
+ )
31
+ @click.option(
32
+ "--sf-role",
33
+ default=None,
34
+ help="Snowflake role (auto-detected if not provided)",
35
+ )
36
+ @click.option(
37
+ "--sf-warehouse",
38
+ default=None,
39
+ help="Snowflake warehouse name (auto-detected if not provided)",
40
+ )
41
+ @click.option(
42
+ "--sf-database",
43
+ default=None,
44
+ help="Snowflake database name (auto-detected if not provided)",
45
+ )
46
+ @click.option(
47
+ "--sf-schema",
48
+ default=None,
49
+ help="Snowflake schema name (auto-detected if not provided)",
50
+ )
51
+ @click.option("--datahub-url", required=True, help="DataHub instance URL")
52
+ @click.option("--datahub-token", required=True, help="DataHub Personal Access Token")
53
+ @click.option(
54
+ "--agent-name", default="DATAHUB_SQL_AGENT", help="Agent name in Snowflake"
55
+ )
56
+ @click.option(
57
+ "--agent-display-name",
58
+ default="DataHub SQL Assistant",
59
+ help="Agent display name in Snowflake UI",
60
+ )
61
+ @click.option(
62
+ "--agent-color",
63
+ default="blue",
64
+ help="Agent color in Snowflake UI",
65
+ type=click.Choice(["blue", "red", "green", "yellow", "purple", "orange"]),
66
+ )
67
+ @click.option(
68
+ "--output-dir",
69
+ default="./snowflake_setup",
70
+ help="Output directory for generated SQL files",
71
+ )
72
+ @click.option(
73
+ "--execute",
74
+ is_flag=True,
75
+ default=False,
76
+ help="Connect to Snowflake and execute the SQL scripts directly",
77
+ )
78
+ @click.option(
79
+ "--sf-password",
80
+ help="Snowflake password (required if --execute is used with password authentication)",
81
+ )
82
+ @click.option(
83
+ "--sf-authenticator",
84
+ default="snowflake",
85
+ type=click.Choice(["snowflake", "externalbrowser", "oauth"], case_sensitive=False),
86
+ help="Authentication method: 'snowflake' (password), 'externalbrowser' (SSO), or 'oauth' (token-based). Default: snowflake",
87
+ )
88
+ @click.option(
89
+ "--enable-mutations/--no-enable-mutations",
90
+ default=True,
91
+ help="Include mutation/write tools (tags, descriptions, owners, etc.). Default: enabled",
92
+ )
93
+ def create_snowflake(
94
+ sf_account: str | None,
95
+ sf_user: str | None,
96
+ sf_role: str | None,
97
+ sf_warehouse: str | None,
98
+ sf_database: str | None,
99
+ sf_schema: str | None,
100
+ datahub_url: str,
101
+ datahub_token: str,
102
+ agent_name: str,
103
+ agent_display_name: str,
104
+ agent_color: str,
105
+ output_dir: str,
106
+ execute: bool,
107
+ sf_password: str | None,
108
+ sf_authenticator: str,
109
+ enable_mutations: bool,
110
+ ) -> None:
111
+ """Create a Snowflake agent on DataHub.
112
+
113
+ Snowflake connection parameters (account, user, role, warehouse, database, schema)
114
+ will be auto-detected from your current session if not provided explicitly.
115
+
116
+ Authentication methods:
117
+ - snowflake (default): Standard password authentication
118
+ - externalbrowser: SSO authentication via browser (no password required)
119
+ - oauth: OAuth token-based authentication
120
+
121
+ Examples:
122
+ # Generate SQL files only (default)
123
+ $ datahub agent create snowflake --datahub-url=... --datahub-token=...
124
+
125
+ # Execute with password authentication
126
+ $ datahub agent create snowflake --execute --sf-password=secret ...
127
+
128
+ # Execute with SSO authentication (browser-based)
129
+ $ datahub agent create snowflake --execute --sf-authenticator=externalbrowser ...
130
+ """
131
+ from datahub_agent_context.snowflake.snowflake import create_snowflake_agent
132
+
133
+ ctx = click.get_current_context()
134
+ ctx.invoke(
135
+ create_snowflake_agent,
136
+ sf_account=sf_account,
137
+ sf_user=sf_user,
138
+ sf_role=sf_role,
139
+ sf_warehouse=sf_warehouse,
140
+ sf_database=sf_database,
141
+ sf_schema=sf_schema,
142
+ datahub_url=datahub_url,
143
+ datahub_token=datahub_token,
144
+ agent_name=agent_name,
145
+ agent_display_name=agent_display_name,
146
+ agent_color=agent_color,
147
+ output_dir=output_dir,
148
+ execute=execute,
149
+ sf_password=sf_password,
150
+ sf_authenticator=sf_authenticator,
151
+ enable_mutations=enable_mutations,
152
+ )
@@ -152,9 +152,12 @@ def _is_datahub_cloud(graph: DataHubGraph) -> bool:
152
152
  )
153
153
  return False
154
154
 
155
- is_cloud = hasattr(graph, "frontend_base_url") and graph.frontend_base_url
156
- logger.debug(f"Cloud detection: {is_cloud}")
157
- return bool(is_cloud)
155
+ try:
156
+ is_cloud = hasattr(graph, "frontend_base_url") and graph.frontend_base_url
157
+ logger.debug(f"Cloud detection: {is_cloud}")
158
+ return bool(is_cloud)
159
+ except ValueError:
160
+ return False
158
161
 
159
162
 
160
163
  def _is_field_validation_error(error_msg: str) -> bool:
@@ -0,0 +1,306 @@
1
+ """
2
+ Generate Snowflake UDFs that use datahub-agent-context wrapper methods.
3
+
4
+ This module generates Python UDF code that uses the datahub-agent-context package
5
+ to interact with DataHub, instead of making direct HTTP API calls.
6
+ """
7
+
8
+ import logging
9
+ from pathlib import Path
10
+
11
+ import click
12
+
13
+ from datahub_agent_context.snowflake.udfs.add_glossary_terms import (
14
+ generate_add_glossary_terms_udf,
15
+ )
16
+ from datahub_agent_context.snowflake.udfs.add_owners import generate_add_owners_udf
17
+ from datahub_agent_context.snowflake.udfs.add_structured_properties import (
18
+ generate_add_structured_properties_udf,
19
+ )
20
+ from datahub_agent_context.snowflake.udfs.add_tags import generate_add_tags_udf
21
+ from datahub_agent_context.snowflake.udfs.get_dataset_queries import (
22
+ generate_get_dataset_queries_udf,
23
+ )
24
+ from datahub_agent_context.snowflake.udfs.get_entities import generate_get_entities_udf
25
+ from datahub_agent_context.snowflake.udfs.get_lineage import generate_get_lineage_udf
26
+ from datahub_agent_context.snowflake.udfs.get_lineage_paths_between import (
27
+ generate_get_lineage_paths_between_udf,
28
+ )
29
+ from datahub_agent_context.snowflake.udfs.get_me import generate_get_me_udf
30
+ from datahub_agent_context.snowflake.udfs.grep_documents import (
31
+ generate_grep_documents_udf,
32
+ )
33
+ from datahub_agent_context.snowflake.udfs.list_schema_fields import (
34
+ generate_list_schema_fields_udf,
35
+ )
36
+ from datahub_agent_context.snowflake.udfs.remove_domains import (
37
+ generate_remove_domains_udf,
38
+ )
39
+ from datahub_agent_context.snowflake.udfs.remove_glossary_terms import (
40
+ generate_remove_glossary_terms_udf,
41
+ )
42
+ from datahub_agent_context.snowflake.udfs.remove_owners import (
43
+ generate_remove_owners_udf,
44
+ )
45
+ from datahub_agent_context.snowflake.udfs.remove_structured_properties import (
46
+ generate_remove_structured_properties_udf,
47
+ )
48
+ from datahub_agent_context.snowflake.udfs.remove_tags import generate_remove_tags_udf
49
+ from datahub_agent_context.snowflake.udfs.search_datahub import (
50
+ generate_search_datahub_udf,
51
+ )
52
+ from datahub_agent_context.snowflake.udfs.search_documents import (
53
+ generate_search_documents_udf,
54
+ )
55
+ from datahub_agent_context.snowflake.udfs.set_domains import generate_set_domains_udf
56
+ from datahub_agent_context.snowflake.udfs.update_description import (
57
+ generate_update_description_udf,
58
+ )
59
+
60
+ logger = logging.getLogger(__name__)
61
+
62
+
63
+ def extract_function_signature(udf_sql: str) -> str:
64
+ """Extract function parameter signature from UDF SQL.
65
+
66
+ Args:
67
+ udf_sql: The SQL CREATE FUNCTION statement
68
+
69
+ Returns:
70
+ String of Snowflake parameter types (e.g., "STRING, NUMBER")
71
+ Empty string if function has no parameters
72
+ """
73
+ import re
74
+
75
+ # Match the function parameters between parentheses
76
+ # The SQL format is: CREATE OR REPLACE FUNCTION name(params) RETURNS ...
77
+ match = re.search(r"FUNCTION\s+\w+\s*\((.*?)\)\s*RETURNS", udf_sql, re.DOTALL)
78
+ if not match:
79
+ return ""
80
+
81
+ params_str = match.group(1).strip()
82
+ if not params_str:
83
+ return ""
84
+
85
+ # Extract just the types (STRING, NUMBER, etc.)
86
+ # Parameter format is: param_name TYPE
87
+ param_types = []
88
+ for param in params_str.split(","):
89
+ param = param.strip()
90
+ if param:
91
+ # Split on whitespace and take the last part (the type)
92
+ parts = param.split()
93
+ if len(parts) >= 2:
94
+ param_types.append(parts[-1])
95
+
96
+ return ", ".join(param_types) if param_types else ""
97
+
98
+
99
+ def generate_all_udfs(include_mutations: bool = True) -> dict[str, str]:
100
+ """Generate all DataHub UDFs from datahub-agent-context tools.
101
+
102
+ Returns all 20 tools from datahub-agent-context as Snowflake UDFs, including
103
+ both read operations (search, get_entities, etc.) and write operations (add_tags,
104
+ update_description, etc.).
105
+
106
+ Write operations enable automated governance workflows from Snowflake, such as:
107
+ - Tagging datasets based on query analysis
108
+ - Enriching metadata with descriptions and owners
109
+ - Bulk operations on multiple entities from SQL
110
+
111
+ Args:
112
+ include_mutations: Whether to include mutation/write tools (default: True)
113
+
114
+ Returns:
115
+ Dictionary mapping function names to their SQL definitions
116
+ """
117
+ udfs = {
118
+ # Core search and entity tools (read-only)
119
+ "SEARCH_DATAHUB": generate_search_datahub_udf(),
120
+ "GET_ENTITIES": generate_get_entities_udf(),
121
+ "LIST_SCHEMA_FIELDS": generate_list_schema_fields_udf(),
122
+ # Lineage tools (read-only)
123
+ "GET_LINEAGE": generate_get_lineage_udf(),
124
+ "GET_LINEAGE_PATHS_BETWEEN": generate_get_lineage_paths_between_udf(),
125
+ # Query analysis tools (read-only)
126
+ "GET_DATASET_QUERIES": generate_get_dataset_queries_udf(),
127
+ # Document search tools (read-only)
128
+ "SEARCH_DOCUMENTS": generate_search_documents_udf(),
129
+ "GREP_DOCUMENTS": generate_grep_documents_udf(),
130
+ # User info tool (read-only)
131
+ "GET_ME": generate_get_me_udf(),
132
+ }
133
+
134
+ if include_mutations:
135
+ # Mutation/write tools - only include if enabled
136
+ udfs.update(
137
+ {
138
+ # Tag management tools
139
+ "ADD_TAGS": generate_add_tags_udf(),
140
+ "REMOVE_TAGS": generate_remove_tags_udf(),
141
+ # Description management tool
142
+ "UPDATE_DESCRIPTION": generate_update_description_udf(),
143
+ # Domain management tools
144
+ "SET_DOMAINS": generate_set_domains_udf(),
145
+ "REMOVE_DOMAINS": generate_remove_domains_udf(),
146
+ # Owner management tools
147
+ "ADD_OWNERS": generate_add_owners_udf(),
148
+ "REMOVE_OWNERS": generate_remove_owners_udf(),
149
+ # Glossary term management tools
150
+ "ADD_GLOSSARY_TERMS": generate_add_glossary_terms_udf(),
151
+ "REMOVE_GLOSSARY_TERMS": generate_remove_glossary_terms_udf(),
152
+ # Structured property management tools
153
+ "ADD_STRUCTURED_PROPERTIES": generate_add_structured_properties_udf(),
154
+ "REMOVE_STRUCTURED_PROPERTIES": generate_remove_structured_properties_udf(),
155
+ }
156
+ )
157
+
158
+ return udfs
159
+
160
+
161
+ def generate_datahub_udfs_sql(include_mutations: bool = True) -> str:
162
+ """Generate complete SQL script with DataHub UDFs using datahub-agent-context.
163
+
164
+ Args:
165
+ include_mutations: Whether to include mutation/write tools (default: True)
166
+ """
167
+ # Generate read-only UDFs first to get count
168
+ read_only_udfs = generate_all_udfs(include_mutations=False)
169
+ read_ops_count = len(read_only_udfs)
170
+
171
+ # Generate all UDFs (read + write if enabled)
172
+ all_udfs = generate_all_udfs(include_mutations=include_mutations)
173
+ total_udfs = len(all_udfs)
174
+ write_ops_count = total_udfs - read_ops_count
175
+
176
+ udf_sections = []
177
+ grant_statements = []
178
+ show_statements = []
179
+ function_list = []
180
+
181
+ for function_name, udf_sql in all_udfs.items():
182
+ udf_sections.append(f"""-- ============================================================================
183
+ -- UDF: {function_name}
184
+ -- ============================================================================
185
+ {udf_sql}""")
186
+
187
+ # Generate GRANT statement based on function signature extracted from SQL
188
+ signature = extract_function_signature(udf_sql)
189
+ grant_statements.append(
190
+ f"GRANT USAGE ON FUNCTION {function_name}({signature}) TO ROLE IDENTIFIER($SF_ROLE);"
191
+ )
192
+
193
+ show_statements.append(f"SHOW FUNCTIONS LIKE '{function_name}';")
194
+ function_list.append(
195
+ f" $SF_DATABASE || '.' || $SF_SCHEMA || '.{function_name}' AS {function_name.lower()}"
196
+ )
197
+
198
+ function_list_joined = ("," + chr(10)).join(function_list)
199
+
200
+ write_ops_section = (
201
+ f"""--
202
+ -- Write Operations ({write_ops_count}):
203
+ -- - ADD_TAGS, REMOVE_TAGS: Tag management
204
+ -- - UPDATE_DESCRIPTION: Description management
205
+ -- - SET_DOMAINS, REMOVE_DOMAINS: Domain management
206
+ -- - ADD_OWNERS, REMOVE_OWNERS: Owner management
207
+ -- - ADD_GLOSSARY_TERMS, REMOVE_GLOSSARY_TERMS: Glossary term management
208
+ -- - ADD_STRUCTURED_PROPERTIES, REMOVE_STRUCTURED_PROPERTIES: Structured property management
209
+ """
210
+ if include_mutations
211
+ else ""
212
+ )
213
+
214
+ return f"""-- ============================================================================
215
+ -- Step 2: DataHub API UDFs for Cortex Agent (using datahub-agent-context)
216
+ -- ============================================================================
217
+ -- This script creates {total_udfs} Python UDFs that enable Snowflake Intelligence to
218
+ -- query DataHub for metadata{" and manage metadata programmatically" if include_mutations else ""}.
219
+ --
220
+ -- These UDFs use the datahub-agent-context package wrapper methods.
221
+ --
222
+ -- UDFs included:
223
+ -- Read Operations ({read_ops_count}):
224
+ -- - SEARCH_DATAHUB: Search for entities
225
+ -- - GET_ENTITIES: Get entity details
226
+ -- - LIST_SCHEMA_FIELDS: List schema fields with filtering
227
+ -- - GET_LINEAGE: Get upstream/downstream lineage
228
+ -- - GET_LINEAGE_PATHS_BETWEEN: Get detailed transformation paths
229
+ -- - GET_DATASET_QUERIES: Get SQL queries using a dataset
230
+ -- - SEARCH_DOCUMENTS: Search organization documents
231
+ -- - GREP_DOCUMENTS: Regex search within documents
232
+ -- - GET_ME: Get authenticated user information
233
+ {write_ops_section}--
234
+ -- Prerequisites:
235
+ -- - Run 00_configuration.sql first to set variables
236
+ -- - Run 01_network_rules.sql to create network rules and secrets
237
+ -- - You must have appropriate privileges to create functions
238
+ -- ============================================================================
239
+
240
+ USE DATABASE IDENTIFIER($SF_DATABASE);
241
+ USE SCHEMA IDENTIFIER($SF_SCHEMA);
242
+ USE WAREHOUSE IDENTIFIER($SF_WAREHOUSE);
243
+
244
+ {chr(10).join(udf_sections)}
245
+
246
+ -- ============================================================================
247
+ -- Grant Usage Permissions
248
+ -- ============================================================================
249
+ {chr(10).join(grant_statements)}
250
+
251
+ -- ============================================================================
252
+ -- Verify All UDFs Were Created
253
+ -- ============================================================================
254
+ {chr(10).join(show_statements)}
255
+
256
+ SELECT
257
+ 'All {total_udfs} DataHub UDFs created successfully!' AS status,
258
+ {function_list_joined};
259
+ """
260
+
261
+
262
+ @click.command()
263
+ @click.option(
264
+ "--output",
265
+ "-o",
266
+ type=click.Path(dir_okay=False, writable=True),
267
+ help="Output file path for generated SQL (default: print to stdout)",
268
+ )
269
+ @click.option(
270
+ "--enable-mutations/--no-enable-mutations",
271
+ default=True,
272
+ help="Include mutation/write tools (tags, descriptions, owners, etc.). Default: enabled",
273
+ )
274
+ def main(output: str | None, enable_mutations: bool) -> None:
275
+ """Generate Snowflake UDF SQL for DataHub integration.
276
+
277
+ This command generates SQL scripts that create Snowflake User-Defined Functions (UDFs)
278
+ for interacting with DataHub metadata from Snowflake.
279
+
280
+ Generates all UDFs using the datahub-agent-context package.
281
+
282
+ Examples:
283
+ # Print SQL to stdout with all tools (read + write)
284
+ python -m datahub.ai.snowflake.generate_udfs
285
+
286
+ # Generate read-only tools (no mutations)
287
+ python -m datahub.ai.snowflake.generate_udfs --no-enable-mutations
288
+
289
+ # Save to file with mutations enabled
290
+ python -m datahub.ai.snowflake.generate_udfs -o datahub_udfs.sql
291
+ """
292
+ sql_content = generate_datahub_udfs_sql(include_mutations=enable_mutations)
293
+
294
+ if output:
295
+ output_path = Path(output)
296
+ output_path.parent.mkdir(parents=True, exist_ok=True)
297
+ output_path.write_text(sql_content)
298
+ udf_count = len(generate_all_udfs(include_mutations=enable_mutations))
299
+ click.echo(f"✓ Generated {udf_count} Snowflake UDF(s) to: {output_path}")
300
+ logger.info(f"Generated Snowflake UDF SQL to {output_path}")
301
+ else:
302
+ click.echo(sql_content)
303
+
304
+
305
+ if __name__ == "__main__":
306
+ main()
@@ -0,0 +1,21 @@
1
+ """SQL generators for Snowflake agent setup."""
2
+
3
+ from datahub_agent_context.snowflake.generators.configuration import (
4
+ generate_configuration_sql,
5
+ )
6
+ from datahub_agent_context.snowflake.generators.cortex_agent import (
7
+ generate_cortex_agent_sql,
8
+ )
9
+ from datahub_agent_context.snowflake.generators.network_rules import (
10
+ generate_network_rules_sql,
11
+ )
12
+ from datahub_agent_context.snowflake.generators.stored_procedure import (
13
+ generate_stored_procedure_sql,
14
+ )
15
+
16
+ __all__ = [
17
+ "generate_configuration_sql",
18
+ "generate_network_rules_sql",
19
+ "generate_stored_procedure_sql",
20
+ "generate_cortex_agent_sql",
21
+ ]
@@ -0,0 +1,104 @@
1
+ """Generate Snowflake configuration SQL."""
2
+
3
+
4
+ def generate_configuration_sql(
5
+ sf_account: str | None,
6
+ sf_user: str | None,
7
+ sf_role: str | None,
8
+ sf_warehouse: str | None,
9
+ sf_database: str | None,
10
+ sf_schema: str | None,
11
+ datahub_url: str,
12
+ datahub_token: str,
13
+ agent_name: str,
14
+ agent_display_name: str,
15
+ agent_color: str,
16
+ execute: bool = False,
17
+ ) -> str:
18
+ """Generate configuration SQL based on provided parameters.
19
+
20
+ If parameters are None, uses Snowflake SQL functions to auto-detect values.
21
+ In non-execute mode, uses placeholder for token for security.
22
+
23
+ Args:
24
+ execute: If True, includes actual token. If False, uses placeholder.
25
+ """
26
+ # Use SQL functions for auto-detection when values are not provided
27
+ account_value = f"'{sf_account}'" if sf_account else "CURRENT_ACCOUNT()"
28
+ user_value = f"'{sf_user}'" if sf_user else "CURRENT_USER()"
29
+ role_value = f"'{sf_role}'" if sf_role else "CURRENT_ROLE()"
30
+ warehouse_value = f"'{sf_warehouse}'" if sf_warehouse else "CURRENT_WAREHOUSE()"
31
+ database_value = f"'{sf_database}'" if sf_database else "CURRENT_DATABASE()"
32
+ schema_value = f"'{sf_schema}'" if sf_schema else "CURRENT_SCHEMA()"
33
+
34
+ # Use placeholder for token in non-execute mode for security
35
+ token_value = datahub_token if execute else "<DATAHUB_TOKEN>"
36
+
37
+ return f"""-- ============================================================================
38
+ -- CONFIGURATION FILE - Customize these values for your environment
39
+ -- ============================================================================
40
+ -- Values are auto-detected from your current session where possible
41
+ -- Then run the numbered scripts in order (01, 02, 03, 04, 05)
42
+
43
+ -- ============================================================================
44
+ -- SNOWFLAKE CONFIGURATION
45
+ -- ============================================================================
46
+
47
+ -- Your Snowflake account identifier (e.g., 'xy12345' or 'xy12345.us-east-1')
48
+ SET SF_ACCOUNT = {account_value};
49
+
50
+ -- Your Snowflake user (the user who will run the agent)
51
+ SET SF_USER = {user_value};
52
+
53
+ -- Your Snowflake role (must have permissions to create objects)
54
+ SET SF_ROLE = {role_value};
55
+
56
+ -- Your Snowflake warehouse name
57
+ SET SF_WAREHOUSE = {warehouse_value};
58
+
59
+ -- Your Snowflake database name
60
+ SET SF_DATABASE = {database_value};
61
+
62
+ -- Your Snowflake schema name (where UDFs, procedures, and agent will be created)
63
+ SET SF_SCHEMA = {schema_value};
64
+
65
+ -- Set the database, schema, and warehouse context
66
+ USE DATABASE IDENTIFIER($SF_DATABASE);
67
+ USE SCHEMA IDENTIFIER($SF_SCHEMA);
68
+ USE WAREHOUSE IDENTIFIER($SF_WAREHOUSE);
69
+
70
+ -- ============================================================================
71
+ -- DATAHUB CONFIGURATION
72
+ -- ============================================================================
73
+
74
+ -- Your DataHub instance URL (without /gms or trailing slash)
75
+ -- Examples:
76
+ -- https://fieldeng.acryl.io
77
+ -- https://your-company.acryl.io
78
+ -- https://datahub.your-company.com
79
+ SET DATAHUB_URL = '{datahub_url}';
80
+
81
+ -- Your DataHub Personal Access Token (PAT)
82
+ -- Get this from DataHub UI: Settings > Access Tokens > Create Token
83
+ -- Create Snowflake secrets to store credentials securely
84
+ CREATE OR REPLACE SECRET datahub_url
85
+ TYPE = GENERIC_STRING
86
+ SECRET_STRING = '{datahub_url}';
87
+
88
+ CREATE OR REPLACE SECRET datahub_token
89
+ TYPE = GENERIC_STRING
90
+ SECRET_STRING = '{token_value}';
91
+
92
+ -- ============================================================================
93
+ -- AGENT CONFIGURATION
94
+ -- ============================================================================
95
+
96
+ -- Agent name (will be created as: <DATABASE>.<SCHEMA>.<AGENT_NAME>)
97
+ SET AGENT_NAME = '{agent_name}';
98
+
99
+ -- Agent display name (shown in Snowflake Intelligence UI)
100
+ SET AGENT_DISPLAY_NAME = '{agent_display_name}';
101
+
102
+ -- Agent color (for UI display)
103
+ SET AGENT_COLOR = '{agent_color}';
104
+ """