datahub-agent-context 1.3.1.10rc1__py3-none-any.whl → 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datahub_agent_context/__init__.py +11 -3
- datahub_agent_context/_version.py +1 -1
- datahub_agent_context/cli.py +152 -0
- datahub_agent_context/context.py +47 -34
- datahub_agent_context/langchain_tools/builder.py +6 -4
- datahub_agent_context/mcp_tools/base.py +6 -3
- datahub_agent_context/mcp_tools/save_document.py +634 -0
- datahub_agent_context/snowflake/__init__.py +0 -0
- datahub_agent_context/snowflake/generate_udfs.py +306 -0
- datahub_agent_context/snowflake/generators/__init__.py +21 -0
- datahub_agent_context/snowflake/generators/configuration.py +104 -0
- datahub_agent_context/snowflake/generators/cortex_agent.py +725 -0
- datahub_agent_context/snowflake/generators/network_rules.py +53 -0
- datahub_agent_context/snowflake/generators/stored_procedure.py +87 -0
- datahub_agent_context/snowflake/snowflake.py +662 -0
- datahub_agent_context/snowflake/udfs/__init__.py +1 -0
- datahub_agent_context/snowflake/udfs/add_glossary_terms.py +61 -0
- datahub_agent_context/snowflake/udfs/add_owners.py +59 -0
- datahub_agent_context/snowflake/udfs/add_structured_properties.py +57 -0
- datahub_agent_context/snowflake/udfs/add_tags.py +61 -0
- datahub_agent_context/snowflake/udfs/base.py +45 -0
- datahub_agent_context/snowflake/udfs/get_dataset_queries.py +68 -0
- datahub_agent_context/snowflake/udfs/get_entities.py +47 -0
- datahub_agent_context/snowflake/udfs/get_lineage.py +61 -0
- datahub_agent_context/snowflake/udfs/get_lineage_paths_between.py +69 -0
- datahub_agent_context/snowflake/udfs/get_me.py +51 -0
- datahub_agent_context/snowflake/udfs/grep_documents.py +70 -0
- datahub_agent_context/snowflake/udfs/list_schema_fields.py +80 -0
- datahub_agent_context/snowflake/udfs/remove_domains.py +45 -0
- datahub_agent_context/snowflake/udfs/remove_glossary_terms.py +57 -0
- datahub_agent_context/snowflake/udfs/remove_owners.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_structured_properties.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_tags.py +57 -0
- datahub_agent_context/snowflake/udfs/search_datahub.py +71 -0
- datahub_agent_context/snowflake/udfs/search_documents.py +58 -0
- datahub_agent_context/snowflake/udfs/set_domains.py +55 -0
- datahub_agent_context/snowflake/udfs/update_description.py +60 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA +21 -14
- datahub_agent_context-1.4.0rc2.dist-info/RECORD +66 -0
- datahub_agent_context-1.3.1.10rc1.dist-info/RECORD +0 -34
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/WHEEL +0 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""ADD_GLOSSARY_TERMS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_add_glossary_terms_udf() -> str:
    """Build the CREATE FUNCTION statement for the ADD_GLOSSARY_TERMS UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.add_glossary_terms() so glossary terms can
    be attached to multiple DataHub entities or their columns.

    UDF parameters:
        term_urns (STRING): JSON array of glossary term URNs
        entity_urns (STRING): JSON array of entity URNs to annotate
        column_paths (STRING): Optional JSON array of column names
            (use NULL for entity-level)

    UDF result:
        VARIANT: Dictionary with success status and message. If the handler
        raises, it returns {'success': False, 'error': <message>} instead.

    UDF usage examples:
        - Add terms: ADD_GLOSSARY_TERMS('["urn:li:glossaryTerm:CustomerData"]', '["urn:li:dataset:(...)"]', NULL)
        - Add to columns: ADD_GLOSSARY_TERMS('["urn:li:glossaryTerm:Email"]', '["urn:li:dataset:(...)"]', '["email"]')

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("term_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("column_paths", "STRING"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import add_glossary_terms
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    term_urn_list = json.loads(term_urns) if isinstance(term_urns, str) else term_urns
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns
    column_path_list = json.loads(column_paths) if column_paths and isinstance(column_paths, str) else None

    with DataHubContext(graph):
        return add_glossary_terms(
            term_urns=term_urn_list,
            entity_urns=entity_urn_list,
            column_paths=column_path_list
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    return generate_python_udf_code(
        function_name="ADD_GLOSSARY_TERMS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""ADD_OWNERS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_add_owners_udf() -> str:
    """Build the CREATE FUNCTION statement for the ADD_OWNERS UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.add_owners() to add owners to multiple
    DataHub entities.

    UDF parameters:
        owner_urns (STRING): JSON array of owner URNs (CorpUser or CorpGroup)
        entity_urns (STRING): JSON array of entity URNs to assign ownership
        ownership_type_urn (STRING): Optional ownership type URN
            (use NULL for default); any falsy value is forwarded as None

    UDF result:
        VARIANT: Dictionary with success status and message. If the handler
        raises, it returns {'success': False, 'error': <message>} instead.

    UDF usage example:
        - Add owners: ADD_OWNERS('["urn:li:corpuser:john"]', '["urn:li:dataset:(...)"]', NULL)

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("owner_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("ownership_type_urn", "STRING"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import add_owners
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    owner_urn_list = json.loads(owner_urns) if isinstance(owner_urns, str) else owner_urns
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

    with DataHubContext(graph):
        return add_owners(
            owner_urns=owner_urn_list,
            entity_urns=entity_urn_list,
            ownership_type_urn=ownership_type_urn if ownership_type_urn else None
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    return generate_python_udf_code(
        function_name="ADD_OWNERS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""ADD_STRUCTURED_PROPERTIES UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_add_structured_properties_udf() -> str:
    """Build the CREATE FUNCTION statement for the ADD_STRUCTURED_PROPERTIES UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.add_structured_properties() to add
    structured properties with values to multiple DataHub entities.

    UDF parameters:
        property_values (STRING): JSON object mapping property URNs to value
            arrays (e.g., '{"urn:li:structuredProperty:retentionTime": ["90"]}')
        entity_urns (STRING): JSON array of entity URNs to assign properties to

    UDF result:
        VARIANT: Dictionary with success status and message. If the handler
        raises, it returns {'success': False, 'error': <message>} instead.

    UDF usage example:
        - Add property: ADD_STRUCTURED_PROPERTIES('{"urn:li:structuredProperty:retentionTime": ["90"]}', '["urn:li:dataset:(...)"]')

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("property_values", "STRING"),
        ("entity_urns", "STRING"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import add_structured_properties
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    property_values_dict = json.loads(property_values) if isinstance(property_values, str) else property_values
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

    with DataHubContext(graph):
        return add_structured_properties(
            property_values=property_values_dict,
            entity_urns=entity_urn_list
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    return generate_python_udf_code(
        function_name="ADD_STRUCTURED_PROPERTIES",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""ADD_TAGS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_add_tags_udf() -> str:
    """Build the CREATE FUNCTION statement for the ADD_TAGS UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.add_tags() to add tags to multiple DataHub
    entities or their columns in a single operation.

    UDF parameters:
        tag_urns (STRING): JSON array of tag URNs
            (e.g., '["urn:li:tag:PII", "urn:li:tag:Sensitive"]')
        entity_urns (STRING): JSON array of entity URNs to tag
        column_paths (STRING): Optional JSON array of column names
            (use NULL for entity-level tags)

    UDF result:
        VARIANT: Dictionary with success status and message. If the handler
        raises, it returns {'success': False, 'error': <message>} instead.

    UDF usage examples:
        - Tag datasets: ADD_TAGS('["urn:li:tag:PII"]', '["urn:li:dataset:(...)"]', NULL)
        - Tag columns: ADD_TAGS('["urn:li:tag:PII"]', '["urn:li:dataset:(...)"]', '["email"]')

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("tag_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("column_paths", "STRING"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import add_tags
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    tag_urn_list = json.loads(tag_urns) if isinstance(tag_urns, str) else tag_urns
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns
    column_path_list = json.loads(column_paths) if column_paths and isinstance(column_paths, str) else None

    with DataHubContext(graph):
        return add_tags(
            tag_urns=tag_urn_list,
            entity_urns=entity_urn_list,
            column_paths=column_path_list
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    return generate_python_udf_code(
        function_name="ADD_TAGS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Base utilities for generating Snowflake UDFs."""
|
|
2
|
+
|
|
3
|
+
import textwrap
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_python_udf_code(
    function_name: str,
    parameters: list[tuple[str, str]],
    return_type: str,
    function_body: str,
) -> str:
    """
    Generate the SQL CREATE FUNCTION statement for a Python UDF.

    The statement declares a Python 3.10 handler named after the lower-cased
    function name, binds the 'datahub_url_secret' and 'datahub_token_secret'
    secrets, and prepends imports of _snowflake, DataHubGraph,
    DatahubClientConfig and DataHubContext so the body can use them directly.

    Args:
        function_name: Name of the UDF to create
        parameters: List of (param_name, param_type) tuples; the names are
            reused as the Python handler's argument names
        return_type: Return type of the function (e.g., 'VARIANT', 'STRING')
        function_body: Python code for the function body (without def statement)

    Returns:
        Complete SQL CREATE FUNCTION statement

    Raises:
        ValueError: If function_body is empty or whitespace-only (the handler
            would have no suite), or if it contains "$$" (which would end the
            dollar-quoted code block early).
    """
    if not function_body or not function_body.strip():
        raise ValueError("function_body must contain at least one statement")
    if "$$" in function_body:
        raise ValueError(
            "function_body must not contain '$$'; it terminates the "
            "dollar-quoted UDF body"
        )

    handler_name = function_name.lower()
    param_signature = ", ".join(f"{name} {type_}" for name, type_ in parameters)
    py_param_names = ", ".join(name for name, _ in parameters)
    # Push the body one level in so it sits inside the generated def.
    indented_body = textwrap.indent(function_body, "    ")

    udf_template = f"""CREATE OR REPLACE FUNCTION {function_name}({param_signature})
RETURNS {return_type}
LANGUAGE PYTHON
RUNTIME_VERSION = '3.10'
ARTIFACT_REPOSITORY = snowflake.snowpark.pypi_shared_repository
PACKAGES = ('datahub-agent-context>=1.3.1.8')
SECRETS = ('datahub_url_secret' = datahub_url, 'datahub_token_secret' = datahub_token)
EXTERNAL_ACCESS_INTEGRATIONS = (datahub_access)
HANDLER = '{handler_name}'
AS $$
import _snowflake
from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig
from datahub_agent_context.context import DataHubContext

def {handler_name}({py_param_names}):
{indented_body}
$$;"""

    return udf_template
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""GET_DATASET_QUERIES UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_get_dataset_queries_udf() -> str:
    """Build the CREATE FUNCTION statement for the GET_DATASET_QUERIES UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.get_dataset_queries() to retrieve SQL
    queries associated with a dataset or column, which helps in understanding
    usage patterns such as common JOINs, typical filters and aggregation
    logic. Results can be filtered by query source (MANUAL vs SYSTEM).

    UDF parameters:
        urn (STRING): Dataset URN
        column_name (STRING): Optional column name to filter queries
            (use NULL for all dataset queries)
        source (STRING): Filter by query origin - 'MANUAL', 'SYSTEM', or NULL
            for both
        count (NUMBER): Number of queries to return; a NULL or zero value
            falls back to 10

    UDF result:
        VARIANT: Dictionary with:
            - total: Total number of queries matching criteria
            - queries: Array of query objects with SQL statements and metadata
            - start: Starting offset
            - count: Number of results returned
        If the handler raises, it returns
        {'success': False, 'error': <message>, 'urn': <urn>} instead.

    UDF usage examples:
        - Manual queries: GET_DATASET_QUERIES(urn, NULL, 'MANUAL', 10)
        - System queries: GET_DATASET_QUERIES(urn, NULL, 'SYSTEM', 20)
        - Column queries: GET_DATASET_QUERIES(urn, 'customer_id', 'MANUAL', 5)

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("urn", "STRING"),
        ("column_name", "STRING"),
        ("source", "STRING"),
        ("count", "NUMBER"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import get_dataset_queries
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_dataset_queries(
            urn=urn,
            column=column_name if column_name else None,
            source=source if source else None,
            count=int(count) if count else 10
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urn': urn
    }"""

    return generate_python_udf_code(
        function_name="GET_DATASET_QUERIES",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""GET_ENTITIES UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_get_entities_udf() -> str:
    """Build the CREATE FUNCTION statement for the GET_ENTITIES UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.get_entities() to retrieve detailed
    information about an entity by its DataHub URN from Snowflake.

    The underlying function accepts arrays of URNs for batch retrieval; this
    UDF takes a single URN string and wraps it in a one-element list.

    UDF parameters:
        entity_urn (STRING): Entity URN (e.g., "urn:li:dataset:(...)")

    UDF result:
        VARIANT: Dictionary with entity details including schema metadata,
        ownership, tags, glossary terms, and other metadata aspects. If the
        handler raises, it returns
        {'success': False, 'error': <message>, 'urn': <entity_urn>} instead.

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [("entity_urn", "STRING")]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import get_entities
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_entities([entity_urn])

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urn': entity_urn
    }"""

    return generate_python_udf_code(
        function_name="GET_ENTITIES",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""GET_LINEAGE UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_get_lineage_udf() -> str:
    """Build the CREATE FUNCTION statement for the GET_LINEAGE UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.get_lineage() to get upstream or
    downstream lineage for any entity from Snowflake.

    UDF parameters:
        urn (STRING): Entity URN
        column_name (STRING): Optional column name for column-level lineage
            (use NULL for entity-level)
        upstream (NUMBER): 1 for upstream lineage, 0 for downstream lineage
            (converted with bool() before the call)
        max_hops (NUMBER): Maximum number of hops (1-3+); a NULL or zero
            value falls back to 1
        max_results (NUMBER): Maximum number of results to return; a NULL or
            zero value falls back to 30

    UDF result:
        VARIANT: Dictionary with upstreams or downstreams field containing
        lineage entities, facets, and metadata. For column-level lineage,
        includes lineageColumns showing which columns have relationships.
        If the handler raises, it returns
        {'success': False, 'error': <message>, 'urn': <urn>} instead.

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("urn", "STRING"),
        ("column_name", "STRING"),
        ("upstream", "NUMBER"),
        ("max_hops", "NUMBER"),
        ("max_results", "NUMBER"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import get_lineage
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_lineage(
            urn=urn,
            column=column_name if column_name else None,
            upstream=bool(upstream),
            max_hops=int(max_hops) if max_hops else 1,
            max_results=int(max_results) if max_results else 30
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urn': urn
    }"""

    return generate_python_udf_code(
        function_name="GET_LINEAGE",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""GET_LINEAGE_PATHS_BETWEEN UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_get_lineage_paths_between_udf() -> str:
    """Build the CREATE FUNCTION statement for the GET_LINEAGE_PATHS_BETWEEN UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.get_lineage_paths_between() to get
    detailed lineage paths between two specific entities or columns. The
    result's paths array shows the exact transformation chain(s), including
    intermediate entities and transformation query URNs.

    UDF parameters:
        source_urn (STRING): URN of the source dataset
        target_urn (STRING): URN of the target dataset
        source_column (STRING): Optional column name in source dataset
            (use NULL for dataset-level)
        target_column (STRING): Optional column name in target dataset
            (use NULL for dataset-level)

    UDF result:
        VARIANT: Dictionary with:
            - source: Source entity/column info
            - target: Target entity/column info
            - paths: Array of path objects showing transformation chains
            - pathCount: Number of paths found
            - metadata: Query metadata including direction and path type
        If the handler raises, it returns a dictionary with 'success': False,
        'error', 'source_urn' and 'target_urn' instead.

    UDF usage examples:
        - Dataset-level: GET_LINEAGE_PATHS_BETWEEN(source_urn, target_urn, NULL, NULL)
        - Column-level: GET_LINEAGE_PATHS_BETWEEN(source_urn, target_urn, 'user_id', 'customer_id')

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("source_urn", "STRING"),
        ("target_urn", "STRING"),
        ("source_column", "STRING"),
        ("target_column", "STRING"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import get_lineage_paths_between
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_lineage_paths_between(
            source_urn=source_urn,
            target_urn=target_urn,
            source_column=source_column if source_column else None,
            target_column=target_column if target_column else None
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'source_urn': source_urn,
        'target_urn': target_urn
    }"""

    return generate_python_udf_code(
        function_name="GET_LINEAGE_PATHS_BETWEEN",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""GET_ME UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_get_me_udf() -> str:
    """Build the CREATE FUNCTION statement for the GET_ME UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.get_me() to get information about the
    currently authenticated user: profile information, platform privileges,
    group memberships, and user settings.

    UDF parameters:
        None

    UDF result:
        VARIANT: Dictionary with:
            - success: Boolean indicating if operation succeeded
            - data: User information including corpUser details
            - message: Success or error message
        If the handler raises, it returns
        {'success': False, 'error': <message>} instead.

    UDF usage example:
        - Get current user: SELECT GET_ME()

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import get_me
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_me()

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    return generate_python_udf_code(
        function_name="GET_ME",
        parameters=[],
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""GREP_DOCUMENTS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_grep_documents_udf() -> str:
    """Build the CREATE FUNCTION statement for the GREP_DOCUMENTS UDF.

    The generated handler delegates to
    datahub_agent_context.mcp_tools.grep_documents() to search within document
    content using regex patterns (similar to ripgrep/grep).

    Use SEARCH_DOCUMENTS first to find relevant document URNs, then use this
    UDF to search within their content.

    UDF parameters:
        urns (STRING): JSON array of document URNs to search within
            (e.g., '["urn:li:document:doc1"]')
        pattern (STRING): Regex pattern to search for
            (e.g., 'kubernetes', '(?i)deploy.*production')
        context_chars (NUMBER): Characters to show before/after matches;
            a NULL or zero value falls back to 200
        max_matches_per_doc (NUMBER): Maximum matches per document;
            a NULL or zero value falls back to 5

    UDF result:
        VARIANT: Dictionary with:
            - results: List of documents with matching excerpts
            - total_matches: Total matches across all documents
            - documents_with_matches: Number of documents containing matches
        If the handler raises, it returns a dictionary with 'success': False,
        'error', 'urns' and 'pattern' instead.

    UDF usage examples:
        - Find kubectl commands: GREP_DOCUMENTS('["urn:li:document:runbook1"]', 'kubectl apply', 300, 5)
        - Case insensitive: GREP_DOCUMENTS('["urn:li:document:doc1"]', '(?i)error|exception', 200, 10)

    Returns:
        The SQL text produced by generate_python_udf_code().
    """
    udf_parameters = [
        ("urns", "STRING"),
        ("pattern", "STRING"),
        ("context_chars", "NUMBER"),
        ("max_matches_per_doc", "NUMBER"),
    ]

    # Handler body only (no def line). _snowflake, DataHubGraph,
    # DatahubClientConfig and DataHubContext are imported by the template in
    # generate_python_udf_code(), which also wraps this text in the def.
    handler_source = """from datahub_agent_context.mcp_tools import grep_documents
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    urn_list = json.loads(urns) if isinstance(urns, str) else urns

    with DataHubContext(graph):
        return grep_documents(
            urns=urn_list,
            pattern=pattern,
            context_chars=int(context_chars) if context_chars else 200,
            max_matches_per_doc=int(max_matches_per_doc) if max_matches_per_doc else 5
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urns': urns,
        'pattern': pattern
    }"""

    return generate_python_udf_code(
        function_name="GREP_DOCUMENTS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|