datahub-agent-context 1.3.1.10rc1__py3-none-any.whl → 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. datahub_agent_context/__init__.py +11 -3
  2. datahub_agent_context/_version.py +1 -1
  3. datahub_agent_context/cli.py +152 -0
  4. datahub_agent_context/context.py +47 -34
  5. datahub_agent_context/langchain_tools/builder.py +6 -4
  6. datahub_agent_context/mcp_tools/base.py +6 -3
  7. datahub_agent_context/mcp_tools/save_document.py +634 -0
  8. datahub_agent_context/snowflake/__init__.py +0 -0
  9. datahub_agent_context/snowflake/generate_udfs.py +306 -0
  10. datahub_agent_context/snowflake/generators/__init__.py +21 -0
  11. datahub_agent_context/snowflake/generators/configuration.py +104 -0
  12. datahub_agent_context/snowflake/generators/cortex_agent.py +725 -0
  13. datahub_agent_context/snowflake/generators/network_rules.py +53 -0
  14. datahub_agent_context/snowflake/generators/stored_procedure.py +87 -0
  15. datahub_agent_context/snowflake/snowflake.py +662 -0
  16. datahub_agent_context/snowflake/udfs/__init__.py +1 -0
  17. datahub_agent_context/snowflake/udfs/add_glossary_terms.py +61 -0
  18. datahub_agent_context/snowflake/udfs/add_owners.py +59 -0
  19. datahub_agent_context/snowflake/udfs/add_structured_properties.py +57 -0
  20. datahub_agent_context/snowflake/udfs/add_tags.py +61 -0
  21. datahub_agent_context/snowflake/udfs/base.py +45 -0
  22. datahub_agent_context/snowflake/udfs/get_dataset_queries.py +68 -0
  23. datahub_agent_context/snowflake/udfs/get_entities.py +47 -0
  24. datahub_agent_context/snowflake/udfs/get_lineage.py +61 -0
  25. datahub_agent_context/snowflake/udfs/get_lineage_paths_between.py +69 -0
  26. datahub_agent_context/snowflake/udfs/get_me.py +51 -0
  27. datahub_agent_context/snowflake/udfs/grep_documents.py +70 -0
  28. datahub_agent_context/snowflake/udfs/list_schema_fields.py +80 -0
  29. datahub_agent_context/snowflake/udfs/remove_domains.py +45 -0
  30. datahub_agent_context/snowflake/udfs/remove_glossary_terms.py +57 -0
  31. datahub_agent_context/snowflake/udfs/remove_owners.py +56 -0
  32. datahub_agent_context/snowflake/udfs/remove_structured_properties.py +56 -0
  33. datahub_agent_context/snowflake/udfs/remove_tags.py +57 -0
  34. datahub_agent_context/snowflake/udfs/search_datahub.py +71 -0
  35. datahub_agent_context/snowflake/udfs/search_documents.py +58 -0
  36. datahub_agent_context/snowflake/udfs/set_domains.py +55 -0
  37. datahub_agent_context/snowflake/udfs/update_description.py +60 -0
  38. {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA +21 -14
  39. datahub_agent_context-1.4.0rc2.dist-info/RECORD +66 -0
  40. datahub_agent_context-1.3.1.10rc1.dist-info/RECORD +0 -34
  41. {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/WHEEL +0 -0
  42. {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,61 @@
1
+ """ADD_GLOSSARY_TERMS UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_add_glossary_terms_udf() -> str:
    """Build the SQL that creates the ADD_GLOSSARY_TERMS Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets, decodes the JSON-encoded arguments, and calls
    ``datahub_agent_context.mcp_tools.add_glossary_terms`` inside a
    ``DataHubContext``. Any exception raised inside the handler is returned
    as ``{'success': False, 'error': <message>}`` rather than propagated.

    UDF parameters:
        term_urns (STRING): JSON array of glossary term URNs.
        entity_urns (STRING): JSON array of entity URNs to annotate.
        column_paths (STRING): Optional JSON array of column names; pass NULL
            to annotate the entities themselves.

    UDF result:
        VARIANT: Dictionary with success status and message.

    SQL usage:
        ADD_GLOSSARY_TERMS('["urn:li:glossaryTerm:CustomerData"]', '["urn:li:dataset:(...)"]', NULL)
        ADD_GLOSSARY_TERMS('["urn:li:glossaryTerm:Email"]', '["urn:li:dataset:(...)"]', '["email"]')

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import add_glossary_terms
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    term_urn_list = json.loads(term_urns) if isinstance(term_urns, str) else term_urns
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns
    column_path_list = json.loads(column_paths) if column_paths and isinstance(column_paths, str) else None

    with DataHubContext(graph):
        return add_glossary_terms(
            term_urns=term_urn_list,
            entity_urns=entity_urn_list,
            column_paths=column_path_list
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    udf_parameters = [
        ("term_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("column_paths", "STRING"),
    ]
    return generate_python_udf_code(
        function_name="ADD_GLOSSARY_TERMS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,59 @@
1
+ """ADD_OWNERS UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_add_owners_udf() -> str:
    """Build the SQL that creates the ADD_OWNERS Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets, decodes the JSON-encoded URN lists, and calls
    ``datahub_agent_context.mcp_tools.add_owners`` inside a
    ``DataHubContext``. A falsy ``ownership_type_urn`` is forwarded as
    ``None``. Any exception raised inside the handler is returned as
    ``{'success': False, 'error': <message>}`` rather than propagated.

    UDF parameters:
        owner_urns (STRING): JSON array of owner URNs (CorpUser or CorpGroup).
        entity_urns (STRING): JSON array of entity URNs to assign ownership.
        ownership_type_urn (STRING): Optional ownership type URN; pass NULL
            for the default.

    UDF result:
        VARIANT: Dictionary with success status and message.

    SQL usage:
        ADD_OWNERS('["urn:li:corpuser:john"]', '["urn:li:dataset:(...)"]', NULL)

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import add_owners
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    owner_urn_list = json.loads(owner_urns) if isinstance(owner_urns, str) else owner_urns
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

    with DataHubContext(graph):
        return add_owners(
            owner_urns=owner_urn_list,
            entity_urns=entity_urn_list,
            ownership_type_urn=ownership_type_urn if ownership_type_urn else None
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    udf_parameters = [
        ("owner_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("ownership_type_urn", "STRING"),
    ]
    return generate_python_udf_code(
        function_name="ADD_OWNERS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,57 @@
1
+ """ADD_STRUCTURED_PROPERTIES UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_add_structured_properties_udf() -> str:
    """Build the SQL that creates the ADD_STRUCTURED_PROPERTIES Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets, decodes the JSON-encoded arguments, and calls
    ``datahub_agent_context.mcp_tools.add_structured_properties`` inside a
    ``DataHubContext``. Any exception raised inside the handler is returned
    as ``{'success': False, 'error': <message>}`` rather than propagated.

    UDF parameters:
        property_values (STRING): JSON object mapping property URNs to value
            arrays, e.g. '{"urn:li:structuredProperty:retentionTime": ["90"]}'.
        entity_urns (STRING): JSON array of entity URNs to assign properties to.

    UDF result:
        VARIANT: Dictionary with success status and message.

    SQL usage:
        ADD_STRUCTURED_PROPERTIES('{"urn:li:structuredProperty:retentionTime": ["90"]}', '["urn:li:dataset:(...)"]')

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import add_structured_properties
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    property_values_dict = json.loads(property_values) if isinstance(property_values, str) else property_values
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

    with DataHubContext(graph):
        return add_structured_properties(
            property_values=property_values_dict,
            entity_urns=entity_urn_list
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    udf_parameters = [
        ("property_values", "STRING"),
        ("entity_urns", "STRING"),
    ]
    return generate_python_udf_code(
        function_name="ADD_STRUCTURED_PROPERTIES",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,61 @@
1
+ """ADD_TAGS UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_add_tags_udf() -> str:
    """Build the SQL that creates the ADD_TAGS Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets, decodes the JSON-encoded arguments, and calls
    ``datahub_agent_context.mcp_tools.add_tags`` inside a ``DataHubContext``.
    Any exception raised inside the handler is returned as
    ``{'success': False, 'error': <message>}`` rather than propagated.

    UDF parameters:
        tag_urns (STRING): JSON array of tag URNs,
            e.g. '["urn:li:tag:PII", "urn:li:tag:Sensitive"]'.
        entity_urns (STRING): JSON array of entity URNs to tag.
        column_paths (STRING): Optional JSON array of column names; pass NULL
            for entity-level tags.

    UDF result:
        VARIANT: Dictionary with success status and message.

    SQL usage:
        ADD_TAGS('["urn:li:tag:PII"]', '["urn:li:dataset:(...)"]', NULL)
        ADD_TAGS('["urn:li:tag:PII"]', '["urn:li:dataset:(...)"]', '["email"]')

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import add_tags
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    tag_urn_list = json.loads(tag_urns) if isinstance(tag_urns, str) else tag_urns
    entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns
    column_path_list = json.loads(column_paths) if column_paths and isinstance(column_paths, str) else None

    with DataHubContext(graph):
        return add_tags(
            tag_urns=tag_urn_list,
            entity_urns=entity_urn_list,
            column_paths=column_path_list
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    udf_parameters = [
        ("tag_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("column_paths", "STRING"),
    ]
    return generate_python_udf_code(
        function_name="ADD_TAGS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,45 @@
1
+ """Base utilities for generating Snowflake UDFs."""
2
+
3
+ import textwrap
4
+
5
+
6
def generate_python_udf_code(
    function_name: str,
    parameters: list[tuple[str, str]],
    return_type: str,
    function_body: str,
) -> str:
    """
    Render the SQL ``CREATE OR REPLACE FUNCTION`` statement for a Python UDF.

    The statement declares the SQL signature, fixed runtime/package/secret/
    external-access clauses, and a ``$$``-quoted Python module whose handler
    is named after the lower-cased ``function_name`` and whose body is
    ``function_body`` indented one level.

    Args:
        function_name: Name of the UDF to create; its lower-cased form is
            used as the Python handler name.
        parameters: List of (param_name, param_type) tuples. The names become
            both the SQL argument names and the handler's Python arguments.
        return_type: Return type of the function (e.g., 'VARIANT', 'STRING').
        function_body: Python code for the handler body (without the ``def``
            line), written at zero indentation.

    Returns:
        Complete SQL CREATE FUNCTION statement.
    """
    handler = function_name.lower()

    # Split the (name, type) pairs into the SQL signature and the Python one.
    sql_args = []
    py_args = []
    for arg_name, arg_type in parameters:
        sql_args.append(f"{arg_name} {arg_type}")
        py_args.append(arg_name)

    statement_lines = [
        f"CREATE OR REPLACE FUNCTION {function_name}({', '.join(sql_args)})",
        f"RETURNS {return_type}",
        "LANGUAGE PYTHON",
        "RUNTIME_VERSION = '3.10'",
        "ARTIFACT_REPOSITORY = snowflake.snowpark.pypi_shared_repository",
        "PACKAGES = ('datahub-agent-context>=1.3.1.8')",
        "SECRETS = ('datahub_url_secret' = datahub_url, 'datahub_token_secret' = datahub_token)",
        "EXTERNAL_ACCESS_INTEGRATIONS = (datahub_access)",
        f"HANDLER = '{handler}'",
        "AS $$",
        "import _snowflake",
        "from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig",
        "from datahub_agent_context.context import DataHubContext",
        "",
        f"def {handler}({', '.join(py_args)}):",
        # The body is supplied unindented, so push it under the def line.
        textwrap.indent(function_body, "    "),
        "$$;",
    ]
    return "\n".join(statement_lines)
@@ -0,0 +1,68 @@
1
+ """GET_DATASET_QUERIES UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_get_dataset_queries_udf() -> str:
    """Build the SQL that creates the GET_DATASET_QUERIES Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets and calls ``datahub_agent_context.mcp_tools.get_dataset_queries``
    inside a ``DataHubContext`` to retrieve SQL queries associated with a
    dataset or column. Falsy ``column_name`` and ``source`` values are
    forwarded as ``None``; a falsy ``count`` becomes 10. Any exception raised
    inside the handler is returned as a dictionary with ``success`` set to
    False, the error message, and the requested ``urn``.

    Useful for understanding how data is used: common JOIN patterns, typical
    filters, and aggregation logic. Results can be filtered by query source
    (MANUAL vs SYSTEM).

    UDF parameters:
        urn (STRING): Dataset URN.
        column_name (STRING): Optional column name to filter queries; pass
            NULL for all dataset queries.
        source (STRING): Filter by query origin - 'MANUAL', 'SYSTEM', or NULL
            for both.
        count (NUMBER): Number of queries to return (default: 10).

    UDF result:
        VARIANT: Dictionary with:
            - total: Total number of queries matching criteria
            - queries: Array of query objects with SQL statements and metadata
            - start: Starting offset
            - count: Number of results returned

    SQL usage:
        GET_DATASET_QUERIES(urn, NULL, 'MANUAL', 10)
        GET_DATASET_QUERIES(urn, NULL, 'SYSTEM', 20)
        GET_DATASET_QUERIES(urn, 'customer_id', 'MANUAL', 5)

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import get_dataset_queries
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_dataset_queries(
            urn=urn,
            column=column_name if column_name else None,
            source=source if source else None,
            count=int(count) if count else 10
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urn': urn
    }"""

    udf_parameters = [
        ("urn", "STRING"),
        ("column_name", "STRING"),
        ("source", "STRING"),
        ("count", "NUMBER"),
    ]
    return generate_python_udf_code(
        function_name="GET_DATASET_QUERIES",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,47 @@
1
+ """GET_ENTITIES UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_get_entities_udf() -> str:
    """Build the SQL that creates the GET_ENTITIES Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets and calls ``datahub_agent_context.mcp_tools.get_entities`` inside
    a ``DataHubContext``. The UDF takes a single URN string and wraps it in a
    one-element list before calling the underlying function. Any exception
    raised inside the handler is returned as a dictionary with ``success``
    set to False, the error message, and the requested ``urn``.

    UDF parameters:
        entity_urn (STRING): Entity URN (e.g., "urn:li:dataset:(...)").

    UDF result:
        VARIANT: Dictionary with entity details including schema metadata,
        ownership, tags, glossary terms, and other metadata aspects.

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import get_entities
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_entities([entity_urn])

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urn': entity_urn
    }"""

    udf_parameters = [("entity_urn", "STRING")]
    return generate_python_udf_code(
        function_name="GET_ENTITIES",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,61 @@
1
+ """GET_LINEAGE UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_get_lineage_udf() -> str:
    """Build the SQL that creates the GET_LINEAGE Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets and calls ``datahub_agent_context.mcp_tools.get_lineage`` inside
    a ``DataHubContext``. A falsy ``column_name`` is forwarded as ``None``,
    ``upstream`` is converted with ``bool()``, and falsy ``max_hops`` /
    ``max_results`` fall back to 1 and 30. Any exception raised inside the
    handler is returned as a dictionary with ``success`` set to False, the
    error message, and the requested ``urn``.

    UDF parameters:
        urn (STRING): Entity URN.
        column_name (STRING): Optional column name for column-level lineage;
            pass NULL for entity-level lineage.
        upstream (NUMBER): 1 for upstream lineage, 0 for downstream lineage.
        max_hops (NUMBER): Maximum number of hops (1-3+, default: 1).
        max_results (NUMBER): Maximum number of results to return
            (default: 30).

    UDF result:
        VARIANT: Dictionary with upstreams or downstreams field containing
        lineage entities, facets, and metadata. For column-level lineage,
        includes lineageColumns showing which columns have relationships.

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import get_lineage
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_lineage(
            urn=urn,
            column=column_name if column_name else None,
            upstream=bool(upstream),
            max_hops=int(max_hops) if max_hops else 1,
            max_results=int(max_results) if max_results else 30
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urn': urn
    }"""

    udf_parameters = [
        ("urn", "STRING"),
        ("column_name", "STRING"),
        ("upstream", "NUMBER"),
        ("max_hops", "NUMBER"),
        ("max_results", "NUMBER"),
    ]
    return generate_python_udf_code(
        function_name="GET_LINEAGE",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,69 @@
1
+ """GET_LINEAGE_PATHS_BETWEEN UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_get_lineage_paths_between_udf() -> str:
    """Build the SQL that creates the GET_LINEAGE_PATHS_BETWEEN Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets and calls
    ``datahub_agent_context.mcp_tools.get_lineage_paths_between`` inside a
    ``DataHubContext`` to get detailed lineage paths between two specific
    entities or columns. Falsy column arguments are forwarded as ``None``.
    Any exception raised inside the handler is returned as a dictionary with
    ``success`` set to False, the error message, and both requested URNs.

    The result's paths array shows the exact transformation chain(s),
    including intermediate entities and transformation query URNs.

    UDF parameters:
        source_urn (STRING): URN of the source dataset.
        target_urn (STRING): URN of the target dataset.
        source_column (STRING): Optional column name in the source dataset;
            pass NULL for dataset-level.
        target_column (STRING): Optional column name in the target dataset;
            pass NULL for dataset-level.

    UDF result:
        VARIANT: Dictionary with:
            - source: Source entity/column info
            - target: Target entity/column info
            - paths: Array of path objects showing transformation chains
            - pathCount: Number of paths found
            - metadata: Query metadata including direction and path type

    SQL usage:
        GET_LINEAGE_PATHS_BETWEEN(source_urn, target_urn, NULL, NULL)
        GET_LINEAGE_PATHS_BETWEEN(source_urn, target_urn, 'user_id', 'customer_id')

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import get_lineage_paths_between
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_lineage_paths_between(
            source_urn=source_urn,
            target_urn=target_urn,
            source_column=source_column if source_column else None,
            target_column=target_column if target_column else None
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'source_urn': source_urn,
        'target_urn': target_urn
    }"""

    udf_parameters = [
        ("source_urn", "STRING"),
        ("target_urn", "STRING"),
        ("source_column", "STRING"),
        ("target_column", "STRING"),
    ]
    return generate_python_udf_code(
        function_name="GET_LINEAGE_PATHS_BETWEEN",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,51 @@
1
+ """GET_ME UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_get_me_udf() -> str:
    """Build the SQL that creates the GET_ME Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets and calls ``datahub_agent_context.mcp_tools.get_me`` inside a
    ``DataHubContext`` to get information about the currently authenticated
    user: profile information, platform privileges, group memberships, and
    user settings. Any exception raised inside the handler is returned as
    ``{'success': False, 'error': <message>}`` rather than propagated.

    UDF parameters:
        None.

    UDF result:
        VARIANT: Dictionary with:
            - success: Boolean indicating if operation succeeded
            - data: User information including corpUser details
            - message: Success or error message

    SQL usage:
        SELECT GET_ME()

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import get_me
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    with DataHubContext(graph):
        return get_me()

except Exception as e:
    return {
        'success': False,
        'error': str(e)
    }"""

    # GET_ME takes no SQL arguments.
    udf_parameters = []
    return generate_python_udf_code(
        function_name="GET_ME",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
@@ -0,0 +1,70 @@
1
+ """GREP_DOCUMENTS UDF generator."""
2
+
3
+ from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
4
+
5
+
6
def generate_grep_documents_udf() -> str:
    """Build the SQL that creates the GREP_DOCUMENTS Snowflake UDF.

    The generated handler reads the DataHub URL and token from Snowflake
    secrets, decodes the JSON-encoded URN list, and calls
    ``datahub_agent_context.mcp_tools.grep_documents`` inside a
    ``DataHubContext`` to search within document content using regex
    patterns (similar to ripgrep/grep). Falsy ``context_chars`` and
    ``max_matches_per_doc`` fall back to 200 and 5. Any exception raised
    inside the handler is returned as a dictionary with ``success`` set to
    False, the error message, and the original ``urns`` and ``pattern``.

    Use SEARCH_DOCUMENTS first to find relevant document URNs, then use this
    UDF to search within their content.

    UDF parameters:
        urns (STRING): JSON array of document URNs to search within
            (e.g., '["urn:li:document:doc1"]').
        pattern (STRING): Regex pattern to search for
            (e.g., 'kubernetes', '(?i)deploy.*production').
        context_chars (NUMBER): Characters to show before/after matches
            (default: 200).
        max_matches_per_doc (NUMBER): Maximum matches per document
            (default: 5).

    UDF result:
        VARIANT: Dictionary with:
            - results: List of documents with matching excerpts
            - total_matches: Total matches across all documents
            - documents_with_matches: Number of documents containing matches

    SQL usage:
        GREP_DOCUMENTS('["urn:li:document:runbook1"]', 'kubectl apply', 300, 5)
        GREP_DOCUMENTS('["urn:li:document:doc1"]', '(?i)error|exception', 200, 10)

    Returns:
        The statement produced by ``generate_python_udf_code`` for this UDF.
    """
    # Python source of the handler; it is indented under the generated ``def``.
    handler_source = """from datahub_agent_context.mcp_tools import grep_documents
import json
try:
    datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
    datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
    datahub_url = datahub_url.rstrip('/')

    graph = DataHubGraph(
        config=DatahubClientConfig(server=datahub_url, token=datahub_token)
    )

    urn_list = json.loads(urns) if isinstance(urns, str) else urns

    with DataHubContext(graph):
        return grep_documents(
            urns=urn_list,
            pattern=pattern,
            context_chars=int(context_chars) if context_chars else 200,
            max_matches_per_doc=int(max_matches_per_doc) if max_matches_per_doc else 5
        )

except Exception as e:
    return {
        'success': False,
        'error': str(e),
        'urns': urns,
        'pattern': pattern
    }"""

    udf_parameters = [
        ("urns", "STRING"),
        ("pattern", "STRING"),
        ("context_chars", "NUMBER"),
        ("max_matches_per_doc", "NUMBER"),
    ]
    return generate_python_udf_code(
        function_name="GREP_DOCUMENTS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )