datahub-agent-context 1.3.1.10rc1__py3-none-any.whl → 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datahub_agent_context/__init__.py +11 -3
- datahub_agent_context/_version.py +1 -1
- datahub_agent_context/cli.py +152 -0
- datahub_agent_context/context.py +47 -34
- datahub_agent_context/langchain_tools/builder.py +6 -4
- datahub_agent_context/mcp_tools/base.py +6 -3
- datahub_agent_context/mcp_tools/save_document.py +634 -0
- datahub_agent_context/snowflake/__init__.py +0 -0
- datahub_agent_context/snowflake/generate_udfs.py +306 -0
- datahub_agent_context/snowflake/generators/__init__.py +21 -0
- datahub_agent_context/snowflake/generators/configuration.py +104 -0
- datahub_agent_context/snowflake/generators/cortex_agent.py +725 -0
- datahub_agent_context/snowflake/generators/network_rules.py +53 -0
- datahub_agent_context/snowflake/generators/stored_procedure.py +87 -0
- datahub_agent_context/snowflake/snowflake.py +662 -0
- datahub_agent_context/snowflake/udfs/__init__.py +1 -0
- datahub_agent_context/snowflake/udfs/add_glossary_terms.py +61 -0
- datahub_agent_context/snowflake/udfs/add_owners.py +59 -0
- datahub_agent_context/snowflake/udfs/add_structured_properties.py +57 -0
- datahub_agent_context/snowflake/udfs/add_tags.py +61 -0
- datahub_agent_context/snowflake/udfs/base.py +45 -0
- datahub_agent_context/snowflake/udfs/get_dataset_queries.py +68 -0
- datahub_agent_context/snowflake/udfs/get_entities.py +47 -0
- datahub_agent_context/snowflake/udfs/get_lineage.py +61 -0
- datahub_agent_context/snowflake/udfs/get_lineage_paths_between.py +69 -0
- datahub_agent_context/snowflake/udfs/get_me.py +51 -0
- datahub_agent_context/snowflake/udfs/grep_documents.py +70 -0
- datahub_agent_context/snowflake/udfs/list_schema_fields.py +80 -0
- datahub_agent_context/snowflake/udfs/remove_domains.py +45 -0
- datahub_agent_context/snowflake/udfs/remove_glossary_terms.py +57 -0
- datahub_agent_context/snowflake/udfs/remove_owners.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_structured_properties.py +56 -0
- datahub_agent_context/snowflake/udfs/remove_tags.py +57 -0
- datahub_agent_context/snowflake/udfs/search_datahub.py +71 -0
- datahub_agent_context/snowflake/udfs/search_documents.py +58 -0
- datahub_agent_context/snowflake/udfs/set_domains.py +55 -0
- datahub_agent_context/snowflake/udfs/update_description.py +60 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA +21 -14
- datahub_agent_context-1.4.0rc2.dist-info/RECORD +66 -0
- datahub_agent_context-1.3.1.10rc1.dist-info/RECORD +0 -34
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/WHEEL +0 -0
- {datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""LIST_SCHEMA_FIELDS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_list_schema_fields_udf() -> str:
    """Generate LIST_SCHEMA_FIELDS UDF using datahub-agent-context.

    This UDF wraps datahub_agent_context.mcp_tools.list_schema_fields() to list
    schema fields for a dataset with optional keyword filtering and pagination.

    Useful when schema fields were truncated in search results and you need to
    explore specific columns. Supports pagination for large schemas.

    Parameters:
        dataset_urn (STRING): Dataset URN
        keywords (STRING): Optional keywords to filter schema fields (OR matching).
            Can be a JSON array string or a single keyword.
            - Single string: Treated as one keyword for exact field name/phrase.
              A bare keyword that happens to be a JSON scalar (e.g. '2024',
              'true') is kept as text rather than converted to a number/bool.
            - JSON array: Multiple keywords, matches any (OR logic)
            - Empty/null: Returns all fields in priority order
            Matches against fieldPath, description, label, tags, and glossary terms.
        limit (NUMBER): Maximum number of fields to return (default: 100).
            NULL falls back to 100; other values are converted with int().

    Returns:
        VARIANT: Dictionary with:
            - urn: The dataset URN
            - fields: List of schema fields (paginated)
            - totalFields: Total number of fields in the schema
            - returned: Number of fields actually returned
            - remainingCount: Number of fields not included
            - matchingCount: Number of fields that matched keywords (if provided)
        If the handler raises, the UDF instead returns a dictionary with
        'success': False, the error text and the dataset URN.

    Examples:
        - Single keyword: LIST_SCHEMA_FIELDS(urn, 'user_email', 100)
        - Multiple keywords: LIST_SCHEMA_FIELDS(urn, '["email", "user"]', 100)
        - All fields: LIST_SCHEMA_FIELDS(urn, NULL, 100)
    """
    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are not imported in this body; presumably the template in
    # generate_python_udf_code() provides them - confirm against base.py.
    function_body = """from datahub_agent_context.mcp_tools import list_schema_fields
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        keyword_list = None
        if keywords:
            if isinstance(keywords, str):
                try:
                    parsed_keywords = json.loads(keywords)
                except json.JSONDecodeError:
                    parsed_keywords = None
                # Bare keywords such as '2024' or 'true' are valid JSON scalars; keep them as text
                keyword_list = parsed_keywords if isinstance(parsed_keywords, (list, str)) else [keywords]
            else:
                keyword_list = keywords

        with DataHubContext(graph):
            return list_schema_fields(
                urn=dataset_urn,
                keywords=keyword_list,
                limit=int(limit) if limit is not None else 100
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e),
            'urn': dataset_urn
        }"""

    return generate_python_udf_code(
        function_name="LIST_SCHEMA_FIELDS",
        parameters=[
            ("dataset_urn", "STRING"),
            ("keywords", "STRING"),
            ("limit", "NUMBER"),
        ],
        return_type="VARIANT",
        function_body=function_body,
    )
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""REMOVE_DOMAINS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_remove_domains_udf() -> str:
    """Build the REMOVE_DOMAINS UDF definition using datahub-agent-context.

    The handler delegates to datahub_agent_context.mcp_tools.remove_domains()
    to clear domain assignments from multiple DataHub entities.

    Parameters:
        entity_urns (STRING): JSON array of entity URNs to remove domain from

    Returns:
        VARIANT: Dictionary with success status and message. When the handler
        raises, it returns 'success': False together with the error text.
    """
    udf_parameters = [("entity_urns", "STRING")]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import remove_domains
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

        with DataHubContext(graph):
            return remove_domains(entity_urns=entity_urn_list)

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }"""

    return generate_python_udf_code(
        function_name="REMOVE_DOMAINS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""REMOVE_GLOSSARY_TERMS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_remove_glossary_terms_udf() -> str:
    """Build the REMOVE_GLOSSARY_TERMS UDF definition using datahub-agent-context.

    The handler delegates to
    datahub_agent_context.mcp_tools.remove_glossary_terms() to detach glossary
    terms from multiple DataHub entities or from their columns.

    Parameters:
        term_urns (STRING): JSON array of glossary term URNs to remove
        entity_urns (STRING): JSON array of entity URNs
        column_paths (STRING): Optional JSON array of column names
            (use NULL for entity-level); an empty value is passed on as None

    Returns:
        VARIANT: Dictionary with success status and message. When the handler
        raises, it returns 'success': False together with the error text.
    """
    udf_parameters = [
        ("term_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("column_paths", "STRING"),
    ]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import remove_glossary_terms
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        term_urn_list = json.loads(term_urns) if isinstance(term_urns, str) else term_urns
        entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns
        column_path_list = json.loads(column_paths) if column_paths and isinstance(column_paths, str) else None

        with DataHubContext(graph):
            return remove_glossary_terms(
                term_urns=term_urn_list,
                entity_urns=entity_urn_list,
                column_paths=column_path_list
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }"""

    return generate_python_udf_code(
        function_name="REMOVE_GLOSSARY_TERMS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""REMOVE_OWNERS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_remove_owners_udf() -> str:
    """Build the REMOVE_OWNERS UDF definition using datahub-agent-context.

    The handler delegates to datahub_agent_context.mcp_tools.remove_owners()
    to drop owners from multiple DataHub entities.

    Parameters:
        owner_urns (STRING): JSON array of owner URNs to remove
        entity_urns (STRING): JSON array of entity URNs to remove ownership from
        ownership_type_urn (STRING): Optional ownership type URN
            (use NULL for all types); an empty value is passed on as None

    Returns:
        VARIANT: Dictionary with success status and message. When the handler
        raises, it returns 'success': False together with the error text.
    """
    udf_parameters = [
        ("owner_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("ownership_type_urn", "STRING"),
    ]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import remove_owners
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        owner_urn_list = json.loads(owner_urns) if isinstance(owner_urns, str) else owner_urns
        entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

        with DataHubContext(graph):
            return remove_owners(
                owner_urns=owner_urn_list,
                entity_urns=entity_urn_list,
                ownership_type_urn=ownership_type_urn if ownership_type_urn else None
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }"""

    return generate_python_udf_code(
        function_name="REMOVE_OWNERS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""REMOVE_STRUCTURED_PROPERTIES UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_remove_structured_properties_udf() -> str:
    """Build the REMOVE_STRUCTURED_PROPERTIES UDF definition using datahub-agent-context.

    The handler delegates to
    datahub_agent_context.mcp_tools.remove_structured_properties() to strip
    structured properties from multiple DataHub entities.

    Parameters:
        property_urns (STRING): JSON array of structured property URNs to remove
        entity_urns (STRING): JSON array of entity URNs to remove properties from

    Returns:
        VARIANT: Dictionary with success status and message. When the handler
        raises, it returns 'success': False together with the error text.

    Example:
        - Remove: REMOVE_STRUCTURED_PROPERTIES('["urn:li:structuredProperty:retentionTime"]', '["urn:li:dataset:(...)"]')
    """
    udf_parameters = [
        ("property_urns", "STRING"),
        ("entity_urns", "STRING"),
    ]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import remove_structured_properties
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        property_urn_list = json.loads(property_urns) if isinstance(property_urns, str) else property_urns
        entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

        with DataHubContext(graph):
            return remove_structured_properties(
                property_urns=property_urn_list,
                entity_urns=entity_urn_list
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }"""

    return generate_python_udf_code(
        function_name="REMOVE_STRUCTURED_PROPERTIES",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""REMOVE_TAGS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_remove_tags_udf() -> str:
    """Build the REMOVE_TAGS UDF definition using datahub-agent-context.

    The handler delegates to datahub_agent_context.mcp_tools.remove_tags() to
    take tags off multiple DataHub entities or their columns.

    Parameters:
        tag_urns (STRING): JSON array of tag URNs to remove
        entity_urns (STRING): JSON array of entity URNs to untag
        column_paths (STRING): Optional JSON array of column names
            (use NULL for entity-level); an empty value is passed on as None

    Returns:
        VARIANT: Dictionary with success status and message. When the handler
        raises, it returns 'success': False together with the error text.
    """
    udf_parameters = [
        ("tag_urns", "STRING"),
        ("entity_urns", "STRING"),
        ("column_paths", "STRING"),
    ]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import remove_tags
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        tag_urn_list = json.loads(tag_urns) if isinstance(tag_urns, str) else tag_urns
        entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns
        column_path_list = json.loads(column_paths) if column_paths and isinstance(column_paths, str) else None

        with DataHubContext(graph):
            return remove_tags(
                tag_urns=tag_urn_list,
                entity_urns=entity_urn_list,
                column_paths=column_path_list
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }"""

    return generate_python_udf_code(
        function_name="REMOVE_TAGS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""SEARCH_DATAHUB UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_search_datahub_udf() -> str:
    """Generate SEARCH_DATAHUB UDF using datahub-agent-context.

    This UDF wraps datahub_agent_context.mcp_tools.search() to enable searching
    across DataHub entities using structured full-text search from Snowflake.
    The handler always requests 10 results (num_results is fixed in the body).

    Search Syntax:
        - Structured full-text search - always start queries with /q
        - Use + operator for AND (handles punctuation better than quotes)
        - Supports boolean logic: AND (default), OR, NOT, parentheses, field searches
        - Examples:
            • /q user+transaction → requires both terms
            • /q wizard OR pet → entities containing either term
            • /q revenue* → wildcard matching
            • /q tag:PII → search by tag name

    Parameters:
        search_query (STRING): Search query string (use /q prefix for structured queries)
        entity_type (STRING): Optional entity type filter as JSON array string
            (e.g., '["dataset"]', '["dashboard", "chart"]').
            A single type is also accepted, either bare ('dataset') or
            JSON-quoted ('"dataset"'); both become a one-element list.
            Use NULL to search across all entity types

    Returns:
        VARIANT: Dictionary with search results, facets, and metadata.
        If the handler raises, the UDF instead returns a dictionary with
        'success': False, the error text and the original query.
    """
    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are not imported in this body; presumably the template in
    # generate_python_udf_code() provides them - confirm against base.py.
    function_body = """from datahub_agent_context.mcp_tools import search
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        # Parse entity_type filter if provided
        filters = {}
        if entity_type:
            try:
                entity_type_list = json.loads(entity_type) if isinstance(entity_type, str) else entity_type
            except json.JSONDecodeError:
                # If not valid JSON, treat as single entity type
                entity_type_list = [entity_type]
            # A JSON-quoted single type parses to a plain string; the filter expects a list
            if isinstance(entity_type_list, str):
                entity_type_list = [entity_type_list]
            filters["entity_type"] = entity_type_list

        with DataHubContext(graph):
            return search(
                query=search_query,
                filters=filters if filters else None,
                num_results=10
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e),
            'query': search_query
        }"""

    return generate_python_udf_code(
        function_name="SEARCH_DATAHUB",
        parameters=[("search_query", "STRING"), ("entity_type", "STRING")],
        return_type="VARIANT",
        function_body=function_body,
    )
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""SEARCH_DOCUMENTS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_search_documents_udf() -> str:
    """Build the SEARCH_DOCUMENTS UDF definition using datahub-agent-context.

    The handler delegates to datahub_agent_context.mcp_tools.search_documents()
    to look up documents stored in DataHub (runbooks, FAQs, knowledge articles
    from Notion, Confluence, etc.).

    Only document metadata is returned, not content. Use GET_ENTITIES with the
    document URN to retrieve full content.

    Parameters:
        search_query (STRING): Search query string (use /q prefix for structured queries)
        num_results (NUMBER): Number of results to return (max 50, default: 10).
            The handler converts the value with int() and uses 10 when it is
            NULL or 0.

    Returns:
        VARIANT: Dictionary with search results and facets showing document
        metadata. When the handler raises, it returns 'success': False together
        with the error text and the original query.

    Examples:
        - Find deployment docs: SEARCH_DOCUMENTS('deployment', 10)
        - Structured search: SEARCH_DOCUMENTS('/q kubernetes OR k8s', 20)
    """
    udf_parameters = [
        ("search_query", "STRING"),
        ("num_results", "NUMBER"),
    ]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import search_documents
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        with DataHubContext(graph):
            return search_documents(
                query=search_query,
                num_results=int(num_results) if num_results else 10
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e),
            'query': search_query
        }"""

    return generate_python_udf_code(
        function_name="SEARCH_DOCUMENTS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""SET_DOMAINS UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_set_domains_udf() -> str:
    """Build the SET_DOMAINS UDF definition using datahub-agent-context.

    The handler delegates to datahub_agent_context.mcp_tools.set_domains() to
    assign one domain to multiple DataHub entities.

    Parameters:
        domain_urn (STRING): Domain URN to assign (e.g., 'urn:li:domain:marketing')
        entity_urns (STRING): JSON array of entity URNs to assign to the domain

    Returns:
        VARIANT: Dictionary with success status and message. When the handler
        raises, it returns 'success': False together with the error text.

    Example:
        - Set domain: SET_DOMAINS('urn:li:domain:marketing', '["urn:li:dataset:(...)"]')
    """
    udf_parameters = [
        ("domain_urn", "STRING"),
        ("entity_urns", "STRING"),
    ]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import set_domains
    import json
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        entity_urn_list = json.loads(entity_urns) if isinstance(entity_urns, str) else entity_urns

        with DataHubContext(graph):
            return set_domains(
                domain_urn=domain_urn,
                entity_urns=entity_urn_list
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }"""

    return generate_python_udf_code(
        function_name="SET_DOMAINS",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""UPDATE_DESCRIPTION UDF generator."""
|
|
2
|
+
|
|
3
|
+
from datahub_agent_context.snowflake.udfs.base import generate_python_udf_code
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def generate_update_description_udf() -> str:
    """Build the UPDATE_DESCRIPTION UDF definition using datahub-agent-context.

    The handler delegates to
    datahub_agent_context.mcp_tools.update_description() to change descriptions
    on DataHub entities or their columns.

    Parameters:
        entity_urn (STRING): Entity URN to update
        operation (STRING): Operation type - 'replace', 'append', or 'remove'
        description (STRING): Description text (not needed for 'remove');
            an empty value is passed on as None
        column_path (STRING): Optional column name (use NULL for entity-level);
            an empty value is passed on as None

    Returns:
        VARIANT: Dictionary with success status and message. When the handler
        raises, it returns 'success': False together with the error text.

    Examples:
        - Replace: UPDATE_DESCRIPTION(urn, 'replace', 'New description', NULL)
        - Append: UPDATE_DESCRIPTION(urn, 'append', ' (PII)', 'email')
        - Remove: UPDATE_DESCRIPTION(urn, 'remove', NULL, 'old_field')
    """
    udf_parameters = [
        ("entity_urn", "STRING"),
        ("operation", "STRING"),
        ("description", "STRING"),
        ("column_path", "STRING"),
    ]

    # NOTE(review): DataHubGraph, DatahubClientConfig, DataHubContext and
    # _snowflake are presumably supplied by generate_python_udf_code() - confirm.
    handler_source = """from datahub_agent_context.mcp_tools import update_description
    try:
        datahub_url = _snowflake.get_generic_secret_string('datahub_url_secret')
        datahub_token = _snowflake.get_generic_secret_string('datahub_token_secret')
        datahub_url = datahub_url.rstrip('/')

        graph = DataHubGraph(
            config=DatahubClientConfig(server=datahub_url, token=datahub_token)
        )

        with DataHubContext(graph):
            return update_description(
                entity_urn=entity_urn,
                operation=operation,
                description=description if description else None,
                column_path=column_path if column_path else None
            )

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }"""

    return generate_python_udf_code(
        function_name="UPDATE_DESCRIPTION",
        parameters=udf_parameters,
        return_type="VARIANT",
        function_body=handler_source,
    )
|
{datahub_agent_context-1.3.1.10rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datahub-agent-context
|
|
3
|
-
Version: 1.3.1.10rc1
|
|
3
|
+
Version: 1.4.0rc2
|
|
4
4
|
Summary: DataHub Agent Context - MCP Tools for AI Agents
|
|
5
5
|
Home-page: https://datahub.io/
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -28,27 +28,33 @@ Classifier: Environment :: MacOS X
|
|
|
28
28
|
Classifier: Topic :: Software Development
|
|
29
29
|
Requires-Python: >=3.9
|
|
30
30
|
Description-Content-Type: text/markdown
|
|
31
|
-
Requires-Dist:
|
|
32
|
-
Requires-Dist: json-repair<1.0.0,>=0.25.0
|
|
33
|
-
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
31
|
+
Requires-Dist: h11<1.0,>=0.16
|
|
34
32
|
Requires-Dist: google-re2<2.0,>=1.0
|
|
35
|
-
Requires-Dist:
|
|
33
|
+
Requires-Dist: acryl-datahub==1.4.0rc2
|
|
34
|
+
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
36
35
|
Requires-Dist: httpcore<2.0,>=1.0.9
|
|
37
|
-
Requires-Dist:
|
|
38
|
-
Requires-Dist:
|
|
36
|
+
Requires-Dist: jmespath<2.0.0,>=1.0.0
|
|
37
|
+
Requires-Dist: json-repair<1.0.0,>=0.25.0
|
|
38
|
+
Requires-Dist: cachetools<7.0.0,>=5.0.0
|
|
39
39
|
Provides-Extra: dev
|
|
40
|
-
Requires-Dist: mypy==1.17.1; extra == "dev"
|
|
41
40
|
Requires-Dist: ruff==0.11.7; extra == "dev"
|
|
41
|
+
Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "dev"
|
|
42
|
+
Requires-Dist: snowflake-connector-python<4.0.0,>=3.0.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
|
|
42
44
|
Requires-Dist: types-cachetools<7.0.0,>=5.0.0; extra == "dev"
|
|
43
|
-
Requires-Dist: types-requests<3.0.0,>=2.0.0; extra == "dev"
|
|
44
45
|
Requires-Dist: types-toml<1.0.0,>=0.10.0; extra == "dev"
|
|
45
|
-
Requires-Dist:
|
|
46
|
+
Requires-Dist: tox<5.0.0,>=4.0.0; extra == "dev"
|
|
47
|
+
Requires-Dist: click<9.0.0,>=8.0.0; extra == "dev"
|
|
48
|
+
Requires-Dist: types-requests<3.0.0,>=2.0.0; extra == "dev"
|
|
49
|
+
Requires-Dist: mypy==1.17.1; extra == "dev"
|
|
46
50
|
Requires-Dist: types-PyYAML<7.0.0,>=6.0.0; extra == "dev"
|
|
47
|
-
Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
|
|
48
51
|
Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
|
|
49
|
-
Requires-Dist:
|
|
52
|
+
Requires-Dist: types-jmespath<2.0.0,>=1.0.0; extra == "dev"
|
|
50
53
|
Provides-Extra: langchain
|
|
51
54
|
Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "langchain"
|
|
55
|
+
Provides-Extra: snowflake
|
|
56
|
+
Requires-Dist: snowflake-connector-python<4.0.0,>=3.0.0; extra == "snowflake"
|
|
57
|
+
Requires-Dist: click<9.0.0,>=8.0.0; extra == "snowflake"
|
|
52
58
|
Dynamic: classifier
|
|
53
59
|
Dynamic: description
|
|
54
60
|
Dynamic: description-content-type
|
|
@@ -105,7 +111,7 @@ from datahub_agent_context.mcp_tools.entities import get_entities
|
|
|
105
111
|
client = DataHubClient.from_env()
|
|
106
112
|
|
|
107
113
|
# Search for datasets
|
|
108
|
-
with client.graph as graph:
|
|
114
|
+
with client as client:
|
|
109
115
|
results = search(
|
|
110
116
|
query="user_data",
|
|
111
117
|
filters={"entity_type": ["dataset"]},
|
|
@@ -113,7 +119,7 @@ with client.graph as graph:
|
|
|
113
119
|
)
|
|
114
120
|
|
|
115
121
|
# Get detailed entity information
|
|
116
|
-
with client.graph as graph:
|
|
122
|
+
with client as client:
|
|
117
123
|
entities = get_entities(
|
|
118
124
|
urns=[result["entity"]["urn"] for result in results["searchResults"]]
|
|
119
125
|
)
|
|
@@ -175,6 +181,7 @@ agent = create_agent(model, tools=tools, system_prompt="...")
|
|
|
175
181
|
- `add_owners()`, `remove_owners()` - Manage owners
|
|
176
182
|
- `add_glossary_terms()`, `remove_glossary_terms()` - Manage glossary terms
|
|
177
183
|
- `add_structured_properties()`, `remove_structured_properties()` - Manage structured properties
|
|
184
|
+
- `save_document()` - Save or update a Document.
|
|
178
185
|
|
|
179
186
|
#### User Tools
|
|
180
187
|
|