datahub-agent-context 1.4.0rc1__py3-none-any.whl → 1.4.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datahub_agent_context/__init__.py +11 -3
- datahub_agent_context/_version.py +1 -1
- datahub_agent_context/context.py +47 -34
- datahub_agent_context/langchain_tools/builder.py +6 -4
- datahub_agent_context/mcp_tools/save_document.py +634 -0
- {datahub_agent_context-1.4.0rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA +20 -19
- {datahub_agent_context-1.4.0rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/RECORD +9 -8
- {datahub_agent_context-1.4.0rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/WHEEL +0 -0
- {datahub_agent_context-1.4.0rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -17,9 +17,17 @@
|
|
|
17
17
|
from datahub_agent_context._version import __version__
|
|
18
18
|
from datahub_agent_context.context import (
|
|
19
19
|
DataHubContext,
|
|
20
|
+
get_datahub_client,
|
|
20
21
|
get_graph,
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
reset_client,
|
|
23
|
+
set_client,
|
|
23
24
|
)
|
|
24
25
|
|
|
25
|
-
__all__ = [
|
|
26
|
+
# Public names exported by the package: the version string plus the context
# helpers imported from datahub_agent_context.context above.
__all__ = [
    "__version__",
    "DataHubContext",
    "get_datahub_client",
    "get_graph",
    "set_client",
    "reset_client",
]
|
datahub_agent_context/context.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Context management for DataHub tools.
|
|
2
2
|
|
|
3
|
-
This module provides a context manager pattern for managing
|
|
3
|
+
This module provides a context manager pattern for managing DataHubClient instances
|
|
4
4
|
across tool calls without explicit parameter passing.
|
|
5
5
|
"""
|
|
6
6
|
|
|
@@ -9,56 +9,69 @@ from typing import TYPE_CHECKING, Optional
|
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
12
|
+
from datahub.sdk.main_client import DataHubClient
|
|
12
13
|
|
|
13
|
-
# Context variable to store the current
|
|
14
|
-
|
|
15
|
-
contextvars.ContextVar("
|
|
14
|
+
# Context variable to store the current DataHubClient instance.
# It defaults to None; get_datahub_client() raises RuntimeError while the
# value is still None.
_client_context: contextvars.ContextVar[Optional["DataHubClient"]] = (
    contextvars.ContextVar("datahub_client", default=None)
)
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
def
|
|
20
|
-
"""Get the current
|
|
20
|
+
def get_datahub_client() -> "DataHubClient":
    """Return the DataHubClient stored in the current context.

    Returns:
        The client held by the module's context variable.

    Raises:
        RuntimeError: If the context variable still holds None, i.e. no
            client has been set.
    """
    active_client = _client_context.get()
    if active_client is not None:
        return active_client

    raise RuntimeError(
        "No DataHubClient in context. "
        "Make sure to use DataHubContext context manager or set_client() before calling tools."
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_graph() -> "DataHubGraph":
    """Return the graph object of the client in the current context.

    Convenience wrapper: looks up the client with get_datahub_client() and
    returns its ``_graph`` attribute.

    Returns:
        The ``_graph`` attribute of the DataHubClient in context.

    Raises:
        RuntimeError: Propagated from get_datahub_client() when no client is
            set in context.
    """
    current_client = get_datahub_client()
    return current_client._graph
|
|
35
48
|
|
|
36
49
|
|
|
37
|
-
def
|
|
38
|
-
"""Set the
|
|
50
|
+
def set_client(client: "DataHubClient") -> contextvars.Token:
    """Store a DataHubClient in the current context.

    Args:
        client: DataHubClient instance to make available to tools.

    Returns:
        The token produced by the context variable's ``set`` call; pass it to
        reset_client() to restore the previous value.
    """
    restore_token = _client_context.set(client)
    return restore_token
|
|
47
60
|
|
|
48
61
|
|
|
49
|
-
def
|
|
50
|
-
"""Reset the
|
|
62
|
+
def reset_client(token: contextvars.Token) -> None:
    """Restore the client context variable to the value it had before set_client().

    Args:
        token: Token returned by set_client().
    """
    _client_context.reset(token)
    return None
|
|
56
69
|
|
|
57
70
|
|
|
58
71
|
class DataHubContext:
|
|
59
72
|
"""Context manager for DataHub tool execution.
|
|
60
73
|
|
|
61
|
-
This context manager sets the
|
|
74
|
+
This context manager sets the DataHubClient in context for the duration
|
|
62
75
|
of the with block, allowing tools to access it without explicit parameter passing.
|
|
63
76
|
|
|
64
77
|
Example:
|
|
@@ -68,30 +81,30 @@ class DataHubContext:
|
|
|
68
81
|
|
|
69
82
|
client = DataHubClient(...)
|
|
70
83
|
|
|
71
|
-
with DataHubContext(client
|
|
72
|
-
results = search(query="users") # No
|
|
84
|
+
with DataHubContext(client):
|
|
85
|
+
results = search(query="users") # No client parameter needed!
|
|
73
86
|
"""
|
|
74
87
|
|
|
75
|
-
def __init__(self,
|
|
88
|
+
def __init__(self, client: "DataHubClient"):
|
|
76
89
|
"""Initialize the context manager.
|
|
77
90
|
|
|
78
91
|
Args:
|
|
79
|
-
|
|
92
|
+
client: DataHubClient instance to use in this context
|
|
80
93
|
"""
|
|
81
|
-
self.
|
|
94
|
+
self.client = client
|
|
82
95
|
self._token: Optional[contextvars.Token] = None
|
|
83
96
|
|
|
84
|
-
def __enter__(self) -> "
|
|
85
|
-
"""Enter the context and set the
|
|
97
|
+
def __enter__(self) -> "DataHubClient":
    """Store the client in context and hand it to the ``with`` block.

    Returns:
        The DataHubClient instance given to the constructor.
    """
    bound_client = self.client
    # Keep the token so __exit__ can restore the previous context value.
    self._token = set_client(bound_client)
    return bound_client
|
|
92
105
|
|
|
93
106
|
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
94
|
-
"""Exit the context and reset the
|
|
107
|
+
"""Exit the context and reset the client."""
|
|
95
108
|
if self._token is not None:
|
|
96
|
-
|
|
109
|
+
reset_client(self._token)
|
|
97
110
|
self._token = None
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import functools
|
|
4
4
|
from typing import TYPE_CHECKING, Callable
|
|
5
5
|
|
|
6
|
-
from datahub_agent_context.context import
|
|
6
|
+
from datahub_agent_context.context import set_client
|
|
7
7
|
from datahub_agent_context.mcp_tools import get_me
|
|
8
8
|
from datahub_agent_context.mcp_tools.documents import grep_documents, search_documents
|
|
9
9
|
from datahub_agent_context.mcp_tools.domains import remove_domains, set_domains
|
|
@@ -32,6 +32,7 @@ from datahub_agent_context.mcp_tools.lineage import (
|
|
|
32
32
|
)
|
|
33
33
|
from datahub_agent_context.mcp_tools.owners import add_owners, remove_owners
|
|
34
34
|
from datahub_agent_context.mcp_tools.queries import get_dataset_queries
|
|
35
|
+
from datahub_agent_context.mcp_tools.save_document import save_document
|
|
35
36
|
from datahub_agent_context.mcp_tools.search import search
|
|
36
37
|
from datahub_agent_context.mcp_tools.tags import add_tags, remove_tags
|
|
37
38
|
from datahub_agent_context.mcp_tools.terms import (
|
|
@@ -57,14 +58,14 @@ def create_context_wrapper(func: Callable, client: "DataHubClient") -> Callable:
|
|
|
57
58
|
@functools.wraps(func)
|
|
58
59
|
def wrapper(*args, **kwargs):
|
|
59
60
|
# Set client in context for this function call
|
|
60
|
-
token =
|
|
61
|
+
token = set_client(client)
|
|
61
62
|
try:
|
|
62
63
|
return func(*args, **kwargs)
|
|
63
64
|
finally:
|
|
64
65
|
# Always reset context, even if function raises
|
|
65
|
-
from datahub_agent_context.context import
|
|
66
|
+
from datahub_agent_context.context import reset_client
|
|
66
67
|
|
|
67
|
-
|
|
68
|
+
reset_client(token)
|
|
68
69
|
|
|
69
70
|
return wrapper
|
|
70
71
|
|
|
@@ -123,5 +124,6 @@ def build_langchain_tools(
|
|
|
123
124
|
tools.append(tool(create_context_wrapper(remove_tags, client)))
|
|
124
125
|
tools.append(tool(create_context_wrapper(add_glossary_terms, client)))
|
|
125
126
|
tools.append(tool(create_context_wrapper(remove_glossary_terms, client)))
|
|
127
|
+
tools.append(tool(create_context_wrapper(save_document, client)))
|
|
126
128
|
|
|
127
129
|
return tools
|
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
"""Document saving tool for DataHub MCP server.
|
|
2
|
+
|
|
3
|
+
This tool enables AI agents to save documents to DataHub's knowledge base.
|
|
4
|
+
Documents are organized under a configurable parent folder (default: "Shared"),
|
|
5
|
+
optionally with per-user subfolders for organization.
|
|
6
|
+
|
|
7
|
+
Configuration via environment variables:
|
|
8
|
+
- SAVE_DOCUMENT_TOOL_ENABLED: Set to "false" to disable this tool (default: enabled). The tool also requires TOOLS_IS_MUTATION_ENABLED to be enabled.
|
|
9
|
+
- SAVE_DOCUMENT_PARENT_TITLE: Custom title for the parent folder (default: "Shared")
|
|
10
|
+
- SAVE_DOCUMENT_ORGANIZE_BY_USER: Set to "true" to enable per-user organization (default: false)
|
|
11
|
+
- SAVE_DOCUMENT_RESTRICT_UPDATES: Set to "false" to allow updating any document (default: true - only documents inside the shared folder can be updated)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import uuid
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from typing import Dict, List, Literal, Optional, Tuple
|
|
20
|
+
|
|
21
|
+
from datahub.metadata import schema_classes as models
|
|
22
|
+
from datahub.sdk import Document
|
|
23
|
+
from datahub_agent_context.context import get_datahub_client
|
|
24
|
+
from datahub_agent_context.mcp_tools.base import execute_graphql
|
|
25
|
+
|
|
26
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Fixed root parent document ID - independent of title for future flexibility.
# _get_root_parent_id() returns this value and _get_root_parent_urn() wraps it
# in a "urn:li:document:" URN.
ROOT_PARENT_DOC_ID = "__system_shared_documents"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _get_parent_title() -> str:
    """Return the display title for the parent folder.

    Reads SAVE_DOCUMENT_PARENT_TITLE from the environment and falls back to
    "Shared" when the variable is not set.
    """
    return os.getenv("SAVE_DOCUMENT_PARENT_TITLE", "Shared")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _is_organize_by_user_enabled() -> bool:
    """Report whether per-user organization is switched on.

    SAVE_DOCUMENT_ORGANIZE_BY_USER is read from the environment (default
    "false"); only "true", "1" or "yes", compared case-insensitively, enable it.
    """
    raw_setting = os.getenv("SAVE_DOCUMENT_ORGANIZE_BY_USER", "false")
    normalized = raw_setting.lower()
    return normalized in {"true", "1", "yes"}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _restrict_updates_to_shared_folder() -> bool:
    """Report whether updates are limited to the shared folder (default: True).

    SAVE_DOCUMENT_RESTRICT_UPDATES is read from the environment (default
    "true"). The restriction is on only for "true", "1" or "yes", compared
    case-insensitively; any other value turns it off.

    When enabled, only documents inside the shared folder can be updated.
    This prevents accidental modification of user-created or imported documents.
    """
    raw_setting = os.getenv("SAVE_DOCUMENT_RESTRICT_UPDATES", "true")
    normalized = raw_setting.lower()
    return normalized in {"true", "1", "yes"}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _make_safe_id(text: str, max_length: int = 30) -> str:
    """Turn arbitrary text into a lowercase, dash-separated ID string.

    The text is lowercased and every non-alphanumeric character becomes a
    dash. The result is cut to ``max_length`` characters first; only then are
    runs of dashes collapsed and leading/trailing dashes removed, so the
    returned ID can be shorter than ``max_length``.
    """
    substituted = [ch if ch.isalnum() else "-" for ch in text.lower()]
    truncated = "".join(substituted)[:max_length]
    collapsed = re.sub(r"-+", "-", truncated)  # Collapse multiple dashes
    return collapsed.strip("-")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _get_root_parent_id() -> str:
    """Return the fixed ID of the root parent document.

    The ID comes from ROOT_PARENT_DOC_ID and does not depend on the
    configurable display title, so the title can change without requiring
    data migration.
    """
    root_id = ROOT_PARENT_DOC_ID
    return root_id
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _get_root_parent_urn() -> str:
    """Return the document URN built from the root parent document ID."""
    root_id = _get_root_parent_id()
    return "urn:li:document:" + root_id
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Closed set of document subtypes accepted by save_document's
# ``document_type`` parameter.
DocumentType = Literal[
    "Insight", "Decision", "FAQ", "Analysis",
    "Summary", "Recommendation", "Note", "Context",
]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _get_current_user_info() -> Optional[Dict]:
    """Look up the authenticated user through the ``getMe`` GraphQL query.

    Returns:
        The ``corpUser`` entry of the ``me`` payload, or None when the payload
        is empty or the query raises. Failures are logged as warnings rather
        than propagated.

    Raises:
        RuntimeError: Propagated from get_datahub_client(), which is called
            outside the try block.
    """
    datahub_client = get_datahub_client()

    get_me_query = """
        query getMe {
            me {
                corpUser {
                    urn
                    username
                    info {
                        displayName
                        fullName
                        firstName
                        lastName
                    }
                    editableProperties {
                        displayName
                    }
                }
            }
        }
    """

    try:
        response = execute_graphql(
            datahub_client._graph,
            query=get_me_query,
            variables={},
            operation_name="getMe",
        )
        me_payload = response.get("me", {})
        if not me_payload:
            return None
        return me_payload.get("corpUser")
    except Exception as e:
        logger.warning(f"Failed to get current user info: {e}")
        return None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _get_user_display_name(user_info: Optional[Dict]) -> str:
    """Extract the best display name from user info.

    Candidates are tried in order: editable displayName, info displayName,
    info fullName, "firstName lastName", username, and finally the literal
    "Unknown User".

    Args:
        user_info: User dictionary with optional "editableProperties", "info"
            and "username" keys, or None.

    Returns:
        The first non-empty candidate, or "Unknown User" when none is usable.
    """
    if not user_info:
        return "Unknown User"

    # Try editable displayName first, then info fields
    editable = user_info.get("editableProperties") or {}
    info = user_info.get("info") or {}

    # A key can be present with a None value, in which case .get(key, "")
    # still returns None and the f-string would render "None None". Coerce
    # with `or ""` so missing parts drop out and later fallbacks still apply.
    first_name = info.get("firstName") or ""
    last_name = info.get("lastName") or ""
    first_and_last = f"{first_name} {last_name}".strip()

    return (
        editable.get("displayName")
        or info.get("displayName")
        or info.get("fullName")
        or first_and_last
        or user_info.get("username")
        or "Unknown User"
    )
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _generate_document_id() -> str:
    """Create a fresh document ID of the form ``shared-<uuid>``.

    A new random UUID (uuid4) is drawn on every call, so each save that uses
    this helper gets its own ID.
    """
    return "shared-" + str(uuid.uuid4())
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _get_document_info(entity: object) -> Optional[object]:
    """Return the documentInfo aspect of a fetched entity, or None if unavailable."""
    aspects = getattr(entity, "aspects", None) or getattr(entity, "_aspects", {})
    logger.debug(
        f"Document aspects type: {type(aspects)}, keys: {list(aspects.keys()) if isinstance(aspects, dict) else 'N/A'}"
    )
    if not isinstance(aspects, dict):
        return None
    return aspects.get("documentInfo")


def _get_parent_document_urn(doc_info: object) -> Optional[str]:
    """Return the parent document URN recorded on a documentInfo aspect, if any."""
    # parentDocument is a ParentDocumentClass with a .document field containing the URN
    parent_ref = getattr(doc_info, "parentDocument", None)
    return getattr(parent_ref, "document", None) if parent_ref else None


def _is_document_in_shared_folder(document_urn: str) -> Tuple[bool, Optional[str]]:
    """Check if a document is within the shared documents folder.

    The document must have the shared folder as a parent or ancestor. The
    parent chain is walked upward until the root folder is found, the chain
    ends, a parent cannot be read, or a cycle is detected.

    Args:
        document_urn: URN of the document the caller wants to update.

    Returns:
        Tuple of (is_valid, error_message)
        - (True, None) if the document is in the shared folder, or if the
          lookup returns None (the document does not exist yet)
        - (False, error_message) if the document is the root folder itself,
          is outside the folder, or could not be validated

    Raises:
        RuntimeError: Propagated from get_datahub_client(), which is called
            outside the try block.
    """
    client = get_datahub_client()
    root_parent_urn = _get_root_parent_urn()

    # Can't update the root folder itself
    if document_urn == root_parent_urn:
        return False, (
            "Cannot update the root shared documents folder. "
            "Only documents within this folder can be updated."
        )

    try:
        doc = client.entities.get(document_urn)
        logger.debug(
            f"Validating document {document_urn} for update, fetched: {doc is not None}"
        )
        if doc is None:
            # Document doesn't exist yet - allow (will be created)
            logger.debug(f"Document {document_urn} does not exist, allowing update")
            return True, None

        doc_info = _get_document_info(doc)
        if doc_info is None:
            logger.debug(f"Document {document_urn} has no documentInfo aspect")
            return False, (
                f"Document '{document_urn}' has no document info. "
                "Cannot verify it's in the shared folder."
            )

        current_parent_urn = _get_parent_document_urn(doc_info)
        logger.debug(
            f"Document parent URN: {current_parent_urn}, looking for root: {root_parent_urn}"
        )

        # Walk up the parent chain looking for the shared folder. `visited`
        # stops the walk if the chain loops back on itself.
        visited = set()
        while current_parent_urn and current_parent_urn not in visited:
            visited.add(current_parent_urn)

            # Found the shared folder - document is valid
            if current_parent_urn == root_parent_urn:
                return True, None

            try:
                parent_doc = client.entities.get(current_parent_urn)
                parent_info = (
                    _get_document_info(parent_doc) if parent_doc is not None else None
                )
                if parent_info is None:
                    break
                current_parent_urn = _get_parent_document_urn(parent_info)
            except Exception as walk_error:
                # Still fail closed (the document is reported as outside the
                # folder), but record why the walk stopped instead of hiding it.
                logger.warning(
                    f"Failed to read parent document {current_parent_urn} "
                    f"while validating {document_urn}: {walk_error}"
                )
                break

        return False, (
            f"Document '{document_urn}' is not in the shared documents folder. "
            "Only documents in this folder can be updated."
        )

    except Exception as e:
        logger.error(f"Failed to validate document hierarchy: {e}", exc_info=True)
        # Fail closed - if we can't validate, don't allow the update
        return False, (
            f"Failed to validate document hierarchy for '{document_urn}': {str(e)}. "
            "Cannot update document without verifying it's in the shared folder."
        )
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _ensure_document_exists(
    doc_id: str,
    title: str,
    description: str,
    parent_urn: Optional[str] = None,
) -> str:
    """Ensure a folder document exists, creating it if necessary.

    Args:
        doc_id: Document ID; the URN is ``urn:li:document:<doc_id>``.
        title: Title used if the folder has to be created.
        description: Text used if the folder has to be created.
        parent_urn: Optional parent document URN for the new folder.

    Returns:
        The document URN. It is returned even when the upsert fails, because
        creation is best-effort and the failure is only logged.
    """
    client = get_datahub_client()
    doc_urn = f"urn:li:document:{doc_id}"

    try:
        if client.entities.get(doc_urn) is not None:
            return doc_urn
    except Exception as lookup_error:
        # Best effort: a failed lookup is treated like "not found" so creation
        # is still attempted, but the reason is logged rather than discarded.
        logger.debug(
            f"Lookup of {doc_urn} failed, will attempt to create it: {lookup_error}"
        )

    # Create the document
    doc = Document.create_document(
        id=doc_id,
        title=title,
        text=description,
        subtype="Folder",
        parent_document=parent_urn,
        show_in_global_context=True,
    )

    try:
        client.entities.upsert(doc)
        logger.info(f"Created folder document: {doc_urn}")
    except Exception as e:
        logger.warning(f"Failed to create folder document (may already exist): {e}")

    return doc_urn
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _ensure_parent_hierarchy(user_info: Optional[Dict]) -> Tuple[str, Optional[str]]:
    """Ensure the parent document hierarchy exists.

    The root folder is always ensured. When per-user organization is enabled
    and user info is available, a per-user folder under the root is ensured
    as well and becomes the parent for the document.

    Args:
        user_info: Current user dictionary (reads "urn" and "username"), or
            None when the user could not be determined.

    Returns:
        Tuple of (parent_urn_for_document, user_urn_if_available)
    """
    # Always create the root parent
    root_urn = _ensure_document_exists(
        doc_id=_get_root_parent_id(),
        title=_get_parent_title(),
        description="Contains shared documents authored through AI agents like Ask DataHub.",
        parent_urn=None,
    )

    # If per-user organization is disabled, return root as parent
    if not _is_organize_by_user_enabled():
        return root_urn, user_info.get("urn") if user_info else None

    # No user info available, use root
    if not user_info:
        return root_urn, None

    user_urn = user_info.get("urn")
    # The "username" key can be present with a None value; .get(key, default)
    # would then return None and _make_safe_id would fail on None.lower().
    username = user_info.get("username") or "unknown"
    display_name = _get_user_display_name(user_info)

    # Create user folder under root
    user_folder_id = f"agent-docs-user-{_make_safe_id(username, max_length=30)}"
    user_folder_urn = _ensure_document_exists(
        doc_id=user_folder_id,
        title=display_name,
        description=f"Contains documents authored in sessions for {display_name}.",
        parent_urn=root_urn,
    )
    return user_folder_urn, user_urn
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _save_document_failure(message: Optional[str]) -> dict:
    """Build the failure payload returned by save_document."""
    return {
        "success": False,
        "urn": None,
        "message": message,
        "author": None,
    }


def _topics_to_tag_urns(topics: Optional[List[str]]) -> Optional[List[str]]:
    """Convert topic names to tag URNs, dropping blanks and keeping existing tag URNs."""
    if not topics:
        return None
    tag_prefix = "urn:li:tag:"
    cleaned = (topic.strip() for topic in topics if topic)
    tag_urns = [
        topic if topic.startswith(tag_prefix) else f"{tag_prefix}{topic}"
        for topic in cleaned
        if topic
    ]
    return tag_urns or None


def save_document(
    document_type: DocumentType,
    title: str,
    content: str,
    urn: Optional[str] = None,
    topics: Optional[List[str]] = None,
    related_documents: Optional[List[str]] = None,
    related_assets: Optional[List[str]] = None,
) -> dict:
    """Save or update a STANDALONE document in DataHub's knowledge base. Once saved,
    a document will be visible to all users of DataHub and to Ask DataHub AI assistant.

    NOTE: This tool is for creating standalone documents (insights, FAQs, notes, etc.),
    NOT for updating descriptions on data assets like datasets or dashboards.
    Use update_description for asset descriptions.

    WHEN TO USE THIS TOOL:

    Use this tool when the user explicitly requests to save information:
    - "Save this for later..."
    - "Bookmark this.."
    - "Document this insight.."
    - "Remember this.."
    - "Add this to our knowledge base.."
    - "Create a document about this.."

    Also SUGGEST using this tool when the user provides valuable information such as:
    - Useful SQL queries they want to reuse
    - Decisions about data modeling or architecture
    - FAQs or common questions about data
    - Analysis results worth sharing with the team
    - Corrections or clarifications about data, service, business definitions, etc.

    ⚠️ IMPORTANT: Before calling this tool, you SHOULD confirm with the user that
    they want to save this document. Present the title, content summary,
    and any related assets, and ask for their approval before proceeding. Do not attempt to save
    information that would be private or user-specific.

    This tool persists insights, decisions, FAQs, and other contextual information
    as documents in DataHub. Documents are organized hierarchically:
    - Under a configurable parent folder (default: "Shared" for global context)
    - Optionally grouped by the user who authored them

    UPSERT BEHAVIOR:
    - If `urn` is NOT provided: Creates a NEW document with a unique URN
    - If `urn` IS provided: Updates the EXISTING document with that URN

    IMPORTANT USAGE GUIDELINES:
    - Always confirm with the user before saving
    - Provide a clear summary of what will be saved
    - Ask if the user wants to proceed with creating/updating the document

    REQUIRED PARAMETERS:

    document_type: The type of document being saved. For example:
        - "Insight": Data insights or discoveries
        - "Decision": Documented decisions with rationale
        - "FAQ": Frequently asked questions and answers
        - "Analysis": Data analysis findings
        - "Summary": Summaries of complex information
        - "Recommendation": Suggested actions or improvements
        - "Note": General notes or observations

    title: A descriptive title for the document.
        - Example: "Sales Data Quality Issues - Q4 2024"
        - Example: "Decision: Deprecating Legacy Customer Table"

    content: The full content of the document (supports markdown formatting).
        - Can include headers, lists, code blocks, tables, etc.
        - Example: "## Summary\\n\\nThe orders table shows 15% null values..."

    OPTIONAL PARAMETERS:

    urn: The URN of an existing document to update.
        - ONLY use after a search_documents or get_entity call returns a document URN
        - Example: "urn:li:document:agent-insight-abc123"
        - If not provided, a new document is created with a unique URN
        - If provided, the existing document is updated (upsert operation)

    topics: List of topic tags for categorization and discovery (like a word cloud).
        - These become searchable tags in DataHub that users can click to find related documents
        - Example: ["data-quality", "customer-data", "Q4-2024"]
        - Example: ["high-priority", "sales", "email", "null-values"]

    related_documents: URNs of related documents.
        - Example: ["urn:li:document:agent-insight-sales-abc123"]
        - Creates links between related knowledge

    related_assets: URNs of related data assets (tables, dashboards, etc).
        - Example: ["urn:li:dataset:(urn:li:dataPlatform:snowflake,db.orders,PROD)"]
        - Links the document to specific data assets in the catalog
        - Users can then see this document when viewing those assets

    Returns:
        Dictionary with:
        - success: Boolean indicating if the operation succeeded
        - urn: The URN of the created/updated document
        - message: Success or error message
        - author: The user who authored the document (if available)

    RECOMMENDED WORKFLOW:

    1. Gather information you want to save publicly
    2. Present a summary to the user:
       "I'd like to save the following insight to DataHub:
       - Title: High Null Rate in Customer Emails
       - Type: Insight
       - Related to: customers table
       Would you like me to save this?"
    3. Only call save_document after user confirms

    EXAMPLE USAGE:

    1. Create a new insight (after user confirmation):
    save_document(
        document_type="Insight",
        title="High Null Rate in Customer Emails",
        content="## Finding\\n\\n23% of customer records have null email...",
        topics=["data-quality", "customer-data", "email", "high-severity"],
        related_assets=["urn:li:dataset:(urn:li:dataPlatform:snowflake,customers,PROD)"]
    )

    2. Update an existing document (after finding it via search_documents):
    save_document(
        urn="urn:li:document:agent-insight-abc123",  # From search_documents result
        document_type="Insight",
        title="High Null Rate in Customer Emails (Updated)",
        content="## Finding\\n\\nUpdated: Now 18% of customer records have null email...",
        topics=["data-quality", "customer-data", "email", "resolved"]
    )

    3. Document a decision:
    save_document(
        document_type="Decision",
        title="Migrating to New Production Database",
        content="## Decision\\n\\nWe will migrate to v2 schema...\\n\\n## Rationale\\n...",
        topics=["architecture", "data-model", "migration", "approved"]
    )
    """
    # Local import keeps this block self-contained; get_args lets the accepted
    # values be read from DocumentType instead of a second hand-kept list.
    from typing import get_args

    client = get_datahub_client()

    # Validate inputs
    if not title or not title.strip():
        return _save_document_failure("title cannot be empty")

    if not content or not content.strip():
        return _save_document_failure("content cannot be empty")

    valid_document_types = get_args(DocumentType)
    if document_type not in valid_document_types:
        return _save_document_failure(
            f"Invalid document_type '{document_type}'. Must be one of: {', '.join(valid_document_types)}"
        )

    # Validate URN format if provided
    if urn is not None:
        document_prefix = "urn:li:document:"
        if not urn.startswith(document_prefix):
            return _save_document_failure(
                f"Invalid urn format '{urn}'. Must start with 'urn:li:document:'"
            )

        # Slice off only the leading prefix; str.replace() would also remove
        # the same text if it appeared again inside the document ID.
        document_id = urn[len(document_prefix):]
        if not document_id:
            return _save_document_failure(
                f"Invalid urn format '{urn}'. The document id after 'urn:li:document:' is empty"
            )

        # Validate that the document is within the shared folder hierarchy.
        # This prevents accidental modification of user-created or imported documents
        if _restrict_updates_to_shared_folder():
            is_valid, error_message = _is_document_in_shared_folder(urn)
            if not is_valid:
                return _save_document_failure(error_message)

        is_update = True
        document_urn = urn
    else:
        is_update = False
        # Generate new document ID
        document_id = _generate_document_id()
        document_urn = f"urn:li:document:{document_id}"

    try:
        # Get current user info for attribution and organization
        user_info = _get_current_user_info()
        user_display_name = _get_user_display_name(user_info) if user_info else None
        user_urn = user_info.get("urn") if user_info else None

        # Ensure parent hierarchy exists and get the parent URN for our document
        # For updates, we still want to maintain proper parent hierarchy
        parent_urn, _ = _ensure_parent_hierarchy(user_info)

        # Set current user as owner only for NEW documents (captures authorship)
        # For updates, preserve existing ownership by not setting owners
        # The SDK expects owner URNs as strings in a list
        if is_update:
            owners = None  # Don't overwrite existing ownership on updates
            logger.info("Updating existing document - preserving existing ownership")
        else:
            owners = [user_urn] if user_urn else None
            logger.info(f"Creating new document - setting owners: {owners}")

        # Convert topics to tag URNs (DataHub expects full URNs). Blank topics
        # are dropped and topics that already are tag URNs are passed through.
        # TODO: Decide whether tags are the right abstraction here.
        # Alternative: Use Structured Properties, custom properties, or custom label.
        # Just needs to be searchable later on.
        tag_urns = _topics_to_tag_urns(topics)

        # Create the document
        doc = Document.create_document(
            id=document_id,
            title=title,
            text=content,
            subtype=document_type,
            parent_document=parent_urn,
            related_documents=related_documents,
            related_assets=related_assets,
            owners=owners,
            tags=tag_urns,  # Use topics as tags for searchability
            show_in_global_context=True,
        )

        # Manually add DocumentSettings to ensure showInGlobalContext=True is set
        # This is a workaround until the SDK is updated to always emit this aspect
        # TODO: Remove this once SDK properly emits DocumentSettings for show_in_global_context=True
        actor_urn = user_urn or "urn:li:corpuser:datahub"
        settings_audit = models.AuditStampClass(
            time=int(datetime.now().timestamp() * 1000),
            actor=actor_urn,
        )
        document_settings = models.DocumentSettingsClass(
            showInGlobalContext=True,
            lastModified=settings_audit,
        )
        doc._set_aspect(document_settings)

        # Log document details before upsert
        logger.info(
            f"Document to upsert: URN={document_urn}, owners={owners}, parent={parent_urn}"
        )

        # Upsert the document
        try:
            client.entities.upsert(doc)
            logger.info("Upsert completed successfully")
        except Exception as upsert_error:
            logger.error(f"Failed to upsert document: {upsert_error}", exc_info=True)
            raise

        action = "updated" if is_update else "created"
        logger.info(f"Successfully {action} document: {document_urn}")

        return {
            "success": True,
            "urn": document_urn,
            "message": f"Successfully {action} document: {title}",
            "author": user_display_name,
        }

    except Exception as e:
        logger.error(f"Failed to save document: {e}")
        return _save_document_failure(f"Error saving document: {str(e)}")
|
{datahub_agent_context-1.4.0rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datahub-agent-context
|
|
3
|
-
Version: 1.4.0rc1
|
|
3
|
+
Version: 1.4.0rc2
|
|
4
4
|
Summary: DataHub Agent Context - MCP Tools for AI Agents
|
|
5
5
|
Home-page: https://datahub.io/
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -28,28 +28,28 @@ Classifier: Environment :: MacOS X
|
|
|
28
28
|
Classifier: Topic :: Software Development
|
|
29
29
|
Requires-Python: >=3.9
|
|
30
30
|
Description-Content-Type: text/markdown
|
|
31
|
-
Requires-Dist: cachetools<7.0.0,>=5.0.0
|
|
32
|
-
Requires-Dist: httpcore<2.0,>=1.0.9
|
|
33
|
-
Requires-Dist: json-repair<1.0.0,>=0.25.0
|
|
34
31
|
Requires-Dist: h11<1.0,>=0.16
|
|
35
|
-
Requires-Dist: jmespath<2.0.0,>=1.0.0
|
|
36
|
-
Requires-Dist: acryl-datahub==1.4.0rc1
|
|
37
|
-
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
38
32
|
Requires-Dist: google-re2<2.0,>=1.0
|
|
33
|
+
Requires-Dist: acryl-datahub==1.4.0rc2
|
|
34
|
+
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
35
|
+
Requires-Dist: httpcore<2.0,>=1.0.9
|
|
36
|
+
Requires-Dist: jmespath<2.0.0,>=1.0.0
|
|
37
|
+
Requires-Dist: json-repair<1.0.0,>=0.25.0
|
|
38
|
+
Requires-Dist: cachetools<7.0.0,>=5.0.0
|
|
39
39
|
Provides-Extra: dev
|
|
40
|
-
Requires-Dist: tox<5.0.0,>=4.0.0; extra == "dev"
|
|
41
|
-
Requires-Dist: types-PyYAML<7.0.0,>=6.0.0; extra == "dev"
|
|
42
|
-
Requires-Dist: mypy==1.17.1; extra == "dev"
|
|
43
|
-
Requires-Dist: types-jmespath<2.0.0,>=1.0.0; extra == "dev"
|
|
44
|
-
Requires-Dist: snowflake-connector-python<4.0.0,>=3.0.0; extra == "dev"
|
|
45
|
-
Requires-Dist: click<9.0.0,>=8.0.0; extra == "dev"
|
|
46
|
-
Requires-Dist: types-toml<1.0.0,>=0.10.0; extra == "dev"
|
|
47
|
-
Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
|
|
48
40
|
Requires-Dist: ruff==0.11.7; extra == "dev"
|
|
49
|
-
Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
|
|
50
|
-
Requires-Dist: types-requests<3.0.0,>=2.0.0; extra == "dev"
|
|
51
41
|
Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "dev"
|
|
42
|
+
Requires-Dist: snowflake-connector-python<4.0.0,>=3.0.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-cov<7.0.0,>=2.8.0; extra == "dev"
|
|
52
44
|
Requires-Dist: types-cachetools<7.0.0,>=5.0.0; extra == "dev"
|
|
45
|
+
Requires-Dist: types-toml<1.0.0,>=0.10.0; extra == "dev"
|
|
46
|
+
Requires-Dist: tox<5.0.0,>=4.0.0; extra == "dev"
|
|
47
|
+
Requires-Dist: click<9.0.0,>=8.0.0; extra == "dev"
|
|
48
|
+
Requires-Dist: types-requests<3.0.0,>=2.0.0; extra == "dev"
|
|
49
|
+
Requires-Dist: mypy==1.17.1; extra == "dev"
|
|
50
|
+
Requires-Dist: types-PyYAML<7.0.0,>=6.0.0; extra == "dev"
|
|
51
|
+
Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"
|
|
52
|
+
Requires-Dist: types-jmespath<2.0.0,>=1.0.0; extra == "dev"
|
|
53
53
|
Provides-Extra: langchain
|
|
54
54
|
Requires-Dist: langchain-core<2.0.0,>=1.2.7; extra == "langchain"
|
|
55
55
|
Provides-Extra: snowflake
|
|
@@ -111,7 +111,7 @@ from datahub_agent_context.mcp_tools.entities import get_entities
|
|
|
111
111
|
client = DataHubClient.from_env()
|
|
112
112
|
|
|
113
113
|
# Search for datasets
|
|
114
|
-
with client.graph as graph:
|
|
114
|
+
with client as client:
|
|
115
115
|
results = search(
|
|
116
116
|
query="user_data",
|
|
117
117
|
filters={"entity_type": ["dataset"]},
|
|
@@ -119,7 +119,7 @@ with client.graph as graph:
|
|
|
119
119
|
)
|
|
120
120
|
|
|
121
121
|
# Get detailed entity information
|
|
122
|
-
with client.graph as graph:
|
|
122
|
+
with client as client:
|
|
123
123
|
entities = get_entities(
|
|
124
124
|
urns=[result["entity"]["urn"] for result in results["searchResults"]]
|
|
125
125
|
)
|
|
@@ -181,6 +181,7 @@ agent = create_agent(model, tools=tools, system_prompt="...")
|
|
|
181
181
|
- `add_owners()`, `remove_owners()` - Manage owners
|
|
182
182
|
- `add_glossary_terms()`, `remove_glossary_terms()` - Manage glossary terms
|
|
183
183
|
- `add_structured_properties()`, `remove_structured_properties()` - Manage structured properties
|
|
184
|
+
- `save_document()` - Save or update a Document.
|
|
184
185
|
|
|
185
186
|
#### User Tools
|
|
186
187
|
|
{datahub_agent_context-1.4.0rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/RECORD
RENAMED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
datahub_agent_context/__init__.py,sha256=
|
|
2
|
-
datahub_agent_context/_version.py,sha256=
|
|
1
|
+
datahub_agent_context/__init__.py,sha256=WgJFMZaA5ae_9ntP686UXd0TvZpbGwQRdSISi0JHsvU,967
|
|
2
|
+
datahub_agent_context/_version.py,sha256=sjXQOjG_dBO3UkwODDbFi7aLjoKMlkbubWQwtnl0qh0,648
|
|
3
3
|
datahub_agent_context/cli.py,sha256=ND0KLT3cFb6KnQl6kEb7B74tAOu6yfS4dO6mJjZW1x4,4441
|
|
4
|
-
datahub_agent_context/context.py,sha256=
|
|
4
|
+
datahub_agent_context/context.py,sha256=wj9q9hGf72q6oarnfEFHzqgS-vwtMO79hhjz8GNC0QQ,3163
|
|
5
5
|
datahub_agent_context/py.typed,sha256=kO13kg6OXApIRwKRcPpEOL09GZHx2Pk8Rp2KZpxv0lw,63
|
|
6
6
|
datahub_agent_context/langchain_tools/__init__.py,sha256=M0tn6fD9qY5Wc1XdptQuIf_7MSKLX8OSBaBxcPo5wmw,259
|
|
7
|
-
datahub_agent_context/langchain_tools/builder.py,sha256
|
|
7
|
+
datahub_agent_context/langchain_tools/builder.py,sha256=-h8IuFWfIJGhQXnuK5ASCMwhoNmIhxLV2yu3ZOZPwpg,5288
|
|
8
8
|
datahub_agent_context/mcp_tools/__init__.py,sha256=7iUoWuT-KvszOqnmL3_co2LVQdhZtkQKRLRE98Hn8WM,1544
|
|
9
9
|
datahub_agent_context/mcp_tools/_token_estimator.py,sha256=U0kTqPZKBkKwxe7JZaLxIIFEobNSrEEHoM4NQbrmmAE,2782
|
|
10
10
|
datahub_agent_context/mcp_tools/base.py,sha256=UFqMe9yS-YikgaKOnu2DkaMLaHRzwSUBOFXOWxBjULA,11054
|
|
@@ -17,6 +17,7 @@ datahub_agent_context/mcp_tools/helpers.py,sha256=NRIoVEB62vDWDg26UOFv-IhM8mEQd4
|
|
|
17
17
|
datahub_agent_context/mcp_tools/lineage.py,sha256=sJVR2jJkbGU_KjjtqZ8IJVOKDaIjDdtQKtAIxYWq71Q,26753
|
|
18
18
|
datahub_agent_context/mcp_tools/owners.py,sha256=LGZ5n5a3xRKSttay2NLf_rq97_Dl9pGIcVFi-l7uJK8,11798
|
|
19
19
|
datahub_agent_context/mcp_tools/queries.py,sha256=V4-yFcCi3c8r4Xy7XVKfQ7s3SsIWXMAHRrI8Sqf2g20,6864
|
|
20
|
+
datahub_agent_context/mcp_tools/save_document.py,sha256=r3nYUQSQjmgCybX7X39zu8Ef4FON284K_D1X9Y53cIE,23221
|
|
20
21
|
datahub_agent_context/mcp_tools/search.py,sha256=z5Hy1jLV4uDO26nb_oFuP5w6GX0DYcYWRIWn3kDp7dY,9880
|
|
21
22
|
datahub_agent_context/mcp_tools/structured_properties.py,sha256=amj7C-sbeAyctrXY_rpc2vCNTaJy2aTRx21TioeKEJk,15745
|
|
22
23
|
datahub_agent_context/mcp_tools/tags.py,sha256=5_Wg1Jqf_FgPgYuUV5bDwQ6J8t_sECcSM5yVtwQruPs,10814
|
|
@@ -59,7 +60,7 @@ datahub_agent_context/snowflake/udfs/search_datahub.py,sha256=-El0JnpkClaaxX5tCZ
|
|
|
59
60
|
datahub_agent_context/snowflake/udfs/search_documents.py,sha256=yrcLsSNyq_M-CsgwGrsgSXFVeusKAvuNqEI_DvHtC08,1944
|
|
60
61
|
datahub_agent_context/snowflake/udfs/set_domains.py,sha256=nKFJ9ZMBCz9qWgJv-FkOkeEmRE-QYBUCKbHw1RFdJmc,1701
|
|
61
62
|
datahub_agent_context/snowflake/udfs/update_description.py,sha256=laJjqqRARDy3VCo0xX_5lvTrZn2U3Dwm7XEliPdDRSA,2096
|
|
62
|
-
datahub_agent_context-1.4.
|
|
63
|
-
datahub_agent_context-1.4.
|
|
64
|
-
datahub_agent_context-1.4.
|
|
65
|
-
datahub_agent_context-1.4.
|
|
63
|
+
datahub_agent_context-1.4.0rc2.dist-info/METADATA,sha256=GzhPDkJ4HtHmsyCeu_Lm_z44amlbDRHevW7R9hSH0Y8,7795
|
|
64
|
+
datahub_agent_context-1.4.0rc2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
65
|
+
datahub_agent_context-1.4.0rc2.dist-info/top_level.txt,sha256=Tv1bg7ZwDOKM9u9RHj5m1Zbx2LDf4lVBBRNHi_gBBTI,22
|
|
66
|
+
datahub_agent_context-1.4.0rc2.dist-info/RECORD,,
|
|
File without changes
|
{datahub_agent_context-1.4.0rc1.dist-info → datahub_agent_context-1.4.0rc2.dist-info}/top_level.txt
RENAMED
|
File without changes
|