datahub-agent-context 1.3.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. datahub_agent_context/__init__.py +25 -0
  2. datahub_agent_context/_version.py +16 -0
  3. datahub_agent_context/context.py +97 -0
  4. datahub_agent_context/langchain_tools/__init__.py +8 -0
  5. datahub_agent_context/langchain_tools/builder.py +127 -0
  6. datahub_agent_context/mcp_tools/__init__.py +46 -0
  7. datahub_agent_context/mcp_tools/_token_estimator.py +71 -0
  8. datahub_agent_context/mcp_tools/base.py +325 -0
  9. datahub_agent_context/mcp_tools/descriptions.py +299 -0
  10. datahub_agent_context/mcp_tools/documents.py +473 -0
  11. datahub_agent_context/mcp_tools/domains.py +246 -0
  12. datahub_agent_context/mcp_tools/entities.py +349 -0
  13. datahub_agent_context/mcp_tools/get_me.py +99 -0
  14. datahub_agent_context/mcp_tools/gql/__init__.py +13 -0
  15. datahub_agent_context/mcp_tools/gql/document_search.gql +114 -0
  16. datahub_agent_context/mcp_tools/gql/document_semantic_search.gql +111 -0
  17. datahub_agent_context/mcp_tools/gql/entity_details.gql +1682 -0
  18. datahub_agent_context/mcp_tools/gql/queries.gql +51 -0
  19. datahub_agent_context/mcp_tools/gql/query_entity.gql +37 -0
  20. datahub_agent_context/mcp_tools/gql/read_documents.gql +16 -0
  21. datahub_agent_context/mcp_tools/gql/search.gql +242 -0
  22. datahub_agent_context/mcp_tools/helpers.py +448 -0
  23. datahub_agent_context/mcp_tools/lineage.py +698 -0
  24. datahub_agent_context/mcp_tools/owners.py +318 -0
  25. datahub_agent_context/mcp_tools/queries.py +191 -0
  26. datahub_agent_context/mcp_tools/search.py +239 -0
  27. datahub_agent_context/mcp_tools/structured_properties.py +447 -0
  28. datahub_agent_context/mcp_tools/tags.py +296 -0
  29. datahub_agent_context/mcp_tools/terms.py +295 -0
  30. datahub_agent_context/py.typed +2 -0
  31. datahub_agent_context-1.3.1.8.dist-info/METADATA +233 -0
  32. datahub_agent_context-1.3.1.8.dist-info/RECORD +34 -0
  33. datahub_agent_context-1.3.1.8.dist-info/WHEEL +5 -0
  34. datahub_agent_context-1.3.1.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,246 @@
1
+ """Domain management tools for DataHub MCP server."""
2
+
3
+ import logging
4
+ from typing import List
5
+
6
+ from datahub_agent_context.context import get_graph
7
+ from datahub_agent_context.mcp_tools.base import execute_graphql
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def _validate_domain_urn(domain_urn: str) -> None:
    """Validate that a domain URN exists in DataHub and is a Domain entity.

    Args:
        domain_urn: The domain URN to check (e.g., "urn:li:domain:marketing")

    Raises:
        ValueError: If the lookup fails, the URN does not resolve to any
            entity, or the resolved entity is not of type DOMAIN.
    """
    graph = get_graph()
    query = """
    query getDomain($urn: String!) {
        entity(urn: $urn) {
            urn
            type
            ... on Domain {
                properties {
                    name
                }
            }
        }
    }
    """

    # Keep the try body narrow: only the GraphQL round-trip can fail
    # unexpectedly. The validation checks below raise ValueError directly,
    # so no isinstance-based re-raise dance is needed.
    try:
        result = execute_graphql(
            graph,
            query=query,
            variables={"urn": domain_urn},
            operation_name="getDomain",
        )
    except Exception as e:
        raise ValueError(f"Failed to validate domain URN: {str(e)}") from e

    entity = result.get("entity")

    if entity is None:
        raise ValueError(
            f"Domain URN does not exist in DataHub: {domain_urn}. "
            f"Please use the search tool with entity_type filter to find existing domains, "
            f"or create the domain first before assigning it."
        )

    # The URN resolved, but to some other entity type (e.g. a dataset).
    if entity.get("type") != "DOMAIN":
        raise ValueError(
            f"The URN is not a domain entity: {domain_urn} (type: {entity.get('type')})"
        )
60
+
61
+
62
def set_domains(
    domain_urn: str,
    entity_urns: List[str],
) -> dict:
    """Set domain for multiple DataHub entities.

    This tool allows you to assign a domain to multiple entities in a single operation.
    Useful for organizing datasets, dashboards, and other entities into logical business domains.

    Note: Domain assignment in DataHub is entity-level only. Each entity can belong to exactly one domain.
    Setting a new domain will replace any existing domain assignment.

    Args:
        domain_urn: Domain URN to assign (e.g., "urn:li:domain:marketing")
        entity_urns: List of entity URNs to assign to the domain (e.g., dataset URNs, dashboard URNs)

    Returns:
        Dictionary with:
        - success: Boolean indicating if the operation succeeded
        - message: Success or error message

    Raises:
        ValueError: If inputs are empty or the domain URN fails validation.
        RuntimeError: If the GraphQL mutation fails or returns false.

    Examples:
        # Set domain for multiple datasets
        set_domains(
            domain_urn="urn:li:domain:marketing",
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.campaigns,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.customers,PROD)"
            ]
        )

        # Set domain for mixed entity types
        set_domains(
            domain_urn="urn:li:domain:engineering",
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.logs,PROD)",
                "urn:li:dataFlow:(urn:li:dataPlatform:airflow,etl_pipeline,PROD)",
                "urn:li:dashboard:(urn:li:dataPlatform:superset,metrics,PROD)"
            ]
        )

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = set_domains(
                domain_urn="urn:li:domain:marketing",
                entity_urns=["urn:li:dataset:(...)"]
            )
    """
    graph = get_graph()
    if not domain_urn:
        raise ValueError("domain_urn cannot be empty")
    if not entity_urns:
        raise ValueError("entity_urns cannot be empty")

    # Fail fast with a helpful message before issuing the batch mutation.
    _validate_domain_urn(domain_urn)

    # One resource reference per target entity.
    resources = [{"resourceUrn": resource_urn} for resource_urn in entity_urns]

    mutation = """
    mutation batchSetDomain($input: BatchSetDomainInput!) {
        batchSetDomain(input: $input)
    }
    """

    variables = {"input": {"domainUrn": domain_urn, "resources": resources}}

    # Keep the try body narrow: only the network call is wrapped, so the
    # "operation returned false" RuntimeError below needs no re-raise guard.
    try:
        result = execute_graphql(
            graph,
            query=mutation,
            variables=variables,
            operation_name="batchSetDomain",
        )
    except Exception as e:
        raise RuntimeError(f"Error setting domain: {str(e)}") from e

    # batchSetDomain returns a bare boolean success flag.
    if not result.get("batchSetDomain", False):
        raise RuntimeError("Failed to set domain - operation returned false")

    return {
        "success": True,
        "message": f"Successfully set domain for {len(entity_urns)} entit(ies)",
    }
161
+
162
+
163
def remove_domains(
    entity_urns: List[str],
) -> dict:
    """Remove domain assignment from multiple DataHub entities.

    This tool allows you to unset the domain for multiple entities in a single operation.
    Useful for removing domain assignments when reorganizing entities or correcting misassignments.

    Args:
        entity_urns: List of entity URNs to remove domain from (e.g., dataset URNs, dashboard URNs)

    Returns:
        Dictionary with:
        - success: Boolean indicating if the operation succeeded
        - message: Success or error message

    Raises:
        ValueError: If entity_urns is empty.
        RuntimeError: If the GraphQL mutation fails or returns false.

    Examples:
        # Remove domain from multiple datasets
        remove_domains(
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.old_table,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.deprecated,PROD)"
            ]
        )

        # Remove domain from mixed entity types
        remove_domains(
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.temp,PROD)",
                "urn:li:dataFlow:(urn:li:dataPlatform:airflow,old_pipeline,PROD)",
                "urn:li:dashboard:(urn:li:dataPlatform:superset,test,PROD)"
            ]
        )

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = remove_domains(entity_urns=["urn:li:dataset:(...)"])
    """
    graph = get_graph()
    if not entity_urns:
        raise ValueError("entity_urns cannot be empty")

    # One resource reference per target entity.
    resources = [{"resourceUrn": resource_urn} for resource_urn in entity_urns]

    mutation = """
    mutation batchSetDomain($input: BatchSetDomainInput!) {
        batchSetDomain(input: $input)
    }
    """

    # Passing domainUrn: null to batchSetDomain unsets the domain.
    variables = {"input": {"domainUrn": None, "resources": resources}}

    # Keep the try body narrow: only the network call is wrapped, so the
    # "operation returned false" RuntimeError below needs no re-raise guard.
    try:
        result = execute_graphql(
            graph,
            query=mutation,
            variables=variables,
            operation_name="batchSetDomain",
        )
    except Exception as e:
        raise RuntimeError(f"Error removing domain: {str(e)}") from e

    # batchSetDomain returns a bare boolean success flag.
    if not result.get("batchSetDomain", False):
        raise RuntimeError("Failed to remove domain - operation returned false")

    return {
        "success": True,
        "message": f"Successfully removed domain from {len(entity_urns)} entit(ies)",
    }
@@ -0,0 +1,349 @@
1
+ """Tools for getting entity information."""
2
+
3
+ import json
4
+ import logging
5
+ import pathlib
6
+ from typing import Iterator, List, Optional
7
+
8
+ from json_repair import repair_json
9
+
10
+ from datahub.errors import ItemNotFoundError
11
+ from datahub_agent_context.context import get_graph
12
+ from datahub_agent_context.mcp_tools.base import execute_graphql
13
+ from datahub_agent_context.mcp_tools.helpers import (
14
+ clean_get_entities_response,
15
+ inject_urls_for_urns,
16
+ truncate_descriptions,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Load GraphQL queries
22
+ entity_details_fragment_gql = (
23
+ pathlib.Path(__file__).parent / "gql/entity_details.gql"
24
+ ).read_text()
25
+ query_entity_gql = (pathlib.Path(__file__).parent / "gql/query_entity.gql").read_text()
26
+
27
+
28
def get_entities(urns: List[str] | str) -> List[dict] | dict:
    """Get detailed information about one or more entities by their DataHub URNs.

    IMPORTANT: Pass an array of URNs to retrieve multiple entities in a single call - this is much
    more efficient than calling this tool multiple times. When examining search results, always pass
    an array with the top 3-10 result URNs to compare and find the best match.

    Accepts an array of URNs or a single URN. Supports all entity types including datasets,
    assertions, incidents, dashboards, charts, users, groups, and more. The response fields vary
    based on the entity type.

    Args:
        urns: List of URNs or a single URN string

    Returns:
        Single dict if single URN provided, list of dicts if multiple URNs provided.
        Each result contains entity details or error information.

    Raises:
        ItemNotFoundError: If single URN provided and entity not found

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = get_entities(urns=["urn:li:dataset:(...)"])
    """
    graph = get_graph()
    # Handle JSON-stringified arrays
    # Some MCP clients/LLMs pass arrays as JSON strings instead of proper lists
    if isinstance(urns, str):
        urns_str = urns.strip()  # Remove leading/trailing whitespace

        # Try to parse as JSON array first
        if urns_str.startswith("["):
            parsed = None
            try:
                # Use json_repair to handle malformed JSON from LLMs.
                # json.JSONDecodeError is a subclass of Exception, so one
                # handler covers both decode errors and repair_json failures.
                parsed = json.loads(repair_json(urns_str))
            except Exception as e:
                logger.warning(
                    f"Failed to parse URNs as JSON array: {e}. Treating as single URN."
                )
            if isinstance(parsed, list):
                urns = parsed
                return_single = False
            else:
                # Parse failed, or repaired JSON was not a list (e.g. an
                # object): fall back to treating the input as one URN string.
                urns = [urns_str]
                return_single = True
        else:
            # Single URN string
            urns = [urns_str]
            return_single = True
    else:
        return_single = False

    # Trim whitespace from each URN (defensive against string concatenation issues)
    urns = [urn.strip() for urn in urns]

    results = []
    for urn in urns:
        try:
            # Check if entity exists first
            if not graph.exists(urn):
                logger.warning(f"Entity not found during existence check: {urn}")
                if return_single:
                    raise ItemNotFoundError(f"Entity {urn} not found")
                results.append({"error": f"Entity {urn} not found", "urn": urn})
                continue

            # Special handling for Query entities (not part of Entity union type)
            is_query = urn.startswith("urn:li:query:")

            # Execute the appropriate GraphQL query
            variables = {"urn": urn}
            if is_query:
                result = execute_graphql(
                    graph,
                    query=query_entity_gql,
                    variables=variables,
                    operation_name="GetQueryEntity",
                )["entity"]
            else:
                result = execute_graphql(
                    graph,
                    query=entity_details_fragment_gql,
                    variables=variables,
                    operation_name="GetEntity",
                )["entity"]

            # Check if entity data was returned
            if result is None:
                raise ItemNotFoundError(
                    f"Entity {urn} exists but no data could be retrieved. "
                    f"This can happen if the entity has no aspects ingested yet, or if there's a permissions issue."
                )

            # Same post-processing pipeline as list_schema_fields.
            inject_urls_for_urns(graph, result, [""])
            truncate_descriptions(result)

            results.append(clean_get_entities_response(result))

        except Exception as e:
            logger.warning(f"Error fetching entity {urn}: {e}")
            if return_single:
                # Single-URN callers get the original exception directly.
                raise
            results.append({"error": str(e), "urn": urn})

    # Return single dict if single URN was passed, array otherwise
    return results[0] if return_single else results
135
+
136
+
137
def list_schema_fields(
    urn: str,
    keywords: Optional[List[str] | str] = None,
    limit: int = 100,
    offset: int = 0,
) -> dict:
    """List schema fields for a dataset, with optional keyword filtering and pagination.

    Useful when schema fields were truncated in search results (schemaFieldsTruncated present)
    and you need to explore specific columns. Supports pagination for large schemas.

    Args:
        urn: Dataset URN
        keywords: Optional keywords to filter schema fields (OR matching).
            - Single string: Treated as one keyword (NOT split on whitespace). Use for field names or exact phrases.
            - List of strings: Multiple keywords, matches any (OR logic).
            - None or empty list: Returns all fields in priority order (same as get_entities).
            Matches against fieldPath, description, label, tags, and glossary terms.
            Matching fields are returned first, sorted by match count.
        limit: Maximum number of fields to return (default: 100)
        offset: Number of fields to skip for pagination (default: 0)

    Returns:
        Dictionary with:
        - urn: The dataset URN
        - fields: List of schema fields (paginated)
        - totalFields: Total number of fields in the schema
        - returned: Number of fields actually returned
        - remainingCount: Number of fields not included after offset (accounts for limit and token budget)
        - matchingCount: Number of fields that matched keywords (if keywords provided, None otherwise)
        - offset: The offset used

    Examples:
        # Single keyword (string) - search for exact field name or phrase
        list_schema_fields(urn="urn:li:dataset:(...)", keywords="user_email")
        # Returns fields matching "user_email" (like user_email_address, primary_user_email)

        # Multiple keywords (list) - OR matching
        list_schema_fields(urn="urn:li:dataset:(...)", keywords=["email", "user"])
        # Returns fields containing "email" OR "user" (user_email, contact_email, user_id, etc.)

        # Pagination through all fields
        list_schema_fields(urn="urn:li:dataset:(...)", limit=100, offset=0)  # First 100
        list_schema_fields(urn="urn:li:dataset:(...)", limit=100, offset=100)  # Next 100

        # Combine filtering + pagination
        list_schema_fields(urn="urn:li:dataset:(...)", keywords=["user"], limit=50, offset=0)

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = list_schema_fields(urn="urn:li:dataset:(...)", keywords="email")

    Raises:
        ItemNotFoundError: If entity not found
    """
    graph = get_graph()
    # Normalize keywords to list (None means no filtering)
    keywords_lower = None
    if keywords is not None:
        if isinstance(keywords, str):
            keywords = [keywords]
        # Lowercased once here; all matching below is case-insensitive substring.
        keywords_lower = [kw.lower() for kw in keywords]

    # Fetch entity
    if not graph.exists(urn):
        raise ItemNotFoundError(f"Entity {urn} not found")

    # Execute GraphQL query to get full schema
    variables = {"urn": urn}
    result = execute_graphql(
        graph,
        query=entity_details_fragment_gql,
        variables=variables,
        operation_name="GetEntity",
    )["entity"]

    # Check if entity data was returned
    if result is None:
        raise ItemNotFoundError(
            f"Entity {urn} exists but no data could be retrieved. "
            f"This can happen if the entity has no aspects ingested yet, or if there's a permissions issue."
        )

    # Apply same preprocessing as get_entities
    inject_urls_for_urns(graph, result, [""])
    truncate_descriptions(result)

    # Extract total field count before processing
    # NOTE(review): assumes "schemaMetadata", when present, is a dict — if the
    # GraphQL layer can return it as null this would raise; confirm upstream.
    total_fields = len(result.get("schemaMetadata", {}).get("fields", []))

    if total_fields == 0:
        # Entity has no schema fields at all (e.g. non-dataset entity types).
        return {
            "urn": urn,
            "fields": [],
            "totalFields": 0,
            "returned": 0,
            "remainingCount": 0,
            "matchingCount": None,
            "offset": offset,
        }

    # Define custom sorting function for keyword matching
    sort_fn = None
    matching_count = None

    if keywords_lower:
        # Helper function to score a field by keyword matches
        def score_field_by_keywords(field: dict) -> int:
            """Score a field by counting keyword match coverage across its metadata.

            Scoring logic (OR matching):
            - Each keyword gets +1 if it appears in ANY searchable text (substring match)
            - Multiple occurrences of the same keyword in one text still count as +1
            - Higher score = more aspects of the field match the keywords

            Searchable texts (in order of priority):
            1. fieldPath (column name)
            2. description
            3. label
            4. tag names
            5. glossary term names

            Example:
                keywords = ["email", "user"]
                field = {
                    "fieldPath": "user_email",  # matches both
                    "description": "User's email",  # matches both
                    "tags": ["PII"]  # matches neither
                }
                Score = 4 (email in fieldPath + email in desc + user in fieldPath + user in desc)

            Returns:
                Integer score (0 = no matches, higher = more coverage)
            """
            searchable_texts = [
                field.get("fieldPath", ""),
                field.get("description", ""),
                field.get("label", ""),
            ]

            # Add tag names
            # "or {}" guards against a null "properties" object in the response.
            if tags := field.get("tags"):
                if tag_list := tags.get("tags"):
                    searchable_texts.extend(
                        [
                            (t.get("tag", {}).get("properties") or {}).get("name", "")
                            for t in tag_list
                        ]
                    )

            # Add glossary term names
            if glossary_terms := field.get("glossaryTerms"):
                if terms_list := glossary_terms.get("terms"):
                    searchable_texts.extend(
                        [
                            (t.get("term", {}).get("properties") or {}).get("name", "")
                            for t in terms_list
                        ]
                    )

            # Count keyword coverage: +1 for each (keyword, text) pair that matches
            # Note: Substring matching, case-insensitive
            return sum(
                1
                for kw in keywords_lower
                for text in searchable_texts
                if text and kw in text.lower()
            )

        # Pre-compute matching count (need all fields for this)
        fields_for_counting = result.get("schemaMetadata", {}).get("fields", [])
        matching_count = sum(
            1 for field in fields_for_counting if score_field_by_keywords(field) > 0
        )

        # Define sort function for clean_get_entities_response
        def sort_by_keyword_match(fields: List[dict]) -> Iterator[dict]:
            """Sort fields by keyword match count (descending), then alphabetically."""
            scored_fields = [
                (score_field_by_keywords(field), field) for field in fields
            ]
            # Negate the score so higher scores sort first; ties break on fieldPath.
            scored_fields.sort(key=lambda x: (-x[0], x[1].get("fieldPath", "")))
            return iter(field for _, field in scored_fields)

        sort_fn = sort_by_keyword_match

    # Use clean_get_entities_response for consistent processing
    # (it applies sort_fn, offset, limit, and — per the docstring above —
    # a token budget that may return fewer than `limit` fields).
    cleaned_entity = clean_get_entities_response(
        result,
        sort_fn=sort_fn,
        offset=offset,
        limit=limit,
    )

    # Extract the cleaned fields and metadata
    schema_metadata = cleaned_entity.get("schemaMetadata", {})
    cleaned_fields = schema_metadata.get("fields", [])

    # Calculate how many fields remain after what we returned
    # This accounts for both pagination and token budget constraints
    # NOTE(review): if offset exceeds totalFields this can go negative — confirm
    # whether callers rely on it being clamped at zero.
    remaining_count = total_fields - offset - len(cleaned_fields)

    return {
        "urn": urn,
        "fields": cleaned_fields,
        "totalFields": total_fields,
        "returned": len(cleaned_fields),
        "remainingCount": remaining_count,
        "matchingCount": matching_count,
        "offset": offset,
    }
@@ -0,0 +1,99 @@
1
+ """Get authenticated user information tool for DataHub MCP server."""
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ from datahub_agent_context.context import get_graph
7
+ from datahub_agent_context.mcp_tools.base import execute_graphql
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def get_me() -> dict[str, Any]:
    """Get information about the currently authenticated user.

    This tool fetches detailed information about the authenticated user including:
    - User profile information (username, email, full name, etc.)
    - Group memberships
    - User settings and preferences

    Returns:
        Dictionary with:
        - success: Boolean indicating if the operation succeeded
        - data: User information including corpUser details and group memberships
        - message: Success or error message

    Raises:
        RuntimeError: If the GraphQL call fails or no authenticated user is found.

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = get_me()
    """
    graph = get_graph()
    # GraphQL query to get authenticated user information
    query = """
    query getMe {
        me {
            corpUser {
                type
                urn
                username
                info {
                    active
                    displayName
                    title
                    firstName
                    lastName
                    fullName
                    email
                }
                editableProperties {
                    displayName
                    title
                    pictureLink
                    teams
                    skills
                }
                groups: relationships(
                    input: { types: ["IsMemberOfGroup", "IsMemberOfNativeGroup"], direction: OUTGOING, start: 0, count: 50 }
                ) {
                    relationships {
                        entity {
                            ... on CorpGroup {
                                urn
                                name
                                properties {
                                    displayName
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    """

    # Keep the try body narrow: only the network call is wrapped, so the
    # "no user" RuntimeError below needs no isinstance re-raise guard.
    try:
        result = execute_graphql(
            graph,
            query=query,
            variables={},
            operation_name="getMe",
        )
    except Exception as e:
        raise RuntimeError(f"Error retrieving user information: {str(e)}") from e

    me_data = result.get("me")
    if not me_data:
        raise RuntimeError("No authenticated user found")

    return {
        "success": True,
        "data": me_data,
        "message": "Successfully retrieved authenticated user information",
    }