datahub-agent-context 1.3.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. datahub_agent_context/__init__.py +25 -0
  2. datahub_agent_context/_version.py +16 -0
  3. datahub_agent_context/context.py +97 -0
  4. datahub_agent_context/langchain_tools/__init__.py +8 -0
  5. datahub_agent_context/langchain_tools/builder.py +127 -0
  6. datahub_agent_context/mcp_tools/__init__.py +46 -0
  7. datahub_agent_context/mcp_tools/_token_estimator.py +71 -0
  8. datahub_agent_context/mcp_tools/base.py +325 -0
  9. datahub_agent_context/mcp_tools/descriptions.py +299 -0
  10. datahub_agent_context/mcp_tools/documents.py +473 -0
  11. datahub_agent_context/mcp_tools/domains.py +246 -0
  12. datahub_agent_context/mcp_tools/entities.py +349 -0
  13. datahub_agent_context/mcp_tools/get_me.py +99 -0
  14. datahub_agent_context/mcp_tools/gql/__init__.py +13 -0
  15. datahub_agent_context/mcp_tools/gql/document_search.gql +114 -0
  16. datahub_agent_context/mcp_tools/gql/document_semantic_search.gql +111 -0
  17. datahub_agent_context/mcp_tools/gql/entity_details.gql +1682 -0
  18. datahub_agent_context/mcp_tools/gql/queries.gql +51 -0
  19. datahub_agent_context/mcp_tools/gql/query_entity.gql +37 -0
  20. datahub_agent_context/mcp_tools/gql/read_documents.gql +16 -0
  21. datahub_agent_context/mcp_tools/gql/search.gql +242 -0
  22. datahub_agent_context/mcp_tools/helpers.py +448 -0
  23. datahub_agent_context/mcp_tools/lineage.py +698 -0
  24. datahub_agent_context/mcp_tools/owners.py +318 -0
  25. datahub_agent_context/mcp_tools/queries.py +191 -0
  26. datahub_agent_context/mcp_tools/search.py +239 -0
  27. datahub_agent_context/mcp_tools/structured_properties.py +447 -0
  28. datahub_agent_context/mcp_tools/tags.py +296 -0
  29. datahub_agent_context/mcp_tools/terms.py +295 -0
  30. datahub_agent_context/py.typed +2 -0
  31. datahub_agent_context-1.3.1.8.dist-info/METADATA +233 -0
  32. datahub_agent_context-1.3.1.8.dist-info/RECORD +34 -0
  33. datahub_agent_context-1.3.1.8.dist-info/WHEEL +5 -0
  34. datahub_agent_context-1.3.1.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,239 @@
1
+ """Search tools for DataHub."""
2
+
3
+ import json
4
+ import logging
5
+ import pathlib
6
+ from typing import Any, Dict, Literal, Optional
7
+
8
+ from datahub.sdk.search_client import compile_filters
9
+ from datahub.sdk.search_filters import Filter, load_filters
10
+ from datahub_agent_context.context import get_graph
11
+ from datahub_agent_context.mcp_tools.base import (
12
+ clean_gql_response,
13
+ execute_graphql,
14
+ fetch_global_default_view,
15
+ )
16
+
17
logger = logging.getLogger(__name__)

# Load the GraphQL documents shipped alongside this module. The encoding is
# pinned explicitly so decoding does not depend on the host's locale default.
_gql_dir = pathlib.Path(__file__).parent / "gql"
search_gql = (_gql_dir / "search.gql").read_text(encoding="utf-8")
22
+
23
+
24
+ def _convert_custom_filter_format(filters_obj: Any) -> Any:
25
+ """
26
+ Convert chatbot's intuitive {"custom": {...}} format to the format expected by _CustomCondition.
27
+
28
+ Transforms:
29
+ {"custom": {"field": "urn", "condition": "EQUAL", "values": [...]}}
30
+
31
+ Into:
32
+ {"field": "urn", "condition": "EQUAL", "values": [...]}
33
+
34
+ This allows the discriminator to correctly identify it as _custom.
35
+ """
36
+ if isinstance(filters_obj, dict):
37
+ # Check if this is a "custom" or "custom_condition" wrapper that needs unwrapping
38
+ if len(filters_obj) == 1 and (
39
+ "custom" in filters_obj or "custom_condition" in filters_obj
40
+ ):
41
+ wrapper_key = "custom" if "custom" in filters_obj else "custom_condition"
42
+ custom_content = filters_obj[wrapper_key]
43
+ # Ensure it has the expected structure for _CustomCondition
44
+ if isinstance(custom_content, dict) and "field" in custom_content:
45
+ return custom_content
46
+
47
+ # Recursively process nested filters (for "and", "or", etc.)
48
+ result = {}
49
+ for key, value in filters_obj.items():
50
+ if isinstance(value, (list, dict)):
51
+ result[key] = _convert_custom_filter_format(value)
52
+ else:
53
+ result[key] = value
54
+ return result
55
+ elif isinstance(filters_obj, list):
56
+ # Process list of filters
57
+ return [_convert_custom_filter_format(item) for item in filters_obj]
58
+ else:
59
+ # Return primitive values unchanged
60
+ return filters_obj
61
+
62
+
63
def search(
    query: str = "*",
    filters: Optional[Filter | str] = None,
    num_results: int = 10,
    sort_by: Optional[str] = None,
    sort_order: Optional[Literal["asc", "desc"]] = "desc",
    offset: int = 0,
) -> dict:
    """Search across DataHub entities using structured full-text search.
    Results are ordered by relevance and importance - examine top results first.

    SEARCH SYNTAX:
    - Structured full-text search - **always start queries with /q**
    - **Recommended: Use + operator for AND** (handles punctuation better than quotes)
    - Supports full boolean logic: AND (default), OR, NOT, parentheses, field searches
    - Examples:
      • /q user+transaction → requires both terms (better for field names with _ or punctuation)
      • /q point+sale+app → requires all terms (works with point_of_sale_app_usage)
      • /q wizard OR pet → entities containing either term
      • /q revenue* → wildcard matching (revenue_2023, revenue_2024, revenue_monthly, etc.)
      • /q tag:PII → search by tag name
      • /q "exact table name" → exact phrase matching (use sparingly)
      • /q (sales OR revenue) AND quarterly → complex boolean combinations
    - Fast and precise for exact matching, technical terms, and complex queries
    - Best for: entity names, identifiers, column names, or any search needing boolean logic

    PAGINATION:
    - num_results: Number of results to return per page (max: 50)
    - offset: Starting position in results (default: 0)
    - Examples:
      • First page: offset=0, num_results=10
      • Second page: offset=10, num_results=10
      • Third page: offset=20, num_results=10

    FACET EXPLORATION - Discover metadata without returning results:
    - Set num_results=0 to get ONLY facets (no search results)
    - Facets show ALL tags, glossaryTerms, platforms, domains used in the catalog
    - Example: search(query="*", filters={"entity_type": ["DATASET"]}, num_results=0)
      → Returns facets showing all tags/glossaryTerms applied to datasets
    - Use this to discover what metadata exists before doing filtered searches

    TYPICAL WORKFLOW:
    1. Facet exploration: search(query="*", filters={"entity_type": ["DATASET"]}, num_results=0)
       → Examine tags/glossaryTerms facets to see what metadata exists
    2. Filtered search: search(query="*", filters={"tag": ["urn:li:tag:pii"]}, num_results=30)
       → Get entities with specific tag using URN from step 1
    3. Get details: Use get_entities() on specific results

    Here are some example filters:
    - All Looker assets
    ```
    {"platform": ["looker"]}
    ```
    - Production environment warehouse assets
    ```
    {
      "and": [
        {"env": ["PROD"]},
        {"platform": ["snowflake", "bigquery", "redshift"]}
      ]
    }
    ```
    - Filter by domain (MUST use full URN format)
    ```
    {"domain": ["urn:li:domain:marketing"]}
    {"domain": ["urn:li:domain:9f8e7d6c-5b4a-3928-1765-432109876543", "urn:li:domain:7c6b5a49-3827-1654-9032-8f7e6d5c4b3a"]}
    ```
    IMPORTANT: Domain filters require full URN format starting with "urn:li:domain:",
    NOT short names like "marketing" or "customer". Domain URNs can be readable names
    or GUIDs. Always search with {"entity_type": ["domain"]}
    to find valid domain URNs first, then use the exact URN from the results.

    SUPPORTED FILTER TYPES (only these will work):
    - entity_type: ["dataset"], ["dashboard", "chart"], ["corp_user"], ["corp_group"]
    - entity_subtype: ["Table"], ["View", "Model"]
    - platform: ["snowflake"], ["looker", "tableau"]
    - domain: ["urn:li:domain:marketing"] (full URN required)
    - container: ["urn:li:container:..."] (full URN required)
    - tag: ["urn:li:tag:PII"] (full tag URN required)
    - glossary_term: ["urn:li:glossaryTerm:uuid"] (full term URN required)
    - owner: ["urn:li:corpuser:alice", "urn:li:corpGroup:marketing"] (full user or group URN required)
    - custom: {"field": "fieldName", "condition": "EQUAL", "values": [...]}
    - status: ["NOT_SOFT_DELETED"] (for non-deleted entities)
    - env: ["PROD"], ["DEV", "STAGING"] (Should not use unless explicitly requested)
    - and: [filter1, filter2] (combines multiple filters)
    - or: [filter1, filter2] (matches any filter)
    - not: {"entity_type": ["dataset"]} (excludes matches)

    CRITICAL: Use only ONE discriminator key per filter object. Never mix
    entity_type with custom, domain, etc. at the same level. Use "and" or "or" to combine.

    SEARCH STRATEGY EXAMPLES:
    - /q customer+behavior → finds tables with both terms (works with customer_behavior fields)
    - /q customer OR user → finds tables with either term
    - /q (financial OR revenue) AND metrics → complex boolean logic

    SORTING - Order results by specific fields:
    - sort_by: Field name to sort by (optional)
    - sort_order: "desc" (default) or "asc"

    Note: If sort_by is not provided, search results use default ranking by relevance and
    importance. When using sort_by, results are strictly ordered by that field.

    Args:
        query: Search query string (use /q prefix for structured queries)
        filters: Optional filter object or JSON string
        num_results: Number of results to return (max 50)
        sort_by: Optional field name to sort by
        sort_order: Sort order ("asc" or "desc")
        offset: Starting position for pagination

    Returns:
        Dictionary with search results, facets, and metadata

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = search(query="/q users", filters={"entity_type": ["dataset"]})
    """
    graph = get_graph()

    # Never request more than 50 hits in one call.
    num_results = min(num_results, 50)

    # JSON strings and plain dicts take the same path: decode if needed,
    # unwrap {"custom": ...} wrappers, then build a Filter object.
    # Anything else (None or an existing Filter) is handed on untouched.
    if isinstance(filters, (str, dict)):
        raw_filters = json.loads(filters) if isinstance(filters, str) else filters
        filters = load_filters(_convert_custom_filter_format(raw_filters))

    types, compiled_filters = compile_filters(filters)

    # The global default view is optional; a falsy value means there is
    # nothing to apply and None-like is forwarded to the query as-is.
    view_urn = fetch_global_default_view(graph)
    logger.debug(
        f"Applying default view: {view_urn}"
        if view_urn
        else "No default view to apply"
    )

    variables: Dict[str, Any] = {
        "query": query,
        "types": types,
        "orFilters": compiled_filters,
        # The backend does not accept count=0, so always ask for at least one
        # hit; a facets-only request is trimmed after the call instead.
        "count": max(num_results, 1),
        "start": offset,
        "viewUrn": view_urn,
    }

    # Explicit sorting is only sent when a sort field was given; any
    # sort_order other than "asc" (including None) means descending.
    if sort_by is not None:
        direction = "ASCENDING" if sort_order == "asc" else "DESCENDING"
        variables["sortInput"] = {
            "sortCriteria": [{"field": sort_by, "sortOrder": direction}]
        }

    # Keyword search
    gql_payload = execute_graphql(
        graph,
        query=search_gql,
        variables=variables,
        operation_name="search",
    )
    response = gql_payload["searchAcrossEntities"]

    if num_results == 0 and isinstance(response, dict):
        # Facets-only mode: discard the single hit we were forced to request.
        for dropped_key in ("searchResults", "count"):
            response.pop(dropped_key, None)

    return clean_gql_response(response)
@@ -0,0 +1,447 @@
1
+ """Structured property management tools for DataHub MCP server."""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import Dict, List, Union
6
+
7
+ from datahub.utilities.urns._urn_base import Urn
8
+ from datahub_agent_context.context import get_graph
9
+ from datahub_agent_context.mcp_tools.base import execute_graphql
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def _validate_and_fetch_structured_property(property_urn: str) -> Dict:
    """
    Validate that the structured property exists and fetch its definition.

    Args:
        property_urn: URN of the structured property to look up.

    Returns:
        Dictionary with the property definition as selected by the query
        below (qualifiedName, entityTypes, valueType, cardinality). An empty
        dict is returned when the response carries no definition, including
        the case where the field is present but null.

    Raises:
        ValueError: If the URN does not resolve to an entity, resolves to an
            entity whose type is not STRUCTURED_PROPERTY, or the lookup
            itself fails (the underlying error is chained as the cause).
    """
    graph = get_graph()
    query = """
        query getStructuredProperty($urn: String!) {
            entity(urn: $urn) {
                urn
                type
                ... on StructuredPropertyEntity {
                    definition {
                        qualifiedName
                        entityTypes {
                            urn
                            type
                            info {
                                type
                            }
                        }
                        valueType {
                            urn
                            info {
                                qualifiedName
                            }
                        }
                        cardinality
                    }
                }
            }
        }
    """

    try:
        result = execute_graphql(
            graph,
            query=query,
            variables={"urn": property_urn},
            operation_name="getStructuredProperty",
        )

        entity = result.get("entity")

        if entity is None:
            raise ValueError(
                f"Structured property URN does not exist in DataHub: {property_urn}. "
                f"Please use the search tool to find existing structured properties, "
                f"or create the property first before assigning it."
            )

        if entity.get("type") != "STRUCTURED_PROPERTY":
            raise ValueError(
                f"The URN is not a structured property entity: {property_urn} (type: {entity.get('type')})"
            )

        # dict.get's default only applies when the key is missing; a key that
        # is present with a null value would otherwise leak None to callers
        # that go on to call .get() on the definition.
        return entity.get("definition") or {}

    except ValueError:
        # Our own validation errors (and any ValueError from the lookup)
        # already carry a usable message; pass them through unchanged.
        raise
    except Exception as e:
        raise ValueError(f"Failed to validate structured property URN: {str(e)}") from e
81
+
82
+
83
+ def _validate_property_value(
84
+ property_definition: Dict, value: Union[str, float, int]
85
+ ) -> Dict:
86
+ """
87
+ Validate and convert a property value to the appropriate GraphQL format.
88
+
89
+ Supports 5 data types:
90
+ - datahub.string: Plain text strings
91
+ - datahub.number: Numeric values (int, float, double, long)
92
+ - datahub.urn: DataHub URN references
93
+ - datahub.date: ISO 8601 date strings
94
+ - datahub.rich_text: Rich text/markdown content
95
+
96
+ Args:
97
+ property_definition: The property definition containing valueType info
98
+ value: The value to validate and convert
99
+
100
+ Returns:
101
+ Dictionary with either stringValue or numberValue key
102
+
103
+ Raises:
104
+ ValueError: If the value type doesn't match the property's valueType
105
+ """
106
+ value_type_info = property_definition.get("valueType", {}).get("info", {})
107
+ qualified_name = value_type_info.get("qualifiedName", "").lower()
108
+
109
+ # Determine the data type
110
+ is_numeric_type = any(
111
+ numeric_type in qualified_name
112
+ for numeric_type in ["number", "int", "float", "double", "long"]
113
+ )
114
+ is_urn_type = "urn" in qualified_name and "datahub.urn" in qualified_name
115
+ is_date_type = "date" in qualified_name
116
+ is_rich_text_type = "rich_text" in qualified_name or "richtext" in qualified_name
117
+
118
+ if is_numeric_type:
119
+ # Value should be numeric
120
+ if isinstance(value, (int, float)):
121
+ return {"numberValue": float(value)}
122
+ elif isinstance(value, str):
123
+ try:
124
+ return {"numberValue": float(value)}
125
+ except ValueError as e:
126
+ raise ValueError(
127
+ f"Property expects numeric type ({qualified_name}), but got non-numeric string: {value}"
128
+ ) from e
129
+ else:
130
+ raise ValueError(
131
+ f"Property expects numeric type ({qualified_name}), got {type(value).__name__}"
132
+ )
133
+
134
+ elif is_urn_type:
135
+ # Value should be a valid DataHub URN
136
+ if not isinstance(value, str):
137
+ value = str(value)
138
+
139
+ try:
140
+ # Validate URN format
141
+ Urn.from_string(value)
142
+ return {"stringValue": value}
143
+ except Exception as e:
144
+ raise ValueError(
145
+ f"Property expects URN type ({qualified_name}), but got invalid URN: {value}. "
146
+ f"URNs must be in format 'urn:li:entityType:...' Error: {str(e)}"
147
+ ) from e
148
+
149
+ elif is_date_type:
150
+ # Value should be an ISO 8601 date string
151
+ if not isinstance(value, str):
152
+ value = str(value)
153
+
154
+ # Try to parse as ISO 8601 date
155
+ try:
156
+ # Support various ISO 8601 formats
157
+ # Examples: 2024-12-22, 2024-12-22T10:30:00, 2024-12-22T10:30:00Z, 2024-12-22T10:30:00+00:00
158
+ datetime.fromisoformat(value.replace("Z", "+00:00"))
159
+ return {"stringValue": value}
160
+ except ValueError as e:
161
+ raise ValueError(
162
+ f"Property expects date type ({qualified_name}), but got invalid date format: {value}. "
163
+ f"Dates must be in ISO 8601 format (e.g., '2024-12-22', '2024-12-22T10:30:00Z')"
164
+ ) from e
165
+
166
+ elif is_rich_text_type:
167
+ # Value should be string (can contain markdown/HTML)
168
+ if isinstance(value, str):
169
+ return {"stringValue": value}
170
+ else:
171
+ # Convert to string for non-string types
172
+ return {"stringValue": str(value)}
173
+
174
+ else:
175
+ # Default to string type (datahub.string or unknown types)
176
+ if isinstance(value, str):
177
+ return {"stringValue": value}
178
+ else:
179
+ # Convert to string for non-string types
180
+ return {"stringValue": str(value)}
181
+
182
+ raise ValueError(
183
+ f"Value type mismatch: property expects {qualified_name}, got {type(value).__name__}"
184
+ )
185
+
186
+
187
def add_structured_properties(
    property_values: Dict[str, List[Union[str, float, int]]],
    entity_urns: List[str],
) -> dict:
    """Add structured properties with values to multiple DataHub entities.

    This tool allows you to assign structured properties to multiple entities in a single operation.
    Structured properties are schema-defined metadata fields that can store typed values (strings, numbers, etc.).

    Every property URN is validated and every value is type-checked against
    its property definition before any entity is modified. The same set of
    properties is then upserted on each entity, one request per entity.

    Args:
        property_values: Dictionary mapping structured property URNs to lists of values.
            Example: {
                "urn:li:structuredProperty:io.acryl.privacy.retentionTime": ["90"],
                "urn:li:structuredProperty:io.acryl.common.businessCriticality": ["HIGH"]
            }
        entity_urns: List of entity URNs to assign properties to (e.g., dataset URNs, dashboard URNs)

    Returns:
        {"success": True, "message": ...} when every entity was updated.

    Raises:
        ValueError: If either argument is empty, a property URN fails
            validation, or a value does not match its property's type.
        RuntimeError: If the upsert failed for at least one entity; the
            message lists up to three of the per-entity errors.

    Examples:
        # Add retention time and criticality to datasets
        add_structured_properties(
            property_values={
                "urn:li:structuredProperty:io.acryl.privacy.retentionTime": ["90"],
                "urn:li:structuredProperty:io.acryl.common.businessCriticality": ["HIGH"]
            },
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.customers,PROD)"
            ]
        )

        # Add numeric property
        add_structured_properties(
            property_values={
                "urn:li:structuredProperty:io.acryl.dataQuality.scoreThreshold": [0.95]
            },
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.verified_data,PROD)"
            ]
        )

        # Add multiple values for a multi-valued property
        add_structured_properties(
            property_values={
                "urn:li:structuredProperty:io.acryl.common.dataClassification": ["PII", "SENSITIVE"]
            },
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ]
        )

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = add_structured_properties(
                property_values={"urn:li:structuredProperty:...": ["value"]},
                entity_urns=["urn:li:dataset:(...)"]
            )
    """
    graph = get_graph()
    if not property_values:
        raise ValueError("property_values cannot be empty")
    if not entity_urns:
        raise ValueError("entity_urns cannot be empty")

    # Resolve every property definition up front so an unknown URN aborts the
    # call before any value conversion or mutation happens.
    definitions_by_urn = {
        urn: _validate_and_fetch_structured_property(urn) for urn in property_values
    }

    # Convert the raw values into the typed payloads the mutation expects.
    structured_property_params = []
    for property_urn, raw_values in property_values.items():
        definition = definitions_by_urn[property_urn]
        typed_values = []
        for raw_value in raw_values:
            try:
                typed_values.append(_validate_property_value(definition, raw_value))
            except ValueError as e:
                raise ValueError(
                    f"Value validation failed for {property_urn}: {str(e)}"
                ) from e
        structured_property_params.append(
            {"structuredPropertyUrn": property_urn, "values": typed_values}
        )

    mutation = """
        mutation upsertStructuredProperties($input: UpsertStructuredPropertiesInput!) {
            upsertStructuredProperties(input: $input) {
                properties {
                    structuredProperty {
                        urn
                    }
                }
            }
        }
    """

    # One upsert per entity. Failures are collected rather than raised
    # immediately so that every entity is attempted.
    success_count = 0
    error_messages = []
    for entity_urn in entity_urns:
        try:
            outcome = execute_graphql(
                graph,
                query=mutation,
                variables={
                    "input": {
                        "assetUrn": entity_urn,
                        "structuredPropertyInputParams": structured_property_params,
                    }
                },
                operation_name="upsertStructuredProperties",
            ).get("upsertStructuredProperties")
            failure = (
                None
                if outcome
                else f"{entity_urn}: operation returned false or empty result"
            )
        except Exception as e:
            failure = f"{entity_urn}: {str(e)}"

        if failure is None:
            success_count += 1
        else:
            error_messages.append(failure)

    if error_messages:
        # Show at most three errors in full and summarise the remainder.
        error_details = "; ".join(error_messages[:3])
        omitted = len(error_messages) - 3
        if omitted > 0:
            error_details += f"; and {omitted} more error(s)"
        raise RuntimeError(
            f"Failed to add structured properties to {len(error_messages)} entit(ies). Errors: {error_details}"
        )

    return {
        "success": True,
        "message": f"Successfully added {len(property_values)} structured propert(ies) to {success_count} entit(ies)",
    }
336
+
337
+
338
def remove_structured_properties(
    property_urns: List[str],
    entity_urns: List[str],
) -> dict:
    """Remove structured properties from multiple DataHub entities.

    This tool allows you to remove structured property assignments from multiple entities in a single operation.

    Every property URN is validated before any entity is modified. The same
    set of properties is then removed from each entity, one request per entity.

    Args:
        property_urns: List of structured property URNs to remove
            Example: ["urn:li:structuredProperty:io.acryl.privacy.retentionTime"]
        entity_urns: List of entity URNs to remove properties from (e.g., dataset URNs, dashboard URNs)

    Returns:
        {"success": True, "message": ...} when every entity was updated.

    Raises:
        ValueError: If either argument is empty or a property URN fails validation.
        RuntimeError: If the removal failed for at least one entity; the
            message lists up to three of the per-entity errors.

    Examples:
        # Remove retention time property from datasets
        remove_structured_properties(
            property_urns=["urn:li:structuredProperty:io.acryl.privacy.retentionTime"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.old_data,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.archived,PROD)"
            ]
        )

        # Remove multiple properties at once
        remove_structured_properties(
            property_urns=[
                "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
                "urn:li:structuredProperty:io.acryl.common.businessCriticality"
            ],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.temp_table,PROD)"
            ]
        )

    Example:
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = remove_structured_properties(
                property_urns=["urn:li:structuredProperty:..."],
                entity_urns=["urn:li:dataset:(...)"]
            )
    """
    graph = get_graph()
    if not property_urns:
        raise ValueError("property_urns cannot be empty")
    if not entity_urns:
        raise ValueError("entity_urns cannot be empty")

    # Existence check only: the returned definitions are not needed here, but
    # an unknown or non-property URN must abort before anything is mutated.
    for candidate_urn in property_urns:
        _validate_and_fetch_structured_property(candidate_urn)

    mutation = """
        mutation removeStructuredProperties($input: RemoveStructuredPropertiesInput!) {
            removeStructuredProperties(input: $input) {
                properties {
                    structuredProperty {
                        urn
                    }
                }
            }
        }
    """

    # One removal per entity. Failures are collected rather than raised
    # immediately so that every entity is attempted.
    success_count = 0
    error_messages = []
    for entity_urn in entity_urns:
        try:
            outcome = execute_graphql(
                graph,
                query=mutation,
                variables={
                    "input": {
                        "assetUrn": entity_urn,
                        "structuredPropertyUrns": property_urns,
                    }
                },
                operation_name="removeStructuredProperties",
            ).get("removeStructuredProperties")
            failure = (
                None
                if outcome
                else f"{entity_urn}: operation returned false or empty result"
            )
        except Exception as e:
            failure = f"{entity_urn}: {str(e)}"

        if failure is None:
            success_count += 1
        else:
            error_messages.append(failure)

    if error_messages:
        # Show at most three errors in full and summarise the remainder.
        error_details = "; ".join(error_messages[:3])
        omitted = len(error_messages) - 3
        if omitted > 0:
            error_details += f"; and {omitted} more error(s)"
        raise RuntimeError(
            f"Failed to remove structured properties from {len(error_messages)} entit(ies). Errors: {error_details}"
        )

    return {
        "success": True,
        "message": f"Successfully removed {len(property_urns)} structured propert(ies) from {success_count} entit(ies)",
    }