datahub-agent-context 1.3.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. datahub_agent_context/__init__.py +25 -0
  2. datahub_agent_context/_version.py +16 -0
  3. datahub_agent_context/context.py +97 -0
  4. datahub_agent_context/langchain_tools/__init__.py +8 -0
  5. datahub_agent_context/langchain_tools/builder.py +127 -0
  6. datahub_agent_context/mcp_tools/__init__.py +46 -0
  7. datahub_agent_context/mcp_tools/_token_estimator.py +71 -0
  8. datahub_agent_context/mcp_tools/base.py +325 -0
  9. datahub_agent_context/mcp_tools/descriptions.py +299 -0
  10. datahub_agent_context/mcp_tools/documents.py +473 -0
  11. datahub_agent_context/mcp_tools/domains.py +246 -0
  12. datahub_agent_context/mcp_tools/entities.py +349 -0
  13. datahub_agent_context/mcp_tools/get_me.py +99 -0
  14. datahub_agent_context/mcp_tools/gql/__init__.py +13 -0
  15. datahub_agent_context/mcp_tools/gql/document_search.gql +114 -0
  16. datahub_agent_context/mcp_tools/gql/document_semantic_search.gql +111 -0
  17. datahub_agent_context/mcp_tools/gql/entity_details.gql +1682 -0
  18. datahub_agent_context/mcp_tools/gql/queries.gql +51 -0
  19. datahub_agent_context/mcp_tools/gql/query_entity.gql +37 -0
  20. datahub_agent_context/mcp_tools/gql/read_documents.gql +16 -0
  21. datahub_agent_context/mcp_tools/gql/search.gql +242 -0
  22. datahub_agent_context/mcp_tools/helpers.py +448 -0
  23. datahub_agent_context/mcp_tools/lineage.py +698 -0
  24. datahub_agent_context/mcp_tools/owners.py +318 -0
  25. datahub_agent_context/mcp_tools/queries.py +191 -0
  26. datahub_agent_context/mcp_tools/search.py +239 -0
  27. datahub_agent_context/mcp_tools/structured_properties.py +447 -0
  28. datahub_agent_context/mcp_tools/tags.py +296 -0
  29. datahub_agent_context/mcp_tools/terms.py +295 -0
  30. datahub_agent_context/py.typed +2 -0
  31. datahub_agent_context-1.3.1.8.dist-info/METADATA +233 -0
  32. datahub_agent_context-1.3.1.8.dist-info/RECORD +34 -0
  33. datahub_agent_context-1.3.1.8.dist-info/WHEEL +5 -0
  34. datahub_agent_context-1.3.1.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,296 @@
1
+ """Tag management tools for DataHub MCP server."""
2
+
3
+ import logging
4
+ from typing import List, Literal, Optional
5
+
6
+ from datahub_agent_context.context import get_graph
7
+ from datahub_agent_context.mcp_tools.base import execute_graphql
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def _validate_tag_urns(tag_urns: List[str]) -> None:
    """Ensure every URN in ``tag_urns`` resolves to an existing Tag entity.

    Runs a single ``getTags`` GraphQL lookup against the graph returned by
    ``get_graph()`` and inspects the entities that come back.

    Args:
        tag_urns: Tag URNs to check.

    Raises:
        ValueError: If a requested URN is absent from the response, if a
            returned entity's ``type`` is not ``"TAG"``, or if the lookup
            itself fails (the underlying exception is chained as the cause).
    """
    graph = get_graph()
    lookup_query = """
        query getTags($urns: [String!]!) {
            entities(urns: $urns) {
                urn
                type
                ... on Tag {
                    properties {
                        name
                    }
                }
            }
        }
    """

    try:
        response = execute_graphql(
            graph,
            query=lookup_query,
            variables={"urns": tag_urns},
            operation_name="getTags",
        )
        returned = response.get("entities", [])

        # Null entries are skipped here, so the URNs they stand for end up
        # being reported as missing below.
        known = set()
        for item in returned:
            if item is not None:
                known.add(item["urn"])

        absent = [urn for urn in tag_urns if urn not in known]
        if absent:
            raise ValueError(
                f"The following tag URNs do not exist in DataHub: {', '.join(absent)}. "
                "Please use the search tool with entity_type filter to find existing tags, "
                "or create the tags first before assigning them."
            )

        # A URN can resolve to an entity that is not a tag; reject those too.
        wrong_type = []
        for item in returned:
            if item and item.get("type") != "TAG":
                wrong_type.append(item["urn"])
        if wrong_type:
            raise ValueError(
                f"The following URNs are not tag entities: {', '.join(wrong_type)}"
            )
    except ValueError:
        # Validation failures raised above already carry the final message.
        raise
    except Exception as exc:
        raise ValueError(f"Failed to validate tag URNs: {str(exc)}") from exc
72
+
73
+
74
+ def _batch_modify_tags(
75
+ tag_urns: List[str],
76
+ entity_urns: List[str],
77
+ column_paths: Optional[List[Optional[str]]],
78
+ operation: Literal["add", "remove"],
79
+ ) -> dict:
80
+ """
81
+ Internal helper for batch tag operations (add/remove).
82
+
83
+ Validates inputs, constructs GraphQL mutation, and executes the operation.
84
+ """
85
+ graph = get_graph()
86
+ # Validate inputs
87
+ if not tag_urns:
88
+ raise ValueError("tag_urns cannot be empty")
89
+ if not entity_urns:
90
+ raise ValueError("entity_urns cannot be empty")
91
+
92
+ # Validate that all tag URNs exist
93
+ _validate_tag_urns(tag_urns)
94
+
95
+ # Handle column_paths - if not provided, create list of Nones
96
+ if column_paths is None:
97
+ column_paths = [None] * len(entity_urns)
98
+ elif len(column_paths) != len(entity_urns):
99
+ raise ValueError(
100
+ f"column_paths length ({len(column_paths)}) must match entity_urns length ({len(entity_urns)})"
101
+ )
102
+
103
+ # Build the resources list for GraphQL mutation
104
+ resources = []
105
+ for resource_urn, column_path in zip(entity_urns, column_paths, strict=True):
106
+ resource_input = {"resourceUrn": resource_urn}
107
+
108
+ # Add subresource fields if provided (for column-level tagging)
109
+ if column_path:
110
+ resource_input["subResource"] = column_path
111
+ resource_input["subResourceType"] = "DATASET_FIELD"
112
+
113
+ resources.append(resource_input)
114
+
115
+ # Determine mutation and operation name based on operation type
116
+ if operation == "add":
117
+ mutation = """
118
+ mutation batchAddTags($input: BatchAddTagsInput!) {
119
+ batchAddTags(input: $input)
120
+ }
121
+ """
122
+ operation_name = "batchAddTags"
123
+ success_verb = "added"
124
+ failure_verb = "add"
125
+ else: # remove
126
+ mutation = """
127
+ mutation batchRemoveTags($input: BatchRemoveTagsInput!) {
128
+ batchRemoveTags(input: $input)
129
+ }
130
+ """
131
+ operation_name = "batchRemoveTags"
132
+ success_verb = "removed"
133
+ failure_verb = "remove"
134
+
135
+ variables = {"input": {"tagUrns": tag_urns, "resources": resources}}
136
+
137
+ try:
138
+ result = execute_graphql(
139
+ graph,
140
+ query=mutation,
141
+ variables=variables,
142
+ operation_name=operation_name,
143
+ )
144
+
145
+ success = result.get(operation_name, False)
146
+ if success:
147
+ preposition = "to" if operation == "add" else "from"
148
+ return {
149
+ "success": True,
150
+ "message": f"Successfully {success_verb} {len(tag_urns)} tag(s) {preposition} {len(entity_urns)} entit(ies)",
151
+ }
152
+ else:
153
+ raise RuntimeError(
154
+ f"Failed to {failure_verb} tags - operation returned false"
155
+ )
156
+
157
+ except Exception as e:
158
+ if isinstance(e, RuntimeError):
159
+ raise
160
+ raise RuntimeError(f"Error {failure_verb} tags: {str(e)}") from e
161
+
162
+
163
def add_tags(
    tag_urns: List[str],
    entity_urns: List[str],
    column_paths: Optional[List[Optional[str]]] = None,
) -> dict:
    """Attach one or more tags to DataHub entities or to individual columns of them.

    Multiple tags can be applied to multiple entities (or schema fields) in a
    single operation, which suits bulk tagging such as marking several datasets
    as PII, deprecated, or subject to a governance classification.

    Args:
        tag_urns: URNs of the tags to add, e.g.
            ``["urn:li:tag:PII", "urn:li:tag:Sensitive"]``. Each tag must
            already exist in DataHub.
        entity_urns: URNs of the entities to tag (dataset URNs, dashboard
            URNs, ...).
        column_paths: Optional column identifiers, one per entry of
            ``entity_urns`` (so both lists must have the same length).
            Use ``None`` or an empty string for an entity-level tag, or a
            column name such as ``"email_address"`` for a column-level tag.
            Check the column names against the entity's schemaMetadata first,
            for example with the get_entity tool.

    Returns:
        Dictionary with:
            - success: ``True`` when the operation succeeded
            - message: Summary of how many tags were added to how many entities

    Raises:
        ValueError: If ``tag_urns`` or ``entity_urns`` is empty, the list
            lengths do not match, or a tag URN does not exist / is not a tag.
        RuntimeError: If the tagging operation fails.

    Examples:
        # Tag several datasets at once
        add_tags(
            tag_urns=["urn:li:tag:PII", "urn:li:tag:Sensitive"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.customers,PROD)"
            ]
        )

        # Tag two columns of the same dataset
        add_tags(
            tag_urns=["urn:li:tag:PII"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ],
            column_paths=["email", "phone_number"]
        )

        # Tag a whole table and a single column in one call
        add_tags(
            tag_urns=["urn:li:tag:Deprecated"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.old_table,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ],
            column_paths=[None, "deprecated_column"]
        )

        # Calling within a DataHub context
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = add_tags(
                tag_urns=["urn:li:tag:PII"],
                entity_urns=["urn:li:dataset:(...)"]
            )
    """
    return _batch_modify_tags(
        tag_urns=tag_urns,
        entity_urns=entity_urns,
        column_paths=column_paths,
        operation="add",
    )
229
+
230
+
231
def remove_tags(
    tag_urns: List[str],
    entity_urns: List[str],
    column_paths: Optional[List[Optional[str]]] = None,
) -> dict:
    """Detach one or more tags from DataHub entities or from individual columns of them.

    Multiple tags can be removed from multiple entities (or schema fields) in a
    single operation, which suits bulk clean-up such as dropping deprecated
    tags, correcting misapplied classifications, or tidying governance metadata.

    Args:
        tag_urns: URNs of the tags to remove, e.g.
            ``["urn:li:tag:PII", "urn:li:tag:Sensitive"]``. Each tag must
            exist in DataHub.
        entity_urns: URNs of the entities to untag (dataset URNs, dashboard
            URNs, ...).
        column_paths: Optional column identifiers, one per entry of
            ``entity_urns`` (so both lists must have the same length).
            Use ``None`` or an empty string for entity-level removal, or a
            column name such as ``"email_address"`` for column-level removal.
            Check the column names against the entity's schemaMetadata first,
            for example with the get_entity tool.

    Returns:
        Dictionary with:
            - success: ``True`` when the operation succeeded
            - message: Summary of how many tags were removed from how many entities

    Raises:
        ValueError: If ``tag_urns`` or ``entity_urns`` is empty, the list
            lengths do not match, or a tag URN does not exist / is not a tag.
        RuntimeError: If the removal operation fails.

    Examples:
        # Untag several datasets at once
        remove_tags(
            tag_urns=["urn:li:tag:Deprecated", "urn:li:tag:Legacy"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.old_users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.old_customers,PROD)"
            ]
        )

        # Untag two columns of the same dataset
        remove_tags(
            tag_urns=["urn:li:tag:PII"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ],
            column_paths=["old_email_field", "deprecated_phone"]
        )

        # Untag a whole table and a single column in one call
        remove_tags(
            tag_urns=["urn:li:tag:Experimental"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.stable_table,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ],
            column_paths=[None, "test_column"]
        )

        # Calling within a DataHub context
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = remove_tags(
                tag_urns=["urn:li:tag:Deprecated"],
                entity_urns=["urn:li:dataset:(...)"]
            )
    """
    return _batch_modify_tags(
        tag_urns=tag_urns,
        entity_urns=entity_urns,
        column_paths=column_paths,
        operation="remove",
    )
@@ -0,0 +1,295 @@
1
+ """Terms management tools for DataHub MCP server."""
2
+
3
+ import logging
4
+ from typing import List, Literal, Optional
5
+
6
+ from datahub_agent_context.context import get_graph
7
+ from datahub_agent_context.mcp_tools.base import execute_graphql
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def _validate_glossary_term_urns(term_urns: List[str]) -> None:
    """Ensure every URN in ``term_urns`` resolves to an existing GlossaryTerm entity.

    Runs a single ``getGlossaryTerms`` GraphQL lookup against the graph
    returned by ``get_graph()`` and inspects the entities that come back.

    Args:
        term_urns: Glossary term URNs to check.

    Raises:
        ValueError: If a requested URN is absent from the response, if a
            returned entity's ``type`` is not ``"GLOSSARY_TERM"``, or if the
            lookup itself fails (the underlying exception is chained as the
            cause).
    """
    graph = get_graph()
    lookup_query = """
        query getGlossaryTerms($urns: [String!]!) {
            entities(urns: $urns) {
                urn
                type
                ... on GlossaryTerm {
                    name
                }
            }
        }
    """

    try:
        response = execute_graphql(
            graph,
            query=lookup_query,
            variables={"urns": term_urns},
            operation_name="getGlossaryTerms",
        )
        returned = response.get("entities", [])

        # Null entries are skipped here, so the URNs they stand for end up
        # being reported as missing below.
        known = set()
        for item in returned:
            if item is not None:
                known.add(item["urn"])

        absent = [urn for urn in term_urns if urn not in known]
        if absent:
            raise ValueError(
                f"The following glossary term URNs do not exist in DataHub: {', '.join(absent)}. "
                "Please use the search tool with entity_type filter to find existing glossary terms, "
                "or create the terms first before assigning them."
            )

        # A URN can resolve to an entity that is not a glossary term; reject
        # those too.
        wrong_type = []
        for item in returned:
            if item and item.get("type") != "GLOSSARY_TERM":
                wrong_type.append(item["urn"])
        if wrong_type:
            raise ValueError(
                f"The following URNs are not glossary term entities: {', '.join(wrong_type)}"
            )
    except ValueError:
        # Validation failures raised above already carry the final message.
        raise
    except Exception as exc:
        raise ValueError(f"Failed to validate glossary term URNs: {str(exc)}") from exc
71
+
72
+
73
+ def _batch_modify_glossary_terms(
74
+ term_urns: List[str],
75
+ entity_urns: List[str],
76
+ column_paths: Optional[List[Optional[str]]],
77
+ operation: Literal["add", "remove"],
78
+ ) -> dict:
79
+ """
80
+ Internal helper for batch glossary term operations (add/remove).
81
+
82
+ Validates inputs, constructs GraphQL mutation, and executes the operation.
83
+ """
84
+ graph = get_graph()
85
+ # Validate inputs
86
+ if not term_urns:
87
+ raise ValueError("term_urns cannot be empty")
88
+ if not entity_urns:
89
+ raise ValueError("entity_urns cannot be empty")
90
+
91
+ # Validate that all glossary term URNs exist
92
+ _validate_glossary_term_urns(term_urns)
93
+
94
+ # Handle column_paths - if not provided, create list of Nones
95
+ if column_paths is None:
96
+ column_paths = [None] * len(entity_urns)
97
+ elif len(column_paths) != len(entity_urns):
98
+ raise ValueError(
99
+ f"column_paths length ({len(column_paths)}) must match entity_urns length ({len(entity_urns)})"
100
+ )
101
+
102
+ # Build the resources list for GraphQL mutation
103
+ resources = []
104
+ for resource_urn, column_path in zip(entity_urns, column_paths, strict=True):
105
+ resource_input = {"resourceUrn": resource_urn}
106
+
107
+ # Add subresource fields if provided (for column-level glossary terms)
108
+ if column_path:
109
+ resource_input["subResource"] = column_path
110
+ resource_input["subResourceType"] = "DATASET_FIELD"
111
+
112
+ resources.append(resource_input)
113
+
114
+ # Determine mutation and operation name based on operation type
115
+ if operation == "add":
116
+ mutation = """
117
+ mutation batchAddTerms($input: BatchAddTermsInput!) {
118
+ batchAddTerms(input: $input)
119
+ }
120
+ """
121
+ operation_name = "batchAddTerms"
122
+ success_verb = "added"
123
+ failure_verb = "add"
124
+ else: # remove
125
+ mutation = """
126
+ mutation batchRemoveTerms($input: BatchRemoveTermsInput!) {
127
+ batchRemoveTerms(input: $input)
128
+ }
129
+ """
130
+ operation_name = "batchRemoveTerms"
131
+ success_verb = "removed"
132
+ failure_verb = "remove"
133
+
134
+ variables = {"input": {"termUrns": term_urns, "resources": resources}}
135
+
136
+ try:
137
+ result = execute_graphql(
138
+ graph,
139
+ query=mutation,
140
+ variables=variables,
141
+ operation_name=operation_name,
142
+ )
143
+
144
+ success = result.get(operation_name, False)
145
+ if success:
146
+ preposition = "to" if operation == "add" else "from"
147
+ return {
148
+ "success": True,
149
+ "message": f"Successfully {success_verb} {len(term_urns)} glossary term(s) {preposition} {len(entity_urns)} entit(ies)",
150
+ }
151
+ else:
152
+ raise RuntimeError(
153
+ f"Failed to {failure_verb} glossary terms - operation returned false"
154
+ )
155
+
156
+ except Exception as e:
157
+ if isinstance(e, RuntimeError):
158
+ raise
159
+ raise RuntimeError(f"Error {failure_verb} glossary terms: {str(e)}") from e
160
+
161
+
162
def add_glossary_terms(
    term_urns: List[str],
    entity_urns: List[str],
    column_paths: Optional[List[Optional[str]]] = None,
) -> dict:
    """Attach one or more glossary terms to DataHub entities or to individual columns of them.

    Multiple glossary terms can be associated with multiple entities (or schema
    fields) in a single operation, which suits bulk assignment such as applying
    business definitions, standardizing terminology, or enriching metadata
    with domain knowledge.

    Args:
        term_urns: URNs of the glossary terms to add, e.g.
            ``["urn:li:glossaryTerm:CustomerData", "urn:li:glossaryTerm:SensitiveInfo"]``.
            Each term must already exist in DataHub.
        entity_urns: URNs of the entities to annotate (dataset URNs, dashboard
            URNs, ...).
        column_paths: Optional column identifiers, one per entry of
            ``entity_urns`` (so both lists must have the same length).
            Use ``None`` or an empty string for an entity-level term, or a
            column name such as ``"customer_email"`` for a column-level term.
            Check the column names against the entity's schemaMetadata first,
            for example with the get_entity tool.

    Returns:
        Dictionary with:
            - success: ``True`` when the operation succeeded
            - message: Summary of how many terms were added to how many entities

    Raises:
        ValueError: If ``term_urns`` or ``entity_urns`` is empty, the list
            lengths do not match, or a term URN does not exist / is not a
            glossary term.
        RuntimeError: If the assignment operation fails.

    Examples:
        # Annotate several datasets at once
        add_glossary_terms(
            term_urns=["urn:li:glossaryTerm:CustomerData", "urn:li:glossaryTerm:PersonalInformation"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.customers,PROD)"
            ]
        )

        # Annotate two columns of the same dataset
        add_glossary_terms(
            term_urns=["urn:li:glossaryTerm:EmailAddress"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ],
            column_paths=["email", "contact_email"]
        )

        # Annotate a whole table and a single column in one call
        add_glossary_terms(
            term_urns=["urn:li:glossaryTerm:Revenue"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.sales,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.transactions,PROD)"
            ],
            column_paths=[None, "total_amount"]
        )

        # Calling within a DataHub context
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = add_glossary_terms(
                term_urns=["urn:li:glossaryTerm:CustomerData"],
                entity_urns=["urn:li:dataset:(...)"]
            )
    """
    return _batch_modify_glossary_terms(
        term_urns=term_urns,
        entity_urns=entity_urns,
        column_paths=column_paths,
        operation="add",
    )
228
+
229
+
230
def remove_glossary_terms(
    term_urns: List[str],
    entity_urns: List[str],
    column_paths: Optional[List[Optional[str]]] = None,
) -> dict:
    """Detach one or more glossary terms from DataHub entities or from individual columns of them.

    Multiple glossary terms can be disassociated from multiple entities (or
    schema fields) in a single operation, which suits bulk clean-up such as
    correcting misapplied business definitions, updating terminology, or
    tidying metadata.

    Args:
        term_urns: URNs of the glossary terms to remove, e.g.
            ``["urn:li:glossaryTerm:Deprecated", "urn:li:glossaryTerm:Legacy"]``.
            Each term must exist in DataHub.
        entity_urns: URNs of the entities to remove terms from (dataset URNs,
            dashboard URNs, ...).
        column_paths: Optional column identifiers, one per entry of
            ``entity_urns`` (so both lists must have the same length).
            Use ``None`` or an empty string for entity-level removal, or a
            column name such as ``"old_field"`` for column-level removal.
            Check the column names against the entity's schemaMetadata first,
            for example with the get_entity tool.

    Returns:
        Dictionary with:
            - success: ``True`` when the operation succeeded
            - message: Summary of how many terms were removed from how many entities

    Raises:
        ValueError: If ``term_urns`` or ``entity_urns`` is empty, the list
            lengths do not match, or a term URN does not exist / is not a
            glossary term.
        RuntimeError: If the removal operation fails.

    Examples:
        # Remove terms from several datasets at once
        remove_glossary_terms(
            term_urns=["urn:li:glossaryTerm:Deprecated", "urn:li:glossaryTerm:LegacySystem"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.old_users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.old_customers,PROD)"
            ]
        )

        # Remove terms from two columns of the same dataset
        remove_glossary_terms(
            term_urns=["urn:li:glossaryTerm:Confidential"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ],
            column_paths=["old_ssn_field", "legacy_tax_id"]
        )

        # Remove from a whole table and a single column in one call
        remove_glossary_terms(
            term_urns=["urn:li:glossaryTerm:Experimental"],
            entity_urns=[
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.production_table,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.users,PROD)"
            ],
            column_paths=[None, "beta_feature"]
        )

        # Calling within a DataHub context
        from datahub_agent_context.context import DataHubContext

        with DataHubContext(client.graph):
            result = remove_glossary_terms(
                term_urns=["urn:li:glossaryTerm:Deprecated"],
                entity_urns=["urn:li:dataset:(...)"]
            )
    """
    return _batch_modify_glossary_terms(
        term_urns=term_urns,
        entity_urns=entity_urns,
        column_paths=column_paths,
        operation="remove",
    )
@@ -0,0 +1,2 @@
1
+ # Marker file for PEP 561.
2
+ # This package supports type hints.