acryl-datahub 0.15.0.1rc13__py3-none-any.whl → 0.15.0.1rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (36) hide show
  1. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/METADATA +2413 -2413
  2. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/RECORD +33 -30
  3. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/WHEEL +1 -1
  4. datahub/__init__.py +1 -1
  5. datahub/emitter/mce_builder.py +3 -3
  6. datahub/emitter/mcp_patch_builder.py +36 -12
  7. datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
  8. datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
  9. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
  10. datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
  11. datahub/ingestion/source/gc/datahub_gc.py +3 -0
  12. datahub/ingestion/source/gc/execution_request_cleanup.py +13 -5
  13. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +28 -21
  14. datahub/ingestion/source/snowflake/snowflake_queries.py +6 -4
  15. datahub/ingestion/source/tableau/tableau.py +53 -18
  16. datahub/ingestion/source/tableau/tableau_common.py +18 -0
  17. datahub/ingestion/source/usage/usage_common.py +15 -1
  18. datahub/specific/aspect_helpers/__init__.py +0 -0
  19. datahub/specific/aspect_helpers/custom_properties.py +79 -0
  20. datahub/specific/aspect_helpers/ownership.py +67 -0
  21. datahub/specific/aspect_helpers/structured_properties.py +72 -0
  22. datahub/specific/aspect_helpers/tags.py +42 -0
  23. datahub/specific/aspect_helpers/terms.py +43 -0
  24. datahub/specific/chart.py +28 -184
  25. datahub/specific/dashboard.py +31 -196
  26. datahub/specific/datajob.py +34 -189
  27. datahub/specific/dataproduct.py +24 -86
  28. datahub/specific/dataset.py +48 -133
  29. datahub/specific/form.py +12 -32
  30. datahub/specific/structured_property.py +9 -9
  31. datahub/sql_parsing/sql_parsing_aggregator.py +1 -3
  32. datahub/specific/custom_properties.py +0 -37
  33. datahub/specific/ownership.py +0 -48
  34. datahub/specific/structured_properties.py +0 -53
  35. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/entry_points.txt +0 -0
  36. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/top_level.txt +0 -0
@@ -2,9 +2,9 @@ import json
2
2
  import logging
3
3
  import re
4
4
  import time
5
- from collections import OrderedDict
6
- from dataclasses import dataclass
7
- from datetime import datetime
5
+ from collections import OrderedDict, defaultdict
6
+ from dataclasses import dataclass, field as dataclass_field
7
+ from datetime import datetime, timedelta, timezone
8
8
  from functools import lru_cache
9
9
  from typing import (
10
10
  Any,
@@ -109,6 +109,7 @@ from datahub.ingestion.source.tableau.tableau_common import (
109
109
  make_filter,
110
110
  make_fine_grained_lineage_class,
111
111
  make_upstream_class,
112
+ optimize_query_filter,
112
113
  published_datasource_graphql_query,
113
114
  query_metadata_cursor_based_pagination,
114
115
  sheet_graphql_query,
@@ -195,6 +196,11 @@ RETRIABLE_ERROR_CODES = [
195
196
  504, # Gateway Timeout
196
197
  ]
197
198
 
199
+ # From experience, this expiry time typically ranges from 50 minutes
200
+ # to 2 hours but might as well be configurable. We will allow up to
201
+ # 10 minutes of such expiry time
202
+ REGULAR_AUTH_EXPIRY_PERIOD = timedelta(minutes=10)
203
+
198
204
  logger: logging.Logger = logging.getLogger(__name__)
199
205
 
200
206
  # Replace / with |
@@ -636,6 +642,7 @@ class SiteIdContentUrl:
636
642
  site_content_url: str
637
643
 
638
644
 
645
+ @dataclass
639
646
  class TableauSourceReport(StaleEntityRemovalSourceReport):
640
647
  get_all_datasources_query_failed: bool = False
641
648
  num_get_datasource_query_failures: int = 0
@@ -652,7 +659,14 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
652
659
  num_upstream_table_lineage_failed_parse_sql: int = 0
653
660
  num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
654
661
  num_hidden_assets_skipped: int = 0
655
- logged_in_user: List[UserInfo] = []
662
+ logged_in_user: List[UserInfo] = dataclass_field(default_factory=list)
663
+ last_authenticated_at: Optional[datetime] = None
664
+
665
+ num_expected_tableau_metadata_queries: int = 0
666
+ num_actual_tableau_metadata_queries: int = 0
667
+ tableau_server_error_stats: Dict[str, int] = dataclass_field(
668
+ default_factory=(lambda: defaultdict(int))
669
+ )
656
670
 
657
671
 
658
672
  def report_user_role(report: TableauSourceReport, server: Server) -> None:
@@ -723,6 +737,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
723
737
  try:
724
738
  logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
725
739
  self.server = self.config.make_tableau_client(site_content_url)
740
+ self.report.last_authenticated_at = datetime.now(timezone.utc)
726
741
  report_user_role(report=self.report, server=self.server)
727
742
  # Note that we're not catching ConfigurationError, since we want that to throw.
728
743
  except ValueError as e:
@@ -806,10 +821,13 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
806
821
  site_source = TableauSiteSource(
807
822
  config=self.config,
808
823
  ctx=self.ctx,
809
- site=site
810
- if site
811
- else SiteIdContentUrl(
812
- site_id=self.server.site_id, site_content_url=self.config.site
824
+ site=(
825
+ site
826
+ if site
827
+ else SiteIdContentUrl(
828
+ site_id=self.server.site_id,
829
+ site_content_url=self.config.site,
830
+ )
813
831
  ),
814
832
  report=self.report,
815
833
  server=self.server,
@@ -924,6 +942,7 @@ class TableauSiteSource:
924
942
  # Sign-in again may not be enough because Tableau sometimes caches invalid sessions
925
943
  # so we need to recreate the Tableau Server object
926
944
  self.server = self.config.make_tableau_client(self.site_content_url)
945
+ self.report.last_authenticated_at = datetime.now(timezone.utc)
927
946
 
928
947
  def _populate_usage_stat_registry(self) -> None:
929
948
  if self.server is None:
@@ -1189,6 +1208,7 @@ class TableauSiteSource:
1189
1208
  )
1190
1209
  try:
1191
1210
  assert self.server is not None
1211
+ self.report.num_actual_tableau_metadata_queries += 1
1192
1212
  query_data = query_metadata_cursor_based_pagination(
1193
1213
  server=self.server,
1194
1214
  main_query=query,
@@ -1198,25 +1218,36 @@ class TableauSiteSource:
1198
1218
  qry_filter=query_filter,
1199
1219
  )
1200
1220
 
1201
- except REAUTHENTICATE_ERRORS:
1202
- if not retry_on_auth_error:
1221
+ except REAUTHENTICATE_ERRORS as e:
1222
+ self.report.tableau_server_error_stats[e.__class__.__name__] += 1
1223
+ if not retry_on_auth_error or retries_remaining <= 0:
1203
1224
  raise
1204
1225
 
1205
- # If ingestion has been running for over 2 hours, the Tableau
1206
- # temporary credentials will expire. If this happens, this exception
1207
- # will be thrown, and we need to re-authenticate and retry.
1208
- self._re_authenticate()
1226
+ # We have been getting some irregular authorization errors like below well before the expected expiry time
1227
+ # - within a few seconds of initial authentication. We'll retry without re-auth for such cases.
1228
+ # <class 'tableauserverclient.server.endpoint.exceptions.NonXMLResponseError'>:
1229
+ # b'{"timestamp":"xxx","status":401,"error":"Unauthorized","path":"/relationship-service-war/graphql"}'
1230
+ if self.report.last_authenticated_at and (
1231
+ datetime.now(timezone.utc) - self.report.last_authenticated_at
1232
+ > REGULAR_AUTH_EXPIRY_PERIOD
1233
+ ):
1234
+ # If ingestion has been running for over 2 hours, the Tableau
1235
+ # temporary credentials will expire. If this happens, this exception
1236
+ # will be thrown, and we need to re-authenticate and retry.
1237
+ self._re_authenticate()
1238
+
1209
1239
  return self.get_connection_object_page(
1210
1240
  query=query,
1211
1241
  connection_type=connection_type,
1212
1242
  query_filter=query_filter,
1213
1243
  fetch_size=fetch_size,
1214
1244
  current_cursor=current_cursor,
1215
- retry_on_auth_error=False,
1245
+ retry_on_auth_error=True,
1216
1246
  retries_remaining=retries_remaining - 1,
1217
1247
  )
1218
1248
 
1219
1249
  except InternalServerError as ise:
1250
+ self.report.tableau_server_error_stats[InternalServerError.__name__] += 1
1220
1251
  # In some cases Tableau Server returns 504 error, which is a timeout error, so it is worth retrying.
1221
1252
  # Extended with other retryable errors.
1222
1253
  if ise.code in RETRIABLE_ERROR_CODES:
@@ -1229,13 +1260,14 @@ class TableauSiteSource:
1229
1260
  query_filter=query_filter,
1230
1261
  fetch_size=fetch_size,
1231
1262
  current_cursor=current_cursor,
1232
- retry_on_auth_error=False,
1263
+ retry_on_auth_error=True,
1233
1264
  retries_remaining=retries_remaining - 1,
1234
1265
  )
1235
1266
  else:
1236
1267
  raise ise
1237
1268
 
1238
1269
  except OSError:
1270
+ self.report.tableau_server_error_stats[OSError.__name__] += 1
1239
1271
  # In tableauseverclient 0.26 (which was yanked and released in 0.28 on 2023-10-04),
1240
1272
  # the request logic was changed to use threads.
1241
1273
  # https://github.com/tableau/server-client-python/commit/307d8a20a30f32c1ce615cca7c6a78b9b9bff081
@@ -1250,7 +1282,7 @@ class TableauSiteSource:
1250
1282
  query_filter=query_filter,
1251
1283
  fetch_size=fetch_size,
1252
1284
  current_cursor=current_cursor,
1253
- retry_on_auth_error=False,
1285
+ retry_on_auth_error=True,
1254
1286
  retries_remaining=retries_remaining - 1,
1255
1287
  )
1256
1288
 
@@ -1338,7 +1370,7 @@ class TableauSiteSource:
1338
1370
  query_filter=query_filter,
1339
1371
  fetch_size=fetch_size,
1340
1372
  current_cursor=current_cursor,
1341
- retry_on_auth_error=False,
1373
+ retry_on_auth_error=True,
1342
1374
  retries_remaining=retries_remaining,
1343
1375
  )
1344
1376
  raise RuntimeError(f"Query {connection_type} error: {errors}")
@@ -1363,6 +1395,8 @@ class TableauSiteSource:
1363
1395
  query_filter: dict = {},
1364
1396
  page_size_override: Optional[int] = None,
1365
1397
  ) -> Iterable[dict]:
1398
+ query_filter = optimize_query_filter(query_filter)
1399
+
1366
1400
  # Calls the get_connection_object_page function to get the objects,
1367
1401
  # and automatically handles pagination.
1368
1402
  page_size = page_size_override or self.config.page_size
@@ -1374,6 +1408,7 @@ class TableauSiteSource:
1374
1408
  while has_next_page:
1375
1409
  filter_: str = make_filter(filter_page)
1376
1410
 
1411
+ self.report.num_expected_tableau_metadata_queries += 1
1377
1412
  (
1378
1413
  connection_objects,
1379
1414
  current_cursor,
@@ -1,3 +1,4 @@
1
+ import copy
1
2
  import html
2
3
  import json
3
4
  import logging
@@ -35,6 +36,7 @@ from datahub.metadata.schema_classes import (
35
36
  UpstreamClass,
36
37
  )
37
38
  from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult
39
+ from datahub.utilities.ordered_set import OrderedSet
38
40
 
39
41
  logger = logging.getLogger(__name__)
40
42
 
@@ -1000,3 +1002,19 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
1000
1002
  ]
1001
1003
 
1002
1004
  return filter_pages
1005
+
1006
+
1007
def optimize_query_filter(query_filter: dict) -> dict:
    """Return a copy of *query_filter* with duplicate filter values removed.

    Duplicates in the filter cause duplicates in the result,
    leading to entities/aspects being emitted multiple times unnecessarily.
    Input order of the surviving values is preserved.
    """
    optimized_query = copy.deepcopy(query_filter)
    # Only these two filter keys are known to accumulate duplicates.
    for filter_key in (c.ID_WITH_IN, c.PROJECT_NAME_WITH_IN):
        if query_filter.get(filter_key):
            optimized_query[filter_key] = list(OrderedSet(query_filter[filter_key]))
    return optimized_query
@@ -54,6 +54,20 @@ def default_user_urn_builder(email: str) -> str:
54
54
  return builder.make_user_urn(email.split("@")[0])
55
55
 
56
56
 
57
def extract_user_email(user: str) -> Optional[str]:
    """Pull an email address out of a raw user string or a corpuser/corpGroup urn.

    Returns None when the input carries no email address.

    >>> extract_user_email('urn:li:corpuser:abc@xyz.com')
    'abc@xyz.com'
    >>> extract_user_email('urn:li:corpuser:abc')
    >>> extract_user_email('abc@xyz.com')
    'abc@xyz.com'
    """
    for urn_prefix in ("urn:li:corpuser:", "urn:li:corpGroup:"):
        if user.startswith(urn_prefix):
            # Keep only the id portion after the last colon of the urn.
            user = user.split(":")[-1]
            break
    if "@" in user:
        return user
    return None
69
+
70
+
57
71
  def make_usage_workunit(
58
72
  bucket_start_time: datetime,
59
73
  resource: ResourceType,
@@ -104,7 +118,7 @@ def make_usage_workunit(
104
118
  DatasetUserUsageCountsClass(
105
119
  user=user_urn_builder(user),
106
120
  count=count,
107
- userEmail=user if "@" in user else None,
121
+ userEmail=extract_user_email(user),
108
122
  )
109
123
  for user, count in user_freq
110
124
  ],
File without changes
@@ -0,0 +1,79 @@
1
from abc import abstractmethod
from typing import Dict, Optional, Tuple

from typing_extensions import Self

from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath


class HasCustomPropertiesPatch(MetadataPatchProposal):
    """Mixin that adds custom-property patch operations to a patch builder."""

    @classmethod
    @abstractmethod
    def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
        """Return the (aspect name, patch path) where custom properties live."""
        # Fixed: first parameter of a classmethod is conventionally `cls`,
        # not `self` — the original naming was misleading for implementers.
        ...

    def add_custom_property(self, key: str, value: str) -> Self:
        """Add a custom property to the entity.

        Args:
            key: The key of the custom property.
            value: The value of the custom property.

        Returns:
            The patch builder instance.
        """
        aspect_name, path = self._custom_properties_location()
        self._add_patch(
            aspect_name,
            "add",
            path=(*path, key),
            value=value,
        )
        return self

    def add_custom_properties(
        self, custom_properties: Optional[Dict[str, str]] = None
    ) -> Self:
        """Add multiple custom properties to the entity.

        Args:
            custom_properties: Mapping of property keys to values; no-op when None.

        Returns:
            The patch builder instance.
        """
        if custom_properties is not None:
            for key, value in custom_properties.items():
                self.add_custom_property(key, value)
        return self

    def remove_custom_property(self, key: str) -> Self:
        """Remove a custom property from the entity.

        Args:
            key: The key of the custom property to remove.

        Returns:
            The patch builder instance.
        """
        aspect_name, path = self._custom_properties_location()
        self._add_patch(
            aspect_name,
            "remove",
            path=(*path, key),
            value={},
        )
        return self

    def set_custom_properties(self, custom_properties: Dict[str, str]) -> Self:
        """Sets the custom properties of the entity.

        This method replaces all existing custom properties with the given dictionary.

        Args:
            custom_properties: A dictionary containing the custom properties to be set.

        Returns:
            The patch builder instance.
        """
        aspect_name, path = self._custom_properties_location()
        self._add_patch(
            aspect_name,
            "add",
            path=path,
            value=custom_properties,
        )
        return self
@@ -0,0 +1,67 @@
1
from typing import List, Optional

from typing_extensions import Self

from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
from datahub.metadata.schema_classes import (
    OwnerClass,
    OwnershipClass,
    OwnershipTypeClass,
)


class HasOwnershipPatch(MetadataPatchProposal):
    def add_owner(self, owner: OwnerClass) -> Self:
        """Attach a single owner to this entity.

        Args:
            owner: The Owner object to add.

        Returns:
            The patch builder instance.
        """
        patch_path = ("owners", owner.owner, str(owner.type))
        self._add_patch(
            OwnershipClass.ASPECT_NAME,
            "add",
            path=patch_path,
            value=owner,
        )
        return self

    def remove_owner(
        self, owner: str, owner_type: Optional[OwnershipTypeClass] = None
    ) -> Self:
        """Detach an owner from this entity.

        When owner_type is omitted, the owner is removed regardless of
        ownership type.

        Args:
            owner: The owner to remove.
            owner_type: The ownership type of the owner (optional).

        Returns:
            The patch builder instance.
        """
        patch_path = ("owners", owner)
        if owner_type:
            patch_path += (str(owner_type),)
        self._add_patch(
            OwnershipClass.ASPECT_NAME,
            "remove",
            path=patch_path,
            value=owner,
        )
        return self

    def set_owners(self, owners: List[OwnerClass]) -> Self:
        """Replace the full owner list of this entity.

        This overwrites every existing owner with the new list - it doesn't
        really patch things.

        Args:
            owners: The list of owners to set.

        Returns:
            The patch builder instance.
        """
        self._add_patch(
            OwnershipClass.ASPECT_NAME, "add", path=("owners",), value=owners
        )
        return self
@@ -0,0 +1,72 @@
1
from typing import List, Union

from typing_extensions import Self

from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
from datahub.metadata.schema_classes import (
    StructuredPropertiesClass,
    StructuredPropertyValueAssignmentClass,
)
from datahub.utilities.urns.structured_properties_urn import (
    make_structured_property_urn,
)


class HasStructuredPropertiesPatch(MetadataPatchProposal):
    def set_structured_property(
        self, key: str, value: Union[str, float, List[Union[str, float]]]
    ) -> Self:
        """Add or update a structured property.

        Implemented as a remove followed by an add, so any prior assignment
        of the same property is replaced.

        Args:
            key: the name of the property (either bare or urn form)
            value: the value of the property (for multi-valued properties, this can be a list)

        Returns:
            The patch builder instance.
        """
        self.remove_structured_property(key)
        self.add_structured_property(key, value)
        return self

    def remove_structured_property(self, key: str) -> Self:
        """Remove a structured property.

        Args:
            key: the name of the property (either bare or urn form)

        Returns:
            The patch builder instance.
        """
        self._add_patch(
            StructuredPropertiesClass.ASPECT_NAME,
            "remove",
            path=("properties", make_structured_property_urn(key)),
            value={},
        )
        return self

    def add_structured_property(
        self, key: str, value: Union[str, float, List[Union[str, float]]]
    ) -> Self:
        """Add a structured property.

        Args:
            key: the name of the property (either bare or urn form)
            value: the value of the property (for multi-valued properties, this value will be appended to the list)

        Returns:
            The patch builder instance.
        """
        # Single values are wrapped so the assignment always carries a list.
        values = value if isinstance(value, list) else [value]
        self._add_patch(
            StructuredPropertiesClass.ASPECT_NAME,
            "add",
            path=("properties", make_structured_property_urn(key)),
            value=StructuredPropertyValueAssignmentClass(
                propertyUrn=make_structured_property_urn(key),
                values=values,
            ),
        )
        return self
@@ -0,0 +1,42 @@
1
from typing import Union

from typing_extensions import Self

from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
from datahub.metadata.schema_classes import (
    GlobalTagsClass as GlobalTags,
    TagAssociationClass as Tag,
)
from datahub.metadata.urns import TagUrn, Urn


class HasTagsPatch(MetadataPatchProposal):
    def add_tag(self, tag: Tag) -> Self:
        """Attach a tag to this entity.

        Args:
            tag: The Tag object representing the tag to be added.

        Returns:
            The patch builder instance.
        """
        # TODO: Make this support raw strings, in addition to Tag objects.
        self._add_patch(
            GlobalTags.ASPECT_NAME, "add", path=("tags", tag.tag), value=tag
        )
        return self

    def remove_tag(self, tag: Union[str, Urn]) -> Self:
        """Detach a tag from this entity.

        Args:
            tag: The tag to remove, specified as a string or Urn object.

        Returns:
            The patch builder instance.
        """
        # Bare names (no urn prefix) are promoted to a TagUrn first.
        is_bare_name = isinstance(tag, str) and not tag.startswith("urn:li:tag:")
        if is_bare_name:
            tag = TagUrn.create_from_id(tag)
        self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=("tags", tag), value={})
        return self
@@ -0,0 +1,43 @@
1
from typing import Union

from typing_extensions import Self

from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
from datahub.metadata.schema_classes import (
    GlossaryTermAssociationClass as Term,
    GlossaryTermsClass,
)
from datahub.metadata.urns import GlossaryTermUrn, Urn


class HasTermsPatch(MetadataPatchProposal):
    def add_term(self, term: Term) -> Self:
        """Attach a glossary term to this entity.

        Args:
            term: The Term object representing the glossary term to be added.

        Returns:
            The patch builder instance.
        """
        # TODO: Make this support raw strings, in addition to Term objects.
        self._add_patch(
            GlossaryTermsClass.ASPECT_NAME, "add", path=("terms", term.urn), value=term
        )
        return self

    def remove_term(self, term: Union[str, Urn]) -> Self:
        """Detach a glossary term from this entity.

        Args:
            term: The term to remove, specified as a string or Urn object.

        Returns:
            The patch builder instance.
        """
        # Bare names (no urn prefix) are promoted to a GlossaryTermUrn first.
        needs_urn_wrapping = isinstance(term, str) and not term.startswith(
            "urn:li:glossaryTerm:"
        )
        if needs_urn_wrapping:
            term = GlossaryTermUrn(term)
        self._add_patch(
            GlossaryTermsClass.ASPECT_NAME, "remove", path=("terms", term), value={}
        )
        return self