acryl-datahub 0.15.0.1rc13__py3-none-any.whl → 0.15.0.1rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/METADATA +2413 -2413
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/RECORD +33 -30
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/WHEEL +1 -1
- datahub/__init__.py +1 -1
- datahub/emitter/mce_builder.py +3 -3
- datahub/emitter/mcp_patch_builder.py +36 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
- datahub/ingestion/source/gc/datahub_gc.py +3 -0
- datahub/ingestion/source/gc/execution_request_cleanup.py +13 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +28 -21
- datahub/ingestion/source/snowflake/snowflake_queries.py +6 -4
- datahub/ingestion/source/tableau/tableau.py +53 -18
- datahub/ingestion/source/tableau/tableau_common.py +18 -0
- datahub/ingestion/source/usage/usage_common.py +15 -1
- datahub/specific/aspect_helpers/__init__.py +0 -0
- datahub/specific/aspect_helpers/custom_properties.py +79 -0
- datahub/specific/aspect_helpers/ownership.py +67 -0
- datahub/specific/aspect_helpers/structured_properties.py +72 -0
- datahub/specific/aspect_helpers/tags.py +42 -0
- datahub/specific/aspect_helpers/terms.py +43 -0
- datahub/specific/chart.py +28 -184
- datahub/specific/dashboard.py +31 -196
- datahub/specific/datajob.py +34 -189
- datahub/specific/dataproduct.py +24 -86
- datahub/specific/dataset.py +48 -133
- datahub/specific/form.py +12 -32
- datahub/specific/structured_property.py +9 -9
- datahub/sql_parsing/sql_parsing_aggregator.py +1 -3
- datahub/specific/custom_properties.py +0 -37
- datahub/specific/ownership.py +0 -48
- datahub/specific/structured_properties.py +0 -53
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/top_level.txt +0 -0
|
@@ -2,9 +2,9 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
|
-
from collections import OrderedDict
|
|
6
|
-
from dataclasses import dataclass
|
|
7
|
-
from datetime import datetime
|
|
5
|
+
from collections import OrderedDict, defaultdict
|
|
6
|
+
from dataclasses import dataclass, field as dataclass_field
|
|
7
|
+
from datetime import datetime, timedelta, timezone
|
|
8
8
|
from functools import lru_cache
|
|
9
9
|
from typing import (
|
|
10
10
|
Any,
|
|
@@ -109,6 +109,7 @@ from datahub.ingestion.source.tableau.tableau_common import (
|
|
|
109
109
|
make_filter,
|
|
110
110
|
make_fine_grained_lineage_class,
|
|
111
111
|
make_upstream_class,
|
|
112
|
+
optimize_query_filter,
|
|
112
113
|
published_datasource_graphql_query,
|
|
113
114
|
query_metadata_cursor_based_pagination,
|
|
114
115
|
sheet_graphql_query,
|
|
@@ -195,6 +196,11 @@ RETRIABLE_ERROR_CODES = [
|
|
|
195
196
|
504, # Gateway Timeout
|
|
196
197
|
]
|
|
197
198
|
|
|
199
|
+
# From experience, this expiry time typically ranges from 50 minutes
|
|
200
|
+
# to 2 hours but might as well be configurable. We will allow up to
|
|
201
|
+
# 10 minutes after the last authentication before treating an auth error as a real expiry
|
|
202
|
+
REGULAR_AUTH_EXPIRY_PERIOD = timedelta(minutes=10)
|
|
203
|
+
|
|
198
204
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
199
205
|
|
|
200
206
|
# Replace / with |
|
|
@@ -636,6 +642,7 @@ class SiteIdContentUrl:
|
|
|
636
642
|
site_content_url: str
|
|
637
643
|
|
|
638
644
|
|
|
645
|
+
@dataclass
|
|
639
646
|
class TableauSourceReport(StaleEntityRemovalSourceReport):
|
|
640
647
|
get_all_datasources_query_failed: bool = False
|
|
641
648
|
num_get_datasource_query_failures: int = 0
|
|
@@ -652,7 +659,14 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
|
|
|
652
659
|
num_upstream_table_lineage_failed_parse_sql: int = 0
|
|
653
660
|
num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
|
|
654
661
|
num_hidden_assets_skipped: int = 0
|
|
655
|
-
logged_in_user: List[UserInfo] =
|
|
662
|
+
logged_in_user: List[UserInfo] = dataclass_field(default_factory=list)
|
|
663
|
+
last_authenticated_at: Optional[datetime] = None
|
|
664
|
+
|
|
665
|
+
num_expected_tableau_metadata_queries: int = 0
|
|
666
|
+
num_actual_tableau_metadata_queries: int = 0
|
|
667
|
+
tableau_server_error_stats: Dict[str, int] = dataclass_field(
|
|
668
|
+
default_factory=(lambda: defaultdict(int))
|
|
669
|
+
)
|
|
656
670
|
|
|
657
671
|
|
|
658
672
|
def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
@@ -723,6 +737,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
723
737
|
try:
|
|
724
738
|
logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
|
|
725
739
|
self.server = self.config.make_tableau_client(site_content_url)
|
|
740
|
+
self.report.last_authenticated_at = datetime.now(timezone.utc)
|
|
726
741
|
report_user_role(report=self.report, server=self.server)
|
|
727
742
|
# Note that we're not catching ConfigurationError, since we want that to throw.
|
|
728
743
|
except ValueError as e:
|
|
@@ -806,10 +821,13 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
806
821
|
site_source = TableauSiteSource(
|
|
807
822
|
config=self.config,
|
|
808
823
|
ctx=self.ctx,
|
|
809
|
-
site=
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
824
|
+
site=(
|
|
825
|
+
site
|
|
826
|
+
if site
|
|
827
|
+
else SiteIdContentUrl(
|
|
828
|
+
site_id=self.server.site_id,
|
|
829
|
+
site_content_url=self.config.site,
|
|
830
|
+
)
|
|
813
831
|
),
|
|
814
832
|
report=self.report,
|
|
815
833
|
server=self.server,
|
|
@@ -924,6 +942,7 @@ class TableauSiteSource:
|
|
|
924
942
|
# Sign-in again may not be enough because Tableau sometimes caches invalid sessions
|
|
925
943
|
# so we need to recreate the Tableau Server object
|
|
926
944
|
self.server = self.config.make_tableau_client(self.site_content_url)
|
|
945
|
+
self.report.last_authenticated_at = datetime.now(timezone.utc)
|
|
927
946
|
|
|
928
947
|
def _populate_usage_stat_registry(self) -> None:
|
|
929
948
|
if self.server is None:
|
|
@@ -1189,6 +1208,7 @@ class TableauSiteSource:
|
|
|
1189
1208
|
)
|
|
1190
1209
|
try:
|
|
1191
1210
|
assert self.server is not None
|
|
1211
|
+
self.report.num_actual_tableau_metadata_queries += 1
|
|
1192
1212
|
query_data = query_metadata_cursor_based_pagination(
|
|
1193
1213
|
server=self.server,
|
|
1194
1214
|
main_query=query,
|
|
@@ -1198,25 +1218,36 @@ class TableauSiteSource:
|
|
|
1198
1218
|
qry_filter=query_filter,
|
|
1199
1219
|
)
|
|
1200
1220
|
|
|
1201
|
-
except REAUTHENTICATE_ERRORS:
|
|
1202
|
-
|
|
1221
|
+
except REAUTHENTICATE_ERRORS as e:
|
|
1222
|
+
self.report.tableau_server_error_stats[e.__class__.__name__] += 1
|
|
1223
|
+
if not retry_on_auth_error or retries_remaining <= 0:
|
|
1203
1224
|
raise
|
|
1204
1225
|
|
|
1205
|
-
#
|
|
1206
|
-
#
|
|
1207
|
-
#
|
|
1208
|
-
|
|
1226
|
+
# We have been getting some irregular authorization errors like below well before the expected expiry time
|
|
1227
|
+
# - within a few seconds of initial authentication. We'll retry without re-auth for such cases.
|
|
1228
|
+
# <class 'tableauserverclient.server.endpoint.exceptions.NonXMLResponseError'>:
|
|
1229
|
+
# b'{"timestamp":"xxx","status":401,"error":"Unauthorized","path":"/relationship-service-war/graphql"}'
|
|
1230
|
+
if self.report.last_authenticated_at and (
|
|
1231
|
+
datetime.now(timezone.utc) - self.report.last_authenticated_at
|
|
1232
|
+
> REGULAR_AUTH_EXPIRY_PERIOD
|
|
1233
|
+
):
|
|
1234
|
+
# If ingestion has been running long enough (typically 50 minutes to 2 hours), the Tableau
|
|
1235
|
+
# temporary credentials will expire. If this happens, this exception
|
|
1236
|
+
# will be thrown, and we need to re-authenticate and retry.
|
|
1237
|
+
self._re_authenticate()
|
|
1238
|
+
|
|
1209
1239
|
return self.get_connection_object_page(
|
|
1210
1240
|
query=query,
|
|
1211
1241
|
connection_type=connection_type,
|
|
1212
1242
|
query_filter=query_filter,
|
|
1213
1243
|
fetch_size=fetch_size,
|
|
1214
1244
|
current_cursor=current_cursor,
|
|
1215
|
-
retry_on_auth_error=
|
|
1245
|
+
retry_on_auth_error=True,
|
|
1216
1246
|
retries_remaining=retries_remaining - 1,
|
|
1217
1247
|
)
|
|
1218
1248
|
|
|
1219
1249
|
except InternalServerError as ise:
|
|
1250
|
+
self.report.tableau_server_error_stats[InternalServerError.__name__] += 1
|
|
1220
1251
|
# In some cases Tableau Server returns a 504 error, which is a timeout error, so it is worth retrying.
|
|
1221
1252
|
# Extended with other retryable errors.
|
|
1222
1253
|
if ise.code in RETRIABLE_ERROR_CODES:
|
|
@@ -1229,13 +1260,14 @@ class TableauSiteSource:
|
|
|
1229
1260
|
query_filter=query_filter,
|
|
1230
1261
|
fetch_size=fetch_size,
|
|
1231
1262
|
current_cursor=current_cursor,
|
|
1232
|
-
retry_on_auth_error=
|
|
1263
|
+
retry_on_auth_error=True,
|
|
1233
1264
|
retries_remaining=retries_remaining - 1,
|
|
1234
1265
|
)
|
|
1235
1266
|
else:
|
|
1236
1267
|
raise ise
|
|
1237
1268
|
|
|
1238
1269
|
except OSError:
|
|
1270
|
+
self.report.tableau_server_error_stats[OSError.__name__] += 1
|
|
1239
1271
|
# In tableauserverclient 0.26 (which was yanked and released in 0.28 on 2023-10-04),
|
|
1240
1272
|
# the request logic was changed to use threads.
|
|
1241
1273
|
# https://github.com/tableau/server-client-python/commit/307d8a20a30f32c1ce615cca7c6a78b9b9bff081
|
|
@@ -1250,7 +1282,7 @@ class TableauSiteSource:
|
|
|
1250
1282
|
query_filter=query_filter,
|
|
1251
1283
|
fetch_size=fetch_size,
|
|
1252
1284
|
current_cursor=current_cursor,
|
|
1253
|
-
retry_on_auth_error=
|
|
1285
|
+
retry_on_auth_error=True,
|
|
1254
1286
|
retries_remaining=retries_remaining - 1,
|
|
1255
1287
|
)
|
|
1256
1288
|
|
|
@@ -1338,7 +1370,7 @@ class TableauSiteSource:
|
|
|
1338
1370
|
query_filter=query_filter,
|
|
1339
1371
|
fetch_size=fetch_size,
|
|
1340
1372
|
current_cursor=current_cursor,
|
|
1341
|
-
retry_on_auth_error=
|
|
1373
|
+
retry_on_auth_error=True,
|
|
1342
1374
|
retries_remaining=retries_remaining,
|
|
1343
1375
|
)
|
|
1344
1376
|
raise RuntimeError(f"Query {connection_type} error: {errors}")
|
|
@@ -1363,6 +1395,8 @@ class TableauSiteSource:
|
|
|
1363
1395
|
query_filter: dict = {},
|
|
1364
1396
|
page_size_override: Optional[int] = None,
|
|
1365
1397
|
) -> Iterable[dict]:
|
|
1398
|
+
query_filter = optimize_query_filter(query_filter)
|
|
1399
|
+
|
|
1366
1400
|
# Calls the get_connection_object_page function to get the objects,
|
|
1367
1401
|
# and automatically handles pagination.
|
|
1368
1402
|
page_size = page_size_override or self.config.page_size
|
|
@@ -1374,6 +1408,7 @@ class TableauSiteSource:
|
|
|
1374
1408
|
while has_next_page:
|
|
1375
1409
|
filter_: str = make_filter(filter_page)
|
|
1376
1410
|
|
|
1411
|
+
self.report.num_expected_tableau_metadata_queries += 1
|
|
1377
1412
|
(
|
|
1378
1413
|
connection_objects,
|
|
1379
1414
|
current_cursor,
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
import html
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
@@ -35,6 +36,7 @@ from datahub.metadata.schema_classes import (
|
|
|
35
36
|
UpstreamClass,
|
|
36
37
|
)
|
|
37
38
|
from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult
|
|
39
|
+
from datahub.utilities.ordered_set import OrderedSet
|
|
38
40
|
|
|
39
41
|
logger = logging.getLogger(__name__)
|
|
40
42
|
|
|
@@ -1000,3 +1002,19 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
|
|
|
1000
1002
|
]
|
|
1001
1003
|
|
|
1002
1004
|
return filter_pages
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
def optimize_query_filter(query_filter: dict) -> dict:
|
|
1008
|
+
"""
|
|
1009
|
+
Duplicates in the filter cause duplicates in the result,
|
|
1010
|
+
leading to entities/aspects being emitted multiple times unnecessarily
|
|
1011
|
+
"""
|
|
1012
|
+
optimized_query = copy.deepcopy(query_filter)
|
|
1013
|
+
|
|
1014
|
+
if query_filter.get(c.ID_WITH_IN):
|
|
1015
|
+
optimized_query[c.ID_WITH_IN] = list(OrderedSet(query_filter[c.ID_WITH_IN]))
|
|
1016
|
+
if query_filter.get(c.PROJECT_NAME_WITH_IN):
|
|
1017
|
+
optimized_query[c.PROJECT_NAME_WITH_IN] = list(
|
|
1018
|
+
OrderedSet(query_filter[c.PROJECT_NAME_WITH_IN])
|
|
1019
|
+
)
|
|
1020
|
+
return optimized_query
|
|
@@ -54,6 +54,20 @@ def default_user_urn_builder(email: str) -> str:
|
|
|
54
54
|
return builder.make_user_urn(email.split("@")[0])
|
|
55
55
|
|
|
56
56
|
|
|
57
|
+
def extract_user_email(user: str) -> Optional[str]:
|
|
58
|
+
"""Extracts user email from user input
|
|
59
|
+
|
|
60
|
+
>>> extract_user_email('urn:li:corpuser:abc@xyz.com')
|
|
61
|
+
'abc@xyz.com'
|
|
62
|
+
>>> extract_user_email('urn:li:corpuser:abc')
|
|
63
|
+
>>> extract_user_email('abc@xyz.com')
|
|
64
|
+
'abc@xyz.com'
|
|
65
|
+
"""
|
|
66
|
+
if user.startswith(("urn:li:corpuser:", "urn:li:corpGroup:")):
|
|
67
|
+
user = user.split(":")[-1]
|
|
68
|
+
return user if "@" in user else None
|
|
69
|
+
|
|
70
|
+
|
|
57
71
|
def make_usage_workunit(
|
|
58
72
|
bucket_start_time: datetime,
|
|
59
73
|
resource: ResourceType,
|
|
@@ -104,7 +118,7 @@ def make_usage_workunit(
|
|
|
104
118
|
DatasetUserUsageCountsClass(
|
|
105
119
|
user=user_urn_builder(user),
|
|
106
120
|
count=count,
|
|
107
|
-
userEmail=user
|
|
121
|
+
userEmail=extract_user_email(user),
|
|
108
122
|
)
|
|
109
123
|
for user, count in user_freq
|
|
110
124
|
],
|
|
File without changes
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import Dict, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self
|
|
5
|
+
|
|
6
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HasCustomPropertiesPatch(MetadataPatchProposal):
|
|
10
|
+
@classmethod
|
|
11
|
+
@abstractmethod
|
|
12
|
+
def _custom_properties_location(self) -> Tuple[str, PatchPath]:
|
|
13
|
+
...
|
|
14
|
+
|
|
15
|
+
def add_custom_property(self, key: str, value: str) -> Self:
|
|
16
|
+
"""Add a custom property to the entity.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
key: The key of the custom property.
|
|
20
|
+
value: The value of the custom property.
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
The patch builder instance.
|
|
24
|
+
"""
|
|
25
|
+
aspect_name, path = self._custom_properties_location()
|
|
26
|
+
self._add_patch(
|
|
27
|
+
aspect_name,
|
|
28
|
+
"add",
|
|
29
|
+
path=(*path, key),
|
|
30
|
+
value=value,
|
|
31
|
+
)
|
|
32
|
+
return self
|
|
33
|
+
|
|
34
|
+
def add_custom_properties(
|
|
35
|
+
self, custom_properties: Optional[Dict[str, str]] = None
|
|
36
|
+
) -> Self:
|
|
37
|
+
if custom_properties is not None:
|
|
38
|
+
for key, value in custom_properties.items():
|
|
39
|
+
self.add_custom_property(key, value)
|
|
40
|
+
return self
|
|
41
|
+
|
|
42
|
+
def remove_custom_property(self, key: str) -> Self:
|
|
43
|
+
"""Remove a custom property from the entity.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
key: The key of the custom property to remove.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
The patch builder instance.
|
|
50
|
+
"""
|
|
51
|
+
aspect_name, path = self._custom_properties_location()
|
|
52
|
+
self._add_patch(
|
|
53
|
+
aspect_name,
|
|
54
|
+
"remove",
|
|
55
|
+
path=(*path, key),
|
|
56
|
+
value={},
|
|
57
|
+
)
|
|
58
|
+
return self
|
|
59
|
+
|
|
60
|
+
def set_custom_properties(self, custom_properties: Dict[str, str]) -> Self:
|
|
61
|
+
"""Sets the custom properties of the entity.
|
|
62
|
+
|
|
63
|
+
This method replaces all existing custom properties with the given dictionary.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
custom_properties: A dictionary containing the custom properties to be set.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
The patch builder instance.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
aspect_name, path = self._custom_properties_location()
|
|
73
|
+
self._add_patch(
|
|
74
|
+
aspect_name,
|
|
75
|
+
"add",
|
|
76
|
+
path=path,
|
|
77
|
+
value=custom_properties,
|
|
78
|
+
)
|
|
79
|
+
return self
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.schema_classes import (
|
|
7
|
+
OwnerClass,
|
|
8
|
+
OwnershipClass,
|
|
9
|
+
OwnershipTypeClass,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HasOwnershipPatch(MetadataPatchProposal):
|
|
14
|
+
def add_owner(self, owner: OwnerClass) -> Self:
|
|
15
|
+
"""Add an owner to the entity.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
owner: The Owner object to add.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
The patch builder instance.
|
|
22
|
+
"""
|
|
23
|
+
self._add_patch(
|
|
24
|
+
OwnershipClass.ASPECT_NAME,
|
|
25
|
+
"add",
|
|
26
|
+
path=("owners", owner.owner, str(owner.type)),
|
|
27
|
+
value=owner,
|
|
28
|
+
)
|
|
29
|
+
return self
|
|
30
|
+
|
|
31
|
+
def remove_owner(
|
|
32
|
+
self, owner: str, owner_type: Optional[OwnershipTypeClass] = None
|
|
33
|
+
) -> Self:
|
|
34
|
+
"""Remove an owner from the entity.
|
|
35
|
+
|
|
36
|
+
If owner_type is not provided, the owner will be removed regardless of ownership type.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
owner: The owner to remove.
|
|
40
|
+
owner_type: The ownership type of the owner (optional).
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
The patch builder instance.
|
|
44
|
+
"""
|
|
45
|
+
self._add_patch(
|
|
46
|
+
OwnershipClass.ASPECT_NAME,
|
|
47
|
+
"remove",
|
|
48
|
+
path=("owners", owner) + ((str(owner_type),) if owner_type else ()),
|
|
49
|
+
value=owner,
|
|
50
|
+
)
|
|
51
|
+
return self
|
|
52
|
+
|
|
53
|
+
def set_owners(self, owners: List[OwnerClass]) -> Self:
|
|
54
|
+
"""Set the owners of the entity.
|
|
55
|
+
|
|
56
|
+
This will effectively replace all existing owners with the new list - it doesn't really patch things.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
owners: The list of owners to set.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
The patch builder instance.
|
|
63
|
+
"""
|
|
64
|
+
self._add_patch(
|
|
65
|
+
OwnershipClass.ASPECT_NAME, "add", path=("owners",), value=owners
|
|
66
|
+
)
|
|
67
|
+
return self
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.schema_classes import (
|
|
7
|
+
StructuredPropertiesClass,
|
|
8
|
+
StructuredPropertyValueAssignmentClass,
|
|
9
|
+
)
|
|
10
|
+
from datahub.utilities.urns.structured_properties_urn import (
|
|
11
|
+
make_structured_property_urn,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class HasStructuredPropertiesPatch(MetadataPatchProposal):
|
|
16
|
+
def set_structured_property(
|
|
17
|
+
self, key: str, value: Union[str, float, List[Union[str, float]]]
|
|
18
|
+
) -> Self:
|
|
19
|
+
"""Add or update a structured property.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
key: the name of the property (either bare or urn form)
|
|
23
|
+
value: the value of the property (for multi-valued properties, this can be a list)
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
The patch builder instance.
|
|
27
|
+
"""
|
|
28
|
+
self.remove_structured_property(key)
|
|
29
|
+
self.add_structured_property(key, value)
|
|
30
|
+
return self
|
|
31
|
+
|
|
32
|
+
def remove_structured_property(self, key: str) -> Self:
|
|
33
|
+
"""Remove a structured property.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
key: the name of the property (either bare or urn form)
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
The patch builder instance.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
self._add_patch(
|
|
43
|
+
StructuredPropertiesClass.ASPECT_NAME,
|
|
44
|
+
"remove",
|
|
45
|
+
path=("properties", make_structured_property_urn(key)),
|
|
46
|
+
value={},
|
|
47
|
+
)
|
|
48
|
+
return self
|
|
49
|
+
|
|
50
|
+
def add_structured_property(
|
|
51
|
+
self, key: str, value: Union[str, float, List[Union[str, float]]]
|
|
52
|
+
) -> Self:
|
|
53
|
+
"""Add a structured property.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
key: the name of the property (either bare or urn form)
|
|
57
|
+
value: the value of the property (for multi-valued properties, this value will be appended to the list)
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
The patch builder instance.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
self._add_patch(
|
|
64
|
+
StructuredPropertiesClass.ASPECT_NAME,
|
|
65
|
+
"add",
|
|
66
|
+
path=("properties", make_structured_property_urn(key)),
|
|
67
|
+
value=StructuredPropertyValueAssignmentClass(
|
|
68
|
+
propertyUrn=make_structured_property_urn(key),
|
|
69
|
+
values=value if isinstance(value, list) else [value],
|
|
70
|
+
),
|
|
71
|
+
)
|
|
72
|
+
return self
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.schema_classes import (
|
|
7
|
+
GlobalTagsClass as GlobalTags,
|
|
8
|
+
TagAssociationClass as Tag,
|
|
9
|
+
)
|
|
10
|
+
from datahub.metadata.urns import TagUrn, Urn
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HasTagsPatch(MetadataPatchProposal):
|
|
14
|
+
def add_tag(self, tag: Tag) -> Self:
|
|
15
|
+
"""Adds a tag to the entity.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
tag: The Tag object representing the tag to be added.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
The patch builder instance.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
# TODO: Make this support raw strings, in addition to Tag objects.
|
|
25
|
+
self._add_patch(
|
|
26
|
+
GlobalTags.ASPECT_NAME, "add", path=("tags", tag.tag), value=tag
|
|
27
|
+
)
|
|
28
|
+
return self
|
|
29
|
+
|
|
30
|
+
def remove_tag(self, tag: Union[str, Urn]) -> Self:
|
|
31
|
+
"""Removes a tag from the entity.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
tag: The tag to remove, specified as a string or Urn object.
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
The patch builder instance.
|
|
38
|
+
"""
|
|
39
|
+
if isinstance(tag, str) and not tag.startswith("urn:li:tag:"):
|
|
40
|
+
tag = TagUrn.create_from_id(tag)
|
|
41
|
+
self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=("tags", tag), value={})
|
|
42
|
+
return self
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.schema_classes import (
|
|
7
|
+
GlossaryTermAssociationClass as Term,
|
|
8
|
+
GlossaryTermsClass,
|
|
9
|
+
)
|
|
10
|
+
from datahub.metadata.urns import GlossaryTermUrn, Urn
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HasTermsPatch(MetadataPatchProposal):
|
|
14
|
+
def add_term(self, term: Term) -> Self:
|
|
15
|
+
"""Adds a glossary term to the entity.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
term: The Term object representing the glossary term to be added.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
The patch builder instance.
|
|
22
|
+
"""
|
|
23
|
+
# TODO: Make this support raw strings, in addition to Term objects.
|
|
24
|
+
self._add_patch(
|
|
25
|
+
GlossaryTermsClass.ASPECT_NAME, "add", path=("terms", term.urn), value=term
|
|
26
|
+
)
|
|
27
|
+
return self
|
|
28
|
+
|
|
29
|
+
def remove_term(self, term: Union[str, Urn]) -> Self:
|
|
30
|
+
"""Removes a glossary term from the entity.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
term: The term to remove, specified as a string or Urn object.
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
The patch builder instance.
|
|
37
|
+
"""
|
|
38
|
+
if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"):
|
|
39
|
+
term = GlossaryTermUrn(term)
|
|
40
|
+
self._add_patch(
|
|
41
|
+
GlossaryTermsClass.ASPECT_NAME, "remove", path=("terms", term), value={}
|
|
42
|
+
)
|
|
43
|
+
return self
|