acryl-datahub 1.1.0rc4__py3-none-any.whl → 1.1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1.dist-info}/METADATA +2414 -2412
- {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1.dist-info}/RECORD +87 -70
- {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +9 -8
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +170 -0
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/delete_cli.py +4 -4
- datahub/cli/ingest_cli.py +9 -1
- datahub/emitter/mce_builder.py +3 -1
- datahub/emitter/response_helper.py +86 -1
- datahub/emitter/rest_emitter.py +1 -1
- datahub/ingestion/graph/client.py +3 -3
- datahub/ingestion/source/apply/datahub_apply.py +4 -4
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +22 -10
- datahub/ingestion/source/data_lake_common/object_store.py +644 -0
- datahub/ingestion/source/datahub/config.py +11 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_common.py +30 -11
- datahub/ingestion/source/gcs/gcs_source.py +22 -7
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/hex/query_fetcher.py +9 -3
- datahub/ingestion/source/openapi.py +12 -0
- datahub/ingestion/source/openapi_parser.py +56 -37
- datahub/ingestion/source/s3/source.py +65 -6
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +44 -21
- datahub/ingestion/source/snowflake/snowflake_query.py +0 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
- datahub/ingestion/source/sql/athena.py +1 -0
- datahub/ingestion/source/sql/hive.py +2 -3
- datahub/ingestion/source/sql/sql_common.py +98 -34
- datahub/ingestion/source/sql/sql_types.py +5 -2
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +117 -0
- datahub/ingestion/source/unity/source.py +167 -15
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/metadata/_internal_schema_classes.py +667 -522
- datahub/metadata/_urns/urn_defs.py +1804 -1748
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/schema.avsc +17358 -17584
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +1 -0
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +1 -0
- datahub/metadata/schemas/MLModelKey.avsc +1 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +6 -0
- datahub/sdk/_all_entities.py +11 -0
- datahub/sdk/_shared.py +118 -1
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +342 -0
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +90 -2
- datahub/sdk/lineage_client.py +681 -82
- datahub/sdk/main_client.py +27 -8
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +1 -1
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/sdk_v2_helpers.py +18 -0
- {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0rc4.dist-info → acryl_datahub-1.1.0.1.dist-info}/top_level.txt +0 -0
datahub/sdk/entity_client.py
CHANGED
|
@@ -9,7 +9,11 @@ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
|
9
9
|
from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
|
|
10
10
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
11
11
|
from datahub.metadata.urns import (
|
|
12
|
+
ChartUrn,
|
|
12
13
|
ContainerUrn,
|
|
14
|
+
DashboardUrn,
|
|
15
|
+
DataFlowUrn,
|
|
16
|
+
DataJobUrn,
|
|
13
17
|
DatasetUrn,
|
|
14
18
|
MlModelGroupUrn,
|
|
15
19
|
MlModelUrn,
|
|
@@ -17,7 +21,11 @@ from datahub.metadata.urns import (
|
|
|
17
21
|
)
|
|
18
22
|
from datahub.sdk._all_entities import ENTITY_CLASSES
|
|
19
23
|
from datahub.sdk._shared import UrnOrStr
|
|
24
|
+
from datahub.sdk.chart import Chart
|
|
20
25
|
from datahub.sdk.container import Container
|
|
26
|
+
from datahub.sdk.dashboard import Dashboard
|
|
27
|
+
from datahub.sdk.dataflow import DataFlow
|
|
28
|
+
from datahub.sdk.datajob import DataJob
|
|
21
29
|
from datahub.sdk.dataset import Dataset
|
|
22
30
|
from datahub.sdk.entity import Entity
|
|
23
31
|
from datahub.sdk.mlmodel import MLModel
|
|
@@ -57,6 +65,14 @@ class EntityClient:
|
|
|
57
65
|
@overload
|
|
58
66
|
def get(self, urn: MlModelGroupUrn) -> MLModelGroup: ...
|
|
59
67
|
@overload
|
|
68
|
+
def get(self, urn: DataFlowUrn) -> DataFlow: ...
|
|
69
|
+
@overload
|
|
70
|
+
def get(self, urn: DataJobUrn) -> DataJob: ...
|
|
71
|
+
@overload
|
|
72
|
+
def get(self, urn: DashboardUrn) -> Dashboard: ...
|
|
73
|
+
@overload
|
|
74
|
+
def get(self, urn: ChartUrn) -> Chart: ...
|
|
75
|
+
@overload
|
|
60
76
|
def get(self, urn: Union[Urn, str]) -> Entity: ...
|
|
61
77
|
def get(self, urn: UrnOrStr) -> Entity:
|
|
62
78
|
"""Retrieve an entity by its urn.
|
|
@@ -76,7 +92,26 @@ class EntityClient:
|
|
|
76
92
|
urn = Urn.from_string(urn)
|
|
77
93
|
|
|
78
94
|
# TODO: add error handling around this with a suggested alternative if not yet supported
|
|
79
|
-
|
|
95
|
+
try:
|
|
96
|
+
EntityClass = ENTITY_CLASSES[urn.entity_type]
|
|
97
|
+
except KeyError as e:
|
|
98
|
+
# Try to import cloud-specific entities if not found
|
|
99
|
+
try:
|
|
100
|
+
from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion
|
|
101
|
+
from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
|
|
102
|
+
|
|
103
|
+
if urn.entity_type == "assertion":
|
|
104
|
+
EntityClass = Assertion
|
|
105
|
+
elif urn.entity_type == "monitor":
|
|
106
|
+
EntityClass = Monitor
|
|
107
|
+
else:
|
|
108
|
+
raise SdkUsageError(
|
|
109
|
+
f"Entity type {urn.entity_type} is not yet supported"
|
|
110
|
+
) from e
|
|
111
|
+
except ImportError as e:
|
|
112
|
+
raise SdkUsageError(
|
|
113
|
+
f"Entity type {urn.entity_type} is not yet supported"
|
|
114
|
+
) from e
|
|
80
115
|
|
|
81
116
|
if not self._graph.exists(str(urn)):
|
|
82
117
|
raise ItemNotFoundError(f"Entity {urn} not found")
|
|
@@ -84,7 +119,19 @@ class EntityClient:
|
|
|
84
119
|
aspects = self._graph.get_entity_semityped(str(urn))
|
|
85
120
|
|
|
86
121
|
# TODO: save the timestamp so we can use If-Unmodified-Since on the updates
|
|
87
|
-
|
|
122
|
+
entity = EntityClass._new_from_graph(urn, aspects)
|
|
123
|
+
|
|
124
|
+
# Type narrowing for cloud-specific entities
|
|
125
|
+
if urn.entity_type == "assertion":
|
|
126
|
+
from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion
|
|
127
|
+
|
|
128
|
+
assert isinstance(entity, Assertion)
|
|
129
|
+
elif urn.entity_type == "monitor":
|
|
130
|
+
from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
|
|
131
|
+
|
|
132
|
+
assert isinstance(entity, Monitor)
|
|
133
|
+
|
|
134
|
+
return entity
|
|
88
135
|
|
|
89
136
|
def create(self, entity: Entity) -> None:
|
|
90
137
|
mcps = []
|
|
@@ -145,3 +192,44 @@ class EntityClient:
|
|
|
145
192
|
|
|
146
193
|
mcps = updater.build()
|
|
147
194
|
self._graph.emit_mcps(mcps)
|
|
195
|
+
|
|
196
|
+
def delete(
|
|
197
|
+
self,
|
|
198
|
+
urn: UrnOrStr,
|
|
199
|
+
check_exists: bool = True,
|
|
200
|
+
cascade: bool = False,
|
|
201
|
+
hard: bool = False,
|
|
202
|
+
) -> None:
|
|
203
|
+
"""Delete an entity by its urn.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
urn: The urn of the entity to delete. Can be a string or :py:class:`Urn` object.
|
|
207
|
+
check_exists: Whether to check if the entity exists before deletion. Defaults to True.
|
|
208
|
+
cascade: Whether to cascade delete related entities. When True, deletes child entities
|
|
209
|
+
like datajobs within dataflows, datasets within containers, etc. Not yet supported.
|
|
210
|
+
hard: Whether to perform a hard delete (permanent) or soft delete. Defaults to False.
|
|
211
|
+
|
|
212
|
+
Raises:
|
|
213
|
+
SdkUsageError: If the entity does not exist and check_exists is True, or if cascade is True (not supported).
|
|
214
|
+
|
|
215
|
+
Note:
|
|
216
|
+
When hard is True, the operation is irreversible and the entity will be permanently removed.
|
|
217
|
+
|
|
218
|
+
Impact of cascade deletion (still to be done) depends on the input entity type:
|
|
219
|
+
- Container: Recursively deletes all containers and data assets within the container.
|
|
220
|
+
- Dataflow: Recursively deletes all data jobs within the dataflow.
|
|
221
|
+
- Dashboard: TBD
|
|
222
|
+
- DataPlatformInstance: TBD
|
|
223
|
+
- ...
|
|
224
|
+
"""
|
|
225
|
+
urn_str = str(urn) if isinstance(urn, Urn) else urn
|
|
226
|
+
if check_exists and not self._graph.exists(entity_urn=urn_str):
|
|
227
|
+
raise SdkUsageError(
|
|
228
|
+
f"Entity {urn_str} does not exist, and hence cannot be deleted. "
|
|
229
|
+
"You can bypass this check by setting check_exists=False."
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
if cascade:
|
|
233
|
+
raise SdkUsageError("The 'cascade' parameter is not yet supported.")
|
|
234
|
+
|
|
235
|
+
self._graph.delete_entity(urn=urn_str, hard=hard)
|