acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +31 -7
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +8 -5
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +6 -5
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +16 -18
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +19 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +258 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +30 -6
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +220 -126
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/sdk/_shared.py
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import warnings
|
|
2
4
|
from datetime import datetime
|
|
3
5
|
from typing import (
|
|
4
6
|
TYPE_CHECKING,
|
|
7
|
+
Callable,
|
|
5
8
|
List,
|
|
6
9
|
Optional,
|
|
10
|
+
Sequence,
|
|
7
11
|
Tuple,
|
|
8
12
|
Union,
|
|
9
13
|
)
|
|
10
14
|
|
|
11
|
-
from typing_extensions import TypeAlias
|
|
15
|
+
from typing_extensions import TypeAlias, assert_never
|
|
12
16
|
|
|
13
17
|
import datahub.metadata.schema_classes as models
|
|
14
18
|
from datahub.emitter.mce_builder import (
|
|
@@ -20,6 +24,7 @@ from datahub.emitter.mce_builder import (
|
|
|
20
24
|
from datahub.emitter.mcp_builder import ContainerKey
|
|
21
25
|
from datahub.errors import MultipleSubtypesWarning, SdkUsageError
|
|
22
26
|
from datahub.metadata.urns import (
|
|
27
|
+
ContainerUrn,
|
|
23
28
|
CorpGroupUrn,
|
|
24
29
|
CorpUserUrn,
|
|
25
30
|
DataJobUrn,
|
|
@@ -32,7 +37,8 @@ from datahub.metadata.urns import (
|
|
|
32
37
|
TagUrn,
|
|
33
38
|
Urn,
|
|
34
39
|
)
|
|
35
|
-
from datahub.sdk.
|
|
40
|
+
from datahub.sdk._utils import add_list_unique, remove_list_unique
|
|
41
|
+
from datahub.sdk.entity import Entity
|
|
36
42
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
37
43
|
|
|
38
44
|
if TYPE_CHECKING:
|
|
@@ -44,6 +50,8 @@ DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
|
|
|
44
50
|
|
|
45
51
|
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
46
52
|
|
|
53
|
+
_DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
|
|
54
|
+
|
|
47
55
|
|
|
48
56
|
def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
|
|
49
57
|
if ts is None:
|
|
@@ -83,6 +91,13 @@ class HasPlatformInstance(Entity):
|
|
|
83
91
|
)
|
|
84
92
|
)
|
|
85
93
|
|
|
94
|
+
@property
|
|
95
|
+
def platform(self) -> Optional[DataPlatformUrn]:
|
|
96
|
+
dataPlatform = self._get_aspect(models.DataPlatformInstanceClass)
|
|
97
|
+
if dataPlatform and dataPlatform.platform:
|
|
98
|
+
return DataPlatformUrn.from_string(dataPlatform.platform)
|
|
99
|
+
return None
|
|
100
|
+
|
|
86
101
|
@property
|
|
87
102
|
def platform_instance(self) -> Optional[DataPlatformInstanceUrn]:
|
|
88
103
|
dataPlatformInstance = self._get_aspect(models.DataPlatformInstanceClass)
|
|
@@ -112,11 +127,11 @@ class HasSubtype(Entity):
|
|
|
112
127
|
self._set_aspect(models.SubTypesClass(typeNames=[subtype]))
|
|
113
128
|
|
|
114
129
|
|
|
130
|
+
# TODO: Reference OwnershipTypeClass as the valid ownership type enum.
|
|
115
131
|
OwnershipTypeType: TypeAlias = Union[str, OwnershipTypeUrn]
|
|
116
132
|
OwnerInputType: TypeAlias = Union[
|
|
117
|
-
str,
|
|
118
133
|
ActorUrn,
|
|
119
|
-
Tuple[
|
|
134
|
+
Tuple[ActorUrn, OwnershipTypeType],
|
|
120
135
|
models.OwnerClass,
|
|
121
136
|
]
|
|
122
137
|
OwnersInputType: TypeAlias = List[OwnerInputType]
|
|
@@ -126,15 +141,17 @@ class HasOwnership(Entity):
|
|
|
126
141
|
__slots__ = ()
|
|
127
142
|
|
|
128
143
|
@staticmethod
|
|
129
|
-
def _parse_owner_class(owner: OwnerInputType) -> models.OwnerClass:
|
|
144
|
+
def _parse_owner_class(owner: OwnerInputType) -> Tuple[models.OwnerClass, bool]:
|
|
130
145
|
if isinstance(owner, models.OwnerClass):
|
|
131
|
-
return owner
|
|
146
|
+
return owner, False
|
|
132
147
|
|
|
148
|
+
was_type_specified = False
|
|
133
149
|
owner_type = models.OwnershipTypeClass.TECHNICAL_OWNER
|
|
134
150
|
owner_type_urn = None
|
|
135
151
|
|
|
136
152
|
if isinstance(owner, tuple):
|
|
137
153
|
raw_owner, raw_owner_type = owner
|
|
154
|
+
was_type_specified = True
|
|
138
155
|
|
|
139
156
|
if isinstance(raw_owner_type, OwnershipTypeUrn):
|
|
140
157
|
owner_type = models.OwnershipTypeClass.CUSTOM
|
|
@@ -151,17 +168,15 @@ class HasOwnership(Entity):
|
|
|
151
168
|
owner=make_user_urn(raw_owner),
|
|
152
169
|
type=owner_type,
|
|
153
170
|
typeUrn=owner_type_urn,
|
|
154
|
-
)
|
|
171
|
+
), was_type_specified
|
|
155
172
|
elif isinstance(raw_owner, Urn):
|
|
156
173
|
return models.OwnerClass(
|
|
157
174
|
owner=str(raw_owner),
|
|
158
175
|
type=owner_type,
|
|
159
176
|
typeUrn=owner_type_urn,
|
|
160
|
-
)
|
|
177
|
+
), was_type_specified
|
|
161
178
|
else:
|
|
162
|
-
|
|
163
|
-
f"Invalid owner {owner}: {type(owner)} is not a valid owner type"
|
|
164
|
-
)
|
|
179
|
+
assert_never(raw_owner)
|
|
165
180
|
|
|
166
181
|
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
167
182
|
# Ideally we'd also use first-class ownership type urns here, not strings.
|
|
@@ -173,21 +188,74 @@ class HasOwnership(Entity):
|
|
|
173
188
|
|
|
174
189
|
def set_owners(self, owners: OwnersInputType) -> None:
|
|
175
190
|
# TODO: add docs on the default parsing + default ownership type
|
|
176
|
-
parsed_owners = [self._parse_owner_class(owner) for owner in owners]
|
|
191
|
+
parsed_owners = [self._parse_owner_class(owner)[0] for owner in owners]
|
|
177
192
|
self._set_aspect(models.OwnershipClass(owners=parsed_owners))
|
|
178
193
|
|
|
194
|
+
@classmethod
|
|
195
|
+
def _owner_key_method(
|
|
196
|
+
cls, consider_owner_type: bool
|
|
197
|
+
) -> Callable[[models.OwnerClass], Tuple[str, ...]]:
|
|
198
|
+
if consider_owner_type:
|
|
199
|
+
return cls._typed_owner_key
|
|
200
|
+
else:
|
|
201
|
+
return cls._simple_owner_key
|
|
179
202
|
|
|
180
|
-
|
|
203
|
+
@classmethod
|
|
204
|
+
def _typed_owner_key(cls, owner: models.OwnerClass) -> Tuple[str, str]:
|
|
205
|
+
return (owner.owner, owner.typeUrn or str(owner.type))
|
|
206
|
+
|
|
207
|
+
@classmethod
|
|
208
|
+
def _simple_owner_key(cls, owner: models.OwnerClass) -> Tuple[str,]:
|
|
209
|
+
return (owner.owner,)
|
|
210
|
+
|
|
211
|
+
def _ensure_owners(self) -> List[models.OwnerClass]:
|
|
212
|
+
owners = self._setdefault_aspect(models.OwnershipClass(owners=[])).owners
|
|
213
|
+
return owners
|
|
214
|
+
|
|
215
|
+
def add_owner(self, owner: OwnerInputType) -> None:
|
|
216
|
+
# Tricky: when adding an owner, we always use the ownership type.
|
|
217
|
+
# For removals, we only use it if it was explicitly specified.
|
|
218
|
+
parsed_owner, _ = self._parse_owner_class(owner)
|
|
219
|
+
add_list_unique(
|
|
220
|
+
self._ensure_owners(),
|
|
221
|
+
key=self._typed_owner_key,
|
|
222
|
+
item=parsed_owner,
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
def remove_owner(self, owner: OwnerInputType) -> None:
|
|
226
|
+
parsed_owner, was_type_specified = self._parse_owner_class(owner)
|
|
227
|
+
remove_list_unique(
|
|
228
|
+
self._ensure_owners(),
|
|
229
|
+
key=self._owner_key_method(was_type_specified),
|
|
230
|
+
item=parsed_owner,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# If you pass in a container object, we can build on top of its browse path.
|
|
235
|
+
# If you pass in a ContainerKey, we can use parent_key() to build the browse path.
|
|
236
|
+
# If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
|
|
237
|
+
# will be treated as raw ids.
|
|
238
|
+
ParentContainerInputType: TypeAlias = Union["Container", ContainerKey, List[UrnOrStr]]
|
|
181
239
|
|
|
182
240
|
|
|
183
241
|
class HasContainer(Entity):
|
|
184
242
|
__slots__ = ()
|
|
185
243
|
|
|
186
|
-
|
|
244
|
+
@staticmethod
|
|
245
|
+
def _maybe_parse_as_urn(urn: UrnOrStr) -> UrnOrStr:
|
|
246
|
+
if isinstance(urn, Urn):
|
|
247
|
+
return urn
|
|
248
|
+
elif urn.startswith("urn:li:"):
|
|
249
|
+
return Urn.from_string(urn)
|
|
250
|
+
else:
|
|
251
|
+
return urn
|
|
252
|
+
|
|
253
|
+
def _set_container(self, container: Optional[ParentContainerInputType]) -> None:
|
|
187
254
|
# We need to allow container to be None. It won't happen for datasets much, but
|
|
188
255
|
# will be required for root containers.
|
|
189
256
|
from datahub.sdk.container import Container
|
|
190
257
|
|
|
258
|
+
container_urn: Optional[str]
|
|
191
259
|
browse_path: List[Union[str, models.BrowsePathEntryClass]] = []
|
|
192
260
|
if isinstance(container, Container):
|
|
193
261
|
container_urn = container.urn.urn()
|
|
@@ -204,6 +272,29 @@ class HasContainer(Entity):
|
|
|
204
272
|
urn=container_urn,
|
|
205
273
|
),
|
|
206
274
|
]
|
|
275
|
+
elif isinstance(container, list):
|
|
276
|
+
parsed_path = [self._maybe_parse_as_urn(entry) for entry in container]
|
|
277
|
+
|
|
278
|
+
# Use the last container in the path as the container urn.
|
|
279
|
+
container_urns = [
|
|
280
|
+
urn.urn() for urn in parsed_path if isinstance(urn, ContainerUrn)
|
|
281
|
+
]
|
|
282
|
+
container_urn = container_urns[-1] if container_urns else None
|
|
283
|
+
|
|
284
|
+
browse_path = [
|
|
285
|
+
(
|
|
286
|
+
models.BrowsePathEntryClass(
|
|
287
|
+
id=str(entry),
|
|
288
|
+
urn=str(entry),
|
|
289
|
+
)
|
|
290
|
+
if isinstance(entry, Urn)
|
|
291
|
+
else models.BrowsePathEntryClass(
|
|
292
|
+
id=entry,
|
|
293
|
+
urn=None,
|
|
294
|
+
)
|
|
295
|
+
)
|
|
296
|
+
for entry in parsed_path
|
|
297
|
+
]
|
|
207
298
|
elif container is not None:
|
|
208
299
|
container_urn = container.as_urn()
|
|
209
300
|
|
|
@@ -212,6 +303,13 @@ class HasContainer(Entity):
|
|
|
212
303
|
while parent_key is not None:
|
|
213
304
|
browse_path_reversed.append(parent_key.as_urn())
|
|
214
305
|
parent_key = parent_key.parent_key()
|
|
306
|
+
if container.instance is not None:
|
|
307
|
+
browse_path_reversed.append(
|
|
308
|
+
DataPlatformInstanceUrn(
|
|
309
|
+
container.platform, container.instance
|
|
310
|
+
).urn()
|
|
311
|
+
)
|
|
312
|
+
|
|
215
313
|
browse_path = list(reversed(browse_path_reversed))
|
|
216
314
|
else:
|
|
217
315
|
container_urn = None
|
|
@@ -236,6 +334,24 @@ class HasContainer(Entity):
|
|
|
236
334
|
)
|
|
237
335
|
)
|
|
238
336
|
|
|
337
|
+
@property
|
|
338
|
+
def parent_container(self) -> Optional[ContainerUrn]:
|
|
339
|
+
if container := self._get_aspect(models.ContainerClass):
|
|
340
|
+
return ContainerUrn.from_string(container.container)
|
|
341
|
+
return None
|
|
342
|
+
|
|
343
|
+
@property
|
|
344
|
+
def browse_path(self) -> Optional[List[UrnOrStr]]:
|
|
345
|
+
if browse_path := self._get_aspect(models.BrowsePathsV2Class):
|
|
346
|
+
path: List[UrnOrStr] = []
|
|
347
|
+
for entry in browse_path.path:
|
|
348
|
+
if entry.urn:
|
|
349
|
+
path.append(Urn.from_string(entry.urn))
|
|
350
|
+
else:
|
|
351
|
+
path.append(entry.id)
|
|
352
|
+
return path
|
|
353
|
+
return None
|
|
354
|
+
|
|
239
355
|
|
|
240
356
|
TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
|
|
241
357
|
TagsInputType: TypeAlias = List[TagInputType]
|
|
@@ -244,6 +360,9 @@ TagsInputType: TypeAlias = List[TagInputType]
|
|
|
244
360
|
class HasTags(Entity):
|
|
245
361
|
__slots__ = ()
|
|
246
362
|
|
|
363
|
+
def _ensure_tags(self) -> List[models.TagAssociationClass]:
|
|
364
|
+
return self._setdefault_aspect(models.GlobalTagsClass(tags=[])).tags
|
|
365
|
+
|
|
247
366
|
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
248
367
|
@property
|
|
249
368
|
def tags(self) -> Optional[List[models.TagAssociationClass]]:
|
|
@@ -268,6 +387,24 @@ class HasTags(Entity):
|
|
|
268
387
|
)
|
|
269
388
|
)
|
|
270
389
|
|
|
390
|
+
@classmethod
|
|
391
|
+
def _tag_key(cls, tag: models.TagAssociationClass) -> str:
|
|
392
|
+
return tag.tag
|
|
393
|
+
|
|
394
|
+
def add_tag(self, tag: TagInputType) -> None:
|
|
395
|
+
add_list_unique(
|
|
396
|
+
self._ensure_tags(),
|
|
397
|
+
self._tag_key,
|
|
398
|
+
self._parse_tag_association_class(tag),
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
def remove_tag(self, tag: TagInputType) -> None:
|
|
402
|
+
remove_list_unique(
|
|
403
|
+
self._ensure_tags(),
|
|
404
|
+
self._tag_key,
|
|
405
|
+
self._parse_tag_association_class(tag),
|
|
406
|
+
)
|
|
407
|
+
|
|
271
408
|
|
|
272
409
|
TermInputType: TypeAlias = Union[
|
|
273
410
|
str, GlossaryTermUrn, models.GlossaryTermAssociationClass
|
|
@@ -278,6 +415,11 @@ TermsInputType: TypeAlias = List[TermInputType]
|
|
|
278
415
|
class HasTerms(Entity):
|
|
279
416
|
__slots__ = ()
|
|
280
417
|
|
|
418
|
+
def _ensure_terms(self) -> List[models.GlossaryTermAssociationClass]:
|
|
419
|
+
return self._setdefault_aspect(
|
|
420
|
+
models.GlossaryTermsClass(terms=[], auditStamp=self._terms_audit_stamp())
|
|
421
|
+
).terms
|
|
422
|
+
|
|
281
423
|
# TODO: Return a custom type with deserialized urns, instead of the raw aspect.
|
|
282
424
|
@property
|
|
283
425
|
def terms(self) -> Optional[List[models.GlossaryTermAssociationClass]]:
|
|
@@ -299,8 +441,7 @@ class HasTerms(Entity):
|
|
|
299
441
|
def _terms_audit_stamp(self) -> models.AuditStampClass:
|
|
300
442
|
return models.AuditStampClass(
|
|
301
443
|
time=0,
|
|
302
|
-
|
|
303
|
-
actor=CorpUserUrn("__ingestion").urn(),
|
|
444
|
+
actor=_DEFAULT_ACTOR_URN,
|
|
304
445
|
)
|
|
305
446
|
|
|
306
447
|
def set_terms(self, terms: TermsInputType) -> None:
|
|
@@ -313,6 +454,24 @@ class HasTerms(Entity):
|
|
|
313
454
|
)
|
|
314
455
|
)
|
|
315
456
|
|
|
457
|
+
@classmethod
|
|
458
|
+
def _terms_key(self, term: models.GlossaryTermAssociationClass) -> str:
|
|
459
|
+
return term.urn
|
|
460
|
+
|
|
461
|
+
def add_term(self, term: TermInputType) -> None:
|
|
462
|
+
add_list_unique(
|
|
463
|
+
self._ensure_terms(),
|
|
464
|
+
self._terms_key,
|
|
465
|
+
self._parse_glossary_term_association_class(term),
|
|
466
|
+
)
|
|
467
|
+
|
|
468
|
+
def remove_term(self, term: TermInputType) -> None:
|
|
469
|
+
remove_list_unique(
|
|
470
|
+
self._ensure_terms(),
|
|
471
|
+
self._terms_key,
|
|
472
|
+
self._parse_glossary_term_association_class(term),
|
|
473
|
+
)
|
|
474
|
+
|
|
316
475
|
|
|
317
476
|
DomainInputType: TypeAlias = Union[str, DomainUrn]
|
|
318
477
|
|
|
@@ -336,3 +495,86 @@ class HasDomain(Entity):
|
|
|
336
495
|
def set_domain(self, domain: DomainInputType) -> None:
|
|
337
496
|
domain_urn = DomainUrn.from_string(domain) # basically a type assertion
|
|
338
497
|
self._set_aspect(models.DomainsClass(domains=[str(domain_urn)]))
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
LinkInputType: TypeAlias = Union[
|
|
501
|
+
str,
|
|
502
|
+
Tuple[str, str], # url, description
|
|
503
|
+
models.InstitutionalMemoryMetadataClass,
|
|
504
|
+
]
|
|
505
|
+
LinksInputType: TypeAlias = Sequence[LinkInputType]
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class HasInstitutionalMemory(Entity):
|
|
509
|
+
__slots__ = ()
|
|
510
|
+
|
|
511
|
+
# Internally the aspect is called institutionalMemory, and so much of the code
|
|
512
|
+
# uses that name. However, the public-facing API is called "links", since
|
|
513
|
+
# that's what we call these in the UI.
|
|
514
|
+
|
|
515
|
+
def _ensure_institutional_memory(
|
|
516
|
+
self,
|
|
517
|
+
) -> List[models.InstitutionalMemoryMetadataClass]:
|
|
518
|
+
return self._setdefault_aspect(
|
|
519
|
+
models.InstitutionalMemoryClass(elements=[])
|
|
520
|
+
).elements
|
|
521
|
+
|
|
522
|
+
@property
|
|
523
|
+
def links(self) -> Optional[List[models.InstitutionalMemoryMetadataClass]]:
|
|
524
|
+
if institutional_memory := self._get_aspect(models.InstitutionalMemoryClass):
|
|
525
|
+
return institutional_memory.elements
|
|
526
|
+
return None
|
|
527
|
+
|
|
528
|
+
@classmethod
|
|
529
|
+
def _institutional_memory_audit_stamp(self) -> models.AuditStampClass:
|
|
530
|
+
return models.AuditStampClass(
|
|
531
|
+
time=0,
|
|
532
|
+
actor=_DEFAULT_ACTOR_URN,
|
|
533
|
+
)
|
|
534
|
+
|
|
535
|
+
@classmethod
|
|
536
|
+
def _parse_link_association_class(
|
|
537
|
+
cls, link: LinkInputType
|
|
538
|
+
) -> models.InstitutionalMemoryMetadataClass:
|
|
539
|
+
if isinstance(link, models.InstitutionalMemoryMetadataClass):
|
|
540
|
+
return link
|
|
541
|
+
elif isinstance(link, str):
|
|
542
|
+
return models.InstitutionalMemoryMetadataClass(
|
|
543
|
+
url=link,
|
|
544
|
+
description=link,
|
|
545
|
+
createStamp=cls._institutional_memory_audit_stamp(),
|
|
546
|
+
)
|
|
547
|
+
elif isinstance(link, tuple) and len(link) == 2:
|
|
548
|
+
url, description = link
|
|
549
|
+
return models.InstitutionalMemoryMetadataClass(
|
|
550
|
+
url=url,
|
|
551
|
+
description=description,
|
|
552
|
+
createStamp=cls._institutional_memory_audit_stamp(),
|
|
553
|
+
)
|
|
554
|
+
else:
|
|
555
|
+
assert_never(link)
|
|
556
|
+
|
|
557
|
+
def set_links(self, links: LinksInputType) -> None:
|
|
558
|
+
self._set_aspect(
|
|
559
|
+
models.InstitutionalMemoryClass(
|
|
560
|
+
elements=[self._parse_link_association_class(link) for link in links]
|
|
561
|
+
)
|
|
562
|
+
)
|
|
563
|
+
|
|
564
|
+
@classmethod
|
|
565
|
+
def _link_key(self, link: models.InstitutionalMemoryMetadataClass) -> str:
|
|
566
|
+
return link.url
|
|
567
|
+
|
|
568
|
+
def add_link(self, link: LinkInputType) -> None:
|
|
569
|
+
add_list_unique(
|
|
570
|
+
self._ensure_institutional_memory(),
|
|
571
|
+
self._link_key,
|
|
572
|
+
self._parse_link_association_class(link),
|
|
573
|
+
)
|
|
574
|
+
|
|
575
|
+
def remove_link(self, link: LinkInputType) -> None:
|
|
576
|
+
remove_list_unique(
|
|
577
|
+
self._ensure_institutional_memory(),
|
|
578
|
+
self._link_key,
|
|
579
|
+
self._parse_link_association_class(link),
|
|
580
|
+
)
|
datahub/sdk/_utils.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from typing import Any, Callable, List, Protocol, TypeVar
|
|
2
|
+
|
|
3
|
+
from datahub.errors import ItemNotFoundError
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class _SupportsEq(Protocol):
|
|
7
|
+
def __eq__(self, other: Any) -> bool: ...
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
K = TypeVar("K", bound=_SupportsEq)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def add_list_unique(lst: List[T], key: Callable[[T], K], item: T) -> None:
|
|
15
|
+
item_key = key(item)
|
|
16
|
+
for i, existing in enumerate(lst):
|
|
17
|
+
if key(existing) == item_key:
|
|
18
|
+
lst[i] = item
|
|
19
|
+
return
|
|
20
|
+
lst.append(item)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def remove_list_unique(
    lst: List[T], key: Callable[[T], K], item: T, *, missing_ok: bool = True
) -> None:
    """Remove every element of ``lst`` whose key equals the key of ``item``.

    The list is modified in place, so callers that hold a reference to the
    same list object observe the removal.

    Args:
        lst: The list to remove from.
        key: Function that maps an element to the value used for comparison.
        item: The element whose key identifies what should be removed.
        missing_ok: When False, raise if no element matched.

    Raises:
        ItemNotFoundError: If nothing matched and ``missing_ok`` is False.
    """
    # Poor man's patch implementation.
    item_key = key(item)

    # Build the filtered list in a single pass instead of popping while
    # iterating: pop() shifts the following elements left, which makes the
    # iterator skip the element right after each match and leaves adjacent
    # duplicates behind.
    # Tricky: in case there's already duplicates, we want to remove all of them.
    kept = [existing for existing in lst if not (key(existing) == item_key)]
    removed = len(kept) != len(lst)

    if removed:
        # Slice assignment keeps the caller's list object and only swaps its contents.
        lst[:] = kept

    if not removed and not missing_ok:
        raise ItemNotFoundError(f"Cannot remove item {item} from list: not found")
|
datahub/sdk/container.py
CHANGED
|
@@ -16,22 +16,26 @@ from datahub.metadata.urns import (
|
|
|
16
16
|
ContainerUrn,
|
|
17
17
|
Urn,
|
|
18
18
|
)
|
|
19
|
-
from datahub.sdk._entity import Entity
|
|
20
19
|
from datahub.sdk._shared import (
|
|
21
20
|
DomainInputType,
|
|
22
21
|
HasContainer,
|
|
23
22
|
HasDomain,
|
|
23
|
+
HasInstitutionalMemory,
|
|
24
24
|
HasOwnership,
|
|
25
25
|
HasPlatformInstance,
|
|
26
26
|
HasSubtype,
|
|
27
27
|
HasTags,
|
|
28
28
|
HasTerms,
|
|
29
|
+
LinksInputType,
|
|
29
30
|
OwnersInputType,
|
|
31
|
+
ParentContainerInputType,
|
|
30
32
|
TagsInputType,
|
|
31
33
|
TermsInputType,
|
|
32
34
|
make_time_stamp,
|
|
33
35
|
parse_time_stamp,
|
|
34
36
|
)
|
|
37
|
+
from datahub.sdk.entity import Entity, ExtraAspectsType
|
|
38
|
+
from datahub.utilities.sentinels import Auto, auto
|
|
35
39
|
|
|
36
40
|
|
|
37
41
|
class Container(
|
|
@@ -39,6 +43,7 @@ class Container(
|
|
|
39
43
|
HasSubtype,
|
|
40
44
|
HasContainer,
|
|
41
45
|
HasOwnership,
|
|
46
|
+
HasInstitutionalMemory,
|
|
42
47
|
HasTags,
|
|
43
48
|
HasTerms,
|
|
44
49
|
HasDomain,
|
|
@@ -54,7 +59,7 @@ class Container(
|
|
|
54
59
|
self,
|
|
55
60
|
/,
|
|
56
61
|
# Identity.
|
|
57
|
-
container_key: ContainerKey
|
|
62
|
+
container_key: ContainerKey,
|
|
58
63
|
*,
|
|
59
64
|
# Container attributes.
|
|
60
65
|
display_name: str,
|
|
@@ -66,17 +71,23 @@ class Container(
|
|
|
66
71
|
created: Optional[datetime] = None,
|
|
67
72
|
last_modified: Optional[datetime] = None,
|
|
68
73
|
# Standard aspects.
|
|
74
|
+
parent_container: Auto | ParentContainerInputType | None = auto,
|
|
69
75
|
subtype: Optional[str] = None,
|
|
70
76
|
owners: Optional[OwnersInputType] = None,
|
|
77
|
+
links: Optional[LinksInputType] = None,
|
|
71
78
|
tags: Optional[TagsInputType] = None,
|
|
72
79
|
terms: Optional[TermsInputType] = None,
|
|
73
80
|
domain: Optional[DomainInputType] = None,
|
|
81
|
+
extra_aspects: ExtraAspectsType = None,
|
|
74
82
|
):
|
|
83
|
+
# Hack: while the type annotations say container_key is always a ContainerKey,
|
|
84
|
+
# we allow ContainerUrn to make the graph-based constructor work.
|
|
75
85
|
if isinstance(container_key, ContainerUrn):
|
|
76
86
|
urn = container_key
|
|
77
87
|
else:
|
|
78
88
|
urn = ContainerUrn.from_string(container_key.as_urn())
|
|
79
89
|
super().__init__(urn)
|
|
90
|
+
self._set_extra_aspects(extra_aspects)
|
|
80
91
|
|
|
81
92
|
# This needs to come first to ensure that the display name is registered.
|
|
82
93
|
self._ensure_container_props(name=display_name)
|
|
@@ -85,8 +96,6 @@ class Container(
|
|
|
85
96
|
if isinstance(container_key, ContainerKey):
|
|
86
97
|
self._set_platform_instance(container_key.platform, container_key.instance)
|
|
87
98
|
|
|
88
|
-
self._set_container(container_key.parent_key())
|
|
89
|
-
|
|
90
99
|
self.set_custom_properties(
|
|
91
100
|
{
|
|
92
101
|
**container_key.property_dict(),
|
|
@@ -100,6 +109,18 @@ class Container(
|
|
|
100
109
|
env = container_key.env if container_key.env in ALL_ENV_TYPES else None
|
|
101
110
|
if _INCLUDE_ENV_IN_CONTAINER_PROPERTIES and env is not None:
|
|
102
111
|
self._ensure_container_props().env = env
|
|
112
|
+
else:
|
|
113
|
+
self.set_custom_properties(extra_properties or {})
|
|
114
|
+
|
|
115
|
+
if parent_container is auto:
|
|
116
|
+
if not isinstance(container_key, ContainerKey):
|
|
117
|
+
raise SdkUsageError(
|
|
118
|
+
"Either a container_key or parent_container must be provided"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
self._set_container(container_key.parent_key())
|
|
122
|
+
else:
|
|
123
|
+
self._set_container(parent_container)
|
|
103
124
|
|
|
104
125
|
if description is not None:
|
|
105
126
|
self.set_description(description)
|
|
@@ -116,6 +137,8 @@ class Container(
|
|
|
116
137
|
self.set_subtype(subtype)
|
|
117
138
|
if owners is not None:
|
|
118
139
|
self.set_owners(owners)
|
|
140
|
+
if links is not None:
|
|
141
|
+
self.set_links(links)
|
|
119
142
|
if tags is not None:
|
|
120
143
|
self.set_tags(tags)
|
|
121
144
|
if terms is not None:
|
|
@@ -126,7 +149,8 @@ class Container(
|
|
|
126
149
|
@classmethod
|
|
127
150
|
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
128
151
|
assert isinstance(urn, ContainerUrn)
|
|
129
|
-
|
|
152
|
+
|
|
153
|
+
entity = cls(urn, display_name="__dummy_value__", parent_container=None) # type: ignore[arg-type]
|
|
130
154
|
return entity._init_from_graph(current_aspects)
|
|
131
155
|
|
|
132
156
|
def _ensure_container_props(
|
|
@@ -147,7 +171,7 @@ class Container(
|
|
|
147
171
|
return self._ensure_container_props().name
|
|
148
172
|
|
|
149
173
|
def set_display_name(self, value: str) -> None:
|
|
150
|
-
self._ensure_container_props().name = value
|
|
174
|
+
self._ensure_container_props(name=value).name = value
|
|
151
175
|
|
|
152
176
|
@property
|
|
153
177
|
def description(self) -> Optional[str]:
|