acryl-datahub 0.15.0.4rc3__py3-none-any.whl → 0.15.0.5__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/METADATA +2507 -2470
- {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/RECORD +95 -86
- {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/entry_points.txt +1 -0
- datahub/__init__.py +1 -25
- datahub/_version.py +13 -0
- datahub/api/entities/dataprocess/dataprocess_instance.py +104 -11
- datahub/cli/check_cli.py +1 -1
- datahub/cli/cli_utils.py +3 -3
- datahub/cli/container_cli.py +1 -64
- datahub/cli/iceberg_cli.py +707 -0
- datahub/cli/ingest_cli.py +2 -2
- datahub/emitter/composite_emitter.py +36 -0
- datahub/emitter/rest_emitter.py +1 -1
- datahub/entrypoints.py +26 -5
- datahub/ingestion/api/incremental_lineage_helper.py +4 -0
- datahub/ingestion/api/registry.py +1 -1
- datahub/ingestion/glossary/classification_mixin.py +6 -0
- datahub/ingestion/glossary/classifier.py +3 -2
- datahub/ingestion/graph/client.py +2 -1
- datahub/ingestion/graph/entity_versioning.py +201 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/run/connection.py +1 -1
- datahub/ingestion/run/pipeline.py +3 -3
- datahub/ingestion/source/abs/report.py +2 -2
- datahub/ingestion/source/apply/__init__.py +0 -0
- datahub/ingestion/source/apply/datahub_apply.py +223 -0
- datahub/ingestion/source/aws/glue.py +5 -2
- datahub/ingestion/source/aws/sagemaker_processors/common.py +3 -2
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +1 -1
- datahub/ingestion/source/dbt/dbt_core.py +1 -1
- datahub/ingestion/source/delta_lake/report.py +2 -2
- datahub/ingestion/source/dynamodb/dynamodb.py +2 -1
- datahub/ingestion/source/elastic_search.py +2 -1
- datahub/ingestion/source/ge_profiling_config.py +11 -7
- datahub/ingestion/source/iceberg/iceberg_common.py +3 -2
- datahub/ingestion/source/identity/azure_ad.py +6 -14
- datahub/ingestion/source/identity/okta.py +2 -1
- datahub/ingestion/source/kafka/kafka.py +2 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -1
- datahub/ingestion/source/ldap.py +2 -1
- datahub/ingestion/source/looker/looker_config.py +3 -1
- datahub/ingestion/source/looker/looker_dataclasses.py +8 -0
- datahub/ingestion/source/looker/looker_file_loader.py +14 -3
- datahub/ingestion/source/looker/looker_template_language.py +104 -14
- datahub/ingestion/source/looker/lookml_config.py +29 -8
- datahub/ingestion/source/looker/lookml_source.py +110 -22
- datahub/ingestion/source/mode.py +2 -4
- datahub/ingestion/source/mongodb.py +2 -1
- datahub/ingestion/source/nifi.py +2 -1
- datahub/ingestion/source/powerbi/config.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -1
- datahub/ingestion/source/redash.py +5 -5
- datahub/ingestion/source/salesforce.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_query.py +11 -0
- datahub/ingestion/source/snowflake/snowflake_report.py +3 -1
- datahub/ingestion/source/snowflake/snowflake_schema.py +17 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +35 -43
- datahub/ingestion/source/snowflake/snowflake_tag.py +57 -3
- datahub/ingestion/source/snowflake/snowflake_v2.py +42 -4
- datahub/ingestion/source/sql/clickhouse.py +5 -43
- datahub/ingestion/source/sql/mssql/job_models.py +37 -8
- datahub/ingestion/source/sql/mssql/source.py +17 -0
- datahub/ingestion/source/sql/sql_config.py +0 -10
- datahub/ingestion/source/tableau/tableau.py +16 -13
- datahub/ingestion/source/tableau/tableau_common.py +1 -1
- datahub/ingestion/source/unity/ge_profiler.py +55 -4
- datahub/ingestion/source/unity/proxy.py +2 -2
- datahub/ingestion/source/unity/report.py +1 -0
- datahub/ingestion/source_config/operation_config.py +9 -0
- datahub/ingestion/source_report/pulsar.py +5 -4
- datahub/metadata/_schema_classes.py +304 -6
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
- datahub/metadata/schema.avsc +211 -12
- datahub/metadata/schemas/AssertionInfo.avsc +2 -2
- datahub/metadata/schemas/CorpUserSettings.avsc +9 -0
- datahub/metadata/schemas/DashboardInfo.avsc +5 -5
- datahub/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/Deprecation.avsc +12 -0
- datahub/metadata/schemas/DisplayProperties.avsc +62 -0
- datahub/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +92 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +17 -5
- datahub/metadata/schemas/PostInfo.avsc +28 -2
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/specific/dashboard.py +43 -1
- datahub/telemetry/telemetry.py +4 -4
- datahub/testing/check_imports.py +28 -0
- datahub/upgrade/upgrade.py +17 -9
- {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.4rc3.dist-info → acryl_datahub-0.15.0.5.dist-info}/top_level.txt +0 -0
datahub/cli/ingest_cli.py
CHANGED
|
@@ -12,7 +12,7 @@ import click_spinner
|
|
|
12
12
|
from click_default_group import DefaultGroup
|
|
13
13
|
from tabulate import tabulate
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
from datahub._version import nice_version_name
|
|
16
16
|
from datahub.cli import cli_utils
|
|
17
17
|
from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
|
|
18
18
|
from datahub.configuration.common import ConfigModel, GraphError
|
|
@@ -147,7 +147,7 @@ def run(
|
|
|
147
147
|
return ret
|
|
148
148
|
|
|
149
149
|
# main function begins
|
|
150
|
-
logger.info("DataHub CLI version: %s",
|
|
150
|
+
logger.info("DataHub CLI version: %s", nice_version_name())
|
|
151
151
|
|
|
152
152
|
pipeline_config = load_config_file(
|
|
153
153
|
config,
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from typing import Callable, List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from datahub.emitter.generic_emitter import Emitter
|
|
4
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
5
|
+
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
6
|
+
MetadataChangeEvent,
|
|
7
|
+
MetadataChangeProposal,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Experimental composite emitter that allows multiple emitters to be used in a single ingestion job
|
|
12
|
+
class CompositeEmitter(Emitter):
|
|
13
|
+
def __init__(self, emitters: List[Emitter]) -> None:
|
|
14
|
+
self.emitters = emitters
|
|
15
|
+
|
|
16
|
+
def emit(
|
|
17
|
+
self,
|
|
18
|
+
item: Union[
|
|
19
|
+
MetadataChangeEvent,
|
|
20
|
+
MetadataChangeProposal,
|
|
21
|
+
MetadataChangeProposalWrapper,
|
|
22
|
+
],
|
|
23
|
+
callback: Optional[Callable[[Exception, str], None]] = None,
|
|
24
|
+
) -> None:
|
|
25
|
+
callback_called = False
|
|
26
|
+
for emitter in self.emitters:
|
|
27
|
+
if not callback_called:
|
|
28
|
+
# We want to ensure that the callback is only called once and we tie it to the first emitter
|
|
29
|
+
emitter.emit(item, callback)
|
|
30
|
+
callback_called = True
|
|
31
|
+
else:
|
|
32
|
+
emitter.emit(item)
|
|
33
|
+
|
|
34
|
+
def flush(self) -> None:
|
|
35
|
+
for emitter in self.emitters:
|
|
36
|
+
emitter.flush()
|
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -22,7 +22,7 @@ from deprecated import deprecated
|
|
|
22
22
|
from requests.adapters import HTTPAdapter, Retry
|
|
23
23
|
from requests.exceptions import HTTPError, RequestException
|
|
24
24
|
|
|
25
|
-
from datahub import nice_version_name
|
|
25
|
+
from datahub._version import nice_version_name
|
|
26
26
|
from datahub.cli import config_utils
|
|
27
27
|
from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url, get_or_else
|
|
28
28
|
from datahub.cli.env_utils import get_boolean_env_variable
|
datahub/entrypoints.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import multiprocessing
|
|
2
3
|
import os
|
|
3
4
|
import platform
|
|
4
5
|
import sys
|
|
@@ -6,7 +7,7 @@ from typing import ContextManager, Optional
|
|
|
6
7
|
|
|
7
8
|
import click
|
|
8
9
|
|
|
9
|
-
import datahub as
|
|
10
|
+
import datahub._version as datahub_version
|
|
10
11
|
from datahub.cli.check_cli import check
|
|
11
12
|
from datahub.cli.cli_utils import (
|
|
12
13
|
fixup_gms_url,
|
|
@@ -74,8 +75,8 @@ if sys.version_info >= (3, 12):
|
|
|
74
75
|
help="Write debug-level logs to a file.",
|
|
75
76
|
)
|
|
76
77
|
@click.version_option(
|
|
77
|
-
version=
|
|
78
|
-
prog_name=
|
|
78
|
+
version=datahub_version.nice_version_name(),
|
|
79
|
+
prog_name=datahub_version.__package_name__,
|
|
79
80
|
)
|
|
80
81
|
def datahub(
|
|
81
82
|
debug: bool,
|
|
@@ -112,7 +113,7 @@ def datahub(
|
|
|
112
113
|
def version(include_server: bool = False) -> None:
|
|
113
114
|
"""Print version number and exit."""
|
|
114
115
|
|
|
115
|
-
click.echo(f"DataHub CLI version: {
|
|
116
|
+
click.echo(f"DataHub CLI version: {datahub_version.nice_version_name()}")
|
|
116
117
|
click.echo(f"Models: {model_version_name()}")
|
|
117
118
|
click.echo(f"Python version: {sys.version}")
|
|
118
119
|
if include_server:
|
|
@@ -183,6 +184,18 @@ datahub.add_command(datacontract)
|
|
|
183
184
|
datahub.add_command(assertions)
|
|
184
185
|
datahub.add_command(container)
|
|
185
186
|
|
|
187
|
+
try:
|
|
188
|
+
from datahub.cli.iceberg_cli import iceberg
|
|
189
|
+
|
|
190
|
+
datahub.add_command(iceberg)
|
|
191
|
+
except ImportError as e:
|
|
192
|
+
logger.debug(f"Failed to load datahub iceberg command: {e}")
|
|
193
|
+
datahub.add_command(
|
|
194
|
+
make_shim_command(
|
|
195
|
+
"iceberg", "run `pip install 'acryl-datahub[iceberg-catalog]'`"
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
|
|
186
199
|
try:
|
|
187
200
|
from datahub.cli.lite_cli import lite
|
|
188
201
|
|
|
@@ -205,6 +218,14 @@ except ImportError as e:
|
|
|
205
218
|
|
|
206
219
|
|
|
207
220
|
def main(**kwargs):
|
|
221
|
+
# We use threads in a variety of places within our CLI. The multiprocessing
|
|
222
|
+
# "fork" start method is not safe to use with threads.
|
|
223
|
+
# MacOS and Windows already default to "spawn", and Linux will as well starting in Python 3.14.
|
|
224
|
+
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
|
225
|
+
# Eventually it may make sense to use "forkserver" as the default where available,
|
|
226
|
+
# but we can revisit that in the future.
|
|
227
|
+
multiprocessing.set_start_method("spawn", force=True)
|
|
228
|
+
|
|
208
229
|
# This wrapper prevents click from suppressing errors.
|
|
209
230
|
try:
|
|
210
231
|
sys.exit(datahub(standalone_mode=False, **kwargs))
|
|
@@ -223,7 +244,7 @@ def main(**kwargs):
|
|
|
223
244
|
logger.exception(f"Command failed: {exc}")
|
|
224
245
|
|
|
225
246
|
logger.debug(
|
|
226
|
-
f"DataHub CLI version: {
|
|
247
|
+
f"DataHub CLI version: {datahub_version.__version__} at {__file__}"
|
|
227
248
|
)
|
|
228
249
|
logger.debug(
|
|
229
250
|
f"Python version: {sys.version} at {sys.executable} on {platform.platform()}"
|
|
@@ -102,6 +102,10 @@ def convert_dashboard_info_to_patch(
|
|
|
102
102
|
if aspect.datasets:
|
|
103
103
|
patch_builder.add_datasets(aspect.datasets)
|
|
104
104
|
|
|
105
|
+
if aspect.dashboards:
|
|
106
|
+
for dashboard in aspect.dashboards:
|
|
107
|
+
patch_builder.add_dashboard(dashboard)
|
|
108
|
+
|
|
105
109
|
if aspect.access:
|
|
106
110
|
patch_builder.set_access(aspect.access)
|
|
107
111
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import concurrent.futures
|
|
2
2
|
import logging
|
|
3
|
+
import multiprocessing
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
5
|
from functools import partial
|
|
5
6
|
from math import ceil
|
|
@@ -182,6 +183,11 @@ class ClassificationHandler:
|
|
|
182
183
|
|
|
183
184
|
with concurrent.futures.ProcessPoolExecutor(
|
|
184
185
|
max_workers=self.config.classification.max_workers,
|
|
186
|
+
# The fork start method, which is the default on Linux for Python < 3.14, is not
|
|
187
|
+
# safe when the main process uses threads. The default start method on windows/macOS is
|
|
188
|
+
# already spawn, and will be changed to spawn for Linux in Python 3.14.
|
|
189
|
+
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
|
190
|
+
mp_context=multiprocessing.get_context("spawn"),
|
|
185
191
|
) as executor:
|
|
186
192
|
column_info_proposal_futures = [
|
|
187
193
|
executor.submit(
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from abc import ABCMeta, abstractmethod
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from typing import Any, Dict, List, Optional
|
|
@@ -37,8 +38,8 @@ class ClassificationConfig(ConfigModel):
|
|
|
37
38
|
)
|
|
38
39
|
|
|
39
40
|
max_workers: int = Field(
|
|
40
|
-
default=
|
|
41
|
-
description="Number of worker processes to use for classification.
|
|
41
|
+
default=(os.cpu_count() or 4),
|
|
42
|
+
description="Number of worker processes to use for classification. Set to 1 to disable.",
|
|
42
43
|
)
|
|
43
44
|
|
|
44
45
|
table_pattern: AllowDenyPattern = Field(
|
|
@@ -40,6 +40,7 @@ from datahub.ingestion.graph.connections import (
|
|
|
40
40
|
connections_gql,
|
|
41
41
|
get_id_from_connection_urn,
|
|
42
42
|
)
|
|
43
|
+
from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
|
|
43
44
|
from datahub.ingestion.graph.filters import (
|
|
44
45
|
RemovedStatusFilter,
|
|
45
46
|
SearchFilterRule,
|
|
@@ -125,7 +126,7 @@ def _graphql_entity_type(entity_type: str) -> str:
|
|
|
125
126
|
return entity_type
|
|
126
127
|
|
|
127
128
|
|
|
128
|
-
class DataHubGraph(DatahubRestEmitter):
|
|
129
|
+
class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
129
130
|
def __init__(self, config: DatahubClientConfig) -> None:
|
|
130
131
|
self.config = config
|
|
131
132
|
super().__init__(
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from typing import Dict, Optional, Protocol, Type
|
|
3
|
+
|
|
4
|
+
from datahub.emitter.mce_builder import Aspect
|
|
5
|
+
from datahub.metadata.schema_classes import (
|
|
6
|
+
VersionPropertiesClass,
|
|
7
|
+
VersionSetPropertiesClass,
|
|
8
|
+
)
|
|
9
|
+
from datahub.metadata.urns import VersionSetUrn
|
|
10
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DataHubGraphProtocol(Protocol):
|
|
14
|
+
def execute_graphql(
|
|
15
|
+
self,
|
|
16
|
+
query: str,
|
|
17
|
+
variables: Optional[Dict],
|
|
18
|
+
operation_name: Optional[str] = None,
|
|
19
|
+
format_exception: bool = True,
|
|
20
|
+
) -> Dict: ...
|
|
21
|
+
|
|
22
|
+
def get_aspect(
|
|
23
|
+
self,
|
|
24
|
+
entity_urn: str,
|
|
25
|
+
aspect_type: Type[Aspect],
|
|
26
|
+
version: int = 0,
|
|
27
|
+
) -> Optional[Aspect]: ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EntityVersioningAPI(DataHubGraphProtocol):
|
|
31
|
+
LINK_VERSION_MUTATION = """
|
|
32
|
+
mutation($input: LinkVersionInput!) {
|
|
33
|
+
linkAssetVersion(input: $input) {
|
|
34
|
+
urn
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
UNLINK_VERSION_MUTATION = """
|
|
40
|
+
mutation($input: UnlinkVersionInput!) {
|
|
41
|
+
unlinkAssetVersion(input: $input) {
|
|
42
|
+
urn
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
def link_asset_to_version_set(
|
|
48
|
+
self,
|
|
49
|
+
asset_urn: str,
|
|
50
|
+
version_set_urn: Optional[str],
|
|
51
|
+
label: str,
|
|
52
|
+
*,
|
|
53
|
+
comment: Optional[str] = None,
|
|
54
|
+
) -> Optional[str]:
|
|
55
|
+
"""Sets an entity as the latest version of a version set.
|
|
56
|
+
Can also be used to create a new version set, with `asset_urn` as the first version.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
asset_urn: URN of the entity.
|
|
60
|
+
version_set_urn: URN of the version set, or None to generate a new version set urn
|
|
61
|
+
label: Label of the version.
|
|
62
|
+
comment: Comment about the version.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
URN of the version set to which `asset_urn` was linked,
|
|
66
|
+
or None if the `asset_urn` was already linked to `version_set_urn`.
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
entity_type = guess_entity_type(asset_urn)
|
|
70
|
+
if version_set_urn is None:
|
|
71
|
+
version_set_urn = VersionSetUrn(str(uuid.uuid4()), entity_type).urn()
|
|
72
|
+
elif guess_entity_type(version_set_urn) != "versionSet":
|
|
73
|
+
raise ValueError(f"Expected version set URN, got {version_set_urn}")
|
|
74
|
+
|
|
75
|
+
entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
|
|
76
|
+
if entity_version and entity_version.versionSet:
|
|
77
|
+
if entity_version.versionSet == version_set_urn:
|
|
78
|
+
return None
|
|
79
|
+
else:
|
|
80
|
+
raise ValueError(
|
|
81
|
+
f"Asset {asset_urn} is already a version of {entity_version.versionSet}"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
variables = {
|
|
85
|
+
"input": {
|
|
86
|
+
"versionSet": version_set_urn,
|
|
87
|
+
"linkedEntity": asset_urn,
|
|
88
|
+
"version": label,
|
|
89
|
+
"comment": comment,
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
response = self.execute_graphql(self.LINK_VERSION_MUTATION, variables)
|
|
93
|
+
try:
|
|
94
|
+
return response["linkAssetVersion"]["urn"]
|
|
95
|
+
except KeyError:
|
|
96
|
+
raise ValueError(f"Unexpected response: {response}")
|
|
97
|
+
|
|
98
|
+
def link_asset_to_versioned_asset(
|
|
99
|
+
self,
|
|
100
|
+
new_asset_urn: str,
|
|
101
|
+
old_asset_urn: str,
|
|
102
|
+
label: str,
|
|
103
|
+
*,
|
|
104
|
+
comment: Optional[str] = None,
|
|
105
|
+
) -> Optional[str]:
|
|
106
|
+
"""Sets an entity as the latest version of an existing versioned entity.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
new_asset_urn: URN of the new latest entity.
|
|
110
|
+
old_asset_urn: URN of an existing versioned entity to link onto.
|
|
111
|
+
label: Label of the version.
|
|
112
|
+
comment: Comment about the version.
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
URN of the version set to which `new_asset_urn` was linked,
|
|
116
|
+
or None if the `new_asset_urn` was already linked to `old_asset_urn`.
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
new_entity_type = guess_entity_type(new_asset_urn)
|
|
120
|
+
old_entity_type = guess_entity_type(old_asset_urn)
|
|
121
|
+
if new_entity_type != old_entity_type:
|
|
122
|
+
raise ValueError(
|
|
123
|
+
f"Expected URNs of the same type, got {new_entity_type} and {old_entity_type}"
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
old_entity_version = self.get_aspect(old_asset_urn, VersionPropertiesClass)
|
|
127
|
+
if not old_entity_version:
|
|
128
|
+
raise ValueError(f"Asset {old_asset_urn} is not versioned")
|
|
129
|
+
|
|
130
|
+
new_entity_version = self.get_aspect(new_asset_urn, VersionPropertiesClass)
|
|
131
|
+
if new_entity_version:
|
|
132
|
+
if new_entity_version.versionSet == old_entity_version.versionSet:
|
|
133
|
+
return None
|
|
134
|
+
else:
|
|
135
|
+
raise ValueError(
|
|
136
|
+
f"Asset {new_asset_urn} is already a version of {new_entity_version.versionSet}"
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
return self.link_asset_to_version_set(
|
|
140
|
+
new_asset_urn, old_entity_version.versionSet, label, comment=comment
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
def unlink_asset_from_version_set(self, asset_urn: str) -> Optional[str]:
|
|
144
|
+
"""Unlinks an entity from its version set.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
asset_urn: URN of the entity to unlink from its version set.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
If successful, the URN of the version set from which `asset_urn` was unlinked,
|
|
151
|
+
or None if `asset_urn` was not linked to any version set.
|
|
152
|
+
"""
|
|
153
|
+
|
|
154
|
+
entity_version = self.get_aspect(asset_urn, VersionPropertiesClass)
|
|
155
|
+
if not entity_version:
|
|
156
|
+
return None
|
|
157
|
+
|
|
158
|
+
variables = {
|
|
159
|
+
"input": {
|
|
160
|
+
"versionSet": entity_version.versionSet,
|
|
161
|
+
"unlinkedEntity": asset_urn,
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
response = self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables)
|
|
165
|
+
try:
|
|
166
|
+
return response["unlinkAssetVersion"]["urn"]
|
|
167
|
+
except KeyError:
|
|
168
|
+
raise ValueError(f"Unexpected response: {response}")
|
|
169
|
+
|
|
170
|
+
def unlink_latest_asset_from_version_set(
|
|
171
|
+
self, version_set_urn: str
|
|
172
|
+
) -> Optional[str]:
|
|
173
|
+
"""Unlinks the latest version of a version set.
|
|
174
|
+
|
|
175
|
+
Args:
|
|
176
|
+
version_set_urn: URN of the version set.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
If successful, the URN of the entity that was unlinked from `version_set_urn`,
|
|
180
|
+
or None if no entity was unlinked.
|
|
181
|
+
"""
|
|
182
|
+
|
|
183
|
+
version_set_properties = self.get_aspect(
|
|
184
|
+
version_set_urn, VersionSetPropertiesClass
|
|
185
|
+
)
|
|
186
|
+
if not version_set_properties:
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"Version set {version_set_urn} does not exist or has no versions"
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
variables = {
|
|
192
|
+
"input": {
|
|
193
|
+
"versionSet": version_set_urn,
|
|
194
|
+
"unlinkedEntity": version_set_properties.latest,
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
response = self.execute_graphql(self.UNLINK_VERSION_MUTATION, variables)
|
|
198
|
+
try:
|
|
199
|
+
return response["unlinkAssetVersion"]["urn"]
|
|
200
|
+
except KeyError:
|
|
201
|
+
raise ValueError(f"Unexpected response: {response}")
|
|
@@ -15,7 +15,7 @@ import click
|
|
|
15
15
|
import humanfriendly
|
|
16
16
|
import psutil
|
|
17
17
|
|
|
18
|
-
import
|
|
18
|
+
from datahub._version import nice_version_name
|
|
19
19
|
from datahub.configuration.common import (
|
|
20
20
|
ConfigModel,
|
|
21
21
|
IgnorableError,
|
|
@@ -144,8 +144,8 @@ def _add_init_error_context(step: str) -> Iterator[None]:
|
|
|
144
144
|
|
|
145
145
|
@dataclass
|
|
146
146
|
class CliReport(Report):
|
|
147
|
-
cli_version: str =
|
|
148
|
-
cli_entry_location: str =
|
|
147
|
+
cli_version: str = nice_version_name()
|
|
148
|
+
cli_entry_location: str = __file__
|
|
149
149
|
models_version: str = model_version_name()
|
|
150
150
|
py_version: str = sys.version
|
|
151
151
|
py_exec_path: str = sys.executable
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
from dataclasses import field as dataclass_field
|
|
3
|
-
from typing import List
|
|
4
3
|
|
|
5
4
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
6
5
|
StaleEntityRemovalSourceReport,
|
|
7
6
|
)
|
|
7
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@dataclasses.dataclass
|
|
11
11
|
class DataLakeSourceReport(StaleEntityRemovalSourceReport):
|
|
12
12
|
files_scanned = 0
|
|
13
|
-
filtered:
|
|
13
|
+
filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
14
14
|
|
|
15
15
|
def report_file_scanned(self) -> None:
|
|
16
16
|
self.files_scanned += 1
|
|
File without changes
|