acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +31 -7
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +8 -5
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +6 -5
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +16 -18
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +19 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +258 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +30 -6
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +220 -126
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/api/entities/structuredproperties/structuredproperties.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 from enum import Enum
 from pathlib import Path
-from typing import Iterable, List, Optional
+from typing import Iterable, List, Optional, Union

 import yaml
 from pydantic import validator
@@ -38,7 +38,7 @@ class AllowedTypes(Enum):


 class AllowedValue(ConfigModel):
-    value: str
+    value: Union[int, float, str]
     description: Optional[str] = None

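Note on the AllowedValue change above: allowed values for structured properties are no longer forced to be strings. A minimal sketch of what the widened model now accepts (example values only):

    from datahub.api.entities.structuredproperties.structuredproperties import AllowedValue

    # value is now Union[int, float, str], so numeric allowed values pass
    # pydantic validation without being quoted as strings.
    allowed = [
        AllowedValue(value=30, description="30-day retention"),
        AllowedValue(value=0.5, description="fractional weight"),
        AllowedValue(value="gold", description="strings still work"),
    ]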
datahub/cli/check_cli.py
CHANGED
@@ -5,7 +5,8 @@ import pathlib
 import pprint
 import shutil
 import tempfile
-from
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Union

 import click

@@ -20,7 +21,10 @@ from datahub.ingestion.sink.sink_registry import sink_registry
 from datahub.ingestion.source.source_registry import source_registry
 from datahub.ingestion.transformer.transform_registry import transform_registry
 from datahub.telemetry import telemetry
-from datahub.utilities.file_backed_collections import
+from datahub.utilities.file_backed_collections import (
+    ConnectionWrapper,
+    FileBackedDict,
+)

 logger = logging.getLogger(__name__)

@@ -391,29 +395,78 @@ def test_path_spec(config: str, input: str, path_spec_key: str) -> None:
         raise e


+def _jsonify(data: Any) -> Any:
+    if dataclasses.is_dataclass(data):
+        # dataclasses.asdict() is recursive. We're doing the recursion
+        # manually here via _jsonify calls, so we can't use
+        # dataclasses.asdict() here.
+        return {
+            f.name: _jsonify(getattr(data, f.name)) for f in dataclasses.fields(data)
+        }
+    elif isinstance(data, list):
+        return [_jsonify(item) for item in data]
+    elif isinstance(data, dict):
+        return {_jsonify(k): _jsonify(v) for k, v in data.items()}
+    elif isinstance(data, datetime):
+        return data.isoformat()
+    else:
+        return data
+
+
 @check.command()
-@click.argument("
-
-def extract_sql_agg_log(query_log_file: str, output: Optional[str]) -> None:
+@click.argument("db-file", type=click.Path(exists=True, dir_okay=False))
+def extract_sql_agg_log(db_file: str) -> None:
     """Convert a sqlite db generated by the SqlParsingAggregator into a JSON."""

-
+    if pathlib.Path(db_file).suffix != ".db":
+        raise click.UsageError("DB file must be a sqlite db")
+
+    output_dir = pathlib.Path(db_file).with_suffix("")
+    output_dir.mkdir(exist_ok=True)
+
+    shared_connection = ConnectionWrapper(pathlib.Path(db_file))
+
+    tables: List[str] = [
+        row[0]
+        for row in shared_connection.execute(
+            """\
+SELECT
+    name
+FROM
+    sqlite_schema
+WHERE
+    type ='table' AND
+    name NOT LIKE 'sqlite_%';
+""",
+            parameters={},
+        )
+    ]
+    logger.info(f"Extracting {len(tables)} tables from {db_file}: {tables}")
+
+    for table in tables:
+        table_output_path = output_dir / f"{table}.json"
+        if table_output_path.exists():
+            logger.info(f"Skipping {table_output_path} because it already exists")
+            continue

-
+        # Some of the tables might actually be FileBackedList. Because
+        # the list is built on top of the FileBackedDict, we don't
+        # need to distinguish between the two cases.

-
-
-
-    )
-    logger.info(f"Extracting {len(query_log)} queries from {query_log_file}")
-    queries = [dataclasses.asdict(query) for query in query_log]
+        table_data: FileBackedDict[Any] = FileBackedDict(
+            shared_connection=shared_connection, tablename=table
+        )

-
-    with
-
-
-
-
+        data = {}
+        with click.progressbar(
+            table_data.items(), length=len(table_data), label=f"Extracting {table}"
+        ) as items:
+            for k, v in items:
+                data[k] = _jsonify(v)
+
+        with open(table_output_path, "w") as f:
+            json.dump(data, f, indent=2, default=str)
+        logger.info(f"Extracted {len(data)} entries to {table_output_path}")


 @check.command()
datahub/cli/docker_cli.py
CHANGED
@@ -231,7 +231,7 @@ def _docker_compose_v2() -> List[str]:
         # docker-compose v1 is not installed either.
         raise DockerComposeVersionError(
             "You don't have Docker Compose installed. Please install Docker Compose. See https://docs.docker.com/compose/install/.",
-        )
+        ) from None


 def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
@@ -430,7 +430,7 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     return quickstart_arch


-@docker.command()
+@docker.command()
 @click.option(
     "--version",
     type=str,
@@ -592,7 +592,7 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
         "arch",
     ]
 )
-def quickstart(
+def quickstart(
     version: Optional[str],
     build_locally: bool,
     pull_images: bool,
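The substantive change in docker_cli.py is the `) from None` on the DockerComposeVersionError raise, which suppresses Python's implicit exception chaining so users see one clean error instead of a "During handling of the above exception, another exception occurred" traceback; the same pattern appears in lite_cli.py and dataproduct_cli.py below. A standalone illustration (hypothetical parse_port function):

    def parse_port(raw: str) -> int:
        try:
            return int(raw)
        except ValueError:
            # "from None" hides the inner ValueError traceback that would
            # otherwise be printed as the chained cause of this error.
            raise RuntimeError(f"Invalid port: {raw!r}") from None

    parse_port("abc")  # RuntimeError: Invalid port: 'abc' (no chained ValueError)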
datahub/cli/iceberg_cli.py
CHANGED
@@ -14,6 +14,7 @@ from datahub.cli.cli_utils import post_entity
 from datahub.configuration.common import GraphError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.metadata.schema_classes import SystemMetadataClass
+from datahub.telemetry import telemetry

 logger = logging.getLogger(__name__)

@@ -161,6 +162,7 @@ def validate_warehouse(data_root: str) -> None:
     type=int,
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
+@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
 def create(
     warehouse: str,
     description: Optional[str],
@@ -313,6 +315,7 @@ def create(
     type=int,
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
+@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
 def update(
     warehouse: str,
     data_root: str,
@@ -398,6 +401,7 @@ def update(


 @iceberg.command()
+@telemetry.with_telemetry()
 def list() -> None:
     """
     List iceberg warehouses
@@ -413,6 +417,7 @@ def list() -> None:
 @click.option(
     "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
 )
+@telemetry.with_telemetry()
 def get(warehouse: str) -> None:
     """Fetches the details of the specified iceberg warehouse"""
     client = get_default_graph()
@@ -442,6 +447,7 @@ def get(warehouse: str) -> None:
     is_flag=True,
     help="force the delete if set without confirmation",
 )
+@telemetry.with_telemetry(capture_kwargs=["dry_run", "force"])
 def delete(warehouse: str, dry_run: bool, force: bool) -> None:
     """
     Delete warehouse
@@ -470,11 +476,19 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
         # Do we really need this double-check?
         if "__typename" in entity and "urn" in entity:
             if entity["__typename"] in ["Container", "Dataset"]:
+                # add the Platform Resource URN to also be deleted for each dataset.
+                # This is not user visible, so no need to show a name to the user and include it in the count. Each
+                # instance corresponds to a dataset whose name is shown.
+                if entity["__typename"] == "Dataset":
+                    resource_urn = platform_resource_urn(
+                        entity["properties"]["qualifiedName"]
+                    )
+                    urns_to_delete.append(resource_urn)
+
                 urns_to_delete.append(entity["urn"])
                 resource_names_to_be_deleted.append(
                     entity.get("name", entity.get("urn"))
                 )
-                # TODO: PlatformResource associated with datasets need to be deleted.

     if dry_run:
         click.echo(
@@ -485,18 +499,21 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
     else:
         if not force:
             click.confirm(
-                f"This will delete {warehouse} warehouse, credentials, and {len(
+                f"This will delete {warehouse} warehouse, credentials, and {len(resource_names_to_be_deleted)} datasets and namespaces from DataHub. Do you want to continue?",
                 abort=True,
             )
-        client.hard_delete_entity(urn)
-        client.hard_delete_entity(warehouse_aspect.clientId)
-        client.hard_delete_entity(warehouse_aspect.clientSecret)

+        # Delete the resources in the warehouse first, so that in case it is interrupted, the warehouse itself is
+        # still available to enumerate the resources in it that are not yet deleted.
         for urn_to_delete in urns_to_delete:
             client.hard_delete_entity(urn_to_delete)

+        client.hard_delete_entity(urn)
+        client.hard_delete_entity(warehouse_aspect.clientId)
+        client.hard_delete_entity(warehouse_aspect.clientSecret)
+
         click.echo(
-            f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(
+            f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(resource_names_to_be_deleted)} datasets and namespaces"
         )


@@ -504,6 +521,10 @@ def iceberg_data_platform_instance_urn(warehouse: str) -> str:
     return f"urn:li:dataPlatformInstance:({iceberg_data_platform()},{warehouse})"


+def platform_resource_urn(dataset_name: str) -> str:
+    return f"urn:li:platformResource:iceberg.{dataset_name}"
+
+
 def iceberg_data_platform() -> str:
     return "urn:li:dataPlatform:iceberg"

@@ -624,7 +645,7 @@ def get_all_warehouses(client: DataHubGraph) -> Iterator[str]:
     graph_query = """
     query getIcebergWarehouses($start: Int, $count: Int) {
         search(
-            input: {type: DATA_PLATFORM_INSTANCE, query: "
+            input: {type: DATA_PLATFORM_INSTANCE, query: "dataPlatform:iceberg", start: $start, count: $count}
         ) {
             start
             total
@@ -677,6 +698,9 @@ def get_related_entities_for_platform_instance(
             ... on Dataset {
                 urn
                 name
+                properties{
+                    qualifiedName
+                }
             }
         }
     }
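Two related changes in the delete flow above: each Iceberg dataset's hidden platformResource entity is now deleted along with it, and the warehouse-level entities (platform instance, clientId, clientSecret) are removed only after the contained resources, so an interrupted deletion can still enumerate what remains. A sketch of the URN shape produced by the new platform_resource_urn helper (example dataset name):

    def platform_resource_urn(dataset_name: str) -> str:
        # Same helper as added above: one platformResource per Iceberg dataset,
        # keyed by the dataset's qualifiedName.
        return f"urn:li:platformResource:iceberg.{dataset_name}"

    print(platform_resource_urn("my_warehouse.db.table"))
    # urn:li:platformResource:iceberg.my_warehouse.db.table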
datahub/cli/ingest_cli.py
CHANGED
@@ -15,14 +15,14 @@ from tabulate import tabulate
 from datahub._version import nice_version_name
 from datahub.cli import cli_utils
 from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
-from datahub.configuration.common import
+from datahub.configuration.common import GraphError
 from datahub.configuration.config_loader import load_config_file
-from datahub.emitter.mce_builder import datahub_guid
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.run.connection import ConnectionManager
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
+from datahub.utilities.ingest_utils import deploy_source_vars
 from datahub.utilities.perf_timer import PerfTimer

 logger = logging.getLogger(__name__)
@@ -191,23 +191,6 @@ def run(
     # don't raise SystemExit if there's no error


-def _make_ingestion_urn(name: str) -> str:
-    guid = datahub_guid(
-        {
-            "name": name,
-        }
-    )
-    return f"urn:li:dataHubIngestionSource:deploy-{guid}"
-
-
-class DeployOptions(ConfigModel):
-    name: str
-    schedule: Optional[str] = None
-    time_zone: str = "UTC"
-    cli_version: Optional[str] = None
-    executor_id: str = "default"
-
-
 @ingest.command()
 @upgrade.check_upgrade
 @telemetry.with_telemetry()
@@ -258,6 +241,16 @@ class DeployOptions(ConfigModel):
     required=False,
     default="UTC",
 )
+@click.option(
+    "--debug", type=bool, help="Should we debug.", required=False, default=False
+)
+@click.option(
+    "--extra-pip",
+    type=str,
+    help='Extra pip packages. e.g. ["memray"]',
+    required=False,
+    default=None,
+)
 def deploy(
     name: Optional[str],
     config: str,
@@ -266,6 +259,8 @@ def deploy(
     cli_version: Optional[str],
     schedule: Optional[str],
     time_zone: str,
+    extra_pip: Optional[str],
+    debug: bool = False,
 ) -> None:
     """
     Deploy an ingestion recipe to your DataHub instance.
@@ -276,83 +271,23 @@ def deploy(

     datahub_graph = get_default_graph()

-
-
-
-
-
+    variables = deploy_source_vars(
+        name=name,
+        config=config,
+        urn=urn,
+        executor_id=executor_id,
+        cli_version=cli_version,
+        schedule=schedule,
+        time_zone=time_zone,
+        extra_pip=extra_pip,
+        debug=debug,
     )

-    deploy_options_raw = pipeline_config.pop("deployment", None)
-    if deploy_options_raw is not None:
-        deploy_options = DeployOptions.parse_obj(deploy_options_raw)
-
-        if name:
-            logger.info(f"Overriding deployment name {deploy_options.name} with {name}")
-            deploy_options.name = name
-    else:
-        if not name:
-            raise click.UsageError(
-                "Either --name must be set or deployment_name specified in the config"
-            )
-        deploy_options = DeployOptions(name=name)
-
-    # Use remaining CLI args to override deploy_options
-    if schedule:
-        deploy_options.schedule = schedule
-    if time_zone:
-        deploy_options.time_zone = time_zone
-    if cli_version:
-        deploy_options.cli_version = cli_version
-    if executor_id:
-        deploy_options.executor_id = executor_id
-
-    logger.info(f"Using {repr(deploy_options)}")
-
-    if not urn:
-        # When urn/name is not specified, we will generate a unique urn based on the deployment name.
-        urn = _make_ingestion_urn(deploy_options.name)
-        logger.info(f"Using recipe urn: {urn}")
-
-    # Invariant - at this point, both urn and deploy_options are set.
-
-    variables: dict = {
-        "urn": urn,
-        "name": deploy_options.name,
-        "type": pipeline_config["source"]["type"],
-        "recipe": json.dumps(pipeline_config),
-        "executorId": deploy_options.executor_id,
-        "version": deploy_options.cli_version,
-    }
-
-    if deploy_options.schedule is not None:
-        variables["schedule"] = {
-            "interval": deploy_options.schedule,
-            "timezone": deploy_options.time_zone,
-        }
-
     # The updateIngestionSource endpoint can actually do upserts as well.
     graphql_query: str = textwrap.dedent(
         """
-        mutation updateIngestionSource(
-            $urn:
-            $name: String!,
-            $type: String!,
-            $schedule: UpdateIngestionSourceScheduleInput,
-            $recipe: String!,
-            $executorId: String!
-            $version: String) {
-
-            updateIngestionSource(urn: $urn, input: {
-                name: $name,
-                type: $type,
-                schedule: $schedule,
-                config: {
-                    recipe: $recipe,
-                    executorId: $executorId,
-                    version: $version,
-                }
-            })
+        mutation updateIngestionSource($urn: String!, $input: UpdateIngestionSourceInput!) {
+            updateIngestionSource(urn: $urn, input: $input)
         }
         """
     )
@@ -372,7 +307,7 @@ def deploy(
         sys.exit(1)

     click.echo(
-        f"✅ Successfully wrote data ingestion source metadata for recipe {
+        f"✅ Successfully wrote data ingestion source metadata for recipe {variables['input']['name']}:"
     )
     click.echo(response)

@@ -414,7 +349,9 @@ def parse_restli_response(response):


 @ingest.command()
-@click.argument(
+@click.argument(
+    "path", type=click.Path(exists=False)
+)  # exists=False since it only supports local filesystems
 def mcps(path: str) -> None:
     """
     Ingest metadata from a mcp json file or directory of files.
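The deploy command now delegates recipe and variable handling to deploy_source_vars in the new datahub/utilities/ingest_utils.py, picks up two new options (--debug and --extra-pip), and sends everything to GraphQL as a single $input object; the success message reads the name back from variables['input']['name']. A hedged sketch of the reshaped call; the exact keys inside "input" are an assumption based on the old per-variable names, and deploy_source_vars remains the source of truth:

    import json
    from datahub.ingestion.graph.client import get_default_graph

    graph = get_default_graph()
    variables = {
        "urn": "urn:li:dataHubIngestionSource:deploy-example",  # example urn
        "input": {  # assumed keys, mirroring the removed top-level variables
            "name": "my-recipe",
            "type": "snowflake",
            "recipe": json.dumps({"source": {"type": "snowflake", "config": {}}}),
            "executorId": "default",
        },
    }
    graph.execute_graphql(
        query="""
        mutation updateIngestionSource($urn: String!, $input: UpdateIngestionSourceInput!) {
            updateIngestionSource(urn: $urn, input: $input)
        }
        """,
        variables=variables,
    )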
datahub/cli/lite_cli.py
CHANGED
@@ -285,10 +285,12 @@ def search(
     ctx: click.Context,
     query: str = "",
     flavor: str = SearchFlavor.FREE_TEXT.name.lower(),
-    aspect: List[str] =
+    aspect: Optional[List[str]] = None,
     details: bool = True,
 ) -> None:
     """Search with a free text or exact query string"""
+    if aspect is None:
+        aspect = []

     # query flavor should be sanitized by now, but we still need to convert it to a SearchFlavor
     try:
@@ -296,7 +298,7 @@ def search(
     except KeyError:
         raise click.UsageError(
             f"Failed to find a matching query flavor for {flavor}. Valid values are {[x.lower() for x in SearchFlavor._member_names_]}"
-        )
+        ) from None
     catalog = _get_datahub_lite(read_only=True)
     # sanitize query
     result_ids = set()
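The aspect parameter now defaults to None and a fresh list is created inside search(); defaulting to None instead of a list literal is the usual way to avoid Python's shared-mutable-default pitfall, and the KeyError re-raise gets the same `from None` treatment as in docker_cli.py. The pitfall in isolation:

    from typing import List, Optional

    def bad(items: List[str] = []) -> List[str]:
        items.append("x")
        return items

    bad()  # ['x']
    bad()  # ['x', 'x']  (the single default list is shared across calls)

    def good(items: Optional[List[str]] = None) -> List[str]:
        if items is None:
            items = []  # fresh list per call, as in the search() fix above
        items.append("x")
        return items

    good()  # ['x']
    good()  # ['x']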
datahub/cli/specific/dataproduct_cli.py
CHANGED
@@ -49,7 +49,7 @@ def _abort_if_non_existent_urn(graph: DataHubGraph, urn: str, operation: str) ->
         entity_type = parsed_urn.get_type()
     except Exception:
         click.secho(f"Provided urn {urn} does not seem valid", fg="red")
-        raise click.Abort()
+        raise click.Abort() from None
     else:
         if not graph.exists(urn):
             click.secho(
datahub/cli/specific/dataset_cli.py
CHANGED
@@ -1,12 +1,15 @@
+import filecmp
 import json
 import logging
+import os
+import shutil
 from pathlib import Path
-from typing import Set, Tuple
+from typing import List, Set, Tuple

 import click
 from click_default_group import DefaultGroup

-from datahub.api.entities.dataset.dataset import Dataset
+from datahub.api.entities.dataset.dataset import Dataset, DatasetRetrievalConfig
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
@@ -30,18 +33,9 @@ def dataset() -> None:
 @telemetry.with_telemetry()
 def upsert(file: Path) -> None:
     """Upsert attributes to a Dataset in DataHub."""
-
-
-
-    try:
-        for mcp in dataset.generate_mcp():
-            graph.emit(mcp)
-        click.secho(f"Update succeeded for urn {dataset.urn}.", fg="green")
-    except Exception as e:
-        click.secho(
-            f"Update failed for id {id}. due to {e}",
-            fg="red",
-        )
+    # Call the sync command with to_datahub=True to perform the upsert operation
+    ctx = click.get_current_context()
+    ctx.invoke(sync, file=str(file), to_datahub=True)


 @dataset.command(
@@ -111,3 +105,123 @@ def _get_existing_siblings(graph: DataHubGraph, urn: str) -> Set[str]:
         return set(existing.siblings)
     else:
         return set()
+
+
+@dataset.command(
+    name="file",
+)
+@click.option("--lintCheck", required=False, is_flag=True)
+@click.option("--lintFix", required=False, is_flag=True)
+@click.argument("file", type=click.Path(exists=True))
+@upgrade.check_upgrade
+@telemetry.with_telemetry()
+def file(lintcheck: bool, lintfix: bool, file: str) -> None:
+    """Operate on a Dataset file"""
+
+    if lintcheck or lintfix:
+        import tempfile
+        from pathlib import Path
+
+        # Create a temporary file in a secure way
+        # The file will be automatically deleted when the context manager exits
+        with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as temp:
+            temp_path = Path(temp.name)
+            try:
+                # Copy content to the temporary file
+                shutil.copyfile(file, temp_path)
+
+                # Run the linting
+                datasets = Dataset.from_yaml(temp.name)
+                for dataset in datasets:
+                    dataset.to_yaml(temp_path)
+
+                # Compare the files
+                files_match = filecmp.cmp(file, temp_path)
+
+                if files_match:
+                    click.secho("No differences found", fg="green")
+                else:
+                    # Show diff for visibility
+                    os.system(f"diff {file} {temp_path}")
+
+                    if lintfix:
+                        shutil.copyfile(temp_path, file)
+                        click.secho(f"Fixed linting issues in {file}", fg="green")
+                    else:
+                        click.secho(
+                            f"To fix these differences, run 'datahub dataset file --lintFix {file}'",
+                            fg="yellow",
+                        )
+            finally:
+                # Ensure the temporary file is removed
+                if temp_path.exists():
+                    temp_path.unlink()
+    else:
+        click.secho(
+            "No operation specified. Choose from --lintCheck or --lintFix", fg="yellow"
+        )
+
+
+@dataset.command(
+    name="sync",
+)
+@click.option("-f", "--file", required=True, type=click.Path(exists=True))
+@click.option("--to-datahub/--from-datahub", required=True, is_flag=True)
+@upgrade.check_upgrade
+@telemetry.with_telemetry()
+def sync(file: str, to_datahub: bool) -> None:
+    """Sync a Dataset file to/from DataHub"""
+
+    failures: List[str] = []
+    with get_default_graph() as graph:
+        datasets = Dataset.from_yaml(file)
+        for dataset in datasets:
+            assert (
+                dataset.urn is not None
+            )  # Validator should have ensured this is filled. Tell mypy it's not None
+            if to_datahub:
+                missing_entity_references = [
+                    entity_reference
+                    for entity_reference in dataset.entity_references()
+                    if not graph.exists(entity_reference)
+                ]
+                if missing_entity_references:
+                    click.secho(
+                        "\n\t- ".join(
+                            [
+                                f"Skipping Dataset {dataset.urn} due to missing entity references: "
+                            ]
+                            + missing_entity_references
+                        ),
+                        fg="red",
+                    )
+                    failures.append(dataset.urn)
+                    continue
+                try:
+                    for mcp in dataset.generate_mcp():
+                        graph.emit(mcp)
+                    click.secho(f"Update succeeded for urn {dataset.urn}.", fg="green")
+                except Exception as e:
+                    click.secho(
+                        f"Update failed for id {id}. due to {e}",
+                        fg="red",
+                    )
+            else:
+                # Sync from DataHub
+                if graph.exists(dataset.urn):
+                    dataset_get_config = DatasetRetrievalConfig()
+                    if dataset.downstreams:
+                        dataset_get_config.include_downstreams = True
+                    existing_dataset: Dataset = Dataset.from_datahub(
+                        graph=graph, urn=dataset.urn, config=dataset_get_config
+                    )
+                    existing_dataset.to_yaml(Path(file))
+                else:
+                    click.secho(f"Dataset {dataset.urn} does not exist")
+                    failures.append(dataset.urn)
+    if failures:
+        click.secho(
+            f"\nFailed to sync the following Datasets: {', '.join(failures)}",
+            fg="red",
+        )
+        raise click.Abort()