acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/METADATA +2617 -2590
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/RECORD +223 -189
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/pydantic_migration_helpers.py +7 -5
- datahub/emitter/rest_emitter.py +70 -12
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +507 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1522 -569
- datahub/metadata/_urns/urn_defs.py +1826 -1658
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +29 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +17758 -17097
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +237 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +116 -0
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +82 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/sdk/search_filters.py +95 -27
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +56 -14
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/top_level.txt +0 -0
datahub/cli/docker_cli.py
CHANGED
@@ -20,6 +20,7 @@ import requests
 from expandvars import expandvars
 from requests_file import FileAdapter
 
+from datahub._version import __version__, is_dev_mode, nice_version_name
 from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
 from datahub.cli.docker_check import (
     DATAHUB_COMPOSE_LEGACY_VOLUME_FILTERS,
@@ -28,45 +29,86 @@ from datahub.cli.docker_check import (
     DockerComposeVersionError,
     QuickstartStatus,
     check_docker_quickstart,
+    check_upgrade_supported,
     get_docker_client,
     run_quickstart_preflight_checks,
 )
-from datahub.cli.quickstart_versioning import QuickstartVersionMappingConfig
+from datahub.cli.quickstart_versioning import (
+    QuickstartVersionMappingConfig,
+)
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.perf_timer import PerfTimer
 
 logger = logging.getLogger(__name__)
-_ClickPositiveInt = click.IntRange(min=1)
 
-NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.quickstart.yml"
-)
-ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
-)
-NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-m1.quickstart.yml"
-)
-ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
-)
-CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers.quickstart.yml"
-)
-ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
-)
-KAFKA_SETUP_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.kafka-setup.quickstart.yml"
-)
+_ClickPositiveInt = click.IntRange(min=1)
 
+QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose.quickstart-profile.yml"
 
 _QUICKSTART_MAX_WAIT_TIME = datetime.timedelta(minutes=10)
 _QUICKSTART_UP_TIMEOUT = datetime.timedelta(seconds=100)
 _QUICKSTART_STATUS_CHECK_INTERVAL = datetime.timedelta(seconds=2)
 
+MIGRATION_REQUIRED_INSTRUCTIONS = f"""
+Your existing DataHub server was installed with an \
+older CLI and is incompatible with the current CLI (version {nice_version_name}).
+
+Required steps to upgrade:
+1. Backup your data (recommended): datahub docker quickstart --backup
+   Guide: https://docs.datahub.com/docs/quickstart#back-up-datahub
+
+2. Remove old installation: datahub docker nuke
+
+3. Start fresh installation: datahub docker quickstart
+
+4. Restore data:
+   datahub docker quickstart --restore
+
+⚠️ Without backup, all existing data will be lost.
+
+For fresh start (if data is not needed):
+1. Remove installation:
+   datahub docker nuke
+
+2. Start fresh:
+   datahub docker quickstart
+"""
+
+REPAIR_REQUIRED_INSTRUCTIONS = f"""
+Unhealthy DataHub Installation Detected
+
+Your DataHub installation has issues that cannot be fixed with the current CLI.
+
+Your options:
+
+OPTION 1 - Preserve data (if needed):
+1. Downgrade CLI to version 1.1:
+   pip install acryl-datahub==1.1
+2. Fix the installation:
+   datahub docker quickstart
+3. Create backup:
+   datahub docker quickstart --backup
+4. Upgrade CLI back:
+   pip install acryl-datahub=={nice_version_name()}
+5. Migrate:
+   datahub docker nuke && datahub docker quickstart
+6. Restore data:
+   datahub docker quickstart --restore
+
+OPTION 2 - Fresh start (if data not needed):
+1. Remove installation:
+   datahub docker nuke
+2. Start fresh:
+   datahub docker quickstart
+
+⚠️ The current CLI cannot repair installations created by older versions.
+
+Additional information on backup and restore: https://docs.datahub.com/docs/quickstart#back-up-datahub
+Troubleshooting guide: https://docs.datahub.com/docs/troubleshooting/quickstart
+"""
+
 
 class Architectures(Enum):
     x86 = "x86"
@@ -89,6 +131,14 @@ def _docker_subprocess_env() -> Dict[str, str]:
     return env
 
 
+def show_migration_instructions():
+    click.secho(MIGRATION_REQUIRED_INSTRUCTIONS, fg="red")
+
+
+def show_repair_instructions():
+    click.secho(REPAIR_REQUIRED_INSTRUCTIONS, fg="red")
+
+
 @click.group()
 def docker() -> None:
     """Helper commands for setting up and interacting with a local
@@ -97,19 +147,22 @@ def docker() -> None:
 
 
 @docker.command()
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 def check() -> None:
     """Check that the Docker containers are healthy"""
     status = check_docker_quickstart()
+
     if status.is_ok():
         click.secho("✔ No issues detected", fg="green")
+        if status.running_unsupported_version:
+            show_migration_instructions()
     else:
+        if status.running_unsupported_version:
+            show_repair_instructions()
         raise status.to_exception("The following issues were detected:")
 
 
-def is_m1() -> bool:
-    """Check whether we are running on an
+def is_apple_silicon() -> bool:
+    """Check whether we are running on an Apple Silicon machine"""
     try:
         return (
             platform.uname().machine == "arm64" and platform.uname().system == "Darwin"
@@ -119,52 +172,11 @@ def is_m1() -> bool:
         return False
 
 
-def is_arch_m1(arch: Architectures) -> bool:
-    return arch in [Architectures.arm64, Architectures.m1, Architectures.m2]
-
-
-def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
-    if graph_service_override is not None:
-        if graph_service_override == "elasticsearch":
-            click.echo("Starting with elasticsearch due to graph-service-impl param\n")
-            return False
-        if graph_service_override == "neo4j":
-            click.echo("Starting with neo4j due to graph-service-impl param\n")
-            return True
-        else:
-            click.secho(
-                graph_service_override
-                + " is not a valid graph service option. Choose either `neo4j` or "
-                "`elasticsearch`\n",
-                fg="red",
-            )
-            raise ValueError(f"invalid graph service option: {graph_service_override}")
-    with get_docker_client() as client:
-        if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
-            click.echo(
-                "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
-                "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
-            )
-            return True
-
-        logger.debug(
-            "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
-            "To use neo4j as a graph backend, run \n"
-            "`datahub docker quickstart --graph-service-impl neo4j`"
-            "\nfrom the root of the datahub repo\n"
-        )
-        return False
-
-
 def _set_environment_variables(
     version: Optional[str],
-    mysql_version: Optional[str],
     mysql_port: Optional[int],
-    zk_port: Optional[int],
     kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
     elastic_port: Optional[int],
-    kafka_setup: Optional[bool],
 ) -> None:
     if version is not None:
         if not version.startswith("v") and "." in version:
@@ -173,24 +185,25 @@ def _set_environment_variables(
             )
             version = f"v{version}"
         os.environ["DATAHUB_VERSION"] = version
-    if mysql_version is not None:
-        os.environ["DATAHUB_MYSQL_VERSION"] = mysql_version
     if mysql_port is not None:
         os.environ["DATAHUB_MAPPED_MYSQL_PORT"] = str(mysql_port)
 
-    if zk_port is not None:
-        os.environ["DATAHUB_MAPPED_ZK_PORT"] = str(zk_port)
-
     if kafka_broker_port is not None:
         os.environ["DATAHUB_MAPPED_KAFKA_BROKER_PORT"] = str(kafka_broker_port)
 
-    if schema_registry_port is not None:
-        os.environ["DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT"] = str(schema_registry_port)
-
     if elastic_port is not None:
         os.environ["DATAHUB_MAPPED_ELASTIC_PORT"] = str(elastic_port)
-
-
+
+    os.environ["METADATA_SERVICE_AUTH_ENABLED"] = "false"
+
+    cliVersion = nice_version_name()
+    if is_dev_mode():  # This should only happen during development/CI.
+        cliVersion = __version__.replace(".dev0", "")
+        logger.info(
+            f"Development build: Using {cliVersion} instead of '{__version__}' version of CLI for UI ingestion"
+        )
+
+    os.environ["UI_INGESTION_DEFAULT_CLI_VERSION"] = cliVersion
 
 
 def _get_default_quickstart_compose_file() -> Optional[str]:
@@ -250,6 +263,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
     compose = _docker_compose_v2()
     base_command: List[str] = [
         *compose,
+        "--profile",
+        "quickstart",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in compose_files_for_stopping
         ),
@@ -346,12 +361,15 @@ EBEAN_DATASOURCE_HOST=mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}
 EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
-
+GRAPH_SERVICE_IMPL=elasticsearch
 KAFKA_BOOTSTRAP_SERVER=broker:29092
-KAFKA_SCHEMAREGISTRY_URL=http://schema-registry
+KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+SCHEMA_REGISTRY_TYPE=INTERNAL
 
-ELASTICSEARCH_HOST=
+ELASTICSEARCH_HOST=search
 ELASTICSEARCH_PORT=${DATAHUB_MAPPED_ELASTIC_PORT:-9200}
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ELASTICSEARCH_PROTOCOL=http
 
 #NEO4J_HOST=http://<your-neo-host>:7474
 #NEO4J_URI=bolt://<your-neo-host>
@@ -385,6 +403,7 @@ DATAHUB_MAE_CONSUMER_PORT=9091
         logger.debug(f"Env file contents: {env_fp_reader.read()}")
 
     # continue to issue the restore indices command
+    # TODO Use --version if passed
     command = (
         "docker pull acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}"
         + f" && docker run --network datahub_network --env-file {env_fp.name} "
@@ -412,12 +431,16 @@ DATAHUB_MAE_CONSUMER_PORT=9091
     return result.returncode
 
 
+# TODO: Do we really need this? If someone wants to use a different arg, they can still pass the standard docker env var DOCKER_DEFAULT_PLATFORM
+# We dont really need to select a different image unlike earlier (mysql vs mariadb) since we do publish both archs for all images (or are available for external images).
 def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
-
-    if
-    click.secho("Detected
+    running_on_apple_silicon = is_apple_silicon()
+    if running_on_apple_silicon:
+        click.secho("Detected Apple Silicon", fg="yellow")
 
-    quickstart_arch =
+    quickstart_arch = (
+        Architectures.x86 if not running_on_apple_silicon else Architectures.arm64
+    )
     if arch:
         matched_arch = [a for a in Architectures if arch.lower() == a.value]
         if not matched_arch:
@@ -437,13 +460,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default="default",
     help="Datahub version to be deployed. If not set, deploy using the defaults from the quickstart compose. Use 'stable' to start the latest stable version.",
 )
-@click.option(
-    "--build-locally",
-    type=bool,
-    is_flag=True,
-    default=False,
-    help="Attempt to build the containers locally before starting",
-)
 @click.option(
     "--pull-images/--no-pull-images",
     type=bool,
@@ -466,13 +482,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="If true, the docker-compose logs will be printed to console if something fails",
 )
-@click.option(
-    "--graph-service-impl",
-    type=str,
-    is_flag=False,
-    default=None,
-    help="If set, forces docker-compose to use that graph service implementation",
-)
 @click.option(
     "--mysql-port",
     type=_ClickPositiveInt,
@@ -480,13 +489,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing mysql instance running on port 3306, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--zk-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing zookeeper instance running on port 2181, set this to a free port to avoid port conflicts on startup",
-)
 @click.option(
     "--kafka-broker-port",
     type=_ClickPositiveInt,
@@ -494,13 +496,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing Kafka broker running on port 9092, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--schema-registry-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing process running on port 8081, set this to a free port to avoid port conflicts with Kafka schema registry on startup",
-)
 @click.option(
     "--elastic-port",
     type=_ClickPositiveInt,
@@ -558,51 +553,29 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
 )
-@click.option(
-    "--standalone_consumers",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches MAE & MCE consumers as stand alone docker containers",
-)
-@click.option(
-    "--kafka-setup",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches Kafka setup job as part of the compose deployment",
-)
 @click.option(
     "--arch",
     required=False,
     help="Specify the architecture for the quickstart images to use. Options are x86, arm64, m1 etc.",
 )
-@upgrade.check_upgrade
 @telemetry.with_telemetry(
     capture_kwargs=[
         "version",
-        "build_locally",
         "pull_images",
         "stop",
         "backup",
        "restore",
        "restore_indices",
-        "standalone_consumers",
-        "kafka_setup",
        "arch",
    ]
 )
 def quickstart(
     version: Optional[str],
-    build_locally: bool,
     pull_images: bool,
     quickstart_compose_file: List[pathlib.Path],
     dump_logs_on_failure: bool,
-    graph_service_impl: Optional[str],
     mysql_port: Optional[int],
-    zk_port: Optional[int],
     kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
     elastic_port: Optional[int],
     stop: bool,
     backup: bool,
@@ -611,8 +584,6 @@ def quickstart(
     restore_file: str,
     restore_indices: bool,
     no_restore_indices: bool,
-    standalone_consumers: bool,
-    kafka_setup: bool,
     arch: Optional[str],
 ) -> None:
     """Start an instance of DataHub locally using docker-compose.
@@ -641,8 +612,8 @@ def quickstart(
         )
         return
 
-    quickstart_arch = detect_quickstart_arch(arch)
     quickstart_versioning = QuickstartVersionMappingConfig.fetch_quickstart_config()
+
     quickstart_execution_plan = quickstart_versioning.get_quickstart_execution_plan(
         version
     )
@@ -668,28 +639,26 @@ def quickstart(
         download_compose_files(
             quickstart_compose_file_name,
             quickstart_compose_file,
-            graph_service_impl,
-            kafka_setup,
-            quickstart_arch,
-            standalone_consumers,
             quickstart_execution_plan.composefile_git_ref,
         )
 
+    # check if running datahub can be upgraded to the latest version.
+    if not _check_upgrade_and_show_instructions(quickstart_compose_file):
+        sys.exit(1)
+
     # set version
     _set_environment_variables(
         version=quickstart_execution_plan.docker_tag,
-        mysql_version=quickstart_execution_plan.mysql_tag,
         mysql_port=mysql_port,
-        zk_port=zk_port,
         kafka_broker_port=kafka_broker_port,
-        schema_registry_port=schema_registry_port,
        elastic_port=elastic_port,
-        kafka_setup=kafka_setup,
     )
 
     compose = _docker_compose_v2()
     base_command: List[str] = [
         *compose,
+        "--profile",
+        "quickstart",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in quickstart_compose_file
         ),
@@ -697,6 +666,8 @@ def quickstart(
         DOCKER_COMPOSE_PROJECT_NAME,
     ]
 
+    click.echo(f"base_command: {base_command}")
+
     # Pull and possibly build the latest containers.
     try:
         if pull_images:
@@ -737,15 +708,6 @@ def quickstart(
             fg="red",
         )
 
-    if build_locally:
-        logger.info("Building docker images locally...")
-        subprocess.run(
-            base_command + ["build", "--pull", "-q"],
-            check=True,
-            env=_docker_subprocess_env(),
-        )
-        logger.info("Finished building docker images!")
-
     # Start it up! (with retries)
     click.echo("\nStarting up DataHub...")
     start_time = datetime.datetime.now()
@@ -836,36 +798,17 @@ def get_docker_compose_base_url(version_tag: str) -> str:
     return f"https://raw.githubusercontent.com/datahub-project/datahub/{version_tag}"
 
 
-def get_github_file_url(
+def get_github_file_url(release_version_tag: str) -> str:
     base_url = get_docker_compose_base_url(release_version_tag)
-
-        github_file = (
-            f"{base_url}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
-    else:
-        github_file = (
-            f"{base_url}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
+    github_file = f"{base_url}/{QUICKSTART_COMPOSE_FILE}"
     return github_file
 
 
 def download_compose_files(
-    quickstart_compose_file_name,
-    quickstart_compose_file_list,
-    graph_service_impl,
-    kafka_setup,
-    quickstart_arch,
-    standalone_consumers,
-    compose_git_ref,
+    quickstart_compose_file_name, quickstart_compose_file_list, compose_git_ref
 ):
     # download appropriate quickstart file
-
-    is_m1 = is_arch_m1(quickstart_arch)
-    github_file = get_github_file_url(should_use_neo4j, is_m1, compose_git_ref)
+    github_file = get_github_file_url(compose_git_ref)
     # also allow local files
     request_session = requests.Session()
     request_session.mount("file://", FileAdapter())
@@ -879,57 +822,14 @@ def download_compose_files(
         logger.info(f"Fetching docker-compose file {github_file} from GitHub")
         # Download the quickstart docker-compose file from GitHub.
         quickstart_download_response = request_session.get(github_file)
+        if quickstart_download_response.status_code == 404:
+            raise click.ClickException(
+                f"Could not find quickstart compose file for version {compose_git_ref}. "
+                "Please try a different version or check the version exists at https://github.com/datahub-project/datahub/releases"
+            )
         quickstart_download_response.raise_for_status()
         tmp_file.write(quickstart_download_response.content)
         logger.debug(f"Copied to {path}")
-    if standalone_consumers:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        consumer_github_file = (
-            f"{base_url}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-            if should_use_neo4j
-            else f"{base_url}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-        )
-
-        default_consumer_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
-        )
-        with (
-            open(default_consumer_compose_file, "wb")
-            if default_consumer_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(consumer_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
-    if kafka_setup:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        kafka_setup_github_file = f"{base_url}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}"
-
-        default_kafka_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.kafka-setup.yml"
-        )
-        with (
-            open(default_kafka_compose_file, "wb")
-            if default_kafka_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {kafka_setup_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(kafka_setup_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
 
 
 def valid_restore_options(
@@ -963,7 +863,7 @@ def valid_restore_options(
     default=None,
     help="The token to be used when ingesting, used when datahub is deployed with METADATA_SERVICE_AUTH_ENABLED=true",
 )
-@
+@upgrade.check_upgrade
 def ingest_sample_data(token: Optional[str]) -> None:
     """Ingest sample data into a running DataHub instance."""
 
@@ -1031,3 +931,25 @@ def nuke(keep_data: bool) -> None:
         click.echo(f"Removing networks in the {DOCKER_COMPOSE_PROJECT_NAME} project")
         for network in client.networks.list(filters=DATAHUB_COMPOSE_PROJECT_FILTER):
             network.remove()
+
+
+def _check_upgrade_and_show_instructions(
+    quickstart_compose_file: List[pathlib.Path],
+) -> bool:
+    """Check if running datahub can be upgraded to the latest version and show appropriate instructions.
+
+    Args:
+        quickstart_compose_file: List of compose file paths
+
+    Returns:
+        bool: True if upgrade is supported, False otherwise
+    """
+    quickstart_status = check_docker_quickstart()
+
+    if not check_upgrade_supported(quickstart_compose_file, quickstart_status):
+        if quickstart_status.is_ok():
+            show_migration_instructions()
+        else:
+            show_repair_instructions()
+        return False
+    return True
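Taken together, the docker_cli.py changes above replace the old per-architecture and neo4j/elasticsearch compose-file matrix with one profile-based compose file, and every compose invocation now carries "--profile quickstart". A minimal sketch of the command shape the CLI assembles after this change follows; the compose binary, downloaded file path, project name, and the "-p" flag are illustrative assumptions, not taken verbatim from the diff.

import itertools
from typing import List

# Illustrative sketch only: mirrors the base_command construction shown in the diff above.
compose: List[str] = ["docker", "compose"]  # assumed result of _docker_compose_v2()
compose_files = ["/tmp/docker-compose.quickstart-profile.yml"]  # hypothetical download location
project_name = "datahub"  # stand-in for DOCKER_COMPOSE_PROJECT_NAME (assumed value)

base_command: List[str] = [
    *compose,
    "--profile",
    "quickstart",
    *itertools.chain.from_iterable(("-f", f"{path}") for path in compose_files),
    "-p",  # assumed: project-name flag preceding DOCKER_COMPOSE_PROJECT_NAME
    project_name,
]

print(" ".join(base_command))
# docker compose --profile quickstart -f /tmp/docker-compose.quickstart-profile.yml -p datahub

The same "--profile quickstart" arguments are added to both the startup path (quickstart) and the stop path (_attempt_stop), so only services tagged with that compose profile participate in either operation.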
datahub/cli/exists_cli.py
CHANGED
@@ -7,7 +7,6 @@ from click_default_group import DefaultGroup
 
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
@@ -23,7 +22,6 @@ def exists() -> None:
 @click.option("--urn", required=False, type=str)
 @click.pass_context
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str]) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.
datahub/cli/get_cli.py
CHANGED
@@ -8,7 +8,6 @@ from click_default_group import DefaultGroup
 from datahub.cli.cli_utils import get_aspects_for_entity
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
@@ -32,7 +31,6 @@ def get() -> None:
 )
 @click.pass_context
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.
datahub/cli/iceberg_cli.py
CHANGED
@@ -16,6 +16,7 @@ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import SystemMetadataClass
 from datahub.telemetry import telemetry
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -164,6 +165,7 @@ def validate_warehouse(data_root: str) -> None:
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
 @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
+@upgrade.check_upgrade
 def create(
     warehouse: str,
     description: Optional[str],
@@ -317,6 +319,7 @@ def create(
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
 @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
+@upgrade.check_upgrade
 def update(
     warehouse: str,
     data_root: str,
@@ -403,6 +406,7 @@ def update(
 
 @iceberg.command()
 @telemetry.with_telemetry()
+@upgrade.check_upgrade
 def list() -> None:
     """
     List iceberg warehouses
@@ -419,6 +423,7 @@ def list() -> None:
     "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
 )
 @telemetry.with_telemetry()
+@upgrade.check_upgrade
 def get(warehouse: str) -> None:
     """Fetches the details of the specified iceberg warehouse"""
     client = get_default_graph(ClientMode.CLI)