acryl-datahub 1.1.0.5rc7__py3-none-any.whl → 1.1.0.5rc8__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/METADATA +2463 -2465
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/RECORD +58 -58
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +0 -7
- datahub/cli/cli_utils.py +73 -0
- datahub/cli/delete_cli.py +0 -6
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +148 -228
- datahub/cli/exists_cli.py +0 -4
- datahub/cli/get_cli.py +0 -4
- datahub/cli/ingest_cli.py +1 -20
- datahub/cli/put_cli.py +0 -6
- datahub/cli/quickstart_versioning.py +50 -5
- datahub/cli/specific/assertions_cli.py +0 -6
- datahub/cli/specific/datacontract_cli.py +0 -6
- datahub/cli/specific/dataproduct_cli.py +0 -22
- datahub/cli/specific/dataset_cli.py +0 -11
- datahub/cli/specific/forms_cli.py +0 -6
- datahub/cli/specific/group_cli.py +0 -4
- datahub/cli/specific/structuredproperties_cli.py +0 -7
- datahub/cli/specific/user_cli.py +0 -4
- datahub/cli/state_cli.py +0 -4
- datahub/cli/timeline_cli.py +0 -4
- datahub/entrypoints.py +4 -3
- datahub/ingestion/autogenerated/capability_summary.json +88 -23
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/graph/client.py +2 -2
- datahub/ingestion/run/pipeline.py +43 -0
- datahub/ingestion/source/bigquery_v2/bigquery.py +9 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dremio/dremio_source.py +1 -4
- datahub/ingestion/source/gcs/gcs_source.py +9 -1
- datahub/ingestion/source/identity/okta.py +0 -13
- datahub/ingestion/source/powerbi/powerbi.py +0 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/sigma/sigma.py +6 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +11 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +100 -58
- datahub/ingestion/source/snowflake/snowflake_v2.py +11 -1
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +1 -1
- datahub/ingestion/source/sql/hive_metastore.py +0 -10
- datahub/ingestion/source/sql/sql_common.py +4 -0
- datahub/ingestion/source/sql/vertica.py +0 -4
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/superset.py +56 -1
- datahub/ingestion/source/tableau/tableau.py +40 -34
- datahub/ingestion/source/tableau/tableau_constant.py +0 -2
- datahub/ingestion/source/unity/source.py +9 -1
- datahub/sdk/lineage_client.py +2 -2
- datahub/sql_parsing/sql_parsing_aggregator.py +21 -12
- datahub/sql_parsing/sqlglot_lineage.py +40 -15
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/top_level.txt +0 -0
datahub/cli/docker_cli.py
CHANGED
@@ -20,6 +20,7 @@ import requests
 from expandvars import expandvars
 from requests_file import FileAdapter
 
+from datahub._version import __version__, is_dev_mode, nice_version_name
 from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
 from datahub.cli.docker_check import (
     DATAHUB_COMPOSE_LEGACY_VOLUME_FILTERS,
@@ -28,45 +29,85 @@ from datahub.cli.docker_check import (
     DockerComposeVersionError,
     QuickstartStatus,
     check_docker_quickstart,
+    check_upgrade_supported,
     get_docker_client,
     run_quickstart_preflight_checks,
 )
-from datahub.cli.quickstart_versioning import QuickstartVersionMappingConfig
+from datahub.cli.quickstart_versioning import (
+    QuickstartVersionMappingConfig,
+)
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry
-from datahub.upgrade import upgrade
 from datahub.utilities.perf_timer import PerfTimer
 
 logger = logging.getLogger(__name__)
-_ClickPositiveInt = click.IntRange(min=1)
 
-NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.quickstart.yml"
-)
-ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
-)
-NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-m1.quickstart.yml"
-)
-ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
-)
-CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers.quickstart.yml"
-)
-ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
-)
-KAFKA_SETUP_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.kafka-setup.quickstart.yml"
-)
+_ClickPositiveInt = click.IntRange(min=1)
 
+QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose.quickstart-profile.yml"
 
 _QUICKSTART_MAX_WAIT_TIME = datetime.timedelta(minutes=10)
 _QUICKSTART_UP_TIMEOUT = datetime.timedelta(seconds=100)
 _QUICKSTART_STATUS_CHECK_INTERVAL = datetime.timedelta(seconds=2)
 
+MIGRATION_REQUIRED_INSTRUCTIONS = f"""
+Your existing DataHub server was installed with an \
+older CLI and is incompatible with the current CLI (version {nice_version_name}).
+
+Required steps to upgrade:
+1. Backup your data (recommended): datahub docker quickstart --backup
+   Guide: https://docs.datahub.com/docs/quickstart#back-up-datahub
+
+2. Remove old installation: datahub docker nuke
+
+3. Start fresh installation: datahub docker quickstart
+
+4. Restore data:
+   datahub docker quickstart --restore
+
+⚠️ Without backup, all existing data will be lost.
+
+For fresh start (if data is not needed):
+1. Remove installation:
+   datahub docker nuke
+
+2. Start fresh:
+   datahub docker quickstart
+"""
+
+REPAIR_REQUIRED_INSTRUCTIONS = f"""
+Unhealthy DataHub Installation Detected
+
+Your DataHub installation has issues that cannot be fixed with the current CLI.
+
+Your options:
+
+OPTION 1 - Preserve data (if needed):
+1. Downgrade CLI to version 1.1:
+   pip install acryl-datahub==1.1
+2. Fix the installation:
+   datahub docker quickstart
+3. Create backup:
+   datahub docker quickstart --backup
+4. Upgrade CLI back:
+   pip install acryl-datahub=={nice_version_name()}
+5. Migrate:
+   datahub docker nuke && datahub docker quickstart
+6. Restore data:
+   datahub docker quickstart --restore
+
+OPTION 2 - Fresh start (if data not needed):
+1. Remove installation:
+   datahub docker nuke
+2. Start fresh:
+   datahub docker quickstart
+
+⚠️ The current CLI cannot repair installations created by older versions.
+
+Additional information on backup and restore: https://docs.datahub.com/docs/quickstart#back-up-datahub
+Troubleshooting guide: https://docs.datahub.com/docs/troubleshooting/quickstart
+"""
+
 
 class Architectures(Enum):
     x86 = "x86"
@@ -89,6 +130,14 @@ def _docker_subprocess_env() -> Dict[str, str]:
     return env
 
 
+def show_migration_instructions():
+    click.secho(MIGRATION_REQUIRED_INSTRUCTIONS, fg="red")
+
+
+def show_repair_instructions():
+    click.secho(REPAIR_REQUIRED_INSTRUCTIONS, fg="red")
+
+
 @click.group()
 def docker() -> None:
     """Helper commands for setting up and interacting with a local
@@ -97,19 +146,22 @@ def docker() -> None:
 
 
 @docker.command()
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 def check() -> None:
     """Check that the Docker containers are healthy"""
     status = check_docker_quickstart()
+
     if status.is_ok():
         click.secho("✔ No issues detected", fg="green")
+        if status.running_unsupported_version:
+            show_migration_instructions()
     else:
+        if status.running_unsupported_version:
+            show_repair_instructions()
         raise status.to_exception("The following issues were detected:")
 
 
-def is_m1() -> bool:
-    """Check whether we are running on an M1 machine"""
+def is_apple_silicon() -> bool:
+    """Check whether we are running on an Apple Silicon machine"""
     try:
         return (
             platform.uname().machine == "arm64" and platform.uname().system == "Darwin"
@@ -119,52 +171,11 @@ def is_m1() -> bool:
         return False
 
 
-def is_arch_m1(arch: Architectures) -> bool:
-    return arch in [Architectures.arm64, Architectures.m1, Architectures.m2]
-
-
-def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
-    if graph_service_override is not None:
-        if graph_service_override == "elasticsearch":
-            click.echo("Starting with elasticsearch due to graph-service-impl param\n")
-            return False
-        if graph_service_override == "neo4j":
-            click.echo("Starting with neo4j due to graph-service-impl param\n")
-            return True
-        else:
-            click.secho(
-                graph_service_override
-                + " is not a valid graph service option. Choose either `neo4j` or "
-                "`elasticsearch`\n",
-                fg="red",
-            )
-            raise ValueError(f"invalid graph service option: {graph_service_override}")
-    with get_docker_client() as client:
-        if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
-            click.echo(
-                "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
-                "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
-            )
-            return True
-
-        logger.debug(
-            "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
-            "To use neo4j as a graph backend, run \n"
-            "`datahub docker quickstart --graph-service-impl neo4j`"
-            "\nfrom the root of the datahub repo\n"
-        )
-        return False
-
-
 def _set_environment_variables(
     version: Optional[str],
-    mysql_version: Optional[str],
     mysql_port: Optional[int],
-    zk_port: Optional[int],
     kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
     elastic_port: Optional[int],
-    kafka_setup: Optional[bool],
 ) -> None:
     if version is not None:
         if not version.startswith("v") and "." in version:
@@ -173,24 +184,25 @@ def _set_environment_variables(
         )
         version = f"v{version}"
     os.environ["DATAHUB_VERSION"] = version
-    if mysql_version is not None:
-        os.environ["DATAHUB_MYSQL_VERSION"] = mysql_version
     if mysql_port is not None:
         os.environ["DATAHUB_MAPPED_MYSQL_PORT"] = str(mysql_port)
 
-    if zk_port is not None:
-        os.environ["DATAHUB_MAPPED_ZK_PORT"] = str(zk_port)
-
     if kafka_broker_port is not None:
         os.environ["DATAHUB_MAPPED_KAFKA_BROKER_PORT"] = str(kafka_broker_port)
 
-    if schema_registry_port is not None:
-        os.environ["DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT"] = str(schema_registry_port)
-
     if elastic_port is not None:
         os.environ["DATAHUB_MAPPED_ELASTIC_PORT"] = str(elastic_port)
-
-
+
+    os.environ["METADATA_SERVICE_AUTH_ENABLED"] = "false"
+
+    cliVersion = nice_version_name()
+    if is_dev_mode():  # This should only happen during development/CI.
+        cliVersion = __version__.replace(".dev0", "")
+        logger.info(
+            f"Development build: Using {cliVersion} instead of '{__version__}' version of CLI for UI ingestion"
+        )
+
+    os.environ["UI_INGESTION_DEFAULT_CLI_VERSION"] = cliVersion
 
 
 def _get_default_quickstart_compose_file() -> Optional[str]:
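The dev-mode branch added above normalizes a development build string before exporting it for UI ingestion. A one-line illustration (the version string is hypothetical):

    # e.g. a local dev build "1.1.0.6.dev0" would be exported as "1.1.0.6"
    assert "1.1.0.6.dev0".replace(".dev0", "") == "1.1.0.6"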
@@ -250,6 +262,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
     compose = _docker_compose_v2()
     base_command: List[str] = [
         *compose,
+        "--profile",
+        "quickstart",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in compose_files_for_stopping
         ),
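Both this stop path and the start path later in the file now scope compose to the quickstart profile. A minimal sketch of the resulting command shape, assuming _docker_compose_v2() resolves to the docker compose v2 invocation and using an illustrative file path:

    # Sketch only; the real list is built from _docker_compose_v2() output,
    # and the diff shows only the DOCKER_COMPOSE_PROJECT_NAME constant at the end.
    base_command = [
        "docker", "compose",
        "--profile", "quickstart",
        "-f", "/tmp/docker-compose.quickstart-profile.yml",
        "-p", "datahub",  # assumption: project name passed via -p
    ]
    # e.g. base_command + ["stop"] to stop, base_command + ["up", "-d"] to start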
@@ -346,12 +360,15 @@ EBEAN_DATASOURCE_HOST=mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}
 EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
-
+GRAPH_SERVICE_IMPL=elasticsearch
 KAFKA_BOOTSTRAP_SERVER=broker:29092
-KAFKA_SCHEMAREGISTRY_URL=http://schema-registry
+KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+SCHEMA_REGISTRY_TYPE=INTERNAL
 
-ELASTICSEARCH_HOST=
+ELASTICSEARCH_HOST=search
 ELASTICSEARCH_PORT=${DATAHUB_MAPPED_ELASTIC_PORT:-9200}
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ELASTICSEARCH_PROTOCOL=http
 
 #NEO4J_HOST=http://<your-neo-host>:7474
 #NEO4J_URI=bolt://<your-neo-host>
@@ -385,6 +402,7 @@ DATAHUB_MAE_CONSUMER_PORT=9091
         logger.debug(f"Env file contents: {env_fp_reader.read()}")
 
     # continue to issue the restore indices command
+    # TODO Use --version if passed
     command = (
         "docker pull acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}"
         + f" && docker run --network datahub_network --env-file {env_fp.name} "
@@ -412,12 +430,16 @@ DATAHUB_MAE_CONSUMER_PORT=9091
     return result.returncode
 
 
+# TODO: Do we really need this? If someone wants to use a different arg, they can still pass the standard docker env var DOCKER_DEFAULT_PLATFORM
+# We dont really need to select a different image unlike earlier (mysql vs mariadb) since we do publish both archs for all images (or are available for external images).
 def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
-    running_on_m1 = is_m1()
-    if running_on_m1:
-        click.secho("Detected M1 machine", fg="yellow")
+    running_on_apple_silicon = is_apple_silicon()
+    if running_on_apple_silicon:
+        click.secho("Detected Apple Silicon", fg="yellow")
 
-    quickstart_arch = Architectures.x86 if not running_on_m1 else Architectures.arm64
+    quickstart_arch = (
+        Architectures.x86 if not running_on_apple_silicon else Architectures.arm64
+    )
     if arch:
         matched_arch = [a for a in Architectures if arch.lower() == a.value]
        if not matched_arch:
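The --arch matching above is a case-insensitive lookup against the enum values. A standalone illustration using only the members visible in this diff (the full enum may define more):

    from enum import Enum

    class Architectures(Enum):  # subset mirroring docker_cli.py
        x86 = "x86"
        arm64 = "arm64"
        m1 = "m1"
        m2 = "m2"

    arch = "M1"
    matched = [a for a in Architectures if arch.lower() == a.value]
    print(matched)  # [<Architectures.m1: 'm1'>]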
@@ -437,13 +459,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default="default",
     help="Datahub version to be deployed. If not set, deploy using the defaults from the quickstart compose. Use 'stable' to start the latest stable version.",
 )
-@click.option(
-    "--build-locally",
-    type=bool,
-    is_flag=True,
-    default=False,
-    help="Attempt to build the containers locally before starting",
-)
 @click.option(
     "--pull-images/--no-pull-images",
     type=bool,
@@ -466,13 +481,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="If true, the docker-compose logs will be printed to console if something fails",
 )
-@click.option(
-    "--graph-service-impl",
-    type=str,
-    is_flag=False,
-    default=None,
-    help="If set, forces docker-compose to use that graph service implementation",
-)
 @click.option(
     "--mysql-port",
     type=_ClickPositiveInt,
@@ -480,13 +488,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing mysql instance running on port 3306, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--zk-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing zookeeper instance running on port 2181, set this to a free port to avoid port conflicts on startup",
-)
 @click.option(
     "--kafka-broker-port",
     type=_ClickPositiveInt,
@@ -494,13 +495,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing Kafka broker running on port 9092, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--schema-registry-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing process running on port 8081, set this to a free port to avoid port conflicts with Kafka schema registry on startup",
-)
 @click.option(
     "--elastic-port",
     type=_ClickPositiveInt,
@@ -558,51 +552,29 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
 )
-@click.option(
-    "--standalone_consumers",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches MAE & MCE consumers as stand alone docker containers",
-)
-@click.option(
-    "--kafka-setup",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches Kafka setup job as part of the compose deployment",
-)
 @click.option(
     "--arch",
     required=False,
     help="Specify the architecture for the quickstart images to use. Options are x86, arm64, m1 etc.",
 )
-@upgrade.check_upgrade
 @telemetry.with_telemetry(
     capture_kwargs=[
         "version",
-        "build_locally",
         "pull_images",
         "stop",
         "backup",
         "restore",
         "restore_indices",
-        "standalone_consumers",
-        "kafka_setup",
         "arch",
     ]
 )
 def quickstart(
     version: Optional[str],
-    build_locally: bool,
     pull_images: bool,
     quickstart_compose_file: List[pathlib.Path],
     dump_logs_on_failure: bool,
-    graph_service_impl: Optional[str],
     mysql_port: Optional[int],
-    zk_port: Optional[int],
     kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
     elastic_port: Optional[int],
     stop: bool,
     backup: bool,
@@ -611,8 +583,6 @@ def quickstart(
     restore_file: str,
     restore_indices: bool,
     no_restore_indices: bool,
-    standalone_consumers: bool,
-    kafka_setup: bool,
     arch: Optional[str],
 ) -> None:
     """Start an instance of DataHub locally using docker-compose.
@@ -641,8 +611,8 @@ def quickstart(
         )
         return
 
-    quickstart_arch = detect_quickstart_arch(arch)
     quickstart_versioning = QuickstartVersionMappingConfig.fetch_quickstart_config()
+
     quickstart_execution_plan = quickstart_versioning.get_quickstart_execution_plan(
         version
     )
@@ -668,28 +638,26 @@ def quickstart(
         download_compose_files(
             quickstart_compose_file_name,
             quickstart_compose_file,
-            graph_service_impl,
-            kafka_setup,
-            quickstart_arch,
-            standalone_consumers,
             quickstart_execution_plan.composefile_git_ref,
         )
 
+    # check if running datahub can be upgraded to the latest version.
+    if not _check_upgrade_and_show_instructions(quickstart_compose_file):
+        sys.exit(1)
+
     # set version
     _set_environment_variables(
         version=quickstart_execution_plan.docker_tag,
-        mysql_version=quickstart_execution_plan.mysql_tag,
         mysql_port=mysql_port,
-        zk_port=zk_port,
         kafka_broker_port=kafka_broker_port,
-        schema_registry_port=schema_registry_port,
         elastic_port=elastic_port,
-        kafka_setup=kafka_setup,
     )
 
     compose = _docker_compose_v2()
     base_command: List[str] = [
         *compose,
+        "--profile",
+        "quickstart",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in quickstart_compose_file
         ),
@@ -697,6 +665,8 @@ def quickstart(
         DOCKER_COMPOSE_PROJECT_NAME,
     ]
 
+    click.echo(f"base_command: {base_command}")
+
     # Pull and possibly build the latest containers.
     try:
         if pull_images:
@@ -737,15 +707,6 @@ def quickstart(
                 fg="red",
             )
 
-    if build_locally:
-        logger.info("Building docker images locally...")
-        subprocess.run(
-            base_command + ["build", "--pull", "-q"],
-            check=True,
-            env=_docker_subprocess_env(),
-        )
-        logger.info("Finished building docker images!")
-
     # Start it up! (with retries)
     click.echo("\nStarting up DataHub...")
     start_time = datetime.datetime.now()
@@ -836,36 +797,17 @@ def get_docker_compose_base_url(version_tag: str) -> str:
     return f"https://raw.githubusercontent.com/datahub-project/datahub/{version_tag}"
 
 
-def get_github_file_url(should_use_neo4j, is_m1, release_version_tag):
+def get_github_file_url(release_version_tag: str) -> str:
     base_url = get_docker_compose_base_url(release_version_tag)
-    if should_use_neo4j:
-        github_file = (
-            f"{base_url}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
-    else:
-        github_file = (
-            f"{base_url}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
+    github_file = f"{base_url}/{QUICKSTART_COMPOSE_FILE}"
     return github_file
 
 
 def download_compose_files(
-    quickstart_compose_file_name,
-    quickstart_compose_file_list,
-    graph_service_impl,
-    kafka_setup,
-    quickstart_arch,
-    standalone_consumers,
-    compose_git_ref,
+    quickstart_compose_file_name, quickstart_compose_file_list, compose_git_ref
 ):
     # download appropriate quickstart file
-    should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
-    is_m1 = is_arch_m1(quickstart_arch)
-    github_file = get_github_file_url(should_use_neo4j, is_m1, compose_git_ref)
+    github_file = get_github_file_url(compose_git_ref)
     # also allow local files
     request_session = requests.Session()
     request_session.mount("file://", FileAdapter())
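With the neo4j/M1 branching removed, the compose file URL is a single format string. For an illustrative tag it resolves as:

    # Tag value hypothetical; the real ref comes from
    # quickstart_execution_plan.composefile_git_ref.
    ref = "v1.2.0"
    base_url = f"https://raw.githubusercontent.com/datahub-project/datahub/{ref}"
    github_file = f"{base_url}/docker/quickstart/docker-compose.quickstart-profile.yml"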
@@ -879,57 +821,14 @@ def download_compose_files(
         logger.info(f"Fetching docker-compose file {github_file} from GitHub")
         # Download the quickstart docker-compose file from GitHub.
         quickstart_download_response = request_session.get(github_file)
+        if quickstart_download_response.status_code == 404:
+            raise click.ClickException(
+                f"Could not find quickstart compose file for version {compose_git_ref}. "
+                "Please try a different version or check the version exists at https://github.com/datahub-project/datahub/releases"
+            )
         quickstart_download_response.raise_for_status()
         tmp_file.write(quickstart_download_response.content)
         logger.debug(f"Copied to {path}")
-    if standalone_consumers:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        consumer_github_file = (
-            f"{base_url}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-            if should_use_neo4j
-            else f"{base_url}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-        )
-
-        default_consumer_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
-        )
-        with (
-            open(default_consumer_compose_file, "wb")
-            if default_consumer_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(consumer_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
-    if kafka_setup:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        kafka_setup_github_file = f"{base_url}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}"
-
-        default_kafka_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.kafka-setup.yml"
-        )
-        with (
-            open(default_kafka_compose_file, "wb")
-            if default_kafka_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {kafka_setup_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(kafka_setup_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
 
 
 def valid_restore_options(
@@ -963,7 +862,6 @@ def valid_restore_options(
     default=None,
     help="The token to be used when ingesting, used when datahub is deployed with METADATA_SERVICE_AUTH_ENABLED=true",
 )
-@telemetry.with_telemetry()
 def ingest_sample_data(token: Optional[str]) -> None:
     """Ingest sample data into a running DataHub instance."""
 
@@ -1031,3 +929,25 @@ def nuke(keep_data: bool) -> None:
     click.echo(f"Removing networks in the {DOCKER_COMPOSE_PROJECT_NAME} project")
     for network in client.networks.list(filters=DATAHUB_COMPOSE_PROJECT_FILTER):
         network.remove()
+
+
+def _check_upgrade_and_show_instructions(
+    quickstart_compose_file: List[pathlib.Path],
+) -> bool:
+    """Check if running datahub can be upgraded to the latest version and show appropriate instructions.
+
+    Args:
+        quickstart_compose_file: List of compose file paths
+
+    Returns:
+        bool: True if upgrade is supported, False otherwise
+    """
+    quickstart_status = check_docker_quickstart()
+
+    if not check_upgrade_supported(quickstart_compose_file, quickstart_status):
+        if quickstart_status.is_ok():
+            show_migration_instructions()
+        else:
+            show_repair_instructions()
+        return False
+    return True
datahub/cli/exists_cli.py
CHANGED
@@ -7,8 +7,6 @@ from click_default_group import DefaultGroup
 
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
-from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -22,8 +20,6 @@ def exists() -> None:
 @exists.command()
 @click.option("--urn", required=False, type=str)
 @click.pass_context
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str]) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.
datahub/cli/get_cli.py
CHANGED
@@ -8,8 +8,6 @@ from click_default_group import DefaultGroup
 from datahub.cli.cli_utils import get_aspects_for_entity
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
-from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -31,8 +29,6 @@ def get() -> None:
     help="Whether to print details from database which help in audit.",
 )
 @click.pass_context
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.