acryl-datahub 0.15.0.5rc7__py3-none-any.whl → 0.15.0.5rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/METADATA +2493 -2463
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/RECORD +38 -35
- datahub/_version.py +1 -1
- datahub/cli/iceberg_cli.py +707 -0
- datahub/entrypoints.py +21 -0
- datahub/ingestion/api/incremental_lineage_helper.py +4 -0
- datahub/ingestion/glossary/classification_mixin.py +6 -0
- datahub/ingestion/glossary/classifier.py +3 -2
- datahub/ingestion/source/aws/glue.py +3 -2
- datahub/ingestion/source/identity/azure_ad.py +6 -14
- datahub/ingestion/source/mode.py +2 -4
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_query.py +11 -0
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +17 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +18 -36
- datahub/ingestion/source/snowflake/snowflake_tag.py +57 -3
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -0
- datahub/ingestion/source/sql/mssql/job_models.py +37 -8
- datahub/ingestion/source/sql/mssql/source.py +17 -0
- datahub/ingestion/source/tableau/tableau.py +14 -12
- datahub/ingestion/source/tableau/tableau_common.py +1 -1
- datahub/metadata/_schema_classes.py +160 -2
- datahub/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
- datahub/metadata/schema.avsc +96 -7
- datahub/metadata/schemas/DashboardInfo.avsc +5 -5
- datahub/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +92 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +5 -5
- datahub/specific/dashboard.py +43 -1
- datahub/upgrade/upgrade.py +13 -5
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,707 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
import boto3
|
|
9
|
+
import botocore
|
|
10
|
+
import click
|
|
11
|
+
|
|
12
|
+
import datahub.metadata.schema_classes
|
|
13
|
+
from datahub.cli.cli_utils import post_entity
|
|
14
|
+
from datahub.configuration.common import GraphError
|
|
15
|
+
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
16
|
+
from datahub.metadata.schema_classes import SystemMetadataClass
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)

# Default lifetime (1 hour) for the temporary credentials vended for the
# warehouse role when -x/--duration_seconds is not supplied.
DEFAULT_CREDS_EXPIRY_DURATION_SECONDS = 60 * 60
# Fabric/environment assigned to warehouse assets when -e/--env is not supplied.
DEFAULT_FABRIC_TYPE = datahub.metadata.schema_classes.FabricTypeClass.PROD

# Name of the aspect on dataPlatformInstance entities that stores the Iceberg
# warehouse configuration (data root, credentials, region, role, env).
DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT = "icebergWarehouseInfo"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@click.group()
def iceberg() -> None:
    """A group of commands to manage Iceberg warehouses using DataHub as the Iceberg Catalog."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def validate_creds(client_id: str, client_secret: str, region: str) -> Any:
    """
    Validate the supplied storage credentials against AWS STS.

    Returns the boto3 STS client on success so callers can reuse it (e.g. for
    role validation). Exits the CLI with a non-zero status when the
    credentials are rejected.
    """
    try:
        # Create a boto3 client with the provided credentials.
        # Using STS (Security Token Service) for validation.
        sts_client = boto3.client(
            "sts",
            aws_access_key_id=client_id,
            aws_secret_access_key=client_secret,
            region_name=region,
        )

        # GetCallerIdentity requires no IAM permissions, which makes it a
        # cheap, side-effect-free validity probe.
        sts_client.get_caller_identity()

        # If successful, return the STS client for further use.
        return sts_client

    except (
        botocore.exceptions.ClientError,
        botocore.exceptions.NoCredentialsError,
    ) as e:
        # Surface the underlying error so the user can act on it, instead of a
        # bare "Invalid credentials" with no cause.
        click.secho(
            f"Invalid credentials: {e}",
            fg="red",
            err=True,
        )
        sys.exit(1)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def validate_role(role: str, sts_client: Any, duration_seconds: Optional[int]) -> Any:
    """
    Validate that the given IAM role can be assumed with the intended
    credential expiry duration.

    Returns the temporary ``Credentials`` mapping from STS on success; exits
    the CLI with a non-zero status when AssumeRole fails.

    NOTE: the original signature was annotated ``-> None`` although the
    function returns the credentials; the annotation is corrected here.
    """
    try:
        session_name = (
            f"datahub-cli-iceberg-validation-{datetime.now().strftime('%Y%m%d%H%M%S')}"
        )
        # Assume the IAM role to ensure the settings we have are valid and if not, can report them at config time.

        # If duration_seconds is not specified, datahub will attempt to default to an internal default
        # defined in S3CredentialProvider.java DEFAULT_CREDS_DURATION_SECS. However, it is not possible to know for sure
        # if that value is permitted based on how the role is configured. So, during the configuration of the warehouse
        # we must attempt to use the intended expiration duration (default or explicitly supplied) to ensure it
        # actually does work.
        if duration_seconds is None:
            duration_seconds = DEFAULT_CREDS_EXPIRY_DURATION_SECONDS

        assumed_role = sts_client.assume_role(
            RoleArn=role,
            RoleSessionName=session_name,
            DurationSeconds=duration_seconds,
        )

        # Extract and return the temporary credentials.
        credentials = assumed_role["Credentials"]
        return credentials

    except Exception as e:
        click.secho(
            f"Failed to assume role using '{role}' with error: {e}",
            fg="red",
            err=True,
        )
        sys.exit(1)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def validate_warehouse(data_root: str) -> None:
    """Ensure the warehouse data root uses a supported storage scheme.

    Only S3-backed data roots are supported; exits the process with an error
    message otherwise.
    """
    if urlparse(data_root).scheme == "s3":
        return
    click.secho(
        f"Unsupported warehouse location '{data_root}', supported schemes: s3",
        fg="red",
        err=True,
    )
    sys.exit(1)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
@click.option(
    "-p", "--description", required=False, type=str, help="Description of the warehouse"
)
@click.option(
    "-d",
    "--data_root",
    required=True,
    type=str,
    help="The path to the data root for the warehouse data",
)
@click.option(
    "-i",
    "--client_id",
    required=True,
    type=str,
    help="Client ID to authenticate with the storage provider of the data root",
)
@click.option(
    "-s",
    "--client_secret",
    required=True,
    type=str,
    help="Client Secret to authenticate with the storage provider of the data root",
)
@click.option(
    "-g",
    "--region",
    required=True,
    type=str,
    help="Storage provider specific region where the warehouse data root is located",
)
@click.option(
    "-r",
    "--role",
    required=True,
    type=str,
    help="Storage provider specific role to be used when vending credentials",
)
@click.option(
    "-e",
    "--env",
    required=False,
    type=str,
    help=f"Environment where all assets stored in this warehouse belong to. Defaults to {DEFAULT_FABRIC_TYPE} if unspecified",
)
@click.option(
    "-x",
    "--duration_seconds",
    required=False,
    type=int,
    help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
)
def create(
    warehouse: str,
    description: Optional[str],
    data_root: str,
    client_id: str,
    client_secret: str,
    region: str,
    role: str,
    duration_seconds: Optional[int],
    env: Optional[str],
) -> None:
    """
    Create an iceberg warehouse.

    Validates the data root, credentials, and role, stores the credentials as
    DataHub secrets, then writes two aspects on a new dataPlatformInstance
    entity: dataPlatformInstanceProperties and icebergWarehouseInfo.
    Exits non-zero on any validation or persistence failure.
    """

    client = get_default_graph()

    urn = iceberg_data_platform_instance_urn(warehouse)

    # Warehouse names map 1:1 to platform-instance urns; refuse duplicates.
    if client.exists(urn):
        click.secho(
            f"Warehouse with name {warehouse} already exists",
            fg="red",
            err=True,
        )
        sys.exit(1)

    # will throw an actionable error message if invalid.
    validate_warehouse(data_root)
    storage_client = validate_creds(client_id, client_secret, region)
    validate_role(role, storage_client, duration_seconds)

    # The raw credentials are stored as DataHub secrets; only their urns are
    # written into the warehouse aspect below.
    client_id_urn, client_secret_urn = create_iceberg_secrets(
        client, warehouse, client_id, client_secret
    )

    if env is None:
        env = DEFAULT_FABRIC_TYPE

    warehouse_aspect = DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT
    warehouse_aspect_obj: Dict[str, Any] = {
        "dataRoot": data_root,
        "clientId": client_id_urn,
        "clientSecret": client_secret_urn,
        "region": region,
        "role": role,
        "env": env,
    }

    # Only persist an explicit expiry; otherwise the server-side default applies.
    if duration_seconds:
        warehouse_aspect_obj["tempCredentialExpirationSeconds"] = duration_seconds

    data_platform_instance_properties_aspect_obj = {
        "name": warehouse,
    }

    if description:
        data_platform_instance_properties_aspect_obj["description"] = description

    data_platform_instance_properties_aspect = "dataPlatformInstanceProperties"

    entity_type = "dataPlatformInstance"
    system_metadata: Union[None, SystemMetadataClass] = None

    # Write the display properties first, then the warehouse config aspect.
    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=data_platform_instance_properties_aspect,
        entity_type=entity_type,
        aspect_value=data_platform_instance_properties_aspect_obj,
        system_metadata=system_metadata,
    )

    # If status is non 200, post_entity will raise an exception.

    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=warehouse_aspect,
        entity_type=entity_type,
        aspect_value=warehouse_aspect_obj,
        system_metadata=system_metadata,
    )

    click.secho(
        f"✅ Created warehouse with urn {urn}, clientID: {client_id_urn}, and clientSecret: {client_secret_urn}",
        fg="green",
    )
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
@click.option(
    "-p",
    "--description",
    required=False,
    type=str,
    help="Description of the warehouse",
)
@click.option(
    "-d",
    "--data_root",
    required=True,
    type=str,
    help="The path to the data root for the warehouse data",
)
@click.option(
    "-i",
    "--client_id",
    required=True,
    type=str,
    help="Client ID to authenticate with the storage provider of the data root",
)
@click.option(
    "-s",
    "--client_secret",
    required=True,
    type=str,
    help="Client Secret to authenticate with the storage provider of the data root",
)
@click.option(
    "-g",
    "--region",
    required=True,
    type=str,
    help="Storage provider specific region where the warehouse data root is located",
)
@click.option(
    "-r",
    "--role",
    required=True,
    type=str,
    help="Storage provider specific role to be used when vending credentials",
)
@click.option(
    "-e",
    "--env",
    required=False,
    type=str,
    help=f"Environment where all assets stored in this warehouse belong to. Defaults to {DEFAULT_FABRIC_TYPE} if unspecified",
)
@click.option(
    "-x",
    "--duration_seconds",
    required=False,
    type=int,
    help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
)
def update(
    warehouse: str,
    data_root: str,
    description: Optional[str],
    client_id: str,
    client_secret: str,
    region: str,
    role: str,
    env: Optional[str],
    duration_seconds: Optional[int],
) -> None:
    """
    Update an existing iceberg warehouse.

    Re-validates the supplied configuration, rotates the stored secrets in
    place, and overwrites both aspects on the dataPlatformInstance entity.

    NOTE(review): the original docstring said "Can only update credentials,
    and role. Cannot update region", yet all fields — including region and
    data_root — are written back into the aspect here. Confirm the intended
    server-side restriction.
    """

    client = get_default_graph()

    urn = iceberg_data_platform_instance_urn(warehouse)

    # Unlike `create`, the warehouse must already exist.
    if not client.exists(urn):
        raise click.ClickException(f"Warehouse with name {warehouse} does not exist")

    # Same validations as `create`; each exits non-zero with an actionable message.
    validate_warehouse(data_root)
    storage_client = validate_creds(client_id, client_secret, region)
    validate_role(role, storage_client, duration_seconds)

    # Rotate the existing DataHub secrets in place (urns are derived from the
    # warehouse name, so they stay stable across updates).
    client_id_urn, client_secret_urn = update_iceberg_secrets(
        client, warehouse, client_id, client_secret
    )

    if env is None:
        env = DEFAULT_FABRIC_TYPE

    warehouse_aspect = DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT
    warehouse_aspect_obj: Dict[str, Any] = {
        "dataRoot": data_root,
        "clientId": client_id_urn,
        "clientSecret": client_secret_urn,
        "region": region,
        "role": role,
        "env": env,
    }
    if duration_seconds:
        warehouse_aspect_obj["tempCredentialExpirationSeconds"] = duration_seconds

    data_platform_instance_properties_aspect_obj = {
        "name": warehouse,
    }

    if description:
        data_platform_instance_properties_aspect_obj["description"] = description

    data_platform_instance_properties_aspect = "dataPlatformInstanceProperties"

    entity_type = "dataPlatformInstance"
    system_metadata: Union[None, SystemMetadataClass] = None

    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=data_platform_instance_properties_aspect,
        entity_type=entity_type,
        aspect_value=data_platform_instance_properties_aspect_obj,
        system_metadata=system_metadata,
    )

    # If status is non 200, post_entity will raise an exception.
    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=warehouse_aspect,
        entity_type=entity_type,
        aspect_value=warehouse_aspect_obj,
        system_metadata=system_metadata,
    )

    click.secho(
        f"✅ Updated warehouse with urn {urn}, clientID: {client_id_urn}, and clientSecret: {client_secret_urn}",
        fg="green",
    )
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
# Register under the CLI name "list" while avoiding shadowing the builtin
# `list` with the function name.
@iceberg.command(name="list")
def list_warehouses() -> None:
    """
    List iceberg warehouses.

    Prints one warehouse (platform-instance id) per line.
    """

    client = get_default_graph()

    for warehouse in get_all_warehouses(client):
        click.echo(warehouse)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
def get(warehouse: str) -> None:
    """Fetches the details of the specified iceberg warehouse"""
    graph = get_default_graph()
    urn = iceberg_data_platform_instance_urn(warehouse)

    # Guard clause: unknown warehouses fail fast.
    if not graph.exists(urn):
        raise click.ClickException(f"Iceberg warehouse {warehouse} does not exist")

    info = graph.get_aspect(
        entity_urn=urn,
        aspect_type=datahub.metadata.schema_classes.IcebergWarehouseInfoClass,
    )
    click.echo(urn)
    if info:
        click.echo(json.dumps(info.to_obj(), sort_keys=True, indent=2))
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
@click.option("-n", "--dry-run", required=False, is_flag=True)
@click.option(
    "-f",
    "--force",
    required=False,
    is_flag=True,
    help="force the delete if set without confirmation",
)
def delete(warehouse: str, dry_run: bool, force: bool) -> None:
    """
    Delete warehouse.

    Removes the warehouse platform instance, its stored credential secrets,
    and all Dataset/Container entities belonging to the platform instance.
    Supports --dry-run to preview and --force to skip confirmation.
    """

    urn = iceberg_data_platform_instance_urn(warehouse)

    client = get_default_graph()

    if not client.exists(urn):
        raise click.ClickException(f"urn {urn} not found")

    # Confirm this is a managed warehouse by checking for presence of IcebergWarehouse aspect
    warehouse_aspect = client.get_aspect(
        entity_urn=urn,
        aspect_type=datahub.metadata.schema_classes.IcebergWarehouseInfoClass,
    )
    if warehouse_aspect is None:
        # Previously a missing aspect caused a silent no-op; fail loudly so the
        # user knows nothing was deleted.
        raise click.ClickException(
            f"{warehouse} is not a DataHub-managed Iceberg warehouse (missing {DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT} aspect)"
        )

    urns_to_delete: List = []
    resource_names_to_be_deleted: List = []
    for entity in get_related_entities_for_platform_instance(client, urn):
        # Defensive: search results are expected to carry both keys.
        if "__typename" in entity and "urn" in entity:
            if entity["__typename"] in ["Container", "Dataset"]:
                urns_to_delete.append(entity["urn"])
                resource_names_to_be_deleted.append(
                    entity.get("name", entity.get("urn"))
                )
    # TODO: PlatformResource associated with datasets need to be deleted.

    if dry_run:
        click.echo(
            f"[Dry-run] Would delete warehouse {urn} and the following datasets and namespaces"
        )
        for resource in resource_names_to_be_deleted:
            click.echo(f" {resource}")
        return

    if not force:
        click.confirm(
            f"This will delete {warehouse} warehouse, credentials, and {len(urns_to_delete)} datasets and namespaces from DataHub. Do you want to continue?",
            abort=True,
        )
    # Delete the platform instance, then the credential secrets it references.
    client.hard_delete_entity(urn)
    client.hard_delete_entity(warehouse_aspect.clientId)
    client.hard_delete_entity(warehouse_aspect.clientSecret)

    for urn_to_delete in urns_to_delete:
        client.hard_delete_entity(urn_to_delete)

    click.echo(
        f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(urns_to_delete)} datasets and namespaces"
    )
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def iceberg_data_platform_instance_urn(warehouse: str) -> str:
    """Build the dataPlatformInstance URN for the named Iceberg warehouse."""
    platform = iceberg_data_platform()
    return "urn:li:dataPlatformInstance:({},{})".format(platform, warehouse)
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def iceberg_data_platform() -> str:
    """Return the URN of the Iceberg data platform."""
    platform_urn = "urn:li:dataPlatform:iceberg"
    return platform_urn
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def iceberg_client_id_urn(warehouse: str) -> str:
    """Return the dataHubSecret URN that stores the warehouse's client id.

    Annotations added for consistency with the rest of this module.
    """
    return f"urn:li:dataHubSecret:{warehouse}-client_id"
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def iceberg_client_secret_urn(warehouse: str) -> str:
    """Return the dataHubSecret URN that stores the warehouse's client secret.

    Annotations added for consistency with the rest of this module.
    """
    return f"urn:li:dataHubSecret:{warehouse}-client_secret"
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def create_iceberg_secrets(
    client: DataHubGraph, warehouse: str, client_id: str, client_secret: str
) -> Tuple[str, str]:
    """
    Store the warehouse's client id/secret as new DataHub secrets via GraphQL.

    Returns a tuple of (client-id secret urn, client-secret urn). Exits the
    CLI with a non-zero status on any GraphQL error or unexpected response.
    """
    graphql_query = """
        mutation createIcebergSecrets($clientIdName: String!, $clientId: String!, $clientSecretName: String!, $clientSecret: String!) {
          createClientId: createSecret(
            input: {name: $clientIdName, value: $clientId}
          )
          createClientSecret: createSecret(
            input: {name: $clientSecretName, value: $clientSecret}
          )
        }
    """
    # Secret names are derived from the warehouse name; they correspond to the
    # urns produced by iceberg_client_id_urn / iceberg_client_secret_urn.
    variables = {
        "clientIdName": f"{warehouse}-client_id",
        "clientId": client_id,
        "clientSecretName": f"{warehouse}-client_secret",
        "clientSecret": client_secret,
    }
    try:
        response = client.execute_graphql(
            graphql_query, variables=variables, format_exception=False
        )
    except GraphError as graph_error:
        try:
            # HACK: the error string appears to be a python repr of a list of
            # error dicts; the quote swapping below coerces it into JSON to
            # pull out the first message — fragile, TODO confirm and replace
            # with structured error data if the client exposes it.
            error = json.loads(str(graph_error).replace('"', '\\"').replace("'", '"'))
            click.secho(
                f"Failed to save Iceberg warehouse credentials :{error[0]['message']}",
                fg="red",
                err=True,
            )
        except Exception:
            # Parsing failed — fall back to printing the raw error.
            click.secho(
                f"Failed to save Iceberg warehouse credentials :\n{graph_error}",
                fg="red",
                err=True,
            )
        sys.exit(1)

    if "createClientId" in response and "createClientSecret" in response:
        return response["createClientId"], response["createClientSecret"]

    # Reaching here means the mutation "succeeded" but returned an unexpected shape.
    click.secho(
        f"Internal error: Unexpected response saving credentials:\n{response}",
        fg="red",
        err=True,
    )
    sys.exit(1)
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
def update_iceberg_secrets(
    client: DataHubGraph, warehouse: str, client_id: str, client_secret: str
) -> Tuple[str, str]:
    """
    Rotate the warehouse's client id/secret in their existing DataHub secrets.

    Unlike create_iceberg_secrets, the target secret urns are derived from the
    warehouse name and passed to updateSecret. Returns (client-id secret urn,
    client-secret urn); exits the CLI non-zero on any GraphQL error or
    unexpected response.
    """
    graphql_query = """
        mutation updateIcebergSecrets($clientIdUrn: String!, $clientIdName: String!, $clientId: String!, $clientSecretUrn: String!, $clientSecretName: String!, $clientSecret: String!) {
          updateClientId: updateSecret(
            input: {urn: $clientIdUrn, name: $clientIdName, value: $clientId}
          )
          updateClientSecret: updateSecret(
            input: {urn: $clientSecretUrn, name: $clientSecretName, value: $clientSecret}
          )
        }
    """
    variables = {
        "clientIdUrn": iceberg_client_id_urn(warehouse),
        "clientIdName": f"{warehouse}-client_id",
        "clientId": client_id,
        "clientSecretUrn": iceberg_client_secret_urn(warehouse),
        "clientSecretName": f"{warehouse}-client_secret",
        "clientSecret": client_secret,
    }
    try:
        response = client.execute_graphql(
            graphql_query, variables=variables, format_exception=False
        )
    except GraphError as graph_error:
        try:
            # HACK: quote-swap the repr'd error into JSON to extract the first
            # message — same fragile approach as create_iceberg_secrets;
            # TODO confirm and replace with structured error data if available.
            error = json.loads(str(graph_error).replace('"', '\\"').replace("'", '"'))
            click.secho(
                f"Failed to save Iceberg warehouse credentials :{error[0]['message']}",
                fg="red",
                err=True,
            )
        except Exception:
            # Parsing failed — fall back to printing the raw error.
            click.secho(
                f"Failed to save Iceberg warehouse credentials :\n{graph_error}",
                fg="red",
                err=True,
            )
        sys.exit(1)

    if "updateClientId" in response and "updateClientSecret" in response:
        return response["updateClientId"], response["updateClientSecret"]

    # Unexpected response shape from a "successful" mutation.
    click.secho(
        f"Internal error: Unexpected response saving credentials:\n{response}",
        fg="red",
        err=True,
    )
    sys.exit(1)
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def get_all_warehouses(client: DataHubGraph) -> Iterator[str]:
    """
    Yield the instanceId of each DataPlatformInstance found via search,
    paging through results 10 at a time.

    NOTE(review): the search filters only on entity type, not on the iceberg
    platform — presumably all DATA_PLATFORM_INSTANCE entities here are Iceberg
    warehouses; confirm whether other platform instances can appear.
    """
    graph_query = """
        query getIcebergWarehouses($start: Int, $count: Int) {
          search(
            input: {type: DATA_PLATFORM_INSTANCE, query: "*", start: $start, count: $count}
          ) {
            start
            total
            searchResults {
              entity {
                urn
                ... on DataPlatformInstance {
                  instanceId
                }
              }
            }
          }
        }
    """
    start = 0
    count = 10
    total = None
    # `total` is unknown until the first page comes back.
    while total is None or start < total:
        response = client.execute_graphql(
            graph_query,
            variables={"start": start, "count": count},
            format_exception=True,
        )
        if "search" in response and "total" in response["search"]:
            total = response["search"]["total"]
            for result in response["search"].get("searchResults", []):
                yield result["entity"]["instanceId"]
            start += count
        else:
            # Malformed/empty response: stop paging rather than loop forever.
            break
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def get_related_entities_for_platform_instance(
    client: DataHubGraph, data_platform_instance_urn: str
) -> Iterator[Dict]:
    """
    Yield every Dataset/Container entity whose platformInstance matches the
    given URN, paging through search results 10 at a time.
    """
    graph_query = """
        query getIcebergResources($platformInstanceUrn: String!, $start: Int!, $count: Int!) {
          searchAcrossEntities(
            input: {types: [DATASET, CONTAINER], query: "*", start: $start, count: $count, orFilters: [{and: [{field: "platformInstance", values: [$platformInstanceUrn]}]}]}
          ) {
            start
            total
            searchResults {
              entity {
                __typename
                urn
                ... on Dataset {
                  urn
                  name
                }
              }
            }
          }
        }
    """
    page_size = 10
    offset = 0
    total = None
    # `total` is unknown until the first page comes back.
    while total is None or offset < total:
        response = client.execute_graphql(
            graph_query,
            variables={
                "start": offset,
                "count": page_size,
                "platformInstanceUrn": data_platform_instance_urn,
            },
            format_exception=True,
        )
        if "searchAcrossEntities" not in response:
            break
        payload = response["searchAcrossEntities"]
        if "total" not in payload:
            break
        total = payload["total"]
        for result in payload.get("searchResults", []):
            yield result["entity"]
        offset += page_size