acryl-datahub 0.15.0.5rc7__py3-none-any.whl → 0.15.0.5rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (38) hide show
  1. {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/METADATA +2493 -2463
  2. {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/RECORD +38 -35
  3. datahub/_version.py +1 -1
  4. datahub/cli/iceberg_cli.py +707 -0
  5. datahub/entrypoints.py +21 -0
  6. datahub/ingestion/api/incremental_lineage_helper.py +4 -0
  7. datahub/ingestion/glossary/classification_mixin.py +6 -0
  8. datahub/ingestion/glossary/classifier.py +3 -2
  9. datahub/ingestion/source/aws/glue.py +3 -2
  10. datahub/ingestion/source/identity/azure_ad.py +6 -14
  11. datahub/ingestion/source/mode.py +2 -4
  12. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  13. datahub/ingestion/source/snowflake/snowflake_query.py +11 -0
  14. datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
  15. datahub/ingestion/source/snowflake/snowflake_schema.py +17 -0
  16. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +18 -36
  17. datahub/ingestion/source/snowflake/snowflake_tag.py +57 -3
  18. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -0
  19. datahub/ingestion/source/sql/mssql/job_models.py +37 -8
  20. datahub/ingestion/source/sql/mssql/source.py +17 -0
  21. datahub/ingestion/source/tableau/tableau.py +14 -12
  22. datahub/ingestion/source/tableau/tableau_common.py +1 -1
  23. datahub/metadata/_schema_classes.py +160 -2
  24. datahub/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
  25. datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
  26. datahub/metadata/schema.avsc +96 -7
  27. datahub/metadata/schemas/DashboardInfo.avsc +5 -5
  28. datahub/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
  29. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  30. datahub/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
  31. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +92 -0
  32. datahub/metadata/schemas/MetadataChangeEvent.avsc +5 -5
  33. datahub/specific/dashboard.py +43 -1
  34. datahub/upgrade/upgrade.py +13 -5
  35. {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/LICENSE +0 -0
  36. {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/WHEEL +0 -0
  37. {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/entry_points.txt +0 -0
  38. {acryl_datahub-0.15.0.5rc7.dist-info → acryl_datahub-0.15.0.5rc9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,707 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+ from datetime import datetime
5
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
6
+ from urllib.parse import urlparse
7
+
8
+ import boto3
9
+ import botocore
10
+ import click
11
+
12
+ import datahub.metadata.schema_classes
13
+ from datahub.cli.cli_utils import post_entity
14
+ from datahub.configuration.common import GraphError
15
+ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
16
+ from datahub.metadata.schema_classes import SystemMetadataClass
17
+
18
logger = logging.getLogger(__name__)

# Default lifetime (1 hour) requested for the temporary credentials vended for
# a warehouse role when -x/--duration_seconds is not supplied.
DEFAULT_CREDS_EXPIRY_DURATION_SECONDS = 60 * 60
# Environment assigned to warehouse assets when -e/--env is not supplied.
DEFAULT_FABRIC_TYPE = datahub.metadata.schema_classes.FabricTypeClass.PROD

# Name of the aspect on dataPlatformInstance entities that stores the
# Iceberg warehouse configuration (data root, credentials, role, region, env).
DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT = "icebergWarehouseInfo"
24
+
25
+
26
@click.group()
def iceberg() -> None:
    """A group of commands to manage Iceberg warehouses using DataHub as the Iceberg Catalog."""
    # Container group only: subcommands (create/update/list/get/delete) are
    # attached via @iceberg.command(); the group body itself does nothing.
    pass
30
+
31
+
32
def validate_creds(client_id: str, client_secret: str, region: str) -> Any:
    """Validate AWS credentials via STS GetCallerIdentity.

    Returns the boto3 STS client on success so callers can reuse it (e.g. to
    validate an assumable role). On invalid credentials an error is printed to
    stderr and the process exits with status 1.
    """
    try:
        # Build an STS client with the provided credentials; STS is used as a
        # cheap, permission-free way to check that they are valid at all.
        sts_client = boto3.client(
            "sts",
            aws_access_key_id=client_id,
            aws_secret_access_key=client_secret,
            region_name=region,
        )

        # GetCallerIdentity fails fast when the credentials are not valid.
        sts_client.get_caller_identity()

        return sts_client

    except (
        botocore.exceptions.ClientError,
        botocore.exceptions.NoCredentialsError,
    ) as e:
        # Surface the underlying AWS error so the user can act on it
        # (the previous message dropped the cause entirely), then abort.
        click.secho(
            f"Invalid credentials: {e}",
            fg="red",
            err=True,
        )
        sys.exit(1)
60
+
61
+
62
def validate_role(
    role: str, sts_client: Any, duration_seconds: Optional[int]
) -> Dict[str, Any]:
    """Validate that *role* can be assumed with the intended credential lifetime.

    Assumes the IAM role so that misconfiguration is reported at warehouse
    configuration time rather than later, when credentials are vended.

    Returns:
        The temporary ``Credentials`` mapping from the AssumeRole response.
        (The function has always returned this value; the previous ``-> None``
        annotation was incorrect.) On failure an error is printed and the
        process exits with status 1.
    """
    try:
        # Timestamped session name so validation attempts are identifiable
        # in the provider's audit trail.
        session_name = (
            f"datahub-cli-iceberg-validation-{datetime.now().strftime('%Y%m%d%H%M%S')}"
        )

        # If duration_seconds is not specified, datahub will attempt to default to an internal
        # default defined in S3CredentialProvider.java DEFAULT_CREDS_DURATION_SECS. However, it
        # is not possible to know for sure if that value is permitted based on how the role is
        # configured. So, during configuration of the warehouse we must attempt to use the
        # intended expiration duration (default or explicitly supplied) to ensure it works.
        if duration_seconds is None:
            duration_seconds = DEFAULT_CREDS_EXPIRY_DURATION_SECONDS

        assumed_role = sts_client.assume_role(
            RoleArn=role,
            RoleSessionName=session_name,
            DurationSeconds=duration_seconds,
        )

        # Extract and return the temporary credentials.
        return assumed_role["Credentials"]

    except Exception as e:
        click.secho(
            f"Failed to assume role using '{role}' with error: {e}",
            fg="red",
            err=True,
        )
        sys.exit(1)
94
+
95
+
96
def validate_warehouse(data_root: str) -> None:
    """Abort the CLI unless *data_root* uses a supported storage scheme (s3)."""
    if urlparse(data_root).scheme == "s3":
        return
    click.secho(
        f"Unsupported warehouse location '{data_root}', supported schemes: s3",
        fg="red",
        err=True,
    )
    sys.exit(1)
106
+
107
+
108
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
@click.option(
    "-p", "--description", required=False, type=str, help="Description of the warehouse"
)
@click.option(
    "-d",
    "--data_root",
    required=True,
    type=str,
    help="The path to the data root for the warehouse data",
)
@click.option(
    "-i",
    "--client_id",
    required=True,
    type=str,
    help="Client ID to authenticate with the storage provider of the data root",
)
@click.option(
    "-s",
    "--client_secret",
    required=True,
    type=str,
    help="Client Secret to authenticate with the storage provider of the data root",
)
@click.option(
    "-g",
    "--region",
    required=True,
    type=str,
    help="Storage provider specific region where the warehouse data root is located",
)
@click.option(
    "-r",
    "--role",
    required=True,
    type=str,
    help="Storage provider specific role to be used when vending credentials",
)
@click.option(
    "-e",
    "--env",
    required=False,
    type=str,
    help=f"Environment where all assets stored in this warehouse belong to. Defaults to {DEFAULT_FABRIC_TYPE} if unspecified",
)
@click.option(
    "-x",
    "--duration_seconds",
    required=False,
    type=int,
    help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
)
def create(
    warehouse: str,
    description: Optional[str],
    data_root: str,
    client_id: str,
    client_secret: str,
    region: str,
    role: str,
    duration_seconds: Optional[int],
    env: Optional[str],
) -> None:
    """
    Create an iceberg warehouse.
    """

    client = get_default_graph()

    # The warehouse is modeled as a DataPlatformInstance of the iceberg platform.
    urn = iceberg_data_platform_instance_urn(warehouse)

    # Creation must not clobber an existing warehouse of the same name.
    if client.exists(urn):
        click.secho(
            f"Warehouse with name {warehouse} already exists",
            fg="red",
            err=True,
        )
        sys.exit(1)

    # Each of these validators prints an actionable message and exits on failure,
    # so reaching the next line means the input was accepted.
    validate_warehouse(data_root)
    storage_client = validate_creds(client_id, client_secret, region)
    validate_role(role, storage_client, duration_seconds)

    # Persist the storage credentials as DataHub secrets; only the secret urns
    # (not the raw values) are written into the warehouse aspect below.
    client_id_urn, client_secret_urn = create_iceberg_secrets(
        client, warehouse, client_id, client_secret
    )

    if env is None:
        env = DEFAULT_FABRIC_TYPE

    # Aspect 1: icebergWarehouseInfo — the warehouse configuration proper.
    warehouse_aspect = DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT
    warehouse_aspect_obj: Dict[str, Any] = {
        "dataRoot": data_root,
        "clientId": client_id_urn,
        "clientSecret": client_secret_urn,
        "region": region,
        "role": role,
        "env": env,
    }

    # Only write an explicit expiry when the user supplied one; otherwise the
    # server-side default applies (see validate_role for why it was validated).
    if duration_seconds:
        warehouse_aspect_obj["tempCredentialExpirationSeconds"] = duration_seconds

    # Aspect 2: dataPlatformInstanceProperties — display name and description.
    data_platform_instance_properties_aspect_obj = {
        "name": warehouse,
    }

    if description:
        data_platform_instance_properties_aspect_obj["description"] = description

    data_platform_instance_properties_aspect = "dataPlatformInstanceProperties"

    entity_type = "dataPlatformInstance"
    system_metadata: Union[None, SystemMetadataClass] = None

    # Properties are posted first; if this fails nothing else is written.
    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=data_platform_instance_properties_aspect,
        entity_type=entity_type,
        aspect_value=data_platform_instance_properties_aspect_obj,
        system_metadata=system_metadata,
    )

    # If status is non 200, post_entity will raise an exception.

    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=warehouse_aspect,
        entity_type=entity_type,
        aspect_value=warehouse_aspect_obj,
        system_metadata=system_metadata,
    )

    click.secho(
        f"✅ Created warehouse with urn {urn}, clientID: {client_id_urn}, and clientSecret: {client_secret_urn}",
        fg="green",
    )
254
+
255
+
256
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
@click.option(
    "-p",
    "--description",
    required=False,
    type=str,
    help="Description of the warehouse",
)
@click.option(
    "-d",
    "--data_root",
    required=True,
    type=str,
    help="The path to the data root for the warehouse data",
)
@click.option(
    "-i",
    "--client_id",
    required=True,
    type=str,
    help="Client ID to authenticate with the storage provider of the data root",
)
@click.option(
    "-s",
    "--client_secret",
    required=True,
    type=str,
    help="Client Secret to authenticate with the storage provider of the data root",
)
@click.option(
    "-g",
    "--region",
    required=True,
    type=str,
    help="Storage provider specific region where the warehouse data root is located",
)
@click.option(
    "-r",
    "--role",
    required=True,
    type=str,
    help="Storage provider specific role to be used when vending credentials",
)
@click.option(
    "-e",
    "--env",
    required=False,
    type=str,
    help=f"Environment where all assets stored in this warehouse belong to. Defaults to {DEFAULT_FABRIC_TYPE} if unspecified",
)
@click.option(
    "-x",
    "--duration_seconds",
    required=False,
    type=int,
    help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
)
def update(
    warehouse: str,
    data_root: str,
    description: Optional[str],
    client_id: str,
    client_secret: str,
    region: str,
    role: str,
    env: Optional[str],
    duration_seconds: Optional[int],
) -> None:
    """
    Update an existing iceberg warehouse. Re-validates the supplied credentials
    and role, rotates the stored secrets, and rewrites the warehouse
    configuration aspect (data root, region, role, env, credential expiry)
    and its display properties.
    """
    # NOTE: the previous help text claimed only credentials and role could be
    # updated; in fact the full icebergWarehouseInfo aspect is re-posted below.

    client = get_default_graph()

    urn = iceberg_data_platform_instance_urn(warehouse)

    # Update requires the warehouse to already exist (contrast with create).
    if not client.exists(urn):
        raise click.ClickException(f"Warehouse with name {warehouse} does not exist")

    # Each validator prints an actionable message and exits on failure.
    validate_warehouse(data_root)
    storage_client = validate_creds(client_id, client_secret, region)
    validate_role(role, storage_client, duration_seconds)

    # Overwrite the existing DataHub secrets with the new credential values.
    client_id_urn, client_secret_urn = update_iceberg_secrets(
        client, warehouse, client_id, client_secret
    )

    if env is None:
        env = DEFAULT_FABRIC_TYPE

    # Full replacement of the warehouse configuration aspect.
    warehouse_aspect = DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT
    warehouse_aspect_obj: Dict[str, Any] = {
        "dataRoot": data_root,
        "clientId": client_id_urn,
        "clientSecret": client_secret_urn,
        "region": region,
        "role": role,
        "env": env,
    }
    if duration_seconds:
        warehouse_aspect_obj["tempCredentialExpirationSeconds"] = duration_seconds

    data_platform_instance_properties_aspect_obj = {
        "name": warehouse,
    }

    if description:
        data_platform_instance_properties_aspect_obj["description"] = description

    data_platform_instance_properties_aspect = "dataPlatformInstanceProperties"

    entity_type = "dataPlatformInstance"
    system_metadata: Union[None, SystemMetadataClass] = None

    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=data_platform_instance_properties_aspect,
        entity_type=entity_type,
        aspect_value=data_platform_instance_properties_aspect_obj,
        system_metadata=system_metadata,
    )

    # If status is non 200, post_entity will raise an exception.
    post_entity(
        client._session,
        client.config.server,
        urn=urn,
        aspect_name=warehouse_aspect,
        entity_type=entity_type,
        aspect_value=warehouse_aspect_obj,
        system_metadata=system_metadata,
    )

    click.secho(
        f"✅ Updated warehouse with urn {urn}, clientID: {client_id_urn}, and clientSecret: {client_secret_urn}",
        fg="green",
    )
398
+
399
+
400
# Registered with an explicit name so the CLI command stays `datahub iceberg list`
# while the Python symbol no longer shadows the builtin `list`.
@iceberg.command(name="list")
def list_warehouses() -> None:
    """
    List iceberg warehouses
    """

    client = get_default_graph()

    # get_all_warehouses pages through search results lazily.
    for warehouse in get_all_warehouses(client):
        click.echo(warehouse)
410
+
411
+
412
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
def get(warehouse: str) -> None:
    """Fetches the details of the specified iceberg warehouse"""
    client = get_default_graph()
    urn = iceberg_data_platform_instance_urn(warehouse)

    # Guard clause: unknown warehouses are reported as a CLI error.
    if not client.exists(urn):
        raise click.ClickException(f"Iceberg warehouse {warehouse} does not exist")

    warehouse_aspect = client.get_aspect(
        entity_urn=urn,
        aspect_type=datahub.metadata.schema_classes.IcebergWarehouseInfoClass,
    )
    click.echo(urn)
    # Print the configuration aspect as pretty JSON when it is present.
    if warehouse_aspect:
        click.echo(json.dumps(warehouse_aspect.to_obj(), sort_keys=True, indent=2))
431
+
432
+
433
@iceberg.command()
@click.option(
    "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
)
@click.option("-n", "--dry-run", required=False, is_flag=True)
@click.option(
    "-f",
    "--force",
    required=False,
    is_flag=True,
    help="force the delete if set without confirmation",
)
def delete(warehouse: str, dry_run: bool, force: bool) -> None:
    """
    Delete warehouse
    """

    urn = iceberg_data_platform_instance_urn(warehouse)

    client = get_default_graph()

    if not client.exists(urn):
        raise click.ClickException(f"urn {urn} not found")

    # Confirm this is a managed warehouse by checking for presence of the
    # IcebergWarehouseInfo aspect. NOTE(review): when the aspect is absent the
    # command silently deletes nothing — presumably deliberate, but worth a
    # message; confirm intended behavior.
    aspect = client.get_aspect(
        entity_urn=urn,
        aspect_type=datahub.metadata.schema_classes.IcebergWarehouseInfoClass,
    )
    if aspect:
        # Narrowing assignment: aspect is known to be IcebergWarehouseInfoClass here,
        # and its clientId/clientSecret secret urns are needed for deletion below.
        warehouse_aspect: datahub.metadata.schema_classes.IcebergWarehouseInfoClass = (
            aspect
        )

        urns_to_delete: List = []
        resource_names_to_be_deleted: List = []
        # Collect every Container/Dataset attached to this platform instance so
        # they can be removed together with the warehouse.
        for entity in get_related_entities_for_platform_instance(client, urn):
            # Defensive re-check: the search query already restricts results to
            # DATASET/CONTAINER types, so this filter should be a no-op.
            if "__typename" in entity and "urn" in entity:
                if entity["__typename"] in ["Container", "Dataset"]:
                    urns_to_delete.append(entity["urn"])
                    # Prefer the display name for user-facing output; fall back
                    # to the urn when the search hit has no name field.
                    resource_names_to_be_deleted.append(
                        entity.get("name", entity.get("urn"))
                    )
        # TODO: PlatformResource associated with datasets need to be deleted.

        if dry_run:
            # Report what would be removed without mutating anything.
            click.echo(
                f"[Dry-run] Would delete warehouse {urn} and the following datasets and namespaces"
            )
            for resource in resource_names_to_be_deleted:
                click.echo(f" {resource}")
        else:
            # Interactive confirmation unless -f/--force was given; abort=True
            # raises and exits when the user declines.
            if not force:
                click.confirm(
                    f"This will delete {warehouse} warehouse, credentials, and {len(urns_to_delete)} datasets and namespaces from DataHub. Do you want to continue?",
                    abort=True,
                )
            # Delete the warehouse entity first, then its stored credential
            # secrets, then the datasets/namespaces discovered above.
            client.hard_delete_entity(urn)
            client.hard_delete_entity(warehouse_aspect.clientId)
            client.hard_delete_entity(warehouse_aspect.clientSecret)

            for urn_to_delete in urns_to_delete:
                client.hard_delete_entity(urn_to_delete)

            click.echo(
                f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(urns_to_delete)} datasets and namespaces"
            )
501
+
502
+
503
def iceberg_data_platform_instance_urn(warehouse: str) -> str:
    """Return the dataPlatformInstance urn that identifies *warehouse*."""
    return f"urn:li:dataPlatformInstance:({iceberg_data_platform()},{warehouse})"


def iceberg_data_platform() -> str:
    """Return the urn of the iceberg data platform."""
    return "urn:li:dataPlatform:iceberg"


def iceberg_client_id_urn(warehouse: str) -> str:
    """Return the dataHubSecret urn that stores the warehouse's client id."""
    return f"urn:li:dataHubSecret:{warehouse}-client_id"


def iceberg_client_secret_urn(warehouse: str) -> str:
    """Return the dataHubSecret urn that stores the warehouse's client secret."""
    return f"urn:li:dataHubSecret:{warehouse}-client_secret"
517
+
518
+
519
def create_iceberg_secrets(
    client: DataHubGraph, warehouse: str, client_id: str, client_secret: str
) -> Tuple[str, str]:
    """Store the warehouse's storage credentials as DataHub secrets.

    Creates two dataHubSecret entities named ``<warehouse>-client_id`` and
    ``<warehouse>-client_secret`` and returns their urns as a
    ``(client_id_urn, client_secret_urn)`` tuple. On any failure an error is
    printed and the process exits with status 1.
    """
    graphql_query = """
    mutation createIcebergSecrets($clientIdName: String!, $clientId: String!, $clientSecretName: String!, $clientSecret: String!) {
      createClientId: createSecret(
        input: {name: $clientIdName, value: $clientId}
      )
      createClientSecret: createSecret(
        input: {name: $clientSecretName, value: $clientSecret}
      )
    }
    """
    variables = {
        "clientIdName": f"{warehouse}-client_id",
        "clientId": client_id,
        "clientSecretName": f"{warehouse}-client_secret",
        "clientSecret": client_secret,
    }
    try:
        response = client.execute_graphql(
            graphql_query, variables=variables, format_exception=False
        )
    except GraphError as graph_error:
        try:
            # The GraphError's string form is a Python-repr-style list of error
            # dicts (single quotes). Swapping quote styles lets it parse as JSON
            # so the server's message can be shown on its own.
            # NOTE(review): this parse is brittle — any apostrophe in the server
            # message makes it fall through to the raw-text branch below.
            error = json.loads(str(graph_error).replace('"', '\\"').replace("'", '"'))
            click.secho(
                f"Failed to save Iceberg warehouse credentials :{error[0]['message']}",
                fg="red",
                err=True,
            )
        except Exception:
            # Fallback: print the raw error when it cannot be parsed as JSON.
            click.secho(
                f"Failed to save Iceberg warehouse credentials :\n{graph_error}",
                fg="red",
                err=True,
            )
        sys.exit(1)

    # createSecret returns the urn of each created secret.
    if "createClientId" in response and "createClientSecret" in response:
        return response["createClientId"], response["createClientSecret"]

    click.secho(
        f"Internal error: Unexpected response saving credentials:\n{response}",
        fg="red",
        err=True,
    )
    sys.exit(1)
567
+
568
+
569
def update_iceberg_secrets(
    client: DataHubGraph, warehouse: str, client_id: str, client_secret: str
) -> Tuple[str, str]:
    """Rotate the DataHub secrets holding this warehouse's storage credentials.

    Overwrites the existing ``<warehouse>-client_id`` and
    ``<warehouse>-client_secret`` dataHubSecret entities and returns their urns
    as a tuple. Prints an error and exits with status 1 on failure.
    """
    mutation = """
    mutation updateIcebergSecrets($clientIdUrn: String!, $clientIdName: String!, $clientId: String!, $clientSecretUrn: String!, $clientSecretName: String!, $clientSecret: String!) {
      updateClientId: updateSecret(
        input: {urn: $clientIdUrn, name: $clientIdName, value: $clientId}
      )
      updateClientSecret: updateSecret(
        input: {urn: $clientSecretUrn, name: $clientSecretName, value: $clientSecret}
      )
    }
    """
    mutation_vars = {
        "clientIdUrn": iceberg_client_id_urn(warehouse),
        "clientIdName": f"{warehouse}-client_id",
        "clientId": client_id,
        "clientSecretUrn": iceberg_client_secret_urn(warehouse),
        "clientSecretName": f"{warehouse}-client_secret",
        "clientSecret": client_secret,
    }
    try:
        result = client.execute_graphql(
            mutation, variables=mutation_vars, format_exception=False
        )
    except GraphError as graph_error:
        try:
            # Best-effort parse of the repr-style error text into JSON so only
            # the server's message is shown; fall back to the raw text.
            error = json.loads(str(graph_error).replace('"', '\\"').replace("'", '"'))
            message = (
                f"Failed to save Iceberg warehouse credentials :{error[0]['message']}"
            )
        except Exception:
            message = f"Failed to save Iceberg warehouse credentials :\n{graph_error}"
        click.secho(message, fg="red", err=True)
        sys.exit(1)

    if "updateClientId" not in result or "updateClientSecret" not in result:
        click.secho(
            f"Internal error: Unexpected response saving credentials:\n{result}",
            fg="red",
            err=True,
        )
        sys.exit(1)

    # updateSecret returns the urn of each updated secret.
    return result["updateClientId"], result["updateClientSecret"]
619
+
620
+
621
def get_all_warehouses(client: DataHubGraph) -> Iterator[str]:
    """Yield the instanceId of every DataPlatformInstance returned by search.

    Pages through results ten at a time until the server-reported total is
    exhausted; a malformed response ends the scan early.

    NOTE(review): the query does not filter on the iceberg platform — it appears
    to rely on search returning only relevant instances; confirm server-side
    behavior.
    """
    graph_query = """
    query getIcebergWarehouses($start: Int, $count: Int) {
      search(
        input: {type: DATA_PLATFORM_INSTANCE, query: "*", start: $start, count: $count}
      ) {
        start
        total
        searchResults {
          entity {
            urn
            ... on DataPlatformInstance {
              instanceId
            }
          }
        }
      }
    }
    """
    page_size = 10
    offset = 0
    total: Optional[int] = None
    while total is None or offset < total:
        response = client.execute_graphql(
            graph_query,
            variables={"start": offset, "count": page_size},
            format_exception=True,
        )
        # Stop on a response that lacks the expected search envelope.
        if "search" not in response or "total" not in response["search"]:
            break
        page = response["search"]
        total = page["total"]
        for hit in page.get("searchResults", []):
            yield hit["entity"]["instanceId"]
        offset += page_size
658
+
659
+
660
def get_related_entities_for_platform_instance(
    client: DataHubGraph, data_platform_instance_urn: str
) -> Iterator[Dict]:
    """Yield every Dataset/Container entity attached to the given platform instance.

    Pages through searchAcrossEntities ten results at a time, filtered on the
    platformInstance field, until the server-reported total is exhausted; a
    malformed response ends the scan early.
    """
    graph_query = """
    query getIcebergResources($platformInstanceUrn: String!, $start: Int!, $count: Int!) {
      searchAcrossEntities(
        input: {types: [DATASET, CONTAINER], query: "*", start: $start, count: $count, orFilters: [{and: [{field: "platformInstance", values: [$platformInstanceUrn]}]}]}
      ) {
        start
        total
        searchResults {
          entity {
            __typename
            urn
            ... on Dataset {
              urn
              name
            }
          }
        }
      }
    }
    """
    page_size = 10
    offset = 0
    total = None
    while total is None or offset < total:
        response = client.execute_graphql(
            graph_query,
            variables={
                "start": offset,
                "count": page_size,
                "platformInstanceUrn": data_platform_instance_urn,
            },
            format_exception=True,
        )
        # Stop on a response that lacks the expected envelope.
        if (
            "searchAcrossEntities" not in response
            or "total" not in response["searchAcrossEntities"]
        ):
            break
        page = response["searchAcrossEntities"]
        total = page["total"]
        for hit in page.get("searchResults", []):
            yield hit["entity"]
        offset += page_size