acryl-datahub 0.15.0rc6__py3-none-any.whl → 0.15.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/METADATA +2433 -2433
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/RECORD +29 -29
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
- datahub/cli/put_cli.py +1 -1
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/emitter/mcp_patch_builder.py +43 -0
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
- datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/csv_enricher.py +1 -1
- datahub/ingestion/source/dremio/dremio_source.py +4 -2
- datahub/ingestion/source/elastic_search.py +1 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +12 -7
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
- datahub/ingestion/source/ge_data_profiler.py +23 -1
- datahub/ingestion/source/sql/sql_types.py +14 -2
- datahub/ingestion/transformer/add_dataset_tags.py +1 -1
- datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
- datahub/integrations/assertion/common.py +1 -1
- datahub/lite/duckdb_lite.py +12 -17
- datahub/specific/chart.py +0 -39
- datahub/specific/dashboard.py +0 -39
- datahub/specific/datajob.py +3 -47
- datahub/utilities/urns/_urn_base.py +1 -1
- datahub/utilities/urns/structured_properties_urn.py +1 -1
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=NMZEhV_gq-gUecvUHX3Sa6_GbgxJuCJ7XuBUcPhuIRs,574
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -52,7 +52,7 @@ datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp6
|
|
|
52
52
|
datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
53
|
datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
|
|
54
54
|
datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=
|
|
55
|
+
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=UeNPicCmrYJThv0msUlYUNArHGcjdc_0EX5yLijc_Ao,9267
|
|
56
56
|
datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
|
|
57
57
|
datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0VhA,2818
|
|
58
58
|
datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
|
|
@@ -72,7 +72,7 @@ datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
|
|
|
72
72
|
datahub/cli/lite_cli.py,sha256=UmlMMquce6lHiPaKUBBT0XQtqR9SHEmrGlJyKV9YY60,13030
|
|
73
73
|
datahub/cli/migrate.py,sha256=p42vixwKzi9OHQnIa0K2FxwGvt-1OxXeuYGJzfu5Sqo,17939
|
|
74
74
|
datahub/cli/migration_utils.py,sha256=0qHo_9eSR4buyV_K_tdcHSLBufKphBWwwwT1iK_I4S8,9382
|
|
75
|
-
datahub/cli/put_cli.py,sha256=
|
|
75
|
+
datahub/cli/put_cli.py,sha256=4ol9aLdidX1VXjVxMG2tkfEMPyjLpgOk2pfl0Gvb8iU,3841
|
|
76
76
|
datahub/cli/quickstart_versioning.py,sha256=MyWvw92s4b84wIEizjSUZjoMClwLbhpgMdHeDav-x2o,5713
|
|
77
77
|
datahub/cli/state_cli.py,sha256=TkIzohZOJYBowniJpTipPxiQTziErVeZCxq197GlvUQ,1142
|
|
78
78
|
datahub/cli/telemetry.py,sha256=xw3SiAn2je48Qv4kXPYN5EPVKHWEWZc2LGoF7UzGs8U,489
|
|
@@ -80,7 +80,7 @@ datahub/cli/timeline_cli.py,sha256=kxs-kJNFxDT27RQeh2i3WiQ6a8SYkPeJbksWUc8kUhc,7
|
|
|
80
80
|
datahub/cli/specific/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
81
|
datahub/cli/specific/assertions_cli.py,sha256=I4_MCwYoWDturxYvdRuA-MbSvYMeM0yFQm9Bdj_4Uj4,5375
|
|
82
82
|
datahub/cli/specific/datacontract_cli.py,sha256=IkBovwuPT5jNB8X-8AQJRO4C9cFSNm1at8v4YctLFgQ,2531
|
|
83
|
-
datahub/cli/specific/dataproduct_cli.py,sha256=
|
|
83
|
+
datahub/cli/specific/dataproduct_cli.py,sha256=c22W35wuTiZR4MKHP-P6ER5ABok1tooV1JzECFhR9uY,15081
|
|
84
84
|
datahub/cli/specific/dataset_cli.py,sha256=AwSmIiuV3XbgprW4_1Wj-EJq1OPqFyolSNczQm5BROs,3441
|
|
85
85
|
datahub/cli/specific/file_loader.py,sha256=YMyv_evdKyHSft5Tm_kOcqJ4ALpRmMm54ZJAyl7Nxqs,773
|
|
86
86
|
datahub/cli/specific/forms_cli.py,sha256=OLVeG8NtK1eDBuUKCT5Ald35np8__f8mLzbZM_zUfWU,1484
|
|
@@ -117,7 +117,7 @@ datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkR
|
|
|
117
117
|
datahub/emitter/mce_builder.py,sha256=CMES7gTj4w_kZhOsWuxbLwdz5e2tdiilibnZXstRGhw,16117
|
|
118
118
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
119
119
|
datahub/emitter/mcp_builder.py,sha256=ju-1dZMKs5dlWcTi4zcNRVmhkfhmfX3JFULZSbgxSFs,9968
|
|
120
|
-
datahub/emitter/mcp_patch_builder.py,sha256=
|
|
120
|
+
datahub/emitter/mcp_patch_builder.py,sha256=W85q1maVUMpOIo5lwLRn82rLXRVoZ_gurl_a-pvVCpE,4291
|
|
121
121
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
122
122
|
datahub/emitter/rest_emitter.py,sha256=rIWqEJjcSIM16_8DXqNqZ_h5s_nj46DTiyRKA5EQHXQ,15021
|
|
123
123
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
@@ -184,12 +184,12 @@ datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LW
|
|
|
184
184
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
185
185
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
186
186
|
datahub/ingestion/source/confluent_schema_registry.py,sha256=_h9D8bUXoaGcwgwB94dX6aTyLY5ve7XGdcVFSJHGSJc,18804
|
|
187
|
-
datahub/ingestion/source/csv_enricher.py,sha256=
|
|
187
|
+
datahub/ingestion/source/csv_enricher.py,sha256=AIxQFkmSzFgCa_Fzt2EiFMyojQMFKmnPt878WypSPa4,29491
|
|
188
188
|
datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7EkhtKfftvo,1286
|
|
189
|
-
datahub/ingestion/source/elastic_search.py,sha256=
|
|
189
|
+
datahub/ingestion/source/elastic_search.py,sha256=uT4I0GyqSiD16BURqsXWyPN9wNBc3wLomz1nG-OxHec,22634
|
|
190
190
|
datahub/ingestion/source/feast.py,sha256=uZpeUkJsiNlvZcUkARiEuZT_3n6sbGc0yFzwqhtnefA,18103
|
|
191
191
|
datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
|
|
192
|
-
datahub/ingestion/source/ge_data_profiler.py,sha256=
|
|
192
|
+
datahub/ingestion/source/ge_data_profiler.py,sha256=7-ciHphLU8O259OU2WMDfCDpoqvDLUy_XcG4EM0agFc,64983
|
|
193
193
|
datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz-qnpZ6XkELUyBKr7is,10807
|
|
194
194
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
195
195
|
datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
|
|
@@ -221,18 +221,18 @@ datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnre
|
|
|
221
221
|
datahub/ingestion/source/aws/sagemaker.py,sha256=23m8a9-VofWDJZWm4uCrf0MLkFZKbxce7839qDYTh7w,4995
|
|
222
222
|
datahub/ingestion/source/aws/sagemaker_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
223
223
|
datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=SSvpOszqJAHoZB3jMZgx8odInEy3lTZZCThhpOWkyvE,2012
|
|
224
|
-
datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py,sha256=
|
|
224
|
+
datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py,sha256=bnx6uKwXvzafYhcIl112INTMmotu6xy8FjFNhTO4b6c,10384
|
|
225
225
|
datahub/ingestion/source/aws/sagemaker_processors/job_classes.py,sha256=CfJkzjZU2uvZvw7qvmxfNgeWI1EvgHFY-7bn5Ih71no,9154
|
|
226
226
|
datahub/ingestion/source/aws/sagemaker_processors/jobs.py,sha256=OHLiqeZCTR9GgfmSx6O8oX9ZCd983RiFnx23JTiKZ3I,32395
|
|
227
227
|
datahub/ingestion/source/aws/sagemaker_processors/lineage.py,sha256=dvSCoiZhJLN4Hic5nRH3REI7SxMdMsm_4Ugmv0U8Zdg,9290
|
|
228
|
-
datahub/ingestion/source/aws/sagemaker_processors/models.py,sha256=
|
|
228
|
+
datahub/ingestion/source/aws/sagemaker_processors/models.py,sha256=6Ltmy6MAwbexN_JRYu7LXlAKpihXGlW4WXxo7qdwEF8,19845
|
|
229
229
|
datahub/ingestion/source/azure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
230
230
|
datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0ch1oF2SJSYDZ1JMB_Onso,7605
|
|
231
231
|
datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
|
|
232
232
|
datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
|
|
233
233
|
datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
234
234
|
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=-12CZWeSIAkI6Kb4AY8NAF3wsC_2lxhPErm5o0oUUes,14116
|
|
235
|
-
datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=
|
|
235
|
+
datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
|
|
236
236
|
datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
|
|
237
237
|
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=xnYWxbhvv-rJRHLGkOWIAn4Ir__hwinEZF1F7TWWirE,26086
|
|
238
238
|
datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
|
|
@@ -288,7 +288,7 @@ datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=YkYC3-TB
|
|
|
288
288
|
datahub/ingestion/source/dremio/dremio_entities.py,sha256=3H3vIvj5ab4d8gmB9-rbZfwRgW87gT1DdjWiMjNgqJ4,15069
|
|
289
289
|
datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
|
|
290
290
|
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=IPgv7lOnhK6mQeqwRsPscKnXhzgVZG8Id3yNcsmG7nw,1273
|
|
291
|
-
datahub/ingestion/source/dremio/dremio_source.py,sha256=
|
|
291
|
+
datahub/ingestion/source/dremio/dremio_source.py,sha256=NJxDXWd19A3MPplPiLPAjxTmjeJBA04PcPytRSslmYQ,26323
|
|
292
292
|
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
|
|
293
293
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
294
294
|
datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
|
|
@@ -301,9 +301,9 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
|
|
|
301
301
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
302
302
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
303
|
datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
|
|
304
|
-
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
|
|
304
|
+
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=CiwUyTHUdrRhvQz0S-63IO6qTkoePxygKNtqdxjwrdM,14468
|
|
305
305
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
|
|
306
|
-
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=
|
|
306
|
+
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=_tms5AqNAJRDRzQmyN_VydzXbdME2lkvTwa5u1La5z8,7353
|
|
307
307
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
308
308
|
datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
|
|
309
309
|
datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
|
|
@@ -457,7 +457,7 @@ datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxv
|
|
|
457
457
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
458
458
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=6QbhkQH_F13GV1HsavVTq3BE9F7Pr_vfGOjCX2o2c60,11675
|
|
459
459
|
datahub/ingestion/source/sql/sql_report.py,sha256=19YVvatcCZsBP533HWn0X9Y30jo4TUxSkQ9rYpMQpT4,2487
|
|
460
|
-
datahub/ingestion/source/sql/sql_types.py,sha256=
|
|
460
|
+
datahub/ingestion/source/sql/sql_types.py,sha256=lrJpavRTE7aDVAKOrKZcrp4CsKydiiaza1wt2ieqWzs,15041
|
|
461
461
|
datahub/ingestion/source/sql/sql_utils.py,sha256=w9YFNm_qJNjOcWAWBI_lUoFMbd0wT8q0LoT7Ia71tIE,8100
|
|
462
462
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
463
463
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
@@ -519,7 +519,7 @@ datahub/ingestion/transformer/add_dataset_ownership.py,sha256=xuv6qymZceHYOtFIdo
|
|
|
519
519
|
datahub/ingestion/transformer/add_dataset_properties.py,sha256=bNFu-Yv6nmiGuZCgPUnCHBuoDbVPpUTYKc_7zzgWZxg,5605
|
|
520
520
|
datahub/ingestion/transformer/add_dataset_schema_tags.py,sha256=9bCgQNKhu0uGaQoQsfauNCcaBW64DMsSaJbmJfSp-zk,5664
|
|
521
521
|
datahub/ingestion/transformer/add_dataset_schema_terms.py,sha256=-fHMP9xgJDSqAFW8zIP2TUR_XLX-Zcd93IBiSdyigB4,6548
|
|
522
|
-
datahub/ingestion/transformer/add_dataset_tags.py,sha256=
|
|
522
|
+
datahub/ingestion/transformer/add_dataset_tags.py,sha256=eyvQpb0hntSmaP9HSHcIiHLvLFtkEeX-BKmHVtuaF3Y,4737
|
|
523
523
|
datahub/ingestion/transformer/add_dataset_terms.py,sha256=F3DgVFJfm_Ofh4NOt7caKbVx4FQE-tOns17E0qDn43o,5799
|
|
524
524
|
datahub/ingestion/transformer/auto_helper_transformer.py,sha256=MuxoHr0_SPi8LpHbYYOYaEO8d-7XBu2zRn-PDnqrIHU,2843
|
|
525
525
|
datahub/ingestion/transformer/base_transformer.py,sha256=j5HmnplhY1K_7oa9DNHhCegs6eMKndc-VFoT8GGxgpU,12383
|
|
@@ -528,7 +528,7 @@ datahub/ingestion/transformer/dataset_domain_based_on_tags.py,sha256=V_FGZ-H-cRn
|
|
|
528
528
|
datahub/ingestion/transformer/dataset_transformer.py,sha256=dOK0oO6R6dbuxk5i5Za6hkzy8xCEpQxG8iKjXeIZKM4,5305
|
|
529
529
|
datahub/ingestion/transformer/extract_dataset_tags.py,sha256=uCxf7L9AdMjVs7gvq1k1geuxcWDxv0LXEwXj6EQlWSE,2499
|
|
530
530
|
datahub/ingestion/transformer/extract_ownership_from_tags.py,sha256=DQh0dETRzALR1qhN6aKeAs2YPCpquxu3wGU_MSospQs,6381
|
|
531
|
-
datahub/ingestion/transformer/generic_aspect_transformer.py,sha256
|
|
531
|
+
datahub/ingestion/transformer/generic_aspect_transformer.py,sha256=-1g-tIgWPFhCmzTPcV60CIw8SAFD9ML5ai70lMxfXMo,5595
|
|
532
532
|
datahub/ingestion/transformer/mark_dataset_status.py,sha256=mg-BWLxYmiEFBH8ErNcUPd-X4FThMWoloS9Lu15awC8,1323
|
|
533
533
|
datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py,sha256=jTURuh6tDJDnelxxsNzmJjyIucMUryEDOLa1i7rb9-o,2422
|
|
534
534
|
datahub/ingestion/transformer/pattern_cleanup_ownership.py,sha256=axZYHbbYGRQmlc8jKdObDt1H3aM3SU9vV8TDJKZCLdw,2932
|
|
@@ -539,7 +539,7 @@ datahub/ingestion/transformer/tags_to_terms.py,sha256=-BC9GeZDz5oPBkaTWmKMNtyEUa
|
|
|
539
539
|
datahub/ingestion/transformer/transform_registry.py,sha256=bartmA1zEaULNy5W1Q7gRF8h5Y57BFC6XNOGfCzh1Zw,251
|
|
540
540
|
datahub/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
541
541
|
datahub/integrations/assertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
542
|
-
datahub/integrations/assertion/common.py,sha256=
|
|
542
|
+
datahub/integrations/assertion/common.py,sha256=eYE8by2GubKe1xtWgAEKvrO4mqPtfCxL7XWH2jpi3cA,2209
|
|
543
543
|
datahub/integrations/assertion/registry.py,sha256=mmeYpQREFVrLWpLcA0qYZtbrMX4vAGcDoQ59wtMgEcg,307
|
|
544
544
|
datahub/integrations/assertion/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
545
545
|
datahub/integrations/assertion/snowflake/compiler.py,sha256=xL7TsGRAiLE53so1g8Xgk_OfPpp4gu6pJYQ5p63EoJI,9947
|
|
@@ -551,7 +551,7 @@ datahub/integrations/assertion/snowflake/metric_sql_generator.py,sha256=7lCSZJ9P
|
|
|
551
551
|
datahub/integrations/great_expectations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
552
552
|
datahub/integrations/great_expectations/action.py,sha256=78ywIwsfmxXbQQ0emou15ziasdr852dDk9qqSolaHac,100
|
|
553
553
|
datahub/lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
554
|
-
datahub/lite/duckdb_lite.py,sha256=
|
|
554
|
+
datahub/lite/duckdb_lite.py,sha256=eB4DL_qhfOLmhjmbMSYM37Q4cO5dm1uoVCA9AtENXLA,32712
|
|
555
555
|
datahub/lite/duckdb_lite_config.py,sha256=PGY5Hab_xbbqoA1hf7OKySBJ2JQJaLNKl-4CO39ad3g,157
|
|
556
556
|
datahub/lite/lite_local.py,sha256=Aa-_E9o1y-z8ks9b1JuBeaECdgi6oU8xGb1drPA9Q6E,2846
|
|
557
557
|
datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw,286
|
|
@@ -850,10 +850,10 @@ datahub/secret/datahub_secrets_client.py,sha256=WkoJDip7IAKSGDM5oHeZVL8878pd4Bix
|
|
|
850
850
|
datahub/secret/secret_common.py,sha256=PeRFNljPlGfNrmn3VtDVbazQE6J3Q1nA3L-z3cS8LEA,2522
|
|
851
851
|
datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
|
|
852
852
|
datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
|
|
853
|
-
datahub/specific/chart.py,sha256=
|
|
853
|
+
datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,11324
|
|
854
854
|
datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
|
|
855
|
-
datahub/specific/dashboard.py,sha256=
|
|
856
|
-
datahub/specific/datajob.py,sha256=
|
|
855
|
+
datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
|
|
856
|
+
datahub/specific/datajob.py,sha256=Yp_LSy12ogbz9KYKTkdg6J9ScaFgg-o5--VkRfC1qRo,18793
|
|
857
857
|
datahub/specific/dataproduct.py,sha256=Mt-QlndY4Die87XwakYTAcvyDzaB5fmyn1NpQGGcZyI,5235
|
|
858
858
|
datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
|
|
859
859
|
datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
|
|
@@ -939,7 +939,7 @@ datahub/utilities/yaml_sync_utils.py,sha256=65IEe8quW3_zHCR8CyoDkZyopeZJazU-IyMr
|
|
|
939
939
|
datahub/utilities/registries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
940
940
|
datahub/utilities/registries/domain_registry.py,sha256=0SfcZNop-PXBbl-AWw92vAyb28i0YXTr-TKdBwixmOw,2452
|
|
941
941
|
datahub/utilities/urns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
942
|
-
datahub/utilities/urns/_urn_base.py,sha256=
|
|
942
|
+
datahub/utilities/urns/_urn_base.py,sha256=IOYjC3NPn0DlB26cPdyihFAah3mUc4CHpS2dVM65ctI,9295
|
|
943
943
|
datahub/utilities/urns/corp_group_urn.py,sha256=6H5Q6nZvAXu80IZBDCeM8xo_9ap9pgwtyi60QXx3hzY,75
|
|
944
944
|
datahub/utilities/urns/corpuser_urn.py,sha256=h-Yh-9QRbtQOhxxzxEBc7skoavpGaKDKVNrsxSXZ1yQ,88
|
|
945
945
|
datahub/utilities/urns/data_flow_urn.py,sha256=w1Z7ET1L1OtYD1w-xiUYtyCczsxZZ1l3LRyTRv5NdpE,73
|
|
@@ -951,7 +951,7 @@ datahub/utilities/urns/domain_urn.py,sha256=wfpQx33jRtq0TGx2JVIZGJJf_L_BYeNn1RCE
|
|
|
951
951
|
datahub/utilities/urns/error.py,sha256=1MMg3UyN4rQSdka1O0489rsZ7t5Hs2aDxIaHyPbE80E,43
|
|
952
952
|
datahub/utilities/urns/field_paths.py,sha256=ra-o_fMGkBRLgzMewAJN5-HqAyo3PIpXQ0KbHeymjU4,521
|
|
953
953
|
datahub/utilities/urns/notebook_urn.py,sha256=CHqGrV45ReVODlFx7js2WUxjcXxt8B63-xsBZpujmtY,73
|
|
954
|
-
datahub/utilities/urns/structured_properties_urn.py,sha256=
|
|
954
|
+
datahub/utilities/urns/structured_properties_urn.py,sha256=fjA1Ysg7IQSly8IVYx1R8HnwnojQz6jZWbqfk_XVvno,271
|
|
955
955
|
datahub/utilities/urns/tag_urn.py,sha256=MqEJdIaCnAyjYe_8VdNnUjOVV4TS8xMlv4pRsy8wwXY,63
|
|
956
956
|
datahub/utilities/urns/urn.py,sha256=B4nYxiFT8s5DLA2NJsWg0KoiUDp9UWg1nvL0j7Sx-h8,218
|
|
957
957
|
datahub/utilities/urns/urn_iter.py,sha256=m5--PO-Oohw_BQXUCW1z-Ku3vtTcT81AxGLDkMiTaAs,4734
|
|
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
974
974
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
975
975
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
976
976
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
979
|
-
acryl_datahub-0.15.
|
|
980
|
-
acryl_datahub-0.15.
|
|
981
|
-
acryl_datahub-0.15.
|
|
977
|
+
acryl_datahub-0.15.0rc8.dist-info/METADATA,sha256=vcaeap-Oa5mvYQ2wgEU68vacyO2hz4Esi5FQE4D82DQ,172484
|
|
978
|
+
acryl_datahub-0.15.0rc8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
979
|
+
acryl_datahub-0.15.0rc8.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
980
|
+
acryl_datahub-0.15.0rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
981
|
+
acryl_datahub-0.15.0rc8.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -121,7 +121,7 @@ class StructuredProperties(ConfigModel):
|
|
|
121
121
|
return (
|
|
122
122
|
self.qualified_name
|
|
123
123
|
or self.id
|
|
124
|
-
or Urn.
|
|
124
|
+
or Urn.from_string(self.urn).get_entity_id()[0]
|
|
125
125
|
)
|
|
126
126
|
|
|
127
127
|
@validator("urn", pre=True, always=True)
|
datahub/cli/put_cli.py
CHANGED
|
@@ -105,7 +105,7 @@ def platform(
|
|
|
105
105
|
"""
|
|
106
106
|
|
|
107
107
|
if name.startswith(f"urn:li:{DataPlatformUrn.ENTITY_TYPE}"):
|
|
108
|
-
platform_urn = DataPlatformUrn.
|
|
108
|
+
platform_urn = DataPlatformUrn.from_string(name)
|
|
109
109
|
platform_name = platform_urn.get_entity_id_as_string()
|
|
110
110
|
else:
|
|
111
111
|
platform_name = name.lower()
|
|
@@ -45,7 +45,7 @@ def _get_owner_urn(maybe_urn: str) -> str:
|
|
|
45
45
|
|
|
46
46
|
def _abort_if_non_existent_urn(graph: DataHubGraph, urn: str, operation: str) -> None:
|
|
47
47
|
try:
|
|
48
|
-
parsed_urn: Urn = Urn.
|
|
48
|
+
parsed_urn: Urn = Urn.from_string(urn)
|
|
49
49
|
entity_type = parsed_urn.get_type()
|
|
50
50
|
except Exception:
|
|
51
51
|
click.secho(f"Provided urn {urn} does not seem valid", fg="red")
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import time
|
|
2
3
|
from collections import defaultdict
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
|
|
@@ -6,12 +7,15 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
|
|
|
6
7
|
from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
|
|
7
8
|
from datahub.emitter.serialization_helper import pre_json_transform
|
|
8
9
|
from datahub.metadata.schema_classes import (
|
|
10
|
+
AuditStampClass,
|
|
9
11
|
ChangeTypeClass,
|
|
12
|
+
EdgeClass,
|
|
10
13
|
GenericAspectClass,
|
|
11
14
|
KafkaAuditHeaderClass,
|
|
12
15
|
MetadataChangeProposalClass,
|
|
13
16
|
SystemMetadataClass,
|
|
14
17
|
)
|
|
18
|
+
from datahub.metadata.urns import Urn
|
|
15
19
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
16
20
|
|
|
17
21
|
|
|
@@ -89,3 +93,42 @@ class MetadataPatchProposal:
|
|
|
89
93
|
)
|
|
90
94
|
for aspect_name, patches in self.patches.items()
|
|
91
95
|
]
|
|
96
|
+
|
|
97
|
+
@classmethod
|
|
98
|
+
def _mint_auditstamp(cls, message: Optional[str] = None) -> AuditStampClass:
|
|
99
|
+
"""
|
|
100
|
+
Creates an AuditStampClass instance with the current timestamp and other default values.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
message: The message associated with the audit stamp (optional).
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
An instance of AuditStampClass.
|
|
107
|
+
"""
|
|
108
|
+
return AuditStampClass(
|
|
109
|
+
time=int(time.time() * 1000.0),
|
|
110
|
+
actor="urn:li:corpuser:datahub",
|
|
111
|
+
message=message,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
@classmethod
|
|
115
|
+
def _ensure_urn_type(
|
|
116
|
+
cls, entity_type: str, edges: List[EdgeClass], context: str
|
|
117
|
+
) -> None:
|
|
118
|
+
"""
|
|
119
|
+
Ensures that the destination URNs in the given edges have the specified entity type.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
entity_type: The entity type to check against.
|
|
123
|
+
edges: A list of Edge objects.
|
|
124
|
+
context: The context or description of the operation.
|
|
125
|
+
|
|
126
|
+
Raises:
|
|
127
|
+
ValueError: If any of the destination URNs is not of the specified entity type.
|
|
128
|
+
"""
|
|
129
|
+
for e in edges:
|
|
130
|
+
urn = Urn.from_string(e.destinationUrn)
|
|
131
|
+
if not urn.entity_type == entity_type:
|
|
132
|
+
raise ValueError(
|
|
133
|
+
f"{context}: {e.destinationUrn} is not of type {entity_type}"
|
|
134
|
+
)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import textwrap
|
|
1
3
|
from dataclasses import dataclass
|
|
2
4
|
from typing import TYPE_CHECKING, Iterable, List
|
|
3
5
|
|
|
@@ -28,6 +30,8 @@ if TYPE_CHECKING:
|
|
|
28
30
|
FeatureGroupSummaryTypeDef,
|
|
29
31
|
)
|
|
30
32
|
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
31
35
|
|
|
32
36
|
@dataclass
|
|
33
37
|
class FeatureGroupProcessor:
|
|
@@ -197,11 +201,12 @@ class FeatureGroupProcessor:
|
|
|
197
201
|
|
|
198
202
|
full_table_name = f"{glue_database}.{glue_table}"
|
|
199
203
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
204
|
+
logging.info(
|
|
205
|
+
textwrap.dedent(
|
|
206
|
+
f"""Note: table {full_table_name} is an AWS Glue object. This source does not ingest all metadata for Glue tables.
|
|
203
207
|
To view full table metadata, run Glue ingestion
|
|
204
|
-
(see https://datahubproject.io/docs/
|
|
208
|
+
(see https://datahubproject.io/docs/generated/ingestion/sources/glue)"""
|
|
209
|
+
)
|
|
205
210
|
)
|
|
206
211
|
|
|
207
212
|
feature_sources.append(
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from collections import defaultdict
|
|
2
3
|
from dataclasses import dataclass, field
|
|
3
4
|
from datetime import datetime
|
|
@@ -65,6 +66,8 @@ ENDPOINT_STATUS_MAP: Dict[str, str] = {
|
|
|
65
66
|
"Unknown": DeploymentStatusClass.UNKNOWN,
|
|
66
67
|
}
|
|
67
68
|
|
|
69
|
+
logger = logging.getLogger(__name__)
|
|
70
|
+
|
|
68
71
|
|
|
69
72
|
@dataclass
|
|
70
73
|
class ModelProcessor:
|
|
@@ -385,6 +388,26 @@ class ModelProcessor:
|
|
|
385
388
|
model_metrics,
|
|
386
389
|
)
|
|
387
390
|
|
|
391
|
+
@staticmethod
|
|
392
|
+
def get_group_name_from_arn(arn: str) -> str:
|
|
393
|
+
"""
|
|
394
|
+
Extract model package group name from a SageMaker ARN.
|
|
395
|
+
|
|
396
|
+
Args:
|
|
397
|
+
arn (str): Full ARN of the model package group
|
|
398
|
+
|
|
399
|
+
Returns:
|
|
400
|
+
str: Name of the model package group
|
|
401
|
+
|
|
402
|
+
Example:
|
|
403
|
+
>>> ModelProcessor.get_group_name_from_arn("arn:aws:sagemaker:eu-west-1:123456789:model-package-group/my-model-group")
|
|
404
|
+
'my-model-group'
|
|
405
|
+
"""
|
|
406
|
+
logger.debug(
|
|
407
|
+
f"Extracting group name from ARN: {arn} because group was not seen before"
|
|
408
|
+
)
|
|
409
|
+
return arn.split("/")[-1]
|
|
410
|
+
|
|
388
411
|
def get_model_wu(
|
|
389
412
|
self,
|
|
390
413
|
model_details: "DescribeModelOutputTypeDef",
|
|
@@ -425,8 +448,14 @@ class ModelProcessor:
|
|
|
425
448
|
model_group_arns = model_uri_groups | model_image_groups
|
|
426
449
|
|
|
427
450
|
model_group_names = sorted(
|
|
428
|
-
[
|
|
451
|
+
[
|
|
452
|
+
self.group_arn_to_name[x]
|
|
453
|
+
if x in self.group_arn_to_name
|
|
454
|
+
else self.get_group_name_from_arn(x)
|
|
455
|
+
for x in model_group_arns
|
|
456
|
+
]
|
|
429
457
|
)
|
|
458
|
+
|
|
430
459
|
model_group_urns = [
|
|
431
460
|
builder.make_ml_model_group_urn("sagemaker", x, self.env)
|
|
432
461
|
for x in model_group_names
|
|
@@ -190,7 +190,7 @@ class BigQueryTableRef:
|
|
|
190
190
|
@classmethod
|
|
191
191
|
def from_urn(cls, urn: str) -> "BigQueryTableRef":
|
|
192
192
|
"""Raises: ValueError if urn is not a valid BigQuery table URN."""
|
|
193
|
-
dataset_urn = DatasetUrn.
|
|
193
|
+
dataset_urn = DatasetUrn.from_string(urn)
|
|
194
194
|
split = dataset_urn.name.rsplit(".", 3)
|
|
195
195
|
if len(split) == 3:
|
|
196
196
|
project, dataset, table = split
|
|
@@ -653,7 +653,7 @@ class CSVEnricherSource(Source):
|
|
|
653
653
|
|
|
654
654
|
is_resource_row: bool = not row["subresource"]
|
|
655
655
|
entity_urn = row["resource"]
|
|
656
|
-
entity_type = Urn.
|
|
656
|
+
entity_type = Urn.from_string(row["resource"]).get_type()
|
|
657
657
|
|
|
658
658
|
term_associations: List[
|
|
659
659
|
GlossaryTermAssociationClass
|
|
@@ -396,10 +396,12 @@ class DremioSource(StatefulIngestionSourceBase):
|
|
|
396
396
|
):
|
|
397
397
|
yield dremio_mcp
|
|
398
398
|
# Check if the emitted aspect is SchemaMetadataClass
|
|
399
|
-
if isinstance(
|
|
399
|
+
if isinstance(
|
|
400
|
+
dremio_mcp.metadata, MetadataChangeProposalWrapper
|
|
401
|
+
) and isinstance(dremio_mcp.metadata.aspect, SchemaMetadataClass):
|
|
400
402
|
self.sql_parsing_aggregator.register_schema(
|
|
401
403
|
urn=dataset_urn,
|
|
402
|
-
schema=dremio_mcp.metadata,
|
|
404
|
+
schema=dremio_mcp.metadata.aspect,
|
|
403
405
|
)
|
|
404
406
|
|
|
405
407
|
if dataset_info.dataset_type == DremioDatasetType.VIEW:
|
|
@@ -227,7 +227,7 @@ def collapse_name(name: str, collapse_urns: CollapseUrns) -> str:
|
|
|
227
227
|
def collapse_urn(urn: str, collapse_urns: CollapseUrns) -> str:
|
|
228
228
|
if len(collapse_urns.urns_suffix_regex) == 0:
|
|
229
229
|
return urn
|
|
230
|
-
urn_obj = DatasetUrn.
|
|
230
|
+
urn_obj = DatasetUrn.from_string(urn)
|
|
231
231
|
name = collapse_name(name=urn_obj.get_dataset_name(), collapse_urns=collapse_urns)
|
|
232
232
|
data_platform_urn = urn_obj.get_data_platform_urn()
|
|
233
233
|
return str(
|
|
@@ -114,11 +114,11 @@ class DataProcessCleanupConfig(ConfigModel):
|
|
|
114
114
|
)
|
|
115
115
|
|
|
116
116
|
delete_empty_data_jobs: bool = Field(
|
|
117
|
-
True, description="
|
|
117
|
+
True, description="Whether to delete Data Jobs without runs"
|
|
118
118
|
)
|
|
119
119
|
|
|
120
120
|
delete_empty_data_flows: bool = Field(
|
|
121
|
-
True, description="
|
|
121
|
+
True, description="Whether to delete Data Flows without runs"
|
|
122
122
|
)
|
|
123
123
|
|
|
124
124
|
hard_delete_entities: bool = Field(
|
|
@@ -128,7 +128,7 @@ class DataProcessCleanupConfig(ConfigModel):
|
|
|
128
128
|
|
|
129
129
|
batch_size: int = Field(
|
|
130
130
|
500,
|
|
131
|
-
description="The number of entities to get in a batch from
|
|
131
|
+
description="The number of entities to get in a batch from API",
|
|
132
132
|
)
|
|
133
133
|
|
|
134
134
|
max_workers: int = Field(
|
|
@@ -173,9 +173,9 @@ class DataProcessCleanup:
|
|
|
173
173
|
"""
|
|
174
174
|
This source is a maintenance source which cleans up old/unused aspects.
|
|
175
175
|
|
|
176
|
-
Currently it only supports
|
|
176
|
+
Currently it only supports:
|
|
177
177
|
- DataFlow
|
|
178
|
-
-DataJob
|
|
178
|
+
- DataJob
|
|
179
179
|
- DataProcessInstance
|
|
180
180
|
|
|
181
181
|
"""
|
|
@@ -267,7 +267,7 @@ class DataProcessCleanup:
|
|
|
267
267
|
|
|
268
268
|
if self.dry_run:
|
|
269
269
|
logger.info(
|
|
270
|
-
f"Dry run is on otherwise it would have deleted {urn} with hard deletion is{self.config.hard_delete_entities}"
|
|
270
|
+
f"Dry run is on otherwise it would have deleted {urn} with hard deletion is {self.config.hard_delete_entities}"
|
|
271
271
|
)
|
|
272
272
|
return
|
|
273
273
|
|
|
@@ -277,7 +277,12 @@ class DataProcessCleanup:
|
|
|
277
277
|
assert self.ctx.graph
|
|
278
278
|
|
|
279
279
|
dpis = self.fetch_dpis(job.urn, self.config.batch_size)
|
|
280
|
-
dpis.sort(
|
|
280
|
+
dpis.sort(
|
|
281
|
+
key=lambda x: x["created"]["time"]
|
|
282
|
+
if x["created"] and x["created"]["time"]
|
|
283
|
+
else 0,
|
|
284
|
+
reverse=True,
|
|
285
|
+
)
|
|
281
286
|
|
|
282
287
|
with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
|
|
283
288
|
if self.config.keep_last_n:
|
|
@@ -104,7 +104,7 @@ class SoftDeletedEntitiesCleanup:
|
|
|
104
104
|
def delete_entity(self, urn: str) -> None:
|
|
105
105
|
assert self.ctx.graph
|
|
106
106
|
|
|
107
|
-
entity_urn = Urn.
|
|
107
|
+
entity_urn = Urn.from_string(urn)
|
|
108
108
|
self.report.num_soft_deleted_entity_removed += 1
|
|
109
109
|
self.report.num_soft_deleted_entity_removed_by_type[entity_urn.entity_type] = (
|
|
110
110
|
self.report.num_soft_deleted_entity_removed_by_type.get(
|
|
@@ -57,7 +57,11 @@ from datahub.ingestion.source.profiling.common import (
|
|
|
57
57
|
convert_to_cardinality,
|
|
58
58
|
)
|
|
59
59
|
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
60
|
-
from datahub.
|
|
60
|
+
from datahub.ingestion.source.sql.sql_types import resolve_sql_type
|
|
61
|
+
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
62
|
+
EditableSchemaMetadata,
|
|
63
|
+
NumberType,
|
|
64
|
+
)
|
|
61
65
|
from datahub.metadata.schema_classes import (
|
|
62
66
|
DatasetFieldProfileClass,
|
|
63
67
|
DatasetProfileClass,
|
|
@@ -361,6 +365,8 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|
|
361
365
|
platform: str
|
|
362
366
|
env: str
|
|
363
367
|
|
|
368
|
+
column_types: Dict[str, str] = dataclasses.field(default_factory=dict)
|
|
369
|
+
|
|
364
370
|
def _get_columns_to_profile(self) -> List[str]:
|
|
365
371
|
if not self.config.any_field_level_metrics_enabled():
|
|
366
372
|
return []
|
|
@@ -374,6 +380,7 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|
|
374
380
|
|
|
375
381
|
for col_dict in self.dataset.columns:
|
|
376
382
|
col = col_dict["name"]
|
|
383
|
+
self.column_types[col] = str(col_dict["type"])
|
|
377
384
|
# We expect the allow/deny patterns to specify '<table_pattern>.<column_pattern>'
|
|
378
385
|
if not self.config._allow_deny_patterns.allowed(
|
|
379
386
|
f"{self.dataset_name}.{col}"
|
|
@@ -430,6 +437,21 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|
|
430
437
|
self.dataset, column
|
|
431
438
|
)
|
|
432
439
|
|
|
440
|
+
if column_spec.type_ == ProfilerDataType.UNKNOWN:
|
|
441
|
+
try:
|
|
442
|
+
datahub_field_type = resolve_sql_type(
|
|
443
|
+
self.column_types[column], self.dataset.engine.dialect.name.lower()
|
|
444
|
+
)
|
|
445
|
+
except Exception as e:
|
|
446
|
+
logger.debug(
|
|
447
|
+
f"Error resolving sql type {self.column_types[column]}: {e}"
|
|
448
|
+
)
|
|
449
|
+
datahub_field_type = None
|
|
450
|
+
if datahub_field_type is None:
|
|
451
|
+
return
|
|
452
|
+
if isinstance(datahub_field_type, NumberType):
|
|
453
|
+
column_spec.type_ = ProfilerDataType.NUMERIC
|
|
454
|
+
|
|
433
455
|
@_run_with_query_combiner
|
|
434
456
|
def _get_column_cardinality(
|
|
435
457
|
self, column_spec: _SingleColumnSpec, column: str
|
|
@@ -276,7 +276,6 @@ def resolve_vertica_modified_type(type_string: str) -> Any:
|
|
|
276
276
|
return VERTICA_SQL_TYPES_MAP[type_string]
|
|
277
277
|
|
|
278
278
|
|
|
279
|
-
# see https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
|
|
280
279
|
SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
|
|
281
280
|
"NUMBER": NumberType,
|
|
282
281
|
"DECIMAL": NumberType,
|
|
@@ -312,6 +311,18 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
|
|
|
312
311
|
"GEOGRAPHY": None,
|
|
313
312
|
}
|
|
314
313
|
|
|
314
|
+
|
|
315
|
+
def resolve_snowflake_modified_type(type_string: str) -> Any:
|
|
316
|
+
# Match types with precision and scale, e.g., 'DECIMAL(38,0)'
|
|
317
|
+
match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
|
|
318
|
+
if match:
|
|
319
|
+
modified_type_base = match.group(1) # Extract the base type
|
|
320
|
+
return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
|
|
321
|
+
|
|
322
|
+
# Fallback for types without precision/scale
|
|
323
|
+
return SNOWFLAKE_TYPES_MAP.get(type_string, None)
|
|
324
|
+
|
|
325
|
+
|
|
315
326
|
# see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
|
|
316
327
|
BIGQUERY_TYPES_MAP: Dict[str, Any] = {
|
|
317
328
|
"STRING": StringType,
|
|
@@ -380,6 +391,7 @@ TRINO_SQL_TYPES_MAP: Dict[str, Any] = {
|
|
|
380
391
|
"row": RecordType,
|
|
381
392
|
"map": MapType,
|
|
382
393
|
"array": ArrayType,
|
|
394
|
+
"json": RecordType,
|
|
383
395
|
}
|
|
384
396
|
|
|
385
397
|
# https://docs.aws.amazon.com/athena/latest/ug/data-types.html
|
|
@@ -490,7 +502,7 @@ def resolve_sql_type(
|
|
|
490
502
|
TypeClass = resolve_vertica_modified_type(column_type)
|
|
491
503
|
elif platform == "snowflake":
|
|
492
504
|
# Snowflake types are uppercase, so we check that.
|
|
493
|
-
TypeClass =
|
|
505
|
+
TypeClass = resolve_snowflake_modified_type(column_type.upper())
|
|
494
506
|
|
|
495
507
|
if TypeClass:
|
|
496
508
|
return TypeClass()
|
|
@@ -74,7 +74,7 @@ class AddDatasetTags(DatasetTagsTransformer):
|
|
|
74
74
|
logger.debug("Generating tags")
|
|
75
75
|
|
|
76
76
|
for tag_association in self.processed_tags.values():
|
|
77
|
-
tag_urn = TagUrn.
|
|
77
|
+
tag_urn = TagUrn.from_string(tag_association.tag)
|
|
78
78
|
mcps.append(
|
|
79
79
|
MetadataChangeProposalWrapper(
|
|
80
80
|
entityUrn=tag_urn.urn(),
|
|
@@ -100,7 +100,7 @@ class GenericAspectTransformer(
|
|
|
100
100
|
)
|
|
101
101
|
if transformed_aspect:
|
|
102
102
|
# for end of stream records, we modify the workunit-id
|
|
103
|
-
structured_urn = Urn.
|
|
103
|
+
structured_urn = Urn.from_string(urn)
|
|
104
104
|
simple_name = "-".join(structured_urn.get_entity_id())
|
|
105
105
|
record_metadata = envelope.metadata.copy()
|
|
106
106
|
record_metadata.update(
|
|
@@ -42,7 +42,7 @@ def get_entity_name(assertion: BaseEntityAssertion) -> Tuple[str, str, str]:
|
|
|
42
42
|
if qualified_name is not None:
|
|
43
43
|
parts = qualified_name.split(".")
|
|
44
44
|
else:
|
|
45
|
-
urn_id = Urn.
|
|
45
|
+
urn_id = Urn.from_string(assertion.entity).entity_ids[1]
|
|
46
46
|
parts = urn_id.split(".")
|
|
47
47
|
if len(parts) > 3:
|
|
48
48
|
parts = parts[-3:]
|