acryl-datahub 0.15.0rc6__py3-none-any.whl → 0.15.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (29) hide show
  1. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/METADATA +2433 -2433
  2. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/RECORD +29 -29
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
  5. datahub/cli/put_cli.py +1 -1
  6. datahub/cli/specific/dataproduct_cli.py +1 -1
  7. datahub/emitter/mcp_patch_builder.py +43 -0
  8. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
  9. datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
  10. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  11. datahub/ingestion/source/csv_enricher.py +1 -1
  12. datahub/ingestion/source/dremio/dremio_source.py +4 -2
  13. datahub/ingestion/source/elastic_search.py +1 -1
  14. datahub/ingestion/source/gc/dataprocess_cleanup.py +12 -7
  15. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
  16. datahub/ingestion/source/ge_data_profiler.py +23 -1
  17. datahub/ingestion/source/sql/sql_types.py +14 -2
  18. datahub/ingestion/transformer/add_dataset_tags.py +1 -1
  19. datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
  20. datahub/integrations/assertion/common.py +1 -1
  21. datahub/lite/duckdb_lite.py +12 -17
  22. datahub/specific/chart.py +0 -39
  23. datahub/specific/dashboard.py +0 -39
  24. datahub/specific/datajob.py +3 -47
  25. datahub/utilities/urns/_urn_base.py +1 -1
  26. datahub/utilities/urns/structured_properties_urn.py +1 -1
  27. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/WHEEL +0 -0
  28. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/entry_points.txt +0 -0
  29. {acryl_datahub-0.15.0rc6.dist-info → acryl_datahub-0.15.0rc8.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=n6y65QeCMsW4IG81drsaM-il8e5WBN_r5cUcIbgDNm8,574
1
+ datahub/__init__.py,sha256=NMZEhV_gq-gUecvUHX3Sa6_GbgxJuCJ7XuBUcPhuIRs,574
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -52,7 +52,7 @@ datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp6
52
52
  datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
54
54
  datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- datahub/api/entities/structuredproperties/structuredproperties.py,sha256=mqSvD_EtGs-G7tXQpcZ3hweFCrnWubdJnoDYKr7J0Pk,9274
55
+ datahub/api/entities/structuredproperties/structuredproperties.py,sha256=UeNPicCmrYJThv0msUlYUNArHGcjdc_0EX5yLijc_Ao,9267
56
56
  datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
57
57
  datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0VhA,2818
58
58
  datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
@@ -72,7 +72,7 @@ datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
72
72
  datahub/cli/lite_cli.py,sha256=UmlMMquce6lHiPaKUBBT0XQtqR9SHEmrGlJyKV9YY60,13030
73
73
  datahub/cli/migrate.py,sha256=p42vixwKzi9OHQnIa0K2FxwGvt-1OxXeuYGJzfu5Sqo,17939
74
74
  datahub/cli/migration_utils.py,sha256=0qHo_9eSR4buyV_K_tdcHSLBufKphBWwwwT1iK_I4S8,9382
75
- datahub/cli/put_cli.py,sha256=Dq1q2i3Fc-1tn08SdT4HRrG9FTZ4HInoY3miZawAb8o,3848
75
+ datahub/cli/put_cli.py,sha256=4ol9aLdidX1VXjVxMG2tkfEMPyjLpgOk2pfl0Gvb8iU,3841
76
76
  datahub/cli/quickstart_versioning.py,sha256=MyWvw92s4b84wIEizjSUZjoMClwLbhpgMdHeDav-x2o,5713
77
77
  datahub/cli/state_cli.py,sha256=TkIzohZOJYBowniJpTipPxiQTziErVeZCxq197GlvUQ,1142
78
78
  datahub/cli/telemetry.py,sha256=xw3SiAn2je48Qv4kXPYN5EPVKHWEWZc2LGoF7UzGs8U,489
@@ -80,7 +80,7 @@ datahub/cli/timeline_cli.py,sha256=kxs-kJNFxDT27RQeh2i3WiQ6a8SYkPeJbksWUc8kUhc,7
80
80
  datahub/cli/specific/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
81
  datahub/cli/specific/assertions_cli.py,sha256=I4_MCwYoWDturxYvdRuA-MbSvYMeM0yFQm9Bdj_4Uj4,5375
82
82
  datahub/cli/specific/datacontract_cli.py,sha256=IkBovwuPT5jNB8X-8AQJRO4C9cFSNm1at8v4YctLFgQ,2531
83
- datahub/cli/specific/dataproduct_cli.py,sha256=2LfUxiaHeVnW2XlY40TNCAVVWIWEcnkd9_fFzD2lVhY,15088
83
+ datahub/cli/specific/dataproduct_cli.py,sha256=c22W35wuTiZR4MKHP-P6ER5ABok1tooV1JzECFhR9uY,15081
84
84
  datahub/cli/specific/dataset_cli.py,sha256=AwSmIiuV3XbgprW4_1Wj-EJq1OPqFyolSNczQm5BROs,3441
85
85
  datahub/cli/specific/file_loader.py,sha256=YMyv_evdKyHSft5Tm_kOcqJ4ALpRmMm54ZJAyl7Nxqs,773
86
86
  datahub/cli/specific/forms_cli.py,sha256=OLVeG8NtK1eDBuUKCT5Ald35np8__f8mLzbZM_zUfWU,1484
@@ -117,7 +117,7 @@ datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkR
117
117
  datahub/emitter/mce_builder.py,sha256=CMES7gTj4w_kZhOsWuxbLwdz5e2tdiilibnZXstRGhw,16117
118
118
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
119
119
  datahub/emitter/mcp_builder.py,sha256=ju-1dZMKs5dlWcTi4zcNRVmhkfhmfX3JFULZSbgxSFs,9968
120
- datahub/emitter/mcp_patch_builder.py,sha256=AcWoeZmK_2AlCLOLLGrmwLT5GHZvUK9n05S8Df-saFk,2874
120
+ datahub/emitter/mcp_patch_builder.py,sha256=W85q1maVUMpOIo5lwLRn82rLXRVoZ_gurl_a-pvVCpE,4291
121
121
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
122
122
  datahub/emitter/rest_emitter.py,sha256=rIWqEJjcSIM16_8DXqNqZ_h5s_nj46DTiyRKA5EQHXQ,15021
123
123
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
@@ -184,12 +184,12 @@ datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LW
184
184
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
185
185
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
186
186
  datahub/ingestion/source/confluent_schema_registry.py,sha256=_h9D8bUXoaGcwgwB94dX6aTyLY5ve7XGdcVFSJHGSJc,18804
187
- datahub/ingestion/source/csv_enricher.py,sha256=xjCbcsSMM8l_ASCRAnNsUGKuYMrD1lec19Waixub1EM,29498
187
+ datahub/ingestion/source/csv_enricher.py,sha256=AIxQFkmSzFgCa_Fzt2EiFMyojQMFKmnPt878WypSPa4,29491
188
188
  datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7EkhtKfftvo,1286
189
- datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
189
+ datahub/ingestion/source/elastic_search.py,sha256=uT4I0GyqSiD16BURqsXWyPN9wNBc3wLomz1nG-OxHec,22634
190
190
  datahub/ingestion/source/feast.py,sha256=uZpeUkJsiNlvZcUkARiEuZT_3n6sbGc0yFzwqhtnefA,18103
191
191
  datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
192
- datahub/ingestion/source/ge_data_profiler.py,sha256=JqTonv8y7Re4Rfn2YKOEaLufiiAOWKfK1XQvJfV5dvs,64126
192
+ datahub/ingestion/source/ge_data_profiler.py,sha256=7-ciHphLU8O259OU2WMDfCDpoqvDLUy_XcG4EM0agFc,64983
193
193
  datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz-qnpZ6XkELUyBKr7is,10807
194
194
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
195
195
  datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
@@ -221,18 +221,18 @@ datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnre
221
221
  datahub/ingestion/source/aws/sagemaker.py,sha256=23m8a9-VofWDJZWm4uCrf0MLkFZKbxce7839qDYTh7w,4995
222
222
  datahub/ingestion/source/aws/sagemaker_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
223
223
  datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=SSvpOszqJAHoZB3jMZgx8odInEy3lTZZCThhpOWkyvE,2012
224
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py,sha256=obHm_rwSQMh07iBxb6N9XBZcmcjdx5_Fdn5F3aJQU_8,10247
224
+ datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py,sha256=bnx6uKwXvzafYhcIl112INTMmotu6xy8FjFNhTO4b6c,10384
225
225
  datahub/ingestion/source/aws/sagemaker_processors/job_classes.py,sha256=CfJkzjZU2uvZvw7qvmxfNgeWI1EvgHFY-7bn5Ih71no,9154
226
226
  datahub/ingestion/source/aws/sagemaker_processors/jobs.py,sha256=OHLiqeZCTR9GgfmSx6O8oX9ZCd983RiFnx23JTiKZ3I,32395
227
227
  datahub/ingestion/source/aws/sagemaker_processors/lineage.py,sha256=dvSCoiZhJLN4Hic5nRH3REI7SxMdMsm_4Ugmv0U8Zdg,9290
228
- datahub/ingestion/source/aws/sagemaker_processors/models.py,sha256=C2RVFpQUUe-7mRr8zz0yyPIuFRGK54mIisMkhqcUFZw,19017
228
+ datahub/ingestion/source/aws/sagemaker_processors/models.py,sha256=6Ltmy6MAwbexN_JRYu7LXlAKpihXGlW4WXxo7qdwEF8,19845
229
229
  datahub/ingestion/source/azure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
230
230
  datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0ch1oF2SJSYDZ1JMB_Onso,7605
231
231
  datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
232
232
  datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
233
233
  datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
234
234
  datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=-12CZWeSIAkI6Kb4AY8NAF3wsC_2lxhPErm5o0oUUes,14116
235
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=oFccDfFNU3vjjfe5QlV0EtSp8Ow4SBd6h2KdGgj7XW8,25115
235
+ datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
236
236
  datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
237
237
  datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=xnYWxbhvv-rJRHLGkOWIAn4Ir__hwinEZF1F7TWWirE,26086
238
238
  datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
@@ -288,7 +288,7 @@ datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=YkYC3-TB
288
288
  datahub/ingestion/source/dremio/dremio_entities.py,sha256=3H3vIvj5ab4d8gmB9-rbZfwRgW87gT1DdjWiMjNgqJ4,15069
289
289
  datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
290
290
  datahub/ingestion/source/dremio/dremio_reporting.py,sha256=IPgv7lOnhK6mQeqwRsPscKnXhzgVZG8Id3yNcsmG7nw,1273
291
- datahub/ingestion/source/dremio/dremio_source.py,sha256=DMztf08dZ3jt1AKMsWVMgj8qpp2dkB-hh5yncKDBW_k,26210
291
+ datahub/ingestion/source/dremio/dremio_source.py,sha256=NJxDXWd19A3MPplPiLPAjxTmjeJBA04PcPytRSslmYQ,26323
292
292
  datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
293
293
  datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
294
294
  datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
@@ -301,9 +301,9 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
301
301
  datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
302
302
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
303
303
  datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
304
- datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=IEEHO6UvDWWK3W5siqFrk4J1zUKbL6TrKNUaXdNiEW4,14362
304
+ datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=CiwUyTHUdrRhvQz0S-63IO6qTkoePxygKNtqdxjwrdM,14468
305
305
  datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
306
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=qLgdr-Rrsba0z_Y-CaHT9d1zSgy2jzg6CXaCKoN2jFk,7360
306
+ datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=_tms5AqNAJRDRzQmyN_VydzXbdME2lkvTwa5u1La5z8,7353
307
307
  datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
308
308
  datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
309
309
  datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
@@ -457,7 +457,7 @@ datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxv
457
457
  datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
458
458
  datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=6QbhkQH_F13GV1HsavVTq3BE9F7Pr_vfGOjCX2o2c60,11675
459
459
  datahub/ingestion/source/sql/sql_report.py,sha256=19YVvatcCZsBP533HWn0X9Y30jo4TUxSkQ9rYpMQpT4,2487
460
- datahub/ingestion/source/sql/sql_types.py,sha256=2GqYrW2sJyX_QU5goIUCyafxF2S07JEVydAgMFygNg4,14638
460
+ datahub/ingestion/source/sql/sql_types.py,sha256=lrJpavRTE7aDVAKOrKZcrp4CsKydiiaza1wt2ieqWzs,15041
461
461
  datahub/ingestion/source/sql/sql_utils.py,sha256=w9YFNm_qJNjOcWAWBI_lUoFMbd0wT8q0LoT7Ia71tIE,8100
462
462
  datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
463
463
  datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
@@ -519,7 +519,7 @@ datahub/ingestion/transformer/add_dataset_ownership.py,sha256=xuv6qymZceHYOtFIdo
519
519
  datahub/ingestion/transformer/add_dataset_properties.py,sha256=bNFu-Yv6nmiGuZCgPUnCHBuoDbVPpUTYKc_7zzgWZxg,5605
520
520
  datahub/ingestion/transformer/add_dataset_schema_tags.py,sha256=9bCgQNKhu0uGaQoQsfauNCcaBW64DMsSaJbmJfSp-zk,5664
521
521
  datahub/ingestion/transformer/add_dataset_schema_terms.py,sha256=-fHMP9xgJDSqAFW8zIP2TUR_XLX-Zcd93IBiSdyigB4,6548
522
- datahub/ingestion/transformer/add_dataset_tags.py,sha256=VF1524IZ0GqOzl9fI3FhGikvwt3YyCCltqNzVcWqOmU,4744
522
+ datahub/ingestion/transformer/add_dataset_tags.py,sha256=eyvQpb0hntSmaP9HSHcIiHLvLFtkEeX-BKmHVtuaF3Y,4737
523
523
  datahub/ingestion/transformer/add_dataset_terms.py,sha256=F3DgVFJfm_Ofh4NOt7caKbVx4FQE-tOns17E0qDn43o,5799
524
524
  datahub/ingestion/transformer/auto_helper_transformer.py,sha256=MuxoHr0_SPi8LpHbYYOYaEO8d-7XBu2zRn-PDnqrIHU,2843
525
525
  datahub/ingestion/transformer/base_transformer.py,sha256=j5HmnplhY1K_7oa9DNHhCegs6eMKndc-VFoT8GGxgpU,12383
@@ -528,7 +528,7 @@ datahub/ingestion/transformer/dataset_domain_based_on_tags.py,sha256=V_FGZ-H-cRn
528
528
  datahub/ingestion/transformer/dataset_transformer.py,sha256=dOK0oO6R6dbuxk5i5Za6hkzy8xCEpQxG8iKjXeIZKM4,5305
529
529
  datahub/ingestion/transformer/extract_dataset_tags.py,sha256=uCxf7L9AdMjVs7gvq1k1geuxcWDxv0LXEwXj6EQlWSE,2499
530
530
  datahub/ingestion/transformer/extract_ownership_from_tags.py,sha256=DQh0dETRzALR1qhN6aKeAs2YPCpquxu3wGU_MSospQs,6381
531
- datahub/ingestion/transformer/generic_aspect_transformer.py,sha256=X80Jt2XKbVn7leHJjW2sOvTYzbxzG73zag9GQNZKWdE,5602
531
+ datahub/ingestion/transformer/generic_aspect_transformer.py,sha256=-1g-tIgWPFhCmzTPcV60CIw8SAFD9ML5ai70lMxfXMo,5595
532
532
  datahub/ingestion/transformer/mark_dataset_status.py,sha256=mg-BWLxYmiEFBH8ErNcUPd-X4FThMWoloS9Lu15awC8,1323
533
533
  datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py,sha256=jTURuh6tDJDnelxxsNzmJjyIucMUryEDOLa1i7rb9-o,2422
534
534
  datahub/ingestion/transformer/pattern_cleanup_ownership.py,sha256=axZYHbbYGRQmlc8jKdObDt1H3aM3SU9vV8TDJKZCLdw,2932
@@ -539,7 +539,7 @@ datahub/ingestion/transformer/tags_to_terms.py,sha256=-BC9GeZDz5oPBkaTWmKMNtyEUa
539
539
  datahub/ingestion/transformer/transform_registry.py,sha256=bartmA1zEaULNy5W1Q7gRF8h5Y57BFC6XNOGfCzh1Zw,251
540
540
  datahub/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
541
541
  datahub/integrations/assertion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
542
- datahub/integrations/assertion/common.py,sha256=VGd10aLAaNzw3kauC5cr-4J3FSEzUb60VK7mi_Kc6m4,2216
542
+ datahub/integrations/assertion/common.py,sha256=eYE8by2GubKe1xtWgAEKvrO4mqPtfCxL7XWH2jpi3cA,2209
543
543
  datahub/integrations/assertion/registry.py,sha256=mmeYpQREFVrLWpLcA0qYZtbrMX4vAGcDoQ59wtMgEcg,307
544
544
  datahub/integrations/assertion/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
545
545
  datahub/integrations/assertion/snowflake/compiler.py,sha256=xL7TsGRAiLE53so1g8Xgk_OfPpp4gu6pJYQ5p63EoJI,9947
@@ -551,7 +551,7 @@ datahub/integrations/assertion/snowflake/metric_sql_generator.py,sha256=7lCSZJ9P
551
551
  datahub/integrations/great_expectations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
552
552
  datahub/integrations/great_expectations/action.py,sha256=78ywIwsfmxXbQQ0emou15ziasdr852dDk9qqSolaHac,100
553
553
  datahub/lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
554
- datahub/lite/duckdb_lite.py,sha256=UvDtLZYfIK1wbGVaFT2dI3QhhGaQ0e8MEhG-ujn6xu4,32892
554
+ datahub/lite/duckdb_lite.py,sha256=eB4DL_qhfOLmhjmbMSYM37Q4cO5dm1uoVCA9AtENXLA,32712
555
555
  datahub/lite/duckdb_lite_config.py,sha256=PGY5Hab_xbbqoA1hf7OKySBJ2JQJaLNKl-4CO39ad3g,157
556
556
  datahub/lite/lite_local.py,sha256=Aa-_E9o1y-z8ks9b1JuBeaECdgi6oU8xGb1drPA9Q6E,2846
557
557
  datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw,286
@@ -850,10 +850,10 @@ datahub/secret/datahub_secrets_client.py,sha256=WkoJDip7IAKSGDM5oHeZVL8878pd4Bix
850
850
  datahub/secret/secret_common.py,sha256=PeRFNljPlGfNrmn3VtDVbazQE6J3Q1nA3L-z3cS8LEA,2522
851
851
  datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
852
852
  datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
853
- datahub/specific/chart.py,sha256=xLHinaUFWNSSt9_UUnRScVvO1U9ECUZ-qXQ8yHhl1N8,12657
853
+ datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,11324
854
854
  datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
855
- datahub/specific/dashboard.py,sha256=MMB9AGgk3lQHMGgmP2qPXMULB3WEgRXstk7v6S4E80M,16433
856
- datahub/specific/datajob.py,sha256=Y_0L3F9ifBHmZthrnuNtGMDaqBtGEvs991FqxUoBnhw,20390
855
+ datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
856
+ datahub/specific/datajob.py,sha256=Yp_LSy12ogbz9KYKTkdg6J9ScaFgg-o5--VkRfC1qRo,18793
857
857
  datahub/specific/dataproduct.py,sha256=Mt-QlndY4Die87XwakYTAcvyDzaB5fmyn1NpQGGcZyI,5235
858
858
  datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
859
859
  datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
@@ -939,7 +939,7 @@ datahub/utilities/yaml_sync_utils.py,sha256=65IEe8quW3_zHCR8CyoDkZyopeZJazU-IyMr
939
939
  datahub/utilities/registries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
940
940
  datahub/utilities/registries/domain_registry.py,sha256=0SfcZNop-PXBbl-AWw92vAyb28i0YXTr-TKdBwixmOw,2452
941
941
  datahub/utilities/urns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
942
- datahub/utilities/urns/_urn_base.py,sha256=HoB-h8QOOaUh4a563C7Y4_uGQOkeZKBEHx4O7sgczbg,9302
942
+ datahub/utilities/urns/_urn_base.py,sha256=IOYjC3NPn0DlB26cPdyihFAah3mUc4CHpS2dVM65ctI,9295
943
943
  datahub/utilities/urns/corp_group_urn.py,sha256=6H5Q6nZvAXu80IZBDCeM8xo_9ap9pgwtyi60QXx3hzY,75
944
944
  datahub/utilities/urns/corpuser_urn.py,sha256=h-Yh-9QRbtQOhxxzxEBc7skoavpGaKDKVNrsxSXZ1yQ,88
945
945
  datahub/utilities/urns/data_flow_urn.py,sha256=w1Z7ET1L1OtYD1w-xiUYtyCczsxZZ1l3LRyTRv5NdpE,73
@@ -951,7 +951,7 @@ datahub/utilities/urns/domain_urn.py,sha256=wfpQx33jRtq0TGx2JVIZGJJf_L_BYeNn1RCE
951
951
  datahub/utilities/urns/error.py,sha256=1MMg3UyN4rQSdka1O0489rsZ7t5Hs2aDxIaHyPbE80E,43
952
952
  datahub/utilities/urns/field_paths.py,sha256=ra-o_fMGkBRLgzMewAJN5-HqAyo3PIpXQ0KbHeymjU4,521
953
953
  datahub/utilities/urns/notebook_urn.py,sha256=CHqGrV45ReVODlFx7js2WUxjcXxt8B63-xsBZpujmtY,73
954
- datahub/utilities/urns/structured_properties_urn.py,sha256=XgT-_Lf-ExL4jjVQD8wTTVqvHp-OcjHZsfposd5fG3A,278
954
+ datahub/utilities/urns/structured_properties_urn.py,sha256=fjA1Ysg7IQSly8IVYx1R8HnwnojQz6jZWbqfk_XVvno,271
955
955
  datahub/utilities/urns/tag_urn.py,sha256=MqEJdIaCnAyjYe_8VdNnUjOVV4TS8xMlv4pRsy8wwXY,63
956
956
  datahub/utilities/urns/urn.py,sha256=B4nYxiFT8s5DLA2NJsWg0KoiUDp9UWg1nvL0j7Sx-h8,218
957
957
  datahub/utilities/urns/urn_iter.py,sha256=m5--PO-Oohw_BQXUCW1z-Ku3vtTcT81AxGLDkMiTaAs,4734
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
974
974
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
975
975
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
976
976
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
977
- acryl_datahub-0.15.0rc6.dist-info/METADATA,sha256=cr6WxunP6sUsI91SLZaMuuQDrseSlU3gn-UPTsadHS4,172484
978
- acryl_datahub-0.15.0rc6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
979
- acryl_datahub-0.15.0rc6.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
980
- acryl_datahub-0.15.0rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
981
- acryl_datahub-0.15.0rc6.dist-info/RECORD,,
977
+ acryl_datahub-0.15.0rc8.dist-info/METADATA,sha256=vcaeap-Oa5mvYQ2wgEU68vacyO2hz4Esi5FQE4D82DQ,172484
978
+ acryl_datahub-0.15.0rc8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
979
+ acryl_datahub-0.15.0rc8.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
980
+ acryl_datahub-0.15.0rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
981
+ acryl_datahub-0.15.0rc8.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0rc6"
6
+ __version__ = "0.15.0rc8"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -121,7 +121,7 @@ class StructuredProperties(ConfigModel):
121
121
  return (
122
122
  self.qualified_name
123
123
  or self.id
124
- or Urn.create_from_string(self.urn).get_entity_id()[0]
124
+ or Urn.from_string(self.urn).get_entity_id()[0]
125
125
  )
126
126
 
127
127
  @validator("urn", pre=True, always=True)
datahub/cli/put_cli.py CHANGED
@@ -105,7 +105,7 @@ def platform(
105
105
  """
106
106
 
107
107
  if name.startswith(f"urn:li:{DataPlatformUrn.ENTITY_TYPE}"):
108
- platform_urn = DataPlatformUrn.create_from_string(name)
108
+ platform_urn = DataPlatformUrn.from_string(name)
109
109
  platform_name = platform_urn.get_entity_id_as_string()
110
110
  else:
111
111
  platform_name = name.lower()
@@ -45,7 +45,7 @@ def _get_owner_urn(maybe_urn: str) -> str:
45
45
 
46
46
  def _abort_if_non_existent_urn(graph: DataHubGraph, urn: str, operation: str) -> None:
47
47
  try:
48
- parsed_urn: Urn = Urn.create_from_string(urn)
48
+ parsed_urn: Urn = Urn.from_string(urn)
49
49
  entity_type = parsed_urn.get_type()
50
50
  except Exception:
51
51
  click.secho(f"Provided urn {urn} does not seem valid", fg="red")
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import time
2
3
  from collections import defaultdict
3
4
  from dataclasses import dataclass
4
5
  from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
@@ -6,12 +7,15 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
6
7
  from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
7
8
  from datahub.emitter.serialization_helper import pre_json_transform
8
9
  from datahub.metadata.schema_classes import (
10
+ AuditStampClass,
9
11
  ChangeTypeClass,
12
+ EdgeClass,
10
13
  GenericAspectClass,
11
14
  KafkaAuditHeaderClass,
12
15
  MetadataChangeProposalClass,
13
16
  SystemMetadataClass,
14
17
  )
18
+ from datahub.metadata.urns import Urn
15
19
  from datahub.utilities.urns.urn import guess_entity_type
16
20
 
17
21
 
@@ -89,3 +93,42 @@ class MetadataPatchProposal:
89
93
  )
90
94
  for aspect_name, patches in self.patches.items()
91
95
  ]
96
+
97
+ @classmethod
98
+ def _mint_auditstamp(cls, message: Optional[str] = None) -> AuditStampClass:
99
+ """
100
+ Creates an AuditStampClass instance with the current timestamp and other default values.
101
+
102
+ Args:
103
+ message: The message associated with the audit stamp (optional).
104
+
105
+ Returns:
106
+ An instance of AuditStampClass.
107
+ """
108
+ return AuditStampClass(
109
+ time=int(time.time() * 1000.0),
110
+ actor="urn:li:corpuser:datahub",
111
+ message=message,
112
+ )
113
+
114
+ @classmethod
115
+ def _ensure_urn_type(
116
+ cls, entity_type: str, edges: List[EdgeClass], context: str
117
+ ) -> None:
118
+ """
119
+ Ensures that the destination URNs in the given edges have the specified entity type.
120
+
121
+ Args:
122
+ entity_type: The entity type to check against.
123
+ edges: A list of Edge objects.
124
+ context: The context or description of the operation.
125
+
126
+ Raises:
127
+ ValueError: If any of the destination URNs is not of the specified entity type.
128
+ """
129
+ for e in edges:
130
+ urn = Urn.from_string(e.destinationUrn)
131
+ if not urn.entity_type == entity_type:
132
+ raise ValueError(
133
+ f"{context}: {e.destinationUrn} is not of type {entity_type}"
134
+ )
@@ -1,3 +1,5 @@
1
+ import logging
2
+ import textwrap
1
3
  from dataclasses import dataclass
2
4
  from typing import TYPE_CHECKING, Iterable, List
3
5
 
@@ -28,6 +30,8 @@ if TYPE_CHECKING:
28
30
  FeatureGroupSummaryTypeDef,
29
31
  )
30
32
 
33
+ logger = logging.getLogger(__name__)
34
+
31
35
 
32
36
  @dataclass
33
37
  class FeatureGroupProcessor:
@@ -197,11 +201,12 @@ class FeatureGroupProcessor:
197
201
 
198
202
  full_table_name = f"{glue_database}.{glue_table}"
199
203
 
200
- self.report.report_warning(
201
- full_table_name,
202
- f"""Note: table {full_table_name} is an AWS Glue object.
204
+ logging.info(
205
+ textwrap.dedent(
206
+ f"""Note: table {full_table_name} is an AWS Glue object. This source does not ingest all metadata for Glue tables.
203
207
  To view full table metadata, run Glue ingestion
204
- (see https://datahubproject.io/docs/metadata-ingestion/#aws-glue-glue)""",
208
+ (see https://datahubproject.io/docs/generated/ingestion/sources/glue)"""
209
+ )
205
210
  )
206
211
 
207
212
  feature_sources.append(
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from collections import defaultdict
2
3
  from dataclasses import dataclass, field
3
4
  from datetime import datetime
@@ -65,6 +66,8 @@ ENDPOINT_STATUS_MAP: Dict[str, str] = {
65
66
  "Unknown": DeploymentStatusClass.UNKNOWN,
66
67
  }
67
68
 
69
+ logger = logging.getLogger(__name__)
70
+
68
71
 
69
72
  @dataclass
70
73
  class ModelProcessor:
@@ -385,6 +388,26 @@ class ModelProcessor:
385
388
  model_metrics,
386
389
  )
387
390
 
391
+ @staticmethod
392
+ def get_group_name_from_arn(arn: str) -> str:
393
+ """
394
+ Extract model package group name from a SageMaker ARN.
395
+
396
+ Args:
397
+ arn (str): Full ARN of the model package group
398
+
399
+ Returns:
400
+ str: Name of the model package group
401
+
402
+ Example:
403
+ >>> ModelProcessor.get_group_name_from_arn("arn:aws:sagemaker:eu-west-1:123456789:model-package-group/my-model-group")
404
+ 'my-model-group'
405
+ """
406
+ logger.debug(
407
+ f"Extracting group name from ARN: {arn} because group was not seen before"
408
+ )
409
+ return arn.split("/")[-1]
410
+
388
411
  def get_model_wu(
389
412
  self,
390
413
  model_details: "DescribeModelOutputTypeDef",
@@ -425,8 +448,14 @@ class ModelProcessor:
425
448
  model_group_arns = model_uri_groups | model_image_groups
426
449
 
427
450
  model_group_names = sorted(
428
- [self.group_arn_to_name[x] for x in model_group_arns]
451
+ [
452
+ self.group_arn_to_name[x]
453
+ if x in self.group_arn_to_name
454
+ else self.get_group_name_from_arn(x)
455
+ for x in model_group_arns
456
+ ]
429
457
  )
458
+
430
459
  model_group_urns = [
431
460
  builder.make_ml_model_group_urn("sagemaker", x, self.env)
432
461
  for x in model_group_names
@@ -190,7 +190,7 @@ class BigQueryTableRef:
190
190
  @classmethod
191
191
  def from_urn(cls, urn: str) -> "BigQueryTableRef":
192
192
  """Raises: ValueError if urn is not a valid BigQuery table URN."""
193
- dataset_urn = DatasetUrn.create_from_string(urn)
193
+ dataset_urn = DatasetUrn.from_string(urn)
194
194
  split = dataset_urn.name.rsplit(".", 3)
195
195
  if len(split) == 3:
196
196
  project, dataset, table = split
@@ -653,7 +653,7 @@ class CSVEnricherSource(Source):
653
653
 
654
654
  is_resource_row: bool = not row["subresource"]
655
655
  entity_urn = row["resource"]
656
- entity_type = Urn.create_from_string(row["resource"]).get_type()
656
+ entity_type = Urn.from_string(row["resource"]).get_type()
657
657
 
658
658
  term_associations: List[
659
659
  GlossaryTermAssociationClass
@@ -396,10 +396,12 @@ class DremioSource(StatefulIngestionSourceBase):
396
396
  ):
397
397
  yield dremio_mcp
398
398
  # Check if the emitted aspect is SchemaMetadataClass
399
- if isinstance(dremio_mcp.metadata, SchemaMetadataClass):
399
+ if isinstance(
400
+ dremio_mcp.metadata, MetadataChangeProposalWrapper
401
+ ) and isinstance(dremio_mcp.metadata.aspect, SchemaMetadataClass):
400
402
  self.sql_parsing_aggregator.register_schema(
401
403
  urn=dataset_urn,
402
- schema=dremio_mcp.metadata,
404
+ schema=dremio_mcp.metadata.aspect,
403
405
  )
404
406
 
405
407
  if dataset_info.dataset_type == DremioDatasetType.VIEW:
@@ -227,7 +227,7 @@ def collapse_name(name: str, collapse_urns: CollapseUrns) -> str:
227
227
  def collapse_urn(urn: str, collapse_urns: CollapseUrns) -> str:
228
228
  if len(collapse_urns.urns_suffix_regex) == 0:
229
229
  return urn
230
- urn_obj = DatasetUrn.create_from_string(urn)
230
+ urn_obj = DatasetUrn.from_string(urn)
231
231
  name = collapse_name(name=urn_obj.get_dataset_name(), collapse_urns=collapse_urns)
232
232
  data_platform_urn = urn_obj.get_data_platform_urn()
233
233
  return str(
@@ -114,11 +114,11 @@ class DataProcessCleanupConfig(ConfigModel):
114
114
  )
115
115
 
116
116
  delete_empty_data_jobs: bool = Field(
117
- True, description="Wether to delete Data Jobs without runs"
117
+ True, description="Whether to delete Data Jobs without runs"
118
118
  )
119
119
 
120
120
  delete_empty_data_flows: bool = Field(
121
- True, description="Wether to delete Data Flows without runs"
121
+ True, description="Whether to delete Data Flows without runs"
122
122
  )
123
123
 
124
124
  hard_delete_entities: bool = Field(
@@ -128,7 +128,7 @@ class DataProcessCleanupConfig(ConfigModel):
128
128
 
129
129
  batch_size: int = Field(
130
130
  500,
131
- description="The number of entities to get in a batch from GraphQL",
131
+ description="The number of entities to get in a batch from API",
132
132
  )
133
133
 
134
134
  max_workers: int = Field(
@@ -173,9 +173,9 @@ class DataProcessCleanup:
173
173
  """
174
174
  This source is a maintenance source which cleans up old/unused aspects.
175
175
 
176
- Currently it only supports:.
176
+ Currently it only supports:
177
177
  - DataFlow
178
- -DataJob
178
+ - DataJob
179
179
  - DataProcessInstance
180
180
 
181
181
  """
@@ -267,7 +267,7 @@ class DataProcessCleanup:
267
267
 
268
268
  if self.dry_run:
269
269
  logger.info(
270
- f"Dry run is on otherwise it would have deleted {urn} with hard deletion is{self.config.hard_delete_entities}"
270
+ f"Dry run is on otherwise it would have deleted {urn} with hard deletion is {self.config.hard_delete_entities}"
271
271
  )
272
272
  return
273
273
 
@@ -277,7 +277,12 @@ class DataProcessCleanup:
277
277
  assert self.ctx.graph
278
278
 
279
279
  dpis = self.fetch_dpis(job.urn, self.config.batch_size)
280
- dpis.sort(key=lambda x: x["created"]["time"], reverse=True)
280
+ dpis.sort(
281
+ key=lambda x: x["created"]["time"]
282
+ if x["created"] and x["created"]["time"]
283
+ else 0,
284
+ reverse=True,
285
+ )
281
286
 
282
287
  with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
283
288
  if self.config.keep_last_n:
@@ -104,7 +104,7 @@ class SoftDeletedEntitiesCleanup:
104
104
  def delete_entity(self, urn: str) -> None:
105
105
  assert self.ctx.graph
106
106
 
107
- entity_urn = Urn.create_from_string(urn)
107
+ entity_urn = Urn.from_string(urn)
108
108
  self.report.num_soft_deleted_entity_removed += 1
109
109
  self.report.num_soft_deleted_entity_removed_by_type[entity_urn.entity_type] = (
110
110
  self.report.num_soft_deleted_entity_removed_by_type.get(
@@ -57,7 +57,11 @@ from datahub.ingestion.source.profiling.common import (
57
57
  convert_to_cardinality,
58
58
  )
59
59
  from datahub.ingestion.source.sql.sql_report import SQLSourceReport
60
- from datahub.metadata.com.linkedin.pegasus2avro.schema import EditableSchemaMetadata
60
+ from datahub.ingestion.source.sql.sql_types import resolve_sql_type
61
+ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
62
+ EditableSchemaMetadata,
63
+ NumberType,
64
+ )
61
65
  from datahub.metadata.schema_classes import (
62
66
  DatasetFieldProfileClass,
63
67
  DatasetProfileClass,
@@ -361,6 +365,8 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
361
365
  platform: str
362
366
  env: str
363
367
 
368
+ column_types: Dict[str, str] = dataclasses.field(default_factory=dict)
369
+
364
370
  def _get_columns_to_profile(self) -> List[str]:
365
371
  if not self.config.any_field_level_metrics_enabled():
366
372
  return []
@@ -374,6 +380,7 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
374
380
 
375
381
  for col_dict in self.dataset.columns:
376
382
  col = col_dict["name"]
383
+ self.column_types[col] = str(col_dict["type"])
377
384
  # We expect the allow/deny patterns to specify '<table_pattern>.<column_pattern>'
378
385
  if not self.config._allow_deny_patterns.allowed(
379
386
  f"{self.dataset_name}.{col}"
@@ -430,6 +437,21 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
430
437
  self.dataset, column
431
438
  )
432
439
 
440
+ if column_spec.type_ == ProfilerDataType.UNKNOWN:
441
+ try:
442
+ datahub_field_type = resolve_sql_type(
443
+ self.column_types[column], self.dataset.engine.dialect.name.lower()
444
+ )
445
+ except Exception as e:
446
+ logger.debug(
447
+ f"Error resolving sql type {self.column_types[column]}: {e}"
448
+ )
449
+ datahub_field_type = None
450
+ if datahub_field_type is None:
451
+ return
452
+ if isinstance(datahub_field_type, NumberType):
453
+ column_spec.type_ = ProfilerDataType.NUMERIC
454
+
433
455
  @_run_with_query_combiner
434
456
  def _get_column_cardinality(
435
457
  self, column_spec: _SingleColumnSpec, column: str
@@ -276,7 +276,6 @@ def resolve_vertica_modified_type(type_string: str) -> Any:
276
276
  return VERTICA_SQL_TYPES_MAP[type_string]
277
277
 
278
278
 
279
- # see https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
280
279
  SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
281
280
  "NUMBER": NumberType,
282
281
  "DECIMAL": NumberType,
@@ -312,6 +311,18 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
312
311
  "GEOGRAPHY": None,
313
312
  }
314
313
 
314
+
315
def resolve_snowflake_modified_type(type_string: str) -> Any:
    """Look up the DataHub type class for a Snowflake column type string.

    Type strings that carry a ``(precision, scale)`` modifier, such as
    ``DECIMAL(38,0)`` or ``DECIMAL(38, 0)``, are resolved by their base
    type name. Any other string is looked up in ``SNOWFLAKE_TYPES_MAP``
    unchanged.

    Args:
        type_string: The column type. It is matched against the keys of
            ``SNOWFLAKE_TYPES_MAP`` exactly as given, so the caller is
            responsible for any case normalization.

    Returns:
        The value stored in ``SNOWFLAKE_TYPES_MAP`` for the resolved type
        name, or ``None`` when the name is not a key of the map.
    """
    # Accept any amount of whitespace (including none) after the comma:
    # the modifier may be rendered either as "(38,0)" or as "(38, 0)",
    # and requiring exactly one space would reject the compact spelling.
    match = re.match(r"([a-zA-Z_]+)\(\d+,\s*\d+\)", type_string)
    if match:
        # Group 1 is the base type name with the modifier stripped off.
        return SNOWFLAKE_TYPES_MAP.get(match.group(1))

    # No precision/scale modifier: look the string up unchanged.
    return SNOWFLAKE_TYPES_MAP.get(type_string)
324
+
325
+
315
326
  # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
316
327
  BIGQUERY_TYPES_MAP: Dict[str, Any] = {
317
328
  "STRING": StringType,
@@ -380,6 +391,7 @@ TRINO_SQL_TYPES_MAP: Dict[str, Any] = {
380
391
  "row": RecordType,
381
392
  "map": MapType,
382
393
  "array": ArrayType,
394
+ "json": RecordType,
383
395
  }
384
396
 
385
397
  # https://docs.aws.amazon.com/athena/latest/ug/data-types.html
@@ -490,7 +502,7 @@ def resolve_sql_type(
490
502
  TypeClass = resolve_vertica_modified_type(column_type)
491
503
  elif platform == "snowflake":
492
504
  # Snowflake types are uppercase, so we check that.
493
- TypeClass = _merged_mapping.get(column_type.upper())
505
+ TypeClass = resolve_snowflake_modified_type(column_type.upper())
494
506
 
495
507
  if TypeClass:
496
508
  return TypeClass()
@@ -74,7 +74,7 @@ class AddDatasetTags(DatasetTagsTransformer):
74
74
  logger.debug("Generating tags")
75
75
 
76
76
  for tag_association in self.processed_tags.values():
77
- tag_urn = TagUrn.create_from_string(tag_association.tag)
77
+ tag_urn = TagUrn.from_string(tag_association.tag)
78
78
  mcps.append(
79
79
  MetadataChangeProposalWrapper(
80
80
  entityUrn=tag_urn.urn(),
@@ -100,7 +100,7 @@ class GenericAspectTransformer(
100
100
  )
101
101
  if transformed_aspect:
102
102
  # for end of stream records, we modify the workunit-id
103
- structured_urn = Urn.create_from_string(urn)
103
+ structured_urn = Urn.from_string(urn)
104
104
  simple_name = "-".join(structured_urn.get_entity_id())
105
105
  record_metadata = envelope.metadata.copy()
106
106
  record_metadata.update(
@@ -42,7 +42,7 @@ def get_entity_name(assertion: BaseEntityAssertion) -> Tuple[str, str, str]:
42
42
  if qualified_name is not None:
43
43
  parts = qualified_name.split(".")
44
44
  else:
45
- urn_id = Urn.create_from_string(assertion.entity).entity_ids[1]
45
+ urn_id = Urn.from_string(assertion.entity).entity_ids[1]
46
46
  parts = urn_id.split(".")
47
47
  if len(parts) > 3:
48
48
  parts = parts[-3:]