acryl-datahub 0.15.0.1rc13__py3-none-any.whl → 0.15.0.1rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (36)
  1. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/METADATA +2413 -2413
  2. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/RECORD +33 -30
  3. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/WHEEL +1 -1
  4. datahub/__init__.py +1 -1
  5. datahub/emitter/mce_builder.py +3 -3
  6. datahub/emitter/mcp_patch_builder.py +36 -12
  7. datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
  8. datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
  9. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
  10. datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
  11. datahub/ingestion/source/gc/datahub_gc.py +3 -0
  12. datahub/ingestion/source/gc/execution_request_cleanup.py +13 -5
  13. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +28 -21
  14. datahub/ingestion/source/snowflake/snowflake_queries.py +6 -4
  15. datahub/ingestion/source/tableau/tableau.py +53 -18
  16. datahub/ingestion/source/tableau/tableau_common.py +18 -0
  17. datahub/ingestion/source/usage/usage_common.py +15 -1
  18. datahub/specific/aspect_helpers/__init__.py +0 -0
  19. datahub/specific/aspect_helpers/custom_properties.py +79 -0
  20. datahub/specific/aspect_helpers/ownership.py +67 -0
  21. datahub/specific/aspect_helpers/structured_properties.py +72 -0
  22. datahub/specific/aspect_helpers/tags.py +42 -0
  23. datahub/specific/aspect_helpers/terms.py +43 -0
  24. datahub/specific/chart.py +28 -184
  25. datahub/specific/dashboard.py +31 -196
  26. datahub/specific/datajob.py +34 -189
  27. datahub/specific/dataproduct.py +24 -86
  28. datahub/specific/dataset.py +48 -133
  29. datahub/specific/form.py +12 -32
  30. datahub/specific/structured_property.py +9 -9
  31. datahub/sql_parsing/sql_parsing_aggregator.py +1 -3
  32. datahub/specific/custom_properties.py +0 -37
  33. datahub/specific/ownership.py +0 -48
  34. datahub/specific/structured_properties.py +0 -53
  35. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/entry_points.txt +0 -0
  36. {acryl_datahub-0.15.0.1rc13.dist-info → acryl_datahub-0.15.0.1rc15.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=kKM5imQ7UziWDSMvn1Ic5ZENvcshwalM2y2qGjZxUHY,577
1
+ datahub/__init__.py,sha256=6Tp9zNxURUFH-S9D0fryVGXYLG9rychhSBb3nVh2ras,577
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -114,10 +114,10 @@ datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
114
114
  datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
115
115
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
116
116
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
117
- datahub/emitter/mce_builder.py,sha256=5oZHXs85GGwfL8tY72IPnicyYrRXraN4LgtVQQcZyq8,16417
117
+ datahub/emitter/mce_builder.py,sha256=IqHOm0cpzdVC_mQOqk0yEVJUEj9xn8am2OFAwwQeX_8,16342
118
118
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
119
119
  datahub/emitter/mcp_builder.py,sha256=eOcuz41c4a3oTkNk39yYl9bTxpksxqATPHLcqyhPGT0,9856
120
- datahub/emitter/mcp_patch_builder.py,sha256=ykQFJshFrVF6DjkjcHQ8ZhDEws3ki0gmNjkHNfQtHwQ,4277
120
+ datahub/emitter/mcp_patch_builder.py,sha256=oonC8iGOvDzqj890CxOjWlBdDEF1RnwvbSZy1sivlTY,4572
121
121
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
122
122
  datahub/emitter/rest_emitter.py,sha256=oqyRuXG1o1dYjiEIH5TFMb1q0xhRbpxPIA5qkyz0iQ8,16407
123
123
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
@@ -233,20 +233,20 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
233
233
  datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
234
234
  datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
235
235
  datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
236
- datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=-12CZWeSIAkI6Kb4AY8NAF3wsC_2lxhPErm5o0oUUes,14116
236
+ datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=YMsyj6s7fggzisWfDdbT4w1MKJ3eRdNERsCShnu0Zqo,13681
237
237
  datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
238
238
  datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
239
- datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=xnYWxbhvv-rJRHLGkOWIAn4Ir__hwinEZF1F7TWWirE,26086
239
+ datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=PqbYNqM4-KukCU1meuvsk0qbiWa7UFh5hqHrHsvOSWQ,25889
240
240
  datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
241
241
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
242
242
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
243
243
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
244
244
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
245
245
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
246
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Sv6BrK62nu3xpgjYGE-x1xdSTouvvnKDJtazPobhiKQ,50813
246
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=o2My5Q7ab39qHP3jjVFCQSErogGYb14s6397xHIZSqc,50568
247
247
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
248
248
  datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
249
- datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Jg_pwnaj7l_KEcgq0enJXwrKh5jyUfBl4YB05YpkIVg,45415
249
+ datahub/ingestion/source/bigquery_v2/lineage.py,sha256=LJqdkCR8H55b3txCVBM-cs1T5QWxSTimJ3ebSgtXjgI,44874
250
250
  datahub/ingestion/source/bigquery_v2/profiler.py,sha256=8-yAoq8sX0E6VIwr75YbM8wITRNhGfxgte9BCeGNkMM,10681
251
251
  datahub/ingestion/source/bigquery_v2/queries.py,sha256=B2vJLZYfwM1J5JAckijKJTxLhDYA0yw3kfzj5oRQB5c,20151
252
252
  datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=xLf-vCUAnNuDdTHghxJvPOyGeA_XLCW3r-xj-8cfn3Q,19528
@@ -302,10 +302,10 @@ datahub/ingestion/source/fivetran/fivetran.py,sha256=uKbM5czPz-6LOseoh1FwavWDIuL
302
302
  datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
303
303
  datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
304
304
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
305
- datahub/ingestion/source/gc/datahub_gc.py,sha256=WOg3yIaNmwdbSTwytKeSfIUihsM7FMYBip9u2Dnwk3c,12849
305
+ datahub/ingestion/source/gc/datahub_gc.py,sha256=W6uoeV7B4WIXdxT4tOEdDksdJm656WwwvkH79L7f_8Q,12969
306
306
  datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=86Tm3NNWMf0xM4TklNIEeNOjEingKpYy-XvCPeaAb4k,17125
307
- datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=sZbdkg3MuPVGf8eeeRg_2khGMZ01QoH4dgJiTxf7Srg,9813
308
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=LvDGTaAaI-T0OZ3fkaFwipLdzPePunuSVWoEuSBsfEM,11099
307
+ datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=9jsyCIspWSSYSAVPHjKHr05885rXxM6FCH7KzTBceic,10139
308
+ datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=o4tQFO7AELB7K9VnKSwq8Ll4RdxC9vDZAmSssYPgvhw,11186
309
309
  datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
310
310
  datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
311
311
  datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
@@ -434,7 +434,7 @@ datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJ
434
434
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
435
435
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=5Lpy_irZlbOFJbvVkgsZSBjdLCT3VZNjlEvttzSQAU4,21121
436
436
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
437
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=Lhc5FAx8pXiUyfODGNkQJhjThSCIjPqG2R82dHN-jg0,26889
437
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=jTpnFWRqqFId6DKJvvAbNuFPxyNi1oQxxDUyMvh1iu4,26968
438
438
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=5po2FWz41UVowykJYbTFGxsltbmlHBCPcHG20VOhdOE,38469
439
439
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=_-rD7Q4MzKY8fYzJHSBnGX4gurwujL3UoRzcP_TZURs,6468
440
440
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=z5ZPgh-TILAz0DeIwDxRCsj980CM2BbftXiFpM1dV_Y,21674
@@ -491,8 +491,8 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
491
491
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
492
492
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
493
493
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
494
- datahub/ingestion/source/tableau/tableau.py,sha256=SWEJi0LoIhb8rVVmmhVxngENo53QtXFvJE02aOIzG6Q,140034
495
- datahub/ingestion/source/tableau/tableau_common.py,sha256=9gQLq_3BlAsKll83uVlnWJRWaIDtFtREUyuimXF13Z0,26219
494
+ datahub/ingestion/source/tableau/tableau.py,sha256=KAwyM9XiJUXFPwuVQM7GcHntcTFxMFAN4j3xSIOMbgg,142010
495
+ datahub/ingestion/source/tableau/tableau_common.py,sha256=a3Nu0Upy6_pnrd7XpSMcYHdnYca1JBW7H0jMqkYr0ME,26871
496
496
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
497
497
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
498
498
  datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
@@ -511,7 +511,7 @@ datahub/ingestion/source/unity/usage.py,sha256=igRxYg8usukTAA229uJWi-0y-Zd0yOq9d
511
511
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
512
512
  datahub/ingestion/source/usage/clickhouse_usage.py,sha256=8nQqNAPKqivywjzsvqH0-HWFwjd4gECpw_xahLXk5ek,9970
513
513
  datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=R1DDs98tYn2WW0_tGXQhk7lqEU0ru2SgrvMBtV305ps,10542
514
- datahub/ingestion/source/usage/usage_common.py,sha256=e7fcTd_vbUFv3xu5iY0mkEaAywjAufxV0Mw2Mu54IMY,11805
514
+ datahub/ingestion/source/usage/usage_common.py,sha256=poNlVKx1VRPRfE4K3yAyIS96DkGAt3MC17vQlwqBNvw,12235
515
515
  datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
516
516
  datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1pZSi33tzW9sQb7ZEgac,1733
517
517
  datahub/ingestion/source_config/operation_config.py,sha256=Q0NlqiEh4s4DFIII5NsAp5hxWTVyyJz-ldcQmH-B47s,3504
@@ -861,16 +861,19 @@ datahub/secret/datahub_secrets_client.py,sha256=WkoJDip7IAKSGDM5oHeZVL8878pd4Bix
861
861
  datahub/secret/secret_common.py,sha256=PeRFNljPlGfNrmn3VtDVbazQE6J3Q1nA3L-z3cS8LEA,2522
862
862
  datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
863
863
  datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
864
- datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,11324
865
- datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
866
- datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
867
- datahub/specific/datajob.py,sha256=5pEBrN6llpgS7jWYEfrvqpbT2vMVVpepH71jIUJUo4U,18480
868
- datahub/specific/dataproduct.py,sha256=lVv3TGkZyZ0t9CUXLnkwMhr8GK1HB-fiyRyjxTdvb7s,5259
869
- datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
870
- datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
871
- datahub/specific/ownership.py,sha256=KlYnk7o0Tq2EVugW7qRWR9D3v0C8PuqIdwgUzYwlkDM,1446
872
- datahub/specific/structured_properties.py,sha256=unc0VllBdbOm7KIWf_5tFkP4TQusN7JUKoYyOFvFQhs,1767
873
- datahub/specific/structured_property.py,sha256=IYeFyafPidNrDbn1sU65rEPwIZDS-wLY1SYXSNUUbHQ,4038
864
+ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,6684
865
+ datahub/specific/dashboard.py,sha256=D8CnOSScQ0-UICFjQnQOtqL-SlNSxhSuub4vZ3BpcuI,10017
866
+ datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
867
+ datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
868
+ datahub/specific/dataset.py,sha256=je9j3rVzpSiXoOe0UmfD7mc5vCpLAAO74Z8q1SvwPX0,9725
869
+ datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
870
+ datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
871
+ datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
872
+ datahub/specific/aspect_helpers/custom_properties.py,sha256=s87_Aq7BgF_t_I0MCjNEJxYyrNxMTb1N0hCifT8Y6Cw,2255
873
+ datahub/specific/aspect_helpers/ownership.py,sha256=rNYiJSqb_FJQhFRSIQScg4mfxgYhPvjeaYyvutY6CN0,1861
874
+ datahub/specific/aspect_helpers/structured_properties.py,sha256=EVnFS025r-PG5PAC7VENVJO-JvDYif2VeYonsC3Z8m8,2255
875
+ datahub/specific/aspect_helpers/tags.py,sha256=YHcKfRaIvv12wcmfMc8-Dk6gf6xIvJedkn451uBuz-Y,1254
876
+ datahub/specific/aspect_helpers/terms.py,sha256=l8xoOLQ2RsIl3UnKhLisQNwrGTFIPrzfvP4zjH-AhwI,1352
874
877
  datahub/sql_parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
875
878
  datahub/sql_parsing/_models.py,sha256=il-xm1RcLdi1phJUV3xrTecdOGH31akqheuSC2N4YhQ,3141
876
879
  datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPiTb6-rO3Y,7043
@@ -878,7 +881,7 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
878
881
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
879
882
  datahub/sql_parsing/schema_resolver.py,sha256=9INZWdxA2dMSLK6RXaVqjbjyLY_VKMhCkQv_Xd6Ln3I,10848
880
883
  datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
881
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=LBs1RjRqh3natrx4WfgRQGNpI56o12jtbABO5ipEBWA,69889
884
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=jVF6TbyM71XdJ34K0Setz3LgJALvJrJs1mVKdxU_6d4,69830
882
885
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
883
886
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
884
887
  datahub/sql_parsing/sqlglot_lineage.py,sha256=gUVq3NwZUzQByJs43JZXz8lZf0ZVzVt0FzaW5wZOwK4,47460
@@ -983,8 +986,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
983
986
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
984
987
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
985
988
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
986
- acryl_datahub-0.15.0.1rc13.dist-info/METADATA,sha256=KnCOYV5Kg855hgL3B3zmYHzPnXVeMoZYf_3ScEj1cyA,173444
987
- acryl_datahub-0.15.0.1rc13.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
988
- acryl_datahub-0.15.0.1rc13.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
989
- acryl_datahub-0.15.0.1rc13.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
990
- acryl_datahub-0.15.0.1rc13.dist-info/RECORD,,
989
+ acryl_datahub-0.15.0.1rc15.dist-info/METADATA,sha256=QjbFVb0BeepeeYv3vVWsSsJlUi1XNmMvkMID2aiJZCM,173444
990
+ acryl_datahub-0.15.0.1rc15.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
991
+ acryl_datahub-0.15.0.1rc15.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
992
+ acryl_datahub-0.15.0.1rc15.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
993
+ acryl_datahub-0.15.0.1rc15.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.7.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.1rc13"
6
+ __version__ = "0.15.0.1rc15"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -24,6 +24,7 @@ from typing import (
24
24
 
25
25
  import typing_inspect
26
26
  from avrogen.dict_wrapper import DictWrapper
27
+ from typing_extensions import assert_never
27
28
 
28
29
  from datahub.emitter.enum_helpers import get_enum_options
29
30
  from datahub.metadata.schema_classes import (
@@ -269,9 +270,8 @@ def make_owner_urn(owner: str, owner_type: OwnerType) -> str:
269
270
  return make_user_urn(owner)
270
271
  elif owner_type == OwnerType.GROUP:
271
272
  return make_group_urn(owner)
272
- # This should pretty much never happen.
273
- # TODO: With Python 3.11, we can use typing.assert_never() here.
274
- return f"urn:li:{owner_type.value}:{owner}"
273
+ else:
274
+ assert_never(owner_type)
275
275
 
276
276
 
277
277
  def make_ownership_type_urn(type: str) -> str:
@@ -2,7 +2,19 @@ import json
2
2
  import time
3
3
  from collections import defaultdict
4
4
  from dataclasses import dataclass
5
- from typing import Any, Dict, List, Optional, Sequence, Union
5
+ from typing import (
6
+ Any,
7
+ Dict,
8
+ List,
9
+ Literal,
10
+ Optional,
11
+ Protocol,
12
+ Tuple,
13
+ Union,
14
+ runtime_checkable,
15
+ )
16
+
17
+ from typing_extensions import LiteralString
6
18
 
7
19
  from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
8
20
  from datahub.emitter.serialization_helper import pre_json_transform
@@ -19,25 +31,36 @@ from datahub.metadata.urns import Urn
19
31
  from datahub.utilities.urns.urn import guess_entity_type
20
32
 
21
33
 
34
+ @runtime_checkable
35
+ class SupportsToObj(Protocol):
36
+ def to_obj(self) -> Any:
37
+ ...
38
+
39
+
22
40
  def _recursive_to_obj(obj: Any) -> Any:
23
41
  if isinstance(obj, list):
24
42
  return [_recursive_to_obj(v) for v in obj]
25
- elif hasattr(obj, "to_obj"):
43
+ elif isinstance(obj, SupportsToObj):
26
44
  return obj.to_obj()
27
45
  else:
28
46
  return obj
29
47
 
30
48
 
49
+ PatchPath = Tuple[Union[LiteralString, Urn], ...]
50
+ PatchOp = Literal["add", "remove", "replace"]
51
+
52
+
31
53
  @dataclass
32
- class _Patch:
33
- op: str # one of ['add', 'remove', 'replace']; we don't support move, copy or test
34
- path: str
54
+ class _Patch(SupportsToObj):
55
+ op: PatchOp
56
+ path: PatchPath
35
57
  value: Any
36
58
 
37
59
  def to_obj(self) -> Dict:
60
+ quoted_path = "/" + "/".join(MetadataPatchProposal.quote(p) for p in self.path)
38
61
  return {
39
62
  "op": self.op,
40
- "path": self.path,
63
+ "path": quoted_path,
41
64
  "value": _recursive_to_obj(self.value),
42
65
  }
43
66
 
@@ -63,15 +86,16 @@ class MetadataPatchProposal:
63
86
 
64
87
  # Json Patch quoting based on https://jsonpatch.com/#json-pointer
65
88
  @classmethod
66
- def quote(cls, value: str) -> str:
67
- return value.replace("~", "~0").replace("/", "~1")
89
+ def quote(cls, value: Union[str, Urn]) -> str:
90
+ return str(value).replace("~", "~0").replace("/", "~1")
68
91
 
69
92
  def _add_patch(
70
- self, aspect_name: str, op: str, path: Union[str, Sequence[str]], value: Any
93
+ self,
94
+ aspect_name: str,
95
+ op: PatchOp,
96
+ path: PatchPath,
97
+ value: Any,
71
98
  ) -> None:
72
- if not isinstance(path, str):
73
- path = "/" + "/".join(self.quote(p) for p in path)
74
-
75
99
  # TODO: Validate that aspectName is a valid aspect for this entityType
76
100
  self.patches[aspect_name].append(_Patch(op, path, value))
77
101
 
@@ -206,9 +206,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
206
206
 
207
207
  def _init_schema_resolver(self) -> SchemaResolver:
208
208
  schema_resolution_required = (
209
- self.config.use_queries_v2
210
- or self.config.lineage_parse_view_ddl
211
- or self.config.lineage_use_sql_parser
209
+ self.config.use_queries_v2 or self.config.lineage_use_sql_parser
212
210
  )
213
211
  schema_ingestion_enabled = (
214
212
  self.config.include_schema_metadata
@@ -255,18 +253,16 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
255
253
  for project in projects:
256
254
  yield from self.bq_schema_extractor.get_project_workunits(project)
257
255
 
258
- if self.config.use_queries_v2:
259
- # Always ingest View and Snapshot lineage with schema ingestion
260
- self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
261
-
262
- yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
263
- [p.id for p in projects],
264
- self.bq_schema_extractor.view_refs_by_project,
265
- self.bq_schema_extractor.view_definitions,
266
- self.bq_schema_extractor.snapshot_refs_by_project,
267
- self.bq_schema_extractor.snapshots_by_ref,
268
- )
256
+ self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
257
+ yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
258
+ [p.id for p in projects],
259
+ self.bq_schema_extractor.view_refs_by_project,
260
+ self.bq_schema_extractor.view_definitions,
261
+ self.bq_schema_extractor.snapshot_refs_by_project,
262
+ self.bq_schema_extractor.snapshots_by_ref,
263
+ )
269
264
 
265
+ if self.config.use_queries_v2:
270
266
  # if both usage and lineage are disabled then skip queries extractor piece
271
267
  if (
272
268
  not self.config.include_usage_statistics
@@ -306,10 +302,6 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
306
302
  if self.config.include_table_lineage:
307
303
  yield from self.lineage_extractor.get_lineage_workunits(
308
304
  [p.id for p in projects],
309
- self.bq_schema_extractor.view_refs_by_project,
310
- self.bq_schema_extractor.view_definitions,
311
- self.bq_schema_extractor.snapshot_refs_by_project,
312
- self.bq_schema_extractor.snapshots_by_ref,
313
305
  self.bq_schema_extractor.table_refs,
314
306
  )
315
307
 
@@ -463,10 +463,6 @@ class BigQueryV2Config(
463
463
  default=True,
464
464
  description="Use sql parser to resolve view/table lineage.",
465
465
  )
466
- lineage_parse_view_ddl: bool = Field(
467
- default=True,
468
- description="Sql parse view ddl to get lineage.",
469
- )
470
466
 
471
467
  lineage_sql_parser_use_raw_names: bool = Field(
472
468
  default=False,
@@ -572,11 +568,9 @@ class BigQueryV2Config(
572
568
  "See [this](https://cloud.google.com/bigquery/docs/information-schema-jobs#scope_and_syntax) for details.",
573
569
  )
574
570
 
575
- # include_view_lineage and include_view_column_lineage are inherited from SQLCommonConfig
576
- # but not used in bigquery so we hide them from docs.
577
- include_view_lineage: bool = Field(default=True, hidden_from_docs=True)
578
-
579
- include_view_column_lineage: bool = Field(default=True, hidden_from_docs=True)
571
+ _include_view_lineage = pydantic_removed_field("include_view_lineage")
572
+ _include_view_column_lineage = pydantic_removed_field("include_view_column_lineage")
573
+ _lineage_parse_view_ddl = pydantic_removed_field("lineage_parse_view_ddl")
580
574
 
581
575
  @root_validator(pre=True)
582
576
  def set_include_schema_metadata(cls, values: Dict) -> Dict:
@@ -653,14 +653,11 @@ class BigQuerySchemaGenerator:
653
653
  self.report.report_dropped(table_identifier.raw_table_name())
654
654
  return
655
655
 
656
- if self.store_table_refs:
657
- table_ref = str(
658
- BigQueryTableRef(table_identifier).get_sanitized_table_ref()
659
- )
660
- self.table_refs.add(table_ref)
661
- if self.config.lineage_parse_view_ddl and view.view_definition:
662
- self.view_refs_by_project[project_id].add(table_ref)
663
- self.view_definitions[table_ref] = view.view_definition
656
+ table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
657
+ self.table_refs.add(table_ref)
658
+ if view.view_definition:
659
+ self.view_refs_by_project[project_id].add(table_ref)
660
+ self.view_definitions[table_ref] = view.view_definition
664
661
 
665
662
  view.column_count = len(columns)
666
663
  if not view.column_count:
@@ -701,14 +698,11 @@ class BigQuerySchemaGenerator:
701
698
  f"Snapshot doesn't have any column or unable to get columns for snapshot: {table_identifier}"
702
699
  )
703
700
 
704
- if self.store_table_refs:
705
- table_ref = str(
706
- BigQueryTableRef(table_identifier).get_sanitized_table_ref()
707
- )
708
- self.table_refs.add(table_ref)
709
- if snapshot.base_table_identifier:
710
- self.snapshot_refs_by_project[project_id].add(table_ref)
711
- self.snapshots_by_ref[table_ref] = snapshot
701
+ table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
702
+ self.table_refs.add(table_ref)
703
+ if snapshot.base_table_identifier:
704
+ self.snapshot_refs_by_project[project_id].add(table_ref)
705
+ self.snapshots_by_ref[table_ref] = snapshot
712
706
 
713
707
  yield from self.gen_snapshot_dataset_workunits(
714
708
  table=snapshot,
@@ -1148,7 +1142,7 @@ class BigQuerySchemaGenerator:
1148
1142
  foreignKeys=foreign_keys if foreign_keys else None,
1149
1143
  )
1150
1144
 
1151
- if self.config.lineage_parse_view_ddl or self.config.lineage_use_sql_parser:
1145
+ if self.config.lineage_use_sql_parser:
1152
1146
  self.sql_parser_schema_resolver.add_schema_metadata(
1153
1147
  dataset_urn, schema_metadata
1154
1148
  )
@@ -291,16 +291,15 @@ class BigqueryLineageExtractor:
291
291
  snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
292
292
  ) -> Iterable[MetadataWorkUnit]:
293
293
  for project in projects:
294
- if self.config.lineage_parse_view_ddl:
295
- for view in view_refs_by_project[project]:
296
- self.datasets_skip_audit_log_lineage.add(view)
297
- self.aggregator.add_view_definition(
298
- view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
299
- BigQueryTableRef.from_string_name(view)
300
- ),
301
- view_definition=view_definitions[view],
302
- default_db=project,
303
- )
294
+ for view in view_refs_by_project[project]:
295
+ self.datasets_skip_audit_log_lineage.add(view)
296
+ self.aggregator.add_view_definition(
297
+ view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
298
+ BigQueryTableRef.from_string_name(view)
299
+ ),
300
+ view_definition=view_definitions[view],
301
+ default_db=project,
302
+ )
304
303
 
305
304
  for snapshot_ref in snapshot_refs_by_project[project]:
306
305
  snapshot = snapshots_by_ref[snapshot_ref]
@@ -322,23 +321,11 @@ class BigqueryLineageExtractor:
322
321
  def get_lineage_workunits(
323
322
  self,
324
323
  projects: List[str],
325
- view_refs_by_project: Dict[str, Set[str]],
326
- view_definitions: FileBackedDict[str],
327
- snapshot_refs_by_project: Dict[str, Set[str]],
328
- snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
329
324
  table_refs: Set[str],
330
325
  ) -> Iterable[MetadataWorkUnit]:
331
326
  if not self._should_ingest_lineage():
332
327
  return
333
328
 
334
- yield from self.get_lineage_workunits_for_views_and_snapshots(
335
- projects,
336
- view_refs_by_project,
337
- view_definitions,
338
- snapshot_refs_by_project,
339
- snapshots_by_ref,
340
- )
341
-
342
329
  if self.config.use_exported_bigquery_audit_metadata:
343
330
  projects = ["*"] # project_id not used when using exported metadata
344
331
 
@@ -188,6 +188,9 @@ class DataHubGcSource(Source):
188
188
  self._truncate_timeseries_helper(
189
189
  aspect_name="dashboardUsageStatistics", entity_type="dashboard"
190
190
  )
191
+ self._truncate_timeseries_helper(
192
+ aspect_name="queryusagestatistics", entity_type="query"
193
+ )
191
194
 
192
195
  def _truncate_timeseries_helper(self, aspect_name: str, entity_type: str) -> None:
193
196
  self._truncate_timeseries_with_watch_optional(
@@ -141,7 +141,9 @@ class DatahubExecutionRequestCleanup:
141
141
  break
142
142
  if self.report.ergc_read_errors >= self.config.max_read_errors:
143
143
  self.report.failure(
144
- f"ergc({self.instance_id}): too many read errors, aborting."
144
+ title="Too many read errors, aborting",
145
+ message="Too many read errors, aborting",
146
+ context=str(self.instance_id),
145
147
  )
146
148
  break
147
149
  try:
@@ -158,8 +160,11 @@ class DatahubExecutionRequestCleanup:
158
160
  break
159
161
  params["scrollId"] = document["scrollId"]
160
162
  except Exception as e:
161
- logger.error(
162
- f"ergc({self.instance_id}): failed to fetch next batch of execution requests: {e}"
163
+ self.report.failure(
164
+ title="Failed to fetch next batch of execution requests",
165
+ message="Failed to fetch next batch of execution requests",
166
+ context=str(self.instance_id),
167
+ exc=e,
163
168
  )
164
169
  self.report.ergc_read_errors += 1
165
170
 
@@ -231,8 +236,11 @@ class DatahubExecutionRequestCleanup:
231
236
  self.graph.delete_entity(entry.urn, True)
232
237
  except Exception as e:
233
238
  self.report.ergc_delete_errors += 1
234
- logger.error(
235
- f"ergc({self.instance_id}): failed to delete ExecutionRequest {entry.request_id}: {e}"
239
+ self.report.failure(
240
+ title="Failed to delete ExecutionRequest",
241
+ message="Failed to delete ExecutionRequest",
242
+ context=str(self.instance_id),
243
+ exc=e,
236
244
  )
237
245
 
238
246
  def _reached_runtime_limit(self) -> bool:
@@ -105,6 +105,8 @@ class SoftDeletedEntitiesReport(SourceReport):
105
105
  sample_hard_deleted_aspects_by_type: TopKDict[str, LossyList[str]] = field(
106
106
  default_factory=TopKDict
107
107
  )
108
+ runtime_limit_reached: bool = False
109
+ deletion_limit_reached: bool = False
108
110
 
109
111
 
110
112
  class SoftDeletedEntitiesCleanup:
@@ -163,6 +165,8 @@ class SoftDeletedEntitiesCleanup:
163
165
  f"Dry run is on otherwise it would have deleted {urn} with hard deletion"
164
166
  )
165
167
  return
168
+ if self._deletion_limit_reached() or self._times_up():
169
+ return
166
170
  self._increment_removal_started_count()
167
171
  self.ctx.graph.delete_entity(urn=urn, hard=True)
168
172
  self.ctx.graph.delete_references_to_urn(
@@ -203,11 +207,10 @@ class SoftDeletedEntitiesCleanup:
203
207
  for future in done:
204
208
  self._print_report()
205
209
  if future.exception():
206
- logger.error(
207
- f"Failed to delete entity {futures[future]}: {future.exception()}"
208
- )
209
210
  self.report.failure(
210
- f"Failed to delete entity {futures[future]}",
211
+ title="Failed to delete entity",
212
+ message="Failed to delete entity",
213
+ context=futures[future],
211
214
  exc=future.exception(),
212
215
  )
213
216
  self.report.num_soft_deleted_entity_processed += 1
@@ -274,6 +277,26 @@ class SoftDeletedEntitiesCleanup:
274
277
  )
275
278
  yield from self._get_soft_deleted_queries()
276
279
 
280
+ def _times_up(self) -> bool:
281
+ if (
282
+ self.config.runtime_limit_seconds
283
+ and time.time() - self.start_time > self.config.runtime_limit_seconds
284
+ ):
285
+ with self._report_lock:
286
+ self.report.runtime_limit_reached = True
287
+ return True
288
+ return False
289
+
290
+ def _deletion_limit_reached(self) -> bool:
291
+ if (
292
+ self.config.limit_entities_delete
293
+ and self.report.num_hard_deleted > self.config.limit_entities_delete
294
+ ):
295
+ with self._report_lock:
296
+ self.report.deletion_limit_reached = True
297
+ return True
298
+ return False
299
+
277
300
  def cleanup_soft_deleted_entities(self) -> None:
278
301
  if not self.config.enabled:
279
302
  return
@@ -285,24 +308,8 @@ class SoftDeletedEntitiesCleanup:
285
308
  self._print_report()
286
309
  while len(futures) >= self.config.futures_max_at_time:
287
310
  futures = self._process_futures(futures)
288
- if (
289
- self.config.limit_entities_delete
290
- and self.report.num_hard_deleted > self.config.limit_entities_delete
291
- ):
292
- logger.info(
293
- f"Limit of {self.config.limit_entities_delete} entities reached. Stopped adding more."
294
- )
311
+ if self._deletion_limit_reached() or self._times_up():
295
312
  break
296
- if (
297
- self.config.runtime_limit_seconds
298
- and time.time() - self.start_time
299
- > self.config.runtime_limit_seconds
300
- ):
301
- logger.info(
302
- f"Runtime limit of {self.config.runtime_limit_seconds} seconds reached. Not submitting more futures."
303
- )
304
- break
305
-
306
313
  future = executor.submit(self.delete_soft_deleted_entity, urn)
307
314
  futures[future] = urn
308
315
 
@@ -61,6 +61,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
61
61
  ColumnRef,
62
62
  DownstreamColumnRef,
63
63
  )
64
+ from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
64
65
  from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList
65
66
  from datahub.utilities.perf_timer import PerfTimer
66
67
 
@@ -475,10 +476,11 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
475
476
 
476
477
  entry = PreparsedQuery(
477
478
  # Despite having Snowflake's fingerprints available, our own fingerprinting logic does a better
478
- # job at eliminating redundant / repetitive queries. As such, we don't include the fingerprint
479
- # here so that the aggregator auto-generates one.
480
- # query_id=res["query_fingerprint"],
481
- query_id=None,
479
+ # job at eliminating redundant / repetitive queries. As such, we include the fast fingerprint
480
+ # here
481
+ query_id=get_query_fingerprint(
482
+ res["query_text"], self.identifiers.platform, fast=True
483
+ ),
482
484
  query_text=res["query_text"],
483
485
  upstreams=upstreams,
484
486
  downstream=downstream,