acryl-datahub 0.15.0.1rc12__py3-none-any.whl → 0.15.0.1rc14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (38) hide show
  1. {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/METADATA +2369 -2369
  2. {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/RECORD +35 -31
  3. datahub/__init__.py +1 -1
  4. datahub/emitter/mce_builder.py +3 -3
  5. datahub/emitter/mcp_patch_builder.py +36 -12
  6. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
  7. datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
  8. datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
  9. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
  10. datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
  11. datahub/ingestion/source/gc/dataprocess_cleanup.py +4 -4
  12. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +159 -71
  13. datahub/ingestion/source/tableau/tableau.py +3 -0
  14. datahub/ingestion/source/tableau/tableau_common.py +18 -0
  15. datahub/metadata/_schema_classes.py +61 -1
  16. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
  17. datahub/metadata/schema.avsc +64 -29
  18. datahub/metadata/schemas/DataJobKey.avsc +2 -1
  19. datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
  20. datahub/specific/aspect_helpers/__init__.py +0 -0
  21. datahub/specific/aspect_helpers/custom_properties.py +79 -0
  22. datahub/specific/aspect_helpers/ownership.py +67 -0
  23. datahub/specific/aspect_helpers/structured_properties.py +72 -0
  24. datahub/specific/aspect_helpers/tags.py +42 -0
  25. datahub/specific/aspect_helpers/terms.py +43 -0
  26. datahub/specific/chart.py +28 -184
  27. datahub/specific/dashboard.py +31 -196
  28. datahub/specific/datajob.py +34 -189
  29. datahub/specific/dataproduct.py +24 -86
  30. datahub/specific/dataset.py +48 -133
  31. datahub/specific/form.py +12 -32
  32. datahub/specific/structured_property.py +9 -9
  33. datahub/specific/custom_properties.py +0 -37
  34. datahub/specific/ownership.py +0 -48
  35. datahub/specific/structured_properties.py +0 -53
  36. {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/WHEEL +0 -0
  37. {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/entry_points.txt +0 -0
  38. {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=LdmQMvZSZxJJKW3u06itY2EkTfgOjWTGkJHo9YvmkV0,577
1
+ datahub/__init__.py,sha256=tC4XcRTMJqr-bc6T1QdoRI7MvbIkSZk8AscmW3iOtOo,577
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -114,10 +114,10 @@ datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
114
114
  datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
115
115
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
116
116
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
117
- datahub/emitter/mce_builder.py,sha256=5oZHXs85GGwfL8tY72IPnicyYrRXraN4LgtVQQcZyq8,16417
117
+ datahub/emitter/mce_builder.py,sha256=IqHOm0cpzdVC_mQOqk0yEVJUEj9xn8am2OFAwwQeX_8,16342
118
118
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
119
119
  datahub/emitter/mcp_builder.py,sha256=eOcuz41c4a3oTkNk39yYl9bTxpksxqATPHLcqyhPGT0,9856
120
- datahub/emitter/mcp_patch_builder.py,sha256=ykQFJshFrVF6DjkjcHQ8ZhDEws3ki0gmNjkHNfQtHwQ,4277
120
+ datahub/emitter/mcp_patch_builder.py,sha256=oonC8iGOvDzqj890CxOjWlBdDEF1RnwvbSZy1sivlTY,4572
121
121
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
122
122
  datahub/emitter/rest_emitter.py,sha256=oqyRuXG1o1dYjiEIH5TFMb1q0xhRbpxPIA5qkyz0iQ8,16407
123
123
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
@@ -169,7 +169,7 @@ datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4
169
169
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
170
170
  datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
171
171
  datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
172
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=rrnlgptYF3YkxWlLYpkLm3mgrmzHcy6AwTHUG18bKVA,8373
172
+ datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=O2SGDU2_qMtyr_1BH9-WkNOojFWig2z4O3M21nTRo70,9908
173
173
  datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
174
174
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
175
175
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -233,20 +233,20 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
233
233
  datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
234
234
  datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
235
235
  datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
236
- datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=-12CZWeSIAkI6Kb4AY8NAF3wsC_2lxhPErm5o0oUUes,14116
236
+ datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=YMsyj6s7fggzisWfDdbT4w1MKJ3eRdNERsCShnu0Zqo,13681
237
237
  datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
238
238
  datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
239
- datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=xnYWxbhvv-rJRHLGkOWIAn4Ir__hwinEZF1F7TWWirE,26086
239
+ datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=PqbYNqM4-KukCU1meuvsk0qbiWa7UFh5hqHrHsvOSWQ,25889
240
240
  datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
241
241
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
242
242
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
243
243
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
244
244
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
245
245
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
246
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Sv6BrK62nu3xpgjYGE-x1xdSTouvvnKDJtazPobhiKQ,50813
246
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=o2My5Q7ab39qHP3jjVFCQSErogGYb14s6397xHIZSqc,50568
247
247
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
248
248
  datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
249
- datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Jg_pwnaj7l_KEcgq0enJXwrKh5jyUfBl4YB05YpkIVg,45415
249
+ datahub/ingestion/source/bigquery_v2/lineage.py,sha256=LJqdkCR8H55b3txCVBM-cs1T5QWxSTimJ3ebSgtXjgI,44874
250
250
  datahub/ingestion/source/bigquery_v2/profiler.py,sha256=8-yAoq8sX0E6VIwr75YbM8wITRNhGfxgte9BCeGNkMM,10681
251
251
  datahub/ingestion/source/bigquery_v2/queries.py,sha256=B2vJLZYfwM1J5JAckijKJTxLhDYA0yw3kfzj5oRQB5c,20151
252
252
  datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=xLf-vCUAnNuDdTHghxJvPOyGeA_XLCW3r-xj-8cfn3Q,19528
@@ -303,9 +303,9 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
303
303
  datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
304
304
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
305
305
  datahub/ingestion/source/gc/datahub_gc.py,sha256=WOg3yIaNmwdbSTwytKeSfIUihsM7FMYBip9u2Dnwk3c,12849
306
- datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=9brJW_HVrxJk1kAP20M7flmgYOMemOmaEl2zheWFW3c,17105
306
+ datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=86Tm3NNWMf0xM4TklNIEeNOjEingKpYy-XvCPeaAb4k,17125
307
307
  datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=sZbdkg3MuPVGf8eeeRg_2khGMZ01QoH4dgJiTxf7Srg,9813
308
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=wRnRaIVUG483tY4nyDkEn6Xi2RL5MjrVvoCoZimqwSg,7514
308
+ datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=LvDGTaAaI-T0OZ3fkaFwipLdzPePunuSVWoEuSBsfEM,11099
309
309
  datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
310
310
  datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
311
311
  datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
@@ -491,8 +491,8 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
491
491
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
492
492
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
493
493
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
494
- datahub/ingestion/source/tableau/tableau.py,sha256=SWEJi0LoIhb8rVVmmhVxngENo53QtXFvJE02aOIzG6Q,140034
495
- datahub/ingestion/source/tableau/tableau_common.py,sha256=9gQLq_3BlAsKll83uVlnWJRWaIDtFtREUyuimXF13Z0,26219
494
+ datahub/ingestion/source/tableau/tableau.py,sha256=fY--jFtPtCuDBAruiMStAoT7HqaTDYtiVEKzEYuzCag,140121
495
+ datahub/ingestion/source/tableau/tableau_common.py,sha256=a3Nu0Upy6_pnrd7XpSMcYHdnYca1JBW7H0jMqkYr0ME,26871
496
496
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
497
497
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
498
498
  datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
@@ -566,8 +566,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
566
566
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
567
567
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
568
568
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
569
- datahub/metadata/_schema_classes.py,sha256=NbZUezNRH72XQUl4i_DlV-oRT4KzEBYFYcHcwGqXq9A,962516
570
- datahub/metadata/schema.avsc,sha256=Ulqzumt0EK7nD_OATi0hbCgw42ngoenja9SXWWsobIk,728543
569
+ datahub/metadata/_schema_classes.py,sha256=IAWpWPxOeGmvmc96dapE0CySk1Rikbh-YieT-K9YTMY,964636
570
+ datahub/metadata/schema.avsc,sha256=CeVb_Z7k0e5kmeqDUXUW7JDL6KSKBCdfAZzqRI_mLZo,729869
571
571
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
572
572
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
573
573
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -581,7 +581,7 @@ datahub/metadata/com/linkedin/pegasus2avro/access/token/__init__.py,sha256=P9M7N
581
581
  datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py,sha256=PgK5O-6pVRaEcvmwXAsSkwRLe8NjGiLH8AVBXeArqK8,5751
582
582
  datahub/metadata/com/linkedin/pegasus2avro/businessattribute/__init__.py,sha256=N8kO-eUi0_Rt7weizIExxlnJ2_kZRtPrZLWCC1xtDMA,653
583
583
  datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py,sha256=RNyyHLBNp_fxgFcBOLWO2UsXR1ofD_JczcBdPEQSusg,848
584
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=yD2OyoQhMT3KnvxRyzPXiHsyeH-wHG1NBlLn64iCE4A,5333
584
+ datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=ukX0VnveTrMx9G6uDaTkuk4Z2kxXr2hUK8srZuRPxj0,5520
585
585
  datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py,sha256=FN63vLiB3FCmIRqBjTA-0Xt7M6i7h5NhaVzbA1ysv18,396
586
586
  datahub/metadata/com/linkedin/pegasus2avro/connection/__init__.py,sha256=qRtw-dB14pzVzgQ0pDK8kyBplNdpRxVKNj4D70e_FqI,564
587
587
  datahub/metadata/com/linkedin/pegasus2avro/container/__init__.py,sha256=3yWt36KqDKFhRc9pzvt0AMnbMTlhKurGvT3BUvc25QU,510
@@ -705,7 +705,7 @@ datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdP
705
705
  datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
706
706
  datahub/metadata/schemas/DataJobInfo.avsc,sha256=--obUbt_4X2paB39EeRKP13sBSiK-r0nq070EamoV1w,7212
707
707
  datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
708
- datahub/metadata/schemas/DataJobKey.avsc,sha256=_fSAQDgP_UPtZfqAZPhJmsHxxltuMh9btgw20z4R6Xk,1555
708
+ datahub/metadata/schemas/DataJobKey.avsc,sha256=4F3myS-O6n7AlUqTvCkMSFvsYAjVhUq6uaQVbqLoYdM,1583
709
709
  datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
710
710
  datahub/metadata/schemas/DataPlatformInstance.avsc,sha256=SNd3v_YyyLaDflv8Rd5cQR9GrVuky_cDTkYM6FqJiM8,1058
711
711
  datahub/metadata/schemas/DataPlatformInstanceKey.avsc,sha256=sXUV5EMT6N-x8d6s8ebcJ5JdFIOsJCtiiU5Jtm-ncIk,800
@@ -721,6 +721,7 @@ datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkL
721
721
  datahub/metadata/schemas/DataProcessKey.avsc,sha256=mY1BDiEYo8RchI9DckQEz9Vks5Ibt2RdWZU8OYGnrHA,2240
722
722
  datahub/metadata/schemas/DataProductKey.avsc,sha256=tcdQNWk3pLA3xZzOnHvZuq2u4SQuk2YcAlsxE8CcEeU,621
723
723
  datahub/metadata/schemas/DataProductProperties.avsc,sha256=nYEK6JgpTprU0iZaqWLZsBGYJLkh6HCi1qCu-wbYhvM,6925
724
+ datahub/metadata/schemas/DataTransformLogic.avsc,sha256=wDng1GK9znVoK0INHGiSCSa-AH5MrDkVdMzz4wOWmrY,2011
724
725
  datahub/metadata/schemas/DataTypeInfo.avsc,sha256=MCjzal71P8uIXZg161LrU8rZTJocZeizK-YxYA0Det0,704
725
726
  datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiFvJoEGHRzEilw,546
726
727
  datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
@@ -860,16 +861,19 @@ datahub/secret/datahub_secrets_client.py,sha256=WkoJDip7IAKSGDM5oHeZVL8878pd4Bix
860
861
  datahub/secret/secret_common.py,sha256=PeRFNljPlGfNrmn3VtDVbazQE6J3Q1nA3L-z3cS8LEA,2522
861
862
  datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
862
863
  datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
863
- datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,11324
864
- datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
865
- datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
866
- datahub/specific/datajob.py,sha256=5pEBrN6llpgS7jWYEfrvqpbT2vMVVpepH71jIUJUo4U,18480
867
- datahub/specific/dataproduct.py,sha256=lVv3TGkZyZ0t9CUXLnkwMhr8GK1HB-fiyRyjxTdvb7s,5259
868
- datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
869
- datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
870
- datahub/specific/ownership.py,sha256=KlYnk7o0Tq2EVugW7qRWR9D3v0C8PuqIdwgUzYwlkDM,1446
871
- datahub/specific/structured_properties.py,sha256=unc0VllBdbOm7KIWf_5tFkP4TQusN7JUKoYyOFvFQhs,1767
872
- datahub/specific/structured_property.py,sha256=IYeFyafPidNrDbn1sU65rEPwIZDS-wLY1SYXSNUUbHQ,4038
864
+ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,6684
865
+ datahub/specific/dashboard.py,sha256=D8CnOSScQ0-UICFjQnQOtqL-SlNSxhSuub4vZ3BpcuI,10017
866
+ datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
867
+ datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
868
+ datahub/specific/dataset.py,sha256=je9j3rVzpSiXoOe0UmfD7mc5vCpLAAO74Z8q1SvwPX0,9725
869
+ datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
870
+ datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
871
+ datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
872
+ datahub/specific/aspect_helpers/custom_properties.py,sha256=s87_Aq7BgF_t_I0MCjNEJxYyrNxMTb1N0hCifT8Y6Cw,2255
873
+ datahub/specific/aspect_helpers/ownership.py,sha256=rNYiJSqb_FJQhFRSIQScg4mfxgYhPvjeaYyvutY6CN0,1861
874
+ datahub/specific/aspect_helpers/structured_properties.py,sha256=EVnFS025r-PG5PAC7VENVJO-JvDYif2VeYonsC3Z8m8,2255
875
+ datahub/specific/aspect_helpers/tags.py,sha256=YHcKfRaIvv12wcmfMc8-Dk6gf6xIvJedkn451uBuz-Y,1254
876
+ datahub/specific/aspect_helpers/terms.py,sha256=l8xoOLQ2RsIl3UnKhLisQNwrGTFIPrzfvP4zjH-AhwI,1352
873
877
  datahub/sql_parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
874
878
  datahub/sql_parsing/_models.py,sha256=il-xm1RcLdi1phJUV3xrTecdOGH31akqheuSC2N4YhQ,3141
875
879
  datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPiTb6-rO3Y,7043
@@ -982,8 +986,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
982
986
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
983
987
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
984
988
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
985
- acryl_datahub-0.15.0.1rc12.dist-info/METADATA,sha256=w8H0vrzaAbeZnX_mqNopX7I929V_AYXeWGKidlrUrE8,173444
986
- acryl_datahub-0.15.0.1rc12.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
987
- acryl_datahub-0.15.0.1rc12.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
988
- acryl_datahub-0.15.0.1rc12.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
989
- acryl_datahub-0.15.0.1rc12.dist-info/RECORD,,
989
+ acryl_datahub-0.15.0.1rc14.dist-info/METADATA,sha256=na5JJwiilGTUFiwOBRULg2a8NxVvzNRgwodacg0LOSU,173444
990
+ acryl_datahub-0.15.0.1rc14.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
991
+ acryl_datahub-0.15.0.1rc14.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
992
+ acryl_datahub-0.15.0.1rc14.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
993
+ acryl_datahub-0.15.0.1rc14.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.1rc12"
6
+ __version__ = "0.15.0.1rc14"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -24,6 +24,7 @@ from typing import (
24
24
 
25
25
  import typing_inspect
26
26
  from avrogen.dict_wrapper import DictWrapper
27
+ from typing_extensions import assert_never
27
28
 
28
29
  from datahub.emitter.enum_helpers import get_enum_options
29
30
  from datahub.metadata.schema_classes import (
@@ -269,9 +270,8 @@ def make_owner_urn(owner: str, owner_type: OwnerType) -> str:
269
270
  return make_user_urn(owner)
270
271
  elif owner_type == OwnerType.GROUP:
271
272
  return make_group_urn(owner)
272
- # This should pretty much never happen.
273
- # TODO: With Python 3.11, we can use typing.assert_never() here.
274
- return f"urn:li:{owner_type.value}:{owner}"
273
+ else:
274
+ assert_never(owner_type)
275
275
 
276
276
 
277
277
  def make_ownership_type_urn(type: str) -> str:
@@ -2,7 +2,19 @@ import json
2
2
  import time
3
3
  from collections import defaultdict
4
4
  from dataclasses import dataclass
5
- from typing import Any, Dict, List, Optional, Sequence, Union
5
+ from typing import (
6
+ Any,
7
+ Dict,
8
+ List,
9
+ Literal,
10
+ Optional,
11
+ Protocol,
12
+ Tuple,
13
+ Union,
14
+ runtime_checkable,
15
+ )
16
+
17
+ from typing_extensions import LiteralString
6
18
 
7
19
  from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
8
20
  from datahub.emitter.serialization_helper import pre_json_transform
@@ -19,25 +31,36 @@ from datahub.metadata.urns import Urn
19
31
  from datahub.utilities.urns.urn import guess_entity_type
20
32
 
21
33
 
34
+ @runtime_checkable
35
+ class SupportsToObj(Protocol):
36
+ def to_obj(self) -> Any:
37
+ ...
38
+
39
+
22
40
  def _recursive_to_obj(obj: Any) -> Any:
23
41
  if isinstance(obj, list):
24
42
  return [_recursive_to_obj(v) for v in obj]
25
- elif hasattr(obj, "to_obj"):
43
+ elif isinstance(obj, SupportsToObj):
26
44
  return obj.to_obj()
27
45
  else:
28
46
  return obj
29
47
 
30
48
 
49
+ PatchPath = Tuple[Union[LiteralString, Urn], ...]
50
+ PatchOp = Literal["add", "remove", "replace"]
51
+
52
+
31
53
  @dataclass
32
- class _Patch:
33
- op: str # one of ['add', 'remove', 'replace']; we don't support move, copy or test
34
- path: str
54
+ class _Patch(SupportsToObj):
55
+ op: PatchOp
56
+ path: PatchPath
35
57
  value: Any
36
58
 
37
59
  def to_obj(self) -> Dict:
60
+ quoted_path = "/" + "/".join(MetadataPatchProposal.quote(p) for p in self.path)
38
61
  return {
39
62
  "op": self.op,
40
- "path": self.path,
63
+ "path": quoted_path,
41
64
  "value": _recursive_to_obj(self.value),
42
65
  }
43
66
 
@@ -63,15 +86,16 @@ class MetadataPatchProposal:
63
86
 
64
87
  # Json Patch quoting based on https://jsonpatch.com/#json-pointer
65
88
  @classmethod
66
- def quote(cls, value: str) -> str:
67
- return value.replace("~", "~0").replace("/", "~1")
89
+ def quote(cls, value: Union[str, Urn]) -> str:
90
+ return str(value).replace("~", "~0").replace("/", "~1")
68
91
 
69
92
  def _add_patch(
70
- self, aspect_name: str, op: str, path: Union[str, Sequence[str]], value: Any
93
+ self,
94
+ aspect_name: str,
95
+ op: PatchOp,
96
+ path: PatchPath,
97
+ value: Any,
71
98
  ) -> None:
72
- if not isinstance(path, str):
73
- path = "/" + "/".join(self.quote(p) for p in path)
74
-
75
99
  # TODO: Validate that aspectName is a valid aspect for this entityType
76
100
  self.patches[aspect_name].append(_Patch(op, path, value))
77
101
 
@@ -146,12 +146,55 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
146
146
  aspect_value=source_info_aspect,
147
147
  )
148
148
 
149
+ @staticmethod
150
+ def _convert_sets_to_lists(obj: Any) -> Any:
151
+ """
152
+ Recursively converts all sets to lists in a Python object.
153
+ Works with nested dictionaries, lists, and sets.
154
+
155
+ Args:
156
+ obj: Any Python object that might contain sets
157
+
158
+ Returns:
159
+ The object with all sets converted to lists
160
+ """
161
+ if isinstance(obj, dict):
162
+ return {
163
+ key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
164
+ for key, value in obj.items()
165
+ }
166
+ elif isinstance(obj, list):
167
+ return [
168
+ DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
169
+ for element in obj
170
+ ]
171
+ elif isinstance(obj, set):
172
+ return [
173
+ DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
174
+ for element in obj
175
+ ]
176
+ elif isinstance(obj, tuple):
177
+ return tuple(
178
+ DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
179
+ for element in obj
180
+ )
181
+ else:
182
+ return obj
183
+
149
184
  def _get_recipe_to_report(self, ctx: PipelineContext) -> str:
150
185
  assert ctx.pipeline_config
151
186
  if not self.report_recipe or not ctx.pipeline_config.get_raw_dict():
152
187
  return ""
153
188
  else:
154
- return json.dumps(redact_raw_config(ctx.pipeline_config.get_raw_dict()))
189
+ redacted_recipe = redact_raw_config(ctx.pipeline_config.get_raw_dict())
190
+ # This is required otherwise json dumps will fail
191
+ # with a TypeError: Object of type set is not JSON serializable
192
+ converted_recipe = (
193
+ DatahubIngestionRunSummaryProvider._convert_sets_to_lists(
194
+ redacted_recipe
195
+ )
196
+ )
197
+ return json.dumps(converted_recipe)
155
198
 
156
199
  def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None:
157
200
  self.sink.write_record_async(
@@ -206,9 +206,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
206
206
 
207
207
  def _init_schema_resolver(self) -> SchemaResolver:
208
208
  schema_resolution_required = (
209
- self.config.use_queries_v2
210
- or self.config.lineage_parse_view_ddl
211
- or self.config.lineage_use_sql_parser
209
+ self.config.use_queries_v2 or self.config.lineage_use_sql_parser
212
210
  )
213
211
  schema_ingestion_enabled = (
214
212
  self.config.include_schema_metadata
@@ -255,18 +253,16 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
255
253
  for project in projects:
256
254
  yield from self.bq_schema_extractor.get_project_workunits(project)
257
255
 
258
- if self.config.use_queries_v2:
259
- # Always ingest View and Snapshot lineage with schema ingestion
260
- self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
261
-
262
- yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
263
- [p.id for p in projects],
264
- self.bq_schema_extractor.view_refs_by_project,
265
- self.bq_schema_extractor.view_definitions,
266
- self.bq_schema_extractor.snapshot_refs_by_project,
267
- self.bq_schema_extractor.snapshots_by_ref,
268
- )
256
+ self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
257
+ yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
258
+ [p.id for p in projects],
259
+ self.bq_schema_extractor.view_refs_by_project,
260
+ self.bq_schema_extractor.view_definitions,
261
+ self.bq_schema_extractor.snapshot_refs_by_project,
262
+ self.bq_schema_extractor.snapshots_by_ref,
263
+ )
269
264
 
265
+ if self.config.use_queries_v2:
270
266
  # if both usage and lineage are disabled then skip queries extractor piece
271
267
  if (
272
268
  not self.config.include_usage_statistics
@@ -306,10 +302,6 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
306
302
  if self.config.include_table_lineage:
307
303
  yield from self.lineage_extractor.get_lineage_workunits(
308
304
  [p.id for p in projects],
309
- self.bq_schema_extractor.view_refs_by_project,
310
- self.bq_schema_extractor.view_definitions,
311
- self.bq_schema_extractor.snapshot_refs_by_project,
312
- self.bq_schema_extractor.snapshots_by_ref,
313
305
  self.bq_schema_extractor.table_refs,
314
306
  )
315
307
 
@@ -463,10 +463,6 @@ class BigQueryV2Config(
463
463
  default=True,
464
464
  description="Use sql parser to resolve view/table lineage.",
465
465
  )
466
- lineage_parse_view_ddl: bool = Field(
467
- default=True,
468
- description="Sql parse view ddl to get lineage.",
469
- )
470
466
 
471
467
  lineage_sql_parser_use_raw_names: bool = Field(
472
468
  default=False,
@@ -572,11 +568,9 @@ class BigQueryV2Config(
572
568
  "See [this](https://cloud.google.com/bigquery/docs/information-schema-jobs#scope_and_syntax) for details.",
573
569
  )
574
570
 
575
- # include_view_lineage and include_view_column_lineage are inherited from SQLCommonConfig
576
- # but not used in bigquery so we hide them from docs.
577
- include_view_lineage: bool = Field(default=True, hidden_from_docs=True)
578
-
579
- include_view_column_lineage: bool = Field(default=True, hidden_from_docs=True)
571
+ _include_view_lineage = pydantic_removed_field("include_view_lineage")
572
+ _include_view_column_lineage = pydantic_removed_field("include_view_column_lineage")
573
+ _lineage_parse_view_ddl = pydantic_removed_field("lineage_parse_view_ddl")
580
574
 
581
575
  @root_validator(pre=True)
582
576
  def set_include_schema_metadata(cls, values: Dict) -> Dict:
@@ -653,14 +653,11 @@ class BigQuerySchemaGenerator:
653
653
  self.report.report_dropped(table_identifier.raw_table_name())
654
654
  return
655
655
 
656
- if self.store_table_refs:
657
- table_ref = str(
658
- BigQueryTableRef(table_identifier).get_sanitized_table_ref()
659
- )
660
- self.table_refs.add(table_ref)
661
- if self.config.lineage_parse_view_ddl and view.view_definition:
662
- self.view_refs_by_project[project_id].add(table_ref)
663
- self.view_definitions[table_ref] = view.view_definition
656
+ table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
657
+ self.table_refs.add(table_ref)
658
+ if view.view_definition:
659
+ self.view_refs_by_project[project_id].add(table_ref)
660
+ self.view_definitions[table_ref] = view.view_definition
664
661
 
665
662
  view.column_count = len(columns)
666
663
  if not view.column_count:
@@ -701,14 +698,11 @@ class BigQuerySchemaGenerator:
701
698
  f"Snapshot doesn't have any column or unable to get columns for snapshot: {table_identifier}"
702
699
  )
703
700
 
704
- if self.store_table_refs:
705
- table_ref = str(
706
- BigQueryTableRef(table_identifier).get_sanitized_table_ref()
707
- )
708
- self.table_refs.add(table_ref)
709
- if snapshot.base_table_identifier:
710
- self.snapshot_refs_by_project[project_id].add(table_ref)
711
- self.snapshots_by_ref[table_ref] = snapshot
701
+ table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
702
+ self.table_refs.add(table_ref)
703
+ if snapshot.base_table_identifier:
704
+ self.snapshot_refs_by_project[project_id].add(table_ref)
705
+ self.snapshots_by_ref[table_ref] = snapshot
712
706
 
713
707
  yield from self.gen_snapshot_dataset_workunits(
714
708
  table=snapshot,
@@ -1148,7 +1142,7 @@ class BigQuerySchemaGenerator:
1148
1142
  foreignKeys=foreign_keys if foreign_keys else None,
1149
1143
  )
1150
1144
 
1151
- if self.config.lineage_parse_view_ddl or self.config.lineage_use_sql_parser:
1145
+ if self.config.lineage_use_sql_parser:
1152
1146
  self.sql_parser_schema_resolver.add_schema_metadata(
1153
1147
  dataset_urn, schema_metadata
1154
1148
  )
@@ -291,16 +291,15 @@ class BigqueryLineageExtractor:
291
291
  snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
292
292
  ) -> Iterable[MetadataWorkUnit]:
293
293
  for project in projects:
294
- if self.config.lineage_parse_view_ddl:
295
- for view in view_refs_by_project[project]:
296
- self.datasets_skip_audit_log_lineage.add(view)
297
- self.aggregator.add_view_definition(
298
- view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
299
- BigQueryTableRef.from_string_name(view)
300
- ),
301
- view_definition=view_definitions[view],
302
- default_db=project,
303
- )
294
+ for view in view_refs_by_project[project]:
295
+ self.datasets_skip_audit_log_lineage.add(view)
296
+ self.aggregator.add_view_definition(
297
+ view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
298
+ BigQueryTableRef.from_string_name(view)
299
+ ),
300
+ view_definition=view_definitions[view],
301
+ default_db=project,
302
+ )
304
303
 
305
304
  for snapshot_ref in snapshot_refs_by_project[project]:
306
305
  snapshot = snapshots_by_ref[snapshot_ref]
@@ -322,23 +321,11 @@ class BigqueryLineageExtractor:
322
321
  def get_lineage_workunits(
323
322
  self,
324
323
  projects: List[str],
325
- view_refs_by_project: Dict[str, Set[str]],
326
- view_definitions: FileBackedDict[str],
327
- snapshot_refs_by_project: Dict[str, Set[str]],
328
- snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
329
324
  table_refs: Set[str],
330
325
  ) -> Iterable[MetadataWorkUnit]:
331
326
  if not self._should_ingest_lineage():
332
327
  return
333
328
 
334
- yield from self.get_lineage_workunits_for_views_and_snapshots(
335
- projects,
336
- view_refs_by_project,
337
- view_definitions,
338
- snapshot_refs_by_project,
339
- snapshots_by_ref,
340
- )
341
-
342
329
  if self.config.use_exported_bigquery_audit_metadata:
343
330
  projects = ["*"] # project_id not used when using exported metadata
344
331
 
@@ -167,7 +167,7 @@ class DataJobEntity:
167
167
  class DataProcessCleanupReport(SourceReport):
168
168
  num_aspects_removed: int = 0
169
169
  num_aspect_removed_by_type: TopKDict[str, int] = field(default_factory=TopKDict)
170
- sample_removed_aspects_by_type: TopKDict[str, LossyList[str]] = field(
170
+ sample_soft_deleted_aspects_by_type: TopKDict[str, LossyList[str]] = field(
171
171
  default_factory=TopKDict
172
172
  )
173
173
  num_data_flows_found: int = 0
@@ -286,9 +286,9 @@ class DataProcessCleanup:
286
286
  self.report.num_aspect_removed_by_type[type] = (
287
287
  self.report.num_aspect_removed_by_type.get(type, 0) + 1
288
288
  )
289
- if type not in self.report.sample_removed_aspects_by_type:
290
- self.report.sample_removed_aspects_by_type[type] = LossyList()
291
- self.report.sample_removed_aspects_by_type[type].append(urn)
289
+ if type not in self.report.sample_soft_deleted_aspects_by_type:
290
+ self.report.sample_soft_deleted_aspects_by_type[type] = LossyList()
291
+ self.report.sample_soft_deleted_aspects_by_type[type].append(urn)
292
292
 
293
293
  if self.dry_run:
294
294
  logger.info(