acryl-datahub 0.15.0.1rc12__py3-none-any.whl → 0.15.0.1rc14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/METADATA +2369 -2369
- {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/RECORD +35 -31
- datahub/__init__.py +1 -1
- datahub/emitter/mce_builder.py +3 -3
- datahub/emitter/mcp_patch_builder.py +36 -12
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
- datahub/ingestion/source/gc/dataprocess_cleanup.py +4 -4
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +159 -71
- datahub/ingestion/source/tableau/tableau.py +3 -0
- datahub/ingestion/source/tableau/tableau_common.py +18 -0
- datahub/metadata/_schema_classes.py +61 -1
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
- datahub/metadata/schema.avsc +64 -29
- datahub/metadata/schemas/DataJobKey.avsc +2 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
- datahub/specific/aspect_helpers/__init__.py +0 -0
- datahub/specific/aspect_helpers/custom_properties.py +79 -0
- datahub/specific/aspect_helpers/ownership.py +67 -0
- datahub/specific/aspect_helpers/structured_properties.py +72 -0
- datahub/specific/aspect_helpers/tags.py +42 -0
- datahub/specific/aspect_helpers/terms.py +43 -0
- datahub/specific/chart.py +28 -184
- datahub/specific/dashboard.py +31 -196
- datahub/specific/datajob.py +34 -189
- datahub/specific/dataproduct.py +24 -86
- datahub/specific/dataset.py +48 -133
- datahub/specific/form.py +12 -32
- datahub/specific/structured_property.py +9 -9
- datahub/specific/custom_properties.py +0 -37
- datahub/specific/ownership.py +0 -48
- datahub/specific/structured_properties.py +0 -53
- {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc12.dist-info → acryl_datahub-0.15.0.1rc14.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=tC4XcRTMJqr-bc6T1QdoRI7MvbIkSZk8AscmW3iOtOo,577
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -114,10 +114,10 @@ datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
|
|
|
114
114
|
datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
|
|
115
115
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
116
116
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
117
|
-
datahub/emitter/mce_builder.py,sha256=
|
|
117
|
+
datahub/emitter/mce_builder.py,sha256=IqHOm0cpzdVC_mQOqk0yEVJUEj9xn8am2OFAwwQeX_8,16342
|
|
118
118
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
119
119
|
datahub/emitter/mcp_builder.py,sha256=eOcuz41c4a3oTkNk39yYl9bTxpksxqATPHLcqyhPGT0,9856
|
|
120
|
-
datahub/emitter/mcp_patch_builder.py,sha256=
|
|
120
|
+
datahub/emitter/mcp_patch_builder.py,sha256=oonC8iGOvDzqj890CxOjWlBdDEF1RnwvbSZy1sivlTY,4572
|
|
121
121
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
122
122
|
datahub/emitter/rest_emitter.py,sha256=oqyRuXG1o1dYjiEIH5TFMb1q0xhRbpxPIA5qkyz0iQ8,16407
|
|
123
123
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
@@ -169,7 +169,7 @@ datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4
|
|
|
169
169
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
170
170
|
datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
|
|
171
171
|
datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
172
|
-
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=
|
|
172
|
+
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=O2SGDU2_qMtyr_1BH9-WkNOojFWig2z4O3M21nTRo70,9908
|
|
173
173
|
datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
|
|
174
174
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
175
175
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -233,20 +233,20 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
|
|
|
233
233
|
datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
|
|
234
234
|
datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
|
|
235
235
|
datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
236
|
-
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256
|
|
236
|
+
datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=YMsyj6s7fggzisWfDdbT4w1MKJ3eRdNERsCShnu0Zqo,13681
|
|
237
237
|
datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
|
|
238
238
|
datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
|
|
239
|
-
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=
|
|
239
|
+
datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=PqbYNqM4-KukCU1meuvsk0qbiWa7UFh5hqHrHsvOSWQ,25889
|
|
240
240
|
datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
|
|
241
241
|
datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
|
|
242
242
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
|
|
243
243
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
|
|
244
244
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
|
|
245
245
|
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
|
|
246
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
246
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=o2My5Q7ab39qHP3jjVFCQSErogGYb14s6397xHIZSqc,50568
|
|
247
247
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
248
248
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
249
|
-
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=
|
|
249
|
+
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=LJqdkCR8H55b3txCVBM-cs1T5QWxSTimJ3ebSgtXjgI,44874
|
|
250
250
|
datahub/ingestion/source/bigquery_v2/profiler.py,sha256=8-yAoq8sX0E6VIwr75YbM8wITRNhGfxgte9BCeGNkMM,10681
|
|
251
251
|
datahub/ingestion/source/bigquery_v2/queries.py,sha256=B2vJLZYfwM1J5JAckijKJTxLhDYA0yw3kfzj5oRQB5c,20151
|
|
252
252
|
datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=xLf-vCUAnNuDdTHghxJvPOyGeA_XLCW3r-xj-8cfn3Q,19528
|
|
@@ -303,9 +303,9 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
|
|
|
303
303
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
304
304
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
305
305
|
datahub/ingestion/source/gc/datahub_gc.py,sha256=WOg3yIaNmwdbSTwytKeSfIUihsM7FMYBip9u2Dnwk3c,12849
|
|
306
|
-
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
|
|
306
|
+
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=86Tm3NNWMf0xM4TklNIEeNOjEingKpYy-XvCPeaAb4k,17125
|
|
307
307
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=sZbdkg3MuPVGf8eeeRg_2khGMZ01QoH4dgJiTxf7Srg,9813
|
|
308
|
-
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=
|
|
308
|
+
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=LvDGTaAaI-T0OZ3fkaFwipLdzPePunuSVWoEuSBsfEM,11099
|
|
309
309
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
310
310
|
datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
|
|
311
311
|
datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
|
|
@@ -491,8 +491,8 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
491
491
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
|
|
492
492
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
493
493
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
494
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
495
|
-
datahub/ingestion/source/tableau/tableau_common.py,sha256=
|
|
494
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=fY--jFtPtCuDBAruiMStAoT7HqaTDYtiVEKzEYuzCag,140121
|
|
495
|
+
datahub/ingestion/source/tableau/tableau_common.py,sha256=a3Nu0Upy6_pnrd7XpSMcYHdnYca1JBW7H0jMqkYr0ME,26871
|
|
496
496
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
497
497
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
498
498
|
datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
|
|
@@ -566,8 +566,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
566
566
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
567
567
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
568
568
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
569
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
570
|
-
datahub/metadata/schema.avsc,sha256=
|
|
569
|
+
datahub/metadata/_schema_classes.py,sha256=IAWpWPxOeGmvmc96dapE0CySk1Rikbh-YieT-K9YTMY,964636
|
|
570
|
+
datahub/metadata/schema.avsc,sha256=CeVb_Z7k0e5kmeqDUXUW7JDL6KSKBCdfAZzqRI_mLZo,729869
|
|
571
571
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
572
572
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
573
573
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -581,7 +581,7 @@ datahub/metadata/com/linkedin/pegasus2avro/access/token/__init__.py,sha256=P9M7N
|
|
|
581
581
|
datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py,sha256=PgK5O-6pVRaEcvmwXAsSkwRLe8NjGiLH8AVBXeArqK8,5751
|
|
582
582
|
datahub/metadata/com/linkedin/pegasus2avro/businessattribute/__init__.py,sha256=N8kO-eUi0_Rt7weizIExxlnJ2_kZRtPrZLWCC1xtDMA,653
|
|
583
583
|
datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py,sha256=RNyyHLBNp_fxgFcBOLWO2UsXR1ofD_JczcBdPEQSusg,848
|
|
584
|
-
datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=
|
|
584
|
+
datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=ukX0VnveTrMx9G6uDaTkuk4Z2kxXr2hUK8srZuRPxj0,5520
|
|
585
585
|
datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py,sha256=FN63vLiB3FCmIRqBjTA-0Xt7M6i7h5NhaVzbA1ysv18,396
|
|
586
586
|
datahub/metadata/com/linkedin/pegasus2avro/connection/__init__.py,sha256=qRtw-dB14pzVzgQ0pDK8kyBplNdpRxVKNj4D70e_FqI,564
|
|
587
587
|
datahub/metadata/com/linkedin/pegasus2avro/container/__init__.py,sha256=3yWt36KqDKFhRc9pzvt0AMnbMTlhKurGvT3BUvc25QU,510
|
|
@@ -705,7 +705,7 @@ datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdP
|
|
|
705
705
|
datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
|
|
706
706
|
datahub/metadata/schemas/DataJobInfo.avsc,sha256=--obUbt_4X2paB39EeRKP13sBSiK-r0nq070EamoV1w,7212
|
|
707
707
|
datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
|
|
708
|
-
datahub/metadata/schemas/DataJobKey.avsc,sha256=
|
|
708
|
+
datahub/metadata/schemas/DataJobKey.avsc,sha256=4F3myS-O6n7AlUqTvCkMSFvsYAjVhUq6uaQVbqLoYdM,1583
|
|
709
709
|
datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
|
|
710
710
|
datahub/metadata/schemas/DataPlatformInstance.avsc,sha256=SNd3v_YyyLaDflv8Rd5cQR9GrVuky_cDTkYM6FqJiM8,1058
|
|
711
711
|
datahub/metadata/schemas/DataPlatformInstanceKey.avsc,sha256=sXUV5EMT6N-x8d6s8ebcJ5JdFIOsJCtiiU5Jtm-ncIk,800
|
|
@@ -721,6 +721,7 @@ datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkL
|
|
|
721
721
|
datahub/metadata/schemas/DataProcessKey.avsc,sha256=mY1BDiEYo8RchI9DckQEz9Vks5Ibt2RdWZU8OYGnrHA,2240
|
|
722
722
|
datahub/metadata/schemas/DataProductKey.avsc,sha256=tcdQNWk3pLA3xZzOnHvZuq2u4SQuk2YcAlsxE8CcEeU,621
|
|
723
723
|
datahub/metadata/schemas/DataProductProperties.avsc,sha256=nYEK6JgpTprU0iZaqWLZsBGYJLkh6HCi1qCu-wbYhvM,6925
|
|
724
|
+
datahub/metadata/schemas/DataTransformLogic.avsc,sha256=wDng1GK9znVoK0INHGiSCSa-AH5MrDkVdMzz4wOWmrY,2011
|
|
724
725
|
datahub/metadata/schemas/DataTypeInfo.avsc,sha256=MCjzal71P8uIXZg161LrU8rZTJocZeizK-YxYA0Det0,704
|
|
725
726
|
datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiFvJoEGHRzEilw,546
|
|
726
727
|
datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
|
|
@@ -860,16 +861,19 @@ datahub/secret/datahub_secrets_client.py,sha256=WkoJDip7IAKSGDM5oHeZVL8878pd4Bix
|
|
|
860
861
|
datahub/secret/secret_common.py,sha256=PeRFNljPlGfNrmn3VtDVbazQE6J3Q1nA3L-z3cS8LEA,2522
|
|
861
862
|
datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
|
|
862
863
|
datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
|
|
863
|
-
datahub/specific/chart.py,sha256=
|
|
864
|
-
datahub/specific/
|
|
865
|
-
datahub/specific/
|
|
866
|
-
datahub/specific/
|
|
867
|
-
datahub/specific/
|
|
868
|
-
datahub/specific/
|
|
869
|
-
datahub/specific/
|
|
870
|
-
datahub/specific/
|
|
871
|
-
datahub/specific/
|
|
872
|
-
datahub/specific/
|
|
864
|
+
datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,6684
|
|
865
|
+
datahub/specific/dashboard.py,sha256=D8CnOSScQ0-UICFjQnQOtqL-SlNSxhSuub4vZ3BpcuI,10017
|
|
866
|
+
datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
|
|
867
|
+
datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
|
|
868
|
+
datahub/specific/dataset.py,sha256=je9j3rVzpSiXoOe0UmfD7mc5vCpLAAO74Z8q1SvwPX0,9725
|
|
869
|
+
datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
|
|
870
|
+
datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
|
|
871
|
+
datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
872
|
+
datahub/specific/aspect_helpers/custom_properties.py,sha256=s87_Aq7BgF_t_I0MCjNEJxYyrNxMTb1N0hCifT8Y6Cw,2255
|
|
873
|
+
datahub/specific/aspect_helpers/ownership.py,sha256=rNYiJSqb_FJQhFRSIQScg4mfxgYhPvjeaYyvutY6CN0,1861
|
|
874
|
+
datahub/specific/aspect_helpers/structured_properties.py,sha256=EVnFS025r-PG5PAC7VENVJO-JvDYif2VeYonsC3Z8m8,2255
|
|
875
|
+
datahub/specific/aspect_helpers/tags.py,sha256=YHcKfRaIvv12wcmfMc8-Dk6gf6xIvJedkn451uBuz-Y,1254
|
|
876
|
+
datahub/specific/aspect_helpers/terms.py,sha256=l8xoOLQ2RsIl3UnKhLisQNwrGTFIPrzfvP4zjH-AhwI,1352
|
|
873
877
|
datahub/sql_parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
874
878
|
datahub/sql_parsing/_models.py,sha256=il-xm1RcLdi1phJUV3xrTecdOGH31akqheuSC2N4YhQ,3141
|
|
875
879
|
datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPiTb6-rO3Y,7043
|
|
@@ -982,8 +986,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
982
986
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
983
987
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
984
988
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
985
|
-
acryl_datahub-0.15.0.
|
|
986
|
-
acryl_datahub-0.15.0.
|
|
987
|
-
acryl_datahub-0.15.0.
|
|
988
|
-
acryl_datahub-0.15.0.
|
|
989
|
-
acryl_datahub-0.15.0.
|
|
989
|
+
acryl_datahub-0.15.0.1rc14.dist-info/METADATA,sha256=na5JJwiilGTUFiwOBRULg2a8NxVvzNRgwodacg0LOSU,173444
|
|
990
|
+
acryl_datahub-0.15.0.1rc14.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
991
|
+
acryl_datahub-0.15.0.1rc14.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
992
|
+
acryl_datahub-0.15.0.1rc14.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
993
|
+
acryl_datahub-0.15.0.1rc14.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
datahub/emitter/mce_builder.py
CHANGED
|
@@ -24,6 +24,7 @@ from typing import (
|
|
|
24
24
|
|
|
25
25
|
import typing_inspect
|
|
26
26
|
from avrogen.dict_wrapper import DictWrapper
|
|
27
|
+
from typing_extensions import assert_never
|
|
27
28
|
|
|
28
29
|
from datahub.emitter.enum_helpers import get_enum_options
|
|
29
30
|
from datahub.metadata.schema_classes import (
|
|
@@ -269,9 +270,8 @@ def make_owner_urn(owner: str, owner_type: OwnerType) -> str:
|
|
|
269
270
|
return make_user_urn(owner)
|
|
270
271
|
elif owner_type == OwnerType.GROUP:
|
|
271
272
|
return make_group_urn(owner)
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
return f"urn:li:{owner_type.value}:{owner}"
|
|
273
|
+
else:
|
|
274
|
+
assert_never(owner_type)
|
|
275
275
|
|
|
276
276
|
|
|
277
277
|
def make_ownership_type_urn(type: str) -> str:
|
|
@@ -2,7 +2,19 @@ import json
|
|
|
2
2
|
import time
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import (
|
|
6
|
+
Any,
|
|
7
|
+
Dict,
|
|
8
|
+
List,
|
|
9
|
+
Literal,
|
|
10
|
+
Optional,
|
|
11
|
+
Protocol,
|
|
12
|
+
Tuple,
|
|
13
|
+
Union,
|
|
14
|
+
runtime_checkable,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from typing_extensions import LiteralString
|
|
6
18
|
|
|
7
19
|
from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
|
|
8
20
|
from datahub.emitter.serialization_helper import pre_json_transform
|
|
@@ -19,25 +31,36 @@ from datahub.metadata.urns import Urn
|
|
|
19
31
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
20
32
|
|
|
21
33
|
|
|
34
|
+
@runtime_checkable
|
|
35
|
+
class SupportsToObj(Protocol):
|
|
36
|
+
def to_obj(self) -> Any:
|
|
37
|
+
...
|
|
38
|
+
|
|
39
|
+
|
|
22
40
|
def _recursive_to_obj(obj: Any) -> Any:
|
|
23
41
|
if isinstance(obj, list):
|
|
24
42
|
return [_recursive_to_obj(v) for v in obj]
|
|
25
|
-
elif
|
|
43
|
+
elif isinstance(obj, SupportsToObj):
|
|
26
44
|
return obj.to_obj()
|
|
27
45
|
else:
|
|
28
46
|
return obj
|
|
29
47
|
|
|
30
48
|
|
|
49
|
+
PatchPath = Tuple[Union[LiteralString, Urn], ...]
|
|
50
|
+
PatchOp = Literal["add", "remove", "replace"]
|
|
51
|
+
|
|
52
|
+
|
|
31
53
|
@dataclass
|
|
32
|
-
class _Patch:
|
|
33
|
-
op:
|
|
34
|
-
path:
|
|
54
|
+
class _Patch(SupportsToObj):
|
|
55
|
+
op: PatchOp
|
|
56
|
+
path: PatchPath
|
|
35
57
|
value: Any
|
|
36
58
|
|
|
37
59
|
def to_obj(self) -> Dict:
|
|
60
|
+
quoted_path = "/" + "/".join(MetadataPatchProposal.quote(p) for p in self.path)
|
|
38
61
|
return {
|
|
39
62
|
"op": self.op,
|
|
40
|
-
"path":
|
|
63
|
+
"path": quoted_path,
|
|
41
64
|
"value": _recursive_to_obj(self.value),
|
|
42
65
|
}
|
|
43
66
|
|
|
@@ -63,15 +86,16 @@ class MetadataPatchProposal:
|
|
|
63
86
|
|
|
64
87
|
# Json Patch quoting based on https://jsonpatch.com/#json-pointer
|
|
65
88
|
@classmethod
|
|
66
|
-
def quote(cls, value: str) -> str:
|
|
67
|
-
return value.replace("~", "~0").replace("/", "~1")
|
|
89
|
+
def quote(cls, value: Union[str, Urn]) -> str:
|
|
90
|
+
return str(value).replace("~", "~0").replace("/", "~1")
|
|
68
91
|
|
|
69
92
|
def _add_patch(
|
|
70
|
-
self,
|
|
93
|
+
self,
|
|
94
|
+
aspect_name: str,
|
|
95
|
+
op: PatchOp,
|
|
96
|
+
path: PatchPath,
|
|
97
|
+
value: Any,
|
|
71
98
|
) -> None:
|
|
72
|
-
if not isinstance(path, str):
|
|
73
|
-
path = "/" + "/".join(self.quote(p) for p in path)
|
|
74
|
-
|
|
75
99
|
# TODO: Validate that aspectName is a valid aspect for this entityType
|
|
76
100
|
self.patches[aspect_name].append(_Patch(op, path, value))
|
|
77
101
|
|
|
@@ -146,12 +146,55 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
|
|
|
146
146
|
aspect_value=source_info_aspect,
|
|
147
147
|
)
|
|
148
148
|
|
|
149
|
+
@staticmethod
|
|
150
|
+
def _convert_sets_to_lists(obj: Any) -> Any:
|
|
151
|
+
"""
|
|
152
|
+
Recursively converts all sets to lists in a Python object.
|
|
153
|
+
Works with nested dictionaries, lists, and sets.
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
obj: Any Python object that might contain sets
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
The object with all sets converted to lists
|
|
160
|
+
"""
|
|
161
|
+
if isinstance(obj, dict):
|
|
162
|
+
return {
|
|
163
|
+
key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
|
|
164
|
+
for key, value in obj.items()
|
|
165
|
+
}
|
|
166
|
+
elif isinstance(obj, list):
|
|
167
|
+
return [
|
|
168
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
169
|
+
for element in obj
|
|
170
|
+
]
|
|
171
|
+
elif isinstance(obj, set):
|
|
172
|
+
return [
|
|
173
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
174
|
+
for element in obj
|
|
175
|
+
]
|
|
176
|
+
elif isinstance(obj, tuple):
|
|
177
|
+
return tuple(
|
|
178
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
179
|
+
for element in obj
|
|
180
|
+
)
|
|
181
|
+
else:
|
|
182
|
+
return obj
|
|
183
|
+
|
|
149
184
|
def _get_recipe_to_report(self, ctx: PipelineContext) -> str:
|
|
150
185
|
assert ctx.pipeline_config
|
|
151
186
|
if not self.report_recipe or not ctx.pipeline_config.get_raw_dict():
|
|
152
187
|
return ""
|
|
153
188
|
else:
|
|
154
|
-
|
|
189
|
+
redacted_recipe = redact_raw_config(ctx.pipeline_config.get_raw_dict())
|
|
190
|
+
# This is required otherwise json dumps will fail
|
|
191
|
+
# with a TypeError: Object of type set is not JSON serializable
|
|
192
|
+
converted_recipe = (
|
|
193
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(
|
|
194
|
+
redacted_recipe
|
|
195
|
+
)
|
|
196
|
+
)
|
|
197
|
+
return json.dumps(converted_recipe)
|
|
155
198
|
|
|
156
199
|
def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None:
|
|
157
200
|
self.sink.write_record_async(
|
|
@@ -206,9 +206,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
206
206
|
|
|
207
207
|
def _init_schema_resolver(self) -> SchemaResolver:
|
|
208
208
|
schema_resolution_required = (
|
|
209
|
-
self.config.use_queries_v2
|
|
210
|
-
or self.config.lineage_parse_view_ddl
|
|
211
|
-
or self.config.lineage_use_sql_parser
|
|
209
|
+
self.config.use_queries_v2 or self.config.lineage_use_sql_parser
|
|
212
210
|
)
|
|
213
211
|
schema_ingestion_enabled = (
|
|
214
212
|
self.config.include_schema_metadata
|
|
@@ -255,18 +253,16 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
255
253
|
for project in projects:
|
|
256
254
|
yield from self.bq_schema_extractor.get_project_workunits(project)
|
|
257
255
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
self.bq_schema_extractor.snapshot_refs_by_project,
|
|
267
|
-
self.bq_schema_extractor.snapshots_by_ref,
|
|
268
|
-
)
|
|
256
|
+
self.report.set_ingestion_stage("*", "View and Snapshot Lineage")
|
|
257
|
+
yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots(
|
|
258
|
+
[p.id for p in projects],
|
|
259
|
+
self.bq_schema_extractor.view_refs_by_project,
|
|
260
|
+
self.bq_schema_extractor.view_definitions,
|
|
261
|
+
self.bq_schema_extractor.snapshot_refs_by_project,
|
|
262
|
+
self.bq_schema_extractor.snapshots_by_ref,
|
|
263
|
+
)
|
|
269
264
|
|
|
265
|
+
if self.config.use_queries_v2:
|
|
270
266
|
# if both usage and lineage are disabled then skip queries extractor piece
|
|
271
267
|
if (
|
|
272
268
|
not self.config.include_usage_statistics
|
|
@@ -306,10 +302,6 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
|
|
|
306
302
|
if self.config.include_table_lineage:
|
|
307
303
|
yield from self.lineage_extractor.get_lineage_workunits(
|
|
308
304
|
[p.id for p in projects],
|
|
309
|
-
self.bq_schema_extractor.view_refs_by_project,
|
|
310
|
-
self.bq_schema_extractor.view_definitions,
|
|
311
|
-
self.bq_schema_extractor.snapshot_refs_by_project,
|
|
312
|
-
self.bq_schema_extractor.snapshots_by_ref,
|
|
313
305
|
self.bq_schema_extractor.table_refs,
|
|
314
306
|
)
|
|
315
307
|
|
|
@@ -463,10 +463,6 @@ class BigQueryV2Config(
|
|
|
463
463
|
default=True,
|
|
464
464
|
description="Use sql parser to resolve view/table lineage.",
|
|
465
465
|
)
|
|
466
|
-
lineage_parse_view_ddl: bool = Field(
|
|
467
|
-
default=True,
|
|
468
|
-
description="Sql parse view ddl to get lineage.",
|
|
469
|
-
)
|
|
470
466
|
|
|
471
467
|
lineage_sql_parser_use_raw_names: bool = Field(
|
|
472
468
|
default=False,
|
|
@@ -572,11 +568,9 @@ class BigQueryV2Config(
|
|
|
572
568
|
"See [this](https://cloud.google.com/bigquery/docs/information-schema-jobs#scope_and_syntax) for details.",
|
|
573
569
|
)
|
|
574
570
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
include_view_column_lineage: bool = Field(default=True, hidden_from_docs=True)
|
|
571
|
+
_include_view_lineage = pydantic_removed_field("include_view_lineage")
|
|
572
|
+
_include_view_column_lineage = pydantic_removed_field("include_view_column_lineage")
|
|
573
|
+
_lineage_parse_view_ddl = pydantic_removed_field("lineage_parse_view_ddl")
|
|
580
574
|
|
|
581
575
|
@root_validator(pre=True)
|
|
582
576
|
def set_include_schema_metadata(cls, values: Dict) -> Dict:
|
|
@@ -653,14 +653,11 @@ class BigQuerySchemaGenerator:
|
|
|
653
653
|
self.report.report_dropped(table_identifier.raw_table_name())
|
|
654
654
|
return
|
|
655
655
|
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
)
|
|
660
|
-
self.
|
|
661
|
-
if self.config.lineage_parse_view_ddl and view.view_definition:
|
|
662
|
-
self.view_refs_by_project[project_id].add(table_ref)
|
|
663
|
-
self.view_definitions[table_ref] = view.view_definition
|
|
656
|
+
table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
|
|
657
|
+
self.table_refs.add(table_ref)
|
|
658
|
+
if view.view_definition:
|
|
659
|
+
self.view_refs_by_project[project_id].add(table_ref)
|
|
660
|
+
self.view_definitions[table_ref] = view.view_definition
|
|
664
661
|
|
|
665
662
|
view.column_count = len(columns)
|
|
666
663
|
if not view.column_count:
|
|
@@ -701,14 +698,11 @@ class BigQuerySchemaGenerator:
|
|
|
701
698
|
f"Snapshot doesn't have any column or unable to get columns for snapshot: {table_identifier}"
|
|
702
699
|
)
|
|
703
700
|
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
)
|
|
708
|
-
self.
|
|
709
|
-
if snapshot.base_table_identifier:
|
|
710
|
-
self.snapshot_refs_by_project[project_id].add(table_ref)
|
|
711
|
-
self.snapshots_by_ref[table_ref] = snapshot
|
|
701
|
+
table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref())
|
|
702
|
+
self.table_refs.add(table_ref)
|
|
703
|
+
if snapshot.base_table_identifier:
|
|
704
|
+
self.snapshot_refs_by_project[project_id].add(table_ref)
|
|
705
|
+
self.snapshots_by_ref[table_ref] = snapshot
|
|
712
706
|
|
|
713
707
|
yield from self.gen_snapshot_dataset_workunits(
|
|
714
708
|
table=snapshot,
|
|
@@ -1148,7 +1142,7 @@ class BigQuerySchemaGenerator:
|
|
|
1148
1142
|
foreignKeys=foreign_keys if foreign_keys else None,
|
|
1149
1143
|
)
|
|
1150
1144
|
|
|
1151
|
-
if self.config.
|
|
1145
|
+
if self.config.lineage_use_sql_parser:
|
|
1152
1146
|
self.sql_parser_schema_resolver.add_schema_metadata(
|
|
1153
1147
|
dataset_urn, schema_metadata
|
|
1154
1148
|
)
|
|
@@ -291,16 +291,15 @@ class BigqueryLineageExtractor:
|
|
|
291
291
|
snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
|
|
292
292
|
) -> Iterable[MetadataWorkUnit]:
|
|
293
293
|
for project in projects:
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
self.
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
)
|
|
294
|
+
for view in view_refs_by_project[project]:
|
|
295
|
+
self.datasets_skip_audit_log_lineage.add(view)
|
|
296
|
+
self.aggregator.add_view_definition(
|
|
297
|
+
view_urn=self.identifiers.gen_dataset_urn_from_raw_ref(
|
|
298
|
+
BigQueryTableRef.from_string_name(view)
|
|
299
|
+
),
|
|
300
|
+
view_definition=view_definitions[view],
|
|
301
|
+
default_db=project,
|
|
302
|
+
)
|
|
304
303
|
|
|
305
304
|
for snapshot_ref in snapshot_refs_by_project[project]:
|
|
306
305
|
snapshot = snapshots_by_ref[snapshot_ref]
|
|
@@ -322,23 +321,11 @@ class BigqueryLineageExtractor:
|
|
|
322
321
|
def get_lineage_workunits(
|
|
323
322
|
self,
|
|
324
323
|
projects: List[str],
|
|
325
|
-
view_refs_by_project: Dict[str, Set[str]],
|
|
326
|
-
view_definitions: FileBackedDict[str],
|
|
327
|
-
snapshot_refs_by_project: Dict[str, Set[str]],
|
|
328
|
-
snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot],
|
|
329
324
|
table_refs: Set[str],
|
|
330
325
|
) -> Iterable[MetadataWorkUnit]:
|
|
331
326
|
if not self._should_ingest_lineage():
|
|
332
327
|
return
|
|
333
328
|
|
|
334
|
-
yield from self.get_lineage_workunits_for_views_and_snapshots(
|
|
335
|
-
projects,
|
|
336
|
-
view_refs_by_project,
|
|
337
|
-
view_definitions,
|
|
338
|
-
snapshot_refs_by_project,
|
|
339
|
-
snapshots_by_ref,
|
|
340
|
-
)
|
|
341
|
-
|
|
342
329
|
if self.config.use_exported_bigquery_audit_metadata:
|
|
343
330
|
projects = ["*"] # project_id not used when using exported metadata
|
|
344
331
|
|
|
@@ -167,7 +167,7 @@ class DataJobEntity:
|
|
|
167
167
|
class DataProcessCleanupReport(SourceReport):
|
|
168
168
|
num_aspects_removed: int = 0
|
|
169
169
|
num_aspect_removed_by_type: TopKDict[str, int] = field(default_factory=TopKDict)
|
|
170
|
-
|
|
170
|
+
sample_soft_deleted_aspects_by_type: TopKDict[str, LossyList[str]] = field(
|
|
171
171
|
default_factory=TopKDict
|
|
172
172
|
)
|
|
173
173
|
num_data_flows_found: int = 0
|
|
@@ -286,9 +286,9 @@ class DataProcessCleanup:
|
|
|
286
286
|
self.report.num_aspect_removed_by_type[type] = (
|
|
287
287
|
self.report.num_aspect_removed_by_type.get(type, 0) + 1
|
|
288
288
|
)
|
|
289
|
-
if type not in self.report.
|
|
290
|
-
self.report.
|
|
291
|
-
self.report.
|
|
289
|
+
if type not in self.report.sample_soft_deleted_aspects_by_type:
|
|
290
|
+
self.report.sample_soft_deleted_aspects_by_type[type] = LossyList()
|
|
291
|
+
self.report.sample_soft_deleted_aspects_by_type[type].append(urn)
|
|
292
292
|
|
|
293
293
|
if self.dry_run:
|
|
294
294
|
logger.info(
|