acryl-datahub 1.1.0.4rc3__py3-none-any.whl → 1.1.0.5rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/METADATA +2355 -2355
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/RECORD +26 -23
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/run/pipeline.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +12 -0
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +384 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/preset.py +1 -1
- datahub/metadata/_internal_schema_classes.py +3 -0
- datahub/metadata/schema.avsc +2 -0
- datahub/metadata/schemas/ContainerProperties.avsc +2 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +2 -0
- datahub/metadata/schemas/DataJobInfo.avsc +2 -0
- datahub/metadata/schemas/DataProcessKey.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +2 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -0
- datahub/metadata/schemas/MLModelKey.avsc +2 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +2 -0
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.1.0.
|
|
1
|
+
acryl_datahub-1.1.0.5rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=TZdOK9cFifb1yYky21qrv8R4D8Q8htQJWiKxMGKJpS0,323
|
|
5
5
|
datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -150,7 +150,7 @@ datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINq
|
|
|
150
150
|
datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
|
|
151
151
|
datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
|
|
152
152
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
153
|
-
datahub/ingestion/api/sink.py,sha256=
|
|
153
|
+
datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
|
|
154
154
|
datahub/ingestion/api/source.py,sha256=hYwh4LHcG5RS6xQ9QAh5Zlijjl6r1JaibKPb75Hne7A,19518
|
|
155
155
|
datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
|
|
156
156
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
@@ -189,7 +189,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
|
|
|
189
189
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
190
190
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
191
191
|
datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
|
|
192
|
-
datahub/ingestion/run/pipeline.py,sha256=
|
|
192
|
+
datahub/ingestion/run/pipeline.py,sha256=TYE1Vm144uHFmqEsrJcbrD0fcg2M-ZvYEGGGbIp1Rmk,29943
|
|
193
193
|
datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
|
|
194
194
|
datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
|
|
195
195
|
datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -197,7 +197,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
|
|
|
197
197
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
198
198
|
datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
|
|
199
199
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
200
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
200
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=DOhtTHqKpmqgI3rUY9ri2QZAyXYDFINWMG6ne7VYUXI,13463
|
|
201
201
|
datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
|
|
202
202
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
203
203
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -218,7 +218,7 @@ datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-
|
|
|
218
218
|
datahub/ingestion/source/nifi.py,sha256=2jxSzuHIRQFF7DLoceGbUd_10lkhHRlqA9hhApSt3Yw,56882
|
|
219
219
|
datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
|
|
220
220
|
datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
|
|
221
|
-
datahub/ingestion/source/preset.py,sha256=
|
|
221
|
+
datahub/ingestion/source/preset.py,sha256=1goxuFoLw50dokr2gp1MhUrDJ8CFNX-wBZIvv7laEXA,3966
|
|
222
222
|
datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
|
|
223
223
|
datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
|
|
224
224
|
datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
|
|
@@ -386,6 +386,9 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2
|
|
|
386
386
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
387
387
|
datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
|
|
388
388
|
datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
|
|
389
|
+
datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
390
|
+
datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=i19UFii3wOPG5CVQfV_20MwIRAJjr0TxsIiVjKcBND4,14314
|
|
391
|
+
datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=oIC1vcOx76Vl63O9kcjP_iInBHyS-ATdN3Y932TCCZg,3283
|
|
389
392
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
390
393
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=JqKCwxBJfOrC8SF7CmDG0cseWxHk_7E2v4Diw3Q0-WM,14181
|
|
391
394
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -608,8 +611,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
608
611
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
609
612
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
610
613
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
611
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
612
|
-
datahub/metadata/schema.avsc,sha256=
|
|
614
|
+
datahub/metadata/_internal_schema_classes.py,sha256=UACA9XSSdTueZTUW4v_4OWIsWga3T99I3gnGAPCe2w8,1019257
|
|
615
|
+
datahub/metadata/schema.avsc,sha256=GsQZCPSD3_KKXvGALSzgTgIONL0r3tXme9M4rXQy_q4,707244
|
|
613
616
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
614
617
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
615
618
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -711,7 +714,7 @@ datahub/metadata/schemas/ChartQuery.avsc,sha256=6qz8Q5wa5h0GS6QcwfzVZrRi26PoEwXy
|
|
|
711
714
|
datahub/metadata/schemas/ChartUsageStatistics.avsc,sha256=FjEEPj_19jUvJcS9cyZtHqByQyPdt2xhmxfFdw9mSM4,5881
|
|
712
715
|
datahub/metadata/schemas/Container.avsc,sha256=pEpRQgClrJRm59eAiltc4YoP0pg7TG6Bu-ENCh11-mw,813
|
|
713
716
|
datahub/metadata/schemas/ContainerKey.avsc,sha256=5wUgUbR1zzNcSpeDOHm4XWFf_xDtXFuIikbjKXMFe80,939
|
|
714
|
-
datahub/metadata/schemas/ContainerProperties.avsc,sha256=
|
|
717
|
+
datahub/metadata/schemas/ContainerProperties.avsc,sha256=BPhrRf4I7UImQhB-55Qby82MXKuDQL0cwlvdg0aZEHs,5112
|
|
715
718
|
datahub/metadata/schemas/CorpGroupEditableInfo.avsc,sha256=sItWMAGfQdYwmF5xLzMnYGl_rtXvGlxPbEjmRm1E6u4,1290
|
|
716
719
|
datahub/metadata/schemas/CorpGroupInfo.avsc,sha256=G83lndR7C6WPfccFg7qFE5Ely5vrdxC8x7zJB3fW3Sw,4740
|
|
717
720
|
datahub/metadata/schemas/CorpGroupKey.avsc,sha256=B_RMHAFF_nd86qtO2p1slAZnxruCmBM7DUfILAU_UaI,953
|
|
@@ -728,7 +731,7 @@ datahub/metadata/schemas/DashboardUsageStatistics.avsc,sha256=pUAKqs49Wy5pAL92g_
|
|
|
728
731
|
datahub/metadata/schemas/DataContractKey.avsc,sha256=m0ej_Wu7NcuZQCRwQI3Sidfv9bUy5mvuhlpgax6i1xA,511
|
|
729
732
|
datahub/metadata/schemas/DataContractProperties.avsc,sha256=RCxuJMlZwqEE0iHTpuXvcH6zRFoOt7ysQFPrJRp3RqE,4763
|
|
730
733
|
datahub/metadata/schemas/DataContractStatus.avsc,sha256=5yvT43AIB13Dn_h0-4s7fsL7BTuXhkK5pi2KJug4_qg,1029
|
|
731
|
-
datahub/metadata/schemas/DataFlowInfo.avsc,sha256=
|
|
734
|
+
datahub/metadata/schemas/DataFlowInfo.avsc,sha256=SOXI26Vgv3O-t9z6_ff0gzVL5-M1XBlCOUCI9vB-jpk,4958
|
|
732
735
|
datahub/metadata/schemas/DataFlowKey.avsc,sha256=lIXr1oVJIHxOEibTx1YWFhGY2VQyWs9AW65eePPmdXI,1345
|
|
733
736
|
datahub/metadata/schemas/DataHubAccessTokenInfo.avsc,sha256=WS77M5w7GJFxUAiyXaxUvBqO0XFV2FnKPxXSXYbXHTE,1646
|
|
734
737
|
datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4NJOB5yODVr9J-wZzkZanEgo,483
|
|
@@ -755,7 +758,7 @@ datahub/metadata/schemas/DataHubUpgradeRequest.avsc,sha256=3xYsf3XVHBcclHdRnLVMo
|
|
|
755
758
|
datahub/metadata/schemas/DataHubUpgradeResult.avsc,sha256=VydVb4yqjIviR73-T6TooF6OiahwxTHciP97NuF4qvI,1385
|
|
756
759
|
datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdPQYwvhhv9RQv6gEeg,11642
|
|
757
760
|
datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
|
|
758
|
-
datahub/metadata/schemas/DataJobInfo.avsc,sha256=
|
|
761
|
+
datahub/metadata/schemas/DataJobInfo.avsc,sha256=Bc9qdDcXI0GQdEgNTpgHaBbnrppDKQ-1xR26diOSVIQ,7488
|
|
759
762
|
datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
|
|
760
763
|
datahub/metadata/schemas/DataJobKey.avsc,sha256=S7egH8jWjKW52MG6Pg7plDoP15XfTTiMde5V6nR6ycE,1624
|
|
761
764
|
datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
|
|
@@ -770,7 +773,7 @@ datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=xyGBUf3vFHrMLtmZj
|
|
|
770
773
|
datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5IVetgD8mW2k--F6CwmYXM3KOE6edU8,3836
|
|
771
774
|
datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
|
|
772
775
|
datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
|
|
773
|
-
datahub/metadata/schemas/DataProcessKey.avsc,sha256=
|
|
776
|
+
datahub/metadata/schemas/DataProcessKey.avsc,sha256=ZZE2HN4mwZtm_TJNcdohFS97WXytFq9HAs_-shor6sY,2518
|
|
774
777
|
datahub/metadata/schemas/DataProductKey.avsc,sha256=SyjmL2ieea1P6uipXst37mD5NdGPTqDvJAL3CVo91wk,661
|
|
775
778
|
datahub/metadata/schemas/DataProductProperties.avsc,sha256=Lc3duV7YMJLvo_RwckLbW4bbmPrhSS1D-bxVVboNX2c,6930
|
|
776
779
|
datahub/metadata/schemas/DataTransformLogic.avsc,sha256=nHTH6UzJ2Zz88N2aWa96hawLUR20HP7eSynfPtI1kzg,2111
|
|
@@ -779,7 +782,7 @@ datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiF
|
|
|
779
782
|
datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
|
|
780
783
|
datahub/metadata/schemas/DatahubIngestionRunSummary.avsc,sha256=_Ek7NqfJVTLqlM0NR9BRA57N9_ejwDdQvz7B1tVxSEE,9367
|
|
781
784
|
datahub/metadata/schemas/DatasetDeprecation.avsc,sha256=ucXxaDcAUib9_y0k5qOINMn5VK2X3trHK2dcpNcsR2Q,1256
|
|
782
|
-
datahub/metadata/schemas/DatasetKey.avsc,sha256=
|
|
785
|
+
datahub/metadata/schemas/DatasetKey.avsc,sha256=xNh2Zbg1POuHD3qu42-__zNVhKWx6QkA2LY7Dlk0YSY,3473
|
|
783
786
|
datahub/metadata/schemas/DatasetProfile.avsc,sha256=3ZCU9JD6l2razACp0AY6LLMgnkMTj6D_5Xk9np6WWRM,9965
|
|
784
787
|
datahub/metadata/schemas/DatasetProperties.avsc,sha256=DFJn75feqaoQk84zin_o_lqsFFhqkwya5LGC5LLJXbU,4209
|
|
785
788
|
datahub/metadata/schemas/DatasetUpstreamLineage.avsc,sha256=PjAWPbsqwH7FjX2kFDy0dE6ENYOwRynH9vJerWisr2A,5365
|
|
@@ -832,7 +835,7 @@ datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=00paBmYoFIlIUebS0X6BL9y5xqn
|
|
|
832
835
|
datahub/metadata/schemas/GlossaryTerms.avsc,sha256=ogOFO6Hr2Xb7s1JHqxsCPY8r_qY_9kwu69k5-E3j2BM,7123
|
|
833
836
|
datahub/metadata/schemas/GroupMembership.avsc,sha256=wT3Hbpv2Z7V4X_-rIoed0cukAOMyYEL93udK8mMCjn0,557
|
|
834
837
|
datahub/metadata/schemas/IcebergCatalogInfo.avsc,sha256=X9Ejqzn1DyxNIth7vDhtPjGG4xMPQMhl7f-S7fBFxek,691
|
|
835
|
-
datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=
|
|
838
|
+
datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=0m7cQm8cCnBWNI5jGGgr5ZdOg66RQGWSf3gf8ay53So,2705
|
|
836
839
|
datahub/metadata/schemas/IncidentInfo.avsc,sha256=L8xldmWyOW4Ml2Fm9XTRL13lP1CAEP0kgXsd_jLZaEU,12425
|
|
837
840
|
datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
|
|
838
841
|
datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
|
|
@@ -848,17 +851,17 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=hVxNEqsx4GgG11GVryn9ms16O
|
|
|
848
851
|
datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
|
|
849
852
|
datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
|
|
850
853
|
datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
|
|
851
|
-
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=
|
|
854
|
+
datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=EcA0z4sQfqe3IJ8PO8cGW34XMxc9Q2BbCBjgkJTaznE,2685
|
|
852
855
|
datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=7IlGrMmX8nfgezvaZyrXskCTCRlwvRzGOYUOpFV3r6Y,5480
|
|
853
856
|
datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
|
|
854
|
-
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=
|
|
857
|
+
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=zIyIHI-23i_oQMbc1sigar9sJNJsa6CYfHHy-nH5IXE,2779
|
|
855
858
|
datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=AZ5Pohk3_pCctQ4hcE1UOURQFYHQne0dw_lRUpOu5WY,6924
|
|
856
|
-
datahub/metadata/schemas/MLModelKey.avsc,sha256=
|
|
859
|
+
datahub/metadata/schemas/MLModelKey.avsc,sha256=zwoY9opTL5tMm5aoRHoWcNv5DjERYS-hWR05kVFlcTw,3148
|
|
857
860
|
datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
|
|
858
861
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
|
|
859
862
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
860
863
|
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
861
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=
|
|
864
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=kwEwLUSPuqdrx_7uOX2XnEZ6Olm4p2ezYt0bTQSgaTk,377034
|
|
862
865
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=Cf5eECeShCA_XHFr2MRhRQpPE61F6Xv-z1jjoBLJLgc,12239
|
|
863
866
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=tvO5cGIqZAIvUbMon1RAKgSY4E0jvBqT5VmLWAuNGkY,9770
|
|
864
867
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -1075,8 +1078,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1075
1078
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1076
1079
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1077
1080
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1078
|
-
acryl_datahub-1.1.0.
|
|
1079
|
-
acryl_datahub-1.1.0.
|
|
1080
|
-
acryl_datahub-1.1.0.
|
|
1081
|
-
acryl_datahub-1.1.0.
|
|
1082
|
-
acryl_datahub-1.1.0.
|
|
1081
|
+
acryl_datahub-1.1.0.5rc1.dist-info/METADATA,sha256=sKbnEOEUAOv5hgUfoytH-EIKqUc7QxK96ENS7SZUIPs,182347
|
|
1082
|
+
acryl_datahub-1.1.0.5rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1083
|
+
acryl_datahub-1.1.0.5rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
|
|
1084
|
+
acryl_datahub-1.1.0.5rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1085
|
+
acryl_datahub-1.1.0.5rc1.dist-info/RECORD,,
|
|
@@ -39,6 +39,7 @@ datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:
|
|
|
39
39
|
datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
|
|
40
40
|
datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
|
|
41
41
|
datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
|
|
42
|
+
datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
|
|
42
43
|
dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
|
|
43
44
|
dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
|
|
44
45
|
delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
|
datahub/_version.py
CHANGED
datahub/ingestion/api/sink.py
CHANGED
|
@@ -147,6 +147,9 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
147
147
|
def close(self) -> None:
|
|
148
148
|
pass
|
|
149
149
|
|
|
150
|
+
def flush(self) -> None:
    """Hook for sinks that buffer or write asynchronously.

    The base implementation does nothing; subclasses may override it to
    wait until outstanding records have been written.
    """
    return None
|
|
152
|
+
|
|
150
153
|
def configured(self) -> str:
|
|
151
154
|
"""Override this method to output a human-readable and scrubbed version of the configured sink"""
|
|
152
155
|
return ""
|
|
@@ -502,7 +502,7 @@ class Pipeline:
|
|
|
502
502
|
self._handle_uncaught_pipeline_exception(exc)
|
|
503
503
|
finally:
|
|
504
504
|
clear_global_warnings()
|
|
505
|
-
|
|
505
|
+
self.sink.flush()
|
|
506
506
|
self._notify_reporters_on_ingestion_completion()
|
|
507
507
|
|
|
508
508
|
def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
|
|
@@ -5,6 +5,7 @@ import functools
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
7
|
import threading
|
|
8
|
+
import time
|
|
8
9
|
import uuid
|
|
9
10
|
from enum import auto
|
|
10
11
|
from typing import List, Optional, Tuple, Union
|
|
@@ -346,6 +347,17 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
|
|
|
346
347
|
RecordEnvelope(item, metadata={}), NoopWriteCallback()
|
|
347
348
|
)
|
|
348
349
|
|
|
350
|
+
def flush(self) -> None:
    """Block until the sink report shows no pending requests.

    Polls ``self.report.pending_requests`` every 0.1 seconds and logs the
    remaining count once per 1000 polls. There is no timeout: the call
    returns only when the pending count is no longer positive.
    """
    polls = 0
    while self.report.pending_requests > 0:
        time.sleep(0.1)
        polls += 1
        if polls % 1000:
            # Only report progress on every 1000th poll.
            continue
        logger.info(
            f"Waiting for {self.report.pending_requests} records to be written"
        )
|
|
360
|
+
|
|
349
361
|
def close(self):
|
|
350
362
|
with self.report.main_thread_blocking_timer:
|
|
351
363
|
self.executor.shutdown()
|
|
File without changes
|
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Iterable, List, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from pydantic import Field
|
|
5
|
+
|
|
6
|
+
from datahub.configuration.common import ConfigModel
|
|
7
|
+
from datahub.emitter.mce_builder import make_dataset_urn
|
|
8
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
9
|
+
from datahub.ingestion.api.common import PipelineContext
|
|
10
|
+
from datahub.ingestion.api.decorators import (
|
|
11
|
+
SupportStatus,
|
|
12
|
+
config_class,
|
|
13
|
+
platform_name,
|
|
14
|
+
support_status,
|
|
15
|
+
)
|
|
16
|
+
from datahub.ingestion.api.source import Source, SourceReport
|
|
17
|
+
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
18
|
+
from datahub.ingestion.source.mock_data.table_naming_helper import TableNamingHelper
|
|
19
|
+
from datahub.metadata.schema_classes import (
|
|
20
|
+
DatasetLineageTypeClass,
|
|
21
|
+
StatusClass,
|
|
22
|
+
SubTypesClass,
|
|
23
|
+
UpstreamClass,
|
|
24
|
+
UpstreamLineageClass,
|
|
25
|
+
)
|
|
26
|
+
from datahub.utilities.str_enum import StrEnum
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SubTypePattern(StrEnum):
    """Strategies for choosing the SubType of each generated mock table."""

    # "Table" for even table indexes within a level, "View" for odd ones.
    ALTERNATING = "alternating"

    # Every generated dataset is a "Table".
    ALL_TABLE = "all_table"

    # Every generated dataset is a "View".
    ALL_VIEW = "all_view"

    # SubType is looked up per level in the configured level_subtypes mapping.
    LEVEL_BASED = "level_based"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class LineageConfigGen1(ConfigModel):
    """
    Settings for generating mock lineage data for testing purposes.

    The mock data source uses these settings to build a layered lineage
    graph with multiple levels of upstream/downstream relationships.

    Number of tables per level:
    - Level 0: 1 table (root)
    - Level 1: lineage_fan_out tables
    - Level 2+: lineage_fan_out^level tables by default. When
      lineage_fan_out_after_first_hop is set, each level instead holds
      (tables at the previous level) * lineage_fan_out_after_first_hop tables.
    - Levels are generated up to and including level lineage_hops.

    Examples:
    - With lineage_fan_out=2, lineage_hops=1: Creates 3 tables total
      (1 root + 2 downstream) with 2 lineage relationships
    - With lineage_fan_out=3, lineage_hops=2: Creates 13 tables total
      (1 + 3 + 9) with 12 lineage relationships
    - With lineage_fan_out=4, lineage_hops=1: Creates 5 tables total
      (1 + 4) with 4 lineage relationships
    - With lineage_fan_out=3, lineage_hops=3, lineage_fan_out_after_first_hop=2:
      Creates 1 + 3 + 6 + 12 = 22 tables total (prevents exponential growth)

    Table naming convention: "hops_{lineage_hops}_f_{lineage_fan_out}_h{level}_t{table_index}"
    """

    # Master switch for this generator; off by default.
    emit_lineage: bool = Field(
        default=False,
        description=(
            "Whether to emit lineage data for testing purposes. "
            "When False, no lineage data is generated regardless of other settings."
        ),
    )

    # Width of the graph.
    lineage_fan_out: int = Field(
        default=3,
        description=(
            "Number of downstream tables that each upstream table connects to. "
            "This controls the 'width' of the lineage graph. "
            "Higher values create more parallel downstream tables per level."
        ),
    )

    # Depth of the graph.
    lineage_hops: int = Field(
        default=2,
        description=(
            "Number of hops (levels) in the lineage graph. "
            "This controls the 'depth' of the lineage graph. "
            "Level 0 is the root table, and each subsequent level contains downstream tables. "
            "Higher values create deeper lineage chains."
        ),
    )

    # Optional per-table fan-out used from level 2 onwards.
    lineage_fan_out_after_first_hop: Optional[int] = Field(
        default=None,
        description=(
            "Optional limit on fanout for hops after the first hop. "
            "When set, prevents exponential growth by limiting the number of downstream tables per upstream table at levels 2 and beyond. "
            "When None, uses the standard exponential growth (lineage_fan_out^level)."
        ),
    )

    # How the SubType of each generated table is chosen.
    subtype_pattern: SubTypePattern = Field(
        default=SubTypePattern.ALTERNATING,
        description="Pattern for determining SubTypes. Options: 'alternating', 'all_table', 'all_view', 'level_based'",
    )

    # Only consulted for the level_based pattern.
    level_subtypes: Dict[int, str] = Field(
        default={0: "Table", 1: "View", 2: "Table"},
        description="Mapping of level to subtype for level_based pattern",
    )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class DataHubMockDataConfig(ConfigModel):
    """Top-level configuration for the DataHub mock data source."""

    # Source-level on/off switch; on by default.
    enabled: bool = Field(
        default=True,
        description="Whether this source is enabled",
    )

    # Settings for the "gen_1" lineage generator; built with its defaults
    # when the recipe does not provide them.
    gen_1: LineageConfigGen1 = Field(
        default_factory=LineageConfigGen1,
        description="Configuration for lineage data generation",
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@platform_name("DataHubMockData")
|
|
109
|
+
@config_class(DataHubMockDataConfig)
|
|
110
|
+
@support_status(SupportStatus.TESTING)
|
|
111
|
+
class DataHubMockDataSource(Source):
|
|
112
|
+
"""
|
|
113
|
+
This source is for generating mock data for testing purposes.
|
|
114
|
+
Expect breaking changes as we iterate on the mock data source.
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
def __init__(self, ctx: PipelineContext, config: DataHubMockDataConfig):
    """Keep the pipeline context and config, and start with a fresh report."""
    self.report = SourceReport()
    self.config = config
    self.ctx = ctx
|
|
121
|
+
|
|
122
|
+
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield the mock work units requested by the configuration.

    Nothing is produced when the source is disabled (``config.enabled`` is
    False) or when ``config.gen_1.emit_lineage`` is False. Otherwise every
    work unit from ``_data_gen_1`` is recorded on the report and yielded.

    Returns:
        An iterable of the generated work units (possibly empty).
    """
    # We don't want any implicit aspects to be produced
    # so we are not using get_workunits_internal

    # Honor the source-level switch; it defaults to True, so recipes that
    # do not set it behave as before.
    if not self.config.enabled:
        return

    if not self.config.gen_1.emit_lineage:
        return

    for wu in self._data_gen_1():
        self.report.report_workunit(wu)
        yield wu
|
|
131
|
+
|
|
132
|
+
def _calculate_lineage_tables(
|
|
133
|
+
self, fan_out: int, hops: int, fan_out_after_first: Optional[int] = None
|
|
134
|
+
) -> Tuple[int, List[int]]:
|
|
135
|
+
"""
|
|
136
|
+
Calculate the total number of tables and tables at each level for lineage generation.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
fan_out: Number of downstream tables per upstream table at level 1
|
|
140
|
+
hops: Number of hops (levels) in the lineage graph
|
|
141
|
+
fan_out_after_first: Optional limit on fanout for hops after the first hop
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
Tuple of (total_tables, tables_at_levels) where tables_at_levels is a list
|
|
145
|
+
containing the number of tables at each level (index 0 = level 0, etc.)
|
|
146
|
+
"""
|
|
147
|
+
tables_to_be_created = 0
|
|
148
|
+
tables_at_levels: List[int] = []
|
|
149
|
+
|
|
150
|
+
for i in range(hops + 1):
|
|
151
|
+
if i == 0:
|
|
152
|
+
# Level 0: always 1 table
|
|
153
|
+
tables_at_level = 1
|
|
154
|
+
elif i == 1:
|
|
155
|
+
# Level 1: uses lineage_fan_out
|
|
156
|
+
tables_at_level = fan_out
|
|
157
|
+
else:
|
|
158
|
+
# Level 2+: use fan_out_after_first_hop if set, otherwise exponential growth
|
|
159
|
+
if fan_out_after_first is not None:
|
|
160
|
+
# Each table at previous level creates fan_out_after_first tables
|
|
161
|
+
tables_at_level = tables_at_levels[i - 1] * fan_out_after_first
|
|
162
|
+
else:
|
|
163
|
+
# Original exponential behavior
|
|
164
|
+
tables_at_level = fan_out**i
|
|
165
|
+
|
|
166
|
+
tables_at_levels.append(tables_at_level)
|
|
167
|
+
tables_to_be_created += tables_at_level
|
|
168
|
+
|
|
169
|
+
return tables_to_be_created, tables_at_levels
|
|
170
|
+
|
|
171
|
+
def _calculate_fanout_for_level(
|
|
172
|
+
self, level: int, fan_out: int, fan_out_after_first: Optional[int] = None
|
|
173
|
+
) -> int:
|
|
174
|
+
"""
|
|
175
|
+
Calculate the fanout (number of downstream tables) for a specific level.
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
level: The current level (0-based)
|
|
179
|
+
fan_out: Number of downstream tables per upstream table at level 1
|
|
180
|
+
fan_out_after_first: Optional limit on fanout for hops after the first hop
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
The number of downstream tables that each table at this level should connect to
|
|
184
|
+
"""
|
|
185
|
+
if level == 0:
|
|
186
|
+
# Level 0: uses the standard fan_out
|
|
187
|
+
return fan_out
|
|
188
|
+
else:
|
|
189
|
+
# Level 1+: use fan_out_after_first if set, otherwise use fan_out
|
|
190
|
+
return fan_out_after_first if fan_out_after_first is not None else fan_out
|
|
191
|
+
|
|
192
|
+
def _determine_subtype(
    self, table_name: str, table_level: int, table_index: int
) -> str:
    """
    Pick the subtype for a table according to the configured pattern.

    Args:
        table_name: Name of the table (not used by any current pattern)
        table_level: Level of the table in the lineage graph
        table_index: Index of the table within its level

    Returns:
        The subtype name: "Table", "View", or, for the level_based pattern,
        whatever the configured level_subtypes mapping holds for the level
    """
    gen_1 = self.config.gen_1
    pattern = gen_1.subtype_pattern

    if pattern == SubTypePattern.ALTERNATING:
        # Even indexes are tables, odd indexes are views.
        if table_index % 2 == 0:
            return "Table"
        return "View"

    if pattern == SubTypePattern.LEVEL_BASED:
        # Levels missing from the mapping fall back to "Table".
        return gen_1.level_subtypes.get(table_level, "Table")

    if pattern == SubTypePattern.ALL_VIEW:
        return "View"

    # ALL_TABLE and any unrecognized pattern both produce "Table".
    return "Table"
|
|
218
|
+
|
|
219
|
+
def _get_subtypes_aspect(
    self, table_name: str, table_level: int, table_index: int
) -> MetadataWorkUnit:
    """
    Build the work unit carrying the SubTypes aspect of one mock table.

    Args:
        table_name: Name of the table
        table_level: Level of the table in the lineage graph
        table_index: Index of the table within its level

    Returns:
        MetadataWorkUnit wrapping a SubTypes aspect with a single type name
    """
    type_name = self._determine_subtype(table_name, table_level, table_index)

    # Mock datasets are emitted under the "fake" platform.
    dataset_urn = make_dataset_urn(platform="fake", name=table_name)

    return MetadataChangeProposalWrapper(
        entityUrn=dataset_urn,
        entityType="dataset",
        aspect=SubTypesClass(typeNames=[type_name]),
    ).as_workunit()
|
|
243
|
+
|
|
244
|
+
def _data_gen_1(self) -> Iterable[MetadataWorkUnit]:
    """
    Generate mock lineage data for testing purposes.

    Reads the fan-out and hop settings from ``self.config.gen_1``, asks
    ``self._calculate_lineage_tables`` for the total table count and the
    per-level table counts, and then walks levels ``0..hops``. For every
    table it yields, in order: its status aspect, its subtypes aspect, and
    whatever ``self._generate_lineage_for_table`` yields for it.

    A progress message is logged each time the number of processed tables
    reaches a multiple of 1000.

    Yields:
        MetadataWorkUnit objects for every generated table.
    """
    settings = self.config.gen_1
    fan_out = settings.lineage_fan_out
    hops = settings.lineage_hops
    fan_out_after_first = settings.lineage_fan_out_after_first_hop

    logger.info(
        f"Generating lineage data with fan_out={fan_out}, hops={hops}, fan_out_after_first={fan_out_after_first}"
    )

    tables_to_be_created, tables_at_levels = self._calculate_lineage_tables(
        fan_out, hops, fan_out_after_first
    )

    logger.info(f"About to create {tables_to_be_created} tables for lineage testing")

    current_progress = 0
    for level in range(hops + 1):
        for index_in_level in range(tables_at_levels[level]):
            table_name = TableNamingHelper.generate_table_name(
                hops, fan_out, level, index_in_level
            )

            yield self._get_status_aspect(table_name)
            yield self._get_subtypes_aspect(table_name, level, index_in_level)
            yield from self._generate_lineage_for_table(
                table_name=table_name,
                table_level=level,
                table_index=index_in_level,
                hops=hops,
                fan_out=fan_out,
                fan_out_after_first=fan_out_after_first,
                tables_at_levels=tables_at_levels,
            )

            current_progress += 1
            # Only report on every 1000th table to keep the log readable.
            if current_progress % 1000 != 0:
                continue
            logger.info(
                f"Progress: {current_progress}/{tables_to_be_created} tables processed"
            )
|
|
289
|
+
|
|
290
|
+
def _generate_lineage_for_table(
    self,
    table_name: str,
    table_level: int,
    table_index: int,
    hops: int,
    fan_out: int,
    fan_out_after_first: Optional[int],
    tables_at_levels: List[int],
) -> Iterable[MetadataWorkUnit]:
    """
    Generate lineage relationships for a specific table.

    Nothing is yielded when the table sits on the last level (there is no
    level ``table_level + 1`` within ``hops``). Otherwise the fan-out for
    this level is obtained from ``self._calculate_fanout_for_level`` and the
    work is delegated to ``self._generate_downstream_lineage``.

    Args:
        table_name: Name of the upstream table
        table_level: Level of the table in the lineage graph
        table_index: Index of the table within its level
        hops: Number of lineage hops configured
        fan_out: Configured fan-out
        fan_out_after_first: Optional fan-out override, forwarded to
            ``self._calculate_fanout_for_level``
        tables_at_levels: Number of tables at each level

    Yields:
        Whatever ``self._generate_downstream_lineage`` yields for this table.
    """
    # Tables on the final level have no downstream level to link to.
    if table_level >= hops:
        return

    level_fan_out = self._calculate_fanout_for_level(
        table_level, fan_out, fan_out_after_first
    )
    downstream_units = self._generate_downstream_lineage(
        upstream_table_name=table_name,
        upstream_table_index=table_index,
        upstream_table_level=table_level,
        current_fan_out=level_fan_out,
        hops=hops,
        fan_out=fan_out,
        tables_at_levels=tables_at_levels,
    )
    yield from downstream_units
|
|
318
|
+
|
|
319
|
+
def _generate_downstream_lineage(
    self,
    upstream_table_name: str,
    upstream_table_index: int,
    upstream_table_level: int,
    current_fan_out: int,
    hops: int,
    fan_out: int,
    tables_at_levels: List[int],
) -> Iterable[MetadataWorkUnit]:
    """
    Generate lineage relationships to downstream tables.

    The upstream table at position ``upstream_table_index`` is linked to the
    contiguous run of ``current_fan_out`` tables on the next level that
    starts at ``upstream_table_index * current_fan_out``; the run is clipped
    to the number of tables that level actually has.

    Args:
        upstream_table_name: Name of the upstream table
        upstream_table_index: Index of the upstream table within its level
        upstream_table_level: Level of the upstream table
        current_fan_out: Number of downstream tables per upstream table
        hops: Number of lineage hops, forwarded to the table-name helper
        fan_out: Configured fan-out, forwarded to the table-name helper
        tables_at_levels: Number of tables at each level

    Yields:
        One upstream-lineage work unit per linked downstream table.
    """
    downstream_level = upstream_table_level + 1
    level_size = tables_at_levels[downstream_level]

    # Half-open index window [first_child, stop_child) on the next level.
    first_child = upstream_table_index * current_fan_out
    stop_child = min(first_child + current_fan_out, level_size)

    for child_index in range(first_child, stop_child):
        child_name = TableNamingHelper.generate_table_name(
            hops, fan_out, downstream_level, child_index
        )
        yield self._get_upstream_aspect(
            upstream_table=upstream_table_name,
            downstream_table=child_name,
        )
|
|
347
|
+
|
|
348
|
+
def _get_status_aspect(self, table: str) -> MetadataWorkUnit:
    """
    Build the work unit carrying a Status aspect for a mock table.

    Args:
        table: Name of the table; used as the dataset name on the "fake"
            platform.

    Returns:
        MetadataWorkUnit whose aspect is ``StatusClass(removed=False)``.
    """
    return MetadataChangeProposalWrapper(
        entityUrn=make_dataset_urn(platform="fake", name=table),
        entityType="dataset",
        aspect=StatusClass(removed=False),
    ).as_workunit()
|
|
359
|
+
|
|
360
|
+
def _get_upstream_aspect(
    self, upstream_table: str, downstream_table: str
) -> MetadataWorkUnit:
    """
    Build the work unit that records one upstream edge for a mock table.

    The aspect is attached to the downstream dataset and lists the upstream
    dataset as its single upstream, with lineage type ``TRANSFORMED``. Both
    datasets live on the "fake" platform.

    Args:
        upstream_table: Name of the upstream table
        downstream_table: Name of the downstream table that receives the
            aspect

    Returns:
        MetadataWorkUnit containing the UpstreamLineage aspect.
    """
    downstream_urn = make_dataset_urn(platform="fake", name=downstream_table)
    upstream_urn = make_dataset_urn(platform="fake", name=upstream_table)

    upstream_edge = UpstreamClass(
        dataset=upstream_urn,
        type=DatasetLineageTypeClass.TRANSFORMED,
    )
    lineage = UpstreamLineageClass(upstreams=[upstream_edge])

    proposal = MetadataChangeProposalWrapper(
        entityUrn=downstream_urn,
        entityType="dataset",
        aspect=lineage,
    )
    return proposal.as_workunit()
|
|
382
|
+
|
|
383
|
+
def get_report(self) -> SourceReport:
    """Return the report object stored on this source as ``self.report``."""
    return self.report
|