acryl-datahub 1.1.0.4rc3__py3-none-any.whl → 1.1.0.5rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.
Files changed (26)
  1. {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/METADATA +2355 -2355
  2. {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/RECORD +26 -23
  3. {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/entry_points.txt +1 -0
  4. datahub/_version.py +1 -1
  5. datahub/ingestion/api/sink.py +3 -0
  6. datahub/ingestion/run/pipeline.py +1 -1
  7. datahub/ingestion/sink/datahub_rest.py +12 -0
  8. datahub/ingestion/source/mock_data/__init__.py +0 -0
  9. datahub/ingestion/source/mock_data/datahub_mock_data.py +384 -0
  10. datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
  11. datahub/ingestion/source/preset.py +1 -1
  12. datahub/metadata/_internal_schema_classes.py +3 -0
  13. datahub/metadata/schema.avsc +2 -0
  14. datahub/metadata/schemas/ContainerProperties.avsc +2 -0
  15. datahub/metadata/schemas/DataFlowInfo.avsc +2 -0
  16. datahub/metadata/schemas/DataJobInfo.avsc +2 -0
  17. datahub/metadata/schemas/DataProcessKey.avsc +2 -0
  18. datahub/metadata/schemas/DatasetKey.avsc +2 -0
  19. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +2 -0
  20. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -0
  21. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -0
  22. datahub/metadata/schemas/MLModelKey.avsc +2 -0
  23. datahub/metadata/schemas/MetadataChangeEvent.avsc +2 -0
  24. {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/WHEEL +0 -0
  25. {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/licenses/LICENSE +0 -0
  26. {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/RECORD CHANGED
@@ -1,7 +1,7 @@
- acryl_datahub-1.1.0.4rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.1.0.5rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=OF251LJLh7moYp7lXruZj0uH4nIUOIgEh8RcvTFCPqU,323
+ datahub/_version.py,sha256=TZdOK9cFifb1yYky21qrv8R4D8Q8htQJWiKxMGKJpS0,323
  datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -150,7 +150,7 @@ datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINq
  datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
  datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
- datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
+ datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
  datahub/ingestion/api/source.py,sha256=hYwh4LHcG5RS6xQ9QAh5Zlijjl6r1JaibKPb75Hne7A,19518
  datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
@@ -189,7 +189,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
- datahub/ingestion/run/pipeline.py,sha256=h_WAAgORQDVeMO0FmYeLBsAzN9WsDU8yKGwBDzLC2FM,29910
+ datahub/ingestion/run/pipeline.py,sha256=TYE1Vm144uHFmqEsrJcbrD0fcg2M-ZvYEGGGbIp1Rmk,29943
  datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
  datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -197,7 +197,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
- datahub/ingestion/sink/datahub_rest.py,sha256=xXCYqYMc9DuNQv22DBYrEpJFvGBrKVPmZuaZRbMOONA,13089
+ datahub/ingestion/sink/datahub_rest.py,sha256=DOhtTHqKpmqgI3rUY9ri2QZAyXYDFINWMG6ne7VYUXI,13463
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -218,7 +218,7 @@ datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-
  datahub/ingestion/source/nifi.py,sha256=2jxSzuHIRQFF7DLoceGbUd_10lkhHRlqA9hhApSt3Yw,56882
  datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
  datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
- datahub/ingestion/source/preset.py,sha256=bbh0ZWiAZMy2zuJDmaRY07_OuGJ9tdtKjwvIxqbY5II,3964
+ datahub/ingestion/source/preset.py,sha256=1goxuFoLw50dokr2gp1MhUrDJ8CFNX-wBZIvv7laEXA,3966
  datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
  datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
  datahub/ingestion/source/salesforce.py,sha256=CQtDFv1OsbC1vyzNbKOc6GxhFQ5GdYj45hgAF0-oIcw,40487
@@ -386,6 +386,9 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
  datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
+ datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=i19UFii3wOPG5CVQfV_20MwIRAJjr0TxsIiVjKcBND4,14314
+ datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=oIC1vcOx76Vl63O9kcjP_iInBHyS-ATdN3Y932TCCZg,3283
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=JqKCwxBJfOrC8SF7CmDG0cseWxHk_7E2v4Diw3Q0-WM,14181
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -608,8 +611,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
- datahub/metadata/_internal_schema_classes.py,sha256=_TONNRIlWJ5xKruHRzmgjg73T5BUc8Z1xgw4yBaaZ5Q,1019193
- datahub/metadata/schema.avsc,sha256=ICovNj8D1XGTQUGA7x9gbjNTJ9li00a6MSz8_UvZN2s,707166
+ datahub/metadata/_internal_schema_classes.py,sha256=UACA9XSSdTueZTUW4v_4OWIsWga3T99I3gnGAPCe2w8,1019257
+ datahub/metadata/schema.avsc,sha256=GsQZCPSD3_KKXvGALSzgTgIONL0r3tXme9M4rXQy_q4,707244
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -711,7 +714,7 @@ datahub/metadata/schemas/ChartQuery.avsc,sha256=6qz8Q5wa5h0GS6QcwfzVZrRi26PoEwXy
  datahub/metadata/schemas/ChartUsageStatistics.avsc,sha256=FjEEPj_19jUvJcS9cyZtHqByQyPdt2xhmxfFdw9mSM4,5881
  datahub/metadata/schemas/Container.avsc,sha256=pEpRQgClrJRm59eAiltc4YoP0pg7TG6Bu-ENCh11-mw,813
  datahub/metadata/schemas/ContainerKey.avsc,sha256=5wUgUbR1zzNcSpeDOHm4XWFf_xDtXFuIikbjKXMFe80,939
- datahub/metadata/schemas/ContainerProperties.avsc,sha256=PfWz9Nd6ib3dVzdPwJp62iLyqynsYrWrrLdqk5oWN4g,5038
+ datahub/metadata/schemas/ContainerProperties.avsc,sha256=BPhrRf4I7UImQhB-55Qby82MXKuDQL0cwlvdg0aZEHs,5112
  datahub/metadata/schemas/CorpGroupEditableInfo.avsc,sha256=sItWMAGfQdYwmF5xLzMnYGl_rtXvGlxPbEjmRm1E6u4,1290
  datahub/metadata/schemas/CorpGroupInfo.avsc,sha256=G83lndR7C6WPfccFg7qFE5Ely5vrdxC8x7zJB3fW3Sw,4740
  datahub/metadata/schemas/CorpGroupKey.avsc,sha256=B_RMHAFF_nd86qtO2p1slAZnxruCmBM7DUfILAU_UaI,953
@@ -728,7 +731,7 @@ datahub/metadata/schemas/DashboardUsageStatistics.avsc,sha256=pUAKqs49Wy5pAL92g_
  datahub/metadata/schemas/DataContractKey.avsc,sha256=m0ej_Wu7NcuZQCRwQI3Sidfv9bUy5mvuhlpgax6i1xA,511
  datahub/metadata/schemas/DataContractProperties.avsc,sha256=RCxuJMlZwqEE0iHTpuXvcH6zRFoOt7ysQFPrJRp3RqE,4763
  datahub/metadata/schemas/DataContractStatus.avsc,sha256=5yvT43AIB13Dn_h0-4s7fsL7BTuXhkK5pi2KJug4_qg,1029
- datahub/metadata/schemas/DataFlowInfo.avsc,sha256=hcceLpSVLSx5N4YEPTDtXMOqLF0bWCQE5WNltQ-PJXo,4884
+ datahub/metadata/schemas/DataFlowInfo.avsc,sha256=SOXI26Vgv3O-t9z6_ff0gzVL5-M1XBlCOUCI9vB-jpk,4958
  datahub/metadata/schemas/DataFlowKey.avsc,sha256=lIXr1oVJIHxOEibTx1YWFhGY2VQyWs9AW65eePPmdXI,1345
  datahub/metadata/schemas/DataHubAccessTokenInfo.avsc,sha256=WS77M5w7GJFxUAiyXaxUvBqO0XFV2FnKPxXSXYbXHTE,1646
  datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4NJOB5yODVr9J-wZzkZanEgo,483
@@ -755,7 +758,7 @@ datahub/metadata/schemas/DataHubUpgradeRequest.avsc,sha256=3xYsf3XVHBcclHdRnLVMo
  datahub/metadata/schemas/DataHubUpgradeResult.avsc,sha256=VydVb4yqjIviR73-T6TooF6OiahwxTHciP97NuF4qvI,1385
  datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdPQYwvhhv9RQv6gEeg,11642
  datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
- datahub/metadata/schemas/DataJobInfo.avsc,sha256=gU8aGuDp1PDlu-fBTH04CDofx9qpegxGMwG46UaGLSs,7414
+ datahub/metadata/schemas/DataJobInfo.avsc,sha256=Bc9qdDcXI0GQdEgNTpgHaBbnrppDKQ-1xR26diOSVIQ,7488
  datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
  datahub/metadata/schemas/DataJobKey.avsc,sha256=S7egH8jWjKW52MG6Pg7plDoP15XfTTiMde5V6nR6ycE,1624
  datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
@@ -770,7 +773,7 @@ datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=xyGBUf3vFHrMLtmZj
  datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5IVetgD8mW2k--F6CwmYXM3KOE6edU8,3836
  datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
  datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
- datahub/metadata/schemas/DataProcessKey.avsc,sha256=3N6xt_rxPZMzi7XZQz-4OLnqMQJ88Oxj5OAr4buDVPY,2448
+ datahub/metadata/schemas/DataProcessKey.avsc,sha256=ZZE2HN4mwZtm_TJNcdohFS97WXytFq9HAs_-shor6sY,2518
  datahub/metadata/schemas/DataProductKey.avsc,sha256=SyjmL2ieea1P6uipXst37mD5NdGPTqDvJAL3CVo91wk,661
  datahub/metadata/schemas/DataProductProperties.avsc,sha256=Lc3duV7YMJLvo_RwckLbW4bbmPrhSS1D-bxVVboNX2c,6930
  datahub/metadata/schemas/DataTransformLogic.avsc,sha256=nHTH6UzJ2Zz88N2aWa96hawLUR20HP7eSynfPtI1kzg,2111
@@ -779,7 +782,7 @@ datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiF
  datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
  datahub/metadata/schemas/DatahubIngestionRunSummary.avsc,sha256=_Ek7NqfJVTLqlM0NR9BRA57N9_ejwDdQvz7B1tVxSEE,9367
  datahub/metadata/schemas/DatasetDeprecation.avsc,sha256=ucXxaDcAUib9_y0k5qOINMn5VK2X3trHK2dcpNcsR2Q,1256
- datahub/metadata/schemas/DatasetKey.avsc,sha256=kaP4yGbUk2kHivbh5H-RnSFcmiZsEfHsKVYV5VNweos,3403
+ datahub/metadata/schemas/DatasetKey.avsc,sha256=xNh2Zbg1POuHD3qu42-__zNVhKWx6QkA2LY7Dlk0YSY,3473
  datahub/metadata/schemas/DatasetProfile.avsc,sha256=3ZCU9JD6l2razACp0AY6LLMgnkMTj6D_5Xk9np6WWRM,9965
  datahub/metadata/schemas/DatasetProperties.avsc,sha256=DFJn75feqaoQk84zin_o_lqsFFhqkwya5LGC5LLJXbU,4209
  datahub/metadata/schemas/DatasetUpstreamLineage.avsc,sha256=PjAWPbsqwH7FjX2kFDy0dE6ENYOwRynH9vJerWisr2A,5365
@@ -832,7 +835,7 @@ datahub/metadata/schemas/GlossaryTermKey.avsc,sha256=00paBmYoFIlIUebS0X6BL9y5xqn
  datahub/metadata/schemas/GlossaryTerms.avsc,sha256=ogOFO6Hr2Xb7s1JHqxsCPY8r_qY_9kwu69k5-E3j2BM,7123
  datahub/metadata/schemas/GroupMembership.avsc,sha256=wT3Hbpv2Z7V4X_-rIoed0cukAOMyYEL93udK8mMCjn0,557
  datahub/metadata/schemas/IcebergCatalogInfo.avsc,sha256=X9Ejqzn1DyxNIth7vDhtPjGG4xMPQMhl7f-S7fBFxek,691
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=9v2iVfg1GTr9cw1o7tEDXNPqXWDD9I98NMNo0PdJ2ic,2635
+ datahub/metadata/schemas/IcebergWarehouseInfo.avsc,sha256=0m7cQm8cCnBWNI5jGGgr5ZdOg66RQGWSf3gf8ay53So,2705
  datahub/metadata/schemas/IncidentInfo.avsc,sha256=L8xldmWyOW4Ml2Fm9XTRL13lP1CAEP0kgXsd_jLZaEU,12425
  datahub/metadata/schemas/IncidentKey.avsc,sha256=Pip__DyNNTal7NxryM3kFi9qHlwntp1rIA8Al8Zz264,542
  datahub/metadata/schemas/IncidentSource.avsc,sha256=lY_SarA3cM55KNENcB5z1Gu2MygxEl9l7R8LdMak9AQ,1199
@@ -848,17 +851,17 @@ datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=hVxNEqsx4GgG11GVryn9ms16O
  datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGRNYtIBJCoeHkMK_Zr_imBWF2Zk,2008
  datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
  datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
- datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=9fSHt_AvnRv983oxABSf-EZYz-Uyn5BW7DLpQ61v-8Y,2615
+ datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=EcA0z4sQfqe3IJ8PO8cGW34XMxc9Q2BbCBjgkJTaznE,2685
  datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=7IlGrMmX8nfgezvaZyrXskCTCRlwvRzGOYUOpFV3r6Y,5480
  datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
- datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=Hhk6L63fITV8Qu7h9a7khrfkUrRPcUMFvccrVreUT2Y,2709
+ datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=zIyIHI-23i_oQMbc1sigar9sJNJsa6CYfHHy-nH5IXE,2779
  datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=AZ5Pohk3_pCctQ4hcE1UOURQFYHQne0dw_lRUpOu5WY,6924
- datahub/metadata/schemas/MLModelKey.avsc,sha256=zKFzorbGgKGO56ILU2UmwUjRQRoRJ9rhu0yhTdUIfD8,3078
+ datahub/metadata/schemas/MLModelKey.avsc,sha256=zwoY9opTL5tMm5aoRHoWcNv5DjERYS-hWR05kVFlcTw,3148
  datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKjtZsDcTfl2X_jWmtFqo,12355
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=bApzACHQ70FESJuQ-24dfwk3H3R255f1uG9ZSY89RwM,376924
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=kwEwLUSPuqdrx_7uOX2XnEZ6Olm4p2ezYt0bTQSgaTk,377034
  datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=Cf5eECeShCA_XHFr2MRhRQpPE61F6Xv-z1jjoBLJLgc,12239
  datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=tvO5cGIqZAIvUbMon1RAKgSY4E0jvBqT5VmLWAuNGkY,9770
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
@@ -1075,8 +1078,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.1.0.4rc3.dist-info/METADATA,sha256=2NyzaDzCIUEF2g5CM-vfMVqvzJvKXnWdJxawDLVJ_7c,182347
- acryl_datahub-1.1.0.4rc3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub-1.1.0.4rc3.dist-info/entry_points.txt,sha256=-N2PGtn1uwKR7-VM9spziE_RNyOdKm_XNpOWL1lnaj4,9790
- acryl_datahub-1.1.0.4rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.1.0.4rc3.dist-info/RECORD,,
+ acryl_datahub-1.1.0.5rc1.dist-info/METADATA,sha256=sKbnEOEUAOv5hgUfoytH-EIKqUc7QxK96ENS7SZUIPs,182347
+ acryl_datahub-1.1.0.5rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub-1.1.0.5rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
+ acryl_datahub-1.1.0.5rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.1.0.5rc1.dist-info/RECORD,,
{acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc1.dist-info}/entry_points.txt CHANGED
@@ -39,6 +39,7 @@ datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:
  datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
  datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
  datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
+ datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
  dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
  dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
  delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.1.0.4rc3"
+ __version__ = "1.1.0.5rc1"


  def is_dev_mode() -> bool:
datahub/ingestion/api/sink.py CHANGED
@@ -147,6 +147,9 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
  def close(self) -> None:
  pass

+ def flush(self) -> None:
+ pass
+
  def configured(self) -> str:
  """Override this method to output a human-readable and scrubbed version of the configured sink"""
  return ""
datahub/ingestion/run/pipeline.py CHANGED
@@ -502,7 +502,7 @@ class Pipeline:
  self._handle_uncaught_pipeline_exception(exc)
  finally:
  clear_global_warnings()
-
+ self.sink.flush()
  self._notify_reporters_on_ingestion_completion()

  def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
datahub/ingestion/sink/datahub_rest.py CHANGED
@@ -5,6 +5,7 @@ import functools
  import logging
  import os
  import threading
+ import time
  import uuid
  from enum import auto
  from typing import List, Optional, Tuple, Union
@@ -346,6 +347,17 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
  RecordEnvelope(item, metadata={}), NoopWriteCallback()
  )

+ def flush(self) -> None:
+ """Wait for all pending records to be written."""
+ i = 0
+ while self.report.pending_requests > 0:
+ time.sleep(0.1)
+ i += 1
+ if i % 1000 == 0:
+ logger.info(
+ f"Waiting for {self.report.pending_requests} records to be written"
+ )
+
  def close(self):
  with self.report.main_thread_blocking_timer:
  self.executor.shutdown()
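Taken together, the sink.py, pipeline.py, and datahub_rest.py changes above add an optional flush() hook: the base Sink gains a no-op flush(), Pipeline.run() now calls self.sink.flush() in its finally block before reporters are notified of completion, and the REST sink implements flush() by polling its pending-request count. A minimal usage sketch follows (illustrative only; Pipeline.create, run, and raise_from_status are existing DataHub APIs, while the server URL and recipe values are placeholders, not part of this diff):

from datahub.ingestion.run.pipeline import Pipeline

# With this release, run() drains the REST sink's pending async writes
# (via Sink.flush) before ingestion completion is reported.
pipeline = Pipeline.create(
    {
        "source": {
            "type": "datahub-mock-data",  # new source registered in entry_points.txt
            "config": {"gen_1": {"emit_lineage": True}},
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},  # placeholder server
        },
    }
)
pipeline.run()
pipeline.raise_from_status()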
datahub/ingestion/source/mock_data/__init__.py: file without changes (new empty file)
datahub/ingestion/source/mock_data/datahub_mock_data.py ADDED
@@ -0,0 +1,384 @@
import logging
from typing import Dict, Iterable, List, Optional, Tuple

from pydantic import Field

from datahub.configuration.common import ConfigModel
from datahub.emitter.mce_builder import make_dataset_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
    SupportStatus,
    config_class,
    platform_name,
    support_status,
)
from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.mock_data.table_naming_helper import TableNamingHelper
from datahub.metadata.schema_classes import (
    DatasetLineageTypeClass,
    StatusClass,
    SubTypesClass,
    UpstreamClass,
    UpstreamLineageClass,
)
from datahub.utilities.str_enum import StrEnum

logger = logging.getLogger(__name__)


class SubTypePattern(StrEnum):
    ALTERNATING = "alternating"
    ALL_TABLE = "all_table"
    ALL_VIEW = "all_view"
    LEVEL_BASED = "level_based"


class LineageConfigGen1(ConfigModel):
    """
    Configuration for generating mock lineage data for testing purposes.

    This configuration controls how the mock data source generates a hierarchical
    lineage graph with multiple levels of upstream/downstream relationships.

    The lineage graph is structured as follows:
    - Level 0: 1 table (root)
    - Level 1: lineage_fan_out tables (each connected to the root)
    - Level 2+: If lineage_fan_out_after_first_hop is set, uses that value;
      otherwise uses lineage_fan_out^level tables (each connected to a level 1 table)
    - ... and so on for lineage_hops levels

    Examples:
    - With lineage_fan_out=2, lineage_hops=1: Creates 3 tables total
      (1 root + 2 downstream) with 2 lineage relationships
    - With lineage_fan_out=3, lineage_hops=2: Creates 13 tables total
      (1 + 3 + 9) with 12 lineage relationships
    - With lineage_fan_out=4, lineage_hops=1: Creates 5 tables total
      (1 + 4) with 4 lineage relationships
    - With lineage_fan_out=3, lineage_hops=3, lineage_fan_out_after_first_hop=2:
      Creates 1 + 3 + 6 + 12 = 22 tables total (prevents exponential growth)

    Table naming convention: "hops_{lineage_hops}_f_{lineage_fan_out}_h{level}_t{table_index}"
    """

    emit_lineage: bool = Field(
        default=False,
        description="Whether to emit lineage data for testing purposes. When False, no lineage data is generated regardless of other settings.",
    )

    lineage_fan_out: int = Field(
        default=3,
        description="Number of downstream tables that each upstream table connects to. This controls the 'width' of the lineage graph. Higher values create more parallel downstream tables per level.",
    )

    lineage_hops: int = Field(
        default=2,
        description="Number of hops (levels) in the lineage graph. This controls the 'depth' of the lineage graph. Level 0 is the root table, and each subsequent level contains downstream tables. Higher values create deeper lineage chains.",
    )

    lineage_fan_out_after_first_hop: Optional[int] = Field(
        default=None,
        description="Optional limit on fanout for hops after the first hop. When set, prevents exponential growth by limiting the number of downstream tables per upstream table at levels 2 and beyond. When None, uses the standard exponential growth (lineage_fan_out^level).",
    )

    subtype_pattern: SubTypePattern = Field(
        default=SubTypePattern.ALTERNATING,
        description="Pattern for determining SubTypes. Options: 'alternating', 'all_table', 'all_view', 'level_based'",
    )

    level_subtypes: Dict[int, str] = Field(
        default={0: "Table", 1: "View", 2: "Table"},
        description="Mapping of level to subtype for level_based pattern",
    )


class DataHubMockDataConfig(ConfigModel):
    enabled: bool = Field(
        default=True,
        description="Whether this source is enabled",
    )

    gen_1: LineageConfigGen1 = Field(
        default_factory=LineageConfigGen1,
        description="Configuration for lineage data generation",
    )


@platform_name("DataHubMockData")
@config_class(DataHubMockDataConfig)
@support_status(SupportStatus.TESTING)
class DataHubMockDataSource(Source):
    """
    This source is for generating mock data for testing purposes.
    Expect breaking changes as we iterate on the mock data source.
    """

    def __init__(self, ctx: PipelineContext, config: DataHubMockDataConfig):
        self.ctx = ctx
        self.config = config
        self.report = SourceReport()

    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        # We don't want any implicit aspects to be produced
        # so we are not using get_workunits_internal
        if self.config.gen_1.emit_lineage:
            for wu in self._data_gen_1():
                self.report.report_workunit(wu)
                yield wu

        yield from []

    def _calculate_lineage_tables(
        self, fan_out: int, hops: int, fan_out_after_first: Optional[int] = None
    ) -> Tuple[int, List[int]]:
        """
        Calculate the total number of tables and tables at each level for lineage generation.

        Args:
            fan_out: Number of downstream tables per upstream table at level 1
            hops: Number of hops (levels) in the lineage graph
            fan_out_after_first: Optional limit on fanout for hops after the first hop

        Returns:
            Tuple of (total_tables, tables_at_levels) where tables_at_levels is a list
            containing the number of tables at each level (index 0 = level 0, etc.)
        """
        tables_to_be_created = 0
        tables_at_levels: List[int] = []

        for i in range(hops + 1):
            if i == 0:
                # Level 0: always 1 table
                tables_at_level = 1
            elif i == 1:
                # Level 1: uses lineage_fan_out
                tables_at_level = fan_out
            else:
                # Level 2+: use fan_out_after_first_hop if set, otherwise exponential growth
                if fan_out_after_first is not None:
                    # Each table at previous level creates fan_out_after_first tables
                    tables_at_level = tables_at_levels[i - 1] * fan_out_after_first
                else:
                    # Original exponential behavior
                    tables_at_level = fan_out**i

            tables_at_levels.append(tables_at_level)
            tables_to_be_created += tables_at_level

        return tables_to_be_created, tables_at_levels

    def _calculate_fanout_for_level(
        self, level: int, fan_out: int, fan_out_after_first: Optional[int] = None
    ) -> int:
        """
        Calculate the fanout (number of downstream tables) for a specific level.

        Args:
            level: The current level (0-based)
            fan_out: Number of downstream tables per upstream table at level 1
            fan_out_after_first: Optional limit on fanout for hops after the first hop

        Returns:
            The number of downstream tables that each table at this level should connect to
        """
        if level == 0:
            # Level 0: uses the standard fan_out
            return fan_out
        else:
            # Level 1+: use fan_out_after_first if set, otherwise use fan_out
            return fan_out_after_first if fan_out_after_first is not None else fan_out

    def _determine_subtype(
        self, table_name: str, table_level: int, table_index: int
    ) -> str:
        """
        Determine subtype based on configured pattern.

        Args:
            table_name: Name of the table
            table_level: Level of the table in the lineage graph
            table_index: Index of the table within its level

        Returns:
            The determined subtype ("Table" or "View")
        """
        pattern = self.config.gen_1.subtype_pattern

        if pattern == SubTypePattern.ALTERNATING:
            return "Table" if table_index % 2 == 0 else "View"
        elif pattern == SubTypePattern.LEVEL_BASED:
            return self.config.gen_1.level_subtypes.get(table_level, "Table")
        elif pattern == SubTypePattern.ALL_TABLE:
            return "Table"
        elif pattern == SubTypePattern.ALL_VIEW:
            return "View"
        else:
            return "Table"  # default

    def _get_subtypes_aspect(
        self, table_name: str, table_level: int, table_index: int
    ) -> MetadataWorkUnit:
        """
        Create a SubTypes aspect for a table based on deterministic pattern.

        Args:
            table_name: Name of the table
            table_level: Level of the table in the lineage graph
            table_index: Index of the table within its level

        Returns:
            MetadataWorkUnit containing the SubTypes aspect
        """
        # Determine subtype based on pattern
        subtype = self._determine_subtype(table_name, table_level, table_index)

        urn = make_dataset_urn(platform="fake", name=table_name)
        mcp = MetadataChangeProposalWrapper(
            entityUrn=urn,
            entityType="dataset",
            aspect=SubTypesClass(typeNames=[subtype]),
        )
        return mcp.as_workunit()

    def _data_gen_1(self) -> Iterable[MetadataWorkUnit]:
        """Generate mock lineage data for testing purposes."""
        gen_1 = self.config.gen_1
        fan_out = gen_1.lineage_fan_out
        hops = gen_1.lineage_hops
        fan_out_after_first = gen_1.lineage_fan_out_after_first_hop

        logger.info(
            f"Generating lineage data with fan_out={fan_out}, hops={hops}, fan_out_after_first={fan_out_after_first}"
        )

        tables_to_be_created, tables_at_levels = self._calculate_lineage_tables(
            fan_out, hops, fan_out_after_first
        )

        logger.info(
            f"About to create {tables_to_be_created} tables for lineage testing"
        )

        current_progress = 0
        for i in range(hops + 1):
            tables_at_level = tables_at_levels[i]

            for j in range(tables_at_level):
                table_name = TableNamingHelper.generate_table_name(hops, fan_out, i, j)

                yield self._get_status_aspect(table_name)

                yield self._get_subtypes_aspect(table_name, i, j)

                yield from self._generate_lineage_for_table(
                    table_name=table_name,
                    table_level=i,
                    table_index=j,
                    hops=hops,
                    fan_out=fan_out,
                    fan_out_after_first=fan_out_after_first,
                    tables_at_levels=tables_at_levels,
                )

                current_progress += 1
                if current_progress % 1000 == 0:
                    logger.info(
                        f"Progress: {current_progress}/{tables_to_be_created} tables processed"
                    )

    def _generate_lineage_for_table(
        self,
        table_name: str,
        table_level: int,
        table_index: int,
        hops: int,
        fan_out: int,
        fan_out_after_first: Optional[int],
        tables_at_levels: List[int],
    ) -> Iterable[MetadataWorkUnit]:
        """Generate lineage relationships for a specific table."""
        # Only generate lineage if there are downstream levels
        if table_level + 1 > hops:
            return

        current_fan_out = self._calculate_fanout_for_level(
            table_level, fan_out, fan_out_after_first
        )

        yield from self._generate_downstream_lineage(
            upstream_table_name=table_name,
            upstream_table_index=table_index,
            upstream_table_level=table_level,
            current_fan_out=current_fan_out,
            hops=hops,
            fan_out=fan_out,
            tables_at_levels=tables_at_levels,
        )

    def _generate_downstream_lineage(
        self,
        upstream_table_name: str,
        upstream_table_index: int,
        upstream_table_level: int,
        current_fan_out: int,
        hops: int,
        fan_out: int,
        tables_at_levels: List[int],
    ) -> Iterable[MetadataWorkUnit]:
        """Generate lineage relationships to downstream tables."""
        downstream_level = upstream_table_level + 1
        downstream_tables_count = tables_at_levels[downstream_level]

        # Calculate range of downstream tables this upstream table connects to
        start_downstream = upstream_table_index * current_fan_out
        end_downstream = min(
            (upstream_table_index + 1) * current_fan_out, downstream_tables_count
        )

        for downstream_index in range(start_downstream, end_downstream):
            downstream_table_name = TableNamingHelper.generate_table_name(
                hops, fan_out, downstream_level, downstream_index
            )
            yield self._get_upstream_aspect(
                upstream_table=upstream_table_name,
                downstream_table=downstream_table_name,
            )

    def _get_status_aspect(self, table: str) -> MetadataWorkUnit:
        urn = make_dataset_urn(
            platform="fake",
            name=table,
        )
        mcp = MetadataChangeProposalWrapper(
            entityUrn=urn,
            entityType="dataset",
            aspect=StatusClass(removed=False),
        )
        return mcp.as_workunit()

    def _get_upstream_aspect(
        self, upstream_table: str, downstream_table: str
    ) -> MetadataWorkUnit:
        mcp = MetadataChangeProposalWrapper(
            entityUrn=make_dataset_urn(
                platform="fake",
                name=downstream_table,
            ),
            entityType="dataset",
            aspect=UpstreamLineageClass(
                upstreams=[
                    UpstreamClass(
                        dataset=make_dataset_urn(
                            platform="fake",
                            name=upstream_table,
                        ),
                        type=DatasetLineageTypeClass.TRANSFORMED,
                    )
                ],
            ),
        )
        return mcp.as_workunit()

    def get_report(self) -> SourceReport:
        return self.report
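For reference, the table counts quoted in the LineageConfigGen1 docstring follow directly from the per-level rule in _calculate_lineage_tables: level 0 always has one table, level 1 has lineage_fan_out tables, and deeper levels either multiply the previous level by lineage_fan_out_after_first_hop or grow as lineage_fan_out**level. A standalone re-derivation of the docstring examples (a sketch independent of the package, not code from this release):

from typing import List, Optional

def tables_per_level(
    fan_out: int, hops: int, fan_out_after_first: Optional[int] = None
) -> List[int]:
    # Mirrors the counting rule described in the LineageConfigGen1 docstring.
    levels: List[int] = []
    for level in range(hops + 1):
        if level == 0:
            levels.append(1)
        elif level == 1:
            levels.append(fan_out)
        elif fan_out_after_first is not None:
            levels.append(levels[-1] * fan_out_after_first)
        else:
            levels.append(fan_out**level)
    return levels

assert sum(tables_per_level(2, 1)) == 3      # 1 + 2
assert sum(tables_per_level(3, 2)) == 13     # 1 + 3 + 9
assert sum(tables_per_level(3, 3, 2)) == 22  # 1 + 3 + 6 + 12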