acryl-datahub 0.15.0.5rc5__py3-none-any.whl → 0.15.0.5rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (25) hide show
  1. {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/METADATA +2423 -2416
  2. {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/RECORD +25 -21
  3. {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/entry_points.txt +1 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataprocess/dataprocess_instance.py +104 -11
  6. datahub/cli/container_cli.py +1 -64
  7. datahub/emitter/composite_emitter.py +36 -0
  8. datahub/ingestion/source/apply/__init__.py +0 -0
  9. datahub/ingestion/source/apply/datahub_apply.py +223 -0
  10. datahub/ingestion/source/dbt/dbt_core.py +1 -1
  11. datahub/ingestion/source/sql/sql_config.py +0 -10
  12. datahub/ingestion/source_config/operation_config.py +9 -0
  13. datahub/metadata/_schema_classes.py +144 -4
  14. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  15. datahub/metadata/schema.avsc +115 -5
  16. datahub/metadata/schemas/AssertionInfo.avsc +2 -2
  17. datahub/metadata/schemas/CorpUserSettings.avsc +9 -0
  18. datahub/metadata/schemas/Deprecation.avsc +12 -0
  19. datahub/metadata/schemas/DisplayProperties.avsc +62 -0
  20. datahub/metadata/schemas/MetadataChangeEvent.avsc +12 -0
  21. datahub/metadata/schemas/PostInfo.avsc +28 -2
  22. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  23. {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/LICENSE +0 -0
  24. {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/WHEEL +0 -0
  25. {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=k2KgFitDFziMKt2iil2MW_OYVP7lggtvH9A6OAJFD9c,324
3
+ datahub/_version.py,sha256=glwuUeYA29D6Lba9dmX7Q88WndlOTabakvHafVUtwjM,324
4
4
  datahub/entrypoints.py,sha256=osv2ailvuW-HHlAE0fOtyblJI1X7HInZutd9DC66jqQ,8022
5
5
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -42,7 +42,7 @@ datahub/api/entities/datajob/__init__.py,sha256=suzCs4cLphQ64oDG7meXsJVp6ya9_Glz
42
42
  datahub/api/entities/datajob/dataflow.py,sha256=Ajs6XP7-WMK4OwSm0JdFuGwggZhx-l5S-fwor68BBYk,6954
43
43
  datahub/api/entities/datajob/datajob.py,sha256=NxaRhpr135Pq1XAxcATEXfXYennT92GL_b7NROUxI98,7202
44
44
  datahub/api/entities/dataprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
- datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=i_ZrhyXfcbVs8d8s4kLxZbUUyul-2iyBBgf7Yw82flM,14962
45
+ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=PIiGXTQLPWl_1rY43N1H-ygDXyzQrSQl0FOLhZ9-YX4,18853
46
46
  datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
48
48
  datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,7 +62,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
62
  datahub/cli/check_cli.py,sha256=ajrWVMAHYbgvYi4OFitFXx7Y6oigvZFgIeUiKV9ECik,12859
63
63
  datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
64
64
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
65
- datahub/cli/container_cli.py,sha256=8D73hLfTHsDg4Cedh_2x0utl7ppOeB1TUJVRgur-Crw,3624
65
+ datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
66
66
  datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
67
67
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
68
68
  datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
@@ -113,6 +113,7 @@ datahub/configuration/validate_multiline_string.py,sha256=l9PF6_EAC_1lWxU_RWrvPB
113
113
  datahub/configuration/yaml.py,sha256=dLmjCalPOjgdc7mmJxtlP7uOrIHZiAWxD1gwAFOdtUU,308
114
114
  datahub/emitter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
115
  datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
116
+ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP8V0fioM,1212
116
117
  datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
117
118
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
118
119
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
@@ -217,6 +218,8 @@ datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4
217
218
  datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
218
219
  datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
219
220
  datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
221
+ datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
+ datahub/ingestion/source/apply/datahub_apply.py,sha256=NGhbszi6ObfJoh3YPGjcVWHJsbNboVwKUgnrCSt8HJU,7629
220
223
  datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
224
  datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
222
225
  datahub/ingestion/source/aws/glue.py,sha256=DwROr923M01QnvImUbMoHS6TTTT9kBz2tEmQ3Sv4EoY,58019
@@ -277,7 +280,7 @@ datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7eP
277
280
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
278
281
  datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
279
282
  datahub/ingestion/source/dbt/dbt_common.py,sha256=y4VINaQQ-WhEf-rICGLGi1U88nKmRdVQPmh88OJROWg,80536
280
- datahub/ingestion/source/dbt/dbt_core.py,sha256=m6cA9vVd4Nh2arc-T2_xeQoxvreRbMhTDIJuYsx3wHc,22722
283
+ datahub/ingestion/source/dbt/dbt_core.py,sha256=SHtZg8ZAtmUwegpltIU8MhxBYuB_-oPOY4iBXc4SQIY,22713
281
284
  datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
282
285
  datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
283
286
  datahub/ingestion/source/delta_lake/config.py,sha256=bVBwGjCPiXyjbCLQsamt4hAsKJMtMuxupKjwZEwtU78,3374
@@ -461,7 +464,7 @@ datahub/ingestion/source/sql/oracle.py,sha256=tVP3AiZO97psM8O8UzBb9C7__s8y4fkyQb
461
464
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
462
465
  datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
463
466
  datahub/ingestion/source/sql/sql_common.py,sha256=E1QmJ35ZuDLiZj-s1niHvIdNMyEsZrwvq_Wuy2EoYMQ,48586
464
- datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxvJ6cLbCIea7Mww,9449
467
+ datahub/ingestion/source/sql/sql_config.py,sha256=CBXkCpzBAGrWAXJFte_i5TmpzcsMJwEjGHpfzd6vAow,8964
465
468
  datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
466
469
  datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
467
470
  datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6BpkwekxGrsvNMGYSGDio,2671
@@ -516,7 +519,7 @@ datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-j
516
519
  datahub/ingestion/source/usage/usage_common.py,sha256=YGszLjmESiUXnpcPfnyQHtoM57HyWsIiLOQd5_sxECg,12238
517
520
  datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
518
521
  datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1pZSi33tzW9sQb7ZEgac,1733
519
- datahub/ingestion/source_config/operation_config.py,sha256=Q0NlqiEh4s4DFIII5NsAp5hxWTVyyJz-ldcQmH-B47s,3504
522
+ datahub/ingestion/source_config/operation_config.py,sha256=hxF2RM0jk0HUPXYiliMniXBC-wz-ZPcs90ZGLfHT8rE,3924
520
523
  datahub/ingestion/source_config/pulsar.py,sha256=sklDkh62CrWV-i7Ifh6R3T3smYVso6gyRJG8HVc6RdA,5533
521
524
  datahub/ingestion/source_report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
522
525
  datahub/ingestion/source_report/ingestion_stage.py,sha256=SU_FKFZhShZATLcFr735i_hWpdqNGdAWoZxh22p3P1k,1787
@@ -568,8 +571,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
568
571
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
569
572
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
570
573
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
571
- datahub/metadata/_schema_classes.py,sha256=GMLN7Ov0m39EWaXlziVgINqxhihZDzNy2BztBIR9YM8,975061
572
- datahub/metadata/schema.avsc,sha256=sAPtgHSNJ1a126Vz7OjVIMKFjrrIG9f4cvRH6SkJ0jc,640786
574
+ datahub/metadata/_schema_classes.py,sha256=qfhPuX_aX16gaIzfsvAPE4ZxXtU0xI-cLoStOqKKE-c,979680
575
+ datahub/metadata/schema.avsc,sha256=ogTZxGz8yS6daHcxFzNpfnNNvwXtegrGXj4VgQdsEO4,643954
573
576
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
574
577
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
575
578
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -583,7 +586,7 @@ datahub/metadata/com/linkedin/pegasus2avro/access/token/__init__.py,sha256=P9M7N
583
586
  datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py,sha256=PgK5O-6pVRaEcvmwXAsSkwRLe8NjGiLH8AVBXeArqK8,5751
584
587
  datahub/metadata/com/linkedin/pegasus2avro/businessattribute/__init__.py,sha256=N8kO-eUi0_Rt7weizIExxlnJ2_kZRtPrZLWCC1xtDMA,653
585
588
  datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py,sha256=RNyyHLBNp_fxgFcBOLWO2UsXR1ofD_JczcBdPEQSusg,848
586
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=x3AG1BxTAQijzsm_eKaP0P9VFraUT32d0glfvbQBrVI,5618
589
+ datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=D5rQ4RNxPzVaZOedwUup9-LMy1pIBhQxL7EQ7jaxvaM,5885
587
590
  datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py,sha256=FN63vLiB3FCmIRqBjTA-0Xt7M6i7h5NhaVzbA1ysv18,396
588
591
  datahub/metadata/com/linkedin/pegasus2avro/connection/__init__.py,sha256=qRtw-dB14pzVzgQ0pDK8kyBplNdpRxVKNj4D70e_FqI,564
589
592
  datahub/metadata/com/linkedin/pegasus2avro/container/__init__.py,sha256=3yWt36KqDKFhRc9pzvt0AMnbMTlhKurGvT3BUvc25QU,510
@@ -648,7 +651,7 @@ datahub/metadata/com/linkedin/pegasus2avro/view/__init__.py,sha256=-Le-jOqUJKv3p
648
651
  datahub/metadata/schemas/Access.avsc,sha256=gdEfWJLkvjIz-jzlceK4Dl5pBDdCHG423Ba_EYGQgUk,1562
649
652
  datahub/metadata/schemas/Actors.avsc,sha256=M76L2_Dlp7VyhVtu9__jhnh8rBNvNobtNJUfvl7bcPE,1188
650
653
  datahub/metadata/schemas/AssertionActions.avsc,sha256=zrvXzX2Nv_dmK6I3ZXCVWQ1bVs1q01Gl9sDRJA8oSDU,1618
651
- datahub/metadata/schemas/AssertionInfo.avsc,sha256=BRkjHzkTCjfqaYpm8EDiLtTAhN50yMkHH5eE3QLpGbY,125083
654
+ datahub/metadata/schemas/AssertionInfo.avsc,sha256=ElESZKMXPjp_ay1BR8jWrY8jD4dehY2p5JgUvBI6xhM,125123
652
655
  datahub/metadata/schemas/AssertionKey.avsc,sha256=EjNaTyzGxtWzTsZd87P2ZSaGX5dn8Y7HGBdqvlQVrFI,638
653
656
  datahub/metadata/schemas/AssertionRunEvent.avsc,sha256=FUyV73bUliBC-a_XFUlfgh75o99-lu1fl36b2q8Pqx4,12886
654
657
  datahub/metadata/schemas/BrowsePaths.avsc,sha256=NR_4dKuJMk1X2RB4DLkHVSqyMQc4PvT7eR0n6lM5aOM,654
@@ -671,7 +674,7 @@ datahub/metadata/schemas/CorpUserCredentials.avsc,sha256=S7FkV9K_DGxhb4GFYbM5_lP
671
674
  datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=VazSsT1oQZNHeG8rAXPSKV79W6ZcCgUL1J7yKJUTDvU,3380
672
675
  datahub/metadata/schemas/CorpUserInfo.avsc,sha256=jcfg1NI5DeJsmFEIYy6ZI65mDDwJRfdTu3WeltfeU1A,3686
673
676
  datahub/metadata/schemas/CorpUserKey.avsc,sha256=-Spvvcss0sJoADygdChWa99rYiMMRwEx77GvW-jLTN0,984
674
- datahub/metadata/schemas/CorpUserSettings.avsc,sha256=MQEquhbkAs8zVlqnxYplR7WiyLKfFMiU9a4VHYLyavA,1847
677
+ datahub/metadata/schemas/CorpUserSettings.avsc,sha256=fS2HUD0L9_rsPyqo0DRxibzPi8_IkkWTY6Zuqob1sPg,2097
675
678
  datahub/metadata/schemas/CorpUserStatus.avsc,sha256=yqojAXEQ9CjRhY58RPyTUxzmFbHSANGGaMMbqiYZZIE,2538
676
679
  datahub/metadata/schemas/Cost.avsc,sha256=o4kYZSss2uEwJ6gCA9fhBUoyD5xUqcSxz78vkIXXzGQ,1494
677
680
  datahub/metadata/schemas/DashboardInfo.avsc,sha256=kuRyOSQWRvV9ydhozTWKWrNfbD8ZNeWawGGR4xSHhaI,12917
@@ -735,7 +738,8 @@ datahub/metadata/schemas/DatasetProfile.avsc,sha256=3ZCU9JD6l2razACp0AY6LLMgnkMT
735
738
  datahub/metadata/schemas/DatasetProperties.avsc,sha256=DFJn75feqaoQk84zin_o_lqsFFhqkwya5LGC5LLJXbU,4209
736
739
  datahub/metadata/schemas/DatasetUpstreamLineage.avsc,sha256=PjAWPbsqwH7FjX2kFDy0dE6ENYOwRynH9vJerWisr2A,5365
737
740
  datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=JKNy_KlUqr3kt7o1Cu2DiQx3XUG_NQbVFVpeNr4ntCc,6999
738
- datahub/metadata/schemas/Deprecation.avsc,sha256=KItgnvran9xSSpAjmt2RYsF_5ERh5FnlygeMT6bnhiA,1053
741
+ datahub/metadata/schemas/Deprecation.avsc,sha256=SmbTlMB9fujdMBjYEQkzaU4XJzwM1gD6E8L2zoL1b4Q,1280
742
+ datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
739
743
  datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
740
744
  datahub/metadata/schemas/DomainKey.avsc,sha256=1_kbsMTsO2ebB3zW7KpB71QfkGGR0mAgpNOKRoWHsJU,649
741
745
  datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
@@ -806,7 +810,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
806
810
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
807
811
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
808
812
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
809
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=mk-ggA71SFW1H_BdzaLXMgH01wVzImCNPviGiH_v94Y,372781
813
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=pbt_zFR9fPRm5ek1cuYa1eMCNs3aD7F6jOruQaee2NY,373224
810
814
  datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
811
815
  datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
812
816
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
@@ -823,7 +827,7 @@ datahub/metadata/schemas/PartitionsSummary.avsc,sha256=bl6_ElsVS8OowD5HsfQo9ZhfH
823
827
  datahub/metadata/schemas/PlatformEvent.avsc,sha256=W3BmAMBVGlAXcWvVv4Yy3NUJs-t-apxNdtyYI-uPH1g,1568
824
828
  datahub/metadata/schemas/PlatformResourceInfo.avsc,sha256=k67bOPnoOwn1-3TldvWZqPrCwedgt2QNGhqmeo0jAys,4146
825
829
  datahub/metadata/schemas/PlatformResourceKey.avsc,sha256=XLRvsdUD73r2GaKidYYC0-FK-naclWM1FBw0L_28xaI,1687
826
- datahub/metadata/schemas/PostInfo.avsc,sha256=tPD_4SWGst3oWbIAPKHYSpC_K6_gKNaYDTj1EqLitJQ,7208
830
+ datahub/metadata/schemas/PostInfo.avsc,sha256=CxL1Z-6jWO-xQ7mei5dwAFe_U7sfkQrkSBP5Aab98lU,7884
827
831
  datahub/metadata/schemas/PostKey.avsc,sha256=ITlTYSqVPbmiE-6EmwNN50VZRrZGnzjj8551Gs0eOHY,457
828
832
  datahub/metadata/schemas/QuantitativeAnalyses.avsc,sha256=XrGYVoGUh7ZRjAbMORSiqF5Zl4a0IbYDPiQuSJhABZM,960
829
833
  datahub/metadata/schemas/QueryKey.avsc,sha256=VI4oIHvAO7f0lN_7V3QVuBfHcPz31c57XtW6IrlDfxc,518
@@ -835,7 +839,7 @@ datahub/metadata/schemas/RoleMembership.avsc,sha256=Al3LXKRowCiHhgTfwr3a-piID3Ld
835
839
  datahub/metadata/schemas/RoleProperties.avsc,sha256=tDw-WF1uBGIcrk38nOnXs3FCF_YjBhScarJbreQvwjE,3037
836
840
  datahub/metadata/schemas/SchemaFieldAliases.avsc,sha256=El_cxn0KUhMf2LGfMPzcZ6Xtths2wQOaF9fnM1KQmxQ,560
837
841
  datahub/metadata/schemas/SchemaFieldInfo.avsc,sha256=Gf9EGqrEf10554hd4Eut7T8ZdOR-9OHgDXVRhFo311o,800
838
- datahub/metadata/schemas/SchemaFieldKey.avsc,sha256=ClAugan-eR71rp38YJklEglca8EW5MMAxEQLoSX-L6Y,946
842
+ datahub/metadata/schemas/SchemaFieldKey.avsc,sha256=TK_RiRbTm9965OxlQH28-x-KH0lpavW3zMlXx5ChnJ0,967
839
843
  datahub/metadata/schemas/SchemaMetadata.avsc,sha256=1rUs2G3tpG02FNxRGMlRaW0FDeTEwS36Rmh2Obz4h40,40849
840
844
  datahub/metadata/schemas/Siblings.avsc,sha256=NTktntlHuA1InH3TgrspWlFBntYlqmp3erUd-JFMsps,842
841
845
  datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
@@ -993,9 +997,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
993
997
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
994
998
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
995
999
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
996
- acryl_datahub-0.15.0.5rc5.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
997
- acryl_datahub-0.15.0.5rc5.dist-info/METADATA,sha256=iUCOkI7iz8GUQSvFsn9nLdzi1GxoElLbxrV96MnC9BM,173382
998
- acryl_datahub-0.15.0.5rc5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
999
- acryl_datahub-0.15.0.5rc5.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
1000
- acryl_datahub-0.15.0.5rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1001
- acryl_datahub-0.15.0.5rc5.dist-info/RECORD,,
1000
+ acryl_datahub-0.15.0.5rc7.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1001
+ acryl_datahub-0.15.0.5rc7.dist-info/METADATA,sha256=CTqpaDCQfvjCXNo_ReZIQHhdGq48p6usxoVlxcWzAXY,173703
1002
+ acryl_datahub-0.15.0.5rc7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1003
+ acryl_datahub-0.15.0.5rc7.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1004
+ acryl_datahub-0.15.0.5rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1005
+ acryl_datahub-0.15.0.5rc7.dist-info/RECORD,,
@@ -34,6 +34,7 @@ clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsa
34
34
  cockroachdb = datahub.ingestion.source.sql.cockroachdb:CockroachDBSource
35
35
  csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
36
36
  datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
37
+ datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
37
38
  datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
38
39
  datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
39
40
  datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "0.15.0.5rc5"
3
+ __version__ = "0.15.0.5rc7"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -5,7 +5,7 @@ from typing import Callable, Dict, Iterable, List, Optional, Union, cast
5
5
  from datahub.api.entities.datajob import DataFlow, DataJob
6
6
  from datahub.emitter.generic_emitter import Emitter
7
7
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
8
- from datahub.emitter.mcp_builder import DatahubKey
8
+ from datahub.emitter.mcp_builder import ContainerKey, DatahubKey
9
9
  from datahub.metadata.com.linkedin.pegasus2avro.dataprocess import (
10
10
  DataProcessInstanceInput,
11
11
  DataProcessInstanceOutput,
@@ -15,11 +15,15 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataprocess import (
15
15
  )
16
16
  from datahub.metadata.schema_classes import (
17
17
  AuditStampClass,
18
+ ContainerClass,
19
+ DataPlatformInstanceClass,
18
20
  DataProcessInstanceRunEventClass,
19
21
  DataProcessInstanceRunResultClass,
20
22
  DataProcessRunStatusClass,
21
23
  DataProcessTypeClass,
24
+ SubTypesClass,
22
25
  )
26
+ from datahub.metadata.urns import DataPlatformInstanceUrn, DataPlatformUrn
23
27
  from datahub.utilities.str_enum import StrEnum
24
28
  from datahub.utilities.urns.data_flow_urn import DataFlowUrn
25
29
  from datahub.utilities.urns.data_job_urn import DataJobUrn
@@ -42,7 +46,7 @@ class InstanceRunResult(StrEnum):
42
46
 
43
47
  @dataclass
44
48
  class DataProcessInstance:
45
- """This is a DataProcessInstance class which represent an instance of a DataFlow or DataJob.
49
+ """This is a DataProcessInstance class which represents an instance of a DataFlow, DataJob, or a standalone process within a Container.
46
50
 
47
51
  Args:
48
52
  id: The id of the dataprocess instance execution.
@@ -71,6 +75,10 @@ class DataProcessInstance:
71
75
  _template_object: Optional[Union[DataJob, DataFlow]] = field(
72
76
  init=False, default=None, repr=False
73
77
  )
78
+ data_platform_instance: Optional[str] = None
79
+ subtype: Optional[str] = None
80
+ container_urn: Optional[str] = None
81
+ _platform: Optional[str] = field(init=False, repr=False, default=None)
74
82
 
75
83
  def __post_init__(self):
76
84
  self.urn = DataProcessInstanceUrn(
@@ -80,6 +88,28 @@ class DataProcessInstance:
80
88
  id=self.id,
81
89
  ).guid()
82
90
  )
91
+ self._platform = self.orchestrator
92
+
93
+ try:
94
+ # We first try to create from string assuming its an urn
95
+ self._platform = str(DataPlatformUrn.from_string(self._platform))
96
+ except Exception:
97
+ # If it fails, we assume its an id
98
+ self._platform = str(DataPlatformUrn(self._platform))
99
+
100
+ if self.data_platform_instance is not None:
101
+ try:
102
+ # We first try to create from string assuming its an urn
103
+ self.data_platform_instance = str(
104
+ DataPlatformInstanceUrn.from_string(self.data_platform_instance)
105
+ )
106
+ except Exception:
107
+ # If it fails, we assume its an id
108
+ self.data_platform_instance = str(
109
+ DataPlatformInstanceUrn(
110
+ platform=self._platform, instance=self.data_platform_instance
111
+ )
112
+ )
83
113
 
84
114
  def start_event_mcp(
85
115
  self, start_timestamp_millis: int, attempt: Optional[int] = None
@@ -269,6 +299,29 @@ class DataProcessInstance:
269
299
  )
270
300
  yield mcp
271
301
 
302
+ assert self._platform
303
+ if self.data_platform_instance:
304
+ mcp = MetadataChangeProposalWrapper(
305
+ entityUrn=str(self.urn),
306
+ aspect=DataPlatformInstanceClass(
307
+ platform=self._platform, instance=self.data_platform_instance
308
+ ),
309
+ )
310
+ yield mcp
311
+
312
+ if self.subtype:
313
+ mcp = MetadataChangeProposalWrapper(
314
+ entityUrn=str(self.urn), aspect=SubTypesClass(typeNames=[self.subtype])
315
+ )
316
+ yield mcp
317
+
318
+ if self.container_urn:
319
+ mcp = MetadataChangeProposalWrapper(
320
+ entityUrn=str(self.urn),
321
+ aspect=ContainerClass(container=self.container_urn),
322
+ )
323
+ yield mcp
324
+
272
325
  yield from self.generate_inlet_outlet_mcp(materialize_iolets=materialize_iolets)
273
326
 
274
327
  @staticmethod
@@ -309,13 +362,20 @@ class DataProcessInstance:
309
362
  clone_outlets: bool = False,
310
363
  ) -> "DataProcessInstance":
311
364
  """
312
- Generates DataProcessInstance from a DataJob
365
+ Generates a DataProcessInstance from a given DataJob.
313
366
 
314
- :param datajob: (DataJob) the datajob from generate the DataProcessInstance
315
- :param id: (str) the id for the DataProcessInstance
316
- :param clone_inlets: (bool) whether to clone datajob's inlets
317
- :param clone_outlets: (bool) whether to clone datajob's outlets
318
- :return: DataProcessInstance
367
+ This method creates a DataProcessInstance object using the provided DataJob
368
+ and assigns it a unique identifier. Optionally, it can clone the inlets and
369
+ outlets from the DataJob to the DataProcessInstance.
370
+
371
+ Args:
372
+ datajob (DataJob): The DataJob instance from which to generate the DataProcessInstance.
373
+ id (str): The unique identifier for the DataProcessInstance.
374
+ clone_inlets (bool, optional): If True, clones the inlets from the DataJob to the DataProcessInstance. Defaults to False.
375
+ clone_outlets (bool, optional): If True, clones the outlets from the DataJob to the DataProcessInstance. Defaults to False.
376
+
377
+ Returns:
378
+ DataProcessInstance: The generated DataProcessInstance object.
319
379
  """
320
380
  dpi: DataProcessInstance = DataProcessInstance(
321
381
  orchestrator=datajob.flow_urn.orchestrator,
@@ -332,14 +392,47 @@ class DataProcessInstance:
332
392
  return dpi
333
393
 
334
394
  @staticmethod
335
- def from_dataflow(dataflow: DataFlow, id: str) -> "DataProcessInstance":
395
+ def from_container(
396
+ container_key: ContainerKey,
397
+ id: str,
398
+ ) -> "DataProcessInstance":
336
399
  """
337
- Generates DataProcessInstance from a DataFlow
400
+ Create a DataProcessInstance that is located within a Container.
401
+ Use this method when you need to represent a DataProcessInstance that
402
+ is not an instance of a DataJob or a DataFlow.
403
+ e.g. If recording an ad-hoc training run that is just associated with an Experiment.
338
404
 
339
- :param dataflow: (DataFlow) the DataFlow from generate the DataProcessInstance
405
+ :param container_key: (ContainerKey) the container key to generate the DataProcessInstance
340
406
  :param id: (str) the id for the DataProcessInstance
341
407
  :return: DataProcessInstance
342
408
  """
409
+ dpi: DataProcessInstance = DataProcessInstance(
410
+ id=id,
411
+ orchestrator=DataPlatformUrn.from_string(
412
+ container_key.platform
413
+ ).platform_name,
414
+ template_urn=None,
415
+ container_urn=container_key.as_urn(),
416
+ )
417
+
418
+ return dpi
419
+
420
+ @staticmethod
421
+ def from_dataflow(dataflow: DataFlow, id: str) -> "DataProcessInstance":
422
+ """
423
+ Creates a DataProcessInstance from a given DataFlow.
424
+
425
+ This method generates a DataProcessInstance object using the provided DataFlow
426
+ and a specified id. The DataProcessInstance will inherit properties from the
427
+ DataFlow such as orchestrator, environment, and template URN.
428
+
429
+ Args:
430
+ dataflow (DataFlow): The DataFlow object from which to generate the DataProcessInstance.
431
+ id (str): The unique identifier for the DataProcessInstance.
432
+
433
+ Returns:
434
+ DataProcessInstance: The newly created DataProcessInstance object.
435
+ """
343
436
  dpi = DataProcessInstance(
344
437
  id=id,
345
438
  orchestrator=dataflow.orchestrator,
@@ -1,19 +1,8 @@
1
1
  import logging
2
- from typing import Any, List
3
2
 
4
3
  import click
5
- import progressbar
6
4
 
7
- from datahub.emitter.mcp import MetadataChangeProposalWrapper
8
- from datahub.ingestion.graph.client import get_default_graph
9
- from datahub.metadata.schema_classes import (
10
- DomainsClass,
11
- GlossaryTermAssociationClass,
12
- OwnerClass,
13
- OwnershipTypeClass,
14
- TagAssociationClass,
15
- )
16
- from datahub.specific.dataset import DatasetPatchBuilder
5
+ from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container
17
6
 
18
7
  logger = logging.getLogger(__name__)
19
8
 
@@ -24,58 +13,6 @@ def container() -> None:
24
13
  pass
25
14
 
26
15
 
27
- def apply_association_to_container(
28
- container_urn: str,
29
- association_urn: str,
30
- association_type: str,
31
- ) -> None:
32
- """
33
- Common function to add either tags, terms, domains, or owners to child datasets (for now).
34
-
35
- Args:
36
- container_urn: The URN of the container
37
- association_urn: The URN of the tag, term, or user to apply
38
- association_type: One of 'tag', 'term', 'domain' or 'owner'
39
- """
40
- urns: List[str] = []
41
- graph = get_default_graph()
42
- logger.info(f"Using {graph}")
43
- urns.extend(
44
- graph.get_urns_by_filter(
45
- container=container_urn, batch_size=1000, entity_types=["dataset"]
46
- )
47
- )
48
-
49
- all_patches: List[Any] = []
50
- for urn in urns:
51
- builder = DatasetPatchBuilder(urn)
52
- patches: List[Any] = []
53
- if association_type == "tag":
54
- patches = builder.add_tag(TagAssociationClass(association_urn)).build()
55
- elif association_type == "term":
56
- patches = builder.add_term(
57
- GlossaryTermAssociationClass(association_urn)
58
- ).build()
59
- elif association_type == "owner":
60
- patches = builder.add_owner(
61
- OwnerClass(
62
- owner=association_urn,
63
- type=OwnershipTypeClass.TECHNICAL_OWNER,
64
- )
65
- ).build()
66
- elif association_type == "domain":
67
- patches = [
68
- MetadataChangeProposalWrapper(
69
- entityUrn=urn,
70
- aspect=DomainsClass(domains=[association_urn]),
71
- )
72
- ]
73
- all_patches.extend(patches)
74
- mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
75
- for mcp in mcps_iter:
76
- graph.emit(mcp)
77
-
78
-
79
16
  @container.command()
80
17
  @click.option("--container-urn", required=True, type=str)
81
18
  @click.option("--tag-urn", required=True, type=str)
@@ -0,0 +1,36 @@
1
+ from typing import Callable, List, Optional, Union
2
+
3
+ from datahub.emitter.generic_emitter import Emitter
4
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
5
+ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
6
+ MetadataChangeEvent,
7
+ MetadataChangeProposal,
8
+ )
9
+
10
+
11
+ # Experimental composite emitter that allows multiple emitters to be used in a single ingestion job
12
+ class CompositeEmitter(Emitter):
13
+ def __init__(self, emitters: List[Emitter]) -> None:
14
+ self.emitters = emitters
15
+
16
+ def emit(
17
+ self,
18
+ item: Union[
19
+ MetadataChangeEvent,
20
+ MetadataChangeProposal,
21
+ MetadataChangeProposalWrapper,
22
+ ],
23
+ callback: Optional[Callable[[Exception, str], None]] = None,
24
+ ) -> None:
25
+ callback_called = False
26
+ for emitter in self.emitters:
27
+ if not callback_called:
28
+ # We want to ensure that the callback is only called once and we tie it to the first emitter
29
+ emitter.emit(item, callback)
30
+ callback_called = True
31
+ else:
32
+ emitter.emit(item)
33
+
34
+ def flush(self) -> None:
35
+ for emitter in self.emitters:
36
+ emitter.flush()
File without changes