acryl-datahub 0.15.0.5rc5__py3-none-any.whl → 0.15.0.5rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/METADATA +2423 -2416
- {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/RECORD +25 -21
- {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataprocess/dataprocess_instance.py +104 -11
- datahub/cli/container_cli.py +1 -64
- datahub/emitter/composite_emitter.py +36 -0
- datahub/ingestion/source/apply/__init__.py +0 -0
- datahub/ingestion/source/apply/datahub_apply.py +223 -0
- datahub/ingestion/source/dbt/dbt_core.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +0 -10
- datahub/ingestion/source_config/operation_config.py +9 -0
- datahub/metadata/_schema_classes.py +144 -4
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- datahub/metadata/schema.avsc +115 -5
- datahub/metadata/schemas/AssertionInfo.avsc +2 -2
- datahub/metadata/schemas/CorpUserSettings.avsc +9 -0
- datahub/metadata/schemas/Deprecation.avsc +12 -0
- datahub/metadata/schemas/DisplayProperties.avsc +62 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +12 -0
- datahub/metadata/schemas/PostInfo.avsc +28 -2
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=glwuUeYA29D6Lba9dmX7Q88WndlOTabakvHafVUtwjM,324
|
|
4
4
|
datahub/entrypoints.py,sha256=osv2ailvuW-HHlAE0fOtyblJI1X7HInZutd9DC66jqQ,8022
|
|
5
5
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -42,7 +42,7 @@ datahub/api/entities/datajob/__init__.py,sha256=suzCs4cLphQ64oDG7meXsJVp6ya9_Glz
|
|
|
42
42
|
datahub/api/entities/datajob/dataflow.py,sha256=Ajs6XP7-WMK4OwSm0JdFuGwggZhx-l5S-fwor68BBYk,6954
|
|
43
43
|
datahub/api/entities/datajob/datajob.py,sha256=NxaRhpr135Pq1XAxcATEXfXYennT92GL_b7NROUxI98,7202
|
|
44
44
|
datahub/api/entities/dataprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
|
-
datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=
|
|
45
|
+
datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=PIiGXTQLPWl_1rY43N1H-ygDXyzQrSQl0FOLhZ9-YX4,18853
|
|
46
46
|
datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
47
|
datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
|
|
48
48
|
datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -62,7 +62,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
62
62
|
datahub/cli/check_cli.py,sha256=ajrWVMAHYbgvYi4OFitFXx7Y6oigvZFgIeUiKV9ECik,12859
|
|
63
63
|
datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
|
|
64
64
|
datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
|
|
65
|
-
datahub/cli/container_cli.py,sha256=
|
|
65
|
+
datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
|
|
66
66
|
datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
|
|
67
67
|
datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
|
|
68
68
|
datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
|
|
@@ -113,6 +113,7 @@ datahub/configuration/validate_multiline_string.py,sha256=l9PF6_EAC_1lWxU_RWrvPB
|
|
|
113
113
|
datahub/configuration/yaml.py,sha256=dLmjCalPOjgdc7mmJxtlP7uOrIHZiAWxD1gwAFOdtUU,308
|
|
114
114
|
datahub/emitter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
115
|
datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
|
|
116
|
+
datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP8V0fioM,1212
|
|
116
117
|
datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
|
|
117
118
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
118
119
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
@@ -217,6 +218,8 @@ datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4
|
|
|
217
218
|
datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
218
219
|
datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
219
220
|
datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
|
|
221
|
+
datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
|
+
datahub/ingestion/source/apply/datahub_apply.py,sha256=NGhbszi6ObfJoh3YPGjcVWHJsbNboVwKUgnrCSt8HJU,7629
|
|
220
223
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
221
224
|
datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
|
|
222
225
|
datahub/ingestion/source/aws/glue.py,sha256=DwROr923M01QnvImUbMoHS6TTTT9kBz2tEmQ3Sv4EoY,58019
|
|
@@ -277,7 +280,7 @@ datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7eP
|
|
|
277
280
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
278
281
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
|
|
279
282
|
datahub/ingestion/source/dbt/dbt_common.py,sha256=y4VINaQQ-WhEf-rICGLGi1U88nKmRdVQPmh88OJROWg,80536
|
|
280
|
-
datahub/ingestion/source/dbt/dbt_core.py,sha256=
|
|
283
|
+
datahub/ingestion/source/dbt/dbt_core.py,sha256=SHtZg8ZAtmUwegpltIU8MhxBYuB_-oPOY4iBXc4SQIY,22713
|
|
281
284
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
|
|
282
285
|
datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
|
|
283
286
|
datahub/ingestion/source/delta_lake/config.py,sha256=bVBwGjCPiXyjbCLQsamt4hAsKJMtMuxupKjwZEwtU78,3374
|
|
@@ -461,7 +464,7 @@ datahub/ingestion/source/sql/oracle.py,sha256=tVP3AiZO97psM8O8UzBb9C7__s8y4fkyQb
|
|
|
461
464
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
462
465
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
463
466
|
datahub/ingestion/source/sql/sql_common.py,sha256=E1QmJ35ZuDLiZj-s1niHvIdNMyEsZrwvq_Wuy2EoYMQ,48586
|
|
464
|
-
datahub/ingestion/source/sql/sql_config.py,sha256=
|
|
467
|
+
datahub/ingestion/source/sql/sql_config.py,sha256=CBXkCpzBAGrWAXJFte_i5TmpzcsMJwEjGHpfzd6vAow,8964
|
|
465
468
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
466
469
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
|
|
467
470
|
datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6BpkwekxGrsvNMGYSGDio,2671
|
|
@@ -516,7 +519,7 @@ datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-j
|
|
|
516
519
|
datahub/ingestion/source/usage/usage_common.py,sha256=YGszLjmESiUXnpcPfnyQHtoM57HyWsIiLOQd5_sxECg,12238
|
|
517
520
|
datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
518
521
|
datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1pZSi33tzW9sQb7ZEgac,1733
|
|
519
|
-
datahub/ingestion/source_config/operation_config.py,sha256=
|
|
522
|
+
datahub/ingestion/source_config/operation_config.py,sha256=hxF2RM0jk0HUPXYiliMniXBC-wz-ZPcs90ZGLfHT8rE,3924
|
|
520
523
|
datahub/ingestion/source_config/pulsar.py,sha256=sklDkh62CrWV-i7Ifh6R3T3smYVso6gyRJG8HVc6RdA,5533
|
|
521
524
|
datahub/ingestion/source_report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
522
525
|
datahub/ingestion/source_report/ingestion_stage.py,sha256=SU_FKFZhShZATLcFr735i_hWpdqNGdAWoZxh22p3P1k,1787
|
|
@@ -568,8 +571,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
568
571
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
569
572
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
570
573
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
571
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
572
|
-
datahub/metadata/schema.avsc,sha256=
|
|
574
|
+
datahub/metadata/_schema_classes.py,sha256=qfhPuX_aX16gaIzfsvAPE4ZxXtU0xI-cLoStOqKKE-c,979680
|
|
575
|
+
datahub/metadata/schema.avsc,sha256=ogTZxGz8yS6daHcxFzNpfnNNvwXtegrGXj4VgQdsEO4,643954
|
|
573
576
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
574
577
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
575
578
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -583,7 +586,7 @@ datahub/metadata/com/linkedin/pegasus2avro/access/token/__init__.py,sha256=P9M7N
|
|
|
583
586
|
datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py,sha256=PgK5O-6pVRaEcvmwXAsSkwRLe8NjGiLH8AVBXeArqK8,5751
|
|
584
587
|
datahub/metadata/com/linkedin/pegasus2avro/businessattribute/__init__.py,sha256=N8kO-eUi0_Rt7weizIExxlnJ2_kZRtPrZLWCC1xtDMA,653
|
|
585
588
|
datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py,sha256=RNyyHLBNp_fxgFcBOLWO2UsXR1ofD_JczcBdPEQSusg,848
|
|
586
|
-
datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=
|
|
589
|
+
datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=D5rQ4RNxPzVaZOedwUup9-LMy1pIBhQxL7EQ7jaxvaM,5885
|
|
587
590
|
datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py,sha256=FN63vLiB3FCmIRqBjTA-0Xt7M6i7h5NhaVzbA1ysv18,396
|
|
588
591
|
datahub/metadata/com/linkedin/pegasus2avro/connection/__init__.py,sha256=qRtw-dB14pzVzgQ0pDK8kyBplNdpRxVKNj4D70e_FqI,564
|
|
589
592
|
datahub/metadata/com/linkedin/pegasus2avro/container/__init__.py,sha256=3yWt36KqDKFhRc9pzvt0AMnbMTlhKurGvT3BUvc25QU,510
|
|
@@ -648,7 +651,7 @@ datahub/metadata/com/linkedin/pegasus2avro/view/__init__.py,sha256=-Le-jOqUJKv3p
|
|
|
648
651
|
datahub/metadata/schemas/Access.avsc,sha256=gdEfWJLkvjIz-jzlceK4Dl5pBDdCHG423Ba_EYGQgUk,1562
|
|
649
652
|
datahub/metadata/schemas/Actors.avsc,sha256=M76L2_Dlp7VyhVtu9__jhnh8rBNvNobtNJUfvl7bcPE,1188
|
|
650
653
|
datahub/metadata/schemas/AssertionActions.avsc,sha256=zrvXzX2Nv_dmK6I3ZXCVWQ1bVs1q01Gl9sDRJA8oSDU,1618
|
|
651
|
-
datahub/metadata/schemas/AssertionInfo.avsc,sha256=
|
|
654
|
+
datahub/metadata/schemas/AssertionInfo.avsc,sha256=ElESZKMXPjp_ay1BR8jWrY8jD4dehY2p5JgUvBI6xhM,125123
|
|
652
655
|
datahub/metadata/schemas/AssertionKey.avsc,sha256=EjNaTyzGxtWzTsZd87P2ZSaGX5dn8Y7HGBdqvlQVrFI,638
|
|
653
656
|
datahub/metadata/schemas/AssertionRunEvent.avsc,sha256=FUyV73bUliBC-a_XFUlfgh75o99-lu1fl36b2q8Pqx4,12886
|
|
654
657
|
datahub/metadata/schemas/BrowsePaths.avsc,sha256=NR_4dKuJMk1X2RB4DLkHVSqyMQc4PvT7eR0n6lM5aOM,654
|
|
@@ -671,7 +674,7 @@ datahub/metadata/schemas/CorpUserCredentials.avsc,sha256=S7FkV9K_DGxhb4GFYbM5_lP
|
|
|
671
674
|
datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=VazSsT1oQZNHeG8rAXPSKV79W6ZcCgUL1J7yKJUTDvU,3380
|
|
672
675
|
datahub/metadata/schemas/CorpUserInfo.avsc,sha256=jcfg1NI5DeJsmFEIYy6ZI65mDDwJRfdTu3WeltfeU1A,3686
|
|
673
676
|
datahub/metadata/schemas/CorpUserKey.avsc,sha256=-Spvvcss0sJoADygdChWa99rYiMMRwEx77GvW-jLTN0,984
|
|
674
|
-
datahub/metadata/schemas/CorpUserSettings.avsc,sha256=
|
|
677
|
+
datahub/metadata/schemas/CorpUserSettings.avsc,sha256=fS2HUD0L9_rsPyqo0DRxibzPi8_IkkWTY6Zuqob1sPg,2097
|
|
675
678
|
datahub/metadata/schemas/CorpUserStatus.avsc,sha256=yqojAXEQ9CjRhY58RPyTUxzmFbHSANGGaMMbqiYZZIE,2538
|
|
676
679
|
datahub/metadata/schemas/Cost.avsc,sha256=o4kYZSss2uEwJ6gCA9fhBUoyD5xUqcSxz78vkIXXzGQ,1494
|
|
677
680
|
datahub/metadata/schemas/DashboardInfo.avsc,sha256=kuRyOSQWRvV9ydhozTWKWrNfbD8ZNeWawGGR4xSHhaI,12917
|
|
@@ -735,7 +738,8 @@ datahub/metadata/schemas/DatasetProfile.avsc,sha256=3ZCU9JD6l2razACp0AY6LLMgnkMT
|
|
|
735
738
|
datahub/metadata/schemas/DatasetProperties.avsc,sha256=DFJn75feqaoQk84zin_o_lqsFFhqkwya5LGC5LLJXbU,4209
|
|
736
739
|
datahub/metadata/schemas/DatasetUpstreamLineage.avsc,sha256=PjAWPbsqwH7FjX2kFDy0dE6ENYOwRynH9vJerWisr2A,5365
|
|
737
740
|
datahub/metadata/schemas/DatasetUsageStatistics.avsc,sha256=JKNy_KlUqr3kt7o1Cu2DiQx3XUG_NQbVFVpeNr4ntCc,6999
|
|
738
|
-
datahub/metadata/schemas/Deprecation.avsc,sha256=
|
|
741
|
+
datahub/metadata/schemas/Deprecation.avsc,sha256=SmbTlMB9fujdMBjYEQkzaU4XJzwM1gD6E8L2zoL1b4Q,1280
|
|
742
|
+
datahub/metadata/schemas/DisplayProperties.avsc,sha256=MTa_g2s0roxNFFggWU8rslUH3UFe3xe11uUXyh0Go_I,1732
|
|
739
743
|
datahub/metadata/schemas/Documentation.avsc,sha256=9vIJG9B08FFrC3y5c1XVaT5U3c-b5sOAc5foUxMnyCs,4836
|
|
740
744
|
datahub/metadata/schemas/DomainKey.avsc,sha256=1_kbsMTsO2ebB3zW7KpB71QfkGGR0mAgpNOKRoWHsJU,649
|
|
741
745
|
datahub/metadata/schemas/DomainProperties.avsc,sha256=6do6wZ9G6gyt1QowQyi1xldqgdTXspb05FaqWpKJ6eM,3843
|
|
@@ -806,7 +810,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
|
|
|
806
810
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
|
|
807
811
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
808
812
|
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
809
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=
|
|
813
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=pbt_zFR9fPRm5ek1cuYa1eMCNs3aD7F6jOruQaee2NY,373224
|
|
810
814
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
|
|
811
815
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
|
|
812
816
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -823,7 +827,7 @@ datahub/metadata/schemas/PartitionsSummary.avsc,sha256=bl6_ElsVS8OowD5HsfQo9ZhfH
|
|
|
823
827
|
datahub/metadata/schemas/PlatformEvent.avsc,sha256=W3BmAMBVGlAXcWvVv4Yy3NUJs-t-apxNdtyYI-uPH1g,1568
|
|
824
828
|
datahub/metadata/schemas/PlatformResourceInfo.avsc,sha256=k67bOPnoOwn1-3TldvWZqPrCwedgt2QNGhqmeo0jAys,4146
|
|
825
829
|
datahub/metadata/schemas/PlatformResourceKey.avsc,sha256=XLRvsdUD73r2GaKidYYC0-FK-naclWM1FBw0L_28xaI,1687
|
|
826
|
-
datahub/metadata/schemas/PostInfo.avsc,sha256=
|
|
830
|
+
datahub/metadata/schemas/PostInfo.avsc,sha256=CxL1Z-6jWO-xQ7mei5dwAFe_U7sfkQrkSBP5Aab98lU,7884
|
|
827
831
|
datahub/metadata/schemas/PostKey.avsc,sha256=ITlTYSqVPbmiE-6EmwNN50VZRrZGnzjj8551Gs0eOHY,457
|
|
828
832
|
datahub/metadata/schemas/QuantitativeAnalyses.avsc,sha256=XrGYVoGUh7ZRjAbMORSiqF5Zl4a0IbYDPiQuSJhABZM,960
|
|
829
833
|
datahub/metadata/schemas/QueryKey.avsc,sha256=VI4oIHvAO7f0lN_7V3QVuBfHcPz31c57XtW6IrlDfxc,518
|
|
@@ -835,7 +839,7 @@ datahub/metadata/schemas/RoleMembership.avsc,sha256=Al3LXKRowCiHhgTfwr3a-piID3Ld
|
|
|
835
839
|
datahub/metadata/schemas/RoleProperties.avsc,sha256=tDw-WF1uBGIcrk38nOnXs3FCF_YjBhScarJbreQvwjE,3037
|
|
836
840
|
datahub/metadata/schemas/SchemaFieldAliases.avsc,sha256=El_cxn0KUhMf2LGfMPzcZ6Xtths2wQOaF9fnM1KQmxQ,560
|
|
837
841
|
datahub/metadata/schemas/SchemaFieldInfo.avsc,sha256=Gf9EGqrEf10554hd4Eut7T8ZdOR-9OHgDXVRhFo311o,800
|
|
838
|
-
datahub/metadata/schemas/SchemaFieldKey.avsc,sha256=
|
|
842
|
+
datahub/metadata/schemas/SchemaFieldKey.avsc,sha256=TK_RiRbTm9965OxlQH28-x-KH0lpavW3zMlXx5ChnJ0,967
|
|
839
843
|
datahub/metadata/schemas/SchemaMetadata.avsc,sha256=1rUs2G3tpG02FNxRGMlRaW0FDeTEwS36Rmh2Obz4h40,40849
|
|
840
844
|
datahub/metadata/schemas/Siblings.avsc,sha256=NTktntlHuA1InH3TgrspWlFBntYlqmp3erUd-JFMsps,842
|
|
841
845
|
datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
|
|
@@ -993,9 +997,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
993
997
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
994
998
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
995
999
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
996
|
-
acryl_datahub-0.15.0.
|
|
997
|
-
acryl_datahub-0.15.0.
|
|
998
|
-
acryl_datahub-0.15.0.
|
|
999
|
-
acryl_datahub-0.15.0.
|
|
1000
|
-
acryl_datahub-0.15.0.
|
|
1001
|
-
acryl_datahub-0.15.0.
|
|
1000
|
+
acryl_datahub-0.15.0.5rc7.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1001
|
+
acryl_datahub-0.15.0.5rc7.dist-info/METADATA,sha256=CTqpaDCQfvjCXNo_ReZIQHhdGq48p6usxoVlxcWzAXY,173703
|
|
1002
|
+
acryl_datahub-0.15.0.5rc7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1003
|
+
acryl_datahub-0.15.0.5rc7.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1004
|
+
acryl_datahub-0.15.0.5rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1005
|
+
acryl_datahub-0.15.0.5rc7.dist-info/RECORD,,
|
{acryl_datahub-0.15.0.5rc5.dist-info → acryl_datahub-0.15.0.5rc7.dist-info}/entry_points.txt
RENAMED
|
@@ -34,6 +34,7 @@ clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsa
|
|
|
34
34
|
cockroachdb = datahub.ingestion.source.sql.cockroachdb:CockroachDBSource
|
|
35
35
|
csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
|
|
36
36
|
datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
|
|
37
|
+
datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
|
|
37
38
|
datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
|
|
38
39
|
datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
|
|
39
40
|
datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
|
datahub/_version.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import Callable, Dict, Iterable, List, Optional, Union, cast
|
|
|
5
5
|
from datahub.api.entities.datajob import DataFlow, DataJob
|
|
6
6
|
from datahub.emitter.generic_emitter import Emitter
|
|
7
7
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
8
|
-
from datahub.emitter.mcp_builder import DatahubKey
|
|
8
|
+
from datahub.emitter.mcp_builder import ContainerKey, DatahubKey
|
|
9
9
|
from datahub.metadata.com.linkedin.pegasus2avro.dataprocess import (
|
|
10
10
|
DataProcessInstanceInput,
|
|
11
11
|
DataProcessInstanceOutput,
|
|
@@ -15,11 +15,15 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataprocess import (
|
|
|
15
15
|
)
|
|
16
16
|
from datahub.metadata.schema_classes import (
|
|
17
17
|
AuditStampClass,
|
|
18
|
+
ContainerClass,
|
|
19
|
+
DataPlatformInstanceClass,
|
|
18
20
|
DataProcessInstanceRunEventClass,
|
|
19
21
|
DataProcessInstanceRunResultClass,
|
|
20
22
|
DataProcessRunStatusClass,
|
|
21
23
|
DataProcessTypeClass,
|
|
24
|
+
SubTypesClass,
|
|
22
25
|
)
|
|
26
|
+
from datahub.metadata.urns import DataPlatformInstanceUrn, DataPlatformUrn
|
|
23
27
|
from datahub.utilities.str_enum import StrEnum
|
|
24
28
|
from datahub.utilities.urns.data_flow_urn import DataFlowUrn
|
|
25
29
|
from datahub.utilities.urns.data_job_urn import DataJobUrn
|
|
@@ -42,7 +46,7 @@ class InstanceRunResult(StrEnum):
|
|
|
42
46
|
|
|
43
47
|
@dataclass
|
|
44
48
|
class DataProcessInstance:
|
|
45
|
-
"""This is a DataProcessInstance class which
|
|
49
|
+
"""This is a DataProcessInstance class which represents an instance of a DataFlow, DataJob, or a standalone process within a Container.
|
|
46
50
|
|
|
47
51
|
Args:
|
|
48
52
|
id: The id of the dataprocess instance execution.
|
|
@@ -71,6 +75,10 @@ class DataProcessInstance:
|
|
|
71
75
|
_template_object: Optional[Union[DataJob, DataFlow]] = field(
|
|
72
76
|
init=False, default=None, repr=False
|
|
73
77
|
)
|
|
78
|
+
data_platform_instance: Optional[str] = None
|
|
79
|
+
subtype: Optional[str] = None
|
|
80
|
+
container_urn: Optional[str] = None
|
|
81
|
+
_platform: Optional[str] = field(init=False, repr=False, default=None)
|
|
74
82
|
|
|
75
83
|
def __post_init__(self):
|
|
76
84
|
self.urn = DataProcessInstanceUrn(
|
|
@@ -80,6 +88,28 @@ class DataProcessInstance:
|
|
|
80
88
|
id=self.id,
|
|
81
89
|
).guid()
|
|
82
90
|
)
|
|
91
|
+
self._platform = self.orchestrator
|
|
92
|
+
|
|
93
|
+
try:
|
|
94
|
+
# We first try to create from string assuming its an urn
|
|
95
|
+
self._platform = str(DataPlatformUrn.from_string(self._platform))
|
|
96
|
+
except Exception:
|
|
97
|
+
# If it fails, we assume its an id
|
|
98
|
+
self._platform = str(DataPlatformUrn(self._platform))
|
|
99
|
+
|
|
100
|
+
if self.data_platform_instance is not None:
|
|
101
|
+
try:
|
|
102
|
+
# We first try to create from string assuming its an urn
|
|
103
|
+
self.data_platform_instance = str(
|
|
104
|
+
DataPlatformInstanceUrn.from_string(self.data_platform_instance)
|
|
105
|
+
)
|
|
106
|
+
except Exception:
|
|
107
|
+
# If it fails, we assume its an id
|
|
108
|
+
self.data_platform_instance = str(
|
|
109
|
+
DataPlatformInstanceUrn(
|
|
110
|
+
platform=self._platform, instance=self.data_platform_instance
|
|
111
|
+
)
|
|
112
|
+
)
|
|
83
113
|
|
|
84
114
|
def start_event_mcp(
|
|
85
115
|
self, start_timestamp_millis: int, attempt: Optional[int] = None
|
|
@@ -269,6 +299,29 @@ class DataProcessInstance:
|
|
|
269
299
|
)
|
|
270
300
|
yield mcp
|
|
271
301
|
|
|
302
|
+
assert self._platform
|
|
303
|
+
if self.data_platform_instance:
|
|
304
|
+
mcp = MetadataChangeProposalWrapper(
|
|
305
|
+
entityUrn=str(self.urn),
|
|
306
|
+
aspect=DataPlatformInstanceClass(
|
|
307
|
+
platform=self._platform, instance=self.data_platform_instance
|
|
308
|
+
),
|
|
309
|
+
)
|
|
310
|
+
yield mcp
|
|
311
|
+
|
|
312
|
+
if self.subtype:
|
|
313
|
+
mcp = MetadataChangeProposalWrapper(
|
|
314
|
+
entityUrn=str(self.urn), aspect=SubTypesClass(typeNames=[self.subtype])
|
|
315
|
+
)
|
|
316
|
+
yield mcp
|
|
317
|
+
|
|
318
|
+
if self.container_urn:
|
|
319
|
+
mcp = MetadataChangeProposalWrapper(
|
|
320
|
+
entityUrn=str(self.urn),
|
|
321
|
+
aspect=ContainerClass(container=self.container_urn),
|
|
322
|
+
)
|
|
323
|
+
yield mcp
|
|
324
|
+
|
|
272
325
|
yield from self.generate_inlet_outlet_mcp(materialize_iolets=materialize_iolets)
|
|
273
326
|
|
|
274
327
|
@staticmethod
|
|
@@ -309,13 +362,20 @@ class DataProcessInstance:
|
|
|
309
362
|
clone_outlets: bool = False,
|
|
310
363
|
) -> "DataProcessInstance":
|
|
311
364
|
"""
|
|
312
|
-
Generates DataProcessInstance from a DataJob
|
|
365
|
+
Generates a DataProcessInstance from a given DataJob.
|
|
313
366
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
:
|
|
367
|
+
This method creates a DataProcessInstance object using the provided DataJob
|
|
368
|
+
and assigns it a unique identifier. Optionally, it can clone the inlets and
|
|
369
|
+
outlets from the DataJob to the DataProcessInstance.
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
datajob (DataJob): The DataJob instance from which to generate the DataProcessInstance.
|
|
373
|
+
id (str): The unique identifier for the DataProcessInstance.
|
|
374
|
+
clone_inlets (bool, optional): If True, clones the inlets from the DataJob to the DataProcessInstance. Defaults to False.
|
|
375
|
+
clone_outlets (bool, optional): If True, clones the outlets from the DataJob to the DataProcessInstance. Defaults to False.
|
|
376
|
+
|
|
377
|
+
Returns:
|
|
378
|
+
DataProcessInstance: The generated DataProcessInstance object.
|
|
319
379
|
"""
|
|
320
380
|
dpi: DataProcessInstance = DataProcessInstance(
|
|
321
381
|
orchestrator=datajob.flow_urn.orchestrator,
|
|
@@ -332,14 +392,47 @@ class DataProcessInstance:
|
|
|
332
392
|
return dpi
|
|
333
393
|
|
|
334
394
|
@staticmethod
|
|
335
|
-
def
|
|
395
|
+
def from_container(
|
|
396
|
+
container_key: ContainerKey,
|
|
397
|
+
id: str,
|
|
398
|
+
) -> "DataProcessInstance":
|
|
336
399
|
"""
|
|
337
|
-
|
|
400
|
+
Create a DataProcessInstance that is located within a Container.
|
|
401
|
+
Use this method when you need to represent a DataProcessInstance that
|
|
402
|
+
is not an instance of a DataJob or a DataFlow.
|
|
403
|
+
e.g. If recording an ad-hoc training run that is just associated with an Experiment.
|
|
338
404
|
|
|
339
|
-
:param
|
|
405
|
+
:param container_key: (ContainerKey) the container key to generate the DataProcessInstance
|
|
340
406
|
:param id: (str) the id for the DataProcessInstance
|
|
341
407
|
:return: DataProcessInstance
|
|
342
408
|
"""
|
|
409
|
+
dpi: DataProcessInstance = DataProcessInstance(
|
|
410
|
+
id=id,
|
|
411
|
+
orchestrator=DataPlatformUrn.from_string(
|
|
412
|
+
container_key.platform
|
|
413
|
+
).platform_name,
|
|
414
|
+
template_urn=None,
|
|
415
|
+
container_urn=container_key.as_urn(),
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
return dpi
|
|
419
|
+
|
|
420
|
+
@staticmethod
|
|
421
|
+
def from_dataflow(dataflow: DataFlow, id: str) -> "DataProcessInstance":
|
|
422
|
+
"""
|
|
423
|
+
Creates a DataProcessInstance from a given DataFlow.
|
|
424
|
+
|
|
425
|
+
This method generates a DataProcessInstance object using the provided DataFlow
|
|
426
|
+
and a specified id. The DataProcessInstance will inherit properties from the
|
|
427
|
+
DataFlow such as orchestrator, environment, and template URN.
|
|
428
|
+
|
|
429
|
+
Args:
|
|
430
|
+
dataflow (DataFlow): The DataFlow object from which to generate the DataProcessInstance.
|
|
431
|
+
id (str): The unique identifier for the DataProcessInstance.
|
|
432
|
+
|
|
433
|
+
Returns:
|
|
434
|
+
DataProcessInstance: The newly created DataProcessInstance object.
|
|
435
|
+
"""
|
|
343
436
|
dpi = DataProcessInstance(
|
|
344
437
|
id=id,
|
|
345
438
|
orchestrator=dataflow.orchestrator,
|
datahub/cli/container_cli.py
CHANGED
|
@@ -1,19 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any, List
|
|
3
2
|
|
|
4
3
|
import click
|
|
5
|
-
import progressbar
|
|
6
4
|
|
|
7
|
-
from datahub.
|
|
8
|
-
from datahub.ingestion.graph.client import get_default_graph
|
|
9
|
-
from datahub.metadata.schema_classes import (
|
|
10
|
-
DomainsClass,
|
|
11
|
-
GlossaryTermAssociationClass,
|
|
12
|
-
OwnerClass,
|
|
13
|
-
OwnershipTypeClass,
|
|
14
|
-
TagAssociationClass,
|
|
15
|
-
)
|
|
16
|
-
from datahub.specific.dataset import DatasetPatchBuilder
|
|
5
|
+
from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container
|
|
17
6
|
|
|
18
7
|
logger = logging.getLogger(__name__)
|
|
19
8
|
|
|
@@ -24,58 +13,6 @@ def container() -> None:
|
|
|
24
13
|
pass
|
|
25
14
|
|
|
26
15
|
|
|
27
|
-
def apply_association_to_container(
|
|
28
|
-
container_urn: str,
|
|
29
|
-
association_urn: str,
|
|
30
|
-
association_type: str,
|
|
31
|
-
) -> None:
|
|
32
|
-
"""
|
|
33
|
-
Common function to add either tags, terms, domains, or owners to child datasets (for now).
|
|
34
|
-
|
|
35
|
-
Args:
|
|
36
|
-
container_urn: The URN of the container
|
|
37
|
-
association_urn: The URN of the tag, term, or user to apply
|
|
38
|
-
association_type: One of 'tag', 'term', 'domain' or 'owner'
|
|
39
|
-
"""
|
|
40
|
-
urns: List[str] = []
|
|
41
|
-
graph = get_default_graph()
|
|
42
|
-
logger.info(f"Using {graph}")
|
|
43
|
-
urns.extend(
|
|
44
|
-
graph.get_urns_by_filter(
|
|
45
|
-
container=container_urn, batch_size=1000, entity_types=["dataset"]
|
|
46
|
-
)
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
all_patches: List[Any] = []
|
|
50
|
-
for urn in urns:
|
|
51
|
-
builder = DatasetPatchBuilder(urn)
|
|
52
|
-
patches: List[Any] = []
|
|
53
|
-
if association_type == "tag":
|
|
54
|
-
patches = builder.add_tag(TagAssociationClass(association_urn)).build()
|
|
55
|
-
elif association_type == "term":
|
|
56
|
-
patches = builder.add_term(
|
|
57
|
-
GlossaryTermAssociationClass(association_urn)
|
|
58
|
-
).build()
|
|
59
|
-
elif association_type == "owner":
|
|
60
|
-
patches = builder.add_owner(
|
|
61
|
-
OwnerClass(
|
|
62
|
-
owner=association_urn,
|
|
63
|
-
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
64
|
-
)
|
|
65
|
-
).build()
|
|
66
|
-
elif association_type == "domain":
|
|
67
|
-
patches = [
|
|
68
|
-
MetadataChangeProposalWrapper(
|
|
69
|
-
entityUrn=urn,
|
|
70
|
-
aspect=DomainsClass(domains=[association_urn]),
|
|
71
|
-
)
|
|
72
|
-
]
|
|
73
|
-
all_patches.extend(patches)
|
|
74
|
-
mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
|
|
75
|
-
for mcp in mcps_iter:
|
|
76
|
-
graph.emit(mcp)
|
|
77
|
-
|
|
78
|
-
|
|
79
16
|
@container.command()
|
|
80
17
|
@click.option("--container-urn", required=True, type=str)
|
|
81
18
|
@click.option("--tag-urn", required=True, type=str)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from typing import Callable, List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from datahub.emitter.generic_emitter import Emitter
|
|
4
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
5
|
+
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
6
|
+
MetadataChangeEvent,
|
|
7
|
+
MetadataChangeProposal,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Experimental composite emitter that allows multiple emitters to be used in a single ingestion job
|
|
12
|
+
class CompositeEmitter(Emitter):
|
|
13
|
+
def __init__(self, emitters: List[Emitter]) -> None:
|
|
14
|
+
self.emitters = emitters
|
|
15
|
+
|
|
16
|
+
def emit(
|
|
17
|
+
self,
|
|
18
|
+
item: Union[
|
|
19
|
+
MetadataChangeEvent,
|
|
20
|
+
MetadataChangeProposal,
|
|
21
|
+
MetadataChangeProposalWrapper,
|
|
22
|
+
],
|
|
23
|
+
callback: Optional[Callable[[Exception, str], None]] = None,
|
|
24
|
+
) -> None:
|
|
25
|
+
callback_called = False
|
|
26
|
+
for emitter in self.emitters:
|
|
27
|
+
if not callback_called:
|
|
28
|
+
# We want to ensure that the callback is only called once and we tie it to the first emitter
|
|
29
|
+
emitter.emit(item, callback)
|
|
30
|
+
callback_called = True
|
|
31
|
+
else:
|
|
32
|
+
emitter.emit(item)
|
|
33
|
+
|
|
34
|
+
def flush(self) -> None:
|
|
35
|
+
for emitter in self.emitters:
|
|
36
|
+
emitter.flush()
|
|
File without changes
|