acryl-datahub 1.0.0rc4__py3-none-any.whl → 1.0.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/METADATA +2411 -2411
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/RECORD +27 -25
- datahub/_version.py +1 -1
- datahub/emitter/mcp_builder.py +4 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/openapi_parser.py +46 -14
- datahub/metadata/_schema_classes.py +17 -0
- datahub/metadata/schema.avsc +21 -3
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/MetadataChangeEvent.avsc +13 -0
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_entity.py +2 -0
- datahub/sdk/_shared.py +163 -13
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +20 -4
- datahub/sdk/dataset.py +104 -14
- datahub/sdk/main_client.py +17 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/split_statements.py +20 -13
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/sentinels.py +22 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc4.dist-info → acryl_datahub-1.0.0rc5.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=XDZKgHc2jpOiHirNgM_KDhk5UIQ7ZfxWKcRnl8Las0M,321
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
5
|
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
6
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -121,7 +121,7 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
|
|
|
121
121
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
122
122
|
datahub/emitter/mce_builder.py,sha256=9wjXG1WmWZUN7-_JdRJ5OcH8IPG0b3TGzxry4yscOR0,16545
|
|
123
123
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
124
|
-
datahub/emitter/mcp_builder.py,sha256=
|
|
124
|
+
datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
|
|
125
125
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
126
126
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
127
127
|
datahub/emitter/rest_emitter.py,sha256=zPQNTtZsY75gh7MabexNag-M4nATcumka_An0nNI3j0,17889
|
|
@@ -207,7 +207,7 @@ datahub/ingestion/source/mode.py,sha256=IBWpG0w47y1ivyo5oID0lmvwvLQvbNYL73x5EYHb
|
|
|
207
207
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
208
208
|
datahub/ingestion/source/nifi.py,sha256=dK__nKtxt7dx9JlrL3abJr_D7oZY4LbCXdMCCdYcCKg,56061
|
|
209
209
|
datahub/ingestion/source/openapi.py,sha256=39ep3etbWh8NBPjTXXwH3mieC5P6bMVAjhvK7UvcTis,17372
|
|
210
|
-
datahub/ingestion/source/openapi_parser.py,sha256=
|
|
210
|
+
datahub/ingestion/source/openapi_parser.py,sha256=YoVVATtuisvFo9qAVXq4ggNbaESHD01N5Ajp_OE-RD8,14882
|
|
211
211
|
datahub/ingestion/source/preset.py,sha256=fByqamRLnXxsfCGdLPzWN_5LJR_s2_G2f_zwSKUc8EA,3981
|
|
212
212
|
datahub/ingestion/source/pulsar.py,sha256=7rTOEqYmeOuRZl5DG8d5OFkb4l9H6-1bETZfa-4DfmI,20163
|
|
213
213
|
datahub/ingestion/source/redash.py,sha256=U0AfnYpZlAPN0peiu7pOpB8MQZ4JOO0yKEYChucMrlY,29915
|
|
@@ -250,8 +250,8 @@ datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7
|
|
|
250
250
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
|
|
251
251
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
|
|
252
252
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
|
|
253
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=
|
|
254
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
253
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=gxu-AvNRpGdTjGeY3L-_92DS1FvoDBb5eqWwDEOj4qk,32532
|
|
254
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Fq9tAVSvYepwweiZuJB5mbT0Y6EzFOEZWtdL4Zafd4E,50999
|
|
255
255
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
256
256
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
257
257
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Dkig1SEfPxw6zZDeSulUYnqsu4WGCVPXypGPEUVriyU,44907
|
|
@@ -574,8 +574,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
574
574
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
575
575
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
576
576
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
577
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
578
|
-
datahub/metadata/schema.avsc,sha256=
|
|
577
|
+
datahub/metadata/_schema_classes.py,sha256=HwQSL_dZMZ9u3nc1wRvPoQGOCYqSZnDBZrPPgepiXDQ,985759
|
|
578
|
+
datahub/metadata/schema.avsc,sha256=uk5Dh7BzZ4NeUarAub0HIz8XgD_DPYZRmcJF6Uubv4Q,664807
|
|
579
579
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
580
580
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
581
581
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -675,7 +675,7 @@ datahub/metadata/schemas/CorpGroupInfo.avsc,sha256=G83lndR7C6WPfccFg7qFE5Ely5vrd
|
|
|
675
675
|
datahub/metadata/schemas/CorpGroupKey.avsc,sha256=S6hpzy70w5bmE6jg2nP1OSmvZw_yMkq9rc8VpwWweNU,935
|
|
676
676
|
datahub/metadata/schemas/CorpUserCredentials.avsc,sha256=S7FkV9K_DGxhb4GFYbM5_lPvtPsOWKffjSOpfzicl-o,980
|
|
677
677
|
datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=VazSsT1oQZNHeG8rAXPSKV79W6ZcCgUL1J7yKJUTDvU,3380
|
|
678
|
-
datahub/metadata/schemas/CorpUserInfo.avsc,sha256=
|
|
678
|
+
datahub/metadata/schemas/CorpUserInfo.avsc,sha256=oObOza-5FLjZyCjj0FN4MNV1DodgTwJSV4APduAggjk,3955
|
|
679
679
|
datahub/metadata/schemas/CorpUserKey.avsc,sha256=-Spvvcss0sJoADygdChWa99rYiMMRwEx77GvW-jLTN0,984
|
|
680
680
|
datahub/metadata/schemas/CorpUserSettings.avsc,sha256=fS2HUD0L9_rsPyqo0DRxibzPi8_IkkWTY6Zuqob1sPg,2097
|
|
681
681
|
datahub/metadata/schemas/CorpUserStatus.avsc,sha256=yqojAXEQ9CjRhY58RPyTUxzmFbHSANGGaMMbqiYZZIE,2538
|
|
@@ -693,7 +693,7 @@ datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4N
|
|
|
693
693
|
datahub/metadata/schemas/DataHubActionKey.avsc,sha256=bjiKcoyvUPQKaGUi2ICBMJ_ukwnt7dh0szJS4WBZE0A,448
|
|
694
694
|
datahub/metadata/schemas/DataHubConnectionDetails.avsc,sha256=IvZj6OA7HRvy-ZIIn0UbXdJNnyt_oTn16XIe5ZlcqGk,1661
|
|
695
695
|
datahub/metadata/schemas/DataHubConnectionKey.avsc,sha256=VwbamVFoEdp6epz1lJm_UShBl6ksBxoA7jAYuPI5u3M,522
|
|
696
|
-
datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=
|
|
696
|
+
datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjODE5SmuVKuQeW8ajLJNRpqEBRyio,4601
|
|
697
697
|
datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=CSmoOx_Eqa1-he5dRaVOUQWIv1l2e2lraEPIixKK-lo,526
|
|
698
698
|
datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
|
|
699
699
|
datahub/metadata/schemas/DataHubPersonaKey.avsc,sha256=ddj-DhXa0_YMdLaGkKLLSklfIeDRvSwPXu8o__YEXUE,448
|
|
@@ -815,7 +815,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
|
|
|
815
815
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
|
|
816
816
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
817
817
|
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
818
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256
|
|
818
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=-U5dR6xh79FS6Q6Xji3ExCvWUdJqUMh5_eiIVDr8jl0,374076
|
|
819
819
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
|
|
820
820
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
|
|
821
821
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -872,13 +872,14 @@ datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52
|
|
|
872
872
|
datahub/metadata/schemas/__init__.py,sha256=kCcak_fBn_KyuysZTJIoipAzZ8EO44Amk4DWSEvplEY,581
|
|
873
873
|
datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
|
|
874
874
|
datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
|
|
875
|
-
datahub/sdk/_attribution.py,sha256=
|
|
876
|
-
datahub/sdk/_entity.py,sha256=
|
|
877
|
-
datahub/sdk/_shared.py,sha256=
|
|
878
|
-
datahub/sdk/
|
|
879
|
-
datahub/sdk/
|
|
875
|
+
datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
|
|
876
|
+
datahub/sdk/_entity.py,sha256=zYLnJoht3Mhr6dJGhPG9sh4-GF-Q6Cky6TqufDkg55E,3253
|
|
877
|
+
datahub/sdk/_shared.py,sha256=tvXMH-3Q-QHIEyaWUyBPzyCXz6ZQdKo89h0LanPAKNI,16826
|
|
878
|
+
datahub/sdk/_utils.py,sha256=aGE665Su8SGtj2CRDiTaXNYrJ8ADBsS0m4DmaXw79b8,1027
|
|
879
|
+
datahub/sdk/container.py,sha256=NyiMyWpIMet6Y7FJ8jPxJWs3kcN4m6LEq2mDnEWxuT4,7247
|
|
880
|
+
datahub/sdk/dataset.py,sha256=zDVzgq4vsH3q8B6TY5TyTyFAqNDmIHHtRnT4oX-KE0w,24844
|
|
880
881
|
datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
|
|
881
|
-
datahub/sdk/main_client.py,sha256=
|
|
882
|
+
datahub/sdk/main_client.py,sha256=CDkwbnjVuLRq6XXoP2og6ZpP26sSU-dPICrtLRt16LU,2628
|
|
882
883
|
datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
|
|
883
884
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
884
885
|
datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
|
|
@@ -890,7 +891,7 @@ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,668
|
|
|
890
891
|
datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
|
|
891
892
|
datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
|
|
892
893
|
datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
|
|
893
|
-
datahub/specific/dataset.py,sha256=
|
|
894
|
+
datahub/specific/dataset.py,sha256=EhSjarFfvxF-JbVuNBNIcV1pEebqAdcYHDZIhnG_8sk,9714
|
|
894
895
|
datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
|
|
895
896
|
datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
|
|
896
897
|
datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -905,7 +906,7 @@ datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPi
|
|
|
905
906
|
datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn0,1751
|
|
906
907
|
datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
|
|
907
908
|
datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
|
|
908
|
-
datahub/sql_parsing/split_statements.py,sha256=
|
|
909
|
+
datahub/sql_parsing/split_statements.py,sha256=6KUoIPG7H8Rja3lrPjSrSfhFfwW4oqgfoNQeTbbOWNg,8953
|
|
909
910
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
|
|
910
911
|
datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
|
|
911
912
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
@@ -938,7 +939,7 @@ datahub/utilities/dedup_list.py,sha256=dUSpe1AajfuwlHVJKNv-CzDXSCkaw0HgSMOsxqUkQ
|
|
|
938
939
|
datahub/utilities/delayed_iter.py,sha256=XlsI0DCXkVVejFKOW_uMT0E8DTqqOHQN3Ooak4EcULE,645
|
|
939
940
|
datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,1128
|
|
940
941
|
datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
|
|
941
|
-
datahub/utilities/file_backed_collections.py,sha256=
|
|
942
|
+
datahub/utilities/file_backed_collections.py,sha256=zW-Xy6zO1E6R-FRJKjlimMKr-emWrgcXvFFY6QQ23aA,21715
|
|
942
943
|
datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
|
|
943
944
|
datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
|
|
944
945
|
datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
|
|
@@ -958,6 +959,7 @@ datahub/utilities/progress_timer.py,sha256=w0b3wIEGL8fQy2BKYVPiKDcO5ATUlt1kQr74a
|
|
|
958
959
|
datahub/utilities/ratelimiter.py,sha256=6a058tTzydqK9gf0C2RR2iZZmh-mm4o3W0MMlDNfIoo,2201
|
|
959
960
|
datahub/utilities/sample_data.py,sha256=DMwN3yAqJPrF_rZF6CU0_iLSNBFfsAQ-no0KQhgwklg,766
|
|
960
961
|
datahub/utilities/search_utils.py,sha256=BdZfixXrB6lcD_ec6pCotGtVKQ4ekTtmaZDFGyeZfBg,9683
|
|
962
|
+
datahub/utilities/sentinels.py,sha256=p299J2YR62K2Mbuy-2lrGH4BnF9dkRqGo59Wub0hYV4,417
|
|
961
963
|
datahub/utilities/serialized_lru_cache.py,sha256=R9enyN5BlatknX4KFq9euYc2yiE1rRMucPbAHg09Goc,3247
|
|
962
964
|
datahub/utilities/server_config_util.py,sha256=ibCl4FedJWa9oc7_ZWS3vrHRNlsmKLB-eCDVCbUs25A,698
|
|
963
965
|
datahub/utilities/sql_formatter.py,sha256=tYXIsKjKmpKh0JXGxeAPrHkUWYd1SwJNLjUZsfQP2h0,1016
|
|
@@ -1014,9 +1016,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1014
1016
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1015
1017
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1016
1018
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1017
|
-
acryl_datahub-1.0.
|
|
1018
|
-
acryl_datahub-1.0.
|
|
1019
|
-
acryl_datahub-1.0.
|
|
1020
|
-
acryl_datahub-1.0.
|
|
1021
|
-
acryl_datahub-1.0.
|
|
1022
|
-
acryl_datahub-1.0.
|
|
1019
|
+
acryl_datahub-1.0.0rc5.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1020
|
+
acryl_datahub-1.0.0rc5.dist-info/METADATA,sha256=Tcm-KvY8FCzFEI8qsrAwbVwvBpqhzT4dQMxOCw_hAnk,175366
|
|
1021
|
+
acryl_datahub-1.0.0rc5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1022
|
+
acryl_datahub-1.0.0rc5.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1023
|
+
acryl_datahub-1.0.0rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1024
|
+
acryl_datahub-1.0.0rc5.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/emitter/mcp_builder.py
CHANGED
|
@@ -36,7 +36,7 @@ from datahub.metadata.schema_classes import (
|
|
|
36
36
|
SubTypesClass,
|
|
37
37
|
TagAssociationClass,
|
|
38
38
|
)
|
|
39
|
-
from datahub.metadata.urns import StructuredPropertyUrn
|
|
39
|
+
from datahub.metadata.urns import ContainerUrn, StructuredPropertyUrn
|
|
40
40
|
|
|
41
41
|
# In https://github.com/datahub-project/datahub/pull/11214, we added a
|
|
42
42
|
# new env field to container properties. However, populating this field
|
|
@@ -87,6 +87,9 @@ class ContainerKey(DatahubKey):
|
|
|
87
87
|
def property_dict(self) -> Dict[str, str]:
|
|
88
88
|
return self.dict(by_alias=True, exclude_none=True)
|
|
89
89
|
|
|
90
|
+
def as_urn_typed(self) -> ContainerUrn:
|
|
91
|
+
return ContainerUrn.from_string(self.as_urn())
|
|
92
|
+
|
|
90
93
|
def as_urn(self) -> str:
|
|
91
94
|
return make_container_urn(guid=self.guid())
|
|
92
95
|
|
|
@@ -292,6 +292,11 @@ class BigQuerySchemaApi:
|
|
|
292
292
|
if hasattr(d, "_properties") and isinstance(d._properties, dict)
|
|
293
293
|
else None
|
|
294
294
|
),
|
|
295
|
+
# TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
|
|
296
|
+
# TODO: Given we are calling get_dataset for each dataset, we may consume and publish other fields too, such as created, modified, etc...
|
|
297
|
+
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
|
|
298
|
+
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
|
|
299
|
+
comment=self.bq_client.get_dataset(d.reference).description,
|
|
295
300
|
)
|
|
296
301
|
for d in datasets
|
|
297
302
|
]
|
|
@@ -296,6 +296,7 @@ class BigQuerySchemaGenerator:
|
|
|
296
296
|
self,
|
|
297
297
|
dataset: str,
|
|
298
298
|
project_id: str,
|
|
299
|
+
description: Optional[str] = None,
|
|
299
300
|
tags: Optional[Dict[str, str]] = None,
|
|
300
301
|
extra_properties: Optional[Dict[str, str]] = None,
|
|
301
302
|
) -> Iterable[MetadataWorkUnit]:
|
|
@@ -336,6 +337,7 @@ class BigQuerySchemaGenerator:
|
|
|
336
337
|
domain_config=self.config.domain,
|
|
337
338
|
schema_container_key=schema_container_key,
|
|
338
339
|
database_container_key=database_container_key,
|
|
340
|
+
description=description,
|
|
339
341
|
external_url=(
|
|
340
342
|
BQ_EXTERNAL_DATASET_URL_TEMPLATE.format(
|
|
341
343
|
project=project_id, dataset=dataset
|
|
@@ -471,14 +473,15 @@ class BigQuerySchemaGenerator:
|
|
|
471
473
|
|
|
472
474
|
if self.config.include_schema_metadata:
|
|
473
475
|
yield from self.gen_dataset_containers(
|
|
474
|
-
dataset_name,
|
|
475
|
-
project_id,
|
|
476
|
-
bigquery_dataset.labels,
|
|
477
|
-
(
|
|
476
|
+
dataset=dataset_name,
|
|
477
|
+
project_id=project_id,
|
|
478
|
+
tags=bigquery_dataset.labels,
|
|
479
|
+
extra_properties=(
|
|
478
480
|
{"location": bigquery_dataset.location}
|
|
479
481
|
if bigquery_dataset.location
|
|
480
482
|
else None
|
|
481
483
|
),
|
|
484
|
+
description=bigquery_dataset.comment,
|
|
482
485
|
)
|
|
483
486
|
|
|
484
487
|
columns = None
|
|
@@ -12,7 +12,11 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
12
12
|
SchemaField,
|
|
13
13
|
SchemaMetadata,
|
|
14
14
|
)
|
|
15
|
-
from datahub.metadata.schema_classes import
|
|
15
|
+
from datahub.metadata.schema_classes import (
|
|
16
|
+
RecordTypeClass,
|
|
17
|
+
SchemaFieldDataTypeClass,
|
|
18
|
+
StringTypeClass,
|
|
19
|
+
)
|
|
16
20
|
|
|
17
21
|
logger = logging.getLogger(__name__)
|
|
18
22
|
|
|
@@ -20,9 +24,12 @@ logger = logging.getLogger(__name__)
|
|
|
20
24
|
def flatten(d: dict, prefix: str = "") -> Generator:
|
|
21
25
|
for k, v in d.items():
|
|
22
26
|
if isinstance(v, dict):
|
|
27
|
+
# First yield the parent field
|
|
28
|
+
yield f"{prefix}.{k}".strip(".")
|
|
29
|
+
# Then yield all nested fields
|
|
23
30
|
yield from flatten(v, f"{prefix}.{k}")
|
|
24
31
|
else:
|
|
25
|
-
yield f"{prefix}
|
|
32
|
+
yield f"{prefix}.{k}".strip(".") # Use dot instead of hyphen
|
|
26
33
|
|
|
27
34
|
|
|
28
35
|
def flatten2list(d: dict) -> list:
|
|
@@ -34,7 +41,7 @@ def flatten2list(d: dict) -> list:
|
|
|
34
41
|
"anotherone": {"third_a": {"last": 3}}
|
|
35
42
|
}
|
|
36
43
|
|
|
37
|
-
|
|
44
|
+
yields:
|
|
38
45
|
|
|
39
46
|
["first.second_a",
|
|
40
47
|
"first.second_b",
|
|
@@ -43,7 +50,7 @@ def flatten2list(d: dict) -> list:
|
|
|
43
50
|
]
|
|
44
51
|
"""
|
|
45
52
|
fl_l = list(flatten(d))
|
|
46
|
-
return
|
|
53
|
+
return fl_l
|
|
47
54
|
|
|
48
55
|
|
|
49
56
|
def request_call(
|
|
@@ -322,6 +329,8 @@ def extract_fields(
|
|
|
322
329
|
return ["contains_a_string"], {"contains_a_string": dict_data[0]}
|
|
323
330
|
else:
|
|
324
331
|
raise ValueError("unknown format")
|
|
332
|
+
elif not dict_data: # Handle empty dict case
|
|
333
|
+
return [], {}
|
|
325
334
|
if len(dict_data) > 1:
|
|
326
335
|
# the elements are directly inside the dict
|
|
327
336
|
return flatten2list(dict_data), dict_data
|
|
@@ -384,16 +393,39 @@ def set_metadata(
|
|
|
384
393
|
dataset_name: str, fields: List, platform: str = "api"
|
|
385
394
|
) -> SchemaMetadata:
|
|
386
395
|
canonical_schema: List[SchemaField] = []
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
396
|
+
seen_paths = set()
|
|
397
|
+
|
|
398
|
+
# Process all flattened fields
|
|
399
|
+
for field_path in fields:
|
|
400
|
+
parts = field_path.split(".")
|
|
401
|
+
|
|
402
|
+
# Add struct/object fields for each ancestor path
|
|
403
|
+
current_path: List[str] = []
|
|
404
|
+
for part in parts[:-1]:
|
|
405
|
+
ancestor_path = ".".join(current_path + [part])
|
|
406
|
+
if ancestor_path not in seen_paths:
|
|
407
|
+
struct_field = SchemaField(
|
|
408
|
+
fieldPath=ancestor_path,
|
|
409
|
+
nativeDataType="object", # OpenAPI term for struct/record
|
|
410
|
+
type=SchemaFieldDataTypeClass(type=RecordTypeClass()),
|
|
411
|
+
description="",
|
|
412
|
+
recursive=False,
|
|
413
|
+
)
|
|
414
|
+
canonical_schema.append(struct_field)
|
|
415
|
+
seen_paths.add(ancestor_path)
|
|
416
|
+
current_path.append(part)
|
|
417
|
+
|
|
418
|
+
# Add the leaf field if not already seen
|
|
419
|
+
if field_path not in seen_paths:
|
|
420
|
+
leaf_field = SchemaField(
|
|
421
|
+
fieldPath=field_path,
|
|
422
|
+
nativeDataType="str", # Keeping `str` for backwards compatability, ideally this is the correct type
|
|
423
|
+
type=SchemaFieldDataTypeClass(type=StringTypeClass()),
|
|
424
|
+
description="",
|
|
425
|
+
recursive=False,
|
|
426
|
+
)
|
|
427
|
+
canonical_schema.append(leaf_field)
|
|
428
|
+
seen_paths.add(field_path)
|
|
397
429
|
|
|
398
430
|
schema_metadata = SchemaMetadata(
|
|
399
431
|
schemaName=dataset_name,
|
|
@@ -13524,6 +13524,7 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13524
13524
|
lastName: Union[None, str]=None,
|
|
13525
13525
|
fullName: Union[None, str]=None,
|
|
13526
13526
|
countryCode: Union[None, str]=None,
|
|
13527
|
+
system: Optional[Union[bool, None]]=None,
|
|
13527
13528
|
):
|
|
13528
13529
|
super().__init__()
|
|
13529
13530
|
|
|
@@ -13543,6 +13544,11 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13543
13544
|
self.lastName = lastName
|
|
13544
13545
|
self.fullName = fullName
|
|
13545
13546
|
self.countryCode = countryCode
|
|
13547
|
+
if system is None:
|
|
13548
|
+
# default: False
|
|
13549
|
+
self.system = self.RECORD_SCHEMA.fields_dict["system"].default
|
|
13550
|
+
else:
|
|
13551
|
+
self.system = system
|
|
13546
13552
|
|
|
13547
13553
|
def _restore_defaults(self) -> None:
|
|
13548
13554
|
self.customProperties = dict()
|
|
@@ -13557,6 +13563,7 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13557
13563
|
self.lastName = self.RECORD_SCHEMA.fields_dict["lastName"].default
|
|
13558
13564
|
self.fullName = self.RECORD_SCHEMA.fields_dict["fullName"].default
|
|
13559
13565
|
self.countryCode = self.RECORD_SCHEMA.fields_dict["countryCode"].default
|
|
13566
|
+
self.system = self.RECORD_SCHEMA.fields_dict["system"].default
|
|
13560
13567
|
|
|
13561
13568
|
|
|
13562
13569
|
@property
|
|
@@ -13679,6 +13686,16 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13679
13686
|
self._inner_dict['countryCode'] = value
|
|
13680
13687
|
|
|
13681
13688
|
|
|
13689
|
+
@property
|
|
13690
|
+
def system(self) -> Union[bool, None]:
|
|
13691
|
+
"""Whether the corpUser is a system user."""
|
|
13692
|
+
return self._inner_dict.get('system') # type: ignore
|
|
13693
|
+
|
|
13694
|
+
@system.setter
|
|
13695
|
+
def system(self, value: Union[bool, None]) -> None:
|
|
13696
|
+
self._inner_dict['system'] = value
|
|
13697
|
+
|
|
13698
|
+
|
|
13682
13699
|
class CorpUserSettingsClass(_Aspect):
|
|
13683
13700
|
"""Settings that a user can customize through the datahub ui"""
|
|
13684
13701
|
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -1154,6 +1154,19 @@
|
|
|
1154
1154
|
"name": "countryCode",
|
|
1155
1155
|
"default": null,
|
|
1156
1156
|
"doc": "two uppercase letters country code. e.g. US"
|
|
1157
|
+
},
|
|
1158
|
+
{
|
|
1159
|
+
"Searchable": {
|
|
1160
|
+
"fieldType": "BOOLEAN",
|
|
1161
|
+
"queryByDefault": false
|
|
1162
|
+
},
|
|
1163
|
+
"type": [
|
|
1164
|
+
"boolean",
|
|
1165
|
+
"null"
|
|
1166
|
+
],
|
|
1167
|
+
"name": "system",
|
|
1168
|
+
"default": false,
|
|
1169
|
+
"doc": "Whether the corpUser is a system user."
|
|
1157
1170
|
}
|
|
1158
1171
|
],
|
|
1159
1172
|
"doc": "Linkedin corp user information"
|
|
@@ -17718,7 +17731,8 @@
|
|
|
17718
17731
|
},
|
|
17719
17732
|
{
|
|
17720
17733
|
"Searchable": {
|
|
17721
|
-
"fieldType": "
|
|
17734
|
+
"fieldType": "KEYWORD",
|
|
17735
|
+
"queryByDefault": false
|
|
17722
17736
|
},
|
|
17723
17737
|
"type": "string",
|
|
17724
17738
|
"name": "type",
|
|
@@ -17785,7 +17799,9 @@
|
|
|
17785
17799
|
},
|
|
17786
17800
|
{
|
|
17787
17801
|
"Searchable": {
|
|
17788
|
-
"fieldName": "sourceExecutorId"
|
|
17802
|
+
"fieldName": "sourceExecutorId",
|
|
17803
|
+
"fieldType": "KEYWORD",
|
|
17804
|
+
"queryByDefault": false
|
|
17789
17805
|
},
|
|
17790
17806
|
"type": [
|
|
17791
17807
|
"null",
|
|
@@ -17831,7 +17847,9 @@
|
|
|
17831
17847
|
"fields": [
|
|
17832
17848
|
{
|
|
17833
17849
|
"Searchable": {
|
|
17834
|
-
"fieldName": "sourceType"
|
|
17850
|
+
"fieldName": "sourceType",
|
|
17851
|
+
"fieldType": "KEYWORD",
|
|
17852
|
+
"queryByDefault": false
|
|
17835
17853
|
},
|
|
17836
17854
|
"type": {
|
|
17837
17855
|
"type": "enum",
|
|
@@ -165,6 +165,19 @@
|
|
|
165
165
|
"name": "countryCode",
|
|
166
166
|
"default": null,
|
|
167
167
|
"doc": "two uppercase letters country code. e.g. US"
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"Searchable": {
|
|
171
|
+
"fieldType": "BOOLEAN",
|
|
172
|
+
"queryByDefault": false
|
|
173
|
+
},
|
|
174
|
+
"type": [
|
|
175
|
+
"boolean",
|
|
176
|
+
"null"
|
|
177
|
+
],
|
|
178
|
+
"name": "system",
|
|
179
|
+
"default": false,
|
|
180
|
+
"doc": "Whether the corpUser is a system user."
|
|
168
181
|
}
|
|
169
182
|
],
|
|
170
183
|
"doc": "Linkedin corp user information"
|
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
},
|
|
17
17
|
{
|
|
18
18
|
"Searchable": {
|
|
19
|
-
"fieldType": "
|
|
19
|
+
"fieldType": "KEYWORD",
|
|
20
|
+
"queryByDefault": false
|
|
20
21
|
},
|
|
21
22
|
"type": "string",
|
|
22
23
|
"name": "type",
|
|
@@ -83,7 +84,9 @@
|
|
|
83
84
|
},
|
|
84
85
|
{
|
|
85
86
|
"Searchable": {
|
|
86
|
-
"fieldName": "sourceExecutorId"
|
|
87
|
+
"fieldName": "sourceExecutorId",
|
|
88
|
+
"fieldType": "KEYWORD",
|
|
89
|
+
"queryByDefault": false
|
|
87
90
|
},
|
|
88
91
|
"type": [
|
|
89
92
|
"null",
|
|
@@ -129,7 +132,9 @@
|
|
|
129
132
|
"fields": [
|
|
130
133
|
{
|
|
131
134
|
"Searchable": {
|
|
132
|
-
"fieldName": "sourceType"
|
|
135
|
+
"fieldName": "sourceType",
|
|
136
|
+
"fieldType": "KEYWORD",
|
|
137
|
+
"queryByDefault": false
|
|
133
138
|
},
|
|
134
139
|
"type": {
|
|
135
140
|
"type": "enum",
|
|
@@ -1619,6 +1619,19 @@
|
|
|
1619
1619
|
"name": "countryCode",
|
|
1620
1620
|
"default": null,
|
|
1621
1621
|
"doc": "two uppercase letters country code. e.g. US"
|
|
1622
|
+
},
|
|
1623
|
+
{
|
|
1624
|
+
"Searchable": {
|
|
1625
|
+
"fieldType": "BOOLEAN",
|
|
1626
|
+
"queryByDefault": false
|
|
1627
|
+
},
|
|
1628
|
+
"type": [
|
|
1629
|
+
"boolean",
|
|
1630
|
+
"null"
|
|
1631
|
+
],
|
|
1632
|
+
"name": "system",
|
|
1633
|
+
"default": false,
|
|
1634
|
+
"doc": "Whether the corpUser is a system user."
|
|
1622
1635
|
}
|
|
1623
1636
|
],
|
|
1624
1637
|
"doc": "Linkedin corp user information"
|
datahub/sdk/_attribution.py
CHANGED
|
@@ -5,6 +5,10 @@ from typing import Iterator
|
|
|
5
5
|
|
|
6
6
|
from datahub.utilities.str_enum import StrEnum
|
|
7
7
|
|
|
8
|
+
# TODO: This attribution setup is not the final form. I expect that once we have better
|
|
9
|
+
# backend support for attribution and attribution-oriented patch, this will become a bit
|
|
10
|
+
# more sophisticated.
|
|
11
|
+
|
|
8
12
|
|
|
9
13
|
class KnownAttribution(StrEnum):
|
|
10
14
|
INGESTION = "INGESTION"
|
datahub/sdk/_entity.py
CHANGED
|
@@ -36,6 +36,8 @@ class Entity:
|
|
|
36
36
|
|
|
37
37
|
def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
|
|
38
38
|
self._prev_aspects = current_aspects
|
|
39
|
+
|
|
40
|
+
self._aspects = {}
|
|
39
41
|
aspect: models._Aspect
|
|
40
42
|
for aspect_name, aspect in (current_aspects or {}).items(): # type: ignore
|
|
41
43
|
aspect_copy = type(aspect).from_obj(aspect.to_obj())
|