acryl-datahub 1.0.0rc3__py3-none-any.whl → 1.0.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/METADATA +2377 -2377
- {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/RECORD +30 -27
- datahub/_version.py +1 -1
- datahub/cli/ingest_cli.py +27 -92
- datahub/emitter/mcp_builder.py +4 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/openapi_parser.py +46 -14
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/metadata/_schema_classes.py +17 -0
- datahub/metadata/schema.avsc +21 -3
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/MetadataChangeEvent.avsc +13 -0
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_entity.py +2 -0
- datahub/sdk/_shared.py +163 -13
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +20 -4
- datahub/sdk/dataset.py +104 -14
- datahub/sdk/main_client.py +17 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/split_statements.py +20 -13
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/sentinels.py +22 -0
- {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc3.dist-info → acryl_datahub-1.0.0rc5.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=XDZKgHc2jpOiHirNgM_KDhk5UIQ7ZfxWKcRnl8Las0M,321
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
5
|
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
6
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -71,7 +71,7 @@ datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
|
|
|
71
71
|
datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
|
|
72
72
|
datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
|
|
73
73
|
datahub/cli/iceberg_cli.py,sha256=Jp3si_xZkOYr1uKA3h9_GlLJbiZPtVN_SpMgLa8OgoE,22984
|
|
74
|
-
datahub/cli/ingest_cli.py,sha256=
|
|
74
|
+
datahub/cli/ingest_cli.py,sha256=_DznLADNNPe4sm_pFPC1OLT6a5qGRVXNOPTkk721uKE,20453
|
|
75
75
|
datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
|
|
76
76
|
datahub/cli/lite_cli.py,sha256=lolCnWWMMYojRMebbYTpHWBmOBQF_729RpW4A_y_xF4,13034
|
|
77
77
|
datahub/cli/migrate.py,sha256=3orGfLNsdh1Q7gkPaCaf2bBWM5b3Ih4fGFw3poe0wiA,17937
|
|
@@ -121,7 +121,7 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
|
|
|
121
121
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
122
122
|
datahub/emitter/mce_builder.py,sha256=9wjXG1WmWZUN7-_JdRJ5OcH8IPG0b3TGzxry4yscOR0,16545
|
|
123
123
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
124
|
-
datahub/emitter/mcp_builder.py,sha256=
|
|
124
|
+
datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
|
|
125
125
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
126
126
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
127
127
|
datahub/emitter/rest_emitter.py,sha256=zPQNTtZsY75gh7MabexNag-M4nATcumka_An0nNI3j0,17889
|
|
@@ -207,7 +207,7 @@ datahub/ingestion/source/mode.py,sha256=IBWpG0w47y1ivyo5oID0lmvwvLQvbNYL73x5EYHb
|
|
|
207
207
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
208
208
|
datahub/ingestion/source/nifi.py,sha256=dK__nKtxt7dx9JlrL3abJr_D7oZY4LbCXdMCCdYcCKg,56061
|
|
209
209
|
datahub/ingestion/source/openapi.py,sha256=39ep3etbWh8NBPjTXXwH3mieC5P6bMVAjhvK7UvcTis,17372
|
|
210
|
-
datahub/ingestion/source/openapi_parser.py,sha256=
|
|
210
|
+
datahub/ingestion/source/openapi_parser.py,sha256=YoVVATtuisvFo9qAVXq4ggNbaESHD01N5Ajp_OE-RD8,14882
|
|
211
211
|
datahub/ingestion/source/preset.py,sha256=fByqamRLnXxsfCGdLPzWN_5LJR_s2_G2f_zwSKUc8EA,3981
|
|
212
212
|
datahub/ingestion/source/pulsar.py,sha256=7rTOEqYmeOuRZl5DG8d5OFkb4l9H6-1bETZfa-4DfmI,20163
|
|
213
213
|
datahub/ingestion/source/redash.py,sha256=U0AfnYpZlAPN0peiu7pOpB8MQZ4JOO0yKEYChucMrlY,29915
|
|
@@ -250,8 +250,8 @@ datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7
|
|
|
250
250
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
|
|
251
251
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
|
|
252
252
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
|
|
253
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=
|
|
254
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
253
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=gxu-AvNRpGdTjGeY3L-_92DS1FvoDBb5eqWwDEOj4qk,32532
|
|
254
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Fq9tAVSvYepwweiZuJB5mbT0Y6EzFOEZWtdL4Zafd4E,50999
|
|
255
255
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
256
256
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
257
257
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Dkig1SEfPxw6zZDeSulUYnqsu4WGCVPXypGPEUVriyU,44907
|
|
@@ -514,7 +514,7 @@ datahub/ingestion/source/unity/proxy.py,sha256=qYgjw0COscvUk8TvgWwZKgYvkYyA3j4yc
|
|
|
514
514
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
515
515
|
datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
|
|
516
516
|
datahub/ingestion/source/unity/report.py,sha256=XFT9oQfvEB4RkTvWGgFOoQuLPUN_AIoPXZ79xeDhGHQ,2831
|
|
517
|
-
datahub/ingestion/source/unity/source.py,sha256=
|
|
517
|
+
datahub/ingestion/source/unity/source.py,sha256=5w24IC4oDhsycdt3TG7rtXOkoQpxE_-dHlLGYui4K8I,42368
|
|
518
518
|
datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_bKI_lbyWjY,11500
|
|
519
519
|
datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
520
520
|
datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e36wmYzs8xtpD632z6pLiw,9974
|
|
@@ -574,8 +574,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
574
574
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
575
575
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
576
576
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
577
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
578
|
-
datahub/metadata/schema.avsc,sha256=
|
|
577
|
+
datahub/metadata/_schema_classes.py,sha256=HwQSL_dZMZ9u3nc1wRvPoQGOCYqSZnDBZrPPgepiXDQ,985759
|
|
578
|
+
datahub/metadata/schema.avsc,sha256=uk5Dh7BzZ4NeUarAub0HIz8XgD_DPYZRmcJF6Uubv4Q,664807
|
|
579
579
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
580
580
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
581
581
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -675,7 +675,7 @@ datahub/metadata/schemas/CorpGroupInfo.avsc,sha256=G83lndR7C6WPfccFg7qFE5Ely5vrd
|
|
|
675
675
|
datahub/metadata/schemas/CorpGroupKey.avsc,sha256=S6hpzy70w5bmE6jg2nP1OSmvZw_yMkq9rc8VpwWweNU,935
|
|
676
676
|
datahub/metadata/schemas/CorpUserCredentials.avsc,sha256=S7FkV9K_DGxhb4GFYbM5_lPvtPsOWKffjSOpfzicl-o,980
|
|
677
677
|
datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=VazSsT1oQZNHeG8rAXPSKV79W6ZcCgUL1J7yKJUTDvU,3380
|
|
678
|
-
datahub/metadata/schemas/CorpUserInfo.avsc,sha256=
|
|
678
|
+
datahub/metadata/schemas/CorpUserInfo.avsc,sha256=oObOza-5FLjZyCjj0FN4MNV1DodgTwJSV4APduAggjk,3955
|
|
679
679
|
datahub/metadata/schemas/CorpUserKey.avsc,sha256=-Spvvcss0sJoADygdChWa99rYiMMRwEx77GvW-jLTN0,984
|
|
680
680
|
datahub/metadata/schemas/CorpUserSettings.avsc,sha256=fS2HUD0L9_rsPyqo0DRxibzPi8_IkkWTY6Zuqob1sPg,2097
|
|
681
681
|
datahub/metadata/schemas/CorpUserStatus.avsc,sha256=yqojAXEQ9CjRhY58RPyTUxzmFbHSANGGaMMbqiYZZIE,2538
|
|
@@ -693,7 +693,7 @@ datahub/metadata/schemas/DataHubAccessTokenKey.avsc,sha256=3EspNIxgb_I4WwV0a2o4N
|
|
|
693
693
|
datahub/metadata/schemas/DataHubActionKey.avsc,sha256=bjiKcoyvUPQKaGUi2ICBMJ_ukwnt7dh0szJS4WBZE0A,448
|
|
694
694
|
datahub/metadata/schemas/DataHubConnectionDetails.avsc,sha256=IvZj6OA7HRvy-ZIIn0UbXdJNnyt_oTn16XIe5ZlcqGk,1661
|
|
695
695
|
datahub/metadata/schemas/DataHubConnectionKey.avsc,sha256=VwbamVFoEdp6epz1lJm_UShBl6ksBxoA7jAYuPI5u3M,522
|
|
696
|
-
datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=
|
|
696
|
+
datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjODE5SmuVKuQeW8ajLJNRpqEBRyio,4601
|
|
697
697
|
datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=CSmoOx_Eqa1-he5dRaVOUQWIv1l2e2lraEPIixKK-lo,526
|
|
698
698
|
datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
|
|
699
699
|
datahub/metadata/schemas/DataHubPersonaKey.avsc,sha256=ddj-DhXa0_YMdLaGkKLLSklfIeDRvSwPXu8o__YEXUE,448
|
|
@@ -815,7 +815,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
|
|
|
815
815
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
|
|
816
816
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
817
817
|
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
818
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256
|
|
818
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=-U5dR6xh79FS6Q6Xji3ExCvWUdJqUMh5_eiIVDr8jl0,374076
|
|
819
819
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
|
|
820
820
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
|
|
821
821
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -872,13 +872,14 @@ datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52
|
|
|
872
872
|
datahub/metadata/schemas/__init__.py,sha256=kCcak_fBn_KyuysZTJIoipAzZ8EO44Amk4DWSEvplEY,581
|
|
873
873
|
datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
|
|
874
874
|
datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
|
|
875
|
-
datahub/sdk/_attribution.py,sha256=
|
|
876
|
-
datahub/sdk/_entity.py,sha256=
|
|
877
|
-
datahub/sdk/_shared.py,sha256=
|
|
878
|
-
datahub/sdk/
|
|
879
|
-
datahub/sdk/
|
|
875
|
+
datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
|
|
876
|
+
datahub/sdk/_entity.py,sha256=zYLnJoht3Mhr6dJGhPG9sh4-GF-Q6Cky6TqufDkg55E,3253
|
|
877
|
+
datahub/sdk/_shared.py,sha256=tvXMH-3Q-QHIEyaWUyBPzyCXz6ZQdKo89h0LanPAKNI,16826
|
|
878
|
+
datahub/sdk/_utils.py,sha256=aGE665Su8SGtj2CRDiTaXNYrJ8ADBsS0m4DmaXw79b8,1027
|
|
879
|
+
datahub/sdk/container.py,sha256=NyiMyWpIMet6Y7FJ8jPxJWs3kcN4m6LEq2mDnEWxuT4,7247
|
|
880
|
+
datahub/sdk/dataset.py,sha256=zDVzgq4vsH3q8B6TY5TyTyFAqNDmIHHtRnT4oX-KE0w,24844
|
|
880
881
|
datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
|
|
881
|
-
datahub/sdk/main_client.py,sha256=
|
|
882
|
+
datahub/sdk/main_client.py,sha256=CDkwbnjVuLRq6XXoP2og6ZpP26sSU-dPICrtLRt16LU,2628
|
|
882
883
|
datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
|
|
883
884
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
884
885
|
datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
|
|
@@ -890,7 +891,7 @@ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,668
|
|
|
890
891
|
datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
|
|
891
892
|
datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
|
|
892
893
|
datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
|
|
893
|
-
datahub/specific/dataset.py,sha256=
|
|
894
|
+
datahub/specific/dataset.py,sha256=EhSjarFfvxF-JbVuNBNIcV1pEebqAdcYHDZIhnG_8sk,9714
|
|
894
895
|
datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
|
|
895
896
|
datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
|
|
896
897
|
datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -905,7 +906,7 @@ datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPi
|
|
|
905
906
|
datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn0,1751
|
|
906
907
|
datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
|
|
907
908
|
datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
|
|
908
|
-
datahub/sql_parsing/split_statements.py,sha256=
|
|
909
|
+
datahub/sql_parsing/split_statements.py,sha256=6KUoIPG7H8Rja3lrPjSrSfhFfwW4oqgfoNQeTbbOWNg,8953
|
|
909
910
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
|
|
910
911
|
datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
|
|
911
912
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
@@ -938,10 +939,11 @@ datahub/utilities/dedup_list.py,sha256=dUSpe1AajfuwlHVJKNv-CzDXSCkaw0HgSMOsxqUkQ
|
|
|
938
939
|
datahub/utilities/delayed_iter.py,sha256=XlsI0DCXkVVejFKOW_uMT0E8DTqqOHQN3Ooak4EcULE,645
|
|
939
940
|
datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,1128
|
|
940
941
|
datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
|
|
941
|
-
datahub/utilities/file_backed_collections.py,sha256=
|
|
942
|
+
datahub/utilities/file_backed_collections.py,sha256=zW-Xy6zO1E6R-FRJKjlimMKr-emWrgcXvFFY6QQ23aA,21715
|
|
942
943
|
datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
|
|
943
944
|
datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
|
|
944
945
|
datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
|
|
946
|
+
datahub/utilities/ingest_utils.py,sha256=znIuvFkCdOAOg1dkF-mJn03A2YYFPHlDPZsfCPxKkaQ,3117
|
|
945
947
|
datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
|
|
946
948
|
datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
|
|
947
949
|
datahub/utilities/lossy_collections.py,sha256=5rdtfK2pjwvOrrzLf_KGFOMiVvLLmoXj5EVQXTFSR3E,5704
|
|
@@ -957,6 +959,7 @@ datahub/utilities/progress_timer.py,sha256=w0b3wIEGL8fQy2BKYVPiKDcO5ATUlt1kQr74a
|
|
|
957
959
|
datahub/utilities/ratelimiter.py,sha256=6a058tTzydqK9gf0C2RR2iZZmh-mm4o3W0MMlDNfIoo,2201
|
|
958
960
|
datahub/utilities/sample_data.py,sha256=DMwN3yAqJPrF_rZF6CU0_iLSNBFfsAQ-no0KQhgwklg,766
|
|
959
961
|
datahub/utilities/search_utils.py,sha256=BdZfixXrB6lcD_ec6pCotGtVKQ4ekTtmaZDFGyeZfBg,9683
|
|
962
|
+
datahub/utilities/sentinels.py,sha256=p299J2YR62K2Mbuy-2lrGH4BnF9dkRqGo59Wub0hYV4,417
|
|
960
963
|
datahub/utilities/serialized_lru_cache.py,sha256=R9enyN5BlatknX4KFq9euYc2yiE1rRMucPbAHg09Goc,3247
|
|
961
964
|
datahub/utilities/server_config_util.py,sha256=ibCl4FedJWa9oc7_ZWS3vrHRNlsmKLB-eCDVCbUs25A,698
|
|
962
965
|
datahub/utilities/sql_formatter.py,sha256=tYXIsKjKmpKh0JXGxeAPrHkUWYd1SwJNLjUZsfQP2h0,1016
|
|
@@ -1013,9 +1016,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1013
1016
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1014
1017
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1015
1018
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1016
|
-
acryl_datahub-1.0.
|
|
1017
|
-
acryl_datahub-1.0.
|
|
1018
|
-
acryl_datahub-1.0.
|
|
1019
|
-
acryl_datahub-1.0.
|
|
1020
|
-
acryl_datahub-1.0.
|
|
1021
|
-
acryl_datahub-1.0.
|
|
1019
|
+
acryl_datahub-1.0.0rc5.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1020
|
+
acryl_datahub-1.0.0rc5.dist-info/METADATA,sha256=Tcm-KvY8FCzFEI8qsrAwbVwvBpqhzT4dQMxOCw_hAnk,175366
|
|
1021
|
+
acryl_datahub-1.0.0rc5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1022
|
+
acryl_datahub-1.0.0rc5.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1023
|
+
acryl_datahub-1.0.0rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1024
|
+
acryl_datahub-1.0.0rc5.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/cli/ingest_cli.py
CHANGED
|
@@ -15,14 +15,14 @@ from tabulate import tabulate
|
|
|
15
15
|
from datahub._version import nice_version_name
|
|
16
16
|
from datahub.cli import cli_utils
|
|
17
17
|
from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
|
|
18
|
-
from datahub.configuration.common import
|
|
18
|
+
from datahub.configuration.common import GraphError
|
|
19
19
|
from datahub.configuration.config_loader import load_config_file
|
|
20
|
-
from datahub.emitter.mce_builder import datahub_guid
|
|
21
20
|
from datahub.ingestion.graph.client import get_default_graph
|
|
22
21
|
from datahub.ingestion.run.connection import ConnectionManager
|
|
23
22
|
from datahub.ingestion.run.pipeline import Pipeline
|
|
24
23
|
from datahub.telemetry import telemetry
|
|
25
24
|
from datahub.upgrade import upgrade
|
|
25
|
+
from datahub.utilities.ingest_utils import deploy_source_vars
|
|
26
26
|
from datahub.utilities.perf_timer import PerfTimer
|
|
27
27
|
|
|
28
28
|
logger = logging.getLogger(__name__)
|
|
@@ -191,23 +191,6 @@ def run(
|
|
|
191
191
|
# don't raise SystemExit if there's no error
|
|
192
192
|
|
|
193
193
|
|
|
194
|
-
def _make_ingestion_urn(name: str) -> str:
|
|
195
|
-
guid = datahub_guid(
|
|
196
|
-
{
|
|
197
|
-
"name": name,
|
|
198
|
-
}
|
|
199
|
-
)
|
|
200
|
-
return f"urn:li:dataHubIngestionSource:deploy-{guid}"
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
class DeployOptions(ConfigModel):
|
|
204
|
-
name: str
|
|
205
|
-
schedule: Optional[str] = None
|
|
206
|
-
time_zone: str = "UTC"
|
|
207
|
-
cli_version: Optional[str] = None
|
|
208
|
-
executor_id: str = "default"
|
|
209
|
-
|
|
210
|
-
|
|
211
194
|
@ingest.command()
|
|
212
195
|
@upgrade.check_upgrade
|
|
213
196
|
@telemetry.with_telemetry()
|
|
@@ -258,6 +241,16 @@ class DeployOptions(ConfigModel):
|
|
|
258
241
|
required=False,
|
|
259
242
|
default="UTC",
|
|
260
243
|
)
|
|
244
|
+
@click.option(
|
|
245
|
+
"--debug", type=bool, help="Should we debug.", required=False, default=False
|
|
246
|
+
)
|
|
247
|
+
@click.option(
|
|
248
|
+
"--extra-pip",
|
|
249
|
+
type=str,
|
|
250
|
+
help='Extra pip packages. e.g. ["memray"]',
|
|
251
|
+
required=False,
|
|
252
|
+
default=None,
|
|
253
|
+
)
|
|
261
254
|
def deploy(
|
|
262
255
|
name: Optional[str],
|
|
263
256
|
config: str,
|
|
@@ -266,6 +259,8 @@ def deploy(
|
|
|
266
259
|
cli_version: Optional[str],
|
|
267
260
|
schedule: Optional[str],
|
|
268
261
|
time_zone: str,
|
|
262
|
+
extra_pip: Optional[str],
|
|
263
|
+
debug: bool = False,
|
|
269
264
|
) -> None:
|
|
270
265
|
"""
|
|
271
266
|
Deploy an ingestion recipe to your DataHub instance.
|
|
@@ -276,83 +271,23 @@ def deploy(
|
|
|
276
271
|
|
|
277
272
|
datahub_graph = get_default_graph()
|
|
278
273
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
274
|
+
variables = deploy_source_vars(
|
|
275
|
+
name=name,
|
|
276
|
+
config=config,
|
|
277
|
+
urn=urn,
|
|
278
|
+
executor_id=executor_id,
|
|
279
|
+
cli_version=cli_version,
|
|
280
|
+
schedule=schedule,
|
|
281
|
+
time_zone=time_zone,
|
|
282
|
+
extra_pip=extra_pip,
|
|
283
|
+
debug=debug,
|
|
284
284
|
)
|
|
285
285
|
|
|
286
|
-
deploy_options_raw = pipeline_config.pop("deployment", None)
|
|
287
|
-
if deploy_options_raw is not None:
|
|
288
|
-
deploy_options = DeployOptions.parse_obj(deploy_options_raw)
|
|
289
|
-
|
|
290
|
-
if name:
|
|
291
|
-
logger.info(f"Overriding deployment name {deploy_options.name} with {name}")
|
|
292
|
-
deploy_options.name = name
|
|
293
|
-
else:
|
|
294
|
-
if not name:
|
|
295
|
-
raise click.UsageError(
|
|
296
|
-
"Either --name must be set or deployment_name specified in the config"
|
|
297
|
-
)
|
|
298
|
-
deploy_options = DeployOptions(name=name)
|
|
299
|
-
|
|
300
|
-
# Use remaining CLI args to override deploy_options
|
|
301
|
-
if schedule:
|
|
302
|
-
deploy_options.schedule = schedule
|
|
303
|
-
if time_zone:
|
|
304
|
-
deploy_options.time_zone = time_zone
|
|
305
|
-
if cli_version:
|
|
306
|
-
deploy_options.cli_version = cli_version
|
|
307
|
-
if executor_id:
|
|
308
|
-
deploy_options.executor_id = executor_id
|
|
309
|
-
|
|
310
|
-
logger.info(f"Using {repr(deploy_options)}")
|
|
311
|
-
|
|
312
|
-
if not urn:
|
|
313
|
-
# When urn/name is not specified, we will generate a unique urn based on the deployment name.
|
|
314
|
-
urn = _make_ingestion_urn(deploy_options.name)
|
|
315
|
-
logger.info(f"Using recipe urn: {urn}")
|
|
316
|
-
|
|
317
|
-
# Invariant - at this point, both urn and deploy_options are set.
|
|
318
|
-
|
|
319
|
-
variables: dict = {
|
|
320
|
-
"urn": urn,
|
|
321
|
-
"name": deploy_options.name,
|
|
322
|
-
"type": pipeline_config["source"]["type"],
|
|
323
|
-
"recipe": json.dumps(pipeline_config),
|
|
324
|
-
"executorId": deploy_options.executor_id,
|
|
325
|
-
"version": deploy_options.cli_version,
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
if deploy_options.schedule is not None:
|
|
329
|
-
variables["schedule"] = {
|
|
330
|
-
"interval": deploy_options.schedule,
|
|
331
|
-
"timezone": deploy_options.time_zone,
|
|
332
|
-
}
|
|
333
|
-
|
|
334
286
|
# The updateIngestionSource endpoint can actually do upserts as well.
|
|
335
287
|
graphql_query: str = textwrap.dedent(
|
|
336
288
|
"""
|
|
337
|
-
mutation updateIngestionSource(
|
|
338
|
-
$urn:
|
|
339
|
-
$name: String!,
|
|
340
|
-
$type: String!,
|
|
341
|
-
$schedule: UpdateIngestionSourceScheduleInput,
|
|
342
|
-
$recipe: String!,
|
|
343
|
-
$executorId: String!
|
|
344
|
-
$version: String) {
|
|
345
|
-
|
|
346
|
-
updateIngestionSource(urn: $urn, input: {
|
|
347
|
-
name: $name,
|
|
348
|
-
type: $type,
|
|
349
|
-
schedule: $schedule,
|
|
350
|
-
config: {
|
|
351
|
-
recipe: $recipe,
|
|
352
|
-
executorId: $executorId,
|
|
353
|
-
version: $version,
|
|
354
|
-
}
|
|
355
|
-
})
|
|
289
|
+
mutation updateIngestionSource($urn: String!, $input: UpdateIngestionSourceInput!) {
|
|
290
|
+
updateIngestionSource(urn: $urn, input: $input)
|
|
356
291
|
}
|
|
357
292
|
"""
|
|
358
293
|
)
|
|
@@ -372,7 +307,7 @@ def deploy(
|
|
|
372
307
|
sys.exit(1)
|
|
373
308
|
|
|
374
309
|
click.echo(
|
|
375
|
-
f"✅ Successfully wrote data ingestion source metadata for recipe {
|
|
310
|
+
f"✅ Successfully wrote data ingestion source metadata for recipe {variables['name']}:"
|
|
376
311
|
)
|
|
377
312
|
click.echo(response)
|
|
378
313
|
|
datahub/emitter/mcp_builder.py
CHANGED
|
@@ -36,7 +36,7 @@ from datahub.metadata.schema_classes import (
|
|
|
36
36
|
SubTypesClass,
|
|
37
37
|
TagAssociationClass,
|
|
38
38
|
)
|
|
39
|
-
from datahub.metadata.urns import StructuredPropertyUrn
|
|
39
|
+
from datahub.metadata.urns import ContainerUrn, StructuredPropertyUrn
|
|
40
40
|
|
|
41
41
|
# In https://github.com/datahub-project/datahub/pull/11214, we added a
|
|
42
42
|
# new env field to container properties. However, populating this field
|
|
@@ -87,6 +87,9 @@ class ContainerKey(DatahubKey):
|
|
|
87
87
|
def property_dict(self) -> Dict[str, str]:
|
|
88
88
|
return self.dict(by_alias=True, exclude_none=True)
|
|
89
89
|
|
|
90
|
+
def as_urn_typed(self) -> ContainerUrn:
|
|
91
|
+
return ContainerUrn.from_string(self.as_urn())
|
|
92
|
+
|
|
90
93
|
def as_urn(self) -> str:
|
|
91
94
|
return make_container_urn(guid=self.guid())
|
|
92
95
|
|
|
@@ -292,6 +292,11 @@ class BigQuerySchemaApi:
|
|
|
292
292
|
if hasattr(d, "_properties") and isinstance(d._properties, dict)
|
|
293
293
|
else None
|
|
294
294
|
),
|
|
295
|
+
# TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
|
|
296
|
+
# TODO: Given we are calling get_dataset for each dataset, we may consume and publish other fields too, such as created, modified, etc...
|
|
297
|
+
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
|
|
298
|
+
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
|
|
299
|
+
comment=self.bq_client.get_dataset(d.reference).description,
|
|
295
300
|
)
|
|
296
301
|
for d in datasets
|
|
297
302
|
]
|
|
@@ -296,6 +296,7 @@ class BigQuerySchemaGenerator:
|
|
|
296
296
|
self,
|
|
297
297
|
dataset: str,
|
|
298
298
|
project_id: str,
|
|
299
|
+
description: Optional[str] = None,
|
|
299
300
|
tags: Optional[Dict[str, str]] = None,
|
|
300
301
|
extra_properties: Optional[Dict[str, str]] = None,
|
|
301
302
|
) -> Iterable[MetadataWorkUnit]:
|
|
@@ -336,6 +337,7 @@ class BigQuerySchemaGenerator:
|
|
|
336
337
|
domain_config=self.config.domain,
|
|
337
338
|
schema_container_key=schema_container_key,
|
|
338
339
|
database_container_key=database_container_key,
|
|
340
|
+
description=description,
|
|
339
341
|
external_url=(
|
|
340
342
|
BQ_EXTERNAL_DATASET_URL_TEMPLATE.format(
|
|
341
343
|
project=project_id, dataset=dataset
|
|
@@ -471,14 +473,15 @@ class BigQuerySchemaGenerator:
|
|
|
471
473
|
|
|
472
474
|
if self.config.include_schema_metadata:
|
|
473
475
|
yield from self.gen_dataset_containers(
|
|
474
|
-
dataset_name,
|
|
475
|
-
project_id,
|
|
476
|
-
bigquery_dataset.labels,
|
|
477
|
-
(
|
|
476
|
+
dataset=dataset_name,
|
|
477
|
+
project_id=project_id,
|
|
478
|
+
tags=bigquery_dataset.labels,
|
|
479
|
+
extra_properties=(
|
|
478
480
|
{"location": bigquery_dataset.location}
|
|
479
481
|
if bigquery_dataset.location
|
|
480
482
|
else None
|
|
481
483
|
),
|
|
484
|
+
description=bigquery_dataset.comment,
|
|
482
485
|
)
|
|
483
486
|
|
|
484
487
|
columns = None
|
|
@@ -12,7 +12,11 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
12
12
|
SchemaField,
|
|
13
13
|
SchemaMetadata,
|
|
14
14
|
)
|
|
15
|
-
from datahub.metadata.schema_classes import
|
|
15
|
+
from datahub.metadata.schema_classes import (
|
|
16
|
+
RecordTypeClass,
|
|
17
|
+
SchemaFieldDataTypeClass,
|
|
18
|
+
StringTypeClass,
|
|
19
|
+
)
|
|
16
20
|
|
|
17
21
|
logger = logging.getLogger(__name__)
|
|
18
22
|
|
|
@@ -20,9 +24,12 @@ logger = logging.getLogger(__name__)
|
|
|
20
24
|
def flatten(d: dict, prefix: str = "") -> Generator:
|
|
21
25
|
for k, v in d.items():
|
|
22
26
|
if isinstance(v, dict):
|
|
27
|
+
# First yield the parent field
|
|
28
|
+
yield f"{prefix}.{k}".strip(".")
|
|
29
|
+
# Then yield all nested fields
|
|
23
30
|
yield from flatten(v, f"{prefix}.{k}")
|
|
24
31
|
else:
|
|
25
|
-
yield f"{prefix}
|
|
32
|
+
yield f"{prefix}.{k}".strip(".") # Use dot instead of hyphen
|
|
26
33
|
|
|
27
34
|
|
|
28
35
|
def flatten2list(d: dict) -> list:
|
|
@@ -34,7 +41,7 @@ def flatten2list(d: dict) -> list:
|
|
|
34
41
|
"anotherone": {"third_a": {"last": 3}}
|
|
35
42
|
}
|
|
36
43
|
|
|
37
|
-
|
|
44
|
+
yields:
|
|
38
45
|
|
|
39
46
|
["first.second_a",
|
|
40
47
|
"first.second_b",
|
|
@@ -43,7 +50,7 @@ def flatten2list(d: dict) -> list:
|
|
|
43
50
|
]
|
|
44
51
|
"""
|
|
45
52
|
fl_l = list(flatten(d))
|
|
46
|
-
return
|
|
53
|
+
return fl_l
|
|
47
54
|
|
|
48
55
|
|
|
49
56
|
def request_call(
|
|
@@ -322,6 +329,8 @@ def extract_fields(
|
|
|
322
329
|
return ["contains_a_string"], {"contains_a_string": dict_data[0]}
|
|
323
330
|
else:
|
|
324
331
|
raise ValueError("unknown format")
|
|
332
|
+
elif not dict_data: # Handle empty dict case
|
|
333
|
+
return [], {}
|
|
325
334
|
if len(dict_data) > 1:
|
|
326
335
|
# the elements are directly inside the dict
|
|
327
336
|
return flatten2list(dict_data), dict_data
|
|
@@ -384,16 +393,39 @@ def set_metadata(
|
|
|
384
393
|
dataset_name: str, fields: List, platform: str = "api"
|
|
385
394
|
) -> SchemaMetadata:
|
|
386
395
|
canonical_schema: List[SchemaField] = []
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
396
|
+
seen_paths = set()
|
|
397
|
+
|
|
398
|
+
# Process all flattened fields
|
|
399
|
+
for field_path in fields:
|
|
400
|
+
parts = field_path.split(".")
|
|
401
|
+
|
|
402
|
+
# Add struct/object fields for each ancestor path
|
|
403
|
+
current_path: List[str] = []
|
|
404
|
+
for part in parts[:-1]:
|
|
405
|
+
ancestor_path = ".".join(current_path + [part])
|
|
406
|
+
if ancestor_path not in seen_paths:
|
|
407
|
+
struct_field = SchemaField(
|
|
408
|
+
fieldPath=ancestor_path,
|
|
409
|
+
nativeDataType="object", # OpenAPI term for struct/record
|
|
410
|
+
type=SchemaFieldDataTypeClass(type=RecordTypeClass()),
|
|
411
|
+
description="",
|
|
412
|
+
recursive=False,
|
|
413
|
+
)
|
|
414
|
+
canonical_schema.append(struct_field)
|
|
415
|
+
seen_paths.add(ancestor_path)
|
|
416
|
+
current_path.append(part)
|
|
417
|
+
|
|
418
|
+
# Add the leaf field if not already seen
|
|
419
|
+
if field_path not in seen_paths:
|
|
420
|
+
leaf_field = SchemaField(
|
|
421
|
+
fieldPath=field_path,
|
|
422
|
+
nativeDataType="str", # Keeping `str` for backwards compatability, ideally this is the correct type
|
|
423
|
+
type=SchemaFieldDataTypeClass(type=StringTypeClass()),
|
|
424
|
+
description="",
|
|
425
|
+
recursive=False,
|
|
426
|
+
)
|
|
427
|
+
canonical_schema.append(leaf_field)
|
|
428
|
+
seen_paths.add(field_path)
|
|
397
429
|
|
|
398
430
|
schema_metadata = SchemaMetadata(
|
|
399
431
|
schemaName=dataset_name,
|
|
@@ -464,7 +464,17 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
464
464
|
|
|
465
465
|
with self.report.new_stage(f"Ingest schema {schema.id}"):
|
|
466
466
|
yield from self.gen_schema_containers(schema)
|
|
467
|
-
|
|
467
|
+
try:
|
|
468
|
+
yield from self.process_tables(schema)
|
|
469
|
+
except Exception as e:
|
|
470
|
+
logger.exception(f"Error parsing schema {schema}")
|
|
471
|
+
self.report.report_warning(
|
|
472
|
+
message="Missed schema because of parsing issues",
|
|
473
|
+
context=str(schema),
|
|
474
|
+
title="Error parsing schema",
|
|
475
|
+
exc=e,
|
|
476
|
+
)
|
|
477
|
+
continue
|
|
468
478
|
|
|
469
479
|
self.report.schemas.processed(schema.id)
|
|
470
480
|
|
|
@@ -13524,6 +13524,7 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13524
13524
|
lastName: Union[None, str]=None,
|
|
13525
13525
|
fullName: Union[None, str]=None,
|
|
13526
13526
|
countryCode: Union[None, str]=None,
|
|
13527
|
+
system: Optional[Union[bool, None]]=None,
|
|
13527
13528
|
):
|
|
13528
13529
|
super().__init__()
|
|
13529
13530
|
|
|
@@ -13543,6 +13544,11 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13543
13544
|
self.lastName = lastName
|
|
13544
13545
|
self.fullName = fullName
|
|
13545
13546
|
self.countryCode = countryCode
|
|
13547
|
+
if system is None:
|
|
13548
|
+
# default: False
|
|
13549
|
+
self.system = self.RECORD_SCHEMA.fields_dict["system"].default
|
|
13550
|
+
else:
|
|
13551
|
+
self.system = system
|
|
13546
13552
|
|
|
13547
13553
|
def _restore_defaults(self) -> None:
|
|
13548
13554
|
self.customProperties = dict()
|
|
@@ -13557,6 +13563,7 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13557
13563
|
self.lastName = self.RECORD_SCHEMA.fields_dict["lastName"].default
|
|
13558
13564
|
self.fullName = self.RECORD_SCHEMA.fields_dict["fullName"].default
|
|
13559
13565
|
self.countryCode = self.RECORD_SCHEMA.fields_dict["countryCode"].default
|
|
13566
|
+
self.system = self.RECORD_SCHEMA.fields_dict["system"].default
|
|
13560
13567
|
|
|
13561
13568
|
|
|
13562
13569
|
@property
|
|
@@ -13679,6 +13686,16 @@ class CorpUserInfoClass(_Aspect):
|
|
|
13679
13686
|
self._inner_dict['countryCode'] = value
|
|
13680
13687
|
|
|
13681
13688
|
|
|
13689
|
+
@property
|
|
13690
|
+
def system(self) -> Union[bool, None]:
|
|
13691
|
+
"""Whether the corpUser is a system user."""
|
|
13692
|
+
return self._inner_dict.get('system') # type: ignore
|
|
13693
|
+
|
|
13694
|
+
@system.setter
|
|
13695
|
+
def system(self, value: Union[bool, None]) -> None:
|
|
13696
|
+
self._inner_dict['system'] = value
|
|
13697
|
+
|
|
13698
|
+
|
|
13682
13699
|
class CorpUserSettingsClass(_Aspect):
|
|
13683
13700
|
"""Settings that a user can customize through the datahub ui"""
|
|
13684
13701
|
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -1154,6 +1154,19 @@
|
|
|
1154
1154
|
"name": "countryCode",
|
|
1155
1155
|
"default": null,
|
|
1156
1156
|
"doc": "two uppercase letters country code. e.g. US"
|
|
1157
|
+
},
|
|
1158
|
+
{
|
|
1159
|
+
"Searchable": {
|
|
1160
|
+
"fieldType": "BOOLEAN",
|
|
1161
|
+
"queryByDefault": false
|
|
1162
|
+
},
|
|
1163
|
+
"type": [
|
|
1164
|
+
"boolean",
|
|
1165
|
+
"null"
|
|
1166
|
+
],
|
|
1167
|
+
"name": "system",
|
|
1168
|
+
"default": false,
|
|
1169
|
+
"doc": "Whether the corpUser is a system user."
|
|
1157
1170
|
}
|
|
1158
1171
|
],
|
|
1159
1172
|
"doc": "Linkedin corp user information"
|
|
@@ -17718,7 +17731,8 @@
|
|
|
17718
17731
|
},
|
|
17719
17732
|
{
|
|
17720
17733
|
"Searchable": {
|
|
17721
|
-
"fieldType": "
|
|
17734
|
+
"fieldType": "KEYWORD",
|
|
17735
|
+
"queryByDefault": false
|
|
17722
17736
|
},
|
|
17723
17737
|
"type": "string",
|
|
17724
17738
|
"name": "type",
|
|
@@ -17785,7 +17799,9 @@
|
|
|
17785
17799
|
},
|
|
17786
17800
|
{
|
|
17787
17801
|
"Searchable": {
|
|
17788
|
-
"fieldName": "sourceExecutorId"
|
|
17802
|
+
"fieldName": "sourceExecutorId",
|
|
17803
|
+
"fieldType": "KEYWORD",
|
|
17804
|
+
"queryByDefault": false
|
|
17789
17805
|
},
|
|
17790
17806
|
"type": [
|
|
17791
17807
|
"null",
|
|
@@ -17831,7 +17847,9 @@
|
|
|
17831
17847
|
"fields": [
|
|
17832
17848
|
{
|
|
17833
17849
|
"Searchable": {
|
|
17834
|
-
"fieldName": "sourceType"
|
|
17850
|
+
"fieldName": "sourceType",
|
|
17851
|
+
"fieldType": "KEYWORD",
|
|
17852
|
+
"queryByDefault": false
|
|
17835
17853
|
},
|
|
17836
17854
|
"type": {
|
|
17837
17855
|
"type": "enum",
|
|
@@ -165,6 +165,19 @@
|
|
|
165
165
|
"name": "countryCode",
|
|
166
166
|
"default": null,
|
|
167
167
|
"doc": "two uppercase letters country code. e.g. US"
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"Searchable": {
|
|
171
|
+
"fieldType": "BOOLEAN",
|
|
172
|
+
"queryByDefault": false
|
|
173
|
+
},
|
|
174
|
+
"type": [
|
|
175
|
+
"boolean",
|
|
176
|
+
"null"
|
|
177
|
+
],
|
|
178
|
+
"name": "system",
|
|
179
|
+
"default": false,
|
|
180
|
+
"doc": "Whether the corpUser is a system user."
|
|
168
181
|
}
|
|
169
182
|
],
|
|
170
183
|
"doc": "Linkedin corp user information"
|