acryl-datahub 0.15.0.5rc8__py3-none-any.whl → 0.15.0.5rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=CilbE6n7KTTI5xCR6qR6cgnmqSiSFBe-Kp22mwO5MGQ,324
4
- datahub/entrypoints.py,sha256=56zAREkeLjycFIl0fubt9Haido463IUgl1QzPfcmAac,8344
3
+ datahub/_version.py,sha256=KhEsNpGTq01OO4NFbvH7t1xq-yYSpEDm5CLJZkdZi6o,324
4
+ datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
5
5
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -134,7 +134,7 @@ datahub/ingestion/api/committable.py,sha256=4S6GuBzvX2vb1A8P506NbspOKfZ1621sBG8t
134
134
  datahub/ingestion/api/common.py,sha256=nJVL8YdvokYFajOjmVpSNlLbZJ5iVOFS4KJDlGtJ_jc,2735
135
135
  datahub/ingestion/api/decorators.py,sha256=b9bxHXlqCLDgqrVdPU6WNQg1koZcK62AkZ9vNwvWeK4,4029
136
136
  datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
137
- datahub/ingestion/api/incremental_lineage_helper.py,sha256=JTmJvXzzwI04oTUTIeTKKscT_hjnr8nW34NFWJvCXDc,5871
137
+ datahub/ingestion/api/incremental_lineage_helper.py,sha256=7a6FTJ_uz4EEJS1vPtbYB2KvNlcZB3py28_FKxmRiSk,5993
138
138
  datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
139
139
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
140
140
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
@@ -163,8 +163,8 @@ datahub/ingestion/fs/http_fs.py,sha256=NBIKp4vl7mW0YfVfkfpO3R6DBGqSC7f6EE_da0yz2
163
163
  datahub/ingestion/fs/local_fs.py,sha256=oWf-PZsl5sI-9eHWGeKlfKYagbQaSZ9fGfNbxcFji14,885
164
164
  datahub/ingestion/fs/s3_fs.py,sha256=kGq4lWjTLCfPNinYfEiasADFyYb1PToD_8HXSisjdRY,3199
165
165
  datahub/ingestion/glossary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
- datahub/ingestion/glossary/classification_mixin.py,sha256=pkb0Rv2SQH7VwAV5DPLoJLJwkDwTjIhOhg4mbXiz9CI,13332
167
- datahub/ingestion/glossary/classifier.py,sha256=zp8Fe3he80H5Zz1EwymKjThUPkTpw6PgEJQvlmqrJmQ,3006
166
+ datahub/ingestion/glossary/classification_mixin.py,sha256=3vMyHmo-nL1G8OijDQk1XSBxkmXv2ziyUSPeUIUSLSI,13789
167
+ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGot6z9Cir5Vuc,2981
168
168
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
169
169
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
170
170
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -201,7 +201,7 @@ datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suab
201
201
  datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
202
202
  datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
203
203
  datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
204
- datahub/ingestion/source/mode.py,sha256=XeWL2yX2aJ_jC15jgseetBNswhtPOZnySlRspy9LZmY,63499
204
+ datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
205
205
  datahub/ingestion/source/mongodb.py,sha256=Hucd3rfxwRcc_rNOJbpSPmSZdKqN6Fi9L7KcUZ80YKM,21104
206
206
  datahub/ingestion/source/nifi.py,sha256=BszXfFonfHB63Zt85lHDh4W_V-gIJKtxS6q3cdPDc4U,56021
207
207
  datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
@@ -324,7 +324,7 @@ datahub/ingestion/source/iceberg/iceberg.py,sha256=2j-MKCa0o6m1btlgYssYTEAjcD6Zp
324
324
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=LEZaJleL5KJt1u_pLRUkeCqPEsthzH7tG8FgBwd9MC8,10218
325
325
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
326
326
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
327
- datahub/ingestion/source/identity/azure_ad.py,sha256=OEkd7X7w2H4mrMo3xXUfzEQlALoogjR2QabZm8CrAW0,28811
327
+ datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
328
328
  datahub/ingestion/source/identity/okta.py,sha256=LMbW5N1j9kMjcvGnmcff8LpIDPwmscmPPOTZD88KZZg,30758
329
329
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
330
  datahub/ingestion/source/kafka/kafka.py,sha256=ZK2NQi5GRobruwn58LR6JMKsnZl269YZzgYAMyI1Y3s,26504
@@ -435,7 +435,7 @@ datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_G
435
435
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
436
436
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
437
437
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
438
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=V07ELzDFcQMIBX89ojKoJDxEOJzq_eCKsMehhmuCfYI,19231
438
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=Y3LoqBavhc3Cm0nyAr3fnd_-i4gReDfaAuUdp7EgwPQ,19603
439
439
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=e9dCARIQtGB8G1cSMRLorCbNLcPUD2g9gBL-LLLKjFE,17793
440
440
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
441
441
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
@@ -450,7 +450,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYh
450
450
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
451
451
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
452
452
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=xq58c47zmaQPkTVqjKW25iViX8VJuHdQDTFY4jxzZ2o,12778
453
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=vyTqC_C5Bf0AMRVyoxUfl1CdlgeQouX20msP2FsMqnk,33439
453
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=NidPSzXh2UajcvgeDoTmk31UW1dAeQBCCFjumZajzcI,33524
454
454
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
455
455
  datahub/ingestion/source/sql/athena.py,sha256=Uh9wGLOqAkcphffxOPIQNyXvjeRm74XIpaLb4rjqMjM,24045
456
456
  datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
@@ -573,7 +573,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
573
573
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
574
574
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
575
575
  datahub/metadata/_schema_classes.py,sha256=xDKwU0EtWDoHPgetCMWFYK80SRbZWONFE4HXApDZGo8,985155
576
- datahub/metadata/schema.avsc,sha256=ISpdTivunuDG6EebIw8N0oWkUrsc-h9N4q1XcfdDhxk,646307
576
+ datahub/metadata/schema.avsc,sha256=i4ukol5SJl7lyGsw0cXfOirqVnQfAVzQx5tUDH-n-EU,646297
577
577
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
578
578
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
579
579
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -678,7 +678,7 @@ datahub/metadata/schemas/CorpUserKey.avsc,sha256=-Spvvcss0sJoADygdChWa99rYiMMRwE
678
678
  datahub/metadata/schemas/CorpUserSettings.avsc,sha256=fS2HUD0L9_rsPyqo0DRxibzPi8_IkkWTY6Zuqob1sPg,2097
679
679
  datahub/metadata/schemas/CorpUserStatus.avsc,sha256=yqojAXEQ9CjRhY58RPyTUxzmFbHSANGGaMMbqiYZZIE,2538
680
680
  datahub/metadata/schemas/Cost.avsc,sha256=o4kYZSss2uEwJ6gCA9fhBUoyD5xUqcSxz78vkIXXzGQ,1494
681
- datahub/metadata/schemas/DashboardInfo.avsc,sha256=kuRyOSQWRvV9ydhozTWKWrNfbD8ZNeWawGGR4xSHhaI,12917
681
+ datahub/metadata/schemas/DashboardInfo.avsc,sha256=li2lSV6R4V-nz6foOi-NYxt_8ShHWfoKRw6M2BG5530,12907
682
682
  datahub/metadata/schemas/DashboardKey.avsc,sha256=yKlusgebWTvZhVeGgRNLZW6Qu6Fg_K0e2EbV8zr3jvA,1360
683
683
  datahub/metadata/schemas/DashboardUsageStatistics.avsc,sha256=pUAKqs49Wy5pAL92g_6QcFtJeoYeMWRGiHWS68IJN2A,7693
684
684
  datahub/metadata/schemas/DataContractKey.avsc,sha256=Oceu7P26--E0812IFrX3RiEY0Ktam869iiYN30zBudc,481
@@ -813,7 +813,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
813
813
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
814
814
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
815
815
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
816
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=pbt_zFR9fPRm5ek1cuYa1eMCNs3aD7F6jOruQaee2NY,373224
816
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=73m6GXjSHTILKxczi7IxN8Dm0iBFTps0QKph0AkaRag,373214
817
817
  datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
818
818
  datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
819
819
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
@@ -875,7 +875,7 @@ datahub/secret/secret_common.py,sha256=g4anQtYPm7cI6kEJUZHjpBqeCyiUKIim2rJQByaeO
875
875
  datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
876
876
  datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
877
877
  datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,6684
878
- datahub/specific/dashboard.py,sha256=D8CnOSScQ0-UICFjQnQOtqL-SlNSxhSuub4vZ3BpcuI,10017
878
+ datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
879
879
  datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
880
880
  datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
881
881
  datahub/specific/dataset.py,sha256=je9j3rVzpSiXoOe0UmfD7mc5vCpLAAO74Z8q1SvwPX0,9725
@@ -912,7 +912,7 @@ datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqu
912
912
  datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
913
913
  datahub/testing/mcp_diff.py,sha256=Dxde5uZHqZf1EjOkHm405OHY5PPJp03agZJM9SyR4yE,10717
914
914
  datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
915
- datahub/upgrade/upgrade.py,sha256=Qy0pHqxpIPbD16mUcyk6Wkq9vbvnUPvtI5d9fovrY3Q,16338
915
+ datahub/upgrade/upgrade.py,sha256=iDjIDY2YBl2XlKLvb5EMMdYOZ6KraeItgiu9Y4wIM1Q,16666
916
916
  datahub/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
917
917
  datahub/utilities/_custom_package_loader.py,sha256=9kgPE7Y77E-hNee8l4sKtVby-btUNum3dBfDixMzcVA,2059
918
918
  datahub/utilities/_markupsafe_compat.py,sha256=QX7c9KiHs56ASl7bJlgR4FAf3CGiY94zIr0h6Ak15To,444
@@ -1000,9 +1000,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1000
1000
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1001
1001
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1002
1002
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1003
- acryl_datahub-0.15.0.5rc8.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1004
- acryl_datahub-0.15.0.5rc8.dist-info/METADATA,sha256=dv5hJaDf3GORg52Q4TCqkatjmEdPrpFbIgIE22kxvAY,175284
1005
- acryl_datahub-0.15.0.5rc8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1006
- acryl_datahub-0.15.0.5rc8.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1007
- acryl_datahub-0.15.0.5rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1008
- acryl_datahub-0.15.0.5rc8.dist-info/RECORD,,
1003
+ acryl_datahub-0.15.0.5rc9.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1004
+ acryl_datahub-0.15.0.5rc9.dist-info/METADATA,sha256=VuKgWOBCJTCnPCsvt0eB4LoZYj-ig56pUIgrs86xB7w,175375
1005
+ acryl_datahub-0.15.0.5rc9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1006
+ acryl_datahub-0.15.0.5rc9.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1007
+ acryl_datahub-0.15.0.5rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1008
+ acryl_datahub-0.15.0.5rc9.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "0.15.0.5rc8"
3
+ __version__ = "0.15.0.5rc9"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/entrypoints.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import multiprocessing
2
3
  import os
3
4
  import platform
4
5
  import sys
@@ -217,6 +218,14 @@ except ImportError as e:
217
218
 
218
219
 
219
220
  def main(**kwargs):
221
+ # We use threads in a variety of places within our CLI. The multiprocessing
222
+ # "fork" start method is not safe to use with threads.
223
+ # macOS and Windows already default to "spawn", and Linux switches its default from "fork" to "forkserver" starting in Python 3.14.
224
+ # https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
225
+ # Eventually it may make sense to use "forkserver" as the default where available,
226
+ # but we can revisit that in the future.
227
+ multiprocessing.set_start_method("spawn", force=True)
228
+
220
229
  # This wrapper prevents click from suppressing errors.
221
230
  try:
222
231
  sys.exit(datahub(standalone_mode=False, **kwargs))
@@ -102,6 +102,10 @@ def convert_dashboard_info_to_patch(
102
102
  if aspect.datasets:
103
103
  patch_builder.add_datasets(aspect.datasets)
104
104
 
105
+ if aspect.dashboards:
106
+ for dashboard in aspect.dashboards:
107
+ patch_builder.add_dashboard(dashboard)
108
+
105
109
  if aspect.access:
106
110
  patch_builder.set_access(aspect.access)
107
111
 
@@ -1,5 +1,6 @@
1
1
  import concurrent.futures
2
2
  import logging
3
+ import multiprocessing
3
4
  from dataclasses import dataclass, field
4
5
  from functools import partial
5
6
  from math import ceil
@@ -182,6 +183,11 @@ class ClassificationHandler:
182
183
 
183
184
  with concurrent.futures.ProcessPoolExecutor(
184
185
  max_workers=self.config.classification.max_workers,
186
+ # The fork start method, which is the default on Linux for Python < 3.14, is not
187
+ # safe when the main process uses threads. The default start method on Windows/macOS is
188
+ # already spawn, and the Linux default changes from fork to forkserver in Python 3.14.
189
+ # https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
190
+ mp_context=multiprocessing.get_context("spawn"),
185
191
  ) as executor:
186
192
  column_info_proposal_futures = [
187
193
  executor.submit(
@@ -1,3 +1,4 @@
1
+ import os
1
2
  from abc import ABCMeta, abstractmethod
2
3
  from dataclasses import dataclass
3
4
  from typing import Any, Dict, List, Optional
@@ -37,8 +38,8 @@ class ClassificationConfig(ConfigModel):
37
38
  )
38
39
 
39
40
  max_workers: int = Field(
40
- default=1,
41
- description="Number of worker processes to use for classification. Note that any number above 1 might lead to a deadlock. Set to 1 to disable.",
41
+ default=(os.cpu_count() or 4),
42
+ description="Number of worker processes to use for classification. Set to 1 to disable.",
42
43
  )
43
44
 
44
45
  table_pattern: AllowDenyPattern = Field(
@@ -13,6 +13,7 @@ from requests.adapters import HTTPAdapter, Retry
13
13
 
14
14
  from datahub.configuration.common import AllowDenyPattern
15
15
  from datahub.configuration.source_common import DatasetSourceConfigMixin
16
+ from datahub.configuration.validate_field_removal import pydantic_removed_field
16
17
  from datahub.emitter.mce_builder import make_group_urn, make_user_urn
17
18
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
18
19
  from datahub.ingestion.api.common import PipelineContext
@@ -51,6 +52,7 @@ from datahub.metadata.schema_classes import (
51
52
  OriginTypeClass,
52
53
  StatusClass,
53
54
  )
55
+ from datahub.utilities.lossy_collections import LossyList
54
56
 
55
57
  logger = logging.getLogger(__name__)
56
58
 
@@ -132,11 +134,7 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):
132
134
  description="regex patterns for groups to include in ingestion.",
133
135
  )
134
136
 
135
- # If enabled, report will contain names of filtered users and groups.
136
- filtered_tracking: bool = Field(
137
- default=True,
138
- description="If enabled, report will contain names of filtered users and groups.",
139
- )
137
+ _remove_filtered_tracking = pydantic_removed_field("filtered_tracking")
140
138
 
141
139
  # Optional: Whether to mask sensitive information from workunit ID's. On by default.
142
140
  mask_group_id: bool = Field(
@@ -156,14 +154,10 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):
156
154
 
157
155
  @dataclass
158
156
  class AzureADSourceReport(StaleEntityRemovalSourceReport):
159
- filtered: List[str] = field(default_factory=list)
160
- filtered_tracking: bool = field(default=True, repr=False)
161
- filtered_count: int = field(default=0)
157
+ filtered: LossyList[str] = field(default_factory=LossyList)
162
158
 
163
159
  def report_filtered(self, name: str) -> None:
164
- self.filtered_count += 1
165
- if self.filtered_tracking:
166
- self.filtered.append(name)
160
+ self.filtered.append(name)
167
161
 
168
162
 
169
163
  # Source that extracts Azure AD users, groups and group memberships using Microsoft Graph REST API
@@ -266,9 +260,7 @@ class AzureADSource(StatefulIngestionSourceBase):
266
260
  def __init__(self, config: AzureADConfig, ctx: PipelineContext):
267
261
  super().__init__(config, ctx)
268
262
  self.config = config
269
- self.report = AzureADSourceReport(
270
- filtered_tracking=self.config.filtered_tracking
271
- )
263
+ self.report = AzureADSourceReport()
272
264
  session = requests.Session()
273
265
  retries = Retry(
274
266
  total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]
@@ -24,6 +24,7 @@ from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponenti
24
24
  import datahub.emitter.mce_builder as builder
25
25
  from datahub.configuration.common import AllowDenyPattern, ConfigModel
26
26
  from datahub.configuration.source_common import DatasetLineageProviderConfigBase
27
+ from datahub.configuration.validate_field_removal import pydantic_removed_field
27
28
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
28
29
  from datahub.emitter.mcp_builder import (
29
30
  ContainerKey,
@@ -155,10 +156,7 @@ class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase):
155
156
  workspace: str = Field(
156
157
  description="The Mode workspace name. Find it in Settings > Workspace > Details."
157
158
  )
158
- default_schema: str = Field(
159
- default="public",
160
- description="Default schema to use when schema is not provided in an SQL query",
161
- )
159
+ _default_schema = pydantic_removed_field("default_schema")
162
160
 
163
161
  space_pattern: AllowDenyPattern = Field(
164
162
  default=AllowDenyPattern(
@@ -308,6 +308,13 @@ class SnowflakeV2Config(
308
308
  " assertions CLI in snowflake",
309
309
  )
310
310
 
311
+ pushdown_deny_usernames: List[str] = Field(
312
+ default=[],
313
+ description="List of snowflake usernames which will not be considered for lineage/usage/queries extraction. "
314
+ "This is primarily useful for improving performance by filtering out users with extremely high query volumes. "
315
+ "Only applicable if `use_queries_v2` is enabled.",
316
+ )
317
+
311
318
  @validator("convert_urns_to_lowercase")
312
319
  def validate_convert_urns_to_lowercase(cls, v):
313
320
  if not v:
@@ -567,6 +567,7 @@ class SnowflakeV2Source(
567
567
  include_queries=self.config.include_queries,
568
568
  include_query_usage_statistics=self.config.include_query_usage_statistics,
569
569
  user_email_pattern=self.config.user_email_pattern,
570
+ pushdown_deny_usernames=self.config.pushdown_deny_usernames,
570
571
  ),
571
572
  structured_report=self.report,
572
573
  filters=self.filters,
@@ -4730,16 +4730,16 @@
4730
4730
  {
4731
4731
  "Relationship": {
4732
4732
  "/*/destinationUrn": {
4733
- "createdActor": "datasetEdges/*/created/actor",
4734
- "createdOn": "datasetEdges/*/created/time",
4733
+ "createdActor": "dashboards/*/created/actor",
4734
+ "createdOn": "dashboards/*/created/time",
4735
4735
  "entityTypes": [
4736
4736
  "dashboard"
4737
4737
  ],
4738
4738
  "isLineage": true,
4739
4739
  "name": "DashboardContainsDashboard",
4740
- "properties": "datasetEdges/*/properties",
4741
- "updatedActor": "datasetEdges/*/lastModified/actor",
4742
- "updatedOn": "datasetEdges/*/lastModified/time"
4740
+ "properties": "dashboards/*/properties",
4741
+ "updatedActor": "dashboards/*/lastModified/actor",
4742
+ "updatedOn": "dashboards/*/lastModified/time"
4743
4743
  }
4744
4744
  },
4745
4745
  "type": {
@@ -258,16 +258,16 @@
258
258
  {
259
259
  "Relationship": {
260
260
  "/*/destinationUrn": {
261
- "createdActor": "datasetEdges/*/created/actor",
262
- "createdOn": "datasetEdges/*/created/time",
261
+ "createdActor": "dashboards/*/created/actor",
262
+ "createdOn": "dashboards/*/created/time",
263
263
  "entityTypes": [
264
264
  "dashboard"
265
265
  ],
266
266
  "isLineage": true,
267
267
  "name": "DashboardContainsDashboard",
268
- "properties": "datasetEdges/*/properties",
269
- "updatedActor": "datasetEdges/*/lastModified/actor",
270
- "updatedOn": "datasetEdges/*/lastModified/time"
268
+ "properties": "dashboards/*/properties",
269
+ "updatedActor": "dashboards/*/lastModified/actor",
270
+ "updatedOn": "dashboards/*/lastModified/time"
271
271
  }
272
272
  },
273
273
  "type": {
@@ -2049,16 +2049,16 @@
2049
2049
  {
2050
2050
  "Relationship": {
2051
2051
  "/*/destinationUrn": {
2052
- "createdActor": "datasetEdges/*/created/actor",
2053
- "createdOn": "datasetEdges/*/created/time",
2052
+ "createdActor": "dashboards/*/created/actor",
2053
+ "createdOn": "dashboards/*/created/time",
2054
2054
  "entityTypes": [
2055
2055
  "dashboard"
2056
2056
  ],
2057
2057
  "isLineage": true,
2058
2058
  "name": "DashboardContainsDashboard",
2059
- "properties": "datasetEdges/*/properties",
2060
- "updatedActor": "datasetEdges/*/lastModified/actor",
2061
- "updatedOn": "datasetEdges/*/lastModified/time"
2059
+ "properties": "dashboards/*/properties",
2060
+ "updatedActor": "dashboards/*/lastModified/actor",
2061
+ "updatedOn": "dashboards/*/lastModified/time"
2062
2062
  }
2063
2063
  },
2064
2064
  "type": {
@@ -161,7 +161,7 @@ class DashboardPatchBuilder(
161
161
  lastModified=self._mint_auditstamp(),
162
162
  )
163
163
 
164
- self._ensure_urn_type("dataset", [chart_edge], "add_chart_edge")
164
+ self._ensure_urn_type("chart", [chart_edge], "add_chart_edge")
165
165
  self._add_patch(
166
166
  DashboardInfo.ASPECT_NAME,
167
167
  "add",
@@ -271,6 +271,48 @@ class DashboardPatchBuilder(
271
271
 
272
272
  return self
273
273
 
274
+ def add_dashboard(
275
+ self, dashboard: Union[Edge, Urn, str]
276
+ ) -> "DashboardPatchBuilder":
277
+ """
278
+ Adds a dashboard to the DashboardPatchBuilder.
279
+
280
+ Args:
281
+ dashboard: The dashboard, which can be an Edge object, Urn object, or a string.
282
+
283
+ Returns:
284
+ The DashboardPatchBuilder instance.
285
+
286
+ Raises:
287
+ ValueError: If the dashboard is not a Dashboard urn.
288
+
289
+ Notes:
290
+ If `dashboard` is an Edge object, it is used directly. If `dashboard` is a Urn object or string,
291
+ it is converted to an Edge object and added with default audit stamps.
292
+ """
293
+ if isinstance(dashboard, Edge):
294
+ dashboard_urn: str = dashboard.destinationUrn
295
+ dashboard_edge: Edge = dashboard
296
+ elif isinstance(dashboard, (Urn, str)):
297
+ dashboard_urn = str(dashboard)
298
+ if not dashboard_urn.startswith("urn:li:dashboard:"):
299
+ raise ValueError(f"Input {dashboard} is not a Dashboard urn")
300
+
301
+ dashboard_edge = Edge(
302
+ destinationUrn=dashboard_urn,
303
+ created=self._mint_auditstamp(),
304
+ lastModified=self._mint_auditstamp(),
305
+ )
306
+
307
+ self._ensure_urn_type("dashboard", [dashboard_edge], "add_dashboard")
308
+ self._add_patch(
309
+ DashboardInfo.ASPECT_NAME,
310
+ "add",
311
+ path=("dashboards", dashboard_urn),
312
+ value=dashboard_edge,
313
+ )
314
+ return self
315
+
274
316
  def set_dashboard_url(
275
317
  self, dashboard_url: Optional[str]
276
318
  ) -> "DashboardPatchBuilder":
@@ -55,11 +55,19 @@ async def get_client_version_stats():
55
55
  async with session.get(pypi_url) as resp:
56
56
  response_json = await resp.json()
57
57
  try:
58
- releases = response_json.get("releases", [])
59
- sorted_releases = sorted(releases.keys(), key=lambda x: Version(x))
60
- latest_cli_release_string = [
61
- x for x in sorted_releases if "rc" not in x
62
- ][-1]
58
+ releases = response_json.get("releases", {})
59
+ filtered_releases = {
60
+ version: release_files
61
+ for version, release_files in releases.items()
62
+ if not all(
63
+ release_file.get("yanked") for release_file in release_files
64
+ )
65
+ and "rc" not in version
66
+ }
67
+ sorted_releases = sorted(
68
+ filtered_releases.keys(), key=lambda x: Version(x)
69
+ )
70
+ latest_cli_release_string = sorted_releases[-1]
63
71
  latest_cli_release = Version(latest_cli_release_string)
64
72
  current_version_info = releases.get(current_version_string)
65
73
  current_version_date = None