PyPI - acryl-datahub - Versions diffs - 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc3__py3-none-any.whl - Mend

acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic. Click here for more details.

Files changed (30) hide show

{acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/METADATA +2505 -2505
{acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/RECORD +30 -19
datahub/_version.py +1 -1
datahub/cli/iceberg_cli.py +30 -6
datahub/errors.py +35 -0
datahub/ingestion/source/dbt/dbt_common.py +5 -0
datahub/ingestion/source/dbt/dbt_core.py +11 -9
datahub/ingestion/source/dynamodb/dynamodb.py +5 -0
datahub/ingestion/source/looker/looker_common.py +3 -2
datahub/ingestion/source/mongodb.py +17 -16
datahub/ingestion/source/s3/source.py +14 -5
datahub/ingestion/source/snowflake/snowflake_schema.py +10 -0
datahub/ingestion/source/snowflake/snowflake_schema_gen.py +11 -14
datahub/ingestion/source/sql/sql_common.py +10 -6
datahub/ingestion/source/sql/teradata.py +12 -0
datahub/sdk/__init__.py +33 -0
datahub/sdk/_all_entities.py +15 -0
datahub/sdk/_attribution.py +48 -0
datahub/sdk/_entity.py +89 -0
datahub/sdk/_shared.py +345 -0
datahub/sdk/container.py +193 -0
datahub/sdk/dataset.py +584 -0
datahub/sdk/entity_client.py +115 -0
datahub/sdk/main_client.py +56 -0
datahub/sdk/resolver_client.py +101 -0
datahub/sql_parsing/split_statements.py +209 -122
{acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/LICENSE +0 -0
{acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/WHEEL +0 -0
{acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/entry_points.txt +0 -0
{acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/top_level.txt +0 -0

{acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,8 @@
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=U9IGE-YR9bmigwAxXVjXLxWIGHYH0FW2G6D5UE_-ZIg,324
+datahub/_version.py,sha256=OMwbVmOu0H5NbvD0sIz05kG0aYyg0A9YFsBgwLwAWq0,324
 datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
+datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -69,7 +70,7 @@ datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,364
 datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
 datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
 datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
-datahub/cli/iceberg_cli.py,sha256=8pnFYZM4enmOqhBh1c02Q8o25vkaY7PXW5KpQ_jp0hk,21786
+datahub/cli/iceberg_cli.py,sha256=Jp3si_xZkOYr1uKA3h9_GlLJbiZPtVN_SpMgLa8OgoE,22984
 datahub/cli/ingest_cli.py,sha256=WAS_8BkoumzYVOsN8vbptKtQiQ61T958B_k49xJKdqI,22531
 datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
 datahub/cli/lite_cli.py,sha256=lolCnWWMMYojRMebbYTpHWBmOBQF_729RpW4A_y_xF4,13034
@@ -203,7 +204,7 @@ datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDut
 datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
 datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
 datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
-datahub/ingestion/source/mongodb.py,sha256=0P3PHVvMSXFkFimGvQzOQZF7APjsFOyzQAVQjVlVbuk,21172
+datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
 datahub/ingestion/source/nifi.py,sha256=FgIbZSCu-mcdnbIpqwvmADnUIxptogUq1sSEkrkwtrc,56089
 datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
 datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
@@ -281,8 +282,8 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
 datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
 datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
-datahub/ingestion/source/dbt/dbt_common.py,sha256=y4VINaQQ-WhEf-rICGLGi1U88nKmRdVQPmh88OJROWg,80536
-datahub/ingestion/source/dbt/dbt_core.py,sha256=SHtZg8ZAtmUwegpltIU8MhxBYuB_-oPOY4iBXc4SQIY,22713
+datahub/ingestion/source/dbt/dbt_common.py,sha256=bZN3J0FhqpKw-DXwE1SPYbsuOx-IaF57VoR4XUhP118,80763
+datahub/ingestion/source/dbt/dbt_core.py,sha256=izfsJhPyv5e14H-5BXWhEeN1P6hdZvcjmutEptVxY4U,22987
 datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
 datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
 datahub/ingestion/source/delta_lake/config.py,sha256=bVBwGjCPiXyjbCLQsamt4hAsKJMtMuxupKjwZEwtU78,3374
@@ -301,7 +302,7 @@ datahub/ingestion/source/dremio/dremio_source.py,sha256=XMx3EP0ciIaQjMffNljp8w-G
 datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
 datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
-datahub/ingestion/source/dynamodb/dynamodb.py,sha256=o2wM1cVmkAhur4uAbpBl-PxiRG3nO7sdA3sATQjJrMo,22463
+datahub/ingestion/source/dynamodb/dynamodb.py,sha256=hyWUFWjyfSqjs9ljM-GcS0IVmaeIufUJu4pkOvZwKoI,22599
 datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
 datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
@@ -337,7 +338,7 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=sbLntDi0c52i8uU
 datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=-rFNXKD8_EFoXuU1CiKF3wHnsBtKCJrcYDwdTno98Xk,21265
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
-datahub/ingestion/source/looker/looker_common.py,sha256=squUUBHxsLeT5xbZOTO66irtOB8fL0V4Q8Tgd9EJMYU,62067
+datahub/ingestion/source/looker/looker_common.py,sha256=dmcrzEWFxPzZhIeyUYLZuMzhgx7QzvGp4xLTrTYISCA,62136
 datahub/ingestion/source/looker/looker_config.py,sha256=eVKw1nn9D8hUFdRfNyT3MtzL8w-zWhFeokiwSnNKQuc,13607
 datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
 datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
@@ -410,7 +411,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
 datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
 datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
 datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
-datahub/ingestion/source/s3/source.py,sha256=IE_K_HE_S7w8fpGPT8OptU5-VmwapntsI5PePv_wUQA,47412
+datahub/ingestion/source/s3/source.py,sha256=JwEmVWDEFtPt6iMo82n5DQava8QAKXk_xYl01KAfdHk,47614
 datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
 datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -444,8 +445,8 @@ datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34url
 datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
 datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
 datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
-datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fdDN7jO5aHsmTFYC8cahXRT9BSAoDY72heM_WrkSxXo,25648
-datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=5DUP_uTmbLU01NegFXClqPkpB8LTruIkyIaGUBOTCQw,54718
+datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
+datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=Yba6OIWYtukAFMoNtEtX2BXWwJee17Dl58DUyK0myho,54530
 datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
 datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
 datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
@@ -465,7 +466,7 @@ datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_
 datahub/ingestion/source/sql/oracle.py,sha256=tVP3AiZO97psM8O8UzBb9C7__s8y4fkyQbXBv3m1LU4,24503
 datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
 datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
-datahub/ingestion/source/sql/sql_common.py,sha256=E1QmJ35ZuDLiZj-s1niHvIdNMyEsZrwvq_Wuy2EoYMQ,48586
+datahub/ingestion/source/sql/sql_common.py,sha256=98vCNU_ch41i_QSQSRWLIVRaTVfw6RWv4bP28SypfH0,48812
 datahub/ingestion/source/sql/sql_config.py,sha256=CBXkCpzBAGrWAXJFte_i5TmpzcsMJwEjGHpfzd6vAow,8964
 datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
 datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
@@ -474,7 +475,7 @@ datahub/ingestion/source/sql/sql_types.py,sha256=uuU3taVe4oCTXkqg1wSMGzTwVleRyUR
 datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
 datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
 datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
-datahub/ingestion/source/sql/teradata.py,sha256=5lTNMOOOmrG71fTAyTs7iYFroeTiGIdATwXQmH6sWJg,32741
+datahub/ingestion/source/sql/teradata.py,sha256=79jaYgU8QjxnZ3nQ-wq-4xMFoTjCDOfNrpVNbUoS_wU,33449
 datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhllRJFUzfU8,17895
 datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
 datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
@@ -869,6 +870,16 @@ datahub/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh4HnUHNmBEU74
 datahub/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
 datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
 datahub/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
+datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
+datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
+datahub/sdk/_attribution.py,sha256=05iNVT_IDO7aU3vU6dJKCF8EoDAI1mwh4mAg_EBf3RY,1121
+datahub/sdk/_entity.py,sha256=A_AWqNjaV6Y1YvmIiPNm0UyC0aO52G_L-iwgp9XEtYs,3225
+datahub/sdk/_shared.py,sha256=qBqJnY-793Tfg4eBFNt3VEcckiyymJsBBNgZBc3PkJI,11384
+datahub/sdk/container.py,sha256=aqp175PGQ225dpi9vS6LbaGdUVcjsEblsZc4doIsHvU,6521
+datahub/sdk/dataset.py,sha256=BydV2papBEYL2de72UMNjSkAbsDNpVlXqiEkMlXJiLY,21514
+datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
+datahub/sdk/main_client.py,sha256=IKKzBMiKtT5zIMEHrvaMyxJ7DYBVNpqua70J0Ydl74Y,2068
+datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
 datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
 datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
@@ -894,7 +905,7 @@ datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPi
 datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn0,1751
 datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
 datahub/sql_parsing/schema_resolver.py,sha256=8dYz6pC3Y35pXBn41grOE2dKkSiSeLHOz-N138uWQg4,10796
-datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
+datahub/sql_parsing/split_statements.py,sha256=Vi8VAgaEkGcK5bs48L-Krig3qhjxcvqvotCpL0ux-EY,8674
 datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
 datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
 datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
@@ -1001,9 +1012,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.15.0.6rc1.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
-acryl_datahub-0.15.0.6rc1.dist-info/METADATA,sha256=YPtgGtRPaltE8LkNq8PlWsrlzhbE-FQV0wWYkkuPqDc,175375
-acryl_datahub-0.15.0.6rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-acryl_datahub-0.15.0.6rc1.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
-acryl_datahub-0.15.0.6rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
-acryl_datahub-0.15.0.6rc1.dist-info/RECORD,,
+acryl_datahub-0.15.0.6rc3.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+acryl_datahub-0.15.0.6rc3.dist-info/METADATA,sha256=8X_DKc1kQE8SWU_bx3T5ty-_G0WurcrlnGOoSEDRCZs,175375
+acryl_datahub-0.15.0.6rc3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+acryl_datahub-0.15.0.6rc3.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
+acryl_datahub-0.15.0.6rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.15.0.6rc3.dist-info/RECORD,,

datahub/_version.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # Published at https://pypi.org/project/acryl-datahub/.
 __package_name__ = "acryl-datahub"
-__version__ = "0.15.0.6rc1"
+__version__ = "0.15.0.6rc3"
 def is_dev_mode() -> bool:

datahub/cli/iceberg_cli.py CHANGED Viewed

@@ -14,6 +14,7 @@ from datahub.cli.cli_utils import post_entity
 from datahub.configuration.common import GraphError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.metadata.schema_classes import SystemMetadataClass
+from datahub.telemetry import telemetry
 logger = logging.getLogger(__name__)
@@ -161,6 +162,7 @@ def validate_warehouse(data_root: str) -> None:
     type=int,
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
+@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
 def create(
     warehouse: str,
     description: Optional[str],
@@ -313,6 +315,7 @@ def create(
     type=int,
     help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
 )
+@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
 def update(
     warehouse: str,
     data_root: str,
@@ -398,6 +401,7 @@ def update(
 @iceberg.command()
+@telemetry.with_telemetry()
 def list() -> None:
     """
     List iceberg warehouses
@@ -413,6 +417,7 @@ def list() -> None:
 @click.option(
     "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
 )
+@telemetry.with_telemetry()
 def get(warehouse: str) -> None:
     """Fetches the details of the specified iceberg warehouse"""
     client = get_default_graph()
@@ -442,6 +447,7 @@ def get(warehouse: str) -> None:
     is_flag=True,
     help="force the delete if set without confirmation",
 )
+@telemetry.with_telemetry(capture_kwargs=["dry_run", "force"])
 def delete(warehouse: str, dry_run: bool, force: bool) -> None:
     """
     Delete warehouse
@@ -470,11 +476,19 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
             # Do we really need this double-check?
             if "__typename" in entity and "urn" in entity:
                 if entity["__typename"] in ["Container", "Dataset"]:
+                    # add the Platform Resource URN to also be deleted for each dataset.
+                    # This is not user visible, so no need to show a name to the user and include it in the count. Each
+                    # instance corresponds to a dataset whose name is shown.
+                    if entity["__typename"] == "Dataset":
+                        resource_urn = platform_resource_urn(
+                            entity["properties"]["qualifiedName"]
+                        )
+                        urns_to_delete.append(resource_urn)
                     urns_to_delete.append(entity["urn"])
                     resource_names_to_be_deleted.append(
                         entity.get("name", entity.get("urn"))
                     )
-            # TODO: PlatformResource associated with datasets need to be deleted.
         if dry_run:
             click.echo(
@@ -485,18 +499,21 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
         else:
             if not force:
                 click.confirm(
-                    f"This will delete {warehouse} warehouse, credentials, and {len(urns_to_delete)} datasets and namespaces from DataHub. Do you want to continue?",
+                    f"This will delete {warehouse} warehouse, credentials, and {len(resource_names_to_be_deleted)} datasets and namespaces from DataHub. Do you want to continue?",
                     abort=True,
                 )
-            client.hard_delete_entity(urn)
-            client.hard_delete_entity(warehouse_aspect.clientId)
-            client.hard_delete_entity(warehouse_aspect.clientSecret)
+            # Delete the resources in the warehouse first, so that in case it is interrupted, the warehouse itself is
+            # still available to enumerate the resources in it that are not yet deleted.
             for urn_to_delete in urns_to_delete:
                 client.hard_delete_entity(urn_to_delete)
+            client.hard_delete_entity(urn)
+            client.hard_delete_entity(warehouse_aspect.clientId)
+            client.hard_delete_entity(warehouse_aspect.clientSecret)
             click.echo(
-                f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(urns_to_delete)} datasets and namespaces"
+                f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(resource_names_to_be_deleted)} datasets and namespaces"
             )
@@ -504,6 +521,10 @@ def iceberg_data_platform_instance_urn(warehouse: str) -> str:
     return f"urn:li:dataPlatformInstance:({iceberg_data_platform()},{warehouse})"
+def platform_resource_urn(dataset_name: str) -> str:
+    return f"urn:li:platformResource:iceberg.{dataset_name}"
 def iceberg_data_platform() -> str:
     return "urn:li:dataPlatform:iceberg"
@@ -677,6 +698,9 @@ def get_related_entities_for_platform_instance(
                 ... on Dataset {
                   urn
                   name
+                  properties{
+                    qualifiedName
+                  }
                 }
               }
             }

datahub/errors.py ADDED Viewed

@@ -0,0 +1,35 @@
+from datahub.configuration.common import MetaError
+# TODO: Move all other error types to this file.
+class SdkUsageError(MetaError):
+    pass
+class AlreadyExistsError(SdkUsageError):
+    pass
+class ItemNotFoundError(SdkUsageError):
+    pass
+class MultipleItemsFoundError(SdkUsageError):
+    pass
+class SchemaFieldKeyError(SdkUsageError, KeyError):
+    pass
+class IngestionAttributionWarning(Warning):
+    pass
+class MultipleSubtypesWarning(Warning):
+    pass
+class ExperimentalWarning(Warning):
+    pass

datahub/ingestion/source/dbt/dbt_common.py CHANGED Viewed

@@ -357,6 +357,11 @@ class DBTCommonConfig(
         default=True,
         description="When enabled, includes the compiled code in the emitted metadata.",
     )
+    include_database_name: bool = Field(
+        default=True,
+        description="Whether to add database name to the table urn. "
+        "Set to False to skip it for engines like AWS Athena where it's not required.",
+    )
     @validator("target_platform")
     def validate_target_platform_value(cls, target_platform: str) -> str:

datahub/ingestion/source/dbt/dbt_core.py CHANGED Viewed

@@ -167,6 +167,7 @@ def extract_dbt_entities(
     use_identifiers: bool,
     tag_prefix: str,
     only_include_if_in_catalog: bool,
+    include_database_name: bool,
     report: DBTSourceReport,
 ) -> List[DBTNode]:
     sources_by_id = {x["unique_id"]: x for x in sources_results}
@@ -267,7 +268,7 @@ def extract_dbt_entities(
             dbt_name=key,
             dbt_adapter=manifest_adapter,
             dbt_package_name=manifest_node.get("package_name"),
-            database=manifest_node["database"],
+            database=manifest_node["database"] if include_database_name else None,
             schema=manifest_node["schema"],
             name=name,
             alias=manifest_node.get("alias"),
@@ -543,14 +544,15 @@ class DBTCoreSource(DBTSourceBase, TestableSource):
         all_catalog_entities = {**catalog_nodes, **catalog_sources}
         nodes = extract_dbt_entities(
-            all_manifest_entities,
-            all_catalog_entities,
-            sources_results,
-            manifest_adapter,
-            self.config.use_identifiers,
-            self.config.tag_prefix,
-            self.config.only_include_if_in_catalog,
-            self.report,
+            all_manifest_entities=all_manifest_entities,
+            all_catalog_entities=all_catalog_entities,
+            sources_results=sources_results,
+            manifest_adapter=manifest_adapter,
+            use_identifiers=self.config.use_identifiers,
+            tag_prefix=self.config.tag_prefix,
+            only_include_if_in_catalog=self.config.only_include_if_in_catalog,
+            include_database_name=self.config.include_database_name,
+            report=self.report,
         )
         return (

datahub/ingestion/source/dynamodb/dynamodb.py CHANGED Viewed

@@ -165,6 +165,10 @@ _attribute_type_to_field_type_mapping: Dict[str, Type] = {
     SourceCapability.PLATFORM_INSTANCE,
     "By default, platform_instance will use the AWS account id",
 )
+@capability(
+    SourceCapability.CLASSIFICATION,
+    "Optionally enabled via `classification.enabled`",
+)
 class DynamoDBSource(StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
@@ -244,6 +248,7 @@ class DynamoDBSource(StatefulIngestionSourceBase):
             name=dataset_name,
         )
         dataset_properties = DatasetPropertiesClass(
+            name=table_name,
             tags=[],
             customProperties={
                 "table.arn": table_info["TableArn"],

datahub/ingestion/source/looker/looker_common.py CHANGED Viewed

@@ -1673,10 +1673,11 @@ class LookerUserRegistry:
                 primary_key="",
             )
-            # Extract user email mappings
+            # Extract user email mappings.
+            # Sort it to ensure the order is deterministic.
             user_email_cache = {
                 user_id: user.email
-                for user_id, user in self._user_cache.items()
+                for user_id, user in sorted(self._user_cache.items())
                 if user.email
             }

datahub/ingestion/source/mongodb.py CHANGED Viewed

@@ -219,26 +219,27 @@ def construct_schema_pymongo(
     """
     aggregations: List[Dict] = []
+    # The order of the aggregations impacts execution time. By setting the sample/limit aggregation first,
+    # the subsequent aggregations process a much smaller dataset, improving performance.
+    if sample_size:
+        if use_random_sampling:
+            aggregations.append({"$sample": {"size": sample_size}})
+        else:
+            aggregations.append({"$limit": sample_size})
     if should_add_document_size_filter:
         doc_size_field = "temporary_doc_size_field"
         # create a temporary field to store the size of the document. filter on it and then remove it.
-        aggregations = [
-            {"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
-            {"$match": {doc_size_field: {"$lt": max_document_size}}},
-            {"$project": {doc_size_field: 0}},
-        ]
-    if use_random_sampling:
-        # get sample documents in collection
-        if sample_size:
-            aggregations.append({"$sample": {"size": sample_size}})
-        documents = collection.aggregate(
-            aggregations,
-            allowDiskUse=True,
+        aggregations.extend(
+            [
+                {"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
+                {"$match": {doc_size_field: {"$lt": max_document_size}}},
+                {"$project": {doc_size_field: 0}},
+            ]
         )
-    else:
-        if sample_size:
-            aggregations.append({"$limit": sample_size})
-        documents = collection.aggregate(aggregations, allowDiskUse=True)
+    documents = collection.aggregate(aggregations, allowDiskUse=True)
     return construct_schema(list(documents), delimiter)

datahub/ingestion/source/s3/source.py CHANGED Viewed

@@ -866,8 +866,21 @@ class S3Source(StatefulIngestionSourceBase):
         Returns:
         List[Folder]: A list of Folder objects representing the partitions found.
         """
+        def _is_allowed_path(path_spec_: PathSpec, s3_uri: str) -> bool:
+            allowed = path_spec_.allowed(s3_uri)
+            if not allowed:
+                logger.debug(f"File {s3_uri} not allowed and skipping")
+                self.report.report_file_dropped(s3_uri)
+            return allowed
+        s3_objects = (
+            obj
+            for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
+            if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
+        )
         partitions: List[Folder] = []
-        s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
         grouped_s3_objects_by_dirname = groupby_unsorted(
             s3_objects,
             key=lambda obj: obj.key.rsplit("/", 1)[0],
@@ -878,10 +891,6 @@ class S3Source(StatefulIngestionSourceBase):
             modification_time = None
             for item in group:
-                file_path = self.create_s3_path(item.bucket_name, item.key)
-                if not path_spec.allowed(file_path):
-                    logger.debug(f"File {file_path} not allowed and skipping")
-                    continue
                 file_size += item.size
                 if creation_time is None or item.last_modified < creation_time:
                     creation_time = item.last_modified

datahub/ingestion/source/snowflake/snowflake_schema.py CHANGED Viewed

@@ -6,6 +6,7 @@ from datetime import datetime
 from typing import Callable, Dict, Iterable, List, MutableMapping, Optional
 from datahub.ingestion.api.report import SupportsAsObj
+from datahub.ingestion.source.common.subtypes import DatasetSubTypes
 from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
 from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
 from datahub.ingestion.source.snowflake.snowflake_query import (
@@ -100,6 +101,9 @@ class SnowflakeTable(BaseTable):
     def is_hybrid(self) -> bool:
         return self.type is not None and self.type == "HYBRID TABLE"
+    def get_subtype(self) -> DatasetSubTypes:
+        return DatasetSubTypes.TABLE
 @dataclass
 class SnowflakeView(BaseView):
@@ -109,6 +113,9 @@ class SnowflakeView(BaseView):
     column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
     is_secure: bool = False
+    def get_subtype(self) -> DatasetSubTypes:
+        return DatasetSubTypes.VIEW
 @dataclass
 class SnowflakeSchema:
@@ -154,6 +161,9 @@ class SnowflakeStream:
     column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
     last_altered: Optional[datetime] = None
+    def get_subtype(self) -> DatasetSubTypes:
+        return DatasetSubTypes.SNOWFLAKE_STREAM
 class _SnowflakeTagCache:
     def __init__(self) -> None:

datahub/ingestion/source/snowflake/snowflake_schema_gen.py CHANGED Viewed

@@ -21,7 +21,6 @@ from datahub.ingestion.glossary.classification_mixin import (
 from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
 from datahub.ingestion.source.common.subtypes import (
     DatasetContainerSubTypes,
-    DatasetSubTypes,
 )
 from datahub.ingestion.source.snowflake.constants import (
     GENERIC_PERMISSION_ERROR_KEY,
@@ -467,7 +466,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 context=f"{db_name}.{schema_name}",
             )
-    def _process_tags(self, snowflake_schema, schema_name, db_name, domain):
+    def _process_tags(
+        self,
+        snowflake_schema: SnowflakeSchema,
+        schema_name: str,
+        db_name: str,
+        domain: str,
+    ) -> None:
         snowflake_schema.tags = self.tag_extractor.get_tags_on_object(
             schema_name=schema_name, db_name=db_name, domain=domain
         )
@@ -837,15 +842,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         if dpi_aspect:
             yield dpi_aspect
-        subTypes = SubTypes(
-            typeNames=(
-                [DatasetSubTypes.SNOWFLAKE_STREAM]
-                if isinstance(table, SnowflakeStream)
-                else [DatasetSubTypes.VIEW]
-                if isinstance(table, SnowflakeView)
-                else [DatasetSubTypes.TABLE]
-            )
-        )
+        subTypes = SubTypes(typeNames=[table.get_subtype()])
         yield MetadataChangeProposalWrapper(
             entityUrn=dataset_urn, aspect=subTypes
@@ -932,9 +929,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                         "OWNER_ROLE_TYPE": table.owner_role_type,
                         "TABLE_NAME": table.table_name,
                         "BASE_TABLES": table.base_tables,
-                        "STALE_AFTER": table.stale_after.isoformat()
-                        if table.stale_after
-                        else None,
+                        "STALE_AFTER": (
+                            table.stale_after.isoformat() if table.stale_after else None
+                        ),
                     }.items()
                     if v
                 }

datahub/ingestion/source/sql/sql_common.py CHANGED Viewed

@@ -352,6 +352,15 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )
         self.report.sql_aggregator = self.aggregator.report
+    def _add_default_options(self, sql_config: SQLCommonConfig) -> None:
+        """Add default SQLAlchemy options. Can be overridden by subclasses to add additional defaults."""
+        # Extra default SQLAlchemy option for better connection pooling and threading.
+        # https://docs.sqlalchemy.org/en/14/core/pooling.html#sqlalchemy.pool.QueuePool.params.max_overflow
+        if sql_config.is_profiling_enabled():
+            sql_config.options.setdefault(
+                "max_overflow", sql_config.profiling.max_workers
+            )
     @classmethod
     def test_connection(cls, config_dict: dict) -> TestConnectionReport:
         test_report = TestConnectionReport()
@@ -519,12 +528,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             # Known issue with sqlalchemy https://stackoverflow.com/questions/60804288/pycharm-duplicated-log-for-sqlalchemy-echo-true
             sqlalchemy_log._add_default_handler = lambda x: None  # type: ignore
-        # Extra default SQLAlchemy option for better connection pooling and threading.
-        # https://docs.sqlalchemy.org/en/14/core/pooling.html#sqlalchemy.pool.QueuePool.params.max_overflow
-        if sql_config.is_profiling_enabled():
-            sql_config.options.setdefault(
-                "max_overflow", sql_config.profiling.max_workers
-            )
+        self._add_default_options(sql_config)
         for inspector in self.get_inspectors():
             profiler = None

datahub/ingestion/source/sql/teradata.py CHANGED Viewed

@@ -22,6 +22,7 @@ from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine import Engine
 from sqlalchemy.engine.base import Connection
 from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.pool import QueuePool
 from sqlalchemy.sql.expression import text
 from teradatasqlalchemy.dialect import TeradataDialect
 from teradatasqlalchemy.options import configure
@@ -678,6 +679,16 @@ ORDER by DataBaseName, TableName;
             if self.config.stateful_ingestion:
                 self.config.stateful_ingestion.remove_stale_metadata = False
+    def _add_default_options(self, sql_config: SQLCommonConfig) -> None:
+        """Add Teradata-specific default options"""
+        super()._add_default_options(sql_config)
+        if sql_config.is_profiling_enabled():
+            # Sqlalchemy uses QueuePool by default however Teradata uses SingletonThreadPool.
+            # SingletonThreadPool does not support parallel connections. For using profiling, we need to use QueuePool.
+            # https://docs.sqlalchemy.org/en/20/core/pooling.html#connection-pool-configuration
+            # https://github.com/Teradata/sqlalchemy-teradata/issues/96
+            sql_config.options.setdefault("poolclass", QueuePool)
     @classmethod
     def create(cls, config_dict, ctx):
         config = TeradataConfig.parse_obj(config_dict)
@@ -705,6 +716,7 @@ ORDER by DataBaseName, TableName;
         # This method can be overridden in the case that you want to dynamically
         # run on multiple databases.
         url = self.config.get_sql_alchemy_url()
         logger.debug(f"sql_alchemy_url={url}")
         engine = create_engine(url, **self.config.options)
         with engine.connect() as conn:

datahub/sdk/__init__.py ADDED Viewed

@@ -0,0 +1,33 @@
+import warnings
+import datahub.metadata.schema_classes as models
+from datahub.errors import ExperimentalWarning, SdkUsageError
+from datahub.ingestion.graph.config import DatahubClientConfig
+from datahub.metadata.urns import (
+    ChartUrn,
+    ContainerUrn,
+    CorpGroupUrn,
+    CorpUserUrn,
+    DashboardUrn,
+    DataPlatformInstanceUrn,
+    DataPlatformUrn,
+    DatasetUrn,
+    DomainUrn,
+    GlossaryTermUrn,
+    SchemaFieldUrn,
+    TagUrn,
+)
+from datahub.sdk.container import Container
+from datahub.sdk.dataset import Dataset
+from datahub.sdk.main_client import DataHubClient
+warnings.warn(
+    "The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
+    "Our typical backwards-compatibility and stability guarantees do not apply to this code. "
+    "When it's promoted to stable, the import path will change "
+    "from `from datahub.sdk import ...` to `from datahub import ...`.",
+    ExperimentalWarning,
+    stacklevel=2,
+)
+del warnings
+del ExperimentalWarning

acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc3__py3-none-any.whl

Potentially problematic release.

acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc3__py3-none-any.whl