acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/METADATA +2505 -2505
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/RECORD +30 -19
- datahub/_version.py +1 -1
- datahub/cli/iceberg_cli.py +30 -6
- datahub/errors.py +35 -0
- datahub/ingestion/source/dbt/dbt_common.py +5 -0
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dynamodb/dynamodb.py +5 -0
- datahub/ingestion/source/looker/looker_common.py +3 -2
- datahub/ingestion/source/mongodb.py +17 -16
- datahub/ingestion/source/s3/source.py +14 -5
- datahub/ingestion/source/snowflake/snowflake_schema.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +11 -14
- datahub/ingestion/source/sql/sql_common.py +10 -6
- datahub/ingestion/source/sql/teradata.py +12 -0
- datahub/sdk/__init__.py +33 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_attribution.py +48 -0
- datahub/sdk/_entity.py +89 -0
- datahub/sdk/_shared.py +345 -0
- datahub/sdk/container.py +193 -0
- datahub/sdk/dataset.py +584 -0
- datahub/sdk/entity_client.py +115 -0
- datahub/sdk/main_client.py +56 -0
- datahub/sdk/resolver_client.py +101 -0
- datahub/sql_parsing/split_statements.py +209 -122
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=OMwbVmOu0H5NbvD0sIz05kG0aYyg0A9YFsBgwLwAWq0,324
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
|
+
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
5
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
7
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
|
|
@@ -69,7 +70,7 @@ datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,364
|
|
|
69
70
|
datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
|
|
70
71
|
datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
|
|
71
72
|
datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
|
|
72
|
-
datahub/cli/iceberg_cli.py,sha256=
|
|
73
|
+
datahub/cli/iceberg_cli.py,sha256=Jp3si_xZkOYr1uKA3h9_GlLJbiZPtVN_SpMgLa8OgoE,22984
|
|
73
74
|
datahub/cli/ingest_cli.py,sha256=WAS_8BkoumzYVOsN8vbptKtQiQ61T958B_k49xJKdqI,22531
|
|
74
75
|
datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
|
|
75
76
|
datahub/cli/lite_cli.py,sha256=lolCnWWMMYojRMebbYTpHWBmOBQF_729RpW4A_y_xF4,13034
|
|
@@ -203,7 +204,7 @@ datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDut
|
|
|
203
204
|
datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
|
|
204
205
|
datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
|
|
205
206
|
datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
|
|
206
|
-
datahub/ingestion/source/mongodb.py,sha256=
|
|
207
|
+
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
207
208
|
datahub/ingestion/source/nifi.py,sha256=FgIbZSCu-mcdnbIpqwvmADnUIxptogUq1sSEkrkwtrc,56089
|
|
208
209
|
datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
|
|
209
210
|
datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
|
|
@@ -281,8 +282,8 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
281
282
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
282
283
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
283
284
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
|
|
284
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
285
|
-
datahub/ingestion/source/dbt/dbt_core.py,sha256=
|
|
285
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=bZN3J0FhqpKw-DXwE1SPYbsuOx-IaF57VoR4XUhP118,80763
|
|
286
|
+
datahub/ingestion/source/dbt/dbt_core.py,sha256=izfsJhPyv5e14H-5BXWhEeN1P6hdZvcjmutEptVxY4U,22987
|
|
286
287
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
|
|
287
288
|
datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
|
|
288
289
|
datahub/ingestion/source/delta_lake/config.py,sha256=bVBwGjCPiXyjbCLQsamt4hAsKJMtMuxupKjwZEwtU78,3374
|
|
@@ -301,7 +302,7 @@ datahub/ingestion/source/dremio/dremio_source.py,sha256=XMx3EP0ciIaQjMffNljp8w-G
|
|
|
301
302
|
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
|
|
302
303
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
304
|
datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
|
|
304
|
-
datahub/ingestion/source/dynamodb/dynamodb.py,sha256=
|
|
305
|
+
datahub/ingestion/source/dynamodb/dynamodb.py,sha256=hyWUFWjyfSqjs9ljM-GcS0IVmaeIufUJu4pkOvZwKoI,22599
|
|
305
306
|
datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
306
307
|
datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
|
|
307
308
|
datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
|
|
@@ -337,7 +338,7 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=sbLntDi0c52i8uU
|
|
|
337
338
|
datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=-rFNXKD8_EFoXuU1CiKF3wHnsBtKCJrcYDwdTno98Xk,21265
|
|
338
339
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
339
340
|
datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
|
|
340
|
-
datahub/ingestion/source/looker/looker_common.py,sha256=
|
|
341
|
+
datahub/ingestion/source/looker/looker_common.py,sha256=dmcrzEWFxPzZhIeyUYLZuMzhgx7QzvGp4xLTrTYISCA,62136
|
|
341
342
|
datahub/ingestion/source/looker/looker_config.py,sha256=eVKw1nn9D8hUFdRfNyT3MtzL8w-zWhFeokiwSnNKQuc,13607
|
|
342
343
|
datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
|
|
343
344
|
datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
|
|
@@ -410,7 +411,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
|
|
|
410
411
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
411
412
|
datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
412
413
|
datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
413
|
-
datahub/ingestion/source/s3/source.py,sha256=
|
|
414
|
+
datahub/ingestion/source/s3/source.py,sha256=JwEmVWDEFtPt6iMo82n5DQava8QAKXk_xYl01KAfdHk,47614
|
|
414
415
|
datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
415
416
|
datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
|
|
416
417
|
datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
|
|
@@ -444,8 +445,8 @@ datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34url
|
|
|
444
445
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
|
|
445
446
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
|
|
446
447
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
|
|
447
|
-
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=
|
|
448
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=
|
|
448
|
+
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
|
|
449
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=Yba6OIWYtukAFMoNtEtX2BXWwJee17Dl58DUyK0myho,54530
|
|
449
450
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
450
451
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
451
452
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
|
|
@@ -465,7 +466,7 @@ datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_
|
|
|
465
466
|
datahub/ingestion/source/sql/oracle.py,sha256=tVP3AiZO97psM8O8UzBb9C7__s8y4fkyQbXBv3m1LU4,24503
|
|
466
467
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
467
468
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
468
|
-
datahub/ingestion/source/sql/sql_common.py,sha256=
|
|
469
|
+
datahub/ingestion/source/sql/sql_common.py,sha256=98vCNU_ch41i_QSQSRWLIVRaTVfw6RWv4bP28SypfH0,48812
|
|
469
470
|
datahub/ingestion/source/sql/sql_config.py,sha256=CBXkCpzBAGrWAXJFte_i5TmpzcsMJwEjGHpfzd6vAow,8964
|
|
470
471
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
471
472
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
|
|
@@ -474,7 +475,7 @@ datahub/ingestion/source/sql/sql_types.py,sha256=uuU3taVe4oCTXkqg1wSMGzTwVleRyUR
|
|
|
474
475
|
datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
|
|
475
476
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
476
477
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
477
|
-
datahub/ingestion/source/sql/teradata.py,sha256=
|
|
478
|
+
datahub/ingestion/source/sql/teradata.py,sha256=79jaYgU8QjxnZ3nQ-wq-4xMFoTjCDOfNrpVNbUoS_wU,33449
|
|
478
479
|
datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhllRJFUzfU8,17895
|
|
479
480
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
|
|
480
481
|
datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
|
|
@@ -869,6 +870,16 @@ datahub/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh4HnUHNmBEU74
|
|
|
869
870
|
datahub/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
|
|
870
871
|
datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
|
|
871
872
|
datahub/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
|
|
873
|
+
datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
|
|
874
|
+
datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
|
|
875
|
+
datahub/sdk/_attribution.py,sha256=05iNVT_IDO7aU3vU6dJKCF8EoDAI1mwh4mAg_EBf3RY,1121
|
|
876
|
+
datahub/sdk/_entity.py,sha256=A_AWqNjaV6Y1YvmIiPNm0UyC0aO52G_L-iwgp9XEtYs,3225
|
|
877
|
+
datahub/sdk/_shared.py,sha256=qBqJnY-793Tfg4eBFNt3VEcckiyymJsBBNgZBc3PkJI,11384
|
|
878
|
+
datahub/sdk/container.py,sha256=aqp175PGQ225dpi9vS6LbaGdUVcjsEblsZc4doIsHvU,6521
|
|
879
|
+
datahub/sdk/dataset.py,sha256=BydV2papBEYL2de72UMNjSkAbsDNpVlXqiEkMlXJiLY,21514
|
|
880
|
+
datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
|
|
881
|
+
datahub/sdk/main_client.py,sha256=IKKzBMiKtT5zIMEHrvaMyxJ7DYBVNpqua70J0Ydl74Y,2068
|
|
882
|
+
datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
|
|
872
883
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
873
884
|
datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
|
|
874
885
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
@@ -894,7 +905,7 @@ datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPi
|
|
|
894
905
|
datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn0,1751
|
|
895
906
|
datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
|
|
896
907
|
datahub/sql_parsing/schema_resolver.py,sha256=8dYz6pC3Y35pXBn41grOE2dKkSiSeLHOz-N138uWQg4,10796
|
|
897
|
-
datahub/sql_parsing/split_statements.py,sha256=
|
|
908
|
+
datahub/sql_parsing/split_statements.py,sha256=Vi8VAgaEkGcK5bs48L-Krig3qhjxcvqvotCpL0ux-EY,8674
|
|
898
909
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
|
|
899
910
|
datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
|
|
900
911
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
@@ -1001,9 +1012,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1001
1012
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1002
1013
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1003
1014
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1004
|
-
acryl_datahub-0.15.0.
|
|
1005
|
-
acryl_datahub-0.15.0.
|
|
1006
|
-
acryl_datahub-0.15.0.
|
|
1007
|
-
acryl_datahub-0.15.0.
|
|
1008
|
-
acryl_datahub-0.15.0.
|
|
1009
|
-
acryl_datahub-0.15.0.
|
|
1015
|
+
acryl_datahub-0.15.0.6rc3.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1016
|
+
acryl_datahub-0.15.0.6rc3.dist-info/METADATA,sha256=8X_DKc1kQE8SWU_bx3T5ty-_G0WurcrlnGOoSEDRCZs,175375
|
|
1017
|
+
acryl_datahub-0.15.0.6rc3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1018
|
+
acryl_datahub-0.15.0.6rc3.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1019
|
+
acryl_datahub-0.15.0.6rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1020
|
+
acryl_datahub-0.15.0.6rc3.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/cli/iceberg_cli.py
CHANGED
|
@@ -14,6 +14,7 @@ from datahub.cli.cli_utils import post_entity
|
|
|
14
14
|
from datahub.configuration.common import GraphError
|
|
15
15
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
16
16
|
from datahub.metadata.schema_classes import SystemMetadataClass
|
|
17
|
+
from datahub.telemetry import telemetry
|
|
17
18
|
|
|
18
19
|
logger = logging.getLogger(__name__)
|
|
19
20
|
|
|
@@ -161,6 +162,7 @@ def validate_warehouse(data_root: str) -> None:
|
|
|
161
162
|
type=int,
|
|
162
163
|
help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
|
|
163
164
|
)
|
|
165
|
+
@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
|
|
164
166
|
def create(
|
|
165
167
|
warehouse: str,
|
|
166
168
|
description: Optional[str],
|
|
@@ -313,6 +315,7 @@ def create(
|
|
|
313
315
|
type=int,
|
|
314
316
|
help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
|
|
315
317
|
)
|
|
318
|
+
@telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
|
|
316
319
|
def update(
|
|
317
320
|
warehouse: str,
|
|
318
321
|
data_root: str,
|
|
@@ -398,6 +401,7 @@ def update(
|
|
|
398
401
|
|
|
399
402
|
|
|
400
403
|
@iceberg.command()
|
|
404
|
+
@telemetry.with_telemetry()
|
|
401
405
|
def list() -> None:
|
|
402
406
|
"""
|
|
403
407
|
List iceberg warehouses
|
|
@@ -413,6 +417,7 @@ def list() -> None:
|
|
|
413
417
|
@click.option(
|
|
414
418
|
"-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
|
|
415
419
|
)
|
|
420
|
+
@telemetry.with_telemetry()
|
|
416
421
|
def get(warehouse: str) -> None:
|
|
417
422
|
"""Fetches the details of the specified iceberg warehouse"""
|
|
418
423
|
client = get_default_graph()
|
|
@@ -442,6 +447,7 @@ def get(warehouse: str) -> None:
|
|
|
442
447
|
is_flag=True,
|
|
443
448
|
help="force the delete if set without confirmation",
|
|
444
449
|
)
|
|
450
|
+
@telemetry.with_telemetry(capture_kwargs=["dry_run", "force"])
|
|
445
451
|
def delete(warehouse: str, dry_run: bool, force: bool) -> None:
|
|
446
452
|
"""
|
|
447
453
|
Delete warehouse
|
|
@@ -470,11 +476,19 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
|
|
|
470
476
|
# Do we really need this double-check?
|
|
471
477
|
if "__typename" in entity and "urn" in entity:
|
|
472
478
|
if entity["__typename"] in ["Container", "Dataset"]:
|
|
479
|
+
# add the Platform Resource URN to also be deleted for each dataset.
|
|
480
|
+
# This is not user visible, so no need to show a name to the user and include it in the count. Each
|
|
481
|
+
# instance corresponds to a dataset whose name is shown.
|
|
482
|
+
if entity["__typename"] == "Dataset":
|
|
483
|
+
resource_urn = platform_resource_urn(
|
|
484
|
+
entity["properties"]["qualifiedName"]
|
|
485
|
+
)
|
|
486
|
+
urns_to_delete.append(resource_urn)
|
|
487
|
+
|
|
473
488
|
urns_to_delete.append(entity["urn"])
|
|
474
489
|
resource_names_to_be_deleted.append(
|
|
475
490
|
entity.get("name", entity.get("urn"))
|
|
476
491
|
)
|
|
477
|
-
# TODO: PlatformResource associated with datasets need to be deleted.
|
|
478
492
|
|
|
479
493
|
if dry_run:
|
|
480
494
|
click.echo(
|
|
@@ -485,18 +499,21 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
|
|
|
485
499
|
else:
|
|
486
500
|
if not force:
|
|
487
501
|
click.confirm(
|
|
488
|
-
f"This will delete {warehouse} warehouse, credentials, and {len(
|
|
502
|
+
f"This will delete {warehouse} warehouse, credentials, and {len(resource_names_to_be_deleted)} datasets and namespaces from DataHub. Do you want to continue?",
|
|
489
503
|
abort=True,
|
|
490
504
|
)
|
|
491
|
-
client.hard_delete_entity(urn)
|
|
492
|
-
client.hard_delete_entity(warehouse_aspect.clientId)
|
|
493
|
-
client.hard_delete_entity(warehouse_aspect.clientSecret)
|
|
494
505
|
|
|
506
|
+
# Delete the resources in the warehouse first, so that in case it is interrupted, the warehouse itself is
|
|
507
|
+
# still available to enumerate the resources in it that are not yet deleted.
|
|
495
508
|
for urn_to_delete in urns_to_delete:
|
|
496
509
|
client.hard_delete_entity(urn_to_delete)
|
|
497
510
|
|
|
511
|
+
client.hard_delete_entity(urn)
|
|
512
|
+
client.hard_delete_entity(warehouse_aspect.clientId)
|
|
513
|
+
client.hard_delete_entity(warehouse_aspect.clientSecret)
|
|
514
|
+
|
|
498
515
|
click.echo(
|
|
499
|
-
f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(
|
|
516
|
+
f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(resource_names_to_be_deleted)} datasets and namespaces"
|
|
500
517
|
)
|
|
501
518
|
|
|
502
519
|
|
|
@@ -504,6 +521,10 @@ def iceberg_data_platform_instance_urn(warehouse: str) -> str:
|
|
|
504
521
|
return f"urn:li:dataPlatformInstance:({iceberg_data_platform()},{warehouse})"
|
|
505
522
|
|
|
506
523
|
|
|
524
|
+
def platform_resource_urn(dataset_name: str) -> str:
|
|
525
|
+
return f"urn:li:platformResource:iceberg.{dataset_name}"
|
|
526
|
+
|
|
527
|
+
|
|
507
528
|
def iceberg_data_platform() -> str:
|
|
508
529
|
return "urn:li:dataPlatform:iceberg"
|
|
509
530
|
|
|
@@ -677,6 +698,9 @@ def get_related_entities_for_platform_instance(
|
|
|
677
698
|
... on Dataset {
|
|
678
699
|
urn
|
|
679
700
|
name
|
|
701
|
+
properties{
|
|
702
|
+
qualifiedName
|
|
703
|
+
}
|
|
680
704
|
}
|
|
681
705
|
}
|
|
682
706
|
}
|
datahub/errors.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from datahub.configuration.common import MetaError
|
|
2
|
+
|
|
3
|
+
# TODO: Move all other error types to this file.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SdkUsageError(MetaError):
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AlreadyExistsError(SdkUsageError):
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ItemNotFoundError(SdkUsageError):
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MultipleItemsFoundError(SdkUsageError):
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SchemaFieldKeyError(SdkUsageError, KeyError):
|
|
23
|
+
pass
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class IngestionAttributionWarning(Warning):
|
|
27
|
+
pass
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MultipleSubtypesWarning(Warning):
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ExperimentalWarning(Warning):
|
|
35
|
+
pass
|
|
@@ -357,6 +357,11 @@ class DBTCommonConfig(
|
|
|
357
357
|
default=True,
|
|
358
358
|
description="When enabled, includes the compiled code in the emitted metadata.",
|
|
359
359
|
)
|
|
360
|
+
include_database_name: bool = Field(
|
|
361
|
+
default=True,
|
|
362
|
+
description="Whether to add database name to the table urn. "
|
|
363
|
+
"Set to False to skip it for engines like AWS Athena where it's not required.",
|
|
364
|
+
)
|
|
360
365
|
|
|
361
366
|
@validator("target_platform")
|
|
362
367
|
def validate_target_platform_value(cls, target_platform: str) -> str:
|
|
@@ -167,6 +167,7 @@ def extract_dbt_entities(
|
|
|
167
167
|
use_identifiers: bool,
|
|
168
168
|
tag_prefix: str,
|
|
169
169
|
only_include_if_in_catalog: bool,
|
|
170
|
+
include_database_name: bool,
|
|
170
171
|
report: DBTSourceReport,
|
|
171
172
|
) -> List[DBTNode]:
|
|
172
173
|
sources_by_id = {x["unique_id"]: x for x in sources_results}
|
|
@@ -267,7 +268,7 @@ def extract_dbt_entities(
|
|
|
267
268
|
dbt_name=key,
|
|
268
269
|
dbt_adapter=manifest_adapter,
|
|
269
270
|
dbt_package_name=manifest_node.get("package_name"),
|
|
270
|
-
database=manifest_node["database"],
|
|
271
|
+
database=manifest_node["database"] if include_database_name else None,
|
|
271
272
|
schema=manifest_node["schema"],
|
|
272
273
|
name=name,
|
|
273
274
|
alias=manifest_node.get("alias"),
|
|
@@ -543,14 +544,15 @@ class DBTCoreSource(DBTSourceBase, TestableSource):
|
|
|
543
544
|
all_catalog_entities = {**catalog_nodes, **catalog_sources}
|
|
544
545
|
|
|
545
546
|
nodes = extract_dbt_entities(
|
|
546
|
-
all_manifest_entities,
|
|
547
|
-
all_catalog_entities,
|
|
548
|
-
sources_results,
|
|
549
|
-
manifest_adapter,
|
|
550
|
-
self.config.use_identifiers,
|
|
551
|
-
self.config.tag_prefix,
|
|
552
|
-
self.config.only_include_if_in_catalog,
|
|
553
|
-
self.
|
|
547
|
+
all_manifest_entities=all_manifest_entities,
|
|
548
|
+
all_catalog_entities=all_catalog_entities,
|
|
549
|
+
sources_results=sources_results,
|
|
550
|
+
manifest_adapter=manifest_adapter,
|
|
551
|
+
use_identifiers=self.config.use_identifiers,
|
|
552
|
+
tag_prefix=self.config.tag_prefix,
|
|
553
|
+
only_include_if_in_catalog=self.config.only_include_if_in_catalog,
|
|
554
|
+
include_database_name=self.config.include_database_name,
|
|
555
|
+
report=self.report,
|
|
554
556
|
)
|
|
555
557
|
|
|
556
558
|
return (
|
|
@@ -165,6 +165,10 @@ _attribute_type_to_field_type_mapping: Dict[str, Type] = {
|
|
|
165
165
|
SourceCapability.PLATFORM_INSTANCE,
|
|
166
166
|
"By default, platform_instance will use the AWS account id",
|
|
167
167
|
)
|
|
168
|
+
@capability(
|
|
169
|
+
SourceCapability.CLASSIFICATION,
|
|
170
|
+
"Optionally enabled via `classification.enabled`",
|
|
171
|
+
)
|
|
168
172
|
class DynamoDBSource(StatefulIngestionSourceBase):
|
|
169
173
|
"""
|
|
170
174
|
This plugin extracts the following:
|
|
@@ -244,6 +248,7 @@ class DynamoDBSource(StatefulIngestionSourceBase):
|
|
|
244
248
|
name=dataset_name,
|
|
245
249
|
)
|
|
246
250
|
dataset_properties = DatasetPropertiesClass(
|
|
251
|
+
name=table_name,
|
|
247
252
|
tags=[],
|
|
248
253
|
customProperties={
|
|
249
254
|
"table.arn": table_info["TableArn"],
|
|
@@ -1673,10 +1673,11 @@ class LookerUserRegistry:
|
|
|
1673
1673
|
primary_key="",
|
|
1674
1674
|
)
|
|
1675
1675
|
|
|
1676
|
-
# Extract user email mappings
|
|
1676
|
+
# Extract user email mappings.
|
|
1677
|
+
# Sort it to ensure the order is deterministic.
|
|
1677
1678
|
user_email_cache = {
|
|
1678
1679
|
user_id: user.email
|
|
1679
|
-
for user_id, user in self._user_cache.items()
|
|
1680
|
+
for user_id, user in sorted(self._user_cache.items())
|
|
1680
1681
|
if user.email
|
|
1681
1682
|
}
|
|
1682
1683
|
|
|
@@ -219,26 +219,27 @@ def construct_schema_pymongo(
|
|
|
219
219
|
"""
|
|
220
220
|
|
|
221
221
|
aggregations: List[Dict] = []
|
|
222
|
+
|
|
223
|
+
# The order of the aggregations impacts execution time. By setting the sample/limit aggregation first,
|
|
224
|
+
# the subsequent aggregations process a much smaller dataset, improving performance.
|
|
225
|
+
if sample_size:
|
|
226
|
+
if use_random_sampling:
|
|
227
|
+
aggregations.append({"$sample": {"size": sample_size}})
|
|
228
|
+
else:
|
|
229
|
+
aggregations.append({"$limit": sample_size})
|
|
230
|
+
|
|
222
231
|
if should_add_document_size_filter:
|
|
223
232
|
doc_size_field = "temporary_doc_size_field"
|
|
224
233
|
# create a temporary field to store the size of the document. filter on it and then remove it.
|
|
225
|
-
aggregations
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
# get sample documents in collection
|
|
232
|
-
if sample_size:
|
|
233
|
-
aggregations.append({"$sample": {"size": sample_size}})
|
|
234
|
-
documents = collection.aggregate(
|
|
235
|
-
aggregations,
|
|
236
|
-
allowDiskUse=True,
|
|
234
|
+
aggregations.extend(
|
|
235
|
+
[
|
|
236
|
+
{"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
|
|
237
|
+
{"$match": {doc_size_field: {"$lt": max_document_size}}},
|
|
238
|
+
{"$project": {doc_size_field: 0}},
|
|
239
|
+
]
|
|
237
240
|
)
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
aggregations.append({"$limit": sample_size})
|
|
241
|
-
documents = collection.aggregate(aggregations, allowDiskUse=True)
|
|
241
|
+
|
|
242
|
+
documents = collection.aggregate(aggregations, allowDiskUse=True)
|
|
242
243
|
|
|
243
244
|
return construct_schema(list(documents), delimiter)
|
|
244
245
|
|
|
@@ -866,8 +866,21 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
866
866
|
Returns:
|
|
867
867
|
List[Folder]: A list of Folder objects representing the partitions found.
|
|
868
868
|
"""
|
|
869
|
+
|
|
870
|
+
def _is_allowed_path(path_spec_: PathSpec, s3_uri: str) -> bool:
|
|
871
|
+
allowed = path_spec_.allowed(s3_uri)
|
|
872
|
+
if not allowed:
|
|
873
|
+
logger.debug(f"File {s3_uri} not allowed and skipping")
|
|
874
|
+
self.report.report_file_dropped(s3_uri)
|
|
875
|
+
return allowed
|
|
876
|
+
|
|
877
|
+
s3_objects = (
|
|
878
|
+
obj
|
|
879
|
+
for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
|
|
880
|
+
if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
|
|
881
|
+
)
|
|
882
|
+
|
|
869
883
|
partitions: List[Folder] = []
|
|
870
|
-
s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
|
|
871
884
|
grouped_s3_objects_by_dirname = groupby_unsorted(
|
|
872
885
|
s3_objects,
|
|
873
886
|
key=lambda obj: obj.key.rsplit("/", 1)[0],
|
|
@@ -878,10 +891,6 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
878
891
|
modification_time = None
|
|
879
892
|
|
|
880
893
|
for item in group:
|
|
881
|
-
file_path = self.create_s3_path(item.bucket_name, item.key)
|
|
882
|
-
if not path_spec.allowed(file_path):
|
|
883
|
-
logger.debug(f"File {file_path} not allowed and skipping")
|
|
884
|
-
continue
|
|
885
894
|
file_size += item.size
|
|
886
895
|
if creation_time is None or item.last_modified < creation_time:
|
|
887
896
|
creation_time = item.last_modified
|
|
@@ -6,6 +6,7 @@ from datetime import datetime
|
|
|
6
6
|
from typing import Callable, Dict, Iterable, List, MutableMapping, Optional
|
|
7
7
|
|
|
8
8
|
from datahub.ingestion.api.report import SupportsAsObj
|
|
9
|
+
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
|
|
9
10
|
from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
|
|
10
11
|
from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
|
|
11
12
|
from datahub.ingestion.source.snowflake.snowflake_query import (
|
|
@@ -100,6 +101,9 @@ class SnowflakeTable(BaseTable):
|
|
|
100
101
|
def is_hybrid(self) -> bool:
|
|
101
102
|
return self.type is not None and self.type == "HYBRID TABLE"
|
|
102
103
|
|
|
104
|
+
def get_subtype(self) -> DatasetSubTypes:
|
|
105
|
+
return DatasetSubTypes.TABLE
|
|
106
|
+
|
|
103
107
|
|
|
104
108
|
@dataclass
|
|
105
109
|
class SnowflakeView(BaseView):
|
|
@@ -109,6 +113,9 @@ class SnowflakeView(BaseView):
|
|
|
109
113
|
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
|
|
110
114
|
is_secure: bool = False
|
|
111
115
|
|
|
116
|
+
def get_subtype(self) -> DatasetSubTypes:
|
|
117
|
+
return DatasetSubTypes.VIEW
|
|
118
|
+
|
|
112
119
|
|
|
113
120
|
@dataclass
|
|
114
121
|
class SnowflakeSchema:
|
|
@@ -154,6 +161,9 @@ class SnowflakeStream:
|
|
|
154
161
|
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
|
|
155
162
|
last_altered: Optional[datetime] = None
|
|
156
163
|
|
|
164
|
+
def get_subtype(self) -> DatasetSubTypes:
|
|
165
|
+
return DatasetSubTypes.SNOWFLAKE_STREAM
|
|
166
|
+
|
|
157
167
|
|
|
158
168
|
class _SnowflakeTagCache:
|
|
159
169
|
def __init__(self) -> None:
|
|
@@ -21,7 +21,6 @@ from datahub.ingestion.glossary.classification_mixin import (
|
|
|
21
21
|
from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
|
|
22
22
|
from datahub.ingestion.source.common.subtypes import (
|
|
23
23
|
DatasetContainerSubTypes,
|
|
24
|
-
DatasetSubTypes,
|
|
25
24
|
)
|
|
26
25
|
from datahub.ingestion.source.snowflake.constants import (
|
|
27
26
|
GENERIC_PERMISSION_ERROR_KEY,
|
|
@@ -467,7 +466,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
467
466
|
context=f"{db_name}.{schema_name}",
|
|
468
467
|
)
|
|
469
468
|
|
|
470
|
-
def _process_tags(
|
|
469
|
+
def _process_tags(
|
|
470
|
+
self,
|
|
471
|
+
snowflake_schema: SnowflakeSchema,
|
|
472
|
+
schema_name: str,
|
|
473
|
+
db_name: str,
|
|
474
|
+
domain: str,
|
|
475
|
+
) -> None:
|
|
471
476
|
snowflake_schema.tags = self.tag_extractor.get_tags_on_object(
|
|
472
477
|
schema_name=schema_name, db_name=db_name, domain=domain
|
|
473
478
|
)
|
|
@@ -837,15 +842,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
837
842
|
if dpi_aspect:
|
|
838
843
|
yield dpi_aspect
|
|
839
844
|
|
|
840
|
-
subTypes = SubTypes(
|
|
841
|
-
typeNames=(
|
|
842
|
-
[DatasetSubTypes.SNOWFLAKE_STREAM]
|
|
843
|
-
if isinstance(table, SnowflakeStream)
|
|
844
|
-
else [DatasetSubTypes.VIEW]
|
|
845
|
-
if isinstance(table, SnowflakeView)
|
|
846
|
-
else [DatasetSubTypes.TABLE]
|
|
847
|
-
)
|
|
848
|
-
)
|
|
845
|
+
subTypes = SubTypes(typeNames=[table.get_subtype()])
|
|
849
846
|
|
|
850
847
|
yield MetadataChangeProposalWrapper(
|
|
851
848
|
entityUrn=dataset_urn, aspect=subTypes
|
|
@@ -932,9 +929,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
932
929
|
"OWNER_ROLE_TYPE": table.owner_role_type,
|
|
933
930
|
"TABLE_NAME": table.table_name,
|
|
934
931
|
"BASE_TABLES": table.base_tables,
|
|
935
|
-
"STALE_AFTER":
|
|
936
|
-
|
|
937
|
-
|
|
932
|
+
"STALE_AFTER": (
|
|
933
|
+
table.stale_after.isoformat() if table.stale_after else None
|
|
934
|
+
),
|
|
938
935
|
}.items()
|
|
939
936
|
if v
|
|
940
937
|
}
|
|
@@ -352,6 +352,15 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
352
352
|
)
|
|
353
353
|
self.report.sql_aggregator = self.aggregator.report
|
|
354
354
|
|
|
355
|
+
def _add_default_options(self, sql_config: SQLCommonConfig) -> None:
|
|
356
|
+
"""Add default SQLAlchemy options. Can be overridden by subclasses to add additional defaults."""
|
|
357
|
+
# Extra default SQLAlchemy option for better connection pooling and threading.
|
|
358
|
+
# https://docs.sqlalchemy.org/en/14/core/pooling.html#sqlalchemy.pool.QueuePool.params.max_overflow
|
|
359
|
+
if sql_config.is_profiling_enabled():
|
|
360
|
+
sql_config.options.setdefault(
|
|
361
|
+
"max_overflow", sql_config.profiling.max_workers
|
|
362
|
+
)
|
|
363
|
+
|
|
355
364
|
@classmethod
|
|
356
365
|
def test_connection(cls, config_dict: dict) -> TestConnectionReport:
|
|
357
366
|
test_report = TestConnectionReport()
|
|
@@ -519,12 +528,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
519
528
|
# Known issue with sqlalchemy https://stackoverflow.com/questions/60804288/pycharm-duplicated-log-for-sqlalchemy-echo-true
|
|
520
529
|
sqlalchemy_log._add_default_handler = lambda x: None # type: ignore
|
|
521
530
|
|
|
522
|
-
|
|
523
|
-
# https://docs.sqlalchemy.org/en/14/core/pooling.html#sqlalchemy.pool.QueuePool.params.max_overflow
|
|
524
|
-
if sql_config.is_profiling_enabled():
|
|
525
|
-
sql_config.options.setdefault(
|
|
526
|
-
"max_overflow", sql_config.profiling.max_workers
|
|
527
|
-
)
|
|
531
|
+
self._add_default_options(sql_config)
|
|
528
532
|
|
|
529
533
|
for inspector in self.get_inspectors():
|
|
530
534
|
profiler = None
|
|
@@ -22,6 +22,7 @@ from sqlalchemy import create_engine, inspect
|
|
|
22
22
|
from sqlalchemy.engine import Engine
|
|
23
23
|
from sqlalchemy.engine.base import Connection
|
|
24
24
|
from sqlalchemy.engine.reflection import Inspector
|
|
25
|
+
from sqlalchemy.pool import QueuePool
|
|
25
26
|
from sqlalchemy.sql.expression import text
|
|
26
27
|
from teradatasqlalchemy.dialect import TeradataDialect
|
|
27
28
|
from teradatasqlalchemy.options import configure
|
|
@@ -678,6 +679,16 @@ ORDER by DataBaseName, TableName;
|
|
|
678
679
|
if self.config.stateful_ingestion:
|
|
679
680
|
self.config.stateful_ingestion.remove_stale_metadata = False
|
|
680
681
|
|
|
682
|
+
def _add_default_options(self, sql_config: SQLCommonConfig) -> None:
|
|
683
|
+
"""Add Teradata-specific default options"""
|
|
684
|
+
super()._add_default_options(sql_config)
|
|
685
|
+
if sql_config.is_profiling_enabled():
|
|
686
|
+
# SQLAlchemy uses QueuePool by default; however, Teradata uses SingletonThreadPool.
|
|
687
|
+
# SingletonThreadPool does not support parallel connections. To use profiling, we need to use QueuePool.
|
|
688
|
+
# https://docs.sqlalchemy.org/en/20/core/pooling.html#connection-pool-configuration
|
|
689
|
+
# https://github.com/Teradata/sqlalchemy-teradata/issues/96
|
|
690
|
+
sql_config.options.setdefault("poolclass", QueuePool)
|
|
691
|
+
|
|
681
692
|
@classmethod
|
|
682
693
|
def create(cls, config_dict, ctx):
|
|
683
694
|
config = TeradataConfig.parse_obj(config_dict)
|
|
@@ -705,6 +716,7 @@ ORDER by DataBaseName, TableName;
|
|
|
705
716
|
# This method can be overridden in the case that you want to dynamically
|
|
706
717
|
# run on multiple databases.
|
|
707
718
|
url = self.config.get_sql_alchemy_url()
|
|
719
|
+
|
|
708
720
|
logger.debug(f"sql_alchemy_url={url}")
|
|
709
721
|
engine = create_engine(url, **self.config.options)
|
|
710
722
|
with engine.connect() as conn:
|
datahub/sdk/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import datahub.metadata.schema_classes as models
|
|
4
|
+
from datahub.errors import ExperimentalWarning, SdkUsageError
|
|
5
|
+
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
6
|
+
from datahub.metadata.urns import (
|
|
7
|
+
ChartUrn,
|
|
8
|
+
ContainerUrn,
|
|
9
|
+
CorpGroupUrn,
|
|
10
|
+
CorpUserUrn,
|
|
11
|
+
DashboardUrn,
|
|
12
|
+
DataPlatformInstanceUrn,
|
|
13
|
+
DataPlatformUrn,
|
|
14
|
+
DatasetUrn,
|
|
15
|
+
DomainUrn,
|
|
16
|
+
GlossaryTermUrn,
|
|
17
|
+
SchemaFieldUrn,
|
|
18
|
+
TagUrn,
|
|
19
|
+
)
|
|
20
|
+
from datahub.sdk.container import Container
|
|
21
|
+
from datahub.sdk.dataset import Dataset
|
|
22
|
+
from datahub.sdk.main_client import DataHubClient
|
|
23
|
+
|
|
24
|
+
warnings.warn(
|
|
25
|
+
"The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
|
|
26
|
+
"Our typical backwards-compatibility and stability guarantees do not apply to this code. "
|
|
27
|
+
"When it's promoted to stable, the import path will change "
|
|
28
|
+
"from `from datahub.sdk import ...` to `from datahub import ...`.",
|
|
29
|
+
ExperimentalWarning,
|
|
30
|
+
stacklevel=2,
|
|
31
|
+
)
|
|
32
|
+
del warnings
|
|
33
|
+
del ExperimentalWarning
|