acryl-datahub 0.15.0.5rc10__py3-none-any.whl → 0.15.0.6rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/METADATA +2482 -2482
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/RECORD +35 -24
- datahub/_version.py +1 -1
- datahub/errors.py +35 -0
- datahub/ingestion/source/common/subtypes.py +1 -0
- datahub/ingestion/source/mongodb.py +17 -16
- datahub/ingestion/source/powerbi/config.py +1 -0
- datahub/ingestion/source/powerbi/powerbi.py +28 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +11 -36
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +17 -4
- datahub/ingestion/source/s3/source.py +14 -5
- datahub/ingestion/source/snowflake/constants.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +45 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +20 -1
- datahub/ingestion/source/snowflake/snowflake_report.py +6 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +108 -4
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +298 -69
- datahub/ingestion/source/snowflake/snowflake_utils.py +17 -8
- datahub/ingestion/source/snowflake/snowflake_v2.py +15 -3
- datahub/sdk/__init__.py +33 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_attribution.py +48 -0
- datahub/sdk/_entity.py +89 -0
- datahub/sdk/_shared.py +338 -0
- datahub/sdk/container.py +193 -0
- datahub/sdk/dataset.py +584 -0
- datahub/sdk/entity_client.py +115 -0
- datahub/sdk/main_client.py +56 -0
- datahub/sdk/resolver_client.py +101 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=BhSRdcjgmWwrve2zKjSYh9Z1fk_7CHpbmijLhhWMrJM,324
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
|
+
datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
|
|
5
6
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
7
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
|
|
@@ -203,7 +204,7 @@ datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDut
|
|
|
203
204
|
datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
|
|
204
205
|
datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
|
|
205
206
|
datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
|
|
206
|
-
datahub/ingestion/source/mongodb.py,sha256=
|
|
207
|
+
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
207
208
|
datahub/ingestion/source/nifi.py,sha256=FgIbZSCu-mcdnbIpqwvmADnUIxptogUq1sSEkrkwtrc,56089
|
|
208
209
|
datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
|
|
209
210
|
datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
|
|
@@ -266,7 +267,7 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
|
|
|
266
267
|
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
|
|
267
268
|
datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
268
269
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
269
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
270
|
+
datahub/ingestion/source/common/subtypes.py,sha256=S0ssIxV7V38HGQwl-h5izYWyj1MQgmvJk4k_Q-5VGJ8,2329
|
|
270
271
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
271
272
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
272
273
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
@@ -363,10 +364,10 @@ datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwr
|
|
|
363
364
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
364
365
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=76Z-2Td4_3PH2wWL1XJrpV2Egre5YVh6bMXeDS5ZonE,12405
|
|
365
366
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
366
|
-
datahub/ingestion/source/powerbi/config.py,sha256=
|
|
367
|
+
datahub/ingestion/source/powerbi/config.py,sha256=1WFK-JxcgBEIZ2XTwuH1PvNXYcwqEJR-IYTUTv3Z4o8,22820
|
|
367
368
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
368
369
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
|
|
369
|
-
datahub/ingestion/source/powerbi/powerbi.py,sha256=
|
|
370
|
+
datahub/ingestion/source/powerbi/powerbi.py,sha256=a4LG4pxrZ6N5I6HmKFCfv9HMyvl4ZChy2XbVY17E-To,55570
|
|
370
371
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
371
372
|
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
|
|
372
373
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
@@ -376,9 +377,9 @@ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5ztea
|
|
|
376
377
|
datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
|
|
377
378
|
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
378
379
|
datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
379
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=
|
|
380
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256
|
|
381
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=
|
|
380
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=4Kr7cLXpsWGtg-M18aXyhij9k9Ll5dGv3EaCS3d2DRk,8590
|
|
381
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=-EHDvVmr69bP11bFm0bW0Lf1I95lPHU7sdMX1Q70roI,38503
|
|
382
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=NrhgwREmkWTvlhpEs7dAEEJfOxQRalA02ArKr2LLjeY,27666
|
|
382
383
|
datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
|
|
383
384
|
datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
|
|
384
385
|
datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
|
|
@@ -410,7 +411,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
|
|
|
410
411
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
411
412
|
datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
412
413
|
datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
413
|
-
datahub/ingestion/source/s3/source.py,sha256=
|
|
414
|
+
datahub/ingestion/source/s3/source.py,sha256=JwEmVWDEFtPt6iMo82n5DQava8QAKXk_xYl01KAfdHk,47614
|
|
414
415
|
datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
415
416
|
datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
|
|
416
417
|
datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
|
|
@@ -432,26 +433,26 @@ datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpA
|
|
|
432
433
|
datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
433
434
|
datahub/ingestion/source/slack/slack.py,sha256=VpLS-6zuQa8hIuHnZhLf8wRdN72Xell3ZMd0kK3A0i8,13188
|
|
434
435
|
datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
435
|
-
datahub/ingestion/source/snowflake/constants.py,sha256=
|
|
436
|
+
datahub/ingestion/source/snowflake/constants.py,sha256=SZzQTO6spPeHfV55tIbg7SL2ecsQF_z169HJ8u2G3Hk,2650
|
|
436
437
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
437
438
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
438
439
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
439
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
440
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=c9LE7nl6tkz7P9tc4EkSKphextW5pejLzdP3qS_iL1s,20196
|
|
440
441
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=e9dCARIQtGB8G1cSMRLorCbNLcPUD2g9gBL-LLLKjFE,17793
|
|
441
442
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
442
443
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
|
|
443
444
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
444
|
-
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=
|
|
445
|
-
datahub/ingestion/source/snowflake/snowflake_query.py,sha256
|
|
446
|
-
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=
|
|
447
|
-
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=
|
|
448
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=
|
|
445
|
+
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
|
|
446
|
+
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
|
|
447
|
+
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
|
|
448
|
+
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
|
|
449
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=Yba6OIWYtukAFMoNtEtX2BXWwJee17Dl58DUyK0myho,54530
|
|
449
450
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
450
451
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
451
452
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
|
|
452
453
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
|
|
453
|
-
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=
|
|
454
|
-
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=
|
|
454
|
+
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=EmYb2FEcdLwei92atRBQ3iKH7av4YBZCIFTgPmLo0Ng,13092
|
|
455
|
+
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=kx8aFalByIAfrp0a_kq5iyojzh9vI4od5eVGthAR5RY,33912
|
|
455
456
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
456
457
|
datahub/ingestion/source/sql/athena.py,sha256=Uh9wGLOqAkcphffxOPIQNyXvjeRm74XIpaLb4rjqMjM,24045
|
|
457
458
|
datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
|
|
@@ -869,6 +870,16 @@ datahub/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh4HnUHNmBEU74
|
|
|
869
870
|
datahub/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
|
|
870
871
|
datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
|
|
871
872
|
datahub/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
|
|
873
|
+
datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
|
|
874
|
+
datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
|
|
875
|
+
datahub/sdk/_attribution.py,sha256=05iNVT_IDO7aU3vU6dJKCF8EoDAI1mwh4mAg_EBf3RY,1121
|
|
876
|
+
datahub/sdk/_entity.py,sha256=A_AWqNjaV6Y1YvmIiPNm0UyC0aO52G_L-iwgp9XEtYs,3225
|
|
877
|
+
datahub/sdk/_shared.py,sha256=FIEcYGm3LgRKLqn_i1FDePaqvMljkXIA9f3RMqaF1kY,11137
|
|
878
|
+
datahub/sdk/container.py,sha256=6Y19pQLN5vXGM86TEKW7ItTBwE3S0U4BsAMilWhyXqw,6511
|
|
879
|
+
datahub/sdk/dataset.py,sha256=BydV2papBEYL2de72UMNjSkAbsDNpVlXqiEkMlXJiLY,21514
|
|
880
|
+
datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
|
|
881
|
+
datahub/sdk/main_client.py,sha256=IKKzBMiKtT5zIMEHrvaMyxJ7DYBVNpqua70J0Ydl74Y,2068
|
|
882
|
+
datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
|
|
872
883
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
873
884
|
datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
|
|
874
885
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
@@ -1001,9 +1012,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1001
1012
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1002
1013
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1003
1014
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1004
|
-
acryl_datahub-0.15.0.
|
|
1005
|
-
acryl_datahub-0.15.0.
|
|
1006
|
-
acryl_datahub-0.15.0.
|
|
1007
|
-
acryl_datahub-0.15.0.
|
|
1008
|
-
acryl_datahub-0.15.0.
|
|
1009
|
-
acryl_datahub-0.15.0.
|
|
1015
|
+
acryl_datahub-0.15.0.6rc2.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1016
|
+
acryl_datahub-0.15.0.6rc2.dist-info/METADATA,sha256=Bv5h1BaxWarbwfDdiiaD1wwyonRclpbgYtingIwN4B4,175375
|
|
1017
|
+
acryl_datahub-0.15.0.6rc2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1018
|
+
acryl_datahub-0.15.0.6rc2.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1019
|
+
acryl_datahub-0.15.0.6rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1020
|
+
acryl_datahub-0.15.0.6rc2.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/errors.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from datahub.configuration.common import MetaError
|
|
2
|
+
|
|
3
|
+
# TODO: Move all other error types to this file.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SdkUsageError(MetaError):
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AlreadyExistsError(SdkUsageError):
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ItemNotFoundError(SdkUsageError):
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MultipleItemsFoundError(SdkUsageError):
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SchemaFieldKeyError(SdkUsageError, KeyError):
|
|
23
|
+
pass
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class IngestionAttributionWarning(Warning):
|
|
27
|
+
pass
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MultipleSubtypesWarning(Warning):
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ExperimentalWarning(Warning):
|
|
35
|
+
pass
|
|
@@ -219,26 +219,27 @@ def construct_schema_pymongo(
|
|
|
219
219
|
"""
|
|
220
220
|
|
|
221
221
|
aggregations: List[Dict] = []
|
|
222
|
+
|
|
223
|
+
# The order of the aggregations impacts execution time. By setting the sample/limit aggregation first,
|
|
224
|
+
# the subsequent aggregations process a much smaller dataset, improving performance.
|
|
225
|
+
if sample_size:
|
|
226
|
+
if use_random_sampling:
|
|
227
|
+
aggregations.append({"$sample": {"size": sample_size}})
|
|
228
|
+
else:
|
|
229
|
+
aggregations.append({"$limit": sample_size})
|
|
230
|
+
|
|
222
231
|
if should_add_document_size_filter:
|
|
223
232
|
doc_size_field = "temporary_doc_size_field"
|
|
224
233
|
# create a temporary field to store the size of the document. filter on it and then remove it.
|
|
225
|
-
aggregations
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
# get sample documents in collection
|
|
232
|
-
if sample_size:
|
|
233
|
-
aggregations.append({"$sample": {"size": sample_size}})
|
|
234
|
-
documents = collection.aggregate(
|
|
235
|
-
aggregations,
|
|
236
|
-
allowDiskUse=True,
|
|
234
|
+
aggregations.extend(
|
|
235
|
+
[
|
|
236
|
+
{"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
|
|
237
|
+
{"$match": {doc_size_field: {"$lt": max_document_size}}},
|
|
238
|
+
{"$project": {doc_size_field: 0}},
|
|
239
|
+
]
|
|
237
240
|
)
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
aggregations.append({"$limit": sample_size})
|
|
241
|
-
documents = collection.aggregate(aggregations, allowDiskUse=True)
|
|
241
|
+
|
|
242
|
+
documents = collection.aggregate(aggregations, allowDiskUse=True)
|
|
242
243
|
|
|
243
244
|
return construct_schema(list(documents), delimiter)
|
|
244
245
|
|
|
@@ -582,8 +582,11 @@ class Mapper:
|
|
|
582
582
|
if tile.dataset is not None and tile.dataset.webUrl is not None:
|
|
583
583
|
custom_properties[Constant.DATASET_WEB_URL] = tile.dataset.webUrl
|
|
584
584
|
|
|
585
|
-
if tile.
|
|
586
|
-
custom_properties[Constant.REPORT_ID] = tile.
|
|
585
|
+
if tile.report_id is not None:
|
|
586
|
+
custom_properties[Constant.REPORT_ID] = tile.report_id
|
|
587
|
+
|
|
588
|
+
if tile.report is not None and tile.report.webUrl is not None:
|
|
589
|
+
custom_properties[Constant.REPORT_WEB_URL] = tile.report.webUrl
|
|
587
590
|
|
|
588
591
|
return custom_properties
|
|
589
592
|
|
|
@@ -1053,6 +1056,7 @@ class Mapper:
|
|
|
1053
1056
|
report: powerbi_data_classes.Report,
|
|
1054
1057
|
chart_mcps: List[MetadataChangeProposalWrapper],
|
|
1055
1058
|
user_mcps: List[MetadataChangeProposalWrapper],
|
|
1059
|
+
dashboard_edges: List[EdgeClass],
|
|
1056
1060
|
) -> List[MetadataChangeProposalWrapper]:
|
|
1057
1061
|
"""
|
|
1058
1062
|
Map PowerBi report to Datahub dashboard
|
|
@@ -1074,6 +1078,7 @@ class Mapper:
|
|
|
1074
1078
|
charts=chart_urn_list,
|
|
1075
1079
|
lastModified=ChangeAuditStamps(),
|
|
1076
1080
|
dashboardUrl=report.webUrl,
|
|
1081
|
+
dashboards=dashboard_edges,
|
|
1077
1082
|
)
|
|
1078
1083
|
|
|
1079
1084
|
info_mcp = self.new_mcp(
|
|
@@ -1167,8 +1172,28 @@ class Mapper:
|
|
|
1167
1172
|
ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
|
|
1168
1173
|
chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)
|
|
1169
1174
|
|
|
1175
|
+
# find all dashboards with a Tile referencing this report
|
|
1176
|
+
downstream_dashboards_edges = []
|
|
1177
|
+
for d in workspace.dashboards.values():
|
|
1178
|
+
if any(t.report_id == report.id for t in d.tiles):
|
|
1179
|
+
dashboard_urn = builder.make_dashboard_urn(
|
|
1180
|
+
platform=self.__config.platform_name,
|
|
1181
|
+
platform_instance=self.__config.platform_instance,
|
|
1182
|
+
name=d.get_urn_part(),
|
|
1183
|
+
)
|
|
1184
|
+
edge = EdgeClass(
|
|
1185
|
+
destinationUrn=dashboard_urn,
|
|
1186
|
+
sourceUrn=None,
|
|
1187
|
+
created=None,
|
|
1188
|
+
lastModified=None,
|
|
1189
|
+
properties=None,
|
|
1190
|
+
)
|
|
1191
|
+
downstream_dashboards_edges.append(edge)
|
|
1192
|
+
|
|
1170
1193
|
# Let's convert report to datahub dashboard
|
|
1171
|
-
report_mcps = self.report_to_dashboard(
|
|
1194
|
+
report_mcps = self.report_to_dashboard(
|
|
1195
|
+
workspace, report, chart_mcps, user_mcps, downstream_dashboards_edges
|
|
1196
|
+
)
|
|
1172
1197
|
|
|
1173
1198
|
# Now add MCPs in sequence
|
|
1174
1199
|
mcps.extend(ds_mcps)
|
|
@@ -286,11 +286,15 @@ class Tile:
|
|
|
286
286
|
id: str
|
|
287
287
|
title: str
|
|
288
288
|
embedUrl: str
|
|
289
|
-
dataset: Optional["PowerBIDataset"]
|
|
290
289
|
dataset_id: Optional[str]
|
|
291
|
-
|
|
290
|
+
report_id: Optional[str]
|
|
292
291
|
createdFrom: CreatedFrom
|
|
293
292
|
|
|
293
|
+
# In a first pass, `dataset_id` and/or `report_id` are filled in.
|
|
294
|
+
# In a subsequent pass, the objects are populated.
|
|
295
|
+
dataset: Optional["PowerBIDataset"]
|
|
296
|
+
report: Optional[Report]
|
|
297
|
+
|
|
294
298
|
def get_urn_part(self):
|
|
295
299
|
return f"charts.{self.id}"
|
|
296
300
|
|
|
@@ -337,41 +337,6 @@ class DataResolverBase(ABC):
|
|
|
337
337
|
-tiles), there is no information available on pagination
|
|
338
338
|
|
|
339
339
|
"""
|
|
340
|
-
|
|
341
|
-
def new_dataset_or_report(tile_instance: Any) -> dict:
|
|
342
|
-
"""
|
|
343
|
-
Find out which is the data source for tile. It is either REPORT or DATASET
|
|
344
|
-
"""
|
|
345
|
-
report_fields = {
|
|
346
|
-
Constant.REPORT: (
|
|
347
|
-
self.get_report(
|
|
348
|
-
workspace=workspace,
|
|
349
|
-
report_id=tile_instance.get(Constant.REPORT_ID),
|
|
350
|
-
)
|
|
351
|
-
if tile_instance.get(Constant.REPORT_ID) is not None
|
|
352
|
-
else None
|
|
353
|
-
),
|
|
354
|
-
Constant.CREATED_FROM: Tile.CreatedFrom.UNKNOWN,
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
# reportId and datasetId are exclusive in tile_instance
|
|
358
|
-
# if datasetId is present that means tile is created from dataset
|
|
359
|
-
# if reportId is present that means tile is created from report
|
|
360
|
-
# if both i.e. reportId and datasetId are not present then tile is created from some visualization
|
|
361
|
-
if tile_instance.get(Constant.REPORT_ID) is not None:
|
|
362
|
-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.REPORT
|
|
363
|
-
elif tile_instance.get(Constant.DATASET_ID) is not None:
|
|
364
|
-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.DATASET
|
|
365
|
-
else:
|
|
366
|
-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.VISUALIZATION
|
|
367
|
-
|
|
368
|
-
title: Optional[str] = tile_instance.get(Constant.TITLE)
|
|
369
|
-
_id: Optional[str] = tile_instance.get(Constant.ID)
|
|
370
|
-
created_from: Any = report_fields[Constant.CREATED_FROM]
|
|
371
|
-
logger.info(f"Tile {title}({_id}) is created from {created_from}")
|
|
372
|
-
|
|
373
|
-
return report_fields
|
|
374
|
-
|
|
375
340
|
tile_list_endpoint: str = self.get_tiles_endpoint(
|
|
376
341
|
workspace, dashboard_id=dashboard.id
|
|
377
342
|
)
|
|
@@ -393,8 +358,18 @@ class DataResolverBase(ABC):
|
|
|
393
358
|
title=instance.get(Constant.TITLE),
|
|
394
359
|
embedUrl=instance.get(Constant.EMBED_URL),
|
|
395
360
|
dataset_id=instance.get(Constant.DATASET_ID),
|
|
361
|
+
report_id=instance.get(Constant.REPORT_ID),
|
|
396
362
|
dataset=None,
|
|
397
|
-
|
|
363
|
+
report=None,
|
|
364
|
+
createdFrom=(
|
|
365
|
+
# In the past we considered that only one of the two report_id or dataset_id would be present
|
|
366
|
+
# but we have seen cases where both are present. If both are present, we prioritize the report.
|
|
367
|
+
Tile.CreatedFrom.REPORT
|
|
368
|
+
if instance.get(Constant.REPORT_ID)
|
|
369
|
+
else Tile.CreatedFrom.DATASET
|
|
370
|
+
if instance.get(Constant.DATASET_ID)
|
|
371
|
+
else Tile.CreatedFrom.VISUALIZATION
|
|
372
|
+
),
|
|
398
373
|
)
|
|
399
374
|
for instance in tile_dict
|
|
400
375
|
if instance is not None
|
|
@@ -625,13 +625,26 @@ class PowerBiAPI:
|
|
|
625
625
|
dashboard.tiles = self._get_resolver().get_tiles(
|
|
626
626
|
workspace, dashboard=dashboard
|
|
627
627
|
)
|
|
628
|
-
# set the dataset for tiles
|
|
628
|
+
# set the dataset and the report for tiles
|
|
629
629
|
for tile in dashboard.tiles:
|
|
630
|
+
# In Power BI, dashboards, reports, and datasets are tightly scoped to the workspace they belong to.
|
|
631
|
+
# https://learn.microsoft.com/en-us/power-bi/collaborate-share/service-new-workspaces
|
|
632
|
+
if tile.report_id:
|
|
633
|
+
tile.report = workspace.reports.get(tile.report_id)
|
|
634
|
+
if tile.report is None:
|
|
635
|
+
self.reporter.info(
|
|
636
|
+
title="Missing Report Lineage For Tile",
|
|
637
|
+
message="A Report reference that failed to be resolved. Please ensure that 'extract_reports' is set to True in the configuration.",
|
|
638
|
+
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, report-id: {tile.report_id}",
|
|
639
|
+
)
|
|
640
|
+
# However, semantic models (aka datasets) can be shared across workspaces
|
|
641
|
+
# https://learn.microsoft.com/en-us/fabric/admin/portal-workspace#use-semantic-models-across-workspaces
|
|
642
|
+
# That's why the global 'dataset_registry' is required
|
|
630
643
|
if tile.dataset_id:
|
|
631
644
|
tile.dataset = self.dataset_registry.get(tile.dataset_id)
|
|
632
645
|
if tile.dataset is None:
|
|
633
646
|
self.reporter.info(
|
|
634
|
-
title="Missing Lineage For Tile",
|
|
647
|
+
title="Missing Dataset Lineage For Tile",
|
|
635
648
|
message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.",
|
|
636
649
|
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, dataset-id: {tile.dataset_id}",
|
|
637
650
|
)
|
|
@@ -653,10 +666,10 @@ class PowerBiAPI:
|
|
|
653
666
|
for dashboard in workspace.dashboards.values():
|
|
654
667
|
dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])
|
|
655
668
|
|
|
669
|
+
# fill reports first since some dashboards may reference a report
|
|
670
|
+
fill_reports()
|
|
656
671
|
if self.__config.extract_dashboards:
|
|
657
672
|
fill_dashboards()
|
|
658
|
-
|
|
659
|
-
fill_reports()
|
|
660
673
|
fill_dashboard_tags()
|
|
661
674
|
self._fill_independent_datasets(workspace=workspace)
|
|
662
675
|
|
|
@@ -866,8 +866,21 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
866
866
|
Returns:
|
|
867
867
|
List[Folder]: A list of Folder objects representing the partitions found.
|
|
868
868
|
"""
|
|
869
|
+
|
|
870
|
+
def _is_allowed_path(path_spec_: PathSpec, s3_uri: str) -> bool:
|
|
871
|
+
allowed = path_spec_.allowed(s3_uri)
|
|
872
|
+
if not allowed:
|
|
873
|
+
logger.debug(f"File {s3_uri} not allowed and skipping")
|
|
874
|
+
self.report.report_file_dropped(s3_uri)
|
|
875
|
+
return allowed
|
|
876
|
+
|
|
877
|
+
s3_objects = (
|
|
878
|
+
obj
|
|
879
|
+
for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
|
|
880
|
+
if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
|
|
881
|
+
)
|
|
882
|
+
|
|
869
883
|
partitions: List[Folder] = []
|
|
870
|
-
s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
|
|
871
884
|
grouped_s3_objects_by_dirname = groupby_unsorted(
|
|
872
885
|
s3_objects,
|
|
873
886
|
key=lambda obj: obj.key.rsplit("/", 1)[0],
|
|
@@ -878,10 +891,6 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
878
891
|
modification_time = None
|
|
879
892
|
|
|
880
893
|
for item in group:
|
|
881
|
-
file_path = self.create_s3_path(item.bucket_name, item.key)
|
|
882
|
-
if not path_spec.allowed(file_path):
|
|
883
|
-
logger.debug(f"File {file_path} not allowed and skipping")
|
|
884
|
-
continue
|
|
885
894
|
file_size += item.size
|
|
886
895
|
if creation_time is None or item.last_modified < creation_time:
|
|
887
896
|
creation_time = item.last_modified
|
|
@@ -98,6 +98,11 @@ class SnowflakeFilterConfig(SQLFilterConfig):
|
|
|
98
98
|
)
|
|
99
99
|
# table_pattern and view_pattern are inherited from SQLFilterConfig
|
|
100
100
|
|
|
101
|
+
stream_pattern: AllowDenyPattern = Field(
|
|
102
|
+
default=AllowDenyPattern.allow_all(),
|
|
103
|
+
description="Regex patterns for streams to filter in ingestion. Note: Defaults to table_pattern if not specified. Specify regex to match the entire view name in database.schema.view format. e.g. to match all views starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
|
|
104
|
+
)
|
|
105
|
+
|
|
101
106
|
match_fully_qualified_names: bool = Field(
|
|
102
107
|
default=False,
|
|
103
108
|
description="Whether `schema_pattern` is matched against fully qualified schema name `<catalog>.<schema>`.",
|
|
@@ -274,6 +279,11 @@ class SnowflakeV2Config(
|
|
|
274
279
|
description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.",
|
|
275
280
|
)
|
|
276
281
|
|
|
282
|
+
include_streams: bool = Field(
|
|
283
|
+
default=True,
|
|
284
|
+
description="If enabled, streams will be ingested as separate entities from tables/views.",
|
|
285
|
+
)
|
|
286
|
+
|
|
277
287
|
structured_property_pattern: AllowDenyPattern = Field(
|
|
278
288
|
default=AllowDenyPattern.allow_all(),
|
|
279
289
|
description=(
|
|
@@ -49,6 +49,7 @@ from datahub.metadata.urns import CorpUserUrn
|
|
|
49
49
|
from datahub.sql_parsing.schema_resolver import SchemaResolver
|
|
50
50
|
from datahub.sql_parsing.sql_parsing_aggregator import (
|
|
51
51
|
KnownLineageMapping,
|
|
52
|
+
ObservedQuery,
|
|
52
53
|
PreparsedQuery,
|
|
53
54
|
SqlAggregatorReport,
|
|
54
55
|
SqlParsingAggregator,
|
|
@@ -241,7 +242,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
241
242
|
use_cached_audit_log = audit_log_file.exists()
|
|
242
243
|
|
|
243
244
|
queries: FileBackedList[
|
|
244
|
-
Union[
|
|
245
|
+
Union[
|
|
246
|
+
KnownLineageMapping,
|
|
247
|
+
PreparsedQuery,
|
|
248
|
+
TableRename,
|
|
249
|
+
TableSwap,
|
|
250
|
+
ObservedQuery,
|
|
251
|
+
]
|
|
245
252
|
]
|
|
246
253
|
if use_cached_audit_log:
|
|
247
254
|
logger.info("Using cached audit log")
|
|
@@ -252,7 +259,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
252
259
|
|
|
253
260
|
shared_connection = ConnectionWrapper(audit_log_file)
|
|
254
261
|
queries = FileBackedList(shared_connection)
|
|
255
|
-
entry: Union[
|
|
262
|
+
entry: Union[
|
|
263
|
+
KnownLineageMapping,
|
|
264
|
+
PreparsedQuery,
|
|
265
|
+
TableRename,
|
|
266
|
+
TableSwap,
|
|
267
|
+
ObservedQuery,
|
|
268
|
+
]
|
|
256
269
|
|
|
257
270
|
with self.report.copy_history_fetch_timer:
|
|
258
271
|
for entry in self.fetch_copy_history():
|
|
@@ -329,7 +342,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
329
342
|
|
|
330
343
|
def fetch_query_log(
|
|
331
344
|
self, users: UsersMapping
|
|
332
|
-
) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap]]:
|
|
345
|
+
) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap, ObservedQuery]]:
|
|
333
346
|
query_log_query = _build_enriched_query_log_query(
|
|
334
347
|
start_time=self.config.window.start_time,
|
|
335
348
|
end_time=self.config.window.end_time,
|
|
@@ -362,7 +375,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
362
375
|
|
|
363
376
|
def _parse_audit_log_row(
|
|
364
377
|
self, row: Dict[str, Any], users: UsersMapping
|
|
365
|
-
) -> Optional[Union[TableRename, TableSwap, PreparsedQuery]]:
|
|
378
|
+
) -> Optional[Union[TableRename, TableSwap, PreparsedQuery, ObservedQuery]]:
|
|
366
379
|
json_fields = {
|
|
367
380
|
"DIRECT_OBJECTS_ACCESSED",
|
|
368
381
|
"OBJECTS_MODIFIED",
|
|
@@ -398,6 +411,34 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
398
411
|
pass
|
|
399
412
|
else:
|
|
400
413
|
return None
|
|
414
|
+
|
|
415
|
+
user = CorpUserUrn(
|
|
416
|
+
self.identifiers.get_user_identifier(
|
|
417
|
+
res["user_name"], users.get(res["user_name"])
|
|
418
|
+
)
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# Use direct_objects_accessed instead of objects_modified
|
|
422
|
+
# objects_modified returns $SYS_VIEW_X with no mapping
|
|
423
|
+
has_stream_objects = any(
|
|
424
|
+
obj.get("objectDomain") == "Stream" for obj in direct_objects_accessed
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
# If a stream is used, default to query parsing.
|
|
428
|
+
if has_stream_objects:
|
|
429
|
+
logger.debug("Found matching stream object")
|
|
430
|
+
return ObservedQuery(
|
|
431
|
+
query=res["query_text"],
|
|
432
|
+
session_id=res["session_id"],
|
|
433
|
+
timestamp=res["query_start_time"].astimezone(timezone.utc),
|
|
434
|
+
user=user,
|
|
435
|
+
default_db=res["default_db"],
|
|
436
|
+
default_schema=res["default_schema"],
|
|
437
|
+
query_hash=get_query_fingerprint(
|
|
438
|
+
res["query_text"], self.identifiers.platform, fast=True
|
|
439
|
+
),
|
|
440
|
+
)
|
|
441
|
+
|
|
401
442
|
upstreams = []
|
|
402
443
|
column_usage = {}
|
|
403
444
|
|
|
@@ -460,12 +501,6 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
460
501
|
)
|
|
461
502
|
)
|
|
462
503
|
|
|
463
|
-
user = CorpUserUrn(
|
|
464
|
-
self.identifiers.get_user_identifier(
|
|
465
|
-
res["user_name"], users.get(res["user_name"])
|
|
466
|
-
)
|
|
467
|
-
)
|
|
468
|
-
|
|
469
504
|
timestamp: datetime = res["query_start_time"]
|
|
470
505
|
timestamp = timestamp.astimezone(timezone.utc)
|
|
471
506
|
|
|
@@ -9,6 +9,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
|
|
|
9
9
|
from datahub.utilities.prefix_batch_builder import PrefixGroup
|
|
10
10
|
|
|
11
11
|
SHOW_VIEWS_MAX_PAGE_SIZE = 10000
|
|
12
|
+
SHOW_STREAM_MAX_PAGE_SIZE = 10000
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
def create_deny_regex_sql_filter(
|
|
@@ -36,6 +37,7 @@ class SnowflakeQuery:
|
|
|
36
37
|
SnowflakeObjectDomain.VIEW.capitalize(),
|
|
37
38
|
SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(),
|
|
38
39
|
SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(),
|
|
40
|
+
SnowflakeObjectDomain.STREAM.capitalize(),
|
|
39
41
|
}
|
|
40
42
|
|
|
41
43
|
ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format(
|
|
@@ -44,7 +46,8 @@ class SnowflakeQuery:
|
|
|
44
46
|
# Parenthesised, comma-separated list of single-quoted object-domain names
# (table, view, stream), each rendered via ``.capitalize()``.
# The last entry carries no trailing comma so the rendered text is a
# well-formed SQL value list: ('Table','View','Stream').
# NOTE(review): presumably spliced after an SQL ``IN`` — confirm at the call sites.
ACCESS_HISTORY_TABLE_DOMAINS_FILTER = (
    "("
    f"'{SnowflakeObjectDomain.TABLE.capitalize()}',"
    f"'{SnowflakeObjectDomain.VIEW.capitalize()}',"
    f"'{SnowflakeObjectDomain.STREAM.capitalize()}'"
    ")"
)
|
|
50
53
|
|
|
@@ -963,3 +966,19 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
963
966
|
@staticmethod
|
|
964
967
|
def get_all_users() -> str:
|
|
965
968
|
return """SELECT name as "NAME", email as "EMAIL" FROM SNOWFLAKE.ACCOUNT_USAGE.USERS"""
|
|
969
|
+
|
|
970
|
+
@staticmethod
def streams_for_database(
    db_name: str,
    limit: int = SHOW_STREAM_MAX_PAGE_SIZE,
    stream_pagination_marker: Optional[str] = None,
) -> str:
    """Build the ``SHOW STREAMS`` statement for one page of a database's streams.

    Args:
        db_name: Database identifier, interpolated verbatim after
            ``IN DATABASE``.
        limit: Maximum number of rows requested for this page; must not
            exceed ``SHOW_STREAM_MAX_PAGE_SIZE``.
        stream_pagination_marker: When set, emitted as ``FROM '<marker>'``
            so the next page can be requested; omitted when empty or ``None``.

    Returns:
        The SQL text of the ``SHOW STREAMS`` statement.

    Raises:
        AssertionError: If ``limit`` is larger than ``SHOW_STREAM_MAX_PAGE_SIZE``.
    """
    # SHOW STREAMS can return a maximum of 10000 rows.
    # https://docs.snowflake.com/en/sql-reference/sql/show-streams#usage-notes
    assert limit <= SHOW_STREAM_MAX_PAGE_SIZE

    # To work around this, we paginate through the results using the FROM clause.
    if stream_pagination_marker:
        # Double any embedded single quote so the marker stays a single,
        # well-formed SQL string literal.
        escaped_marker = stream_pagination_marker.replace("'", "''")
        from_clause = f"""FROM '{escaped_marker}'"""
    else:
        from_clause = ""

    # NOTE(review): db_name is not quoted here — confirm callers pass an
    # identifier that is already safe to splice in.
    return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
|