acryl-datahub 0.15.0.5rc10__py3-none-any.whl → 0.15.0.6rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (35)
  1. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/METADATA +2482 -2482
  2. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/RECORD +35 -24
  3. datahub/_version.py +1 -1
  4. datahub/errors.py +35 -0
  5. datahub/ingestion/source/common/subtypes.py +1 -0
  6. datahub/ingestion/source/mongodb.py +17 -16
  7. datahub/ingestion/source/powerbi/config.py +1 -0
  8. datahub/ingestion/source/powerbi/powerbi.py +28 -3
  9. datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -2
  10. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +11 -36
  11. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +17 -4
  12. datahub/ingestion/source/s3/source.py +14 -5
  13. datahub/ingestion/source/snowflake/constants.py +1 -0
  14. datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
  15. datahub/ingestion/source/snowflake/snowflake_queries.py +45 -10
  16. datahub/ingestion/source/snowflake/snowflake_query.py +20 -1
  17. datahub/ingestion/source/snowflake/snowflake_report.py +6 -0
  18. datahub/ingestion/source/snowflake/snowflake_schema.py +108 -4
  19. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +298 -69
  20. datahub/ingestion/source/snowflake/snowflake_utils.py +17 -8
  21. datahub/ingestion/source/snowflake/snowflake_v2.py +15 -3
  22. datahub/sdk/__init__.py +33 -0
  23. datahub/sdk/_all_entities.py +15 -0
  24. datahub/sdk/_attribution.py +48 -0
  25. datahub/sdk/_entity.py +89 -0
  26. datahub/sdk/_shared.py +338 -0
  27. datahub/sdk/container.py +193 -0
  28. datahub/sdk/dataset.py +584 -0
  29. datahub/sdk/entity_client.py +115 -0
  30. datahub/sdk/main_client.py +56 -0
  31. datahub/sdk/resolver_client.py +101 -0
  32. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/LICENSE +0 -0
  33. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/WHEEL +0 -0
  34. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/entry_points.txt +0 -0
  35. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc2.dist-info}/RECORD CHANGED
@@ -1,7 +1,8 @@
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=VqyyvoJV4bnpYg2UMs1kyEsTgRq0wVADin5Gxtvya04,325
+ datahub/_version.py,sha256=BhSRdcjgmWwrve2zKjSYh9Z1fk_7CHpbmijLhhWMrJM,324
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
+ datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -203,7 +204,7 @@ datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDut
  datahub/ingestion/source/metabase.py,sha256=m9Gfhrs8F1z23ci8CIxdE5cW--25stgxg_IQTKwkFrk,31532
  datahub/ingestion/source/mlflow.py,sha256=pmIkmsfidi7dOGdQ61rab7m8AnKZhIRE2IA9in9HGFU,12144
  datahub/ingestion/source/mode.py,sha256=HVxhzMIY4HjkAG_T6y00Po2B9XwjALP6i5XQThuyYM4,63488
- datahub/ingestion/source/mongodb.py,sha256=0P3PHVvMSXFkFimGvQzOQZF7APjsFOyzQAVQjVlVbuk,21172
+ datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
  datahub/ingestion/source/nifi.py,sha256=FgIbZSCu-mcdnbIpqwvmADnUIxptogUq1sSEkrkwtrc,56089
  datahub/ingestion/source/openapi.py,sha256=MGsRLseZompW10UVMN_tU1GZgqPgTAM4lnqCJ8eVRoY,17386
  datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
@@ -266,7 +267,7 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
  datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
  datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
- datahub/ingestion/source/common/subtypes.py,sha256=zxBQkRxsG_XMMz6Pmw_yMQiuFOhapOFVUOtXw8yHz7Q,2287
+ datahub/ingestion/source/common/subtypes.py,sha256=S0ssIxV7V38HGQwl-h5izYWyj1MQgmvJk4k_Q-5VGJ8,2329
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
@@ -363,10 +364,10 @@ datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwr
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=76Z-2Td4_3PH2wWL1XJrpV2Egre5YVh6bMXeDS5ZonE,12405
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/config.py,sha256=DadG3Y3R-emmEL7vW2vutL3TXXVe-_t6DA_S2kWUvLA,22784
+ datahub/ingestion/source/powerbi/config.py,sha256=1WFK-JxcgBEIZ2XTwuH1PvNXYcwqEJR-IYTUTv3Z4o8,22820
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
- datahub/ingestion/source/powerbi/powerbi.py,sha256=xCNMgL-KuPGpIFv_PP1woyiddY_PpbX1HEl3aDk7F1c,54535
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=a4LG4pxrZ6N5I6HmKFCfv9HMyvl4ZChy2XbVY17E-To,55570
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
@@ -376,9 +377,9 @@ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5ztea
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
  datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=kS337FgY-fLPjeRryQ-adVm1VAEThI88svii2Q9sGTc,8435
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=eNKW9ShWJ5F3pKgTVQ6xc1H1rl-JBIy9ye1pq5C2Kb0,39598
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=k8rP2uwXb6maS7VzprUcqr2ggjimz0tILVJezze0jyA,26441
+ datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=4Kr7cLXpsWGtg-M18aXyhij9k9Ll5dGv3EaCS3d2DRk,8590
+ datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=-EHDvVmr69bP11bFm0bW0Lf1I95lPHU7sdMX1Q70roI,38503
+ datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=NrhgwREmkWTvlhpEs7dAEEJfOxQRalA02ArKr2LLjeY,27666
  datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
  datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
  datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
@@ -410,7 +411,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
  datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
  datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
  datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
- datahub/ingestion/source/s3/source.py,sha256=IE_K_HE_S7w8fpGPT8OptU5-VmwapntsI5PePv_wUQA,47412
+ datahub/ingestion/source/s3/source.py,sha256=JwEmVWDEFtPt6iMo82n5DQava8QAKXk_xYl01KAfdHk,47614
  datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
  datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -432,26 +433,26 @@ datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpA
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/slack/slack.py,sha256=VpLS-6zuQa8hIuHnZhLf8wRdN72Xell3ZMd0kK3A0i8,13188
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_GrNdxV9WZKri7rmtrpQ,2628
+ datahub/ingestion/source/snowflake/constants.py,sha256=SZzQTO6spPeHfV55tIbg7SL2ecsQF_z169HJ8u2G3Hk,2650
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=Y3LoqBavhc3Cm0nyAr3fnd_-i4gReDfaAuUdp7EgwPQ,19603
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=c9LE7nl6tkz7P9tc4EkSKphextW5pejLzdP3qS_iL1s,20196
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=e9dCARIQtGB8G1cSMRLorCbNLcPUD2g9gBL-LLLKjFE,17793
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=x6__7kmlIKXdnvENyN9AloE9h-vOlrjcWL95A2DGW5g,26968
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=-vjc2-sGKN0odt-IWEbx6Lhz4UlRwctlEHUnOr3_Mkg,38821
- datahub/ingestion/source/snowflake/snowflake_report.py,sha256=xSRNSG_iZcLTf9inNtlCQTDSNiDYm-jGpvAAGrRMTWI,6454
- datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=qG9MXutXcVyp5p59j2CPsj1zRPTdeActEqVpwlsgMKk,22217
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=c2PTnsMDD21qw_71T96xi9ylMpAXnTEyA1SK4qq528w,46105
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
+ datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
+ datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
+ datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=Yba6OIWYtukAFMoNtEtX2BXWwJee17Dl58DUyK0myho,54530
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
- datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=xq58c47zmaQPkTVqjKW25iViX8VJuHdQDTFY4jxzZ2o,12778
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=NidPSzXh2UajcvgeDoTmk31UW1dAeQBCCFjumZajzcI,33524
+ datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=EmYb2FEcdLwei92atRBQ3iKH7av4YBZCIFTgPmLo0Ng,13092
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=kx8aFalByIAfrp0a_kq5iyojzh9vI4od5eVGthAR5RY,33912
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/sql/athena.py,sha256=Uh9wGLOqAkcphffxOPIQNyXvjeRm74XIpaLb4rjqMjM,24045
  datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
@@ -869,6 +870,16 @@ datahub/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh4HnUHNmBEU74
  datahub/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
  datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
  datahub/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
+ datahub/sdk/__init__.py,sha256=fYD-f338EW5WPFW2NSiirMAsHkNgZfolIvneM7yxgBk,977
+ datahub/sdk/_all_entities.py,sha256=0XFtmgeEtrWOXy_oWcwqrtvfvzo8obPIq3Z1fEr5-34,400
+ datahub/sdk/_attribution.py,sha256=05iNVT_IDO7aU3vU6dJKCF8EoDAI1mwh4mAg_EBf3RY,1121
+ datahub/sdk/_entity.py,sha256=A_AWqNjaV6Y1YvmIiPNm0UyC0aO52G_L-iwgp9XEtYs,3225
+ datahub/sdk/_shared.py,sha256=FIEcYGm3LgRKLqn_i1FDePaqvMljkXIA9f3RMqaF1kY,11137
+ datahub/sdk/container.py,sha256=6Y19pQLN5vXGM86TEKW7ItTBwE3S0U4BsAMilWhyXqw,6511
+ datahub/sdk/dataset.py,sha256=BydV2papBEYL2de72UMNjSkAbsDNpVlXqiEkMlXJiLY,21514
+ datahub/sdk/entity_client.py,sha256=DcHytfCM8X9J6mm_QXzFR-2vDQa88I9Q2ktSNC2oSUI,4277
+ datahub/sdk/main_client.py,sha256=IKKzBMiKtT5zIMEHrvaMyxJ7DYBVNpqua70J0Ydl74Y,2068
+ datahub/sdk/resolver_client.py,sha256=UxI0bwg0ITm9dWPmAAKxkTvpLiGACtSCuEDNxLJipjs,3395
  datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
  datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
@@ -1001,9 +1012,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-0.15.0.5rc10.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
- acryl_datahub-0.15.0.5rc10.dist-info/METADATA,sha256=9INBTHoM-5OTwef7bW9Y31njlOljSaconFYwwbNHeSc,175378
- acryl_datahub-0.15.0.5rc10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- acryl_datahub-0.15.0.5rc10.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
- acryl_datahub-0.15.0.5rc10.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-0.15.0.5rc10.dist-info/RECORD,,
+ acryl_datahub-0.15.0.6rc2.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-0.15.0.6rc2.dist-info/METADATA,sha256=Bv5h1BaxWarbwfDdiiaD1wwyonRclpbgYtingIwN4B4,175375
+ acryl_datahub-0.15.0.6rc2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ acryl_datahub-0.15.0.6rc2.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
+ acryl_datahub-0.15.0.6rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-0.15.0.6rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0.5rc10"
+ __version__ = "0.15.0.6rc2"


  def is_dev_mode() -> bool:
datahub/errors.py ADDED
@@ -0,0 +1,35 @@
+ from datahub.configuration.common import MetaError
+
+ # TODO: Move all other error types to this file.
+
+
+ class SdkUsageError(MetaError):
+     pass
+
+
+ class AlreadyExistsError(SdkUsageError):
+     pass
+
+
+ class ItemNotFoundError(SdkUsageError):
+     pass
+
+
+ class MultipleItemsFoundError(SdkUsageError):
+     pass
+
+
+ class SchemaFieldKeyError(SdkUsageError, KeyError):
+     pass
+
+
+ class IngestionAttributionWarning(Warning):
+     pass
+
+
+ class MultipleSubtypesWarning(Warning):
+     pass
+
+
+ class ExperimentalWarning(Warning):
+     pass
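
For orientation, here is a minimal sketch of how these new error types might be handled by SDK callers. The `client` object and its `get_dataset` method are hypothetical stand-ins; only the exception classes above come from this release.

from datahub.errors import ItemNotFoundError, MultipleItemsFoundError

def lookup_dataset(client, name: str):
    # `client.get_dataset` is an illustrative call, not a released API.
    try:
        return client.get_dataset(name)
    except ItemNotFoundError:
        return None  # nothing matched the name
    except MultipleItemsFoundError as e:
        # more than one match: surface the ambiguity instead of guessing
        raise ValueError(f"ambiguous dataset name {name!r}") from e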
datahub/ingestion/source/common/subtypes.py CHANGED
@@ -24,6 +24,7 @@ class DatasetSubTypes(StrEnum):
      SAC_LIVE_DATA_MODEL = "Live Data Model"
      NEO4J_NODE = "Neo4j Node"
      NEO4J_RELATIONSHIP = "Neo4j Relationship"
+     SNOWFLAKE_STREAM = "Snowflake Stream"

      # TODO: Create separate entity...
      NOTEBOOK = "Notebook"
datahub/ingestion/source/mongodb.py CHANGED
@@ -219,26 +219,27 @@ def construct_schema_pymongo(
      """

      aggregations: List[Dict] = []
+
+     # The order of the aggregations impacts execution time. By setting the sample/limit aggregation first,
+     # the subsequent aggregations process a much smaller dataset, improving performance.
+     if sample_size:
+         if use_random_sampling:
+             aggregations.append({"$sample": {"size": sample_size}})
+         else:
+             aggregations.append({"$limit": sample_size})
+
      if should_add_document_size_filter:
          doc_size_field = "temporary_doc_size_field"
          # create a temporary field to store the size of the document. filter on it and then remove it.
-         aggregations = [
-             {"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
-             {"$match": {doc_size_field: {"$lt": max_document_size}}},
-             {"$project": {doc_size_field: 0}},
-         ]
-     if use_random_sampling:
-         # get sample documents in collection
-         if sample_size:
-             aggregations.append({"$sample": {"size": sample_size}})
-         documents = collection.aggregate(
-             aggregations,
-             allowDiskUse=True,
+         aggregations.extend(
+             [
+                 {"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}},
+                 {"$match": {doc_size_field: {"$lt": max_document_size}}},
+                 {"$project": {doc_size_field: 0}},
+             ]
          )
-     else:
-         if sample_size:
-             aggregations.append({"$limit": sample_size})
-         documents = collection.aggregate(aggregations, allowDiskUse=True)
+
+     documents = collection.aggregate(aggregations, allowDiskUse=True)

      return construct_schema(list(documents), delimiter)

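The effect of the reordering can be seen in a standalone pymongo sketch (the collection name and size threshold are illustrative): with $sample or $limit first, the document-size stages only process the sampled subset rather than the full collection.

from pymongo import MongoClient

coll = MongoClient()["mydb"]["mycollection"]  # assumes a reachable MongoDB

pipeline = [
    {"$sample": {"size": 1000}},  # runs first, so later stages see at most 1000 docs
    {"$addFields": {"temporary_doc_size_field": {"$bsonSize": "$$ROOT"}}},
    {"$match": {"temporary_doc_size_field": {"$lt": 16 * 1024 * 1024}}},
    {"$project": {"temporary_doc_size_field": 0}},
]
documents = list(coll.aggregate(pipeline, allowDiskUse=True))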
datahub/ingestion/source/powerbi/config.py CHANGED
@@ -132,6 +132,7 @@ class Constant:
      ACTIVE = "Active"
      SQL_PARSING_FAILURE = "SQL Parsing Failure"
      M_QUERY_NULL = '"null"'
+     REPORT_WEB_URL = "reportWebUrl"


  @dataclass
datahub/ingestion/source/powerbi/powerbi.py CHANGED
@@ -582,8 +582,11 @@ class Mapper:
          if tile.dataset is not None and tile.dataset.webUrl is not None:
              custom_properties[Constant.DATASET_WEB_URL] = tile.dataset.webUrl

-         if tile.report is not None and tile.report.id is not None:
-             custom_properties[Constant.REPORT_ID] = tile.report.id
+         if tile.report_id is not None:
+             custom_properties[Constant.REPORT_ID] = tile.report_id
+
+         if tile.report is not None and tile.report.webUrl is not None:
+             custom_properties[Constant.REPORT_WEB_URL] = tile.report.webUrl

          return custom_properties

@@ -1053,6 +1056,7 @@ class Mapper:
          report: powerbi_data_classes.Report,
          chart_mcps: List[MetadataChangeProposalWrapper],
          user_mcps: List[MetadataChangeProposalWrapper],
+         dashboard_edges: List[EdgeClass],
      ) -> List[MetadataChangeProposalWrapper]:
          """
          Map PowerBi report to Datahub dashboard
@@ -1074,6 +1078,7 @@ class Mapper:
              charts=chart_urn_list,
              lastModified=ChangeAuditStamps(),
              dashboardUrl=report.webUrl,
+             dashboards=dashboard_edges,
          )

          info_mcp = self.new_mcp(
@@ -1167,8 +1172,28 @@ class Mapper:
          ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
          chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)

+         # find all dashboards with a Tile referencing this report
+         downstream_dashboards_edges = []
+         for d in workspace.dashboards.values():
+             if any(t.report_id == report.id for t in d.tiles):
+                 dashboard_urn = builder.make_dashboard_urn(
+                     platform=self.__config.platform_name,
+                     platform_instance=self.__config.platform_instance,
+                     name=d.get_urn_part(),
+                 )
+                 edge = EdgeClass(
+                     destinationUrn=dashboard_urn,
+                     sourceUrn=None,
+                     created=None,
+                     lastModified=None,
+                     properties=None,
+                 )
+                 downstream_dashboards_edges.append(edge)
+
          # Let's convert report to datahub dashboard
-         report_mcps = self.report_to_dashboard(workspace, report, chart_mcps, user_mcps)
+         report_mcps = self.report_to_dashboard(
+             workspace, report, chart_mcps, user_mcps, downstream_dashboards_edges
+         )

          # Now add MCPs in sequence
          mcps.extend(ds_mcps)
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py CHANGED
@@ -286,11 +286,15 @@ class Tile:
      id: str
      title: str
      embedUrl: str
-     dataset: Optional["PowerBIDataset"]
      dataset_id: Optional[str]
-     report: Optional[Report]
+     report_id: Optional[str]
      createdFrom: CreatedFrom

+     # In a first pass, `dataset_id` and/or `report_id` are filled in.
+     # In a subsequent pass, the objects are populated.
+     dataset: Optional["PowerBIDataset"]
+     report: Optional[Report]
+
      def get_urn_part(self):
          return f"charts.{self.id}"

datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py CHANGED
@@ -337,41 +337,6 @@ class DataResolverBase(ABC):
          -tiles), there is no information available on pagination

          """
-
-         def new_dataset_or_report(tile_instance: Any) -> dict:
-             """
-             Find out which is the data source for tile. It is either REPORT or DATASET
-             """
-             report_fields = {
-                 Constant.REPORT: (
-                     self.get_report(
-                         workspace=workspace,
-                         report_id=tile_instance.get(Constant.REPORT_ID),
-                     )
-                     if tile_instance.get(Constant.REPORT_ID) is not None
-                     else None
-                 ),
-                 Constant.CREATED_FROM: Tile.CreatedFrom.UNKNOWN,
-             }
-
-             # reportId and datasetId are exclusive in tile_instance
-             # if datasetId is present that means tile is created from dataset
-             # if reportId is present that means tile is created from report
-             # if both i.e. reportId and datasetId are not present then tile is created from some visualization
-             if tile_instance.get(Constant.REPORT_ID) is not None:
-                 report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.REPORT
-             elif tile_instance.get(Constant.DATASET_ID) is not None:
-                 report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.DATASET
-             else:
-                 report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.VISUALIZATION
-
-             title: Optional[str] = tile_instance.get(Constant.TITLE)
-             _id: Optional[str] = tile_instance.get(Constant.ID)
-             created_from: Any = report_fields[Constant.CREATED_FROM]
-             logger.info(f"Tile {title}({_id}) is created from {created_from}")
-
-             return report_fields
-
          tile_list_endpoint: str = self.get_tiles_endpoint(
              workspace, dashboard_id=dashboard.id
          )
@@ -393,8 +358,18 @@ class DataResolverBase(ABC):
                  title=instance.get(Constant.TITLE),
                  embedUrl=instance.get(Constant.EMBED_URL),
                  dataset_id=instance.get(Constant.DATASET_ID),
+                 report_id=instance.get(Constant.REPORT_ID),
                  dataset=None,
-                 **new_dataset_or_report(instance),
+                 report=None,
+                 createdFrom=(
+                     # In the past we considered that only one of the two report_id or dataset_id would be present
+                     # but we have seen cases where both are present. If both are present, we prioritize the report.
+                     Tile.CreatedFrom.REPORT
+                     if instance.get(Constant.REPORT_ID)
+                     else Tile.CreatedFrom.DATASET
+                     if instance.get(Constant.DATASET_ID)
+                     else Tile.CreatedFrom.VISUALIZATION
+                 ),
              )
              for instance in tile_dict
              if instance is not None
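The new createdFrom expression replaces the deleted helper with a plain priority rule. A self-contained sketch of that rule (the enum here is illustrative, not the actual Tile.CreatedFrom import):

from enum import Enum

class CreatedFrom(Enum):
    REPORT = "Report"
    DATASET = "Dataset"
    VISUALIZATION = "Visualization"

def created_from(report_id, dataset_id):
    # If both IDs are present, the report wins; only tiles with
    # neither are treated as standalone visualizations.
    if report_id:
        return CreatedFrom.REPORT
    return CreatedFrom.DATASET if dataset_id else CreatedFrom.VISUALIZATION

assert created_from("r1", "d1") is CreatedFrom.REPORT  # both present -> report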
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py CHANGED
@@ -625,13 +625,26 @@ class PowerBiAPI:
              dashboard.tiles = self._get_resolver().get_tiles(
                  workspace, dashboard=dashboard
              )
-             # set the dataset for tiles
+             # set the dataset and the report for tiles
              for tile in dashboard.tiles:
+                 # In Power BI, dashboards, reports, and datasets are tightly scoped to the workspace they belong to.
+                 # https://learn.microsoft.com/en-us/power-bi/collaborate-share/service-new-workspaces
+                 if tile.report_id:
+                     tile.report = workspace.reports.get(tile.report_id)
+                     if tile.report is None:
+                         self.reporter.info(
+                             title="Missing Report Lineage For Tile",
+                             message="A Report reference that failed to be resolved. Please ensure that 'extract_reports' is set to True in the configuration.",
+                             context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, report-id: {tile.report_id}",
+                         )
+                 # However, semantic models (aka datasets) can be shared across workspaces
+                 # https://learn.microsoft.com/en-us/fabric/admin/portal-workspace#use-semantic-models-across-workspaces
+                 # That's why the global 'dataset_registry' is required
                  if tile.dataset_id:
                      tile.dataset = self.dataset_registry.get(tile.dataset_id)
                      if tile.dataset is None:
                          self.reporter.info(
-                             title="Missing Lineage For Tile",
+                             title="Missing Dataset Lineage For Tile",
                              message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.",
                              context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, dataset-id: {tile.dataset_id}",
                          )
@@ -653,10 +666,10 @@ class PowerBiAPI:
          for dashboard in workspace.dashboards.values():
              dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])

+         # fill reports first since some dashboard may reference a report
+         fill_reports()
          if self.__config.extract_dashboards:
              fill_dashboards()
-
-         fill_reports()
          fill_dashboard_tags()
          self._fill_independent_datasets(workspace=workspace)

datahub/ingestion/source/s3/source.py CHANGED
@@ -866,8 +866,21 @@ class S3Source(StatefulIngestionSourceBase):
          Returns:
              List[Folder]: A list of Folder objects representing the partitions found.
          """
+
+         def _is_allowed_path(path_spec_: PathSpec, s3_uri: str) -> bool:
+             allowed = path_spec_.allowed(s3_uri)
+             if not allowed:
+                 logger.debug(f"File {s3_uri} not allowed and skipping")
+                 self.report.report_file_dropped(s3_uri)
+             return allowed
+
+         s3_objects = (
+             obj
+             for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
+             if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
+         )
+
          partitions: List[Folder] = []
-         s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
          grouped_s3_objects_by_dirname = groupby_unsorted(
              s3_objects,
              key=lambda obj: obj.key.rsplit("/", 1)[0],
@@ -878,10 +891,6 @@ class S3Source(StatefulIngestionSourceBase):
              modification_time = None

              for item in group:
-                 file_path = self.create_s3_path(item.bucket_name, item.key)
-                 if not path_spec.allowed(file_path):
-                     logger.debug(f"File {file_path} not allowed and skipping")
-                     continue
                  file_size += item.size
                  if creation_time is None or item.last_modified < creation_time:
                      creation_time = item.last_modified
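The refactor moves path filtering in front of the grouping step, so a dropped file is reported once and never contributes to a partition's size or timestamps. A generic sketch of the pattern with illustrative keys (itertools.groupby stands in for DataHub's groupby_unsorted helper):

from itertools import groupby

listing = ["logs/2024/a.json", "logs/2024/part.tmp", "logs/2025/b.json"]

def allowed(key: str) -> bool:
    return not key.endswith(".tmp")  # stand-in for PathSpec.allowed()

# Filter lazily *before* grouping so skipped files never reach the
# per-folder accounting below.
objects = (k for k in listing if allowed(k))
for dirname, group in groupby(objects, key=lambda k: k.rsplit("/", 1)[0]):
    print(dirname, list(group))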
datahub/ingestion/source/snowflake/constants.py CHANGED
@@ -53,6 +53,7 @@ class SnowflakeObjectDomain(StrEnum):
      SCHEMA = "schema"
      COLUMN = "column"
      ICEBERG_TABLE = "iceberg table"
+     STREAM = "stream"


  GENERIC_PERMISSION_ERROR_KEY = "permission-error"
datahub/ingestion/source/snowflake/snowflake_config.py CHANGED
@@ -98,6 +98,11 @@ class SnowflakeFilterConfig(SQLFilterConfig):
      )
      # table_pattern and view_pattern are inherited from SQLFilterConfig

+     stream_pattern: AllowDenyPattern = Field(
+         default=AllowDenyPattern.allow_all(),
+         description="Regex patterns for streams to filter in ingestion. Note: Defaults to table_pattern if not specified. Specify regex to match the entire view name in database.schema.view format. e.g. to match all views starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
+     )
+
      match_fully_qualified_names: bool = Field(
          default=False,
          description="Whether `schema_pattern` is matched against fully qualified schema name `<catalog>.<schema>`.",
@@ -274,6 +279,11 @@ class SnowflakeV2Config(
          description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.",
      )

+     include_streams: bool = Field(
+         default=True,
+         description="If enabled, streams will be ingested as separate entities from tables/views.",
+     )
+
      structured_property_pattern: AllowDenyPattern = Field(
          default=AllowDenyPattern.allow_all(),
          description=(
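A short sketch of how the new stream_pattern behaves, using the same AllowDenyPattern type the config declares (the database and schema names are illustrative):

from datahub.configuration.common import AllowDenyPattern

stream_pattern = AllowDenyPattern(
    allow=[r"MYDB\.PUBLIC\..*"],  # keep streams in MYDB.PUBLIC
    deny=[r".*\.TMP_.*"],         # drop scratch streams
)

assert stream_pattern.allowed("MYDB.PUBLIC.ORDERS_STREAM")
assert not stream_pattern.allowed("MYDB.STAGING.ORDERS_STREAM")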
datahub/ingestion/source/snowflake/snowflake_queries.py CHANGED
@@ -49,6 +49,7 @@ from datahub.metadata.urns import CorpUserUrn
  from datahub.sql_parsing.schema_resolver import SchemaResolver
  from datahub.sql_parsing.sql_parsing_aggregator import (
      KnownLineageMapping,
+     ObservedQuery,
      PreparsedQuery,
      SqlAggregatorReport,
      SqlParsingAggregator,
@@ -241,7 +242,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
          use_cached_audit_log = audit_log_file.exists()

          queries: FileBackedList[
-             Union[KnownLineageMapping, PreparsedQuery, TableRename, TableSwap]
+             Union[
+                 KnownLineageMapping,
+                 PreparsedQuery,
+                 TableRename,
+                 TableSwap,
+                 ObservedQuery,
+             ]
          ]
          if use_cached_audit_log:
              logger.info("Using cached audit log")
@@ -252,7 +259,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):

          shared_connection = ConnectionWrapper(audit_log_file)
          queries = FileBackedList(shared_connection)
-         entry: Union[KnownLineageMapping, PreparsedQuery, TableRename, TableSwap]
+         entry: Union[
+             KnownLineageMapping,
+             PreparsedQuery,
+             TableRename,
+             TableSwap,
+             ObservedQuery,
+         ]

          with self.report.copy_history_fetch_timer:
              for entry in self.fetch_copy_history():
@@ -329,7 +342,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):

      def fetch_query_log(
          self, users: UsersMapping
-     ) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap]]:
+     ) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap, ObservedQuery]]:
          query_log_query = _build_enriched_query_log_query(
              start_time=self.config.window.start_time,
              end_time=self.config.window.end_time,
@@ -362,7 +375,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):

      def _parse_audit_log_row(
          self, row: Dict[str, Any], users: UsersMapping
-     ) -> Optional[Union[TableRename, TableSwap, PreparsedQuery]]:
+     ) -> Optional[Union[TableRename, TableSwap, PreparsedQuery, ObservedQuery]]:
          json_fields = {
              "DIRECT_OBJECTS_ACCESSED",
              "OBJECTS_MODIFIED",
@@ -398,6 +411,34 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
                  pass
              else:
                  return None
+
+         user = CorpUserUrn(
+             self.identifiers.get_user_identifier(
+                 res["user_name"], users.get(res["user_name"])
+             )
+         )
+
+         # Use direct_objects_accessed instead of objects_modified:
+         # objects_modified returns $SYS_VIEW_X with no mapping
+         has_stream_objects = any(
+             obj.get("objectDomain") == "Stream" for obj in direct_objects_accessed
+         )
+
+         # If a stream is used, default to query parsing.
+         if has_stream_objects:
+             logger.debug("Found matching stream object")
+             return ObservedQuery(
+                 query=res["query_text"],
+                 session_id=res["session_id"],
+                 timestamp=res["query_start_time"].astimezone(timezone.utc),
+                 user=user,
+                 default_db=res["default_db"],
+                 default_schema=res["default_schema"],
+                 query_hash=get_query_fingerprint(
+                     res["query_text"], self.identifiers.platform, fast=True
+                 ),
+             )
+
          upstreams = []
          column_usage = {}

@@ -460,12 +501,6 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
                  )
              )

-         user = CorpUserUrn(
-             self.identifiers.get_user_identifier(
-                 res["user_name"], users.get(res["user_name"])
-             )
-         )
-
          timestamp: datetime = res["query_start_time"]
          timestamp = timestamp.astimezone(timezone.utc)

datahub/ingestion/source/snowflake/snowflake_query.py CHANGED
@@ -9,6 +9,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
  from datahub.utilities.prefix_batch_builder import PrefixGroup

  SHOW_VIEWS_MAX_PAGE_SIZE = 10000
+ SHOW_STREAM_MAX_PAGE_SIZE = 10000


  def create_deny_regex_sql_filter(
@@ -36,6 +37,7 @@ class SnowflakeQuery:
          SnowflakeObjectDomain.VIEW.capitalize(),
          SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(),
          SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(),
+         SnowflakeObjectDomain.STREAM.capitalize(),
      }

      ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format(
@@ -44,7 +46,8 @@ class SnowflakeQuery:
      ACCESS_HISTORY_TABLE_DOMAINS_FILTER = (
          "("
          f"'{SnowflakeObjectDomain.TABLE.capitalize()}',"
-         f"'{SnowflakeObjectDomain.VIEW.capitalize()}'"
+         f"'{SnowflakeObjectDomain.VIEW.capitalize()}',"
+         f"'{SnowflakeObjectDomain.STREAM.capitalize()}',"
          ")"
      )

@@ -963,3 +966,19 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
      @staticmethod
      def get_all_users() -> str:
          return """SELECT name as "NAME", email as "EMAIL" FROM SNOWFLAKE.ACCOUNT_USAGE.USERS"""
+
+     @staticmethod
+     def streams_for_database(
+         db_name: str,
+         limit: int = SHOW_STREAM_MAX_PAGE_SIZE,
+         stream_pagination_marker: Optional[str] = None,
+     ) -> str:
+         # SHOW STREAMS can return a maximum of 10000 rows.
+         # https://docs.snowflake.com/en/sql-reference/sql/show-streams#usage-notes
+         assert limit <= SHOW_STREAM_MAX_PAGE_SIZE
+
+         # To work around this, we paginate through the results using the FROM clause.
+         from_clause = (
+             f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
+         )
+         return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
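
A hedged sketch of how a caller might page through SHOW STREAMS with this builder; the cursor handling and the "name" column access are illustrative, not the actual extractor code:

def iter_streams(cursor, db_name: str, page_size: int = SHOW_STREAM_MAX_PAGE_SIZE):
    marker = None
    while True:
        sql = SnowflakeQuery.streams_for_database(
            db_name, limit=page_size, stream_pagination_marker=marker
        )
        rows = cursor.execute(sql).fetchall()
        yield from rows
        if len(rows) < page_size:
            break  # last page reached
        marker = rows[-1]["name"]  # resume FROM the last stream name seen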