acryl-datahub 0.15.0.5rc10__py3-none-any.whl → 0.15.0.6rc1__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic.

Files changed (22)
  1. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/METADATA +2394 -2394
  2. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/RECORD +22 -22
  3. datahub/_version.py +1 -1
  4. datahub/ingestion/source/common/subtypes.py +1 -0
  5. datahub/ingestion/source/powerbi/config.py +1 -0
  6. datahub/ingestion/source/powerbi/powerbi.py +28 -3
  7. datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -2
  8. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +11 -36
  9. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +17 -4
  10. datahub/ingestion/source/snowflake/constants.py +1 -0
  11. datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
  12. datahub/ingestion/source/snowflake/snowflake_queries.py +45 -10
  13. datahub/ingestion/source/snowflake/snowflake_query.py +20 -1
  14. datahub/ingestion/source/snowflake/snowflake_report.py +6 -0
  15. datahub/ingestion/source/snowflake/snowflake_schema.py +98 -4
  16. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +294 -62
  17. datahub/ingestion/source/snowflake/snowflake_utils.py +17 -8
  18. datahub/ingestion/source/snowflake/snowflake_v2.py +15 -3
  19. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/LICENSE +0 -0
  20. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/WHEEL +0 -0
  21. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/entry_points.txt +0 -0
  22. {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/RECORD CHANGED
@@ -1,6 +1,6 @@
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=VqyyvoJV4bnpYg2UMs1kyEsTgRq0wVADin5Gxtvya04,325
+ datahub/_version.py,sha256=U9IGE-YR9bmigwAxXVjXLxWIGHYH0FW2G6D5UE_-ZIg,324
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -266,7 +266,7 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
  datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
  datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
- datahub/ingestion/source/common/subtypes.py,sha256=zxBQkRxsG_XMMz6Pmw_yMQiuFOhapOFVUOtXw8yHz7Q,2287
+ datahub/ingestion/source/common/subtypes.py,sha256=S0ssIxV7V38HGQwl-h5izYWyj1MQgmvJk4k_Q-5VGJ8,2329
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
@@ -363,10 +363,10 @@ datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwr
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=76Z-2Td4_3PH2wWL1XJrpV2Egre5YVh6bMXeDS5ZonE,12405
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/config.py,sha256=DadG3Y3R-emmEL7vW2vutL3TXXVe-_t6DA_S2kWUvLA,22784
+ datahub/ingestion/source/powerbi/config.py,sha256=1WFK-JxcgBEIZ2XTwuH1PvNXYcwqEJR-IYTUTv3Z4o8,22820
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
- datahub/ingestion/source/powerbi/powerbi.py,sha256=xCNMgL-KuPGpIFv_PP1woyiddY_PpbX1HEl3aDk7F1c,54535
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=a4LG4pxrZ6N5I6HmKFCfv9HMyvl4ZChy2XbVY17E-To,55570
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
@@ -376,9 +376,9 @@ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5ztea
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
  datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=kS337FgY-fLPjeRryQ-adVm1VAEThI88svii2Q9sGTc,8435
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=eNKW9ShWJ5F3pKgTVQ6xc1H1rl-JBIy9ye1pq5C2Kb0,39598
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=k8rP2uwXb6maS7VzprUcqr2ggjimz0tILVJezze0jyA,26441
+ datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=4Kr7cLXpsWGtg-M18aXyhij9k9Ll5dGv3EaCS3d2DRk,8590
+ datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=-EHDvVmr69bP11bFm0bW0Lf1I95lPHU7sdMX1Q70roI,38503
+ datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=NrhgwREmkWTvlhpEs7dAEEJfOxQRalA02ArKr2LLjeY,27666
  datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
  datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
  datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
@@ -432,26 +432,26 @@ datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpA
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/slack/slack.py,sha256=VpLS-6zuQa8hIuHnZhLf8wRdN72Xell3ZMd0kK3A0i8,13188
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_GrNdxV9WZKri7rmtrpQ,2628
+ datahub/ingestion/source/snowflake/constants.py,sha256=SZzQTO6spPeHfV55tIbg7SL2ecsQF_z169HJ8u2G3Hk,2650
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=Y3LoqBavhc3Cm0nyAr3fnd_-i4gReDfaAuUdp7EgwPQ,19603
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=c9LE7nl6tkz7P9tc4EkSKphextW5pejLzdP3qS_iL1s,20196
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=e9dCARIQtGB8G1cSMRLorCbNLcPUD2g9gBL-LLLKjFE,17793
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=x6__7kmlIKXdnvENyN9AloE9h-vOlrjcWL95A2DGW5g,26968
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=-vjc2-sGKN0odt-IWEbx6Lhz4UlRwctlEHUnOr3_Mkg,38821
- datahub/ingestion/source/snowflake/snowflake_report.py,sha256=xSRNSG_iZcLTf9inNtlCQTDSNiDYm-jGpvAAGrRMTWI,6454
- datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=qG9MXutXcVyp5p59j2CPsj1zRPTdeActEqVpwlsgMKk,22217
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=c2PTnsMDD21qw_71T96xi9ylMpAXnTEyA1SK4qq528w,46105
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
+ datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
+ datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fdDN7jO5aHsmTFYC8cahXRT9BSAoDY72heM_WrkSxXo,25648
+ datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=5DUP_uTmbLU01NegFXClqPkpB8LTruIkyIaGUBOTCQw,54718
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
- datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=xq58c47zmaQPkTVqjKW25iViX8VJuHdQDTFY4jxzZ2o,12778
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=NidPSzXh2UajcvgeDoTmk31UW1dAeQBCCFjumZajzcI,33524
+ datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=EmYb2FEcdLwei92atRBQ3iKH7av4YBZCIFTgPmLo0Ng,13092
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=kx8aFalByIAfrp0a_kq5iyojzh9vI4od5eVGthAR5RY,33912
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/sql/athena.py,sha256=Uh9wGLOqAkcphffxOPIQNyXvjeRm74XIpaLb4rjqMjM,24045
  datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
@@ -1001,9 +1001,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-0.15.0.5rc10.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
- acryl_datahub-0.15.0.5rc10.dist-info/METADATA,sha256=9INBTHoM-5OTwef7bW9Y31njlOljSaconFYwwbNHeSc,175378
- acryl_datahub-0.15.0.5rc10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- acryl_datahub-0.15.0.5rc10.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
- acryl_datahub-0.15.0.5rc10.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-0.15.0.5rc10.dist-info/RECORD,,
+ acryl_datahub-0.15.0.6rc1.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-0.15.0.6rc1.dist-info/METADATA,sha256=YPtgGtRPaltE8LkNq8PlWsrlzhbE-FQV0wWYkkuPqDc,175375
+ acryl_datahub-0.15.0.6rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ acryl_datahub-0.15.0.6rc1.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
+ acryl_datahub-0.15.0.6rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-0.15.0.6rc1.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0.5rc10"
+ __version__ = "0.15.0.6rc1"


  def is_dev_mode() -> bool:
datahub/ingestion/source/common/subtypes.py CHANGED
@@ -24,6 +24,7 @@ class DatasetSubTypes(StrEnum):
  SAC_LIVE_DATA_MODEL = "Live Data Model"
  NEO4J_NODE = "Neo4j Node"
  NEO4J_RELATIONSHIP = "Neo4j Relationship"
+ SNOWFLAKE_STREAM = "Snowflake Stream"

  # TODO: Create separate entity...
  NOTEBOOK = "Notebook"
datahub/ingestion/source/powerbi/config.py CHANGED
@@ -132,6 +132,7 @@ class Constant:
  ACTIVE = "Active"
  SQL_PARSING_FAILURE = "SQL Parsing Failure"
  M_QUERY_NULL = '"null"'
+ REPORT_WEB_URL = "reportWebUrl"


  @dataclass
datahub/ingestion/source/powerbi/powerbi.py CHANGED
@@ -582,8 +582,11 @@ class Mapper:
  if tile.dataset is not None and tile.dataset.webUrl is not None:
  custom_properties[Constant.DATASET_WEB_URL] = tile.dataset.webUrl

- if tile.report is not None and tile.report.id is not None:
- custom_properties[Constant.REPORT_ID] = tile.report.id
+ if tile.report_id is not None:
+ custom_properties[Constant.REPORT_ID] = tile.report_id
+
+ if tile.report is not None and tile.report.webUrl is not None:
+ custom_properties[Constant.REPORT_WEB_URL] = tile.report.webUrl

  return custom_properties

@@ -1053,6 +1056,7 @@ class Mapper:
  report: powerbi_data_classes.Report,
  chart_mcps: List[MetadataChangeProposalWrapper],
  user_mcps: List[MetadataChangeProposalWrapper],
+ dashboard_edges: List[EdgeClass],
  ) -> List[MetadataChangeProposalWrapper]:
  """
  Map PowerBi report to Datahub dashboard
@@ -1074,6 +1078,7 @@ class Mapper:
  charts=chart_urn_list,
  lastModified=ChangeAuditStamps(),
  dashboardUrl=report.webUrl,
+ dashboards=dashboard_edges,
  )

  info_mcp = self.new_mcp(
@@ -1167,8 +1172,28 @@ class Mapper:
  ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
  chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)

+ # find all dashboards with a Tile referencing this report
+ downstream_dashboards_edges = []
+ for d in workspace.dashboards.values():
+ if any(t.report_id == report.id for t in d.tiles):
+ dashboard_urn = builder.make_dashboard_urn(
+ platform=self.__config.platform_name,
+ platform_instance=self.__config.platform_instance,
+ name=d.get_urn_part(),
+ )
+ edge = EdgeClass(
+ destinationUrn=dashboard_urn,
+ sourceUrn=None,
+ created=None,
+ lastModified=None,
+ properties=None,
+ )
+ downstream_dashboards_edges.append(edge)
+
  # Let's convert report to datahub dashboard
- report_mcps = self.report_to_dashboard(workspace, report, chart_mcps, user_mcps)
+ report_mcps = self.report_to_dashboard(
+ workspace, report, chart_mcps, user_mcps, downstream_dashboards_edges
+ )

  # Now add MCPs in sequence
  mcps.extend(ds_mcps)
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py CHANGED
@@ -286,11 +286,15 @@ class Tile:
  id: str
  title: str
  embedUrl: str
- dataset: Optional["PowerBIDataset"]
  dataset_id: Optional[str]
- report: Optional[Report]
+ report_id: Optional[str]
  createdFrom: CreatedFrom

+ # In a first pass, `dataset_id` and/or `report_id` are filled in.
+ # In a subsequent pass, the objects are populated.
+ dataset: Optional["PowerBIDataset"]
+ report: Optional[Report]
+
  def get_urn_part(self):
  return f"charts.{self.id}"
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py CHANGED
@@ -337,41 +337,6 @@ class DataResolverBase(ABC):
  -tiles), there is no information available on pagination

  """
-
- def new_dataset_or_report(tile_instance: Any) -> dict:
- """
- Find out which is the data source for tile. It is either REPORT or DATASET
- """
- report_fields = {
- Constant.REPORT: (
- self.get_report(
- workspace=workspace,
- report_id=tile_instance.get(Constant.REPORT_ID),
- )
- if tile_instance.get(Constant.REPORT_ID) is not None
- else None
- ),
- Constant.CREATED_FROM: Tile.CreatedFrom.UNKNOWN,
- }
-
- # reportId and datasetId are exclusive in tile_instance
- # if datasetId is present that means tile is created from dataset
- # if reportId is present that means tile is created from report
- # if both reportId and datasetId are absent, the tile is created from some visualization
- if tile_instance.get(Constant.REPORT_ID) is not None:
- report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.REPORT
- elif tile_instance.get(Constant.DATASET_ID) is not None:
- report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.DATASET
- else:
- report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.VISUALIZATION
-
- title: Optional[str] = tile_instance.get(Constant.TITLE)
- _id: Optional[str] = tile_instance.get(Constant.ID)
- created_from: Any = report_fields[Constant.CREATED_FROM]
- logger.info(f"Tile {title}({_id}) is created from {created_from}")
-
- return report_fields
-
  tile_list_endpoint: str = self.get_tiles_endpoint(
  workspace, dashboard_id=dashboard.id
  )
@@ -393,8 +358,18 @@ class DataResolverBase(ABC):
  title=instance.get(Constant.TITLE),
  embedUrl=instance.get(Constant.EMBED_URL),
  dataset_id=instance.get(Constant.DATASET_ID),
+ report_id=instance.get(Constant.REPORT_ID),
  dataset=None,
- **new_dataset_or_report(instance),
+ report=None,
+ createdFrom=(
+ # In the past we assumed that only one of report_id or dataset_id would be present,
+ # but we have seen cases where both are. If both are present, we prioritize the report.
+ Tile.CreatedFrom.REPORT
+ if instance.get(Constant.REPORT_ID)
+ else Tile.CreatedFrom.DATASET
+ if instance.get(Constant.DATASET_ID)
+ else Tile.CreatedFrom.VISUALIZATION
+ ),
+ )
  for instance in tile_dict
  if instance is not None
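
The nested conditional above encodes a simple precedence rule: report over dataset over visualization. A minimal standalone sketch of the same rule, with a hypothetical `classify_tile` helper and made-up payloads (the `reportId`/`datasetId` keys follow the Power BI REST payload referenced in the removed comments):

```python
from enum import Enum


class CreatedFrom(Enum):
    REPORT = "Report"
    DATASET = "Dataset"
    VISUALIZATION = "Visualization"


def classify_tile(instance: dict) -> CreatedFrom:
    # Report takes precedence when both reportId and datasetId are present.
    if instance.get("reportId"):
        return CreatedFrom.REPORT
    if instance.get("datasetId"):
        return CreatedFrom.DATASET
    return CreatedFrom.VISUALIZATION


assert classify_tile({"reportId": "r1", "datasetId": "d1"}) is CreatedFrom.REPORT
assert classify_tile({"datasetId": "d1"}) is CreatedFrom.DATASET
assert classify_tile({}) is CreatedFrom.VISUALIZATION
```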
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py CHANGED
@@ -625,13 +625,26 @@ class PowerBiAPI:
  dashboard.tiles = self._get_resolver().get_tiles(
  workspace, dashboard=dashboard
  )
- # set the dataset for tiles
+ # set the dataset and the report for tiles
  for tile in dashboard.tiles:
+ # In Power BI, dashboards, reports, and datasets are tightly scoped to the workspace they belong to.
+ # https://learn.microsoft.com/en-us/power-bi/collaborate-share/service-new-workspaces
+ if tile.report_id:
+ tile.report = workspace.reports.get(tile.report_id)
+ if tile.report is None:
+ self.reporter.info(
+ title="Missing Report Lineage For Tile",
+ message="A report reference failed to be resolved. Please ensure that 'extract_reports' is set to True in the configuration.",
+ context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, report-id: {tile.report_id}",
+ )
+ # However, semantic models (aka datasets) can be shared across workspaces
+ # https://learn.microsoft.com/en-us/fabric/admin/portal-workspace#use-semantic-models-across-workspaces
+ # That's why the global 'dataset_registry' is required
  if tile.dataset_id:
  tile.dataset = self.dataset_registry.get(tile.dataset_id)
  if tile.dataset is None:
  self.reporter.info(
- title="Missing Lineage For Tile",
+ title="Missing Dataset Lineage For Tile",
  message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.",
  context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, dataset-id: {tile.dataset_id}",
  )
@@ -653,10 +666,10 @@ class PowerBiAPI:
  for dashboard in workspace.dashboards.values():
  dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])

+ # fill reports first since some dashboards may reference a report
+ fill_reports()
  if self.__config.extract_dashboards:
  fill_dashboards()
-
- fill_reports()
  fill_dashboard_tags()
  self._fill_independent_datasets(workspace=workspace)
datahub/ingestion/source/snowflake/constants.py CHANGED
@@ -53,6 +53,7 @@ class SnowflakeObjectDomain(StrEnum):
  SCHEMA = "schema"
  COLUMN = "column"
  ICEBERG_TABLE = "iceberg table"
+ STREAM = "stream"


  GENERIC_PERMISSION_ERROR_KEY = "permission-error"
datahub/ingestion/source/snowflake/snowflake_config.py CHANGED
@@ -98,6 +98,11 @@ class SnowflakeFilterConfig(SQLFilterConfig):
  )
  # table_pattern and view_pattern are inherited from SQLFilterConfig

+ stream_pattern: AllowDenyPattern = Field(
+ default=AllowDenyPattern.allow_all(),
+ description="Regex patterns for streams to filter in ingestion. Specify a regex that matches the entire stream name in database.schema.stream format. e.g. to match all streams starting with customer in the Customer database and public schema, use the regex 'Customer.public.customer.*'",
+ )
+
  match_fully_qualified_names: bool = Field(
  default=False,
  description="Whether `schema_pattern` is matched against fully qualified schema name `<catalog>.<schema>`.",
@@ -274,6 +279,11 @@ class SnowflakeV2Config(
  description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.",
  )

+ include_streams: bool = Field(
+ default=True,
+ description="If enabled, streams will be ingested as separate entities from tables/views.",
+ )
+
  structured_property_pattern: AllowDenyPattern = Field(
  default=AllowDenyPattern.allow_all(),
  description=(
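
The new `stream_pattern` filter is matched against the fully qualified `database.schema.stream` name, like the existing table and view patterns. A rough illustration of the matching semantics using plain `re` (the pattern and stream names below are made up; the real config goes through DataHub's `AllowDenyPattern`, which is case-insensitive by default):

```python
import re

# Hypothetical allow pattern: only streams in MY_DB.PUBLIC whose names start with CUSTOMER.
allow = re.compile(r"MY_DB\.PUBLIC\.CUSTOMER.*", re.IGNORECASE)

candidates = [
    "MY_DB.PUBLIC.CUSTOMER_CHANGES",  # matches: ingested
    "MY_DB.PUBLIC.ORDERS_STREAM",     # no match: filtered out
    "OTHER_DB.PUBLIC.CUSTOMER_FEED",  # no match: wrong database
]

for fqn in candidates:
    print(fqn, "->", "ingest" if allow.match(fqn) else "skip")
```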
datahub/ingestion/source/snowflake/snowflake_queries.py CHANGED
@@ -49,6 +49,7 @@ from datahub.metadata.urns import CorpUserUrn
  from datahub.sql_parsing.schema_resolver import SchemaResolver
  from datahub.sql_parsing.sql_parsing_aggregator import (
  KnownLineageMapping,
+ ObservedQuery,
  PreparsedQuery,
  SqlAggregatorReport,
  SqlParsingAggregator,
@@ -241,7 +242,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  use_cached_audit_log = audit_log_file.exists()

  queries: FileBackedList[
- Union[KnownLineageMapping, PreparsedQuery, TableRename, TableSwap]
+ Union[
+ KnownLineageMapping,
+ PreparsedQuery,
+ TableRename,
+ TableSwap,
+ ObservedQuery,
+ ]
  ]
  if use_cached_audit_log:
  logger.info("Using cached audit log")
@@ -252,7 +259,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):

  shared_connection = ConnectionWrapper(audit_log_file)
  queries = FileBackedList(shared_connection)
- entry: Union[KnownLineageMapping, PreparsedQuery, TableRename, TableSwap]
+ entry: Union[
+ KnownLineageMapping,
+ PreparsedQuery,
+ TableRename,
+ TableSwap,
+ ObservedQuery,
+ ]

  with self.report.copy_history_fetch_timer:
  for entry in self.fetch_copy_history():
@@ -329,7 +342,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):

  def fetch_query_log(
  self, users: UsersMapping
- ) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap]]:
+ ) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap, ObservedQuery]]:
  query_log_query = _build_enriched_query_log_query(
  start_time=self.config.window.start_time,
  end_time=self.config.window.end_time,
@@ -362,7 +375,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):

  def _parse_audit_log_row(
  self, row: Dict[str, Any], users: UsersMapping
- ) -> Optional[Union[TableRename, TableSwap, PreparsedQuery]]:
+ ) -> Optional[Union[TableRename, TableSwap, PreparsedQuery, ObservedQuery]]:
  json_fields = {
  "DIRECT_OBJECTS_ACCESSED",
  "OBJECTS_MODIFIED",
@@ -398,6 +411,34 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  pass
  else:
  return None
+
+ user = CorpUserUrn(
+ self.identifiers.get_user_identifier(
+ res["user_name"], users.get(res["user_name"])
+ )
+ )
+
+ # Use direct_objects_accessed instead of objects_modified because
+ # objects_modified returns $SYS_VIEW_X with no mapping
+ has_stream_objects = any(
+ obj.get("objectDomain") == "Stream" for obj in direct_objects_accessed
+ )
+
+ # If a stream is used, default to query parsing.
+ if has_stream_objects:
+ logger.debug("Found matching stream object")
+ return ObservedQuery(
+ query=res["query_text"],
+ session_id=res["session_id"],
+ timestamp=res["query_start_time"].astimezone(timezone.utc),
+ user=user,
+ default_db=res["default_db"],
+ default_schema=res["default_schema"],
+ query_hash=get_query_fingerprint(
+ res["query_text"], self.identifiers.platform, fast=True
+ ),
+ )
+
  upstreams = []
  column_usage = {}
@@ -460,12 +501,6 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
  )
  )

- user = CorpUserUrn(
- self.identifiers.get_user_identifier(
- res["user_name"], users.get(res["user_name"])
- )
- )
-
  timestamp: datetime = res["query_start_time"]
  timestamp = timestamp.astimezone(timezone.utc)
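
The stream detection above keys off the `objectDomain` field of Snowflake's `DIRECT_OBJECTS_ACCESSED` JSON. A self-contained sketch of the same predicate against an illustrative row (the object names are invented; the field shape follows Snowflake's ACCESS_HISTORY documentation):

```python
import json

# Illustrative DIRECT_OBJECTS_ACCESSED payload for a query that read a stream and a table.
direct_objects_accessed = json.loads("""
[
  {"objectName": "MY_DB.PUBLIC.CUSTOMER_CHANGES", "objectDomain": "Stream"},
  {"objectName": "MY_DB.PUBLIC.CUSTOMERS", "objectDomain": "Table"}
]
""")

# Same check as in _parse_audit_log_row: one accessed stream is enough to route the
# row to SQL parsing (ObservedQuery) instead of precomputed lineage (PreparsedQuery).
has_stream_objects = any(
    obj.get("objectDomain") == "Stream" for obj in direct_objects_accessed
)
print(has_stream_objects)  # True
```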
datahub/ingestion/source/snowflake/snowflake_query.py CHANGED
@@ -9,6 +9,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
  from datahub.utilities.prefix_batch_builder import PrefixGroup

  SHOW_VIEWS_MAX_PAGE_SIZE = 10000
+ SHOW_STREAM_MAX_PAGE_SIZE = 10000


  def create_deny_regex_sql_filter(
@@ -36,6 +37,7 @@ class SnowflakeQuery:
  SnowflakeObjectDomain.VIEW.capitalize(),
  SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(),
  SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(),
+ SnowflakeObjectDomain.STREAM.capitalize(),
  }

  ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format(
@@ -44,7 +46,8 @@ class SnowflakeQuery:
  ACCESS_HISTORY_TABLE_DOMAINS_FILTER = (
  "("
  f"'{SnowflakeObjectDomain.TABLE.capitalize()}',"
- f"'{SnowflakeObjectDomain.VIEW.capitalize()}'"
+ f"'{SnowflakeObjectDomain.VIEW.capitalize()}',"
+ f"'{SnowflakeObjectDomain.STREAM.capitalize()}'"
  ")"
  )

@@ -963,3 +966,19 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
  @staticmethod
  def get_all_users() -> str:
  return """SELECT name as "NAME", email as "EMAIL" FROM SNOWFLAKE.ACCOUNT_USAGE.USERS"""
+
+ @staticmethod
+ def streams_for_database(
+ db_name: str,
+ limit: int = SHOW_STREAM_MAX_PAGE_SIZE,
+ stream_pagination_marker: Optional[str] = None,
+ ) -> str:
+ # SHOW STREAMS can return a maximum of 10000 rows.
+ # https://docs.snowflake.com/en/sql-reference/sql/show-streams#usage-notes
+ assert limit <= SHOW_STREAM_MAX_PAGE_SIZE
+
+ # To work around this, we paginate through the results using the FROM clause.
+ from_clause = (
+ f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
+ )
+ return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
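
`streams_for_database` only builds a single page of the `SHOW STREAMS` query; a caller has to loop, feeding the last stream name of each page back in as `stream_pagination_marker`. A minimal sketch of such a loop, assuming the `SnowflakeQuery` class from the diff above and a DB-API style cursor whose `execute` returns itself and yields dict rows with a `name` key (the `iter_streams` helper is illustrative, not part of the package):

```python
from typing import Any, Dict, Iterator


def iter_streams(cursor: Any, db_name: str, page_size: int = 10000) -> Iterator[Dict]:
    """Yield all streams in db_name, paging past the 10k SHOW STREAMS cap."""
    marker = None
    while True:
        query = SnowflakeQuery.streams_for_database(
            db_name, limit=page_size, stream_pagination_marker=marker
        )
        rows = cursor.execute(query).fetchall()
        yield from rows
        if len(rows) < page_size:
            break  # short page: no more results
        # Resume after the lexicographically last stream name seen so far.
        marker = rows[-1]["name"]
```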
datahub/ingestion/source/snowflake/snowflake_report.py CHANGED
@@ -104,6 +104,7 @@ class SnowflakeV2Report(
  schemas_scanned: int = 0
  databases_scanned: int = 0
  tags_scanned: int = 0
+ streams_scanned: int = 0

  include_usage_stats: bool = False
  include_operational_stats: bool = False
@@ -113,6 +114,7 @@ class SnowflakeV2Report(
  table_lineage_query_secs: float = -1
  external_lineage_queries_secs: float = -1
  num_tables_with_known_upstreams: int = 0
+ num_streams_with_known_upstreams: int = 0
  num_upstream_lineage_edge_parsing_failed: int = 0
  num_secure_views_missing_definition: int = 0
  num_structured_property_templates_created: int = 0
@@ -131,6 +133,8 @@ class SnowflakeV2Report(
  num_get_tags_for_object_queries: int = 0
  num_get_tags_on_columns_for_table_queries: int = 0

+ num_get_streams_for_schema_queries: int = 0
+
  rows_zero_objects_modified: int = 0

  _processed_tags: MutableSet[str] = field(default_factory=set)
@@ -157,6 +161,8 @@ class SnowflakeV2Report(
  return
  self._scanned_tags.add(name)
  self.tags_scanned += 1
+ elif ent_type == "stream":
+ self.streams_scanned += 1
  else:
  raise KeyError(f"Unknown entity {ent_type}.")