acryl-datahub 0.15.0.5rc10__py3-none-any.whl → 0.15.0.6rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/METADATA +2394 -2394
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/RECORD +22 -22
- datahub/_version.py +1 -1
- datahub/ingestion/source/common/subtypes.py +1 -0
- datahub/ingestion/source/powerbi/config.py +1 -0
- datahub/ingestion/source/powerbi/powerbi.py +28 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +6 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +11 -36
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +17 -4
- datahub/ingestion/source/snowflake/constants.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +45 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +20 -1
- datahub/ingestion/source/snowflake/snowflake_report.py +6 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +98 -4
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +294 -62
- datahub/ingestion/source/snowflake/snowflake_utils.py +17 -8
- datahub/ingestion/source/snowflake/snowflake_v2.py +15 -3
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.5rc10.dist-info → acryl_datahub-0.15.0.6rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=U9IGE-YR9bmigwAxXVjXLxWIGHYH0FW2G6D5UE_-ZIg,324
|
|
4
4
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
5
5
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -266,7 +266,7 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5Pa
|
|
|
266
266
|
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
|
|
267
267
|
datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
268
268
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
269
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
269
|
+
datahub/ingestion/source/common/subtypes.py,sha256=S0ssIxV7V38HGQwl-h5izYWyj1MQgmvJk4k_Q-5VGJ8,2329
|
|
270
270
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
271
271
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
272
272
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
@@ -363,10 +363,10 @@ datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwr
|
|
|
363
363
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
364
364
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=76Z-2Td4_3PH2wWL1XJrpV2Egre5YVh6bMXeDS5ZonE,12405
|
|
365
365
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
366
|
-
datahub/ingestion/source/powerbi/config.py,sha256=
|
|
366
|
+
datahub/ingestion/source/powerbi/config.py,sha256=1WFK-JxcgBEIZ2XTwuH1PvNXYcwqEJR-IYTUTv3Z4o8,22820
|
|
367
367
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
368
368
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
|
|
369
|
-
datahub/ingestion/source/powerbi/powerbi.py,sha256=
|
|
369
|
+
datahub/ingestion/source/powerbi/powerbi.py,sha256=a4LG4pxrZ6N5I6HmKFCfv9HMyvl4ZChy2XbVY17E-To,55570
|
|
370
370
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
371
371
|
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
|
|
372
372
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
@@ -376,9 +376,9 @@ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5ztea
|
|
|
376
376
|
datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
|
|
377
377
|
datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
|
|
378
378
|
datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
379
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=
|
|
380
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256
|
|
381
|
-
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=
|
|
379
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=4Kr7cLXpsWGtg-M18aXyhij9k9Ll5dGv3EaCS3d2DRk,8590
|
|
380
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=-EHDvVmr69bP11bFm0bW0Lf1I95lPHU7sdMX1Q70roI,38503
|
|
381
|
+
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=NrhgwREmkWTvlhpEs7dAEEJfOxQRalA02ArKr2LLjeY,27666
|
|
382
382
|
datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
|
|
383
383
|
datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
|
|
384
384
|
datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
|
|
@@ -432,26 +432,26 @@ datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpA
|
|
|
432
432
|
datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
433
433
|
datahub/ingestion/source/slack/slack.py,sha256=VpLS-6zuQa8hIuHnZhLf8wRdN72Xell3ZMd0kK3A0i8,13188
|
|
434
434
|
datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
435
|
-
datahub/ingestion/source/snowflake/constants.py,sha256=
|
|
435
|
+
datahub/ingestion/source/snowflake/constants.py,sha256=SZzQTO6spPeHfV55tIbg7SL2ecsQF_z169HJ8u2G3Hk,2650
|
|
436
436
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
437
437
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
438
438
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
439
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
439
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=c9LE7nl6tkz7P9tc4EkSKphextW5pejLzdP3qS_iL1s,20196
|
|
440
440
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=e9dCARIQtGB8G1cSMRLorCbNLcPUD2g9gBL-LLLKjFE,17793
|
|
441
441
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
442
442
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
|
|
443
443
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
444
|
-
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=
|
|
445
|
-
datahub/ingestion/source/snowflake/snowflake_query.py,sha256
|
|
446
|
-
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=
|
|
447
|
-
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=
|
|
448
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=
|
|
444
|
+
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
|
|
445
|
+
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
|
|
446
|
+
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
|
|
447
|
+
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fdDN7jO5aHsmTFYC8cahXRT9BSAoDY72heM_WrkSxXo,25648
|
|
448
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=5DUP_uTmbLU01NegFXClqPkpB8LTruIkyIaGUBOTCQw,54718
|
|
449
449
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
450
450
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
451
451
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=1eLYTcgmfzDs9xktMTTE74L5SeNP48Qg3uLr9y-Ez3Y,8733
|
|
452
452
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
|
|
453
|
-
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=
|
|
454
|
-
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=
|
|
453
|
+
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=EmYb2FEcdLwei92atRBQ3iKH7av4YBZCIFTgPmLo0Ng,13092
|
|
454
|
+
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=kx8aFalByIAfrp0a_kq5iyojzh9vI4od5eVGthAR5RY,33912
|
|
455
455
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
456
456
|
datahub/ingestion/source/sql/athena.py,sha256=Uh9wGLOqAkcphffxOPIQNyXvjeRm74XIpaLb4rjqMjM,24045
|
|
457
457
|
datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
|
|
@@ -1001,9 +1001,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1001
1001
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1002
1002
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1003
1003
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1004
|
-
acryl_datahub-0.15.0.
|
|
1005
|
-
acryl_datahub-0.15.0.
|
|
1006
|
-
acryl_datahub-0.15.0.
|
|
1007
|
-
acryl_datahub-0.15.0.
|
|
1008
|
-
acryl_datahub-0.15.0.
|
|
1009
|
-
acryl_datahub-0.15.0.
|
|
1004
|
+
acryl_datahub-0.15.0.6rc1.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1005
|
+
acryl_datahub-0.15.0.6rc1.dist-info/METADATA,sha256=YPtgGtRPaltE8LkNq8PlWsrlzhbE-FQV0wWYkkuPqDc,175375
|
|
1006
|
+
acryl_datahub-0.15.0.6rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1007
|
+
acryl_datahub-0.15.0.6rc1.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1008
|
+
acryl_datahub-0.15.0.6rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1009
|
+
acryl_datahub-0.15.0.6rc1.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -582,8 +582,11 @@ class Mapper:
|
|
|
582
582
|
if tile.dataset is not None and tile.dataset.webUrl is not None:
|
|
583
583
|
custom_properties[Constant.DATASET_WEB_URL] = tile.dataset.webUrl
|
|
584
584
|
|
|
585
|
-
if tile.
|
|
586
|
-
custom_properties[Constant.REPORT_ID] = tile.
|
|
585
|
+
if tile.report_id is not None:
|
|
586
|
+
custom_properties[Constant.REPORT_ID] = tile.report_id
|
|
587
|
+
|
|
588
|
+
if tile.report is not None and tile.report.webUrl is not None:
|
|
589
|
+
custom_properties[Constant.REPORT_WEB_URL] = tile.report.webUrl
|
|
587
590
|
|
|
588
591
|
return custom_properties
|
|
589
592
|
|
|
@@ -1053,6 +1056,7 @@ class Mapper:
|
|
|
1053
1056
|
report: powerbi_data_classes.Report,
|
|
1054
1057
|
chart_mcps: List[MetadataChangeProposalWrapper],
|
|
1055
1058
|
user_mcps: List[MetadataChangeProposalWrapper],
|
|
1059
|
+
dashboard_edges: List[EdgeClass],
|
|
1056
1060
|
) -> List[MetadataChangeProposalWrapper]:
|
|
1057
1061
|
"""
|
|
1058
1062
|
Map PowerBi report to Datahub dashboard
|
|
@@ -1074,6 +1078,7 @@ class Mapper:
|
|
|
1074
1078
|
charts=chart_urn_list,
|
|
1075
1079
|
lastModified=ChangeAuditStamps(),
|
|
1076
1080
|
dashboardUrl=report.webUrl,
|
|
1081
|
+
dashboards=dashboard_edges,
|
|
1077
1082
|
)
|
|
1078
1083
|
|
|
1079
1084
|
info_mcp = self.new_mcp(
|
|
@@ -1167,8 +1172,28 @@ class Mapper:
|
|
|
1167
1172
|
ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
|
|
1168
1173
|
chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)
|
|
1169
1174
|
|
|
1175
|
+
# find all dashboards with a Tile referencing this report
|
|
1176
|
+
downstream_dashboards_edges = []
|
|
1177
|
+
for d in workspace.dashboards.values():
|
|
1178
|
+
if any(t.report_id == report.id for t in d.tiles):
|
|
1179
|
+
dashboard_urn = builder.make_dashboard_urn(
|
|
1180
|
+
platform=self.__config.platform_name,
|
|
1181
|
+
platform_instance=self.__config.platform_instance,
|
|
1182
|
+
name=d.get_urn_part(),
|
|
1183
|
+
)
|
|
1184
|
+
edge = EdgeClass(
|
|
1185
|
+
destinationUrn=dashboard_urn,
|
|
1186
|
+
sourceUrn=None,
|
|
1187
|
+
created=None,
|
|
1188
|
+
lastModified=None,
|
|
1189
|
+
properties=None,
|
|
1190
|
+
)
|
|
1191
|
+
downstream_dashboards_edges.append(edge)
|
|
1192
|
+
|
|
1170
1193
|
# Let's convert report to datahub dashboard
|
|
1171
|
-
report_mcps = self.report_to_dashboard(
|
|
1194
|
+
report_mcps = self.report_to_dashboard(
|
|
1195
|
+
workspace, report, chart_mcps, user_mcps, downstream_dashboards_edges
|
|
1196
|
+
)
|
|
1172
1197
|
|
|
1173
1198
|
# Now add MCPs in sequence
|
|
1174
1199
|
mcps.extend(ds_mcps)
|
|
@@ -286,11 +286,15 @@ class Tile:
|
|
|
286
286
|
id: str
|
|
287
287
|
title: str
|
|
288
288
|
embedUrl: str
|
|
289
|
-
dataset: Optional["PowerBIDataset"]
|
|
290
289
|
dataset_id: Optional[str]
|
|
291
|
-
|
|
290
|
+
report_id: Optional[str]
|
|
292
291
|
createdFrom: CreatedFrom
|
|
293
292
|
|
|
293
|
+
# In a first pass, `dataset_id` and/or `report_id` are filled in.
|
|
294
|
+
# In a subsequent pass, the objects are populated.
|
|
295
|
+
dataset: Optional["PowerBIDataset"]
|
|
296
|
+
report: Optional[Report]
|
|
297
|
+
|
|
294
298
|
def get_urn_part(self):
|
|
295
299
|
return f"charts.{self.id}"
|
|
296
300
|
|
|
@@ -337,41 +337,6 @@ class DataResolverBase(ABC):
|
|
|
337
337
|
-tiles), there is no information available on pagination
|
|
338
338
|
|
|
339
339
|
"""
|
|
340
|
-
|
|
341
|
-
def new_dataset_or_report(tile_instance: Any) -> dict:
|
|
342
|
-
"""
|
|
343
|
-
Find out which is the data source for tile. It is either REPORT or DATASET
|
|
344
|
-
"""
|
|
345
|
-
report_fields = {
|
|
346
|
-
Constant.REPORT: (
|
|
347
|
-
self.get_report(
|
|
348
|
-
workspace=workspace,
|
|
349
|
-
report_id=tile_instance.get(Constant.REPORT_ID),
|
|
350
|
-
)
|
|
351
|
-
if tile_instance.get(Constant.REPORT_ID) is not None
|
|
352
|
-
else None
|
|
353
|
-
),
|
|
354
|
-
Constant.CREATED_FROM: Tile.CreatedFrom.UNKNOWN,
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
# reportId and datasetId are exclusive in tile_instance
|
|
358
|
-
# if datasetId is present that means tile is created from dataset
|
|
359
|
-
# if reportId is present that means tile is created from report
|
|
360
|
-
# if both i.e. reportId and datasetId are not present then tile is created from some visualization
|
|
361
|
-
if tile_instance.get(Constant.REPORT_ID) is not None:
|
|
362
|
-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.REPORT
|
|
363
|
-
elif tile_instance.get(Constant.DATASET_ID) is not None:
|
|
364
|
-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.DATASET
|
|
365
|
-
else:
|
|
366
|
-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.VISUALIZATION
|
|
367
|
-
|
|
368
|
-
title: Optional[str] = tile_instance.get(Constant.TITLE)
|
|
369
|
-
_id: Optional[str] = tile_instance.get(Constant.ID)
|
|
370
|
-
created_from: Any = report_fields[Constant.CREATED_FROM]
|
|
371
|
-
logger.info(f"Tile {title}({_id}) is created from {created_from}")
|
|
372
|
-
|
|
373
|
-
return report_fields
|
|
374
|
-
|
|
375
340
|
tile_list_endpoint: str = self.get_tiles_endpoint(
|
|
376
341
|
workspace, dashboard_id=dashboard.id
|
|
377
342
|
)
|
|
@@ -393,8 +358,18 @@ class DataResolverBase(ABC):
|
|
|
393
358
|
title=instance.get(Constant.TITLE),
|
|
394
359
|
embedUrl=instance.get(Constant.EMBED_URL),
|
|
395
360
|
dataset_id=instance.get(Constant.DATASET_ID),
|
|
361
|
+
report_id=instance.get(Constant.REPORT_ID),
|
|
396
362
|
dataset=None,
|
|
397
|
-
|
|
363
|
+
report=None,
|
|
364
|
+
createdFrom=(
|
|
365
|
+
# In the past we considered that only one of the two report_id or dataset_id would be present
|
|
366
|
+
# but we have seen cases where both are present. If both are present, we prioritize the report.
|
|
367
|
+
Tile.CreatedFrom.REPORT
|
|
368
|
+
if instance.get(Constant.REPORT_ID)
|
|
369
|
+
else Tile.CreatedFrom.DATASET
|
|
370
|
+
if instance.get(Constant.DATASET_ID)
|
|
371
|
+
else Tile.CreatedFrom.VISUALIZATION
|
|
372
|
+
),
|
|
398
373
|
)
|
|
399
374
|
for instance in tile_dict
|
|
400
375
|
if instance is not None
|
|
@@ -625,13 +625,26 @@ class PowerBiAPI:
|
|
|
625
625
|
dashboard.tiles = self._get_resolver().get_tiles(
|
|
626
626
|
workspace, dashboard=dashboard
|
|
627
627
|
)
|
|
628
|
-
# set the dataset for tiles
|
|
628
|
+
# set the dataset and the report for tiles
|
|
629
629
|
for tile in dashboard.tiles:
|
|
630
|
+
# In Power BI, dashboards, reports, and datasets are tightly scoped to the workspace they belong to.
|
|
631
|
+
# https://learn.microsoft.com/en-us/power-bi/collaborate-share/service-new-workspaces
|
|
632
|
+
if tile.report_id:
|
|
633
|
+
tile.report = workspace.reports.get(tile.report_id)
|
|
634
|
+
if tile.report is None:
|
|
635
|
+
self.reporter.info(
|
|
636
|
+
title="Missing Report Lineage For Tile",
|
|
637
|
+
message="A Report reference that failed to be resolved. Please ensure that 'extract_reports' is set to True in the configuration.",
|
|
638
|
+
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, report-id: {tile.report_id}",
|
|
639
|
+
)
|
|
640
|
+
# However, semantic models (aka datasets) can be shared across workspaces
|
|
641
|
+
# https://learn.microsoft.com/en-us/fabric/admin/portal-workspace#use-semantic-models-across-workspaces
|
|
642
|
+
# That's why the global 'dataset_registry' is required
|
|
630
643
|
if tile.dataset_id:
|
|
631
644
|
tile.dataset = self.dataset_registry.get(tile.dataset_id)
|
|
632
645
|
if tile.dataset is None:
|
|
633
646
|
self.reporter.info(
|
|
634
|
-
title="Missing Lineage For Tile",
|
|
647
|
+
title="Missing Dataset Lineage For Tile",
|
|
635
648
|
message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.",
|
|
636
649
|
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, dataset-id: {tile.dataset_id}",
|
|
637
650
|
)
|
|
@@ -653,10 +666,10 @@ class PowerBiAPI:
|
|
|
653
666
|
for dashboard in workspace.dashboards.values():
|
|
654
667
|
dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])
|
|
655
668
|
|
|
669
|
+
# fill reports first since some dashboards may reference a report
|
|
670
|
+
fill_reports()
|
|
656
671
|
if self.__config.extract_dashboards:
|
|
657
672
|
fill_dashboards()
|
|
658
|
-
|
|
659
|
-
fill_reports()
|
|
660
673
|
fill_dashboard_tags()
|
|
661
674
|
self._fill_independent_datasets(workspace=workspace)
|
|
662
675
|
|
|
@@ -98,6 +98,11 @@ class SnowflakeFilterConfig(SQLFilterConfig):
|
|
|
98
98
|
)
|
|
99
99
|
# table_pattern and view_pattern are inherited from SQLFilterConfig
|
|
100
100
|
|
|
101
|
+
stream_pattern: AllowDenyPattern = Field(
|
|
102
|
+
default=AllowDenyPattern.allow_all(),
|
|
103
|
+
description="Regex patterns for streams to filter in ingestion. Note: Defaults to table_pattern if not specified. Specify regex to match the entire view name in database.schema.view format. e.g. to match all views starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
|
|
104
|
+
)
|
|
105
|
+
|
|
101
106
|
match_fully_qualified_names: bool = Field(
|
|
102
107
|
default=False,
|
|
103
108
|
description="Whether `schema_pattern` is matched against fully qualified schema name `<catalog>.<schema>`.",
|
|
@@ -274,6 +279,11 @@ class SnowflakeV2Config(
|
|
|
274
279
|
description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.",
|
|
275
280
|
)
|
|
276
281
|
|
|
282
|
+
include_streams: bool = Field(
|
|
283
|
+
default=True,
|
|
284
|
+
description="If enabled, streams will be ingested as separate entities from tables/views.",
|
|
285
|
+
)
|
|
286
|
+
|
|
277
287
|
structured_property_pattern: AllowDenyPattern = Field(
|
|
278
288
|
default=AllowDenyPattern.allow_all(),
|
|
279
289
|
description=(
|
|
@@ -49,6 +49,7 @@ from datahub.metadata.urns import CorpUserUrn
|
|
|
49
49
|
from datahub.sql_parsing.schema_resolver import SchemaResolver
|
|
50
50
|
from datahub.sql_parsing.sql_parsing_aggregator import (
|
|
51
51
|
KnownLineageMapping,
|
|
52
|
+
ObservedQuery,
|
|
52
53
|
PreparsedQuery,
|
|
53
54
|
SqlAggregatorReport,
|
|
54
55
|
SqlParsingAggregator,
|
|
@@ -241,7 +242,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
241
242
|
use_cached_audit_log = audit_log_file.exists()
|
|
242
243
|
|
|
243
244
|
queries: FileBackedList[
|
|
244
|
-
Union[
|
|
245
|
+
Union[
|
|
246
|
+
KnownLineageMapping,
|
|
247
|
+
PreparsedQuery,
|
|
248
|
+
TableRename,
|
|
249
|
+
TableSwap,
|
|
250
|
+
ObservedQuery,
|
|
251
|
+
]
|
|
245
252
|
]
|
|
246
253
|
if use_cached_audit_log:
|
|
247
254
|
logger.info("Using cached audit log")
|
|
@@ -252,7 +259,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
252
259
|
|
|
253
260
|
shared_connection = ConnectionWrapper(audit_log_file)
|
|
254
261
|
queries = FileBackedList(shared_connection)
|
|
255
|
-
entry: Union[
|
|
262
|
+
entry: Union[
|
|
263
|
+
KnownLineageMapping,
|
|
264
|
+
PreparsedQuery,
|
|
265
|
+
TableRename,
|
|
266
|
+
TableSwap,
|
|
267
|
+
ObservedQuery,
|
|
268
|
+
]
|
|
256
269
|
|
|
257
270
|
with self.report.copy_history_fetch_timer:
|
|
258
271
|
for entry in self.fetch_copy_history():
|
|
@@ -329,7 +342,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
329
342
|
|
|
330
343
|
def fetch_query_log(
|
|
331
344
|
self, users: UsersMapping
|
|
332
|
-
) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap]]:
|
|
345
|
+
) -> Iterable[Union[PreparsedQuery, TableRename, TableSwap, ObservedQuery]]:
|
|
333
346
|
query_log_query = _build_enriched_query_log_query(
|
|
334
347
|
start_time=self.config.window.start_time,
|
|
335
348
|
end_time=self.config.window.end_time,
|
|
@@ -362,7 +375,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
362
375
|
|
|
363
376
|
def _parse_audit_log_row(
|
|
364
377
|
self, row: Dict[str, Any], users: UsersMapping
|
|
365
|
-
) -> Optional[Union[TableRename, TableSwap, PreparsedQuery]]:
|
|
378
|
+
) -> Optional[Union[TableRename, TableSwap, PreparsedQuery, ObservedQuery]]:
|
|
366
379
|
json_fields = {
|
|
367
380
|
"DIRECT_OBJECTS_ACCESSED",
|
|
368
381
|
"OBJECTS_MODIFIED",
|
|
@@ -398,6 +411,34 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
398
411
|
pass
|
|
399
412
|
else:
|
|
400
413
|
return None
|
|
414
|
+
|
|
415
|
+
user = CorpUserUrn(
|
|
416
|
+
self.identifiers.get_user_identifier(
|
|
417
|
+
res["user_name"], users.get(res["user_name"])
|
|
418
|
+
)
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# Use direct_objects_accessed instead of objects_modified
|
|
422
|
+
# objects_modified returns $SYS_VIEW_X with no mapping
|
|
423
|
+
has_stream_objects = any(
|
|
424
|
+
obj.get("objectDomain") == "Stream" for obj in direct_objects_accessed
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
# If a stream is used, default to query parsing.
|
|
428
|
+
if has_stream_objects:
|
|
429
|
+
logger.debug("Found matching stream object")
|
|
430
|
+
return ObservedQuery(
|
|
431
|
+
query=res["query_text"],
|
|
432
|
+
session_id=res["session_id"],
|
|
433
|
+
timestamp=res["query_start_time"].astimezone(timezone.utc),
|
|
434
|
+
user=user,
|
|
435
|
+
default_db=res["default_db"],
|
|
436
|
+
default_schema=res["default_schema"],
|
|
437
|
+
query_hash=get_query_fingerprint(
|
|
438
|
+
res["query_text"], self.identifiers.platform, fast=True
|
|
439
|
+
),
|
|
440
|
+
)
|
|
441
|
+
|
|
401
442
|
upstreams = []
|
|
402
443
|
column_usage = {}
|
|
403
444
|
|
|
@@ -460,12 +501,6 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
|
|
|
460
501
|
)
|
|
461
502
|
)
|
|
462
503
|
|
|
463
|
-
user = CorpUserUrn(
|
|
464
|
-
self.identifiers.get_user_identifier(
|
|
465
|
-
res["user_name"], users.get(res["user_name"])
|
|
466
|
-
)
|
|
467
|
-
)
|
|
468
|
-
|
|
469
504
|
timestamp: datetime = res["query_start_time"]
|
|
470
505
|
timestamp = timestamp.astimezone(timezone.utc)
|
|
471
506
|
|
|
@@ -9,6 +9,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
|
|
|
9
9
|
from datahub.utilities.prefix_batch_builder import PrefixGroup
|
|
10
10
|
|
|
11
11
|
SHOW_VIEWS_MAX_PAGE_SIZE = 10000
|
|
12
|
+
SHOW_STREAM_MAX_PAGE_SIZE = 10000
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
def create_deny_regex_sql_filter(
|
|
@@ -36,6 +37,7 @@ class SnowflakeQuery:
|
|
|
36
37
|
SnowflakeObjectDomain.VIEW.capitalize(),
|
|
37
38
|
SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(),
|
|
38
39
|
SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(),
|
|
40
|
+
SnowflakeObjectDomain.STREAM.capitalize(),
|
|
39
41
|
}
|
|
40
42
|
|
|
41
43
|
ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format(
|
|
@@ -44,7 +46,8 @@ class SnowflakeQuery:
|
|
|
44
46
|
ACCESS_HISTORY_TABLE_DOMAINS_FILTER = (
|
|
45
47
|
"("
|
|
46
48
|
f"'{SnowflakeObjectDomain.TABLE.capitalize()}',"
|
|
47
|
-
f"'{SnowflakeObjectDomain.VIEW.capitalize()}'"
|
|
49
|
+
f"'{SnowflakeObjectDomain.VIEW.capitalize()}',"
|
|
50
|
+
f"'{SnowflakeObjectDomain.STREAM.capitalize()}',"
|
|
48
51
|
")"
|
|
49
52
|
)
|
|
50
53
|
|
|
@@ -963,3 +966,19 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
963
966
|
@staticmethod
|
|
964
967
|
def get_all_users() -> str:
|
|
965
968
|
return """SELECT name as "NAME", email as "EMAIL" FROM SNOWFLAKE.ACCOUNT_USAGE.USERS"""
|
|
969
|
+
|
|
970
|
+
@staticmethod
|
|
971
|
+
def streams_for_database(
|
|
972
|
+
db_name: str,
|
|
973
|
+
limit: int = SHOW_STREAM_MAX_PAGE_SIZE,
|
|
974
|
+
stream_pagination_marker: Optional[str] = None,
|
|
975
|
+
) -> str:
|
|
976
|
+
# SHOW STREAMS can return a maximum of 10000 rows.
|
|
977
|
+
# https://docs.snowflake.com/en/sql-reference/sql/show-streams#usage-notes
|
|
978
|
+
assert limit <= SHOW_STREAM_MAX_PAGE_SIZE
|
|
979
|
+
|
|
980
|
+
# To work around this, we paginate through the results using the FROM clause.
|
|
981
|
+
from_clause = (
|
|
982
|
+
f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
|
|
983
|
+
)
|
|
984
|
+
return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
|
|
@@ -104,6 +104,7 @@ class SnowflakeV2Report(
|
|
|
104
104
|
schemas_scanned: int = 0
|
|
105
105
|
databases_scanned: int = 0
|
|
106
106
|
tags_scanned: int = 0
|
|
107
|
+
streams_scanned: int = 0
|
|
107
108
|
|
|
108
109
|
include_usage_stats: bool = False
|
|
109
110
|
include_operational_stats: bool = False
|
|
@@ -113,6 +114,7 @@ class SnowflakeV2Report(
|
|
|
113
114
|
table_lineage_query_secs: float = -1
|
|
114
115
|
external_lineage_queries_secs: float = -1
|
|
115
116
|
num_tables_with_known_upstreams: int = 0
|
|
117
|
+
num_streams_with_known_upstreams: int = 0
|
|
116
118
|
num_upstream_lineage_edge_parsing_failed: int = 0
|
|
117
119
|
num_secure_views_missing_definition: int = 0
|
|
118
120
|
num_structured_property_templates_created: int = 0
|
|
@@ -131,6 +133,8 @@ class SnowflakeV2Report(
|
|
|
131
133
|
num_get_tags_for_object_queries: int = 0
|
|
132
134
|
num_get_tags_on_columns_for_table_queries: int = 0
|
|
133
135
|
|
|
136
|
+
num_get_streams_for_schema_queries: int = 0
|
|
137
|
+
|
|
134
138
|
rows_zero_objects_modified: int = 0
|
|
135
139
|
|
|
136
140
|
_processed_tags: MutableSet[str] = field(default_factory=set)
|
|
@@ -157,6 +161,8 @@ class SnowflakeV2Report(
|
|
|
157
161
|
return
|
|
158
162
|
self._scanned_tags.add(name)
|
|
159
163
|
self.tags_scanned += 1
|
|
164
|
+
elif ent_type == "stream":
|
|
165
|
+
self.streams_scanned += 1
|
|
160
166
|
else:
|
|
161
167
|
raise KeyError(f"Unknown entity {ent_type}.")
|
|
162
168
|
|