semantic-link-labs 0.8.11__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/METADATA +9 -6
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/RECORD +40 -40
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +29 -2
- sempy_labs/_authentication.py +78 -4
- sempy_labs/_capacities.py +770 -200
- sempy_labs/_capacity_migration.py +7 -37
- sempy_labs/_clear_cache.py +8 -8
- sempy_labs/_deployment_pipelines.py +1 -1
- sempy_labs/_gateways.py +2 -0
- sempy_labs/_generate_semantic_model.py +8 -0
- sempy_labs/_helper_functions.py +119 -79
- sempy_labs/_job_scheduler.py +138 -3
- sempy_labs/_list_functions.py +40 -31
- sempy_labs/_model_bpa.py +207 -204
- sempy_labs/_model_bpa_bulk.py +2 -2
- sempy_labs/_model_bpa_rules.py +3 -3
- sempy_labs/_notebooks.py +2 -0
- sempy_labs/_query_scale_out.py +8 -0
- sempy_labs/_sql.py +11 -7
- sempy_labs/_vertipaq.py +4 -2
- sempy_labs/_warehouses.py +6 -6
- sempy_labs/admin/_basic_functions.py +156 -103
- sempy_labs/admin/_domains.py +7 -2
- sempy_labs/admin/_git.py +4 -1
- sempy_labs/admin/_items.py +7 -2
- sempy_labs/admin/_scanner.py +7 -4
- sempy_labs/directlake/_directlake_schema_compare.py +7 -2
- sempy_labs/directlake/_directlake_schema_sync.py +6 -0
- sempy_labs/directlake/_dl_helper.py +51 -31
- sempy_labs/directlake/_get_directlake_lakehouse.py +20 -27
- sempy_labs/directlake/_update_directlake_partition_entity.py +5 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +17 -22
- sempy_labs/lakehouse/_get_lakehouse_tables.py +20 -32
- sempy_labs/lakehouse/_lakehouse.py +2 -19
- sempy_labs/report/_generate_report.py +45 -0
- sempy_labs/report/_report_bpa.py +2 -2
- sempy_labs/tom/_model.py +97 -16
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.11.dist-info → semantic_link_labs-0.9.1.dist-info}/top_level.txt +0 -0
sempy_labs/admin/_scanner.py
CHANGED
@@ -6,6 +6,7 @@ import numpy as np
 import time
 from sempy_labs.admin._basic_functions import list_workspaces
 from sempy._utils._log import log
+import sempy_labs._authentication as auth
 
 
 @log
@@ -25,6 +26,8 @@ def scan_workspaces(
    `Admin - WorkspaceInfo GetScanStatus <https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-get-scan-status>`_.
    `Admin - WorkspaceInfo GetScanResult <https://learn.microsoft.com/rest/api/power-bi/admin/workspace-info-get-scan-result>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
    Parameters
    ----------
    data_source_details : bool, default=False
@@ -32,11 +35,11 @@ def scan_workspaces(
    dataset_schema: bool = False
        Whether to return dataset schema (tables, columns and measures). If you set this parameter to true, you must fully enable metadata scanning in order for data to be returned. For more information, see Enable tenant settings for metadata scanning.
    dataset_expressions : bool, default=False
-        Whether to return data source details
+        Whether to return data source details.
    lineage : bool, default=False
-        Whether to return lineage info (upstream dataflows, tiles, data source IDs)
+        Whether to return lineage info (upstream dataflows, tiles, data source IDs).
    artifact_users : bool, default=False
-        Whether to return user details for a Power BI item (such as a report or a dashboard)
+        Whether to return user details for a Power BI item (such as a report or a dashboard).
    workspace : str | List[str] | uuid.UUID | List[uuid.UUID], default=None
        The required workspace name(s) or id(s) to be scanned
 
@@ -51,7 +54,7 @@ def scan_workspaces(
        "misconfiguredDatasourceInstances": [],
    }
 
-    client = fabric.FabricRestClient()
+    client = fabric.FabricRestClient(token_provider=auth.token_provider.get())
 
    if workspace is None:
        workspace = fabric.resolve_workspace_name()
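The scanner now builds its Fabric REST client from sempy_labs._authentication.token_provider, which is how the service-principal support mentioned in the docstring is wired in. A minimal usage sketch of the function itself; the import path and workspace name below are illustrative assumptions, not taken from this diff:

from sempy_labs.admin import scan_workspaces  # assumed public import path for the function above

# Parameter names follow the docstring shown in the diff; the workspace value is a placeholder.
scan_result = scan_workspaces(
    data_source_details=True,
    dataset_schema=False,
    lineage=True,
    artifact_users=False,
    workspace=["Sales Analytics"],
)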
sempy_labs/directlake/_directlake_schema_compare.py
CHANGED

@@ -50,7 +50,6 @@ def direct_lake_schema_compare(
    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
        get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
    )
-    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
 
    if artifact_type == "Warehouse":
        raise ValueError(
@@ -59,11 +58,17 @@ def direct_lake_schema_compare(
 
    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
 
-    if not any(r["Mode"] == "DirectLake" for
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
        raise ValueError(
            f"{icons.red_dot} The '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode."
        )
 
+    if artifact_type is None:
+        raise ValueError(
+            f"{icons.red_dot} This function only supports Direct Lake semantic models where the source lakehouse resides in the same workpace as the semantic model."
+        )
+
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
    dfT = fabric.list_tables(dataset=dataset_id, workspace=workspace_id)
    dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id)
    lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)
sempy_labs/directlake/_directlake_schema_sync.py
CHANGED

@@ -61,6 +61,12 @@ def direct_lake_schema_sync(
        raise ValueError(
            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
        )
+
+    if artifact_type is None:
+        raise ValueError(
+            f"{icons.red_dot} This function only supports Direct Lake semantic models where the source lakehouse resides in the same workpace as the semantic model."
+        )
+
    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
 
    lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)
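Both schema helpers above now raise a ValueError when get_direct_lake_source returns no artifact type, i.e. when the source lakehouse does not live in the same workspace as the semantic model. A hedged calling sketch; the import path, dataset and workspace names are placeholders, not taken from this diff:

from sempy_labs.directlake import direct_lake_schema_compare  # assumed import path

try:
    direct_lake_schema_compare(dataset="AdventureWorks", workspace="Analytics")  # placeholder names
except ValueError as exc:
    # Raised if the model is not Direct Lake, or (new in 0.9.1) if the source
    # lakehouse resides in a different workspace than the semantic model.
    print(exc)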
sempy_labs/directlake/_dl_helper.py
CHANGED

@@ -205,7 +205,7 @@ def get_direct_lake_source(
    dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> Tuple[str, str, UUID, UUID]:
    """
-    Obtains the source information for a direct lake semantic model.
+    Obtains the source information for a direct lake semantic model (if the source is located in the same workspace as the semantic model).
 
    Parameters
    ----------
@@ -224,34 +224,54 @@ def get_direct_lake_source(
        If the semantic model is not a Direct Lake semantic model, it will return None, None, None.
    """
 
-
-    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
-    client = fabric.PowerBIRestClient()
-    request_body = {
-        "artifacts": [
-            {
-                "objectId": dataset_id,
-                "type": "dataset",
-            }
-        ]
-    }
-    response = client.post(
-        "metadata/relations/upstream?apiVersion=3", json=request_body
-    )
-
-    artifacts = response.json().get("artifacts", [])
-    sql_id, sql_object_name, sql_workspace_id, artifact_type = None, None, None, None
+    from sempy_labs._helper_functions import get_direct_lake_sql_endpoint
 
-    for artifact in artifacts:
-        object_type = artifact.get("typeName")
-        display_name = artifact.get("displayName")
-        if object_type in ["Datawarehouse", "Lakewarehouse"]:
-            artifact_type = (
-                "Warehouse" if object_type == "Datawarehouse" else "Lakehouse"
-            )
-            sql_id = artifact.get("objectId")
-            sql_workspace_id = artifact.get("workspace", {}).get("objectId")
-            sql_object_name = display_name
-            break
-
-    return artifact_type, sql_object_name, sql_id, sql_workspace_id
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    sql_endpoint_id = get_direct_lake_sql_endpoint(dataset=dataset, workspace=workspace)
+    dfI = fabric.list_items(workspace=workspace)
+    dfI_filt = dfI[(dfI["Id"] == sql_endpoint_id) & (dfI["Type"] == "SQLEndpoint")]
+
+    artifact_type, artifact_name, artifact_id = None, None, None
+
+    if not dfI_filt.empty:
+        artifact_name = dfI_filt["Display Name"].iloc[0]
+        artifact_id = dfI[
+            (dfI["Display Name"] == artifact_name)
+            & (dfI["Type"].isin(["Lakehouse", "Warehouse"]))
+        ]["Id"].iloc[0]
+        artifact_type = dfI[
+            (dfI["Display Name"] == artifact_name)
+            & (dfI["Type"].isin(["Lakehouse", "Warehouse"]))
+        ]["Type"].iloc[0]
+
+    return artifact_type, artifact_name, artifact_id, workspace_id
+
+    # client = fabric.PowerBIRestClient()
+    # request_body = {
+    #     "artifacts": [
+    #         {
+    #             "objectId": dataset_id,
+    #             "type": "dataset",
+    #         }
+    #     ]
+    # }
+    # response = client.post(
+    #     "metadata/relations/upstream?apiVersion=3", json=request_body
+    # )
+
+    # artifacts = response.json().get("artifacts", [])
+    # sql_id, sql_object_name, sql_workspace_id, artifact_type = None, None, None, None
+
+    # for artifact in artifacts:
+    #     object_type = artifact.get("typeName")
+    #     display_name = artifact.get("displayName")
+    #     if object_type in ["Datawarehouse", "Lakewarehouse"]:
+    #         artifact_type = (
+    #             "Warehouse" if object_type == "Datawarehouse" else "Lakehouse"
+    #         )
+    #         sql_id = artifact.get("objectId")
+    #         sql_workspace_id = artifact.get("workspace", {}).get("objectId")
+    #         sql_object_name = display_name
+    #         break
+
+    # return artifact_type, sql_object_name, sql_id, sql_workspace_id
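get_direct_lake_source no longer posts to the internal metadata/relations/upstream endpoint; it resolves the model's SQL endpoint and matches it against fabric.list_items within the model's own workspace, returning None values when no match is found. A sketch of consuming the new four-value return; the import path and names are placeholders:

from sempy_labs.directlake import get_direct_lake_source  # assumed import path

artifact_type, artifact_name, artifact_id, workspace_id = get_direct_lake_source(
    dataset="AdventureWorks", workspace="Analytics"  # placeholder names
)

if artifact_type is None:
    # The source could not be resolved; in 0.9.1 this means it is not in the model's workspace.
    print("No Lakehouse/Warehouse source found in the model's workspace.")
else:
    print(f"{artifact_type} '{artifact_name}' ({artifact_id}) in workspace {workspace_id}")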
sempy_labs/directlake/_get_directlake_lakehouse.py
CHANGED

@@ -8,6 +8,7 @@ from sempy_labs._helper_functions import (
 )
 from typing import Optional, Tuple
 from uuid import UUID
+import sempy_labs._icons as icons
 
 
 def get_direct_lake_lakehouse(
@@ -41,30 +42,22 @@ def get_direct_lake_lakehouse(
        The lakehouse name and lakehouse ID.
    """
 
-
-
-
-
-
-
-    if
-
-
-
-
-
-
-
-
-
-
-
-
-    dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-    dfI_filt = dfI[dfI["Id"] == sqlEndpointId]
-    lakehouseName = dfI_filt["Display Name"].iloc[0]
-
-    lakehouseId = resolve_lakehouse_id(lakehouseName, lakehouse_workspace)
-
-    return lakehouseName, lakehouseId
+    from sempy_labs.directlake._dl_helper import get_direct_lake_source
+
+    artifact_type, artifact_name, artifact_id, workspace_id = get_direct_lake_source(
+        dataset=dataset, workspace=workspace
+    )
+
+    if artifact_type in ["Lakehouse", "Warehouse"]:
+        return artifact_name, artifact_id
+    else:
+        dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+        dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+        if dfP_filt.empty:
+            raise ValueError(
+                f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+            )
+        lakehouse_id = resolve_lakehouse_id(
+            lakehouse=lakehouse, workspace=lakehouse_workspace
+        )
+        return lakehouse, lakehouse_id
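get_direct_lake_lakehouse is now a thin wrapper over get_direct_lake_source and returns the resolved artifact name and ID directly when the source sits in the model's workspace. A sketch; the import path and names are placeholders, and the full signature also accepts the lakehouse and lakehouse_workspace arguments used in the fallback branch:

from sempy_labs.directlake import get_direct_lake_lakehouse  # assumed import path

lakehouse_name, lakehouse_id = get_direct_lake_lakehouse(
    dataset="AdventureWorks", workspace="Analytics"  # placeholder names
)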
sempy_labs/directlake/_update_directlake_partition_entity.py
CHANGED

@@ -125,6 +125,11 @@ def add_table_to_direct_lake_semantic_model(
            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
        )
 
+    if artifact_type is None:
+        raise ValueError(
+            f"{icons.red_dot} This function only supports Direct Lake semantic models where the source lakehouse resides in the same workpace as the semantic model."
+        )
+
    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
 
    with connect_semantic_model(
sempy_labs/lakehouse/_get_lakehouse_columns.py
CHANGED

@@ -1,11 +1,9 @@
-import sempy.fabric as fabric
 import pandas as pd
 from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
    format_dax_object_name,
-    resolve_lakehouse_id,
    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
 )
 from typing import Optional
 from sempy._utils._log import log
@@ -14,15 +12,15 @@ from uuid import UUID
 
 @log
 def get_lakehouse_columns(
-    lakehouse: Optional[str] = None, workspace: Optional[str | UUID] = None
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
    """
    Shows the tables and columns of a lakehouse and their respective properties.
 
    Parameters
    ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
        Defaults to None which resolves to the lakehouse attached to the notebook.
    lakehouse_workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID used by the lakehouse.
@@ -49,34 +47,31 @@ def get_lakehouse_columns(
    )
 
    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-
-
-
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )
 
    spark = SparkSession.builder.getOrCreate()
 
    tables = get_lakehouse_tables(
-        lakehouse=
+        lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
    )
    tables_filt = tables[tables["Format"] == "delta"]
 
-    for
-
-
-        delta_table = DeltaTable.forPath(spark,
+    for _, r in tables_filt.iterrows():
+        table_name = r["Table Name"]
+        path = r["Location"]
+        delta_table = DeltaTable.forPath(spark, path)
        sparkdf = delta_table.toDF()
 
-        for
-
+        for col_name, data_type in sparkdf.dtypes:
+            full_column_name = format_dax_object_name(table_name, col_name)
            new_data = {
                "Workspace Name": workspace_name,
                "Lakehouse Name": lakehouse,
-                "Table Name":
-                "Column Name":
-                "Full Column Name":
+                "Table Name": table_name,
+                "Column Name": col_name,
+                "Full Column Name": full_column_name,
                "Data Type": data_type,
            }
            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
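get_lakehouse_columns now accepts either the lakehouse name or its UUID and resolves both through resolve_lakehouse_name_and_id. A sketch; the import path and values are placeholders:

from sempy_labs.lakehouse import get_lakehouse_columns  # assumed import path

# Name or UUID both work as of this release; the values below are placeholders.
df_columns = get_lakehouse_columns(lakehouse="MyLakehouse", workspace="Analytics")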
sempy_labs/lakehouse/_get_lakehouse_tables.py
CHANGED

@@ -4,10 +4,11 @@ from pyspark.sql import SparkSession
 import pyarrow.parquet as pq
 import datetime
 from sempy_labs._helper_functions import (
-
-    resolve_lakehouse_name,
+    _get_column_aggregate,
    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
    pagination,
+    save_as_delta_table,
 )
 from sempy_labs.directlake._guardrails import (
    get_sku_size,
@@ -23,7 +24,7 @@ from uuid import UUID
 
 @log
 def get_lakehouse_tables(
-    lakehouse: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
    workspace: Optional[str | UUID] = None,
    extended: bool = False,
    count_rows: bool = False,
@@ -36,8 +37,8 @@ def get_lakehouse_tables(
 
    Parameters
    ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
        Defaults to None which resolves to the lakehouse attached to the notebook.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID used by the lakehouse.
@@ -68,12 +69,9 @@ def get_lakehouse_tables(
    )
 
    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-
-
-
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )
 
    if count_rows:  # Setting countrows defaults to extended=True
        extended = True
@@ -106,7 +104,7 @@ def get_lakehouse_tables(
        for i in r.get("data", []):
            new_data = {
                "Workspace Name": workspace_name,
-                "Lakehouse Name":
+                "Lakehouse Name": lakehouse_name,
                "Table Name": i.get("name"),
                "Format": i.get("format"),
                "Type": i.get("type"),
@@ -179,23 +177,17 @@ def get_lakehouse_tables(
                f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
            )
 
-
-
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace_id
-        )
+        (current_lakehouse_name, current_lakehouse_id) = resolve_lakehouse_name_and_id()
        lakeTName = "lakehouse_table_details"
        lakeT_filt = df[df["Table Name"] == lakeTName]
 
-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
-
        if len(lakeT_filt) == 0:
-
+            run_id = 1
        else:
-
-
-
+            max_run_id = _get_column_aggregate(
+                lakehouse=current_lakehouse_name, table_name=lakeTName
+            )
+            run_id = max_run_id + 1
 
        export_df = df.copy()
 
@@ -240,15 +232,11 @@ def get_lakehouse_tables(
        print(
            f"{icons.in_progress} Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n"
        )
-
-        export_df["
-        export_df["RunId"] = runId
+        export_df["Timestamp"] = datetime.datetime.now()
+        export_df["RunId"] = run_id
 
-
-
-        spark_df.write.mode("append").format("delta").saveAsTable(lakeTName)
-        print(
-            f"{icons.bullet} Lakehouse table properties have been saved to the '{lakeTName}' delta table."
+        save_as_delta_table(
+            dataframe=export_df, delta_table_name=lakeTName, write_mode="append"
        )
 
    return df
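The same name-or-UUID handling applies to get_lakehouse_tables, and its export path now writes through the shared save_as_delta_table helper instead of calling the Spark writer directly. A sketch; the import path and values are placeholders:

from sempy_labs.lakehouse import get_lakehouse_tables  # assumed import path

df_tables = get_lakehouse_tables(
    lakehouse="MyLakehouse",  # placeholder; a lakehouse UUID is also accepted as of 0.9.1
    workspace="Analytics",    # placeholder
    extended=True,
    count_rows=False,
)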
sempy_labs/lakehouse/_lakehouse.py
CHANGED

@@ -1,9 +1,4 @@
-import sempy.fabric as fabric
 from tqdm.auto import tqdm
-from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
-    resolve_workspace_name_and_id,
-)
 from typing import List, Optional, Union
 from sempy._utils._log import log
 from uuid import UUID
@@ -56,13 +51,7 @@ def optimize_lakehouse_tables(
    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
    from delta import DeltaTable
 
-
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
-
-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
 
    if isinstance(tables, str):
@@ -115,13 +104,7 @@ def vacuum_lakehouse_tables(
    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
    from delta import DeltaTable
 
-
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
-
-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
 
    if isinstance(tables, str):
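optimize_lakehouse_tables and vacuum_lakehouse_tables no longer resolve lakehouse names themselves; they pass the lakehouse and workspace straight through to get_lakehouse_tables. A sketch; the import path and values are placeholders:

from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables  # assumed import path

# tables accepts a single name or a list; lakehouse/workspace default to the attached lakehouse.
optimize_lakehouse_tables(tables=["FactSales"], lakehouse="MyLakehouse", workspace="Analytics")
vacuum_lakehouse_tables(tables="FactSales", lakehouse="MyLakehouse", workspace="Analytics")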
sempy_labs/report/_generate_report.py
CHANGED

@@ -8,11 +8,13 @@ from sempy_labs._helper_functions import (
    _conv_b64,
    resolve_report_id,
    resolve_dataset_name_and_id,
+    resolve_item_name_and_id,
    lro,
 )
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from uuid import UUID
+from sempy.fabric.exceptions import FabricHTTPException
 
 
 def create_report_from_reportjson(
@@ -371,3 +373,46 @@ def _create_report(
        report_workspace=report_workspace,
        dataset_workspace=dataset_workspace,
    )
+
+
+def _get_report(
+    report: str | UUID, workspace: Optional[str | UUID] = None
+) -> pd.DataFrame:
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (report_name, report_id) = resolve_item_name_and_id(
+        item=report, type="Report", workspace=workspace
+    )
+
+    client = fabric.FabricRestClient()
+    response = client.get(f"/v1.0/myorg/groups/{workspace_id}/reports/{report_id}")
+
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    result = response.json()
+
+    new_data = {
+        "Id": result.get("id"),
+        "Report Type": result.get("reportType"),
+        "Name": result.get("name"),
+        "Web Url": result.get("webUrl"),
+        "Embed Url": result.get("embedUrl"),
+        "Is From Pbix": result.get("isFromPbix"),
+        "Is Owned By Me": result.get("isOwnedByMe"),
+        "Dataset Id": result.get("datasetId"),
+        "Dataset Workspace Id": result.get("datasetWorkspaceId"),
+        "Users": result.get("users") if result.get("users") is not None else [],
+        "Subscriptions": (
+            result.get("subscriptions")
+            if result.get("subscriptions") is not None
+            else []
+        ),
+    }
+
+    df = pd.DataFrame([new_data])
+
+    bool_cols = ["Is From Pbix", "Is Owned By Me"]
+    df[bool_cols] = df[bool_cols].astype(bool)
+
+    return df
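The new private _get_report helper wraps the Power BI "Get Report In Group" REST call and flattens the response into a one-row DataFrame with typed boolean columns. Since it is private, the call below is illustrative only; the report and workspace names are placeholders:

from sempy_labs.report._generate_report import _get_report  # private helper introduced in this diff

report_df = _get_report(report="Sales Overview", workspace="Analytics")  # placeholder names
print(report_df[["Id", "Name", "Dataset Id", "Is Owned By Me"]])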
sempy_labs/report/_report_bpa.py
CHANGED
@@ -10,7 +10,7 @@ from sempy_labs._helper_functions import (
    resolve_report_id,
    resolve_lakehouse_name,
    resolve_workspace_capacity,
-
+    _get_column_aggregate,
    resolve_workspace_name_and_id,
 )
 from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
@@ -217,7 +217,7 @@ def run_report_bpa(
        if len(lakeT_filt) == 0:
            runId = 1
        else:
-            max_run_id =
+            max_run_id = _get_column_aggregate(
                lakehouse=lakehouse, table_name=delta_table_name
            )
            runId = max_run_id + 1