semantic-link-labs 0.7.4-py3-none-any.whl → 0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/METADATA +7 -3
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/RECORD +32 -23
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +57 -18
- sempy_labs/_capacities.py +39 -3
- sempy_labs/_capacity_migration.py +624 -0
- sempy_labs/_clear_cache.py +8 -8
- sempy_labs/_connections.py +15 -13
- sempy_labs/_git.py +20 -21
- sempy_labs/_helper_functions.py +33 -30
- sempy_labs/_icons.py +19 -0
- sempy_labs/_list_functions.py +210 -0
- sempy_labs/_model_bpa.py +1 -1
- sempy_labs/_query_scale_out.py +4 -3
- sempy_labs/_spark.py +31 -36
- sempy_labs/_sql.py +60 -15
- sempy_labs/_vertipaq.py +9 -7
- sempy_labs/admin/__init__.py +53 -0
- sempy_labs/admin/_basic_functions.py +806 -0
- sempy_labs/admin/_domains.py +411 -0
- sempy_labs/directlake/_generate_shared_expression.py +11 -14
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +14 -24
- sempy_labs/report/__init__.py +9 -6
- sempy_labs/report/_report_bpa.py +359 -0
- sempy_labs/report/_report_bpa_rules.py +113 -0
- sempy_labs/report/_report_helper.py +254 -0
- sempy_labs/report/_report_list_functions.py +95 -0
- sempy_labs/report/_report_rebind.py +0 -4
- sempy_labs/report/_reportwrapper.py +2039 -0
- sempy_labs/tom/_model.py +78 -4
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/top_level.txt +0 -0
sempy_labs/_connections.py
CHANGED
@@ -2,7 +2,6 @@ import sempy.fabric as fabric
 import pandas as pd
 from sempy.fabric.exceptions import FabricHTTPException
 from typing import Optional
-import sempy_labs._icons as icons
 from sempy_labs._helper_functions import pagination


@@ -77,8 +76,9 @@ def list_connections() -> pd.DataFrame:
     return df


-def list_item_connections(
-
+def list_item_connections(
+    item_name: str, item_type: str, workspace: Optional[str] = None
+) -> pd.DataFrame:
     """
     Shows the list of connections that the specified item is connected to.

@@ -104,7 +104,9 @@ def list_item_connections(item_name: str, item_type: str, workspace: Optional[st
     workspace = fabric.resolve_workspace_name(workspace)
     workspace_id = fabric.resolve_workspace_id(workspace)
     item_type = item_type[0].upper() + item_type[1:]
-    item_id = fabric.resolve_item_id(
+    item_id = fabric.resolve_item_id(
+        item_name=item_name, type=item_type, workspace=workspace
+    )

     client = fabric.FabricRestClient()
     response = client.post(f"/v1/workspaces/{workspace_id}/items/{item_id}/connections")
@@ -123,17 +125,17 @@ def list_item_connections(item_name: str, item_type: str, workspace: Optional[st
     if response.status_code != 200:
         raise FabricHTTPException(response)

-
+    responses = pagination(client, response)

-    for r in
-    for v in r.get(
+    for r in responses:
+        for v in r.get("value", []):
             new_data = {
-                "Connection Name": v.get(
-                "Connection Id": v.get(
-                "Connectivity Type": v.get(
-                "Connection Type": v.get(
-                "Connection Path": v.get(
-                "Gateway Id": v.get(
+                "Connection Name": v.get("displayName"),
+                "Connection Id": v.get("id"),
+                "Connectivity Type": v.get("connectivityType"),
+                "Connection Type": v.get("connectionDetails", {}).get("type"),
+                "Connection Path": v.get("connectionDetails", {}).get("path"),
+                "Gateway Id": v.get("gatewayId"),
             }

             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
sempy_labs/_git.py
CHANGED
@@ -132,19 +132,18 @@ def get_git_status(workspace: Optional[str] = None) -> pd.DataFrame:
     client = fabric.FabricRestClient()
     response = client.get(f"/v1/workspaces/{workspace_id}/git/status")

-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)

     result = lro(client, response).json()

-    for
-    changes = v.get("changes", [])
+    for changes in result.get("changes", []):
         item_metadata = changes.get("itemMetadata", {})
         item_identifier = item_metadata.get("itemIdentifier", {})

         new_data = {
-            "Workspace Head":
-            "Remote Commit Hash":
+            "Workspace Head": result.get("workspaceHead"),
+            "Remote Commit Hash": result.get("remoteCommitHash"),
             "Object ID": item_identifier.get("objectId"),
             "Logical ID": item_identifier.get("logicalId"),
             "Item Type": item_metadata.get("itemType"),
@@ -199,21 +198,21 @@ def get_git_connection(workspace: Optional[str] = None) -> pd.DataFrame:
     if response.status_code != 200:
         raise FabricHTTPException(response)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    r = response.json()
+    provider_details = r.get("gitProviderDetails", {})
+    sync_details = r.get("gitSyncDetails", {})
+    new_data = {
+        "Organization Name": provider_details.get("organizationName"),
+        "Project Name": provider_details.get("projectName"),
+        "Git Provider Type": provider_details.get("gitProviderType"),
+        "Repository Name": provider_details.get("repositoryName"),
+        "Branch Name": provider_details.get("branchName"),
+        "Directory Name": provider_details.get("directoryName"),
+        "Workspace Head": sync_details.get("head"),
+        "Last Sync Time": sync_details.get("lastSyncTime"),
+        "Git Connection State": r.get("gitConnectionState"),
+    }
+    df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

     return df

@@ -237,7 +236,7 @@ def initialize_git_connection(workspace: Optional[str] = None):
     client = fabric.FabricRestClient()
     response = client.post(f"/v1/workspaces/{workspace_id}/git/initializeConnection")

-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)

     lro(client, response)
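
A short sketch of the functions whose status-code checks were fixed above (the old code compared the response object itself against [200, 202]); the workspace name is a hypothetical placeholder and root-level re-export is assumed:

from sempy_labs import get_git_status, get_git_connection, initialize_git_connection

ws = "My Workspace"                       # hypothetical workspace
initialize_git_connection(workspace=ws)   # long-running operation handled via lro()
status_df = get_git_status(workspace=ws)          # one row per changed item
connection_df = get_git_connection(workspace=ws)  # org/project/repo/branch details
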
sempy_labs/_helper_functions.py
CHANGED
@@ -2,14 +2,14 @@ import sempy.fabric as fabric
 import re
 import json
 import base64
+import time
+from sempy.fabric.exceptions import FabricHTTPException
 import pandas as pd
 from functools import wraps
 import datetime
-import time
 from typing import Optional, Tuple, List
 from uuid import UUID
 import sempy_labs._icons as icons
-from sempy.fabric.exceptions import FabricHTTPException
 import urllib.parse
 from azure.core.credentials import TokenCredential, AccessToken

@@ -992,34 +992,6 @@ def get_language_codes(languages: str | List[str]):
     return languages


-def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> UUID:
-    """
-    Obtains the environment Id for a given environment.
-
-    Parameters
-    ----------
-    environment: str
-        Name of the environment.
-    workspace : str, default=None
-        The Fabric workspace name.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
-    """
-
-    from sempy_labs._environments import list_environments
-
-    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
-
-    dfE = list_environments(workspace=workspace)
-    dfE_filt = dfE[dfE["Environment Name"] == environment]
-    if len(dfE_filt) == 0:
-        raise ValueError(
-            f"{icons.red_dot} The '{environment}' environment does not exist within the '{workspace}' workspace."
-        )
-
-    return dfE_filt["Environment Id"].iloc[0]
-
-
 def get_azure_token_credentials(
     key_vault_uri: str,
     key_vault_tenant_id: str,
@@ -1048,3 +1020,34 @@ def get_azure_token_credentials(
     }

     return token, credential, headers
+
+
+def convert_to_alphanumeric_lowercase(input_string):
+
+    cleaned_string = re.sub(r"[^a-zA-Z0-9]", "", input_string)
+    cleaned_string = cleaned_string.lower()
+
+    return cleaned_string
+
+
+def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> UUID:
+    """
+    Obtains the environment Id for a given environment.
+
+    Parameters
+    ----------
+    environment: str
+        Name of the environment.
+    """
+    from sempy_labs._environments import list_environments
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    dfE = list_environments(workspace=workspace)
+    dfE_filt = dfE[dfE["Environment Name"] == environment]
+    if len(dfE_filt) == 0:
+        raise ValueError(
+            f"{icons.red_dot} The '{environment}' environment does not exist within the '{workspace}' workspace."
+        )
+
+    return dfE_filt["Environment Id"].iloc[0]
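
A quick sketch of the two helpers relocated/added above (environment and workspace names are hypothetical):

from sempy_labs._helper_functions import (
    convert_to_alphanumeric_lowercase,
    resolve_environment_id,
)

convert_to_alphanumeric_lowercase("My-Env_01!")  # strips non-alphanumerics: "myenv01"
env_id = resolve_environment_id(
    environment="MyEnv", workspace="My Workspace"  # hypothetical names
)
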
sempy_labs/_icons.py
CHANGED
@@ -74,3 +74,22 @@ language_map = {
 }
 workspace_roles = ["Admin", "Member", "Viewer", "Contributor"]
 principal_types = ["App", "Group", "None", "User"]
+azure_api_version = "2023-11-01"
+migrate_capacity_suffix = "fsku"
+sku_mapping = {
+    "A1": "F8",
+    "EM1": "F8",
+    "A2": "F16",
+    "EM2": "F16",
+    "A3": "F32",
+    "EM3": "F32",
+    "A4": "F64",
+    "P1": "F64",
+    "A5": "F128",
+    "P2": "F128",
+    "A6": "F256",
+    "P3": "F256",
+    "A7": "F512",
+    "P4": "F512",
+    "P5": "F1024",
+}
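
The new sku_mapping table translates legacy Power BI Embedded (A), EM, and Premium (P) SKUs to their Fabric F-SKU equivalents, presumably supporting the new _capacity_migration.py module. A lookup sketch (the capacity name is hypothetical):

import sempy_labs._icons as icons

icons.sku_mapping.get("P1")   # -> "F64"
icons.sku_mapping.get("A5")   # -> "F128"
# migrated capacities get a name suffix, e.g. "salescapacity" -> "salescapacityfsku"
new_name = "salescapacity" + icons.migrate_capacity_suffix
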
sempy_labs/_list_functions.py
CHANGED
@@ -8,6 +8,7 @@ from sempy_labs._helper_functions import (
     lro,
     resolve_item_type,
     format_dax_object_name,
+    pagination,
 )
 import pandas as pd
 from typing import Optional
@@ -1774,3 +1775,212 @@ def list_reports_using_semantic_model(
         df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

     return df
+
+
+def list_report_semantic_model_objects(
+    dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False
+) -> pd.DataFrame:
+    """
+    Shows a list of semantic model objects (i.e. columns, measures, hierarchies) used in all reports which feed data from
+    a given semantic model.
+
+    Requirement: Reports must be in the PBIR format.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    extended: bool, default=False
+        If True, adds an extra column called 'Valid Semantic Model Object' which identifies whether the semantic model object used
+        in the report exists in the semantic model which feeds data to the report.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of semantic model objects (i.e. columns, measures, hierarchies) used in all reports which feed data from
+        a given semantic model.
+    """
+
+    from sempy_labs.report import ReportWrapper
+    from sempy_labs.tom import connect_semantic_model
+
+    dfRO = pd.DataFrame(
+        columns=[
+            "Report Name",
+            "Report Workspace Name",
+            "Table Name",
+            "Object Name",
+            "Object Type",
+            "Report Source",
+            "Report Source Object",
+        ]
+    )
+
+    # Collect all reports which use the semantic model
+    dfR = list_reports_using_semantic_model(dataset=dataset, workspace=workspace)
+
+    if len(dfR) == 0:
+        return dfRO
+
+    for _, r in dfR.iterrows():
+        report_name = r["Report Name"]
+        report_workspace = r["Report Workspace Name"]
+
+        rpt = ReportWrapper(report=report_name, workspace=report_workspace)
+        # Collect all semantic model objects used in the report
+        dfRSO = rpt.list_semantic_model_objects()
+        dfRSO["Report Name"] = report_name
+        dfRSO["Report Workspace Name"] = report_workspace
+        colName = "Report Name"
+        dfRSO.insert(0, colName, dfRSO.pop(colName))
+        colName = "Report Workspace Name"
+        dfRSO.insert(1, colName, dfRSO.pop(colName))
+
+        dfRO = pd.concat([dfRO, dfRSO], ignore_index=True)
+
+    # Collect all semantic model objects
+    if extended:
+        with connect_semantic_model(
+            dataset=dataset, readonly=True, workspace=workspace
+        ) as tom:
+            for index, row in dfRO.iterrows():
+                object_type = row["Object Type"]
+                if object_type == "Measure":
+                    dfRO.at[index, "Valid Semantic Model Object"] = any(
+                        o.Name == row["Object Name"] for o in tom.all_measures()
+                    )
+                elif object_type == "Column":
+                    dfRO.at[index, "Valid Semantic Model Object"] = any(
+                        format_dax_object_name(c.Parent.Name, c.Name)
+                        == format_dax_object_name(row["Table Name"], row["Object Name"])
+                        for c in tom.all_columns()
+                    )
+                elif object_type == "Hierarchy":
+                    dfRO.at[index, "Valid Semantic Model Object"] = any(
+                        format_dax_object_name(h.Parent.Name, h.Name)
+                        == format_dax_object_name(row["Table Name"], row["Object Name"])
+                        for h in tom.all_hierarchies()
+                    )
+
+    return dfRO
+
+
+def list_semantic_model_object_report_usage(
+    dataset: str,
+    workspace: Optional[str] = None,
+    include_dependencies: Optional[bool] = False,
+    extended: Optional[bool] = False,
+) -> pd.DataFrame:
+    """
+    Shows a list of semantic model objects and how many times they are referenced in all reports which rely on this semantic model.
+
+    Requirement: Reports must be in the PBIR format.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    include_dependencies : bool, default=False
+        If True, includes measure dependencies.
+    extended: bool, default=False
+        If True, adds columns 'Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size' based on Vertipaq statistics.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of semantic model objects and how many times they are referenced in all reports which rely on this semantic model. By default, the dataframe
+        is sorted descending by 'Report Usage Count'.
+    """
+
+    from sempy_labs._model_dependencies import get_measure_dependencies
+    from sempy_labs._helper_functions import format_dax_object_name
+
+    workspace = fabric.resolve_workspace_name(workspace)
+
+    dfR = list_report_semantic_model_objects(dataset=dataset, workspace=workspace)
+    usage_column_name = "Report Usage Count"
+
+    if not include_dependencies:
+        final_df = (
+            dfR.groupby(["Table Name", "Object Name", "Object Type"])
+            .size()
+            .reset_index(name=usage_column_name)
+        )
+    else:
+        df = pd.DataFrame(columns=["Table Name", "Object Name", "Object Type"])
+        dep = get_measure_dependencies(dataset=dataset, workspace=workspace)
+
+        for i, r in dfR.iterrows():
+            object_type = r["Object Type"]
+            table_name = r["Table Name"]
+            object_name = r["Object Name"]
+            new_data = {
+                "Table Name": table_name,
+                "Object Name": object_name,
+                "Object Type": object_type,
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+            if object_type == "Measure":
+                df_filt = dep[dep["Object Name"] == object_name][
+                    ["Referenced Table", "Referenced Object", "Referenced Object Type"]
+                ]
+                df_filt.rename(
+                    columns={
+                        "Referenced Table": "Table Name",
+                        "Referenced Object": "Object Name",
+                        "Referenced Object Type": "Object Type",
+                    },
+                    inplace=True,
+                )
+
+                df = pd.concat([df, df_filt], ignore_index=True)
+
+        final_df = (
+            df.groupby(["Table Name", "Object Name", "Object Type"])
+            .size()
+            .reset_index(name=usage_column_name)
+        )
+
+    if extended:
+        final_df["Object"] = format_dax_object_name(
+            final_df["Table Name"], final_df["Object Name"]
+        )
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
+        final_df = pd.merge(
+            final_df,
+            dfC[
+                [
+                    "Object",
+                    "Total Size",
+                    "Data Size",
+                    "Dictionary Size",
+                    "Hierarchy Size",
+                ]
+            ],
+            on="Object",
+            how="left",
+        )
+
+        ext_int_cols = ["Total Size", "Data Size", "Dictionary Size", "Hierarchy Size"]
+        final_df[ext_int_cols] = final_df[ext_int_cols].fillna(0).astype(int)
+        final_df.drop("Object", axis=1, inplace=True)
+
+    int_cols = [usage_column_name]
+    final_df[int_cols] = final_df[int_cols].astype(int)
+
+    final_df = final_df[final_df["Object Type"] != "Table"].sort_values(
+        by=usage_column_name, ascending=False
+    )
+
+    final_df.reset_index(drop=True, inplace=True)
+
+    return final_df
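
A usage sketch for the two new functions (reports must be in the PBIR format; dataset and workspace names are hypothetical, and root-level re-export is assumed):

from sempy_labs import (
    list_report_semantic_model_objects,
    list_semantic_model_object_report_usage,
)

objects_df = list_report_semantic_model_objects(
    dataset="Sales Model", workspace="My Workspace", extended=True
)
usage_df = list_semantic_model_object_report_usage(
    dataset="Sales Model",
    workspace="My Workspace",
    include_dependencies=True,  # also count objects referenced through measure DAX
    extended=True,              # add Vertipaq size columns
)
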
sempy_labs/_model_bpa.py
CHANGED
@@ -506,7 +506,7 @@ def run_model_bpa(
             content_html += f'<td>{row["Object Type"]}</td>'
             content_html += f'<td>{row["Object Name"]}</td>'
             content_html += f'<td style="text-align: center;">{row["Severity"]}</td>'
-            #content_html += f'<td>{row["Severity"]}</td>'
+            # content_html += f'<td>{row["Severity"]}</td>'
             content_html += "</tr>"
         content_html += "</table>"

sempy_labs/_query_scale_out.py
CHANGED
@@ -339,7 +339,6 @@ def list_qso_settings(
     if dataset is not None:
         dataset_id = resolve_dataset_id(dataset, workspace)

-    workspace_id = fabric.get_workspace_id()
     df = pd.DataFrame(
         columns=[
             "Dataset Id",
@@ -413,10 +412,12 @@ def set_workspace_default_storage_format(
     dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
     if len(dfW) == 0:
         raise ValueError()
-    current_storage_format = dfW[
+    current_storage_format = dfW["Default Dataset Storage Format"].iloc[0]

     if current_storage_format == storage_format:
-        print(
+        print(
+            f"{icons.info} The '{workspace}' is already set to a default storage format of '{current_storage_format}'."
+        )
         return

     request_body = {
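
A sketch of the corrected early-exit path above: calling with the format the workspace already uses now prints an informational message and returns without issuing a request ('Large'/'Small' as the accepted values and the workspace name are assumptions):

from sempy_labs import set_workspace_default_storage_format

set_workspace_default_storage_format(
    storage_format="Large",    # assumed accepted value
    workspace="My Workspace",  # hypothetical workspace
)
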
sempy_labs/_spark.py
CHANGED
@@ -298,7 +298,9 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     )


-def get_spark_settings(
+def get_spark_settings(
+    workspace: Optional[str] = None, return_dataframe: Optional[bool] = True
+) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.

@@ -308,10 +310,12 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         The name of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+    return_dataframe : bool, default=True
+        If True, returns a pandas dataframe. If False, returns a json dictionary.

     Returns
     -------
-    pandas.DataFrame
+    pandas.DataFrame | dict
         A pandas dataframe showing the spark settings for a workspace.
     """

@@ -363,12 +367,15 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         "High Concurrency Enabled",
         "Customize Compute Enabled",
     ]
-    int_cols = ["Max Node Count", "Max Executors"]
+    # int_cols = ["Max Node Count", "Max Executors"]

     df[bool_cols] = df[bool_cols].astype(bool)
-    df[int_cols] = df[int_cols].astype(int)
+    # df[int_cols] = df[int_cols].astype(int)

-
+    if return_dataframe:
+        return df
+    else:
+        return response.json()


 def update_spark_settings(
@@ -420,38 +427,26 @@ def update_spark_settings(
     # https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)

-
-
-    if automatic_log_enabled is None:
-        automatic_log_enabled = bool(dfS["Automatic Log Enabled"].iloc[0])
-    if high_concurrency_enabled is None:
-        high_concurrency_enabled = bool(dfS["High Concurrency Enabled"].iloc[0])
-    if customize_compute_enabled is None:
-        customize_compute_enabled = bool(dfS["Customize Compute Enabled"].iloc[0])
-    if default_pool_name is None:
-        default_pool_name = dfS["Default Pool Name"].iloc[0]
-    if max_node_count is None:
-        max_node_count = int(dfS["Max Node Count"].iloc[0])
-    if max_executors is None:
-        max_executors = int(dfS["Max Executors"].iloc[0])
-    if environment_name is None:
-        environment_name = dfS["Environment Name"].iloc[0]
-    if runtime_version is None:
-        runtime_version = dfS["Runtime Version"].iloc[0]
+    request_body = get_spark_settings(workspace=workspace, return_dataframe=False)

-
-    "automaticLog"
-
-    "
-    "
-
-
-
-
-
-    "
-
+    if automatic_log_enabled is not None:
+        request_body["automaticLog"]["enabled"] = automatic_log_enabled
+    if high_concurrency_enabled is not None:
+        request_body["highConcurrency"][
+            "notebookInteractiveRunEnabled"
+        ] = high_concurrency_enabled
+    if customize_compute_enabled is not None:
+        request_body["pool"]["customizeComputeEnabled"] = customize_compute_enabled
+    if default_pool_name is not None:
+        request_body["pool"]["defaultPool"]["name"] = default_pool_name
+    if max_node_count is not None:
+        request_body["pool"]["starterPool"]["maxNodeCount"] = max_node_count
+    if max_executors is not None:
+        request_body["pool"]["starterPool"]["maxExecutors"] = max_executors
+    if environment_name is not None:
+        request_body["environment"]["name"] = environment_name
+    if runtime_version is not None:
+        request_body["environment"]["runtimeVersion"] = runtime_version

     client = fabric.FabricRestClient()
     response = client.patch(