semantic-link-labs 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)
  1. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/METADATA +7 -3
  2. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/RECORD +32 -23
  3. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +57 -18
  5. sempy_labs/_capacities.py +39 -3
  6. sempy_labs/_capacity_migration.py +624 -0
  7. sempy_labs/_clear_cache.py +8 -8
  8. sempy_labs/_connections.py +15 -13
  9. sempy_labs/_git.py +20 -21
  10. sempy_labs/_helper_functions.py +33 -30
  11. sempy_labs/_icons.py +19 -0
  12. sempy_labs/_list_functions.py +210 -0
  13. sempy_labs/_model_bpa.py +1 -1
  14. sempy_labs/_query_scale_out.py +4 -3
  15. sempy_labs/_spark.py +31 -36
  16. sempy_labs/_sql.py +60 -15
  17. sempy_labs/_vertipaq.py +9 -7
  18. sempy_labs/admin/__init__.py +53 -0
  19. sempy_labs/admin/_basic_functions.py +806 -0
  20. sempy_labs/admin/_domains.py +411 -0
  21. sempy_labs/directlake/_generate_shared_expression.py +11 -14
  22. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +14 -24
  23. sempy_labs/report/__init__.py +9 -6
  24. sempy_labs/report/_report_bpa.py +359 -0
  25. sempy_labs/report/_report_bpa_rules.py +113 -0
  26. sempy_labs/report/_report_helper.py +254 -0
  27. sempy_labs/report/_report_list_functions.py +95 -0
  28. sempy_labs/report/_report_rebind.py +0 -4
  29. sempy_labs/report/_reportwrapper.py +2039 -0
  30. sempy_labs/tom/_model.py +78 -4
  31. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/LICENSE +0 -0
  32. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.0.dist-info}/top_level.txt +0 -0
sempy_labs/_connections.py CHANGED
@@ -2,7 +2,6 @@ import sempy.fabric as fabric
 import pandas as pd
 from sempy.fabric.exceptions import FabricHTTPException
 from typing import Optional
-import sempy_labs._icons as icons
 from sempy_labs._helper_functions import pagination
 
 
@@ -77,8 +76,9 @@ def list_connections() -> pd.DataFrame:
     return df
 
 
-def list_item_connections(item_name: str, item_type: str, workspace: Optional[str] = None) -> pd.DataFrame:
-
+def list_item_connections(
+    item_name: str, item_type: str, workspace: Optional[str] = None
+) -> pd.DataFrame:
     """
     Shows the list of connections that the specified item is connected to.
 
@@ -104,7 +104,9 @@ def list_item_connections(item_name: str, item_type: str, workspace: Optional[st
     workspace = fabric.resolve_workspace_name(workspace)
     workspace_id = fabric.resolve_workspace_id(workspace)
     item_type = item_type[0].upper() + item_type[1:]
-    item_id = fabric.resolve_item_id(item_name=item_name, type=item_type, workspace=workspace)
+    item_id = fabric.resolve_item_id(
+        item_name=item_name, type=item_type, workspace=workspace
+    )
 
     client = fabric.FabricRestClient()
     response = client.post(f"/v1/workspaces/{workspace_id}/items/{item_id}/connections")
@@ -123,17 +125,17 @@ def list_item_connections(item_name: str, item_type: str, workspace: Optional[st
     if response.status_code != 200:
         raise FabricHTTPException(response)
 
-    respnoses = pagination(client, response)
+    responses = pagination(client, response)
 
-    for r in respnoses:
-        for v in r.get('value', []):
+    for r in responses:
+        for v in r.get("value", []):
             new_data = {
-                "Connection Name": v.get('displayName'),
-                "Connection Id": v.get('id'),
-                "Connectivity Type": v.get('connectivityType'),
-                "Connection Type": v.get('connectionDetails', {}).get('type'),
-                "Connection Path": v.get('connectionDetails', {}).get('path'),
-                "Gateway Id": v.get('gatewayId'),
+                "Connection Name": v.get("displayName"),
+                "Connection Id": v.get("id"),
+                "Connectivity Type": v.get("connectivityType"),
+                "Connection Type": v.get("connectionDetails", {}).get("type"),
+                "Connection Path": v.get("connectionDetails", {}).get("path"),
+                "Gateway Id": v.get("gatewayId"),
             }
 
             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
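
Besides fixing the "respnoses" typo, this change paginates through all result pages. A minimal usage sketch (item and workspace names are hypothetical; the top-level re-export is assumed):

    from sempy_labs import list_item_connections

    # One row per connection the item is wired to; item_type is capitalized
    # internally, so "semanticModel" and "SemanticModel" are equivalent.
    df = list_item_connections(
        item_name="Sales",           # hypothetical item
        item_type="SemanticModel",
        workspace="Analytics",       # hypothetical workspace
    )
    print(df[["Connection Name", "Connectivity Type", "Gateway Id"]])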
sempy_labs/_git.py CHANGED
@@ -132,19 +132,18 @@ def get_git_status(workspace: Optional[str] = None) -> pd.DataFrame:
     client = fabric.FabricRestClient()
     response = client.get(f"/v1/workspaces/{workspace_id}/git/status")
 
-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)
 
     result = lro(client, response).json()
 
-    for v in result.get("value", []):
-        changes = v.get("changes", [])
+    for changes in result.get("changes", []):
         item_metadata = changes.get("itemMetadata", {})
         item_identifier = item_metadata.get("itemIdentifier", {})
 
         new_data = {
-            "Workspace Head": v.get("workspaceHead"),
-            "Remote Commit Hash": v.get("remoteCommitHash"),
+            "Workspace Head": result.get("workspaceHead"),
+            "Remote Commit Hash": result.get("remoteCommitHash"),
             "Object ID": item_identifier.get("objectId"),
             "Logical ID": item_identifier.get("logicalId"),
             "Item Type": item_metadata.get("itemType"),
@@ -199,21 +198,21 @@ def get_git_connection(workspace: Optional[str] = None) -> pd.DataFrame:
     if response.status_code != 200:
         raise FabricHTTPException(response)
 
-    for v in response.json().get("value", []):
-        provider_details = v.get("gitProviderDetails", {})
-        sync_details = v.get("gitSyncDetails", {})
-        new_data = {
-            "Organization Name": provider_details.get("organizationName"),
-            "Project Name": provider_details.get("projectName"),
-            "Git Provider Type": provider_details.get("gitProviderType"),
-            "Repository Name": provider_details.get("repositoryName"),
-            "Branch Name": provider_details.get("branchName"),
-            "Directory Name": provider_details.get("directoryName"),
-            "Workspace Head": sync_details.get("head"),
-            "Last Sync Time": sync_details.get("lastSyncTime"),
-            "Git Conneciton State": v.get("gitConnectionState"),
-        }
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+    r = response.json()
+    provider_details = r.get("gitProviderDetails", {})
+    sync_details = r.get("gitSyncDetails", {})
+    new_data = {
+        "Organization Name": provider_details.get("organizationName"),
+        "Project Name": provider_details.get("projectName"),
+        "Git Provider Type": provider_details.get("gitProviderType"),
+        "Repository Name": provider_details.get("repositoryName"),
+        "Branch Name": provider_details.get("branchName"),
+        "Directory Name": provider_details.get("directoryName"),
+        "Workspace Head": sync_details.get("head"),
+        "Last Sync Time": sync_details.get("lastSyncTime"),
+        "Git Connection State": r.get("gitConnectionState"),
+    }
+    df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
 
     return df
 
@@ -237,7 +236,7 @@ def initialize_git_connection(workspace: Optional[str] = None):
     client = fabric.FabricRestClient()
     response = client.post(f"/v1/workspaces/{workspace_id}/git/initializeConnection")
 
-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)
 
     lro(client, response)
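
Both guards above previously compared the response object itself against [200, 202], which was always true-negative; they now check response.status_code. get_git_status also reads "changes" from the top level of the response instead of a nonexistent "value" wrapper. A quick sketch of the corrected function (workspace name hypothetical; top-level re-export assumed):

    from sempy_labs import get_git_status

    # One row per changed item; "Workspace Head" and "Remote Commit Hash"
    # come from the top-level response and repeat on every row.
    status = get_git_status(workspace="Analytics")
    print(status[["Item Type", "Object ID", "Workspace Head"]])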
sempy_labs/_helper_functions.py CHANGED
@@ -2,14 +2,14 @@ import sempy.fabric as fabric
 import re
 import json
 import base64
+import time
+from sempy.fabric.exceptions import FabricHTTPException
 import pandas as pd
 from functools import wraps
 import datetime
-import time
 from typing import Optional, Tuple, List
 from uuid import UUID
 import sempy_labs._icons as icons
-from sempy.fabric.exceptions import FabricHTTPException
 import urllib.parse
 from azure.core.credentials import TokenCredential, AccessToken
 
@@ -992,34 +992,6 @@ def get_language_codes(languages: str | List[str]):
     return languages
 
 
-def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> UUID:
-    """
-    Obtains the environment Id for a given environment.
-
-    Parameters
-    ----------
-    environment: str
-        Name of the environment.
-    workspace : str, default=None
-        The Fabric workspace name.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
-    """
-
-    from sempy_labs._environments import list_environments
-
-    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
-
-    dfE = list_environments(workspace=workspace)
-    dfE_filt = dfE[dfE["Environment Name"] == environment]
-    if len(dfE_filt) == 0:
-        raise ValueError(
-            f"{icons.red_dot} The '{environment}' environment does not exist within the '{workspace}' workspace."
-        )
-
-    return dfE_filt["Environment Id"].iloc[0]
-
-
 def get_azure_token_credentials(
     key_vault_uri: str,
     key_vault_tenant_id: str,
@@ -1048,3 +1020,34 @@ def get_azure_token_credentials(
     }
 
     return token, credential, headers
+
+
+def convert_to_alphanumeric_lowercase(input_string):
+
+    cleaned_string = re.sub(r"[^a-zA-Z0-9]", "", input_string)
+    cleaned_string = cleaned_string.lower()
+
+    return cleaned_string
+
+
+def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> UUID:
+    """
+    Obtains the environment Id for a given environment.
+
+    Parameters
+    ----------
+    environment: str
+        Name of the environment.
+    """
+    from sempy_labs._environments import list_environments
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    dfE = list_environments(workspace=workspace)
+    dfE_filt = dfE[dfE["Environment Name"] == environment]
+    if len(dfE_filt) == 0:
+        raise ValueError(
+            f"{icons.red_dot} The '{environment}' environment does not exist within the '{workspace}' workspace."
+        )
+
+    return dfE_filt["Environment Id"].iloc[0]
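
The net change is that resolve_environment_id moves to the end of the module and convert_to_alphanumeric_lowercase is new. The latter is a pure string cleaner (likely supporting the new capacity-migration naming); its behavior follows directly from the two lines above:

    import re

    def convert_to_alphanumeric_lowercase(input_string):
        # Strip every non-alphanumeric character, then lowercase what remains.
        cleaned_string = re.sub(r"[^a-zA-Z0-9]", "", input_string)
        return cleaned_string.lower()

    assert convert_to_alphanumeric_lowercase("My Capacity-01!") == "mycapacity01"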
sempy_labs/_icons.py CHANGED
@@ -74,3 +74,22 @@ language_map = {
 }
 workspace_roles = ["Admin", "Member", "Viewer", "Contributor"]
 principal_types = ["App", "Group", "None", "User"]
+azure_api_version = "2023-11-01"
+migrate_capacity_suffix = "fsku"
+sku_mapping = {
+    "A1": "F8",
+    "EM1": "F8",
+    "A2": "F16",
+    "EM2": "F16",
+    "A3": "F32",
+    "EM3": "F32",
+    "A4": "F64",
+    "P1": "F64",
+    "A5": "F128",
+    "P2": "F128",
+    "A6": "F256",
+    "P3": "F256",
+    "A7": "F512",
+    "P4": "F512",
+    "P5": "F1024",
+}
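
These module-level constants back the new sempy_labs/_capacity_migration.py: azure_api_version pins the Azure REST API version, and sku_mapping translates Power BI A/EM/P SKUs to their Fabric F-SKU equivalents. A lookup sketch (capacity name hypothetical):

    import sempy_labs._icons as icons

    source_sku = "P1"
    target_sku = icons.sku_mapping.get(source_sku)  # "F64"
    # Presumably the migrated capacity name appends the suffix constant:
    new_name = "salescapacity" + icons.migrate_capacity_suffix  # "salescapacityfsku" (assumed convention)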
sempy_labs/_list_functions.py CHANGED
@@ -8,6 +8,7 @@ from sempy_labs._helper_functions import (
     lro,
     resolve_item_type,
     format_dax_object_name,
+    pagination,
 )
 import pandas as pd
 from typing import Optional
@@ -1774,3 +1775,212 @@ def list_reports_using_semantic_model(
         df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
 
     return df
+
+
+def list_report_semantic_model_objects(
+    dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False
+) -> pd.DataFrame:
+    """
+    Shows a list of semantic model objects (i.e. columns, measures, hierarchies) used in all reports which feed data from
+    a given semantic model.
+
+    Requirement: Reports must be in the PBIR format.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    extended: bool, default=False
+        If True, adds an extra column called 'Valid Semantic Model Object' which identifies whether the semantic model object used
+        in the report exists in the semantic model which feeds data to the report.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of semantic model objects (i.e. columns, measures, hierarchies) used in all reports which feed data from
+        a given semantic model.
+    """
+
+    from sempy_labs.report import ReportWrapper
+    from sempy_labs.tom import connect_semantic_model
+
+    dfRO = pd.DataFrame(
+        columns=[
+            "Report Name",
+            "Report Workspace Name",
+            "Table Name",
+            "Object Name",
+            "Object Type",
+            "Report Source",
+            "Report Source Object",
+        ]
+    )
+
+    # Collect all reports which use the semantic model
+    dfR = list_reports_using_semantic_model(dataset=dataset, workspace=workspace)
+
+    if len(dfR) == 0:
+        return dfRO
+
+    for _, r in dfR.iterrows():
+        report_name = r["Report Name"]
+        report_workspace = r["Report Workspace Name"]
+
+        rpt = ReportWrapper(report=report_name, workspace=report_workspace)
+        # Collect all semantic model objects used in the report
+        dfRSO = rpt.list_semantic_model_objects()
+        dfRSO["Report Name"] = report_name
+        dfRSO["Report Workspace Name"] = report_workspace
+        colName = "Report Name"
+        dfRSO.insert(0, colName, dfRSO.pop(colName))
+        colName = "Report Workspace Name"
+        dfRSO.insert(1, colName, dfRSO.pop(colName))
+
+        dfRO = pd.concat([dfRO, dfRSO], ignore_index=True)
+
+    # Collect all semantic model objects
+    if extended:
+        with connect_semantic_model(
+            dataset=dataset, readonly=True, workspace=workspace
+        ) as tom:
+            for index, row in dfRO.iterrows():
+                object_type = row["Object Type"]
+                if object_type == "Measure":
+                    dfRO.at[index, "Valid Semantic Model Object"] = any(
+                        o.Name == row["Object Name"] for o in tom.all_measures()
+                    )
+                elif object_type == "Column":
+                    dfRO.at[index, "Valid Semantic Model Object"] = any(
+                        format_dax_object_name(c.Parent.Name, c.Name)
+                        == format_dax_object_name(row["Table Name"], row["Object Name"])
+                        for c in tom.all_columns()
+                    )
+                elif object_type == "Hierarchy":
+                    dfRO.at[index, "Valid Semantic Model Object"] = any(
+                        format_dax_object_name(h.Parent.Name, h.Name)
+                        == format_dax_object_name(row["Table Name"], row["Object Name"])
+                        for h in tom.all_hierarchies()
+                    )
+
+    return dfRO
+
+
+def list_semantic_model_object_report_usage(
+    dataset: str,
+    workspace: Optional[str] = None,
+    include_dependencies: Optional[bool] = False,
+    extended: Optional[bool] = False,
+) -> pd.DataFrame:
+    """
+    Shows a list of semantic model objects and how many times they are referenced in all reports which rely on this semantic model.
+
+    Requirement: Reports must be in the PBIR format.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    include_dependencies : bool, default=False
+        If True, includes measure dependencies.
+    extended: bool, default=False
+        If True, adds columns 'Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size' based on Vertipaq statistics.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of semantic model objects and how many times they are referenced in all reports which rely on this semantic model. By default, the dataframe
+        is sorted descending by 'Report Usage Count'.
+    """
+
+    from sempy_labs._model_dependencies import get_measure_dependencies
+    from sempy_labs._helper_functions import format_dax_object_name
+
+    workspace = fabric.resolve_workspace_name(workspace)
+
+    dfR = list_report_semantic_model_objects(dataset=dataset, workspace=workspace)
+    usage_column_name = "Report Usage Count"
+
+    if not include_dependencies:
+        final_df = (
+            dfR.groupby(["Table Name", "Object Name", "Object Type"])
+            .size()
+            .reset_index(name=usage_column_name)
+        )
+    else:
+        df = pd.DataFrame(columns=["Table Name", "Object Name", "Object Type"])
+        dep = get_measure_dependencies(dataset=dataset, workspace=workspace)
+
+        for i, r in dfR.iterrows():
+            object_type = r["Object Type"]
+            table_name = r["Table Name"]
+            object_name = r["Object Name"]
+            new_data = {
+                "Table Name": table_name,
+                "Object Name": object_name,
+                "Object Type": object_type,
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+            if object_type == "Measure":
+                df_filt = dep[dep["Object Name"] == object_name][
+                    ["Referenced Table", "Referenced Object", "Referenced Object Type"]
+                ]
+                df_filt.rename(
+                    columns={
+                        "Referenced Table": "Table Name",
+                        "Referenced Object": "Object Name",
+                        "Referenced Object Type": "Object Type",
+                    },
+                    inplace=True,
+                )
+
+                df = pd.concat([df, df_filt], ignore_index=True)
+
+        final_df = (
+            df.groupby(["Table Name", "Object Name", "Object Type"])
+            .size()
+            .reset_index(name=usage_column_name)
+        )
+
+    if extended:
+        final_df["Object"] = format_dax_object_name(
+            final_df["Table Name"], final_df["Object Name"]
+        )
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
+        final_df = pd.merge(
+            final_df,
+            dfC[
+                [
+                    "Object",
+                    "Total Size",
+                    "Data Size",
+                    "Dictionary Size",
+                    "Hierarchy Size",
+                ]
+            ],
+            on="Object",
+            how="left",
+        )
+
+        ext_int_cols = ["Total Size", "Data Size", "Dictionary Size", "Hierarchy Size"]
+        final_df[ext_int_cols] = final_df[ext_int_cols].fillna(0).astype(int)
+        final_df.drop("Object", axis=1, inplace=True)
+
+    int_cols = [usage_column_name]
+    final_df[int_cols] = final_df[int_cols].astype(int)
+
+    final_df = final_df[final_df["Object Type"] != "Table"].sort_values(
+        by=usage_column_name, ascending=False
+    )
+
+    final_df.reset_index(drop=True, inplace=True)
+
+    return final_df
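
A usage sketch of the two new functions (model and workspace names hypothetical; both require PBIR-format reports, and top-level re-exports are assumed):

    from sempy_labs import (
        list_report_semantic_model_objects,
        list_semantic_model_object_report_usage,
    )

    # Which model objects does each report reference, and do they still exist in the model?
    df_objects = list_report_semantic_model_objects(
        dataset="Sales Model", workspace="Analytics", extended=True
    )

    # Rank objects by report usage, folding in measure dependencies and Vertipaq sizes.
    df_usage = list_semantic_model_object_report_usage(
        dataset="Sales Model",
        workspace="Analytics",
        include_dependencies=True,
        extended=True,
    )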
sempy_labs/_model_bpa.py CHANGED
@@ -506,7 +506,7 @@ def run_model_bpa(
             content_html += f'<td>{row["Object Type"]}</td>'
             content_html += f'<td>{row["Object Name"]}</td>'
             content_html += f'<td style="text-align: center;">{row["Severity"]}</td>'
-            #content_html += f'<td>{row["Severity"]}</td>'
+            # content_html += f'<td>{row["Severity"]}</td>'
             content_html += "</tr>"
         content_html += "</table>"
 
sempy_labs/_query_scale_out.py CHANGED
@@ -339,7 +339,6 @@ def list_qso_settings(
     if dataset is not None:
         dataset_id = resolve_dataset_id(dataset, workspace)
 
-    workspace_id = fabric.get_workspace_id()
     df = pd.DataFrame(
         columns=[
             "Dataset Id",
@@ -413,10 +412,12 @@ def set_workspace_default_storage_format(
     dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
     if len(dfW) == 0:
         raise ValueError()
-    current_storage_format = dfW['Default Dataset Storage Format'].iloc[0]
+    current_storage_format = dfW["Default Dataset Storage Format"].iloc[0]
 
     if current_storage_format == storage_format:
-        print(f"{icons.info} The '{workspace}' is already set to a default storage format of '{current_storage_format}'.")
+        print(
+            f"{icons.info} The '{workspace}' is already set to a default storage format of '{current_storage_format}'."
+        )
         return
 
     request_body = {
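
With the guard above, re-applying the workspace's current format is a no-op. A sketch (workspace name hypothetical; "Large" assumes the standard Small/Large dataset storage formats; top-level re-export assumed):

    from sempy_labs import set_workspace_default_storage_format

    # Prints an info message and returns early if 'Large' is already the default.
    set_workspace_default_storage_format(storage_format="Large", workspace="Analytics")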
sempy_labs/_spark.py CHANGED
@@ -298,7 +298,9 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     )
 
 
-def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
+def get_spark_settings(
+    workspace: Optional[str] = None, return_dataframe: Optional[bool] = True
+) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.
 
@@ -308,10 +310,12 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         The name of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+    return_dataframe : bool, default=True
+        If True, returns a pandas dataframe. If False, returns a json dictionary.
 
     Returns
    -------
-    pandas.DataFrame
+    pandas.DataFrame | dict
         A pandas dataframe showing the spark settings for a workspace.
     """
 
@@ -363,12 +367,15 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         "High Concurrency Enabled",
         "Customize Compute Enabled",
     ]
-    int_cols = ["Max Node Count", "Max Executors"]
+    # int_cols = ["Max Node Count", "Max Executors"]
 
     df[bool_cols] = df[bool_cols].astype(bool)
-    df[int_cols] = df[int_cols].astype(int)
+    # df[int_cols] = df[int_cols].astype(int)
 
-    return df
+    if return_dataframe:
+        return df
+    else:
+        return response.json()
 
 
 def update_spark_settings(
@@ -420,38 +427,26 @@ def update_spark_settings(
     # https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
 
-    dfS = get_spark_settings(workspace=workspace)
-
-    if automatic_log_enabled is None:
-        automatic_log_enabled = bool(dfS["Automatic Log Enabled"].iloc[0])
-    if high_concurrency_enabled is None:
-        high_concurrency_enabled = bool(dfS["High Concurrency Enabled"].iloc[0])
-    if customize_compute_enabled is None:
-        customize_compute_enabled = bool(dfS["Customize Compute Enabled"].iloc[0])
-    if default_pool_name is None:
-        default_pool_name = dfS["Default Pool Name"].iloc[0]
-    if max_node_count is None:
-        max_node_count = int(dfS["Max Node Count"].iloc[0])
-    if max_executors is None:
-        max_executors = int(dfS["Max Executors"].iloc[0])
-    if environment_name is None:
-        environment_name = dfS["Environment Name"].iloc[0]
-    if runtime_version is None:
-        runtime_version = dfS["Runtime Version"].iloc[0]
+    request_body = get_spark_settings(workspace=workspace, return_dataframe=False)
 
-    request_body = {
-        "automaticLog": {"enabled": automatic_log_enabled},
-        "highConcurrency": {"notebookInteractiveRunEnabled": high_concurrency_enabled},
-        "pool": {
-            "customizeComputeEnabled": customize_compute_enabled,
-            "defaultPool": {"name": default_pool_name, "type": "Workspace"},
-            "starterPool": {
-                "maxNodeCount": max_node_count,
-                "maxExecutors": max_executors,
-            },
-        },
-        "environment": {"name": environment_name, "runtimeVersion": runtime_version},
-    }
+    if automatic_log_enabled is not None:
+        request_body["automaticLog"]["enabled"] = automatic_log_enabled
+    if high_concurrency_enabled is not None:
+        request_body["highConcurrency"][
+            "notebookInteractiveRunEnabled"
+        ] = high_concurrency_enabled
+    if customize_compute_enabled is not None:
+        request_body["pool"]["customizeComputeEnabled"] = customize_compute_enabled
+    if default_pool_name is not None:
+        request_body["pool"]["defaultPool"]["name"] = default_pool_name
+    if max_node_count is not None:
+        request_body["pool"]["starterPool"]["maxNodeCount"] = max_node_count
+    if max_executors is not None:
+        request_body["pool"]["starterPool"]["maxExecutors"] = max_executors
+    if environment_name is not None:
+        request_body["environment"]["name"] = environment_name
+    if runtime_version is not None:
+        request_body["environment"]["runtimeVersion"] = runtime_version
 
     client = fabric.FabricRestClient()
     response = client.patch(
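
update_spark_settings now follows a read-modify-write pattern: it seeds the PATCH body with the raw JSON returned by get_spark_settings(return_dataframe=False) and overwrites only the parameters the caller supplied, instead of rebuilding the whole payload from dataframe columns. A sketch (workspace name hypothetical; top-level re-exports assumed):

    from sempy_labs import get_spark_settings, update_spark_settings

    # Only maxNodeCount changes; every other setting carries over from the GET response.
    update_spark_settings(workspace="Analytics", max_node_count=10)

    # The raw settings dictionary, in the same shape used internally to seed the PATCH body.
    settings = get_spark_settings(workspace="Analytics", return_dataframe=False)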