semantic-link-labs 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (47)
  1. {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/METADATA +39 -7
  2. {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/RECORD +47 -37
  3. sempy_labs/__init__.py +70 -51
  4. sempy_labs/_ai.py +0 -2
  5. sempy_labs/_capacity_migration.py +1 -2
  6. sempy_labs/_data_pipelines.py +118 -0
  7. sempy_labs/_documentation.py +144 -0
  8. sempy_labs/_eventhouses.py +118 -0
  9. sempy_labs/_eventstreams.py +118 -0
  10. sempy_labs/_generate_semantic_model.py +3 -3
  11. sempy_labs/_git.py +3 -3
  12. sempy_labs/_helper_functions.py +116 -26
  13. sempy_labs/_icons.py +21 -0
  14. sempy_labs/_kql_databases.py +134 -0
  15. sempy_labs/_kql_querysets.py +124 -0
  16. sempy_labs/_list_functions.py +12 -425
  17. sempy_labs/_mirrored_warehouses.py +50 -0
  18. sempy_labs/_ml_experiments.py +122 -0
  19. sempy_labs/_ml_models.py +120 -0
  20. sempy_labs/_model_auto_build.py +0 -4
  21. sempy_labs/_model_bpa.py +9 -11
  22. sempy_labs/_model_bpa_bulk.py +8 -7
  23. sempy_labs/_model_dependencies.py +26 -18
  24. sempy_labs/_notebooks.py +5 -16
  25. sempy_labs/_query_scale_out.py +2 -2
  26. sempy_labs/_refresh_semantic_model.py +7 -19
  27. sempy_labs/_spark.py +10 -10
  28. sempy_labs/_vertipaq.py +16 -18
  29. sempy_labs/_warehouses.py +132 -0
  30. sempy_labs/_workspaces.py +0 -3
  31. sempy_labs/admin/_basic_functions.py +92 -10
  32. sempy_labs/admin/_domains.py +1 -1
  33. sempy_labs/directlake/_directlake_schema_sync.py +1 -1
  34. sempy_labs/directlake/_dl_helper.py +32 -16
  35. sempy_labs/directlake/_guardrails.py +7 -7
  36. sempy_labs/directlake/_update_directlake_partition_entity.py +1 -1
  37. sempy_labs/directlake/_warm_cache.py +1 -1
  38. sempy_labs/lakehouse/_get_lakehouse_tables.py +3 -3
  39. sempy_labs/lakehouse/_lakehouse.py +3 -2
  40. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +5 -0
  41. sempy_labs/report/_generate_report.py +1 -1
  42. sempy_labs/report/_report_bpa.py +13 -3
  43. sempy_labs/report/_reportwrapper.py +14 -16
  44. sempy_labs/tom/_model.py +261 -24
  45. {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/LICENSE +0 -0
  46. {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/WHEEL +0 -0
  47. {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/top_level.txt +0 -0
sempy_labs/_ml_models.py ADDED
@@ -0,0 +1,120 @@
+ import sempy.fabric as fabric
+ import pandas as pd
+ import sempy_labs._icons as icons
+ from typing import Optional
+ from sempy_labs._helper_functions import (
+     resolve_workspace_name_and_id,
+     lro,
+     pagination,
+ )
+ from sempy.fabric.exceptions import FabricHTTPException
+
+
+ def list_ml_models(workspace: Optional[str] = None) -> pd.DataFrame:
+     """
+     Shows the ML models within a workspace.
+
+     Parameters
+     ----------
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the ML models within a workspace.
+     """
+
+     df = pd.DataFrame(columns=["ML Model Name", "ML Model Id", "Description"])
+
+     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+     client = fabric.FabricRestClient()
+     response = client.get(f"/v1/workspaces/{workspace_id}/mlModels")
+     if response.status_code != 200:
+         raise FabricHTTPException(response)
+
+     responses = pagination(client, response)
+
+     for r in responses:
+         for v in r.get("value", []):
+             model_id = v.get("id")
+             modelName = v.get("displayName")
+             desc = v.get("description")
+
+             new_data = {
+                 "ML Model Name": modelName,
+                 "ML Model Id": model_id,
+                 "Description": desc,
+             }
+             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+     return df
+
+
+ def create_ml_model(
+     name: str, description: Optional[str] = None, workspace: Optional[str] = None
+ ):
+     """
+     Creates a Fabric ML model.
+
+     Parameters
+     ----------
+     name: str
+         Name of the ML model.
+     description : str, default=None
+         A description of the environment.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     """
+
+     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+     request_body = {"displayName": name}
+
+     if description:
+         request_body["description"] = description
+
+     client = fabric.FabricRestClient()
+     response = client.post(f"/v1/workspaces/{workspace_id}/mlModels", json=request_body)
+
+     lro(client, response, status_codes=[201, 202])
+
+     print(
+         f"{icons.green_dot} The '{name}' ML model has been created within the '{workspace}' workspace."
+     )
+
+
+ def delete_ml_model(name: str, workspace: Optional[str] = None):
+     """
+     Deletes a Fabric ML model.
+
+     Parameters
+     ----------
+     name: str
+         Name of the ML model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     """
+
+     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+     item_id = fabric.resolve_item_id(
+         item_name=name, type="MLModel", workspace=workspace
+     )
+
+     client = fabric.FabricRestClient()
+     response = client.delete(f"/v1/workspaces/{workspace_id}/mlModels/{item_id}")
+
+     if response.status_code != 200:
+         raise FabricHTTPException(response)
+
+     print(
+         f"{icons.green_dot} The '{name}' ML model within the '{workspace}' workspace has been deleted."
+     )
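
The new module is a small CRUD-style wrapper over the Fabric mlModels REST endpoint. A minimal usage sketch, assuming a Fabric notebook with 0.8.1 installed and that the functions are re-exported from the package root (the __init__.py changes suggest this; otherwise import from sempy_labs._ml_models). "My Workspace" and "Churn Model" are placeholder names:

    import sempy_labs as labs

    # Enumerate ML model items in the workspace (returns a pandas DataFrame).
    models = labs.list_ml_models(workspace="My Workspace")
    print(models)

    # Create and then delete a scratch ML model item.
    labs.create_ml_model(name="Churn Model", description="scratch item", workspace="My Workspace")
    labs.delete_ml_model(name="Churn Model", workspace="My Workspace")
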
sempy_labs/_model_auto_build.py CHANGED
@@ -34,10 +34,6 @@ def model_auto_build(
          The Fabric workspace used by the lakehouse.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.
- 
-     Returns
-     -------
- 
      """
  
      workspace = fabric.resolve_workspace_name(workspace)
sempy_labs/_model_bpa.py CHANGED
@@ -3,7 +3,6 @@ import pandas as pd
  import warnings
  import datetime
  from IPython.display import display, HTML
- from pyspark.sql import SparkSession
  from sempy_labs._model_dependencies import get_model_calc_dependencies
  from sempy_labs._helper_functions import (
      format_dax_object_name,
@@ -13,6 +12,7 @@ from sempy_labs._helper_functions import (
      resolve_workspace_capacity,
      resolve_dataset_id,
      get_language_codes,
+     get_max_run_id,
  )
  from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
  from sempy_labs.tom import connect_semantic_model
@@ -30,9 +30,9 @@ def run_model_bpa(
      dataset: str,
      rules: Optional[pd.DataFrame] = None,
      workspace: Optional[str] = None,
-     export: Optional[bool] = False,
-     return_dataframe: Optional[bool] = False,
-     extended: Optional[bool] = False,
+     export: bool = False,
+     return_dataframe: bool = False,
+     extended: bool = False,
      language: Optional[str] = None,
      **kwargs,
  ):
@@ -151,6 +151,7 @@ def run_model_bpa(
      def translate_using_spark(rule_file):
  
          from synapse.ml.services import Translate
+         from pyspark.sql import SparkSession
  
          rules_temp = rule_file.copy()
          rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1)
@@ -346,15 +347,11 @@ def run_model_bpa(
  
          dfExport["Severity"].replace(icons.severity_mapping, inplace=True)
  
-         spark = SparkSession.builder.getOrCreate()
-         query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}"
- 
          if len(lakeT_filt) == 0:
              runId = 1
          else:
-             dfSpark = spark.sql(query)
-             maxRunId = dfSpark.collect()[0][0]
-             runId = maxRunId + 1
+             max_run_id = get_max_run_id(table_name=delta_table_name)
+             runId = max_run_id + 1
  
          now = datetime.datetime.now()
          dfD = fabric.list_datasets(workspace=workspace, mode="rest")
@@ -514,4 +511,5 @@ def run_model_bpa(
      tab_html += "</div>"
  
      # Display the tabs, tab contents, and run the script
-     return display(HTML(styles + tab_html + content_html + script))
+     if not export:
+         return display(HTML(styles + tab_html + content_html + script))
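
The inline Spark SQL for finding the last RunId is replaced by a shared get_max_run_id helper from _helper_functions.py, whose implementation is not part of this diff. A rough sketch of an equivalent helper, assuming it runs the same MAX(RunId) query against the attached lakehouse (the function and parameter names below are illustrative only; the real helper is called with table_name alone and resolves the lakehouse itself):

    from pyspark.sql import SparkSession

    def get_max_run_id_sketch(lakehouse: str, table_name: str) -> int:
        # Hypothetical stand-in for sempy_labs._helper_functions.get_max_run_id.
        spark = SparkSession.builder.getOrCreate()
        row = spark.sql(
            f"SELECT MAX(RunId) AS max_run_id FROM {lakehouse}.{table_name}"
        ).collect()[0]
        # An empty table yields NULL, so fall back to 0.
        return row["max_run_id"] or 0
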
sempy_labs/_model_bpa_bulk.py CHANGED
@@ -1,14 +1,17 @@
  import sempy.fabric as fabric
  import pandas as pd
  import datetime
- from pyspark.sql import SparkSession
  from sempy_labs._helper_functions import (
      resolve_lakehouse_name,
      save_as_delta_table,
      resolve_workspace_capacity,
      retry,
+     get_max_run_id,
+ )
+ from sempy_labs.lakehouse import (
+     get_lakehouse_tables,
+     lakehouse_attached,
  )
- from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
  from sempy_labs._model_bpa import run_model_bpa
  from typing import Optional, List
  from sempy._utils._log import log
@@ -18,7 +21,7 @@ import sempy_labs._icons as icons
  @log
  def run_model_bpa_bulk(
      rules: Optional[pd.DataFrame] = None,
-     extended: Optional[bool] = False,
+     extended: bool = False,
      language: Optional[str] = None,
      workspace: Optional[str | List[str]] = None,
      skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"],
@@ -78,7 +81,6 @@ def run_model_bpa_bulk(
      ]
      now = datetime.datetime.now()
      output_table = "modelbparesults"
-     spark = SparkSession.builder.getOrCreate()
      lakehouse_workspace = fabric.resolve_workspace_name()
      lakehouse_id = fabric.get_lakehouse_id()
      lakehouse = resolve_lakehouse_name(
@@ -90,9 +92,8 @@ def run_model_bpa_bulk(
      if len(lakeT_filt) == 0:
          runId = 1
      else:
-         dfSpark = spark.table(f"`{lakehouse_id}`.{output_table}").select(F.max("RunId"))
-         maxRunId = dfSpark.collect()[0][0]
-         runId = maxRunId + 1
+         max_run_id = get_max_run_id(table_name=output_table)
+         runId = max_run_id + 1
  
      if isinstance(workspace, str):
          workspace = [workspace]
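
Given the signature above, a bulk scan could be kicked off as follows; a sketch assuming a notebook with a default lakehouse attached (results land in the modelbparesults table) and placeholder workspace names:

    import sempy_labs as labs

    # Run the Best Practice Analyzer across every semantic model in two workspaces,
    # appending the results to the attached lakehouse with an incremented RunId.
    labs.run_model_bpa_bulk(
        workspace=["Sales", "Finance"],
        skip_models=["ModelBPA", "Fabric Capacity Metrics"],
    )
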
sempy_labs/_model_dependencies.py CHANGED
@@ -74,7 +74,7 @@ def get_measure_dependencies(
  
      for index, dependency in dep_filt.iterrows():
          d = True
-         if dependency[5] == "Measure":
+         if dependency.iloc[5] == "Measure":
              d = False
              df = pd.concat(
                  [
@@ -85,12 +85,14 @@
                          "Table Name": r["Table Name"],
                          "Object Name": r["Object Name"],
                          "Object Type": r["Object Type"],
-                         "Referenced Object": dependency[4],
-                         "Referenced Table": dependency[3],
-                         "Referenced Object Type": dependency[5],
+                         "Referenced Object": dependency.iloc[4],
+                         "Referenced Table": dependency.iloc[3],
+                         "Referenced Object Type": dependency.iloc[
+                             5
+                         ],
                          "Done": d,
                          "Full Object Name": r["Full Object Name"],
-                         "Referenced Full Object Name": dependency[
+                         "Referenced Full Object Name": dependency.iloc[
                              7
                          ],
                          "Parent Node": rObj,
@@ -110,12 +112,14 @@ def get_measure_dependencies(
                          "Table Name": r["Table Name"],
                          "Object Name": r["Object Name"],
                          "Object Type": r["Object Type"],
-                         "Referenced Object": dependency[4],
-                         "Referenced Table": dependency[3],
-                         "Referenced Object Type": dependency[5],
+                         "Referenced Object": dependency.iloc[4],
+                         "Referenced Table": dependency.iloc[3],
+                         "Referenced Object Type": dependency.iloc[
+                             5
+                         ],
                          "Done": d,
                          "Full Object Name": r["Full Object Name"],
-                         "Referenced Full Object Name": dependency[
+                         "Referenced Full Object Name": dependency.iloc[
                              7
                          ],
                          "Parent Node": rObj,
@@ -203,7 +207,7 @@ def get_model_calc_dependencies(
  
      for index, dependency in dep_filt.iterrows():
          d = True
-         if dependency[5] in objs:
+         if dependency.iloc[5] in objs:
              d = False
              df = pd.concat(
                  [
@@ -214,12 +218,14 @@ def get_model_calc_dependencies(
                          "Table Name": r["Table Name"],
                          "Object Name": r["Object Name"],
                          "Object Type": r["Object Type"],
-                         "Referenced Object": dependency[4],
-                         "Referenced Table": dependency[3],
-                         "Referenced Object Type": dependency[5],
+                         "Referenced Object": dependency.iloc[4],
+                         "Referenced Table": dependency.iloc[3],
+                         "Referenced Object Type": dependency.iloc[
+                             5
+                         ],
                          "Done": d,
                          "Full Object Name": r["Full Object Name"],
-                         "Referenced Full Object Name": dependency[
+                         "Referenced Full Object Name": dependency.iloc[
                              7
                          ],
                          "Parent Node": rObj,
@@ -239,12 +245,14 @@ def get_model_calc_dependencies(
                          "Table Name": r["Table Name"],
                          "Object Name": r["Object Name"],
                          "Object Type": r["Object Type"],
-                         "Referenced Object": dependency[5],
-                         "Referenced Table": dependency[4],
-                         "Referenced Object Type": dependency[6],
+                         "Referenced Object": dependency.iloc[5],
+                         "Referenced Table": dependency.iloc[4],
+                         "Referenced Object Type": dependency.iloc[
+                             6
+                         ],
                          "Done": d,
                          "Full Object Name": r["Full Object Name"],
-                         "Referenced Full Object Name": dependency[
+                         "Referenced Full Object Name": dependency.iloc[
                              7
                          ],
                          "Parent Node": rObj,
sempy_labs/_notebooks.py CHANGED
@@ -8,13 +8,14 @@ from sempy_labs._helper_functions import (
      resolve_workspace_name_and_id,
      lro,
      _decode_b64,
+     resolve_notebook_id,
  )
  from sempy.fabric.exceptions import FabricHTTPException
  
  
  def get_notebook_definition(
-     notebook_name: str, workspace: Optional[str] = None, decode: Optional[bool] = True
- ):
+     notebook_name: str, workspace: Optional[str] = None, decode: bool = True
+ ) -> str:
      """
      Obtains the notebook definition.
  
@@ -32,21 +33,12 @@ def get_notebook_definition(
  
      Returns
      -------
-     ipynb
+     str
          The notebook definition.
      """
  
      (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
- 
-     dfI = fabric.list_items(workspace=workspace, type="Notebook")
-     dfI_filt = dfI[dfI["Display Name"] == notebook_name]
- 
-     if len(dfI_filt) == 0:
-         raise ValueError(
-             f"{icons.red_dot} The '{notebook_name}' notebook does not exist within the '{workspace}' workspace."
-         )
- 
-     notebook_id = dfI_filt["Id"].iloc[0]
+     notebook_id = resolve_notebook_id(notebook=notebook_name, workspace=workspace)
      client = fabric.FabricRestClient()
      response = client.post(
          f"v1/workspaces/{workspace_id}/notebooks/{notebook_id}/getDefinition",
@@ -90,9 +82,6 @@ def import_notebook_from_web(
          The name of the workspace.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.
- 
-     Returns
-     -------
      """
  
      (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
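
With the new return annotation, callers get the decoded notebook payload back as a string. A short usage sketch, assuming the function is exposed from the package root and using placeholder names:

    import sempy_labs as labs

    # Fetch the .ipynb definition of a notebook as a decoded string.
    nb_definition = labs.get_notebook_definition(
        notebook_name="My Notebook", workspace="My Workspace"
    )
    print(nb_definition[:200])
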
sempy_labs/_query_scale_out.py CHANGED
@@ -181,8 +181,8 @@ def disable_qso(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:
  
  def set_qso(
      dataset: str,
-     auto_sync: Optional[bool] = True,
-     max_read_only_replicas: Optional[int] = -1,
+     auto_sync: bool = True,
+     max_read_only_replicas: int = -1,
      workspace: Optional[str] = None,
  ) -> pd.DataFrame:
      """
sempy_labs/_refresh_semantic_model.py CHANGED
@@ -13,10 +13,10 @@ def refresh_semantic_model(
      dataset: str,
      tables: Optional[Union[str, List[str]]] = None,
      partitions: Optional[Union[str, List[str]]] = None,
-     refresh_type: Optional[str] = None,
-     retry_count: Optional[int] = 0,
-     apply_refresh_policy: Optional[bool] = True,
-     max_parallelism: Optional[int] = 10,
+     refresh_type: str = "full",
+     retry_count: int = 0,
+     apply_refresh_policy: bool = True,
+     max_parallelism: int = 10,
      workspace: Optional[str] = None,
  ):
      """
@@ -30,7 +30,7 @@ def refresh_semantic_model(
          A string or a list of tables to refresh.
      partitions: str, List[str], default=None
          A string or a list of partitions to refresh. Partitions must be formatted as such: 'Table Name'[Partition Name].
-     refresh_type : str, default='full'
+     refresh_type : str, default="full"
          The type of processing to perform. Types align with the TMSL refresh command types: full, clearValues, calculate, dataOnly, automatic, and defragment. The add type isn't supported. Defaults to "full".
      retry_count : int, default=0
          Number of times the operation retries before failing.
@@ -48,9 +48,6 @@
  
      workspace = fabric.resolve_workspace_name(workspace)
  
-     if refresh_type is None:
-         refresh_type = "full"
- 
      if isinstance(tables, str):
          tables = [tables]
      if isinstance(partitions, str):
@@ -74,18 +71,9 @@
          refresh_type.lower().replace("only", "Only").replace("values", "Values")
      )
  
-     refreshTypes = [
-         "full",
-         "automatic",
-         "dataOnly",
-         "calculate",
-         "clearValues",
-         "defragment",
-     ]
- 
-     if refresh_type not in refreshTypes:
+     if refresh_type not in icons.refreshTypes:
          raise ValueError(
-             f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {refreshTypes}."
+             f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {icons.refreshTypes}."
          )
  
      if len(objects) == 0:
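
Since refresh_type now defaults to "full" in the signature itself, the removed None-handling block is no longer needed and the valid types live in icons.refreshTypes. Usage is unchanged; a sketch with placeholder names:

    import sempy_labs as labs

    # Full process of the whole model (the typed default).
    labs.refresh_semantic_model(dataset="Sales Model", workspace="My Workspace")

    # Data-only refresh of selected tables.
    labs.refresh_semantic_model(
        dataset="Sales Model",
        tables=["DimDate", "FactSales"],
        refresh_type="dataOnly",
        workspace="My Workspace",
    )
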
sempy_labs/_spark.py CHANGED
@@ -91,9 +91,9 @@ def create_custom_pool(
      max_node_count: int,
      min_executors: int,
      max_executors: int,
-     node_family: Optional[str] = "MemoryOptimized",
-     auto_scale_enabled: Optional[bool] = True,
-     dynamic_executor_allocation_enabled: Optional[bool] = True,
+     node_family: str = "MemoryOptimized",
+     auto_scale_enabled: bool = True,
+     dynamic_executor_allocation_enabled: bool = True,
      workspace: Optional[str] = None,
  ):
      """
@@ -108,11 +108,11 @@ def create_custom_pool(
      min_node_count : int
          The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
      max_node_count : int
-         The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
+         The maximum node count.
      min_executors : int
          The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
      max_executors : int
-         The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
+         The maximum executors.
      node_family : str, default='MemoryOptimized'
          The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
      auto_scale_enabled : bool, default=True
@@ -182,13 +182,13 @@ def update_custom_pool(
          The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
          Defaults to None which keeps the existing property setting.
      max_node_count : int, default=None
-         The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
+         The maximum node count.
          Defaults to None which keeps the existing property setting.
      min_executors : int, default=None
          The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
          Defaults to None which keeps the existing property setting.
      max_executors : int, default=None
-         The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
+         The maximum executors.
          Defaults to None which keeps the existing property setting.
      node_family : str, default=None
          The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
@@ -299,7 +299,7 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
  
  
  def get_spark_settings(
-     workspace: Optional[str] = None, return_dataframe: Optional[bool] = True
+     workspace: Optional[str] = None, return_dataframe: bool = True
  ) -> pd.DataFrame | dict:
      """
      Shows the spark settings for a workspace.
@@ -407,10 +407,10 @@ def update_spark_settings(
          `Default pool <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#poolproperties>`_ for workspace.
          Defaults to None which keeps the existing property setting.
      max_node_count : int, default=None
-         The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
+         The maximum node count.
          Defaults to None which keeps the existing property setting.
      max_executors : int, default=None
-         The `maximum executors <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
+         The maximum executors.
          Defaults to None which keeps the existing property setting.
      environment_name : str, default=None
          The name of the `default environment <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#environmentproperties>`_. Empty string indicated there is no workspace default environment
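
Only type hints and docstring links changed here; get_spark_settings keeps returning a DataFrame by default and the raw dict when asked. A sketch with a placeholder workspace name, assuming the function is exposed from the package root:

    import sempy_labs as labs

    settings_df = labs.get_spark_settings(workspace="My Workspace")
    settings_raw = labs.get_spark_settings(workspace="My Workspace", return_dataframe=False)
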
sempy_labs/_vertipaq.py CHANGED
@@ -13,6 +13,7 @@ from sempy_labs._helper_functions import (
      resolve_dataset_id,
      save_as_delta_table,
      resolve_workspace_capacity,
+     get_max_run_id,
  )
  from sempy_labs._list_functions import list_relationships, list_tables
  from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -27,7 +28,7 @@ def vertipaq_analyzer(
      dataset: str,
      workspace: Optional[str] = None,
      export: Optional[str] = None,
-     read_stats_from_data: Optional[bool] = False,
+     read_stats_from_data: bool = False,
      **kwargs,
  ):
      """
@@ -336,10 +337,10 @@
              int_cols.append(k)
          elif v in ["float", "double"] and k != "Temperature":
              pct_cols.append(k)
-     colSize[int_cols] = colSize[int_cols].applymap("{:,}".format)
-     temp[int_cols] = temp[int_cols].applymap("{:,}".format)
-     colSize[pct_cols] = colSize[pct_cols].applymap("{:.2f}%".format)
-     temp[pct_cols] = temp[pct_cols].applymap("{:.2f}%".format)
+     colSize[int_cols] = colSize[int_cols].map("{:,}".format)
+     temp[int_cols] = temp[int_cols].map("{:,}".format)
+     colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format)
+     temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format)
  
      # Tables
      int_cols = []
@@ -351,8 +352,8 @@
              pct_cols.append(k)
      export_Table = dfT.copy()
  
-     dfT[int_cols] = dfT[int_cols].applymap("{:,}".format)
-     dfT[pct_cols] = dfT[pct_cols].applymap("{:.2f}%".format)
+     dfT[int_cols] = dfT[int_cols].map("{:,}".format)
+     dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format)
  
      # Relationships
      dfR = pd.merge(
@@ -391,7 +392,7 @@
              int_cols.append(k)
      if not read_stats_from_data:
          int_cols.remove("Missing Rows")
-     dfR[int_cols] = dfR[int_cols].applymap("{:,}".format)
+     dfR[int_cols] = dfR[int_cols].map("{:,}".format)
  
      # Partitions
      dfP = dfP[
@@ -414,7 +415,7 @@
          if v in ["int", "long", "double", "float"]:
              int_cols.append(k)
      intList = ["Record Count", "Segment Count", "Records per Segment"]
-     dfP[intList] = dfP[intList].applymap("{:,}".format)
+     dfP[intList] = dfP[intList].map("{:,}".format)
  
      # Hierarchies
      dfH_filt = dfH[dfH["Level Ordinal"] == 0]
@@ -426,7 +427,7 @@
      dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
      export_Hier = dfH_filt.copy()
      intList = ["Used Size"]
-     dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)
+     dfH_filt[intList] = dfH_filt[intList].map("{:,}".format)
  
      # Model
      # Converting to KB/MB/GB necessitates division by 1024 * 1000.
@@ -456,7 +457,7 @@
      for k, v in vertipaq_map["Model"].items():
          if v in ["long", "int"] and k != "Compatibility Level":
              int_cols.append(k)
-     dfModel[int_cols] = dfModel[int_cols].applymap("{:,}".format)
+     dfModel[int_cols] = dfModel[int_cols].map("{:,}".format)
  
      dataFrames = {
          "dfModel": dfModel,
@@ -483,26 +484,23 @@
      )
  
      if export == "table":
-         spark = SparkSession.builder.getOrCreate()
+         # spark = SparkSession.builder.getOrCreate()
  
          lakehouse_id = fabric.get_lakehouse_id()
          lake_workspace = fabric.resolve_workspace_name()
          lakehouse = resolve_lakehouse_name(
              lakehouse_id=lakehouse_id, workspace=lake_workspace
          )
-         lakeTName = "vertipaq_analyzer_model"
+         lakeTName = "vertipaqanalyzer_model"
  
          lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
          lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]
  
-         query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
- 
          if len(lakeT_filt) == 0:
              runId = 1
          else:
-             dfSpark = spark.sql(query)
-             maxRunId = dfSpark.collect()[0][0]
-             runId = maxRunId + 1
+             max_run_id = get_max_run_id(table_name=lakeTName)
+             runId = max_run_id + 1
  
          dfMap = {
              "Columns": ["Columns", export_Col],