semantic-link-labs 0.8.10__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)
  1. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/METADATA +6 -5
  2. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/RECORD +81 -80
  3. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +34 -3
  5. sempy_labs/_authentication.py +80 -4
  6. sempy_labs/_capacities.py +770 -200
  7. sempy_labs/_capacity_migration.py +7 -37
  8. sempy_labs/_clear_cache.py +37 -35
  9. sempy_labs/_connections.py +13 -13
  10. sempy_labs/_data_pipelines.py +20 -20
  11. sempy_labs/_dataflows.py +27 -28
  12. sempy_labs/_dax.py +41 -47
  13. sempy_labs/_deployment_pipelines.py +1 -1
  14. sempy_labs/_environments.py +26 -23
  15. sempy_labs/_eventhouses.py +16 -15
  16. sempy_labs/_eventstreams.py +16 -15
  17. sempy_labs/_external_data_shares.py +18 -20
  18. sempy_labs/_gateways.py +16 -14
  19. sempy_labs/_generate_semantic_model.py +107 -62
  20. sempy_labs/_git.py +105 -43
  21. sempy_labs/_helper_functions.py +251 -194
  22. sempy_labs/_job_scheduler.py +227 -0
  23. sempy_labs/_kql_databases.py +16 -15
  24. sempy_labs/_kql_querysets.py +16 -15
  25. sempy_labs/_list_functions.py +150 -126
  26. sempy_labs/_managed_private_endpoints.py +19 -17
  27. sempy_labs/_mirrored_databases.py +51 -48
  28. sempy_labs/_mirrored_warehouses.py +5 -4
  29. sempy_labs/_ml_experiments.py +16 -15
  30. sempy_labs/_ml_models.py +15 -14
  31. sempy_labs/_model_bpa.py +210 -207
  32. sempy_labs/_model_bpa_bulk.py +2 -2
  33. sempy_labs/_model_bpa_rules.py +3 -3
  34. sempy_labs/_model_dependencies.py +55 -29
  35. sempy_labs/_notebooks.py +29 -25
  36. sempy_labs/_one_lake_integration.py +23 -26
  37. sempy_labs/_query_scale_out.py +75 -64
  38. sempy_labs/_refresh_semantic_model.py +25 -26
  39. sempy_labs/_spark.py +33 -32
  40. sempy_labs/_sql.py +19 -12
  41. sempy_labs/_translations.py +10 -7
  42. sempy_labs/_vertipaq.py +38 -33
  43. sempy_labs/_warehouses.py +26 -25
  44. sempy_labs/_workspace_identity.py +11 -10
  45. sempy_labs/_workspaces.py +40 -33
  46. sempy_labs/admin/_basic_functions.py +166 -115
  47. sempy_labs/admin/_domains.py +7 -2
  48. sempy_labs/admin/_external_data_share.py +3 -3
  49. sempy_labs/admin/_git.py +4 -1
  50. sempy_labs/admin/_items.py +11 -6
  51. sempy_labs/admin/_scanner.py +10 -5
  52. sempy_labs/directlake/_directlake_schema_compare.py +25 -16
  53. sempy_labs/directlake/_directlake_schema_sync.py +24 -12
  54. sempy_labs/directlake/_dl_helper.py +74 -55
  55. sempy_labs/directlake/_generate_shared_expression.py +10 -9
  56. sempy_labs/directlake/_get_directlake_lakehouse.py +32 -36
  57. sempy_labs/directlake/_get_shared_expression.py +4 -3
  58. sempy_labs/directlake/_guardrails.py +12 -6
  59. sempy_labs/directlake/_list_directlake_model_calc_tables.py +15 -9
  60. sempy_labs/directlake/_show_unsupported_directlake_objects.py +16 -10
  61. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +35 -31
  62. sempy_labs/directlake/_update_directlake_partition_entity.py +39 -31
  63. sempy_labs/directlake/_warm_cache.py +87 -65
  64. sempy_labs/lakehouse/_get_lakehouse_columns.py +23 -26
  65. sempy_labs/lakehouse/_get_lakehouse_tables.py +27 -38
  66. sempy_labs/lakehouse/_lakehouse.py +7 -20
  67. sempy_labs/lakehouse/_shortcuts.py +42 -23
  68. sempy_labs/migration/_create_pqt_file.py +16 -11
  69. sempy_labs/migration/_refresh_calc_tables.py +16 -10
  70. sempy_labs/report/_download_report.py +9 -8
  71. sempy_labs/report/_generate_report.py +85 -44
  72. sempy_labs/report/_paginated.py +9 -9
  73. sempy_labs/report/_report_bpa.py +15 -11
  74. sempy_labs/report/_report_functions.py +80 -91
  75. sempy_labs/report/_report_helper.py +8 -4
  76. sempy_labs/report/_report_list_functions.py +24 -13
  77. sempy_labs/report/_report_rebind.py +17 -16
  78. sempy_labs/report/_reportwrapper.py +41 -33
  79. sempy_labs/tom/_model.py +139 -21
  80. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/LICENSE +0 -0
  81. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.9.0.dist-info}/top_level.txt +0 -0
sempy_labs/directlake/_warm_cache.py
@@ -3,34 +3,39 @@ import pandas as pd
 from tqdm.auto import tqdm
 import numpy as np
 import time
-from sempy_labs._helper_functions import format_dax_object_name
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs._model_dependencies import get_measure_dependencies
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from uuid import UUID


 @log
 def warm_direct_lake_cache_perspective(
-    dataset: str,
+    dataset: str | UUID,
     perspective: str,
     add_dependencies: bool = False,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ) -> pd.DataFrame:
     """
     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     perspective : str
         Name of the perspective which contains objects to be used for warming the cache.
     add_dependencies : bool, default=False
         Includes object dependencies in the cache warming process.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -40,15 +45,16 @@ def warm_direct_lake_cache_perspective(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-    if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

-    dfPersp = fabric.list_perspectives(dataset=dataset, workspace=workspace)
+    dfPersp = fabric.list_perspectives(dataset=dataset_id, workspace=workspace_id)
     dfPersp["DAX Object Name"] = format_dax_object_name(
         dfPersp["Table Name"], dfPersp["Object Name"]
     )
@@ -65,7 +71,7 @@ def warm_direct_lake_cache_perspective(

     if add_dependencies:
         # Measure dependencies
-        md = get_measure_dependencies(dataset, workspace)
+        md = get_measure_dependencies(dataset_id, workspace_id)
         md["Referenced Full Object"] = format_dax_object_name(
             md["Referenced Table"], md["Referenced Object"]
         )
@@ -78,7 +84,7 @@ def warm_direct_lake_cache_perspective(

         # Hierarchy dependencies
         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
-        dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace)
+        dfH = fabric.list_hierarchies(dataset=dataset_id, workspace=workspace_id)
         dfH["Hierarchy Object"] = format_dax_object_name(
             dfH["Table Name"], dfH["Hierarchy Name"]
         )
@@ -92,7 +98,7 @@ def warm_direct_lake_cache_perspective(

         # Relationship dependencies
         unique_table_names = dfPersp_filt["Table Name"].unique()
-        dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
+        dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
         dfR["From Object"] = format_dax_object_name(
             dfR["From Table"], dfR["From Column"]
         )
@@ -120,41 +126,22 @@ def warm_direct_lake_cache_perspective(
     df["Table Name"] = df["Table Name"].str[1:-1]
     df["Column Name"] = df["Column Name"].str[0:-1]

-    tbls = list(set(value.split("[")[0] for value in merged_list_unique))
-
-    for tableName in (bar := tqdm(tbls)):
-        filtered_list = [
-            value for value in merged_list_unique if value.startswith(f"{tableName}[")
-        ]
-        bar.set_description(f"Warming the '{tableName}' table...")
-        css = ",".join(map(str, filtered_list))
-        dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" ""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
-
-    print(f"{icons.green_dot} The following columns have been put into memory:")
-
-    new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
-    df = df.reindex(columns=new_column_order)
-    df = df[["Table Name", "Column Name"]].sort_values(
-        by=["Table Name", "Column Name"], ascending=True
-    )
-
-    return df
+    return _put_columns_into_memory(dataset=dataset, workspace=workspace, col_df=df)


 @log
 def warm_direct_lake_cache_isresident(
-    dataset: str, workspace: Optional[str] = None
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
-    workspace : str, default=None
-        The Fabric workspace name.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -164,46 +151,81 @@ def warm_direct_lake_cache_isresident(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-    if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

     # Identify columns which are currently in memory (Is Resident = True)
-    dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
-    dfC["DAX Object Name"] = format_dax_object_name(
-        dfC["Table Name"], dfC["Column Name"]
-    )
+    dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id, extended=True)
     dfC_filtered = dfC[dfC["Is Resident"] == True]

     if len(dfC_filtered) == 0:
         raise ValueError(
-            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace."
+            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset_name}' semantic model in the '{workspace_name}' workspace."
         )

     # Refresh/frame dataset
-    refresh_semantic_model(dataset=dataset, refresh_type="full", workspace=workspace)
+    refresh_semantic_model(
+        dataset=dataset_id, refresh_type="full", workspace=workspace_id
+    )
     time.sleep(2)

-    # Run basic query to get columns into memory; completed one table at a time (so as not to overload the capacity)
-    tbls = dfC_filtered["Table Name"].unique()
+    return _put_columns_into_memory(
+        dataset=dataset, workspace=workspace, col_df=dfC_filtered
+    )
+
+
+def _put_columns_into_memory(dataset, workspace, col_df, return_dataframe: bool = True):
+
+    row_limit = 1000000
+
+    dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True)
+    col_df = col_df.copy()
+
+    col_df["DAX Object"] = format_dax_object_name(
+        col_df["Table Name"], col_df["Column Name"]
+    )
+    tbls = col_df["Table Name"].unique()
+
     for table_name in (bar := tqdm(tbls)):
-        bar.set_description(f"Warming the '{table_name}' table...")
-        css = ", ".join(
-            dfC_filtered[dfC_filtered["Table Name"] == table_name]["DAX Object Name"]
-            .astype(str)
-            .tolist()
+        dfT_filt = dfT[dfT["Name"] == table_name]
+        col_df_filt = col_df[col_df["Table Name"] == table_name]
+        if not dfT_filt.empty:
+            row_count = dfT_filt["Row Count"].iloc[0]
+            bar.set_description(f"Warming the '{table_name}' table...")
+            if row_count < row_limit:
+                columns = col_df_filt["DAX Object"].tolist()
+                css = ", ".join(columns)
+                dax = f"EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {css}))"
+                fabric.evaluate_dax(
+                    dataset=dataset, dax_string=dax, workspace=workspace
+                )
+            else:
+                for _, r in col_df_filt.iterrows():
+                    dax_object = r["DAX Object"]
+                    dax = f"""EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {dax_object}))"""
+                    fabric.evaluate_dax(
+                        dataset=dataset, dax_string=dax, workspace=workspace
+                    )
+
+    if return_dataframe:
+        print(
+            f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the current column temperature."
         )
-        dax = f"""EVALUATE TOPN(1,SUMMARIZECOLUMNS({css}))"""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)

-    print(
-        f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh."
-    )
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["DAX Object"] = format_dax_object_name(
+            dfC["Table Name"], dfC["Column Name"]
+        )
+        dfC_filt = dfC[dfC["DAX Object"].isin(col_df["DAX Object"].values)]

-    return dfC_filtered[
-        ["Table Name", "Column Name", "Is Resident", "Temperature"]
-    ].sort_values(by=["Table Name", "Column Name"], ascending=True)
+        return (
+            dfC_filt[["Table Name", "Column Name", "Is Resident", "Temperature"]]
+            .sort_values(by=["Table Name", "Column Name"], ascending=True)
+            .reset_index(drop=True)
+        )
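
Taken together, the _warm_cache.py changes mean both warming functions now accept either a name or a UUID for dataset and workspace, and both delegate the actual work to the new _put_columns_into_memory helper, which queries whole tables below the 1,000,000-row limit and falls back to one query per column above it. A minimal usage sketch under those assumptions; the workspace GUID and model name below are placeholders, not values taken from this release:

from uuid import UUID

import sempy_labs.directlake as directlake

# Hypothetical identifiers, for illustration only.
workspace_id = UUID("00000000-0000-0000-0000-000000000000")

# Warm the columns exposed through a perspective; the model is addressed by
# name and the workspace by GUID, both of which the 0.9.0 signatures allow.
df = directlake.warm_direct_lake_cache_perspective(
    dataset="Sales Model",
    perspective="Reporting",
    add_dependencies=True,
    workspace=workspace_id,
)

# Or refresh the model and re-warm whatever was resident beforehand.
df = directlake.warm_direct_lake_cache_isresident(
    dataset="Sales Model",
    workspace=workspace_id,
)
print(df[["Table Name", "Column Name", "Is Resident", "Temperature"]])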
sempy_labs/lakehouse/_get_lakehouse_columns.py
@@ -1,29 +1,29 @@
-import sempy.fabric as fabric
 import pandas as pd
 from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
     format_dax_object_name,
-    resolve_lakehouse_id,
+    resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
 )
 from typing import Optional
 from sempy._utils._log import log
+from uuid import UUID


 @log
 def get_lakehouse_columns(
-    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows the tables and columns of a lakehouse and their respective properties.

     Parameters
     ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -46,35 +46,32 @@ def get_lakehouse_columns(
         ]
     )

-    workspace = fabric.resolve_workspace_name(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )

     spark = SparkSession.builder.getOrCreate()

     tables = get_lakehouse_tables(
-        lakehouse=lakehouse, workspace=workspace, extended=False, count_rows=False
+        lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]

-    for i, r in tables_filt.iterrows():
-        tName = r["Table Name"]
-        tPath = r["Location"]
-        delta_table = DeltaTable.forPath(spark, tPath)
+    for _, r in tables_filt.iterrows():
+        table_name = r["Table Name"]
+        path = r["Location"]
+        delta_table = DeltaTable.forPath(spark, path)
         sparkdf = delta_table.toDF()

-        for cName, data_type in sparkdf.dtypes:
-            tc = format_dax_object_name(tName, cName)
+        for col_name, data_type in sparkdf.dtypes:
+            full_column_name = format_dax_object_name(table_name, col_name)
             new_data = {
-                "Workspace Name": workspace,
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
-                "Table Name": tName,
-                "Column Name": cName,
-                "Full Column Name": tc,
+                "Table Name": table_name,
+                "Column Name": col_name,
+                "Full Column Name": full_column_name,
                 "Data Type": data_type,
             }
             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
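
With get_lakehouse_columns now resolving both identifiers through resolve_workspace_name_and_id and resolve_lakehouse_name_and_id, the lakehouse and workspace can each be passed as a name, a UUID, or omitted entirely to fall back to the attached lakehouse. A short sketch; the identifiers below are hypothetical:

from uuid import UUID

from sempy_labs.lakehouse import get_lakehouse_columns

# Hypothetical lakehouse GUID; a name such as "MyLakehouse" works equally well.
lakehouse_id = UUID("11111111-1111-1111-1111-111111111111")

df_cols = get_lakehouse_columns(lakehouse=lakehouse_id, workspace="My Workspace")
print(df_cols[["Table Name", "Column Name", "Data Type"]].head())

# Omitting both arguments uses the lakehouse attached to the notebook.
df_cols_default = get_lakehouse_columns()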
sempy_labs/lakehouse/_get_lakehouse_tables.py
@@ -4,10 +4,11 @@ from pyspark.sql import SparkSession
 import pyarrow.parquet as pq
 import datetime
 from sempy_labs._helper_functions import (
-    resolve_lakehouse_id,
-    resolve_lakehouse_name,
+    _get_column_aggregate,
     resolve_workspace_name_and_id,
+    resolve_lakehouse_name_and_id,
     pagination,
+    save_as_delta_table,
 )
 from sempy_labs.directlake._guardrails import (
     get_sku_size,
@@ -18,12 +19,13 @@ from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID


 @log
 def get_lakehouse_tables(
-    lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,
@@ -35,11 +37,11 @@

     Parameters
     ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     extended : bool, default=False
@@ -66,13 +68,10 @@
         ]
     )

-    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-    else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )

     if count_rows: # Setting countrows defaults to extended=True
         extended = True
@@ -104,8 +103,8 @@
     for r in responses:
         for i in r.get("data", []):
             new_data = {
-                "Workspace Name": workspace,
-                "Lakehouse Name": lakehouse,
+                "Workspace Name": workspace_name,
+                "Lakehouse Name": lakehouse_name,
                 "Table Name": i.get("name"),
                 "Format": i.get("format"),
                 "Type": i.get("type"),
@@ -117,7 +116,7 @@
     df = pd.concat(dfs, ignore_index=True)

     if extended:
-        sku_value = get_sku_size(workspace)
+        sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
         spark = SparkSession.builder.getOrCreate()
         df["Files"] = None
@@ -178,23 +177,17 @@
                 f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )

-        spark = SparkSession.builder.getOrCreate()
-
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace
-        )
+        (current_lakehouse_name, current_lakehouse_id) = resolve_lakehouse_name_and_id()
         lakeTName = "lakehouse_table_details"
         lakeT_filt = df[df["Table Name"] == lakeTName]

-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
-
         if len(lakeT_filt) == 0:
-            runId = 1
+            run_id = 1
         else:
-            dfSpark = spark.sql(query)
-            maxRunId = dfSpark.collect()[0][0]
-            runId = maxRunId + 1
+            max_run_id = _get_column_aggregate(
+                lakehouse=current_lakehouse_name, table_name=lakeTName
+            )
+            run_id = max_run_id + 1

         export_df = df.copy()

@@ -239,15 +232,11 @@
         print(
             f"{icons.in_progress} Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n"
         )
-        now = datetime.datetime.now()
-        export_df["Timestamp"] = now
-        export_df["RunId"] = runId
+        export_df["Timestamp"] = datetime.datetime.now()
+        export_df["RunId"] = run_id

-        export_df.columns = export_df.columns.str.replace(" ", "_")
-        spark_df = spark.createDataFrame(export_df)
-        spark_df.write.mode("append").format("delta").saveAsTable(lakeTName)
-        print(
-            f"{icons.bullet} Lakehouse table properties have been saved to the '{lakeTName}' delta table."
+        save_as_delta_table(
+            dataframe=export_df, delta_table_name=lakeTName, write_mode="append"
         )

     return df
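
The get_lakehouse_tables changes follow the same pattern: name-or-UUID inputs, workspace_name/lakehouse_name used for display columns while the IDs are used for API calls, and the export path rewritten to append a snapshot to the 'lakehouse_table_details' delta table via save_as_delta_table with a run id derived from _get_column_aggregate. A usage sketch; the lakehouse and workspace names are placeholders:

from sempy_labs.lakehouse import get_lakehouse_tables

tables = get_lakehouse_tables(
    lakehouse="MyLakehouse",      # or a uuid.UUID
    workspace="My Workspace",     # or a uuid.UUID
    extended=True,    # adds Direct Lake guardrail info sized against the capacity SKU
    count_rows=True,  # forces extended=True, as in the diff above
    export=False,     # True appends a snapshot to the 'lakehouse_table_details' table
)
print(tables[["Table Name", "Format", "Type"]])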
sempy_labs/lakehouse/_lakehouse.py
@@ -1,8 +1,7 @@
-import sempy.fabric as fabric
 from tqdm.auto import tqdm
-from sempy_labs._helper_functions import resolve_lakehouse_name
 from typing import List, Optional, Union
 from sempy._utils._log import log
+from uuid import UUID


 def lakehouse_attached() -> bool:
@@ -29,7 +28,7 @@ def lakehouse_attached() -> bool:
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
@@ -42,8 +41,8 @@
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
@@ -52,12 +51,6 @@
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-
     lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

@@ -83,7 +76,7 @@
 def vacuum_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     retain_n_hours: Optional[int] = None,
 ):
     """
@@ -96,8 +89,8 @@
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     retain_n_hours : int, default=None
@@ -111,12 +104,6 @@
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
-
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
-
     lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
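
In _lakehouse.py the maintenance helpers drop their inline lakehouse resolution (it is now handled inside get_lakehouse_tables) and accept the workspace as a name or UUID. A minimal sketch, with placeholder table and workspace identifiers:

from uuid import UUID

from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

workspace_id = UUID("22222222-2222-2222-2222-222222222222")  # hypothetical GUID

# OPTIMIZE a couple of delta tables in the attached lakehouse of that workspace.
optimize_lakehouse_tables(tables=["FactSales", "DimDate"], workspace=workspace_id)

# VACUUM a single table, keeping one week of history.
vacuum_lakehouse_tables(tables="FactSales", retain_n_hours=168, workspace=workspace_id)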