semantic-link-labs 0.8.10__py3-none-any.whl → 0.8.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (73)
  1. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/METADATA +3 -2
  2. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/RECORD +73 -72
  3. sempy_labs/__init__.py +6 -2
  4. sempy_labs/_clear_cache.py +39 -37
  5. sempy_labs/_connections.py +13 -13
  6. sempy_labs/_data_pipelines.py +20 -20
  7. sempy_labs/_dataflows.py +27 -28
  8. sempy_labs/_dax.py +41 -47
  9. sempy_labs/_environments.py +26 -23
  10. sempy_labs/_eventhouses.py +16 -15
  11. sempy_labs/_eventstreams.py +16 -15
  12. sempy_labs/_external_data_shares.py +18 -20
  13. sempy_labs/_gateways.py +14 -14
  14. sempy_labs/_generate_semantic_model.py +99 -62
  15. sempy_labs/_git.py +105 -43
  16. sempy_labs/_helper_functions.py +148 -131
  17. sempy_labs/_job_scheduler.py +92 -0
  18. sempy_labs/_kql_databases.py +16 -15
  19. sempy_labs/_kql_querysets.py +16 -15
  20. sempy_labs/_list_functions.py +114 -99
  21. sempy_labs/_managed_private_endpoints.py +19 -17
  22. sempy_labs/_mirrored_databases.py +51 -48
  23. sempy_labs/_mirrored_warehouses.py +5 -4
  24. sempy_labs/_ml_experiments.py +16 -15
  25. sempy_labs/_ml_models.py +15 -14
  26. sempy_labs/_model_bpa.py +3 -3
  27. sempy_labs/_model_dependencies.py +55 -29
  28. sempy_labs/_notebooks.py +27 -25
  29. sempy_labs/_one_lake_integration.py +23 -26
  30. sempy_labs/_query_scale_out.py +67 -64
  31. sempy_labs/_refresh_semantic_model.py +25 -26
  32. sempy_labs/_spark.py +33 -32
  33. sempy_labs/_sql.py +12 -9
  34. sempy_labs/_translations.py +10 -7
  35. sempy_labs/_vertipaq.py +34 -31
  36. sempy_labs/_warehouses.py +22 -21
  37. sempy_labs/_workspace_identity.py +11 -10
  38. sempy_labs/_workspaces.py +40 -33
  39. sempy_labs/admin/_basic_functions.py +10 -12
  40. sempy_labs/admin/_external_data_share.py +3 -3
  41. sempy_labs/admin/_items.py +4 -4
  42. sempy_labs/admin/_scanner.py +3 -1
  43. sempy_labs/directlake/_directlake_schema_compare.py +18 -14
  44. sempy_labs/directlake/_directlake_schema_sync.py +18 -12
  45. sempy_labs/directlake/_dl_helper.py +25 -26
  46. sempy_labs/directlake/_generate_shared_expression.py +10 -9
  47. sempy_labs/directlake/_get_directlake_lakehouse.py +16 -13
  48. sempy_labs/directlake/_get_shared_expression.py +4 -3
  49. sempy_labs/directlake/_guardrails.py +12 -6
  50. sempy_labs/directlake/_list_directlake_model_calc_tables.py +15 -9
  51. sempy_labs/directlake/_show_unsupported_directlake_objects.py +16 -10
  52. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +35 -31
  53. sempy_labs/directlake/_update_directlake_partition_entity.py +34 -31
  54. sempy_labs/directlake/_warm_cache.py +87 -65
  55. sempy_labs/lakehouse/_get_lakehouse_columns.py +10 -8
  56. sempy_labs/lakehouse/_get_lakehouse_tables.py +10 -9
  57. sempy_labs/lakehouse/_lakehouse.py +17 -13
  58. sempy_labs/lakehouse/_shortcuts.py +42 -23
  59. sempy_labs/migration/_create_pqt_file.py +16 -11
  60. sempy_labs/migration/_refresh_calc_tables.py +16 -10
  61. sempy_labs/report/_download_report.py +9 -8
  62. sempy_labs/report/_generate_report.py +40 -44
  63. sempy_labs/report/_paginated.py +9 -9
  64. sempy_labs/report/_report_bpa.py +13 -9
  65. sempy_labs/report/_report_functions.py +80 -91
  66. sempy_labs/report/_report_helper.py +8 -4
  67. sempy_labs/report/_report_list_functions.py +24 -13
  68. sempy_labs/report/_report_rebind.py +17 -16
  69. sempy_labs/report/_reportwrapper.py +41 -33
  70. sempy_labs/tom/_model.py +43 -6
  71. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/LICENSE +0 -0
  72. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/WHEEL +0 -0
  73. {semantic_link_labs-0.8.10.dist-info → semantic_link_labs-0.8.11.dist-info}/top_level.txt +0 -0
--- a/sempy_labs/directlake/_update_directlake_partition_entity.py
+++ b/sempy_labs/directlake/_update_directlake_partition_entity.py
@@ -3,36 +3,41 @@ import sempy.fabric as fabric
 from sempy_labs.tom import connect_semantic_model
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs.directlake._dl_helper import get_direct_lake_source
-from sempy_labs._helper_functions import _convert_data_type
+from sempy_labs._helper_functions import (
+    _convert_data_type,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from typing import List, Optional, Union
 import sempy_labs._icons as icons
+from uuid import UUID


 def update_direct_lake_partition_entity(
-    dataset: str,
+    dataset: str | UUID,
     table_name: Union[str, List[str]],
     entity_name: Union[str, List[str]],
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     table_name : str, List[str]
         Name of the table(s) in the semantic model.
     entity_name : str, List[str]
         Name of the lakehouse table to be mapped to the semantic model table.
-    workspace : str, default=None
-        The Fabric workspace name in which the semantic model exists.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID in which the semantic model exists.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """

-    if workspace is None:
-        workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

     # Support both str & list types
     if isinstance(table_name, str):
@@ -48,12 +53,12 @@ def update_direct_lake_partition_entity(
     icons.sll_tags.append("UpdateDLPartition")

     with connect_semantic_model(
-        dataset=dataset, readonly=False, workspace=workspace
+        dataset=dataset_id, readonly=False, workspace=workspace_id
     ) as tom:

         if not tom.is_direct_lake():
             raise ValueError(
-                f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+                f"{icons.red_dot} The '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode."
             )

         for tName in table_name:
@@ -68,42 +73,39 @@ def update_direct_lake_partition_entity(

             if part_name is None:
                 raise ValueError(
-                    f"{icons.red_dot} The '{tName}' table in the '{dataset}' semantic model has not been updated."
+                    f"{icons.red_dot} The '{tName}' table in the '{dataset_name}' semantic model has not been updated."
                 )

             tom.model.Tables[tName].Partitions[part_name].Source.EntityName = eName
             print(
-                f"{icons.green_dot} The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table."
+                f"{icons.green_dot} The '{tName}' table in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{eName}' table."
             )


 def add_table_to_direct_lake_semantic_model(
-    dataset: str,
+    dataset: str | UUID,
     table_name: str,
     lakehouse_table_name: str,
     refresh: bool = True,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Adds a table and all of its columns to a Direct Lake semantic model, based on a Fabric lakehouse table.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     table_name : str, List[str]
         Name of the table in the semantic model.
     lakehouse_table_name : str
         The name of the Fabric lakehouse table.
     refresh : bool, default=True
         Refreshes the table after it is added to the semantic model.
-    workspace : str, default=None
-        The name of the Fabric workspace in which the semantic model resides.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace in which the semantic model resides.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-
-    Returns
-    -------
     """

     sempy.fabric._client._utils._init_analysis_services()
@@ -111,10 +113,11 @@ def add_table_to_direct_lake_semantic_model(
     from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
-        get_direct_lake_source(dataset=dataset, workspace=workspace)
+        get_direct_lake_source(dataset=dataset_id, workspace=workspace_id)
     )

     if artifact_type == "Warehouse":
@@ -125,7 +128,7 @@ def add_table_to_direct_lake_semantic_model(
     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

     with connect_semantic_model(
-        dataset=dataset, readonly=False, workspace=workspace
+        dataset=dataset_id, readonly=False, workspace=workspace_id
     ) as tom:

         table_count = tom.model.Tables.Count
@@ -148,12 +151,12 @@ def add_table_to_direct_lake_semantic_model(
                 == TOM.PartitionSourceType.Entity
            )
            raise ValueError(
-                f"The '{lakehouse_table_name}' table already exists in the '{dataset}' semantic model within the '{workspace}' workspace as the '{t_name}' table."
+                f"The '{lakehouse_table_name}' table already exists in the '{dataset_name}' semantic model within the '{workspace_name}' workspace as the '{t_name}' table."
            )

         if any(t.Name == table_name for t in tom.model.Tables):
             raise ValueError(
-                f"The '{table_name}' table already exists in the '{dataset}' semantic model within the '{workspace}' workspace."
+                f"The '{table_name}' table already exists in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
             )

         dfL = get_lakehouse_tables(
@@ -173,13 +176,13 @@ def add_table_to_direct_lake_semantic_model(

         tom.add_table(name=table_name)
         print(
-            f"{icons.green_dot} The '{table_name}' table has been added to the '{dataset}' semantic model within the '{workspace}' workspace."
+            f"{icons.green_dot} The '{table_name}' table has been added to the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
         )
         tom.add_entity_partition(
             table_name=table_name, entity_name=lakehouse_table_name
         )
         print(
-            f"{icons.green_dot} The '{lakehouse_table_name}' partition has been added to the '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace."
+            f"{icons.green_dot} The '{lakehouse_table_name}' partition has been added to the '{table_name}' table in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
         )

         for i, r in dfLC_filt.iterrows():
@@ -193,10 +196,10 @@ def add_table_to_direct_lake_semantic_model(
                 data_type=dt,
             )
             print(
-                f"{icons.green_dot} The '{lakeCName}' column has been added to the '{table_name}' table as a '{dt}' data type in the '{dataset}' semantic model within the '{workspace}' workspace."
+                f"{icons.green_dot} The '{lakeCName}' column has been added to the '{table_name}' table as a '{dt}' data type in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
             )

         if refresh:
             refresh_semantic_model(
-                dataset=dataset, tables=table_name, workspace=workspace
+                dataset=dataset_id, tables=table_name, workspace=workspace_id
             )
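
The recurring change in this release, visible above, is that the dataset and workspace parameters now accept either a name or a UUID and are resolved internally via resolve_workspace_name_and_id and resolve_dataset_name_and_id. A minimal usage sketch of the updated signature (the UUIDs below are hypothetical placeholders, and the import path assumes the function remains re-exported from sempy_labs.directlake as in earlier releases):

    # Hypothetical usage sketch for the 0.8.11 signature; the UUIDs are placeholders.
    from uuid import UUID
    import sempy_labs.directlake as directlake

    # By name, as in 0.8.10
    directlake.update_direct_lake_partition_entity(
        dataset="Sales Model",
        table_name="DimDate",
        entity_name="dim_date",
        workspace="My Workspace",
    )

    # By ID, newly supported in 0.8.11
    directlake.update_direct_lake_partition_entity(
        dataset=UUID("00000000-0000-0000-0000-000000000000"),
        table_name="DimDate",
        entity_name="dim_date",
        workspace=UUID("00000000-0000-0000-0000-000000000001"),
    )
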
--- a/sempy_labs/directlake/_warm_cache.py
+++ b/sempy_labs/directlake/_warm_cache.py
@@ -3,34 +3,39 @@ import pandas as pd
 from tqdm.auto import tqdm
 import numpy as np
 import time
-from sempy_labs._helper_functions import format_dax_object_name
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_dataset_name_and_id,
+    resolve_workspace_name_and_id,
+)
 from sempy_labs._refresh_semantic_model import refresh_semantic_model
 from sempy_labs._model_dependencies import get_measure_dependencies
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from uuid import UUID


 @log
 def warm_direct_lake_cache_perspective(
-    dataset: str,
+    dataset: str | UUID,
     perspective: str,
     add_dependencies: bool = False,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ) -> pd.DataFrame:
     """
     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
     perspective : str
         Name of the perspective which contains objects to be used for warming the cache.
     add_dependencies : bool, default=False
         Includes object dependencies in the cache warming process.
-    workspace : str, default=None
-        The Fabric workspace name.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -40,15 +45,16 @@ def warm_direct_lake_cache_perspective(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-    if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

-    dfPersp = fabric.list_perspectives(dataset=dataset, workspace=workspace)
+    dfPersp = fabric.list_perspectives(dataset=dataset_id, workspace=workspace_id)
     dfPersp["DAX Object Name"] = format_dax_object_name(
         dfPersp["Table Name"], dfPersp["Object Name"]
     )
@@ -65,7 +71,7 @@ def warm_direct_lake_cache_perspective(

     if add_dependencies:
         # Measure dependencies
-        md = get_measure_dependencies(dataset, workspace)
+        md = get_measure_dependencies(dataset_id, workspace_id)
         md["Referenced Full Object"] = format_dax_object_name(
             md["Referenced Table"], md["Referenced Object"]
         )
@@ -78,7 +84,7 @@ def warm_direct_lake_cache_perspective(

         # Hierarchy dependencies
         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
-        dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace)
+        dfH = fabric.list_hierarchies(dataset=dataset_id, workspace=workspace_id)
         dfH["Hierarchy Object"] = format_dax_object_name(
             dfH["Table Name"], dfH["Hierarchy Name"]
         )
@@ -92,7 +98,7 @@ def warm_direct_lake_cache_perspective(

         # Relationship dependencies
         unique_table_names = dfPersp_filt["Table Name"].unique()
-        dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
+        dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
         dfR["From Object"] = format_dax_object_name(
             dfR["From Table"], dfR["From Column"]
         )
@@ -120,41 +126,22 @@ def warm_direct_lake_cache_perspective(
     df["Table Name"] = df["Table Name"].str[1:-1]
     df["Column Name"] = df["Column Name"].str[0:-1]

-    tbls = list(set(value.split("[")[0] for value in merged_list_unique))
-
-    for tableName in (bar := tqdm(tbls)):
-        filtered_list = [
-            value for value in merged_list_unique if value.startswith(f"{tableName}[")
-        ]
-        bar.set_description(f"Warming the '{tableName}' table...")
-        css = ",".join(map(str, filtered_list))
-        dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))"""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
-
-    print(f"{icons.green_dot} The following columns have been put into memory:")
-
-    new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
-    df = df.reindex(columns=new_column_order)
-    df = df[["Table Name", "Column Name"]].sort_values(
-        by=["Table Name", "Column Name"], ascending=True
-    )
-
-    return df
+    return _put_columns_into_memory(dataset=dataset, workspace=workspace, col_df=df)


 @log
 def warm_direct_lake_cache_isresident(
-    dataset: str, workspace: Optional[str] = None
+    dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.

     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
-    workspace : str, default=None
-        The Fabric workspace name.
+    dataset : str | uuid.UUID
+        Name or ID of the semantic model.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -164,46 +151,81 @@ def warm_direct_lake_cache_isresident(
         Returns a pandas dataframe showing the columns that have been put into memory.
     """

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-    if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+    dfP = fabric.list_partitions(dataset=dataset_id, workspace=workspace_id)
+    if not any(r["Mode"] == "DirectLake" for _, r in dfP.iterrows()):
         raise ValueError(
-            f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+            f"{icons.red_dot} The '{dataset_name}' semantic model in the '{workspace_name}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
         )

     # Identify columns which are currently in memory (Is Resident = True)
-    dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
-    dfC["DAX Object Name"] = format_dax_object_name(
-        dfC["Table Name"], dfC["Column Name"]
-    )
+    dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id, extended=True)
     dfC_filtered = dfC[dfC["Is Resident"] == True]

     if len(dfC_filtered) == 0:
         raise ValueError(
-            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace."
+            f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset_name}' semantic model in the '{workspace_name}' workspace."
         )

     # Refresh/frame dataset
-    refresh_semantic_model(dataset=dataset, refresh_type="full", workspace=workspace)
+    refresh_semantic_model(
+        dataset=dataset_id, refresh_type="full", workspace=workspace_id
+    )
     time.sleep(2)

-    # Run basic query to get columns into memory; completed one table at a time (so as not to overload the capacity)
-    tbls = dfC_filtered["Table Name"].unique()
+    return _put_columns_into_memory(
+        dataset=dataset, workspace=workspace, col_df=dfC_filtered
+    )
+
+
+def _put_columns_into_memory(dataset, workspace, col_df, return_dataframe: bool = True):
+
+    row_limit = 1000000
+
+    dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True)
+    col_df = col_df.copy()
+
+    col_df["DAX Object"] = format_dax_object_name(
+        col_df["Table Name"], col_df["Column Name"]
+    )
+    tbls = col_df["Table Name"].unique()
+
     for table_name in (bar := tqdm(tbls)):
-        bar.set_description(f"Warming the '{table_name}' table...")
-        css = ", ".join(
-            dfC_filtered[dfC_filtered["Table Name"] == table_name]["DAX Object Name"]
-            .astype(str)
-            .tolist()
+        dfT_filt = dfT[dfT["Name"] == table_name]
+        col_df_filt = col_df[col_df["Table Name"] == table_name]
+        if not dfT_filt.empty:
+            row_count = dfT_filt["Row Count"].iloc[0]
+            bar.set_description(f"Warming the '{table_name}' table...")
+            if row_count < row_limit:
+                columns = col_df_filt["DAX Object"].tolist()
+                css = ", ".join(columns)
+                dax = f"EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {css}))"
+                fabric.evaluate_dax(
+                    dataset=dataset, dax_string=dax, workspace=workspace
+                )
+            else:
+                for _, r in col_df_filt.iterrows():
+                    dax_object = r["DAX Object"]
+                    dax = f"""EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {dax_object}))"""
+                    fabric.evaluate_dax(
+                        dataset=dataset, dax_string=dax, workspace=workspace
+                    )
+
+    if return_dataframe:
+        print(
+            f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the current column temperature."
         )
-        dax = f"""EVALUATE TOPN(1,SUMMARIZECOLUMNS({css}))"""
-        fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)

-    print(
-        f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh."
-    )
+        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+        dfC["DAX Object"] = format_dax_object_name(
+            dfC["Table Name"], dfC["Column Name"]
+        )
+        dfC_filt = dfC[dfC["DAX Object"].isin(col_df["DAX Object"].values)]

-    return dfC_filtered[
-        ["Table Name", "Column Name", "Is Resident", "Temperature"]
-    ].sort_values(by=["Table Name", "Column Name"], ascending=True)
+        return (
+            dfC_filt[["Table Name", "Column Name", "Is Resident", "Temperature"]]
+            .sort_values(by=["Table Name", "Column Name"], ascending=True)
+            .reset_index(drop=True)
+        )
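
The cache-warming rewrite above replaces the per-table SUMMARIZECOLUMNS query with a shared _put_columns_into_memory helper that issues one SELECTCOLUMNS query per table, falling back to one query per column once a table reaches 1,000,000 rows. A sketch of the query shape it builds ('Sales' and its columns are hypothetical, and the evaluate_dax call is left commented because dataset and workspace are not defined here):

    # Sketch of the warming query built by the new helper; table and column names are hypothetical.
    table_name = "Sales"
    columns = ["'Sales'[OrderDate]", "'Sales'[Amount]"]  # shape produced by format_dax_object_name
    css = ", ".join(columns)
    dax = f"EVALUATE TOPN(1, SELECTCOLUMNS('{table_name}', {css}))"
    # fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
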
--- a/sempy_labs/lakehouse/_get_lakehouse_columns.py
+++ b/sempy_labs/lakehouse/_get_lakehouse_columns.py
@@ -5,14 +5,16 @@ from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
     format_dax_object_name,
     resolve_lakehouse_id,
+    resolve_workspace_name_and_id,
 )
 from typing import Optional
 from sempy._utils._log import log
+from uuid import UUID


 @log
 def get_lakehouse_columns(
-    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+    lakehouse: Optional[str] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
     Shows the tables and columns of a lakehouse and their respective properties.
@@ -22,8 +24,8 @@ def get_lakehouse_columns(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    lakehouse_workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -46,18 +48,18 @@ def get_lakehouse_columns(
         ]
     )

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
     else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

     spark = SparkSession.builder.getOrCreate()

     tables = get_lakehouse_tables(
-        lakehouse=lakehouse, workspace=workspace, extended=False, count_rows=False
+        lakehouse=lakehouse, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]

@@ -70,7 +72,7 @@ def get_lakehouse_columns(
         for cName, data_type in sparkdf.dtypes:
             tc = format_dax_object_name(tName, cName)
             new_data = {
-                "Workspace Name": workspace,
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
                 "Table Name": tName,
                 "Column Name": cName,
--- a/sempy_labs/lakehouse/_get_lakehouse_tables.py
+++ b/sempy_labs/lakehouse/_get_lakehouse_tables.py
@@ -18,12 +18,13 @@ from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID


 @log
 def get_lakehouse_tables(
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     extended: bool = False,
     count_rows: bool = False,
     export: bool = False,
@@ -38,8 +39,8 @@ def get_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     extended : bool, default=False
@@ -66,13 +67,13 @@ def get_lakehouse_tables(
         ]
     )

-    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)
     else:
-        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+        lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

     if count_rows:  # Setting countrows defaults to extended=True
         extended = True
@@ -104,7 +105,7 @@ def get_lakehouse_tables(
     for r in responses:
         for i in r.get("data", []):
             new_data = {
-                "Workspace Name": workspace,
+                "Workspace Name": workspace_name,
                 "Lakehouse Name": lakehouse,
                 "Table Name": i.get("name"),
                 "Format": i.get("format"),
@@ -117,7 +118,7 @@ def get_lakehouse_tables(
     df = pd.concat(dfs, ignore_index=True)

     if extended:
-        sku_value = get_sku_size(workspace)
+        sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
         spark = SparkSession.builder.getOrCreate()
         df["Files"] = None
@@ -182,7 +183,7 @@ def get_lakehouse_tables(

         lakehouse_id = fabric.get_lakehouse_id()
         lakehouse = resolve_lakehouse_name(
-            lakehouse_id=lakehouse_id, workspace=workspace
+            lakehouse_id=lakehouse_id, workspace=workspace_id
         )
         lakeTName = "lakehouse_table_details"
         lakeT_filt = df[df["Table Name"] == lakeTName]
--- a/sempy_labs/lakehouse/_lakehouse.py
+++ b/sempy_labs/lakehouse/_lakehouse.py
@@ -1,8 +1,12 @@
 import sempy.fabric as fabric
 from tqdm.auto import tqdm
-from sempy_labs._helper_functions import resolve_lakehouse_name
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name,
+    resolve_workspace_name_and_id,
+)
 from typing import List, Optional, Union
 from sempy._utils._log import log
+from uuid import UUID


 def lakehouse_attached() -> bool:
@@ -29,7 +33,7 @@ def lakehouse_attached() -> bool:
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
 ):
     """
     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
@@ -42,8 +46,8 @@ def optimize_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """
@@ -52,13 +56,13 @@ def optimize_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)

-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

     if isinstance(tables, str):
@@ -83,7 +87,7 @@ def optimize_lakehouse_tables(
 def vacuum_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
     lakehouse: Optional[str] = None,
-    workspace: Optional[str] = None,
+    workspace: Optional[str | UUID] = None,
     retain_n_hours: Optional[int] = None,
 ):
     """
@@ -96,8 +100,8 @@ def vacuum_lakehouse_tables(
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
     retain_n_hours : int, default=None
@@ -111,13 +115,13 @@ def vacuum_lakehouse_tables(
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
     from delta import DeltaTable

-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id)

-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace_id)
     lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]

     if isinstance(tables, str):
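
As with the other modules, the lakehouse maintenance functions now take a workspace name or UUID. A hedged usage sketch (table, lakehouse, and workspace values are placeholders; the import path assumes these functions remain exported from sempy_labs.lakehouse):

    # Hypothetical usage sketch; all names and the UUID below are placeholders.
    from uuid import UUID
    from sempy_labs.lakehouse import optimize_lakehouse_tables

    optimize_lakehouse_tables(
        tables=["dim_date", "fact_sales"],
        lakehouse="MyLakehouse",
        workspace=UUID("00000000-0000-0000-0000-000000000002"),
    )
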