semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of semantic-link-labs might be problematic.

Files changed (103)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +26 -2
  5. sempy_labs/_ai.py +3 -65
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +9 -4
  33. sempy_labs/_generate_semantic_model.py +30 -56
  34. sempy_labs/_helper_functions.py +358 -14
  35. sempy_labs/_icons.py +10 -1
  36. sempy_labs/_list_functions.py +478 -237
  37. sempy_labs/_model_bpa.py +194 -18
  38. sempy_labs/_model_bpa_bulk.py +363 -0
  39. sempy_labs/_model_bpa_rules.py +4 -4
  40. sempy_labs/_model_dependencies.py +12 -10
  41. sempy_labs/_one_lake_integration.py +7 -7
  42. sempy_labs/_query_scale_out.py +45 -66
  43. sempy_labs/_refresh_semantic_model.py +7 -0
  44. sempy_labs/_translations.py +154 -1
  45. sempy_labs/_vertipaq.py +103 -90
  46. sempy_labs/directlake/__init__.py +5 -1
  47. sempy_labs/directlake/_directlake_schema_compare.py +27 -31
  48. sempy_labs/directlake/_directlake_schema_sync.py +55 -66
  49. sempy_labs/directlake/_dl_helper.py +233 -0
  50. sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
  51. sempy_labs/directlake/_get_shared_expression.py +1 -1
  52. sempy_labs/directlake/_guardrails.py +17 -13
  53. sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
  54. sempy_labs/directlake/_warm_cache.py +1 -1
  55. sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
  56. sempy_labs/lakehouse/_lakehouse.py +3 -2
  57. sempy_labs/lakehouse/_shortcuts.py +1 -1
  58. sempy_labs/migration/_create_pqt_file.py +174 -182
  59. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
  60. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
  61. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
  62. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
  63. sempy_labs/migration/_refresh_calc_tables.py +92 -101
  64. sempy_labs/report/_BPAReportTemplate.json +232 -0
  65. sempy_labs/report/__init__.py +6 -2
  66. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  67. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  68. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  69. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  70. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  71. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  72. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  73. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  74. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  75. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  76. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  77. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  92. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  93. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  94. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  95. sempy_labs/report/_generate_report.py +255 -139
  96. sempy_labs/report/_report_functions.py +26 -33
  97. sempy_labs/report/_report_rebind.py +31 -26
  98. sempy_labs/tom/_model.py +75 -58
  99. semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
  100. semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
  101. sempy_labs/directlake/_fallback.py +0 -60
  102. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  103. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/directlake/_dl_helper.py (new file)
@@ -0,0 +1,233 @@
+ import sempy.fabric as fabric
+ import numpy as np
+ import pandas as pd
+ from typing import Optional, List, Union, Tuple
+ from uuid import UUID
+ import sempy_labs._icons as icons
+ from sempy._utils._log import log
+ from sempy_labs._helper_functions import retry, resolve_dataset_id
+
+
+ def check_fallback_reason(
+     dataset: str, workspace: Optional[str] = None
+ ) -> pd.DataFrame:
+     """
+     Shows the reason a table in a Direct Lake semantic model would fallback to DirectQuery.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         The tables in the semantic model and their fallback reason.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+
+     if len(dfP_filt) == 0:
+         raise ValueError(
+             f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models."
+         )
+
+     df = fabric.evaluate_dax(
+         dataset=dataset,
+         workspace=workspace,
+         dax_string="""
+         SELECT [TableName] AS [Table Name],[FallbackReason] AS [FallbackReasonID]
+         FROM $SYSTEM.TMSCHEMA_DELTA_TABLE_METADATA_STORAGES
+         """,
+     )
+
+     value_mapping = {
+         0: "No reason for fallback",
+         1: "This table is not framed",
+         2: "This object is a view in the lakehouse",
+         3: "The table does not exist in the lakehouse",
+         4: "Transient error",
+         5: "Using OLS will result in fallback to DQ",
+         6: "Using RLS will result in fallback to DQ",
+     }
+
+     # Create a new column based on the mapping
+     df["Fallback Reason Detail"] = np.vectorize(value_mapping.get)(
+         df["FallbackReasonID"]
+     )
+
+     return df
+
+
+ @log
+ def generate_direct_lake_semantic_model(
+     dataset: str,
+     lakehouse_tables: Union[str, List[str]],
+     workspace: Optional[str] = None,
+     lakehouse: Optional[str] = None,
+     lakehouse_workspace: Optional[str] = None,
+     overwrite: Optional[bool] = False,
+     refresh: Optional[bool] = True,
+ ):
+     """
+     Dynamically generates a Direct Lake semantic model based on tables in a Fabric lakehouse.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model to be created.
+     lakehouse_tables : str | List[str]
+         The table(s) within the Fabric lakehouse to add to the semantic model. All columns from these tables will be added to the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name in which the semantic model will reside.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     lakehouse : str, default=None
+         The lakehouse which stores the delta tables which will feed the Direct Lake semantic model.
+         Defaults to None which resolves to the attached lakehouse.
+     lakehouse_workspace : str, default=None
+         The Fabric workspace in which the lakehouse resides.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     overwrite : bool, default=False
+         If set to True, overwrites the existing semantic model if it already exists.
+     refresh: bool, default=True
+         If True, refreshes the newly created semantic model after it is created.
+
+     Returns
+     -------
+     """
+
+     from sempy_labs.lakehouse import get_lakehouse_tables, get_lakehouse_columns
+     from sempy_labs import create_blank_semantic_model, refresh_semantic_model
+     from sempy_labs.tom import connect_semantic_model
+     from sempy_labs.directlake import get_shared_expression
+
+     if isinstance(lakehouse_tables, str):
+         lakehouse_tables = [lakehouse_tables]
+
+     dfLT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace)
+
+     # Validate lakehouse tables
+     for t in lakehouse_tables:
+         if t not in dfLT["Table Name"].values:
+             raise ValueError(
+                 f"{icons.red_dot} The '{t}' table does not exist as a delta table in the '{lakehouse}' within the '{workspace}' workspace."
+             )
+
+     dfLC = get_lakehouse_columns(lakehouse=lakehouse, workspace=lakehouse_workspace)
+     expr = get_shared_expression(lakehouse=lakehouse, workspace=lakehouse_workspace)
+     dfD = fabric.list_datasets(workspace=workspace)
+     dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+     dfD_filt_len = len(dfD_filt)
+
+     if dfD_filt_len > 0 and overwrite is False:
+         raise ValueError(
+             f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace already exists. Overwrite is set to False so the new semantic model has not been created."
+         )
+     if dfD_filt_len > 0 and overwrite:
+         print(
+             f"{icons.warning} Overwriting the existing '{dataset}' semantic model within the '{workspace}' workspace."
+         )
+
+     create_blank_semantic_model(dataset=dataset, workspace=workspace)
+
+     @retry(
+         sleep_time=1,
+         timeout_error_message=f"{icons.red_dot} Function timed out after 1 minute",
+     )
+     def dyn_connect():
+         with connect_semantic_model(
+             dataset=dataset, readonly=True, workspace=workspace
+         ) as tom:
+
+             tom.model
+
+     dyn_connect()
+
+     expression_name = "DatabaseQuery"
+     with connect_semantic_model(
+         dataset=dataset, workspace=workspace, readonly=False
+     ) as tom:
+         if not any(e.Name == expression_name for e in tom.model.Expressions):
+             tom.add_expression(name=expression_name, expression=expr)
+
+         for t in lakehouse_tables:
+             tom.add_table(name=t)
+             tom.add_entity_partition(table_name=t, entity_name=t)
+             dfLC_filt = dfLC[dfLC["Table Name"] == t]
+             for i, r in dfLC_filt.iterrows():
+                 lakeCName = r["Column Name"]
+                 dType = r["Data Type"]
+                 dt = icons.data_type_mapping.get(dType)
+                 tom.add_data_column(
+                     table_name=t,
+                     column_name=lakeCName,
+                     source_column=lakeCName,
+                     data_type=dt,
+                 )
+
+     if refresh:
+         refresh_semantic_model(dataset=dataset, workspace=workspace)
+
+
+ def get_direct_lake_source(
+     dataset: str, workspace: Optional[str] = None
+ ) -> Tuple[str, str, UUID, UUID]:
+     """
+     Obtains the source information for a direct lake semantic model.
+
+     Parameters
+     ----------
+     dataset : str
+         The name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+     Returns
+     -------
+     Tuple[str, str, UUID, UUID]
+         If the source of the direct lake semantic model is a lakehouse this will return: 'Lakehouse', Lakehouse Name, SQL Endpoint Id, Workspace Id
+         If the source of the direct lake semantic model is a warehouse this will return: 'Warehouse', Warehouse Name, Warehouse Id, Workspace Id
+         If the semantic model is not a Direct Lake semantic model, it will return None, None, None.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+     dataset_id = resolve_dataset_id(dataset, workspace)
+     client = fabric.PowerBIRestClient()
+     request_body = {
+         "artifacts": [
+             {
+                 "objectId": dataset_id,
+                 "type": "dataset",
+             }
+         ]
+     }
+     response = client.post(
+         "metadata/relations/upstream?apiVersion=3", json=request_body
+     )
+     artifacts = response.json().get("artifacts", [])
+     sql_id, sql_object_name, sql_workspace_id, artifact_type = None, None, None, None
+
+     for artifact in artifacts:
+         object_type = artifact.get("typeName")
+         display_name = artifact.get("displayName")
+         if object_type in ["Datawarehouse", "Lakewarehouse"]:
+             artifact_type = (
+                 "Warehouse" if object_type == "Datawarehouse" else "Lakehouse"
+             )
+             sql_id = artifact.get("objectId")
+             sql_workspace_id = artifact.get("workspace", {}).get("objectId")
+             sql_object_name = display_name
+             break
+
+     return artifact_type, sql_object_name, sql_id, sql_workspace_id
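
A minimal usage sketch of the new helpers (the model and workspace names below are placeholders, not part of the package; the import paths follow the module location shown in this diff, and the public re-exports under sempy_labs.directlake are assumed to match):

from sempy_labs.directlake._dl_helper import (
    check_fallback_reason,
    get_direct_lake_source,
)

# Why would tables in a Direct Lake model fall back to DirectQuery?
fallback_df = check_fallback_reason(dataset="Sales", workspace="My Workspace")

# Which lakehouse or warehouse feeds the model?
artifact_type, source_name, source_id, source_workspace_id = get_direct_lake_source(
    dataset="Sales", workspace="My Workspace"
)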
sempy_labs/directlake/_get_directlake_lakehouse.py
@@ -6,7 +6,6 @@ from sempy_labs._helper_functions import (
  )
  from typing import Optional, Tuple
  from uuid import UUID
- import sempy_labs._icons as icons


  def get_direct_lake_lakehouse(
@@ -49,13 +48,13 @@ def get_direct_lake_lakehouse(
          lakehouse_id = fabric.get_lakehouse_id()
          lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)

-     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+     # dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     # dfP_filt = dfP[dfP["Mode"] == "DirectLake"]

-     if len(dfP_filt) == 0:
-         raise ValueError(
-             f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
-         )
+     # if len(dfP_filt) == 0:
+     #     raise ValueError(
+     #         f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+     #     )

      sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)

sempy_labs/directlake/_get_shared_expression.py
@@ -7,7 +7,7 @@ import sempy_labs._icons as icons

  def get_shared_expression(
      lakehouse: Optional[str] = None, workspace: Optional[str] = None
- ):
+ ) -> str:
      """
      Dynamically generates the M expression used by a Direct Lake model for a given lakehouse.

sempy_labs/directlake/_guardrails.py
@@ -1,6 +1,7 @@
  import sempy.fabric as fabric
  import pandas as pd
  from typing import Optional
+ import sempy_labs._icons as icons


  def get_direct_lake_guardrails() -> pd.DataFrame:
@@ -27,14 +28,14 @@ def get_direct_lake_guardrails() -> pd.DataFrame:
      return df


- def get_sku_size(workspace: Optional[str] = None):
+ def get_sku_size(workspace: Optional[str] = None) -> str:
      """
      Shows the SKU size for a workspace.

      Parameters
      ----------
      workspace : str, default=None
-         The Fabric workspace.
+         The Fabric workspace name.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.

@@ -46,18 +47,21 @@ def get_sku_size(workspace: Optional[str] = None):

      workspace = fabric.resolve_workspace_name(workspace)

+     dfW = fabric.list_workspaces(filter=f"name eq '{workspace}'")
+
+     if len(dfW) == 0:
+         raise ValueError(f"{icons.red_dot} The '{workspace}' is not a valid workspace.")
+
+     capacity_id = dfW["Capacity Id"].iloc[0]
      dfC = fabric.list_capacities()
-     dfW = fabric.list_workspaces().sort_values(by="Name", ascending=True)
-     dfC.rename(columns={"Id": "Capacity Id"}, inplace=True)
-     dfCW = pd.merge(
-         dfW,
-         dfC[["Capacity Id", "Sku", "Region", "State"]],
-         on="Capacity Id",
-         how="inner",
-     )
-     sku_value = dfCW.loc[dfCW["Name"] == workspace, "Sku"].iloc[0]
-
-     return sku_value
+     dfC_filt = dfC[dfC["Id"] == capacity_id]
+
+     if len(dfC_filt) == 0:
+         raise ValueError(
+             f"{icons.red_dot} The '{capacity_id}' Id is not a valid capacity Id."
+         )
+
+     return dfC_filt["Sku"].iloc[0]


  def get_directlake_guardrails_for_sku(sku_size: str) -> pd.DataFrame:
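
get_sku_size now resolves the capacity via a filtered workspace lookup and raises on an invalid workspace or capacity Id. The two guardrail helpers are chained the same way the get_lakehouse_tables change further down uses them; a short sketch (the workspace name is a placeholder):

from sempy_labs.directlake._guardrails import (
    get_sku_size,
    get_directlake_guardrails_for_sku,
)

sku_size = get_sku_size(workspace="My Workspace")
guardrail = get_directlake_guardrails_for_sku(sku_size)  # Direct Lake limits for that SKU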
sempy_labs/directlake/_update_directlake_partition_entity.py
@@ -1,7 +1,8 @@
+ import sempy
  import sempy.fabric as fabric
  from sempy_labs.tom import connect_semantic_model
- from sempy_labs._helper_functions import resolve_lakehouse_name
  from sempy_labs._refresh_semantic_model import refresh_semantic_model
+ from sempy_labs.directlake._dl_helper import get_direct_lake_source
  from typing import List, Optional, Union
  import sempy_labs._icons as icons

@@ -11,8 +12,7 @@ def update_direct_lake_partition_entity(
      table_name: Union[str, List[str]],
      entity_name: Union[str, List[str]],
      workspace: Optional[str] = None,
-     lakehouse: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
+     **kwargs,
  ):
      """
      Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.
@@ -29,23 +29,30 @@ def update_direct_lake_partition_entity(
          The Fabric workspace name in which the semantic model exists.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.
-     lakehouse : str, default=None
-         The Fabric lakehouse used by the Direct Lake semantic model.
-         Defaults to None which resolves to the lakehouse attached to the notebook.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse.
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      """

+     if "lakehouse" in kwargs:
+         print(
+             "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse"]
+     if "lakehouse_workspace" in kwargs:
+         print(
+             "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]
+
      workspace = fabric.resolve_workspace_name(workspace)

-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )

-     if lakehouse is None:
-         lakehouse_id = fabric.get_lakehouse_id()
-         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+     if artifact_type == "Warehouse":
+         raise ValueError(
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from lakehouses, not warehouses."
+         )
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

      # Support both str & list types
      if isinstance(table_name, str):
@@ -70,7 +77,7 @@ def update_direct_lake_partition_entity(
          for tName in table_name:
              i = table_name.index(tName)
              eName = entity_name[i]
-             part_name = (
+             part_name = next(
                  p.Name
                  for t in tom.model.Tables
                  for p in t.Partitions
@@ -85,7 +92,7 @@ def update_direct_lake_partition_entity(
              tom.model.Tables[tName].Partitions[part_name].EntityName = eName
              print(
                  f"{icons.green_dot} The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table "
-                 f"in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+                 f"in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
              )


@@ -93,7 +100,8 @@ def add_table_to_direct_lake_semantic_model(
      dataset: str,
      table_name: str,
      lakehouse_table_name: str,
-     workspace: Optional[str | None] = None,
+     refresh: Optional[bool] = True,
+     workspace: Optional[str] = None,
  ):
      """
      Adds a table and all of its columns to a Direct Lake semantic model, based on a Fabric lakehouse table.
@@ -106,6 +114,8 @@ def add_table_to_direct_lake_semantic_model(
          Name of the table in the semantic model.
      lakehouse_table_name : str
          The name of the Fabric lakehouse table.
+     refresh : bool, default=True
+         Refreshes the table after it is added to the semantic model.
      workspace : str, default=None
          The name of the Fabric workspace in which the semantic model resides.
          Defaults to None which resolves to the workspace of the attached lakehouse
@@ -115,22 +125,33 @@ def add_table_to_direct_lake_semantic_model(
      -------
      """

+     sempy.fabric._client._utils._init_analysis_services()
      import Microsoft.AnalysisServices.Tabular as TOM
      from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
      from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-     from sempy_labs.directlake._get_directlake_lakehouse import (
-         get_direct_lake_lakehouse,
-     )

      workspace = fabric.resolve_workspace_name(workspace)

+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )
+
+     if artifact_type == "Warehouse":
+         raise ValueError(
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+         )
+
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+
      with connect_semantic_model(
          dataset=dataset, readonly=False, workspace=workspace
      ) as tom:

-         if tom.is_direct_lake() is False:
+         table_count = tom.model.Tables.Count
+
+         if tom.is_direct_lake() is False and table_count > 0:
              raise ValueError(
-                 "This function is only valid for Direct Lake semantic models."
+                 "This function is only valid for Direct Lake semantic models or semantic models with no tables."
              )

          if any(
@@ -154,19 +175,19 @@ def add_table_to_direct_lake_semantic_model(
                  f"The '{table_name}' table already exists in the '{dataset}' semantic model within the '{workspace}' workspace."
              )

-         lake_name, lake_id = get_direct_lake_lakehouse(
-             dataset=dataset, workspace=workspace
+         dfL = get_lakehouse_tables(
+             lakehouse=lakehouse_name, workspace=lakehouse_workspace
          )
-
-         dfL = get_lakehouse_tables(lakehouse=lake_name, workspace=workspace)
          dfL_filt = dfL[dfL["Table Name"] == lakehouse_table_name]

          if len(dfL_filt) == 0:
              raise ValueError(
-                 f"The '{lakehouse_table_name}' table does not exist in the '{lake_name}' lakehouse within the '{workspace}' workspace."
+                 f"The '{lakehouse_table_name}' table does not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
              )

-         dfLC = get_lakehouse_columns(lakehouse=lake_name, workspace=workspace)
+         dfLC = get_lakehouse_columns(
+             lakehouse=lakehouse_name, workspace=lakehouse_workspace
+         )
          dfLC_filt = dfLC[dfLC["Table Name"] == lakehouse_table_name]

          tom.add_table(name=table_name)
@@ -194,4 +215,7 @@ def add_table_to_direct_lake_semantic_model(
                  f"{icons.green_dot} The '{lakeCName}' column has been added to the '{table_name}' table as a '{dt}' data type in the '{dataset}' semantic model within the '{workspace}' workspace."
              )

-     refresh_semantic_model(dataset=dataset, tables=table_name, workspace=workspace)
+     if refresh:
+         refresh_semantic_model(
+             dataset=dataset, tables=table_name, workspace=workspace
+         )
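
update_direct_lake_partition_entity now resolves its lakehouse through get_direct_lake_source, so the old lakehouse/lakehouse_workspace arguments only trigger a deprecation message. A minimal sketch of the new call shape (the model, table, and workspace names are placeholders; the re-export from sempy_labs.directlake is assumed):

from sempy_labs.directlake import update_direct_lake_partition_entity

update_direct_lake_partition_entity(
    dataset="Sales",
    table_name=["DimDate", "FactSales"],
    entity_name=["dim_date", "fact_sales"],
    workspace="My Workspace",
)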
sempy_labs/directlake/_warm_cache.py
@@ -17,7 +17,7 @@ def warm_direct_lake_cache_perspective(
      perspective: str,
      add_dependencies: Optional[bool] = False,
      workspace: Optional[str] = None,
- ):
+ ) -> pd.DataFrame:
      """
      Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.

sempy_labs/lakehouse/_get_lakehouse_tables.py
@@ -16,6 +16,7 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached
  from typing import Optional
  import sempy_labs._icons as icons
  from sempy._utils._log import log
+ from sempy.fabric.exceptions import FabricHTTPException


  @log
@@ -51,6 +52,8 @@ def get_lakehouse_tables(
          Shows the tables/columns within a lakehouse and their properties.
      """

+     from sempy_labs._helper_functions import pagination
+
      df = pd.DataFrame(
          columns=[
              "Workspace Name",
@@ -73,34 +76,52 @@ def get_lakehouse_tables(
      if count_rows:  # Setting countrows defaults to extended=True
          extended = True

+     if (
+         workspace_id != fabric.get_workspace_id()
+         and lakehouse_id != fabric.get_lakehouse_id()
+         and count_rows
+     ):
+         raise ValueError(
+             f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
+             "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
+         )
+
      client = fabric.FabricRestClient()
      response = client.get(
          f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
      )

-     for i in response.json()["data"]:
-         tName = i["name"]
-         tType = i["type"]
-         tFormat = i["format"]
-         tLocation = i["location"]
-         if not extended:
+     if response.status_code != 200:
+         raise FabricHTTPException(response)
+
+     responses = pagination(client, response)
+
+     dfs = []
+     for r in responses:
+         for i in r.get("data", []):
              new_data = {
                  "Workspace Name": workspace,
                  "Lakehouse Name": lakehouse,
-                 "Table Name": tName,
-                 "Format": tFormat,
-                 "Type": tType,
-                 "Location": tLocation,
+                 "Table Name": i.get("name"),
+                 "Format": i.get("format"),
+                 "Type": i.get("type"),
+                 "Location": i.get("location"),
              }
-             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
-         else:
-             sku_value = get_sku_size(workspace)
-             guardrail = get_directlake_guardrails_for_sku(sku_value)
-
-             spark = SparkSession.builder.getOrCreate()
+             dfs.append(pd.DataFrame(new_data, index=[0]))
+     df = pd.concat(dfs, ignore_index=True)

-             intColumns = ["Files", "Row Groups", "Table Size"]
-             if tType == "Managed" and tFormat == "delta":
+     if extended:
+         sku_value = get_sku_size(workspace)
+         guardrail = get_directlake_guardrails_for_sku(sku_value)
+         spark = SparkSession.builder.getOrCreate()
+         df["Files"] = None
+         df["Row Groups"] = None
+         df["Table Size"] = None
+         if count_rows:
+             df["Row Count"] = None
+         for i, r in df.iterrows():
+             tName = r["Table Name"]
+             if r["Type"] == "Managed" and r["Format"] == "delta":
                  detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
                  num_files = detail_df.numFiles
                  size_in_bytes = detail_df.sizeInBytes
@@ -120,60 +141,31 @@
                  ).num_row_groups
              except FileNotFoundError:
                  continue
-
-             if count_rows:
-                 num_rows = spark.table(tName).count()
-                 intColumns.append("Row Count")
-                 new_data = {
-                     "Workspace Name": workspace,
-                     "Lakehouse Name": lakehouse,
-                     "Table Name": tName,
-                     "Format": tFormat,
-                     "Type": tType,
-                     "Location": tLocation,
-                     "Files": num_files,
-                     "Row Groups": num_rowgroups,
-                     "Row Count": num_rows,
-                     "Table Size": size_in_bytes,
-                 }
-             else:
-                 new_data = {
-                     "Workspace Name": workspace,
-                     "Lakehouse Name": lakehouse,
-                     "Table Name": tName,
-                     "Format": tFormat,
-                     "Type": tType,
-                     "Location": tLocation,
-                     "Files": num_files,
-                     "Row Groups": num_rowgroups,
-                     "Table Size": size_in_bytes,
-                 }
-
-             df = pd.concat(
-                 [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
-             )
-         df[intColumns] = df[intColumns].astype(int)
-
-         df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
-         df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
-         df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
-         df["Row Count Guardrail"] = (
-             guardrail["Rows per table (millions)"].iloc[0] * 1000000
-         )
-
-         df["Parquet File Guardrail Hit"] = (
-             df["Files"] > df["Parquet File Guardrail"]
-         )
-         df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
-
+                 df.at[i, "Files"] = num_files
+                 df.at[i, "Row Groups"] = num_rowgroups
+                 df.at[i, "Table Size"] = size_in_bytes
              if count_rows:
-                 df["Row Count Guardrail Hit"] = (
-                     df["Row Count"] > df["Row Count Guardrail"]
-                 )
+                 num_rows = spark.table(tName).count()
+                 df.at[i, "Row Count"] = num_rows
+
+     if extended:
+         intColumns = ["Files", "Row Groups", "Table Size"]
+         df[intColumns] = df[intColumns].astype(int)
+         df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
+         df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+         df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+         df["Row Count Guardrail"] = (
+             guardrail["Rows per table (millions)"].iloc[0] * 1000000
+         )
+
+         df["Parquet File Guardrail Hit"] = df["Files"] > df["Parquet File Guardrail"]
+         df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+         if count_rows:
+             df["Row Count"] = df["Row Count"].astype(int)
+             df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]

      if export:
-         lakeAttach = lakehouse_attached()
-         if lakeAttach is False:
+         if not lakehouse_attached():
              raise ValueError(
                  f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
              )
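
get_lakehouse_tables now paginates the REST response and, per the new guard, only allows count_rows=True against the lakehouse attached to the notebook. A hedged usage sketch (the lakehouse and workspace names are placeholders):

from sempy_labs.lakehouse import get_lakehouse_tables

# Row counts require the default (attached) lakehouse; cross-workspace Spark queries are not supported.
df_attached = get_lakehouse_tables(count_rows=True)

# For another lakehouse, list the table metadata without row counts.
df_other = get_lakehouse_tables(lakehouse="OtherLakehouse", workspace="Other Workspace")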