semantic_link_labs-0.5.0-py3-none-any.whl → semantic_link_labs-0.7.0-py3-none-any.whl

This diff shows the contents of publicly available package versions as they were released to their respective public registries. It is provided for informational purposes only.


Files changed (113)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +45 -15
  5. sempy_labs/_ai.py +42 -85
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +12 -8
  33. sempy_labs/_connections.py +77 -70
  34. sempy_labs/_dax.py +7 -9
  35. sempy_labs/_generate_semantic_model.py +75 -90
  36. sempy_labs/_helper_functions.py +371 -20
  37. sempy_labs/_icons.py +23 -0
  38. sempy_labs/_list_functions.py +855 -427
  39. sempy_labs/_model_auto_build.py +4 -3
  40. sempy_labs/_model_bpa.py +307 -1118
  41. sempy_labs/_model_bpa_bulk.py +363 -0
  42. sempy_labs/_model_bpa_rules.py +831 -0
  43. sempy_labs/_model_dependencies.py +20 -16
  44. sempy_labs/_one_lake_integration.py +18 -12
  45. sempy_labs/_query_scale_out.py +116 -129
  46. sempy_labs/_refresh_semantic_model.py +23 -10
  47. sempy_labs/_translations.py +367 -288
  48. sempy_labs/_vertipaq.py +152 -123
  49. sempy_labs/directlake/__init__.py +7 -1
  50. sempy_labs/directlake/_directlake_schema_compare.py +33 -30
  51. sempy_labs/directlake/_directlake_schema_sync.py +60 -77
  52. sempy_labs/directlake/_dl_helper.py +233 -0
  53. sempy_labs/directlake/_get_directlake_lakehouse.py +7 -8
  54. sempy_labs/directlake/_get_shared_expression.py +5 -3
  55. sempy_labs/directlake/_guardrails.py +20 -16
  56. sempy_labs/directlake/_list_directlake_model_calc_tables.py +17 -10
  57. sempy_labs/directlake/_show_unsupported_directlake_objects.py +3 -2
  58. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +10 -5
  59. sempy_labs/directlake/_update_directlake_partition_entity.py +169 -22
  60. sempy_labs/directlake/_warm_cache.py +7 -4
  61. sempy_labs/lakehouse/_get_lakehouse_columns.py +1 -1
  62. sempy_labs/lakehouse/_get_lakehouse_tables.py +65 -71
  63. sempy_labs/lakehouse/_lakehouse.py +5 -3
  64. sempy_labs/lakehouse/_shortcuts.py +20 -13
  65. sempy_labs/migration/__init__.py +1 -1
  66. sempy_labs/migration/_create_pqt_file.py +184 -186
  67. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +240 -269
  68. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +78 -77
  69. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +444 -425
  70. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +96 -102
  71. sempy_labs/migration/_migration_validation.py +2 -2
  72. sempy_labs/migration/_refresh_calc_tables.py +94 -100
  73. sempy_labs/report/_BPAReportTemplate.json +232 -0
  74. sempy_labs/report/__init__.py +6 -2
  75. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  76. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  77. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  92. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  93. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  94. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  95. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  96. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  97. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  98. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  99. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  100. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  101. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  102. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  103. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  104. sempy_labs/report/_generate_report.py +260 -139
  105. sempy_labs/report/_report_functions.py +90 -59
  106. sempy_labs/report/_report_rebind.py +40 -34
  107. sempy_labs/tom/__init__.py +1 -4
  108. sempy_labs/tom/_model.py +601 -181
  109. semantic_link_labs-0.5.0.dist-info/METADATA +0 -22
  110. semantic_link_labs-0.5.0.dist-info/RECORD +0 -53
  111. sempy_labs/directlake/_fallback.py +0 -58
  112. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  113. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/directlake/_update_directlake_partition_entity.py

@@ -1,17 +1,18 @@
 import sempy
 import sempy.fabric as fabric
 from sempy_labs.tom import connect_semantic_model
-from sempy_labs._helper_functions import resolve_lakehouse_name
+from sempy_labs._refresh_semantic_model import refresh_semantic_model
+from sempy_labs.directlake._dl_helper import get_direct_lake_source
 from typing import List, Optional, Union
 import sempy_labs._icons as icons
 
+
 def update_direct_lake_partition_entity(
     dataset: str,
     table_name: Union[str, List[str]],
     entity_name: Union[str, List[str]],
     workspace: Optional[str] = None,
-    lakehouse: Optional[str] = None,
-    lakehouse_workspace: Optional[str] = None
+    **kwargs,
 ):
     """
     Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.
@@ -28,23 +29,30 @@ def update_direct_lake_partition_entity(
         The Fabric workspace name in which the semantic model exists.
         Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
-    lakehouse : str, default=None
-        The Fabric lakehouse used by the Direct Lake semantic model.
-        Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
     """
 
+    if "lakehouse" in kwargs:
+        print(
+            "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse"]
+    if "lakehouse_workspace" in kwargs:
+        print(
+            "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+        )
+        del kwargs["lakehouse_workspace"]
+
     workspace = fabric.resolve_workspace_name(workspace)
 
-    if lakehouse_workspace is None:
-        lakehouse_workspace = workspace
+    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+        get_direct_lake_source(dataset=dataset, workspace=workspace)
+    )
 
-    if lakehouse is None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+    if artifact_type == "Warehouse":
+        raise ValueError(
+            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from lakehouses, not warehouses."
+        )
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
 
     # Support both str & list types
     if isinstance(table_name, str):
@@ -53,22 +61,161 @@ def update_direct_lake_partition_entity(
         entity_name = [entity_name]
 
     if len(table_name) != len(entity_name):
-        raise ValueError(f"{icons.red_dot} The 'table_name' and 'entity_name' arrays must be of equal length.")
+        raise ValueError(
+            f"{icons.red_dot} The 'table_name' and 'entity_name' arrays must be of equal length."
+        )
 
     with connect_semantic_model(
         dataset=dataset, readonly=False, workspace=workspace
     ) as tom:
 
         if not tom.is_direct_lake():
-            raise ValueError(f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode.")
+            raise ValueError(
+                f"{icons.red_dot} The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+            )
 
         for tName in table_name:
             i = table_name.index(tName)
             eName = entity_name[i]
-            try:
-                tom.model.Tables[tName].Partitions[0].EntityName = eName
+            part_name = next(
+                p.Name
+                for t in tom.model.Tables
+                for p in t.Partitions
+                if t.Name == tName
+            )
+
+            if part_name is None:
+                raise ValueError(
+                    f"{icons.red_dot} The '{tName}' table in the '{dataset}' semantic model has not been updated."
+                )
+            else:
+                tom.model.Tables[tName].Partitions[part_name].EntityName = eName
                 print(
-                    f"{icons.green_dot} The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+                    f"{icons.green_dot} The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table "
+                    f"in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
                 )
-            except Exception as e:
-                raise ValueError(f"{icons.red_dot} The '{tName}' table in the '{dataset}' semantic model has not been updated.") from e
+
+
+def add_table_to_direct_lake_semantic_model(
+    dataset: str,
+    table_name: str,
+    lakehouse_table_name: str,
+    refresh: Optional[bool] = True,
+    workspace: Optional[str] = None,
+):
+    """
+    Adds a table and all of its columns to a Direct Lake semantic model, based on a Fabric lakehouse table.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    table_name : str, List[str]
+        Name of the table in the semantic model.
+    lakehouse_table_name : str
+        The name of the Fabric lakehouse table.
+    refresh : bool, default=True
+        Refreshes the table after it is added to the semantic model.
+    workspace : str, default=None
+        The name of the Fabric workspace in which the semantic model resides.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    """
+
+    sempy.fabric._client._utils._init_analysis_services()
+    import Microsoft.AnalysisServices.Tabular as TOM
+    from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+
+    workspace = fabric.resolve_workspace_name(workspace)
+
+    artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+        get_direct_lake_source(dataset=dataset, workspace=workspace)
+    )
+
+    if artifact_type == "Warehouse":
+        raise ValueError(
+            f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+        )
+
+    lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+
+    with connect_semantic_model(
+        dataset=dataset, readonly=False, workspace=workspace
+    ) as tom:
+
+        table_count = tom.model.Tables.Count
+
+        if tom.is_direct_lake() is False and table_count > 0:
+            raise ValueError(
+                "This function is only valid for Direct Lake semantic models or semantic models with no tables."
+            )
+
+        if any(
+            p.Name == lakehouse_table_name
+            for p in tom.all_partitions()
+            if p.SourceType == TOM.PartitionSourceType.Entity
+        ):
+            t_name = next(
+                p.Parent.Name
+                for p in tom.all_partitions()
+                if p.Name
+                == lakehouse_table_name & p.SourceType
+                == TOM.PartitionSourceType.Entity
+            )
+            raise ValueError(
+                f"The '{lakehouse_table_name}' table already exists in the '{dataset}' semantic model within the '{workspace}' workspace as the '{t_name}' table."
+            )
+
+        if any(t.Name == table_name for t in tom.model.Tables):
+            raise ValueError(
+                f"The '{table_name}' table already exists in the '{dataset}' semantic model within the '{workspace}' workspace."
+            )
+
+        dfL = get_lakehouse_tables(
+            lakehouse=lakehouse_name, workspace=lakehouse_workspace
+        )
+        dfL_filt = dfL[dfL["Table Name"] == lakehouse_table_name]
+
+        if len(dfL_filt) == 0:
+            raise ValueError(
+                f"The '{lakehouse_table_name}' table does not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
+            )
+
+        dfLC = get_lakehouse_columns(
+            lakehouse=lakehouse_name, workspace=lakehouse_workspace
+        )
+        dfLC_filt = dfLC[dfLC["Table Name"] == lakehouse_table_name]
+
+        tom.add_table(name=table_name)
+        print(
+            f"{icons.green_dot} The '{table_name}' table has been added to the '{dataset}' semantic model within the '{workspace}' workspace."
+        )
+        tom.add_entity_partition(
+            table_name=table_name, entity_name=lakehouse_table_name
+        )
+        print(
+            f"{icons.green_dot} The '{lakehouse_table_name}' partition has been added to the '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace."
+        )
+
+        for i, r in dfLC_filt.iterrows():
+            lakeCName = r["Column Name"]
+            dType = r["Data Type"]
+            dt = icons.data_type_mapping.get(dType)
+            tom.add_data_column(
+                table_name=table_name,
+                column_name=lakeCName,
+                source_column=lakeCName,
+                data_type=dt,
+            )
+            print(
+                f"{icons.green_dot} The '{lakeCName}' column has been added to the '{table_name}' table as a '{dt}' data type in the '{dataset}' semantic model within the '{workspace}' workspace."
+            )
+
+    if refresh:
+        refresh_semantic_model(
+            dataset=dataset, tables=table_name, workspace=workspace
+        )
sempy_labs/directlake/_warm_cache.py

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from tqdm.auto import tqdm
@@ -18,7 +17,7 @@ def warm_direct_lake_cache_perspective(
     perspective: str,
     add_dependencies: Optional[bool] = False,
     workspace: Optional[str] = None,
-):
+) -> pd.DataFrame:
     """
     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.
 
@@ -45,7 +44,9 @@ def warm_direct_lake_cache_perspective(
 
     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
-        raise ValueError(f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode.")
+        raise ValueError(
+            f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+        )
 
     dfPersp = fabric.list_perspectives(dataset=dataset, workspace=workspace)
     dfPersp["DAX Object Name"] = format_dax_object_name(
@@ -54,7 +55,9 @@ def warm_direct_lake_cache_perspective(
     dfPersp_filt = dfPersp[dfPersp["Perspective Name"] == perspective]
 
     if len(dfPersp_filt) == 0:
-        raise ValueError(f"{icons.red_dot} The '{perspective} perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace.")
+        raise ValueError(
+            f"{icons.red_dot} The '{perspective} perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace."
+        )
 
     dfPersp_c = dfPersp_filt[dfPersp_filt["Object Type"] == "Column"]
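The function is now annotated as returning a pd.DataFrame. A short usage sketch under that signature; the model, perspective, and workspace names are placeholders, and the import assumes the sempy_labs.directlake re-export:

    from sempy_labs.directlake import warm_direct_lake_cache_perspective

    df_warmed = warm_direct_lake_cache_perspective(
        dataset="Sales Model",      # placeholder semantic model
        perspective="Executive",    # placeholder perspective
        add_dependencies=True,
        workspace="Analytics",      # placeholder workspace
    )
    print(df_warmed.head())  # per the new annotation, a pandas DataFrame is returned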
sempy_labs/lakehouse/_get_lakehouse_columns.py

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from pyspark.sql import SparkSession
@@ -10,6 +9,7 @@ from sempy_labs._helper_functions import (
 from typing import Optional
 from sempy._utils._log import log
 
+
 @log
 def get_lakehouse_columns(
     lakehouse: Optional[str] = None, workspace: Optional[str] = None
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from pyspark.sql import SparkSession
@@ -17,6 +16,8 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
+from sempy.fabric.exceptions import FabricHTTPException
+
 
 @log
 def get_lakehouse_tables(
@@ -51,6 +52,8 @@ def get_lakehouse_tables(
     Shows the tables/columns within a lakehouse and their properties.
     """
 
+    from sempy_labs._helper_functions import pagination
+
     df = pd.DataFrame(
         columns=[
             "Workspace Name",
@@ -73,34 +76,52 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True
 
+    if (
+        workspace_id != fabric.get_workspace_id()
+        and lakehouse_id != fabric.get_lakehouse_id()
+        and count_rows
+    ):
+        raise ValueError(
+            f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
+            "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
+        )
+
     client = fabric.FabricRestClient()
     response = client.get(
         f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
     )
 
-    for i in response.json()["data"]:
-        tName = i["name"]
-        tType = i["type"]
-        tFormat = i["format"]
-        tLocation = i["location"]
-        if not extended:
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    responses = pagination(client, response)
+
+    dfs = []
+    for r in responses:
+        for i in r.get("data", []):
             new_data = {
                 "Workspace Name": workspace,
                 "Lakehouse Name": lakehouse,
-                "Table Name": tName,
-                "Format": tFormat,
-                "Type": tType,
-                "Location": tLocation,
+                "Table Name": i.get("name"),
+                "Format": i.get("format"),
+                "Type": i.get("type"),
+                "Location": i.get("location"),
             }
-            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
-        else:
-            sku_value = get_sku_size(workspace)
-            guardrail = get_directlake_guardrails_for_sku(sku_value)
-
-            spark = SparkSession.builder.getOrCreate()
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+    df = pd.concat(dfs, ignore_index=True)
 
-            intColumns = ["Files", "Row Groups", "Table Size"]
-            if tType == "Managed" and tFormat == "delta":
+    if extended:
+        sku_value = get_sku_size(workspace)
+        guardrail = get_directlake_guardrails_for_sku(sku_value)
+        spark = SparkSession.builder.getOrCreate()
+        df["Files"] = None
+        df["Row Groups"] = None
+        df["Table Size"] = None
+        if count_rows:
+            df["Row Count"] = None
+        for i, r in df.iterrows():
+            tName = r["Table Name"]
+            if r["Type"] == "Managed" and r["Format"] == "delta":
                 detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
                 num_files = detail_df.numFiles
                 size_in_bytes = detail_df.sizeInBytes
@@ -120,62 +141,35 @@ def get_lakehouse_tables(
                     ).num_row_groups
                 except FileNotFoundError:
                     continue
+                df.at[i, "Files"] = num_files
+                df.at[i, "Row Groups"] = num_rowgroups
+                df.at[i, "Table Size"] = size_in_bytes
+                if count_rows:
+                    num_rows = spark.table(tName).count()
+                    df.at[i, "Row Count"] = num_rows
+
+    if extended:
+        intColumns = ["Files", "Row Groups", "Table Size"]
+        df[intColumns] = df[intColumns].astype(int)
+        df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
+        df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+        df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+        df["Row Count Guardrail"] = (
+            guardrail["Rows per table (millions)"].iloc[0] * 1000000
+        )
 
-                if count_rows:
-                    num_rows = spark.table(tName).count()
-                    intColumns.append("Row Count")
-                    new_data = {
-                        "Workspace Name": workspace,
-                        "Lakehouse Name": lakehouse,
-                        "Table Name": tName,
-                        "Format": tFormat,
-                        "Type": tType,
-                        "Location": tLocation,
-                        "Files": num_files,
-                        "Row Groups": num_rowgroups,
-                        "Row Count": num_rows,
-                        "Table Size": size_in_bytes,
-                    }
-                else:
-                    new_data = {
-                        "Workspace Name": workspace,
-                        "Lakehouse Name": lakehouse,
-                        "Table Name": tName,
-                        "Format": tFormat,
-                        "Type": tType,
-                        "Location": tLocation,
-                        "Files": num_files,
-                        "Row Groups": num_rowgroups,
-                        "Table Size": size_in_bytes,
-                    }
-
-                df = pd.concat(
-                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
-                )
-        df[intColumns] = df[intColumns].astype(int)
-
-        df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
-        df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
-        df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
-        df["Row Count Guardrail"] = (
-            guardrail["Rows per table (millions)"].iloc[0] * 1000000
-        )
+        df["Parquet File Guardrail Hit"] = df["Files"] > df["Parquet File Guardrail"]
+        df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+        if count_rows:
+            df["Row Count"] = df["Row Count"].astype(int)
+            df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]
 
-        df["Parquet File Guardrail Hit"] = (
-            df["Files"] > df["Parquet File Guardrail"]
-        )
-        df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
-
-        if count_rows:
-            df["Row Count Guardrail Hit"] = (
-                df["Row Count"] > df["Row Count Guardrail"]
-            )
-
-    if export:
-        lakeAttach = lakehouse_attached()
-        if lakeAttach is False:
-            raise ValueError(f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.")
+    if export:
+        if not lakehouse_attached():
+            raise ValueError(
+                f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
+            )
 
     spark = SparkSession.builder.getOrCreate()
 
     lakehouse_id = fabric.get_lakehouse_id()
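The reworked get_lakehouse_tables paginates the REST response, raises FabricHTTPException on a failed call, refuses count_rows=True unless it runs against the default attached lakehouse, and (with extended=True) adds the guardrail and guardrail-hit columns. A usage sketch built only on the parameter and column names visible in the hunks above; the import assumes the sempy_labs.lakehouse re-export:

    from sempy_labs.lakehouse import get_lakehouse_tables

    # Run against the lakehouse attached to the notebook (required when count_rows=True).
    df_tables = get_lakehouse_tables(extended=True, count_rows=True)

    # Tables that exceed any Direct Lake guardrail for the capacity SKU.
    over_guardrail = df_tables[
        df_tables["Parquet File Guardrail Hit"]
        | df_tables["Row Group Guardrail Hit"]
        | df_tables["Row Count Guardrail Hit"]
    ]
    print(over_guardrail[["Table Name", "Files", "Row Groups", "Row Count"]])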
sempy_labs/lakehouse/_lakehouse.py

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 from tqdm.auto import tqdm
 from pyspark.sql import SparkSession
@@ -7,6 +6,7 @@ from typing import List, Optional, Union
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 
+
 def lakehouse_attached() -> bool:
     """
     Identifies if a lakehouse is attached to the notebook.
@@ -25,6 +25,7 @@ def lakehouse_attached() -> bool:
     else:
         return False
 
+
 @log
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
@@ -36,8 +37,9 @@ def optimize_lakehouse_tables(
 
     Parameters
     ----------
-    tables : str | List[str] | None
-        The table(s) to optimize. If no tables are specified, all tables in the lakehouse will be optimized.
+    tables : str | List[str], default=None
+        The table(s) to optimize.
+        Defaults to None which resovles to optimizing all tables within the lakehouse.
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
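The reworded docstring spells out the default: with tables=None, every table in the lakehouse is optimized. A small sketch of both call styles; lakehouse, table, and workspace names are placeholders, and the imports assume the sempy_labs.lakehouse re-exports:

    from sempy_labs.lakehouse import lakehouse_attached, optimize_lakehouse_tables

    if lakehouse_attached():
        optimize_lakehouse_tables()  # tables=None: optimize all tables in the attached lakehouse
    else:
        optimize_lakehouse_tables(
            tables=["fact_sales"],   # placeholder table
            lakehouse="SalesLake",   # placeholder lakehouse
            workspace="Analytics",   # placeholder workspace
        )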
sempy_labs/lakehouse/_shortcuts.py

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
@@ -7,6 +6,7 @@ from sempy_labs._helper_functions import (
 )
 from typing import Optional
 import sempy_labs._icons as icons
+from sempy.fabric.exceptions import FabricHTTPException
 
 
 def create_shortcut_onelake(
@@ -53,7 +53,7 @@ def create_shortcut_onelake(
         shortcut_name = table_name
 
     client = fabric.FabricRestClient()
-    tablePath = "Tables/" + table_name
+    tablePath = f"Tables/{table_name}"
 
     request_body = {
         "path": "Tables",
@@ -74,12 +74,15 @@ def create_shortcut_onelake(
         )
         if response.status_code == 201:
             print(
-                f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace} workspace. It is based on the '{table_name}' table in the '{source_lakehouse}' lakehouse within the '{source_workspace}' workspace."
+                f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse}' lakehouse within"
+                f" the '{destination_workspace} workspace. It is based on the '{table_name}' table in the '{source_lakehouse}' lakehouse within the '{source_workspace}' workspace."
             )
         else:
             print(response.status_code)
     except Exception as e:
-        raise ValueError(f"{icons.red_dot} Failed to create a shortcut for the '{table_name}' table.") from e
+        raise ValueError(
+            f"{icons.red_dot} Failed to create a shortcut for the '{table_name}' table."
+        ) from e
 
 
 def create_shortcut(
@@ -114,7 +117,9 @@ def create_shortcut(
     sourceValues = list(source_titles.keys())
 
     if source not in sourceValues:
-        raise ValueError(f"{icons.red_dot} The 'source' parameter must be one of these values: {sourceValues}.")
+        raise ValueError(
+            f"{icons.red_dot} The 'source' parameter must be one of these values: {sourceValues}."
+        )
 
     sourceTitle = source_titles[source]
 
@@ -147,12 +152,15 @@ def create_shortcut(
         )
         if response.status_code == 201:
             print(
-                f"{icons.green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'."
+                f"{icons.green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within"
+                f" the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'."
            )
        else:
             print(response.status_code)
     except Exception as e:
-        raise ValueError(f"{icons.red_dot} Failed to create a shortcut for the '{shortcut_name}' table.") from e
+        raise ValueError(
+            f"{icons.red_dot} Failed to create a shortcut for the '{shortcut_name}' table."
+        ) from e
 
 
 def delete_shortcut(
@@ -187,9 +195,8 @@ def delete_shortcut(
         f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/Tables/{shortcut_name}"
     )
 
-    if response.status_code == 200:
-        print(
-            f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace}' workspace has been deleted."
-        )
-    else:
-        raise ValueError(f"{icons.red_dot} The '{shortcut_name}' has not been deleted.")
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+    print(
+        f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace}' workspace has been deleted."
+    )
sempy_labs/migration/__init__.py

@@ -27,5 +27,5 @@ __all__ = [
     "migrate_model_objects_to_semantic_model",
     "migrate_tables_columns_to_semantic_model",
     "migration_validation",
-    "refresh_calc_tables"
+    "refresh_calc_tables",
 ]