semantic-link-labs 0.4.1 (semantic_link_labs-0.4.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)
  1. semantic_link_labs-0.4.1.dist-info/LICENSE +21 -0
  2. semantic_link_labs-0.4.1.dist-info/METADATA +22 -0
  3. semantic_link_labs-0.4.1.dist-info/RECORD +52 -0
  4. semantic_link_labs-0.4.1.dist-info/WHEEL +5 -0
  5. semantic_link_labs-0.4.1.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +154 -0
  7. sempy_labs/_ai.py +496 -0
  8. sempy_labs/_clear_cache.py +39 -0
  9. sempy_labs/_connections.py +234 -0
  10. sempy_labs/_dax.py +70 -0
  11. sempy_labs/_generate_semantic_model.py +280 -0
  12. sempy_labs/_helper_functions.py +506 -0
  13. sempy_labs/_icons.py +4 -0
  14. sempy_labs/_list_functions.py +1372 -0
  15. sempy_labs/_model_auto_build.py +143 -0
  16. sempy_labs/_model_bpa.py +1354 -0
  17. sempy_labs/_model_dependencies.py +341 -0
  18. sempy_labs/_one_lake_integration.py +155 -0
  19. sempy_labs/_query_scale_out.py +447 -0
  20. sempy_labs/_refresh_semantic_model.py +184 -0
  21. sempy_labs/_tom.py +3766 -0
  22. sempy_labs/_translations.py +378 -0
  23. sempy_labs/_vertipaq.py +893 -0
  24. sempy_labs/directlake/__init__.py +45 -0
  25. sempy_labs/directlake/_directlake_schema_compare.py +110 -0
  26. sempy_labs/directlake/_directlake_schema_sync.py +128 -0
  27. sempy_labs/directlake/_fallback.py +62 -0
  28. sempy_labs/directlake/_get_directlake_lakehouse.py +69 -0
  29. sempy_labs/directlake/_get_shared_expression.py +59 -0
  30. sempy_labs/directlake/_guardrails.py +84 -0
  31. sempy_labs/directlake/_list_directlake_model_calc_tables.py +54 -0
  32. sempy_labs/directlake/_show_unsupported_directlake_objects.py +89 -0
  33. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +81 -0
  34. sempy_labs/directlake/_update_directlake_partition_entity.py +64 -0
  35. sempy_labs/directlake/_warm_cache.py +210 -0
  36. sempy_labs/lakehouse/__init__.py +24 -0
  37. sempy_labs/lakehouse/_get_lakehouse_columns.py +81 -0
  38. sempy_labs/lakehouse/_get_lakehouse_tables.py +250 -0
  39. sempy_labs/lakehouse/_lakehouse.py +85 -0
  40. sempy_labs/lakehouse/_shortcuts.py +296 -0
  41. sempy_labs/migration/__init__.py +29 -0
  42. sempy_labs/migration/_create_pqt_file.py +239 -0
  43. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +429 -0
  44. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +150 -0
  45. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +524 -0
  46. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +165 -0
  47. sempy_labs/migration/_migration_validation.py +227 -0
  48. sempy_labs/migration/_refresh_calc_tables.py +129 -0
  49. sempy_labs/report/__init__.py +35 -0
  50. sempy_labs/report/_generate_report.py +253 -0
  51. sempy_labs/report/_report_functions.py +855 -0
  52. sempy_labs/report/_report_rebind.py +131 -0
sempy_labs/directlake/_show_unsupported_directlake_objects.py
@@ -0,0 +1,89 @@
+ import sempy
+ import sempy.fabric as fabric
+ import pandas as pd
+ from sempy_labs._list_functions import list_tables
+ from sempy_labs._helper_functions import format_dax_object_name
+ from typing import Optional, Tuple
+
+
+ def show_unsupported_direct_lake_objects(
+     dataset: str, workspace: Optional[str] = None
+ ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+     """
+     Returns the objects in a semantic model that are not supported by Direct Lake, based on the `official documentation <https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations>`_.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame, pandas.DataFrame, pandas.DataFrame
+         Three pandas dataframes showing the objects in the semantic model which are not supported by Direct Lake.
+     """
+
+     pd.options.mode.chained_assignment = None
+
+     if workspace is None:
+         workspace_id = fabric.get_workspace_id()
+         workspace = fabric.resolve_workspace_name(workspace_id)
+
+     dfT = list_tables(dataset, workspace)
+     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
+     dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
+
+     # Calculated tables
+     dfT_filt = dfT[dfT["Type"] == "Calculated Table"]
+     dfT_filt.rename(columns={"Name": "Table Name"}, inplace=True)
+     t = dfT_filt[["Table Name", "Type"]]
+
+     # Calculated columns and binary columns
+     dfC_filt = dfC[(dfC["Type"] == "Calculated") | (dfC["Data Type"] == "Binary")]
+     c = dfC_filt[["Table Name", "Column Name", "Type", "Data Type", "Source"]]
+
+     # Relationships
+     dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
+     dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
+     dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
+     merged_from = pd.merge(
+         dfR, dfC, left_on="From Object", right_on="Column Object", how="left"
+     )
+     merged_to = pd.merge(
+         dfR, dfC, left_on="To Object", right_on="Column Object", how="left"
+     )
+
+     dfR["From Column Data Type"] = merged_from["Data Type"]
+     dfR["To Column Data Type"] = merged_to["Data Type"]
+
+     dfR_filt = dfR[
+         (
+             (dfR["From Column Data Type"] == "DateTime")
+             | (dfR["To Column Data Type"] == "DateTime")
+         )
+         | (dfR["From Column Data Type"] != dfR["To Column Data Type"])
+     ]
+     r = dfR_filt[
+         [
+             "From Table",
+             "From Column",
+             "To Table",
+             "To Column",
+             "From Column Data Type",
+             "To Column Data Type",
+         ]
+     ]
+
+     # print('Calculated tables are not supported...')
+     # display(t)
+     # print("Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations")
+     # print('Calculated columns are not supported. Columns of binary data type are not supported.')
+     # display(c)
+     # print('Columns used for relationships cannot be of data type datetime and must also be of the same data type.')
+     # display(r)
+
+     return t, c, r
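A minimal usage sketch for the function above, assuming it is run in a Fabric notebook with this package installed; the dataset and workspace names are hypothetical:

    from sempy_labs.directlake._show_unsupported_directlake_objects import (
        show_unsupported_direct_lake_objects,
    )

    # Hypothetical model/workspace names
    tables, columns, relationships = show_unsupported_direct_lake_objects(
        dataset="Sales Model", workspace="My Workspace"
    )
    # 'relationships' lists relationships using DateTime keys or mismatched key data types
    print(relationships)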
sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py
@@ -0,0 +1,81 @@
+ import sempy
+ import sempy.fabric as fabric
+ from sempy_labs.directlake._get_shared_expression import get_shared_expression
+ from sempy_labs._helper_functions import (
+     resolve_lakehouse_name,
+     resolve_workspace_name_and_id,
+ )
+ from sempy_labs._tom import connect_semantic_model
+ from typing import List, Optional, Union
+
+
+ def update_direct_lake_model_lakehouse_connection(
+     dataset: str,
+     workspace: Optional[str] = None,
+     lakehouse: Optional[str] = None,
+     lakehouse_workspace: Optional[str] = None,
+ ):
+     """
+     Remaps a Direct Lake semantic model's SQL endpoint connection to a new lakehouse.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name in which the semantic model exists.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     lakehouse : str, default=None
+         The Fabric lakehouse used by the Direct Lake semantic model.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     lakehouse_workspace : str, default=None
+         The Fabric workspace used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+
+     """
+
+     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+     if lakehouse_workspace is None:
+         lakehouse_workspace = workspace
+
+     if lakehouse is None:
+         lakehouse_id = fabric.get_lakehouse_id()
+         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+
+     # Check that the lakehouse exists in the given workspace
+     dfI = fabric.list_items(workspace=lakehouse_workspace, type="Lakehouse")
+     dfI_filt = dfI[(dfI["Display Name"] == lakehouse)]
+
+     if len(dfI_filt) == 0:
+         print(
+             f"The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. Therefore it cannot be used to support the '{dataset}' semantic model within the '{workspace}' workspace."
+         )
+         return
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+
+     if len(dfP_filt) == 0:
+         print(
+             f"The '{dataset}' semantic model is not in Direct Lake mode. This function is only applicable to Direct Lake semantic models."
+         )
+     else:
+         with connect_semantic_model(
+             dataset=dataset, readonly=False, workspace=workspace
+         ) as tom:
+
+             shEx = get_shared_expression(lakehouse, lakehouse_workspace)
+             try:
+                 tom.model.Expressions["DatabaseQuery"].Expression = shEx
+                 print(
+                     f"The expression in the '{dataset}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace."
+                 )
+             except Exception:
+                 print(
+                     f"ERROR: The expression in the '{dataset}' semantic model was not updated."
+                 )
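A hedged usage sketch for the remapping function above, assuming a Fabric notebook; the dataset, lakehouse, and workspace names are placeholders:

    from sempy_labs.directlake._update_directlake_model_lakehouse_connection import (
        update_direct_lake_model_lakehouse_connection,
    )

    # Point the Direct Lake model at a different lakehouse (illustrative names)
    update_direct_lake_model_lakehouse_connection(
        dataset="Sales Model",
        workspace="My Workspace",
        lakehouse="SalesLakehouse",
        lakehouse_workspace="My Workspace",
    )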
sempy_labs/directlake/_update_directlake_partition_entity.py
@@ -0,0 +1,64 @@
+ import sempy.fabric as fabric
+ from sempy_labs._tom import connect_semantic_model
+ from typing import List, Optional, Union
+
+
+ def update_direct_lake_partition_entity(
+     dataset: str,
+     table_name: Union[str, List[str]],
+     entity_name: Union[str, List[str]],
+     workspace: Optional[str] = None,
+ ):
+     """
+     Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     table_name : str, List[str]
+         Name of the table(s) in the semantic model.
+     entity_name : str, List[str]
+         Name of the lakehouse table(s) to be mapped to the semantic model table(s).
+     workspace : str, default=None
+         The Fabric workspace name in which the semantic model exists.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     # Support both str & list types
+     if isinstance(table_name, str):
+         table_name = [table_name]
+     if isinstance(entity_name, str):
+         entity_name = [entity_name]
+
+     if len(table_name) != len(entity_name):
+         print(
+             "ERROR: The 'table_name' and 'entity_name' lists must be of equal length."
+         )
+         return
+
+     with connect_semantic_model(
+         dataset=dataset, readonly=False, workspace=workspace
+     ) as tom:
+
+         if not tom.is_direct_lake():
+             print(
+                 f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+             )
+             return
+
+         # Pair each semantic model table with its target lakehouse table
+         for tName, eName in zip(table_name, entity_name):
+             try:
+                 tom.model.Tables[tName].Partitions[0].EntityName = eName
+                 print(
+                     f"The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table in the lakehouse."
+                 )
+             except Exception:
+                 print(
+                     f"ERROR: The '{tName}' table in the '{dataset}' semantic model has not been updated."
+                 )
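A short usage sketch for the function above, assuming a Fabric notebook; the model, table, and workspace names are illustrative only:

    from sempy_labs.directlake._update_directlake_partition_entity import (
        update_direct_lake_partition_entity,
    )

    # Remap two semantic model tables to differently named lakehouse tables
    update_direct_lake_partition_entity(
        dataset="Sales Model",
        table_name=["Sales", "Date"],
        entity_name=["fact_sales", "dim_date"],
        workspace="My Workspace",
    )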
sempy_labs/directlake/_warm_cache.py
@@ -0,0 +1,210 @@
+ import sempy
+ import sempy.fabric as fabric
+ import pandas as pd
+ from tqdm.auto import tqdm
+ import numpy as np
+ import time
+ from sempy_labs._helper_functions import format_dax_object_name
+ from sempy_labs._refresh_semantic_model import refresh_semantic_model
+ from sempy_labs._model_dependencies import get_measure_dependencies
+ from typing import Optional
+ from sempy._utils._log import log
+ import sempy_labs._icons as icons
+
+
+ @log
+ def warm_direct_lake_cache_perspective(
+     dataset: str,
+     perspective: str,
+     add_dependencies: Optional[bool] = False,
+     workspace: Optional[str] = None,
+ ):
+     """
+     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     perspective : str
+         Name of the perspective which contains the objects to be used for warming the cache.
+     add_dependencies : bool, default=False
+         Includes object dependencies in the cache warming process.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the columns that have been put into memory.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+         print(
+             f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+         )
+         return
+
+     dfPersp = fabric.list_perspectives(dataset=dataset, workspace=workspace)
+     dfPersp["DAX Object Name"] = format_dax_object_name(
+         dfPersp["Table Name"], dfPersp["Object Name"]
+     )
+     dfPersp_filt = dfPersp[dfPersp["Perspective Name"] == perspective]
+
+     if len(dfPersp_filt) == 0:
+         print(
+             f"{icons.red_dot} The '{perspective}' perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace."
+         )
+         return
+     dfPersp_c = dfPersp_filt[dfPersp_filt["Object Type"] == "Column"]
+
+     column_values = dfPersp_c["DAX Object Name"].tolist()
+
+     if add_dependencies:
+         # Measure dependencies
+         md = get_measure_dependencies(dataset, workspace)
+         md["Referenced Full Object"] = format_dax_object_name(
+             md["Referenced Table"], md["Referenced Object"]
+         )
+         dfPersp_m = dfPersp_filt[(dfPersp_filt["Object Type"] == "Measure")]
+         md_filt = md[
+             (md["Object Name"].isin(dfPersp_m["Object Name"].values))
+             & (md["Referenced Object Type"] == "Column")
+         ]
+         measureDep = md_filt["Referenced Full Object"].unique()
+
+         # Hierarchy dependencies
+         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
+         dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace)
+         dfH["Hierarchy Object"] = format_dax_object_name(
+             dfH["Table Name"], dfH["Hierarchy Name"]
+         )
+         dfH["Column Object"] = format_dax_object_name(
+             dfH["Table Name"], dfH["Column Name"]
+         )
+         dfH_filt = dfH[
+             dfH["Hierarchy Object"].isin(dfPersp_h["DAX Object Name"].values)
+         ]
+         hierarchyDep = dfH_filt["Column Object"].unique()
+
+         # Relationship dependencies
+         unique_table_names = dfPersp_filt["Table Name"].unique()
+         dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
+         dfR["From Object"] = format_dax_object_name(
+             dfR["From Table"], dfR["From Column"]
+         )
+         dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
+         filtered_dfR = dfR[
+             dfR["From Table"].isin(unique_table_names)
+             & dfR["To Table"].isin(unique_table_names)
+         ]
+
+         fromObjects = filtered_dfR["From Object"].unique()
+         toObjects = filtered_dfR["To Object"].unique()
+
+         merged_list = np.concatenate(
+             [column_values, measureDep, hierarchyDep, fromObjects, toObjects]
+         )
+         merged_list_unique = list(set(merged_list))
+
+     else:
+         merged_list_unique = column_values
+
+     df = pd.DataFrame(merged_list_unique, columns=["DAX Object Name"])
+     df[["Table Name", "Column Name"]] = df["DAX Object Name"].str.split(
+         "[", expand=True
+     )
+     df["Table Name"] = df["Table Name"].str[1:-1]
+     df["Column Name"] = df["Column Name"].str[0:-1]
+
+     tbls = list(set(value.split("[")[0] for value in merged_list_unique))
+
+     # Run a basic query per table to pull its columns into memory
+     for tableName in (bar := tqdm(tbls)):
+         filtered_list = [
+             value for value in merged_list_unique if value.startswith(f"{tableName}[")
+         ]
+         bar.set_description(f"Warming the '{tableName}' table...")
+         css = ",".join(map(str, filtered_list))
+         dax = "EVALUATE TOPN(1,SUMMARIZECOLUMNS(" + css + "))"
+         fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
+
+     print(f"{icons.green_dot} The following columns have been put into memory:")
+
+     new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
+     df = df.reindex(columns=new_column_order)
+     df = df[["Table Name", "Column Name"]].sort_values(
+         by=["Table Name", "Column Name"], ascending=True
+     )
+
+     return df
+
+
+ @log
+ def warm_direct_lake_cache_isresident(
+     dataset: str, workspace: Optional[str] = None
+ ) -> pd.DataFrame:
+     """
+     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the columns that have been put into memory.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+         print(
+             f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+         )
+         return
+
+     # Identify columns which are currently in memory (Is Resident = True)
+     dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+     dfC["DAX Object Name"] = format_dax_object_name(
+         dfC["Table Name"], dfC["Column Name"]
+     )
+     dfC_filtered = dfC[dfC["Is Resident"]]
+
+     if len(dfC_filtered) == 0:
+         print(
+             f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace."
+         )
+         return
+
+     # Refresh/frame the dataset
+     refresh_semantic_model(dataset=dataset, refresh_type="full", workspace=workspace)
+
+     time.sleep(2)
+
+     tbls = dfC_filtered["Table Name"].unique()
+
+     # Run a basic query to get columns into memory; processed one table at a time (so as not to overload the capacity)
+     for tableName in (bar := tqdm(tbls)):
+         bar.set_description(f"Warming the '{tableName}' table...")
+         css = ",".join(
+             dfC_filtered[dfC_filtered["Table Name"] == tableName]["DAX Object Name"]
+         )
+         dax = "EVALUATE TOPN(1,SUMMARIZECOLUMNS(" + css + "))"
+         fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
+
+     print(
+         f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh."
+     )
+
+     return dfC_filtered[
+         ["Table Name", "Column Name", "Is Resident", "Temperature"]
+     ].sort_values(by=["Table Name", "Column Name"], ascending=True)
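A brief usage sketch for the two cache-warming functions above, assuming a Fabric notebook; the dataset, perspective, and workspace names are hypothetical:

    from sempy_labs.directlake._warm_cache import (
        warm_direct_lake_cache_perspective,
        warm_direct_lake_cache_isresident,
    )

    # Warm the columns referenced by a perspective, including measure, hierarchy,
    # and relationship dependencies
    warm_direct_lake_cache_perspective(
        dataset="Sales Model",
        perspective="Reporting",
        add_dependencies=True,
        workspace="My Workspace",
    )

    # Refresh the model, then re-warm whichever columns were resident before the refresh
    warm_direct_lake_cache_isresident(dataset="Sales Model", workspace="My Workspace")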
sempy_labs/lakehouse/__init__.py
@@ -0,0 +1,24 @@
+ from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+ from sempy_labs.lakehouse._lakehouse import (
+     lakehouse_attached,
+     optimize_lakehouse_tables,
+ )
+
+ from sempy_labs.lakehouse._shortcuts import (
+     list_shortcuts,
+     # create_shortcut,
+     create_shortcut_onelake,
+     delete_shortcut,
+ )
+
+ __all__ = [
+     "get_lakehouse_columns",
+     "get_lakehouse_tables",
+     "lakehouse_attached",
+     "optimize_lakehouse_tables",
+     "list_shortcuts",
+     # "create_shortcut",
+     "create_shortcut_onelake",
+     "delete_shortcut",
+ ]
sempy_labs/lakehouse/_get_lakehouse_columns.py
@@ -0,0 +1,81 @@
+ import sempy
+ import sempy.fabric as fabric
+ import pandas as pd
+ from pyspark.sql import SparkSession
+ from sempy_labs._helper_functions import (
+     resolve_lakehouse_name,
+     format_dax_object_name,
+     resolve_lakehouse_id,
+ )
+ from typing import Optional
+
+
+ def get_lakehouse_columns(
+     lakehouse: Optional[str] = None, workspace: Optional[str] = None
+ ):
+     """
+     Shows the tables and columns of a lakehouse and their respective properties.
+
+     Parameters
+     ----------
+     lakehouse : str, default=None
+         The Fabric lakehouse.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str, default=None
+         The Fabric workspace used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         Shows the tables/columns within a lakehouse and their properties.
+     """
+     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+     from delta import DeltaTable
+
+     df = pd.DataFrame(
+         columns=[
+             "Workspace Name",
+             "Lakehouse Name",
+             "Table Name",
+             "Column Name",
+             "Full Column Name",
+             "Data Type",
+         ]
+     )
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     if lakehouse is None:
+         lakehouse_id = fabric.get_lakehouse_id()
+         lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+     else:
+         lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+
+     spark = SparkSession.builder.getOrCreate()
+
+     tables = get_lakehouse_tables(
+         lakehouse=lakehouse, workspace=workspace, extended=False, count_rows=False
+     )
+     tables_filt = tables[tables["Format"] == "delta"]
+
+     for i, r in tables_filt.iterrows():
+         tName = r["Table Name"]
+         tPath = r["Location"]
+         delta_table = DeltaTable.forPath(spark, tPath)
+         sparkdf = delta_table.toDF()
+
+         for cName, data_type in sparkdf.dtypes:
+             tc = format_dax_object_name(tName, cName)
+             new_data = {
+                 "Workspace Name": workspace,
+                 "Lakehouse Name": lakehouse,
+                 "Table Name": tName,
+                 "Column Name": cName,
+                 "Full Column Name": tc,
+                 "Data Type": data_type,
+             }
+             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+     return df
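A minimal usage sketch for the function above, using the public export defined in sempy_labs/lakehouse/__init__.py; the lakehouse and workspace names are illustrative:

    from sempy_labs.lakehouse import get_lakehouse_columns

    # Inspect the Delta tables and columns of a lakehouse
    df = get_lakehouse_columns(lakehouse="SalesLakehouse", workspace="My Workspace")
    print(df[["Table Name", "Column Name", "Data Type"]].head())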