semantic-link-labs 0.4.1 (semantic_link_labs-0.4.1-py3-none-any.whl)
This diff shows the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Note: this release of semantic-link-labs has been flagged as potentially problematic.
- semantic_link_labs-0.4.1.dist-info/LICENSE +21 -0
- semantic_link_labs-0.4.1.dist-info/METADATA +22 -0
- semantic_link_labs-0.4.1.dist-info/RECORD +52 -0
- semantic_link_labs-0.4.1.dist-info/WHEEL +5 -0
- semantic_link_labs-0.4.1.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +154 -0
- sempy_labs/_ai.py +496 -0
- sempy_labs/_clear_cache.py +39 -0
- sempy_labs/_connections.py +234 -0
- sempy_labs/_dax.py +70 -0
- sempy_labs/_generate_semantic_model.py +280 -0
- sempy_labs/_helper_functions.py +506 -0
- sempy_labs/_icons.py +4 -0
- sempy_labs/_list_functions.py +1372 -0
- sempy_labs/_model_auto_build.py +143 -0
- sempy_labs/_model_bpa.py +1354 -0
- sempy_labs/_model_dependencies.py +341 -0
- sempy_labs/_one_lake_integration.py +155 -0
- sempy_labs/_query_scale_out.py +447 -0
- sempy_labs/_refresh_semantic_model.py +184 -0
- sempy_labs/_tom.py +3766 -0
- sempy_labs/_translations.py +378 -0
- sempy_labs/_vertipaq.py +893 -0
- sempy_labs/directlake/__init__.py +45 -0
- sempy_labs/directlake/_directlake_schema_compare.py +110 -0
- sempy_labs/directlake/_directlake_schema_sync.py +128 -0
- sempy_labs/directlake/_fallback.py +62 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +69 -0
- sempy_labs/directlake/_get_shared_expression.py +59 -0
- sempy_labs/directlake/_guardrails.py +84 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +54 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +89 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +81 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +64 -0
- sempy_labs/directlake/_warm_cache.py +210 -0
- sempy_labs/lakehouse/__init__.py +24 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +81 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +85 -0
- sempy_labs/lakehouse/_shortcuts.py +296 -0
- sempy_labs/migration/__init__.py +29 -0
- sempy_labs/migration/_create_pqt_file.py +239 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +429 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +150 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +524 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +165 -0
- sempy_labs/migration/_migration_validation.py +227 -0
- sempy_labs/migration/_refresh_calc_tables.py +129 -0
- sempy_labs/report/__init__.py +35 -0
- sempy_labs/report/_generate_report.py +253 -0
- sempy_labs/report/_report_functions.py +855 -0
- sempy_labs/report/_report_rebind.py +131 -0
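For orientation, a minimal sketch of how this wheel is typically consumed from a Microsoft Fabric notebook; the version pin mirrors this release and the import path follows top_level.txt above:

# %pip install semantic-link-labs==0.4.1
import sempy_labs
from sempy_labs import directlake  # the Direct Lake subpackage shown in the hunks below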
sempy_labs/directlake/__init__.py
@@ -0,0 +1,45 @@
+from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare
+from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync
+from sempy_labs.directlake._fallback import (
+    check_fallback_reason,
+)
+from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse
+from sempy_labs.directlake._get_shared_expression import get_shared_expression
+from sempy_labs.directlake._guardrails import (
+    get_direct_lake_guardrails,
+    get_sku_size,
+    get_directlake_guardrails_for_sku,
+)
+from sempy_labs.directlake._list_directlake_model_calc_tables import (
+    list_direct_lake_model_calc_tables,
+)
+from sempy_labs.directlake._show_unsupported_directlake_objects import (
+    show_unsupported_direct_lake_objects,
+)
+from sempy_labs.directlake._update_directlake_model_lakehouse_connection import (
+    update_direct_lake_model_lakehouse_connection,
+)
+from sempy_labs.directlake._update_directlake_partition_entity import (
+    update_direct_lake_partition_entity,
+)
+from sempy_labs.directlake._warm_cache import (
+    warm_direct_lake_cache_isresident,
+    warm_direct_lake_cache_perspective,
+)
+
+__all__ = [
+    "direct_lake_schema_compare",
+    "direct_lake_schema_sync",
+    "check_fallback_reason",
+    "get_direct_lake_lakehouse",
+    "get_shared_expression",
+    "get_direct_lake_guardrails",
+    "get_sku_size",
+    "get_directlake_guardrails_for_sku",
+    "list_direct_lake_model_calc_tables",
+    "show_unsupported_direct_lake_objects",
+    "update_direct_lake_model_lakehouse_connection",
+    "update_direct_lake_partition_entity",
+    "warm_direct_lake_cache_isresident",
+    "warm_direct_lake_cache_perspective",
+]
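The __init__ above re-exports the Direct Lake helpers, so they can be imported from the subpackage directly; a brief sketch:

from sempy_labs.directlake import (
    check_fallback_reason,
    get_sku_size,
    get_directlake_guardrails_for_sku,
)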
sempy_labs/directlake/_directlake_schema_compare.py
@@ -0,0 +1,110 @@
+import sempy
+import sempy.fabric as fabric
+import pandas as pd
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_lakehouse_name,
+    get_direct_lake_sql_endpoint,
+)
+from IPython.display import display
+from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+from sempy_labs._list_functions import list_tables
+from typing import Optional
+
+
+def direct_lake_schema_compare(
+    dataset: str,
+    workspace: Optional[str] = None,
+    lakehouse: Optional[str] = None,
+    lakehouse_workspace: Optional[str] = None,
+):
+    """
+    Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    lakehouse : str, default=None
+        The Fabric lakehouse used by the Direct Lake semantic model.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    lakehouse_workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    if workspace is None:
+        workspace_id = fabric.get_workspace_id()
+        workspace = fabric.resolve_workspace_name(workspace_id)
+
+    if lakehouse_workspace is None:
+        lakehouse_workspace = workspace
+
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+
+    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+    sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
+    dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
+    dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
+
+    if len(dfI_filt) == 0:
+        print(
+            f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace}' workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+        )
+        return
+
+    if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+        print(f"The '{dataset}' semantic model is not in Direct Lake mode.")
+        return
+
+    dfT = list_tables(dataset, workspace)
+    dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
+    lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
+
+    dfT.rename(columns={"Type": "Table Type"}, inplace=True)
+    dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+    dfC = pd.merge(dfC, dfP[["Table Name", "Query"]], on="Table Name", how="inner")
+    dfC = pd.merge(
+        dfC,
+        dfT[["Name", "Table Type"]],
+        left_on="Table Name",
+        right_on="Name",
+        how="inner",
+    )
+    dfC["Full Column Name"] = format_dax_object_name(dfC["Query"], dfC["Source"])
+    dfC_filt = dfC[dfC["Table Type"] == "Table"]
+    # Schema compare
+    missingtbls = dfP_filt[~dfP_filt["Query"].isin(lc["Table Name"])]
+    missingtbls = missingtbls[["Table Name", "Query"]]
+    missingtbls.rename(columns={"Query": "Source Table"}, inplace=True)
+    missingcols = dfC_filt[~dfC_filt["Full Column Name"].isin(lc["Full Column Name"])]
+    missingcols = missingcols[
+        ["Table Name", "Column Name", "Type", "Data Type", "Source"]
+    ]
+    missingcols.rename(columns={"Source": "Source Column"}, inplace=True)
+
+    if len(missingtbls) == 0:
+        print(
+            f"All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+        )
+    else:
+        print(
+            f"The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+        )
+        display(missingtbls)
+    if len(missingcols) == 0:
+        print(
+            f"All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+        )
+    else:
+        print(
+            f"The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+        )
+        display(missingcols)
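A usage sketch for the compare function above, assuming a Fabric notebook with a lakehouse attached; 'Sales' is an illustrative model name:

from sempy_labs.directlake import direct_lake_schema_compare

# Compares model tables/columns against the lakehouse and prints any gaps.
direct_lake_schema_compare(dataset="Sales")  # workspace/lakehouse default to the attached ones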
sempy_labs/directlake/_directlake_schema_sync.py
@@ -0,0 +1,128 @@
+import sempy
+import sempy.fabric as fabric
+import pandas as pd
+from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+from sempy_labs._helper_functions import (
+    format_dax_object_name,
+    resolve_lakehouse_name,
+    get_direct_lake_sql_endpoint,
+)
+from typing import Optional
+from sempy._utils._log import log
+
+
+@log
+def direct_lake_schema_sync(
+    dataset: str,
+    workspace: Optional[str] = None,
+    add_to_model: Optional[bool] = False,
+    lakehouse: Optional[str] = None,
+    lakehouse_workspace: Optional[str] = None,
+):
+    """
+    Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model).
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    add_to_model : bool, default=False
+        If set to True, columns which exist in the lakehouse but do not exist in the semantic model are added to the semantic model. No new tables are added.
+    lakehouse : str, default=None
+        The Fabric lakehouse used by the Direct Lake semantic model.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    lakehouse_workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    sempy.fabric._client._utils._init_analysis_services()
+    import Microsoft.AnalysisServices.Tabular as TOM
+    import System
+
+    if workspace is None:
+        workspace_id = fabric.get_workspace_id()
+        workspace = fabric.resolve_workspace_name(workspace_id)
+
+    if lakehouse_workspace is None:
+        lakehouse_workspace = workspace
+
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+
+    sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
+
+    dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
+    dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
+
+    if len(dfI_filt) == 0:
+        print(
+            f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace}' workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+        )
+        return
+
+    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+    dfP_filt = dfP[dfP["Source Type"] == "Entity"]
+    dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
+    dfC_filt = dfC[dfC["Table Name"].isin(dfP_filt["Table Name"].values)]
+    dfC_filt = pd.merge(
+        dfC_filt, dfP_filt[["Table Name", "Query"]], on="Table Name", how="left"
+    )
+    dfC_filt["Column Object"] = format_dax_object_name(
+        dfC_filt["Query"], dfC_filt["Source"]
+    )
+
+    lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
+    lc_filt = lc[lc["Table Name"].isin(dfP_filt["Query"].values)]
+
+    mapping = {
+        "string": "String",
+        "bigint": "Int64",
+        "int": "Int64",
+        "smallint": "Int64",
+        "boolean": "Boolean",
+        "timestamp": "DateTime",
+        "date": "DateTime",
+        "decimal(38,18)": "Decimal",
+        "double": "Double",
+    }
+
+    tom_server = fabric.create_tom_server(readonly=False, workspace=workspace)
+    m = tom_server.Databases.GetByName(dataset).Model
+    for i, r in lc_filt.iterrows():
+        lakeTName = r["Table Name"]
+        lakeCName = r["Column Name"]
+        fullColName = r["Full Column Name"]
+        dType = r["Data Type"]
+
+        if fullColName not in dfC_filt["Column Object"].values:
+            dfL = dfP_filt[dfP_filt["Query"] == lakeTName]
+            tName = dfL["Table Name"].iloc[0]
+            if add_to_model:
+                col = TOM.DataColumn()
+                col.Name = lakeCName
+                col.SourceColumn = lakeCName
+                dt = mapping.get(dType)
+                try:
+                    col.DataType = System.Enum.Parse(TOM.DataType, dt)
+                except Exception:
+                    print(
+                        f"ERROR: '{dType}' data type is not mapped properly to the semantic model data types."
+                    )
+                    return
+
+                m.Tables[tName].Columns.Add(col)
+                print(
+                    f"The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' data type within the '{dataset}' semantic model within the '{workspace}' workspace."
+                )
+            else:
+                print(
+                    f"The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace."
+                )
+    m.SaveChanges()
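A sketch of the sync function above: with add_to_model=False (the default) it only reports missing columns, with True it adds them to the model. 'Sales' is an illustrative name:

from sempy_labs.directlake import direct_lake_schema_sync

# Add lakehouse columns that the model's tables are missing.
direct_lake_schema_sync(dataset="Sales", add_to_model=True)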
sempy_labs/directlake/_fallback.py
@@ -0,0 +1,62 @@
+import sempy
+import sempy.fabric as fabric
+import numpy as np
+from typing import List, Optional, Union
+
+
+def check_fallback_reason(dataset: str, workspace: Optional[str] = None):
+    """
+    Shows the reason a table in a Direct Lake semantic model would fallback to DirectQuery.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The tables in the semantic model and their fallback reason.
+    """
+
+    if workspace is None:
+        workspace_id = fabric.get_workspace_id()
+        workspace = fabric.resolve_workspace_name(workspace_id)
+
+    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+    dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+
+    if len(dfP_filt) == 0:
+        print(
+            f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models."
+        )
+    else:
+        df = fabric.evaluate_dax(
+            dataset=dataset,
+            workspace=workspace,
+            dax_string="""
+            SELECT [TableName] AS [Table Name],[FallbackReason] AS [FallbackReasonID]
+            FROM $SYSTEM.TMSCHEMA_DELTA_TABLE_METADATA_STORAGES
+            """,
+        )
+
+        value_mapping = {
+            0: "No reason for fallback",
+            1: "This table is not framed",
+            2: "This object is a view in the lakehouse",
+            3: "The table does not exist in the lakehouse",
+            4: "Transient error",
+            5: "Using OLS will result in fallback to DQ",
+            6: "Using RLS will result in fallback to DQ",
+        }
+
+        # Create a new column based on the mapping
+        df["Fallback Reason Detail"] = np.vectorize(value_mapping.get)(
+            df["FallbackReasonID"]
+        )
+
+        return df
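A sketch of reading the fallback reasons returned above; filtering on FallbackReasonID != 0 isolates tables that would fall back, going by the value_mapping where 0 means no fallback. 'Sales' is illustrative:

from sempy_labs.directlake import check_fallback_reason

df = check_fallback_reason(dataset="Sales")
print(df[df["FallbackReasonID"] != 0])  # only tables with a fallback reason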
sempy_labs/directlake/_get_directlake_lakehouse.py
@@ -0,0 +1,69 @@
+import sempy
+import sempy.fabric as fabric
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_id,
+    resolve_lakehouse_name,
+    get_direct_lake_sql_endpoint,
+)
+from typing import Optional, Tuple
+from uuid import UUID
+from sempy_labs._helper_functions import resolve_workspace_name_and_id
+
+
+def get_direct_lake_lakehouse(
+    dataset: str,
+    workspace: Optional[str] = None,
+    lakehouse: Optional[str] = None,
+    lakehouse_workspace: Optional[str] = None,
+) -> Tuple[str, UUID]:
+    """
+    Identifies the lakehouse used by a Direct Lake semantic model.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    lakehouse : str, default=None
+        The Fabric lakehouse used by the Direct Lake semantic model.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    lakehouse_workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    str, uuid.UUID
+        The lakehouse name and lakehouse ID.
+    """
+
+    workspace = fabric.resolve_workspace_name(workspace)
+
+    if lakehouse_workspace is None:
+        lakehouse_workspace = workspace
+
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+
+    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+    dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+
+    if len(dfP_filt) == 0:
+        raise ValueError(
+            f"ERROR: The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+        )
+
+    sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
+
+    dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
+    dfI_filt = dfI[dfI["Id"] == sqlEndpointId]
+    lakehouseName = dfI_filt["Display Name"].iloc[0]
+
+    lakehouseId = resolve_lakehouse_id(lakehouseName, lakehouse_workspace)
+
+    return lakehouseName, lakehouseId
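A sketch of resolving the lakehouse behind a model via the function above; per its signature it returns a (name, id) tuple. 'Sales' is illustrative:

from sempy_labs.directlake import get_direct_lake_lakehouse

lakehouse_name, lakehouse_id = get_direct_lake_lakehouse(dataset="Sales")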
sempy_labs/directlake/_get_shared_expression.py
@@ -0,0 +1,59 @@
+import sempy
+import sempy.fabric as fabric
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name,
+    resolve_workspace_name_and_id,
+)
+from sempy_labs._list_functions import list_lakehouses
+from typing import Optional
+
+
+def get_shared_expression(
+    lakehouse: Optional[str] = None, workspace: Optional[str] = None
+):
+    """
+    Dynamically generates the M expression used by a Direct Lake model for a given lakehouse.
+
+    Parameters
+    ----------
+    lakehouse : str, default=None
+        The Fabric lakehouse used by the Direct Lake semantic model.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    str
+        Shows the expression which can be used to connect a Direct Lake semantic model to its SQL Endpoint.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id)
+
+    dfL = list_lakehouses(workspace=workspace)
+    lakeDetail = dfL[dfL["Lakehouse Name"] == lakehouse]
+
+    sqlEPCS = lakeDetail["SQL Endpoint Connection String"].iloc[0]
+    sqlepid = lakeDetail["SQL Endpoint ID"].iloc[0]
+    provStatus = lakeDetail["SQL Endpoint Provisioning Status"].iloc[0]
+
+    if provStatus == "InProgress":
+        print(
+            f"The SQL Endpoint for the '{lakehouse}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned."
+        )
+        return
+
+    sh = (
+        'let\n\tdatabase = Sql.Database("'
+        + sqlEPCS
+        + '", "'
+        + sqlepid
+        + '")\nin\n\tdatabase'
+    )
+
+    return sh
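A sketch of generating the shared M expression above, e.g. when wiring a new Direct Lake model to its SQL Endpoint; with no arguments it targets the attached lakehouse:

from sempy_labs.directlake import get_shared_expression

expr = get_shared_expression()
print(expr)  # shape: let\n\tdatabase = Sql.Database("<connection string>", "<endpoint id>")\nin\n\tdatabase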
sempy_labs/directlake/_guardrails.py
@@ -0,0 +1,84 @@
+import sempy
+import sempy.fabric as fabric
+import pandas as pd
+from typing import List, Optional, Union
+
+
+def get_direct_lake_guardrails():
+    """
+    Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query based on Microsoft's online documentation.
+
+    Parameters
+    ----------
+
+    Returns
+    -------
+    pandas.DataFrame
+        A table showing the Direct Lake guardrails by SKU.
+    """
+
+    url = "https://learn.microsoft.com/power-bi/enterprise/directlake-overview"
+
+    tables = pd.read_html(url)
+    df = tables[0]
+    df["Fabric SKUs"] = df["Fabric SKUs"].str.split("/")
+    df = df.explode("Fabric SKUs", ignore_index=True)
+
+    return df
+
+
+def get_sku_size(workspace: Optional[str] = None):
+    """
+    Shows the SKU size for a workspace.
+
+    Parameters
+    ----------
+    workspace : str, default=None
+        The Fabric workspace.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    str
+        The SKU size for a workspace.
+    """
+
+    if workspace is None:
+        workspace_id = fabric.get_workspace_id()
+        workspace = fabric.resolve_workspace_name(workspace_id)
+
+    dfC = fabric.list_capacities()
+    dfW = fabric.list_workspaces().sort_values(by="Name", ascending=True)
+    dfC.rename(columns={"Id": "Capacity Id"}, inplace=True)
+    dfCW = pd.merge(
+        dfW,
+        dfC[["Capacity Id", "Sku", "Region", "State"]],
+        on="Capacity Id",
+        how="inner",
+    )
+    sku_value = dfCW.loc[dfCW["Name"] == workspace, "Sku"].iloc[0]
+
+    return sku_value
+
+
+def get_directlake_guardrails_for_sku(sku_size: str):
+    """
+    Shows the guardrails for Direct Lake based on the SKU used by your workspace's capacity.
+    *Use the result of the 'get_sku_size' function as an input for this function's 'sku_size' parameter.*
+
+    Parameters
+    ----------
+    sku_size : str
+        Sku size of a workspace/capacity.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A table showing the Direct Lake guardrails for the given SKU.
+    """
+
+    df = get_direct_lake_guardrails()
+    filtered_df = df[df["Fabric SKUs"] == sku_size]
+
+    return filtered_df
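A sketch chaining the two guardrail functions above, as the docstring suggests:

from sempy_labs.directlake import get_sku_size, get_directlake_guardrails_for_sku

sku = get_sku_size()  # SKU of the current workspace's capacity
print(get_directlake_guardrails_for_sku(sku))  # guardrail row(s) for that SKU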
sempy_labs/directlake/_list_directlake_model_calc_tables.py
@@ -0,0 +1,54 @@
+import sempy
+import sempy.fabric as fabric
+import pandas as pd
+from sempy_labs._list_functions import list_tables, list_annotations
+from typing import Optional
+from sempy._utils._log import log
+
+
+@log
+def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] = None):
+    """
+    Shows the calculated tables and their respective DAX expression for a Direct Lake model (which has been migrated from import/DirectQuery).
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the calculated tables which were migrated to Direct Lake and whose DAX expressions are stored as model annotations.
+    """
+
+    if workspace is None:
+        workspace_id = fabric.get_workspace_id()
+        workspace = fabric.resolve_workspace_name(workspace_id)
+
+    df = pd.DataFrame(columns=["Table Name", "Source Expression"])
+
+    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+    dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+
+    if len(dfP_filt) == 0:
+        print(f"The '{dataset}' semantic model is not in Direct Lake mode.")
+    else:
+        dfA = list_annotations(dataset, workspace)
+        dfT = list_tables(dataset, workspace)
+        dfA_filt = dfA[
+            (dfA["Object Type"] == "Model") & (dfA["Annotation Name"].isin(dfT["Name"]))
+        ]
+
+        for i, r in dfA_filt.iterrows():
+            tName = r["Annotation Name"]
+            se = r["Annotation Value"]
+
+            new_data = {"Table Name": tName, "Source Expression": se}
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    return df
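A sketch of listing migrated calculated tables via the function above; per the docstring, the result is empty unless the model stores their DAX expressions as model annotations. 'Sales' is illustrative:

from sempy_labs.directlake import list_direct_lake_model_calc_tables

calc_tables = list_direct_lake_model_calc_tables(dataset="Sales")
print(calc_tables)  # columns: Table Name, Source Expression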