semantic-link-labs 0.4.2__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.4.2.dist-info → semantic_link_labs-0.6.0.dist-info}/METADATA +2 -2
- semantic_link_labs-0.6.0.dist-info/RECORD +54 -0
- {semantic_link_labs-0.4.2.dist-info → semantic_link_labs-0.6.0.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +44 -14
- sempy_labs/_ai.py +31 -32
- sempy_labs/_clear_cache.py +5 -8
- sempy_labs/_connections.py +80 -72
- sempy_labs/_dax.py +7 -9
- sempy_labs/_generate_semantic_model.py +60 -54
- sempy_labs/_helper_functions.py +8 -10
- sempy_labs/_icons.py +15 -0
- sempy_labs/_list_functions.py +1139 -428
- sempy_labs/_model_auto_build.py +5 -6
- sempy_labs/_model_bpa.py +134 -1125
- sempy_labs/_model_bpa_rules.py +831 -0
- sempy_labs/_model_dependencies.py +21 -25
- sempy_labs/_one_lake_integration.py +10 -7
- sempy_labs/_query_scale_out.py +83 -93
- sempy_labs/_refresh_semantic_model.py +12 -16
- sempy_labs/_translations.py +214 -288
- sempy_labs/_vertipaq.py +51 -42
- sempy_labs/directlake/__init__.py +2 -0
- sempy_labs/directlake/_directlake_schema_compare.py +12 -11
- sempy_labs/directlake/_directlake_schema_sync.py +13 -23
- sempy_labs/directlake/_fallback.py +5 -7
- sempy_labs/directlake/_get_directlake_lakehouse.py +1 -1
- sempy_labs/directlake/_get_shared_expression.py +4 -8
- sempy_labs/directlake/_guardrails.py +6 -8
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +18 -12
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +4 -4
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +9 -8
- sempy_labs/directlake/_update_directlake_partition_entity.py +129 -12
- sempy_labs/directlake/_warm_cache.py +5 -5
- sempy_labs/lakehouse/_get_lakehouse_columns.py +2 -2
- sempy_labs/lakehouse/_get_lakehouse_tables.py +4 -4
- sempy_labs/lakehouse/_lakehouse.py +3 -4
- sempy_labs/lakehouse/_shortcuts.py +17 -13
- sempy_labs/migration/__init__.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +21 -24
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +16 -13
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +17 -18
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +45 -46
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +14 -14
- sempy_labs/migration/_migration_validation.py +6 -2
- sempy_labs/migration/_refresh_calc_tables.py +10 -5
- sempy_labs/report/__init__.py +2 -2
- sempy_labs/report/_generate_report.py +8 -7
- sempy_labs/report/_report_functions.py +47 -52
- sempy_labs/report/_report_rebind.py +38 -37
- sempy_labs/tom/__init__.py +1 -4
- sempy_labs/tom/_model.py +541 -180
- semantic_link_labs-0.4.2.dist-info/RECORD +0 -53
- {semantic_link_labs-0.4.2.dist-info → semantic_link_labs-0.6.0.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.4.2.dist-info → semantic_link_labs-0.6.0.dist-info}/top_level.txt +0 -0
sempy_labs/_vertipaq.py
CHANGED
@@ -1,8 +1,11 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from IPython.display import display, HTML
-import zipfile
+import zipfile
+import os
+import shutil
+import datetime
+import warnings
 from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
     format_dax_object_name,
@@ -12,10 +15,11 @@ from sempy_labs._helper_functions import (
 from sempy_labs._list_functions import list_relationships
 from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 from sempy_labs.lakehouse._lakehouse import lakehouse_attached
-from typing import
+from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons

+
 @log
 def vertipaq_analyzer(
     dataset: str,
@@ -51,14 +55,14 @@ def vertipaq_analyzer(

     """

+    from sempy_labs.tom import connect_semantic_model
+
     pd.options.mode.copy_on_write = True
     warnings.filterwarnings(
         "ignore", message="createDataFrame attempted Arrow optimization*"
     )

-
-    workspace_id = fabric.get_workspace_id()
-    workspace = fabric.resolve_workspace_name(workspace_id)
+    workspace = fabric.resolve_workspace_name(workspace)

     if lakehouse_workspace is None:
         lakehouse_workspace = workspace
@@ -73,18 +77,21 @@ def vertipaq_analyzer(
     dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
     dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
     dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
-
-
-
-    )
-
-
-
+
+    with connect_semantic_model(
+        dataset=dataset, readonly=True, workspace=workspace
+    ) as tom:
+        compat_level = tom.model.Model.Database.CompatibilityLevel
+        is_direct_lake = tom.is_direct_lake()
+        def_mode = tom.model.DefaultMode
+        table_count = tom.model.Tables.Count
+        column_count = len(list(tom.all_columns()))
+
     dfR["Missing Rows"] = None

     # Direct Lake
     if read_stats_from_data:
-        if
+        if is_direct_lake:
             dfC = pd.merge(
                 dfC,
                 dfP[["Table Name", "Query", "Source Type"]],
@@ -102,8 +109,9 @@ def vertipaq_analyzer(
             dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]

             if len(dfI_filt) == 0:
-
-                    f"{icons.red_dot} The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace.
+                raise ValueError(
+                    f"{icons.red_dot} The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace."
+                    "Please update the lakehouse_workspace parameter."
                 )
             else:
                 lakehouseName = dfI_filt["Display Name"].iloc[0]
@@ -242,7 +250,7 @@ def vertipaq_analyzer(

                 try:
                     missingRows = result.iloc[0, 0]
-                except:
+                except Exception:
                     pass

                 dfR.at[i, "Missing Rows"] = missingRows
@@ -312,7 +320,6 @@ def vertipaq_analyzer(
     )
     dfTable = pd.merge(dfTable, dfTP, on="Table Name", how="left")
     dfTable = pd.merge(dfTable, dfTC, on="Table Name", how="left")
-    dfTable = dfTable.drop_duplicates()  # Drop duplicates (temporary)
     dfTable = dfTable.sort_values(by="Total Size", ascending=False)
     dfTable.reset_index(drop=True, inplace=True)
     export_Table = dfTable.copy()
@@ -322,7 +329,7 @@ def vertipaq_analyzer(
     pctList = ["% DB"]
     dfTable[pctList] = dfTable[pctList].applymap("{:.2f}%".format)

-
+    # Relationships
     # dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True)
     dfR = pd.merge(
         dfR,
@@ -363,12 +370,17 @@ def vertipaq_analyzer(
     intList.remove("Missing Rows")
     dfR[intList] = dfR[intList].applymap("{:,}".format)

-
+    # Partitions
     dfP = dfP[
-        [
-
-
-
+        [
+            "Table Name",
+            "Partition Name",
+            "Mode",
+            "Record Count",
+            "Segment Count",
+            # "Records per Segment",
+        ]
+    ].sort_values(by="Record Count", ascending=False)
     dfP["Records per Segment"] = round(
         dfP["Record Count"] / dfP["Segment Count"], 2
     )  # Remove after records per segment is fixed
@@ -377,7 +389,7 @@ def vertipaq_analyzer(
     intList = ["Record Count", "Segment Count", "Records per Segment"]
     dfP[intList] = dfP[intList].applymap("{:,}".format)

-
+    # Hierarchies
     dfH_filt = dfH[dfH["Level Ordinal"] == 0]
     dfH_filt = dfH_filt[["Table Name", "Hierarchy Name", "Used Size"]].sort_values(
         by="Used Size", ascending=False
@@ -387,7 +399,7 @@ def vertipaq_analyzer(
     intList = ["Used Size"]
     dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)

-
+    # Model
     if total_size >= 1000000000:
         y = total_size / (1024**3) * 1000000000
     elif total_size >= 1000000:
@@ -396,19 +408,14 @@ def vertipaq_analyzer(
         y = total_size / (1024) * 1000
     y = round(y)

-    tblCount = len(dfT)
-    colCount = len(dfC_filt)
-    compatLevel = dfD["Compatibility Level"].iloc[0]
-    defMode = dfD["Model Default Mode"].iloc[0]
-
     dfModel = pd.DataFrame(
         {
             "Dataset Name": dataset,
             "Total Size": y,
-            "Table Count": tblCount,
-            "Column Count": colCount,
-            "Compatibility Level": compatLevel,
-            "Default Mode": defMode,
+            "Table Count": table_count,
+            "Column Count": column_count,
+            "Compatibility Level": compat_level,
+            "Default Mode": def_mode,
         },
         index=[0],
     )
@@ -433,14 +440,13 @@ def vertipaq_analyzer(

     visualize_vertipaq(dfs)

-
+    # Export vertipaq to delta tables in lakehouse
     if export in ["table", "zip"]:
         lakeAttach = lakehouse_attached()
         if lakeAttach is False:
-
+            raise ValueError(
                 f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )
-            return

         if export == "table":
             spark = SparkSession.builder.getOrCreate()
@@ -472,7 +478,9 @@ def vertipaq_analyzer(
                 "export_Model": ["Model", export_Model],
             }

-            print(
+            print(
+                f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
+            )
             now = datetime.datetime.now()
             for key, (obj, df) in dfMap.items():
                 df["Timestamp"] = now
@@ -494,7 +502,7 @@ def vertipaq_analyzer(
                     f"{icons.bullet} Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table."
                 )

-
+        # Export vertipaq to zip file within the lakehouse
         if export == "zip":
             dataFrames = {
                 "dfModel": dfModel,
@@ -532,7 +540,8 @@ def vertipaq_analyzer(
         if os.path.exists(filePath):
             os.remove(filePath)
         print(
-            f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved
+            f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved "
+            f"to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook."
         )


@@ -839,7 +848,7 @@ def visualize_vertipaq(dataframes):
                 (tooltipDF["ViewName"] == vw) & (tooltipDF["ColumnName"] == col)
             ]
             tt = tooltipDF_filt["Tooltip"].iloc[0]
-        except:
+        except Exception:
             pass
         df_html = df_html.replace(f"<th>{col}</th>", f'<th title="{tt}">{col}</th>')
         content_html += (
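The structural change in this file: model-level metadata (compatibility level, default mode, table and column counts) is now read through the TOM wrapper rather than the removed dfD/dfT lookups. A minimal sketch of that pattern, using only names that appear in the diff ("MyModel" and "MyWorkspace" are placeholders):

    from sempy_labs.tom import connect_semantic_model

    # Open the model read-only, since this path only collects statistics.
    with connect_semantic_model(
        dataset="MyModel", readonly=True, workspace="MyWorkspace"
    ) as tom:
        compat_level = tom.model.Model.Database.CompatibilityLevel
        is_direct_lake = tom.is_direct_lake()
        def_mode = tom.model.DefaultMode
        table_count = tom.model.Tables.Count
        column_count = len(list(tom.all_columns()))

Importing connect_semantic_model inside the function body, as the diff does, presumably avoids a circular import between sempy_labs.tom and this module.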
sempy_labs/directlake/__init__.py
CHANGED

@@ -21,6 +21,7 @@ from sempy_labs.directlake._update_directlake_model_lakehouse_connection import
 )
 from sempy_labs.directlake._update_directlake_partition_entity import (
     update_direct_lake_partition_entity,
+    add_table_to_direct_lake_semantic_model,
 )
 from sempy_labs.directlake._warm_cache import (
     warm_direct_lake_cache_isresident,
@@ -42,4 +43,5 @@ __all__ = [
     "update_direct_lake_partition_entity",
     "warm_direct_lake_cache_isresident",
     "warm_direct_lake_cache_perspective",
+    "add_table_to_direct_lake_semantic_model",
 ]
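With the new __all__ entry, the helper added in _update_directlake_partition_entity.py (+129 lines in this release) becomes importable from the subpackage. Its signature is not shown in this diff, so only the import is illustrated:

    from sempy_labs.directlake import add_table_to_direct_lake_semantic_model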
sempy_labs/directlake/_directlake_schema_compare.py
CHANGED

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from sempy_labs._helper_functions import (
@@ -13,6 +12,7 @@ from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log

+
 @log
 def direct_lake_schema_compare(
     dataset: str,
@@ -40,9 +40,7 @@ def direct_lake_schema_compare(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """

-
-    workspace_id = fabric.get_workspace_id()
-    workspace = fabric.resolve_workspace_name(workspace_id)
+    workspace = fabric.resolve_workspace_name(workspace)

     if lakehouse_workspace is None:
         lakehouse_workspace = workspace
@@ -57,14 +55,15 @@ def direct_lake_schema_compare(
     dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]

     if len(dfI_filt) == 0:
-
-            f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the
+        raise ValueError(
+            f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the "
+            f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
         )
-        return

     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
-
-
+        raise ValueError(
+            f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
+        )

     dfT = list_tables(dataset, workspace)
     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
@@ -98,7 +97,8 @@ def direct_lake_schema_compare(
         )
     else:
         print(
-            f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace
+            f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace"
+            f" but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
         display(missingtbls)
     if len(missingcols) == 0:
@@ -107,6 +107,7 @@ def direct_lake_schema_compare(
         )
     else:
         print(
-            f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace
+            f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace "
+            f"but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
         )
         display(missingcols)
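This file shows the release's recurring error-handling change: validation failures now raise ValueError instead of printing a message and returning None, so callers can catch them. A self-contained sketch of the convention; the helper name is hypothetical:

    import sempy_labs._icons as icons


    def require_direct_lake(dataset: str, is_direct_lake: bool) -> None:
        # 0.6.0 style: raise so the failure reaches the caller,
        # rather than print-and-return as in 0.4.2.
        if not is_direct_lake:
            raise ValueError(
                f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
            )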
sempy_labs/directlake/_directlake_schema_sync.py
CHANGED

@@ -12,6 +12,7 @@ from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons

+
 @log
 def direct_lake_schema_sync(
     dataset: str,
@@ -46,8 +47,7 @@ def direct_lake_schema_sync(
     import Microsoft.AnalysisServices.Tabular as TOM
     import System

-
-    workspace = fabric.resolve_workspace_name()
+    workspace = fabric.resolve_workspace_name(workspace)

     if lakehouse_workspace is None:
         lakehouse_workspace = workspace
@@ -62,10 +62,10 @@ def direct_lake_schema_sync(
     dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]

     if len(dfI_filt) == 0:
-
-            f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the
+        raise ValueError(
+            f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the "
+            f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
         )
-        return

     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
     dfP_filt = dfP[dfP["Source Type"] == "Entity"]
@@ -81,21 +81,9 @@ def direct_lake_schema_sync(
     lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
     lc_filt = lc[lc["Table Name"].isin(dfP_filt["Query"].values)]

-    mapping = {
-        "string": "String",
-        "bigint": "Int64",
-        "int": "Int64",
-        "smallint": "Int64",
-        "boolean": "Boolean",
-        "timestamp": "DateTime",
-        "date": "DateTime",
-        "decimal(38,18)": "Decimal",
-        "double": "Double",
-    }
-
     with connect_semantic_model(
-
-
+        dataset=dataset, readonly=False, workspace=workspace
+    ) as tom:

         for i, r in lc_filt.iterrows():
             lakeTName = r["Table Name"]
@@ -110,18 +98,20 @@ def direct_lake_schema_sync(
                 col = TOM.DataColumn()
                 col.Name = lakeCName
                 col.SourceColumn = lakeCName
-                dt = mapping.get(dType)
+                dt = icons.data_type_mapping.get(dType)
                 try:
                     col.DataType = System.Enum.Parse(TOM.DataType, dt)
                 except Exception as e:
-                    raise ValueError(
+                    raise ValueError(
+                        f"{icons.red_dot} Failed to map '{dType}' data type to the semantic model data types."
+                    ) from e

                 tom.model.Tables[tName].Columns.Add(col)
                 print(
-                    f"{icons.green_dot} The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}'
+                    f"{icons.green_dot} The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' "
+                    f"data type within the '{dataset}' semantic model within the '{workspace}' workspace."
                 )
             else:
                 print(
                     f"{icons.yellow_dot} The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace."
                 )
-
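The local Spark-to-TOM data type dictionary was deleted here in favor of the centralized icons.data_type_mapping (note that _icons.py gains 15 lines in this release). Judging by the deleted lines, the shared mapping presumably looks like this:

    # Hypothetical reconstruction of icons.data_type_mapping from the deleted dict.
    data_type_mapping = {
        "string": "String",
        "bigint": "Int64",
        "int": "Int64",
        "smallint": "Int64",
        "boolean": "Boolean",
        "timestamp": "DateTime",
        "date": "DateTime",
        "decimal(38,18)": "Decimal",
        "double": "Double",
    }

    dt = data_type_mapping.get("bigint")  # "Int64", as in icons.data_type_mapping.get(dType)

Centralizing the table keeps lakehouse-to-semantic-model type conversion consistent across modules.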
sempy_labs/directlake/_fallback.py
CHANGED

@@ -1,9 +1,9 @@
-import sempy
 import sempy.fabric as fabric
 import numpy as np
-from typing import
+from typing import Optional
 import sempy_labs._icons as icons

+
 def check_fallback_reason(dataset: str, workspace: Optional[str] = None):
     """
     Shows the reason a table in a Direct Lake semantic model would fallback to DirectQuery.
@@ -23,16 +23,14 @@ def check_fallback_reason(dataset: str, workspace: Optional[str] = None):
         The tables in the semantic model and their fallback reason.
     """

-
-    workspace_id = fabric.get_workspace_id()
-    workspace = fabric.resolve_workspace_name(workspace_id)
+    workspace = fabric.resolve_workspace_name(workspace)

     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]

     if len(dfP_filt) == 0:
-
-            f"{icons.
+        raise ValueError(
+            f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models."
         )
     else:
         df = fabric.evaluate_dax(
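Usage of the updated function is unchanged apart from the stricter failure mode. A hedged sketch ("MyModel" is a placeholder, and the import path assumes the function is re-exported from sempy_labs.directlake like its siblings):

    from sempy_labs.directlake import check_fallback_reason

    # Raises ValueError for non-Direct Lake models; otherwise returns a
    # dataframe of tables and their DirectQuery fallback reasons.
    df = check_fallback_reason(dataset="MyModel")
    print(df)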
sempy_labs/directlake/_get_directlake_lakehouse.py
CHANGED

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 from sempy_labs._helper_functions import (
     resolve_lakehouse_id,
@@ -9,6 +8,7 @@ from typing import Optional, Tuple
 from uuid import UUID
 import sempy_labs._icons as icons

+
 def get_direct_lake_lakehouse(
     dataset: str,
     workspace: Optional[str] = None,
sempy_labs/directlake/_get_shared_expression.py
CHANGED

@@ -1,13 +1,10 @@
-import sempy
 import sempy.fabric as fabric
-from sempy_labs._helper_functions import (
-    resolve_lakehouse_name,
-    resolve_workspace_name_and_id,
-)
+from sempy_labs._helper_functions import resolve_lakehouse_name
 from sempy_labs._list_functions import list_lakehouses
 from typing import Optional
 import sempy_labs._icons as icons

+
 def get_shared_expression(
     lakehouse: Optional[str] = None, workspace: Optional[str] = None
 ):
@@ -30,7 +27,7 @@ def get_shared_expression(
     Shows the expression which can be used to connect a Direct Lake semantic model to its SQL Endpoint.
     """

-
+    workspace = fabric.resolve_workspace_name(workspace)
     if lakehouse is None:
         lakehouse_id = fabric.get_lakehouse_id()
         lakehouse = resolve_lakehouse_name(lakehouse_id)
@@ -43,10 +40,9 @@ def get_shared_expression(
     provStatus = lakeDetail["SQL Endpoint Provisioning Status"].iloc[0]

     if provStatus == "InProgress":
-
+        raise ValueError(
             f"{icons.red_dot} The SQL Endpoint for the '{lakehouse}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned."
         )
-        return

     sh = (
         'let\n\tdatabase = Sql.Database("'
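get_shared_expression builds the M expression (the 'let\n\tdatabase = Sql.Database("' string above) that points a Direct Lake semantic model at its lakehouse's SQL endpoint. A usage sketch with placeholder names; the import path assumes the subpackage re-export:

    from sempy_labs.directlake import get_shared_expression

    expr = get_shared_expression(lakehouse="MyLakehouse", workspace="MyWorkspace")
    print(expr)  # an M "let ... in ..." expression for the model's shared expression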
sempy_labs/directlake/_guardrails.py
CHANGED

@@ -1,12 +1,12 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
-from typing import
+from typing import Optional


-def get_direct_lake_guardrails():
+def get_direct_lake_guardrails() -> pd.DataFrame:
     """
-    Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query
+    Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query
+    based on Microsoft's `online documentation <https://learn.microsoft.com/power-bi/enterprise/directlake-overview>`_.

     Parameters
     ----------
@@ -44,9 +44,7 @@ def get_sku_size(workspace: Optional[str] = None):
         The SKU size for a workspace.
     """

-
-    workspace_id = fabric.get_workspace_id()
-    workspace = fabric.resolve_workspace_name(workspace_id)
+    workspace = fabric.resolve_workspace_name(workspace)

     dfC = fabric.list_capacities()
     dfW = fabric.list_workspaces().sort_values(by="Name", ascending=True)
@@ -62,7 +60,7 @@ def get_sku_size(workspace: Optional[str] = None):
     return sku_value


-def get_directlake_guardrails_for_sku(sku_size: str):
+def get_directlake_guardrails_for_sku(sku_size: str) -> pd.DataFrame:
     """
     Shows the guardrails for Direct Lake based on the SKU used by your workspace's capacity.
     * Use the result of the 'get_sku_size' function as an input for this function's sku_size parameter.*
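The docstring spells out the intended chaining: pass get_sku_size's result into get_directlake_guardrails_for_sku, and both functions now carry pd.DataFrame return annotations. A sketch of the chain (the workspace name is a placeholder, and the imports assume the subpackage re-exports):

    from sempy_labs.directlake import (
        get_directlake_guardrails_for_sku,
        get_sku_size,
    )

    sku = get_sku_size(workspace="MyWorkspace")
    guardrails = get_directlake_guardrails_for_sku(sku)  # guardrail values for that SKU
    print(guardrails)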
sempy_labs/directlake/_list_directlake_model_calc_tables.py
CHANGED

@@ -1,14 +1,16 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
-from sempy_labs._list_functions import list_tables
+from sempy_labs._list_functions import list_tables
 from sempy_labs.tom import connect_semantic_model
 from typing import Optional
 from sempy._utils._log import log
 import sempy_labs._icons as icons

+
 @log
-def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] = None):
+def list_direct_lake_model_calc_tables(
+    dataset: str, workspace: Optional[str] = None
+) -> pd.DataFrame:
     """
     Shows the calculated tables and their respective DAX expression for a Direct Lake model (which has been migrated from import/DirectQuery).

@@ -27,24 +29,26 @@ def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] =
     A pandas dataframe showing the calculated tables which were migrated to Direct Lake and whose DAX expressions are stored as model annotations.
     """

-
-    workspace = fabric.resolve_workspace_name()
+    workspace = fabric.resolve_workspace_name(workspace)

     df = pd.DataFrame(columns=["Table Name", "Source Expression"])

     with connect_semantic_model(
-
-
-
+        dataset=dataset, readonly=True, workspace=workspace
+    ) as tom:
+
         is_direct_lake = tom.is_direct_lake()

         if not is_direct_lake:
-
+            raise ValueError(
+                f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
+            )
         else:
-            dfA = list_annotations(dataset, workspace)
+            dfA = fabric.list_annotations(dataset=dataset, workspace=workspace)
             dfT = list_tables(dataset, workspace)
             dfA_filt = dfA[
-                (dfA["Object Type"] == "Model")
+                (dfA["Object Type"] == "Model")
+                & (dfA["Annotation Name"].isin(dfT["Name"]))
             ]

             for i, r in dfA_filt.iterrows():
@@ -52,6 +56,8 @@ def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] =
                 se = r["Annotation Value"]

                 new_data = {"Table Name": tName, "Source Expression": se}
-                df = pd.concat(
+                df = pd.concat(
+                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
+                )

     return df
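The row-append idiom used above, shown standalone: wrap each new row in a one-row DataFrame and pd.concat it onto the accumulator, the replacement for the DataFrame.append API that modern pandas removed. The table name and expression here are placeholder values:

    import pandas as pd

    df = pd.DataFrame(columns=["Table Name", "Source Expression"])
    new_data = {"Table Name": "Sales", "Source Expression": "CALCULATETABLE(...)"}
    df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)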
sempy_labs/directlake/_show_unsupported_directlake_objects.py
CHANGED

@@ -1,4 +1,3 @@
-import sempy
 import sempy.fabric as fabric
 import pandas as pd
 from sempy_labs._list_functions import list_tables
@@ -6,12 +5,14 @@ from sempy_labs._helper_functions import format_dax_object_name
 from typing import Optional, Tuple
 from sempy._utils._log import log

+
 @log
 def show_unsupported_direct_lake_objects(
     dataset: str, workspace: Optional[str] = None
 ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """
-    Returns a list of a semantic model's objects which are not supported by Direct Lake based on
+    Returns a list of a semantic model's objects which are not supported by Direct Lake based on
+    `official documentation <https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations>`_.

     Parameters
     ----------
@@ -30,8 +31,7 @@ def show_unsupported_direct_lake_objects(

     pd.options.mode.chained_assignment = None

-
-    workspace = fabric.resolve_workspace_name()
+    workspace = fabric.resolve_workspace_name(workspace)

     dfT = list_tables(dataset, workspace)
     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py
CHANGED

@@ -1,9 +1,7 @@
-import sempy
 import sempy.fabric as fabric
 from sempy_labs.directlake._get_shared_expression import get_shared_expression
 from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
-    resolve_workspace_name_and_id,
 )
 from sempy_labs.tom import connect_semantic_model
 from typing import Optional
@@ -40,7 +38,7 @@ def update_direct_lake_model_lakehouse_connection(

     """

-
+    workspace = fabric.resolve_workspace_name(workspace)

     if lakehouse_workspace is None:
         lakehouse_workspace = workspace
@@ -54,16 +52,17 @@ def update_direct_lake_model_lakehouse_connection(
     dfI_filt = dfI[(dfI["Display Name"] == lakehouse)]

     if len(dfI_filt) == 0:
-
-            f"{icons.red_dot} The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace.
+        raise ValueError(
+            f"{icons.red_dot} The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. "
+            f"Therefore it cannot be used to support the '{dataset}' semantic model within the '{workspace}' workspace."
         )

     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]

     if len(dfP_filt) == 0:
-
-            f"{icons.
+        raise ValueError(
+            f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models."
         )
     else:
         with connect_semantic_model(
@@ -77,4 +76,6 @@ def update_direct_lake_model_lakehouse_connection(
                 f"{icons.green_dot} The expression in the '{dataset}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace."
             )
         except Exception as e:
-            raise ValueError(
+            raise ValueError(
+                f"{icons.red_dot} The expression in the '{dataset}' semantic model was not updated."
+            ) from e
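The file closes with the release's other recurring pattern: exception chaining via raise ... from e, which keeps the original traceback attached as __cause__ instead of swallowing it. A self-contained sketch (the failing call is a stand-in, not the library's actual update logic):

    def update_expression() -> None:
        raise RuntimeError("TOM update failed")  # stand-in failure


    try:
        update_expression()
    except Exception as e:
        # Surface a domain-level error; "from e" preserves the root cause.
        raise ValueError("The expression in the semantic model was not updated.") from e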