semantic-link-labs 0.8.0-py3-none-any.whl → 0.8.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in its public registry. It is provided for informational purposes only.
- {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/METADATA +39 -7
- {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/RECORD +47 -37
- sempy_labs/__init__.py +70 -51
- sempy_labs/_ai.py +0 -2
- sempy_labs/_capacity_migration.py +1 -2
- sempy_labs/_data_pipelines.py +118 -0
- sempy_labs/_documentation.py +144 -0
- sempy_labs/_eventhouses.py +118 -0
- sempy_labs/_eventstreams.py +118 -0
- sempy_labs/_generate_semantic_model.py +3 -3
- sempy_labs/_git.py +3 -3
- sempy_labs/_helper_functions.py +116 -26
- sempy_labs/_icons.py +21 -0
- sempy_labs/_kql_databases.py +134 -0
- sempy_labs/_kql_querysets.py +124 -0
- sempy_labs/_list_functions.py +12 -425
- sempy_labs/_mirrored_warehouses.py +50 -0
- sempy_labs/_ml_experiments.py +122 -0
- sempy_labs/_ml_models.py +120 -0
- sempy_labs/_model_auto_build.py +0 -4
- sempy_labs/_model_bpa.py +9 -11
- sempy_labs/_model_bpa_bulk.py +8 -7
- sempy_labs/_model_dependencies.py +26 -18
- sempy_labs/_notebooks.py +5 -16
- sempy_labs/_query_scale_out.py +2 -2
- sempy_labs/_refresh_semantic_model.py +7 -19
- sempy_labs/_spark.py +10 -10
- sempy_labs/_vertipaq.py +16 -18
- sempy_labs/_warehouses.py +132 -0
- sempy_labs/_workspaces.py +0 -3
- sempy_labs/admin/_basic_functions.py +92 -10
- sempy_labs/admin/_domains.py +1 -1
- sempy_labs/directlake/_directlake_schema_sync.py +1 -1
- sempy_labs/directlake/_dl_helper.py +32 -16
- sempy_labs/directlake/_guardrails.py +7 -7
- sempy_labs/directlake/_update_directlake_partition_entity.py +1 -1
- sempy_labs/directlake/_warm_cache.py +1 -1
- sempy_labs/lakehouse/_get_lakehouse_tables.py +3 -3
- sempy_labs/lakehouse/_lakehouse.py +3 -2
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +5 -0
- sempy_labs/report/_generate_report.py +1 -1
- sempy_labs/report/_report_bpa.py +13 -3
- sempy_labs/report/_reportwrapper.py +14 -16
- sempy_labs/tom/_model.py +261 -24
- {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.8.0.dist-info → semantic_link_labs-0.8.1.dist-info}/top_level.txt +0 -0
sempy_labs/_ml_models.py
ADDED

@@ -0,0 +1,120 @@
+import sempy.fabric as fabric
+import pandas as pd
+import sempy_labs._icons as icons
+from typing import Optional
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    lro,
+    pagination,
+)
+from sempy.fabric.exceptions import FabricHTTPException
+
+
+def list_ml_models(workspace: Optional[str] = None) -> pd.DataFrame:
+    """
+    Shows the ML models within a workspace.
+
+    Parameters
+    ----------
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the ML models within a workspace.
+    """
+
+    df = pd.DataFrame(columns=["ML Model Name", "ML Model Id", "Description"])
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    client = fabric.FabricRestClient()
+    response = client.get(f"/v1/workspaces/{workspace_id}/mlModels")
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    responses = pagination(client, response)
+
+    for r in responses:
+        for v in r.get("value", []):
+            model_id = v.get("id")
+            modelName = v.get("displayName")
+            desc = v.get("description")
+
+            new_data = {
+                "ML Model Name": modelName,
+                "ML Model Id": model_id,
+                "Description": desc,
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    return df
+
+
+def create_ml_model(
+    name: str, description: Optional[str] = None, workspace: Optional[str] = None
+):
+    """
+    Creates a Fabric ML model.
+
+    Parameters
+    ----------
+    name: str
+        Name of the ML model.
+    description : str, default=None
+        A description of the environment.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    request_body = {"displayName": name}
+
+    if description:
+        request_body["description"] = description
+
+    client = fabric.FabricRestClient()
+    response = client.post(f"/v1/workspaces/{workspace_id}/mlModels", json=request_body)
+
+    lro(client, response, status_codes=[201, 202])
+
+    print(
+        f"{icons.green_dot} The '{name}' ML model has been created within the '{workspace}' workspace."
+    )
+
+
+def delete_ml_model(name: str, workspace: Optional[str] = None):
+    """
+    Deletes a Fabric ML model.
+
+    Parameters
+    ----------
+    name: str
+        Name of the ML model.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    item_id = fabric.resolve_item_id(
+        item_name=name, type="MLModel", workspace=workspace
+    )
+
+    client = fabric.FabricRestClient()
+    response = client.delete(f"/v1/workspaces/{workspace_id}/mlModels/{item_id}")
+
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    print(
+        f"{icons.green_dot} The '{name}' ML model within the '{workspace}' workspace has been deleted."
+    )
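Taken together, the new module is a thin CRUD wrapper over the Fabric REST mlModels endpoints. A minimal usage sketch, assuming a Microsoft Fabric notebook with semantic-link-labs 0.8.1 installed and that these functions are re-exported from the package root (the __init__.py changes in this release suggest so, otherwise import from sempy_labs._ml_models); the workspace and model names are placeholders:

    from sempy_labs import create_ml_model, list_ml_models, delete_ml_model

    # "ML Demo" and "ChurnModel" are placeholder names.
    create_ml_model(name="ChurnModel", description="Customer churn model", workspace="ML Demo")

    # Returns a pandas DataFrame with 'ML Model Name', 'ML Model Id' and 'Description' columns.
    print(list_ml_models(workspace="ML Demo"))

    delete_ml_model(name="ChurnModel", workspace="ML Demo")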
sempy_labs/_model_auto_build.py
CHANGED

@@ -34,10 +34,6 @@ def model_auto_build(
         The Fabric workspace used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-
-    Returns
-    -------
-
     """

     workspace = fabric.resolve_workspace_name(workspace)
sempy_labs/_model_bpa.py
CHANGED

@@ -3,7 +3,6 @@ import pandas as pd
 import warnings
 import datetime
 from IPython.display import display, HTML
-from pyspark.sql import SparkSession
 from sempy_labs._model_dependencies import get_model_calc_dependencies
 from sempy_labs._helper_functions import (
     format_dax_object_name,
@@ -13,6 +12,7 @@ from sempy_labs._helper_functions import (
     resolve_workspace_capacity,
     resolve_dataset_id,
     get_language_codes,
+    get_max_run_id,
 )
 from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
 from sempy_labs.tom import connect_semantic_model
@@ -30,9 +30,9 @@ def run_model_bpa(
     dataset: str,
     rules: Optional[pd.DataFrame] = None,
     workspace: Optional[str] = None,
-    export:
-    return_dataframe:
-    extended:
+    export: bool = False,
+    return_dataframe: bool = False,
+    extended: bool = False,
     language: Optional[str] = None,
     **kwargs,
 ):
@@ -151,6 +151,7 @@ def run_model_bpa(
     def translate_using_spark(rule_file):

         from synapse.ml.services import Translate
+        from pyspark.sql import SparkSession

         rules_temp = rule_file.copy()
         rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1)
@@ -346,15 +347,11 @@ def run_model_bpa(

         dfExport["Severity"].replace(icons.severity_mapping, inplace=True)

-        spark = SparkSession.builder.getOrCreate()
-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}"
-
         if len(lakeT_filt) == 0:
             runId = 1
         else:
-
-
-            runId = maxRunId + 1
+            max_run_id = get_max_run_id(table_name=delta_table_name)
+            runId = max_run_id + 1

         now = datetime.datetime.now()
         dfD = fabric.list_datasets(workspace=workspace, mode="rest")
@@ -514,4 +511,5 @@ def run_model_bpa(
     tab_html += "</div>"

     # Display the tabs, tab contents, and run the script
-
+    if not export:
+        return display(HTML(styles + tab_html + content_html + script))
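Both hunks above swap an inline "SELECT MAX(RunId)" Spark query for a get_max_run_id helper imported from _helper_functions.py, whose body lands outside this section (_helper_functions.py +116 -26). A hypothetical sketch of what the helper plausibly does, reconstructed only from the code it replaces:

    from pyspark.sql import SparkSession

    def get_max_run_id(table_name: str) -> int:
        # Hypothetical reconstruction; the real helper lives in
        # sempy_labs/_helper_functions.py and is not shown in this diff.
        # Callers now pass only the table name, so the real helper
        # presumably resolves the attached lakehouse itself.
        spark = SparkSession.builder.getOrCreate()
        result = spark.sql(f"SELECT MAX(RunId) AS max_run_id FROM {table_name}")
        return result.collect()[0]["max_run_id"]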
sempy_labs/_model_bpa_bulk.py
CHANGED

@@ -1,14 +1,17 @@
 import sempy.fabric as fabric
 import pandas as pd
 import datetime
-from pyspark.sql import SparkSession
 from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
     save_as_delta_table,
     resolve_workspace_capacity,
     retry,
+    get_max_run_id,
+)
+from sempy_labs.lakehouse import (
+    get_lakehouse_tables,
+    lakehouse_attached,
 )
-from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
 from sempy_labs._model_bpa import run_model_bpa
 from typing import Optional, List
 from sempy._utils._log import log
@@ -18,7 +21,7 @@ import sempy_labs._icons as icons
 @log
 def run_model_bpa_bulk(
     rules: Optional[pd.DataFrame] = None,
-    extended:
+    extended: bool = False,
     language: Optional[str] = None,
     workspace: Optional[str | List[str]] = None,
     skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"],
@@ -78,7 +81,6 @@ def run_model_bpa_bulk(
     ]
     now = datetime.datetime.now()
     output_table = "modelbparesults"
-    spark = SparkSession.builder.getOrCreate()
     lakehouse_workspace = fabric.resolve_workspace_name()
     lakehouse_id = fabric.get_lakehouse_id()
     lakehouse = resolve_lakehouse_name(
@@ -90,9 +92,8 @@ def run_model_bpa_bulk(
     if len(lakeT_filt) == 0:
         runId = 1
     else:
-
-
-        runId = maxRunId + 1
+        max_run_id = get_max_run_id(table_name=output_table)
+        runId = max_run_id + 1

     if isinstance(workspace, str):
         workspace = [workspace]
|
|
|
74
74
|
|
|
75
75
|
for index, dependency in dep_filt.iterrows():
|
|
76
76
|
d = True
|
|
77
|
-
if dependency[5] == "Measure":
|
|
77
|
+
if dependency.iloc[5] == "Measure":
|
|
78
78
|
d = False
|
|
79
79
|
df = pd.concat(
|
|
80
80
|
[
|
|
@@ -85,12 +85,14 @@ def get_measure_dependencies(
|
|
|
85
85
|
"Table Name": r["Table Name"],
|
|
86
86
|
"Object Name": r["Object Name"],
|
|
87
87
|
"Object Type": r["Object Type"],
|
|
88
|
-
"Referenced Object": dependency[4],
|
|
89
|
-
"Referenced Table": dependency[3],
|
|
90
|
-
"Referenced Object Type": dependency[
|
|
88
|
+
"Referenced Object": dependency.iloc[4],
|
|
89
|
+
"Referenced Table": dependency.iloc[3],
|
|
90
|
+
"Referenced Object Type": dependency.iloc[
|
|
91
|
+
5
|
|
92
|
+
],
|
|
91
93
|
"Done": d,
|
|
92
94
|
"Full Object Name": r["Full Object Name"],
|
|
93
|
-
"Referenced Full Object Name": dependency[
|
|
95
|
+
"Referenced Full Object Name": dependency.iloc[
|
|
94
96
|
7
|
|
95
97
|
],
|
|
96
98
|
"Parent Node": rObj,
|
|
@@ -110,12 +112,14 @@ def get_measure_dependencies(
|
|
|
110
112
|
"Table Name": r["Table Name"],
|
|
111
113
|
"Object Name": r["Object Name"],
|
|
112
114
|
"Object Type": r["Object Type"],
|
|
113
|
-
"Referenced Object": dependency[4],
|
|
114
|
-
"Referenced Table": dependency[3],
|
|
115
|
-
"Referenced Object Type": dependency[
|
|
115
|
+
"Referenced Object": dependency.iloc[4],
|
|
116
|
+
"Referenced Table": dependency.iloc[3],
|
|
117
|
+
"Referenced Object Type": dependency.iloc[
|
|
118
|
+
5
|
|
119
|
+
],
|
|
116
120
|
"Done": d,
|
|
117
121
|
"Full Object Name": r["Full Object Name"],
|
|
118
|
-
"Referenced Full Object Name": dependency[
|
|
122
|
+
"Referenced Full Object Name": dependency.iloc[
|
|
119
123
|
7
|
|
120
124
|
],
|
|
121
125
|
"Parent Node": rObj,
|
|
@@ -203,7 +207,7 @@ def get_model_calc_dependencies(
|
|
|
203
207
|
|
|
204
208
|
for index, dependency in dep_filt.iterrows():
|
|
205
209
|
d = True
|
|
206
|
-
if dependency[5] in objs:
|
|
210
|
+
if dependency.iloc[5] in objs:
|
|
207
211
|
d = False
|
|
208
212
|
df = pd.concat(
|
|
209
213
|
[
|
|
@@ -214,12 +218,14 @@ def get_model_calc_dependencies(
|
|
|
214
218
|
"Table Name": r["Table Name"],
|
|
215
219
|
"Object Name": r["Object Name"],
|
|
216
220
|
"Object Type": r["Object Type"],
|
|
217
|
-
"Referenced Object": dependency[4],
|
|
218
|
-
"Referenced Table": dependency[3],
|
|
219
|
-
"Referenced Object Type": dependency[
|
|
221
|
+
"Referenced Object": dependency.iloc[4],
|
|
222
|
+
"Referenced Table": dependency.iloc[3],
|
|
223
|
+
"Referenced Object Type": dependency.iloc[
|
|
224
|
+
5
|
|
225
|
+
],
|
|
220
226
|
"Done": d,
|
|
221
227
|
"Full Object Name": r["Full Object Name"],
|
|
222
|
-
"Referenced Full Object Name": dependency[
|
|
228
|
+
"Referenced Full Object Name": dependency.iloc[
|
|
223
229
|
7
|
|
224
230
|
],
|
|
225
231
|
"Parent Node": rObj,
|
|
@@ -239,12 +245,14 @@ def get_model_calc_dependencies(
|
|
|
239
245
|
"Table Name": r["Table Name"],
|
|
240
246
|
"Object Name": r["Object Name"],
|
|
241
247
|
"Object Type": r["Object Type"],
|
|
242
|
-
"Referenced Object": dependency[5],
|
|
243
|
-
"Referenced Table": dependency[4],
|
|
244
|
-
"Referenced Object Type": dependency[
|
|
248
|
+
"Referenced Object": dependency.iloc[5],
|
|
249
|
+
"Referenced Table": dependency.iloc[4],
|
|
250
|
+
"Referenced Object Type": dependency.iloc[
|
|
251
|
+
6
|
|
252
|
+
],
|
|
245
253
|
"Done": d,
|
|
246
254
|
"Full Object Name": r["Full Object Name"],
|
|
247
|
-
"Referenced Full Object Name": dependency[
|
|
255
|
+
"Referenced Full Object Name": dependency.iloc[
|
|
248
256
|
7
|
|
249
257
|
],
|
|
250
258
|
"Parent Node": rObj,
|
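Every change in this file replaces bare integer indexing on a row produced by iterrows() (dependency[5]) with explicit positional indexing (dependency.iloc[5]). A row Series is labeled by column names, so an integer key is a label lookup; the silent positional fallback is deprecated in pandas 2.x. A small self-contained demo of the distinction:

    import pandas as pd

    df = pd.DataFrame({"a": [1], "b": [2], "c": [3]})
    for _, row in df.iterrows():
        print(row.iloc[2])  # 3, explicit positional access, version-proof
        # row[2] treats 2 as a label: depending on the pandas version it
        # falls back to positional access with a FutureWarning or raises KeyError.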
sempy_labs/_notebooks.py
CHANGED

@@ -8,13 +8,14 @@ from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     lro,
     _decode_b64,
+    resolve_notebook_id,
 )
 from sempy.fabric.exceptions import FabricHTTPException


 def get_notebook_definition(
-    notebook_name: str, workspace: Optional[str] = None, decode:
-):
+    notebook_name: str, workspace: Optional[str] = None, decode: bool = True
+) -> str:
     """
     Obtains the notebook definition.

@@ -32,21 +33,12 @@ def get_notebook_definition(

     Returns
     -------
-
+    str
         The notebook definition.
     """

     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
-
-    dfI = fabric.list_items(workspace=workspace, type="Notebook")
-    dfI_filt = dfI[dfI["Display Name"] == notebook_name]
-
-    if len(dfI_filt) == 0:
-        raise ValueError(
-            f"{icons.red_dot} The '{notebook_name}' notebook does not exist within the '{workspace}' workspace."
-        )
-
-    notebook_id = dfI_filt["Id"].iloc[0]
+    notebook_id = resolve_notebook_id(notebook=notebook_name, workspace=workspace)
     client = fabric.FabricRestClient()
     response = client.post(
         f"v1/workspaces/{workspace_id}/notebooks/{notebook_id}/getDefinition",
@@ -90,9 +82,6 @@ def import_notebook_from_web(
         The name of the workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-
-    Returns
-    -------
 """

     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
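With the lookup moved into a shared resolve_notebook_id helper, get_notebook_definition reduces to id resolution plus the REST call. A usage sketch, assuming the function is importable from the package root; the notebook and workspace names are placeholders:

    from sempy_labs import get_notebook_definition

    # decode=True (the new default) base64-decodes the returned payload to text.
    nb = get_notebook_definition(notebook_name="My Notebook", workspace="My Workspace")
    print(nb[:200])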
sempy_labs/_query_scale_out.py
CHANGED

@@ -181,8 +181,8 @@ def disable_qso(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:

 def set_qso(
     dataset: str,
-    auto_sync:
-    max_read_only_replicas:
+    auto_sync: bool = True,
+    max_read_only_replicas: int = -1,
     workspace: Optional[str] = None,
 ) -> pd.DataFrame:
     """
sempy_labs/_refresh_semantic_model.py
CHANGED

@@ -13,10 +13,10 @@ def refresh_semantic_model(
     dataset: str,
     tables: Optional[Union[str, List[str]]] = None,
     partitions: Optional[Union[str, List[str]]] = None,
-    refresh_type:
-    retry_count:
-    apply_refresh_policy:
-    max_parallelism:
+    refresh_type: str = "full",
+    retry_count: int = 0,
+    apply_refresh_policy: bool = True,
+    max_parallelism: int = 10,
     workspace: Optional[str] = None,
 ):
     """
@@ -30,7 +30,7 @@ def refresh_semantic_model(
         A string or a list of tables to refresh.
     partitions: str, List[str], default=None
         A string or a list of partitions to refresh. Partitions must be formatted as such: 'Table Name'[Partition Name].
-    refresh_type : str, default=
+    refresh_type : str, default="full"
         The type of processing to perform. Types align with the TMSL refresh command types: full, clearValues, calculate, dataOnly, automatic, and defragment. The add type isn't supported. Defaults to "full".
     retry_count : int, default=0
         Number of times the operation retries before failing.
@@ -48,9 +48,6 @@ def refresh_semantic_model(

     workspace = fabric.resolve_workspace_name(workspace)

-    if refresh_type is None:
-        refresh_type = "full"
-
     if isinstance(tables, str):
         tables = [tables]
     if isinstance(partitions, str):
@@ -74,18 +71,9 @@ def refresh_semantic_model(
         refresh_type.lower().replace("only", "Only").replace("values", "Values")
     )

-
-        "full",
-        "automatic",
-        "dataOnly",
-        "calculate",
-        "clearValues",
-        "defragment",
-    ]
-
-    if refresh_type not in refreshTypes:
+    if refresh_type not in icons.refreshTypes:
         raise ValueError(
-            f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {refreshTypes}."
+            f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {icons.refreshTypes}."
         )

     if len(objects) == 0:
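The list of valid refresh types now lives in a single icons.refreshTypes constant instead of a throwaway local list, and refresh_type defaults to "full" in the signature rather than via a None check. A usage sketch with placeholder dataset and workspace names:

    from sempy_labs import refresh_semantic_model

    refresh_semantic_model(dataset="Sales", workspace="My Workspace")  # refresh_type="full"
    refresh_semantic_model(
        dataset="Sales",
        refresh_type="dataOnly",
        max_parallelism=4,
        workspace="My Workspace",
    )
    # An unsupported refresh_type now raises ValueError listing icons.refreshTypes.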
sempy_labs/_spark.py
CHANGED

@@ -91,9 +91,9 @@ def create_custom_pool(
     max_node_count: int,
     min_executors: int,
     max_executors: int,
-    node_family:
-    auto_scale_enabled:
-    dynamic_executor_allocation_enabled:
+    node_family: str = "MemoryOptimized",
+    auto_scale_enabled: bool = True,
+    dynamic_executor_allocation_enabled: bool = True,
     workspace: Optional[str] = None,
 ):
     """
@@ -108,11 +108,11 @@ def create_custom_pool(
     min_node_count : int
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
     max_node_count : int
-        The
+        The maximum node count.
     min_executors : int
         The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
     max_executors : int
-        The
+        The maximum executors.
     node_family : str, default='MemoryOptimized'
         The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
     auto_scale_enabled : bool, default=True
@@ -182,13 +182,13 @@ def update_custom_pool(
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The
+        The maximum node count.
         Defaults to None which keeps the existing property setting.
     min_executors : int, default=None
         The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
-        The
+        The maximum executors.
         Defaults to None which keeps the existing property setting.
     node_family : str, default=None
         The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
@@ -299,7 +299,7 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):


 def get_spark_settings(
-    workspace: Optional[str] = None, return_dataframe:
+    workspace: Optional[str] = None, return_dataframe: bool = True
 ) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.
@@ -407,10 +407,10 @@ def update_spark_settings(
         `Default pool <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#poolproperties>`_ for workspace.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The
+        The maximum node count.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
-        The
+        The maximum executors.
         Defaults to None which keeps the existing property setting.
     environment_name : str, default=None
         The name of the `default environment <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#environmentproperties>`_. Empty string indicated there is no workspace default environment
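A create_custom_pool usage sketch reflecting the restored defaults. The pool_name parameter is an assumption (the hunk begins below the first parameters of the signature); the other names are placeholders:

    from sempy_labs import create_custom_pool

    create_custom_pool(
        pool_name="MyPool",  # assumed parameter name; not visible in the hunk
        min_node_count=1,
        max_node_count=4,
        min_executors=1,
        max_executors=2,
        workspace="My Workspace",
    )
    # node_family, auto_scale_enabled and dynamic_executor_allocation_enabled
    # fall back to "MemoryOptimized", True and True per the restored defaults.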
sempy_labs/_vertipaq.py
CHANGED

@@ -13,6 +13,7 @@ from sempy_labs._helper_functions import (
     resolve_dataset_id,
     save_as_delta_table,
     resolve_workspace_capacity,
+    get_max_run_id,
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -27,7 +28,7 @@ def vertipaq_analyzer(
     dataset: str,
     workspace: Optional[str] = None,
     export: Optional[str] = None,
-    read_stats_from_data:
+    read_stats_from_data: bool = False,
     **kwargs,
 ):
     """
@@ -336,10 +337,10 @@ def vertipaq_analyzer(
            int_cols.append(k)
        elif v in ["float", "double"] and k != "Temperature":
            pct_cols.append(k)
-    colSize[int_cols] = colSize[int_cols].
-    temp[int_cols] = temp[int_cols].
-    colSize[pct_cols] = colSize[pct_cols].
-    temp[pct_cols] = temp[pct_cols].
+    colSize[int_cols] = colSize[int_cols].map("{:,}".format)
+    temp[int_cols] = temp[int_cols].map("{:,}".format)
+    colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format)
+    temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format)

     # Tables
     int_cols = []
@@ -351,8 +352,8 @@ def vertipaq_analyzer(
            pct_cols.append(k)
     export_Table = dfT.copy()

-    dfT[int_cols] = dfT[int_cols].
-    dfT[pct_cols] = dfT[pct_cols].
+    dfT[int_cols] = dfT[int_cols].map("{:,}".format)
+    dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format)

     # Relationships
     dfR = pd.merge(
@@ -391,7 +392,7 @@ def vertipaq_analyzer(
            int_cols.append(k)
     if not read_stats_from_data:
         int_cols.remove("Missing Rows")
-    dfR[int_cols] = dfR[int_cols].
+    dfR[int_cols] = dfR[int_cols].map("{:,}".format)

     # Partitions
     dfP = dfP[
@@ -414,7 +415,7 @@ def vertipaq_analyzer(
        if v in ["int", "long", "double", "float"]:
            int_cols.append(k)
     intList = ["Record Count", "Segment Count", "Records per Segment"]
-    dfP[intList] = dfP[intList].
+    dfP[intList] = dfP[intList].map("{:,}".format)

     # Hierarchies
     dfH_filt = dfH[dfH["Level Ordinal"] == 0]
@@ -426,7 +427,7 @@ def vertipaq_analyzer(
     dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
     export_Hier = dfH_filt.copy()
     intList = ["Used Size"]
-    dfH_filt[intList] = dfH_filt[intList].
+    dfH_filt[intList] = dfH_filt[intList].map("{:,}".format)

     # Model
     # Converting to KB/MB/GB necessitates division by 1024 * 1000.
@@ -456,7 +457,7 @@ def vertipaq_analyzer(
     for k, v in vertipaq_map["Model"].items():
        if v in ["long", "int"] and k != "Compatibility Level":
            int_cols.append(k)
-    dfModel[int_cols] = dfModel[int_cols].
+    dfModel[int_cols] = dfModel[int_cols].map("{:,}".format)

     dataFrames = {
         "dfModel": dfModel,
@@ -483,26 +484,23 @@ def vertipaq_analyzer(
     )

     if export == "table":
-        spark = SparkSession.builder.getOrCreate()
+        # spark = SparkSession.builder.getOrCreate()

         lakehouse_id = fabric.get_lakehouse_id()
         lake_workspace = fabric.resolve_workspace_name()
         lakehouse = resolve_lakehouse_name(
             lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
-        lakeTName = "
+        lakeTName = "vertipaqanalyzer_model"

         lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
-
         if len(lakeT_filt) == 0:
             runId = 1
         else:
-
-
-            runId = maxRunId + 1
+            max_run_id = get_max_run_id(table_name=lakeTName)
+            runId = max_run_id + 1

         dfMap = {
             "Columns": ["Columns", export_Col],
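All of the restored .map("{:,}".format) calls format every cell of a DataFrame slice as a string; DataFrame.map is the pandas >= 2.1 name for the elementwise applymap. A self-contained demo of the same pattern:

    import pandas as pd

    df = pd.DataFrame({"Rows": [1234567, 89], "Pct": [12.3456, 7.8]})
    df[["Rows"]] = df[["Rows"]].map("{:,}".format)   # -> '1,234,567' and '89'
    df[["Pct"]] = df[["Pct"]].map("{:.2f}%".format)  # -> '12.35%' and '7.80%'
    print(df)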