semantic-link-labs 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.8.2.dist-info → semantic_link_labs-0.8.3.dist-info}/METADATA +7 -3
- {semantic_link_labs-0.8.2.dist-info → semantic_link_labs-0.8.3.dist-info}/RECORD +35 -34
- sempy_labs/__init__.py +14 -0
- sempy_labs/_capacities.py +89 -11
- sempy_labs/_capacity_migration.py +167 -60
- sempy_labs/_clear_cache.py +3 -3
- sempy_labs/_data_pipelines.py +48 -0
- sempy_labs/_external_data_shares.py +188 -0
- sempy_labs/_generate_semantic_model.py +0 -1
- sempy_labs/_git.py +1 -1
- sempy_labs/_helper_functions.py +14 -11
- sempy_labs/_list_functions.py +6 -3
- sempy_labs/_model_bpa.py +5 -5
- sempy_labs/_model_bpa_bulk.py +3 -5
- sempy_labs/_notebooks.py +4 -3
- sempy_labs/_sql.py +2 -2
- sempy_labs/_translations.py +14 -14
- sempy_labs/_vertipaq.py +121 -101
- sempy_labs/_warehouses.py +11 -1
- sempy_labs/admin/__init__.py +2 -0
- sempy_labs/admin/_basic_functions.py +124 -21
- sempy_labs/directlake/_directlake_schema_sync.py +0 -5
- sempy_labs/directlake/_generate_shared_expression.py +1 -1
- sempy_labs/directlake/_guardrails.py +1 -1
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +2 -2
- sempy_labs/report/_generate_report.py +10 -14
- sempy_labs/report/_report_bpa.py +8 -10
- sempy_labs/report/_report_functions.py +13 -19
- sempy_labs/report/_report_rebind.py +4 -1
- sempy_labs/report/_reportwrapper.py +3 -3
- sempy_labs/tom/_model.py +109 -34
- {semantic_link_labs-0.8.2.dist-info → semantic_link_labs-0.8.3.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.2.dist-info → semantic_link_labs-0.8.3.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.8.2.dist-info → semantic_link_labs-0.8.3.dist-info}/top_level.txt +0 -0
sempy_labs/_helper_functions.py
CHANGED
@@ -961,15 +961,15 @@ class FabricTokenCredential(TokenCredential):
         **kwargs: any,
     ) -> AccessToken:

-
+        import notebookutils

-        token =
+        token = notebookutils.credentials.getToken(scopes)
         access_token = AccessToken(token, 0)

         return access_token


-def
+def _get_adls_client(account_name):

     from azure.storage.filedatalake import DataLakeServiceClient

@@ -1017,19 +1017,22 @@ def get_language_codes(languages: str | List[str]):
     return languages


-def
+def _get_azure_token_credentials(
     key_vault_uri: str,
     key_vault_tenant_id: str,
     key_vault_client_id: str,
     key_vault_client_secret: str,
+    audience: str = "https://management.azure.com/.default",
 ) -> Tuple[str, str, dict]:

-
+    import notebookutils
     from azure.identity import ClientSecretCredential

-
-
-
+    # "https://analysis.windows.net/powerbi/api/.default"
+
+    tenant_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_tenant_id)
+    client_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_client_id)
+    client_secret = notebookutils.credentials.getSecret(
         key_vault_uri, key_vault_client_secret
     )

@@ -1037,7 +1040,7 @@ def get_azure_token_credentials(
         tenant_id=tenant_id, client_id=client_id, client_secret=client_secret
     )

-    token = credential.get_token(
+    token = credential.get_token(audience).token

     headers = {
         "Authorization": f"Bearer {token}",
@@ -1077,7 +1080,7 @@ def resolve_environment_id(environment: str, workspace: Optional[str] = None) ->
     )


-def
+def _make_clickable(val):

     return f'<a target="_blank" href="{val}">{val}</a>'

@@ -1132,7 +1135,7 @@ def generate_guid():
     return str(uuid.uuid4())


-def
+def _get_max_run_id(lakehouse: str, table_name: str) -> int:

     from pyspark.sql import SparkSession

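The helper changes above route token acquisition through notebookutils inside a Fabric notebook and add an audience parameter to _get_azure_token_credentials. A minimal sketch of the equivalent service-principal flow outside Fabric, using only azure-identity (the function name and parameters here are illustrative, not part of the library):

    from azure.identity import ClientSecretCredential

    def get_bearer_headers(
        tenant_id: str,
        client_id: str,
        client_secret: str,
        audience: str = "https://management.azure.com/.default",
    ) -> dict:
        # Build a service-principal credential and request a token for the given
        # audience, mirroring the new credential.get_token(audience).token call above.
        credential = ClientSecretCredential(
            tenant_id=tenant_id, client_id=client_id, client_secret=client_secret
        )
        token = credential.get_token(audience).token
        return {"Authorization": f"Bearer {token}"}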
sempy_labs/_list_functions.py
CHANGED
@@ -1487,7 +1487,7 @@ def list_semantic_model_object_report_usage(
     is sorted descending by 'Report Usage Count'.
     """

-    from sempy_labs._model_dependencies import
+    from sempy_labs._model_dependencies import get_model_calc_dependencies
     from sempy_labs._helper_functions import format_dax_object_name

     workspace = fabric.resolve_workspace_name(workspace)
@@ -1503,7 +1503,7 @@ def list_semantic_model_object_report_usage(
         )
     else:
         df = pd.DataFrame(columns=["Table Name", "Object Name", "Object Type"])
-    dep =
+    dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)

     for i, r in dfR.iterrows():
         object_type = r["Object Type"]
@@ -1515,7 +1515,10 @@ def list_semantic_model_object_report_usage(
                 "Object Type": object_type,
             }
             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
-
+        df["Referenced Object Type"] = df["Referenced Object Type"].replace(
+            "Attribute Hierarchy", "Column"
+        )
+        if object_type in ["Measure", "Calc Column", "Calc Table", "Hierarchy"]:
             df_filt = dep[dep["Object Name"] == object_name][
                 ["Referenced Table", "Referenced Object", "Referenced Object Type"]
             ]
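The new lines normalize dependency types before reporting usage: references of type "Attribute Hierarchy" are counted as "Column". A small self-contained illustration of that pandas replace (sample data only):

    import pandas as pd

    df = pd.DataFrame(
        {"Referenced Object Type": ["Measure", "Attribute Hierarchy", "Column"]}
    )
    # Attribute-hierarchy references are reported as plain columns.
    df["Referenced Object Type"] = df["Referenced Object Type"].replace(
        "Attribute Hierarchy", "Column"
    )
    print(df["Referenced Object Type"].tolist())  # ['Measure', 'Column', 'Column']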
sempy_labs/_model_bpa.py
CHANGED
@@ -12,7 +12,7 @@ from sempy_labs._helper_functions import (
     resolve_workspace_capacity,
     resolve_dataset_id,
     get_language_codes,
-
+    _get_max_run_id,
 )
 from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
 from sempy_labs.tom import connect_semantic_model
@@ -220,9 +220,9 @@ def run_model_bpa(

         rules = translate_using_spark(rules)

-    rules["Severity"]
-    rules["Severity"]
-    rules["Severity"]
+    rules.loc[rules["Severity"] == "Warning", "Severity"] = icons.warning
+    rules.loc[rules["Severity"] == "Error", "Severity"] = icons.error
+    rules.loc[rules["Severity"] == "Info", "Severity"] = icons.info

     pd.set_option("display.max_colwidth", 1000)

@@ -350,7 +350,7 @@ def run_model_bpa(
         if len(lakeT_filt) == 0:
             runId = 1
         else:
-            max_run_id =
+            max_run_id = _get_max_run_id(
                 lakehouse=lakehouse, table_name=delta_table_name
             )
             runId = max_run_id + 1
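The severity assignment now uses boolean-mask .loc writes instead of whole-column assignment. A standalone sketch of the pattern (icon strings are placeholders for icons.warning / icons.error / icons.info):

    import pandas as pd

    rules = pd.DataFrame({"Severity": ["Warning", "Error", "Info", "Warning"]})
    icon_map = {"Warning": "[warning]", "Error": "[error]", "Info": "[info]"}
    for label, icon in icon_map.items():
        # Only rows matching the label get the replacement value.
        rules.loc[rules["Severity"] == label, "Severity"] = icon
    print(rules["Severity"].tolist())  # ['[warning]', '[error]', '[info]', '[warning]']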
sempy_labs/_model_bpa_bulk.py
CHANGED
@@ -6,7 +6,7 @@ from sempy_labs._helper_functions import (
     save_as_delta_table,
     resolve_workspace_capacity,
     retry,
-
+    _get_max_run_id,
 )
 from sempy_labs.lakehouse import (
     get_lakehouse_tables,
@@ -49,8 +49,6 @@ def run_model_bpa_bulk(
         The semantic models to always skip when running this analysis.
     """

-    import pyspark.sql.functions as F
-
     if not lakehouse_attached():
         raise ValueError(
             f"{icons.red_dot} No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
@@ -92,7 +90,7 @@ def run_model_bpa_bulk(
     if len(lakeT_filt) == 0:
         runId = 1
     else:
-        max_run_id =
+        max_run_id = _get_max_run_id(lakehouse=lakehouse, table_name=output_table)
         runId = max_run_id + 1

     if isinstance(workspace, str):
@@ -162,7 +160,7 @@ def run_model_bpa_bulk(
             )
             print(e)

-    df["Severity"].replace(icons.severity_mapping
+    df["Severity"].replace(icons.severity_mapping)

     # Append save results individually for each workspace (so as not to create a giant dataframe)
     print(
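Both BPA entry points now delegate run-id bookkeeping to the shared _get_max_run_id helper. Its body is not shown in this diff; a plausible sketch of what such a helper does (the RunId column name and the {lakehouse}.{table_name} table reference are assumptions):

    from pyspark.sql import SparkSession

    def get_max_run_id(lakehouse: str, table_name: str) -> int:
        # Read the highest run id recorded so far in the output Delta table.
        spark = SparkSession.builder.getOrCreate()
        row = spark.sql(
            f"SELECT MAX(RunId) AS max_run_id FROM {lakehouse}.{table_name}"
        ).collect()[0]
        return int(row["max_run_id"] or 0)

    # Caller pattern, as in run_model_bpa_bulk: runId = 1 for a fresh table,
    # otherwise get_max_run_id(...) + 1.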
sempy_labs/_notebooks.py
CHANGED
@@ -8,7 +8,6 @@ from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     lro,
     _decode_b64,
-    resolve_notebook_id,
 )
 from sempy.fabric.exceptions import FabricHTTPException

@@ -38,10 +37,12 @@ def get_notebook_definition(
     """

     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
-
+    item_id = fabric.resolve_item_id(
+        item_name=notebook_name, type="Notebook", workspace=workspace
+    )
     client = fabric.FabricRestClient()
     response = client.post(
-        f"v1/workspaces/{workspace_id}/notebooks/{
+        f"v1/workspaces/{workspace_id}/notebooks/{item_id}/getDefinition",
     )

     result = lro(client, response).json()
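The notebook id is now resolved with the generic fabric.resolve_item_id instead of a dedicated resolve_notebook_id helper. A sketch of the resulting flow, assembled from the lines shown above (error handling omitted):

    import sempy.fabric as fabric
    from sempy_labs._helper_functions import lro, resolve_workspace_name_and_id

    def get_notebook_definition_sketch(notebook_name: str, workspace=None):
        (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
        item_id = fabric.resolve_item_id(
            item_name=notebook_name, type="Notebook", workspace=workspace
        )
        client = fabric.FabricRestClient()
        response = client.post(
            f"v1/workspaces/{workspace_id}/notebooks/{item_id}/getDefinition",
        )
        # getDefinition is a long-running operation; lro() polls it to completion.
        return lro(client, response).json()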
sempy_labs/_sql.py
CHANGED
@@ -9,7 +9,7 @@ from sempy.fabric.exceptions import FabricHTTPException
 from sempy_labs._helper_functions import resolve_warehouse_id, resolve_lakehouse_id


-def
+def _bytes2mswin_bstr(value: bytes) -> bytes:
     """Convert a sequence of bytes into a (MS-Windows) BSTR (as bytes).

     See https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-319190980
@@ -68,7 +68,7 @@ class ConnectBase:

         # Set up the connection string
         access_token = SynapseTokenProvider()()
-        tokenstruct =
+        tokenstruct = _bytes2mswin_bstr(access_token.encode())
         conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={name};Encrypt=Yes;"

         if timeout is not None:
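_bytes2mswin_bstr packs the access token for pyodbc's access-token connection attribute. Its body is not shown here; the approach described in the referenced pyodbc issue is roughly:

    import struct
    from itertools import chain, repeat

    def bytes2mswin_bstr(value: bytes) -> bytes:
        # Widen each byte to UTF-16-LE and prefix with the little-endian byte length.
        encoded = bytes(chain.from_iterable(zip(value, repeat(0))))
        return struct.pack("<i", len(encoded)) + encoded

    print(bytes2mswin_bstr(b"abc"))  # b'\x06\x00\x00\x00a\x00b\x00c\x00'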
sempy_labs/_translations.py
CHANGED
@@ -40,7 +40,7 @@ def translate_semantic_model(
     from pyspark.sql import SparkSession
     from sempy_labs.tom import connect_semantic_model

-    def
+    def _clean_text(text, exclude_chars):
         if exclude_chars:
             for char in exclude_chars:
                 text = text.replace(char, " ")
@@ -60,8 +60,8 @@ def translate_semantic_model(
     ) as tom:

         for o in tom.model.Tables:
-            oName =
-            oDescription =
+            oName = _clean_text(o.Name, exclude_characters)
+            oDescription = _clean_text(o.Description, exclude_characters)
             new_data = {
                 "Object Type": "Table",
                 "Name": o.Name,
@@ -75,9 +75,9 @@
                 [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
             )
         for o in tom.all_columns():
-            oName =
-            oDescription =
-            oDisplayFolder =
+            oName = _clean_text(o.Name, exclude_characters)
+            oDescription = _clean_text(o.Description, exclude_characters)
+            oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
             new_data = {
                 "Object Type": "Column",
                 "Name": o.Name,
@@ -91,9 +91,9 @@
                 [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
             )
         for o in tom.all_measures():
-            oName =
-            oDescription =
-            oDisplayFolder =
+            oName = _clean_text(o.Name, exclude_characters)
+            oDescription = _clean_text(o.Description, exclude_characters)
+            oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
             new_data = {
                 "Object Type": "Measure",
                 "Name": o.Name,
@@ -107,9 +107,9 @@
                 [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
             )
         for o in tom.all_hierarchies():
-            oName =
-            oDescription =
-            oDisplayFolder =
+            oName = _clean_text(o.Name, exclude_characters)
+            oDescription = _clean_text(o.Description, exclude_characters)
+            oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
             new_data = {
                 "Object Type": "Hierarchy",
                 "Name": o.Name,
@@ -123,8 +123,8 @@
                 [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
             )
         for o in tom.all_levels():
-            oName =
-            oDescription =
+            oName = _clean_text(o.Name, exclude_characters)
+            oDescription = _clean_text(o.Description, exclude_characters)
             new_data = {
                 "Object Type": "Level",
                 "Name": o.Name,
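The repeated cleanup of names, descriptions, and display folders is now funneled through the nested _clean_text helper. A standalone sketch of its visible behavior (the final return statement is assumed):

    def clean_text(text: str, exclude_chars) -> str:
        # Blank out any excluded characters before the text is sent for translation.
        if exclude_chars:
            for char in exclude_chars:
                text = text.replace(char, " ")
        return text

    print(clean_text("Sales [Internal]", "[]"))  # 'Sales  Internal '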
sempy_labs/_vertipaq.py
CHANGED
@@ -13,7 +13,7 @@ from sempy_labs._helper_functions import (
     resolve_dataset_id,
     save_as_delta_table,
     resolve_workspace_capacity,
-
+    _get_max_run_id,
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -74,68 +74,71 @@ def vertipaq_analyzer(
     data_type_timestamp = "timestamp"
     data_type_double = "double"
     data_type_bool = "bool"
+    int_format = "int"
+    pct_format = "pct"
+    no_format = ""

     vertipaq_map = {
         "Model": {
-            "Dataset Name": data_type_string,
-            "Total Size": data_type_long,
-            "Table Count": data_type_long,
-            "Column Count": data_type_long,
-            "Compatibility Level": data_type_long,
-            "Default Mode": data_type_string,
+            "Dataset Name": [data_type_string, no_format],
+            "Total Size": [data_type_long, int_format],
+            "Table Count": [data_type_long, int_format],
+            "Column Count": [data_type_long, int_format],
+            "Compatibility Level": [data_type_long, no_format],
+            "Default Mode": [data_type_string, no_format],
         },
         "Tables": {
-            "Table Name": data_type_string,
-            "Type": data_type_string,
-            "Row Count": data_type_long,
-            "Total Size": data_type_long,
-            "Dictionary Size": data_type_long,
-            "Data Size": data_type_long,
-            "Hierarchy Size": data_type_long,
-            "Relationship Size": data_type_long,
-            "User Hierarchy Size": data_type_long,
-            "Partitions": data_type_long,
-            "Columns": data_type_long,
-            "% DB": data_type_double,
+            "Table Name": [data_type_string, no_format],
+            "Type": [data_type_string, no_format],
+            "Row Count": [data_type_long, int_format],
+            "Total Size": [data_type_long, int_format],
+            "Dictionary Size": [data_type_long, int_format],
+            "Data Size": [data_type_long, int_format],
+            "Hierarchy Size": [data_type_long, int_format],
+            "Relationship Size": [data_type_long, int_format],
+            "User Hierarchy Size": [data_type_long, int_format],
+            "Partitions": [data_type_long, int_format],
+            "Columns": [data_type_long, int_format],
+            "% DB": [data_type_double, pct_format],
         },
         "Partitions": {
-            "Table Name": data_type_string,
-            "Partition Name": data_type_string,
-            "Mode": data_type_string,
-            "Record Count": data_type_long,
-            "Segment Count": data_type_long,
-            "Records per Segment": data_type_double,
+            "Table Name": [data_type_string, no_format],
+            "Partition Name": [data_type_string, no_format],
+            "Mode": [data_type_string, no_format],
+            "Record Count": [data_type_long, int_format],
+            "Segment Count": [data_type_long, int_format],
+            "Records per Segment": [data_type_double, int_format],
         },
         "Columns": {
-            "Table Name": data_type_string,
-            "Column Name": data_type_string,
-            "Type": data_type_string,
-            "Cardinality": data_type_long,
-            "Total Size": data_type_long,
-            "Data Size": data_type_long,
-            "Dictionary Size": data_type_long,
-            "Hierarchy Size": data_type_long,
-            "% Table": data_type_double,
-            "% DB": data_type_double,
-            "Data Type": data_type_string,
-            "Encoding": data_type_string,
-            "Is Resident": data_type_bool,
-            "Temperature": data_type_double,
-            "Last Accessed": data_type_timestamp,
+            "Table Name": [data_type_string, no_format],
+            "Column Name": [data_type_string, no_format],
+            "Type": [data_type_string, no_format],
+            "Cardinality": [data_type_long, int_format],
+            "Total Size": [data_type_long, int_format],
+            "Data Size": [data_type_long, int_format],
+            "Dictionary Size": [data_type_long, int_format],
+            "Hierarchy Size": [data_type_long, int_format],
+            "% Table": [data_type_double, pct_format],
+            "% DB": [data_type_double, pct_format],
+            "Data Type": [data_type_string, no_format],
+            "Encoding": [data_type_string, no_format],
+            "Is Resident": [data_type_bool, no_format],
+            "Temperature": [data_type_double, int_format],
+            "Last Accessed": [data_type_timestamp, no_format],
         },
         "Hierarchies": {
-            "Table Name": data_type_string,
-            "Hierarchy Name": data_type_string,
-            "Used Size": data_type_long,
+            "Table Name": [data_type_string, no_format],
+            "Hierarchy Name": [data_type_string, no_format],
+            "Used Size": [data_type_long, int_format],
        },
         "Relationships": {
-            "From Object": data_type_string,
-            "To Object": data_type_string,
-            "Multiplicity": data_type_string,
-            "Used Size": data_type_long,
-            "Max From Cardinality": data_type_long,
-            "Max To Cardinality": data_type_long,
-            "Missing Rows": data_type_long,
+            "From Object": [data_type_string, no_format],
+            "To Object": [data_type_string, no_format],
+            "Multiplicity": [data_type_string, no_format],
+            "Used Size": [data_type_long, int_format],
+            "Max From Cardinality": [data_type_long, int_format],
+            "Max To Cardinality": [data_type_long, int_format],
+            "Missing Rows": [data_type_long, int_format],
         },
     }

@@ -163,7 +166,8 @@ def vertipaq_analyzer(
         table_count = tom.model.Tables.Count
         column_count = len(list(tom.all_columns()))

-        dfR["Missing Rows"] =
+        dfR["Missing Rows"] = 0
+        dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

         # Direct Lake
         if read_stats_from_data:
@@ -323,38 +327,16 @@ def vertipaq_analyzer(
     dfC["% DB"] = round((dfC["Total Size"] / db_total_size) * 100, 2)
     columnList = list(vertipaq_map["Columns"].keys())

+    dfC = dfC[dfC["Type"] != "RowNumber"].reset_index(drop=True)
+
     colSize = dfC[columnList].sort_values(by="Total Size", ascending=False)
     temp = dfC[columnList].sort_values(by="Temperature", ascending=False)
     colSize.reset_index(drop=True, inplace=True)
     temp.reset_index(drop=True, inplace=True)

     export_Col = colSize.copy()
-
-    int_cols = []
-    pct_cols = []
-    for k, v in vertipaq_map["Columns"].items():
-        if v in ["int", "long"]:
-            int_cols.append(k)
-        elif v in ["float", "double"] and k != "Temperature":
-            pct_cols.append(k)
-    colSize[int_cols] = colSize[int_cols].map("{:,}".format)
-    temp[int_cols] = temp[int_cols].map("{:,}".format)
-    colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format)
-    temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format)
-
-    # Tables
-    int_cols = []
-    pct_cols = []
-    for k, v in vertipaq_map["Tables"].items():
-        if v in ["int", "long"]:
-            int_cols.append(k)
-        elif v in ["float", "double"]:
-            pct_cols.append(k)
     export_Table = dfT.copy()

-    dfT[int_cols] = dfT[int_cols].map("{:,}".format)
-    dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format)
-
     # Relationships
     dfR = pd.merge(
         dfR,
@@ -386,14 +368,6 @@ def vertipaq_analyzer(
     dfR.reset_index(drop=True, inplace=True)
     export_Rel = dfR.copy()

-    int_cols = []
-    for k, v in vertipaq_map["Relationships"].items():
-        if v in ["int", "long"]:
-            int_cols.append(k)
-    if not read_stats_from_data:
-        int_cols.remove("Missing Rows")
-    dfR[int_cols] = dfR[int_cols].map("{:,}".format)
-
     # Partitions
     dfP = dfP[
         [
@@ -410,12 +384,6 @@ def vertipaq_analyzer(
     )  # Remove after records per segment is fixed
     dfP.reset_index(drop=True, inplace=True)
     export_Part = dfP.copy()
-    int_cols = []
-    for k, v in vertipaq_map["Partitions"].items():
-        if v in ["int", "long", "double", "float"]:
-            int_cols.append(k)
-    intList = ["Record Count", "Segment Count", "Records per Segment"]
-    dfP[intList] = dfP[intList].map("{:,}".format)

     # Hierarchies
     dfH_filt = dfH[dfH["Level Ordinal"] == 0]
@@ -426,8 +394,6 @@ def vertipaq_analyzer(
     dfH_filt.fillna({"Used Size": 0}, inplace=True)
     dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
     export_Hier = dfH_filt.copy()
-    intList = ["Used Size"]
-    dfH_filt[intList] = dfH_filt[intList].map("{:,}".format)

     # Model
     # Converting to KB/MB/GB necessitates division by 1024 * 1000.
@@ -453,11 +419,63 @@ def vertipaq_analyzer(
     dfModel.reset_index(drop=True, inplace=True)
     dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
     export_Model = dfModel.copy()
-
-
-
-
-
+
+    def _style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping):
+
+        format_mapping = {
+            "int": "{:,}",
+            "pct": "{:.2f}%",
+            "": "{}",
+        }
+
+        format_dict = {
+            col: format_mapping[dt] for col, dt in column_type_mapping.items()
+        }
+
+        return dataframe.style.format(format_dict)
+
+    dfModel = _style_columns_based_on_types(
+        dfModel,
+        column_type_mapping={
+            key: values[1] for key, values in vertipaq_map["Model"].items()
+        },
+    )
+    dfT = _style_columns_based_on_types(
+        dfT,
+        column_type_mapping={
+            key: values[1] for key, values in vertipaq_map["Tables"].items()
+        },
+    )
+    dfP = _style_columns_based_on_types(
+        dfP,
+        column_type_mapping={
+            key: values[1] for key, values in vertipaq_map["Partitions"].items()
+        },
+    )
+    colSize = _style_columns_based_on_types(
+        colSize,
+        column_type_mapping={
+            key: values[1] for key, values in vertipaq_map["Columns"].items()
+        },
+    )
+    temp = _style_columns_based_on_types(
+        temp,
+        column_type_mapping={
+            key: values[1] for key, values in vertipaq_map["Columns"].items()
+        },
+    )
+    dfR = _style_columns_based_on_types(
+        dfR,
+        column_type_mapping={
+            key: values[1] for key, values in vertipaq_map["Relationships"].items()
+        },
+    )
+    dfH_filt = _style_columns_based_on_types(
+        dfH_filt,
+        column_type_mapping={
+            key: values[1] for key, values in vertipaq_map["Hierarchies"].items()
+        },
+    )

     dataFrames = {
         "dfModel": dfModel,
@@ -484,8 +502,6 @@ def vertipaq_analyzer(
     )

     if export == "table":
-        # spark = SparkSession.builder.getOrCreate()
-
         lakehouse_id = fabric.get_lakehouse_id()
         lake_workspace = fabric.resolve_workspace_name()
         lakehouse = resolve_lakehouse_name(
@@ -499,7 +515,7 @@ def vertipaq_analyzer(
         if len(lakeT_filt) == 0:
             runId = 1
         else:
-            max_run_id =
+            max_run_id = _get_max_run_id(lakehouse=lakehouse, table_name=lakeTName)
             runId = max_run_id + 1

         dfMap = {
@@ -560,7 +576,7 @@ def vertipaq_analyzer(

         schema.update(
             {
-                key.replace(" ", "_"): value
+                key.replace(" ", "_"): value[0]
                 for key, value in vertipaq_map[key_name].items()
             }
         )
@@ -739,7 +755,11 @@ def visualize_vertipaq(dataframes):
             "ColumnName": "Column Name",
             "Tooltip": "The name of the column",
         },
-        {
+        {
+            "ViewName": "Column",
+            "ColumnName": "Type",
+            "Tooltip": "The type of column",
+        },
         {
             "ViewName": "Column",
             "ColumnName": "Cardinality",
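Display formatting moves from mutating the dataframes with "{:,}".format to a pandas Styler driven by the new [data type, format key] pairs in vertipaq_map. A self-contained illustration of that approach (sample data only):

    import pandas as pd

    format_mapping = {"int": "{:,}", "pct": "{:.2f}%", "": "{}"}

    def style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping: dict):
        # Map each column to a display format; the underlying dtypes stay numeric.
        format_dict = {
            col: format_mapping[fmt] for col, fmt in column_type_mapping.items()
        }
        return dataframe.style.format(format_dict)

    df = pd.DataFrame({"Table Name": ["Sales"], "Row Count": [1234567], "% DB": [42.5]})
    styled = style_columns_based_on_types(
        df, {"Table Name": "", "Row Count": "int", "% DB": "pct"}
    )
    # styled renders 1,234,567 and 42.50% while df itself is left untouched.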
sempy_labs/_warehouses.py
CHANGED
@@ -11,7 +11,10 @@ from sempy.fabric.exceptions import FabricHTTPException


 def create_warehouse(
-    warehouse: str,
+    warehouse: str,
+    description: Optional[str] = None,
+    case_insensitive_collation: bool = False,
+    workspace: Optional[str] = None,
 ):
     """
     Creates a Fabric warehouse.
@@ -22,6 +25,8 @@ def create_warehouse(
         Name of the warehouse.
     description : str, default=None
         A description of the warehouse.
+    case_insensitive_collation: bool, default=False
+        If True, creates the warehouse with case-insensitive collation.
     workspace : str, default=None
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -34,6 +39,11 @@ def create_warehouse(

     if description:
         request_body["description"] = description
+    if case_insensitive_collation:
+        request_body.setdefault("creationPayload", {})
+        request_body["creationPayload"][
+            "defaultCollation"
+        ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"

     client = fabric.FabricRestClient()
     response = client.post(
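The new case_insensitive_collation flag only affects the request body sent to the create-warehouse endpoint. A sketch of the payload it produces (the displayName initialization is assumed from the surrounding function, which is not shown in full):

    from typing import Optional

    def build_warehouse_request_body(
        warehouse: str,
        description: Optional[str] = None,
        case_insensitive_collation: bool = False,
    ) -> dict:
        request_body = {"displayName": warehouse}  # assumed initialization
        if description:
            request_body["description"] = description
        if case_insensitive_collation:
            # Case-insensitive collation is requested via the creationPayload section.
            request_body.setdefault("creationPayload", {})
            request_body["creationPayload"][
                "defaultCollation"
            ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"
        return request_body

    print(build_warehouse_request_body("MyWarehouse", case_insensitive_collation=True))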
sempy_labs/admin/__init__.py
CHANGED
@@ -12,6 +12,7 @@ from sempy_labs.admin._basic_functions import (
     list_access_entities,
     list_workspace_access_details,
     list_items,
+    list_activity_events,
 )
 from sempy_labs.admin._domains import (
     list_domains,
@@ -50,4 +51,5 @@ __all__ = [
     "unassign_workspaces_from_capacity",
     "list_external_data_shares",
     "revoke_external_data_share",
+    "list_activity_events",
 ]