semantic-link-labs 0.7.2__py3-none-any.whl → 0.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of semantic-link-labs has been flagged as possibly problematic; see the registry listing for details.
- {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/METADATA +15 -3
- semantic_link_labs-0.7.4.dist-info/RECORD +134 -0
- {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +120 -24
- sempy_labs/_bpa_translation/{_translations_am-ET.po → _model/_translations_am-ET.po} +22 -0
- sempy_labs/_bpa_translation/{_translations_ar-AE.po → _model/_translations_ar-AE.po} +24 -0
- sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +938 -0
- sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +934 -0
- sempy_labs/_bpa_translation/{_translations_cs-CZ.po → _model/_translations_cs-CZ.po} +179 -157
- sempy_labs/_bpa_translation/{_translations_da-DK.po → _model/_translations_da-DK.po} +24 -0
- sempy_labs/_bpa_translation/{_translations_de-DE.po → _model/_translations_de-DE.po} +77 -52
- sempy_labs/_bpa_translation/{_translations_el-GR.po → _model/_translations_el-GR.po} +25 -0
- sempy_labs/_bpa_translation/{_translations_es-ES.po → _model/_translations_es-ES.po} +67 -43
- sempy_labs/_bpa_translation/{_translations_fa-IR.po → _model/_translations_fa-IR.po} +24 -0
- sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +915 -0
- sempy_labs/_bpa_translation/{_translations_fr-FR.po → _model/_translations_fr-FR.po} +83 -57
- sempy_labs/_bpa_translation/{_translations_ga-IE.po → _model/_translations_ga-IE.po} +25 -0
- sempy_labs/_bpa_translation/{_translations_he-IL.po → _model/_translations_he-IL.po} +23 -0
- sempy_labs/_bpa_translation/{_translations_hi-IN.po → _model/_translations_hi-IN.po} +24 -0
- sempy_labs/_bpa_translation/{_translations_hu-HU.po → _model/_translations_hu-HU.po} +25 -0
- sempy_labs/_bpa_translation/_model/_translations_id-ID.po +918 -0
- sempy_labs/_bpa_translation/{_translations_is-IS.po → _model/_translations_is-IS.po} +25 -0
- sempy_labs/_bpa_translation/{_translations_it-IT.po → _model/_translations_it-IT.po} +25 -0
- sempy_labs/_bpa_translation/{_translations_ja-JP.po → _model/_translations_ja-JP.po} +21 -0
- sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +823 -0
- sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +937 -0
- sempy_labs/_bpa_translation/{_translations_nl-NL.po → _model/_translations_nl-NL.po} +80 -56
- sempy_labs/_bpa_translation/{_translations_pl-PL.po → _model/_translations_pl-PL.po} +101 -76
- sempy_labs/_bpa_translation/{_translations_pt-BR.po → _model/_translations_pt-BR.po} +25 -0
- sempy_labs/_bpa_translation/{_translations_pt-PT.po → _model/_translations_pt-PT.po} +25 -0
- sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +939 -0
- sempy_labs/_bpa_translation/{_translations_ru-RU.po → _model/_translations_ru-RU.po} +25 -0
- sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +925 -0
- sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +922 -0
- sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +914 -0
- sempy_labs/_bpa_translation/{_translations_ta-IN.po → _model/_translations_ta-IN.po} +26 -0
- sempy_labs/_bpa_translation/{_translations_te-IN.po → _model/_translations_te-IN.po} +24 -0
- sempy_labs/_bpa_translation/{_translations_th-TH.po → _model/_translations_th-TH.po} +24 -0
- sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +925 -0
- sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +933 -0
- sempy_labs/_bpa_translation/{_translations_zh-CN.po → _model/_translations_zh-CN.po} +116 -97
- sempy_labs/_bpa_translation/{_translations_zu-ZA.po → _model/_translations_zu-ZA.po} +25 -0
- sempy_labs/_capacities.py +541 -0
- sempy_labs/_clear_cache.py +298 -3
- sempy_labs/_connections.py +138 -0
- sempy_labs/_dataflows.py +130 -0
- sempy_labs/_deployment_pipelines.py +171 -0
- sempy_labs/_environments.py +156 -0
- sempy_labs/_generate_semantic_model.py +148 -27
- sempy_labs/_git.py +380 -0
- sempy_labs/_helper_functions.py +203 -8
- sempy_labs/_icons.py +43 -0
- sempy_labs/_list_functions.py +170 -1012
- sempy_labs/_model_bpa.py +90 -112
- sempy_labs/_model_bpa_bulk.py +3 -1
- sempy_labs/_model_bpa_rules.py +788 -800
- sempy_labs/_notebooks.py +143 -0
- sempy_labs/_query_scale_out.py +28 -7
- sempy_labs/_spark.py +465 -0
- sempy_labs/_sql.py +120 -0
- sempy_labs/_translations.py +3 -1
- sempy_labs/_vertipaq.py +160 -99
- sempy_labs/_workspace_identity.py +66 -0
- sempy_labs/_workspaces.py +294 -0
- sempy_labs/directlake/__init__.py +2 -0
- sempy_labs/directlake/_directlake_schema_compare.py +1 -2
- sempy_labs/directlake/_directlake_schema_sync.py +1 -2
- sempy_labs/directlake/_dl_helper.py +4 -7
- sempy_labs/directlake/_generate_shared_expression.py +85 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +1 -2
- sempy_labs/lakehouse/_get_lakehouse_tables.py +7 -3
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +5 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +5 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +6 -2
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +6 -5
- sempy_labs/migration/_migration_validation.py +6 -0
- sempy_labs/report/_report_functions.py +21 -42
- sempy_labs/report/_report_rebind.py +5 -0
- sempy_labs/tom/_model.py +95 -52
- semantic_link_labs-0.7.2.dist-info/RECORD +0 -111
- {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/top_level.txt +0 -0
sempy_labs/_sql.py
ADDED
@@ -0,0 +1,120 @@
+import sempy.fabric as fabric
+import pandas as pd
+from typing import Optional, Union, List
+from sempy._utils._log import log
+import struct
+import uuid
+from itertools import chain, repeat
+from sempy.fabric.exceptions import FabricHTTPException
+from sempy_labs._helper_functions import resolve_warehouse_id
+
+
+def bytes2mswin_bstr(value: bytes) -> bytes:
+    """Convert a sequence of bytes into a (MS-Windows) BSTR (as bytes).
+
+    See https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-319190980
+    for the original code. It appears the input is converted to an
+    MS-Windows BSTR (in 'Little-endian' format).
+
+    See https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp\
+    /692a42a9-06ce-4394-b9bc-5d2a50440168
+    for more info on BSTR.
+
+    :param value: the sequence of bytes to convert
+    :return: the converted value (as a sequence of bytes)
+    """
+
+    encoded_bytes = bytes(chain.from_iterable(zip(value, repeat(0))))
+    return struct.pack("<i", len(encoded_bytes)) + encoded_bytes
+
+
+class ConnectWarehouse:
+    def __init__(
+        self,
+        warehouse: str,
+        workspace: Optional[Union[str, uuid.UUID]] = None,
+        timeout: Optional[int] = None,
+    ):
+        from sempy.fabric._token_provider import SynapseTokenProvider
+        import pyodbc
+
+        workspace = fabric.resolve_workspace_name(workspace)
+        workspace_id = fabric.resolve_workspace_id(workspace)
+        warehouse_id = resolve_warehouse_id(warehouse=warehouse, workspace=workspace)
+
+        # get the TDS endpoint
+        client = fabric.FabricRestClient()
+        response = client.get(f"v1/workspaces/{workspace_id}/warehouses/{warehouse_id}")
+        if response.status_code != 200:
+            raise FabricHTTPException(response)
+        tds_endpoint = response.json().get("properties", {}).get("connectionString")
+
+        access_token = SynapseTokenProvider()()
+        tokenstruct = bytes2mswin_bstr(access_token.encode())
+        conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={warehouse};Encrypt=Yes;"
+
+        if timeout is not None:
+            conn_str += f"Connect Timeout={timeout};"
+
+        self.connection = pyodbc.connect(conn_str, attrs_before={1256: tokenstruct})
+
+    @log
+    def query(
+        self, sql: Union[str, List[str]]
+    ) -> Union[List[pd.DataFrame], pd.DataFrame, None]:
+        """
+        Runs a SQL or T-SQL query (or multiple queries) against a Fabric Warehouse.
+
+        Parameters
+        ----------
+        sql : str or List[str]
+            A single SQL or T-SQL query, or a list of queries to be executed.
+
+        Returns
+        -------
+        Union[List[pandas.DataFrame], pandas.DataFrame, None]
+            A list of pandas DataFrames if multiple SQL queries return results,
+            a single DataFrame if one query is executed and returns results, or None.
+        """
+        cursor = None
+        results = []  # To store results from multiple queries if needed
+
+        # If the input is a single string, convert it to a list for consistency
+        if isinstance(sql, str):
+            sql = [sql]
+
+        try:
+            cursor = self.connection.cursor()
+
+            for sql_query in sql:
+                cursor.execute(sql_query)
+
+                # Commit for non-select queries (like CREATE, INSERT, etc.)
+                if not cursor.description:
+                    self.connection.commit()
+                else:
+                    # Fetch and append results for queries that return a result set
+                    result = pd.DataFrame.from_records(
+                        cursor.fetchall(),
+                        columns=[col[0] for col in cursor.description],
+                    )
+                    results.append(result)
+
+            # Return results if any queries returned a result set
+            if results:
+                return results if len(results) > 1 else results[0]
+            else:
+                return None
+
+        finally:
+            if cursor:
+                cursor.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.close()
+
+    def close(self):
+        self.connection.close()
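The class above supports both direct use and use as a context manager (via __enter__/__exit__), so a connection can be opened, queried, and closed in one block. Below is a minimal usage sketch based only on the code shown here; the warehouse and workspace names are placeholders, and the import path assumes the private module shown in this diff (the __init__.py changes suggest it may also be re-exported from sempy_labs):

from sempy_labs._sql import ConnectWarehouse

# "MyWarehouse" and "MyWorkspace" are placeholder names.
with ConnectWarehouse(warehouse="MyWarehouse", workspace="MyWorkspace", timeout=30) as wh:
    # A single query returns one DataFrame.
    df = wh.query("SELECT TOP 10 * FROM dbo.Sales")

    # A list of queries returns a list of DataFrames; statements that
    # produce no result set (CREATE, INSERT, ...) are committed instead.
    results = wh.query([
        "SELECT COUNT(*) AS row_count FROM dbo.Sales",
        "SELECT TOP 5 * FROM dbo.Customers",
    ])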
sempy_labs/_translations.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
 from typing import List, Optional, Union
 from sempy._utils._log import log
 import sempy_labs._icons as icons
+from sempy_labs._helper_functions import get_language_codes


 @log
@@ -32,7 +33,6 @@ def translate_semantic_model(
     -------
     pandas.DataFrame
         Shows a pandas dataframe which displays all of the translations in the semantic model.
-
     """

     from synapse.ml.services import Translate
@@ -49,6 +49,8 @@ def translate_semantic_model(
     if isinstance(languages, str):
         languages = [languages]

+    languages = get_language_codes(languages)
+
     df_prep = pd.DataFrame(
         columns=["Object Type", "Name", "Description", "Display Folder"]
     )
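The practical effect of the new call is that the languages passed to translate_semantic_model are normalized before translation. The helper itself is not shown in this diff (only its import from sempy_labs._helper_functions), so the sketch below is a hypothetical illustration of that kind of normalization; the mapping and the pass-through behavior are assumptions, not the library's actual implementation:

def get_language_codes_sketch(languages):
    # Illustrative only: map a few language names to codes; values that
    # are not recognized pass through unchanged.
    mapping = {"italian": "it", "spanish": "es", "japanese": "ja"}
    return [mapping.get(lang.lower(), lang) for lang in languages]

print(get_language_codes_sketch(["Italian", "es", "Japanese"]))  # ['it', 'es', 'ja']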
sempy_labs/_vertipaq.py
CHANGED
@@ -14,7 +14,7 @@ from sempy_labs._helper_functions import (
     save_as_delta_table,
     resolve_workspace_capacity,
 )
-from sempy_labs._list_functions import list_relationships
+from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
 from sempy_labs.directlake import get_direct_lake_source
 from typing import Optional
@@ -68,22 +68,93 @@ def vertipaq_analyzer(

     workspace = fabric.resolve_workspace_name(workspace)

-
+    data_type_string = "string"
+    data_type_long = "long"
+    data_type_timestamp = "timestamp"
+    data_type_double = "double"
+    data_type_bool = "bool"
+
+    vertipaq_map = {
+        "Model": {
+            "Dataset Name": data_type_string,
+            "Total Size": data_type_long,
+            "Table Count": data_type_long,
+            "Column Count": data_type_long,
+            "Compatibility Level": data_type_long,
+            "Default Mode": data_type_string,
+        },
+        "Tables": {
+            "Table Name": data_type_string,
+            "Type": data_type_string,
+            "Row Count": data_type_long,
+            "Total Size": data_type_long,
+            "Dictionary Size": data_type_long,
+            "Data Size": data_type_long,
+            "Hierarchy Size": data_type_long,
+            "Relationship Size": data_type_long,
+            "User Hierarchy Size": data_type_long,
+            "Partitions": data_type_long,
+            "Columns": data_type_long,
+            "% DB": data_type_double,
+        },
+        "Partitions": {
+            "Table Name": data_type_string,
+            "Partition Name": data_type_string,
+            "Mode": data_type_string,
+            "Record Count": data_type_long,
+            "Segment Count": data_type_long,
+            "Records per Segment": data_type_double,
+        },
+        "Columns": {
+            "Table Name": data_type_string,
+            "Column Name": data_type_string,
+            "Type": data_type_string,
+            "Cardinality": data_type_long,
+            "Total Size": data_type_long,
+            "Data Size": data_type_long,
+            "Dictionary Size": data_type_long,
+            "Hierarchy Size": data_type_long,
+            "% Table": data_type_double,
+            "% DB": data_type_double,
+            "Data Type": data_type_string,
+            "Encoding": data_type_string,
+            "Is Resident": data_type_bool,
+            "Temperature": data_type_double,
+            "Last Accessed": data_type_timestamp,
+        },
+        "Hierarchies": {
+            "Table Name": data_type_string,
+            "Hierarchy Name": data_type_string,
+            "Used Size": data_type_long,
+        },
+        "Relationships": {
+            "From Object": data_type_string,
+            "To Object": data_type_string,
+            "Multiplicity": data_type_string,
+            "Used Size": data_type_long,
+            "Max From Cardinality": data_type_long,
+            "Max To Cardinality": data_type_long,
+            "Missing Rows": data_type_long,
+        },
+    }
+
+    dfT = list_tables(dataset=dataset, extended=True, workspace=workspace)
     dfT.rename(columns={"Name": "Table Name"}, inplace=True)
+    columns_to_keep = list(vertipaq_map["Tables"].keys())
+    dfT = dfT[dfT.columns.intersection(columns_to_keep)]
+
     dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace)
     dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
     dfC.rename(columns={"Column Cardinality": "Cardinality"}, inplace=True)
     dfH = fabric.list_hierarchies(dataset=dataset, extended=True, workspace=workspace)
     dfR = list_relationships(dataset=dataset, extended=True, workspace=workspace)
-    dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
-    dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
     dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
         get_direct_lake_source(dataset=dataset, workspace=workspace)
     )

     with connect_semantic_model(
-        dataset=dataset,
+        dataset=dataset, workspace=workspace, readonly=True
     ) as tom:
         compat_level = tom.model.Model.Database.CompatibilityLevel
         is_direct_lake = tom.is_direct_lake()
@@ -230,7 +301,7 @@ def vertipaq_analyzer(

             query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),isblank({toObject}))\n)"

-            if isActive
+            if not isActive:
                 query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)"

             result = fabric.evaluate_dax(
@@ -245,81 +316,45 @@ def vertipaq_analyzer(
             dfR.at[i, "Missing Rows"] = missingRows
         dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

-
-
-
-
-
-    total_size = dfC["Total Size"].sum()
-    table_sizes = dfC.groupby("Table Name")["Total Size"].sum().reset_index()
-    table_sizes.rename(columns={"Total Size": "Table Size"}, inplace=True)
-
-    # Columns
-    dfC_filt = dfC[~dfC["Column Name"].str.startswith("RowNumber-")]
-    dfC_filt["% DB"] = round((dfC_filt["Total Size"] / total_size) * 100, 2)
-    dfC_filt = pd.merge(dfC_filt, table_sizes, on="Table Name", how="left")
-    dfC_filt["% Table"] = round(
-        (dfC_filt["Total Size"] / dfC_filt["Table Size"]) * 100, 2
-    )
-    columnList = [
-        "Table Name",
-        "Column Name",
-        "Type",
-        "Cardinality",
-        "Total Size",
-        "Data Size",
-        "Dictionary Size",
-        "Hierarchy Size",
-        "% Table",
-        "% DB",
-        "Data Type",
-        "Encoding",
-        "Is Resident",
-        "Temperature",
-        "Last Accessed",
-    ]
+    table_totals = dfC.groupby("Table Name")["Total Size"].transform("sum")
+    db_total_size = dfC["Total Size"].sum()
+    dfC["% Table"] = round((dfC["Total Size"] / table_totals) * 100, 2)
+    dfC["% DB"] = round((dfC["Total Size"] / db_total_size) * 100, 2)
+    columnList = list(vertipaq_map["Columns"].keys())

-    colSize =
-    temp =
+    colSize = dfC[columnList].sort_values(by="Total Size", ascending=False)
+    temp = dfC[columnList].sort_values(by="Temperature", ascending=False)
     colSize.reset_index(drop=True, inplace=True)
     temp.reset_index(drop=True, inplace=True)

     export_Col = colSize.copy()

-
-
-
-    "
-
-    "
-
-
-
-
-
-    temp[pctList] = temp[pctList].applymap("{:.2f}%".format)
+    int_cols = []
+    pct_cols = []
+    for k, v in vertipaq_map["Columns"].items():
+        if v in ["int", "long"]:
+            int_cols.append(k)
+        elif v in ["float", "double"] and k != "Temperature":
+            pct_cols.append(k)
+    colSize[int_cols] = colSize[int_cols].applymap("{:,}".format)
+    temp[int_cols] = temp[int_cols].applymap("{:,}".format)
+    colSize[pct_cols] = colSize[pct_cols].applymap("{:.2f}%".format)
+    temp[pct_cols] = temp[pct_cols].applymap("{:.2f}%".format)

     # Tables
-
-
-
-
-
-
-
-
-
-
-
-    export_Table = dfTable.copy()
-
-    intList.extend(["Row Count", "Partitions", "Columns"])
-    dfTable[intList] = dfTable[intList].applymap("{:,}".format)
-    pctList = ["% DB"]
-    dfTable[pctList] = dfTable[pctList].applymap("{:.2f}%".format)
+    int_cols = []
+    pct_cols = []
+    for k, v in vertipaq_map["Tables"].items():
+        if v in ["int", "long"]:
+            int_cols.append(k)
+        elif v in ["float", "double"]:
+            pct_cols.append(k)
+    export_Table = dfT.copy()
+
+    dfT[int_cols] = dfT[int_cols].applymap("{:,}".format)
+    dfT[pct_cols] = dfT[pct_cols].applymap("{:.2f}%".format)

     # Relationships
-    # dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True)
     dfR = pd.merge(
         dfR,
         dfC[["Column Object", "Cardinality"]],
@@ -349,15 +384,14 @@ def vertipaq_analyzer(
     ].sort_values(by="Used Size", ascending=False)
     dfR.reset_index(drop=True, inplace=True)
     export_Rel = dfR.copy()
-
-
-
-
-
-
-
-
-    dfR[intList] = dfR[intList].applymap("{:,}".format)
+
+    int_cols = []
+    for k, v in vertipaq_map["Relationships"].items():
+        if v in ["int", "long"]:
+            int_cols.append(k)
+    if not read_stats_from_data:
+        int_cols.remove("Missing Rows")
+    dfR[int_cols] = dfR[int_cols].applymap("{:,}".format)

     # Partitions
     dfP = dfP[
@@ -375,6 +409,10 @@ def vertipaq_analyzer(
     )  # Remove after records per segment is fixed
     dfP.reset_index(drop=True, inplace=True)
     export_Part = dfP.copy()
+    int_cols = []
+    for k, v in vertipaq_map["Partitions"].items():
+        if v in ["int", "long", "double", "float"]:
+            int_cols.append(k)
     intList = ["Record Count", "Segment Count", "Records per Segment"]
     dfP[intList] = dfP[intList].applymap("{:,}".format)

@@ -391,12 +429,13 @@ def vertipaq_analyzer(
     dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)

     # Model
-
-
-
-
-
-
+    # Converting to KB/MB/GB necessitates division by 1024 * 1000.
+    if db_total_size >= 1000000000:
+        y = db_total_size / (1024**3) * 1000000000
+    elif db_total_size >= 1000000:
+        y = db_total_size / (1024**2) * 1000000
+    elif db_total_size >= 1000:
+        y = db_total_size / (1024) * 1000
     y = round(y)

     dfModel = pd.DataFrame(
@@ -413,12 +452,15 @@ def vertipaq_analyzer(
     dfModel.reset_index(drop=True, inplace=True)
     dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
     export_Model = dfModel.copy()
-
-
+    int_cols = []
+    for k, v in vertipaq_map["Model"].items():
+        if v in ["long", "int"] and k != "Compatibility Level":
+            int_cols.append(k)
+    dfModel[int_cols] = dfModel[int_cols].applymap("{:,}".format)

     dataFrames = {
         "dfModel": dfModel,
-        "
+        "dfT": dfT,
         "dfP": dfP,
         "colSize": colSize,
         "temp": temp,
@@ -430,7 +472,8 @@ def vertipaq_analyzer(
     for fileName, df in dataFrames.items():
         dfs[fileName] = df

-
+    if export is None:
+        visualize_vertipaq(dfs)

     # Export vertipaq to delta tables in lakehouse
     if export in ["table", "zip"]:
@@ -462,12 +505,12 @@ def vertipaq_analyzer(
         runId = maxRunId + 1

        dfMap = {
-            "
-            "
-            "
-            "
-            "
-            "
+            "Columns": ["Columns", export_Col],
+            "Tables": ["Tables", export_Table],
+            "Partitions": ["Partitions", export_Part],
+            "Relationships": ["Relationships", export_Rel],
+            "Hierarchies": ["Hierarchies", export_Hier],
+            "Model": ["Model", export_Model],
        }

        print(
@@ -479,7 +522,7 @@ def vertipaq_analyzer(
         configured_by = dfD_filt["Configured By"].iloc[0]
         capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)

-        for
+        for key_name, (obj, df) in dfMap.items():
             df["Capacity Name"] = capacity_name
             df["Capacity Id"] = capacity_id
             df["Configured By"] = configured_by
@@ -507,11 +550,29 @@ def vertipaq_analyzer(

             df.columns = df.columns.str.replace(" ", "_")

+            schema = {
+                "Capacity_Name": "string",
+                "Capacity_Id": "string",
+                "Workspace_Name": "string",
+                "Workspace_Id": "string",
+                "Dataset_Name": "string",
+                "Dataset_Id": "string",
+                "Configured_By": "string",
+            }
+
+            schema.update(
+                {
+                    key.replace(" ", "_"): value
+                    for key, value in vertipaq_map[key_name].items()
+                }
+            )
+
             delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
             save_as_delta_table(
                 dataframe=df,
                 delta_table_name=delta_table_name,
                 write_mode="append",
+                schema=schema,
                 merge_schema=True,
             )

@@ -519,7 +580,7 @@ def vertipaq_analyzer(
     if export == "zip":
         dataFrames = {
             "dfModel": dfModel,
-            "
+            "dfT": dfT,
             "dfP": dfP,
             "colSize": colSize,
             "temp": temp,
@@ -797,7 +858,7 @@ def visualize_vertipaq(dataframes):
     # define the dictionary with {"Tab name":df}
     df_dict = {
         "Model Summary": dataframes["dfModel"],
-        "Tables": dataframes["
+        "Tables": dataframes["dfT"],
         "Partitions": dataframes["dfP"],
         "Columns (Total Size)": dataframes["colSize"],
         "Columns (Temperature)": dataframes["temp"],
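The recurring pattern across these hunks is that hand-maintained column lists are replaced by lookups into vertipaq_map, so number formatting is driven by each column's declared data type. Below is a standalone sketch of that pattern; the type map and DataFrame are made up for illustration and are not the analyzer's real data:

import pandas as pd

# Illustrative type map and data, mirroring the vertipaq_map-driven formatting.
type_map = {"Table Name": "string", "Row Count": "long", "% DB": "double"}
df = pd.DataFrame({"Table Name": ["Sales"], "Row Count": [1234567], "% DB": [42.1234]})

int_cols = [k for k, v in type_map.items() if v in ("int", "long")]
pct_cols = [k for k, v in type_map.items() if v in ("float", "double")]

df[int_cols] = df[int_cols].applymap("{:,}".format)     # 1,234,567
df[pct_cols] = df[pct_cols].applymap("{:.2f}%".format)  # 42.12%
print(df)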
sempy_labs/_workspace_identity.py
ADDED
@@ -0,0 +1,66 @@
+import sempy.fabric as fabric
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    lro,
+)
+from typing import Optional
+import sempy_labs._icons as icons
+from sempy.fabric.exceptions import FabricHTTPException
+
+
+def provision_workspace_identity(workspace: Optional[str] = None):
+    """
+    Provisions a workspace identity for a workspace.
+
+    Parameters
+    ----------
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    # https://learn.microsoft.com/en-us/rest/api/fabric/core/workspaces/provision-identity?tabs=HTTP
+
+    workspace, workspace_id = resolve_workspace_name_and_id(workspace)
+
+    client = fabric.FabricRestClient()
+    response = client.post(f"/v1/workspaces/{workspace_id}/provisionIdentity")
+
+    if response.status_code not in [200, 202]:
+        raise FabricHTTPException(response)
+
+    lro(client, response)
+
+    print(
+        f"{icons.green_dot} A workspace identity has been provisioned for the '{workspace}' workspace."
+    )
+
+
+def deprovision_workspace_identity(workspace: Optional[str] = None):
+    """
+    Deprovisions a workspace identity for a workspace.
+
+    Parameters
+    ----------
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    # https://learn.microsoft.com/en-us/rest/api/fabric/core/workspaces/deprovision-identity?tabs=HTTP
+
+    workspace, workspace_id = resolve_workspace_name_and_id(workspace)
+
+    client = fabric.FabricRestClient()
+    response = client.post(f"/v1/workspaces/{workspace_id}/deprovisionIdentity")
+
+    if response.status_code not in [200, 202]:
+        raise FabricHTTPException(response)
+
+    lro(client, response)
+
+    print(
+        f"{icons.green_dot} The workspace identity has been deprovisioned from the '{workspace}' workspace."
+    )
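Both helpers take only an optional workspace name and print a confirmation on success. A minimal usage sketch, assuming the functions are re-exported at the package level as the __init__.py changes suggest (the workspace name is a placeholder; omit it to target the notebook's own workspace):

import sempy_labs as labs

# Provision an identity for a named workspace, then remove it again.
labs.provision_workspace_identity(workspace="MyWorkspace")
labs.deprovision_workspace_identity(workspace="MyWorkspace")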