semantic-link-labs 0.7.4-py3-none-any.whl → 0.8.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of semantic-link-labs has been flagged as potentially problematic by the registry.
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/METADATA +43 -7
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/RECORD +59 -40
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +116 -58
- sempy_labs/_ai.py +0 -2
- sempy_labs/_capacities.py +39 -3
- sempy_labs/_capacity_migration.py +623 -0
- sempy_labs/_clear_cache.py +8 -8
- sempy_labs/_connections.py +15 -13
- sempy_labs/_data_pipelines.py +118 -0
- sempy_labs/_documentation.py +144 -0
- sempy_labs/_eventhouses.py +118 -0
- sempy_labs/_eventstreams.py +118 -0
- sempy_labs/_generate_semantic_model.py +3 -3
- sempy_labs/_git.py +23 -24
- sempy_labs/_helper_functions.py +140 -47
- sempy_labs/_icons.py +40 -0
- sempy_labs/_kql_databases.py +134 -0
- sempy_labs/_kql_querysets.py +124 -0
- sempy_labs/_list_functions.py +218 -421
- sempy_labs/_mirrored_warehouses.py +50 -0
- sempy_labs/_ml_experiments.py +122 -0
- sempy_labs/_ml_models.py +120 -0
- sempy_labs/_model_auto_build.py +0 -4
- sempy_labs/_model_bpa.py +10 -12
- sempy_labs/_model_bpa_bulk.py +8 -7
- sempy_labs/_model_dependencies.py +26 -18
- sempy_labs/_notebooks.py +5 -16
- sempy_labs/_query_scale_out.py +6 -5
- sempy_labs/_refresh_semantic_model.py +7 -19
- sempy_labs/_spark.py +40 -45
- sempy_labs/_sql.py +60 -15
- sempy_labs/_vertipaq.py +25 -25
- sempy_labs/_warehouses.py +132 -0
- sempy_labs/_workspaces.py +0 -3
- sempy_labs/admin/__init__.py +53 -0
- sempy_labs/admin/_basic_functions.py +888 -0
- sempy_labs/admin/_domains.py +411 -0
- sempy_labs/directlake/_directlake_schema_sync.py +1 -1
- sempy_labs/directlake/_dl_helper.py +32 -16
- sempy_labs/directlake/_generate_shared_expression.py +11 -14
- sempy_labs/directlake/_guardrails.py +7 -7
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +14 -24
- sempy_labs/directlake/_update_directlake_partition_entity.py +1 -1
- sempy_labs/directlake/_warm_cache.py +1 -1
- sempy_labs/lakehouse/_get_lakehouse_tables.py +3 -3
- sempy_labs/lakehouse/_lakehouse.py +3 -2
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +5 -0
- sempy_labs/report/__init__.py +9 -6
- sempy_labs/report/_generate_report.py +1 -1
- sempy_labs/report/_report_bpa.py +369 -0
- sempy_labs/report/_report_bpa_rules.py +113 -0
- sempy_labs/report/_report_helper.py +254 -0
- sempy_labs/report/_report_list_functions.py +95 -0
- sempy_labs/report/_report_rebind.py +0 -4
- sempy_labs/report/_reportwrapper.py +2037 -0
- sempy_labs/tom/_model.py +333 -22
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/top_level.txt +0 -0
sempy_labs/_spark.py
CHANGED

@@ -91,9 +91,9 @@ def create_custom_pool(
     max_node_count: int,
     min_executors: int,
     max_executors: int,
-    node_family:
-    auto_scale_enabled:
-    dynamic_executor_allocation_enabled:
+    node_family: str = "MemoryOptimized",
+    auto_scale_enabled: bool = True,
+    dynamic_executor_allocation_enabled: bool = True,
     workspace: Optional[str] = None,
 ):
     """
@@ -108,11 +108,11 @@ def create_custom_pool(
     min_node_count : int
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
     max_node_count : int
-        The
+        The maximum node count.
     min_executors : int
         The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
     max_executors : int
-        The
+        The maximum executors.
     node_family : str, default='MemoryOptimized'
         The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
     auto_scale_enabled : bool, default=True
@@ -182,13 +182,13 @@ def update_custom_pool(
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The
+        The maximum node count.
         Defaults to None which keeps the existing property setting.
     min_executors : int, default=None
         The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
-        The
+        The maximum executors.
         Defaults to None which keeps the existing property setting.
     node_family : str, default=None
         The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
@@ -298,7 +298,9 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     )


-def get_spark_settings(
+def get_spark_settings(
+    workspace: Optional[str] = None, return_dataframe: bool = True
+) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.

@@ -308,10 +310,12 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         The name of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
+    return_dataframe : bool, default=True
+        If True, returns a pandas dataframe. If False, returns a json dictionary.

     Returns
     -------
-    pandas.DataFrame
+    pandas.DataFrame | dict
        A pandas dataframe showing the spark settings for a workspace.
     """

@@ -363,12 +367,15 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         "High Concurrency Enabled",
         "Customize Compute Enabled",
     ]
-    int_cols = ["Max Node Count", "Max Executors"]
+    # int_cols = ["Max Node Count", "Max Executors"]

     df[bool_cols] = df[bool_cols].astype(bool)
-    df[int_cols] = df[int_cols].astype(int)
+    # df[int_cols] = df[int_cols].astype(int)

-
+    if return_dataframe:
+        return df
+    else:
+        return response.json()


 def update_spark_settings(
@@ -400,10 +407,10 @@ def update_spark_settings(
         `Default pool <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#poolproperties>`_ for workspace.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The
+        The maximum node count.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
-        The
+        The maximum executors.
         Defaults to None which keeps the existing property setting.
     environment_name : str, default=None
         The name of the `default environment <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#environmentproperties>`_. Empty string indicated there is no workspace default environment
@@ -420,38 +427,26 @@ def update_spark_settings(
     # https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)

-
-
-    if automatic_log_enabled is None:
-        automatic_log_enabled = bool(dfS["Automatic Log Enabled"].iloc[0])
-    if high_concurrency_enabled is None:
-        high_concurrency_enabled = bool(dfS["High Concurrency Enabled"].iloc[0])
-    if customize_compute_enabled is None:
-        customize_compute_enabled = bool(dfS["Customize Compute Enabled"].iloc[0])
-    if default_pool_name is None:
-        default_pool_name = dfS["Default Pool Name"].iloc[0]
-    if max_node_count is None:
-        max_node_count = int(dfS["Max Node Count"].iloc[0])
-    if max_executors is None:
-        max_executors = int(dfS["Max Executors"].iloc[0])
-    if environment_name is None:
-        environment_name = dfS["Environment Name"].iloc[0]
-    if runtime_version is None:
-        runtime_version = dfS["Runtime Version"].iloc[0]
+    request_body = get_spark_settings(workspace=workspace, return_dataframe=False)

-
-    "automaticLog"
-
-    "
-    "
-
-
-
-
-
-    "
-
+    if automatic_log_enabled is not None:
+        request_body["automaticLog"]["enabled"] = automatic_log_enabled
+    if high_concurrency_enabled is not None:
+        request_body["highConcurrency"][
+            "notebookInteractiveRunEnabled"
+        ] = high_concurrency_enabled
+    if customize_compute_enabled is not None:
+        request_body["pool"]["customizeComputeEnabled"] = customize_compute_enabled
+    if default_pool_name is not None:
+        request_body["pool"]["defaultPool"]["name"] = default_pool_name
+    if max_node_count is not None:
+        request_body["pool"]["starterPool"]["maxNodeCount"] = max_node_count
+    if max_executors is not None:
+        request_body["pool"]["starterPool"]["maxExecutors"] = max_executors
+    if environment_name is not None:
+        request_body["environment"]["name"] = environment_name
+    if runtime_version is not None:
+        request_body["environment"]["runtimeVersion"] = runtime_version

     client = fabric.FabricRestClient()
     response = client.patch(
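The net effect of the _spark.py changes: get_spark_settings can now return the raw settings payload as a dictionary, and update_spark_settings fetches that payload and patches only the properties the caller supplies, instead of rebuilding the request body from a dataframe. A minimal usage sketch, assuming a Fabric notebook session; "MyWorkspace" is a placeholder, and the functions are imported here from the private module shown in the diff (they may also be re-exported from the package root):

    from sempy_labs._spark import get_spark_settings, update_spark_settings

    # New in 0.8.x: return_dataframe=False yields the raw JSON dictionary
    # that update_spark_settings now uses as its PATCH request body.
    settings = get_spark_settings(workspace="MyWorkspace", return_dataframe=False)
    print(settings["environment"])

    # Only the parameters passed here are modified; everything else in the
    # fetched settings payload is sent back unchanged.
    update_spark_settings(workspace="MyWorkspace", max_node_count=5)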
sempy_labs/_sql.py
CHANGED

@@ -6,7 +6,7 @@ import struct
 import uuid
 from itertools import chain, repeat
 from sempy.fabric.exceptions import FabricHTTPException
-from sempy_labs._helper_functions import resolve_warehouse_id
+from sempy_labs._helper_functions import resolve_warehouse_id, resolve_lakehouse_id


 def bytes2mswin_bstr(value: bytes) -> bytes:
@@ -28,30 +28,48 @@ def bytes2mswin_bstr(value: bytes) -> bytes:
     return struct.pack("<i", len(encoded_bytes)) + encoded_bytes


-class
+class ConnectBase:
     def __init__(
         self,
-
+        name: str,
         workspace: Optional[Union[str, uuid.UUID]] = None,
         timeout: Optional[int] = None,
+        endpoint_type: str = "warehouse",
     ):
         from sempy.fabric._token_provider import SynapseTokenProvider
         import pyodbc

         workspace = fabric.resolve_workspace_name(workspace)
         workspace_id = fabric.resolve_workspace_id(workspace)
-        warehouse_id = resolve_warehouse_id(warehouse=warehouse, workspace=workspace)

-        #
+        # Resolve the appropriate ID (warehouse or lakehouse)
+        if endpoint_type == "warehouse":
+            resource_id = resolve_warehouse_id(warehouse=name, workspace=workspace)
+        else:
+            resource_id = resolve_lakehouse_id(lakehouse=name, workspace=workspace)
+
+        # Get the TDS endpoint
         client = fabric.FabricRestClient()
-        response = client.get(
+        response = client.get(
+            f"v1/workspaces/{workspace_id}/{endpoint_type}s/{resource_id}"
+        )
         if response.status_code != 200:
             raise FabricHTTPException(response)
-        tds_endpoint = response.json().get("properties", {}).get("connectionString")

+        if endpoint_type == "warehouse":
+            tds_endpoint = response.json().get("properties", {}).get("connectionString")
+        else:
+            tds_endpoint = (
+                response.json()
+                .get("properties", {})
+                .get("sqlEndpointProperties", {})
+                .get("connectionString")
+            )
+
+        # Set up the connection string
         access_token = SynapseTokenProvider()()
         tokenstruct = bytes2mswin_bstr(access_token.encode())
-        conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={
+        conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={name};Encrypt=Yes;"

         if timeout is not None:
             conn_str += f"Connect Timeout={timeout};"
@@ -63,7 +81,7 @@ class ConnectWarehouse:
         self, sql: Union[str, List[str]]
     ) -> Union[List[pd.DataFrame], pd.DataFrame, None]:
         """
-        Runs a SQL or T-SQL query (or multiple queries) against a Fabric Warehouse.
+        Runs a SQL or T-SQL query (or multiple queries) against a Fabric Warehouse/Lakehouse.

         Parameters
         ----------
@@ -76,10 +94,10 @@ class ConnectWarehouse:
             A list of pandas DataFrames if multiple SQL queries return results,
             a single DataFrame if one query is executed and returns results, or None.
         """
+
         cursor = None
-        results = []
+        results = []

-        # If the input is a single string, convert it to a list for consistency
         if isinstance(sql, str):
             sql = [sql]

@@ -101,10 +119,7 @@ class ConnectWarehouse:
                     results.append(result)

             # Return results if any queries returned a result set
-            if results
-                return results if len(results) > 1 else results[0]
-            else:
-                return None
+            return results if len(results) > 1 else (results[0] if results else None)

         finally:
             if cursor:
@@ -118,3 +133,33 @@ class ConnectWarehouse:

     def close(self):
         self.connection.close()
+
+
+class ConnectWarehouse(ConnectBase):
+    def __init__(
+        self,
+        warehouse: str,
+        workspace: Optional[Union[str, uuid.UUID]] = None,
+        timeout: Optional[int] = None,
+    ):
+        super().__init__(
+            name=warehouse,
+            workspace=workspace,
+            timeout=timeout,
+            endpoint_type="warehouse",
+        )
+
+
+class ConnectLakehouse(ConnectBase):
+    def __init__(
+        self,
+        lakehouse: str,
+        workspace: Optional[Union[str, uuid.UUID]] = None,
+        timeout: Optional[int] = None,
+    ):
+        super().__init__(
+            name=lakehouse,
+            workspace=workspace,
+            timeout=timeout,
+            endpoint_type="lakehouse",
+        )
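With the ConnectBase refactor, warehouse and lakehouse connections share one code path: the subclass only picks endpoint_type, which selects both the ID resolver and where the TDS connection string lives in the REST response (properties.connectionString for warehouses, properties.sqlEndpointProperties.connectionString for lakehouse SQL endpoints). A sketch of the new lakehouse connection, assuming a Fabric notebook with ODBC Driver 18 available; the execution method's name is truncated in this diff, so query below is an assumption based on the published package, and the object/table names are placeholders:

    from sempy_labs._sql import ConnectLakehouse

    # Connects to the lakehouse's SQL analytics endpoint over ODBC.
    conn = ConnectLakehouse(lakehouse="MyLakehouse", workspace="MyWorkspace", timeout=30)
    try:
        # Returns a single DataFrame for one query, a list for several, or None.
        df = conn.query("SELECT TOP 10 * FROM dbo.my_table")
    finally:
        conn.close()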
sempy_labs/_vertipaq.py
CHANGED

@@ -13,6 +13,7 @@ from sempy_labs._helper_functions import (
     resolve_dataset_id,
     save_as_delta_table,
     resolve_workspace_capacity,
+    get_max_run_id,
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -27,7 +28,7 @@ def vertipaq_analyzer(
     dataset: str,
     workspace: Optional[str] = None,
     export: Optional[str] = None,
-    read_stats_from_data:
+    read_stats_from_data: bool = False,
     **kwargs,
 ):
     """
@@ -336,10 +337,10 @@
             int_cols.append(k)
         elif v in ["float", "double"] and k != "Temperature":
             pct_cols.append(k)
-    colSize[int_cols] = colSize[int_cols].
-    temp[int_cols] = temp[int_cols].
-    colSize[pct_cols] = colSize[pct_cols].
-    temp[pct_cols] = temp[pct_cols].
+    colSize[int_cols] = colSize[int_cols].map("{:,}".format)
+    temp[int_cols] = temp[int_cols].map("{:,}".format)
+    colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format)
+    temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format)

     # Tables
     int_cols = []
@@ -351,8 +352,8 @@
             pct_cols.append(k)
     export_Table = dfT.copy()

-    dfT[int_cols] = dfT[int_cols].
-    dfT[pct_cols] = dfT[pct_cols].
+    dfT[int_cols] = dfT[int_cols].map("{:,}".format)
+    dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format)

     # Relationships
     dfR = pd.merge(
@@ -391,7 +392,7 @@
             int_cols.append(k)
     if not read_stats_from_data:
         int_cols.remove("Missing Rows")
-    dfR[int_cols] = dfR[int_cols].
+    dfR[int_cols] = dfR[int_cols].map("{:,}".format)

     # Partitions
     dfP = dfP[
@@ -414,7 +415,7 @@
         if v in ["int", "long", "double", "float"]:
             int_cols.append(k)
     intList = ["Record Count", "Segment Count", "Records per Segment"]
-    dfP[intList] = dfP[intList].
+    dfP[intList] = dfP[intList].map("{:,}".format)

     # Hierarchies
     dfH_filt = dfH[dfH["Level Ordinal"] == 0]
@@ -426,7 +427,7 @@
     dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
     export_Hier = dfH_filt.copy()
     intList = ["Used Size"]
-    dfH_filt[intList] = dfH_filt[intList].
+    dfH_filt[intList] = dfH_filt[intList].map("{:,}".format)

     # Model
     # Converting to KB/MB/GB necessitates division by 1024 * 1000.
@@ -456,7 +457,7 @@
     for k, v in vertipaq_map["Model"].items():
         if v in ["long", "int"] and k != "Compatibility Level":
             int_cols.append(k)
-    dfModel[int_cols] = dfModel[int_cols].
+    dfModel[int_cols] = dfModel[int_cols].map("{:,}".format)

     dataFrames = {
         "dfModel": dfModel,
@@ -483,26 +484,23 @@
     )

     if export == "table":
-        spark = SparkSession.builder.getOrCreate()
+        # spark = SparkSession.builder.getOrCreate()

         lakehouse_id = fabric.get_lakehouse_id()
         lake_workspace = fabric.resolve_workspace_name()
         lakehouse = resolve_lakehouse_name(
             lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
-        lakeTName = "
+        lakeTName = "vertipaqanalyzer_model"

         lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
-
         if len(lakeT_filt) == 0:
             runId = 1
         else:
-
-
-            runId = maxRunId + 1
+            max_run_id = get_max_run_id(table_name=lakeTName)
+            runId = max_run_id + 1

         dfMap = {
             "Columns": ["Columns", export_Col],
@@ -551,13 +549,13 @@
         df.columns = df.columns.str.replace(" ", "_")

         schema = {
-            "Capacity_Name":
-            "Capacity_Id":
-            "Workspace_Name":
-            "Workspace_Id":
-            "Dataset_Name":
-            "Dataset_Id":
-            "Configured_By":
+            "Capacity_Name": data_type_string,
+            "Capacity_Id": data_type_string,
+            "Workspace_Name": data_type_string,
+            "Workspace_Id": data_type_string,
+            "Dataset_Name": data_type_string,
+            "Dataset_Id": data_type_string,
+            "Configured_By": data_type_string,
         }

         schema.update(
@@ -566,6 +564,8 @@
                 for key, value in vertipaq_map[key_name].items()
             }
         )
+        schema["RunId"] = data_type_long
+        schema["Timestamp"] = data_type_timestamp

         delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
         save_as_delta_table(
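In _vertipaq.py, the run counter for export="table" now comes from the new get_max_run_id helper instead of a Spark SQL MAX(RunId) query, the number formatting switches to pandas .map with format strings, and the exported schema gains typed RunId and Timestamp columns. A usage sketch, assuming a Fabric notebook with a lakehouse attached and that vertipaq_analyzer remains exported at the package root; the names are placeholders:

    from sempy_labs import vertipaq_analyzer

    # Writes the analyzer output to delta tables (vertipaqanalyzer_*) in the
    # attached lakehouse; RunId is now derived via get_max_run_id per run.
    vertipaq_analyzer(
        dataset="MySemanticModel",
        workspace="MyWorkspace",
        export="table",
        read_stats_from_data=False,  # explicit default in this release
    )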
sempy_labs/_warehouses.py
ADDED

@@ -0,0 +1,132 @@
+import sempy.fabric as fabric
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    pagination,
+    lro,
+)
+import pandas as pd
+from typing import Optional
+import sempy_labs._icons as icons
+from sempy.fabric.exceptions import FabricHTTPException
+
+
+def create_warehouse(
+    warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None
+):
+    """
+    Creates a Fabric warehouse.
+
+    Parameters
+    ----------
+    warehouse: str
+        Name of the warehouse.
+    description : str, default=None
+        A description of the warehouse.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    request_body = {"displayName": warehouse}
+
+    if description:
+        request_body["description"] = description
+
+    client = fabric.FabricRestClient()
+    response = client.post(
+        f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body
+    )
+
+    lro(client, response, status_codes=[201, 202])
+
+    print(
+        f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace}' workspace."
+    )
+
+
+def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame:
+    """
+    Shows the warehouses within a workspace.
+
+    Parameters
+    ----------
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the warehouses within a workspace.
+    """
+
+    df = pd.DataFrame(
+        columns=[
+            "Warehouse Name",
+            "Warehouse Id",
+            "Description",
+            "Connection Info",
+            "Created Date",
+            "Last Updated Time",
+        ]
+    )
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    client = fabric.FabricRestClient()
+    response = client.get(f"/v1/workspaces/{workspace_id}/warehouses")
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    responses = pagination(client, response)
+
+    for r in responses:
+        for v in r.get("value", []):
+            prop = v.get("properties", {})
+
+            new_data = {
+                "Warehouse Name": v.get("displayName"),
+                "Warehouse Id": v.get("id"),
+                "Description": v.get("description"),
+                "Connection Info": prop.get("connectionInfo"),
+                "Created Date": prop.get("createdDate"),
+                "Last Updated Time": prop.get("lastUpdatedTime"),
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    return df
+
+
+def delete_warehouse(name: str, workspace: Optional[str] = None):
+    """
+    Deletes a Fabric warehouse.
+
+    Parameters
+    ----------
+    name: str
+        Name of the warehouse.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    item_id = fabric.resolve_item_id(
+        item_name=name, type="Warehouse", workspace=workspace
+    )
+
+    client = fabric.FabricRestClient()
+    response = client.delete(f"/v1/workspaces/{workspace_id}/warehouses/{item_id}")
+
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    print(
+        f"{icons.green_dot} The '{name}' warehouse within the '{workspace}' workspace has been deleted."
+    )
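The new _warehouses.py module adds warehouse lifecycle helpers: create_warehouse POSTs and waits on the long-running operation via lro, list_warehouses pages through results with pagination, and delete_warehouse resolves the item ID before issuing the DELETE. A sketch of the full round trip, assuming a Fabric notebook; the warehouse and workspace names are placeholders:

    from sempy_labs._warehouses import (
        create_warehouse,
        delete_warehouse,
        list_warehouses,
    )

    # Create, enumerate, then remove a warehouse in the given workspace.
    create_warehouse(warehouse="MyWarehouse", description="Demo", workspace="MyWorkspace")
    df = list_warehouses(workspace="MyWorkspace")
    print(df[["Warehouse Name", "Warehouse Id"]])
    delete_warehouse(name="MyWarehouse", workspace="MyWorkspace")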
sempy_labs/_workspaces.py
CHANGED

@@ -22,9 +22,6 @@ def delete_user_from_workspace(email_address: str, workspace: Optional[str] = No
         The name of the workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-
-    Returns
-    -------
     """

     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
sempy_labs/admin/__init__.py
ADDED

@@ -0,0 +1,53 @@
+from sempy_labs.admin._basic_functions import (
+    assign_workspaces_to_capacity,
+    list_capacities,
+    list_tenant_settings,
+    list_capacities_delegated_tenant_settings,
+    unassign_workspaces_from_capacity,
+    list_external_data_shares,
+    revoke_external_data_share,
+    list_workspaces,
+    list_datasets,
+    list_item_access_details,
+    list_access_entities,
+    list_workspace_access_details,
+    list_items,
+)
+from sempy_labs.admin._domains import (
+    list_domains,
+    list_domain_workspaces,
+    assign_domain_workspaces,
+    assign_domain_workspaces_by_capacities,
+    create_domain,
+    update_domain,
+    delete_domain,
+    resolve_domain_id,
+    unassign_domain_workspaces,
+    unassign_all_domain_workspaces,
+)
+
+__all__ = [
+    "list_items",
+    "list_workspace_access_details",
+    "list_access_entities",
+    "list_item_access_details",
+    "list_datasets",
+    "list_workspaces",
+    "assign_workspaces_to_capacity",
+    "list_capacities",
+    "list_tenant_settings",
+    "list_domains",
+    "list_domain_workspaces",
+    "assign_domain_workspaces",
+    "assign_domain_workspaces_by_capacities",
+    "create_domain",
+    "update_domain",
+    "delete_domain",
+    "resolve_domain_id",
+    "unassign_domain_workspaces",
+    "unassign_all_domain_workspaces",
+    "list_capacities_delegated_tenant_settings",
+    "unassign_workspaces_from_capacity",
+    "list_external_data_shares",
+    "revoke_external_data_share",
+]