semantic-link-labs 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of semantic-link-labs might be problematic.
- {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/METADATA +67 -8
- {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/RECORD +87 -80
- sempy_labs/__init__.py +14 -12
- sempy_labs/_ai.py +8 -5
- sempy_labs/_capacities.py +120 -142
- sempy_labs/_capacity_migration.py +61 -94
- sempy_labs/_clear_cache.py +9 -8
- sempy_labs/_connections.py +107 -104
- sempy_labs/_data_pipelines.py +47 -49
- sempy_labs/_dataflows.py +45 -51
- sempy_labs/_dax.py +228 -6
- sempy_labs/_delta_analyzer.py +321 -0
- sempy_labs/_deployment_pipelines.py +72 -66
- sempy_labs/_environments.py +39 -36
- sempy_labs/_eventhouses.py +35 -35
- sempy_labs/_eventstreams.py +38 -39
- sempy_labs/_external_data_shares.py +29 -42
- sempy_labs/_gateways.py +103 -99
- sempy_labs/_generate_semantic_model.py +22 -30
- sempy_labs/_git.py +46 -66
- sempy_labs/_graphQL.py +95 -0
- sempy_labs/_helper_functions.py +227 -36
- sempy_labs/_job_scheduler.py +47 -59
- sempy_labs/_kql_databases.py +27 -34
- sempy_labs/_kql_querysets.py +23 -30
- sempy_labs/_list_functions.py +264 -167
- sempy_labs/_managed_private_endpoints.py +52 -47
- sempy_labs/_mirrored_databases.py +110 -134
- sempy_labs/_mirrored_warehouses.py +13 -13
- sempy_labs/_ml_experiments.py +36 -36
- sempy_labs/_ml_models.py +37 -38
- sempy_labs/_model_bpa.py +2 -2
- sempy_labs/_model_bpa_rules.py +8 -6
- sempy_labs/_model_dependencies.py +2 -0
- sempy_labs/_notebooks.py +28 -29
- sempy_labs/_one_lake_integration.py +2 -0
- sempy_labs/_query_scale_out.py +63 -81
- sempy_labs/_refresh_semantic_model.py +12 -14
- sempy_labs/_spark.py +54 -79
- sempy_labs/_sql.py +7 -11
- sempy_labs/_translations.py +2 -2
- sempy_labs/_vertipaq.py +11 -6
- sempy_labs/_warehouses.py +30 -33
- sempy_labs/_workloads.py +15 -20
- sempy_labs/_workspace_identity.py +13 -17
- sempy_labs/_workspaces.py +49 -48
- sempy_labs/admin/__init__.py +2 -0
- sempy_labs/admin/_basic_functions.py +244 -281
- sempy_labs/admin/_domains.py +186 -103
- sempy_labs/admin/_external_data_share.py +26 -31
- sempy_labs/admin/_git.py +17 -22
- sempy_labs/admin/_items.py +34 -48
- sempy_labs/admin/_scanner.py +61 -49
- sempy_labs/directlake/_directlake_schema_compare.py +2 -0
- sempy_labs/directlake/_dl_helper.py +10 -11
- sempy_labs/directlake/_generate_shared_expression.py +4 -5
- sempy_labs/directlake/_get_directlake_lakehouse.py +1 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +1 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +2 -0
- sempy_labs/directlake/_warm_cache.py +2 -0
- sempy_labs/graph/__init__.py +33 -0
- sempy_labs/graph/_groups.py +402 -0
- sempy_labs/graph/_teams.py +113 -0
- sempy_labs/graph/_users.py +191 -0
- sempy_labs/lakehouse/__init__.py +4 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +12 -12
- sempy_labs/lakehouse/_get_lakehouse_tables.py +16 -22
- sempy_labs/lakehouse/_lakehouse.py +104 -7
- sempy_labs/lakehouse/_shortcuts.py +42 -20
- sempy_labs/migration/__init__.py +4 -0
- sempy_labs/migration/_direct_lake_to_import.py +66 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +3 -2
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +1 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +1 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +2 -0
- sempy_labs/migration/_refresh_calc_tables.py +2 -2
- sempy_labs/report/_download_report.py +8 -13
- sempy_labs/report/_generate_report.py +49 -46
- sempy_labs/report/_paginated.py +20 -26
- sempy_labs/report/_report_functions.py +52 -47
- sempy_labs/report/_report_list_functions.py +2 -0
- sempy_labs/report/_report_rebind.py +6 -10
- sempy_labs/report/_reportwrapper.py +187 -220
- sempy_labs/tom/_model.py +12 -6
- {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/top_level.txt +0 -0
sempy_labs/_helper_functions.py
CHANGED
@@ -15,6 +15,8 @@ from azure.core.credentials import TokenCredential, AccessToken
 import urllib.parse
 import numpy as np
 from IPython.display import display, HTML
+import requests
+import sempy_labs._authentication as auth
 
 
 def _build_url(url: str, params: dict) -> str:
@@ -29,27 +31,35 @@ def _build_url(url: str, params: dict) -> str:
 
 
 def create_abfss_path(
-    lakehouse_id: UUID,
+    lakehouse_id: UUID,
+    lakehouse_workspace_id: UUID,
+    delta_table_name: Optional[str] = None,
 ) -> str:
     """
     Creates an abfss path for a delta table in a Fabric lakehouse.
 
     Parameters
     ----------
-    lakehouse_id : UUID
+    lakehouse_id : uuid.UUID
         ID of the Fabric lakehouse.
-    lakehouse_workspace_id : UUID
+    lakehouse_workspace_id : uuid.UUID
         ID of the Fabric workspace.
-    delta_table_name : str
+    delta_table_name : str, default=None
         Name of the delta table name.
 
     Returns
     -------
     str
-        An abfss path which can be used to save/reference a delta table in a Fabric lakehouse.
+        An abfss path which can be used to save/reference a delta table in a Fabric lakehouse or lakehouse.
     """
 
-
+    fp = _get_default_file_path()
+    path = f"abfss://{lakehouse_workspace_id}@{fp}/{lakehouse_id}"
+
+    if delta_table_name is not None:
+        path += f"/Tables/{delta_table_name}"
+
+    return path
 
 
 def _get_default_file_path() -> str:
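Note: after this change create_abfss_path returns the lakehouse root when no table name is passed and appends a /Tables/<name> segment otherwise. A minimal usage sketch with placeholder GUIDs (the OneLake host shown in the comment is an assumption; the real value comes from _get_default_file_path()):

    from sempy_labs._helper_functions import create_abfss_path

    # Placeholder GUIDs, for illustration only.
    workspace_id = "11111111-1111-1111-1111-111111111111"
    lakehouse_id = "22222222-2222-2222-2222-222222222222"

    # Root of the lakehouse (no table name supplied).
    root_path = create_abfss_path(lakehouse_id, workspace_id)

    # Path to a specific delta table.
    table_path = create_abfss_path(lakehouse_id, workspace_id, "sales")
    # e.g. "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/sales"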
@@ -166,6 +176,16 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str | UUID] = None)
     )
 
 
+def resolve_item_id(
+    item: str | UUID, type: str, workspace: Optional[str] = None
+) -> UUID:
+
+    if _is_valid_uuid(item):
+        return item
+    else:
+        return fabric.resolve_item_id(item_name=item, type=type, workspace=workspace)
+
+
 def resolve_item_name_and_id(
     item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None
 ) -> Tuple[str, UUID]:
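Note: the new resolve_item_id helper returns its input unchanged when it is already a valid UUID and only falls back to a name lookup via fabric.resolve_item_id otherwise; this is the pattern the str | UUID parameters introduced throughout this release rely on. An illustrative sketch (item and workspace names are placeholders):

    from sempy_labs._helper_functions import resolve_item_id

    # By name: performs a lookup in the workspace.
    item_id = resolve_item_id(item="Sales Notebook", type="Notebook", workspace="My Workspace")

    # By id: returned as-is, no lookup performed.
    same_id = resolve_item_id(item=item_id, type="Notebook", workspace="My Workspace")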
@@ -244,7 +264,7 @@ def resolve_dataset_id(
 
     Returns
     -------
-    UUID
+    uuid.UUID
         The ID of the semantic model.
     """
 
@@ -524,8 +544,9 @@ def save_as_delta_table(
             f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names."
         )
 
-    dataframe.columns =
-
+    dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns]
+
+    spark = _create_spark_session()
 
     type_mapping = {
         "string": StringType(),
@@ -719,7 +740,7 @@ def resolve_item_type(item_id: UUID, workspace: Optional[str | UUID] = None) ->
 
     if dfI_filt.empty:
         raise ValueError(
-            f"Invalid 'item_id' parameter. The '{item_id}' item was not found in the '{workspace_name}' workspace."
+            f"{icons.red_dot} Invalid 'item_id' parameter. The '{item_id}' item was not found in the '{workspace_name}' workspace."
         )
     return dfI_filt["Type"].iloc[0]
 
@@ -780,7 +801,7 @@ def resolve_workspace_capacity(
 
     Returns
     -------
-    Tuple[UUID, str]
+    Tuple[uuid.UUID, str]
         capacity Id; capacity came.
     """
 
@@ -811,7 +832,7 @@ def get_capacity_id(workspace: Optional[str | UUID] = None) -> UUID:
 
     Returns
     -------
-    UUID
+    uuid.UUID
         The capacity Id.
     """
 
@@ -849,7 +870,7 @@ def get_capacity_name(workspace: Optional[str | UUID] = None) -> str:
     capacity_id = get_capacity_id(workspace)
     dfC = fabric.list_capacities()
     dfC_filt = dfC[dfC["Id"] == capacity_id]
-    if
+    if dfC_filt.empty:
         raise ValueError(
             f"{icons.red_dot} The '{capacity_id}' capacity Id does not exist."
         )
@@ -880,7 +901,7 @@ def resolve_capacity_name(capacity_id: Optional[UUID] = None) -> str:
     dfC = fabric.list_capacities()
     dfC_filt = dfC[dfC["Id"] == capacity_id]
 
-    if
+    if dfC_filt.empty:
         raise ValueError(
             f"{icons.red_dot} The '{capacity_id}' capacity Id does not exist."
         )
@@ -901,7 +922,7 @@ def resolve_capacity_id(capacity_name: Optional[str] = None) -> UUID:
 
     Returns
     -------
-    UUID
+    uuid.UUID
         The capacity Id.
     """
 
@@ -911,7 +932,7 @@ def resolve_capacity_id(capacity_name: Optional[str] = None) -> UUID:
     dfC = fabric.list_capacities()
     dfC_filt = dfC[dfC["Display Name"] == capacity_name]
 
-    if
+    if dfC_filt.empty:
         raise ValueError(
             f"{icons.red_dot} The '{capacity_name}' capacity does not exist."
         )
@@ -994,14 +1015,14 @@ def pagination(client, response):
     return responses
 
 
-def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID:
+def resolve_deployment_pipeline_id(deployment_pipeline: str | UUID) -> UUID:
     """
     Obtains the Id for a given deployment pipeline.
 
     Parameters
     ----------
-    deployment_pipeline : str
-        The deployment pipeline name
+    deployment_pipeline : str | uuid.UUID
+        The deployment pipeline name or ID.
 
     Returns
     -------
@@ -1011,15 +1032,17 @@ def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID:
 
     from sempy_labs._deployment_pipelines import list_deployment_pipelines
 
-
-
-
-    raise ValueError(
-        f"{icons.red_dot} The '{deployment_pipeline}' deployment pipeline is not valid."
-    )
-    deployment_pipeline_id = dfP_filt["Deployment Pipeline Id"].iloc[0]
+    if _is_valid_uuid(deployment_pipeline):
+        return deployment_pipeline
+    else:
 
-
+        dfP = list_deployment_pipelines()
+        dfP_filt = dfP[dfP["Deployment Pipeline Name"] == deployment_pipeline]
+        if len(dfP_filt) == 0:
+            raise ValueError(
+                f"{icons.red_dot} The '{deployment_pipeline}' deployment pipeline is not valid."
+            )
+        return dfP_filt["Deployment Pipeline Id"].iloc[0]
 
 
 class FabricTokenCredential(TokenCredential):
@@ -1209,7 +1232,7 @@ def resolve_notebook_id(notebook: str, workspace: Optional[str | UUID] = None) -
 
     Returns
     -------
-    UUID
+    uuid.UUID
         The notebook Id.
     """
 
@@ -1224,23 +1247,33 @@ def generate_guid():
 
 
 def _get_column_aggregate(
-    lakehouse: str,
     table_name: str,
     column_name: str = "RunId",
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
     function: str = "max",
     default_value: int = 0,
 ) -> int:
 
-    from pyspark.sql import
+    from pyspark.sql.functions import approx_count_distinct
+    from pyspark.sql import functions as F
 
-    spark = SparkSession.builder.getOrCreate()
     function = function.upper()
-
-
-
-
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+    path = create_abfss_path(lakehouse_id, workspace_id, table_name)
+
+    spark = _create_spark_session()
+    df = spark.read.format("delta").load(path)
 
-
+    if function in {"COUNTDISTINCT", "DISTINCTCOUNT"}:
+        result = df.select(F.count_distinct(F.col(column_name)))
+    elif "APPROX" in function:
+        result = df.select(approx_count_distinct(column_name))
+    else:
+        result = df.selectExpr(f"{function}({column_name})")
+
+    return result.collect()[0][0] or default_value
 
 
 def _make_list_unique(my_list):
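Note: _get_column_aggregate now resolves the lakehouse and workspace itself, loads the delta table from its abfss path, and understands distinct-count style aggregates alongside plain SQL aggregate functions. A hedged sketch of how the private helper might be called (table, lakehouse, and workspace names are placeholders; relying on lakehouse=None to mean the default lakehouse is an assumption):

    from sempy_labs._helper_functions import _get_column_aggregate

    # Max RunId in a tracking table (function and column_name use their defaults).
    last_run = _get_column_aggregate(table_name="model_bpa_runs", lakehouse="MyLakehouse")

    # Distinct count of a column, resolved against an explicit workspace.
    n_customers = _get_column_aggregate(
        table_name="sales",
        column_name="CustomerKey",
        lakehouse="MyLakehouse",
        workspace="My Workspace",
        function="countdistinct",
    )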
@@ -1446,3 +1479,161 @@ def _get_fabric_context_setting(name: str):
 def get_tenant_id():
 
     _get_fabric_context_setting(name="trident.tenant.id")
+
+
+def _base_api(
+    request: str,
+    client: str = "fabric",
+    method: str = "get",
+    payload: Optional[str] = None,
+    status_codes: Optional[int] = 200,
+    uses_pagination: bool = False,
+    lro_return_json: bool = False,
+    lro_return_status_code: bool = False,
+):
+
+    from sempy_labs._authentication import _get_headers
+
+    if (lro_return_json or lro_return_status_code) and status_codes is None:
+        status_codes = [200, 202]
+
+    if isinstance(status_codes, int):
+        status_codes = [status_codes]
+
+    if client == "fabric":
+        c = fabric.FabricRestClient()
+    elif client == "fabric_sp":
+        c = fabric.FabricRestClient(token_provider=auth.token_provider.get())
+    elif client in ["azure", "graph"]:
+        pass
+    else:
+        raise ValueError(f"{icons.red_dot} The '{client}' client is not supported.")
+
+    if client not in ["azure", "graph"]:
+        if method == "get":
+            response = c.get(request)
+        elif method == "delete":
+            response = c.delete(request)
+        elif method == "post":
+            response = c.post(request, json=payload)
+        elif method == "patch":
+            response = c.patch(request, json=payload)
+        elif method == "put":
+            response = c.put(request, json=payload)
+        else:
+            raise NotImplementedError
+    else:
+        headers = _get_headers(auth.token_provider.get(), audience=client)
+        response = requests.request(
+            method.upper(),
+            f"https://graph.microsoft.com/v1.0/{request}",
+            headers=headers,
+            json=payload,
+        )
+
+    if lro_return_json:
+        return lro(c, response, status_codes).json()
+    elif lro_return_status_code:
+        return lro(c, response, status_codes, return_status_code=True)
+    else:
+        if response.status_code not in status_codes:
+            raise FabricHTTPException(response)
+        if uses_pagination:
+            responses = pagination(c, response)
+            return responses
+        else:
+            return response
+
+
+def _create_dataframe(columns: dict) -> pd.DataFrame:
+
+    return pd.DataFrame(columns=list(columns.keys()))
+
+
+def _update_dataframe_datatypes(dataframe: pd.DataFrame, column_map: dict):
+    """
+    Updates the datatypes of columns in a pandas dataframe based on a column map.
+
+    Example:
+    {
+        "Order": "int",
+        "Public": "bool",
+    }
+    """
+
+    for column, data_type in column_map.items():
+        if column in dataframe.columns:
+            if data_type == "int":
+                dataframe[column] = dataframe[column].astype(int)
+            elif data_type == "bool":
+                dataframe[column] = dataframe[column].astype(bool)
+            elif data_type == "float":
+                dataframe[column] = dataframe[column].astype(float)
+            elif data_type == "datetime":
+                dataframe[column] = pd.to_datetime(dataframe[column])
+            # This is for a special case in admin.list_reports where datetime itself does not work. Coerce fixes the issue.
+            elif data_type == "datetime_coerce":
+                dataframe[column] = pd.to_datetime(dataframe[column], errors="coerce")
+            # This is for list_synonyms since the weight column is float and can have NaN values.
+            elif data_type == "float_fillna":
+                dataframe[column] = dataframe[column].fillna(0).astype(float)
+            # This is to avoid NaN values in integer columns (for delta analyzer)
+            elif data_type == "int_fillna":
+                dataframe[column] = dataframe[column].fillna(0).astype(int)
+            elif data_type in ["str", "string"]:
+                dataframe[column] = dataframe[column].astype(str)
+            else:
+                raise NotImplementedError
+
+
+def _print_success(item_name, item_type, workspace_name, action="created"):
+    if action == "created":
+        print(
+            f"{icons.green_dot} The '{item_name}' {item_type} has been successfully created in the '{workspace_name}' workspace."
+        )
+    elif action == "deleted":
+        print(
+            f"{icons.green_dot} The '{item_name}' {item_type} has been successfully deleted from the '{workspace_name}' workspace."
+        )
+    else:
+        raise NotImplementedError
+
+
+def _pure_python_notebook() -> bool:
+
+    from sempy.fabric._environment import _on_jupyter
+
+    return _on_jupyter()
+
+
+def _create_spark_session():
+
+    if _pure_python_notebook():
+        raise ValueError(
+            f"{icons.red_dot} This function is only available in a PySpark notebook."
+        )
+
+    from pyspark.sql import SparkSession
+
+    return SparkSession.builder.getOrCreate()
+
+
+def _read_delta_table(path: str):
+
+    spark = _create_spark_session()
+
+    return spark.read.format("delta").load(path)
+
+
+def _delta_table_row_count(table_name: str) -> int:
+
+    spark = _create_spark_session()
+
+    return spark.table(table_name).count()
+
+
+def _run_spark_sql_query(query):
+
+    spark = _create_spark_session()
+
+    return spark.sql(query)
sempy_labs/_job_scheduler.py
CHANGED
@@ -1,14 +1,13 @@
-import sempy.fabric as fabric
 from sempy._utils._log import log
 import pandas as pd
 from typing import Optional
 from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     resolve_item_name_and_id,
-
-
+    _update_dataframe_datatypes,
+    _base_api,
+    _create_dataframe,
 )
-from sempy.fabric.exceptions import FabricHTTPException
 from uuid import UUID
 import sempy_labs._icons as icons
 
@@ -44,32 +43,26 @@ def list_item_job_instances(
         item=item, type=type, workspace=workspace
     )
 
-
-
-
+    columns = {
+        "Job Instance Id": "string",
+        "Item Name": "string",
+        "Item Id": "string",
+        "Item Type": "string",
+        "Job Type": "string",
+        "Invoke Type": "string",
+        "Status": "string",
+        "Root Activity Id": "string",
+        "Start Time UTC": "datetime",
+        "End Time UTC": "string",
+        "Error Message": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    responses = _base_api(
+        request=f"v1/workspaces/{workspace_id}/items/{item_id}/jobs/instances",
+        uses_pagination=True,
     )
 
-    if response.status_code != 200:
-        raise FabricHTTPException(response)
-
-    df = pd.DataFrame(
-        columns=[
-            "Job Instance Id",
-            "Item Name",
-            "Item Id",
-            "Item Type",
-            "Job Type",
-            "Invoke Type",
-            "Status",
-            "Root Activity Id",
-            "Start Time UTC",
-            "End Time UTC",
-            "Failure Reason",
-        ]
-    )
-
-    responses = pagination(client, response)
-
     if not responses[0].get("value"):
         return df
 
@@ -95,6 +88,8 @@ def list_item_job_instances(
     if dfs:
         df = pd.concat(dfs, ignore_index=True)
 
+    df = _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
     return df
 
 
@@ -134,31 +129,26 @@ def list_item_schedules(
         item=item, type=type, workspace=workspace
     )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    columns = {
+        "Job Schedule Id": "string",
+        "Enabled": "bool",
+        "Created Date Time": "datetime",
+        "Start Date Time": "datetime",
+        "End Date Time": "string",
+        "Local Time Zone Id": "string",
+        "Type": "string",
+        "Interval": "string",
+        "Weekdays": "string",
+        "Times": "string",
+        "Owner Id": "string",
+        "Owner Type": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    response = _base_api(
+        request=f"v1/workspaces/{workspace_id}/items/{item_id}/jobs/{job_type}/schedules"
     )
 
-    client = fabric.FabricRestClient()
-    response = client.get(
-        f"v1/workspaces/{workspace_id}/items/{item_id}/jobs/{job_type}/schedules"
-    )
-
-    if response.status_code != 200:
-        raise FabricHTTPException(response)
-
     for v in response.json().get("value", []):
         config = v.get("configuration", {})
         own = v.get("owner", {})
@@ -179,9 +169,7 @@ def list_item_schedules(
 
     df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
 
-    df
-    df["Created Date Time"] = pd.to_datetime(df["Created Date Time"])
-    df["Start Date Time"] = pd.to_datetime(df["Start Date Time"])
+    _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
 
@@ -217,11 +205,11 @@ def run_on_demand_item_job(
         item=item, type=type, workspace=workspace
     )
 
-
-
-
+    _base_api(
+        request=f"v1/workspaces/{workspace_id}/items/{item_id}/jobs/instances?jobType={job_type}",
+        method="post",
+        lro_return_status_code=True,
+        status_codes=202,
     )
 
-    lro(client, response, return_status_code=True)
-
     print(f"{icons.green_dot} The '{item_name}' {type.lower()} has been executed.")
sempy_labs/_kql_databases.py
CHANGED
@@ -4,10 +4,9 @@ import sempy_labs._icons as icons
 from typing import Optional
 from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
-
-
+    _base_api,
+    _create_dataframe,
 )
-from sempy.fabric.exceptions import FabricHTTPException
 from uuid import UUID
 
 
@@ -30,26 +29,22 @@ def list_kql_databases(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
         A pandas dataframe showing the KQL databases within a workspace.
     """
 
-
-
-
-
-
-
-
-
-
-
-    )
+    columns = {
+        "KQL Database Name": "string",
+        "KQL Database Id": "string",
+        "Description": "string",
+        "Parent Eventhouse Item Id": "string",
+        "Query Service URI": "string",
+        "Ingestion Service URI": "string",
+        "Database Type": "string",
+    }
+    df = _create_dataframe(columns=columns)
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
-
-
-
-        raise FabricHTTPException(response)
-
-    responses = pagination(client, response)
+    responses = _base_api(
+        request=f"v1/workspaces/{workspace_id}/kqlDatabases", uses_pagination=True
+    )
 
     for r in responses:
         for v in r.get("value", []):
@@ -91,18 +86,19 @@ def create_kql_database(
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
-
+    payload = {"displayName": name}
 
     if description:
-
-
-
-
-
+        payload["description"] = description
+
+    _base_api(
+        request=f"v1/workspaces/{workspace_id}/kqlDatabases",
+        method="post",
+        payload=payload,
+        status_codes=[201, 202],
+        lro_return_status_code=True,
     )
 
-    lro(client, response, status_codes=[201, 202])
-
     print(
         f"{icons.green_dot} The '{name}' KQL database has been created within the '{workspace_name}' workspace."
     )
@@ -129,13 +125,10 @@ def delete_kql_database(name: str, workspace: Optional[str | UUID] = None):
         item_name=name, type="KQLDatabase", workspace=workspace_id
     )
 
-
-
-
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/kqlDatabases/{kql_database_id}",
+        method="delete",
     )
-
-    if response.status_code != 200:
-        raise FabricHTTPException(response)
     print(
         f"{icons.green_dot} The '{name}' KQL database within the '{workspace_name}' workspace has been deleted."
     )