semantic-link-labs 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.10.0.dist-info → semantic_link_labs-0.11.0.dist-info}/METADATA +9 -6
- {semantic_link_labs-0.10.0.dist-info → semantic_link_labs-0.11.0.dist-info}/RECORD +95 -87
- sempy_labs/__init__.py +11 -1
- sempy_labs/_a_lib_info.py +2 -0
- sempy_labs/_capacities.py +2 -0
- sempy_labs/_connections.py +11 -0
- sempy_labs/_dashboards.py +9 -4
- sempy_labs/_data_pipelines.py +5 -0
- sempy_labs/_dataflows.py +284 -17
- sempy_labs/_daxformatter.py +80 -0
- sempy_labs/_delta_analyzer_history.py +4 -1
- sempy_labs/_deployment_pipelines.py +4 -0
- sempy_labs/_documentation.py +3 -0
- sempy_labs/_environments.py +10 -1
- sempy_labs/_eventhouses.py +12 -5
- sempy_labs/_eventstreams.py +11 -3
- sempy_labs/_external_data_shares.py +8 -2
- sempy_labs/_gateways.py +26 -5
- sempy_labs/_git.py +11 -0
- sempy_labs/_graphQL.py +10 -3
- sempy_labs/_helper_functions.py +62 -10
- sempy_labs/_job_scheduler.py +54 -7
- sempy_labs/_kql_databases.py +11 -2
- sempy_labs/_kql_querysets.py +11 -3
- sempy_labs/_list_functions.py +17 -45
- sempy_labs/_managed_private_endpoints.py +11 -2
- sempy_labs/_mirrored_databases.py +17 -3
- sempy_labs/_mirrored_warehouses.py +9 -3
- sempy_labs/_ml_experiments.py +11 -3
- sempy_labs/_ml_models.py +11 -3
- sempy_labs/_model_bpa_rules.py +2 -0
- sempy_labs/_mounted_data_factories.py +12 -8
- sempy_labs/_notebooks.py +6 -3
- sempy_labs/_refresh_semantic_model.py +1 -0
- sempy_labs/_semantic_models.py +107 -0
- sempy_labs/_spark.py +7 -0
- sempy_labs/_sql_endpoints.py +208 -0
- sempy_labs/_sqldatabase.py +13 -4
- sempy_labs/_tags.py +5 -1
- sempy_labs/_user_delegation_key.py +2 -0
- sempy_labs/_variable_libraries.py +3 -1
- sempy_labs/_warehouses.py +13 -3
- sempy_labs/_workloads.py +3 -0
- sempy_labs/_workspace_identity.py +3 -0
- sempy_labs/_workspaces.py +14 -1
- sempy_labs/admin/__init__.py +2 -0
- sempy_labs/admin/_activities.py +6 -5
- sempy_labs/admin/_apps.py +31 -31
- sempy_labs/admin/_artifacts.py +8 -3
- sempy_labs/admin/_basic_functions.py +5 -0
- sempy_labs/admin/_capacities.py +39 -28
- sempy_labs/admin/_datasets.py +51 -51
- sempy_labs/admin/_domains.py +17 -1
- sempy_labs/admin/_external_data_share.py +8 -2
- sempy_labs/admin/_git.py +14 -9
- sempy_labs/admin/_items.py +15 -2
- sempy_labs/admin/_reports.py +64 -65
- sempy_labs/admin/_shared.py +7 -1
- sempy_labs/admin/_tags.py +5 -0
- sempy_labs/admin/_tenant.py +5 -2
- sempy_labs/admin/_users.py +9 -3
- sempy_labs/admin/_workspaces.py +88 -0
- sempy_labs/directlake/_dl_helper.py +2 -0
- sempy_labs/directlake/_generate_shared_expression.py +2 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +2 -4
- sempy_labs/directlake/_get_shared_expression.py +2 -0
- sempy_labs/directlake/_guardrails.py +2 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +5 -3
- sempy_labs/directlake/_warm_cache.py +1 -0
- sempy_labs/graph/_groups.py +22 -7
- sempy_labs/graph/_teams.py +7 -2
- sempy_labs/graph/_users.py +1 -0
- sempy_labs/lakehouse/_blobs.py +1 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +88 -27
- sempy_labs/lakehouse/_helper.py +2 -0
- sempy_labs/lakehouse/_lakehouse.py +38 -5
- sempy_labs/lakehouse/_livy_sessions.py +2 -1
- sempy_labs/lakehouse/_shortcuts.py +7 -1
- sempy_labs/migration/_direct_lake_to_import.py +2 -0
- sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
- sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
- sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
- sempy_labs/report/_download_report.py +2 -1
- sempy_labs/report/_generate_report.py +2 -0
- sempy_labs/report/_paginated.py +2 -0
- sempy_labs/report/_report_bpa.py +110 -122
- sempy_labs/report/_report_bpa_rules.py +2 -0
- sempy_labs/report/_report_functions.py +7 -0
- sempy_labs/report/_reportwrapper.py +86 -48
- sempy_labs/theme/__init__.py +12 -0
- sempy_labs/theme/_org_themes.py +96 -0
- sempy_labs/tom/_model.py +702 -35
- {semantic_link_labs-0.10.0.dist-info → semantic_link_labs-0.11.0.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.10.0.dist-info → semantic_link_labs-0.11.0.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.10.0.dist-info → semantic_link_labs-0.11.0.dist-info}/top_level.txt +0 -0
sempy_labs/admin/_users.py
CHANGED
@@ -5,8 +5,10 @@ from sempy_labs._helper_functions import (
 )
 from uuid import UUID
 import pandas as pd
+from sempy._utils._log import log


+@log
 def list_access_entities(
     user_email_address: str,
 ) -> pd.DataFrame:
@@ -43,6 +45,7 @@ def list_access_entities(
         uses_pagination=True,
     )

+    dfs = []
     for r in responses:
         for v in r.get("accessEntities", []):
             new_data = {
@@ -54,11 +57,15 @@ def list_access_entities(
                     "additionalPermissions"
                 ),
             }
-
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)

     return df


+@log
 def list_user_subscriptions(user: str | UUID) -> pd.DataFrame:
     """
     Shows a list of subscriptions for the specified user. This is a preview API call.
@@ -127,7 +134,6 @@ def list_user_subscriptions(user: str | UUID) -> pd.DataFrame:

     if rows:
         df = pd.DataFrame(rows, columns=list(columns.keys()))
-
-        _update_dataframe_datatypes(dataframe=df, column_map=columns)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df
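The change to `list_access_entities` above is one instance of a pattern applied across this release (also in `graph/_groups.py` and `graph/_teams.py` below): rows are collected as single-row DataFrames in a list and concatenated once after the loop, rather than calling `pd.concat` inside the loop. A minimal standalone sketch of that pattern, using made-up records rather than library code:

```python
import pandas as pd

# Illustrative records standing in for the API response rows.
records = [{"Name": "a", "Value": 1}, {"Name": "b", "Value": 2}]

df = pd.DataFrame(columns=["Name", "Value"])  # empty frame kept as the fallback

dfs = []
for rec in records:
    # One single-row DataFrame per record, appended to a list...
    dfs.append(pd.DataFrame(rec, index=[0]))

# ...then a single concat after the loop, only if anything was collected.
if dfs:
    df = pd.concat(dfs, ignore_index=True)

print(df)
```

Concatenating once at the end avoids the repeated copying that `pd.concat` inside a loop incurs and leaves an empty, correctly-typed frame when the response has no rows.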
sempy_labs/admin/_workspaces.py
CHANGED
@@ -2,15 +2,21 @@ from sempy_labs._helper_functions import (
     _base_api,
     _build_url,
     _encode_user,
+    _update_dataframe_datatypes,
+    _create_dataframe,
 )
+
 from uuid import UUID
 from typing import Optional
 from sempy_labs.admin._basic_functions import (
     _resolve_workspace_name_and_id,
 )
 import sempy_labs._icons as icons
+import pandas as pd
+from sempy._utils._log import log


+@log
 def add_user_to_workspace(
     user: str | UUID,
     role: str = "Member",
@@ -68,6 +74,7 @@ def add_user_to_workspace(
     )


+@log
 def delete_user_from_workspace(
     user: str | UUID,
     workspace: Optional[str | UUID] = None,
@@ -116,6 +123,7 @@ def delete_user_from_workspace(
     )


+@log
 def restore_deleted_workspace(workspace_id: UUID, name: str, email_address: str):
     """
     Restores a deleted workspace.
@@ -146,3 +154,83 @@ def restore_deleted_workspace(workspace_id: UUID, name: str, email_address: str)
     print(
         f"{icons.green_dot} The '{workspace_id}' workspace has been restored as '{name}'."
     )
+
+
+@log
+def list_orphaned_workspaces(top: int = 100) -> pd.DataFrame:
+    """
+    Shows a list of orphaned workspaces (those with no users or no admins).
+
+    This is a wrapper function for the following API:
+    `Admin - Groups ListGroupsAsAdmin <https://learn.microsoft.com/rest/api/power-bi/admin/groups-get-groups-as-admin>`_.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+    Parameters
+    ----------
+    top : int, default=100
+        The maximum number of results to return.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of orphaned workspaces.
+    """
+
+    # column structure with proper data types
+    columns = {
+        "Workspace Name": "string",
+        "Workspace Id": "string",
+        "Type": "string",
+        "State": "string",
+        "Is Read Only": "bool",
+        "Is On Dedicated Capacity": "bool",
+        "Capacity Migration Status": "string",
+        "Has Workspace Level Settings": "bool",
+        "Users": "list",
+    }
+
+    df = _create_dataframe(columns=columns)
+
+    url = (
+        "/v1.0/myorg/admin/groups?"
+        "$expand=users&"
+        "$filter=(not users/any()) or "
+        "(not users/any(u: u/groupUserAccessRight eq Microsoft.PowerBI.ServiceContracts.Api.GroupUserAccessRight'Admin'))&"
+        f"$top={top}"
+    )
+
+    response = _base_api(request=url, client="fabric_sp")
+    values = response.json().get("value", [])
+    df_raw = pd.json_normalize(values)
+
+    # friendly names and reorder
+    if not df_raw.empty:
+        df_raw = df_raw.rename(
+            columns={
+                "name": "Workspace Name",
+                "id": "Workspace Id",
+                "type": "Type",
+                "state": "State",
+                "isReadOnly": "Is Read Only",
+                "isOnDedicatedCapacity": "Is On Dedicated Capacity",
+                "capacityMigrationStatus": "Capacity Migration Status",
+                "hasWorkspaceLevelSettings ": "Has Workspace Level Settings",  # Note the space in original
+                "users": "Users",
+            }
+        )
+
+        df = df_raw[list(columns.keys())].copy()
+
+        # Convert empty lists to a more readable format for Users column
+        if "Users" in df.columns:
+            df["Users"] = df["Users"].apply(
+                lambda x: x if (x is not None and len(x) > 0) else []
+            )
+    else:
+        df = _create_dataframe(columns=columns)
+
+    # proper data types
+    _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    return df
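The largest addition in this file is the new `list_orphaned_workspaces` function. A hedged usage sketch, assuming it is re-exported from `sempy_labs.admin` (as the `admin/__init__.py +2` entry in the summary suggests) and run in a Fabric notebook with admin or service-principal permissions:

```python
import sempy_labs.admin as admin

# Hypothetical call: return at most 50 workspaces that have no users or no admin.
df = admin.list_orphaned_workspaces(top=50)

# Column names come from the function's own column map shown in the diff.
print(df[["Workspace Name", "Workspace Id", "State", "Is On Dedicated Capacity"]])
```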
sempy_labs/directlake/_dl_helper.py
CHANGED
@@ -13,6 +13,7 @@ from sempy_labs._helper_functions import (
 )


+@log
 def check_fallback_reason(
     dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
@@ -195,6 +196,7 @@ def generate_direct_lake_semantic_model(
     refresh_semantic_model(dataset=dataset, workspace=workspace_id)


+@log
 def get_direct_lake_source(
     dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> Tuple[str, str, UUID, UUID]:
sempy_labs/directlake/_generate_shared_expression.py
CHANGED
@@ -8,8 +8,10 @@ from sempy_labs._helper_functions import (
 from typing import Optional
 import sempy_labs._icons as icons
 from uuid import UUID
+from sempy._utils._log import log


+@log
 def generate_shared_expression(
     item_name: Optional[str] = None,
     item_type: str = "Lakehouse",
sempy_labs/directlake/_get_directlake_lakehouse.py
CHANGED
@@ -1,16 +1,14 @@
 import sempy.fabric as fabric
 from sempy_labs._helper_functions import (
     resolve_lakehouse_id,
-    resolve_lakehouse_name,
-    get_direct_lake_sql_endpoint,
-    resolve_workspace_name_and_id,
-    resolve_dataset_name_and_id,
 )
 from typing import Optional, Tuple
 from uuid import UUID
 import sempy_labs._icons as icons
+from sempy._utils._log import log


+@log
 def get_direct_lake_lakehouse(
     dataset: str | UUID,
     workspace: Optional[str | UUID] = None,
sempy_labs/directlake/_guardrails.py
CHANGED
@@ -6,8 +6,10 @@ from uuid import UUID
 from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
 )
+from sempy._utils._log import log


+@log
 def get_direct_lake_guardrails() -> pd.DataFrame:
     """
     Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query
sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py
CHANGED
@@ -13,6 +13,7 @@ from uuid import UUID
 import re


+@log
 def _extract_expression_list(expression):
     """
     Finds the pattern for DL/SQL & DL/OL expressions in the semantic model.
@@ -37,6 +38,7 @@ def _extract_expression_list(expression):
     return result


+@log
 def _get_direct_lake_expressions(
     dataset: str | UUID, workspace: Optional[str | UUID] = None
 ) -> dict:
@@ -111,9 +113,9 @@ def update_direct_lake_model_connection(

     Parameters
     ----------
-    dataset : str | UUID
+    dataset : str | uuid.UUID
         Name or ID of the semantic model.
-    workspace : str | UUID, default=None
+    workspace : str | uuid.UUID, default=None
         The Fabric workspace name or ID in which the semantic model exists.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
@@ -122,7 +124,7 @@ def update_direct_lake_model_connection(
         Defaults to None which resolves to the lakehouse attached to the notebook.
     source_type : str, default="Lakehouse"
         The type of source for the Direct Lake semantic model. Valid options: "Lakehouse", "Warehouse".
-    source_workspace : str | UUID, default=None
+    source_workspace : str | uuid.UUID, default=None
         The Fabric workspace name or ID used by the lakehouse/warehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
sempy_labs/graph/_groups.py
CHANGED
@@ -11,6 +11,7 @@ import sempy_labs._icons as icons
 from typing import List, Literal


+@log
 def resolve_group_id(group: str | UUID) -> UUID:
     """
     Resolves the group ID from the group name or ID.
@@ -74,6 +75,7 @@ def list_groups() -> pd.DataFrame:

     df = _create_dataframe(columns=columns)

+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Group Id": v.get("id"),
@@ -90,14 +92,16 @@ def list_groups() -> pd.DataFrame:
             "Visibility": v.get("visibility"),
             "Security Identifier": v.get("securityIdentifier"),
         }
+        dfs.append(pd.DataFrame(new_data, index=[0]))

-
-
-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df


+@log
 def _get_group(group_id: UUID) -> pd.DataFrame:
     """
     Shows a list of groups and their properties.
@@ -136,6 +140,7 @@ def _get_group(group_id: UUID) -> pd.DataFrame:
     }
     df = _create_dataframe(columns=columns)

+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Group Id": v.get("id"),
@@ -153,9 +158,11 @@ def _get_group(group_id: UUID) -> pd.DataFrame:
             "Security Identifier": v.get("securityIdentifier"),
         }

-
+        dfs.append(pd.DataFrame(new_data, index=[0]))

-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df

@@ -200,6 +207,7 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:

     df = _create_dataframe(columns=columns)

+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Member Id": v.get("id"),
@@ -214,8 +222,10 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
             "Given Name": v.get("givenName"),
             "Surname": v.get("surname"),
         }
+        dfs.append(pd.DataFrame(new_data, index=[0]))

-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)

     return df

@@ -260,6 +270,7 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:

     df = _create_dataframe(columns=columns)

+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Owner Id": v.get("id"),
@@ -274,12 +285,15 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:
             "Given Name": v.get("givenName"),
             "Surname": v.get("surname"),
         }
+        dfs.append(pd.DataFrame(new_data, index=[0]))

-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)

     return df


+@log
 def _base_add_to_group(
     group: str | UUID,
     object: str | UUID,
@@ -359,6 +373,7 @@ def add_group_members(
     _base_add_to_group(group=group, object=user, object_type="members")


+@log
 def add_group_owners(
     group: str | UUID,
     user: str | UUID | List[str | UUID],
sempy_labs/graph/_teams.py
CHANGED
@@ -42,6 +42,7 @@ def list_teams() -> pd.DataFrame:

     df = _create_dataframe(columns=columns)

+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Team Id": v.get("id"),
@@ -58,13 +59,16 @@ def list_teams() -> pd.DataFrame:
             "Member Count": v.get("memberCount"),
         }

-
+        dfs.append(pd.DataFrame(new_data, index=[0]))

-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df


+@log
 def list_chats(user: str | UUID) -> pd.DataFrame:
     """
     In progress...
@@ -95,6 +99,7 @@ def list_chats(user: str | UUID) -> pd.DataFrame:
     return df


+@log
 def send_teams_message(chat_id: str, message: str):
     """
     In progress...
sempy_labs/graph/_users.py
CHANGED
sempy_labs/lakehouse/_blobs.py
CHANGED
sempy_labs/lakehouse/_get_lakehouse_tables.py
CHANGED
@@ -40,6 +40,8 @@ def get_lakehouse_tables(
     This function can be executed in either a PySpark or pure Python notebook.

     This is a wrapper function for the following API: `Tables - List Tables <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/list-tables>`_ plus extended capabilities.
+    However, the above mentioned API does not support Lakehouse schemas (Preview) until it is in GA (General Availability). This version also supports schema
+    enabled Lakehouses.

     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).

@@ -68,6 +70,7 @@ def get_lakehouse_tables(
     columns = {
         "Workspace Name": "string",
         "Lakehouse Name": "string",
+        "Schema Name": "string",
         "Table Name": "string",
         "Format": "string",
         "Type": "string",
@@ -83,27 +86,57 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True

-
-
-
-
-
+    API_called = True
+    try:
+        responses = _base_api(
+            request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
+            uses_pagination=True,
+            client="fabric_sp",
+        )

-
-
+    except Exception as e:
+        API_called = False

     dfs = []
-
-
-
-
-
-
-
-
-
-
-
+    local_path = None
+    if API_called:
+        if not responses[0].get("data"):
+            return df
+
+        for r in responses:
+            for i in r.get("data", []):
+                new_data = {
+                    "Workspace Name": workspace_name,
+                    "Lakehouse Name": lakehouse_name,
+                    "Schema Name": "",
+                    "Table Name": i.get("name"),
+                    "Format": i.get("format"),
+                    "Type": i.get("type"),
+                    "Location": i.get("location"),
+                }
+                dfs.append(pd.DataFrame(new_data, index=[0]))
+    else:
+        local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
+        tables_path = os.path.join(local_path, "Tables")
+        list_schema = os.listdir(tables_path)
+
+        for schema_name in list_schema:
+            schema_table_path = os.path.join(local_path, "Tables", schema_name)
+            list_tables = os.listdir(schema_table_path)
+            for table_name in list_tables:
+                location_path = create_abfss_path(
+                    lakehouse_id, workspace_id, table_name, schema_name
+                )
+                new_data = {
+                    "Workspace Name": workspace_name,
+                    "Lakehouse Name": lakehouse_name,
+                    "Schema Name": schema_name,
+                    "Table Name": table_name,
+                    "Format": "delta",
+                    "Type": "Managed",
+                    "Location": location_path,
+                }
+                dfs.append(pd.DataFrame(new_data, index=[0]))

     if dfs:
         df = pd.concat(dfs, ignore_index=True)
@@ -111,17 +144,25 @@ def get_lakehouse_tables(
     if extended:
         sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
-
+        # Avoid mounting the lakehouse if is already mounted
+        if not local_path:
+            local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)

         df["Files"], df["Row Groups"], df["Table Size"] = None, None, None
         if count_rows:
             df["Row Count"] = None

         for i, r in df.iterrows():
+            use_schema = True
+            schema_name = r["Schema Name"]
             table_name = r["Table Name"]
             if r["Type"] == "Managed" and r["Format"] == "delta":
-                delta_table_path =
-
+                delta_table_path = (
+                    create_abfss_path(
+                        lakehouse_id, workspace_id, table_name, schema_name
+                    )
+                    .replace("//", "/")  # When schema_name = ""
+                    .replace("abfss:/", "abfss://")  # Put back the // after abfss:
                 )

                 if _pure_python_notebook():
@@ -135,29 +176,46 @@ def get_lakehouse_tables(
                     size_in_bytes = 0
                     for f in latest_files:
                         local_file_path = os.path.join(
-                            local_path, "Tables", table_name,
+                            local_path, "Tables", schema_name, table_name, f
                         )
+
                         if os.path.exists(local_file_path):
                             size_in_bytes += os.path.getsize(local_file_path)
                     num_latest_files = len(latest_files)
                 else:
                     delta_table = _get_delta_table(delta_table_path)
+
                     latest_files = _read_delta_table(delta_table_path).inputFiles()
                     table_df = delta_table.toDF()
                     table_details = delta_table.detail().collect()[0].asDict()
-                    num_latest_files = table_details.get("numFiles", 0)
                     size_in_bytes = table_details.get("sizeInBytes", 0)
+                    num_latest_files = table_details.get("numFiles", 0)
+
+                table_path = os.path.join(local_path, "Tables", schema_name, table_name)

-
-
+                file_paths = []
+                for file in latest_files:
+                    if _pure_python_notebook():
+                        file_paths.append(file)
+                    else:
+                        # Append the <Partition folder>/<filename> or <filename>
+                        find_table = file.find(table_name)
+                        len_file = len(file)
+                        len_table = len(table_name)
+                        last_chars = len_file - (find_table + len_table + 1)
+                        file_paths.append(file[-last_chars:])

                 num_rowgroups = 0
                 for filename in file_paths:
-
-
+                    parquet_file_path = f"{table_path}/{filename}"
+                    if os.path.exists(parquet_file_path):
+                        parquet_file = pq.ParquetFile(parquet_file_path)
+                        num_rowgroups += parquet_file.num_row_groups
+
                 df.at[i, "Files"] = num_latest_files
                 df.at[i, "Row Groups"] = num_rowgroups
                 df.at[i, "Table Size"] = size_in_bytes
+
                 if count_rows:
                     if _pure_python_notebook():
                         row_count = delta_table.to_pyarrow_table().num_rows
@@ -165,6 +223,9 @@ def get_lakehouse_tables(
                         row_count = table_df.count()
                     df.at[i, "Row Count"] = row_count

+    # Set "Schema Name" = "dbo" when it is ""
+    df.loc[df["Schema Name"] == "", "Schema Name"] = "dbo"
+
     if extended:
         intColumns = ["Files", "Row Groups", "Table Size"]
         df[intColumns] = df[intColumns].astype(int)
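One detail worth noting in the hunks above is the double `.replace` applied to the Delta table path: when a lakehouse has no schemas, `schema_name` is an empty string, so the generated abfss path contains a double slash that must be collapsed without breaking the `abfss://` scheme prefix. A small illustration with a made-up path (the real value comes from `create_abfss_path` in the library's helper functions):

```python
# Hypothetical OneLake path produced with an empty schema segment.
raw = "abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables//my_table"

# Collapse the double slash, then restore the one that belongs to the abfss:// scheme.
fixed = raw.replace("//", "/").replace("abfss:/", "abfss://")

print(fixed)
# abfss://<workspace_id>@onelake.dfs.fabric.microsoft.com/<lakehouse_id>/Tables/my_table
```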
sempy_labs/lakehouse/_helper.py
CHANGED
@@ -54,6 +54,7 @@ def is_v_ordered(
     return any(b"vorder" in key for key in ds_schema.keys())


+@log
 def delete_lakehouse(
     lakehouse: str | UUID, workspace: Optional[str | UUID] = None
 ) -> None:
@@ -77,6 +78,7 @@ def delete_lakehouse(
     delete_item(item=lakehouse, item_type="lakehouse", workspace=workspace)


+@log
 def update_lakehouse(
     name: Optional[str] = None,
     description: Optional[str] = None,