semantic-link-labs 0.10.1__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semantic_link_labs-0.10.1.dist-info → semantic_link_labs-0.11.0.dist-info}/METADATA +6 -5
- {semantic_link_labs-0.10.1.dist-info → semantic_link_labs-0.11.0.dist-info}/RECORD +94 -92
- sempy_labs/__init__.py +4 -0
- sempy_labs/_a_lib_info.py +1 -1
- sempy_labs/_capacities.py +2 -0
- sempy_labs/_connections.py +11 -0
- sempy_labs/_dashboards.py +9 -4
- sempy_labs/_data_pipelines.py +5 -0
- sempy_labs/_dataflows.py +284 -17
- sempy_labs/_daxformatter.py +2 -0
- sempy_labs/_delta_analyzer_history.py +4 -1
- sempy_labs/_deployment_pipelines.py +4 -0
- sempy_labs/_documentation.py +3 -0
- sempy_labs/_environments.py +10 -1
- sempy_labs/_eventhouses.py +12 -5
- sempy_labs/_eventstreams.py +11 -3
- sempy_labs/_external_data_shares.py +8 -2
- sempy_labs/_gateways.py +26 -5
- sempy_labs/_git.py +11 -0
- sempy_labs/_graphQL.py +10 -3
- sempy_labs/_helper_functions.py +62 -10
- sempy_labs/_job_scheduler.py +54 -7
- sempy_labs/_kql_databases.py +11 -2
- sempy_labs/_kql_querysets.py +11 -3
- sempy_labs/_list_functions.py +17 -2
- sempy_labs/_managed_private_endpoints.py +11 -2
- sempy_labs/_mirrored_databases.py +17 -3
- sempy_labs/_mirrored_warehouses.py +9 -3
- sempy_labs/_ml_experiments.py +11 -3
- sempy_labs/_ml_models.py +11 -3
- sempy_labs/_model_bpa_rules.py +2 -0
- sempy_labs/_mounted_data_factories.py +12 -8
- sempy_labs/_notebooks.py +3 -0
- sempy_labs/_refresh_semantic_model.py +1 -0
- sempy_labs/_semantic_models.py +6 -0
- sempy_labs/_spark.py +7 -0
- sempy_labs/_sql_endpoints.py +54 -31
- sempy_labs/_sqldatabase.py +13 -4
- sempy_labs/_tags.py +5 -1
- sempy_labs/_user_delegation_key.py +2 -0
- sempy_labs/_variable_libraries.py +3 -1
- sempy_labs/_warehouses.py +13 -3
- sempy_labs/_workloads.py +3 -0
- sempy_labs/_workspace_identity.py +3 -0
- sempy_labs/_workspaces.py +14 -1
- sempy_labs/admin/__init__.py +2 -0
- sempy_labs/admin/_activities.py +6 -5
- sempy_labs/admin/_apps.py +31 -31
- sempy_labs/admin/_artifacts.py +8 -3
- sempy_labs/admin/_basic_functions.py +5 -0
- sempy_labs/admin/_capacities.py +39 -28
- sempy_labs/admin/_datasets.py +51 -51
- sempy_labs/admin/_domains.py +17 -1
- sempy_labs/admin/_external_data_share.py +8 -2
- sempy_labs/admin/_git.py +14 -9
- sempy_labs/admin/_items.py +15 -2
- sempy_labs/admin/_reports.py +64 -65
- sempy_labs/admin/_shared.py +7 -1
- sempy_labs/admin/_tags.py +5 -0
- sempy_labs/admin/_tenant.py +5 -2
- sempy_labs/admin/_users.py +9 -3
- sempy_labs/admin/_workspaces.py +88 -0
- sempy_labs/directlake/_dl_helper.py +2 -0
- sempy_labs/directlake/_generate_shared_expression.py +2 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +2 -4
- sempy_labs/directlake/_get_shared_expression.py +2 -0
- sempy_labs/directlake/_guardrails.py +2 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +2 -0
- sempy_labs/directlake/_warm_cache.py +1 -0
- sempy_labs/graph/_groups.py +22 -7
- sempy_labs/graph/_teams.py +7 -2
- sempy_labs/graph/_users.py +1 -0
- sempy_labs/lakehouse/_blobs.py +1 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +88 -27
- sempy_labs/lakehouse/_helper.py +2 -0
- sempy_labs/lakehouse/_lakehouse.py +38 -5
- sempy_labs/lakehouse/_livy_sessions.py +2 -1
- sempy_labs/lakehouse/_shortcuts.py +7 -1
- sempy_labs/migration/_direct_lake_to_import.py +2 -0
- sempy_labs/mirrored_azure_databricks_catalog/_discover.py +4 -0
- sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +2 -0
- sempy_labs/report/_download_report.py +2 -1
- sempy_labs/report/_generate_report.py +2 -0
- sempy_labs/report/_paginated.py +2 -0
- sempy_labs/report/_report_bpa.py +110 -122
- sempy_labs/report/_report_bpa_rules.py +2 -0
- sempy_labs/report/_report_functions.py +7 -0
- sempy_labs/report/_reportwrapper.py +64 -31
- sempy_labs/theme/__init__.py +12 -0
- sempy_labs/theme/_org_themes.py +96 -0
- sempy_labs/tom/_model.py +509 -34
- {semantic_link_labs-0.10.1.dist-info → semantic_link_labs-0.11.0.dist-info}/WHEEL +0 -0
- {semantic_link_labs-0.10.1.dist-info → semantic_link_labs-0.11.0.dist-info}/licenses/LICENSE +0 -0
- {semantic_link_labs-0.10.1.dist-info → semantic_link_labs-0.11.0.dist-info}/top_level.txt +0 -0
sempy_labs/graph/_groups.py
CHANGED
@@ -11,6 +11,7 @@ import sempy_labs._icons as icons
 from typing import List, Literal
 
 
+@log
 def resolve_group_id(group: str | UUID) -> UUID:
     """
     Resolves the group ID from the group name or ID.
@@ -74,6 +75,7 @@ def list_groups() -> pd.DataFrame:
 
     df = _create_dataframe(columns=columns)
 
+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Group Id": v.get("id"),
@@ -90,14 +92,16 @@ def list_groups() -> pd.DataFrame:
             "Visibility": v.get("visibility"),
             "Security Identifier": v.get("securityIdentifier"),
         }
+        dfs.append(pd.DataFrame(new_data, index=[0]))
 
-
-
-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
 
 
+@log
 def _get_group(group_id: UUID) -> pd.DataFrame:
     """
     Shows a list of groups and their properties.
@@ -136,6 +140,7 @@ def _get_group(group_id: UUID) -> pd.DataFrame:
     }
     df = _create_dataframe(columns=columns)
 
+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Group Id": v.get("id"),
@@ -153,9 +158,11 @@ def _get_group(group_id: UUID) -> pd.DataFrame:
             "Security Identifier": v.get("securityIdentifier"),
         }
 
-
+        dfs.append(pd.DataFrame(new_data, index=[0]))
 
-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
 
@@ -200,6 +207,7 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
 
     df = _create_dataframe(columns=columns)
 
+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Member Id": v.get("id"),
@@ -214,8 +222,10 @@ def list_group_members(group: str | UUID) -> pd.DataFrame:
             "Given Name": v.get("givenName"),
             "Surname": v.get("surname"),
         }
+        dfs.append(pd.DataFrame(new_data, index=[0]))
 
-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
 
     return df
 
@@ -260,6 +270,7 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:
 
     df = _create_dataframe(columns=columns)
 
+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Owner Id": v.get("id"),
@@ -274,12 +285,15 @@ def list_group_owners(group: str | UUID) -> pd.DataFrame:
             "Given Name": v.get("givenName"),
             "Surname": v.get("surname"),
         }
+        dfs.append(pd.DataFrame(new_data, index=[0]))
 
-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
 
     return df
 
 
+@log
 def _base_add_to_group(
     group: str | UUID,
     object: str | UUID,
@@ -359,6 +373,7 @@ def add_group_members(
     _base_add_to_group(group=group, object=user, object_type="members")
 
 
+@log
 def add_group_owners(
     group: str | UUID,
     user: str | UUID | List[str | UUID],
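Note: the recurring change in this file (and in the other listing functions below) is to stop concatenating into the result DataFrame on every loop iteration and instead collect one single-row DataFrame per record in a dfs list, concatenating once at the end. A minimal, self-contained sketch of that pattern (the rows input and column names here are illustrative, not taken from the library):

import pandas as pd

# Illustrative input: records as returned by a paginated REST call (hypothetical data).
rows = [
    {"id": "1", "displayName": "Group A"},
    {"id": "2", "displayName": "Group B"},
]

# 0.11.0 pattern: build one single-row DataFrame per record and concatenate once,
# instead of calling pd.concat inside the loop.
dfs = []
for v in rows:
    new_data = {"Group Id": v.get("id"), "Group Name": v.get("displayName")}
    dfs.append(pd.DataFrame(new_data, index=[0]))

df = pd.DataFrame(columns=["Group Id", "Group Name"])  # empty fallback when nothing was returned
if dfs:
    df = pd.concat(dfs, ignore_index=True)

print(df)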
sempy_labs/graph/_teams.py
CHANGED
@@ -42,6 +42,7 @@ def list_teams() -> pd.DataFrame:
 
     df = _create_dataframe(columns=columns)
 
+    dfs = []
     for v in result.get("value"):
         new_data = {
             "Team Id": v.get("id"),
@@ -58,13 +59,16 @@ def list_teams() -> pd.DataFrame:
             "Member Count": v.get("memberCount"),
         }
 
-
+        dfs.append(pd.DataFrame(new_data, index=[0]))
 
-
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)
 
     return df
 
 
+@log
 def list_chats(user: str | UUID) -> pd.DataFrame:
     """
     In progress...
@@ -95,6 +99,7 @@ def list_chats(user: str | UUID) -> pd.DataFrame:
     return df
 
 
+@log
 def send_teams_message(chat_id: str, message: str):
     """
     In progress...
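Note: most of the remaining one- or two-line additions in this release simply place the @log decorator (imported from sempy._utils._log) in front of public functions. As a rough illustration of what a decorator applied this way can do — a generic stand-in, not sempy's actual implementation:

import functools
import logging

logging.basicConfig(level=logging.INFO)


def log(func):
    # Generic stand-in for a telemetry/logging decorator such as sempy's @log.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        logging.info("calling %s", func.__name__)
        try:
            return func(*args, **kwargs)
        except Exception:
            logging.exception("%s failed", func.__name__)
            raise

    return wrapper


@log
def list_teams_example():
    # Placeholder body; the real functions call the Microsoft Graph / Fabric APIs.
    return ["Team A", "Team B"]


print(list_teams_example())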
sempy_labs/graph/_users.py
CHANGED
sempy_labs/lakehouse/_blobs.py
CHANGED
sempy_labs/lakehouse/_get_lakehouse_tables.py
CHANGED

@@ -40,6 +40,8 @@ def get_lakehouse_tables(
     This function can be executed in either a PySpark or pure Python notebook.
 
     This is a wrapper function for the following API: `Tables - List Tables <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/list-tables>`_ plus extended capabilities.
+    However, the above mentioned API does not support Lakehouse schemas (Preview) until it is in GA (General Availability). This version also supports schema
+    enabled Lakehouses.
 
     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
 
@@ -68,6 +70,7 @@ def get_lakehouse_tables(
     columns = {
         "Workspace Name": "string",
         "Lakehouse Name": "string",
+        "Schema Name": "string",
         "Table Name": "string",
         "Format": "string",
         "Type": "string",
@@ -83,27 +86,57 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True
 
-
-
-
-
-
+    API_called = True
+    try:
+        responses = _base_api(
+            request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
+            uses_pagination=True,
+            client="fabric_sp",
+        )
 
-
-
+    except Exception as e:
+        API_called = False
 
     dfs = []
-
-
-
-
-
-
-
-
-
-
-
+    local_path = None
+    if API_called:
+        if not responses[0].get("data"):
+            return df
+
+        for r in responses:
+            for i in r.get("data", []):
+                new_data = {
+                    "Workspace Name": workspace_name,
+                    "Lakehouse Name": lakehouse_name,
+                    "Schema Name": "",
+                    "Table Name": i.get("name"),
+                    "Format": i.get("format"),
+                    "Type": i.get("type"),
+                    "Location": i.get("location"),
+                }
+                dfs.append(pd.DataFrame(new_data, index=[0]))
+    else:
+        local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
+        tables_path = os.path.join(local_path, "Tables")
+        list_schema = os.listdir(tables_path)
+
+        for schema_name in list_schema:
+            schema_table_path = os.path.join(local_path, "Tables", schema_name)
+            list_tables = os.listdir(schema_table_path)
+            for table_name in list_tables:
+                location_path = create_abfss_path(
+                    lakehouse_id, workspace_id, table_name, schema_name
+                )
+                new_data = {
+                    "Workspace Name": workspace_name,
+                    "Lakehouse Name": lakehouse_name,
+                    "Schema Name": schema_name,
+                    "Table Name": table_name,
+                    "Format": "delta",
+                    "Type": "Managed",
+                    "Location": location_path,
+                }
+                dfs.append(pd.DataFrame(new_data, index=[0]))
 
     if dfs:
         df = pd.concat(dfs, ignore_index=True)
@@ -111,17 +144,25 @@ def get_lakehouse_tables(
     if extended:
         sku_value = get_sku_size(workspace_id)
         guardrail = get_directlake_guardrails_for_sku(sku_value)
-
+        # Avoid mounting the lakehouse if is already mounted
+        if not local_path:
+            local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
 
         df["Files"], df["Row Groups"], df["Table Size"] = None, None, None
         if count_rows:
             df["Row Count"] = None
 
         for i, r in df.iterrows():
+            use_schema = True
+            schema_name = r["Schema Name"]
             table_name = r["Table Name"]
             if r["Type"] == "Managed" and r["Format"] == "delta":
-                delta_table_path =
-
+                delta_table_path = (
+                    create_abfss_path(
+                        lakehouse_id, workspace_id, table_name, schema_name
+                    )
+                    .replace("//", "/")  # When schema_name = ""
+                    .replace("abfss:/", "abfss://")  # Put back the // after abfss:
                 )
 
                 if _pure_python_notebook():
@@ -135,29 +176,46 @@ def get_lakehouse_tables(
                     size_in_bytes = 0
                     for f in latest_files:
                         local_file_path = os.path.join(
-                            local_path, "Tables", table_name,
+                            local_path, "Tables", schema_name, table_name, f
                         )
+
                         if os.path.exists(local_file_path):
                             size_in_bytes += os.path.getsize(local_file_path)
                     num_latest_files = len(latest_files)
                 else:
                     delta_table = _get_delta_table(delta_table_path)
+
                     latest_files = _read_delta_table(delta_table_path).inputFiles()
                     table_df = delta_table.toDF()
                     table_details = delta_table.detail().collect()[0].asDict()
-                    num_latest_files = table_details.get("numFiles", 0)
                     size_in_bytes = table_details.get("sizeInBytes", 0)
+                    num_latest_files = table_details.get("numFiles", 0)
+
+                table_path = os.path.join(local_path, "Tables", schema_name, table_name)
 
-
-
+                file_paths = []
+                for file in latest_files:
+                    if _pure_python_notebook():
+                        file_paths.append(file)
+                    else:
+                        # Append the <Partition folder>/<filename> or <filename>
+                        find_table = file.find(table_name)
+                        len_file = len(file)
+                        len_table = len(table_name)
+                        last_chars = len_file - (find_table + len_table + 1)
+                        file_paths.append(file[-last_chars:])
 
                 num_rowgroups = 0
                 for filename in file_paths:
-
-
+                    parquet_file_path = f"{table_path}/{filename}"
+                    if os.path.exists(parquet_file_path):
+                        parquet_file = pq.ParquetFile(parquet_file_path)
+                        num_rowgroups += parquet_file.num_row_groups
+
                 df.at[i, "Files"] = num_latest_files
                 df.at[i, "Row Groups"] = num_rowgroups
                 df.at[i, "Table Size"] = size_in_bytes
+
                 if count_rows:
                     if _pure_python_notebook():
                         row_count = delta_table.to_pyarrow_table().num_rows
@@ -165,6 +223,9 @@ def get_lakehouse_tables(
                         row_count = table_df.count()
                     df.at[i, "Row Count"] = row_count
 
+    # Set "Schema Name" = "dbo" when it is ""
+    df.loc[df["Schema Name"] == "", "Schema Name"] = "dbo"
+
     if extended:
         intColumns = ["Files", "Row Groups", "Table Size"]
         df[intColumns] = df[intColumns].astype(int)
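Note: the schema support above hinges on building an abfss table path that works both with and without a schema: when Schema Name is empty, the extra path segment leaves a double slash that is collapsed, after which the abfss:// prefix is restored. A small sketch of that normalization — the path builder below is a simplified stand-in for the library's create_abfss_path, and the OneLake URL layout is assumed:

def build_table_path(workspace_id: str, lakehouse_id: str, table_name: str, schema_name: str = "") -> str:
    # Naive join: leaves "//" in the middle when schema_name is empty ...
    raw = (
        f"abfss://{workspace_id}@onelake.dfs.fabric.microsoft.com/"
        f"{lakehouse_id}/Tables/{schema_name}/{table_name}"
    )
    # ... then the same fix-up used in get_lakehouse_tables: collapse the duplicate
    # slash and put back the "//" after "abfss:".
    return raw.replace("//", "/").replace("abfss:/", "abfss://")


print(build_table_path("ws-id", "lh-id", "sales"))         # schema-less lakehouse
print(build_table_path("ws-id", "lh-id", "sales", "dbo"))  # schema-enabled lakehouse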
sempy_labs/lakehouse/_helper.py
CHANGED
@@ -54,6 +54,7 @@ def is_v_ordered(
     return any(b"vorder" in key for key in ds_schema.keys())
 
 
+@log
 def delete_lakehouse(
     lakehouse: str | UUID, workspace: Optional[str | UUID] = None
 ) -> None:
@@ -77,6 +78,7 @@ def delete_lakehouse(
     delete_item(item=lakehouse, item_type="lakehouse", workspace=workspace)
 
 
+@log
 def update_lakehouse(
     name: Optional[str] = None,
     description: Optional[str] = None,
sempy_labs/lakehouse/_lakehouse.py
CHANGED

@@ -11,8 +11,14 @@ from sempy_labs._helper_functions import (
 )
 import sempy_labs._icons as icons
 import re
+import time
+import pandas as pd
+from sempy_labs._job_scheduler import (
+    _get_item_job_instance,
+)
 
 
+@log
 def lakehouse_attached() -> bool:
     """
     Identifies if a lakehouse is attached to the notebook.
@@ -33,6 +39,7 @@ def lakehouse_attached() -> bool:
     return False
 
 
+@log
 def _optimize_table(path):
 
     if _pure_python_notebook():
@@ -46,6 +53,7 @@ def _optimize_table(path):
         DeltaTable.forPath(spark, path).optimize().executeCompaction()
 
 
+@log
 def _vacuum_table(path, retain_n_hours):
 
     if _pure_python_notebook():
@@ -145,6 +153,7 @@ def vacuum_lakehouse_tables(
         _vacuum_table(path=path, retain_n_hours=retain_n_hours)
 
 
+@log
 def run_table_maintenance(
     table_name: str,
     optimize: bool = False,
@@ -154,7 +163,7 @@ def run_table_maintenance(
     schema: Optional[str] = None,
     lakehouse: Optional[str | UUID] = None,
     workspace: Optional[str | UUID] = None,
-):
+) -> pd.DataFrame:
     """
     Runs table maintenance operations on the specified table within the lakehouse.
 
@@ -181,6 +190,11 @@ def run_table_maintenance(
         The Fabric workspace name or ID used by the lakehouse.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A DataFrame containing the job instance details of the table maintenance operation.
     """
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -223,13 +237,32 @@ def run_table_maintenance(
     if vacuum and retention_period is not None:
         payload["executionData"]["vacuumSettings"]["retentionPeriod"] = retention_period
 
-    _base_api(
+    response = _base_api(
         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/instances?jobType=TableMaintenance",
         method="post",
         payload=payload,
         status_codes=202,
     )
 
-
-
-    )
+    f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+
+    status_url = response.headers.get("Location").split("fabric.microsoft.com")[1]
+    status = None
+    while status not in ["Completed", "Failed"]:
+        response = _base_api(request=status_url)
+        status = response.json().get("status")
+        time.sleep(10)
+
+    df = _get_item_job_instance(url=status_url)
+
+    if status == "Completed":
+        print(
+            f"{icons.green_dot} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has succeeded."
+        )
+    else:
+        print(status)
+        print(
+            f"{icons.red_dot} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has failed."
+        )
+
+    return df
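Note: run_table_maintenance now blocks until the job finishes — it polls the job-instance URL taken from the 202 response's Location header every 10 seconds until the status is Completed or Failed, then returns the job details as a DataFrame. A generic sketch of that polling loop, with fetch_status standing in for the _base_api call against the status URL:

import time
from typing import Callable


def wait_for_job(fetch_status: Callable[[], str], poll_seconds: int = 10, max_polls: int = 360) -> str:
    # fetch_status stands in for calling the Fabric job-instance endpoint
    # (the URL from the Location header) and reading its "status" field.
    status = None
    polls = 0
    while status not in ("Completed", "Failed") and polls < max_polls:
        status = fetch_status()
        polls += 1
        if status not in ("Completed", "Failed"):
            time.sleep(poll_seconds)
    return status


# Example with a fake job that completes on the third poll.
_calls = {"n": 0}


def fake_status() -> str:
    _calls["n"] += 1
    return "Completed" if _calls["n"] >= 3 else "InProgress"


print(wait_for_job(fake_status, poll_seconds=0))  # -> Completed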
sempy_labs/lakehouse/_livy_sessions.py
CHANGED

@@ -8,8 +8,10 @@ from sempy_labs._helper_functions import (
 import pandas as pd
 from typing import Optional
 from uuid import UUID
+from sempy._utils._log import log
 
 
+@log
 def list_livy_sessions(
     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
@@ -85,7 +87,6 @@ def list_livy_sessions(
     )
 
     dfs = []
-
     for r in responses:
         for v in r.get("value", []):
             queued_duration = v.get("queuedDuration", {})
sempy_labs/lakehouse/_shortcuts.py
CHANGED

@@ -216,6 +216,7 @@ def create_shortcut(
     )
 
 
+@log
 def delete_shortcut(
     shortcut_name: str,
     shortcut_path: str = "Tables",
@@ -260,6 +261,7 @@ def delete_shortcut(
     )
 
 
+@log
 def reset_shortcut_cache(workspace: Optional[str | UUID] = None):
     """
     Deletes any cached files that were stored while reading from shortcuts.
@@ -369,6 +371,7 @@ def list_shortcuts(
         "S3Compatible": "s3Compatible",
     }
 
+    dfs = []
     for r in responses:
         for i in r.get("value", []):
             tgt = i.get("target", {})
@@ -415,6 +418,9 @@ def list_shortcuts(
                 "SubPath": sub_path,
                 "Source Properties Raw": str(tgt),
             }
-
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
 
     return df
sempy_labs/mirrored_azure_databricks_catalog/_discover.py
CHANGED

@@ -6,8 +6,10 @@ from sempy_labs._helper_functions import (
     _create_dataframe,
 )
 import pandas as pd
+from sempy._utils._log import log
 
 
+@log
 def discover_catalogs(
     databricks_workspace_connection_id: UUID,
     workspace: Optional[str | UUID] = None,
@@ -70,6 +72,7 @@ def discover_catalogs(
     return df
 
 
+@log
 def discover_schemas(
     catalog: str,
     databricks_workspace_connection_id: UUID,
@@ -135,6 +138,7 @@ def discover_schemas(
     return df
 
 
+@log
 def discover_tables(
     catalog: str,
     schema: str,
sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py
CHANGED

@@ -6,8 +6,10 @@ from sempy_labs._helper_functions import (
     _base_api,
 )
 import sempy_labs._icons as icons
+from sempy._utils._log import log
 
 
+@log
 def refresh_catalog_metadata(
     mirrored_azure_databricks_catalog: str | UUID,
     workspace: Optional[str | UUID] = None,
sempy_labs/report/_download_report.py
CHANGED

@@ -1,4 +1,3 @@
-import sempy.fabric as fabric
 import sempy_labs._icons as icons
 from typing import Optional
 from sempy_labs._helper_functions import (
@@ -11,8 +10,10 @@ from sempy_labs._helper_functions import (
 )
 from sempy_labs.lakehouse._lakehouse import lakehouse_attached
 from uuid import UUID
+from sempy._utils._log import log
 
 
+@log
 def download_report(
     report: str | UUID,
     file_name: Optional[str] = None,
sempy_labs/report/_generate_report.py
CHANGED

@@ -18,6 +18,7 @@ from sempy._utils._log import log
 from uuid import UUID
 
 
+@log
 def create_report_from_reportjson(
     report: str,
     dataset: str | UUID,
@@ -119,6 +120,7 @@ def create_report_from_reportjson(
     )
 
 
+@log
 def update_report_from_reportjson(
     report: str | UUID, report_json: dict, workspace: Optional[str | UUID] = None
 ):
sempy_labs/report/_paginated.py
CHANGED