semantic-link-labs 0.9.5__py3-none-any.whl → 0.9.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (65)
  1. {semantic_link_labs-0.9.5.dist-info → semantic_link_labs-0.9.7.dist-info}/METADATA +8 -5
  2. {semantic_link_labs-0.9.5.dist-info → semantic_link_labs-0.9.7.dist-info}/RECORD +65 -61
  3. {semantic_link_labs-0.9.5.dist-info → semantic_link_labs-0.9.7.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +19 -1
  5. sempy_labs/_ai.py +3 -1
  6. sempy_labs/_capacities.py +37 -2
  7. sempy_labs/_capacity_migration.py +11 -14
  8. sempy_labs/_connections.py +2 -4
  9. sempy_labs/_dataflows.py +2 -2
  10. sempy_labs/_dax_query_view.py +57 -0
  11. sempy_labs/_delta_analyzer.py +16 -14
  12. sempy_labs/_delta_analyzer_history.py +298 -0
  13. sempy_labs/_environments.py +8 -1
  14. sempy_labs/_eventhouses.py +5 -1
  15. sempy_labs/_external_data_shares.py +4 -10
  16. sempy_labs/_generate_semantic_model.py +2 -1
  17. sempy_labs/_graphQL.py +5 -1
  18. sempy_labs/_helper_functions.py +440 -63
  19. sempy_labs/_icons.py +6 -6
  20. sempy_labs/_kql_databases.py +5 -1
  21. sempy_labs/_list_functions.py +8 -38
  22. sempy_labs/_managed_private_endpoints.py +9 -2
  23. sempy_labs/_mirrored_databases.py +3 -1
  24. sempy_labs/_ml_experiments.py +1 -1
  25. sempy_labs/_model_bpa.py +2 -11
  26. sempy_labs/_model_bpa_bulk.py +33 -38
  27. sempy_labs/_model_bpa_rules.py +1 -1
  28. sempy_labs/_one_lake_integration.py +2 -1
  29. sempy_labs/_semantic_models.py +20 -0
  30. sempy_labs/_sql.py +6 -2
  31. sempy_labs/_sqldatabase.py +61 -100
  32. sempy_labs/_vertipaq.py +8 -11
  33. sempy_labs/_warehouses.py +14 -3
  34. sempy_labs/_workspace_identity.py +6 -0
  35. sempy_labs/_workspaces.py +42 -2
  36. sempy_labs/admin/_basic_functions.py +29 -2
  37. sempy_labs/admin/_reports.py +1 -1
  38. sempy_labs/admin/_scanner.py +2 -4
  39. sempy_labs/admin/_tenant.py +8 -3
  40. sempy_labs/directlake/_directlake_schema_compare.py +2 -1
  41. sempy_labs/directlake/_directlake_schema_sync.py +65 -19
  42. sempy_labs/directlake/_dl_helper.py +0 -6
  43. sempy_labs/directlake/_generate_shared_expression.py +19 -12
  44. sempy_labs/directlake/_guardrails.py +2 -1
  45. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +90 -57
  46. sempy_labs/directlake/_update_directlake_partition_entity.py +5 -2
  47. sempy_labs/graph/_groups.py +6 -0
  48. sempy_labs/graph/_teams.py +2 -0
  49. sempy_labs/graph/_users.py +4 -0
  50. sempy_labs/lakehouse/__init__.py +12 -3
  51. sempy_labs/lakehouse/_blobs.py +231 -0
  52. sempy_labs/lakehouse/_shortcuts.py +29 -8
  53. sempy_labs/migration/_direct_lake_to_import.py +47 -10
  54. sempy_labs/migration/_migration_validation.py +0 -4
  55. sempy_labs/report/__init__.py +4 -0
  56. sempy_labs/report/_download_report.py +4 -6
  57. sempy_labs/report/_generate_report.py +6 -6
  58. sempy_labs/report/_report_functions.py +5 -4
  59. sempy_labs/report/_report_helper.py +17 -5
  60. sempy_labs/report/_report_rebind.py +8 -6
  61. sempy_labs/report/_reportwrapper.py +17 -8
  62. sempy_labs/report/_save_report.py +147 -0
  63. sempy_labs/tom/_model.py +154 -23
  64. {semantic_link_labs-0.9.5.dist-info → semantic_link_labs-0.9.7.dist-info/licenses}/LICENSE +0 -0
  65. {semantic_link_labs-0.9.5.dist-info → semantic_link_labs-0.9.7.dist-info}/top_level.txt +0 -0
sempy_labs/_delta_analyzer_history.py ADDED
@@ -0,0 +1,298 @@
+import pandas as pd
+from typing import Optional
+import pyarrow.parquet as pq
+from sempy_labs._helper_functions import (
+    create_abfss_path,
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _mount,
+)
+from sempy._utils._log import log
+from tqdm.auto import tqdm
+from uuid import UUID
+from datetime import datetime
+
+
+@log
+def delta_analyzer_history(
+    table_name: str,
+    schema: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> pd.DataFrame:
+    """
+    Analyzes the transaction log for a specified delta table and shows the results in dataframe. One row per data modification operation.
+
+    Keeps track on the number of Parquet files, rowgroups, file size and #rows impacted by each change.
+
+    Incremental Framing effect: 100% = highly effective, 0% = no benefit at all
+
+    Parameters
+    ----------
+    table_name : str
+        The delta table name.
+    schema : str, default=None
+        The schema name of the delta table.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Displays a gantt visual showing a timeline for individual parquet files.
+    """
+
+    import notebookutils
+    from IPython.display import display, HTML
+
+    workspace_id = resolve_workspace_id(workspace=workspace)
+    lakehouse_id = resolve_lakehouse_id(lakehouse=lakehouse, workspace=workspace)
+
+    table_path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema)
+    local_path = _mount(lakehouse=lakehouse, workspace=workspace)
+    table_path_local = f"{local_path}/Tables/{table_name}"
+    delta_table_path = f"{table_path}/_delta_log"
+
+    files = notebookutils.fs.ls(delta_table_path)
+    json_files = [file.name for file in files if file.name.endswith(".json")]
+
+    element_version = total_size = total_rows = total_files = total_rowgroups = 0
+    changes_array = []
+    parquet_files = []
+    my_date_time_format = "%Y-%m-%d %H:%M:%S.%f"
+    now_to_epoch = datetime.now().strftime(my_date_time_format)
+    num_latest_files = len(json_files)
+
+    for idx, file in enumerate(bar := tqdm(json_files), start=1):
+        bar.set_description(
+            f"Analyzing the '{file}' parquet file ({idx}/{num_latest_files})..."
+        )
+
+        change_timestamp = datetime.strptime(
+            "2001-01-01 12:00:00.000", my_date_time_format
+        )
+        df = pd.read_json(f"{delta_table_path}/{file}", lines=True)
+
+        rows_added = size_added = rows_deleted = size_deleted = files_added = (
+            files_removed
+        ) = row_groups_added = row_groups_removed = 0
+        total_files_before_change = total_files
+        total_row_groups_before_change = total_rowgroups
+        operation = predicate = tags = ""
+
+        for _, row in df.iterrows():
+            add_row = row.get("add")
+            remove_row = row.get("remove")
+            commit_row = row.get("commitInfo")
+
+            if isinstance(add_row, dict):
+                file_name = add_row["path"]
+                fs_filename = f"{table_path}/{file_name}"
+                size_added += add_row["size"]
+                files_added += 1
+                filerows_added = 0
+
+                if notebookutils.fs.exists(fs_filename):
+                    parquet_file = pq.ParquetFile(table_path_local + f"/{file_name}")
+                    for i in range(parquet_file.num_row_groups):
+                        row_group = parquet_file.metadata.row_group(i)
+                        num_rows = row_group.num_rows
+                        filerows_added += num_rows
+                        rows_added += num_rows
+
+                    row_groups_added += parquet_file.num_row_groups
+
+                start = str(
+                    datetime.fromtimestamp(add_row["modificationTime"] / 1000.0)
+                )
+                parquet_files.append(
+                    {
+                        "file": file_name,
+                        "start": start,
+                        "end": now_to_epoch,
+                        "rows": filerows_added,
+                        "isCurrent": 1,
+                    }
+                )
+
+            if isinstance(remove_row, dict):
+                file_name = remove_row["path"]
+                fs_filename = f"{table_path}/{file_name}"
+
+                if notebookutils.fs.exists(fs_filename):
+                    parquet_file = pq.ParquetFile(table_path_local + f"/{file_name}")
+                    for i in range(parquet_file.num_row_groups):
+                        row_group = parquet_file.metadata.row_group(i)
+                        num_rows = row_group.num_rows
+                        rows_deleted += num_rows
+
+                    files_removed += 1
+                    size_deleted += remove_row.get("size", 0)
+                    row_groups_removed += parquet_file.num_row_groups
+
+                result = next(
+                    (row for row in parquet_files if row["file"] == file_name), None
+                )
+                if result:
+                    result.update(
+                        {
+                            "isCurrent": 0,
+                            "end": str(
+                                datetime.fromtimestamp(
+                                    remove_row["deletionTimestamp"] / 1000.0
+                                )
+                            ),
+                        }
+                    )
+
+            if isinstance(commit_row, dict):
+                operation = commit_row.get("operation")
+                tags = commit_row.get("tags")
+                predicate = commit_row.get("operationParameters", {}).get("predicate")
+
+                if operation == "VACUUM START":
+                    operation_metrics = commit_row.get("operationMetrics", {})
+                    total_files -= int(operation_metrics.get("numFilesToDelete", 0))
+                    total_size -= int(operation_metrics.get("sizeOfDataToDelete", 0))
+
+                change_timestamp = datetime.fromtimestamp(
+                    commit_row["timestamp"] / 1000.0
+                )
+
+        total_size += size_added - size_deleted
+        total_rows += rows_added - rows_deleted
+        total_files += files_added - files_removed
+        total_rowgroups += row_groups_added - row_groups_removed
+
+        incremental_framing_effect = 1
+        if size_deleted != 0:
+            incremental_framing_effect = (
+                int((total_size - size_added * 1.0) / total_size * 100000) / 1000
+            )
+            # incrementalFramingEffect = round(
+            #     (totalSize - sizeAdded * 1.0) / totalSize, 4
+            # )
+
+        changes_array.append(
+            [
+                element_version,
+                operation,
+                predicate,
+                change_timestamp,
+                incremental_framing_effect,
+                files_added,
+                files_removed,
+                total_files_before_change - files_removed,
+                total_files,
+                size_added,
+                size_deleted,
+                total_size,
+                row_groups_added,
+                row_groups_removed,
+                total_row_groups_before_change - row_groups_removed,
+                total_rowgroups,
+                rows_added,
+                rows_deleted,
+                rows_added - rows_deleted,
+                total_rows,
+                tags,
+            ]
+        )
+
+        element_version += 1
+
+    # /********************************************************************************************************************
+    # Display Gantt Chart of files
+    # ********************************************************************************************************************/
+    spec: str = (
+        """{
+    "$$schema": 'https://vega.github.io/schema/vega-lite/v2.json',
+    "description": "A simple bar chart with ranged data (aka Gantt Chart).",
+    "width" : 1024 ,
+    "data": {
+        "values": %s
+    },
+    "layer":[
+        {"mark": "bar"},
+        {"mark": {
+            "type": "text",
+            "align": "center",
+            "baseline": "middle",
+            "dx": 40
+            },
+        "encoding": {
+            "text": {"field": "rows", "type": "quantitative", "format":","},
+            "color":{
+                "condition": {"test": "datum['isCurrent'] == 1", "value": "black"},
+                "value": "black"
+                }
+            }
+        }],
+    "encoding": {
+        "y": {"field": "file", "type": "ordinal","sort": "isCurrent","title":null,"axis":{"labelPadding":15,"labelLimit":360}},
+        "x": {"field": "start", "type": "temporal","title":null},
+        "x2": {"field": "end", "type": "temporal","title":null},
+        "color": {
+            "field": "isCurrent",
+            "scale": {"range": ["silver", "#ca8861"]}
+            }
+        }
+    }"""
+        % (parquet_files)
+    )
+
+    display(
+        HTML(
+            """
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
+        <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
+        <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
+    </head>
+    <body>
+        <div id="vis"></div>
+        <script type="text/javascript">
+            var spec = """
+            + spec
+            + """;
+            var opt = {"renderer": "canvas", "actions": false};
+            vegaEmbed("#vis", spec, opt);
+        </script>
+    </body>
+    </html>"""
+        )
+    )
+
+    return pd.DataFrame(
+        changes_array,
+        columns=[
+            "Change Number",
+            "Change Type",
+            "Predicate",
+            "Modification Time",
+            "Incremental Effect",
+            "Files Added",
+            "Files Removed",
+            "Files Preserved",
+            "Files After Change",
+            "Size Added",
+            "Sized Removed",
+            "Size After Change",
+            "Rowgroups Added",
+            "Rowgroups Removed",
+            "Rowgroups Preserved",
+            "Rowgroups After Change",
+            "Rows Added",
+            "Rows Removed",
+            "Rows Delta",
+            "Rows After Change",
+            "Tags",
+        ],
+    )
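The new delta_analyzer_history function above is written to run inside a Microsoft Fabric notebook: it imports notebookutils and mounts the target lakehouse. A minimal usage sketch, assuming an attached lakehouse and a hypothetical delta table named "sales" (the module-level import is used here since the __init__.py hunk is not shown in this diff):

from sempy_labs._delta_analyzer_history import delta_analyzer_history

# Fabric notebook only: relies on notebookutils and a mounted lakehouse.
# "sales" is a hypothetical table name; lakehouse and workspace default to
# the lakehouse attached to the notebook when omitted.
history_df = delta_analyzer_history(table_name="sales")

# One row per data modification operation, with the columns built above,
# e.g. "Change Number", "Change Type", "Files Added", "Rows After Change".
print(history_df[["Change Number", "Change Type", "Files Added", "Rows After Change"]])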
sempy_labs/_environments.py CHANGED
@@ -48,6 +48,8 @@ def list_environments(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
 
     This is a wrapper function for the following API: `Items - List Environments <https://learn.microsoft.com/rest/api/fabric/environment/items/list-environments>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     workspace : str | uuid.UUID, default=None
@@ -71,7 +73,9 @@ def list_environments(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     responses = _base_api(
-        request=f"/v1/workspaces/{workspace_id}/environments", uses_pagination=True
+        request=f"/v1/workspaces/{workspace_id}/environments",
+        uses_pagination=True,
+        client="fabric_sp",
     )
 
     for r in responses:
@@ -113,6 +117,8 @@ def publish_environment(
 
     This is a wrapper function for the following API: `Spark Libraries - Publish Environment <https://learn.microsoft.com/rest/api/fabric/environment/spark-libraries/publish-environment>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     environment: str | uuid.UUID
@@ -133,6 +139,7 @@
         method="post",
         lro_return_status_code=True,
         status_codes=None,
+        client="fabric_sp",
     )
 
     print(
sempy_labs/_eventhouses.py CHANGED
@@ -72,6 +72,8 @@ def list_eventhouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
 
     This is a wrapper function for the following API: `Items - List Eventhouses <https://learn.microsoft.com/rest/api/fabric/environment/items/list-eventhouses>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     workspace : str | uuid.UUID, default=None
@@ -95,7 +97,9 @@ def list_eventhouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     responses = _base_api(
-        request=f"/v1/workspaces/{workspace_id}/eventhouses", uses_pagination=True
+        request=f"/v1/workspaces/{workspace_id}/eventhouses",
+        uses_pagination=True,
+        client="fabric_sp",
     )
 
     for r in responses:
sempy_labs/_external_data_shares.py CHANGED
@@ -1,4 +1,3 @@
-import sempy.fabric as fabric
 from uuid import UUID
 import pandas as pd
 from typing import Optional, List
@@ -7,6 +6,7 @@ from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     _base_api,
     _create_dataframe,
+    resolve_item_id,
 )
 
 
@@ -39,9 +39,7 @@ def create_external_data_share(
     """
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    item_id = fabric.resolve_item_id(
-        item_name=item_name, type=item_type, workspace=workspace_id
-    )
+    item_id = resolve_item_id(item=item_name, type=item_type, workspace=workspace_id)
 
     if isinstance(paths, str):
         paths = [paths]
@@ -85,9 +83,7 @@ def revoke_external_data_share(
     """
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    item_id = fabric.resolve_item_id(
-        item_name=item_name, type=item_type, workspace=workspace_id
-    )
+    item_id = resolve_item_id(item=item_name, type=item_type, workspace=workspace_id)
 
     _base_api(
         request=f"/v1/workspaces/{workspace_id}/items/{item_id}/externalDataShares/{external_data_share_id}/revoke",
@@ -124,9 +120,7 @@ def list_external_data_shares_in_item(
     """
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    item_id = fabric.resolve_item_id(
-        item_name=item_name, type=item_type, workspace=workspace_id
-    )
+    item_id = resolve_item_id(item=item_name, type=item_type, workspace=workspace_id)
 
     columns = {
         "External Data Share Id": "string",
sempy_labs/_generate_semantic_model.py CHANGED
@@ -11,6 +11,7 @@ from sempy_labs._helper_functions import (
     _decode_b64,
     _base_api,
     _mount,
+    resolve_workspace_id,
 )
 from sempy_labs.lakehouse._lakehouse import lakehouse_attached
 import sempy_labs._icons as icons
@@ -285,7 +286,7 @@ def deploy_semantic_model(
 
     if target_workspace is None:
         target_workspace_name = source_workspace_name
-        target_workspace_id = fabric.resolve_workspace_id(target_workspace_name)
+        target_workspace_id = resolve_workspace_id(workspace=target_workspace_name)
     else:
         (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id(
             target_workspace
sempy_labs/_graphQL.py CHANGED
@@ -15,6 +15,8 @@ def list_graphql_apis(workspace: Optional[str | UUID]) -> pd.DataFrame:
 
     This is a wrapper function for the following API: `Items - List GraphQLApis <https://learn.microsoft.com/rest/api/fabric/graphqlapi/items/list-graphqlapi-s>`_.
 
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
     Parameters
     ----------
     workspace : str | uuid.UUID, default=None
@@ -38,7 +40,9 @@ def list_graphql_apis(workspace: Optional[str | UUID]) -> pd.DataFrame:
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
 
     responses = _base_api(
-        request=f"/v1/workspaces/{workspace_id}/GraphQLApis", uses_pagination=True
+        request=f"/v1/workspaces/{workspace_id}/GraphQLApis",
+        uses_pagination=True,
+        client="fabric_sp",
    )
 
     for r in responses:
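A recurring pattern across these hunks: the wrapper's docstring gains a note that Service Principal Authentication is supported, and its _base_api call gains client="fabric_sp". The sketch below is purely illustrative and not part of the package; list_widgets and the /widgets endpoint are hypothetical, and it only mirrors the call shape visible in the diffs above.

import pandas as pd
from typing import Optional
from uuid import UUID

from sempy_labs._helper_functions import (
    resolve_workspace_name_and_id,
    _base_api,
)


def list_widgets(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
    """Hypothetical wrapper showing the 0.9.7 pattern: a paginated Fabric REST
    call routed through the "fabric_sp" client so it also works when running
    under Service Principal Authentication."""
    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

    responses = _base_api(
        request=f"/v1/workspaces/{workspace_id}/widgets",  # hypothetical endpoint
        uses_pagination=True,
        client="fabric_sp",
    )

    # Assumes each paginated response behaves like a parsed JSON payload,
    # as the "for r in responses:" loops in the hunks above suggest.
    rows = [
        {"Widget Name": v.get("displayName"), "Widget Id": v.get("id")}
        for r in responses
        for v in r.get("value", [])
    ]
    return pd.DataFrame(rows)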