semantic-link-labs 0.9.1-py3-none-any.whl → 0.9.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (87)
  1. {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/METADATA +67 -8
  2. {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/RECORD +87 -80
  3. sempy_labs/__init__.py +14 -12
  4. sempy_labs/_ai.py +8 -5
  5. sempy_labs/_capacities.py +120 -142
  6. sempy_labs/_capacity_migration.py +61 -94
  7. sempy_labs/_clear_cache.py +9 -8
  8. sempy_labs/_connections.py +107 -104
  9. sempy_labs/_data_pipelines.py +47 -49
  10. sempy_labs/_dataflows.py +45 -51
  11. sempy_labs/_dax.py +228 -6
  12. sempy_labs/_delta_analyzer.py +321 -0
  13. sempy_labs/_deployment_pipelines.py +72 -66
  14. sempy_labs/_environments.py +39 -36
  15. sempy_labs/_eventhouses.py +35 -35
  16. sempy_labs/_eventstreams.py +38 -39
  17. sempy_labs/_external_data_shares.py +29 -42
  18. sempy_labs/_gateways.py +103 -99
  19. sempy_labs/_generate_semantic_model.py +22 -30
  20. sempy_labs/_git.py +46 -66
  21. sempy_labs/_graphQL.py +95 -0
  22. sempy_labs/_helper_functions.py +227 -36
  23. sempy_labs/_job_scheduler.py +47 -59
  24. sempy_labs/_kql_databases.py +27 -34
  25. sempy_labs/_kql_querysets.py +23 -30
  26. sempy_labs/_list_functions.py +264 -167
  27. sempy_labs/_managed_private_endpoints.py +52 -47
  28. sempy_labs/_mirrored_databases.py +110 -134
  29. sempy_labs/_mirrored_warehouses.py +13 -13
  30. sempy_labs/_ml_experiments.py +36 -36
  31. sempy_labs/_ml_models.py +37 -38
  32. sempy_labs/_model_bpa.py +2 -2
  33. sempy_labs/_model_bpa_rules.py +8 -6
  34. sempy_labs/_model_dependencies.py +2 -0
  35. sempy_labs/_notebooks.py +28 -29
  36. sempy_labs/_one_lake_integration.py +2 -0
  37. sempy_labs/_query_scale_out.py +63 -81
  38. sempy_labs/_refresh_semantic_model.py +12 -14
  39. sempy_labs/_spark.py +54 -79
  40. sempy_labs/_sql.py +7 -11
  41. sempy_labs/_translations.py +2 -2
  42. sempy_labs/_vertipaq.py +11 -6
  43. sempy_labs/_warehouses.py +30 -33
  44. sempy_labs/_workloads.py +15 -20
  45. sempy_labs/_workspace_identity.py +13 -17
  46. sempy_labs/_workspaces.py +49 -48
  47. sempy_labs/admin/__init__.py +2 -0
  48. sempy_labs/admin/_basic_functions.py +244 -281
  49. sempy_labs/admin/_domains.py +186 -103
  50. sempy_labs/admin/_external_data_share.py +26 -31
  51. sempy_labs/admin/_git.py +17 -22
  52. sempy_labs/admin/_items.py +34 -48
  53. sempy_labs/admin/_scanner.py +61 -49
  54. sempy_labs/directlake/_directlake_schema_compare.py +2 -0
  55. sempy_labs/directlake/_dl_helper.py +10 -11
  56. sempy_labs/directlake/_generate_shared_expression.py +4 -5
  57. sempy_labs/directlake/_get_directlake_lakehouse.py +1 -0
  58. sempy_labs/directlake/_list_directlake_model_calc_tables.py +1 -0
  59. sempy_labs/directlake/_show_unsupported_directlake_objects.py +2 -0
  60. sempy_labs/directlake/_warm_cache.py +2 -0
  61. sempy_labs/graph/__init__.py +33 -0
  62. sempy_labs/graph/_groups.py +402 -0
  63. sempy_labs/graph/_teams.py +113 -0
  64. sempy_labs/graph/_users.py +191 -0
  65. sempy_labs/lakehouse/__init__.py +4 -0
  66. sempy_labs/lakehouse/_get_lakehouse_columns.py +12 -12
  67. sempy_labs/lakehouse/_get_lakehouse_tables.py +16 -22
  68. sempy_labs/lakehouse/_lakehouse.py +104 -7
  69. sempy_labs/lakehouse/_shortcuts.py +42 -20
  70. sempy_labs/migration/__init__.py +4 -0
  71. sempy_labs/migration/_direct_lake_to_import.py +66 -0
  72. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +3 -2
  73. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +1 -0
  74. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +1 -0
  75. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +2 -0
  76. sempy_labs/migration/_refresh_calc_tables.py +2 -2
  77. sempy_labs/report/_download_report.py +8 -13
  78. sempy_labs/report/_generate_report.py +49 -46
  79. sempy_labs/report/_paginated.py +20 -26
  80. sempy_labs/report/_report_functions.py +52 -47
  81. sempy_labs/report/_report_list_functions.py +2 -0
  82. sempy_labs/report/_report_rebind.py +6 -10
  83. sempy_labs/report/_reportwrapper.py +187 -220
  84. sempy_labs/tom/_model.py +12 -6
  85. {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/LICENSE +0 -0
  86. {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/WHEEL +0 -0
  87. {semantic_link_labs-0.9.1.dist-info → semantic_link_labs-0.9.3.dist-info}/top_level.txt +0 -0
sempy_labs/_dataflows.py CHANGED
@@ -3,10 +3,12 @@ import pandas as pd
 from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     _is_valid_uuid,
+    _update_dataframe_datatypes,
+    _base_api,
+    _create_dataframe,
 )
 from typing import Optional, Tuple
 import sempy_labs._icons as icons
-from sempy.fabric.exceptions import FabricHTTPException
 from uuid import UUID


@@ -28,14 +30,17 @@ def list_dataflows(workspace: Optional[str | UUID] = None):
     """

     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
-    client = fabric.PowerBIRestClient()
-    response = client.get(f"/v1.0/myorg/groups/{workspace_id}/dataflows")
-    if response.status_code != 200:
-        raise FabricHTTPException(response)

-    df = pd.DataFrame(
-        columns=["Dataflow Id", "Dataflow Name", "Configured By", "Users", "Generation"]
-    )
+    columns = {
+        "Dataflow Id": "string",
+        "Dataflow Name": "string",
+        "Configured By": "string",
+        "Users": "string",
+        "Generation": "int",
+    }
+    df = _create_dataframe(columns=columns)
+
+    response = _base_api(request=f"/v1.0/myorg/groups/{workspace_id}/dataflows")

     data = []  # Collect rows here

@@ -51,7 +56,8 @@ def list_dataflows(workspace: Optional[str | UUID] = None):

     if data:
         df = pd.DataFrame(data)
-        df["Generation"] = df["Generation"].astype(int)
+
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df

@@ -85,16 +91,14 @@ def assign_workspace_to_dataflow_storage(
     )

     dataflow_storage_id = df_filt["Dataflow Storage Account ID"].iloc[0]
-    client = fabric.PowerBIRestClient()
-
-    request_body = {"dataflowStorageId": dataflow_storage_id}
+    payload = {"dataflowStorageId": dataflow_storage_id}

-    response = client.post(
-        f"/v1.0/myorg/groups/{workspace_id}/AssignToDataflowStorage", json=request_body
+    _base_api(
+        request=f"/v1.0/myorg/groups/{workspace_id}/AssignToDataflowStorage",
+        method="post",
+        payload=payload,
     )

-    if response.status_code != 200:
-        raise FabricHTTPException(response)
     print(
         f"{icons.green_dot} The '{dataflow_storage_account}' dataflow storage account has been assigned to the '{workspace_name}' workspacce."
     )
@@ -112,17 +116,14 @@ def list_dataflow_storage_accounts() -> pd.DataFrame:
         A pandas dataframe showing the accessible dataflow storage accounts.
     """

-    df = pd.DataFrame(
-        columns=[
-            "Dataflow Storage Account ID",
-            "Dataflow Storage Account Name",
-            "Enabled",
-        ]
-    )
-    client = fabric.PowerBIRestClient()
-    response = client.get("/v1.0/myorg/dataflowStorageAccounts")
-    if response.status_code != 200:
-        raise FabricHTTPException(response)
+    columns = {
+        "Dataflow Storage Account ID": "string",
+        "Dataflow Storage Account Name": "string",
+        "Enabled": "bool",
+    }
+    df = _create_dataframe(columns=columns)
+
+    response = _base_api(request="/v1.0/myorg/dataflowStorageAccounts")

     for v in response.json().get("value", []):
         new_data = {
@@ -132,7 +133,7 @@ def list_dataflow_storage_accounts() -> pd.DataFrame:
         }
         df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

-    df["Enabled"] = df["Enabled"].astype(bool)
+    _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df

@@ -164,29 +165,23 @@ def list_upstream_dataflows(
     (dataflow_name, dataflow_id) = _resolve_dataflow_name_and_id(
         dataflow=dataflow, workspace=workspace_id
     )
-    client = fabric.PowerBIRestClient()
-
-    df = pd.DataFrame(
-        columns=[
-            "Dataflow Name",
-            "Dataflow Id",
-            "Workspace Name",
-            "Workspace Id",
-            "Upstream Dataflow Name",
-            "Upstream Dataflow Id",
-            "Upstream Workspace Name",
-            "Upstream Workspace Id",
-        ]
-    )

-    def collect_upstreams(
-        client, dataflow_id, dataflow_name, workspace_id, workspace_name
-    ):
-        response = client.get(
-            f"/v1.0/myorg/groups/{workspace_id}/dataflows/{dataflow_id}/upstreamDataflows"
+    columns = {
+        "Dataflow Name": "string",
+        "Dataflow Id": "string",
+        "Workspace Name": "string",
+        "Workspace Id": "string",
+        "Upstream Dataflow Name": "string",
+        "Upstream Dataflow Id": "string",
+        "Upstream Workspace Name": "string",
+        "Upstream Workspace Id": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    def collect_upstreams(dataflow_id, dataflow_name, workspace_id, workspace_name):
+        response = _base_api(
+            request=f"/v1.0/myorg/groups/{workspace_id}/dataflows/{dataflow_id}/upstreamDataflows"
         )
-        if response.status_code != 200:
-            raise FabricHTTPException(response)

         values = response.json().get("value", [])
         for v in values:
@@ -209,14 +204,13 @@ def list_upstream_dataflows(
             }

             collect_upstreams(
-                client,
                 tgt_dataflow_id,
                 tgt_dataflow_name,
                 tgt_workspace_id,
                 tgt_workspace_name,
             )

-    collect_upstreams(client, dataflow_id, dataflow_name, workspace_id, workspace_name)
+    collect_upstreams(dataflow_id, dataflow_name, workspace_id, workspace_name)

     return df
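Note: the refactor above replaces per-function fabric.PowerBIRestClient() calls and hand-built pd.DataFrame schemas with the shared _base_api, _create_dataframe, and _update_dataframe_datatypes helpers, so HTTP errors are raised inside _base_api instead of via explicit FabricHTTPException checks. A minimal usage sketch of the public functions touched in this file, assuming they remain re-exported from the package root as in 0.9.1 (the workspace name is a placeholder):

import sempy_labs as labs

# List dataflows in a workspace; the "Generation" column is cast to int
# via _update_dataframe_datatypes rather than an inline astype call.
df_flows = labs.list_dataflows(workspace="Sales")  # placeholder workspace name

# List accessible dataflow storage accounts; "Enabled" is cast to bool the same way.
df_storage = labs.list_dataflow_storage_accounts()

print(df_flows.dtypes)
print(df_storage.head())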
sempy_labs/_dax.py CHANGED
@@ -4,12 +4,16 @@ from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     format_dax_object_name,
     resolve_dataset_name_and_id,
+    _base_api,
+    generate_guid,
 )
 from sempy_labs._model_dependencies import get_model_calc_dependencies
-from typing import Optional, List
+from typing import Optional, List, Tuple
 from sempy._utils._log import log
 from uuid import UUID
 from sempy_labs.directlake._warm_cache import _put_columns_into_memory
+import sempy_labs._icons as icons
+import time


 @log
@@ -47,15 +51,15 @@ def evaluate_dax_impersonation(
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

-    request_body = {
+    payload = {
         "queries": [{"query": dax_query}],
         "impersonatedUserName": user_name,
     }

-    client = fabric.PowerBIRestClient()
-    response = client.post(
-        f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/executeQueries",
-        json=request_body,
+    response = _base_api(
+        request=f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/executeQueries",
+        method="post",
+        payload=payload,
     )
     data = response.json()["results"][0]["tables"]
     column_names = data[0]["rows"][0].keys()
@@ -100,6 +104,8 @@ def get_dax_query_dependencies(
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

+    fabric.refresh_tom_cache(workspace=workspace)
+
     if isinstance(dax_string, str):
         dax_string = [dax_string]

@@ -257,3 +263,219 @@ def get_dax_query_memory_size(
     )

     return df["Total Size"].sum()
+
+
+@log
+def _dax_perf_test(
+    dataset: str,
+    dax_queries: dict,
+    clear_cache_before_run: bool = False,
+    refresh_type: Optional[str] = None,
+    rest_time: int = 2,
+    workspace: Optional[str] = None,
+) -> Tuple[pd.DataFrame, dict]:
+    """
+    Runs a performance test on a set of DAX queries.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the semantic model.
+    dax_queries : dict
+        The dax queries to run in a dictionary format. Here is an example:
+        {
+            "Sales Amount Test", """ EVALUATE SUMMARIZECOLUMNS("Sales Amount", [Sales Amount]) """,
+            "Order Quantity with Product", """ EVALUATE SUMMARIZECOLUMNS('Product'[Color], "Order Qty", [Order Qty]) """,
+        }
+    clear_cache_before_run : bool, default=False
+    refresh_type : str, default=None
+    rest_time : int, default=2
+        Rest time (in seconds) between the execution of each DAX query.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    Tuple[pandas.DataFrame, dict]
+        A pandas dataframe showing the SQL profiler trace results of the DAX queries.
+        A dictionary of the query results in pandas dataframes.
+    """
+    from sempy_labs._refresh_semantic_model import refresh_semantic_model
+    from sempy_labs._clear_cache import clear_cache
+
+    event_schema = {
+        "QueryBegin": [
+            "EventClass",
+            "EventSubclass",
+            "CurrentTime",
+            "NTUserName",
+            "TextData",
+            "StartTime",
+            "ApplicationName",
+        ],
+        "QueryEnd": [
+            "EventClass",
+            "EventSubclass",
+            "CurrentTime",
+            "NTUserName",
+            "TextData",
+            "StartTime",
+            "EndTime",
+            "Duration",
+            "CpuTime",
+            "Success",
+            "ApplicationName",
+        ],
+        "VertiPaqSEQueryBegin": [
+            "EventClass",
+            "EventSubclass",
+            "CurrentTime",
+            "NTUserName",
+            "TextData",
+            "StartTime",
+        ],
+        "VertiPaqSEQueryEnd": [
+            "EventClass",
+            "EventSubclass",
+            "CurrentTime",
+            "NTUserName",
+            "TextData",
+            "StartTime",
+            "EndTime",
+            "Duration",
+            "CpuTime",
+            "Success",
+        ],
+        "VertiPaqSEQueryCacheMatch": [
+            "EventClass",
+            "EventSubclass",
+            "CurrentTime",
+            "NTUserName",
+            "TextData",
+        ],
+    }
+
+    # Add Execution Metrics
+    event_schema["ExecutionMetrics"] = ["EventClass", "ApplicationName", "TextData"]
+    # Add DAX Query Plan
+    # event_schema["DAXQueryPlan"] = ["EventClass", "EventSubclass", "CurrentTime", "StartTime", "EndTime", "Duration", "CpuTime", "ApplicationName", "TextData"]
+
+    query_results = {}
+
+    # Establish trace connection
+    with fabric.create_trace_connection(
+        dataset=dataset, workspace=workspace
+    ) as trace_connection:
+        with trace_connection.create_trace(event_schema) as trace:
+            trace.start()
+            print(f"{icons.in_progress} Starting performance testing...")
+            # Loop through DAX queries
+            for name, dax in dax_queries.items():
+
+                if clear_cache_before_run:
+                    clear_cache(dataset=dataset, workspace=workspace)
+                if refresh_type is not None:
+                    refresh_semantic_model(
+                        dataset=dataset, workspace=workspace, refresh_type=refresh_type
+                    )
+
+                # EVALUATE {1} is used to initate a warm cache
+                fabric.evaluate_dax(
+                    dataset=dataset, workspace=workspace, dax_string="""EVALUATE {1}"""
+                )
+                # Run DAX Query
+                result = fabric.evaluate_dax(
+                    dataset=dataset, workspace=workspace, dax_string=dax
+                )
+
+                # Add results to output
+                query_results[name] = result
+
+                time.sleep(rest_time)
+                print(f"{icons.green_dot} The '{name}' query has completed.")
+
+            df = trace.stop()
+            # Allow time to collect trace results
+            time.sleep(5)
+
+            # Step 1: Filter out unnecessary operations
+            query_names = list(dax_queries.keys())
+            df = df[
+                ~df["Application Name"].isin(["PowerBI", "PowerBIEIM"])
+                & (~df["Text Data"].str.startswith("EVALUATE {1}"))
+            ]
+            query_begin = df["Event Class"] == "QueryBegin"
+            temp_column_name = "QueryName_INT"
+            df = df.copy()
+            df[temp_column_name] = query_begin.cumsum()
+            df[temp_column_name] = (
+                df[temp_column_name]
+                .where(query_begin, None)  # Assign None to non-query begin rows
+                .ffill()  # Forward fill None values
+                .astype("Int64")  # Use pandas nullable integer type for numeric indices
+            )
+
+            df.loc[df[temp_column_name].notna(), "Query Name"] = (
+                df[temp_column_name]
+                .dropna()
+                .astype(int)
+                .map(lambda x: query_names[x - 1])
+            )
+            df = df[df[temp_column_name] != None]
+            df = df.drop(columns=[temp_column_name])
+
+            query_to_guid = {
+                name: generate_guid() for name in df["Query Name"].unique()
+            }
+            df["Query ID"] = df["Query Name"].map(query_to_guid)
+
+            df = df.reset_index(drop=True)
+
+    return df, query_results
+
+
+def _dax_perf_test_bulk(
+    mapping: dict,
+    clear_cache_before_run: bool = False,
+    refresh_type: Optional[str] = None,
+    rest_time: int = 2,
+):
+    """
+    mapping is something like this:
+
+    mapping = {
+        "Workspace1": {
+            "Dataset1": {
+                "Query1": "EVALUATE ...",
+                "Query2": "EVALUATE ...",
+            },
+            "Dataset2": {
+                "Query3": "EVALUATE ...",
+                "Query4": "EVALUATE ...",
+            }
+        },
+        "Workspace2": {
+            "Dataset3": {
+                "Query5": "EVALUATE ...",
+                "Query6": "EVALUATE ...",
+            },
+            "Dataset4": {
+                "Query7": "EVALUATE ...",
+                "Query8": "EVALUATE ...",
+            }
+        }
+    }
+    """
+
+    for workspace, datasets in mapping.items():
+        for dataset, queries in datasets.items():
+            _dax_perf_test(
+                dataset=dataset,
+                dax_queries=queries,
+                clear_cache_before_run=clear_cache_before_run,
+                refresh_type=refresh_type,
+                rest_time=rest_time,
+                workspace=workspace,
+            )
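For orientation, a hedged sketch of how the new private _dax_perf_test helper might be called, based only on the signature and docstring above; the model name, workspace name, table names, and queries are placeholders, and since the function is underscore-prefixed its interface may change without notice:

from sempy_labs._dax import _dax_perf_test

# Placeholder DAX queries keyed by a friendly name, as the docstring describes.
queries = {
    "Total Sales Rows": "EVALUATE ROW(\"Rows\", COUNTROWS('Sales'))",
    "Sales by Color": "EVALUATE SUMMARIZECOLUMNS('Product'[Color], \"Qty\", [Order Qty])",
}

# Returns the filtered trace dataframe plus a dict of per-query result dataframes.
trace_df, results = _dax_perf_test(
    dataset="AdventureWorks",      # placeholder semantic model name
    dax_queries=queries,
    clear_cache_before_run=True,   # clear the model cache before each query
    rest_time=2,                   # seconds to wait between queries
    workspace="Perf Testing",      # placeholder workspace name
)

print(trace_df[["Query Name", "Query ID", "Event Class"]].head())
for name, result in results.items():
    print(name, result.shape)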