semantic-link-labs 0.7.4__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs has been flagged as potentially problematic; see the registry listing for more details.

Files changed (59)
  1. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/METADATA +43 -7
  2. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/RECORD +59 -40
  3. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +116 -58
  5. sempy_labs/_ai.py +0 -2
  6. sempy_labs/_capacities.py +39 -3
  7. sempy_labs/_capacity_migration.py +623 -0
  8. sempy_labs/_clear_cache.py +8 -8
  9. sempy_labs/_connections.py +15 -13
  10. sempy_labs/_data_pipelines.py +118 -0
  11. sempy_labs/_documentation.py +144 -0
  12. sempy_labs/_eventhouses.py +118 -0
  13. sempy_labs/_eventstreams.py +118 -0
  14. sempy_labs/_generate_semantic_model.py +3 -3
  15. sempy_labs/_git.py +23 -24
  16. sempy_labs/_helper_functions.py +140 -47
  17. sempy_labs/_icons.py +40 -0
  18. sempy_labs/_kql_databases.py +134 -0
  19. sempy_labs/_kql_querysets.py +124 -0
  20. sempy_labs/_list_functions.py +218 -421
  21. sempy_labs/_mirrored_warehouses.py +50 -0
  22. sempy_labs/_ml_experiments.py +122 -0
  23. sempy_labs/_ml_models.py +120 -0
  24. sempy_labs/_model_auto_build.py +0 -4
  25. sempy_labs/_model_bpa.py +10 -12
  26. sempy_labs/_model_bpa_bulk.py +8 -7
  27. sempy_labs/_model_dependencies.py +26 -18
  28. sempy_labs/_notebooks.py +5 -16
  29. sempy_labs/_query_scale_out.py +6 -5
  30. sempy_labs/_refresh_semantic_model.py +7 -19
  31. sempy_labs/_spark.py +40 -45
  32. sempy_labs/_sql.py +60 -15
  33. sempy_labs/_vertipaq.py +25 -25
  34. sempy_labs/_warehouses.py +132 -0
  35. sempy_labs/_workspaces.py +0 -3
  36. sempy_labs/admin/__init__.py +53 -0
  37. sempy_labs/admin/_basic_functions.py +888 -0
  38. sempy_labs/admin/_domains.py +411 -0
  39. sempy_labs/directlake/_directlake_schema_sync.py +1 -1
  40. sempy_labs/directlake/_dl_helper.py +32 -16
  41. sempy_labs/directlake/_generate_shared_expression.py +11 -14
  42. sempy_labs/directlake/_guardrails.py +7 -7
  43. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +14 -24
  44. sempy_labs/directlake/_update_directlake_partition_entity.py +1 -1
  45. sempy_labs/directlake/_warm_cache.py +1 -1
  46. sempy_labs/lakehouse/_get_lakehouse_tables.py +3 -3
  47. sempy_labs/lakehouse/_lakehouse.py +3 -2
  48. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +5 -0
  49. sempy_labs/report/__init__.py +9 -6
  50. sempy_labs/report/_generate_report.py +1 -1
  51. sempy_labs/report/_report_bpa.py +369 -0
  52. sempy_labs/report/_report_bpa_rules.py +113 -0
  53. sempy_labs/report/_report_helper.py +254 -0
  54. sempy_labs/report/_report_list_functions.py +95 -0
  55. sempy_labs/report/_report_rebind.py +0 -4
  56. sempy_labs/report/_reportwrapper.py +2037 -0
  57. sempy_labs/tom/_model.py +333 -22
  58. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/LICENSE +0 -0
  59. {semantic_link_labs-0.7.4.dist-info → semantic_link_labs-0.8.1.dist-info}/top_level.txt +0 -0
sempy_labs/_spark.py CHANGED
@@ -91,9 +91,9 @@ def create_custom_pool(
     max_node_count: int,
     min_executors: int,
     max_executors: int,
-    node_family: Optional[str] = "MemoryOptimized",
-    auto_scale_enabled: Optional[bool] = True,
-    dynamic_executor_allocation_enabled: Optional[bool] = True,
+    node_family: str = "MemoryOptimized",
+    auto_scale_enabled: bool = True,
+    dynamic_executor_allocation_enabled: bool = True,
     workspace: Optional[str] = None,
 ):
     """
@@ -108,11 +108,11 @@ def create_custom_pool(
     min_node_count : int
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
     max_node_count : int
-        The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
+        The maximum node count.
     min_executors : int
         The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
     max_executors : int
-        The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
+        The maximum executors.
     node_family : str, default='MemoryOptimized'
         The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
     auto_scale_enabled : bool, default=True
@@ -182,13 +182,13 @@ def update_custom_pool(
         The `minimum node count <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#autoscaleproperties>`_.
+        The maximum node count.
         Defaults to None which keeps the existing property setting.
     min_executors : int, default=None
         The `minimum executors <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
-        The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#dynamicexecutorallocationproperties>`_.
+        The maximum executors.
         Defaults to None which keeps the existing property setting.
     node_family : str, default=None
         The `node family <https://learn.microsoft.com/rest/api/fabric/spark/custom-pools/create-workspace-custom-pool?tabs=HTTP#nodefamily>`_.
@@ -298,7 +298,9 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     )


-def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
+def get_spark_settings(
+    workspace: Optional[str] = None, return_dataframe: bool = True
+) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.

@@ -308,10 +310,12 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         The name of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+    return_dataframe : bool, default=True
+        If True, returns a pandas dataframe. If False, returns a json dictionary.

     Returns
     -------
-    pandas.DataFrame
+    pandas.DataFrame | dict
         A pandas dataframe showing the spark settings for a workspace.
     """

@@ -363,12 +367,15 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         "High Concurrency Enabled",
         "Customize Compute Enabled",
     ]
-    int_cols = ["Max Node Count", "Max Executors"]
+    # int_cols = ["Max Node Count", "Max Executors"]

     df[bool_cols] = df[bool_cols].astype(bool)
-    df[int_cols] = df[int_cols].astype(int)
+    # df[int_cols] = df[int_cols].astype(int)

-    return df
+    if return_dataframe:
+        return df
+    else:
+        return response.json()


 def update_spark_settings(
@@ -400,10 +407,10 @@ def update_spark_settings(
         `Default pool <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#poolproperties>`_ for workspace.
         Defaults to None which keeps the existing property setting.
     max_node_count : int, default=None
-        The `maximum node count <https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
+        The maximum node count.
         Defaults to None which keeps the existing property setting.
     max_executors : int, default=None
-        The `maximum executors <https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#starterpoolproperties>`_.
+        The maximum executors.
         Defaults to None which keeps the existing property setting.
     environment_name : str, default=None
         The name of the `default environment <https://learn.microsoft.com/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP#environmentproperties>`_. Empty string indicated there is no workspace default environment
@@ -420,38 +427,26 @@ def update_spark_settings(
     # https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)

-    dfS = get_spark_settings(workspace=workspace)
-
-    if automatic_log_enabled is None:
-        automatic_log_enabled = bool(dfS["Automatic Log Enabled"].iloc[0])
-    if high_concurrency_enabled is None:
-        high_concurrency_enabled = bool(dfS["High Concurrency Enabled"].iloc[0])
-    if customize_compute_enabled is None:
-        customize_compute_enabled = bool(dfS["Customize Compute Enabled"].iloc[0])
-    if default_pool_name is None:
-        default_pool_name = dfS["Default Pool Name"].iloc[0]
-    if max_node_count is None:
-        max_node_count = int(dfS["Max Node Count"].iloc[0])
-    if max_executors is None:
-        max_executors = int(dfS["Max Executors"].iloc[0])
-    if environment_name is None:
-        environment_name = dfS["Environment Name"].iloc[0]
-    if runtime_version is None:
-        runtime_version = dfS["Runtime Version"].iloc[0]
+    request_body = get_spark_settings(workspace=workspace, return_dataframe=False)

-    request_body = {
-        "automaticLog": {"enabled": automatic_log_enabled},
-        "highConcurrency": {"notebookInteractiveRunEnabled": high_concurrency_enabled},
-        "pool": {
-            "customizeComputeEnabled": customize_compute_enabled,
-            "defaultPool": {"name": default_pool_name, "type": "Workspace"},
-            "starterPool": {
-                "maxNodeCount": max_node_count,
-                "maxExecutors": max_executors,
-            },
-        },
-        "environment": {"name": environment_name, "runtimeVersion": runtime_version},
-    }
+    if automatic_log_enabled is not None:
+        request_body["automaticLog"]["enabled"] = automatic_log_enabled
+    if high_concurrency_enabled is not None:
+        request_body["highConcurrency"][
+            "notebookInteractiveRunEnabled"
+        ] = high_concurrency_enabled
+    if customize_compute_enabled is not None:
+        request_body["pool"]["customizeComputeEnabled"] = customize_compute_enabled
+    if default_pool_name is not None:
+        request_body["pool"]["defaultPool"]["name"] = default_pool_name
+    if max_node_count is not None:
+        request_body["pool"]["starterPool"]["maxNodeCount"] = max_node_count
+    if max_executors is not None:
+        request_body["pool"]["starterPool"]["maxExecutors"] = max_executors
+    if environment_name is not None:
+        request_body["environment"]["name"] = environment_name
+    if runtime_version is not None:
+        request_body["environment"]["runtimeVersion"] = runtime_version

     client = fabric.FabricRestClient()
     response = client.patch(
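
Reviewer note: with this change, update_spark_settings starts from the live settings payload (get_spark_settings(..., return_dataframe=False)) and patches only the arguments that were explicitly passed. A minimal usage sketch, assuming both functions remain re-exported from the top-level sempy_labs namespace; the workspace name and values below are placeholders:

import sempy_labs as labs

# Raw settings JSON via the new return_dataframe switch (0.8.1).
settings = labs.get_spark_settings(workspace="Sales Workspace", return_dataframe=False)
print(settings["pool"]["starterPool"])

# Only the arguments supplied here are patched; omitted settings keep their current values.
labs.update_spark_settings(
    automatic_log_enabled=True,
    max_node_count=4,  # placeholder value
    workspace="Sales Workspace",
)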
sempy_labs/_sql.py CHANGED
@@ -6,7 +6,7 @@ import struct
 import uuid
 from itertools import chain, repeat
 from sempy.fabric.exceptions import FabricHTTPException
-from sempy_labs._helper_functions import resolve_warehouse_id
+from sempy_labs._helper_functions import resolve_warehouse_id, resolve_lakehouse_id


 def bytes2mswin_bstr(value: bytes) -> bytes:
@@ -28,30 +28,48 @@ def bytes2mswin_bstr(value: bytes) -> bytes:
     return struct.pack("<i", len(encoded_bytes)) + encoded_bytes


-class ConnectWarehouse:
+class ConnectBase:
     def __init__(
         self,
-        warehouse: str,
+        name: str,
         workspace: Optional[Union[str, uuid.UUID]] = None,
         timeout: Optional[int] = None,
+        endpoint_type: str = "warehouse",
     ):
         from sempy.fabric._token_provider import SynapseTokenProvider
         import pyodbc

         workspace = fabric.resolve_workspace_name(workspace)
         workspace_id = fabric.resolve_workspace_id(workspace)
-        warehouse_id = resolve_warehouse_id(warehouse=warehouse, workspace=workspace)

-        # get the TDS endpoint
+        # Resolve the appropriate ID (warehouse or lakehouse)
+        if endpoint_type == "warehouse":
+            resource_id = resolve_warehouse_id(warehouse=name, workspace=workspace)
+        else:
+            resource_id = resolve_lakehouse_id(lakehouse=name, workspace=workspace)
+
+        # Get the TDS endpoint
         client = fabric.FabricRestClient()
-        response = client.get(f"v1/workspaces/{workspace_id}/warehouses/{warehouse_id}")
+        response = client.get(
+            f"v1/workspaces/{workspace_id}/{endpoint_type}s/{resource_id}"
+        )
         if response.status_code != 200:
             raise FabricHTTPException(response)
-        tds_endpoint = response.json().get("properties", {}).get("connectionString")

+        if endpoint_type == "warehouse":
+            tds_endpoint = response.json().get("properties", {}).get("connectionString")
+        else:
+            tds_endpoint = (
+                response.json()
+                .get("properties", {})
+                .get("sqlEndpointProperties", {})
+                .get("connectionString")
+            )
+
+        # Set up the connection string
         access_token = SynapseTokenProvider()()
         tokenstruct = bytes2mswin_bstr(access_token.encode())
-        conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={warehouse};Encrypt=Yes;"
+        conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={name};Encrypt=Yes;"

         if timeout is not None:
             conn_str += f"Connect Timeout={timeout};"
@@ -63,7 +81,7 @@ class ConnectWarehouse:
         self, sql: Union[str, List[str]]
     ) -> Union[List[pd.DataFrame], pd.DataFrame, None]:
         """
-        Runs a SQL or T-SQL query (or multiple queries) against a Fabric Warehouse.
+        Runs a SQL or T-SQL query (or multiple queries) against a Fabric Warehouse/Lakehouse.

         Parameters
         ----------
@@ -76,10 +94,10 @@ class ConnectWarehouse:
             A list of pandas DataFrames if multiple SQL queries return results,
             a single DataFrame if one query is executed and returns results, or None.
         """
+
         cursor = None
-        results = []  # To store results from multiple queries if needed
+        results = []

-        # If the input is a single string, convert it to a list for consistency
         if isinstance(sql, str):
             sql = [sql]

@@ -101,10 +119,7 @@ class ConnectWarehouse:
                     results.append(result)

             # Return results if any queries returned a result set
-            if results:
-                return results if len(results) > 1 else results[0]
-            else:
-                return None
+            return results if len(results) > 1 else (results[0] if results else None)

         finally:
             if cursor:
@@ -118,3 +133,33 @@ class ConnectWarehouse:

     def close(self):
         self.connection.close()
+
+
+class ConnectWarehouse(ConnectBase):
+    def __init__(
+        self,
+        warehouse: str,
+        workspace: Optional[Union[str, uuid.UUID]] = None,
+        timeout: Optional[int] = None,
+    ):
+        super().__init__(
+            name=warehouse,
+            workspace=workspace,
+            timeout=timeout,
+            endpoint_type="warehouse",
+        )
+
+
+class ConnectLakehouse(ConnectBase):
+    def __init__(
+        self,
+        lakehouse: str,
+        workspace: Optional[Union[str, uuid.UUID]] = None,
+        timeout: Optional[int] = None,
+    ):
+        super().__init__(
+            name=lakehouse,
+            workspace=workspace,
+            timeout=timeout,
+            endpoint_type="lakehouse",
+        )
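
Reviewer note: this refactor folds the former ConnectWarehouse logic into a shared ConnectBase and adds ConnectLakehouse for lakehouse SQL analytics endpoints. A minimal sketch, assuming both classes are re-exported from sempy_labs and that the method whose body appears in the hunks above is named query (its def line sits outside the hunk context); the lakehouse and workspace names are placeholders:

import sempy_labs as labs

conn = labs.ConnectLakehouse(lakehouse="MyLakehouse", workspace="Sales Workspace")
try:
    # Returns one DataFrame for a single result set, a list for several, or None.
    df = conn.query("SELECT TOP 10 * FROM dbo.DimDate")
finally:
    conn.close()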
sempy_labs/_vertipaq.py CHANGED
@@ -13,6 +13,7 @@ from sempy_labs._helper_functions import (
     resolve_dataset_id,
     save_as_delta_table,
     resolve_workspace_capacity,
+    get_max_run_id,
 )
 from sempy_labs._list_functions import list_relationships, list_tables
 from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -27,7 +28,7 @@ def vertipaq_analyzer(
     dataset: str,
     workspace: Optional[str] = None,
     export: Optional[str] = None,
-    read_stats_from_data: Optional[bool] = False,
+    read_stats_from_data: bool = False,
     **kwargs,
 ):
     """
@@ -336,10 +337,10 @@ def vertipaq_analyzer(
             int_cols.append(k)
         elif v in ["float", "double"] and k != "Temperature":
             pct_cols.append(k)
-    colSize[int_cols] = colSize[int_cols].applymap("{:,}".format)
-    temp[int_cols] = temp[int_cols].applymap("{:,}".format)
-    colSize[pct_cols] = colSize[pct_cols].applymap("{:.2f}%".format)
-    temp[pct_cols] = temp[pct_cols].applymap("{:.2f}%".format)
+    colSize[int_cols] = colSize[int_cols].map("{:,}".format)
+    temp[int_cols] = temp[int_cols].map("{:,}".format)
+    colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format)
+    temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format)

     # Tables
     int_cols = []
@@ -351,8 +352,8 @@ def vertipaq_analyzer(
             pct_cols.append(k)
     export_Table = dfT.copy()

-    dfT[int_cols] = dfT[int_cols].applymap("{:,}".format)
-    dfT[pct_cols] = dfT[pct_cols].applymap("{:.2f}%".format)
+    dfT[int_cols] = dfT[int_cols].map("{:,}".format)
+    dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format)

     # Relationships
     dfR = pd.merge(
@@ -391,7 +392,7 @@ def vertipaq_analyzer(
             int_cols.append(k)
     if not read_stats_from_data:
         int_cols.remove("Missing Rows")
-    dfR[int_cols] = dfR[int_cols].applymap("{:,}".format)
+    dfR[int_cols] = dfR[int_cols].map("{:,}".format)

     # Partitions
     dfP = dfP[
@@ -414,7 +415,7 @@ def vertipaq_analyzer(
         if v in ["int", "long", "double", "float"]:
             int_cols.append(k)
     intList = ["Record Count", "Segment Count", "Records per Segment"]
-    dfP[intList] = dfP[intList].applymap("{:,}".format)
+    dfP[intList] = dfP[intList].map("{:,}".format)

     # Hierarchies
     dfH_filt = dfH[dfH["Level Ordinal"] == 0]
@@ -426,7 +427,7 @@ def vertipaq_analyzer(
     dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
     export_Hier = dfH_filt.copy()
     intList = ["Used Size"]
-    dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)
+    dfH_filt[intList] = dfH_filt[intList].map("{:,}".format)

     # Model
     # Converting to KB/MB/GB necessitates division by 1024 * 1000.
@@ -456,7 +457,7 @@ def vertipaq_analyzer(
     for k, v in vertipaq_map["Model"].items():
         if v in ["long", "int"] and k != "Compatibility Level":
             int_cols.append(k)
-    dfModel[int_cols] = dfModel[int_cols].applymap("{:,}".format)
+    dfModel[int_cols] = dfModel[int_cols].map("{:,}".format)

     dataFrames = {
         "dfModel": dfModel,
@@ -483,26 +484,23 @@ def vertipaq_analyzer(
     )

     if export == "table":
-        spark = SparkSession.builder.getOrCreate()
+        # spark = SparkSession.builder.getOrCreate()

         lakehouse_id = fabric.get_lakehouse_id()
         lake_workspace = fabric.resolve_workspace_name()
         lakehouse = resolve_lakehouse_name(
             lakehouse_id=lakehouse_id, workspace=lake_workspace
         )
-        lakeTName = "vertipaq_analyzer_model"
+        lakeTName = "vertipaqanalyzer_model"

         lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
         lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

-        query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
-
         if len(lakeT_filt) == 0:
             runId = 1
         else:
-            dfSpark = spark.sql(query)
-            maxRunId = dfSpark.collect()[0][0]
-            runId = maxRunId + 1
+            max_run_id = get_max_run_id(table_name=lakeTName)
+            runId = max_run_id + 1

         dfMap = {
             "Columns": ["Columns", export_Col],
@@ -551,13 +549,13 @@ def vertipaq_analyzer(
            df.columns = df.columns.str.replace(" ", "_")

            schema = {
-                "Capacity_Name": "string",
-                "Capacity_Id": "string",
-                "Workspace_Name": "string",
-                "Workspace_Id": "string",
-                "Dataset_Name": "string",
-                "Dataset_Id": "string",
-                "Configured_By": "string",
+                "Capacity_Name": data_type_string,
+                "Capacity_Id": data_type_string,
+                "Workspace_Name": data_type_string,
+                "Workspace_Id": data_type_string,
+                "Dataset_Name": data_type_string,
+                "Dataset_Id": data_type_string,
+                "Configured_By": data_type_string,
            }

            schema.update(
@@ -566,6 +564,8 @@ def vertipaq_analyzer(
                    for key, value in vertipaq_map[key_name].items()
                }
            )
+            schema["RunId"] = data_type_long
+            schema["Timestamp"] = data_type_timestamp

            delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
            save_as_delta_table(
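
Reviewer note: the applymap-to-map edits above track pandas 2.1, where DataFrame.applymap was deprecated in favor of the element-wise DataFrame.map. A standalone illustration with throwaway data (not part of the package):

import pandas as pd  # pandas >= 2.1 for DataFrame.map

df = pd.DataFrame({"Row Count": [1000, 250000]})
# Same element-wise formatting idiom vertipaq_analyzer now uses.
print(df[["Row Count"]].map("{:,}".format))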
sempy_labs/_warehouses.py ADDED
@@ -0,0 +1,132 @@
+import sempy.fabric as fabric
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    pagination,
+    lro,
+)
+import pandas as pd
+from typing import Optional
+import sempy_labs._icons as icons
+from sempy.fabric.exceptions import FabricHTTPException
+
+
+def create_warehouse(
+    warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None
+):
+    """
+    Creates a Fabric warehouse.
+
+    Parameters
+    ----------
+    warehouse: str
+        Name of the warehouse.
+    description : str, default=None
+        A description of the warehouse.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    request_body = {"displayName": warehouse}
+
+    if description:
+        request_body["description"] = description
+
+    client = fabric.FabricRestClient()
+    response = client.post(
+        f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body
+    )
+
+    lro(client, response, status_codes=[201, 202])
+
+    print(
+        f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace}' workspace."
+    )
+
+
+def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame:
+    """
+    Shows the warehouses within a workspace.
+
+    Parameters
+    ----------
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing the warehouses within a workspace.
+    """
+
+    df = pd.DataFrame(
+        columns=[
+            "Warehouse Name",
+            "Warehouse Id",
+            "Description",
+            "Connection Info",
+            "Created Date",
+            "Last Updated Time",
+        ]
+    )
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    client = fabric.FabricRestClient()
+    response = client.get(f"/v1/workspaces/{workspace_id}/warehouses")
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    responses = pagination(client, response)
+
+    for r in responses:
+        for v in r.get("value", []):
+            prop = v.get("properties", {})
+
+            new_data = {
+                "Warehouse Name": v.get("displayName"),
+                "Warehouse Id": v.get("id"),
+                "Description": v.get("description"),
+                "Connection Info": prop.get("connectionInfo"),
+                "Created Date": prop.get("createdDate"),
+                "Last Updated Time": prop.get("lastUpdatedTime"),
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    return df
+
+
+def delete_warehouse(name: str, workspace: Optional[str] = None):
+    """
+    Deletes a Fabric warehouse.
+
+    Parameters
+    ----------
+    name: str
+        Name of the warehouse.
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    item_id = fabric.resolve_item_id(
+        item_name=name, type="Warehouse", workspace=workspace
+    )
+
+    client = fabric.FabricRestClient()
+    response = client.delete(f"/v1/workspaces/{workspace_id}/warehouses/{item_id}")
+
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    print(
+        f"{icons.green_dot} The '{name}' warehouse within the '{workspace}' workspace has been deleted."
+    )
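
Reviewer note: a short usage sketch for the new warehouse helpers. It assumes they are re-exported from the top-level sempy_labs namespace (the __init__.py diff is not reproduced here); the warehouse and workspace names are placeholders:

import sempy_labs as labs

# Create a warehouse, list what exists in the workspace, then clean up.
labs.create_warehouse(warehouse="SalesDW", description="Demo", workspace="Sales Workspace")
warehouses = labs.list_warehouses(workspace="Sales Workspace")
print(warehouses[["Warehouse Name", "Warehouse Id"]])
labs.delete_warehouse(name="SalesDW", workspace="Sales Workspace")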
sempy_labs/_workspaces.py CHANGED
@@ -22,9 +22,6 @@ def delete_user_from_workspace(email_address: str, workspace: Optional[str] = No
         The name of the workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-
-    Returns
-    -------
     """

     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
sempy_labs/admin/__init__.py ADDED
@@ -0,0 +1,53 @@
+from sempy_labs.admin._basic_functions import (
+    assign_workspaces_to_capacity,
+    list_capacities,
+    list_tenant_settings,
+    list_capacities_delegated_tenant_settings,
+    unassign_workspaces_from_capacity,
+    list_external_data_shares,
+    revoke_external_data_share,
+    list_workspaces,
+    list_datasets,
+    list_item_access_details,
+    list_access_entities,
+    list_workspace_access_details,
+    list_items,
+)
+from sempy_labs.admin._domains import (
+    list_domains,
+    list_domain_workspaces,
+    assign_domain_workspaces,
+    assign_domain_workspaces_by_capacities,
+    create_domain,
+    update_domain,
+    delete_domain,
+    resolve_domain_id,
+    unassign_domain_workspaces,
+    unassign_all_domain_workspaces,
+)
+
+__all__ = [
+    "list_items",
+    "list_workspace_access_details",
+    "list_access_entities",
+    "list_item_access_details",
+    "list_datasets",
+    "list_workspaces",
+    "assign_workspaces_to_capacity",
+    "list_capacities",
+    "list_tenant_settings",
+    "list_domains",
+    "list_domain_workspaces",
+    "assign_domain_workspaces",
+    "assign_domain_workspaces_by_capacities",
+    "create_domain",
+    "update_domain",
+    "delete_domain",
+    "resolve_domain_id",
+    "unassign_domain_workspaces",
+    "unassign_all_domain_workspaces",
+    "list_capacities_delegated_tenant_settings",
+    "unassign_workspaces_from_capacity",
+    "list_external_data_shares",
+    "revoke_external_data_share",
+]
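
Reviewer note: the new sempy_labs.admin subpackage groups the tenant-level (admin API) helpers listed above. A minimal import sketch; the calls below rely on default arguments and assume the caller has sufficient Fabric admin permissions:

import sempy_labs.admin as admin

# Tenant-wide inventory via the admin APIs.
workspaces = admin.list_workspaces()
capacities = admin.list_capacities()
domains = admin.list_domains()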