semantic-link-labs 0.7.2__py3-none-any.whl → 0.7.4__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

This version of semantic-link-labs has been flagged as potentially problematic.

Files changed (82)
  1. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/METADATA +15 -3
  2. semantic_link_labs-0.7.4.dist-info/RECORD +134 -0
  3. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +120 -24
  5. sempy_labs/_bpa_translation/{_translations_am-ET.po → _model/_translations_am-ET.po} +22 -0
  6. sempy_labs/_bpa_translation/{_translations_ar-AE.po → _model/_translations_ar-AE.po} +24 -0
  7. sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +938 -0
  8. sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +934 -0
  9. sempy_labs/_bpa_translation/{_translations_cs-CZ.po → _model/_translations_cs-CZ.po} +179 -157
  10. sempy_labs/_bpa_translation/{_translations_da-DK.po → _model/_translations_da-DK.po} +24 -0
  11. sempy_labs/_bpa_translation/{_translations_de-DE.po → _model/_translations_de-DE.po} +77 -52
  12. sempy_labs/_bpa_translation/{_translations_el-GR.po → _model/_translations_el-GR.po} +25 -0
  13. sempy_labs/_bpa_translation/{_translations_es-ES.po → _model/_translations_es-ES.po} +67 -43
  14. sempy_labs/_bpa_translation/{_translations_fa-IR.po → _model/_translations_fa-IR.po} +24 -0
  15. sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +915 -0
  16. sempy_labs/_bpa_translation/{_translations_fr-FR.po → _model/_translations_fr-FR.po} +83 -57
  17. sempy_labs/_bpa_translation/{_translations_ga-IE.po → _model/_translations_ga-IE.po} +25 -0
  18. sempy_labs/_bpa_translation/{_translations_he-IL.po → _model/_translations_he-IL.po} +23 -0
  19. sempy_labs/_bpa_translation/{_translations_hi-IN.po → _model/_translations_hi-IN.po} +24 -0
  20. sempy_labs/_bpa_translation/{_translations_hu-HU.po → _model/_translations_hu-HU.po} +25 -0
  21. sempy_labs/_bpa_translation/_model/_translations_id-ID.po +918 -0
  22. sempy_labs/_bpa_translation/{_translations_is-IS.po → _model/_translations_is-IS.po} +25 -0
  23. sempy_labs/_bpa_translation/{_translations_it-IT.po → _model/_translations_it-IT.po} +25 -0
  24. sempy_labs/_bpa_translation/{_translations_ja-JP.po → _model/_translations_ja-JP.po} +21 -0
  25. sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +823 -0
  26. sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +937 -0
  27. sempy_labs/_bpa_translation/{_translations_nl-NL.po → _model/_translations_nl-NL.po} +80 -56
  28. sempy_labs/_bpa_translation/{_translations_pl-PL.po → _model/_translations_pl-PL.po} +101 -76
  29. sempy_labs/_bpa_translation/{_translations_pt-BR.po → _model/_translations_pt-BR.po} +25 -0
  30. sempy_labs/_bpa_translation/{_translations_pt-PT.po → _model/_translations_pt-PT.po} +25 -0
  31. sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +939 -0
  32. sempy_labs/_bpa_translation/{_translations_ru-RU.po → _model/_translations_ru-RU.po} +25 -0
  33. sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +925 -0
  34. sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +922 -0
  35. sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +914 -0
  36. sempy_labs/_bpa_translation/{_translations_ta-IN.po → _model/_translations_ta-IN.po} +26 -0
  37. sempy_labs/_bpa_translation/{_translations_te-IN.po → _model/_translations_te-IN.po} +24 -0
  38. sempy_labs/_bpa_translation/{_translations_th-TH.po → _model/_translations_th-TH.po} +24 -0
  39. sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +925 -0
  40. sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +933 -0
  41. sempy_labs/_bpa_translation/{_translations_zh-CN.po → _model/_translations_zh-CN.po} +116 -97
  42. sempy_labs/_bpa_translation/{_translations_zu-ZA.po → _model/_translations_zu-ZA.po} +25 -0
  43. sempy_labs/_capacities.py +541 -0
  44. sempy_labs/_clear_cache.py +298 -3
  45. sempy_labs/_connections.py +138 -0
  46. sempy_labs/_dataflows.py +130 -0
  47. sempy_labs/_deployment_pipelines.py +171 -0
  48. sempy_labs/_environments.py +156 -0
  49. sempy_labs/_generate_semantic_model.py +148 -27
  50. sempy_labs/_git.py +380 -0
  51. sempy_labs/_helper_functions.py +203 -8
  52. sempy_labs/_icons.py +43 -0
  53. sempy_labs/_list_functions.py +170 -1012
  54. sempy_labs/_model_bpa.py +90 -112
  55. sempy_labs/_model_bpa_bulk.py +3 -1
  56. sempy_labs/_model_bpa_rules.py +788 -800
  57. sempy_labs/_notebooks.py +143 -0
  58. sempy_labs/_query_scale_out.py +28 -7
  59. sempy_labs/_spark.py +465 -0
  60. sempy_labs/_sql.py +120 -0
  61. sempy_labs/_translations.py +3 -1
  62. sempy_labs/_vertipaq.py +160 -99
  63. sempy_labs/_workspace_identity.py +66 -0
  64. sempy_labs/_workspaces.py +294 -0
  65. sempy_labs/directlake/__init__.py +2 -0
  66. sempy_labs/directlake/_directlake_schema_compare.py +1 -2
  67. sempy_labs/directlake/_directlake_schema_sync.py +1 -2
  68. sempy_labs/directlake/_dl_helper.py +4 -7
  69. sempy_labs/directlake/_generate_shared_expression.py +85 -0
  70. sempy_labs/directlake/_show_unsupported_directlake_objects.py +1 -2
  71. sempy_labs/lakehouse/_get_lakehouse_tables.py +7 -3
  72. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +5 -0
  73. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +5 -0
  74. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +6 -2
  75. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +6 -5
  76. sempy_labs/migration/_migration_validation.py +6 -0
  77. sempy_labs/report/_report_functions.py +21 -42
  78. sempy_labs/report/_report_rebind.py +5 -0
  79. sempy_labs/tom/_model.py +95 -52
  80. semantic_link_labs-0.7.2.dist-info/RECORD +0 -111
  81. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/LICENSE +0 -0
  82. {semantic_link_labs-0.7.2.dist-info → semantic_link_labs-0.7.4.dist-info}/top_level.txt +0 -0
sempy_labs/_sql.py ADDED
@@ -0,0 +1,120 @@
+ import sempy.fabric as fabric
+ import pandas as pd
+ from typing import Optional, Union, List
+ from sempy._utils._log import log
+ import struct
+ import uuid
+ from itertools import chain, repeat
+ from sempy.fabric.exceptions import FabricHTTPException
+ from sempy_labs._helper_functions import resolve_warehouse_id
+
+
+ def bytes2mswin_bstr(value: bytes) -> bytes:
+     """Convert a sequence of bytes into a (MS-Windows) BSTR (as bytes).
+
+     See https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-319190980
+     for the original code. It appears the input is converted to an
+     MS-Windows BSTR (in 'Little-endian' format).
+
+     See https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp\
+     /692a42a9-06ce-4394-b9bc-5d2a50440168
+     for more info on BSTR.
+
+     :param value: the sequence of bytes to convert
+     :return: the converted value (as a sequence of bytes)
+     """
+
+     encoded_bytes = bytes(chain.from_iterable(zip(value, repeat(0))))
+     return struct.pack("<i", len(encoded_bytes)) + encoded_bytes
+
+
+ class ConnectWarehouse:
+     def __init__(
+         self,
+         warehouse: str,
+         workspace: Optional[Union[str, uuid.UUID]] = None,
+         timeout: Optional[int] = None,
+     ):
+         from sempy.fabric._token_provider import SynapseTokenProvider
+         import pyodbc
+
+         workspace = fabric.resolve_workspace_name(workspace)
+         workspace_id = fabric.resolve_workspace_id(workspace)
+         warehouse_id = resolve_warehouse_id(warehouse=warehouse, workspace=workspace)
+
+         # get the TDS endpoint
+         client = fabric.FabricRestClient()
+         response = client.get(f"v1/workspaces/{workspace_id}/warehouses/{warehouse_id}")
+         if response.status_code != 200:
+             raise FabricHTTPException(response)
+         tds_endpoint = response.json().get("properties", {}).get("connectionString")
+
+         access_token = SynapseTokenProvider()()
+         tokenstruct = bytes2mswin_bstr(access_token.encode())
+         conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={warehouse};Encrypt=Yes;"
+
+         if timeout is not None:
+             conn_str += f"Connect Timeout={timeout};"
+
+         self.connection = pyodbc.connect(conn_str, attrs_before={1256: tokenstruct})
+
+     @log
+     def query(
+         self, sql: Union[str, List[str]]
+     ) -> Union[List[pd.DataFrame], pd.DataFrame, None]:
+         """
+         Runs a SQL or T-SQL query (or multiple queries) against a Fabric Warehouse.
+
+         Parameters
+         ----------
+         sql : str or List[str]
+             A single SQL or T-SQL query, or a list of queries to be executed.
+
+         Returns
+         -------
+         Union[List[pandas.DataFrame], pandas.DataFrame, None]
+             A list of pandas DataFrames if multiple SQL queries return results,
+             a single DataFrame if one query is executed and returns results, or None.
+         """
+         cursor = None
+         results = [] # To store results from multiple queries if needed
+
+         # If the input is a single string, convert it to a list for consistency
+         if isinstance(sql, str):
+             sql = [sql]
+
+         try:
+             cursor = self.connection.cursor()
+
+             for sql_query in sql:
+                 cursor.execute(sql_query)
+
+                 # Commit for non-select queries (like CREATE, INSERT, etc.)
+                 if not cursor.description:
+                     self.connection.commit()
+                 else:
+                     # Fetch and append results for queries that return a result set
+                     result = pd.DataFrame.from_records(
+                         cursor.fetchall(),
+                         columns=[col[0] for col in cursor.description],
+                     )
+                     results.append(result)
+
+             # Return results if any queries returned a result set
+             if results:
+                 return results if len(results) > 1 else results[0]
+             else:
+                 return None
+
+         finally:
+             if cursor:
+                 cursor.close()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, type, value, traceback):
+         self.close()
+
+     def close(self):
+         self.connection.close()
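
For reference, a minimal usage sketch of the new ConnectWarehouse class added above; the warehouse, workspace, and table names are placeholders, and the import path follows the module location in this diff.

# Minimal usage sketch of ConnectWarehouse (placeholders: "Sales Warehouse",
# "My Workspace", dbo.DimCustomer). The class is a context manager, so the
# underlying pyodbc connection is closed on exit.
from sempy_labs._sql import ConnectWarehouse

with ConnectWarehouse(warehouse="Sales Warehouse", workspace="My Workspace", timeout=30) as conn:
    # A single result-set query returns one pandas DataFrame.
    df = conn.query("SELECT TOP 10 * FROM dbo.DimCustomer")
    # A list of queries returns a list of DataFrames; statements with no result
    # set (CREATE, INSERT, ...) are committed and yield no DataFrame.
    dfs = conn.query(["SELECT 1 AS a", "SELECT 2 AS b"])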
sempy_labs/_translations.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
  from typing import List, Optional, Union
  from sempy._utils._log import log
  import sempy_labs._icons as icons
+ from sempy_labs._helper_functions import get_language_codes
 
 
  @log
@@ -32,7 +33,6 @@ def translate_semantic_model(
      -------
      pandas.DataFrame
          Shows a pandas dataframe which displays all of the translations in the semantic model.
-
      """
 
      from synapse.ml.services import Translate
@@ -49,6 +49,8 @@ def translate_semantic_model(
      if isinstance(languages, str):
          languages = [languages]
 
+     languages = get_language_codes(languages)
+
      df_prep = pd.DataFrame(
          columns=["Object Type", "Name", "Description", "Display Folder"]
      )
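
The new get_language_codes call appears to normalize the languages argument (for example, full language names to language codes) before translation. The sketch below illustrates that assumed pattern only; the helper's actual body lives in sempy_labs._helper_functions and is not part of this diff.

# Hypothetical illustration of the normalization step added above; the mapping
# table and pass-through behavior are assumptions, not the library's code.
from typing import List, Union

_assumed_code_map = {"italian": "it", "spanish": "es", "german": "de"}  # assumed subset

def get_language_codes_sketch(languages: Union[str, List[str]]) -> List[str]:
    if isinstance(languages, str):
        languages = [languages]
    # Map full names to codes where known; pass through values that already look like codes.
    return [_assumed_code_map.get(lang.lower(), lang) for lang in languages]

print(get_language_codes_sketch("Italian"))       # ['it']
print(get_language_codes_sketch(["es", "German"]))  # ['es', 'de']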
sempy_labs/_vertipaq.py CHANGED
@@ -14,7 +14,7 @@ from sempy_labs._helper_functions import (
      save_as_delta_table,
      resolve_workspace_capacity,
  )
- from sempy_labs._list_functions import list_relationships
+ from sempy_labs._list_functions import list_relationships, list_tables
  from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
  from sempy_labs.directlake import get_direct_lake_source
  from typing import Optional
@@ -68,22 +68,93 @@ def vertipaq_analyzer(
 
      workspace = fabric.resolve_workspace_name(workspace)
 
-     dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace)
+     data_type_string = "string"
+     data_type_long = "long"
+     data_type_timestamp = "timestamp"
+     data_type_double = "double"
+     data_type_bool = "bool"
+
+     vertipaq_map = {
+         "Model": {
+             "Dataset Name": data_type_string,
+             "Total Size": data_type_long,
+             "Table Count": data_type_long,
+             "Column Count": data_type_long,
+             "Compatibility Level": data_type_long,
+             "Default Mode": data_type_string,
+         },
+         "Tables": {
+             "Table Name": data_type_string,
+             "Type": data_type_string,
+             "Row Count": data_type_long,
+             "Total Size": data_type_long,
+             "Dictionary Size": data_type_long,
+             "Data Size": data_type_long,
+             "Hierarchy Size": data_type_long,
+             "Relationship Size": data_type_long,
+             "User Hierarchy Size": data_type_long,
+             "Partitions": data_type_long,
+             "Columns": data_type_long,
+             "% DB": data_type_double,
+         },
+         "Partitions": {
+             "Table Name": data_type_string,
+             "Partition Name": data_type_string,
+             "Mode": data_type_string,
+             "Record Count": data_type_long,
+             "Segment Count": data_type_long,
+             "Records per Segment": data_type_double,
+         },
+         "Columns": {
+             "Table Name": data_type_string,
+             "Column Name": data_type_string,
+             "Type": data_type_string,
+             "Cardinality": data_type_long,
+             "Total Size": data_type_long,
+             "Data Size": data_type_long,
+             "Dictionary Size": data_type_long,
+             "Hierarchy Size": data_type_long,
+             "% Table": data_type_double,
+             "% DB": data_type_double,
+             "Data Type": data_type_string,
+             "Encoding": data_type_string,
+             "Is Resident": data_type_bool,
+             "Temperature": data_type_double,
+             "Last Accessed": data_type_timestamp,
+         },
+         "Hierarchies": {
+             "Table Name": data_type_string,
+             "Hierarchy Name": data_type_string,
+             "Used Size": data_type_long,
+         },
+         "Relationships": {
+             "From Object": data_type_string,
+             "To Object": data_type_string,
+             "Multiplicity": data_type_string,
+             "Used Size": data_type_long,
+             "Max From Cardinality": data_type_long,
+             "Max To Cardinality": data_type_long,
+             "Missing Rows": data_type_long,
+         },
+     }
+
+     dfT = list_tables(dataset=dataset, extended=True, workspace=workspace)
      dfT.rename(columns={"Name": "Table Name"}, inplace=True)
+     columns_to_keep = list(vertipaq_map["Tables"].keys())
+     dfT = dfT[dfT.columns.intersection(columns_to_keep)]
+
      dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace)
      dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
      dfC.rename(columns={"Column Cardinality": "Cardinality"}, inplace=True)
      dfH = fabric.list_hierarchies(dataset=dataset, extended=True, workspace=workspace)
      dfR = list_relationships(dataset=dataset, extended=True, workspace=workspace)
-     dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
-     dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
      dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
      artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
          get_direct_lake_source(dataset=dataset, workspace=workspace)
      )
 
      with connect_semantic_model(
-         dataset=dataset, readonly=True, workspace=workspace
+         dataset=dataset, workspace=workspace, readonly=True
      ) as tom:
          compat_level = tom.model.Model.Database.CompatibilityLevel
          is_direct_lake = tom.is_direct_lake()
@@ -230,7 +301,7 @@
 
      query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),isblank({toObject}))\n)"
 
-     if isActive is False: # add userelationship
+     if not isActive:
          query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)"
 
      result = fabric.evaluate_dax(
@@ -245,81 +316,45 @@
      dfR.at[i, "Missing Rows"] = missingRows
      dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
 
-     dfTP = dfP.groupby("Table Name")["Partition Name"].count().reset_index()
-     dfTP.rename(columns={"Partition Name": "Partitions"}, inplace=True)
-     dfTC = dfC.groupby("Table Name")["Column Name"].count().reset_index()
-     dfTC.rename(columns={"Column Name": "Columns"}, inplace=True)
-
-     total_size = dfC["Total Size"].sum()
-     table_sizes = dfC.groupby("Table Name")["Total Size"].sum().reset_index()
-     table_sizes.rename(columns={"Total Size": "Table Size"}, inplace=True)
-
-     # Columns
-     dfC_filt = dfC[~dfC["Column Name"].str.startswith("RowNumber-")]
-     dfC_filt["% DB"] = round((dfC_filt["Total Size"] / total_size) * 100, 2)
-     dfC_filt = pd.merge(dfC_filt, table_sizes, on="Table Name", how="left")
-     dfC_filt["% Table"] = round(
-         (dfC_filt["Total Size"] / dfC_filt["Table Size"]) * 100, 2
-     )
-     columnList = [
-         "Table Name",
-         "Column Name",
-         "Type",
-         "Cardinality",
-         "Total Size",
-         "Data Size",
-         "Dictionary Size",
-         "Hierarchy Size",
-         "% Table",
-         "% DB",
-         "Data Type",
-         "Encoding",
-         "Is Resident",
-         "Temperature",
-         "Last Accessed",
-     ]
+     table_totals = dfC.groupby("Table Name")["Total Size"].transform("sum")
+     db_total_size = dfC["Total Size"].sum()
+     dfC["% Table"] = round((dfC["Total Size"] / table_totals) * 100, 2)
+     dfC["% DB"] = round((dfC["Total Size"] / db_total_size) * 100, 2)
+     columnList = list(vertipaq_map["Columns"].keys())
 
-     colSize = dfC_filt[columnList].sort_values(by="Total Size", ascending=False)
-     temp = dfC_filt[columnList].sort_values(by="Temperature", ascending=False)
+     colSize = dfC[columnList].sort_values(by="Total Size", ascending=False)
+     temp = dfC[columnList].sort_values(by="Temperature", ascending=False)
      colSize.reset_index(drop=True, inplace=True)
      temp.reset_index(drop=True, inplace=True)
 
      export_Col = colSize.copy()
 
-     intList = [
-         "Cardinality",
-         "Total Size",
-         "Data Size",
-         "Dictionary Size",
-         "Hierarchy Size",
-     ]
-     pctList = ["% Table", "% DB"]
-     colSize[intList] = colSize[intList].applymap("{:,}".format)
-     temp[intList] = temp[intList].applymap("{:,}".format)
-     colSize[pctList] = colSize[pctList].applymap("{:.2f}%".format)
-     temp[pctList] = temp[pctList].applymap("{:.2f}%".format)
+     int_cols = []
+     pct_cols = []
+     for k, v in vertipaq_map["Columns"].items():
+         if v in ["int", "long"]:
+             int_cols.append(k)
+         elif v in ["float", "double"] and k != "Temperature":
+             pct_cols.append(k)
+     colSize[int_cols] = colSize[int_cols].applymap("{:,}".format)
+     temp[int_cols] = temp[int_cols].applymap("{:,}".format)
+     colSize[pct_cols] = colSize[pct_cols].applymap("{:.2f}%".format)
+     temp[pct_cols] = temp[pct_cols].applymap("{:.2f}%".format)
 
      # Tables
-     intList = ["Total Size", "Data Size", "Dictionary Size", "Hierarchy Size"]
-     dfCSum = dfC.groupby(["Table Name"])[intList].sum().reset_index()
-     dfCSum["% DB"] = round((dfCSum["Total Size"] / total_size) * 100, 2)
-
-     dfTable = pd.merge(
-         dfT[["Table Name", "Type", "Row Count"]], dfCSum, on="Table Name", how="inner"
-     )
-     dfTable = pd.merge(dfTable, dfTP, on="Table Name", how="left")
-     dfTable = pd.merge(dfTable, dfTC, on="Table Name", how="left")
-     dfTable = dfTable.sort_values(by="Total Size", ascending=False)
-     dfTable.reset_index(drop=True, inplace=True)
-     export_Table = dfTable.copy()
-
-     intList.extend(["Row Count", "Partitions", "Columns"])
-     dfTable[intList] = dfTable[intList].applymap("{:,}".format)
-     pctList = ["% DB"]
-     dfTable[pctList] = dfTable[pctList].applymap("{:.2f}%".format)
+     int_cols = []
+     pct_cols = []
+     for k, v in vertipaq_map["Tables"].items():
+         if v in ["int", "long"]:
+             int_cols.append(k)
+         elif v in ["float", "double"]:
+             pct_cols.append(k)
+     export_Table = dfT.copy()
+
+     dfT[int_cols] = dfT[int_cols].applymap("{:,}".format)
+     dfT[pct_cols] = dfT[pct_cols].applymap("{:.2f}%".format)
 
      # Relationships
-     # dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True)
      dfR = pd.merge(
          dfR,
          dfC[["Column Object", "Cardinality"]],
@@ -349,15 +384,14 @@
      ].sort_values(by="Used Size", ascending=False)
      dfR.reset_index(drop=True, inplace=True)
      export_Rel = dfR.copy()
-     intList = [
-         "Used Size",
-         "Max From Cardinality",
-         "Max To Cardinality",
-         "Missing Rows",
-     ]
-     if read_stats_from_data is False:
-         intList.remove("Missing Rows")
-     dfR[intList] = dfR[intList].applymap("{:,}".format)
+
+     int_cols = []
+     for k, v in vertipaq_map["Relationships"].items():
+         if v in ["int", "long"]:
+             int_cols.append(k)
+     if not read_stats_from_data:
+         int_cols.remove("Missing Rows")
+     dfR[int_cols] = dfR[int_cols].applymap("{:,}".format)
 
      # Partitions
      dfP = dfP[
@@ -375,6 +409,10 @@
      ) # Remove after records per segment is fixed
      dfP.reset_index(drop=True, inplace=True)
      export_Part = dfP.copy()
+     int_cols = []
+     for k, v in vertipaq_map["Partitions"].items():
+         if v in ["int", "long", "double", "float"]:
+             int_cols.append(k)
      intList = ["Record Count", "Segment Count", "Records per Segment"]
      dfP[intList] = dfP[intList].applymap("{:,}".format)
 
@@ -391,12 +429,13 @@
      dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)
 
      # Model
-     if total_size >= 1000000000:
-         y = total_size / (1024**3) * 1000000000
-     elif total_size >= 1000000:
-         y = total_size / (1024**2) * 1000000
-     elif total_size >= 1000:
-         y = total_size / (1024) * 1000
+     # Converting to KB/MB/GB necessitates division by 1024 * 1000.
+     if db_total_size >= 1000000000:
+         y = db_total_size / (1024**3) * 1000000000
+     elif db_total_size >= 1000000:
+         y = db_total_size / (1024**2) * 1000000
+     elif db_total_size >= 1000:
+         y = db_total_size / (1024) * 1000
      y = round(y)
 
      dfModel = pd.DataFrame(
@@ -413,12 +452,15 @@
      dfModel.reset_index(drop=True, inplace=True)
      dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
      export_Model = dfModel.copy()
-     intList = ["Total Size", "Table Count", "Column Count"]
-     dfModel[intList] = dfModel[intList].applymap("{:,}".format)
+     int_cols = []
+     for k, v in vertipaq_map["Model"].items():
+         if v in ["long", "int"] and k != "Compatibility Level":
+             int_cols.append(k)
+     dfModel[int_cols] = dfModel[int_cols].applymap("{:,}".format)
 
      dataFrames = {
          "dfModel": dfModel,
-         "dfTable": dfTable,
+         "dfT": dfT,
          "dfP": dfP,
          "colSize": colSize,
          "temp": temp,
@@ -430,7 +472,8 @@
      for fileName, df in dataFrames.items():
          dfs[fileName] = df
 
-     visualize_vertipaq(dfs)
+     if export is None:
+         visualize_vertipaq(dfs)
 
      # Export vertipaq to delta tables in lakehouse
      if export in ["table", "zip"]:
@@ -462,12 +505,12 @@
          runId = maxRunId + 1
 
          dfMap = {
-             "export_Col": ["Columns", export_Col],
-             "export_Table": ["Tables", export_Table],
-             "export_Part": ["Partitions", export_Part],
-             "export_Rel": ["Relationships", export_Rel],
-             "export_Hier": ["Hierarchies", export_Hier],
-             "export_Model": ["Model", export_Model],
+             "Columns": ["Columns", export_Col],
+             "Tables": ["Tables", export_Table],
+             "Partitions": ["Partitions", export_Part],
+             "Relationships": ["Relationships", export_Rel],
+             "Hierarchies": ["Hierarchies", export_Hier],
+             "Model": ["Model", export_Model],
          }
 
          print(
@@ -479,7 +522,7 @@
          configured_by = dfD_filt["Configured By"].iloc[0]
          capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
 
-         for key, (obj, df) in dfMap.items():
+         for key_name, (obj, df) in dfMap.items():
              df["Capacity Name"] = capacity_name
              df["Capacity Id"] = capacity_id
              df["Configured By"] = configured_by
@@ -507,11 +550,29 @@
 
              df.columns = df.columns.str.replace(" ", "_")
 
+             schema = {
+                 "Capacity_Name": "string",
+                 "Capacity_Id": "string",
+                 "Workspace_Name": "string",
+                 "Workspace_Id": "string",
+                 "Dataset_Name": "string",
+                 "Dataset_Id": "string",
+                 "Configured_By": "string",
+             }
+
+             schema.update(
+                 {
+                     key.replace(" ", "_"): value
+                     for key, value in vertipaq_map[key_name].items()
+                 }
+             )
+
              delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
              save_as_delta_table(
                  dataframe=df,
                  delta_table_name=delta_table_name,
                  write_mode="append",
+                 schema=schema,
                  merge_schema=True,
              )
 
@@ -519,7 +580,7 @@
      if export == "zip":
          dataFrames = {
              "dfModel": dfModel,
-             "dfTable": dfTable,
+             "dfT": dfT,
              "dfP": dfP,
              "colSize": colSize,
              "temp": temp,
@@ -797,7 +858,7 @@ def visualize_vertipaq(dataframes):
      # define the dictionary with {"Tab name":df}
      df_dict = {
          "Model Summary": dataframes["dfModel"],
-         "Tables": dataframes["dfTable"],
+         "Tables": dataframes["dfT"],
          "Partitions": dataframes["dfP"],
          "Columns (Total Size)": dataframes["colSize"],
          "Columns (Temperature)": dataframes["temp"],
sempy_labs/_workspace_identity.py ADDED
@@ -0,0 +1,66 @@
+ import sempy.fabric as fabric
+ from sempy_labs._helper_functions import (
+     resolve_workspace_name_and_id,
+     lro,
+ )
+ from typing import Optional
+ import sempy_labs._icons as icons
+ from sempy.fabric.exceptions import FabricHTTPException
+
+
+ def provision_workspace_identity(workspace: Optional[str] = None):
+     """
+     Provisions a workspace identity for a workspace.
+
+     Parameters
+     ----------
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     """
+
+     # https://learn.microsoft.com/en-us/rest/api/fabric/core/workspaces/provision-identity?tabs=HTTP
+
+     workspace, workspace_id = resolve_workspace_name_and_id(workspace)
+
+     client = fabric.FabricRestClient()
+     response = client.post(f"/v1/workspaces/{workspace_id}/provisionIdentity")
+
+     if response.status_code not in [200, 202]:
+         raise FabricHTTPException(response)
+
+     lro(client, response)
+
+     print(
+         f"{icons.green_dot} A workspace identity has been provisioned for the '{workspace}' workspace."
+     )
+
+
+ def deprovision_workspace_identity(workspace: Optional[str] = None):
+     """
+     Deprovisions a workspace identity for a workspace.
+
+     Parameters
+     ----------
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     """
+
+     # https://learn.microsoft.com/en-us/rest/api/fabric/core/workspaces/deprovision-identity?tabs=HTTP
+
+     workspace, workspace_id = resolve_workspace_name_and_id(workspace)
+
+     client = fabric.FabricRestClient()
+     response = client.post(f"/v1/workspaces/{workspace_id}/deprovisionIdentity")
+
+     if response.status_code not in [200, 202]:
+         raise FabricHTTPException(response)
+
+     lro(client, response)
+
+     print(
+         f"{icons.green_dot} The workspace identity has been deprovisioned from the '{workspace}' workspace."
+     )