semantic-link-labs 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (35)
  1. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/METADATA +8 -4
  2. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/RECORD +35 -34
  3. sempy_labs/__init__.py +14 -0
  4. sempy_labs/_capacities.py +89 -11
  5. sempy_labs/_capacity_migration.py +167 -60
  6. sempy_labs/_clear_cache.py +3 -3
  7. sempy_labs/_data_pipelines.py +48 -0
  8. sempy_labs/_external_data_shares.py +188 -0
  9. sempy_labs/_generate_semantic_model.py +0 -1
  10. sempy_labs/_git.py +1 -1
  11. sempy_labs/_helper_functions.py +20 -16
  12. sempy_labs/_list_functions.py +6 -3
  13. sempy_labs/_model_bpa.py +7 -5
  14. sempy_labs/_model_bpa_bulk.py +3 -5
  15. sempy_labs/_notebooks.py +4 -3
  16. sempy_labs/_sql.py +2 -2
  17. sempy_labs/_translations.py +14 -14
  18. sempy_labs/_vertipaq.py +121 -101
  19. sempy_labs/_warehouses.py +11 -1
  20. sempy_labs/admin/__init__.py +2 -0
  21. sempy_labs/admin/_basic_functions.py +124 -21
  22. sempy_labs/directlake/_directlake_schema_sync.py +0 -5
  23. sempy_labs/directlake/_generate_shared_expression.py +1 -1
  24. sempy_labs/directlake/_guardrails.py +1 -1
  25. sempy_labs/directlake/_show_unsupported_directlake_objects.py +1 -1
  26. sempy_labs/migration/_create_pqt_file.py +2 -2
  27. sempy_labs/report/_generate_report.py +10 -14
  28. sempy_labs/report/_report_bpa.py +8 -10
  29. sempy_labs/report/_report_functions.py +13 -19
  30. sempy_labs/report/_report_rebind.py +4 -1
  31. sempy_labs/report/_reportwrapper.py +3 -3
  32. sempy_labs/tom/_model.py +109 -34
  33. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/LICENSE +0 -0
  34. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/WHEEL +0 -0
  35. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ from uuid import UUID
  import sempy_labs._icons as icons
  import urllib.parse
  from azure.core.credentials import TokenCredential, AccessToken
- import deltalake


  def create_abfss_path(
@@ -962,15 +961,15 @@ class FabricTokenCredential(TokenCredential):
  **kwargs: any,
  ) -> AccessToken:

- from notebookutils import mssparkutils
+ import notebookutils

- token = mssparkutils.credentials.getToken(scopes)
+ token = notebookutils.credentials.getToken(scopes)
  access_token = AccessToken(token, 0)

  return access_token


- def get_adls_client(account_name):
+ def _get_adls_client(account_name):

  from azure.storage.filedatalake import DataLakeServiceClient
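
The token helpers now import the top-level notebookutils module instead of mssparkutils. A minimal, hedged sketch of acquiring a token this way (it only runs inside a Fabric/Synapse notebook where notebookutils is preinstalled; the scope string is an illustrative assumption):

    import notebookutils
    from azure.core.credentials import AccessToken

    # Assumed example scope; any audience accepted by the Fabric runtime works here.
    scope = "https://analysis.windows.net/powerbi/api/.default"
    token = notebookutils.credentials.getToken(scope)
    # Mirrors the hunk above: wrap the raw token with an expiry of 0.
    access_token = AccessToken(token, 0)
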
@@ -1018,19 +1017,22 @@ def get_language_codes(languages: str | List[str]):
  return languages


- def get_azure_token_credentials(
+ def _get_azure_token_credentials(
  key_vault_uri: str,
  key_vault_tenant_id: str,
  key_vault_client_id: str,
  key_vault_client_secret: str,
+ audience: str = "https://management.azure.com/.default",
  ) -> Tuple[str, str, dict]:

- from notebookutils import mssparkutils
+ import notebookutils
  from azure.identity import ClientSecretCredential

- tenant_id = mssparkutils.credentials.getSecret(key_vault_uri, key_vault_tenant_id)
- client_id = mssparkutils.credentials.getSecret(key_vault_uri, key_vault_client_id)
- client_secret = mssparkutils.credentials.getSecret(
+ # "https://analysis.windows.net/powerbi/api/.default"
+
+ tenant_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_tenant_id)
+ client_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_client_id)
+ client_secret = notebookutils.credentials.getSecret(
  key_vault_uri, key_vault_client_secret
  )

@@ -1038,7 +1040,7 @@ def get_azure_token_credentials(
  tenant_id=tenant_id, client_id=client_id, client_secret=client_secret
  )

- token = credential.get_token("https://management.azure.com/.default").token
+ token = credential.get_token(audience).token

  headers = {
  "Authorization": f"Bearer {token}",
@@ -1078,7 +1080,7 @@ def resolve_environment_id(environment: str, workspace: Optional[str] = None) ->
  )


- def make_clickable(val):
+ def _make_clickable(val):

  return f'<a target="_blank" href="{val}">{val}</a>'

@@ -1133,11 +1135,13 @@ def generate_guid():
  return str(uuid.uuid4())


- def get_max_run_id(table_name: str) -> int:
+ def _get_max_run_id(lakehouse: str, table_name: str) -> int:
+
+ from pyspark.sql import SparkSession

- table_path = f"/lakehouse/default/Tables/{table_name}/"
- delta_table = deltalake.DeltaTable(table_path)
- data = delta_table.to_pandas()
- max_run_id = data["RunId"].max()
+ spark = SparkSession.builder.getOrCreate()
+ query = f"SELECT MAX(RunId) FROM {lakehouse}.{table_name}"
+ dfSpark = spark.sql(query)
+ max_run_id = dfSpark.collect()[0][0]

  return max_run_id
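
The run-ID lookup now goes through Spark SQL against the attached lakehouse instead of reading the Delta table with the deltalake package (which is dropped from the imports above). A standalone, hedged sketch of that pattern, assuming a Spark session and a lakehouse table that has a RunId column:

    from pyspark.sql import SparkSession

    def max_run_id_sketch(lakehouse: str, table_name: str) -> int:
        # Query the table registered under the lakehouse; MAX returns None on an empty table.
        spark = SparkSession.builder.getOrCreate()
        row = spark.sql(f"SELECT MAX(RunId) AS max_id FROM {lakehouse}.{table_name}").collect()[0]
        return row["max_id"]
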
sempy_labs/_list_functions.py CHANGED
@@ -1487,7 +1487,7 @@ def list_semantic_model_object_report_usage(
  is sorted descending by 'Report Usage Count'.
  """

- from sempy_labs._model_dependencies import get_measure_dependencies
+ from sempy_labs._model_dependencies import get_model_calc_dependencies
  from sempy_labs._helper_functions import format_dax_object_name

  workspace = fabric.resolve_workspace_name(workspace)
@@ -1503,7 +1503,7 @@ def list_semantic_model_object_report_usage(
  )
  else:
  df = pd.DataFrame(columns=["Table Name", "Object Name", "Object Type"])
- dep = get_measure_dependencies(dataset=dataset, workspace=workspace)
+ dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)

  for i, r in dfR.iterrows():
  object_type = r["Object Type"]
@@ -1515,7 +1515,10 @@ def list_semantic_model_object_report_usage(
  "Object Type": object_type,
  }
  df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
- if object_type == "Measure":
+ df["Referenced Object Type"] = df["Referenced Object Type"].replace(
+ "Attribute Hierarchy", "Column"
+ )
+ if object_type in ["Measure", "Calc Column", "Calc Table", "Hierarchy"]:
  df_filt = dep[dep["Object Name"] == object_name][
  ["Referenced Table", "Referenced Object", "Referenced Object Type"]
  ]
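
Switching from get_measure_dependencies to get_model_calc_dependencies broadens the usage analysis beyond measures, and the extra replace normalizes TOM's "Attribute Hierarchy" references back to plain columns before they are counted. A hedged pandas illustration of that normalization on made-up dependency rows:

    import pandas as pd

    # Hypothetical dependency rows shaped like the frame used above.
    dep = pd.DataFrame(
        {
            "Referenced Table": ["Sales", "Date"],
            "Referenced Object": ["Amount", "Year"],
            "Referenced Object Type": ["Column", "Attribute Hierarchy"],
        }
    )
    # Attribute hierarchies count as their underlying columns.
    dep["Referenced Object Type"] = dep["Referenced Object Type"].replace(
        "Attribute Hierarchy", "Column"
    )
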
sempy_labs/_model_bpa.py CHANGED
@@ -12,7 +12,7 @@ from sempy_labs._helper_functions import (
  resolve_workspace_capacity,
  resolve_dataset_id,
  get_language_codes,
- get_max_run_id,
+ _get_max_run_id,
  )
  from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
  from sempy_labs.tom import connect_semantic_model
@@ -220,9 +220,9 @@ def run_model_bpa(

  rules = translate_using_spark(rules)

- rules["Severity"].replace("Warning", icons.warning, inplace=True)
- rules["Severity"].replace("Error", icons.error, inplace=True)
- rules["Severity"].replace("Info", icons.info, inplace=True)
+ rules.loc[rules["Severity"] == "Warning", "Severity"] = icons.warning
+ rules.loc[rules["Severity"] == "Error", "Severity"] = icons.error
+ rules.loc[rules["Severity"] == "Info", "Severity"] = icons.info

  pd.set_option("display.max_colwidth", 1000)
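
The severity icons are now assigned with .loc boolean-mask assignment rather than calling .replace(..., inplace=True) on a column selection, which sidesteps the chained-assignment problem (under copy-on-write, an inplace replace on a selected column may not write back to the original frame). A small hedged illustration with placeholder icons:

    import pandas as pd

    rules = pd.DataFrame({"Severity": ["Warning", "Error", "Info"]})
    icon_map = {"Warning": "(!)", "Error": "(x)", "Info": "(i)"}  # placeholder icons

    # Mask assignment always writes back to the original frame.
    for level, icon in icon_map.items():
        rules.loc[rules["Severity"] == level, "Severity"] = icon
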
@@ -350,7 +350,9 @@ def run_model_bpa(
  if len(lakeT_filt) == 0:
  runId = 1
  else:
- max_run_id = get_max_run_id(table_name=delta_table_name)
+ max_run_id = _get_max_run_id(
+ lakehouse=lakehouse, table_name=delta_table_name
+ )
  runId = max_run_id + 1

  now = datetime.datetime.now()
sempy_labs/_model_bpa_bulk.py CHANGED
@@ -6,7 +6,7 @@ from sempy_labs._helper_functions import (
  save_as_delta_table,
  resolve_workspace_capacity,
  retry,
- get_max_run_id,
+ _get_max_run_id,
  )
  from sempy_labs.lakehouse import (
  get_lakehouse_tables,
@@ -49,8 +49,6 @@ def run_model_bpa_bulk(
  The semantic models to always skip when running this analysis.
  """

- import pyspark.sql.functions as F
-
  if not lakehouse_attached():
  raise ValueError(
  f"{icons.red_dot} No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
@@ -92,7 +90,7 @@ def run_model_bpa_bulk(
  if len(lakeT_filt) == 0:
  runId = 1
  else:
- max_run_id = get_max_run_id(table_name=output_table)
+ max_run_id = _get_max_run_id(lakehouse=lakehouse, table_name=output_table)
  runId = max_run_id + 1

  if isinstance(workspace, str):
@@ -162,7 +160,7 @@ def run_model_bpa_bulk(
  )
  print(e)

- df["Severity"].replace(icons.severity_mapping, inplace=True)
+ df["Severity"].replace(icons.severity_mapping)

  # Append save results individually for each workspace (so as not to create a giant dataframe)
  print(
sempy_labs/_notebooks.py CHANGED
@@ -8,7 +8,6 @@ from sempy_labs._helper_functions import (
  resolve_workspace_name_and_id,
  lro,
  _decode_b64,
- resolve_notebook_id,
  )
  from sempy.fabric.exceptions import FabricHTTPException

@@ -38,10 +37,12 @@ def get_notebook_definition(
  """

  (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
- notebook_id = resolve_notebook_id(notebook=notebook_name, workspace=workspace)
+ item_id = fabric.resolve_item_id(
+ item_name=notebook_name, type="Notebook", workspace=workspace
+ )
  client = fabric.FabricRestClient()
  response = client.post(
- f"v1/workspaces/{workspace_id}/notebooks/{notebook_id}/getDefinition",
+ f"v1/workspaces/{workspace_id}/notebooks/{item_id}/getDefinition",
  )

  result = lro(client, response).json()
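
The bespoke resolve_notebook_id helper gives way to sempy's generic item resolver. A hedged usage sketch of the call pattern shown in the hunk (run inside a Fabric notebook; the notebook name is a placeholder):

    import sempy.fabric as fabric

    # Resolve a notebook's item ID by name, as the updated get_notebook_definition does.
    item_id = fabric.resolve_item_id(
        item_name="My Notebook",  # hypothetical notebook name
        type="Notebook",
        workspace=None,           # None resolves to the current workspace
    )
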
sempy_labs/_sql.py CHANGED
@@ -9,7 +9,7 @@ from sempy.fabric.exceptions import FabricHTTPException
  from sempy_labs._helper_functions import resolve_warehouse_id, resolve_lakehouse_id


- def bytes2mswin_bstr(value: bytes) -> bytes:
+ def _bytes2mswin_bstr(value: bytes) -> bytes:
  """Convert a sequence of bytes into a (MS-Windows) BSTR (as bytes).

  See https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-319190980
@@ -68,7 +68,7 @@ class ConnectBase:

  # Set up the connection string
  access_token = SynapseTokenProvider()()
- tokenstruct = bytes2mswin_bstr(access_token.encode())
+ tokenstruct = _bytes2mswin_bstr(access_token.encode())
  conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={name};Encrypt=Yes;"

  if timeout is not None:
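
The renamed helper packs an access token into the length-prefixed, UTF-16-LE-expanded byte layout that pyodbc expects for SQL Server token authentication (see the linked pyodbc issue). A hedged sketch of that general pattern, not the library's exact implementation:

    import struct

    def pack_token_for_pyodbc(token: str) -> bytes:
        # Interleave each token byte with a zero byte (UTF-16-LE expansion),
        # then prefix the result with its 4-byte little-endian length.
        expanded = bytes(b for ch in token.encode("utf-8") for b in (ch, 0))
        return struct.pack("<i", len(expanded)) + expanded
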
sempy_labs/_translations.py CHANGED
@@ -40,7 +40,7 @@ def translate_semantic_model(
  from pyspark.sql import SparkSession
  from sempy_labs.tom import connect_semantic_model

- def clean_text(text, exclude_chars):
+ def _clean_text(text, exclude_chars):
  if exclude_chars:
  for char in exclude_chars:
  text = text.replace(char, " ")
@@ -60,8 +60,8 @@ def translate_semantic_model(
  ) as tom:

  for o in tom.model.Tables:
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
  new_data = {
  "Object Type": "Table",
  "Name": o.Name,
@@ -75,9 +75,9 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_columns():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
- oDisplayFolder = clean_text(o.DisplayFolder, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
+ oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
  new_data = {
  "Object Type": "Column",
  "Name": o.Name,
@@ -91,9 +91,9 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_measures():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
- oDisplayFolder = clean_text(o.DisplayFolder, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
+ oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
  new_data = {
  "Object Type": "Measure",
  "Name": o.Name,
@@ -107,9 +107,9 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_hierarchies():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
- oDisplayFolder = clean_text(o.DisplayFolder, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
+ oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
  new_data = {
  "Object Type": "Hierarchy",
  "Name": o.Name,
@@ -123,8 +123,8 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_levels():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
  new_data = {
  "Object Type": "Level",
  "Name": o.Name,
sempy_labs/_vertipaq.py CHANGED
@@ -13,7 +13,7 @@ from sempy_labs._helper_functions import (
  resolve_dataset_id,
  save_as_delta_table,
  resolve_workspace_capacity,
- get_max_run_id,
+ _get_max_run_id,
  )
  from sempy_labs._list_functions import list_relationships, list_tables
  from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -74,68 +74,71 @@ def vertipaq_analyzer(
  data_type_timestamp = "timestamp"
  data_type_double = "double"
  data_type_bool = "bool"
+ int_format = "int"
+ pct_format = "pct"
+ no_format = ""

  vertipaq_map = {
  "Model": {
- "Dataset Name": data_type_string,
- "Total Size": data_type_long,
- "Table Count": data_type_long,
- "Column Count": data_type_long,
- "Compatibility Level": data_type_long,
- "Default Mode": data_type_string,
+ "Dataset Name": [data_type_string, no_format],
+ "Total Size": [data_type_long, int_format],
+ "Table Count": [data_type_long, int_format],
+ "Column Count": [data_type_long, int_format],
+ "Compatibility Level": [data_type_long, no_format],
+ "Default Mode": [data_type_string, no_format],
  },
  "Tables": {
- "Table Name": data_type_string,
- "Type": data_type_string,
- "Row Count": data_type_long,
- "Total Size": data_type_long,
- "Dictionary Size": data_type_long,
- "Data Size": data_type_long,
- "Hierarchy Size": data_type_long,
- "Relationship Size": data_type_long,
- "User Hierarchy Size": data_type_long,
- "Partitions": data_type_long,
- "Columns": data_type_long,
- "% DB": data_type_double,
+ "Table Name": [data_type_string, no_format],
+ "Type": [data_type_string, no_format],
+ "Row Count": [data_type_long, int_format],
+ "Total Size": [data_type_long, int_format],
+ "Dictionary Size": [data_type_long, int_format],
+ "Data Size": [data_type_long, int_format],
+ "Hierarchy Size": [data_type_long, int_format],
+ "Relationship Size": [data_type_long, int_format],
+ "User Hierarchy Size": [data_type_long, int_format],
+ "Partitions": [data_type_long, int_format],
+ "Columns": [data_type_long, int_format],
+ "% DB": [data_type_double, pct_format],
  },
  "Partitions": {
- "Table Name": data_type_string,
- "Partition Name": data_type_string,
- "Mode": data_type_string,
- "Record Count": data_type_long,
- "Segment Count": data_type_long,
- "Records per Segment": data_type_double,
+ "Table Name": [data_type_string, no_format],
+ "Partition Name": [data_type_string, no_format],
+ "Mode": [data_type_string, no_format],
+ "Record Count": [data_type_long, int_format],
+ "Segment Count": [data_type_long, int_format],
+ "Records per Segment": [data_type_double, int_format],
  },
  "Columns": {
- "Table Name": data_type_string,
- "Column Name": data_type_string,
- "Type": data_type_string,
- "Cardinality": data_type_long,
- "Total Size": data_type_long,
- "Data Size": data_type_long,
- "Dictionary Size": data_type_long,
- "Hierarchy Size": data_type_long,
- "% Table": data_type_double,
- "% DB": data_type_double,
- "Data Type": data_type_string,
- "Encoding": data_type_string,
- "Is Resident": data_type_bool,
- "Temperature": data_type_double,
- "Last Accessed": data_type_timestamp,
+ "Table Name": [data_type_string, no_format],
+ "Column Name": [data_type_string, no_format],
+ "Type": [data_type_string, no_format],
+ "Cardinality": [data_type_long, int_format],
+ "Total Size": [data_type_long, int_format],
+ "Data Size": [data_type_long, int_format],
+ "Dictionary Size": [data_type_long, int_format],
+ "Hierarchy Size": [data_type_long, int_format],
+ "% Table": [data_type_double, pct_format],
+ "% DB": [data_type_double, pct_format],
+ "Data Type": [data_type_string, no_format],
+ "Encoding": [data_type_string, no_format],
+ "Is Resident": [data_type_bool, no_format],
+ "Temperature": [data_type_double, int_format],
+ "Last Accessed": [data_type_timestamp, no_format],
  },
  "Hierarchies": {
- "Table Name": data_type_string,
- "Hierarchy Name": data_type_string,
- "Used Size": data_type_long,
+ "Table Name": [data_type_string, no_format],
+ "Hierarchy Name": [data_type_string, no_format],
+ "Used Size": [data_type_long, int_format],
  },
  "Relationships": {
- "From Object": data_type_string,
- "To Object": data_type_string,
- "Multiplicity": data_type_string,
- "Used Size": data_type_long,
- "Max From Cardinality": data_type_long,
- "Max To Cardinality": data_type_long,
- "Missing Rows": data_type_long,
+ "From Object": [data_type_string, no_format],
+ "To Object": [data_type_string, no_format],
+ "Multiplicity": [data_type_string, no_format],
+ "Used Size": [data_type_long, int_format],
+ "Max From Cardinality": [data_type_long, int_format],
+ "Max To Cardinality": [data_type_long, int_format],
+ "Missing Rows": [data_type_long, int_format],
  },
  }

@@ -163,7 +166,8 @@ def vertipaq_analyzer(
  table_count = tom.model.Tables.Count
  column_count = len(list(tom.all_columns()))

- dfR["Missing Rows"] = None
+ dfR["Missing Rows"] = 0
+ dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

  # Direct Lake
  if read_stats_from_data:
@@ -323,38 +327,16 @@ def vertipaq_analyzer(
  dfC["% DB"] = round((dfC["Total Size"] / db_total_size) * 100, 2)
  columnList = list(vertipaq_map["Columns"].keys())

+ dfC = dfC[dfC["Type"] != "RowNumber"].reset_index(drop=True)
+
  colSize = dfC[columnList].sort_values(by="Total Size", ascending=False)
  temp = dfC[columnList].sort_values(by="Temperature", ascending=False)
  colSize.reset_index(drop=True, inplace=True)
  temp.reset_index(drop=True, inplace=True)

  export_Col = colSize.copy()
-
- int_cols = []
- pct_cols = []
- for k, v in vertipaq_map["Columns"].items():
- if v in ["int", "long"]:
- int_cols.append(k)
- elif v in ["float", "double"] and k != "Temperature":
- pct_cols.append(k)
- colSize[int_cols] = colSize[int_cols].map("{:,}".format)
- temp[int_cols] = temp[int_cols].map("{:,}".format)
- colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format)
- temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format)
-
- # Tables
- int_cols = []
- pct_cols = []
- for k, v in vertipaq_map["Tables"].items():
- if v in ["int", "long"]:
- int_cols.append(k)
- elif v in ["float", "double"]:
- pct_cols.append(k)
  export_Table = dfT.copy()

- dfT[int_cols] = dfT[int_cols].map("{:,}".format)
- dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format)
-
  # Relationships
  dfR = pd.merge(
  dfR,
@@ -386,14 +368,6 @@ def vertipaq_analyzer(
  dfR.reset_index(drop=True, inplace=True)
  export_Rel = dfR.copy()

- int_cols = []
- for k, v in vertipaq_map["Relationships"].items():
- if v in ["int", "long"]:
- int_cols.append(k)
- if not read_stats_from_data:
- int_cols.remove("Missing Rows")
- dfR[int_cols] = dfR[int_cols].map("{:,}".format)
-
  # Partitions
  dfP = dfP[
  [
@@ -410,12 +384,6 @@ def vertipaq_analyzer(
  ) # Remove after records per segment is fixed
  dfP.reset_index(drop=True, inplace=True)
  export_Part = dfP.copy()
- int_cols = []
- for k, v in vertipaq_map["Partitions"].items():
- if v in ["int", "long", "double", "float"]:
- int_cols.append(k)
- intList = ["Record Count", "Segment Count", "Records per Segment"]
- dfP[intList] = dfP[intList].map("{:,}".format)

  # Hierarchies
  dfH_filt = dfH[dfH["Level Ordinal"] == 0]
@@ -426,8 +394,6 @@ def vertipaq_analyzer(
  dfH_filt.fillna({"Used Size": 0}, inplace=True)
  dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
  export_Hier = dfH_filt.copy()
- intList = ["Used Size"]
- dfH_filt[intList] = dfH_filt[intList].map("{:,}".format)

  # Model
  # Converting to KB/MB/GB necessitates division by 1024 * 1000.
@@ -453,11 +419,63 @@ def vertipaq_analyzer(
  dfModel.reset_index(drop=True, inplace=True)
  dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
  export_Model = dfModel.copy()
- int_cols = []
- for k, v in vertipaq_map["Model"].items():
- if v in ["long", "int"] and k != "Compatibility Level":
- int_cols.append(k)
- dfModel[int_cols] = dfModel[int_cols].map("{:,}".format)
+
+ def _style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping):
+
+ format_mapping = {
+ "int": "{:,}",
+ "pct": "{:.2f}%",
+ "": "{}",
+ }
+
+ format_dict = {
+ col: format_mapping[dt] for col, dt in column_type_mapping.items()
+ }
+
+ return dataframe.style.format(format_dict)
+
+ dfModel = _style_columns_based_on_types(
+ dfModel,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Model"].items()
+ },
+ )
+ dfT = _style_columns_based_on_types(
+ dfT,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Tables"].items()
+ },
+ )
+ dfP = _style_columns_based_on_types(
+ dfP,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Partitions"].items()
+ },
+ )
+ colSize = _style_columns_based_on_types(
+ colSize,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Columns"].items()
+ },
+ )
+ temp = _style_columns_based_on_types(
+ temp,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Columns"].items()
+ },
+ )
+ dfR = _style_columns_based_on_types(
+ dfR,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Relationships"].items()
+ },
+ )
+ dfH_filt = _style_columns_based_on_types(
+ dfH_filt,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Hierarchies"].items()
+ },
+ )

  dataFrames = {
  "dfModel": dfModel,
@@ -484,8 +502,6 @@ def vertipaq_analyzer(
  )

  if export == "table":
- # spark = SparkSession.builder.getOrCreate()
-
  lakehouse_id = fabric.get_lakehouse_id()
  lake_workspace = fabric.resolve_workspace_name()
  lakehouse = resolve_lakehouse_name(
@@ -499,7 +515,7 @@ def vertipaq_analyzer(
  if len(lakeT_filt) == 0:
  runId = 1
  else:
- max_run_id = get_max_run_id(table_name=lakeTName)
+ max_run_id = _get_max_run_id(lakehouse=lakehouse, table_name=lakeTName)
  runId = max_run_id + 1

  dfMap = {
@@ -560,7 +576,7 @@ def vertipaq_analyzer(

  schema.update(
  {
- key.replace(" ", "_"): value
+ key.replace(" ", "_"): value[0]
  for key, value in vertipaq_map[key_name].items()
  }
  )
@@ -739,7 +755,11 @@ def visualize_vertipaq(dataframes):
  "ColumnName": "Column Name",
  "Tooltip": "The name of the column",
  },
- {"ViewName": "Column", "ColumnName": "Type", "Tooltip": "The type of column"},
+ {
+ "ViewName": "Column",
+ "ColumnName": "Type",
+ "Tooltip": "The type of column",
+ },
  {
  "ViewName": "Column",
  "ColumnName": "Cardinality",
sempy_labs/_warehouses.py CHANGED
@@ -11,7 +11,10 @@ from sempy.fabric.exceptions import FabricHTTPException


  def create_warehouse(
- warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None
+ warehouse: str,
+ description: Optional[str] = None,
+ case_insensitive_collation: bool = False,
+ workspace: Optional[str] = None,
  ):
  """
  Creates a Fabric warehouse.
@@ -22,6 +25,8 @@ def create_warehouse(
  Name of the warehouse.
  description : str, default=None
  A description of the warehouse.
+ case_insensitive_collation: bool, default=False
+ If True, creates the warehouse with case-insensitive collation.
  workspace : str, default=None
  The Fabric workspace name.
  Defaults to None which resolves to the workspace of the attached lakehouse
@@ -34,6 +39,11 @@ def create_warehouse(

  if description:
  request_body["description"] = description
+ if case_insensitive_collation:
+ request_body.setdefault("creationPayload", {})
+ request_body["creationPayload"][
+ "defaultCollation"
+ ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"

  client = fabric.FabricRestClient()
  response = client.post(
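
To round out the new flag, a hedged usage sketch of create_warehouse with the case-insensitive collation option added in this release; it assumes the function is re-exported at the package top level as other helpers are, the warehouse name and description are placeholders, and the commented payload reflects only the fields visible in the hunk above:

    import sempy_labs as labs

    # Creates a warehouse whose default collation is the case-insensitive
    # UTF-8 collation shown in the diff.
    labs.create_warehouse(
        warehouse="SalesWarehouse",      # placeholder name
        description="Demo warehouse",
        case_insensitive_collation=True,
        workspace=None,                  # defaults to the workspace of the attached lakehouse
    )

    # Collation portion of the request body, per the hunk above:
    # "creationPayload": {"defaultCollation": "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"}
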