semantic-link-labs 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (35)
  1. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/METADATA +8 -4
  2. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/RECORD +35 -34
  3. sempy_labs/__init__.py +14 -0
  4. sempy_labs/_capacities.py +89 -11
  5. sempy_labs/_capacity_migration.py +167 -60
  6. sempy_labs/_clear_cache.py +3 -3
  7. sempy_labs/_data_pipelines.py +48 -0
  8. sempy_labs/_external_data_shares.py +188 -0
  9. sempy_labs/_generate_semantic_model.py +0 -1
  10. sempy_labs/_git.py +1 -1
  11. sempy_labs/_helper_functions.py +20 -16
  12. sempy_labs/_list_functions.py +6 -3
  13. sempy_labs/_model_bpa.py +7 -5
  14. sempy_labs/_model_bpa_bulk.py +3 -5
  15. sempy_labs/_notebooks.py +4 -3
  16. sempy_labs/_sql.py +2 -2
  17. sempy_labs/_translations.py +14 -14
  18. sempy_labs/_vertipaq.py +121 -101
  19. sempy_labs/_warehouses.py +11 -1
  20. sempy_labs/admin/__init__.py +2 -0
  21. sempy_labs/admin/_basic_functions.py +124 -21
  22. sempy_labs/directlake/_directlake_schema_sync.py +0 -5
  23. sempy_labs/directlake/_generate_shared_expression.py +1 -1
  24. sempy_labs/directlake/_guardrails.py +1 -1
  25. sempy_labs/directlake/_show_unsupported_directlake_objects.py +1 -1
  26. sempy_labs/migration/_create_pqt_file.py +2 -2
  27. sempy_labs/report/_generate_report.py +10 -14
  28. sempy_labs/report/_report_bpa.py +8 -10
  29. sempy_labs/report/_report_functions.py +13 -19
  30. sempy_labs/report/_report_rebind.py +4 -1
  31. sempy_labs/report/_reportwrapper.py +3 -3
  32. sempy_labs/tom/_model.py +109 -34
  33. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/LICENSE +0 -0
  34. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/WHEEL +0 -0
  35. {semantic_link_labs-0.8.1.dist-info → semantic_link_labs-0.8.3.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ from uuid import UUID
  import sempy_labs._icons as icons
  import urllib.parse
  from azure.core.credentials import TokenCredential, AccessToken
- import deltalake


  def create_abfss_path(
@@ -962,15 +961,15 @@ class FabricTokenCredential(TokenCredential):
  **kwargs: any,
  ) -> AccessToken:

- from notebookutils import mssparkutils
+ import notebookutils

- token = mssparkutils.credentials.getToken(scopes)
+ token = notebookutils.credentials.getToken(scopes)
  access_token = AccessToken(token, 0)

  return access_token


- def get_adls_client(account_name):
+ def _get_adls_client(account_name):

  from azure.storage.filedatalake import DataLakeServiceClient
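
The token helpers now import the top-level notebookutils module instead of mssparkutils. A minimal, hedged sketch of acquiring a token this way (it only runs inside a Fabric/Synapse notebook where notebookutils is preinstalled; the scope string is an illustrative assumption):

    import notebookutils
    from azure.core.credentials import AccessToken

    # Assumed example scope; any audience accepted by the Fabric runtime works here.
    scope = "https://analysis.windows.net/powerbi/api/.default"
    token = notebookutils.credentials.getToken(scope)
    # Mirrors the hunk above: wrap the raw token with an expiry of 0.
    access_token = AccessToken(token, 0)
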
@@ -1018,19 +1017,22 @@ def get_language_codes(languages: str | List[str]):
  return languages


- def get_azure_token_credentials(
+ def _get_azure_token_credentials(
  key_vault_uri: str,
  key_vault_tenant_id: str,
  key_vault_client_id: str,
  key_vault_client_secret: str,
+ audience: str = "https://management.azure.com/.default",
  ) -> Tuple[str, str, dict]:

- from notebookutils import mssparkutils
+ import notebookutils
  from azure.identity import ClientSecretCredential

- tenant_id = mssparkutils.credentials.getSecret(key_vault_uri, key_vault_tenant_id)
- client_id = mssparkutils.credentials.getSecret(key_vault_uri, key_vault_client_id)
- client_secret = mssparkutils.credentials.getSecret(
+ # "https://analysis.windows.net/powerbi/api/.default"
+
+ tenant_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_tenant_id)
+ client_id = notebookutils.credentials.getSecret(key_vault_uri, key_vault_client_id)
+ client_secret = notebookutils.credentials.getSecret(
  key_vault_uri, key_vault_client_secret
  )

@@ -1038,7 +1040,7 @@ def get_azure_token_credentials(
  tenant_id=tenant_id, client_id=client_id, client_secret=client_secret
  )

- token = credential.get_token("https://management.azure.com/.default").token
+ token = credential.get_token(audience).token

  headers = {
  "Authorization": f"Bearer {token}",
@@ -1078,7 +1080,7 @@ def resolve_environment_id(environment: str, workspace: Optional[str] = None) ->
  )


- def make_clickable(val):
+ def _make_clickable(val):

  return f'<a target="_blank" href="{val}">{val}</a>'

@@ -1133,11 +1135,13 @@ def generate_guid():
  return str(uuid.uuid4())


- def get_max_run_id(table_name: str) -> int:
+ def _get_max_run_id(lakehouse: str, table_name: str) -> int:
+
+ from pyspark.sql import SparkSession

- table_path = f"/lakehouse/default/Tables/{table_name}/"
- delta_table = deltalake.DeltaTable(table_path)
- data = delta_table.to_pandas()
- max_run_id = data["RunId"].max()
+ spark = SparkSession.builder.getOrCreate()
+ query = f"SELECT MAX(RunId) FROM {lakehouse}.{table_name}"
+ dfSpark = spark.sql(query)
+ max_run_id = dfSpark.collect()[0][0]

  return max_run_id
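
The run-ID lookup now goes through Spark SQL against the attached lakehouse instead of reading the Delta table with the deltalake package (which is dropped from the imports above). A standalone, hedged sketch of that pattern, assuming a Spark session and a lakehouse table that has a RunId column:

    from pyspark.sql import SparkSession

    def max_run_id_sketch(lakehouse: str, table_name: str) -> int:
        # Query the table registered under the lakehouse; MAX returns None on an empty table.
        spark = SparkSession.builder.getOrCreate()
        row = spark.sql(f"SELECT MAX(RunId) AS max_id FROM {lakehouse}.{table_name}").collect()[0]
        return row["max_id"]
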
sempy_labs/_list_functions.py CHANGED
@@ -1487,7 +1487,7 @@ def list_semantic_model_object_report_usage(
  is sorted descending by 'Report Usage Count'.
  """

- from sempy_labs._model_dependencies import get_measure_dependencies
+ from sempy_labs._model_dependencies import get_model_calc_dependencies
  from sempy_labs._helper_functions import format_dax_object_name

  workspace = fabric.resolve_workspace_name(workspace)
@@ -1503,7 +1503,7 @@ def list_semantic_model_object_report_usage(
  )
  else:
  df = pd.DataFrame(columns=["Table Name", "Object Name", "Object Type"])
- dep = get_measure_dependencies(dataset=dataset, workspace=workspace)
+ dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)

  for i, r in dfR.iterrows():
  object_type = r["Object Type"]
@@ -1515,7 +1515,10 @@ def list_semantic_model_object_report_usage(
  "Object Type": object_type,
  }
  df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
- if object_type == "Measure":
+ df["Referenced Object Type"] = df["Referenced Object Type"].replace(
+ "Attribute Hierarchy", "Column"
+ )
+ if object_type in ["Measure", "Calc Column", "Calc Table", "Hierarchy"]:
  df_filt = dep[dep["Object Name"] == object_name][
  ["Referenced Table", "Referenced Object", "Referenced Object Type"]
  ]
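
Switching from get_measure_dependencies to get_model_calc_dependencies broadens the usage analysis beyond measures, and the extra replace normalizes TOM's "Attribute Hierarchy" references back to plain columns before they are counted. A hedged pandas illustration of that normalization on made-up dependency rows:

    import pandas as pd

    # Hypothetical dependency rows shaped like the frame used above.
    dep = pd.DataFrame(
        {
            "Referenced Table": ["Sales", "Date"],
            "Referenced Object": ["Amount", "Year"],
            "Referenced Object Type": ["Column", "Attribute Hierarchy"],
        }
    )
    # Attribute hierarchies count as their underlying columns.
    dep["Referenced Object Type"] = dep["Referenced Object Type"].replace(
        "Attribute Hierarchy", "Column"
    )
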
sempy_labs/_model_bpa.py CHANGED
@@ -12,7 +12,7 @@ from sempy_labs._helper_functions import (
  resolve_workspace_capacity,
  resolve_dataset_id,
  get_language_codes,
- get_max_run_id,
+ _get_max_run_id,
  )
  from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
  from sempy_labs.tom import connect_semantic_model
@@ -220,9 +220,9 @@ def run_model_bpa(

  rules = translate_using_spark(rules)

- rules["Severity"].replace("Warning", icons.warning, inplace=True)
- rules["Severity"].replace("Error", icons.error, inplace=True)
- rules["Severity"].replace("Info", icons.info, inplace=True)
+ rules.loc[rules["Severity"] == "Warning", "Severity"] = icons.warning
+ rules.loc[rules["Severity"] == "Error", "Severity"] = icons.error
+ rules.loc[rules["Severity"] == "Info", "Severity"] = icons.info

  pd.set_option("display.max_colwidth", 1000)
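
The severity icons are now assigned with .loc boolean-mask assignment rather than calling .replace(..., inplace=True) on a column selection, which sidesteps the chained-assignment problem (under copy-on-write, an inplace replace on a selected column may not write back to the original frame). A small hedged illustration with placeholder icons:

    import pandas as pd

    rules = pd.DataFrame({"Severity": ["Warning", "Error", "Info"]})
    icon_map = {"Warning": "(!)", "Error": "(x)", "Info": "(i)"}  # placeholder icons

    # Mask assignment always writes back to the original frame.
    for level, icon in icon_map.items():
        rules.loc[rules["Severity"] == level, "Severity"] = icon
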
@@ -350,7 +350,9 @@ def run_model_bpa(
  if len(lakeT_filt) == 0:
  runId = 1
  else:
- max_run_id = get_max_run_id(table_name=delta_table_name)
+ max_run_id = _get_max_run_id(
+ lakehouse=lakehouse, table_name=delta_table_name
+ )
  runId = max_run_id + 1

  now = datetime.datetime.now()
sempy_labs/_model_bpa_bulk.py CHANGED
@@ -6,7 +6,7 @@ from sempy_labs._helper_functions import (
  save_as_delta_table,
  resolve_workspace_capacity,
  retry,
- get_max_run_id,
+ _get_max_run_id,
  )
  from sempy_labs.lakehouse import (
  get_lakehouse_tables,
@@ -49,8 +49,6 @@ def run_model_bpa_bulk(
  The semantic models to always skip when running this analysis.
  """

- import pyspark.sql.functions as F
-
  if not lakehouse_attached():
  raise ValueError(
  f"{icons.red_dot} No lakehouse is attached to this notebook. Must attach a lakehouse to the notebook."
@@ -92,7 +90,7 @@ def run_model_bpa_bulk(
  if len(lakeT_filt) == 0:
  runId = 1
  else:
- max_run_id = get_max_run_id(table_name=output_table)
+ max_run_id = _get_max_run_id(lakehouse=lakehouse, table_name=output_table)
  runId = max_run_id + 1

  if isinstance(workspace, str):
@@ -162,7 +160,7 @@ def run_model_bpa_bulk(
  )
  print(e)

- df["Severity"].replace(icons.severity_mapping, inplace=True)
+ df["Severity"].replace(icons.severity_mapping)

  # Append save results individually for each workspace (so as not to create a giant dataframe)
  print(
sempy_labs/_notebooks.py CHANGED
@@ -8,7 +8,6 @@ from sempy_labs._helper_functions import (
  resolve_workspace_name_and_id,
  lro,
  _decode_b64,
- resolve_notebook_id,
  )
  from sempy.fabric.exceptions import FabricHTTPException

@@ -38,10 +37,12 @@ def get_notebook_definition(
  """

  (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
- notebook_id = resolve_notebook_id(notebook=notebook_name, workspace=workspace)
+ item_id = fabric.resolve_item_id(
+ item_name=notebook_name, type="Notebook", workspace=workspace
+ )
  client = fabric.FabricRestClient()
  response = client.post(
- f"v1/workspaces/{workspace_id}/notebooks/{notebook_id}/getDefinition",
+ f"v1/workspaces/{workspace_id}/notebooks/{item_id}/getDefinition",
  )

  result = lro(client, response).json()
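
The bespoke resolve_notebook_id helper gives way to sempy's generic item resolver. A hedged usage sketch of the call pattern shown in the hunk (run inside a Fabric notebook; the notebook name is a placeholder):

    import sempy.fabric as fabric

    # Resolve a notebook's item ID by name, as the updated get_notebook_definition does.
    item_id = fabric.resolve_item_id(
        item_name="My Notebook",  # hypothetical notebook name
        type="Notebook",
        workspace=None,           # None resolves to the current workspace
    )
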
sempy_labs/_sql.py CHANGED
@@ -9,7 +9,7 @@ from sempy.fabric.exceptions import FabricHTTPException
  from sempy_labs._helper_functions import resolve_warehouse_id, resolve_lakehouse_id


- def bytes2mswin_bstr(value: bytes) -> bytes:
+ def _bytes2mswin_bstr(value: bytes) -> bytes:
  """Convert a sequence of bytes into a (MS-Windows) BSTR (as bytes).

  See https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-319190980
@@ -68,7 +68,7 @@ class ConnectBase:

  # Set up the connection string
  access_token = SynapseTokenProvider()()
- tokenstruct = bytes2mswin_bstr(access_token.encode())
+ tokenstruct = _bytes2mswin_bstr(access_token.encode())
  conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={name};Encrypt=Yes;"

  if timeout is not None:
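
The renamed helper packs an access token into the length-prefixed, UTF-16-LE-expanded byte layout that pyodbc expects for SQL Server token authentication (see the linked pyodbc issue). A hedged sketch of that general pattern, not the library's exact implementation:

    import struct

    def pack_token_for_pyodbc(token: str) -> bytes:
        # Interleave each token byte with a zero byte (UTF-16-LE expansion),
        # then prefix the result with its 4-byte little-endian length.
        expanded = bytes(b for ch in token.encode("utf-8") for b in (ch, 0))
        return struct.pack("<i", len(expanded)) + expanded
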
sempy_labs/_translations.py CHANGED
@@ -40,7 +40,7 @@ def translate_semantic_model(
  from pyspark.sql import SparkSession
  from sempy_labs.tom import connect_semantic_model

- def clean_text(text, exclude_chars):
+ def _clean_text(text, exclude_chars):
  if exclude_chars:
  for char in exclude_chars:
  text = text.replace(char, " ")
@@ -60,8 +60,8 @@ def translate_semantic_model(
  ) as tom:

  for o in tom.model.Tables:
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
  new_data = {
  "Object Type": "Table",
  "Name": o.Name,
@@ -75,9 +75,9 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_columns():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
- oDisplayFolder = clean_text(o.DisplayFolder, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
+ oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
  new_data = {
  "Object Type": "Column",
  "Name": o.Name,
@@ -91,9 +91,9 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_measures():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
- oDisplayFolder = clean_text(o.DisplayFolder, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
+ oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
  new_data = {
  "Object Type": "Measure",
  "Name": o.Name,
@@ -107,9 +107,9 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_hierarchies():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
- oDisplayFolder = clean_text(o.DisplayFolder, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
+ oDisplayFolder = _clean_text(o.DisplayFolder, exclude_characters)
  new_data = {
  "Object Type": "Hierarchy",
  "Name": o.Name,
@@ -123,8 +123,8 @@ def translate_semantic_model(
  [df_prep, pd.DataFrame(new_data, index=[0])], ignore_index=True
  )
  for o in tom.all_levels():
- oName = clean_text(o.Name, exclude_characters)
- oDescription = clean_text(o.Description, exclude_characters)
+ oName = _clean_text(o.Name, exclude_characters)
+ oDescription = _clean_text(o.Description, exclude_characters)
  new_data = {
  "Object Type": "Level",
  "Name": o.Name,
sempy_labs/_vertipaq.py CHANGED
@@ -13,7 +13,7 @@ from sempy_labs._helper_functions import (
  resolve_dataset_id,
  save_as_delta_table,
  resolve_workspace_capacity,
- get_max_run_id,
+ _get_max_run_id,
  )
  from sempy_labs._list_functions import list_relationships, list_tables
  from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
@@ -74,68 +74,71 @@ def vertipaq_analyzer(
  data_type_timestamp = "timestamp"
  data_type_double = "double"
  data_type_bool = "bool"
+ int_format = "int"
+ pct_format = "pct"
+ no_format = ""

  vertipaq_map = {
  "Model": {
- "Dataset Name": data_type_string,
- "Total Size": data_type_long,
- "Table Count": data_type_long,
- "Column Count": data_type_long,
- "Compatibility Level": data_type_long,
- "Default Mode": data_type_string,
+ "Dataset Name": [data_type_string, no_format],
+ "Total Size": [data_type_long, int_format],
+ "Table Count": [data_type_long, int_format],
+ "Column Count": [data_type_long, int_format],
+ "Compatibility Level": [data_type_long, no_format],
+ "Default Mode": [data_type_string, no_format],
  },
  "Tables": {
- "Table Name": data_type_string,
- "Type": data_type_string,
- "Row Count": data_type_long,
- "Total Size": data_type_long,
- "Dictionary Size": data_type_long,
- "Data Size": data_type_long,
- "Hierarchy Size": data_type_long,
- "Relationship Size": data_type_long,
- "User Hierarchy Size": data_type_long,
- "Partitions": data_type_long,
- "Columns": data_type_long,
- "% DB": data_type_double,
+ "Table Name": [data_type_string, no_format],
+ "Type": [data_type_string, no_format],
+ "Row Count": [data_type_long, int_format],
+ "Total Size": [data_type_long, int_format],
+ "Dictionary Size": [data_type_long, int_format],
+ "Data Size": [data_type_long, int_format],
+ "Hierarchy Size": [data_type_long, int_format],
+ "Relationship Size": [data_type_long, int_format],
+ "User Hierarchy Size": [data_type_long, int_format],
+ "Partitions": [data_type_long, int_format],
+ "Columns": [data_type_long, int_format],
+ "% DB": [data_type_double, pct_format],
  },
  "Partitions": {
- "Table Name": data_type_string,
- "Partition Name": data_type_string,
- "Mode": data_type_string,
- "Record Count": data_type_long,
- "Segment Count": data_type_long,
- "Records per Segment": data_type_double,
+ "Table Name": [data_type_string, no_format],
+ "Partition Name": [data_type_string, no_format],
+ "Mode": [data_type_string, no_format],
+ "Record Count": [data_type_long, int_format],
+ "Segment Count": [data_type_long, int_format],
+ "Records per Segment": [data_type_double, int_format],
  },
  "Columns": {
- "Table Name": data_type_string,
- "Column Name": data_type_string,
- "Type": data_type_string,
- "Cardinality": data_type_long,
- "Total Size": data_type_long,
- "Data Size": data_type_long,
- "Dictionary Size": data_type_long,
- "Hierarchy Size": data_type_long,
- "% Table": data_type_double,
- "% DB": data_type_double,
- "Data Type": data_type_string,
- "Encoding": data_type_string,
- "Is Resident": data_type_bool,
- "Temperature": data_type_double,
- "Last Accessed": data_type_timestamp,
+ "Table Name": [data_type_string, no_format],
+ "Column Name": [data_type_string, no_format],
+ "Type": [data_type_string, no_format],
+ "Cardinality": [data_type_long, int_format],
+ "Total Size": [data_type_long, int_format],
+ "Data Size": [data_type_long, int_format],
+ "Dictionary Size": [data_type_long, int_format],
+ "Hierarchy Size": [data_type_long, int_format],
+ "% Table": [data_type_double, pct_format],
+ "% DB": [data_type_double, pct_format],
+ "Data Type": [data_type_string, no_format],
+ "Encoding": [data_type_string, no_format],
+ "Is Resident": [data_type_bool, no_format],
+ "Temperature": [data_type_double, int_format],
+ "Last Accessed": [data_type_timestamp, no_format],
  },
  "Hierarchies": {
- "Table Name": data_type_string,
- "Hierarchy Name": data_type_string,
- "Used Size": data_type_long,
+ "Table Name": [data_type_string, no_format],
+ "Hierarchy Name": [data_type_string, no_format],
+ "Used Size": [data_type_long, int_format],
  },
  "Relationships": {
- "From Object": data_type_string,
- "To Object": data_type_string,
- "Multiplicity": data_type_string,
- "Used Size": data_type_long,
- "Max From Cardinality": data_type_long,
- "Max To Cardinality": data_type_long,
- "Missing Rows": data_type_long,
+ "From Object": [data_type_string, no_format],
+ "To Object": [data_type_string, no_format],
+ "Multiplicity": [data_type_string, no_format],
+ "Used Size": [data_type_long, int_format],
+ "Max From Cardinality": [data_type_long, int_format],
+ "Max To Cardinality": [data_type_long, int_format],
+ "Missing Rows": [data_type_long, int_format],
  },
  }

@@ -163,7 +166,8 @@ def vertipaq_analyzer(
  table_count = tom.model.Tables.Count
  column_count = len(list(tom.all_columns()))

- dfR["Missing Rows"] = None
+ dfR["Missing Rows"] = 0
+ dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)

  # Direct Lake
  if read_stats_from_data:
@@ -323,38 +327,16 @@ def vertipaq_analyzer(
  dfC["% DB"] = round((dfC["Total Size"] / db_total_size) * 100, 2)
  columnList = list(vertipaq_map["Columns"].keys())

+ dfC = dfC[dfC["Type"] != "RowNumber"].reset_index(drop=True)
+
  colSize = dfC[columnList].sort_values(by="Total Size", ascending=False)
  temp = dfC[columnList].sort_values(by="Temperature", ascending=False)
  colSize.reset_index(drop=True, inplace=True)
  temp.reset_index(drop=True, inplace=True)

  export_Col = colSize.copy()
-
- int_cols = []
- pct_cols = []
- for k, v in vertipaq_map["Columns"].items():
- if v in ["int", "long"]:
- int_cols.append(k)
- elif v in ["float", "double"] and k != "Temperature":
- pct_cols.append(k)
- colSize[int_cols] = colSize[int_cols].map("{:,}".format)
- temp[int_cols] = temp[int_cols].map("{:,}".format)
- colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format)
- temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format)
-
- # Tables
- int_cols = []
- pct_cols = []
- for k, v in vertipaq_map["Tables"].items():
- if v in ["int", "long"]:
- int_cols.append(k)
- elif v in ["float", "double"]:
- pct_cols.append(k)
  export_Table = dfT.copy()

- dfT[int_cols] = dfT[int_cols].map("{:,}".format)
- dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format)
-
  # Relationships
  dfR = pd.merge(
  dfR,
@@ -386,14 +368,6 @@ def vertipaq_analyzer(
  dfR.reset_index(drop=True, inplace=True)
  export_Rel = dfR.copy()

- int_cols = []
- for k, v in vertipaq_map["Relationships"].items():
- if v in ["int", "long"]:
- int_cols.append(k)
- if not read_stats_from_data:
- int_cols.remove("Missing Rows")
- dfR[int_cols] = dfR[int_cols].map("{:,}".format)
-
  # Partitions
  dfP = dfP[
  [
@@ -410,12 +384,6 @@ def vertipaq_analyzer(
  ) # Remove after records per segment is fixed
  dfP.reset_index(drop=True, inplace=True)
  export_Part = dfP.copy()
- int_cols = []
- for k, v in vertipaq_map["Partitions"].items():
- if v in ["int", "long", "double", "float"]:
- int_cols.append(k)
- intList = ["Record Count", "Segment Count", "Records per Segment"]
- dfP[intList] = dfP[intList].map("{:,}".format)

  # Hierarchies
  dfH_filt = dfH[dfH["Level Ordinal"] == 0]
@@ -426,8 +394,6 @@ def vertipaq_analyzer(
  dfH_filt.fillna({"Used Size": 0}, inplace=True)
  dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
  export_Hier = dfH_filt.copy()
- intList = ["Used Size"]
- dfH_filt[intList] = dfH_filt[intList].map("{:,}".format)

  # Model
  # Converting to KB/MB/GB necessitates division by 1024 * 1000.
@@ -453,11 +419,63 @@ def vertipaq_analyzer(
  dfModel.reset_index(drop=True, inplace=True)
  dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
  export_Model = dfModel.copy()
- int_cols = []
- for k, v in vertipaq_map["Model"].items():
- if v in ["long", "int"] and k != "Compatibility Level":
- int_cols.append(k)
- dfModel[int_cols] = dfModel[int_cols].map("{:,}".format)
+
+ def _style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping):
+
+ format_mapping = {
+ "int": "{:,}",
+ "pct": "{:.2f}%",
+ "": "{}",
+ }
+
+ format_dict = {
+ col: format_mapping[dt] for col, dt in column_type_mapping.items()
+ }
+
+ return dataframe.style.format(format_dict)
+
+ dfModel = _style_columns_based_on_types(
+ dfModel,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Model"].items()
+ },
+ )
+ dfT = _style_columns_based_on_types(
+ dfT,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Tables"].items()
+ },
+ )
+ dfP = _style_columns_based_on_types(
+ dfP,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Partitions"].items()
+ },
+ )
+ colSize = _style_columns_based_on_types(
+ colSize,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Columns"].items()
+ },
+ )
+ temp = _style_columns_based_on_types(
+ temp,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Columns"].items()
+ },
+ )
+ dfR = _style_columns_based_on_types(
+ dfR,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Relationships"].items()
+ },
+ )
+ dfH_filt = _style_columns_based_on_types(
+ dfH_filt,
+ column_type_mapping={
+ key: values[1] for key, values in vertipaq_map["Hierarchies"].items()
+ },
+ )

  dataFrames = {
  "dfModel": dfModel,
@@ -484,8 +502,6 @@ def vertipaq_analyzer(
  )

  if export == "table":
- # spark = SparkSession.builder.getOrCreate()
-
  lakehouse_id = fabric.get_lakehouse_id()
  lake_workspace = fabric.resolve_workspace_name()
  lakehouse = resolve_lakehouse_name(
@@ -499,7 +515,7 @@ def vertipaq_analyzer(
  if len(lakeT_filt) == 0:
  runId = 1
  else:
- max_run_id = get_max_run_id(table_name=lakeTName)
+ max_run_id = _get_max_run_id(lakehouse=lakehouse, table_name=lakeTName)
  runId = max_run_id + 1

  dfMap = {
@@ -560,7 +576,7 @@ def vertipaq_analyzer(

  schema.update(
  {
- key.replace(" ", "_"): value
+ key.replace(" ", "_"): value[0]
  for key, value in vertipaq_map[key_name].items()
  }
  )
@@ -739,7 +755,11 @@ def visualize_vertipaq(dataframes):
  "ColumnName": "Column Name",
  "Tooltip": "The name of the column",
  },
- {"ViewName": "Column", "ColumnName": "Type", "Tooltip": "The type of column"},
+ {
+ "ViewName": "Column",
+ "ColumnName": "Type",
+ "Tooltip": "The type of column",
+ },
  {
  "ViewName": "Column",
  "ColumnName": "Cardinality",
sempy_labs/_warehouses.py CHANGED
@@ -11,7 +11,10 @@ from sempy.fabric.exceptions import FabricHTTPException


  def create_warehouse(
- warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None
+ warehouse: str,
+ description: Optional[str] = None,
+ case_insensitive_collation: bool = False,
+ workspace: Optional[str] = None,
  ):
  """
  Creates a Fabric warehouse.
@@ -22,6 +25,8 @@ def create_warehouse(
  Name of the warehouse.
  description : str, default=None
  A description of the warehouse.
+ case_insensitive_collation: bool, default=False
+ If True, creates the warehouse with case-insensitive collation.
  workspace : str, default=None
  The Fabric workspace name.
  Defaults to None which resolves to the workspace of the attached lakehouse
@@ -34,6 +39,11 @@ def create_warehouse(

  if description:
  request_body["description"] = description
+ if case_insensitive_collation:
+ request_body.setdefault("creationPayload", {})
+ request_body["creationPayload"][
+ "defaultCollation"
+ ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"

  client = fabric.FabricRestClient()
  response = client.post(
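
To round out the new flag, a hedged usage sketch of create_warehouse with the case-insensitive collation option added in this release; it assumes the function is re-exported at the package top level as other helpers are, the warehouse name and description are placeholders, and the commented payload reflects only the fields visible in the hunk above:

    import sempy_labs as labs

    # Creates a warehouse whose default collation is the case-insensitive
    # UTF-8 collation shown in the diff.
    labs.create_warehouse(
        warehouse="SalesWarehouse",      # placeholder name
        description="Demo warehouse",
        case_insensitive_collation=True,
        workspace=None,                  # defaults to the workspace of the attached lakehouse
    )

    # Collation portion of the request body, per the hunk above:
    # "creationPayload": {"defaultCollation": "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"}
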