semantic-link-labs 0.4.1 (semantic_link_labs-0.4.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- semantic_link_labs-0.4.1.dist-info/LICENSE +21 -0
- semantic_link_labs-0.4.1.dist-info/METADATA +22 -0
- semantic_link_labs-0.4.1.dist-info/RECORD +52 -0
- semantic_link_labs-0.4.1.dist-info/WHEEL +5 -0
- semantic_link_labs-0.4.1.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +154 -0
- sempy_labs/_ai.py +496 -0
- sempy_labs/_clear_cache.py +39 -0
- sempy_labs/_connections.py +234 -0
- sempy_labs/_dax.py +70 -0
- sempy_labs/_generate_semantic_model.py +280 -0
- sempy_labs/_helper_functions.py +506 -0
- sempy_labs/_icons.py +4 -0
- sempy_labs/_list_functions.py +1372 -0
- sempy_labs/_model_auto_build.py +143 -0
- sempy_labs/_model_bpa.py +1354 -0
- sempy_labs/_model_dependencies.py +341 -0
- sempy_labs/_one_lake_integration.py +155 -0
- sempy_labs/_query_scale_out.py +447 -0
- sempy_labs/_refresh_semantic_model.py +184 -0
- sempy_labs/_tom.py +3766 -0
- sempy_labs/_translations.py +378 -0
- sempy_labs/_vertipaq.py +893 -0
- sempy_labs/directlake/__init__.py +45 -0
- sempy_labs/directlake/_directlake_schema_compare.py +110 -0
- sempy_labs/directlake/_directlake_schema_sync.py +128 -0
- sempy_labs/directlake/_fallback.py +62 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +69 -0
- sempy_labs/directlake/_get_shared_expression.py +59 -0
- sempy_labs/directlake/_guardrails.py +84 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +54 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +89 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +81 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +64 -0
- sempy_labs/directlake/_warm_cache.py +210 -0
- sempy_labs/lakehouse/__init__.py +24 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +81 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +85 -0
- sempy_labs/lakehouse/_shortcuts.py +296 -0
- sempy_labs/migration/__init__.py +29 -0
- sempy_labs/migration/_create_pqt_file.py +239 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +429 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +150 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +524 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +165 -0
- sempy_labs/migration/_migration_validation.py +227 -0
- sempy_labs/migration/_refresh_calc_tables.py +129 -0
- sempy_labs/report/__init__.py +35 -0
- sempy_labs/report/_generate_report.py +253 -0
- sempy_labs/report/_report_functions.py +855 -0
- sempy_labs/report/_report_rebind.py +131 -0
sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py
@@ -0,0 +1,165 @@
import sempy
import sempy.fabric as fabric
import pandas as pd
import datetime, time
from sempy_labs._list_functions import list_tables
from sempy_labs.directlake._get_shared_expression import get_shared_expression
from sempy_labs._helper_functions import resolve_lakehouse_name
from sempy_labs.lakehouse._lakehouse import lakehouse_attached
from sempy_labs._tom import connect_semantic_model
from typing import List, Optional, Union
from sempy._utils._log import log
import sempy_labs._icons as icons


@log
def migrate_tables_columns_to_semantic_model(
    dataset: str,
    new_dataset: str,
    workspace: Optional[str] = None,
    new_dataset_workspace: Optional[str] = None,
    lakehouse: Optional[str] = None,
    lakehouse_workspace: Optional[str] = None,
):
    """
    Adds tables/columns to the new Direct Lake semantic model based on an import/DirectQuery semantic model.

    Parameters
    ----------
    dataset : str
        Name of the import/DirectQuery semantic model.
    new_dataset : str
        Name of the Direct Lake semantic model.
    workspace : str, default=None
        The Fabric workspace name in which the import/DirectQuery semantic model exists.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    new_dataset_workspace : str
        The Fabric workspace name in which the Direct Lake semantic model will be created.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    lakehouse : str, default=None
        The Fabric lakehouse used by the Direct Lake semantic model.
        Defaults to None which resolves to the lakehouse attached to the notebook.
    lakehouse_workspace : str, default=None
        The Fabric workspace used by the lakehouse.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    """

    workspace = fabric.resolve_workspace_name(workspace)

    if new_dataset_workspace == None:
        new_dataset_workspace = workspace

    if lakehouse_workspace == None:
        lakehouse_workspace = new_dataset_workspace

    if lakehouse == None:
        lakehouse_id = fabric.get_lakehouse_id()
        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)

    # Check that lakehouse is attached to the notebook
    lakeAttach = lakehouse_attached()

    # Run if lakehouse is attached to the notebook or a lakehouse & lakehouse workspace are specified
    if lakeAttach or (lakehouse is not None and lakehouse_workspace is not None):
        shEx = get_shared_expression(lakehouse, lakehouse_workspace)

        dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
        dfT = list_tables(dataset, workspace)
        dfT.rename(columns={"Type": "Table Type"}, inplace=True)
        dfC = pd.merge(
            dfC,
            dfT[["Name", "Table Type"]],
            left_on="Table Name",
            right_on="Name",
            how="left",
        )
        dfT_filt = dfT[dfT["Table Type"] == "Table"]
        dfC_filt = dfC[
            (dfC["Table Type"] == "Table")
            & ~(dfC["Column Name"].str.startswith("RowNumber-"))
            & (dfC["Type"] != "Calculated")
        ]

        print(f"{icons.in_progress} Updating '{new_dataset}' based on '{dataset}'...")
        start_time = datetime.datetime.now()
        timeout = datetime.timedelta(minutes=1)
        success = False

        while not success:
            try:
                with connect_semantic_model(
                    dataset=new_dataset, readonly=False, workspace=new_dataset_workspace
                ) as tom:
                    success = True
                    try:
                        tom.model.Expressions["DatabaseQuery"]
                    except:
                        tom.add_expression("DatabaseQuery", expression=shEx)
                        print(
                            f"{icons.green_dot} The 'DatabaseQuery' expression has been added."
                        )

                    for i, r in dfT_filt.iterrows():
                        tName = r["Name"]
                        tDC = r["Data Category"]
                        tHid = bool(r["Hidden"])
                        tDesc = r["Description"]

                        try:
                            tom.model.Tables[tName]
                        except:
                            tom.add_table(
                                name=tName,
                                description=tDesc,
                                data_category=tDC,
                                hidden=tHid,
                            )
                            tom.add_entity_partition(
                                table_name=tName, entity_name=tName.replace(" ", "_")
                            )
                            print(
                                f"{icons.green_dot} The '{tName}' table has been added."
                            )

                    for i, r in dfC_filt.iterrows():
                        tName = r["Table Name"]
                        cName = r["Column Name"]
                        scName = r["Source"].replace(" ", "_")
                        cHid = bool(r["Hidden"])
                        cDataType = r["Data Type"]

                        try:
                            tom.model.Tables[tName].Columns[cName]
                        except:
                            tom.add_data_column(
                                table_name=tName,
                                column_name=cName,
                                source_column=scName,
                                hidden=cHid,
                                data_type=cDataType,
                            )
                            print(
                                f"{icons.green_dot} The '{tName}'[{cName}] column has been added."
                            )

                    print(
                        f"\n{icons.green_dot} All regular tables and columns have been added to the '{new_dataset}' semantic model."
                    )
            except Exception as e:
                if datetime.datetime.now() - start_time > timeout:
                    break
                time.sleep(1)
    else:
        print(
            f"{icons.red_dot} Lakehouse not attached to notebook and lakehouse/lakehouse_workspace are not specified. Please add your lakehouse to this notebook or specify the lakehouse/lakehouse_workspace parameters."
        )
        print(
            f"To attach a lakehouse to a notebook, go to the 'Explorer' window to the left, click 'Lakehouses' to add your lakehouse to this notebook"
        )
        print(
            f"\nLearn more here: https://learn.microsoft.com/fabric/data-engineering/lakehouse-notebook-explore#add-or-remove-a-lakehouse"
        )
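For context, a minimal usage sketch of the function added above, as it might be run from a Fabric notebook with a lakehouse attached. The import path and signature come from this hunk; the model and workspace names are placeholders.

from sempy_labs.migration._migrate_tables_columns_to_semantic_model import (
    migrate_tables_columns_to_semantic_model,
)

# Copy tables and non-calculated columns from an import/DirectQuery model
# into a new Direct Lake model. Names below are placeholders.
migrate_tables_columns_to_semantic_model(
    dataset="SalesModel",
    new_dataset="SalesModelDirectLake",
    workspace="AnalyticsWorkspace",
    new_dataset_workspace="AnalyticsWorkspace",
    lakehouse=None,  # None resolves to the lakehouse attached to the notebook
)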
sempy_labs/migration/_migration_validation.py
@@ -0,0 +1,227 @@
import sempy
import sempy.fabric as fabric
import pandas as pd
from sempy_labs._helper_functions import create_relationship_name
from sempy_labs._tom import connect_semantic_model
from typing import List, Optional, Union
from sempy._utils._log import log


def list_semantic_model_objects(dataset: str, workspace: Optional[str] = None):
    """
    Shows a list of semantic model objects.

    Parameters
    ----------
    dataset : str
        Name of the semantic model.
    workspace : str, default=None
        The Fabric workspace name.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe showing a list of objects in the semantic model.
    """

    df = pd.DataFrame(columns=["Parent Name", "Object Name", "Object Type"])
    with connect_semantic_model(
        dataset=dataset, workspace=workspace, readonly=True
    ) as tom:
        for t in tom.model.Tables:
            if t.CalculationGroup is not None:
                new_data = {
                    "Parent Name": t.Parent.Name,
                    "Object Name": t.Name,
                    "Object Type": "Calculation Group",
                }
                df = pd.concat(
                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                )
                for ci in t.CalculationGroup.CalculationItems:
                    new_data = {
                        "Parent Name": t.Name,
                        "Object Name": ci.Name,
                        "Object Type": str(ci.ObjectType),
                    }
                    df = pd.concat(
                        [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                    )
            elif any(str(p.SourceType) == "Calculated" for p in t.Partitions):
                new_data = {
                    "Parent Name": t.Parent.Name,
                    "Object Name": t.Name,
                    "Object Type": "Calculated Table",
                }
                df = pd.concat(
                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                )
            else:
                new_data = {
                    "Parent Name": t.Parent.Name,
                    "Object Name": t.Name,
                    "Object Type": str(t.ObjectType),
                }
                df = pd.concat(
                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                )
            for c in t.Columns:
                if str(c.Type) != "RowNumber":
                    if str(c.Type) == "Calculated":
                        new_data = {
                            "Parent Name": c.Parent.Name,
                            "Object Name": c.Name,
                            "Object Type": "Calculated Column",
                        }
                        df = pd.concat(
                            [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                        )
                    else:
                        new_data = {
                            "Parent Name": c.Parent.Name,
                            "Object Name": c.Name,
                            "Object Type": str(c.ObjectType),
                        }
                        df = pd.concat(
                            [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                        )
            for m in t.Measures:
                new_data = {
                    "Parent Name": m.Parent.Name,
                    "Object Name": m.Name,
                    "Object Type": str(m.ObjectType),
                }
                df = pd.concat(
                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                )
            for h in t.Hierarchies:
                new_data = {
                    "Parent Name": h.Parent.Name,
                    "Object Name": h.Name,
                    "Object Type": str(h.ObjectType),
                }
                df = pd.concat(
                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                )
                for l in h.Levels:
                    new_data = {
                        "Parent Name": l.Parent.Name,
                        "Object Name": l.Name,
                        "Object Type": str(l.ObjectType),
                    }
                    df = pd.concat(
                        [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                    )
            for p in t.Partitions:
                new_data = {
                    "Parent Name": p.Parent.Name,
                    "Object Name": p.Name,
                    "Object Type": str(p.ObjectType),
                }
                df = pd.concat(
                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                )
        for r in tom.model.Relationships:
            rName = create_relationship_name(
                r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name
            )
            new_data = {
                "Parent Name": r.Parent.Name,
                "Object Name": rName,
                "Object Type": str(r.ObjectType),
            }
            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
        for role in tom.model.Roles:
            new_data = {
                "Parent Name": role.Parent.Name,
                "Object Name": role.Name,
                "Object Type": str(role.ObjectType),
            }
            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
            for rls in role.TablePermissions:
                new_data = {
                    "Parent Name": role.Name,
                    "Object Name": rls.Name,
                    "Object Type": str(rls.ObjectType),
                }
                df = pd.concat(
                    [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
                )
        for tr in tom.model.Cultures:
            new_data = {
                "Parent Name": tr.Parent.Name,
                "Object Name": tr.Name,
                "Object Type": str(tr.ObjectType),
            }
            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
        for per in tom.model.Perspectives:
            new_data = {
                "Parent Name": per.Parent.Name,
                "Object Name": per.Name,
                "Object Type": str(per.ObjectType),
            }
            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

    return df


@log
def migration_validation(
    dataset: str,
    new_dataset: str,
    workspace: Optional[str] = None,
    new_dataset_workspace: Optional[str] = None,
) -> pd.DataFrame:
    """
    Shows the objects in the original semantic model and whether they were migrated successfully or not.

    Parameters
    ----------
    dataset : str
        Name of the import/DirectQuery semantic model.
    new_dataset : str
        Name of the Direct Lake semantic model.
    workspace : str, default=None
        The Fabric workspace name in which the import/DirectQuery semantic model exists.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    new_dataset_workspace : str
        The Fabric workspace name in which the Direct Lake semantic model will be created.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe showing a list of objects and whether they were successfully migrated. Also shows the % of objects which were migrated successfully.
    """

    dfA = list_semantic_model_objects(dataset=dataset, workspace=workspace)
    dfB = list_semantic_model_objects(
        dataset=new_dataset, workspace=new_dataset_workspace
    )

    def is_migrated(row):
        if row["Object Type"] == "Calculated Table":
            return (
                (dfB["Parent Name"] == row["Parent Name"])
                & (dfB["Object Name"] == row["Object Name"])
                & (dfB["Object Type"].isin(["Calculated Table", "Table"]))
            ).any()
        else:
            return (
                (dfB["Parent Name"] == row["Parent Name"])
                & (dfB["Object Name"] == row["Object Name"])
                & (dfB["Object Type"] == row["Object Type"])
            ).any()

    dfA["Migrated"] = dfA.apply(is_migrated, axis=1)

    denom = len(dfA)
    num = len(dfA[dfA["Migrated"]])
    print(f"{100 * round(num / denom,2)}% migrated")

    return dfA
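A short illustrative sketch of how migration_validation above might be used after a migration. The import path and signature are taken from this hunk; the model names are placeholders.

from sempy_labs.migration._migration_validation import migration_validation

# Compare the original model against the migrated Direct Lake model
# (placeholder names). Prints the % migrated and returns the detail frame.
dfValidation = migration_validation(
    dataset="SalesModel",
    new_dataset="SalesModelDirectLake",
)

# Objects from the original model that were not found in the new model.
missing = dfValidation[~dfValidation["Migrated"]]
print(missing[["Parent Name", "Object Name", "Object Type"]])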
sempy_labs/migration/_refresh_calc_tables.py
@@ -0,0 +1,129 @@
import sempy
import sempy.fabric as fabric
import pandas as pd
import re, datetime, time
from pyspark.sql import SparkSession
from sempy_labs._tom import connect_semantic_model
from typing import List, Optional, Union
from sempy._utils._log import log
import sempy_labs._icons as icons


@log
def refresh_calc_tables(dataset: str, workspace: Optional[str] = None):
    """
    Recreates the delta tables in the lakehouse based on the DAX expressions stored as model annotations in the Direct Lake semantic model.

    Parameters
    ----------
    dataset : str
        Name of the semantic model.
    workspace : str, default=None
        The Fabric workspace name.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    """

    spark = SparkSession.builder.getOrCreate()

    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(minutes=1)
    success = False

    while not success:
        try:
            with connect_semantic_model(
                dataset=dataset, readonly=True, workspace=workspace
            ) as tom:
                success = True
                for a in tom.model.Annotations:
                    if any(a.Name == t.Name for t in tom.model.Tables):
                        tName = a.Name
                        query = a.Value

                        if not query.startswith("EVALUATE"):
                            daxquery = "EVALUATE \n" + query
                        else:
                            daxquery = query

                        try:
                            df = fabric.evaluate_dax(
                                dataset=dataset,
                                dax_string=daxquery,
                                workspace=workspace,
                            )

                            # Update column names for non-field parameters
                            if query.find("NAMEOF") == -1:
                                for old_column_name in df.columns:
                                    pattern = r"\[([^\]]+)\]"

                                    matches = re.findall(pattern, old_column_name)
                                    new_column_name = matches[0]
                                    new_column_name = new_column_name.replace(" ", "")

                                    df.rename(
                                        columns={old_column_name: new_column_name},
                                        inplace=True,
                                    )

                                    # Update data types for lakehouse columns
                                    dataType = next(
                                        str(c.DataType)
                                        for c in tom.all_columns()
                                        if c.Parent.Name == tName
                                        and c.SourceColumn == new_column_name
                                    )
                                    # dfC_type = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == new_column_name)]
                                    # dataType = dfC_type['Data Type'].iloc[0]

                                    if dataType == "Int64":
                                        df[new_column_name] = df[
                                            new_column_name
                                        ].astype(int)
                                    elif dataType in ["Decimal", "Double"]:
                                        df[new_column_name] = df[
                                            new_column_name
                                        ].astype(float)
                                    elif dataType == "Boolean":
                                        df[new_column_name] = df[
                                            new_column_name
                                        ].astype(bool)
                                    elif dataType == "DateTime":
                                        df[new_column_name] = pd.to_datetime(
                                            df[new_column_name]
                                        )
                                    else:
                                        df[new_column_name] = df[
                                            new_column_name
                                        ].astype(str)
                            # else:
                            #     second_column_name = df.columns[1]
                            #     third_column_name = df.columns[2]
                            #     df[third_column_name] = df[third_column_name].astype(int)

                            # Remove calc columns from field parameters
                            # mask = df[second_column_name].isin(dfC_filt['Full Column Name'])
                            # df = df[~mask]

                            delta_table_name = tName.replace(" ", "_")
                            print(
                                f"{icons.in_progress} Refresh of the '{delta_table_name}' table within the lakehouse is in progress..."
                            )

                            spark_df = spark.createDataFrame(df)
                            spark_df.write.mode("overwrite").format(
                                "delta"
                            ).saveAsTable(delta_table_name)
                            print(
                                f"{icons.green_dot} Calculated table '{tName}' has been refreshed as the '{delta_table_name.lower()}' table in the lakehouse."
                            )
                        except:
                            print(
                                f"{icons.red_dot} Failed to create calculated table '{tName}' as a delta table in the lakehouse."
                            )

        except Exception as e:
            if datetime.datetime.now() - start_time > timeout:
                break
            time.sleep(1)
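As an illustration only, refresh_calc_tables above would typically run in a notebook attached to the Direct Lake model's lakehouse, since it writes delta tables via the notebook's Spark session. The import path and signature come from this hunk; the names are placeholders.

from sempy_labs.migration._refresh_calc_tables import refresh_calc_tables

# Re-materialize calculated-table DAX (stored as model annotations) as
# delta tables in the attached lakehouse. Placeholder names.
refresh_calc_tables(dataset="SalesModelDirectLake", workspace="AnalyticsWorkspace")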
sempy_labs/report/__init__.py
@@ -0,0 +1,35 @@
from sempy_labs.report._generate_report import (
    create_report_from_reportjson,
    update_report_from_reportjson,
)
from sempy_labs.report._report_functions import (
    get_report_json,
    # report_dependency_tree,
    export_report,
    clone_report,
    launch_report,
    # list_report_pages,
    # list_report_visuals,
    # list_report_bookmarks,
    # translate_report_titles
)
from sempy_labs.report._report_rebind import (
    report_rebind,
    report_rebind_all,
)

__all__ = [
    "create_report_from_reportjson",
    "update_report_from_reportjson",
    "get_report_json",
    # report_dependency_tree,
    "export_report",
    "clone_report",
    "launch_report",
    # list_report_pages,
    # list_report_visuals,
    # list_report_bookmarks,
    # translate_report_titles,
    "report_rebind",
    "report_rebind_all",
]
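The __init__ above re-exports the report helpers, so they can be imported directly from sempy_labs.report. A small sketch follows; the report and dataset names are placeholders, and the keyword argument names are assumptions based on the function names, not shown in this hunk.

from sempy_labs.report import report_rebind, launch_report

# Rebind an existing report to the migrated Direct Lake model, then open it.
# Placeholder names; keyword names are assumed, not taken from this diff.
report_rebind(report="Sales Report", dataset="SalesModelDirectLake")
launch_report(report="Sales Report")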