semantic-link-labs 0.4.1 (semantic_link_labs-0.4.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)
  1. semantic_link_labs-0.4.1.dist-info/LICENSE +21 -0
  2. semantic_link_labs-0.4.1.dist-info/METADATA +22 -0
  3. semantic_link_labs-0.4.1.dist-info/RECORD +52 -0
  4. semantic_link_labs-0.4.1.dist-info/WHEEL +5 -0
  5. semantic_link_labs-0.4.1.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +154 -0
  7. sempy_labs/_ai.py +496 -0
  8. sempy_labs/_clear_cache.py +39 -0
  9. sempy_labs/_connections.py +234 -0
  10. sempy_labs/_dax.py +70 -0
  11. sempy_labs/_generate_semantic_model.py +280 -0
  12. sempy_labs/_helper_functions.py +506 -0
  13. sempy_labs/_icons.py +4 -0
  14. sempy_labs/_list_functions.py +1372 -0
  15. sempy_labs/_model_auto_build.py +143 -0
  16. sempy_labs/_model_bpa.py +1354 -0
  17. sempy_labs/_model_dependencies.py +341 -0
  18. sempy_labs/_one_lake_integration.py +155 -0
  19. sempy_labs/_query_scale_out.py +447 -0
  20. sempy_labs/_refresh_semantic_model.py +184 -0
  21. sempy_labs/_tom.py +3766 -0
  22. sempy_labs/_translations.py +378 -0
  23. sempy_labs/_vertipaq.py +893 -0
  24. sempy_labs/directlake/__init__.py +45 -0
  25. sempy_labs/directlake/_directlake_schema_compare.py +110 -0
  26. sempy_labs/directlake/_directlake_schema_sync.py +128 -0
  27. sempy_labs/directlake/_fallback.py +62 -0
  28. sempy_labs/directlake/_get_directlake_lakehouse.py +69 -0
  29. sempy_labs/directlake/_get_shared_expression.py +59 -0
  30. sempy_labs/directlake/_guardrails.py +84 -0
  31. sempy_labs/directlake/_list_directlake_model_calc_tables.py +54 -0
  32. sempy_labs/directlake/_show_unsupported_directlake_objects.py +89 -0
  33. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +81 -0
  34. sempy_labs/directlake/_update_directlake_partition_entity.py +64 -0
  35. sempy_labs/directlake/_warm_cache.py +210 -0
  36. sempy_labs/lakehouse/__init__.py +24 -0
  37. sempy_labs/lakehouse/_get_lakehouse_columns.py +81 -0
  38. sempy_labs/lakehouse/_get_lakehouse_tables.py +250 -0
  39. sempy_labs/lakehouse/_lakehouse.py +85 -0
  40. sempy_labs/lakehouse/_shortcuts.py +296 -0
  41. sempy_labs/migration/__init__.py +29 -0
  42. sempy_labs/migration/_create_pqt_file.py +239 -0
  43. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +429 -0
  44. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +150 -0
  45. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +524 -0
  46. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +165 -0
  47. sempy_labs/migration/_migration_validation.py +227 -0
  48. sempy_labs/migration/_refresh_calc_tables.py +129 -0
  49. sempy_labs/report/__init__.py +35 -0
  50. sempy_labs/report/_generate_report.py +253 -0
  51. sempy_labs/report/_report_functions.py +855 -0
  52. sempy_labs/report/_report_rebind.py +131 -0
sempy_labs/directlake/_show_unsupported_directlake_objects.py
@@ -0,0 +1,89 @@
+ import sempy
+ import sempy.fabric as fabric
+ import pandas as pd
+ from sempy_labs._list_functions import list_tables
+ from sempy_labs._helper_functions import format_dax_object_name
+ from typing import Optional, Tuple
+
+
+ def show_unsupported_direct_lake_objects(
+     dataset: str, workspace: Optional[str] = None
+ ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+     """
+     Returns the objects in a semantic model that are not supported by Direct Lake, based on the `official documentation <https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations>`_.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame, pandas.DataFrame, pandas.DataFrame
+         Three pandas dataframes showing the objects in the semantic model which are not supported by Direct Lake.
+     """
+
+     pd.options.mode.chained_assignment = None
+
+     if workspace is None:
+         workspace_id = fabric.get_workspace_id()
+         workspace = fabric.resolve_workspace_name(workspace_id)
+
+     dfT = list_tables(dataset, workspace)
+     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
+     dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
+
+     # Calculated tables
+     dfT_filt = dfT[dfT["Type"] == "Calculated Table"]
+     dfT_filt.rename(columns={"Name": "Table Name"}, inplace=True)
+     t = dfT_filt[["Table Name", "Type"]]
+
+     # Calculated columns and binary columns
+     dfC_filt = dfC[(dfC["Type"] == "Calculated") | (dfC["Data Type"] == "Binary")]
+     c = dfC_filt[["Table Name", "Column Name", "Type", "Data Type", "Source"]]
+
+     # Relationships
+     dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
+     dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
+     dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
+     merged_from = pd.merge(
+         dfR, dfC, left_on="From Object", right_on="Column Object", how="left"
+     )
+     merged_to = pd.merge(
+         dfR, dfC, left_on="To Object", right_on="Column Object", how="left"
+     )
+
+     dfR["From Column Data Type"] = merged_from["Data Type"]
+     dfR["To Column Data Type"] = merged_to["Data Type"]
+
+     dfR_filt = dfR[
+         (
+             (dfR["From Column Data Type"] == "DateTime")
+             | (dfR["To Column Data Type"] == "DateTime")
+         )
+         | (dfR["From Column Data Type"] != dfR["To Column Data Type"])
+     ]
+     r = dfR_filt[
+         [
+             "From Table",
+             "From Column",
+             "To Table",
+             "To Column",
+             "From Column Data Type",
+             "To Column Data Type",
+         ]
+     ]
+
+     # print('Calculated tables are not supported...')
+     # display(t)
+     # print("Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations")
+     # print('Calculated columns are not supported. Columns of binary data type are not supported.')
+     # display(c)
+     # print('Columns used for relationships cannot be of data type datetime and must also be of the same data type.')
+     # display(r)
+
+     return t, c, r
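A minimal usage sketch for the function above, assuming it is run in a Fabric notebook with this package installed; the dataset and workspace names are hypothetical:

    from sempy_labs.directlake._show_unsupported_directlake_objects import (
        show_unsupported_direct_lake_objects,
    )

    # Hypothetical model/workspace names
    tables, columns, relationships = show_unsupported_direct_lake_objects(
        dataset="Sales Model", workspace="My Workspace"
    )
    # 'relationships' lists relationships using DateTime keys or mismatched key data types
    print(relationships)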
sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py
@@ -0,0 +1,81 @@
+ import sempy
+ import sempy.fabric as fabric
+ from sempy_labs.directlake._get_shared_expression import get_shared_expression
+ from sempy_labs._helper_functions import (
+     resolve_lakehouse_name,
+     resolve_workspace_name_and_id,
+ )
+ from sempy_labs._tom import connect_semantic_model
+ from typing import List, Optional, Union
+
+
+ def update_direct_lake_model_lakehouse_connection(
+     dataset: str,
+     workspace: Optional[str] = None,
+     lakehouse: Optional[str] = None,
+     lakehouse_workspace: Optional[str] = None,
+ ):
+     """
+     Remaps a Direct Lake semantic model's SQL endpoint connection to a new lakehouse.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name in which the semantic model exists.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     lakehouse : str, default=None
+         The Fabric lakehouse used by the Direct Lake semantic model.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     lakehouse_workspace : str, default=None
+         The Fabric workspace used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+
+     """
+
+     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+     if lakehouse_workspace is None:
+         lakehouse_workspace = workspace
+
+     if lakehouse is None:
+         lakehouse_id = fabric.get_lakehouse_id()
+         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+
+     # Check that the lakehouse exists in the given workspace
+     dfI = fabric.list_items(workspace=lakehouse_workspace, type="Lakehouse")
+     dfI_filt = dfI[(dfI["Display Name"] == lakehouse)]
+
+     if len(dfI_filt) == 0:
+         print(
+             f"The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. Therefore it cannot be used to support the '{dataset}' semantic model within the '{workspace}' workspace."
+         )
+         return
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
+
+     if len(dfP_filt) == 0:
+         print(
+             f"The '{dataset}' semantic model is not in Direct Lake mode. This function is only applicable to Direct Lake semantic models."
+         )
+     else:
+         with connect_semantic_model(
+             dataset=dataset, readonly=False, workspace=workspace
+         ) as tom:
+
+             shEx = get_shared_expression(lakehouse, lakehouse_workspace)
+             try:
+                 tom.model.Expressions["DatabaseQuery"].Expression = shEx
+                 print(
+                     f"The expression in the '{dataset}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace."
+                 )
+             except Exception:
+                 print(
+                     f"ERROR: The expression in the '{dataset}' semantic model was not updated."
+                 )
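A hedged usage sketch for the remapping function above, assuming a Fabric notebook; the dataset, lakehouse, and workspace names are placeholders:

    from sempy_labs.directlake._update_directlake_model_lakehouse_connection import (
        update_direct_lake_model_lakehouse_connection,
    )

    # Point the Direct Lake model at a different lakehouse (illustrative names)
    update_direct_lake_model_lakehouse_connection(
        dataset="Sales Model",
        workspace="My Workspace",
        lakehouse="SalesLakehouse",
        lakehouse_workspace="My Workspace",
    )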
sempy_labs/directlake/_update_directlake_partition_entity.py
@@ -0,0 +1,64 @@
+ import sempy.fabric as fabric
+ from sempy_labs._tom import connect_semantic_model
+ from typing import List, Optional, Union
+
+
+ def update_direct_lake_partition_entity(
+     dataset: str,
+     table_name: Union[str, List[str]],
+     entity_name: Union[str, List[str]],
+     workspace: Optional[str] = None,
+ ):
+     """
+     Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     table_name : str, List[str]
+         Name of the table(s) in the semantic model.
+     entity_name : str, List[str]
+         Name of the lakehouse table(s) to be mapped to the semantic model table(s).
+     workspace : str, default=None
+         The Fabric workspace name in which the semantic model exists.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     # Support both str & list types
+     if isinstance(table_name, str):
+         table_name = [table_name]
+     if isinstance(entity_name, str):
+         entity_name = [entity_name]
+
+     if len(table_name) != len(entity_name):
+         print(
+             "ERROR: The 'table_name' and 'entity_name' lists must be of equal length."
+         )
+         return
+
+     with connect_semantic_model(
+         dataset=dataset, readonly=False, workspace=workspace
+     ) as tom:
+
+         if not tom.is_direct_lake():
+             print(
+                 f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode."
+             )
+             return
+
+         # Pair each semantic model table with its target lakehouse table
+         for tName, eName in zip(table_name, entity_name):
+             try:
+                 tom.model.Tables[tName].Partitions[0].EntityName = eName
+                 print(
+                     f"The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table in the lakehouse."
+                 )
+             except Exception:
+                 print(
+                     f"ERROR: The '{tName}' table in the '{dataset}' semantic model has not been updated."
+                 )
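A short usage sketch for the function above, assuming a Fabric notebook; the model, table, and workspace names are illustrative only:

    from sempy_labs.directlake._update_directlake_partition_entity import (
        update_direct_lake_partition_entity,
    )

    # Remap two semantic model tables to differently named lakehouse tables
    update_direct_lake_partition_entity(
        dataset="Sales Model",
        table_name=["Sales", "Date"],
        entity_name=["fact_sales", "dim_date"],
        workspace="My Workspace",
    )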
sempy_labs/directlake/_warm_cache.py
@@ -0,0 +1,210 @@
+ import sempy
+ import sempy.fabric as fabric
+ import pandas as pd
+ from tqdm.auto import tqdm
+ import numpy as np
+ import time
+ from sempy_labs._helper_functions import format_dax_object_name
+ from sempy_labs._refresh_semantic_model import refresh_semantic_model
+ from sempy_labs._model_dependencies import get_measure_dependencies
+ from typing import Optional
+ from sempy._utils._log import log
+ import sempy_labs._icons as icons
+
+
+ @log
+ def warm_direct_lake_cache_perspective(
+     dataset: str,
+     perspective: str,
+     add_dependencies: Optional[bool] = False,
+     workspace: Optional[str] = None,
+ ):
+     """
+     Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     perspective : str
+         Name of the perspective which contains the objects to be used for warming the cache.
+     add_dependencies : bool, default=False
+         Includes object dependencies in the cache warming process.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the columns that have been put into memory.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+         print(
+             f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+         )
+         return
+
+     dfPersp = fabric.list_perspectives(dataset=dataset, workspace=workspace)
+     dfPersp["DAX Object Name"] = format_dax_object_name(
+         dfPersp["Table Name"], dfPersp["Object Name"]
+     )
+     dfPersp_filt = dfPersp[dfPersp["Perspective Name"] == perspective]
+
+     if len(dfPersp_filt) == 0:
+         print(
+             f"{icons.red_dot} The '{perspective}' perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace."
+         )
+         return
+     dfPersp_c = dfPersp_filt[dfPersp_filt["Object Type"] == "Column"]
+
+     column_values = dfPersp_c["DAX Object Name"].tolist()
+
+     if add_dependencies:
+         # Measure dependencies
+         md = get_measure_dependencies(dataset, workspace)
+         md["Referenced Full Object"] = format_dax_object_name(
+             md["Referenced Table"], md["Referenced Object"]
+         )
+         dfPersp_m = dfPersp_filt[(dfPersp_filt["Object Type"] == "Measure")]
+         md_filt = md[
+             (md["Object Name"].isin(dfPersp_m["Object Name"].values))
+             & (md["Referenced Object Type"] == "Column")
+         ]
+         measureDep = md_filt["Referenced Full Object"].unique()
+
+         # Hierarchy dependencies
+         dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")]
+         dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace)
+         dfH["Hierarchy Object"] = format_dax_object_name(
+             dfH["Table Name"], dfH["Hierarchy Name"]
+         )
+         dfH["Column Object"] = format_dax_object_name(
+             dfH["Table Name"], dfH["Column Name"]
+         )
+         dfH_filt = dfH[
+             dfH["Hierarchy Object"].isin(dfPersp_h["DAX Object Name"].values)
+         ]
+         hierarchyDep = dfH_filt["Column Object"].unique()
+
+         # Relationship dependencies
+         unique_table_names = dfPersp_filt["Table Name"].unique()
+         dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
+         dfR["From Object"] = format_dax_object_name(
+             dfR["From Table"], dfR["From Column"]
+         )
+         dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
+         filtered_dfR = dfR[
+             dfR["From Table"].isin(unique_table_names)
+             & dfR["To Table"].isin(unique_table_names)
+         ]
+
+         fromObjects = filtered_dfR["From Object"].unique()
+         toObjects = filtered_dfR["To Object"].unique()
+
+         merged_list = np.concatenate(
+             [column_values, measureDep, hierarchyDep, fromObjects, toObjects]
+         )
+         merged_list_unique = list(set(merged_list))
+
+     else:
+         merged_list_unique = column_values
+
+     df = pd.DataFrame(merged_list_unique, columns=["DAX Object Name"])
+     df[["Table Name", "Column Name"]] = df["DAX Object Name"].str.split(
+         "[", expand=True
+     )
+     df["Table Name"] = df["Table Name"].str[1:-1]
+     df["Column Name"] = df["Column Name"].str[0:-1]
+
+     tbls = list(set(value.split("[")[0] for value in merged_list_unique))
+
+     # Run a basic query per table to pull its columns into memory
+     for tableName in (bar := tqdm(tbls)):
+         filtered_list = [
+             value for value in merged_list_unique if value.startswith(f"{tableName}[")
+         ]
+         bar.set_description(f"Warming the '{tableName}' table...")
+         css = ",".join(map(str, filtered_list))
+         dax = "EVALUATE TOPN(1,SUMMARIZECOLUMNS(" + css + "))"
+         fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
+
+     print(f"{icons.green_dot} The following columns have been put into memory:")
+
+     new_column_order = ["Table Name", "Column Name", "DAX Object Name"]
+     df = df.reindex(columns=new_column_order)
+     df = df[["Table Name", "Column Name"]].sort_values(
+         by=["Table Name", "Column Name"], ascending=True
+     )
+
+     return df
+
+
+ @log
+ def warm_direct_lake_cache_isresident(
+     dataset: str, workspace: Optional[str] = None
+ ) -> pd.DataFrame:
+     """
+     Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory.
+
+     Parameters
+     ----------
+     dataset : str
+         Name of the semantic model.
+     workspace : str, default=None
+         The Fabric workspace name.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the columns that have been put into memory.
+     """
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+     if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
+         print(
+             f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode."
+         )
+         return
+
+     # Identify columns which are currently in memory (Is Resident = True)
+     dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
+     dfC["DAX Object Name"] = format_dax_object_name(
+         dfC["Table Name"], dfC["Column Name"]
+     )
+     dfC_filtered = dfC[dfC["Is Resident"]]
+
+     if len(dfC_filtered) == 0:
+         print(
+             f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace."
+         )
+         return
+
+     # Refresh/frame the dataset
+     refresh_semantic_model(dataset=dataset, refresh_type="full", workspace=workspace)
+
+     time.sleep(2)
+
+     tbls = dfC_filtered["Table Name"].unique()
+
+     # Run a basic query to get columns into memory; processed one table at a time (so as not to overload the capacity)
+     for tableName in (bar := tqdm(tbls)):
+         bar.set_description(f"Warming the '{tableName}' table...")
+         css = ",".join(
+             dfC_filtered[dfC_filtered["Table Name"] == tableName]["DAX Object Name"]
+         )
+         dax = "EVALUATE TOPN(1,SUMMARIZECOLUMNS(" + css + "))"
+         fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace)
+
+     print(
+         f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh."
+     )
+
+     return dfC_filtered[
+         ["Table Name", "Column Name", "Is Resident", "Temperature"]
+     ].sort_values(by=["Table Name", "Column Name"], ascending=True)
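A brief usage sketch for the two cache-warming functions above, assuming a Fabric notebook; the dataset, perspective, and workspace names are hypothetical:

    from sempy_labs.directlake._warm_cache import (
        warm_direct_lake_cache_perspective,
        warm_direct_lake_cache_isresident,
    )

    # Warm the columns referenced by a perspective, including measure, hierarchy,
    # and relationship dependencies
    warm_direct_lake_cache_perspective(
        dataset="Sales Model",
        perspective="Reporting",
        add_dependencies=True,
        workspace="My Workspace",
    )

    # Refresh the model, then re-warm whichever columns were resident before the refresh
    warm_direct_lake_cache_isresident(dataset="Sales Model", workspace="My Workspace")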
sempy_labs/lakehouse/__init__.py
@@ -0,0 +1,24 @@
+ from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+ from sempy_labs.lakehouse._lakehouse import (
+     lakehouse_attached,
+     optimize_lakehouse_tables,
+ )
+
+ from sempy_labs.lakehouse._shortcuts import (
+     list_shortcuts,
+     # create_shortcut,
+     create_shortcut_onelake,
+     delete_shortcut,
+ )
+
+ __all__ = [
+     "get_lakehouse_columns",
+     "get_lakehouse_tables",
+     "lakehouse_attached",
+     "optimize_lakehouse_tables",
+     "list_shortcuts",
+     # "create_shortcut",
+     "create_shortcut_onelake",
+     "delete_shortcut",
+ ]
sempy_labs/lakehouse/_get_lakehouse_columns.py
@@ -0,0 +1,81 @@
+ import sempy
+ import sempy.fabric as fabric
+ import pandas as pd
+ from pyspark.sql import SparkSession
+ from sempy_labs._helper_functions import (
+     resolve_lakehouse_name,
+     format_dax_object_name,
+     resolve_lakehouse_id,
+ )
+ from typing import Optional
+
+
+ def get_lakehouse_columns(
+     lakehouse: Optional[str] = None, workspace: Optional[str] = None
+ ):
+     """
+     Shows the tables and columns of a lakehouse and their respective properties.
+
+     Parameters
+     ----------
+     lakehouse : str, default=None
+         The Fabric lakehouse.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str, default=None
+         The Fabric workspace used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         Shows the tables/columns within a lakehouse and their properties.
+     """
+     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+     from delta import DeltaTable
+
+     df = pd.DataFrame(
+         columns=[
+             "Workspace Name",
+             "Lakehouse Name",
+             "Table Name",
+             "Column Name",
+             "Full Column Name",
+             "Data Type",
+         ]
+     )
+
+     workspace = fabric.resolve_workspace_name(workspace)
+
+     if lakehouse is None:
+         lakehouse_id = fabric.get_lakehouse_id()
+         lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+     else:
+         lakehouse_id = resolve_lakehouse_id(lakehouse, workspace)
+
+     spark = SparkSession.builder.getOrCreate()
+
+     tables = get_lakehouse_tables(
+         lakehouse=lakehouse, workspace=workspace, extended=False, count_rows=False
+     )
+     tables_filt = tables[tables["Format"] == "delta"]
+
+     for i, r in tables_filt.iterrows():
+         tName = r["Table Name"]
+         tPath = r["Location"]
+         delta_table = DeltaTable.forPath(spark, tPath)
+         sparkdf = delta_table.toDF()
+
+         for cName, data_type in sparkdf.dtypes:
+             tc = format_dax_object_name(tName, cName)
+             new_data = {
+                 "Workspace Name": workspace,
+                 "Lakehouse Name": lakehouse,
+                 "Table Name": tName,
+                 "Column Name": cName,
+                 "Full Column Name": tc,
+                 "Data Type": data_type,
+             }
+             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+     return df
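A minimal usage sketch for the function above, using the public export defined in sempy_labs/lakehouse/__init__.py; the lakehouse and workspace names are illustrative:

    from sempy_labs.lakehouse import get_lakehouse_columns

    # Inspect the Delta tables and columns of a lakehouse
    df = get_lakehouse_columns(lakehouse="SalesLakehouse", workspace="My Workspace")
    print(df[["Table Name", "Column Name", "Data Type"]].head())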