semantic-link-labs 0.4.1 (semantic_link_labs-0.4.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of semantic-link-labs might be problematic.
- semantic_link_labs-0.4.1.dist-info/LICENSE +21 -0
- semantic_link_labs-0.4.1.dist-info/METADATA +22 -0
- semantic_link_labs-0.4.1.dist-info/RECORD +52 -0
- semantic_link_labs-0.4.1.dist-info/WHEEL +5 -0
- semantic_link_labs-0.4.1.dist-info/top_level.txt +1 -0
- sempy_labs/__init__.py +154 -0
- sempy_labs/_ai.py +496 -0
- sempy_labs/_clear_cache.py +39 -0
- sempy_labs/_connections.py +234 -0
- sempy_labs/_dax.py +70 -0
- sempy_labs/_generate_semantic_model.py +280 -0
- sempy_labs/_helper_functions.py +506 -0
- sempy_labs/_icons.py +4 -0
- sempy_labs/_list_functions.py +1372 -0
- sempy_labs/_model_auto_build.py +143 -0
- sempy_labs/_model_bpa.py +1354 -0
- sempy_labs/_model_dependencies.py +341 -0
- sempy_labs/_one_lake_integration.py +155 -0
- sempy_labs/_query_scale_out.py +447 -0
- sempy_labs/_refresh_semantic_model.py +184 -0
- sempy_labs/_tom.py +3766 -0
- sempy_labs/_translations.py +378 -0
- sempy_labs/_vertipaq.py +893 -0
- sempy_labs/directlake/__init__.py +45 -0
- sempy_labs/directlake/_directlake_schema_compare.py +110 -0
- sempy_labs/directlake/_directlake_schema_sync.py +128 -0
- sempy_labs/directlake/_fallback.py +62 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +69 -0
- sempy_labs/directlake/_get_shared_expression.py +59 -0
- sempy_labs/directlake/_guardrails.py +84 -0
- sempy_labs/directlake/_list_directlake_model_calc_tables.py +54 -0
- sempy_labs/directlake/_show_unsupported_directlake_objects.py +89 -0
- sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +81 -0
- sempy_labs/directlake/_update_directlake_partition_entity.py +64 -0
- sempy_labs/directlake/_warm_cache.py +210 -0
- sempy_labs/lakehouse/__init__.py +24 -0
- sempy_labs/lakehouse/_get_lakehouse_columns.py +81 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +250 -0
- sempy_labs/lakehouse/_lakehouse.py +85 -0
- sempy_labs/lakehouse/_shortcuts.py +296 -0
- sempy_labs/migration/__init__.py +29 -0
- sempy_labs/migration/_create_pqt_file.py +239 -0
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +429 -0
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +150 -0
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +524 -0
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +165 -0
- sempy_labs/migration/_migration_validation.py +227 -0
- sempy_labs/migration/_refresh_calc_tables.py +129 -0
- sempy_labs/report/__init__.py +35 -0
- sempy_labs/report/_generate_report.py +253 -0
- sempy_labs/report/_report_functions.py +855 -0
- sempy_labs/report/_report_rebind.py +131 -0
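Note: the wheel ships a single top-level package, sempy_labs (per top_level.txt), with subpackages for Direct Lake, lakehouse, migration, and report utilities. As a rough orientation, the sketch below shows how the modules listed above would typically be used inside a Microsoft Fabric notebook. It assumes the helpers are re-exported by sempy_labs/__init__.py, which this diff lists but does not reproduce, so the exact import paths may differ; the dataset and workspace names are placeholders.

# Hypothetical orientation sketch for semantic-link-labs 0.4.1 in a Fabric notebook.
# Assumes sempy_labs/__init__.py re-exports these helpers; otherwise import them
# from the private modules listed above (e.g. sempy_labs._clear_cache).
import sempy_labs as labs
from sempy_labs import directlake, lakehouse, migration, report

# clear_cache is defined in sempy_labs/_clear_cache.py (shown later in this diff).
status = labs.clear_cache(dataset="AdventureWorks", workspace="Sales Analytics")
print(status)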
sempy_labs/_ai.py
ADDED
@@ -0,0 +1,496 @@

import sempy
import sempy.fabric as fabric
import pandas as pd
from synapse.ml.services.openai import OpenAICompletion
from pyspark.sql.functions import col
from pyspark.sql import SparkSession
from typing import List, Optional, Union
from IPython.display import display


def optimize_semantic_model(dataset: str, workspace: Optional[str] = None):

    from ._model_bpa import run_model_bpa
    from .directlake._fallback import check_fallback_reason
    from ._helper_functions import format_dax_object_name

    modelBPA = run_model_bpa(
        dataset=dataset, workspace=workspace, return_dataframe=True
    )
    dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
    dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
    dfC["Total Size"] = dfC["Total Size"].astype("int")
    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)

    modelBPA_col = modelBPA[modelBPA["Object Type"] == "Column"]
    modelBPA_col = pd.merge(
        modelBPA_col,
        dfC[["Column Object", "Total Size"]],
        left_on="Object Name",
        right_on="Column Object",
        how="left",
    )

    isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows())

    if isDirectLake:
        fallback = check_fallback_reason(dataset=dataset, workspace=workspace)
        fallback_filt = fallback[fallback["FallbackReasonID"] == 2]

        if len(fallback_filt) > 0:
            print(
                f"The '{dataset}' semantic model is a Direct Lake semantic model which contains views. Since views always fall back to DirectQuery, it is recommended to only use lakehouse tables and not views."
            )

    # Potential model reduction estimate
    ruleNames = [
        "Remove unnecessary columns",
        "Set IsAvailableInMdx to false on non-attribute columns",
    ]

    for rule in ruleNames:
        df = modelBPA_col[modelBPA_col["Rule Name"] == rule]
        df_filt = df[["Object Name", "Total Size"]].sort_values(
            by="Total Size", ascending=False
        )
        totSize = df["Total Size"].sum()
        if len(df_filt) > 0:
            print(
                f"Potential savings of {totSize} bytes from following the '{rule}' rule."
            )
            display(df_filt)
        else:
            print(f"The '{rule}' rule has been followed.")


def generate_measure_descriptions(
    dataset: str,
    measures: Union[str, List[str]],
    gpt_model: Optional[str] = "gpt-35-turbo",
    workspace: Optional[str] = None,
):

    service_name = "synapseml-openai"

    if isinstance(measures, str):
        measures = [measures]

    validModels = ["gpt-35-turbo", "gpt-35-turbo-16k", "gpt-4"]
    if gpt_model not in validModels:
        print(
            f"The '{gpt_model}' model is not a valid model. Enter a gpt_model from this list: {validModels}."
        )
        return

    dfM = fabric.list_measures(dataset=dataset, workspace=workspace)

    if measures is not None:
        dfM_filt = dfM[dfM["Measure Name"].isin(measures)]
    else:
        dfM_filt = dfM

    df = dfM_filt[["Table Name", "Measure Name", "Measure Expression"]]

    df["prompt"] = (
        f"The following is DAX code used by Microsoft Power BI. Please explain this code in simple terms:"
        + df["Measure Expression"]
    )

    # Generate new column in df dataframe which has the AI-generated descriptions
    completion = {
        OpenAICompletion()
        .setDeploymentName(gpt_model)
        .setMaxTokens(200)
        .setCustomServiceName(service_name)
        .setPromptCol("prompt")
        .setErrorCol("error")
        .setOutputCol("completions")
    }

    completed_df = completion.transform(df).cache()
    completed_df.select(
        col("prompt"),
        col("error"),
        col("completions.choices.text").getItem(0).alias("text"),
    )

    # Update the model to use the new descriptions
    tom_server = fabric.create_tom_server(readonly=False, workspace=workspace)
    m = tom_server.Databases.GetByName(dataset).Model

    # for t in m.Tables:
    #     tName = t.Name
    #     for ms in t.Measures:
    #         mName = ms.Name
    #         mDesc = promptValue

    # m.SaveChanges()


def generate_aggs(
    dataset: str,
    table_name: str,
    columns: Union[str, List[str]],
    workspace: Optional[str] = None,
    lakehouse_workspace: Optional[str] = None,
):

    from ._helper_functions import (
        get_direct_lake_sql_endpoint,
        create_abfss_path,
        format_dax_object_name,
        resolve_lakehouse_id,
    )

    sempy.fabric._client._utils._init_analysis_services()
    import Microsoft.AnalysisServices.Tabular as TOM
    import System

    # columns = {
    #     'SalesAmount': 'Sum',
    #     'ProductKey': 'GroupBy',
    #     'OrderDateKey': 'GroupBy'
    # }

    if workspace == None:
        workspace_id = fabric.get_workspace_id()
        workspace = fabric.resolve_workspace_name(workspace_id)

    if lakehouse_workspace == None:
        lakehouse_workspace = workspace
        lakehouse_workspace_id = workspace_id
    else:
        lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace)

    if isinstance(columns, str):
        columns = [columns]

    columnValues = columns.keys()

    aggTypes = ["Sum", "Count", "Min", "Max", "GroupBy"]
    aggTypesAggregate = ["Sum", "Count", "Min", "Max"]
    numericTypes = ["Int64", "Double", "Decimal"]

    if any(value not in aggTypes for value in columns.values()):
        print(
            f"Invalid aggregation type(s) have been specified in the 'columns' parameter. Valid aggregation types: {aggTypes}."
        )
        return

    dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
    dfM = fabric.list_measures(dataset=dataset, workspace=workspace)
    dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
    if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
        print(
            f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode. This function is only relevant for Direct Lake semantic models."
        )
        return

    dfC_filtT = dfC[dfC["Table Name"] == table_name]

    if len(dfC_filtT) == 0:
        print(
            f"The '{table_name}' table does not exist in the '{dataset}' semantic model within the '{workspace}' workspace."
        )
        return

    dfC_filt = dfC[
        (dfC["Table Name"] == table_name) & (dfC["Column Name"].isin(columnValues))
    ]

    if len(columns) != len(dfC_filt):
        print(
            f"Columns listed in '{columnValues}' do not exist in the '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace."
        )
        return

    # Check if doing sum/count/min/max etc. on a non-number column
    for col, agg in columns.items():
        dfC_col = dfC_filt[dfC_filt["Column Name"] == col]
        dataType = dfC_col["Data Type"].iloc[0]
        if agg in aggTypesAggregate and dataType not in numericTypes:
            print(
                f"The '{col}' column in the '{table_name}' table is of '{dataType}' data type. Only columns of '{numericTypes}' data types can be aggregated as '{aggTypesAggregate}' aggregation types."
            )
            return

    # Create/update lakehouse delta agg table
    aggSuffix = "_agg"
    aggTableName = f"{table_name}{aggSuffix}"
    aggLakeTName = aggTableName.lower().replace(" ", "_")
    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
    dfP_filt = dfP[dfP["Table Name"] == table_name]
    lakeTName = dfP_filt["Query"].iloc[0]

    sqlEndpointId = get_direct_lake_sql_endpoint(dataset=dataset, workspace=workspace)

    dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
    dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]

    if len(dfI_filt) == 0:
        print(
            f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter."
        )
        return

    lakehouseName = dfI_filt["Display Name"].iloc[0]
    lakehouse_id = resolve_lakehouse_id(
        lakehouse=lakehouseName, workspace=lakehouse_workspace
    )

    # Generate SQL query
    query = "SELECT"
    groupBy = "\nGROUP BY"
    for col, agg in columns.items():
        colFilt = dfC_filt[dfC_filt["Column Name"] == col]
        sourceCol = colFilt["Source"].iloc[0]

        if agg == "GroupBy":
            query = f"{query}\n{sourceCol},"
            groupBy = f"{groupBy}\n{sourceCol},"
        else:
            query = f"{query}\n{agg}({sourceCol}) AS {sourceCol},"

    query = query[:-1]

    spark = SparkSession.builder.getOrCreate()
    fromTablePath = create_abfss_path(
        lakehouse_id=lakehouse_id,
        lakehouse_workspace_id=lakehouse_workspace_id,
        delta_table_name=lakeTName,
    )
    df = spark.read.format("delta").load(fromTablePath)
    tempTableName = "delta_table_" + lakeTName
    df.createOrReplaceTempView(tempTableName)
    sqlQuery = f"{query} \n FROM {tempTableName} {groupBy}"

    sqlQuery = sqlQuery[:-1]
    print(sqlQuery)

    # Save query to spark dataframe
    spark_df = spark.sql(sqlQuery)
    f"\nCreating/updating the '{aggLakeTName}' table in the lakehouse..."
    # Write spark dataframe to delta table
    aggFilePath = create_abfss_path(
        lakehouse_id=lakehouse_id,
        lakehouse_workspace_id=lakehouse_workspace_id,
        delta_table_name=aggLakeTName,
    )
    spark_df.write.mode("overwrite").format("delta").save(aggFilePath)
    f"The '{aggLakeTName}' table has been created/updated in the lakehouse."

    # Create/update semantic model agg table
    tom_server = fabric.create_tom_server(readonly=False, workspace=workspace)
    m = tom_server.Databases.GetByName(dataset).Model
    f"\nUpdating the '{dataset}' semantic model..."
    dfC_agg = dfC[dfC["Table Name"] == aggTableName]

    if len(dfC_agg) == 0:
        print(f"Creating the '{aggTableName}' table...")
        exp = m.Expressions["DatabaseQuery"]
        tbl = TOM.Table()
        tbl.Name = aggTableName
        tbl.IsHidden = True

        ep = TOM.EntityPartitionSource()
        ep.Name = aggTableName
        ep.EntityName = aggLakeTName
        ep.ExpressionSource = exp

        part = TOM.Partition()
        part.Name = aggTableName
        part.Source = ep
        part.Mode = TOM.ModeType.DirectLake

        tbl.Partitions.Add(part)

        for i, r in dfC_filt.iterrows():
            scName = r["Source"]
            cName = r["Column Name"]
            dType = r["Data Type"]

            col = TOM.DataColumn()
            col.Name = cName
            col.IsHidden = True
            col.SourceColumn = scName
            col.DataType = System.Enum.Parse(TOM.DataType, dType)

            tbl.Columns.Add(col)
            print(
                f"The '{aggTableName}'[{cName}] column has been added to the '{dataset}' semantic model."
            )

        m.Tables.Add(tbl)
        print(
            f"The '{aggTableName}' table has been added to the '{dataset}' semantic model."
        )
    else:
        print(f"Updating the '{aggTableName}' table's columns...")
        # Remove existing columns
        for t in m.Tables:
            tName = t.Name
            for c in t.Columns:
                cName = c.Name
                if t.Name == aggTableName:
                    m.Tables[tName].Columns.Remove(cName)
        # Add columns
        for i, r in dfC_filt.iterrows():
            scName = r["Source"]
            cName = r["Column Name"]
            dType = r["Data Type"]

            col = TOM.DataColumn()
            col.Name = cName
            col.IsHidden = True
            col.SourceColumn = scName
            col.DataType = System.Enum.Parse(TOM.DataType, dType)

            m.Tables[aggTableName].Columns.Add(col)
            print(f"The '{aggTableName}'[{cName}] column has been added.")

    # Create relationships
    relMap = {"m": "Many", "1": "One", "0": "None"}

    print(f"\nGenerating necessary relationships...")
    for i, r in dfR.iterrows():
        fromTable = r["From Table"]
        fromColumn = r["From Column"]
        toTable = r["To Table"]
        toColumn = r["To Column"]
        cfb = r["Cross Filtering Behavior"]
        sfb = r["Security Filtering Behavior"]
        mult = r["Multiplicity"]

        crossFB = System.Enum.Parse(TOM.CrossFilteringBehavior, cfb)
        secFB = System.Enum.Parse(TOM.SecurityFilteringBehavior, sfb)
        fromCardinality = System.Enum.Parse(
            TOM.RelationshipEndCardinality, relMap.get(mult[0])
        )
        toCardinality = System.Enum.Parse(
            TOM.RelationshipEndCardinality, relMap.get(mult[-1])
        )

        rel = TOM.SingleColumnRelationship()
        rel.FromCardinality = fromCardinality
        rel.ToCardinality = toCardinality
        rel.IsActive = r["Active"]
        rel.CrossFilteringBehavior = crossFB
        rel.SecurityFilteringBehavior = secFB
        rel.RelyOnReferentialIntegrity = r["Rely On Referential Integrity"]

        if fromTable == table_name:
            try:
                rel.FromColumn = m.Tables[aggTableName].Columns[fromColumn]
                m.Relationships.Add(rel)
                print(
                    f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has been added."
                )
            except:
                print(
                    f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has not been created."
                )
        elif toTable == table_name:
            try:
                rel.ToColumn = m.Tables[aggTableName].Columns[toColumn]
                m.Relationships.Add(rel)
                print(
                    f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has been added."
                )
            except:
                print(
                    f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has not been created."
                )
    f"Relationship creation is complete."

    # Create IF measure
    f"\nCreating measure to check if the agg table can be used..."
    aggChecker = "IF("
    dfR_filt = dfR[
        (dfR["From Table"] == table_name) & (~dfR["From Column"].isin(columnValues))
    ]

    for i, r in dfR_filt.iterrows():
        toTable = r["To Table"]
        aggChecker = f"{aggChecker}\nISCROSSFILTERED('{toTable}') ||"

    aggChecker = aggChecker[:-3]
    aggChecker = f"{aggChecker},1,0)"
    print(aggChecker)

    # Todo: add IFISFILTERED clause for columns
    f"\n Creating the base measures in the agg table..."
    # Create base agg measures
    dep = fabric.evaluate_dax(
        dataset=dataset,
        workspace=workspace,
        dax_string="""
        SELECT
         [TABLE] AS [Table Name]
        ,[OBJECT] AS [Object Name]
        ,[OBJECT_TYPE] AS [Object Type]
        ,[REFERENCED_TABLE] AS [Referenced Table]
        ,[REFERENCED_OBJECT] AS [Referenced Object]
        ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type]
        FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY
        WHERE [OBJECT_TYPE] = 'MEASURE'
        """,
    )

    baseMeasures = dep[
        (dep["Referenced Object Type"] == "COLUMN")
        & (dep["Referenced Table"] == table_name)
        & (dep["Referenced Object"].isin(columnValues))
    ]
    for i, r in baseMeasures.iterrows():
        tName = r["Table Name"]
        mName = r["Object Name"]
        cName = r["Referenced Object"]
        dfM_filt = dfM[dfM["Measure Name"] == mName]
        expr = dfM_filt["Measure Expression"].iloc[0]

        colFQNonAgg = format_dax_object_name(tName, cName)
        colFQAgg = format_dax_object_name(aggTableName, cName)
        colNQNonAgg = f"{tName}[{cName}]"

        if " " in tName:
            newExpr = expr.replace(colFQNonAgg, colFQAgg)
        else:
            newExpr = expr.replace(colFQNonAgg, colFQAgg).replace(colNQNonAgg, colFQAgg)
        print(expr)
        print(newExpr)

        aggMName = mName + aggSuffix
        measure = TOM.Measure()
        measure.Name = aggMName
        measure.IsHidden = True
        measure.Expression = newExpr
        m.Tables[aggTableName].Measures.Add(measure)
        f"The '{aggMName}' measure has been created in the '{aggTableName}' table."

    # Update base detail measures

    # m.SaveChanges()


# Identify views used within Direct Lake model
# workspace = 'MK Demo 6'
# lakehouse = 'MyLakehouse'
# dataset = 'MigrationTest'
# lakehouse_workspace = workspace

# dfView = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'View Name'])
# dfP = fabric.list_partitions(dataset = dataset, workspace = workspace)
# isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows())

# spark = SparkSession.builder.getOrCreate()
# views = spark.sql(f"SHOW VIEWS IN {lakehouse}").collect()
# for view in views:
#     viewName = view['viewName']
#     isTemporary = view['isTemporary']
#     new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'View Name': viewName}
#     dfView = pd.concat([dfView, pd.DataFrame(new_data, index=[0])], ignore_index=True)
# dfView
# lakeT = get_lakehouse_tables(lakehouse, lakehouse_workspace)
# if not dfP['Query'].isin(lakeT['Table Name'].values):
#     if
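Note on calling the helpers above: optimize_semantic_model and generate_measure_descriptions take the semantic model name plus an optional workspace, while generate_aggs is annotated as accepting Union[str, List[str]] for columns but calls .keys(), .values(), and .items() on it, so in practice it expects a dict mapping column names to aggregation types, as the commented-out example in the source suggests. The sketch below is a hypothetical invocation under those assumptions; the dataset, workspace, and table names are placeholders.

# Hypothetical usage of the helpers from sempy_labs/_ai.py (names are placeholders).
from sempy_labs._ai import optimize_semantic_model, generate_aggs

# Surface Best Practice Analyzer findings and the potential size savings per rule.
optimize_semantic_model(dataset="Sales Model", workspace="Analytics")

# Build an aggregation table for a Direct Lake model. Despite the
# Union[str, List[str]] annotation, the function iterates over .items(), so a
# dict of column -> aggregation type (Sum/Count/Min/Max/GroupBy) is what works.
generate_aggs(
    dataset="Sales Model",
    table_name="FactInternetSales",
    columns={"SalesAmount": "Sum", "ProductKey": "GroupBy", "OrderDateKey": "GroupBy"},
    workspace="Analytics",
)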
sempy_labs/_clear_cache.py
ADDED

@@ -0,0 +1,39 @@
import sempy
import sempy.fabric as fabric
from ._helper_functions import resolve_dataset_id
from typing import List, Optional, Union
import sempy_labs._icons as icons


def clear_cache(dataset: str, workspace: Optional[str] = None):
    """
    Clears the cache of a semantic model.

    Parameters
    ----------
    dataset : str
        Name of the semantic model.
    workspace : str, default=None
        The Fabric workspace name.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    """

    if workspace == None:
        workspace_id = fabric.get_workspace_id()
        workspace = fabric.resolve_workspace_name(workspace_id)

    datasetID = resolve_dataset_id(dataset=dataset, workspace=workspace)

    xmla = f"""
        <ClearCache xmlns="http://schemas.microsoft.com/analysisservices/2003/engine">
            <Object>
                <DatabaseID>{datasetID}</DatabaseID>
            </Object>
        </ClearCache>
        """
    fabric.execute_xmla(dataset=dataset, xmla_command=xmla, workspace=workspace)

    outputtext = f"{icons.green_dot} Cache cleared for the '{dataset}' semantic model within the '{workspace}' workspace."

    return outputtext