semantic-link-labs 0.8.4 → 0.8.6 (py3-none-any.whl)
- {semantic_link_labs-0.8.4.dist-info → semantic_link_labs-0.8.6.dist-info}/METADATA +9 -3
- {semantic_link_labs-0.8.4.dist-info → semantic_link_labs-0.8.6.dist-info}/RECORD +49 -47
- {semantic_link_labs-0.8.4.dist-info → semantic_link_labs-0.8.6.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +29 -1
- sempy_labs/_data_pipelines.py +3 -3
- sempy_labs/_dataflows.py +116 -3
- sempy_labs/_dax.py +189 -3
- sempy_labs/_deployment_pipelines.py +3 -3
- sempy_labs/_environments.py +3 -3
- sempy_labs/_eventhouses.py +3 -3
- sempy_labs/_eventstreams.py +3 -3
- sempy_labs/_external_data_shares.py +1 -1
- sempy_labs/_generate_semantic_model.py +3 -3
- sempy_labs/_git.py +7 -7
- sempy_labs/_helper_functions.py +25 -1
- sempy_labs/_kql_databases.py +3 -3
- sempy_labs/_kql_querysets.py +3 -3
- sempy_labs/_mirrored_databases.py +428 -0
- sempy_labs/_mirrored_warehouses.py +1 -1
- sempy_labs/_ml_experiments.py +3 -3
- sempy_labs/_ml_models.py +4 -4
- sempy_labs/_model_bpa.py +209 -180
- sempy_labs/_model_bpa_bulk.py +48 -24
- sempy_labs/_model_dependencies.py +42 -86
- sempy_labs/_notebooks.py +2 -2
- sempy_labs/_query_scale_out.py +4 -4
- sempy_labs/_refresh_semantic_model.py +2 -2
- sempy_labs/_spark.py +6 -6
- sempy_labs/_vertipaq.py +31 -19
- sempy_labs/_warehouses.py +3 -3
- sempy_labs/_workspace_identity.py +2 -2
- sempy_labs/_workspaces.py +7 -7
- sempy_labs/admin/__init__.py +2 -0
- sempy_labs/admin/_basic_functions.py +54 -8
- sempy_labs/admin/_domains.py +1 -1
- sempy_labs/directlake/_update_directlake_partition_entity.py +1 -1
- sempy_labs/directlake/_warm_cache.py +10 -9
- sempy_labs/lakehouse/_get_lakehouse_tables.py +1 -1
- sempy_labs/lakehouse/_shortcuts.py +2 -2
- sempy_labs/migration/_create_pqt_file.py +9 -4
- sempy_labs/report/__init__.py +2 -0
- sempy_labs/report/_download_report.py +75 -0
- sempy_labs/report/_generate_report.py +3 -3
- sempy_labs/report/_report_functions.py +3 -3
- sempy_labs/report/_report_rebind.py +1 -1
- sempy_labs/report/_reportwrapper.py +4 -2
- sempy_labs/tom/_model.py +71 -35
- {semantic_link_labs-0.8.4.dist-info → semantic_link_labs-0.8.6.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.8.4.dist-info → semantic_link_labs-0.8.6.dist-info}/top_level.txt +0 -0
sempy_labs/_model_bpa.py
CHANGED
```diff
@@ -123,210 +123,239 @@ def run_model_bpa(
         dataset=dataset, workspace=workspace, readonly=True
     ) as tom:
 
-        translation_file = (
-            f"{current_dir}/_bpa_translation/_model/_translations_{language}.po"
+        # Do not run BPA for models with no tables
+        if tom.model.Tables.Count == 0:
+            print(
+                f"{icons.warning} The '{dataset}' semantic model within the '{workspace}' workspace has no tables and therefore there are no valid BPA results."
             )
+            finalDF = pd.DataFrame(
+                columns=[
+                    "Category",
+                    "Rule Name",
+                    "Severity",
+                    "Object Type",
+                    "Object Name",
+                    "Description",
+                    "URL",
+                ]
+            )
+        else:
+            dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)
 
+            def translate_using_po(rule_file):
+                current_dir = os.path.dirname(os.path.abspath(__file__))
+                translation_file = (
+                    f"{current_dir}/_bpa_translation/_model/_translations_{language}.po"
+                )
+                for c in ["Category", "Description", "Rule Name"]:
+                    po = polib.pofile(translation_file)
+                    for entry in po:
+                        if entry.tcomment == c.lower().replace(" ", "_"):
+                            rule_file.loc[rule_file["Rule Name"] == entry.msgid, c] = (
+                                entry.msgstr
+                            )
 
-        if language is not None and rules is None and language in language_list:
-            rules = model_bpa_rules(dependencies=dep)
-            translate_using_po(rules)
-            translated = True
-        if rules is None:
-            rules = model_bpa_rules(dependencies=dep)
-        if language is not None and not translated:
+            translated = False
 
+            # Translations
+            if language is not None and rules is None and language in language_list:
+                rules = model_bpa_rules(dependencies=dep)
+                translate_using_po(rules)
+                translated = True
+            if rules is None:
+                rules = model_bpa_rules(dependencies=dep)
+            if language is not None and not translated:
 
-            from pyspark.sql import SparkSession
+                def translate_using_spark(rule_file):
 
+                    from synapse.ml.services import Translate
+                    from pyspark.sql import SparkSession
 
-                    StructField("Rule Name", StringType(), True),
-                    StructField("Description", StringType(), True),
-                ]
-            )
+                    rules_temp = rule_file.copy()
+                    rules_temp = rules_temp.drop(
+                        ["Expression", "URL", "Severity"], axis=1
+                    )
 
-                .setTextCol(clm)
-                .setToLanguage(language)
-                .setOutputCol("translation")
-                .setConcurrency(5)
+                    schema = StructType(
+                        [
+                            StructField("Category", StringType(), True),
+                            StructField("Scope", StringType(), True),
+                            StructField("Rule Name", StringType(), True),
+                            StructField("Description", StringType(), True),
+                        ]
                     )
 
+                    spark = SparkSession.builder.getOrCreate()
+                    dfRules = spark.createDataFrame(rules_temp, schema)
+
+                    columns = ["Category", "Rule Name", "Description"]
+                    for clm in columns:
+                        translate = (
+                            Translate()
+                            .setTextCol(clm)
+                            .setToLanguage(language)
+                            .setOutputCol("translation")
+                            .setConcurrency(5)
                         )
 
+                        if clm == "Rule Name":
+                            transDF = (
+                                translate.transform(dfRules)
+                                .withColumn(
+                                    "translation",
+                                    flatten(col("translation.translations")),
+                                )
+                                .withColumn("translation", col("translation.text"))
+                                .select(clm, "translation")
+                            )
+                        else:
+                            transDF = (
+                                translate.transform(dfRules)
+                                .withColumn(
+                                    "translation",
+                                    flatten(col("translation.translations")),
+                                )
+                                .withColumn("translation", col("translation.text"))
+                                .select("Rule Name", clm, "translation")
                             )
-                .withColumn("translation", col("translation.text"))
-                .select("Rule Name", clm, "translation")
-            )
 
+                        df_panda = transDF.toPandas()
+                        rule_file = pd.merge(
+                            rule_file,
+                            df_panda[["Rule Name", "translation"]],
+                            on="Rule Name",
+                            how="left",
+                        )
 
+                        rule_file = rule_file.rename(
+                            columns={"translation": f"{clm}Translated"}
+                        )
+                        rule_file[f"{clm}Translated"] = rule_file[
+                            f"{clm}Translated"
+                        ].apply(lambda x: x[0] if x is not None else None)
 
+                    for clm in columns:
+                        rule_file = rule_file.drop([clm], axis=1)
+                        rule_file = rule_file.rename(columns={f"{clm}Translated": clm})
 
+                    return rule_file
 
+                rules = translate_using_spark(rules)
 
+            rules.loc[rules["Severity"] == "Warning", "Severity"] = icons.warning
+            rules.loc[rules["Severity"] == "Error", "Severity"] = icons.error
+            rules.loc[rules["Severity"] == "Info", "Severity"] = icons.info
 
+            pd.set_option("display.max_colwidth", 1000)
 
+            violations = pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"])
 
+            scope_to_dataframe = {
+                "Relationship": (
+                    tom.model.Relationships,
+                    lambda obj: create_relationship_name(
+                        obj.FromTable.Name,
+                        obj.FromColumn.Name,
+                        obj.ToTable.Name,
+                        obj.ToColumn.Name,
+                    ),
                 ),
+                "Column": (
+                    tom.all_columns(),
+                    lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+                ),
+                "Measure": (tom.all_measures(), lambda obj: obj.Name),
+                "Hierarchy": (
+                    tom.all_hierarchies(),
+                    lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+                ),
+                "Table": (tom.model.Tables, lambda obj: obj.Name),
+                "Role": (tom.model.Roles, lambda obj: obj.Name),
+                "Model": (tom.model, lambda obj: obj.Model.Name),
+                "Calculation Item": (
+                    tom.all_calculation_items(),
+                    lambda obj: format_dax_object_name(obj.Parent.Table.Name, obj.Name),
+                ),
+                "Row Level Security": (
+                    tom.all_rls(),
+                    lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+                ),
+                "Partition": (
+                    tom.all_partitions(),
+                    lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name),
+                ),
+            }
+
+            for i, r in rules.iterrows():
+                ruleName = r["Rule Name"]
+                expr = r["Expression"]
+                scopes = r["Scope"]
+
+                if isinstance(scopes, str):
+                    scopes = [scopes]
+
+                for scope in scopes:
+                    func = scope_to_dataframe[scope][0]
+                    nm = scope_to_dataframe[scope][1]
+
+                    if scope == "Model":
+                        x = []
+                        if expr(func, tom):
+                            x = ["Model"]
+                    elif scope == "Measure":
+                        x = [nm(obj) for obj in tom.all_measures() if expr(obj, tom)]
+                    elif scope == "Column":
+                        x = [nm(obj) for obj in tom.all_columns() if expr(obj, tom)]
+                    elif scope == "Partition":
+                        x = [nm(obj) for obj in tom.all_partitions() if expr(obj, tom)]
+                    elif scope == "Hierarchy":
+                        x = [nm(obj) for obj in tom.all_hierarchies() if expr(obj, tom)]
+                    elif scope == "Table":
+                        x = [nm(obj) for obj in tom.model.Tables if expr(obj, tom)]
+                    elif scope == "Relationship":
+                        x = [
+                            nm(obj) for obj in tom.model.Relationships if expr(obj, tom)
+                        ]
+                    elif scope == "Role":
+                        x = [nm(obj) for obj in tom.model.Roles if expr(obj, tom)]
+                    elif scope == "Row Level Security":
+                        x = [nm(obj) for obj in tom.all_rls() if expr(obj, tom)]
+                    elif scope == "Calculation Item":
+                        x = [
+                            nm(obj)
+                            for obj in tom.all_calculation_items()
+                            if expr(obj, tom)
+                        ]
+
+                    if len(x) > 0:
+                        new_data = {
+                            "Object Name": x,
+                            "Scope": scope,
+                            "Rule Name": ruleName,
+                        }
+                        violations = pd.concat(
+                            [violations, pd.DataFrame(new_data)], ignore_index=True
+                        )
 
+            prepDF = pd.merge(
+                violations,
+                rules[["Rule Name", "Category", "Severity", "Description", "URL"]],
+                left_on="Rule Name",
+                right_on="Rule Name",
+                how="left",
+            )
+            prepDF.rename(columns={"Scope": "Object Type"}, inplace=True)
+            finalDF = prepDF[
+                [
+                    "Category",
+                    "Rule Name",
+                    "Severity",
+                    "Object Type",
+                    "Object Name",
+                    "Description",
+                    "URL",
+                ]
             ]
-        ]
 
         if export:
             if not lakehouse_attached():
```
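The rewritten `run_model_bpa` hinges on `scope_to_dataframe`: each rule's `Scope` maps to a collection of TOM objects plus a function that renders a display name, and the rule's `Expression` is evaluated as a predicate over every object in that scope, with hits accumulated into the `violations` dataframe. A minimal, self-contained sketch of the same dispatch pattern (the `Table` dataclass and the sample rule are hypothetical, and the predicate is simplified to a single argument instead of the `(obj, tom)` pair used above):

```python
import pandas as pd
from dataclasses import dataclass


# Hypothetical stand-in for a TOM table object.
@dataclass
class Table:
    Name: str
    RowCount: int


tables = [Table("Sales", 1_000_000), Table("Staging", 0)]

# One toy rule in the same shape the rules dataframe uses: Scope + Expression.
rules = pd.DataFrame(
    [
        {
            "Rule Name": "Remove empty tables",
            "Scope": "Table",
            "Expression": lambda obj: obj.RowCount == 0,
        }
    ]
)

# Scope -> (object collection, display-name function), mirroring scope_to_dataframe.
scope_to_objects = {"Table": (tables, lambda obj: obj.Name)}

violations = pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"])
for _, r in rules.iterrows():
    scopes = [r["Scope"]] if isinstance(r["Scope"], str) else r["Scope"]
    for scope in scopes:
        objs, nm = scope_to_objects[scope]
        hits = [nm(obj) for obj in objs if r["Expression"](obj)]
        if hits:
            new_rows = pd.DataFrame(
                {"Object Name": hits, "Scope": scope, "Rule Name": r["Rule Name"]}
            )
            violations = pd.concat([violations, new_rows], ignore_index=True)

print(violations)  # one violation: 'Staging' / 'Table' / 'Remove empty tables'
```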
sempy_labs/_model_bpa_bulk.py
CHANGED
```diff
@@ -25,6 +25,7 @@ def run_model_bpa_bulk(
     language: Optional[str] = None,
     workspace: Optional[str | List[str]] = None,
     skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"],
+    skip_models_in_workspace: Optional[dict] = None,
 ):
     """
     Runs the semantic model Best Practice Analyzer across all semantic models in a workspace (or all accessible workspaces).
@@ -33,8 +34,6 @@ def run_model_bpa_bulk(
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
     rules : pandas.DataFrame, default=None
         A pandas dataframe containing rules to be evaluated. Based on the format of the dataframe produced by the model_bpa_rules function.
     extended : bool, default=False
@@ -47,6 +46,12 @@ def run_model_bpa_bulk(
         Defaults to None which scans all accessible workspaces.
     skip_models : str | List[str], default=['ModelBPA', 'Fabric Capacity Metrics']
         The semantic models to always skip when running this analysis.
+    skip_models_in_workspace : dict, default=None
+        A dictionary showing specific semantic models within specific workspaces to skip. See the example below:
+        {
+            "Workspace A": ["Dataset1", "Dataset2"],
+            "Workspace B": ["Dataset5", "Dataset 8"],
+        }
     """
 
     if not lakehouse_attached():
```
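Put together, a bulk scan using the new parameter might look like this (workspace and dataset names are illustrative, and the call assumes the function is exposed from the package root, as sempy_labs does for its other public functions):

```python
import sempy_labs as labs

# Scan two workspaces; "ModelBPA" and "Fabric Capacity Metrics" are skipped
# everywhere by default, and skip_models_in_workspace additionally skips
# specific models only in the named workspaces.
labs.run_model_bpa_bulk(
    workspace=["Workspace A", "Workspace B"],
    skip_models_in_workspace={
        "Workspace A": ["Dataset1", "Dataset2"],
        "Workspace B": ["Dataset5", "Dataset 8"],
    },
)
```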
```diff
@@ -68,7 +73,6 @@ def run_model_bpa_bulk(
     )
     lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace)
     lakeT_filt = lakeT[lakeT["Table Name"] == output_table]
-    # query = f"SELECT MAX(RunId) FROM {lakehouse}.{output_table}"
     if len(lakeT_filt) == 0:
         runId = 1
     else:
@@ -84,6 +88,11 @@ def run_model_bpa_bulk(
     else:
         dfW_filt = dfW[dfW["Name"].isin(workspace)]
 
+    if len(dfW_filt) == 0:
+        raise ValueError(
+            f"{icons.red_dot} There are no valid workspaces to assess. This is likely due to not having proper permissions to the workspace(s) entered in the 'workspace' parameter."
+        )
+
     for i, r in dfW_filt.iterrows():
         wksp = r["Name"]
         wksp_id = r["Id"]
@@ -91,6 +100,13 @@ def run_model_bpa_bulk(
         df = pd.DataFrame(columns=list(icons.bpa_schema.keys()))
         dfD = fabric.list_datasets(workspace=wksp, mode="rest")
 
+        # Skip models in workspace
+        if skip_models_in_workspace is not None and isinstance(
+            skip_models_in_workspace, dict
+        ):
+            skip_models_wkspc = skip_models_in_workspace.get(wksp)
+            dfD = dfD[~dfD["Dataset Name"].isin(skip_models_wkspc)]
+
         # Exclude default semantic models
         if len(dfD) > 0:
             dfI = fabric.list_items(workspace=wksp)
```
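One caveat with the filter above: `skip_models_in_workspace.get(wksp)` returns `None` for a workspace that has no entry in the dictionary, and pandas' `Series.isin` raises a `TypeError` when given `None`. A defensive variant of the same filter defaults to an empty list (a sketch, reusing the names from the diff):

```python
import pandas as pd

dfD = pd.DataFrame({"Dataset Name": ["Dataset1", "Dataset3"]})
skip_models_in_workspace = {"Workspace A": ["Dataset1", "Dataset2"]}

wksp = "Workspace B"  # a workspace with no skip entry
skip = skip_models_in_workspace.get(wksp, [])  # [] instead of None
dfD = dfD[~dfD["Dataset Name"].isin(skip)]  # keeps every row for this workspace
```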
```diff
@@ -132,7 +148,10 @@ def run_model_bpa_bulk(
 
             bpa_df["RunId"] = bpa_df["RunId"].astype("int")
 
-            df
+            if df.empty:
+                df = bpa_df
+            if not bpa_df.empty:
+                df = pd.concat([df, bpa_df], ignore_index=True)
             print(
                 f"{icons.green_dot} Collected Model BPA stats for the '{dataset_name}' semantic model within the '{wksp}' workspace."
             )
```
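A subtlety in the append logic above: as transcribed, the two `if` statements are independent, so a result set that arrives while `df` is still empty is assigned and then concatenated again. A quick pandas check of the difference an `elif` would make:

```python
import pandas as pd

bpa_df = pd.DataFrame({"Rule Name": ["Remove empty tables"]})

# Two independent ifs, as in the diff: the first batch lands twice.
df = pd.DataFrame()
if df.empty:
    df = bpa_df
if not bpa_df.empty:
    df = pd.concat([df, bpa_df], ignore_index=True)
assert len(df) == 2

# With elif, the first batch lands exactly once.
df = pd.DataFrame()
if df.empty:
    df = bpa_df
elif not bpa_df.empty:
    df = pd.concat([df, bpa_df], ignore_index=True)
assert len(df) == 1
```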
```diff
@@ -142,28 +161,33 @@ def run_model_bpa_bulk(
             )
             print(e)
 
-        df
+        if len(df) == 0:
+            print(
+                f"{icons.yellow_dot} No BPA results to save for the '{wksp}' workspace."
+            )
+        else:
+            df["Severity"].replace(icons.severity_mapping, inplace=True)
 
+            # Append save results individually for each workspace (so as not to create a giant dataframe)
+            print(
+                f"{icons.in_progress} Saving the Model BPA results of the '{wksp}' workspace to the '{output_table}' within the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace..."
+            )
 
+            schema = {
+                key.replace(" ", "_"): value
+                for key, value in icons.bpa_schema.items()
+            }
+
+            save_as_delta_table(
+                dataframe=df,
+                delta_table_name=output_table,
+                write_mode="append",
+                schema=schema,
+                merge_schema=True,
+            )
+            print(
+                f"{icons.green_dot} Saved BPA results to the '{output_table}' delta table."
+            )
 
     print(f"{icons.green_dot} Bulk BPA scan complete.")
 
```
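The `schema` comprehension above exists because Delta tables reject column names containing spaces (unless column mapping is enabled), while the `icons.bpa_schema` keys are display names like "Rule Name". A small illustration with a hypothetical subset of that schema:

```python
# Hypothetical subset of icons.bpa_schema: display name -> type string.
bpa_schema = {"Rule Name": "string", "Object Type": "string", "RunId": "int"}

schema = {key.replace(" ", "_"): value for key, value in bpa_schema.items()}
assert schema == {"Rule_Name": "string", "Object_Type": "string", "RunId": "int"}
```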