semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of semantic-link-labs has been flagged as potentially problematic.
- semantic_link_labs-0.7.1.dist-info/METADATA +148 -0
- semantic_link_labs-0.7.1.dist-info/RECORD +111 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +26 -2
- sempy_labs/_ai.py +3 -65
- sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
- sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
- sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
- sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
- sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
- sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
- sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
- sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
- sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
- sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
- sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
- sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
- sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
- sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
- sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
- sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
- sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
- sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
- sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
- sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
- sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
- sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
- sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
- sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
- sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
- sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
- sempy_labs/_clear_cache.py +9 -4
- sempy_labs/_generate_semantic_model.py +30 -56
- sempy_labs/_helper_functions.py +361 -14
- sempy_labs/_icons.py +10 -1
- sempy_labs/_list_functions.py +539 -260
- sempy_labs/_model_bpa.py +194 -18
- sempy_labs/_model_bpa_bulk.py +367 -0
- sempy_labs/_model_bpa_rules.py +19 -8
- sempy_labs/_model_dependencies.py +12 -10
- sempy_labs/_one_lake_integration.py +7 -7
- sempy_labs/_query_scale_out.py +61 -96
- sempy_labs/_refresh_semantic_model.py +7 -0
- sempy_labs/_translations.py +154 -1
- sempy_labs/_vertipaq.py +103 -90
- sempy_labs/directlake/__init__.py +5 -1
- sempy_labs/directlake/_directlake_schema_compare.py +27 -31
- sempy_labs/directlake/_directlake_schema_sync.py +55 -66
- sempy_labs/directlake/_dl_helper.py +233 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
- sempy_labs/directlake/_get_shared_expression.py +1 -1
- sempy_labs/directlake/_guardrails.py +17 -13
- sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
- sempy_labs/directlake/_warm_cache.py +1 -1
- sempy_labs/lakehouse/__init__.py +2 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
- sempy_labs/lakehouse/_lakehouse.py +66 -9
- sempy_labs/lakehouse/_shortcuts.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +174 -182
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
- sempy_labs/migration/_refresh_calc_tables.py +92 -101
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +6 -2
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_generate_report.py +255 -139
- sempy_labs/report/_report_functions.py +26 -33
- sempy_labs/report/_report_rebind.py +31 -26
- sempy_labs/tom/_model.py +75 -58
- semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
- semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
- sempy_labs/directlake/_fallback.py +0 -60
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/top_level.txt +0 -0
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -16,6 +16,7 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
+from sempy.fabric.exceptions import FabricHTTPException


 @log
@@ -51,6 +52,8 @@ def get_lakehouse_tables(
     Shows the tables/columns within a lakehouse and their properties.
     """

+    from sempy_labs._helper_functions import pagination
+
     df = pd.DataFrame(
         columns=[
             "Workspace Name",
@@ -73,34 +76,52 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True

+    if (
+        workspace_id != fabric.get_workspace_id()
+        and lakehouse_id != fabric.get_lakehouse_id()
+        and count_rows
+    ):
+        raise ValueError(
+            f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
+            "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
+        )
+
     client = fabric.FabricRestClient()
     response = client.get(
         f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
     )

-[old lines 81-86 removed; their content is not shown in the source diff view]
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    responses = pagination(client, response)
+
+    dfs = []
+    for r in responses:
+        for i in r.get("data", []):
             new_data = {
                 "Workspace Name": workspace,
                 "Lakehouse Name": lakehouse,
-                "Table Name":
-                "Format":
-                "Type":
-                "Location":
+                "Table Name": i.get("name"),
+                "Format": i.get("format"),
+                "Type": i.get("type"),
+                "Location": i.get("location"),
             }
-
-
-    sku_value = get_sku_size(workspace)
-    guardrail = get_directlake_guardrails_for_sku(sku_value)
-
-    spark = SparkSession.builder.getOrCreate()
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+    df = pd.concat(dfs, ignore_index=True)

-
-
+    if extended:
+        sku_value = get_sku_size(workspace)
+        guardrail = get_directlake_guardrails_for_sku(sku_value)
+        spark = SparkSession.builder.getOrCreate()
+        df["Files"] = None
+        df["Row Groups"] = None
+        df["Table Size"] = None
+        if count_rows:
+            df["Row Count"] = None
+        for i, r in df.iterrows():
+            tName = r["Table Name"]
+            if r["Type"] == "Managed" and r["Format"] == "delta":
                 detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
                 num_files = detail_df.numFiles
                 size_in_bytes = detail_df.sizeInBytes
@@ -120,60 +141,31 @@ def get_lakehouse_tables(
                     ).num_row_groups
                 except FileNotFoundError:
                     continue
-
-
-
-                intColumns.append("Row Count")
-                new_data = {
-                    "Workspace Name": workspace,
-                    "Lakehouse Name": lakehouse,
-                    "Table Name": tName,
-                    "Format": tFormat,
-                    "Type": tType,
-                    "Location": tLocation,
-                    "Files": num_files,
-                    "Row Groups": num_rowgroups,
-                    "Row Count": num_rows,
-                    "Table Size": size_in_bytes,
-                }
-            else:
-                new_data = {
-                    "Workspace Name": workspace,
-                    "Lakehouse Name": lakehouse,
-                    "Table Name": tName,
-                    "Format": tFormat,
-                    "Type": tType,
-                    "Location": tLocation,
-                    "Files": num_files,
-                    "Row Groups": num_rowgroups,
-                    "Table Size": size_in_bytes,
-                }
-
-            df = pd.concat(
-                [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
-            )
-    df[intColumns] = df[intColumns].astype(int)
-
-    df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
-    df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
-    df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
-    df["Row Count Guardrail"] = (
-        guardrail["Rows per table (millions)"].iloc[0] * 1000000
-    )
-
-    df["Parquet File Guardrail Hit"] = (
-        df["Files"] > df["Parquet File Guardrail"]
-    )
-    df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
-
+                df.at[i, "Files"] = num_files
+                df.at[i, "Row Groups"] = num_rowgroups
+                df.at[i, "Table Size"] = size_in_bytes
             if count_rows:
-
-
-
+                num_rows = spark.table(tName).count()
+                df.at[i, "Row Count"] = num_rows
+
+    if extended:
+        intColumns = ["Files", "Row Groups", "Table Size"]
+        df[intColumns] = df[intColumns].astype(int)
+        df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
+        df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+        df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+        df["Row Count Guardrail"] = (
+            guardrail["Rows per table (millions)"].iloc[0] * 1000000
+        )
+
+        df["Parquet File Guardrail Hit"] = df["Files"] > df["Parquet File Guardrail"]
+        df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+        if count_rows:
+            df["Row Count"] = df["Row Count"].astype(int)
+            df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]

     if export:
-
-        if lakeAttach is False:
+        if not lakehouse_attached():
             raise ValueError(
                 f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )
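In 0.7.1, get_lakehouse_tables pages through the lakehouse tables REST endpoint via the new pagination helper, raises FabricHTTPException on a non-200 response, and only permits count_rows against the notebook's default lakehouse because it issues Spark queries. A minimal usage sketch follows; it assumes the function is re-exported from sempy_labs.lakehouse and that the notebook has a lakehouse attached (both plausible but not shown in this diff).

    # Usage sketch (assumed import path and notebook context; not part of the diff).
    from sempy_labs.lakehouse import get_lakehouse_tables

    # extended=True adds the Direct Lake guardrail columns; count_rows=True also
    # requires the default lakehouse attached to the notebook (cross-workspace
    # Spark queries are rejected with a ValueError in 0.7.1).
    df = get_lakehouse_tables(extended=True, count_rows=True)
    print(df[["Table Name", "Files", "Row Groups", "Table Size", "Row Count"]])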
sempy_labs/lakehouse/_lakehouse.py

@@ -37,8 +37,9 @@ def optimize_lakehouse_tables(

     Parameters
     ----------
-    tables : str | List[str]
-        The table(s) to optimize.
+    tables : str | List[str], default=None
+        The table(s) to optimize.
+        Defaults to None which resovles to optimizing all tables within the lakehouse.
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
@@ -68,18 +69,74 @@ def optimize_lakehouse_tables(
     else:
         tables_filt = lakeTablesDelta.copy()

-    tableCount = len(tables_filt)
-
     spark = SparkSession.builder.getOrCreate()

-    i = 1
     for _, r in (bar := tqdm(tables_filt.iterrows())):
         tableName = r["Table Name"]
         tablePath = r["Location"]
         bar.set_description(f"Optimizing the '{tableName}' table...")
         deltaTable = DeltaTable.forPath(spark, tablePath)
         deltaTable.optimize().executeCompaction()
-
-
-
-
+
+
+@log
+def vacuum_lakehouse_tables(
+    tables: Optional[Union[str, List[str]]] = None,
+    lakehouse: Optional[str] = None,
+    workspace: Optional[str] = None,
+    retain_n_hours: Optional[int] = None,
+):
+    """
+    Runs the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function over the specified lakehouse tables.
+
+    Parameters
+    ----------
+    tables : str | List[str] | None
+        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be optimized.
+    lakehouse : str, default=None
+        The Fabric lakehouse.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    retain_n_hours : int, default=None
+        The number of hours to retain historical versions of Delta table files.
+        Files older than this retention period will be deleted during the vacuum operation.
+        If not specified, the default retention period configured for the Delta table will be used.
+        The default retention period is 168 hours (7 days) unless manually configured via table properties.
+    """
+
+    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+    from delta import DeltaTable
+
+    workspace = fabric.resolve_workspace_name(workspace)
+
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
+
+    if isinstance(tables, str):
+        tables = [tables]
+
+    if tables is not None:
+        tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
+    else:
+        tables_filt = lakeTablesDelta.copy()
+
+    spark = SparkSession.builder.getOrCreate()
+    spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+
+    for _, r in (bar := tqdm(tables_filt.iterrows())):
+        tableName = r["Table Name"]
+        tablePath = r["Location"]
+        bar.set_description(f"Vacuuming the '{tableName}' table...")
+        deltaTable = DeltaTable.forPath(spark, tablePath)
+
+        if retain_n_hours is None:
+            deltaTable.vacuum()
+        else:
+            deltaTable.vacuum(retain_n_hours)
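The new vacuum_lakehouse_tables mirrors optimize_lakehouse_tables: it resolves the attached lakehouse, filters to delta-format tables, enables parallel delete, and runs Delta VACUUM on each table with an optional retention window. A short usage sketch, assuming both functions are exported from sempy_labs.lakehouse; the table names are placeholders.

    # Usage sketch (assumed export path; 'Sales' and 'Customers' are placeholder table names).
    from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

    tables = ["Sales", "Customers"]
    optimize_lakehouse_tables(tables=tables)  # compact the delta tables' parquet files
    vacuum_lakehouse_tables(tables=tables, retain_n_hours=168)  # keep 7 days of history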
sempy_labs/migration/_create_pqt_file.py

@@ -1,9 +1,8 @@
+import sempy
 import sempy.fabric as fabric
 import json
 import os
 import shutil
-import xml.etree.ElementTree as ET
-from sempy_labs._list_functions import list_tables
 from sempy_labs.lakehouse._lakehouse import lakehouse_attached
 from sempy._utils._log import log
 from typing import Optional
@@ -20,6 +19,9 @@ def create_pqt_file(
     Dynamically generates a `Power Query Template <https://learn.microsoft.com/power-query/power-query-template>`_ file based on the semantic model. The .pqt file is
     saved within the Files section of your lakehouse.

+    Dataflows Gen2 has a `limit of 50 tables <https://learn.microsoft.com/power-query/power-query-online-limits>`_. If there are more than 50 tables, this will save multiple Power Query Template
+    files (with each file having a max of 50 tables).
+
     Parameters
     ----------
     dataset : str
@@ -32,9 +34,11 @@ def create_pqt_file(
         The name of the Power Query Template file to be generated.
     """

-
+    sempy.fabric._client._utils._init_analysis_services()
+    import Microsoft.AnalysisServices.Tabular as TOM
+    from sempy_labs.tom import connect_semantic_model

-    if
+    if not lakehouse_attached():
         raise ValueError(
             f"{icons.red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
         )
@@ -45,192 +49,180 @@ def create_pqt_file(
     subFolderPath = os.path.join(folderPath, "pqtnewfolder")
     os.makedirs(subFolderPath, exist_ok=True)

-[old lines 48-89 removed; their content is not shown in the source diff view]
-    (
-[old lines 91-119 removed; their content is not shown in the source diff view]
+    with connect_semantic_model(
+        dataset=dataset, workspace=workspace, readonly=True
+    ) as tom:
+        if not any(
+            p.SourceType == TOM.PartitionSourceType.M for p in tom.all_partitions()
+        ) and not any(t.RefreshPolicy for t in tom.model.Tables):
+            print(
+                f"{icons.info} The '{dataset}' semantic model within the '{workspace}' workspace has no Power Query logic."
+            )
+            return
+
+        table_map = {}
+        expr_map = {}
+
+        for t in tom.model.Tables:
+            table_name = t.Name
+            for char in icons.special_characters:
+                table_name = table_name.replace(char, "")
+            if t.RefreshPolicy:
+                table_map[table_name] = t.RefreshPolicy.SourceExpression
+            elif any(p.SourceType == TOM.PartitionSourceType.M for p in t.Partitions):
+                part_name = next(
+                    p.Name
+                    for p in t.Partitions
+                    if p.SourceType == TOM.PartitionSourceType.M
+                )
+                expr = t.Partitions[part_name].Source.Expression
+                table_map[table_name] = expr
+
+        for e in tom.model.Expressions:
+            expr_map[e.Name] = [str(e.Kind), e.Expression]
+
+        # Dataflows Gen2 max table limit is 50.
+        max_length = 50
+        table_chunks = [
+            dict(list(table_map.items())[i : i + max_length])
+            for i in range(0, len(table_map), max_length)
+        ]
+
+        def create_pqt(table_map: dict, expr_map: dict, file_name: str):
+
+            class QueryMetadata:
+                def __init__(
+                    self,
+                    QueryName,
+                    QueryGroupId=None,
+                    LastKnownIsParameter=None,
+                    LastKnownResultTypeName=None,
+                    LoadEnabled=True,
+                    IsHidden=False,
+                ):
+                    self.QueryName = QueryName
+                    self.QueryGroupId = QueryGroupId
+                    self.LastKnownIsParameter = LastKnownIsParameter
+                    self.LastKnownResultTypeName = LastKnownResultTypeName
+                    self.LoadEnabled = LoadEnabled
+                    self.IsHidden = IsHidden
+
+            class RootObject:
+                def __init__(
+                    self,
+                    DocumentLocale,
+                    EngineVersion,
+                    QueriesMetadata,
+                    QueryGroups=None,
+                ):
+                    if QueryGroups is None:
+                        QueryGroups = []
+                    self.DocumentLocale = DocumentLocale
+                    self.EngineVersion = EngineVersion
+                    self.QueriesMetadata = QueriesMetadata
+                    self.QueryGroups = QueryGroups
+
+            # STEP 1: Create MashupDocument.pq
+            mdfileName = "MashupDocument.pq"
+            mdFilePath = os.path.join(subFolderPath, mdfileName)
+            sb = "section Section1;"
+            for t_name, query in table_map.items():
+                sb = f'{sb}\nshared #"{t_name}" = '
+                if query is not None:
                     pQueryNoSpaces = (
-
+                        query.replace(" ", "")
                         .replace("\n", "")
                         .replace("\t", "")
                         .replace("\r", "")
                     )
                     if pQueryNoSpaces.startswith('letSource=""'):
-
-
-            if pSourceType == "M" and i == 1:
-                sb = sb + pQuery + ";"
-            elif refreshPolicy and i == 1:
-                sb = sb + sourceExpression + ";"
-            i += 1
-
-    for index, row in dfE.iterrows():
-        expr = row["Expression"]
-        eName = row["Name"]
-        eName = '#"' + eName + '"'
-        sb = sb + "\n" + "shared " + eName + " = " + expr + ";"
-
-    with open(mdFilePath, "w") as file:
-        file.write(sb)
-
-    # STEP 2: Create the MashupMetadata.json file
-    mmfileName = "MashupMetadata.json"
-    mmFilePath = os.path.join(subFolderPath, mmfileName)
-    queryMetadata = []
-
-    for tName in dfP["Table Name"].unique():
-        sourceType = dfP.loc[(dfP["Table Name"] == tName), "Source Type"].iloc[0]
-        refreshPolicy = dfT.loc[(dfT["Name"] == tName), "Refresh Policy"].iloc[0]
-        if sourceType == "M" or refreshPolicy:
-            queryMetadata.append(
-                QueryMetadata(tName, None, None, None, True, False)
-            )
-
-    for i, r in dfE.iterrows():
-        eName = r["Name"]
-        eKind = r["Kind"]
-        if eKind == "M":
-            queryMetadata.append(
-                QueryMetadata(eName, None, None, None, True, False)
-            )
-        else:
-            queryMetadata.append(
-                QueryMetadata(eName, None, None, None, False, False)
-            )
-
-    rootObject = RootObject("en-US", "2.126.453.0", queryMetadata)
-
-    def obj_to_dict(obj):
-        if isinstance(obj, list):
-            return [obj_to_dict(e) for e in obj]
-        elif hasattr(obj, "__dict__"):
-            return {k: obj_to_dict(v) for k, v in obj.__dict__.items()}
-        else:
-            return obj
-
-    jsonContent = json.dumps(obj_to_dict(rootObject), indent=4)
-
-    with open(mmFilePath, "w") as json_file:
-        json_file.write(jsonContent)
-
-    # STEP 3: Create Metadata.json file
-    mFileName = "Metadata.json"
-    mFilePath = os.path.join(subFolderPath, mFileName)
-    metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"}
-    jsonContent = json.dumps(metaData, indent=4)
-
-    with open(mFilePath, "w") as json_file:
-        json_file.write(jsonContent)
-
-    # STEP 4: Create [Content_Types].xml file:
-    ns = "http://schemas.openxmlformats.org/package/2006/content-types"
-    ET.register_namespace("", ns)
-    types = ET.Element("{%s}Types" % ns)
-    # default1 = ET.SubElement(
-    #     types,
-    #     "{%s}Default" % ns,
-    #     {"Extension": "json", "ContentType": "application/json"},
-    # )
-    # default2 = ET.SubElement(
-    #     types,
-    #     "{%s}Default" % ns,
-    #     {"Extension": "pq", "ContentType": "application/x-ms-m"},
-    # )
-    xmlDocument = ET.ElementTree(types)
-    xmlFileName = "[Content_Types].xml"
-    xmlFilePath = os.path.join(subFolderPath, xmlFileName)
-    xmlDocument.write(
-        xmlFilePath, xml_declaration=True, encoding="utf-8", method="xml"
-    )
+                        query = 'let\n\tSource = ""\nin\n\tSource'
+                sb = f"{sb}{query};"

-
-
-
-    shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath)
+            for e_name, kind_expr in expr_map.items():
+                expr = kind_expr[1]
+                sb = f'{sb}\nshared #"{e_name}" = {expr};'

-
-
-    directory = os.path.dirname(zipFilePath)
-    fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0]
-    newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt)
-    shutil.move(zipFilePath, newFilePath)
+            with open(mdFilePath, "w") as file:
+                file.write(sb)

-
-
+            # STEP 2: Create the MashupMetadata.json file
+            mmfileName = "MashupMetadata.json"
+            mmFilePath = os.path.join(subFolderPath, mmfileName)
+            queryMetadata = []

-
-
-
+            for t_name, query in table_map.items():
+                queryMetadata.append(
+                    QueryMetadata(t_name, None, None, None, True, False)
+                )
+            for e_name, kind_expr in expr_map.items():
+                e_kind = kind_expr[0]
+                if e_kind == "M":
+                    queryMetadata.append(
+                        QueryMetadata(e_name, None, None, None, True, False)
+                    )
+                else:
+                    queryMetadata.append(
+                        QueryMetadata(e_name, None, None, None, False, False)
+                    )

-
-
-
-
+            rootObject = RootObject(
+                "en-US", "2.132.328.0", queryMetadata
+            )  # "2.126.453.0"
+
+            def obj_to_dict(obj):
+                if isinstance(obj, list):
+                    return [obj_to_dict(e) for e in obj]
+                elif hasattr(obj, "__dict__"):
+                    return {k: obj_to_dict(v) for k, v in obj.__dict__.items()}
+                else:
+                    return obj
+
+            jsonContent = json.dumps(obj_to_dict(rootObject), indent=4)
+
+            with open(mmFilePath, "w") as json_file:
+                json_file.write(jsonContent)
+
+            # STEP 3: Create Metadata.json file
+            mFileName = "Metadata.json"
+            mFilePath = os.path.join(subFolderPath, mFileName)
+            metaData = {"Name": f"{file_name}", "Description": "", "Version": "1.0.0.0"}
+            jsonContent = json.dumps(metaData, indent=4)
+
+            with open(mFilePath, "w") as json_file:
+                json_file.write(jsonContent)
+
+            # STEP 4: Create [Content_Types].xml file:
+            xml_content = """<?xml version="1.0" encoding="utf-8"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="json" ContentType="application/json" /><Default Extension="pq" ContentType="application/x-ms-m" /></Types>"""
+            xmlFileName = "[Content_Types].xml"
+            xmlFilePath = os.path.join(subFolderPath, xmlFileName)
+            with open(xmlFilePath, "w", encoding="utf-8") as file:
+                file.write(xml_content)
+
+            # STEP 5: Zip up the 4 files
+            zipFileName = f"{file_name}.zip"
+            zipFilePath = os.path.join(folderPath, zipFileName)
+            shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath)
+
+            # STEP 6: Convert the zip file back into a .pqt file
+            newExt = ".pqt"
+            directory = os.path.dirname(zipFilePath)
+            fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[
+                0
+            ]
+            newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt)
+            shutil.move(zipFilePath, newFilePath)
+
+            # STEP 7: Delete subFolder directory which is no longer needed
+            shutil.rmtree(subFolderPath, ignore_errors=True)
+
+            print(
+                f"{icons.green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse."
+            )
+
+        a = 0
+        for t_map in table_chunks:
+            if a > 0:
+                file_name = f"{file_name}_{a}"
+            a += 1
+            create_pqt(t_map, expr_map, file_name=file_name)