semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of semantic-link-labs has been flagged as potentially problematic.
- semantic_link_labs-0.7.1.dist-info/METADATA +148 -0
- semantic_link_labs-0.7.1.dist-info/RECORD +111 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/WHEEL +1 -1
- sempy_labs/__init__.py +26 -2
- sempy_labs/_ai.py +3 -65
- sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
- sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
- sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
- sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
- sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
- sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
- sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
- sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
- sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
- sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
- sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
- sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
- sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
- sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
- sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
- sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
- sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
- sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
- sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
- sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
- sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
- sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
- sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
- sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
- sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
- sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
- sempy_labs/_clear_cache.py +9 -4
- sempy_labs/_generate_semantic_model.py +30 -56
- sempy_labs/_helper_functions.py +361 -14
- sempy_labs/_icons.py +10 -1
- sempy_labs/_list_functions.py +539 -260
- sempy_labs/_model_bpa.py +194 -18
- sempy_labs/_model_bpa_bulk.py +367 -0
- sempy_labs/_model_bpa_rules.py +19 -8
- sempy_labs/_model_dependencies.py +12 -10
- sempy_labs/_one_lake_integration.py +7 -7
- sempy_labs/_query_scale_out.py +61 -96
- sempy_labs/_refresh_semantic_model.py +7 -0
- sempy_labs/_translations.py +154 -1
- sempy_labs/_vertipaq.py +103 -90
- sempy_labs/directlake/__init__.py +5 -1
- sempy_labs/directlake/_directlake_schema_compare.py +27 -31
- sempy_labs/directlake/_directlake_schema_sync.py +55 -66
- sempy_labs/directlake/_dl_helper.py +233 -0
- sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
- sempy_labs/directlake/_get_shared_expression.py +1 -1
- sempy_labs/directlake/_guardrails.py +17 -13
- sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
- sempy_labs/directlake/_warm_cache.py +1 -1
- sempy_labs/lakehouse/__init__.py +2 -0
- sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
- sempy_labs/lakehouse/_lakehouse.py +66 -9
- sempy_labs/lakehouse/_shortcuts.py +1 -1
- sempy_labs/migration/_create_pqt_file.py +174 -182
- sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
- sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
- sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
- sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
- sempy_labs/migration/_refresh_calc_tables.py +92 -101
- sempy_labs/report/_BPAReportTemplate.json +232 -0
- sempy_labs/report/__init__.py +6 -2
- sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
- sempy_labs/report/_bpareporttemplate/.platform +11 -0
- sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
- sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
- sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
- sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
- sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
- sempy_labs/report/_generate_report.py +255 -139
- sempy_labs/report/_report_functions.py +26 -33
- sempy_labs/report/_report_rebind.py +31 -26
- sempy_labs/tom/_model.py +75 -58
- semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
- semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
- sempy_labs/directlake/_fallback.py +0 -60
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/LICENSE +0 -0
- {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/top_level.txt +0 -0
sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -16,6 +16,7 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy._utils._log import log
+from sempy.fabric.exceptions import FabricHTTPException


 @log
@@ -51,6 +52,8 @@ def get_lakehouse_tables(
     Shows the tables/columns within a lakehouse and their properties.
     """

+    from sempy_labs._helper_functions import pagination
+
     df = pd.DataFrame(
         columns=[
             "Workspace Name",
@@ -73,34 +76,52 @@ def get_lakehouse_tables(
     if count_rows:  # Setting countrows defaults to extended=True
         extended = True

+    if (
+        workspace_id != fabric.get_workspace_id()
+        and lakehouse_id != fabric.get_lakehouse_id()
+        and count_rows
+    ):
+        raise ValueError(
+            f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
+            "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
+        )
+
     client = fabric.FabricRestClient()
     response = client.get(
         f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
     )

-[old lines 81-86 removed; their content is not shown in the source diff view]
+    if response.status_code != 200:
+        raise FabricHTTPException(response)
+
+    responses = pagination(client, response)
+
+    dfs = []
+    for r in responses:
+        for i in r.get("data", []):
             new_data = {
                 "Workspace Name": workspace,
                 "Lakehouse Name": lakehouse,
-                "Table Name":
-                "Format":
-                "Type":
-                "Location":
+                "Table Name": i.get("name"),
+                "Format": i.get("format"),
+                "Type": i.get("type"),
+                "Location": i.get("location"),
             }
-
-
-    sku_value = get_sku_size(workspace)
-    guardrail = get_directlake_guardrails_for_sku(sku_value)
-
-    spark = SparkSession.builder.getOrCreate()
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+    df = pd.concat(dfs, ignore_index=True)

-
-
+    if extended:
+        sku_value = get_sku_size(workspace)
+        guardrail = get_directlake_guardrails_for_sku(sku_value)
+        spark = SparkSession.builder.getOrCreate()
+        df["Files"] = None
+        df["Row Groups"] = None
+        df["Table Size"] = None
+        if count_rows:
+            df["Row Count"] = None
+        for i, r in df.iterrows():
+            tName = r["Table Name"]
+            if r["Type"] == "Managed" and r["Format"] == "delta":
                 detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
                 num_files = detail_df.numFiles
                 size_in_bytes = detail_df.sizeInBytes
@@ -120,60 +141,31 @@ def get_lakehouse_tables(
                     ).num_row_groups
                 except FileNotFoundError:
                     continue
-
-
-
-                intColumns.append("Row Count")
-                new_data = {
-                    "Workspace Name": workspace,
-                    "Lakehouse Name": lakehouse,
-                    "Table Name": tName,
-                    "Format": tFormat,
-                    "Type": tType,
-                    "Location": tLocation,
-                    "Files": num_files,
-                    "Row Groups": num_rowgroups,
-                    "Row Count": num_rows,
-                    "Table Size": size_in_bytes,
-                }
-            else:
-                new_data = {
-                    "Workspace Name": workspace,
-                    "Lakehouse Name": lakehouse,
-                    "Table Name": tName,
-                    "Format": tFormat,
-                    "Type": tType,
-                    "Location": tLocation,
-                    "Files": num_files,
-                    "Row Groups": num_rowgroups,
-                    "Table Size": size_in_bytes,
-                }
-
-            df = pd.concat(
-                [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
-            )
-    df[intColumns] = df[intColumns].astype(int)
-
-    df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
-    df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
-    df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
-    df["Row Count Guardrail"] = (
-        guardrail["Rows per table (millions)"].iloc[0] * 1000000
-    )
-
-    df["Parquet File Guardrail Hit"] = (
-        df["Files"] > df["Parquet File Guardrail"]
-    )
-    df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
-
+                df.at[i, "Files"] = num_files
+                df.at[i, "Row Groups"] = num_rowgroups
+                df.at[i, "Table Size"] = size_in_bytes
             if count_rows:
-
-
-
+                num_rows = spark.table(tName).count()
+                df.at[i, "Row Count"] = num_rows
+
+    if extended:
+        intColumns = ["Files", "Row Groups", "Table Size"]
+        df[intColumns] = df[intColumns].astype(int)
+        df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
+        df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+        df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+        df["Row Count Guardrail"] = (
+            guardrail["Rows per table (millions)"].iloc[0] * 1000000
+        )
+
+        df["Parquet File Guardrail Hit"] = df["Files"] > df["Parquet File Guardrail"]
+        df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+        if count_rows:
+            df["Row Count"] = df["Row Count"].astype(int)
+            df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]

     if export:
-
-        if lakeAttach is False:
+        if not lakehouse_attached():
             raise ValueError(
                 f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
             )
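In 0.7.1, get_lakehouse_tables pages through the lakehouse tables REST endpoint via the new pagination helper, raises FabricHTTPException on a non-200 response, and only permits count_rows against the notebook's default lakehouse because it issues Spark queries. A minimal usage sketch follows; it assumes the function is re-exported from sempy_labs.lakehouse and that the notebook has a lakehouse attached (both plausible but not shown in this diff).

    # Usage sketch (assumed import path and notebook context; not part of the diff).
    from sempy_labs.lakehouse import get_lakehouse_tables

    # extended=True adds the Direct Lake guardrail columns; count_rows=True also
    # requires the default lakehouse attached to the notebook (cross-workspace
    # Spark queries are rejected with a ValueError in 0.7.1).
    df = get_lakehouse_tables(extended=True, count_rows=True)
    print(df[["Table Name", "Files", "Row Groups", "Table Size", "Row Count"]])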
sempy_labs/lakehouse/_lakehouse.py

@@ -37,8 +37,9 @@ def optimize_lakehouse_tables(

     Parameters
     ----------
-    tables : str | List[str]
-        The table(s) to optimize.
+    tables : str | List[str], default=None
+        The table(s) to optimize.
+        Defaults to None which resovles to optimizing all tables within the lakehouse.
     lakehouse : str, default=None
         The Fabric lakehouse.
         Defaults to None which resolves to the lakehouse attached to the notebook.
@@ -68,18 +69,74 @@ def optimize_lakehouse_tables(
     else:
         tables_filt = lakeTablesDelta.copy()

-    tableCount = len(tables_filt)
-
     spark = SparkSession.builder.getOrCreate()

-    i = 1
     for _, r in (bar := tqdm(tables_filt.iterrows())):
         tableName = r["Table Name"]
         tablePath = r["Location"]
         bar.set_description(f"Optimizing the '{tableName}' table...")
         deltaTable = DeltaTable.forPath(spark, tablePath)
         deltaTable.optimize().executeCompaction()
-
-
-
-
+
+
+@log
+def vacuum_lakehouse_tables(
+    tables: Optional[Union[str, List[str]]] = None,
+    lakehouse: Optional[str] = None,
+    workspace: Optional[str] = None,
+    retain_n_hours: Optional[int] = None,
+):
+    """
+    Runs the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function over the specified lakehouse tables.
+
+    Parameters
+    ----------
+    tables : str | List[str] | None
+        The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be optimized.
+    lakehouse : str, default=None
+        The Fabric lakehouse.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str, default=None
+        The Fabric workspace used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    retain_n_hours : int, default=None
+        The number of hours to retain historical versions of Delta table files.
+        Files older than this retention period will be deleted during the vacuum operation.
+        If not specified, the default retention period configured for the Delta table will be used.
+        The default retention period is 168 hours (7 days) unless manually configured via table properties.
+    """
+
+    from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+    from delta import DeltaTable
+
+    workspace = fabric.resolve_workspace_name(workspace)
+
+    if lakehouse is None:
+        lakehouse_id = fabric.get_lakehouse_id()
+        lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+
+    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
+
+    if isinstance(tables, str):
+        tables = [tables]
+
+    if tables is not None:
+        tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
+    else:
+        tables_filt = lakeTablesDelta.copy()
+
+    spark = SparkSession.builder.getOrCreate()
+    spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+
+    for _, r in (bar := tqdm(tables_filt.iterrows())):
+        tableName = r["Table Name"]
+        tablePath = r["Location"]
+        bar.set_description(f"Vacuuming the '{tableName}' table...")
+        deltaTable = DeltaTable.forPath(spark, tablePath)
+
+        if retain_n_hours is None:
+            deltaTable.vacuum()
+        else:
+            deltaTable.vacuum(retain_n_hours)
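The new vacuum_lakehouse_tables mirrors optimize_lakehouse_tables: it resolves the attached lakehouse, filters to delta-format tables, enables parallel delete, and runs Delta VACUUM on each table with an optional retention window. A short usage sketch, assuming both functions are exported from sempy_labs.lakehouse; the table names are placeholders.

    # Usage sketch (assumed export path; 'Sales' and 'Customers' are placeholder table names).
    from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

    tables = ["Sales", "Customers"]
    optimize_lakehouse_tables(tables=tables)  # compact the delta tables' parquet files
    vacuum_lakehouse_tables(tables=tables, retain_n_hours=168)  # keep 7 days of history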
sempy_labs/migration/_create_pqt_file.py

@@ -1,9 +1,8 @@
+import sempy
 import sempy.fabric as fabric
 import json
 import os
 import shutil
-import xml.etree.ElementTree as ET
-from sempy_labs._list_functions import list_tables
 from sempy_labs.lakehouse._lakehouse import lakehouse_attached
 from sempy._utils._log import log
 from typing import Optional
@@ -20,6 +19,9 @@ def create_pqt_file(
     Dynamically generates a `Power Query Template <https://learn.microsoft.com/power-query/power-query-template>`_ file based on the semantic model. The .pqt file is
     saved within the Files section of your lakehouse.

+    Dataflows Gen2 has a `limit of 50 tables <https://learn.microsoft.com/power-query/power-query-online-limits>`_. If there are more than 50 tables, this will save multiple Power Query Template
+    files (with each file having a max of 50 tables).
+
     Parameters
     ----------
     dataset : str
@@ -32,9 +34,11 @@ def create_pqt_file(
         The name of the Power Query Template file to be generated.
     """

-
+    sempy.fabric._client._utils._init_analysis_services()
+    import Microsoft.AnalysisServices.Tabular as TOM
+    from sempy_labs.tom import connect_semantic_model

-    if
+    if not lakehouse_attached():
         raise ValueError(
             f"{icons.red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
         )
@@ -45,192 +49,180 @@ def create_pqt_file(
     subFolderPath = os.path.join(folderPath, "pqtnewfolder")
     os.makedirs(subFolderPath, exist_ok=True)

-[old lines 48-89 removed; their content is not shown in the source diff view]
-    (
-[old lines 91-119 removed; their content is not shown in the source diff view]
+    with connect_semantic_model(
+        dataset=dataset, workspace=workspace, readonly=True
+    ) as tom:
+        if not any(
+            p.SourceType == TOM.PartitionSourceType.M for p in tom.all_partitions()
+        ) and not any(t.RefreshPolicy for t in tom.model.Tables):
+            print(
+                f"{icons.info} The '{dataset}' semantic model within the '{workspace}' workspace has no Power Query logic."
+            )
+            return
+
+        table_map = {}
+        expr_map = {}
+
+        for t in tom.model.Tables:
+            table_name = t.Name
+            for char in icons.special_characters:
+                table_name = table_name.replace(char, "")
+            if t.RefreshPolicy:
+                table_map[table_name] = t.RefreshPolicy.SourceExpression
+            elif any(p.SourceType == TOM.PartitionSourceType.M for p in t.Partitions):
+                part_name = next(
+                    p.Name
+                    for p in t.Partitions
+                    if p.SourceType == TOM.PartitionSourceType.M
+                )
+                expr = t.Partitions[part_name].Source.Expression
+                table_map[table_name] = expr
+
+        for e in tom.model.Expressions:
+            expr_map[e.Name] = [str(e.Kind), e.Expression]
+
+        # Dataflows Gen2 max table limit is 50.
+        max_length = 50
+        table_chunks = [
+            dict(list(table_map.items())[i : i + max_length])
+            for i in range(0, len(table_map), max_length)
+        ]
+
+        def create_pqt(table_map: dict, expr_map: dict, file_name: str):
+
+            class QueryMetadata:
+                def __init__(
+                    self,
+                    QueryName,
+                    QueryGroupId=None,
+                    LastKnownIsParameter=None,
+                    LastKnownResultTypeName=None,
+                    LoadEnabled=True,
+                    IsHidden=False,
+                ):
+                    self.QueryName = QueryName
+                    self.QueryGroupId = QueryGroupId
+                    self.LastKnownIsParameter = LastKnownIsParameter
+                    self.LastKnownResultTypeName = LastKnownResultTypeName
+                    self.LoadEnabled = LoadEnabled
+                    self.IsHidden = IsHidden
+
+            class RootObject:
+                def __init__(
+                    self,
+                    DocumentLocale,
+                    EngineVersion,
+                    QueriesMetadata,
+                    QueryGroups=None,
+                ):
+                    if QueryGroups is None:
+                        QueryGroups = []
+                    self.DocumentLocale = DocumentLocale
+                    self.EngineVersion = EngineVersion
+                    self.QueriesMetadata = QueriesMetadata
+                    self.QueryGroups = QueryGroups
+
+            # STEP 1: Create MashupDocument.pq
+            mdfileName = "MashupDocument.pq"
+            mdFilePath = os.path.join(subFolderPath, mdfileName)
+            sb = "section Section1;"
+            for t_name, query in table_map.items():
+                sb = f'{sb}\nshared #"{t_name}" = '
+                if query is not None:
                     pQueryNoSpaces = (
-
+                        query.replace(" ", "")
                         .replace("\n", "")
                         .replace("\t", "")
                         .replace("\r", "")
                     )
                     if pQueryNoSpaces.startswith('letSource=""'):
-
-
-            if pSourceType == "M" and i == 1:
-                sb = sb + pQuery + ";"
-            elif refreshPolicy and i == 1:
-                sb = sb + sourceExpression + ";"
-            i += 1
-
-    for index, row in dfE.iterrows():
-        expr = row["Expression"]
-        eName = row["Name"]
-        eName = '#"' + eName + '"'
-        sb = sb + "\n" + "shared " + eName + " = " + expr + ";"
-
-    with open(mdFilePath, "w") as file:
-        file.write(sb)
-
-    # STEP 2: Create the MashupMetadata.json file
-    mmfileName = "MashupMetadata.json"
-    mmFilePath = os.path.join(subFolderPath, mmfileName)
-    queryMetadata = []
-
-    for tName in dfP["Table Name"].unique():
-        sourceType = dfP.loc[(dfP["Table Name"] == tName), "Source Type"].iloc[0]
-        refreshPolicy = dfT.loc[(dfT["Name"] == tName), "Refresh Policy"].iloc[0]
-        if sourceType == "M" or refreshPolicy:
-            queryMetadata.append(
-                QueryMetadata(tName, None, None, None, True, False)
-            )
-
-    for i, r in dfE.iterrows():
-        eName = r["Name"]
-        eKind = r["Kind"]
-        if eKind == "M":
-            queryMetadata.append(
-                QueryMetadata(eName, None, None, None, True, False)
-            )
-        else:
-            queryMetadata.append(
-                QueryMetadata(eName, None, None, None, False, False)
-            )
-
-    rootObject = RootObject("en-US", "2.126.453.0", queryMetadata)
-
-    def obj_to_dict(obj):
-        if isinstance(obj, list):
-            return [obj_to_dict(e) for e in obj]
-        elif hasattr(obj, "__dict__"):
-            return {k: obj_to_dict(v) for k, v in obj.__dict__.items()}
-        else:
-            return obj
-
-    jsonContent = json.dumps(obj_to_dict(rootObject), indent=4)
-
-    with open(mmFilePath, "w") as json_file:
-        json_file.write(jsonContent)
-
-    # STEP 3: Create Metadata.json file
-    mFileName = "Metadata.json"
-    mFilePath = os.path.join(subFolderPath, mFileName)
-    metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"}
-    jsonContent = json.dumps(metaData, indent=4)
-
-    with open(mFilePath, "w") as json_file:
-        json_file.write(jsonContent)
-
-    # STEP 4: Create [Content_Types].xml file:
-    ns = "http://schemas.openxmlformats.org/package/2006/content-types"
-    ET.register_namespace("", ns)
-    types = ET.Element("{%s}Types" % ns)
-    # default1 = ET.SubElement(
-    #     types,
-    #     "{%s}Default" % ns,
-    #     {"Extension": "json", "ContentType": "application/json"},
-    # )
-    # default2 = ET.SubElement(
-    #     types,
-    #     "{%s}Default" % ns,
-    #     {"Extension": "pq", "ContentType": "application/x-ms-m"},
-    # )
-    xmlDocument = ET.ElementTree(types)
-    xmlFileName = "[Content_Types].xml"
-    xmlFilePath = os.path.join(subFolderPath, xmlFileName)
-    xmlDocument.write(
-        xmlFilePath, xml_declaration=True, encoding="utf-8", method="xml"
-    )
+                        query = 'let\n\tSource = ""\nin\n\tSource'
+                sb = f"{sb}{query};"

-
-
-
-    shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath)
+            for e_name, kind_expr in expr_map.items():
+                expr = kind_expr[1]
+                sb = f'{sb}\nshared #"{e_name}" = {expr};'

-
-
-    directory = os.path.dirname(zipFilePath)
-    fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0]
-    newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt)
-    shutil.move(zipFilePath, newFilePath)
+            with open(mdFilePath, "w") as file:
+                file.write(sb)

-
-
+            # STEP 2: Create the MashupMetadata.json file
+            mmfileName = "MashupMetadata.json"
+            mmFilePath = os.path.join(subFolderPath, mmfileName)
+            queryMetadata = []

-
-
-
+            for t_name, query in table_map.items():
+                queryMetadata.append(
+                    QueryMetadata(t_name, None, None, None, True, False)
+                )
+            for e_name, kind_expr in expr_map.items():
+                e_kind = kind_expr[0]
+                if e_kind == "M":
+                    queryMetadata.append(
+                        QueryMetadata(e_name, None, None, None, True, False)
+                    )
+                else:
+                    queryMetadata.append(
+                        QueryMetadata(e_name, None, None, None, False, False)
+                    )

-
-
-
-
+            rootObject = RootObject(
+                "en-US", "2.132.328.0", queryMetadata
+            )  # "2.126.453.0"
+
+            def obj_to_dict(obj):
+                if isinstance(obj, list):
+                    return [obj_to_dict(e) for e in obj]
+                elif hasattr(obj, "__dict__"):
+                    return {k: obj_to_dict(v) for k, v in obj.__dict__.items()}
+                else:
+                    return obj
+
+            jsonContent = json.dumps(obj_to_dict(rootObject), indent=4)
+
+            with open(mmFilePath, "w") as json_file:
+                json_file.write(jsonContent)
+
+            # STEP 3: Create Metadata.json file
+            mFileName = "Metadata.json"
+            mFilePath = os.path.join(subFolderPath, mFileName)
+            metaData = {"Name": f"{file_name}", "Description": "", "Version": "1.0.0.0"}
+            jsonContent = json.dumps(metaData, indent=4)
+
+            with open(mFilePath, "w") as json_file:
+                json_file.write(jsonContent)
+
+            # STEP 4: Create [Content_Types].xml file:
+            xml_content = """<?xml version="1.0" encoding="utf-8"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="json" ContentType="application/json" /><Default Extension="pq" ContentType="application/x-ms-m" /></Types>"""
+            xmlFileName = "[Content_Types].xml"
+            xmlFilePath = os.path.join(subFolderPath, xmlFileName)
+            with open(xmlFilePath, "w", encoding="utf-8") as file:
+                file.write(xml_content)
+
+            # STEP 5: Zip up the 4 files
+            zipFileName = f"{file_name}.zip"
+            zipFilePath = os.path.join(folderPath, zipFileName)
+            shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath)
+
+            # STEP 6: Convert the zip file back into a .pqt file
+            newExt = ".pqt"
+            directory = os.path.dirname(zipFilePath)
+            fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[
+                0
+            ]
+            newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt)
+            shutil.move(zipFilePath, newFilePath)
+
+            # STEP 7: Delete subFolder directory which is no longer needed
+            shutil.rmtree(subFolderPath, ignore_errors=True)
+
+            print(
+                f"{icons.green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse."
+            )
+
+        a = 0
+        for t_map in table_chunks:
+            if a > 0:
+                file_name = f"{file_name}_{a}"
+            a += 1
+            create_pqt(t_map, expr_map, file_name=file_name)