semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of semantic-link-labs has been marked as a potentially problematic release.

Files changed (104)
  1. semantic_link_labs-0.7.1.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.1.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +26 -2
  5. sempy_labs/_ai.py +3 -65
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +9 -4
  33. sempy_labs/_generate_semantic_model.py +30 -56
  34. sempy_labs/_helper_functions.py +361 -14
  35. sempy_labs/_icons.py +10 -1
  36. sempy_labs/_list_functions.py +539 -260
  37. sempy_labs/_model_bpa.py +194 -18
  38. sempy_labs/_model_bpa_bulk.py +367 -0
  39. sempy_labs/_model_bpa_rules.py +19 -8
  40. sempy_labs/_model_dependencies.py +12 -10
  41. sempy_labs/_one_lake_integration.py +7 -7
  42. sempy_labs/_query_scale_out.py +61 -96
  43. sempy_labs/_refresh_semantic_model.py +7 -0
  44. sempy_labs/_translations.py +154 -1
  45. sempy_labs/_vertipaq.py +103 -90
  46. sempy_labs/directlake/__init__.py +5 -1
  47. sempy_labs/directlake/_directlake_schema_compare.py +27 -31
  48. sempy_labs/directlake/_directlake_schema_sync.py +55 -66
  49. sempy_labs/directlake/_dl_helper.py +233 -0
  50. sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
  51. sempy_labs/directlake/_get_shared_expression.py +1 -1
  52. sempy_labs/directlake/_guardrails.py +17 -13
  53. sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
  54. sempy_labs/directlake/_warm_cache.py +1 -1
  55. sempy_labs/lakehouse/__init__.py +2 -0
  56. sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
  57. sempy_labs/lakehouse/_lakehouse.py +66 -9
  58. sempy_labs/lakehouse/_shortcuts.py +1 -1
  59. sempy_labs/migration/_create_pqt_file.py +174 -182
  60. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
  61. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
  62. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
  63. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
  64. sempy_labs/migration/_refresh_calc_tables.py +92 -101
  65. sempy_labs/report/_BPAReportTemplate.json +232 -0
  66. sempy_labs/report/__init__.py +6 -2
  67. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  68. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  69. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  70. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  71. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  72. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  73. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  74. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  75. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  76. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  77. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  92. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  93. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  94. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  95. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  96. sempy_labs/report/_generate_report.py +255 -139
  97. sempy_labs/report/_report_functions.py +26 -33
  98. sempy_labs/report/_report_rebind.py +31 -26
  99. sempy_labs/tom/_model.py +75 -58
  100. semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
  101. semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
  102. sempy_labs/directlake/_fallback.py +0 -60
  103. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/LICENSE +0 -0
  104. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.1.dist-info}/top_level.txt +0 -0

sempy_labs/lakehouse/_get_lakehouse_tables.py

@@ -16,6 +16,7 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached
  from typing import Optional
  import sempy_labs._icons as icons
  from sempy._utils._log import log
+ from sempy.fabric.exceptions import FabricHTTPException


  @log
@@ -51,6 +52,8 @@ def get_lakehouse_tables(
  Shows the tables/columns within a lakehouse and their properties.
  """

+ from sempy_labs._helper_functions import pagination
+
  df = pd.DataFrame(
  columns=[
  "Workspace Name",
@@ -73,34 +76,52 @@ def get_lakehouse_tables(
  if count_rows: # Setting countrows defaults to extended=True
  extended = True

+ if (
+ workspace_id != fabric.get_workspace_id()
+ and lakehouse_id != fabric.get_lakehouse_id()
+ and count_rows
+ ):
+ raise ValueError(
+ f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. "
+ "Count rows runs a spark query and cross-workspace spark queries are currently not supported."
+ )
+
  client = fabric.FabricRestClient()
  response = client.get(
  f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables"
  )

- for i in response.json()["data"]:
- tName = i["name"]
- tType = i["type"]
- tFormat = i["format"]
- tLocation = i["location"]
- if not extended:
+ if response.status_code != 200:
+ raise FabricHTTPException(response)
+
+ responses = pagination(client, response)
+
+ dfs = []
+ for r in responses:
+ for i in r.get("data", []):
  new_data = {
  "Workspace Name": workspace,
  "Lakehouse Name": lakehouse,
- "Table Name": tName,
- "Format": tFormat,
- "Type": tType,
- "Location": tLocation,
+ "Table Name": i.get("name"),
+ "Format": i.get("format"),
+ "Type": i.get("type"),
+ "Location": i.get("location"),
  }
- df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
- else:
- sku_value = get_sku_size(workspace)
- guardrail = get_directlake_guardrails_for_sku(sku_value)
-
- spark = SparkSession.builder.getOrCreate()
+ dfs.append(pd.DataFrame(new_data, index=[0]))
+ df = pd.concat(dfs, ignore_index=True)

- intColumns = ["Files", "Row Groups", "Table Size"]
- if tType == "Managed" and tFormat == "delta":
+ if extended:
+ sku_value = get_sku_size(workspace)
+ guardrail = get_directlake_guardrails_for_sku(sku_value)
+ spark = SparkSession.builder.getOrCreate()
+ df["Files"] = None
+ df["Row Groups"] = None
+ df["Table Size"] = None
+ if count_rows:
+ df["Row Count"] = None
+ for i, r in df.iterrows():
+ tName = r["Table Name"]
+ if r["Type"] == "Managed" and r["Format"] == "delta":
  detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0]
  num_files = detail_df.numFiles
  size_in_bytes = detail_df.sizeInBytes
@@ -120,60 +141,31 @@ def get_lakehouse_tables(
  ).num_row_groups
  except FileNotFoundError:
  continue
-
- if count_rows:
- num_rows = spark.table(tName).count()
- intColumns.append("Row Count")
- new_data = {
- "Workspace Name": workspace,
- "Lakehouse Name": lakehouse,
- "Table Name": tName,
- "Format": tFormat,
- "Type": tType,
- "Location": tLocation,
- "Files": num_files,
- "Row Groups": num_rowgroups,
- "Row Count": num_rows,
- "Table Size": size_in_bytes,
- }
- else:
- new_data = {
- "Workspace Name": workspace,
- "Lakehouse Name": lakehouse,
- "Table Name": tName,
- "Format": tFormat,
- "Type": tType,
- "Location": tLocation,
- "Files": num_files,
- "Row Groups": num_rowgroups,
- "Table Size": size_in_bytes,
- }
-
- df = pd.concat(
- [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
- )
- df[intColumns] = df[intColumns].astype(int)
-
- df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
- df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
- df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
- df["Row Count Guardrail"] = (
- guardrail["Rows per table (millions)"].iloc[0] * 1000000
- )
-
- df["Parquet File Guardrail Hit"] = (
- df["Files"] > df["Parquet File Guardrail"]
- )
- df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
-
+ df.at[i, "Files"] = num_files
+ df.at[i, "Row Groups"] = num_rowgroups
+ df.at[i, "Table Size"] = size_in_bytes
  if count_rows:
- df["Row Count Guardrail Hit"] = (
- df["Row Count"] > df["Row Count Guardrail"]
- )
+ num_rows = spark.table(tName).count()
+ df.at[i, "Row Count"] = num_rows
+
+ if extended:
+ intColumns = ["Files", "Row Groups", "Table Size"]
+ df[intColumns] = df[intColumns].astype(int)
+ df["SKU"] = guardrail["Fabric SKUs"].iloc[0]
+ df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+ df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+ df["Row Count Guardrail"] = (
+ guardrail["Rows per table (millions)"].iloc[0] * 1000000
+ )
+
+ df["Parquet File Guardrail Hit"] = df["Files"] > df["Parquet File Guardrail"]
+ df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+ if count_rows:
+ df["Row Count"] = df["Row Count"].astype(int)
+ df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]

  if export:
- lakeAttach = lakehouse_attached()
- if lakeAttach is False:
+ if not lakehouse_attached():
  raise ValueError(
  f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
  )
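
For orientation, a minimal usage sketch of the reworked get_lakehouse_tables. This is a sketch only: the keyword names come from the diff above, while the import path and defaults are assumptions based on the package layout.

from sempy_labs.lakehouse import get_lakehouse_tables

# List tables in the lakehouse attached to the notebook.
df = get_lakehouse_tables()

# extended=True adds the Files / Row Groups / Table Size and Direct Lake guardrail columns.
# count_rows=True additionally runs a Spark count per table, so per the new ValueError above
# it must be run against the default lakehouse attached to the notebook.
df_ext = get_lakehouse_tables(extended=True, count_rows=True)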

sempy_labs/lakehouse/_lakehouse.py

@@ -37,8 +37,9 @@ def optimize_lakehouse_tables(

  Parameters
  ----------
- tables : str | List[str] | None
- The table(s) to optimize. If no tables are specified, all tables in the lakehouse will be optimized.
+ tables : str | List[str], default=None
+ The table(s) to optimize.
+ Defaults to None which resovles to optimizing all tables within the lakehouse.
  lakehouse : str, default=None
  The Fabric lakehouse.
  Defaults to None which resolves to the lakehouse attached to the notebook.
@@ -68,18 +69,74 @@ def optimize_lakehouse_tables(
  else:
  tables_filt = lakeTablesDelta.copy()

- tableCount = len(tables_filt)
-
  spark = SparkSession.builder.getOrCreate()

- i = 1
  for _, r in (bar := tqdm(tables_filt.iterrows())):
  tableName = r["Table Name"]
  tablePath = r["Location"]
  bar.set_description(f"Optimizing the '{tableName}' table...")
  deltaTable = DeltaTable.forPath(spark, tablePath)
  deltaTable.optimize().executeCompaction()
- print(
- f"{icons.green_dot} The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})"
- )
- i += 1
+
+
+ @log
+ def vacuum_lakehouse_tables(
+ tables: Optional[Union[str, List[str]]] = None,
+ lakehouse: Optional[str] = None,
+ workspace: Optional[str] = None,
+ retain_n_hours: Optional[int] = None,
+ ):
+ """
+ Runs the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function over the specified lakehouse tables.
+
+ Parameters
+ ----------
+ tables : str | List[str] | None
+ The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be optimized.
+ lakehouse : str, default=None
+ The Fabric lakehouse.
+ Defaults to None which resolves to the lakehouse attached to the notebook.
+ workspace : str, default=None
+ The Fabric workspace used by the lakehouse.
+ Defaults to None which resolves to the workspace of the attached lakehouse
+ or if no lakehouse attached, resolves to the workspace of the notebook.
+ retain_n_hours : int, default=None
+ The number of hours to retain historical versions of Delta table files.
+ Files older than this retention period will be deleted during the vacuum operation.
+ If not specified, the default retention period configured for the Delta table will be used.
+ The default retention period is 168 hours (7 days) unless manually configured via table properties.
+ """
+
+ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+ from delta import DeltaTable
+
+ workspace = fabric.resolve_workspace_name(workspace)
+
+ if lakehouse is None:
+ lakehouse_id = fabric.get_lakehouse_id()
+ lakehouse = resolve_lakehouse_name(lakehouse_id, workspace)
+
+ lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+ lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
+
+ if isinstance(tables, str):
+ tables = [tables]
+
+ if tables is not None:
+ tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
+ else:
+ tables_filt = lakeTablesDelta.copy()
+
+ spark = SparkSession.builder.getOrCreate()
+ spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+
+ for _, r in (bar := tqdm(tables_filt.iterrows())):
+ tableName = r["Table Name"]
+ tablePath = r["Location"]
+ bar.set_description(f"Vacuuming the '{tableName}' table...")
+ deltaTable = DeltaTable.forPath(spark, tablePath)
+
+ if retain_n_hours is None:
+ deltaTable.vacuum()
+ else:
+ deltaTable.vacuum(retain_n_hours)
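
A minimal usage sketch of the new vacuum_lakehouse_tables, assuming it is exported from sempy_labs.lakehouse as the sempy_labs/lakehouse/__init__.py change in the file list suggests; the table names below are hypothetical.

from sempy_labs.lakehouse import vacuum_lakehouse_tables

# Vacuum every delta table in the attached lakehouse, using each table's default retention.
vacuum_lakehouse_tables()

# Vacuum specific tables, retaining 168 hours (7 days) of history.
vacuum_lakehouse_tables(tables=["FactSales", "DimDate"], retain_n_hours=168)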

sempy_labs/lakehouse/_shortcuts.py

@@ -53,7 +53,7 @@ def create_shortcut_onelake(
  shortcut_name = table_name

  client = fabric.FabricRestClient()
- tablePath = "Tables/" + table_name
+ tablePath = f"Tables/{table_name}"

  request_body = {
  "path": "Tables",

sempy_labs/migration/_create_pqt_file.py

@@ -1,9 +1,8 @@
+ import sempy
  import sempy.fabric as fabric
  import json
  import os
  import shutil
- import xml.etree.ElementTree as ET
- from sempy_labs._list_functions import list_tables
  from sempy_labs.lakehouse._lakehouse import lakehouse_attached
  from sempy._utils._log import log
  from typing import Optional
@@ -20,6 +19,9 @@ def create_pqt_file(
  Dynamically generates a `Power Query Template <https://learn.microsoft.com/power-query/power-query-template>`_ file based on the semantic model. The .pqt file is
  saved within the Files section of your lakehouse.

+ Dataflows Gen2 has a `limit of 50 tables <https://learn.microsoft.com/power-query/power-query-online-limits>`_. If there are more than 50 tables, this will save multiple Power Query Template
+ files (with each file having a max of 50 tables).
+
  Parameters
  ----------
  dataset : str
@@ -32,9 +34,11 @@ def create_pqt_file(
  The name of the Power Query Template file to be generated.
  """

- lakeAttach = lakehouse_attached()
+ sempy.fabric._client._utils._init_analysis_services()
+ import Microsoft.AnalysisServices.Tabular as TOM
+ from sempy_labs.tom import connect_semantic_model

- if lakeAttach is False:
+ if not lakehouse_attached():
  raise ValueError(
  f"{icons.red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
  )
@@ -45,192 +49,180 @@ def create_pqt_file(
  subFolderPath = os.path.join(folderPath, "pqtnewfolder")
  os.makedirs(subFolderPath, exist_ok=True)

- dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
- dfT = list_tables(dataset, workspace)
- dfE = fabric.list_expressions(dataset=dataset, workspace=workspace)
-
- # Check if M-partitions are used
- if any(dfP["Source Type"] == "M"):
-
- class QueryMetadata:
- def __init__(
- self,
- QueryName,
- QueryGroupId=None,
- LastKnownIsParameter=None,
- LastKnownResultTypeName=None,
- LoadEnabled=True,
- IsHidden=False,
- ):
- self.QueryName = QueryName
- self.QueryGroupId = QueryGroupId
- self.LastKnownIsParameter = LastKnownIsParameter
- self.LastKnownResultTypeName = LastKnownResultTypeName
- self.LoadEnabled = LoadEnabled
- self.IsHidden = IsHidden
-
- class RootObject:
- def __init__(
- self, DocumentLocale, EngineVersion, QueriesMetadata, QueryGroups=None
- ):
- if QueryGroups is None:
- QueryGroups = []
- self.DocumentLocale = DocumentLocale
- self.EngineVersion = EngineVersion
- self.QueriesMetadata = QueriesMetadata
- self.QueryGroups = QueryGroups
-
- # STEP 1: Create MashupDocument.pq
- mdfileName = "MashupDocument.pq"
- mdFilePath = os.path.join(subFolderPath, mdfileName)
- sb = "section Section1;"
- for table_name in dfP["Table Name"].unique():
- tName = '#"' + table_name + '"'
- sourceExpression = dfT.loc[
- (dfT["Name"] == table_name), "Source Expression"
- ].iloc[0]
- refreshPolicy = dfT.loc[(dfT["Name"] == table_name), "Refresh Policy"].iloc[
- 0
- ]
- sourceType = dfP.loc[(dfP["Table Name"] == table_name), "Source Type"].iloc[
- 0
- ]
-
- if sourceType == "M" or refreshPolicy:
- sb = sb + "\n" + "shared " + tName + " = "
-
- partitions_in_table = dfP.loc[
- dfP["Table Name"] == table_name, "Partition Name"
- ].unique()
-
- i = 1
- for partition_name in partitions_in_table:
- pSourceType = dfP.loc[
- (dfP["Table Name"] == table_name)
- & (dfP["Partition Name"] == partition_name),
- "Source Type",
- ].iloc[0]
- pQuery = dfP.loc[
- (dfP["Table Name"] == table_name)
- & (dfP["Partition Name"] == partition_name),
- "Query",
- ].iloc[0]
-
- if pQuery is not None:
+ with connect_semantic_model(
+ dataset=dataset, workspace=workspace, readonly=True
+ ) as tom:
+ if not any(
+ p.SourceType == TOM.PartitionSourceType.M for p in tom.all_partitions()
+ ) and not any(t.RefreshPolicy for t in tom.model.Tables):
+ print(
+ f"{icons.info} The '{dataset}' semantic model within the '{workspace}' workspace has no Power Query logic."
+ )
+ return
+
+ table_map = {}
+ expr_map = {}
+
+ for t in tom.model.Tables:
+ table_name = t.Name
+ for char in icons.special_characters:
+ table_name = table_name.replace(char, "")
+ if t.RefreshPolicy:
+ table_map[table_name] = t.RefreshPolicy.SourceExpression
+ elif any(p.SourceType == TOM.PartitionSourceType.M for p in t.Partitions):
+ part_name = next(
+ p.Name
+ for p in t.Partitions
+ if p.SourceType == TOM.PartitionSourceType.M
+ )
+ expr = t.Partitions[part_name].Source.Expression
+ table_map[table_name] = expr
+
+ for e in tom.model.Expressions:
+ expr_map[e.Name] = [str(e.Kind), e.Expression]
+
+ # Dataflows Gen2 max table limit is 50.
+ max_length = 50
+ table_chunks = [
+ dict(list(table_map.items())[i : i + max_length])
+ for i in range(0, len(table_map), max_length)
+ ]
+
+ def create_pqt(table_map: dict, expr_map: dict, file_name: str):
+
+ class QueryMetadata:
+ def __init__(
+ self,
+ QueryName,
+ QueryGroupId=None,
+ LastKnownIsParameter=None,
+ LastKnownResultTypeName=None,
+ LoadEnabled=True,
+ IsHidden=False,
+ ):
+ self.QueryName = QueryName
+ self.QueryGroupId = QueryGroupId
+ self.LastKnownIsParameter = LastKnownIsParameter
+ self.LastKnownResultTypeName = LastKnownResultTypeName
+ self.LoadEnabled = LoadEnabled
+ self.IsHidden = IsHidden
+
+ class RootObject:
+ def __init__(
+ self,
+ DocumentLocale,
+ EngineVersion,
+ QueriesMetadata,
+ QueryGroups=None,
+ ):
+ if QueryGroups is None:
+ QueryGroups = []
+ self.DocumentLocale = DocumentLocale
+ self.EngineVersion = EngineVersion
+ self.QueriesMetadata = QueriesMetadata
+ self.QueryGroups = QueryGroups
+
+ # STEP 1: Create MashupDocument.pq
+ mdfileName = "MashupDocument.pq"
+ mdFilePath = os.path.join(subFolderPath, mdfileName)
+ sb = "section Section1;"
+ for t_name, query in table_map.items():
+ sb = f'{sb}\nshared #"{t_name}" = '
+ if query is not None:
  pQueryNoSpaces = (
- pQuery.replace(" ", "")
+ query.replace(" ", "")
  .replace("\n", "")
  .replace("\t", "")
  .replace("\r", "")
  )
  if pQueryNoSpaces.startswith('letSource=""'):
- pQuery = 'let\n\tSource = ""\nin\n\tSource'
-
- if pSourceType == "M" and i == 1:
- sb = sb + pQuery + ";"
- elif refreshPolicy and i == 1:
- sb = sb + sourceExpression + ";"
- i += 1
-
- for index, row in dfE.iterrows():
- expr = row["Expression"]
- eName = row["Name"]
- eName = '#"' + eName + '"'
- sb = sb + "\n" + "shared " + eName + " = " + expr + ";"
-
- with open(mdFilePath, "w") as file:
- file.write(sb)
-
- # STEP 2: Create the MashupMetadata.json file
- mmfileName = "MashupMetadata.json"
- mmFilePath = os.path.join(subFolderPath, mmfileName)
- queryMetadata = []
-
- for tName in dfP["Table Name"].unique():
- sourceType = dfP.loc[(dfP["Table Name"] == tName), "Source Type"].iloc[0]
- refreshPolicy = dfT.loc[(dfT["Name"] == tName), "Refresh Policy"].iloc[0]
- if sourceType == "M" or refreshPolicy:
- queryMetadata.append(
- QueryMetadata(tName, None, None, None, True, False)
- )
-
- for i, r in dfE.iterrows():
- eName = r["Name"]
- eKind = r["Kind"]
- if eKind == "M":
- queryMetadata.append(
- QueryMetadata(eName, None, None, None, True, False)
- )
- else:
- queryMetadata.append(
- QueryMetadata(eName, None, None, None, False, False)
- )
-
- rootObject = RootObject("en-US", "2.126.453.0", queryMetadata)
-
- def obj_to_dict(obj):
- if isinstance(obj, list):
- return [obj_to_dict(e) for e in obj]
- elif hasattr(obj, "__dict__"):
- return {k: obj_to_dict(v) for k, v in obj.__dict__.items()}
- else:
- return obj
-
- jsonContent = json.dumps(obj_to_dict(rootObject), indent=4)
-
- with open(mmFilePath, "w") as json_file:
- json_file.write(jsonContent)
-
- # STEP 3: Create Metadata.json file
- mFileName = "Metadata.json"
- mFilePath = os.path.join(subFolderPath, mFileName)
- metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"}
- jsonContent = json.dumps(metaData, indent=4)
-
- with open(mFilePath, "w") as json_file:
- json_file.write(jsonContent)
-
- # STEP 4: Create [Content_Types].xml file:
- ns = "http://schemas.openxmlformats.org/package/2006/content-types"
- ET.register_namespace("", ns)
- types = ET.Element("{%s}Types" % ns)
- # default1 = ET.SubElement(
- # types,
- # "{%s}Default" % ns,
- # {"Extension": "json", "ContentType": "application/json"},
- # )
- # default2 = ET.SubElement(
- # types,
- # "{%s}Default" % ns,
- # {"Extension": "pq", "ContentType": "application/x-ms-m"},
- # )
- xmlDocument = ET.ElementTree(types)
- xmlFileName = "[Content_Types].xml"
- xmlFilePath = os.path.join(subFolderPath, xmlFileName)
- xmlDocument.write(
- xmlFilePath, xml_declaration=True, encoding="utf-8", method="xml"
- )
+ query = 'let\n\tSource = ""\nin\n\tSource'
+ sb = f"{sb}{query};"

- # STEP 5: Zip up the 4 files
- zipFileName = file_name + ".zip"
- zipFilePath = os.path.join(folderPath, zipFileName)
- shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath)
+ for e_name, kind_expr in expr_map.items():
+ expr = kind_expr[1]
+ sb = f'{sb}\nshared #"{e_name}" = {expr};'

- # STEP 6: Convert the zip file back into a .pqt file
- newExt = ".pqt"
- directory = os.path.dirname(zipFilePath)
- fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0]
- newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt)
- shutil.move(zipFilePath, newFilePath)
+ with open(mdFilePath, "w") as file:
+ file.write(sb)

- # STEP 7: Delete subFolder directory which is no longer needed
- shutil.rmtree(subFolderPath, ignore_errors=True)
+ # STEP 2: Create the MashupMetadata.json file
+ mmfileName = "MashupMetadata.json"
+ mmFilePath = os.path.join(subFolderPath, mmfileName)
+ queryMetadata = []

- print(
- f"{icons.green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse."
- )
+ for t_name, query in table_map.items():
+ queryMetadata.append(
+ QueryMetadata(t_name, None, None, None, True, False)
+ )
+ for e_name, kind_expr in expr_map.items():
+ e_kind = kind_expr[0]
+ if e_kind == "M":
+ queryMetadata.append(
+ QueryMetadata(e_name, None, None, None, True, False)
+ )
+ else:
+ queryMetadata.append(
+ QueryMetadata(e_name, None, None, None, False, False)
+ )

- else:
- print(
- f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace does not use Power Query so a Power Query Template file cannot be generated."
- )
+ rootObject = RootObject(
+ "en-US", "2.132.328.0", queryMetadata
+ ) # "2.126.453.0"
+
+ def obj_to_dict(obj):
+ if isinstance(obj, list):
+ return [obj_to_dict(e) for e in obj]
+ elif hasattr(obj, "__dict__"):
+ return {k: obj_to_dict(v) for k, v in obj.__dict__.items()}
+ else:
+ return obj
+
+ jsonContent = json.dumps(obj_to_dict(rootObject), indent=4)
+
+ with open(mmFilePath, "w") as json_file:
+ json_file.write(jsonContent)
+
+ # STEP 3: Create Metadata.json file
+ mFileName = "Metadata.json"
+ mFilePath = os.path.join(subFolderPath, mFileName)
+ metaData = {"Name": f"{file_name}", "Description": "", "Version": "1.0.0.0"}
+ jsonContent = json.dumps(metaData, indent=4)
+
+ with open(mFilePath, "w") as json_file:
+ json_file.write(jsonContent)
+
+ # STEP 4: Create [Content_Types].xml file:
+ xml_content = """<?xml version="1.0" encoding="utf-8"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="json" ContentType="application/json" /><Default Extension="pq" ContentType="application/x-ms-m" /></Types>"""
+ xmlFileName = "[Content_Types].xml"
+ xmlFilePath = os.path.join(subFolderPath, xmlFileName)
+ with open(xmlFilePath, "w", encoding="utf-8") as file:
+ file.write(xml_content)
+
+ # STEP 5: Zip up the 4 files
+ zipFileName = f"{file_name}.zip"
+ zipFilePath = os.path.join(folderPath, zipFileName)
+ shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath)
+
+ # STEP 6: Convert the zip file back into a .pqt file
+ newExt = ".pqt"
+ directory = os.path.dirname(zipFilePath)
+ fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[
+ 0
+ ]
+ newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt)
+ shutil.move(zipFilePath, newFilePath)
+
+ # STEP 7: Delete subFolder directory which is no longer needed
+ shutil.rmtree(subFolderPath, ignore_errors=True)
+
+ print(
+ f"{icons.green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse."
+ )
+
+ a = 0
+ for t_map in table_chunks:
+ if a > 0:
+ file_name = f"{file_name}_{a}"
+ a += 1
+ create_pqt(t_map, expr_map, file_name=file_name)
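
A hedged usage sketch of create_pqt_file after this rework. The import path is an assumption based on the package layout, the dataset, workspace, and file_name values are hypothetical, and the function must run in a notebook with a lakehouse attached, as the ValueError above enforces.

from sempy_labs.migration import create_pqt_file

# Writes the .pqt file to the Files section of the attached lakehouse.
# Models with more than 50 Power Query tables are split across multiple .pqt files,
# with a numeric suffix appended for each extra chunk, per the chunking loop above.
create_pqt_file(dataset="AdventureWorks", workspace="Sales Workspace", file_name="PowerQueryTemplate")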