semantic-link-labs 0.12.8 (semantic_link_labs-0.12.8-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
  2. semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
  3. semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
  4. semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
  5. semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +606 -0
  7. sempy_labs/_a_lib_info.py +2 -0
  8. sempy_labs/_ai.py +437 -0
  9. sempy_labs/_authentication.py +264 -0
  10. sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
  11. sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
  12. sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
  13. sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
  14. sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
  15. sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
  16. sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
  17. sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
  18. sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
  19. sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
  20. sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
  21. sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
  22. sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
  23. sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
  24. sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
  25. sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
  26. sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
  27. sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
  28. sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
  29. sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
  30. sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
  31. sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
  32. sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
  33. sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
  34. sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
  35. sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
  36. sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
  37. sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
  38. sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
  39. sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
  40. sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
  41. sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
  42. sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
  43. sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
  44. sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
  45. sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
  46. sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
  47. sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
  48. sempy_labs/_capacities.py +1198 -0
  49. sempy_labs/_capacity_migration.py +660 -0
  50. sempy_labs/_clear_cache.py +351 -0
  51. sempy_labs/_connections.py +610 -0
  52. sempy_labs/_dashboards.py +69 -0
  53. sempy_labs/_data_access_security.py +98 -0
  54. sempy_labs/_data_pipelines.py +162 -0
  55. sempy_labs/_dataflows.py +668 -0
  56. sempy_labs/_dax.py +501 -0
  57. sempy_labs/_daxformatter.py +80 -0
  58. sempy_labs/_delta_analyzer.py +467 -0
  59. sempy_labs/_delta_analyzer_history.py +301 -0
  60. sempy_labs/_dictionary_diffs.py +221 -0
  61. sempy_labs/_documentation.py +147 -0
  62. sempy_labs/_domains.py +51 -0
  63. sempy_labs/_eventhouses.py +182 -0
  64. sempy_labs/_external_data_shares.py +230 -0
  65. sempy_labs/_gateways.py +521 -0
  66. sempy_labs/_generate_semantic_model.py +521 -0
  67. sempy_labs/_get_connection_string.py +84 -0
  68. sempy_labs/_git.py +543 -0
  69. sempy_labs/_graphQL.py +90 -0
  70. sempy_labs/_helper_functions.py +2833 -0
  71. sempy_labs/_icons.py +149 -0
  72. sempy_labs/_job_scheduler.py +609 -0
  73. sempy_labs/_kql_databases.py +149 -0
  74. sempy_labs/_kql_querysets.py +124 -0
  75. sempy_labs/_kusto.py +137 -0
  76. sempy_labs/_labels.py +124 -0
  77. sempy_labs/_list_functions.py +1720 -0
  78. sempy_labs/_managed_private_endpoints.py +253 -0
  79. sempy_labs/_mirrored_databases.py +416 -0
  80. sempy_labs/_mirrored_warehouses.py +60 -0
  81. sempy_labs/_ml_experiments.py +113 -0
  82. sempy_labs/_model_auto_build.py +140 -0
  83. sempy_labs/_model_bpa.py +557 -0
  84. sempy_labs/_model_bpa_bulk.py +378 -0
  85. sempy_labs/_model_bpa_rules.py +859 -0
  86. sempy_labs/_model_dependencies.py +343 -0
  87. sempy_labs/_mounted_data_factories.py +123 -0
  88. sempy_labs/_notebooks.py +441 -0
  89. sempy_labs/_one_lake_integration.py +151 -0
  90. sempy_labs/_onelake.py +131 -0
  91. sempy_labs/_query_scale_out.py +433 -0
  92. sempy_labs/_refresh_semantic_model.py +435 -0
  93. sempy_labs/_semantic_models.py +468 -0
  94. sempy_labs/_spark.py +455 -0
  95. sempy_labs/_sql.py +241 -0
  96. sempy_labs/_sql_audit_settings.py +207 -0
  97. sempy_labs/_sql_endpoints.py +214 -0
  98. sempy_labs/_tags.py +201 -0
  99. sempy_labs/_translations.py +43 -0
  100. sempy_labs/_user_delegation_key.py +44 -0
  101. sempy_labs/_utils.py +79 -0
  102. sempy_labs/_vertipaq.py +1021 -0
  103. sempy_labs/_vpax.py +388 -0
  104. sempy_labs/_warehouses.py +234 -0
  105. sempy_labs/_workloads.py +140 -0
  106. sempy_labs/_workspace_identity.py +72 -0
  107. sempy_labs/_workspaces.py +595 -0
  108. sempy_labs/admin/__init__.py +170 -0
  109. sempy_labs/admin/_activities.py +167 -0
  110. sempy_labs/admin/_apps.py +145 -0
  111. sempy_labs/admin/_artifacts.py +65 -0
  112. sempy_labs/admin/_basic_functions.py +463 -0
  113. sempy_labs/admin/_capacities.py +508 -0
  114. sempy_labs/admin/_dataflows.py +45 -0
  115. sempy_labs/admin/_datasets.py +186 -0
  116. sempy_labs/admin/_domains.py +522 -0
  117. sempy_labs/admin/_external_data_share.py +100 -0
  118. sempy_labs/admin/_git.py +72 -0
  119. sempy_labs/admin/_items.py +265 -0
  120. sempy_labs/admin/_labels.py +211 -0
  121. sempy_labs/admin/_reports.py +241 -0
  122. sempy_labs/admin/_scanner.py +118 -0
  123. sempy_labs/admin/_shared.py +82 -0
  124. sempy_labs/admin/_sharing_links.py +110 -0
  125. sempy_labs/admin/_tags.py +131 -0
  126. sempy_labs/admin/_tenant.py +503 -0
  127. sempy_labs/admin/_tenant_keys.py +89 -0
  128. sempy_labs/admin/_users.py +140 -0
  129. sempy_labs/admin/_workspaces.py +236 -0
  130. sempy_labs/deployment_pipeline/__init__.py +23 -0
  131. sempy_labs/deployment_pipeline/_items.py +580 -0
  132. sempy_labs/directlake/__init__.py +57 -0
  133. sempy_labs/directlake/_autosync.py +58 -0
  134. sempy_labs/directlake/_directlake_schema_compare.py +120 -0
  135. sempy_labs/directlake/_directlake_schema_sync.py +161 -0
  136. sempy_labs/directlake/_dl_helper.py +274 -0
  137. sempy_labs/directlake/_generate_shared_expression.py +94 -0
  138. sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
  139. sempy_labs/directlake/_get_shared_expression.py +34 -0
  140. sempy_labs/directlake/_guardrails.py +96 -0
  141. sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
  142. sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
  143. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
  144. sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
  145. sempy_labs/directlake/_warm_cache.py +236 -0
  146. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  147. sempy_labs/environment/__init__.py +23 -0
  148. sempy_labs/environment/_items.py +212 -0
  149. sempy_labs/environment/_pubstage.py +223 -0
  150. sempy_labs/eventstream/__init__.py +37 -0
  151. sempy_labs/eventstream/_items.py +263 -0
  152. sempy_labs/eventstream/_topology.py +652 -0
  153. sempy_labs/graph/__init__.py +59 -0
  154. sempy_labs/graph/_groups.py +651 -0
  155. sempy_labs/graph/_sensitivity_labels.py +120 -0
  156. sempy_labs/graph/_teams.py +125 -0
  157. sempy_labs/graph/_user_licenses.py +96 -0
  158. sempy_labs/graph/_users.py +516 -0
  159. sempy_labs/graph_model/__init__.py +15 -0
  160. sempy_labs/graph_model/_background_jobs.py +63 -0
  161. sempy_labs/graph_model/_items.py +149 -0
  162. sempy_labs/lakehouse/__init__.py +67 -0
  163. sempy_labs/lakehouse/_blobs.py +247 -0
  164. sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
  165. sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
  166. sempy_labs/lakehouse/_helper.py +250 -0
  167. sempy_labs/lakehouse/_lakehouse.py +351 -0
  168. sempy_labs/lakehouse/_livy_sessions.py +143 -0
  169. sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
  170. sempy_labs/lakehouse/_partitioning.py +165 -0
  171. sempy_labs/lakehouse/_schemas.py +217 -0
  172. sempy_labs/lakehouse/_shortcuts.py +440 -0
  173. sempy_labs/migration/__init__.py +35 -0
  174. sempy_labs/migration/_create_pqt_file.py +238 -0
  175. sempy_labs/migration/_direct_lake_to_import.py +105 -0
  176. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
  177. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
  178. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
  179. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
  180. sempy_labs/migration/_migration_validation.py +71 -0
  181. sempy_labs/migration/_refresh_calc_tables.py +131 -0
  182. sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
  183. sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
  184. sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
  185. sempy_labs/ml_model/__init__.py +23 -0
  186. sempy_labs/ml_model/_functions.py +427 -0
  187. sempy_labs/report/_BPAReportTemplate.json +232 -0
  188. sempy_labs/report/__init__.py +55 -0
  189. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  190. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  191. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  192. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  193. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  194. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  195. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  196. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  197. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  198. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  199. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  200. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  201. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  202. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  203. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  204. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  205. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  206. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  207. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  208. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  209. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  210. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  211. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  212. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  213. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  214. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  215. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  216. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  217. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  218. sempy_labs/report/_download_report.py +76 -0
  219. sempy_labs/report/_export_report.py +257 -0
  220. sempy_labs/report/_generate_report.py +427 -0
  221. sempy_labs/report/_paginated.py +76 -0
  222. sempy_labs/report/_report_bpa.py +354 -0
  223. sempy_labs/report/_report_bpa_rules.py +115 -0
  224. sempy_labs/report/_report_functions.py +581 -0
  225. sempy_labs/report/_report_helper.py +227 -0
  226. sempy_labs/report/_report_list_functions.py +110 -0
  227. sempy_labs/report/_report_rebind.py +149 -0
  228. sempy_labs/report/_reportwrapper.py +3100 -0
  229. sempy_labs/report/_save_report.py +147 -0
  230. sempy_labs/snowflake_database/__init__.py +10 -0
  231. sempy_labs/snowflake_database/_items.py +105 -0
  232. sempy_labs/sql_database/__init__.py +21 -0
  233. sempy_labs/sql_database/_items.py +201 -0
  234. sempy_labs/sql_database/_mirroring.py +79 -0
  235. sempy_labs/theme/__init__.py +12 -0
  236. sempy_labs/theme/_org_themes.py +129 -0
  237. sempy_labs/tom/__init__.py +3 -0
  238. sempy_labs/tom/_model.py +5977 -0
  239. sempy_labs/variable_library/__init__.py +19 -0
  240. sempy_labs/variable_library/_functions.py +403 -0
  241. sempy_labs/warehouse/__init__.py +28 -0
  242. sempy_labs/warehouse/_items.py +234 -0
  243. sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/lakehouse/_lakehouse.py
@@ -0,0 +1,351 @@
+ from tqdm.auto import tqdm
+ from typing import List, Optional, Union
+ from sempy._utils._log import log
+ from uuid import UUID
+ from sempy_labs._helper_functions import (
+     _base_api,
+     resolve_lakehouse_name_and_id,
+     resolve_workspace_name_and_id,
+     _create_spark_session,
+     _pure_python_notebook,
+     _create_dataframe,
+     _update_dataframe_datatypes,
+     resolve_workspace_id,
+ )
+ import sempy_labs._icons as icons
+ import re
+ import pandas as pd
+
+
+ @log
+ def list_lakehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+     """
+     Shows the lakehouses within a workspace.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the lakehouses within a workspace.
+     """
+
+     columns = {
+         "Lakehouse Name": "string",
+         "Lakehouse ID": "string",
+         "Description": "string",
+         "OneLake Tables Path": "string",
+         "OneLake Files Path": "string",
+         "SQL Endpoint Connection String": "string",
+         "SQL Endpoint ID": "string",
+         "SQL Endpoint Provisioning Status": "string",
+         "Schema Enabled": "bool",
+         "Default Schema": "string",
+         "Sensitivity Label Id": "string",
+     }
+     df = _create_dataframe(columns=columns)
+
+     workspace_id = resolve_workspace_id(workspace)
+
+     responses = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses",
+         uses_pagination=True,
+         client="fabric_sp",
+     )
+
+     rows = []
+     for r in responses:
+         for v in r.get("value", []):
+             prop = v.get("properties", {})
+             sqlEPProp = prop.get("sqlEndpointProperties", {})
+             default_schema = prop.get("defaultSchema", None)
+
+             rows.append(
+                 {
+                     "Lakehouse Name": v.get("displayName"),
+                     "Lakehouse ID": v.get("id"),
+                     "Description": v.get("description"),
+                     "OneLake Tables Path": prop.get("oneLakeTablesPath"),
+                     "OneLake Files Path": prop.get("oneLakeFilesPath"),
+                     "SQL Endpoint Connection String": sqlEPProp.get("connectionString"),
+                     "SQL Endpoint ID": sqlEPProp.get("id"),
+                     "SQL Endpoint Provisioning Status": sqlEPProp.get(
+                         "provisioningStatus"
+                     ),
+                     "Schema Enabled": True if default_schema else False,
+                     "Default Schema": default_schema,
+                     "Sensitivity Label Id": v.get("sensitivityLabel", {}).get(
+                         "sensitivityLabelId"
+                     ),
+                 }
+             )
+
+     if rows:
+         df = pd.DataFrame(rows, columns=list(columns.keys()))
+         _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+     return df
+
+
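For orientation, a minimal usage sketch for list_lakehouses (an editor's example, not part of the diff). It imports from the private module path added above; the package may also re-export the function from its public namespace, and the "Sales" workspace name is a hypothetical placeholder.

# Usage sketch (assumes a Microsoft Fabric notebook with semantic-link-labs installed).
from sempy_labs.lakehouse._lakehouse import list_lakehouses

df = list_lakehouses(workspace="Sales")  # "Sales" is a hypothetical workspace name
print(df[["Lakehouse Name", "Lakehouse ID", "Schema Enabled"]])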
+ @log
+ def lakehouse_attached() -> bool:
+     """
+     Identifies if a lakehouse is attached to the notebook.
+
+     Returns
+     -------
+     bool
+         Returns True if a lakehouse is attached to the notebook.
+     """
+
+     from sempy_labs._helper_functions import _get_fabric_context_setting
+
+     lake_id = _get_fabric_context_setting(name="trident.lakehouse.id")
+
+     if len(lake_id) > 0:
+         return True
+     else:
+         return False
+
+
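Because the maintenance helpers below resolve their defaults from the attached lakehouse, lakehouse_attached works as a guard; a sketch (editor's example, not part of the diff):

# Guard sketch: fail fast when no lakehouse is attached to resolve defaults against.
from sempy_labs.lakehouse._lakehouse import lakehouse_attached

if not lakehouse_attached():
    raise RuntimeError(
        "Attach a lakehouse to this notebook, or pass lakehouse/workspace explicitly."
    )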
+ @log
+ def _optimize_table(path):
+
+     if _pure_python_notebook():
+         from deltalake import DeltaTable
+
+         DeltaTable(path).optimize.compact()
+     else:
+         from delta import DeltaTable
+
+         spark = _create_spark_session()
+         DeltaTable.forPath(spark, path).optimize().executeCompaction()
+
+
+ @log
+ def _vacuum_table(path, retain_n_hours):
+
+     if _pure_python_notebook():
+         from deltalake import DeltaTable
+
+         DeltaTable(path).vacuum(retention_hours=retain_n_hours)
+     else:
+         from delta import DeltaTable
+
+         spark = _create_spark_session()
+         spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+         DeltaTable.forPath(spark, path).vacuum(retain_n_hours)
+
+
+ @log
+ def optimize_lakehouse_tables(
+     tables: Optional[Union[str, List[str]]] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ):
+     """
+     Runs the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function over the specified lakehouse tables.
+
+     Parameters
+     ----------
+     tables : str | List[str], default=None
+         The table(s) to optimize.
+         Defaults to None, which resolves to optimizing all tables within the lakehouse.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     """
+
+     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+
+     df = get_lakehouse_tables(
+         lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+     )
+     df_delta = df[df["Format"] == "delta"]
+
+     if isinstance(tables, str):
+         tables = [tables]
+
+     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+     df_tables.reset_index(drop=True, inplace=True)
+
+     total = len(df_tables)
+     for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
+         table_name = r["Table Name"]
+         path = r["Location"]
+         bar.set_description(
+             f"Optimizing the '{table_name}' table ({idx + 1}/{total})..."
+         )
+         _optimize_table(path=path)
+
+
+ @log
+ def vacuum_lakehouse_tables(
+     tables: Optional[Union[str, List[str]]] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+     retain_n_hours: Optional[int] = None,
+ ):
+     """
+     Runs the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function over the specified lakehouse tables.
+
+     Parameters
+     ----------
+     tables : str | List[str], default=None
+         The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be vacuumed.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     retain_n_hours : int, default=None
+         The number of hours to retain historical versions of Delta table files.
+         Files older than this retention period will be deleted during the vacuum operation.
+         If not specified, the default retention period configured for the Delta table will be used.
+         The default retention period is 168 hours (7 days) unless manually configured via table properties.
+     """
+
+     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
+
+     df = get_lakehouse_tables(
+         lakehouse=lakehouse, workspace=workspace, exclude_shortcuts=True
+     )
+     df_delta = df[df["Format"] == "delta"]
+
+     if isinstance(tables, str):
+         tables = [tables]
+
+     df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta
+     df_tables.reset_index(drop=True, inplace=True)
+
+     total = len(df_tables)
+     for idx, r in (bar := tqdm(df_tables.iterrows(), total=total, bar_format="{desc}")):
+         table_name = r["Table Name"]
+         path = r["Location"]
+         bar.set_description(f"Vacuuming the '{table_name}' table ({idx + 1}/{total})...")
+         _vacuum_table(path=path, retain_n_hours=retain_n_hours)
+
+
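A usage sketch for the two public wrappers above (editor's example, not part of the diff); the table names are hypothetical placeholders:

# Usage sketch (assumes a Fabric notebook with a lakehouse attached).
from sempy_labs.lakehouse._lakehouse import (
    optimize_lakehouse_tables,
    vacuum_lakehouse_tables,
)

optimize_lakehouse_tables(tables=["FactSales", "DimDate"])  # compact small files
vacuum_lakehouse_tables(tables="FactSales", retain_n_hours=168)  # 7-day retention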
+ @log
+ def run_table_maintenance(
+     table_name: str,
+     optimize: bool = False,
+     v_order: bool = False,
+     vacuum: bool = False,
+     retention_period: Optional[str] = None,
+     schema: Optional[str] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ) -> pd.DataFrame:
+     """
+     Runs table maintenance operations on the specified table within the lakehouse.
+
+     This is a wrapper function for the following API: `Background Jobs - Run On Demand Table Maintenance <https://learn.microsoft.com/rest/api/fabric/lakehouse/background-jobs/run-on-demand-table-maintenance>`_.
+
+     Parameters
+     ----------
+     table_name : str
+         Name of the delta table on which to run maintenance operations.
+     optimize : bool, default=False
+         If True, the `OPTIMIZE <https://docs.delta.io/latest/optimizations-oss.html>`_ function will be run on the table.
+     v_order : bool, default=False
+         If True, v-order will be enabled for the table.
+     vacuum : bool, default=False
+         If True, the `VACUUM <https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table>`_ function will be run on the table.
+     retention_period : str, default=None
+         If specified, the retention period for the vacuum operation. Must be in the 'd:hh:mm:ss' format.
+     schema : str, default=None
+         The schema of the tables within the lakehouse.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A DataFrame containing the job instance details of the table maintenance operation.
+     """
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     if not optimize and not vacuum:
+         raise ValueError(
+             f"{icons.warning} At least one of 'optimize' or 'vacuum' must be set to True."
+         )
+     if not vacuum and retention_period is not None:
+         raise ValueError(
+             f"{icons.warning} The 'retention_period' parameter can only be set if 'vacuum' is set to True."
+         )
+     if retention_period is not None:
+
+         def is_valid_format(time_string):
+             pattern = r"^\d+:[0-2][0-9]:[0-5][0-9]:[0-5][0-9]$"
+             return bool(re.match(pattern, time_string))
+
+         if not is_valid_format(retention_period):
+             raise ValueError(
+                 f"{icons.red_dot} The 'retention_period' parameter must be in the 'd:hh:mm:ss' format."
+             )
+
+     payload = {
+         "executionData": {
+             "tableName": table_name,
+         }
+     }
+     if schema is not None:
+         payload["executionData"]["schemaName"] = schema
+     if optimize:
+         payload["executionData"]["optimizeSettings"] = {}
+     if v_order:
+         payload["executionData"]["optimizeSettings"] = {"vOrder": True}
+     if vacuum:
+         payload["executionData"]["vacuumSettings"] = {}
+     if vacuum and retention_period is not None:
+         payload["executionData"]["vacuumSettings"]["retentionPeriod"] = retention_period
+
+     print(
+         f"{icons.in_progress} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+     )
+
+     df = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/instances?jobType=TableMaintenance",
+         method="post",
+         payload=payload,
+         status_codes=[200, 202],
+         client="fabric_sp",
+         lro_return_df=True,
+     )
+
+     status = df["Status"].iloc[0]
+
+     if status == "Completed":
+         print(
+             f"{icons.green_dot} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has succeeded."
+         )
+     else:
+         print(status)
+         print(
+             f"{icons.red_dot} The table maintenance job for the '{table_name}' table in the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has failed."
+         )
+
+     return df
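Note that retention_period takes the 'd:hh:mm:ss' string format enforced by the validation above, not an hour count like vacuum_lakehouse_tables. A usage sketch (editor's example, not part of the diff; the table name is a hypothetical placeholder):

# Usage sketch: service-side OPTIMIZE with V-Order plus VACUUM, via the Fabric REST API.
from sempy_labs.lakehouse._lakehouse import run_table_maintenance

job_df = run_table_maintenance(
    table_name="FactSales",  # hypothetical table name
    optimize=True,
    v_order=True,
    vacuum=True,
    retention_period="7:00:00:00",  # 7 days in the required 'd:hh:mm:ss' format
)
print(job_df["Status"].iloc[0])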
sempy_labs/lakehouse/_livy_sessions.py
@@ -0,0 +1,143 @@
+ from sempy_labs._helper_functions import (
+     resolve_workspace_id,
+     resolve_lakehouse_id,
+     _base_api,
+     _create_dataframe,
+     _update_dataframe_datatypes,
+ )
+ import pandas as pd
+ from typing import Optional
+ from uuid import UUID
+ from sempy._utils._log import log
+
+
+ @log
+ def list_livy_sessions(
+     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Shows a list of Livy sessions for the specified item identifier.
+
+     This is a wrapper function for the following API: `Livy Sessions - List Livy Sessions <https://learn.microsoft.com/rest/api/fabric/lakehouse/livy-sessions/list-livy-sessions>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing a list of Livy sessions for the specified item identifier.
+     """
+
+     columns = {
+         "Spark Application Id": "string",
+         "State": "string",
+         "Livy Id": "string",
+         "Origin": "string",
+         "Attempt Number": "int",
+         "Max Number Of Attempts": "int",
+         "Livy Name": "string",
+         "Submitter Id": "string",
+         "Submitter Type": "string",
+         "Item Workspace Id": "string",
+         "Item Id": "string",
+         "Item Reference Type": "string",
+         "Item Name": "string",
+         "Item Type": "string",
+         "Job Type": "string",
+         "Submitted Date Time": "string",
+         "Start Date Time": "string",
+         "End Date Time": "string",
+         "Queued Duration Value": "int",
+         "Queued Duration Time Unit": "string",
+         "Running Duration Value": "int",
+         "Running Duration Time Unit": "string",
+         "Total Duration Value": "int",
+         "Total Duration Time Unit": "string",
+         "Job Instance Id": "string",
+         "Creator Item Workspace Id": "string",
+         "Creator Item Id": "string",
+         "Creator Item Reference Type": "string",
+         "Creator Item Name": "string",
+         "Creator Item Type": "string",
+         "Cancellation Reason": "string",
+         "Capacity Id": "string",
+         "Operation Name": "string",
+         "Runtime Version": "string",
+         "Livy Session Item Resource Uri": "string",
+     }
+     df = _create_dataframe(columns=columns)
+
+     workspace_id = resolve_workspace_id(workspace)
+     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)
+
+     responses = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/livySessions",
+         uses_pagination=True,
+         client="fabric_sp",
+     )
+
+     rows = []
+     for r in responses:
+         for v in r.get("value", []):
+             queued_duration = v.get("queuedDuration", {})
+             running_duration = v.get("runningDuration", {})
+             total_duration = v.get("totalDuration", {})
+             rows.append(
+                 {
+                     "Spark Application Id": v.get("sparkApplicationId"),
+                     "State": v.get("state"),
+                     "Livy Id": v.get("livyId"),
+                     "Origin": v.get("origin"),
+                     "Attempt Number": v.get("attemptNumber"),
+                     "Max Number Of Attempts": v.get("maxNumberOfAttempts"),
+                     "Livy Name": v.get("livyName"),
+                     "Submitter Id": v["submitter"].get("id"),
+                     "Submitter Type": v["submitter"].get("type"),
+                     "Item Workspace Id": v["item"].get("workspaceId"),
+                     "Item Id": v["item"].get("itemId"),
+                     "Item Reference Type": v["item"].get("referenceType"),
+                     "Item Name": v.get("itemName"),
+                     "Item Type": v.get("itemType"),
+                     "Job Type": v.get("jobType"),
+                     "Submitted Date Time": v.get("submittedDateTime"),
+                     "Start Date Time": v.get("startDateTime"),
+                     "End Date Time": v.get("endDateTime"),
+                     "Queued Duration Value": queued_duration.get("value"),
+                     "Queued Duration Time Unit": queued_duration.get("timeUnit"),
+                     "Running Duration Value": running_duration.get("value"),
+                     "Running Duration Time Unit": running_duration.get("timeUnit"),
+                     "Total Duration Value": total_duration.get("value"),
+                     "Total Duration Time Unit": total_duration.get("timeUnit"),
+                     "Job Instance Id": v.get("jobInstanceId"),
+                     "Creator Item Workspace Id": v["creatorItem"].get("workspaceId"),
+                     "Creator Item Id": v["creatorItem"].get("itemId"),
+                     "Creator Item Reference Type": v["creatorItem"].get(
+                         "referenceType"
+                     ),
+                     "Creator Item Name": v.get("creatorItemName"),
+                     "Creator Item Type": v.get("creatorItemType"),
+                     "Cancellation Reason": v.get("cancellationReason"),
+                     "Capacity Id": v.get("capacityId"),
+                     "Operation Name": v.get("operationName"),
+                     "Runtime Version": v.get("runtimeVersion"),
+                     "Livy Session Item Resource Uri": v.get(
+                         "livySessionItemResourceUri"
+                     ),
+                 }
+             )
+
+     if rows:
+         df = pd.DataFrame(rows, columns=list(columns.keys()))
+         _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+     return df
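A minimal usage sketch for list_livy_sessions (editor's example, not part of the diff), assuming a lakehouse is attached so both identifiers resolve automatically:

# Usage sketch: list Livy sessions for the attached lakehouse and inspect their states.
from sempy_labs.lakehouse._livy_sessions import list_livy_sessions

sessions = list_livy_sessions()
print(sessions[["Livy Id", "State", "Job Type", "Runtime Version"]])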
sempy_labs/lakehouse/_materialized_lake_views.py
@@ -0,0 +1,157 @@
+ from typing import Optional
+ from sempy_labs._helper_functions import (
+     resolve_workspace_id,
+     resolve_workspace_name_and_id,
+     resolve_lakehouse_name_and_id,
+     _base_api,
+     _create_dataframe,
+ )
+ from uuid import UUID
+ from sempy._utils._log import log
+ import sempy_labs._icons as icons
+ import pandas as pd
+
+
+ @log
+ def refresh_materialized_lake_views(
+     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Runs an on-demand Refresh MaterializedLakeViews job instance.
+
+     This is a wrapper function for the following API: `Background Jobs - Run On Demand Refresh Materialized Lake Views <https://learn.microsoft.com/rest/api/fabric/lakehouse/background-jobs/run-on-demand-refresh-materialized-lake-views>`_.
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A DataFrame containing the job instance details of the refresh materialized lake views operation.
+     """
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     print(
+         f"{icons.in_progress} The refresh materialized lake views job for the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been initiated."
+     )
+
+     df = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/instances?jobType=RefreshMaterializedLakeViews",
+         lro_return_df=True,
+         method="post",
+     )
+
+     status = df["Status"].iloc[0]
+
+     if status == "Completed":
+         print(
+             f"{icons.green_dot} The refresh materialized lake views job for the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has succeeded."
+         )
+     else:
+         print(status)
+         print(
+             f"{icons.red_dot} The refresh materialized lake views job for the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has failed."
+         )
+
+     return df
+
+
+ def _get_materialized_lake_views_schedule(
+     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Gets the schedule details for the MaterializedLakeViews job instance.
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A DataFrame containing the schedule details of the materialized lake views job instance.
+     """
+
+     workspace_id = resolve_workspace_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     columns = {
+         "Job Schedule Id": "string",
+         "Enabled": "bool",
+         "Created DateTime": "datetime",
+         "Type": "string",
+         "Start DateTime": "datetime",
+         "End DateTime": "datetime",
+         "Local TimeZoneId": "string",
+         "Interval": "int",
+         "Owner Id": "string",
+         "Owner Type": "string",
+     }
+
+     df = _create_dataframe(columns=columns)
+
+     response = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/RefreshMaterializedLakeViews/schedules",
+     )
+
+     df = pd.json_normalize(response.json().get("value", []))
+
+     return df
+
+
+ @log
+ def _delete_materialized_lake_view_schedule(
+     schedule_id: UUID,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ):
+     """
+     Deletes an existing Refresh MaterializedLakeViews schedule for a lakehouse.
+
+     This is a wrapper function for the following API: `Background Jobs - Delete Refresh Materialized Lake Views Schedule <https://learn.microsoft.com/rest/api/fabric/lakehouse/background-jobs/delete-refresh-materialized-lake-views-schedule>`_.
+
+     Parameters
+     ----------
+     schedule_id : uuid.UUID
+         The ID of the job schedule to delete.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     """
+
+     workspace_id = resolve_workspace_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/jobs/RefreshMaterializedLakeViews/schedules/{schedule_id}",
+         method="delete",
+     )
+
+     print(
+         f"{icons.green_dot} The materialized lake view schedule with ID '{schedule_id}' has been deleted from the '{lakehouse_name}' lakehouse within the '{workspace_id}' workspace."
+     )
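A usage sketch for the public refresh wrapper (editor's example, not part of the diff); the schedule helpers above are private (underscore-prefixed) and may change without notice:

# Usage sketch: start an on-demand materialized lake view refresh and check the outcome.
from sempy_labs.lakehouse._materialized_lake_views import refresh_materialized_lake_views

job_df = refresh_materialized_lake_views()  # defaults resolve from the attached lakehouse
if job_df["Status"].iloc[0] != "Completed":
    print(job_df)  # inspect the job instance details on failure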