semantic-link-labs 0.12.8 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
  2. semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
  3. semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
  4. semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
  5. semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +606 -0
  7. sempy_labs/_a_lib_info.py +2 -0
  8. sempy_labs/_ai.py +437 -0
  9. sempy_labs/_authentication.py +264 -0
  10. sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
  11. sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
  12. sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
  13. sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
  14. sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
  15. sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
  16. sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
  17. sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
  18. sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
  19. sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
  20. sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
  21. sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
  22. sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
  23. sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
  24. sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
  25. sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
  26. sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
  27. sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
  28. sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
  29. sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
  30. sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
  31. sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
  32. sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
  33. sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
  34. sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
  35. sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
  36. sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
  37. sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
  38. sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
  39. sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
  40. sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
  41. sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
  42. sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
  43. sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
  44. sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
  45. sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
  46. sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
  47. sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
  48. sempy_labs/_capacities.py +1198 -0
  49. sempy_labs/_capacity_migration.py +660 -0
  50. sempy_labs/_clear_cache.py +351 -0
  51. sempy_labs/_connections.py +610 -0
  52. sempy_labs/_dashboards.py +69 -0
  53. sempy_labs/_data_access_security.py +98 -0
  54. sempy_labs/_data_pipelines.py +162 -0
  55. sempy_labs/_dataflows.py +668 -0
  56. sempy_labs/_dax.py +501 -0
  57. sempy_labs/_daxformatter.py +80 -0
  58. sempy_labs/_delta_analyzer.py +467 -0
  59. sempy_labs/_delta_analyzer_history.py +301 -0
  60. sempy_labs/_dictionary_diffs.py +221 -0
  61. sempy_labs/_documentation.py +147 -0
  62. sempy_labs/_domains.py +51 -0
  63. sempy_labs/_eventhouses.py +182 -0
  64. sempy_labs/_external_data_shares.py +230 -0
  65. sempy_labs/_gateways.py +521 -0
  66. sempy_labs/_generate_semantic_model.py +521 -0
  67. sempy_labs/_get_connection_string.py +84 -0
  68. sempy_labs/_git.py +543 -0
  69. sempy_labs/_graphQL.py +90 -0
  70. sempy_labs/_helper_functions.py +2833 -0
  71. sempy_labs/_icons.py +149 -0
  72. sempy_labs/_job_scheduler.py +609 -0
  73. sempy_labs/_kql_databases.py +149 -0
  74. sempy_labs/_kql_querysets.py +124 -0
  75. sempy_labs/_kusto.py +137 -0
  76. sempy_labs/_labels.py +124 -0
  77. sempy_labs/_list_functions.py +1720 -0
  78. sempy_labs/_managed_private_endpoints.py +253 -0
  79. sempy_labs/_mirrored_databases.py +416 -0
  80. sempy_labs/_mirrored_warehouses.py +60 -0
  81. sempy_labs/_ml_experiments.py +113 -0
  82. sempy_labs/_model_auto_build.py +140 -0
  83. sempy_labs/_model_bpa.py +557 -0
  84. sempy_labs/_model_bpa_bulk.py +378 -0
  85. sempy_labs/_model_bpa_rules.py +859 -0
  86. sempy_labs/_model_dependencies.py +343 -0
  87. sempy_labs/_mounted_data_factories.py +123 -0
  88. sempy_labs/_notebooks.py +441 -0
  89. sempy_labs/_one_lake_integration.py +151 -0
  90. sempy_labs/_onelake.py +131 -0
  91. sempy_labs/_query_scale_out.py +433 -0
  92. sempy_labs/_refresh_semantic_model.py +435 -0
  93. sempy_labs/_semantic_models.py +468 -0
  94. sempy_labs/_spark.py +455 -0
  95. sempy_labs/_sql.py +241 -0
  96. sempy_labs/_sql_audit_settings.py +207 -0
  97. sempy_labs/_sql_endpoints.py +214 -0
  98. sempy_labs/_tags.py +201 -0
  99. sempy_labs/_translations.py +43 -0
  100. sempy_labs/_user_delegation_key.py +44 -0
  101. sempy_labs/_utils.py +79 -0
  102. sempy_labs/_vertipaq.py +1021 -0
  103. sempy_labs/_vpax.py +388 -0
  104. sempy_labs/_warehouses.py +234 -0
  105. sempy_labs/_workloads.py +140 -0
  106. sempy_labs/_workspace_identity.py +72 -0
  107. sempy_labs/_workspaces.py +595 -0
  108. sempy_labs/admin/__init__.py +170 -0
  109. sempy_labs/admin/_activities.py +167 -0
  110. sempy_labs/admin/_apps.py +145 -0
  111. sempy_labs/admin/_artifacts.py +65 -0
  112. sempy_labs/admin/_basic_functions.py +463 -0
  113. sempy_labs/admin/_capacities.py +508 -0
  114. sempy_labs/admin/_dataflows.py +45 -0
  115. sempy_labs/admin/_datasets.py +186 -0
  116. sempy_labs/admin/_domains.py +522 -0
  117. sempy_labs/admin/_external_data_share.py +100 -0
  118. sempy_labs/admin/_git.py +72 -0
  119. sempy_labs/admin/_items.py +265 -0
  120. sempy_labs/admin/_labels.py +211 -0
  121. sempy_labs/admin/_reports.py +241 -0
  122. sempy_labs/admin/_scanner.py +118 -0
  123. sempy_labs/admin/_shared.py +82 -0
  124. sempy_labs/admin/_sharing_links.py +110 -0
  125. sempy_labs/admin/_tags.py +131 -0
  126. sempy_labs/admin/_tenant.py +503 -0
  127. sempy_labs/admin/_tenant_keys.py +89 -0
  128. sempy_labs/admin/_users.py +140 -0
  129. sempy_labs/admin/_workspaces.py +236 -0
  130. sempy_labs/deployment_pipeline/__init__.py +23 -0
  131. sempy_labs/deployment_pipeline/_items.py +580 -0
  132. sempy_labs/directlake/__init__.py +57 -0
  133. sempy_labs/directlake/_autosync.py +58 -0
  134. sempy_labs/directlake/_directlake_schema_compare.py +120 -0
  135. sempy_labs/directlake/_directlake_schema_sync.py +161 -0
  136. sempy_labs/directlake/_dl_helper.py +274 -0
  137. sempy_labs/directlake/_generate_shared_expression.py +94 -0
  138. sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
  139. sempy_labs/directlake/_get_shared_expression.py +34 -0
  140. sempy_labs/directlake/_guardrails.py +96 -0
  141. sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
  142. sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
  143. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
  144. sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
  145. sempy_labs/directlake/_warm_cache.py +236 -0
  146. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  147. sempy_labs/environment/__init__.py +23 -0
  148. sempy_labs/environment/_items.py +212 -0
  149. sempy_labs/environment/_pubstage.py +223 -0
  150. sempy_labs/eventstream/__init__.py +37 -0
  151. sempy_labs/eventstream/_items.py +263 -0
  152. sempy_labs/eventstream/_topology.py +652 -0
  153. sempy_labs/graph/__init__.py +59 -0
  154. sempy_labs/graph/_groups.py +651 -0
  155. sempy_labs/graph/_sensitivity_labels.py +120 -0
  156. sempy_labs/graph/_teams.py +125 -0
  157. sempy_labs/graph/_user_licenses.py +96 -0
  158. sempy_labs/graph/_users.py +516 -0
  159. sempy_labs/graph_model/__init__.py +15 -0
  160. sempy_labs/graph_model/_background_jobs.py +63 -0
  161. sempy_labs/graph_model/_items.py +149 -0
  162. sempy_labs/lakehouse/__init__.py +67 -0
  163. sempy_labs/lakehouse/_blobs.py +247 -0
  164. sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
  165. sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
  166. sempy_labs/lakehouse/_helper.py +250 -0
  167. sempy_labs/lakehouse/_lakehouse.py +351 -0
  168. sempy_labs/lakehouse/_livy_sessions.py +143 -0
  169. sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
  170. sempy_labs/lakehouse/_partitioning.py +165 -0
  171. sempy_labs/lakehouse/_schemas.py +217 -0
  172. sempy_labs/lakehouse/_shortcuts.py +440 -0
  173. sempy_labs/migration/__init__.py +35 -0
  174. sempy_labs/migration/_create_pqt_file.py +238 -0
  175. sempy_labs/migration/_direct_lake_to_import.py +105 -0
  176. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
  177. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
  178. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
  179. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
  180. sempy_labs/migration/_migration_validation.py +71 -0
  181. sempy_labs/migration/_refresh_calc_tables.py +131 -0
  182. sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
  183. sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
  184. sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
  185. sempy_labs/ml_model/__init__.py +23 -0
  186. sempy_labs/ml_model/_functions.py +427 -0
  187. sempy_labs/report/_BPAReportTemplate.json +232 -0
  188. sempy_labs/report/__init__.py +55 -0
  189. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  190. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  191. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  192. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  193. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  194. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  195. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  196. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  197. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  198. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  199. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  200. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  201. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  202. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  203. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  204. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  205. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  206. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  207. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  208. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  209. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  210. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  211. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  212. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  213. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  214. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  215. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  216. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  217. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  218. sempy_labs/report/_download_report.py +76 -0
  219. sempy_labs/report/_export_report.py +257 -0
  220. sempy_labs/report/_generate_report.py +427 -0
  221. sempy_labs/report/_paginated.py +76 -0
  222. sempy_labs/report/_report_bpa.py +354 -0
  223. sempy_labs/report/_report_bpa_rules.py +115 -0
  224. sempy_labs/report/_report_functions.py +581 -0
  225. sempy_labs/report/_report_helper.py +227 -0
  226. sempy_labs/report/_report_list_functions.py +110 -0
  227. sempy_labs/report/_report_rebind.py +149 -0
  228. sempy_labs/report/_reportwrapper.py +3100 -0
  229. sempy_labs/report/_save_report.py +147 -0
  230. sempy_labs/snowflake_database/__init__.py +10 -0
  231. sempy_labs/snowflake_database/_items.py +105 -0
  232. sempy_labs/sql_database/__init__.py +21 -0
  233. sempy_labs/sql_database/_items.py +201 -0
  234. sempy_labs/sql_database/_mirroring.py +79 -0
  235. sempy_labs/theme/__init__.py +12 -0
  236. sempy_labs/theme/_org_themes.py +129 -0
  237. sempy_labs/tom/__init__.py +3 -0
  238. sempy_labs/tom/_model.py +5977 -0
  239. sempy_labs/variable_library/__init__.py +19 -0
  240. sempy_labs/variable_library/_functions.py +403 -0
  241. sempy_labs/warehouse/__init__.py +28 -0
  242. sempy_labs/warehouse/_items.py +234 -0
  243. sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/lakehouse/_get_lakehouse_tables.py
@@ -0,0 +1,274 @@
+ import os
+ import pandas as pd
+ import pyarrow.parquet as pq
+ from datetime import datetime
+ from sempy_labs._helper_functions import (
+     _get_column_aggregate,
+     resolve_lakehouse_name_and_id,
+     save_as_delta_table,
+     resolve_workspace_id,
+     _read_delta_table,
+     _get_delta_table,
+     _mount,
+     create_abfss_path,
+     _pure_python_notebook,
+ )
+ from sempy_labs.directlake._guardrails import (
+     get_sku_size,
+     get_directlake_guardrails_for_sku,
+ )
+ from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+ from typing import Optional
+ import sempy_labs._icons as icons
+ from sempy._utils._log import log
+ from uuid import UUID
+ from sempy_labs.lakehouse._schemas import list_tables
+
+
+ @log
+ def get_lakehouse_tables(
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+     extended: bool = False,
+     count_rows: bool = False,
+     export: bool = False,
+     exclude_shortcuts: bool = False,
+ ) -> pd.DataFrame:
+     """
+     Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails.
+
+     This function can be executed in either a PySpark or pure Python notebook.
+
+     This is a wrapper function for the following API: `Tables - List Tables <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/list-tables>`_ plus extended capabilities.
+     However, the above mentioned API does not support Lakehouse schemas (Preview) until it is in GA (General Availability). This version also supports schema
+     enabled Lakehouses.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     extended : bool, default=False
+         Obtains additional columns relevant to the size of each table.
+     count_rows : bool, default=False
+         Obtains a row count for each lakehouse table.
+     export : bool, default=False
+         Exports the resulting dataframe to a delta table in the lakehouse.
+     exclude_shortcuts : bool, default=False
+         If True, excludes shortcuts.
+
+     Returns
+     -------
+     pandas.DataFrame
+         Shows the tables/columns within a lakehouse and their properties.
+     """
+
+     workspace_id = resolve_workspace_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=workspace_id
+     )
+
+     df = list_tables(lakehouse=lakehouse, workspace=workspace)
+
+     local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
+
+     if extended:
+         sku_value = get_sku_size(workspace_id)
+         guardrail = get_directlake_guardrails_for_sku(sku_value)
+         # Avoid mounting the lakehouse if is already mounted
+         if not local_path:
+             local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
+
+         df["Files"], df["Row Groups"], df["Table Size"] = None, None, None
+         if count_rows:
+             df["Row Count"] = None
+
+         for i, r in df.iterrows():
+             schema_name = r["Schema Name"]
+             table_name = r["Table Name"]
+             if r["Type"] == "Managed" and r["Format"] == "delta":
+                 delta_table_path = (
+                     create_abfss_path(
+                         lakehouse_id, workspace_id, table_name, schema_name
+                     )
+                     .replace("//", "/")  # When schema_name = ""
+                     .replace("abfss:/", "abfss://")  # Put back the // after abfss:
+                 )
+
+                 if _pure_python_notebook():
+                     from deltalake import DeltaTable
+
+                     delta_table = DeltaTable(delta_table_path)
+                     latest_files = [
+                         file["path"]
+                         for file in delta_table.get_add_actions().to_pylist()
+                     ]
+                     size_in_bytes = 0
+                     for f in latest_files:
+                         local_file_path = os.path.join(
+                             local_path, "Tables", schema_name, table_name, f
+                         )
+
+                         if os.path.exists(local_file_path):
+                             size_in_bytes += os.path.getsize(local_file_path)
+                     num_latest_files = len(latest_files)
+                 else:
+                     delta_table = _get_delta_table(delta_table_path)
+
+                     latest_files = _read_delta_table(delta_table_path).inputFiles()
+                     table_df = delta_table.toDF()
+                     table_details = delta_table.detail().collect()[0].asDict()
+                     size_in_bytes = table_details.get("sizeInBytes", 0)
+                     num_latest_files = table_details.get("numFiles", 0)
+
+                 table_path = os.path.join(local_path, "Tables", schema_name, table_name)
+
+                 file_paths = []
+                 for file in latest_files:
+                     if _pure_python_notebook():
+                         file_paths.append(file)
+                     else:
+                         # Append the <Partition folder>/<filename> or <filename>
+                         find_table = file.find(table_name)
+                         len_file = len(file)
+                         len_table = len(table_name)
+                         last_chars = len_file - (find_table + len_table + 1)
+                         file_paths.append(file[-last_chars:])
+
+                 num_rowgroups = 0
+                 for filename in file_paths:
+                     parquet_file_path = f"{table_path}/{filename}"
+                     if os.path.exists(parquet_file_path):
+                         parquet_file = pq.ParquetFile(parquet_file_path)
+                         num_rowgroups += parquet_file.num_row_groups
+
+                 df.at[i, "Files"] = num_latest_files
+                 df.at[i, "Row Groups"] = num_rowgroups
+                 df.at[i, "Table Size"] = size_in_bytes
+
+                 if count_rows:
+                     if _pure_python_notebook():
+                         row_count = delta_table.to_pyarrow_table().num_rows
+                     else:
+                         row_count = table_df.count()
+                     df.at[i, "Row Count"] = row_count
+
+     # Set "Schema Name" = "dbo" when it is ""
+     df.loc[df["Schema Name"] == "", "Schema Name"] = "dbo"
+
+     if extended:
+         intColumns = ["Files", "Row Groups", "Table Size"]
+         df[intColumns] = df[intColumns].astype(int)
+
+         col_name = guardrail.columns[0]
+         df["SKU"] = guardrail[col_name].iloc[0]
+         df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0]
+         df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0]
+         df["Row Count Guardrail"] = (
+             guardrail["Rows per table (millions)"].iloc[0] * 1000000
+         )
+
+         df["Parquet File Guardrail Hit"] = df["Files"] > df["Parquet File Guardrail"]
+         df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"]
+         if count_rows:
+             df["Row Count"] = df["Row Count"].astype(int)
+             df["Row Count Guardrail Hit"] = df["Row Count"] > df["Row Count Guardrail"]
+
+     if exclude_shortcuts:
+         from sempy_labs.lakehouse._shortcuts import list_shortcuts
+
+         # Exclude shortcuts
+         shortcuts = (
+             list_shortcuts(lakehouse=lakehouse, workspace=workspace)
+             .query("`Shortcut Path`.str.startswith('/Tables')", engine="python")
+             .assign(
+                 FullPath=lambda df: df["Shortcut Path"].str.rstrip("/")
+                 + "/"
+                 + df["Shortcut Name"]
+             )["FullPath"]
+             .tolist()
+         )
+
+         df["FullPath"] = df.apply(
+             lambda x: (
+                 f"/Tables/{x['Table Name']}"
+                 if pd.isna(x["Schema Name"]) or x["Schema Name"] == ""
+                 else f"/Tables/{x['Schema Name']}/{x['Table Name']}"
+             ),
+             axis=1,
+         )
+
+         df = df[~df["FullPath"].isin(shortcuts)].reset_index(drop=True)
+
+     if export:
+         if not lakehouse_attached():
+             raise ValueError(
+                 f"{icons.red_dot} In order to save the dataframe, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
+             )
+
+         lake_table_name = "lakehouse_table_details"
+         df_filt = df[df["Table Name"] == lake_table_name]
+
+         if df_filt.empty:
+             run_id = 1
+         else:
+             max_run_id = _get_column_aggregate(table_name=lake_table_name)
+             run_id = max_run_id + 1
+
+         export_df = df.copy()
+
+         cols = [
+             "Files",
+             "Row Groups",
+             "Row Count",
+             "Table Size",
+             "SKU",
+             "Parquet File Guardrail",
+             "Row Group Guardrail",
+             "Row Count Guardrail",
+             "Parquet File Guardrail Hit",
+             "Row Group Guardrail Hit",
+             "Row Count Guardrail Hit",
+         ]
+
+         for c in cols:
+             if c not in export_df:
+                 if c in [
+                     "Files",
+                     "Row Groups",
+                     "Row Count",
+                     "Table Size",
+                     "Parquet File Guardrail",
+                     "Row Group Guardrail",
+                     "Row Count Guardrail",
+                 ]:
+                     export_df[c] = 0
+                     export_df[c] = export_df[c].astype(int)
+                 elif c in ["SKU"]:
+                     export_df[c] = None
+                     export_df[c] = export_df[c].astype(str)
+                 elif c in [
+                     "Parquet File Guardrail Hit",
+                     "Row Group Guardrail Hit",
+                     "Row Count Guardrail Hit",
+                 ]:
+                     export_df[c] = False
+                     export_df[c] = export_df[c].astype(bool)
+
+         print(
+             f"{icons.in_progress} Saving Lakehouse table properties to the '{lake_table_name}' table in the lakehouse...\n"
+         )
+         export_df["Timestamp"] = datetime.now()
+         export_df["RunId"] = run_id
+
+         save_as_delta_table(
+             dataframe=export_df, delta_table_name=lake_table_name, write_mode="append"
+         )
+
+     return df
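
A minimal usage sketch for the get_lakehouse_tables function above, assuming it runs in a Fabric notebook with a lakehouse attached and that the function is re-exported from sempy_labs.lakehouse (the subpackage __init__.py is included in the wheel but not shown here); the lakehouse name "Sales" is a placeholder:

    from sempy_labs.lakehouse import get_lakehouse_tables

    # Survey every delta table in the lakehouse, including the Direct Lake
    # guardrail columns and per-table row counts.
    df_tables = get_lakehouse_tables(
        lakehouse="Sales",   # hypothetical lakehouse name; None uses the attached lakehouse
        workspace=None,      # None resolves to the notebook's workspace
        extended=True,       # adds Files / Row Groups / Table Size / guardrail columns
        count_rows=True,     # adds a Row Count column per table
        export=False,        # True appends the results to 'lakehouse_table_details'
    )

    # Flag tables that exceed any Direct Lake guardrail for the capacity SKU.
    over_guardrail = df_tables[
        df_tables["Parquet File Guardrail Hit"]
        | df_tables["Row Group Guardrail Hit"]
        | df_tables["Row Count Guardrail Hit"]
    ]
    print(over_guardrail[["Schema Name", "Table Name", "Files", "Row Groups", "Row Count"]])

Note that the guardrail-hit columns only exist when extended=True (and Row Count Guardrail Hit only when count_rows=True as well), so the filter above matches the options passed in.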
sempy_labs/lakehouse/_helper.py
@@ -0,0 +1,250 @@
+ from uuid import UUID
+ from typing import Optional, Literal
+ import pyarrow.dataset as ds
+ from sempy_labs._helper_functions import (
+     _mount,
+     delete_item,
+     _base_api,
+     resolve_workspace_name_and_id,
+     resolve_lakehouse_name_and_id,
+ )
+ from sempy._utils._log import log
+ import sempy_labs._icons as icons
+ import os
+ import json
+
+
+ @log
+ def is_v_ordered(
+     table_name: str,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+     schema: Optional[str] = None,
+ ) -> bool:
+     """
+     Checks if a delta table is v-ordered.
+
+     Parameters
+     ----------
+     table_name : str
+         The name of the table to check.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     schema : str, optional
+         The schema of the table to check. If not provided, the default schema is used.
+
+     Returns
+     -------
+     bool
+         True if the table is v-ordered, False otherwise.
+     """
+
+     local_path = _mount(lakehouse=lakehouse, workspace=workspace)
+     table_path = (
+         f"{local_path}/Tables/{schema}/{table_name}"
+         if schema
+         else f"{local_path}/Tables/{table_name}"
+     )
+     ds_schema = ds.dataset(table_path).schema.metadata
+
+     if ds_schema:
+         return any(b"vorder" in key for key in ds_schema.keys())
+
+     delta_log_path = os.path.join(table_path, "_delta_log")
+
+     def read_vorder_tag(delta_log_path):
+         json_files = sorted(
+             [f for f in os.listdir(delta_log_path) if f.endswith(".json")], reverse=True
+         )
+
+         if not json_files:
+             return False
+
+         latest_file = os.path.join(delta_log_path, json_files[0])
+
+         with open(latest_file, "r") as f:
+             all_data = [
+                 json.loads(line) for line in f if line.strip()
+             ]  # one dict per line
+             for data in all_data:
+                 if "metaData" in data:
+                     return (
+                         data.get("metaData", {})
+                         .get("configuration", {})
+                         .get("delta.parquet.vorder.enabled", "false")
+                         == "true"
+                     )
+
+             # If no metaData, fall back to commitInfo
+             for data in all_data:
+                 if "commitInfo" in data:
+                     tags = data["commitInfo"].get("tags", {})
+                     return tags.get("VORDER", "false").lower() == "true"
+
+         return False  # Default if not found
+
+     return read_vorder_tag(delta_log_path)
+
+
+ @log
+ def delete_lakehouse(
+     lakehouse: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> None:
+     """
+     Deletes a lakehouse.
+
+     This is a wrapper function for the following API: `Items - Delete Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/delete-lakehouse>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     lakehouse : str | uuid.UUID
+         The name or ID of the lakehouse to delete.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     """
+
+     delete_item(item=lakehouse, item_type="lakehouse", workspace=workspace)
+
+
+ @log
+ def update_lakehouse(
+     name: Optional[str] = None,
+     description: Optional[str] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ):
+     """
+     Updates a lakehouse.
+
+     This is a wrapper function for the following API: `Items - Update Lakehouse <https://learn.microsoft.com/rest/api/fabric/lakehouse/items/update-lakehouse>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     name: str, default=None
+         The new name of the lakehouse.
+         Defaults to None which does not update the name.
+     description: str, default=None
+         The new description of the lakehouse.
+         Defaults to None which does not update the description.
+     lakehouse : str | uuid.UUID, default=None
+         The name or ID of the lakehouse to update.
+         Defaults to None which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     """
+
+     if not name and not description:
+         raise ValueError(
+             f"{icons.red_dot} Either name or description must be provided."
+         )
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse, workspace_id
+     )
+
+     payload = {}
+     if name:
+         payload["displayName"] = name
+     if description:
+         payload["description"] = description
+
+     _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}",
+         method="patch",
+         client="fabric_sp",
+         payload=payload,
+     )
+
+     print(
+         f"{icons.green_dot} The '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been updated accordingly."
+     )
+
+
+ @log
+ def load_table(
+     table_name: str,
+     file_path: str,
+     mode: Literal["Overwrite", "Append"],
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ):
+     """
+     Loads a table into a lakehouse. Currently only files are supported, not folders.
+
+     This is a wrapper function for the following API: `Tables - Load Table <https://learn.microsoft.com/rest/api/fabric/lakehouse/tables/load-table>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     table_name : str
+         The name of the table to load.
+     file_path : str
+         The path to the data to load.
+     mode : Literal["Overwrite", "Append"]
+         The mode to use when loading the data.
+         "Overwrite" will overwrite the existing data.
+         "Append" will append the data to the existing data.
+     lakehouse : str | uuid.UUID, default=None
+         The name or ID of the lakehouse to load the table into.
+         Defaults to None which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None which resolves to the workspace of the attached lakehouse
+         or if no lakehouse attached, resolves to the workspace of the notebook.
+     """
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse, workspace_id
+     )
+
+     file_extension = os.path.splitext(file_path)[1]
+
+     payload = {
+         "relativePath": file_path,
+         "pathType": "File",
+         "mode": mode,
+         "formatOptions": {},
+     }
+
+     if file_extension == ".csv":
+         payload["formatOptions"] = {"format": "Csv", "header": True, "delimiter": ","}
+     elif file_extension == ".parquet":
+         payload["formatOptions"] = {
+             "format": "Parquet",
+             "header": True,
+         }
+     # Solve for loading folders
+     # elif file_extension == '':
+     #     payload['pathType'] = "Folder"
+     #     payload["recursive"] = recursive
+     #     payload['formatOptions']
+     else:
+         raise NotImplementedError()
+
+     _base_api(
+         request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables/{table_name}/load",
+         client="fabric_sp",
+         method="post",
+         status_codes=202,
+         lro_return_status_code=True,
+     )
+
+     print(
+         f"{icons.green_dot} The '{table_name}' table has been loaded into the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
+     )
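
A short usage sketch for the lakehouse helpers above (is_v_ordered, load_table, update_lakehouse), assuming a Fabric notebook with an attached lakehouse and assuming these functions are re-exported from sempy_labs.lakehouse via the subpackage __init__.py listed in the wheel; the table name "sales" and the path "Files/raw/sales.csv" are placeholders:

    from sempy_labs.lakehouse import is_v_ordered, load_table, update_lakehouse

    # Load a CSV that already sits in the lakehouse Files area into a managed
    # delta table; mode="Append" would add rows instead of replacing them.
    load_table(
        table_name="sales",
        file_path="Files/raw/sales.csv",   # hypothetical path relative to the lakehouse root
        mode="Overwrite",
    )

    # Check whether the resulting delta table was written with V-Order.
    if not is_v_ordered(table_name="sales"):
        print("Table 'sales' is not V-Ordered; consider optimizing it.")

    # Rename the attached lakehouse and refresh its description.
    update_lakehouse(
        name="SalesLakehouse",
        description="Curated sales data for Direct Lake models",
    )

Per the code above, update_lakehouse raises a ValueError if neither name nor description is supplied, and load_table only accepts .csv and .parquet files until folder loading is implemented.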