semantic-link-labs 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
  2. semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
  3. semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
  4. semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
  5. semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +606 -0
  7. sempy_labs/_a_lib_info.py +2 -0
  8. sempy_labs/_ai.py +437 -0
  9. sempy_labs/_authentication.py +264 -0
  10. sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
  11. sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
  12. sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
  13. sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
  14. sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
  15. sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
  16. sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
  17. sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
  18. sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
  19. sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
  20. sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
  21. sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
  22. sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
  23. sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
  24. sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
  25. sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
  26. sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
  27. sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
  28. sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
  29. sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
  30. sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
  31. sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
  32. sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
  33. sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
  34. sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
  35. sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
  36. sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
  37. sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
  38. sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
  39. sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
  40. sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
  41. sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
  42. sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
  43. sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
  44. sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
  45. sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
  46. sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
  47. sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
  48. sempy_labs/_capacities.py +1198 -0
  49. sempy_labs/_capacity_migration.py +660 -0
  50. sempy_labs/_clear_cache.py +351 -0
  51. sempy_labs/_connections.py +610 -0
  52. sempy_labs/_dashboards.py +69 -0
  53. sempy_labs/_data_access_security.py +98 -0
  54. sempy_labs/_data_pipelines.py +162 -0
  55. sempy_labs/_dataflows.py +668 -0
  56. sempy_labs/_dax.py +501 -0
  57. sempy_labs/_daxformatter.py +80 -0
  58. sempy_labs/_delta_analyzer.py +467 -0
  59. sempy_labs/_delta_analyzer_history.py +301 -0
  60. sempy_labs/_dictionary_diffs.py +221 -0
  61. sempy_labs/_documentation.py +147 -0
  62. sempy_labs/_domains.py +51 -0
  63. sempy_labs/_eventhouses.py +182 -0
  64. sempy_labs/_external_data_shares.py +230 -0
  65. sempy_labs/_gateways.py +521 -0
  66. sempy_labs/_generate_semantic_model.py +521 -0
  67. sempy_labs/_get_connection_string.py +84 -0
  68. sempy_labs/_git.py +543 -0
  69. sempy_labs/_graphQL.py +90 -0
  70. sempy_labs/_helper_functions.py +2833 -0
  71. sempy_labs/_icons.py +149 -0
  72. sempy_labs/_job_scheduler.py +609 -0
  73. sempy_labs/_kql_databases.py +149 -0
  74. sempy_labs/_kql_querysets.py +124 -0
  75. sempy_labs/_kusto.py +137 -0
  76. sempy_labs/_labels.py +124 -0
  77. sempy_labs/_list_functions.py +1720 -0
  78. sempy_labs/_managed_private_endpoints.py +253 -0
  79. sempy_labs/_mirrored_databases.py +416 -0
  80. sempy_labs/_mirrored_warehouses.py +60 -0
  81. sempy_labs/_ml_experiments.py +113 -0
  82. sempy_labs/_model_auto_build.py +140 -0
  83. sempy_labs/_model_bpa.py +557 -0
  84. sempy_labs/_model_bpa_bulk.py +378 -0
  85. sempy_labs/_model_bpa_rules.py +859 -0
  86. sempy_labs/_model_dependencies.py +343 -0
  87. sempy_labs/_mounted_data_factories.py +123 -0
  88. sempy_labs/_notebooks.py +441 -0
  89. sempy_labs/_one_lake_integration.py +151 -0
  90. sempy_labs/_onelake.py +131 -0
  91. sempy_labs/_query_scale_out.py +433 -0
  92. sempy_labs/_refresh_semantic_model.py +435 -0
  93. sempy_labs/_semantic_models.py +468 -0
  94. sempy_labs/_spark.py +455 -0
  95. sempy_labs/_sql.py +241 -0
  96. sempy_labs/_sql_audit_settings.py +207 -0
  97. sempy_labs/_sql_endpoints.py +214 -0
  98. sempy_labs/_tags.py +201 -0
  99. sempy_labs/_translations.py +43 -0
  100. sempy_labs/_user_delegation_key.py +44 -0
  101. sempy_labs/_utils.py +79 -0
  102. sempy_labs/_vertipaq.py +1021 -0
  103. sempy_labs/_vpax.py +388 -0
  104. sempy_labs/_warehouses.py +234 -0
  105. sempy_labs/_workloads.py +140 -0
  106. sempy_labs/_workspace_identity.py +72 -0
  107. sempy_labs/_workspaces.py +595 -0
  108. sempy_labs/admin/__init__.py +170 -0
  109. sempy_labs/admin/_activities.py +167 -0
  110. sempy_labs/admin/_apps.py +145 -0
  111. sempy_labs/admin/_artifacts.py +65 -0
  112. sempy_labs/admin/_basic_functions.py +463 -0
  113. sempy_labs/admin/_capacities.py +508 -0
  114. sempy_labs/admin/_dataflows.py +45 -0
  115. sempy_labs/admin/_datasets.py +186 -0
  116. sempy_labs/admin/_domains.py +522 -0
  117. sempy_labs/admin/_external_data_share.py +100 -0
  118. sempy_labs/admin/_git.py +72 -0
  119. sempy_labs/admin/_items.py +265 -0
  120. sempy_labs/admin/_labels.py +211 -0
  121. sempy_labs/admin/_reports.py +241 -0
  122. sempy_labs/admin/_scanner.py +118 -0
  123. sempy_labs/admin/_shared.py +82 -0
  124. sempy_labs/admin/_sharing_links.py +110 -0
  125. sempy_labs/admin/_tags.py +131 -0
  126. sempy_labs/admin/_tenant.py +503 -0
  127. sempy_labs/admin/_tenant_keys.py +89 -0
  128. sempy_labs/admin/_users.py +140 -0
  129. sempy_labs/admin/_workspaces.py +236 -0
  130. sempy_labs/deployment_pipeline/__init__.py +23 -0
  131. sempy_labs/deployment_pipeline/_items.py +580 -0
  132. sempy_labs/directlake/__init__.py +57 -0
  133. sempy_labs/directlake/_autosync.py +58 -0
  134. sempy_labs/directlake/_directlake_schema_compare.py +120 -0
  135. sempy_labs/directlake/_directlake_schema_sync.py +161 -0
  136. sempy_labs/directlake/_dl_helper.py +274 -0
  137. sempy_labs/directlake/_generate_shared_expression.py +94 -0
  138. sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
  139. sempy_labs/directlake/_get_shared_expression.py +34 -0
  140. sempy_labs/directlake/_guardrails.py +96 -0
  141. sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
  142. sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
  143. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
  144. sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
  145. sempy_labs/directlake/_warm_cache.py +236 -0
  146. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  147. sempy_labs/environment/__init__.py +23 -0
  148. sempy_labs/environment/_items.py +212 -0
  149. sempy_labs/environment/_pubstage.py +223 -0
  150. sempy_labs/eventstream/__init__.py +37 -0
  151. sempy_labs/eventstream/_items.py +263 -0
  152. sempy_labs/eventstream/_topology.py +652 -0
  153. sempy_labs/graph/__init__.py +59 -0
  154. sempy_labs/graph/_groups.py +651 -0
  155. sempy_labs/graph/_sensitivity_labels.py +120 -0
  156. sempy_labs/graph/_teams.py +125 -0
  157. sempy_labs/graph/_user_licenses.py +96 -0
  158. sempy_labs/graph/_users.py +516 -0
  159. sempy_labs/graph_model/__init__.py +15 -0
  160. sempy_labs/graph_model/_background_jobs.py +63 -0
  161. sempy_labs/graph_model/_items.py +149 -0
  162. sempy_labs/lakehouse/__init__.py +67 -0
  163. sempy_labs/lakehouse/_blobs.py +247 -0
  164. sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
  165. sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
  166. sempy_labs/lakehouse/_helper.py +250 -0
  167. sempy_labs/lakehouse/_lakehouse.py +351 -0
  168. sempy_labs/lakehouse/_livy_sessions.py +143 -0
  169. sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
  170. sempy_labs/lakehouse/_partitioning.py +165 -0
  171. sempy_labs/lakehouse/_schemas.py +217 -0
  172. sempy_labs/lakehouse/_shortcuts.py +440 -0
  173. sempy_labs/migration/__init__.py +35 -0
  174. sempy_labs/migration/_create_pqt_file.py +238 -0
  175. sempy_labs/migration/_direct_lake_to_import.py +105 -0
  176. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
  177. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
  178. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
  179. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
  180. sempy_labs/migration/_migration_validation.py +71 -0
  181. sempy_labs/migration/_refresh_calc_tables.py +131 -0
  182. sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
  183. sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
  184. sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
  185. sempy_labs/ml_model/__init__.py +23 -0
  186. sempy_labs/ml_model/_functions.py +427 -0
  187. sempy_labs/report/_BPAReportTemplate.json +232 -0
  188. sempy_labs/report/__init__.py +55 -0
  189. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  190. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  191. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  192. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  193. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  194. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  195. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  196. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  197. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  198. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  199. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  200. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  201. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  202. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  203. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  204. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  205. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  206. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  207. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  208. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  209. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  210. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  211. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  212. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  213. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  214. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  215. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  216. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  217. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  218. sempy_labs/report/_download_report.py +76 -0
  219. sempy_labs/report/_export_report.py +257 -0
  220. sempy_labs/report/_generate_report.py +427 -0
  221. sempy_labs/report/_paginated.py +76 -0
  222. sempy_labs/report/_report_bpa.py +354 -0
  223. sempy_labs/report/_report_bpa_rules.py +115 -0
  224. sempy_labs/report/_report_functions.py +581 -0
  225. sempy_labs/report/_report_helper.py +227 -0
  226. sempy_labs/report/_report_list_functions.py +110 -0
  227. sempy_labs/report/_report_rebind.py +149 -0
  228. sempy_labs/report/_reportwrapper.py +3100 -0
  229. sempy_labs/report/_save_report.py +147 -0
  230. sempy_labs/snowflake_database/__init__.py +10 -0
  231. sempy_labs/snowflake_database/_items.py +105 -0
  232. sempy_labs/sql_database/__init__.py +21 -0
  233. sempy_labs/sql_database/_items.py +201 -0
  234. sempy_labs/sql_database/_mirroring.py +79 -0
  235. sempy_labs/theme/__init__.py +12 -0
  236. sempy_labs/theme/_org_themes.py +129 -0
  237. sempy_labs/tom/__init__.py +3 -0
  238. sempy_labs/tom/_model.py +5977 -0
  239. sempy_labs/variable_library/__init__.py +19 -0
  240. sempy_labs/variable_library/_functions.py +403 -0
  241. sempy_labs/warehouse/__init__.py +28 -0
  242. sempy_labs/warehouse/_items.py +234 -0
  243. sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/_vpax.py ADDED
@@ -0,0 +1,388 @@
+ import sempy
+ import re
+ from urllib.parse import urlparse
+ import sempy.fabric as fabric
+ import sys
+ from pathlib import Path
+ from typing import Optional
+ from uuid import UUID
+ from sempy_labs._helper_functions import (
+     resolve_workspace_name_and_id,
+     resolve_dataset_name_and_id,
+     resolve_lakehouse_name_and_id,
+     _mount,
+     _get_column_aggregate,
+     resolve_item_type,
+     file_exists,
+     create_abfss_path_from_path,
+ )
+ from sempy._utils._log import log
+ import sempy_labs._icons as icons
+ import zipfile
+ import requests
+
+
+ VPA_VERSION = "1.10.0"
+ NUGET_BASE_URL = "https://www.nuget.org/api/v2/package"
+ ASSEMBLIES = [
+     "Dax.Metadata",
+     "Dax.Model.Extractor",
+     "Dax.ViewVpaExport",
+     "Dax.Vpax",
+ ]
+
+ _vpa_initialized = False
+ current_dir = Path(__file__).parent
+ nuget_dir = current_dir / "nuget_dlls"
+
+
+ def find_lib_folder(pkg_folder: Path) -> Path:
+     lib_base = pkg_folder / "lib"
+     if not lib_base.exists():
+         raise FileNotFoundError(f"No 'lib' directory in package {pkg_folder}")
+
+     # Prefer netstandard2.0 if available
+     candidates = sorted(lib_base.iterdir())
+     for preferred in ["netstandard2.0", "net6.0", "net5.0", "netcoreapp3.1", "net472"]:
+         if (lib_base / preferred).exists():
+             return lib_base / preferred
+
+     # Fallback: first available folder
+     for candidate in candidates:
+         if candidate.is_dir():
+             return candidate
+
+     raise FileNotFoundError(f"No usable framework folder found in {lib_base}")
+
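The framework-folder preference above can be sanity-checked against a throwaway package layout; a minimal sketch (the temp directory and folder names are illustrative only):

    import tempfile
    from pathlib import Path

    # Fake extracted package with two candidate framework folders.
    root = Path(tempfile.mkdtemp())
    (root / "lib" / "net472").mkdir(parents=True)
    (root / "lib" / "netstandard2.0").mkdir(parents=True)

    # netstandard2.0 wins because it is first in the preference list.
    assert find_lib_folder(root).name == "netstandard2.0"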
+
+ def download_and_extract_package(
+     package_name: str, version: str, target_dir: Path
+ ) -> Path:
+     nupkg_url = f"{NUGET_BASE_URL}/{package_name}/{version}"
+     nupkg_path = target_dir / f"{package_name}.{version}.nupkg"
+
+     if not nupkg_path.exists():
+         r = requests.get(nupkg_url)
+         r.raise_for_status()
+         target_dir.mkdir(parents=True, exist_ok=True)
+         with open(nupkg_path, "wb") as f:
+             f.write(r.content)
+
+     extract_path = target_dir / f"{package_name}_{version}"
+     if not extract_path.exists():
+         with zipfile.ZipFile(nupkg_path, "r") as zip_ref:
+             zip_ref.extractall(extract_path)
+     return extract_path
+
+
+ def download_and_load_nuget_package(
+     package_name, version, target_dir: Optional[Path] = None, load_assembly=True
+ ):
+
+     from System.Reflection import Assembly
+
+     if target_dir is None:
+         target_dir = nuget_dir
+
+     # Download and extract
+     pkg_folder = download_and_extract_package(package_name, version, target_dir)
+     lib_folder = find_lib_folder(pkg_folder)
+
+     dll_path = lib_folder / f"{package_name}.dll"
+     if not dll_path.exists():
+         raise FileNotFoundError(f"{dll_path} not found")
+
+     sys.path.append(str(lib_folder))
+     if load_assembly:
+         Assembly.LoadFile(str(dll_path))
+
+
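A usage sketch of the two helpers above. The package coordinates mirror those used elsewhere in this module, but the calls themselves are illustrative; note that download_and_load_nuget_package imports System.Reflection, so the CoreCLR runtime must already be configured (as init_vertipaq_analyzer below does before calling it):

    # Stage Dax.Vpax 1.10.0 on sys.path without loading the assembly yet.
    download_and_load_nuget_package("Dax.Vpax", "1.10.0", load_assembly=False)

    # Or download and extract only, then inspect where the DLLs landed.
    pkg_folder = download_and_extract_package("Dax.Vpax", "1.10.0", nuget_dir)
    print(find_lib_folder(pkg_folder))  # e.g. .../nuget_dlls/Dax.Vpax_1.10.0/lib/net6.0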
+ def init_vertipaq_analyzer():
+     global _vpa_initialized
+     if _vpa_initialized:
+         return
+
+     from clr_loader import get_coreclr
+     from pythonnet import set_runtime
+
+     # Load the runtime and set it BEFORE importing clr
+     runtime_config_path = current_dir / "dotnet_lib" / "dotnet.runtime.config.json"
+     rt = get_coreclr(runtime_config=str(runtime_config_path))
+     set_runtime(rt)
+
+     sempy.fabric._client._utils._init_analysis_services()
+
+     from System.Reflection import Assembly
+
+     for name in ASSEMBLIES:
+         download_and_load_nuget_package(
+             name, VPA_VERSION, nuget_dir, load_assembly=False
+         )
+
+     download_and_load_nuget_package("Newtonsoft.Json", "13.0.1")
+     download_and_load_nuget_package("System.IO.Packaging", "7.0.0")
+
+     # These assemblies must be loaded only after every package has been
+     # extracted; loading them inside download_and_load_nuget_package fails.
+     dll_paths = [
+         f"{nuget_dir}/Dax.Model.Extractor_{VPA_VERSION}/lib/net6.0/Dax.Model.Extractor.dll",
+         f"{nuget_dir}/Dax.Metadata_{VPA_VERSION}/lib/netstandard2.0/Dax.Metadata.dll",
+         f"{nuget_dir}/Dax.ViewVpaExport_{VPA_VERSION}/lib/netstandard2.0/Dax.ViewVpaExport.dll",
+         f"{nuget_dir}/Dax.Vpax_{VPA_VERSION}/lib/net6.0/Dax.Vpax.dll",
+     ]
+     for dll_path in dll_paths:
+         Assembly.LoadFile(dll_path)
+
+     _vpa_initialized = True
+
+
+ @log
+ def create_vpax(
+     dataset: str | UUID,
+     workspace: Optional[str | UUID] = None,
+     lakehouse: Optional[str | UUID] = None,
+     lakehouse_workspace: Optional[str | UUID] = None,
+     file_path: Optional[str] = None,
+     read_stats_from_data: bool = False,
+     read_direct_query_stats: bool = False,
+     direct_lake_stats_mode: str = "ResidentOnly",
+     overwrite: bool = False,
+ ):
+     """
+     Creates a .vpax file for a semantic model and saves it to a lakehouse. This is based on `SQL BI's VertiPaq Analyzer <https://www.sqlbi.com/tools/vertipaq-analyzer/>`_.
+
+     Parameters
+     ----------
+     dataset : str | uuid.UUID
+         Name or ID of the semantic model.
+     workspace : str | uuid.UUID, default=None
+         The workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     lakehouse : str | uuid.UUID, default=None
+         The lakehouse name or ID.
+         Defaults to None, which resolves to the attached lakehouse.
+     lakehouse_workspace : str | uuid.UUID, default=None
+         The workspace name or ID of the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse.
+     file_path : str, default=None
+         The path where the .vpax file will be saved in the lakehouse.
+         Defaults to None, which resolves to the dataset name.
+     read_stats_from_data : bool, default=False
+         Whether to read statistics from the data.
+     read_direct_query_stats : bool, default=False
+         Whether to analyze DirectQuery tables.
+     direct_lake_stats_mode : str, default='ResidentOnly'
+         The Direct Lake extraction mode; options are 'ResidentOnly' or 'Full'. This parameter is ignored if read_stats_from_data is False and is only relevant for tables that use Direct Lake mode.
+         If set to 'ResidentOnly', column statistics are obtained only for the columns which are in memory.
+         If set to 'Full', column statistics are obtained for all columns, provided the Direct Lake source can be identified.
+     overwrite : bool, default=False
+         Whether to overwrite the .vpax file if it already exists in the lakehouse.
+     """
+
+     init_vertipaq_analyzer()
+
+     import notebookutils
+     from Dax.Metadata import DirectLakeExtractionMode
+     from Dax.Model.Extractor import TomExtractor
+     from Dax.Vpax.Tools import VpaxTools
+     from Dax.ViewVpaExport import Model
+     from System.IO import MemoryStream, FileMode, FileStream, FileAccess, FileShare
+
+     direct_lake_stats_mode = direct_lake_stats_mode.capitalize()
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
+     (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(
+         lakehouse_workspace
+     )
+     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+         lakehouse=lakehouse, workspace=lakehouse_workspace_id
+     )
+
+     local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id)
+     if file_path is None:
+         file_path = dataset_name
+
+     if file_path.endswith(".vpax"):
+         file_path = file_path[:-5]
+     save_location = f"Files/{file_path}.vpax"
+     path = f"{local_path}/{save_location}"
+
+     # Check if the .vpax file already exists in the lakehouse
+     if not overwrite:
+         new_path = create_abfss_path_from_path(
+             lakehouse_id, lakehouse_workspace_id, save_location
+         )
+         if file_exists(new_path):
+             print(
+                 f"{icons.warning} The {save_location} file already exists in the '{lakehouse_name}' lakehouse. Set overwrite=True to overwrite the file."
+             )
+             return
+
+     vpax_stream = MemoryStream()
+     extractor_app_name = "VPAX Notebook"
+     extractor_app_version = "1.0"
+     column_batch_size = 50
+     token = notebookutils.credentials.getToken("pbi")
+     connection_string = f"data source=powerbi://api.powerbi.com/v1.0/myorg/{workspace_name};initial catalog={dataset_name};User ID=;Password={token};Persist Security Info=True;Impersonation Level=Impersonate"
+
+     print(f"{icons.in_progress} Extracting .vpax metadata...")
+
+     # Get stats for the model; for Direct Lake, only resident columns are read here
+     dax_model = TomExtractor.GetDaxModel(
+         connection_string,
+         extractor_app_name,
+         extractor_app_version,
+         read_stats_from_data,
+         0,
+         read_direct_query_stats,
+         DirectLakeExtractionMode.ResidentOnly,
+         column_batch_size,
+     )
+     vpa_model = Model(dax_model)
+     tom_database = TomExtractor.GetDatabase(connection_string)
+
+     # Calculate Direct Lake stats for columns which are IsResident=False
+     from sempy_labs.tom import connect_semantic_model
+
+     with connect_semantic_model(dataset=dataset, workspace=workspace) as tom:
+         is_direct_lake = tom.is_direct_lake()
+         if read_stats_from_data and is_direct_lake and direct_lake_stats_mode == "Full":
+
+             df_not_resident = fabric.evaluate_dax(
+                 dataset=dataset,
+                 workspace=workspace,
+                 dax_string=""" SELECT [DIMENSION_NAME] AS [TableName], [ATTRIBUTE_NAME] AS [ColumnName] FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMNS WHERE NOT [ISROWNUMBER] AND NOT [DICTIONARY_ISRESIDENT]""",
+             )
+
+             import Microsoft.AnalysisServices.Tabular as TOM
+
+             print(f"{icons.in_progress} Calculating Direct Lake statistics...")
+
+             # List workspace items once, for resolving SQL endpoint sources
+             dfI = fabric.list_items(workspace=workspace)
+             # Get list of tables in Direct Lake mode which have columns that are not resident
+             tbls = [
+                 t
+                 for t in tom.model.Tables
+                 if t.Name in df_not_resident["TableName"].values
+                 and any(p.Mode == TOM.ModeType.DirectLake for p in t.Partitions)
+             ]
+             for t in tbls:
+                 column_cardinalities = {}
+                 table_name = t.Name
+                 partition = next(p for p in t.Partitions)
+                 entity_name = partition.Source.EntityName
+                 schema_name = partition.Source.SchemaName
+                 if len(schema_name) == 0 or schema_name == "dbo":
+                     schema_name = None
+                 expr_name = partition.Source.ExpressionSource.Name
+                 expr = tom.model.Expressions[expr_name].Expression
+                 item_id = None
+                 if "Sql.Database(" in expr:
+                     matches = re.findall(r'"([^"]+)"', expr)
+                     sql_endpoint_id = matches[1]
+                     dfI_filt = dfI[dfI["Id"] == sql_endpoint_id]
+                     item_name = (
+                         dfI_filt["Display Name"].iloc[0] if not dfI_filt.empty else None
+                     )
+                     dfI_filt2 = dfI[
+                         (dfI["Display Name"] == item_name)
+                         & (dfI["Type"].isin(["Lakehouse", "Warehouse"]))
+                     ]
+                     item_id = dfI_filt2["Id"].iloc[0]
+                     item_type = dfI_filt2["Type"].iloc[0]
+                     item_workspace_id = workspace_id
+                 elif "AzureStorage.DataLake(" in expr:
+                     match = re.search(r'AzureStorage\.DataLake\("([^"]+)"', expr)
+                     if match:
+                         url = match.group(1)
+                         path_parts = urlparse(url).path.strip("/").split("/")
+                         if len(path_parts) >= 2:
+                             item_workspace_id, item_id = (
+                                 path_parts[0],
+                                 path_parts[1],
+                             )
+                             item_type = resolve_item_type(
+                                 item_id=item_id, workspace=workspace_id
+                             )
+                 else:
+                     raise NotImplementedError(
+                         f"Direct Lake source '{expr}' is not supported. Please report this issue on GitHub (https://github.com/microsoft/semantic-link-labs/issues)."
+                     )
+
+                 if not item_id:
+                     print(
+                         f"{icons.info} Cannot determine the Direct Lake source of the '{table_name}' table."
+                     )
+                 elif item_type == "Warehouse":
+                     print(
+                         f"{icons.info} The '{table_name}' table references a warehouse. Warehouses are not yet supported for this method."
+                     )
+                 else:
+                     df_not_resident_cols = df_not_resident[
+                         df_not_resident["TableName"] == table_name
+                     ]
+                     col_dict = {
+                         c.Name: c.SourceColumn
+                         for c in t.Columns
+                         if c.Type != TOM.ColumnType.RowNumber
+                         and c.Name in df_not_resident_cols["ColumnName"].values
+                     }
+                     col_agg = _get_column_aggregate(
+                         lakehouse=item_id,
+                         workspace=item_workspace_id,
+                         table_name=entity_name,
+                         schema_name=schema_name,
+                         column_name=list(col_dict.values()),
+                         function="distinct",
+                     )
+                     column_cardinalities = {
+                         column_name: col_agg[source_column]
+                         for column_name, source_column in col_dict.items()
+                         if source_column in col_agg
+                     }
+
+                     # Update the dax_model file with column cardinalities
+                     tbl = next(
+                         table
+                         for table in dax_model.Tables
+                         if str(table.TableName) == table_name
+                     )
+                     cols = [
+                         col
+                         for col in tbl.Columns
+                         if str(col.ColumnType) != "RowNumber"
+                         and str(col.ColumnName) in column_cardinalities
+                     ]
+                     for col in cols:
+                         col.ColumnCardinality = column_cardinalities.get(
+                             str(col.ColumnName)
+                         )
+
+     VpaxTools.ExportVpax(vpax_stream, dax_model, vpa_model, tom_database)
+
+     print(f"{icons.in_progress} Exporting .vpax file...")
+
+     mode = FileMode.Create
+     file_stream = FileStream(path, mode, FileAccess.Write, FileShare.Read)
+     vpax_stream.CopyTo(file_stream)
+     file_stream.Close()
+
+     print(
+         f"{icons.green_dot} The {file_path}.vpax file has been saved in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace."
+     )
+
+
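A hypothetical invocation of create_vpax from a Fabric notebook; the dataset, workspace, and lakehouse names below are placeholders:

    # Export full statistics, including cardinalities for non-resident
    # Direct Lake columns, overwriting any existing file.
    create_vpax(
        dataset="Sales Model",     # placeholder name
        workspace="Analytics",     # placeholder name
        lakehouse="Monitoring",    # placeholder name
        read_stats_from_data=True,
        direct_lake_stats_mode="Full",
        overwrite=True,
    )
    # Writes Files/Sales Model.vpax to the 'Monitoring' lakehouse.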
+ def _dax_distinctcount(table_name, columns):
+
+     dax = "EVALUATE\nROW("
+     for c in columns:
+         full_name = f"'{table_name}'[{c}]"
+         dax += f"""\n"{c}", DISTINCTCOUNT({full_name}),"""
+
+     return f"{dax.rstrip(',')}\n)"
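For reference, _dax_distinctcount emits one DISTINCTCOUNT per column inside a single ROW(); for a hypothetical 'Sales' table with two columns it produces:

    print(_dax_distinctcount("Sales", ["OrderID", "CustomerID"]))
    # EVALUATE
    # ROW(
    # "OrderID", DISTINCTCOUNT('Sales'[OrderID]),
    # "CustomerID", DISTINCTCOUNT('Sales'[CustomerID])
    # )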
sempy_labs/warehouse/_items.py ADDED
@@ -0,0 +1,234 @@
+ from sempy_labs._helper_functions import (
+     resolve_workspace_name_and_id,
+     _base_api,
+     _create_dataframe,
+     _update_dataframe_datatypes,
+     delete_item,
+     resolve_workspace_id,
+ )
+ import pandas as pd
+ from typing import Optional
+ import sempy_labs._icons as icons
+ from uuid import UUID
+ from sempy._utils._log import log
+
+
+ @log
+ def create_warehouse(
+     warehouse: str,
+     description: Optional[str] = None,
+     case_insensitive_collation: bool = False,
+     workspace: Optional[str | UUID] = None,
+ ) -> UUID:
+     """
+     Creates a Fabric warehouse.
+
+     This is a wrapper function for the following API: `Items - Create Warehouse <https://learn.microsoft.com/rest/api/fabric/warehouse/items/create-warehouse>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     warehouse : str
+         Name of the warehouse.
+     description : str, default=None
+         A description of the warehouse.
+     case_insensitive_collation : bool, default=False
+         If True, creates the warehouse with case-insensitive collation.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     uuid.UUID
+         The ID of the created warehouse.
+     """
+
+     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+     payload = {"displayName": warehouse}
+
+     if description:
+         payload["description"] = description
+     if case_insensitive_collation:
+         payload.setdefault("creationPayload", {})
+         payload["creationPayload"][
+             "defaultCollation"
+         ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"
+
+     result = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/warehouses",
+         payload=payload,
+         method="post",
+         lro_return_json=True,
+         status_codes=[201, 202],
+         client="fabric_sp",
+     )
+
+     print(
+         f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace."
+     )
+
+     return result.get("id")
+
+
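A hypothetical call to create_warehouse (names are placeholders), requesting case-insensitive collation and capturing the new item's ID:

    warehouse_id = create_warehouse(
        warehouse="SalesDW",                     # placeholder name
        description="Warehouse for sales data",  # placeholder description
        case_insensitive_collation=True,
        workspace="Analytics",                   # placeholder workspace
    )
    print(warehouse_id)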
+ @log
+ def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
+     """
+     Shows the warehouses within a workspace.
+
+     This is a wrapper function for the following API: `Items - List Warehouses <https://learn.microsoft.com/rest/api/fabric/warehouse/items/list-warehouses>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing the warehouses within a workspace.
+     """
+
+     columns = {
+         "Warehouse Name": "string",
+         "Warehouse Id": "string",
+         "Description": "string",
+         "Connection Info": "string",
+         "Created Date": "datetime",
+         "Last Updated Time": "datetime",
+     }
+     df = _create_dataframe(columns=columns)
+
+     workspace_id = resolve_workspace_id(workspace)
+
+     responses = _base_api(
+         request=f"/v1/workspaces/{workspace_id}/warehouses",
+         uses_pagination=True,
+         client="fabric_sp",
+     )
+
+     rows = []
+     for r in responses:
+         for v in r.get("value", []):
+             prop = v.get("properties", {})
+
+             rows.append(
+                 {
+                     "Warehouse Name": v.get("displayName"),
+                     "Warehouse Id": v.get("id"),
+                     "Description": v.get("description"),
+                     "Connection Info": prop.get("connectionInfo"),
+                     "Created Date": prop.get("createdDate"),
+                     "Last Updated Time": prop.get("lastUpdatedTime"),
+                 }
+             )
+
+     if rows:
+         df = pd.DataFrame(rows, columns=list(columns.keys()))
+         _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+     return df
+
+
+ @log
+ def delete_warehouse(name: str | UUID, workspace: Optional[str | UUID] = None):
+     """
+     Deletes a Fabric warehouse.
+
+     This is a wrapper function for the following API: `Items - Delete Warehouse <https://learn.microsoft.com/rest/api/fabric/warehouse/items/delete-warehouse>`_.
+
+     Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).
+
+     Parameters
+     ----------
+     name : str | uuid.UUID
+         Name or ID of the warehouse.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+     """
+
+     delete_item(item=name, type="Warehouse", workspace=workspace)
+
+
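list_warehouses and delete_warehouse combine into a simple cleanup flow; a sketch assuming a warehouse named 'SalesDW' exists in the current workspace:

    df = list_warehouses()
    # Resolve the ID first and delete by ID to avoid display-name ambiguity.
    target = df.loc[df["Warehouse Name"] == "SalesDW", "Warehouse Id"]
    if not target.empty:
        delete_warehouse(name=target.iloc[0])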
+ @log
+ def get_warehouse_tables(
+     warehouse: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Shows a list of the tables in the Fabric warehouse. This function is based on INFORMATION_SCHEMA.TABLES.
+
+     Parameters
+     ----------
+     warehouse : str | uuid.UUID
+         Name or ID of the Fabric warehouse.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing a list of the tables in the Fabric warehouse.
+     """
+
+     from sempy_labs._sql import ConnectWarehouse
+
+     with ConnectWarehouse(warehouse=warehouse, workspace=workspace) as sql:
+         df = sql.query(
+             """
+             SELECT TABLE_SCHEMA AS [Schema], TABLE_NAME AS [Table Name], TABLE_TYPE AS [Table Type]
+             FROM INFORMATION_SCHEMA.TABLES
+             WHERE TABLE_TYPE = 'BASE TABLE'
+             """
+         )
+
+     return df
+
+
+ @log
+ def get_warehouse_columns(
+     warehouse: str | UUID, workspace: Optional[str | UUID] = None
+ ) -> pd.DataFrame:
+     """
+     Shows a list of the columns in each table within the Fabric warehouse. This function is based on INFORMATION_SCHEMA.COLUMNS.
+
+     Parameters
+     ----------
+     warehouse : str | uuid.UUID
+         Name or ID of the Fabric warehouse.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A pandas dataframe showing a list of the columns in each table within the Fabric warehouse.
+     """
+
+     from sempy_labs._sql import ConnectWarehouse
+
+     with ConnectWarehouse(warehouse=warehouse, workspace=workspace) as sql:
+         df = sql.query(
+             """
+             SELECT t.TABLE_SCHEMA AS [Schema], t.TABLE_NAME AS [Table Name], c.COLUMN_NAME AS [Column Name], c.DATA_TYPE AS [Data Type], c.IS_NULLABLE AS [Is Nullable], c.CHARACTER_MAXIMUM_LENGTH AS [Character Max Length]
+             FROM INFORMATION_SCHEMA.TABLES AS t
+             LEFT JOIN INFORMATION_SCHEMA.COLUMNS AS c
+                 ON t.TABLE_NAME = c.TABLE_NAME
+                 AND t.TABLE_SCHEMA = c.TABLE_SCHEMA
+             WHERE t.TABLE_TYPE = 'BASE TABLE'
+             """
+         )
+
+     return df
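A closing sketch combining the two INFORMATION_SCHEMA helpers above to count columns per base table (the warehouse name is a placeholder):

    tables = get_warehouse_tables(warehouse="SalesDW")    # placeholder name
    columns = get_warehouse_columns(warehouse="SalesDW")  # placeholder name

    # Column counts per table, joined back onto the base-table list.
    counts = (
        columns.groupby(["Schema", "Table Name"]).size().rename("Column Count")
    )
    print(tables.merge(counts.reset_index(), on=["Schema", "Table Name"], how="left"))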