semantic-link-labs 0.12.8 (semantic_link_labs-0.12.8-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. semantic_link_labs-0.12.8.dist-info/METADATA +354 -0
  2. semantic_link_labs-0.12.8.dist-info/RECORD +243 -0
  3. semantic_link_labs-0.12.8.dist-info/WHEEL +5 -0
  4. semantic_link_labs-0.12.8.dist-info/licenses/LICENSE +21 -0
  5. semantic_link_labs-0.12.8.dist-info/top_level.txt +1 -0
  6. sempy_labs/__init__.py +606 -0
  7. sempy_labs/_a_lib_info.py +2 -0
  8. sempy_labs/_ai.py +437 -0
  9. sempy_labs/_authentication.py +264 -0
  10. sempy_labs/_bpa_translation/_model/_translations_am-ET.po +869 -0
  11. sempy_labs/_bpa_translation/_model/_translations_ar-AE.po +908 -0
  12. sempy_labs/_bpa_translation/_model/_translations_bg-BG.po +968 -0
  13. sempy_labs/_bpa_translation/_model/_translations_ca-ES.po +963 -0
  14. sempy_labs/_bpa_translation/_model/_translations_cs-CZ.po +943 -0
  15. sempy_labs/_bpa_translation/_model/_translations_da-DK.po +945 -0
  16. sempy_labs/_bpa_translation/_model/_translations_de-DE.po +988 -0
  17. sempy_labs/_bpa_translation/_model/_translations_el-GR.po +993 -0
  18. sempy_labs/_bpa_translation/_model/_translations_es-ES.po +971 -0
  19. sempy_labs/_bpa_translation/_model/_translations_fa-IR.po +933 -0
  20. sempy_labs/_bpa_translation/_model/_translations_fi-FI.po +942 -0
  21. sempy_labs/_bpa_translation/_model/_translations_fr-FR.po +994 -0
  22. sempy_labs/_bpa_translation/_model/_translations_ga-IE.po +967 -0
  23. sempy_labs/_bpa_translation/_model/_translations_he-IL.po +902 -0
  24. sempy_labs/_bpa_translation/_model/_translations_hi-IN.po +944 -0
  25. sempy_labs/_bpa_translation/_model/_translations_hu-HU.po +963 -0
  26. sempy_labs/_bpa_translation/_model/_translations_id-ID.po +946 -0
  27. sempy_labs/_bpa_translation/_model/_translations_is-IS.po +939 -0
  28. sempy_labs/_bpa_translation/_model/_translations_it-IT.po +986 -0
  29. sempy_labs/_bpa_translation/_model/_translations_ja-JP.po +846 -0
  30. sempy_labs/_bpa_translation/_model/_translations_ko-KR.po +839 -0
  31. sempy_labs/_bpa_translation/_model/_translations_mt-MT.po +967 -0
  32. sempy_labs/_bpa_translation/_model/_translations_nl-NL.po +978 -0
  33. sempy_labs/_bpa_translation/_model/_translations_pl-PL.po +962 -0
  34. sempy_labs/_bpa_translation/_model/_translations_pt-BR.po +962 -0
  35. sempy_labs/_bpa_translation/_model/_translations_pt-PT.po +957 -0
  36. sempy_labs/_bpa_translation/_model/_translations_ro-RO.po +968 -0
  37. sempy_labs/_bpa_translation/_model/_translations_ru-RU.po +964 -0
  38. sempy_labs/_bpa_translation/_model/_translations_sk-SK.po +952 -0
  39. sempy_labs/_bpa_translation/_model/_translations_sl-SL.po +950 -0
  40. sempy_labs/_bpa_translation/_model/_translations_sv-SE.po +942 -0
  41. sempy_labs/_bpa_translation/_model/_translations_ta-IN.po +976 -0
  42. sempy_labs/_bpa_translation/_model/_translations_te-IN.po +947 -0
  43. sempy_labs/_bpa_translation/_model/_translations_th-TH.po +924 -0
  44. sempy_labs/_bpa_translation/_model/_translations_tr-TR.po +953 -0
  45. sempy_labs/_bpa_translation/_model/_translations_uk-UA.po +961 -0
  46. sempy_labs/_bpa_translation/_model/_translations_zh-CN.po +804 -0
  47. sempy_labs/_bpa_translation/_model/_translations_zu-ZA.po +969 -0
  48. sempy_labs/_capacities.py +1198 -0
  49. sempy_labs/_capacity_migration.py +660 -0
  50. sempy_labs/_clear_cache.py +351 -0
  51. sempy_labs/_connections.py +610 -0
  52. sempy_labs/_dashboards.py +69 -0
  53. sempy_labs/_data_access_security.py +98 -0
  54. sempy_labs/_data_pipelines.py +162 -0
  55. sempy_labs/_dataflows.py +668 -0
  56. sempy_labs/_dax.py +501 -0
  57. sempy_labs/_daxformatter.py +80 -0
  58. sempy_labs/_delta_analyzer.py +467 -0
  59. sempy_labs/_delta_analyzer_history.py +301 -0
  60. sempy_labs/_dictionary_diffs.py +221 -0
  61. sempy_labs/_documentation.py +147 -0
  62. sempy_labs/_domains.py +51 -0
  63. sempy_labs/_eventhouses.py +182 -0
  64. sempy_labs/_external_data_shares.py +230 -0
  65. sempy_labs/_gateways.py +521 -0
  66. sempy_labs/_generate_semantic_model.py +521 -0
  67. sempy_labs/_get_connection_string.py +84 -0
  68. sempy_labs/_git.py +543 -0
  69. sempy_labs/_graphQL.py +90 -0
  70. sempy_labs/_helper_functions.py +2833 -0
  71. sempy_labs/_icons.py +149 -0
  72. sempy_labs/_job_scheduler.py +609 -0
  73. sempy_labs/_kql_databases.py +149 -0
  74. sempy_labs/_kql_querysets.py +124 -0
  75. sempy_labs/_kusto.py +137 -0
  76. sempy_labs/_labels.py +124 -0
  77. sempy_labs/_list_functions.py +1720 -0
  78. sempy_labs/_managed_private_endpoints.py +253 -0
  79. sempy_labs/_mirrored_databases.py +416 -0
  80. sempy_labs/_mirrored_warehouses.py +60 -0
  81. sempy_labs/_ml_experiments.py +113 -0
  82. sempy_labs/_model_auto_build.py +140 -0
  83. sempy_labs/_model_bpa.py +557 -0
  84. sempy_labs/_model_bpa_bulk.py +378 -0
  85. sempy_labs/_model_bpa_rules.py +859 -0
  86. sempy_labs/_model_dependencies.py +343 -0
  87. sempy_labs/_mounted_data_factories.py +123 -0
  88. sempy_labs/_notebooks.py +441 -0
  89. sempy_labs/_one_lake_integration.py +151 -0
  90. sempy_labs/_onelake.py +131 -0
  91. sempy_labs/_query_scale_out.py +433 -0
  92. sempy_labs/_refresh_semantic_model.py +435 -0
  93. sempy_labs/_semantic_models.py +468 -0
  94. sempy_labs/_spark.py +455 -0
  95. sempy_labs/_sql.py +241 -0
  96. sempy_labs/_sql_audit_settings.py +207 -0
  97. sempy_labs/_sql_endpoints.py +214 -0
  98. sempy_labs/_tags.py +201 -0
  99. sempy_labs/_translations.py +43 -0
  100. sempy_labs/_user_delegation_key.py +44 -0
  101. sempy_labs/_utils.py +79 -0
  102. sempy_labs/_vertipaq.py +1021 -0
  103. sempy_labs/_vpax.py +388 -0
  104. sempy_labs/_warehouses.py +234 -0
  105. sempy_labs/_workloads.py +140 -0
  106. sempy_labs/_workspace_identity.py +72 -0
  107. sempy_labs/_workspaces.py +595 -0
  108. sempy_labs/admin/__init__.py +170 -0
  109. sempy_labs/admin/_activities.py +167 -0
  110. sempy_labs/admin/_apps.py +145 -0
  111. sempy_labs/admin/_artifacts.py +65 -0
  112. sempy_labs/admin/_basic_functions.py +463 -0
  113. sempy_labs/admin/_capacities.py +508 -0
  114. sempy_labs/admin/_dataflows.py +45 -0
  115. sempy_labs/admin/_datasets.py +186 -0
  116. sempy_labs/admin/_domains.py +522 -0
  117. sempy_labs/admin/_external_data_share.py +100 -0
  118. sempy_labs/admin/_git.py +72 -0
  119. sempy_labs/admin/_items.py +265 -0
  120. sempy_labs/admin/_labels.py +211 -0
  121. sempy_labs/admin/_reports.py +241 -0
  122. sempy_labs/admin/_scanner.py +118 -0
  123. sempy_labs/admin/_shared.py +82 -0
  124. sempy_labs/admin/_sharing_links.py +110 -0
  125. sempy_labs/admin/_tags.py +131 -0
  126. sempy_labs/admin/_tenant.py +503 -0
  127. sempy_labs/admin/_tenant_keys.py +89 -0
  128. sempy_labs/admin/_users.py +140 -0
  129. sempy_labs/admin/_workspaces.py +236 -0
  130. sempy_labs/deployment_pipeline/__init__.py +23 -0
  131. sempy_labs/deployment_pipeline/_items.py +580 -0
  132. sempy_labs/directlake/__init__.py +57 -0
  133. sempy_labs/directlake/_autosync.py +58 -0
  134. sempy_labs/directlake/_directlake_schema_compare.py +120 -0
  135. sempy_labs/directlake/_directlake_schema_sync.py +161 -0
  136. sempy_labs/directlake/_dl_helper.py +274 -0
  137. sempy_labs/directlake/_generate_shared_expression.py +94 -0
  138. sempy_labs/directlake/_get_directlake_lakehouse.py +62 -0
  139. sempy_labs/directlake/_get_shared_expression.py +34 -0
  140. sempy_labs/directlake/_guardrails.py +96 -0
  141. sempy_labs/directlake/_list_directlake_model_calc_tables.py +70 -0
  142. sempy_labs/directlake/_show_unsupported_directlake_objects.py +90 -0
  143. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +239 -0
  144. sempy_labs/directlake/_update_directlake_partition_entity.py +259 -0
  145. sempy_labs/directlake/_warm_cache.py +236 -0
  146. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  147. sempy_labs/environment/__init__.py +23 -0
  148. sempy_labs/environment/_items.py +212 -0
  149. sempy_labs/environment/_pubstage.py +223 -0
  150. sempy_labs/eventstream/__init__.py +37 -0
  151. sempy_labs/eventstream/_items.py +263 -0
  152. sempy_labs/eventstream/_topology.py +652 -0
  153. sempy_labs/graph/__init__.py +59 -0
  154. sempy_labs/graph/_groups.py +651 -0
  155. sempy_labs/graph/_sensitivity_labels.py +120 -0
  156. sempy_labs/graph/_teams.py +125 -0
  157. sempy_labs/graph/_user_licenses.py +96 -0
  158. sempy_labs/graph/_users.py +516 -0
  159. sempy_labs/graph_model/__init__.py +15 -0
  160. sempy_labs/graph_model/_background_jobs.py +63 -0
  161. sempy_labs/graph_model/_items.py +149 -0
  162. sempy_labs/lakehouse/__init__.py +67 -0
  163. sempy_labs/lakehouse/_blobs.py +247 -0
  164. sempy_labs/lakehouse/_get_lakehouse_columns.py +102 -0
  165. sempy_labs/lakehouse/_get_lakehouse_tables.py +274 -0
  166. sempy_labs/lakehouse/_helper.py +250 -0
  167. sempy_labs/lakehouse/_lakehouse.py +351 -0
  168. sempy_labs/lakehouse/_livy_sessions.py +143 -0
  169. sempy_labs/lakehouse/_materialized_lake_views.py +157 -0
  170. sempy_labs/lakehouse/_partitioning.py +165 -0
  171. sempy_labs/lakehouse/_schemas.py +217 -0
  172. sempy_labs/lakehouse/_shortcuts.py +440 -0
  173. sempy_labs/migration/__init__.py +35 -0
  174. sempy_labs/migration/_create_pqt_file.py +238 -0
  175. sempy_labs/migration/_direct_lake_to_import.py +105 -0
  176. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +398 -0
  177. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +148 -0
  178. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +533 -0
  179. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +172 -0
  180. sempy_labs/migration/_migration_validation.py +71 -0
  181. sempy_labs/migration/_refresh_calc_tables.py +131 -0
  182. sempy_labs/mirrored_azure_databricks_catalog/__init__.py +15 -0
  183. sempy_labs/mirrored_azure_databricks_catalog/_discover.py +213 -0
  184. sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py +45 -0
  185. sempy_labs/ml_model/__init__.py +23 -0
  186. sempy_labs/ml_model/_functions.py +427 -0
  187. sempy_labs/report/_BPAReportTemplate.json +232 -0
  188. sempy_labs/report/__init__.py +55 -0
  189. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  190. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  191. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  192. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  193. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  194. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  195. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  196. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  197. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  198. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  199. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  200. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  201. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  202. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  203. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  204. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  205. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  206. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  207. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  208. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  209. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  210. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  211. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  212. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  213. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  214. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  215. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  216. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  217. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  218. sempy_labs/report/_download_report.py +76 -0
  219. sempy_labs/report/_export_report.py +257 -0
  220. sempy_labs/report/_generate_report.py +427 -0
  221. sempy_labs/report/_paginated.py +76 -0
  222. sempy_labs/report/_report_bpa.py +354 -0
  223. sempy_labs/report/_report_bpa_rules.py +115 -0
  224. sempy_labs/report/_report_functions.py +581 -0
  225. sempy_labs/report/_report_helper.py +227 -0
  226. sempy_labs/report/_report_list_functions.py +110 -0
  227. sempy_labs/report/_report_rebind.py +149 -0
  228. sempy_labs/report/_reportwrapper.py +3100 -0
  229. sempy_labs/report/_save_report.py +147 -0
  230. sempy_labs/snowflake_database/__init__.py +10 -0
  231. sempy_labs/snowflake_database/_items.py +105 -0
  232. sempy_labs/sql_database/__init__.py +21 -0
  233. sempy_labs/sql_database/_items.py +201 -0
  234. sempy_labs/sql_database/_mirroring.py +79 -0
  235. sempy_labs/theme/__init__.py +12 -0
  236. sempy_labs/theme/_org_themes.py +129 -0
  237. sempy_labs/tom/__init__.py +3 -0
  238. sempy_labs/tom/_model.py +5977 -0
  239. sempy_labs/variable_library/__init__.py +19 -0
  240. sempy_labs/variable_library/_functions.py +403 -0
  241. sempy_labs/warehouse/__init__.py +28 -0
  242. sempy_labs/warehouse/_items.py +234 -0
  243. sempy_labs/warehouse/_restore_points.py +309 -0
sempy_labs/_delta_analyzer_history.py
@@ -0,0 +1,301 @@
+ import pandas as pd
+ from typing import Optional
+ import pyarrow.parquet as pq
+ from sempy_labs._helper_functions import (
+     create_abfss_path,
+     resolve_workspace_id,
+     resolve_lakehouse_id,
+     _mount,
+ )
+ from sempy._utils._log import log
+ from tqdm.auto import tqdm
+ from uuid import UUID
+ from datetime import datetime
+
+
+ @log
+ def delta_analyzer_history(
+     table_name: str,
+     schema: Optional[str] = None,
+     lakehouse: Optional[str | UUID] = None,
+     workspace: Optional[str | UUID] = None,
+ ) -> pd.DataFrame:
+     """
+     Analyzes the transaction log for a specified delta table and shows the results in a dataframe. One row per data modification operation.
+
+     Keeps track of the number of Parquet files, rowgroups, file size and #rows impacted by each change.
+
+     Incremental framing effect: 100% = highly effective, 0% = no benefit at all.
+
+     Parameters
+     ----------
+     table_name : str
+         The delta table name.
+     schema : str, default=None
+         The schema name of the delta table.
+     lakehouse : str | uuid.UUID, default=None
+         The Fabric lakehouse name or ID.
+         Defaults to None, which resolves to the lakehouse attached to the notebook.
+     workspace : str | uuid.UUID, default=None
+         The Fabric workspace name or ID used by the lakehouse.
+         Defaults to None, which resolves to the workspace of the attached lakehouse
+         or, if no lakehouse is attached, to the workspace of the notebook.
+
+     Returns
+     -------
+     pandas.DataFrame
+         A dataframe with one row per change. Also displays a Gantt visual showing a timeline for individual parquet files.
+     """
+
+     import notebookutils
+     from IPython.display import display, HTML
+
+     workspace_id = resolve_workspace_id(workspace=workspace)
+     lakehouse_id = resolve_lakehouse_id(lakehouse=lakehouse, workspace=workspace)
+
+     table_path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema)
+     local_path = _mount(lakehouse=lakehouse, workspace=workspace)
+     if schema:  # use schema if specified
+         table_path_local = f"{local_path}/Tables/{schema}/{table_name}"
+     else:
+         table_path_local = f"{local_path}/Tables/{table_name}"
+     delta_table_path = f"{table_path}/_delta_log"
+
+     files = notebookutils.fs.ls(delta_table_path)
+     json_files = [file.name for file in files if file.name.endswith(".json")]
+
+     element_version = total_size = total_rows = total_files = total_rowgroups = 0
+     changes_array = []
+     parquet_files = []
+     my_date_time_format = "%Y-%m-%d %H:%M:%S.%f"
+     now_to_epoch = datetime.now().strftime(my_date_time_format)
+     num_latest_files = len(json_files)
+
+     for idx, file in enumerate(bar := tqdm(json_files), start=1):
+         bar.set_description(
+             f"Analyzing the '{file}' log file ({idx}/{num_latest_files})..."
+         )
+
+         change_timestamp = datetime.strptime(
+             "2001-01-01 12:00:00.000", my_date_time_format
+         )
+         df = pd.read_json(f"{delta_table_path}/{file}", lines=True)
+
+         rows_added = size_added = rows_deleted = size_deleted = files_added = (
+             files_removed
+         ) = row_groups_added = row_groups_removed = 0
+         total_files_before_change = total_files
+         total_row_groups_before_change = total_rowgroups
+         operation = predicate = tags = ""
+
+         for _, row in df.iterrows():
+             add_row = row.get("add")
+             remove_row = row.get("remove")
+             commit_row = row.get("commitInfo")
+
+             if isinstance(add_row, dict):
+                 file_name = add_row["path"]
+                 fs_filename = f"{table_path}/{file_name}"
+                 size_added += add_row["size"]
+                 files_added += 1
+                 filerows_added = 0
+
+                 if notebookutils.fs.exists(fs_filename):
+                     parquet_file = pq.ParquetFile(table_path_local + f"/{file_name}")
+                     for i in range(parquet_file.num_row_groups):
+                         row_group = parquet_file.metadata.row_group(i)
+                         num_rows = row_group.num_rows
+                         filerows_added += num_rows
+                         rows_added += num_rows
+
+                     row_groups_added += parquet_file.num_row_groups
+
+                 start = str(
+                     datetime.fromtimestamp(add_row["modificationTime"] / 1000.0)
+                 )
+                 parquet_files.append(
+                     {
+                         "file": file_name,
+                         "start": start,
+                         "end": now_to_epoch,
+                         "rows": filerows_added,
+                         "isCurrent": 1,
+                     }
+                 )
+
+             if isinstance(remove_row, dict):
+                 file_name = remove_row["path"]
+                 fs_filename = f"{table_path}/{file_name}"
+
+                 if notebookutils.fs.exists(fs_filename):
+                     parquet_file = pq.ParquetFile(table_path_local + f"/{file_name}")
+                     for i in range(parquet_file.num_row_groups):
+                         row_group = parquet_file.metadata.row_group(i)
+                         num_rows = row_group.num_rows
+                         rows_deleted += num_rows
+
+                     files_removed += 1
+                     size_deleted += remove_row.get("size", 0)
+                     row_groups_removed += parquet_file.num_row_groups
+
+                     result = next(
+                         (row for row in parquet_files if row["file"] == file_name), None
+                     )
+                     if result:
+                         result.update(
+                             {
+                                 "isCurrent": 0,
+                                 "end": str(
+                                     datetime.fromtimestamp(
+                                         remove_row["deletionTimestamp"] / 1000.0
+                                     )
+                                 ),
+                             }
+                         )
+
+             if isinstance(commit_row, dict):
+                 operation = commit_row.get("operation")
+                 tags = commit_row.get("tags")
+                 predicate = commit_row.get("operationParameters", {}).get("predicate")
+
+                 if operation == "VACUUM START":
+                     operation_metrics = commit_row.get("operationMetrics", {})
+                     total_files -= int(operation_metrics.get("numFilesToDelete", 0))
+                     total_size -= int(operation_metrics.get("sizeOfDataToDelete", 0))
+
+                 change_timestamp = datetime.fromtimestamp(
+                     commit_row["timestamp"] / 1000.0
+                 )
+
+         total_size += size_added - size_deleted
+         total_rows += rows_added - rows_deleted
+         total_files += files_added - files_removed
+         total_rowgroups += row_groups_added - row_groups_removed
+
+         incremental_framing_effect = 1
+         if size_deleted != 0:
+             incremental_framing_effect = (
+                 int((total_size - size_added * 1.0) / total_size * 100000) / 1000
+             )
+         # incrementalFramingEffect = round(
+         #     (totalSize - sizeAdded * 1.0) / totalSize, 4
+         # )
+
+         changes_array.append(
+             [
+                 element_version,
+                 operation,
+                 predicate,
+                 change_timestamp,
+                 incremental_framing_effect,
+                 files_added,
+                 files_removed,
+                 total_files_before_change - files_removed,
+                 total_files,
+                 size_added,
+                 size_deleted,
+                 total_size,
+                 row_groups_added,
+                 row_groups_removed,
+                 total_row_groups_before_change - row_groups_removed,
+                 total_rowgroups,
+                 rows_added,
+                 rows_deleted,
+                 rows_added - rows_deleted,
+                 total_rows,
+                 tags,
+             ]
+         )
+
+         element_version += 1
+
+     # /********************************************************************************************************************
+     #     Display Gantt Chart of files
+     # ********************************************************************************************************************/
+     spec: str = (
+         """{
+   "$$schema": 'https://vega.github.io/schema/vega-lite/v2.json',
+   "description": "A simple bar chart with ranged data (aka Gantt Chart).",
+   "width" : 1024 ,
+   "data": {
+     "values": %s
+   },
+   "layer":[
+     {"mark": "bar"},
+     {"mark": {
+       "type": "text",
+       "align": "center",
+       "baseline": "middle",
+       "dx": 40
+     },
+     "encoding": {
+       "text": {"field": "rows", "type": "quantitative", "format":","},
+       "color":{
+         "condition": {"test": "datum['isCurrent'] == 1", "value": "black"},
+         "value": "black"
+       }
+     }
+   }],
+   "encoding": {
+     "y": {"field": "file", "type": "ordinal","sort": "isCurrent","title":null,"axis":{"labelPadding":15,"labelLimit":360}},
+     "x": {"field": "start", "type": "temporal","title":null},
+     "x2": {"field": "end", "type": "temporal","title":null},
+     "color": {
+       "field": "isCurrent",
+       "scale": {"range": ["silver", "#ca8861"]}
+     }
+   }
+ }"""
+         % (parquet_files)
+     )
+
+     display(
+         HTML(
+             """
+ <!DOCTYPE html>
+ <html>
+ <head>
+   <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
+   <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
+   <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
+ </head>
+ <body>
+   <div id="vis"></div>
+   <script type="text/javascript">
+     var spec = """
+             + spec
+             + """;
+     var opt = {"renderer": "canvas", "actions": false};
+     vegaEmbed("#vis", spec, opt);
+   </script>
+ </body>
+ </html>"""
+         )
+     )
+
+     return pd.DataFrame(
+         changes_array,
+         columns=[
+             "Change Number",
+             "Change Type",
+             "Predicate",
+             "Modification Time",
+             "Incremental Effect",
+             "Files Added",
+             "Files Removed",
+             "Files Preserved",
+             "Files After Change",
+             "Size Added",
+             "Size Removed",
+             "Size After Change",
+             "Rowgroups Added",
+             "Rowgroups Removed",
+             "Rowgroups Preserved",
+             "Rowgroups After Change",
+             "Rows Added",
+             "Rows Removed",
+             "Rows Delta",
+             "Rows After Change",
+             "Tags",
+         ],
+     )
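
A minimal usage sketch for delta_analyzer_history, assuming a Fabric notebook session with an attached lakehouse; the table name "sales", the schema "dbo", and the lakehouse/workspace names are placeholders. The import path mirrors the module location shown in this diff (the package root may also re-export the function).

    from sempy_labs._delta_analyzer_history import delta_analyzer_history

    # One row per commit in the table's _delta_log; also renders the Gantt
    # chart of parquet file lifetimes in the notebook output.
    history_df = delta_analyzer_history(table_name="sales")

    # Schema-enabled lakehouses and explicit targets are supported as well.
    history_df = delta_analyzer_history(
        table_name="sales",
        schema="dbo",
        lakehouse="MyLakehouse",
        workspace="MyWorkspace",
    )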
sempy_labs/_dictionary_diffs.py
@@ -0,0 +1,221 @@
+ import re
+ import json
+ import difflib
+ from collections import defaultdict
+
+
+ def color_text(text, color_code):
+     return f"\033[{color_code}m{text}\033[0m"
+
+
+ def stringify(payload):
+     try:
+         if isinstance(payload, list):
+             return (
+                 "[\n" + ",\n".join(f"  {json.dumps(item)}" for item in payload) + "\n]"
+             )
+         return json.dumps(payload, indent=2, sort_keys=True)
+     except Exception:
+         return str(payload)
+
+
+ def extract_top_level_group(path):
+     # For something like: resourcePackages[1].items[1].name → resourcePackages[1].items[1]
+     segments = re.split(r"\.(?![^[]*\])", path)  # split on dots not in brackets
+     return ".".join(segments[:-1]) if len(segments) > 1 else segments[0]
+
+
+ def get_by_path(obj, path):
+     """Navigate into nested dict/list based on a dot/bracket path like: a.b[1].c"""
+     tokens = re.findall(r"\w+|\[\d+\]", path)
+     for token in tokens:
+         if token.startswith("["):
+             index = int(token[1:-1])
+             obj = obj[index]
+         else:
+             obj = obj.get(token)
+     return obj
+
+
+ def deep_diff(d1, d2, path=""):
+     diffs = []
+     if isinstance(d1, dict) and isinstance(d2, dict):
+         keys = set(d1) | set(d2)
+         for key in sorted(keys):
+             new_path = f"{path}.{key}" if path else key
+             if key not in d1:
+                 diffs.append(("+", new_path, None, d2[key]))
+             elif key not in d2:
+                 diffs.append(("-", new_path, d1[key], None))
+             else:
+                 diffs.extend(deep_diff(d1[key], d2[key], new_path))
+     elif isinstance(d1, list) and isinstance(d2, list):
+         min_len = min(len(d1), len(d2))
+         list_changed = False
+         for i in range(min_len):
+             if d1[i] != d2[i]:
+                 list_changed = True
+                 break
+         if list_changed or len(d1) != len(d2):
+             diffs.append(("~", path, d1, d2))
+     elif d1 != d2:
+         diffs.append(("~", path, d1, d2))
+     return diffs
+
+
+ def diff_parts(d1, d2):
+
+     def build_path_map(parts):
+         return {part["path"]: part["payload"] for part in parts}
+
+     try:
+         paths1 = build_path_map(d1)
+     except Exception:
+         paths1 = d1
+     try:
+         paths2 = build_path_map(d2)
+     except Exception:
+         paths2 = d2
+     all_paths = set(paths1) | set(paths2)
+
+     for part_path in sorted(all_paths):
+         p1 = paths1.get(part_path)
+         p2 = paths2.get(part_path)
+
+         if p1 is None:
+             print(color_text(f"+ {part_path}", "32"))  # Green
+             continue
+         elif p2 is None:
+             print(color_text(f"- {part_path}", "31"))  # Red
+             continue
+         elif p1 == p2:
+             continue
+
+         if p1 is None or p2 is None:
+             print(
+                 color_text(f"+ {part_path}", "32")
+                 if p2 and not p1
+                 else color_text(f"- {part_path}", "31")
+             )
+             continue
+
+         # Header for the changed part
+         print(color_text(f"~ {part_path}", "33"))
+
+         # Collect diffs
+         diffs = deep_diff(p1, p2)
+         # If the diff is only a change of a whole list (like appending to a list), group it under its key
+         merged_list_diffs = []
+         for change_type, full_path, old_val, new_val in diffs:
+             if (
+                 change_type == "~"
+                 and isinstance(old_val, list)
+                 and isinstance(new_val, list)
+             ):
+                 merged_list_diffs.append((change_type, full_path, old_val, new_val))
+
+         # Replace individual item diffs with unified list diff
+         if merged_list_diffs:
+             diffs = merged_list_diffs
+
+         # Group diffs by common parent path (e.g. items[1])
+         grouped = defaultdict(list)
+         for change_type, full_path, old_val, new_val in diffs:
+             group_path = extract_top_level_group(full_path)
+             grouped[group_path].append((change_type, full_path, old_val, new_val))
+
+         # Print each group once with unified diff for the full substructure
+         for group_path in sorted(grouped):
+             print("  " + color_text(f"~ {group_path}", "33"))
+
+             try:
+                 old_group = get_by_path(p1, group_path)
+                 new_group = get_by_path(p2, group_path)
+             except Exception:
+                 old_group = new_group = None
+
+             # Skip showing diffs for empty/null groups
+             if isinstance(old_group, dict) and isinstance(new_group, dict):
+                 old_keys = set(old_group.keys())
+                 new_keys = set(new_group.keys())
+
+                 for key in sorted(old_keys - new_keys):
+                     print(
+                         "    "
+                         + color_text(f"- {key}: {json.dumps(old_group[key])}", "31")
+                     )
+                 for key in sorted(new_keys - old_keys):
+                     print(
+                         "    "
+                         + color_text(f"+ {key}: {json.dumps(new_group[key])}", "32")
+                     )
+                 for key in sorted(old_keys & new_keys):
+                     if old_group[key] != new_group[key]:
+                         print("    " + color_text(f"~ {key}:", "33"))
+                         old_val_str = stringify(old_group[key]).splitlines()
+                         new_val_str = stringify(new_group[key]).splitlines()
+                         for line in difflib.unified_diff(
+                             old_val_str,
+                             new_val_str,
+                             fromfile="old",
+                             tofile="new",
+                             lineterm="",
+                         ):
+                             if line.startswith("@@"):
+                                 print("      " + color_text(line, "36"))
+                             elif line.startswith("-") and not line.startswith("---"):
+                                 print("      " + color_text(line, "31"))
+                             elif line.startswith("+") and not line.startswith("+++"):
+                                 print("      " + color_text(line, "32"))
+             elif old_group is None and new_group is not None:
+                 if isinstance(new_group, dict):
+                     # print all added keys
+                     for key, val in new_group.items():
+                         print("    " + color_text(f"+ {key}: {json.dumps(val)}", "32"))
+                 elif isinstance(new_group, list):
+                     old_str = []
+                     new_str = stringify(new_group).splitlines()
+                     for line in difflib.unified_diff(
+                         old_str, new_str, fromfile="old", tofile="new", lineterm=""
+                     ):
+                         if line.startswith("@@"):
+                             print("      " + color_text(line, "36"))
+                         elif line.startswith("-") and not line.startswith("---"):
+                             print("      " + color_text(line, "31"))
+                         elif line.startswith("+") and not line.startswith("+++"):
+                             print("      " + color_text(line, "32"))
+                 else:
+                     print("    " + color_text(f"+ {json.dumps(new_group)}", "32"))
+
+             elif new_group is None and old_group is not None:
+                 if isinstance(old_group, dict):
+                     # print all removed keys
+                     for key, val in old_group.items():
+                         print("    " + color_text(f"- {key}: {json.dumps(val)}", "31"))
+                 elif isinstance(old_group, list):
+                     old_str = stringify(old_group).splitlines()
+                     new_str = []
+                     for line in difflib.unified_diff(
+                         old_str, new_str, fromfile="old", tofile="new", lineterm=""
+                     ):
+                         if line.startswith("@@"):
+                             print("      " + color_text(line, "36"))
+                         elif line.startswith("-") and not line.startswith("---"):
+                             print("      " + color_text(line, "31"))
+                         elif line.startswith("+") and not line.startswith("+++"):
+                             print("      " + color_text(line, "32"))
+                 else:
+                     print("    " + color_text(f"- {json.dumps(old_group)}", "31"))
+             else:
+                 old_str = stringify(old_group).splitlines()
+                 new_str = stringify(new_group).splitlines()
+
+                 for line in difflib.unified_diff(
+                     old_str, new_str, fromfile="old", tofile="new", lineterm=""
+                 ):
+                     if line.startswith("@@"):
+                         print("    " + color_text(line, "36"))
+                     elif line.startswith("-") and not line.startswith("---"):
+                         print("    " + color_text(line, "31"))
+                     elif line.startswith("+") and not line.startswith("+++"):
+                         print("    " + color_text(line, "32"))
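
A short sketch of how these helpers compose, runnable as plain Python; the part path "definition/model.json" and the payloads are hypothetical. deep_diff returns (change_type, path, old_value, new_value) tuples, and a changed list is reported as a single "~" entry at the list's path; diff_parts takes two lists of {"path": ..., "payload": ...} parts and prints an ANSI-colored report grouped by parent path.

    from sempy_labs._dictionary_diffs import deep_diff, diff_parts

    old = {"name": "Sales", "columns": [{"name": "Amount"}]}
    new = {"name": "Sales", "columns": [{"name": "Amount"}, {"name": "Qty"}]}

    # Yields a single ('~', 'columns', <old list>, <new list>) tuple, because
    # the two dicts differ only in the appended list element.
    for change in deep_diff(old, new):
        print(change)

    # Prints "~ definition/model.json", then a colorized unified diff of the
    # changed 'columns' group.
    diff_parts(
        [{"path": "definition/model.json", "payload": old}],
        [{"path": "definition/model.json", "payload": new}],
    )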
sempy_labs/_documentation.py
@@ -0,0 +1,147 @@
+ import sempy
+ import sempy.fabric as fabric
+ import pandas as pd
+ from typing import List, Optional
+ from sempy._utils._log import log
+
+
+ @log
+ def list_all_items(workspaces: Optional[str | List[str]] = None):
+
+     df = pd.DataFrame(
+         columns=[
+             "Workspace Name",
+             "Workspace Id",
+             "Item Name",
+             "Item Type",
+             "Description",
+         ]
+     )
+
+     if isinstance(workspaces, str):
+         workspaces = [workspaces]
+
+     dfW = fabric.list_workspaces()
+     if workspaces is not None:
+         dfW = dfW[dfW["Name"].isin(workspaces)]
+
+     for _, r in dfW.iterrows():
+         workspace_name = r["Name"]
+         workspace_id = r["Id"]
+         dfI = fabric.list_items(workspace=workspace_name)
+         for _, r2 in dfI.iterrows():
+
+             new_data = {
+                 "Workspace Name": workspace_name,
+                 "Workspace Id": workspace_id,
+                 "Item Name": r2["Name"],
+                 "Item Type": r2["Type"],
+                 "Description": r2["Description"],
+             }
+             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+     return df
+
+
+ @log
+ def data_dictionary(dataset: str, workspace: Optional[str | None] = None):
+
+     from sempy_labs.tom import connect_semantic_model
+
+     sempy.fabric._client._utils._init_analysis_services()
+     import Microsoft.AnalysisServices.Tabular as TOM
+
+     df = pd.DataFrame(
+         columns=[
+             "Workspace Name",
+             "Model Name",
+             "Table Name",
+             "Object Type",
+             "Object Name",
+             "Hidden Flag",
+             "Description",
+             "Display Folder",
+             "Measure Formula",
+         ]
+     )
+
+     with connect_semantic_model(
+         dataset=dataset, readonly=True, workspace=workspace
+     ) as tom:
+         for t in tom.model.Tables:
+             expr = None
+             if tom.is_calculated_table(table_name=t.Name):
+                 pName = next(p.Name for p in t.Partitions)
+                 expr = t.Partitions[pName].Source.Expression
+
+             new_data = {
+                 "Workspace Name": workspace,
+                 "Model Name": dataset,
+                 "Table Name": t.Name,
+                 "Object Type": t.ObjectType,
+                 "Object Name": t.Name,
+                 "Hidden Flag": t.IsHidden,
+                 "Description": t.Description,
+                 "Display Folder": None,
+                 "Measure Formula": expr,
+             }
+             df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+             cols = [c for c in t.Columns if c.Type != TOM.ColumnType.RowNumber]
+             for c in cols:
+
+                 def get_calc_column_expression(table_name, column_name):
+                     expr = None
+                     if tom.is_calculated_column(
+                         table_name=table_name, column_name=column_name
+                     ):
+                         expr = c.Expression
+                     return expr
+
+                 new_data = {
+                     "Workspace Name": workspace,
+                     "Model Name": dataset,
+                     "Table Name": t.Name,
+                     "Object Type": c.ObjectType,
+                     "Object Name": c.Name,
+                     "Hidden Flag": c.IsHidden,
+                     "Description": c.Description,
+                     "Display Folder": c.DisplayFolder,
+                     "Measure Formula": get_calc_column_expression(t.Name, c.Name),
+                 }
+                 df = pd.concat(
+                     [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
+                 )
+             for m in t.Measures:
+                 new_data = {
+                     "Workspace Name": workspace,
+                     "Model Name": dataset,
+                     "Table Name": t.Name,
+                     "Object Type": m.ObjectType,
+                     "Object Name": m.Name,
+                     "Hidden Flag": m.IsHidden,
+                     "Description": m.Description,
+                     "Display Folder": m.DisplayFolder,
+                     "Measure Formula": m.Expression,
+                 }
+                 df = pd.concat(
+                     [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
+                 )
+
+             if t.CalculationGroup is not None:
+                 for ci in t.CalculationGroup.CalculationItems:
+                     new_data = {
+                         "Workspace Name": workspace,
+                         "Model Name": dataset,
+                         "Table Name": t.Name,
+                         "Object Type": "Calculation Item",
+                         "Object Name": ci.Name,
+                         "Hidden Flag": t.IsHidden,
+                         "Description": ci.Description,
+                         "Display Folder": None,
+                         "Measure Formula": ci.Expression,
+                     }
+                     df = pd.concat(
+                         [df, pd.DataFrame(new_data, index=[0])], ignore_index=True
+                     )
+
+     return df
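
A usage sketch for the two documentation helpers, assuming a Fabric notebook session; the workspace names and the dataset name are placeholders, and the import path mirrors the module location shown in this diff. list_all_items inventories items across workspaces, while data_dictionary walks a semantic model over a read-only TOM connection.

    from sempy_labs._documentation import data_dictionary, list_all_items

    # Inventory every item in the named workspaces; passing None scans all
    # workspaces visible to the caller.
    items_df = list_all_items(workspaces=["Finance", "Operations"])

    # Tables, columns, measures, and calculation items of one model, with
    # descriptions, display folders, and formulas.
    dictionary_df = data_dictionary(dataset="Sales Model", workspace="Finance")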