semantic-link-labs 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (113)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +45 -15
  5. sempy_labs/_ai.py +42 -85
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +12 -8
  33. sempy_labs/_connections.py +77 -70
  34. sempy_labs/_dax.py +7 -9
  35. sempy_labs/_generate_semantic_model.py +75 -90
  36. sempy_labs/_helper_functions.py +371 -20
  37. sempy_labs/_icons.py +23 -0
  38. sempy_labs/_list_functions.py +855 -427
  39. sempy_labs/_model_auto_build.py +4 -3
  40. sempy_labs/_model_bpa.py +307 -1118
  41. sempy_labs/_model_bpa_bulk.py +363 -0
  42. sempy_labs/_model_bpa_rules.py +831 -0
  43. sempy_labs/_model_dependencies.py +20 -16
  44. sempy_labs/_one_lake_integration.py +18 -12
  45. sempy_labs/_query_scale_out.py +116 -129
  46. sempy_labs/_refresh_semantic_model.py +23 -10
  47. sempy_labs/_translations.py +367 -288
  48. sempy_labs/_vertipaq.py +152 -123
  49. sempy_labs/directlake/__init__.py +7 -1
  50. sempy_labs/directlake/_directlake_schema_compare.py +33 -30
  51. sempy_labs/directlake/_directlake_schema_sync.py +60 -77
  52. sempy_labs/directlake/_dl_helper.py +233 -0
  53. sempy_labs/directlake/_get_directlake_lakehouse.py +7 -8
  54. sempy_labs/directlake/_get_shared_expression.py +5 -3
  55. sempy_labs/directlake/_guardrails.py +20 -16
  56. sempy_labs/directlake/_list_directlake_model_calc_tables.py +17 -10
  57. sempy_labs/directlake/_show_unsupported_directlake_objects.py +3 -2
  58. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +10 -5
  59. sempy_labs/directlake/_update_directlake_partition_entity.py +169 -22
  60. sempy_labs/directlake/_warm_cache.py +7 -4
  61. sempy_labs/lakehouse/_get_lakehouse_columns.py +1 -1
  62. sempy_labs/lakehouse/_get_lakehouse_tables.py +65 -71
  63. sempy_labs/lakehouse/_lakehouse.py +5 -3
  64. sempy_labs/lakehouse/_shortcuts.py +20 -13
  65. sempy_labs/migration/__init__.py +1 -1
  66. sempy_labs/migration/_create_pqt_file.py +184 -186
  67. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +240 -269
  68. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +78 -77
  69. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +444 -425
  70. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +96 -102
  71. sempy_labs/migration/_migration_validation.py +2 -2
  72. sempy_labs/migration/_refresh_calc_tables.py +94 -100
  73. sempy_labs/report/_BPAReportTemplate.json +232 -0
  74. sempy_labs/report/__init__.py +6 -2
  75. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  76. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  77. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  92. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  93. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  94. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  95. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  96. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  97. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  98. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  99. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  100. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  101. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  102. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  103. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  104. sempy_labs/report/_generate_report.py +260 -139
  105. sempy_labs/report/_report_functions.py +90 -59
  106. sempy_labs/report/_report_rebind.py +40 -34
  107. sempy_labs/tom/__init__.py +1 -4
  108. sempy_labs/tom/_model.py +601 -181
  109. semantic_link_labs-0.5.0.dist-info/METADATA +0 -22
  110. semantic_link_labs-0.5.0.dist-info/RECORD +0 -53
  111. sempy_labs/directlake/_fallback.py +0 -58
  112. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  113. {semantic_link_labs-0.5.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
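A recurring change across this release: functions that previously required the caller to identify the backing lakehouse (via `lakehouse` / `lakehouse_workspace` parameters) now resolve the Direct Lake source themselves through the new `get_direct_lake_source` helper, and only print a deprecation notice if the old arguments are passed. As a minimal sketch (not taken from the package docs), an updated `vertipaq_analyzer` call after upgrading might look like the following, assuming the function is still exported at the package top level and using placeholder dataset/workspace names:

```python
import sempy_labs as labs

# 0.5.0 style (now deprecated): vertipaq_analyzer(..., lakehouse_workspace="Lake WS")
# 0.7.0 style: the lakehouse and its workspace are resolved automatically.
labs.vertipaq_analyzer(
    dataset="Sales Model",      # placeholder semantic model name
    workspace="My Workspace",   # placeholder workspace name
    export="table",             # None, "zip", or "table" per the docstring below
    read_stats_from_data=True,
)
```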
sempy_labs/_vertipaq.py CHANGED
@@ -1,28 +1,34 @@
- import sempy
  import sempy.fabric as fabric
  import pandas as pd
  from IPython.display import display, HTML
- import zipfile, os, shutil, datetime, warnings
+ import zipfile
+ import os
+ import shutil
+ import datetime
+ import warnings
  from pyspark.sql import SparkSession
  from sempy_labs._helper_functions import (
  format_dax_object_name,
- get_direct_lake_sql_endpoint,
  resolve_lakehouse_name,
+ resolve_dataset_id,
+ save_as_delta_table,
+ resolve_workspace_capacity,
  )
  from sempy_labs._list_functions import list_relationships
- from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
- from sempy_labs.lakehouse._lakehouse import lakehouse_attached
- from typing import List, Optional, Union
+ from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
+ from sempy_labs.directlake import get_direct_lake_source
+ from typing import Optional
  from sempy._utils._log import log
  import sempy_labs._icons as icons

+
  @log
  def vertipaq_analyzer(
  dataset: str,
  workspace: Optional[str] = None,
  export: Optional[str] = None,
- lakehouse_workspace: Optional[str] = None,
  read_stats_from_data: Optional[bool] = False,
+ **kwargs,
  ):
  """
  Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model.
@@ -39,10 +45,6 @@ def vertipaq_analyzer(
  Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function.
  Specifying 'table' will export the results to delta tables (appended) in your lakehouse.
  Default value: None.
- lakehouse_workspace : str, default=None
- The Fabric workspace used by the lakehouse (for Direct Lake semantic models).
- Defaults to None which resolves to the workspace of the attached lakehouse
- or if no lakehouse attached, resolves to the workspace of the notebook.
  read_stats_from_data : bool, default=False
  Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).

@@ -51,6 +53,14 @@ def vertipaq_analyzer(

  """

+ from sempy_labs.tom import connect_semantic_model
+
+ if "lakehouse_workspace" in kwargs:
+ print(
+ f"{icons.info} The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+ )
+ del kwargs["lakehouse_workspace"]
+
  pd.options.mode.copy_on_write = True
  warnings.filterwarnings(
  "ignore", message="createDataFrame attempted Arrow optimization*"
@@ -58,9 +68,6 @@ def vertipaq_analyzer(

  workspace = fabric.resolve_workspace_name(workspace)

- if lakehouse_workspace is None:
- lakehouse_workspace = workspace
-
  dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace)
  dfT.rename(columns={"Name": "Table Name"}, inplace=True)
  dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace)
@@ -71,18 +78,24 @@ def vertipaq_analyzer(
  dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
  dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
  dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
- dfD = fabric.list_datasets(
- workspace=workspace,
- additional_xmla_properties=["CompatibilityLevel", "Model.DefaultMode"],
+ artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+ get_direct_lake_source(dataset=dataset, workspace=workspace)
  )
- dfD = dfD[dfD["Dataset Name"] == dataset]
- dfD["Compatibility Level"] = dfD["Compatibility Level"].astype(int)
- isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows())
+
+ with connect_semantic_model(
+ dataset=dataset, readonly=True, workspace=workspace
+ ) as tom:
+ compat_level = tom.model.Model.Database.CompatibilityLevel
+ is_direct_lake = tom.is_direct_lake()
+ def_mode = tom.model.DefaultMode
+ table_count = tom.model.Tables.Count
+ column_count = len(list(tom.all_columns()))
+
  dfR["Missing Rows"] = None

  # Direct Lake
  if read_stats_from_data:
- if isDirectLake:
+ if is_direct_lake and artifact_type == "Lakehouse":
  dfC = pd.merge(
  dfC,
  dfP[["Table Name", "Query", "Source Type"]],
@@ -93,66 +106,54 @@ def vertipaq_analyzer(
  (dfC["Source Type"] == "Entity")
  & (~dfC["Column Name"].str.startswith("RowNumber-"))
  ]
- sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
-
- # Get lakehouse name from SQL Endpoint ID
- dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
- dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
-
- if len(dfI_filt) == 0:
- raise ValueError(f"{icons.red_dot} The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter.")
- else:
- lakehouseName = dfI_filt["Display Name"].iloc[0]
-
- current_workspace_id = fabric.get_workspace_id()
- current_workspace = fabric.resolve_workspace_name(current_workspace_id)
- if current_workspace != lakehouse_workspace:
- lakeTables = get_lakehouse_tables(
- lakehouse=lakehouseName, workspace=lakehouse_workspace
- )
-
- sql_statements = []
- spark = SparkSession.builder.getOrCreate()
- # Loop through tables
- for lakeTName in dfC_flt["Query"].unique():
- query = "SELECT "
- columns_in_table = dfC_flt.loc[
- dfC_flt["Query"] == lakeTName, "Source"
- ].unique()
-
- # Loop through columns within those tables
- for scName in columns_in_table:
- query = query + f"COUNT(DISTINCT({scName})) AS {scName}, "
-
- query = query[:-2]
- if lakehouse_workspace == current_workspace:
- query = query + f" FROM {lakehouseName}.{lakeTName}"
- else:
- lakeTables_filt = lakeTables[
- lakeTables["Table Name"] == lakeTName
- ]
- tPath = lakeTables_filt["Location"].iloc[0]
-
- df = spark.read.format("delta").load(tPath)
- tempTableName = "delta_table_" + lakeTName
- df.createOrReplaceTempView(tempTableName)
- query = query + f" FROM {tempTableName}"
- sql_statements.append((lakeTName, query))
-
- for o in sql_statements:
- tName = o[0]
- query = o[1]
-
- df = spark.sql(query)
-
- for column in df.columns:
- x = df.collect()[0][column]
- for i, r in dfC.iterrows():
- if r["Query"] == tName and r["Source"] == column:
- dfC.at[i, "Cardinality"] = x
-
- # Remove column added temporarily
- dfC.drop(columns=["Query", "Source Type"], inplace=True)
+
+ object_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+ current_workspace_id = fabric.get_workspace_id()
+ if current_workspace_id != lakehouse_workspace_id:
+ lakeTables = get_lakehouse_tables(
+ lakehouse=lakehouse_name, workspace=object_workspace
+ )
+
+ sql_statements = []
+ spark = SparkSession.builder.getOrCreate()
+ # Loop through tables
+ for lakeTName in dfC_flt["Query"].unique():
+ query = "SELECT "
+ columns_in_table = dfC_flt.loc[
+ dfC_flt["Query"] == lakeTName, "Source"
+ ].unique()
+
+ # Loop through columns within those tables
+ for scName in columns_in_table:
+ query = query + f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "
+
+ query = query[:-2]
+ if lakehouse_workspace_id == current_workspace_id:
+ query = query + f" FROM {lakehouse_name}.{lakeTName}"
+ else:
+ lakeTables_filt = lakeTables[lakeTables["Table Name"] == lakeTName]
+ tPath = lakeTables_filt["Location"].iloc[0]
+
+ df = spark.read.format("delta").load(tPath)
+ tempTableName = "delta_table_" + lakeTName
+ df.createOrReplaceTempView(tempTableName)
+ query = query + f" FROM {tempTableName}"
+ sql_statements.append((lakeTName, query))
+
+ for o in sql_statements:
+ tName = o[0]
+ query = o[1]
+
+ df = spark.sql(query)
+
+ for column in df.columns:
+ x = df.collect()[0][column]
+ for i, r in dfC.iterrows():
+ if r["Query"] == tName and r["Source"] == column:
+ dfC.at[i, "Cardinality"] = x
+
+ # Remove column added temporarily
+ dfC.drop(columns=["Query", "Source Type"], inplace=True)

  # Direct Lake missing rows
  dfR = pd.merge(
@@ -199,11 +200,11 @@ def vertipaq_analyzer(
  toTable = r["To Lake Table"]
  toColumn = r["To Lake Column"]

- if lakehouse_workspace == current_workspace:
+ if lakehouse_workspace_id == current_workspace_id:
  query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
  else:
- tempTableFrom = "delta_table_" + fromTable
- tempTableTo = "delta_table_" + toTable
+ tempTableFrom = f"delta_table_{fromTable}"
+ tempTableTo = f"delta_table_{toTable}"

  query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

@@ -214,7 +215,7 @@ def vertipaq_analyzer(
  dfR.at[i, "Missing Rows"] = missingRows

  dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
- else:
+ elif not is_direct_lake:
  # Calculate missing rows using DAX for non-direct lake
  for i, r in dfR.iterrows():
  fromTable = r["From Table"]
@@ -238,7 +239,7 @@ def vertipaq_analyzer(

  try:
  missingRows = result.iloc[0, 0]
- except:
+ except Exception:
  pass

  dfR.at[i, "Missing Rows"] = missingRows
@@ -308,7 +309,6 @@ def vertipaq_analyzer(
  )
  dfTable = pd.merge(dfTable, dfTP, on="Table Name", how="left")
  dfTable = pd.merge(dfTable, dfTC, on="Table Name", how="left")
- dfTable = dfTable.drop_duplicates() # Drop duplicates (temporary)
  dfTable = dfTable.sort_values(by="Total Size", ascending=False)
  dfTable.reset_index(drop=True, inplace=True)
  export_Table = dfTable.copy()
@@ -318,7 +318,7 @@ def vertipaq_analyzer(
  pctList = ["% DB"]
  dfTable[pctList] = dfTable[pctList].applymap("{:.2f}%".format)

- ## Relationships
+ # Relationships
  # dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True)
  dfR = pd.merge(
  dfR,
@@ -359,12 +359,17 @@ def vertipaq_analyzer(
  intList.remove("Missing Rows")
  dfR[intList] = dfR[intList].applymap("{:,}".format)

- ## Partitions
+ # Partitions
  dfP = dfP[
- ["Table Name", "Partition Name", "Mode", "Record Count", "Segment Count"]
- ].sort_values(
- by="Record Count", ascending=False
- ) # , 'Records per Segment'
+ [
+ "Table Name",
+ "Partition Name",
+ "Mode",
+ "Record Count",
+ "Segment Count",
+ # "Records per Segment",
+ ]
+ ].sort_values(by="Record Count", ascending=False)
  dfP["Records per Segment"] = round(
  dfP["Record Count"] / dfP["Segment Count"], 2
  ) # Remove after records per segment is fixed
@@ -373,17 +378,19 @@ def vertipaq_analyzer(
  intList = ["Record Count", "Segment Count", "Records per Segment"]
  dfP[intList] = dfP[intList].applymap("{:,}".format)

- ## Hierarchies
+ # Hierarchies
  dfH_filt = dfH[dfH["Level Ordinal"] == 0]
  dfH_filt = dfH_filt[["Table Name", "Hierarchy Name", "Used Size"]].sort_values(
  by="Used Size", ascending=False
  )
  dfH_filt.reset_index(drop=True, inplace=True)
+ dfH_filt.fillna({"Used Size": 0}, inplace=True)
+ dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
  export_Hier = dfH_filt.copy()
  intList = ["Used Size"]
  dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)

- ## Model
+ # Model
  if total_size >= 1000000000:
  y = total_size / (1024**3) * 1000000000
  elif total_size >= 1000000:
@@ -392,23 +399,19 @@ def vertipaq_analyzer(
  y = total_size / (1024) * 1000
  y = round(y)

- tblCount = len(dfT)
- colCount = len(dfC_filt)
- compatLevel = dfD["Compatibility Level"].iloc[0]
- defMode = dfD["Model Default Mode"].iloc[0]
-
  dfModel = pd.DataFrame(
  {
  "Dataset Name": dataset,
  "Total Size": y,
- "Table Count": tblCount,
- "Column Count": colCount,
- "Compatibility Level": compatLevel,
- "Default Mode": defMode,
+ "Table Count": table_count,
+ "Column Count": column_count,
+ "Compatibility Level": compat_level,
+ "Default Mode": def_mode,
  },
  index=[0],
  )
  dfModel.reset_index(drop=True, inplace=True)
+ dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
  export_Model = dfModel.copy()
  intList = ["Total Size", "Table Count", "Column Count"]
  dfModel[intList] = dfModel[intList].applymap("{:,}".format)
@@ -429,22 +432,24 @@ def vertipaq_analyzer(

  visualize_vertipaq(dfs)

- ### Export vertipaq to delta tables in lakehouse
+ # Export vertipaq to delta tables in lakehouse
  if export in ["table", "zip"]:
- lakeAttach = lakehouse_attached()
- if lakeAttach is False:
- raise ValueError(f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.")
+ if not lakehouse_attached():
+ raise ValueError(
+ f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
+ )

  if export == "table":
  spark = SparkSession.builder.getOrCreate()

  lakehouse_id = fabric.get_lakehouse_id()
+ lake_workspace = fabric.resolve_workspace_name()
  lakehouse = resolve_lakehouse_name(
- lakehouse_id=lakehouse_id, workspace=workspace
+ lakehouse_id=lakehouse_id, workspace=lake_workspace
  )
  lakeTName = "vertipaq_analyzer_model"

- lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+ lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
  lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

  query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
@@ -465,29 +470,52 @@ def vertipaq_analyzer(
  "export_Model": ["Model", export_Model],
  }

- print(f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n")
+ print(
+ f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
+ )
  now = datetime.datetime.now()
+ dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+ dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+ configured_by = dfD_filt["Configured By"].iloc[0]
+ capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+
  for key, (obj, df) in dfMap.items():
- df["Timestamp"] = now
+ df["Capacity Name"] = capacity_name
+ df["Capacity Id"] = capacity_id
+ df["Configured By"] = configured_by
  df["Workspace Name"] = workspace
+ df["Workspace Id"] = fabric.resolve_workspace_id(workspace)
  df["Dataset Name"] = dataset
+ df["Dataset Id"] = resolve_dataset_id(dataset, workspace)
  df["RunId"] = runId
+ df["Timestamp"] = now

- colName = "Workspace Name"
+ colName = "Capacity Name"
  df.insert(0, colName, df.pop(colName))
- colName = "Dataset Name"
+ colName = "Capacity Id"
  df.insert(1, colName, df.pop(colName))
+ colName = "Workspace Name"
+ df.insert(2, colName, df.pop(colName))
+ colName = "Workspace Id"
+ df.insert(3, colName, df.pop(colName))
+ colName = "Dataset Name"
+ df.insert(4, colName, df.pop(colName))
+ colName = "Dataset Id"
+ df.insert(5, colName, df.pop(colName))
+ colName = "Configured By"
+ df.insert(6, colName, df.pop(colName))

  df.columns = df.columns.str.replace(" ", "_")

  delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
- spark_df = spark.createDataFrame(df)
- spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name)
- print(
- f"{icons.bullet} Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table."
+ save_as_delta_table(
+ dataframe=df,
+ delta_table_name=delta_table_name,
+ write_mode="append",
+ merge_schema=True,
  )

- ### Export vertipaq to zip file within the lakehouse
+ # Export vertipaq to zip file within the lakehouse
  if export == "zip":
  dataFrames = {
  "dfModel": dfModel,
@@ -510,13 +538,13 @@ def vertipaq_analyzer(

  # Create CSV files based on dataframes
  for fileName, df in dataFrames.items():
- filePath = os.path.join(subFolderPath, fileName + ext)
+ filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
  df.to_csv(filePath, index=False)

  # Create a zip file and add CSV files to it
  with zipfile.ZipFile(zipFilePath, "w") as zipf:
  for fileName in dataFrames:
- filePath = os.path.join(subFolderPath, fileName + ext)
+ filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
  zipf.write(filePath, os.path.basename(filePath))

  # Clean up: remove the individual CSV files
@@ -525,7 +553,8 @@ def vertipaq_analyzer(
  if os.path.exists(filePath):
  os.remove(filePath)
  print(
- f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook."
+ f"{icons.green_dot} The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved "
+ f"to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook."
  )


@@ -832,7 +861,7 @@ def visualize_vertipaq(dataframes):
  (tooltipDF["ViewName"] == vw) & (tooltipDF["ColumnName"] == col)
  ]
  tt = tooltipDF_filt["Tooltip"].iloc[0]
- except:
+ except Exception:
  pass
  df_html = df_html.replace(f"<th>{col}</th>", f'<th title="{tt}">{col}</th>')
  content_html += (
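The signature change above relies on a soft-deprecation shim rather than an immediate TypeError: the removed keyword is absorbed by `**kwargs`, a notice is printed, and the value is discarded. A stripped-down sketch of that pattern, using an illustrative function name rather than anything from the library:

```python
from typing import Optional


def analyzer_like(dataset: str, workspace: Optional[str] = None, **kwargs):
    """Illustrative only; mirrors the soft-deprecation pattern in the diff above."""
    # Old call sites that still pass lakehouse_workspace= keep working;
    # the value is acknowledged with a notice and then dropped.
    if "lakehouse_workspace" in kwargs:
        print(
            "The 'lakehouse_workspace' parameter has been deprecated as it is "
            "no longer necessary. Please remove it going forward."
        )
        del kwargs["lakehouse_workspace"]
    # ...the rest of the function proceeds without the deprecated value...
    return dataset, workspace
```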
sempy_labs/directlake/__init__.py CHANGED
@@ -1,7 +1,9 @@
  from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare
  from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync
- from sempy_labs.directlake._fallback import (
+ from sempy_labs.directlake._dl_helper import (
  check_fallback_reason,
+ generate_direct_lake_semantic_model,
+ get_direct_lake_source,
  )
  from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse
  from sempy_labs.directlake._get_shared_expression import get_shared_expression
@@ -21,6 +23,7 @@ from sempy_labs.directlake._update_directlake_model_lakehouse_connection import
  )
  from sempy_labs.directlake._update_directlake_partition_entity import (
  update_direct_lake_partition_entity,
+ add_table_to_direct_lake_semantic_model,
  )
  from sempy_labs.directlake._warm_cache import (
  warm_direct_lake_cache_isresident,
@@ -42,4 +45,7 @@ __all__ = [
  "update_direct_lake_partition_entity",
  "warm_direct_lake_cache_isresident",
  "warm_direct_lake_cache_perspective",
+ "add_table_to_direct_lake_semantic_model",
+ "generate_direct_lake_semantic_model",
+ "get_direct_lake_source",
  ]
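`get_direct_lake_source` is now part of the public `sempy_labs.directlake` surface, and the refactored callers in this diff unpack it into a four-part result. The sketch below mirrors only how those callers consume it (the exact return contract is not documented in this diff); dataset and workspace names are placeholders:

```python
import sempy.fabric as fabric
from sempy_labs.directlake import get_direct_lake_source

artifact_type, artifact_name, artifact_id, workspace_id = get_direct_lake_source(
    dataset="Sales Model", workspace="My Workspace"  # placeholders
)

if artifact_type == "Lakehouse":
    # Same resolution step the refactored callers perform.
    lakehouse_workspace = fabric.resolve_workspace_name(workspace_id)
    print(f"Sourced from lakehouse '{artifact_name}' in '{lakehouse_workspace}'.")
elif artifact_type == "Warehouse":
    print("Model sources from a warehouse; lakehouse-only helpers raise ValueError.")
```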
sempy_labs/directlake/_directlake_schema_compare.py CHANGED
@@ -1,24 +1,22 @@
- import sempy
  import sempy.fabric as fabric
  import pandas as pd
  from sempy_labs._helper_functions import (
  format_dax_object_name,
- resolve_lakehouse_name,
- get_direct_lake_sql_endpoint,
  )
  from IPython.display import display
- from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+ from sempy_labs.lakehouse import get_lakehouse_columns
+ from sempy_labs.directlake._dl_helper import get_direct_lake_source
  from sempy_labs._list_functions import list_tables
  from typing import Optional
  import sempy_labs._icons as icons
  from sempy._utils._log import log

+
  @log
  def direct_lake_schema_compare(
  dataset: str,
  workspace: Optional[str] = None,
- lakehouse: Optional[str] = None,
- lakehouse_workspace: Optional[str] = None,
+ **kwargs,
  ):
  """
  Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist.
@@ -31,38 +29,41 @@ def direct_lake_schema_compare(
  The Fabric workspace name.
  Defaults to None which resolves to the workspace of the attached lakehouse
  or if no lakehouse attached, resolves to the workspace of the notebook.
- lakehouse : str, default=None
- The Fabric lakehouse used by the Direct Lake semantic model.
- Defaults to None which resolves to the lakehouse attached to the notebook.
- lakehouse_workspace : str, default=None
- The Fabric workspace used by the lakehouse.
- Defaults to None which resolves to the workspace of the attached lakehouse
- or if no lakehouse attached, resolves to the workspace of the notebook.
  """

+ if "lakehouse" in kwargs:
+ print(
+ "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+ )
+ del kwargs["lakehouse"]
+ if "lakehouse_workspace" in kwargs:
+ print(
+ "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+ )
+ del kwargs["lakehouse_workspace"]
+
  workspace = fabric.resolve_workspace_name(workspace)

- if lakehouse_workspace is None:
- lakehouse_workspace = workspace
+ artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+ get_direct_lake_source(dataset=dataset, workspace=workspace)
+ )
+ lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

- if lakehouse is None:
- lakehouse_id = fabric.get_lakehouse_id()
- lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+ if artifact_type == "Warehouse":
+ raise ValueError(
+ f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+ )

  dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
- sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
- dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
- dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
-
- if len(dfI_filt) == 0:
- raise ValueError(f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.")

  if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
- raise ValueError(f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode.")
+ raise ValueError(
+ f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
+ )

  dfT = list_tables(dataset, workspace)
  dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
- lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
+ lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)

  dfT.rename(columns={"Type": "Table Type"}, inplace=True)
  dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
@@ -88,19 +89,21 @@ def direct_lake_schema_compare(

  if len(missingtbls) == 0:
  print(
- f"{icons.green_dot} All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+ f"{icons.green_dot} All tables exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
  )
  else:
  print(
- f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+ f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace"
+ f" but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
  )
  display(missingtbls)
  if len(missingcols) == 0:
  print(
- f"{icons.green_dot} All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+ f"{icons.green_dot} All columns exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
  )
  else:
  print(
- f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+ f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace "
+ f"but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
  )
  display(missingcols)
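For completeness, a hedged sketch of a `direct_lake_schema_compare` call after this change; the model and workspace names are placeholders:

```python
from sempy_labs.directlake import direct_lake_schema_compare

# The lakehouse and its workspace are now resolved via get_direct_lake_source;
# passing lakehouse= or lakehouse_workspace= only prints a deprecation notice.
direct_lake_schema_compare(dataset="Sales Model", workspace="My Workspace")
```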