semantic-link-labs 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of semantic-link-labs might be problematic.

Files changed (103)
  1. semantic_link_labs-0.7.0.dist-info/METADATA +148 -0
  2. semantic_link_labs-0.7.0.dist-info/RECORD +111 -0
  3. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +26 -2
  5. sempy_labs/_ai.py +3 -65
  6. sempy_labs/_bpa_translation/_translations_am-ET.po +828 -0
  7. sempy_labs/_bpa_translation/_translations_ar-AE.po +860 -0
  8. sempy_labs/_bpa_translation/_translations_cs-CZ.po +894 -0
  9. sempy_labs/_bpa_translation/_translations_da-DK.po +894 -0
  10. sempy_labs/_bpa_translation/_translations_de-DE.po +933 -0
  11. sempy_labs/_bpa_translation/_translations_el-GR.po +936 -0
  12. sempy_labs/_bpa_translation/_translations_es-ES.po +915 -0
  13. sempy_labs/_bpa_translation/_translations_fa-IR.po +883 -0
  14. sempy_labs/_bpa_translation/_translations_fr-FR.po +938 -0
  15. sempy_labs/_bpa_translation/_translations_ga-IE.po +912 -0
  16. sempy_labs/_bpa_translation/_translations_he-IL.po +855 -0
  17. sempy_labs/_bpa_translation/_translations_hi-IN.po +892 -0
  18. sempy_labs/_bpa_translation/_translations_hu-HU.po +910 -0
  19. sempy_labs/_bpa_translation/_translations_is-IS.po +887 -0
  20. sempy_labs/_bpa_translation/_translations_it-IT.po +931 -0
  21. sempy_labs/_bpa_translation/_translations_ja-JP.po +805 -0
  22. sempy_labs/_bpa_translation/_translations_nl-NL.po +924 -0
  23. sempy_labs/_bpa_translation/_translations_pl-PL.po +913 -0
  24. sempy_labs/_bpa_translation/_translations_pt-BR.po +909 -0
  25. sempy_labs/_bpa_translation/_translations_pt-PT.po +904 -0
  26. sempy_labs/_bpa_translation/_translations_ru-RU.po +909 -0
  27. sempy_labs/_bpa_translation/_translations_ta-IN.po +922 -0
  28. sempy_labs/_bpa_translation/_translations_te-IN.po +896 -0
  29. sempy_labs/_bpa_translation/_translations_th-TH.po +873 -0
  30. sempy_labs/_bpa_translation/_translations_zh-CN.po +767 -0
  31. sempy_labs/_bpa_translation/_translations_zu-ZA.po +916 -0
  32. sempy_labs/_clear_cache.py +9 -4
  33. sempy_labs/_generate_semantic_model.py +30 -56
  34. sempy_labs/_helper_functions.py +358 -14
  35. sempy_labs/_icons.py +10 -1
  36. sempy_labs/_list_functions.py +478 -237
  37. sempy_labs/_model_bpa.py +194 -18
  38. sempy_labs/_model_bpa_bulk.py +363 -0
  39. sempy_labs/_model_bpa_rules.py +4 -4
  40. sempy_labs/_model_dependencies.py +12 -10
  41. sempy_labs/_one_lake_integration.py +7 -7
  42. sempy_labs/_query_scale_out.py +45 -66
  43. sempy_labs/_refresh_semantic_model.py +7 -0
  44. sempy_labs/_translations.py +154 -1
  45. sempy_labs/_vertipaq.py +103 -90
  46. sempy_labs/directlake/__init__.py +5 -1
  47. sempy_labs/directlake/_directlake_schema_compare.py +27 -31
  48. sempy_labs/directlake/_directlake_schema_sync.py +55 -66
  49. sempy_labs/directlake/_dl_helper.py +233 -0
  50. sempy_labs/directlake/_get_directlake_lakehouse.py +6 -7
  51. sempy_labs/directlake/_get_shared_expression.py +1 -1
  52. sempy_labs/directlake/_guardrails.py +17 -13
  53. sempy_labs/directlake/_update_directlake_partition_entity.py +54 -30
  54. sempy_labs/directlake/_warm_cache.py +1 -1
  55. sempy_labs/lakehouse/_get_lakehouse_tables.py +61 -69
  56. sempy_labs/lakehouse/_lakehouse.py +3 -2
  57. sempy_labs/lakehouse/_shortcuts.py +1 -1
  58. sempy_labs/migration/_create_pqt_file.py +174 -182
  59. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +236 -268
  60. sempy_labs/migration/_migrate_calctables_to_semantic_model.py +75 -73
  61. sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +442 -426
  62. sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +91 -97
  63. sempy_labs/migration/_refresh_calc_tables.py +92 -101
  64. sempy_labs/report/_BPAReportTemplate.json +232 -0
  65. sempy_labs/report/__init__.py +6 -2
  66. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +9 -0
  67. sempy_labs/report/_bpareporttemplate/.platform +11 -0
  68. sempy_labs/report/_bpareporttemplate/StaticResources/SharedResources/BaseThemes/CY24SU06.json +710 -0
  69. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/page.json +11 -0
  70. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/1b08bce3bebabb0a27a8/visual.json +191 -0
  71. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/2f22ddb70c301693c165/visual.json +438 -0
  72. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/3b1182230aa6c600b43a/visual.json +127 -0
  73. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/58577ba6380c69891500/visual.json +576 -0
  74. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/a2a8fa5028b3b776c96c/visual.json +207 -0
  75. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/adfd47ef30652707b987/visual.json +506 -0
  76. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/b6a80ee459e716e170b1/visual.json +127 -0
  77. sempy_labs/report/_bpareporttemplate/definition/pages/01d72098bda5055bd500/visuals/ce3130a721c020cc3d81/visual.json +513 -0
  78. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/page.json +8 -0
  79. sempy_labs/report/_bpareporttemplate/definition/pages/92735ae19b31712208ad/visuals/66e60dfb526437cd78d1/visual.json +112 -0
  80. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/page.json +11 -0
  81. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/07deb8bce824e1be37d7/visual.json +513 -0
  82. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0b1c68838818b32ad03b/visual.json +352 -0
  83. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0c171de9d2683d10b930/visual.json +37 -0
  84. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/0efa01be0510e40a645e/visual.json +542 -0
  85. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/6bf2f0eb830ab53cc668/visual.json +221 -0
  86. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/88d8141cb8500b60030c/visual.json +127 -0
  87. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/a753273590beed656a03/visual.json +576 -0
  88. sempy_labs/report/_bpareporttemplate/definition/pages/c597da16dc7e63222a82/visuals/b8fdc82cddd61ac447bc/visual.json +127 -0
  89. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/page.json +9 -0
  90. sempy_labs/report/_bpareporttemplate/definition/pages/d37dce724a0ccc30044b/visuals/ce8532a7e25020271077/visual.json +38 -0
  91. sempy_labs/report/_bpareporttemplate/definition/pages/pages.json +10 -0
  92. sempy_labs/report/_bpareporttemplate/definition/report.json +176 -0
  93. sempy_labs/report/_bpareporttemplate/definition/version.json +4 -0
  94. sempy_labs/report/_bpareporttemplate/definition.pbir +14 -0
  95. sempy_labs/report/_generate_report.py +255 -139
  96. sempy_labs/report/_report_functions.py +26 -33
  97. sempy_labs/report/_report_rebind.py +31 -26
  98. sempy_labs/tom/_model.py +75 -58
  99. semantic_link_labs-0.6.0.dist-info/METADATA +0 -22
  100. semantic_link_labs-0.6.0.dist-info/RECORD +0 -54
  101. sempy_labs/directlake/_fallback.py +0 -60
  102. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/LICENSE +0 -0
  103. {semantic_link_labs-0.6.0.dist-info → semantic_link_labs-0.7.0.dist-info}/top_level.txt +0 -0
sempy_labs/_vertipaq.py CHANGED
@@ -9,12 +9,14 @@ import warnings
  from pyspark.sql import SparkSession
  from sempy_labs._helper_functions import (
      format_dax_object_name,
-     get_direct_lake_sql_endpoint,
      resolve_lakehouse_name,
+     resolve_dataset_id,
+     save_as_delta_table,
+     resolve_workspace_capacity,
  )
  from sempy_labs._list_functions import list_relationships
- from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
- from sempy_labs.lakehouse._lakehouse import lakehouse_attached
+ from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables
+ from sempy_labs.directlake import get_direct_lake_source
  from typing import Optional
  from sempy._utils._log import log
  import sempy_labs._icons as icons
@@ -25,8 +27,8 @@ def vertipaq_analyzer(
      dataset: str,
      workspace: Optional[str] = None,
      export: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
      read_stats_from_data: Optional[bool] = False,
+     **kwargs,
  ):
      """
      Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model.
@@ -43,10 +45,6 @@ def vertipaq_analyzer(
          Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function.
          Specifying 'table' will export the results to delta tables (appended) in your lakehouse.
          Default value: None.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse (for Direct Lake semantic models).
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      read_stats_from_data : bool, default=False
          Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse).

@@ -57,6 +55,12 @@ def vertipaq_analyzer(

      from sempy_labs.tom import connect_semantic_model

+     if "lakehouse_workspace" in kwargs:
+         print(
+             f"{icons.info} The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]
+
      pd.options.mode.copy_on_write = True
      warnings.filterwarnings(
          "ignore", message="createDataFrame attempted Arrow optimization*"
@@ -64,9 +68,6 @@ def vertipaq_analyzer(

      workspace = fabric.resolve_workspace_name(workspace)

-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
-
      dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace)
      dfT.rename(columns={"Name": "Table Name"}, inplace=True)
      dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace)
@@ -77,6 +78,9 @@ def vertipaq_analyzer(
      dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
      dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
      dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace)
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )

      with connect_semantic_model(
          dataset=dataset, readonly=True, workspace=workspace
@@ -91,7 +95,7 @@ def vertipaq_analyzer(

          # Direct Lake
          if read_stats_from_data:
-             if is_direct_lake:
+             if is_direct_lake and artifact_type == "Lakehouse":
                  dfC = pd.merge(
                      dfC,
                      dfP[["Table Name", "Query", "Source Type"]],
@@ -102,69 +106,54 @@ def vertipaq_analyzer(
                      (dfC["Source Type"] == "Entity")
                      & (~dfC["Column Name"].str.startswith("RowNumber-"))
                  ]
-                 sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)

-                 # Get lakehouse name from SQL Endpoint ID
-                 dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-                 dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
-
-                 if len(dfI_filt) == 0:
-                     raise ValueError(
-                         f"{icons.red_dot} The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace."
-                         "Please update the lakehouse_workspace parameter."
+                 object_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)
+                 current_workspace_id = fabric.get_workspace_id()
+                 if current_workspace_id != lakehouse_workspace_id:
+                     lakeTables = get_lakehouse_tables(
+                         lakehouse=lakehouse_name, workspace=object_workspace
                      )
-                 else:
-                     lakehouseName = dfI_filt["Display Name"].iloc[0]
-
-                     current_workspace_id = fabric.get_workspace_id()
-                     current_workspace = fabric.resolve_workspace_name(current_workspace_id)
-                     if current_workspace != lakehouse_workspace:
-                         lakeTables = get_lakehouse_tables(
-                             lakehouse=lakehouseName, workspace=lakehouse_workspace
-                         )
-
-                     sql_statements = []
-                     spark = SparkSession.builder.getOrCreate()
-                     # Loop through tables
-                     for lakeTName in dfC_flt["Query"].unique():
-                         query = "SELECT "
-                         columns_in_table = dfC_flt.loc[
-                             dfC_flt["Query"] == lakeTName, "Source"
-                         ].unique()
-
-                         # Loop through columns within those tables
-                         for scName in columns_in_table:
-                             query = query + f"COUNT(DISTINCT({scName})) AS {scName}, "
-
-                         query = query[:-2]
-                         if lakehouse_workspace == current_workspace:
-                             query = query + f" FROM {lakehouseName}.{lakeTName}"
-                         else:
-                             lakeTables_filt = lakeTables[
-                                 lakeTables["Table Name"] == lakeTName
-                             ]
-                             tPath = lakeTables_filt["Location"].iloc[0]
-
-                             df = spark.read.format("delta").load(tPath)
-                             tempTableName = "delta_table_" + lakeTName
-                             df.createOrReplaceTempView(tempTableName)
-                             query = query + f" FROM {tempTableName}"
-                         sql_statements.append((lakeTName, query))
-
-                     for o in sql_statements:
-                         tName = o[0]
-                         query = o[1]
-
-                         df = spark.sql(query)
-
-                         for column in df.columns:
-                             x = df.collect()[0][column]
-                             for i, r in dfC.iterrows():
-                                 if r["Query"] == tName and r["Source"] == column:
-                                     dfC.at[i, "Cardinality"] = x
-
-                     # Remove column added temporarily
-                     dfC.drop(columns=["Query", "Source Type"], inplace=True)
+
+                 sql_statements = []
+                 spark = SparkSession.builder.getOrCreate()
+                 # Loop through tables
+                 for lakeTName in dfC_flt["Query"].unique():
+                     query = "SELECT "
+                     columns_in_table = dfC_flt.loc[
+                         dfC_flt["Query"] == lakeTName, "Source"
+                     ].unique()
+
+                     # Loop through columns within those tables
+                     for scName in columns_in_table:
+                         query = query + f"COUNT(DISTINCT(`{scName}`)) AS `{scName}`, "
+
+                     query = query[:-2]
+                     if lakehouse_workspace_id == current_workspace_id:
+                         query = query + f" FROM {lakehouse_name}.{lakeTName}"
+                     else:
+                         lakeTables_filt = lakeTables[lakeTables["Table Name"] == lakeTName]
+                         tPath = lakeTables_filt["Location"].iloc[0]
+
+                         df = spark.read.format("delta").load(tPath)
+                         tempTableName = "delta_table_" + lakeTName
+                         df.createOrReplaceTempView(tempTableName)
+                         query = query + f" FROM {tempTableName}"
+                     sql_statements.append((lakeTName, query))
+
+                 for o in sql_statements:
+                     tName = o[0]
+                     query = o[1]
+
+                     df = spark.sql(query)
+
+                     for column in df.columns:
+                         x = df.collect()[0][column]
+                         for i, r in dfC.iterrows():
+                             if r["Query"] == tName and r["Source"] == column:
+                                 dfC.at[i, "Cardinality"] = x
+
+                 # Remove column added temporarily
+                 dfC.drop(columns=["Query", "Source Type"], inplace=True)

                  # Direct Lake missing rows
                  dfR = pd.merge(
@@ -211,11 +200,11 @@ def vertipaq_analyzer(
                      toTable = r["To Lake Table"]
                      toColumn = r["To Lake Column"]

-                     if lakehouse_workspace == current_workspace:
+                     if lakehouse_workspace_id == current_workspace_id:
                          query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"
                      else:
-                         tempTableFrom = "delta_table_" + fromTable
-                         tempTableTo = "delta_table_" + toTable
+                         tempTableFrom = f"delta_table_{fromTable}"
+                         tempTableTo = f"delta_table_{toTable}"

                          query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null"

@@ -226,7 +215,7 @@ def vertipaq_analyzer(
                      dfR.at[i, "Missing Rows"] = missingRows

                  dfR["Missing Rows"] = dfR["Missing Rows"].astype(int)
-             else:
+             elif not is_direct_lake:
                  # Calculate missing rows using DAX for non-direct lake
                  for i, r in dfR.iterrows():
                      fromTable = r["From Table"]
@@ -395,6 +384,8 @@ def vertipaq_analyzer(
          by="Used Size", ascending=False
      )
      dfH_filt.reset_index(drop=True, inplace=True)
+     dfH_filt.fillna({"Used Size": 0}, inplace=True)
+     dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int)
      export_Hier = dfH_filt.copy()
      intList = ["Used Size"]
      dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format)
@@ -420,6 +411,7 @@ def vertipaq_analyzer(
          index=[0],
      )
      dfModel.reset_index(drop=True, inplace=True)
+     dfModel["Default Mode"] = dfModel["Default Mode"].astype(str)
      export_Model = dfModel.copy()
      intList = ["Total Size", "Table Count", "Column Count"]
      dfModel[intList] = dfModel[intList].applymap("{:,}".format)
@@ -442,8 +434,7 @@ def vertipaq_analyzer(

      # Export vertipaq to delta tables in lakehouse
      if export in ["table", "zip"]:
-         lakeAttach = lakehouse_attached()
-         if lakeAttach is False:
+         if not lakehouse_attached():
              raise ValueError(
                  f"{icons.red_dot} In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook."
              )
@@ -452,12 +443,13 @@ def vertipaq_analyzer(
          spark = SparkSession.builder.getOrCreate()

          lakehouse_id = fabric.get_lakehouse_id()
+         lake_workspace = fabric.resolve_workspace_name()
          lakehouse = resolve_lakehouse_name(
-             lakehouse_id=lakehouse_id, workspace=workspace
+             lakehouse_id=lakehouse_id, workspace=lake_workspace
          )
          lakeTName = "vertipaq_analyzer_model"

-         lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+         lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace)
          lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName]

          query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}"
@@ -482,24 +474,45 @@ def vertipaq_analyzer(
              f"{icons.in_progress} Saving Vertipaq Analyzer to delta tables in the lakehouse...\n"
          )
          now = datetime.datetime.now()
+         dfD = fabric.list_datasets(workspace=workspace, mode="rest")
+         dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+         configured_by = dfD_filt["Configured By"].iloc[0]
+         capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+
          for key, (obj, df) in dfMap.items():
-             df["Timestamp"] = now
+             df["Capacity Name"] = capacity_name
+             df["Capacity Id"] = capacity_id
+             df["Configured By"] = configured_by
              df["Workspace Name"] = workspace
+             df["Workspace Id"] = fabric.resolve_workspace_id(workspace)
              df["Dataset Name"] = dataset
+             df["Dataset Id"] = resolve_dataset_id(dataset, workspace)
              df["RunId"] = runId
+             df["Timestamp"] = now

-             colName = "Workspace Name"
+             colName = "Capacity Name"
              df.insert(0, colName, df.pop(colName))
-             colName = "Dataset Name"
+             colName = "Capacity Id"
              df.insert(1, colName, df.pop(colName))
+             colName = "Workspace Name"
+             df.insert(2, colName, df.pop(colName))
+             colName = "Workspace Id"
+             df.insert(3, colName, df.pop(colName))
+             colName = "Dataset Name"
+             df.insert(4, colName, df.pop(colName))
+             colName = "Dataset Id"
+             df.insert(5, colName, df.pop(colName))
+             colName = "Configured By"
+             df.insert(6, colName, df.pop(colName))

              df.columns = df.columns.str.replace(" ", "_")

              delta_table_name = f"VertipaqAnalyzer_{obj}".lower()
-             spark_df = spark.createDataFrame(df)
-             spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name)
-             print(
-                 f"{icons.bullet} Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table."
+             save_as_delta_table(
+                 dataframe=df,
+                 delta_table_name=delta_table_name,
+                 write_mode="append",
+                 merge_schema=True,
              )

          # Export vertipaq to zip file within the lakehouse
@@ -525,13 +538,13 @@ def vertipaq_analyzer(

          # Create CSV files based on dataframes
          for fileName, df in dataFrames.items():
-             filePath = os.path.join(subFolderPath, fileName + ext)
+             filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
              df.to_csv(filePath, index=False)

          # Create a zip file and add CSV files to it
          with zipfile.ZipFile(zipFilePath, "w") as zipf:
              for fileName in dataFrames:
-                 filePath = os.path.join(subFolderPath, fileName + ext)
+                 filePath = os.path.join(subFolderPath, f"{fileName}{ext}")
                  zipf.write(filePath, os.path.basename(filePath))

          # Clean up: remove the individual CSV files
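
With 0.7.0, vertipaq_analyzer deprecates the lakehouse_workspace parameter (the Direct Lake source is now resolved via get_direct_lake_source) and enriches the exported delta tables with capacity, workspace, dataset and Configured By columns. A minimal usage sketch based on the signature shown above; the dataset name is a placeholder, and the top-level import assumes vertipaq_analyzer is re-exported from the sempy_labs package root:

    import sempy_labs as labs  # assumes vertipaq_analyzer is exposed at the package root

    # Display the Vertipaq Analyzer statistics and append them to delta tables
    # in the lakehouse attached to the notebook (export=None would only display them).
    labs.vertipaq_analyzer(
        dataset="Sales Model",        # placeholder semantic model name
        workspace=None,               # resolves to the attached lakehouse's or notebook's workspace
        export="table",
        read_stats_from_data=True,    # Direct Lake models gather cardinality via Spark
    )
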
sempy_labs/directlake/__init__.py CHANGED
@@ -1,7 +1,9 @@
  from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare
  from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync
- from sempy_labs.directlake._fallback import (
+ from sempy_labs.directlake._dl_helper import (
      check_fallback_reason,
+     generate_direct_lake_semantic_model,
+     get_direct_lake_source,
  )
  from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse
  from sempy_labs.directlake._get_shared_expression import get_shared_expression
@@ -44,4 +46,6 @@ __all__ = [
      "warm_direct_lake_cache_isresident",
      "warm_direct_lake_cache_perspective",
      "add_table_to_direct_lake_semantic_model",
+     "generate_direct_lake_semantic_model",
+     "get_direct_lake_source",
  ]
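
The new sempy_labs.directlake exports make the Direct Lake source lookup available directly. A short sketch of get_direct_lake_source as used in the diffs above; judging from the unpacking shown there, the return tuple is (artifact type, artifact name, artifact id, workspace id of the artifact), and the dataset and workspace names below are placeholders:

    from sempy_labs.directlake import get_direct_lake_source

    artifact_type, artifact_name, artifact_id, artifact_workspace_id = get_direct_lake_source(
        dataset="Sales Model", workspace="My Workspace"
    )
    # artifact_type is e.g. "Lakehouse" or "Warehouse"; downstream functions in this
    # release branch on it instead of requiring lakehouse/lakehouse_workspace arguments.
    print(artifact_type, artifact_name)
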
sempy_labs/directlake/_directlake_schema_compare.py CHANGED
@@ -2,11 +2,10 @@ import sempy.fabric as fabric
  import pandas as pd
  from sempy_labs._helper_functions import (
      format_dax_object_name,
-     resolve_lakehouse_name,
-     get_direct_lake_sql_endpoint,
  )
  from IPython.display import display
- from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+ from sempy_labs.lakehouse import get_lakehouse_columns
+ from sempy_labs.directlake._dl_helper import get_direct_lake_source
  from sempy_labs._list_functions import list_tables
  from typing import Optional
  import sempy_labs._icons as icons
@@ -17,8 +16,7 @@ from sempy._utils._log import log
  def direct_lake_schema_compare(
      dataset: str,
      workspace: Optional[str] = None,
-     lakehouse: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
+     **kwargs,
  ):
      """
      Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist.
@@ -31,35 +29,33 @@ def direct_lake_schema_compare(
          The Fabric workspace name.
          Defaults to None which resolves to the workspace of the attached lakehouse
          or if no lakehouse attached, resolves to the workspace of the notebook.
-     lakehouse : str, default=None
-         The Fabric lakehouse used by the Direct Lake semantic model.
-         Defaults to None which resolves to the lakehouse attached to the notebook.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse.
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      """

-     workspace = fabric.resolve_workspace_name(workspace)
-
-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
+     if "lakehouse" in kwargs:
+         print(
+             "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse"]
+     if "lakehouse_workspace" in kwargs:
+         print(
+             "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]

-     if lakehouse is None:
-         lakehouse_id = fabric.get_lakehouse_id()
-         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+     workspace = fabric.resolve_workspace_name(workspace)

-     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-     sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
-     dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-     dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

-     if len(dfI_filt) == 0:
+     if artifact_type == "Warehouse":
          raise ValueError(
-             f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the "
-             f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
          )

+     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+
      if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()):
          raise ValueError(
              f"{icons.red_dot} The '{dataset}' semantic model is not in Direct Lake mode."
@@ -67,7 +63,7 @@ def direct_lake_schema_compare(

      dfT = list_tables(dataset, workspace)
      dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
-     lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
+     lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)

      dfT.rename(columns={"Type": "Table Type"}, inplace=True)
      dfP_filt = dfP[dfP["Mode"] == "DirectLake"]
@@ -93,21 +89,21 @@ def direct_lake_schema_compare(

      if len(missingtbls) == 0:
          print(
-             f"{icons.green_dot} All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f"{icons.green_dot} All tables exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
      else:
          print(
              f"{icons.yellow_dot} The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace"
-             f" but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f" but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
          display(missingtbls)
      if len(missingcols) == 0:
          print(
-             f"{icons.green_dot} All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f"{icons.green_dot} All columns exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
      else:
          print(
              f"{icons.yellow_dot} The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace "
-             f"but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace."
+             f"but do not exist in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace}' workspace."
          )
          display(missingcols)
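
Since the lakehouse and lakehouse_workspace parameters of direct_lake_schema_compare are now deprecated no-ops, a 0.7.0 call only needs the model and, optionally, its workspace; the backing lakehouse is resolved through get_direct_lake_source. A sketch with placeholder names:

    from sempy_labs.directlake import direct_lake_schema_compare

    # Passing 0.6.0-style lakehouse/lakehouse_workspace arguments would only trigger
    # the deprecation notice shown in the diff; they are no longer required.
    direct_lake_schema_compare(dataset="Sales Model", workspace="My Workspace")
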
sempy_labs/directlake/_directlake_schema_sync.py CHANGED
@@ -1,13 +1,8 @@
  import sempy
  import sempy.fabric as fabric
- import pandas as pd
- from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns
+ from sempy_labs.lakehouse import get_lakehouse_columns
+ from sempy_labs.directlake._dl_helper import get_direct_lake_source
  from sempy_labs.tom import connect_semantic_model
- from sempy_labs._helper_functions import (
-     format_dax_object_name,
-     resolve_lakehouse_name,
-     get_direct_lake_sql_endpoint,
- )
  from typing import Optional
  from sempy._utils._log import log
  import sempy_labs._icons as icons
@@ -18,8 +13,7 @@ def direct_lake_schema_sync(
      dataset: str,
      workspace: Optional[str] = None,
      add_to_model: Optional[bool] = False,
-     lakehouse: Optional[str] = None,
-     lakehouse_workspace: Optional[str] = None,
+     **kwargs,
  ):
      """
      Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model).
@@ -34,84 +28,79 @@ def direct_lake_schema_sync(
          or if no lakehouse attached, resolves to the workspace of the notebook.
      add_to_model : bool, default=False
          If set to True, columns which exist in the lakehouse but do not exist in the semantic model are added to the semantic model. No new tables are added.
-     lakehouse : str, default=None
-         The Fabric lakehouse used by the Direct Lake semantic model.
-         Defaults to None which resolves to the lakehouse attached to the notebook.
-     lakehouse_workspace : str, default=None
-         The Fabric workspace used by the lakehouse.
-         Defaults to None which resolves to the workspace of the attached lakehouse
-         or if no lakehouse attached, resolves to the workspace of the notebook.
      """

      sempy.fabric._client._utils._init_analysis_services()
      import Microsoft.AnalysisServices.Tabular as TOM
      import System

-     workspace = fabric.resolve_workspace_name(workspace)
-
-     if lakehouse_workspace is None:
-         lakehouse_workspace = workspace
-
-     if lakehouse is None:
-         lakehouse_id = fabric.get_lakehouse_id()
-         lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
+     if "lakehouse" in kwargs:
+         print(
+             "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse"]
+     if "lakehouse_workspace" in kwargs:
+         print(
+             "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward."
+         )
+         del kwargs["lakehouse_workspace"]

-     sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
+     workspace = fabric.resolve_workspace_name(workspace)

-     dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint")
-     dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)]
+     artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = (
+         get_direct_lake_source(dataset=dataset, workspace=workspace)
+     )

-     if len(dfI_filt) == 0:
+     if artifact_type == "Warehouse":
          raise ValueError(
-             f"{icons.red_dot} The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the "
-             f"'{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified."
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
          )
+     lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id)

-     dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
-     dfP_filt = dfP[dfP["Source Type"] == "Entity"]
-     dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
-     dfC_filt = dfC[dfC["Table Name"].isin(dfP_filt["Table Name"].values)]
-     dfC_filt = pd.merge(
-         dfC_filt, dfP_filt[["Table Name", "Query"]], on="Table Name", how="left"
-     )
-     dfC_filt["Column Object"] = format_dax_object_name(
-         dfC_filt["Query"], dfC_filt["Source"]
-     )
+     if artifact_type == "Warehouse":
+         raise ValueError(
+             f"{icons.red_dot} This function is only valid for Direct Lake semantic models which source from Fabric lakehouses (not warehouses)."
+         )

-     lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
-     lc_filt = lc[lc["Table Name"].isin(dfP_filt["Query"].values)]
+     lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace)

      with connect_semantic_model(
          dataset=dataset, readonly=False, workspace=workspace
      ) as tom:

-         for i, r in lc_filt.iterrows():
+         for i, r in lc.iterrows():
              lakeTName = r["Table Name"]
              lakeCName = r["Column Name"]
-             fullColName = r["Full Column Name"]
              dType = r["Data Type"]

-             if fullColName not in dfC_filt["Column Object"].values:
-                 dfL = dfP_filt[dfP_filt["Query"] == lakeTName]
-                 tName = dfL["Table Name"].iloc[0]
-                 if add_to_model:
-                     col = TOM.DataColumn()
-                     col.Name = lakeCName
-                     col.SourceColumn = lakeCName
-                     dt = icons.data_type_mapping.get(dType)
-                     try:
-                         col.DataType = System.Enum.Parse(TOM.DataType, dt)
-                     except Exception as e:
-                         raise ValueError(
-                             f"{icons.red_dot} Failed to map '{dType}' data type to the semantic model data types."
-                         ) from e
-
-                     tom.model.Tables[tName].Columns.Add(col)
-                     print(
-                         f"{icons.green_dot} The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' "
-                         f"data type within the '{dataset}' semantic model within the '{workspace}' workspace."
-                     )
-                 else:
+             if any(
+                 p.Source.EntityName == lakeTName
+                 for p in tom.all_partitions()
+                 if p.SourceType == TOM.PartitionSourceType.Entity
+             ):
+                 table_name = next(
+                     t.Name
+                     for t in tom.model.Tables
+                     for p in t.Partitions
+                     if p.SourceType == TOM.PartitionSourceType.Entity
+                     and p.Source.EntityName == lakeTName
+                 )
+
+                 if not any(
+                     c.SourceColumn == lakeCName and c.Parent.Name == table_name
+                     for c in tom.all_columns()
+                 ):
                      print(
-                         f"{icons.yellow_dot} The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace."
+                         f"{icons.yellow_dot} The '{lakeCName}' column exists in the '{lakeTName}' lakehouse table but not in the '{dataset}' semantic model within the '{workspace}' workspace."
                      )
+                     if add_to_model:
+                         dt = icons.data_type_mapping.get(dType)
+                         tom.add_data_column(
+                             table_name=table_name,
+                             column_name=lakeCName,
+                             source_column=lakeCName,
+                             data_type=System.Enum.Parse(TOM.DataType, dt),
+                         )
+                         print(
+                             f"{icons.green_dot} The '{lakeCName}' column in the '{lakeTName}' lakehouse table was added to the '{dataset}' semantic model within the '{workspace}' workspace."
+                         )
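
direct_lake_schema_sync follows the same pattern: the backing lakehouse is resolved from the model itself, and add_to_model controls whether missing lakehouse columns are added to the corresponding tables (via tom.add_data_column in this release). A sketch with placeholder names:

    from sempy_labs.directlake import direct_lake_schema_sync

    # Report lakehouse columns that are missing from the semantic model and,
    # because add_to_model=True, add them to the matching tables.
    direct_lake_schema_sync(
        dataset="Sales Model",      # placeholder semantic model name
        workspace="My Workspace",   # placeholder workspace name
        add_to_model=True,
    )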