semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)
  1. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/METADATA +30 -22
  2. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/RECORD +47 -40
  3. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/WHEEL +1 -1
  4. sempy_labs/__init__.py +28 -1
  5. sempy_labs/_clear_cache.py +12 -0
  6. sempy_labs/_dax.py +8 -2
  7. sempy_labs/_delta_analyzer.py +17 -26
  8. sempy_labs/_environments.py +19 -1
  9. sempy_labs/_generate_semantic_model.py +7 -8
  10. sempy_labs/_helper_functions.py +351 -151
  11. sempy_labs/_kql_databases.py +18 -0
  12. sempy_labs/_kusto.py +137 -0
  13. sempy_labs/_list_functions.py +18 -36
  14. sempy_labs/_model_bpa_rules.py +13 -3
  15. sempy_labs/_notebooks.py +44 -11
  16. sempy_labs/_semantic_models.py +93 -1
  17. sempy_labs/_sql.py +3 -2
  18. sempy_labs/_tags.py +194 -0
  19. sempy_labs/_variable_libraries.py +89 -0
  20. sempy_labs/_vertipaq.py +6 -6
  21. sempy_labs/_vpax.py +386 -0
  22. sempy_labs/_warehouses.py +3 -3
  23. sempy_labs/admin/__init__.py +14 -0
  24. sempy_labs/admin/_artifacts.py +3 -3
  25. sempy_labs/admin/_capacities.py +161 -1
  26. sempy_labs/admin/_dataflows.py +45 -0
  27. sempy_labs/admin/_items.py +16 -11
  28. sempy_labs/admin/_tags.py +126 -0
  29. sempy_labs/admin/_tenant.py +5 -5
  30. sempy_labs/directlake/_generate_shared_expression.py +29 -26
  31. sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +55 -5
  32. sempy_labs/dotnet_lib/dotnet.runtime.config.json +10 -0
  33. sempy_labs/lakehouse/__init__.py +16 -0
  34. sempy_labs/lakehouse/_blobs.py +115 -63
  35. sempy_labs/lakehouse/_get_lakehouse_columns.py +41 -18
  36. sempy_labs/lakehouse/_get_lakehouse_tables.py +62 -47
  37. sempy_labs/lakehouse/_helper.py +211 -0
  38. sempy_labs/lakehouse/_lakehouse.py +45 -36
  39. sempy_labs/lakehouse/_livy_sessions.py +137 -0
  40. sempy_labs/migration/_migrate_calctables_to_lakehouse.py +7 -12
  41. sempy_labs/migration/_refresh_calc_tables.py +7 -6
  42. sempy_labs/report/_download_report.py +1 -1
  43. sempy_labs/report/_generate_report.py +5 -1
  44. sempy_labs/report/_reportwrapper.py +31 -18
  45. sempy_labs/tom/_model.py +104 -35
  46. sempy_labs/report/_bpareporttemplate/.pbi/localSettings.json +0 -9
  47. sempy_labs/report/_bpareporttemplate/.platform +0 -11
  48. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/licenses/LICENSE +0 -0
  49. {semantic_link_labs-0.9.9.dist-info → semantic_link_labs-0.9.11.dist-info}/top_level.txt +0 -0

sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py

@@ -7,7 +7,7 @@ from sempy_labs._helper_functions import (
 )
 from sempy._utils._log import log
 from sempy_labs.tom import connect_semantic_model
-from typing import Optional
+from typing import Optional, List
 import sempy_labs._icons as icons
 from uuid import UUID
 import re
@@ -19,7 +19,9 @@ def _extract_expression_list(expression):
     """

     pattern_sql = r'Sql\.Database\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\)'
-    pattern_no_sql = r'AzureDataLakeStorage\s*\{\s*"server".*?:\s*onelake\.dfs\.fabric\.microsoft\.com"\s*,\s*"path"\s*:\s*"/([\da-fA-F-]+)\s*/\s*([\da-fA-F-]+)\s*/"\s*\}'
+    pattern_no_sql = (
+        r'AzureStorage\.DataLake\(".*?/([0-9a-fA-F\-]{36})/([0-9a-fA-F\-]{36})"'
+    )

     match_sql = re.search(pattern_sql, expression)
     match_no_sql = re.search(pattern_no_sql, expression)
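
For illustration, the new pattern captures the two GUIDs (workspace and item) from the OneLake path in a Direct Lake over OneLake expression. The sample expression string below is invented for this sketch; the exact M text Fabric generates may differ.

import re

pattern_no_sql = r'AzureStorage\.DataLake\(".*?/([0-9a-fA-F\-]{36})/([0-9a-fA-F\-]{36})"'
# Hypothetical expression text, for demonstration only.
expression = 'AzureStorage.DataLake("https://onelake.dfs.fabric.microsoft.com/11111111-2222-3333-4444-555555555555/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")'

match = re.search(pattern_no_sql, expression)
if match:
    workspace_id, item_id = match.groups()  # the two 36-character GUIDs from the OneLake path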
@@ -102,6 +104,7 @@ def update_direct_lake_model_connection(
     source_type: str = "Lakehouse",
     source_workspace: Optional[str | UUID] = None,
     use_sql_endpoint: bool = True,
+    tables: Optional[str | List[str]] = None,
 ):
     """
     Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse/warehouse.
@@ -126,12 +129,19 @@
     use_sql_endpoint : bool, default=True
         If True, the SQL Endpoint will be used for the connection.
         If False, Direct Lake over OneLake will be used.
+    tables : str | List[str], default=None
+        The name(s) of the table(s) to update in the Direct Lake semantic model.
+        If None, all tables will be updated (if there is only one expression).
+        If multiple tables are specified, they must be provided as a list.
     """
     if use_sql_endpoint:
         icons.sll_tags.append("UpdateDLConnection_SQL")
     else:
         icons.sll_tags.append("UpdateDLConnection_DLOL")

+    if isinstance(tables, str):
+        tables = [tables]
+
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

@@ -174,7 +184,12 @@ def update_direct_lake_model_connection(
         )

         # Update the single connection expression
-        if len(expressions) == 1:
+        if len(expressions) > 1 and not tables:
+            print(
+                f"{icons.info} Multiple expressions found in the model. Please specify the tables to update using the 'tables parameter."
+            )
+            return
+        elif len(expressions) == 1 and not tables:
             expr = expressions[0]
             tom.model.Expressions[expr].Expression = shared_expression

@@ -182,6 +197,41 @@
                 f"{icons.green_dot} The expression in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{source}' {source_type.lower()} in the '{source_workspace}' workspace."
             )
         else:
-            print(
-                f"{icons.info} Multiple expressions found in the model. Please use the update_direct_lake_partition_entity function to update specific tables."
+            import sempy
+
+            sempy.fabric._client._utils._init_analysis_services()
+            import Microsoft.AnalysisServices.Tabular as TOM
+
+            expr_list = _extract_expression_list(shared_expression)
+
+            expr_name = next(
+                (name for name, exp in expression_dict.items() if exp == expr_list),
+                None,
             )
+
+            # If the expression does not already exist, create it
+            def generate_unique_name(existing_names):
+                i = 1
+                while True:
+                    candidate = f"DatabaseQuery{i}"
+                    if candidate not in existing_names:
+                        return candidate
+                    i += 1
+
+            if not expr_name:
+                expr_name = generate_unique_name(expressions)
+                tom.add_expression(name=expr_name, expression=shared_expression)
+
+            all_tables = [t.Name for t in tom.model.Tables]
+            for t_name in tables:
+                if t_name not in all_tables:
+                    raise ValueError(
+                        f"{icons.red_dot} The table '{t_name}' does not exist in the '{dataset_name}' semantic model within the '{workspace_name}' workspace."
+                    )
+                p = next(p for p in tom.model.Tables[t_name].Partitions)
+                if p.Mode != TOM.ModeType.DirectLake:
+                    raise ValueError(
+                        f"{icons.red_dot} The table '{t_name}' in the '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode. This function is only applicable to Direct Lake tables."
+                    )
+
+                p.Source.ExpressionSource = tom.model.Expressions[expr_name]
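
A minimal usage sketch of the new tables parameter. The workspace, model, and table names are placeholders; parameters before source_type are not shown in this hunk and are assumed from the function body, and the import path assumes the function remains exported from sempy_labs.directlake as in prior releases.

from sempy_labs.directlake import update_direct_lake_model_connection

# Remap only two tables to a new lakehouse, using Direct Lake over OneLake.
update_direct_lake_model_connection(
    dataset="Sales Model",
    workspace="My Workspace",
    source="Sales Lakehouse",
    source_type="Lakehouse",
    source_workspace="My Workspace",
    use_sql_endpoint=False,
    tables=["FactSales", "DimDate"],
)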

sempy_labs/dotnet_lib/dotnet.runtime.config.json

@@ -0,0 +1,10 @@
+{
+    "runtimeOptions": {
+        "tfm": "net6.0",
+        "framework": {
+            "name": "Microsoft.NETCore.App",
+            "version": "6.0.0"
+        },
+        "rollForward": "Major"
+    }
+}

sempy_labs/lakehouse/__init__.py

@@ -20,6 +20,16 @@ from sempy_labs.lakehouse._shortcuts import (
 from sempy_labs.lakehouse._blobs import (
     recover_lakehouse_object,
     list_blobs,
+    get_user_delegation_key,
+)
+from sempy_labs.lakehouse._livy_sessions import (
+    list_livy_sessions,
+)
+from sempy_labs.lakehouse._helper import (
+    is_v_ordered,
+    delete_lakehouse,
+    update_lakehouse,
+    load_table,
 )

 __all__ = [
@@ -36,4 +46,10 @@ __all__ = [
     "list_shortcuts",
     "recover_lakehouse_object",
     "list_blobs",
+    "list_livy_sessions",
+    "is_v_ordered",
+    "delete_lakehouse",
+    "update_lakehouse",
+    "load_table",
+    "get_user_delegation_key",
 ]

sempy_labs/lakehouse/_blobs.py

@@ -11,6 +11,7 @@ from typing import Optional, List
 import sempy_labs._icons as icons
 import xml.etree.ElementTree as ET
 import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException


 def _request_blob_api(
@@ -18,6 +19,7 @@ def _request_blob_api(
     method: str = "get",
     payload: Optional[dict] = None,
     status_codes: int | List[int] = 200,
+    uses_pagination: bool = False,
 ):

     import requests
@@ -31,21 +33,41 @@

     headers = {
         "Authorization": f"Bearer {token}",
-        "Content-Type": "application/json",
+        "Content-Type": "application/xml",
         "x-ms-version": "2025-05-05",
     }

-    response = requests.request(
-        method.upper(),
-        f"https://onelake.blob.fabric.microsoft.com/{request}",
-        headers=headers,
-        json=payload,
-    )
+    base_url = "https://onelake.blob.fabric.microsoft.com/"
+    full_url = f"{base_url}{request}"
+    results = []
+
+    while True:
+        response = requests.request(
+            method.upper(),
+            full_url,
+            headers=headers,
+            data=payload if method.lower() != "get" else None,
+        )
+
+        if response.status_code not in status_codes:
+            raise FabricHTTPException(response)
+
+        if not uses_pagination:
+            return response
+
+        # Parse XML to find blobs and NextMarker
+        root = ET.fromstring(response.content)
+        results.append(root)
+
+        next_marker = root.findtext(".//NextMarker")
+        if not next_marker:
+            break  # No more pages

-    if response.status_code not in status_codes:
-        raise FabricHTTPException(response)
+        # Append the marker to the original request (assuming query string format)
+        delimiter = "&" if "?" in request else "?"
+        full_url = f"{base_url}{request}{delimiter}marker={next_marker}"

-    return response
+    return results


 @log
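
A sketch of how the paginated variant of this internal helper is meant to be consumed (this mirrors the list_blobs change later in this diff); workspace_id and lakehouse_id are placeholders, and the listing URL follows the Blob Storage "List Blobs" request shape already used above.

# Each element of the returned list is a parsed XML root for one page of results.
roots = _request_blob_api(
    request=f"{workspace_id}/{lakehouse_id}/Files?restype=container&comp=list&include=deleted",
    uses_pagination=True,
)
for root in roots:
    for blob in root.iter("Blob"):
        print(blob.findtext("Name"))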
@@ -90,12 +112,6 @@ def list_blobs(
     )
     path_prefix = f"{workspace_id}/{lakehouse_id}/{container}"

-    response = _request_blob_api(
-        request=f"{path_prefix}?restype=container&comp=list&include=deleted"
-    )
-    root = ET.fromstring(response.content)
-    response_json = _xml_to_dict(root)
-
     columns = {
         "Blob Name": "str",
         "Is Deleted": "bool",
@@ -122,37 +138,55 @@

     df = _create_dataframe(columns=columns)

-    for blob in (
-        response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", {})
-    ):
-        p = blob.get("Properties", {})
-        new_data = {
-            "Blob Name": blob.get("Name"),
-            "Is Deleted": blob.get("Deleted", False),
-            "Deletion Id": blob.get("DeletionId"),
-            "Creation Time": p.get("Creation-Time"),
-            "Expiry Time": p.get("Expiry-Time"),
-            "Etag": p.get("Etag"),
-            "Resource Type": p.get("ResourceType"),
-            "Content Length": p.get("Content-Length"),
-            "Content Type": p.get("Content-Type"),
-            "Content Encoding": p.get("Content-Encoding"),
-            "Content Language": p.get("Content-Language"),
-            "Content CRC64": p.get("Content-CRC64"),
-            "Content MD5": p.get("Content-MD5"),
-            "Cache Control": p.get("Cache-Control"),
-            "Content Disposition": p.get("Content-Disposition"),
-            "Blob Type": p.get("BlobType"),
-            "Access Tier": p.get("AccessTier"),
-            "Access Tier Inferred": p.get("AccessTierInferred"),
-            "Server Encrypted": p.get("ServerEncrypted"),
-            "Deleted Time": p.get("DeletedTime"),
-            "Remaining Retention Days": p.get("RemainingRetentionDays"),
-        }
-
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
-
-    _update_dataframe_datatypes(dataframe=df, column_map=columns)
+    url = f"{path_prefix}?restype=container&comp=list&include=deleted"
+
+    responses = _request_blob_api(
+        request=url,
+        uses_pagination=True,
+    )
+
+    dfs = []
+    for root in responses:
+        response_json = _xml_to_dict(root)
+
+        blobs = (
+            response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", [])
+        )
+
+        if isinstance(blobs, dict):
+            blobs = [blobs]
+
+        for blob in blobs:
+            p = blob.get("Properties", {})
+            new_data = {
+                "Blob Name": blob.get("Name"),
+                "Is Deleted": blob.get("Deleted", False),
+                "Deletion Id": blob.get("DeletionId"),
+                "Creation Time": p.get("Creation-Time"),
+                "Expiry Time": p.get("Expiry-Time"),
+                "Etag": p.get("Etag"),
+                "Resource Type": p.get("ResourceType"),
+                "Content Length": p.get("Content-Length"),
+                "Content Type": p.get("Content-Type"),
+                "Content Encoding": p.get("Content-Encoding"),
+                "Content Language": p.get("Content-Language"),
+                "Content CRC64": p.get("Content-CRC64"),
+                "Content MD5": p.get("Content-MD5"),
+                "Cache Control": p.get("Cache-Control"),
+                "Content Disposition": p.get("Content-Disposition"),
+                "Blob Type": p.get("BlobType"),
+                "Access Tier": p.get("AccessTier"),
+                "Access Tier Inferred": p.get("AccessTierInferred"),
+                "Server Encrypted": p.get("ServerEncrypted"),
+                "Deleted Time": p.get("DeletedTime"),
+                "Remaining Retention Days": p.get("RemainingRetentionDays"),
+            }
+
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+        _update_dataframe_datatypes(dataframe=df, column_map=columns)

     return df

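A brief usage sketch of the updated list_blobs, which now aggregates all pages of the container listing into one dataframe. The lakehouse and workspace names are placeholders, and the import path assumes the existing sempy_labs.lakehouse export.

from sempy_labs.lakehouse import list_blobs

df = list_blobs(lakehouse="MyLakehouse", workspace="My Workspace", container="Files")
deleted_blobs = df[df["Is Deleted"]]
print(deleted_blobs[["Blob Name", "Deleted Time"]])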
@@ -182,7 +216,7 @@ def recover_lakehouse_object(
     workspace_id = resolve_workspace_id(workspace)
     lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id)

-    blob_path_prefix = f"{lakehouse_id}/{file_path}"
+    blob_name = f"{lakehouse_id}/{file_path}"

     container = file_path.split("/")[0]
     if container not in ["Tables", "Files"]:
@@ -190,29 +224,45 @@
             f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'."
         )

-    df = list_blobs(lakehouse=lakehouse, workspace=workspace, container=container)
+    # Undelete the blob
+    print(f"{icons.in_progress} Attempting to recover the '{blob_name}' blob...")

-    for _, r in df.iterrows():
-        blob_name = r.get("Blob Name")
-        is_deleted = r.get("Is Deleted")
-        if blob_name.startswith(blob_path_prefix) and is_deleted:
-            print(f"{icons.in_progress} Restoring the '{blob_name}' blob...")
-            _request_blob_api(
-                request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
-                method="put",
+    try:
+        _request_blob_api(
+            request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete",
+            method="put",
+        )
+        print(
+            f"{icons.green_dot} The '{blob_name}' blob recover attempt was successful."
+        )
+    except FabricHTTPException as e:
+        if e.status_code == 404:
+            print(
+                f"{icons.warning} The '{blob_name}' blob was not found. No action taken."
+            )
+        else:
+            print(
+                f"{icons.red_dot} An error occurred while recovering the '{blob_name}' blob: {e}"
             )
-            print(f"{icons.green_dot} The '{blob_name}' blob has been restored.")


-def _get_user_delegation_key():
+def get_user_delegation_key():
+    """
+    Gets a key that can be used to sign a user delegation SAS (shared access signature). A user delegation SAS grants access to Azure Blob Storage resources by using Microsoft Entra credentials.

-    # https://learn.microsoft.com/rest/api/storageservices/get-user-delegation-key
+    This is a wrapper function for the following API: `Get User Delegation Key <https://learn.microsoft.com/rest/api/storageservices/get-user-delegation-key>`_.
+
+    Returns
+    -------
+    str
+        The user delegation key value.
+    """

     from datetime import datetime, timedelta, timezone

     utc_now = datetime.now(timezone.utc)
     start_time = utc_now + timedelta(minutes=2)
-    expiry_time = start_time + timedelta(minutes=45)
+    expiry_time = start_time + timedelta(minutes=60)
     start_str = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
     expiry_str = expiry_time.strftime("%Y-%m-%dT%H:%M:%SZ")

@@ -223,9 +273,11 @@ def _get_user_delegation_key():
    </KeyInfo>"""

     response = _request_blob_api(
-        request="restype=service&comp=userdelegationkey",
+        request="?restype=service&comp=userdelegationkey",
         method="post",
         payload=payload,
     )

-    return response.content
+    root = ET.fromstring(response.content)
+    response_json = _xml_to_dict(root)
+    return response_json.get("UserDelegationKey", {}).get("Value", None)
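
A minimal usage sketch of the now-public function, which per the new docstring takes no arguments and returns the key value as a string; output handling below is illustrative.

from sempy_labs.lakehouse import get_user_delegation_key

# The returned value can then be used to sign a user delegation SAS.
key_value = get_user_delegation_key()
print(key_value)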

sempy_labs/lakehouse/_get_lakehouse_columns.py

@@ -1,14 +1,17 @@
 import pandas as pd
+import re
 from sempy_labs._helper_functions import (
     format_dax_object_name,
     resolve_workspace_name_and_id,
     resolve_lakehouse_name_and_id,
     _create_dataframe,
-    _create_spark_session,
+    _get_delta_table,
+    _pure_python_notebook,
 )
 from typing import Optional
 from sempy._utils._log import log
 from uuid import UUID
+import sempy_labs._icons as icons


 @log
@@ -16,7 +19,9 @@ def get_lakehouse_columns(
     lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
 ) -> pd.DataFrame:
     """
-    Shows the tables and columns of a lakehouse and their respective properties.
+    Shows the tables and columns of a lakehouse and their respective properties. This function can be executed in either a PySpark or pure Python notebook. Note that data types may show differently when using PySpark vs pure Python.
+
+    Service Principal Authentication is supported (see `here <https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb>`_ for examples).

     Parameters
     ----------
@@ -34,7 +39,6 @@ def get_lakehouse_columns(
         Shows the tables/columns within a lakehouse and their properties.
     """
     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-    from delta import DeltaTable

     columns = {
         "Workspace Name": "string",
@@ -51,29 +55,48 @@
         lakehouse=lakehouse, workspace=workspace_id
     )

-    spark = _create_spark_session()
-
     tables = get_lakehouse_tables(
         lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False
     )
     tables_filt = tables[tables["Format"] == "delta"]

-    for _, r in tables_filt.iterrows():
-        table_name = r["Table Name"]
-        path = r["Location"]
-        delta_table = DeltaTable.forPath(spark, path)
-        sparkdf = delta_table.toDF()
-
-        for col_name, data_type in sparkdf.dtypes:
-            full_column_name = format_dax_object_name(table_name, col_name)
-            new_data = {
+    def add_column_metadata(table_name, col_name, data_type):
+        new_rows.append(
+            {
                 "Workspace Name": workspace_name,
-                "Lakehouse Name": lakehouse,
+                "Lakehouse Name": lakehouse_name,
                 "Table Name": table_name,
                 "Column Name": col_name,
-                "Full Column Name": full_column_name,
+                "Full Column Name": format_dax_object_name(table_name, col_name),
                 "Data Type": data_type,
             }
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+        )
+
+    new_rows = []
+
+    for _, r in tables_filt.iterrows():
+        table_name = r["Table Name"]
+        path = r["Location"]
+
+        if _pure_python_notebook():
+            from deltalake import DeltaTable
+
+            table_schema = DeltaTable(path).schema()
+
+            for field in table_schema.fields:
+                col_name = field.name
+                match = re.search(r'"(.*?)"', str(field.type))
+                if not match:
+                    raise ValueError(
+                        f"{icons.red_dot} Could not find data type for column {col_name}."
+                    )
+                data_type = match.group(1)
+                add_column_metadata(table_name, col_name, data_type)
+        else:
+            delta_table = _get_delta_table(path=path)
+            table_df = delta_table.toDF()
+
+            for col_name, data_type in table_df.dtypes:
+                add_column_metadata(table_name, col_name, data_type)

-    return df
+    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
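
For reference, a short usage sketch of the updated function. The lakehouse and workspace names are placeholders, and the import path assumes the existing sempy_labs.lakehouse export.

from sempy_labs.lakehouse import get_lakehouse_columns

# Works in both PySpark and pure Python notebooks per the updated docstring;
# data types may be reported differently between the two engines.
df = get_lakehouse_columns(lakehouse="MyLakehouse", workspace="My Workspace")
print(df[["Table Name", "Column Name", "Data Type"]].head())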