semantic-link-labs 0.9.9__py3-none-any.whl → 0.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -7,6 +7,7 @@ from sempy_labs._helper_functions import (
     resolve_lakehouse_name_and_id,
     resolve_workspace_name_and_id,
     _create_spark_session,
+    _pure_python_notebook,
 )
 import sempy_labs._icons as icons
 import re
@@ -32,6 +33,33 @@ def lakehouse_attached() -> bool:
     return False


+def _optimize_table(path):
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        DeltaTable(path).optimize.compact()
+    else:
+        from delta import DeltaTable
+
+        spark = _create_spark_session()
+        DeltaTable.forPath(spark, path).optimize().executeCompaction()
+
+
+def _vacuum_table(path, retain_n_hours):
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        DeltaTable(path).vacuum(retention_hours=retain_n_hours)
+    else:
+        from delta import DeltaTable
+
+        spark = _create_spark_session()
+        spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+        DeltaTable.forPath(spark, path).vacuum(retain_n_hours)
+
+
 @log
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
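
The two helpers added above dispatch Delta maintenance to either the pure-Python deltalake (delta-rs) API or the Spark delta API, depending on the notebook runtime. A minimal standalone sketch of the same dual-engine idea follows, assuming either deltalake or delta-spark is installed; use_pure_python and table_path are illustrative placeholders, whereas the package itself decides the branch through its internal _pure_python_notebook() helper:

    # Illustrative sketch only; not part of semantic-link-labs.
    def compact_delta_table(table_path: str, use_pure_python: bool) -> None:
        if use_pure_python:
            # deltalake (delta-rs): compaction without a Spark session
            from deltalake import DeltaTable

            DeltaTable(table_path).optimize.compact()
        else:
            # delta-spark: requires an active SparkSession
            from delta import DeltaTable
            from pyspark.sql import SparkSession

            spark = SparkSession.builder.getOrCreate()
            DeltaTable.forPath(spark, table_path).optimize().executeCompaction()
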
@@ -56,27 +84,20 @@ def optimize_lakehouse_tables(
     """

     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-    from delta import DeltaTable

-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
-    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
+    df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    df_delta = df[df["Format"] == "delta"]

     if isinstance(tables, str):
         tables = [tables]

-    if tables is not None:
-        tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
-    else:
-        tables_filt = lakeTablesDelta.copy()
-
-    spark = _create_spark_session()
+    df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta

-    for _, r in (bar := tqdm(tables_filt.iterrows())):
-        tableName = r["Table Name"]
-        tablePath = r["Location"]
-        bar.set_description(f"Optimizing the '{tableName}' table...")
-        deltaTable = DeltaTable.forPath(spark, tablePath)
-        deltaTable.optimize().executeCompaction()
+    for _, r in (bar := tqdm(df_tables.iterrows())):
+        table_name = r["Table Name"]
+        path = r["Location"]
+        bar.set_description(f"Optimizing the '{table_name}' table...")
+        _optimize_table(path=path)


 @log
@@ -108,32 +129,20 @@ def vacuum_lakehouse_tables(
     """

     from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
-    from delta import DeltaTable

-    lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
-    lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"]
+    df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace)
+    df_delta = df[df["Format"] == "delta"]

     if isinstance(tables, str):
         tables = [tables]

-    if tables is not None:
-        tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)]
-    else:
-        tables_filt = lakeTablesDelta.copy()
-
-    spark = _create_spark_session()
-    spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
-
-    for _, r in (bar := tqdm(tables_filt.iterrows())):
-        tableName = r["Table Name"]
-        tablePath = r["Location"]
-        bar.set_description(f"Vacuuming the '{tableName}' table...")
-        deltaTable = DeltaTable.forPath(spark, tablePath)
+    df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta

-        if retain_n_hours is None:
-            deltaTable.vacuum()
-        else:
-            deltaTable.vacuum(retain_n_hours)
+    for _, r in (bar := tqdm(df_tables.iterrows())):
+        table_name = r["Table Name"]
+        path = r["Location"]
+        bar.set_description(f"Vacuuming the '{table_name}' table...")
+        _vacuum_table(path=path, retain_n_hours=retain_n_hours)


 def run_table_maintenance(
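
The rewritten loops keep the public signatures shown in the hunk context, so existing callers are unaffected. A hedged usage sketch, assuming a Fabric notebook with a lakehouse attached and that both functions are exposed from sempy_labs.lakehouse as in earlier releases; the table names are placeholders:

    from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

    # Compact the Delta files of two specific tables in the attached lakehouse.
    optimize_lakehouse_tables(tables=["dim_product", "fact_sales"])

    # Vacuum every Delta table, retaining 168 hours (7 days) of history.
    vacuum_lakehouse_tables(retain_n_hours=168)
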
@@ -6,10 +6,9 @@ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables
 from sempy_labs._helper_functions import (
     resolve_lakehouse_name,
     resolve_lakehouse_id,
-    create_abfss_path,
     retry,
     generate_guid,
-    _create_spark_session,
+    save_as_delta_table,
 )
 from sempy_labs.tom import connect_semantic_model
 from typing import Optional
@@ -98,8 +97,6 @@ def migrate_calc_tables_to_lakehouse(
     if killFunction:
         return

-    spark = _create_spark_session()
-
     if len(dfP_filt) == 0:
         print(
             f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no calculated tables."
@@ -198,14 +195,12 @@ def migrate_calc_tables_to_lakehouse(

            delta_table_name = t.Name.replace(" ", "_").lower()

-            spark_df = spark.createDataFrame(df)
-            filePath = create_abfss_path(
-                lakehouse_id=lakehouse_id,
-                lakehouse_workspace_id=lakehouse_workspace_id,
-                delta_table_name=delta_table_name,
-            )
-            spark_df.write.mode("overwrite").format("delta").save(
-                filePath
+            save_as_delta_table(
+                dataframe=df,
+                table_name=delta_table_name,
+                lakehouse=lakehouse,
+                workspace=lakehouse_workspace,
+                write_mode="overwrite",
             )

             @retry(
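
Here the manual createDataFrame / create_abfss_path / write sequence is replaced by the package's save_as_delta_table helper. A hedged sketch of the call shape, mirroring the import path and keyword arguments used in this hunk; the DataFrame contents and the lakehouse and workspace names are placeholders:

    import pandas as pd
    from sempy_labs._helper_functions import save_as_delta_table

    df = pd.DataFrame({"id": [1, 2, 3], "amount": [10.0, 20.0, 30.0]})

    # Write the pandas DataFrame as a Delta table in the given lakehouse/workspace.
    save_as_delta_table(
        dataframe=df,
        table_name="calc_table_example",  # placeholder table name
        lakehouse="MyLakehouse",          # placeholder lakehouse
        workspace="MyWorkspace",          # placeholder workspace
        write_mode="overwrite",
    )
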
@@ -1,7 +1,6 @@
 import sempy.fabric as fabric
 import pandas as pd
 import re
-from sempy_labs._helper_functions import retry
 from sempy_labs.tom import connect_semantic_model
 from typing import Optional
 from sempy._utils._log import log
@@ -10,7 +9,8 @@ from uuid import UUID
 from sempy_labs._helper_functions import (
     resolve_workspace_name_and_id,
     resolve_dataset_name_and_id,
-    _create_spark_session,
+    save_as_delta_table,
+    retry,
 )


@@ -29,7 +29,6 @@ def refresh_calc_tables(dataset: str | UUID, workspace: Optional[str | UUID] = N
         or if no lakehouse attached, resolves to the workspace of the notebook.
     """

-    spark = _create_spark_session()
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
     (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
     icons.sll_tags.append("DirectLakeMigration")
@@ -117,10 +116,12 @@ def refresh_calc_tables(dataset: str | UUID, workspace: Optional[str | UUID] = N
                     f"{icons.in_progress} Refresh of the '{delta_table_name}' table within the lakehouse is in progress..."
                 )

-                spark_df = spark.createDataFrame(df)
-                spark_df.write.mode("overwrite").format("delta").saveAsTable(
-                    delta_table_name
+                save_as_delta_table(
+                    dataframe=df,
+                    table_name=delta_table_name,
+                    write_mode="overwrite",
                 )
+
                 print(
                     f"{icons.green_dot} Calculated table '{tName}' has been refreshed as the '{delta_table_name.lower()}' table in the lakehouse."
sempy_labs/tom/_model.py CHANGED
@@ -938,19 +938,23 @@ class TOMWrapper:
         import Microsoft.AnalysisServices.Tabular as TOM
         import System

-        if cross_filtering_behavior is None:
+        if not cross_filtering_behavior:
             cross_filtering_behavior = "Automatic"
-        if security_filtering_behavior is None:
+        if not security_filtering_behavior:
             security_filtering_behavior = "OneDirection"

-        from_cardinality = from_cardinality.capitalize()
-        to_cardinality = to_cardinality.capitalize()
-        cross_filtering_behavior = cross_filtering_behavior.capitalize()
-        security_filtering_behavior = security_filtering_behavior.capitalize()
+        for var_name in [
+            "from_cardinality",
+            "to_cardinality",
+            "cross_filtering_behavior",
+            "security_filtering_behavior",
+        ]:
+            locals()[var_name] = locals()[var_name].capitalize()
+
+        cross_filtering_behavior = cross_filtering_behavior.replace("direct", "Direct")
         security_filtering_behavior = security_filtering_behavior.replace(
             "direct", "Direct"
         )
-        cross_filtering_behavior = cross_filtering_behavior.replace("direct", "Direct")

         rel = TOM.SingleColumnRelationship()
         rel.FromColumn = self.model.Tables[from_table].Columns[from_column]
@@ -962,13 +966,16 @@
             TOM.RelationshipEndCardinality, to_cardinality
         )
         rel.IsActive = is_active
-        rel.CrossFilteringBehavior = System.Enum.Parse(
-            TOM.CrossFilteringBehavior, cross_filtering_behavior
-        )
-        rel.SecurityFilteringBehavior = System.Enum.Parse(
-            TOM.SecurityFilteringBehavior, security_filtering_behavior
-        )
-        rel.RelyOnReferentialIntegrity = rely_on_referential_integrity
+        if cross_filtering_behavior != "Automatic":
+            rel.CrossFilteringBehavior = System.Enum.Parse(
+                TOM.CrossFilteringBehavior, cross_filtering_behavior
+            )
+        if security_filtering_behavior != "OneDirection":
+            rel.SecurityFilteringBehavior = System.Enum.Parse(
+                TOM.SecurityFilteringBehavior, security_filtering_behavior
+            )
+        if rely_on_referential_integrity:
+            rel.RelyOnReferentialIntegrity = True

         self.model.Relationships.Add(rel)
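
The add_relationship changes above normalize the casing of the cardinality and filtering arguments and only assign CrossFilteringBehavior, SecurityFilteringBehavior and RelyOnReferentialIntegrity when they differ from the TOM defaults. A hedged usage sketch with placeholder dataset, table and column names, passing the enum-style casing directly:

    from sempy_labs.tom import connect_semantic_model

    # Placeholder semantic model and table/column names.
    with connect_semantic_model(dataset="Sales Model", readonly=False) as tom:
        tom.add_relationship(
            from_table="FactSales",
            from_column="ProductKey",
            to_table="DimProduct",
            to_column="ProductKey",
            from_cardinality="Many",
            to_cardinality="One",
            cross_filtering_behavior="BothDirections",  # not set when left at "Automatic"
        )
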