dasl-client 1.0.7__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dasl-client might be problematic.

dasl_client/auth/auth.py CHANGED
@@ -1,4 +1,5 @@
 import abc
+import base64
 import time
 from datetime import datetime
 
@@ -176,15 +177,15 @@ class DatabricksSecretAuth(Authorization):
             principalName=self._principal,
         )
         handler = api.WorkspaceV1Api(api_client=self._client)
-        resp = handler.workspace_v1_begin_authentication(
-            workspace=self._workspace, workspace_v1_begin_authenticate_request=req
+        resp = handler.workspace_v1_request_secret(
+            workspace=self._workspace, workspace_v1_request_secret_request=req
         )
         secret_name = resp.secret_name
         secret_value = ""
         for tries in range(3):
             try:
-                secret_value = WorkspaceClient().secrets.get_secret(
-                    secret_name, "token"
+                secret_value = (
+                    WorkspaceClient().secrets.get_secret(secret_name, "token").value
                 )
                 break
             except ResourceDoesNotExist:
@@ -194,7 +195,7 @@ class DatabricksSecretAuth(Authorization):
             raise RuntimeError(f"failed to complete secret auth")
 
         req = WorkspaceV1AuthenticateRequest(
-            databricks_secret=secret_value,
+            databricks_secret=base64.b64decode(secret_value).decode("utf-8"),
         )
         handler = api.WorkspaceV1Api(api_client=self._client)
 
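In short, the secret-based auth flow now calls workspace_v1_request_secret instead of workspace_v1_begin_authentication, reads the named secret's .value from the Databricks workspace, and base64-decodes it before authenticating. A minimal sketch of just the decode step, separate from the client code above (the token value is a placeholder):

    import base64

    # Placeholder for the value the client reads from the Databricks secret scope;
    # the secrets API hands values back base64-encoded.
    encoded_value = base64.b64encode(b"example-token").decode("ascii")

    # The new code decodes it before passing it to WorkspaceV1AuthenticateRequest.
    databricks_secret = base64.b64decode(encoded_value).decode("utf-8")
    assert databricks_secret == "example-token"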
dasl_client/client.py CHANGED
@@ -3,8 +3,8 @@ from typing import Any, Callable, Iterator, List, Optional, TypeVar
 
 from dasl_api import (
     CoreV1Api,
-    CoreV1DataSourceAutoloaderSpec,
     DbuiV1Api,
+    ContentV1Api,
     WorkspaceV1Api,
     WorkspaceV1CreateWorkspaceRequest,
     api,
@@ -13,6 +13,7 @@ from pydantic import Field
 
 from dasl_client.auth.auth import (
     Authorization,
+    DatabricksSecretAuth,
     DatabricksTokenAuth,
     ServiceAccountKeyAuth,
 )
@@ -29,6 +30,8 @@ from .types import (
     WorkspaceConfig,
     TransformRequest,
     TransformResponse,
+    DataSourcePresetsList,
+    DataSourcePreset,
 )
 
 
@@ -51,7 +54,7 @@ class Client:
 
         :param auth: Authorization instance for authorizing requests to
             the dasl control plane.
-        :returns Client
+        :returns: Client
         """
         self.auth = auth
 
@@ -83,7 +86,7 @@ class Client:
         :param dasl_host: The URL of the DASL server. This value should
             not generally be specified unless you are testing against
             an alternative environment.
-        :returns Client
+        :returns: Client for the newly created workspace.
         """
         with error_handler():
             if workspace_url is None:
@@ -131,7 +134,7 @@ class Client:
         :param dasl_host: The URL of the DASL server. This value should
             not generally be specified unless you are testing against
             an alternative environment.
-        :returns Client
+        :returns: Client for the existing workspace.
         """
         with error_handler():
             if workspace_url is None:
@@ -195,7 +198,7 @@ class Client:
         :param dasl_host: The URL of the DASL server. This value should
             not generally be specified unless you are testing against
             an alternative environment.
-        :returns Client
+        :returns: Client for the newly created or existing workspace.
         """
         try:
             return Client.new_workspace(
@@ -229,6 +232,9 @@ class Client:
     def _dbui_client(self) -> DbuiV1Api:
         return DbuiV1Api(self.auth.client())
 
+    def _content_client(self) -> ContentV1Api:
+        return ContentV1Api(self.auth.client())
+
     def _workspace(self) -> str:
         return self.auth.workspace()
 
@@ -274,7 +280,7 @@ class Client:
         you will need to repopulate the service_principal_secret correctly
         before passing the result back to put_admin_config.
 
-        :returns AdminConfig
+        :returns: AdminConfig containing the current settings.
         """
         with error_handler():
             return AdminConfig.from_api_obj(
@@ -306,6 +312,8 @@ class Client:
         Retrieve the WorkspaceConfig from the DASL server. The returned
         value can be updated directly and passed to put_config in order
         to make changes.
+
+        :returns: WorkspaceConfig containing the current configuration.
         """
         with error_handler():
             return WorkspaceConfig.from_api_obj(
@@ -348,7 +356,7 @@ class Client:
         in order to make changes.
 
         :param name: The unique name of the DataSource within this workspace
-        :returns DataSource
+        :returns: DataSource
         """
         with error_handler():
             return DataSource.from_api_obj(
@@ -459,7 +467,7 @@ class Client:
         in order to make changes.
 
         :param name: The unique name of the Rule within this workspace
-        :returns Rule
+        :returns: Rule
         """
         with error_handler():
             return Rule.from_api_obj(
@@ -601,7 +609,7 @@ class Client:
         :param value: The observable value
         :param cursor: A cursor to be used when paginating results
         :param limit: A limit of the number of results to return
-        :returns EventsList
+        :returns: EventsList
         """
         with error_handler():
             return Dbui.ObservableEvents.EventsList.from_api_obj(
@@ -614,3 +622,31 @@ class Client:
                     limit=limit,
                 )
             )
+
+    def list_presets(self) -> DataSourcePresetsList:
+        """
+        List the Presets in this workspace. This will include any user defined
+        presets if a custom presets path has been configured in the workspace.
+
+        :returns: DataSourcePresetsList
+        """
+        with error_handler():
+            return DataSourcePresetsList.from_api_obj(
+                self._content_client().content_v1_get_preset_data_sources(
+                    self._workspace(),
+                )
+            )
+
+    def get_preset(self, name: str) -> DataSourcePreset:
+        """
+        Get the preset with the argument name from the DASL server. If the preset name
+        begins with 'internal_' it will instead be collected from the user catalog,
+        provided a preset path is set in the workspace config.
+
+        :param name: The unique name of the DataSource preset within this workspace.
+        :returns: DataSourcePreset
+        """
+        with error_handler():
+            return DataSourcePreset.from_api_obj(
+                self._content_client().content_v1_get_preset_datasource(self._workspace(), name)
+            )
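Taken together, the new _content_client() helper and the two public methods give the Client read access to data source presets. A minimal usage sketch (assumes `client` is a Client already obtained through one of the constructors documented above; the preset name is a placeholder):

    # `client` is an existing dasl_client Client instance.
    presets = client.list_presets()                # returns a DataSourcePresetsList
    preset = client.get_preset("example_preset")   # returns a DataSourcePreset; name is illustrative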
dasl_client/preset_development/__init__.py ADDED
@@ -0,0 +1,4 @@
+# dasl_client/preset_development/__init__.py
+from .preview_parameters import *
+from .errors import *
+from .preview_engine import *
dasl_client/preset_development/errors.py ADDED
@@ -0,0 +1,159 @@
+from pyspark.sql import DataFrame, SparkSession
+from pyspark.sql.types import *
+from pyspark.sql.dataframe import DataFrame
+from typing import Dict, Any, List, Mapping, Tuple
+from IPython import get_ipython
+
+
+class PresetError(Exception):
+    pass
+
+
+class InvalidGoldTableSchemaError(PresetError):
+    def __init__(self, schema: str, additional_message: str = ""):
+        self.schema = schema
+        message = (
+            f"Malformed gold schema provided {schema}. {additional_message}".strip()
+        )
+        super().__init__(message)
+
+
+class NoSilverStageProvdedError(PresetError):
+    def __init__(self, additional_msg: str = ""):
+        message = f"No silver stage provided{additional_msg}."
+        super().__init__(message)
+
+
+class NoSilverTransformStageProvdedError(PresetError):
+    def __init__(
+        self,
+        message: str = "No silver transform stage provided, but gold stage is present.",
+    ):
+        super().__init__(message)
+
+
+class PreTransformNotFound(PresetError):
+    def __init__(
+        self,
+        message: str = "Requested silver pretransform name not found in preset's silver pretransforms.",
+    ):
+        super().__init__(message)
+
+
+class NoSilverPreTransformStageProvdedError(PresetError):
+    def __init__(
+        self,
+        message: str = "No silver transform stage provided, but prestransform name provided.",
+    ):
+        super().__init__(message)
+
+
+class MissingTableFieldError(PresetError):
+    def __init__(self, layer: str, table_name: str, field_name: str):
+        self.layer = layer
+        self.table_name = table_name
+        self.field_name = field_name
+        message = f"{layer} stage {table_name} is missing {field_name} field."
+        super().__init__(message)
+
+
+class DuplicateFieldNameError(PresetError):
+    def __init__(self, stage: str, stage_name: str, field_name: str):
+        self.stage = stage
+        self.stage_name = stage_name
+        self.field_name = field_name
+        message = f"Duplicate field specification name found in {stage} stage {stage_name} named {field_name}."
+        super().__init__(message)
+
+
+class MalformedFieldError(PresetError):
+    def __init__(self, stage: str, stage_name: str, field_name: str):
+        self.stage = stage
+        self.stage_name = stage_name
+        self.field_name = field_name
+        message = f"Please provide 1 operation only in {stage} stage {stage_name}'s field specification named {field_name}."
+        super().__init__(message)
+
+
+class MissingFieldNameError(PresetError):
+    def __init__(self, stage: str, stage_name: str):
+        self.stage = stage
+        self.stage_name = stage_name
+        message = (
+            f"Field specification in {stage} stage {stage_name} missing name field."
+        )
+        super().__init__(message)
+
+
+class MissingSilverKeysError(PresetError):
+    def __init__(self, missing_keys: str):
+        self.missing_keys = missing_keys
+        message = f"Gold table/s have no corresponding input from silver table/s: {missing_keys}"
+        super().__init__(message)
+
+
+class MissingAutoloaderConfigError(PresetError):
+    def __init__(
+        self,
+        message: str = "Autoloader mode selected, but no autoloader configuration found in preset.autoloader.",
+    ):
+        super().__init__(message)
+
+
+class AutoloaderMissingFieldError(PresetError):
+    def __init__(self, field_name: str):
+        self.field_name = field_name
+        message = f"Autoloader mode selected, but missing field {field_name} in preset."
+        super().__init__(message)
+
+
+class UnknownGoldTableError(PresetError):
+    def __init__(self, table_name: str, schema: str):
+        self.table_name = table_name
+        self.schema = schema
+        message = (
+            f"The referenced Gold table name {table_name} does not exist in {schema}."
+        )
+        super().__init__(message)
+
+
+class GoldTableCompatibilityError(PresetError):
+    def __init__(self, message: str):
+        super().__init__(message)
+
+
+class ReferencedColumnMissingError(PresetError):
+    def __init__(self, operation: str, column_name: str):
+        self.operation = operation
+        self.column_name = column_name
+        message = f"The referenced column {column_name} was not found in the dataframe during {operation} operation."
+        super().__init__(message)
+
+
+class MissingJoinFieldError(PresetError):
+    def __init__(self, field_name: str):
+        self.field_name = field_name
+        message = f"Join operation is missing required field {field_name}."
+        super().__init__(message)
+
+
+class MissingUtilityConfigurationFieldError(PresetError):
+    def __init__(self, operation: str, field_name: str):
+        self.operation = operation
+        self.field_name = field_name
+        message = f"The required configuration field {field_name} was not suppled in the {operation} operation."
+        super().__init__(message)
+
+
+class AssertionFailedError(PresetError):
+    def __init__(self, expr: str, assertion_message: str, df: DataFrame):
+        # Get the Databricks built-in functions out the namespace.
+        ipython = get_ipython()
+        display = ipython.user_ns["display"]
+
+        self.expr = expr
+        self.assertion_message = assertion_message
+        self.df = df
+        message = f"The above rows failed the assertion expression {expr} with reason: {assertion_message}\n"
+        display(df)
+        super().__init__(message)
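Every exception in this module derives from PresetError, so callers can trap the whole family in one place. A small sketch (assumes pyspark and IPython are installed, since the module imports both at load time; the error arguments are illustrative):

    from dasl_client.preset_development.errors import (
        MissingTableFieldError,
        PresetError,
    )

    try:
        # Simulate a validation failure for a gold table missing its "input" field.
        raise MissingTableFieldError("Gold", "http_activity", "input")
    except PresetError as err:
        print(f"preset validation failed: {err}")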
dasl_client/preset_development/preview_engine.py ADDED
@@ -0,0 +1,344 @@
+from pyspark.sql import DataFrame, SparkSession
+from pyspark.sql.types import *
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.functions import lit, col as col_, sum as sum_, when
+from dasl_client.preset_development.preview_parameters import *
+from dasl_client.preset_development.stage import *
+from dasl_client.preset_development.errors import *
+import yaml
+from IPython import get_ipython
+
+
+class PreviewEngine:
+    """
+    This class deserializes the in-development preset's YAML and performs a series of
+    validation steps before attempting to compile each stage's table and execute them
+    based on the provided PreviewParameters.
+
+    Upon successful execution, output is generated for each successfully executed
+    stage's table operations. Additionally, if Gold stages are computed, their outputs
+    are validated against the provided Gold stage tables to ensure compatibility on a
+    per-table-name basis with the Unity Catalog.
+
+    For example, a preset Gold stage table named "http_activity" will be checked against
+    the corresponding table in the Unity Catalog schema—also named "http_activity" to
+    confirm that inserting into the Unity Catalog most likely not cause errors.
+    """
+
+    def __init__(
+        self, spark: SparkSession, preset_yaml_str: str, ds_params: PreviewParameters
+    ):
+        """
+        Creates the PreviewEngine using the given preset YAML and datasource parameters.
+        The YAML is deserialized here and checked to verify whether the requested
+        pretransform name, if provided, exists in the preset.
+
+        Instance Attributes:
+            ds_params (PreviewParameters): The input datasource's configuration.
+            preset (Dict[str, Any]): The deserialized preset YAML.
+            pretransform_name (str): The name of the requested pretransform. Defaults to None.
+            pre (Stage): Stores the pretransform Stage object internally.
+            silver (List[Stage]): Stores the Silver Stage objects internally.
+            gold (List[Stage]): Stores the Gold Stage objects internally.
+        """
+        self._spark = spark
+        self._ds_params = ds_params
+        self._preset = yaml.safe_load(preset_yaml_str)
+        self._pretransform_name = ds_params._pretransform_name
+
+        self._validate_gold_inputs(
+            self._preset.get("silver", None), self._preset.get("gold", None)
+        )
+        if self._pretransform_name:
+            self._validate_pretransform_name(
+                self._preset.get("silver", None), self._pretransform_name
+            )
+
+        self._pre = None
+        self._silver = []
+        self._gold = []
+        self._result_df_map = {}
+
+    def _validate_pretransform_name(
+        self, silver: Dict[str, str], pretransform_name: str
+    ) -> None:
+        """
+        Validates the given pretransform name exists in the provided preset's Silver
+        PreTransform stages.
+        """
+        if not silver:
+            raise NoSilverStageProvdedError(", but pretransform name provided")
+        if not (silver_pre_transform := silver.get("preTransform", None)):
+            raise NoSilverPreTransformStageProvdedError()
+        silver_pre_output_names = []
+        for table in silver_pre_transform:
+            if not (name := table.get("name", None)):
+                raise MissingTableFieldError(
+                    "Silver pretransform",
+                    table.get("name", "<stage missing name>"),
+                    "name",
+                )
+            silver_pre_output_names += [name]
+        if pretransform_name not in silver_pre_output_names:
+            raise PreTransformNotFound()
+
+    def _validate_gold_inputs(
+        self, silver: Dict[str, str], gold: Dict[str, str]
+    ) -> None:
+        """
+        Validate gold tables all have a silver table to input from.
+        """
+        if not gold:
+            return
+
+        if not len(gold):
+            return
+
+        if not silver:
+            raise NoSilverStageProvdedError(", but gold stage is present")
+
+        gold_input_names = []
+        for table in gold:
+            if not (input := table.get("input", None)):
+                raise MissingTableFieldError(
+                    "Gold", table.get("name", "<stage missing name>"), "input"
+                )
+            gold_input_names += [input]
+
+        if not (silver_transform := silver.get("transform", None)):
+            raise NoSilverTransformStageProvdedError()
+        silver_output_names = []
+        for table in silver_transform:
+            if not (name := table.get("name", None)):
+                raise MissingTableFieldError(
+                    "Silver transform", table.get("name", ""), "name"
+                )
+            silver_output_names += [name]
+
+        missing_keys = set(gold_input_names) - set(silver_output_names)
+        if missing_keys:
+            raise MissingSilverKeysError(missing_keys)
+
+    def _compile_stages(self) -> None:
+        """
+        Creates Stage objects, setting pretransform to None if not provided.
+        """
+        pretransform = None
+        if self._pretransform_name:
+            for table in self._preset["silver"]["preTransform"]:
+                if table["name"] == self._pretransform_name:
+                    self._pre = Stage(self._spark, "silver pretransform", table)
+                    break
+
+        self._silver = [
+            Stage(self._spark, "silver transform", table)
+            for table in self._preset.get("silver", {}).get("transform", [])
+        ]
+        self._gold = [
+            Stage(self._spark, "gold", table) for table in self._preset.get("gold", [])
+        ]
+
+    def _run(
+        self, df: DataFrame
+    ) -> Tuple[DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]]:
+        """
+        Runs all stages, in medallion stage order. This allows prior stage outputs to feed
+        into later stage inputs.
+
+        Returns:
+            Dataframes containing the output from each run Stage.
+        """
+        if self._pre:
+            df = self._pre.run(df)
+
+        silver_output_map = {}
+        for table in self._silver:
+            silver_output_map[table._name] = table.run(df)
+
+        gold_output_map = {}
+        for table in self._gold:
+            # We store as gold_name/silver_input to prevent clobbering on duplicate gold table use.
+            gold_output_map[f"{table._name}/{table._input}"] = table.run(
+                silver_output_map[table._input]
+            )
+
+        return (
+            (df, silver_output_map, gold_output_map)
+            if self._pre
+            else (None, silver_output_map, gold_output_map)
+        )
+
+    def _render_output(
+        self,
+        input_df: DataFrame,
+        stage_dataframes: Tuple[DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]],
+        gold_table_schema: str,
+    ) -> None:
+        """
+        Displays formatted HTML output from executed Stages' DataFrames.
+        """
+        # TODO: Investigate further into using Databricks's style sheets here.
+
+        # Get the Databricks built-in functions out the namespace.
+        ipython = get_ipython()
+        displayHTML = ipython.user_ns["displayHTML"]
+        display = ipython.user_ns["display"]
+
+        def d(txt, lvl) -> None:
+            displayHTML(
+                f"""
+                <div style="background-color:
+                background-color: rgb(18, 23, 26); padding: 0; margin: 0;">
+                <h{lvl} style="margin: 0; background-color: rgb(244, 234, 229);">{txt}</h{lvl}>
+                </div>
+                """
+            )
+
+        def check_struct_compatibility(
+            target_field: StructField, df_field: StructField, prefix=""
+        ):
+            if not (
+                isinstance(target_field.dataType, StructType)
+                and isinstance(df_field.dataType, StructType)
+            ):
+                return
+
+            target_fields = {
+                field.name: field for field in target_field.dataType.fields
+            }
+            for field in df_field.dataType.fields:
+                if field.name not in target_fields:
+                    raise GoldTableCompatibilityError(
+                        f"Extra field found in gold stage output STRUCT column {prefix}{target_field.name}: {field.name}"
+                    )
+                else:
+                    if isinstance(field.dataType, StructType):
+                        check_struct_compatibility(
+                            target_fields[field.name],
+                            field,
+                            prefix=prefix + target_field.name + ".",
+                        )
+
+        (pre_df, silver, gold) = stage_dataframes
+        d("Input", 1)
+        display(input_df)
+        d("Silver Pre-Transform", 1)
+        if pre_df:
+            display(pre_df)
+        else:
+            d("Skipped", 2)
+        d("Silver Transform", 1)
+        for name, df in silver.items():
+            d(f"{name}", 2)
+            display(df)
+        d("Gold", 1)
+        for name, df in gold.items():
+            d(f"{name}", 2)
+            d("Stage output", 3)
+            display(df)
+
+            # NOTE: Name is stored as Gold_name/Silver_input. So we need to get just the Gold table
+            # name that we are comparing the dataframe metadata to.
+            name = name.split("/")[0]
+
+            if not self._spark.catalog.tableExists(f"{gold_table_schema}.{name}"):
+                raise UnknownGoldTableError(name, gold_table_schema)
+
+            # Performs the type check.
+            delta_df = self._spark.table(f"{gold_table_schema}.{name}").limit(0)
+            unioned_df = delta_df.unionByName(df, allowMissingColumns=True)
+
+            # Now we check no new columns.
+            if not set(df.columns).issubset(delta_df.columns):
+                raise GoldTableCompatibilityError(
+                    f"Extra columns provided: {', '.join([col for col in df.columns if col not in delta_df.columns])}"
+                )
+
+            # Now we check no new fields in STRUCT columns.
+            for field in delta_df.schema.fields:
+                if isinstance(field.dataType, StructType) and field.name in df.columns:
+                    # Retrieve the corresponding field from the DataFrame's schema.
+                    df_field = next(f for f in df.schema.fields if f.name == field.name)
+                    check_struct_compatibility(field, df_field)
+
+            # Check nullable columns exist, and data what we are inserting is set.
+            non_nullable_cols = [
+                field.name for field in delta_df.schema.fields if not field.nullable
+            ]
+            null_checks = [
+                sum_(when(col_(col).isNull(), 1).otherwise(0)).alias(col)
+                for col in non_nullable_cols
+            ]
+            null_counts = df.select(null_checks).collect()[0].asDict()
+            cols_with_nulls = []
+            try:
+                cols_with_nulls = [
+                    col_name for col_name, count in null_counts.items() if count > 0
+                ]
+            except TypeError:
+                # There were no records returned and so null_counts == None.
+                pass
+            if cols_with_nulls:
+                raise GoldTableCompatibilityError(
+                    f"Record with null data found for non-nullable columns: {', '.join([col for col in cols_with_nulls])}"
+                )
+
+            d("Resultant gold table preview", 3)
+            display(unioned_df)
+
+    def evaluate(self, gold_table_schema: str) -> None:
+        """
+        Evaluates the loaded preset YAML using the input datasource configuration to load
+        records. Finally, checks that the output from the Gold stages is compatible with
+        the Unity Catalog Gold tables.
+        """
+        s = gold_table_schema.split(".")
+        if len(s) != 2:
+            raise InvalidGoldTableSchemaError(gold_table_schema)
+        catalog_name = s[0]
+        schema_name = s[1]
+        if any(
+            row.catalog == catalog_name
+            for row in self._spark.sql("SHOW CATALOGS").collect()
+        ):
+            if not any(
+                row.databaseName == schema_name
+                for row in self._spark.sql(f"SHOW SCHEMAS IN {catalog_name}").collect()
+            ):
+                raise InvalidGoldTableSchemaError(
+                    gold_table_schema,
+                    f"Schema {schema_name} not found in catalog {catalog_name} or insufficient permissions.",
+                )
+        else:
+            raise InvalidGoldTableSchemaError(
+                gold_table_schema,
+                f"Catalog {catalog_name} not found or insufficient permissions.",
+            )
+
+        # If we are using the autoloader, fetch format from preset and others.
+        if self._ds_params._mode == "autoloader":
+            if not (autoloader_conf := self._preset.get("autoloader", None)):
+                raise MissingAutoloaderConfigError()
+            if not (file_format := autoloader_conf.get("format", None)):
+                raise AutoloaderMissingFieldError("format")
+            self._ds_params.set_autoloader_format(file_format)
+            if schemaFile := autoloader_conf.get("schemaFile", None):
+                self._ds_params.set_autoloader_schema_file(schemaFile)
+            if multiline := autoloader_conf.get("multiline", None):
+                if multiline == "true":
+                    self._ds_params.set_multiline(True)
+                else:
+                    self._ds_params.set_multiline(False)
+            if cloudFiles := autoloader_conf.get("cloudFiles", None):
+                if schema_hints := cloudFiles.get("schemaHints", None):
+                    self._ds_params.set_autoloader_cloudfiles_schema_hints(schema_hints)
+                if schema_hints_file := cloudFiles.get("schemaHintsFile", None):
+                    self._ds_params.set_autoloader_cloudfiles_schema_hint_file(
+                        schema_hints_file
+                    )
+
+        self._compile_stages()
+
+        with self._ds_params as df:
+            self._result_df_map = self._run(df)
+            self._render_output(df, self._result_df_map, gold_table_schema)
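For context, a rough notebook-style sketch of how PreviewEngine appears intended to be driven. Only the constructor and evaluate() signatures come from the code above; the preset YAML is a toy example, and PreviewParameters is defined in preview_parameters.py, which is not part of this diff:

    from dasl_client.preset_development import PreviewEngine

    # Toy preset: one silver transform feeding one gold table (real presets carry more fields).
    preset_yaml = """
    silver:
      transform:
        - name: http_events
    gold:
      - name: http_activity
        input: http_events
    """

    # params: a configured PreviewParameters instance (its constructor is not shown in this diff).
    engine = PreviewEngine(spark, preset_yaml, params)   # spark: the notebook's SparkSession

    # The argument must be "<catalog>.<schema>" for the Unity Catalog schema holding the gold tables.
    engine.evaluate("my_catalog.my_gold_schema")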