dasl-client 1.0.23__py3-none-any.whl → 1.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dasl-client might be problematic.

@@ -7,6 +7,12 @@ from dasl_client.preset_development.stage import *
 from dasl_client.preset_development.errors import *
 import yaml
 from IPython import get_ipython
+from itertools import count
+
+
+@udf(StringType())
+def constant_udf(*args):
+    return "<sortable_random_id>"
 
 
 class PreviewEngine:
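The `constant_udf` moved into this module stamps every preview row with a fixed placeholder `dasl_id`; the literal `"<sortable_random_id>"` stands in for the id generated in a real deployment. A minimal sketch of the same pattern, assuming a local SparkSession:

```python
# Sketch only: a constant UDF stamping a placeholder column, assuming a
# local SparkSession. "<sortable_random_id>" is the literal placeholder the
# preview engine uses in place of a real generated id.
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType

spark = SparkSession.builder.getOrCreate()

@udf(StringType())
def constant_udf(*args):
    return "<sortable_random_id>"

df = spark.createDataFrame([(1,), (2,)], ["value"])
df.withColumn("dasl_id", constant_udf()).show()
```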
@@ -54,8 +60,9 @@ class PreviewEngine:
             self._preset.get("silver", None), self._pretransform_name
         )
 
+        self._pre_bronze = None
         self._bronze = None
-        self._pre = None
+        self._pre_silver = None
         self._silver = []
         self._gold = []
         self._result_df_map: Tuple[
@@ -124,13 +131,32 @@ class PreviewEngine:
 
     def _compile_stages(self) -> None:
         """
-        Creates Stage objects, setting pretransform to None if not provided.
+        Creates Stage objects, setting silver pretransform to None if not provided.
         """
+        pre_bronze_field_counter = count()
+        pre_bronze_name_counter = count()
+        pre_bronze_expr_groups = self._preset.get("bronze", {}).get("preTransform", [])
+        if pre_bronze_expr_groups:
+            tables = [
+                {
+                    "name": f"Index {next(pre_bronze_name_counter)}",
+                    "fields": [
+                        {"name": str(next(pre_bronze_field_counter)), "expr": expr}
+                        for expr in expr_group
+                    ],
+                }
+                for expr_group in pre_bronze_expr_groups
+            ]
+            self._pre_bronze = [
+                Stage(self._spark, "bronze pretransform", table) for table in tables
+            ]
+
         pretransform = None
         if self._pretransform_name:
             for table in self._preset["silver"]["preTransform"]:
                 if table["name"] == self._pretransform_name:
-                    self._pre = Stage(self._spark, "silver pretransform", table)
+                    self._pre_silver = Stage(self._spark, "silver pretransform", table)
                     break
 
         self._silver = [
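To see what `_compile_stages` builds for the bronze pretransform, here is an illustrative run of the same comprehension on a hypothetical preset fragment (the preset shape is inferred from the `.get()` calls above):

```python
# Illustrative only: how bronze preTransform expression groups compile into
# the table dicts handed to Stage. The preset content here is an assumption.
from itertools import count

preset = {
    "bronze": {
        "preTransform": [
            ["col_a", "upper(col_b) as col_b"],  # expression group 0
            ["*", "current_timestamp() as ts"],  # expression group 1
        ]
    }
}

field_counter = count()
name_counter = count()
tables = [
    {
        "name": f"Index {next(name_counter)}",
        "fields": [
            {"name": str(next(field_counter)), "expr": expr}
            for expr in expr_group
        ],
    }
    for expr_group in preset["bronze"]["preTransform"]
]
# tables[0] == {"name": "Index 0",
#               "fields": [{"name": "0", "expr": "col_a"},
#                          {"name": "1", "expr": "upper(col_b) as col_b"}]}
```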
@@ -151,8 +177,56 @@ class PreviewEngine:
         Returns:
             Dataframes containing the output from each run Stage.
         """
-        if self._pre:
-            df = self._pre.run(df)
+        # If we are not in silverbronze mode, or we are in silverbronze mode with
+        # an autoloader location provided, we need to run the preBronze stages.
+        pre_bronze_output = {}
+        if (
+            self._ds_params._mode != "silverbronze"
+            or self._ds_params._autoloader_location
+        ):
+            if self._pre_bronze:
+                for stage in self._pre_bronze:
+                    df = stage.run(df)
+                    pre_bronze_output[stage._name] = df
+        else:
+            # We are in silverbronze mode with no autoloader, so we treat the
+            # first silverbronze table as the initial df.
+            df = (
+                self._spark.table(self._ds_params._bronze_tables[0].get("name", ""))
+                .drop("dasl_id")
+                .limit(self._ds_params._record_limit)
+            )
+
+        if time_col := self._ds_params._time_column:
+            df = df.filter(
+                f"timestamp({time_col}) >= timestamp('{self._ds_params._start_time}') AND timestamp({time_col}) < timestamp('{self._ds_params._end_time}')"
+            )
+
+        df = df.withColumn("dasl_id", constant_udf())
+
+        self._bronze = df
+
+        # Deal with silverbronze table joins.
+        # Note: we can blindly .get() here, as validation should've caught anything missing.
+        if self._ds_params._mode == "silverbronze":
+            if alias := self._ds_params._bronze_tables[0].get("alias", None):
+                df = df.alias(alias)
+            for bronze_table in self._ds_params._bronze_tables[1:]:
+                join_df = (
+                    self._spark.table(bronze_table["name"])
+                    .drop("dasl_id")
+                    .limit(self._ds_params._record_limit)
+                )
+                if alias := bronze_table.get("alias", None):
+                    join_df = join_df.alias(alias)
+                df = df.join(
+                    join_df,
+                    expr(bronze_table["joinExpr"]),
+                    bronze_table.get("joinType", "left"),
+                )
+
+        if self._pre_silver:
+            df = self._pre_silver.run(df)
 
         silver_output_map = {}
         for table in self._silver:
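The join loop above resolves each bronze table definition into a DataFrame, applies its alias, and joins on `joinExpr`, with `joinType` defaulting to `"left"`. A self-contained sketch of the same semantics, assuming a SparkSession and toy tables:

```python
# Sketch of the silverbronze join semantics, under assumed toy data; the
# alias/joinExpr/joinType names mirror the bronze table definition fields.
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

spark = SparkSession.builder.getOrCreate()

base = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "v"]).alias("tab1")
other = spark.createDataFrame([(1, "x")], ["serviceID", "w"]).alias("tab2")

# Equivalent to {"alias": "tab2", "joinExpr": "tab1.id = tab2.serviceID",
# "joinType": "left"}; joinType falls back to "left" when omitted.
joined = base.join(other, expr("tab1.id = tab2.serviceID"), "left")
joined.show()
```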
@@ -166,15 +240,17 @@ class PreviewEngine:
             )
 
         return (
-            (df, silver_output_map, gold_output_map)
-            if self._pre
-            else (None, silver_output_map, gold_output_map)
+            (df, silver_output_map, gold_output_map, pre_bronze_output)
+            if self._pre_silver
+            else (None, silver_output_map, gold_output_map, pre_bronze_output)
         )
 
     def _render_output(
         self,
         input_df: DataFrame,
-        stage_dataframes: Tuple[DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]],
+        stage_dataframes: Tuple[
+            List[DataFrame], DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]
+        ],
         gold_table_catalog: str,
         gold_table_schema: str,
     ) -> None:
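`_run` now returns a 4-tuple rather than the previous 3-tuple. Its shape, mirroring the unpacking in `_render_output` below (`engine` here is a hypothetical `PreviewEngine` instance):

```python
# Shape of the tuple stored in self._result_df_map after evaluate() runs.
(pre_silver, silver, gold, pre_bronze) = engine._result_df_map
# pre_silver:  DataFrame or None    - silver pretransform output
# silver:      Dict[str, DataFrame] - per-table silver transform output
# gold:        Dict[str, DataFrame] - per-table gold transform output
# pre_bronze:  Dict[str, DataFrame] - per-stage bronze pretransform output
```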
@@ -195,7 +271,7 @@ class PreviewEngine:
         def d(txt, lvl) -> None:
             displayHTML(
                 f"""
-                <div style="background-color:
+                <div style="background-color:
                     background-color: rgb(18, 23, 26); padding: 0; margin: 0;">
                     <h{lvl} style="margin: 0; background-color: rgb(244, 234, 229);">{txt}</h{lvl}>
                 </div>
@@ -227,12 +303,16 @@ class PreviewEngine:
                     prefix=prefix + target_field.name + ".",
                 )
 
-        (pre_df, silver, gold) = stage_dataframes
-        d("Input", 1)
+        (pre_silver, silver, gold, pre_bronze) = stage_dataframes
+        d("Autoloader Input", 1)
         display(input_df)
+        d("Bronze Pre-Transform", 1)
+        for name, df in pre_bronze.items():
+            d(f"{name}", 2)
+            display(df)
         d("Silver Pre-Transform", 1)
-        if pre_df:
-            display(pre_df)
+        if pre_silver:
+            display(pre_silver)
         else:
             d("Skipped", 2)
         d("Silver Transform", 1)
@@ -326,31 +406,32 @@ class PreviewEngine:
         )
 
         # If we are using the autoloader, fetch the format and other settings from the preset.
-        if self._ds_params._mode == "autoloader":
+        if self._ds_params._mode == "autoloader" or (
+            self._ds_params._mode == "silverbronze"
+            and self._ds_params._autoloader_location
+        ):
+            if self._preset.get("bronze", {}).get("loadAsSingleVariant", False) == True:
+                self._ds_params._set_load_as_single_variant()
             if not (autoloader_conf := self._preset.get("autoloader", None)):
                 raise MissingAutoloaderConfigError()
             if not (file_format := autoloader_conf.get("format", None)):
                 raise AutoloaderMissingFieldError("format")
-            self._ds_params.set_autoloader_format(file_format)
+            self._ds_params._set_autoloader_format(file_format)
             if schemaFile := autoloader_conf.get("schemaFile", None):
-                self._ds_params.set_autoloader_schema_file(schemaFile)
-            if multiline := autoloader_conf.get("multiline", None):
-                if multiline == "true":
-                    self._ds_params.set_multiline(True)
-                else:
-                    self._ds_params.set_multiline(False)
+                self._ds_params._set_autoloader_schema_file(schemaFile)
             if cloudFiles := autoloader_conf.get("cloudFiles", None):
                 if schema_hints := cloudFiles.get("schemaHints", None):
-                    self._ds_params.set_autoloader_cloudfiles_schema_hints(schema_hints)
+                    self._ds_params._set_autoloader_cloudfiles_schema_hints(
+                        schema_hints
+                    )
                 if schema_hints_file := cloudFiles.get("schemaHintsFile", None):
-                    self._ds_params.set_autoloader_cloudfiles_schema_hint_file(
+                    self._ds_params._set_autoloader_cloudfiles_schema_hint_file(
                         schema_hints_file
                     )
 
         self._compile_stages()
 
         with self._ds_params as df:
-            self._bronze = df
             self._result_df_map = self._run(df)
             if display:
                 self._render_output(
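For reference, a hypothetical preset fragment showing the fields `evaluate()` reads in the block above; the exact schema is an assumption based on the `.get()` calls in this diff:

```python
preset = {
    "bronze": {
        "loadAsSingleVariant": True,  # triggers _set_load_as_single_variant()
    },
    "autoloader": {
        "format": "jsonl",  # required; AutoloaderMissingFieldError if absent
        "schemaFile": "/dbfs/schemas/events.ddl",
        "cloudFiles": {
            "schemaHints": "id BIGINT, ts TIMESTAMP",
            # "schemaHintsFile": "/dbfs/schemas/hints.txt",
        },
    },
}
```

The hunks below apply to the module that defines `PreviewParameters`; the file header for this second file was not preserved in this diff view.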
@@ -9,11 +9,6 @@ import uuid
 from IPython import get_ipython
 
 
-@udf(StringType())
-def constant_udf(*args):
-    return "<sortable_random_id>"
-
-
 class PreviewParameters:
     """
     This class provides four methods for supplying input records to the preset development environment.
@@ -60,6 +55,78 @@ class PreviewParameters:
     .set_table("system.access.audit")
     ```
 
+    **4. SilverBronze Mode:**
+    "silverbronze" mode works like a more advanced "table" mode: it allows multiple tables to be
+    joined as input. This mode requires setting bronze table definitions, and it behaves in two
+    separate ways depending on whether an autoloader location is set. If an autoloader location
+    is set, the first entry in the bronze table definitions is used to name and alias the
+    autoloader's input, and these can be used in later join expressions. Used in this way, the
+    autoloader will be loaded as in "autoloader" mode and run through preBronze stages before
+    being joined with the remainder of the bronze table definitions. This mimics not skipping
+    bronze in a DataSource and joining what was read in silver. If an autoloader location is not
+    set, the behaviour instead attempts to emulate a DataSource set to skip the bronze stage.
+    That is, all preBronze and bronze stages will be skipped, and the table named by the first
+    entry in the given bronze table definitions will be read instead. Any subsequent bronze
+    table definitions will be joined against this table.
+
+    Using no autoloader location (this will read from the first table):
+    ```python
+    bronze_tables = [
+        {
+            "name": "databricks_dev.default.sev_map",
+            "alias": "tab1"
+        },
+        {
+            "name": "databricks_dev.alan_bronze.akamai_waf",
+            "alias": "tab2",
+            "joinExpr": "id::string = tab2.serviceID",
+            "joinType": "left"
+        },
+        {
+            "name": "databricks_dev.alan_silver.cloudflare_hjttp_request",
+            "alias": "tab3",
+            "joinExpr": "tab1.id::string = tab3.ClientRequestsBytes",
+            "joinType": "inner"
+        }
+    ]
+
+    ds_params = (
+        PreviewParameters(spark)
+        .from_silverbronze_tables()
+        .set_bronze_table_definitions(bronze_tables)
+    )
+
+    ps = PreviewEngine(spark, yaml_string, ds_params)
+    ps.evaluate("stage.gold")
+    ```
+
+    Using an autoloader location (this will read from the autoloader and name the df tab1):
+    ```python
+    bronze_tables = [
+        {
+            "name": "tab1"
+        },
+        {
+            "name": "databricks_dev.alan_bronze.akamai_waf",
+            "alias": "tab2",
+            "joinExpr": "id::string = tab2.serviceID",
+            "joinType": "left"
+        },
+        {
+            "name": "databricks_dev.alan_silver.cloudflare_hjttp_request",
+            "alias": "tab3",
+            "joinExpr": "tab1.id::string = tab3.ClientRequestsBytes",
+            "joinType": "inner"
+        }
+    ]
+
+    ds_params = (
+        PreviewParameters(spark)
+        .from_silverbronze_tables()
+        .set_bronze_table_definitions(bronze_tables)
+        .set_autoloader_location("s3://antimatter-dasl-testing/csamples3/mars/area1/")
+    )
+    ```
+
     **Note:**
     When using autoloader mode, this implementation requires a location to store a temporary schema for
     the loaded records. By default, this is set to `"dbfs:/tmp/schemas"`. You can change this using
@@ -94,7 +161,7 @@ class PreviewParameters:
             df (DataFrame): Internal Spark DataFrame loaded using the specified parameters.
         """
         self._spark = spark
-        self._mode = None  # [input, autoloader]
+        self._mode = None  # [input, table, autoloader, silverbronze]
         self._record_limit = 10
         self._autoloader_temp_schema_location = "dbfs:/tmp/schemas"
         self._gold_test_schemas = []
@@ -110,18 +177,97 @@ class PreviewParameters:
         self._cloudfiles_schema_hints = None
         self._cloudfiles_reader_case_sensitive = "true"
         self._cloudfiles_multiline = "true"
+        self._cloudfiles_wholetext = "false"
         self._schema_uuid_str = str(uuid.uuid4())
+        self._single_variant_column = None
 
         self._schema = None
         self._data = None
 
         self._table = None
 
+        self._bronze_tables = None
+
         self._pretransform_name = None
-        self._bronze_pre_transform: Optional[List[str]] = None
 
         self._df = None
 
+    def __create_from_autoloader(self) -> None:
+        stream_df = (
+            self._spark.readStream.format("cloudFiles")
+            .option("cloudFiles.format", self._autoloader_format)
+            .option("readerCaseSensitive", self._cloudfiles_reader_case_sensitive)
+        )
+
+        # text and wholetext need to be handled separately.
+        stream_df = (
+            stream_df.option("multiline", self._cloudfiles_multiline)
+            if self._autoloader_format != "text"
+            else stream_df.option("wholetext", self._cloudfiles_wholetext)
+        )
+
+        if self._single_variant_column:
+            stream_df = stream_df.option(
+                "singleVariantColumn", self._single_variant_column
+            )
+
+        if self._schema_file:
+            with open(self._schema_file, "r") as f:
+                stream_df = stream_df.schema(f.read().strip())
+        else:
+            stream_df = (
+                stream_df.option("inferSchema", "true")
+                .option("cloudFiles.inferColumnTypes", "true")
+                .option(
+                    "cloudFiles.schemaLocation",
+                    f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+                )
+            )
+
+        if self._cloudfiles_schema_hints:
+            stream_df = stream_df.option(
+                "cloudFiles.schemaHints", self._cloudfiles_schema_hints
+            )
+        elif self._clouldfiles_schema_hints_file:
+            stream_df = stream_df.option(
+                "cloudFiles.schemaHintsFile", self._clouldfiles_schema_hints_file
+            )
+
+        stream_df = stream_df.load(self._autoloader_location).limit(self._record_limit)
+
+        query = (
+            stream_df.writeStream.format("memory")
+            .queryName("batch_data")
+            .trigger(availableNow=True)
+            .start()
+        )
+
+        query.awaitTermination()
+
+    def __create_from_silverbronze_tables_join(self) -> DataFrame:
+        if not self._bronze_tables or not len(self._bronze_tables):
+            raise MissingBronzeTablesError()
+
+        # Validate that name and joinExpr are set.
+        for i in range(len(self._bronze_tables)):
+            if not self._bronze_tables[i].get("name", None):
+                raise MissingBronzeTableFieldError("name")
+            if i > 0 and not self._bronze_tables[i].get("joinExpr", None):
+                raise MissingBronzeTableFieldError("joinExpr")
+
+        # If there is an autoloader location given, we create the df now and
+        # then allow the preBronze stage to run. Otherwise we skip preBronze
+        # stages and, as part of running the silverbronze joins, we create the
+        # df from the first entry in the bronze tables list.
+        df = None
+        if self._autoloader_location:
+            self.__create_from_autoloader()
+            df = self._spark.table("batch_data").alias(
+                self._bronze_tables[0].get("name", "")
+            )  # Use the first entry's name.
+
+        return df
+
     def __enter__(self):
         """
         Creates a DataFrame with data using the method specified. In the case of "autoloader",
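`__create_from_autoloader` uses the read-stream-into-a-memory-sink pattern: an `availableNow` trigger processes whatever files exist now, the query is awaited, and the rows are then read back as a batch table. A minimal sketch, assuming a Databricks-like environment where `cloudFiles` (Auto Loader) is available; paths are placeholders:

```python
# Sketch of the memory-sink ingestion pattern used above, under assumed paths.
stream_df = (
    spark.readStream.format("cloudFiles")
    .option("cloudFiles.format", "json")
    .option("cloudFiles.schemaLocation", "dbfs:/tmp/schemas/example")
    .load("s3://example-bucket/input/")
)

query = (
    stream_df.writeStream.format("memory")  # results land in an in-memory table
    .queryName("batch_data")
    .trigger(availableNow=True)             # process what exists now, then stop
    .start()
)
query.awaitTermination()

# The ingested rows can then be read back as a batch DataFrame:
df = spark.table("batch_data")
```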
@@ -137,59 +283,10 @@ class PreviewParameters:
         elif self._mode == "table":
             self._df = self._spark.table(self._table).limit(self._record_limit)
         elif self._mode == "autoloader":
-            stream_df = (
-                self._spark.readStream.format("cloudFiles")
-                .option("cloudFiles.format", self._autoloader_format)
-                .option("multiline", self._cloudfiles_multiline)
-                .option("readerCaseSensitive", self._cloudfiles_reader_case_sensitive)
-            )
-
-            if self._schema_file:
-                with open(self._schema_file, "r") as f:
-                    stream_df = stream_df.schema(f.read().strip())
-            else:
-                stream_df = (
-                    stream_df.option("inferSchema", "true")
-                    .option("cloudFiles.inferColumnTypes", "true")
-                    .option(
-                        "cloudFiles.schemaLocation",
-                        f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
-                    )
-                )
-
-            if self._cloudfiles_schema_hints:
-                stream_df = stream_df.option(
-                    "cloudFiles.schemaHints", self._cloudfiles_schema_hints
-                )
-            elif self._clouldfiles_schema_hints_file:
-                stream_df = stream_df.option(
-                    "cloudFiles.schemaHintsFile", self._clouldfiles_schema_hints_file
-                )
-
-            stream_df = stream_df.load(self._autoloader_location).limit(
-                self._record_limit
-            )
-
-            if self._bronze_pre_transform is not None:
-                stream_df = stream_df.selectExpr(*self._bronze_pre_transform)
-
-            query = (
-                stream_df.writeStream.format("memory")
-                .queryName("batch_data")
-                .trigger(availableNow=True)
-                .start()
-            )
-
-            query.awaitTermination()
-
+            self.__create_from_autoloader()
             self._df = self._spark.table("batch_data")
-
-            if self._time_column:
-                self._df = self._df.filter(
-                    f"timestamp({self._time_column}) >= timestamp('{self._start_time}') AND timestamp({self._time_column}) < timestamp('{self._end_time}')"
-                )
-
-            self._df = self._df.withColumn("dasl_id", constant_udf())
+        elif self._mode == "silverbronze":
+            self._df = self.__create_from_silverbronze_tables_join()
 
         return self._df
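`__enter__` makes `PreviewParameters` a context manager, which is how `PreviewEngine.evaluate` consumes it (`with self._ds_params as df:` in the engine hunk earlier in this diff). A usage sketch, reusing the `bronze_tables` list from the class docstring:

```python
ds_params = (
    PreviewParameters(spark)
    .from_silverbronze_tables()
    .set_bronze_table_definitions(bronze_tables)
)

with ds_params as df:  # __enter__ materializes the DataFrame for the chosen mode
    df.show()
```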
@@ -254,6 +351,36 @@ class PreviewParameters:
         self._mode = "table"
         return self
 
+    def from_silverbronze_tables(self):
+        """
+        Set the data source loader to "silverbronze" mode. Requires a list of bronze table
+        definitions to be provided.
+
+        Returns:
+            PreviewParameters: The current instance with updated configuration.
+        """
+        self._mode = "silverbronze"
+        return self
+
+    def set_bronze_table_definitions(self, definitions: List[Dict[str, str]]):
+        """
+        Set the bronze table definitions for silverbronze mode. `name` is required for
+        every entry, and `joinExpr` is required for every entry after the first. If
+        `alias` is not provided, the `name` can be used to refer to the table. If
+        `joinType` is not provided, "left" is used as the default.
+
+        [
+            {
+                "name": "name",
+                "alias": "alias1",
+                "joinType": "inner",
+                "joinExpr": "base_table.col1 = alias1.col1"
+            },
+            ...
+        ]
+        """
+        self._bronze_tables = definitions
+        return self
+
     def set_autoloader_temp_schema_location(self, path: str):
         """
         Set the location for the autoloader's streaming mode schema to be created. This is
@@ -311,7 +438,7 @@ class PreviewParameters:
         self._autoloader_location = location
         return self
 
-    def set_autoloader_format(self, file_format: str):
+    def _set_autoloader_format(self, file_format: str):
         """
         Used internally to set the autoloader format.
 
@@ -320,12 +447,16 @@ class PreviewParameters:
         """
         if file_format.lower() == "jsonl":
             self._autoloader_format = "json"
-            self.set_autoloader_multiline(False)
+            self._cloudfiles_multiline = "false"
+            return self
+        if file_format.lower() == "wholetext":
+            self._autoloader_format = "text"
+            self._cloudfiles_wholetext = "true"
             return self
         self._autoloader_format = file_format
         return self
 
-    def set_autoloader_schema_file(self, path: str):
+    def _set_autoloader_schema_file(self, path: str):
         """
         Set the schema file path for "autoloader" mode.
 
@@ -335,7 +466,7 @@ class PreviewParameters:
         self._schema_file = path
         return self
 
-    def set_autoloader_cloudfiles_schema_hint_file(self, path: str):
+    def _set_autoloader_cloudfiles_schema_hint_file(self, path: str):
         """
         Set the cloudFiles schema hints file path for "autoloader" mode.
 
@@ -345,7 +476,7 @@ class PreviewParameters:
         self._clouldfiles_schema_hints_file = path
         return self
 
-    def set_autoloader_cloudfiles_schema_hints(self, cloudfiles_schema_hints: str):
+    def _set_autoloader_cloudfiles_schema_hints(self, cloudfiles_schema_hints: str):
         """
         Set the cloudFiles schema hints string for "autoloader" mode.
 
@@ -355,26 +486,6 @@ class PreviewParameters:
         self._cloudfiles_schema_hints = cloudfiles_schema_hints
         return self
 
-    def set_autoloader_reader_case_sensitive(self, b: bool):
-        """
-        Set the cloudFiles reader case-sensitive boolean for "autoloader" mode.
-
-        Returns:
-            PreviewParameters: The current instance with updated configuration.
-        """
-        self._cloudfiles_reader_case_sensitive = "true" if b else "false"
-        return self
-
-    def set_autoloader_multiline(self, b: bool):
-        """
-        Set the cloudFiles multiline boolean for "autoloader" mode.
-
-        Returns:
-            PreviewParameters: The current instance with updated configuration.
-        """
-        self._cloudfiles_multiline = "true" if b else "false"
-        return self
-
     def set_pretransform_name(self, pretransform_name: str):
         """
         Set the pretransform name to use, if desired. If not set, Silver PreTransform
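The `set_autoloader_reader_case_sensitive` and `set_autoloader_multiline` setters removed above are superseded by `_set_autoloader_format`, which derives those cloudFiles options from the preset's declared format:

```python
# Format aliases handled by _set_autoloader_format (per the code above):
#   "jsonl"     -> cloudFiles format "json", multiline disabled
#   "wholetext" -> cloudFiles format "text", wholetext enabled
params = PreviewParameters(spark)
params._set_autoloader_format("jsonl")
assert params._autoloader_format == "json"
assert params._cloudfiles_multiline == "false"
```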
@@ -386,16 +497,6 @@ class PreviewParameters:
         self._pretransform_name = pretransform_name
         return self
 
-    def set_bronze_pre_transform(self, expr: List[str]):
-        """
-        Sets a pre-transform expression that will run before data is written to bronze.
-
-        Returns:
-            PreviewParameters: The current instance with updated configuration.
-        """
-        self._bronze_pre_transform = expr
-        return self
-
     def set_date_range(self, column: str, start_time: str, end_time: str):
         """
         Set the TIMESTAMP column and date range to use as the input data filter to
@@ -431,6 +532,17 @@ class PreviewParameters:
         self._table = table_name
         return self
 
+    def _set_load_as_single_variant(self, col_name: Optional[str] = None):
+        """
+        Enable loadAsSingleVariant mode. This will ingest data into a single
+        VARIANT-typed column. The default name of that column is `data`.
+
+        Returns:
+            PreviewParameters: The current instance with updated configuration.
+        """
+        self._single_variant_column = col_name if col_name is not None else "data"
+        return self
+
     def add_gold_schema_table(self, gold_schema_table_name: str):
         """
         Add a gold schema temporary table name that will need to be cleaned
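With `_set_load_as_single_variant` enabled, the autoloader passes `singleVariantColumn` to cloudFiles so every record lands in one VARIANT column (named `data` by default). A hypothetical query against such output, using Databricks' variant path syntax; the table and field names are placeholders:

```python
df = spark.table("preview_output")  # hypothetical preview result table
df.selectExpr("data:host::string", "data:event.count::int").show()
```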