dasl-client 1.0.27__py3-none-any.whl → 1.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



dasl_client/client.py CHANGED
@@ -1,24 +1,22 @@
  from copy import deepcopy
- from datetime import datetime, timedelta, timezone
+ from datetime import datetime, timedelta
  from time import sleep
- from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar
- from pydantic import Field
- from pyspark.sql import DataFrame
+ from typing import Any, Callable, Iterator, List, Optional, TypeVar

  from dasl_api import (
+ ContentV1Api,
  CoreV1Api,
+ CoreV1QueryExtendRequestDateRange,
  DbuiV1Api,
  DbuiV1QueryExtendRequest,
- CoreV1QueryExtendRequestDateRange,
  DbuiV1QueryGenerateRequest,
  DbuiV1QueryGenerateRequestTimeRange,
  DbuiV1QueryGenerateStatus,
- DbuiV1QueryLookupRequest,
- DbuiV1QueryLookupResult,
  DbuiV1QueryHistogramRequest,
  DbuiV1QueryHistogramResult,
+ DbuiV1QueryLookupRequest,
  DbuiV1QueryLookupRequestPagination,
- ContentV1Api,
+ DbuiV1QueryLookupResult,
  WorkspaceV1Api,
  WorkspaceV1CreateWorkspaceRequest,
  api,
@@ -26,29 +24,28 @@ from dasl_api import (
  from dasl_client.auth.auth import (
  Authorization,
  DatabricksSecretAuth,
- DatabricksTokenAuth,
  ServiceAccountKeyAuth,
  )
  from dasl_client.conn.conn import get_base_conn
  from dasl_client.errors.errors import ConflictError, error_handler
- from .helpers import Helpers
+ from dasl_client.metadata import WorkspaceMetadata
+
  from .exec_rule import ExecRule
+ from .helpers import Helpers
  from .regions import Regions
-
  from .types import (
  AdminConfig,
  DataSource,
+ DataSourcePreset,
+ DataSourcePresetsList,
  Dbui,
  Metadata,
  Rule,
- WorkspaceConfig,
  TransformRequest,
  TransformResponse,
- DataSourcePresetsList,
- DataSourcePreset,
+ WorkspaceConfig,
  )

-
  T = TypeVar("T")

@@ -79,7 +76,7 @@ class Client:
  service_principal_id: str,
  service_principal_secret: str,
  workspace_url: Optional[str] = None,
- region: str = Helpers.default_region,
+ region: Optional[str] = None,
  dasl_host: Optional[str] = None,
  ) -> "Client":
  """
@@ -98,18 +95,24 @@ class Client:
  being registered. If you omit this value, it will be inferred
  if you are running within a Databricks notebook. Otherwise, an
  exception will be raised.
- :param region: The name of the DASL region.
+ :param region: The name of the DASL region. If not specified,
+ the client will auto-detect the region from the workspace
+ URL. For a DASL region, this includes the cloud host, e.g.
+ aws-us-east-1.
  :param dasl_host: The URL of the DASL server. This value should
  not generally be specified. When specified, this value
- overrides region.
+ overrides both region and auto-detection.
  :returns: Client for the newly created workspace.
  """
- if dasl_host is None:
- dasl_host = Regions.lookup(region)
-
  with error_handler():
  if workspace_url is None:
  workspace_url = Helpers.current_workspace_url()
+
+ # Determine the DASL host to use
+ dasl_host = Client._dasl_host_from_workspace_metadata(
+ workspace_url, dasl_host, region
+ )
+
  admin_config = AdminConfig(
  workspace_url=workspace_url,
  app_client_id=app_client_id,
@@ -137,7 +140,7 @@ class Client:
  def for_workspace(
  workspace_url: Optional[str] = None,
  service_account_token: Optional[str] = None,
- region: str = Helpers.default_region,
+ region: Optional[str] = None,
  dasl_host: Optional[str] = None,
  ) -> "Client":
  """
@@ -151,19 +154,24 @@ class Client:
  :param service_account_token: Antimatter service account token.
  If provided, the client will use this token for auth instead
  of (automatic) secret-based auth.
- :param region: The name of the DASL region.
+ :param region: The name of the DASL region. If not specified,
+ the client will auto-detect the region from the workspace
+ URL. For a DASL region, this includes the cloud host, e.g.
+ aws-us-east-1.
  :param dasl_host: The URL of the DASL server. This value should
  not generally be specified. When specified, this value
- overrides region.
+ overrides both region and auto-detection.
  :returns: Client for the existing workspace.
  """
- if dasl_host is None:
- dasl_host = Regions.lookup(region)
-
  with error_handler():
  if workspace_url is None:
  workspace_url = Helpers.current_workspace_url()

+ # Determine the DASL host to use
+ dasl_host = Client._dasl_host_from_workspace_metadata(
+ workspace_url, dasl_host, region
+ )
+
  if service_account_token is None:
  return Client(
  DatabricksSecretAuth(
188
196
  service_principal_secret: str,
189
197
  workspace_url: Optional[str] = None,
190
198
  service_account_token: Optional[str] = None,
191
- region: str = Helpers.default_region,
199
+ region: Optional[str] = None,
192
200
  dasl_host: Optional[str] = None,
193
201
  ) -> "Client":
194
202
  """
@@ -220,10 +228,13 @@ class Client:
220
228
  If provided, the client will use this token for auth instead
221
229
  of (automatic) secret-based auth. Ignored if the workspace
222
230
  doesn't exist.
223
- :param region: The name of the DASL region.
231
+ :param region: The name of the DASL region. If not specified,
232
+ the client will auto-detect the region from the workspace
233
+ URL. For a DASL region, this includes the cloud host, e.g.
234
+ aws-us-east-1.
224
235
  :param dasl_host: The URL of the DASL server. This value should
225
236
  not generally be specified. When specified, this value
226
- overrides region.
237
+ overrides both region and auto-detection.
227
238
  :returns: Client for the newly created or existing workspace.
228
239
  """
229
240
  try:
@@ -990,3 +1001,52 @@ class Client:
  """
  with error_handler():
  return self._dbui_client().dbui_v1_query_cancel(self._workspace(), id)
+
+ @staticmethod
+ def _dasl_host_from_workspace_metadata(
+ workspace_url: str, dasl_host: Optional[str], region: Optional[str]
+ ) -> str:
+ """
+ If the dasl_host is already set, it will be returned as-is. If a region
+ is set, this will return the host that region maps to. If neither are
+ set, this gets the DASL host from the workspace URL with a workspace
+ metadata lookup.
+
+ :param workspace_url: The full base URL of the Databricks workspace
+ being registered. If you omit this value, it will be inferred
+ if you are running within a Databricks notebook. Otherwise, an
+ exception will be raised.
+ :param dasl_host: The URL of the DASL server. This value should
+ not generally be specified. When specified, this value
+ overrides both region and auto-detection.
+ :param region: The name of the DASL region. If not specified,
+ the client will auto-detect the region from the workspace
+ URL. For a DASL region, this includes the cloud host, e.g.
+ aws-us-east-1.
+ :return: The DASL host to use.
+ """
+ if dasl_host is None:
+ if region is not None:
+ # Use explicit region
+ dasl_host = Regions.lookup(region)
+ else:
+ # Attempt auto-detection from workspace URL
+ try:
+ metadata = WorkspaceMetadata.get_workspace_metadata(workspace_url)
+ if metadata is not None:
+ dasl_host = metadata.api_url
+ else:
+ raise Exception(
+ f"Could not determine API endpoint for workspace '{workspace_url}'. "
+ f"The workspace may not be in a supported region. "
+ f"Please specify 'region' or 'dasl_host' explicitly."
+ )
+ except Exception as e:
+ if "Could not determine API endpoint" in str(e):
+ raise
+ else:
+ raise Exception(
+ f"Failed to auto-detect API endpoint for workspace '{workspace_url}': {e}. "
+ f"Please specify 'region' or 'dasl_host' explicitly."
+ )
+ return dasl_host
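
Taken together, the client.py changes make `region` optional and resolve the DASL host in a fixed order: an explicit `dasl_host` wins, then an explicit `region`, then the workspace-metadata lookup. A minimal usage sketch under these new defaults (the workspace URL below is hypothetical, and the no-argument call assumes a Databricks notebook where the workspace URL can be inferred):

```python
from dasl_client import Client

# Inside a Databricks notebook: no region or dasl_host needed; the client
# infers the workspace URL and auto-detects the DASL host from workspace metadata.
client = Client.for_workspace()

# Outside a notebook, or to skip auto-detection, pass the workspace URL and a
# region explicitly; an explicit dasl_host would override both.
client = Client.for_workspace(
    workspace_url="https://my-workspace.cloud.databricks.com",  # hypothetical URL
    region="aws-eu-central-1",
)
```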
dasl_client/errors/errors.py CHANGED
@@ -3,7 +3,6 @@ from collections.abc import Callable
  from contextlib import contextmanager

  from dasl_api import ApiException
- from urllib3.exceptions import MaxRetryError, RequestError


  class ConflictError(Exception):
@@ -136,3 +135,7 @@ def error_handler():
  raise e
  except Exception as e:
  raise e
+
+
+ class WorkspaceLookupError(Exception):
+ """Internal exception wrapper for workspace lookup errors"""
dasl_client/metadata.py ADDED
@@ -0,0 +1,99 @@
+ import base64
+ from typing import Optional
+
+ from dasl_api import ApiClient, Configuration, WorkspaceV1Api
+ from dasl_api.models import WorkspaceV1WorkspaceMetadata
+ from dasl_api.exceptions import ApiException
+
+ from .errors.errors import WorkspaceLookupError
+
+
+ class WorkspaceMetadata:
+ """Workspace metadata lookup functionality for auto-detecting API endpoints."""
+
+ @staticmethod
+ def get_workspace_metadata(
+ workspace_url: str, dasl_host: Optional[str] = None
+ ) -> Optional[WorkspaceV1WorkspaceMetadata]:
+ """
+ Query the workspace metadata endpoint to auto-detect the correct region
+ and API endpoint for a given Databricks workspace.
+
+ :param workspace_url: The Databricks workspace URL to lookup
+ :param dasl_host: Optional DASL host to use for the lookup. If None, uses default region.
+ :returns: WorkspaceV1WorkspaceMetadata if successful, None if workspace not found
+ """
+ hosts = []
+ if dasl_host is None:
+ # Use default region for metadata lookup
+ from .regions import Regions
+
+ for region in Regions.list():
+ hosts.append(Regions.lookup(region))
+ else:
+ hosts.append(dasl_host)
+
+ last_exception = None
+ for host in hosts:
+ try:
+ metadata = WorkspaceMetadata._get_workspace_metadata(
+ workspace_url, host
+ )
+ if metadata:
+ return metadata
+ except WorkspaceLookupError as e:
+ last_exception = e
+ continue
+
+ if last_exception:
+ raise last_exception
+ return None
+
+ @staticmethod
+ def _get_workspace_metadata(
+ workspace_url: str, dasl_host: str
+ ) -> Optional[WorkspaceV1WorkspaceMetadata]:
+ try:
+ # Create an unauthenticated client for the public metadata endpoint
+ configuration = Configuration(host=dasl_host)
+ api_client = ApiClient(configuration)
+ workspace_api = WorkspaceV1Api(api_client)
+
+ # Base64 encode the workspace URL
+ encoded_workspace = base64.urlsafe_b64encode(
+ workspace_url.encode()
+ ).decode()
+
+ # Call the metadata endpoint
+ metadata = workspace_api.workspace_v1_get_workspace_metadata(
+ databricks_workspace=encoded_workspace
+ )
+
+ return metadata
+
+ except ApiException as e:
+ if e.status == 404:
+ # Workspace not found or not in supported region
+ return None
+ elif e.status == 400:
+ # Invalid workspace URL
+ raise ValueError(f"Invalid workspace URL: {workspace_url}")
+ else:
+ # Other API errors
+ raise WorkspaceLookupError(f"Failed to get workspace metadata: {e}")
+ except Exception as e:
+ # Network errors, encoding errors, etc.
+ raise WorkspaceLookupError(f"Failed to get workspace metadata: {e}")
+
+ @staticmethod
+ def get_endpoint_for_workspace(workspace_url: str) -> Optional[str]:
+ """
+ Get the API endpoint URL for a workspace.
+
+ :param workspace_url: The Databricks workspace URL
+ :returns: API endpoint URL if successful, None if workspace not found
+ """
+ metadata = WorkspaceMetadata.get_workspace_metadata(workspace_url)
+ if metadata is not None:
+ return metadata.api_url
+ return None
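
The new module is also usable on its own. A short sketch of the lookup helper it exposes, using a hypothetical workspace URL; per the code above, a workspace that is not found in any known region comes back as None, while transport and other API failures surface as WorkspaceLookupError:

```python
from dasl_client.errors.errors import WorkspaceLookupError
from dasl_client.metadata import WorkspaceMetadata

try:
    # Hypothetical workspace URL; the helper tries every region in regions.json.
    endpoint = WorkspaceMetadata.get_endpoint_for_workspace(
        "https://my-workspace.cloud.databricks.com"
    )
except WorkspaceLookupError as err:
    print(f"Metadata lookup failed: {err}")
else:
    if endpoint is None:
        print("Workspace not in a supported region; pass region= or dasl_host=")
    else:
        print(f"DASL API endpoint: {endpoint}")
```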
dasl_client/preset_development/preview_engine.py CHANGED
@@ -2,13 +2,17 @@ from pyspark.sql import DataFrame, SparkSession
  from pyspark.sql.types import *
  from pyspark.sql.dataframe import DataFrame
  from pyspark.sql.functions import lit, col as col_, sum as sum_, when
+
  from dasl_client.preset_development.preview_parameters import *
  from dasl_client.preset_development.stage import *
  from dasl_client.preset_development.errors import *
+
  import yaml
- from IPython import get_ipython
+ import os
  from itertools import count

+ from IPython import get_ipython
+

  @udf(StringType())
  def constant_udf(*args):
@@ -362,7 +366,7 @@ class PreviewEngine:
  display = ipython.user_ns["display"]
  else:
  displayHTML = lambda x: print(x)
- display = lambda x: x.show()
+ display = lambda x, **kwargs: x.show()

  def d(txt, lvl) -> None:
  displayHTML(
@@ -376,25 +380,50 @@ class PreviewEngine:

  (pre_silver, silver, gold, pre_bronze) = stage_dataframes
  d("Autoloader Input", 1)
- display(input_df)
+ display(
+ input_df,
+ checkpointLocation=os.path.join(
+ self._ds_params.get_checkpoint_temp_location(), "input"
+ ),
+ )
  d("Bronze Pre-Transform", 1)
  for name, df in pre_bronze.items():
  d(f"{name}", 2)
- display(df)
+ display(
+ df,
+ checkpointLocation=os.path.join(
+ self._ds_params.get_checkpoint_temp_location(), f"pre_bronze-{name}"
+ ),
+ )
  d("Silver Pre-Transform", 1)
  if pre_silver:
- display(pre_silver)
+ display(
+ pre_silver,
+ checkpointLocation=os.path.join(
+ self._ds_params.get_checkpoint_temp_location(), "pre_silver"
+ ),
+ )
  else:
  d("Skipped", 2)
  d("Silver Transform", 1)
  for name, df in silver.items():
  d(f"{name}", 2)
- display(df)
+ display(
+ df,
+ checkpointLocation=os.path.join(
+ self._ds_params.get_checkpoint_temp_location(), f"silver-{name}"
+ ),
+ )
  d("Gold", 1)
  for full_name, df in gold.items():
  d(f"{full_name}", 2)
  d("Stage output", 3)
- display(df)
+ display(
+ df,
+ checkpointLocation=os.path.join(
+ self._ds_params.get_checkpoint_temp_location(), f"gold-{full_name}"
+ ),
+ )

  # NOTE: Name is stored as Gold_name/Silver_input. So we need to get just the Gold table
  # name that we are comparing the dataframe metadata to.
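
Two details in this file go together: every display() of a streaming stage now receives its own checkpointLocation under the preview checkpoint base, and the non-notebook fallback therefore has to accept keyword arguments it cannot use. A minimal sketch of that fallback (the function name here is hypothetical):

```python
def _display_fallback(df, **kwargs):
    # Outside a Databricks notebook there is no display() builtin, so streaming
    # keywords such as checkpointLocation are accepted but ignored and the
    # DataFrame is simply shown.
    df.show()

# e.g. _display_fallback(stage_df, checkpointLocation="/tmp/preview/input")  # hypothetical path
```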
@@ -440,7 +469,13 @@ class PreviewEngine:
  # alls good. display the output.
  d("Resultant gold table preview", 3)
  unioned_df = delta_df.unionByName(df, allowMissingColumns=True)
- display(unioned_df)
+ display(
+ unioned_df,
+ checkpointLocation=os.path.join(
+ self._ds_params.get_checkpoint_temp_location(),
+ f"gold-unioned-{full_name}",
+ ),
+ )

  def is_backtick_escaped(self, name: str) -> bool:
  """
dasl_client/preset_development/preview_parameters.py CHANGED
@@ -1,13 +1,19 @@
- from typing import Optional
+ from typing import Optional, TYPE_CHECKING

  from pyspark.sql import DataFrame, SparkSession
  from pyspark.sql.types import *
  from pyspark.sql.dataframe import DataFrame
  from pyspark.sql.functions import col, lit, udf
  from dasl_client.preset_development.errors import *
+
  import uuid
+ import os
+
  from IPython import get_ipython

+ if TYPE_CHECKING:
+ from dasl_client import Client
+

  class PreviewParameters:
  """
@@ -128,13 +134,24 @@ class PreviewParameters:
  ```

  **Note:**
- When using autoloader mode, this implementation requires a location to store a temporary schema for
- the loaded records. By default, this is set to `"dbfs:/tmp/schemas"`. You can change this using
- `set_autoloader_temp_schema_location`. Regardless of whether you use the default or a custom path,
- you must have write permissions for that location.
+ When using autoloader mode, this implementation requires locations to store temporary schemas and
+ checkpoints. By default, these paths are automatically determined from your workspace's
+ `daslStoragePath` configuration:
+ - Schema location: `{daslStoragePath}/preset_preview/schemas`
+ - Checkpoint location: `{daslStoragePath}/preset_preview/checkpoints`
+
+ The workspace configuration is retrieved automatically via `Client.for_workspace()`. If you need
+ to use custom paths or don't have access to the DASL API, you can set them explicitly:
+ ```python
+ ds_params = (PreviewParameters(spark)
+ .set_autoloader_temp_schema_location('/Volumes/catalog/schema/volume/schemas')
+ .set_checkpoint_temp_location_base('/Volumes/catalog/schema/volume/checkpoints'))
+ ```
+
+ Regardless of the paths used, you must have write permissions for those locations.
  """

- def __init__(self, spark: SparkSession) -> None:
+ def __init__(self, spark: SparkSession, client: Optional["Client"] = None) -> None:
  """
  Initializes the PreviewParameters instance with sparse default settings.

@@ -142,10 +159,19 @@ class PreviewParameters:
  of records at a time. By default, the record limit is set to 10, but this can be overridden
  if needed.

+ Args:
+ spark: SparkSession for DataFrame operations.
+ client: Optional DASL client for retrieving workspace configuration.
+ If not provided and storage paths are not set explicitly,
+ a client will be created automatically via Client.for_workspace().
+
  Instance Attributes:
  mode (str): Indicates the source type ("input" or "autoloader").
  record_limit (int): Maximum number of records to load. Defaults to 10.
  autoloader_temp_schema_location (str): Temporary location to store the autoloader schema.
+ Defaults to {daslStoragePath}/preset_preview/schemas.
+ checkpoint_temp_location_base (str): Temporary location to store checkpoints for stream and display.
+ Defaults to {daslStoragePath}/preset_preview/checkpoints.
  time_column (str): Column name used for time-based filtering.
  start_time (str): Start time for filtering.
  end_time (str): End time for filtering.
@@ -161,10 +187,12 @@ class PreviewParameters:
  df (DataFrame): Internal Spark DataFrame loaded using the specified parameters.
  """
  self._spark = spark
+ self._client = client # Store client for lazy path resolution
  self._mode = None # [input, table, autoloader, silverbronze]
  self._record_limit = 10
- self._autoloader_temp_schema_location = "dbfs:/tmp/schemas"
+ self._autoloader_temp_schema_location = None # Will be resolved lazily
  self._gold_test_schemas = []
+ self._checkpoint_temp_location_base = None # Will be resolved lazily

  self._time_column = None
  self._start_time = None
@@ -192,6 +220,69 @@ class PreviewParameters:

  self._df = None

+ def _ensure_storage_paths_configured(self) -> None:
+ """
+ Ensure storage paths are configured, either from explicit user settings
+ or from WorkspaceConfig. Only creates Client if paths are not explicitly set.
+
+ Raises:
+ RuntimeError: If daslStoragePath cannot be determined and paths not set
+ """
+ # If both paths already set explicitly, nothing to do
+ if (
+ self._autoloader_temp_schema_location is not None
+ and self._checkpoint_temp_location_base is not None
+ ):
+ return
+
+ # Need to get daslStoragePath from WorkspaceConfig
+ if self._client is None:
+ # Try to auto-create client
+ try:
+ from dasl_client import Client
+
+ self._client = Client.for_workspace()
+ except Exception as e:
+ raise RuntimeError(
+ "Could not create DASL client to retrieve workspace configuration. "
+ "Either provide a client explicitly: PreviewParameters(spark, client=client), "
+ "or set storage paths manually:\n"
+ " .set_autoloader_temp_schema_location('/path/to/schemas')\n"
+ " .set_checkpoint_temp_location_base('/path/to/checkpoints')\n"
+ f"Client creation error: {e}"
+ )
+
+ # Get config and extract daslStoragePath
+ try:
+ config = self._client.get_config()
+ dasl_storage_path = config.dasl_storage_path
+ except Exception as e:
+ raise RuntimeError(
+ f"Failed to retrieve workspace configuration: {e}\n"
+ "Set storage paths manually if WorkspaceConfig is not available:\n"
+ " .set_autoloader_temp_schema_location('/path/to/schemas')\n"
+ " .set_checkpoint_temp_location_base('/path/to/checkpoints')"
+ )
+
+ if not dasl_storage_path:
+ raise RuntimeError(
+ "WorkspaceConfig.dasl_storage_path is not set. "
+ "Configure this in your workspace settings or set paths explicitly:\n"
+ " .set_autoloader_temp_schema_location('/path/to/schemas')\n"
+ " .set_checkpoint_temp_location_base('/path/to/checkpoints')"
+ )
+
+ # Build default paths from daslStoragePath
+ if self._autoloader_temp_schema_location is None:
+ self._autoloader_temp_schema_location = os.path.join(
+ dasl_storage_path, "preset_preview", "schemas"
+ )
+
+ if self._checkpoint_temp_location_base is None:
+ self._checkpoint_temp_location_base = os.path.join(
+ dasl_storage_path, "preset_preview", "checkpoints"
+ )
+
  def __create_from_autoloader(self) -> DataFrame:
  stream_df = (
  self._spark.readStream.format("cloudFiles")
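
In practice, the __init__ and lazy-resolution changes above mean PreviewParameters only needs explicit paths when a DASL client cannot be created. A short sketch of the two supported setups, assuming `spark` is an existing SparkSession:

```python
from dasl_client import Client
from dasl_client.preset_development.preview_parameters import PreviewParameters

# Preferred: reuse an existing client so the default paths are derived from
# WorkspaceConfig.dasl_storage_path (.../preset_preview/{schemas,checkpoints}).
client = Client.for_workspace()
ds_params = PreviewParameters(spark, client=client)

# Fallback when the DASL API is unreachable: set both locations explicitly
# (hypothetical Volumes paths).
ds_params = (
    PreviewParameters(spark)
    .set_autoloader_temp_schema_location("/Volumes/catalog/schema/volume/schemas")
    .set_checkpoint_temp_location_base("/Volumes/catalog/schema/volume/checkpoints")
)
```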
@@ -220,7 +311,10 @@ class PreviewParameters:
  .option("cloudFiles.inferColumnTypes", "true")
  .option(
  "cloudFiles.schemaLocation",
- f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+ os.path.join(
+ self.get_autoloader_temp_schema_location(),
+ self._schema_uuid_str,
+ ),
  )
  )

@@ -239,6 +333,10 @@ class PreviewParameters:
  stream_df.writeStream.format("memory")
  .queryName("batch_data")
  .trigger(availableNow=True)
+ .option(
+ "checkpointLocation",
+ os.path.join(self.get_checkpoint_temp_location(), "memory"),
+ )
  .start()
  )

@@ -294,6 +392,13 @@ class PreviewParameters:
  """
  Cleans up the temporary schema created for streaming mode, if it was created.
  """
+ # Only clean up if paths were actually configured
+ # This handles the case where __exit__ is called after an exception in __enter__
+ if (
+ self._autoloader_temp_schema_location is None
+ or self._checkpoint_temp_location_base is None
+ ):
+ return

  # Get the Databricks built-in functions out the namespace.
  ipython = get_ipython()
@@ -301,19 +406,32 @@ class PreviewParameters:
  dbutils = ipython.user_ns["dbutils"]

  dbutils.fs.rm(
- f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+ os.path.join(
+ self._autoloader_temp_schema_location, self._schema_uuid_str
+ ),
+ recurse=True,
+ )
+ dbutils.fs.rm(
+ os.path.join(
+ self._checkpoint_temp_location_base, self._schema_uuid_str
+ ),
  recurse=True,
  )
  for gold_test_schema in self._gold_test_schemas:
  dbutils.fs.rm(
- f"{self._autoloader_temp_schema_location}/{gold_test_schema}",
+ os.path.join(
+ self._autoloader_temp_schema_location, gold_test_schema
+ ),
  recurse=True,
  )
  else:
  leaked_lines = [
- f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+ f"FYI, we are leaking temp data {os.path.join(self._autoloader_temp_schema_location, self._schema_uuid_str)}",
+ os.path.join(
+ self._checkpoint_temp_location_base, self._schema_uuid_str
+ ),
  *[
- f"{self._autoloader_temp_schema_location}/{x}"
+ os.path.join(self._autoloader_temp_schema_location, x)
  for x in self._gold_test_schemas
  ],
  ]
@@ -396,11 +514,56 @@ class PreviewParameters:
  """
  Get the location for the autoloader's streaming mode schema to be created.

+ If not explicitly set, defaults to {daslStoragePath}/preset_preview/schemas.
+
  Returns:
  str: The location for the autoloader's streaming mode schema to be created.
+
+ Raises:
+ RuntimeError: If path cannot be determined from WorkspaceConfig
  """
+ self._ensure_storage_paths_configured()
  return self._autoloader_temp_schema_location

+ def set_checkpoint_temp_location_base(self, path: str):
+ """
+ Set the base location for the checkpoint to be created. This is
+ deleted at the end of a run.
+
+ Returns:
+ PreviewParameters: The current instance with updated configuration.
+ """
+ self._checkpoint_temp_location_base = path
+ return self
+
+ def get_checkpoint_temp_location_base(self) -> str:
+ """
+ Get the location for the checkpoint to be created.
+
+ If not explicitly set, defaults to {daslStoragePath}/preset_preview/checkpoints.
+
+ Returns:
+ str: The location for the checkpoint to be created.
+
+ Raises:
+ RuntimeError: If path cannot be determined from WorkspaceConfig
+ """
+ self._ensure_storage_paths_configured()
+ return self._checkpoint_temp_location_base
+
+ def get_checkpoint_temp_location(self) -> str:
+ """
+ Get the location where checkpoints to be created.
+
+ Returns:
+ str: The location where checkpoints to be created.
+
+ Raises:
+ RuntimeError: If path cannot be determined from WorkspaceConfig
+ """
+ self._ensure_storage_paths_configured()
+ return os.path.join(self._checkpoint_temp_location_base, self._schema_uuid_str)
+
  def set_data_schema(self, schema: StructType):
  """
  Set the input schema for "input" mode. For example:
dasl_client/regions.json CHANGED
@@ -1,4 +1,6 @@
  {
  "aws-us-east-1": "https://api.sl.us-east-1.cloud.databricks.com",
- "aws-us-west-2": "https://api.sl.us-west-2.cloud.databricks.com"
+ "aws-us-west-2": "https://api.sl.us-west-2.cloud.databricks.com",
+ "aws-ap-southeast-2": "https://api.sl.ap-southeast-2.cloud.databricks.com",
+ "aws-eu-central-1": "https://api.sl.eu-central-1.cloud.databricks.com"
  }
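
The two new entries are what make the expanded `region` values usable: `Regions.lookup` resolves a region name to its API host from this file, and `Regions.list` is what the metadata auto-detection iterates over. A quick sanity-check sketch:

```python
from dasl_client.regions import Regions

# The new regions resolve to their API hosts exactly like the existing ones.
assert Regions.lookup("aws-ap-southeast-2") == "https://api.sl.ap-southeast-2.cloud.databricks.com"
assert Regions.lookup("aws-eu-central-1") == "https://api.sl.eu-central-1.cloud.databricks.com"

# Enumerate every known region name.
print(Regions.list())
```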
@@ -1,12 +1,12 @@
  Metadata-Version: 2.4
  Name: dasl_client
- Version: 1.0.27
+ Version: 1.0.29
  Summary: The DASL client library used for interacting with the DASL workspace
  Author-email: Antimatter Team <support@antimatter.io>
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: dasl_api==0.1.26
+ Requires-Dist: dasl_api==0.1.27
  Requires-Dist: databricks-sdk>=0.41.0
  Requires-Dist: pydantic>=2
  Requires-Dist: typing_extensions>=4.10.0
@@ -1,8 +1,9 @@
  dasl_client/__init__.py,sha256=MuaH74tnEHwfymHtlK6GqeMHRRWaSDfS6hSzbMrd7iQ,150
- dasl_client/client.py,sha256=L6Vlzj96CRZ-SeSW9le005IsA52YPcgb6KZBCD29LrI,38983
+ dasl_client/client.py,sha256=34P9E2SMQZEMbVo3tyqWR7XfHHnwTPdHpPI88MnTy4Y,42138
  dasl_client/exec_rule.py,sha256=kn-Yo-9L0fjxbulyAghiIKO1SYcqv2XHZn45F8FvUzE,3599
  dasl_client/helpers.py,sha256=kdOoNiyoVzfDHAZ5DGg5YTU4Fj9A5a8gz2RljrY8hbY,1095
- dasl_client/regions.json,sha256=Rvs_gslMuaod_n6LYLLrTAugtr7VNTljnFUiKf_0qNA,137
+ dasl_client/metadata.py,sha256=D0DkKfY9fTCKL483Gh9wtCg2h1DKlXuXN2ziwvcjIOo,3553
+ dasl_client/regions.json,sha256=1ADGIvA3gS4P_CODULFOaCFEPFCk4gg-LNn_z9V0t_k,287
  dasl_client/regions.py,sha256=1TIlyJ4iMLsfgeb0ShQsfsju2_NBSXnMAfjdSNMQgi8,442
  dasl_client/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dasl_client/auth/auth.py,sha256=yTeijYYpfJVJ_wYyq0U6kAntg4xz5MzIR37_CpVR57k,7277
@@ -10,11 +11,11 @@ dasl_client/conn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
  dasl_client/conn/client_identifier.py,sha256=kPrX0wPa6y7ifhKSb6dQriDSlIgPOUhBq7OoC73E7NU,657
  dasl_client/conn/conn.py,sha256=tMUjxsK7MOSOcudbf2iNunbz5gbvWu3AmNXQjRW-vP8,1467
  dasl_client/errors/__init__.py,sha256=lpH2HGF5kCRTk6MxpPEyY9ulTvsLBFKb4NnLuFFLZZA,40
- dasl_client/errors/errors.py,sha256=u-B8dR8zlxdNVeEdHi6UozX178jwJJ5ZJOGl9YjONRc,4008
+ dasl_client/errors/errors.py,sha256=mryJNKrsfni0WUHsAgQ7lLCKQtQHGC5sLr1qf8janVM,4055
  dasl_client/preset_development/__init__.py,sha256=9yC4gmQfombvYLThzo0pSfT5JMolfNVWFVQIuIg_XUA,131
  dasl_client/preset_development/errors.py,sha256=KhcMEQ0o58WC9meGSgmGJuE6s9ztkWyn9md0LQ5srWE,7708
- dasl_client/preset_development/preview_engine.py,sha256=yubIjQitXJaQZHA1Mq-q8qMhskYerFGsD7-GQdqoxo8,22086
- dasl_client/preset_development/preview_parameters.py,sha256=aZrpCkB2JrqZCp7Lqb2Tz5DMJOFc0qDMdmDxAI5tv98,20183
+ dasl_client/preset_development/preview_engine.py,sha256=HFknqZhGx0Ltuo1Wqmr0EfoiSn126d9q4-WEYX2Ns1U,23243
+ dasl_client/preset_development/preview_parameters.py,sha256=JpoVeByweNI_jYTRRWiLkjfaTx_qDSn0ciJlCSn7e60,26871
  dasl_client/preset_development/stage.py,sha256=JLkV6Dmomw-lAWIuJv82z2XvorEOEWZz0GHAr_CMJ9A,27253
  dasl_client/types/__init__.py,sha256=GsXC3eWuv21VTLPLPH9pzM95JByaKnKrPjJkh2rlZfQ,170
  dasl_client/types/admin_config.py,sha256=Kmx3Kuai9_LWMeO2NpWasRUgLihYSEXGtuYVfG0FkjU,2200
@@ -25,8 +26,8 @@ dasl_client/types/helpers.py,sha256=gLGTvrssAKrdkQT9h80twEosld2egwhvj-zAudxWFPs,
  dasl_client/types/rule.py,sha256=5zvRsqzsej5kYTFjIpP1lptb3Rtit06cxvA4sl89OMU,28734
  dasl_client/types/types.py,sha256=DeUOfdYGOhUGEy7yKOfo0OYTXYRrs57yYgNLUbu7Tlc,8806
  dasl_client/types/workspace_config.py,sha256=XkSmeuOA3OPpWQkx7apPcPNuD4Ai2QO3YKU-aFoOW8k,31030
- dasl_client-1.0.27.dist-info/licenses/LICENSE,sha256=M35UepUPyKmFkvENlkweeaMElheQqNoM5Emh8ADO-rs,4
- dasl_client-1.0.27.dist-info/METADATA,sha256=pTORHSM3o9x_3u1J47vEwpzbosz_0Ojv--r45_fMtjI,4046
- dasl_client-1.0.27.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dasl_client-1.0.27.dist-info/top_level.txt,sha256=kIv8ox_2oJPjGB8_yuey5vvuPCyfY8kywG138f9oSOY,12
- dasl_client-1.0.27.dist-info/RECORD,,
+ dasl_client-1.0.29.dist-info/licenses/LICENSE,sha256=M35UepUPyKmFkvENlkweeaMElheQqNoM5Emh8ADO-rs,4
+ dasl_client-1.0.29.dist-info/METADATA,sha256=-cEDVkDtXcuz5JI9gUby5v0u7nePZOZoa7L4Z2azZoA,4046
+ dasl_client-1.0.29.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dasl_client-1.0.29.dist-info/top_level.txt,sha256=kIv8ox_2oJPjGB8_yuey5vvuPCyfY8kywG138f9oSOY,12
+ dasl_client-1.0.29.dist-info/RECORD,,