dasl-client 1.0.23__py3-none-any.whl → 1.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dasl-client might be problematic.

test/test_api_changes.py DELETED
@@ -1,137 +0,0 @@
- import inspect
- import json
-
- import pytest
-
- from datetime import datetime
- from hashlib import md5
- from typing import Optional, Type, Union
-
- from dasl_api.models import *
- from pydantic import BaseModel
- from pydantic.fields import FieldInfo
-
-
- checked_dasl_types = {
-     # Resources
-     WorkspaceV1AdminConfig: "admin_config.json",
-     CoreV1DataSource: "data_source.json",
-     CoreV1Rule: "rule.json",
-     WorkspaceV1WorkspaceConfig: "workspace_config.json",
-     ContentV1DatasourcePreset: "datasource_preset.json",
-     # Data
-     DbuiV1ObservableEventsList: "observable_events_list.json",
- }
-
-
- simple_types = [
-     bool,
-     int,
-     float,
-     str,
-     datetime,
- ]
-
-
- def is_simple_type(tpe: Type) -> bool:
-     return tpe in simple_types
-
-
- def is_dasl_api_type(tpe: Type) -> bool:
-     if tpe.__name__ in globals():
-         return "dasl_api" in globals()[tpe.__name__].__module__
-     return False
-
-
- def dasl_model_to_dict(tpe: Type[BaseModel]) -> dict:
-     decorators = getattr(
-         getattr(tpe, "__pydantic_decorators__", None), "field_validators", {}
-     )
-     return {
-         "name": tpe.__name__,
-         "fields": [
-             field_to_dict(name, field, decorators)
-             for name, field in tpe.model_fields.items()
-         ],
-     }
-
-
- def field_to_dict(name: str, field: FieldInfo, validators: dict) -> dict:
-     d = {
-         "name": name,
-         "alias": field.alias,
-         "is_required": field.is_required(),
-         "is_nullable": is_nullable(field.annotation),
-         "is_sequence": is_sequence(field.annotation),
-         "validation_hash": field_validation_hash(name, validators),
-     }
-     field_type: Union[*simple_types, BaseModel] = inner_type(field.annotation)
-     if is_simple_type(field_type):
-         d["type"] = field_type.__name__
-     elif is_dasl_api_type(field_type):
-         d["type"] = dasl_model_to_dict(field_type)
-     else:
-         raise Exception(
-             f"unsupported field type {field_type} encountered while converting field - {name}: {field}"
-         )
-     return d
-
-
- def is_sequence(tpe: Type) -> bool:
-     seq_types = [list, set, frozenset, tuple]
-     if tpe in seq_types:
-         return True
-     if hasattr(tpe, "__origin__"):
-         if tpe.__origin__ in seq_types:
-             return True
-     if hasattr(tpe, "__args__"):
-         return is_sequence(tpe.__args__[0])
-     return False
-
-
- def is_nullable(tpe: Type) -> bool:
-     return hasattr(tpe, "__args__") and type(None) in tpe.__args__
-
-
- def field_validation_hash(field_name: str, validators: dict) -> Optional[str]:
-     for validator in validators.values():
-         if hasattr(validator, "info") and hasattr(validator.info, "fields"):
-             if field_name in validator.info.fields:
-                 return md5(
-                     inspect.getsource(validator.func).encode("utf-8")
-                 ).hexdigest()
-     return None
-
-
- def inner_type(tpe: Type) -> Type:
-     if hasattr(tpe, "__args__"):
-         return inner_type(tpe.__args__[0])
-     return tpe
-
-
- def dasl_model_to_string(tpe: Type[BaseModel]) -> str:
-     d = dasl_model_to_dict(tpe)
-     return json.dumps(d, indent=2, sort_keys=True)
-
-
- @pytest.mark.parametrize(
-     "tpe",
-     checked_dasl_types.keys(),
-     ids=[f"{tpe.__name__} model is unchanged" for tpe in checked_dasl_types.keys()],
- )
- def test_api_model_for_changes(tpe):
-     with open(f"test/expected_api_models/{checked_dasl_types[tpe]}", "r") as f:
-         expected_val = f.read()
-     assert dasl_model_to_string(tpe) == expected_val
-
-
- @pytest.mark.update
- @pytest.mark.parametrize(
-     "tpe",
-     checked_dasl_types.keys(),
-     ids=[f"updating {tpe.__name__} model" for tpe in checked_dasl_types.keys()],
- )
- def test_apply_api_model_changes(tpe):
-     model_ser = dasl_model_to_string(tpe)
-     with open(f"test/expected_api_models/{checked_dasl_types[tpe]}", "w") as f:
-         f.write(model_ser)
test/test_api_surface.py DELETED
@@ -1,306 +0,0 @@
- from dasl_client import *
-
- from .constants import *
-
-
- def test_admin_config(api_client):
-     base_admin_config = AdminConfig(
-         workspace_url=databricks_host,
-         app_client_id=app_client_id,
-         service_principal_id=databricks_client_id,
-         service_principal_secret="********",
-     )
-
-     ac = api_client.get_admin_config()
-     assert ac == base_admin_config
-
-     other = AdminConfig(
-         workspace_url=databricks_host,
-         app_client_id=alternate_app_client_id,
-         service_principal_id=databricks_client_id,
-         service_principal_secret=databricks_client_secret,
-     )
-     api_client.put_admin_config(other)
-
-     assert api_client.get_admin_config() == AdminConfig(
-         workspace_url=databricks_host,
-         app_client_id=alternate_app_client_id,
-         service_principal_id=databricks_client_id,
-         service_principal_secret="********",
-     )
-
-     ac.service_principal_secret = databricks_client_secret
-     api_client.put_admin_config(ac)
-     assert api_client.get_admin_config() == base_admin_config
-
-
- def test_workspace_config(api_client):
-     base_workspace_config = WorkspaceConfig(
-         metadata=Metadata(
-             name="config",
-             workspace=workspace,
-             client_of_origin=get_client_identifier(),
-         ),
-         dasl_storage_path="/Volumes/automated_test_cases/default/test",
-         default_sql_warehouse="ac1cff2384634cfb",
-         system_tables_config=SystemTablesConfig(
-             catalog_name="automated_test_cases",
-             var_schema="default",
-         ),
-         default_custom_notebook_location="/Users/test/notebooks",
-         datasources=DatasourcesConfig(
-             bronze_schema="bronze",
-             silver_schema="silver",
-             gold_schema="gold",
-             catalog_name="automated_test_cases",
-         ),
-         rules=RulesConfig(
-             checkpoint_location="/Users/test/checkpoints",
-         ),
-     )
-
-     api_client.put_config(base_workspace_config)
-     got = api_client.get_config()
-
-     # the server is going to populate created_timestamp, modified_timestamp,
-     # version, and resource_status, so copy those over before comparing.
-     base_workspace_config.metadata.created_timestamp = got.metadata.created_timestamp
-     base_workspace_config.metadata.modified_timestamp = got.metadata.modified_timestamp
-     base_workspace_config.metadata.version = got.metadata.version
-     base_workspace_config.metadata.resource_status = got.metadata.resource_status
-
-     assert api_client.get_config() == base_workspace_config
-
-     base_workspace_config.datasources.bronze_schema = "bronze_new"
-     api_client.put_config(base_workspace_config)
-     got = api_client.get_config()
-     base_workspace_config.metadata.modified_timestamp = got.metadata.modified_timestamp
-     base_workspace_config.metadata.version = got.metadata.version
-     base_workspace_config.metadata.resource_status = got.metadata.resource_status
-
-     assert api_client.get_config() == base_workspace_config
-
-
- def test_minimal_data_source(api_client):
-     base_data_source = DataSource(
-         source="test",
-         source_type="test",
-         schedule=Schedule(
-             at_least_every="2h",
-             enabled=True,
-         ),
-         bronze=BronzeSpec(
-             bronze_table="test_bronze_table",
-             skip_bronze_loading=False,
-         ),
-         autoloader=DataSource.Autoloader(
-             location="s3://aws-security-data-lake-us-east-1-k8vskbicklrtekgxvyufaavf36jjql/aws/S3_DATA/2.0/region=us-east-1/",
-             format="json",
-         ),
-     )
-
-     base_ds_1 = api_client.create_datasource("test_1", base_data_source)
-     assert base_ds_1.source == base_data_source.source
-     assert base_ds_1.schedule == base_data_source.schedule
-     assert base_ds_1.bronze == base_data_source.bronze
-     assert base_ds_1.silver == base_data_source.silver
-     assert base_ds_1.gold == base_data_source.gold
-
-     got = api_client.get_datasource("test_1")
-     listed = []
-     for ds in api_client.list_datasources():
-         listed.append(ds)
-     assert len(listed) == 1
-     assert listed[0] == got
-
-     # the server is going to populate created_timestamp, modified_timestamp,
-     # version, and resource_status, so copy those over before comparing.
-     base_ds_1.metadata.created_timestamp = got.metadata.created_timestamp
-     base_ds_1.metadata.created_by = got.metadata.created_by
-     base_ds_1.metadata.modified_timestamp = got.metadata.modified_timestamp
-     base_ds_1.metadata.version = got.metadata.version
-     base_ds_1.metadata.resource_status = got.metadata.resource_status
-     assert api_client.get_datasource("test_1") == base_ds_1
-
-     base_ds_2 = api_client.create_datasource("test_2", base_data_source)
-     assert base_ds_2.source == base_data_source.source
-     assert base_ds_2.schedule == base_data_source.schedule
-     assert base_ds_2.bronze == base_data_source.bronze
-     assert base_ds_2.silver == base_data_source.silver
-     assert base_ds_2.gold == base_data_source.gold
-
-     got_2 = api_client.get_datasource("test_2")
-     listed = []
-     for ds in api_client.list_datasources():
-         listed.append(ds)
-     assert len(listed) == 2
-     assert listed[0] == got
-     assert listed[1] == got_2
-
-     base_ds_2.metadata.created_timestamp = got_2.metadata.created_timestamp
-     base_ds_2.metadata.created_by = got_2.metadata.created_by
-     base_ds_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
-     base_ds_2.metadata.version = got_2.metadata.version
-     base_ds_2.metadata.resource_status = got_2.metadata.resource_status
-     assert api_client.get_datasource("test_2") == base_ds_2
-
-     base_ds_2.bronze.bronze_table = "test_2"
-     api_client.replace_datasource("test_2", base_ds_2)
-
-     got_2 = api_client.get_datasource("test_2")
-     base_ds_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
-     base_ds_2.metadata.version = got_2.metadata.version
-     base_ds_2.metadata.resource_status = got_2.metadata.resource_status
-
-     assert api_client.get_datasource("test_2") == base_ds_2
-
-     api_client.delete_datasource("test_1")
-     listed = [
-         item
-         for item in api_client.list_datasources()
-         if item.metadata.resource_status != "deletionPending"
-     ]
-     assert len(listed) == 1
-     assert listed[0] == base_ds_2
-
-
- def test_minimal_rule(api_client):
-     base_rule = Rule(
-         schedule=Schedule(
-             at_least_every="2h",
-             enabled=True,
-         ),
-         input=Rule.Input(
-             stream=Rule.Input.Stream(
-                 tables=[
-                     Rule.Input.Stream.Table(
-                         name="automated_test_cases.pylib.test",
-                     ),
-                 ],
-             ),
-         ),
-         output=Rule.Output(
-             summary="test",
-         ),
-     )
-
-     base_rule_1 = api_client.create_rule("test_0", base_rule)
-     assert base_rule_1.schedule == base_rule.schedule
-     assert base_rule_1.input == base_rule.input
-     assert base_rule_1.output == base_rule.output
-
-     got = api_client.get_rule("test_0")
-     listed = []
-     for rule in api_client.list_rules():
-         listed.append(rule)
-     assert len(listed) == 1
-     assert listed[0] == got
-
-     # the server is going to populate created_timestamp, modified_timestamp,
-     # version, and resource_status, so copy those over before comparing.
-     base_rule_1.metadata.created_timestamp = got.metadata.created_timestamp
-     base_rule_1.metadata.created_by = got.metadata.created_by
-     base_rule_1.metadata.modified_timestamp = got.metadata.modified_timestamp
-     base_rule_1.metadata.version = got.metadata.version
-     base_rule_1.metadata.resource_status = got.metadata.resource_status
-     assert api_client.get_rule("test_0") == base_rule_1
-
-     base_rule_2 = api_client.create_rule("test_1", base_rule)
-     assert base_rule_2.schedule == base_rule.schedule
-     assert base_rule_2.input == base_rule.input
-     assert base_rule_2.output == base_rule.output
-
-     got_2 = api_client.get_rule("test_1")
-     listed = []
-     for rule in api_client.list_rules():
-         listed.append(rule)
-     assert len(listed) == 2
-     assert listed[0] == got
-     assert listed[1] == got_2
-
-     base_rule_2.metadata.created_timestamp = got_2.metadata.created_timestamp
-     base_rule_2.metadata.created_by = got_2.metadata.created_by
-     base_rule_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
-     base_rule_2.metadata.version = got_2.metadata.version
-     base_rule_2.metadata.resource_status = got_2.metadata.resource_status
-     assert api_client.get_rule("test_1") == base_rule_2
-
-     base_rule_2.input.stream.tables[0].name = "databricks_dev.antimatter_meta.test_ip_summaries"
-     api_client.replace_rule("test_1", base_rule_2)
-
-     got_2 = api_client.get_rule("test_1")
-     base_rule_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
-     base_rule_2.metadata.version = got_2.metadata.version
-     base_rule_2.metadata.resource_status = got_2.metadata.resource_status
-
-     assert api_client.get_rule("test_1") == base_rule_2
-
-     api_client.delete_rule("test_0")
-     listed = [
-         item
-         for item in api_client.list_rules()
-         if item.metadata.resource_status != "deletionPending"
-     ]
-     assert len(listed) == 1
-     assert listed[0] == base_rule_2
-
-
- def test_list_pagination(api_client):
-     base_rule = Rule(
-         schedule=Schedule(
-             at_least_every="2h",
-             enabled=True,
-         ),
-         input=Rule.Input(
-             stream=Rule.Input.Stream(
-                 tables=[
-                     Rule.Input.Stream.Table(
-                         name="automated_test_cases.pylib.test",
-                     ),
-                 ],
-             ),
-         ),
-         output=Rule.Output(
-             summary="test",
-         ),
-     )
-
-     # create (remainder of) 10 rules for the test
-     for i in range(8):
-         api_client.create_rule(f"test_{i+2}", base_rule)
-
-     # ensure all rules are returned for a list call with no params
-     listed = []
-     for rule in api_client.list_rules():
-         listed.append(rule)
-     assert len(listed) == 10
-
-     for i in range(10):
-         assert listed[i] == api_client.get_rule(f"test_{i}")
-
-     # ensure the first 5 rules are returned when limit=5
-     listed = []
-     for rule in api_client.list_rules(limit=5):
-         listed.append(rule)
-     assert len(listed) == 5
-
-     for i in range(5):
-         assert listed[i] == api_client.get_rule(f"test_{i}")
-
-     # ensure the last 5 rules are returned when limit=5, cursor=pagination_test_4
-     listed = []
-     for rule in api_client.list_rules(cursor="test_4", limit=5):
-         listed.append(rule)
-     assert len(listed) == 5
-
-     for i in range(5):
-         assert listed[i] == api_client.get_rule(f"test_{i+5}")
-
-     # ensure the last 9 rules are returned when cursor=test_0
-     listed = []
-     for rule in api_client.list_rules(cursor="test_0"):
-         listed.append(rule)
-     assert len(listed) == 9
-
-     for i in range(9):
-         assert listed[i] == api_client.get_rule(f"test_{i+1}")
@@ -1,119 +0,0 @@
- import base64
- import datetime
- import os
- import time
-
- from databricks.sdk import WorkspaceClient
- from databricks.sdk.service import jobs, workspace as dbworkspace
-
- from .constants import *
-
- pylib_volume_path = os.environ["PYLIB_VOLUME_PATH"]
- pylib_wheel_path = os.environ["PYLIB_WHEEL_PATH"]
- api_wheel_path = os.environ["API_WHEEL_PATH"]
-
-
- def test_secret_auth(api_client):
-     # making sure it's even possible to get a config
-     api_client.get_config()
-
-     # need to do an API operation using databricks secret auth.
-     notebook_data = f"""
- %pip uninstall -y dasl-client dasl-api
- %pip install {api_wheel_path}
- %pip install {pylib_wheel_path}
- dbutils.library.restartPython()
- # COMMAND ----------
- from dasl_client.client import Client
-
- Client.for_workspace(
-     workspace_url="{databricks_host}",
-     dasl_host="{dasl_host}",
- ).get_config()
- # COMMAND ----------
- dbutils.notebook.exit("SUCCESS")
- """
-     print(f"notebook_data={notebook_data}")
-
-     wsc = WorkspaceClient()
-     wsc.workspace.mkdirs(path=pylib_volume_path)
-
-     notebook_path = f"{pylib_volume_path}/test_secret_auth_notebook"
-     wsc.workspace.import_(
-         path=notebook_path,
-         format=dbworkspace.ImportFormat.SOURCE,
-         language=dbworkspace.Language.PYTHON,
-         content=base64.b64encode(notebook_data.encode("utf-8")).decode("utf-8"),
-         overwrite=True,
-     )
-
-     job_id = None
-     try:
-         job_id = wsc.jobs.create(
-             name="run test_secret_auth notebook",
-             tasks=[
-                 jobs.Task(
-                     task_key="run_notebook",
-                     notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
-                 ),
-             ],
-         ).job_id
-
-         wsc.jobs.run_now(job_id=job_id)
-
-         logs = []
-         start = datetime.datetime.now()
-         complete = False
-         while not complete:
-             elapsed = datetime.datetime.now() - start
-             if elapsed > datetime.timedelta(seconds=300):
-                 raise Exception(f"timed out waiting for job")
-
-             time.sleep(5)
-
-             status, logs = fetch_latest_run_status_and_logs(wsc, job_id)
-             print(f"logs={logs}")
-
-             if status == jobs.TerminationCodeCode.RUN_EXECUTION_ERROR:
-                 raise Exception(f"job terminated with error")
-
-             complete = status == jobs.TerminationCodeCode.SUCCESS
-
-         print(logs)
-         assert len(logs) == 1
-         assert logs[0] == "SUCCESS"
-     finally:
-         wsc.workspace.delete(pylib_volume_path, recursive=True)
-         if job_id is not None:
-             wsc.jobs.delete(job_id=job_id)
-
-
- def fetch_latest_run_status_and_logs(
-     wsc: WorkspaceClient,
-     job_id: str,
- ):
-     runs = list(wsc.jobs.list_runs(job_id=job_id, expand_tasks=True))
-     if not runs:
-         return "No runs found", None
-
-     # Find the latest run based on the start time
-     latest_run = max(runs, key=lambda r: r.start_time)
-     if latest_run.status.termination_details is None:
-         return "No runs found", None
-     status = latest_run.status.termination_details.code
-     logs = []
-     for task in latest_run.tasks:
-         output = wsc.jobs.get_run_output(task.run_id)
-         if output.error is not None:
-             logs.append(output.error)
-         elif output.logs is not None:
-             logs.append(output.logs)
-         elif output.notebook_output is not None:
-             logs.append(output.notebook_output.result)
-         elif output.run_job_output is not None:
-             raise Exception("Nested jobs are not supported")
-         elif output.sql_output is not None:
-             raise Exception("SQL jobs are unsupported")
-         else:
-             logs.append("")
-     return status, logs