datamasque-python 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamasque/client/__init__.py +204 -0
- datamasque/client/base.py +304 -0
- datamasque/client/connections.py +64 -0
- datamasque/client/discovery.py +286 -0
- datamasque/client/dmclient.py +49 -0
- datamasque/client/exceptions.py +75 -0
- datamasque/client/files.py +92 -0
- datamasque/client/ifm.py +301 -0
- datamasque/client/license.py +41 -0
- datamasque/client/models/__init__.py +0 -0
- datamasque/client/models/connection.py +429 -0
- datamasque/client/models/data_selection.py +62 -0
- datamasque/client/models/discovery.py +229 -0
- datamasque/client/models/dm_instance.py +39 -0
- datamasque/client/models/files.py +89 -0
- datamasque/client/models/ifm.py +177 -0
- datamasque/client/models/license.py +60 -0
- datamasque/client/models/pagination.py +29 -0
- datamasque/client/models/ruleset.py +45 -0
- datamasque/client/models/ruleset_library.py +22 -0
- datamasque/client/models/runs.py +165 -0
- datamasque/client/models/status.py +68 -0
- datamasque/client/models/user.py +69 -0
- datamasque/client/py.typed +0 -0
- datamasque/client/ruleset_libraries.py +164 -0
- datamasque/client/rulesets.py +57 -0
- datamasque/client/runs.py +189 -0
- datamasque/client/settings.py +76 -0
- datamasque/client/users.py +96 -0
- datamasque_python-1.0.0.dist-info/METADATA +113 -0
- datamasque_python-1.0.0.dist-info/RECORD +33 -0
- datamasque_python-1.0.0.dist-info/WHEEL +4 -0
- datamasque_python-1.0.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import zipfile
|
|
3
|
+
from io import BufferedIOBase, BytesIO, TextIOBase
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Iterator, Optional, Union
|
|
6
|
+
|
|
7
|
+
from datamasque.client.base import BaseClient, UploadFile
|
|
8
|
+
from datamasque.client.exceptions import (
|
|
9
|
+
AsyncRulesetGenerationInProgressError,
|
|
10
|
+
DataMasqueException,
|
|
11
|
+
FailedToStartError,
|
|
12
|
+
)
|
|
13
|
+
from datamasque.client.models.connection import ConnectionId
|
|
14
|
+
from datamasque.client.models.data_selection import (
|
|
15
|
+
SelectedColumns,
|
|
16
|
+
SelectedData,
|
|
17
|
+
SelectedFileData,
|
|
18
|
+
)
|
|
19
|
+
from datamasque.client.models.discovery import (
|
|
20
|
+
FileDiscoveryResult,
|
|
21
|
+
FileRulesetGenerationRequest,
|
|
22
|
+
RulesetGenerationRequest,
|
|
23
|
+
SchemaDiscoveryPage,
|
|
24
|
+
SchemaDiscoveryRequest,
|
|
25
|
+
SchemaDiscoveryResult,
|
|
26
|
+
)
|
|
27
|
+
from datamasque.client.models.ruleset import Ruleset
|
|
28
|
+
from datamasque.client.models.runs import RunId
|
|
29
|
+
from datamasque.client.models.status import AsyncRulesetGenerationTaskStatus
|
|
30
|
+
|
|
31
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DiscoveryClient(BaseClient):
|
|
35
|
+
"""Schema-discovery and ruleset-generation API methods. Mixed into `DataMasqueClient`."""
|
|
36
|
+
|
|
37
|
+
def start_async_ruleset_generation(self, connection_id: ConnectionId, selected_data: SelectedData) -> None:
    """
    Kick off asynchronous ruleset generation from the latest discovery results on a connection.

    Pass a `SelectedColumns` for a database connection,
    or a `SelectedFileData` for a file connection.

    The server performs the generation in the background.
    Poll `get_async_ruleset_generation_task_status` until it reports
    `AsyncRulesetGenerationTaskStatus.finished`,
    then fetch the resulting `Ruleset` with `get_generated_rulesets`.

    Raises:
        ValueError: If `selected_data` is falsy, or if any `UserSelection` in a
            `SelectedFileData` has empty/missing `locators` or `files`.
        TypeError: If `selected_data` is neither `SelectedColumns` nor `SelectedFileData`.
    """

    if not selected_data:
        raise ValueError("`selected_data` is a required argument to `start_async_ruleset_generation`.")

    payload: dict
    if isinstance(selected_data, SelectedColumns):
        payload = {"selected_columns": selected_data.columns}
        hash_columns = selected_data.hash_columns
        if hash_columns is not None:
            # Serialise the per-table config objects, keeping the schema -> table nesting.
            serialised: dict = {}
            for schema_name, table_map in hash_columns.items():
                serialised[schema_name] = {
                    table_name: table_cfg.model_dump(exclude_none=True)
                    for table_name, table_cfg in table_map.items()
                }
            payload["hash_columns"] = serialised
    elif isinstance(selected_data, SelectedFileData):
        selections = selected_data.user_selections
        # Every selection needs both locators and files before anything is sent.
        if any(not selection.locators or not selection.files for selection in selections):
            raise ValueError(
                "Each `UserSelection` in `SelectedFileData.user_selections` "
                "must have a non-null list of `locators` and `files` to be selected for."
            )
        payload = {"selected_data": [selection.model_dump() for selection in selections]}
    else:
        raise TypeError(
            "The argument `selected_data` to `start_async_ruleset_generation` was of an invalid type, "
            f"expected `SelectedColumns` or `SelectedFileData`, got {type(selected_data)}."
        )

    endpoint = f"/api/async-generate-ruleset/{connection_id}/"
    self.make_request(method="POST", path=endpoint, data=payload)
|
|
76
|
+
|
|
77
|
+
def start_async_ruleset_generation_from_csv(
    self,
    connection_id: ConnectionId,
    csv_content: Union[str, bytes, TextIOBase, BufferedIOBase],
    target_size_bytes: Optional[int] = None,
) -> None:
    """
    Start async generation of ruleset(s) from a schema discovery CSV (see `get_db_discovery_result_report()`).

    `csv_content` may be any of:
    - A string (e.g. the return value of `get_db_discovery_result_report()`)
    - Bytes
    - A text file handle (e.g. `open(path)`)
    - A binary file handle (e.g. `open(path, 'rb')`)

    `target_size_bytes`, when given, is the approximate size in bytes of each generated ruleset;
    it is only sent to the server when it is not `None`.

    The server performs the generation in the background.
    Poll `get_async_ruleset_generation_task_status` until it reports
    `AsyncRulesetGenerationTaskStatus.finished`,
    then fetch the resulting `Ruleset` objects with `get_generated_rulesets`.
    """

    # Normalise every accepted input to a binary stream for the multipart upload.
    stream: BufferedIOBase
    if isinstance(csv_content, (str, TextIOBase)):
        text = csv_content if isinstance(csv_content, str) else csv_content.read()
        stream = BytesIO(text.encode())
    elif isinstance(csv_content, bytes):
        stream = BytesIO(csv_content)
    else:
        # Anything else is passed through unchanged (expected: a binary file handle).
        stream = csv_content

    csv_upload = UploadFile(
        field_name="csv_or_zip_file",
        filename="ruleset.csv",
        content=stream,
        content_type="text/csv",
    )
    form_fields = None if target_size_bytes is None else {"target_size_bytes": target_size_bytes}

    self.make_request(
        method="POST",
        path=f"/api/async-generate-ruleset/{connection_id}/from-csv/",
        data=form_fields,
        files=[csv_upload],
    )
|
|
124
|
+
|
|
125
|
+
def get_async_ruleset_generation_task_status(self, connection_id: ConnectionId) -> AsyncRulesetGenerationTaskStatus:
    """
    Fetch the current status of the async ruleset generation task for a connection.

    Raises `DataMasqueException` if the server's response carries no (or an empty) `status` value.
    """

    body = self.make_request(method="GET", path=f"/api/async-generate-ruleset/{connection_id}/").json()
    raw_status = body.get("status")
    if raw_status:
        return AsyncRulesetGenerationTaskStatus(raw_status)

    raise DataMasqueException("Attempted to get an async ruleset generation task status but none was given.")
|
|
135
|
+
|
|
136
|
+
def get_generated_rulesets(self, connection_id: ConnectionId) -> list[Ruleset]:
    """
    Fetch the `Ruleset` objects produced by an earlier async ruleset generation on this connection.

    Works for all three async-RG flows:

    - Database masking from a schema-discovery CSV (`start_async_ruleset_generation_from_csv`) -
      returns one or more rulesets
    - Database masking from a column selection (`start_async_ruleset_generation` with `SelectedColumns`) -
      returns a list containing one ruleset
    - File masking from a file/locator selection (`start_async_ruleset_generation` with `SelectedFileData`) -
      returns a list containing one ruleset

    Raises `DataMasqueException` if the task failed (or finished without a ruleset),
    and `AsyncRulesetGenerationInProgressError` if it is in any other non-finished state.

    The returned ruleset(s) carry autogenerated names, which you may want to change before uploading.
    """

    status = self.get_async_ruleset_generation_task_status(connection_id)

    if status is AsyncRulesetGenerationTaskStatus.failed:
        logger.error("Ruleset generation failed for connection: %s", connection_id)
        raise DataMasqueException(f"Ruleset generation failed for connection: {connection_id}")

    if status is not AsyncRulesetGenerationTaskStatus.finished:
        logger.error(
            "Ruleset generation is still in progress for connection: %s. Status: `%s`",
            connection_id,
            status.value,
        )
        raise AsyncRulesetGenerationInProgressError(
            f"Ruleset generation in progress or not ready. Current status: `{status.value}`."
        )

    # For the CSV flow the download-rulesets endpoint answers with a ZIP attachment.
    # For the column / file flows it answers with a 303 redirect to the task-status endpoint,
    # whose JSON body carries the ruleset inline under `generated_ruleset`.
    # `requests` follows that redirect for us, so the two cases are told apart by whether
    # the final response has a `Content-Disposition: attachment` header
    # (set by Django's `FileResponse` on the ZIP response).
    response = self.make_request(
        method="GET",
        path=f"/api/async-generate-ruleset/{connection_id}/download-rulesets/",
    )
    disposition = response.headers.get("Content-Disposition", "")

    if "attachment" not in disposition.lower():
        inline_yaml = response.json().get("generated_ruleset")
        if inline_yaml:
            return [Ruleset(name="generated_ruleset", yaml=inline_yaml)]
        raise DataMasqueException(
            f"Ruleset generation for connection {connection_id} reported `finished` "
            f"but no ruleset was returned on the task-status record."
        )

    # ZIP case: each YAML member becomes one ruleset named after the member's file stem.
    extracted = []
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        for member in archive.infolist():
            member_name = member.filename
            if not member_name.endswith((".yml", ".yaml")):
                continue
            yaml_text = archive.read(member).decode("utf-8")
            extracted.append(Ruleset(name=Path(member_name).stem, yaml=yaml_text))
    return extracted
|
|
198
|
+
|
|
199
|
+
def start_schema_discovery_run(self, discovery_config: SchemaDiscoveryRequest) -> RunId:
    """
    Starts a schema discovery run with the given configuration.

    Args:
        discovery_config: A `SchemaDiscoveryRequest` with connection ID and optional settings.

    Returns:
        RunId: The ID of the started discovery run

    Raises:
        FailedToStartError: If the server answers with anything other than 201 Created.
            The triggering response is attached to the exception.
    """

    data = discovery_config.model_dump(exclude_none=True, mode="json")
    # The status check is disabled so that a failed start can be reported
    # as `FailedToStartError` below.
    response = self.make_request(
        "POST",
        "/api/schema-discovery/",
        data=data,
        require_status_check=False,
    )

    if response.status_code == 201:
        # Only parse the body on success: an error response is not guaranteed to be JSON
        # (e.g. an HTML error page), and parsing it would raise a decode error
        # instead of the documented `FailedToStartError`.
        raw_run_id = response.json()["id"]
        logger.info("Schema discovery run %s started successfully", raw_run_id)
        return RunId(raw_run_id)

    logger.error("Schema discovery run failed to start: %s", response.text)
    raise FailedToStartError(
        f"Schema discovery run failed to start "
        f"(server responded with status {response.status_code}: {response.text}).",
        response=response,
    )
|
|
232
|
+
|
|
233
|
+
def iter_schema_discovery_results(self, run_id: RunId) -> Iterator[SchemaDiscoveryResult]:
    """Return an iterator over every schema discovery result of a run, read from the paginated v2 endpoint."""

    endpoint = f"/api/schema-discovery/v2/{run_id}/"
    return self._iter_paginated(endpoint, model=SchemaDiscoveryResult)
|
|
240
|
+
|
|
241
|
+
def list_schema_discovery_results(self, run_id: RunId) -> list[SchemaDiscoveryResult]:
    """Collect every schema discovery result of a run into a list."""

    return [*self.iter_schema_discovery_results(run_id)]
|
|
245
|
+
|
|
246
|
+
def get_schema_discovery_page(self, run_id: RunId, *, limit: int = 50, offset: int = 0) -> SchemaDiscoveryPage:
    """
    Fetch one page of schema discovery results, including `table_metadata`.

    Prefer this over the iterating/listing methods when the table-constraint metadata
    is needed together with the results.
    `limit` and `offset` are sent to the server as query parameters.
    """

    paging = {"limit": limit, "offset": offset}
    response = self.make_request("GET", f"/api/schema-discovery/v2/{run_id}/", params=paging)
    page_payload = response.json()
    return SchemaDiscoveryPage.model_validate(page_payload)
|
|
259
|
+
|
|
260
|
+
def generate_ruleset(self, generation_request: RulesetGenerationRequest) -> str:
    """
    Generate database-masking ruleset YAML from the most recent discovery run on the given connection.

    `generation_request` is a `RulesetGenerationRequest`; fields that are `None` are not sent.
    The response body is returned decoded as UTF-8.
    """

    request_body = generation_request.model_dump(exclude_none=True, mode="json")
    raw_yaml = self.make_request("POST", "/api/generate-ruleset/v2/", data=request_body).content
    return str(raw_yaml, "utf-8")
|
|
270
|
+
|
|
271
|
+
def generate_file_ruleset(self, generation_request: FileRulesetGenerationRequest) -> str:
    """
    Generate file-masking ruleset YAML from the most recent file-data-discovery run on the given connection.

    `generation_request` is a `FileRulesetGenerationRequest`; fields that are `None` are not sent.
    The response body is returned decoded as UTF-8.
    """

    request_body = generation_request.model_dump(exclude_none=True, mode="json")
    raw_yaml = self.make_request("POST", "/api/generate-file-ruleset/", data=request_body).content
    return str(raw_yaml, "utf-8")
|
|
281
|
+
|
|
282
|
+
def get_file_data_discovery_report(self, run_id: RunId) -> list[FileDiscoveryResult]:
    """
    Returns the file-data-discovery results for the specified run.

    Each element of the server's JSON list is validated into a `FileDiscoveryResult`.
    """

    # Leading slash added so this path follows the same convention as every other
    # request path in this client (the original was the only one without it).
    response = self.make_request("GET", f"/api/runs/{run_id}/file-discovery-results/")
    return [FileDiscoveryResult.model_validate(d) for d in response.json()]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from datamasque.client.base import FileOrContent, UploadFile
|
|
2
|
+
from datamasque.client.connections import ConnectionClient
|
|
3
|
+
from datamasque.client.discovery import DiscoveryClient
|
|
4
|
+
from datamasque.client.files import FileClient
|
|
5
|
+
from datamasque.client.license import LicenseClient
|
|
6
|
+
from datamasque.client.ruleset_libraries import RulesetLibraryClient
|
|
7
|
+
from datamasque.client.rulesets import RulesetClient
|
|
8
|
+
from datamasque.client.runs import RunClient
|
|
9
|
+
from datamasque.client.settings import SettingsClient
|
|
10
|
+
from datamasque.client.users import UserClient
|
|
11
|
+
|
|
12
|
+
# Public names of this module: the client class defined below,
# plus `FileOrContent` and `UploadFile` re-exported from `datamasque.client.base`.
__all__ = ["DataMasqueClient", "FileOrContent", "UploadFile"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DataMasqueClient(
    LicenseClient,
    ConnectionClient,
    RulesetClient,
    RulesetLibraryClient,
    FileClient,
    RunClient,
    DiscoveryClient,
    UserClient,
    SettingsClient,
):
    """
    Client for a DataMasque server instance.

    This class adds no methods of its own:
    it combines the per-area API classes listed as its bases
    (licence, connections, rulesets, ruleset libraries, files, runs, discovery, users, settings)
    into a single client object.

    Example usage:

    .. code-block:: python

        from datamasque.client import DataMasqueClient
        from datamasque.client.models.dm_instance import DataMasqueInstanceConfig

        config = DataMasqueInstanceConfig(
            base_url="https://datamasque.example.com",
            username="api_user",
            password="api_password",
        )
        client = DataMasqueClient(config)
        client.authenticate()

        for connection in client.list_connections():
            print(connection.name)

    Authentication is performed on the first request if `authenticate()` is not called explicitly,
    and is automatically retried once on a 401 response.
    """
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from requests import Response
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DataMasqueException(Exception):
    """
    Generic exception base class.

    Every other exception defined in this module derives from it,
    so catching `DataMasqueException` catches all of them.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DataMasqueUserError(DataMasqueException):
    """Raised when an error occurs during user creation or configuration."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataMasqueApiError(DataMasqueException):
    """
    Raised when the DataMasque server responds to a request with a non-2xx status code.

    The triggering `Response` is always available on the `.response` attribute,
    so callers can inspect the status code, headers, and body for richer error handling.

    502 Bad Gateway responses are raised as `DataMasqueNotReadyError` instead.
    """

    def __init__(self, message: str, *, response: Response) -> None:
        """
        Create the error.

        Args:
            message: Human-readable description, used as the exception message.
            response: The `requests` response that triggered the error (keyword-only, required).
        """
        super().__init__(message)
        # Kept so callers can read status code, headers and body from the exception.
        self.response = response
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class FailedToStartError(DataMasqueApiError):
    """
    Raised when the server fails to create a requested run.

    Used by `start_masking_run`, and also by `start_schema_discovery_run`
    when the server does not answer with 201 Created.

    Inherits `.response` from `DataMasqueApiError`,
    so callers can read the server's status code and error body directly.
    """
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class InvalidRulesetError(FailedToStartError):
    """
    Specific error for when runs fail to start due to having an invalid ruleset.

    As a `FailedToStartError` subclass it also carries the triggering `.response`.
    """
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class InvalidLibraryError(FailedToStartError):
    """
    Specific error for when runs fail to start due to having an invalid ruleset library.

    As a `FailedToStartError` subclass it also carries the triggering `.response`.
    """
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class DataMasqueTransportError(DataMasqueException):
    """
    Raised when a request to the DataMasque server fails before any response is received.

    Covers connection refused, timeout, DNS failure, SSL handshake failure,
    and similar transport-layer errors.
    The originating `requests` exception is chained via `__cause__`.

    Unlike `DataMasqueApiError`, this class defines no `.response` attribute.
    """
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class DataMasqueNotReadyError(DataMasqueException):
    """
    Raised when the DataMasque server is not healthy, normally because it is still starting up.

    This derives directly from `DataMasqueException`, not from `DataMasqueApiError`,
    so `except DataMasqueApiError` does not catch it and it defines no `.response` attribute.
    """
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class AsyncRulesetGenerationInProgressError(DataMasqueException):
    """
    Raised when attempting to retrieve results from a ruleset generation request that has not yet completed.

    `get_generated_rulesets` raises it when the task status is neither `failed` nor `finished`.
    """
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class DataMasqueIfmError(DataMasqueException):
    """
    Generic base exception for IFM (in-flight masking) client errors.

    Catch this to handle any IFM-specific error, such as `IfmAuthError`.
    """
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class IfmAuthError(DataMasqueIfmError):
    """
    Raised when the IFM client cannot obtain or refresh a JWT.

    Typical causes are invalid credentials or a missing scope.
    """
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class RunNotCancellableError(DataMasqueUserError):
    """
    Raised when `cancel_run` is called against a run that is no longer eligible for cancellation.

    Typically this happens when the run is already finished, failed, or in the cancelling state itself.

    Note that this is a `DataMasqueUserError` subclass,
    so handlers for `DataMasqueUserError` will also catch it.
    """
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Optional, Type, TypeVar, Union
|
|
3
|
+
|
|
4
|
+
from datamasque.client.base import BaseClient, UploadFile, read_file_or_content
|
|
5
|
+
from datamasque.client.models.files import DataMasqueFile
|
|
6
|
+
|
|
7
|
+
# Type variable bound to `DataMasqueFile`, so that methods taking `Type[FileTypeT]`
# are typed as returning the same concrete file class they were given.
FileTypeT = TypeVar("FileTypeT", bound=DataMasqueFile)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileClient(BaseClient):
|
|
11
|
+
"""File-upload API methods. Mixed into `DataMasqueClient`."""
|
|
12
|
+
|
|
13
|
+
def upload_file(
    self,
    file_type: Type[FileTypeT],
    file_name: str,
    file_path_or_content: Union[str, bytes, Path],
) -> FileTypeT:
    """
    Upload a file of the given type to the DataMasque server and return the created file object.

    `file_type` must be a concrete subclass of `DataMasqueFile`
    (`SeedFile`, `OracleWalletFile`, `SslZipFile`, `SnowflakeKeyFile`).
    `file_name` is sent to the server as the file's `name`.
    `file_path_or_content` may be a path (as `str` or `Path`) or raw `bytes`.
    The previous documentation also listed file-like objects, which the type annotation does not include;
    whether they are accepted depends on `read_file_or_content` - TODO confirm.
    """

    upload_name, stream = read_file_or_content(file_path_or_content, file_name)
    # Rewind so the upload starts from the first byte of the stream.
    stream.seek(0)

    target_url = file_type.get_url()
    attachment = UploadFile(
        field_name=file_type.get_content_param_name(),
        filename=upload_name,
        content=stream,
        content_type="application/octet-stream",
    )
    response = self.make_request("POST", target_url, data={"name": file_name}, files=[attachment])
    created = response.json()
    return file_type.model_validate(created)
|
|
44
|
+
|
|
45
|
+
def delete_file_if_exists(self, file: DataMasqueFile) -> None:
    """
    Delete a file from the server; does nothing if the file does not exist.

    `file` must be an instance of a concrete subclass of `DataMasqueFile` with its ID set.
    Raises `ValueError` when `file.id` is `None`.
    """

    file_id = file.id
    if file_id is None:
        raise ValueError("File has not yet been created")

    # get_url() already ends with a slash, so the id is appended directly.
    collection_url = file.get_url()
    self._delete_if_exists(f"{collection_url}{file_id}/")
|
|
58
|
+
|
|
59
|
+
def list_files_of_type(self, file_type: Type[FileTypeT]) -> list[FileTypeT]:
    """List every file of the given type, where `file_type` is a concrete subclass of `DataMasqueFile`."""

    records = self.make_request("GET", file_type.get_url()).json()
    return list(map(file_type.model_validate, records))
|
|
64
|
+
|
|
65
|
+
def get_file_of_type_by_name(self, file_type: Type[FileTypeT], name: str) -> Optional[FileTypeT]:
    """
    Find a file of the given type (a concrete subclass of `DataMasqueFile`) whose name equals `name`.

    Returns the first match in the order the server lists the files, or `None` when nothing matches.
    """

    for candidate in self.list_files_of_type(file_type):
        if candidate.name == name:
            return candidate
    return None
|
|
74
|
+
|
|
75
|
+
def upload_file_if_not_exists(self, file_type: Type[FileTypeT], file_path: Union[str, Path]) -> Optional[FileTypeT]:
    """
    Upload a file unless the server already has one of this type with the same name.

    The name compared and uploaded is the final component of `file_path`.

    Args:
        file_type: A concrete subclass of `DataMasqueFile` (e.g., SeedFile, OracleWalletFile).
        file_path: Path to the file to upload.

    Returns:
        The uploaded file object if a new file was uploaded, or None if a file
        with the same name already exists.
    """

    source = Path(file_path)
    already_present = self.get_file_of_type_by_name(file_type, source.name)
    if already_present is None:
        return self.upload_file(file_type, source.name, source)
    return None
|