datamasque-python 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ # Copyright 2026 DataMasque Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this library except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
from importlib.metadata import PackageNotFoundError, version
10
+
11
+ from datamasque.client.dmclient import DataMasqueClient, FileOrContent
12
+ from datamasque.client.exceptions import (
13
+ AsyncRulesetGenerationInProgressError,
14
+ DataMasqueApiError,
15
+ DataMasqueException,
16
+ DataMasqueIfmError,
17
+ DataMasqueNotReadyError,
18
+ DataMasqueTransportError,
19
+ DataMasqueUserError,
20
+ FailedToStartError,
21
+ IfmAuthError,
22
+ InvalidLibraryError,
23
+ InvalidRulesetError,
24
+ RunNotCancellableError,
25
+ )
26
+ from datamasque.client.ifm import DataMasqueIfmClient
27
+ from datamasque.client.models.connection import (
28
+ AzureConnectionConfig,
29
+ ConnectionConfig,
30
+ ConnectionId,
31
+ DatabaseConnectionConfig,
32
+ DatabaseType,
33
+ DynamoConnectionConfig,
34
+ FileConnectionConfig,
35
+ MongoConnectionConfig,
36
+ MountedShareConnectionConfig,
37
+ MssqlLinkedServerConnectionConfig,
38
+ S3ConnectionConfig,
39
+ SnowflakeConnectionConfig,
40
+ SnowflakeStageLocation,
41
+ SseConfig,
42
+ SseSelection,
43
+ )
44
+ from datamasque.client.models.data_selection import (
45
+ HashColumnsTableConfig,
46
+ JsonPath,
47
+ Locator,
48
+ SelectedColumns,
49
+ SelectedData,
50
+ SelectedFileData,
51
+ UserSelection,
52
+ )
53
+ from datamasque.client.models.discovery import (
54
+ ConstraintColumns,
55
+ DiscoveryMatch,
56
+ FileDiscoveryFile,
57
+ FileDiscoveryLocatorResult,
58
+ FileDiscoveryMatch,
59
+ FileDiscoveryResult,
60
+ FileRulesetGenerationRequest,
61
+ ForeignKeyRef,
62
+ InDataDiscoveryConfig,
63
+ InDataDiscoveryRule,
64
+ ReferencingForeignKey,
65
+ RulesetGenerationRequest,
66
+ SchemaDiscoveryColumn,
67
+ SchemaDiscoveryPage,
68
+ SchemaDiscoveryRequest,
69
+ SchemaDiscoveryResult,
70
+ TableConstraints,
71
+ )
72
+ from datamasque.client.models.dm_instance import DataMasqueInstanceConfig
73
+ from datamasque.client.models.files import (
74
+ DataMasqueFile,
75
+ FileId,
76
+ OracleWalletFile,
77
+ SeedFile,
78
+ SnowflakeKeyFile,
79
+ SslZipFile,
80
+ )
81
+ from datamasque.client.models.ifm import (
82
+ DataMasqueIfmInstanceConfig,
83
+ IfmLog,
84
+ IfmMaskRequest,
85
+ IfmMaskResult,
86
+ IfmRulesetPlanRef,
87
+ IfmTokenInfo,
88
+ RulesetPlan,
89
+ RulesetPlanCreateRequest,
90
+ RulesetPlanOptions,
91
+ RulesetPlanPartialUpdateRequest,
92
+ RulesetPlanUpdateRequest,
93
+ )
94
+ from datamasque.client.models.license import LicenseInfo, SwitchableLicenseMetadata
95
+ from datamasque.client.models.ruleset import Ruleset, RulesetId, RulesetType
96
+ from datamasque.client.models.ruleset_library import RulesetLibrary, RulesetLibraryId
97
+ from datamasque.client.models.runs import (
98
+ MaskingRunOptions,
99
+ MaskingRunRequest,
100
+ MaskType,
101
+ RunConnectionRef,
102
+ RunId,
103
+ RunInfo,
104
+ UnfinishedRun,
105
+ )
106
+ from datamasque.client.models.status import AsyncRulesetGenerationTaskStatus, MaskingRunStatus, ValidationStatus
107
+ from datamasque.client.models.user import User, UserId, UserRole
108
+
109
# Resolve the installed distribution's version from its packaging metadata.
# When no metadata is available (for example the package is imported from a source checkout
# or a vendored copy that was never installed), fall back to a PEP 440 placeholder
# so that merely importing the package does not fail on the version lookup.
try:
    __version__ = version("datamasque-python")
except PackageNotFoundError:
    __version__ = "0.0.0+unknown"

# Names re-exported as the package-level public API, listed in ASCII sort order.
__all__ = [
    "AsyncRulesetGenerationInProgressError",
    "AsyncRulesetGenerationTaskStatus",
    "AzureConnectionConfig",
    "ConnectionConfig",
    "ConnectionId",
    "ConstraintColumns",
    "DataMasqueApiError",
    "DataMasqueClient",
    "DataMasqueException",
    "DataMasqueFile",
    "DataMasqueIfmClient",
    "DataMasqueIfmError",
    "DataMasqueIfmInstanceConfig",
    "DataMasqueInstanceConfig",
    "DataMasqueNotReadyError",
    "DataMasqueTransportError",
    "DataMasqueUserError",
    "DatabaseConnectionConfig",
    "DatabaseType",
    "DiscoveryMatch",
    "DynamoConnectionConfig",
    "FailedToStartError",
    "FileConnectionConfig",
    "FileDiscoveryFile",
    "FileDiscoveryLocatorResult",
    "FileDiscoveryMatch",
    "FileDiscoveryResult",
    "FileId",
    "FileOrContent",
    "FileRulesetGenerationRequest",
    "ForeignKeyRef",
    "HashColumnsTableConfig",
    "IfmAuthError",
    "IfmLog",
    "IfmMaskRequest",
    "IfmMaskResult",
    "IfmRulesetPlanRef",
    "IfmTokenInfo",
    "InDataDiscoveryConfig",
    "InDataDiscoveryRule",
    "InvalidLibraryError",
    "InvalidRulesetError",
    "JsonPath",
    "LicenseInfo",
    "Locator",
    "MaskType",
    "MaskingRunOptions",
    "MaskingRunRequest",
    "MaskingRunStatus",
    "MongoConnectionConfig",
    "MountedShareConnectionConfig",
    "MssqlLinkedServerConnectionConfig",
    "OracleWalletFile",
    "ReferencingForeignKey",
    "Ruleset",
    "RulesetGenerationRequest",
    "RulesetId",
    "RulesetLibrary",
    "RulesetLibraryId",
    "RulesetPlan",
    "RulesetPlanCreateRequest",
    "RulesetPlanOptions",
    "RulesetPlanPartialUpdateRequest",
    "RulesetPlanUpdateRequest",
    "RulesetType",
    "RunConnectionRef",
    "RunId",
    "RunInfo",
    "RunNotCancellableError",
    "S3ConnectionConfig",
    "SchemaDiscoveryColumn",
    "SchemaDiscoveryPage",
    "SchemaDiscoveryRequest",
    "SchemaDiscoveryResult",
    "SeedFile",
    "SelectedColumns",
    "SelectedData",
    "SelectedFileData",
    "SnowflakeConnectionConfig",
    "SnowflakeKeyFile",
    "SnowflakeStageLocation",
    "SseConfig",
    "SseSelection",
    "SslZipFile",
    "SwitchableLicenseMetadata",
    "TableConstraints",
    "UnfinishedRun",
    "User",
    "UserId",
    "UserRole",
    "UserSelection",
    "ValidationStatus",
]
@@ -0,0 +1,304 @@
1
+ import logging
2
+ import warnings
3
+ from contextlib import contextmanager
4
+ from dataclasses import dataclass
5
+ from io import BufferedIOBase, BytesIO, TextIOBase
6
+ from pathlib import Path
7
+ from typing import Any, Callable, Iterator, Optional, Type, TypeVar, Union
8
+ from urllib.parse import urljoin
9
+
10
+ import requests
11
+ from pydantic import BaseModel
12
+ from requests import Response
13
+ from urllib3.exceptions import InsecureRequestWarning
14
+
15
+ from datamasque.client.exceptions import (
16
+ DataMasqueApiError,
17
+ DataMasqueNotReadyError,
18
+ DataMasqueTransportError,
19
+ )
20
+ from datamasque.client.models.dm_instance import DataMasqueInstanceConfig
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ FileOrContent = Union[str, bytes, TextIOBase, BufferedIOBase, Path]
25
+ _T = TypeVar("_T", bound=BaseModel)
26
+
27
# Substrings (case-insensitive) that mark a key whose value should be redacted
# before logging on an error path, so that passwords, API tokens, and similar secrets don't
# end up in user-visible logs when a request fails.
# Applied to both outgoing request bodies and incoming response bodies,
# at every nesting level of the JSON-like structure.
SENSITIVE_DATA_KEYS = ("password", "secret", "token", "key", "credential")


def _redact_sensitive(value: Any) -> Any:
    """
    Return a copy of `value` with the values of sensitive keys replaced by `"<redacted>"`.

    A key is sensitive when its lower-cased string form contains any entry of `SENSITIVE_DATA_KEYS`.
    Dicts are walked recursively, including dicts nested inside lists,
    because JSON bodies routinely carry credentials below the top level.
    The whole value under a sensitive key is replaced, whatever its type.
    Any other value is returned unchanged, and the input itself is never mutated.
    """

    if isinstance(value, dict):
        return {
            k: "<redacted>"
            if any(word in str(k).lower() for word in SENSITIVE_DATA_KEYS)
            else _redact_sensitive(v)
            for k, v in value.items()
        }

    if isinstance(value, list):
        return [_redact_sensitive(item) for item in value]

    return value
44
+
45
+
46
@contextmanager
def suppress_insecure_warning_if_needed(verify_ssl: bool) -> Iterator[None]:
    """
    Silence `InsecureRequestWarning` for the duration of the `with` body when `verify_ssl` is false.

    When `verify_ssl` is true the warning filters are left untouched.
    Otherwise an "ignore" filter is installed inside `warnings.catch_warnings()`,
    so the previous filter state is restored on exit.
    """

    if not verify_ssl:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=InsecureRequestWarning)
            yield
    else:
        yield
56
+
57
+
58
@dataclass
class UploadFile:
    """One file part of a multipart form request."""

    # Name of the multipart form field the file is sent under.
    field_name: str
    # File name reported for this part.
    filename: str
    # Binary stream holding the file's bytes.
    content: BufferedIOBase
    # Optional MIME type for this part; `None` when unspecified.
    content_type: Optional[str] = None
66
+
67
+
68
class BaseClient:
    """
    Shared state and HTTP plumbing for every feature client mixin.

    Holds the connection config, cached auth token, and the core `make_request` dispatcher
    used by all per-feature mixins that compose `DataMasqueClient`.
    """

    # Cached `Authorization` header value (`"Token <key>"`); empty until `authenticate` has run.
    token: str = ""
    base_url: str
    username: str
    password: Optional[str]
    verify_ssl: bool
    # When set, called to obtain a token instead of POSTing credentials to the login endpoint.
    token_source: Optional[Callable[[], str]]

    def __init__(self, connection_config: DataMasqueInstanceConfig) -> None:
        """Copy the connection settings from `connection_config`; no request is made here."""

        self.base_url = connection_config.base_url
        self.username = connection_config.username
        self.password = connection_config.password
        self.verify_ssl = connection_config.verify_ssl
        self.token_source = connection_config.token_source

    @contextmanager
    def _maybe_suppress_insecure_warning(self) -> Iterator[None]:
        # `urllib3.disable_warnings` is global,
        # so instead we scope the suppression to this single call via `warnings.catch_warnings`.
        # Clients that leave `verify_ssl=True` never touch the warning filter at all.
        with suppress_insecure_warning_if_needed(self.verify_ssl):
            yield

    def authenticate(self) -> None:
        """
        Authenticate against the DataMasque server and cache the resulting token.

        Called implicitly by `make_request` on the first request and on a 401 response,
        so you generally do not need to call this yourself.

        When the client was constructed with a `token_source` callable,
        the callable is invoked instead of POSTing to the login endpoint.

        Raises:
            DataMasqueApiError: When the login endpoint responds with anything other than 200.
            DataMasqueTransportError: When the login request fails before any response is received.
        """

        if self.token_source is not None:
            self.token = f"Token {self.token_source()}"
            logger.debug("Login Success via token_source")
            return

        login_url = urljoin(self.base_url, "/api/auth/token/login/")
        response = self.make_request(
            method="POST",
            path=login_url,
            data={"username": self.username, "password": self.password},
            requires_authorization=False,
            require_status_check=False,
        )

        if response.status_code == 200:
            self.token = f"Token {response.json()['key']}"
            # The token is a bearer credential, so it must never be written to the log.
            logger.debug("Login Success")
        else:
            logger.error("Login Failure")
            raise DataMasqueApiError(
                "Unable to login to DataMasque Client, please ensure that login credentials are correct",
                response=response,
            )

    def healthcheck(self) -> None:
        """
        Pings the server's unauthenticated healthcheck endpoint.

        Returns without error when the server is up and ready to accept requests.
        """

        self.make_request("GET", "/api/healthcheck/", requires_authorization=False)

    def make_request(
        self,
        method: str,
        path: str,
        *,
        data: Optional[dict] = None,
        params: Optional[dict] = None,
        files: Optional[list[UploadFile]] = None,
        requires_authorization: bool = True,
        require_status_check: bool = True,
    ) -> Response:
        """
        Sends an HTTP request to the DataMasque server and returns the `Response`.

        When `requires_authorization` is true (the default),
        the current auth token is sent in the request headers,
        and a 401 response triggers one re-auth-and-retry.
        Unauthenticated requests are never retried on a 401.

        Args:
            method: HTTP method (e.g. `"GET"`, `"POST"`).
            path: URL path such as `/api/license/`.
                Must include a trailing slash.
            data: Request body.
                Serialised as JSON for normal requests,
                and as multipart form data when `files` is also provided.
            params: Query string parameters,
                merged into the URL as `?key=value&...`.
            files: Multipart form uploads;
                when set, the request is sent as `multipart/form-data` and `data` is sent alongside as form fields.
            requires_authorization: When true (the default),
                the current auth token is attached and a 401 triggers one re-auth-and-retry.
            require_status_check: When true (the default),
                a non-2xx response raises one of the exceptions below;
                when false, the `Response` is returned regardless of status so the caller can inspect it directly.

        Raises:
            DataMasqueApiError: When `require_status_check` is true (the default) and the response is non-2xx.
                The response object is available on the `.response` attribute of the exception.
            DataMasqueNotReadyError: When `require_status_check` is true and the response is 502.
                502 typically indicates the server is still starting up.
            DataMasqueTransportError: When the request fails before any response is received
                (connection refused, timeout, DNS failure, SSL handshake failure, etc.).
        """

        url = urljoin(self.base_url, path)

        def send() -> Response:
            # `self.token` is read on every call so that a retry picks up a freshly obtained token.
            headers: Optional[dict] = {"Authorization": self.token} if requires_authorization else None
            try:
                with self._maybe_suppress_insecure_warning():
                    if files:
                        files_payload = {f.field_name: (f.filename, f.content, f.content_type or "") for f in files}
                        return requests.request(
                            method,
                            url,
                            data=data,
                            params=params,
                            headers=headers,
                            files=files_payload,
                            verify=self.verify_ssl,
                        )
                    return requests.request(
                        method, url, json=data, params=params, headers=headers, verify=self.verify_ssl
                    )
            except requests.RequestException as e:
                raise DataMasqueTransportError(f"Failed to reach DataMasque server at {url}: {e}") from e

        response = send()
        # Only authenticated requests are retried.
        # `authenticate` itself issues its login POST through this method with `requires_authorization=False`,
        # so retrying unauthenticated requests would recurse without bound if the login endpoint answered 401.
        if response.status_code == 401 and requires_authorization:
            logger.debug("Re-authenticating")
            self.authenticate()
            # Reset file pointers so the retry doesn't send empty files
            if files:
                for f in files:
                    f.content.seek(0)
            response = send()

        if require_status_check:
            self._raise_for_status(response, request_data=data)

        return response

    def _raise_for_status(self, response: Response, *, request_data: Optional[dict] = None) -> None:
        """
        Raise the appropriate client exception for a non-2xx `response`; return silently otherwise.

        A 502 raises `DataMasqueNotReadyError`.
        Any other failure is logged (with sensitive keys redacted) and raises `DataMasqueApiError`.
        """

        if response.ok:
            return

        if response.status_code == 502:
            # Bad Gateway error returned when DM is still initializing
            raise DataMasqueNotReadyError

        # Redact sensitive keys from the response body before logging,
        # in case the server echoes back caller-supplied credentials in an error payload.
        try:
            response_body: Any = response.json()
        except ValueError:
            response_body = response.text or response.content
        logger.error("Error when calling API: %s", _redact_sensitive(response_body))
        if isinstance(request_data, dict):
            logger.error("Request data was: %s", _redact_sensitive(request_data))

        raise DataMasqueApiError(
            f"API request to {response.request.url} failed with status {response.status_code}",
            response=response,
        )

    def _delete_if_exists(self, path: str, *, params: Optional[dict] = None) -> None:
        """Send a DELETE to `path`, treating a 404 as success and raising on any other failure."""

        response = self.make_request("DELETE", path, params=params, require_status_check=False)
        if response.status_code == 404:
            return

        self._raise_for_status(response)

    def _iter_paginated(
        self,
        path: str,
        model: Type[_T],
        *,
        params: Optional[dict] = None,
        page_size: int = 100,
    ) -> Iterator[_T]:
        """
        Iterate every `T` across all pages of an admin-server list endpoint.

        Opts into pagination by sending `limit`/`offset` on the first request,
        then follows the absolute `next` URL returned by the server.
        """

        # Copy so the caller's dict is not mutated; caller-supplied `limit`/`offset` take precedence.
        first_params = dict(params or {})
        first_params.setdefault("limit", page_size)
        first_params.setdefault("offset", 0)

        url: Optional[str] = path
        current_params: Optional[dict] = first_params

        while url:
            response = self.make_request("GET", url, params=current_params)
            data = response.json()
            yield from (model.model_validate(item) for item in data["results"])
            url = data.get("next")
            # The `next` URL is absolute and already contains the pagination cursor;
            # do not re-send our initial params alongside it.
            current_params = None
284
+
285
+
286
def read_file_or_content(file_or_content: FileOrContent, fallback_file_name: str) -> tuple[str, BufferedIOBase]:
    """
    Normalises a path or in-memory content into a `(filename, binary stream)` pair.

    A `str` or `Path` is treated as a path on disk:
    the file is read fully into a `BytesIO` and the returned name is the path's final component.
    For anything else the returned name is `fallback_file_name`:
    `bytes` are wrapped in a `BytesIO`,
    a text stream is read fully and encoded with `str.encode()` into a `BytesIO`,
    and any other object is returned as-is.
    """

    if isinstance(file_or_content, (str, Path)):
        with open(file_or_content, "rb") as handle:
            payload = handle.read()
        return Path(file_or_content).name, BytesIO(payload)

    if isinstance(file_or_content, TextIOBase):
        stream = BytesIO(file_or_content.read().encode())
    elif isinstance(file_or_content, bytes):
        stream = BytesIO(file_or_content)
    else:
        stream = file_or_content

    return fallback_file_name, stream
@@ -0,0 +1,64 @@
1
+ import logging
2
+
3
+ from datamasque.client.base import BaseClient
4
+ from datamasque.client.exceptions import DataMasqueException
5
+ from datamasque.client.models.connection import ConnectionConfig, ConnectionId, validate_connection
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class ConnectionClient(BaseClient):
    """Mixin for `DataMasqueClient` that exposes the connection endpoints of the API."""

    def list_connections(self) -> list[ConnectionConfig]:
        """
        Fetches every connection configured on the server.

        Database passwords and connection strings are returned encrypted over the API,
        so those fields are `None` on the returned `ConnectionConfig` objects.
        """

        payloads = self.make_request("GET", "/api/connections/").json()
        return list(map(validate_connection, payloads))

    def create_or_update_connection(self, connection_config: ConnectionConfig) -> ConnectionConfig:
        """
        Creates the connection on the server, or updates it when one already exists.

        A server-side connection with the same name takes precedence over `connection_config.id`
        when choosing which connection to update.
        The server-assigned `id` is written back onto `connection_config`, which is also returned.
        """

        # The first server-side connection sharing this name, if any, decides the target ID.
        same_name = next(
            (candidate for candidate in self.list_connections() if candidate.name == connection_config.name),
            None,
        )
        target_id = connection_config.id if same_name is None else same_name.id

        payload = {"version": "1.0"} | connection_config.model_dump(exclude_none=True, by_alias=True, mode="json")
        if target_id is None:
            method, path = "POST", "/api/connections/"
        else:
            method, path = "PUT", f"/api/connections/{target_id}/"
        response = self.make_request(method, path, data=payload)

        assigned_id = ConnectionId(response.json()["id"])
        logger.debug("%s creation successful", type(connection_config).__name__)
        connection_config.id = assigned_id
        return connection_config

    def delete_connection_by_id_if_exists(self, connection_id: ConnectionId) -> None:
        """Deletes the connection with the given ID; does nothing when no such connection exists."""

        self._delete_if_exists(f"/api/connections/{connection_id}/")

    def delete_connection_by_name_if_exists(self, connection_name: str) -> None:
        """
        Deletes every connection with the given name; does nothing when none match.

        Raises `DataMasqueException` if the server lists a matching connection without an `id`.
        """

        for connection in self.list_connections():
            if connection.name != connection_name:
                continue

            if connection.id is None:
                raise DataMasqueException(f'Server returned a connection named "{connection.name}" without an `id`.')

            self.delete_connection_by_id_if_exists(connection.id)