datamasque-python 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,429 @@
1
+ """Connection configuration models for the DataMasque API."""
2
+
3
+ from enum import Enum
4
+ from typing import Any, Callable, Literal, NewType, Optional
5
+
6
+ from pydantic import BaseModel, ConfigDict, Field, model_serializer, model_validator
7
+
8
+ from datamasque.client.exceptions import DataMasqueException
9
+ from datamasque.client.models.files import FileId
10
+
11
# Nominal type for connection identifiers: static type checkers treat it as
# distinct from a plain `str`, while at runtime the value is an ordinary string.
ConnectionId = NewType("ConnectionId", str)
12
+
13
+
14
def unwrap_connection_id(value: Any) -> Any:
    """
    Return the `id` of a `ConnectionConfig`, or `value` itself for anything else.

    Request-model validators use this so that callers may supply
    either a `ConnectionId` or a whole `ConnectionConfig`.
    A `ConnectionConfig` whose `id` is `None` raises `ValueError`.
    """

    # Anything that is not a connection model is passed through untouched.
    if not isinstance(value, ConnectionConfig):
        return value

    connection_id = value.id
    if connection_id is None:
        raise ValueError("Connection has not been created yet (id is None)")
    return connection_id
30
+
31
+
32
class DatabaseType(Enum):
    """Supported database engines for `DatabaseConnectionConfig`."""

    # Each value is the engine string used as `db_type` in connection payloads.
    postgres = "postgres"
    mysql = "mysql"
    oracle = "oracle"
    mariadb = "mariadb"
    sql_server = "mssql"  # member name differs from its value
    redshift = "redshift"
    dynamodb = "dynamo_db"  # member name differs from its value
    db2_luw = "db2_luw"
    db2i = "db2i"
    mssql_linked = "mssql_linked"
    snowflake = "snowflake"
    mongodb = "mongodb"
47
+
48
+
49
class SnowflakeStageLocation(str, Enum):
    """Storage backend for a Snowflake connection's external stage."""

    # Mixing in `str` makes every member compare equal to its plain string value.

    # Not supported for production use
    local = "local"
    aws_s3 = "aws_s3"
    azure_blob_storage = "azure_blob_storage"
55
+
56
+
57
class SseSelection(Enum):
    """Mirrors the available options in the AWS console for DynamoDB Server-Side Encryption."""

    dynamodb_owned = "dynamodb_owned"
    aws_managed = "aws_managed"
    # The only selection for which `SseConfig` demands a `kms_key_id`.
    account_managed = "account_managed"
    use_source = "use_source"
64
+
65
+
66
class SseConfig(BaseModel):
    """
    Server-side encryption configuration for a DynamoDB connection.

    `kms_key_id` is required when `selection` is `SseSelection.account_managed`
    and must be `None` for every other selection.
    """

    # Unknown keys are rejected instead of being silently kept.
    model_config = ConfigDict(extra="forbid")

    selection: SseSelection
    # Required when `selection` is `account_managed`; must be None otherwise
    kms_key_id: Optional[str] = None

    @model_validator(mode="after")
    def _validate_kms_key(self) -> "SseConfig":
        """Enforce that a KMS key ID is present exactly when the key is account-managed."""
        key_required = self.selection is SseSelection.account_managed
        key_given = self.kms_key_id is not None

        if key_required and not key_given:
            raise ValueError(
                "A KMS key ID must be specified when the SSE key is stored in your account, and owned "
                "and managed by you."
            )
        if key_given and not key_required:
            raise ValueError(
                "A KMS key ID can only be specified when the SSE key is stored in your account, and "
                "owned and managed by you."
            )
        return self
93
+
94
+
95
class ConnectionConfig(BaseModel):
    """
    Base class for all connection configurations.

    Use `validate_connection(payload)` to deserialize an API response
    into the appropriate concrete subclass.
    """

    # Unrecognised keys are kept on the model rather than rejected, and aliased
    # fields may also be populated through their Python attribute name.
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    name: str
    # `None` means the connection has not been created on the server yet.
    id: Optional[ConnectionId] = None
107
+
108
+
109
class DynamoConnectionConfig(ConnectionConfig):
    """Connection configuration for a DynamoDB table."""

    s3_bucket_name: Optional[str] = None
    dynamo_append_datetime: bool = False
    dynamo_append_suffix: str = "-MASKED"
    dynamo_replace_tables: bool = True
    dynamo_default_region: Optional[str] = None
    dynamo_default_sse: SseConfig = SseConfig(selection=SseSelection.dynamodb_owned, kms_key_id=None)
    iam_role_arn: Optional[str] = None
    export_s3_prefix: Optional[str] = None

    # Fixed discriminators identifying this as a DynamoDB database connection.
    mask_type: Literal["database"] = "database"
    db_type: Literal["dynamo_db"] = "dynamo_db"

    @property
    def database_type(self) -> DatabaseType:
        """Return the engine enum for this connection (always `DatabaseType.dynamodb`)."""
        return DatabaseType.dynamodb

    @model_serializer(mode="wrap")
    def _serialize(self, handler: Callable) -> dict:
        """Serialize with the default handler, then fill in placeholder fields if they are absent."""
        d = handler(self)
        # The admin server requires these placeholder fields for Dynamo connections.
        d.setdefault("host", "")
        d.setdefault("port", None)
        d.setdefault("user", "")
        d.setdefault("password", "")
        d.setdefault("database", "")
        d.setdefault("schema", "")
        return d

    @model_validator(mode="before")
    @classmethod
    def _strip_server_only_fields(cls, data: Any) -> Any:
        """
        Drop fields that come back from the server but aren't part of this model.

        Non-dict input is returned unchanged.
        For dict input a filtered copy is returned:
        `model_validate(payload)` hands the caller's own dict to this hook,
        so removing keys in place would silently alter the caller's payload.
        """
        if isinstance(data, dict):
            server_only = ("password_encrypted", "dbpassword")
            data = {key: value for key, value in data.items() if key not in server_only}
        return data
148
+
149
+
150
class MongoConnectionConfig(ConnectionConfig):
    """Connection configuration for a MongoDB instance."""

    host: str = ""
    port: int = 27017
    database: str = ""
    user: str = ""
    password: Optional[str] = None
    auth_source: str = "admin"
    tls: bool = False
    direct_connection: bool = False
    replica_set: str = ""
    is_read_only: bool = False

    # Fixed discriminators identifying this as a MongoDB database connection.
    mask_type: Literal["database"] = "database"
    db_type: Literal["mongodb"] = "mongodb"

    @property
    def database_type(self) -> DatabaseType:
        """Return the engine enum for this connection (always `DatabaseType.mongodb`)."""
        return DatabaseType.mongodb

    @model_serializer(mode="wrap")
    def _serialize(self, handler: Callable) -> dict:
        """
        Serialize with the default handler, then reshape the result for the server.

        The password is moved to `dbpassword` (and omitted when `None` or empty),
        and the optional settings below are omitted whenever they are falsy.
        """
        d = handler(self)
        # The server expects the password under the `dbpassword` key.
        password = d.pop("password", None)
        if password:
            d["dbpassword"] = password
        for key in ("tls", "direct_connection", "replica_set", "user"):
            if not d.get(key):
                d.pop(key, None)
        return d

    @model_validator(mode="before")
    @classmethod
    def _strip_encrypted_password(cls, data: Any) -> Any:
        """
        Drop the `password_encrypted` and `dbpassword` keys from incoming dict data.

        Non-dict input is returned unchanged.
        For dict input a filtered copy is returned:
        `model_validate(payload)` hands the caller's own dict to this hook,
        so removing keys in place would silently alter the caller's payload.
        """
        if isinstance(data, dict):
            dropped = ("password_encrypted", "dbpassword")
            data = {key: value for key, value in data.items() if key not in dropped}
        return data
195
+
196
+
197
class SnowflakeConnectionConfig(ConnectionConfig):
    """
    Connection configuration for a Snowflake database.

    Supports password authentication (`password`)
    and key-pair authentication (`snowflake_private_key` + optional `snowflake_private_key_passphrase`).
    """

    database: str
    user: str
    snowflake_account_id: str
    snowflake_warehouse: str
    snowflake_storage_integration_name: str
    host: str = ""
    port: Optional[int] = None
    # Named `db_schema` in Python; populated from / aliased to the `schema` key.
    db_schema: Optional[str] = Field(default=None, alias="schema")
    snowflake_role: str = ""
    is_read_only: bool = False
    password: Optional[str] = None
    snowflake_private_key: Optional[FileId] = None
    snowflake_private_key_passphrase: Optional[str] = None
    snowflake_stage_location: Optional[SnowflakeStageLocation] = None
    s3_bucket_name: Optional[str] = None
    iam_role_arn: Optional[str] = None
    snowflake_azure_container_name: Optional[str] = None
    snowflake_azure_connection_string: Optional[str] = None
    snowflake_azure_connection_string_encrypted: Optional[str] = None

    # Fixed discriminators identifying this as a Snowflake database connection.
    mask_type: Literal["database"] = "database"
    db_type: Literal["snowflake"] = "snowflake"

    @property
    def database_type(self) -> DatabaseType:
        """Return the engine enum for this connection (always `DatabaseType.snowflake`)."""
        return DatabaseType.snowflake

    @model_serializer(mode="wrap")
    def _serialize(self, handler: Callable) -> dict:
        """
        Serialize with the default handler, then reshape the result for the server.

        A non-`None` password is moved to `dbpassword`,
        and a missing or `None` `schema` entry is replaced with an empty string.
        """
        d = handler(self)
        # The server expects the password under the `dbpassword` key.
        password = d.pop("password", None)
        if password is not None:
            d["dbpassword"] = password
        # Snowflake requires `schema` even when the user hasn't set one.
        if d.get("schema") is None:
            d["schema"] = ""
        return d

    @model_validator(mode="before")
    @classmethod
    def _strip_encrypted_password(cls, data: Any) -> Any:
        """
        Drop the `password_encrypted` and `dbpassword` keys from incoming dict data.

        Non-dict input is returned unchanged.
        For dict input a filtered copy is returned:
        `model_validate(payload)` hands the caller's own dict to this hook,
        so removing keys in place would silently alter the caller's payload.
        """
        if isinstance(data, dict):
            dropped = ("password_encrypted", "dbpassword")
            data = {key: value for key, value in data.items() if key not in dropped}
        return data
251
+
252
+
253
class DatabaseConnectionConfig(ConnectionConfig):
    """
    Connection configuration for a SQL database.

    Use `DynamoConnectionConfig` for DynamoDB, `SnowflakeConnectionConfig` for Snowflake,
    and `MongoConnectionConfig` for MongoDB.
    """

    host: str
    port: int
    database: str
    user: str
    password: Optional[str] = None
    database_type: DatabaseType
    engine_options: Optional[dict] = None
    # Named `db_schema` in Python; populated from / aliased to the `schema` key.
    db_schema: Optional[str] = Field(default=None, alias="schema")
    data_encoding: Optional[str] = None
    is_read_only: bool = False
    s3_bucket_name: Optional[str] = None
    s3_redshift_iam_role: Optional[str] = None

    mask_type: Literal["database"] = "database"

    @model_validator(mode="after")
    def _reject_special_engines(self) -> "DatabaseConnectionConfig":
        """Raise `ValueError` for engines that have their own dedicated config class."""
        if self.database_type is DatabaseType.dynamodb:
            raise ValueError("For DynamoDB, use the DynamoConnectionConfig class instead")
        if self.database_type is DatabaseType.snowflake:
            raise ValueError("For Snowflake, use the SnowflakeConnectionConfig class instead")
        if self.database_type is DatabaseType.mongodb:
            raise ValueError("For MongoDB, use the MongoConnectionConfig class instead")
        return self

    @property
    def db_type(self) -> str:
        """Return the engine's string value, as used for the `db_type` key."""
        return self.database_type.value

    @model_serializer(mode="wrap")
    def _serialize(self, handler: Callable) -> dict:
        """
        Serialize with the default handler, then reshape the result for the server.

        The password moves to `dbpassword`, `database_type` is replaced by `db_type`,
        and engine-specific keys are blanked or removed as described inline.
        """
        d = handler(self)
        # The server expects the password under the `dbpassword` key.
        password = d.pop("password", None)
        if password is not None:
            d["dbpassword"] = password
        d.pop("database_type", None)
        d["db_type"] = self.db_type

        # The server requires certain fields to be present or absent
        # depending on the engine type.
        engine = self.database_type
        # `schema` is always blank for MySQL/MariaDB, and blank for any engine when unset.
        if engine in {DatabaseType.mysql, DatabaseType.mariadb} or d.get("schema") is None:
            d["schema"] = ""
        # `data_encoding` is only sent for these four engines.
        if engine not in {DatabaseType.mysql, DatabaseType.mariadb, DatabaseType.oracle, DatabaseType.postgres}:
            d.pop("data_encoding", None)
        # The S3 settings are only sent for Redshift.
        if engine is not DatabaseType.redshift:
            d.pop("s3_bucket_name", None)
            d.pop("s3_redshift_iam_role", None)
        # An empty or `None` `engine_options` is omitted entirely.
        if not d.get("engine_options"):
            d.pop("engine_options", None)
        return d

    @model_validator(mode="before")
    @classmethod
    def _normalize_incoming(cls, data: Any) -> Any:
        """
        Normalize a raw payload before field validation.

        Non-dict input is returned unchanged.
        For dict input, a normalized shallow copy is returned with
        `password_encrypted` / `dbpassword` removed,
        `schema` removed for MySQL/MariaDB,
        and `db_type` renamed to `database_type` when the latter is absent.
        """
        if not isinstance(data, dict):
            return data

        # `model_validate(payload)` hands the caller's own dict to this hook.
        # Normalizing in place would strip `db_type` (and other keys) from the caller's payload,
        # so a later `validate_connection` on the same dict could no longer dispatch on it.
        data = dict(data)

        for key in ("password_encrypted", "dbpassword"):
            data.pop(key, None)

        # Determine the engine type from whichever key is present.
        engine = data.get("database_type") or data.get("db_type", "")
        if isinstance(engine, DatabaseType):
            engine = engine.value

        # The API returns a `schema` value for engines that don't have schemas (MySQL/MariaDB).
        # Drop it so the model accurately reflects "not applicable".
        if engine in {DatabaseType.mysql.value, DatabaseType.mariadb.value}:
            data.pop("schema", None)

        # Map `db_type` → `database_type` for incoming payloads.
        if "db_type" in data and "database_type" not in data:
            data["database_type"] = data.pop("db_type")
        return data
335
+
336
+
337
class MssqlLinkedServerConnectionConfig(DatabaseConnectionConfig):
    """Connection configuration for a Microsoft SQL Server linked-server setup."""

    # Defaults to an empty string.
    # NOTE(review): presumably the name of the linked server; confirm against the API docs.
    linked_server: str = ""
341
+
342
+
343
class FileConnectionConfig(ConnectionConfig):
    """
    Abstract base for file-based connections.

    `is_file_mask_source` and `is_file_mask_destination`
    control whether the connection can be used as the source, destination, or both of a masking run.
    """

    base_directory: str = ""
    # Both roles are off by default; enable one or both.
    is_file_mask_source: bool = False
    is_file_mask_destination: bool = False

    # Fixed discriminator shared by every file-based connection.
    mask_type: Literal["file"] = "file"
356
+
357
+
358
class S3ConnectionConfig(FileConnectionConfig):
    """Connection configuration for an S3 bucket."""

    # Fixed discriminator; matches this class's key in `FILE_TYPE_MAP`.
    type: Literal["s3_connection"] = "s3_connection"
    bucket: str = ""
    iam_role_arn: Optional[str] = None
364
+
365
+
366
class AzureConnectionConfig(FileConnectionConfig):
    """
    Connection configuration for an Azure Blob Storage container.

    `connection_string` comes back encrypted from `list_connections`
    and is write-only in practice.
    """

    # Fixed discriminator; matches this class's key in `FILE_TYPE_MAP`.
    type: Literal["azure_blob_connection"] = "azure_blob_connection"
    container: str = ""
    connection_string: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def _strip_encrypted_connection_string(cls, data: Any) -> Any:
        """
        Drop the `connection_string_encrypted` key from incoming dict data.

        Non-dict input is returned unchanged.
        For dict input a filtered copy is returned:
        `model_validate(payload)` hands the caller's own dict to this hook,
        so removing the key in place would silently alter the caller's payload.
        """
        if isinstance(data, dict):
            # The API returns the encrypted form; drop it so `connection_string` stays None.
            data = {key: value for key, value in data.items() if key != "connection_string_encrypted"}
        return data
385
+
386
+
387
class MountedShareConnectionConfig(FileConnectionConfig):
    """Connection configuration for a mounted file share."""

    # Fixed discriminator; matches this class's key in `FILE_TYPE_MAP`.
    # No fields are added beyond those inherited from `FileConnectionConfig`.
    type: Literal["mounted_share_connection"] = "mounted_share_connection"
391
+
392
+
393
# Lookup from a file payload's `type` string to the model class that validates it.
FILE_TYPE_MAP: dict[str, type[FileConnectionConfig]] = {
    type_name: model
    for type_name, model in (
        ("s3_connection", S3ConnectionConfig),
        ("azure_blob_connection", AzureConnectionConfig),
        ("mounted_share_connection", MountedShareConnectionConfig),
    )
}

# Lookup from a database payload's `db_type` string to its dedicated model class.
# Engines not listed here use the default `DatabaseConnectionConfig`.
DB_TYPE_MAP: dict[str, type[ConnectionConfig]] = {
    engine.value: model
    for engine, model in (
        (DatabaseType.dynamodb, DynamoConnectionConfig),
        (DatabaseType.mongodb, MongoConnectionConfig),
        (DatabaseType.snowflake, SnowflakeConnectionConfig),
        (DatabaseType.mssql_linked, MssqlLinkedServerConnectionConfig),
    )
}
406
+
407
+
408
def validate_connection(payload: dict) -> ConnectionConfig:
    """
    Build the concrete `ConnectionConfig` subclass that matches an API response payload.

    The payload's `mask_type` selects the family:
    database payloads are dispatched on `db_type`
    (falling back to `DatabaseConnectionConfig` for engines without a dedicated class),
    and file payloads are dispatched on `type`.
    Raises `DataMasqueException` for an unknown `mask_type` or an unknown file `type`.
    """

    kind = payload.get("mask_type")

    if kind == "database":
        model = DB_TYPE_MAP.get(payload.get("db_type", ""), DatabaseConnectionConfig)
        return model.model_validate(payload)

    if kind != "file":
        raise DataMasqueException(f"Unexpected connection mask_type: {kind}")

    file_kind = payload.get("type", "")
    file_model = FILE_TYPE_MAP.get(file_kind)
    if file_model is None:
        raise DataMasqueException(f"Unexpected file connection type: {file_kind}")
    return file_model.model_validate(payload)
@@ -0,0 +1,62 @@
1
+ """Models related to data selection in endpoints such as /api/async-generate-ruleset."""
2
+
3
+ from typing import Optional, Union
4
+
5
+ from pydantic import BaseModel, ConfigDict
6
+
7
#: A path into a JSON/structured document,
#: e.g. `["employees", 0, "firstName"]` or `["users", "*", "email"]`.
#: String elements are object keys (or the `*` wildcard), and integer elements are list indices.
JsonPath = list[Union[str, int]]

#: A locator identifying a masked value within a file.
#: - Tabular files (CSV, parquet, fixed-width) use a bare string column name, e.g. `"email"`.
#: - Structured files (JSON) use a :data:`JsonPath`, e.g. `["employees", "*", "email"]`.
Locator = Union[str, JsonPath]
20
+
21
+
22
class UserSelection(BaseModel):
    """Information about selected files and locators for file masking ruleset generation."""

    # Unknown keys are rejected instead of being silently kept.
    model_config = ConfigDict(extra="forbid")

    files: list[str]
    # Each locator is either a bare string or a `JsonPath` list.
    locators: list[Locator]
29
+
30
+
31
class HashColumnsTableConfig(BaseModel):
    """
    Configuration for `hash_columns` at the table level.

    `table` contains table-level hash column defaults applied to all selected columns.
    `columns` contains per-column overrides (`None` or `[]` disables hashing for that column).
    """

    # Unknown keys are rejected instead of being silently kept.
    model_config = ConfigDict(extra="forbid")

    # Both parts are optional and default to `None`.
    table: Optional[list[str]] = None
    columns: Optional[dict[str, Optional[list[str]]]] = None
43
+
44
+
45
class SelectedColumns(BaseModel):
    """Selected columns and hash columns for database masking ruleset generation."""

    # Unknown keys are rejected instead of being silently kept.
    model_config = ConfigDict(extra="forbid")

    # Two levels of string keys ending in a list of strings.
    # NOTE(review): presumably schema -> table -> column names; confirm against the API docs.
    columns: dict[str, dict[str, list[str]]]
    # Same two-level keying, ending in a per-table `HashColumnsTableConfig`; optional.
    hash_columns: Optional[dict[str, dict[str, HashColumnsTableConfig]]] = None
52
+
53
+
54
class SelectedFileData(BaseModel):
    """Selected files and locators for file masking ruleset generation."""

    # Unknown keys are rejected instead of being silently kept.
    model_config = ConfigDict(extra="forbid")

    # One entry per group of files and the locators selected within them.
    user_selections: list[UserSelection]
60
+
61
+
62
# Either form of selection: database columns (`SelectedColumns`)
# or file locators (`SelectedFileData`).
SelectedData = Union[SelectedColumns, SelectedFileData]