datamasque-python 1.0.5__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. datamasque_python-1.1.0/.github/workflows/release-testpypi.yml +99 -0
  2. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/HISTORY.rst +23 -0
  3. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/PKG-INFO +1 -1
  4. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/__init__.py +29 -1
  5. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/discovery.py +175 -0
  6. datamasque_python-1.1.0/datamasque/client/discovery_configs.py +158 -0
  7. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/dmclient.py +2 -0
  8. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/exceptions.py +17 -0
  9. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/files.py +1 -1
  10. datamasque_python-1.1.0/datamasque/client/models/discovery.py +381 -0
  11. datamasque_python-1.1.0/datamasque/client/models/discovery_config.py +53 -0
  12. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/files.py +11 -0
  13. datamasque_python-1.1.0/datamasque/client/models/git.py +60 -0
  14. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/ruleset.py +12 -7
  15. datamasque_python-1.1.0/datamasque/client/models/ruleset_library.py +25 -0
  16. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/runs.py +9 -0
  17. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/status.py +9 -0
  18. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/ruleset_libraries.py +9 -4
  19. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/rulesets.py +22 -14
  20. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/pyproject.toml +1 -1
  21. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/setup.cfg +1 -1
  22. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_discovery.py +536 -0
  23. datamasque_python-1.1.0/tests/test_discovery_configs.py +520 -0
  24. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_files.py +39 -0
  25. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_ruleset_library.py +120 -55
  26. datamasque_python-1.1.0/tests/test_rulesets.py +277 -0
  27. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_runs.py +57 -1
  28. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/uv.lock +1 -1
  29. datamasque_python-1.0.5/.github/workflows/release-testpypi.yml +0 -60
  30. datamasque_python-1.0.5/datamasque/client/models/discovery.py +0 -229
  31. datamasque_python-1.0.5/datamasque/client/models/ruleset_library.py +0 -22
  32. datamasque_python-1.0.5/tests/test_rulesets.py +0 -119
  33. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/.editorconfig +0 -0
  34. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/.github/workflows/ci.yml +0 -0
  35. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/.github/workflows/release.yml +0 -0
  36. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/.gitignore +0 -0
  37. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/.readthedocs.yaml +0 -0
  38. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/CONTRIBUTING.rst +0 -0
  39. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/LICENSE +0 -0
  40. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/MANIFEST.in +0 -0
  41. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/Makefile +0 -0
  42. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/NOTICE +0 -0
  43. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/README.rst +0 -0
  44. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/base.py +0 -0
  45. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/connections.py +0 -0
  46. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/ifm.py +0 -0
  47. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/license.py +0 -0
  48. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/__init__.py +0 -0
  49. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/connection.py +0 -0
  50. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/data_selection.py +0 -0
  51. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/dm_instance.py +0 -0
  52. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/ifm.py +0 -0
  53. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/license.py +0 -0
  54. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/pagination.py +0 -0
  55. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/models/user.py +0 -0
  56. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/py.typed +0 -0
  57. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/runs.py +0 -0
  58. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/settings.py +0 -0
  59. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/datamasque/client/users.py +0 -0
  60. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/Makefile +0 -0
  61. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/client.models.rst +0 -0
  62. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/client.rst +0 -0
  63. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/conf.py +0 -0
  64. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/contributing.rst +0 -0
  65. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/history.rst +0 -0
  66. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/index.rst +0 -0
  67. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/installation.rst +0 -0
  68. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/make.bat +0 -0
  69. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/modules.rst +0 -0
  70. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/readme.rst +0 -0
  71. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/docs/usage.rst +0 -0
  72. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/__init__.py +0 -0
  73. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/conftest.py +0 -0
  74. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/helpers.py +0 -0
  75. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_base.py +0 -0
  76. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_connections.py +0 -0
  77. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_ifm.py +0 -0
  78. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_license.py +0 -0
  79. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_pagination.py +0 -0
  80. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_settings.py +0 -0
  81. {datamasque_python-1.0.5 → datamasque_python-1.1.0}/tests/test_users.py +0 -0
@@ -0,0 +1,99 @@
1
+ name: Release (TestPyPI)
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ ref:
7
+ description: 'Branch, tag, or commit SHA to build from'
8
+ required: true
9
+ default: 'main'
10
+ dev_version:
11
+ description: >-
12
+ Optional PEP 440 pre/dev version override (e.g. 1.1.0.dev1, 1.1.0a2, 1.1.0rc1).
13
+ Leave blank to use the version in pyproject.toml as-is.
14
+ required: false
15
+ default: ''
16
+
17
+ jobs:
18
+ build:
19
+ name: Build sdist and wheel
20
+ runs-on: ubuntu-latest
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+ with:
24
+ ref: ${{ inputs.ref }}
25
+
26
+ - name: Set up uv
27
+ uses: astral-sh/setup-uv@v5
28
+ with:
29
+ enable-cache: true
30
+
31
+ - name: Set up Python
32
+ run: uv python install 3.12
33
+
34
- name: Apply dev_version override
  if: inputs.dev_version != ''
  # Pass the input through the environment, never interpolated into the script body:
  # `${{ }}` is expanded into the script text before the shell runs, so splicing it inline
  # would let a crafted dev_version inject shell commands before the validation below could reject it.
  env:
    DEV_VERSION: ${{ inputs.dev_version }}
  run: |
    # Reject anything that isn't a pre-release / dev version — final releases must go through release.yml.
    if ! printf '%s' "${DEV_VERSION}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+(a|b|rc|\.dev)[0-9]+$'; then
      echo "::error::dev_version '${DEV_VERSION}' is not a PEP 440 pre/dev version (must end in aN, bN, rcN, or .devN)"
      exit 1
    fi
    # Rewrite the first `version = "..."` line of pyproject.toml.
    # Success is judged by the substitution count rather than by comparing the text before and after,
    # so requesting the version that pyproject.toml already carries is not misreported as a missing version line.
    uv run python -c "
    import os, re, pathlib
    version = os.environ['DEV_VERSION']
    path = pathlib.Path('pyproject.toml')
    text = path.read_text()
    new, replaced = re.subn(r'^version\s*=\s*\".*\"', f'version = \"{version}\"', text, count=1, flags=re.M)
    if not replaced:
        raise SystemExit('Failed to locate version line in pyproject.toml')
    path.write_text(new)
    "
57
+
58
+ - name: Show package version
59
+ env:
60
+ REF: ${{ inputs.ref }}
61
+ run: |
62
+ VERSION="$(uv run python -c 'import tomllib; print(tomllib.loads(open("pyproject.toml","rb").read().decode())["project"]["version"])')"
63
+ echo "Publishing version: ${VERSION} (from ref ${REF})"
64
+ echo "::notice title=TestPyPI version::${VERSION} from ${REF}"
65
+
66
+ - name: Build
67
+ run: uv build
68
+
69
+ - name: Validate distributions
70
+ run: uvx twine check dist/*
71
+
72
+ - name: Upload distributions
73
+ uses: actions/upload-artifact@v4
74
+ with:
75
+ name: dist-testpypi
76
+ path: dist/
77
+ retention-days: 7
78
+
79
+ publish:
80
+ name: Publish to TestPyPI
81
+ needs: build
82
+ runs-on: ubuntu-latest
83
+ environment:
84
+ name: testpypi
85
+ url: https://test.pypi.org/p/datamasque-python
86
+ permissions:
87
+ id-token: write
88
+ contents: read
89
+ steps:
90
+ - name: Download distributions
91
+ uses: actions/download-artifact@v4
92
+ with:
93
+ name: dist-testpypi
94
+ path: dist/
95
+
96
+ - name: Publish to TestPyPI
97
+ uses: pypa/gh-action-pypi-publish@release/v1
98
+ with:
99
+ repository-url: https://test.pypi.org/legacy/
@@ -2,6 +2,29 @@
2
2
  History
3
3
  =======
4
4
 
5
+ 1.1.0 (2026-06-24)
6
+ ------------------
7
+
8
+ * Added discovery configuration models and management APIs.
9
+ * Added schema-discovery and file-data-discovery APIs that take a saved discovery configuration
10
+ (``start_schema_discovery_run_from_config`` / ``start_file_data_discovery_run_from_config``).
11
+ Adoption is recommended; the older APIs that take individual options will be deprecated in a future release.
12
+ * Corrected the file-data-discovery ``include``/``skip`` filter syntax and added ``ignore_rules`` support.
13
+ * Added ``InvalidDiscoveryConfigError`` and ``DiscoveryConfigNotFoundError``,
14
+ raised when a discovery run can't start due to an unusable or missing discovery config.
15
+ * Added ``get_discovery_run_config_snapshot_yaml`` to retrieve the discovery-config YAML
16
+ that was effective at the start of a given discovery run.
17
+ * Added ``is_user_subscribed`` to ``MaskingRunRequest`` to subscribe the requesting user to a run's email notifications.
18
+ * Added ``auto_pull`` / ``auto_pull_branch`` to ``MaskingRunOptions``
19
+ to refresh the run's ruleset from git before starting.
20
+ * Added ``validation_error`` (and ``validation_error_type`` for rulesets) to ``Ruleset`` and ``RulesetLibrary``.
21
+ * Exposed git provenance on ``Ruleset`` and ``RulesetLibrary`` as a nested ``git`` field (``GitSnapshot``).
22
+ * Read-only fields (``id``, ``is_valid``, ``validation_error``, etc.)
23
+ are no longer echoed back in ``Ruleset`` / ``RulesetLibrary`` create/update request bodies.
24
+ * Fixed ``SslZipFile`` uploads to send the required ``database_type=mysql`` form field.
25
+ * **Breaking:** ``delete_ruleset_by_name_if_exists`` now requires a ``ruleset_type`` argument,
26
+ since ruleset names are unique only per type.
27
+
5
28
  1.0.5 (2026-06-18)
6
29
  ------------------
7
30
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamasque-python
3
- Version: 1.0.5
3
+ Version: 1.1.0
4
4
  Summary: Official Python client for the DataMasque data-masking API.
5
5
  Project-URL: Homepage, https://datamasque.com/
6
6
  Project-URL: Documentation, https://datamasque-python.readthedocs.io/
@@ -17,8 +17,10 @@ from datamasque.client.exceptions import (
17
17
  DataMasqueNotReadyError,
18
18
  DataMasqueTransportError,
19
19
  DataMasqueUserError,
20
+ DiscoveryConfigNotFoundError,
20
21
  FailedToStartError,
21
22
  IfmAuthError,
23
+ InvalidDiscoveryConfigError,
22
24
  InvalidLibraryError,
23
25
  InvalidRulesetError,
24
26
  RunNotCancellableError,
@@ -54,10 +56,15 @@ from datamasque.client.models.data_selection import (
54
56
  from datamasque.client.models.discovery import (
55
57
  ConstraintColumns,
56
58
  DiscoveryMatch,
59
+ FileDataDiscoveryFromConfigRequest,
60
+ FileDataDiscoveryOptions,
61
+ FileDataDiscoveryRequest,
57
62
  FileDiscoveryFile,
58
63
  FileDiscoveryLocatorResult,
59
64
  FileDiscoveryMatch,
60
65
  FileDiscoveryResult,
66
+ FileFilter,
67
+ FileFilterMatchAgainst,
61
68
  FileRulesetGenerationRequest,
62
69
  ForeignKeyRef,
63
70
  InDataDiscoveryConfig,
@@ -65,11 +72,13 @@ from datamasque.client.models.discovery import (
65
72
  ReferencingForeignKey,
66
73
  RulesetGenerationRequest,
67
74
  SchemaDiscoveryColumn,
75
+ SchemaDiscoveryFromConfigRequest,
68
76
  SchemaDiscoveryPage,
69
77
  SchemaDiscoveryRequest,
70
78
  SchemaDiscoveryResult,
71
79
  TableConstraints,
72
80
  )
81
+ from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, DiscoveryConfigType
73
82
  from datamasque.client.models.dm_instance import DataMasqueInstanceConfig
74
83
  from datamasque.client.models.files import (
75
84
  DataMasqueFile,
@@ -79,6 +88,7 @@ from datamasque.client.models.files import (
79
88
  SnowflakeKeyFile,
80
89
  SslZipFile,
81
90
  )
91
+ from datamasque.client.models.git import GitSnapshot
82
92
  from datamasque.client.models.ifm import (
83
93
  DataMasqueIfmInstanceConfig,
84
94
  IfmLog,
@@ -104,7 +114,12 @@ from datamasque.client.models.runs import (
104
114
  RunInfo,
105
115
  UnfinishedRun,
106
116
  )
107
- from datamasque.client.models.status import AsyncRulesetGenerationTaskStatus, MaskingRunStatus, ValidationStatus
117
+ from datamasque.client.models.status import (
118
+ AsyncRulesetGenerationTaskStatus,
119
+ MaskingRunStatus,
120
+ ValidationErrorType,
121
+ ValidationStatus,
122
+ )
108
123
  from datamasque.client.models.user import User, UserId, UserRole
109
124
 
110
125
  __version__ = version("datamasque-python")
@@ -130,18 +145,28 @@ __all__ = [
130
145
  "DatabaseConnectionConfig",
131
146
  "DatabaseType",
132
147
  "DatabricksConnectionConfig",
148
+ "DiscoveryConfig",
149
+ "DiscoveryConfigId",
150
+ "DiscoveryConfigNotFoundError",
151
+ "DiscoveryConfigType",
133
152
  "DiscoveryMatch",
134
153
  "DynamoConnectionConfig",
135
154
  "FailedToStartError",
136
155
  "FileConnectionConfig",
156
+ "FileDataDiscoveryFromConfigRequest",
157
+ "FileDataDiscoveryOptions",
158
+ "FileDataDiscoveryRequest",
137
159
  "FileDiscoveryFile",
138
160
  "FileDiscoveryLocatorResult",
139
161
  "FileDiscoveryMatch",
140
162
  "FileDiscoveryResult",
163
+ "FileFilter",
164
+ "FileFilterMatchAgainst",
141
165
  "FileId",
142
166
  "FileOrContent",
143
167
  "FileRulesetGenerationRequest",
144
168
  "ForeignKeyRef",
169
+ "GitSnapshot",
145
170
  "HashColumnsTableConfig",
146
171
  "IfmAuthError",
147
172
  "IfmLog",
@@ -151,6 +176,7 @@ __all__ = [
151
176
  "IfmTokenInfo",
152
177
  "InDataDiscoveryConfig",
153
178
  "InDataDiscoveryRule",
179
+ "InvalidDiscoveryConfigError",
154
180
  "InvalidLibraryError",
155
181
  "InvalidRulesetError",
156
182
  "JsonPath",
@@ -182,6 +208,7 @@ __all__ = [
182
208
  "RunNotCancellableError",
183
209
  "S3ConnectionConfig",
184
210
  "SchemaDiscoveryColumn",
211
+ "SchemaDiscoveryFromConfigRequest",
185
212
  "SchemaDiscoveryPage",
186
213
  "SchemaDiscoveryRequest",
187
214
  "SchemaDiscoveryResult",
@@ -202,5 +229,6 @@ __all__ = [
202
229
  "UserId",
203
230
  "UserRole",
204
231
  "UserSelection",
232
+ "ValidationErrorType",
205
233
  "ValidationStatus",
206
234
  ]
@@ -4,11 +4,15 @@ from io import BufferedIOBase, BytesIO, TextIOBase
4
4
  from pathlib import Path
5
5
  from typing import Iterator, Optional, Union
6
6
 
7
+ from requests import Response
8
+
7
9
  from datamasque.client.base import BaseClient, UploadFile
8
10
  from datamasque.client.exceptions import (
9
11
  AsyncRulesetGenerationInProgressError,
10
12
  DataMasqueException,
13
+ DiscoveryConfigNotFoundError,
11
14
  FailedToStartError,
15
+ InvalidDiscoveryConfigError,
12
16
  )
13
17
  from datamasque.client.models.connection import ConnectionId
14
18
  from datamasque.client.models.data_selection import (
@@ -17,9 +21,12 @@ from datamasque.client.models.data_selection import (
17
21
  SelectedFileData,
18
22
  )
19
23
  from datamasque.client.models.discovery import (
24
+ FileDataDiscoveryFromConfigRequest,
25
+ FileDataDiscoveryRequest,
20
26
  FileDiscoveryResult,
21
27
  FileRulesetGenerationRequest,
22
28
  RulesetGenerationRequest,
29
+ SchemaDiscoveryFromConfigRequest,
23
30
  SchemaDiscoveryPage,
24
31
  SchemaDiscoveryRequest,
25
32
  SchemaDiscoveryResult,
@@ -185,6 +192,13 @@ class DiscoveryClient(BaseClient):
185
192
  with zip_file.open(file_info) as file:
186
193
  yaml_content = file.read().decode("utf-8")
187
194
  rulesets.append(Ruleset(name=Path(file_info.filename).stem, yaml=yaml_content))
195
+
196
+ if not rulesets:
197
+ raise DataMasqueException(
198
+ f"Ruleset generation for connection {connection_id} reported `finished` "
199
+ f"but the downloaded archive contained no rulesets."
200
+ )
201
+
188
202
  return rulesets
189
203
 
190
204
  generated = response.json().get("generated_ruleset")
@@ -230,6 +244,148 @@ class DiscoveryClient(BaseClient):
230
244
  response=response,
231
245
  )
232
246
 
247
def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> RunId:
    """
    Starts a file data discovery run with the given configuration.

    Args:
        request: A `FileDataDiscoveryRequest` with connection and optional settings.

    Returns:
        RunId: The ID of the started discovery run

    Raises:
        FailedToStartError: If the server responds with any status other than 201.
    """

    data = request.model_dump(exclude_none=True, mode="json")
    # Status checking is disabled so that a non-201 response can be reported as `FailedToStartError` below.
    response = self.make_request(
        "POST",
        "/api/run-file-data-discovery/",
        data=data,
        require_status_check=False,
    )
    # An error response may carry no body at all; decoding it unconditionally would raise a JSON error
    # and mask the `FailedToStartError` below, so fall back to an empty payload.
    run_data = response.json() if response.content else {}

    if response.status_code == 201:
        logger.info("File data discovery run %s started successfully", run_data["id"])
        return RunId(run_data["id"])

    logger.error("File data discovery run failed to start: %s", run_data)
    raise FailedToStartError(
        f"File data discovery run failed to start "
        f"(server responded with status {response.status_code}: {response.text}).",
        response=response,
    )
280
+
281
def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromConfigRequest) -> RunId:
    """
    Starts a schema discovery run that takes its settings from a saved discovery config.

    Args:
        request: A `SchemaDiscoveryFromConfigRequest` carrying the `connection` and a required `discovery_config`
            (a saved config, or `None` for the server's defaults).

    Returns:
        RunId: The ID of the started discovery run

    Raises:
        DiscoveryConfigNotFoundError: the referenced discovery config cannot be found
            (it does not exist or is the wrong type for the run).
        InvalidDiscoveryConfigError: the config is present but not in a `valid` validation state,
            or its YAML is rejected when the run starts.
        FailedToStartError: the run failed to start for any other reason.
    """

    endpoint = "/api/schema-discovery/v2/"
    run_kind = "Schema discovery"
    return self._start_discovery_run_from_config(request, endpoint, run_kind)
301
+
302
def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFromConfigRequest) -> RunId:
    """
    Starts a file data discovery run that takes its settings from a saved discovery config.

    Args:
        request: A `FileDataDiscoveryFromConfigRequest` carrying the `connection`,
            a required `discovery_config` (a saved config, or `None` for the server's defaults),
            and optional run `options`.

    Returns:
        RunId: The ID of the started discovery run

    Raises:
        DiscoveryConfigNotFoundError: the referenced discovery config cannot be found
            (it does not exist or is the wrong type for the run).
        InvalidDiscoveryConfigError: the config is present but not in a `valid` validation state,
            or its YAML is rejected when the run starts.
        FailedToStartError: the run failed to start for any other reason.
    """

    endpoint = "/api/run-file-data-discovery/v2/"
    run_kind = "File data discovery"
    return self._start_discovery_run_from_config(request, endpoint, run_kind)
323
+
324
def _start_discovery_run_from_config(
    self,
    request: Union[SchemaDiscoveryFromConfigRequest, FileDataDiscoveryFromConfigRequest],
    path: str,
    run_kind: str,
) -> RunId:
    """
    Post a saved-config discovery request to `path` and return the id of the run it started.

    `run_kind` is the human-readable run name used in log and error messages.
    On any status other than 201, a discovery-config-specific error is raised when the body cites the config,
    otherwise a generic `FailedToStartError`.
    """

    payload = request.model_dump(exclude_none=True, mode="json")
    # `exclude_none` drops a `None` discovery config, but the server requires the key to be present
    # (a null selects its built-in defaults), so put it back explicitly.
    if "discovery_config" not in payload:
        payload["discovery_config"] = None

    response = self.make_request("POST", path, data=payload, require_status_check=False)

    # A response without a body is treated as an empty payload rather than decoded.
    body = {}
    if response.content:
        body = response.json()

    if response.status_code != 201:
        logger.error("%s run failed to start: %s", run_kind, body)
        # Raises only when the body cites the discovery config; otherwise falls through to the generic error.
        self._maybe_raise_discovery_config_error(body, response, run_kind)
        raise FailedToStartError(
            f"{run_kind} run failed to start (server responded with status {response.status_code}: {response.text}).",
            response=response,
        )

    started_id = body["id"]
    logger.info("%s run %s started successfully", run_kind, started_id)
    return RunId(started_id)
349
+
350
# Key in the server's 400 response body that signals the discovery config itself is unusable:
# a missing or wrong-type config, or one not in a `valid` validation state (string messages),
# or re-validation of broken saved-config YAML when the run starts
# (a `{"message", "line_number", "column_number"}` dict).
# `_maybe_raise_discovery_config_error` looks this key up in the decoded body.
DISCOVERY_CONFIG_ERROR_FIELD = "discovery_config"

# The phrase the server uses when the config id cannot be resolved (a missing or wrong-type config).
# It is matched as a case-sensitive substring of the formatted error detail
# to choose `DiscoveryConfigNotFoundError` over `InvalidDiscoveryConfigError`.
MISSING_DISCOVERY_CONFIG_SIGNATURE = "object does not exist"
358
+
359
@classmethod
def _maybe_raise_discovery_config_error(cls, run_data: object, response: Response, run_kind: str) -> None:
    """
    Raise a discovery-config error when the failed response body cites the discovery config.

    Returns without raising when `run_data` is not a dict or has no (truthy) entry under
    `DISCOVERY_CONFIG_ERROR_FIELD`, leaving the caller to raise its generic error.

    Raises:
        DiscoveryConfigNotFoundError: the error detail contains `MISSING_DISCOVERY_CONFIG_SIGNATURE`.
        InvalidDiscoveryConfigError: any other error reported under the discovery-config key.
    """
    errors = run_data.get(cls.DISCOVERY_CONFIG_ERROR_FIELD) if isinstance(run_data, dict) else None
    if not errors:
        return

    detail = cls._format_discovery_config_error(errors)
    config_is_missing = cls.MISSING_DISCOVERY_CONFIG_SIGNATURE in detail
    if not config_is_missing:
        raise InvalidDiscoveryConfigError(
            f"{run_kind} run failed to start due to discovery config error: {detail}",
            response=response,
        )

    raise DiscoveryConfigNotFoundError(
        f"{run_kind} run failed to start: the referenced discovery config could not be found: {detail}",
        response=response,
    )
379
+
380
@staticmethod
def _format_discovery_config_error(errors: object) -> str:
    """
    Render the server's discovery-config error as a string.

    A non-empty list is reduced to its first item; anything else is used as-is.
    A dict with a `message` key renders as that message, every other value as `str(value)`.
    """
    head = errors
    if isinstance(errors, list) and len(errors) > 0:
        head = errors[0]

    if isinstance(head, dict) and "message" in head:
        head = head["message"]

    return str(head)
388
+
233
389
  def iter_schema_discovery_results(self, run_id: RunId) -> Iterator[SchemaDiscoveryResult]:
234
390
  """Lazily iterate all schema discovery results for a run via the paginated v2 endpoint."""
235
391
 
@@ -284,3 +440,22 @@ class DiscoveryClient(BaseClient):
284
440
 
285
441
  response = self.make_request("GET", f"api/runs/{run_id}/file-discovery-results/")
286
442
  return [FileDiscoveryResult.model_validate(d) for d in response.json()]
443
+
444
def get_discovery_run_config_snapshot_yaml(self, run_id: RunId, *, timezone: Optional[str] = None) -> str:
    """
    Returns the discovery-config YAML that was effective at the start of the given discovery run.

    The YAML is prefixed with a commented provenance header naming the saved config
    (or the built-in defaults) the run used, and whether it has since been modified or deleted.
    `timezone`, a `±HH:MM` UTC offset, sets the timezone of the header timestamp; the server defaults to UTC.

    Raises:
        DataMasqueException: the archive returned by the server contains no files.
    """

    # Only send the query parameter when the caller supplied an offset.
    params = None
    if timezone is not None:
        params = {"timezone": timezone}

    response = self.make_request("GET", f"/api/discovery/runs/{run_id}/config-snapshot/", params=params)

    # The response body is read as a zip archive; its first member is returned as the snapshot.
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        members = archive.namelist()
        if len(members) == 0:
            raise DataMasqueException(f"Discovery run {run_id} config snapshot archive contained no files.")

        with archive.open(members[0]) as snapshot_file:
            raw_yaml = snapshot_file.read()

        return raw_yaml.decode("utf-8")
@@ -0,0 +1,158 @@
1
+ import logging
2
+ from typing import Iterator, Optional
3
+
4
+ from datamasque.client.base import BaseClient
5
+ from datamasque.client.exceptions import DataMasqueApiError, DataMasqueException
6
+ from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, DiscoveryConfigType
7
+ from datamasque.client.models.pagination import Page
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class DiscoveryConfigClient(BaseClient):
    """Client mixin with create/read/update/delete methods for discovery configs. Mixed into `DataMasqueClient`."""

    def iter_discovery_configs(self) -> Iterator[DiscoveryConfig]:
        """Return a lazy iterator over all discovery configs, read from the paginated endpoint."""

        endpoint = "/api/discovery/configs/"
        return self._iter_paginated(endpoint, model=DiscoveryConfig)

    def list_discovery_configs(self) -> list[DiscoveryConfig]:
        """
        Returns every discovery config as a list.

        Note: the YAML content is not included in the list response for performance.
        Use `get_discovery_config` to retrieve the full config with its YAML body.
        """

        return [*self.iter_discovery_configs()]

    def get_discovery_config(self, config_id: DiscoveryConfigId) -> DiscoveryConfig:
        """Fetches one discovery config by its ID."""

        response = self.make_request("GET", f"/api/discovery/configs/{config_id}/")
        payload = response.json()
        return DiscoveryConfig.model_validate(payload)

    def _get_discovery_config_id_by_name(
        self, name: str, config_type: DiscoveryConfigType
    ) -> Optional[DiscoveryConfigId]:
        """
        Look up the id of the config with the given name and type using one filtered list request.

        Returns `None` when the server reports no match.
        Raises `DataMasqueApiError` when the matching entry carries no `id`.
        """

        query = {"name_exact": name, "config_type": config_type.value, "limit": 1}
        response = self.make_request("GET", "/api/discovery/configs/", params=query)
        results = Page[DiscoveryConfig].model_validate(response.json()).results
        if not results:
            return None

        first_id = results[0].id
        if first_id is not None:
            return first_id

        raise DataMasqueApiError(
            "Server returned a discovery config list entry without an `id`.",
            response=response,
        )

    def get_discovery_config_by_name(self, name: str, config_type: DiscoveryConfigType) -> Optional[DiscoveryConfig]:
        """
        Finds the discovery config with the given name and type (case-sensitive, exact match).

        Config names are unique per type, so a type is required to identify a single config.
        Returns the full config when found, otherwise `None`.
        """

        config_id = self._get_discovery_config_id_by_name(name, config_type)
        # The id lookup uses the list endpoint; fetch the config by id to return the full record.
        return None if config_id is None else self.get_discovery_config(config_id)

    def create_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
        """
        Creates a new discovery config on the server.

        Copies the server-assigned fields
        (`id`, `is_valid`, `validation_error`, `created`, `modified`) onto `config` and returns it.
        """

        payload = config.model_dump(exclude_none=True, by_alias=True, mode="json")
        response = self.make_request("POST", "/api/discovery/configs/", data=payload)
        server_copy = DiscoveryConfig.model_validate(response.json())
        for field_name in ("id", "is_valid", "validation_error", "created", "modified"):
            setattr(config, field_name, getattr(server_copy, field_name))

        logger.info('Creation of discovery config "%s" successful', config.name)
        return config

    def update_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
        """
        Performs a full update (PUT) of the discovery config.

        The config must have its `id` set
        (i.e., it must have been previously created or retrieved from the server);
        otherwise `ValueError` is raised.
        Refreshes `is_valid`, `validation_error` and `modified` on `config` from the response and returns it.
        """

        if config.id is None:
            raise ValueError("Cannot update a discovery config that has not been created yet (id is None)")

        payload = config.model_dump(exclude_none=True, by_alias=True, mode="json")
        response = self.make_request("PUT", f"/api/discovery/configs/{config.id}/", data=payload)
        server_copy = DiscoveryConfig.model_validate(response.json())
        for field_name in ("is_valid", "validation_error", "modified"):
            setattr(config, field_name, getattr(server_copy, field_name))

        logger.debug('Update of discovery config "%s" successful', config.name)
        return config

    def create_or_update_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig:
        """
        Creates the config, or updates the existing one that has the same name and type.

        Sets the config's `id` property.
        """

        existing_id = self._get_discovery_config_id_by_name(config.name, config.config_type)
        if existing_id is None:
            return self.create_discovery_config(config)

        config.id = existing_id
        return self.update_discovery_config(config)

    def delete_discovery_config_by_id_if_exists(self, config_id: DiscoveryConfigId) -> None:
        """
        Deletes the discovery config with the given ID.

        No-op if the config does not exist.
        """

        self._delete_if_exists(f"/api/discovery/configs/{config_id}/")

    def delete_discovery_config_by_name_if_exists(self, name: str, config_type: DiscoveryConfigType) -> None:
        """
        Deletes the discovery config with the given name and type.

        Config names are unique per type, so a type is required to identify a single config.
        No-op if no such config exists.
        Raises `DataMasqueException` if a matching entry has no `id`.
        """

        # The complete listing is fetched before anything is deleted.
        for config in self.list_discovery_configs():
            if config.name != name or config.config_type is not config_type:
                continue

            if config.id is None:
                raise DataMasqueException(f'Server returned a discovery config named "{config.name}" without an `id`.')

            self.delete_discovery_config_by_id_if_exists(config.id)

    def get_default_discovery_config_yaml(self) -> str:
        """Returns the server's built-in default discovery configuration as a YAML string."""

        response = self.make_request("GET", "/api/discovery/configs/defaults/")
        return str(response.content, "utf-8")
@@ -1,6 +1,7 @@
1
1
  from datamasque.client.base import FileOrContent, UploadFile
2
2
  from datamasque.client.connections import ConnectionClient
3
3
  from datamasque.client.discovery import DiscoveryClient
4
+ from datamasque.client.discovery_configs import DiscoveryConfigClient
4
5
  from datamasque.client.files import FileClient
5
6
  from datamasque.client.license import LicenseClient
6
7
  from datamasque.client.ruleset_libraries import RulesetLibraryClient
@@ -20,6 +21,7 @@ class DataMasqueClient(
20
21
  FileClient,
21
22
  RunClient,
22
23
  DiscoveryClient,
24
+ DiscoveryConfigClient,
23
25
  UserClient,
24
26
  SettingsClient,
25
27
  ):
@@ -41,6 +41,23 @@ class InvalidLibraryError(FailedToStartError):
41
41
  """Specific error for when runs fail to start due to having an invalid ruleset library."""
42
42
 
43
43
 
44
class InvalidDiscoveryConfigError(FailedToStartError):
    """
    Raised when a discovery run fails to start because the referenced config is present but unusable.

    The config exists but is not in a `valid` validation state, or its YAML is rejected when the run starts.
    A config that cannot be found raises `DiscoveryConfigNotFoundError` instead.

    Subclasses `FailedToStartError`, so handlers for that error also catch this one.
    """
51
+
52
+
53
class DiscoveryConfigNotFoundError(FailedToStartError):
    """
    Raised when a discovery run references a discovery config that cannot be found.

    The config does not exist or is the wrong type for the run.

    Subclasses `FailedToStartError`, so handlers for that error also catch this one.
    """
59
+
60
+
44
61
  class DataMasqueTransportError(DataMasqueException):
45
62
  """
46
63
  Raised when a request to the DataMasque server fails before any response is received.
@@ -30,7 +30,7 @@ class FileClient(BaseClient):
30
30
  response = self.make_request(
31
31
  "POST",
32
32
  file_type.get_url(),
33
- data={"name": file_name},
33
+ data={"name": file_name, **file_type.get_extra_form_data()},
34
34
  files=[
35
35
  UploadFile(
36
36
  field_name=file_type.get_content_param_name(),