datasynth-py 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include datasynth_py *.py py.typed
@@ -0,0 +1,132 @@
1
+ Metadata-Version: 2.4
2
+ Name: datasynth-py
3
+ Version: 0.2.1
4
+ Summary: Python wrapper for DataSynth synthetic data generation
5
+ Author-email: EY ASU RnD <michael.ivertowski@ch.ey.com>
6
+ Maintainer-email: EY ASU RnD <michael.ivertowski@ch.ey.com>
7
+ License-Expression: Apache-2.0
8
+ Project-URL: Homepage, https://github.com/ey-asu-rnd/SyntheticData
9
+ Project-URL: Documentation, https://ey-asu-rnd.github.io/SyntheticData/
10
+ Project-URL: Repository, https://github.com/ey-asu-rnd/SyntheticData
11
+ Project-URL: Changelog, https://github.com/ey-asu-rnd/SyntheticData/blob/main/CHANGELOG.md
12
+ Project-URL: Issues, https://github.com/ey-asu-rnd/SyntheticData/issues
13
+ Keywords: synthetic-data,data-generation,testing,machine-learning,financial-data,accounting,journal-entries,fraud-detection
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Financial and Insurance Industry
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
26
+ Classifier: Topic :: Software Development :: Testing :: Mocking
27
+ Classifier: Typing :: Typed
28
+ Requires-Python: >=3.9
29
+ Description-Content-Type: text/markdown
30
+ Provides-Extra: cli
31
+ Requires-Dist: PyYAML>=6.0; extra == "cli"
32
+ Provides-Extra: memory
33
+ Requires-Dist: pandas>=2.0; extra == "memory"
34
+ Provides-Extra: streaming
35
+ Requires-Dist: websockets>=12.0; extra == "streaming"
36
+ Provides-Extra: all
37
+ Requires-Dist: PyYAML>=6.0; extra == "all"
38
+ Requires-Dist: pandas>=2.0; extra == "all"
39
+ Requires-Dist: websockets>=12.0; extra == "all"
40
+ Provides-Extra: dev
41
+ Requires-Dist: PyYAML>=6.0; extra == "dev"
42
+ Requires-Dist: pandas>=2.0; extra == "dev"
43
+ Requires-Dist: websockets>=12.0; extra == "dev"
44
+ Requires-Dist: pytest>=7.0; extra == "dev"
45
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
46
+ Requires-Dist: mypy>=1.0; extra == "dev"
47
+ Requires-Dist: ruff>=0.1; extra == "dev"
48
+
49
+ # datasynth-py
50
+
51
+ Python wrapper for the DataSynth synthetic data generator.
52
+
53
+ ## Installation
54
+
55
+ ### From PyPI
56
+
57
+ ```bash
58
+ pip install datasynth-py[all]
59
+ ```
60
+
61
+ Or install specific extras:
62
+
63
+ ```bash
64
+ pip install datasynth-py # Core only (no dependencies)
65
+ pip install datasynth-py[cli] # CLI generation (PyYAML)
66
+ pip install datasynth-py[memory] # In-memory tables (pandas)
67
+ pip install datasynth-py[streaming] # Streaming (websockets)
68
+ pip install datasynth-py[all] # All optional dependencies
69
+ ```
70
+
71
+ ### From Source
72
+
73
+ ```bash
74
+ cd python
75
+ pip install -e ".[all]"
76
+ ```
77
+
78
+ ## Quick Start
79
+
80
+ ```python
81
+ from datasynth_py import DataSynth, CompanyConfig, Config, GlobalSettings, ChartOfAccountsSettings
82
+
83
+ config = Config(
84
+ global_settings=GlobalSettings(
85
+ industry="retail",
86
+ start_date="2024-01-01",
87
+ period_months=12,
88
+ ),
89
+ companies=[
90
+ CompanyConfig(code="C001", name="Retail Corp", currency="USD", country="US"),
91
+ ],
92
+ chart_of_accounts=ChartOfAccountsSettings(complexity="small"),
93
+ )
94
+
95
+ synth = DataSynth()
96
+ result = synth.generate(config=config, output={"format": "csv", "sink": "temp_dir"})
97
+ print(result.output_dir)
98
+ ```
99
+
100
+ ## Using Blueprints
101
+
102
+ ```python
103
+ from datasynth_py import DataSynth
104
+ from datasynth_py.config import blueprints
105
+
106
+ config = blueprints.retail_small(companies=4, transactions=10000)
107
+ synth = DataSynth()
108
+ result = synth.generate(config=config, output={"format": "parquet", "sink": "path", "path": "./output"})
109
+ ```
110
+
111
+ ## Requirements
112
+
113
+ The wrapper shells out to the `datasynth-data` CLI binary. Build it with:
114
+
115
+ ```bash
116
+ cargo build --release
117
+ export DATASYNTH_BINARY=target/release/datasynth-data
118
+ ```
119
+
120
+ Or pass `binary_path` when creating the client:
121
+
122
+ ```python
123
+ synth = DataSynth(binary_path="/path/to/datasynth-data")
124
+ ```
125
+
126
+ ## Documentation
127
+
128
+ See the [Python Wrapper Guide](https://github.com/ey-asu-rnd/SyntheticData/blob/main/docs/src/user-guide/python-wrapper.md) for complete documentation.
129
+
130
+ ## License
131
+
132
+ Apache 2.0 License - see the main project LICENSE file.
@@ -0,0 +1,84 @@
1
+ # datasynth-py
2
+
3
+ Python wrapper for the DataSynth synthetic data generator.
4
+
5
+ ## Installation
6
+
7
+ ### From PyPI
8
+
9
+ ```bash
10
+ pip install datasynth-py[all]
11
+ ```
12
+
13
+ Or install specific extras:
14
+
15
+ ```bash
16
+ pip install datasynth-py # Core only (no dependencies)
17
+ pip install datasynth-py[cli] # CLI generation (PyYAML)
18
+ pip install datasynth-py[memory] # In-memory tables (pandas)
19
+ pip install datasynth-py[streaming] # Streaming (websockets)
20
+ pip install datasynth-py[all] # All optional dependencies
21
+ ```
22
+
23
+ ### From Source
24
+
25
+ ```bash
26
+ cd python
27
+ pip install -e ".[all]"
28
+ ```
29
+
30
+ ## Quick Start
31
+
32
+ ```python
33
+ from datasynth_py import DataSynth, CompanyConfig, Config, GlobalSettings, ChartOfAccountsSettings
34
+
35
+ config = Config(
36
+ global_settings=GlobalSettings(
37
+ industry="retail",
38
+ start_date="2024-01-01",
39
+ period_months=12,
40
+ ),
41
+ companies=[
42
+ CompanyConfig(code="C001", name="Retail Corp", currency="USD", country="US"),
43
+ ],
44
+ chart_of_accounts=ChartOfAccountsSettings(complexity="small"),
45
+ )
46
+
47
+ synth = DataSynth()
48
+ result = synth.generate(config=config, output={"format": "csv", "sink": "temp_dir"})
49
+ print(result.output_dir)
50
+ ```
51
+
52
+ ## Using Blueprints
53
+
54
+ ```python
55
+ from datasynth_py import DataSynth
56
+ from datasynth_py.config import blueprints
57
+
58
+ config = blueprints.retail_small(companies=4, transactions=10000)
59
+ synth = DataSynth()
60
+ result = synth.generate(config=config, output={"format": "parquet", "sink": "path", "path": "./output"})
61
+ ```
62
+
63
+ ## Requirements
64
+
65
+ The wrapper shells out to the `datasynth-data` CLI binary. Build it with:
66
+
67
+ ```bash
68
+ cargo build --release
69
+ export DATASYNTH_BINARY=target/release/datasynth-data
70
+ ```
71
+
72
+ Or pass `binary_path` when creating the client:
73
+
74
+ ```python
75
+ synth = DataSynth(binary_path="/path/to/datasynth-data")
76
+ ```
77
+
78
+ ## Documentation
79
+
80
+ See the [Python Wrapper Guide](https://github.com/ey-asu-rnd/SyntheticData/blob/main/docs/src/user-guide/python-wrapper.md) for complete documentation.
81
+
82
+ ## License
83
+
84
+ Apache 2.0 License - see the main project LICENSE file.
@@ -0,0 +1,48 @@
1
+ """Python wrapper for DataSynth."""
2
+
3
+ from datasynth_py.client import DataSynth, GenerationResult, OutputSpec, StreamingSession
4
+ from datasynth_py.config import blueprints
5
+ from datasynth_py.config.models import (
6
+ AuditSettings,
7
+ BankingSettings,
8
+ ChartOfAccountsSettings,
9
+ CompanyConfig,
10
+ CompanySettings,
11
+ Config,
12
+ DataQualitySettings,
13
+ FraudSettings,
14
+ GlobalSettings,
15
+ GraphExportSettings,
16
+ OutputSettings,
17
+ ScenarioSettings,
18
+ TemporalDriftSettings,
19
+ TransactionSettings,
20
+ )
21
+ from datasynth_py.config.validation import ConfigValidationError
22
+ from datasynth_py.fingerprint import FidelityReport, FingerprintClient, FingerprintInfo
23
+
24
+ __all__ = [
25
+ "AuditSettings",
26
+ "BankingSettings",
27
+ "ChartOfAccountsSettings",
28
+ "CompanyConfig",
29
+ "CompanySettings",
30
+ "Config",
31
+ "ConfigValidationError",
32
+ "DataQualitySettings",
33
+ "DataSynth",
34
+ "FidelityReport",
35
+ "FingerprintClient",
36
+ "FingerprintInfo",
37
+ "FraudSettings",
38
+ "GenerationResult",
39
+ "GlobalSettings",
40
+ "GraphExportSettings",
41
+ "OutputSettings",
42
+ "OutputSpec",
43
+ "ScenarioSettings",
44
+ "StreamingSession",
45
+ "TemporalDriftSettings",
46
+ "TransactionSettings",
47
+ "blueprints",
48
+ ]
@@ -0,0 +1,344 @@
1
+ """Client entrypoint for the DataSynth Python wrapper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import pathlib
8
+ import subprocess
9
+ import tempfile
10
+ import urllib.error
11
+ import urllib.request
12
+ from dataclasses import dataclass, field
13
+ from typing import Any, AsyncIterator, Dict, List, Optional
14
+
15
+ import importlib.util
16
+
17
+ from datasynth_py.config.models import Config, MissingDependencyError
18
+
19
+
20
@dataclass(frozen=True)
class OutputSpec:
    """Immutable description of where and how generated data is delivered."""

    # Output format requested by the caller; ``_write_config`` maps
    # "csv", "jsonl" and "parquet" to the CLI format names.
    format: str = "csv"
    # Destination kind: "path", "temp_dir" or "memory"
    # (any other value is rejected by ``DataSynth._resolve_output_dir``).
    sink: str = "temp_dir"
    # Target directory; must be set when ``sink == "path"``.
    path: Optional[str] = None
    # Not read anywhere in this module.
    # NOTE(review): presumably reserved for output compression -- confirm.
    compression: Optional[str] = None
    # In-memory table flavour; ``_load_tables`` only accepts "pandas".
    table_format: str = "pandas"
27
+
28
+
29
@dataclass(frozen=True)
class GenerationResult:
    """Immutable outcome of a ``DataSynth.generate`` call."""

    # Directory holding the generated files; ``generate`` sets it to None
    # when the sink is "memory".
    output_dir: Optional[str] = None
    # Loaded tables keyed by file stem; only populated for the "memory" sink.
    tables: Optional[Dict[str, Any]] = None
    # Free-form metadata; defaults to a fresh empty dict per instance.
    metadata: Dict[str, Any] = field(default_factory=dict)
34
+
35
+
36
class DataSynth:
    """Python wrapper for running DataSynth generation.

    Batch generation shells out to the ``datasynth-data`` CLI binary, while
    streaming talks to a DataSynth server over HTTP.
    """

    def __init__(
        self,
        binary_path: Optional[str] = None,
        server_url: str = "http://localhost:3000",
        api_key: Optional[str] = None,
        request_timeout: float = 30.0,
    ) -> None:
        """Create a client.

        Args:
            binary_path: Path to the ``datasynth-data`` binary. Falls back to
                the ``DATASYNTH_BINARY`` environment variable and finally to
                ``"datasynth-data"`` (resolved through ``PATH``).
            server_url: Base URL of the DataSynth server; a trailing slash is
                stripped.
            api_key: Optional key sent as the ``X-API-Key`` header.
            request_timeout: Timeout in seconds for HTTP requests.
        """
        self._binary_path = binary_path or os.environ.get("DATASYNTH_BINARY", "datasynth-data")
        self._server_url = server_url.rstrip("/")
        self._api_key = api_key
        self._request_timeout = request_timeout
        self._fingerprint_client: Optional["FingerprintClient"] = None

    @property
    def fingerprint(self) -> "FingerprintClient":
        """Access fingerprint operations.

        Returns:
            FingerprintClient for extract, validate, info, evaluate operations.

        Example:
            >>> synth = DataSynth()
            >>> synth.fingerprint.extract("./data/", "./fp.dsf")
            >>> info = synth.fingerprint.info("./fp.dsf")
        """
        if self._fingerprint_client is None:
            # Imported lazily and cached so the client is built at most once.
            from datasynth_py.fingerprint import FingerprintClient
            self._fingerprint_client = FingerprintClient(self._binary_path)
        return self._fingerprint_client

    def generate(
        self,
        config: Config,
        output: Optional[OutputSpec | Dict[str, Any]] = None,
        seed: Optional[int] = None,
    ) -> GenerationResult:
        """Run a batch generation through the CLI binary.

        Args:
            config: Configuration to generate from; validated first.
            output: ``OutputSpec`` or an equivalent dict; ``None`` uses the
                ``OutputSpec`` defaults.
            seed: When given, overrides the ``global.seed`` setting.

        Returns:
            A ``GenerationResult`` carrying either the output directory or,
            for the "memory" sink, the loaded tables.

        Raises:
            ValueError: If the sink is "path" without a path, or unknown.
            RuntimeError: If the binary is missing or exits with an error.
        """
        import shutil

        config.validate()
        output_spec = _coerce_output_spec(output)
        if seed is not None:
            config = config.override(**{"global": {"seed": seed}})
        if output_spec.sink == "path" and not output_spec.path:
            raise ValueError("OutputSpec.path must be set when sink='path'.")

        output_dir = self._resolve_output_dir(output_spec)
        in_memory = output_spec.sink == "memory"
        config_path: Optional[str] = None
        try:
            config_path = self._write_config(config, output_dir, output_spec)
            self._run_cli(config_path=config_path, output_dir=output_dir)
            if in_memory:
                tables = _load_tables(output_dir, output_spec)
                return GenerationResult(output_dir=None, tables=tables)
            return GenerationResult(output_dir=output_dir, tables=None)
        finally:
            # The temporary config file is never exposed to the caller, so
            # remove it once the CLI has finished (successfully or not).
            if config_path is not None:
                try:
                    os.remove(config_path)
                except OSError:
                    pass  # best-effort cleanup; never mask the real outcome
            # For the memory sink the scratch directory is not returned
            # either, so it would otherwise be leaked on every call.
            if in_memory:
                shutil.rmtree(output_dir, ignore_errors=True)

    def stream(
        self,
        config: Optional[Config] = None,
        events_per_second: Optional[int] = None,
        max_events: Optional[int] = None,
        inject_anomalies: Optional[bool] = None,
        seed: Optional[int] = None,
    ) -> "StreamingSession":
        """Start streaming on the server and return a session handle.

        When ``config`` is given it is validated and posted to
        ``/api/config`` first. Only the stream options that are not ``None``
        are sent to ``/api/stream/start``.

        Raises:
            RuntimeError: If the server answers with an HTTP error status.
        """
        if config is not None:
            config.validate()
            payload = _config_to_server_payload(config, seed)
            self._post_json("/api/config", payload)

        candidate_options = {
            "events_per_second": events_per_second,
            "max_events": max_events,
            "inject_anomalies": inject_anomalies,
        }
        stream_payload: Dict[str, Any] = {
            key: value for key, value in candidate_options.items() if value is not None
        }
        self._post_json("/api/stream/start", stream_payload)
        return StreamingSession(
            server_url=self._server_url,
            api_key=self._api_key,
            request_timeout=self._request_timeout,
        )

    def _write_config(self, config: Config, output_dir: str, output_spec: OutputSpec) -> str:
        """Serialise ``config`` to a temporary YAML file and return its path.

        The ``output`` section is forced to point at ``output_dir`` and to
        use the CLI format matching ``output_spec.format``.

        Raises:
            MissingDependencyError: If PyYAML is not installed.
        """
        yaml_spec = importlib.util.find_spec("yaml")
        if yaml_spec is None:
            raise MissingDependencyError(
                "PyYAML is required to generate config files. Install with `pip install PyYAML`."
            )
        import yaml  # type: ignore

        payload = config.to_dict()

        # Ensure output section exists with required fields; a section that
        # is present but not a mapping (e.g. None) is replaced as well.
        if not isinstance(payload.get("output"), dict):
            payload["output"] = {}
        payload["output"]["output_directory"] = output_dir

        # Map output format from OutputSpec; unknown formats fall back to csv.
        format_map = {"csv": "csv", "jsonl": "json", "parquet": "parquet"}
        cli_format = format_map.get(output_spec.format, "csv")
        payload["output"]["formats"] = [cli_format]

        data = yaml.safe_dump(payload, sort_keys=False)
        fd, path = tempfile.mkstemp(prefix="datasynth_", suffix=".yaml")
        # Write through the descriptor mkstemp already opened instead of
        # closing it and reopening the file by name.
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(data)
        return path

    def _resolve_output_dir(self, output: OutputSpec) -> str:
        """Return the directory the CLI should write into.

        Raises:
            ValueError: If ``output.sink`` is not a known sink (or is "path"
                without a path).
        """
        if output.sink == "path" and output.path:
            return output.path
        # Both sinks write to a scratch directory first.
        if output.sink in ("temp_dir", "memory"):
            return tempfile.mkdtemp(prefix="datasynth_output_")
        raise ValueError(f"Unknown output sink: {output.sink}")

    def _run_cli(self, config_path: str, output_dir: str) -> None:
        """Invoke ``<binary> generate --config ... --output ...``.

        Raises:
            RuntimeError: If the binary cannot be found or exits non-zero;
                the message carries the captured stderr (or stdout).
        """
        command = [
            self._binary_path,
            "generate",
            "--config",
            config_path,
            "--output",
            output_dir,
        ]
        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except FileNotFoundError as exc:
            raise RuntimeError(
                "datasynth-data binary not found. Build it with `cargo build --release` "
                "and set DATASYNTH_BINARY or pass binary_path."
            ) from exc
        except subprocess.CalledProcessError as exc:
            raise RuntimeError(
                f"datasynth-data failed: {exc.stderr or exc.stdout}"
            ) from exc

    def _post_json(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``payload`` as JSON to ``path`` and return the decoded reply.

        An empty response body yields an empty dict.

        Raises:
            RuntimeError: If the server answers with an HTTP error status.
        """
        url = f"{self._server_url}{path}"
        data = json.dumps(payload).encode("utf-8")
        headers = {"Content-Type": "application/json"}
        if self._api_key:
            headers["X-API-Key"] = self._api_key
        request = urllib.request.Request(url, data=data, headers=headers, method="POST")
        try:
            with urllib.request.urlopen(request, timeout=self._request_timeout) as response:
                body = response.read().decode("utf-8")
        except urllib.error.HTTPError as exc:
            body = exc.read().decode("utf-8")
            raise RuntimeError(f"Server error ({exc.code}): {body}") from exc
        return json.loads(body) if body else {}
188
+
189
+
190
@dataclass(frozen=True)
class StreamingSession:
    """Handle for controlling and consuming a server-side event stream."""

    # Base URL of the DataSynth server (no trailing slash expected).
    server_url: str
    # Optional key sent as the ``X-API-Key`` header.
    api_key: Optional[str]
    # Timeout in seconds for control requests.
    request_timeout: float

    def pause(self) -> Dict[str, Any]:
        """Pause the stream and return the server response."""
        return self._control("/api/stream/pause")

    def resume(self) -> Dict[str, Any]:
        """Resume the stream and return the server response."""
        return self._control("/api/stream/resume")

    def stop(self) -> Dict[str, Any]:
        """Stop the stream and return the server response."""
        return self._control("/api/stream/stop")

    def trigger_pattern(self, pattern: str) -> Dict[str, Any]:
        """Trigger a pattern in the streaming session.

        Args:
            pattern: Pattern name (year_end_spike, period_end_spike, fraud_cluster, etc.)

        Returns:
            Response from the server.
        """
        from urllib.parse import quote

        # Percent-encode the name so characters such as "/", "?" or spaces
        # cannot alter the request path; plain identifiers are unchanged.
        encoded = quote(pattern, safe="")
        return self._control(f"/api/stream/trigger/{encoded}")

    def trigger_year_end(self) -> Dict[str, Any]:
        """Trigger year-end closing patterns (high volume, accruals, adjustments)."""
        return self.trigger_pattern("year_end_spike")

    def trigger_month_end(self) -> Dict[str, Any]:
        """Trigger month-end/period-end patterns."""
        return self.trigger_pattern("period_end_spike")

    def trigger_fraud_cluster(self) -> Dict[str, Any]:
        """Trigger a cluster of fraud-related transactions."""
        return self.trigger_pattern("fraud_cluster")

    def trigger_quarter_end(self) -> Dict[str, Any]:
        """Trigger quarter-end closing patterns."""
        return self.trigger_pattern("quarter_end_spike")

    async def events(self) -> AsyncIterator[Dict[str, Any]]:
        """Yield decoded JSON events from the server's ``/ws/events`` socket.

        Raises:
            MissingDependencyError: If the ``websockets`` package is missing.
        """
        websockets_spec = importlib.util.find_spec("websockets")
        if websockets_spec is None:
            raise MissingDependencyError(
                "The websockets package is required for streaming. Install with `pip install websockets`."
            )
        import inspect

        import websockets  # type: ignore

        headers = []
        if self.api_key:
            headers.append(("X-API-Key", self.api_key))

        # The legacy client takes ``extra_headers``; the asyncio client that
        # newer websockets releases expose as ``connect`` renamed it to
        # ``additional_headers``. Pick whichever the installed version has.
        header_kwarg = "extra_headers"
        try:
            if "additional_headers" in inspect.signature(websockets.connect).parameters:
                header_kwarg = "additional_headers"
        except (TypeError, ValueError):
            pass  # signature not introspectable; keep the legacy keyword

        connect_kwargs = {header_kwarg: headers}
        async with websockets.connect(self._websocket_url(), **connect_kwargs) as websocket:
            async for message in websocket:
                yield json.loads(message)

    def _websocket_url(self) -> str:
        """Return the ``/ws/events`` URL with the scheme mapped to ws/wss.

        Only the URL scheme is rewritten (http -> ws, https -> wss), so a
        host or path that happens to contain "http" is left untouched.
        URLs with any other scheme, or none, are used as they are.
        """
        scheme, separator, remainder = self.server_url.partition("://")
        lowered = scheme.lower()
        if separator and lowered in ("http", "https"):
            ws_scheme = "wss" if lowered == "https" else "ws"
            base = f"{ws_scheme}://{remainder}"
        else:
            base = self.server_url
        return base + "/ws/events"

    def _control(self, path: str) -> Dict[str, Any]:
        """POST an empty JSON object to ``path`` and return the decoded reply.

        An empty response body yields an empty dict. HTTP errors propagate
        as raised by ``urllib``.
        """
        url = f"{self.server_url}{path}"
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["X-API-Key"] = self.api_key
        request = urllib.request.Request(url, data=b"{}", headers=headers, method="POST")
        with urllib.request.urlopen(request, timeout=self.request_timeout) as response:
            body = response.read().decode("utf-8")
        return json.loads(body) if body else {}
257
+
258
+
259
def _coerce_output_spec(value: Optional[OutputSpec | Dict[str, Any]]) -> OutputSpec:
    """Normalise the ``output`` argument into an ``OutputSpec``.

    An existing ``OutputSpec`` is returned unchanged, ``None`` produces the
    defaults, and a mapping is expanded into keyword arguments.
    """
    if isinstance(value, OutputSpec):
        return value
    options = {} if value is None else value
    return OutputSpec(**options)
265
+
266
+
267
+ def _load_tables(output_dir: str, output_spec: OutputSpec) -> Dict[str, Any]:
268
+ if output_spec.table_format != "pandas":
269
+ raise ValueError("Only pandas table_format is supported in this wrapper.")
270
+ pandas_spec = importlib.util.find_spec("pandas")
271
+ if pandas_spec is None:
272
+ raise MissingDependencyError(
273
+ "pandas is required for in-memory tables. Install with `pip install pandas`."
274
+ )
275
+ import pandas as pd # type: ignore
276
+
277
+ tables: Dict[str, Any] = {}
278
+ directory = pathlib.Path(output_dir)
279
+ if output_spec.format == "csv":
280
+ for csv_path in directory.rglob("*.csv"):
281
+ tables[csv_path.stem] = pd.read_csv(csv_path)
282
+ elif output_spec.format == "jsonl":
283
+ for json_path in directory.rglob("*.jsonl"):
284
+ tables[json_path.stem] = pd.read_json(json_path, lines=True)
285
+ elif output_spec.format == "parquet":
286
+ for parquet_path in directory.rglob("*.parquet"):
287
+ tables[parquet_path.stem] = pd.read_parquet(parquet_path)
288
+ else:
289
+ raise ValueError(f"Unsupported format for memory loading: {output_spec.format}")
290
+ return tables
291
+
292
+
293
+ def _config_to_server_payload(config: Config, seed: Optional[int]) -> Dict[str, Any]:
294
+ """Convert Config to server API payload format."""
295
+ payload = config.to_dict()
296
+ global_settings = payload.get("global", {})
297
+ companies = payload.get("companies", [])
298
+ chart_of_accounts = payload.get("chart_of_accounts", {})
299
+ fraud = payload.get("fraud", {})
300
+
301
+ # Extract values from the new schema structure
302
+ industry = global_settings.get("industry", "retail")
303
+ complexity = chart_of_accounts.get("complexity", "small")
304
+ start_date = global_settings.get("start_date", "2024-01-01")
305
+ period_months = global_settings.get("period_months", 12)
306
+ seed_value = seed if seed is not None else global_settings.get("seed")
307
+
308
+ # Companies is now a list of company configs
309
+ company_payloads: List[Dict[str, Any]] = []
310
+ if isinstance(companies, list):
311
+ for company in companies:
312
+ company_payloads.append({
313
+ "code": company.get("code", "C001"),
314
+ "name": company.get("name", "Company"),
315
+ "currency": company.get("currency", "USD"),
316
+ "country": company.get("country", "US"),
317
+ "annual_transaction_volume": 10000,
318
+ "volume_weight": company.get("volume_weight", 1.0),
319
+ })
320
+ else:
321
+ # Fallback for legacy format
322
+ company_payloads.append({
323
+ "code": "C001",
324
+ "name": "Company 1",
325
+ "currency": "USD",
326
+ "country": "US",
327
+ "annual_transaction_volume": 10000,
328
+ "volume_weight": 1.0,
329
+ })
330
+
331
+ # Extract fraud settings
332
+ fraud_enabled = fraud.get("enabled", False)
333
+ fraud_rate = fraud.get("rate", 0.0)
334
+
335
+ return {
336
+ "industry": industry,
337
+ "start_date": start_date,
338
+ "period_months": period_months,
339
+ "seed": seed_value,
340
+ "coa_complexity": complexity,
341
+ "companies": company_payloads,
342
+ "fraud_enabled": fraud_enabled,
343
+ "fraud_rate": fraud_rate,
344
+ }
@@ -0,0 +1,39 @@
1
+ """Configuration helpers for datasynth_py."""
2
+
3
+ from datasynth_py.config import blueprints
4
+ from datasynth_py.config.models import (
5
+ AuditSettings,
6
+ BankingSettings,
7
+ ChartOfAccountsSettings,
8
+ CompanyConfig,
9
+ CompanySettings, # Legacy alias
10
+ Config,
11
+ DataQualitySettings,
12
+ FraudSettings,
13
+ GlobalSettings,
14
+ GraphExportSettings,
15
+ OutputSettings,
16
+ ScenarioSettings,
17
+ TemporalDriftSettings,
18
+ TransactionSettings,
19
+ )
20
+ from datasynth_py.config.validation import ConfigValidationError
21
+
22
+ __all__ = [
23
+ "AuditSettings",
24
+ "BankingSettings",
25
+ "ChartOfAccountsSettings",
26
+ "CompanyConfig",
27
+ "CompanySettings",
28
+ "Config",
29
+ "ConfigValidationError",
30
+ "DataQualitySettings",
31
+ "FraudSettings",
32
+ "GlobalSettings",
33
+ "GraphExportSettings",
34
+ "OutputSettings",
35
+ "ScenarioSettings",
36
+ "TemporalDriftSettings",
37
+ "TransactionSettings",
38
+ "blueprints",
39
+ ]