additory 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +4 -0
- additory/common/__init__.py +2 -2
- additory/common/backend.py +20 -4
- additory/common/distributions.py +1 -1
- additory/common/sample_data.py +19 -19
- additory/core/backends/arrow_bridge.py +7 -0
- additory/core/polars_expression_engine.py +66 -16
- additory/dynamic_api.py +42 -46
- additory/expressions/proxy.py +4 -1
- additory/synthetic/__init__.py +7 -95
- additory/synthetic/column_name_resolver.py +149 -0
- additory/{augment → synthetic}/distributions.py +2 -2
- additory/{augment → synthetic}/forecast.py +1 -1
- additory/synthetic/linked_list_parser.py +415 -0
- additory/synthetic/namespace_lookup.py +129 -0
- additory/{augment → synthetic}/smote.py +1 -1
- additory/{augment → synthetic}/strategies.py +11 -44
- additory/{augment/augmentor.py → synthetic/synthesizer.py} +75 -15
- additory/utilities/units.py +4 -1
- {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/METADATA +12 -17
- {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/RECORD +24 -40
- {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/WHEEL +1 -1
- additory/augment/__init__.py +0 -24
- additory/augment/builtin_lists.py +0 -430
- additory/augment/list_registry.py +0 -177
- additory/synthetic/api.py +0 -220
- additory/synthetic/common_integration.py +0 -314
- additory/synthetic/config.py +0 -262
- additory/synthetic/engines.py +0 -529
- additory/synthetic/exceptions.py +0 -180
- additory/synthetic/file_managers.py +0 -518
- additory/synthetic/generator.py +0 -702
- additory/synthetic/generator_parser.py +0 -68
- additory/synthetic/integration.py +0 -319
- additory/synthetic/models.py +0 -241
- additory/synthetic/pattern_resolver.py +0 -573
- additory/synthetic/performance.py +0 -469
- additory/synthetic/polars_integration.py +0 -464
- additory/synthetic/proxy.py +0 -60
- additory/synthetic/schema_parser.py +0 -685
- additory/synthetic/validator.py +0 -553
- {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
- {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/top_level.txt +0 -0
additory/synthetic/config.py
DELETED
|
@@ -1,262 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Configuration management for synthetic data generation.
|
|
3
|
-
|
|
4
|
-
Provides global configuration settings and project-wide defaults
|
|
5
|
-
for engine selection and other generation parameters with persistence.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import os
|
|
9
|
-
import json
|
|
10
|
-
from typing import Optional, Dict, Any
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class SyntheticConfig:
    """
    Configuration manager for synthetic data generation system.

    Holds six settings (default engine, default row count, default batch
    size, schema base path, cache flag, validation flag) and mirrors them
    to a JSON file. Every public mutator writes the file immediately;
    persistence is best-effort, so a file-system failure never prevents
    the in-memory value from being updated.
    """

    # Single source of truth for factory defaults. Treated as read-only:
    # values are copied onto the instance, the mapping itself is never mutated.
    # Key order is significant - it drives get_all_settings() and __repr__.
    _DEFAULTS: Dict[str, Any] = {
        "default_engine": "pandas",
        "default_rows": 1000,
        "default_batch_size": 10000,
        "schema_base_path": "reference/schema_definitions",
        "cache_enabled": True,
        "validation_enabled": True,
    }

    # Engines accepted by set_default_engine() and by the config loader.
    _SUPPORTED_ENGINES = ("pandas", "polars")

    def __init__(self, config_file: Optional[str] = None):
        """
        Initialize configuration with optional custom config file path.

        Args:
            config_file: Optional path to config file. If None (or empty),
                the project-local ".additory_config.json" is used.
        """
        self._apply_defaults()

        if config_file:
            self._config_file = Path(config_file)
        else:
            # Use project-local config file
            self._config_file = Path(".additory_config.json")

        # Overlay any valid settings found on disk
        self._load_config()

    def set_default_engine(self, engine: str) -> None:
        """
        Set the default engine for data generation with persistence.

        Args:
            engine: Either "pandas" or "polars"

        Raises:
            ValueError: If engine is not supported
        """
        if engine not in self._SUPPORTED_ENGINES:
            raise ValueError(f"Unsupported engine: {engine}. Must be 'pandas' or 'polars'")
        self._default_engine = engine
        self._save_config()

    def get_default_engine(self) -> str:
        """Get the current default engine."""
        return self._default_engine

    def set_default_rows(self, rows: int) -> None:
        """
        Set the default number of rows to generate with persistence.

        Args:
            rows: Number of rows (must be positive)

        Raises:
            ValueError: If rows is not positive
        """
        if rows <= 0:
            raise ValueError("Number of rows must be positive")
        self._default_rows = rows
        self._save_config()

    def get_default_rows(self) -> int:
        """Get the default number of rows."""
        return self._default_rows

    def set_default_batch_size(self, batch_size: int) -> None:
        """
        Set the default batch size for memory management with persistence.

        Args:
            batch_size: Batch size (must be positive)

        Raises:
            ValueError: If batch_size is not positive
        """
        if batch_size <= 0:
            raise ValueError("Batch size must be positive")
        self._default_batch_size = batch_size
        self._save_config()

    def get_default_batch_size(self) -> int:
        """Get the default batch size."""
        return self._default_batch_size

    def set_schema_base_path(self, path: str) -> None:
        """
        Set the base path for schema file resolution with persistence.

        Args:
            path: Base directory path for schema files. A str or any
                os.PathLike object is accepted; it is stored as str.

        Raises:
            TypeError: If path is neither a str nor path-like
        """
        # Normalise to str so the value is always JSON-serializable; storing
        # a Path object here used to make the subsequent save fail.
        self._schema_base_path = os.fspath(path)
        self._save_config()

    def get_schema_base_path(self) -> str:
        """Get the schema base path."""
        return self._schema_base_path

    def resolve_schema_path(self, schema_name: str) -> Path:
        """
        Resolve a schema name to a full file path.

        Args:
            schema_name: Schema file name (with or without .toml extension)

        Returns:
            Path of the schema file under the schema base path
        """
        if not schema_name.endswith('.toml'):
            schema_name += '.toml'

        return Path(self._schema_base_path) / schema_name

    def resolve_properties_path(self, properties_name: str) -> Path:
        """
        Resolve a properties file name to a full file path.

        Args:
            properties_name: Properties file name (with or without .properties extension)

        Returns:
            Path of the properties file under the schema base path
        """
        if not properties_name.endswith('.properties'):
            properties_name += '.properties'

        return Path(self._schema_base_path) / properties_name

    def enable_cache(self, enabled: bool = True) -> None:
        """Enable or disable pattern caching with persistence."""
        # Coerce so the persisted value is a JSON boolean that reloads cleanly
        self._cache_enabled = bool(enabled)
        self._save_config()

    def is_cache_enabled(self) -> bool:
        """Check if caching is enabled."""
        return self._cache_enabled

    def enable_validation(self, enabled: bool = True) -> None:
        """Enable or disable validation with persistence."""
        # Coerce so the persisted value is a JSON boolean that reloads cleanly
        self._validation_enabled = bool(enabled)
        self._save_config()

    def is_validation_enabled(self) -> bool:
        """Check if validation is enabled."""
        return self._validation_enabled

    def get_all_settings(self) -> Dict[str, Any]:
        """Get all current configuration settings as a new dict."""
        return {key: getattr(self, "_" + key) for key in self._DEFAULTS}

    def reset_to_defaults(self) -> None:
        """Reset all settings to their default values with persistence."""
        self._apply_defaults()
        self._save_config()

    def _apply_defaults(self) -> None:
        """Set every in-memory setting to its factory default (no file I/O)."""
        for key, value in self._DEFAULTS.items():
            setattr(self, "_" + key, value)

    @classmethod
    def _is_valid_setting(cls, key: str, value: Any) -> bool:
        """Return True if value is an acceptable stored value for setting key."""
        if key == "default_engine":
            return value in cls._SUPPORTED_ENGINES
        if key in ("default_rows", "default_batch_size"):
            # bool is a subclass of int; reject it explicitly
            return isinstance(value, int) and not isinstance(value, bool) and value > 0
        if key == "schema_base_path":
            return isinstance(value, str)
        if key in ("cache_enabled", "validation_enabled"):
            return isinstance(value, bool)
        return False

    def _load_config(self) -> None:
        """
        Load configuration from file if it exists.

        Loading never raises for a bad file: an unreadable file, invalid
        JSON, invalid UTF-8, or a JSON root that is not an object leaves all
        current settings untouched. Individual entries that fail validation
        are skipped, so one bad value does not discard the valid ones.
        """
        if not self._config_file.exists():
            return

        try:
            with open(self._config_file, 'r', encoding='utf-8') as f:
                config_data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # OSError covers permission problems, directories, etc.
            return

        if not isinstance(config_data, dict):
            # e.g. the file contains a JSON list or scalar
            return

        for key in self._DEFAULTS:
            if key in config_data and self._is_valid_setting(key, config_data[key]):
                setattr(self, "_" + key, config_data[key])

    def _save_config(self) -> None:
        """
        Save current configuration to file.

        File-system errors are swallowed on purpose: the in-memory settings
        remain authoritative and the caller continues without persistence.
        A value that cannot be serialized still raises TypeError.
        """
        # Serialize before touching the file so a serialization failure
        # cannot leave a truncated, unparseable config behind.
        payload = json.dumps(self.get_all_settings(), indent=2, ensure_ascii=False)

        try:
            # Ensure parent directory exists
            self._config_file.parent.mkdir(parents=True, exist_ok=True)
            self._config_file.write_text(payload, encoding='utf-8')
        except OSError:
            # Best-effort persistence: continue without saving
            pass

    def get_config_file_path(self) -> str:
        """Get the path to the configuration file."""
        return str(self._config_file)

    def delete_config_file(self) -> bool:
        """
        Delete the configuration file and reset to defaults.

        The in-memory settings are reset only when a file was actually
        removed; nothing changes if the file is missing or cannot be deleted.

        Returns:
            True if file was deleted, False if file didn't exist or
            could not be removed
        """
        if not self._config_file.exists():
            return False

        try:
            self._config_file.unlink()
        except OSError:
            return False

        # Reset to defaults without saving (to avoid recreating the file)
        self._apply_defaults()
        return True

    def __repr__(self) -> str:
        """String representation of the configuration."""
        settings_str = ", ".join(f"{k}={v}" for k, v in self.get_all_settings().items())
        return f"SyntheticConfig({settings_str})"
|