additory 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. additory/__init__.py +4 -0
  2. additory/common/__init__.py +2 -2
  3. additory/common/backend.py +20 -4
  4. additory/common/distributions.py +1 -1
  5. additory/common/sample_data.py +19 -19
  6. additory/core/backends/arrow_bridge.py +7 -0
  7. additory/core/polars_expression_engine.py +66 -16
  8. additory/dynamic_api.py +42 -46
  9. additory/expressions/proxy.py +4 -1
  10. additory/synthetic/__init__.py +7 -95
  11. additory/synthetic/column_name_resolver.py +149 -0
  12. additory/{augment → synthetic}/distributions.py +2 -2
  13. additory/{augment → synthetic}/forecast.py +1 -1
  14. additory/synthetic/linked_list_parser.py +415 -0
  15. additory/synthetic/namespace_lookup.py +129 -0
  16. additory/{augment → synthetic}/smote.py +1 -1
  17. additory/{augment → synthetic}/strategies.py +11 -44
  18. additory/{augment/augmentor.py → synthetic/synthesizer.py} +75 -15
  19. additory/utilities/units.py +4 -1
  20. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/METADATA +12 -17
  21. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/RECORD +24 -40
  22. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/WHEEL +1 -1
  23. additory/augment/__init__.py +0 -24
  24. additory/augment/builtin_lists.py +0 -430
  25. additory/augment/list_registry.py +0 -177
  26. additory/synthetic/api.py +0 -220
  27. additory/synthetic/common_integration.py +0 -314
  28. additory/synthetic/config.py +0 -262
  29. additory/synthetic/engines.py +0 -529
  30. additory/synthetic/exceptions.py +0 -180
  31. additory/synthetic/file_managers.py +0 -518
  32. additory/synthetic/generator.py +0 -702
  33. additory/synthetic/generator_parser.py +0 -68
  34. additory/synthetic/integration.py +0 -319
  35. additory/synthetic/models.py +0 -241
  36. additory/synthetic/pattern_resolver.py +0 -573
  37. additory/synthetic/performance.py +0 -469
  38. additory/synthetic/polars_integration.py +0 -464
  39. additory/synthetic/proxy.py +0 -60
  40. additory/synthetic/schema_parser.py +0 -685
  41. additory/synthetic/validator.py +0 -553
  42. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
  43. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/top_level.txt +0 -0
@@ -1,262 +0,0 @@
1
- """
2
- Configuration management for synthetic data generation.
3
-
4
- Provides global configuration settings and project-wide defaults
5
- for engine selection and other generation parameters with persistence.
6
- """
7
-
8
- import os
9
- import json
10
- from typing import Optional, Dict, Any
11
- from pathlib import Path
12
-
13
-
14
- class SyntheticConfig:
15
- """
16
- Configuration manager for synthetic data generation system.
17
-
18
- Manages default settings for engine selection, file paths,
19
- and other generation parameters with automatic persistence.
20
- """
21
-
22
- def __init__(self, config_file: Optional[str] = None):
23
- """
24
- Initialize configuration with optional custom config file path.
25
-
26
- Args:
27
- config_file: Optional path to config file. If None, uses default location.
28
- """
29
- # Set default values
30
- self._default_engine = "pandas"
31
- self._default_rows = 1000
32
- self._default_batch_size = 10000
33
- self._schema_base_path = "reference/schema_definitions"
34
- self._cache_enabled = True
35
- self._validation_enabled = True
36
-
37
- # Set up config file path
38
- if config_file:
39
- self._config_file = Path(config_file)
40
- else:
41
- # Use project-local config file
42
- self._config_file = Path(".additory_config.json")
43
-
44
- # Load existing configuration if available
45
- self._load_config()
46
-
47
- def set_default_engine(self, engine: str) -> None:
48
- """
49
- Set the default engine for data generation with persistence.
50
-
51
- Args:
52
- engine: Either "pandas" or "polars"
53
-
54
- Raises:
55
- ValueError: If engine is not supported
56
- """
57
- if engine not in ["pandas", "polars"]:
58
- raise ValueError(f"Unsupported engine: {engine}. Must be 'pandas' or 'polars'")
59
- self._default_engine = engine
60
- self._save_config()
61
-
62
- def get_default_engine(self) -> str:
63
- """Get the current default engine."""
64
- return self._default_engine
65
-
66
- def set_default_rows(self, rows: int) -> None:
67
- """
68
- Set the default number of rows to generate with persistence.
69
-
70
- Args:
71
- rows: Number of rows (must be positive)
72
-
73
- Raises:
74
- ValueError: If rows is not positive
75
- """
76
- if rows <= 0:
77
- raise ValueError("Number of rows must be positive")
78
- self._default_rows = rows
79
- self._save_config()
80
-
81
- def get_default_rows(self) -> int:
82
- """Get the default number of rows."""
83
- return self._default_rows
84
-
85
- def set_default_batch_size(self, batch_size: int) -> None:
86
- """
87
- Set the default batch size for memory management with persistence.
88
-
89
- Args:
90
- batch_size: Batch size (must be positive)
91
-
92
- Raises:
93
- ValueError: If batch_size is not positive
94
- """
95
- if batch_size <= 0:
96
- raise ValueError("Batch size must be positive")
97
- self._default_batch_size = batch_size
98
- self._save_config()
99
-
100
- def get_default_batch_size(self) -> int:
101
- """Get the default batch size."""
102
- return self._default_batch_size
103
-
104
- def set_schema_base_path(self, path: str) -> None:
105
- """
106
- Set the base path for schema file resolution with persistence.
107
-
108
- Args:
109
- path: Base directory path for schema files
110
- """
111
- self._schema_base_path = path
112
- self._save_config()
113
-
114
- def get_schema_base_path(self) -> str:
115
- """Get the schema base path."""
116
- return self._schema_base_path
117
-
118
- def resolve_schema_path(self, schema_name: str) -> Path:
119
- """
120
- Resolve a schema name to a full file path.
121
-
122
- Args:
123
- schema_name: Schema file name (with or without .toml extension)
124
-
125
- Returns:
126
- Full path to the schema file
127
- """
128
- if not schema_name.endswith('.toml'):
129
- schema_name += '.toml'
130
-
131
- return Path(self._schema_base_path) / schema_name
132
-
133
- def resolve_properties_path(self, properties_name: str) -> Path:
134
- """
135
- Resolve a properties file name to a full file path.
136
-
137
- Args:
138
- properties_name: Properties file name (with or without .properties extension)
139
-
140
- Returns:
141
- Full path to the properties file
142
- """
143
- if not properties_name.endswith('.properties'):
144
- properties_name += '.properties'
145
-
146
- return Path(self._schema_base_path) / properties_name
147
-
148
- def enable_cache(self, enabled: bool = True) -> None:
149
- """Enable or disable pattern caching with persistence."""
150
- self._cache_enabled = enabled
151
- self._save_config()
152
-
153
- def is_cache_enabled(self) -> bool:
154
- """Check if caching is enabled."""
155
- return self._cache_enabled
156
-
157
- def enable_validation(self, enabled: bool = True) -> None:
158
- """Enable or disable validation with persistence."""
159
- self._validation_enabled = enabled
160
- self._save_config()
161
-
162
- def is_validation_enabled(self) -> bool:
163
- """Check if validation is enabled."""
164
- return self._validation_enabled
165
-
166
- def get_all_settings(self) -> Dict[str, Any]:
167
- """Get all current configuration settings."""
168
- return {
169
- "default_engine": self._default_engine,
170
- "default_rows": self._default_rows,
171
- "default_batch_size": self._default_batch_size,
172
- "schema_base_path": self._schema_base_path,
173
- "cache_enabled": self._cache_enabled,
174
- "validation_enabled": self._validation_enabled
175
- }
176
-
177
- def reset_to_defaults(self) -> None:
178
- """Reset all settings to their default values with persistence."""
179
- self._default_engine = "pandas"
180
- self._default_rows = 1000
181
- self._default_batch_size = 10000
182
- self._schema_base_path = "reference/schema_definitions"
183
- self._cache_enabled = True
184
- self._validation_enabled = True
185
- self._save_config()
186
-
187
- def _load_config(self) -> None:
188
- """Load configuration from file if it exists."""
189
- if not self._config_file.exists():
190
- return
191
-
192
- try:
193
- with open(self._config_file, 'r', encoding='utf-8') as f:
194
- config_data = json.load(f)
195
-
196
- # Update settings from file
197
- self._default_engine = config_data.get("default_engine", self._default_engine)
198
- self._default_rows = config_data.get("default_rows", self._default_rows)
199
- self._default_batch_size = config_data.get("default_batch_size", self._default_batch_size)
200
- self._schema_base_path = config_data.get("schema_base_path", self._schema_base_path)
201
- self._cache_enabled = config_data.get("cache_enabled", self._cache_enabled)
202
- self._validation_enabled = config_data.get("validation_enabled", self._validation_enabled)
203
-
204
- except (json.JSONDecodeError, IOError, KeyError) as e:
205
- # If config file is corrupted or unreadable, use defaults
206
- # Could log this error in a real application
207
- pass
208
-
209
- def _save_config(self) -> None:
210
- """Save current configuration to file."""
211
- config_data = {
212
- "default_engine": self._default_engine,
213
- "default_rows": self._default_rows,
214
- "default_batch_size": self._default_batch_size,
215
- "schema_base_path": self._schema_base_path,
216
- "cache_enabled": self._cache_enabled,
217
- "validation_enabled": self._validation_enabled
218
- }
219
-
220
- try:
221
- # Ensure parent directory exists
222
- self._config_file.parent.mkdir(parents=True, exist_ok=True)
223
-
224
- with open(self._config_file, 'w', encoding='utf-8') as f:
225
- json.dump(config_data, f, indent=2, ensure_ascii=False)
226
-
227
- except IOError as e:
228
- # If we can't save config, continue without persistence
229
- # Could log this error in a real application
230
- pass
231
-
232
- def get_config_file_path(self) -> str:
233
- """Get the path to the configuration file."""
234
- return str(self._config_file)
235
-
236
- def delete_config_file(self) -> bool:
237
- """
238
- Delete the configuration file and reset to defaults.
239
-
240
- Returns:
241
- True if file was deleted, False if file didn't exist
242
- """
243
- if self._config_file.exists():
244
- try:
245
- self._config_file.unlink()
246
- # Reset to defaults without saving (to avoid recreating the file)
247
- self._default_engine = "pandas"
248
- self._default_rows = 1000
249
- self._default_batch_size = 10000
250
- self._schema_base_path = "reference/schema_definitions"
251
- self._cache_enabled = True
252
- self._validation_enabled = True
253
- return True
254
- except IOError:
255
- return False
256
- return False
257
-
258
- def __repr__(self) -> str:
259
- """String representation of the configuration."""
260
- settings = self.get_all_settings()
261
- settings_str = ", ".join(f"{k}={v}" for k, v in settings.items())
262
- return f"SyntheticConfig({settings_str})"