additory 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. additory/__init__.py +15 -0
  2. additory/analysis/__init__.py +48 -0
  3. additory/analysis/cardinality.py +126 -0
  4. additory/analysis/correlations.py +124 -0
  5. additory/analysis/distributions.py +376 -0
  6. additory/analysis/quality.py +158 -0
  7. additory/analysis/scan.py +400 -0
  8. additory/augment/__init__.py +24 -0
  9. additory/augment/augmentor.py +653 -0
  10. additory/augment/builtin_lists.py +430 -0
  11. additory/augment/distributions.py +22 -0
  12. additory/augment/forecast.py +1132 -0
  13. additory/augment/list_registry.py +177 -0
  14. additory/augment/smote.py +320 -0
  15. additory/augment/strategies.py +883 -0
  16. additory/common/__init__.py +157 -0
  17. additory/common/backend.py +355 -0
  18. additory/common/column_utils.py +191 -0
  19. additory/common/distributions.py +737 -0
  20. additory/common/exceptions.py +62 -0
  21. additory/common/lists.py +229 -0
  22. additory/common/patterns.py +240 -0
  23. additory/common/resolver.py +567 -0
  24. additory/common/sample_data.py +182 -0
  25. additory/common/validation.py +197 -0
  26. additory/core/__init__.py +27 -0
  27. additory/core/ast_builder.py +165 -0
  28. additory/core/backends/__init__.py +23 -0
  29. additory/core/backends/arrow_bridge.py +476 -0
  30. additory/core/backends/cudf_bridge.py +355 -0
  31. additory/core/column_positioning.py +358 -0
  32. additory/core/compiler_polars.py +166 -0
  33. additory/core/config.py +342 -0
  34. additory/core/enhanced_cache_manager.py +1119 -0
  35. additory/core/enhanced_matchers.py +473 -0
  36. additory/core/enhanced_version_manager.py +325 -0
  37. additory/core/executor.py +59 -0
  38. additory/core/integrity_manager.py +477 -0
  39. additory/core/loader.py +190 -0
  40. additory/core/logging.py +24 -0
  41. additory/core/memory_manager.py +547 -0
  42. additory/core/namespace_manager.py +657 -0
  43. additory/core/parser.py +176 -0
  44. additory/core/polars_expression_engine.py +551 -0
  45. additory/core/registry.py +176 -0
  46. additory/core/sample_data_manager.py +492 -0
  47. additory/core/user_namespace.py +751 -0
  48. additory/core/validator.py +27 -0
  49. additory/dynamic_api.py +308 -0
  50. additory/expressions/__init__.py +26 -0
  51. additory/expressions/engine.py +551 -0
  52. additory/expressions/parser.py +176 -0
  53. additory/expressions/proxy.py +546 -0
  54. additory/expressions/registry.py +313 -0
  55. additory/expressions/samples.py +492 -0
  56. additory/synthetic/__init__.py +101 -0
  57. additory/synthetic/api.py +220 -0
  58. additory/synthetic/common_integration.py +314 -0
  59. additory/synthetic/config.py +262 -0
  60. additory/synthetic/engines.py +529 -0
  61. additory/synthetic/exceptions.py +180 -0
  62. additory/synthetic/file_managers.py +518 -0
  63. additory/synthetic/generator.py +702 -0
  64. additory/synthetic/generator_parser.py +68 -0
  65. additory/synthetic/integration.py +319 -0
  66. additory/synthetic/models.py +241 -0
  67. additory/synthetic/pattern_resolver.py +573 -0
  68. additory/synthetic/performance.py +469 -0
  69. additory/synthetic/polars_integration.py +464 -0
  70. additory/synthetic/proxy.py +60 -0
  71. additory/synthetic/schema_parser.py +685 -0
  72. additory/synthetic/validator.py +553 -0
  73. additory/utilities/__init__.py +53 -0
  74. additory/utilities/encoding.py +600 -0
  75. additory/utilities/games.py +300 -0
  76. additory/utilities/keys.py +8 -0
  77. additory/utilities/lookup.py +103 -0
  78. additory/utilities/matchers.py +216 -0
  79. additory/utilities/resolvers.py +286 -0
  80. additory/utilities/settings.py +167 -0
  81. additory/utilities/units.py +746 -0
  82. additory/utilities/validators.py +153 -0
  83. additory-0.1.0a1.dist-info/METADATA +293 -0
  84. additory-0.1.0a1.dist-info/RECORD +87 -0
  85. additory-0.1.0a1.dist-info/WHEEL +5 -0
  86. additory-0.1.0a1.dist-info/licenses/LICENSE +21 -0
  87. additory-0.1.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,492 @@
1
+ # sample_data_manager.py
2
+ # Enhanced sample data management for additory expressions
3
+
4
+ import os
5
+ import yaml
6
+ import pandas as pd
7
+ from typing import Dict, List, Optional, Any, Union, Tuple
8
+ from dataclasses import dataclass
9
+ import re
10
+
11
+ from .logging import log_info, log_warning
12
+ from .enhanced_version_manager import EnhancedVersionManager
13
+ from .namespace_manager import NamespaceManager
14
+ from .integrity_manager import IntegrityManager
15
+
16
+
17
@dataclass
class SampleDataInfo:
    """Summary of the sample data available for one expression.

    Records which sample variants (clean / unclean) exist, how many rows
    each holds, any educational comments attached to the unclean variant,
    and any validation problems detected while inspecting the samples.
    """
    expression_name: str              # expression the samples belong to
    version: str                      # resolved expression version
    has_clean: bool                   # True when a clean sample exists
    has_unclean: bool                 # True when an unclean sample exists
    clean_rows: int                   # row count of the clean sample (0 if absent)
    unclean_rows: int                 # row count of the unclean sample (0 if absent)
    educational_comments: List[str]   # comments extracted from unclean data
    validation_errors: List[str]      # issues found while validating samples
28
+
29
+
30
class SampleDataError(Exception):
    """Signals a failed sample-data operation (retrieval, validation, or template creation)."""
33
+
34
+
35
class SampleDataManager:
    """
    Enhanced sample data management system.

    Provides clean/unclean sample support with educational comments and
    validation. Clean samples model well-formed input; unclean samples
    intentionally contain data-quality issues (nulls, mixed types, extreme
    values) so expression error handling can be demonstrated and tested.
    """

    def __init__(self):
        self.version_manager = EnhancedVersionManager()
        self.namespace_manager = NamespaceManager()
        self.integrity_manager = IntegrityManager()

        # Sample data validation rules applied by validate_sample_data().
        self.validation_rules = {
            "max_rows": 100,           # Maximum rows in sample data
            "min_rows": 1,             # Minimum rows in sample data
            "required_columns": [],    # Will be determined from expression
            "max_column_length": 50,   # Maximum string length in columns
        }

        # Educational comment patterns for unclean data.
        self.educational_patterns = {
            "missing_values": "# Missing values to test null handling",
            "invalid_types": "# Invalid data types to test type validation",
            "edge_cases": "# Edge cases to test boundary conditions",
            "malformed_data": "# Malformed data to test error handling",
            "duplicate_values": "# Duplicate values to test deduplication",
            "extreme_values": "# Extreme values to test range validation"
        }

        log_info("[sample_data] Sample Data Manager initialized")

    def get_clean_sample(self, expression_name: str, namespace: str = "builtin",
                         version: Optional[str] = None) -> pd.DataFrame:
        """
        Get clean sample data for an expression.

        Args:
            expression_name: Name of the expression
            namespace: Namespace ("builtin" or "user")
            version: Specific version (optional)

        Returns:
            DataFrame with clean sample data

        Raises:
            SampleDataError: If sample data cannot be retrieved
        """
        try:
            sample_data = self._get_sample_data(expression_name, namespace, version, "clean")

            if sample_data is None:
                # Generate default clean sample if none exists
                return self._generate_default_clean_sample(expression_name)

            df = pd.DataFrame(sample_data)

            # Validate clean sample data; issues are logged, not fatal.
            validation_errors = self._validate_clean_sample(df, expression_name)
            if validation_errors:
                log_warning(f"[sample_data] Clean sample validation issues for {expression_name}: {validation_errors}")

            log_info(f"[sample_data] Retrieved clean sample for {expression_name} ({len(df)} rows)")
            return df

        except Exception as e:
            log_warning(f"[sample_data] Failed to get clean sample for {expression_name}: {e}")
            # Chain the original cause so callers can inspect the root failure.
            raise SampleDataError(f"Failed to get clean sample data: {e}") from e

    def get_unclean_sample(self, expression_name: str, namespace: str = "builtin",
                           version: Optional[str] = None) -> pd.DataFrame:
        """
        Get unclean sample data with educational comments.

        Args:
            expression_name: Name of the expression
            namespace: Namespace ("builtin" or "user")
            version: Specific version (optional)

        Returns:
            DataFrame with unclean sample data and educational comments

        Raises:
            SampleDataError: If sample data cannot be retrieved
        """
        try:
            sample_data = self._get_sample_data(expression_name, namespace, version, "unclean")

            if sample_data is None:
                # Generate default unclean sample if none exists
                return self._generate_default_unclean_sample(expression_name)

            df = pd.DataFrame(sample_data)

            # Add educational comments as metadata column
            df = self._add_educational_comments(df, expression_name)

            log_info(f"[sample_data] Retrieved unclean sample for {expression_name} ({len(df)} rows)")
            return df

        except Exception as e:
            log_warning(f"[sample_data] Failed to get unclean sample for {expression_name}: {e}")
            raise SampleDataError(f"Failed to get unclean sample data: {e}") from e

    def validate_sample_data(self, sample_data: Dict[str, Any],
                             expression_name: str, sample_type: str = "clean") -> Tuple[bool, List[str]]:
        """
        Validate sample data format and content.

        Args:
            sample_data: Sample data dictionary (column name -> list of values)
            expression_name: Name of the expression
            sample_type: "clean" or "unclean"

        Returns:
            Tuple of (is_valid, list_of_errors)
        """
        errors = []

        try:
            # Check if sample_data is a dictionary
            if not isinstance(sample_data, dict):
                errors.append("Sample data must be a dictionary")
                return False, errors

            # Check if sample_data has columns
            if not sample_data:
                errors.append("Sample data cannot be empty")
                return False, errors

            # Convert to DataFrame for validation
            try:
                df = pd.DataFrame(sample_data)
            except Exception as e:
                errors.append(f"Cannot convert sample data to DataFrame: {e}")
                return False, errors

            # Validate row count against configured bounds
            row_count = len(df)
            if row_count < self.validation_rules["min_rows"]:
                errors.append(f"Sample data has too few rows: {row_count} < {self.validation_rules['min_rows']}")

            if row_count > self.validation_rules["max_rows"]:
                errors.append(f"Sample data has too many rows: {row_count} > {self.validation_rules['max_rows']}")

            # Validate column content
            for column, values in sample_data.items():
                if not isinstance(values, list):
                    errors.append(f"Column '{column}' must be a list")
                    continue

                # Check for consistent length
                if len(values) != row_count:
                    errors.append(f"Column '{column}' has inconsistent length")

                # Check string length limits
                for i, value in enumerate(values):
                    if isinstance(value, str) and len(value) > self.validation_rules["max_column_length"]:
                        errors.append(f"Column '{column}' row {i} exceeds max length")

            # Specific validation for clean vs unclean samples
            if sample_type == "clean":
                errors.extend(self._validate_clean_sample(df, expression_name))
            else:
                errors.extend(self._validate_unclean_sample(df, expression_name))

            is_valid = len(errors) == 0

            if is_valid:
                log_info(f"[sample_data] Sample data validation passed for {expression_name}")
            else:
                log_warning(f"[sample_data] Sample data validation failed for {expression_name}: {errors}")

            return is_valid, errors

        except Exception as e:
            errors.append(f"Validation error: {e}")
            return False, errors

    def get_sample_info(self, expression_name: str, namespace: str = "builtin",
                        version: Optional[str] = None) -> SampleDataInfo:
        """
        Get comprehensive information about sample data.

        Args:
            expression_name: Name of the expression
            namespace: Namespace ("builtin" or "user")
            version: Specific version (optional)

        Returns:
            SampleDataInfo object with comprehensive information; on failure a
            SampleDataInfo whose validation_errors describes the problem.
        """
        try:
            # Get sample data (either may be None when absent)
            clean_data = self._get_sample_data(expression_name, namespace, version, "clean")
            unclean_data = self._get_sample_data(expression_name, namespace, version, "unclean")

            # Analyze clean sample
            has_clean = clean_data is not None
            clean_rows = len(pd.DataFrame(clean_data)) if has_clean else 0

            # Analyze unclean sample
            has_unclean = unclean_data is not None
            unclean_rows = len(pd.DataFrame(unclean_data)) if has_unclean else 0

            # Extract educational comments
            educational_comments = []
            if has_unclean:
                educational_comments = self._extract_educational_comments(unclean_data)

            # Validate whichever samples exist, prefixing errors by variant
            validation_errors = []
            if has_clean:
                _, clean_errors = self.validate_sample_data(clean_data, expression_name, "clean")
                validation_errors.extend([f"Clean: {err}" for err in clean_errors])

            if has_unclean:
                _, unclean_errors = self.validate_sample_data(unclean_data, expression_name, "unclean")
                validation_errors.extend([f"Unclean: {err}" for err in unclean_errors])

            return SampleDataInfo(
                expression_name=expression_name,
                version=version or self.version_manager.default_version,
                has_clean=has_clean,
                has_unclean=has_unclean,
                clean_rows=clean_rows,
                unclean_rows=unclean_rows,
                educational_comments=educational_comments,
                validation_errors=validation_errors
            )

        except Exception as e:
            log_warning(f"[sample_data] Failed to get sample info for {expression_name}: {e}")
            return SampleDataInfo(
                expression_name=expression_name,
                version=version or "unknown",
                has_clean=False,
                has_unclean=False,
                clean_rows=0,
                unclean_rows=0,
                educational_comments=[],
                validation_errors=[f"Failed to get sample info: {e}"]
            )

    def create_sample_template(self, expression_name: str, columns: List[str]) -> Dict[str, Dict[str, Any]]:
        """
        Create a template for sample data.

        Args:
            expression_name: Name of the expression
            columns: List of required columns

        Returns:
            Dictionary with "clean" and "unclean" sample templates

        Raises:
            SampleDataError: If the template cannot be created
        """
        try:
            # Create clean sample template (3 well-formed rows per column)
            clean_template = {}
            for column in columns:
                clean_template[column] = [f"sample_{column}_1", f"sample_{column}_2", f"sample_{column}_3"]

            # Create unclean sample template with educational comments
            unclean_template = {}
            for column in columns:
                unclean_template[column] = [
                    f"valid_{column}",
                    None,  # Missing value
                    f"invalid_{column}_type",
                    f"extreme_{column}_value"
                ]

            # Add educational comments.
            # NOTE(review): "_comments" has 5 entries while data columns have 4
            # rows, so validate_sample_data() would flag this template as
            # inconsistent — confirm whether metadata columns should be exempt.
            unclean_template["_comments"] = [
                "# This is unclean sample data for testing error handling",
                "# Row 1: Valid data",
                "# Row 2: Missing values (None/null)",
                "# Row 3: Invalid data types",
                "# Row 4: Extreme or edge case values"
            ]

            template = {
                "clean": clean_template,
                "unclean": unclean_template
            }

            log_info(f"[sample_data] Created sample template for {expression_name}")
            return template

        except Exception as e:
            log_warning(f"[sample_data] Failed to create sample template for {expression_name}: {e}")
            raise SampleDataError(f"Failed to create sample template: {e}") from e

    def _get_sample_data(self, expression_name: str, namespace: str,
                         version: Optional[str], sample_type: str) -> Optional[Dict[str, Any]]:
        """Get raw sample data from an expression file.

        Returns None when the expression file is missing, fails integrity
        validation, or does not define the requested sample section.
        """
        # BUG FIX: predefine so the except handler can never hit a NameError
        # when get_expression_file_path() itself raises.
        expression_path = None
        try:
            # Get expression file path
            expression_path = self.namespace_manager.get_expression_file_path(
                namespace, expression_name, version
            )

            if not expression_path or not os.path.exists(expression_path):
                return None

            # Validate integrity
            if not self.integrity_manager.validate_integrity(expression_path):
                log_warning(f"[sample_data] Integrity validation failed for {expression_path}")
                return None

            # Parse expression file; safe_load returns None for an empty file,
            # so fall back to an empty dict instead of raising AttributeError.
            with open(expression_path, 'r') as f:
                content = yaml.safe_load(f) or {}

            # Extract sample data
            sample_section = content.get("sample", {})
            return sample_section.get(sample_type)

        except Exception as e:
            log_warning(f"[sample_data] Failed to get sample data for {expression_name} "
                        f"(path={expression_path}): {e}")
            return None

    def _validate_clean_sample(self, df: pd.DataFrame, expression_name: str) -> List[str]:
        """Validate clean sample data: no nulls, consistent per-column types."""
        errors = []

        # Check for missing values in clean sample
        if df.isnull().any().any():
            errors.append("Clean sample should not contain missing values")

        # Check for reasonable data types
        for column in df.columns:
            if column.startswith('_'):  # Skip metadata columns
                continue

            series = df[column]

            # Check for mixed types (should be consistent in clean data)
            unique_types = set(type(x).__name__ for x in series.dropna())
            if len(unique_types) > 1:
                errors.append(f"Column '{column}' has mixed data types in clean sample")

        return errors

    def _validate_unclean_sample(self, df: pd.DataFrame, expression_name: str) -> List[str]:
        """Validate unclean sample data: must exhibit at least one quality issue."""
        errors = []

        # Unclean samples should have some issues for educational purposes
        has_nulls = df.isnull().any().any()
        has_mixed_types = False

        for column in df.columns:
            if column.startswith('_'):  # Skip metadata columns
                continue

            series = df[column]
            unique_types = set(type(x).__name__ for x in series.dropna())
            if len(unique_types) > 1:
                has_mixed_types = True
                break

        # Unclean samples should demonstrate common data issues
        if not has_nulls and not has_mixed_types:
            errors.append("Unclean sample should contain some data quality issues for educational purposes")

        return errors

    def _add_educational_comments(self, df: pd.DataFrame, expression_name: str) -> pd.DataFrame:
        """Return a copy of df with an '_educational_comments' column describing each row.

        On any failure the original df is returned unchanged (best-effort).
        """
        try:
            # Build one comment string per row
            comments = []

            for _, row in df.iterrows():  # index is unused
                comment_parts = []

                # Check for missing values
                if row.isnull().any():
                    comment_parts.append("Contains missing values")

                # Check for potential type issues
                for col, val in row.items():
                    if col.startswith('_'):
                        continue
                    if isinstance(val, str) and val.lower() in ['invalid', 'error', 'null']:
                        comment_parts.append(f"'{col}' has invalid value")

                if not comment_parts:
                    comment_parts.append("Valid data row")

                comments.append(" | ".join(comment_parts))

            # Add comments as a new column on a copy (do not mutate the input)
            df_with_comments = df.copy()
            df_with_comments['_educational_comments'] = comments

            return df_with_comments

        except Exception as e:
            log_warning(f"[sample_data] Failed to add educational comments: {e}")
            return df

    def _extract_educational_comments(self, sample_data: Dict[str, Any]) -> List[str]:
        """Extract educational comments from sample data (explicit and inferred)."""
        comments = []

        # Look for explicit comment fields
        if '_comments' in sample_data:
            comments.extend(sample_data['_comments'])

        # Generate comments based on data patterns
        try:
            df = pd.DataFrame({k: v for k, v in sample_data.items() if not k.startswith('_')})

            if df.isnull().any().any():
                comments.append("Contains missing values for null handling testing")

            for column in df.columns:
                series = df[column]
                unique_types = set(type(x).__name__ for x in series.dropna())
                if len(unique_types) > 1:
                    comments.append(f"Column '{column}' has mixed types for type validation testing")

        except Exception:
            pass  # Ignore errors in comment extraction (best-effort)

        return comments

    def _generate_default_clean_sample(self, expression_name: str) -> pd.DataFrame:
        """Generate default clean sample data when none exists."""
        return pd.DataFrame({
            "col_a": [1, 2, 3],
            "col_b": [4, 5, 6],
            "_info": [f"Default clean sample for '{expression_name}'"] * 3
        })

    def _generate_default_unclean_sample(self, expression_name: str) -> pd.DataFrame:
        """Generate default unclean sample data when none exists."""
        return pd.DataFrame({
            "col_a": [1, None, "invalid"],
            "col_b": [4, 5, -999],
            "_educational_comments": [
                "Valid data row",
                "Missing value in col_a",
                "Invalid type in col_a, extreme value in col_b"
            ],
            "_info": [f"Default unclean sample for '{expression_name}'"] * 3
        })
482
+
483
+
484
# Process-wide singleton; created lazily by get_sample_data_manager().
_sample_data_manager = None


def get_sample_data_manager() -> SampleDataManager:
    """Get the global sample data manager instance, creating it on first use."""
    global _sample_data_manager
    manager = _sample_data_manager
    if manager is None:
        manager = SampleDataManager()
        _sample_data_manager = manager
    return manager
@@ -0,0 +1,101 @@
1
"""
Additory Synthetic Data Generation Module

This module provides polars-native synthetic data generation using regex patterns
and distribution strategies. It supports hierarchical pattern resolution and
industry-standard file formats (.properties and .toml).
"""

# Public entry points and custom distribution-engine registration helpers.
from .api import (
    synth,
    config,
    register_distribution_engine,
    unregister_distribution_engine,
    list_custom_distribution_engines
)

# Exception hierarchy for synthetic-data failures.
from .exceptions import (
    SyntheticDataError,
    PatternResolutionError,
    ValidationError,
    DistributionError,
    FileFormatError,
    PatternImportError,
    SchemaParsingError
)

# Hierarchical pattern resolution.
from .pattern_resolver import PatternHierarchyResolver, ResolutionTrace, PatternResolutionResult

# Distribution engines and configuration.
from .engines import (
    DistributionEngine,
    DistributionEngineFactory,
    DistributionManager,
    DistributionConfig,
)

# Core generation pipeline.
from .generator import (
    RegexGenerator,
    PolarsGeneratorCore,
    OutputConverter,
    SyntheticDataGenerator,
    GenerationConfig,
)

# Performance monitoring and optimization.
from .performance import (
    PerformanceMonitor,
    PerformanceOptimizer,
    PerformanceMetrics,
    PerformanceComparison,
    performance_monitor,
    performance_optimizer
)

# Polars integration layer helpers.
from .polars_integration import (
    PolarsIntegrationLayer,
    optimize_conversion,
    enhance_result,
    optimize_context,
    apply_expression,
    optimize_memory,
    validate_compatibility,
    get_integration_stats,
    cleanup_integration,
    benchmark_integration
)

# Explicit public API, grouped to mirror the import sections above.
__all__ = [
    # api
    'synth',
    'config',
    'register_distribution_engine',
    'unregister_distribution_engine',
    'list_custom_distribution_engines',
    # exceptions
    'SyntheticDataError',
    'PatternResolutionError',
    'ValidationError',
    'DistributionError',
    'FileFormatError',
    'PatternImportError',
    'SchemaParsingError',
    # pattern_resolver
    'PatternHierarchyResolver',
    'ResolutionTrace',
    'PatternResolutionResult',
    # engines
    'DistributionEngine',
    'DistributionEngineFactory',
    'DistributionManager',
    'DistributionConfig',
    # generator
    'RegexGenerator',
    'PolarsGeneratorCore',
    'OutputConverter',
    'SyntheticDataGenerator',
    'GenerationConfig',
    # performance
    'PerformanceMonitor',
    'PerformanceOptimizer',
    'PerformanceMetrics',
    'PerformanceComparison',
    'performance_monitor',
    'performance_optimizer',
    # polars_integration
    'PolarsIntegrationLayer',
    'optimize_conversion',
    'enhance_result',
    'optimize_context',
    'apply_expression',
    'optimize_memory',
    'validate_compatibility',
    'get_integration_stats',
    'cleanup_integration',
    'benchmark_integration'
]