additory 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. additory/__init__.py +4 -0
  2. additory/common/__init__.py +2 -2
  3. additory/common/backend.py +20 -4
  4. additory/common/distributions.py +1 -1
  5. additory/common/sample_data.py +19 -19
  6. additory/core/backends/arrow_bridge.py +7 -0
  7. additory/core/polars_expression_engine.py +66 -16
  8. additory/dynamic_api.py +42 -46
  9. additory/expressions/proxy.py +4 -1
  10. additory/synthetic/__init__.py +7 -95
  11. additory/synthetic/column_name_resolver.py +149 -0
  12. additory/{augment → synthetic}/distributions.py +2 -2
  13. additory/{augment → synthetic}/forecast.py +1 -1
  14. additory/synthetic/linked_list_parser.py +415 -0
  15. additory/synthetic/namespace_lookup.py +129 -0
  16. additory/{augment → synthetic}/smote.py +1 -1
  17. additory/{augment → synthetic}/strategies.py +11 -44
  18. additory/{augment/augmentor.py → synthetic/synthesizer.py} +75 -15
  19. additory/utilities/units.py +4 -1
  20. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/METADATA +12 -17
  21. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/RECORD +24 -40
  22. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/WHEEL +1 -1
  23. additory/augment/__init__.py +0 -24
  24. additory/augment/builtin_lists.py +0 -430
  25. additory/augment/list_registry.py +0 -177
  26. additory/synthetic/api.py +0 -220
  27. additory/synthetic/common_integration.py +0 -314
  28. additory/synthetic/config.py +0 -262
  29. additory/synthetic/engines.py +0 -529
  30. additory/synthetic/exceptions.py +0 -180
  31. additory/synthetic/file_managers.py +0 -518
  32. additory/synthetic/generator.py +0 -702
  33. additory/synthetic/generator_parser.py +0 -68
  34. additory/synthetic/integration.py +0 -319
  35. additory/synthetic/models.py +0 -241
  36. additory/synthetic/pattern_resolver.py +0 -573
  37. additory/synthetic/performance.py +0 -469
  38. additory/synthetic/polars_integration.py +0 -464
  39. additory/synthetic/proxy.py +0 -60
  40. additory/synthetic/schema_parser.py +0 -685
  41. additory/synthetic/validator.py +0 -553
  42. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
  43. {additory-0.1.0a1.dist-info → additory-0.1.0a3.dist-info}/top_level.txt +0 -0
additory/__init__.py CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  from .dynamic_api import add as _api_instance
4
4
 
5
+ # Version information
6
+ __version__ = "0.1.0a3"
7
+
5
8
  # Expose the API instance normally
6
9
  add = _api_instance
7
10
 
@@ -12,4 +15,5 @@ def __getattr__(name):
12
15
 
13
16
  __all__ = [
14
17
  "add",
18
+ "__version__",
15
19
  ]
@@ -1,14 +1,14 @@
1
1
  """
2
2
  Common Utilities Module
3
3
 
4
- Shared functionality used by both augment and synthetic modules:
4
+ Shared functionality used by both synthetic and expressions modules:
5
5
  - Distribution functions (normal, uniform, skewed, etc.)
6
6
  - List file management (.list format)
7
7
  - Pattern file management (.properties format)
8
8
  - Fallback resolution logic
9
9
 
10
10
  This module eliminates code duplication and provides consistent behavior
11
- across augment and synthetic data generation.
11
+ across synthetic and expression data generation.
12
12
  """
13
13
 
14
14
  from .distributions import (
@@ -180,11 +180,14 @@ def get_arrow_bridge():
180
180
  - Use for all cross-backend conversions
181
181
  - Handles pandas/polars/cuDF via Arrow
182
182
  """
183
- from additory.core.backends.arrow_bridge import EnhancedArrowBridge
183
+ from additory.core.backends.arrow_bridge import EnhancedArrowBridge, ArrowBridgeError
184
184
 
185
185
  # Singleton pattern
186
186
  if not hasattr(get_arrow_bridge, '_instance'):
187
- get_arrow_bridge._instance = EnhancedArrowBridge()
187
+ try:
188
+ get_arrow_bridge._instance = EnhancedArrowBridge()
189
+ except ArrowBridgeError:
190
+ get_arrow_bridge._instance = None
188
191
 
189
192
  return get_arrow_bridge._instance
190
193
 
@@ -194,7 +197,7 @@ def to_polars(df: Any, backend_type: BackendType = None) -> 'pl.DataFrame':
194
197
  Convert any dataframe to Polars via Arrow bridge.
195
198
 
196
199
  This is the primary conversion function for the Polars-only architecture.
197
- All operations (expressions, augment, etc.) use this to convert input
200
+ All operations (expressions, synthetic, etc.) use this to convert input
198
201
  dataframes to Polars for processing.
199
202
 
200
203
  Args:
@@ -224,7 +227,7 @@ def to_polars(df: Any, backend_type: BackendType = None) -> 'pl.DataFrame':
224
227
  )
225
228
 
226
229
  # Fast path: already Polars
227
- if isinstance(df, pl.DataFrame):
230
+ if HAS_POLARS and isinstance(df, pl.DataFrame):
228
231
  return df
229
232
 
230
233
  # Validate input
@@ -240,6 +243,13 @@ def to_polars(df: Any, backend_type: BackendType = None) -> 'pl.DataFrame':
240
243
  # Convert via Arrow bridge
241
244
  try:
242
245
  bridge = get_arrow_bridge()
246
+ if bridge is None:
247
+ # Fallback: direct conversion for pandas
248
+ if backend_type == "pandas":
249
+ if isinstance(df, pd.DataFrame):
250
+ return pl.from_pandas(df)
251
+ raise RuntimeError("Arrow bridge not available and cannot convert non-pandas DataFrame")
252
+
243
253
  arrow_table = bridge.to_arrow(df, backend_type)
244
254
  pl_df = bridge.from_arrow(arrow_table, "polars")
245
255
  return pl_df
@@ -309,6 +319,12 @@ def from_polars(pl_df: 'pl.DataFrame', target_backend: BackendType) -> Any:
309
319
  # Convert via Arrow bridge
310
320
  try:
311
321
  bridge = get_arrow_bridge()
322
+ if bridge is None:
323
+ # Fallback: direct conversion for pandas
324
+ if target_backend == "pandas":
325
+ return pl_df.to_pandas()
326
+ raise RuntimeError("Arrow bridge not available and cannot convert to non-pandas DataFrame")
327
+
312
328
  arrow_table = bridge.to_arrow(pl_df, "polars")
313
329
  result_df = bridge.from_arrow(arrow_table, target_backend)
314
330
  return result_df
@@ -1,5 +1,5 @@
1
1
  """
2
- Distribution Strategies for Data Augmentation
2
+ Distribution Strategies for Synthetic Data Generation
3
3
 
4
4
  Provides statistical distribution-based data generation:
5
5
  - Normal (Gaussian) distribution
@@ -8,8 +8,8 @@ loaded on-demand using the existing .add file parser.
8
8
  Usage:
9
9
  from additory.common.sample_data import get_sample_dataset
10
10
 
11
- # For augment
12
- df = get_sample_dataset("augment", "sample")
11
+ # For synthetic
12
+ df = get_sample_dataset("synthetic", "sample")
13
13
 
14
14
  # For expressions (future)
15
15
  df = get_sample_dataset("expressions", "sample")
@@ -25,7 +25,7 @@ from additory.common.exceptions import ValidationError
25
25
 
26
26
 
27
27
  def get_sample_dataset(
28
- module: str = "augment",
28
+ module: str = "synthetic",
29
29
  block: str = "sample",
30
30
  dataset_type: str = "clean"
31
31
  ) -> pl.DataFrame:
@@ -33,12 +33,12 @@ def get_sample_dataset(
33
33
  Load a sample dataset from .add files.
34
34
 
35
35
  This function provides centralized access to sample datasets across
36
- all additory modules (augment, expressions, utilities). Sample datasets
36
+ all additory modules (synthetic, expressions, utilities). Sample datasets
37
37
  are stored as .add files in the reference/ directory structure.
38
38
 
39
39
  Args:
40
- module: Module name ("augment", "expressions", "utilities")
41
- block: Block name within the .add file ("sample" for augment)
40
+ module: Module name ("synthetic", "expressions", "utilities")
41
+ block: Block name within the .add file ("sample" for synthetic)
42
42
  dataset_type: Type of sample data ("clean" or "unclean")
43
43
 
44
44
  Returns:
@@ -48,8 +48,8 @@ def get_sample_dataset(
48
48
  ValidationError: If module, block, or dataset_type not found
49
49
 
50
50
  Examples:
51
- >>> # Load augment sample dataset
52
- >>> df = get_sample_dataset("augment", "sample")
51
+ >>> # Load synthetic sample dataset
52
+ >>> df = get_sample_dataset("synthetic", "sample")
53
53
  >>> print(df.shape)
54
54
  (50, 10)
55
55
 
@@ -57,7 +57,7 @@ def get_sample_dataset(
57
57
  >>> df = get_sample_dataset("expressions", "sample", "clean")
58
58
  >>> df_unclean = get_sample_dataset("expressions", "sample", "unclean")
59
59
 
60
- Sample Dataset Structure (augment):
60
+ Sample Dataset Structure (synthetic):
61
61
  - id: Sequential numeric IDs (1-50)
62
62
  - emp_id: Employee IDs with pattern (EMP_001 - EMP_050)
63
63
  - order_id: Order IDs with different padding (ORD_0001 - ORD_0050)
@@ -72,8 +72,8 @@ def get_sample_dataset(
72
72
  # Construct path to .add file
73
73
  base_path = Path(__file__).parent.parent.parent / "reference"
74
74
 
75
- if module == "augment":
76
- add_file_path = base_path / "augment_definitions" / f"{block}_0.1.add"
75
+ if module == "synthetic":
76
+ add_file_path = base_path / "synthetic_definitions" / f"{block}_0.1.add"
77
77
  elif module == "expressions":
78
78
  add_file_path = base_path / "expressions_definitions" / f"{block}_0.1.add"
79
79
  elif module == "utilities":
@@ -81,7 +81,7 @@ def get_sample_dataset(
81
81
  else:
82
82
  raise ValidationError(
83
83
  f"Unknown module '{module}'. "
84
- f"Valid modules: augment, expressions, utilities"
84
+ f"Valid modules: synthetic, expressions, utilities"
85
85
  )
86
86
 
87
87
  # Check if file exists
@@ -141,7 +141,7 @@ def list_available_samples() -> dict:
141
141
  >>> samples = list_available_samples()
142
142
  >>> print(samples)
143
143
  {
144
- 'augment': ['sample'],
144
+ 'synthetic': ['sample'],
145
145
  'expressions': ['sample'],
146
146
  'utilities': []
147
147
  }
@@ -149,15 +149,15 @@ def list_available_samples() -> dict:
149
149
  base_path = Path(__file__).parent.parent.parent / "reference"
150
150
  available = {}
151
151
 
152
- # Check augment
153
- augment_path = base_path / "augment_definitions"
154
- if augment_path.exists():
155
- available['augment'] = [
152
+ # Check synthetic
153
+ synthetic_path = base_path / "synthetic_definitions"
154
+ if synthetic_path.exists():
155
+ available['synthetic'] = [
156
156
  f.stem.rsplit('_', 1)[0] # Remove version suffix
157
- for f in augment_path.glob("*.add")
157
+ for f in synthetic_path.glob("*.add")
158
158
  ]
159
159
  else:
160
- available['augment'] = []
160
+ available['synthetic'] = []
161
161
 
162
162
  # Check expressions
163
163
  expressions_path = base_path / "expressions_definitions"
@@ -16,6 +16,13 @@ try:
16
16
  except ImportError as e:
17
17
  ARROW_AVAILABLE = False
18
18
  IMPORT_ERROR = str(e)
19
+ # Create dummy classes for type annotations
20
+ class pa:
21
+ Table = Any
22
+ class pl:
23
+ DataFrame = Any
24
+ class pd:
25
+ DataFrame = Any
19
26
 
20
27
  from ..logging import log_info, log_warning
21
28
  from .cudf_bridge import get_cudf_bridge
@@ -32,7 +32,10 @@ class PolarsExpressionEngine:
32
32
  """Exclusive Polars-based expression processing engine"""
33
33
 
34
34
  def __init__(self):
35
- self.arrow_bridge = EnhancedArrowBridge()
35
+ try:
36
+ self.arrow_bridge = EnhancedArrowBridge()
37
+ except ArrowBridgeError:
38
+ self.arrow_bridge = None
36
39
  self.execution_stats = {
37
40
  "total_executions": 0,
38
41
  "total_time_ms": 0.0,
@@ -68,14 +71,28 @@ class PolarsExpressionEngine:
68
71
  try:
69
72
  # Auto-detect backend if not specified
70
73
  if backend_type is None:
71
- backend_type = self.arrow_bridge.detect_backend(df)
74
+ if self.arrow_bridge:
75
+ backend_type = self.arrow_bridge.detect_backend(df)
76
+ else:
77
+ backend_type = "pandas" # fallback
72
78
 
73
79
  # Get memory usage before processing
74
- memory_before = self.arrow_bridge._get_memory_usage_mb()
80
+ if self.arrow_bridge:
81
+ memory_before = self.arrow_bridge._get_memory_usage_mb()
82
+ else:
83
+ memory_before = 0
75
84
 
76
85
  # 1. Convert input to Arrow
77
86
  log_info(f"[polars_engine] Converting {backend_type} to Arrow")
78
- arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
87
+ if self.arrow_bridge:
88
+ arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
89
+ else:
90
+ # Fallback: assume pandas and convert directly
91
+ import pandas as pd
92
+ if isinstance(df, pd.DataFrame):
93
+ arrow_table = pl.from_pandas(df).to_arrow()
94
+ else:
95
+ raise RuntimeError("Arrow bridge not available and input is not pandas DataFrame")
79
96
 
80
97
  # 2. Convert Arrow to Polars
81
98
  log_info("[polars_engine] Converting Arrow to Polars")
@@ -93,11 +110,18 @@ class PolarsExpressionEngine:
93
110
 
94
111
  # 5. Convert to original backend format
95
112
  log_info(f"[polars_engine] Converting Arrow to {backend_type}")
96
- final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
113
+ if self.arrow_bridge:
114
+ final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
115
+ else:
116
+ # Fallback: convert back to pandas
117
+ final_result = pl.from_arrow(result_arrow).to_pandas()
97
118
 
98
119
  # Calculate execution statistics
99
120
  execution_time = (datetime.now() - start_time).total_seconds() * 1000
100
- memory_after = self.arrow_bridge._get_memory_usage_mb()
121
+ if self.arrow_bridge:
122
+ memory_after = self.arrow_bridge._get_memory_usage_mb()
123
+ else:
124
+ memory_after = 0
101
125
  memory_used = max(0, memory_after - memory_before)
102
126
 
103
127
  # Update global statistics
@@ -122,7 +146,8 @@ class PolarsExpressionEngine:
122
146
 
123
147
  finally:
124
148
  # 6. Always cleanup Arrow memory
125
- self.arrow_bridge.cleanup_arrow_memory()
149
+ if self.arrow_bridge:
150
+ self.arrow_bridge.cleanup_arrow_memory()
126
151
 
127
152
  def _execute_polars_expression(self, polars_df: pl.DataFrame,
128
153
  expression: str, output_column: str) -> pl.DataFrame:
@@ -381,14 +406,28 @@ class PolarsExpressionEngine:
381
406
  try:
382
407
  # Auto-detect backend if not specified
383
408
  if backend_type is None:
384
- backend_type = self.arrow_bridge.detect_backend(df)
409
+ if self.arrow_bridge:
410
+ backend_type = self.arrow_bridge.detect_backend(df)
411
+ else:
412
+ backend_type = "pandas"
385
413
 
386
414
  # Get memory usage before processing
387
- memory_before = self.arrow_bridge._get_memory_usage_mb()
415
+ if self.arrow_bridge:
416
+ memory_before = self.arrow_bridge._get_memory_usage_mb()
417
+ else:
418
+ memory_before = 0
388
419
 
389
420
  # Convert to Polars via Arrow
390
- arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
391
- polars_df = pl.from_arrow(arrow_table)
421
+ if self.arrow_bridge:
422
+ arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
423
+ polars_df = pl.from_arrow(arrow_table)
424
+ else:
425
+ # Fallback: assume pandas
426
+ import pandas as pd
427
+ if isinstance(df, pd.DataFrame):
428
+ polars_df = pl.from_pandas(df)
429
+ else:
430
+ raise RuntimeError("Arrow bridge not available and input is not pandas DataFrame")
392
431
 
393
432
  # Execute using AST
394
433
  polars_expr = self._ast_to_polars_expr(ast_tree)
@@ -396,11 +435,17 @@ class PolarsExpressionEngine:
396
435
 
397
436
  # Convert back to original format
398
437
  result_arrow = result_df.to_arrow()
399
- final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
438
+ if self.arrow_bridge:
439
+ final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
440
+ else:
441
+ final_result = pl.from_arrow(result_arrow).to_pandas()
400
442
 
401
443
  # Calculate statistics
402
444
  execution_time = (datetime.now() - start_time).total_seconds() * 1000
403
- memory_after = self.arrow_bridge._get_memory_usage_mb()
445
+ if self.arrow_bridge:
446
+ memory_after = self.arrow_bridge._get_memory_usage_mb()
447
+ else:
448
+ memory_after = 0
404
449
  memory_used = max(0, memory_after - memory_before)
405
450
 
406
451
  # Update statistics
@@ -422,7 +467,8 @@ class PolarsExpressionEngine:
422
467
  raise PolarsExpressionError(f"AST execution failed: {e}")
423
468
 
424
469
  finally:
425
- self.arrow_bridge.cleanup_arrow_memory()
470
+ if self.arrow_bridge:
471
+ self.arrow_bridge.cleanup_arrow_memory()
426
472
 
427
473
  def validate_expression(self, expression: str) -> bool:
428
474
  """
@@ -489,7 +535,10 @@ class PolarsExpressionEngine:
489
535
  Benchmark results
490
536
  """
491
537
  times = []
492
- backend_type = self.arrow_bridge.detect_backend(df)
538
+ if self.arrow_bridge:
539
+ backend_type = self.arrow_bridge.detect_backend(df)
540
+ else:
541
+ backend_type = "pandas"
493
542
 
494
543
  for i in range(iterations):
495
544
  try:
@@ -532,7 +581,8 @@ class PolarsExpressionEngine:
532
581
  """Cleanup callback for memory manager"""
533
582
  try:
534
583
  # Cleanup Arrow bridge memory
535
- self.arrow_bridge.cleanup_arrow_memory()
584
+ if self.arrow_bridge:
585
+ self.arrow_bridge.cleanup_arrow_memory()
536
586
 
537
587
  # Reset statistics if they get too large
538
588
  if self.execution_stats["total_executions"] > 10000:
additory/dynamic_api.py CHANGED
@@ -15,9 +15,8 @@ class AdditoryAPI(SimpleNamespace):
15
15
  Main API class for Additory functionality.
16
16
 
17
17
  Provides access to:
18
- - add.augment() - Data augmentation
18
+ - add.synthetic() - Synthetic data generation
19
19
  - add.to() - Lookup/join operations
20
- - add.synth() - Synthetic data generation
21
20
  - add.scan() - Data profiling and analysis
22
21
  - add.my - User expressions
23
22
  - add.play() - Hidden games (for the curious 😉)
@@ -31,8 +30,8 @@ class AdditoryAPI(SimpleNamespace):
31
30
  self.my = ExpressionProxy(namespace="user")
32
31
  self._builtin_proxy = ExpressionProxy(namespace="builtin")
33
32
 
34
- # Explicitly set the augment method to prevent namespace conflicts
35
- self.augment = self._augment_method
33
+ # Explicitly set the synthetic method to prevent namespace conflicts
34
+ self.synthetic = self._synthetic_method
36
35
 
37
36
  def __getattr__(self, name):
38
37
  """
@@ -61,62 +60,62 @@ class AdditoryAPI(SimpleNamespace):
61
60
  except Exception:
62
61
  return False
63
62
 
64
- def _augment_method(self, df, n_rows=5, strategy="auto", seed=None, output_format="pandas", **kwargs):
63
+ def _synthetic_method(self, df, n_rows=5, strategy="auto", seed=None, output_format="pandas", **kwargs):
65
64
  """
66
- Augment a dataframe with additional rows or create data from scratch.
65
+ Generate synthetic data by extending a dataframe or creating from scratch.
67
66
 
68
67
  Three modes:
69
- 1. Augment mode: Pass a DataFrame to add rows
68
+ 1. Extend mode: Pass a DataFrame to add synthetic rows
70
69
  2. Create mode: Pass "@new" to create data from scratch
71
70
  3. Sample mode: Pass "@sample" to load sample data
72
71
 
73
72
  Args:
74
- df: DataFrame to augment, "@new" to create, or "@sample" for sample data
75
- n_rows: Number of rows (int for create/sample, int/float/str for augment)
76
- strategy: Strategy specification (dict for create, str/dict for augment)
73
+ df: DataFrame to extend, "@new" to create, or "@sample" for sample data
74
+ n_rows: Number of rows (int for create/sample, int/float/str for extend)
75
+ strategy: Strategy specification (dict for create, str/dict for extend)
77
76
  seed: Random seed for reproducibility
78
77
  output_format: Output format ("pandas", "polars", "cudf")
79
78
  **kwargs: Additional parameters
80
79
 
81
80
  Returns:
82
- Augmented or generated DataFrame
81
+ Extended or generated DataFrame
83
82
 
84
83
  Examples:
85
- # Augment existing data
86
- result = add.augment(df, n_rows=100, strategy='auto')
84
+ # Extend existing data
85
+ result = add.synthetic(df, n_rows=100, strategy='auto')
87
86
 
88
87
  # Create from scratch
89
- result = add.augment("@new", n_rows=100, strategy={'id': 'increment', 'age': 'range:18-65'})
88
+ result = add.synthetic("@new", n_rows=100, strategy={'id': 'increment', 'age': 'range:18-65'})
90
89
 
91
90
  # Load sample data
92
- result = add.augment("@sample", n_rows=50)
91
+ result = add.synthetic("@sample", n_rows=50)
93
92
  """
94
93
  # Store reference to restore after import (in the correct namespace)
95
94
  import additory
96
- original_augment = getattr(additory, 'augment', None)
95
+ original_synthetic = getattr(additory, 'synthetic', None)
97
96
 
98
97
  try:
99
98
  # Import and call the implementation
100
- from additory.augment.augmentor import augment as augment_impl
101
- result = augment_impl(df, n_rows=n_rows, strategy=strategy, seed=seed,
102
- output_format=output_format, **kwargs)
99
+ from additory.synthetic.synthesizer import synthetic as synthetic_impl
100
+ result = synthetic_impl(df, n_rows=n_rows, strategy=strategy, seed=seed,
101
+ output_format=output_format, **kwargs)
103
102
 
104
103
  # Restore the method reference in the additory module namespace
105
- # The import above will have overridden additory.augment with the module
104
+ # The import above will have overridden additory.synthetic with the module
106
105
  # We need to restore it to point to this method
107
- if original_augment is not None:
108
- additory.augment = original_augment
106
+ if original_synthetic is not None:
107
+ additory.synthetic = original_synthetic
109
108
  else:
110
- # If there was no original augment, set it to this method
111
- additory.augment = self._augment_method
109
+ # If there was no original synthetic, set it to this method
110
+ additory.synthetic = self._synthetic_method
112
111
 
113
112
  return result
114
113
  except Exception as e:
115
114
  # Restore the method reference even if there's an error
116
- if original_augment is not None:
117
- additory.augment = original_augment
115
+ if original_synthetic is not None:
116
+ additory.synthetic = original_synthetic
118
117
  else:
119
- additory.augment = self._augment_method
118
+ additory.synthetic = self._synthetic_method
120
119
  raise
121
120
 
122
121
  def to(self, target_df, from_df=None, bring=None, against=None, **kwargs):
@@ -140,25 +139,6 @@ class AdditoryAPI(SimpleNamespace):
140
139
  from additory.utilities.lookup import to
141
140
  return to(target_df, from_df, bring=bring, against=against, **kwargs)
142
141
 
143
- def synth(self, schema_path: str, rows: int = 1000, engine: Optional[str] = None):
144
- """
145
- Generate synthetic data from a schema file.
146
-
147
- Args:
148
- schema_path: Path to the .toml schema file
149
- rows: Number of rows to generate (default: 1000)
150
- engine: Output engine ("pandas" or "polars"). If None, uses default from config
151
-
152
- Returns:
153
- Generated DataFrame in the specified format
154
-
155
- Example:
156
- df = add.synth("customer.toml", rows=5000)
157
- df = add.synth("customer.toml", rows=5000, engine="polars")
158
- """
159
- from additory.synthetic.api import synth as synth_impl
160
- return synth_impl(schema_path, rows, engine)
161
-
162
142
  def onehotencoding(self, df, columns=None, **kwargs):
163
143
  """
164
144
  One-hot encode categorical columns.
@@ -279,6 +259,22 @@ class AdditoryAPI(SimpleNamespace):
279
259
  verbose=verbose
280
260
  )
281
261
 
262
+ def games(self):
263
+ """
264
+ List available games! 🎮
265
+
266
+ Returns a list of games you can play with add.play().
267
+
268
+ Returns:
269
+ List of available game names
270
+
271
+ Example:
272
+ >>> import additory
273
+ >>> additory.add.games()
274
+ ['tictactoe', 'sudoku']
275
+ """
276
+ return ['tictactoe', 'sudoku']
277
+
282
278
  def play(self, game: str = "tictactoe"):
283
279
  """
284
280
  Play a game! 🎮
@@ -287,7 +287,10 @@ class EnhancedExpressionProxy:
287
287
  backend_type = "polars"
288
288
  else:
289
289
  # Try to detect other types
290
- backend_type = self.polars_engine.arrow_bridge.detect_backend(df)
290
+ if self.polars_engine.arrow_bridge:
291
+ backend_type = self.polars_engine.arrow_bridge.detect_backend(df)
292
+ else:
293
+ backend_type = "pandas" # fallback
291
294
 
292
295
  # Execute using Polars engine
293
296
  result = self.polars_engine.execute_expression(
@@ -1,101 +1,13 @@
1
1
  """
2
- Additory Synthetic Data Generation Module
2
+ Synthetic Module - Synthetic Data Generation Functionality
3
3
 
4
- This module provides polars-native synthetic data generation using regex patterns
5
- and distribution strategies. It supports hierarchical pattern resolution and
6
- industry-standard file formats (.properties and .toml).
4
+ This module provides synthetic data generation capabilities to add synthetic rows
5
+ to existing dataframes or create data from scratch by intelligently sampling
6
+ from existing data patterns.
7
7
  """
8
8
 
9
- from .api import (
10
- synth,
11
- config,
12
- register_distribution_engine,
13
- unregister_distribution_engine,
14
- list_custom_distribution_engines
15
- )
16
- from .exceptions import (
17
- SyntheticDataError,
18
- PatternResolutionError,
19
- ValidationError,
20
- DistributionError,
21
- FileFormatError,
22
- PatternImportError,
23
- SchemaParsingError
24
- )
25
- from .pattern_resolver import PatternHierarchyResolver, ResolutionTrace, PatternResolutionResult
26
- from .engines import (
27
- DistributionEngine,
28
- DistributionEngineFactory,
29
- DistributionManager,
30
- DistributionConfig,
31
- )
32
- from .generator import (
33
- RegexGenerator,
34
- PolarsGeneratorCore,
35
- OutputConverter,
36
- SyntheticDataGenerator,
37
- GenerationConfig,
38
- )
39
- from .performance import (
40
- PerformanceMonitor,
41
- PerformanceOptimizer,
42
- PerformanceMetrics,
43
- PerformanceComparison,
44
- performance_monitor,
45
- performance_optimizer
46
- )
47
- from .polars_integration import (
48
- PolarsIntegrationLayer,
49
- optimize_conversion,
50
- enhance_result,
51
- optimize_context,
52
- apply_expression,
53
- optimize_memory,
54
- validate_compatibility,
55
- get_integration_stats,
56
- cleanup_integration,
57
- benchmark_integration
58
- )
9
+ from additory.synthetic.synthesizer import synthetic
59
10
 
60
11
  __all__ = [
61
- 'synth',
62
- 'config',
63
- 'register_distribution_engine',
64
- 'unregister_distribution_engine',
65
- 'list_custom_distribution_engines',
66
- 'SyntheticDataError',
67
- 'PatternResolutionError',
68
- 'ValidationError',
69
- 'DistributionError',
70
- 'FileFormatError',
71
- 'PatternImportError',
72
- 'SchemaParsingError',
73
- 'PatternHierarchyResolver',
74
- 'ResolutionTrace',
75
- 'PatternResolutionResult',
76
- 'DistributionEngine',
77
- 'DistributionEngineFactory',
78
- 'DistributionManager',
79
- 'DistributionConfig',
80
- 'RegexGenerator',
81
- 'PolarsGeneratorCore',
82
- 'OutputConverter',
83
- 'SyntheticDataGenerator',
84
- 'GenerationConfig',
85
- 'PerformanceMonitor',
86
- 'PerformanceOptimizer',
87
- 'PerformanceMetrics',
88
- 'PerformanceComparison',
89
- 'performance_monitor',
90
- 'performance_optimizer',
91
- 'PolarsIntegrationLayer',
92
- 'optimize_conversion',
93
- 'enhance_result',
94
- 'optimize_context',
95
- 'apply_expression',
96
- 'optimize_memory',
97
- 'validate_compatibility',
98
- 'get_integration_stats',
99
- 'cleanup_integration',
100
- 'benchmark_integration'
101
- ]
12
+ "synthetic"
13
+ ]