additory 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. additory/__init__.py +15 -0
  2. additory/analysis/__init__.py +48 -0
  3. additory/analysis/cardinality.py +126 -0
  4. additory/analysis/correlations.py +124 -0
  5. additory/analysis/distributions.py +376 -0
  6. additory/analysis/quality.py +158 -0
  7. additory/analysis/scan.py +400 -0
  8. additory/augment/__init__.py +24 -0
  9. additory/augment/augmentor.py +653 -0
  10. additory/augment/builtin_lists.py +430 -0
  11. additory/augment/distributions.py +22 -0
  12. additory/augment/forecast.py +1132 -0
  13. additory/augment/list_registry.py +177 -0
  14. additory/augment/smote.py +320 -0
  15. additory/augment/strategies.py +883 -0
  16. additory/common/__init__.py +157 -0
  17. additory/common/backend.py +355 -0
  18. additory/common/column_utils.py +191 -0
  19. additory/common/distributions.py +737 -0
  20. additory/common/exceptions.py +62 -0
  21. additory/common/lists.py +229 -0
  22. additory/common/patterns.py +240 -0
  23. additory/common/resolver.py +567 -0
  24. additory/common/sample_data.py +182 -0
  25. additory/common/validation.py +197 -0
  26. additory/core/__init__.py +27 -0
  27. additory/core/ast_builder.py +165 -0
  28. additory/core/backends/__init__.py +23 -0
  29. additory/core/backends/arrow_bridge.py +476 -0
  30. additory/core/backends/cudf_bridge.py +355 -0
  31. additory/core/column_positioning.py +358 -0
  32. additory/core/compiler_polars.py +166 -0
  33. additory/core/config.py +342 -0
  34. additory/core/enhanced_cache_manager.py +1119 -0
  35. additory/core/enhanced_matchers.py +473 -0
  36. additory/core/enhanced_version_manager.py +325 -0
  37. additory/core/executor.py +59 -0
  38. additory/core/integrity_manager.py +477 -0
  39. additory/core/loader.py +190 -0
  40. additory/core/logging.py +24 -0
  41. additory/core/memory_manager.py +547 -0
  42. additory/core/namespace_manager.py +657 -0
  43. additory/core/parser.py +176 -0
  44. additory/core/polars_expression_engine.py +551 -0
  45. additory/core/registry.py +176 -0
  46. additory/core/sample_data_manager.py +492 -0
  47. additory/core/user_namespace.py +751 -0
  48. additory/core/validator.py +27 -0
  49. additory/dynamic_api.py +308 -0
  50. additory/expressions/__init__.py +26 -0
  51. additory/expressions/engine.py +551 -0
  52. additory/expressions/parser.py +176 -0
  53. additory/expressions/proxy.py +546 -0
  54. additory/expressions/registry.py +313 -0
  55. additory/expressions/samples.py +492 -0
  56. additory/synthetic/__init__.py +101 -0
  57. additory/synthetic/api.py +220 -0
  58. additory/synthetic/common_integration.py +314 -0
  59. additory/synthetic/config.py +262 -0
  60. additory/synthetic/engines.py +529 -0
  61. additory/synthetic/exceptions.py +180 -0
  62. additory/synthetic/file_managers.py +518 -0
  63. additory/synthetic/generator.py +702 -0
  64. additory/synthetic/generator_parser.py +68 -0
  65. additory/synthetic/integration.py +319 -0
  66. additory/synthetic/models.py +241 -0
  67. additory/synthetic/pattern_resolver.py +573 -0
  68. additory/synthetic/performance.py +469 -0
  69. additory/synthetic/polars_integration.py +464 -0
  70. additory/synthetic/proxy.py +60 -0
  71. additory/synthetic/schema_parser.py +685 -0
  72. additory/synthetic/validator.py +553 -0
  73. additory/utilities/__init__.py +53 -0
  74. additory/utilities/encoding.py +600 -0
  75. additory/utilities/games.py +300 -0
  76. additory/utilities/keys.py +8 -0
  77. additory/utilities/lookup.py +103 -0
  78. additory/utilities/matchers.py +216 -0
  79. additory/utilities/resolvers.py +286 -0
  80. additory/utilities/settings.py +167 -0
  81. additory/utilities/units.py +746 -0
  82. additory/utilities/validators.py +153 -0
  83. additory-0.1.0a1.dist-info/METADATA +293 -0
  84. additory-0.1.0a1.dist-info/RECORD +87 -0
  85. additory-0.1.0a1.dist-info/WHEEL +5 -0
  86. additory-0.1.0a1.dist-info/licenses/LICENSE +21 -0
  87. additory-0.1.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,27 @@
1
+ """
2
+ Expression Validator
3
+
4
+ Validates expression syntax and structure.
5
+ """
6
+
7
+ from typing import Dict, Any, List, Tuple
8
+
9
+
10
def validate_expression(expression: str) -> Tuple[bool, List[str]]:
    """
    Validate an expression.

    Only basic checks are performed: the expression must be a non-empty,
    non-whitespace string. Problems are reported through the returned
    error list rather than by raising.

    Args:
        expression: Expression string to validate

    Returns:
        Tuple of (is_valid, error_messages). ``error_messages`` is empty
        when the expression is valid.
    """
    # Falsy input (None, "", ...) is reported as empty.
    if not expression:
        return (False, ["Expression cannot be empty"])

    # A truthy non-string would otherwise crash on .strip() below; report it
    # through the normal (is_valid, errors) contract instead.
    if not isinstance(expression, str):
        return (
            False,
            [f"Expression must be a string, got {type(expression).__name__}"],
        )

    if not expression.strip():
        return (False, ["Expression cannot be empty"])

    # Basic validation - can be expanded later
    return (True, [])
@@ -0,0 +1,308 @@
1
+ """
2
+ Dynamic API for Additory
3
+
4
+ This module provides the main API interface with dynamic attribute access.
5
+ """
6
+
7
+ from types import SimpleNamespace
8
+ from typing import Union, Optional, List, Any
9
+ import pandas as pd
10
+ import polars as pl
11
+
12
+
13
class AdditoryAPI(SimpleNamespace):
    """
    Main API class for Additory functionality.

    Provides access to:
    - add.augment() - Data augmentation
    - add.to() - Lookup/join operations
    - add.synth() - Synthetic data generation
    - add.scan() - Data profiling and analysis
    - add.my - User expressions
    - add.play() - Hidden games (for the curious 😉)
    - Expression evaluation

    Names that are not regular attributes or methods are resolved as
    expressions through ``__getattr__``: built-in expressions first, then
    user expressions.
    """

    def __init__(self):
        """Create the user and built-in expression proxies and bind ``augment``."""
        super().__init__()
        # Initialize expression proxies (imported here rather than at module level)
        from additory.expressions.proxy import ExpressionProxy
        self.my = ExpressionProxy(namespace="user")
        self._builtin_proxy = ExpressionProxy(namespace="builtin")

        # Explicitly set the augment method to prevent namespace conflicts
        self.augment = self._augment_method

    def __getattr__(self, name):
        """
        Dynamic attribute access for expressions.

        Only invoked when normal attribute lookup fails. Checks built-in
        expressions first, then user expressions, so built-in expressions
        take precedence.

        Raises:
            AttributeError: If ``name`` is not an expression in either namespace.
        """
        # Read the proxies straight from the instance dict. Going through
        # attribute access here would re-enter __getattr__ whenever a proxy
        # is missing (instance built without __init__, copy/pickle probes,
        # a failed proxy import) and recurse until RecursionError.
        state = self.__dict__
        candidates = (state.get("_builtin_proxy"), state.get("my"))

        # Built-in proxy is first in the tuple, so it wins on name clashes.
        for proxy in candidates:
            if proxy is not None and self._expression_exists(proxy, name):
                return getattr(proxy, name)

        # If not found, raise AttributeError
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def _expression_exists(self, proxy, name):
        """
        Check if an expression exists in a proxy's namespace.

        Best-effort: any error while listing expressions is treated as
        "not found" so that attribute lookup falls through to AttributeError.
        """
        try:
            # List expressions in the proxy's namespace
            expr_list = proxy.list_expressions()
            return name in expr_list.get("expressions", {})
        except Exception:
            return False

    def _augment_method(self, df, n_rows=5, strategy="auto", seed=None, output_format="pandas", **kwargs):
        """
        Augment a dataframe with additional rows or create data from scratch.

        Three modes:
        1. Augment mode: Pass a DataFrame to add rows
        2. Create mode: Pass "@new" to create data from scratch
        3. Sample mode: Pass "@sample" to load sample data

        Args:
            df: DataFrame to augment, "@new" to create, or "@sample" for sample data
            n_rows: Number of rows (int for create/sample, int/float/str for augment)
            strategy: Strategy specification (dict for create, str/dict for augment)
            seed: Random seed for reproducibility
            output_format: Output format ("pandas", "polars", "cudf")
            **kwargs: Additional parameters

        Returns:
            Augmented or generated DataFrame

        Examples:
            # Augment existing data
            result = add.augment(df, n_rows=100, strategy='auto')

            # Create from scratch
            result = add.augment("@new", n_rows=100, strategy={'id': 'increment', 'age': 'range:18-65'})

            # Load sample data
            result = add.augment("@sample", n_rows=50)
        """
        # Remember what `additory.augment` currently refers to so it can be
        # restored after the import below (in the correct namespace).
        import additory
        original_augment = getattr(additory, 'augment', None)

        try:
            # Import and call the implementation
            from additory.augment.augmentor import augment as augment_impl
            return augment_impl(df, n_rows=n_rows, strategy=strategy, seed=seed,
                                output_format=output_format, **kwargs)
        finally:
            # Importing the submodule can rebind `additory.augment` to the
            # `additory.augment` subpackage. Put the previous value back, or
            # this method if there was none. `finally` covers success and
            # every failure path with a single restore.
            if original_augment is not None:
                additory.augment = original_augment
            else:
                additory.augment = self._augment_method

    def to(self, target_df, from_df=None, bring=None, against=None, **kwargs):
        """
        Add columns from reference dataframe to target dataframe.

        Args:
            target_df: Target dataframe to add columns to
            from_df: Reference dataframe to get columns from
            bring: Column(s) to bring from reference dataframe (str or list)
            against: Column(s) to match on (str or list)
            **kwargs: Additional parameters

        Returns:
            Target dataframe with new columns added

        Example:
            result = add.to(orders_df, from_df=products_df, bring='price', against='product_id')
            result = add.to(orders_df, from_df=products_df, bring=['price', 'name'], against='product_id')
        """
        from additory.utilities.lookup import to
        return to(target_df, from_df, bring=bring, against=against, **kwargs)

    def synth(self, schema_path: str, rows: int = 1000, engine: Optional[str] = None):
        """
        Generate synthetic data from a schema file.

        Args:
            schema_path: Path to the .toml schema file
            rows: Number of rows to generate (default: 1000)
            engine: Output engine ("pandas" or "polars"). If None, uses default from config

        Returns:
            Generated DataFrame in the specified format

        Example:
            df = add.synth("customer.toml", rows=5000)
            df = add.synth("customer.toml", rows=5000, engine="polars")
        """
        from additory.synthetic.api import synth as synth_impl
        return synth_impl(schema_path, rows, engine)

    def onehotencoding(self, df, columns=None, **kwargs):
        """
        One-hot encode categorical columns.

        Args:
            df: Input dataframe
            columns: Column to encode (single column name as string);
                forwarded to the implementation as its ``column`` argument
            **kwargs: Additional parameters

        Returns:
            DataFrame with one-hot encoded columns
        """
        from additory.utilities.encoding import onehotencoding
        return onehotencoding(df, column=columns, **kwargs)

    def harmonize_units(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
        """
        Harmonize units in a dataframe.

        Args:
            df: Input dataframe
            value_column: Column containing numeric values
            unit_column: Column containing unit strings
            target_unit: Target unit to convert to (auto-detected if None)
            position: Where to place new columns ("end", "start", etc.)
            **kwargs: Additional parameters

        Returns:
            DataFrame with harmonized units

        Example:
            result = add.harmonize_units(df, value_column='weight', unit_column='unit')
            result = add.harmonize_units(df, value_column='temp', unit_column='unit', target_unit='F')
        """
        from additory.utilities.units import harmonize_units
        return harmonize_units(df, value_column, unit_column, target_unit, position, **kwargs)

    def scan(
        self,
        df: Union[pl.DataFrame, pd.DataFrame, Any],
        preset: Optional[str] = None,
        detect_distributions: bool = True,
        detect_correlations: bool = True,
        detect_cardinality: bool = True,
        top_n_distributions: int = 3,
        correlation_methods: Optional[List[str]] = None,
        correlation_threshold: float = 0.3,
        cardinality_top_n: int = 10,
        verbose: bool = True
    ):
        """
        Scan a DataFrame to detect distributions, correlations, and cardinality.

        Accepts pandas, polars, or cuDF DataFrames. Automatically converts to Polars
        for processing. Returns ScanResult with analysis results.

        This function provides comprehensive data profiling including:
        - Distribution detection for numeric columns
        - Correlation analysis between columns
        - Cardinality analysis (unique values)
        - Data quality metrics

        Args:
            df: DataFrame to analyze (pandas, polars, or cuDF)
            preset: Optional preset ('quick', 'distributions', 'correlations', 'full', 'minimal')
            detect_distributions: Whether to detect distributions (default: True)
            detect_correlations: Whether to calculate correlations (default: True)
            detect_cardinality: Whether to analyze cardinality (default: True)
            top_n_distributions: Number of top distributions to return per column (default: 3)
            correlation_methods: Correlation methods to use (default: ['pearson', 'spearman'])
            correlation_threshold: Minimum correlation to report (default: 0.3)
            cardinality_top_n: Number of top values to return per column (default: 10)
            verbose: Whether to print progress messages (default: True)

        Returns:
            ScanResult object containing all analysis results

        Presets:
            - 'quick': Quality + cardinality only (fast)
            - 'distributions': Distribution detection only
            - 'correlations': Correlation analysis only
            - 'full': All analyses enabled
            - 'minimal': Quality metrics only (fastest)

        Example:
            >>> import pandas as pd
            >>> import additory as add
            >>>
            >>> # Works with pandas
            >>> df = pd.DataFrame({
            ...     'age': [25, 30, 35, 40, 45],
            ...     'income': [50000, 60000, 70000, 80000, 90000],
            ...     'category': ['A', 'B', 'A', 'B', 'A']
            ... })
            >>>
            >>> result = add.scan(df)
            >>> print(result.summary())
            >>>
            >>> # Use presets
            >>> result = add.scan(df, preset='quick')
            >>> result = add.scan(df, preset='distributions', top_n_distributions=5)
        """
        from additory.analysis.scan import scan as scan_impl

        # None stands in for the default list so no mutable default is shared
        # between calls.
        if correlation_methods is None:
            correlation_methods = ['pearson', 'spearman']

        # The implementation names its boolean switches with a `_flag` suffix.
        return scan_impl(
            df,
            preset=preset,
            detect_distributions_flag=detect_distributions,
            detect_correlations_flag=detect_correlations,
            detect_cardinality_flag=detect_cardinality,
            top_n_distributions=top_n_distributions,
            correlation_methods=correlation_methods,
            correlation_threshold=correlation_threshold,
            cardinality_top_n=cardinality_top_n,
            verbose=verbose
        )

    def play(self, game: str = "tictactoe"):
        """
        Play a game! 🎮

        Hidden feature for the curious. Reinforces row-column thinking.

        Available games:
        - 'tictactoe' or 'ttt': Play Tic-Tac-Toe
        - 'sudoku': Play Sudoku

        Args:
            game: Name of the game to play (default: 'tictactoe')

        Example:
            >>> import additory
            >>> additory.add.play('tictactoe')
            >>> additory.add.play('sudoku')
        """
        from additory.utilities.games import play as play_impl
        return play_impl(game)
302
+
303
+
304
+ # Create the singleton API instance (built once, when this module is first imported)
305
+ add = AdditoryAPI()
306
+
307
+ # Export only the singleton: `add` is the sole public name of this module
308
+ __all__ = ['add']
@@ -0,0 +1,26 @@
1
+ # additory/expressions/__init__.py
2
+ # Expression system - .add file driven functionality
3
+
4
+ """
5
+ Expression System Module
6
+
7
+ This module handles .add file driven functionality including:
8
+ - Expression parsing and compilation
9
+ - Polars-based expression execution
10
+ - Expression caching and versioning
11
+ - Sample data management
12
+ - Namespace support (builtin vs user)
13
+ """
14
+
15
+ # TODO: re-export the core expression classes here once the migration is done (placeholders below)
16
+ # from .proxy import EnhancedExpressionProxy  # NOTE(review): dynamic_api imports `ExpressionProxy` from this module - confirm the class name
17
+ # from .engine import PolarsExpressionEngine
18
+ # from .parser import ExpressionParser
19
+ # from .compiler import ExpressionCompiler  # NOTE(review): no compiler.py is listed under additory/expressions/ - confirm the module path
20
+ # from .executor import ExpressionExecutor  # NOTE(review): no executor.py is listed under additory/expressions/ - confirm the module path
21
+ # from .registry import ExpressionRegistry
22
+ # from .samples import SampleDataManager
23
+
24
+ __all__ = [
25
+ # Intentionally empty for now: nothing is re-exported until the imports above are enabled
26
+ ]