additory 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +15 -0
- additory/analysis/__init__.py +48 -0
- additory/analysis/cardinality.py +126 -0
- additory/analysis/correlations.py +124 -0
- additory/analysis/distributions.py +376 -0
- additory/analysis/quality.py +158 -0
- additory/analysis/scan.py +400 -0
- additory/augment/__init__.py +24 -0
- additory/augment/augmentor.py +653 -0
- additory/augment/builtin_lists.py +430 -0
- additory/augment/distributions.py +22 -0
- additory/augment/forecast.py +1132 -0
- additory/augment/list_registry.py +177 -0
- additory/augment/smote.py +320 -0
- additory/augment/strategies.py +883 -0
- additory/common/__init__.py +157 -0
- additory/common/backend.py +355 -0
- additory/common/column_utils.py +191 -0
- additory/common/distributions.py +737 -0
- additory/common/exceptions.py +62 -0
- additory/common/lists.py +229 -0
- additory/common/patterns.py +240 -0
- additory/common/resolver.py +567 -0
- additory/common/sample_data.py +182 -0
- additory/common/validation.py +197 -0
- additory/core/__init__.py +27 -0
- additory/core/ast_builder.py +165 -0
- additory/core/backends/__init__.py +23 -0
- additory/core/backends/arrow_bridge.py +476 -0
- additory/core/backends/cudf_bridge.py +355 -0
- additory/core/column_positioning.py +358 -0
- additory/core/compiler_polars.py +166 -0
- additory/core/config.py +342 -0
- additory/core/enhanced_cache_manager.py +1119 -0
- additory/core/enhanced_matchers.py +473 -0
- additory/core/enhanced_version_manager.py +325 -0
- additory/core/executor.py +59 -0
- additory/core/integrity_manager.py +477 -0
- additory/core/loader.py +190 -0
- additory/core/logging.py +24 -0
- additory/core/memory_manager.py +547 -0
- additory/core/namespace_manager.py +657 -0
- additory/core/parser.py +176 -0
- additory/core/polars_expression_engine.py +551 -0
- additory/core/registry.py +176 -0
- additory/core/sample_data_manager.py +492 -0
- additory/core/user_namespace.py +751 -0
- additory/core/validator.py +27 -0
- additory/dynamic_api.py +308 -0
- additory/expressions/__init__.py +26 -0
- additory/expressions/engine.py +551 -0
- additory/expressions/parser.py +176 -0
- additory/expressions/proxy.py +546 -0
- additory/expressions/registry.py +313 -0
- additory/expressions/samples.py +492 -0
- additory/synthetic/__init__.py +101 -0
- additory/synthetic/api.py +220 -0
- additory/synthetic/common_integration.py +314 -0
- additory/synthetic/config.py +262 -0
- additory/synthetic/engines.py +529 -0
- additory/synthetic/exceptions.py +180 -0
- additory/synthetic/file_managers.py +518 -0
- additory/synthetic/generator.py +702 -0
- additory/synthetic/generator_parser.py +68 -0
- additory/synthetic/integration.py +319 -0
- additory/synthetic/models.py +241 -0
- additory/synthetic/pattern_resolver.py +573 -0
- additory/synthetic/performance.py +469 -0
- additory/synthetic/polars_integration.py +464 -0
- additory/synthetic/proxy.py +60 -0
- additory/synthetic/schema_parser.py +685 -0
- additory/synthetic/validator.py +553 -0
- additory/utilities/__init__.py +53 -0
- additory/utilities/encoding.py +600 -0
- additory/utilities/games.py +300 -0
- additory/utilities/keys.py +8 -0
- additory/utilities/lookup.py +103 -0
- additory/utilities/matchers.py +216 -0
- additory/utilities/resolvers.py +286 -0
- additory/utilities/settings.py +167 -0
- additory/utilities/units.py +746 -0
- additory/utilities/validators.py +153 -0
- additory-0.1.0a1.dist-info/METADATA +293 -0
- additory-0.1.0a1.dist-info/RECORD +87 -0
- additory-0.1.0a1.dist-info/WHEEL +5 -0
- additory-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- additory-0.1.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Expression Validator
|
|
3
|
+
|
|
4
|
+
Validates expression syntax and structure.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Any, List, Tuple
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def validate_expression(expression: str) -> Tuple[bool, List[str]]:
    """
    Validate an expression.

    Only basic checks are performed for now: the expression must be a
    non-empty string that is not just whitespace. Invalid input is always
    reported through the returned error list rather than by raising.

    Args:
        expression: Expression string to validate

    Returns:
        Tuple of (is_valid, error_messages)
    """
    # None, "" and other falsy inputs are all reported as "empty"
    if not expression:
        return (False, ["Expression cannot be empty"])

    # A truthy non-string would otherwise crash on .strip() below; report it
    # as a validation error so callers always get the documented tuple back.
    if not isinstance(expression, str):
        return (
            False,
            [f"Expression must be a string, got {type(expression).__name__}"],
        )

    if not expression.strip():
        return (False, ["Expression cannot be empty"])

    # Basic validation - can be expanded later
    return (True, [])
|
additory/dynamic_api.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dynamic API for Additory
|
|
3
|
+
|
|
4
|
+
This module provides the main API interface with dynamic attribute access.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from types import SimpleNamespace
|
|
8
|
+
from typing import Union, Optional, List, Any
|
|
9
|
+
import pandas as pd
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AdditoryAPI(SimpleNamespace):
    """
    Main API class for Additory functionality.

    Provides access to:
    - add.augment() - Data augmentation
    - add.to() - Lookup/join operations
    - add.synth() - Synthetic data generation
    - add.scan() - Data profiling and analysis
    - add.my - User expressions
    - add.play() - Hidden games (for the curious 😉)
    - Expression evaluation

    Names that are neither real attributes nor methods are resolved
    dynamically as expressions (see ``__getattr__``): the built-in
    namespace is consulted first, then the user namespace.
    """

    def __init__(self):
        super().__init__()
        # Initialize expression proxies (imported lazily, inside the constructor)
        from additory.expressions.proxy import ExpressionProxy
        self.my = ExpressionProxy(namespace="user")
        self._builtin_proxy = ExpressionProxy(namespace="builtin")

        # Explicitly set the augment method to prevent namespace conflicts
        self.augment = self._augment_method

    def __getattr__(self, name):
        """
        Dynamic attribute access for expressions.

        Checks built-in expressions first, then user expressions.
        This ensures built-in expressions take precedence.

        Raises:
            AttributeError: If ``name`` is not a known expression in either
                namespace.
        """
        # __getattr__ only runs after normal lookup has failed. Read the
        # proxies straight from the instance dict: going through
        # ``self._builtin_proxy`` / ``self.my`` would re-enter __getattr__
        # and recurse without bound whenever they are not set yet (instance
        # created via __new__, copy or unpickling, or a failed __init__).
        state = self.__dict__
        for proxy in (state.get("_builtin_proxy"), state.get("my")):
            if proxy is not None and self._expression_exists(proxy, name):
                return getattr(proxy, name)

        # If not found, raise AttributeError
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def _expression_exists(self, proxy, name):
        """
        Check if an expression exists in a proxy's namespace.

        Args:
            proxy: Object exposing ``list_expressions()``
            name: Expression name to look for

        Returns:
            True if ``name`` is a key of the ``"expressions"`` mapping returned
            by ``proxy.list_expressions()``; False otherwise, including when
            listing fails for any reason.
        """
        try:
            # List expressions in the proxy's namespace
            expr_list = proxy.list_expressions()
            return name in expr_list.get("expressions", {})
        except Exception:
            # Deliberately best-effort: a listing failure must surface as
            # "no such attribute" to the caller, not as an unrelated error.
            return False

    def _augment_method(self, df, n_rows=5, strategy="auto", seed=None, output_format="pandas", **kwargs):
        """
        Augment a dataframe with additional rows or create data from scratch.

        Three modes:
        1. Augment mode: Pass a DataFrame to add rows
        2. Create mode: Pass "@new" to create data from scratch
        3. Sample mode: Pass "@sample" to load sample data

        Args:
            df: DataFrame to augment, "@new" to create, or "@sample" for sample data
            n_rows: Number of rows (int for create/sample, int/float/str for augment)
            strategy: Strategy specification (dict for create, str/dict for augment)
            seed: Random seed for reproducibility
            output_format: Output format ("pandas", "polars", "cudf")
            **kwargs: Additional parameters

        Returns:
            Augmented or generated DataFrame

        Examples:
            # Augment existing data
            result = add.augment(df, n_rows=100, strategy='auto')

            # Create from scratch
            result = add.augment("@new", n_rows=100, strategy={'id': 'increment', 'age': 'range:18-65'})

            # Load sample data
            result = add.augment("@sample", n_rows=50)
        """
        # Store reference to restore after import (in the correct namespace)
        import additory
        original_augment = getattr(additory, 'augment', None)

        try:
            # Import and call the implementation
            from additory.augment.augmentor import augment as augment_impl
            return augment_impl(df, n_rows=n_rows, strategy=strategy, seed=seed,
                                output_format=output_format, **kwargs)
        finally:
            # Importing the ``additory.augment`` subpackage rebinds
            # ``additory.augment`` to the module object. Put the callable
            # back whether the call succeeded or raised; if nothing was bound
            # before, bind this method.
            if original_augment is not None:
                additory.augment = original_augment
            else:
                additory.augment = self._augment_method

    def to(self, target_df, from_df=None, bring=None, against=None, **kwargs):
        """
        Add columns from reference dataframe to target dataframe.

        Args:
            target_df: Target dataframe to add columns to
            from_df: Reference dataframe to get columns from
            bring: Column(s) to bring from reference dataframe (str or list)
            against: Column(s) to match on (str or list)
            **kwargs: Additional parameters

        Returns:
            Target dataframe with new columns added

        Example:
            result = add.to(orders_df, from_df=products_df, bring='price', against='product_id')
            result = add.to(orders_df, from_df=products_df, bring=['price', 'name'], against='product_id')
        """
        from additory.utilities.lookup import to
        return to(target_df, from_df, bring=bring, against=against, **kwargs)

    def synth(self, schema_path: str, rows: int = 1000, engine: Optional[str] = None):
        """
        Generate synthetic data from a schema file.

        Args:
            schema_path: Path to the .toml schema file
            rows: Number of rows to generate (default: 1000)
            engine: Output engine ("pandas" or "polars"). If None, uses default from config

        Returns:
            Generated DataFrame in the specified format

        Example:
            df = add.synth("customer.toml", rows=5000)
            df = add.synth("customer.toml", rows=5000, engine="polars")
        """
        from additory.synthetic.api import synth as synth_impl
        return synth_impl(schema_path, rows, engine)

    def onehotencoding(self, df, columns=None, **kwargs):
        """
        One-hot encode categorical columns.

        Args:
            df: Input dataframe
            columns: Column to encode (single column name as string)
            **kwargs: Additional parameters. ``column`` is accepted as an
                alias for ``columns``.

        Returns:
            DataFrame with one-hot encoded columns

        Raises:
            TypeError: If both ``columns`` and ``column`` are supplied.
        """
        from additory.utilities.encoding import onehotencoding

        # The implementation's own keyword is ``column``. Forwarding a
        # caller-supplied ``column=`` untouched would collide with the
        # explicit ``column=columns`` below, so fold it in here.
        if "column" in kwargs:
            if columns is not None:
                raise TypeError(
                    "onehotencoding() got both 'columns' and 'column'; pass only one"
                )
            columns = kwargs.pop("column")

        return onehotencoding(df, column=columns, **kwargs)

    def harmonize_units(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
        """
        Harmonize units in a dataframe.

        Args:
            df: Input dataframe
            value_column: Column containing numeric values
            unit_column: Column containing unit strings
            target_unit: Target unit to convert to (auto-detected if None)
            position: Where to place new columns ("end", "start", etc.)
            **kwargs: Additional parameters

        Returns:
            DataFrame with harmonized units

        Example:
            result = add.harmonize_units(df, value_column='weight', unit_column='unit')
            result = add.harmonize_units(df, value_column='temp', unit_column='unit', target_unit='F')
        """
        from additory.utilities.units import harmonize_units
        return harmonize_units(df, value_column, unit_column, target_unit, position, **kwargs)

    def scan(
        self,
        # Quoted so that defining the class does not require the dataframe
        # libraries to be resolvable when annotations are evaluated.
        df: Union["pl.DataFrame", "pd.DataFrame", Any],
        preset: Optional[str] = None,
        detect_distributions: bool = True,
        detect_correlations: bool = True,
        detect_cardinality: bool = True,
        top_n_distributions: int = 3,
        correlation_methods: Optional[List[str]] = None,
        correlation_threshold: float = 0.3,
        cardinality_top_n: int = 10,
        verbose: bool = True
    ):
        """
        Scan a DataFrame to detect distributions, correlations, and cardinality.

        Accepts pandas, polars, or cuDF DataFrames. Automatically converts to Polars
        for processing. Returns ScanResult with analysis results.

        This function provides comprehensive data profiling including:
        - Distribution detection for numeric columns
        - Correlation analysis between columns
        - Cardinality analysis (unique values)
        - Data quality metrics

        Args:
            df: DataFrame to analyze (pandas, polars, or cuDF)
            preset: Optional preset ('quick', 'distributions', 'correlations', 'full', 'minimal')
            detect_distributions: Whether to detect distributions (default: True)
            detect_correlations: Whether to calculate correlations (default: True)
            detect_cardinality: Whether to analyze cardinality (default: True)
            top_n_distributions: Number of top distributions to return per column (default: 3)
            correlation_methods: Correlation methods to use (default: ['pearson', 'spearman'])
            correlation_threshold: Minimum correlation to report (default: 0.3)
            cardinality_top_n: Number of top values to return per column (default: 10)
            verbose: Whether to print progress messages (default: True)

        Returns:
            ScanResult object containing all analysis results

        Presets:
            - 'quick': Quality + cardinality only (fast)
            - 'distributions': Distribution detection only
            - 'correlations': Correlation analysis only
            - 'full': All analyses enabled
            - 'minimal': Quality metrics only (fastest)

        Example:
            >>> import pandas as pd
            >>> import additory as add
            >>>
            >>> # Works with pandas
            >>> df = pd.DataFrame({
            ...     'age': [25, 30, 35, 40, 45],
            ...     'income': [50000, 60000, 70000, 80000, 90000],
            ...     'category': ['A', 'B', 'A', 'B', 'A']
            ... })
            >>>
            >>> result = add.scan(df)
            >>> print(result.summary())
            >>>
            >>> # Use presets
            >>> result = add.scan(df, preset='quick')
            >>> result = add.scan(df, preset='distributions', top_n_distributions=5)
        """
        from additory.analysis.scan import scan as scan_impl

        # None stands in for the default so no list object is shared between calls
        if correlation_methods is None:
            correlation_methods = ['pearson', 'spearman']

        # The implementation names its switches with a ``_flag`` suffix
        return scan_impl(
            df,
            preset=preset,
            detect_distributions_flag=detect_distributions,
            detect_correlations_flag=detect_correlations,
            detect_cardinality_flag=detect_cardinality,
            top_n_distributions=top_n_distributions,
            correlation_methods=correlation_methods,
            correlation_threshold=correlation_threshold,
            cardinality_top_n=cardinality_top_n,
            verbose=verbose
        )

    def play(self, game: str = "tictactoe"):
        """
        Play a game! 🎮

        Hidden feature for the curious. Reinforces row-column thinking.

        Available games:
        - 'tictactoe' or 'ttt': Play Tic-Tac-Toe
        - 'sudoku': Play Sudoku

        Args:
            game: Name of the game to play (default: 'tictactoe')

        Example:
            >>> import additory
            >>> additory.add.play('tictactoe')
            >>> additory.add.play('sudoku')
        """
        from additory.utilities.games import play as play_impl
        return play_impl(game)
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
# Module-level singleton: the single shared AdditoryAPI object, built at import time
add = AdditoryAPI()

# The singleton is the only public name of this module
__all__ = ["add"]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# additory/expressions/__init__.py
|
|
2
|
+
# Expression system - .add file driven functionality
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Expression System Module
|
|
6
|
+
|
|
7
|
+
This module handles .add file driven functionality including:
|
|
8
|
+
- Expression parsing and compilation
|
|
9
|
+
- Polars-based expression execution
|
|
10
|
+
- Expression caching and versioning
|
|
11
|
+
- Sample data management
|
|
12
|
+
- Namespace support (builtin vs user)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
# Core expression functionality will be imported here after migration
|
|
16
|
+
# from .proxy import EnhancedExpressionProxy
|
|
17
|
+
# from .engine import PolarsExpressionEngine
|
|
18
|
+
# from .parser import ExpressionParser
|
|
19
|
+
# from .compiler import ExpressionCompiler
|
|
20
|
+
# from .executor import ExpressionExecutor
|
|
21
|
+
# from .registry import ExpressionRegistry
|
|
22
|
+
# from .samples import SampleDataManager
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
# Intentionally empty for now: nothing is imported into this package yet, so no public names are exported until the migration noted above is done
|
|
26
|
|
+
]
|