additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -176
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -304
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/distributions.py +0 -22
  100. additory/synthetic/forecast.py +0 -1132
  101. additory/synthetic/linked_list_parser.py +0 -415
  102. additory/synthetic/namespace_lookup.py +0 -129
  103. additory/synthetic/smote.py +0 -320
  104. additory/synthetic/strategies.py +0 -850
  105. additory/synthetic/synthesizer.py +0 -713
  106. additory/utilities/__init__.py +0 -53
  107. additory/utilities/encoding.py +0 -600
  108. additory/utilities/games.py +0 -300
  109. additory/utilities/keys.py +0 -8
  110. additory/utilities/lookup.py +0 -103
  111. additory/utilities/matchers.py +0 -216
  112. additory/utilities/resolvers.py +0 -286
  113. additory/utilities/settings.py +0 -167
  114. additory/utilities/units.py +0 -749
  115. additory/utilities/validators.py +0 -153
  116. additory-0.1.0a3.dist-info/METADATA +0 -288
  117. additory-0.1.0a3.dist-info/RECORD +0 -71
  118. additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
  119. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  120. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,27 +0,0 @@
1
- """
2
- Expression Validator
3
-
4
- Validates expression syntax and structure.
5
- """
6
-
7
- from typing import Dict, Any, List, Tuple
8
-
9
-
10
- def validate_expression(expression: str) -> Tuple[bool, List[str]]:
11
- """
12
- Validate an expression.
13
-
14
- Args:
15
- expression: Expression string to validate
16
-
17
- Returns:
18
- Tuple of (is_valid, error_messages)
19
- """
20
- errors = []
21
-
22
- if not expression or not expression.strip():
23
- errors.append("Expression cannot be empty")
24
- return (False, errors)
25
-
26
- # Basic validation - can be expanded later
27
- return (True, [])
additory/dynamic_api.py DELETED
@@ -1,304 +0,0 @@
1
- """
2
- Dynamic API for Additory
3
-
4
- This module provides the main API interface with dynamic attribute access.
5
- """
6
-
7
- from types import SimpleNamespace
8
- from typing import Union, Optional, List, Any
9
- import pandas as pd
10
- import polars as pl
11
-
12
-
13
- class AdditoryAPI(SimpleNamespace):
14
- """
15
- Main API class for Additory functionality.
16
-
17
- Provides access to:
18
- - add.synthetic() - Synthetic data generation
19
- - add.to() - Lookup/join operations
20
- - add.scan() - Data profiling and analysis
21
- - add.my - User expressions
22
- - add.play() - Hidden games (for the curious 😉)
23
- - Expression evaluation
24
- """
25
-
26
- def __init__(self):
27
- super().__init__()
28
- # Initialize expression proxies
29
- from additory.expressions.proxy import ExpressionProxy
30
- self.my = ExpressionProxy(namespace="user")
31
- self._builtin_proxy = ExpressionProxy(namespace="builtin")
32
-
33
- # Explicitly set the synthetic method to prevent namespace conflicts
34
- self.synthetic = self._synthetic_method
35
-
36
- def __getattr__(self, name):
37
- """
38
- Dynamic attribute access for expressions.
39
-
40
- Checks built-in expressions first, then user expressions.
41
- This ensures built-in expressions take precedence.
42
- """
43
- # Check if it's a built-in expression first
44
- if self._expression_exists(self._builtin_proxy, name):
45
- return getattr(self._builtin_proxy, name)
46
-
47
- # Check if it's a user expression
48
- if self._expression_exists(self.my, name):
49
- return getattr(self.my, name)
50
-
51
- # If not found, raise AttributeError
52
- raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
53
-
54
- def _expression_exists(self, proxy, name):
55
- """Check if an expression exists in a proxy's namespace"""
56
- try:
57
- # List expressions in the proxy's namespace
58
- expr_list = proxy.list_expressions()
59
- return name in expr_list.get("expressions", {})
60
- except Exception:
61
- return False
62
-
63
- def _synthetic_method(self, df, n_rows=5, strategy="auto", seed=None, output_format="pandas", **kwargs):
64
- """
65
- Generate synthetic data by extending a dataframe or creating from scratch.
66
-
67
- Three modes:
68
- 1. Extend mode: Pass a DataFrame to add synthetic rows
69
- 2. Create mode: Pass "@new" to create data from scratch
70
- 3. Sample mode: Pass "@sample" to load sample data
71
-
72
- Args:
73
- df: DataFrame to extend, "@new" to create, or "@sample" for sample data
74
- n_rows: Number of rows (int for create/sample, int/float/str for extend)
75
- strategy: Strategy specification (dict for create, str/dict for extend)
76
- seed: Random seed for reproducibility
77
- output_format: Output format ("pandas", "polars", "cudf")
78
- **kwargs: Additional parameters
79
-
80
- Returns:
81
- Extended or generated DataFrame
82
-
83
- Examples:
84
- # Extend existing data
85
- result = add.synthetic(df, n_rows=100, strategy='auto')
86
-
87
- # Create from scratch
88
- result = add.synthetic("@new", n_rows=100, strategy={'id': 'increment', 'age': 'range:18-65'})
89
-
90
- # Load sample data
91
- result = add.synthetic("@sample", n_rows=50)
92
- """
93
- # Store reference to restore after import (in the correct namespace)
94
- import additory
95
- original_synthetic = getattr(additory, 'synthetic', None)
96
-
97
- try:
98
- # Import and call the implementation
99
- from additory.synthetic.synthesizer import synthetic as synthetic_impl
100
- result = synthetic_impl(df, n_rows=n_rows, strategy=strategy, seed=seed,
101
- output_format=output_format, **kwargs)
102
-
103
- # Restore the method reference in the additory module namespace
104
- # The import above will have overridden additory.synthetic with the module
105
- # We need to restore it to point to this method
106
- if original_synthetic is not None:
107
- additory.synthetic = original_synthetic
108
- else:
109
- # If there was no original synthetic, set it to this method
110
- additory.synthetic = self._synthetic_method
111
-
112
- return result
113
- except Exception as e:
114
- # Restore the method reference even if there's an error
115
- if original_synthetic is not None:
116
- additory.synthetic = original_synthetic
117
- else:
118
- additory.synthetic = self._synthetic_method
119
- raise
120
-
121
- def to(self, target_df, from_df=None, bring=None, against=None, **kwargs):
122
- """
123
- Add columns from reference dataframe to target dataframe.
124
-
125
- Args:
126
- target_df: Target dataframe to add columns to
127
- from_df: Reference dataframe to get columns from
128
- bring: Column(s) to bring from reference dataframe (str or list)
129
- against: Column(s) to match on (str or list)
130
- **kwargs: Additional parameters
131
-
132
- Returns:
133
- Target dataframe with new columns added
134
-
135
- Example:
136
- result = add.to(orders_df, from_df=products_df, bring='price', against='product_id')
137
- result = add.to(orders_df, from_df=products_df, bring=['price', 'name'], against='product_id')
138
- """
139
- from additory.utilities.lookup import to
140
- return to(target_df, from_df, bring=bring, against=against, **kwargs)
141
-
142
- def onehotencoding(self, df, columns=None, **kwargs):
143
- """
144
- One-hot encode categorical columns.
145
-
146
- Args:
147
- df: Input dataframe
148
- columns: Column to encode (single column name as string)
149
- **kwargs: Additional parameters
150
-
151
- Returns:
152
- DataFrame with one-hot encoded columns
153
- """
154
- from additory.utilities.encoding import onehotencoding
155
- return onehotencoding(df, column=columns, **kwargs)
156
-
157
- def harmonize_units(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
158
- """
159
- Harmonize units in a dataframe.
160
-
161
- Args:
162
- df: Input dataframe
163
- value_column: Column containing numeric values
164
- unit_column: Column containing unit strings
165
- target_unit: Target unit to convert to (auto-detected if None)
166
- position: Where to place new columns ("end", "start", etc.)
167
- **kwargs: Additional parameters
168
-
169
- Returns:
170
- DataFrame with harmonized units
171
-
172
- Example:
173
- result = add.harmonize_units(df, value_column='weight', unit_column='unit')
174
- result = add.harmonize_units(df, value_column='temp', unit_column='unit', target_unit='F')
175
- """
176
- from additory.utilities.units import harmonize_units
177
- return harmonize_units(df, value_column, unit_column, target_unit, position, **kwargs)
178
-
179
- def scan(
180
- self,
181
- df: Union[pl.DataFrame, pd.DataFrame, Any],
182
- preset: Optional[str] = None,
183
- detect_distributions: bool = True,
184
- detect_correlations: bool = True,
185
- detect_cardinality: bool = True,
186
- top_n_distributions: int = 3,
187
- correlation_methods: List[str] = None,
188
- correlation_threshold: float = 0.3,
189
- cardinality_top_n: int = 10,
190
- verbose: bool = True
191
- ):
192
- """
193
- Scan a DataFrame to detect distributions, correlations, and cardinality.
194
-
195
- Accepts pandas, polars, or cuDF DataFrames. Automatically converts to Polars
196
- for processing. Returns ScanResult with analysis results.
197
-
198
- This function provides comprehensive data profiling including:
199
- - Distribution detection for numeric columns
200
- - Correlation analysis between columns
201
- - Cardinality analysis (unique values)
202
- - Data quality metrics
203
-
204
- Args:
205
- df: DataFrame to analyze (pandas, polars, or cuDF)
206
- preset: Optional preset ('quick', 'distributions', 'correlations', 'full', 'minimal')
207
- detect_distributions: Whether to detect distributions (default: True)
208
- detect_correlations: Whether to calculate correlations (default: True)
209
- detect_cardinality: Whether to analyze cardinality (default: True)
210
- top_n_distributions: Number of top distributions to return per column (default: 3)
211
- correlation_methods: Correlation methods to use (default: ['pearson', 'spearman'])
212
- correlation_threshold: Minimum correlation to report (default: 0.3)
213
- cardinality_top_n: Number of top values to return per column (default: 10)
214
- verbose: Whether to print progress messages (default: True)
215
-
216
- Returns:
217
- ScanResult object containing all analysis results
218
-
219
- Presets:
220
- - 'quick': Quality + cardinality only (fast)
221
- - 'distributions': Distribution detection only
222
- - 'correlations': Correlation analysis only
223
- - 'full': All analyses enabled
224
- - 'minimal': Quality metrics only (fastest)
225
-
226
- Example:
227
- >>> import pandas as pd
228
- >>> import additory as add
229
- >>>
230
- >>> # Works with pandas
231
- >>> df = pd.DataFrame({
232
- ... 'age': [25, 30, 35, 40, 45],
233
- ... 'income': [50000, 60000, 70000, 80000, 90000],
234
- ... 'category': ['A', 'B', 'A', 'B', 'A']
235
- ... })
236
- >>>
237
- >>> result = add.scan(df)
238
- >>> print(result.summary())
239
- >>>
240
- >>> # Use presets
241
- >>> result = add.scan(df, preset='quick')
242
- >>> result = add.scan(df, preset='distributions', top_n_distributions=5)
243
- """
244
- from additory.analysis.scan import scan as scan_impl
245
-
246
- if correlation_methods is None:
247
- correlation_methods = ['pearson', 'spearman']
248
-
249
- return scan_impl(
250
- df,
251
- preset=preset,
252
- detect_distributions_flag=detect_distributions,
253
- detect_correlations_flag=detect_correlations,
254
- detect_cardinality_flag=detect_cardinality,
255
- top_n_distributions=top_n_distributions,
256
- correlation_methods=correlation_methods,
257
- correlation_threshold=correlation_threshold,
258
- cardinality_top_n=cardinality_top_n,
259
- verbose=verbose
260
- )
261
-
262
- def games(self):
263
- """
264
- List available games! 🎮
265
-
266
- Returns a list of games you can play with add.play().
267
-
268
- Returns:
269
- List of available game names
270
-
271
- Example:
272
- >>> import additory
273
- >>> additory.add.games()
274
- ['tictactoe', 'sudoku']
275
- """
276
- return ['tictactoe', 'sudoku']
277
-
278
- def play(self, game: str = "tictactoe"):
279
- """
280
- Play a game! 🎮
281
-
282
- Hidden feature for the curious. Reinforces row-column thinking.
283
-
284
- Available games:
285
- - 'tictactoe' or 'ttt': Play Tic-Tac-Toe
286
- - 'sudoku': Play Sudoku
287
-
288
- Args:
289
- game: Name of the game to play (default: 'tictactoe')
290
-
291
- Example:
292
- >>> import additory
293
- >>> additory.add.play('tictactoe')
294
- >>> additory.add.play('sudoku')
295
- """
296
- from additory.utilities.games import play as play_impl
297
- return play_impl(game)
298
-
299
-
300
- # Create the singleton API instance
301
- add = AdditoryAPI()
302
-
303
- # Export the instance
304
- __all__ = ['add']