additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -176
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -304
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -850
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a3.dist-info/METADATA +0 -288
- additory-0.1.0a3.dist-info/RECORD +0 -71
- additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
additory/core/backend.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Backend detection and conversion utilities for Additory.
|
|
3
|
+
|
|
4
|
+
Handles automatic conversion between pandas, Polars, and cuDF DataFrames.
|
|
5
|
+
All processing happens in Polars, with transparent conversions at boundaries.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Global default backend setting
|
|
13
|
+
# Module-level state: returned by get_default_backend() and rebound by
# set_default_backend(); starts out as 'polars'.
_DEFAULT_BACKEND = 'polars'
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def detect_backend(df: Any) -> str:
    """
    Identify which DataFrame library an object belongs to.

    Checks run in a fixed order -- Polars, pandas, cuDF, Dask, Spark -- and
    the first match wins. pandas is imported lazily here; if it is not
    installed, that check is skipped.

    Args:
        df: Object to classify

    Returns:
        One of 'polars', 'pandas', 'cudf', 'dask' or 'spark'

    Raises:
        TypeError: If df matches none of the known DataFrame types

    Example:
        backend = detect_backend(df)
        # Returns: 'pandas' or 'polars' or 'cudf'
    """
    if isinstance(df, pl.DataFrame):
        return 'polars'

    try:
        import pandas as pd
        if isinstance(df, pd.DataFrame):
            return 'pandas'
    except ImportError:
        # pandas is optional; fall through to the remaining probes
        pass

    # Remaining backends are probed in order through their is_* helpers
    # (Dask and Spark are detected here as "future support").
    lazy_probes = (
        ('cudf', is_cudf),
        ('dask', is_dask),
        ('spark', is_spark),
    )
    for backend_name, matches in lazy_probes:
        if matches(df):
            return backend_name

    type_name = type(df).__name__
    raise TypeError(
        f"Unsupported DataFrame type: {type_name}. "
        f"Supported types: pandas.DataFrame, polars.DataFrame, cudf.DataFrame"
    )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def to_polars(df: Any) -> pl.DataFrame:
    """
    Convert any supported DataFrame to Polars.

    Polars input is returned as-is (the same object, no copy). pandas input
    goes through ``pl.from_pandas``; cuDF input is converted via Arrow
    (``df.to_arrow()`` followed by ``pl.from_arrow``).

    Args:
        df: DataFrame to convert

    Returns:
        Polars DataFrame

    Raises:
        TypeError: If df is not a supported DataFrame type, or if the cuDF
            conversion fails (the underlying error is attached as the cause)
        NotImplementedError: If df is a Dask or Spark DataFrame

    Example:
        polars_df = to_polars(df)  # Works with pandas, polars, cuDF
    """
    # Already Polars
    if isinstance(df, pl.DataFrame):
        return df

    # From pandas
    try:
        import pandas as pd
        if isinstance(df, pd.DataFrame):
            return pl.from_pandas(df)
    except ImportError:
        # pandas is optional; continue with the other backends
        pass

    # From cuDF (via an Arrow table)
    if is_cudf(df):
        try:
            return pl.from_arrow(df.to_arrow())
        except Exception as e:
            # Chain explicitly so the root cause stays in the traceback
            raise TypeError(
                f"Failed to convert cuDF DataFrame to Polars: {e}"
            ) from e

    # From Dask (future support)
    if is_dask(df):
        raise NotImplementedError("Dask support is not yet implemented")

    # From Spark (future support)
    if is_spark(df):
        raise NotImplementedError("Spark support is not yet implemented")

    # Unsupported type
    raise TypeError(
        f"Unsupported DataFrame type: {type(df).__name__}. "
        f"Supported types: pandas.DataFrame, polars.DataFrame, cudf.DataFrame"
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def from_polars(df: pl.DataFrame, target_backend: str) -> Any:
    """
    Convert Polars DataFrame back to target backend.

    Args:
        df: Polars DataFrame to convert
        target_backend: Target backend ('polars', 'pandas', 'cudf')

    Returns:
        DataFrame in target backend. For 'polars' the input object itself is
        returned without conversion.

    Raises:
        TypeError: If df is not a Polars DataFrame, if target_backend is not
            supported, or if the cuDF conversion fails (the underlying error
            is attached as the cause)
        ImportError: If target_backend is 'cudf' and an import fails inside
            the conversion step
        NotImplementedError: If target_backend is 'dask' or 'spark'

    Example:
        result = from_polars(polars_df, 'pandas')  # Returns pandas DataFrame
    """
    if not isinstance(df, pl.DataFrame):
        raise TypeError(f"Input must be a Polars DataFrame, got {type(df).__name__}")

    # To Polars (no conversion needed)
    if target_backend == 'polars':
        return df

    # To pandas
    if target_backend == 'pandas':
        return df.to_pandas()

    # To cuDF (via an Arrow table)
    if target_backend == 'cudf':
        try:
            import cudf
            # cuDF's Arrow constructor is the DataFrame.from_arrow
            # classmethod; a module-level cudf.from_arrow is not part of its
            # documented API, so calling it would always land in the generic
            # failure branch below.
            return cudf.DataFrame.from_arrow(df.to_arrow())
        except ImportError as e:
            raise ImportError(
                "cuDF is not installed. Install with: pip install cudf"
            ) from e
        except Exception as e:
            # Chain explicitly so the root cause stays in the traceback
            raise TypeError(
                f"Failed to convert Polars DataFrame to cuDF: {e}"
            ) from e

    # To Dask (future support)
    if target_backend == 'dask':
        raise NotImplementedError("Dask support is not yet implemented")

    # To Spark (future support)
    if target_backend == 'spark':
        raise NotImplementedError("Spark support is not yet implemented")

    # Unsupported backend
    raise TypeError(
        f"Unsupported backend: {target_backend}. "
        f"Supported backends: polars, pandas, cudf"
    )
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def is_cudf(df: Any) -> bool:
    """
    Report whether an object is a cuDF DataFrame.

    cuDF is imported lazily; if the import raises ImportError the answer is
    False.

    Args:
        df: Object to check

    Returns:
        True if df is an instance of cudf.DataFrame, False otherwise

    Example:
        if is_cudf(df):
            # Handle cuDF-specific logic
    """
    try:
        import cudf
        matched = isinstance(df, cudf.DataFrame)
    except ImportError:
        matched = False
    return matched
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def is_dask(df: Any) -> bool:
|
|
189
|
+
"""
|
|
190
|
+
Check if DataFrame is Dask (future support).
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
df: Object to check
|
|
194
|
+
|
|
195
|
+
Returns:
|
|
196
|
+
True if Dask DataFrame, False otherwise
|
|
197
|
+
"""
|
|
198
|
+
try:
|
|
199
|
+
import dask.dataframe as dd
|
|
200
|
+
return isinstance(df, dd.DataFrame)
|
|
201
|
+
except ImportError:
|
|
202
|
+
return False
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def is_spark(df: Any) -> bool:
    """
    Report whether an object is a Spark DataFrame (future support).

    pyspark.sql is imported lazily; if the import raises ImportError the
    answer is False.

    Args:
        df: Object to check

    Returns:
        True if df is an instance of pyspark.sql.DataFrame, False otherwise
    """
    try:
        from pyspark.sql import DataFrame as SparkDataFrame
        matched = isinstance(df, SparkDataFrame)
    except ImportError:
        matched = False
    return matched
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def get_default_backend() -> str:
    """
    Return the backend name currently stored as the module default.

    Returns:
        The value of the module-level ``_DEFAULT_BACKEND`` setting
        ('polars' until set_default_backend() rebinds it)

    Example:
        backend = get_default_backend()  # Returns 'polars' by default
    """
    current_backend = _DEFAULT_BACKEND
    return current_backend
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def set_default_backend(backend: str) -> None:
    """
    Rebind the module-level default backend.

    The new value is validated first; on failure the stored default is left
    unchanged.

    Args:
        backend: Backend to set ('polars', 'pandas', 'cudf')

    Raises:
        ValueError: If backend is not one of the supported names

    Example:
        set_default_backend('cudf')  # For GPU users
    """
    global _DEFAULT_BACKEND

    allowed = ('polars', 'pandas', 'cudf')
    if backend in allowed:
        _DEFAULT_BACKEND = backend
        return

    raise ValueError(
        f"Unsupported backend: {backend}. "
        f"Supported backends: {', '.join(allowed)}"
    )
|