additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -177
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -352
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/deduce.py +0 -259
  100. additory/synthetic/distributions.py +0 -22
  101. additory/synthetic/forecast.py +0 -1132
  102. additory/synthetic/linked_list_parser.py +0 -415
  103. additory/synthetic/namespace_lookup.py +0 -129
  104. additory/synthetic/smote.py +0 -320
  105. additory/synthetic/strategies.py +0 -926
  106. additory/synthetic/synthesizer.py +0 -713
  107. additory/utilities/__init__.py +0 -53
  108. additory/utilities/encoding.py +0 -600
  109. additory/utilities/games.py +0 -300
  110. additory/utilities/keys.py +0 -8
  111. additory/utilities/lookup.py +0 -103
  112. additory/utilities/matchers.py +0 -216
  113. additory/utilities/resolvers.py +0 -286
  114. additory/utilities/settings.py +0 -167
  115. additory/utilities/units.py +0 -749
  116. additory/utilities/validators.py +0 -153
  117. additory-0.1.0a4.dist-info/METADATA +0 -311
  118. additory-0.1.0a4.dist-info/RECORD +0 -72
  119. additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
  120. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  121. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,258 @@
1
+ """
2
+ Backend detection and conversion utilities for Additory.
3
+
4
+ Handles automatic conversion between pandas, Polars, and cuDF DataFrames.
5
+ All processing happens in Polars, with transparent conversions at boundaries.
6
+ """
7
+
8
+ from typing import Any
9
+ import polars as pl
10
+
11
+
12
# Module-wide default backend name. Read through get_default_backend() and
# replaced through set_default_backend(); starts out as 'polars'.
_DEFAULT_BACKEND: str = 'polars'
14
+
15
+
16
def detect_backend(df: Any) -> str:
    """
    Identify which DataFrame library an object belongs to.

    Checks run in a fixed order: Polars, pandas, cuDF, Dask, Spark.
    pandas is optional; when it cannot be imported its check is skipped.

    Args:
        df: Object to classify

    Returns:
        One of 'polars', 'pandas', 'cudf', 'dask', 'spark'

    Raises:
        TypeError: If df matches none of the recognised DataFrame types

    Example:
        backend = detect_backend(df)
        # Returns: 'pandas' or 'polars' or 'cudf'
    """
    if isinstance(df, pl.DataFrame):
        return 'polars'

    # pandas may not be installed; a failed import just disables this check
    try:
        import pandas as pd
    except ImportError:
        pd = None
    if pd is not None and isinstance(df, pd.DataFrame):
        return 'pandas'

    # Remaining optional backends, probed in order (Dask and Spark are
    # detected here even though they are marked as future support)
    optional_checks = (
        ('cudf', is_cudf),
        ('dask', is_dask),
        ('spark', is_spark),
    )
    for backend_name, matches in optional_checks:
        if matches(df):
            return backend_name

    type_name = type(df).__name__
    raise TypeError(
        f"Unsupported DataFrame type: {type_name}. "
        f"Supported types: pandas.DataFrame, polars.DataFrame, cudf.DataFrame"
    )
62
+
63
+
64
def to_polars(df: Any) -> pl.DataFrame:
    """
    Convert any supported DataFrame to Polars.

    Args:
        df: DataFrame to convert (Polars, pandas or cuDF)

    Returns:
        Polars DataFrame. A Polars input is returned as-is, without copying.

    Raises:
        TypeError: If df is not a supported DataFrame type, or if a cuDF
            DataFrame could not be converted
        NotImplementedError: If df is a Dask or Spark DataFrame
        ImportError: If the pandas conversion itself fails on a missing
            dependency (propagated from ``pl.from_pandas``)

    Example:
        polars_df = to_polars(df)  # Works with pandas, polars, cuDF
    """
    # Already Polars
    if isinstance(df, pl.DataFrame):
        return df

    # From pandas. Only the import is guarded: an ImportError raised while
    # converting (e.g. a missing conversion dependency) must reach the caller
    # instead of being swallowed and reported as an unsupported type below.
    try:
        import pandas as pd
    except ImportError:
        pd = None
    if pd is not None and isinstance(df, pd.DataFrame):
        return pl.from_pandas(df)

    # From cuDF, going through Arrow
    if is_cudf(df):
        try:
            return pl.from_arrow(df.to_arrow())
        except Exception as e:
            # Keep the historical TypeError but preserve the original cause
            raise TypeError(
                f"Failed to convert cuDF DataFrame to Polars: {e}"
            ) from e

    # From Dask (future support)
    if is_dask(df):
        raise NotImplementedError("Dask support is not yet implemented")

    # From Spark (future support)
    if is_spark(df):
        raise NotImplementedError("Spark support is not yet implemented")

    # Unsupported type
    raise TypeError(
        f"Unsupported DataFrame type: {type(df).__name__}. "
        f"Supported types: pandas.DataFrame, polars.DataFrame, cudf.DataFrame"
    )
112
+
113
+
114
def from_polars(df: pl.DataFrame, target_backend: str) -> Any:
    """
    Convert a Polars DataFrame back to the target backend.

    Args:
        df: Polars DataFrame to convert
        target_backend: Target backend ('polars', 'pandas', 'cudf')

    Returns:
        DataFrame in the target backend. For 'polars' the input is returned
        as-is.

    Raises:
        TypeError: If df is not a Polars DataFrame, if target_backend is not
            supported, or if the cuDF conversion fails
        ImportError: If target_backend is 'cudf' and cuDF cannot be imported
        NotImplementedError: If target_backend is 'dask' or 'spark'

    Example:
        result = from_polars(polars_df, 'pandas')  # Returns pandas DataFrame
    """
    if not isinstance(df, pl.DataFrame):
        raise TypeError(f"Input must be a Polars DataFrame, got {type(df).__name__}")

    # To Polars (no conversion needed)
    if target_backend == 'polars':
        return df

    # To pandas
    if target_backend == 'pandas':
        return df.to_pandas()

    # To cuDF
    if target_backend == 'cudf':
        # Import and conversion are guarded separately so that only a genuine
        # import failure is reported as "cuDF is not installed".
        try:
            import cudf
        except ImportError as e:
            raise ImportError(
                "cuDF is not installed. Install with: pip install cudf"
            ) from e
        try:
            # cuDF builds a DataFrame from an Arrow table through the
            # DataFrame.from_arrow classmethod.
            return cudf.DataFrame.from_arrow(df.to_arrow())
        except Exception as e:
            raise TypeError(
                f"Failed to convert Polars DataFrame to cuDF: {e}"
            ) from e

    # To Dask (future support)
    if target_backend == 'dask':
        raise NotImplementedError("Dask support is not yet implemented")

    # To Spark (future support)
    if target_backend == 'spark':
        raise NotImplementedError("Spark support is not yet implemented")

    # Unsupported backend
    raise TypeError(
        f"Unsupported backend: {target_backend}. "
        f"Supported backends: polars, pandas, cudf"
    )
165
+
166
+
167
def is_cudf(df: Any) -> bool:
    """
    Check if an object is a cuDF DataFrame.

    The check never imports cuDF itself: an object can only be a cuDF
    DataFrame if the ``cudf`` package is already loaded, so looking it up in
    ``sys.modules`` avoids a slow optional import on every call.

    Args:
        df: Object to check

    Returns:
        True if cuDF DataFrame, False otherwise

    Example:
        if is_cudf(df):
            # Handle cuDF-specific logic
            ...
    """
    import sys

    cudf_module = sys.modules.get('cudf')
    # getattr guards against a partially initialised module entry
    frame_type = getattr(cudf_module, 'DataFrame', None)
    return frame_type is not None and isinstance(df, frame_type)
186
+
187
+
188
def is_dask(df: Any) -> bool:
    """
    Check if an object is a Dask DataFrame (future support).

    The check never imports Dask itself: an object can only be a Dask
    DataFrame if ``dask.dataframe`` is already loaded, so looking it up in
    ``sys.modules`` avoids a slow optional import on every call.

    Args:
        df: Object to check

    Returns:
        True if Dask DataFrame, False otherwise
    """
    import sys

    dask_frames = sys.modules.get('dask.dataframe')
    # getattr guards against a partially initialised module entry
    frame_type = getattr(dask_frames, 'DataFrame', None)
    return frame_type is not None and isinstance(df, frame_type)
203
+
204
+
205
def is_spark(df: Any) -> bool:
    """
    Check if an object is a Spark DataFrame (future support).

    The check never imports PySpark itself: an object can only be a
    ``pyspark.sql.DataFrame`` if ``pyspark.sql`` is already loaded, so looking
    it up in ``sys.modules`` avoids a slow optional import on every call.

    Args:
        df: Object to check

    Returns:
        True if Spark DataFrame, False otherwise
    """
    import sys

    spark_sql = sys.modules.get('pyspark.sql')
    # getattr guards against a partially initialised module entry
    frame_type = getattr(spark_sql, 'DataFrame', None)
    return frame_type is not None and isinstance(df, frame_type)
220
+
221
+
222
def get_default_backend() -> str:
    """
    Return the backend name currently stored as the module default.

    Returns:
        The value of the module-level ``_DEFAULT_BACKEND`` setting
        ('polars' unless it has been changed)

    Example:
        backend = get_default_backend()  # Returns 'polars' by default
    """
    return _DEFAULT_BACKEND
233
+
234
+
235
def set_default_backend(backend: str) -> None:
    """
    Replace the module-level default backend.

    Args:
        backend: New default; must be one of 'polars', 'pandas', 'cudf'

    Raises:
        ValueError: If backend is not one of the supported names

    Example:
        set_default_backend('cudf')  # For GPU users
    """
    global _DEFAULT_BACKEND

    allowed = ('polars', 'pandas', 'cudf')
    if backend in allowed:
        _DEFAULT_BACKEND = backend
        return

    # Anything else is rejected and the stored default is left untouched
    options = ', '.join(allowed)
    raise ValueError(
        f"Unsupported backend: {backend}. "
        f"Supported backends: {options}"
    )