additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120):
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -176
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -304
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/distributions.py +0 -22
  100. additory/synthetic/forecast.py +0 -1132
  101. additory/synthetic/linked_list_parser.py +0 -415
  102. additory/synthetic/namespace_lookup.py +0 -129
  103. additory/synthetic/smote.py +0 -320
  104. additory/synthetic/strategies.py +0 -850
  105. additory/synthetic/synthesizer.py +0 -713
  106. additory/utilities/__init__.py +0 -53
  107. additory/utilities/encoding.py +0 -600
  108. additory/utilities/games.py +0 -300
  109. additory/utilities/keys.py +0 -8
  110. additory/utilities/lookup.py +0 -103
  111. additory/utilities/matchers.py +0 -216
  112. additory/utilities/resolvers.py +0 -286
  113. additory/utilities/settings.py +0 -167
  114. additory/utilities/units.py +0 -749
  115. additory/utilities/validators.py +0 -153
  116. additory-0.1.0a3.dist-info/METADATA +0 -288
  117. additory-0.1.0a3.dist-info/RECORD +0 -71
  118. additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
  119. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  120. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,258 @@
1
+ """
2
+ Backend detection and conversion utilities for Additory.
3
+
4
+ Handles automatic conversion between pandas, Polars, and cuDF DataFrames.
5
+ All processing happens in Polars, with transparent conversions at boundaries.
6
+ """
7
+
8
+ from typing import Any
9
+ import polars as pl
10
+
11
+
12
+ # Global default backend setting
13
+ _DEFAULT_BACKEND = 'polars'
14
+
15
+
16
def detect_backend(df: Any) -> str:
    """
    Identify which DataFrame library an object belongs to.

    Checks run in a fixed order: Polars, pandas, cuDF, Dask, Spark. The
    first match wins.

    Args:
        df: Object to classify.

    Returns:
        One of 'polars', 'pandas', 'cudf', 'dask', 'spark'.

    Raises:
        TypeError: If df matches none of the checked DataFrame types.

    Example:
        backend = detect_backend(df)
        # Returns: 'pandas' or 'polars' or 'cudf'
    """
    if isinstance(df, pl.DataFrame):
        return 'polars'

    # pandas is optional: when it is not installed the check is skipped.
    try:
        import pandas
    except ImportError:
        pass
    else:
        if isinstance(df, pandas.DataFrame):
            return 'pandas'

    # Remaining backends are probed through the module's is_* helpers,
    # in the same order as before: cuDF, then Dask, then Spark.
    optional_checks = (
        (is_cudf, 'cudf'),
        (is_dask, 'dask'),
        (is_spark, 'spark'),
    )
    for matches, label in optional_checks:
        if matches(df):
            return label

    type_name = type(df).__name__
    raise TypeError(
        f"Unsupported DataFrame type: {type_name}. "
        "Supported types: pandas.DataFrame, polars.DataFrame, cudf.DataFrame"
    )
62
+
63
+
64
def to_polars(df: Any) -> pl.DataFrame:
    """
    Convert a supported DataFrame to a Polars DataFrame.

    Polars input is returned as-is (the same object, no copy). pandas input
    is converted with ``pl.from_pandas``. cuDF input is converted through
    Arrow (``df.to_arrow()`` followed by ``pl.from_arrow``).

    Args:
        df: DataFrame to convert.

    Returns:
        Polars DataFrame.

    Raises:
        TypeError: If df is not a supported DataFrame type, or if the cuDF
            conversion fails (the underlying error is attached as the
            exception's ``__cause__``).
        NotImplementedError: If df is a Dask or Spark DataFrame.

    Example:
        polars_df = to_polars(df)  # Works with pandas, polars, cuDF
    """
    # Already Polars
    if isinstance(df, pl.DataFrame):
        return df

    # From pandas (optional dependency: skipped when not installed)
    try:
        import pandas as pd
        if isinstance(df, pd.DataFrame):
            # NOTE(review): pl.from_pandas appears to drop the pandas index
            # unless include_index=True is passed - confirm that is intended.
            return pl.from_pandas(df)
    except ImportError:
        pass

    # From cuDF, via Arrow
    if is_cudf(df):
        try:
            return pl.from_arrow(df.to_arrow())
        except Exception as e:
            # Chain explicitly so the original failure stays in the traceback
            # as the direct cause of the TypeError.
            raise TypeError(
                f"Failed to convert cuDF DataFrame to Polars: {e}"
            ) from e

    # From Dask (future support)
    if is_dask(df):
        raise NotImplementedError("Dask support is not yet implemented")

    # From Spark (future support)
    if is_spark(df):
        raise NotImplementedError("Spark support is not yet implemented")

    # Unsupported type
    raise TypeError(
        f"Unsupported DataFrame type: {type(df).__name__}. "
        f"Supported types: pandas.DataFrame, polars.DataFrame, cudf.DataFrame"
    )
112
+
113
+
114
def from_polars(df: pl.DataFrame, target_backend: str) -> Any:
    """
    Convert a Polars DataFrame to the requested backend.

    Args:
        df: Polars DataFrame to convert.
        target_backend: Target backend ('polars', 'pandas', 'cudf').

    Returns:
        DataFrame in the target backend. For 'polars' the input object
        itself is returned.

    Raises:
        TypeError: If df is not a Polars DataFrame, if target_backend is not
            a recognised backend name, or if the cuDF conversion fails.
        ImportError: If target_backend is 'cudf' and cuDF cannot be imported.
        NotImplementedError: If target_backend is 'dask' or 'spark'.

    Example:
        result = from_polars(polars_df, 'pandas')  # Returns pandas DataFrame
    """
    if not isinstance(df, pl.DataFrame):
        raise TypeError(f"Input must be a Polars DataFrame, got {type(df).__name__}")

    # To Polars (no conversion needed)
    if target_backend == 'polars':
        return df

    # To pandas
    if target_backend == 'pandas':
        return df.to_pandas()

    # To cuDF
    if target_backend == 'cudf':
        # Keep the import in its own try block so that only a failed
        # ``import cudf`` is reported as "cuDF is not installed".
        try:
            import cudf
        except ImportError as e:
            raise ImportError(
                "cuDF is not installed. Install with: pip install cudf"
            ) from e

        try:
            # The Arrow constructor is a DataFrame classmethod in cuDF.
            return cudf.DataFrame.from_arrow(df.to_arrow())
        except ImportError:
            # A missing dependency hit during conversion (for example
            # pyarrow) keeps its own, accurate message and ImportError type.
            raise
        except Exception as e:
            raise TypeError(
                f"Failed to convert Polars DataFrame to cuDF: {e}"
            ) from e

    # To Dask (future support)
    if target_backend == 'dask':
        raise NotImplementedError("Dask support is not yet implemented")

    # To Spark (future support)
    if target_backend == 'spark':
        raise NotImplementedError("Spark support is not yet implemented")

    # Unsupported backend
    raise TypeError(
        f"Unsupported backend: {target_backend}. "
        f"Supported backends: polars, pandas, cudf"
    )
165
+
166
+
167
def is_cudf(df: Any) -> bool:
    """
    Check if an object is a cuDF DataFrame.

    The check never imports cuDF itself. An object can only be a cuDF
    DataFrame if the ``cudf`` module has already been imported, so looking
    the module up in ``sys.modules`` is enough and avoids loading the
    library just to answer "no".

    Args:
        df: Object to check.

    Returns:
        True if df is an instance of ``cudf.DataFrame``, False otherwise
        (including when cuDF is not installed or not yet imported).

    Example:
        if is_cudf(df):
            # Handle cuDF-specific logic
            ...
    """
    # Function-scope import keeps this helper self-contained.
    import sys

    cudf_module = sys.modules.get('cudf')
    # The entry may be missing, or None when the import has been blocked.
    frame_type = getattr(cudf_module, 'DataFrame', None)
    return frame_type is not None and isinstance(df, frame_type)
186
+
187
+
188
def is_dask(df: Any) -> bool:
    """
    Check if an object is a Dask DataFrame (future support).

    The check never imports Dask itself. An object can only be a Dask
    DataFrame if ``dask.dataframe`` has already been imported, so the module
    is looked up in ``sys.modules`` instead of being imported on every call.

    Args:
        df: Object to check.

    Returns:
        True if df is an instance of ``dask.dataframe.DataFrame``, False
        otherwise (including when Dask is not installed or not yet imported).
    """
    # Function-scope import keeps this helper self-contained.
    import sys

    dask_dataframe = sys.modules.get('dask.dataframe')
    # The entry may be missing, or None when the import has been blocked.
    frame_type = getattr(dask_dataframe, 'DataFrame', None)
    return frame_type is not None and isinstance(df, frame_type)
203
+
204
+
205
def is_spark(df: Any) -> bool:
    """
    Check if an object is a Spark DataFrame (future support).

    The check never imports PySpark itself. An object can only be a Spark
    DataFrame if ``pyspark.sql`` has already been imported, so the module is
    looked up in ``sys.modules`` instead of being imported on every call.

    Args:
        df: Object to check.

    Returns:
        True if df is an instance of ``pyspark.sql.DataFrame``, False
        otherwise (including when PySpark is not installed or not yet
        imported).
    """
    # Function-scope import keeps this helper self-contained.
    import sys

    pyspark_sql = sys.modules.get('pyspark.sql')
    # The entry may be missing, or None when the import has been blocked.
    frame_type = getattr(pyspark_sql, 'DataFrame', None)
    return frame_type is not None and isinstance(df, frame_type)
220
+
221
+
222
def get_default_backend() -> str:
    """
    Report the backend currently configured as the module-wide default.

    Returns:
        The value of the module-level ``_DEFAULT_BACKEND`` setting.

    Example:
        backend = get_default_backend()  # 'polars' unless it was changed
    """
    current_backend: str = _DEFAULT_BACKEND
    return current_backend
233
+
234
+
235
def set_default_backend(backend: str) -> None:
    """
    Change the module-wide default backend.

    Args:
        backend: Name of the backend to use; one of 'polars', 'pandas',
            'cudf'.

    Raises:
        ValueError: If backend is not one of the accepted names. The
            current default is left unchanged in that case.

    Example:
        set_default_backend('cudf')  # For GPU users
    """
    global _DEFAULT_BACKEND

    allowed = ['polars', 'pandas', 'cudf']

    # Accepted name: store it and finish.
    if backend in allowed:
        _DEFAULT_BACKEND = backend
        return

    choices = ', '.join(allowed)
    raise ValueError(
        f"Unsupported backend: {backend}. Supported backends: {choices}"
    )