additory 0.1.0a4__py3-none-any.whl → 0.1.1a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +390 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a2.dist-info/METADATA +84 -0
- additory-0.1.1a2.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -177
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -352
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/deduce.py +0 -259
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -926
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a4.dist-info/METADATA +0 -311
- additory-0.1.0a4.dist-info/RECORD +0 -72
- additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a4.dist-info → additory-0.1.1a2.dist-info}/WHEEL +0 -0
- {additory-0.1.0a4.dist-info → additory-0.1.1a2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit conversion utilities for Additory.
|
|
3
|
+
|
|
4
|
+
Provides conversion between different units (weight, temperature, distance, etc.).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Multiplicative factors that scale a value into the category's base unit.
# Weight: kilograms per one of the given unit ('ton' is treated as 1000 kg).
WEIGHT_TO_KG = {'kg': 1.0, 'g': 0.001, 'lb': 0.453592, 'oz': 0.0283495, 'ton': 1000.0}

# Distance: meters per one of the given unit.
DISTANCE_TO_M = {'m': 1.0, 'km': 1000.0, 'cm': 0.01, 'mi': 1609.34, 'ft': 0.3048, 'in': 0.0254}

# Volume: liters per one of the given unit (gal/qt/pt/cup are the US measures).
VOLUME_TO_L = {'L': 1.0, 'mL': 0.001, 'gal': 3.78541, 'qt': 0.946353, 'pt': 0.473176, 'cup': 0.236588}

# Time: seconds per one of the given unit.
TIME_TO_S = {'s': 1.0, 'min': 60.0, 'h': 3600.0, 'day': 86400.0, 'week': 604800.0}

# Static exchange rates bundled with the library, quoted as units of each
# currency per one USD (USD is the base currency).
# Note: these are a fixed snapshot; in production they would be refreshed regularly.
DEFAULT_EXCHANGE_RATES = {
    'USD': 1.0,
    'EUR': 0.85,
    'GBP': 0.73,
    'JPY': 110.0,
    'CAD': 1.25,
    'AUD': 1.35,
    'CHF': 0.92,
    'CNY': 6.45,
}

# Maps every known unit symbol to the name of its category. Built from the
# tables above (plus the three temperature symbols, which have no factor
# table) so the category order is: weight, temperature, distance, volume,
# time, currency.
UNIT_TYPES = {
    symbol: category
    for category, symbols in (
        ('weight', WEIGHT_TO_KG),
        ('temperature', ('C', 'F', 'K')),
        ('distance', DISTANCE_TO_M),
        ('volume', VOLUME_TO_L),
        ('time', TIME_TO_S),
        ('currency', DEFAULT_EXCHANGE_RATES),
    )
    for symbol in symbols
}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def convert_weight(value: float, from_unit: str, to_unit: str) -> float:
    """
    Translate a weight from one supported unit into another.

    The value is scaled into kilograms using WEIGHT_TO_KG and then
    rescaled into the requested unit.

    Args:
        value: Quantity expressed in ``from_unit``
        from_unit: Unit of the input; must be a key of WEIGHT_TO_KG
        to_unit: Unit of the output; must be a key of WEIGHT_TO_KG

    Returns:
        The quantity expressed in ``to_unit``

    Raises:
        ValueError: If either unit is missing from WEIGHT_TO_KG
            (``from_unit`` is checked first)

    Example:
        convert_weight(150, from_unit='lb', to_unit='kg')
        # Returns approximately 68.04
    """
    # Validate source first, then target, so the error names the first bad unit.
    for unit in (from_unit, to_unit):
        if unit not in WEIGHT_TO_KG:
            raise ValueError(f"Unsupported weight unit: {unit}")

    kilograms = value * WEIGHT_TO_KG[from_unit]
    return kilograms / WEIGHT_TO_KG[to_unit]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def convert_temperature(value: float, from_unit: str, to_unit: str) -> float:
    """
    Translate a temperature between Celsius, Fahrenheit and Kelvin.

    The input is first normalised to Celsius and then re-expressed in
    the requested unit.

    Args:
        value: Temperature expressed in ``from_unit``
        from_unit: Unit of the input: 'C', 'F' or 'K'
        to_unit: Unit of the output: 'C', 'F' or 'K'

    Returns:
        The temperature expressed in ``to_unit``

    Raises:
        ValueError: If either unit is not one of 'C', 'F', 'K'
            (``from_unit`` is checked first)

    Example:
        convert_temperature(98.6, from_unit='F', to_unit='C')
        # Returns approximately 37.0
    """
    known_units = ['C', 'F', 'K']
    for unit in (from_unit, to_unit):
        if unit not in known_units:
            raise ValueError(f"Unsupported temperature unit: {unit}")

    # Step 1: bring the input onto the Celsius scale.
    if from_unit == 'F':
        in_celsius = (value - 32) * 5/9
    elif from_unit == 'K':
        in_celsius = value - 273.15
    else:
        in_celsius = value

    # Step 2: move from Celsius onto the requested scale.
    if to_unit == 'F':
        return (in_celsius * 9/5) + 32
    if to_unit == 'K':
        return in_celsius + 273.15
    return in_celsius
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def convert_distance(value: float, from_unit: str, to_unit: str) -> float:
    """
    Translate a distance from one supported unit into another.

    The value is scaled into meters using DISTANCE_TO_M and then
    rescaled into the requested unit.

    Args:
        value: Quantity expressed in ``from_unit``
        from_unit: Unit of the input; must be a key of DISTANCE_TO_M
        to_unit: Unit of the output; must be a key of DISTANCE_TO_M

    Returns:
        The quantity expressed in ``to_unit``

    Raises:
        ValueError: If either unit is missing from DISTANCE_TO_M
            (``from_unit`` is checked first)
    """
    factors = DISTANCE_TO_M

    if from_unit not in factors:
        raise ValueError(f"Unsupported distance unit: {from_unit}")
    if to_unit not in factors:
        raise ValueError(f"Unsupported distance unit: {to_unit}")

    # Scale up to meters, then divide down into the target unit.
    return value * factors[from_unit] / factors[to_unit]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def convert_currency(value: float, from_currency: str, to_currency: str,
                     rates: Optional[Dict[str, float]] = None) -> float:
    """
    Convert an amount of money from one currency to another.

    Every rate is interpreted as "units of that currency per one unit of
    a common base currency". With the built-in DEFAULT_EXCHANGE_RATES the
    base is USD; a custom ``rates`` dict may use any base as long as all
    of its entries are quoted against the same one.

    Args:
        value: Amount expressed in ``from_currency``
        from_currency: Source currency code ('USD', 'EUR', 'GBP', etc.)
        to_currency: Target currency code
        rates: Optional exchange rates dict (if None, uses built-in rates)

    Returns:
        The amount expressed in ``to_currency``

    Raises:
        ValueError: If either currency is missing from the rates in use,
            or if the rate of ``from_currency`` is zero (the amount
            cannot be brought back to the base currency)

    Example:
        eur_value = convert_currency(100, from_currency='USD', to_currency='EUR')
    """
    # An explicit (even empty) dict wins; only None falls back to the defaults.
    exchange_rates = rates if rates is not None else DEFAULT_EXCHANGE_RATES

    if from_currency not in exchange_rates:
        raise ValueError(f"Unsupported currency: {from_currency}")
    if to_currency not in exchange_rates:
        raise ValueError(f"Unsupported currency: {to_currency}")

    source_rate = exchange_rates[from_currency]
    if source_rate == 0:
        # Surface a bad rate as the documented ValueError instead of letting
        # a bare ZeroDivisionError escape from the division below.
        raise ValueError(
            f"Invalid exchange rate for {from_currency}: rate must be non-zero"
        )

    # Convert to the base currency, then to the target currency.
    base_value = value / source_rate
    return base_value * exchange_rates[to_currency]
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def convert_volume(value: float, from_unit: str, to_unit: str) -> float:
    """
    Translate a volume from one supported unit into another.

    The value is scaled into liters using VOLUME_TO_L and then rescaled
    into the requested unit.

    Args:
        value: Quantity expressed in ``from_unit``
        from_unit: Unit of the input; must be a key of VOLUME_TO_L
        to_unit: Unit of the output; must be a key of VOLUME_TO_L

    Returns:
        The quantity expressed in ``to_unit``

    Raises:
        ValueError: If either unit is missing from VOLUME_TO_L
            (``from_unit`` is checked first)
    """
    # Source is validated before target so the error names the first bad unit.
    for candidate in (from_unit, to_unit):
        if candidate not in VOLUME_TO_L:
            raise ValueError(f"Unsupported volume unit: {candidate}")

    in_liters = value * VOLUME_TO_L[from_unit]
    converted = in_liters / VOLUME_TO_L[to_unit]
    return converted
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def convert_time(value: float, from_unit: str, to_unit: str) -> float:
    """
    Translate a duration from one supported unit into another.

    The value is scaled into seconds using TIME_TO_S and then rescaled
    into the requested unit.

    Args:
        value: Quantity expressed in ``from_unit``
        from_unit: Unit of the input; must be a key of TIME_TO_S
        to_unit: Unit of the output; must be a key of TIME_TO_S

    Returns:
        The quantity expressed in ``to_unit``

    Raises:
        ValueError: If either unit is missing from TIME_TO_S
            (``from_unit`` is checked first)
    """
    if not (from_unit in TIME_TO_S):
        raise ValueError(f"Unsupported time unit: {from_unit}")
    if not (to_unit in TIME_TO_S):
        raise ValueError(f"Unsupported time unit: {to_unit}")

    # Go through seconds as the common intermediate unit.
    per_source = TIME_TO_S[from_unit]
    per_target = TIME_TO_S[to_unit]
    return (value * per_source) / per_target
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def get_conversion_factor(from_unit: str, to_unit: str, unit_type: str) -> float:
    """
    Return the multiplier that turns a ``from_unit`` value into ``to_unit``.

    Only categories backed by a factor table are supported: the result is
    the source unit's base-unit factor divided by the target unit's.

    Args:
        from_unit: Source unit
        to_unit: Target unit
        unit_type: Category of both units ('weight', 'distance', 'volume', 'time')

    Returns:
        Conversion factor (multiply a ``from_unit`` value by it)

    Raises:
        ValueError: If ``unit_type`` is 'temperature' or 'currency' (these
            cannot be expressed as one fixed factor), if ``unit_type`` is
            unknown, or if either unit is not in that category's table
    """
    # Categories that exist but have no single multiplicative factor.
    if unit_type == 'temperature':
        raise ValueError("Temperature conversions require special formulas, use convert_temperature()")
    if unit_type == 'currency':
        raise ValueError("Currency conversions require exchange rates, use convert_currency()")

    # Pick the factor table (and the label used in the error text).
    if unit_type == 'weight':
        label, table = 'weight', WEIGHT_TO_KG
    elif unit_type == 'distance':
        label, table = 'distance', DISTANCE_TO_M
    elif unit_type == 'volume':
        label, table = 'volume', VOLUME_TO_L
    elif unit_type == 'time':
        label, table = 'time', TIME_TO_S
    else:
        raise ValueError(f"Unsupported unit type: {unit_type}")

    if not (from_unit in table and to_unit in table):
        raise ValueError(f"Invalid {label} units: {from_unit}, {to_unit}")

    return table[from_unit] / table[to_unit]
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def detect_unit_type(unit: str) -> str:
    """
    Look up which category a unit symbol belongs to.

    Args:
        unit: Unit symbol; matched exactly against the keys of UNIT_TYPES

    Returns:
        Category name stored in UNIT_TYPES
        ('weight', 'temperature', 'distance', 'currency', 'volume', 'time')

    Raises:
        ValueError: If the symbol is not a key of UNIT_TYPES

    Example:
        detect_unit_type('kg')   # Returns: 'weight'
        detect_unit_type('USD')  # Returns: 'currency'
    """
    # Guard clause: reject unknown symbols up front.
    if unit not in UNIT_TYPES:
        raise ValueError(f"Unknown unit: {unit}")
    return UNIT_TYPES[unit]
|