additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -176
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -304
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/distributions.py +0 -22
  100. additory/synthetic/forecast.py +0 -1132
  101. additory/synthetic/linked_list_parser.py +0 -415
  102. additory/synthetic/namespace_lookup.py +0 -129
  103. additory/synthetic/smote.py +0 -320
  104. additory/synthetic/strategies.py +0 -850
  105. additory/synthetic/synthesizer.py +0 -713
  106. additory/utilities/__init__.py +0 -53
  107. additory/utilities/encoding.py +0 -600
  108. additory/utilities/games.py +0 -300
  109. additory/utilities/keys.py +0 -8
  110. additory/utilities/lookup.py +0 -103
  111. additory/utilities/matchers.py +0 -216
  112. additory/utilities/resolvers.py +0 -286
  113. additory/utilities/settings.py +0 -167
  114. additory/utilities/units.py +0 -749
  115. additory/utilities/validators.py +0 -153
  116. additory-0.1.0a3.dist-info/METADATA +0 -288
  117. additory-0.1.0a3.dist-info/RECORD +0 -71
  118. additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
  119. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  120. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,325 +0,0 @@
1
- # enhanced_version_manager.py
2
- # Enhanced version management for additory expressions system
3
-
4
- import os
5
- import json
6
- import re
7
- from typing import Dict, List, Optional, Tuple, Any
8
- from dataclasses import dataclass
9
- from packaging import version
10
-
11
- from .logging import log_info, log_warning
12
-
13
-
14
- @dataclass
15
- class VersionInfo:
16
- """Version information from manifest"""
17
- version: str
18
- stability: str
19
- requires_engine: str
20
- release_date: Optional[str]
21
- expressions: Dict[str, str]
22
- changelog: Optional[str]
23
-
24
-
25
- class VersionCompatibilityError(Exception):
26
- """Raised when version compatibility check fails"""
27
- pass
28
-
29
-
30
- class ManifestError(Exception):
31
- """Raised when manifest parsing fails"""
32
- pass
33
-
34
-
35
- class EnhancedVersionManager:
36
- """Enhanced version management with semantic versioning and stability levels"""
37
-
38
- def __init__(self):
39
- self.current_engine_version = "0.1.0"
40
- self.supported_expression_versions = ["0.1"]
41
- self.default_version = "0.1"
42
- self._manifest_cache = {}
43
-
44
- def load_manifest(self, base_path: str) -> Dict[str, VersionInfo]:
45
- """
46
- Load single manifest.json with multiple version entries
47
-
48
- Args:
49
- base_path: Path to directory containing manifest.json
50
-
51
- Returns:
52
- Dictionary mapping version strings to VersionInfo objects
53
-
54
- Raises:
55
- ManifestError: If manifest is missing or invalid
56
- """
57
- manifest_path = os.path.join(base_path, "manifest.json")
58
-
59
- if not os.path.exists(manifest_path):
60
- raise ManifestError(f"Manifest not found: {manifest_path}")
61
-
62
- # Check cache first
63
- cache_key = os.path.abspath(manifest_path)
64
- if cache_key in self._manifest_cache:
65
- mtime = os.path.getmtime(manifest_path)
66
- if self._manifest_cache[cache_key]["mtime"] == mtime:
67
- return self._manifest_cache[cache_key]["data"]
68
-
69
- try:
70
- with open(manifest_path, 'r', encoding='utf-8') as f:
71
- manifest_data = json.load(f)
72
- except json.JSONDecodeError as e:
73
- raise ManifestError(f"Invalid JSON in manifest: {e}")
74
- except Exception as e:
75
- raise ManifestError(f"Failed to read manifest: {e}")
76
-
77
- # Validate manifest structure
78
- if "versions" not in manifest_data:
79
- raise ManifestError("Manifest missing 'versions' section")
80
-
81
- versions = {}
82
- for version_str, version_data in manifest_data["versions"].items():
83
- try:
84
- versions[version_str] = self._parse_version_info(version_str, version_data)
85
- except Exception as e:
86
- log_warning(f"[version_manager] Skipping invalid version {version_str}: {e}")
87
-
88
- if not versions:
89
- raise ManifestError("No valid versions found in manifest")
90
-
91
- # Cache the result
92
- self._manifest_cache[cache_key] = {
93
- "data": versions,
94
- "mtime": os.path.getmtime(manifest_path)
95
- }
96
-
97
- log_info(f"[version_manager] Loaded manifest with {len(versions)} versions from {manifest_path}")
98
- return versions
99
-
100
- def _parse_version_info(self, version_str: str, version_data: dict) -> VersionInfo:
101
- """Parse version information from manifest data"""
102
-
103
- required_fields = ["stability", "requires_engine", "expressions"]
104
- for field in required_fields:
105
- if field not in version_data:
106
- raise ValueError(f"Missing required field '{field}' in version {version_str}")
107
-
108
- # Validate stability level
109
- valid_stability = ["alpha", "beta", "stable"]
110
- if version_data["stability"] not in valid_stability:
111
- raise ValueError(f"Invalid stability level: {version_data['stability']}")
112
-
113
- # Validate expressions dict
114
- if not isinstance(version_data["expressions"], dict):
115
- raise ValueError("'expressions' must be a dictionary")
116
-
117
- return VersionInfo(
118
- version=version_str,
119
- stability=version_data["stability"],
120
- requires_engine=version_data["requires_engine"],
121
- release_date=version_data.get("release_date"),
122
- expressions=version_data["expressions"],
123
- changelog=version_data.get("changelog")
124
- )
125
-
126
- def validate_compatibility(self, expression_version: str, required_engine: str) -> bool:
127
- """
128
- Check if expression version is compatible with current engine
129
-
130
- Args:
131
- expression_version: Version of expression library (e.g., "0.1")
132
- required_engine: Engine requirement string (e.g., ">=0.1.0")
133
-
134
- Returns:
135
- True if compatible, False otherwise
136
-
137
- Raises:
138
- VersionCompatibilityError: If compatibility check fails
139
- """
140
- try:
141
- # Parse engine requirement
142
- if required_engine.startswith(">="):
143
- min_version = required_engine[2:].strip()
144
- return version.parse(self.current_engine_version) >= version.parse(min_version)
145
- elif required_engine.startswith(">"):
146
- min_version = required_engine[1:].strip()
147
- return version.parse(self.current_engine_version) > version.parse(min_version)
148
- elif required_engine.startswith("=="):
149
- exact_version = required_engine[2:].strip()
150
- return version.parse(self.current_engine_version) == version.parse(exact_version)
151
- elif required_engine.startswith("<="):
152
- max_version = required_engine[2:].strip()
153
- return version.parse(self.current_engine_version) <= version.parse(max_version)
154
- elif required_engine.startswith("<"):
155
- max_version = required_engine[1:].strip()
156
- return version.parse(self.current_engine_version) < version.parse(max_version)
157
- else:
158
- # Assume exact match if no operator
159
- return version.parse(self.current_engine_version) == version.parse(required_engine)
160
-
161
- except Exception as e:
162
- raise VersionCompatibilityError(f"Invalid version requirement '{required_engine}': {e}")
163
-
164
- def get_expression_file(self, name: str, version_str: str, manifest: Dict[str, VersionInfo]) -> str:
165
- """
166
- Resolve expression filename from manifest
167
-
168
- Args:
169
- name: Expression name (e.g., "bmi")
170
- version_str: Version string (e.g., "0.1")
171
- manifest: Loaded manifest data
172
-
173
- Returns:
174
- Filename for the expression
175
-
176
- Raises:
177
- ValueError: If expression or version not found
178
- """
179
- if version_str not in manifest:
180
- available_versions = list(manifest.keys())
181
- raise ValueError(f"Version '{version_str}' not found. Available versions: {available_versions}")
182
-
183
- version_info = manifest[version_str]
184
-
185
- if name not in version_info.expressions:
186
- available_expressions = list(version_info.expressions.keys())
187
- raise ValueError(f"Expression '{name}' not found in version {version_str}. Available expressions: {available_expressions}")
188
-
189
- return version_info.expressions[name]
190
-
191
- def set_default_version(self, version_str: str):
192
- """
193
- Set the default expression version
194
-
195
- Args:
196
- version_str: Version to set as default
197
-
198
- Raises:
199
- ValueError: If version is not supported
200
- """
201
- if version_str not in self.supported_expression_versions:
202
- raise ValueError(f"Unsupported version: {version_str}. Supported: {self.supported_expression_versions}")
203
-
204
- self.default_version = version_str
205
- log_info(f"[version_manager] Default version set to {version_str}")
206
-
207
- def get_version_info(self, version_str: str, manifest: Dict[str, VersionInfo]) -> VersionInfo:
208
- """Get detailed version information"""
209
- if version_str not in manifest:
210
- raise ValueError(f"Version '{version_str}' not found in manifest")
211
-
212
- return manifest[version_str]
213
-
214
- def list_available_versions(self, manifest: Dict[str, VersionInfo]) -> List[Tuple[str, str]]:
215
- """
216
- List all available versions with their stability levels
217
-
218
- Returns:
219
- List of (version, stability) tuples
220
- """
221
- return [(v, info.stability) for v, info in manifest.items()]
222
-
223
- def get_stable_versions(self, manifest: Dict[str, VersionInfo]) -> List[str]:
224
- """Get list of stable versions only"""
225
- return [v for v, info in manifest.items() if info.stability == "stable"]
226
-
227
- def get_latest_version(self, manifest: Dict[str, VersionInfo], stability: str = None) -> Optional[str]:
228
- """
229
- Get the latest version, optionally filtered by stability
230
-
231
- Args:
232
- manifest: Loaded manifest data
233
- stability: Filter by stability level (alpha, beta, stable)
234
-
235
- Returns:
236
- Latest version string or None if no versions match
237
- """
238
- versions = manifest.keys()
239
-
240
- if stability:
241
- versions = [v for v, info in manifest.items() if info.stability == stability]
242
-
243
- if not versions:
244
- return None
245
-
246
- # Sort versions using semantic versioning
247
- try:
248
- sorted_versions = sorted(versions, key=lambda v: version.parse(v), reverse=True)
249
- return sorted_versions[0]
250
- except Exception:
251
- # Fallback to string sorting if semantic versioning fails
252
- return sorted(versions, reverse=True)[0]
253
-
254
- def check_deprecation(self, version_str: str, manifest_data: dict) -> Optional[str]:
255
- """
256
- Check if a version is deprecated
257
-
258
- Args:
259
- version_str: Version to check
260
- manifest_data: Raw manifest data (not parsed VersionInfo)
261
-
262
- Returns:
263
- Deprecation message if deprecated, None otherwise
264
- """
265
- deprecation_info = manifest_data.get("deprecation", {})
266
-
267
- if version_str in deprecation_info:
268
- if isinstance(deprecation_info[version_str], str):
269
- # Deprecation date
270
- return f"Version {version_str} deprecated on {deprecation_info[version_str]}"
271
- elif isinstance(deprecation_info[version_str], dict):
272
- # Detailed deprecation info
273
- date = deprecation_info[version_str].get("date", "unknown")
274
- message = deprecation_info[version_str].get("message", "No details provided")
275
- return f"Version {version_str} deprecated on {date}: {message}"
276
-
277
- # Check for general deprecation message
278
- if "message" in deprecation_info and version_str in deprecation_info:
279
- return deprecation_info["message"]
280
-
281
- return None
282
-
283
- def validate_version_format(self, version_str: str) -> bool:
284
- """
285
- Validate version string format
286
-
287
- Args:
288
- version_str: Version string to validate
289
-
290
- Returns:
291
- True if valid format
292
- """
293
- # Support both semantic versioning (1.0.0) and simplified (1.0)
294
- pattern = r'^(\d+)\.(\d+)(?:\.(\d+))?(?:-([a-zA-Z0-9\-\.]+))?$'
295
- return bool(re.match(pattern, version_str))
296
-
297
- def clear_cache(self):
298
- """Clear the manifest cache"""
299
- self._manifest_cache.clear()
300
- log_info("[version_manager] Manifest cache cleared")
301
-
302
- def _is_valid_version(self, version: str) -> bool:
303
- """Check if version string is valid (alias for validate_version_format)"""
304
- return self.validate_version_format(version)
305
-
306
- def get_available_versions(self) -> Dict[str, Any]:
307
- """Get all available versions from builtin path"""
308
- try:
309
- # Try to load manifest from builtin path
310
- manifest = self.load_manifest("expressions/") # Default builtin path
311
- return {
312
- "versions": list(manifest.keys()),
313
- "default": self.default_version,
314
- "current_engine": self.current_engine_version,
315
- "stability_info": {v: info.stability for v, info in manifest.items()}
316
- }
317
- except Exception as e:
318
- log_warning(f"[version_manager] Failed to get available versions: {e}")
319
- return {
320
- "versions": [self.default_version],
321
- "default": self.default_version,
322
- "current_engine": self.current_engine_version,
323
- "error": str(e)
324
- }
325
- log_info("[version_manager] Manifest cache cleared")
additory/core/executor.py DELETED
@@ -1,59 +0,0 @@
1
- # executor.py
2
- """
3
- Polars-Only Expression Executor
4
-
5
- Executes expressions using Polars engine for all input formats.
6
- Converts pandas/cuDF to Polars via Arrow bridge, processes in Polars,
7
- then converts back to original format.
8
- """
9
-
10
- from additory.common.backend import detect_backend, to_polars, from_polars
11
- from .compiler_polars import compile_polars
12
- from .logging import log_info
13
-
14
-
15
- def execute_expression(expression_name, df, ast, backend=None, output_col=None):
16
- """
17
- Unified Polars-only execution pipeline.
18
-
19
- Architecture:
20
- 1. Detect input format (pandas/polars/cuDF)
21
- 2. Convert to Polars via Arrow bridge (if needed)
22
- 3. Compile and execute expression in Polars
23
- 4. Convert back to original format via Arrow bridge
24
-
25
- Args:
26
- expression_name: Name of expression being executed
27
- df: Input dataframe (pandas, polars, or cuDF)
28
- ast: Abstract syntax tree of expression
29
- backend: Ignored (kept for API compatibility)
30
- output_col: Name of output column
31
-
32
- Returns:
33
- DataFrame in original format with new column
34
-
35
- Examples:
36
- >>> # Pandas input -> Polars processing -> Pandas output
37
- >>> result = execute_expression('bmi', pandas_df, ast, output_col='bmi')
38
-
39
- >>> # cuDF input -> Polars processing -> cuDF output
40
- >>> result = execute_expression('bmi', cudf_df, ast, output_col='bmi')
41
- """
42
- # 1. Detect input backend
43
- input_backend = detect_backend(df)
44
-
45
- log_info(f"[executor] Running '{expression_name}' on Polars engine (input: {input_backend})")
46
-
47
- # 2. Convert to Polars via Arrow bridge
48
- pl_df = to_polars(df, input_backend)
49
-
50
- # 3. Compile expression to Polars
51
- expr = compile_polars(ast)
52
-
53
- # 4. Execute in Polars
54
- pl_df = pl_df.with_columns(expr.alias(output_col))
55
-
56
- # 5. Convert back to original format via Arrow bridge
57
- result_df = from_polars(pl_df, input_backend)
58
-
59
- return result_df