additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -177
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -352
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/deduce.py +0 -259
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -926
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a4.dist-info/METADATA +0 -311
- additory-0.1.0a4.dist-info/RECORD +0 -72
- additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
additory/expressions/proxy.py
DELETED
|
@@ -1,549 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Enhanced Expression Proxy for the Enhanced Expressions System
|
|
4
|
-
Integrates version management, caching, integrity validation, and Polars processing
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import os
|
|
8
|
-
import pandas as pd
|
|
9
|
-
import polars as pl
|
|
10
|
-
from typing import Optional, Union, Dict, Any, List
|
|
11
|
-
from dataclasses import dataclass
|
|
12
|
-
|
|
13
|
-
from ..core.enhanced_version_manager import EnhancedVersionManager
|
|
14
|
-
from ..core.enhanced_cache_manager import get_cache_manager
|
|
15
|
-
from ..core.polars_expression_engine import PolarsExpressionEngine
|
|
16
|
-
from ..core.namespace_manager import NamespaceManager
|
|
17
|
-
from ..core.sample_data_manager import get_sample_data_manager, SampleDataManager
|
|
18
|
-
from ..core.parser import parse_expression
|
|
19
|
-
from ..core.logging import log_info, log_warning
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@dataclass
class SampleDataRequest:
    """Marker object asking an expression for sample data instead of a run."""

    # Which sample flavour is wanted: "clean" or "unclean".
    sample_type: str

    def __repr__(self):
        # The class name is spelled out literally, so subclasses print the same.
        kind = self.sample_type
        return f"SampleDataRequest(type='{kind}')"
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
# Global sample data request instances. Passing one of these as the first
# argument of an EnhancedExpressionCallable makes the call return sample data
# ("clean" or "unclean") instead of executing the expression.
sample_data = SampleDataRequest("clean")
sample_data_unclean = SampleDataRequest("unclean")
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class ExpressionNotFoundError(Exception):
    """Signals that an expression could not be located or loaded."""
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class ExpressionExecutionError(Exception):
    """Signals that running an expression failed."""
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class EnhancedExpressionCallable:
    """
    Callable handle for one named expression inside a namespace

    Every operation is delegated to the owning proxy; this object only
    supplies the expression name and namespace it was created with.
    """

    def __init__(self, expression_name: str, namespace: str, proxy: 'EnhancedExpressionProxy'):
        """
        Bind an expression name to its namespace and owning proxy

        Args:
            expression_name: Name of the expression this handle represents
            namespace: Namespace the expression belongs to
            proxy: Proxy that performs resolution, execution and caching
        """
        self.expression_name = expression_name
        self.namespace = namespace
        self.proxy = proxy

    def __call__(self, df_or_request, version: Optional[str] = None,
                 backend: Optional[str] = None, insert_at: Optional[str] = None, **kwargs):
        """
        Execute expression or return sample data

        Args:
            df_or_request: DataFrame to process or SampleDataRequest
            version: Specific version to use (optional)
            backend: Accepted for compatibility; not used by this method
            insert_at: Output column name, forwarded to the proxy
            **kwargs: Additional execution parameters, forwarded to the proxy

        Returns:
            DataFrame with expression results or sample data

        Raises:
            ExpressionExecutionError: If the delegated call raises anything;
                the original exception is attached as ``__cause__``
        """
        try:
            # A SampleDataRequest marker switches the call to sample-data mode.
            if isinstance(df_or_request, SampleDataRequest):
                return self.proxy._handle_sample_request(
                    self.expression_name, df_or_request.sample_type, version
                )

            return self.proxy._execute_expression(
                self.expression_name, df_or_request, version, insert_at, **kwargs
            )

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to execute {self.expression_name}: {e}")
            # Chain explicitly so the root cause stays visible in tracebacks.
            raise ExpressionExecutionError(
                f"Failed to execute {self.expression_name}: {e}"
            ) from e

    def info(self, version: Optional[str] = None) -> Dict[str, Any]:
        """Get detailed information about this expression (delegates to the proxy)"""
        return self.proxy.get_expression_info(self.expression_name, version)

    def sample(self, clean: bool = True) -> pd.DataFrame:
        """
        Get sample data for this expression

        Args:
            clean: True requests the "clean" sample, False the "unclean" one
        """
        sample_type = "clean" if clean else "unclean"
        return self.proxy._handle_sample_request(self.expression_name, sample_type)

    def refresh(self, version: Optional[str] = None) -> bool:
        """
        Force refresh this expression from cache

        Args:
            version: Version to refresh; when omitted, the proxy's own default
                version is used, then the version manager's default

        Returns:
            Whatever the cache manager's ``force_refresh_expression`` returns
        """
        # Same precedence the proxy uses everywhere else, so a version set via
        # proxy.set_default_version() is the one that gets refreshed.
        resolved_version = (
            version
            or self.proxy.default_version
            or self.proxy.version_manager.default_version
        )
        return self.proxy.cache_manager.force_refresh_expression(
            self.expression_name, resolved_version, self.namespace
        )
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
class EnhancedExpressionProxy:
    """
    Namespace-scoped entry point for resolving and running expressions

    Attribute access yields an EnhancedExpressionCallable, so an expression is
    used as ``proxy.my_expression(df)``. Resolution asks the cache manager
    first and, when ``auto_cache`` is enabled, falls back to the namespace's
    source directory.
    """

    def __init__(self, namespace: str = "builtin"):
        """
        Initialize enhanced expression proxy

        Args:
            namespace: Namespace to operate in ("builtin" or "user")
        """
        self.namespace = namespace

        # Initialize core components
        self.version_manager = EnhancedVersionManager()
        self.cache_manager = get_cache_manager()
        self.polars_engine = PolarsExpressionEngine()
        self.namespace_manager = NamespaceManager()
        self.sample_data_manager = get_sample_data_manager()

        # Configuration
        self.default_version = None  # Use version manager's default
        self.auto_cache = True
        self.validate_integrity = True  # not read anywhere in this class

        log_info(f"[expression_proxy] Initialized for namespace: {namespace}")

    def __getattr__(self, expression_name: str) -> "EnhancedExpressionCallable":
        """
        Get expression callable for dynamic access

        Only invoked when normal attribute lookup fails.

        Args:
            expression_name: Name of the expression

        Returns:
            EnhancedExpressionCallable for the expression

        Raises:
            AttributeError: For dunder names (protocol probes such as
                ``__setstate__`` or ``__deepcopy__``) and when the instance
                has no ``namespace`` yet
        """
        # Dunder lookups come from copy/pickle/hasattr probes, not from users
        # asking for an expression; answering them breaks those protocols.
        if expression_name.startswith("__") and expression_name.endswith("__"):
            raise AttributeError(expression_name)

        # Read the namespace from __dict__ directly: going through
        # self.namespace on a half-built instance would re-enter __getattr__
        # and recurse without bound.
        try:
            namespace = self.__dict__["namespace"]
        except KeyError:
            raise AttributeError(expression_name) from None

        return EnhancedExpressionCallable(expression_name, namespace, self)

    def _effective_version(self, version: Optional[str]) -> Optional[str]:
        """Pick the explicit version, else this proxy's default, else the version manager's."""
        return version or self.default_version or self.version_manager.default_version

    def _namespace_base_path(self) -> str:
        """Return the source directory for this namespace ("builtin" or anything else -> user)."""
        if self.namespace == "builtin":
            return self.namespace_manager.builtin_path
        return self.namespace_manager.user_path

    def _resolve_expression(self, expression_name: str, version: Optional[str] = None) -> Dict[str, Any]:
        """
        Resolve expression from cache or source

        The source directory is only consulted when ``auto_cache`` is true.

        Args:
            expression_name: Name of the expression
            version: Specific version (optional)

        Returns:
            Dictionary with expression data and metadata

        Raises:
            ExpressionNotFoundError: If expression cannot be found
        """
        try:
            resolved_version = self._effective_version(version)

            # Try to get from cache first
            cached_path = self.cache_manager.get_cached_expression(
                expression_name, resolved_version, self.namespace
            )
            if cached_path:
                log_info(f"[expression_proxy] Using cached expression: {expression_name} v{resolved_version}")
                return self._load_expression_from_file(cached_path, expression_name, resolved_version)

            # Not in cache - try to load and cache from source
            if self.auto_cache:
                source_path = self._find_source_expression(expression_name, resolved_version)
                if source_path:
                    success = self.cache_manager.cache_expression(
                        source_path, self.namespace, expression_name, resolved_version
                    )
                    if success:
                        cached_path = self.cache_manager.get_cached_expression(
                            expression_name, resolved_version, self.namespace
                        )
                        if cached_path:
                            return self._load_expression_from_file(
                                cached_path, expression_name, resolved_version
                            )

                    # Caching failed or the cached copy is missing: load the
                    # source file directly.
                    return self._load_expression_from_file(source_path, expression_name, resolved_version)

            raise ExpressionNotFoundError(
                f"Expression '{expression_name}' version '{resolved_version}' not found in namespace '{self.namespace}'"
            )

        except ExpressionNotFoundError:
            raise
        except Exception as e:
            log_warning(f"[expression_proxy] Failed to resolve expression {expression_name}: {e}")
            raise ExpressionNotFoundError(f"Failed to resolve expression {expression_name}: {e}") from e

    def _find_source_expression(self, expression_name: str, version: str) -> Optional[str]:
        """
        Find source expression file

        Looks the file name up in the namespace manifest first, then tries the
        ``<name>_<version>.add`` naming pattern.

        Returns:
            Path of an existing source file, or None when nothing is found or
            the lookup itself fails
        """
        try:
            base_path = self._namespace_base_path()

            # Try to load manifest to get exact filename
            try:
                manifest = self.version_manager.load_manifest(base_path)
                if version in manifest and expression_name in manifest[version].expressions:
                    filename = manifest[version].expressions[expression_name]
                    source_path = os.path.join(base_path, filename)
                    if os.path.exists(source_path):
                        return source_path
            except Exception:
                # Best effort: an unreadable manifest just means we fall
                # through to the filename pattern below.
                pass

            # Fallback: try standard filename pattern
            filename = f"{expression_name}_{version}.add"
            source_path = os.path.join(base_path, filename)
            if os.path.exists(source_path):
                return source_path

            return None

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to find source for {expression_name}: {e}")
            return None

    def _load_expression_from_file(self, file_path: str, expression_name: str, version: str) -> Dict[str, Any]:
        """
        Load and parse expression from file

        Returns:
            Dictionary with the name, version, file path, the parsed object
            and its ``ast``, ``expression``, ``sample_clean``,
            ``sample_unclean`` and ``metadata`` attributes

        Raises:
            ExpressionNotFoundError: If the file cannot be read or parsed
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                file_content = f.read()

            parsed_data = parse_expression(file_content)

            return {
                "name": expression_name,
                "version": version,
                "file_path": file_path,
                "parsed_data": parsed_data,
                "ast": parsed_data.ast,
                "expression": parsed_data.expression,
                "sample_clean": parsed_data.sample_clean,
                "sample_unclean": parsed_data.sample_unclean,
                "metadata": parsed_data.metadata
            }

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to load expression from {file_path}: {e}")
            raise ExpressionNotFoundError(f"Failed to load expression from {file_path}: {e}") from e

    def _execute_expression(self, expression_name: str, df: Union[pd.DataFrame, "pl.DataFrame"],
                            version: Optional[str] = None, output_column: Optional[str] = None,
                            **kwargs) -> Union[pd.DataFrame, "pl.DataFrame"]:
        """
        Execute expression on dataframe

        Args:
            expression_name: Name of the expression
            df: Input dataframe
            version: Specific version (optional)
            output_column: Output column name (defaults to expression name)
            **kwargs: Accepted for compatibility; not used by this method

        Returns:
            DataFrame with expression results

        Raises:
            ExpressionExecutionError: If resolution or execution fails
        """
        try:
            expression_data = self._resolve_expression(expression_name, version)
            output_col = output_column or expression_name

            # Detect input backend
            if isinstance(df, pd.DataFrame):
                backend_type = "pandas"
            elif isinstance(df, pl.DataFrame):
                backend_type = "polars"
            elif self.polars_engine.arrow_bridge:
                backend_type = self.polars_engine.arrow_bridge.detect_backend(df)
            else:
                backend_type = "pandas"  # fallback

            result = self.polars_engine.execute_expression(
                df, expression_data["expression"], output_col, backend_type
            )

            # Extract DataFrame from ExpressionResult if needed
            if hasattr(result, 'dataframe'):
                result = result.dataframe

            log_info(f"[expression_proxy] Executed {expression_name} v{expression_data['version']}")
            return result

        except Exception as e:
            log_warning(f"[expression_proxy] Expression execution failed: {e}")
            raise ExpressionExecutionError(f"Expression execution failed: {e}") from e

    def _handle_sample_request(self, expression_name: str, sample_type: str,
                               version: Optional[str] = None) -> pd.DataFrame:
        """
        Handle sample data requests using enhanced sample data manager

        Args:
            expression_name: Name of the expression
            sample_type: "clean" selects the clean sample; any other value
                selects the unclean sample
            version: Specific version (optional)

        Returns:
            DataFrame with sample data; on failure, a one-row DataFrame whose
            "error" column carries the message (no exception is raised)
        """
        try:
            if sample_type == "clean":
                return self.sample_data_manager.get_clean_sample(
                    expression_name, self.namespace, version
                )
            return self.sample_data_manager.get_unclean_sample(
                expression_name, self.namespace, version
            )

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to get sample data for {expression_name}: {e}")
            return pd.DataFrame({
                "error": [f"Failed to get {sample_type} sample data: {e}"]
            })

    def get_expression_info(self, expression_name: str, version: Optional[str] = None) -> Dict[str, Any]:
        """
        Get detailed information about an expression

        Args:
            expression_name: Name of the expression
            version: Specific version (optional)

        Returns:
            Dictionary with expression information; on failure it contains
            "name", "version", "namespace" and "error" instead
        """
        # Pre-bind so the error payload below can always report a version,
        # even if resolving the default version is what failed.
        resolved_version = version
        try:
            resolved_version = self._effective_version(version)

            cache_info = self.cache_manager.get_expression_cache_info(
                expression_name, resolved_version, self.namespace
            )

            # An unresolvable expression still yields cache info; its
            # metadata and sample flags simply come back empty/False.
            try:
                expression_data = self._resolve_expression(expression_name, version)
            except Exception:
                expression_data = {}

            return {
                "name": expression_name,
                "version": resolved_version,
                "namespace": self.namespace,
                "metadata": expression_data.get("metadata", {}),
                "cache_info": cache_info,
                "has_clean_sample": bool(expression_data.get("sample_clean")),
                "has_unclean_sample": bool(expression_data.get("sample_unclean"))
            }

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to get info for {expression_name}: {e}")
            return {
                "name": expression_name,
                "version": resolved_version,
                "namespace": self.namespace,
                "error": str(e)
            }

    def list_expressions(self, version: Optional[str] = None) -> Dict[str, Any]:
        """
        List all available expressions in this namespace

        The manifest is the primary source; the directory is scanned for
        ``*_<version>.add`` files only when loading the manifest raises.

        Args:
            version: Specific version (optional)

        Returns:
            Dictionary with "namespace", "version", "expressions"
            (name -> file name) and "count"; on failure also "error"
        """
        resolved_version = version
        try:
            resolved_version = self._effective_version(version)
            base_path = self._namespace_base_path()

            expressions = {}
            try:
                manifest = self.version_manager.load_manifest(base_path)
                if resolved_version in manifest:
                    expressions = manifest[resolved_version].expressions
            except Exception:
                # Fallback: scan directory for .add files
                if os.path.exists(base_path):
                    suffix = f"_{resolved_version}.add"
                    for filename in os.listdir(base_path):
                        if filename.endswith(suffix):
                            # Strip only the trailing suffix; the name itself
                            # may contain the same text.
                            expressions[filename[:-len(suffix)]] = filename

            return {
                "namespace": self.namespace,
                "version": resolved_version,
                "expressions": expressions,
                "count": len(expressions)
            }

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to list expressions: {e}")
            return {
                "namespace": self.namespace,
                "version": resolved_version,
                "expressions": {},
                "count": 0,
                "error": str(e)
            }

    def set_default_version(self, version: str):
        """Set default version for this proxy"""
        self.default_version = version
        log_info(f"[expression_proxy] Set default version to {version} for {self.namespace}")

    def refresh_cache(self) -> Dict[str, int]:
        """Refresh cache for this namespace (delegates to the cache manager)"""
        return self.cache_manager.refresh_cache(self.namespace)

    def get_cache_status(self) -> Dict[str, Any]:
        """
        Get cache status for this namespace

        Returns:
            Dictionary with "namespace", the overall "health" ("unknown" when
            absent), this namespace's entry of "namespace_health" and the
            "global_stats" from the cache manager's status
        """
        full_status = self.cache_manager.get_cache_status()
        return {
            "namespace": self.namespace,
            "health": full_status.get("health", "unknown"),
            "namespace_health": full_status.get("namespace_health", {}).get(self.namespace, {}),
            "global_stats": full_status.get("global_stats", {})
        }

    def get_sample_info(self, expression_name: str, version: Optional[str] = None) -> Dict[str, Any]:
        """
        Get comprehensive sample data information

        Args:
            expression_name: Name of the expression
            version: Specific version (optional)

        Returns:
            Dictionary with sample data information, or ``{"error": ...}``
            when the sample data manager raises
        """
        try:
            sample_info = self.sample_data_manager.get_sample_info(
                expression_name, self.namespace, version
            )

            return {
                "expression_name": sample_info.expression_name,
                "version": sample_info.version,
                "has_clean_sample": sample_info.has_clean,
                "has_unclean_sample": sample_info.has_unclean,
                "clean_rows": sample_info.clean_rows,
                "unclean_rows": sample_info.unclean_rows,
                "educational_comments": sample_info.educational_comments,
                "validation_errors": sample_info.validation_errors,
                "namespace": self.namespace
            }

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to get sample info for {expression_name}: {e}")
            return {"error": str(e)}

    def validate_sample_data(self, expression_name: str, sample_data: Dict[str, Any],
                             sample_type: str = "clean", version: Optional[str] = None) -> Dict[str, Any]:
        """
        Validate sample data format and content

        Args:
            expression_name: Name of the expression
            sample_data: Sample data to validate
            sample_type: "clean" or "unclean"
            version: Accepted for compatibility; not used by this method

        Returns:
            Dictionary with validation results, or
            ``{"error": ..., "is_valid": False}`` when validation raises
        """
        try:
            is_valid, errors = self.sample_data_manager.validate_sample_data(
                sample_data, expression_name, sample_type
            )

            return {
                "is_valid": is_valid,
                "errors": errors,
                "sample_type": sample_type,
                "expression_name": expression_name,
                "namespace": self.namespace
            }

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to validate sample data for {expression_name}: {e}")
            return {"error": str(e), "is_valid": False}

    def create_sample_template(self, expression_name: str, columns: List[str]) -> Dict[str, Any]:
        """
        Create sample data template for an expression

        Args:
            expression_name: Name of the expression
            columns: List of required columns

        Returns:
            Dictionary with sample data templates and usage hints, or
            ``{"error": ...}`` when the sample data manager raises
        """
        try:
            template = self.sample_data_manager.create_sample_template(expression_name, columns)

            return {
                "expression_name": expression_name,
                "namespace": self.namespace,
                "template": template,
                "usage": {
                    "clean": "Use for normal testing and validation",
                    "unclean": "Use for error handling and edge case testing"
                }
            }

        except Exception as e:
            log_warning(f"[expression_proxy] Failed to create sample template for {expression_name}: {e}")
            return {"error": str(e)}
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
# Backward compatibility aliases: shorter names bound to the same class
# objects, so either spelling refers to the identical class.
ExpressionProxy = EnhancedExpressionProxy
ExpressionCallable = EnhancedExpressionCallable
|