additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -176
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -304
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -850
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a3.dist-info/METADATA +0 -288
- additory-0.1.0a3.dist-info/RECORD +0 -71
- additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
additory/core/user_namespace.py
DELETED
|
@@ -1,751 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
User Namespace Manager for Additory
|
|
3
|
-
|
|
4
|
-
This module provides functionality for managing user-defined patterns and configurations
|
|
5
|
-
in the ~/.additory/ directory. It allows users to create custom pattern files that can
|
|
6
|
-
be used alongside built-in patterns.
|
|
7
|
-
|
|
8
|
-
Author: Additory Team
|
|
9
|
-
Date: 2026-01-24
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import os
|
|
13
|
-
import json
|
|
14
|
-
from pathlib import Path
|
|
15
|
-
from typing import Dict, List, Optional, Any, Tuple
|
|
16
|
-
from datetime import datetime
|
|
17
|
-
import logging
|
|
18
|
-
|
|
19
|
-
try:
|
|
20
|
-
import tomli as toml
|
|
21
|
-
import tomli_w as toml_w
|
|
22
|
-
except ImportError:
|
|
23
|
-
import toml
|
|
24
|
-
# For writing, we'll use toml.dump which works for both
|
|
25
|
-
toml_w = toml
|
|
26
|
-
|
|
27
|
-
from additory.common.lists import load_list_file, parse_list_file
|
|
28
|
-
from additory.common.patterns import load_properties_file, parse_properties_file
|
|
29
|
-
|
|
30
|
-
logger = logging.getLogger(__name__)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class UserNamespaceError(Exception):
    """Root of the exception hierarchy raised by the user-namespace module."""


class ConfigError(UserNamespaceError):
    """Raised when the user configuration cannot be found, read or written."""


class InitializationError(UserNamespaceError):
    """Raised when the user namespace directory cannot be set up."""
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class UserNamespaceManager:
    """
    Manages user namespace for custom patterns and configurations.

    The user namespace allows users to define custom patterns in ~/.additory/
    that can be used alongside built-in patterns.

    Directory structure (as created by ``initialize``):
        ~/.additory/
        ├── config.toml
        ├── patterns/
        │   ├── *.list
        │   └── *.properties
        ├── schemas/
        │   └── *.toml
        ├── expressions/
        │   ├── *.add
        │   └── manifest.json
        └── cache/
            └── resolution_cache.json

    Example:
        >>> manager = UserNamespaceManager()
        >>> if not manager.is_initialized():
        ...     manager.initialize()
        >>> config = manager.load_config()
        >>> user_lists = manager.load_user_lists()
    """

    DEFAULT_BASE_DIR = Path.home() / ".additory"
    CONFIG_FILENAME = "config.toml"

    # Template for a freshly initialized config. Treat as read-only: always
    # deep-copy before modifying, otherwise every instance shares the edits.
    DEFAULT_CONFIG = {
        "additory": {
            "version": "1.0",
            "created": None,  # Will be set during initialization
        },
        "patterns": {
            "user_patterns_dir": "~/.additory/patterns",
            "user_schemas_dir": "~/.additory/schemas",
            "user_expressions_dir": "~/.additory/expressions",
            "auto_discover": True,
            "watch_for_changes": False,
        },
        "cache": {
            "enable_cache": True,
            "cache_dir": "~/.additory/cache",
            "cache_ttl": 3600,  # 1 hour
            "max_cache_size": 100,  # MB
        },
        "resolution": {
            "default_prefer_mode": "default",
            "enable_user_namespace": True,
            "user_namespace_priority": "before_imports",
            "user_expression_priority": "user_first",
        },
        "validation": {
            "strict_mode": False,
            "warn_on_unused_patterns": True,
            "warn_on_duplicate_patterns": True,
            "validate_on_load": False,
        },
        "logging": {
            "log_level": "INFO",
            "log_resolution_path": True,
            "log_cache_hits": False,
        },
    }

    def __init__(self, base_dir: Optional[Path] = None):
        """
        Initialize the user namespace manager.

        No file-system access happens here; directories are only created by
        ``initialize``.

        Args:
            base_dir: Base directory for user namespace (default: ~/.additory)
        """
        self.base_dir = Path(base_dir) if base_dir else self.DEFAULT_BASE_DIR
        self.config_path = self.base_dir / self.CONFIG_FILENAME
        # Lazily populated in-memory caches; None means "not loaded yet".
        self._config: Optional[Dict[str, Any]] = None
        self._user_lists_cache: Optional[Dict[str, Dict[str, List[str]]]] = None
        self._user_properties_cache: Optional[Dict[str, Dict[str, str]]] = None

    def is_initialized(self) -> bool:
        """
        Check if user namespace is initialized.

        Returns:
            True if the base directory, the config file and the ``patterns``
            sub-directory all exist, False otherwise
        """
        return (
            self.base_dir.exists()
            and self.config_path.exists()
            and (self.base_dir / "patterns").exists()
        )

    def initialize(self, force: bool = False) -> None:
        """
        Initialize user namespace directory structure.

        Creates the directory structure, writes a default config file and
        drops README/example files into the sub-directories.

        Args:
            force: If True, reinitialize even if already initialized
                (the config file and the generated README/example files are
                overwritten)

        Raises:
            InitializationError: If initialization fails
        """
        if self.is_initialized() and not force:
            logger.info(f"User namespace already initialized at {self.base_dir}")
            return

        # Local import: keeps this block self-contained without touching the
        # module's import section.
        from copy import deepcopy

        try:
            # Create directory structure
            self.base_dir.mkdir(parents=True, exist_ok=True)
            for subdir in ("patterns", "schemas", "expressions", "cache"):
                (self.base_dir / subdir).mkdir(exist_ok=True)

            # Deep copy: a shallow copy would share the nested section dicts
            # with DEFAULT_CONFIG and leak this instance's paths into it.
            config = deepcopy(self.DEFAULT_CONFIG)
            config["additory"]["created"] = datetime.now().isoformat()

            # Update paths to use base_dir
            config["patterns"]["user_patterns_dir"] = str(self.base_dir / "patterns")
            config["patterns"]["user_schemas_dir"] = str(self.base_dir / "schemas")
            config["patterns"]["user_expressions_dir"] = str(self.base_dir / "expressions")
            config["cache"]["cache_dir"] = str(self.base_dir / "cache")

            self._save_config(config)

            # The file on disk was just replaced, so any cached copy is stale.
            self._config = None

            self._create_readme_files()
            self._create_example_files()

            logger.info(f"User namespace initialized at {self.base_dir}")

        except Exception as e:
            raise InitializationError(f"Failed to initialize user namespace: {e}") from e

    def load_config(self, reload: bool = False) -> Dict[str, Any]:
        """
        Load user configuration.

        The parsed config is cached on the instance; later calls return the
        cached dict unless ``reload`` is set.

        Args:
            reload: If True, reload config from disk

        Returns:
            Configuration dictionary

        Raises:
            ConfigError: If config cannot be loaded
        """
        if self._config is not None and not reload:
            return self._config

        if not self.config_path.exists():
            raise ConfigError(f"Config file not found: {self.config_path}")

        try:
            # Parse from a string: ``loads`` is available in both ``tomli``
            # and ``toml``, whereas ``tomli.load`` rejects text-mode files.
            text = self.config_path.read_text(encoding="utf-8")
            self._config = toml.loads(text)
        except Exception as e:
            raise ConfigError(f"Failed to load config: {e}") from e
        return self._config

    def get_config(self, key: str, default: Any = None) -> Any:
        """
        Get configuration value by key.

        Args:
            key: Configuration key (dot-separated, e.g., "cache.enable_cache")
            default: Default value if key not found

        Returns:
            Configuration value, or ``default`` if any path segment is missing
        """
        value = self.load_config()
        for part in key.split("."):
            if isinstance(value, dict) and part in value:
                value = value[part]
            else:
                return default
        return value

    def set_config(self, key: str, value: Any) -> None:
        """
        Set configuration value by key.

        Missing intermediate sections are created. The change is made in
        memory only; call ``save_config`` to persist it.

        Args:
            key: Configuration key (dot-separated)
            value: Value to set
        """
        config = self.load_config()
        *parents, leaf = key.split(".")

        node = config
        for part in parents:
            node = node.setdefault(part, {})

        node[leaf] = value
        self._config = config

    def save_config(self) -> None:
        """
        Save configuration to disk.

        Raises:
            ConfigError: If no config is loaded or it cannot be saved
        """
        if self._config is None:
            raise ConfigError("No config loaded to save")

        self._save_config(self._config)

    def _save_config(self, config: Dict[str, Any]) -> None:
        """Serialize ``config`` as TOML and write it to ``self.config_path``."""
        try:
            # Serialize first so a serialization error cannot leave a
            # truncated file behind. ``dumps`` exists in both ``tomli_w`` and
            # ``toml``; ``tomli_w.dump`` would need a binary-mode handle.
            text = toml_w.dumps(config)
            self.config_path.write_text(text, encoding="utf-8")
        except Exception as e:
            raise ConfigError(f"Failed to save config: {e}") from e

    def _resolve_dir(self, key: str, default_subdir: str) -> Path:
        """
        Resolve a directory setting to a path.

        ``~`` is expanded and relative paths are taken relative to
        ``base_dir``. If the setting is missing or empty,
        ``base_dir / default_subdir`` is used.
        """
        configured = self.get_config(key)
        if not configured:
            return self.base_dir / default_subdir

        path = Path(configured).expanduser()
        if not path.is_absolute():
            path = self.base_dir / path
        return path

    def get_patterns_dir(self) -> Path:
        """Get user patterns directory path."""
        return self._resolve_dir("patterns.user_patterns_dir", "patterns")

    def get_schemas_dir(self) -> Path:
        """Get user schemas directory path."""
        return self._resolve_dir("patterns.user_schemas_dir", "schemas")

    def get_expressions_dir(self) -> Path:
        """Get user expressions directory path."""
        return self._resolve_dir("patterns.user_expressions_dir", "expressions")

    def get_cache_dir(self) -> Path:
        """Get cache directory path."""
        return self._resolve_dir("cache.cache_dir", "cache")

    @staticmethod
    def _glob_sorted(directory: Path, pattern: str) -> List[Path]:
        """Return files in ``directory`` matching ``pattern``, sorted; [] if the directory is missing."""
        if not directory.exists():
            return []
        # Sorted so that load order (and therefore which file wins when two
        # files define the same pattern name) is deterministic.
        return sorted(directory.glob(pattern))

    def discover_list_files(self) -> List[Path]:
        """
        Discover all .list files in user patterns directory.

        Returns:
            Sorted list of .list file paths (non-recursive)
        """
        return self._glob_sorted(self.get_patterns_dir(), "*.list")

    def discover_properties_files(self) -> List[Path]:
        """
        Discover all .properties files in user patterns directory.

        Returns:
            Sorted list of .properties file paths (non-recursive)
        """
        return self._glob_sorted(self.get_patterns_dir(), "*.properties")

    def load_user_lists(self, reload: bool = False) -> Dict[str, Dict[str, List[str]]]:
        """
        Load all user .list files.

        Files that fail to load are logged and skipped; files that yield no
        data are skipped as well. The result is cached on the instance.

        Args:
            reload: If True, reload from disk

        Returns:
            Dictionary mapping file names (without extension) to list data
            Format: {"filename": {"list_name": ["value1", "value2", ...]}}
        """
        if self._user_lists_cache is not None and not reload:
            return self._user_lists_cache

        user_lists = {}
        for list_file in self.discover_list_files():
            try:
                lists_data = load_list_file(str(list_file))
                # Only add if we got valid data
                if lists_data:
                    user_lists[list_file.stem] = lists_data
                    logger.debug(f"Loaded user list file: {list_file}")
            except Exception as e:
                # Best effort: one broken file must not hide the others.
                logger.warning(f"Failed to load user list file {list_file}: {e}")

        self._user_lists_cache = user_lists
        return user_lists

    def load_user_properties(self, reload: bool = False) -> Dict[str, Dict[str, str]]:
        """
        Load all user .properties files.

        Files that fail to load are logged and skipped. The result is cached
        on the instance.

        Args:
            reload: If True, reload from disk

        Returns:
            Dictionary mapping file names (without extension) to properties data
            Format: {"filename": {"pattern_name": "regex_pattern"}}
        """
        if self._user_properties_cache is not None and not reload:
            return self._user_properties_cache

        user_properties = {}
        for properties_file in self.discover_properties_files():
            try:
                properties_data = load_properties_file(str(properties_file))
                user_properties[properties_file.stem] = properties_data
                logger.debug(f"Loaded user properties file: {properties_file}")
            except Exception as e:
                # Best effort: one broken file must not hide the others.
                logger.warning(f"Failed to load user properties file {properties_file}: {e}")

        self._user_properties_cache = user_properties
        return user_properties

    def get_all_user_patterns(self) -> Tuple[Dict[str, List[str]], Dict[str, str]]:
        """
        Get all user patterns (lists and properties combined).

        Per-file data is flattened into one mapping each; when two files
        define the same name, the file loaded later wins.

        Returns:
            Tuple of (all_lists, all_properties)
            - all_lists: {"pattern_name": ["value1", "value2", ...]}
            - all_properties: {"pattern_name": "regex_pattern"}
        """
        all_lists = {}
        for lists_data in self.load_user_lists().values():
            all_lists.update(lists_data)

        all_properties = {}
        for properties_data in self.load_user_properties().values():
            all_properties.update(properties_data)

        return all_lists, all_properties

    def clear_cache(self) -> None:
        """Clear cached user patterns (the cached config is not affected)."""
        self._user_lists_cache = None
        self._user_properties_cache = None
        logger.debug("User patterns cache cleared")

    def discover_expression_files(self) -> List[Path]:
        """
        Discover all .add files in user expressions directory.

        Returns:
            Sorted list of .add file paths (non-recursive)
        """
        return self._glob_sorted(self.get_expressions_dir(), "*.add")

    def get_expression_manifest_path(self) -> Path:
        """
        Get path to user expression manifest file.

        Returns:
            Path to manifest.json
        """
        return self.get_expressions_dir() / "manifest.json"

    def load_expression_manifest(self) -> Optional[Dict[str, Any]]:
        """
        Load user expression manifest.

        Returns:
            Parsed manifest, or None if the file is missing or cannot be
            read/parsed (the failure is logged, not raised)
        """
        manifest_path = self.get_expression_manifest_path()

        if not manifest_path.exists():
            logger.debug(f"Expression manifest not found: {manifest_path}")
            return None

        try:
            with open(manifest_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            logger.warning(f"Failed to load expression manifest: {e}")
            return None

    @staticmethod
    def _manifest_entries(section: Any) -> List[Tuple[str, str]]:
        """
        Normalize an ``expressions`` manifest section to (name, file) pairs.

        Accepts the mapping form ``{"name": "file.add"}`` and the list form
        ``[{"name": ..., "file": ...}, ...]`` shown in the expressions README.
        Anything else, and list items lacking ``name``/``file``, is ignored.
        """
        if isinstance(section, dict):
            return list(section.items())
        if isinstance(section, list):
            return [
                (entry["name"], entry["file"])
                for entry in section
                if isinstance(entry, dict) and "name" in entry and "file" in entry
            ]
        return []

    def get_user_expressions(self) -> Dict[str, str]:
        """
        Get all user expressions from manifest.

        Only expressions whose file actually exists in the expressions
        directory are returned.

        Returns:
            Dictionary mapping expression names to file paths
            Format: {"expression_name": "/path/to/expression.add"}
        """
        manifest = self.load_expression_manifest()
        if not manifest or not isinstance(manifest, dict):
            return {}

        # Support both old and new manifest formats
        if "versions" in manifest:
            # New format (v2.0): one "expressions" section per version
            sections = [
                version_data.get("expressions", {})
                for version_data in manifest.get("versions", {}).values()
            ]
        elif "expressions" in manifest:
            # Old format (v1.0): a single top-level "expressions" section
            sections = [manifest.get("expressions", {})]
        else:
            sections = []

        expressions_dir = self.get_expressions_dir()
        user_expressions = {}
        for section in sections:
            for expr_name, expr_file in self._manifest_entries(section):
                expr_path = expressions_dir / expr_file
                if expr_path.exists():
                    user_expressions[expr_name] = str(expr_path)

        return user_expressions

    def _create_readme_files(self) -> None:
        """Create README files in subdirectories (existing files are overwritten)."""
        # Patterns README
        patterns_readme = self.base_dir / "patterns" / "README.md"
        patterns_readme.write_text("""# User Patterns

This directory contains your custom pattern files.

## File Types

- **`.list` files**: Value lists (TOML format)
- **`.properties` files**: Regex patterns (key-value format)

## Example .list file

```toml
[lists]
my_values = Value1, Value2, Value3
my_other_values = A, B, C
```

## Example .properties file

```properties
my_pattern = PATTERN\\d{4}
my_other_pattern = [A-Z]{3}-\\d{3}
```

## Usage

These patterns can be referenced in your TOML schemas:

```toml
[generation]
imports = ["global", "my_company"]

[schema]
field1 = "my_values"
field2 = "my_pattern"
```
""", encoding="utf-8")

        # Schemas README
        schemas_readme = self.base_dir / "schemas" / "README.md"
        schemas_readme.write_text("""# User Schemas

This directory contains your custom TOML schema files.

## Example Schema

```toml
[generation]
imports = ["global"]

[schema]
field1 = "pattern_name"
field2 = ["Value1", "Value2"]
field3 = "REGEX\\d+"

[metadata]
name = "My Schema"
version = "1.0"
```
""", encoding="utf-8")

        # Expressions README
        expressions_readme = self.base_dir / "expressions" / "README.md"
        expressions_readme.write_text("""# User Expressions

This directory contains your custom expression files (.add format).

## File Format

Expression files use YAML format with the following structure:

```yaml
formula:
  name: my_calc
  version: 0.1
  description: My custom calculation
  expression: |
    (value1 + value2) / 2

sample:
  clean:
    value1: [10, 20, 30]
    value2: [5, 10, 15]
    expected_result: [7.5, 15.0, 22.5]
```

## Manifest File

Create a `manifest.json` file to register your expressions:

```json
{
  "expressions": [
    {
      "name": "my_calc",
      "version": "0.1",
      "file": "my_calc_0.1.add",
      "description": "My custom calculation"
    }
  ]
}
```

## Usage

Reference your expressions in augment or synthetic operations:

```python
from additory.augment import augment

df_result = augment(
    df,
    expressions={"result": "my_calc"},
    enable_user_namespace=True
)
```
""", encoding="utf-8")

        # Cache README
        cache_readme = self.base_dir / "cache" / "README.md"
        cache_readme.write_text("""# Cache Directory

This directory contains cached pattern resolution data.

Cache files are automatically managed and can be safely deleted.
""", encoding="utf-8")

    def _create_example_files(self) -> None:
        """Create example pattern, expression and manifest files (existing files are overwritten)."""
        # Example .list file
        example_list = self.base_dir / "patterns" / "example.list"
        example_list.write_text("""# Example .list file
# This file demonstrates the .list format for value lists

[lists]
# Simple value list
example_statuses = Active, Inactive, Pending, Completed

# Another example
example_priorities = Low, Medium, High, Critical

# You can add more lists here
# my_custom_list = Value1, Value2, Value3
""", encoding="utf-8")

        # Example .properties file
        example_properties = self.base_dir / "patterns" / "example.properties"
        example_properties.write_text("""# Example .properties file
# This file demonstrates the .properties format for regex patterns

# Simple pattern
example_id = EX\\d{6}

# Email pattern
example_email = [a-zA-Z0-9._%+-]+@example\\.com

# You can add more patterns here
# my_custom_pattern = PATTERN\\d+
""", encoding="utf-8")

        # Example expression file.
        # NOTE(review): the nesting below is reconstructed; confirm it matches
        # the schema the expression loader expects.
        example_expression = self.base_dir / "expressions" / "example_calc_0.1.add"
        example_expression.write_text("""formula:
  name: example_calc
  version: 0.1
  stability: alpha
  type: cols
  description: Example calculation - average of two values
  author: user
  tags:
    - example
    - calculation
  expression: |
    (value1 + value2) / 2
  validation:
    required_columns:
      - value1
      - value2
    output_type: float64

sample:
  clean:
    value1: [10, 20, 30, 40]
    value2: [5, 10, 15, 20]
    expected_result: [7.5, 15.0, 22.5, 30.0]

documentation:
  formula_explanation: |
    This example calculates the average of two values.
    It's a simple demonstration of the expression format.
  examples:
    - description: Average of 10 and 5
      input:
        value1: 10
        value2: 5
      output: 7.5
""", encoding="utf-8")

        # Example manifest file (v2.0 "versions" layout, the one
        # get_user_expressions checks first).
        example_manifest = self.base_dir / "expressions" / "manifest.json"
        manifest = {
            "metadata": {
                "name": "user-expressions",
                "description": "User-defined expressions",
                "maintainer": "user",
            },
            "versions": {
                "0.1": {
                    "stability": "alpha",
                    "requires_engine": ">=0.1.0",
                    "expressions": {
                        "example_calc": "example_calc_0.1.add",
                    },
                },
            },
            "manifest_version": "2.0",
        }
        example_manifest.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
# Singleton instance, created lazily by get_user_namespace_manager().
_user_namespace_manager: Optional[UserNamespaceManager] = None


def get_user_namespace_manager(base_dir: Optional[Path] = None) -> UserNamespaceManager:
    """
    Get singleton user namespace manager instance.

    The instance is created on the first call. ``base_dir`` is only used at
    that point; later calls return the existing instance whatever is passed,
    and a warning is logged if a different directory is requested.

    Args:
        base_dir: Base directory for user namespace (default: ~/.additory)

    Returns:
        UserNamespaceManager instance
    """
    global _user_namespace_manager

    if _user_namespace_manager is None:
        _user_namespace_manager = UserNamespaceManager(base_dir)
    elif base_dir and Path(base_dir) != _user_namespace_manager.base_dir:
        # Previously this mismatch was silently ignored, so callers could end
        # up reading a different directory than the one they asked for.
        logger.warning(
            "get_user_namespace_manager() called with base_dir=%s but the "
            "singleton already uses %s; returning the existing instance",
            base_dir,
            _user_namespace_manager.base_dir,
        )

    return _user_namespace_manager
|