additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -176
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -304
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/distributions.py +0 -22
  100. additory/synthetic/forecast.py +0 -1132
  101. additory/synthetic/linked_list_parser.py +0 -415
  102. additory/synthetic/namespace_lookup.py +0 -129
  103. additory/synthetic/smote.py +0 -320
  104. additory/synthetic/strategies.py +0 -850
  105. additory/synthetic/synthesizer.py +0 -713
  106. additory/utilities/__init__.py +0 -53
  107. additory/utilities/encoding.py +0 -600
  108. additory/utilities/games.py +0 -300
  109. additory/utilities/keys.py +0 -8
  110. additory/utilities/lookup.py +0 -103
  111. additory/utilities/matchers.py +0 -216
  112. additory/utilities/resolvers.py +0 -286
  113. additory/utilities/settings.py +0 -167
  114. additory/utilities/units.py +0 -749
  115. additory/utilities/validators.py +0 -153
  116. additory-0.1.0a3.dist-info/METADATA +0 -288
  117. additory-0.1.0a3.dist-info/RECORD +0 -71
  118. additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
  119. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  120. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,751 +0,0 @@
1
- """
2
- User Namespace Manager for Additory
3
-
4
- This module provides functionality for managing user-defined patterns and configurations
5
- in the ~/.additory/ directory. It allows users to create custom pattern files that can
6
- be used alongside built-in patterns.
7
-
8
- Author: Additory Team
9
- Date: 2026-01-24
10
- """
11
-
12
- import os
13
- import json
14
- from pathlib import Path
15
- from typing import Dict, List, Optional, Any, Tuple
16
- from datetime import datetime
17
- import logging
18
-
19
- try:
20
- import tomli as toml
21
- import tomli_w as toml_w
22
- except ImportError:
23
- import toml
24
- # For writing, we'll use toml.dump which works for both
25
- toml_w = toml
26
-
27
- from additory.common.lists import load_list_file, parse_list_file
28
- from additory.common.patterns import load_properties_file, parse_properties_file
29
-
30
- logger = logging.getLogger(__name__)
31
-
32
-
33
class UserNamespaceError(Exception):
    """Root of the exception hierarchy raised by the user-namespace code."""
36
-
37
-
38
class ConfigError(UserNamespaceError):
    """Raised when the user configuration cannot be loaded, saved or used."""
41
-
42
-
43
class InitializationError(UserNamespaceError):
    """Raised when the user-namespace directory tree cannot be set up."""
46
-
47
-
48
class UserNamespaceManager:
    """
    Manage the user namespace for custom patterns, schemas and expressions.

    The user namespace lives in a base directory (``~/.additory`` unless
    another one is passed to the constructor) and lets users define content
    that is used alongside the built-in patterns.

    Directory structure created by :meth:`initialize`::

        <base_dir>/
        ├── config.toml
        ├── patterns/
        │   ├── *.list
        │   └── *.properties
        ├── schemas/
        ├── expressions/
        │   ├── *.add
        │   └── manifest.json
        └── cache/

    Example:
        >>> manager = UserNamespaceManager()
        >>> if not manager.is_initialized():
        ...     manager.initialize()
        >>> config = manager.load_config()
        >>> user_lists = manager.load_user_lists()
    """

    DEFAULT_BASE_DIR = Path.home() / ".additory"
    CONFIG_FILENAME = "config.toml"

    # Template for a fresh config file. Treat as read-only: initialize() works
    # on a deep copy so that per-instance values never leak into the class.
    DEFAULT_CONFIG = {
        "additory": {
            "version": "1.0",
            "created": None,  # Will be set during initialization
        },
        "patterns": {
            "user_patterns_dir": "~/.additory/patterns",
            "user_schemas_dir": "~/.additory/schemas",
            "user_expressions_dir": "~/.additory/expressions",
            "auto_discover": True,
            "watch_for_changes": False,
        },
        "cache": {
            "enable_cache": True,
            "cache_dir": "~/.additory/cache",
            "cache_ttl": 3600,  # 1 hour
            "max_cache_size": 100,  # MB
        },
        "resolution": {
            "default_prefer_mode": "default",
            "enable_user_namespace": True,
            "user_namespace_priority": "before_imports",
            "user_expression_priority": "user_first",
        },
        "validation": {
            "strict_mode": False,
            "warn_on_unused_patterns": True,
            "warn_on_duplicate_patterns": True,
            "validate_on_load": False,
        },
        "logging": {
            "log_level": "INFO",
            "log_resolution_path": True,
            "log_cache_hits": False,
        },
    }

    def __init__(self, base_dir: Optional[Path] = None):
        """
        Initialize the user namespace manager.

        Nothing is read from or written to disk here; call
        :meth:`initialize` / :meth:`load_config` for that.

        Args:
            base_dir: Base directory for user namespace (default: ~/.additory)
        """
        self.base_dir = Path(base_dir) if base_dir else self.DEFAULT_BASE_DIR
        self.config_path = self.base_dir / self.CONFIG_FILENAME
        self._config: Optional[Dict[str, Any]] = None
        self._user_lists_cache: Optional[Dict[str, Dict[str, List[str]]]] = None
        self._user_properties_cache: Optional[Dict[str, Dict[str, str]]] = None

    def is_initialized(self) -> bool:
        """
        Check if user namespace is initialized.

        Returns:
            True if the base directory, the config file and the ``patterns``
            sub-directory all exist, False otherwise
        """
        return (
            self.base_dir.exists()
            and self.config_path.exists()
            and (self.base_dir / "patterns").exists()
        )

    def initialize(self, force: bool = False) -> None:
        """
        Initialize user namespace directory structure.

        Creates the directory tree, writes a default config file whose paths
        point into ``base_dir``, and writes README and example files.
        With ``force=True`` an existing config and the generated files are
        overwritten.

        Args:
            force: If True, reinitialize even if already initialized

        Raises:
            InitializationError: If initialization fails
        """
        if self.is_initialized() and not force:
            logger.info("User namespace already initialized at %s", self.base_dir)
            return

        # Function-scope import: only this method needs it.
        import copy

        try:
            # Create directory structure
            self.base_dir.mkdir(parents=True, exist_ok=True)
            for subdir in ("patterns", "schemas", "expressions", "cache"):
                (self.base_dir / subdir).mkdir(exist_ok=True)

            # Deep copy: a shallow .copy() would share the nested section
            # dicts with DEFAULT_CONFIG, so the assignments below would
            # rewrite the class-level template for every other instance.
            config = copy.deepcopy(self.DEFAULT_CONFIG)
            config["additory"]["created"] = datetime.now().isoformat()

            # Update paths to use base_dir
            config["patterns"]["user_patterns_dir"] = str(self.base_dir / "patterns")
            config["patterns"]["user_schemas_dir"] = str(self.base_dir / "schemas")
            config["patterns"]["user_expressions_dir"] = str(self.base_dir / "expressions")
            config["cache"]["cache_dir"] = str(self.base_dir / "cache")

            self._save_config(config)

            # Clear cached config if reinitializing so the next load_config()
            # re-reads what was just written.
            if force:
                self._config = None

            self._create_readme_files()
            self._create_example_files()

            logger.info("User namespace initialized at %s", self.base_dir)

        except Exception as e:
            raise InitializationError(f"Failed to initialize user namespace: {e}") from e

    def load_config(self, reload: bool = False) -> Dict[str, Any]:
        """
        Load user configuration.

        The parsed config is cached on the instance; later calls return the
        cached dict unless ``reload`` is set.

        Args:
            reload: If True, reload config from disk

        Returns:
            Configuration dictionary

        Raises:
            ConfigError: If config cannot be loaded
        """
        if self._config is not None and not reload:
            return self._config

        if not self.config_path.exists():
            raise ConfigError(f"Config file not found: {self.config_path}")

        try:
            # Parse from text with loads(): tomli.load() insists on a binary
            # file object while toml.load() wants text, but both backends
            # accept a str in loads().
            self._config = toml.loads(self.config_path.read_text(encoding="utf-8"))
            return self._config
        except Exception as e:
            raise ConfigError(f"Failed to load config: {e}") from e

    def get_config(self, key: str, default: Any = None) -> Any:
        """
        Get configuration value by key.

        Args:
            key: Configuration key (dot-separated, e.g., "cache.enable_cache")
            default: Default value if key not found

        Returns:
            Configuration value, or ``default`` when any path component is
            missing

        Raises:
            ConfigError: If the config has to be loaded and that fails
        """
        value: Any = self.load_config()

        for part in key.split("."):
            if isinstance(value, dict) and part in value:
                value = value[part]
            else:
                return default

        return value

    def set_config(self, key: str, value: Any) -> None:
        """
        Set configuration value by key (in memory only).

        Missing intermediate sections are created. Call :meth:`save_config`
        to persist the change.

        Args:
            key: Configuration key (dot-separated)
            value: Value to set

        Raises:
            ConfigError: If the config cannot be loaded, or if an intermediate
                component of ``key`` already holds a non-section value
        """
        config = self.load_config()
        *sections, leaf = key.split(".")
        node = config

        for part in sections:
            child = node.setdefault(part, {})
            if not isinstance(child, dict):
                # Descending into a scalar would fail with an obscure
                # TypeError; report the real problem instead.
                raise ConfigError(
                    f"Cannot set '{key}': '{part}' is not a configuration section"
                )
            node = child

        node[leaf] = value
        self._config = config

    def save_config(self) -> None:
        """
        Save configuration to disk.

        Raises:
            ConfigError: If no config is loaded or it cannot be saved
        """
        if self._config is None:
            raise ConfigError("No config loaded to save")

        self._save_config(self._config)

    def _save_config(self, config: Dict[str, Any]) -> None:
        """Serialize ``config`` as TOML and write it to ``self.config_path``."""
        try:
            # dumps() works on both tomli_w and toml, whereas dump() needs a
            # binary handle for tomli_w and a text handle for toml.
            # Serializing first also leaves the existing file untouched when
            # serialization fails.
            text = toml_w.dumps(config)
            self.config_path.write_text(text, encoding="utf-8")
        except Exception as e:
            raise ConfigError(f"Failed to save config: {e}") from e

    def _resolve_dir(self, key: str, default_name: str) -> Path:
        """
        Resolve a configured directory to a path.

        ``~`` is expanded and relative paths are anchored at ``base_dir``.
        When the key is absent from the config, ``base_dir / default_name``
        is used instead of failing on ``Path(None)``.
        """
        configured = self.get_config(key)
        if configured is None:
            return self.base_dir / default_name

        path = Path(configured).expanduser()
        if not path.is_absolute():
            path = self.base_dir / path
        return path

    def get_patterns_dir(self) -> Path:
        """Get user patterns directory path."""
        return self._resolve_dir("patterns.user_patterns_dir", "patterns")

    def get_schemas_dir(self) -> Path:
        """Get user schemas directory path."""
        return self._resolve_dir("patterns.user_schemas_dir", "schemas")

    def get_expressions_dir(self) -> Path:
        """Get user expressions directory path."""
        return self._resolve_dir("patterns.user_expressions_dir", "expressions")

    def get_cache_dir(self) -> Path:
        """Get cache directory path."""
        return self._resolve_dir("cache.cache_dir", "cache")

    def discover_list_files(self) -> List[Path]:
        """
        Discover all .list files in user patterns directory.

        Returns:
            Sorted list of .list file paths (empty if the directory is missing)
        """
        patterns_dir = self.get_patterns_dir()
        if not patterns_dir.exists():
            return []

        # Sorted so that later merging of duplicate names is deterministic.
        return sorted(patterns_dir.glob("*.list"))

    def discover_properties_files(self) -> List[Path]:
        """
        Discover all .properties files in user patterns directory.

        Returns:
            Sorted list of .properties file paths (empty if the directory is
            missing)
        """
        patterns_dir = self.get_patterns_dir()
        if not patterns_dir.exists():
            return []

        return sorted(patterns_dir.glob("*.properties"))

    def load_user_lists(self, reload: bool = False) -> Dict[str, Dict[str, List[str]]]:
        """
        Load all user .list files.

        Files that fail to load are logged and skipped; files that yield no
        data are omitted from the result.

        Args:
            reload: If True, reload from disk

        Returns:
            Dictionary mapping file names to list data
            Format: {"filename": {"list_name": ["value1", "value2", ...]}}
        """
        if self._user_lists_cache is not None and not reload:
            return self._user_lists_cache

        user_lists: Dict[str, Dict[str, List[str]]] = {}

        for list_file in self.discover_list_files():
            try:
                lists_data = load_list_file(str(list_file))
            except Exception as e:
                # Best effort: one broken file must not hide the others.
                logger.warning("Failed to load user list file %s: %s", list_file, e)
                continue

            # Only add if we got valid data
            if lists_data:
                user_lists[list_file.stem] = lists_data
                logger.debug("Loaded user list file: %s", list_file)

        self._user_lists_cache = user_lists
        return user_lists

    def load_user_properties(self, reload: bool = False) -> Dict[str, Dict[str, str]]:
        """
        Load all user .properties files.

        Files that fail to load are logged and skipped.

        Args:
            reload: If True, reload from disk

        Returns:
            Dictionary mapping file names to properties data
            Format: {"filename": {"pattern_name": "regex_pattern"}}
        """
        if self._user_properties_cache is not None and not reload:
            return self._user_properties_cache

        user_properties: Dict[str, Dict[str, str]] = {}

        for properties_file in self.discover_properties_files():
            try:
                properties_data = load_properties_file(str(properties_file))
            except Exception as e:
                # Best effort: one broken file must not hide the others.
                logger.warning(
                    "Failed to load user properties file %s: %s", properties_file, e
                )
                continue

            user_properties[properties_file.stem] = properties_data
            logger.debug("Loaded user properties file: %s", properties_file)

        self._user_properties_cache = user_properties
        return user_properties

    def get_all_user_patterns(self) -> Tuple[Dict[str, List[str]], Dict[str, str]]:
        """
        Get all user patterns (lists and properties combined).

        Entries from all files are merged into flat dictionaries; when two
        files define the same name, the file processed later wins.

        Returns:
            Tuple of (all_lists, all_properties)
            - all_lists: {"pattern_name": ["value1", "value2", ...]}
            - all_properties: {"pattern_name": "regex_pattern"}
        """
        all_lists: Dict[str, List[str]] = {}
        for lists_data in self.load_user_lists().values():
            all_lists.update(lists_data)

        all_properties: Dict[str, str] = {}
        for properties_data in self.load_user_properties().values():
            all_properties.update(properties_data)

        return all_lists, all_properties

    def clear_cache(self) -> None:
        """Clear cached user patterns (the cached config is not affected)."""
        self._user_lists_cache = None
        self._user_properties_cache = None
        logger.debug("User patterns cache cleared")

    def discover_expression_files(self) -> List[Path]:
        """
        Discover all .add files in user expressions directory.

        Returns:
            Sorted list of .add file paths (empty if the directory is missing)
        """
        expressions_dir = self.get_expressions_dir()
        if not expressions_dir.exists():
            return []

        return sorted(expressions_dir.glob("*.add"))

    def get_expression_manifest_path(self) -> Path:
        """
        Get path to user expression manifest file.

        Returns:
            Path to manifest.json
        """
        return self.get_expressions_dir() / "manifest.json"

    def load_expression_manifest(self) -> Optional[Dict[str, Any]]:
        """
        Load user expression manifest.

        Returns:
            Parsed manifest, or None if the file is missing or unreadable
            (the failure is logged, not raised)
        """
        manifest_path = self.get_expression_manifest_path()

        if not manifest_path.exists():
            logger.debug("Expression manifest not found: %s", manifest_path)
            return None

        try:
            with open(manifest_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            logger.warning("Failed to load expression manifest: %s", e)
            return None

    @staticmethod
    def _manifest_entries(entries: Any) -> List[Tuple[Any, Any]]:
        """
        Normalize one manifest ``expressions`` section to (name, file) pairs.

        Accepts the mapping form ``{"name": "file.add"}`` and the list form
        ``[{"name": ..., "file": ...}]`` shown in the generated expressions
        README. Anything else yields no pairs.
        """
        if isinstance(entries, dict):
            return list(entries.items())
        if isinstance(entries, list):
            return [
                (entry["name"], entry["file"])
                for entry in entries
                if isinstance(entry, dict) and "name" in entry and "file" in entry
            ]
        return []

    def get_user_expressions(self) -> Dict[str, str]:
        """
        Get all user expressions from manifest.

        Only expressions whose file actually exists in the expressions
        directory are returned.

        Returns:
            Dictionary mapping expression names to file paths
            Format: {"expression_name": "/path/to/expression.add"}
        """
        manifest = self.load_expression_manifest()
        if not manifest or not isinstance(manifest, dict):
            return {}

        # Support both old and new manifest formats
        sections: List[Any] = []
        if "versions" in manifest:
            # New format (v2.0): one "expressions" section per version
            versions = manifest["versions"]
            if isinstance(versions, dict):
                sections = [
                    version_data.get("expressions", {})
                    for version_data in versions.values()
                    if isinstance(version_data, dict)
                ]
        elif "expressions" in manifest:
            # Old format (v1.0): a single top-level "expressions" section
            sections = [manifest["expressions"]]

        expressions_dir = self.get_expressions_dir()
        user_expressions: Dict[str, str] = {}

        for section in sections:
            for expr_name, expr_file in self._manifest_entries(section):
                if not isinstance(expr_file, str):
                    continue  # malformed entry; nothing to resolve
                expr_path = expressions_dir / expr_file
                if expr_path.exists():
                    user_expressions[expr_name] = str(expr_path)

        return user_expressions

    def _create_readme_files(self) -> None:
        """Create README files in subdirectories."""
        # NOTE(review): indentation inside the embedded YAML/JSON samples
        # below was reconstructed; confirm against the real file formats.

        # Patterns README
        patterns_readme = self.base_dir / "patterns" / "README.md"
        patterns_readme.write_text("""# User Patterns

This directory contains your custom pattern files.

## File Types

- **`.list` files**: Value lists (TOML format)
- **`.properties` files**: Regex patterns (key-value format)

## Example .list file

```toml
[lists]
my_values = Value1, Value2, Value3
my_other_values = A, B, C
```

## Example .properties file

```properties
my_pattern = PATTERN\\d{4}
my_other_pattern = [A-Z]{3}-\\d{3}
```

## Usage

These patterns can be referenced in your TOML schemas:

```toml
[generation]
imports = ["global", "my_company"]

[schema]
field1 = "my_values"
field2 = "my_pattern"
```
""", encoding="utf-8")

        # Schemas README
        schemas_readme = self.base_dir / "schemas" / "README.md"
        schemas_readme.write_text("""# User Schemas

This directory contains your custom TOML schema files.

## Example Schema

```toml
[generation]
imports = ["global"]

[schema]
field1 = "pattern_name"
field2 = ["Value1", "Value2"]
field3 = "REGEX\\d+"

[metadata]
name = "My Schema"
version = "1.0"
```
""", encoding="utf-8")

        # Expressions README
        expressions_readme = self.base_dir / "expressions" / "README.md"
        expressions_readme.write_text("""# User Expressions

This directory contains your custom expression files (.add format).

## File Format

Expression files use YAML format with the following structure:

```yaml
formula:
  name: my_calc
  version: 0.1
  description: My custom calculation
  expression: |
    (value1 + value2) / 2

sample:
  clean:
    value1: [10, 20, 30]
    value2: [5, 10, 15]
    expected_result: [7.5, 15.0, 22.5]
```

## Manifest File

Create a `manifest.json` file to register your expressions:

```json
{
  "expressions": [
    {
      "name": "my_calc",
      "version": "0.1",
      "file": "my_calc_0.1.add",
      "description": "My custom calculation"
    }
  ]
}
```

## Usage

Reference your expressions in augment or synthetic operations:

```python
from additory.augment import augment

df_result = augment(
    df,
    expressions={"result": "my_calc"},
    enable_user_namespace=True
)
```
""", encoding="utf-8")

        # Cache README
        cache_readme = self.base_dir / "cache" / "README.md"
        cache_readme.write_text("""# Cache Directory

This directory contains cached pattern resolution data.

Cache files are automatically managed and can be safely deleted.
""", encoding="utf-8")

    def _create_example_files(self) -> None:
        """Create example pattern, expression and manifest files."""
        # NOTE(review): indentation inside the embedded YAML/JSON below was
        # reconstructed; confirm against the real file formats.

        # Example .list file
        example_list = self.base_dir / "patterns" / "example.list"
        example_list.write_text("""# Example .list file
# This file demonstrates the .list format for value lists

[lists]
# Simple value list
example_statuses = Active, Inactive, Pending, Completed

# Another example
example_priorities = Low, Medium, High, Critical

# You can add more lists here
# my_custom_list = Value1, Value2, Value3
""", encoding="utf-8")

        # Example .properties file
        example_properties = self.base_dir / "patterns" / "example.properties"
        example_properties.write_text("""# Example .properties file
# This file demonstrates the .properties format for regex patterns

# Simple pattern
example_id = EX\\d{6}

# Email pattern
example_email = [a-zA-Z0-9._%+-]+@example\\.com

# You can add more patterns here
# my_custom_pattern = PATTERN\\d+
""", encoding="utf-8")

        # Example expression file
        example_expression = self.base_dir / "expressions" / "example_calc_0.1.add"
        example_expression.write_text("""formula:
  name: example_calc
  version: 0.1
  stability: alpha
  type: cols
  description: Example calculation - average of two values
  author: user
  tags:
    - example
    - calculation
  expression: |
    (value1 + value2) / 2
  validation:
    required_columns:
      - value1
      - value2
    output_type: float64

sample:
  clean:
    value1: [10, 20, 30, 40]
    value2: [5, 10, 15, 20]
    expected_result: [7.5, 15.0, 22.5, 30.0]

documentation:
  formula_explanation: |
    This example calculates the average of two values.
    It's a simple demonstration of the expression format.
  examples:
    - description: Average of 10 and 5
      input:
        value1: 10
        value2: 5
      output: 7.5
""", encoding="utf-8")

        # Example manifest file (new "versions" format)
        example_manifest = self.base_dir / "expressions" / "manifest.json"
        example_manifest.write_text("""{
  "metadata": {
    "name": "user-expressions",
    "description": "User-defined expressions",
    "maintainer": "user"
  },
  "versions": {
    "0.1": {
      "stability": "alpha",
      "requires_engine": ">=0.1.0",
      "expressions": {
        "example_calc": "example_calc_0.1.add"
      }
    }
  },
  "manifest_version": "2.0"
}
""", encoding="utf-8")
730
-
731
-
732
# Module-wide manager instance; created on first use by
# get_user_namespace_manager().
_user_namespace_manager: Optional[UserNamespaceManager] = None


def get_user_namespace_manager(base_dir: Optional[Path] = None) -> UserNamespaceManager:
    """
    Return the shared UserNamespaceManager, creating it on first call.

    Args:
        base_dir: Base directory for user namespace (default: ~/.additory).
            Only used by the call that creates the instance; once a manager
            exists it is returned as-is and this argument is ignored.

    Returns:
        UserNamespaceManager instance
    """
    global _user_namespace_manager

    manager = _user_namespace_manager
    if manager is None:
        manager = UserNamespaceManager(base_dir)
        _user_namespace_manager = manager

    return manager