additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -177
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -352
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/deduce.py +0 -259
  100. additory/synthetic/distributions.py +0 -22
  101. additory/synthetic/forecast.py +0 -1132
  102. additory/synthetic/linked_list_parser.py +0 -415
  103. additory/synthetic/namespace_lookup.py +0 -129
  104. additory/synthetic/smote.py +0 -320
  105. additory/synthetic/strategies.py +0 -926
  106. additory/synthetic/synthesizer.py +0 -713
  107. additory/utilities/__init__.py +0 -53
  108. additory/utilities/encoding.py +0 -600
  109. additory/utilities/games.py +0 -300
  110. additory/utilities/keys.py +0 -8
  111. additory/utilities/lookup.py +0 -103
  112. additory/utilities/matchers.py +0 -216
  113. additory/utilities/resolvers.py +0 -286
  114. additory/utilities/settings.py +0 -167
  115. additory/utilities/units.py +0 -749
  116. additory/utilities/validators.py +0 -153
  117. additory-0.1.0a4.dist-info/METADATA +0 -311
  118. additory-0.1.0a4.dist-info/RECORD +0 -72
  119. additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
  120. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  121. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
additory/core/config.py CHANGED
@@ -1,342 +1,214 @@
1
- # config.py
2
- # Central configuration for additory engine
1
+ """
2
+ Global configuration management for Additory.
3
3
 
4
- import os
5
- import yaml
6
- from pathlib import Path
7
- from typing import Optional, Dict, Any
8
- from dataclasses import dataclass, field
9
-
10
- from .logging import log_info, log_warning
11
-
12
-
13
- # ------------------------------------------------------------
14
- # Expression Configuration
15
- # ------------------------------------------------------------
16
-
17
- @dataclass
18
- class ExpressionConfig:
19
- """Configuration for expression namespaces"""
20
-
21
- # Built-in expressions
22
- builtin_path: str = "reference/expressions_definitions/"
23
- builtin_remote_url: Optional[str] = None
24
-
25
- # User expressions
26
- user_path: str = "user_expressions/"
27
- user_remote_url: Optional[str] = None
28
-
29
- # Mode
30
- mode: str = "development" # "development" or "production"
31
-
32
- # Cache settings
33
- cache_enabled: bool = True
34
- cache_ttl: int = 3600 # seconds
35
-
36
- def is_production(self) -> bool:
37
- """Check if running in production mode"""
38
- return self.mode == "production"
39
-
40
- def get_builtin_source(self) -> str:
41
- """Get built-in expressions source (local or remote)"""
42
- if self.is_production() and self.builtin_remote_url:
43
- return self.builtin_remote_url
44
- return self.builtin_path
45
-
46
- def get_user_source(self) -> str:
47
- """Get user expressions source (local or remote)"""
48
- if self.user_remote_url:
49
- return self.user_remote_url
50
- return self.user_path
51
-
52
- def to_dict(self) -> Dict[str, Any]:
53
- """Convert to dictionary for serialization"""
54
- return {
55
- 'builtin_path': self.builtin_path,
56
- 'builtin_remote_url': self.builtin_remote_url,
57
- 'user_path': self.user_path,
58
- 'user_remote_url': self.user_remote_url,
59
- 'mode': self.mode,
60
- 'cache_enabled': self.cache_enabled,
61
- 'cache_ttl': self.cache_ttl,
62
- }
63
-
64
- @classmethod
65
- def from_dict(cls, data: Dict[str, Any]) -> 'ExpressionConfig':
66
- """Create from dictionary"""
67
- return cls(
68
- builtin_path=data.get('builtin_path', 'reference/expressions_definitions/'),
69
- builtin_remote_url=data.get('builtin_remote_url'),
70
- user_path=data.get('user_path', 'user_expressions/'),
71
- user_remote_url=data.get('user_remote_url'),
72
- mode=data.get('mode', 'development'),
73
- cache_enabled=data.get('cache_enabled', True),
74
- cache_ttl=data.get('cache_ttl', 3600),
75
- )
76
-
77
-
78
- # Global expression config instance
79
- _expression_config: Optional[ExpressionConfig] = None
80
-
81
-
82
- def get_expression_config() -> ExpressionConfig:
83
- """
84
- Get current expression configuration
85
-
86
- Returns:
87
- ExpressionConfig instance
88
- """
89
- global _expression_config
90
-
91
- if _expression_config is None:
92
- # Try to load from config file
93
- _expression_config = load_expression_config()
94
-
95
- return _expression_config
96
-
97
-
98
- def set_expression_config(config: ExpressionConfig):
99
- """
100
- Set expression configuration
101
-
102
- Args:
103
- config: ExpressionConfig instance
104
- """
105
- global _expression_config
106
- _expression_config = config
107
- log_info(f"[config] Expression config set: mode={config.mode}")
4
+ Manages only 2 global settings:
5
+ 1. Expressions folder - Path to custom expressions
6
+ 2. Default backend - Default DataFrame backend
108
7
 
8
+ Principle: Minimal configuration, maximum simplicity.
9
+ """
109
10
 
110
- def get_config_file_path() -> str:
111
- """
112
- Get path to configuration file
113
-
114
- Returns:
115
- Path to ~/.additory/config.yaml
116
- """
117
- home = Path.home()
118
- config_dir = home / '.additory'
119
- return str(config_dir / 'config.yaml')
11
+ import os
12
+ from pathlib import Path
13
+ from typing import Optional
120
14
 
121
15
 
122
- def load_expression_config() -> ExpressionConfig:
16
+ class Config:
123
17
  """
124
- Load expression configuration from file
18
+ Global configuration manager.
125
19
 
126
- Returns:
127
- ExpressionConfig instance (default if file doesn't exist)
20
+ Manages two settings:
21
+ - expressions_folder: Path to custom .add files
22
+ - default_backend: Default backend for operations
128
23
  """
129
- config_path = get_config_file_path()
130
24
 
131
- if not os.path.exists(config_path):
132
- log_info(f"[config] No config file found, using defaults")
133
- return ExpressionConfig()
25
+ def __init__(self):
26
+ """Initialize config with defaults."""
27
+ self.expressions_folder: Optional[str] = None
28
+ self.default_backend: str = 'polars'
29
+ self._custom_namespace: Optional[str] = None
134
30
 
135
- try:
136
- with open(config_path, 'r', encoding='utf-8') as f:
137
- data = yaml.safe_load(f)
31
+ def set_expressions_folder(self, path: str) -> None:
32
+ """
33
+ Set custom expressions folder.
138
34
 
139
- if not data or 'expressions' not in data:
140
- log_warning(f"[config] Invalid config file, using defaults")
141
- return ExpressionConfig()
35
+ Args:
36
+ path: Path to folder containing .add files
37
+
38
+ Raises:
39
+ ValueError: If path doesn't exist or is not a directory
40
+
41
+ Example:
42
+ config.set_expressions_folder('/path/to/my_expressions')
43
+ # Now expressions can be referenced as: my_expressions:bmi
44
+ """
45
+ # Validate path
46
+ if not os.path.exists(path):
47
+ raise ValueError(f"Expressions folder does not exist: {path}")
142
48
 
143
- expr_data = data['expressions']
144
- config = ExpressionConfig.from_dict(expr_data)
49
+ if not os.path.isdir(path):
50
+ raise ValueError(f"Expressions folder is not a directory: {path}")
145
51
 
146
- log_info(f"[config] Loaded expression config from {config_path}")
147
- log_info(f"[config] Mode: {config.mode}")
148
- log_info(f"[config] Built-in source: {config.get_builtin_source()}")
149
- log_info(f"[config] User source: {config.get_user_source()}")
52
+ # Set folder
53
+ self.expressions_folder = str(Path(path).resolve())
150
54
 
151
- return config
55
+ # Derive namespace from folder name
56
+ self._custom_namespace = derive_namespace_from_path(self.expressions_folder)
152
57
 
153
- except Exception as e:
154
- log_warning(f"[config] Failed to load config file: {e}")
155
- return ExpressionConfig()
156
-
157
-
158
- def save_expression_config(config: Optional[ExpressionConfig] = None):
159
- """
160
- Save expression configuration to file
161
-
162
- Args:
163
- config: ExpressionConfig to save (uses current if None)
164
- """
165
- if config is None:
166
- config = get_expression_config()
167
-
168
- config_path = get_config_file_path()
169
- config_dir = os.path.dirname(config_path)
170
-
171
- # Ensure directory exists
172
- os.makedirs(config_dir, exist_ok=True)
173
-
174
- # Prepare config data
175
- config_data = {
176
- 'expressions': config.to_dict()
177
- }
178
-
179
- try:
180
- with open(config_path, 'w', encoding='utf-8') as f:
181
- yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)
58
+ # Log change
59
+ try:
60
+ from .logging import get_logger
61
+ logger = get_logger()
62
+ logger.info(
63
+ f"Expressions folder set to: {self.expressions_folder}",
64
+ details={'namespace': self._custom_namespace}
65
+ )
66
+ except:
67
+ pass # Ignore if logger not available
68
+
69
+ def get_expressions_folder(self) -> Optional[str]:
70
+ """
71
+ Get current expressions folder.
72
+
73
+ Returns:
74
+ Path to expressions folder or None
75
+ """
76
+ return self.expressions_folder
77
+
78
+ def get_custom_namespace(self) -> Optional[str]:
79
+ """
80
+ Get custom namespace name.
182
81
 
183
- log_info(f"[config] Saved expression config to {config_path}")
82
+ Returns:
83
+ Namespace name (folder name) or None
84
+
85
+ Example:
86
+ # If folder is '/path/to/my_expressions'
87
+ namespace = config.get_custom_namespace()
88
+ # Returns: 'my_expressions'
89
+ """
90
+ return self._custom_namespace
91
+
92
+ def set_default_backend(self, backend: str) -> None:
93
+ """
94
+ Set default backend.
184
95
 
185
- except Exception as e:
186
- log_warning(f"[config] Failed to save config file: {e}")
96
+ Args:
97
+ backend: Backend name ('polars', 'pandas', 'cudf')
98
+
99
+ Raises:
100
+ ValueError: If backend is not supported
101
+
102
+ Example:
103
+ config.set_default_backend('cudf') # For GPU users
104
+ """
105
+ valid_backends = ['polars', 'pandas', 'cudf']
106
+ if backend not in valid_backends:
107
+ raise ValueError(
108
+ f"Invalid backend '{backend}'. Must be one of: {valid_backends}"
109
+ )
110
+
111
+ self.default_backend = backend
112
+
113
+ # Log change
114
+ try:
115
+ from .logging import get_logger
116
+ logger = get_logger()
117
+ logger.info(f"Default backend set to: {backend}")
118
+ except:
119
+ pass # Ignore if logger not available
120
+
121
+ def get_default_backend(self) -> str:
122
+ """
123
+ Get default backend.
124
+
125
+ Returns:
126
+ Backend name ('polars', 'pandas', or 'cudf')
127
+ """
128
+ return self.default_backend
129
+
130
+ def reset(self) -> None:
131
+ """
132
+ Reset all settings to defaults.
133
+
134
+ Used for testing or to clear configuration.
135
+ """
136
+ self.expressions_folder = None
137
+ self.default_backend = 'polars'
138
+ self._custom_namespace = None
139
+
140
+ # Log reset
141
+ try:
142
+ from .logging import get_logger
143
+ logger = get_logger()
144
+ logger.info("Configuration reset to defaults")
145
+ except:
146
+ pass # Ignore if logger not available
187
147
 
188
148
 
189
- def set_production_mode(
190
- builtin_remote_url: str,
191
- user_remote_url: Optional[str] = None
192
- ):
193
- """
194
- Configure for production mode with remote URLs
195
-
196
- Args:
197
- builtin_remote_url: Remote URL for built-in expressions
198
- user_remote_url: Optional remote URL for user expressions
199
- """
200
- config = ExpressionConfig(
201
- builtin_remote_url=builtin_remote_url,
202
- user_remote_url=user_remote_url,
203
- mode="production"
204
- )
205
-
206
- set_expression_config(config)
207
- save_expression_config(config)
208
-
209
- log_info(f"[config] Configured for production mode")
210
- log_info(f"[config] Built-in URL: {builtin_remote_url}")
211
- if user_remote_url:
212
- log_info(f"[config] User URL: {user_remote_url}")
149
+ # Global config instance
150
+ _global_config: Optional[Config] = None
213
151
 
214
152
 
215
- def set_development_mode(
216
- builtin_path: str = "reference/expressions_definitions/",
217
- user_path: str = "user_expressions/"
218
- ):
219
- """
220
- Configure for development mode with local paths
221
-
222
- Args:
223
- builtin_path: Local path for built-in expressions
224
- user_path: Local path for user expressions
153
+ def get_config() -> Config:
225
154
  """
226
- config = ExpressionConfig(
227
- builtin_path=builtin_path,
228
- user_path=user_path,
229
- mode="development"
230
- )
155
+ Get the global config instance.
231
156
 
232
- set_expression_config(config)
233
- save_expression_config(config)
234
-
235
- log_info(f"[config] Configured for development mode")
236
- log_info(f"[config] Built-in path: {builtin_path}")
237
- log_info(f"[config] User path: {user_path}")
238
-
239
-
240
- # ------------------------------------------------------------
241
- # Engine version
242
- # ------------------------------------------------------------
243
-
244
- def get_engine_version():
245
- return "0.0.1"
246
-
247
-
248
- # ------------------------------------------------------------
249
- # Built‑in expression roots
250
- # ------------------------------------------------------------
251
- # These are optional. If you later ship built‑in formulas inside the package,
252
- # add their paths here.
253
-
254
- _BUILTIN_ROOTS = [] # e.g., ["builtin/expressions"]
255
-
256
- def get_builtin_roots():
257
- return _BUILTIN_ROOTS
258
-
259
-
260
- # ------------------------------------------------------------
261
- # Default version
262
- # ------------------------------------------------------------
263
-
264
- def get_default_version():
265
- """
266
- The version used when the user does not specify one.
267
- """
268
- return "v1"
269
-
270
-
271
- # ------------------------------------------------------------
272
- # User overrides
273
- # ------------------------------------------------------------
274
-
275
- _user_version_override = None
276
- _user_formula_root_override = None
277
- _custom_formula_path = None
278
-
279
-
280
- def get_user_version_override():
281
- """
282
- Returns a user‑set version override, if any.
283
- """
284
- return _user_version_override
285
-
286
-
287
- def set_user_version_override(v):
288
- """
289
- Allows the user to force a specific version globally.
290
- """
291
- global _user_version_override
292
- _user_version_override = v
293
-
294
-
295
- def get_user_formula_root_override():
296
- """
297
- Returns the folder where versioned expressions live.
157
+ Returns:
158
+ Global Config instance
159
+
298
160
  Example:
299
- expressions/
300
- v1/
301
- v2/
161
+ config = get_config()
162
+ folder = config.get_expressions_folder()
302
163
  """
303
- return _user_formula_root_override
164
+ global _global_config
165
+ if _global_config is None:
166
+ _global_config = Config()
167
+ return _global_config
304
168
 
305
169
 
306
- def set_user_formula_root_override(path):
170
+ def set_expressions_folder(path: str) -> None:
307
171
  """
308
- Sets the root folder for all versioned expressions.
172
+ Convenience function to set expressions folder.
173
+
174
+ Args:
175
+ path: Path to expressions folder
176
+
177
+ Example:
178
+ import additory
179
+ additory.add.set_expressions_folder('/path/to/my_expressions')
309
180
  """
310
- global _user_formula_root_override
311
- _user_formula_root_override = path
181
+ config = get_config()
182
+ config.set_expressions_folder(path)
312
183
 
313
184
 
314
- def get_custom_formula_path():
185
+ def set_default_backend(backend: str) -> None:
315
186
  """
316
- Returns a direct override path for a single formula file.
317
- If set, this bypasses versioning entirely.
187
+ Convenience function to set default backend.
188
+
189
+ Args:
190
+ backend: Backend name ('polars', 'pandas', 'cudf')
191
+
192
+ Example:
193
+ import additory
194
+ additory.add.set_default_backend('cudf')
318
195
  """
319
- return _custom_formula_path
196
+ config = get_config()
197
+ config.set_default_backend(backend)
320
198
 
321
199
 
322
- def set_custom_formula_path(path):
200
+ def derive_namespace_from_path(path: str) -> str:
323
201
  """
324
- Allows the user to point directly to a single .add file.
202
+ Derive namespace name from folder path.
203
+
204
+ Args:
205
+ path: Folder path
206
+
207
+ Returns:
208
+ Namespace name (folder name)
209
+
210
+ Example:
211
+ namespace = derive_namespace_from_path('/path/to/my_expressions')
212
+ # Returns: 'my_expressions'
325
213
  """
326
- global _custom_formula_path
327
- _custom_formula_path = path
328
-
329
-
330
- # backend preference setting
331
-
332
- _backend_preference: Optional[str] = None # "cpu", "gpu", or None
333
-
334
- def set_backend_preference(mode: Optional[str]):
335
- global _backend_preference
336
- if mode not in (None, "cpu", "gpu"):
337
- raise ValueError("backend must be 'cpu', 'gpu', or None")
338
- _backend_preference = mode
339
-
340
- def get_backend_preference() -> Optional[str]:
341
- return _backend_preference
342
-
214
+ return Path(path).name