additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -177
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -352
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/deduce.py +0 -259
  100. additory/synthetic/distributions.py +0 -22
  101. additory/synthetic/forecast.py +0 -1132
  102. additory/synthetic/linked_list_parser.py +0 -415
  103. additory/synthetic/namespace_lookup.py +0 -129
  104. additory/synthetic/smote.py +0 -320
  105. additory/synthetic/strategies.py +0 -926
  106. additory/synthetic/synthesizer.py +0 -713
  107. additory/utilities/__init__.py +0 -53
  108. additory/utilities/encoding.py +0 -600
  109. additory/utilities/games.py +0 -300
  110. additory/utilities/keys.py +0 -8
  111. additory/utilities/lookup.py +0 -103
  112. additory/utilities/matchers.py +0 -216
  113. additory/utilities/resolvers.py +0 -286
  114. additory/utilities/settings.py +0 -167
  115. additory/utilities/units.py +0 -749
  116. additory/utilities/validators.py +0 -153
  117. additory-0.1.0a4.dist-info/METADATA +0 -311
  118. additory-0.1.0a4.dist-info/RECORD +0 -72
  119. additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
  120. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  121. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,313 +0,0 @@
1
- # registry.py
2
- # Versioned registry for additory with user namespace support
3
-
4
- from dataclasses import dataclass
5
- import os
6
- import json
7
- from typing import Optional
8
-
9
- from additory.core.logging import log_info, log_warning
10
- from additory.core.config import (
11
- get_user_formula_root_override,
12
- get_custom_formula_path,
13
- get_default_version,
14
- get_user_version_override,
15
- )
16
-
17
- from additory.core.loader import load_expression
18
- from additory.core.parser import parse_expression
19
-
20
-
21
- # ------------------------------------------------------------
22
- # Resolved Formula Object
23
- # ------------------------------------------------------------
24
-
25
@dataclass
class ResolvedFormula:
    """Record describing a resolved formula and, once parsed, its contents.

    The resolver functions in this module create the record from ``source``,
    ``version`` and ``namespace`` and afterwards assign ``ast``,
    ``sample_clean`` and ``sample_unclean`` from the parsed expression.
    """

    # Where the expression was found (e.g. the joined root/version/filename
    # path for built-in formulas).
    source: str
    # Version label; this module also uses the literals "custom" and "user".
    version: str
    mode: str = "local"
    # Origin of the formula; this module uses "builtin", "user" and "custom".
    namespace: str = "builtin"
    # Parse results; they stay None until a resolver attaches them.
    ast: Optional[dict] = None
    sample_clean: Optional[dict] = None
    sample_unclean: Optional[dict] = None
34
-
35
-
36
- # ------------------------------------------------------------
37
- # User Namespace Support
38
- # ------------------------------------------------------------
39
-
40
# Cached manager instance; filled in lazily by get_user_namespace_manager().
_user_namespace_manager = None
# User-namespace lookups stay off until set_user_namespace_enabled(True).
_enable_user_namespace = False
# One of "user_first", "builtin_first", "user_only", "builtin_only".
_user_expression_priority = "user_first"
43
-
44
-
45
def set_user_namespace_enabled(enabled: bool):
    """Switch user-namespace expression lookup on or off and log the new state."""
    global _enable_user_namespace
    _enable_user_namespace = enabled
    state = "enabled" if enabled else "disabled"
    log_info(f"[registry] User namespace {state}")
50
-
51
-
52
def set_user_expression_priority(priority: str):
    """
    Choose how user expressions rank against built-in ones.

    Accepted values:
    - "user_first": look in the user namespace before the built-ins (default)
    - "builtin_first": look in the built-ins before the user namespace
    - "user_only": use the user namespace exclusively
    - "builtin_only": use the built-ins exclusively

    Raises:
        ValueError: if ``priority`` is not one of the values above.
    """
    global _user_expression_priority
    allowed = ("user_first", "builtin_first", "user_only", "builtin_only")
    if priority not in allowed:
        raise ValueError(f"Invalid priority: {priority}")
    _user_expression_priority = priority
    log_info(f"[registry] User expression priority set to: {priority}")
67
-
68
-
69
def get_user_namespace_manager():
    """
    Return the cached user namespace manager, creating it on first use.

    The manager is obtained from ``additory.core.user_namespace``. When that
    import fails, a warning is logged and None is returned; nothing is cached,
    so the next call tries again.
    """
    global _user_namespace_manager

    if _user_namespace_manager is not None:
        return _user_namespace_manager

    try:
        # Aliased so the imported factory does not shadow this function's name.
        from additory.core.user_namespace import (
            get_user_namespace_manager as build_manager,
        )
        _user_namespace_manager = build_manager()
    except ImportError:
        log_warning("[registry] User namespace module not available")

    return _user_namespace_manager
81
-
82
-
83
def _resolve_from_user_namespace(formula_name: str, version: Optional[str] = None) -> Optional["ResolvedFormula"]:
    """
    Look a formula up in the user namespace.

    Args:
        formula_name: Key to look for among the manager's user expressions.
        version: Only used as the version label of the result; "user" is
            used when it is falsy.

    Returns:
        A ResolvedFormula with parse results attached, or None when the user
        namespace is disabled, the manager is missing or uninitialized, the
        name is unknown, or any error occurs (errors are logged as warnings).
    """
    if not _enable_user_namespace:
        return None

    manager = get_user_namespace_manager()
    if not (manager and manager.is_initialized()):
        return None

    try:
        available = manager.get_user_expressions()
        if formula_name not in available:
            return None

        expr_path = available[formula_name]
        log_info(f"[registry] Found '{formula_name}' in user namespace: {expr_path}")

        found = ResolvedFormula(
            source=expr_path,
            version=version or "user",
            mode="local",
            namespace="user",
        )

        # Read the expression text, parse it, and copy the results over.
        parsed = parse_expression(load_expression(found, namespace="user"))
        found.ast = parsed.ast
        found.sample_clean = parsed.sample_clean
        found.sample_unclean = parsed.sample_unclean
        return found

    except Exception as e:
        # Best effort: a broken user expression must not abort resolution.
        log_warning(f"[registry] Error resolving from user namespace: {e}")

    return None
130
-
131
-
132
- # ------------------------------------------------------------
133
- # Manifest loading
134
- # ------------------------------------------------------------
135
-
136
def _load_manifest(root: str, version: str):
    """
    Read ``<root>/<version>/manifest.json`` and return the decoded JSON.

    Raises:
        FileNotFoundError: if the manifest file does not exist.
    """
    manifest_file = os.path.join(root, version, "manifest.json")

    if os.path.exists(manifest_file):
        with open(manifest_file, "r", encoding="utf-8") as handle:
            return json.load(handle)

    raise FileNotFoundError(f"Manifest not found for version {version}")
144
-
145
-
146
- # ------------------------------------------------------------
147
- # Resolve formula filename
148
- # ------------------------------------------------------------
149
-
150
def _resolve_filename(formula_name: str, manifest: dict):
    """
    Return the manifest entry stored under ``formula_name``.

    Raises:
        FileNotFoundError: if the manifest has no such key.
    """
    if formula_name in manifest:
        return manifest[formula_name]

    raise FileNotFoundError(f"Formula '{formula_name}' not found in manifest")
155
-
156
-
157
- # ------------------------------------------------------------
158
- # Main resolver (attaches AST + sample data)
159
- # ------------------------------------------------------------
160
-
161
def resolve_formula(formula_name: str, namespace=None, version=None):
    """
    Resolve a formula name to a ResolvedFormula carrying its AST and samples.

    Lookup order:
      1. Custom formula path, if one is configured (returned immediately).
      2. User namespace, when it is enabled and the priority is "user_only"
         or "user_first".
      3. Built-in lookup: formula root -> version -> manifest entry ->
         expression file. With priority "builtin_first" the user namespace
         is consulted as a fallback whenever the manifest, the manifest
         entry, or the expression file is missing.

    Args:
        formula_name: Name looked up in the user namespace and the manifest.
        namespace: Not read by this function; kept so that callers passing
            it keep working.
        version: Explicit version. When falsy, the user version override and
            then the default version are used for the built-in lookup.

    Returns:
        The ResolvedFormula. For a built-in formula, a load/parse failure is
        only logged, and the object is returned without parse results.

    Raises:
        ValueError: if no formula root is configured.
        FileNotFoundError: if the priority is "user_only" and the user
            namespace has no such formula, or if the manifest, its entry, or
            the expression file is missing and no fallback applies.
    """
    # 1. Custom override. Load/parse errors propagate to the caller here.
    custom = get_custom_formula_path()
    if custom:
        log_info(f"[registry] Using custom formula path: {custom}")
        resolved = ResolvedFormula(source=custom, version="custom", namespace="custom")
        return _attach_parsed(resolved, "custom")

    # 2. User namespace ahead of the built-ins ("builtin_only" skips it and
    #    "builtin_first" is handled as a fallback further down).
    if _enable_user_namespace:
        priority = _user_expression_priority
        if priority in ("user_only", "user_first"):
            user_resolved = _resolve_from_user_namespace(formula_name, version)
            if user_resolved:
                return user_resolved
            if priority == "user_only":
                raise FileNotFoundError(
                    f"Formula '{formula_name}' not found in user namespace"
                )

    # 3. Built-in lookup needs a configured root folder.
    root = get_user_formula_root_override()
    if not root:
        raise ValueError("Formula root not set. Use add.set_formula_root(path).")

    # Explicit argument wins, then the user override, then the default.
    version = version or get_user_version_override() or get_default_version()

    try:
        manifest = _load_manifest(root, version)
        filename = _resolve_filename(formula_name, manifest)
    except FileNotFoundError:
        fallback = _builtin_first_fallback(formula_name, version)
        if fallback:
            return fallback
        # Bare raise keeps the original exception and traceback intact.
        raise

    full_path = os.path.join(root, version, filename)
    if not os.path.exists(full_path):
        fallback = _builtin_first_fallback(formula_name, version)
        if fallback:
            return fallback
        raise FileNotFoundError(f"Expression file not found: {full_path}")

    resolved = ResolvedFormula(source=full_path, version=version, namespace="builtin")

    # Best effort: a built-in formula that fails to load or parse is still
    # returned, with the failure reported as a warning.
    try:
        _attach_parsed(resolved, "builtin")
        if resolved.ast is None:
            log_warning(f"[registry] No AST parsed for '{formula_name}'")
    except Exception as e:
        log_warning(f"[registry] Failed to load/parse '{formula_name}': {e}")

    return resolved


def _attach_parsed(resolved, namespace):
    """Load and parse the expression for ``resolved`` and copy the results onto it."""
    parsed = parse_expression(load_expression(resolved, namespace=namespace))
    resolved.ast = parsed.ast
    resolved.sample_clean = parsed.sample_clean
    resolved.sample_unclean = parsed.sample_unclean
    return resolved


def _builtin_first_fallback(formula_name, version):
    """Return the user-namespace result when "builtin_first" fallback applies, else None."""
    if _enable_user_namespace and _user_expression_priority == "builtin_first":
        return _resolve_from_user_namespace(formula_name, version)
    return None
292
-
293
-
294
- # ------------------------------------------------------------
295
- # Public setters
296
- # ------------------------------------------------------------
297
-
298
def set_formula_root(path: str):
    """Store ``path`` as the user formula root override in the core config and log it."""
    from additory.core.config import set_user_formula_root_override as store_root

    store_root(path)
    log_info(f"[registry] Formula root set to: {path}")
302
-
303
-
304
def set_formula_version(v: str):
    """Store ``v`` as the user version override in the core config and log it."""
    from additory.core.config import set_user_version_override as store_version

    store_version(v)
    log_info(f"[registry] Version override set to: {v}")
308
-
309
-
310
def set_custom_formula_path(path: str):
    """Store ``path`` as the custom formula path in the core config and log it."""
    # Aliased because the config setter has the same name as this function.
    from additory.core.config import set_custom_formula_path as store_custom_path

    store_custom_path(path)
    log_info(f"[registry] Custom formula path set to: {path}")