additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -177
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -352
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/deduce.py +0 -259
  100. additory/synthetic/distributions.py +0 -22
  101. additory/synthetic/forecast.py +0 -1132
  102. additory/synthetic/linked_list_parser.py +0 -415
  103. additory/synthetic/namespace_lookup.py +0 -129
  104. additory/synthetic/smote.py +0 -320
  105. additory/synthetic/strategies.py +0 -926
  106. additory/synthetic/synthesizer.py +0 -713
  107. additory/utilities/__init__.py +0 -53
  108. additory/utilities/encoding.py +0 -600
  109. additory/utilities/games.py +0 -300
  110. additory/utilities/keys.py +0 -8
  111. additory/utilities/lookup.py +0 -103
  112. additory/utilities/matchers.py +0 -216
  113. additory/utilities/resolvers.py +0 -286
  114. additory/utilities/settings.py +0 -167
  115. additory/utilities/units.py +0 -749
  116. additory/utilities/validators.py +0 -153
  117. additory-0.1.0a4.dist-info/METADATA +0 -311
  118. additory-0.1.0a4.dist-info/RECORD +0 -72
  119. additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
  120. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  121. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,477 +0,0 @@
1
- # integrity_manager.py
2
- # OS timestamp-based integrity management for additory expressions
3
-
4
- import os
5
- import hashlib
6
- import yaml
7
- import platform
8
- from datetime import datetime, timezone
9
- from typing import Tuple, Optional
10
- from dataclasses import dataclass
11
-
12
- from .logging import log_info, log_warning
13
-
14
-
15
@dataclass
class IntegrityInfo:
    """Snapshot of the ``_integrity`` section read from an expression file.

    Attributes:
        hash: Stored digest string, exactly as found in the file
        algorithm: Name of the hashing algorithm recorded in the file
        generated_at: Timestamp string recorded when the hash was written
        salt_source: Label describing where the salt came from
    """

    hash: str
    algorithm: str
    generated_at: str
    salt_source: str
22
-
23
-
24
class SecurityError(Exception):
    """Signals that a security or integrity validation step did not pass."""
27
-
28
-
29
class IntegrityManager:
    """OS timestamp-based integrity management with SHA256 hashing.

    An expression (.add) file is a YAML document. Its integrity record lives
    in a top-level ``_integrity`` mapping holding a salted SHA256 digest of
    the rest of the document (dumped canonically with sorted keys).

    NOTE(review): ``add_integrity_hash`` stores the salt next to the hash
    inside the same file, so anyone able to edit the file can recompute a
    matching hash. The scheme therefore detects accidental corruption rather
    than deliberate tampering.
    """

    def __init__(self):
        self.algorithm = "sha256"
        self.salt_prefix = "additory_expr"

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _load_yaml(file_path: str):
        """
        Read and parse a YAML file

        Args:
            file_path: Path to the .add file

        Returns:
            Whatever ``yaml.safe_load`` produces (None for an empty file)

        Raises:
            SecurityError: If the content is not valid YAML
            OSError: If the file cannot be opened or read
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        try:
            return yaml.safe_load(content)
        except yaml.YAMLError as e:
            raise SecurityError(f"Invalid YAML in {file_path}: {e}") from e

    def _compute_hash(self, data, salt) -> str:
        """
        Compute the salted digest of a parsed document

        The ``_integrity`` section (if any) is excluded from the digest. The
        caller's object is not modified.

        Args:
            data: Parsed YAML document
            salt: Salt prepended to the canonical content

        Returns:
            Digest string in the form "<algorithm>:<hexdigest>"
        """
        if isinstance(data, dict):
            data = {key: value for key, value in data.items() if key != '_integrity'}

        # sort_keys=True makes the dump independent of key order in the file
        canonical_content = yaml.dump(data, sort_keys=True,
                                      default_flow_style=False,
                                      allow_unicode=True)
        salted_content = f"{salt}:{canonical_content}"

        # The digest is always SHA256; self.algorithm is only the label.
        hash_obj = hashlib.sha256(salted_content.encode('utf-8'))
        return f"{self.algorithm}:{hash_obj.hexdigest()}"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_file_creation_salt(self, file_path: str) -> str:
        """
        Generate salt from OS file creation timestamp

        Args:
            file_path: Path to the file

        Returns:
            Salt string based on file creation time

        Raises:
            FileNotFoundError: If file doesn't exist
            OSError: If unable to get file stats
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            stat_info = os.stat(file_path)

            # Prefer the real birth time wherever the platform exposes it
            # (macOS/BSD, and Windows on Python 3.12+ where st_ctime is
            # deprecated as "creation time"). Otherwise fall back to
            # st_ctime: creation time on older Windows Pythons, last
            # metadata change on Linux (an approximation only).
            if hasattr(stat_info, 'st_birthtime'):
                creation_time = stat_info.st_birthtime
            else:
                creation_time = stat_info.st_ctime

            # NOTE(review): naive local time, so the same file gives a
            # different salt under a different time zone. Left unchanged so
            # existing hashes computed without a stored salt stay valid.
            dt = datetime.fromtimestamp(creation_time)
            timestamp_str = dt.strftime('%Y%m%d_%H%M%S')

            salt = f"{self.salt_prefix}_{timestamp_str}"
            log_info(f"[integrity] Generated salt for {os.path.basename(file_path)}: {salt}")

            return salt

        except OSError as e:
            raise OSError(f"Failed to get file stats for {file_path}: {e}") from e

    def generate_integrity_hash(self, file_path: str) -> Tuple[str, str]:
        """
        Generate integrity hash using OS timestamp as salt

        Args:
            file_path: Path to the .add file

        Returns:
            Tuple of (hash_value, salt_used)

        Raises:
            SecurityError: If hash generation fails for any reason,
                including a missing or unreadable file
        """
        try:
            data = self._load_yaml(file_path)
            salt = self.get_file_creation_salt(file_path)
            hash_value = self._compute_hash(data, salt)

            log_info(f"[integrity] Generated hash for {os.path.basename(file_path)}")

            return hash_value, salt

        except SecurityError:
            # Already descriptive - do not wrap it a second time
            raise
        except Exception as e:
            raise SecurityError(f"Failed to generate integrity hash for {file_path}: {e}") from e

    def validate_integrity(self, file_path: str) -> bool:
        """
        Validate file integrity against stored hash

        Args:
            file_path: Path to the .add file

        Returns:
            True if integrity is valid (never returns False; failures raise)

        Raises:
            SecurityError: If integrity validation fails
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            data = self._load_yaml(file_path)

            # An empty or non-mapping document cannot carry an integrity section
            if not isinstance(data, dict) or '_integrity' not in data:
                raise SecurityError(f"Missing integrity hash in {file_path} - file may be corrupted")

            integrity_info = data['_integrity']
            if not isinstance(integrity_info, dict):
                raise SecurityError(f"Malformed integrity section in {file_path}")

            # Validate integrity section structure
            for field in ('hash', 'algorithm'):
                if field not in integrity_info:
                    raise SecurityError(f"Missing '{field}' in integrity section of {file_path}")

            stored_hash = integrity_info['hash']
            stored_algorithm = integrity_info['algorithm']

            # Verify algorithm matches
            if stored_algorithm != self.algorithm:
                raise SecurityError(f"Algorithm mismatch in {file_path}: expected {self.algorithm}, got {stored_algorithm}")

            if 'original_salt' in integrity_info:
                # Preferred: rewriting the file changes its timestamps, so
                # the salt used at signing time is stored in the file.
                salt = integrity_info['original_salt']
            else:
                # Fallback to current file creation salt
                salt = self.get_file_creation_salt(file_path)

            calculated_hash = self._compute_hash(data, salt)

            if stored_hash != calculated_hash:
                raise SecurityError(f"Integrity check failed for {file_path} - file has been tampered with")

            log_info(f"[integrity] Integrity validated for {os.path.basename(file_path)}")
            return True

        except SecurityError:
            # Re-raise security errors as-is
            raise
        except Exception as e:
            raise SecurityError(f"Integrity validation failed for {file_path}: {e}") from e

    def add_integrity_hash(self, file_path: str) -> bool:
        """
        Add integrity hash to .add file

        The file is rewritten in place with a fresh ``_integrity`` section
        appended after the existing keys.

        Args:
            file_path: Path to the .add file

        Returns:
            True if hash was added successfully

        Raises:
            SecurityError: If hash addition fails
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            # Get original creation salt before any modifications
            original_salt = self.get_file_creation_salt(file_path)

            data = self._load_yaml(file_path)
            if not isinstance(data, dict):
                raise SecurityError(
                    f"Cannot add integrity hash to {file_path}: "
                    f"top-level YAML content must be a mapping"
                )

            hash_value = self._compute_hash(data, original_salt)

            # Drop any stale section first so the new one lands at the end
            data.pop('_integrity', None)
            data['_integrity'] = {
                'hash': hash_value,
                'algorithm': self.algorithm,
                'generated_at': datetime.now(timezone.utc).isoformat(),
                'salt_source': 'os_creation_time',
                'original_salt': original_salt  # Store the salt used
            }

            # Write back to file
            with open(file_path, 'w', encoding='utf-8') as f:
                yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

            log_info(f"[integrity] Added integrity hash to {os.path.basename(file_path)}")
            return True

        except SecurityError:
            raise
        except Exception as e:
            raise SecurityError(f"Failed to add integrity hash to {file_path}: {e}") from e

    def remove_integrity_hash(self, file_path: str) -> bool:
        """
        Remove integrity hash from .add file (for testing purposes)

        Args:
            file_path: Path to the .add file

        Returns:
            True if hash was removed, False if the file had none

        Raises:
            SecurityError: If the file cannot be parsed or rewritten
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            data = self._load_yaml(file_path)

            # Empty / non-mapping documents have no integrity section to remove
            if not isinstance(data, dict) or '_integrity' not in data:
                log_warning(f"[integrity] No integrity hash found in {os.path.basename(file_path)}")
                return False

            del data['_integrity']

            # Write back to file
            with open(file_path, 'w', encoding='utf-8') as f:
                yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

            log_info(f"[integrity] Removed integrity hash from {os.path.basename(file_path)}")
            return True

        except SecurityError:
            raise
        except Exception as e:
            raise SecurityError(f"Failed to remove integrity hash from {file_path}: {e}") from e

    def get_integrity_info(self, file_path: str) -> Optional["IntegrityInfo"]:
        """
        Get integrity information from .add file

        Args:
            file_path: Path to the .add file

        Returns:
            IntegrityInfo object, or None if there is no integrity section
            or the file cannot be read/parsed (a warning is logged then)

        Raises:
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            data = self._load_yaml(file_path)

            if not isinstance(data, dict) or '_integrity' not in data:
                return None

            integrity_data = data['_integrity']

            return IntegrityInfo(
                hash=integrity_data.get('hash', ''),
                algorithm=integrity_data.get('algorithm', ''),
                generated_at=integrity_data.get('generated_at', ''),
                salt_source=integrity_data.get('salt_source', '')
            )

        except Exception as e:
            # Deliberately best-effort: callers treat None as "no section"
            log_warning(f"[integrity] Failed to get integrity info from {file_path}: {e}")
            return None

    def verify_file_unchanged(self, file_path: str, expected_hash: str) -> bool:
        """
        Verify that a file hasn't changed by comparing hashes

        Args:
            file_path: Path to the file
            expected_hash: Expected hash value

        Returns:
            True if the freshly generated hash equals expected_hash;
            False on mismatch or if the hash cannot be generated
        """
        try:
            current_hash, _ = self.generate_integrity_hash(file_path)
            return current_hash == expected_hash
        except Exception:
            return False

    def batch_validate_integrity(self, file_paths: list) -> dict:
        """
        Validate integrity for multiple files

        Args:
            file_paths: List of file paths to validate

        Returns:
            Dictionary mapping each file path to
            {'valid': bool, 'error': error message or None}
        """
        results = {}

        for file_path in file_paths:
            try:
                results[file_path] = {
                    'valid': self.validate_integrity(file_path),
                    'error': None
                }
            except Exception as e:
                # One bad file must not abort the whole batch
                results[file_path] = {
                    'valid': False,
                    'error': str(e)
                }

        return results

    def validate_integrity_with_policy(
        self,
        file_path: str,
        namespace: str = "builtin"
    ) -> bool:
        """
        Validate integrity with namespace-specific policy

        Args:
            file_path: Path to .add file
            namespace: "builtin" or "user"

        Returns:
            True if the hash is valid, or if a user-namespace file has no
            integrity section. False if a user-namespace file fails
            validation (a warning is logged and the caller may continue).

        Raises:
            SecurityError: Only if builtin namespace fails validation
            FileNotFoundError: If file doesn't exist

        Policy:
            Built-in namespace:
            - SHA present & valid: Run silently
            - SHA missing: STOP (SecurityError)
            - SHA tampered: STOP (SecurityError)

            User namespace:
            - SHA present & valid: Run silently
            - SHA missing: WARN "DEVELOPMENT MODE" + Continue (True)
            - SHA tampered/invalid: WARN + Continue (False)
        """
        is_builtin = (namespace == "builtin")
        filename = os.path.basename(file_path)

        # Check if file has integrity section
        info = self.get_integrity_info(file_path)

        if info is None:
            # No (readable) integrity section
            if is_builtin:
                raise SecurityError(
                    f"Built-in expression '{filename}' requires integrity hash. "
                    f"This file may be corrupted or incomplete."
                )

            # User namespace - no integrity is OK (development mode)
            log_warning(
                f"[integrity] Expression '{filename}' running in DEVELOPMENT MODE "
                f"without integrity verification. Add _integrity section for production use."
            )
            return True

        # Has integrity section - validate it
        try:
            is_valid = self.validate_integrity(file_path)

            # validate_integrity() itself raises instead of returning False;
            # this branch only matters if a subclass overrides it.
            if not is_valid:
                if is_builtin:
                    raise SecurityError(
                        f"Built-in expression '{filename}' integrity check FAILED. "
                        f"File has been tampered with or corrupted. "
                        f"Please reinstall or update additory."
                    )

                log_warning(
                    f"[integrity] Expression '{filename}' INTEGRITY COMPROMISED. "
                    f"File may have been modified. Continuing with warning."
                )
                return False

            # Valid integrity
            log_info(f"[integrity] Expression '{filename}' integrity verified")
            return True

        except SecurityError as e:
            if is_builtin:
                raise

            log_warning(
                f"[integrity] Expression '{filename}' validation error: {e}. "
                f"Continuing with warning."
            )
            return False

    def get_platform_info(self) -> dict:
        """
        Get platform information for debugging

        Returns:
            Dictionary with keys 'system', 'platform', 'python_version'
            and 'supports_birthtime'
        """
        return {
            'system': platform.system(),
            'platform': platform.platform(),
            'python_version': platform.python_version(),
            'supports_birthtime': hasattr(os.stat('.'), 'st_birthtime') if os.path.exists('.') else False
        }
additory/core/loader.py DELETED
@@ -1,190 +0,0 @@
1
- # loader.py
2
-
3
- import os
4
- import requests
5
- from functools import lru_cache
6
-
7
- from .logging import log_info, log_warning
8
- from .integrity_manager import IntegrityManager
9
- from .namespace_manager import NamespaceManager
10
-
11
-
12
- # ------------------------------------------------------------
13
- # Module-level managers
14
- # ------------------------------------------------------------
15
-
16
- _integrity_manager = IntegrityManager()
17
- _namespace_manager = NamespaceManager()
18
-
19
-
20
- # ------------------------------------------------------------
21
- # Public API
22
- # ------------------------------------------------------------
23
-
24
def load_expression(resolved, namespace="builtin"):
    """
    Return the raw DSL text for a ResolvedFormula coming from the registry.

    Only the text is fetched here (with integrity validation applied
    according to the namespace policy); building the AST and samples is
    the job of parser.py.

    Args:
        resolved: ResolvedFormula object with source path
        namespace: "builtin" or "user" (default: "builtin")

    Returns:
        Raw DSL text of the expression

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    location = resolved.source

    # http(s) sources are downloaded; anything else is treated as a local path
    is_remote = location.startswith(("http://", "https://"))
    fetch = _load_remote if is_remote else _load_local

    return fetch(location, resolved, namespace)
50
-
51
-
52
- # ------------------------------------------------------------
53
- # Remote Loading
54
- # ------------------------------------------------------------
55
-
56
class _RemoteStatusError(Exception):
    """Internal signal: the server answered with a non-200 status code."""

    def __init__(self, status_code):
        super().__init__(f"status {status_code}")
        self.status_code = status_code


@lru_cache(maxsize=256)
def _fetch_remote_text(url, namespace):
    """
    Download, locally cache and integrity-check one remote .add file.

    Memoized on (url, namespace) only. Because lru_cache does not store
    raised exceptions, only successful downloads are remembered, so a
    transient network error is retried on the next call.

    Raises:
        _RemoteStatusError: If the server does not answer with status 200
        SecurityError: If built-in expression fails integrity check
        Exception: Whatever the HTTP request or the disk cache raises
    """
    log_info(f"[loader] Fetching remote expression: {url}")

    resp = requests.get(url, timeout=5)
    if resp.status_code != 200:
        raise _RemoteStatusError(resp.status_code)

    # Cache the file locally for integrity validation
    filename = url.split("/")[-1]
    cache_path = _cache_remote_file(resp.text, filename, namespace)

    # Validate integrity with namespace policy
    _integrity_manager.validate_integrity_with_policy(cache_path, namespace)

    return resp.text


def _load_remote(url, resolved, namespace="builtin"):
    """
    Loads a remote .add expression file.
    Uses caching to avoid repeated downloads.
    Validates integrity before returning content.

    The memoization key is (url, namespace). ``resolved`` is only forwarded
    to the fallback logic, so it does not have to be hashable, and a failed
    download is never cached.

    Args:
        url: http(s) URL of the expression file
        resolved: ResolvedFormula object, passed through to _fallback_remote
        namespace: "builtin" or "user" (default: "builtin")

    Returns:
        Raw text of the expression, or the result of _fallback_remote
        if the primary URL could not be fetched

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    from .integrity_manager import SecurityError

    try:
        return _fetch_remote_text(url, namespace)
    except SecurityError:
        # Re-raise SecurityError for built-in namespace
        raise
    except _RemoteStatusError as e:
        log_warning(f"[loader] Failed to fetch {url} (status {e.status_code})")
    except Exception as e:
        log_warning(f"[loader] Error fetching {url}: {e}")

    return _fallback_remote(url, resolved, namespace)


# Keep the lru_cache management hooks reachable under the original name
_load_remote.cache_info = _fetch_remote_text.cache_info
_load_remote.cache_clear = _fetch_remote_text.cache_clear
91
-
92
-
93
def _fallback_remote(url, resolved, namespace="builtin"):
    """
    If the primary URL fails, try fallback roots from registry.

    Every root after the first one returned by get_formula_root() is tried
    in order with the file name taken from ``url``. Each failed attempt is
    logged, so a broken mirror is visible in the logs.

    Args:
        url: The primary URL that failed
        resolved: ResolvedFormula object (not used by this function)
        namespace: "builtin" or "user" (default: "builtin")

    Returns:
        Raw text from the first fallback that succeeds, or "" if all fail

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    from .integrity_manager import SecurityError
    from .registry import get_formula_root

    roots = get_formula_root()

    filename = url.split("/")[-1]

    for root in roots[1:]:  # skip the first, already tried
        fallback_url = f"{root}/{filename}"
        log_info(f"[loader] Trying fallback: {fallback_url}")

        try:
            resp = requests.get(fallback_url, timeout=5)
            if resp.status_code == 200:
                # Cache and validate
                cache_path = _cache_remote_file(resp.text, filename, namespace)
                _integrity_manager.validate_integrity_with_policy(cache_path, namespace)
                return resp.text

            log_warning(
                f"[loader] Fallback {fallback_url} failed (status {resp.status_code})"
            )
        except SecurityError:
            # Re-raise SecurityError
            raise
        except Exception as e:
            # Best-effort: record why this root failed, then try the next
            log_warning(f"[loader] Fallback {fallback_url} failed: {e}")
            continue

    log_warning(f"[loader] All fallbacks failed for {filename}")
    return ""  # parser will handle empty text
126
-
127
-
128
- # ------------------------------------------------------------
129
- # Local Loading (Custom Expressions)
130
- # ------------------------------------------------------------
131
-
132
def _load_local(path, resolved, namespace="builtin"):
    """
    Read a local .add file and hand back its raw text.

    The file is integrity-checked according to the namespace policy before
    it is read. A missing or unreadable file yields an empty string.

    Args:
        path: Filesystem path of the expression file
        resolved: ResolvedFormula object (not used by this function)
        namespace: "builtin" or "user" (default: "builtin")

    Returns:
        Raw text of the file, or "" if it is missing or cannot be read

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    from .integrity_manager import SecurityError

    log_info(f"[loader] Loading local expression: {path}")

    file_present = os.path.exists(path)
    if not file_present:
        log_warning(f"[loader] Local expression not found: {path}")
        return ""

    try:
        # Policy check first; built-in failures raise SecurityError
        _integrity_manager.validate_integrity_with_policy(path, namespace)

        with open(path, "r", encoding="utf-8") as handle:
            text = handle.read()
    except SecurityError:
        # Integrity failures must reach the caller untouched
        raise
    except Exception as exc:
        log_warning(f"[loader] Error reading {path}: {exc}")
        return ""
    else:
        return text
162
-
163
-
164
- # ------------------------------------------------------------
165
- # Helper Functions
166
- # ------------------------------------------------------------
167
-
168
def _cache_remote_file(content, filename, namespace):
    """
    Cache remote file content locally for integrity validation

    ``filename`` is derived from a URL, so it is checked before being
    joined to the cache directory. An empty name, "." / "..", or a name
    containing a path separator or drive prefix is rejected, as it could
    point outside the cache directory.

    Args:
        content: File content to cache
        filename: Name of the file (a single path component)
        namespace: "builtin" or "user"

    Returns:
        Path to cached file

    Raises:
        ValueError: If filename is not a plain, single-component file name
        OSError: If the cache directory or file cannot be written
    """
    # basename() differs from the input whenever a separator or drive is present
    if filename in ("", ".", "..") or os.path.basename(filename) != filename:
        raise ValueError(f"Unsafe cache filename: {filename!r}")

    cache_dir = _namespace_manager.get_cache_path(namespace)
    os.makedirs(cache_dir, exist_ok=True)

    cache_path = os.path.join(cache_dir, filename)

    with open(cache_path, 'w', encoding='utf-8') as f:
        f.write(content)

    log_info(f"[loader] Cached {filename} to {cache_path}")

    return cache_path