additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -177
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -352
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/deduce.py +0 -259
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -926
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a4.dist-info/METADATA +0 -311
- additory-0.1.0a4.dist-info/RECORD +0 -72
- additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
|
@@ -1,477 +0,0 @@
|
|
|
1
|
-
# integrity_manager.py
|
|
2
|
-
# OS timestamp-based integrity management for additory expressions
|
|
3
|
-
|
|
4
|
-
import os
|
|
5
|
-
import hashlib
|
|
6
|
-
import yaml
|
|
7
|
-
import platform
|
|
8
|
-
from datetime import datetime, timezone
|
|
9
|
-
from typing import Tuple, Optional
|
|
10
|
-
from dataclasses import dataclass
|
|
11
|
-
|
|
12
|
-
from .logging import log_info, log_warning
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass
class IntegrityInfo:
    """Snapshot of the ``_integrity`` section read from an expression file.

    All four values are kept as plain strings, exactly as they were found
    in the file.
    """

    # Stored digest string
    hash: str
    # Name of the hashing algorithm recorded next to the digest
    algorithm: str
    # Timestamp string recorded when the digest was generated
    generated_at: str
    # Label describing where the salt came from
    salt_source: str
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class SecurityError(Exception):
    """Signals that a security / integrity validation step did not pass."""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class IntegrityManager:
    """OS timestamp-based integrity management with SHA256 hashing.

    A digest is computed over a canonical YAML dump of a file's content
    (with the ``_integrity`` section left out), prefixed by a salt derived
    from the file's OS timestamp.  ``add_integrity_hash`` writes the digest
    and the salt into the file's ``_integrity`` section and
    ``validate_integrity`` recomputes and compares it.

    NOTE: because the salt is stored inside the file next to the digest,
    everything needed to recompute the digest is present in the file itself.
    """

    def __init__(self):
        # Algorithm label written to, and required of, ``_integrity`` sections.
        self.algorithm = "sha256"
        # Leading component of every generated salt string.
        self.salt_prefix = "additory_expr"

    # ------------------------------------------------------------------
    # Private helpers shared by the public methods below
    # ------------------------------------------------------------------

    def _read_yaml_mapping(self, file_path: str) -> dict:
        """Read *file_path* and return its YAML content as a dict.

        Raises:
            SecurityError: If the content is not valid YAML, or does not
                parse to a mapping (e.g. the file is empty).
            OSError: If the file cannot be opened or read.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        try:
            data = yaml.safe_load(content)
        except yaml.YAMLError as e:
            raise SecurityError(f"Invalid YAML in {file_path}: {e}") from e

        # safe_load returns None for an empty document; every caller needs
        # key access, so reject non-mappings with a clear message instead of
        # letting a TypeError surface later.
        if not isinstance(data, dict):
            raise SecurityError(
                f"Invalid content in {file_path}: expected a YAML mapping, "
                f"got {type(data).__name__}"
            )
        return data

    def _compute_hash(self, salt: str, data: dict) -> str:
        """Return ``"<algorithm>:<hexdigest>"`` for *data* salted with *salt*.

        The ``_integrity`` key is ignored and keys are sorted so that the
        digest does not depend on key order. *data* is not modified.
        """
        payload = {key: value for key, value in data.items() if key != '_integrity'}
        canonical_content = yaml.dump(payload, sort_keys=True,
                                      default_flow_style=False,
                                      allow_unicode=True)
        salted_content = f"{salt}:{canonical_content}"
        hash_obj = hashlib.sha256(salted_content.encode('utf-8'))
        return f"{self.algorithm}:{hash_obj.hexdigest()}"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_file_creation_salt(self, file_path: str) -> str:
        """
        Generate salt from OS file creation timestamp

        Args:
            file_path: Path to the file

        Returns:
            Salt string ``"<salt_prefix>_<YYYYmmdd_HHMMSS>"`` based on the
            file's timestamp, formatted in local time

        Raises:
            FileNotFoundError: If file doesn't exist
            OSError: If unable to get file stats
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            stat_info = os.stat(file_path)

            # Prefer the real birth time wherever the platform exposes it
            # (macOS/BSD, and Windows on Python 3.12+ where st_ctime is
            # deprecated as a creation time).  Otherwise fall back to
            # st_ctime: creation time on older Windows Pythons, and only a
            # metadata-change time on Linux, i.e. an approximation.
            if hasattr(stat_info, 'st_birthtime'):
                creation_time = stat_info.st_birthtime
            else:
                creation_time = stat_info.st_ctime

            # Convert to readable format for salt
            dt = datetime.fromtimestamp(creation_time)
            timestamp_str = dt.strftime('%Y%m%d_%H%M%S')

            salt = f"{self.salt_prefix}_{timestamp_str}"
            log_info(f"[integrity] Generated salt for {os.path.basename(file_path)}: {salt}")

            return salt

        except OSError as e:
            raise OSError(f"Failed to get file stats for {file_path}: {e}") from e

    def generate_integrity_hash(self, file_path: str) -> Tuple[str, str]:
        """
        Generate integrity hash using OS timestamp as salt

        Args:
            file_path: Path to the .add file

        Returns:
            Tuple of (hash_value, salt_used)

        Raises:
            SecurityError: If hash generation fails for any reason; this
                includes a missing or unreadable file and invalid YAML,
                since every underlying exception is wrapped.
        """
        try:
            data = self._read_yaml_mapping(file_path)
            salt = self.get_file_creation_salt(file_path)
            hash_value = self._compute_hash(salt, data)

            log_info(f"[integrity] Generated hash for {os.path.basename(file_path)}")

            return hash_value, salt

        except Exception as e:
            raise SecurityError(f"Failed to generate integrity hash for {file_path}: {e}") from e

    def validate_integrity(self, file_path: str) -> bool:
        """
        Validate file integrity against stored hash

        Args:
            file_path: Path to the .add file

        Returns:
            True if integrity is valid (every failure raises instead of
            returning False)

        Raises:
            SecurityError: If integrity validation fails
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            data = self._read_yaml_mapping(file_path)

            # Check integrity section
            if '_integrity' not in data:
                raise SecurityError(f"Missing integrity hash in {file_path} - file may be corrupted")

            integrity_info = data['_integrity']
            if not isinstance(integrity_info, dict):
                raise SecurityError(f"Malformed integrity section in {file_path}")

            # Validate integrity section structure
            for field in ('hash', 'algorithm'):
                if field not in integrity_info:
                    raise SecurityError(f"Missing '{field}' in integrity section of {file_path}")

            stored_hash = integrity_info['hash']
            stored_algorithm = integrity_info['algorithm']

            # Verify algorithm matches
            if stored_algorithm != self.algorithm:
                raise SecurityError(f"Algorithm mismatch in {file_path}: expected {self.algorithm}, got {stored_algorithm}")

            # Prefer the salt recorded when the hash was added; the OS
            # timestamp is only a fallback for files without one.
            if 'original_salt' in integrity_info:
                salt = integrity_info['original_salt']
            else:
                salt = self.get_file_creation_salt(file_path)

            # Recalculate hash (the helper skips the _integrity section)
            calculated_hash = self._compute_hash(salt, data)

            # Compare hashes
            if stored_hash != calculated_hash:
                raise SecurityError(f"Integrity check failed for {file_path} - file has been tampered with")

            log_info(f"[integrity] Integrity validated for {os.path.basename(file_path)}")
            return True

        except SecurityError:
            # Re-raise security errors as-is
            raise
        except Exception as e:
            raise SecurityError(f"Integrity validation failed for {file_path}: {e}") from e

    def add_integrity_hash(self, file_path: str) -> bool:
        """
        Add integrity hash to .add file

        Any existing ``_integrity`` section is replaced.  The file is
        rewritten from the parsed YAML data.

        Args:
            file_path: Path to the .add file

        Returns:
            True if hash was added successfully

        Raises:
            SecurityError: If hash addition fails
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            # Get original creation salt before any modifications
            original_salt = self.get_file_creation_salt(file_path)

            data = self._read_yaml_mapping(file_path)

            # Drop any previous section so the new one is appended last
            data.pop('_integrity', None)

            # Generate hash using original salt
            hash_value = self._compute_hash(original_salt, data)

            # Add integrity section with original salt stored
            data['_integrity'] = {
                'hash': hash_value,
                'algorithm': self.algorithm,
                'generated_at': datetime.now(timezone.utc).isoformat(),
                'salt_source': 'os_creation_time',
                'original_salt': original_salt  # Store the salt used
            }

            # Serialize before opening the file for writing so that a
            # serialization error cannot leave a truncated file behind.
            serialized = yaml.dump(data, default_flow_style=False, sort_keys=False, allow_unicode=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(serialized)

            log_info(f"[integrity] Added integrity hash to {os.path.basename(file_path)}")
            return True

        except Exception as e:
            raise SecurityError(f"Failed to add integrity hash to {file_path}: {e}") from e

    def remove_integrity_hash(self, file_path: str) -> bool:
        """
        Remove integrity hash from .add file (for testing purposes)

        Args:
            file_path: Path to the .add file

        Returns:
            True if hash was removed successfully, False if the file had no
            integrity section (the file is then left untouched)

        Raises:
            SecurityError: If the file cannot be read, parsed or rewritten
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            data = self._read_yaml_mapping(file_path)

            if '_integrity' not in data:
                log_warning(f"[integrity] No integrity hash found in {os.path.basename(file_path)}")
                return False

            del data['_integrity']

            # Serialize first; see add_integrity_hash for the rationale.
            serialized = yaml.dump(data, default_flow_style=False, sort_keys=False, allow_unicode=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(serialized)

            log_info(f"[integrity] Removed integrity hash from {os.path.basename(file_path)}")
            return True

        except Exception as e:
            raise SecurityError(f"Failed to remove integrity hash from {file_path}: {e}") from e

    def get_integrity_info(self, file_path: str) -> Optional["IntegrityInfo"]:
        """
        Get integrity information from .add file

        Args:
            file_path: Path to the .add file

        Returns:
            IntegrityInfo object, or None if there is no integrity section
            OR if the file could not be read/parsed (a warning is logged in
            that case).  Missing fields default to empty strings.

        Raises:
            FileNotFoundError: If file doesn't exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            data = self._read_yaml_mapping(file_path)

            if '_integrity' not in data:
                return None

            integrity_data = data['_integrity']

            return IntegrityInfo(
                hash=integrity_data.get('hash', ''),
                algorithm=integrity_data.get('algorithm', ''),
                generated_at=integrity_data.get('generated_at', ''),
                salt_source=integrity_data.get('salt_source', '')
            )

        except Exception as e:
            # Deliberately best-effort: callers treat None as "no usable
            # integrity information".
            log_warning(f"[integrity] Failed to get integrity info from {file_path}: {e}")
            return None

    def verify_file_unchanged(self, file_path: str, expected_hash: str) -> bool:
        """
        Verify that a file hasn't changed by comparing hashes

        The hash is regenerated with the file's current timestamp salt.

        Args:
            file_path: Path to the file
            expected_hash: Expected hash value

        Returns:
            True if file is unchanged; False on mismatch or on any error
        """
        try:
            current_hash, _ = self.generate_integrity_hash(file_path)
            return current_hash == expected_hash
        except Exception:
            # Best-effort check: any failure counts as "not verified".
            return False

    def batch_validate_integrity(self, file_paths: list) -> dict:
        """
        Validate integrity for multiple files

        Args:
            file_paths: List of file paths to validate

        Returns:
            Dictionary mapping each file path to
            ``{'valid': bool, 'error': str or None}``; a failing file never
            aborts the batch
        """
        results = {}

        for file_path in file_paths:
            try:
                results[file_path] = {
                    'valid': self.validate_integrity(file_path),
                    'error': None
                }
            except Exception as e:
                results[file_path] = {
                    'valid': False,
                    'error': str(e)
                }

        return results

    def validate_integrity_with_policy(
        self,
        file_path: str,
        namespace: str = "builtin"
    ) -> bool:
        """
        Validate integrity with namespace-specific policy

        Args:
            file_path: Path to .add file
            namespace: "builtin" selects the strict policy; any other value
                is handled with the lenient "user" policy

        Returns:
            True if the file validates, or if a user-namespace file has no
            usable integrity section (development mode).
            False if a user-namespace file has an integrity section that
            fails validation (a warning is logged; the caller decides
            whether to continue).

        Raises:
            SecurityError: Only if builtin namespace fails validation
            FileNotFoundError: If file doesn't exist

        Policy:
            Built-in namespace:
            - SHA present & valid: Run silently
            - SHA missing: STOP (SecurityError)
            - SHA tampered: STOP (SecurityError)

            User namespace:
            - SHA present & valid: Run silently
            - SHA missing: WARN "DEVELOPMENT MODE" + Continue
            - SHA tampered: WARN "INTEGRITY COMPROMISED" + Continue
        """
        is_builtin = (namespace == "builtin")
        filename = os.path.basename(file_path)

        # Check if file has integrity section.  Note that None is also
        # returned when the file could not be read or parsed.
        info = self.get_integrity_info(file_path)

        if info is None:
            # No integrity section
            if is_builtin:
                raise SecurityError(
                    f"Built-in expression '{filename}' requires integrity hash. "
                    f"This file may be corrupted or incomplete."
                )
            # User namespace - no integrity is OK (development mode)
            log_warning(
                f"[integrity] Expression '{filename}' running in DEVELOPMENT MODE "
                f"without integrity verification. Add _integrity section for production use."
            )
            return True

        # Has integrity section - validate it
        try:
            is_valid = self.validate_integrity(file_path)

            # validate_integrity() as defined in this class raises rather
            # than returning False; this branch is kept for overrides that
            # report failure through the return value.
            if not is_valid:
                if is_builtin:
                    raise SecurityError(
                        f"Built-in expression '{filename}' integrity check FAILED. "
                        f"File has been tampered with or corrupted. "
                        f"Please reinstall or update additory."
                    )
                log_warning(
                    f"[integrity] Expression '{filename}' INTEGRITY COMPROMISED. "
                    f"File may have been modified. Continuing with warning."
                )
                return False

            # Valid integrity
            log_info(f"[integrity] Expression '{filename}' integrity verified")
            return True

        except SecurityError as e:
            if is_builtin:
                raise
            log_warning(
                f"[integrity] Expression '{filename}' validation error: {e}. "
                f"Continuing with warning."
            )
            return False

    def get_platform_info(self) -> dict:
        """
        Get platform information for debugging

        Returns:
            Dictionary with keys 'system', 'platform', 'python_version' and
            'supports_birthtime' (whether os.stat results on the current
            directory expose st_birthtime)
        """
        return {
            'system': platform.system(),
            'platform': platform.platform(),
            'python_version': platform.python_version(),
            'supports_birthtime': hasattr(os.stat('.'), 'st_birthtime') if os.path.exists('.') else False
        }
|
additory/core/loader.py
DELETED
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
# loader.py
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import requests
|
|
5
|
-
from functools import lru_cache
|
|
6
|
-
|
|
7
|
-
from .logging import log_info, log_warning
|
|
8
|
-
from .integrity_manager import IntegrityManager
|
|
9
|
-
from .namespace_manager import NamespaceManager
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
# ------------------------------------------------------------
|
|
13
|
-
# Module-level managers
|
|
14
|
-
# ------------------------------------------------------------
|
|
15
|
-
|
|
16
|
-
_integrity_manager = IntegrityManager()
|
|
17
|
-
_namespace_manager = NamespaceManager()
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
# ------------------------------------------------------------
|
|
21
|
-
# Public API
|
|
22
|
-
# ------------------------------------------------------------
|
|
23
|
-
|
|
24
|
-
def load_expression(resolved, namespace="builtin"):
    """
    Return the raw DSL text for a resolved expression.

    ``resolved.source`` decides the route: sources starting with
    ``http://`` or ``https://`` are fetched remotely, everything else is
    read as a local path.  Both routes apply the namespace's integrity
    policy.  No parsing happens here.

    Args:
        resolved: ResolvedFormula object with source path
        namespace: "builtin" or "user" (default: "builtin")

    Returns:
        Raw DSL text of the expression

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    location = resolved.source
    is_remote = location.startswith(("http://", "https://"))
    fetch = _load_remote if is_remote else _load_local
    return fetch(location, resolved, namespace)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
# ------------------------------------------------------------
|
|
53
|
-
# Remote Loading
|
|
54
|
-
# ------------------------------------------------------------
|
|
55
|
-
|
|
56
|
-
@lru_cache(maxsize=256)
def _load_remote(url, resolved, namespace="builtin"):
    """
    Download a remote .add expression file and return its text.

    The downloaded text is written to the namespace cache directory and
    checked against the namespace integrity policy before it is returned.
    A non-200 response or any non-security error hands over to
    ``_fallback_remote``.

    The whole function is memoized per ``(url, resolved, namespace)``, so
    whatever it returns - including a fallback result - is reused on later
    calls with the same arguments.

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    from .integrity_manager import SecurityError

    log_info(f"[loader] Fetching remote expression: {url}")

    try:
        response = requests.get(url, timeout=5)

        if response.status_code != 200:
            log_warning(f"[loader] Failed to fetch {url} (status {response.status_code})")
            return _fallback_remote(url, resolved, namespace)

        # The integrity manager works on files, so persist the payload first
        body = response.text
        leaf_name = url.rsplit("/", 1)[-1]
        cached_at = _cache_remote_file(body, leaf_name, namespace)

        # Apply the namespace policy to the cached copy
        _integrity_manager.validate_integrity_with_policy(cached_at, namespace)

        return response.text

    except SecurityError:
        # Integrity failures must reach the caller untouched
        raise
    except Exception as err:
        log_warning(f"[loader] Error fetching {url}: {err}")
        return _fallback_remote(url, resolved, namespace)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def _fallback_remote(url, resolved, namespace="builtin"):
    """
    If the primary URL fails, try fallback roots from registry.

    The file name (last ``/``-separated segment of *url*) is requested from
    every root after the first one returned by ``get_formula_root()``.  The
    first 200 response is cached, checked against the namespace integrity
    policy and returned.

    Args:
        url: The URL that already failed
        resolved: ResolvedFormula object (not used by this function)
        namespace: "builtin" or "user"

    Returns:
        The expression text, or "" when every fallback failed

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    from .integrity_manager import SecurityError
    from .registry import get_formula_root

    roots = get_formula_root()

    filename = url.split("/")[-1]

    for root in roots[1:]:  # skip the first, already tried
        fallback_url = f"{root}/{filename}"
        log_info(f"[loader] Trying fallback: {fallback_url}")

        try:
            resp = requests.get(fallback_url, timeout=5)
            if resp.status_code == 200:
                # Cache and validate
                cache_path = _cache_remote_file(resp.text, filename, namespace)
                _integrity_manager.validate_integrity_with_policy(cache_path, namespace)
                return resp.text
            # Record why this root was skipped instead of moving on silently
            log_warning(f"[loader] Fallback {fallback_url} failed (status {resp.status_code})")
        except SecurityError:
            # Re-raise SecurityError
            raise
        except Exception as e:
            # Still best-effort (try the next root), but leave a trace
            log_warning(f"[loader] Error fetching fallback {fallback_url}: {e}")
            continue

    log_warning(f"[loader] All fallbacks failed for {filename}")
    return ""  # parser will handle empty text
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
# ------------------------------------------------------------
|
|
129
|
-
# Local Loading (Custom Expressions)
|
|
130
|
-
# ------------------------------------------------------------
|
|
131
|
-
|
|
132
|
-
def _load_local(path, resolved, namespace="builtin"):
    """
    Read a local .add file and return its raw text.

    The namespace integrity policy is applied to the file before it is
    read.  A missing file or any non-security error is logged and yields
    an empty string.

    Raises:
        SecurityError: If built-in expression fails integrity check
    """
    from .integrity_manager import SecurityError

    log_info(f"[loader] Loading local expression: {path}")

    if os.path.exists(path):
        try:
            # Policy check comes first, then the actual read
            _integrity_manager.validate_integrity_with_policy(path, namespace)

            with open(path, "r", encoding="utf-8") as handle:
                return handle.read()
        except SecurityError:
            # Integrity failures must reach the caller untouched
            raise
        except Exception as err:
            log_warning(f"[loader] Error reading {path}: {err}")
            return ""

    log_warning(f"[loader] Local expression not found: {path}")
    return ""
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
# ------------------------------------------------------------
|
|
165
|
-
# Helper Functions
|
|
166
|
-
# ------------------------------------------------------------
|
|
167
|
-
|
|
168
|
-
def _cache_remote_file(content, filename, namespace):
|
|
169
|
-
"""
|
|
170
|
-
Cache remote file content locally for integrity validation
|
|
171
|
-
|
|
172
|
-
Args:
|
|
173
|
-
content: File content to cache
|
|
174
|
-
filename: Name of the file
|
|
175
|
-
namespace: "builtin" or "user"
|
|
176
|
-
|
|
177
|
-
Returns:
|
|
178
|
-
Path to cached file
|
|
179
|
-
"""
|
|
180
|
-
cache_dir = _namespace_manager.get_cache_path(namespace)
|
|
181
|
-
os.makedirs(cache_dir, exist_ok=True)
|
|
182
|
-
|
|
183
|
-
cache_path = os.path.join(cache_dir, filename)
|
|
184
|
-
|
|
185
|
-
with open(cache_path, 'w', encoding='utf-8') as f:
|
|
186
|
-
f.write(content)
|
|
187
|
-
|
|
188
|
-
log_info(f"[loader] Cached {filename} to {cache_path}")
|
|
189
|
-
|
|
190
|
-
return cache_path
|