fraclab-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- README.md +1601 -0
- fraclab_sdk/__init__.py +34 -0
- fraclab_sdk/algorithm/__init__.py +13 -0
- fraclab_sdk/algorithm/export.py +1 -0
- fraclab_sdk/algorithm/library.py +378 -0
- fraclab_sdk/cli.py +381 -0
- fraclab_sdk/config.py +54 -0
- fraclab_sdk/devkit/__init__.py +25 -0
- fraclab_sdk/devkit/compile.py +342 -0
- fraclab_sdk/devkit/export.py +354 -0
- fraclab_sdk/devkit/validate.py +1043 -0
- fraclab_sdk/errors.py +124 -0
- fraclab_sdk/materialize/__init__.py +8 -0
- fraclab_sdk/materialize/fsops.py +125 -0
- fraclab_sdk/materialize/hash.py +28 -0
- fraclab_sdk/materialize/materializer.py +241 -0
- fraclab_sdk/models/__init__.py +52 -0
- fraclab_sdk/models/bundle_manifest.py +51 -0
- fraclab_sdk/models/dataspec.py +65 -0
- fraclab_sdk/models/drs.py +47 -0
- fraclab_sdk/models/output_contract.py +111 -0
- fraclab_sdk/models/run_output_manifest.py +119 -0
- fraclab_sdk/results/__init__.py +25 -0
- fraclab_sdk/results/preview.py +150 -0
- fraclab_sdk/results/reader.py +329 -0
- fraclab_sdk/run/__init__.py +10 -0
- fraclab_sdk/run/logs.py +42 -0
- fraclab_sdk/run/manager.py +403 -0
- fraclab_sdk/run/subprocess_runner.py +153 -0
- fraclab_sdk/runtime/__init__.py +11 -0
- fraclab_sdk/runtime/artifacts.py +303 -0
- fraclab_sdk/runtime/data_client.py +123 -0
- fraclab_sdk/runtime/runner_main.py +286 -0
- fraclab_sdk/runtime/snapshot_provider.py +1 -0
- fraclab_sdk/selection/__init__.py +11 -0
- fraclab_sdk/selection/model.py +247 -0
- fraclab_sdk/selection/validate.py +54 -0
- fraclab_sdk/snapshot/__init__.py +12 -0
- fraclab_sdk/snapshot/index.py +94 -0
- fraclab_sdk/snapshot/library.py +205 -0
- fraclab_sdk/snapshot/loader.py +217 -0
- fraclab_sdk/specs/manifest.py +89 -0
- fraclab_sdk/utils/io.py +32 -0
- fraclab_sdk-0.1.0.dist-info/METADATA +1622 -0
- fraclab_sdk-0.1.0.dist-info/RECORD +47 -0
- fraclab_sdk-0.1.0.dist-info/WHEEL +4 -0
- fraclab_sdk-0.1.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,1043 @@
"""Validation tools for InputSpec, OutputContract, and run manifests.

Provides:
- InputSpec linting (json_schema_extra validation, show_when structure)
- OutputContract validation (structure, key uniqueness)
- Bundle validation (hash integrity)
- RunManifest vs OutputContract alignment validation
"""

from __future__ import annotations

import hashlib
import json
import os
import subprocess
import sys
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any

from fraclab_sdk.errors import AlgorithmError


class ValidationSeverity(Enum):
    """Severity level for validation issues."""

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"


@dataclass
class ValidationIssue:
    """A single validation issue."""

    severity: ValidationSeverity
    code: str
    message: str
    path: str | None = None
    details: dict[str, Any] = field(default_factory=dict)


@dataclass
class ValidationResult:
    """Result of validation."""

    valid: bool
    issues: list[ValidationIssue] = field(default_factory=list)

    @property
    def errors(self) -> list[ValidationIssue]:
        """Get error-level issues."""
        return [i for i in self.issues if i.severity == ValidationSeverity.ERROR]

    @property
    def warnings(self) -> list[ValidationIssue]:
        """Get warning-level issues."""
        return [i for i in self.issues if i.severity == ValidationSeverity.WARNING]


# =============================================================================
# InputSpec Validation
# =============================================================================

# Valid show_when operators
VALID_SHOW_WHEN_OPS = {"eq", "neq", "in", "nin", "gt", "gte", "lt", "lte", "exists"}


def _validate_show_when_condition(
    condition: dict[str, Any], schema: dict[str, Any], path: str, issues: list[ValidationIssue]
) -> None:
    """Validate a single show_when condition.

    Args:
        condition: The condition dict {field, op, value}.
        schema: The full JSON schema for field lookup.
        path: Current path for error reporting.
        issues: List to append issues to.
    """
    if not isinstance(condition, dict):
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SHOW_WHEN_INVALID_CONDITION",
                message="show_when condition must be a dict",
                path=path,
            )
        )
        return

    # Check required keys
    if "field" not in condition:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SHOW_WHEN_MISSING_FIELD",
                message="show_when condition missing 'field' key",
                path=path,
            )
        )
        return

    field_path = condition["field"]
    op = condition.get("op", "eq")

    # Validate operator
    if op not in VALID_SHOW_WHEN_OPS:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SHOW_WHEN_INVALID_OP",
                message=f"Invalid show_when operator: {op}. Valid: {VALID_SHOW_WHEN_OPS}",
                path=path,
            )
        )

    # Validate field path exists in schema
    if not _field_exists_in_schema(field_path, schema):
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SHOW_WHEN_FIELD_NOT_FOUND",
                message=f"show_when references non-existent field: {field_path}",
                path=path,
                details={"field": field_path},
            )
        )


def _validate_show_when(
    show_when: Any, schema: dict[str, Any], path: str, issues: list[ValidationIssue]
) -> None:
    """Validate show_when structure.

    Supports:
    - Single condition: {field, op, value}
    - AND list: [{cond1}, {cond2}]
    - OR object: {"or": [{cond1}, {cond2}]}

    Args:
        show_when: The show_when value.
        schema: Full JSON schema for field lookup.
        path: Current path for error reporting.
        issues: List to append issues to.
    """
    if show_when is None:
        return

    if isinstance(show_when, dict):
        if "or" in show_when:
            # OR object
            or_conditions = show_when["or"]
            if not isinstance(or_conditions, list):
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="SHOW_WHEN_INVALID_OR",
                        message="show_when 'or' must be a list",
                        path=path,
                    )
                )
            else:
                for i, cond in enumerate(or_conditions):
                    _validate_show_when_condition(cond, schema, f"{path}.or[{i}]", issues)
        elif "and" in show_when:
            # AND object (explicit)
            and_conditions = show_when["and"]
            if not isinstance(and_conditions, list):
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="SHOW_WHEN_INVALID_AND",
                        message="show_when 'and' must be a list",
                        path=path,
                    )
                )
            else:
                for i, cond in enumerate(and_conditions):
                    _validate_show_when_condition(cond, schema, f"{path}.and[{i}]", issues)
        else:
            # Single condition
            _validate_show_when_condition(show_when, schema, path, issues)

    elif isinstance(show_when, list):
        # Implicit AND list
        for i, cond in enumerate(show_when):
            _validate_show_when_condition(cond, schema, f"{path}[{i}]", issues)

    else:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SHOW_WHEN_INVALID_TYPE",
                message=f"show_when must be dict or list, got {type(show_when).__name__}",
                path=path,
            )
        )
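
# A minimal sketch of the three shapes accepted above; the field names
# ("mode", "threshold") are illustrative, not part of the SDK.
_SHOW_WHEN_EXAMPLES = [
    {"field": "mode", "op": "eq", "value": "advanced"},  # single condition
    [  # implicit AND list
        {"field": "mode", "op": "eq", "value": "advanced"},
        {"field": "threshold", "op": "gt", "value": 0},
    ],
    {  # OR object
        "or": [
            {"field": "mode", "op": "eq", "value": "advanced"},
            {"field": "threshold", "op": "exists"},
        ]
    },
]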


def _field_exists_in_schema(field_path: str, schema: dict[str, Any]) -> bool:
    """Check if a field path exists in a JSON schema.

    Supports dot notation: "parent.child.field"

    Args:
        field_path: Dot-separated field path.
        schema: JSON schema dict.

    Returns:
        True if field exists.
    """
    parts = field_path.split(".")
    current = schema.get("properties", {})

    for i, part in enumerate(parts):
        if part not in current:
            return False
        prop = current[part]

        # Last part - field exists
        if i == len(parts) - 1:
            return True

        # Navigate into nested object
        if prop.get("type") == "object":
            current = prop.get("properties", {})
        elif "$ref" in prop:
            # Handle $ref - simplified, assumes $defs at root
            ref = prop["$ref"]
            if ref.startswith("#/$defs/"):
                def_name = ref.split("/")[-1]
                defs = schema.get("$defs", {})
                if def_name in defs:
                    current = defs[def_name].get("properties", {})
                else:
                    return False
            else:
                return False
        else:
            return False

    return True
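
# Illustrative lookup against a schema that nests one level via $defs, the
# shape Pydantic typically emits for a nested model (names hypothetical):
_EXAMPLE_NESTED_SCHEMA = {
    "properties": {"filter": {"$ref": "#/$defs/Filter"}},
    "$defs": {"Filter": {"properties": {"enabled": {"type": "boolean"}}}},
}
# _field_exists_in_schema("filter.enabled", _EXAMPLE_NESTED_SCHEMA)  -> True
# _field_exists_in_schema("filter.missing", _EXAMPLE_NESTED_SCHEMA)  -> False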


def _validate_enum_labels(
    enum_labels: dict[str, str],
    enum_values: list[Any] | None,
    path: str,
    issues: list[ValidationIssue],
) -> None:
    """Validate enum_labels keys match enum values.

    Args:
        enum_labels: The enum_labels dict.
        enum_values: The enum values from schema (if available).
        path: Current path for error reporting.
        issues: List to append issues to.
    """
    if enum_values is None:
        return

    enum_values_str = {str(v) for v in enum_values}
    for key in enum_labels:
        if str(key) not in enum_values_str:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.WARNING,
                    code="ENUM_LABEL_UNKNOWN_VALUE",
                    message=f"enum_labels key '{key}' not in enum values: {enum_values}",
                    path=path,
                )
            )
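
# Sketch of the pairing this check enforces: enum_labels keys must match the
# schema's enum values (compared as strings). The values here are hypothetical.
_EXAMPLE_ENUM_FIELD = {
    "enum": ["fast", "slow"],
    "json_schema_extra": {"enum_labels": {"fast": "Fast (lossy)", "slow": "Slow (exact)"}},
}
# An extra key such as "medium" would trigger the ENUM_LABEL_UNKNOWN_VALUE warning.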


def _extract_schema_from_workspace(workspace: Path) -> dict[str, Any]:
    """Extract JSON schema from workspace InputSpec.

    Args:
        workspace: Algorithm workspace.

    Returns:
        JSON schema dict.
    """
    script = '''
import json
import sys

try:
    from schema.inputspec import INPUT_SPEC
    model = INPUT_SPEC
    schema = model.model_json_schema()
    print(json.dumps(schema))
except Exception as e:
    print(json.dumps({"error": str(e)}))
'''

    env = {"PYTHONPATH": str(workspace), "PYTHONUNBUFFERED": "1"}
    result = subprocess.run(
        [sys.executable, "-c", script],
        cwd=workspace,
        env={**os.environ, **env},
        capture_output=True,
        text=True,
        timeout=30,
    )

    if result.returncode != 0:
        raise AlgorithmError(f"Failed to extract schema: {result.stderr}")

    data = json.loads(result.stdout)
    if "error" in data:
        raise AlgorithmError(f"Failed to extract schema: {data['error']}")

    return data
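
# The subprocess above imports schema.inputspec:INPUT_SPEC from the workspace.
# A minimal workspace sketch that would satisfy it (file contents assumed, not
# part of this module; field names hypothetical):
#
#   # <workspace>/schema/inputspec.py
#   from pydantic import BaseModel
#
#   class InputSpec(BaseModel):
#       mode: str = "basic"
#       threshold: float = 0.5
#
#   INPUT_SPEC = InputSpec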


def validate_inputspec(workspace: Path) -> ValidationResult:
    """Validate InputSpec (schema.inputspec:INPUT_SPEC, legacy CONFIG_MODEL).

    Checks:
    - Schema can be generated
    - json_schema_extra fields are valid
    - show_when conditions reference existing fields
    - enum_labels keys match enum values

    Args:
        workspace: Algorithm workspace path.

    Returns:
        ValidationResult with issues found.
    """
    workspace = Path(workspace).resolve()
    issues: list[ValidationIssue] = []

    # Extract schema
    try:
        schema = _extract_schema_from_workspace(workspace)
    except AlgorithmError as e:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="INPUTSPEC_LOAD_FAILED",
                message=str(e),
            )
        )
        return ValidationResult(valid=False, issues=issues)

    # Validate properties
    _validate_schema_properties(schema, schema, "", issues)

    # Check for required fields
    if "properties" not in schema:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.WARNING,
                code="INPUTSPEC_NO_PROPERTIES",
                message="Schema has no properties defined",
            )
        )

    has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
    return ValidationResult(valid=not has_errors, issues=issues)
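
# Usage sketch (the workspace path is hypothetical):
#
#   result = validate_inputspec(Path("./my_algorithm"))
#   for issue in result.errors + result.warnings:
#       print(f"[{issue.severity.value}] {issue.code}: {issue.message} ({issue.path})")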


def _validate_schema_properties(
    props_container: dict[str, Any],
    full_schema: dict[str, Any],
    path_prefix: str,
    issues: list[ValidationIssue],
) -> None:
    """Recursively validate schema properties.

    Args:
        props_container: Dict containing 'properties' key.
        full_schema: The full schema for field lookups.
        path_prefix: Current path prefix for error reporting.
        issues: List to append issues to.
    """
    properties = props_container.get("properties", {})

    for field_name, field_schema in properties.items():
        field_path = f"{path_prefix}.{field_name}" if path_prefix else field_name

        # Check json_schema_extra (stored in various places depending on Pydantic version)
        extra = (
            field_schema.get("json_schema_extra")
            or field_schema.get("extra")
            or {}
        )

        # Validate show_when
        if "show_when" in extra:
            _validate_show_when(extra["show_when"], full_schema, f"{field_path}.show_when", issues)

        # Validate enum_labels
        if "enum_labels" in extra:
            enum_values = field_schema.get("enum")
            _validate_enum_labels(extra["enum_labels"], enum_values, f"{field_path}.enum_labels", issues)

        # Recurse into nested objects
        if field_schema.get("type") == "object":
            _validate_schema_properties(field_schema, full_schema, field_path, issues)

        # Handle allOf, anyOf, oneOf
        for combiner in ["allOf", "anyOf", "oneOf"]:
            if combiner in field_schema:
                for i, sub_schema in enumerate(field_schema[combiner]):
                    _validate_schema_properties(
                        sub_schema, full_schema, f"{field_path}.{combiner}[{i}]", issues
                    )

    # Handle $defs
    if "$defs" in props_container:
        for def_name, def_schema in props_container["$defs"].items():
            _validate_schema_properties(
                def_schema, full_schema, f"$defs.{def_name}", issues
            )


# =============================================================================
# OutputContract Validation
# =============================================================================


def validate_output_contract(workspace_or_path: Path) -> ValidationResult:
    """Validate OutputContract structure.

    Checks:
    - Contract can be loaded
    - Dataset keys are unique
    - kind/owner/cardinality/role values are allowed
    - kind matches schema.type
    - dimensions are unique within a dataset

    Args:
        workspace_or_path: Workspace path or direct path to output_contract.json.

    Returns:
        ValidationResult with issues found.
    """
    workspace_or_path = Path(workspace_or_path).resolve()
    issues: list[ValidationIssue] = []

    # Find and load the contract
    if workspace_or_path.is_file():
        contract = json.loads(workspace_or_path.read_text())
    else:
        contract_path = workspace_or_path / "dist" / "output_contract.json"
        if not contract_path.exists():
            # Try extracting from workspace
            try:
                script = '''
import json
from schema.output_contract import OUTPUT_CONTRACT
if hasattr(OUTPUT_CONTRACT, 'model_dump'):
    print(json.dumps(OUTPUT_CONTRACT.model_dump(mode="json")))
else:
    print(json.dumps(OUTPUT_CONTRACT.dict()))
'''
                env = {"PYTHONPATH": str(workspace_or_path), "PYTHONUNBUFFERED": "1"}
                result = subprocess.run(
                    [sys.executable, "-c", script],
                    cwd=workspace_or_path,
                    env={**os.environ, **env},
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if result.returncode == 0:
                    contract = json.loads(result.stdout)
                else:
                    issues.append(
                        ValidationIssue(
                            severity=ValidationSeverity.ERROR,
                            code="OUTPUT_CONTRACT_NOT_FOUND",
                            message="output_contract.json not found and could not extract from workspace",
                        )
                    )
                    return ValidationResult(valid=False, issues=issues)
            except Exception as e:
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="OUTPUT_CONTRACT_LOAD_FAILED",
                        message=str(e),
                    )
                )
                return ValidationResult(valid=False, issues=issues)
        else:
            contract = json.loads(contract_path.read_text())

    # Validate contract structure
    _validate_contract_structure(contract, issues)

    has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
    return ValidationResult(valid=not has_errors, issues=issues)
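
# A minimal contract that passes the structural checks below: one required
# "frame" dataset owned by stage. Keys and dimensions are illustrative.
_EXAMPLE_CONTRACT = {
    "datasets": [
        {
            "key": "pressure_curve",
            "kind": "frame",
            "owner": "stage",
            "cardinality": "many",
            "role": "primary",
            "schema": {"type": "frame"},
            "dimensions": ["stageId"],
        }
    ]
}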


def _validate_contract_structure(contract: dict[str, Any], issues: list[ValidationIssue]) -> None:
    """Validate OutputContract structure.

    Args:
        contract: Contract dict.
        issues: List to append issues to.
    """
    datasets = contract.get("datasets", [])

    # Check dataset key uniqueness
    dataset_keys = [ds.get("key") for ds in datasets if "key" in ds]
    duplicates = [k for k in dataset_keys if dataset_keys.count(k) > 1]
    if duplicates:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="OUTPUT_CONTRACT_DUPLICATE_DATASET_KEY",
                message=f"Duplicate dataset keys: {set(duplicates)}",
            )
        )

    allowed_kinds = {"frame", "object", "blob", "scalar"}
    allowed_owners = {"stage", "well", "platform"}
    allowed_cardinality = {"one", "many"}
    allowed_roles = {"primary", "supporting", "debug"}
    kind_schema_map = {
        "frame": {"frame"},
        "object": {"object"},
        "blob": {"blob"},
        "scalar": {"scalar"},
    }

    for ds in datasets:
        ds_key = ds.get("key", "unknown")
        kind = ds.get("kind")
        owner = ds.get("owner")
        cardinality = ds.get("cardinality")
        role = ds.get("role")
        schema = ds.get("schema") or {}
        schema_type = schema.get("type")

        if kind not in allowed_kinds:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="OUTPUT_CONTRACT_INVALID_KIND",
                    message=f"Invalid kind '{kind}' (expected one of {allowed_kinds})",
                    path=f"datasets.{ds_key}",
                )
            )

        if owner not in allowed_owners:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="OUTPUT_CONTRACT_INVALID_OWNER",
                    message=f"Invalid owner '{owner}' (expected one of {allowed_owners})",
                    path=f"datasets.{ds_key}",
                )
            )

        if cardinality not in allowed_cardinality:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="OUTPUT_CONTRACT_INVALID_CARDINALITY",
                    message=f"Invalid cardinality '{cardinality}' (expected one of {allowed_cardinality})",
                    path=f"datasets.{ds_key}",
                )
            )

        if role and role not in allowed_roles:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="OUTPUT_CONTRACT_INVALID_ROLE",
                    message=f"Invalid role '{role}' (expected one of {allowed_roles})",
                    path=f"datasets.{ds_key}",
                )
            )

        if kind and schema_type and schema_type not in kind_schema_map.get(kind, set()):
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="OUTPUT_CONTRACT_KIND_SCHEMA_MISMATCH",
                    message=f"Schema type '{schema_type}' incompatible with kind '{kind}'",
                    path=f"datasets.{ds_key}.schema",
                )
            )

        dimensions = ds.get("dimensions") or []
        if not isinstance(dimensions, list):
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="OUTPUT_CONTRACT_DIMENSIONS_NOT_LIST",
                    message="dimensions must be a list of strings",
                    path=f"datasets.{ds_key}.dimensions",
                )
            )
        else:
            dim_duplicates = [d for d in dimensions if dimensions.count(d) > 1]
            if dim_duplicates:
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="OUTPUT_CONTRACT_DUPLICATE_DIMENSION",
                        message=f"Duplicate dimensions: {set(dim_duplicates)}",
                        path=f"datasets.{ds_key}.dimensions",
                    )
                )


# =============================================================================
# Bundle Validation
# =============================================================================


def validate_bundle(bundle_path: Path) -> ValidationResult:
    """Validate bundle hash integrity.

    Checks:
    - manifest.json exists and is valid
    - ds.json hash matches manifest.specFiles.dsSha256
    - drs.json hash matches manifest.specFiles.drsSha256

    Args:
        bundle_path: Path to bundle directory.

    Returns:
        ValidationResult with issues found.
    """
    bundle_path = Path(bundle_path).resolve()
    issues: list[ValidationIssue] = []

    if not bundle_path.is_dir():
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="BUNDLE_NOT_FOUND",
                message=f"Bundle directory not found: {bundle_path}",
            )
        )
        return ValidationResult(valid=False, issues=issues)

    # Check required files
    manifest_path = bundle_path / "manifest.json"
    ds_path = bundle_path / "ds.json"
    drs_path = bundle_path / "drs.json"

    if not manifest_path.exists():
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="BUNDLE_MANIFEST_NOT_FOUND",
                message="manifest.json not found",
            )
        )
        return ValidationResult(valid=False, issues=issues)

    try:
        manifest = json.loads(manifest_path.read_text())
    except json.JSONDecodeError as e:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="BUNDLE_MANIFEST_INVALID_JSON",
                message=f"Invalid manifest.json: {e}",
            )
        )
        return ValidationResult(valid=False, issues=issues)

    spec_files = manifest.get("specFiles", {})

    # Validate ds.json hash
    if ds_path.exists():
        expected_hash = spec_files.get("dsSha256")
        if expected_hash:
            actual_hash = hashlib.sha256(ds_path.read_bytes()).hexdigest()
            if actual_hash != expected_hash:
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="BUNDLE_DS_HASH_MISMATCH",
                        message=f"ds.json hash mismatch: expected {expected_hash[:16]}..., got {actual_hash[:16]}...",
                        details={"expected": expected_hash, "actual": actual_hash},
                    )
                )
    else:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="BUNDLE_DS_NOT_FOUND",
                message="ds.json not found",
            )
        )

    # Validate drs.json hash
    if drs_path.exists():
        expected_hash = spec_files.get("drsSha256")
        if expected_hash:
            actual_hash = hashlib.sha256(drs_path.read_bytes()).hexdigest()
            if actual_hash != expected_hash:
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="BUNDLE_DRS_HASH_MISMATCH",
                        message=f"drs.json hash mismatch: expected {expected_hash[:16]}..., got {actual_hash[:16]}...",
                        details={"expected": expected_hash, "actual": actual_hash},
                    )
                )
    else:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="BUNDLE_DRS_NOT_FOUND",
                message="drs.json not found",
            )
        )

    has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
    return ValidationResult(valid=not has_errors, issues=issues)
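
# Sketch of how a bundle writer would produce the hashes this check verifies
# (bundle_dir is hypothetical; the specFiles keys match what is read above):
#
#   ds_sha = hashlib.sha256((bundle_dir / "ds.json").read_bytes()).hexdigest()
#   drs_sha = hashlib.sha256((bundle_dir / "drs.json").read_bytes()).hexdigest()
#   manifest = {"specFiles": {"dsSha256": ds_sha, "drsSha256": drs_sha}}
#   (bundle_dir / "manifest.json").write_text(json.dumps(manifest))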


# =============================================================================
# RunManifest vs OutputContract Validation
# =============================================================================


def validate_run_manifest(
    manifest_path: Path,
    contract_path: Path | None = None,
) -> ValidationResult:
    """Validate run output manifest against OutputContract.

    Checks:
    - Manifest structure is valid
    - All contract datasets are present in manifest
    - All contract items are present in manifest datasets
    - All contract artifacts are present in manifest items
    - kind/schema/mime consistency
    - dimensions key sets match

    Args:
        manifest_path: Path to output manifest.json.
        contract_path: Path to output_contract.json (optional).

    Returns:
        ValidationResult with issues found.
    """
    manifest_path = Path(manifest_path).resolve()
    issues: list[ValidationIssue] = []

    if not manifest_path.exists():
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="MANIFEST_NOT_FOUND",
                message=f"Manifest not found: {manifest_path}",
            )
        )
        return ValidationResult(valid=False, issues=issues)

    try:
        manifest = json.loads(manifest_path.read_text())
    except json.JSONDecodeError as e:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="MANIFEST_INVALID_JSON",
                message=f"Invalid manifest JSON: {e}",
            )
        )
        return ValidationResult(valid=False, issues=issues)

    # If no contract provided, just validate manifest structure
    if contract_path is None:
        has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
        return ValidationResult(valid=not has_errors, issues=issues)

    contract_path = Path(contract_path).resolve()
    if not contract_path.exists():
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="CONTRACT_NOT_FOUND",
                message=f"Contract not found: {contract_path}",
            )
        )
        return ValidationResult(valid=False, issues=issues)

    try:
        contract = json.loads(contract_path.read_text())
    except json.JSONDecodeError as e:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="CONTRACT_INVALID_JSON",
                message=f"Invalid contract JSON: {e}",
            )
        )
        return ValidationResult(valid=False, issues=issues)

    # Align manifest against contract
    _validate_manifest_against_contract(manifest, contract, issues)

    has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
    return ValidationResult(valid=not has_errors, issues=issues)
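
# Usage sketch (paths hypothetical): validate a finished run against the
# contract it was compiled with.
#
#   result = validate_run_manifest(
#       Path("runs/2024-01-01/manifest.json"),
#       contract_path=Path("dist/output_contract.json"),
#   )
#   if not result.valid:
#       for issue in result.errors:
#           print(issue.code, issue.path, issue.message)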


def _validate_manifest_against_contract(
    manifest: dict[str, Any],
    contract: dict[str, Any],
    issues: list[ValidationIssue],
) -> None:
    """Validate manifest against contract.

    Args:
        manifest: Run output manifest.
        contract: Output contract.
        issues: List to append issues to.
    """
    contract_datasets = {ds["key"]: ds for ds in contract.get("datasets", []) if "key" in ds}
    manifest_datasets = {
        ds.get("datasetKey") or ds.get("key"): ds for ds in manifest.get("datasets", [])
    }

    # Check all contract datasets are in manifest (if required)
    for ds_key, contract_ds in contract_datasets.items():
        if ds_key not in manifest_datasets:
            if contract_ds.get("required", True):
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="MANIFEST_MISSING_DATASET",
                        message=f"Contract dataset '{ds_key}' not found in manifest",
                        path=f"datasets.{ds_key}",
                    )
                )
            continue

        manifest_ds = manifest_datasets[ds_key]
        _validate_dataset_against_contract(manifest_ds, contract_ds, ds_key, issues)


def _validate_dataset_against_contract(
    manifest_ds: dict[str, Any],
    contract_ds: dict[str, Any],
    ds_key: str,
    issues: list[ValidationIssue],
) -> None:
    """Validate a single dataset against contract.

    Args:
        manifest_ds: Manifest dataset.
        contract_ds: Contract dataset.
        ds_key: Dataset key.
        issues: List to append issues to.
    """
    manifest_items = manifest_ds.get("items", [])
    required = contract_ds.get("required", True)
    cardinality = contract_ds.get("cardinality", "many")

    if cardinality == "one":
        if required and len(manifest_items) != 1:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="MANIFEST_CARDINALITY_ONE",
                    message="Cardinality 'one' dataset must have exactly one item when required",
                    path=f"datasets.{ds_key}",
                )
            )
        if not required and len(manifest_items) > 1:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="MANIFEST_CARDINALITY_ONE_OPTIONAL",
                    message="Cardinality 'one' optional dataset may have at most one item",
                    path=f"datasets.{ds_key}",
                )
            )
    elif cardinality == "many":
        if required and len(manifest_items) < 1:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="MANIFEST_CARDINALITY_MANY",
                    message="Cardinality 'many' required dataset must have at least one item",
                    path=f"datasets.{ds_key}",
                )
            )

    for idx, manifest_item in enumerate(manifest_items):
        _validate_item_against_contract(manifest_item, contract_ds, ds_key, f"item[{idx}]", issues)


def _validate_item_against_contract(
    manifest_item: dict[str, Any],
    contract_ds: dict[str, Any],
    ds_key: str,
    item_label: str,
    issues: list[ValidationIssue],
) -> None:
    """Validate a single item against contract.

    Args:
        manifest_item: Manifest item.
        contract_ds: Contract dataset.
        ds_key: Dataset key.
        item_label: Item label/index for errors.
        issues: List to append issues to.
    """
    path = f"datasets.{ds_key}.items.{item_label}"

    # Owner check
    expected_owner = contract_ds.get("owner")
    owner = manifest_item.get("owner", {})
    owner_ok = True
    if expected_owner == "stage":
        owner_ok = bool(owner.get("stageId"))
    elif expected_owner == "well":
        owner_ok = bool(owner.get("wellId"))
    elif expected_owner == "platform":
        owner_ok = bool(owner.get("platformId"))

    if expected_owner and not owner_ok:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="MANIFEST_MISSING_OWNER",
                message=f"Owner '{expected_owner}Id' required for dataset '{ds_key}'",
                path=path,
            )
        )

    # Dimensions check
    contract_dims = set(contract_ds.get("dimensions", []) or [])
    manifest_dims = set((manifest_item.get("dims") or {}).keys())
    if contract_dims and manifest_dims != contract_dims:
        missing = contract_dims - manifest_dims
        extra = manifest_dims - contract_dims
        if missing:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="MANIFEST_MISSING_DIMENSIONS",
                    message=f"Missing dimensions: {missing}",
                    path=path,
                )
            )
        if extra:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.WARNING,
                    code="MANIFEST_EXTRA_DIMENSIONS",
                    message=f"Extra dimensions not in contract: {extra}",
                    path=path,
                )
            )

    # Ensure dimension values are non-empty when present
    dims_dict = manifest_item.get("dims") or {}
    for dim_key in contract_dims:
        if dim_key in dims_dict:
            if dims_dict[dim_key] in (None, ""):
                issues.append(
                    ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="MANIFEST_DIMENSION_EMPTY",
                        message=f"Dimension '{dim_key}' must have a non-empty value",
                        path=f"{path}.dims.{dim_key}",
                    )
                )

    # Artifact check
    artifact = manifest_item.get("artifact")
    if artifact is None:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="MANIFEST_MISSING_ARTIFACT",
                message="Item missing artifact",
                path=path,
            )
        )
        return

    art_key = artifact.get("artifactKey") or artifact.get("key")
    art_type = artifact.get("type")
    if not art_key:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="MANIFEST_ARTIFACT_NO_KEY",
                message="Artifact missing artifactKey",
                path=path,
            )
        )

    kind_to_types = {
        "scalar": {"scalar"},
        "blob": {"blob"},
        "object": {"json", "object"},
        "frame": {"json", "parquet"},
    }
    expected_types = kind_to_types.get(contract_ds.get("kind"), set())
    if expected_types and art_type not in expected_types:
        issues.append(
            ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="MANIFEST_KIND_MISMATCH",
                message=f"Artifact type '{art_type}' incompatible with contract kind '{contract_ds.get('kind')}'",
                path=path,
            )
        )

    # For blob kind, check mime/ext consistency if provided
    if contract_ds.get("kind") == "blob":
        contract_schema = contract_ds.get("schema") or {}
        contract_mime = contract_schema.get("mime")
        if contract_mime and artifact.get("mimeType") and artifact.get("mimeType") != contract_mime:
            issues.append(
                ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="MANIFEST_BLOB_MIME_MISMATCH",
                    message=f"Artifact mimeType '{artifact.get('mimeType')}' does not match contract '{contract_mime}'",
                    path=path,
                )
            )
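
# Illustrative pairing that satisfies the per-item checks above: the owner key
# matches the contract owner, the dims keys equal the contract dimensions, and
# the artifact type is compatible with kind "frame". All values hypothetical.
_EXAMPLE_CONTRACT_DS = {
    "key": "pressure_curve",
    "kind": "frame",
    "owner": "stage",
    "dimensions": ["stageId"],
}
_EXAMPLE_MANIFEST_ITEM = {
    "owner": {"stageId": "stage-001"},
    "dims": {"stageId": "stage-001"},
    "artifact": {"artifactKey": "pressure_curve.parquet", "type": "parquet"},
}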


__all__ = [
    "ValidationSeverity",
    "ValidationIssue",
    "ValidationResult",
    "validate_inputspec",
    "validate_output_contract",
    "validate_bundle",
    "validate_run_manifest",
]