fraclab_sdk-0.1.0-py3-none-any.whl

Files changed (47)
  1. README.md +1601 -0
  2. fraclab_sdk/__init__.py +34 -0
  3. fraclab_sdk/algorithm/__init__.py +13 -0
  4. fraclab_sdk/algorithm/export.py +1 -0
  5. fraclab_sdk/algorithm/library.py +378 -0
  6. fraclab_sdk/cli.py +381 -0
  7. fraclab_sdk/config.py +54 -0
  8. fraclab_sdk/devkit/__init__.py +25 -0
  9. fraclab_sdk/devkit/compile.py +342 -0
  10. fraclab_sdk/devkit/export.py +354 -0
  11. fraclab_sdk/devkit/validate.py +1043 -0
  12. fraclab_sdk/errors.py +124 -0
  13. fraclab_sdk/materialize/__init__.py +8 -0
  14. fraclab_sdk/materialize/fsops.py +125 -0
  15. fraclab_sdk/materialize/hash.py +28 -0
  16. fraclab_sdk/materialize/materializer.py +241 -0
  17. fraclab_sdk/models/__init__.py +52 -0
  18. fraclab_sdk/models/bundle_manifest.py +51 -0
  19. fraclab_sdk/models/dataspec.py +65 -0
  20. fraclab_sdk/models/drs.py +47 -0
  21. fraclab_sdk/models/output_contract.py +111 -0
  22. fraclab_sdk/models/run_output_manifest.py +119 -0
  23. fraclab_sdk/results/__init__.py +25 -0
  24. fraclab_sdk/results/preview.py +150 -0
  25. fraclab_sdk/results/reader.py +329 -0
  26. fraclab_sdk/run/__init__.py +10 -0
  27. fraclab_sdk/run/logs.py +42 -0
  28. fraclab_sdk/run/manager.py +403 -0
  29. fraclab_sdk/run/subprocess_runner.py +153 -0
  30. fraclab_sdk/runtime/__init__.py +11 -0
  31. fraclab_sdk/runtime/artifacts.py +303 -0
  32. fraclab_sdk/runtime/data_client.py +123 -0
  33. fraclab_sdk/runtime/runner_main.py +286 -0
  34. fraclab_sdk/runtime/snapshot_provider.py +1 -0
  35. fraclab_sdk/selection/__init__.py +11 -0
  36. fraclab_sdk/selection/model.py +247 -0
  37. fraclab_sdk/selection/validate.py +54 -0
  38. fraclab_sdk/snapshot/__init__.py +12 -0
  39. fraclab_sdk/snapshot/index.py +94 -0
  40. fraclab_sdk/snapshot/library.py +205 -0
  41. fraclab_sdk/snapshot/loader.py +217 -0
  42. fraclab_sdk/specs/manifest.py +89 -0
  43. fraclab_sdk/utils/io.py +32 -0
  44. fraclab_sdk-0.1.0.dist-info/METADATA +1622 -0
  45. fraclab_sdk-0.1.0.dist-info/RECORD +47 -0
  46. fraclab_sdk-0.1.0.dist-info/WHEEL +4 -0
  47. fraclab_sdk-0.1.0.dist-info/entry_points.txt +4 -0
fraclab_sdk/devkit/validate.py
@@ -0,0 +1,1043 @@
+ """Validation tools for InputSpec, OutputContract, and run manifests.
+
+ Provides:
+ - InputSpec linting (json_schema_extra validation, show_when structure)
+ - OutputContract validation (structure, key uniqueness)
+ - Bundle validation (hash integrity)
+ - RunManifest vs OutputContract alignment validation
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import os
+ import subprocess
+ import sys
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from pathlib import Path
+ from typing import Any
+
+ from fraclab_sdk.errors import AlgorithmError
+
+
+ class ValidationSeverity(Enum):
+     """Severity level for validation issues."""
+
+     ERROR = "error"
+     WARNING = "warning"
+     INFO = "info"
+
+
+ @dataclass
+ class ValidationIssue:
+     """A single validation issue."""
+
+     severity: ValidationSeverity
+     code: str
+     message: str
+     path: str | None = None
+     details: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class ValidationResult:
+     """Result of validation."""
+
+     valid: bool
+     issues: list[ValidationIssue] = field(default_factory=list)
+
+     @property
+     def errors(self) -> list[ValidationIssue]:
+         """Get error-level issues."""
+         return [i for i in self.issues if i.severity == ValidationSeverity.ERROR]
+
+     @property
+     def warnings(self) -> list[ValidationIssue]:
+         """Get warning-level issues."""
+         return [i for i in self.issues if i.severity == ValidationSeverity.WARNING]
+
+
+ # =============================================================================
+ # InputSpec Validation
+ # =============================================================================
+
+ # Valid show_when operators
+ VALID_SHOW_WHEN_OPS = {"eq", "neq", "in", "nin", "gt", "gte", "lt", "lte", "exists"}
+
+
+ def _validate_show_when_condition(
+     condition: dict[str, Any], schema: dict[str, Any], path: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate a single show_when condition.
+
+     Args:
+         condition: The condition dict {field, op, value}.
+         schema: The full JSON schema for field lookup.
+         path: Current path for error reporting.
+         issues: List to append issues to.
+     """
+     if not isinstance(condition, dict):
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="SHOW_WHEN_INVALID_CONDITION",
+                 message="show_when condition must be a dict",
+                 path=path,
+             )
+         )
+         return
+
+     # Check required keys
+     if "field" not in condition:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="SHOW_WHEN_MISSING_FIELD",
+                 message="show_when condition missing 'field' key",
+                 path=path,
+             )
+         )
+         return
+
+     field_path = condition["field"]
+     op = condition.get("op", "eq")
+
+     # Validate operator
+     if op not in VALID_SHOW_WHEN_OPS:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="SHOW_WHEN_INVALID_OP",
+                 message=f"Invalid show_when operator: {op}. Valid: {VALID_SHOW_WHEN_OPS}",
+                 path=path,
+             )
+         )
+
+     # Validate field path exists in schema
+     if not _field_exists_in_schema(field_path, schema):
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="SHOW_WHEN_FIELD_NOT_FOUND",
+                 message=f"show_when references non-existent field: {field_path}",
+                 path=path,
+                 details={"field": field_path},
+             )
+         )
+
+
+ def _validate_show_when(
+     show_when: Any, schema: dict[str, Any], path: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate show_when structure.
+
+     Supports:
+     - Single condition: {field, op, value}
+     - AND list: [{cond1}, {cond2}]
+     - OR object: {"or": [{cond1}, {cond2}]}
+
+     Args:
+         show_when: The show_when value.
+         schema: Full JSON schema for field lookup.
+         path: Current path for error reporting.
+         issues: List to append issues to.
+     """
+     if show_when is None:
+         return
+
+     if isinstance(show_when, dict):
+         if "or" in show_when:
+             # OR object
+             or_conditions = show_when["or"]
+             if not isinstance(or_conditions, list):
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="SHOW_WHEN_INVALID_OR",
+                         message="show_when 'or' must be a list",
+                         path=path,
+                     )
+                 )
+             else:
+                 for i, cond in enumerate(or_conditions):
+                     _validate_show_when_condition(cond, schema, f"{path}.or[{i}]", issues)
+         elif "and" in show_when:
+             # AND object (explicit)
+             and_conditions = show_when["and"]
+             if not isinstance(and_conditions, list):
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="SHOW_WHEN_INVALID_AND",
+                         message="show_when 'and' must be a list",
+                         path=path,
+                     )
+                 )
+             else:
+                 for i, cond in enumerate(and_conditions):
+                     _validate_show_when_condition(cond, schema, f"{path}.and[{i}]", issues)
+         else:
+             # Single condition
+             _validate_show_when_condition(show_when, schema, path, issues)
+
+     elif isinstance(show_when, list):
+         # Implicit AND list
+         for i, cond in enumerate(show_when):
+             _validate_show_when_condition(cond, schema, f"{path}[{i}]", issues)
+
+     else:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="SHOW_WHEN_INVALID_TYPE",
+                 message=f"show_when must be dict or list, got {type(show_when).__name__}",
+                 path=path,
+             )
+         )
+
+
+ def _field_exists_in_schema(field_path: str, schema: dict[str, Any]) -> bool:
+     """Check if a field path exists in a JSON schema.
+
+     Supports dot notation: "parent.child.field"
+
+     Args:
+         field_path: Dot-separated field path.
+         schema: JSON schema dict.
+
+     Returns:
+         True if field exists.
+     """
+     parts = field_path.split(".")
+     current = schema.get("properties", {})
+
+     for i, part in enumerate(parts):
+         if part not in current:
+             return False
+         prop = current[part]
+
+         # Last part - field exists
+         if i == len(parts) - 1:
+             return True
+
+         # Navigate into nested object
+         if prop.get("type") == "object":
+             current = prop.get("properties", {})
+         elif "$ref" in prop:
+             # Handle $ref - simplified, assumes $defs at root
+             ref = prop["$ref"]
+             if ref.startswith("#/$defs/"):
+                 def_name = ref.split("/")[-1]
+                 defs = schema.get("$defs", {})
+                 if def_name in defs:
+                     current = defs[def_name].get("properties", {})
+                 else:
+                     return False
+             else:
+                 return False
+         else:
+             return False
+
+     return True
+
+
+ def _validate_enum_labels(
+     enum_labels: dict[str, str],
+     enum_values: list[Any] | None,
+     path: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate enum_labels keys match enum values.
+
+     Args:
+         enum_labels: The enum_labels dict.
+         enum_values: The enum values from schema (if available).
+         path: Current path for error reporting.
+         issues: List to append issues to.
+     """
+     if enum_values is None:
+         return
+
+     enum_values_str = {str(v) for v in enum_values}
+     for key in enum_labels:
+         if str(key) not in enum_values_str:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.WARNING,
+                     code="ENUM_LABEL_UNKNOWN_VALUE",
+                     message=f"enum_labels key '{key}' not in enum values: {enum_values}",
+                     path=path,
+                 )
+             )
+
+
+ def _extract_schema_from_workspace(workspace: Path) -> dict[str, Any]:
+     """Extract JSON schema from workspace InputSpec.
+
+     Args:
+         workspace: Algorithm workspace.
+
+     Returns:
+         JSON schema dict.
+     """
+     script = '''
+ import json
+ import sys
+
+ try:
+     from schema.inputspec import INPUT_SPEC
+     model = INPUT_SPEC
+     schema = model.model_json_schema()
+     print(json.dumps(schema))
+ except Exception as e:
+     print(json.dumps({"error": str(e)}))
+ '''
+
+     env = {"PYTHONPATH": str(workspace), "PYTHONUNBUFFERED": "1"}
+     result = subprocess.run(
+         [sys.executable, "-c", script],
+         cwd=workspace,
+         env={**os.environ, **env},
+         capture_output=True,
+         text=True,
+         timeout=30,
+     )
+
+     if result.returncode != 0:
+         raise AlgorithmError(f"Failed to extract schema: {result.stderr}")
+
+     data = json.loads(result.stdout)
+     if "error" in data:
+         raise AlgorithmError(f"Failed to extract schema: {data['error']}")
+
+     return data
+
+
+ def validate_inputspec(workspace: Path) -> ValidationResult:
+     """Validate InputSpec (schema.inputspec:INPUT_SPEC, legacy CONFIG_MODEL).
+
+     Checks:
+     - Schema can be generated
+     - json_schema_extra fields are valid
+     - show_when conditions reference existing fields
+     - enum_labels keys match enum values
+
+     Args:
+         workspace: Algorithm workspace path.
+
+     Returns:
+         ValidationResult with issues found.
+     """
+     workspace = Path(workspace).resolve()
+     issues: list[ValidationIssue] = []
+
+     # Extract schema
+     try:
+         schema = _extract_schema_from_workspace(workspace)
+     except AlgorithmError as e:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="INPUTSPEC_LOAD_FAILED",
+                 message=str(e),
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     # Validate properties
+     _validate_schema_properties(schema, schema, "", issues)
+
+     # Check for required fields
+     if "properties" not in schema:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="INPUTSPEC_NO_PROPERTIES",
+                 message="Schema has no properties defined",
+             )
+         )
+
+     has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
+     return ValidationResult(valid=not has_errors, issues=issues)
+
+
+ def _validate_schema_properties(
+     props_container: dict[str, Any],
+     full_schema: dict[str, Any],
+     path_prefix: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Recursively validate schema properties.
+
+     Args:
+         props_container: Dict containing 'properties' key.
+         full_schema: The full schema for field lookups.
+         path_prefix: Current path prefix for error reporting.
+         issues: List to append issues to.
+     """
+     properties = props_container.get("properties", {})
+
+     for field_name, field_schema in properties.items():
+         field_path = f"{path_prefix}.{field_name}" if path_prefix else field_name
+
+         # Check json_schema_extra (stored in various places depending on Pydantic version)
+         extra = (
+             field_schema.get("json_schema_extra")
+             or field_schema.get("extra")
+             or {}
+         )
+
+         # Validate show_when
+         if "show_when" in extra:
+             _validate_show_when(extra["show_when"], full_schema, f"{field_path}.show_when", issues)
+
+         # Validate enum_labels
+         if "enum_labels" in extra:
+             enum_values = field_schema.get("enum")
+             _validate_enum_labels(extra["enum_labels"], enum_values, f"{field_path}.enum_labels", issues)
+
+         # Recurse into nested objects
+         if field_schema.get("type") == "object":
+             _validate_schema_properties(field_schema, full_schema, field_path, issues)
+
+         # Handle allOf, anyOf, oneOf
+         for combiner in ["allOf", "anyOf", "oneOf"]:
+             if combiner in field_schema:
+                 for i, sub_schema in enumerate(field_schema[combiner]):
+                     _validate_schema_properties(
+                         sub_schema, full_schema, f"{field_path}.{combiner}[{i}]", issues
+                     )
+
+     # Handle $defs
+     if "$defs" in props_container:
+         for def_name, def_schema in props_container["$defs"].items():
+             _validate_schema_properties(
+                 def_schema, full_schema, f"$defs.{def_name}", issues
+             )
+
+
+ # =============================================================================
+ # OutputContract Validation
+ # =============================================================================
+
+
+ def validate_output_contract(workspace_or_path: Path) -> ValidationResult:
+     """Validate OutputContract structure.
+
+     Checks:
+     - Contract can be loaded
+     - Dataset keys are unique
+     - kind, owner, cardinality, and role values are allowed
+     - kind matches schema.type
+     - dimensions are a list without duplicates
+
+     Args:
+         workspace_or_path: Workspace path or direct path to output_contract.json.
+
+     Returns:
+         ValidationResult with issues found.
+     """
+     workspace_or_path = Path(workspace_or_path).resolve()
+     issues: list[ValidationIssue] = []
+     contract: dict[str, Any] | None = None
+
+     # Find contract file
+     if workspace_or_path.is_file():
+         contract_path = workspace_or_path
+     else:
+         contract_path = workspace_or_path / "dist" / "output_contract.json"
+         if not contract_path.exists():
+             # Try extracting from workspace
+             try:
+                 script = '''
+ import json
+ from schema.output_contract import OUTPUT_CONTRACT
+ if hasattr(OUTPUT_CONTRACT, 'model_dump'):
+     print(json.dumps(OUTPUT_CONTRACT.model_dump(mode="json")))
+ else:
+     print(json.dumps(OUTPUT_CONTRACT.dict()))
+ '''
+                 env = {"PYTHONPATH": str(workspace_or_path), "PYTHONUNBUFFERED": "1"}
+                 result = subprocess.run(
+                     [sys.executable, "-c", script],
+                     cwd=workspace_or_path,
+                     env={**os.environ, **env},
+                     capture_output=True,
+                     text=True,
+                     timeout=30,
+                 )
+                 if result.returncode == 0:
+                     contract = json.loads(result.stdout)
+                 else:
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code="OUTPUT_CONTRACT_NOT_FOUND",
+                             message="output_contract.json not found and could not extract from workspace",
+                         )
+                     )
+                     return ValidationResult(valid=False, issues=issues)
+             except Exception as e:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="OUTPUT_CONTRACT_LOAD_FAILED",
+                         message=str(e),
+                     )
+                 )
+                 return ValidationResult(valid=False, issues=issues)
+
+     # Load from disk unless already extracted from the workspace
+     if contract is None:
+         contract = json.loads(contract_path.read_text())
+
+     # Validate contract structure
+     _validate_contract_structure(contract, issues)
+
+     has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
+     return ValidationResult(valid=not has_errors, issues=issues)
+
+
+ def _validate_contract_structure(contract: dict[str, Any], issues: list[ValidationIssue]) -> None:
+     """Validate OutputContract structure.
+
+     Args:
+         contract: Contract dict.
+         issues: List to append issues to.
+     """
+     datasets = contract.get("datasets", [])
+
+     # Check dataset key uniqueness
+     dataset_keys = [ds.get("key") for ds in datasets if "key" in ds]
+     duplicates = [k for k in dataset_keys if dataset_keys.count(k) > 1]
+     if duplicates:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="OUTPUT_CONTRACT_DUPLICATE_DATASET_KEY",
+                 message=f"Duplicate dataset keys: {set(duplicates)}",
+             )
+         )
+
+     allowed_kinds = {"frame", "object", "blob", "scalar"}
+     allowed_owners = {"stage", "well", "platform"}
+     allowed_cardinality = {"one", "many"}
+     allowed_roles = {"primary", "supporting", "debug"}
+     kind_schema_map = {
+         "frame": {"frame"},
+         "object": {"object"},
+         "blob": {"blob"},
+         "scalar": {"scalar"},
+     }
+
+     for ds in datasets:
+         ds_key = ds.get("key", "unknown")
+         kind = ds.get("kind")
+         owner = ds.get("owner")
+         cardinality = ds.get("cardinality")
+         role = ds.get("role")
+         schema = ds.get("schema") or {}
+         schema_type = schema.get("type")
+
+         if kind not in allowed_kinds:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="OUTPUT_CONTRACT_INVALID_KIND",
+                     message=f"Invalid kind '{kind}' (expected one of {allowed_kinds})",
+                     path=f"datasets.{ds_key}",
+                 )
+             )
+
+         if owner not in allowed_owners:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="OUTPUT_CONTRACT_INVALID_OWNER",
+                     message=f"Invalid owner '{owner}' (expected one of {allowed_owners})",
+                     path=f"datasets.{ds_key}",
+                 )
+             )
+
+         if cardinality not in allowed_cardinality:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="OUTPUT_CONTRACT_INVALID_CARDINALITY",
+                     message=f"Invalid cardinality '{cardinality}' (expected one of {allowed_cardinality})",
+                     path=f"datasets.{ds_key}",
+                 )
+             )
+
+         if role and role not in allowed_roles:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="OUTPUT_CONTRACT_INVALID_ROLE",
+                     message=f"Invalid role '{role}' (expected one of {allowed_roles})",
+                     path=f"datasets.{ds_key}",
+                 )
+             )
+
+         if kind and schema_type and schema_type not in kind_schema_map.get(kind, set()):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="OUTPUT_CONTRACT_KIND_SCHEMA_MISMATCH",
+                     message=f"Schema type '{schema_type}' incompatible with kind '{kind}'",
+                     path=f"datasets.{ds_key}.schema",
+                 )
+             )
+
+         dimensions = ds.get("dimensions") or []
+         if not isinstance(dimensions, list):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="OUTPUT_CONTRACT_DIMENSIONS_NOT_LIST",
+                     message="dimensions must be a list of strings",
+                     path=f"datasets.{ds_key}.dimensions",
+                 )
+             )
+         else:
+             dim_duplicates = [d for d in dimensions if dimensions.count(d) > 1]
+             if dim_duplicates:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="OUTPUT_CONTRACT_DUPLICATE_DIMENSION",
+                         message=f"Duplicate dimensions: {set(dim_duplicates)}",
+                         path=f"datasets.{ds_key}.dimensions",
+                     )
+                 )
+
+
+ # =============================================================================
+ # Bundle Validation
+ # =============================================================================
+
+
+ def validate_bundle(bundle_path: Path) -> ValidationResult:
+     """Validate bundle hash integrity.
+
+     Checks:
+     - manifest.json exists and is valid
+     - ds.json hash matches manifest.specFiles.dsSha256
+     - drs.json hash matches manifest.specFiles.drsSha256
+
+     Args:
+         bundle_path: Path to bundle directory.
+
+     Returns:
+         ValidationResult with issues found.
+     """
+     bundle_path = Path(bundle_path).resolve()
+     issues: list[ValidationIssue] = []
+
+     if not bundle_path.is_dir():
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="BUNDLE_NOT_FOUND",
+                 message=f"Bundle directory not found: {bundle_path}",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     # Check required files
+     manifest_path = bundle_path / "manifest.json"
+     ds_path = bundle_path / "ds.json"
+     drs_path = bundle_path / "drs.json"
+
+     if not manifest_path.exists():
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="BUNDLE_MANIFEST_NOT_FOUND",
+                 message="manifest.json not found",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     try:
+         manifest = json.loads(manifest_path.read_text())
+     except json.JSONDecodeError as e:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="BUNDLE_MANIFEST_INVALID_JSON",
+                 message=f"Invalid manifest.json: {e}",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     spec_files = manifest.get("specFiles", {})
+
+     # Validate ds.json hash
+     if ds_path.exists():
+         expected_hash = spec_files.get("dsSha256")
+         if expected_hash:
+             actual_hash = hashlib.sha256(ds_path.read_bytes()).hexdigest()
+             if actual_hash != expected_hash:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="BUNDLE_DS_HASH_MISMATCH",
+                         message=f"ds.json hash mismatch: expected {expected_hash[:16]}..., got {actual_hash[:16]}...",
+                         details={"expected": expected_hash, "actual": actual_hash},
+                     )
+                 )
+     else:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="BUNDLE_DS_NOT_FOUND",
+                 message="ds.json not found",
+             )
+         )
+
+     # Validate drs.json hash
+     if drs_path.exists():
+         expected_hash = spec_files.get("drsSha256")
+         if expected_hash:
+             actual_hash = hashlib.sha256(drs_path.read_bytes()).hexdigest()
+             if actual_hash != expected_hash:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="BUNDLE_DRS_HASH_MISMATCH",
+                         message=f"drs.json hash mismatch: expected {expected_hash[:16]}..., got {actual_hash[:16]}...",
+                         details={"expected": expected_hash, "actual": actual_hash},
+                     )
+                 )
+     else:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="BUNDLE_DRS_NOT_FOUND",
+                 message="drs.json not found",
+             )
+         )
+
+     has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
+     return ValidationResult(valid=not has_errors, issues=issues)
+
+
+ # =============================================================================
+ # RunManifest vs OutputContract Validation
+ # =============================================================================
+
+
+ def validate_run_manifest(
+     manifest_path: Path,
+     contract_path: Path | None = None,
+ ) -> ValidationResult:
+     """Validate run output manifest against OutputContract.
+
+     Checks:
+     - Manifest structure is valid
+     - All required contract datasets are present in the manifest
+     - Item counts respect dataset cardinality
+     - Owner and dimension keys match the contract
+     - Artifact kind/schema/mime consistency
+
+     Args:
+         manifest_path: Path to output manifest.json.
+         contract_path: Path to output_contract.json (optional).
+
+     Returns:
+         ValidationResult with issues found.
+     """
+     manifest_path = Path(manifest_path).resolve()
+     issues: list[ValidationIssue] = []
+
+     if not manifest_path.exists():
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="MANIFEST_NOT_FOUND",
+                 message=f"Manifest not found: {manifest_path}",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     try:
+         manifest = json.loads(manifest_path.read_text())
+     except json.JSONDecodeError as e:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="MANIFEST_INVALID_JSON",
+                 message=f"Invalid manifest JSON: {e}",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     # If no contract provided, just validate manifest structure
+     if contract_path is None:
+         has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
+         return ValidationResult(valid=not has_errors, issues=issues)
+
+     contract_path = Path(contract_path).resolve()
+     if not contract_path.exists():
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="CONTRACT_NOT_FOUND",
+                 message=f"Contract not found: {contract_path}",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     try:
+         contract = json.loads(contract_path.read_text())
+     except json.JSONDecodeError as e:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="CONTRACT_INVALID_JSON",
+                 message=f"Invalid contract JSON: {e}",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     # Align manifest against contract
+     _validate_manifest_against_contract(manifest, contract, issues)
+
+     has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
+     return ValidationResult(valid=not has_errors, issues=issues)
+
+
+ def _validate_manifest_against_contract(
+     manifest: dict[str, Any],
+     contract: dict[str, Any],
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate manifest against contract.
+
+     Args:
+         manifest: Run output manifest.
+         contract: Output contract.
+         issues: List to append issues to.
+     """
+     contract_datasets = {ds["key"]: ds for ds in contract.get("datasets", []) if "key" in ds}
+     manifest_datasets = {
+         ds.get("datasetKey") or ds.get("key"): ds for ds in manifest.get("datasets", [])
+     }
+
+     # Check all contract datasets are in manifest (if required)
+     for ds_key, contract_ds in contract_datasets.items():
+         if ds_key not in manifest_datasets:
+             if contract_ds.get("required", True):
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="MANIFEST_MISSING_DATASET",
+                         message=f"Contract dataset '{ds_key}' not found in manifest",
+                         path=f"datasets.{ds_key}",
+                     )
+                 )
+             continue
+
+         manifest_ds = manifest_datasets[ds_key]
+         _validate_dataset_against_contract(manifest_ds, contract_ds, ds_key, issues)
+
+
+ def _validate_dataset_against_contract(
+     manifest_ds: dict[str, Any],
+     contract_ds: dict[str, Any],
+     ds_key: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate a single dataset against contract.
+
+     Args:
+         manifest_ds: Manifest dataset.
+         contract_ds: Contract dataset.
+         ds_key: Dataset key.
+         issues: List to append issues to.
+     """
+     manifest_items = manifest_ds.get("items", [])
+     required = contract_ds.get("required", True)
+     cardinality = contract_ds.get("cardinality", "many")
+
+     if cardinality == "one":
+         if required and len(manifest_items) != 1:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="MANIFEST_CARDINALITY_ONE",
+                     message="Cardinality 'one' dataset must have exactly one item when required",
+                     path=f"datasets.{ds_key}",
+                 )
+             )
+         if not required and len(manifest_items) > 1:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="MANIFEST_CARDINALITY_ONE_OPTIONAL",
+                     message="Cardinality 'one' optional dataset may have at most one item",
+                     path=f"datasets.{ds_key}",
+                 )
+             )
+     elif cardinality == "many":
+         if required and len(manifest_items) < 1:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="MANIFEST_CARDINALITY_MANY",
+                     message="Cardinality 'many' required dataset must have at least one item",
+                     path=f"datasets.{ds_key}",
+                 )
+             )
+
+     for idx, manifest_item in enumerate(manifest_items):
+         _validate_item_against_contract(manifest_item, contract_ds, ds_key, f"item[{idx}]", issues)
+
+
+ def _validate_item_against_contract(
+     manifest_item: dict[str, Any],
+     contract_ds: dict[str, Any],
+     ds_key: str,
+     item_label: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate a single item against contract.
+
+     Args:
+         manifest_item: Manifest item.
+         contract_ds: Contract dataset.
+         ds_key: Dataset key.
+         item_label: Item label/index for errors.
+         issues: List to append issues to.
+     """
+     path = f"datasets.{ds_key}.items.{item_label}"
+
+     # Owner check
+     expected_owner = contract_ds.get("owner")
+     owner = manifest_item.get("owner", {})
+     owner_ok = True
+     if expected_owner == "stage":
+         owner_ok = bool(owner.get("stageId"))
+     elif expected_owner == "well":
+         owner_ok = bool(owner.get("wellId"))
+     elif expected_owner == "platform":
+         owner_ok = bool(owner.get("platformId"))
+
+     if expected_owner and not owner_ok:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="MANIFEST_MISSING_OWNER",
+                 message=f"Owner '{expected_owner}Id' required for dataset '{ds_key}'",
+                 path=path,
+             )
+         )
+
+     # Dimensions check
+     contract_dims = set(contract_ds.get("dimensions", []) or [])
+     manifest_dims = set((manifest_item.get("dims") or {}).keys())
+     if contract_dims and manifest_dims != contract_dims:
+         missing = contract_dims - manifest_dims
+         extra = manifest_dims - contract_dims
+         if missing:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="MANIFEST_MISSING_DIMENSIONS",
+                     message=f"Missing dimensions: {missing}",
+                     path=path,
+                 )
+             )
+         if extra:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.WARNING,
+                     code="MANIFEST_EXTRA_DIMENSIONS",
+                     message=f"Extra dimensions not in contract: {extra}",
+                     path=path,
+                 )
+             )
+
+     # Ensure dimension values are non-empty when present
+     dims_dict = manifest_item.get("dims") or {}
+     for dim_key in contract_dims:
+         if dim_key in dims_dict:
+             if dims_dict[dim_key] in (None, ""):
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="MANIFEST_DIMENSION_EMPTY",
+                         message=f"Dimension '{dim_key}' must have a non-empty value",
+                         path=f"{path}.dims.{dim_key}",
+                     )
+                 )
+
+     # Artifact check
+     artifact = manifest_item.get("artifact")
+     if artifact is None:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="MANIFEST_MISSING_ARTIFACT",
+                 message="Item missing artifact",
+                 path=path,
+             )
+         )
+         return
+
+     art_key = artifact.get("artifactKey") or artifact.get("key")
+     art_type = artifact.get("type")
+     if not art_key:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="MANIFEST_ARTIFACT_NO_KEY",
+                 message="Artifact missing artifactKey",
+                 path=path,
+             )
+         )
+
+     kind_to_types = {
+         "scalar": {"scalar"},
+         "blob": {"blob"},
+         "object": {"json", "object"},
+         "frame": {"json", "parquet"},
+     }
+     expected_types = kind_to_types.get(contract_ds.get("kind"), set())
+     if expected_types and art_type not in expected_types:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="MANIFEST_KIND_MISMATCH",
+                 message=f"Artifact type '{art_type}' incompatible with contract kind '{contract_ds.get('kind')}'",
+                 path=path,
+             )
+         )
+
+     # For blob kind, check mime/ext consistency if provided
+     if contract_ds.get("kind") == "blob":
+         contract_schema = contract_ds.get("schema") or {}
+         contract_mime = contract_schema.get("mime")
+         if contract_mime and artifact.get("mimeType") and artifact.get("mimeType") != contract_mime:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="MANIFEST_BLOB_MIME_MISMATCH",
+                     message=f"Artifact mimeType '{artifact.get('mimeType')}' does not match contract '{contract_mime}'",
+                     path=path,
+                 )
+             )
+
+
+ __all__ = [
+     "ValidationSeverity",
+     "ValidationIssue",
+     "ValidationResult",
+     "validate_inputspec",
+     "validate_output_contract",
+     "validate_bundle",
+     "validate_run_manifest",
+ ]