fraclab-sdk 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,12 +5,15 @@ Provides:
  - OutputContract validation (structure, key uniqueness)
  - Bundle validation (hash integrity)
  - RunManifest vs OutputContract alignment validation
+ - Algorithm signature validation
  """

  from __future__ import annotations

+ import ast
  import hashlib
  import json
+ import re
  import subprocess
  import sys
  from dataclasses import dataclass, field
@@ -62,8 +65,136 @@ class ValidationResult:
  # InputSpec Validation
  # =============================================================================

- # Valid show_when operators
- VALID_SHOW_WHEN_OPS = {"eq", "neq", "in", "nin", "gt", "gte", "lt", "lte", "exists"}
+ # Allowed json_schema_extra keys (spec-defined)
+ ALLOWED_JSON_SCHEMA_EXTRA_KEYS = {
+     "group", "unit", "step", "ui_type", "show_when",
+     "enum_labels", "order", "collapsible"
+ }
+
+ # Type constraints for json_schema_extra keys
+ JSON_SCHEMA_EXTRA_TYPES: dict[str, type | tuple[type, ...]] = {
+     "group": str,
+     "unit": str,
+     "ui_type": str,
+     "order": int,
+     "collapsible": bool,
+     "step": (int, float),
+ }
+
+ # Canonical show_when operators (per InputSpec spec)
+ CANONICAL_SHOW_WHEN_OPS = {
+     "equals", "not_equals", "gt", "gte", "lt", "lte", "in", "not_in"
+ }
+
+ # Operator aliases (normalized to canonical form)
+ SHOW_WHEN_OP_ALIASES = {
+     "eq": "equals",
+     "neq": "not_equals",
+     "nin": "not_in",
+ }
+
+ # Numeric operators (require numeric field and value)
+ NUMERIC_SHOW_WHEN_OPS = {"gt", "gte", "lt", "lte"}
+
+ # Array operators (require array value)
+ ARRAY_SHOW_WHEN_OPS = {"in", "not_in"}
+
+ # Pattern to detect snake_case
+ SNAKE_CASE_PATTERN = re.compile(r"[a-z]+_[a-z]+")
+
+
+ def _to_camel_case(snake_str: str) -> str:
+     """Convert snake_case to camelCase."""
+     parts = snake_str.split("_")
+     return parts[0] + "".join(p.capitalize() for p in parts[1:])
+
+
+ def _resolve_ref(ref_path: str, root_schema: dict[str, Any]) -> dict[str, Any] | None:
+     """Resolve a $ref path in JSON schema."""
+     if not ref_path.startswith("#/"):
+         return None
+     parts = ref_path[2:].split("/")
+     current = root_schema
+     for part in parts:
+         if isinstance(current, dict) and part in current:
+             current = current[part]
+         else:
+             return None
+     return current if isinstance(current, dict) else None
+
+
+ def _merge_all_of(all_of_list: list[dict], root_schema: dict[str, Any]) -> dict[str, Any]:
+     """Merge allOf schemas into single view for path resolution."""
+     merged: dict[str, Any] = {"properties": {}}
+     for sub in all_of_list:
+         resolved = sub
+         if "$ref" in sub:
+             resolved = _resolve_ref(sub["$ref"], root_schema) or sub
+         merged["properties"].update(resolved.get("properties", {}))
+         if "type" in resolved:
+             merged["type"] = resolved["type"]
+     return merged
+
+
+ def _unwrap_any_of(any_of_list: list[dict]) -> dict[str, Any]:
+     """Unwrap anyOf, preferring the non-null branch."""
+     non_null = [s for s in any_of_list if s.get("type") != "null"]
+     if len(non_null) == 1:
+         return non_null[0]
+     return non_null[0] if non_null else any_of_list[0]
+
+
+ def _resolve_field_in_schema(
+     field_path: str, schema: dict[str, Any]
+ ) -> dict[str, Any] | None:
+     """Resolve field path in JSON Schema, handling $ref, allOf, anyOf/oneOf.
+
+     Args:
+         field_path: Dot-separated field path (e.g., "denoise.enable").
+         schema: Root JSON schema dict.
+
+     Returns:
+         Field schema if found, None otherwise.
+     """
+     segments = field_path.split(".")
+     current = schema
+
+     for segment in segments:
+         # Resolve $ref
+         if "$ref" in current:
+             resolved = _resolve_ref(current["$ref"], schema)
+             if resolved is None:
+                 return None
+             current = resolved
+
+         # Merge allOf (common in Pydantic v2 for inheritance)
+         if "allOf" in current:
+             current = _merge_all_of(current["allOf"], schema)
+
+         # Unwrap anyOf/oneOf (find "real type")
+         if "anyOf" in current:
+             current = _unwrap_any_of(current["anyOf"])
+         if "oneOf" in current:
+             current = _unwrap_any_of(current["oneOf"])
+
+         props = current.get("properties", {})
+         if segment not in props:
+             return None
+         current = props[segment]
+
+     # Final resolution for the target field
+     if "$ref" in current:
+         resolved = _resolve_ref(current["$ref"], schema)
+         if resolved:
+             current = resolved
+     if "allOf" in current:
+         current = _merge_all_of(current["allOf"], schema)
+     if "anyOf" in current:
+         current = _unwrap_any_of(current["anyOf"])
+     if "oneOf" in current:
+         current = _unwrap_any_of(current["oneOf"])
+
+     return current


  def _validate_show_when_condition(
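
The new _resolve_field_in_schema helper is what show_when field lookups go through. A minimal sketch of how it resolves a dotted path through a Pydantic-v2-style schema (the schema below is illustrative, not taken from the package):

    # Hypothetical schema: nested model referenced via $ref, as Pydantic v2 emits it.
    schema = {
        "properties": {
            "denoise": {"$ref": "#/$defs/Denoise"},
        },
        "$defs": {
            "Denoise": {"properties": {"enable": {"type": "boolean"}}},
        },
    }

    # Walks properties -> $ref -> properties and returns the leaf field schema.
    assert _resolve_field_in_schema("denoise.enable", schema) == {"type": "boolean"}

One apparent limitation worth noting: when an anyOf unwrap exposes a nested $ref mid-path (the shape Pydantic v2 emits for Optional[SubModel]), the loop does not re-resolve that $ref before the properties lookup, so such parents appear not to resolve.
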
@@ -101,30 +232,115 @@ def _validate_show_when_condition(
          return

      field_path = condition["field"]
-     op = condition.get("op", "eq")
+     op = condition.get("op", "equals")
+     value = condition.get("value")
+
+     # Check for snake_case in field path (must be ERROR with fix suggestion)
+     if SNAKE_CASE_PATTERN.search(field_path):
+         segments = field_path.split(".")
+         suggested = ".".join(
+             _to_camel_case(s) if "_" in s else s for s in segments
+         )
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="SHOW_WHEN_SNAKE_CASE_FIELD",
+                 message=f"snake_case in show_when.field causes UI breakage: '{field_path}'",
+                 path=path,
+                 details={"original": field_path, "suggested": suggested},
+             )
+         )

-     # Validate operator
-     if op not in VALID_SHOW_WHEN_OPS:
+     # Check operator: alias → WARNING + normalize; unknown → ERROR
+     if op in SHOW_WHEN_OP_ALIASES:
+         canonical = SHOW_WHEN_OP_ALIASES[op]
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="SHOW_WHEN_OP_ALIAS",
+                 message=f"Operator '{op}' is an alias; use canonical '{canonical}' instead",
+                 path=path,
+                 details={"alias": op, "canonical": canonical},
+             )
+         )
+         op = canonical
+     elif op not in CANONICAL_SHOW_WHEN_OPS:
          issues.append(
              ValidationIssue(
                  severity=ValidationSeverity.ERROR,
                  code="SHOW_WHEN_INVALID_OP",
-                 message=f"Invalid show_when operator: {op}. Valid: {VALID_SHOW_WHEN_OPS}",
+                 message=f"Invalid show_when operator: '{op}'. Valid: {sorted(CANONICAL_SHOW_WHEN_OPS)}",
                  path=path,
              )
          )
+         return  # Can't validate further with invalid op
+
+     # Resolve field in schema for type compatibility checks
+     field_schema = _resolve_field_in_schema(field_path, schema)

      # Validate field path exists in schema
-     if not _field_exists_in_schema(field_path, schema):
+     if field_schema is None:
          issues.append(
              ValidationIssue(
                  severity=ValidationSeverity.ERROR,
                  code="SHOW_WHEN_FIELD_NOT_FOUND",
-                 message=f"show_when references non-existent field: {field_path}",
+                 message=f"show_when references non-existent field: '{field_path}'",
                  path=path,
                  details={"field": field_path},
              )
          )
+         return
+
+     # Type compatibility checks
+     field_type = field_schema.get("type")
+
+     # Numeric operators require numeric field and value
+     if op in NUMERIC_SHOW_WHEN_OPS:
+         if field_type not in ("number", "integer"):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_NUMERIC_OP_ON_NON_NUMERIC",
+                     message=f"Numeric operator '{op}' used on non-numeric field (type: {field_type})",
+                     path=path,
+                     details={"op": op, "field_type": field_type},
+                 )
+             )
+         if value is not None and not isinstance(value, (int, float)):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_NUMERIC_OP_VALUE_NOT_NUMBER",
+                     message=f"Numeric operator '{op}' requires numeric value, got {type(value).__name__}",
+                     path=path,
+                 )
+             )
+
+     # Array operators require array value
+     if op in ARRAY_SHOW_WHEN_OPS:
+         if not isinstance(value, list):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_ARRAY_OP_VALUE_NOT_ARRAY",
+                     message=f"Array operator '{op}' requires list value, got {type(value).__name__}",
+                     path=path,
+                 )
+             )
+
+     # equals/not_equals on enum field: check value is in enum
+     if op in ("equals", "not_equals"):
+         enum_values = field_schema.get("enum")
+         if enum_values is not None and value is not None and value not in enum_values:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_VALUE_NOT_IN_ENUM",
+                     message=f"show_when value '{value}' not in enum: {enum_values}",
+                     path=path,
+                     details={"value": value, "enum": enum_values},
+                 )
+             )


  def _validate_show_when(
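
A behavioral consequence of the operator rework above: conditions written against 0.1.1's permissive operator set ("eq", "neq", "nin", "exists") no longer pass silently. A sketch of the outcomes, using hypothetical field names:

    {"field": "mode", "op": "equals", "value": "fast"}        # OK: canonical operator
    {"field": "mode", "op": "eq", "value": "fast"}            # WARNING: alias, normalized to "equals"
    {"field": "mode", "op": "exists"}                         # ERROR: no longer a valid operator
    {"field": "denoise_enable", "op": "equals", "value": 1}   # ERROR: snake_case; suggests "denoiseEnable"
    {"field": "mode", "op": "in", "value": "fast"}            # ERROR: array operator requires a list value
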
@@ -197,79 +413,179 @@ def _validate_show_when(
          )


- def _field_exists_in_schema(field_path: str, schema: dict[str, Any]) -> bool:
-     """Check if a field path exists in a JSON schema.
-
-     Supports dot notation: "parent.child.field"
+ def _validate_enum_labels(
+     field_schema: dict[str, Any],
+     enum_labels: dict[str, str],
+     path: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate enum_labels keys match enum values strictly.

      Args:
-         field_path: Dot-separated field path.
-         schema: JSON schema dict.
-
-     Returns:
-         True if field exists.
+         field_schema: The field's JSON schema.
+         enum_labels: The enum_labels dict from json_schema_extra.
+         path: Current path for error reporting.
+         issues: List to append issues to.
      """
-     parts = field_path.split(".")
-     current = schema.get("properties", {})
-
-     for i, part in enumerate(parts):
-         if part not in current:
-             return False
-         prop = current[part]
-
-         # Last part - field exists
-         if i == len(parts) - 1:
-             return True
-
-         # Navigate into nested object
-         if prop.get("type") == "object":
-             current = prop.get("properties", {})
-         elif "$ref" in prop:
-             # Handle $ref - simplified, assumes $defs at root
-             ref = prop["$ref"]
-             if ref.startswith("#/$defs/"):
-                 def_name = ref.split("/")[-1]
-                 defs = schema.get("$defs", {})
-                 if def_name in defs:
-                     current = defs[def_name].get("properties", {})
-                 else:
-                     return False
-             else:
-                 return False
-         else:
-             return False
+     enum_values = field_schema.get("enum")

-     return True
+     if enum_values is None:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ENUM_LABELS_ON_NON_ENUM_FIELD",
+                 message="enum_labels provided for non-enum field",
+                 path=path,
+             )
+         )
+         return

+     label_keys = set(enum_labels.keys())
+     enum_set = set(str(v) for v in enum_values)

- def _validate_enum_labels(
-     enum_labels: dict[str, str],
-     enum_values: list[Any] | None,
+     missing = enum_set - label_keys
+     extra = label_keys - enum_set
+
+     if missing:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ENUM_LABELS_MISSING_KEYS",
+                 message=f"enum_labels missing keys for enum values: {sorted(missing)}",
+                 path=path,
+                 details={"missing": sorted(missing)},
+             )
+         )
+     if extra:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ENUM_LABELS_EXTRA_KEYS",
+                 message=f"enum_labels has keys not in enum: {sorted(extra)}",
+                 path=path,
+                 details={"extra": sorted(extra)},
+             )
+         )
+
+
+ def _validate_json_schema_extra(
+     extra: dict[str, Any],
+     field_schema: dict[str, Any],
+     full_schema: dict[str, Any],
      path: str,
      issues: list[ValidationIssue],
+     orders_in_scope: set[int],
  ) -> None:
-     """Validate enum_labels keys match enum values.
+     """Validate json_schema_extra keys and values.

      Args:
-         enum_labels: The enum_labels dict.
-         enum_values: The enum values from schema (if available).
+         extra: The json_schema_extra dict.
+         field_schema: The field's JSON schema.
+         full_schema: The full schema for show_when validation.
          path: Current path for error reporting.
          issues: List to append issues to.
+         orders_in_scope: Set of order values seen in current properties scope.
      """
-     if enum_values is None:
-         return
-
-     enum_values_str = {str(v) for v in enum_values}
-     for key in enum_labels:
-         if str(key) not in enum_values_str:
+     for key, value in extra.items():
+         # x_* prefix → WARNING (extension keys)
+         if key.startswith("x_"):
              issues.append(
                  ValidationIssue(
                      severity=ValidationSeverity.WARNING,
-                     code="ENUM_LABEL_UNKNOWN_VALUE",
-                     message=f"enum_labels key '{key}' not in enum values: {enum_values}",
-                     path=path,
+                     code="JSON_SCHEMA_EXTRA_EXTENSION_KEY",
+                     message=f"Extension key '{key}' (x_* prefix) will be ignored by SDK",
+                     path=f"{path}.{key}",
                  )
              )
+             continue
+
+         # Unknown key (not in whitelist) → ERROR
+         if key not in ALLOWED_JSON_SCHEMA_EXTRA_KEYS:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="JSON_SCHEMA_EXTRA_UNKNOWN_KEY",
+                     message=f"Unknown json_schema_extra key: '{key}'. Allowed: {sorted(ALLOWED_JSON_SCHEMA_EXTRA_KEYS)}",
+                     path=f"{path}.{key}",
+                 )
+             )
+             continue
+
+         # Type validation for known keys
+         if key in JSON_SCHEMA_EXTRA_TYPES:
+             expected_type = JSON_SCHEMA_EXTRA_TYPES[key]
+             if not isinstance(value, expected_type):
+                 expected_name = (
+                     expected_type.__name__
+                     if isinstance(expected_type, type)
+                     else " | ".join(t.__name__ for t in expected_type)
+                 )
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="JSON_SCHEMA_EXTRA_TYPE_MISMATCH",
+                         message=f"json_schema_extra['{key}'] must be {expected_name}, got {type(value).__name__}",
+                         path=f"{path}.{key}",
+                     )
+                 )
+                 continue
+
+         # step must be > 0
+         if key == "step":
+             if value <= 0:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="JSON_SCHEMA_EXTRA_STEP_INVALID",
+                         message=f"step must be > 0, got {value}",
+                         path=f"{path}.step",
+                     )
+                 )
+
+         # order duplicate check within same properties scope
+         if key == "order":
+             if value in orders_in_scope:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="JSON_SCHEMA_EXTRA_DUPLICATE_ORDER",
+                         message=f"Duplicate order value {value} in same properties scope",
+                         path=f"{path}.order",
+                     )
+                 )
+             else:
+                 orders_in_scope.add(value)
+
+         # show_when validation
+         if key == "show_when":
+             _validate_show_when(value, full_schema, f"{path}.show_when", issues)
+
+         # enum_labels validation
+         if key == "enum_labels":
+             if isinstance(value, dict):
+                 _validate_enum_labels(field_schema, value, f"{path}.enum_labels", issues)
+
+
+ def _is_leaf_field(field_schema: dict[str, Any]) -> bool:
+     """Check if field is a leaf (no nested properties)."""
+     return "properties" not in field_schema
+
+
+ def _validate_title_requirement(
+     field_schema: dict[str, Any],
+     path: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Warn if leaf field is missing title."""
+     if _is_leaf_field(field_schema) and "title" not in field_schema:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="FIELD_MISSING_TITLE",
+                 message="Leaf field missing 'title' for UI display",
+                 path=path,
+             )
+         )


  def _extract_schema_from_workspace(workspace: Path) -> dict[str, Any]:
@@ -377,10 +693,22 @@ def _validate_schema_properties(
          issues: List to append issues to.
      """
      properties = props_container.get("properties", {})
+     orders_in_scope: set[int] = set()  # Track order values within this scope

      for field_name, field_schema in properties.items():
          field_path = f"{path_prefix}.{field_name}" if path_prefix else field_name

+         # Resolve the actual field schema (handle $ref, allOf, anyOf)
+         resolved_schema = field_schema
+         if "$ref" in field_schema:
+             resolved = _resolve_ref(field_schema["$ref"], full_schema)
+             if resolved:
+                 resolved_schema = resolved
+         if "allOf" in resolved_schema:
+             resolved_schema = _merge_all_of(resolved_schema["allOf"], full_schema)
+         if "anyOf" in resolved_schema:
+             resolved_schema = _unwrap_any_of(resolved_schema["anyOf"])
+
          # Check json_schema_extra (stored in various places depending on Pydantic version)
          extra = (
              field_schema.get("json_schema_extra")
@@ -388,26 +716,27 @@
              or {}
          )

-         # Validate show_when
-         if "show_when" in extra:
-             _validate_show_when(extra["show_when"], full_schema, f"{field_path}.show_when", issues)
+         # Validate json_schema_extra comprehensively
+         if extra:
+             _validate_json_schema_extra(
+                 extra, resolved_schema, full_schema, field_path, issues, orders_in_scope
+             )

-         # Validate enum_labels
-         if "enum_labels" in extra:
-             enum_values = field_schema.get("enum")
-             _validate_enum_labels(extra["enum_labels"], enum_values, f"{field_path}.enum_labels", issues)
+         # Validate title requirement for leaf fields
+         _validate_title_requirement(resolved_schema, field_path, issues)

          # Recurse into nested objects
-         if field_schema.get("type") == "object":
-             _validate_schema_properties(field_schema, full_schema, field_path, issues)
+         if resolved_schema.get("type") == "object" or "properties" in resolved_schema:
+             _validate_schema_properties(resolved_schema, full_schema, field_path, issues)

-         # Handle allOf, anyOf, oneOf
+         # Handle allOf, anyOf, oneOf at field level
          for combiner in ["allOf", "anyOf", "oneOf"]:
              if combiner in field_schema:
                  for i, sub_schema in enumerate(field_schema[combiner]):
-                     _validate_schema_properties(
-                         sub_schema, full_schema, f"{field_path}.{combiner}[{i}]", issues
-                     )
+                     if "properties" in sub_schema:
+                         _validate_schema_properties(
+                             sub_schema, full_schema, f"{field_path}.{combiner}[{i}]", issues
+                         )

      # Handle $defs
      if "$defs" in props_container:
@@ -612,6 +941,286 @@ def _validate_contract_structure(contract: dict[str, Any], issues: list[ValidationIssue]
                  )
              )

+         # Validate schema structure per kind
+         _validate_dataset_schema(ds, ds_key, issues)
+
+         # Validate dimensions don't overlap with owner-level keys
+         _validate_dimensions_policy(dimensions, ds_key, issues)
+
+         # Validate groupPath depth
+         group_path = ds.get("groupPath") or []
+         _validate_group_path_policy(group_path, ds_key, issues)
+
+     # Validate invariants
+     invariants = contract.get("invariants") or []
+     datasets_by_key = {ds.get("key"): ds for ds in datasets if ds.get("key")}
+     _validate_invariants(invariants, datasets_by_key, issues)
+
+     # Validate relations
+     relations = contract.get("relations") or []
+     _validate_relations(relations, datasets_by_key, issues)
+
+
+ # Schema dtype sets per spec
+ FRAME_COLUMN_DTYPES = {"string", "int", "float", "bool", "datetime"}
+ SCALAR_DTYPES = {"string", "int", "float", "bool"}
+ OWNER_LEVEL_KEYS = {"stageId", "wellId", "platformId"}
+ MAX_GROUP_PATH_DEPTH = 4
+
+
+ def _validate_dataset_schema(
+     dataset: dict[str, Any], ds_key: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate dataset schema structure per kind."""
+     kind = dataset.get("kind")
+     schema = dataset.get("schema") or {}
+
+     if kind == "scalar":
+         dtype = schema.get("dtype")
+         if dtype and dtype not in SCALAR_DTYPES:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SCALAR_INVALID_DTYPE",
+                     message=f"Invalid scalar dtype '{dtype}'. Valid: {sorted(SCALAR_DTYPES)}",
+                     path=f"datasets.{ds_key}.schema.dtype",
+                 )
+             )
+
+     elif kind == "blob":
+         ext = schema.get("ext")
+         if ext and not re.match(r"^\.[a-zA-Z0-9]+$", ext):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="BLOB_EXT_INVALID_FORMAT",
+                     message=f"Invalid blob ext format '{ext}'. Must be '.<alphanumeric>'",
+                     path=f"datasets.{ds_key}.schema.ext",
+                 )
+             )
+
+     elif kind == "frame":
+         # Validate index field exists in columns if specified
+         index = schema.get("index")
+         columns = schema.get("columns") or []
+         if isinstance(index, dict):
+             index_kind = index.get("kind")
+             index_field = index.get("field")
+             if index_kind in ("time", "depth") and index_field:
+                 col_names = [c.get("name") if isinstance(c, dict) else c for c in columns]
+                 if index_field not in col_names:
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code="FRAME_INDEX_FIELD_NOT_IN_COLUMNS",
+                             message=f"Frame index field '{index_field}' not found in columns",
+                             path=f"datasets.{ds_key}.schema.index",
+                         )
+                     )
+
+
+ def _validate_dimensions_policy(
+     dimensions: list[str], ds_key: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate dimensions don't contain owner-level keys."""
+     overlap = set(dimensions) & OWNER_LEVEL_KEYS
+     if overlap:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="DIMENSIONS_CONTAINS_OWNER_KEYS",
+                 message=f"dimensions contains owner-level keys {sorted(overlap)}; use 'owner' instead",
+                 path=f"datasets.{ds_key}.dimensions",
+                 details={"overlap": sorted(overlap)},
+             )
+         )
+
+
+ def _validate_group_path_policy(
+     group_path: list[str], ds_key: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate groupPath depth."""
+     if group_path and len(group_path) > MAX_GROUP_PATH_DEPTH:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="GROUP_PATH_TOO_DEEP",
+                 message=f"groupPath depth {len(group_path)} exceeds recommended max {MAX_GROUP_PATH_DEPTH}",
+                 path=f"datasets.{ds_key}.groupPath",
+                 details={"depth": len(group_path)},
+             )
+         )
+
+
+ def _validate_invariants(
+     invariants: list[dict[str, Any]],
+     datasets_by_key: dict[str, dict[str, Any]],
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate invariants reference valid datasets."""
+     for idx, inv in enumerate(invariants):
+         inv_type = inv.get("type")
+         inv_path = f"invariants[{idx}]"
+
+         if inv_type == "sameOwner":
+             level = inv.get("level")
+             targets = inv.get("targets") or []
+             for i, target in enumerate(targets):
+                 key = target.get("key") if isinstance(target, dict) else target
+                 if key not in datasets_by_key:
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code="INVARIANT_REFERENCES_UNKNOWN_DATASET",
+                             message=f"sameOwner invariant references unknown dataset '{key}'",
+                             path=f"{inv_path}.targets[{i}]",
+                         )
+                     )
+                 elif level:
+                     ds_owner = datasets_by_key[key].get("owner")
+                     if ds_owner != level:
+                         issues.append(
+                             ValidationIssue(
+                                 severity=ValidationSeverity.ERROR,
+                                 code="SAME_OWNER_LEVEL_MISMATCH",
+                                 message=f"sameOwner level '{level}' doesn't match dataset owner '{ds_owner}'",
+                                 path=f"{inv_path}.targets[{i}]",
+                             )
+                         )
+
+         elif inv_type == "joinOnOwner":
+             left = inv.get("left") or {}
+             right = inv.get("right") or {}
+             for ref_name, ref in [("left", left), ("right", right)]:
+                 key = ref.get("key")
+                 if key and key not in datasets_by_key:
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code="INVARIANT_REFERENCES_UNKNOWN_DATASET",
+                             message=f"joinOnOwner.{ref_name} references unknown dataset '{key}'",
+                             path=f"{inv_path}.{ref_name}",
+                         )
+                     )
+
+         elif inv_type == "itemsCount":
+             ds_key = inv.get("datasetKey")
+             if ds_key and ds_key not in datasets_by_key:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="INVARIANT_REFERENCES_UNKNOWN_DATASET",
+                         message=f"itemsCount references unknown dataset '{ds_key}'",
+                         path=f"{inv_path}.datasetKey",
+                     )
+                 )
+             count = inv.get("count")
+             if count is not None and (not isinstance(count, int) or count < 1):
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="ITEMS_COUNT_INVALID",
+                         message=f"itemsCount.count must be integer >= 1, got {count}",
+                         path=f"{inv_path}.count",
+                     )
+                 )
+
+
+ def _validate_relations(
+     relations: list[dict[str, Any]],
+     datasets_by_key: dict[str, dict[str, Any]],
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate relations reference valid datasets and fields."""
+     for idx, rel in enumerate(relations):
+         rel_path = f"relations[{idx}]"
+         from_ref = rel.get("from") or {}
+         to_ref = rel.get("to") or {}
+
+         from_key = from_ref.get("key")
+         to_key = to_ref.get("key")
+
+         # Keys must exist
+         if from_key and from_key not in datasets_by_key:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="RELATION_FROM_KEY_NOT_FOUND",
+                     message=f"relation.from references unknown dataset '{from_key}'",
+                     path=f"{rel_path}.from",
+                 )
+             )
+         if to_key and to_key not in datasets_by_key:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="RELATION_TO_KEY_NOT_FOUND",
+                     message=f"relation.to references unknown dataset '{to_key}'",
+                     path=f"{rel_path}.to",
+                 )
+             )
+
+         # blob/scalar cannot have field relations
+         for key, ref_name in [(from_key, "from"), (to_key, "to")]:
+             if key and key in datasets_by_key:
+                 kind = datasets_by_key[key].get("kind")
+                 if kind in ("blob", "scalar"):
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code=f"RELATION_{ref_name.upper()}_CANNOT_BE_BLOB_OR_SCALAR",
+                             message=f"relation.{ref_name} cannot reference {kind} dataset",
+                             path=f"{rel_path}.{ref_name}",
+                         )
+                     )
+
+         # Validate field exists in schema (for frame/object)
+         _validate_relation_field(from_ref, datasets_by_key, "from", rel_path, issues)
+         _validate_relation_field(to_ref, datasets_by_key, "to", rel_path, issues)
+
+
+ def _validate_relation_field(
+     ref: dict[str, Any],
+     datasets_by_key: dict[str, dict[str, Any]],
+     ref_name: str,
+     rel_path: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate relation field exists in dataset schema."""
+     key = ref.get("key")
+     field = ref.get("field")
+     if not key or not field or key not in datasets_by_key:
+         return
+
+     dataset = datasets_by_key[key]
+     schema = dataset.get("schema") or {}
+     kind = dataset.get("kind")
+
+     if kind == "frame":
+         columns = schema.get("columns") or []
+         col_names = [c.get("name") if isinstance(c, dict) else c for c in columns]
+         if field not in col_names:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code=f"RELATION_{ref_name.upper()}_FIELD_NOT_IN_COLUMNS",
+                     message=f"relation.{ref_name}.field '{field}' not in frame columns",
+                     path=f"{rel_path}.{ref_name}.field",
+                 )
+             )
+     elif kind == "object":
+         fields = schema.get("fields") or []
+         field_names = [f.get("name") if isinstance(f, dict) else f for f in fields]
+         if field not in field_names:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code=f"RELATION_{ref_name.upper()}_FIELD_NOT_IN_FIELDS",
+                     message=f"relation.{ref_name}.field '{field}' not in object fields",
+                     path=f"{rel_path}.{ref_name}.field",
+                 )
+             )
+

  # =============================================================================
  # Bundle Validation
@@ -1032,6 +1641,157 @@ def _validate_item_against_contract(
          )


+ # =============================================================================
+ # Algorithm Signature Validation
+ # =============================================================================
+
+
+ def validate_algorithm_signature(workspace: Path) -> ValidationResult:
+     """Validate algorithm run function signature.
+
+     Checks:
+     - main.py exists
+     - Top-level run function exists
+     - run function is not async (sandbox doesn't support it)
+     - run function has exactly 1 positional parameter
+     - run function has no *args, **kwargs, or keyword-only args
+
+     Args:
+         workspace: Algorithm workspace path.
+
+     Returns:
+         ValidationResult with issues found.
+     """
+     workspace = Path(workspace).resolve()
+     issues: list[ValidationIssue] = []
+
+     main_path = workspace / "main.py"
+
+     if not main_path.exists():
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_MAIN_NOT_FOUND",
+                 message="main.py not found in algorithm workspace",
+                 path="main.py",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     try:
+         source = main_path.read_text(encoding="utf-8")
+         tree = ast.parse(source, filename="main.py")
+     except SyntaxError as e:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_SYNTAX_ERROR",
+                 message=f"Syntax error in main.py: {e}",
+                 path="main.py",
+                 details={"error": str(e)},
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     # Find TOP-LEVEL run functions only (not nested in classes/functions)
+     run_funcs = [
+         node
+         for node in tree.body
+         if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == "run"
+     ]
+
+     if len(run_funcs) == 0:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_NOT_FOUND",
+                 message="Top-level 'run' function not found in main.py",
+                 path="main.py",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     if len(run_funcs) > 1:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_MULTIPLE_RUN_FUNCTIONS",
+                 message=f"Multiple top-level 'run' functions found ({len(run_funcs)})",
+                 path="main.py",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     run_func = run_funcs[0]
+
+     # async def run → ERROR (sandbox doesn't support it)
+     if isinstance(run_func, ast.AsyncFunctionDef):
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_ASYNC_RUN_NOT_SUPPORTED",
+                 message="'async def run' is not supported; sandbox requires synchronous 'def run'",
+                 path="main.py",
+             )
+         )
+
+     args = run_func.args
+
+     # No *args
+     if args.vararg:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_HAS_VARARG",
+                 message=f"run function must not have *args (found: *{args.vararg.arg})",
+                 path="main.py",
+             )
+         )
+
+     # No **kwargs
+     if args.kwarg:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_HAS_KWARG",
+                 message=f"run function must not have **kwargs (found: **{args.kwarg.arg})",
+                 path="main.py",
+             )
+         )
+
+     # No keyword-only args
+     if args.kwonlyargs:
+         kw_names = [a.arg for a in args.kwonlyargs]
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_HAS_KWONLY_ARGS",
+                 message=f"run function must not have keyword-only args (found: {kw_names})",
+                 path="main.py",
+             )
+         )
+
+     # Exactly 1 positional parameter (excluding 'self' for methods)
+     positional_args = list(args.posonlyargs) + list(args.args)
+     if positional_args and positional_args[0].arg == "self":
+         positional_args = positional_args[1:]
+
+     if len(positional_args) != 1:
+         param_names = [a.arg for a in positional_args]
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_WRONG_PARAM_COUNT",
+                 message=f"run function must have exactly 1 parameter (context), found {len(positional_args)}: {param_names}",
+                 path="main.py",
+                 details={"found": len(positional_args), "params": param_names},
+             )
+         )
+
+     has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
+     return ValidationResult(valid=not has_errors, issues=issues)
+
+
  __all__ = [
      "ValidationSeverity",
      "ValidationIssue",
@@ -1040,4 +1800,5 @@ __all__ = [
      "validate_output_contract",
      "validate_bundle",
      "validate_run_manifest",
+     "validate_algorithm_signature",
  ]
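
A minimal usage sketch for the new validator (the import path is assumed; this diff does not show the module's location inside the package):

    from pathlib import Path

    from fraclab_sdk.validation import validate_algorithm_signature  # path assumed

    # A main.py that satisfies the checks: a single top-level, synchronous
    # run function taking exactly one positional parameter.
    #
    #     def run(context):
    #         ...

    result = validate_algorithm_signature(Path("./my_algorithm"))
    for issue in result.issues:
        print(issue.severity, issue.code, issue.message)
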