fraclab-sdk 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,12 +5,15 @@ Provides:
  - OutputContract validation (structure, key uniqueness)
  - Bundle validation (hash integrity)
  - RunManifest vs OutputContract alignment validation
+ - Algorithm signature validation
  """

  from __future__ import annotations

+ import ast
  import hashlib
  import json
+ import re
  import subprocess
  import sys
  from dataclasses import dataclass, field
@@ -62,8 +65,136 @@ class ValidationResult:
  # InputSpec Validation
  # =============================================================================

- # Valid show_when operators
- VALID_SHOW_WHEN_OPS = {"eq", "neq", "in", "nin", "gt", "gte", "lt", "lte", "exists"}
+ # Allowed json_schema_extra keys (spec-defined)
+ ALLOWED_JSON_SCHEMA_EXTRA_KEYS = {
+     "group", "unit", "step", "ui_type", "show_when",
+     "enum_labels", "order", "collapsible"
+ }
+
+ # Type constraints for json_schema_extra keys
+ JSON_SCHEMA_EXTRA_TYPES: dict[str, type | tuple[type, ...]] = {
+     "group": str,
+     "unit": str,
+     "ui_type": str,
+     "order": int,
+     "collapsible": bool,
+     "step": (int, float),
+ }
+
+ # Canonical show_when operators (per InputSpec spec)
+ CANONICAL_SHOW_WHEN_OPS = {
+     "equals", "not_equals", "gt", "gte", "lt", "lte", "in", "not_in"
+ }
+
+ # Operator aliases (normalized to canonical form)
+ SHOW_WHEN_OP_ALIASES = {
+     "eq": "equals",
+     "neq": "not_equals",
+     "nin": "not_in",
+ }
+
+ # Numeric operators (require numeric field and value)
+ NUMERIC_SHOW_WHEN_OPS = {"gt", "gte", "lt", "lte"}
+
+ # Array operators (require array value)
+ ARRAY_SHOW_WHEN_OPS = {"in", "not_in"}
+
+ # Pattern to detect snake_case
+ SNAKE_CASE_PATTERN = re.compile(r"[a-z]+_[a-z]+")
+
+
+ def _to_camel_case(snake_str: str) -> str:
+     """Convert snake_case to camelCase."""
+     parts = snake_str.split("_")
+     return parts[0] + "".join(p.capitalize() for p in parts[1:])
+
+
+ def _resolve_ref(ref_path: str, root_schema: dict[str, Any]) -> dict[str, Any] | None:
+     """Resolve a $ref path in JSON schema."""
+     if not ref_path.startswith("#/"):
+         return None
+     parts = ref_path[2:].split("/")
+     current = root_schema
+     for part in parts:
+         if isinstance(current, dict) and part in current:
+             current = current[part]
+         else:
+             return None
+     return current if isinstance(current, dict) else None
+
+
+ def _merge_all_of(all_of_list: list[dict], root_schema: dict[str, Any]) -> dict[str, Any]:
+     """Merge allOf schemas into single view for path resolution."""
+     merged: dict[str, Any] = {"properties": {}}
+     for sub in all_of_list:
+         resolved = sub
+         if "$ref" in sub:
+             resolved = _resolve_ref(sub["$ref"], root_schema) or sub
+         merged["properties"].update(resolved.get("properties", {}))
+         if "type" in resolved:
+             merged["type"] = resolved["type"]
+     return merged
+
+
+ def _unwrap_any_of(any_of_list: list[dict]) -> dict[str, Any]:
+     """Unwrap anyOf, preferring the non-null branch."""
+     non_null = [s for s in any_of_list if s.get("type") != "null"]
+     if len(non_null) == 1:
+         return non_null[0]
+     return non_null[0] if non_null else any_of_list[0]
+
+
+ def _resolve_field_in_schema(
+     field_path: str, schema: dict[str, Any]
+ ) -> dict[str, Any] | None:
+     """Resolve field path in JSON Schema, handling $ref, allOf, anyOf/oneOf.
+
+     Args:
+         field_path: Dot-separated field path (e.g., "denoise.enable").
+         schema: Root JSON schema dict.
+
+     Returns:
+         Field schema if found, None otherwise.
+     """
+     segments = field_path.split(".")
+     current = schema
+
+     for segment in segments:
+         # Resolve $ref
+         if "$ref" in current:
+             resolved = _resolve_ref(current["$ref"], schema)
+             if resolved is None:
+                 return None
+             current = resolved
+
+         # Merge allOf (common in Pydantic v2 for inheritance)
+         if "allOf" in current:
+             current = _merge_all_of(current["allOf"], schema)
+
+         # Unwrap anyOf/oneOf (find "real type")
+         if "anyOf" in current:
+             current = _unwrap_any_of(current["anyOf"])
+         if "oneOf" in current:
+             current = _unwrap_any_of(current["oneOf"])
+
+         props = current.get("properties", {})
+         if segment not in props:
+             return None
+         current = props[segment]
+
+     # Final resolution for the target field
+     if "$ref" in current:
+         resolved = _resolve_ref(current["$ref"], schema)
+         if resolved:
+             current = resolved
+     if "allOf" in current:
+         current = _merge_all_of(current["allOf"], schema)
+     if "anyOf" in current:
+         current = _unwrap_any_of(current["anyOf"])
+     if "oneOf" in current:
+         current = _unwrap_any_of(current["oneOf"])
+
+     return current


  def _validate_show_when_condition(
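
The new _resolve_field_in_schema helper is what show_when field lookups go through. A minimal sketch of how it resolves a dotted path through a Pydantic-v2-style schema (the schema below is illustrative, not taken from the package):

    # Hypothetical schema: nested model referenced via $ref, as Pydantic v2 emits it.
    schema = {
        "properties": {
            "denoise": {"$ref": "#/$defs/Denoise"},
        },
        "$defs": {
            "Denoise": {"properties": {"enable": {"type": "boolean"}}},
        },
    }

    # Walks properties -> $ref -> properties and returns the leaf field schema.
    assert _resolve_field_in_schema("denoise.enable", schema) == {"type": "boolean"}

One apparent limitation worth noting: when an anyOf unwrap exposes a nested $ref mid-path (the shape Pydantic v2 emits for Optional[SubModel]), the loop does not re-resolve that $ref before the properties lookup, so such parents appear not to resolve.
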
@@ -101,30 +232,115 @@ def _validate_show_when_condition(
          return

      field_path = condition["field"]
-     op = condition.get("op", "eq")
+     op = condition.get("op", "equals")
+     value = condition.get("value")
+
+     # Check for snake_case in field path (must be ERROR with fix suggestion)
+     if SNAKE_CASE_PATTERN.search(field_path):
+         segments = field_path.split(".")
+         suggested = ".".join(
+             _to_camel_case(s) if "_" in s else s for s in segments
+         )
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="SHOW_WHEN_SNAKE_CASE_FIELD",
+                 message=f"snake_case in show_when.field causes UI breakage: '{field_path}'",
+                 path=path,
+                 details={"original": field_path, "suggested": suggested},
+             )
+         )

-     # Validate operator
-     if op not in VALID_SHOW_WHEN_OPS:
+     # Check operator: alias → WARNING + normalize; unknown → ERROR
+     if op in SHOW_WHEN_OP_ALIASES:
+         canonical = SHOW_WHEN_OP_ALIASES[op]
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="SHOW_WHEN_OP_ALIAS",
+                 message=f"Operator '{op}' is an alias; use canonical '{canonical}' instead",
+                 path=path,
+                 details={"alias": op, "canonical": canonical},
+             )
+         )
+         op = canonical
+     elif op not in CANONICAL_SHOW_WHEN_OPS:
          issues.append(
              ValidationIssue(
                  severity=ValidationSeverity.ERROR,
                  code="SHOW_WHEN_INVALID_OP",
-                 message=f"Invalid show_when operator: {op}. Valid: {VALID_SHOW_WHEN_OPS}",
+                 message=f"Invalid show_when operator: '{op}'. Valid: {sorted(CANONICAL_SHOW_WHEN_OPS)}",
                  path=path,
              )
          )
+         return  # Can't validate further with invalid op
+
+     # Resolve field in schema for type compatibility checks
+     field_schema = _resolve_field_in_schema(field_path, schema)

      # Validate field path exists in schema
-     if not _field_exists_in_schema(field_path, schema):
+     if field_schema is None:
          issues.append(
              ValidationIssue(
                  severity=ValidationSeverity.ERROR,
                  code="SHOW_WHEN_FIELD_NOT_FOUND",
-                 message=f"show_when references non-existent field: {field_path}",
+                 message=f"show_when references non-existent field: '{field_path}'",
                  path=path,
                  details={"field": field_path},
              )
          )
+         return
+
+     # Type compatibility checks
+     field_type = field_schema.get("type")
+
+     # Numeric operators require numeric field and value
+     if op in NUMERIC_SHOW_WHEN_OPS:
+         if field_type not in ("number", "integer"):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_NUMERIC_OP_ON_NON_NUMERIC",
+                     message=f"Numeric operator '{op}' used on non-numeric field (type: {field_type})",
+                     path=path,
+                     details={"op": op, "field_type": field_type},
+                 )
+             )
+         if value is not None and not isinstance(value, (int, float)):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_NUMERIC_OP_VALUE_NOT_NUMBER",
+                     message=f"Numeric operator '{op}' requires numeric value, got {type(value).__name__}",
+                     path=path,
+                 )
+             )
+
+     # Array operators require array value
+     if op in ARRAY_SHOW_WHEN_OPS:
+         if not isinstance(value, list):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_ARRAY_OP_VALUE_NOT_ARRAY",
+                     message=f"Array operator '{op}' requires list value, got {type(value).__name__}",
+                     path=path,
+                 )
+             )
+
+     # equals/not_equals on enum field: check value is in enum
+     if op in ("equals", "not_equals"):
+         enum_values = field_schema.get("enum")
+         if enum_values is not None and value is not None and value not in enum_values:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SHOW_WHEN_VALUE_NOT_IN_ENUM",
+                     message=f"show_when value '{value}' not in enum: {enum_values}",
+                     path=path,
+                     details={"value": value, "enum": enum_values},
+                 )
+             )


  def _validate_show_when(
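
A behavioral consequence of the operator rework above: conditions written against 0.1.1's permissive operator set ("eq", "neq", "nin", "exists") no longer pass silently. A sketch of the outcomes, using hypothetical field names:

    {"field": "mode", "op": "equals", "value": "fast"}        # OK: canonical operator
    {"field": "mode", "op": "eq", "value": "fast"}            # WARNING: alias, normalized to "equals"
    {"field": "mode", "op": "exists"}                         # ERROR: no longer a valid operator
    {"field": "denoise_enable", "op": "equals", "value": 1}   # ERROR: snake_case; suggests "denoiseEnable"
    {"field": "mode", "op": "in", "value": "fast"}            # ERROR: array operator requires a list value
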
@@ -197,79 +413,179 @@ def _validate_show_when(
          )


- def _field_exists_in_schema(field_path: str, schema: dict[str, Any]) -> bool:
-     """Check if a field path exists in a JSON schema.
-
-     Supports dot notation: "parent.child.field"
+ def _validate_enum_labels(
+     field_schema: dict[str, Any],
+     enum_labels: dict[str, str],
+     path: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate enum_labels keys match enum values strictly.

      Args:
-         field_path: Dot-separated field path.
-         schema: JSON schema dict.
-
-     Returns:
-         True if field exists.
+         field_schema: The field's JSON schema.
+         enum_labels: The enum_labels dict from json_schema_extra.
+         path: Current path for error reporting.
+         issues: List to append issues to.
      """
-     parts = field_path.split(".")
-     current = schema.get("properties", {})
-
-     for i, part in enumerate(parts):
-         if part not in current:
-             return False
-         prop = current[part]
-
-         # Last part - field exists
-         if i == len(parts) - 1:
-             return True
-
-         # Navigate into nested object
-         if prop.get("type") == "object":
-             current = prop.get("properties", {})
-         elif "$ref" in prop:
-             # Handle $ref - simplified, assumes $defs at root
-             ref = prop["$ref"]
-             if ref.startswith("#/$defs/"):
-                 def_name = ref.split("/")[-1]
-                 defs = schema.get("$defs", {})
-                 if def_name in defs:
-                     current = defs[def_name].get("properties", {})
-                 else:
-                     return False
-             else:
-                 return False
-         else:
-             return False
+     enum_values = field_schema.get("enum")

-     return True
+     if enum_values is None:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ENUM_LABELS_ON_NON_ENUM_FIELD",
+                 message="enum_labels provided for non-enum field",
+                 path=path,
+             )
+         )
+         return

+     label_keys = set(enum_labels.keys())
+     enum_set = set(str(v) for v in enum_values)

- def _validate_enum_labels(
-     enum_labels: dict[str, str],
-     enum_values: list[Any] | None,
+     missing = enum_set - label_keys
+     extra = label_keys - enum_set
+
+     if missing:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ENUM_LABELS_MISSING_KEYS",
+                 message=f"enum_labels missing keys for enum values: {sorted(missing)}",
+                 path=path,
+                 details={"missing": sorted(missing)},
+             )
+         )
+     if extra:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ENUM_LABELS_EXTRA_KEYS",
+                 message=f"enum_labels has keys not in enum: {sorted(extra)}",
+                 path=path,
+                 details={"extra": sorted(extra)},
+             )
+         )
+
+
+ def _validate_json_schema_extra(
+     extra: dict[str, Any],
+     field_schema: dict[str, Any],
+     full_schema: dict[str, Any],
      path: str,
      issues: list[ValidationIssue],
+     orders_in_scope: set[int],
  ) -> None:
-     """Validate enum_labels keys match enum values.
+     """Validate json_schema_extra keys and values.

      Args:
-         enum_labels: The enum_labels dict.
-         enum_values: The enum values from schema (if available).
+         extra: The json_schema_extra dict.
+         field_schema: The field's JSON schema.
+         full_schema: The full schema for show_when validation.
          path: Current path for error reporting.
          issues: List to append issues to.
+         orders_in_scope: Set of order values seen in current properties scope.
      """
-     if enum_values is None:
-         return
-
-     enum_values_str = {str(v) for v in enum_values}
-     for key in enum_labels:
-         if str(key) not in enum_values_str:
+     for key, value in extra.items():
+         # x_* prefix → WARNING (extension keys)
+         if key.startswith("x_"):
              issues.append(
                  ValidationIssue(
                      severity=ValidationSeverity.WARNING,
-                     code="ENUM_LABEL_UNKNOWN_VALUE",
-                     message=f"enum_labels key '{key}' not in enum values: {enum_values}",
-                     path=path,
+                     code="JSON_SCHEMA_EXTRA_EXTENSION_KEY",
+                     message=f"Extension key '{key}' (x_* prefix) will be ignored by SDK",
+                     path=f"{path}.{key}",
                  )
              )
+             continue
+
+         # Unknown key (not in whitelist) → ERROR
+         if key not in ALLOWED_JSON_SCHEMA_EXTRA_KEYS:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="JSON_SCHEMA_EXTRA_UNKNOWN_KEY",
+                     message=f"Unknown json_schema_extra key: '{key}'. Allowed: {sorted(ALLOWED_JSON_SCHEMA_EXTRA_KEYS)}",
+                     path=f"{path}.{key}",
+                 )
+             )
+             continue
+
+         # Type validation for known keys
+         if key in JSON_SCHEMA_EXTRA_TYPES:
+             expected_type = JSON_SCHEMA_EXTRA_TYPES[key]
+             if not isinstance(value, expected_type):
+                 expected_name = (
+                     expected_type.__name__
+                     if isinstance(expected_type, type)
+                     else " | ".join(t.__name__ for t in expected_type)
+                 )
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="JSON_SCHEMA_EXTRA_TYPE_MISMATCH",
+                         message=f"json_schema_extra['{key}'] must be {expected_name}, got {type(value).__name__}",
+                         path=f"{path}.{key}",
+                     )
+                 )
+                 continue
+
+         # step must be > 0
+         if key == "step":
+             if value <= 0:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="JSON_SCHEMA_EXTRA_STEP_INVALID",
+                         message=f"step must be > 0, got {value}",
+                         path=f"{path}.step",
+                     )
+                 )
+
+         # order duplicate check within same properties scope
+         if key == "order":
+             if value in orders_in_scope:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="JSON_SCHEMA_EXTRA_DUPLICATE_ORDER",
+                         message=f"Duplicate order value {value} in same properties scope",
+                         path=f"{path}.order",
+                     )
+                 )
+             else:
+                 orders_in_scope.add(value)
+
+         # show_when validation
+         if key == "show_when":
+             _validate_show_when(value, full_schema, f"{path}.show_when", issues)
+
+         # enum_labels validation
+         if key == "enum_labels":
+             if isinstance(value, dict):
+                 _validate_enum_labels(field_schema, value, f"{path}.enum_labels", issues)
+
+
+ def _is_leaf_field(field_schema: dict[str, Any]) -> bool:
+     """Check if field is a leaf (no nested properties)."""
+     return "properties" not in field_schema
+
+
+ def _validate_title_requirement(
+     field_schema: dict[str, Any],
+     path: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Warn if leaf field is missing title."""
+     if _is_leaf_field(field_schema) and "title" not in field_schema:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="FIELD_MISSING_TITLE",
+                 message="Leaf field missing 'title' for UI display",
+                 path=path,
+             )
+         )


  def _extract_schema_from_workspace(workspace: Path) -> dict[str, Any]:
@@ -377,10 +693,22 @@ def _validate_schema_properties(
          issues: List to append issues to.
      """
      properties = props_container.get("properties", {})
+     orders_in_scope: set[int] = set()  # Track order values within this scope

      for field_name, field_schema in properties.items():
          field_path = f"{path_prefix}.{field_name}" if path_prefix else field_name

+         # Resolve the actual field schema (handle $ref, allOf, anyOf)
+         resolved_schema = field_schema
+         if "$ref" in field_schema:
+             resolved = _resolve_ref(field_schema["$ref"], full_schema)
+             if resolved:
+                 resolved_schema = resolved
+         if "allOf" in resolved_schema:
+             resolved_schema = _merge_all_of(resolved_schema["allOf"], full_schema)
+         if "anyOf" in resolved_schema:
+             resolved_schema = _unwrap_any_of(resolved_schema["anyOf"])
+
          # Check json_schema_extra (stored in various places depending on Pydantic version)
          extra = (
              field_schema.get("json_schema_extra")
@@ -388,26 +716,27 @@
              or {}
          )

-         # Validate show_when
-         if "show_when" in extra:
-             _validate_show_when(extra["show_when"], full_schema, f"{field_path}.show_when", issues)
+         # Validate json_schema_extra comprehensively
+         if extra:
+             _validate_json_schema_extra(
+                 extra, resolved_schema, full_schema, field_path, issues, orders_in_scope
+             )

-         # Validate enum_labels
-         if "enum_labels" in extra:
-             enum_values = field_schema.get("enum")
-             _validate_enum_labels(extra["enum_labels"], enum_values, f"{field_path}.enum_labels", issues)
+         # Validate title requirement for leaf fields
+         _validate_title_requirement(resolved_schema, field_path, issues)

          # Recurse into nested objects
-         if field_schema.get("type") == "object":
-             _validate_schema_properties(field_schema, full_schema, field_path, issues)
+         if resolved_schema.get("type") == "object" or "properties" in resolved_schema:
+             _validate_schema_properties(resolved_schema, full_schema, field_path, issues)

-         # Handle allOf, anyOf, oneOf
+         # Handle allOf, anyOf, oneOf at field level
          for combiner in ["allOf", "anyOf", "oneOf"]:
              if combiner in field_schema:
                  for i, sub_schema in enumerate(field_schema[combiner]):
-                     _validate_schema_properties(
-                         sub_schema, full_schema, f"{field_path}.{combiner}[{i}]", issues
-                     )
+                     if "properties" in sub_schema:
+                         _validate_schema_properties(
+                             sub_schema, full_schema, f"{field_path}.{combiner}[{i}]", issues
+                         )

      # Handle $defs
      if "$defs" in props_container:
@@ -612,6 +941,286 @@ def _validate_contract_structure(contract: dict[str, Any], issues: list[ValidationIssue]
                  )
              )

+         # Validate schema structure per kind
+         _validate_dataset_schema(ds, ds_key, issues)
+
+         # Validate dimensions don't overlap with owner-level keys
+         _validate_dimensions_policy(dimensions, ds_key, issues)
+
+         # Validate groupPath depth
+         group_path = ds.get("groupPath") or []
+         _validate_group_path_policy(group_path, ds_key, issues)
+
+     # Validate invariants
+     invariants = contract.get("invariants") or []
+     datasets_by_key = {ds.get("key"): ds for ds in datasets if ds.get("key")}
+     _validate_invariants(invariants, datasets_by_key, issues)
+
+     # Validate relations
+     relations = contract.get("relations") or []
+     _validate_relations(relations, datasets_by_key, issues)
+
+
+ # Schema dtype sets per spec
+ FRAME_COLUMN_DTYPES = {"string", "int", "float", "bool", "datetime"}
+ SCALAR_DTYPES = {"string", "int", "float", "bool"}
+ OWNER_LEVEL_KEYS = {"stageId", "wellId", "platformId"}
+ MAX_GROUP_PATH_DEPTH = 4
+
+
+ def _validate_dataset_schema(
+     dataset: dict[str, Any], ds_key: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate dataset schema structure per kind."""
+     kind = dataset.get("kind")
+     schema = dataset.get("schema") or {}
+
+     if kind == "scalar":
+         dtype = schema.get("dtype")
+         if dtype and dtype not in SCALAR_DTYPES:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="SCALAR_INVALID_DTYPE",
+                     message=f"Invalid scalar dtype '{dtype}'. Valid: {sorted(SCALAR_DTYPES)}",
+                     path=f"datasets.{ds_key}.schema.dtype",
+                 )
+             )
+
+     elif kind == "blob":
+         ext = schema.get("ext")
+         if ext and not re.match(r"^\.[a-zA-Z0-9]+$", ext):
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="BLOB_EXT_INVALID_FORMAT",
+                     message=f"Invalid blob ext format '{ext}'. Must be '.<alphanumeric>'",
+                     path=f"datasets.{ds_key}.schema.ext",
+                 )
+             )
+
+     elif kind == "frame":
+         # Validate index field exists in columns if specified
+         index = schema.get("index")
+         columns = schema.get("columns") or []
+         if isinstance(index, dict):
+             index_kind = index.get("kind")
+             index_field = index.get("field")
+             if index_kind in ("time", "depth") and index_field:
+                 col_names = [c.get("name") if isinstance(c, dict) else c for c in columns]
+                 if index_field not in col_names:
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code="FRAME_INDEX_FIELD_NOT_IN_COLUMNS",
+                             message=f"Frame index field '{index_field}' not found in columns",
+                             path=f"datasets.{ds_key}.schema.index",
+                         )
+                     )
+
+
+ def _validate_dimensions_policy(
+     dimensions: list[str], ds_key: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate dimensions don't contain owner-level keys."""
+     overlap = set(dimensions) & OWNER_LEVEL_KEYS
+     if overlap:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="DIMENSIONS_CONTAINS_OWNER_KEYS",
+                 message=f"dimensions contains owner-level keys {sorted(overlap)}; use 'owner' instead",
+                 path=f"datasets.{ds_key}.dimensions",
+                 details={"overlap": sorted(overlap)},
+             )
+         )
+
+
+ def _validate_group_path_policy(
+     group_path: list[str], ds_key: str, issues: list[ValidationIssue]
+ ) -> None:
+     """Validate groupPath depth."""
+     if group_path and len(group_path) > MAX_GROUP_PATH_DEPTH:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.WARNING,
+                 code="GROUP_PATH_TOO_DEEP",
+                 message=f"groupPath depth {len(group_path)} exceeds recommended max {MAX_GROUP_PATH_DEPTH}",
+                 path=f"datasets.{ds_key}.groupPath",
+                 details={"depth": len(group_path)},
+             )
+         )
+
+
+ def _validate_invariants(
+     invariants: list[dict[str, Any]],
+     datasets_by_key: dict[str, dict[str, Any]],
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate invariants reference valid datasets."""
+     for idx, inv in enumerate(invariants):
+         inv_type = inv.get("type")
+         inv_path = f"invariants[{idx}]"
+
+         if inv_type == "sameOwner":
+             level = inv.get("level")
+             targets = inv.get("targets") or []
+             for i, target in enumerate(targets):
+                 key = target.get("key") if isinstance(target, dict) else target
+                 if key not in datasets_by_key:
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code="INVARIANT_REFERENCES_UNKNOWN_DATASET",
+                             message=f"sameOwner invariant references unknown dataset '{key}'",
+                             path=f"{inv_path}.targets[{i}]",
+                         )
+                     )
+                 elif level:
+                     ds_owner = datasets_by_key[key].get("owner")
+                     if ds_owner != level:
+                         issues.append(
+                             ValidationIssue(
+                                 severity=ValidationSeverity.ERROR,
+                                 code="SAME_OWNER_LEVEL_MISMATCH",
+                                 message=f"sameOwner level '{level}' doesn't match dataset owner '{ds_owner}'",
+                                 path=f"{inv_path}.targets[{i}]",
+                             )
+                         )
+
+         elif inv_type == "joinOnOwner":
+             left = inv.get("left") or {}
+             right = inv.get("right") or {}
+             for ref_name, ref in [("left", left), ("right", right)]:
+                 key = ref.get("key")
+                 if key and key not in datasets_by_key:
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code="INVARIANT_REFERENCES_UNKNOWN_DATASET",
+                             message=f"joinOnOwner.{ref_name} references unknown dataset '{key}'",
+                             path=f"{inv_path}.{ref_name}",
+                         )
+                     )
+
+         elif inv_type == "itemsCount":
+             ds_key = inv.get("datasetKey")
+             if ds_key and ds_key not in datasets_by_key:
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="INVARIANT_REFERENCES_UNKNOWN_DATASET",
+                         message=f"itemsCount references unknown dataset '{ds_key}'",
+                         path=f"{inv_path}.datasetKey",
+                     )
+                 )
+             count = inv.get("count")
+             if count is not None and (not isinstance(count, int) or count < 1):
+                 issues.append(
+                     ValidationIssue(
+                         severity=ValidationSeverity.ERROR,
+                         code="ITEMS_COUNT_INVALID",
+                         message=f"itemsCount.count must be integer >= 1, got {count}",
+                         path=f"{inv_path}.count",
+                     )
+                 )
+
+
+ def _validate_relations(
+     relations: list[dict[str, Any]],
+     datasets_by_key: dict[str, dict[str, Any]],
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate relations reference valid datasets and fields."""
+     for idx, rel in enumerate(relations):
+         rel_path = f"relations[{idx}]"
+         from_ref = rel.get("from") or {}
+         to_ref = rel.get("to") or {}
+
+         from_key = from_ref.get("key")
+         to_key = to_ref.get("key")
+
+         # Keys must exist
+         if from_key and from_key not in datasets_by_key:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="RELATION_FROM_KEY_NOT_FOUND",
+                     message=f"relation.from references unknown dataset '{from_key}'",
+                     path=f"{rel_path}.from",
+                 )
+             )
+         if to_key and to_key not in datasets_by_key:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code="RELATION_TO_KEY_NOT_FOUND",
+                     message=f"relation.to references unknown dataset '{to_key}'",
+                     path=f"{rel_path}.to",
+                 )
+             )
+
+         # blob/scalar cannot have field relations
+         for key, ref_name in [(from_key, "from"), (to_key, "to")]:
+             if key and key in datasets_by_key:
+                 kind = datasets_by_key[key].get("kind")
+                 if kind in ("blob", "scalar"):
+                     issues.append(
+                         ValidationIssue(
+                             severity=ValidationSeverity.ERROR,
+                             code=f"RELATION_{ref_name.upper()}_CANNOT_BE_BLOB_OR_SCALAR",
+                             message=f"relation.{ref_name} cannot reference {kind} dataset",
+                             path=f"{rel_path}.{ref_name}",
+                         )
+                     )
+
+         # Validate field exists in schema (for frame/object)
+         _validate_relation_field(from_ref, datasets_by_key, "from", rel_path, issues)
+         _validate_relation_field(to_ref, datasets_by_key, "to", rel_path, issues)
+
+
+ def _validate_relation_field(
+     ref: dict[str, Any],
+     datasets_by_key: dict[str, dict[str, Any]],
+     ref_name: str,
+     rel_path: str,
+     issues: list[ValidationIssue],
+ ) -> None:
+     """Validate relation field exists in dataset schema."""
+     key = ref.get("key")
+     field = ref.get("field")
+     if not key or not field or key not in datasets_by_key:
+         return
+
+     dataset = datasets_by_key[key]
+     schema = dataset.get("schema") or {}
+     kind = dataset.get("kind")
+
+     if kind == "frame":
+         columns = schema.get("columns") or []
+         col_names = [c.get("name") if isinstance(c, dict) else c for c in columns]
+         if field not in col_names:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code=f"RELATION_{ref_name.upper()}_FIELD_NOT_IN_COLUMNS",
+                     message=f"relation.{ref_name}.field '{field}' not in frame columns",
+                     path=f"{rel_path}.{ref_name}.field",
+                 )
+             )
+     elif kind == "object":
+         fields = schema.get("fields") or []
+         field_names = [f.get("name") if isinstance(f, dict) else f for f in fields]
+         if field not in field_names:
+             issues.append(
+                 ValidationIssue(
+                     severity=ValidationSeverity.ERROR,
+                     code=f"RELATION_{ref_name.upper()}_FIELD_NOT_IN_FIELDS",
+                     message=f"relation.{ref_name}.field '{field}' not in object fields",
+                     path=f"{rel_path}.{ref_name}.field",
+                 )
+             )
+

  # =============================================================================
  # Bundle Validation
@@ -1032,6 +1641,157 @@ def _validate_item_against_contract(
          )


+ # =============================================================================
+ # Algorithm Signature Validation
+ # =============================================================================
+
+
+ def validate_algorithm_signature(workspace: Path) -> ValidationResult:
+     """Validate algorithm run function signature.
+
+     Checks:
+     - main.py exists
+     - Top-level run function exists
+     - run function is not async (sandbox doesn't support it)
+     - run function has exactly 1 positional parameter
+     - run function has no *args, **kwargs, or keyword-only args
+
+     Args:
+         workspace: Algorithm workspace path.
+
+     Returns:
+         ValidationResult with issues found.
+     """
+     workspace = Path(workspace).resolve()
+     issues: list[ValidationIssue] = []
+
+     main_path = workspace / "main.py"
+
+     if not main_path.exists():
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_MAIN_NOT_FOUND",
+                 message="main.py not found in algorithm workspace",
+                 path="main.py",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     try:
+         source = main_path.read_text(encoding="utf-8")
+         tree = ast.parse(source, filename="main.py")
+     except SyntaxError as e:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_SYNTAX_ERROR",
+                 message=f"Syntax error in main.py: {e}",
+                 path="main.py",
+                 details={"error": str(e)},
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     # Find TOP-LEVEL run functions only (not nested in classes/functions)
+     run_funcs = [
+         node
+         for node in tree.body
+         if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == "run"
+     ]
+
+     if len(run_funcs) == 0:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_NOT_FOUND",
+                 message="Top-level 'run' function not found in main.py",
+                 path="main.py",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     if len(run_funcs) > 1:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_MULTIPLE_RUN_FUNCTIONS",
+                 message=f"Multiple top-level 'run' functions found ({len(run_funcs)})",
+                 path="main.py",
+             )
+         )
+         return ValidationResult(valid=False, issues=issues)
+
+     run_func = run_funcs[0]
+
+     # async def run → ERROR (sandbox doesn't support it)
+     if isinstance(run_func, ast.AsyncFunctionDef):
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_ASYNC_RUN_NOT_SUPPORTED",
+                 message="'async def run' is not supported; sandbox requires synchronous 'def run'",
+                 path="main.py",
+             )
+         )
+
+     args = run_func.args
+
+     # No *args
+     if args.vararg:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_HAS_VARARG",
+                 message=f"run function must not have *args (found: *{args.vararg.arg})",
+                 path="main.py",
+             )
+         )
+
+     # No **kwargs
+     if args.kwarg:
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_HAS_KWARG",
+                 message=f"run function must not have **kwargs (found: **{args.kwarg.arg})",
+                 path="main.py",
+             )
+         )
+
+     # No keyword-only args
+     if args.kwonlyargs:
+         kw_names = [a.arg for a in args.kwonlyargs]
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_HAS_KWONLY_ARGS",
+                 message=f"run function must not have keyword-only args (found: {kw_names})",
+                 path="main.py",
+             )
+         )
+
+     # Exactly 1 positional parameter (excluding 'self' for methods)
+     positional_args = list(args.posonlyargs) + list(args.args)
+     if positional_args and positional_args[0].arg == "self":
+         positional_args = positional_args[1:]
+
+     if len(positional_args) != 1:
+         param_names = [a.arg for a in positional_args]
+         issues.append(
+             ValidationIssue(
+                 severity=ValidationSeverity.ERROR,
+                 code="ALGORITHM_RUN_WRONG_PARAM_COUNT",
+                 message=f"run function must have exactly 1 parameter (context), found {len(positional_args)}: {param_names}",
+                 path="main.py",
+                 details={"found": len(positional_args), "params": param_names},
+             )
+         )
+
+     has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)
+     return ValidationResult(valid=not has_errors, issues=issues)
+
+
  __all__ = [
      "ValidationSeverity",
      "ValidationIssue",
@@ -1040,4 +1800,5 @@ __all__ = [
      "validate_output_contract",
      "validate_bundle",
      "validate_run_manifest",
+     "validate_algorithm_signature",
  ]
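
A minimal usage sketch for the new validator (the import path is assumed; this diff does not show the module's location inside the package):

    from pathlib import Path

    from fraclab_sdk.validation import validate_algorithm_signature  # path assumed

    # A main.py that satisfies the checks: a single top-level, synchronous
    # run function taking exactly one positional parameter.
    #
    #     def run(context):
    #         ...

    result = validate_algorithm_signature(Path("./my_algorithm"))
    for issue in result.issues:
        print(issue.severity, issue.code, issue.message)
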