PyPI - lightassay - Versions diffs - 0.3.0__py3-none-any.whl - Mend

lightassay 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

lightassay/__init__.py +134 -0
lightassay/adapter_pack/__init__.py +295 -0
lightassay/adapter_pack/command.py +84 -0
lightassay/adapter_pack/http_driver.py +75 -0
lightassay/adapter_pack/python_callable.py +63 -0
lightassay/analyzer.py +287 -0
lightassay/backends.py +144 -0
lightassay/bootstrap.py +469 -0
lightassay/builtin_adapters/__init__.py +27 -0
lightassay/builtin_adapters/_agent_cli_common.py +281 -0
lightassay/builtin_adapters/claude_cli.py +29 -0
lightassay/builtin_adapters/codex_cli.py +28 -0
lightassay/builtin_adapters/stub.py +361 -0
lightassay/cli.py +1077 -0
lightassay/comparer.py +197 -0
lightassay/diagnostics.py +104 -0
lightassay/errors.py +94 -0
lightassay/expert.py +440 -0
lightassay/orchestrator.py +1219 -0
lightassay/preparation_config.py +109 -0
lightassay/preparer.py +1218 -0
lightassay/run_artifact_io.py +407 -0
lightassay/run_models.py +70 -0
lightassay/runner.py +298 -0
lightassay/runtime_state.py +240 -0
lightassay/semantic_config.py +102 -0
lightassay/surface.py +2635 -0
lightassay/types.py +319 -0
lightassay/workbook_models.py +151 -0
lightassay/workbook_parser.py +824 -0
lightassay/workbook_renderer.py +405 -0
lightassay/workflow_config.py +239 -0
lightassay/workflow_config_builder.py +141 -0
lightassay-0.3.0.dist-info/METADATA +163 -0
lightassay-0.3.0.dist-info/RECORD +39 -0
lightassay-0.3.0.dist-info/WHEEL +5 -0
lightassay-0.3.0.dist-info/entry_points.txt +2 -0
lightassay-0.3.0.dist-info/licenses/LICENSE +21 -0
lightassay-0.3.0.dist-info/top_level.txt +1 -0

lightassay/__init__.py ADDED Viewed

@@ -0,0 +1,134 @@
+"""lightassay: file-based orchestrator for structured evaluation of applied LLM workflows.
+One rule runs through the whole design: humans declare intent, LLMs do the
+semantic reasoning, code orchestrates execution and measures raw facts — and
+never judges output quality.  The workbook (markdown), run artifact (JSON),
+and analysis/compare artifacts (markdown) are the source of truth; the
+library is an orchestrator around them.
+The ordinary public entrypoint is the L1 library surface.  Start here::
+    from lightassay import (
+        EvalTarget,
+        open_session,
+        init_workbook,
+        quick_try,
+        quick_try_workbook,
+        refine_workbook,
+        explore_workbook,
+        compare_runs,
+    )
+    # Create a workbook (or use an existing one).
+    wb_path = init_workbook("my-eval", output_dir=".")
+    # Or run a one-shot quick try to see the full workbook shape.
+    quick = quick_try(
+        "my-quick-try",
+        target=EvalTarget(
+            kind="workflow",
+            name="summarize",
+            locator="myapp.pipeline.run",
+            boundary="high-level pipeline boundary",
+            sources=["myapp/pipeline.py", "myapp/prompts/summarize.py"],
+        ),
+        user_request="Check how the pipeline handles obvious failures without over-correcting.",
+        preparation_config="prep.json",
+        output_dir=".",
+    )
+    # Open a session.
+    session = open_session(
+        wb_path,
+        preparation_config="prep.json",
+        workflow_config="wf.json",
+        semantic_config="sem.json",
+    )
+    # Inspect state, prepare, run, analyze.
+    state = session.state()
+    result = session.prepare()
+    ...
+    # Compare runs (no session/workbook required).
+    compare_result = compare_runs(
+        ["run_a.json", "run_b.json"],
+        semantic_config="sem.json",
+    )
+Deeper engine internals are not part of the ordinary L1 surface.
+Use ``open_diagnostics()`` on a session to enter the L2
+diagnostics/recovery layer with structured reports, evidence, and
+bounded recovery actions.  The ``DiagnosticsHandle`` type returned
+by ``open_diagnostics()`` lives in ``lightassay.types`` but
+is not part of the ordinary top-level export set.  L2 detail types
+live in ``lightassay.diagnostics``.
+For deep inspection and bounded low-level control, escalate from
+L2 to L3 via ``diag.open_expert()``.  L3 types live in
+``lightassay.expert``.
+"""
+__version__ = "0.3.0"
+# L1 public surface ──────────────────────────────────────────────────────────
+from .errors import EvalError
+from .surface import (
+    EvalSession,
+    compare_runs,
+    continue_workbook,
+    explore_workbook,
+    init_workbook,
+    list_backends,
+    open_session,
+    quick_try,
+    quick_try_workbook,
+    quickstart,
+    refine_workbook,
+)
+from .types import (
+    AnalyzeResult,
+    CompareResult,
+    ContinueResult,
+    EvalState,
+    EvalTarget,
+    ExploreResult,
+    PreparationStage,
+    PrepareResult,
+    QuickstartResult,
+    QuickTryResult,
+    RefineResult,
+    RunResult,
+)
+__all__ = [
+    # Version
+    "__version__",
+    # L1 control
+    "open_session",
+    "init_workbook",
+    "quick_try",
+    "quick_try_workbook",
+    "refine_workbook",
+    "explore_workbook",
+    "compare_runs",
+    "quickstart",
+    "continue_workbook",
+    "list_backends",
+    "EvalSession",
+    # L1 types
+    "EvalTarget",
+    "EvalState",
+    "ExploreResult",
+    "PreparationStage",
+    "PrepareResult",
+    "QuickstartResult",
+    "QuickTryResult",
+    "ContinueResult",
+    "RefineResult",
+    "RunResult",
+    "AnalyzeResult",
+    "CompareResult",
+    # L1 error boundary
+    "EvalError",
+]

lightassay/adapter_pack/__init__.py ADDED Viewed

@@ -0,0 +1,295 @@
+"""First-party adapter pack for common workflow integration shapes.
+This module ships generic drivers for three common integration patterns:
+- ``python-callable``: call a Python function directly (no subprocess)
+- ``http``: call an HTTP endpoint with JSON request/response
+- ``command``: run an explicit command list as a subprocess
+Drivers are selected via the ``driver`` field in workflow config (see
+``docs/adapter_pack_spec.md``).  Each driver produces the same response
+contract as the raw subprocess adapter protocol.
+The legacy ``adapter`` field (raw executable path) remains supported.
+Exactly one of ``adapter`` or ``driver`` must be present in a workflow
+config.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Union
+# ── Driver error ────────────────────────────────────────────────────────────
+class DriverError(Exception):
+    """Signal that a first-party driver could not complete its execution.
+    The exception message ends up as the ``execution_error`` of the case
+    record, the same way a failing subprocess adapter is reported.
+    """
+# ── Driver config types ────────────────────────────────────────────────────
+DRIVER_TYPE_PYTHON_CALLABLE = "python-callable"
+DRIVER_TYPE_HTTP = "http"
+DRIVER_TYPE_COMMAND = "command"
+# Every value accepted for the ``type`` field of a driver config.
+KNOWN_DRIVER_TYPES = frozenset(
+    (DRIVER_TYPE_PYTHON_CALLABLE, DRIVER_TYPE_HTTP, DRIVER_TYPE_COMMAND)
+)
+@dataclass(frozen=True)
+class PythonCallableDriverConfig:
+    """Settings for the ``python-callable`` driver.
+    The driver targets ``function``, looked up by name inside ``module``
+    (a dotted import path such as ``my_package.adapter``).  That function
+    takes one ``dict`` -- the adapter request -- and must hand back a
+    ``dict`` adapter response that follows the standard response contract.
+    """
+    module: str
+    function: str
+@dataclass(frozen=True)
+class HttpDriverConfig:
+    """Config for the ``http`` driver.
+    ``url`` is the full HTTP endpoint URL.
+    ``method`` is the HTTP method (e.g. ``"POST"``).
+    ``headers`` is an optional dict of extra HTTP headers (default ``None``).
+    ``timeout_seconds`` is an optional request timeout in seconds
+    (default ``None``).
+    If ``timeout_seconds`` is absent, no timeout is enforced (consistent
+    with the v1 subprocess protocol).
+    """
+    url: str
+    method: str
+    # The optional fields default to ``None`` so a config can be built from
+    # the two required fields alone, mirroring how ``CommandDriverConfig``
+    # defaults its own optional ``config_dir``.
+    headers: dict[str, str] | None = None
+    timeout_seconds: int | None = None
+@dataclass(frozen=True)
+class CommandDriverConfig:
+    """Settings for the ``command`` driver.
+    ``command`` holds the subprocess argv as a non-empty list of strings,
+    for example ``["python3", "my_adapter.py"]``.  The subprocess is fed the
+    adapter request JSON on stdin and has to print the adapter response
+    JSON on stdout, exactly as in the raw subprocess protocol.
+    ``config_dir`` is the absolute path of the directory that holds the
+    workflow config file.  It is filled in by ``load_workflow_config`` and
+    never read from the user's JSON config.  When present, the subprocess
+    uses it as working directory, which makes relative paths in
+    ``command`` resolve against the config file's location instead of the
+    caller's cwd.
+    """
+    command: list[str]
+    config_dir: str | None = None
+# Any one of the typed configs returned by ``validate_driver_config`` and
+# accepted by ``execute_driver``.
+DriverConfig = Union[
+    PythonCallableDriverConfig,
+    HttpDriverConfig,
+    CommandDriverConfig,
+]
+# ── Driver config validation ───────────────────────────────────────────────
+# Field names each driver type accepts besides ``type``.  Only the http
+# driver has optional fields; the other two accept their required fields
+# and nothing else.
+_PYTHON_CALLABLE_REQUIRED = {"module", "function"}
+_HTTP_REQUIRED = {"url", "method"}
+_HTTP_OPTIONAL = {"headers", "timeout_seconds"}
+_COMMAND_REQUIRED = {"command"}
+def validate_driver_config(data: dict) -> DriverConfig:
+    """Turn a raw driver config mapping into its typed ``DriverConfig``.
+    The ``type`` entry selects the driver; every other entry is handed to
+    that driver's own validator.
+    Raises ``ValueError`` with a descriptive message on any violation.
+    """
+    if not isinstance(data, dict):
+        raise ValueError(f"Driver config must be a JSON object, got {type(data).__name__}")
+    if "type" not in data:
+        raise ValueError("Driver config missing required field: 'type'")
+    kind = data["type"]
+    if not isinstance(kind, str):
+        raise ValueError(
+            f"Driver config field 'type' must be a string, got {type(kind).__name__}"
+        )
+    if kind not in KNOWN_DRIVER_TYPES:
+        known = ", ".join(sorted(KNOWN_DRIVER_TYPES))
+        raise ValueError(f"Unknown driver type: {kind!r}. Known types: {known}")
+    # Everything except the discriminator goes to the per-type validator.
+    remaining = {key: data[key] for key in data if key != "type"}
+    validators = {
+        DRIVER_TYPE_PYTHON_CALLABLE: _validate_python_callable,
+        DRIVER_TYPE_HTTP: _validate_http,
+        DRIVER_TYPE_COMMAND: _validate_command,
+    }
+    validator = validators.get(kind)
+    if validator is None:
+        # Unreachable while every known type has a validator registered above.
+        raise ValueError(f"Unknown driver type: {kind!r}")
+    return validator(remaining)
+def _validate_python_callable(fields: dict) -> PythonCallableDriverConfig:
+    """Check the ``python-callable`` fields and build the typed config.
+    Both ``module`` and ``function`` must be present as non-blank strings
+    and no other field is accepted; any violation raises ``ValueError``.
+    """
+    extra = fields.keys() - _PYTHON_CALLABLE_REQUIRED
+    if extra:
+        listed = ", ".join(sorted(extra))
+        allowed = ", ".join(sorted(_PYTHON_CALLABLE_REQUIRED))
+        raise ValueError(
+            f"python-callable driver has unknown fields: {listed}. Allowed: {allowed}"
+        )
+    for key in sorted(_PYTHON_CALLABLE_REQUIRED):
+        if key not in fields:
+            raise ValueError(f"python-callable driver missing required field: {key!r}")
+        value = fields[key]
+        if not isinstance(value, str):
+            raise ValueError(
+                f"python-callable driver field {key!r} must be a string, "
+                f"got {type(value).__name__}"
+            )
+        if value.strip() == "":
+            raise ValueError(f"python-callable driver field {key!r} must be non-empty")
+    return PythonCallableDriverConfig(module=fields["module"], function=fields["function"])
+def _validate_http(fields: dict) -> HttpDriverConfig:
+    """Validate the fields of an ``http`` driver config and build the typed config.
+    ``url`` and ``method`` are required non-blank strings.  ``headers`` (a
+    JSON object with string keys and string values) and ``timeout_seconds``
+    (a positive integer; booleans are rejected) are optional.  Any other
+    field is rejected.
+    Returns an ``HttpDriverConfig``; omitted optional fields become ``None``.
+    Raises ``ValueError`` with a descriptive message on any violation.
+    """
+    allowed = _HTTP_REQUIRED | _HTTP_OPTIONAL
+    unknown = set(fields.keys()) - allowed
+    if unknown:
+        raise ValueError(
+            f"http driver has unknown fields: "
+            f"{', '.join(sorted(unknown))}. "
+            f"Allowed: {', '.join(sorted(allowed))}"
+        )
+    for name in sorted(_HTTP_REQUIRED):
+        if name not in fields:
+            raise ValueError(f"http driver missing required field: {name!r}")
+        val = fields[name]
+        if not isinstance(val, str):
+            raise ValueError(
+                f"http driver field {name!r} must be a string, got {type(val).__name__}"
+            )
+        if not val.strip():
+            raise ValueError(f"http driver field {name!r} must be non-empty")
+    headers = None
+    if "headers" in fields:
+        h = fields["headers"]
+        if not isinstance(h, dict):
+            raise ValueError(
+                f"http driver field 'headers' must be a JSON object, got {type(h).__name__}"
+            )
+        for k, v in h.items():
+            if not isinstance(k, str) or not isinstance(v, str):
+                raise ValueError(
+                    "http driver field 'headers' must be a dict of string keys and string values"
+                )
+        # Store a copy: the config is frozen, so a later mutation of the raw
+        # config dict must not leak into the validated headers.
+        headers = dict(h)
+    timeout = None
+    if "timeout_seconds" in fields:
+        t = fields["timeout_seconds"]
+        # ``bool`` is a subclass of ``int``, so it has to be excluded explicitly.
+        if not isinstance(t, int) or isinstance(t, bool):
+            raise ValueError(
+                f"http driver field 'timeout_seconds' must be an integer, got {type(t).__name__}"
+            )
+        if t <= 0:
+            raise ValueError("http driver field 'timeout_seconds' must be positive")
+        timeout = t
+    return HttpDriverConfig(
+        url=fields["url"],
+        method=fields["method"],
+        headers=headers,
+        timeout_seconds=timeout,
+    )
+def _validate_command(fields: dict) -> CommandDriverConfig:
+    """Validate the fields of a ``command`` driver config and build the typed config.
+    ``command`` is the only accepted field: a non-empty JSON array whose
+    items are all non-blank strings.
+    Returns a ``CommandDriverConfig`` with ``config_dir`` left at its default.
+    Raises ``ValueError`` with a descriptive message on any violation.
+    """
+    unknown = set(fields.keys()) - _COMMAND_REQUIRED
+    if unknown:
+        raise ValueError(
+            f"command driver has unknown fields: "
+            f"{', '.join(sorted(unknown))}. "
+            f"Allowed: {', '.join(sorted(_COMMAND_REQUIRED))}"
+        )
+    if "command" not in fields:
+        raise ValueError("command driver missing required field: 'command'")
+    cmd = fields["command"]
+    if not isinstance(cmd, list):
+        raise ValueError(
+            f"command driver field 'command' must be a JSON array, got {type(cmd).__name__}"
+        )
+    if not cmd:
+        raise ValueError("command driver field 'command' must be a non-empty array")
+    for i, item in enumerate(cmd):
+        if not isinstance(item, str):
+            raise ValueError(
+                f"command driver field 'command[{i}]' must be a string, got {type(item).__name__}"
+            )
+        if not item.strip():
+            raise ValueError(f"command driver field 'command[{i}]' must be non-empty")
+    # Store a copy: the config is frozen, so a later mutation of the raw
+    # config list must not change the validated command.
+    return CommandDriverConfig(command=list(cmd))
+# ── Driver dispatch ─────────────────────────────────────────────────────────
+def execute_driver(config: DriverConfig, request_data: dict) -> dict:
+    """Run the first-party driver that matches the type of ``config``.
+    On success the adapter response dict is returned; any execution
+    failure surfaces as ``DriverError``.
+    The response is expected to follow the standard adapter response
+    contract (``raw_response``, ``parsed_response``, ``usage``), but it is
+    not validated here: the caller does that (the runner applies the same
+    strict validation as for subprocess adapters).
+    """
+    # Driver modules are imported on demand, only for the driver in use.
+    if isinstance(config, PythonCallableDriverConfig):
+        from .python_callable import execute as run_python_callable
+        return run_python_callable(config, request_data)
+    if isinstance(config, HttpDriverConfig):
+        from .http_driver import execute as run_http
+        return run_http(config, request_data)
+    if isinstance(config, CommandDriverConfig):
+        from .command import execute as run_command
+        return run_command(config, request_data)
+    raise DriverError(f"Unknown driver config type: {type(config).__name__}")

lightassay/adapter_pack/command.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""command driver: run an explicit command list as a subprocess.
+Similar to the legacy raw executable adapter path, but the command is
+specified as an explicit list of strings rather than a single executable
+path.  This allows arguments, interpreters, and flags to be specified
+directly in the workflow config.
+The subprocess receives the adapter request JSON on stdin and must
+write the adapter response JSON to stdout.
+**Config-origin semantics:** when ``CommandDriverConfig.config_dir`` is
+set (always the case when the config comes from ``load_workflow_config``),
+the subprocess runs with ``cwd=config_dir``.  This means relative paths
+in the command array (e.g. ``"adapters/my_adapter.py"``) resolve against
+the directory containing the workflow config file, not the caller's cwd.
+**Non-zero exit diagnostics:** when the subprocess exits with a non-zero
+code, a bounded excerpt of its stdout is included in the ``DriverError``
+message so that adapter-side diagnostic output is not silently lost.
+"""
+from __future__ import annotations
+import json
+import subprocess
+from . import CommandDriverConfig, DriverError
+# Maximum number of characters to include from stdout when surfacing
+# a non-zero exit error.  Large enough to be diagnostic, bounded to
+# avoid unbounded error messages.
+_STDOUT_EXCERPT_LIMIT = 2000
+def execute(config: CommandDriverConfig, request_data: dict) -> dict:
+    """Execute the command driver.
+    ``request_data`` is serialized to JSON and written to the subprocess's
+    stdin; its stdout is parsed as JSON and returned.  Both directions use
+    UTF-8 regardless of the platform locale.
+    When ``config.config_dir`` is set, the subprocess runs with that
+    directory as its working directory (config-origin semantics).
+    Raises ``DriverError`` on subprocess failures (non-zero exit,
+    invalid JSON output, non-dict response, not found, not executable,
+    any other OS-level launch failure, or data that is not valid UTF-8).
+    On non-zero exit, the error includes a bounded stdout excerpt.
+    """
+    request_json = json.dumps(request_data, ensure_ascii=False)
+    run_kwargs: dict = {}
+    if config.config_dir is not None:
+        run_kwargs["cwd"] = config.config_dir
+    try:
+        result = subprocess.run(
+            config.command,
+            input=request_json,
+            capture_output=True,
+            # The request is dumped with ensure_ascii=False, so it may hold
+            # non-ASCII text.  Pin the pipe encoding to UTF-8 instead of the
+            # locale default so the exchange is the same on every platform.
+            encoding="utf-8",
+            **run_kwargs,
+        )
+    except FileNotFoundError:
+        raise DriverError(f"command driver: command not found: {config.command[0]!r}") from None
+    except PermissionError:
+        raise DriverError(
+            f"command driver: command not executable: {config.command[0]!r}"
+        ) from None
+    except OSError as exc:
+        # Any other launch failure must also honor the DriverError contract.
+        raise DriverError(
+            f"command driver: failed to run command {config.command[0]!r}: "
+            f"{type(exc).__name__}: {exc}"
+        ) from exc
+    except UnicodeError as exc:
+        # Raised when the request cannot be encoded, or the captured output
+        # cannot be decoded, as UTF-8.
+        raise DriverError(
+            f"command driver: data exchanged with command is not valid UTF-8: {exc}"
+        ) from exc
+    if result.returncode != 0:
+        msg = f"command driver: command exited with code {result.returncode}"
+        stdout_excerpt = (result.stdout or "")[:_STDOUT_EXCERPT_LIMIT]
+        if stdout_excerpt:
+            msg += f"; stdout: {stdout_excerpt}"
+        raise DriverError(msg)
+    stdout = result.stdout
+    try:
+        response = json.loads(stdout)
+    except (json.JSONDecodeError, ValueError):
+        raise DriverError("command driver: command stdout is not valid JSON") from None
+    if not isinstance(response, dict):
+        raise DriverError(
+            f"command driver: response must be a JSON object, got {type(response).__name__}"
+        )
+    return response

lightassay/adapter_pack/http_driver.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""http driver: call an HTTP endpoint with JSON request/response.
+The adapter request dict is serialized as JSON and sent to the
+configured URL using the configured HTTP method.  The response body
+is parsed as JSON and returned as the adapter response dict.
+Uses ``urllib.request`` from the standard library (no third-party
+dependency).
+"""
+from __future__ import annotations
+import json
+import urllib.error
+import urllib.request
+from . import DriverError, HttpDriverConfig
+def execute(config: HttpDriverConfig, request_data: dict) -> dict:
+    """Execute the http driver.
+    ``request_data`` is serialized as UTF-8 JSON and sent to ``config.url``
+    with ``config.method``; the response body is decoded as UTF-8, parsed
+    as JSON and returned.  ``config.headers`` are added after the default
+    ``Content-Type: application/json`` header, and ``config.timeout_seconds``
+    is passed as the request timeout when it is set.
+    Raises ``DriverError`` on a malformed URL, network errors, non-2xx
+    responses, invalid JSON in the response body, or if the response is
+    not a dict.
+    """
+    request_json = json.dumps(request_data, ensure_ascii=False).encode("utf-8")
+    # ``Request`` parses the URL eagerly and raises ``ValueError`` for a URL
+    # without a scheme, so convert that into the driver's own error type.
+    try:
+        req = urllib.request.Request(
+            config.url,
+            data=request_json,
+            method=config.method,
+        )
+    except ValueError as exc:
+        raise DriverError(f"http driver: invalid URL {config.url!r}: {exc}") from exc
+    req.add_header("Content-Type", "application/json")
+    if config.headers is not None:
+        for key, value in config.headers.items():
+            req.add_header(key, value)
+    timeout_kwargs: dict = {}
+    if config.timeout_seconds is not None:
+        timeout_kwargs["timeout"] = config.timeout_seconds
+    try:
+        with urllib.request.urlopen(req, **timeout_kwargs) as response:
+            try:
+                response_body = response.read().decode("utf-8")
+            except Exception as exc:
+                raise DriverError(
+                    f"http driver: failed to read response body: {type(exc).__name__}: {exc}"
+                ) from exc
+    except urllib.error.HTTPError as exc:
+        # HTTPError doubles as a response object; close it before re-raising.
+        exc.close()
+        raise DriverError(f"http driver: HTTP {exc.code} from {config.url!r}") from exc
+    except urllib.error.URLError as exc:
+        raise DriverError(
+            f"http driver: connection failed to {config.url!r}: {exc.reason}"
+        ) from exc
+    except DriverError:
+        # Already in driver form (raised by the body-read handler above).
+        raise
+    except Exception as exc:
+        raise DriverError(
+            f"http driver: request failed to {config.url!r}: {type(exc).__name__}: {exc}"
+        ) from exc
+    try:
+        result = json.loads(response_body)
+    except (json.JSONDecodeError, ValueError) as exc:
+        raise DriverError("http driver: response body is not valid JSON") from exc
+    if not isinstance(result, dict):
+        raise DriverError(
+            f"http driver: response must be a JSON object, got {type(result).__name__}"
+        )
+    return result

lightassay/adapter_pack/python_callable.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""python-callable driver: call a Python function directly.
+The configured ``module`` is imported via ``importlib.import_module``
+and the configured ``function`` is looked up as an attribute.
+The function must accept a single ``dict`` (adapter request) and return
+a ``dict`` (adapter response) conforming to the standard response
+contract.
+No subprocess overhead.  The function runs in the same process.
+"""
+from __future__ import annotations
+import importlib
+from . import DriverError, PythonCallableDriverConfig
+def execute(config: PythonCallableDriverConfig, request_data: dict) -> dict:
+    """Execute the python-callable driver.
+    Imports ``config.module``, looks up ``config.function`` on it, calls
+    it with ``request_data`` and returns the resulting dict.
+    Raises ``DriverError`` on import failure (including any exception
+    raised while the module's top-level code runs), missing function,
+    non-callable attribute, or if the function raises an exception or
+    returns a non-dict value.
+    """
+    # Import the module.
+    try:
+        module = importlib.import_module(config.module)
+    except ImportError as exc:
+        raise DriverError(
+            f"python-callable driver: failed to import module {config.module!r}: {exc}"
+        ) from exc
+    except Exception as exc:
+        # Importing executes the module body, so failures are not limited to
+        # ImportError (e.g. a SyntaxError or an error in top-level code).
+        raise DriverError(
+            f"python-callable driver: failed to import module {config.module!r}: "
+            f"{type(exc).__name__}: {exc}"
+        ) from exc
+    # Look up the function.
+    if not hasattr(module, config.function):
+        raise DriverError(
+            f"python-callable driver: module {config.module!r} has no attribute {config.function!r}"
+        )
+    func = getattr(module, config.function)
+    if not callable(func):
+        raise DriverError(
+            f"python-callable driver: {config.module}.{config.function} is not callable"
+        )
+    # Call the function.
+    try:
+        response = func(request_data)
+    except Exception as exc:
+        raise DriverError(
+            f"python-callable driver: function "
+            f"{config.module}.{config.function} raised {type(exc).__name__}: "
+            f"{exc}"
+        ) from exc
+    if not isinstance(response, dict):
+        raise DriverError(
+            f"python-callable driver: function "
+            f"{config.module}.{config.function} must return a dict, "
+            f"got {type(response).__name__}"
+        )
+    return response