PyPI - nullrun - Versions diffs - 0.4.0__py3-none-any.whl - Mend

nullrun 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

nullrun/__init__.py +282 -0
nullrun/__version__.py +4 -0
nullrun/actions.py +455 -0
nullrun/breaker/__init__.py +27 -0
nullrun/breaker/circuit_breaker.py +402 -0
nullrun/breaker/exceptions.py +319 -0
nullrun/context.py +208 -0
nullrun/decorators.py +649 -0
nullrun/instrumentation/__init__.py +23 -0
nullrun/instrumentation/_safe_patch.py +99 -0
nullrun/instrumentation/auto.py +1095 -0
nullrun/instrumentation/auto_requests.py +257 -0
nullrun/instrumentation/autogen.py +163 -0
nullrun/instrumentation/crewai.py +140 -0
nullrun/instrumentation/langgraph.py +412 -0
nullrun/instrumentation/llama_index.py +110 -0
nullrun/observability.py +160 -0
nullrun/py.typed +0 -0
nullrun/runtime.py +1806 -0
nullrun/toolbox/__init__.py +20 -0
nullrun/toolbox/langgraph.py +94 -0
nullrun/tracing.py +155 -0
nullrun/transport.py +1509 -0
nullrun/transport_websocket.py +627 -0
nullrun-0.4.0.dist-info/METADATA +194 -0
nullrun-0.4.0.dist-info/RECORD +28 -0
nullrun-0.4.0.dist-info/WHEEL +4 -0
nullrun-0.4.0.dist-info/licenses/LICENSE +201 -0

nullrun/breaker/exceptions.py ADDED Viewed

@@ -0,0 +1,319 @@
+from enum import Enum
+from typing import Any
+class BreakerError(Exception):
+    """Base exception for Breaker SDK."""
+    pass
+class TransportErrorSource(str, Enum):
+    """Where a transport failure originated.
+    Surfaces the failure classification up to the caller so the
+    `decision_source` audit trail can distinguish "server said
+    block" from "server did not respond" — see ADR-008 for the full
+    rationale.
+    These values also flow through `decision_source` on
+    `execute` / `check` return dicts when the transport layer
+    degrades to a fallback instead of raising.
+    """
+    NETWORK_ERROR = "NETWORK_ERROR"  # httpx.ConnectError, timeout, DNS
+    GATEWAY_ERROR = "GATEWAY_ERROR"  # 5xx from the gateway
+    BREAKER_OPEN = "BREAKER_OPEN"  # circuit breaker tripped
+    AUTH_ERROR = "AUTH_ERROR"  # 401 / 403 from the gateway
+class NullRunTransportError(BreakerError):
+    """Raised by transport layer when the policy engine is unreachable.
+    The exception carries a `source` (TransportErrorSource) and the
+    `endpoint` that failed, so callers can implement endpoint-specific
+    recovery (e.g. fail-CLOSED for sensitive tools, fail-OPEN for
+    budget pre-checks) per ADR-008.
+    Replaces the previous behavior of swallowing the failure and
+    returning a synthetic `allow` / `block` response — that hid
+    the policy-engine outage from operators and was the root cause
+    of bug #1 / #2 fixed in ADR-008.
+    """
+    def __init__(
+        self,
+        message: str,
+        source: TransportErrorSource,
+        endpoint: str,
+        **details: Any,
+    ) -> None:
+        self.source = source
+        self.endpoint = endpoint
+        self.details = details
+        super().__init__(
+            f"Transport error on {endpoint}: {message} "
+            f"(source={source.value}, details={details})"
+        )
+class RateLimitError(NullRunTransportError):
+    """Raised when the gateway returns HTTP 429 with a ``Retry-After``
+    header (or JSON body field).
+    Phase 4: subclass of ``NullRunTransportError`` so
+    ``except NullRunTransportError`` keeps catching it. Surfaces
+    ``retry_after`` (seconds) and ``upgrade_url`` so callers can
+    schedule a retry or surface a billing upgrade prompt.
+    Attributes:
+        retry_after: Seconds the server asks the client to wait
+            before retrying. ``None`` when no ``Retry-After`` header.
+        upgrade_url: Plan-upgrade URL from the 429 body. ``None``
+            when the response did not include one.
+        body: Parsed JSON body (gateway's ``error`` / ``message``).
+    """
+    def __init__(
+        self,
+        message: str,
+        source: TransportErrorSource,
+        endpoint: str,
+        retry_after: float | None = None,
+        upgrade_url: str | None = None,
+        body: dict[str, Any] | None = None,
+        **details: Any,
+    ) -> None:
+        self.retry_after = retry_after
+        self.upgrade_url = upgrade_url
+        self.body = body or {}
+        if retry_after is not None:
+            details.setdefault("retry_after", retry_after)
+        if upgrade_url is not None:
+            details.setdefault("upgrade_url", upgrade_url)
+        super().__init__(message, source, endpoint, **details)
+class BreakerTransportError(BreakerError):
+    """
+    Raised when transport layer fails and events cannot be delivered.
+    This exception indicates a critical failure in the transport layer where
+    events are being dropped after exceeding retry limits. The caller must
+    handle this exception - events are NOT silently lost.
+    Use cases:
+    - After max_retries consecutive flush failures
+    - Transport buffer full and circuit breaker triggered
+    - Network connectivity issues preventing delivery
+    Applications should implement retry logic or alerting mechanism when this exception
+    is raised, as budget protection may be compromised.
+    """
+    def __init__(
+        self,
+        message: str,
+        events_lost: int = 0,
+        buffer_size: int = 0,
+        **details: Any,
+    ) -> None:
+        self.events_lost = events_lost
+        self.buffer_size = buffer_size
+        self.details = details
+        super().__init__(
+            f"Transport error: {message} "
+            f"(events_lost={events_lost}, buffer_size={buffer_size}, details={details})"
+        )
+class InsecureTransportError(BreakerTransportError):
+    """Raised when SDK is configured with insecure HTTP (non-localhost)."""
+    pass
+class NullRunAuthenticationError(BreakerError):
+    """
+    Raised when authentication fails and safe mode is required.
+    This exception indicates that the SDK could not authenticate with
+    the NullRun backend and will not operate in unprotected mode.
+    Applications should handle this exception and provide valid credentials.
+    """
+    def __init__(self, message: str):
+        self.message = message
+        super().__init__(message)
+class NullRunBlockedException(BreakerError):
+    """
+    Raised when NullRun circuit breaker trips.
+    This is the client-side enforcement exception that
+    immediately stops runaway agents without waiting for
+    network roundtrip to the backend.
+    Use cases:
+    - Budget exceeded
+    - Loop detected (>6 same tool calls)
+    - Retry storm (>5 retries)
+    - Rate limit exceeded
+    Attributes:
+        workflow_id: Workflow that was blocked (may be a sentinel like
+            "<unknown>" when the block fires outside a workflow context,
+            e.g. the sensitive-tool pre-check).
+        reason: Human-readable explanation of why the block fired.
+        action: One of "block" / "kill" / "pause" — the suggested
+            downstream action.
+        tool_name: Optional name of the tool that triggered the block.
+            Surfaced as a first-class attribute (not just `details`) so
+            cookbook examples and audit pipelines can read
+            `exc.tool_name` without indexing into `**details`.
+            `None` when the block is workflow-scoped rather than
+            tool-scoped.
+        details: Free-form structured payload forwarded by the caller.
+    """
+    def __init__(
+        self,
+        workflow_id: str,
+        reason: str,
+        action: str = "block",
+        tool_name: str | None = None,
+        **details: Any,
+    ) -> None:
+        self.workflow_id = workflow_id
+        self.reason = reason
+        self.action = action
+        self.tool_name = tool_name
+        self.details = details
+        tool_suffix = f", tool={tool_name}" if tool_name else ""
+        super().__init__(
+            f"Workflow {workflow_id} blocked: {reason} "
+            f"(action={action}{tool_suffix}, details={details})"
+        )
+# NOTE (Sprint 2.2): the following six exception classes were removed
+# in 0.4.0 because they had no callers in the SDK or in any
+# test. They were zombie public surface — defined but never raised.
+# If a real use case emerges in the future, they should be re-added
+# with at least one in-tree caller and a regression test that
+# exercises the raise path:
+#   - CostLimitExceeded
+#   - ApprovalRequired
+#   - BreakerTimeout
+#   - LoopDetectedException
+#   - RetryStormException
+#   - RateLimitExceededException
+class WorkflowPausedException(BreakerError):
+    """
+    Raised when workflow is paused by NullRun.
+    This allows the workflow to be resumed later after
+    human approval or automatic cooldown.
+    """
+    def __init__(self, workflow_id: str, reason: str, resume_after: float | None = None) -> None:
+        self.workflow_id = workflow_id
+        self.reason = reason
+        self.resume_after = resume_after
+        msg = f"Workflow {workflow_id} paused: {reason}"
+        if resume_after:
+            msg += f" (resume after {resume_after}s)"
+        super().__init__(msg)
+class WorkflowKilledException(BaseException):
+    """
+    DEPRECATED. Use :class:`WorkflowKilledInterrupt` instead.
+    Kept for backward compatibility: this class is the *parent* of
+    :class:`WorkflowKilledInterrupt`, so user code that does
+    ``except WorkflowKilledException`` will still catch the new raises
+    (``except X`` matches subclasses of ``X`` — and the new class is
+    a subclass of this one).
+    A ``DeprecationWarning`` is emitted on construction. The class will
+    be removed in a future major release; migrate new code to
+    :class:`WorkflowKilledInterrupt` and update existing
+    ``except WorkflowKilledException`` clauses to
+    ``except WorkflowKilledInterrupt``, or, if recovery is impossible,
+    let the exception propagate to the top of the loop.
+    This class is **not** an ``Exception`` subclass — kill is a
+    non-recoverable signal and should not be caught by generic
+    ``except Exception`` clauses. Only ``except BaseException`` or the
+    explicit ``except WorkflowKilledInterrupt`` reliably stops the work.
+    See ``docs/kill-contract.md`` §6 for the full rationale.
+    """
+    def __init__(self, workflow_id: str, reason: str) -> None:
+        import warnings as _w
+        _w.warn(
+            "WorkflowKilledException is deprecated. Catch "
+            "WorkflowKilledInterrupt (BaseException) instead. The class "
+            "is preserved for backward-compatible `except` clauses but "
+            "will be removed in a future major release.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        self.workflow_id = workflow_id
+        self.reason = reason
+        super().__init__(f"Workflow {workflow_id} killed: {reason}")
+class WorkflowKilledInterrupt(WorkflowKilledException):
+    """
+    Raised when a workflow is killed by the NullRun control plane.
+    Inherits from the deprecated :class:`WorkflowKilledException`
+    (which is itself a ``BaseException`` subclass, not ``Exception``)
+    so that:
+      * ``except WorkflowKilledInterrupt`` (new code) catches new raises
+        and only new raises.
+      * ``except WorkflowKilledException`` (legacy user code) still
+        catches new raises — back-compat.
+      * ``except Exception`` does **not** catch this signal — kill is
+        not a recoverable error. Mirrors the ``KeyboardInterrupt`` /
+        ``SystemExit`` pattern from the standard library: user code
+        that catches ``except Exception`` and re-runs the work will
+        silently bypass the kill.
+      * ``except BaseException`` catches it, like the stdlib interrupts.
+    See ``docs/kill-contract.md`` §6 for the full rationale, including
+    the four-level coverage model and the decision tree for users.
+    Fields:
+        workflow_id:  The workflow that was killed.
+        reason:       Server-supplied reason (e.g. "killed via API",
+                      "budget exhausted", "circuit-breaker tripped").
+    Catching in production
+    ----------------------
+    ``WorkflowKilledInterrupt`` is a ``BaseException`` subclass
+    (NOT ``Exception``), so a user-agent ``try / except Exception``
+    will not catch it. This is intentional — the kill signal
+    must reach the top of the loop. It does mean, however, that
+    Sentry / OpenTelemetry default error handlers (which filter
+    on ``Exception``) will not record the kill event unless the
+    user's code re-raises it under an ``except BaseException``:
+        from sentry_sdk import capture_exception
+        try:
+            agent.run()
+        except BaseException:
+            capture_exception()  # records kill, ctrl-c, system-exit
+            raise
+    ``except Exception`` will swallow non-kill errors but let the
+    kill through. ``except BaseException`` captures everything
+    including the kill — recommended for the top of an agent loop.
+    """
+    def __init__(self, workflow_id: str, reason: str) -> None:
+        # Bypass the parent's __init__ so constructing the canonical
+        # class does NOT trigger the parent's DeprecationWarning. The
+        # deprecation is about using the old *name* — not the
+        # BaseException-based hierarchy.
+        self.workflow_id = workflow_id
+        self.reason = reason
+        BaseException.__init__(self, f"Workflow {workflow_id} killed: {reason}")

nullrun/context.py ADDED Viewed

@@ -0,0 +1,208 @@
+"""
+Context management for NullRun SDK.
+Provides workflow and trace context for automatic event correlation.
+Sprint 2.7 (B27): the previously-defined ``_organization_id_var`` /
+``_api_key_id_var`` contextvars and the ``get_organization_id`` /
+``get_api_key_id`` getters were removed because:
+  1. No code path ever wrote to them — both getters always
+     returned ``None``.
+  2. ``observability.TenantFilter`` (the only consumer) was
+     removed in 0.3.1.
+  3. The structured-logging tenant-isolation feature moved to
+     the backend in the same release.
+If a future use case appears (e.g. per-API-key rate isolation),
+re-introduce the contextvars AND a setter API (like
+``set_attempt_index``, ideally token-based so the previous value can
+be restored, as the ``attempt`` context manager does) AND wire them
+in ``NullRunRuntime.__init__`` from the ``_authenticate`` response.
+"""
+import uuid
+from collections.abc import Generator
+from contextlib import contextmanager
+from contextvars import ContextVar
+# Context variables for workflow/trace propagation.
+# Each is a module-private ContextVar. The four identifier variables
+# default to None (nothing bound); the attempt index defaults to 0.
+# Within this module they are written by the workflow / span / agent /
+# attempt context managers (and set_attempt_index) and read back through
+# the get_* helpers below.
+_workflow_id_var: ContextVar[str | None] = ContextVar("workflow_id", default=None)
+_trace_id_var: ContextVar[str | None] = ContextVar("trace_id", default=None)
+_span_id_var: ContextVar[str | None] = ContextVar("span_id", default=None)
+_agent_id_var: ContextVar[str | None] = ContextVar("agent_id", default=None)
+# Integer rather than optional: 0 doubles as "first attempt / not retrying".
+_attempt_index_var: ContextVar[int] = ContextVar("attempt_index", default=0)
+# =============================================================================
+# Workflow / trace getters
+# =============================================================================
+def get_workflow_id() -> str | None:
+    """Get current workflow ID from context."""
+    return _workflow_id_var.get()
+def get_trace_id() -> str | None:
+    """Get current trace ID from context."""
+    return _trace_id_var.get()
+def get_span_id() -> str | None:
+    """Get current span ID from context."""
+    return _span_id_var.get()
+def get_agent_id() -> str | None:
+    """Get current agent ID from context."""
+    return _agent_id_var.get()
+def get_attempt_index() -> int:
+    """Get current attempt index from context (for retry correlation)."""
+    return _attempt_index_var.get()
+def set_attempt_index(index: int) -> None:
+    """Set current attempt index for retry correlation."""
+    _attempt_index_var.set(index)
+def generate_trace_id() -> str:
+    """Generate a new trace ID.
+    Returns a real UUID4 (e.g. ``95ca7c0b-8334-478a-af23-2788803ef3b8``).
+    The backend's `cost_events.trace_id` is uuid-typed, so the wire
+    value has to parse as a UUID — earlier we shipped
+    ``f"trace-{hex[:16]}"`` which silently dropped to NULL on insert
+    (the handler's `Uuid::parse_str(...).ok()` returned None).
+    """
+    return str(uuid.uuid4())
+def generate_span_id() -> str:
+    """Generate a new span ID. Real UUID4 — see generate_trace_id."""
+    return str(uuid.uuid4())
+@contextmanager
+def workflow(name: str | None = None) -> Generator[str, None, None]:
+    """
+    Context manager for workflow scope.
+    Sets up a new workflow context with auto-generated or provided workflow_id.
+    All track() calls within this context automatically use this workflow_id.
+    Usage:
+        from nullrun import workflow
+        with workflow("my-agent"):
+            # All events here auto-tagged with workflow_id
+            track({"type": "llm_call", ...})
+            agent.invoke(...)
+    Args:
+        name: Optional workflow name. Auto-generated if not provided.
+    Yields:
+        The workflow_id string
+    """
+    # Phase 5 #5.6: emit a real UUID4 with dashes (matching
+    # ``generate_trace_id``). The previous ``wf-{hex32}`` format
+    # was inconsistent with the rest of the SDK's id generation.
+    workflow_id = name or str(uuid.uuid4())
+    trace_id = generate_trace_id()
+    # Save current values
+    wf_token = _workflow_id_var.set(workflow_id)
+    trace_token = _trace_id_var.set(trace_id)
+    try:
+        yield workflow_id
+    finally:
+        # Restore previous values
+        _workflow_id_var.reset(wf_token)
+        _trace_id_var.reset(trace_token)
+@contextmanager
+def span(name: str | None = None) -> Generator[str, None, None]:
+    """
+    Context manager for a span within a workflow.
+    Usage:
+        with workflow("my-agent"):
+            with span("llm-call"):
+                result = llm.invoke(prompt)
+                track({"type": "llm_call", ...})
+    """
+    span_id = name or generate_span_id()
+    token = _span_id_var.set(span_id)
+    try:
+        yield span_id
+    finally:
+        _span_id_var.reset(token)
+@contextmanager
+def agent(name: str | None = None) -> Generator[str, None, None]:
+    """
+    Context manager for agent scope within a workflow.
+    Sets up an agent context with auto-generated or provided agent_id.
+    All track() calls within this context automatically use this agent_id
+    for per-agent cost attribution.
+    Usage:
+        from nullrun import workflow, agent, track
+        with workflow("my-workflow"):
+            with agent("my-agent"):
+                # All events here auto-tagged with agent_id
+                track({"type": "llm_call", ...})
+                agent.invoke(...)
+    Args:
+        name: Optional agent name/ID. Auto-generated if not provided.
+    Yields:
+        The agent_id string
+    """
+    agent_id = name or f"agent-{uuid.uuid4().hex}"
+    token = _agent_id_var.set(agent_id)
+    try:
+        yield agent_id
+    finally:
+        _agent_id_var.reset(token)
+@contextmanager
+def attempt(attempt_index: int) -> Generator[int, None, None]:
+    """
+    Context manager for attempt scope within a workflow (retry correlation).
+    Sets up an attempt context for correlating retries in execution attempts.
+    All track() calls within this context automatically include the attempt_index
+    for linking retries to the same ExecutionAttempt in the backend.
+    Usage:
+        from nullrun import workflow, attempt, track
+        with workflow("my-workflow"):
+            for attempt_index in range(retries):
+                with attempt(attempt_index):
+                    track({"type": "llm_call", ...})
+                    llm.invoke(prompt)
+    Args:
+        attempt_index: The attempt index (0 = first attempt, 1 = first retry, etc.)
+    Yields:
+        The attempt_index
+    """
+    token = _attempt_index_var.set(attempt_index)
+    try:
+        yield attempt_index
+    finally:
+        _attempt_index_var.reset(token)