npm - @datalayer/agent-runtimes - Versions diffs - 1.0.5 → 1.0.6 - Mend

@datalayer/agent-runtimes 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111)

package/README.md +157 -10
package/lib/AgentNode.d.ts +3 -0
package/lib/AgentNode.js +676 -0
package/lib/agent-node/themeStore.d.ts +3 -0
package/lib/agent-node/themeStore.js +156 -0
package/lib/agent-node-main.d.ts +1 -0
package/lib/agent-node-main.js +14 -0
package/lib/chat/Chat.js +16 -10
package/lib/chat/ChatFloating.js +1 -1
package/lib/chat/ChatSidebar.js +81 -49
package/lib/chat/base/ChatBase.js +388 -74
package/lib/chat/display/FloatingBrandButton.js +8 -1
package/lib/chat/header/ChatHeader.d.ts +3 -1
package/lib/chat/header/ChatHeader.js +15 -12
package/lib/chat/header/ChatHeaderBase.d.ts +29 -9
package/lib/chat/header/ChatHeaderBase.js +26 -3
package/lib/chat/indicators/SandboxStatusIndicator.js +82 -47
package/lib/chat/messages/ChatMessageList.js +46 -1
package/lib/chat/messages/ChatMessages.js +6 -2
package/lib/chat/prompt/InputFooter.d.ts +3 -1
package/lib/chat/prompt/InputFooter.js +8 -5
package/lib/chat/prompt/InputPrompt.d.ts +3 -1
package/lib/chat/prompt/InputPrompt.js +2 -2
package/lib/chat/prompt/InputPromptFooter.d.ts +3 -1
package/lib/chat/prompt/InputPromptFooter.js +3 -3
package/lib/client/AgentsMixin.js +14 -0
package/lib/config/AgentConfiguration.d.ts +22 -0
package/lib/config/AgentConfiguration.js +319 -64
package/lib/examples/AgUiSharedStateExample.js +2 -1
package/lib/examples/AgentCheckpointsExample.js +3 -3
package/lib/examples/AgentCodemodeExample.d.ts +3 -3
package/lib/examples/AgentCodemodeExample.js +24 -12
package/lib/examples/AgentEvalsExample.js +330 -40
package/lib/examples/AgentGuardrailsExample.js +16 -5
package/lib/examples/AgentHooksExample.js +27 -9
package/lib/examples/AgentInferenceProviderExample.d.ts +3 -0
package/lib/examples/AgentInferenceProviderExample.js +329 -0
package/lib/examples/AgentMCPExample.js +6 -5
package/lib/examples/AgentMemoryExample.d.ts +1 -2
package/lib/examples/AgentMemoryExample.js +71 -22
package/lib/examples/AgentMonitoringExample.js +5 -5
package/lib/examples/AgentNotificationsExample.d.ts +1 -2
package/lib/examples/AgentNotificationsExample.js +71 -22
package/lib/examples/AgentOtelExample.js +31 -40
package/lib/examples/AgentOutputsExample.d.ts +1 -1
package/lib/examples/AgentOutputsExample.js +67 -16
package/lib/examples/AgentParametersExample.js +10 -8
package/lib/examples/AgentSandboxExample.d.ts +1 -1
package/lib/examples/AgentSandboxExample.js +7 -6
package/lib/examples/AgentSkillsExample.js +6 -6
package/lib/examples/AgentSubagentsExample.d.ts +1 -1
package/lib/examples/AgentSubagentsExample.js +6 -6
package/lib/examples/AgentToolApprovalsExample.js +27 -11
package/lib/examples/AgentTriggersExample.js +5 -5
package/lib/examples/{AgentSpecsExample.d.ts → AgentspecsExample.d.ts} +2 -2
package/lib/examples/AgentspecsExample.js +1096 -0
package/lib/examples/ChatCustomExample.js +6 -5
package/lib/examples/ChatExample.js +6 -5
package/lib/examples/Lexical2Example.js +1 -1
package/lib/examples/LexicalAgentExample.js +1 -1
package/lib/examples/NotebookAgentExample.js +3 -3
package/lib/examples/components/ExampleWrapper.d.ts +6 -7
package/lib/examples/components/ExampleWrapper.js +27 -10
package/lib/examples/example-selector.js +2 -1
package/lib/examples/index.d.ts +2 -1
package/lib/examples/index.js +2 -1
package/lib/examples/lexical/initial-content.json +6 -6
package/lib/examples/main.js +56 -16
package/lib/examples/utils/agentId.d.ts +1 -1
package/lib/examples/utils/agentId.js +1 -1
package/lib/examples/utils/useExampleAgentRuntimesUrl.d.ts +5 -0
package/lib/examples/utils/useExampleAgentRuntimesUrl.js +19 -0
package/lib/hooks/useAIAgentsWebSocket.js +35 -0
package/lib/hooks/useAgentRuntimes.d.ts +32 -3
package/lib/hooks/useAgentRuntimes.js +114 -19
package/lib/index.d.ts +1 -1
package/lib/specs/agents/agents.d.ts +20 -13
package/lib/specs/agents/agents.js +1267 -581
package/lib/specs/benchmarks.d.ts +20 -0
package/lib/specs/benchmarks.js +205 -0
package/lib/specs/envvars.d.ts +0 -1
package/lib/specs/envvars.js +0 -11
package/lib/specs/evals.d.ts +10 -9
package/lib/specs/evals.js +128 -88
package/lib/specs/index.d.ts +0 -1
package/lib/specs/index.js +0 -1
package/lib/specs/models.d.ts +0 -2
package/lib/specs/models.js +0 -15
package/lib/specs/skills.d.ts +0 -1
package/lib/specs/skills.js +0 -18
package/lib/stores/agentRuntimeStore.d.ts +5 -1
package/lib/stores/agentRuntimeStore.js +22 -8
package/lib/stores/conversationStore.js +2 -2
package/lib/types/agents-lifecycle.d.ts +18 -0
package/lib/types/agents.d.ts +6 -0
package/lib/types/agentspecs.d.ts +4 -0
package/lib/types/benchmarks.d.ts +43 -0
package/lib/types/benchmarks.js +5 -0
package/lib/types/chat.d.ts +16 -0
package/lib/types/evals.d.ts +26 -17
package/lib/types/index.d.ts +1 -0
package/lib/types/index.js +1 -0
package/package.json +9 -5
package/scripts/codegen/__pycache__/generate_agents.cpython-313.pyc +0 -0
package/scripts/codegen/__pycache__/generate_benchmarks.cpython-313.pyc +0 -0
package/scripts/codegen/__pycache__/generate_evals.cpython-313.pyc +0 -0
package/scripts/codegen/generate_agents.py +89 -43
package/scripts/codegen/generate_benchmarks.py +441 -0
package/scripts/codegen/generate_evals.py +94 -16
package/scripts/codegen/generate_events.py +0 -1
package/lib/examples/AgentSpecsExample.js +0 -694

package/lib/examples/AgentCodemodeExample.d.ts CHANGED

@@ -1,9 +1,9 @@
 /**
  * AgentCodemodeExample
  *
- * Compares two Tavily-based agents side-by-side:
- * - Tavily MCP without codemode conversion
- * - Tavily MCP with codemode conversion
+ * Compares two tooling modes side-by-side:
+ * - MCP tools without codemode conversion
+ * - MCP tools with codemode conversion
  *
  * A sidebar gauge tracks consumed tokens for each agent in real time.
  */

package/lib/examples/AgentCodemodeExample.js CHANGED

@@ -6,9 +6,9 @@ import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
 /**
  * AgentCodemodeExample
  *
- * Compares two Tavily-based agents side-by-side:
- * - Tavily MCP without codemode conversion
- * - Tavily MCP with codemode conversion
+ * Compares two tooling modes side-by-side:
+ * - MCP tools without codemode conversion
+ * - MCP tools with codemode conversion
  *
  * A sidebar gauge tracks consumed tokens for each agent in real time.
  */
@@ -37,24 +37,24 @@ const NO_CODEMODE_BASE_URL = import.meta.env.VITE_BASE_URL_NO_CODEMODE ||
     import.meta.env.VITE_BASE_URL ||
     'http://localhost:8765';
 const CODEMODE_BASE_URL = import.meta.env.VITE_BASE_URL_CODEMODE || 'http://localhost:8766';
-const NO_CODEMODE_SUGGESTION_MESSAGE = 'Use the Tavily Extract tool to extract information from https://datalayer.ai, then use your sandbox to persist that information in a variable named "about_datalayer".';
+const NO_CODEMODE_SUGGESTION_MESSAGE = 'Use the MCP extract tool to extract information from https://datalayer.ai, then use your sandbox to persist that information in a variable named "about_datalayer".';
 const CODEMODE_SUGGESTION_MESSAGE = 'Extract information from the https://datalayer.ai website and assign it to the variable "about_datalayer", all in one step using the sandbox';
 const DEMO_AGENT_CONFIGS = [
     {
         key: 'no-codemode',
-        title: 'Tavily MCP (No Codemode)',
+        title: 'MCP Tools (No Codemode)',
         subtitle: 'Raw MCP tools without codemode conversion',
         suggestionMessage: NO_CODEMODE_SUGGESTION_MESSAGE,
-        specId: 'demo-tavily-no-codemode',
+        specId: 'example-no-codemode',
         color: '#0969DA',
         baseUrl: NO_CODEMODE_BASE_URL,
     },
     {
         key: 'codemode',
-        title: 'Tavily MCP (Codemode)',
+        title: 'Codemode Tools',
         subtitle: 'MCP tools converted into programmatic tools',
         suggestionMessage: CODEMODE_SUGGESTION_MESSAGE,
-        specId: 'demo-tavily-codemode',
+        specId: 'example-codemode',
         color: '#8250DF',
         baseUrl: CODEMODE_BASE_URL,
     },
@@ -87,13 +87,19 @@ const AgentRuntimePane = ({ config, token, onTokenConsumed, onAgentIdChange, onC
     }), [token]);
     useEffect(() => {
         let cancelled = false;
+        const launchTimeoutMs = 20_000;
         const createLocalAgent = async () => {
             setRuntimeStatus('launching');
             setHookError(null);
             setIsReconnectedAgent(false);
             try {
+                const controller = new AbortController();
+                const timeoutId = window.setTimeout(() => {
+                    controller.abort();
+                }, launchTimeoutMs);
                 const response = await authFetch(`${config.baseUrl}/api/v1/agents`, {
                     method: 'POST',
+                    signal: controller.signal,
                     body: JSON.stringify({
                         name: runtimeName,
                         description: config.subtitle,
@@ -104,6 +110,7 @@ const AgentRuntimePane = ({ config, token, onTokenConsumed, onAgentIdChange, onC
                         tools: [],
                     }),
                 });
+                window.clearTimeout(timeoutId);
                 let resolvedAgentId = runtimeName;
                 let alreadyRunning = false;
                 if (response.ok) {
@@ -139,7 +146,12 @@ const AgentRuntimePane = ({ config, token, onTokenConsumed, onAgentIdChange, onC
             }
             catch (error) {
                 if (!cancelled) {
-                    setHookError(error instanceof Error ? error.message : 'Agent failed to start');
+                    const isAbortError = error instanceof DOMException && error.name === 'AbortError';
+                    setHookError(isAbortError
+                        ? `Timed out after ${Math.round(launchTimeoutMs / 1000)}s while creating '${config.specId}' at ${config.baseUrl}. Ensure the no-codemode endpoint is reachable.`
+                        : error instanceof Error
+                            ? `${error.message} (endpoint: ${config.baseUrl}, spec: ${config.specId})`
+                            : `Agent failed to start (endpoint: ${config.baseUrl}, spec: ${config.specId})`);
                     setRuntimeStatus('error');
                 }
             }
@@ -349,7 +361,7 @@ const AgentRuntimePane = ({ config, token, onTokenConsumed, onAgentIdChange, onC
                 justifyContent: 'center',
                 flexDirection: 'column',
                 gap: 2,
-            }, children: [_jsx(Spinner, { size: "small" }), _jsxs(Text, { sx: { fontSize: 0, color: 'fg.muted' }, children: ["Launching ", config.title, "..."] })] }));
+            }, children: [_jsx(Spinner, { size: "small" }), _jsxs(Text, { sx: { fontSize: 0, color: 'fg.muted' }, children: ["Launching ", config.title, "..."] }), _jsx(Text, { sx: { fontSize: 0, color: 'fg.subtle' }, children: config.baseUrl })] }));
     }
     if (runtimeStatus === 'error' || hookError) {
         return (_jsxs(Flash, { variant: "danger", sx: { borderRadius: 2 }, children: [config.title, ": ", hookError || 'Failed to start'] }));
@@ -362,7 +374,7 @@ const AgentRuntimePane = ({ config, token, onTokenConsumed, onAgentIdChange, onC
             minHeight: 560,
             display: 'flex',
             flexDirection: 'column',
-        }, children: _jsx(Box, { sx: { flex: 1, minHeight: 0 }, children: _jsx(Chat, { protocol: "vercel-ai", baseUrl: config.baseUrl, agentId: agentId, authToken: token, title: config.title, subtitle: config.subtitle, placeholder: "Ask both agents the same request to compare behavior...", description: config.subtitle, showHeader: true, headerActions: isReconnectedAgent ? (_jsx(Label, { size: "small", variant: "attention", children: "Reconnected" })) : undefined, autoFocus: false, height: "100%", runtimeId: agentId, historyEndpoint: `${config.baseUrl}/api/v1/history`, mcpStatusData: mcpStatusData, codemodeStatusData: codemodeStatusData, codemodeEnabled: codemodeEnabled, onToggleCodemode: handleToggleCodemode, suggestions: [
+        }, children: _jsx(Box, { sx: { flex: 1, minHeight: 0 }, children: _jsx(Chat, { protocol: "vercel-ai", baseUrl: config.baseUrl, agentId: agentId, authToken: token, title: config.title, brandIcon: _jsx(CodeIcon, { size: 16 }), subtitle: config.subtitle, placeholder: "Ask both agents the same request to compare behavior...", description: config.subtitle, showHeader: true, headerActions: isReconnectedAgent ? (_jsx(Label, { size: "small", variant: "attention", children: "Reconnected" })) : undefined, autoFocus: false, height: "100%", runtimeId: agentId, historyEndpoint: `${config.baseUrl}/api/v1/history`, mcpStatusData: mcpStatusData, codemodeStatusData: codemodeStatusData, codemodeEnabled: codemodeEnabled, onToggleCodemode: handleToggleCodemode, suggestions: [
                     {
                         title: 'Datalayer extraction',
                         message: config.suggestionMessage,
@@ -529,7 +541,7 @@ const AgentCodemodeInner = ({ onLogout, }) => {
                     borderBottom: '1px solid',
                     borderColor: 'border.default',
                     flexShrink: 0,
-                }, children: [_jsx(CodeIcon, { size: 16 }), _jsx(Heading, { as: "h3", sx: { fontSize: 2, flex: 1 }, children: "Codemode \u2014 Tavily MCP vs Tavily Codemode" })] }), _jsxs(Box, { sx: { flex: 1, minHeight: 0, display: 'flex' }, children: [DEMO_AGENT_CONFIGS.filter(c => c.key === 'no-codemode').map(config => (() => {
+                }, children: [_jsx(CodeIcon, { size: 16 }), _jsx(Heading, { as: "h3", sx: { fontSize: 2, flex: 1 }, children: "Codemode \u2014 MCP Tools vs Codemode Tools" })] }), _jsxs(Box, { sx: { flex: 1, minHeight: 0, display: 'flex' }, children: [DEMO_AGENT_CONFIGS.filter(c => c.key === 'no-codemode').map(config => (() => {
                         const outcome = outcomeFor(config.key);
                         return (_jsxs(Box, { sx: {
                                 width: 320,

package/lib/examples/AgentEvalsExample.js CHANGED

@@ -15,41 +15,119 @@ import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
  *   pass/fail status, and the ability to run eval suites
  */
 /// <reference types="vite/client" />
-import { useEffect, useState, useCallback, useRef } from 'react';
+import { useEffect, useState, useCallback, useRef, useMemo, } from 'react';
 import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
-import { Text, Button, Spinner, Heading, Label, Flash, ProgressBar, } from '@primer/react';
+import { Text, Button, Spinner, Heading, Label, Flash, ProgressBar, Select, FormControl, } from '@primer/react';
 import { BeakerIcon, CheckCircleIcon, XCircleIcon, PlayIcon, } from '@primer/octicons-react';
 import { Box } from '@datalayer/primer-addons';
 import { AuthRequiredView, ErrorView } from './components';
 import { ThemedProvider } from './utils/themedProvider';
 import { uniqueAgentId } from './utils/agentId';
+import { useExampleAgentRuntimesUrl } from './utils/useExampleAgentRuntimesUrl';
 import { useSimpleAuthStore } from '@datalayer/core/lib/views/otel';
+import { useCoreStore } from '@datalayer/core';
 import { Chat } from '../chat';
 import { useAgentRuntimes } from '../hooks/useAgentRuntimes';
 const queryClient = new QueryClient();
 // ─── Constants ─────────────────────────────────────────────────────────────
-const AGENT_NAME = 'eval-demo-agent';
-const AGENT_SPEC_ID = 'monitor-sales-kpis';
+const AGENT_NAME = 'eval-example-agent';
+const AGENT_SPEC_ID = 'example-evals';
+const DEFAULT_EXECUTION_TARGET = (import.meta.env.VITE_AGENT_EVALS_TARGET || 'cloud').toLowerCase() === 'local'
+    ? 'local'
+    : 'cloud';
+const normalizeHttpUrl = (value) => {
+    if (typeof value !== 'string') {
+        return null;
+    }
+    const trimmed = value.trim();
+    if (!trimmed) {
+        return null;
+    }
+    try {
+        const url = new URL(trimmed);
+        if (url.protocol !== 'http:' && url.protocol !== 'https:') {
+            return null;
+        }
+        url.pathname = '';
+        url.search = '';
+        url.hash = '';
+        return url.toString().replace(/\/$/, '');
+    }
+    catch {
+        return null;
+    }
+};
+const isLocalhostUrl = (value) => {
+    if (!value) {
+        return false;
+    }
+    try {
+        const url = new URL(value);
+        return (url.hostname === 'localhost' ||
+            url.hostname === '127.0.0.1' ||
+            url.hostname === '0.0.0.0');
+    }
+    catch {
+        return false;
+    }
+};
 // ─── Inner component (rendered after auth) ─────────────────────────────────
-const AgentEvalsInner = ({ onLogout }) => {
+const AgentEvalsInner = ({ onLogout, executionTarget, onExecutionTargetChange }) => {
     const { token } = useSimpleAuthStore();
+    const { configuration } = useCoreStore();
     const agentName = useRef(uniqueAgentId(AGENT_NAME)).current;
-    const { runtime, status: runtimeStatus, isReady, error: hookError, } = useAgentRuntimes({
+    const localRuntimeBaseUrl = useExampleAgentRuntimesUrl();
+    const cloudRuntimeBaseUrl = useMemo(() => {
+        const envRuntimesUrl = normalizeHttpUrl(import.meta.env.VITE_DATALAYER_RUNTIMES_URL);
+        const envAgentRuntimesUrl = normalizeHttpUrl(import.meta.env.VITE_DATALAYER_AGENT_RUNTIMES_URL);
+        const configuredRuntimesUrl = normalizeHttpUrl(configuration?.runtimesRunUrl);
+        if (envRuntimesUrl && !isLocalhostUrl(envRuntimesUrl)) {
+            return envRuntimesUrl;
+        }
+        if (configuredRuntimesUrl && !isLocalhostUrl(configuredRuntimesUrl)) {
+            return configuredRuntimesUrl;
+        }
+        if (envAgentRuntimesUrl && !isLocalhostUrl(envAgentRuntimesUrl)) {
+            return envAgentRuntimesUrl;
+        }
+        return 'https://r1.datalayer.run';
+    }, [configuration?.runtimesRunUrl]);
+    const { runtime, status: runtimeStatus, isReady, error: hookError, runtimeCreationBaseUrl, } = useAgentRuntimes({
         agentSpecId: AGENT_SPEC_ID,
-        autoStart: true,
+        autoStart: executionTarget === 'cloud',
+        runtimeCreationTarget: executionTarget === 'local' ? 'local-agent-runtimes' : 'backend-services',
+        runtimeCreationBaseUrl: executionTarget === 'local' ? localRuntimeBaseUrl : cloudRuntimeBaseUrl,
         agentConfig: {
             name: agentName,
-            model: 'bedrock:us.anthropic.claude-3-5-haiku-20241022-v1:0',
+            model: 'bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0',
             protocol: 'vercel-ai',
             description: 'Agent with evaluation and quality scoring',
         },
     });
+    const [localAgentId, setLocalAgentId] = useState(null);
+    const [localStatus, setLocalStatus] = useState('launching');
+    const [localError, setLocalError] = useState(null);
     const [evalRuns, setEvalRuns] = useState([]);
     const [isRunning, setIsRunning] = useState(false);
     const [flash, setFlash] = useState(null);
-    const agentBaseUrl = runtime?.agentBaseUrl || '';
-    const agentId = runtime?.agentId || AGENT_NAME;
-    const podName = runtime?.podName || '(launching…)';
+    const [evalId, setEvalId] = useState(null);
+    const [experimentId, setExperimentId] = useState(null);
+    const [isBootstrapping, setIsBootstrapping] = useState(true);
+    const cloudAgentBaseUrl = runtime?.agentBaseUrl || '';
+    const localAgentBaseUrl = runtimeCreationBaseUrl;
+    const agentBaseUrl = executionTarget === 'local' ? localAgentBaseUrl : cloudAgentBaseUrl;
+    const agentId = executionTarget === 'local'
+        ? localAgentId || agentName
+        : runtime?.agentId || AGENT_NAME;
+    const podName = executionTarget === 'local'
+        ? `local:${agentId}`
+        : runtime?.podName || '(launching…)';
+    const controlPlaneBaseUrl = import.meta.env.VITE_RUN_URL ||
+        configuration?.runUrl ||
+        (cloudAgentBaseUrl ? new URL(cloudAgentBaseUrl).origin : '');
+    const isAgentReady = executionTarget === 'local' ? localStatus === 'ready' : isReady;
+    const agentStatus = executionTarget === 'local' ? localStatus : runtimeStatus;
+    const effectiveError = executionTarget === 'local' ? localError : hookError;
     // Authenticated fetch helper
     const authFetch = useCallback((url, opts = {}) => fetch(url, {
         ...opts,
@@ -59,67 +137,271 @@ const AgentEvalsInner = ({ onLogout }) => {
             ...(opts.headers ?? {}),
         },
     }), [token]);
+    const evalApiFetch = useCallback(async (path, opts = {}) => {
+        const response = await authFetch(`${controlPlaneBaseUrl}/api/ai-agents/v1${path}`, opts);
+        const payload = await response.json().catch(() => ({}));
+        if (!response.ok || payload?.success === false) {
+            throw new Error(payload?.detail ||
+                payload?.message ||
+                `Eval API request failed (${response.status})`);
+        }
+        return payload;
+    }, [authFetch, controlPlaneBaseUrl]);
+    const mapRuns = useCallback((rows) => {
+        return rows.map((run) => {
+            const passRateRaw = run?.metrics?.pass_rate ??
+                run?.summary?.pass_rate ??
+                run?.summary?.score ??
+                0;
+            const score = Math.max(0, Math.min(1, Number(passRateRaw) || 0));
+            const passed = Number(run?.summary?.passed ?? Math.round(score * 100));
+            const failed = Number(run?.summary?.failed ?? Math.max(0, 100 - passed));
+            return {
+                id: String(run?.id || Math.random()),
+                timestamp: String(run?.created_at || run?.updated_at || new Date().toISOString()),
+                suiteName: String(run?.summary?.suite_name || run?.summary?.name || 'default-suite'),
+                passed,
+                failed,
+                score,
+            };
+        });
+    }, []);
+    useEffect(() => {
+        if (executionTarget !== 'local' || !agentBaseUrl) {
+            return;
+        }
+        let isCancelled = false;
+        const createLocalAgent = async () => {
+            setLocalStatus('launching');
+            setLocalError(null);
+            try {
+                const response = await authFetch(`${agentBaseUrl}/api/v1/agents`, {
+                    method: 'POST',
+                    body: JSON.stringify({
+                        name: agentName,
+                        description: 'Agent with evaluation and quality scoring',
+                        agent_library: 'pydantic-ai',
+                        transport: 'vercel-ai',
+                        agent_spec_id: AGENT_SPEC_ID,
+                        enable_skills: true,
+                        tools: [],
+                    }),
+                });
+                let resolvedAgentId = agentName;
+                if (response.ok) {
+                    const payload = await response.json().catch(() => ({}));
+                    resolvedAgentId = payload?.id || agentName;
+                }
+                else {
+                    const contentType = response.headers.get('content-type') || '';
+                    let detail = '';
+                    if (contentType.includes('application/json')) {
+                        const payload = await response.json().catch(() => null);
+                        detail =
+                            (typeof payload?.detail === 'string' && payload.detail) ||
+                                (typeof payload?.message === 'string' && payload.message) ||
+                                '';
+                    }
+                    else {
+                        detail = await response.text();
+                    }
+                    if (response.status !== 409 &&
+                        !/already exists/i.test(detail || '')) {
+                        throw new Error(detail || `Failed to create local agent: ${response.status}`);
+                    }
+                }
+                if (!isCancelled) {
+                    setLocalAgentId(resolvedAgentId);
+                    setLocalStatus('ready');
+                }
+            }
+            catch (error) {
+                if (!isCancelled) {
+                    setLocalError(error instanceof Error ? error.message : 'Agent failed to start');
+                    setLocalStatus('error');
+                }
+            }
+        };
+        void createLocalAgent();
+        return () => {
+            isCancelled = true;
+        };
+    }, [executionTarget, agentBaseUrl, agentName, authFetch]);
+    useEffect(() => {
+        if (!isAgentReady || !controlPlaneBaseUrl)
+            return;
+        const bootstrap = async () => {
+            setIsBootstrapping(true);
+            try {
+                const evalName = `agent-evals-${agentId}`;
+                const evalsRes = await evalApiFetch(`/evals/evals?source=hosted&q=${encodeURIComponent(evalName)}&limit=50`);
+                const evals = Array.isArray(evalsRes?.evals)
+                    ? evalsRes.evals
+                    : [];
+                let evalRecord = evals.find((d) => d?.name === evalName);
+                if (!evalRecord) {
+                    const createdEvalRes = await evalApiFetch('/evals/evals', {
+                        method: 'POST',
+                        body: JSON.stringify({
+                            name: evalName,
+                            description: `Hosted eval for ${agentId}`,
+                            source: 'hosted',
+                            kind: 'agent-quality',
+                            schema: {},
+                            tags: ['agent-runtimes', 'example'],
+                            metadata: { agent_id: agentId },
+                            cases: [],
+                        }),
+                    });
+                    evalRecord = createdEvalRes?.eval;
+                }
+                if (!evalRecord?.id) {
+                    throw new Error('Failed to initialize eval.');
+                }
+                setEvalId(evalRecord.id);
+                const experimentsRes = await evalApiFetch(`/evals/experiments?eval_id=${encodeURIComponent(evalRecord.id)}&limit=50`);
+                const experiments = Array.isArray(experimentsRes?.experiments)
+                    ? experimentsRes.experiments
+                    : [];
+                let experiment = experiments.find((e) => e?.name === 'default-suite');
+                if (!experiment) {
+                    const createdExperimentRes = await evalApiFetch('/evals/experiments', {
+                        method: 'POST',
+                        body: JSON.stringify({
+                            eval_id: evalRecord.id,
+                            name: 'default-suite',
+                            description: 'Default evaluation suite for AgentEvalsExample.',
+                            status: 'ready',
+                            config: {
+                                mode: 'offline',
+                                target_agent_id: agentId,
+                                target_pod_name: podName,
+                            },
+                            summary: {},
+                            tags: ['example'],
+                        }),
+                    });
+                    experiment = createdExperimentRes?.experiment;
+                }
+                if (!experiment?.id) {
+                    throw new Error('Failed to initialize eval experiment.');
+                }
+                setExperimentId(experiment.id);
+            }
+            catch (error) {
+                const message = error instanceof Error ? error.message : 'Eval bootstrap failed.';
+                setFlash(message);
+            }
+            finally {
+                setIsBootstrapping(false);
+            }
+        };
+        void bootstrap();
+    }, [isAgentReady, controlPlaneBaseUrl, agentId, podName, evalApiFetch]);
     // ── Poll eval results ─────────────────────────────────────────────────
     useEffect(() => {
-        if (!isReady || !agentBaseUrl)
+        if (!isAgentReady || !controlPlaneBaseUrl || !experimentId)
             return;
         const poll = async () => {
             try {
-                const res = await authFetch(`${agentBaseUrl}/api/v1/agents/${agentId}/eval/runs`);
-                if (res.ok) {
-                    const d = await res.json();
-                    setEvalRuns(Array.isArray(d) ? d : (d.runs ?? []));
-                }
+                const res = await evalApiFetch(`/evals/experiments/${encodeURIComponent(experimentId)}/runs?limit=50`);
+                const rows = Array.isArray(res?.runs) ? res.runs : [];
+                setEvalRuns(mapRuns(rows));
             }
             catch {
                 /* ok */
             }
         };
-        poll();
+        void poll();
         const interval = setInterval(poll, 15_000);
         return () => clearInterval(interval);
-    }, [isReady, agentBaseUrl, agentId, authFetch]);
+    }, [isAgentReady, controlPlaneBaseUrl, experimentId, evalApiFetch, mapRuns]);
     // ── Run eval suite ────────────────────────────────────────────────────
     const handleRunEval = useCallback(async () => {
-        if (!agentBaseUrl)
+        if (!controlPlaneBaseUrl || !experimentId)
             return;
         setIsRunning(true);
         setFlash(null);
         try {
-            const res = await authFetch(`${agentBaseUrl}/api/v1/agents/${agentId}/eval/run`, { method: 'POST' });
-            if (res.ok) {
-                setFlash('Evaluation suite started');
-            }
-            else {
-                setFlash(`Failed to start eval (${res.status})`);
-            }
+            const syntheticScore = Number((0.75 + Math.random() * 0.2).toFixed(3));
+            const passed = Math.round(syntheticScore * 100);
+            const failed = Math.max(0, 100 - passed);
+            await evalApiFetch(`/evals/experiments/${encodeURIComponent(experimentId)}/runs`, {
+                method: 'POST',
+                body: JSON.stringify({
+                    status: 'completed',
+                    metrics: {
+                        pass_rate: syntheticScore,
+                        avg_score: syntheticScore,
+                    },
+                    summary: {
+                        suite_name: 'default-suite',
+                        passed,
+                        failed,
+                        runtime_id: podName,
+                    },
+                    report: {
+                        source: 'AgentEvalsExample',
+                        eval_id: evalId,
+                        experiment_id: experimentId,
+                        agent_id: agentId,
+                    },
+                }),
+            });
+            setFlash('Evaluation run persisted');
+            const updatedRuns = await evalApiFetch(`/evals/experiments/${encodeURIComponent(experimentId)}/runs?limit=50`);
+            const rows = Array.isArray(updatedRuns?.runs)
+                ? updatedRuns.runs
+                : [];
+            setEvalRuns(mapRuns(rows));
         }
         catch {
-            setFlash('Network error');
+            setFlash('Failed to persist evaluation run');
         }
         finally {
             setIsRunning(false);
         }
-    }, [agentBaseUrl, agentId, authFetch]);
+    }, [
+        controlPlaneBaseUrl,
+        experimentId,
+        evalApiFetch,
+        mapRuns,
+        podName,
+        evalId,
+        agentId,
+    ]);
     // ── Loading / Error ───────────────────────────────────────────────────
-    if (!isReady && runtimeStatus !== 'error') {
+    if (!isAgentReady && agentStatus !== 'error') {
         return (_jsxs(Box, { sx: {
                 display: 'flex',
                 flexDirection: 'column',
                 alignItems: 'center',
                 justifyContent: 'center',
-                height: '100vh',
+                height: '100%',
                 gap: 3,
-            }, children: [_jsx(Spinner, { size: "large" }), _jsx(Text, { sx: { color: 'fg.muted' }, children: runtimeStatus === 'launching'
-                        ? 'Launching runtime for eval agent…'
-                        : 'Creating eval demo agent…' })] }));
+            }, children: [_jsx(Spinner, { size: "large" }), _jsx(Text, { sx: { color: 'fg.muted' }, children: agentStatus === 'launching'
+                        ? executionTarget === 'local'
+                            ? 'Launching local eval example agent…'
+                            : 'Launching runtime for eval agent…'
+                        : 'Creating eval example agent…' })] }));
+    }
+    if (agentStatus === 'error' || effectiveError) {
+        return _jsx(ErrorView, { error: effectiveError, onLogout: onLogout });
     }
-    if (runtimeStatus === 'error' || hookError) {
-        return _jsx(ErrorView, { error: hookError, onLogout: onLogout });
+    if (isBootstrapping) {
+        return (_jsxs(Box, { sx: {
+                display: 'flex',
+                flexDirection: 'column',
+                alignItems: 'center',
+                justifyContent: 'center',
+                height: '100%',
+                gap: 3,
+            }, children: [_jsx(Spinner, { size: "large" }), _jsx(Text, { sx: { color: 'fg.muted' }, children: "Preparing hosted eval and experiment..." })] }));
     }
     const latestScore = evalRuns.length > 0 ? evalRuns[0].score : null;
     return (_jsxs(Box, { sx: {
-            height: 'calc(100vh - 60px)',
+            height: '100%',
+            minHeight: 0,
             display: 'flex',
             flexDirection: 'column',
         }, children: [_jsxs(Box, { sx: {
@@ -131,7 +413,14 @@ const AgentEvalsInner = ({ onLogout }) => {
                     borderBottom: '1px solid',
                     borderColor: 'border.default',
                     flexShrink: 0,
-                }, children: [_jsx(BeakerIcon, { size: 16 }), _jsxs(Heading, { as: "h3", sx: { fontSize: 2, flex: 1 }, children: ["Evaluation \u2014 ", podName] })] }), _jsxs(Box, { sx: { flex: 1, minHeight: 0, display: 'flex' }, children: [_jsx(Box, { sx: { flex: 1, minWidth: 0 }, children: _jsx(Chat, { protocol: "vercel-ai", baseUrl: agentBaseUrl, agentId: agentId, title: "Eval Agent", placeholder: "Chat with the agent, then run evaluations\u2026", description: latestScore != null
+                }, children: [_jsx(BeakerIcon, { size: 16 }), _jsxs(Box, { sx: { flex: 1, minWidth: 0 }, children: [_jsxs(Heading, { as: "h3", sx: { fontSize: 2 }, children: ["Evaluation \u2014 ", podName] }), _jsxs(Text, { sx: {
+                                    fontSize: 0,
+                                    color: 'fg.muted',
+                                    display: 'block',
+                                    overflow: 'hidden',
+                                    textOverflow: 'ellipsis',
+                                    whiteSpace: 'nowrap',
+                                }, children: ["Runtime API: ", runtimeCreationBaseUrl, "/api/runtimes/v1/runtimes"] })] }), _jsxs(FormControl, { sx: { minWidth: 160 }, children: [_jsx(FormControl.Label, { sx: { fontSize: 0, mb: 1 }, children: "Target" }), _jsxs(Select, { size: "small", value: executionTarget, onChange: e => onExecutionTargetChange(e.target.value), disabled: isRunning, children: [_jsx(Select.Option, { value: "cloud", children: "Cloud" }), _jsx(Select.Option, { value: "local", children: "Local" })] })] })] }), _jsxs(Box, { sx: { flex: 1, minHeight: 0, display: 'flex' }, children: [_jsx(Box, { sx: { flex: 1, minWidth: 0 }, children: _jsx(Chat, { protocol: "vercel-ai", baseUrl: agentBaseUrl, agentId: agentId, title: "Eval Agent", brandIcon: _jsx(BeakerIcon, { size: 16 }), placeholder: "Chat with the agent, then run evaluations\u2026", description: latestScore != null
                                 ? `Last score: ${(latestScore * 100).toFixed(0)}%`
                                 : 'No evaluations run yet', showHeader: true, autoFocus: true, height: "100%", runtimeId: podName, historyEndpoint: `${agentBaseUrl}/api/v1/history`, suggestions: [
                                 {
@@ -153,7 +442,7 @@ const AgentEvalsInner = ({ onLogout }) => {
                                     p: 3,
                                     borderBottom: '1px solid',
                                     borderColor: 'border.default',
-                                }, children: [_jsxs(Box, { sx: { display: 'flex', alignItems: 'center', gap: 1, mb: 2 }, children: [_jsx(BeakerIcon, { size: 16 }), _jsx(Heading, { as: "h3", sx: { fontSize: 2 }, children: "Run Evaluation" })] }), _jsx(Text, { as: "p", sx: { fontSize: 0, color: 'fg.muted', mb: 3 }, children: "Execute the default evaluation suite against recent agent responses. Results are scored automatically." }), _jsx(Button, { size: "small", variant: "primary", leadingVisual: PlayIcon, onClick: handleRunEval, disabled: isRunning, sx: { width: '100%' }, children: isRunning ? 'Running…' : 'Run Eval Suite' }), flash && (_jsx(Flash, { variant: flash.includes('started') ? 'success' : 'danger', sx: { mt: 2, fontSize: 0 }, children: flash }))] }), _jsxs(Box, { sx: { p: 3, flex: 1, overflow: 'auto' }, children: [_jsx(Heading, { as: "h4", sx: { fontSize: 1, mb: 2 }, children: "Evaluation History" }), evalRuns.length === 0 ? (_jsx(Text, { sx: { color: 'fg.muted', fontSize: 0 }, children: "No evaluation runs recorded yet." })) : (evalRuns.slice(0, 20).map(run => (_jsxs(Box, { sx: {
+                                }, children: [_jsxs(Box, { sx: { display: 'flex', alignItems: 'center', gap: 1, mb: 2 }, children: [_jsx(BeakerIcon, { size: 16 }), _jsx(Heading, { as: "h3", sx: { fontSize: 2 }, children: "Run Evaluation" })] }), _jsx(Text, { as: "p", sx: { fontSize: 0, color: 'fg.muted', mb: 3 }, children: "Execute the default evaluation suite and persist results to /api/ai-agents/v1/evals." }), _jsx(Button, { size: "small", variant: "primary", leadingVisual: PlayIcon, onClick: handleRunEval, disabled: isRunning, sx: { width: '100%' }, children: isRunning ? 'Running…' : 'Run Eval Suite' }), flash && (_jsx(Flash, { variant: flash.toLowerCase().includes('failed') ? 'danger' : 'success', sx: { mt: 2, fontSize: 0 }, children: flash }))] }), _jsxs(Box, { sx: { p: 3, flex: 1, overflow: 'auto' }, children: [_jsx(Heading, { as: "h4", sx: { fontSize: 1, mb: 2 }, children: "Evaluation History" }), evalRuns.length === 0 ? (_jsx(Text, { sx: { color: 'fg.muted', fontSize: 0 }, children: "No evaluation runs recorded yet." })) : (evalRuns.slice(0, 20).map(run => (_jsxs(Box, { sx: {
                                             p: 2,
                                             mb: 2,
                                             border: '1px solid',
@@ -185,6 +474,7 @@ const syncTokenToIamStore = (token) => {
 const AgentEvalsExample = () => {
     const { token, clearAuth } = useSimpleAuthStore();
     const hasSynced = useRef(false);
+    const [executionTarget, setExecutionTarget] = useState(DEFAULT_EXECUTION_TARGET);
     useEffect(() => {
         if (token && !hasSynced.current) {
             hasSynced.current = true;
@@ -201,6 +491,6 @@ const AgentEvalsExample = () => {
     if (!token) {
         return (_jsx(ThemedProvider, { children: _jsx(AuthRequiredView, {}) }));
     }
-    return (_jsx(QueryClientProvider, { client: queryClient, children: _jsx(ThemedProvider, { children: _jsx(AgentEvalsInner, { onLogout: handleLogout }) }) }));
+    return (_jsx(QueryClientProvider, { client: queryClient, children: _jsx(ThemedProvider, { children: _jsx(AgentEvalsInner, { onLogout: handleLogout, executionTarget: executionTarget, onExecutionTargetChange: setExecutionTarget }, executionTarget) }) }));
 };
 export default AgentEvalsExample;

package/lib/examples/AgentGuardrailsExample.js CHANGED Viewed

@@ -34,8 +34,8 @@ const queryClient = new QueryClient();
 import { useSimpleAuthStore } from '@datalayer/core/lib/views/otel';
 import { Chat } from '../chat';
 // ─── Constants ─────────────────────────────────────────────────────────────
-const AGENT_NAME = 'guardrails-demo-agent';
-const AGENT_SPEC_ID = 'demo-guardrails';
+const AGENT_NAME = 'guardrails-example-agent';
+const AGENT_SPEC_ID = 'example-guardrails';
 const DEFAULT_LOCAL_BASE_URL = import.meta.env.VITE_BASE_URL || 'http://localhost:8765';
 const OTEL_BASE_URL_ENV = import.meta.env.VITE_OTEL_BASE_URL;
 const DATALAYER_RUN_URL_ENV = import.meta.env.DATALAYER_RUN_URL;
@@ -422,7 +422,7 @@ const AgentGuardrailsInner = ({ onLogout, }) => {
                 justifyContent: 'center',
                 height: '100vh',
                 gap: 3,
-            }, children: [_jsx(Spinner, { size: "large" }), _jsx(Text, { sx: { color: 'fg.muted' }, children: "Launching guardrails demo agent..." })] }));
+            }, children: [_jsx(Spinner, { size: "large" }), _jsx(Text, { sx: { color: 'fg.muted' }, children: "Launching guardrails example agent..." })] }));
     }
     if (runtimeStatus === 'error' || hookError) {
         return _jsx(ErrorView, { error: hookError, onLogout: onLogout });
@@ -482,9 +482,20 @@ const AgentGuardrailsInner = ({ onLogout, }) => {
                             ? ` (over by $${overBudgetAmountUsd.toFixed(4)}).`
                             : '.', ' ', "Start a new run or increase the run budget before continuing."] }) })), approvals.map(req => (_jsx(Flash, { variant: "warning", sx: { mx: 3, mt: 2 }, children: _jsxs(Box, { sx: { display: 'flex', alignItems: 'center', gap: 2 }, children: [_jsxs(Text, { sx: { flex: 1, fontSize: 1 }, children: [_jsx("strong", { children: req.tool_name }), " requests approval", req.tool_args
                                     ? ` — ${JSON.stringify(req.tool_args).slice(0, 120)}`
-                                    : ''] }), _jsx(Button, { size: "small", variant: "primary", leadingVisual: CheckIcon, onClick: () => handleApprove(req.id), disabled: approvalLoading === req.id, children: "Approve" }), _jsx(Button, { size: "small", variant: "danger", leadingVisual: XIcon, onClick: () => handleReject(req.id), disabled: approvalLoading === req.id, children: "Reject" })] }) }, req.id))), _jsx(Box, { sx: { flex: 1, minHeight: 0 }, children: _jsx(Chat, { protocol: "vercel-ai", baseUrl: agentBaseUrl, agentId: agentId, authToken: chatAuthToken, title: "Guardrails Agent", placeholder: "Ask something that triggers tools\u2026", description: "Cost guardrail with OTEL-backed gauge and manual tool approval gates", showHeader: false, showTokenUsage: true, errorBanner: overBudgetBanner, disableInputPrompt: isOverRunBudget, autoFocus: true, height: "100%", runtimeId: agentId, historyEndpoint: `${agentBaseUrl}/api/v1/history`, suggestions: [
+                                    : ''] }), _jsx(Button, { size: "small", variant: "primary", leadingVisual: CheckIcon, onClick: () => handleApprove(req.id), disabled: approvalLoading === req.id, children: "Approve" }), _jsx(Button, { size: "small", variant: "danger", leadingVisual: XIcon, onClick: () => handleReject(req.id), disabled: approvalLoading === req.id, children: "Reject" })] }) }, req.id))), _jsx(Box, { sx: { flex: 1, minHeight: 0 }, children: _jsx(Chat, { protocol: "vercel-ai", baseUrl: agentBaseUrl, agentId: agentId, authToken: chatAuthToken, title: "Guardrails Agent", brandIcon: _jsx(ShieldCheckIcon, { size: 16 }), placeholder: "Ask something that triggers tools\u2026", description: "Cost guardrail with OTEL-backed gauge and hook-aware approvals (before_tool_execute, after_tool_execute, on_tool_execute_error, deferred_tool_calls)", showHeader: false, showTokenUsage: true, errorBanner: overBudgetBanner, disableInputPrompt: isOverRunBudget, autoFocus: true, height: "100%", runtimeId: agentId, historyEndpoint: `${agentBaseUrl}/api/v1/history`, suggestions: [
                         { title: 'Update CRM', message: 'Update the CRM records for Q3' },
-                        { title: 'Report', message: 'Generate the weekly KPI report' },
+                        {
+                            title: 'Trigger before_tool_execute',
+                            message: "Call runtime_sensitive_echo with text 'hello' and reason 'audit', then explain the before_tool_execute authorization decision.",
+                        },
+                        {
+                            title: 'Trigger deny policy',
+                            message: "Call runtime_sensitive_echo with text 'danger' and reason 'delete CRM rows', then explain why policy denied it.",
+                        },
+                        {
+                            title: 'Explain deferred flow',
+                            message: 'Explain how deferred_tool_calls and manual approvals interact in this guardrails run.',
+                        },
                     ], submitOnSuggestionClick: true }) })] }));
 };
 // ─── Sync token to core IAM store ──────────────────────────────────────────