npm - @dypai-ai/mcp - Versions diffs - 1.5.8 → 1.5.10 - Mend

@dypai-ai/mcp 1.5.8 → 1.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dypai-ai/mcp",
-  "version": "1.5.8",
+  "version": "1.5.10",
   "description": "DYPAI MCP Server — AI agent toolkit for building and deploying full-stack apps",
   "type": "module",
   "main": "src/index.js",

package/src/index.js CHANGED Viewed

@@ -146,7 +146,7 @@ This stores classification metadata only. It does not create users, roles, login
     },
   },
   { name: "list_ai_models", description: "List only the DYPAI Managed AI models that are active for a project. Returns the project-gated OpenRouter model catalog priced in AI Credits per 1M tokens, RPM limit, max output tokens, active/available counts, billing metadata, and the exact node parameters to use. Call this before creating or editing an AI Agent node with DYPAI Managed models. Agents must not invent or use inactive model ids. Use provider='openrouter' and do NOT set credential_id; DYPAI uses the platform OpenRouter key and deducts usage from the organization's AI Credits.", inputSchema: { type: "object", properties: { project_id: { type: "string", description: "Project UUID whose plan and Model Gateway settings determine the active Managed AI catalog." } }, required: ["project_id"] } },
-  { name: "create_project", description: "Create a new DYPAI project (free plan). Creates a full project with database, engine, GitHub repo, and frontend hosting. BLOCKS by default until provisioning finishes (~60s typical, 120s max) — when it returns, the project_id is ready to use with execute_sql, endpoint tools, etc. Pass wait_until_ready:false for batch flows.\n\nName collision: if another project in the same org already uses the name (case-insensitive), returns {error:'name_taken', existing_project_id, suggestions:[...]}. Pick a different name or use the existing project.\n\nIMPORTANT: before calling, check for a matching template with `search_project_templates`. Passing a `template_slug` drops in a ready-made schema + endpoints + UI that cover 70% of common app types. Only create a blank project if nothing matches.", inputSchema: { type: "object", properties: { name: { type: "string", description: "Project name (e.g. 'My Veterinary App')" }, organization_id: { type: "string", description: "Optional. Uses default org if omitted." }, description: { type: "string" }, template_slug: { type: "string", description: "RECOMMENDED. Project template slug to start from (e.g. 'clinic', 'gym', 'waitlist', 'blank'). Always call search_project_templates first to find the best match." }, wait_until_ready: { type: "boolean", description: "If true (default), blocks until provisioning completes and the project is ready for all operations. If false, returns immediately with status='provisioning' — caller must poll get_project before using.", default: true } }, required: ["name"] } },
+  { name: "create_project", description: "Create a new DYPAI project (free plan). Creates a full project with database, engine, GitHub repo, and frontend hosting. BLOCKS by default until provisioning finishes (~60s typical, 120s max) — when it returns, the project_id is ready to use with execute_sql, endpoint tools, etc. Pass wait_until_ready:false for batch flows.\n\nName collision: if another project in the same org already uses the name (case-insensitive), returns {error:'name_taken', existing_project_id, suggestions:[...]}. Pick a different name or use the existing project.\n\nProject limits are enforced by the DYPAI API at organization/workspace scope according to the workspace plan. If it returns {error:'project_limit_reached'}, do not retry create_project; show list_projects for that organization and ask the user to reuse, archive/pause, upgrade the workspace to Pro, or add capacity.\n\nIMPORTANT: before calling, check for a matching template with `search_project_templates`. Passing a `template_slug` drops in a ready-made schema + endpoints + UI that cover 70% of common app types. Use built-in bases when appropriate: `private-admin` for private internal tools, `user-accounts` for apps with signup/login users, `landing-admin` for public landing plus admin, and `blank` only when no base fits.", inputSchema: { type: "object", properties: { name: { type: "string", description: "Project name (e.g. 'My Veterinary App')" }, organization_id: { type: "string", description: "Optional. Uses default org if omitted." }, description: { type: "string" }, template_slug: { type: "string", description: "RECOMMENDED. Project template slug to start from (e.g. 'clinic', 'gym', 'private-admin', 'user-accounts', 'landing-admin', 'blank'). Always call search_project_templates first to find the best match." }, wait_until_ready: { type: "boolean", description: "If true (default), blocks until provisioning completes and the project is ready for all operations. If false, returns immediately with status='provisioning' — caller must poll get_project before using.", default: true } }, required: ["name"] } },
   { name: "get_app_credentials", description: "Lists available credentials in the current application. Returns API keys, anon key, service role key, and engine URL needed for SDK configuration.", inputSchema: { type: "object", properties: { project_id: { type: "string" } }, required: [] } },
   // ── Database ──────────────────────────────────────────────────────────────
@@ -487,8 +487,9 @@ endpoint YAML and \`dypai_push\`. This tool does NOT modify the definition.`,
   // ── Knowledge ─────────────────────────────────────────────────────────────
   { name: "search_docs", description: "Search DYPAI documentation. Use this when unsure about SDK usage, auth patterns, workflow nodes, or platform features. Returns relevant documentation chunks.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What you want to learn about" } }, required: ["query"] } },
+  { name: "search_design_patterns", description: "Search compact DYPAI UI/design recipes. Use before designing substantial screens.", inputSchema: { type: "object", properties: { query: { type: "string", description: "Design need, with starter/domain/screen/style context when known." }, starter_slug: { type: "string", description: "Optional: private-admin, user-accounts, landing-admin, or blank." }, app_type: { type: "string", description: "Optional domain/app type." }, screen_type: { type: "string", description: "Optional screen/workflow." }, visual_style: { type: "string", description: "Optional style." }, category: { type: "string", description: "Optional category." }, limit: { type: "integer", default: 3, minimum: 1, maximum: 4 } }, required: ["query"] } },
   { name: "search_workflow_templates", description: "Search workflow templates by description. Returns ready-to-use workflow code for common patterns: CRUD operations, payment gateways, email sending, AI chatbots, data pipelines, etc.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What the workflow should do (e.g. 'send email', 'stripe payment')" }, category: { type: "string", description: "Optional: AI, Database, Payments, Communication, Logic, Storage" } }, required: ["query"] } },
-  { name: "search_project_templates", description: "Search project starter templates by description. Returns template metadata and slugs for starters like clinic, gym, waitlist, blank, auth, or landing.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What kind of project starter you need (e.g. 'gym app', 'landing page', 'auth starter')" }, category: { type: "string", description: "Optional category filter" } }, required: ["query"] } },
+  { name: "search_project_templates", description: "Search project starter templates by description. Returns template metadata and slugs for marketplace templates plus built-in bases: private-admin, user-accounts, landing-admin, and blank.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What kind of project starter you need (e.g. 'gym app', 'private admin dashboard', 'user accounts portal', 'landing plus admin')" }, category: { type: "string", description: "Optional category filter" } }, required: ["query"] } },
 ]
 // ── Server Instructions ──────────────────────────────────────────────────────
@@ -517,16 +518,26 @@ First reflex, always:
 1. **Acknowledge briefly** what they want to build (one short line, their language).
 2. **\`search_project_templates(query: "<keywords from their request>")\`** — keywords in their language. Templates cover common app types (gym, clinic, waitlist, saas dashboard, etc.).
-3. **Decide: template or blank?** Default is **blank**. A template is only the right pick when the match is OBVIOUS and STRONG:
+3. **Decide: marketplace template, built-in base, or blank.** Marketplace templates are only right when the match is OBVIOUS and STRONG:
    - ✅ User says *"app para mi gimnasio"* + there's \`gym-manager\` (exact domain + feature overlap) → template.
-   - ❌ User says *"algo para gestionar reservas"* + there's \`gym-manager\` (soft match, many interpretations) → **blank**. Don't assume they want the gym's specific schema (classes, memberships, check-ins) — they didn't ask for it.
-   - ❌ User is a dev with a concrete spec (*"crea un proyecto con estas 3 tablas y estos endpoints"*) → **blank**, always. Respect their design.
-   - ❌ No template returned at all → **blank**.
-4. **Call it** → \`create_project(name: "<their name>", template_slug: "<matched_slug>" | "blank")\`.
+   - ❌ User says *"algo para gestionar reservas"* + there's \`gym-manager\` (soft match, many interpretations) → use a built-in base or **blank**. Don't assume they want the gym's specific schema (classes, memberships, check-ins) — they didn't ask for it.
+   - Built-in bases are safe defaults:
+     - private/internal/admin/dashboard/backoffice/business management → \`private-admin\`
+     - end-user signup/login/customer/member portal/marketplace/SaaS accounts → \`user-accounts\`
+     - public landing/marketing site plus private admin → \`landing-admin\`
+     - no clear access pattern or explicitly custom/from scratch → \`blank\`
+   - ❌ User is a dev with a concrete spec (*"crea un proyecto con estas 3 tablas y estos endpoints"*) → usually **blank**, unless they explicitly want one of the built-in bases.
+   - ❌ No marketplace or built-in base fits → **blank**.
+4. **Call it** → \`create_project(name: "<their name>", template_slug: "<matched_slug>" | "private-admin" | "user-accounts" | "landing-admin" | "blank")\`.
    If you went with a template, acknowledge in ONE line what's included so the user can push back: *"Lo arranco con la plantilla X, que trae socios, clases y pagos. ¿Te vale o prefieres algo más simple?"*
    If you went blank, just say: *"Arranco un proyecto en blanco y lo construimos a medida."*
 5. **After \`create_project\`** → ask for an absolute workspace path, then \`dypai_pull\` + \`manage_frontend(sync)\` (see next section).
+Before designing substantial UI (app shell, dashboard, login, tables/lists,
+forms, calendars, or domain-specific screens), use \`search_design_patterns\`
+with the app/starter/screen/style context. It returns curated recipes; adapt
+them to the project instead of inventing generic starter UI.
 **The template system exists to save time when the fit is obvious, not to force-match every request.** When in doubt → blank is always correct. Iterating up from blank is cheaper than deleting 80% of a mismatched template.
 ## The one legit follow-up question
@@ -672,9 +683,10 @@ Internally this means:
 1. edit backend files
 2. validate local backend changes
-3. save them to the preview environment
-4. test the preview version when practical
-5. then tell the user it is ready to try
+3. test changed endpoint YAML with \`dypai_test_endpoint(mode:'local')\` when practical
+4. save them to the preview environment
+5. test the preview version when practical
+6. then tell the user it is ready to try
 Never ask the user whether to run the internal save-to-preview step. It is safe, reversible, and required for the user to test the actual change.
@@ -748,7 +760,7 @@ Use phrases like:
 Default is **no tool names in user-facing text**.
 # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-# SEARCH BEFORE YOU GUESS — \`search_docs\` is your reference manual
+# SEARCH BEFORE YOU GUESS — \`search_docs\` and \`search_design_patterns\`
 # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 This prompt is the MAP of the DYPAI platform. The detailed docs live in
@@ -870,12 +882,12 @@ Editing files inside \`dypai/\` only changes YOUR DISK. The platform doesn't see
 \`\`\`
 Practical consequences — internalize these:
-- **Never publish backend changes just to test them.** Backend changes are testable before production: save them to preview, verify with \`dypai_test_endpoint(mode:'draft')\` when possible, then tell the user exactly what to try in preview.
+- **Never publish backend changes just to test them.** First test the local YAML directly with \`dypai_test_endpoint(mode:'local')\`; only after that save to preview with \`dypai_push\` and verify the staged draft when needed.
 - **After EVERY meaningful backend change set, call \`dypai_push\`.** Don't batch a session's worth of edits hoping to push at the end — if you forget, the user tests the preview and sees the OLD behavior. The push is cheap, idempotent, and creates ONE preview version per resource (subsequent pushes overwrite the pending preview version, not stack new ones).
 - **\`dypai_push\` is the internal save-to-preview step. It is NOT a production publish.** Live traffic is untouched. You can run it repeatedly without affecting real users. In user-facing prose, say "listo para probar" or "en previsualización", not "pushed" or "draft".
 - **The preview host (\`dev-<project_id>.dypai.dev\`) only sees what you've saved to preview.** A change still only on disk is invisible to the user's preview. If the user says "I tested it and nothing changed", first check whether the backend change was saved to preview after the last edit.
 - **\`dypai_validate\` before \`dypai_push\`** — push runs validate as a pre-flight, but running it explicitly first gives you the lint output without committing. Cheap insurance.
-- **Order during a multi-step backend feature**: edit → \`dypai_validate\` → \`dypai_push\` → \`dypai_test_endpoint(mode:'draft')\` (or tell the user to test preview). Repeat per change. ONLY when the user explicitly approves production do \`manage_drafts(operation:'list')\` → \`manage_drafts(operation:'publish', confirm:true)\`.
+- **Order during a multi-step backend feature**: edit → \`dypai_validate\` → \`dypai_test_endpoint(mode:'local')\` → \`dypai_push\` → \`dypai_test_endpoint(mode:'draft')\` when you need to verify the saved preview. Repeat per coherent change. ONLY when the user explicitly approves production do \`manage_drafts(operation:'list')\` → \`manage_drafts(operation:'publish', confirm:true)\`.
 - **DDL is the exception**: \`execute_sql\` with CREATE / ALTER / DROP TABLE applies to live IMMEDIATELY (no preview layer for schema). Preview only exists for endpoints / webhooks / crons / realtime policies. Summarize destructive DDL to the user before running it.
 ## User intent → tool to call (decision table)
@@ -887,7 +899,7 @@ Use this BEFORE picking a tool. If unsure which row matches, ask the user.
 | "Create a new project" | \`search_project_templates\` (find a starter) | \`create_project(template_slug: ...)\` |
 | "Show me what we have" / "I want to work on existing project X" | \`list_projects\` → \`dypai_pull\` (backend) + \`manage_frontend(sync)\` (frontend) | Read \`dypai/\` files + \`src/\` |
 | "This is a private admin app / public site / user portal / multi-role app" | \`manage_project_access_profile(operation:'update')\` | Then implement the actual auth/UI/data behavior normally |
-| "Add/change a backend endpoint, table, cron, webhook, agent, integration" | Edit files in \`dypai/\` | \`dypai_validate\` → \`dypai_push\` |
+| "Add/change a backend endpoint, table, cron, webhook, agent, integration" | Edit files in \`dypai/\` | \`dypai_validate\` → \`dypai_test_endpoint(mode:'local')\` for changed endpoints → \`dypai_push\` |
 | "Publish my backend changes" / "make it live" | \`manage_drafts(operation:'list')\` to show what's pending | \`manage_drafts(operation:'publish', confirm:true)\` |
 | "Test an endpoint before publishing" | \`dypai_test_endpoint(mode:'local')\` (your edits) or \`(mode:'draft')\` (after push) | — |
 | "Test the new endpoint from my local frontend, end-to-end, before publishing" | Tell user: their local frontend already points to \`https://dev-<project_id>.dypai.dev\` (set by \`manage_frontend(sync)\`), which serves drafts on top of live. So after \`dypai_push\` the local UI hits the draft overlay automatically — nothing else to do. | — |
@@ -919,21 +931,23 @@ User: "Add a /api/list-tasks endpoint that returns the current user's tasks, and
 2. manage_frontend(operation:'sync', ...)        # materialize frontend if not already on disk
 3. # Backend: create the endpoint
    Write dypai/endpoints/list-tasks.yaml         # trigger.http_api auth_mode:jwt + dypai_database query
-4. dypai_validate                                # catch typos before saving to preview
-5. dypai_push                                    # saves to preview, NOT production
-6. dypai_test_endpoint(endpoint:'list-tasks', mode:'draft', as_user:'<user_id>')
-   # verifies the preview version; do NOT publish just to test
-7. # Frontend: call the new endpoint from React
+4. dypai_validate                                # catch YAML/placeholder issues
+5. dypai_test_endpoint(endpoint:'list-tasks', mode:'local', as_user:'<user_id>')
+   # verifies the local YAML before saving anything to preview
+6. dypai_push                                    # saves to preview, NOT production
+7. dypai_test_endpoint(endpoint:'list-tasks', mode:'draft', as_user:'<user_id>')
+   # optional final sanity: verifies the preview version; do NOT publish just to test
+8. # Frontend: call the new endpoint from React
    Edit src/pages/Dashboard.tsx                  # useEndpoint('list-tasks')
-8. # Test locally/browser if available. Then tell the user in plain language:
+9. # Test locally/browser if available. Then tell the user in plain language:
    # "Ya está listo para probar. Abre la previsualización y revisa la lista de tareas. Todavía no está publicado para tus usuarios."
-9. # ONLY after the user confirms it is good:
+10. # ONLY after the user confirms it is good:
    manage_drafts(operation:'list')               # internal: inspect what will publish
-10. manage_drafts(operation:'publish', confirm:true)  # backend live after explicit approval
-11. manage_frontend(operation:'deploy', sourceDirectory, confirm:true)  # frontend live after explicit approval
+11. manage_drafts(operation:'publish', confirm:true)  # backend live after explicit approval
+12. manage_frontend(operation:'deploy', sourceDirectory, confirm:true)  # frontend live after explicit approval
 \`\`\`
-**Testing rule**: never publish backend changes just to test them. Backend can be verified from the preview version. **Production order rule**: when you are truly publishing a full-stack change, publish backend BEFORE deploying the frontend; otherwise the live UI may call backend functionality that is not live yet.
+**Testing rule**: never publish backend changes just to test them. Verify local YAML first with \`dypai_test_endpoint(mode:'local')\`, then save to preview and test \`mode:'draft'\` or the dev URL when needed. **Production order rule**: when you are truly publishing a full-stack change, publish backend BEFORE deploying the frontend; otherwise the live UI may call backend functionality that is not live yet.
 ## Debugging user-reported errors — \`search_logs\` is your starting point
@@ -1019,7 +1033,7 @@ Mental translations: "edge function" → workflow with one code node; "cron" →
 ## Top gotchas (the expensive ones)
 1. **Forgetting \`WHERE user_id = \${current_user_id}\`** — users see each other's data. #1 multi-tenancy bug. The engine does NOT auto-filter. RLS doesn't exist.
-2. **Editing YAML without \`dypai_push\`** — your change is on YOUR DISK only. Local frontend (which points at the draft overlay) keeps serving the old version. Symptom: *"I tested it locally and nothing changed"*. Always push after each meaningful change set.
+2. **Editing YAML without \`dypai_push\`** — \`dypai_test_endpoint(mode:'local')\` can test your file edits, but the preview/frontend cannot see them until \`dypai_push\` saves them to draft. Symptom: *"I tested it in preview and nothing changed"*. Test local first, then push when the changed endpoint is ready for preview.
 3. **Treating \`dypai_push\` as a deploy** — it's "save as draft", not publish. Live traffic is untouched until \`manage_drafts(publish, confirm:true)\`. Push freely, only ask the user before publish.
 4. **\`public\` auth_mode with \`\${current_user_id}\`** — no JWT → placeholder empty → SQL fails or returns wrong data. Use \`jwt\` if you need the user.
 5. **Missing \`return: true\`** — endpoint returns \`null\`. Every path that should produce an HTTP response needs one node with \`return: true\`.

package/src/tools/scaffold.js CHANGED Viewed

@@ -20,7 +20,8 @@ Scaffolds a project directory with:
 - .env with engine URL
 Use search_project_templates first to find available templates, then pass the template slug here.
-Or use "blank" for an empty starter project.`,
+Use "private-admin" for private internal tools, "user-accounts" for apps with signup/login users,
+"landing-admin" for public landing plus admin, or "blank" only when no base fits.`,
   inputSchema: {
     type: "object",
@@ -35,7 +36,7 @@ Or use "blank" for an empty starter project.`,
       },
       template: {
         type: "string",
-        description: 'Template slug (e.g. "clinic", "gym", "blank"). Use search_project_templates to find available templates.',
+        description: 'Template slug (e.g. "clinic", "gym", "private-admin", "user-accounts", "landing-admin", "blank"). Use search_project_templates to find available templates.',
         default: "blank",
       },
     },

package/src/tools/sync/test.js CHANGED Viewed

@@ -1,7 +1,8 @@
 /**
- * dypai_test — YAML-defined tests against endpoints. Zero engine changes:
- * orchestrates `execute_sql` + `test_workflow` (impersonation) + `execute_sql`
- * using what already exists.
+ * dypai_test — YAML-defined tests against endpoints. By default, endpoint
+ * names run through dypai_test_endpoint(mode:'local'), so tests execute the
+ * YAML currently on disk before dypai_push. UUID endpoint_id tests keep the
+ * legacy remote test_workflow path for backward compatibility.
  *
  * File layout (committable under dypai/tests/):
  *   endpoint: create-order        # or endpoint_id: <uuid>
@@ -26,6 +27,7 @@ import { join, resolve as resolvePath } from "path"
 import YAML from "yaml"
 import { proxyToolCall } from "../proxy.js"
 import { readLocalConfig } from "./planner.js"
+import { dypaiTestEndpointTool } from "./test-endpoint.js"
 // ─── Test file discovery ────────────────────────────────────────────────────
@@ -58,8 +60,8 @@ function firstCellOf(res) {
   return k ? row[k] : undefined
 }
-/** Resolve an endpoint name to its UUID via system.endpoints (the remote
- * test_workflow tool only accepts endpoint_id, not endpoint_name). */
+/** Resolve an endpoint name to its UUID via system.endpoints.
+ * Kept for legacy endpoint_id/remote test paths. */
 const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
 async function resolveEndpointId(projectId, ref, cache) {
   if (UUID_RE.test(ref)) return ref
@@ -167,22 +169,41 @@ async function runSingleTest(test, fileCtx) {
   }
   try {
-    // Resolve endpoint → endpoint_id (remote only accepts UUIDs)
+    // Prefer testing the local endpoint YAML by name. This keeps the normal
+    // agent loop cheap: edit file -> dypai_test/dypai_test_endpoint -> push.
     const endpointRef = test.endpoint || test.endpoint_id || fileCtx.endpoint
     if (!endpointRef) {
       return { ...result, status: "error", errors: ["No endpoint specified. Set `endpoint` at file root or test level."] }
     }
-    const endpointId = await resolveEndpointId(fileCtx.projectId, endpointRef, fileCtx.endpointCache)
-    const execArgs = {
-      project_id: fileCtx.projectId,
-      endpoint_id: endpointId,
-      data: test.input || {},
-      trace_mode: "minimal",  // keep tests fast; detail on failure via dypai_trace
+    const testMode = test.mode || fileCtx.mode || "local"
+    let runResponse
+    if (!UUID_RE.test(endpointRef)) {
+      runResponse = await dypaiTestEndpointTool.execute({
+        endpoint: endpointRef,
+        mode: testMode,
+        input: test.input || {},
+        as_user: test.as_user,
+        trace_mode: "minimal",
+        root_dir: fileCtx.rootDir,
+        project_id: fileCtx.projectId,
+        // dypai_test is often a suite with many cases, so validation is skipped
+        // by default here; run dypai_validate once before the suite for lint gating.
+        // Pass skip_validation: false to keep dypai_test_endpoint's own pre-flight validation.
+        skip_validation: test.skip_validation ?? fileCtx.skipValidation ?? true,
+      })
+    } else {
+      // Backward-compatible path for old tests that hard-code endpoint_id.
+      // This cannot read local YAML because UUIDs refer to remote rows.
+      const execArgs = {
+        project_id: fileCtx.projectId,
+        endpoint_id: endpointRef,
+        data: test.input || {},
+        trace_mode: "minimal",
+        draft_mode: testMode === "live" ? false : true,
+      }
+      if (test.as_user) execArgs.impersonated_user_id = test.as_user
+      runResponse = await proxyToolCall("test_workflow", execArgs)
     }
-    if (test.as_user) execArgs.impersonated_user_id = test.as_user
-    const runResponse = await proxyToolCall("test_workflow", execArgs)
     // Normalize what counts as the workflow "result body" (vary by engine version)
     const body = runResponse?.result ?? runResponse?.data ?? runResponse?.output ?? runResponse
@@ -194,12 +215,16 @@ async function runSingleTest(test, fileCtx) {
     // expect.success — did the workflow complete?
     if ("success" in expect) {
       const status = trace?.status ?? trace?.workflow?.status
-      const actualSuccess = status
+      const actualSuccess = runResponse?.success === false
+        ? false
+        : status
         ? status === "completed"
         : !runResponse?.error  // fallback: presence of error field
       if (actualSuccess !== expect.success) {
         result.errors.push(`expected success=${expect.success}, got ${actualSuccess}`)
       }
+    } else if (runResponse?.success === false) {
+      result.errors.push(`execution error: ${runResponse.error || "endpoint test failed"}`)
     }
     // expect.response — match body
@@ -260,19 +285,21 @@ async function runSingleTest(test, fileCtx) {
 export const dypaiTestTool = {
   name: "dypai_test",
   description:
-    "Run YAML-defined tests from dypai/tests/*.test.yaml. Each test does: setup_sql → test_workflow (with impersonation) → response assertions → db_queries assertions → teardown_sql. " +
-    "Uses existing remote tools (execute_sql, test_workflow) — no engine changes needed. " +
+    "Run YAML-defined tests from dypai/tests/*.test.yaml. By default, endpoint tests reference endpoint names and execute the LOCAL YAML from dypai/endpoints/** without requiring dypai_push. " +
+    "Each test does: setup_sql → dypai_test_endpoint/test_workflow (with impersonation) → response assertions → db_queries assertions → teardown_sql. " +
     "Pass `only` to run a subset (substring match on test name).",
   inputSchema: {
     type: "object",
     properties: {
       project_id: { type: "string", description: "Project UUID. Auto-resolved from dypai.config.yaml." },
       root_dir: { type: "string", default: "./dypai" },
+      mode: { type: "string", enum: ["local", "draft", "live"], default: "local", description: "Endpoint source for tests that reference endpoint names. local reads YAML on disk; draft/live test engine versions." },
+      skip_validation: { type: "boolean", default: true, description: "Pass through to local endpoint tests. Default true for suites; run dypai_validate separately for lint gating." },
       only: { type: "string", description: "Only run tests whose name includes this substring." },
       file: { type: "string", description: "Relative path to a single test file under dypai/tests/." },
     },
   },
-  async execute({ project_id, root_dir = "./dypai", only, file } = {}) {
+  async execute({ project_id, root_dir = "./dypai", mode = "local", skip_validation = true, only, file } = {}) {
     const rootDir = resolvePath(process.cwd(), root_dir)
     const config = await readLocalConfig(rootDir)
     const projectId = project_id || config?.project_id
@@ -316,6 +343,8 @@ export const dypaiTestTool = {
         endpoint: doc.endpoint || doc.endpoint_id,
         rootDir,
         endpointCache: new Map(),
+        mode: doc.mode || mode,
+        skipValidation: doc.skip_validation ?? skip_validation,
       }
       for (const t of tests) {
         if (only && !(t.name || "").toLowerCase().includes(only.toLowerCase())) continue