@dypai-ai/mcp 1.5.8 → 1.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dypai-ai/mcp",
3
- "version": "1.5.8",
3
+ "version": "1.5.10",
4
4
  "description": "DYPAI MCP Server — AI agent toolkit for building and deploying full-stack apps",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/index.js CHANGED
@@ -146,7 +146,7 @@ This stores classification metadata only. It does not create users, roles, login
146
146
  },
147
147
  },
148
148
  { name: "list_ai_models", description: "List only the DYPAI Managed AI models that are active for a project. Returns the project-gated OpenRouter model catalog priced in AI Credits per 1M tokens, RPM limit, max output tokens, active/available counts, billing metadata, and the exact node parameters to use. Call this before creating or editing an AI Agent node with DYPAI Managed models. Agents must not invent or use inactive model ids. Use provider='openrouter' and do NOT set credential_id; DYPAI uses the platform OpenRouter key and deducts usage from the organization's AI Credits.", inputSchema: { type: "object", properties: { project_id: { type: "string", description: "Project UUID whose plan and Model Gateway settings determine the active Managed AI catalog." } }, required: ["project_id"] } },
149
- { name: "create_project", description: "Create a new DYPAI project (free plan). Creates a full project with database, engine, GitHub repo, and frontend hosting. BLOCKS by default until provisioning finishes (~60s typical, 120s max) — when it returns, the project_id is ready to use with execute_sql, endpoint tools, etc. Pass wait_until_ready:false for batch flows.\n\nName collision: if another project in the same org already uses the name (case-insensitive), returns {error:'name_taken', existing_project_id, suggestions:[...]}. Pick a different name or use the existing project.\n\nIMPORTANT: before calling, check for a matching template with `search_project_templates`. Passing a `template_slug` drops in a ready-made schema + endpoints + UI that cover 70% of common app types. Only create a blank project if nothing matches.", inputSchema: { type: "object", properties: { name: { type: "string", description: "Project name (e.g. 'My Veterinary App')" }, organization_id: { type: "string", description: "Optional. Uses default org if omitted." }, description: { type: "string" }, template_slug: { type: "string", description: "RECOMMENDED. Project template slug to start from (e.g. 'clinic', 'gym', 'waitlist', 'blank'). Always call search_project_templates first to find the best match." }, wait_until_ready: { type: "boolean", description: "If true (default), blocks until provisioning completes and the project is ready for all operations. If false, returns immediately with status='provisioning' — caller must poll get_project before using.", default: true } }, required: ["name"] } },
149
+ { name: "create_project", description: "Create a new DYPAI project (free plan). Creates a full project with database, engine, GitHub repo, and frontend hosting. BLOCKS by default until provisioning finishes (~60s typical, 120s max) — when it returns, the project_id is ready to use with execute_sql, endpoint tools, etc. Pass wait_until_ready:false for batch flows.\n\nName collision: if another project in the same org already uses the name (case-insensitive), returns {error:'name_taken', existing_project_id, suggestions:[...]}. Pick a different name or use the existing project.\n\nProject limits are enforced by the DYPAI API at organization/workspace scope according to the workspace plan. If it returns {error:'project_limit_reached'}, do not retry create_project; show list_projects for that organization and ask the user to reuse, archive/pause, upgrade the workspace to Pro, or add capacity.\n\nIMPORTANT: before calling, check for a matching template with `search_project_templates`. Passing a `template_slug` drops in a ready-made schema + endpoints + UI that cover 70% of common app types. Use built-in bases when appropriate: `private-admin` for private internal tools, `user-accounts` for apps with signup/login users, `landing-admin` for public landing plus admin, and `blank` only when no base fits.", inputSchema: { type: "object", properties: { name: { type: "string", description: "Project name (e.g. 'My Veterinary App')" }, organization_id: { type: "string", description: "Optional. Uses default org if omitted." }, description: { type: "string" }, template_slug: { type: "string", description: "RECOMMENDED. Project template slug to start from (e.g. 'clinic', 'gym', 'private-admin', 'user-accounts', 'landing-admin', 'blank'). Always call search_project_templates first to find the best match." }, wait_until_ready: { type: "boolean", description: "If true (default), blocks until provisioning completes and the project is ready for all operations. If false, returns immediately with status='provisioning' — caller must poll get_project before using.", default: true } }, required: ["name"] } },
150
150
  { name: "get_app_credentials", description: "Lists available credentials in the current application. Returns API keys, anon key, service role key, and engine URL needed for SDK configuration.", inputSchema: { type: "object", properties: { project_id: { type: "string" } }, required: [] } },
151
151
 
152
152
  // ── Database ──────────────────────────────────────────────────────────────
@@ -487,8 +487,9 @@ endpoint YAML and \`dypai_push\`. This tool does NOT modify the definition.`,
487
487
 
488
488
  // ── Knowledge ─────────────────────────────────────────────────────────────
489
489
  { name: "search_docs", description: "Search DYPAI documentation. Use this when unsure about SDK usage, auth patterns, workflow nodes, or platform features. Returns relevant documentation chunks.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What you want to learn about" } }, required: ["query"] } },
490
+ { name: "search_design_patterns", description: "Search compact DYPAI UI/design recipes. Use before designing substantial screens.", inputSchema: { type: "object", properties: { query: { type: "string", description: "Design need, with starter/domain/screen/style context when known." }, starter_slug: { type: "string", description: "Optional: private-admin, user-accounts, landing-admin, or blank." }, app_type: { type: "string", description: "Optional domain/app type." }, screen_type: { type: "string", description: "Optional screen/workflow." }, visual_style: { type: "string", description: "Optional style." }, category: { type: "string", description: "Optional category." }, limit: { type: "integer", default: 3, minimum: 1, maximum: 4 } }, required: ["query"] } },
490
491
  { name: "search_workflow_templates", description: "Search workflow templates by description. Returns ready-to-use workflow code for common patterns: CRUD operations, payment gateways, email sending, AI chatbots, data pipelines, etc.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What the workflow should do (e.g. 'send email', 'stripe payment')" }, category: { type: "string", description: "Optional: AI, Database, Payments, Communication, Logic, Storage" } }, required: ["query"] } },
491
- { name: "search_project_templates", description: "Search project starter templates by description. Returns template metadata and slugs for starters like clinic, gym, waitlist, blank, auth, or landing.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What kind of project starter you need (e.g. 'gym app', 'landing page', 'auth starter')" }, category: { type: "string", description: "Optional category filter" } }, required: ["query"] } },
492
+ { name: "search_project_templates", description: "Search project starter templates by description. Returns template metadata and slugs for marketplace templates plus built-in bases: private-admin, user-accounts, landing-admin, and blank.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What kind of project starter you need (e.g. 'gym app', 'private admin dashboard', 'user accounts portal', 'landing plus admin')" }, category: { type: "string", description: "Optional category filter" } }, required: ["query"] } },
492
493
  ]
493
494
 
494
495
  // ── Server Instructions ──────────────────────────────────────────────────────
@@ -517,16 +518,26 @@ First reflex, always:
517
518
 
518
519
  1. **Acknowledge briefly** what they want to build (one short line, their language).
519
520
  2. **\`search_project_templates(query: "<keywords from their request>")\`** — keywords in their language. Templates cover common app types (gym, clinic, waitlist, saas dashboard, etc.).
520
- 3. **Decide: template or blank?** Default is **blank**. A template is only the right pick when the match is OBVIOUS and STRONG:
521
+ 3. **Decide: marketplace template, built-in base, or blank.** Marketplace templates are only right when the match is OBVIOUS and STRONG:
521
522
  - ✅ User says *"app para mi gimnasio"* + there's \`gym-manager\` (exact domain + feature overlap) → template.
522
- - ❌ User says *"algo para gestionar reservas"* + there's \`gym-manager\` (soft match, many interpretations) → **blank**. Don't assume they want the gym's specific schema (classes, memberships, check-ins) — they didn't ask for it.
523
- - User is a dev with a concrete spec (*"crea un proyecto con estas 3 tablas y estos endpoints"*) → **blank**, always. Respect their design.
524
- - No template returned at all → **blank**.
525
- 4. **Call it** → \`create_project(name: "<their name>", template_slug: "<matched_slug>" | "blank")\`.
523
+ - ❌ User says *"algo para gestionar reservas"* + there's \`gym-manager\` (soft match, many interpretations) → use a built-in base or **blank**. Don't assume they want the gym's specific schema (classes, memberships, check-ins) — they didn't ask for it.
524
+ - Built-in bases are safe defaults:
525
+ - private/internal/admin/dashboard/backoffice/business management → \`private-admin\`
526
+ - end-user signup/login/customer/member portal/marketplace/SaaS accounts → \`user-accounts\`
527
+ - public landing/marketing site plus private admin → \`landing-admin\`
528
+ - no clear access pattern or explicitly custom/from scratch → \`blank\`
529
+ - ❌ User is a dev with a concrete spec (*"crea un proyecto con estas 3 tablas y estos endpoints"*) → usually **blank**, unless they explicitly want one of the built-in bases.
530
+ - ❌ No marketplace or built-in base fits → **blank**.
531
+ 4. **Call it** → \`create_project(name: "<their name>", template_slug: "<matched_slug>" | "private-admin" | "user-accounts" | "landing-admin" | "blank")\`.
526
532
  If you went with a template, acknowledge in ONE line what's included so the user can push back: *"Lo arranco con la plantilla X, que trae socios, clases y pagos. ¿Te vale o prefieres algo más simple?"*
527
533
  If you went blank, just say: *"Arranco un proyecto en blanco y lo construimos a medida."*
528
534
  5. **After \`create_project\`** → ask for an absolute workspace path, then \`dypai_pull\` + \`manage_frontend(sync)\` (see next section).
529
535
 
536
+ Before designing substantial UI (app shell, dashboard, login, tables/lists,
537
+ forms, calendars, or domain-specific screens), use \`search_design_patterns\`
538
+ with the app/starter/screen/style context. It returns curated recipes; adapt
539
+ them to the project instead of inventing generic starter UI.
540
+
530
541
  **The template system exists to save time when the fit is obvious, not to force-match every request.** When in doubt → blank is always correct. Iterating up from blank is cheaper than deleting 80% of a mismatched template.
531
542
 
532
543
  ## The one legit follow-up question
@@ -672,9 +683,10 @@ Internally this means:
672
683
 
673
684
  1. edit backend files
674
685
  2. validate local backend changes
675
- 3. save them to the preview environment
676
- 4. test the preview version when practical
677
- 5. then tell the user it is ready to try
686
+ 3. test changed endpoint YAML with \`dypai_test_endpoint(mode:'local')\` when practical
687
+ 4. save them to the preview environment
688
+ 5. test the preview version when practical
689
+ 6. then tell the user it is ready to try
678
690
 
679
691
  Never ask the user whether to run the internal save-to-preview step. It is safe, reversible, and required for the user to test the actual change.
680
692
 
@@ -748,7 +760,7 @@ Use phrases like:
748
760
  Default is **no tool names in user-facing text**.
749
761
 
750
762
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
751
- # SEARCH BEFORE YOU GUESS — \`search_docs\` is your reference manual
763
+ # SEARCH BEFORE YOU GUESS — \`search_docs\` and \`search_design_patterns\`
752
764
  # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
753
765
 
754
766
  This prompt is the MAP of the DYPAI platform. The detailed docs live in
@@ -870,12 +882,12 @@ Editing files inside \`dypai/\` only changes YOUR DISK. The platform doesn't see
870
882
  \`\`\`
871
883
 
872
884
  Practical consequences — internalize these:
873
- - **Never publish backend changes just to test them.** Backend changes are testable before production: save them to preview, verify with \`dypai_test_endpoint(mode:'draft')\` when possible, then tell the user exactly what to try in preview.
885
+ - **Never publish backend changes just to test them.** First test the local YAML directly with \`dypai_test_endpoint(mode:'local')\`; only after that save to preview with \`dypai_push\` and verify the staged draft when needed.
874
886
  - **After EVERY meaningful backend change set, call \`dypai_push\`.** Don't batch a session's worth of edits hoping to push at the end — if you forget, the user tests the preview and sees the OLD behavior. The push is cheap, idempotent, and creates ONE preview version per resource (subsequent pushes overwrite the pending preview version, not stack new ones).
875
887
  - **\`dypai_push\` is the internal save-to-preview step. It is NOT a production publish.** Live traffic is untouched. You can run it repeatedly without affecting real users. In user-facing prose, say "listo para probar" or "en previsualización", not "pushed" or "draft".
876
888
  - **The preview host (\`dev-<project_id>.dypai.dev\`) only sees what you've saved to preview.** A change still only on disk is invisible to the user's preview. If the user says "I tested it and nothing changed", first check whether the backend change was saved to preview after the last edit.
877
889
  - **\`dypai_validate\` before \`dypai_push\`** — push runs validate as a pre-flight, but running it explicitly first gives you the lint output without committing. Cheap insurance.
878
- - **Order during a multi-step backend feature**: edit → \`dypai_validate\` → \`dypai_push\` → \`dypai_test_endpoint(mode:'draft')\` (or tell the user to test preview). Repeat per change. ONLY when the user explicitly approves production do \`manage_drafts(operation:'list')\` → \`manage_drafts(operation:'publish', confirm:true)\`.
890
+ - **Order during a multi-step backend feature**: edit → \`dypai_validate\` → \`dypai_test_endpoint(mode:'local')\` → \`dypai_push\` → \`dypai_test_endpoint(mode:'draft')\` when you need to verify the saved preview. Repeat per coherent change. ONLY when the user explicitly approves production do \`manage_drafts(operation:'list')\` → \`manage_drafts(operation:'publish', confirm:true)\`.
879
891
  - **DDL is the exception**: \`execute_sql\` with CREATE / ALTER / DROP TABLE applies to live IMMEDIATELY (no preview layer for schema). Preview only exists for endpoints / webhooks / crons / realtime policies. Summarize destructive DDL to the user before running it.
880
892
 
881
893
  ## User intent → tool to call (decision table)
@@ -887,7 +899,7 @@ Use this BEFORE picking a tool. If unsure which row matches, ask the user.
887
899
  | "Create a new project" | \`search_project_templates\` (find a starter) | \`create_project(template_slug: ...)\` |
888
900
  | "Show me what we have" / "I want to work on existing project X" | \`list_projects\` → \`dypai_pull\` (backend) + \`manage_frontend(sync)\` (frontend) | Read \`dypai/\` files + \`src/\` |
889
901
  | "This is a private admin app / public site / user portal / multi-role app" | \`manage_project_access_profile(operation:'update')\` | Then implement the actual auth/UI/data behavior normally |
890
- | "Add/change a backend endpoint, table, cron, webhook, agent, integration" | Edit files in \`dypai/\` | \`dypai_validate\` → \`dypai_push\` |
902
+ | "Add/change a backend endpoint, table, cron, webhook, agent, integration" | Edit files in \`dypai/\` | \`dypai_validate\` → \`dypai_test_endpoint(mode:'local')\` for changed endpoints → \`dypai_push\` |
891
903
  | "Publish my backend changes" / "make it live" | \`manage_drafts(operation:'list')\` to show what's pending | \`manage_drafts(operation:'publish', confirm:true)\` |
892
904
  | "Test an endpoint before publishing" | \`dypai_test_endpoint(mode:'local')\` (your edits) or \`(mode:'draft')\` (after push) | — |
893
905
  | "Test the new endpoint from my local frontend, end-to-end, before publishing" | Tell user: their local frontend already points to \`https://dev-<project_id>.dypai.dev\` (set by \`manage_frontend(sync)\`), which serves drafts on top of live. So after \`dypai_push\` the local UI hits the draft overlay automatically — nothing else to do. | — |
@@ -919,21 +931,23 @@ User: "Add a /api/list-tasks endpoint that returns the current user's tasks, and
919
931
  2. manage_frontend(operation:'sync', ...) # materialize frontend if not already on disk
920
932
  3. # Backend: create the endpoint
921
933
  Write dypai/endpoints/list-tasks.yaml # trigger.http_api auth_mode:jwt + dypai_database query
922
- 4. dypai_validate # catch typos before saving to preview
923
- 5. dypai_push # saves to preview, NOT production
924
- 6. dypai_test_endpoint(endpoint:'list-tasks', mode:'draft', as_user:'<user_id>')
925
- # verifies the preview version; do NOT publish just to test
926
- 7. # Frontend: call the new endpoint from React
934
+ 4. dypai_validate # catch YAML/placeholder issues
935
+ 5. dypai_test_endpoint(endpoint:'list-tasks', mode:'local', as_user:'<user_id>')
936
+ # verifies the local YAML before saving anything to preview
937
+ 6. dypai_push # saves to preview, NOT production
938
+ 7. dypai_test_endpoint(endpoint:'list-tasks', mode:'draft', as_user:'<user_id>')
939
+ # optional final sanity: verifies the preview version; do NOT publish just to test
940
+ 8. # Frontend: call the new endpoint from React
927
941
  Edit src/pages/Dashboard.tsx # useEndpoint('list-tasks')
928
- 8. # Test locally/browser if available. Then tell the user in plain language:
942
+ 9. # Test locally/browser if available. Then tell the user in plain language:
929
943
  # "Ya está listo para probar. Abre la previsualización y revisa la lista de tareas. Todavía no está publicado para tus usuarios."
930
- 9. # ONLY after the user confirms it is good:
944
+ 10. # ONLY after the user confirms it is good:
931
945
  manage_drafts(operation:'list') # internal: inspect what will publish
932
- 10. manage_drafts(operation:'publish', confirm:true) # backend live after explicit approval
933
- 11. manage_frontend(operation:'deploy', sourceDirectory, confirm:true) # frontend live after explicit approval
946
+ 11. manage_drafts(operation:'publish', confirm:true) # backend live after explicit approval
947
+ 12. manage_frontend(operation:'deploy', sourceDirectory, confirm:true) # frontend live after explicit approval
934
948
  \`\`\`
935
949
 
936
- **Testing rule**: never publish backend changes just to test them. Backend can be verified from the preview version. **Production order rule**: when you are truly publishing a full-stack change, publish backend BEFORE deploying the frontend; otherwise the live UI may call backend functionality that is not live yet.
950
+ **Testing rule**: never publish backend changes just to test them. Verify local YAML first with \`dypai_test_endpoint(mode:'local')\`, then save to preview and test \`mode:'draft'\` or the dev URL when needed. **Production order rule**: when you are truly publishing a full-stack change, publish backend BEFORE deploying the frontend; otherwise the live UI may call backend functionality that is not live yet.
937
951
 
938
952
  ## Debugging user-reported errors — \`search_logs\` is your starting point
939
953
 
@@ -1019,7 +1033,7 @@ Mental translations: "edge function" → workflow with one code node; "cron" →
1019
1033
  ## Top gotchas (the expensive ones)
1020
1034
 
1021
1035
  1. **Forgetting \`WHERE user_id = \${current_user_id}\`** — users see each other's data. #1 multi-tenancy bug. The engine does NOT auto-filter. RLS doesn't exist.
1022
- 2. **Editing YAML without \`dypai_push\`** — your change is on YOUR DISK only. Local frontend (which points at the draft overlay) keeps serving the old version. Symptom: *"I tested it locally and nothing changed"*. Always push after each meaningful change set.
1036
+ 2. **Editing YAML without \`dypai_push\`** — \`dypai_test_endpoint(mode:'local')\` can test your file edits, but the preview/frontend cannot see them until \`dypai_push\` saves them to draft. Symptom: *"I tested it in preview and nothing changed"*. Test local first, then push when the changed endpoint is ready for preview.
1023
1037
  3. **Treating \`dypai_push\` as a deploy** — it's "save as draft", not publish. Live traffic is untouched until \`manage_drafts(publish, confirm:true)\`. Push freely, only ask the user before publish.
1024
1038
  4. **\`public\` auth_mode with \`\${current_user_id}\`** — no JWT → placeholder empty → SQL fails or returns wrong data. Use \`jwt\` if you need the user.
1025
1039
  5. **Missing \`return: true\`** — endpoint returns \`null\`. Every path that should produce an HTTP response needs one node with \`return: true\`.
@@ -20,7 +20,8 @@ Scaffolds a project directory with:
20
20
  - .env with engine URL
21
21
 
22
22
  Use search_project_templates first to find available templates, then pass the template slug here.
23
- Or use "blank" for an empty starter project.`,
23
+ Use "private-admin" for private internal tools, "user-accounts" for apps with signup/login users,
24
+ "landing-admin" for public landing plus admin, or "blank" only when no base fits.`,
24
25
 
25
26
  inputSchema: {
26
27
  type: "object",
@@ -35,7 +36,7 @@ Or use "blank" for an empty starter project.`,
35
36
  },
36
37
  template: {
37
38
  type: "string",
38
- description: 'Template slug (e.g. "clinic", "gym", "blank"). Use search_project_templates to find available templates.',
39
+ description: 'Template slug (e.g. "clinic", "gym", "private-admin", "user-accounts", "landing-admin", "blank"). Use search_project_templates to find available templates.',
39
40
  default: "blank",
40
41
  },
41
42
  },
@@ -1,7 +1,8 @@
1
1
  /**
2
- * dypai_test — YAML-defined tests against endpoints. Zero engine changes:
3
- * orchestrates `execute_sql` + `test_workflow` (impersonation) + `execute_sql`
4
- * using what already exists.
2
+ * dypai_test — YAML-defined tests against endpoints. By default, endpoint
3
+ * names run through dypai_test_endpoint(mode:'local'), so tests execute the
4
+ * YAML currently on disk before dypai_push. UUID endpoint_id tests keep the
5
+ * legacy remote test_workflow path for backward compatibility.
5
6
  *
6
7
  * File layout (committable under dypai/tests/):
7
8
  * endpoint: create-order # or endpoint_id: <uuid>
@@ -26,6 +27,7 @@ import { join, resolve as resolvePath } from "path"
26
27
  import YAML from "yaml"
27
28
  import { proxyToolCall } from "../proxy.js"
28
29
  import { readLocalConfig } from "./planner.js"
30
+ import { dypaiTestEndpointTool } from "./test-endpoint.js"
29
31
 
30
32
  // ─── Test file discovery ────────────────────────────────────────────────────
31
33
 
@@ -58,8 +60,8 @@ function firstCellOf(res) {
58
60
  return k ? row[k] : undefined
59
61
  }
60
62
 
61
- /** Resolve an endpoint name to its UUID via system.endpoints (the remote
62
- * test_workflow tool only accepts endpoint_id, not endpoint_name). */
63
+ /** Resolve an endpoint name to its UUID via system.endpoints.
64
+ * Kept for legacy endpoint_id/remote test paths. */
63
65
  const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
64
66
  async function resolveEndpointId(projectId, ref, cache) {
65
67
  if (UUID_RE.test(ref)) return ref
@@ -167,22 +169,41 @@ async function runSingleTest(test, fileCtx) {
167
169
  }
168
170
 
169
171
  try {
170
- // Resolve endpoint → endpoint_id (remote only accepts UUIDs)
172
+ // Prefer testing the local endpoint YAML by name. This keeps the normal
173
+ // agent loop cheap: edit file -> dypai_test/dypai_test_endpoint -> push.
171
174
  const endpointRef = test.endpoint || test.endpoint_id || fileCtx.endpoint
172
175
  if (!endpointRef) {
173
176
  return { ...result, status: "error", errors: ["No endpoint specified. Set `endpoint` at file root or test level."] }
174
177
  }
175
- const endpointId = await resolveEndpointId(fileCtx.projectId, endpointRef, fileCtx.endpointCache)
176
-
177
- const execArgs = {
178
- project_id: fileCtx.projectId,
179
- endpoint_id: endpointId,
180
- data: test.input || {},
181
- trace_mode: "minimal", // keep tests fast; detail on failure via dypai_trace
178
+ const testMode = test.mode || fileCtx.mode || "local"
179
+ let runResponse
180
+ if (!UUID_RE.test(endpointRef)) {
181
+ runResponse = await dypaiTestEndpointTool.execute({
182
+ endpoint: endpointRef,
183
+ mode: testMode,
184
+ input: test.input || {},
185
+ as_user: test.as_user,
186
+ trace_mode: "minimal",
187
+ root_dir: fileCtx.rootDir,
188
+ project_id: fileCtx.projectId,
189
+ // dypai_test is often a suite with many cases; run dypai_validate once
190
+ // before the suite if you want lint gating. Direct dypai_test_endpoint calls
191
+ // keep their own pre-flight validation by default; suites skip it unless skip_validation is false.
192
+ skip_validation: test.skip_validation ?? fileCtx.skipValidation ?? true,
193
+ })
194
+ } else {
195
+ // Backward-compatible path for old tests that hard-code endpoint_id.
196
+ // This cannot read local YAML because UUIDs refer to remote rows.
197
+ const execArgs = {
198
+ project_id: fileCtx.projectId,
199
+ endpoint_id: endpointRef,
200
+ data: test.input || {},
201
+ trace_mode: "minimal",
202
+ draft_mode: testMode === "live" ? false : true,
203
+ }
204
+ if (test.as_user) execArgs.impersonated_user_id = test.as_user
205
+ runResponse = await proxyToolCall("test_workflow", execArgs)
182
206
  }
183
- if (test.as_user) execArgs.impersonated_user_id = test.as_user
184
-
185
- const runResponse = await proxyToolCall("test_workflow", execArgs)
186
207
 
187
208
  // Normalize what counts as the workflow "result body" (varies by engine version)
188
209
  const body = runResponse?.result ?? runResponse?.data ?? runResponse?.output ?? runResponse
@@ -194,12 +215,16 @@ async function runSingleTest(test, fileCtx) {
194
215
  // expect.success — did the workflow complete?
195
216
  if ("success" in expect) {
196
217
  const status = trace?.status ?? trace?.workflow?.status
197
- const actualSuccess = status
218
+ const actualSuccess = runResponse?.success === false
219
+ ? false
220
+ : status
198
221
  ? status === "completed"
199
222
  : !runResponse?.error // fallback: presence of error field
200
223
  if (actualSuccess !== expect.success) {
201
224
  result.errors.push(`expected success=${expect.success}, got ${actualSuccess}`)
202
225
  }
226
+ } else if (runResponse?.success === false) {
227
+ result.errors.push(`execution error: ${runResponse.error || "endpoint test failed"}`)
203
228
  }
204
229
 
205
230
  // expect.response — match body
@@ -260,19 +285,21 @@ async function runSingleTest(test, fileCtx) {
260
285
  export const dypaiTestTool = {
261
286
  name: "dypai_test",
262
287
  description:
263
- "Run YAML-defined tests from dypai/tests/*.test.yaml. Each test does: setup_sql → test_workflow (with impersonation) → response assertions → db_queries assertions → teardown_sql. " +
264
- "Uses existing remote tools (execute_sql, test_workflow) — no engine changes needed. " +
288
+ "Run YAML-defined tests from dypai/tests/*.test.yaml. By default, endpoint tests reference endpoint names and execute the LOCAL YAML from dypai/endpoints/** without requiring dypai_push. " +
289
+ "Each test does: setup_sql → dypai_test_endpoint/test_workflow (with impersonation) → response assertions → db_queries assertions → teardown_sql. " +
265
290
  "Pass `only` to run a subset (substring match on test name).",
266
291
  inputSchema: {
267
292
  type: "object",
268
293
  properties: {
269
294
  project_id: { type: "string", description: "Project UUID. Auto-resolved from dypai.config.yaml." },
270
295
  root_dir: { type: "string", default: "./dypai" },
296
+ mode: { type: "string", enum: ["local", "draft", "live"], default: "local", description: "Endpoint source for tests that reference endpoint names. local reads YAML on disk; draft/live test engine versions." },
297
+ skip_validation: { type: "boolean", default: true, description: "Pass through to local endpoint tests. Default true for suites; run dypai_validate separately for lint gating." },
271
298
  only: { type: "string", description: "Only run tests whose name includes this substring." },
272
299
  file: { type: "string", description: "Relative path to a single test file under dypai/tests/." },
273
300
  },
274
301
  },
275
- async execute({ project_id, root_dir = "./dypai", only, file } = {}) {
302
+ async execute({ project_id, root_dir = "./dypai", mode = "local", skip_validation = true, only, file } = {}) {
276
303
  const rootDir = resolvePath(process.cwd(), root_dir)
277
304
  const config = await readLocalConfig(rootDir)
278
305
  const projectId = project_id || config?.project_id
@@ -316,6 +343,8 @@ export const dypaiTestTool = {
316
343
  endpoint: doc.endpoint || doc.endpoint_id,
317
344
  rootDir,
318
345
  endpointCache: new Map(),
346
+ mode: doc.mode || mode,
347
+ skipValidation: doc.skip_validation ?? skip_validation,
319
348
  }
320
349
  for (const t of tests) {
321
350
  if (only && !(t.name || "").toLowerCase().includes(only.toLowerCase())) continue