@dypai-ai/mcp 1.4.5 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dypai-ai/mcp",
3
- "version": "1.4.5",
3
+ "version": "1.4.6",
4
4
  "description": "DYPAI MCP Server — AI agent toolkit for building and deploying full-stack apps",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/index.js CHANGED
@@ -54,6 +54,7 @@ import { dypaiPullTool, dypaiDiffTool, dypaiPushTool, dypaiValidateTool, dypaiTe
54
54
  import { proxyToolCall } from "./tools/proxy.js"
55
55
  import { enrichSuccess, enrichError } from "./tools/enrich.js"
56
56
  import { maybeRefreshSchemaAfterExecuteSql } from "./tools/sql-side-effects.js"
57
+ import { maybeOffloadSearchLogs } from "./tools/search-logs-offload.js"
57
58
  import { withProjectContext, invalidateProjectContext } from "./tools/project-context.js"
58
59
  // summarizeDypaiTraceResponse (from ./tools/trace-summarize.js) is kept on
59
60
  // disk for when dypai_trace is re-enabled, but not imported here.
@@ -447,6 +448,26 @@ endpoint YAML and \`dypai_push\`. This tool does NOT modify the definition.`,
447
448
  },
448
449
  },
449
450
 
451
+ // ── Observability ─────────────────────────────────────────────────────────
452
+ {
453
+ name: "search_logs",
454
+ description: "Search recent errors and warnings for the current project. ALWAYS call this FIRST when the user reports any error, bug, or 'this isn't working' — don't guess from the code; check what actually broke. Returns a unified, time-ordered list mixing failed workflow executions and warn/error log lines from the engine. Defaults to the last 24h. Data retention: 7 days.\n\nWorkflow:\n 1) Call with no args (or just `since:'1h'`) → see recent failures.\n 2) Pick the relevant entry → call again with `endpoint` + tighter `query` to narrow down.\n 3) For the full step-by-step debug trace of a specific failure, set `include_trace:true` (response is much larger; you'll likely get a `file_path` to read the full JSON from disk).\n\nUse `environment:'live'` when investigating a production user complaint (excludes draft test runs). Use `environment:'draft'` when the user says 'I just tested X locally and it failed' (their local UI hits the draft overlay).",
455
+ inputSchema: {
456
+ type: "object",
457
+ properties: {
458
+ project_id: { type: "string", description: "Project UUID. Auto-detected for project tokens." },
459
+ query: { type: "string", description: "Optional substring to match (case-insensitive) in error messages and log lines. e.g. 'timeout', 'OpenAI', 'permission denied'." },
460
+ endpoint: { type: "string", description: "Optional endpoint name filter (e.g. 'create-order')." },
461
+ since: { type: "string", default: "24h", description: "Time window: relative ('15m', '1h', '24h', '7d') or ISO 8601 timestamp. Default 24h. Hard cap: 7d (retention)." },
462
+ level: { type: "string", enum: ["error", "warn", "all"], default: "all", description: "Filter by severity. 'error' includes failed/timeout executions + error logs. 'warn' is warning logs. 'all' (default) returns both." },
463
+ environment: { type: "string", enum: ["live", "draft", "all"], default: "all", description: "live = production traffic only (excludes draft overlay test runs). draft = only requests through dev-<project_id>.dypai.dev. all = both. Use 'live' for real user bug reports." },
464
+ limit: { type: "integer", default: 50, minimum: 1, maximum: 200, description: "Max items to return. Default 50, max 200." },
465
+ include_trace: { type: "boolean", default: false, description: "Attach the full step-by-step debug trace per failed execution. Verbose — combine with `query`/`endpoint` filters and a low `limit`. If the response gets large, the local proxy writes it to disk and returns a file_path you can Read." }
466
+ },
467
+ required: []
468
+ }
469
+ },
470
+
450
471
  // ── Knowledge ─────────────────────────────────────────────────────────────
451
472
  { name: "search_docs", description: "Search DYPAI documentation. Use this when unsure about SDK usage, auth patterns, workflow nodes, or platform features. Returns relevant documentation chunks.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What you want to learn about" } }, required: ["query"] } },
452
473
  { name: "search_workflow_templates", description: "Search workflow templates by description. Returns ready-to-use workflow code for common patterns: CRUD operations, payment gateways, email sending, AI chatbots, data pipelines, etc.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What the workflow should do (e.g. 'send email', 'stripe payment')" }, category: { type: "string", description: "Optional: AI, Database, Payments, Communication, Logic, Storage" } }, required: ["query"] } },
@@ -511,6 +532,7 @@ Use this BEFORE picking a tool. If unsure which row matches, ask the user.
511
532
  | "Publish the new UI" / "ship the frontend" | \`manage_frontend(deploy, confirm:true)\` | (deploy is the publish — there is no separate step) |
512
533
  | "Roll back" | Backend: \`get_endpoint_versions\` then write old code back. Frontend: re-deploy older source. | — |
513
534
  | "Upload a file / a CSV / seed data" | \`bulk_upsert\` (data) or \`manage_storage(upload_file)\` (binary) | — |
535
+ | "X is broken" / "I'm getting an error" / "this doesn't work" / "users are reporting Y" | \`search_logs\` FIRST (don't guess from the code) | If a specific failure is found → \`search_logs(include_trace:true, query:'...')\` for the full step-by-step trace |
514
536
 
515
537
  ## Confirm rules — the ONLY operations that need \`confirm:true\`
516
538
 
@@ -547,6 +569,49 @@ User: "Add a /api/list-tasks endpoint that returns the current user's tasks, and
547
569
 
548
570
  **Order matters**: publish backend BEFORE deploying frontend. Otherwise the new UI calls an endpoint that doesn't exist on live yet → 404s for users. The \`manage_frontend(deploy)\` confirmation hint will warn you if backend drafts are still pending.
549
571
 
572
+ ## Debugging user-reported errors — \`search_logs\` is your starting point
573
+
574
+ **Rule**: whenever the user says any of these — "X is broken", "this isn't working", "I'm getting an error", "users are reporting Y", "the page is white", "nothing happens when I click" — **call \`search_logs\` BEFORE reading any code**. The engine's logs are the ground truth; the code is your hypothesis. Trying to debug from the source first is how you waste 20 minutes solving the wrong problem.
575
+
576
+ ### The standard flow
577
+
578
+ \`\`\`
579
+ 1. search_logs({ since: "1h", level: "error" })
580
+ → Quick scan of recent failures. If empty, widen to "24h".
581
+
582
+ 2. # Did the user say "I just tested this in my local UI"?
583
+ # → add environment: "draft" (their UI hits the draft overlay)
584
+ # Did they say "production users are reporting..."?
585
+ # → add environment: "live" (excludes their own draft test runs)
586
+
587
+ 3. # Found the relevant entry? Narrow down:
588
+ search_logs({ endpoint: "create-order", query: "stripe", since: "1h" })
589
+
590
+ 4. # For the full step-by-step trace of one specific failure:
591
+ search_logs({
592
+ endpoint: "create-order",
593
+ query: "<a unique substring from the error message>",
594
+ include_trace: true,
595
+ limit: 5
596
+ })
597
+ → If the response is large the local proxy writes it to a temp file
598
+ and returns a \`file_path\`. Read that file with the Read tool ONLY
599
+ when you need fields beyond the inline summary.
600
+
601
+ 5. # Now you know exactly which node failed and why → fix the code.
602
+ \`\`\`
603
+
604
+ ### What \`search_logs\` returns
605
+
606
+ Each item has \`type\` (\`execution_failed\` | \`log\`), \`level\` (\`error\` | \`warn\`), \`time\`, \`endpoint\`, \`message\`, and \`environment\` (\`live\` | \`draft\` | null for legacy rows). Failed executions also include \`status\` (\`error\` | \`timeout\`) and \`duration_ms\`. With \`include_trace:true\` they also include \`trace\` — a per-node log of inputs, outputs, errors, and stacks.
607
+
608
+ ### Common pitfalls
609
+
610
+ - **Don't skip this and read code first.** The bug is almost never where you'd guess. Logs tell you exactly which node blew up and the exact error string.
611
+ - **Don't dump every error you see at the user.** Filter, summarize, then propose ONE fix.
612
+ - **\`environment\` matters.** A draft test failure is the user testing pending changes — fixing the draft is fine. A live failure is real users hitting production — fix urgently and follow up with backend publish.
613
+ - **Retention is 7 days.** If the user reports a bug from "last week", the data is likely gone. Tell them.
614
+
550
615
  ## What you do NOT have to think about
551
616
 
552
617
  - "Development vs production environment" — the user never sees this. Backend changes always go through draft-and-publish. Frontend changes always go through deploy. That's the whole model.
@@ -657,7 +722,7 @@ Four ways to validate a backend change, in increasing fidelity:
657
722
  Other tools:
658
723
  - **\`dypai_test\`** — YAML regression suites at \`dypai/tests/<name>.test.yaml\` with assertions (equals, matches, contains, type, exists, gte, lte) + setup_sql / teardown_sql.
659
724
  - **\`dypai_validate\`** — static linting (placeholders, tables, columns, node params, credentials). Run before EVERY push.
660
- - **Prod debugging**: \`get_recent_workflow_activity(only_errors=true)\` surfaces recent failures.
725
+ - **Prod debugging**: \`search_logs\` is the entry point — see the "Debugging user-reported errors" section above. Returns failed executions + warn/error logs from the last 7 days; pass \`environment:'live'\` to exclude draft-overlay test runs and \`include_trace:true\` for the per-node failure trace.
661
726
 
662
727
  → Deep patterns: \`search_docs("testing endpoints")\` (test setup + assertions), \`search_docs("troubleshooting")\` (common failures + fixes).
663
728
 
@@ -918,7 +983,7 @@ Pre-configured at \`src/lib/dypai.ts\`. Every method returns \`{ data, error }\`
918
983
  - **JWT verification** — jwt auth_mode validates the session token automatically. \`\${current_user_id}\` is trusted.
919
984
  - **Rate limiting** — per-plan. Returns 429 automatically.
920
985
  - **CORS** — allowed origins per project (configured in dashboard).
921
- - **Request logging** — every execution in \`system.workflow_runs\` with duration, status, tokens (for agents). View via \`get_recent_workflow_activity\`.
986
+ - **Request logging** — every execution is recorded with duration, status, environment (live/draft), and (on failure) a per-node debug trace. Warn/error \`userLog\` lines are persisted alongside. Query both via \`search_logs\` (last 7 days).
922
987
  - **Input validation** — if the endpoint has \`input:\` schema, requests with invalid payloads are rejected with 400 + details BEFORE the workflow runs.
923
988
  - **SQL injection** — placeholders bind as Postgres params. Safe by construction.
924
989
  - **Secrets management** — credentials and env vars never appear in YAML or logs.
@@ -1109,6 +1174,14 @@ async function handleRequest(msg) {
1109
1174
  }
1110
1175
  }
1111
1176
 
1177
+ // search_logs can return huge payloads when include_trace=true.
1178
+ // Offload to a temp file when the serialized response > 60 KB so
1179
+ // the agent's context stays clean — it gets a summary + file path
1180
+ // and only Reads the file when it actually needs the detail.
1181
+ if (name === "search_logs") {
1182
+ result = maybeOffloadSearchLogs(result)
1183
+ }
1184
+
1112
1185
  // Note: test_workflow is no longer agent-facing (wrapped by
1113
1186
  // dypai_test_endpoint). dypai_trace is temporarily hidden until
1114
1187
  // the engine captures debug traces for real production executions.
@@ -0,0 +1,151 @@
1
+ /**
2
+ * maybeOffloadSearchLogs — keep `search_logs` responses from blowing up the
3
+ * agent's context window.
4
+ *
5
+ * `search_logs` (especially with `include_trace=true`) can return hundreds of
6
+ * KB of JSON. Inlining that into the tool-result `text` field forces the model
7
+ * to load the whole payload into context, which is wasteful for the typical
8
+ * "show me what failed and let me drill into one of them" workflow.
9
+ *
10
+ * Strategy:
11
+ * - If the serialized response exceeds OFFLOAD_THRESHOLD_BYTES, write the
12
+ * full JSON to a temp file and return a compact summary that includes:
13
+ * · the absolute file path (so the agent can `Read` it on demand)
14
+ * · counts by level/type/environment
15
+ * · the first 5 items, trace-stripped
16
+ * - Otherwise, return the response unchanged.
17
+ *
18
+ * The offload threshold is intentionally loose (~60 KB). A normal search
19
+ * without `include_trace` is well under that and stays inline.
20
+ */
21
+
22
+ import fs from "fs"
23
+ import os from "os"
24
+ import path from "path"
25
+
26
+ // ~60 KB. Claude/GPT swallow this comfortably, but full traces (200-500 KB)
27
+ // are forced to disk so the agent can consume them selectively.
28
+ const OFFLOAD_THRESHOLD_BYTES = 60 * 1024
29
+
30
+ // Keep the on-disk dir manageable: prune files older than this on every
31
+ // offload. Cheap because it only runs when we actually offload.
32
+ const FILE_TTL_MS = 24 * 60 * 60 * 1000 // 24h
33
+
34
+ const OFFLOAD_DIR = path.join(os.tmpdir(), "dypai-mcp-search-logs")
35
+
36
/**
 * Create OFFLOAD_DIR (including missing parents) if it does not exist yet.
 *
 * The directory lives under the OS temp dir, which is shared between local
 * users on many systems, and it receives full `search_logs` payloads. It is
 * therefore requested with owner-only permissions (0o700). The mode is only
 * applied when this call actually creates the directory; a directory that
 * already exists keeps whatever permissions it has.
 *
 * Best-effort: any error is swallowed here. This module's only caller writes
 * a file into the directory right afterwards, so a directory that could not
 * be created surfaces there as a write error.
 */
function ensureDir() {
  try {
    fs.mkdirSync(OFFLOAD_DIR, { recursive: true, mode: 0o700 })
  } catch {
    /* Deliberately ignored. With `recursive: true` an existing directory is
       not an error; anything else is reported by the subsequent file write. */
  }
}
43
+
44
/**
 * Best-effort housekeeping for OFFLOAD_DIR: delete every entry whose
 * modification time is older than FILE_TTL_MS.
 *
 * Nothing is reported to the caller. A failure to list the directory ends the
 * sweep quietly, and a failure on one entry (stat or unlink) only skips that
 * entry.
 */
function pruneOldFiles() {
  const expiresBefore = Date.now() - FILE_TTL_MS

  let entries
  try {
    entries = fs.readdirSync(OFFLOAD_DIR)
  } catch {
    // Directory missing or unreadable — nothing to prune.
    return
  }

  entries
    .map((entry) => path.join(OFFLOAD_DIR, entry))
    .forEach((candidate) => {
      try {
        const { mtimeMs } = fs.statSync(candidate)
        if (mtimeMs < expiresBefore) {
          fs.unlinkSync(candidate)
        }
      } catch {
        // Skip entries that vanish or cannot be removed.
      }
    })
}
56
+
57
/**
 * Build the inline-summary version of a single result item.
 *
 * Non-object values (including null) are handed back untouched. For objects,
 * a shallow copy without the heavy `trace` property is returned; when the
 * removed trace was truthy the copy is flagged with `trace_omitted: true` so
 * the reader knows there is more detail in the full payload. The input object
 * itself is never modified.
 *
 * @param {*} item - One entry of the `items` array.
 * @returns {*} The original value, or a trace-free shallow copy of it.
 */
function lightItem(item) {
  if (item === null || typeof item !== "object") {
    return item
  }

  const { trace: heavyTrace, ...slim } = item
  if (!heavyTrace) {
    return slim
  }
  // `slim` is a fresh object, so tagging it in place does not touch `item`.
  return Object.assign(slim, { trace_omitted: true })
}
64
+
65
/**
 * Count how many entries of `items` share each value of `key`.
 *
 * Values are converted with `String()`. Entries that are falsy, or whose
 * `key` property is null/undefined, are counted under the label "null".
 *
 * The tally is kept in a Map and only converted to a plain object at the end.
 * Counting directly into `{}` would read members inherited from
 * Object.prototype for labels such as "constructor", "toString" or
 * "__proto__", producing garbage strings or silently dropping the count.
 *
 * @param {Array<*>} items - Entries to tally.
 * @param {string} key - Property whose values become the bucket labels.
 * @returns {Object<string, number>} Plain object mapping label → count.
 */
function bucket(items, key) {
  const counts = new Map()
  for (const it of items) {
    const label = it && it[key] != null ? String(it[key]) : "null"
    // Stored counts are always >= 1, so `|| 0` only fires for unseen labels.
    counts.set(label, (counts.get(label) || 0) + 1)
  }
  // fromEntries defines own data properties, so even "__proto__" is kept.
  return Object.fromEntries(counts)
}
73
+
74
/**
 * Keep oversized `search_logs` responses out of the agent's context.
 *
 * Returns `result` unchanged when:
 *   - it is not an object carrying an `items` array,
 *   - it cannot be serialized to JSON, or
 *   - its pretty-printed JSON is at most OFFLOAD_THRESHOLD_BYTES.
 *
 * Otherwise the full JSON is written to a new file under OFFLOAD_DIR and a
 * compact summary is returned instead: the file path, the payload size,
 * counts by level/type/environment, the first five items without their
 * `trace`, the filters echoed from the response, and the upstream guidance.
 *
 * FS failures are never thrown to the caller: the original payload is
 * returned with an extra `offload_warning` field so the data still gets
 * through, even if it is large.
 *
 * @param {*} result - Parsed `search_logs` response.
 * @returns {*} `result` itself, a summary object pointing at the offloaded
 *   file, or a copy of `result` with `offload_warning` on write failure.
 */
export function maybeOffloadSearchLogs(result) {
  if (!result || typeof result !== "object" || !Array.isArray(result.items)) {
    return result
  }

  let serialized
  try {
    serialized = JSON.stringify(result, null, 2)
  } catch {
    // Not serializable (e.g. cyclic) — nothing sensible to offload.
    return result
  }

  // Measured once and reused for both the threshold check and the summary.
  const sizeBytes = Buffer.byteLength(serialized, "utf8")
  if (sizeBytes <= OFFLOAD_THRESHOLD_BYTES) {
    return result
  }

  try {
    ensureDir()
    pruneOldFiles()

    const ts = new Date().toISOString().replace(/[:.]/g, "-")
    const rand = Math.random().toString(36).slice(2, 8)
    const filePath = path.join(OFFLOAD_DIR, `search-logs-${ts}-${rand}.json`)
    // The payload can hold execution traces and sits in the shared temp dir:
    // make it owner-only (0o600) and create it exclusively ("wx") so an
    // already-existing path is never followed or overwritten.
    fs.writeFileSync(filePath, serialized, { encoding: "utf8", mode: 0o600, flag: "wx" })

    const sizeKb = Math.round(sizeBytes / 1024)
    // Derived from the constant so the message cannot drift from the check.
    const thresholdKb = Math.round(OFFLOAD_THRESHOLD_BYTES / 1024)
    const items = result.items
    const firstFive = items.slice(0, 5).map(lightItem)

    return {
      offloaded_to_file: true,
      file_path: filePath,
      size_bytes: sizeBytes,
      guidance: (
        `Response was too large to inline (${sizeKb} KB > ${thresholdKb} KB threshold). ` +
        `Full JSON written to disk — open it with the Read tool when you want ` +
        `to inspect a specific item or its trace:\n  Read("${filePath}")\n\n` +
        `The summary below covers the whole result. Only read the file if you ` +
        `need fields beyond the first 5 items or any 'trace' contents.`
      ),
      summary: {
        total_returned: items.length,
        by_level: bucket(items, "level"),
        by_type: bucket(items, "type"),
        by_environment: bucket(items, "environment"),
        first_5: firstFive,
      },
      filters: {
        project_id: result.project_id,
        since: result.since,
        level: result.level,
        environment: result.environment,
        endpoint: result.endpoint,
        query: result.query,
        include_trace: result.include_trace,
      },
      // Mirror the upstream guidance so the agent doesn't lose it.
      upstream_guidance: result.guidance,
    }
  } catch (err) {
    // Disk full / permissions / name collision — return the original. The
    // agent's context will take a hit but the data still gets through.
    return {
      ...result,
      offload_warning: `Could not write large payload to disk: ${err.message}`,
    }
  }
}
150
+
151
+ export const _internals = { OFFLOAD_THRESHOLD_BYTES, OFFLOAD_DIR }