npm - @dypai-ai/mcp - Versions diffs - 1.4.5 → 1.4.6 - Mend

@dypai-ai/mcp 1.4.5 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json +1 -1
package/src/index.js +75 -2
package/src/tools/search-logs-offload.js +151 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@dypai-ai/mcp",
-  "version": "1.4.5",
+  "version": "1.4.6",
   "description": "DYPAI MCP Server — AI agent toolkit for building and deploying full-stack apps",
   "type": "module",
   "main": "src/index.js",

package/src/index.js CHANGED

@@ -54,6 +54,7 @@ import { dypaiPullTool, dypaiDiffTool, dypaiPushTool, dypaiValidateTool, dypaiTe
 import { proxyToolCall } from "./tools/proxy.js"
 import { enrichSuccess, enrichError } from "./tools/enrich.js"
 import { maybeRefreshSchemaAfterExecuteSql } from "./tools/sql-side-effects.js"
+import { maybeOffloadSearchLogs } from "./tools/search-logs-offload.js"
 import { withProjectContext, invalidateProjectContext } from "./tools/project-context.js"
 // summarizeDypaiTraceResponse (from ./tools/trace-summarize.js) is kept on
 // disk for when dypai_trace is re-enabled, but not imported here.
@@ -447,6 +448,26 @@ endpoint YAML and \`dypai_push\`. This tool does NOT modify the definition.`,
     },
   },
+  // ── Observability ─────────────────────────────────────────────────────────
+  {
+    name: "search_logs",
+    description: "Search recent errors and warnings for the current project. ALWAYS call this FIRST when the user reports any error, bug, or 'this isn't working' — don't guess from the code; check what actually broke. Returns a unified, time-ordered list mixing failed workflow executions and warn/error log lines from the engine. Defaults to the last 24h. Data retention: 7 days.\n\nWorkflow:\n  1) Call with no args (or just `since:'1h'`) → see recent failures.\n  2) Pick the relevant entry → call again with `endpoint` + tighter `query` to narrow down.\n  3) For the full step-by-step debug trace of a specific failure, set `include_trace:true` (response is much larger; you'll likely get a `file_path` to read the full JSON from disk).\n\nUse `environment:'live'` when investigating a production user complaint (excludes draft test runs). Use `environment:'draft'` when the user says 'I just tested X locally and it failed' (their local UI hits the draft overlay).",
+    inputSchema: {
+      type: "object",
+      properties: {
+        project_id: { type: "string", description: "Project UUID. Auto-detected for project tokens." },
+        query: { type: "string", description: "Optional substring to match (case-insensitive) in error messages and log lines. e.g. 'timeout', 'OpenAI', 'permission denied'." },
+        endpoint: { type: "string", description: "Optional endpoint name filter (e.g. 'create-order')." },
+        since: { type: "string", default: "24h", description: "Time window: relative ('15m', '1h', '24h', '7d') or ISO 8601 timestamp. Default 24h. Hard cap: 7d (retention)." },
+        level: { type: "string", enum: ["error", "warn", "all"], default: "all", description: "Filter by severity. 'error' includes failed/timeout executions + error logs. 'warn' is warning logs. 'all' (default) returns both." },
+        environment: { type: "string", enum: ["live", "draft", "all"], default: "all", description: "live = production traffic only (excludes draft overlay test runs). draft = only requests through dev-<project_id>.dypai.dev. all = both. Use 'live' for real user bug reports." },
+        limit: { type: "integer", default: 50, minimum: 1, maximum: 200, description: "Max items to return. Default 50, max 200." },
+        include_trace: { type: "boolean", default: false, description: "Attach the full step-by-step debug trace per failed execution. Verbose — combine with `query`/`endpoint` filters and a low `limit`. If the response gets large, the local proxy writes it to disk and returns a file_path you can Read." }
+      },
+      required: []
+    }
+  },
   // ── Knowledge ─────────────────────────────────────────────────────────────
   { name: "search_docs", description: "Search DYPAI documentation. Use this when unsure about SDK usage, auth patterns, workflow nodes, or platform features. Returns relevant documentation chunks.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What you want to learn about" } }, required: ["query"] } },
   { name: "search_workflow_templates", description: "Search workflow templates by description. Returns ready-to-use workflow code for common patterns: CRUD operations, payment gateways, email sending, AI chatbots, data pipelines, etc.", inputSchema: { type: "object", properties: { query: { type: "string", description: "What the workflow should do (e.g. 'send email', 'stripe payment')" }, category: { type: "string", description: "Optional: AI, Database, Payments, Communication, Logic, Storage" } }, required: ["query"] } },
@@ -511,6 +532,7 @@ Use this BEFORE picking a tool. If unsure which row matches, ask the user.
 | "Publish the new UI" / "ship the frontend" | \`manage_frontend(deploy, confirm:true)\` | (deploy is the publish — there is no separate step) |
 | "Roll back" | Backend: \`get_endpoint_versions\` then write old code back. Frontend: re-deploy older source. | — |
 | "Upload a file / a CSV / seed data" | \`bulk_upsert\` (data) or \`manage_storage(upload_file)\` (binary) | — |
+| "X is broken" / "I'm getting an error" / "this doesn't work" / "users are reporting Y" | \`search_logs\` FIRST (don't guess from the code) | If a specific failure is found → \`search_logs(include_trace:true, query:'...')\` for the full step-by-step trace |
 ## Confirm rules — the ONLY operations that need \`confirm:true\`
@@ -547,6 +569,49 @@ User: "Add a /api/list-tasks endpoint that returns the current user's tasks, and
 **Order matters**: publish backend BEFORE deploying frontend. Otherwise the new UI calls an endpoint that doesn't exist on live yet → 404s for users. The \`manage_frontend(deploy)\` confirmation hint will warn you if backend drafts are still pending.
+## Debugging user-reported errors — \`search_logs\` is your starting point
+**Rule**: whenever the user says any of these — "X is broken", "this isn't working", "I'm getting an error", "users are reporting Y", "the page is white", "nothing happens when I click" — **call \`search_logs\` BEFORE reading any code**. The engine's logs are the ground truth; the code is your hypothesis. Trying to debug from the source first is how you waste 20 minutes solving the wrong problem.
+### The standard flow
+\`\`\`
+1. search_logs({ since: "1h", level: "error" })
+   → Quick scan of recent failures. If empty, widen to "24h".
+2. # Did the user say "I just tested this in my local UI"?
+   #   → add environment: "draft"   (their UI hits the draft overlay)
+   # Did they say "production users are reporting..."?
+   #   → add environment: "live"    (excludes their own draft test runs)
+3. # Found the relevant entry? Narrow down:
+   search_logs({ endpoint: "create-order", query: "stripe", since: "1h" })
+4. # For the full step-by-step trace of one specific failure:
+   search_logs({
+     endpoint: "create-order",
+     query: "<a unique substring from the error message>",
+     include_trace: true,
+     limit: 5
+   })
+   → If the response is large the local proxy writes it to a temp file
+     and returns a \`file_path\`. Read that file with the Read tool ONLY
+     when you need fields beyond the inline summary.
+5. # Now you know exactly which node failed and why → fix the code.
+\`\`\`
+### What \`search_logs\` returns
+Each item has \`type\` (\`execution_failed\` | \`log\`), \`level\` (\`error\` | \`warn\`), \`time\`, \`endpoint\`, \`message\`, and \`environment\` (\`live\` | \`draft\` | null for legacy rows). Failed executions also include \`status\` (\`error\` | \`timeout\`) and \`duration_ms\`. With \`include_trace:true\` they also include \`trace\` — a per-node log of inputs, outputs, errors, and stacks.
+### Common pitfalls
+- **Don't skip this and read code first.** The bug is almost never where you'd guess. Logs tell you exactly which node blew up and the exact error string.
+- **Don't dump every error you see at the user.** Filter, summarize, then propose ONE fix.
+- **\`environment\` matters.** A draft test failure is the user testing pending changes — fixing the draft is fine. A live failure is real users hitting production — fix urgently and follow up with backend publish.
+- **Retention is 7 days.** If the user reports a bug from "last week", the data is likely gone. Tell them.
 ## What you do NOT have to think about
 - "Development vs production environment" — the user never sees this. Backend changes always go through draft-and-publish. Frontend changes always go through deploy. That's the whole model.
@@ -657,7 +722,7 @@ Four ways to validate a backend change, in increasing fidelity:
 Other tools:
 - **\`dypai_test\`** — YAML regression suites at \`dypai/tests/<name>.test.yaml\` with assertions (equals, matches, contains, type, exists, gte, lte) + setup_sql / teardown_sql.
 - **\`dypai_validate\`** — static linting (placeholders, tables, columns, node params, credentials). Run before EVERY push.
-- **Prod debugging**: \`get_recent_workflow_activity(only_errors=true)\` surfaces recent failures.
+- **Prod debugging**: \`search_logs\` is the entry point — see the "Debugging user-reported errors" section above. Returns failed executions + warn/error logs from the last 7 days; pass \`environment:'live'\` to exclude draft-overlay test runs and \`include_trace:true\` for the per-node failure trace.
 → Deep patterns: \`search_docs("testing endpoints")\` (test setup + assertions), \`search_docs("troubleshooting")\` (common failures + fixes).
@@ -918,7 +983,7 @@ Pre-configured at \`src/lib/dypai.ts\`. Every method returns \`{ data, error }\`
 - **JWT verification** — jwt auth_mode validates the session token automatically. \`\${current_user_id}\` is trusted.
 - **Rate limiting** — per-plan. Returns 429 automatically.
 - **CORS** — allowed origins per project (configured in dashboard).
-- **Request logging** — every execution in \`system.workflow_runs\` with duration, status, tokens (for agents). View via \`get_recent_workflow_activity\`.
+- **Request logging** — every execution is recorded with duration, status, environment (live/draft), and (on failure) a per-node debug trace. Warn/error \`userLog\` lines are persisted alongside. Query both via \`search_logs\` (last 7 days).
 - **Input validation** — if the endpoint has \`input:\` schema, requests with invalid payloads are rejected with 400 + details BEFORE the workflow runs.
 - **SQL injection** — placeholders bind as Postgres params. Safe by construction.
 - **Secrets management** — credentials and env vars never appear in YAML or logs.
@@ -1109,6 +1174,14 @@ async function handleRequest(msg) {
             }
           }
+          // search_logs can return huge payloads when include_trace=true.
+          // Offload to a temp file when the serialized response > 60 KB so
+          // the agent's context stays clean — it gets a summary + file path
+          // and only Reads the file when it actually needs the detail.
+          if (name === "search_logs") {
+            result = maybeOffloadSearchLogs(result)
+          }
           // Note: test_workflow is no longer agent-facing (wrapped by
           // dypai_test_endpoint). dypai_trace is temporarily hidden until
           // the engine captures debug traces for real production executions.

package/src/tools/search-logs-offload.js ADDED

@@ -0,0 +1,151 @@
+/**
+ * maybeOffloadSearchLogs — keep `search_logs` responses from blowing up the
+ * agent's context window.
+ *
+ * `search_logs` (especially with `include_trace=true`) can return hundreds of
+ * KB of JSON. Inlining that into the tool-result `text` field forces the model
+ * to load the whole payload into context, which is wasteful for the typical
+ * "show me what failed and let me drill into one of them" workflow.
+ *
+ * Strategy:
+ *   - If the serialized response exceeds OFFLOAD_THRESHOLD_BYTES, write the
+ *     full JSON to a temp file and return a compact summary that includes:
+ *       · the absolute file path (so the agent can `Read` it on demand)
+ *       · counts by level/type/environment
+ *       · the first 5 items, trace-stripped
+ *   - Otherwise, return the response unchanged.
+ *
+ * The offload threshold is intentionally loose (~60 KB). A normal search
+ * without `include_trace` is well under that and stays inline.
+ */
+import fs from "fs"
+import os from "os"
+import path from "path"
+// ~60 KB. Claude/GPT swallow this comfortably, but full traces (200-500 KB)
+// are forced to disk so the agent can consume them selectively.
+// The limit is compared against the UTF-8 byte length of the response
+// serialized as 2-space pretty-printed JSON, so indentation counts toward it.
+const OFFLOAD_THRESHOLD_BYTES = 60 * 1024
+// Keep the on-disk dir manageable: prune files older than this on every
+// offload. Cheap because it only runs when we actually offload.
+const FILE_TTL_MS = 24 * 60 * 60 * 1000 // 24h
+// Directory (under the OS temp dir) that receives the offloaded JSON files.
+const OFFLOAD_DIR = path.join(os.tmpdir(), "dypai-mcp-search-logs")
+/**
+ * Best-effort creation of OFFLOAD_DIR, including any missing parents.
+ *
+ * The directory is created owner-only (0o700): offloaded payloads can hold
+ * debug traces with request inputs/outputs, and the OS temp dir may be shared
+ * with other local users. The mode is only applied when the directory is
+ * created by this call; an already existing directory keeps its permissions.
+ *
+ * Never throws. A recursive mkdir does not fail on an existing directory, so
+ * anything caught here is a genuine failure (permissions, read-only FS, ...).
+ * It is ignored on purpose: the write that follows in the caller will fail
+ * for the same reason and is handled there.
+ */
+function ensureDir() {
+  try {
+    fs.mkdirSync(OFFLOAD_DIR, { recursive: true, mode: 0o700 })
+  } catch {
+    /* deliberately ignored — the subsequent write surfaces the failure */
+  }
+}
+/**
+ * Best-effort housekeeping: removes every entry of OFFLOAD_DIR whose mtime
+ * lies more than FILE_TTL_MS in the past. Never throws — an unreadable
+ * directory or a failing entry is silently skipped.
+ */
+function pruneOldFiles() {
+  const oldestAllowedMs = Date.now() - FILE_TTL_MS
+  let entries
+  try {
+    entries = fs.readdirSync(OFFLOAD_DIR)
+  } catch {
+    // Directory missing or unreadable — nothing to prune.
+    return
+  }
+  entries.forEach((entry) => {
+    try {
+      const target = path.join(OFFLOAD_DIR, entry)
+      const { mtimeMs } = fs.statSync(target)
+      if (mtimeMs < oldestAllowedMs) {
+        fs.unlinkSync(target)
+      }
+    } catch {
+      /* ignore individual file errors */
+    }
+  })
+}
+/**
+ * Produces the inline-summary version of one result item: a shallow copy
+ * without the heavy `trace` field. When a truthy trace was removed, the copy
+ * is flagged with `trace_omitted: true` so the reader knows more detail
+ * exists. Non-object values (including null) are returned as-is.
+ */
+function lightItem(item) {
+  const isObject = item !== null && typeof item === "object"
+  if (!isObject) return item
+  const { trace: heavyTrace, ...slim } = item
+  if (heavyTrace) {
+    slim.trace_omitted = true
+  }
+  return slim
+}
+/**
+ * Counts how many items carry each distinct value of `key`.
+ *
+ * Values are stringified; items that are falsy or whose value is null /
+ * undefined are counted under the literal key "null".
+ *
+ * Counting happens in a Map rather than directly on a plain object: with a
+ * plain object, a value such as "constructor" or "toString" would pick up
+ * the inherited Object.prototype member as its starting "count" and yield a
+ * garbage string, and "__proto__" would be dropped altogether.
+ *
+ * @param {Array<*>} items - Items to tally.
+ * @param {string} key - Property name to group by.
+ * @returns {Object<string, number>} Plain object mapping value → count.
+ */
+function bucket(items, key) {
+  const counts = new Map()
+  for (const it of items) {
+    const v = it && it[key] != null ? String(it[key]) : "null"
+    counts.set(v, (counts.get(v) || 0) + 1)
+  }
+  // fromEntries defines own data properties, so even "__proto__" is kept.
+  return Object.fromEntries(counts)
+}
+/**
+ * Returns either the original `result` (small enough to inline) OR a compact
+ * summary object that points at a temp file holding the full JSON.
+ *
+ * Never throws — on any FS error it falls back to returning the original
+ * payload (plus an `offload_warning`) so the agent at least gets the data,
+ * even if it's big.
+ *
+ * @param {*} result - `search_logs` response. Anything that is not an object
+ *   with an `items` array is returned untouched.
+ * @returns {*} `result` itself when it is not offloaded; otherwise a summary
+ *   object with `offloaded_to_file`, `file_path`, `size_bytes`, `guidance`,
+ *   `summary`, `filters` and `upstream_guidance`.
+ */
+export function maybeOffloadSearchLogs(result) {
+  if (!result || typeof result !== "object" || !Array.isArray(result.items)) {
+    return result
+  }
+  let serialized
+  try {
+    serialized = JSON.stringify(result, null, 2)
+  } catch {
+    return result
+  }
+  // JSON.stringify yields undefined when a toJSON() hook returns undefined;
+  // measuring that would throw, so treat it like "cannot serialize".
+  if (typeof serialized !== "string") {
+    return result
+  }
+  // Measured once and reused for both the threshold check and the summary.
+  const sizeBytes = Buffer.byteLength(serialized, "utf8")
+  if (sizeBytes <= OFFLOAD_THRESHOLD_BYTES) {
+    return result
+  }
+  try {
+    ensureDir()
+    pruneOldFiles()
+    const ts = new Date().toISOString().replace(/[:.]/g, "-")
+    const rand = Math.random().toString(36).slice(2, 8)
+    const filePath = path.join(OFFLOAD_DIR, `search-logs-${ts}-${rand}.json`)
+    // mode 0o600: the payload may contain traces with request data and the
+    // temp dir can be shared, so keep the file owner-only.
+    // flag "wx": exclusive create — never overwrite an existing file or
+    // write through a pre-planted path; a collision falls into the catch.
+    fs.writeFileSync(filePath, serialized, { encoding: "utf8", mode: 0o600, flag: "wx" })
+    const sizeKb = Math.round(sizeBytes / 1024)
+    // Derived from the constant so the message cannot drift from the limit.
+    const thresholdKb = Math.round(OFFLOAD_THRESHOLD_BYTES / 1024)
+    const items = result.items
+    const firstFive = items.slice(0, 5).map(lightItem)
+    return {
+      offloaded_to_file: true,
+      file_path: filePath,
+      size_bytes: sizeBytes,
+      guidance: (
+        `Response was too large to inline (${sizeKb} KB > ${thresholdKb} KB threshold). ` +
+        `Full JSON written to disk — open it with the Read tool when you want ` +
+        `to inspect a specific item or its trace:\n  Read("${filePath}")\n\n` +
+        `The summary below covers the whole result. Only read the file if you ` +
+        `need fields beyond the first 5 items or any 'trace' contents.`
+      ),
+      summary: {
+        total_returned: items.length,
+        by_level: bucket(items, "level"),
+        by_type: bucket(items, "type"),
+        by_environment: bucket(items, "environment"),
+        first_5: firstFive,
+      },
+      // Echo of the query parameters as reported on the upstream response.
+      filters: {
+        project_id: result.project_id,
+        since: result.since,
+        level: result.level,
+        environment: result.environment,
+        endpoint: result.endpoint,
+        query: result.query,
+        include_trace: result.include_trace,
+      },
+      // Mirror the upstream guidance so the agent doesn't lose it.
+      upstream_guidance: result.guidance,
+    }
+  } catch (err) {
+    // Disk full / permissions / whatever — just return the original. The
+    // agent's context will take a hit but the data still gets through.
+    const reason = err instanceof Error ? err.message : String(err)
+    return {
+      ...result,
+      offload_warning: `Could not write large payload to disk: ${reason}`,
+    }
+  }
+}
+// Exposes the module-private threshold and offload directory.
+// NOTE(review): presumably exported so tests can inspect them — confirm usage.
+export const _internals = { OFFLOAD_THRESHOLD_BYTES, OFFLOAD_DIR }