npm - @dypai-ai/mcp - Versions diffs - 1.4.5 → 1.5.0 - Mend

@dypai-ai/mcp 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json +1 -1
package/src/index.js +268 -426
package/src/tools/introspect.js +311 -0
package/src/tools/manage-database.js +546 -0
package/src/tools/run-migration.js +269 -0
package/src/tools/search-logs-offload.js +151 -0
package/src/tools/sql-guard.js +164 -0
package/src/tools/sync/codec.js +2 -1
package/src/tools/sync/pull.js +19 -3
package/src/tools/sync/transforms.js +10 -2

package/src/tools/run-migration.js ADDED Viewed

@@ -0,0 +1,269 @@
+/**
+ * run_migration — apply a versioned SQL migration file to the project DB.
+ *
+ * Thin orchestrator on top of the cloud `execute_script` tool. Adds:
+ *   - Path resolution + suspicious-path guard (matches dypai_pull/push).
+ *   - Content checksum (sha256) so re-running the SAME file is a no-op and
+ *     re-running a CHANGED file surfaces a `checksum_mismatch` instead of
+ *     silently rewriting history.
+ *   - Tracking table `system.applied_migrations` (auto-bootstrapped on first
+ *     use) with (name, checksum, statements, applied_at, applied_by).
+ *   - Dry-run mode that prints the exact script the tool would submit.
+ *
+ * What `execute_script` handles (server-side, atomic):
+ *   - Running the migration + the tracking INSERT inside ONE transaction.
+ *     If any statement fails, the tracking row never lands → next run retries.
+ *   - Enforcing the SQL guard (no writes to auth/storage/system, no COPY/LOAD,
+ *     no SET ROLE / search_path).
+ *
+ * Convention:
+ *   dypai/migrations/NNNN_description.sql
+ *
+ * Out of scope deliberately:
+ *   - Rollback / down-migrations. Reverting = author a NEW migration.
+ *   - Cross-project migration sharing. Each project gets its own table.
+ */
+import { readFile, access } from "fs/promises"
+import { createHash } from "crypto"
+import { basename, extname, isAbsolute } from "path"
+import { proxyToolCall } from "./proxy.js"
+import { validateSql, formatValidationError } from "./sql-guard.js"
+import { resolveAndGuard } from "./sync/path-resolver.js"
+// Bundled "bootstrap + body + tracking INSERT" that goes in one execute_script
+// call. CREATE SCHEMA/TABLE IF NOT EXISTS are idempotent; the INSERT at the
+// end only runs if every previous statement succeeded (single transaction).
+function buildTransactionalScript(name, checksum, statements, body) {
+  const safeName = name.replace(/'/g, "''")
+  const safeChk  = checksum.replace(/'/g, "''")
+  return [
+    "CREATE SCHEMA IF NOT EXISTS system;",
+    `CREATE TABLE IF NOT EXISTS system.applied_migrations (
+       name        text PRIMARY KEY,
+       checksum    text NOT NULL,
+       statements  int  NOT NULL,
+       applied_at  timestamptz NOT NULL DEFAULT now(),
+       applied_by  text NOT NULL DEFAULT current_user
+     );`,
+    body.trimEnd().replace(/;?\s*$/, ";"),
+    `INSERT INTO system.applied_migrations (name, checksum, statements)
+     VALUES ('${safeName}', '${safeChk}', ${Number(statements) || 0});`,
+  ].join("\n")
+}
+function sha256(s) {
+  return createHash("sha256").update(s).digest("hex")
+}
+// Rough statement counter — used for reporting, not control flow. Strips
+// comments + string literals + $tag$ bodies so the count reflects real
+// top-level statements.
+function countStatements(sql) {
+  if (!sql) return 0
+  let s = sql
+    .replace(/--[^\n]*/g, "")
+    .replace(/\/\*[\s\S]*?\*\//g, "")
+    .replace(/\$([A-Za-z_][A-Za-z0-9_]*)?\$[\s\S]*?\$\1\$/g, "''")
+    .replace(/'(?:''|[^'])*'/g, "''")
+  return s.split(";").map(x => x.trim()).filter(Boolean).length
+}
+function normalizeName(file) {
+  return basename(file, extname(file)) || file
+}
+async function pathExists(p) {
+  try { await access(p); return true } catch { return false }
+}
+// Cheap pre-check: can we read the tracking row? If the table doesn't exist
+// yet, the server returns an error; we treat that as "not applied yet" and
+// let execute_script bootstrap the table on its first run. The apply path is
+// still atomic because the INSERT is inside the same transaction.
+async function fetchAppliedRow(project_id, name) {
+  const sql = `SELECT name, checksum, statements, applied_at
+               FROM system.applied_migrations
+               WHERE name = '${name.replace(/'/g, "''")}'
+               LIMIT 1`
+  const args = project_id ? { project_id, sql } : { sql }
+  try {
+    const res = await proxyToolCall("execute_sql", args)
+    const rows = Array.isArray(res?.rows) ? res.rows : (Array.isArray(res) ? res : [])
+    return rows[0] || null
+  } catch (e) {
+    // Table-missing on the very first run is expected. Any other error we
+    // still swallow here — worst case the script tries to INSERT and fails
+    // on duplicate PK, which is strictly better than aborting the run.
+    if (/applied_migrations.*does not exist/i.test(e.message || "")) return null
+    return null
+  }
+}
+export const runMigrationTool = {
+  name: "run_migration",
+  description:
+    "Apply a numbered SQL migration file to the project DB with tracking + idempotency. " +
+    "Reads dypai/migrations/<file>, validates it, and submits it as a single transactional " +
+    "script via `execute_script` — the migration + the tracking INSERT land atomically, " +
+    "so partial application is impossible. Tracks applied migrations in " +
+    "system.applied_migrations: running the same file twice is a no-op (`skipped`); " +
+    "running a MODIFIED file returns `checksum_mismatch` so you author a new migration " +
+    "instead of silently rewriting history.\n\n" +
+    "Use this for DDL (CREATE/ALTER/DROP), multi-statement scripts, and any structural " +
+    "change that must be atomic. For ad-hoc single queries, use `execute_sql`.\n\n" +
+    "File convention: `dypai/migrations/NNNN_description.sql` (e.g. `0001_add_orders.sql`).",
+  inputSchema: {
+    type: "object",
+    properties: {
+      project_id: {
+        type: "string",
+        description: "Project UUID. Optional if your token is project-scoped.",
+      },
+      migration_file: {
+        type: "string",
+        description:
+          "Path to the .sql file. Either absolute (/Users/me/project/dypai/migrations/0001_x.sql) " +
+          "or relative to the workspace root (dypai/migrations/0001_x.sql). Prefer absolute when " +
+          "running from an IDE-hosted MCP (cwd may be $HOME).",
+      },
+      dry_run: {
+        type: "boolean",
+        description:
+          "If true, validate + print the exact script that WOULD be submitted to execute_script " +
+          "(including the tracking INSERT) without executing anything. Default false.",
+        default: false,
+      },
+      timeout_seconds: {
+        type: "integer",
+        description:
+          "Per-statement timeout inside the transaction (default 300s, max 1800s). " +
+          "Bump this for migrations that rewrite large tables.",
+      },
+    },
+    required: ["migration_file"],
+  },
+  async execute({ project_id, migration_file, dry_run = false, timeout_seconds } = {}) {
+    if (!migration_file || typeof migration_file !== "string") {
+      return { success: false, error: "migration_file is required (string path)." }
+    }
+    // Path resolution mirrors dypai_pull: absolute wins, else walk env vars
+    // and project markers. Suspicious paths (cwd → $HOME) are rejected with
+    // a fix hint instead of silently reading the wrong file.
+    let resolved
+    if (isAbsolute(migration_file)) {
+      resolved = { ok: true, path: migration_file, source: "absolute" }
+    } else {
+      resolved = resolveAndGuard(migration_file, {
+        project_id,
+        tool: "run_migration",
+        arg_name: "migration_file",
+      })
+      if (!resolved.ok) return resolved.error
+    }
+    const filePath = resolved.path
+    if (!(await pathExists(filePath))) {
+      return {
+        success: false,
+        error: `Migration file not found: ${filePath}`,
+        hint: "Create dypai/migrations/<NNNN>_<description>.sql and pass that path.",
+      }
+    }
+    const body = await readFile(filePath, "utf8")
+    if (!body.trim()) {
+      return { success: false, error: "Migration file is empty." }
+    }
+    // Pre-flight locally too — identical checks to the cloud, but failing
+    // early saves a network roundtrip and gives a cleaner error shape.
+    const v = validateSql(body)
+    if (!v.ok) {
+      return {
+        success: false,
+        error: formatValidationError(v),
+        resolved_path: filePath,
+      }
+    }
+    const name = normalizeName(filePath)
+    const checksum = sha256(body)
+    const statements = countStatements(body)
+    // Check prior state. If the tracking table doesn't exist yet, we treat
+    // as "first apply" and let execute_script bootstrap the table.
+    const existing = await fetchAppliedRow(project_id, name)
+    if (existing) {
+      if (existing.checksum !== checksum) {
+        return {
+          success: false,
+          status: "checksum_mismatch",
+          error: `Migration '${name}' was already applied but its content has changed.`,
+          applied_at: existing.applied_at,
+          previous_checksum: existing.checksum,
+          current_checksum: checksum,
+          hint:
+            "Do NOT rewrite an applied migration. Create a new migration that performs " +
+            "the additional change (e.g. <NNNN+1>_fix_previous.sql). That preserves the " +
+            "audit trail.",
+          resolved_path: filePath,
+        }
+      }
+      return {
+        success: true,
+        status: "skipped",
+        name,
+        reason: "already_applied",
+        applied_at: existing.applied_at,
+        statements: existing.statements,
+        resolved_path: filePath,
+      }
+    }
+    const script = buildTransactionalScript(name, checksum, statements, body)
+    if (dry_run) {
+      return {
+        success: true,
+        status: "dry_run",
+        name,
+        checksum,
+        statements,
+        resolved_path: filePath,
+        would_execute: script,
+      }
+    }
+    const scriptArgs = { sql: script }
+    if (project_id) scriptArgs.project_id = project_id
+    if (timeout_seconds != null) scriptArgs.timeout_seconds = timeout_seconds
+    try {
+      const res = await proxyToolCall("execute_script", scriptArgs)
+      return {
+        success: true,
+        status: "applied",
+        name,
+        checksum,
+        statements,
+        resolved_path: filePath,
+        duration_ms: res?.duration_ms,
+        message: `Migration '${name}' applied (${statements} statement${statements === 1 ? "" : "s"}).`,
+      }
+    } catch (e) {
+      return {
+        success: false,
+        status: "failed",
+        name,
+        error: e.message,
+        resolved_path: filePath,
+        hint:
+          "The migration was rolled back — nothing was applied. Fix the error and re-run; " +
+          "idempotency still holds.",
+      }
+    }
+  },
+}

package/src/tools/search-logs-offload.js ADDED Viewed

@@ -0,0 +1,151 @@
+/**
+ * maybeOffloadSearchLogs — keep `search_logs` responses from blowing up the
+ * agent's context window.
+ *
+ * `search_logs` (especially with `include_trace=true`) can return hundreds of
+ * KB of JSON. Inlining that into the tool-result `text` field forces the model
+ * to load the whole payload into context, which is wasteful for the typical
+ * "show me what failed and let me drill into one of them" workflow.
+ *
+ * Strategy:
+ *   - If the serialized response exceeds OFFLOAD_THRESHOLD_BYTES, write the
+ *     full JSON to a temp file and return a compact summary that includes:
+ *       · the absolute file path (so the agent can `Read` it on demand)
+ *       · counts by level/type/environment
+ *       · the first 5 items, trace-stripped
+ *   - Otherwise, return the response unchanged.
+ *
+ * The offload threshold is intentionally loose (~60 KB). A normal search
+ * without `include_trace` is well under that and stays inline.
+ */
+import fs from "fs"
+import os from "os"
+import path from "path"
+// Inline-size limit, ~60 KB: serialized responses larger than this are written
+// to disk instead of being returned inline (full traces run 200-500 KB).
+const OFFLOAD_THRESHOLD_BYTES = 60 * 1024
+// Max age of an offloaded file. Older files are pruned on every offload,
+// which is cheap because pruning only runs when we actually offload.
+const FILE_TTL_MS = 24 * 60 * 60 * 1000 // 24h
+const OFFLOAD_DIR = path.join(os.tmpdir(), "dypai-mcp-search-logs") // fixed subfolder of the OS temp dir
+function ensureDir() {
+  try {
+    fs.mkdirSync(OFFLOAD_DIR, { recursive: true })
+  } catch {
+    /* race-safe; mkdirSync with recursive doesn't throw on existing dirs */
+  }
+}
+function pruneOldFiles() {
+  try {
+    const cutoff = Date.now() - FILE_TTL_MS
+    for (const name of fs.readdirSync(OFFLOAD_DIR)) {
+      const full = path.join(OFFLOAD_DIR, name)
+      try {
+        const stat = fs.statSync(full)
+        if (stat.mtimeMs < cutoff) fs.unlinkSync(full)
+      } catch { /* ignore individual file errors */ }
+    }
+  } catch { /* ignore — best-effort housekeeping */ }
+}
+function lightItem(item) {
+  // Drop the heavy `trace` field from each item for the inline summary.
+  // Everything else stays so the agent can decide which one to drill into.
+  if (!item || typeof item !== "object") return item
+  const { trace, ...rest } = item
+  return trace ? { ...rest, trace_omitted: true } : rest
+}
+function bucket(items, key) {
+  const out = {}
+  for (const it of items) {
+    const v = it && it[key] != null ? String(it[key]) : "null"
+    out[v] = (out[v] || 0) + 1
+  }
+  return out
+}
+/**
+ * Returns either the original `result` (small enough to inline) OR a compact
+ * summary object that points at a temp file holding the full JSON.
+ *
+ * Never throws — on any FS error it falls back to returning the original
+ * payload so the agent at least gets the data, even if it's big.
+ */
+export function maybeOffloadSearchLogs(result) {
+  if (!result || typeof result !== "object" || !Array.isArray(result.items)) {
+    return result
+  }
+  let serialized
+  try {
+    serialized = JSON.stringify(result, null, 2)
+  } catch {
+    return result
+  }
+  if (Buffer.byteLength(serialized, "utf8") <= OFFLOAD_THRESHOLD_BYTES) {
+    return result
+  }
+  try {
+    ensureDir()
+    pruneOldFiles()
+    const ts = new Date().toISOString().replace(/[:.]/g, "-")
+    const rand = Math.random().toString(36).slice(2, 8)
+    const filePath = path.join(OFFLOAD_DIR, `search-logs-${ts}-${rand}.json`)
+    fs.writeFileSync(filePath, serialized, "utf8")
+    const sizeBytes = Buffer.byteLength(serialized, "utf8")
+    const sizeKb = Math.round(sizeBytes / 1024)
+    const items = result.items
+    const firstFive = items.slice(0, 5).map(lightItem)
+    return {
+      offloaded_to_file: true,
+      file_path: filePath,
+      size_bytes: sizeBytes,
+      guidance: (
+        `Response was too large to inline (${sizeKb} KB > 60 KB threshold). ` +
+        `Full JSON written to disk — open it with the Read tool when you want ` +
+        `to inspect a specific item or its trace:\n  Read("${filePath}")\n\n` +
+        `The summary below covers the whole result. Only read the file if you ` +
+        `need fields beyond the first 5 items or any 'trace' contents.`
+      ),
+      summary: {
+        total_returned: items.length,
+        by_level: bucket(items, "level"),
+        by_type: bucket(items, "type"),
+        by_environment: bucket(items, "environment"),
+        first_5: firstFive,
+      },
+      filters: {
+        project_id: result.project_id,
+        since: result.since,
+        level: result.level,
+        environment: result.environment,
+        endpoint: result.endpoint,
+        query: result.query,
+        include_trace: result.include_trace,
+      },
+      // Mirror the upstream guidance so the agent doesn't lose it.
+      upstream_guidance: result.guidance,
+    }
+  } catch (err) {
+    // Disk full / permissions / whatever — just return the original. The
+    // agent's context will take a hit but the data still gets through.
+    return {
+      ...result,
+      offload_warning: `Could not write large payload to disk: ${err.message}`,
+    }
+  }
+}
+export const _internals = { OFFLOAD_THRESHOLD_BYTES, OFFLOAD_DIR } // NOTE(review): presumably exposed for tests — confirm

package/src/tools/sql-guard.js ADDED Viewed

@@ -0,0 +1,164 @@
+/**
+ * Pre-flight SQL validator.
+ *
+ * Parses a SQL payload and rejects DDL/DML that targets schemas the agent
+ * should never touch (auth, storage, system, internal Timescale, pg_*).
+ * Reads (SELECT) are always allowed so the agent can introspect freely.
+ *
+ * This is UX, not security. The real guarantee belongs to the engine's
+ * Postgres role (see backend provisioning: dypai_agent role grants
+ * CREATE/ALTER/DROP/INSERT/UPDATE/DELETE only on public.*). What we do
+ * here is fail earlier with a message that tells the agent how to recover,
+ * instead of letting it hit a raw "permission denied" from Postgres.
+ *
+ * Used by:
+ *   - execute_sql proxy hop in index.js (before calling the remote)
+ *   - run_migration local tool (validates the whole file before applying)
+ */
+// Exact schema names the agent must not modify. SELECT against them stays allowed.
+const PROTECTED_SCHEMAS = new Set([
+  "auth",                // better-auth user/session tables — engine-owned
+  "storage",             // file upload metadata — engine-owned
+  "system",              // DYPAI internals (endpoints, credentials, etc.)
+  "information_schema",  // PG metadata catalog
+])
+// Protected schema-name prefixes (startsWith match on the lowercased name) for Timescale / PG internals.
+const PROTECTED_SCHEMA_PREFIXES = ["_timescaledb", "pg_"]
+// DDL/DML verbs treated as mutating. SELECT is intentionally absent.
+const MUTATING_VERBS = new Set([
+  "ALTER", "CREATE", "DROP", "TRUNCATE",
+  "INSERT", "UPDATE", "DELETE",
+  "GRANT", "REVOKE",
+  "COMMENT", "RENAME",
+])
+// Flatly forbidden regardless of target schema: each entry pairs a pattern with the error message returned on a match.
+const FORBIDDEN_TOP_LEVEL = [
+  { re: /\bALTER\s+SYSTEM\b/i,       msg: "ALTER SYSTEM is superuser-only and never allowed from the agent." },
+  { re: /\bCREATE\s+EXTENSION\b/i,   msg: "CREATE EXTENSION requires superuser. Ask DYPAI support if you need a new extension." },
+  { re: /\bDROP\s+EXTENSION\b/i,     msg: "DROP EXTENSION requires superuser." },
+  { re: /\bCREATE\s+(USER|ROLE)\b/i, msg: "Managing DB roles is the engine's job. Use manage_users for app users." },
+  { re: /\bDROP\s+(USER|ROLE)\b/i,   msg: "Managing DB roles is the engine's job. Use manage_users for app users." },
+  { re: /\bCOPY\b/i,                 msg: "COPY to/from the filesystem is not available from the agent. Use bulk_upsert or a DO block with INSERT." },
+]
+function isProtectedSchema(name) {
+  const s = name.toLowerCase()
+  if (PROTECTED_SCHEMAS.has(s)) return true
+  return PROTECTED_SCHEMA_PREFIXES.some(p => s.startsWith(p))
+}
+// Strip comments + string literals so schema-name matches only hit real code.
+// Preserves length/structure by replacing with whitespace — keeps line numbers
+// roughly sane if we ever want to surface them.
+function stripSqlNoise(sql) {
+  if (!sql) return ""
+  let out = sql
+  // -- line comments
+  out = out.replace(/--[^\n]*/g, "")
+  // /* block comments */
+  out = out.replace(/\/\*[\s\S]*?\*\//g, "")
+  // $tag$ ... $tag$ quoted PL/pgSQL bodies (so keywords inside functions don't
+  // fool the validator into rejecting the statement).
+  out = out.replace(/\$([A-Za-z_][A-Za-z0-9_]*)?\$[\s\S]*?\$\1\$/g, "")
+  // single-quoted strings
+  out = out.replace(/'(?:''|[^'])*'/g, "''")
+  // double-quoted identifiers — keep intact (they're identifiers, not strings)
+  return out
+}
+/**
+ * Validate a SQL payload.
+ *
+ * @param {string} sql
+ * @param {object} [opts]
+ * @param {boolean} [opts.allowSelectOnly] If true, only SELECT is permitted.
+ * @returns {{ ok: true } | { ok: false, error: string, hint?: string }}
+ */
+export function validateSql(sql, opts = {}) {
+  if (typeof sql !== "string" || !sql.trim()) {
+    return { ok: false, error: "Empty SQL payload." }
+  }
+  const cleaned = stripSqlNoise(sql)
+  const upper = cleaned.toUpperCase()
+  // 1. Hard blocks regardless of schema.
+  for (const { re, msg } of FORBIDDEN_TOP_LEVEL) {
+    if (re.test(cleaned)) {
+      return { ok: false, error: msg }
+    }
+  }
+  // 2. allowSelectOnly mode — anything but pure reads is rejected.
+  if (opts.allowSelectOnly) {
+    // Detect any mutating verb at statement boundary.
+    for (const verb of MUTATING_VERBS) {
+      const re = new RegExp(`(^|;|\\s)${verb}\\b`, "i")
+      if (re.test(cleaned)) {
+        return {
+          ok: false,
+          error: `Only SELECT is allowed here — found '${verb}'.`,
+          hint: "For data mutations use execute_sql directly; this wrapper is read-only.",
+        }
+      }
+    }
+    return { ok: true }
+  }
+  // 3. Schema-targeted rules. We look for
+  //    "<VERB> ... <schema>.<ident>" and reject if the schema is protected.
+  //    The verb list is drawn from MUTATING_VERBS so SELECT skips this path.
+  //    This misses a few dynamic-SQL edge cases but catches 99% of real use.
+  const schemaRefs = [...cleaned.matchAll(/\b([a-z_][a-z0-9_]*)\.[a-z_][a-z0-9_"]*/gi)]
+  const mutatingMatch = upper.match(/\b(ALTER|CREATE|DROP|TRUNCATE|INSERT|UPDATE|DELETE|GRANT|REVOKE|RENAME|COMMENT)\b/)
+  if (mutatingMatch) {
+    for (const m of schemaRefs) {
+      const schema = m[1]
+      if (isProtectedSchema(schema)) {
+        return {
+          ok: false,
+          error: `Cannot modify the \`${schema}\` schema — it's managed by DYPAI.`,
+          hint:
+            schema === "auth"
+              ? "To manage users, use the manage_users tool. Read-only SELECTs against auth.* are allowed."
+              : schema === "storage"
+                ? "To manage files, use manage_storage. Read-only SELECTs against storage.* are allowed."
+                : schema === "system"
+                  ? "To manage endpoints, use dypai_push / manage_drafts / manage_users / manage_roles. Read-only SELECTs against system.* are allowed."
+                  : "Read-only SELECTs against this schema are allowed.",
+        }
+      }
+    }
+    // CREATE SCHEMA / ALTER SCHEMA <name> — catch cases where the schema is
+    // the target and not a qualifier.
+    const schemaStmtMatch = cleaned.match(
+      /\b(CREATE|ALTER|DROP)\s+SCHEMA\s+(IF\s+(NOT\s+)?EXISTS\s+)?["']?([A-Za-z_][A-Za-z0-9_]*)["']?/i
+    )
+    if (schemaStmtMatch) {
+      const name = schemaStmtMatch[4]
+      if (isProtectedSchema(name)) {
+        return {
+          ok: false,
+          error: `Cannot ${schemaStmtMatch[1].toUpperCase()} SCHEMA \`${name}\` — reserved by DYPAI.`,
+        }
+      }
+    }
+  }
+  return { ok: true }
+}
+/**
+ * Format a validator failure as a single string error suitable for returning
+ * to the agent (e.g. thrown, returned as isError, etc).
+ */
+export function formatValidationError(v) {
+  if (v.ok) return null
+  return v.hint ? `${v.error}\nHint: ${v.hint}` : v.error
+}

package/src/tools/sync/codec.js CHANGED Viewed

@@ -16,6 +16,7 @@ import {
   pullNodeParams,
   pushNodeParams,
   SQL_INLINE_MAX_CHARS,
+  CODE_INLINE_MAX_CHARS,
   PROMPT_INLINE_MAX_CHARS,
 } from "./transforms.js"
@@ -167,7 +168,7 @@ export function serializeEndpoint(row, mapsCtx) {
   ).length
   const codeNodeCount = rawNodes.filter(n =>
     (n.node_type === "javascript_code" || n.node_type === "python_code") &&
-    n.parameters?.code && n.parameters.code.length > SQL_INLINE_MAX_CHARS
+    n.parameters?.code && n.parameters.code.length > CODE_INLINE_MAX_CHARS
   ).length
   const nodes = rawNodes.map(node => {

package/src/tools/sync/pull.js CHANGED Viewed

@@ -82,7 +82,7 @@ function suspiciousPathWarning(resolvedPath, source) {
 // Subfolders that are always created so the layout is predictable. An agent
 // never has to check "does this folder exist?" before writing a new SQL/prompt/JS file.
-const CANONICAL_SUBDIRS = ["endpoints", "sql", "prompts", "code"]
+const CANONICAL_SUBDIRS = ["endpoints", "sql", "prompts", "code", "migrations"]
 const README_CONTENT = `# dypai/
@@ -92,9 +92,10 @@ Declarative snapshot of your DYPAI project's backend.
 - \`endpoints/\` — one YAML per endpoint (the workflow definition).
   Subfolders represent endpoint groups, e.g. \`endpoints/Admin/foo.yaml\` → group "Admin".
-- \`sql/\` — SQL queries extracted from \`dypai_database\` nodes when longer than 500 chars.
+- \`sql/\` — SQL queries extracted from \`dypai_database\` nodes when longer than 1500 chars.
 - \`prompts/\` — system prompts extracted from \`agent\` nodes when longer than 800 chars.
 - \`code/\` — JavaScript / Python extracted from \`javascript_code\` / \`python_code\` nodes when longer than 500 chars.
+- \`migrations/\` — numbered SQL migrations (\`NNNN_description.sql\`). Apply with \`run_migration\`; tracked in \`system.applied_migrations\` so re-runs are no-ops.
 ## Workflow
@@ -106,6 +107,21 @@ Declarative snapshot of your DYPAI project's backend.
 Paths inside YAML (e.g. \`query_file: sql/create_invoice.sql\`) are always relative
 to this folder's root, regardless of where the YAML lives.
+## Schema changes
+For DDL (CREATE / ALTER / DROP) and structural DML, author a migration file:
+    dypai/migrations/NNNN_description.sql
+Then apply with the \`run_migration\` MCP tool:
+    run_migration({ migration_file: "dypai/migrations/0001_add_orders.sql" })
+The tool tracks applied migrations in \`system.applied_migrations\` so re-runs
+are safe no-ops. For ad-hoc queries and one-off writes, \`execute_sql\` is fine.
+\`execute_sql\` and \`run_migration\` both refuse to modify \`auth\`, \`storage\`,
+\`system\` schemas — those are DYPAI-managed.
 ## Reference examples
 When the project has no endpoints yet, \`dypai_pull\` writes three reference
@@ -240,7 +256,7 @@ workflow:
       # For UPDATE: replace \`insert:\` with \`update: {...}\` + \`where: {...}\`
       # For DELETE: replace \`insert:\` with \`delete: true\` + \`where: {...}\`
       #
-      # Long SQL (>500 chars) inside \`operation: query\` is auto-extracted on
+      # Long SQL (>1500 chars) inside \`operation: query\` is auto-extracted on
       # \`dypai_pull\` to sql/<endpoint>.sql and referenced like:
       #   query_file: sql/insert_order.sql
       # Same for prompts (system_prompt_file) and JS/Python code (code_file).

package/src/tools/sync/transforms.js CHANGED Viewed

@@ -26,7 +26,15 @@
 // Only extract truly large content. Below these thresholds, SQL and prompts
 // stay inline in the YAML so the endpoint is one self-contained file.
 // Exported so codec.js can use the same cutoffs when deciding file naming.
-export const SQL_INLINE_MAX_CHARS = 500
+//
+// SQL: 1500 chars (≈ 40 lines formatted). Typical SELECTs with 2-3 JOINs
+//      stay inline where the rest of the workflow can see them. Only real
+//      multi-CTE queries get extracted.
+// CODE: 500 chars. JS/Python in YAML is painful (escape hell, no linting),
+//      so we extract earlier than SQL.
+// PROMPT: 800 chars. Real system prompts benefit from dedicated .md files.
+export const SQL_INLINE_MAX_CHARS = 1500
+export const CODE_INLINE_MAX_CHARS = 500
 export const PROMPT_INLINE_MAX_CHARS = 800
 const shouldInlineSql = (q) => !q || q.length <= SQL_INLINE_MAX_CHARS
@@ -134,7 +142,7 @@ export const NODE_FIELD_TRANSFORMS = [
     appliesWhen: (nodeType) => nodeType === "javascript_code" || nodeType === "python_code",
     pull(params, ctx) {
       if (!params.code) return {}
-      if (params.code.length <= SQL_INLINE_MAX_CHARS) {
+      if (params.code.length <= CODE_INLINE_MAX_CHARS) {
         // Short code stays inline. Must be re-emitted here because
         // `pullConsumes: ["code"]` deletes it from the base object.
         // Before this fix, short code was silently dropped from the YAML