@dypai-ai/mcp 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,269 @@
1
+ /**
2
+ * run_migration — apply a versioned SQL migration file to the project DB.
3
+ *
4
+ * Thin orchestrator on top of the cloud `execute_script` tool. Adds:
5
+ * - Path resolution + suspicious-path guard (matches dypai_pull/push).
6
+ * - Content checksum (sha256) so re-running the SAME file is a no-op and
7
+ * re-running a CHANGED file surfaces a `checksum_mismatch` instead of
8
+ * silently rewriting history.
9
+ * - Tracking table `system.applied_migrations` (auto-bootstrapped on first
10
+ * use) with (name, checksum, statements, applied_at, applied_by).
11
+ * - Dry-run mode that prints the exact script the tool would submit.
12
+ *
13
+ * What `execute_script` handles (server-side, atomic):
14
+ * - Running the migration + the tracking INSERT inside ONE transaction.
15
+ * If any statement fails, the tracking row never lands → next run retries.
16
+ * - Enforcing the SQL guard (no writes to auth/storage/system, no COPY/LOAD,
17
+ * no SET ROLE / search_path).
18
+ *
19
+ * Convention:
20
+ * dypai/migrations/NNNN_description.sql
21
+ *
22
+ * Out of scope deliberately:
23
+ * - Rollback / down-migrations. Reverting = author a NEW migration.
24
+ * - Cross-project migration sharing. Each project gets its own table.
25
+ */
26
+
27
+ import { readFile, access } from "fs/promises"
28
+ import { createHash } from "crypto"
29
+ import { basename, extname, isAbsolute } from "path"
30
+ import { proxyToolCall } from "./proxy.js"
31
+ import { validateSql, formatValidationError } from "./sql-guard.js"
32
+ import { resolveAndGuard } from "./sync/path-resolver.js"
33
+
34
/**
 * Build the single script submitted in one `execute_script` call:
 * tracking-table bootstrap + migration body + tracking INSERT.
 *
 * CREATE SCHEMA/TABLE IF NOT EXISTS are idempotent. The INSERT is placed last
 * so that, with the script run as one transaction (as this module's header
 * describes for `execute_script`), the tracking row only lands when every
 * earlier statement succeeded.
 *
 * @param {string} name       Migration name (stored as the primary key).
 * @param {string} checksum   Hex digest of the migration file content.
 * @param {number} statements Statement count, stored for reporting only.
 * @param {string} body       Raw SQL content of the migration file.
 * @returns {string} The full script text.
 */
function buildTransactionalScript(name, checksum, statements, body) {
  // Values are embedded as SQL string literals, so double any single quote.
  const safeName = name.replace(/'/g, "''")
  const safeChk = checksum.replace(/'/g, "''")

  // The body must end with a statement terminator, otherwise its last
  // statement fuses with the tracking INSERT. When the last line contains a
  // `--` line comment, a `;` on that line may sit INSIDE the comment (either
  // one we would append, or one already there), so it cannot be trusted: put
  // the terminator on its own line instead. At worst that adds an empty
  // statement, which PostgreSQL accepts.
  const trimmed = body.trimEnd()
  const lastLine = trimmed.slice(trimmed.lastIndexOf("\n") + 1)
  let terminatedBody
  if (lastLine.includes("--")) {
    terminatedBody = `${trimmed}\n;`
  } else if (trimmed.endsWith(";")) {
    terminatedBody = trimmed
  } else {
    terminatedBody = `${trimmed};`
  }

  return [
    "CREATE SCHEMA IF NOT EXISTS system;",
    `CREATE TABLE IF NOT EXISTS system.applied_migrations (
  name text PRIMARY KEY,
  checksum text NOT NULL,
  statements int NOT NULL,
  applied_at timestamptz NOT NULL DEFAULT now(),
  applied_by text NOT NULL DEFAULT current_user
);`,
    terminatedBody,
    `INSERT INTO system.applied_migrations (name, checksum, statements)
VALUES ('${safeName}', '${safeChk}', ${Number(statements) || 0});`,
  ].join("\n")
}
54
+
55
/**
 * Hex-encoded SHA-256 digest of the given content.
 *
 * @param {string|Buffer} s Content to hash.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(s) {
  const hasher = createHash("sha256")
  hasher.update(s)
  return hasher.digest("hex")
}
58
+
59
/**
 * Rough count of top-level SQL statements — used for reporting, not control
 * flow.
 *
 * Comments, quoted strings/identifiers and $tag$ bodies are neutralised in a
 * SINGLE left-to-right pass so that whichever construct starts first wins:
 * a `--` inside a string literal is not treated as a comment, and a quote
 * inside a comment does not open a string. Stripping them in separate passes
 * gets this wrong (e.g. `'-- text'` would swallow the rest of the line).
 *
 * @param {string} sql Raw SQL text.
 * @returns {number} Number of non-empty `;`-separated statements.
 */
function countStatements(sql) {
  if (!sql) return 0
  // Alternatives, in order: 'string', "identifier", $tag$body$tag$,
  // -- line comment, /* block comment */.
  const noise = /'(?:''|[^'])*'|"(?:""|[^"])*"|\$([A-Za-z_][A-Za-z0-9_]*)?\$[\s\S]*?\$\1\$|--[^\n]*|\/\*[\s\S]*?\*\//g
  const stripped = sql.replace(noise, (match) => {
    // Comments vanish (a space keeps neighbouring tokens apart); quoted
    // content collapses to an empty literal so any `;` inside is not counted.
    if (match.startsWith("--") || match.startsWith("/*")) return " "
    return "''"
  })
  return stripped.split(";").filter((part) => part.trim() !== "").length
}
71
+
72
/**
 * Derive the migration name from a file path: the base name with its
 * extension removed. Falls back to the input itself when that is empty.
 *
 * @param {string} file Path or file name.
 * @returns {string} Migration name used as the tracking key.
 */
function normalizeName(file) {
  const extension = extname(file)
  const stem = basename(file, extension)
  return stem === "" ? file : stem
}
75
+
76
/**
 * Resolve to true when `access(p)` succeeds and false when it rejects for
 * any reason.
 *
 * @param {string} p Path to probe.
 * @returns {Promise<boolean>}
 */
async function pathExists(p) {
  return access(p).then(
    () => true,
    () => false
  )
}
79
+
80
/**
 * Cheap pre-check: read the tracking row for `name`, if any.
 *
 * Best-effort by design. Every failure of the lookup — the tracking table not
 * existing yet on the very first run, or any other error — is reported as
 * "not applied" (null). The apply path then proceeds; if the row did exist
 * after all, the tracking INSERT hits the primary key and the run fails
 * there, which is preferable to aborting here.
 *
 * @param {string|undefined} project_id Sent along only when truthy.
 * @param {string} name Migration name to look up.
 * @returns {Promise<object|null>} The first returned row, or null.
 */
async function fetchAppliedRow(project_id, name) {
  // The name is embedded as a SQL string literal, so double single quotes.
  const sql = `SELECT name, checksum, statements, applied_at
FROM system.applied_migrations
WHERE name = '${name.replace(/'/g, "''")}'
LIMIT 1`
  const args = project_id ? { project_id, sql } : { sql }
  try {
    const res = await proxyToolCall("execute_sql", args)
    // Accept either `{ rows: [...] }` or a bare array of rows.
    const rows = Array.isArray(res?.rows) ? res.rows : (Array.isArray(res) ? res : [])
    return rows[0] || null
  } catch {
    // Deliberately unconditional: the rejection value is never inspected, so
    // a non-Error rejection (e.g. `throw null`) cannot raise from in here.
    return null
  }
}
102
+
103
/**
 * MCP tool definition for `run_migration`.
 *
 * `execute()` never reports an expected failure by throwing: every outcome is
 * a plain result object carrying `success` and, past input validation, a
 * `status` of `skipped`, `checksum_mismatch`, `dry_run`, `applied` or
 * `failed`.
 */
export const runMigrationTool = {
  name: "run_migration",
  description:
    "Apply a numbered SQL migration file to the project DB with tracking + idempotency. " +
    "Reads dypai/migrations/<file>, validates it, and submits it as a single transactional " +
    "script via `execute_script` — the migration + the tracking INSERT land atomically, " +
    "so partial application is impossible. Tracks applied migrations in " +
    "system.applied_migrations: running the same file twice is a no-op (`skipped`); " +
    "running a MODIFIED file returns `checksum_mismatch` so you author a new migration " +
    "instead of silently rewriting history.\n\n" +
    "Use this for DDL (CREATE/ALTER/DROP), multi-statement scripts, and any structural " +
    "change that must be atomic. For ad-hoc single queries, use `execute_sql`.\n\n" +
    "File convention: `dypai/migrations/NNNN_description.sql` (e.g. `0001_add_orders.sql`).",
  inputSchema: {
    type: "object",
    properties: {
      project_id: {
        type: "string",
        description: "Project UUID. Optional if your token is project-scoped.",
      },
      migration_file: {
        type: "string",
        description:
          "Path to the .sql file. Either absolute (/Users/me/project/dypai/migrations/0001_x.sql) " +
          "or relative to the workspace root (dypai/migrations/0001_x.sql). Prefer absolute when " +
          "running from an IDE-hosted MCP (cwd may be $HOME).",
      },
      dry_run: {
        type: "boolean",
        description:
          "If true, validate + print the exact script that WOULD be submitted to execute_script " +
          "(including the tracking INSERT) without executing anything. Default false.",
        default: false,
      },
      timeout_seconds: {
        type: "integer",
        description:
          "Per-statement timeout inside the transaction (default 300s, max 1800s). " +
          "Bump this for migrations that rewrite large tables.",
      },
    },
    required: ["migration_file"],
  },

  /**
   * Resolve, read, validate and (unless already applied or `dry_run`) submit
   * one migration file.
   *
   * @param {object} [args]
   * @param {string} [args.project_id] Forwarded to the remote tools when set.
   * @param {string} args.migration_file Absolute or workspace-relative path.
   * @param {boolean} [args.dry_run=false] Return the script instead of running it.
   * @param {number} [args.timeout_seconds] Forwarded to `execute_script` when set.
   * @returns {Promise<object>} Result object (see the tool-level comment).
   */
  async execute({ project_id, migration_file, dry_run = false, timeout_seconds } = {}) {
    if (!migration_file || typeof migration_file !== "string") {
      return { success: false, error: "migration_file is required (string path)." }
    }

    // Absolute paths are taken as given; relative ones go through the shared
    // resolver, which may reject the path and hand back a ready-made error.
    let resolved
    if (isAbsolute(migration_file)) {
      resolved = { ok: true, path: migration_file, source: "absolute" }
    } else {
      resolved = resolveAndGuard(migration_file, {
        project_id,
        tool: "run_migration",
        arg_name: "migration_file",
      })
      if (!resolved.ok) return resolved.error
    }
    const filePath = resolved.path

    if (!(await pathExists(filePath))) {
      return {
        success: false,
        error: `Migration file not found: ${filePath}`,
        hint: "Create dypai/migrations/<NNNN>_<description>.sql and pass that path.",
      }
    }

    // The path existing does not mean it is readable: it may be a directory,
    // lack read permission, or vanish between the check and the read. Report
    // that as a normal failure result rather than rejecting.
    let body
    try {
      body = await readFile(filePath, "utf8")
    } catch (e) {
      return {
        success: false,
        error: `Could not read migration file: ${e?.message ?? String(e)}`,
        resolved_path: filePath,
      }
    }
    if (!body.trim()) {
      return { success: false, error: "Migration file is empty." }
    }

    // Local pre-flight: failing here saves a network roundtrip and yields a
    // cleaner error shape than a remote rejection.
    const v = validateSql(body)
    if (!v.ok) {
      return {
        success: false,
        error: formatValidationError(v),
        resolved_path: filePath,
      }
    }

    const name = normalizeName(filePath)
    const checksum = sha256(body)
    const statements = countStatements(body)

    // Prior state. A null row means "not applied" (including the case where
    // the tracking table does not exist yet).
    const existing = await fetchAppliedRow(project_id, name)
    if (existing) {
      if (existing.checksum !== checksum) {
        return {
          success: false,
          status: "checksum_mismatch",
          error: `Migration '${name}' was already applied but its content has changed.`,
          applied_at: existing.applied_at,
          previous_checksum: existing.checksum,
          current_checksum: checksum,
          hint:
            "Do NOT rewrite an applied migration. Create a new migration that performs " +
            "the additional change (e.g. <NNNN+1>_fix_previous.sql). That preserves the " +
            "audit trail.",
          resolved_path: filePath,
        }
      }
      return {
        success: true,
        status: "skipped",
        name,
        reason: "already_applied",
        applied_at: existing.applied_at,
        statements: existing.statements,
        resolved_path: filePath,
      }
    }

    const script = buildTransactionalScript(name, checksum, statements, body)

    if (dry_run) {
      return {
        success: true,
        status: "dry_run",
        name,
        checksum,
        statements,
        resolved_path: filePath,
        would_execute: script,
      }
    }

    // Optional arguments are only sent when the caller supplied them.
    const scriptArgs = { sql: script }
    if (project_id) scriptArgs.project_id = project_id
    if (timeout_seconds != null) scriptArgs.timeout_seconds = timeout_seconds

    try {
      const res = await proxyToolCall("execute_script", scriptArgs)
      return {
        success: true,
        status: "applied",
        name,
        checksum,
        statements,
        resolved_path: filePath,
        duration_ms: res?.duration_ms,
        message: `Migration '${name}' applied (${statements} statement${statements === 1 ? "" : "s"}).`,
      }
    } catch (e) {
      return {
        success: false,
        status: "failed",
        name,
        error: e.message,
        resolved_path: filePath,
        hint:
          "The migration was rolled back — nothing was applied. Fix the error and re-run; " +
          "idempotency still holds.",
      }
    }
  },
}
@@ -0,0 +1,151 @@
1
+ /**
2
+ * maybeOffloadSearchLogs — keep `search_logs` responses from blowing up the
3
+ * agent's context window.
4
+ *
5
+ * `search_logs` (especially with `include_trace=true`) can return hundreds of
6
+ * KB of JSON. Inlining that into the tool-result `text` field forces the model
7
+ * to load the whole payload into context, which is wasteful for the typical
8
+ * "show me what failed and let me drill into one of them" workflow.
9
+ *
10
+ * Strategy:
11
+ * - If the serialized response exceeds OFFLOAD_THRESHOLD_BYTES, write the
12
+ * full JSON to a temp file and return a compact summary that includes:
13
+ * · the absolute file path (so the agent can `Read` it on demand)
14
+ * · counts by level/type/environment
15
+ * · the first 5 items, trace-stripped
16
+ * - Otherwise, return the response unchanged.
17
+ *
18
+ * The offload threshold is intentionally loose (~60 KB). A normal search
19
+ * without `include_trace` is well under that and stays inline.
20
+ */
21
+
22
+ import fs from "fs"
23
+ import os from "os"
24
+ import path from "path"
25
+
26
// Serialized responses larger than this many bytes (60 KiB) are written to
// disk instead of being returned inline.
const OFFLOAD_THRESHOLD_BYTES = 60 * 1024

// Offloaded files older than this (24 hours, in milliseconds) are removed by
// the housekeeping pass that runs on each offload.
const FILE_TTL_MS = 1000 * 60 * 60 * 24

// Directory, under the OS temp dir, that holds the offloaded JSON files.
const OFFLOAD_DIR = path.join(os.tmpdir(), "dypai-mcp-search-logs")
35
+
36
/**
 * Create the offload directory if it is missing. Best-effort: errors are
 * swallowed here, and a directory that could not be created surfaces later
 * when the caller tries to write into it.
 *
 * The directory lives under the shared OS temp dir and holds log payloads, so
 * it is created owner-only (0o700). The mode applies only when the directory
 * is created; an existing directory keeps its permissions.
 */
function ensureDir() {
  try {
    fs.mkdirSync(OFFLOAD_DIR, { recursive: true, mode: 0o700 })
  } catch {
    /* mkdirSync with `recursive` does not throw for an existing directory; any other failure is left for the subsequent write to report */
  }
}
43
+
44
/**
 * Best-effort housekeeping: delete every entry in the offload directory whose
 * modification time is older than FILE_TTL_MS. Never throws — a failure to
 * list the directory ends the pass, and a failure on one entry only skips
 * that entry.
 */
function pruneOldFiles() {
  let entries
  try {
    entries = fs.readdirSync(OFFLOAD_DIR)
  } catch {
    return // nothing to prune if the directory cannot be listed
  }

  const cutoff = Date.now() - FILE_TTL_MS
  entries.forEach((entry) => {
    const target = path.join(OFFLOAD_DIR, entry)
    try {
      const { mtimeMs } = fs.statSync(target)
      if (mtimeMs < cutoff) {
        fs.unlinkSync(target)
      }
    } catch {
      // ignore errors on individual entries
    }
  })
}
56
+
57
/**
 * Produce the inline-summary version of a log item: a shallow copy without
 * its `trace` field. When the dropped trace was truthy, `trace_omitted: true`
 * is added. Non-object values are returned untouched.
 *
 * @param {*} item A log item.
 * @returns {*} The trimmed copy, or `item` itself when it is not an object.
 */
function lightItem(item) {
  if (item === null || typeof item !== "object") return item

  const { trace, ...withoutTrace } = item
  if (!trace) return withoutTrace
  return { ...withoutTrace, trace_omitted: true }
}
64
+
65
/**
 * Count items by the stringified value of `key`. Items that are falsy, or
 * whose value for `key` is null/undefined, are counted under "null".
 *
 * Counting is done in a Map rather than a plain object: with `{}` a value
 * such as "constructor" or "toString" would read an inherited property as the
 * starting count, and "__proto__" would be lost entirely.
 *
 * @param {Array<*>} items Items to tally.
 * @param {string} key Property to group by.
 * @returns {Object<string, number>} Plain object mapping value → count.
 */
function bucket(items, key) {
  const counts = new Map()
  for (const it of items) {
    const label = it && it[key] != null ? String(it[key]) : "null"
    counts.set(label, (counts.get(label) || 0) + 1)
  }
  // fromEntries defines own data properties, so even "__proto__" survives.
  return Object.fromEntries(counts)
}
73
+
74
/**
 * Return either the original `result` (small enough to inline) or a compact
 * summary object that points at a temp file holding the full JSON.
 *
 * Only objects with an `items` array are considered; anything else is
 * returned unchanged, as is a result that cannot be serialized.
 *
 * Never throws. If offloading fails (disk full, permissions, …) the original
 * payload is returned with an added `offload_warning` field, so the data
 * still gets through even though it is large.
 *
 * @param {*} result A `search_logs` response.
 * @returns {*} `result` itself, `result` plus `offload_warning`, or the
 *   offload summary (`offloaded_to_file: true`, `file_path`, `summary`, …).
 */
export function maybeOffloadSearchLogs(result) {
  if (!result || typeof result !== "object" || !Array.isArray(result.items)) {
    return result
  }

  let serialized
  try {
    serialized = JSON.stringify(result, null, 2)
  } catch {
    return result
  }
  // JSON.stringify yields undefined (not a string) when e.g. a toJSON()
  // returns undefined; measuring that would throw, so pass the result through.
  if (typeof serialized !== "string") return result

  const sizeBytes = Buffer.byteLength(serialized, "utf8")
  if (sizeBytes <= OFFLOAD_THRESHOLD_BYTES) {
    return result
  }

  try {
    ensureDir()
    pruneOldFiles()

    // Timestamp + short random suffix keeps file names unique and sortable.
    const ts = new Date().toISOString().replace(/[:.]/g, "-")
    const rand = Math.random().toString(36).slice(2, 8)
    const filePath = path.join(OFFLOAD_DIR, `search-logs-${ts}-${rand}.json`)
    // Owner-only: the dump sits in a shared temp location and holds log data.
    fs.writeFileSync(filePath, serialized, { encoding: "utf8", mode: 0o600 })

    const sizeKb = Math.round(sizeBytes / 1024)
    // Derived from the constant so the message cannot drift from the cutoff.
    const thresholdKb = Math.round(OFFLOAD_THRESHOLD_BYTES / 1024)
    const items = result.items
    const firstFive = items.slice(0, 5).map(lightItem)

    return {
      offloaded_to_file: true,
      file_path: filePath,
      size_bytes: sizeBytes,
      guidance: (
        `Response was too large to inline (${sizeKb} KB > ${thresholdKb} KB threshold). ` +
        `Full JSON written to disk — open it with the Read tool when you want ` +
        `to inspect a specific item or its trace:\n Read("${filePath}")\n\n` +
        `The summary below covers the whole result. Only read the file if you ` +
        `need fields beyond the first 5 items or any 'trace' contents.`
      ),
      summary: {
        total_returned: items.length,
        by_level: bucket(items, "level"),
        by_type: bucket(items, "type"),
        by_environment: bucket(items, "environment"),
        first_5: firstFive,
      },
      filters: {
        project_id: result.project_id,
        since: result.since,
        level: result.level,
        environment: result.environment,
        endpoint: result.endpoint,
        query: result.query,
        include_trace: result.include_trace,
      },
      // Mirror the upstream guidance so the agent doesn't lose it.
      upstream_guidance: result.guidance,
    }
  } catch (err) {
    // Could not offload — return the original payload with a warning attached.
    return {
      ...result,
      offload_warning: `Could not write large payload to disk: ${err.message}`,
    }
  }
}
150
+
151
// Module constants re-exported under one object.
export const _internals = {
  OFFLOAD_THRESHOLD_BYTES,
  OFFLOAD_DIR,
}
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Pre-flight SQL validator.
3
+ *
4
+ * Parses a SQL payload and rejects DDL/DML that targets schemas the agent
5
+ * should never touch (auth, storage, system, internal Timescale, pg_*).
6
+ * Reads (SELECT) are always allowed so the agent can introspect freely.
7
+ *
8
+ * This is UX, not security. The real guarantee belongs to the engine's
9
+ * Postgres role (see backend provisioning: dypai_agent role grants
10
+ * CREATE/ALTER/DROP/INSERT/UPDATE/DELETE only on public.*). What we do
11
+ * here is fail earlier with a message that tells the agent how to recover,
12
+ * instead of letting it hit a raw "permission denied" from Postgres.
13
+ *
14
+ * Used by:
15
+ * - execute_sql proxy hop in index.js (before calling the remote)
16
+ * - run_migration local tool (validates the whole file before applying)
17
+ */
18
+
19
// Exact schema names the agent must not modify (lower-case). Reads against
// them stay allowed.
//   auth               — better-auth user/session tables, engine-owned
//   storage            — file upload metadata, engine-owned
//   system             — DYPAI internals (endpoints, credentials, etc.)
//   information_schema — PG metadata catalog
const PROTECTED_SCHEMAS = new Set([
  "auth",
  "storage",
  "system",
  "information_schema",
])

// Schema-name prefixes that are protected as well (Timescale / PG internals).
const PROTECTED_SCHEMA_PREFIXES = [
  "_timescaledb",
  "pg_",
]

// DDL/DML verbs treated as mutating. SELECT is intentionally not listed.
const MUTATING_VERBS = new Set([
  "ALTER",
  "CREATE",
  "DROP",
  "TRUNCATE",
  "INSERT",
  "UPDATE",
  "DELETE",
  "GRANT",
  "REVOKE",
  "COMMENT",
  "RENAME",
])

// Patterns rejected outright, whatever they target, each paired with the
// message returned to the agent.
const FORBIDDEN_TOP_LEVEL = [
  {
    re: /\bALTER\s+SYSTEM\b/i,
    msg: "ALTER SYSTEM is superuser-only and never allowed from the agent.",
  },
  {
    re: /\bCREATE\s+EXTENSION\b/i,
    msg: "CREATE EXTENSION requires superuser. Ask DYPAI support if you need a new extension.",
  },
  {
    re: /\bDROP\s+EXTENSION\b/i,
    msg: "DROP EXTENSION requires superuser.",
  },
  {
    re: /\bCREATE\s+(USER|ROLE)\b/i,
    msg: "Managing DB roles is the engine's job. Use manage_users for app users.",
  },
  {
    re: /\bDROP\s+(USER|ROLE)\b/i,
    msg: "Managing DB roles is the engine's job. Use manage_users for app users.",
  },
  {
    re: /\bCOPY\b/i,
    msg: "COPY to/from the filesystem is not available from the agent. Use bulk_upsert or a DO block with INSERT.",
  },
]
47
+
48
/**
 * Case-insensitive check of a schema name against the protected set and the
 * protected prefixes.
 *
 * @param {string} name Schema name as written in the SQL.
 * @returns {boolean} True when the schema is protected.
 */
function isProtectedSchema(name) {
  const lowered = name.toLowerCase()
  if (PROTECTED_SCHEMAS.has(lowered)) {
    return true
  }
  for (const prefix of PROTECTED_SCHEMA_PREFIXES) {
    if (lowered.startsWith(prefix)) return true
  }
  return false
}
53
+
54
/**
 * Neutralise comments and quoted text so that keyword / schema-name matching
 * only sees real code.
 *
 * Everything is handled in ONE left-to-right pass, so whichever construct
 * starts first wins: a `--` inside a string literal is not a comment, and a
 * quote inside a comment does not open a string. (Stripping comments in a
 * separate earlier pass would cut `'a -- b'); DROP ...` at the `--` and hide
 * the rest of the line from the validator.)
 *
 * Replacements:
 *   - line / block comments  → one space, so the tokens on either side stay
 *     separate (`DROP/**\/EXTENSION` must still read as two words)
 *   - $tag$ … $tag$ bodies   → one space (keywords inside function bodies
 *     must not trip the validator)
 *   - 'single-quoted' strings → `''`
 *   - "double-quoted" identifiers → kept as written
 *
 * The output does not preserve the input's length or line numbers.
 *
 * @param {string} sql Raw SQL text.
 * @returns {string} SQL with the noise neutralised ("" for falsy input).
 */
function stripSqlNoise(sql) {
  if (!sql) return ""
  // Alternatives, in order: 'string', "identifier", $tag$body$tag$,
  // -- line comment, /* block comment */.
  const token = /'(?:''|[^'])*'|"(?:""|[^"])*"|\$([A-Za-z_][A-Za-z0-9_]*)?\$[\s\S]*?\$\1\$|--[^\n]*|\/\*[\s\S]*?\*\//g
  return sql.replace(token, (match) => {
    const opener = match[0]
    if (opener === "'") return "''"
    if (opener === '"') return match
    return " "
  })
}
72
+
73
/**
 * Validate a SQL payload before it is sent anywhere.
 *
 * Checks, in order:
 *   1. Patterns in FORBIDDEN_TOP_LEVEL are rejected wherever they appear.
 *   2. With `opts.allowSelectOnly`, any mutating verb is rejected and nothing
 *      else is checked.
 *   3. Otherwise each `;`-separated statement that contains a mutating verb
 *      is rejected if it references a protected schema (`schema.ident`) or
 *      is a CREATE/ALTER/DROP SCHEMA on a protected name. Statements without
 *      a mutating verb are not inspected, so a read of a protected schema is
 *      fine even when another statement in the payload writes elsewhere.
 *
 * This is a text-level heuristic (comments and string contents are removed
 * first); it does not parse SQL and will miss dynamic-SQL edge cases.
 *
 * @param {string} sql
 * @param {object} [opts]
 * @param {boolean} [opts.allowSelectOnly] If true, only SELECT is permitted.
 * @returns {{ ok: true } | { ok: false, error: string, hint?: string }}
 */
export function validateSql(sql, opts = {}) {
  if (typeof sql !== "string" || !sql.trim()) {
    return { ok: false, error: "Empty SQL payload." }
  }

  const cleaned = stripSqlNoise(sql)

  // 1. Hard blocks regardless of schema.
  for (const { re, msg } of FORBIDDEN_TOP_LEVEL) {
    if (re.test(cleaned)) {
      return { ok: false, error: msg }
    }
  }

  // 2. allowSelectOnly mode — anything but pure reads is rejected.
  if (opts.allowSelectOnly) {
    for (const verb of MUTATING_VERBS) {
      // `(` counts as a boundary too, so a data-modifying CTE such as
      // `WITH d AS (DELETE ...) SELECT ...` is caught.
      const re = new RegExp(`(^|[;(\\s])${verb}\\b`, "i")
      if (re.test(cleaned)) {
        return {
          ok: false,
          error: `Only SELECT is allowed here — found '${verb}'.`,
          hint: "For data mutations use execute_sql directly; this wrapper is read-only.",
        }
      }
    }
    return { ok: true }
  }

  // 3. Schema-targeted rules, applied statement by statement.
  // The verb pattern is built from MUTATING_VERBS so the two cannot drift.
  const mutatingRe = new RegExp(`\\b(?:${[...MUTATING_VERBS].join("|")})\\b`, "i")
  const schemaRefRe = /\b([a-z_][a-z0-9_]*)\.[a-z_][a-z0-9_"]*/gi
  const schemaStmtRe =
    /\b(CREATE|ALTER|DROP)\s+SCHEMA\s+(?:IF\s+(?:NOT\s+)?EXISTS\s+)?["']?([A-Za-z_][A-Za-z0-9_]*)["']?/i

  for (const statement of cleaned.split(";")) {
    if (!mutatingRe.test(statement)) continue

    // "<VERB> ... <schema>.<ident>" with a protected schema.
    for (const ref of statement.matchAll(schemaRefRe)) {
      const schema = ref[1]
      if (!isProtectedSchema(schema)) continue
      // Pick the hint case-insensitively, matching isProtectedSchema.
      const schemaKey = schema.toLowerCase()
      let hint
      if (schemaKey === "auth") {
        hint = "To manage users, use the manage_users tool. Read-only SELECTs against auth.* are allowed."
      } else if (schemaKey === "storage") {
        hint = "To manage files, use manage_storage. Read-only SELECTs against storage.* are allowed."
      } else if (schemaKey === "system") {
        hint = "To manage endpoints, use dypai_push / manage_drafts / manage_users / manage_roles. Read-only SELECTs against system.* are allowed."
      } else {
        hint = "Read-only SELECTs against this schema are allowed."
      }
      return {
        ok: false,
        error: `Cannot modify the \`${schema}\` schema — it's managed by DYPAI.`,
        hint,
      }
    }

    // CREATE / ALTER / DROP SCHEMA <name> — the schema is the target itself,
    // not a qualifier, so the check above does not see it.
    const schemaStmt = statement.match(schemaStmtRe)
    if (schemaStmt && isProtectedSchema(schemaStmt[2])) {
      return {
        ok: false,
        error: `Cannot ${schemaStmt[1].toUpperCase()} SCHEMA \`${schemaStmt[2]}\` — reserved by DYPAI.`,
      }
    }
  }

  return { ok: true }
}
156
+
157
/**
 * Render a validator result as one error string for the agent.
 *
 * @param {{ ok: boolean, error?: string, hint?: string }} v Result from the validator.
 * @returns {string|null} null for a passing result; otherwise the error,
 *   followed by a "Hint:" line when a hint is present.
 */
export function formatValidationError(v) {
  if (v.ok) {
    return null
  }
  if (!v.hint) {
    return v.error
  }
  return `${v.error}\nHint: ${v.hint}`
}
@@ -16,6 +16,7 @@ import {
16
16
  pullNodeParams,
17
17
  pushNodeParams,
18
18
  SQL_INLINE_MAX_CHARS,
19
+ CODE_INLINE_MAX_CHARS,
19
20
  PROMPT_INLINE_MAX_CHARS,
20
21
  } from "./transforms.js"
21
22
 
@@ -167,7 +168,7 @@ export function serializeEndpoint(row, mapsCtx) {
167
168
  ).length
168
169
  const codeNodeCount = rawNodes.filter(n =>
169
170
  (n.node_type === "javascript_code" || n.node_type === "python_code") &&
170
- n.parameters?.code && n.parameters.code.length > SQL_INLINE_MAX_CHARS
171
+ n.parameters?.code && n.parameters.code.length > CODE_INLINE_MAX_CHARS
171
172
  ).length
172
173
 
173
174
  const nodes = rawNodes.map(node => {
@@ -82,7 +82,7 @@ function suspiciousPathWarning(resolvedPath, source) {
82
82
 
83
83
  // Subfolders that are always created so the layout is predictable. An agent
84
84
  // never has to check "does this folder exist?" before writing a new SQL/prompt/JS file.
85
- const CANONICAL_SUBDIRS = ["endpoints", "sql", "prompts", "code"]
85
+ const CANONICAL_SUBDIRS = ["endpoints", "sql", "prompts", "code", "migrations"]
86
86
 
87
87
  const README_CONTENT = `# dypai/
88
88
 
@@ -92,9 +92,10 @@ Declarative snapshot of your DYPAI project's backend.
92
92
 
93
93
  - \`endpoints/\` — one YAML per endpoint (the workflow definition).
94
94
  Subfolders represent endpoint groups, e.g. \`endpoints/Admin/foo.yaml\` → group "Admin".
95
- - \`sql/\` — SQL queries extracted from \`dypai_database\` nodes when longer than 500 chars.
95
+ - \`sql/\` — SQL queries extracted from \`dypai_database\` nodes when longer than 1500 chars.
96
96
  - \`prompts/\` — system prompts extracted from \`agent\` nodes when longer than 800 chars.
97
97
  - \`code/\` — JavaScript / Python extracted from \`javascript_code\` / \`python_code\` nodes when longer than 500 chars.
98
+ - \`migrations/\` — numbered SQL migrations (\`NNNN_description.sql\`). Apply with \`run_migration\`; tracked in \`system.applied_migrations\` so re-runs are no-ops.
98
99
 
99
100
  ## Workflow
100
101
 
@@ -106,6 +107,21 @@ Declarative snapshot of your DYPAI project's backend.
106
107
  Paths inside YAML (e.g. \`query_file: sql/create_invoice.sql\`) are always relative
107
108
  to this folder's root, regardless of where the YAML lives.
108
109
 
110
+ ## Schema changes
111
+
112
+ For DDL (CREATE / ALTER / DROP) and structural DML, author a migration file:
113
+
114
+ dypai/migrations/NNNN_description.sql
115
+
116
+ Then apply with the \`run_migration\` MCP tool:
117
+
118
+ run_migration({ migration_file: "dypai/migrations/0001_add_orders.sql" })
119
+
120
+ The tool tracks applied migrations in \`system.applied_migrations\` so re-runs
121
+ are safe no-ops. For ad-hoc queries and one-off writes, \`execute_sql\` is fine.
122
+ \`execute_sql\` and \`run_migration\` both refuse to modify \`auth\`, \`storage\`,
123
+ \`system\` schemas — those are DYPAI-managed.
124
+
109
125
  ## Reference examples
110
126
 
111
127
  When the project has no endpoints yet, \`dypai_pull\` writes three reference
@@ -240,7 +256,7 @@ workflow:
240
256
  # For UPDATE: replace \`insert:\` with \`update: {...}\` + \`where: {...}\`
241
257
  # For DELETE: replace \`insert:\` with \`delete: true\` + \`where: {...}\`
242
258
  #
243
- # Long SQL (>500 chars) inside \`operation: query\` is auto-extracted on
259
+ # Long SQL (>1500 chars) inside \`operation: query\` is auto-extracted on
244
260
  # \`dypai_pull\` to sql/<endpoint>.sql and referenced like:
245
261
  # query_file: sql/insert_order.sql
246
262
  # Same for prompts (system_prompt_file) and JS/Python code (code_file).
@@ -26,7 +26,15 @@
26
26
  // Only extract truly large content. Below these thresholds, SQL and prompts
27
27
  // stay inline in the YAML so the endpoint is one self-contained file.
28
28
  // Exported so codec.js can use the same cutoffs when deciding file naming.
29
- export const SQL_INLINE_MAX_CHARS = 500
29
+ //
30
+ // SQL: 1500 chars (≈ 40 lines formatted). Typical SELECTs with 2-3 JOINs
31
+ // stay inline where the rest of the workflow can see them. Only real
32
+ // multi-CTE queries get extracted.
33
+ // CODE: 500 chars. JS/Python in YAML is painful (escape hell, no linting),
34
+ // so we extract earlier than SQL.
35
+ // PROMPT: 800 chars. Real system prompts benefit from dedicated .md files.
36
+ export const SQL_INLINE_MAX_CHARS = 1500
37
+ export const CODE_INLINE_MAX_CHARS = 500
30
38
  export const PROMPT_INLINE_MAX_CHARS = 800
31
39
 
32
40
  const shouldInlineSql = (q) => !q || q.length <= SQL_INLINE_MAX_CHARS
@@ -134,7 +142,7 @@ export const NODE_FIELD_TRANSFORMS = [
134
142
  appliesWhen: (nodeType) => nodeType === "javascript_code" || nodeType === "python_code",
135
143
  pull(params, ctx) {
136
144
  if (!params.code) return {}
137
- if (params.code.length <= SQL_INLINE_MAX_CHARS) {
145
+ if (params.code.length <= CODE_INLINE_MAX_CHARS) {
138
146
  // Short code stays inline. Must be re-emitted here because
139
147
  // `pullConsumes: ["code"]` deletes it from the base object.
140
148
  // Before this fix, short code was silently dropped from the YAML