@dypai-ai/mcp 1.4.3 → 1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -188,6 +188,25 @@ function extractPlaceholders(s) {
188
188
  return out
189
189
  }
190
190
 
/**
 * Return the leading path/identifier of a template expression, dropping any
 * trailing filter or fallback operator.
 *
 * The head is: optional leading whitespace (discarded), one character from
 * [A-Za-z_$], then any run of [A-Za-z0-9_$.] plus '[' and ']'. Matching stops
 * at the first other character (whitespace, `|`, `?`, quotes, parentheses…).
 *
 * Examples:
 *   "input.limit | default(100)"  → "input.limit"
 *   "input.page ?? 1"             → "input.page"
 *   "nodes.foo.bar || 'x'"        → "nodes.foo.bar"
 *   "current_user_id | trim"      → "current_user_id"
 *   "items[0].name | trim"        → "items[0].name"
 *
 * The whole dotted/bracketed head is returned (NOT just the leaf identifier),
 * so callers that split on '.' or '[' keep working. Note that a quoted bracket
 * key such as items['k'] is cut at the quote and yields "items[".
 *
 * @param {string} expr - Raw placeholder expression (the text inside `${…}`).
 * @returns {string} The head path, or the trimmed expression when it does not
 *   start with an identifier character. null/undefined yield "".
 */
function stripExprTail(expr) {
  // Coerce defensively: the fallback below calls .trim(), which would throw a
  // TypeError on a non-string value that fails to match (e.g. a number).
  const text = expr == null ? "" : String(expr)
  const m = /^\s*([A-Za-z_$][A-Za-z_$0-9.\[\]]*)/.exec(text)
  return m ? m[1] : text.trim()
}
209
+
191
210
  /** Minimal Levenshtein distance, caps at 3 for "did you mean" typo suggestions. */
192
211
  function levenshteinSmall(a, b) {
193
212
  if (a === b) return 0
@@ -223,12 +242,53 @@ async function readSchemaTables(rootDir) {
/**
 * Extract the table names referenced by a SQL string.
 *
 * Recognises `FROM x`, `JOIN x`, `INTO x` and `UPDATE x`, optionally preceded
 * by `ONLY` and optionally schema-qualified, with or without double quotes:
 *   FROM x | FROM "x" | FROM <schema>.x | FROM "<schema>"."x" | FROM ONLY <schema>.x
 *
 * Only unqualified names and names in the `public` schema are returned; any
 * other schema qualifier causes the reference to be skipped.
 *
 * This is a regex heuristic, not a SQL parser. Known limitations: CTE names
 * are reported like tables, and expression forms that reuse these keywords
 * (e.g. `EXTRACT(EPOCH FROM col)`, `IS DISTINCT FROM col`) may still produce
 * a false positive.
 *
 * @param {string} sql - SQL text to scan. Non-string or empty input is allowed.
 * @returns {Set<string>} Table names without schema, in first-seen order.
 */
function extractSqlTables(sql) {
  const tables = new Set()
  if (typeof sql !== "string" || sql.length === 0) return tables

  // Neutralise string literals and comments in ONE left-to-right pass so that
  // whichever token starts first wins. Stripping comments before strings
  // would treat the `--` inside a literal such as '--' as a comment start and
  // swallow the rest of the line, including any real table reference.
  // Double-quoted identifiers are matched only so their content is protected;
  // they are kept verbatim because the table regex below needs them.
  const clean = sql.replace(
    /'(?:[^']|'')*'|"(?:[^"]|"")*"|--[^\n]*|\/\*[\s\S]*?\*\//g,
    (token) => {
      if (token[0] === "'") return "''"
      if (token[0] === '"') return token
      return " "
    },
  )

  // Groups: 1 = schema (optional), 2 = quoted table name, 3 = bare table name.
  // The leading \b stops identifiers that merely END in a keyword (e.g.
  // `valid_from AS x`) from being read as `FROM AS`.
  const re = /\b(?:FROM|JOIN|INTO|UPDATE)\s+(?:ONLY\s+)?(?:"?([a-zA-Z_]\w*)"?\s*\.\s*)?(?:"([a-zA-Z_]\w*)"|([a-zA-Z_]\w*))/gi
  for (const [, schema, quotedName, bareName] of clean.matchAll(re)) {
    // A bare word right after the keyword may be SQL syntax rather than a
    // table (`DO UPDATE SET`, `FOR UPDATE SKIP LOCKED`, `FROM LATERAL (...)`).
    // Quoted identifiers are always real names, so they are never filtered.
    if (bareName && SQL_KEYWORDS_AFTER_FROM.has(bareName.toUpperCase())) continue
    // Only tables in the user-managed `public` schema are validated. System
    // schemas (auth, system, ext, pg_catalog, information_schema) are managed
    // by the engine and not present in dypai/schema.sql.
    if (schema && schema.toLowerCase() !== "public") continue
    tables.add(quotedName || bareName)
  }
  return tables
}

// Unquoted words that can directly follow FROM/JOIN/INTO/UPDATE without being
// a table name. Compared against the upper-cased candidate.
const SQL_KEYWORDS_AFTER_FROM = new Set([
  "ONLY", "LATERAL", "SELECT", "VALUES", "TABLE",
  // `ON CONFLICT ... DO UPDATE SET ...`
  "SET",
  // Row-locking clauses: `FOR UPDATE OF t`, `FOR UPDATE SKIP LOCKED`, `FOR UPDATE NOWAIT`
  "OF", "SKIP", "NOWAIT",
])
278
+
279
+ // Legacy `dypai_database.operation` values that perform writes. New code
280
+ // should use `operation: mutation`, but old YAMLs may still use these and
281
+ // the engine still accepts them.
282
+ const LEGACY_WRITE_OPS = new Set(["insert", "update", "delete", "upsert"])
283
+
284
+ // Legacy ops whose SQL lives in `node.query` (custom_query, raw select).
285
+ const LEGACY_OPS_THAT_USE_QUERY = new Set(["custom_query", "select"])
286
+
287
+ // Legacy ops whose target table is in `node.table` (no SQL string).
288
+ const LEGACY_OPS_THAT_USE_TABLE_FIELD = new Set([
289
+ "select", "insert", "update", "delete", "upsert", "aggregate",
290
+ ])
291
+
232
292
  // ─── Rules ──────────────────────────────────────────────────────────────────
233
293
 
234
294
  function ruleUsesJwt(trigger) {
@@ -259,11 +319,22 @@ function validateEndpoint(entry, ctx) {
259
319
  // Collect SQL tables referenced (before checking each individually)
260
320
  const referencedTables = new Set()
261
321
 
322
+ // Aggregate missing input.X refs across the whole endpoint so we emit ONE
323
+ // diagnostic per endpoint instead of N (an endpoint with 11 stray refs
324
+ // produces 11 near-identical errors otherwise — pure noise). Map keeps
325
+ // first-seen `loc` for context. Iteration order of Map preserves insertion.
326
+ const missingInputProps = new Map() // propName -> { loc, expr }
327
+ const missingNodeRefs = new Map() // nodeId -> { loc, expr } — same idea for ${nodes.X.Y}
328
+
262
329
  for (const { source, loc, value } of sources) {
263
330
  // --- Placeholder checks ---
264
331
  for (const expr of extractPlaceholders(value)) {
265
- // Strip leading/trailing whitespace in the expression
266
- const e = expr.trim()
332
+ // Normalize: trim whitespace AND strip any template tail like
333
+ // ` | default(100)`, ` ?? 1`, ` || 'x'` — we only care about the
334
+ // path/identifier head. Without this, `${input.limit | default(100)}`
335
+ // would be parsed as property name "limit | default(100)" → false
336
+ // positive `input_placeholder_missing`.
337
+ const e = stripExprTail(expr)
267
338
 
268
339
  // ${input.X} or ${input.X.Y}
269
340
  // Only validate against the input schema if one is declared; DYPAI allows
@@ -273,13 +344,9 @@ function validateEndpoint(entry, ctx) {
273
344
  const first = e.slice(6).split(/[.\[]/)[0]
274
345
  const hasSchema = Object.keys(inputProps).length > 0
275
346
  if (hasSchema && !inputProps[first]) {
276
- diagnostics.push({
277
- severity: "error",
278
- rule: "input_placeholder_missing",
279
- endpoint: name, file, loc,
280
- message: `\${${expr}} references input.${first}, but the endpoint's input schema has no '${first}' property.`,
281
- fix_hint: `Valid properties: ${Object.keys(inputProps).join(", ")}`,
282
- })
347
+ if (!missingInputProps.has(first)) {
348
+ missingInputProps.set(first, { loc, expr })
349
+ }
283
350
  } else if (!hasSchema) {
284
351
  // One warning per endpoint max — accumulate in a set
285
352
  ctx.schemaless ??= new Set()
@@ -300,15 +367,9 @@ function validateEndpoint(entry, ctx) {
300
367
  else if (e.startsWith("nodes.")) {
301
368
  const nodeId = e.slice(6).split(/[.\[]/)[0]
302
369
  if (!nodeIds.has(nodeId)) {
303
- diagnostics.push({
304
- severity: "error",
305
- rule: "node_ref_missing",
306
- endpoint: name, file, loc,
307
- message: `\${${expr}} references node '${nodeId}' but that node is not declared in this workflow.`,
308
- fix_hint: nodeIds.size
309
- ? `Known nodes: ${[...nodeIds].join(", ")}`
310
- : "This endpoint has no nodes yet.",
311
- })
370
+ if (!missingNodeRefs.has(nodeId)) {
371
+ missingNodeRefs.set(nodeId, { loc, expr })
372
+ }
312
373
  }
313
374
  }
314
375
 
@@ -326,16 +387,73 @@ function validateEndpoint(entry, ctx) {
326
387
  }
327
388
  }
328
389
 
329
- // --- SQL: collect referenced tables for later comparison against schema.sql ---
330
- // Heuristic: look like SQL (contains SELECT/INSERT/UPDATE/DELETE/WITH)
331
- if (/\b(SELECT|INSERT|UPDATE|DELETE|WITH)\b/i.test(value)) {
332
- for (const t of extractSqlTables(value)) referencedTables.add(t)
390
+ // NOTE: SQL table extraction used to live here (anywhere a string looked
391
+ // like SQL). That generated false positives whenever a prompt, comment,
392
+ // or label happened to contain words like "INSERT" or "SELECT". Table
393
+ // extraction is now done ONLY inside the per-node loop below, restricted
394
+ // to dypai_database nodes' actual SQL fields. See the dedicated block.
395
+ }
396
+
397
+ // --- Emit aggregated input_placeholder_missing (one diag per endpoint) ---
398
+ // One aggregated diagnostic is better than N near-identical ones: it gives the
399
+ // agent ONE actionable item ("add these to input.properties OR drop these
400
+ // refs") instead of a wall of similar errors that visually drown the others.
401
+ if (missingInputProps.size > 0) {
402
+ const propNames = [...missingInputProps.keys()]
403
+ const validProps = Object.keys(inputProps)
404
+ if (propNames.length === 1) {
405
+ const [first] = propNames
406
+ const { loc, expr } = missingInputProps.get(first)
407
+ diagnostics.push({
408
+ severity: "error",
409
+ rule: "input_placeholder_missing",
410
+ endpoint: name, file, loc,
411
+ message: `\${${expr}} references input.${first}, but the endpoint's input schema has no '${first}' property.`,
412
+ fix_hint: `Valid properties: ${validProps.join(", ") || "(none declared)"}`,
413
+ })
414
+ } else {
415
+ // Aggregated form for endpoints with multiple stray refs.
416
+ const firstLoc = missingInputProps.values().next().value?.loc
417
+ diagnostics.push({
418
+ severity: "error",
419
+ rule: "input_placeholder_missing",
420
+ endpoint: name, file, loc: firstLoc,
421
+ message:
422
+ `Endpoint references ${propNames.length} input properties not declared in the input schema: ` +
423
+ `${propNames.join(", ")}.`,
424
+ fix_hint:
425
+ `Either add them to input.properties (so request validation lets them through) ` +
426
+ `OR remove the \${input.X} references that the workflow no longer uses. ` +
427
+ `Currently declared: ${validProps.join(", ") || "(none)"}.`,
428
+ })
429
+ }
430
+ }
333
431
 
334
- // NOTE: an earlier version of this validator warned about manual
335
- // `'${current_user_id}'::uuid` casts as "redundant", under the assumption
336
- // that the engine auto-cast UUID-shaped values. That auto-cast was
337
- // removed because it broke postgres.js binding in production. Manual
338
- // ::uuid casts are now legitimate again, so no warning is emitted here.
432
+ // Same aggregation for ${nodes.X.Y} where X is not declared.
433
+ if (missingNodeRefs.size > 0) {
434
+ const nodeIdsMissing = [...missingNodeRefs.keys()]
435
+ const known = nodeIds.size ? `Known nodes: ${[...nodeIds].join(", ")}` : "This endpoint has no nodes yet."
436
+ if (nodeIdsMissing.length === 1) {
437
+ const [first] = nodeIdsMissing
438
+ const { loc, expr } = missingNodeRefs.get(first)
439
+ diagnostics.push({
440
+ severity: "error",
441
+ rule: "node_ref_missing",
442
+ endpoint: name, file, loc,
443
+ message: `\${${expr}} references node '${first}' but that node is not declared in this workflow.`,
444
+ fix_hint: known,
445
+ })
446
+ } else {
447
+ const firstLoc = missingNodeRefs.values().next().value?.loc
448
+ diagnostics.push({
449
+ severity: "error",
450
+ rule: "node_ref_missing",
451
+ endpoint: name, file, loc: firstLoc,
452
+ message:
453
+ `Endpoint references ${nodeIdsMissing.length} nodes that are not declared: ` +
454
+ `${nodeIdsMissing.join(", ")}.`,
455
+ fix_hint: known,
456
+ })
339
457
  }
340
458
  }
341
459
 
@@ -387,6 +505,30 @@ function validateEndpoint(entry, ctx) {
387
505
  // dypai_database — coherence checks for the new canonical operations.
388
506
  if (nodeType === "dypai_database") {
389
507
  const op = node.operation
508
+
509
+ // Extract referenced tables from real SQL fields ONLY. Doing this here
510
+ // (instead of in the generic walkStrings pass) eliminates the class of
511
+ // false positive where a prompt/comment/label happens to contain words
512
+ // like "INSERT" or "SELECT". Also covers `mutation` (table: <name>)
513
+ // since that's a guaranteed table reference.
514
+ if (op === "query" || (op && LEGACY_OPS_THAT_USE_QUERY.has(op))) {
515
+ const sqlText = typeof node.query === "string" ? node.query : ""
516
+ if (sqlText) {
517
+ for (const t of extractSqlTables(sqlText)) referencedTables.add(t)
518
+ }
519
+ }
520
+ if (op === "mutation" && typeof node.table === "string") {
521
+ referencedTables.add(node.table)
522
+ }
523
+ // Legacy ops like `select` / `insert` / `update` / `delete` use `table:`
524
+ // as the target table directly.
525
+ if (op && LEGACY_OPS_THAT_USE_TABLE_FIELD.has(op) && typeof node.table === "string") {
526
+ referencedTables.add(node.table)
527
+ }
528
+ // Resolved query_file content also counts as SQL (loaded by the codec).
529
+ if (typeof node.query === "string" && node.query.length > 0) {
530
+ for (const t of extractSqlTables(node.query)) referencedTables.add(t)
531
+ }
390
532
  const LEGACY_OPS = new Set(["select", "insert", "update", "delete", "upsert", "aggregate", "copy_to", "custom_query"])
391
533
  if (op && LEGACY_OPS.has(op)) {
392
534
  const suggested = (op === "custom_query") ? "query" : (op === "select") ? "query" : "mutation"
@@ -437,14 +579,26 @@ function validateEndpoint(entry, ctx) {
437
579
  fix_hint: `Pick exactly one: insert OR update OR delete (split into separate nodes if you need more).`,
438
580
  })
439
581
  }
440
- if ((wantsUpdate || wantsDelete) && !node.where) {
441
- diagnostics.push({
442
- severity: "error",
443
- rule: "mutation_missing_where",
444
- endpoint: name, file, loc: `workflow.nodes[${node.id}]`,
445
- message: `Node '${node.id}' (mutation ${wantsUpdate ? "update" : "delete"}) is missing 'where:' — refusing to operate on every row.`,
446
- fix_hint: `Add 'where: { id: \${input.id}, user_id: \${current_user_id} }' (or whatever filter applies).`,
447
- })
582
+ if (wantsUpdate || wantsDelete) {
583
+ // `where: {}` is just as dangerous as omitting `where:` entirely — both
584
+ // produce an unconstrained UPDATE/DELETE in the engine.
585
+ const whereVal = node.where
586
+ const whereIsEmpty =
587
+ whereVal === undefined ||
588
+ whereVal === null ||
589
+ (typeof whereVal === "object" && !Array.isArray(whereVal) && Object.keys(whereVal).length === 0)
590
+ if (whereIsEmpty) {
591
+ const action = wantsUpdate ? "update" : "delete"
592
+ diagnostics.push({
593
+ severity: "error",
594
+ rule: "mutation_missing_where",
595
+ endpoint: name, file, loc: `workflow.nodes[${node.id}]`,
596
+ message: whereVal === undefined || whereVal === null
597
+ ? `Node '${node.id}' (mutation ${action}) is missing 'where:' — refusing to operate on every row.`
598
+ : `Node '${node.id}' (mutation ${action}) has an empty 'where: {}' — that would ${action} every row in the table.`,
599
+ fix_hint: `Add at least one filter, e.g. 'where: { id: \${input.id}, user_id: \${current_user_id} }'.`,
600
+ })
601
+ }
448
602
  }
449
603
  // Foreign fields that belong to `operation: query`
450
604
  const QUERY_ONLY = ["query", "query_file", "params"]
@@ -520,12 +674,35 @@ function validateEndpoint(entry, ctx) {
520
674
  // automatically from displayOptions.show — so the catalog is the single
521
675
  // source of truth and we don't hardcode node names anywhere.
522
676
  const allRequired = []
677
+ const allOf = Array.isArray(schema.inputs?.allOf) ? schema.inputs.allOf : []
523
678
 
524
- // Universal requirements
525
- for (const req of required) allRequired.push({ name: req, condition: null })
679
+ // Polymorphic-node guard.
680
+ //
681
+ // Some node schemas in the catalog are polymorphic (the real param set
682
+ // depends on a discriminator like `operation`) but they declare ALL
683
+ // params from ALL branches inside a flat `required` without using
684
+ // `allOf[].if/then` to model the conditionality. The validator can't
685
+ // tell which branch the user picked, so it would emit a missing_required
686
+ // warning for every cross-branch param — pure noise.
687
+ //
688
+ // Heuristic: if the schema has an `operation` enum AND the node sets
689
+ // a value, OR if it's `dypai_database` (where the dedicated coherence
690
+ // block above already validates the real per-operation requirements),
691
+ // skip the flat `required` and rely on the catalog's `allOf` blocks
692
+ // (when present) for the actual conditional requirements.
693
+ const hasOperationEnum = schema.inputs?.properties?.operation?.enum?.length > 0
694
+ const isPolymorphicNode =
695
+ nodeType === "dypai_database" ||
696
+ (hasOperationEnum && node.operation !== undefined)
697
+ const trustFlatRequired = !isPolymorphicNode || allOf.length > 0
698
+
699
+ // Universal requirements (skipped for polymorphic nodes when the
700
+ // catalog provides no conditional structure to disambiguate).
701
+ if (trustFlatRequired) {
702
+ for (const req of required) allRequired.push({ name: req, condition: null })
703
+ }
526
704
 
527
705
  // Conditional requirements from allOf[]
528
- const allOf = Array.isArray(schema.inputs?.allOf) ? schema.inputs.allOf : []
529
706
  for (const rule of allOf) {
530
707
  const ifProps = rule?.if?.properties || {}
531
708
  const thenRequired = rule?.then?.required || []
@@ -543,10 +720,26 @@ function validateEndpoint(entry, ctx) {
543
720
  }
544
721
  }
545
722
 
723
+ // The codec resolves user-friendly NAMES to engine UUIDs at push time:
724
+ // credential: "openai-prod" → credential_id: "<uuid>"
725
+ // tools: ["my-endpoint"] → tool_ids: ["<uuid>"]
726
+ // endpoint: "my-endpoint" → endpoint_id: "<uuid>"
727
+ // The catalog only knows the *_id form (what the engine ultimately
728
+ // receives), so a user who wrote `credential: openai-prod` correctly
729
+ // would otherwise be told `credential_id` is missing. Treat the
730
+ // human-friendly alias as satisfying the *_id requirement.
731
+ const ALIAS_FOR_REQUIRED = {
732
+ credential_id: "credential",
733
+ tool_ids: "tools",
734
+ endpoint_id: "endpoint",
735
+ }
736
+
546
737
  for (const { name: req, condition } of allRequired) {
547
738
  if (!paramKeys.includes(req)) {
548
739
  const hasFileEquivalent = META_KEYS.has(`${req}_file`) && node[`${req}_file`]
549
- if (!hasFileEquivalent) {
740
+ const aliasField = ALIAS_FOR_REQUIRED[req]
741
+ const hasAlias = aliasField && node[aliasField] !== undefined && node[aliasField] !== null
742
+ if (!hasFileEquivalent && !hasAlias) {
550
743
  diagnostics.push({
551
744
  severity: "warn",
552
745
  rule: "missing_required_param",
@@ -562,11 +755,39 @@ function validateEndpoint(entry, ctx) {
562
755
  }
563
756
  }
564
757
 
565
- // Unknown/typo params?
758
+ // Unknown/typo params? Two-phase emission to avoid catalog-staleness noise:
759
+ // Phase 1: collect all unknowns + their typo suggestions
760
+ // Phase 2: if 3+ unknowns AND none has a near-typo suggestion, treat
761
+ // as catalog-staleness (one consolidated diag) rather than spamming
762
+ // N near-identical warnings.
763
+ const knownKeys = Object.keys(properties)
764
+ const unknownsForNode = []
566
765
  for (const key of paramKeys) {
567
766
  if (!properties[key]) {
568
- const knownKeys = Object.keys(properties)
569
767
  const suggestions = knownKeys.filter(k => levenshteinSmall(k, key) <= 2).slice(0, 2)
768
+ unknownsForNode.push({ key, suggestions })
769
+ }
770
+ }
771
+ const STALE_THRESHOLD = 3
772
+ const anyHasSuggestion = unknownsForNode.some(u => u.suggestions.length > 0)
773
+ if (unknownsForNode.length >= STALE_THRESHOLD && !anyHasSuggestion) {
774
+ // Likely catalog stale (or this node type just accepts more params
775
+ // than the catalog declares). One warning instead of N.
776
+ diagnostics.push({
777
+ severity: "warn",
778
+ rule: "unknown_params_bulk",
779
+ endpoint: name, file, loc: `workflow.nodes[${node.id}]`,
780
+ message:
781
+ `Node '${node.id}' (type '${nodeType}') has ${unknownsForNode.length} parameters not in the catalog: ` +
782
+ `${unknownsForNode.map(u => u.key).join(", ")}.`,
783
+ fix_hint:
784
+ `node-catalog.json may be stale (the engine often accepts more params than the catalog lists). ` +
785
+ `Run dypai_pull to refresh, or ignore if these params work in production. ` +
786
+ `Catalog-known params: ${knownKeys.slice(0, 8).join(", ")}${knownKeys.length > 8 ? "…" : ""}.`,
787
+ })
788
+ } else {
789
+ // Per-param emission — preserves "Did you mean?" hints when useful.
790
+ for (const { key, suggestions } of unknownsForNode) {
570
791
  diagnostics.push({
571
792
  severity: "warn",
572
793
  rule: "unknown_param",
@@ -576,7 +797,11 @@ function validateEndpoint(entry, ctx) {
576
797
  ? `Did you mean: ${suggestions.join(", ")}?`
577
798
  : `Valid params: ${knownKeys.slice(0, 8).join(", ")}${knownKeys.length > 8 ? "…" : ""}`,
578
799
  })
579
- } else {
800
+ }
801
+ }
802
+ // Enum/range checks still need to run on every key that DID match the schema.
803
+ for (const key of paramKeys) {
804
+ if (properties[key]) {
580
805
  // Enum / range checks for primitive values
581
806
  const prop = properties[key]
582
807
  const v = node[key]
@@ -586,12 +811,23 @@ function validateEndpoint(entry, ctx) {
586
811
  // dypai_storage / dypai_database nodes.
587
812
  const hasPlaceholder = typeof v === "string" && v.includes("${")
588
813
  if (prop.enum && typeof v === "string" && !hasPlaceholder && !prop.enum.includes(v)) {
814
+ // WARN, not error: node-catalog.json is generated from the central
815
+ // control plane and routinely lags behind the actual engine
816
+ // (operations are added/renamed faster than catalog regeneration
817
+ // happens). When the catalog says `operation` ∈ [a, b, c] but
818
+ // the engine actually accepts `d` too, blocking push with an
819
+ // ERROR would be wrong — the engine is the source of truth at
820
+ // runtime. Surface as warning so the user sees a "double-check
821
+ // this" hint without blocking the workflow. Real typos (`fnid`
822
+ // for `find`) still surface clearly.
589
823
  diagnostics.push({
590
- severity: "error",
824
+ severity: "warn",
591
825
  rule: "param_enum_violation",
592
826
  endpoint: name, file, loc: `workflow.nodes[${node.id}].${key}`,
593
- message: `Node '${node.id}' parameter '${key}' = '${v}' is not one of: ${prop.enum.join(", ")}.`,
594
- fix_hint: `Allowed values: ${prop.enum.join(", ")}`,
827
+ message: `Node '${node.id}' parameter '${key}' = '${v}' is not in the catalog enum: ${prop.enum.join(", ")}.`,
828
+ fix_hint:
829
+ `Either fix to one of: ${prop.enum.join(", ")} — OR if the engine accepts '${v}' ` +
830
+ `(catalog may be stale), refresh node-catalog.json with dypai_pull and ignore this if the warning persists.`,
595
831
  })
596
832
  }
597
833
  if (prop.type === "number" || prop.type === "integer") {
@@ -654,7 +890,138 @@ function validateEndpoint(entry, ctx) {
654
890
  }
655
891
  }
656
892
 
657
- return diagnostics
893
+ // ── Edge sanity: catch typos in workflow.edges before runtime ────────────
894
+ // The engine silently skips edges whose `from`/`to` doesn't resolve to a
895
+ // node id, which manifests as "node never ran" — extremely hard to debug.
896
+ const edges = doc.workflow?.edges || []
897
+ if (Array.isArray(edges)) {
898
+ for (let i = 0; i < edges.length; i++) {
899
+ const edge = edges[i]
900
+ if (!edge || typeof edge !== "object") continue
901
+ const from = edge.from ?? edge.source
902
+ const to = edge.to ?? edge.target
903
+ const known = nodeIds.size ? `Known nodes: ${[...nodeIds].join(", ")}` : "Add the node to workflow.nodes[] first."
904
+ if (from && !nodeIds.has(from)) {
905
+ diagnostics.push({
906
+ severity: "error",
907
+ rule: "edge_unknown_node",
908
+ endpoint: name, file, loc: `workflow.edges[${i}].from`,
909
+ message: `Edge from '${from}' but no node with that id is declared.`,
910
+ fix_hint: known,
911
+ })
912
+ }
913
+ if (to && !nodeIds.has(to)) {
914
+ diagnostics.push({
915
+ severity: "error",
916
+ rule: "edge_unknown_node",
917
+ endpoint: name, file, loc: `workflow.edges[${i}].to`,
918
+ message: `Edge to '${to}' but no node with that id is declared.`,
919
+ fix_hint: known,
920
+ })
921
+ }
922
+ }
923
+ }
924
+
925
+ // ── Ambiguous response: multiple terminal nodes without explicit return ──
926
+ // The engine's behavior (graphScheduler.ts:202): if no node is marked
927
+ // is_return, it returns the result of whichever node ran LAST. With a
928
+ // single terminal node (linear chain or sole node) that's deterministic
929
+ // and fine — no warning needed. With MULTIPLE terminal nodes (branches
930
+ // that don't reconverge), "last to finish" is non-deterministic and
931
+ // almost certainly not what the author intended.
932
+ const allNodes = doc.workflow?.nodes || []
933
+ if (allNodes.length > 1) {
934
+ const hasReturn = allNodes.some(n => n?.return === true || n?.is_return === true)
935
+ if (!hasReturn) {
936
+ const triggerKeys = Object.keys(doc.trigger || {})
937
+ const NEEDS_RESPONSE = new Set(["http_api", "webhook"])
938
+ const needsResponse = triggerKeys.some(k => NEEDS_RESPONSE.has(k))
939
+ if (needsResponse) {
940
+ // Find terminal nodes (no outgoing edges).
941
+ const edgeList = Array.isArray(doc.workflow?.edges) ? doc.workflow.edges : []
942
+ const hasOutgoing = new Set()
943
+ for (const e of edgeList) {
944
+ const from = e?.from ?? e?.source
945
+ if (from) hasOutgoing.add(from)
946
+ }
947
+ const terminals = allNodes.filter(n => !hasOutgoing.has(n.id))
948
+ if (terminals.length > 1) {
949
+ diagnostics.push({
950
+ severity: "warn",
951
+ rule: "ambiguous_return",
952
+ endpoint: name, file, loc: "workflow.nodes",
953
+ message:
954
+ `Endpoint has ${terminals.length} terminal nodes (${terminals.map(t => t.id).join(", ")}) but none is marked 'return: true'. ` +
955
+ `The engine will return whichever finishes last, which is non-deterministic.`,
956
+ fix_hint: `Mark exactly one terminal node with 'return: true' to make the response unambiguous.`,
957
+ })
958
+ }
959
+ }
960
+ }
961
+ }
962
+
963
+ // ── tool: true requires tool_description ─────────────────────────────────
964
+ // Without a description, an agent's LLM has nothing to base "should I call
965
+ // this?" decisions on, and the engine will accept the tool but it will
966
+ // never actually be picked.
967
+ if (doc.tool === true && (!doc.tool_description || String(doc.tool_description).trim() === "")) {
968
+ diagnostics.push({
969
+ severity: "warn",
970
+ rule: "tool_missing_description",
971
+ endpoint: name, file,
972
+ message: `Endpoint marked 'tool: true' but has no 'tool_description' — LLMs won't know when to invoke it.`,
973
+ fix_hint: `Add 'tool_description: <plain-language description of what it does and when an agent should call it>'.`,
974
+ })
975
+ }
976
+
977
+ // ── auth_mode: public + write operation = security hole ──────────────────
978
+ // Public endpoints are anonymous and have no per-user rate limit; combining
979
+ // that with a write means anyone on the internet can mutate state.
980
+ const authMode = doc.trigger?.http_api?.auth_mode
981
+ if (authMode === "public") {
982
+ for (const node of allNodes) {
983
+ const nodeType = node?.type ?? node?.node_type
984
+ if (nodeType !== "dypai_database") continue
985
+ const op = node.operation
986
+ let writeKind = null
987
+ if (op === "mutation") {
988
+ if (node.insert !== undefined && node.insert !== null) writeKind = "INSERT"
989
+ else if (node.update !== undefined && node.update !== null) writeKind = "UPDATE"
990
+ else if (node.delete === true) writeKind = "DELETE"
991
+ } else if (op === "query") {
992
+ const sql = String(node.query || "")
993
+ if (/\b(INSERT|UPDATE|DELETE|TRUNCATE|DROP|ALTER|CREATE)\b/i.test(sql)) {
994
+ writeKind = "write SQL"
995
+ }
996
+ } else if (LEGACY_WRITE_OPS.has(op)) {
997
+ writeKind = op.toUpperCase()
998
+ }
999
+ if (writeKind) {
1000
+ diagnostics.push({
1001
+ severity: "error",
1002
+ rule: "public_auth_with_write",
1003
+ endpoint: name, file, loc: `workflow.nodes[${node.id}]`,
1004
+ message: `auth_mode: public + ${writeKind} write — anyone can call this anonymously and mutate data.`,
1005
+ fix_hint: `Change trigger.http_api.auth_mode to 'jwt' (user-scoped) or 'api_key' (server-to-server). 'public' is for anonymous READS only.`,
1006
+ })
1007
+ }
1008
+ }
1009
+ }
1010
+
1011
+ // Dedupe identical diagnostics within an endpoint (same rule + loc + message).
1012
+ // The walkStrings pass naturally emits one diag per occurrence of a placeholder
1013
+ // — but a SQL block with `${current_user_id}` repeated for INSERT and WHERE
1014
+ // produces two literally-identical errors at the same loc. Collapsing them
1015
+ // is harmless (one fix addresses both occurrences) and keeps the output clean.
1016
+ const seen = new Set()
1017
+ const unique = []
1018
+ for (const d of diagnostics) {
1019
+ const key = `${d.rule}|${d.loc || ""}|${d.message}`
1020
+ if (seen.has(key)) continue
1021
+ seen.add(key)
1022
+ unique.push(d)
1023
+ }
1024
+ return unique
658
1025
  }
659
1026
 
660
1027
  // ─── Schema staleness detection ─────────────────────────────────────────────