nodebench-mcp 2.31.1 → 2.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -6
- package/dist/engine/server.js +14 -4
- package/dist/engine/server.js.map +1 -1
- package/dist/index.js +1946 -670
- package/dist/index.js.map +1 -1
- package/dist/security/SecurityError.d.ts +18 -0
- package/dist/security/SecurityError.js +22 -0
- package/dist/security/SecurityError.js.map +1 -0
- package/dist/security/__tests__/security.test.d.ts +8 -0
- package/dist/security/__tests__/security.test.js +295 -0
- package/dist/security/__tests__/security.test.js.map +1 -0
- package/dist/security/auditLog.d.ts +36 -0
- package/dist/security/auditLog.js +178 -0
- package/dist/security/auditLog.js.map +1 -0
- package/dist/security/commandSandbox.d.ts +33 -0
- package/dist/security/commandSandbox.js +159 -0
- package/dist/security/commandSandbox.js.map +1 -0
- package/dist/security/config.d.ts +23 -0
- package/dist/security/config.js +43 -0
- package/dist/security/config.js.map +1 -0
- package/dist/security/credentialRedactor.d.ts +22 -0
- package/dist/security/credentialRedactor.js +118 -0
- package/dist/security/credentialRedactor.js.map +1 -0
- package/dist/security/index.d.ts +20 -0
- package/dist/security/index.js +21 -0
- package/dist/security/index.js.map +1 -0
- package/dist/security/pathSandbox.d.ts +23 -0
- package/dist/security/pathSandbox.js +160 -0
- package/dist/security/pathSandbox.js.map +1 -0
- package/dist/security/urlValidator.d.ts +23 -0
- package/dist/security/urlValidator.js +125 -0
- package/dist/security/urlValidator.js.map +1 -0
- package/dist/tools/agentBootstrapTools.js +22 -29
- package/dist/tools/agentBootstrapTools.js.map +1 -1
- package/dist/tools/contextSandboxTools.js +7 -9
- package/dist/tools/contextSandboxTools.js.map +1 -1
- package/dist/tools/deepSimTools.d.ts +2 -0
- package/dist/tools/deepSimTools.js +404 -0
- package/dist/tools/deepSimTools.js.map +1 -0
- package/dist/tools/dimensionTools.d.ts +2 -0
- package/dist/tools/dimensionTools.js +246 -0
- package/dist/tools/dimensionTools.js.map +1 -0
- package/dist/tools/executionTraceTools.d.ts +2 -0
- package/dist/tools/executionTraceTools.js +446 -0
- package/dist/tools/executionTraceTools.js.map +1 -0
- package/dist/tools/founderTools.d.ts +13 -0
- package/dist/tools/founderTools.js +595 -0
- package/dist/tools/founderTools.js.map +1 -0
- package/dist/tools/gitWorkflowTools.js +14 -10
- package/dist/tools/gitWorkflowTools.js.map +1 -1
- package/dist/tools/githubTools.js +19 -2
- package/dist/tools/githubTools.js.map +1 -1
- package/dist/tools/index.d.ts +87 -0
- package/dist/tools/index.js +102 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/localFileTools.js +24 -12
- package/dist/tools/localFileTools.js.map +1 -1
- package/dist/tools/memoryDecay.d.ts +70 -0
- package/dist/tools/memoryDecay.js +247 -0
- package/dist/tools/memoryDecay.js.map +1 -0
- package/dist/tools/missionHarnessTools.d.ts +32 -0
- package/dist/tools/missionHarnessTools.js +972 -0
- package/dist/tools/missionHarnessTools.js.map +1 -0
- package/dist/tools/observabilityTools.d.ts +15 -0
- package/dist/tools/observabilityTools.js +787 -0
- package/dist/tools/observabilityTools.js.map +1 -0
- package/dist/tools/openclawTools.js +151 -36
- package/dist/tools/openclawTools.js.map +1 -1
- package/dist/tools/progressiveDiscoveryTools.js +5 -4
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
- package/dist/tools/qualityGateTools.js +118 -2
- package/dist/tools/qualityGateTools.js.map +1 -1
- package/dist/tools/rssTools.js +3 -0
- package/dist/tools/rssTools.js.map +1 -1
- package/dist/tools/scraplingTools.js +15 -0
- package/dist/tools/scraplingTools.js.map +1 -1
- package/dist/tools/seoTools.js +66 -1
- package/dist/tools/seoTools.js.map +1 -1
- package/dist/tools/sessionMemoryTools.js +50 -11
- package/dist/tools/sessionMemoryTools.js.map +1 -1
- package/dist/tools/temporalIntelligenceTools.d.ts +12 -0
- package/dist/tools/temporalIntelligenceTools.js +1068 -0
- package/dist/tools/temporalIntelligenceTools.js.map +1 -0
- package/dist/tools/toolRegistry.d.ts +19 -0
- package/dist/tools/toolRegistry.js +857 -31
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/tools/webTools.js +14 -1
- package/dist/tools/webTools.js.map +1 -1
- package/dist/tools/webmcpTools.js +13 -2
- package/dist/tools/webmcpTools.js.map +1 -1
- package/dist/toolsetRegistry.js +13 -0
- package/dist/toolsetRegistry.js.map +1 -1
- package/dist/types.d.ts +10 -0
- package/package.json +124 -124
|
@@ -1322,6 +1322,97 @@ const REGISTRY_ENTRIES = [
|
|
|
1322
1322
|
phase: "ship",
|
|
1323
1323
|
},
|
|
1324
1324
|
// ═══ RESEARCH WRITING ═══
|
|
1325
|
+
{
|
|
1326
|
+
name: "start_execution_run",
|
|
1327
|
+
category: "platform",
|
|
1328
|
+
tags: ["execution-trace", "run", "start", "session", "receipt", "workflow", "traceable", "begin"],
|
|
1329
|
+
quickRef: {
|
|
1330
|
+
nextAction: "Execution run started. Record the first meaningful step immediately so the trace has a visible timeline.",
|
|
1331
|
+
nextTools: ["record_execution_step", "attach_execution_evidence", "record_execution_decision"],
|
|
1332
|
+
methodology: "agent_bootstrap",
|
|
1333
|
+
tip: "Use one run per user-visible workflow. Keep the title operator-friendly because it appears in the UI.",
|
|
1334
|
+
},
|
|
1335
|
+
phase: "implement",
|
|
1336
|
+
complexity: "low",
|
|
1337
|
+
},
|
|
1338
|
+
{
|
|
1339
|
+
name: "complete_execution_run",
|
|
1340
|
+
category: "platform",
|
|
1341
|
+
tags: ["execution-trace", "run", "complete", "finish", "close", "status", "traceable", "ship"],
|
|
1342
|
+
quickRef: {
|
|
1343
|
+
nextAction: "Execution run closed. Review the resulting Execution Trace tabs to confirm evidence, decisions, and verification all landed correctly.",
|
|
1344
|
+
nextTools: ["record_learning", "save_session_note"],
|
|
1345
|
+
methodology: "closed_loop",
|
|
1346
|
+
tip: "Pass token usage and toolsUsed when available so the run is useful for later benchmarking.",
|
|
1347
|
+
},
|
|
1348
|
+
phase: "ship",
|
|
1349
|
+
complexity: "low",
|
|
1350
|
+
},
|
|
1351
|
+
{
|
|
1352
|
+
name: "record_execution_step",
|
|
1353
|
+
category: "platform",
|
|
1354
|
+
tags: ["execution-trace", "receipt", "step", "timeline", "workflow", "action", "traceable", "span"],
|
|
1355
|
+
quickRef: {
|
|
1356
|
+
nextAction: "Step recorded. Add evidence for supporting facts and record a decision if the step changed direction or selected an option.",
|
|
1357
|
+
nextTools: ["attach_execution_evidence", "record_execution_decision", "record_execution_verification"],
|
|
1358
|
+
methodology: "closed_loop",
|
|
1359
|
+
tip: "Use this for meaningful transitions only. Good traces read like operator receipts, not noisy debug logs.",
|
|
1360
|
+
},
|
|
1361
|
+
phase: "implement",
|
|
1362
|
+
complexity: "low",
|
|
1363
|
+
},
|
|
1364
|
+
{
|
|
1365
|
+
name: "record_execution_decision",
|
|
1366
|
+
category: "platform",
|
|
1367
|
+
tags: ["execution-trace", "decision", "ranking", "selection", "basis", "alternatives", "confidence", "traceable"],
|
|
1368
|
+
quickRef: {
|
|
1369
|
+
nextAction: "Decision recorded. Attach the evidence that supports it and add a limitation note if the choice depends on incomplete information.",
|
|
1370
|
+
nextTools: ["attach_execution_evidence", "record_execution_verification", "complete_execution_run"],
|
|
1371
|
+
methodology: "verification",
|
|
1372
|
+
tip: "Record the basis and alternatives considered. That gives explainability without exposing raw hidden reasoning.",
|
|
1373
|
+
},
|
|
1374
|
+
phase: "verify",
|
|
1375
|
+
complexity: "low",
|
|
1376
|
+
},
|
|
1377
|
+
{
|
|
1378
|
+
name: "record_execution_verification",
|
|
1379
|
+
category: "platform",
|
|
1380
|
+
tags: ["execution-trace", "verification", "qa", "check", "render", "formula", "artifact", "traceable"],
|
|
1381
|
+
quickRef: {
|
|
1382
|
+
nextAction: "Verification recorded. If it failed, fix the issue and record a follow-up verification so the trace shows the correction loop clearly.",
|
|
1383
|
+
nextTools: ["record_execution_step", "complete_execution_run", "record_learning"],
|
|
1384
|
+
methodology: "closed_loop",
|
|
1385
|
+
tip: "Use warnings for incomplete checks, failed for blocking issues, and fixed when the trace should show a successful repair.",
|
|
1386
|
+
},
|
|
1387
|
+
phase: "test",
|
|
1388
|
+
complexity: "low",
|
|
1389
|
+
},
|
|
1390
|
+
{
|
|
1391
|
+
name: "attach_execution_evidence",
|
|
1392
|
+
category: "platform",
|
|
1393
|
+
tags: ["execution-trace", "evidence", "sources", "truth-boundary", "urls", "files", "support", "claims"],
|
|
1394
|
+
quickRef: {
|
|
1395
|
+
nextAction: "Evidence attached. Cross-check that unsupported claims are listed explicitly before you finalize the run.",
|
|
1396
|
+
nextTools: ["record_execution_decision", "record_execution_verification", "complete_execution_run"],
|
|
1397
|
+
methodology: "reconnaissance",
|
|
1398
|
+
tip: "Use supportedClaims and unsupportedClaims to make the truth boundary visible in the run, not just in the final answer.",
|
|
1399
|
+
},
|
|
1400
|
+
phase: "research",
|
|
1401
|
+
complexity: "low",
|
|
1402
|
+
},
|
|
1403
|
+
{
|
|
1404
|
+
name: "request_execution_approval",
|
|
1405
|
+
category: "platform",
|
|
1406
|
+
tags: ["execution-trace", "approval", "human-in-the-loop", "risk", "gate", "policy", "handoff", "traceable"],
|
|
1407
|
+
quickRef: {
|
|
1408
|
+
nextAction: "Approval requested. Pause risky execution and let the operator resolve the pending gate before continuing.",
|
|
1409
|
+
nextTools: ["record_execution_step", "record_execution_verification", "complete_execution_run"],
|
|
1410
|
+
methodology: "quality_gates",
|
|
1411
|
+
tip: "Use for externally visible writes, destructive edits, or any action you would want an operator to justify later.",
|
|
1412
|
+
},
|
|
1413
|
+
phase: "verify",
|
|
1414
|
+
complexity: "low",
|
|
1415
|
+
},
|
|
1325
1416
|
{
|
|
1326
1417
|
name: "polish_academic_text",
|
|
1327
1418
|
category: "research_writing",
|
|
@@ -3080,6 +3171,514 @@ const REGISTRY_ENTRIES = [
|
|
|
3080
3171
|
phase: "meta",
|
|
3081
3172
|
complexity: "low",
|
|
3082
3173
|
},
|
|
3174
|
+
// ═══ OBSERVABILITY ═══
|
|
3175
|
+
{
|
|
3176
|
+
name: "get_system_pulse",
|
|
3177
|
+
category: "observability",
|
|
3178
|
+
tags: ["health", "status", "pulse", "monitoring", "dashboard", "uptime", "errors", "diagnostics"],
|
|
3179
|
+
quickRef: {
|
|
3180
|
+
nextAction: "Pulse captured. If healthScore < 70, run get_drift_report for details. If critical, run run_self_heal.",
|
|
3181
|
+
nextTools: ["get_drift_report", "run_self_heal", "get_uptime_stats"],
|
|
3182
|
+
tip: "Call this first when investigating system issues — it gives you the full picture in one shot.",
|
|
3183
|
+
},
|
|
3184
|
+
phase: "utility",
|
|
3185
|
+
complexity: "low",
|
|
3186
|
+
},
|
|
3187
|
+
{
|
|
3188
|
+
name: "get_drift_report",
|
|
3189
|
+
category: "observability",
|
|
3190
|
+
tags: ["drift", "detection", "orphaned", "stale", "bloat", "maintenance", "audit", "cleanup"],
|
|
3191
|
+
quickRef: {
|
|
3192
|
+
nextAction: "Drift detected. Review healable issues, then call run_self_heal with targets to auto-fix.",
|
|
3193
|
+
nextTools: ["run_self_heal", "get_system_pulse", "cleanup_stale_runs"],
|
|
3194
|
+
tip: "Include include_history=true to see trend over time — one-off spikes are different from sustained degradation.",
|
|
3195
|
+
},
|
|
3196
|
+
phase: "verify",
|
|
3197
|
+
complexity: "low",
|
|
3198
|
+
},
|
|
3199
|
+
{
|
|
3200
|
+
name: "run_self_heal",
|
|
3201
|
+
category: "observability",
|
|
3202
|
+
tags: ["heal", "repair", "fix", "autonomous", "maintenance", "cleanup", "self-healing", "auto-fix"],
|
|
3203
|
+
quickRef: {
|
|
3204
|
+
nextAction: "Healing complete. Re-run get_drift_report to verify fixes took effect.",
|
|
3205
|
+
nextTools: ["get_drift_report", "get_system_pulse"],
|
|
3206
|
+
tip: "Use dry_run=true first to preview what would be fixed without actually changing anything.",
|
|
3207
|
+
},
|
|
3208
|
+
phase: "utility",
|
|
3209
|
+
complexity: "low",
|
|
3210
|
+
},
|
|
3211
|
+
{
|
|
3212
|
+
name: "get_uptime_stats",
|
|
3213
|
+
category: "observability",
|
|
3214
|
+
tags: ["uptime", "metrics", "calls", "errors", "trends", "rate", "performance", "statistics"],
|
|
3215
|
+
quickRef: {
|
|
3216
|
+
nextAction: "Stats captured. Check error trend direction — if 'increasing', investigate with get_drift_report.",
|
|
3217
|
+
nextTools: ["get_drift_report", "get_system_pulse", "get_trajectory_analysis"],
|
|
3218
|
+
tip: "Compare 1hr vs 24hr error rates — a recent spike in an otherwise stable system needs different treatment than chronic errors.",
|
|
3219
|
+
},
|
|
3220
|
+
phase: "utility",
|
|
3221
|
+
complexity: "low",
|
|
3222
|
+
},
|
|
3223
|
+
{
|
|
3224
|
+
name: "set_watchdog_config",
|
|
3225
|
+
category: "observability",
|
|
3226
|
+
tags: ["watchdog", "config", "interval", "thresholds", "monitoring", "background", "schedule"],
|
|
3227
|
+
quickRef: {
|
|
3228
|
+
nextAction: "Watchdog reconfigured. Changes take effect immediately. Check get_watchdog_log after one cycle to verify.",
|
|
3229
|
+
nextTools: ["get_watchdog_log", "get_system_pulse"],
|
|
3230
|
+
tip: "Set interval_minutes=1 for debugging, then raise to 5-10 for normal operation to reduce overhead.",
|
|
3231
|
+
},
|
|
3232
|
+
phase: "utility",
|
|
3233
|
+
complexity: "low",
|
|
3234
|
+
},
|
|
3235
|
+
{
|
|
3236
|
+
name: "get_watchdog_log",
|
|
3237
|
+
category: "observability",
|
|
3238
|
+
tags: ["watchdog", "log", "history", "trend", "background", "audit", "timeline"],
|
|
3239
|
+
quickRef: {
|
|
3240
|
+
nextAction: "Log reviewed. If trend is 'degrading', investigate the most common issue type with get_drift_report.",
|
|
3241
|
+
nextTools: ["get_drift_report", "set_watchdog_config", "get_system_pulse"],
|
|
3242
|
+
tip: "Use only_issues=true to filter noise and focus on entries where something actually went wrong.",
|
|
3243
|
+
},
|
|
3244
|
+
phase: "utility",
|
|
3245
|
+
complexity: "low",
|
|
3246
|
+
},
|
|
3247
|
+
{
|
|
3248
|
+
name: "get_sentinel_report",
|
|
3249
|
+
category: "observability",
|
|
3250
|
+
tags: ["sentinel", "probes", "quality", "testing", "build", "e2e", "voice", "a11y", "visual", "performance"],
|
|
3251
|
+
quickRef: {
|
|
3252
|
+
nextAction: "Report reviewed. For failing probes, check diagnosis root causes and apply suggested fixes.",
|
|
3253
|
+
nextTools: ["get_drift_report", "get_system_pulse", "run_self_heal"],
|
|
3254
|
+
tip: "Use probe_filter to focus on specific areas like 'build,e2e' instead of reviewing all 9 probes.",
|
|
3255
|
+
},
|
|
3256
|
+
phase: "verify",
|
|
3257
|
+
complexity: "low",
|
|
3258
|
+
},
|
|
3259
|
+
{
|
|
3260
|
+
name: "get_observability_summary",
|
|
3261
|
+
category: "observability",
|
|
3262
|
+
tags: ["summary", "unified", "health", "sentinel", "watchdog", "quick-check", "overview"],
|
|
3263
|
+
quickRef: {
|
|
3264
|
+
nextAction: "Summary reviewed. Follow nextActions recommendations for highest-impact improvements.",
|
|
3265
|
+
nextTools: ["get_drift_report", "run_self_heal", "get_sentinel_report", "get_uptime_stats"],
|
|
3266
|
+
tip: "Best starting point for any session — gives you MCP health, sentinel status, and watchdog state in one call.",
|
|
3267
|
+
},
|
|
3268
|
+
phase: "utility",
|
|
3269
|
+
complexity: "low",
|
|
3270
|
+
},
|
|
3271
|
+
// ═══ TEMPORAL INTELLIGENCE (Unified Temporal Agentic OS) ═══
|
|
3272
|
+
{
|
|
3273
|
+
name: "ingest_temporal_observation",
|
|
3274
|
+
category: "temporal_intelligence",
|
|
3275
|
+
tags: ["temporal", "observation", "ingest", "time-series", "stream", "signal", "data", "event"],
|
|
3276
|
+
quickRef: {
|
|
3277
|
+
nextAction: "Observation ingested. Run detect_temporal_signal on the same streamKey to find patterns, or ingest more observations to build a richer time series.",
|
|
3278
|
+
nextTools: ["detect_temporal_signal", "build_causal_chain", "query_temporal_signals"],
|
|
3279
|
+
methodology: "temporal_agentic_os",
|
|
3280
|
+
tip: "Use consistent streamKey naming (e.g. 'github/commits/repo', 'jira/velocity/team') for clean signal detection.",
|
|
3281
|
+
},
|
|
3282
|
+
phase: "research",
|
|
3283
|
+
complexity: "low",
|
|
3284
|
+
},
|
|
3285
|
+
{
|
|
3286
|
+
name: "detect_temporal_signal",
|
|
3287
|
+
category: "temporal_intelligence",
|
|
3288
|
+
tags: ["temporal", "signal", "detect", "anomaly", "momentum", "regime-shift", "trend", "analysis", "statistics"],
|
|
3289
|
+
quickRef: {
|
|
3290
|
+
nextAction: "Signals detected. Build a causal_chain to explain significant signals, or generate a zero_draft to communicate findings. Use query_temporal_signals to retrieve stored signals.",
|
|
3291
|
+
nextTools: ["build_causal_chain", "generate_zero_draft", "query_temporal_signals", "forecast_temporal_trend"],
|
|
3292
|
+
methodology: "temporal_agentic_os",
|
|
3293
|
+
tip: "Need 5+ numeric observations for momentum, 10+ for regime shift detection. Use lookbackDays to control analysis window.",
|
|
3294
|
+
},
|
|
3295
|
+
phase: "research",
|
|
3296
|
+
complexity: "medium",
|
|
3297
|
+
},
|
|
3298
|
+
{
|
|
3299
|
+
name: "build_causal_chain",
|
|
3300
|
+
category: "temporal_intelligence",
|
|
3301
|
+
tags: ["temporal", "causal", "chain", "causality", "root-cause", "analysis", "timeline", "explanation"],
|
|
3302
|
+
quickRef: {
|
|
3303
|
+
nextAction: "Causal chain built. Generate a zero_draft to communicate the analysis, or create a proof_pack to verify the chain's conclusions.",
|
|
3304
|
+
nextTools: ["generate_zero_draft", "create_proof_pack", "detect_temporal_signal"],
|
|
3305
|
+
methodology: "temporal_agentic_os",
|
|
3306
|
+
tip: "Nodes must be chronological. Link evidenceObservationIds to ground each causal step in data.",
|
|
3307
|
+
},
|
|
3308
|
+
phase: "research",
|
|
3309
|
+
complexity: "medium",
|
|
3310
|
+
},
|
|
3311
|
+
{
|
|
3312
|
+
name: "generate_zero_draft",
|
|
3313
|
+
category: "temporal_intelligence",
|
|
3314
|
+
tags: ["temporal", "draft", "artifact", "generate", "email", "slack", "spec", "pr", "content", "auto-draft"],
|
|
3315
|
+
quickRef: {
|
|
3316
|
+
nextAction: "Draft generated. Review the bodyMarkdown, edit as needed, then approve or create a proof_pack before sending.",
|
|
3317
|
+
nextTools: ["create_proof_pack", "detect_temporal_signal", "build_causal_chain"],
|
|
3318
|
+
methodology: "temporal_agentic_os",
|
|
3319
|
+
tip: "Link signal IDs and chain IDs to auto-populate the draft with evidence. Always review before approving.",
|
|
3320
|
+
},
|
|
3321
|
+
phase: "implement",
|
|
3322
|
+
complexity: "high",
|
|
3323
|
+
},
|
|
3324
|
+
{
|
|
3325
|
+
name: "create_proof_pack",
|
|
3326
|
+
category: "temporal_intelligence",
|
|
3327
|
+
tags: ["temporal", "proof", "pack", "verification", "checklist", "metrics", "dogfood", "immutable", "audit"],
|
|
3328
|
+
quickRef: {
|
|
3329
|
+
nextAction: "Proof pack created. If pass rate is 100%, status is 'ready' for approval. Otherwise, address failing items and create a new pack.",
|
|
3330
|
+
nextTools: ["query_temporal_signals", "generate_zero_draft", "detect_temporal_signal"],
|
|
3331
|
+
methodology: "temporal_agentic_os",
|
|
3332
|
+
tip: "100% pass rate auto-sets status to 'ready'. Include metrics for cost/performance tracking.",
|
|
3333
|
+
},
|
|
3334
|
+
phase: "verify",
|
|
3335
|
+
complexity: "medium",
|
|
3336
|
+
},
|
|
3337
|
+
{
|
|
3338
|
+
name: "query_temporal_signals",
|
|
3339
|
+
category: "temporal_intelligence",
|
|
3340
|
+
tags: ["temporal", "signal", "query", "search", "filter", "retrieve", "list", "status"],
|
|
3341
|
+
quickRef: {
|
|
3342
|
+
nextAction: "Signals retrieved. Investigate high-confidence signals with build_causal_chain, or forecast trends with forecast_temporal_trend.",
|
|
3343
|
+
nextTools: ["build_causal_chain", "forecast_temporal_trend", "detect_temporal_signal", "generate_zero_draft"],
|
|
3344
|
+
methodology: "temporal_agentic_os",
|
|
3345
|
+
tip: "Filter by status='open' to focus on unresolved signals. Use date range to scope analysis.",
|
|
3346
|
+
},
|
|
3347
|
+
phase: "research",
|
|
3348
|
+
complexity: "low",
|
|
3349
|
+
},
|
|
3350
|
+
{
|
|
3351
|
+
name: "forecast_temporal_trend",
|
|
3352
|
+
category: "temporal_intelligence",
|
|
3353
|
+
tags: ["temporal", "forecast", "trend", "prediction", "time-series", "regression", "smoothing", "statistics"],
|
|
3354
|
+
quickRef: {
|
|
3355
|
+
nextAction: "Forecast generated. Compare predictions with actual observations as they arrive. Use detect_temporal_signal to monitor for deviations from forecast.",
|
|
3356
|
+
nextTools: ["detect_temporal_signal", "ingest_temporal_observation", "query_temporal_signals", "generate_zero_draft"],
|
|
3357
|
+
methodology: "temporal_agentic_os",
|
|
3358
|
+
tip: "Linear method works best with clear trends. Exponential smoothing handles noisy data better. Naive is a baseline.",
|
|
3359
|
+
},
|
|
3360
|
+
phase: "research",
|
|
3361
|
+
complexity: "high",
|
|
3362
|
+
},
|
|
3363
|
+
// ── Mission Harness (Hierarchical execution) ──────────────────────────
|
|
3364
|
+
{
|
|
3365
|
+
name: "plan_decompose_mission",
|
|
3366
|
+
category: "mission_harness",
|
|
3367
|
+
tags: ["mission", "planner", "decompose", "subtask", "verifiability", "orchestration", "hierarchy", "execution"],
|
|
3368
|
+
quickRef: {
|
|
3369
|
+
nextAction: "Mission decomposed. Assign agents to subtasks, then use judge_verify_subtask as each completes.",
|
|
3370
|
+
nextTools: ["judge_verify_subtask", "harness_get_mission_status", "harness_list_runs"],
|
|
3371
|
+
methodology: "mission_execution_harness",
|
|
3372
|
+
tip: "Every subtask needs verifiabilityTier + outputContract. Tier 1 = machine-checkable, Tier 2 = expert-checkable.",
|
|
3373
|
+
},
|
|
3374
|
+
phase: "research",
|
|
3375
|
+
complexity: "high",
|
|
3376
|
+
},
|
|
3377
|
+
{
|
|
3378
|
+
name: "judge_verify_subtask",
|
|
3379
|
+
category: "mission_harness",
|
|
3380
|
+
tags: ["judge", "verify", "review", "evidence", "artifact", "verdict", "quality", "gate"],
|
|
3381
|
+
quickRef: {
|
|
3382
|
+
nextAction: "Subtask verified. If passed and requiresSniffCheck, use sniff_record_human_review. If failed, use judge_request_retry.",
|
|
3383
|
+
nextTools: ["sniff_record_human_review", "judge_request_retry", "merge_compose_output", "harness_get_mission_status"],
|
|
3384
|
+
methodology: "mission_execution_harness",
|
|
3385
|
+
tip: "No hardcoded score floors — 0 means 0. Evidence refs create the traceability chain.",
|
|
3386
|
+
},
|
|
3387
|
+
phase: "verify",
|
|
3388
|
+
complexity: "high",
|
|
3389
|
+
},
|
|
3390
|
+
{
|
|
3391
|
+
name: "judge_request_retry",
|
|
3392
|
+
category: "mission_harness",
|
|
3393
|
+
tags: ["retry", "escalate", "replan", "budget", "failure", "recovery", "resilience"],
|
|
3394
|
+
quickRef: {
|
|
3395
|
+
nextAction: "Retry requested. Worker should re-attempt with newInstructions. If budget exhausted, auto-escalates.",
|
|
3396
|
+
nextTools: ["judge_verify_subtask", "harness_get_mission_status", "plan_decompose_mission"],
|
|
3397
|
+
methodology: "mission_execution_harness",
|
|
3398
|
+
tip: "Retry budget enforced — exhausted budget auto-escalates. Use 'stop' only for unverifiable subtasks.",
|
|
3399
|
+
},
|
|
3400
|
+
phase: "verify",
|
|
3401
|
+
complexity: "medium",
|
|
3402
|
+
},
|
|
3403
|
+
{
|
|
3404
|
+
name: "merge_compose_output",
|
|
3405
|
+
category: "mission_harness",
|
|
3406
|
+
tags: ["merge", "compose", "output", "artifact", "boundary", "orchestration", "finalize"],
|
|
3407
|
+
quickRef: {
|
|
3408
|
+
nextAction: "Output merged. If requiresJudgeReview, run judge_verify_subtask on the merge. Otherwise check mission status.",
|
|
3409
|
+
nextTools: ["judge_verify_subtask", "sniff_record_human_review", "harness_get_mission_status"],
|
|
3410
|
+
methodology: "mission_execution_harness",
|
|
3411
|
+
tip: "Judge-gated: all subtasks must be 'passed' before merge. No shared free-for-all editing.",
|
|
3412
|
+
},
|
|
3413
|
+
phase: "ship",
|
|
3414
|
+
complexity: "high",
|
|
3415
|
+
},
|
|
3416
|
+
{
|
|
3417
|
+
name: "sniff_record_human_review",
|
|
3418
|
+
category: "mission_harness",
|
|
3419
|
+
tags: ["human", "review", "sniff", "check", "approval", "block", "concern", "quality"],
|
|
3420
|
+
quickRef: {
|
|
3421
|
+
nextAction: "Sniff-check recorded. If 'block', subtask enters force-retry. If 'pass', proceed to merge.",
|
|
3422
|
+
nextTools: ["merge_compose_output", "judge_request_retry", "harness_get_mission_status"],
|
|
3423
|
+
methodology: "mission_execution_harness",
|
|
3424
|
+
tip: "Issue tags: unsupported_claim, weak_evidence, not_credible, too_risky, scope_drift, missing_source, contradictory, stale_data.",
|
|
3425
|
+
},
|
|
3426
|
+
phase: "verify",
|
|
3427
|
+
complexity: "low",
|
|
3428
|
+
},
|
|
3429
|
+
{
|
|
3430
|
+
name: "harness_get_mission_status",
|
|
3431
|
+
category: "mission_harness",
|
|
3432
|
+
tags: ["status", "mission", "dashboard", "trace", "receipt", "progress", "overview"],
|
|
3433
|
+
quickRef: {
|
|
3434
|
+
nextAction: "Review subtask states and decide next action: verify pending subtasks, merge passed ones, or record sniff-checks.",
|
|
3435
|
+
nextTools: ["judge_verify_subtask", "merge_compose_output", "sniff_record_human_review", "harness_list_runs"],
|
|
3436
|
+
methodology: "mission_execution_harness",
|
|
3437
|
+
tip: "Use includeEvidence=true for full traceability audit. Default omits evidence for performance.",
|
|
3438
|
+
},
|
|
3439
|
+
phase: "verify",
|
|
3440
|
+
complexity: "low",
|
|
3441
|
+
},
|
|
3442
|
+
{
|
|
3443
|
+
name: "harness_list_runs",
|
|
3444
|
+
category: "mission_harness",
|
|
3445
|
+
tags: ["list", "runs", "missions", "history", "discovery", "overview"],
|
|
3446
|
+
quickRef: {
|
|
3447
|
+
nextAction: "Pick a run to inspect with harness_get_mission_status, or create a new mission with plan_decompose_mission.",
|
|
3448
|
+
nextTools: ["harness_get_mission_status", "plan_decompose_mission"],
|
|
3449
|
+
methodology: "mission_execution_harness",
|
|
3450
|
+
tip: "Filter by status to find active, failed, or completed runs.",
|
|
3451
|
+
},
|
|
3452
|
+
phase: "research",
|
|
3453
|
+
complexity: "low",
|
|
3454
|
+
},
|
|
3455
|
+
{
|
|
3456
|
+
name: "compute_dimension_profile",
|
|
3457
|
+
category: "mission_harness",
|
|
3458
|
+
tags: ["deeptrace", "dimension", "profile", "regime", "company", "capital", "capability", "time"],
|
|
3459
|
+
quickRef: {
|
|
3460
|
+
nextAction: "Profile computed. Export the full bundle, inspect evidence rows and interaction effects, then record any regime-sensitive recommendation in the execution trace.",
|
|
3461
|
+
nextTools: ["export_dimension_bundle", "list_dimension_evidence", "list_dimension_interactions", "record_execution_decision"],
|
|
3462
|
+
methodology: "mission_execution_harness",
|
|
3463
|
+
tip: "Recompute after new company evidence, hiring signals, financing events, or world events land.",
|
|
3464
|
+
},
|
|
3465
|
+
phase: "research",
|
|
3466
|
+
complexity: "medium",
|
|
3467
|
+
},
|
|
3468
|
+
{
|
|
3469
|
+
name: "get_dimension_profile",
|
|
3470
|
+
category: "mission_harness",
|
|
3471
|
+
tags: ["deeptrace", "dimension", "profile", "regime", "policy_context", "confidence", "company"],
|
|
3472
|
+
quickRef: {
|
|
3473
|
+
nextAction: "Read the latest normalized state, regime label, and policy context. If it looks stale, recompute. If it looks material, drill into bundle details.",
|
|
3474
|
+
nextTools: ["compute_dimension_profile", "export_dimension_bundle", "list_dimension_snapshots"],
|
|
3475
|
+
methodology: "mission_execution_harness",
|
|
3476
|
+
tip: "Use this for a fast read before pulling the heavier evidence and snapshot bundle.",
|
|
3477
|
+
},
|
|
3478
|
+
phase: "research",
|
|
3479
|
+
complexity: "low",
|
|
3480
|
+
},
|
|
3481
|
+
{
|
|
3482
|
+
name: "list_dimension_snapshots",
|
|
3483
|
+
category: "mission_harness",
|
|
3484
|
+
tags: ["deeptrace", "dimension", "snapshots", "history", "regime_transition", "timeline"],
|
|
3485
|
+
quickRef: {
|
|
3486
|
+
nextAction: "Review how the entity moved across regimes over time, then use those transitions to qualify the current recommendation.",
|
|
3487
|
+
nextTools: ["get_dimension_profile", "export_dimension_bundle", "record_execution_verification"],
|
|
3488
|
+
methodology: "mission_execution_harness",
|
|
3489
|
+
tip: "Use snapshots to answer whether a company became stronger after funding, hiring, or strategic events rather than assuming a static state.",
|
|
3490
|
+
},
|
|
3491
|
+
phase: "research",
|
|
3492
|
+
complexity: "low",
|
|
3493
|
+
},
|
|
3494
|
+
{
|
|
3495
|
+
name: "list_dimension_evidence",
|
|
3496
|
+
category: "mission_harness",
|
|
3497
|
+
tags: ["deeptrace", "dimension", "evidence", "audit", "verified", "estimated", "inferred"],
|
|
3498
|
+
quickRef: {
|
|
3499
|
+
nextAction: "Audit the evidence behind each score and availability status. If a recommendation depends on a weak signal, call that out explicitly.",
|
|
3500
|
+
nextTools: ["list_dimension_interactions", "record_execution_decision", "record_execution_verification"],
|
|
3501
|
+
methodology: "mission_execution_harness",
|
|
3502
|
+
tip: "Availability labels matter. Verified and inferred evidence should not be treated as equally strong.",
|
|
3503
|
+
},
|
|
3504
|
+
phase: "verify",
|
|
3505
|
+
complexity: "low",
|
|
3506
|
+
},
|
|
3507
|
+
{
|
|
3508
|
+
name: "list_dimension_interactions",
|
|
3509
|
+
category: "mission_harness",
|
|
3510
|
+
tags: ["deeptrace", "dimension", "interaction", "causal", "capital", "network", "fragility"],
|
|
3511
|
+
quickRef: {
|
|
3512
|
+
nextAction: "Use interaction effects to explain why the recommendation changes under different regimes instead of collapsing everything into one score.",
|
|
3513
|
+
nextTools: ["export_dimension_bundle", "record_execution_decision", "record_execution_verification"],
|
|
3514
|
+
methodology: "mission_execution_harness",
|
|
3515
|
+
tip: "Interaction effects are where capital, capability, and narrative signals become causal rather than just descriptive.",
|
|
3516
|
+
},
|
|
3517
|
+
phase: "research",
|
|
3518
|
+
complexity: "medium",
|
|
3519
|
+
},
|
|
3520
|
+
{
|
|
3521
|
+
name: "export_dimension_bundle",
|
|
3522
|
+
category: "mission_harness",
|
|
3523
|
+
tags: ["deeptrace", "dimension", "bundle", "profile", "snapshots", "evidence", "interactions", "audit"],
|
|
3524
|
+
quickRef: {
|
|
3525
|
+
nextAction: "Use the bundle as the auditable substrate for your memo, execution trace, or judge review. Cite the profile, evidence, and interactions directly.",
|
|
3526
|
+
nextTools: ["run_research_cell", "record_execution_step", "record_execution_decision", "record_execution_verification"],
|
|
3527
|
+
methodology: "mission_execution_harness",
|
|
3528
|
+
tip: "This is the safest handoff artifact for Claude Code because it preserves the profile, evidence, and history in one fetch.",
|
|
3529
|
+
},
|
|
3530
|
+
phase: "research",
|
|
3531
|
+
complexity: "medium",
|
|
3532
|
+
},
|
|
3533
|
+
{
|
|
3534
|
+
name: "run_research_cell",
|
|
3535
|
+
category: "mission_harness",
|
|
3536
|
+
tags: ["deeptrace", "research", "reanalysis", "confidence", "coverage", "evidence", "gaps", "counter_hypothesis"],
|
|
3537
|
+
quickRef: {
|
|
3538
|
+
nextAction: "Review the merged findings for gaps, counter-hypotheses, and coverage deficiencies. If evidence is still sparse, escalate to due-diligence orchestrator for external acquisition.",
|
|
3539
|
+
nextTools: ["export_dimension_bundle", "compute_dimension_profile", "run_entity_intelligence_mission", "record_execution_decision"],
|
|
3540
|
+
methodology: "mission_execution_harness",
|
|
3541
|
+
tip: "This cell re-analyzes existing DeepTrace data — it does NOT acquire new evidence. Use it to surface what is missing before committing to expensive external research.",
|
|
3542
|
+
},
|
|
3543
|
+
phase: "research",
|
|
3544
|
+
complexity: "high",
|
|
3545
|
+
},
|
|
3546
|
+
{
|
|
3547
|
+
name: "run_entity_intelligence_mission",
|
|
3548
|
+
category: "mission_harness",
|
|
3549
|
+
tags: ["deeptrace", "mission", "entity", "intelligence", "investigation", "relationship", "ownership", "supply_chain", "research_cell"],
|
|
3550
|
+
quickRef: {
|
|
3551
|
+
nextAction: "Review the unified mission output (graph, ownership, supply chain, signals, causal chains). If researchCell was enabled or forceResearchCell was used, check whether the cell triggered and review its findings.",
|
|
3552
|
+
nextTools: ["run_research_cell", "export_dimension_bundle", "record_execution_step", "record_execution_verification"],
|
|
3553
|
+
methodology: "mission_execution_harness",
|
|
3554
|
+
tip: "Pass researchCell=true for threshold-driven bounded re-analysis, or forceResearchCell=true when an operator wants the cell to run even if confidence and coverage look healthy.",
|
|
3555
|
+
},
|
|
3556
|
+
phase: "research",
|
|
3557
|
+
complexity: "high",
|
|
3558
|
+
},
|
|
3559
|
+
// ═══ DEEP SIM (claim graph → simulation → decision memo) ═══
|
|
3560
|
+
{
|
|
3561
|
+
name: "build_claim_graph",
|
|
3562
|
+
category: "deep_sim",
|
|
3563
|
+
tags: ["deeptrace", "claims", "evidence", "graph", "provenance"],
|
|
3564
|
+
quickRef: {
|
|
3565
|
+
nextAction: "Claim graph built. Extract variables to identify levers, or generate countermodels to stress-test the graph.",
|
|
3566
|
+
nextTools: ["extract_variables", "generate_countermodels"],
|
|
3567
|
+
methodology: "deep_sim",
|
|
3568
|
+
},
|
|
3569
|
+
phase: "research",
|
|
3570
|
+
complexity: "high",
|
|
3571
|
+
},
|
|
3572
|
+
{
|
|
3573
|
+
name: "extract_variables",
|
|
3574
|
+
category: "deep_sim",
|
|
3575
|
+
tags: ["deeptrace", "variables", "weights", "sensitivity"],
|
|
3576
|
+
quickRef: {
|
|
3577
|
+
nextAction: "Variables extracted with sensitivity weights. Generate countermodels to falsify, run a sim to explore branches, or score compounding drift.",
|
|
3578
|
+
nextTools: ["generate_countermodels", "run_deep_sim", "score_compounding"],
|
|
3579
|
+
methodology: "deep_sim",
|
|
3580
|
+
},
|
|
3581
|
+
phase: "research",
|
|
3582
|
+
complexity: "high",
|
|
3583
|
+
},
|
|
3584
|
+
{
|
|
3585
|
+
name: "generate_countermodels",
|
|
3586
|
+
category: "deep_sim",
|
|
3587
|
+
tags: ["deeptrace", "counter", "hypothesis", "falsification"],
|
|
3588
|
+
quickRef: {
|
|
3589
|
+
nextAction: "Countermodels generated. Run a deep sim to test them under branching scenarios, or rank interventions by delta.",
|
|
3590
|
+
nextTools: ["run_deep_sim", "rank_interventions"],
|
|
3591
|
+
methodology: "deep_sim",
|
|
3592
|
+
},
|
|
3593
|
+
phase: "research",
|
|
3594
|
+
complexity: "high",
|
|
3595
|
+
},
|
|
3596
|
+
{
|
|
3597
|
+
name: "run_deep_sim",
|
|
3598
|
+
category: "deep_sim",
|
|
3599
|
+
tags: ["deeptrace", "simulation", "scenario", "branching", "agents"],
|
|
3600
|
+
quickRef: {
|
|
3601
|
+
nextAction: "Simulation complete. Rank interventions by impact delta, render a decision memo, or score compounding trajectory drift.",
|
|
3602
|
+
nextTools: ["rank_interventions", "render_decision_memo", "score_compounding"],
|
|
3603
|
+
methodology: "deep_sim",
|
|
3604
|
+
},
|
|
3605
|
+
phase: "research",
|
|
3606
|
+
complexity: "high",
|
|
3607
|
+
},
|
|
3608
|
+
{
|
|
3609
|
+
name: "rank_interventions",
|
|
3610
|
+
category: "deep_sim",
|
|
3611
|
+
tags: ["deeptrace", "interventions", "ranking", "delta"],
|
|
3612
|
+
quickRef: {
|
|
3613
|
+
nextAction: "Interventions ranked by delta. Render a decision memo for stakeholders, or score compounding to check trajectory drift.",
|
|
3614
|
+
nextTools: ["render_decision_memo", "score_compounding"],
|
|
3615
|
+
methodology: "deep_sim",
|
|
3616
|
+
},
|
|
3617
|
+
phase: "research",
|
|
3618
|
+
complexity: "medium",
|
|
3619
|
+
},
|
|
3620
|
+
{
|
|
3621
|
+
name: "score_compounding",
|
|
3622
|
+
category: "deep_sim",
|
|
3623
|
+
tags: ["deeptrace", "trajectory", "compounding", "drift", "score"],
|
|
3624
|
+
quickRef: {
|
|
3625
|
+
nextAction: "Compounding score computed. Render a decision memo summarizing the trajectory, or re-rank interventions if drift is significant.",
|
|
3626
|
+
nextTools: ["render_decision_memo", "rank_interventions"],
|
|
3627
|
+
methodology: "deep_sim",
|
|
3628
|
+
},
|
|
3629
|
+
phase: "verify",
|
|
3630
|
+
complexity: "medium",
|
|
3631
|
+
},
|
|
3632
|
+
{
|
|
3633
|
+
name: "render_decision_memo",
|
|
3634
|
+
category: "deep_sim",
|
|
3635
|
+
tags: ["deeptrace", "memo", "decision", "executive", "report"],
|
|
3636
|
+
quickRef: {
|
|
3637
|
+
nextAction: "Decision memo rendered. Share with stakeholders. To iterate, rebuild the claim graph or extract new variables.",
|
|
3638
|
+
nextTools: ["build_claim_graph", "extract_variables"],
|
|
3639
|
+
methodology: "deep_sim",
|
|
3640
|
+
},
|
|
3641
|
+
phase: "ship",
|
|
3642
|
+
complexity: "medium",
|
|
3643
|
+
},
|
|
3644
|
+
// ═══ FOUNDER ═══
|
|
3645
|
+
{
|
|
3646
|
+
name: "founder_deep_context_gather",
|
|
3647
|
+
category: "founder",
|
|
3648
|
+
tags: ["founder", "artifact", "packet", "context", "gather", "deep-search", "ocd", "nudge"],
|
|
3649
|
+
quickRef: {
|
|
3650
|
+
nextAction: "Context gather protocol returned. Complete ALL required steps, then call founder_packet_validate before saving.",
|
|
3651
|
+
nextTools: ["founder_packet_validate", "founder_packet_diff"],
|
|
3652
|
+
methodology: "founder",
|
|
3653
|
+
tip: "Always call this BEFORE generating an artifact packet. It ensures OCD-level thoroughness.",
|
|
3654
|
+
},
|
|
3655
|
+
phase: "research",
|
|
3656
|
+
complexity: "medium",
|
|
3657
|
+
},
|
|
3658
|
+
{
|
|
3659
|
+
name: "founder_packet_validate",
|
|
3660
|
+
category: "founder",
|
|
3661
|
+
tags: ["founder", "artifact", "packet", "validate", "quality-gate", "audit"],
|
|
3662
|
+
quickRef: {
|
|
3663
|
+
nextAction: "Packet validated. If passed, save and export. If failed, fix the flagged issues and re-validate.",
|
|
3664
|
+
nextTools: ["founder_packet_diff", "founder_deep_context_gather"],
|
|
3665
|
+
methodology: "founder",
|
|
3666
|
+
},
|
|
3667
|
+
phase: "verify",
|
|
3668
|
+
complexity: "low",
|
|
3669
|
+
},
|
|
3670
|
+
{
|
|
3671
|
+
name: "founder_packet_diff",
|
|
3672
|
+
category: "founder",
|
|
3673
|
+
tags: ["founder", "artifact", "packet", "diff", "history", "drift", "comparison"],
|
|
3674
|
+
quickRef: {
|
|
3675
|
+
nextAction: "Diff generated. Surface new contradictions and unresolved actions to the founder. Feed findings into the next packet generation.",
|
|
3676
|
+
nextTools: ["founder_deep_context_gather", "founder_packet_validate"],
|
|
3677
|
+
methodology: "founder",
|
|
3678
|
+
},
|
|
3679
|
+
phase: "research",
|
|
3680
|
+
complexity: "low",
|
|
3681
|
+
},
|
|
3083
3682
|
];
|
|
3084
3683
|
// ── Exported lookup structures ───────────────────────────────────────────
|
|
3085
3684
|
/** Map of tool name → registry entry for O(1) lookup */
|
|
@@ -3090,6 +3689,13 @@ export const ALL_REGISTRY_ENTRIES = REGISTRY_ENTRIES;
|
|
|
3090
3689
|
export function getQuickRef(toolName) {
|
|
3091
3690
|
return TOOL_REGISTRY.get(toolName)?.quickRef ?? null;
|
|
3092
3691
|
}
|
|
3692
|
+
/**
|
|
3693
|
+
* Compatibility helper for older callers that expect a "related tools" list.
|
|
3694
|
+
* The current registry models this through quickRef.nextTools.
|
|
3695
|
+
*/
|
|
3696
|
+
export function computeRelatedTools(toolName) {
|
|
3697
|
+
return getQuickRef(toolName)?.nextTools ?? [];
|
|
3698
|
+
}
|
|
3093
3699
|
/** Get all tools in a category */
|
|
3094
3700
|
export function getToolsByCategory(category) {
|
|
3095
3701
|
return REGISTRY_ENTRIES.filter((e) => e.category === category);
|
|
@@ -3222,6 +3828,135 @@ export function getToolComplexity(toolName) {
|
|
|
3222
3828
|
return CATEGORY_COMPLEXITY[entry.category] ?? "medium";
|
|
3223
3829
|
return "medium";
|
|
3224
3830
|
}
|
|
3831
|
+
/**
|
|
3832
|
+
* Category-level annotation defaults.
|
|
3833
|
+
* Every tool inherits its category's annotations unless overridden per-tool.
|
|
3834
|
+
*
|
|
3835
|
+
* Classification logic:
|
|
3836
|
+
* - readOnlyHint: true → category only reads/analyzes, no mutations
|
|
3837
|
+
* - destructiveHint: true → category creates, writes, deletes, or sends data
|
|
3838
|
+
* - openWorldHint: true → category hits external services (network, APIs)
|
|
3839
|
+
*/
|
|
3840
|
+
const CATEGORY_ANNOTATIONS = {
|
|
3841
|
+
// ── Read-only categories (no side effects, no network) ──
|
|
3842
|
+
reconnaissance: { readOnlyHint: true },
|
|
3843
|
+
progressive_discovery: { readOnlyHint: true },
|
|
3844
|
+
meta: { readOnlyHint: true },
|
|
3845
|
+
toon: { readOnlyHint: true },
|
|
3846
|
+
pattern: { readOnlyHint: true },
|
|
3847
|
+
local_file: { readOnlyHint: true },
|
|
3848
|
+
architect: { readOnlyHint: true },
|
|
3849
|
+
local_dashboard: { readOnlyHint: true },
|
|
3850
|
+
design_governance: { readOnlyHint: true },
|
|
3851
|
+
agent_traverse: { readOnlyHint: true },
|
|
3852
|
+
observability: { readOnlyHint: true },
|
|
3853
|
+
research_optimizer: { readOnlyHint: true },
|
|
3854
|
+
documentation: { readOnlyHint: true },
|
|
3855
|
+
security: { readOnlyHint: true },
|
|
3856
|
+
gaia_solvers: { readOnlyHint: true },
|
|
3857
|
+
ui_ux_dive: { readOnlyHint: true },
|
|
3858
|
+
ui_ux_dive_v2: { readOnlyHint: true },
|
|
3859
|
+
// ── Stateful but non-destructive categories (write to local DB/state) ──
|
|
3860
|
+
verification: { readOnlyHint: false, destructiveHint: false },
|
|
3861
|
+
eval: { readOnlyHint: false, destructiveHint: false },
|
|
3862
|
+
quality_gate: { readOnlyHint: false, destructiveHint: false },
|
|
3863
|
+
learning: { readOnlyHint: false, destructiveHint: false },
|
|
3864
|
+
flywheel: { readOnlyHint: false, destructiveHint: false },
|
|
3865
|
+
session_memory: { readOnlyHint: false, destructiveHint: false },
|
|
3866
|
+
self_eval: { readOnlyHint: false, destructiveHint: false },
|
|
3867
|
+
critter: { readOnlyHint: false, destructiveHint: false },
|
|
3868
|
+
engine_context: { readOnlyHint: false, destructiveHint: false },
|
|
3869
|
+
qa_orchestration: { readOnlyHint: false, destructiveHint: false },
|
|
3870
|
+
skill_update: { readOnlyHint: false, destructiveHint: false },
|
|
3871
|
+
benchmark: { readOnlyHint: false, destructiveHint: false },
|
|
3872
|
+
thompson_protocol: { readOnlyHint: false, destructiveHint: false },
|
|
3873
|
+
parallel_agents: { readOnlyHint: false, destructiveHint: false },
|
|
3874
|
+
research_writing: { readOnlyHint: false, destructiveHint: false },
|
|
3875
|
+
platform: { readOnlyHint: false, destructiveHint: false },
|
|
3876
|
+
// ── Destructive categories (create, write, delete, execute) ──
|
|
3877
|
+
boilerplate: { destructiveHint: true },
|
|
3878
|
+
bootstrap: { destructiveHint: true },
|
|
3879
|
+
git_workflow: { destructiveHint: true },
|
|
3880
|
+
context_sandbox: { destructiveHint: true },
|
|
3881
|
+
// ── Open-world categories (external network access) ──
|
|
3882
|
+
web: { openWorldHint: true },
|
|
3883
|
+
web_scraping: { openWorldHint: true },
|
|
3884
|
+
github: { openWorldHint: true },
|
|
3885
|
+
llm: { openWorldHint: true },
|
|
3886
|
+
email: { openWorldHint: true, destructiveHint: true },
|
|
3887
|
+
rss: { openWorldHint: true },
|
|
3888
|
+
voice_bridge: { openWorldHint: true },
|
|
3889
|
+
mcp_bridge: { openWorldHint: true },
|
|
3890
|
+
flicker_detection: { openWorldHint: true },
|
|
3891
|
+
figma_flow: { openWorldHint: true },
|
|
3892
|
+
seo: { readOnlyHint: true, openWorldHint: true },
|
|
3893
|
+
visual_qa: { readOnlyHint: true, openWorldHint: true },
|
|
3894
|
+
ui_capture: { readOnlyHint: false, openWorldHint: true },
|
|
3895
|
+
vision: { readOnlyHint: true, openWorldHint: true },
|
|
3896
|
+
};
|
|
3897
|
+
/**
|
|
3898
|
+
* Per-tool annotation overrides (when category default is wrong).
|
|
3899
|
+
* Sparse — only tools that deviate from their category.
|
|
3900
|
+
*/
|
|
3901
|
+
const TOOL_ANNOTATION_OVERRIDES = {
|
|
3902
|
+
// ── Explicitly destructive tools ──
|
|
3903
|
+
send_email: { destructiveHint: true, openWorldHint: true },
|
|
3904
|
+
execute_shell_command: { destructiveHint: true },
|
|
3905
|
+
sandbox_execute: { destructiveHint: true },
|
|
3906
|
+
scaffold_nodebench_project: { destructiveHint: true },
|
|
3907
|
+
scaffold_research_pipeline: { destructiveHint: true },
|
|
3908
|
+
git_create_branch: { destructiveHint: true },
|
|
3909
|
+
git_commit_changes: { destructiveHint: true },
|
|
3910
|
+
git_push_branch: { destructiveHint: true, openWorldHint: true },
|
|
3911
|
+
create_visual_pr: { destructiveHint: true, openWorldHint: true },
|
|
3912
|
+
cleanup_stale_runs: { destructiveHint: true },
|
|
3913
|
+
// ── Explicitly read-only tools in otherwise mutable categories ──
|
|
3914
|
+
get_verification_status: { readOnlyHint: true },
|
|
3915
|
+
list_verification_cycles: { readOnlyHint: true },
|
|
3916
|
+
list_eval_runs: { readOnlyHint: true },
|
|
3917
|
+
compare_eval_runs: { readOnlyHint: true },
|
|
3918
|
+
get_gate_history: { readOnlyHint: true },
|
|
3919
|
+
get_gate_preset: { readOnlyHint: true },
|
|
3920
|
+
get_flywheel_status: { readOnlyHint: true },
|
|
3921
|
+
get_parallel_status: { readOnlyHint: true },
|
|
3922
|
+
get_agent_role: { readOnlyHint: true },
|
|
3923
|
+
list_agent_tasks: { readOnlyHint: true },
|
|
3924
|
+
get_project_context: { readOnlyHint: true },
|
|
3925
|
+
get_boilerplate_status: { readOnlyHint: true },
|
|
3926
|
+
load_session_notes: { readOnlyHint: true },
|
|
3927
|
+
refresh_task_context: { readOnlyHint: true },
|
|
3928
|
+
get_engine_context_health: { readOnlyHint: true },
|
|
3929
|
+
get_workflow_history: { readOnlyHint: true },
|
|
3930
|
+
search_content_archive: { readOnlyHint: true },
|
|
3931
|
+
search_all_knowledge: { readOnlyHint: true },
|
|
3932
|
+
get_recon_summary: { readOnlyHint: true },
|
|
3933
|
+
save_session_note: { destructiveHint: false },
|
|
3934
|
+
// ── Open-world overrides for specific tools ──
|
|
3935
|
+
fetch_url: { openWorldHint: true, readOnlyHint: true },
|
|
3936
|
+
web_search: { openWorldHint: true, readOnlyHint: true },
|
|
3937
|
+
search_github: { openWorldHint: true, readOnlyHint: true },
|
|
3938
|
+
check_mcp_setup: { readOnlyHint: true, openWorldHint: true },
|
|
3939
|
+
scrapling_crawl_stop: { destructiveHint: false, openWorldHint: true },
|
|
3940
|
+
// ── Discovery tools are always read-only ──
|
|
3941
|
+
discover_tools: { readOnlyHint: true },
|
|
3942
|
+
get_tool_quick_ref: { readOnlyHint: true },
|
|
3943
|
+
get_workflow_chain: { readOnlyHint: true },
|
|
3944
|
+
findTools: { readOnlyHint: true },
|
|
3945
|
+
getMethodology: { readOnlyHint: true },
|
|
3946
|
+
};
|
|
3947
|
+
/**
|
|
3948
|
+
* Get MCP security annotations for a tool.
|
|
3949
|
+
* Resolution: per-tool override merged ON TOP of category default → empty (no hints).
|
|
3950
|
+
*/
|
|
3951
|
+
export function getToolAnnotations(toolName) {
|
|
3952
|
+
const entry = TOOL_REGISTRY.get(toolName);
|
|
3953
|
+
const categoryDefaults = entry ? (CATEGORY_ANNOTATIONS[entry.category] ?? {}) : {};
|
|
3954
|
+
const overrides = TOOL_ANNOTATION_OVERRIDES[toolName];
|
|
3955
|
+
if (overrides) {
|
|
3956
|
+
return { ...categoryDefaults, ...overrides };
|
|
3957
|
+
}
|
|
3958
|
+
return categoryDefaults;
|
|
3959
|
+
}
|
|
3225
3960
|
// ── Synonym / semantic expansion map ──────────────────────────────────────
|
|
3226
3961
|
const SYNONYM_MAP = {
|
|
3227
3962
|
// ── Existing technical synonyms ──
|
|
@@ -3474,7 +4209,7 @@ export function _setDbAccessor(accessor) {
|
|
|
3474
4209
|
* Approach: for each session, pull the ordered tool sequence, then count
|
|
3475
4210
|
* pairs within a sliding window of 5 calls. O(n) per session, no self-join.
|
|
3476
4211
|
*/
|
|
3477
|
-
function getCooccurrenceEdges() {
|
|
4212
|
+
export function getCooccurrenceEdges() {
|
|
3478
4213
|
const now = Date.now();
|
|
3479
4214
|
if (_cooccurrenceCache && now - _cooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
|
|
3480
4215
|
return _cooccurrenceCache;
|
|
@@ -3971,6 +4706,28 @@ export const WORKFLOW_CHAINS = {
|
|
|
3971
4706
|
{ tool: "save_session_note", action: "Save traceability note — cite original request, record root cause and fix" },
|
|
3972
4707
|
],
|
|
3973
4708
|
},
|
|
4709
|
+
autonomous_qa_bug: {
|
|
4710
|
+
name: "Autonomous QA Bug Verdict",
|
|
4711
|
+
description: "Evidence-first bug reproduction with trigger/verify split, bounded retries, blocked-infra classification, and anomaly isolation",
|
|
4712
|
+
steps: [
|
|
4713
|
+
{ tool: "search_all_knowledge", action: "Check prior bug signatures, setup blockers, and learned repro patterns before touching the workflow" },
|
|
4714
|
+
{ tool: "start_execution_run", action: "Open an execution trace so setup, trigger, verification, and verdict all land in one auditable run" },
|
|
4715
|
+
{ tool: "plan_decompose_mission", action: "Break the bug into setup, trigger, verify, evidence, and verdict subtasks with bounded contracts" },
|
|
4716
|
+
{ tool: "record_execution_step", action: "Log environment setup and preconditions before attempting reproduction" },
|
|
4717
|
+
{ tool: "record_execution_verification", action: "Verify setup state explicitly before trigger; classify missing environment or auth as blocked infra" },
|
|
4718
|
+
{ tool: "record_execution_step", action: "Execute the smallest trigger needed to reproduce the reported symptom" },
|
|
4719
|
+
{ tool: "attach_execution_evidence", action: "Attach screenshots, logs, videos, metrics, or diffs that show actual behavior" },
|
|
4720
|
+
{ tool: "get_gate_preset", action: "Load the agent_bug_verdict gate so the pre-verdict checks stay explicit and boolean" },
|
|
4721
|
+
{ tool: "run_quality_gate", action: "Run the agent_bug_verdict gate before deciding pass/fail/block" },
|
|
4722
|
+
{ tool: "judge_verify_subtask", action: "Judge the primary bug against the output contract with evidence-backed verdict and confidence" },
|
|
4723
|
+
{ tool: "judge_request_retry", action: "Retry only the failing trigger or setup step, up to budget; escalate blocked infra instead of looping blindly" },
|
|
4724
|
+
{ tool: "log_gap", action: "Log anomalies or newly found bugs separately so they do not overwrite the main bug verdict" },
|
|
4725
|
+
{ tool: "sniff_record_human_review", action: "Record human sniff-check when the verdict is high-risk, ambiguous, or externally visible" },
|
|
4726
|
+
{ tool: "complete_execution_run", action: "Close the trace with final status, evidence summary, and any drift from the original bug mission" },
|
|
4727
|
+
{ tool: "save_session_note", action: "Save traceability note — cite original bug, blocker classification, evidence path, and final verdict" },
|
|
4728
|
+
{ tool: "record_learning", action: "Record the reproduction pattern, blocker signature, and anomaly handling guidance for future runs" },
|
|
4729
|
+
],
|
|
4730
|
+
},
|
|
3974
4731
|
ui_change: {
|
|
3975
4732
|
name: "UI/UX Change",
|
|
3976
4733
|
description: "Frontend implementation with visual verification",
|
|
@@ -4043,7 +4800,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4043
4800
|
},
|
|
4044
4801
|
security_audit: {
|
|
4045
4802
|
name: "Security Audit",
|
|
4046
|
-
description: "
|
|
4803
|
+
description: "Security audit of dependencies, code, and terminal history",
|
|
4047
4804
|
steps: [
|
|
4048
4805
|
{ tool: "search_all_knowledge", action: "Check past security findings" },
|
|
4049
4806
|
{ tool: "scan_dependencies", action: "Check npm/pip packages for known CVEs" },
|
|
@@ -4058,7 +4815,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4058
4815
|
},
|
|
4059
4816
|
code_review: {
|
|
4060
4817
|
name: "Code Review",
|
|
4061
|
-
description: "
|
|
4818
|
+
description: "Code review with quality gates and learning capture",
|
|
4062
4819
|
steps: [
|
|
4063
4820
|
{ tool: "search_all_knowledge", action: "Check for relevant past patterns and gotchas" },
|
|
4064
4821
|
{ tool: "run_closed_loop", action: "Verify code compiles and tests pass" },
|
|
@@ -4102,7 +4859,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4102
4859
|
},
|
|
4103
4860
|
coordinator_spawn: {
|
|
4104
4861
|
name: "Coordinator → Subagent Spawn",
|
|
4105
|
-
description: "
|
|
4862
|
+
description: "Coordinate parallel subagents with task locks and gates",
|
|
4106
4863
|
steps: [
|
|
4107
4864
|
{ tool: "search_all_knowledge", action: "Check prior coordination patterns" },
|
|
4108
4865
|
{ tool: "get_parallel_status", action: "Check current agent activity" },
|
|
@@ -4118,7 +4875,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4118
4875
|
},
|
|
4119
4876
|
self_setup: {
|
|
4120
4877
|
name: "Self-Setup / Capability Escalation",
|
|
4121
|
-
description: "Detect and resolve missing capabilities before
|
|
4878
|
+
description: "Detect and resolve missing capabilities before work",
|
|
4122
4879
|
steps: [
|
|
4123
4880
|
{ tool: "discover_tools", action: "Search for needed capability" },
|
|
4124
4881
|
{ tool: "get_tool_quick_ref", action: "Check if tool exists but needs configuration" },
|
|
@@ -4132,7 +4889,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4132
4889
|
},
|
|
4133
4890
|
flicker_detection: {
|
|
4134
4891
|
name: "Android Flicker Detection",
|
|
4135
|
-
description: "Detect
|
|
4892
|
+
description: "Detect Android UI flicker via 4-layer pipeline",
|
|
4136
4893
|
steps: [
|
|
4137
4894
|
{ tool: "search_all_knowledge", action: "Check past flicker patterns and known issues" },
|
|
4138
4895
|
{ tool: "capture_surface_stats", action: "L0: Capture SurfaceFlinger jank metrics" },
|
|
@@ -4145,7 +4902,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4145
4902
|
},
|
|
4146
4903
|
figma_flow_analysis: {
|
|
4147
4904
|
name: "Figma Flow Analysis",
|
|
4148
|
-
description: "Extract, cluster, and visualize Figma
|
|
4905
|
+
description: "Extract, cluster, and visualize Figma flows",
|
|
4149
4906
|
steps: [
|
|
4150
4907
|
{ tool: "search_all_knowledge", action: "Check past design flow analysis patterns" },
|
|
4151
4908
|
{ tool: "extract_figma_frames", action: "Phase 1: Depth-3 tree traversal for frames" },
|
|
@@ -4157,7 +4914,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4157
4914
|
},
|
|
4158
4915
|
agent_eval: {
|
|
4159
4916
|
name: "Agent Evaluation Pipeline",
|
|
4160
|
-
description: "Measure
|
|
4917
|
+
description: "Measure and improve agent performance via closed-loop eval",
|
|
4161
4918
|
steps: [
|
|
4162
4919
|
{ tool: "check_contract_compliance", action: "Score the agent session against the 6-dimension contract (front-door, self-setup, pre-impl, parallel, ship-gates, efficiency)" },
|
|
4163
4920
|
{ tool: "get_trajectory_analysis", action: "Analyze tool usage patterns — frequency, errors, sequential bigrams, phase distribution" },
|
|
@@ -4172,7 +4929,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4172
4929
|
},
|
|
4173
4930
|
contract_compliance: {
|
|
4174
4931
|
name: "Contract Compliance Audit",
|
|
4175
|
-
description: "Verify
|
|
4932
|
+
description: "Verify agent session followed the NodeBench contract",
|
|
4176
4933
|
steps: [
|
|
4177
4934
|
{ tool: "log_tool_call", action: "Ensure all tool calls in the session are logged (auto-instrumented or manual)" },
|
|
4178
4935
|
{ tool: "check_contract_compliance", action: "Score the session across 6 dimensions (25 front-door + 10 self-setup + 15 pre-impl + 10 parallel + 30 ship-gates + 10 efficiency = 100)" },
|
|
@@ -4183,7 +4940,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4183
4940
|
},
|
|
4184
4941
|
ablation_eval: {
|
|
4185
4942
|
name: "Ablation Evaluation (Prove NodeBench MCP Value)",
|
|
4186
|
-
description: "
|
|
4943
|
+
description: "A/B test agent performance across 5 conditions with eval stats",
|
|
4187
4944
|
steps: [
|
|
4188
4945
|
{ tool: "create_task_bank", action: "Step 1: Define tasks with deterministic success criteria, forbidden behaviors, and budgets. Target 30-200 tasks." },
|
|
4189
4946
|
{ tool: "get_gate_preset", action: "Step 2: Load agent_comparison gate preset — 10 boolean rules covering outcome + process quality" },
|
|
@@ -4199,7 +4956,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4199
4956
|
},
|
|
4200
4957
|
session_recovery: {
|
|
4201
4958
|
name: "Session Recovery (Post-Compaction)",
|
|
4202
|
-
description: "Recover state after
|
|
4959
|
+
description: "Recover state after compaction, /clear, or session resume",
|
|
4203
4960
|
steps: [
|
|
4204
4961
|
{ tool: "load_session_notes", action: "Step 1: Load today's session notes from filesystem" },
|
|
4205
4962
|
{ tool: "refresh_task_context", action: "Step 2: Re-inject active verification cycle, open gaps, and recent learnings" },
|
|
@@ -4211,7 +4968,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4211
4968
|
},
|
|
4212
4969
|
attention_refresh: {
|
|
4213
4970
|
name: "Attention Refresh (Mid-Session)",
|
|
4214
|
-
description: "
|
|
4971
|
+
description: "Re-inject goals and re-anchor focus after 30+ tool calls",
|
|
4215
4972
|
steps: [
|
|
4216
4973
|
{ tool: "refresh_task_context", action: "Step 1: Re-inject current goals, open gaps, and session stats" },
|
|
4217
4974
|
{ tool: "save_session_note", action: "Step 2: Save progress checkpoint before continuing" },
|
|
@@ -4221,7 +4978,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4221
4978
|
},
|
|
4222
4979
|
task_bank_setup: {
|
|
4223
4980
|
name: "Task Bank Setup (50-Task Starter Kit)",
|
|
4224
|
-
description: "Build a
|
|
4981
|
+
description: "Build a task bank for agent eval across 7 categories",
|
|
4225
4982
|
steps: [
|
|
4226
4983
|
{ tool: "search_all_knowledge", action: "Step 1: Search past learnings and recon findings for real bugs/tasks to include" },
|
|
4227
4984
|
{ tool: "create_task_bank", action: "Step 2: Add 10 bugfix tasks (easy→expert) with test-based success criteria" },
|
|
@@ -4236,7 +4993,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4236
4993
|
},
|
|
4237
4994
|
pr_review: {
|
|
4238
4995
|
name: "Pull Request Review",
|
|
4239
|
-
description: "
|
|
4996
|
+
description: "PR review with git compliance and merge gate",
|
|
4240
4997
|
steps: [
|
|
4241
4998
|
{ tool: "check_git_compliance", action: "Verify branch state and commit conventions" },
|
|
4242
4999
|
{ tool: "review_pr_checklist", action: "Run structured PR checklist with verification cross-reference" },
|
|
@@ -4247,7 +5004,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4247
5004
|
},
|
|
4248
5005
|
seo_audit: {
|
|
4249
5006
|
name: "Full SEO Audit",
|
|
4250
|
-
description: "
|
|
5007
|
+
description: "SEO audit: technical, content, performance, WordPress",
|
|
4251
5008
|
steps: [
|
|
4252
5009
|
{ tool: "seo_audit_url", action: "Analyze meta tags, headings, images, structured data" },
|
|
4253
5010
|
{ tool: "analyze_seo_content", action: "Check readability, keyword density, link ratios" },
|
|
@@ -4271,7 +5028,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4271
5028
|
},
|
|
4272
5029
|
intentionality_check: {
|
|
4273
5030
|
name: "Intentionality Check (Critter)",
|
|
4274
|
-
description: "
|
|
5031
|
+
description: "Articulate why and who before acting, then proceed",
|
|
4275
5032
|
steps: [
|
|
4276
5033
|
{ tool: "critter_check", action: "Answer: Why are you doing this? Who is it for? Score your intentionality" },
|
|
4277
5034
|
{ tool: "save_session_note", action: "Persist the critter check so it survives context compaction" },
|
|
@@ -4280,7 +5037,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4280
5037
|
},
|
|
4281
5038
|
research_digest: {
|
|
4282
5039
|
name: "Automated Research Digest",
|
|
4283
|
-
description: "Subscribe to RSS/Atom feeds,
|
|
5040
|
+
description: "Subscribe to RSS/Atom feeds, build digest, email it",
|
|
4284
5041
|
steps: [
|
|
4285
5042
|
{ tool: "add_rss_source", action: "Register RSS/Atom feed URLs for topics of interest (arXiv, blogs, news)" },
|
|
4286
5043
|
{ tool: "fetch_rss_feeds", action: "Pull latest articles from all registered sources — new items stored in SQLite" },
|
|
@@ -4292,7 +5049,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4292
5049
|
},
|
|
4293
5050
|
email_assistant: {
|
|
4294
5051
|
name: "Email Draft Assistant",
|
|
4295
|
-
description: "Read inbox, draft
|
|
5052
|
+
description: "Read inbox, draft replies, review, and send via agent",
|
|
4296
5053
|
steps: [
|
|
4297
5054
|
{ tool: "read_emails", action: "Fetch recent/unread emails from IMAP inbox to understand what needs attention" },
|
|
4298
5055
|
{ tool: "draft_email_reply", action: "Generate a professional reply draft from original email context and your instructions" },
|
|
@@ -4302,7 +5059,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4302
5059
|
},
|
|
4303
5060
|
webmcp_discovery: {
|
|
4304
5061
|
name: "WebMCP Origin Discovery",
|
|
4305
|
-
description: "Connect to
|
|
5062
|
+
description: "Connect to WebMCP origin, discover and invoke tools",
|
|
4306
5063
|
steps: [
|
|
4307
5064
|
{ tool: "connect_webmcp_origin", action: "Connect to the target origin URL and establish a WebMCP session" },
|
|
4308
5065
|
{ tool: "list_webmcp_tools", action: "List all tools exposed by the origin with schemas and annotations" },
|
|
@@ -4312,7 +5069,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4312
5069
|
},
|
|
4313
5070
|
batch_autopilot: {
|
|
4314
5071
|
name: "Batch Autopilot Run",
|
|
4315
|
-
description: "Set up
|
|
5072
|
+
description: "Set up operator profile and run batch autopilot session",
|
|
4316
5073
|
steps: [
|
|
4317
5074
|
{ tool: "setup_operator_profile", action: "Create or update USER.md and operator profile for autopilot context" },
|
|
4318
5075
|
{ tool: "get_autopilot_status", action: "Check current autopilot readiness, profile completeness, and last run status" },
|
|
@@ -4323,7 +5080,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4323
5080
|
},
|
|
4324
5081
|
daily_review: {
|
|
4325
5082
|
name: "Daily Brief Review",
|
|
4326
|
-
description: "Pull
|
|
5083
|
+
description: "Pull daily brief, review narratives, check ops dashboard",
|
|
4327
5084
|
steps: [
|
|
4328
5085
|
{ tool: "sync_daily_brief", action: "Pull today's brief and narrative from Convex into local SQLite" },
|
|
4329
5086
|
{ tool: "get_daily_brief_summary", action: "Get the full brief summary with key signals and insights" },
|
|
@@ -4334,7 +5091,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4334
5091
|
},
|
|
4335
5092
|
deep_interaction: {
|
|
4336
5093
|
name: "Deep Interaction Discovery & Capture",
|
|
4337
|
-
description: "
|
|
5094
|
+
description: "Discover, capture, and verify interactive UI behaviors",
|
|
4338
5095
|
steps: [
|
|
4339
5096
|
{ tool: "dive_auto_discover", action: "Auto-discover interactive components (buttons, drawers, modals, expandable rows) across all routes" },
|
|
4340
5097
|
{ tool: "start_ui_dive", action: "Start a structured UI dive session to track interaction coverage" },
|
|
@@ -4350,7 +5107,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4350
5107
|
},
|
|
4351
5108
|
gemini_qa: {
|
|
4352
5109
|
name: "Gemini Vision QA Loop",
|
|
4353
|
-
description: "
|
|
5110
|
+
description: "Gemini vision QA loop: capture, score, fix, repeat",
|
|
4354
5111
|
steps: [
|
|
4355
5112
|
{ tool: "check_mcp_setup", action: "Verify Gemini API key (GOOGLE_AI_KEY) and vision domain are ready" },
|
|
4356
5113
|
{ tool: "start_verification_cycle", action: "Open a verification cycle titled 'Gemini QA Loop' to track progress" },
|
|
@@ -4363,9 +5120,53 @@ export const WORKFLOW_CHAINS = {
|
|
|
4363
5120
|
{ tool: "record_learning", action: "Record QA trajectory and Gemini finding patterns for regression tracking" },
|
|
4364
5121
|
],
|
|
4365
5122
|
},
|
|
5123
|
+
six_hour_qa: {
|
|
5124
|
+
name: "6-Hour Comprehensive QA Workflow",
|
|
5125
|
+
description: "9-phase automated pipeline covering all 39 routes, 18 interaction scenarios (before/during/after captures), 12 animation-critical routes (SSIM burst analysis), 6 screenshot variants (dark/light × desktop/mobile × normal/reduced-motion), 15 Jony Ive aesthetic criteria, Gemini Vision dogfood, 10 agent eval scenarios via LLM judge, learning loop, and final verdict synthesis. Parallelized in batches of 6 concurrent routes.",
|
|
5126
|
+
steps: [
|
|
5127
|
+
{ tool: "start_verification_cycle", action: "Phase 1 SETUP: Create root QA session, run vite build + tsc --noEmit + vitest, capture baseline test counts and screenshot manifest" },
|
|
5128
|
+
{ tool: "run_closed_loop", action: "Phase 1 SETUP: Verify build compiles, zero type errors, all tests pass — establish baseline metrics" },
|
|
5129
|
+
{ tool: "get_gate_preset", action: "Phase 2 APP_QA: Load a11y gate (12 WCAG 2.1 AA rules) — ARIA, contrast, keyboard, focus, skip-link, tab-order, touch-targets" },
|
|
5130
|
+
{ tool: "run_quality_gate", action: "Phase 2 APP_QA: Run a11y + visual_regression + code_review + ui_ux_qa + performance gates on all 39 routes (batched ×6 parallel)" },
|
|
5131
|
+
{ tool: "capture_ui_screenshot", action: "Phase 3 INTERACTIONS: Capture BEFORE state for 18 interaction scenarios (command palette, sidebar hover, tab switch, entity search, etc.)" },
|
|
5132
|
+
{ tool: "run_visual_qa_suite", action: "Phase 3 INTERACTIONS: Trigger each interaction, capture DURING state (tooltip visible, modal open, thread expanding), wait settle delay" },
|
|
5133
|
+
{ tool: "diff_screenshots", action: "Phase 3 INTERACTIONS: Capture AFTER state (settled, restored), diff BEFORE→AFTER to verify clean state restoration" },
|
|
5134
|
+
{ tool: "run_visual_qa_suite", action: "Phase 4 ANIMATION: Burst capture 12 animation-critical routes (10-15 frames each, 40-100ms interval), compute SSIM stability scores" },
|
|
5135
|
+
{ tool: "compute_web_stability", action: "Phase 4 ANIMATION: Verify no jank frames (SSIM>threshold), effective FPS>30, frame delta variance<2× median per route" },
|
|
5136
|
+
{ tool: "run_visual_qa_suite", action: "Phase 4 ANIMATION: Re-test all 12 routes with prefers-reduced-motion:reduce — SSIM must be >0.98 (near-static)" },
|
|
5137
|
+
{ tool: "analyze_screenshot", action: "Phase 5 AESTHETIC: Gemini Vision Pro review of 39 routes × 4 variants — 15 Jony Ive criteria (earned complexity, visual hierarchy, spacing, typography, color harmony, alignment, whitespace, icons, loading elegance, empty states, mobile adaptation, dark mode refinement, animation purpose, focus states, error states)" },
|
|
5138
|
+
{ tool: "save_session_note", action: "Phase 6 DOGFOOD: Trigger Gemini Vision dogfood QA (screenshotQa + videoQa) on 6 screenshot variants, compute score (100 - P0×10 - P1×6 - P2×2 - P3×1)" },
|
|
5139
|
+
{ tool: "start_eval_run", action: "Phase 7 AGENT_EVAL: Create eval suite with 10 agent scenarios (research thesis, DD verify, QA bug, contract compliance, workflow chain, discovery, evidence gathering, cross-check, multi-agent coordination, error recovery)" },
|
|
5140
|
+
{ tool: "save_session_note", action: "Phase 7 AGENT_EVAL: Execute each scenario, grade with LLM judge (8 boolean criteria), record evalResults with per-scenario reasoning" },
|
|
5141
|
+
{ tool: "complete_eval_run", action: "Phase 7 AGENT_EVAL: Finalize eval run — pass rate, critical criteria check (noHallucination + noForbiddenActions), failure patterns" },
|
|
5142
|
+
{ tool: "compare_eval_runs", action: "Phase 7 AGENT_EVAL: Compare against baseline — DEPLOY/REVERT/INVESTIGATE recommendation" },
|
|
5143
|
+
{ tool: "get_improvement_recommendations", action: "Phase 8 LEARNING: Extract failure patterns from all 9 phases — gate failures, interaction mismatches, jank, aesthetic violations, agent failures" },
|
|
5144
|
+
{ tool: "record_learning", action: "Phase 8 LEARNING: 5-whys root cause → targeted fix → re-eval → compare. Bank edge cases for regression prevention" },
|
|
5145
|
+
{ tool: "save_session_note", action: "Phase 9 SYNTHESIS: Cross-check all evidence, compute final verdict (verified/provisionally_verified/needs_review/failed), generate proof pack with coverage: 39 routes × 6 variants × 18 interactions × 12 animation routes" },
|
|
5146
|
+
],
|
|
5147
|
+
},
|
|
5148
|
+
comprehensive_qa: {
|
|
5149
|
+
name: "Comprehensive QA Suite",
|
|
5150
|
+
description: "Full QA pipeline: accessibility audit, visual regression, code review, deploy readiness, and verdict derivation",
|
|
5151
|
+
steps: [
|
|
5152
|
+
{ tool: "start_verification_cycle", action: "Open a QA verification cycle to track all checks in one auditable run" },
|
|
5153
|
+
{ tool: "get_gate_preset", action: "Load the a11y gate preset — 8 WCAG 2.1 AA rules for accessibility compliance" },
|
|
5154
|
+
{ tool: "run_quality_gate", action: "Run the a11y gate against changed components — check ARIA, contrast, keyboard, focus, motion, forms, landmarks" },
|
|
5155
|
+
{ tool: "get_gate_preset", action: "Load the visual_regression gate — 6 rules for baseline comparison, layout shift, responsive, dark/light" },
|
|
5156
|
+
{ tool: "run_quality_gate", action: "Run the visual_regression gate — compare screenshots against baselines at 3 viewports" },
|
|
5157
|
+
{ tool: "get_gate_preset", action: "Load the code_review gate — compile, lint, tests, secrets, error handling, patterns, regression test" },
|
|
5158
|
+
{ tool: "run_quality_gate", action: "Run the code_review gate against all changed files" },
|
|
5159
|
+
{ tool: "run_closed_loop", action: "Execute compile→lint→test→debug closed loop until full green" },
|
|
5160
|
+
{ tool: "get_gate_preset", action: "Load deploy_readiness gate — all tests, no critical gaps, eval scores, learnings, no TODOs" },
|
|
5161
|
+
{ tool: "run_quality_gate", action: "Run deploy_readiness gate to confirm the change is ready to ship" },
|
|
5162
|
+
{ tool: "log_test_result", action: "Record the full QA suite result with layer=integration and all gate scores" },
|
|
5163
|
+
{ tool: "record_learning", action: "Bank QA findings, edge cases, and accessibility patterns for future runs" },
|
|
5164
|
+
{ tool: "save_session_note", action: "Save traceability note linking this QA run to the original request, with citedFrom reference" },
|
|
5165
|
+
],
|
|
5166
|
+
},
|
|
4366
5167
|
content_pipeline: {
|
|
4367
5168
|
name: "Daily Content Pipeline",
|
|
4368
|
-
description: "
|
|
5169
|
+
description: "Gather signals, build digest, generate 3-post thread, publish",
|
|
4369
5170
|
steps: [
|
|
4370
5171
|
{ tool: "fetch_rss_feeds", action: "Pull latest articles from all registered RSS/Atom sources — new items stored in SQLite" },
|
|
4371
5172
|
{ tool: "web_search", action: "Search for breaking developments in target topics (AI, infrastructure, security) to supplement RSS" },
|
|
@@ -4380,7 +5181,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4380
5181
|
},
|
|
4381
5182
|
content_publish: {
|
|
4382
5183
|
name: "Content Publish & Distribute",
|
|
4383
|
-
description: "
|
|
5184
|
+
description: "Distribute content across email, LinkedIn, and archive",
|
|
4384
5185
|
steps: [
|
|
4385
5186
|
{ tool: "search_all_knowledge", action: "Load the latest content pipeline output from session notes or knowledge base" },
|
|
4386
5187
|
{ tool: "call_llm", action: "Format content for target platform (LinkedIn character limits, email HTML, markdown archive)" },
|
|
@@ -4392,7 +5193,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4392
5193
|
},
|
|
4393
5194
|
agent_traversal: {
|
|
4394
5195
|
name: "Agent Frontend Traversal",
|
|
4395
|
-
description: "Navigate
|
|
5196
|
+
description: "Navigate frontend views, invoke per-view tools, traverse feeds",
|
|
4396
5197
|
steps: [
|
|
4397
5198
|
{ tool: "list_available_views", action: "Discover all 27 views with capabilities and available tools" },
|
|
4398
5199
|
{ tool: "get_traversal_plan", action: "Generate a goal-based traversal plan ranking views by relevance" },
|
|
@@ -4406,7 +5207,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4406
5207
|
},
|
|
4407
5208
|
research_optimizer: {
|
|
4408
5209
|
name: "Research Optimization Pipeline",
|
|
4409
|
-
description: "
|
|
5210
|
+
description: "Deep research, extract, score, and rank multi-attribute options",
|
|
4410
5211
|
steps: [
|
|
4411
5212
|
{ tool: "web_search", action: "Search for options and pricing across multiple queries (hotels, flights, products)" },
|
|
4412
5213
|
{ tool: "fetch_url", action: "Fetch detailed pages for top search results — extract pricing, reviews, specs" },
|
|
@@ -4419,7 +5220,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4419
5220
|
},
|
|
4420
5221
|
parallel_research: {
|
|
4421
5222
|
name: "Parallel Multi-Agent Research",
|
|
4422
|
-
description: "
|
|
5223
|
+
description: "Spawn parallel sub-agents for research, merge and score results",
|
|
4423
5224
|
steps: [
|
|
4424
5225
|
{ tool: "bootstrap_parallel_agents", action: "Scaffold parallel agent infrastructure — define roles for each research domain" },
|
|
4425
5226
|
{ tool: "claim_task", action: "Each sub-agent claims a research domain (pricing, reviews, logistics, availability)" },
|
|
@@ -4435,7 +5236,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4435
5236
|
},
|
|
4436
5237
|
competitive_intel: {
|
|
4437
5238
|
name: "Competitive Intelligence Pipeline",
|
|
4438
|
-
description: "Stealth-fetch competitor pages, extract
|
|
5239
|
+
description: "Stealth-fetch competitor pages, extract and rank data",
|
|
4439
5240
|
steps: [
|
|
4440
5241
|
{ tool: "web_search", action: "Identify competitor URLs and market landscape" },
|
|
4441
5242
|
{ tool: "scrapling_batch_fetch", action: "Stealth-fetch 5-10 competitor pages in parallel with anti-bot bypass" },
|
|
@@ -4448,7 +5249,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4448
5249
|
},
|
|
4449
5250
|
price_monitor: {
|
|
4450
5251
|
name: "Price Monitoring Pipeline",
|
|
4451
|
-
description: "Crawl product pages, track
|
|
5252
|
+
description: "Crawl product pages, track prices, alert on changes",
|
|
4452
5253
|
steps: [
|
|
4453
5254
|
{ tool: "scrapling_crawl", action: "Start multi-page crawl of product catalog or competitor pricing pages" },
|
|
4454
5255
|
{ tool: "scrapling_crawl_status", action: "Poll crawl progress and collect extracted items" },
|
|
@@ -4462,7 +5263,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
4462
5263
|
},
|
|
4463
5264
|
thompson_protocol: {
|
|
4464
5265
|
name: "Thompson Protocol Content Pipeline",
|
|
4465
|
-
description: "Transform complex topics into
|
|
5266
|
+
description: "Transform complex topics into plain-English content via 4 agents",
|
|
4466
5267
|
steps: [
|
|
4467
5268
|
{ tool: "thompson_pipeline", action: "Initialize the full pipeline — generates execution plan with all agent prompts and handoff points" },
|
|
4468
5269
|
{ tool: "thompson_write", action: "Transform the complex topic into plain-English sections with jargon translations, analogies, and difficulty acknowledgments" },
|
|
@@ -4476,5 +5277,30 @@ export const WORKFLOW_CHAINS = {
|
|
|
4476
5277
|
{ tool: "record_learning", action: "Record which analogies, styles, and audience levels produced the best engagement" },
|
|
4477
5278
|
],
|
|
4478
5279
|
},
|
|
5280
|
+
system_observability: {
|
|
5281
|
+
name: "system_observability",
|
|
5282
|
+
description: "System health check, drift detection, and auto-maintenance",
|
|
5283
|
+
steps: [
|
|
5284
|
+
{ tool: "get_system_pulse", action: "Capture real-time health snapshot — DB, dashboards, errors, embedding cache, health score" },
|
|
5285
|
+
{ tool: "get_drift_report", action: "Detect configuration and state drift — orphaned cycles, stale runs, DB bloat, error spikes" },
|
|
5286
|
+
{ tool: "run_self_heal", action: "Auto-fix healable drift issues — abandoned cycles, stale runs, log pruning (use dry_run first)" },
|
|
5287
|
+
{ tool: "get_uptime_stats", action: "Review call rates, error trends, and top tools across time windows" },
|
|
5288
|
+
{ tool: "get_watchdog_log", action: "Check background watchdog history — health score trend, auto-healed actions" },
|
|
5289
|
+
{ tool: "save_session_note", action: "Record health findings and any manual interventions for future reference" },
|
|
5290
|
+
],
|
|
5291
|
+
},
|
|
5292
|
+
mission_execution: {
|
|
5293
|
+
name: "Mission Execution Harness",
|
|
5294
|
+
description: "Hierarchical Planner → Worker → Judge → Human Sniff-Check → Merge pipeline for verifiable work",
|
|
5295
|
+
steps: [
|
|
5296
|
+
{ tool: "plan_decompose_mission", action: "Decompose mission into subtasks with verifiability tiers, judge methods, retry budgets, and output contracts" },
|
|
5297
|
+
{ tool: "harness_get_mission_status", action: "Check execution board — which subtasks are pending, assigned, or blocked" },
|
|
5298
|
+
{ tool: "judge_verify_subtask", action: "Judge reviews subtask output against output contract — verdict + evidence + artifacts" },
|
|
5299
|
+
{ tool: "judge_request_retry", action: "If failed: retry (with new instructions), replan, escalate, or stop if unverifiable" },
|
|
5300
|
+
{ tool: "sniff_record_human_review", action: "Human sniff-check: pass / concern / block with issue tags (weak_evidence, unsupported_claim, etc.)" },
|
|
5301
|
+
{ tool: "merge_compose_output", action: "Judge-gated merge of passed subtask artifacts into composed output" },
|
|
5302
|
+
{ tool: "harness_get_mission_status", action: "Final traceability audit — receipts, evidence refs, decisions, verifications, diffs, approvals" },
|
|
5303
|
+
],
|
|
5304
|
+
},
|
|
4479
5305
|
};
|
|
4480
5306
|
//# sourceMappingURL=toolRegistry.js.map
|