evil-omo 3.17.0 → 3.17.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.ja.md +4 -0
  2. package/README.ko.md +4 -0
  3. package/README.ru.md +4 -0
  4. package/README.zh-cn.md +4 -0
  5. package/dist/cli/doctor/spawn-with-timeout.d.ts +8 -0
  6. package/dist/cli/index.js +1009 -643
  7. package/dist/cli/install-validators.d.ts +1 -0
  8. package/dist/cli/model-fallback-types.d.ts +1 -0
  9. package/dist/cli/provider-model-id-transform.d.ts +1 -1
  10. package/dist/cli/types.d.ts +3 -0
  11. package/dist/config/schema/agent-definitions.d.ts +3 -0
  12. package/dist/config/schema/evil-omo-config.d.ts +11 -2
  13. package/dist/create-tools.d.ts +2 -1
  14. package/dist/evil-omo.schema.json +24 -1
  15. package/dist/features/background-agent/index.d.ts +2 -0
  16. package/dist/features/background-agent/session-existence.d.ts +1 -1
  17. package/dist/features/background-agent/subagent-spawn-limits.d.ts +1 -1
  18. package/dist/features/background-agent/task-poller.d.ts +1 -0
  19. package/dist/features/background-agent/wait-for-task-session.d.ts +17 -0
  20. package/dist/features/builtin-commands/commands.d.ts +2 -1
  21. package/dist/features/claude-code-agent-loader/agent-definitions-loader.d.ts +3 -0
  22. package/dist/features/claude-code-agent-loader/index.d.ts +3 -0
  23. package/dist/features/claude-code-agent-loader/json-agent-loader.d.ts +2 -0
  24. package/dist/features/claude-code-agent-loader/loader.d.ts +2 -0
  25. package/dist/features/claude-code-agent-loader/opencode-config-agents-reader.d.ts +2 -0
  26. package/dist/features/claude-code-agent-loader/types.d.ts +9 -1
  27. package/dist/features/tool-metadata-store/index.d.ts +7 -0
  28. package/dist/features/tool-metadata-store/publish-tool-metadata.d.ts +9 -0
  29. package/dist/features/tool-metadata-store/recover-tool-metadata.d.ts +3 -0
  30. package/dist/features/tool-metadata-store/resolve-tool-call-id.d.ts +6 -0
  31. package/dist/features/tool-metadata-store/task-metadata-contract.d.ts +10 -0
  32. package/dist/hooks/atlas/system-reminder-templates.d.ts +2 -2
  33. package/dist/hooks/directory-agents-injector/hook.d.ts +7 -7
  34. package/dist/hooks/directory-readme-injector/hook.d.ts +7 -7
  35. package/dist/hooks/keyword-detector/ultrawork/planner.d.ts +1 -1
  36. package/dist/hooks/model-fallback/next-fallback.d.ts +8 -0
  37. package/dist/index.js +17457 -16688
  38. package/dist/openclaw/config.d.ts +1 -1
  39. package/dist/openclaw/dispatcher.d.ts +0 -1
  40. package/dist/openclaw/gateway-url-validation.d.ts +1 -0
  41. package/dist/shared/agent-tool-restrictions.d.ts +0 -5
  42. package/dist/shared/index.d.ts +3 -0
  43. package/dist/shared/opencode-provider-auth.d.ts +3 -0
  44. package/dist/shared/parse-tools-config.d.ts +6 -0
  45. package/dist/shared/resolve-agent-definition-paths.d.ts +1 -0
  46. package/dist/tools/call-omo-agent/agent-resolver.d.ts +17 -0
  47. package/dist/tools/call-omo-agent/constants.d.ts +1 -1
  48. package/dist/tools/delegate-task/delegated-model-config.d.ts +3 -0
  49. package/dist/tools/delegate-task/fallback-entry-resolution.d.ts +12 -0
  50. package/dist/tools/delegate-task/fallback-entry-settings.d.ts +7 -0
  51. package/dist/tools/delegate-task/subagent-discovery.d.ts +15 -0
  52. package/dist/tools/delegate-task/sync-task-fallback.d.ts +12 -0
  53. package/dist/tools/skill/constants.d.ts +1 -1
  54. package/package.json +12 -12
  55. package/dist/tools/delegate-task/resolve-call-id.d.ts +0 -2
@@ -30,4 +30,5 @@ export declare function detectedToInitialValues(detected: DetectedConfig): {
30
30
  zaiCodingPlan: BooleanArg;
31
31
  kimiForCoding: BooleanArg;
32
32
  opencodeGo: BooleanArg;
33
+ vercelAiGateway: BooleanArg;
33
34
  };
@@ -10,6 +10,7 @@ export interface ProviderAvailability {
10
10
  zai: boolean;
11
11
  kimiForCoding: boolean;
12
12
  opencodeGo: boolean;
13
+ vercelAiGateway: boolean;
13
14
  isMaxPlan: boolean;
14
15
  }
15
16
  export interface AgentConfig {
@@ -1 +1 @@
1
- export { transformModelForProvider } from "../shared/provider-model-id-transform";
1
+ export declare function transformModelForProvider(provider: string, model: string): string;
@@ -10,6 +10,7 @@ export interface InstallArgs {
10
10
  zaiCodingPlan?: BooleanArg;
11
11
  kimiForCoding?: BooleanArg;
12
12
  opencodeGo?: BooleanArg;
13
+ vercelAiGateway?: BooleanArg;
13
14
  skipAuth?: boolean;
14
15
  }
15
16
  export interface InstallConfig {
@@ -22,6 +23,7 @@ export interface InstallConfig {
22
23
  hasZaiCodingPlan: boolean;
23
24
  hasKimiForCoding: boolean;
24
25
  hasOpencodeGo: boolean;
26
+ hasVercelAiGateway: boolean;
25
27
  }
26
28
  export interface ConfigMergeResult {
27
29
  success: boolean;
@@ -40,4 +42,5 @@ export interface DetectedConfig {
40
42
  hasZaiCodingPlan: boolean;
41
43
  hasKimiForCoding: boolean;
42
44
  hasOpencodeGo: boolean;
45
+ hasVercelAiGateway: boolean;
43
46
  }
@@ -0,0 +1,3 @@
1
+ import { z } from "zod";
2
+ export declare const AgentDefinitionPathSchema: z.ZodString;
3
+ export declare const AgentDefinitionsConfigSchema: z.ZodOptional<z.ZodArray<z.ZodString>>;
@@ -3,9 +3,18 @@ export declare const OhMyOpenCodeConfigSchema: z.ZodObject<{
3
3
  $schema: z.ZodOptional<z.ZodString>;
4
4
  new_task_system_enabled: z.ZodOptional<z.ZodBoolean>;
5
5
  default_run_agent: z.ZodOptional<z.ZodString>;
6
+ agent_definitions: z.ZodOptional<z.ZodArray<z.ZodString>>;
6
7
  disabled_mcps: z.ZodOptional<z.ZodArray<z.ZodString>>;
7
8
  disabled_agents: z.ZodOptional<z.ZodArray<z.ZodString>>;
8
- disabled_skills: z.ZodOptional<z.ZodArray<z.ZodString>>;
9
+ disabled_skills: z.ZodOptional<z.ZodArray<z.ZodEnum<{
10
+ playwright: "playwright";
11
+ "agent-browser": "agent-browser";
12
+ "dev-browser": "dev-browser";
13
+ "frontend-ui-ux": "frontend-ui-ux";
14
+ "git-master": "git-master";
15
+ "review-work": "review-work";
16
+ "ai-slop-remover": "ai-slop-remover";
17
+ }>>>;
9
18
  disabled_hooks: z.ZodOptional<z.ZodArray<z.ZodString>>;
10
19
  disabled_commands: z.ZodOptional<z.ZodArray<z.ZodEnum<{
11
20
  "init-deep": "init-deep";
@@ -2043,7 +2052,7 @@ export declare const OhMyOpenCodeConfigSchema: z.ZodObject<{
2043
2052
  babysitting: z.ZodOptional<z.ZodObject<{
2044
2053
  timeout_ms: z.ZodDefault<z.ZodNumber>;
2045
2054
  }, z.core.$strip>>;
2046
- git_master: z.ZodOptional<z.ZodObject<{
2055
+ git_master: z.ZodDefault<z.ZodObject<{
2047
2056
  commit_footer: z.ZodDefault<z.ZodUnion<readonly [z.ZodBoolean, z.ZodString]>>;
2048
2057
  include_co_authored_by: z.ZodDefault<z.ZodBoolean>;
2049
2058
  git_env_prefix: z.ZodDefault<z.ZodString>;
@@ -4,7 +4,7 @@ import type { BrowserAutomationProvider } from "./config/schema/browser-automati
4
4
  import type { LoadedSkill } from "./features/opencode-skill-loader/types";
5
5
  import type { PluginContext, ToolsRecord } from "./plugin/types";
6
6
  import type { Managers } from "./create-managers";
7
- export type CreateToolsResult = {
7
+ type CreateToolsResult = {
8
8
  filteredTools: ToolsRecord;
9
9
  mergedSkills: LoadedSkill[];
10
10
  availableSkills: AvailableSkill[];
@@ -18,3 +18,4 @@ export declare function createTools(args: {
18
18
  pluginConfig: OhMyOpenCodeConfig;
19
19
  managers: Pick<Managers, "backgroundManager" | "tmuxSessionManager" | "skillMcpManager">;
20
20
  }): Promise<CreateToolsResult>;
21
+ export {};
@@ -14,6 +14,13 @@
14
14
  "default_run_agent": {
15
15
  "type": "string"
16
16
  },
17
+ "agent_definitions": {
18
+ "type": "array",
19
+ "items": {
20
+ "type": "string",
21
+ "minLength": 1
22
+ }
23
+ },
17
24
  "disabled_mcps": {
18
25
  "type": "array",
19
26
  "items": {
@@ -32,7 +39,15 @@
32
39
  "type": "array",
33
40
  "items": {
34
41
  "type": "string",
35
- "minLength": 1
42
+ "enum": [
43
+ "playwright",
44
+ "agent-browser",
45
+ "dev-browser",
46
+ "frontend-ui-ux",
47
+ "git-master",
48
+ "review-work",
49
+ "ai-slop-remover"
50
+ ]
36
51
  }
37
52
  },
38
53
  "disabled_hooks": {
@@ -5860,6 +5875,11 @@
5860
5875
  "additionalProperties": false
5861
5876
  },
5862
5877
  "git_master": {
5878
+ "default": {
5879
+ "commit_footer": true,
5880
+ "include_co_authored_by": true,
5881
+ "git_env_prefix": "GIT_MASTER=1"
5882
+ },
5863
5883
  "type": "object",
5864
5884
  "properties": {
5865
5885
  "commit_footer": {
@@ -6058,5 +6078,8 @@
6058
6078
  }
6059
6079
  }
6060
6080
  },
6081
+ "required": [
6082
+ "git_master"
6083
+ ],
6061
6084
  "additionalProperties": false
6062
6085
  }
@@ -1,2 +1,4 @@
1
1
  export * from "./types";
2
2
  export { BackgroundManager, type SubagentSessionCreatedEvent, type OnSubagentSessionCreated } from "./manager";
3
+ export { waitForTaskSessionID } from "./wait-for-task-session";
4
+ export type { WaitForTaskSessionIDOptions } from "./wait-for-task-session";
@@ -1,3 +1,3 @@
1
1
  import type { OpencodeClient } from "./opencode-client";
2
2
  export declare const MIN_SESSION_GONE_POLLS = 3;
3
- export declare function verifySessionExists(client: OpencodeClient, sessionID: string): Promise<boolean>;
3
+ export declare function verifySessionExists(client: OpencodeClient, sessionID: string, directory?: string): Promise<boolean>;
@@ -9,7 +9,7 @@ export interface SubagentSpawnContext {
9
9
  }
10
10
  export declare function getMaxSubagentDepth(config?: BackgroundTaskConfig): number;
11
11
  export declare function getMaxRootSessionSpawnBudget(config?: BackgroundTaskConfig): number;
12
- export declare function resolveSubagentSpawnContext(client: OpencodeClient, parentSessionID: string): Promise<SubagentSpawnContext>;
12
+ export declare function resolveSubagentSpawnContext(client: OpencodeClient, parentSessionID: string, directory?: string): Promise<SubagentSpawnContext>;
13
13
  export declare function createSubagentDepthLimitError(input: {
14
14
  childDepth: number;
15
15
  maxDepth: number;
@@ -14,6 +14,7 @@ export type SessionStatusMap = Record<string, {
14
14
  export declare function checkAndInterruptStaleTasks(args: {
15
15
  tasks: Iterable<BackgroundTask>;
16
16
  client: OpencodeClient;
17
+ directory?: string;
17
18
  config: BackgroundTaskConfig | undefined;
18
19
  concurrencyManager: ConcurrencyManager;
19
20
  notifyParentSession: (task: BackgroundTask) => Promise<void>;
@@ -0,0 +1,17 @@
1
+ import type { BackgroundTaskStatus } from "./types";
2
+ type AbortSignalLike = {
3
+ aborted: boolean;
4
+ };
5
+ interface TaskReader {
6
+ getTask(taskID: string): {
7
+ sessionID?: string;
8
+ status?: BackgroundTaskStatus;
9
+ } | undefined;
10
+ }
11
+ export interface WaitForTaskSessionIDOptions {
12
+ timeoutMs?: number;
13
+ intervalMs?: number;
14
+ signal?: AbortSignalLike;
15
+ }
16
+ export declare function waitForTaskSessionID(manager: TaskReader, taskID: string, options?: WaitForTaskSessionIDOptions): Promise<string | undefined>;
17
+ export {};
@@ -1,5 +1,6 @@
1
1
  import type { BuiltinCommandName, BuiltinCommands } from "./types";
2
- export interface LoadBuiltinCommandsOptions {
2
+ interface LoadBuiltinCommandsOptions {
3
3
  useRegisteredAgents?: boolean;
4
4
  }
5
5
  export declare function loadBuiltinCommands(disabledCommands?: BuiltinCommandName[], options?: LoadBuiltinCommandsOptions): BuiltinCommands;
6
+ export {};
@@ -0,0 +1,3 @@
1
+ import type { AgentScope, ClaudeCodeAgentConfig, LoadedAgent } from "./types";
2
+ export declare function parseMarkdownAgentFile(filePath: string, scope: AgentScope): LoadedAgent | null;
3
+ export declare function loadAgentDefinitions(paths: string[], scope: AgentScope): Record<string, ClaudeCodeAgentConfig>;
@@ -1,2 +1,5 @@
1
1
  export * from "./types";
2
2
  export * from "./loader";
3
+ export * from "./agent-definitions-loader";
4
+ export * from "./opencode-config-agents-reader";
5
+ export * from "./json-agent-loader";
@@ -0,0 +1,2 @@
1
+ import type { AgentScope, LoadedAgent } from "./types";
2
+ export declare function parseJsonAgentFile(filePath: string, scope: AgentScope): LoadedAgent | null;
@@ -1,3 +1,5 @@
1
1
  import type { ClaudeCodeAgentConfig } from "./types";
2
2
  export declare function loadUserAgents(): Record<string, ClaudeCodeAgentConfig>;
3
3
  export declare function loadProjectAgents(directory?: string): Record<string, ClaudeCodeAgentConfig>;
4
+ export declare function loadOpencodeGlobalAgents(): Record<string, ClaudeCodeAgentConfig>;
5
+ export declare function loadOpencodeProjectAgents(directory?: string): Record<string, ClaudeCodeAgentConfig>;
@@ -0,0 +1,2 @@
1
+ import type { ClaudeCodeAgentConfig } from "./types";
2
+ export declare function readOpencodeConfigAgents(directory: string): Record<string, ClaudeCodeAgentConfig>;
@@ -1,5 +1,5 @@
1
1
  import type { AgentConfig } from "@opencode-ai/sdk";
2
- export type AgentScope = "user" | "project";
2
+ export type AgentScope = "user" | "project" | "opencode" | "opencode-project" | "definition-file" | "opencode-config";
3
3
  export type ClaudeCodeAgentConfig = Omit<AgentConfig, "model"> & {
4
4
  model?: string | {
5
5
  providerID: string;
@@ -13,6 +13,14 @@ export interface AgentFrontmatter {
13
13
  tools?: string;
14
14
  mode?: "subagent" | "primary" | "all";
15
15
  }
16
+ export interface AgentJsonDefinition {
17
+ name: string;
18
+ description?: string;
19
+ model?: string;
20
+ tools?: string | string[];
21
+ mode?: "subagent" | "primary" | "all";
22
+ prompt: string;
23
+ }
16
24
  export interface LoadedAgent {
17
25
  name: string;
18
26
  path: string;
@@ -1,2 +1,9 @@
1
1
  export { clearPendingStore, consumeToolMetadata, getPendingStoreSize, storeToolMetadata, } from "./store";
2
2
  export type { PendingToolMetadata } from "./store";
3
+ export { resolveToolCallID } from "./resolve-tool-call-id";
4
+ export type { ToolCallIDCarrier } from "./resolve-tool-call-id";
5
+ export { buildTaskMetadataBlock, extractTaskLink, parseTaskMetadataBlock } from "./task-metadata-contract";
6
+ export type { TaskLink } from "./task-metadata-contract";
7
+ export { publishToolMetadata } from "./publish-tool-metadata";
8
+ export { recoverToolMetadata } from "./recover-tool-metadata";
9
+ export type { ToolMetadataPublisherContext } from "./publish-tool-metadata";
@@ -0,0 +1,9 @@
1
+ import { type ToolCallIDCarrier } from "./resolve-tool-call-id";
2
+ import { type PendingToolMetadata } from "./store";
3
+ export interface ToolMetadataPublisherContext extends ToolCallIDCarrier {
4
+ sessionID: string;
5
+ metadata?: (input: PendingToolMetadata) => void | Promise<void>;
6
+ }
7
+ export declare function publishToolMetadata(ctx: ToolMetadataPublisherContext, payload: PendingToolMetadata): Promise<{
8
+ stored: boolean;
9
+ }>;
@@ -0,0 +1,3 @@
1
+ import { type PendingToolMetadata } from "./store";
2
+ import { type ToolCallIDCarrier } from "./resolve-tool-call-id";
3
+ export declare function recoverToolMetadata(sessionID: string, source: ToolCallIDCarrier | string | undefined): PendingToolMetadata | undefined;
@@ -0,0 +1,6 @@
1
+ export interface ToolCallIDCarrier {
2
+ callID?: string;
3
+ callId?: string;
4
+ call_id?: string;
5
+ }
6
+ export declare function resolveToolCallID(ctx: ToolCallIDCarrier): string | undefined;
@@ -0,0 +1,10 @@
1
+ export interface TaskLink {
2
+ sessionId?: string;
3
+ taskId?: string;
4
+ backgroundTaskId?: string;
5
+ agent?: string;
6
+ category?: string;
7
+ }
8
+ export declare function buildTaskMetadataBlock(link: TaskLink): string;
9
+ export declare function parseTaskMetadataBlock(text: string): TaskLink;
10
+ export declare function extractTaskLink(metadata: unknown, outputText: string): TaskLink;
@@ -1,6 +1,6 @@
1
1
  export declare const DIRECT_WORK_REMINDER: string;
2
2
  export declare const BOULDER_CONTINUATION_PROMPT: string;
3
- export declare const VERIFICATION_REMINDER = "**THE SUBAGENT JUST CLAIMED THIS TASK IS DONE. THEY ARE PROBABLY LYING.**\n\nSubagents say \"done\" when code has errors, tests pass trivially, logic is wrong,\nor they quietly added features nobody asked for. This happens EVERY TIME.\nAssume the work is broken until YOU prove otherwise.\n\n---\n\n**PHASE 1: READ THE CODE FIRST (before running anything)**\n\nDo NOT run tests yet. Read the code FIRST so you know what you're testing.\n\n1. `Bash(\"git diff --stat\")` - see exactly which files changed. Any file outside expected scope = scope creep.\n2. `Read` EVERY changed file - no exceptions, no skimming.\n3. For EACH file, critically ask:\n - Does this code ACTUALLY do what the task required? (Re-read the task, compare line by line)\n - Any stubs, TODOs, placeholders, hardcoded values? (`Grep` for TODO, FIXME, HACK, xxx)\n - Logic errors? Trace the happy path AND the error path in your head.\n - Anti-patterns? (`Grep` for `as any`, `@ts-ignore`, empty catch, console.log in changed files)\n - Scope creep? Did the subagent touch things or add features NOT in the task spec?\n4. Cross-check every claim:\n - Said \"Updated X\" - READ X. Actually updated, or just superficially touched?\n - Said \"Added tests\" - READ the tests. Do they test REAL behavior or just `expect(true).toBe(true)`?\n - Said \"Follows patterns\" - OPEN a reference file. Does it ACTUALLY match?\n\n**If you cannot explain what every changed line does, you have NOT reviewed it.**\n\n**PHASE 2: RUN AUTOMATED CHECKS (targeted, then broad)**\n\nNow that you understand the code, verify mechanically:\n1. `lsp_diagnostics` on EACH changed file - ZERO new errors\n2. Run tests for changed modules FIRST, then full suite\n3. Build/typecheck - exit 0\n\nIf Phase 1 found issues but Phase 2 passes: Phase 2 is WRONG. The code has bugs that tests don't cover. Fix the code.\n\n**PHASE 3: HANDS-ON QA - ACTUALLY RUN IT (MANDATORY for user-facing changes)**\n\nTests and linters CANNOT catch: visual bugs, wrong CLI output, broken user flows, API response shape issues.\n\n**If this task produced anything a user would SEE or INTERACT with, you MUST launch it and verify yourself.**\n\n- **Frontend/UI**: `/playwright` skill - load the page, click through the flow, check console. Verify: page loads, interactions work, console clean, responsive.\n- **TUI/CLI**: `interactive_bash` - run the command, try good input, try bad input, try --help. Verify: command runs, output correct, error messages helpful, edge inputs handled.\n- **API/Backend**: `Bash` with curl - hit the endpoint, check response body, send malformed input. Verify: returns 200, body correct, error cases return proper errors.\n- **Config/Build**: Actually start the service or import the config. Verify: loads without error, backward compatible.\n\nThis is NOT optional \"if applicable\". If the deliverable is user-facing and you did not run it, you are shipping untested work.\n\n**PHASE 4: GATE DECISION - Should you proceed to the next task?**\n\nAnswer honestly:\n1. Can I explain what EVERY changed line does? (If no - back to Phase 1)\n2. Did I SEE it work with my own eyes? (If user-facing and no - back to Phase 3)\n3. Am I confident nothing existing is broken? (If no - run broader tests)\n\nALL three must be YES. \"Probably\" = NO. \"I think so\" = NO. Investigate until CERTAIN.\n\n- **All 3 YES** - Proceed: mark task complete, move to next.\n- **Any NO** - Reject: resume session with `session_id`, fix the specific issue.\n- **Unsure** - Reject: \"unsure\" = \"no\". Investigate until you have a definitive answer.\n\n**DO NOT proceed to the next task until all 4 phases are complete and the gate passes.**";
4
- export declare const VERIFICATION_REMINDER_GEMINI = "**THE SUBAGENT HAS FINISHED. THEIR WORK IS EXTREMELY SUSPICIOUS.**\n\nThe subagent CLAIMS this task is done. Based on thousands of executions, subagent claims are FALSE more often than true.\nThey ROUTINELY:\n- Ship code with syntax errors they didn't bother to check\n- Create stub implementations with TODOs and call it \"done\"\n- Write tests that pass trivially (testing nothing meaningful)\n- Implement logic that does NOT match what was requested\n- Add features nobody asked for and call it \"improvement\"\n- Report \"all tests pass\" when they didn't run any tests\n\n**This is NOT a theoretical warning. This WILL happen on this task. Assume the work is BROKEN.**\n\n**YOU MUST VERIFY WITH ACTUAL TOOL CALLS. NOT REASONING. TOOL CALLS.**\nThinking \"it looks correct\" is NOT verification. Running `lsp_diagnostics` IS.\n\n---\n\n**PHASE 1: READ THE CODE FIRST (DO NOT SKIP - DO NOT RUN TESTS YET)**\n\nRead the code FIRST so you know what you're testing.\n\n1. `Bash(\"git diff --stat\")` - see exactly which files changed.\n2. `Read` EVERY changed file - no exceptions, no skimming.\n3. For EACH file:\n - Does this code ACTUALLY do what the task required? RE-READ the task spec.\n - Any stubs, TODOs, placeholders? `Grep` for TODO, FIXME, HACK, xxx\n - Anti-patterns? `Grep` for `as any`, `@ts-ignore`, empty catch\n - Scope creep? Did the subagent add things NOT in the task spec?\n4. Cross-check EVERY claim against actual code.\n\n**If you cannot explain what every changed line does, GO BACK AND READ AGAIN.**\n\n**PHASE 2: RUN AUTOMATED CHECKS**\n\n1. `lsp_diagnostics` on EACH changed file - ZERO new errors. ACTUALLY RUN THIS.\n2. Run tests for changed modules, then full suite. ACTUALLY RUN THESE.\n3. Build/typecheck - exit 0.\n\nIf Phase 1 found issues but Phase 2 passes: Phase 2 is WRONG. Fix the code.\n\n**PHASE 3: HANDS-ON QA (MANDATORY for user-facing changes)**\n\n- **Frontend/UI**: `/playwright`\n- **TUI/CLI**: `interactive_bash`\n- **API/Backend**: `Bash` with curl\n\n**If user-facing and you did not run it, you are shipping UNTESTED BROKEN work.**\n\n**PHASE 4: GATE DECISION**\n\n1. Can I explain what EVERY changed line does? (If no \u2192 Phase 1)\n2. Did I SEE it work via tool calls? (If user-facing and no \u2192 Phase 3)\n3. Am I confident nothing is broken? (If no \u2192 broader tests)\n\nALL three must be YES. \"Probably\" = NO. \"I think so\" = NO.\n\n**DO NOT proceed to the next task until all 4 phases are complete.**";
3
+ export declare const VERIFICATION_REMINDER = "**THE SUBAGENT JUST CLAIMED THIS TASK IS DONE. THEY ARE PROBABLY LYING.**\n\nSubagents say \"done\" when code has errors, tests pass trivially, logic is wrong,\nor they quietly added features nobody asked for. This happens EVERY TIME.\nAssume the work is broken until YOU prove otherwise.\n\n---\n\n**PHASE 1: READ THE CODE FIRST (before running anything)**\n\nDo NOT run tests yet. Read the code FIRST so you know what you're testing.\n\n1. `Bash(\"git diff --stat -- ':!node_modules'\")` - see exactly which files changed. Any file outside expected scope = scope creep.\n2. `Read` EVERY changed file - no exceptions, no skimming.\n3. For EACH file, critically ask:\n - Does this code ACTUALLY do what the task required? (Re-read the task, compare line by line)\n - Any stubs, TODOs, placeholders, hardcoded values? (`Grep` for TODO, FIXME, HACK, xxx)\n - Logic errors? Trace the happy path AND the error path in your head.\n - Anti-patterns? (`Grep` for `as any`, `@ts-ignore`, empty catch, console.log in changed files)\n - Scope creep? Did the subagent touch things or add features NOT in the task spec?\n4. Cross-check every claim:\n - Said \"Updated X\" - READ X. Actually updated, or just superficially touched?\n - Said \"Added tests\" - READ the tests. Do they test REAL behavior or just `expect(true).toBe(true)`?\n - Said \"Follows patterns\" - OPEN a reference file. Does it ACTUALLY match?\n\n**If you cannot explain what every changed line does, you have NOT reviewed it.**\n\n**PHASE 2: RUN AUTOMATED CHECKS (targeted, then broad)**\n\nNow that you understand the code, verify mechanically:\n1. `lsp_diagnostics` on EACH changed file - ZERO new errors\n2. Run tests for changed modules FIRST, then full suite\n3. Build/typecheck - exit 0\n\nIf Phase 1 found issues but Phase 2 passes: Phase 2 is WRONG. The code has bugs that tests don't cover. Fix the code.\n\n**PHASE 3: HANDS-ON QA - ACTUALLY RUN IT (MANDATORY for user-facing changes)**\n\nTests and linters CANNOT catch: visual bugs, wrong CLI output, broken user flows, API response shape issues.\n\n**If this task produced anything a user would SEE or INTERACT with, you MUST launch it and verify yourself.**\n\n- **Frontend/UI**: `/playwright` skill - load the page, click through the flow, check console. Verify: page loads, interactions work, console clean, responsive.\n- **TUI/CLI**: `interactive_bash` - run the command, try good input, try bad input, try --help. Verify: command runs, output correct, error messages helpful, edge inputs handled.\n- **API/Backend**: `Bash` with curl - hit the endpoint, check response body, send malformed input. Verify: returns 200, body correct, error cases return proper errors.\n- **Config/Build**: Actually start the service or import the config. Verify: loads without error, backward compatible.\n\nThis is NOT optional \"if applicable\". If the deliverable is user-facing and you did not run it, you are shipping untested work.\n\n**PHASE 4: GATE DECISION - Should you proceed to the next task?**\n\nAnswer honestly:\n1. Can I explain what EVERY changed line does? (If no - back to Phase 1)\n2. Did I SEE it work with my own eyes? (If user-facing and no - back to Phase 3)\n3. Am I confident nothing existing is broken? (If no - run broader tests)\n\nALL three must be YES. \"Probably\" = NO. \"I think so\" = NO. Investigate until CERTAIN.\n\n- **All 3 YES** - Proceed: mark task complete, move to next.\n- **Any NO** - Reject: resume session with `session_id`, fix the specific issue.\n- **Unsure** - Reject: \"unsure\" = \"no\". Investigate until you have a definitive answer.\n\n**DO NOT proceed to the next task until all 4 phases are complete and the gate passes.**";
4
+ export declare const VERIFICATION_REMINDER_GEMINI = "**THE SUBAGENT HAS FINISHED. THEIR WORK IS EXTREMELY SUSPICIOUS.**\n\nThe subagent CLAIMS this task is done. Based on thousands of executions, subagent claims are FALSE more often than true.\nThey ROUTINELY:\n- Ship code with syntax errors they didn't bother to check\n- Create stub implementations with TODOs and call it \"done\"\n- Write tests that pass trivially (testing nothing meaningful)\n- Implement logic that does NOT match what was requested\n- Add features nobody asked for and call it \"improvement\"\n- Report \"all tests pass\" when they didn't run any tests\n\n**This is NOT a theoretical warning. This WILL happen on this task. Assume the work is BROKEN.**\n\n**YOU MUST VERIFY WITH ACTUAL TOOL CALLS. NOT REASONING. TOOL CALLS.**\nThinking \"it looks correct\" is NOT verification. Running `lsp_diagnostics` IS.\n\n---\n\n**PHASE 1: READ THE CODE FIRST (DO NOT SKIP - DO NOT RUN TESTS YET)**\n\nRead the code FIRST so you know what you're testing.\n\n1. `Bash(\"git diff --stat -- ':!node_modules'\")` - see exactly which files changed.\n2. `Read` EVERY changed file - no exceptions, no skimming.\n3. For EACH file:\n - Does this code ACTUALLY do what the task required? RE-READ the task spec.\n - Any stubs, TODOs, placeholders? `Grep` for TODO, FIXME, HACK, xxx\n - Anti-patterns? `Grep` for `as any`, `@ts-ignore`, empty catch\n - Scope creep? Did the subagent add things NOT in the task spec?\n4. Cross-check EVERY claim against actual code.\n\n**If you cannot explain what every changed line does, GO BACK AND READ AGAIN.**\n\n**PHASE 2: RUN AUTOMATED CHECKS**\n\n1. `lsp_diagnostics` on EACH changed file - ZERO new errors. ACTUALLY RUN THIS.\n2. Run tests for changed modules, then full suite. ACTUALLY RUN THESE.\n3. Build/typecheck - exit 0.\n\nIf Phase 1 found issues but Phase 2 passes: Phase 2 is WRONG. Fix the code.\n\n**PHASE 3: HANDS-ON QA (MANDATORY for user-facing changes)**\n\n- **Frontend/UI**: `/playwright`\n- **TUI/CLI**: `interactive_bash`\n- **API/Backend**: `Bash` with curl\n\n**If user-facing and you did not run it, you are shipping UNTESTED BROKEN work.**\n\n**PHASE 4: GATE DECISION**\n\n1. Can I explain what EVERY changed line does? (If no \u2192 Phase 1)\n2. Did I SEE it work via tool calls? (If user-facing and no \u2192 Phase 3)\n3. Am I confident nothing is broken? (If no \u2192 broader tests)\n\nALL three must be YES. \"Probably\" = NO. \"I think so\" = NO.\n\n**DO NOT proceed to the next task until all 4 phases are complete.**";
5
5
  export declare const ORCHESTRATOR_DELEGATION_REQUIRED: string;
6
6
  export declare const SINGLE_TASK_DIRECTIVE: string;
@@ -9,8 +9,12 @@ interface ToolExecuteOutput {
9
9
  output: string;
10
10
  metadata: unknown;
11
11
  }
12
- interface ToolExecuteBeforeOutput {
13
- args: unknown;
12
+ interface DirectoryAgentsInjectorHook {
13
+ "tool.execute.before"?: (input: ToolExecuteInput, output: {
14
+ args: unknown;
15
+ }) => Promise<void>;
16
+ "tool.execute.after": (input: ToolExecuteInput, output: ToolExecuteOutput) => Promise<void>;
17
+ event: (input: EventInput) => Promise<void>;
14
18
  }
15
19
  interface EventInput {
16
20
  event: {
@@ -20,9 +24,5 @@ interface EventInput {
20
24
  }
21
25
  export declare function createDirectoryAgentsInjectorHook(ctx: PluginInput, modelCacheState?: {
22
26
  anthropicContext1MEnabled: boolean;
23
- }): {
24
- "tool.execute.before": (input: ToolExecuteInput, output: ToolExecuteBeforeOutput) => Promise<void>;
25
- "tool.execute.after": (input: ToolExecuteInput, output: ToolExecuteOutput) => Promise<void>;
26
- event: ({ event }: EventInput) => Promise<void>;
27
- };
27
+ }): DirectoryAgentsInjectorHook;
28
28
  export {};
@@ -9,8 +9,12 @@ interface ToolExecuteOutput {
9
9
  output: string;
10
10
  metadata: unknown;
11
11
  }
12
- interface ToolExecuteBeforeOutput {
13
- args: unknown;
12
+ interface DirectoryReadmeInjectorHook {
13
+ "tool.execute.before"?: (input: ToolExecuteInput, output: {
14
+ args: unknown;
15
+ }) => Promise<void>;
16
+ "tool.execute.after": (input: ToolExecuteInput, output: ToolExecuteOutput) => Promise<void>;
17
+ event: (input: EventInput) => Promise<void>;
14
18
  }
15
19
  interface EventInput {
16
20
  event: {
@@ -20,9 +24,5 @@ interface EventInput {
20
24
  }
21
25
  export declare function createDirectoryReadmeInjectorHook(ctx: PluginInput, modelCacheState?: {
22
26
  anthropicContext1MEnabled: boolean;
23
- }): {
24
- "tool.execute.before": (input: ToolExecuteInput, output: ToolExecuteBeforeOutput) => Promise<void>;
25
- "tool.execute.after": (input: ToolExecuteInput, output: ToolExecuteOutput) => Promise<void>;
26
- event: ({ event }: EventInput) => Promise<void>;
27
- };
27
+ }): DirectoryReadmeInjectorHook;
28
28
  export {};
@@ -2,5 +2,5 @@
2
2
  * Ultrawork message section for planner agents (Prometheus).
3
3
  * Planner agents should NOT be told to call plan agent - they ARE the planner.
4
4
  */
5
- export declare const ULTRAWORK_PLANNER_SECTION = "## CRITICAL: YOU ARE A PLANNER, NOT AN IMPLEMENTER\n\n**IDENTITY CONSTRAINT (NON-NEGOTIABLE):**\nYou ARE the planner. You ARE NOT an implementer. You DO NOT write code. You DO NOT execute tasks.\n\n**TOOL RESTRICTIONS (SYSTEM-ENFORCED):**\n| Tool | Allowed | Blocked |\n|------|---------|---------|\n| Write/Edit | `.sisyphus/**/*.md` ONLY | Everything else |\n| Read | All files | - |\n| Bash | Research commands only | Implementation commands |\n| task | explore, librarian | - |\n\n**IF YOU TRY TO WRITE/EDIT OUTSIDE `.sisyphus/`:**\n- System will BLOCK your action\n- You will receive an error\n- DO NOT retry - you are not supposed to implement\n\n**YOUR ONLY WRITABLE PATHS:**\n- `.sisyphus/plans/*.md` - Final work plans\n- `.sisyphus/drafts/*.md` - Working drafts during interview\n\n**WHEN USER ASKS YOU TO IMPLEMENT:**\nREFUSE. Say: \"I'm a planner. I create work plans, not implementations. Run `/start-work` after I finish planning.\"\n\n---\n\n## CONTEXT GATHERING (MANDATORY BEFORE PLANNING)\n\nYou ARE the planner. Your job: create bulletproof work plans.\n**Before drafting ANY plan, gather context via explore/librarian agents.**\n\n### Research Protocol\n1. **Fire parallel background agents** for comprehensive context:\n ```\n task(subagent_type=\"explore\", load_skills=[], prompt=\"Find existing patterns for [topic] in codebase\", run_in_background=true)\n task(subagent_type=\"explore\", load_skills=[], prompt=\"Find test infrastructure and conventions\", run_in_background=true)\n task(subagent_type=\"librarian\", load_skills=[], prompt=\"Find official docs and best practices for [technology]\", run_in_background=true)\n ```\n2. **Wait for results** before planning - rushed plans fail\n3. **Synthesize findings** into informed requirements\n\n### What to Research\n- Existing codebase patterns and conventions\n- Test infrastructure (TDD possible?)\n- External library APIs and constraints\n- Similar implementations in OSS (via librarian)\n\n**NEVER plan blind. Context first, plan second.**\n\n---\n\n## MANDATORY OUTPUT: PARALLEL TASK GRAPH + TODO LIST\n\n**YOUR PRIMARY OUTPUT IS A PARALLEL EXECUTION TASK GRAPH.**\n\nWhen you finalize a plan, you MUST structure it for maximum parallel execution:\n\n### 1. Parallel Execution Waves (REQUIRED)\n\nAnalyze task dependencies and group independent tasks into parallel waves:\n\n```\nWave 1 (Start Immediately - No Dependencies):\n\u251C\u2500\u2500 Task 1: [description] \u2192 category: X, skills: [a, b]\n\u2514\u2500\u2500 Task 4: [description] \u2192 category: Y, skills: [c]\n\nWave 2 (After Wave 1 Completes):\n\u251C\u2500\u2500 Task 2: [depends: 1] \u2192 category: X, skills: [a]\n\u251C\u2500\u2500 Task 3: [depends: 1] \u2192 category: Z, skills: [d]\n\u2514\u2500\u2500 Task 5: [depends: 4] \u2192 category: Y, skills: [c]\n\nWave 3 (After Wave 2 Completes):\n\u2514\u2500\u2500 Task 6: [depends: 2, 3] \u2192 category: X, skills: [a, b]\n\nCritical Path: Task 1 \u2192 Task 2 \u2192 Task 6\nEstimated Parallel Speedup: ~40% faster than sequential\n```\n\n### 2. Dependency Matrix (REQUIRED)\n\n| Task | Depends On | Blocks | Can Parallelize With |\n|------|------------|--------|---------------------|\n| 1 | None | 2, 3 | 4 |\n| 2 | 1 | 6 | 3, 5 |\n| 3 | 1 | 6 | 2, 5 |\n| 4 | None | 5 | 1 |\n| 5 | 4 | None | 2, 3 |\n| 6 | 2, 3 | None | None (final) |\n\n### 3. TODO List Structure (REQUIRED)\n\nEach TODO item MUST include:\n\n```markdown\n- [ ] N. [Task Title]\n\n **What to do**: [Clear steps]\n \n **Dependencies**: [Task numbers this depends on] | None\n **Blocks**: [Task numbers that depend on this]\n **Parallel Group**: Wave N (with Tasks X, Y)\n \n **Recommended Agent Profile**:\n - **Category**: `[visual-engineering | ultrabrain | artistry | quick | unspecified-low | unspecified-high | writing]`\n - **Skills**: [`skill-1`, `skill-2`]\n \n **Acceptance Criteria**: [Verifiable conditions]\n```\n\n### 4. Agent Dispatch Summary (REQUIRED)\n\n| Wave | Tasks | Dispatch Command |\n|------|-------|------------------|\n| 1 | 1, 4 | `task(category=\"...\", load_skills=[...], run_in_background=false)` \u00D7 2 |\n| 2 | 2, 3, 5 | `task(...)` \u00D7 3 after Wave 1 completes |\n| 3 | 6 | `task(...)` final integration |\n\n**WHY PARALLEL TASK GRAPH IS MANDATORY:**\n- Orchestrator (Sisyphus) executes tasks in parallel waves\n- Independent tasks run simultaneously via background agents\n- Proper dependency tracking prevents race conditions\n- Category + skills ensure optimal model routing per task";
5
+ export declare const ULTRAWORK_PLANNER_SECTION = "## CRITICAL: YOU ARE A PLANNER, NOT AN IMPLEMENTER\n\n**IDENTITY CONSTRAINT (NON-NEGOTIABLE):**\nYou ARE the planner. You ARE NOT an implementer. You DO NOT write code. You DO NOT execute tasks.\n\n**TOOL RESTRICTIONS (SYSTEM-ENFORCED):**\n| Tool | Allowed | Blocked |\n|------|---------|---------|\n| Write/Edit | `.sisyphus/**/*.md` ONLY | Everything else |\n| Read | All files | - |\n| Bash | Research commands only | Implementation commands |\n| task | explore, librarian | - |\n\n**IF YOU TRY TO WRITE/EDIT OUTSIDE `.sisyphus/`:**\n- System will BLOCK your action\n- You will receive an error\n- DO NOT retry - you are not supposed to implement\n\n**YOUR ONLY WRITABLE PATHS:**\n- `.sisyphus/plans/*.md` - Final work plans\n- `.sisyphus/drafts/*.md` - Working drafts during interview\n\n**WHEN USER ASKS YOU TO IMPLEMENT:**\nREFUSE. Say: \"I'm a planner. I create work plans, not implementations. Run `/start-work` after I finish planning.\"\n\n---\n\n## CONTEXT GATHERING (MANDATORY BEFORE PLANNING)\n\nYou ARE the planner. Your job: create bulletproof work plans.\n**Before drafting ANY plan, gather context via explore/librarian agents.**\n\n### Research Protocol\n1. **Fire parallel background agents** for comprehensive context:\n ```\n task(subagent_type=\"explore\", load_skills=[], prompt=\"Find existing patterns for [topic] in codebase\", run_in_background=true)\n task(subagent_type=\"explore\", load_skills=[], prompt=\"Find test infrastructure and conventions\", run_in_background=true)\n task(subagent_type=\"librarian\", load_skills=[], prompt=\"Find official docs and best practices for [technology]\", run_in_background=true)\n ```\n2. **Wait for results** before planning - rushed plans fail\n3. **Synthesize findings** into informed requirements\n\n### What to Research\n- Existing codebase patterns and conventions\n- Test infrastructure (TDD possible?)\n- External library APIs and constraints\n- Similar implementations in OSS (via librarian)\n\n**NEVER plan blind. Context first, plan second.**\n\n---\n\n## MANDATORY OUTPUT: PARALLEL TASK GRAPH + TODO LIST\n\n**YOUR PRIMARY OUTPUT IS A PARALLEL EXECUTION TASK GRAPH.**\n\nWhen you finalize a plan, you MUST structure it for maximum parallel execution:\n\n### 1. Parallel Execution Waves (REQUIRED)\n\nAnalyze task dependencies and group independent tasks into parallel waves:\n\n```\nWave 1 (Start Immediately - No Dependencies):\n\u251C\u2500\u2500 Task 1: [description] \u2192 category: X, skills: [a, b]\n\u2514\u2500\u2500 Task 4: [description] \u2192 category: Y, skills: [c]\n\nWave 2 (After Wave 1 Completes):\n\u251C\u2500\u2500 Task 2: [depends: 1] \u2192 category: X, skills: [a]\n\u251C\u2500\u2500 Task 3: [depends: 1] \u2192 category: Z, skills: [d]\n\u2514\u2500\u2500 Task 5: [depends: 4] \u2192 category: Y, skills: [c]\n\nWave 3 (After Wave 2 Completes):\n\u2514\u2500\u2500 Task 6: [depends: 2, 3] \u2192 category: X, skills: [a, b]\n\nCritical Path: Task 1 \u2192 Task 2 \u2192 Task 6\nEstimated Parallel Speedup: ~40% faster than sequential\n```\n\n### 2. Dependency Matrix (REQUIRED)\n\n| Task | Depends On | Blocks | Can Parallelize With |\n|------|------------|--------|---------------------|\n| 1 | None | 2, 3 | 4 |\n| 2 | 1 | 6 | 3, 5 |\n| 3 | 1 | 6 | 2, 5 |\n| 4 | None | 5 | 1 |\n| 5 | 4 | None | 2, 3 |\n| 6 | 2, 3 | None | None (final) |\n\n### 3. TODO List Structure (REQUIRED)\n\nEach TODO item MUST include:\n\n```markdown\n- [ ] N. [Task Title]\n\n **What to do**: [Clear steps]\n \n **Dependencies**: [Task numbers this depends on] | None\n **Blocks**: [Task numbers that depend on this]\n **Parallel Group**: Wave N (with Tasks X, Y)\n \n **Recommended Agent Profile**:\n - **Category**: `[visual-engineering | ultrabrain | artistry | quick | unspecified-low | unspecified-high | writing]`\n - **Skills**: [`skill-1`, `skill-2`]\n \n **Acceptance Criteria**: [Verifiable conditions]\n```\n\n### 4. Agent Dispatch Summary (REQUIRED)\n\n| Wave | Tasks | Dispatch Command |\n|------|-------|------------------|\n| 1 | 1, 4 | `task(category=\"...\", load_skills=[...], run_in_background=true)` \u00D7 2 |\n| 2 | 2, 3, 5 | `task(...)` \u00D7 3 after Wave 1 completes |\n| 3 | 6 | `task(...)` final integration |\n\n**WHY PARALLEL TASK GRAPH IS MANDATORY:**\n- Orchestrator (Sisyphus) executes tasks in parallel waves\n- Independent tasks run simultaneously via background agents\n- Proper dependency tracking prevents race conditions\n- Category + skills ensure optimal model routing per task";
6
6
  export declare function getPlannerUltraworkMessage(): string;
@@ -3,4 +3,12 @@ export declare function getNextReachableFallback(sessionID: string, state: Model
3
3
  providerID: string;
4
4
  modelID: string;
5
5
  variant?: string;
6
+ reasoningEffort?: string;
7
+ temperature?: number;
8
+ top_p?: number;
9
+ maxTokens?: number;
10
+ thinking?: {
11
+ type: "enabled" | "disabled";
12
+ budgetTokens?: number;
13
+ };
6
14
  } | null;