goatchain 0.0.24 → 0.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +247 -72
- package/dist/agent/agent.d.ts +4 -4
- package/dist/agent/hooks/index.d.ts +1 -1
- package/dist/agent/hooks/manager.d.ts +10 -3
- package/dist/agent/hooks/types.d.ts +95 -8
- package/dist/agent/index.d.ts +3 -2
- package/dist/agent/middleware.d.ts +31 -2
- package/dist/agent/tokenCounter.d.ts +48 -2
- package/dist/agent/types.d.ts +15 -1
- package/dist/index.d.ts +12 -4
- package/dist/index.js +618 -467
- package/dist/lib/access-control/index.d.ts +1 -1
- package/dist/lib/access-control/policies.d.ts +10 -0
- package/dist/lib/session-blob.d.ts +50 -0
- package/dist/mcp-client/manager.d.ts +2 -0
- package/dist/middleware/attachmentMiddleware.d.ts +34 -0
- package/dist/middleware/commitModeMiddleware.d.ts +1 -1
- package/dist/middleware/contextCompressionMiddleware.d.ts +28 -2
- package/dist/middleware/envInfoMiddleware.d.ts +6 -0
- package/dist/middleware/gitUtils.d.ts +4 -0
- package/dist/middleware/longRunningMiddleware.d.ts +100 -0
- package/dist/middleware/parallelSubagentMiddleware.d.ts +2 -2
- package/dist/middleware/planModeMiddleware.d.ts +3 -3
- package/dist/middleware/reviewMiddleware.d.ts +1 -1
- package/dist/middleware/skillsMiddleware.d.ts +8 -0
- package/dist/model/anthropic/createAnthropicAdapter.d.ts +1 -1
- package/dist/model/codex/createCodexAdapter.d.ts +1 -1
- package/dist/model/index.d.ts +2 -0
- package/dist/model/openai/createOpenAIResponsesAdapter.d.ts +28 -0
- package/dist/session/completion/composite.d.ts +25 -0
- package/dist/session/completion/index.d.ts +8 -0
- package/dist/session/completion/strategies/reflection-decision-tool.d.ts +51 -0
- package/dist/session/completion/strategies/rule-based.d.ts +16 -0
- package/dist/session/completion/strategies/self-reflection.d.ts +54 -0
- package/dist/session/completion/strategies/todo-based.d.ts +17 -0
- package/dist/session/completion/types.d.ts +53 -0
- package/dist/session/executors/ToolExecutor.d.ts +4 -4
- package/dist/session/session.d.ts +9 -0
- package/dist/state/types.d.ts +3 -2
- package/dist/subagent/index.d.ts +1 -0
- package/dist/subagent/self-reflection-critic.d.ts +35 -0
- package/dist/tool/builtin/bash.d.ts +24 -0
- package/dist/tool/builtin/edit.d.ts +12 -0
- package/dist/tool/builtin/index.d.ts +2 -2
- package/dist/tool/builtin/pathProtection.d.ts +25 -0
- package/dist/tool/builtin/read.d.ts +69 -112
- package/dist/tool/builtin/task.d.ts +8 -0
- package/dist/tool/builtin/webFetch.d.ts +27 -4
- package/dist/tool/builtin/write.d.ts +15 -0
- package/dist/tool/index.d.ts +2 -2
- package/dist/types/common.d.ts +35 -0
- package/dist/types/event.d.ts +55 -3
- package/dist/types/snapshot.d.ts +1 -1
- package/package.json +4 -2
|
@@ -24,6 +24,6 @@
|
|
|
24
24
|
* ```
|
|
25
25
|
*/
|
|
26
26
|
export { ToolAccessController } from './controller';
|
|
27
|
-
export { EXPLORE_POLICY, GENERAL_PURPOSE_POLICY, PLAN_MODE_POLICY, } from './policies';
|
|
27
|
+
export { EXPLORE_POLICY, GENERAL_PURPOSE_POLICY, PLAN_MODE_POLICY, SELF_REFLECTION_POLICY, } from './policies';
|
|
28
28
|
export type { ToolAccessMode, ToolAccessPolicy, ToolValidationResult } from './types';
|
|
29
29
|
export { assertReadOnlyBashCommand, FORBIDDEN_PATTERNS, isReadOnlyBashCommand, READ_ONLY_COMMANDS, } from './validators';
|
|
@@ -38,3 +38,13 @@ export declare const EXPLORE_POLICY: ToolAccessPolicy;
|
|
|
38
38
|
* Full access to all tools except Task (to prevent infinite recursion).
|
|
39
39
|
*/
|
|
40
40
|
export declare const GENERAL_PURPOSE_POLICY: ToolAccessPolicy;
|
|
41
|
+
/**
|
|
42
|
+
* Self-Reflection Critic Subagent Policy
|
|
43
|
+
*
|
|
44
|
+
* Read-only access for verifying the main agent's work, plus the
|
|
45
|
+
* ReflectionDecision tool for recording the final verdict.
|
|
46
|
+
*
|
|
47
|
+
* Similar to EXPLORE_POLICY but also allows WebSearch, WebFetch,
|
|
48
|
+
* and the ReflectionDecision tool.
|
|
49
|
+
*/
|
|
50
|
+
export declare const SELF_REFLECTION_POLICY: ToolAccessPolicy;
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
export declare const IMAGE_CACHE_DIR = "image-cache";
|
|
2
|
+
export declare const BLOB_DIR = "blobs";
|
|
3
|
+
export declare const META_DIR = "meta";
|
|
4
|
+
export declare const BLOB_TTL_MS: number;
|
|
5
|
+
export declare const FILE_ID_PATTERN: RegExp;
|
|
6
|
+
export type SessionBlobWriteResult = Readonly<{
|
|
7
|
+
fileId: string;
|
|
8
|
+
filePath: string;
|
|
9
|
+
lineCount: number;
|
|
10
|
+
sha256: string;
|
|
11
|
+
byteSize: number;
|
|
12
|
+
}>;
|
|
13
|
+
export type BlobLogger = (event: string, payload?: unknown) => void;
|
|
14
|
+
export declare function resolveStateDir(): string;
|
|
15
|
+
export declare function resolveSessionCacheDir(sessionId: string): string;
|
|
16
|
+
export declare function resolveBlobPathFromFileId(fileId: string, sessionId: string): string;
|
|
17
|
+
export declare function normalizeSessionId(raw: unknown): string | null;
|
|
18
|
+
export declare function parseBlobPathInfo(absPath: string): {
|
|
19
|
+
sessionId: string;
|
|
20
|
+
fileId: string;
|
|
21
|
+
} | null;
|
|
22
|
+
export declare function writeSessionBlob(opts: Readonly<{
|
|
23
|
+
content: string;
|
|
24
|
+
sessionId: string;
|
|
25
|
+
source?: string;
|
|
26
|
+
logger?: BlobLogger;
|
|
27
|
+
}>): Promise<SessionBlobWriteResult | null>;
|
|
28
|
+
/**
|
|
29
|
+
* Write text to a session-scoped blob with automatic TTL cleanup.
|
|
30
|
+
* Returns null if content is empty, sessionId is invalid, or write fails.
|
|
31
|
+
*/
|
|
32
|
+
export declare function cacheTextToSessionBlob(opts: Readonly<{
|
|
33
|
+
content: string;
|
|
34
|
+
sessionId?: string;
|
|
35
|
+
source?: string;
|
|
36
|
+
logger?: BlobLogger;
|
|
37
|
+
}>): Promise<SessionBlobWriteResult | null>;
|
|
38
|
+
export declare function deleteSessionBlobByPath(absPath: string, logger?: BlobLogger): Promise<boolean>;
|
|
39
|
+
export declare function relocateSessionBlobPath(absPath: string, targetSessionId: string): Promise<string | null>;
|
|
40
|
+
export declare function readBlobMeta(sessionId: string, fileId: string): Promise<Record<string, unknown> | null>;
|
|
41
|
+
export declare function cleanupSessionCache(sessionId: string, logger?: BlobLogger): Promise<void>;
|
|
42
|
+
export declare function cleanupExpiredSessionCaches(opts?: Readonly<{
|
|
43
|
+
ttlMs?: number;
|
|
44
|
+
logger?: BlobLogger;
|
|
45
|
+
}>): Promise<void>;
|
|
46
|
+
/**
|
|
47
|
+
* Reset the internal cleanup timestamp. Only useful for tests.
|
|
48
|
+
* @internal
|
|
49
|
+
*/
|
|
50
|
+
export declare function _resetCleanupTimer(): void;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { Middleware } from '../agent/middleware';
|
|
2
|
+
import type { AgentLoopState } from '../agent/types';
|
|
3
|
+
export interface AttachmentMiddlewareOptions {
|
|
4
|
+
/** Whether the model supports vision (images). Default: `false` */
|
|
5
|
+
supportsVision?: boolean;
|
|
6
|
+
/** Whether the model supports native PDFs / documents. Default: `false` */
|
|
7
|
+
supportsPdfs?: boolean;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Create a middleware that injects image/PDF attachment data into the
|
|
11
|
+
* conversation as user messages before each model call.
|
|
12
|
+
*
|
|
13
|
+
* **Design:**
|
|
14
|
+
*
|
|
15
|
+
* - The Read tool stores binary data in `structuredContent.attachments`.
|
|
16
|
+
* - The Session layer splits this into two metadata fields:
|
|
17
|
+
* - `_toolAttachmentRefs` (small, persisted in checkpoint)
|
|
18
|
+
* - `_toolAttachmentData` (large base64, transient / in-memory only)
|
|
19
|
+
* - This middleware, on each iteration:
|
|
20
|
+
* 1. Finds the **trailing unanswered** tool messages (no assistant response after them)
|
|
21
|
+
* 2. Collects matching attachment data (re-reads files if data is missing after resume)
|
|
22
|
+
* 3. Injects a single **user message** with image/document content blocks
|
|
23
|
+
* 4. Evicts old attachment data to free memory
|
|
24
|
+
*
|
|
25
|
+
* **Registration order:** Must be the **last** (innermost) middleware so that
|
|
26
|
+
* context compression runs first on clean messages.
|
|
27
|
+
*
|
|
28
|
+
* @example
|
|
29
|
+
* ```typescript
|
|
30
|
+
* const mw = createAttachmentMiddleware({ supportsVision: true, supportsPdfs: true }) // explicitly enable
|
|
31
|
+
* agent.use(mw, 'attachment') // register last
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
export declare function createAttachmentMiddleware(options?: AttachmentMiddlewareOptions): Middleware<AgentLoopState>;
|
|
@@ -16,7 +16,7 @@ export interface ContextCompressionOptions {
|
|
|
16
16
|
/**
|
|
17
17
|
* Number of recent conversation turns to always protect (default: 2).
|
|
18
18
|
* A turn is a user message + assistant response pair.
|
|
19
|
-
* The
|
|
19
|
+
* The last N turns will always be preserved from summary compression.
|
|
20
20
|
* Tool compression ignores this setting and only preserves tool results from
|
|
21
21
|
* the most recent completed turn.
|
|
22
22
|
*/
|
|
@@ -49,6 +49,21 @@ export interface ContextCompressionOptions {
|
|
|
49
49
|
* Default: 'compression-logs.jsonl'
|
|
50
50
|
*/
|
|
51
51
|
logFilePath?: string;
|
|
52
|
+
/**
|
|
53
|
+
* Token limit of the model used for summarization.
|
|
54
|
+
* Used to determine when chunked summarization is needed.
|
|
55
|
+
* When the content to summarize exceeds this limit, the middleware
|
|
56
|
+
* automatically splits it into chunks and summarizes sequentially.
|
|
57
|
+
* Defaults to maxTokens if not specified.
|
|
58
|
+
*/
|
|
59
|
+
summaryModelMaxTokens?: number;
|
|
60
|
+
/**
|
|
61
|
+
* Maximum characters per individual message when formatting for summary.
|
|
62
|
+
* More aggressive truncation reduces the chance of needing chunked summarization.
|
|
63
|
+
* Applies to tool outputs, user messages, and assistant messages.
|
|
64
|
+
* Default: 3000
|
|
65
|
+
*/
|
|
66
|
+
maxMessageCharsForSummary?: number;
|
|
52
67
|
}
|
|
53
68
|
/**
|
|
54
69
|
* Options for manual compression.
|
|
@@ -85,6 +100,17 @@ export interface ManualCompressionOptions {
|
|
|
85
100
|
* Default: 'compression-logs.jsonl'
|
|
86
101
|
*/
|
|
87
102
|
logFilePath?: string;
|
|
103
|
+
/**
|
|
104
|
+
* Token limit of the model used for summarization.
|
|
105
|
+
* Used to determine when chunked summarization is needed.
|
|
106
|
+
* Defaults to 128000 if not specified.
|
|
107
|
+
*/
|
|
108
|
+
summaryModelMaxTokens?: number;
|
|
109
|
+
/**
|
|
110
|
+
* Maximum characters per individual message when formatting for summary.
|
|
111
|
+
* Default: 3000
|
|
112
|
+
*/
|
|
113
|
+
maxMessageCharsForSummary?: number;
|
|
88
114
|
}
|
|
89
115
|
/**
|
|
90
116
|
* Result of manual compression.
|
|
@@ -126,7 +152,7 @@ export interface CompressionStats {
|
|
|
126
152
|
* This middleware automatically compresses conversation history using a two-stage strategy:
|
|
127
153
|
*
|
|
128
154
|
* **Stage 1 (Summary Compression) - Triggers at 80% of maxTokens:**
|
|
129
|
-
* - Summarizes messages
|
|
155
|
+
* - Summarizes all messages before the last N protected turns
|
|
130
156
|
* - Generates rolling summary (AI merges old + new summaries)
|
|
131
157
|
* - Removes entire messages from array
|
|
132
158
|
* - Injects summary into system message
|
|
@@ -37,6 +37,12 @@ export interface EnvInfoMiddlewareOptions {
|
|
|
37
37
|
* Defaults to true.
|
|
38
38
|
*/
|
|
39
39
|
respectGitignore?: boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Model name/ID to display in environment information.
|
|
42
|
+
* Can be a string or a function that returns a string.
|
|
43
|
+
* Use a function to support dynamic model switching at runtime.
|
|
44
|
+
*/
|
|
45
|
+
modelName?: string | (() => string);
|
|
40
46
|
}
|
|
41
47
|
/**
|
|
42
48
|
* Create an environment information middleware.
|
|
@@ -37,6 +37,10 @@ export declare function getStagedFiles(cwd: string): StagedFileInfo[];
|
|
|
37
37
|
* Check if there are any unstaged changes (including untracked files)
|
|
38
38
|
*/
|
|
39
39
|
export declare function hasUnstagedChanges(cwd: string): boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Append Co-Authored-By trailer to a commit message if not already present.
|
|
42
|
+
*/
|
|
43
|
+
export declare function appendCoAuthoredBy(message: string): string;
|
|
40
44
|
/**
|
|
41
45
|
* Create a git commit with the given message
|
|
42
46
|
* @param message - Commit message
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import type { Middleware } from '../agent/middleware';
|
|
2
|
+
import type { AgentLoopState } from '../agent/types';
|
|
3
|
+
import type { ModelClient } from '../model';
|
|
4
|
+
import type { CompletionAssessor } from '../session/completion/types';
|
|
5
|
+
import type { ToolRegistry } from '../tool';
|
|
6
|
+
/**
|
|
7
|
+
* Options for self-reflection critic subagent execution.
|
|
8
|
+
*
|
|
9
|
+
* When provided to `createLongRunningMiddleware`, the middleware will
|
|
10
|
+
* directly create and run a `SelfReflectionCritic` subagent in the
|
|
11
|
+
* `__afterResponse` phase (no dependency on `createParallelSubagentMiddleware`).
|
|
12
|
+
*/
|
|
13
|
+
export interface SelfReflectionOptions {
|
|
14
|
+
/**
|
|
15
|
+
* Model client for the critic subagent.
|
|
16
|
+
* Usually the same model as the main agent.
|
|
17
|
+
*/
|
|
18
|
+
model: ModelClient;
|
|
19
|
+
/**
|
|
20
|
+
* Tool registry for the critic subagent.
|
|
21
|
+
* Tools are filtered by the critic's read-only access policy.
|
|
22
|
+
*/
|
|
23
|
+
globalToolRegistry: ToolRegistry;
|
|
24
|
+
/**
|
|
25
|
+
* Maximum length of the agent's response to include in the review context.
|
|
26
|
+
* Longer responses are truncated to save tokens.
|
|
27
|
+
*
|
|
28
|
+
* @default 4000
|
|
29
|
+
*/
|
|
30
|
+
maxResponseLength?: number;
|
|
31
|
+
/**
|
|
32
|
+
* Enable debug logging for the critic subagent.
|
|
33
|
+
*
|
|
34
|
+
* @default false
|
|
35
|
+
*/
|
|
36
|
+
debug?: boolean;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Options for the long_running middleware.
|
|
40
|
+
*/
|
|
41
|
+
export interface LongRunningMiddlewareOptions {
|
|
42
|
+
/**
|
|
43
|
+
* Optional custom completion assessor.
|
|
44
|
+
*
|
|
45
|
+
* When provided, runs before the self-reflection critic. If it says
|
|
46
|
+
* "not complete", a follow-up message is injected and the critic is
|
|
47
|
+
* skipped. If it says "complete" (or is omitted), the critic runs.
|
|
48
|
+
*
|
|
49
|
+
* Use this for cheap, deterministic checks (e.g. file-existence,
|
|
50
|
+
* regex-based quality gates) that can short-circuit before the
|
|
51
|
+
* expensive critic subagent.
|
|
52
|
+
*/
|
|
53
|
+
assessor?: CompletionAssessor;
|
|
54
|
+
/**
|
|
55
|
+
* Enable the self-reflection critic subagent.
|
|
56
|
+
*
|
|
57
|
+
* Provide a `SelfReflectionOptions` object with `model` and `globalToolRegistry`
|
|
58
|
+
* to enable direct critic subagent execution within the middleware.
|
|
59
|
+
* No dependency on `createParallelSubagentMiddleware`.
|
|
60
|
+
*
|
|
61
|
+
* Omit or set to `undefined` to disable.
|
|
62
|
+
*/
|
|
63
|
+
selfReflection?: SelfReflectionOptions;
|
|
64
|
+
/**
|
|
65
|
+
* Maximum number of follow-up rounds before force-stopping.
|
|
66
|
+
* Prevents infinite loops when the assessor keeps saying "not complete".
|
|
67
|
+
*
|
|
68
|
+
* @default 3
|
|
69
|
+
*/
|
|
70
|
+
maxFollowUpRounds?: number;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Create a long_running middleware that drives loop continuation.
|
|
74
|
+
*
|
|
75
|
+
* The core mechanism is the **self-reflection critic subagent**: when the
|
|
76
|
+
* main agent stops calling tools, the middleware spawns a read-only critic
|
|
77
|
+
* to review the agent's work and decide whether it is truly complete.
|
|
78
|
+
*
|
|
79
|
+
* Optionally, a custom `assessor` can run before the critic for cheap,
|
|
80
|
+
* deterministic checks (e.g. file-existence, regex-based quality gates).
|
|
81
|
+
*
|
|
82
|
+
* @example
|
|
83
|
+
* ```ts
|
|
84
|
+
* // Self-reflection critic (recommended)
|
|
85
|
+
* agent.use(createLongRunningMiddleware({
|
|
86
|
+
* selfReflection: { model, globalToolRegistry: tools },
|
|
87
|
+
* maxFollowUpRounds: 3,
|
|
88
|
+
* }))
|
|
89
|
+
*
|
|
90
|
+
* // With a custom pre-check assessor + critic
|
|
91
|
+
* agent.use(createLongRunningMiddleware({
|
|
92
|
+
* assessor: new MyQualityGateAssessor(),
|
|
93
|
+
* selfReflection: { model, globalToolRegistry: tools },
|
|
94
|
+
* }))
|
|
95
|
+
*
|
|
96
|
+
* // Pass-through (no assessment, same as not using the middleware)
|
|
97
|
+
* agent.use(createLongRunningMiddleware())
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
export declare function createLongRunningMiddleware(options?: LongRunningMiddlewareOptions): Middleware<AgentLoopState>;
|
|
@@ -32,7 +32,7 @@ export interface ParallelSubagentMiddlewareOptions {
|
|
|
32
32
|
*/
|
|
33
33
|
debug?: boolean;
|
|
34
34
|
/**
|
|
35
|
-
* 可选:middleware 名称(默认: '
|
|
35
|
+
* 可选:middleware 名称(默认: 'parallel_subagent')
|
|
36
36
|
*/
|
|
37
37
|
name?: string;
|
|
38
38
|
}
|
|
@@ -43,7 +43,7 @@ export interface ParallelSubagentMiddlewareOptions {
|
|
|
43
43
|
* 1. 自动创建并注册 TaskTool(包含 subagent 定义和 executor)
|
|
44
44
|
* 2. 启用并行执行模式:当 LLM 同时调用多个 Task 工具时自动并行执行
|
|
45
45
|
*
|
|
46
|
-
* TaskTool 会被注册为 `<middlewareName>_Task`,例如 `
|
|
46
|
+
* TaskTool 会被注册为 `<middlewareName>_Task`,例如 `parallel_subagent_Task`
|
|
47
47
|
*
|
|
48
48
|
* @example 使用默认 executor(推荐)
|
|
49
49
|
* ```typescript
|
|
@@ -17,7 +17,7 @@ export interface PlanModeMiddlewareOptions {
|
|
|
17
17
|
insertEveryIteration?: boolean;
|
|
18
18
|
/**
|
|
19
19
|
* Custom name for this middleware (optional).
|
|
20
|
-
* If not provided, defaults to '
|
|
20
|
+
* If not provided, defaults to 'plan_mode'.
|
|
21
21
|
*/
|
|
22
22
|
name?: string;
|
|
23
23
|
/**
|
|
@@ -49,7 +49,7 @@ export interface PlanModeMiddlewareOptions {
|
|
|
49
49
|
* execution, prohibiting file edits and encouraging thoughtful analysis.
|
|
50
50
|
*
|
|
51
51
|
* When registered via `agent.use()`, this middleware also automatically registers
|
|
52
|
-
* the `ExitPlanMode` tool with a namespace prefix (e.g., `
|
|
52
|
+
* the `ExitPlanMode` tool with a namespace prefix (e.g., `plan_mode_ExitPlanMode`).
|
|
53
53
|
*
|
|
54
54
|
* @param options - Configuration options for the middleware
|
|
55
55
|
* @returns A middleware function that injects plan mode instructions
|
|
@@ -62,7 +62,7 @@ export interface PlanModeMiddlewareOptions {
|
|
|
62
62
|
* model,
|
|
63
63
|
* middleware: [createPlanModeMiddleware()],
|
|
64
64
|
* })
|
|
65
|
-
* // Tools automatically registered: '
|
|
65
|
+
* // Tools automatically registered: 'plan_mode_ExitPlanMode'
|
|
66
66
|
* ```
|
|
67
67
|
*
|
|
68
68
|
* @example Custom configuration
|
|
@@ -34,6 +34,14 @@ export interface SkillsMiddlewareOptions {
|
|
|
34
34
|
* If not provided, defaults to 'skills'.
|
|
35
35
|
*/
|
|
36
36
|
name?: string;
|
|
37
|
+
/**
|
|
38
|
+
* Skills to force-activate regardless of tool calls in message history.
|
|
39
|
+
* These skills will always be injected into the system message.
|
|
40
|
+
*
|
|
41
|
+
* Can be a static array or a function that returns an array for dynamic resolution.
|
|
42
|
+
* Use a function to support server-side state that may change at runtime.
|
|
43
|
+
*/
|
|
44
|
+
forcedSkills?: string[] | (() => string[]);
|
|
37
45
|
}
|
|
38
46
|
/**
|
|
39
47
|
* Parsed skill from SKILL.md file with YAML frontmatter.
|
|
@@ -43,7 +43,7 @@ interface AnthropicToolUseBlock {
|
|
|
43
43
|
interface AnthropicToolResultBlock {
|
|
44
44
|
type: 'tool_result';
|
|
45
45
|
tool_use_id: string;
|
|
46
|
-
content: string
|
|
46
|
+
content: string | Array<AnthropicTextBlock | AnthropicImageBlock | AnthropicDocumentBlock>;
|
|
47
47
|
is_error?: boolean;
|
|
48
48
|
}
|
|
49
49
|
interface AnthropicThinkingBlock {
|
|
@@ -28,7 +28,7 @@ export interface CodexAdapterOptions {
|
|
|
28
28
|
accountId?: string | (() => string | Promise<string>);
|
|
29
29
|
/**
|
|
30
30
|
* Default model ID to use when not specified in the request.
|
|
31
|
-
* @example 'gpt-5.
|
|
31
|
+
* @example 'gpt-5.3-codex', 'gpt-5.2-codex', 'gpt-5.1-codex-mini'
|
|
32
32
|
*/
|
|
33
33
|
defaultModelId?: string;
|
|
34
34
|
/**
|
package/dist/model/index.d.ts
CHANGED
|
@@ -9,6 +9,8 @@ export { createGeminiAdapter } from './gemini/createGeminiAdapter';
|
|
|
9
9
|
export type { GeminiAdapterOptions } from './gemini/createGeminiAdapter';
|
|
10
10
|
export { createOpenAIAdapter } from './openai/createOpenAIAdapter';
|
|
11
11
|
export type { OpenAIAdapterOptions } from './openai/createOpenAIAdapter';
|
|
12
|
+
export { createOpenAIResponsesAdapter } from './openai/createOpenAIResponsesAdapter';
|
|
13
|
+
export type { OpenAIResponsesAdapterOptions } from './openai/createOpenAIResponsesAdapter';
|
|
12
14
|
export type { ModelClient, ModelDeltaChunk, ModelId, ModelRef, ModelRequest, ModelRunResult, ModelStopReason, ModelStreamEvent, OpenAITool, ProviderId, } from './types';
|
|
13
15
|
export { RetryPolicy } from './utils/retry';
|
|
14
16
|
export type { JitterStrategy, RetryPolicyOptions, RetryStrategy } from './utils/retry';
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { ModelAdapter } from '../adapter';
|
|
2
|
+
export interface OpenAIResponsesAdapterOptions {
|
|
3
|
+
/**
|
|
4
|
+
* OpenAI SDK baseURL override.
|
|
5
|
+
*
|
|
6
|
+
* Examples:
|
|
7
|
+
* - Official: https://api.openai.com/v1
|
|
8
|
+
* - Proxy gateway base URL: https://your-proxy.example.com/v1
|
|
9
|
+
*
|
|
10
|
+
* If you pass a URL that already ends with `/responses`, we treat it as an exact
|
|
11
|
+
* Responses endpoint and will force all requests to hit it as-is.
|
|
12
|
+
*/
|
|
13
|
+
baseUrl?: string;
|
|
14
|
+
/**
|
|
15
|
+
* OpenAI API key (recommended). If not provided, falls back to env `OPENAI_API_KEY`.
|
|
16
|
+
*/
|
|
17
|
+
apiKey?: string | (() => string | undefined);
|
|
18
|
+
/**
|
|
19
|
+
* Default model ID to use when not specified in routing.
|
|
20
|
+
*/
|
|
21
|
+
defaultModelId?: string;
|
|
22
|
+
/**
|
|
23
|
+
* Optional organization and project identifiers (sent as request headers).
|
|
24
|
+
*/
|
|
25
|
+
organization?: string;
|
|
26
|
+
project?: string;
|
|
27
|
+
}
|
|
28
|
+
export declare function createOpenAIResponsesAdapter(options?: OpenAIResponsesAdapterOptions): ModelAdapter;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { CompletionAssessment, CompletionAssessor, CompletionContext } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Composite completion assessor.
|
|
4
|
+
*
|
|
5
|
+
* Runs multiple assessors in priority order. The first assessor that
|
|
6
|
+
* returns `isComplete: false` wins (fail-fast). If all assessors say
|
|
7
|
+
* "complete", the task is considered done.
|
|
8
|
+
*
|
|
9
|
+
* Recommended ordering: cheapest/fastest assessors first.
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* const assessor = new CompositeAssessor([
|
|
14
|
+
* new RuleBasedAssessor(), // zero cost
|
|
15
|
+
* new TodoBasedAssessor(), // zero cost
|
|
16
|
+
* new SelfReflectionAssessor({ model }), // optional, adds cost
|
|
17
|
+
* ])
|
|
18
|
+
* ```
|
|
19
|
+
*/
|
|
20
|
+
export declare class CompositeAssessor implements CompletionAssessor {
|
|
21
|
+
readonly name: string;
|
|
22
|
+
private readonly assessors;
|
|
23
|
+
constructor(assessors: CompletionAssessor[]);
|
|
24
|
+
assess(context: CompletionContext): Promise<CompletionAssessment>;
|
|
25
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export type { CompletionAssessment, CompletionAssessor, CompletionContext, } from './types';
|
|
2
|
+
export { CompositeAssessor } from './composite';
|
|
3
|
+
export { RuleBasedAssessor } from './strategies/rule-based';
|
|
4
|
+
export { TodoBasedAssessor } from './strategies/todo-based';
|
|
5
|
+
export { SelfReflectionAssessor } from './strategies/self-reflection';
|
|
6
|
+
export type { SelfReflectionAssessorOptions } from './strategies/self-reflection';
|
|
7
|
+
export { ReflectionDecisionTool } from './strategies/reflection-decision-tool';
|
|
8
|
+
export type { ReflectionDecision, ReflectionDecisionCallback } from './strategies/reflection-decision-tool';
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { CallToolResult, ToolInputSchema } from '../../../types';
|
|
2
|
+
import type { ToolExecutionContext } from '../../../tool/types';
|
|
3
|
+
import { BaseTool } from '../../../tool/base';
|
|
4
|
+
/**
|
|
5
|
+
* Parsed decision from the ReflectionDecisionTool.
|
|
6
|
+
*/
|
|
7
|
+
export interface ReflectionDecision {
|
|
8
|
+
/** Whether the task is truly complete from the user's perspective */
|
|
9
|
+
isComplete: boolean;
|
|
10
|
+
/** Explanation of why the reviewer reached this conclusion */
|
|
11
|
+
reason: string;
|
|
12
|
+
/** Potential issues found (even if isComplete is true, for informational purposes) */
|
|
13
|
+
potentialIssues?: string[];
|
|
14
|
+
/** Follow-up questions the user would likely ask (only meaningful when isComplete is false) */
|
|
15
|
+
followUpQuestions?: string[];
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Callback invoked when the critic subagent makes a decision.
|
|
19
|
+
*/
|
|
20
|
+
export type ReflectionDecisionCallback = (decision: ReflectionDecision) => void;
|
|
21
|
+
/**
|
|
22
|
+
* Get the latest decision recorded by the ReflectionDecisionTool.
|
|
23
|
+
* Returns `null` if no decision has been recorded (or it was cleared).
|
|
24
|
+
*/
|
|
25
|
+
export declare function getLatestReflectionDecision(): ReflectionDecision | null;
|
|
26
|
+
/**
|
|
27
|
+
* Clear the stored decision. Should be called at the start of a new
|
|
28
|
+
* reflection cycle (round 0) to avoid stale data from previous runs.
|
|
29
|
+
*/
|
|
30
|
+
export declare function clearLatestReflectionDecision(): void;
|
|
31
|
+
/**
|
|
32
|
+
* ReflectionDecisionTool - the critic subagent's verdict tool.
|
|
33
|
+
*
|
|
34
|
+
* This tool is the **only** way for the reflection subagent to conclude its
|
|
35
|
+
* review. When called, it records the structured decision (complete / not
|
|
36
|
+
* complete, potential issues, follow-up questions) and signals the outer
|
|
37
|
+
* `SelfReflectionAssessor` via both:
|
|
38
|
+
* 1. A module-level store (always) — so the assessor can read the verdict
|
|
39
|
+
* 2. An optional callback (if provided) — for custom integrations
|
|
40
|
+
*
|
|
41
|
+
* The tool forces the model to make an explicit, structured decision rather
|
|
42
|
+
* than embedding its verdict in free-form text.
|
|
43
|
+
*/
|
|
44
|
+
export declare class ReflectionDecisionTool extends BaseTool {
|
|
45
|
+
readonly name = "ReflectionDecision";
|
|
46
|
+
readonly description = "Record your final review decision. You MUST call this tool exactly once to conclude your reflection.\n\nRules:\n- isComplete: true ONLY if you found ZERO issues and the work is perfect\n- isComplete: false if you found ANY issue at all, no matter how minor\n- reason: brief explanation referencing specific evidence from your verification\n- potentialIssues: every issue you found. If this list is non-empty, isComplete MUST be false\n- followUpQuestions: specific actionable instructions for the agent to fix each issue (required when isComplete is false)\n\nABSOLUTE RULE: potentialIssues non-empty => isComplete MUST be false. No exceptions.";
|
|
47
|
+
readonly parameters: ToolInputSchema;
|
|
48
|
+
private readonly _callback;
|
|
49
|
+
constructor(callback?: ReflectionDecisionCallback);
|
|
50
|
+
execute(args: Record<string, unknown>, _ctx: ToolExecutionContext): Promise<CallToolResult>;
|
|
51
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { CompletionAssessment, CompletionAssessor, CompletionContext } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Rule-based completion assessor.
|
|
4
|
+
*
|
|
5
|
+
* Uses zero-cost pattern matching on the model's response and state to
|
|
6
|
+
* detect obviously incomplete work:
|
|
7
|
+
* - Response truncated by model (lastModelStopReason === 'length')
|
|
8
|
+
* - Response contains phrases suggesting incomplete work
|
|
9
|
+
*
|
|
10
|
+
* This assessor is intentionally conservative -- it only flags clear signals
|
|
11
|
+
* of incompleteness to avoid false positives.
|
|
12
|
+
*/
|
|
13
|
+
export declare class RuleBasedAssessor implements CompletionAssessor {
|
|
14
|
+
readonly name = "RuleBasedAssessor";
|
|
15
|
+
assess(context: CompletionContext): Promise<CompletionAssessment>;
|
|
16
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import type { CompletionAssessment, CompletionAssessor, CompletionContext } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Options for the SelfReflectionAssessor.
|
|
4
|
+
*/
|
|
5
|
+
export interface SelfReflectionAssessorOptions {
|
|
6
|
+
/**
|
|
7
|
+
* Maximum length of the agent's response to include in the review context.
|
|
8
|
+
* Longer responses are truncated to save tokens.
|
|
9
|
+
*
|
|
10
|
+
* @default 4000
|
|
11
|
+
*/
|
|
12
|
+
maxResponseLength?: number;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Self-reflection completion assessor (thin trigger).
|
|
16
|
+
*
|
|
17
|
+
* Instead of running its own subagent, this assessor composes a
|
|
18
|
+
* follow-up message that instructs the main agent to invoke the
|
|
19
|
+
* `SelfReflectionCritic` subagent via the Task tool. The critic
|
|
20
|
+
* subagent is defined in `src/subagent/self-reflection-critic.ts`
|
|
21
|
+
* and must be registered with the parallel subagent middleware.
|
|
22
|
+
*
|
|
23
|
+
* Flow:
|
|
24
|
+
* 1. On first assessment (followUpRound === 0), the assessor returns
|
|
25
|
+
* `{ isComplete: false, followUpMessage }` -- the message tells
|
|
26
|
+
* the main agent to call `Task(subagent_type: 'SelfReflectionCritic', ...)`
|
|
27
|
+
* 2. The main agent invokes the critic subagent, which searches the
|
|
28
|
+
* codebase, verifies claims, and records its verdict via the
|
|
29
|
+
* `ReflectionDecision` tool.
|
|
30
|
+
* 3. On round 1, the assessor checks the critic's **actual verdict**:
|
|
31
|
+
* - If the critic listed ANY issues (even if isComplete=true), the
|
|
32
|
+
* assessor overrides to NOT COMPLETE and forwards issues to the agent.
|
|
33
|
+
* - Only if the critic said COMPLETE with zero issues (or wasn't
|
|
34
|
+
* reached) does the assessor return `{ isComplete: true }`.
|
|
35
|
+
* 4. On round 2+, the assessor trusts the agent's follow-up corrections.
|
|
36
|
+
*/
|
|
37
|
+
export declare class SelfReflectionAssessor implements CompletionAssessor {
|
|
38
|
+
readonly name = "SelfReflectionAssessor";
|
|
39
|
+
private readonly maxResponseLength;
|
|
40
|
+
private static readonly LOG_PREFIX;
|
|
41
|
+
constructor(options?: SelfReflectionAssessorOptions);
|
|
42
|
+
private _log;
|
|
43
|
+
assess(context: CompletionContext): Promise<CompletionAssessment>;
|
|
44
|
+
/**
|
|
45
|
+
* Build a follow-up message from the critic's decision when it found issues.
|
|
46
|
+
* This message is injected back into the conversation so the main agent
|
|
47
|
+
* can address the problems before the task is marked complete.
|
|
48
|
+
*/
|
|
49
|
+
private _buildCriticFollowUp;
|
|
50
|
+
/**
|
|
51
|
+
* Build the review context that will be passed to the critic subagent.
|
|
52
|
+
*/
|
|
53
|
+
private _buildReviewPrompt;
|
|
54
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { CompletionAssessment, CompletionAssessor, CompletionContext } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Todo-based completion assessor.
|
|
4
|
+
*
|
|
5
|
+
* Checks the TodoWriteTool's structured state to determine if all tasks
|
|
6
|
+
* have been completed. This is cheap and reliable because it uses
|
|
7
|
+
* data that the agent itself created.
|
|
8
|
+
*
|
|
9
|
+
* Assessment logic:
|
|
10
|
+
* - If there are no todos, assume complete (agent may not have used todos)
|
|
11
|
+
* - If all todos are 'completed', task is complete
|
|
12
|
+
* - If any todos are 'pending' or 'in_progress', task is not complete
|
|
13
|
+
*/
|
|
14
|
+
export declare class TodoBasedAssessor implements CompletionAssessor {
|
|
15
|
+
readonly name = "TodoBasedAssessor";
|
|
16
|
+
assess(context: CompletionContext): Promise<CompletionAssessment>;
|
|
17
|
+
}
|