npm - @botiverse/kimi-code-sdk - Versions diffs - 0.20.1 → 0.21.0 - Mend

@botiverse/kimi-code-sdk 0.20.1 → 0.21.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.mts CHANGED Viewed

@@ -22,6 +22,7 @@ import type { OAuthClientMetadata } from '@modelcontextprotocol/sdk/shared/auth.
 import type { OAuthClientProvider } from '@modelcontextprotocol/sdk/client/auth.js';
 import type { OAuthDiscoveryState } from '@modelcontextprotocol/sdk/client/auth.js';
 import type { OAuthTokens } from '@modelcontextprotocol/sdk/shared/auth.js';
+import { PluginCommandActivatedEvent } from '@moonshot-ai/protocol';
 import type { Readable } from 'node:stream';
 import { SessionMetaUpdatedEvent } from '@moonshot-ai/protocol';
 import type { SessionWarning } from '@moonshot-ai/protocol';
@@ -52,6 +53,18 @@ import { WarningEvent } from '@moonshot-ai/protocol';
 import type { Writable } from 'node:stream';
 import { z } from 'zod';
+declare interface ActivatePluginCommandPayload {
+    readonly pluginId: string;
+    readonly commandName: string;
+    readonly args?: string | undefined;
+}
+declare interface ActivatePluginCommandRpcInput extends SessionIdRpcInput {
+    readonly pluginId: string;
+    readonly commandName: string;
+    readonly args?: string | undefined;
+}
 declare interface ActivateSkillPayload {
     readonly name: string;
     readonly args?: string | undefined;
@@ -100,6 +113,7 @@ declare class Agent {
     readonly rpc?: Partial<SDKAgentRPC>;
     readonly toolServices?: ToolServices;
     readonly pluginSessionStarts: readonly EnabledPluginSessionStart[];
+    readonly pluginCommands: readonly PluginCommandDef[];
     readonly rawGenerate: typeof generate;
     readonly modelProvider?: ModelProvider;
     readonly subagentHost?: SessionSubagentHost;
@@ -128,13 +142,25 @@ declare class Agent {
     readonly goal: GoalMode;
     readonly replayBuilder: ReplayBuilder;
     private additionalDirs;
+    private activeProfile?;
+    private brandHome?;
+    private readonly systemPromptContextProvider?;
     constructor(options: AgentOptions);
     setKaos(kaos: Kaos): void;
     getAdditionalDirs(): readonly string[];
     setAdditionalDirs(additionalDirs: readonly string[]): void;
     get generate(): typeof generate;
     get llm(): KosongLLM;
-    useProfile(profile: ResolvedAgentProfile, context?: PreparedSystemPromptContext): void;
+    useProfile(profile: ResolvedAgentProfile, context?: PreparedSystemPromptContext, brandHome?: string): void;
+    setActiveProfile(profile: ResolvedAgentProfile, brandHome?: string): void;
+    /**
+     * Re-render the system prompt with freshly gathered runtime context (cwd
+     * listing, AGENTS.md, additional-dirs info, skill list). Called after
+     * compaction so the post-compaction turns do not keep a snapshot captured
+     * at session bootstrap. Invalidates the prompt-cache prefix by design.
+     */
+    refreshSystemPrompt(): Promise<void>;
+    private updateSystemPromptFromProfile;
     resume(options?: AgentRecordsReplayOptions): Promise<{
         warning?: string;
     }>;
@@ -170,6 +196,7 @@ declare interface AgentAPI {
     detachBackground: (payload: DetachBackgroundPayload) => BackgroundTaskInfo | undefined;
     clearContext: (payload: EmptyPayload) => void;
     activateSkill: (payload: ActivateSkillPayload) => void;
+    activatePluginCommand: (payload: ActivatePluginCommandPayload) => void;
     startBtw: (payload: EmptyPayload) => string;
     createGoal: (payload: CreateGoalPayload) => GoalSnapshot;
     getGoal: (payload: EmptyPayload) => GoalToolResult;
@@ -202,7 +229,7 @@ declare interface AgentConfigData {
     modelAlias?: string;
     modelCapabilities: ModelCapability;
     profileName?: string;
-    thinkingLevel: string;
+    thinkingEffort: string;
     systemPrompt: string;
 }
@@ -210,7 +237,7 @@ declare type AgentConfigUpdateData = Partial<{
     cwd: string;
     modelAlias: string;
     profileName: string;
-    thinkingLevel: string;
+    thinkingEffort: string;
     systemPrompt: string;
 }>;
@@ -248,9 +275,11 @@ declare interface AgentOptions {
     readonly log?: Logger;
     readonly telemetry?: TelemetryClient | undefined;
     readonly pluginSessionStarts?: readonly EnabledPluginSessionStart[];
+    readonly pluginCommands?: readonly PluginCommandDef[];
     readonly experimentalFlags?: ExperimentalFlagResolver;
     readonly replay?: ReplayBuilderOptions;
     readonly additionalDirs?: readonly string[];
+    readonly systemPromptContextProvider?: (() => Promise<PreparedSystemPromptContext>) | undefined;
 }
 declare type AgentRecord = {
@@ -431,6 +460,15 @@ declare interface AnthropicOptions {
      * encode a parseable Claude version. Leave undefined to infer from the name.
      */
     adaptiveThinking?: boolean | undefined;
+    /**
+     * Use the Anthropic **beta** Messages API (`client.beta.messages.create`,
+     * `POST /v1/messages?beta=true`) instead of the standard Messages API.
+     *
+     * Beta features (`betaFeatures`) are then sent via the request `betas`
+     * field rather than the `anthropic-beta` header. Defaults to false, which
+     * keeps the standard endpoint + header behavior.
+     */
+    betaApi?: boolean | undefined;
     clientFactory?: (auth: ProviderRequestAuth) => Anthropic;
 }
@@ -942,7 +980,7 @@ declare interface ChatProvider {
     readonly name: string;
     /** Model name passed to the upstream API (e.g. `"moonshot-v1-auto"`). */
     readonly modelName: string;
-    /** Current thinking-effort level, or `null` if thinking is not configured. */
+    /** Current thinking effort, or `null` if thinking is not configured. */
     readonly thinkingEffort: ThinkingEffort | null;
     /**
      * Send a conversation to the LLM and return a streamed response.
@@ -1010,11 +1048,46 @@ export { CompactionCancelledEvent }
 export { CompactionCompletedEvent }
+/**
+ * Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`.
+ * `tokensAfter` / `keptUserMessageCount` / `droppedCount` are optional: the live
+ * path fills in what it knows, while restore passes the persisted record so its
+ * historical values are preserved verbatim.
+ */
+declare type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'> & Partial<Pick<CompactionResult, 'contextSummary' | 'tokensAfter' | 'keptUserMessageCount' | 'droppedCount'>>;
 export declare interface CompactionResult {
+    /** Human-facing summary text produced by the compaction model. */
     summary: string;
+    /**
+     * Exact summary message stored in the live model context. It includes the
+     * compaction prefix that tells the next model this is handoff context rather
+     * than a real user prompt. Optional for backward compatibility with older
+     * wire records, where `summary` was also the model-context text.
+     */
+    contextSummary?: string;
     compactedCount: number;
     tokensBefore: number;
     tokensAfter: number;
+    /**
+     * Number of real user messages kept verbatim ahead of the summary in the
+     * post-compaction live context. Written by `ContextMemory.applyCompaction`
+     * (the single derivation point for the post-compaction shape) so the
+     * wire-transcript reducer can reproduce the live folded length without
+     * re-deriving it from the full transcript. Optional for backward
+     * compatibility with older wire records.
+     */
+    keptUserMessageCount?: number;
+    /**
+     * Number of oldest messages trimmed from the summarizer input when the
+     * compaction request itself overflowed the model window. These messages are
+     * not covered by the produced summary — a real-user message among them may
+     * still be retained verbatim in the live context via `keptUserMessageCount`,
+     * but assistant/tool messages are lost. Surfacing the count lets records and
+     * telemetry report the summary's blind spot honestly. Optional for backward
+     * compatibility with older wire records.
+     */
+    droppedCount?: number;
 }
 declare type CompactionSource = 'manual' | 'auto';
@@ -1024,10 +1097,9 @@ export { CompactionStartedEvent }
 declare interface CompactionStrategy {
     shouldCompact(usedSize: number): boolean;
     shouldBlock(usedSize: number): boolean;
-    computeCompactCount(messages: readonly Message[], source: CompactionSource): number;
-    reduceCompactOnOverflow(messages: readonly Message[]): number;
     readonly checkAfterStep: boolean;
     readonly maxCompactionPerTurn: number;
+    readonly maxOverflowCompactionAttempts: number;
 }
 declare interface CompactionSummaryOrigin {
@@ -1056,7 +1128,7 @@ declare class ConfigState {
     private _cwd;
     private _modelAlias;
     private _profileName;
-    private _thinkingLevel;
+    private _thinkingEffort;
     private _systemPrompt;
     constructor(agent: Agent);
     update(changed: AgentConfigUpdateData): void;
@@ -1068,8 +1140,8 @@ declare class ConfigState {
     get provider(): ChatProvider;
     get model(): string;
     get modelAlias(): string | undefined;
-    get thinkingLevel(): ThinkingEffort;
-    private get alwaysThinkingModel();
+    get thinkingEffort(): ThinkingEffort;
+    private get currentModel();
     get profileName(): string | undefined;
     get systemPrompt(): string;
     get modelCapabilities(): ModelCapability;
@@ -1096,6 +1168,7 @@ declare class ContextMemory {
     private pendingToolResultIds;
     private deferredMessages;
     private _lastAssistantAt;
+    private lastProjectionRepairSignature;
     constructor(agent: Agent);
     get lastAssistantAt(): number | null;
     appendUserMessage(content: readonly ContentPart[], origin?: PromptOrigin): void;
@@ -1115,13 +1188,15 @@ declare class ContextMemory {
     popMatchedMessage(matcher: (origin: PromptOrigin | undefined) => boolean): boolean;
     clear(): void;
     undo(count: number): void;
-    applyCompaction(result: CompactionResult): void;
+    applyCompaction(input: CompactionInput): CompactionResult;
     data(): AgentContextData;
     get tokenCount(): number;
     get tokenCountWithPending(): number;
     get history(): readonly ContextMessage[];
-    project(messages: readonly ContextMessage[]): Message[];
+    project(messages: readonly ContextMessage[], options?: ProjectOptions): Message[];
+    private reportProjectionRepairs;
     get messages(): Message[];
+    get strictMessages(): Message[];
     useProjectedHistoryFrom(source: ContextMemory): void;
     finishResume(): void;
     private closePendingToolResults;
@@ -1903,7 +1978,7 @@ declare const FLAG_DEFINITIONS: readonly [{
     readonly title: "Micro compaction";
     readonly description: "Trim older large tool results from context while keeping recent conversation intact.";
     readonly env: "KIMI_CODE_EXPERIMENTAL_MICRO_COMPACTION";
-    readonly default: true;
+    readonly default: false;
     readonly surface: "core";
 }];
@@ -1962,13 +2037,21 @@ declare class FullCompaction {
         promise: Promise<void>;
         blockedByTurn: boolean;
     } | null;
+    private readonly observedMaxContextTokensByModel;
+    private lastCompactedTokenCount;
+    private consecutiveOverflowCompactions;
     protected readonly strategy: CompactionStrategy;
     constructor(agent: Agent, strategy?: CompactionStrategy);
     get isCompacting(): boolean;
+    getEffectiveMaxContextTokens(): number;
+    estimateCurrentRequestTokens(): number;
+    shouldRecoverFromContextOverflow(error: unknown, estimatedRequestTokens?: number): boolean;
+    observeContextOverflow(estimatedRequestTokens: number): void;
     begin(data: Readonly<CompactionBeginData>): void;
     cancel(): void;
     markCompleted(): void;
     private get tokenCountWithPending();
+    private estimateRequestTokens;
     resetForTurn(): void;
     handleOverflowError(signal: AbortSignal, error: unknown): Promise<void>;
     beforeStep(signal: AbortSignal): Promise<void>;
@@ -1977,10 +2060,11 @@ declare class FullCompaction {
     private beginAutoCompaction;
     private block;
     private compactionWorker;
+    private buildInstruction;
+    private postProcessSummary;
     private compactionRound;
     private triggerPreCompactHook;
     private triggerPostCompactHook;
-    private postProcessSummary;
 }
 /**
@@ -2044,11 +2128,23 @@ declare interface GenerateOptions {
      * provider adapter's generate call.
      */
     onRequestStart?: () => void;
+    /**
+     * Host-side instrumentation hook fired by the provider adapter immediately
+     * before it dispatches the network request to the upstream API. The window
+     * between {@link onRequestStart} and this hook is in-process request-building
+     * time (message serialization, param assembly) spent by the client; the
+     * window between this hook and the first streamed part is network + server
+     * time. Splitting time-to-first-token across this boundary lets hosts
+     * attribute latency to the client vs. the API server.
+     */
+    onRequestSent?: () => void;
     /**
      * Host-side instrumentation hook fired after the provider stream is fully
-     * drained, before post-processing the assembled response.
+     * drained, before post-processing the assembled response. Receives the
+     * {@link StreamDecodeStats} accounting accumulated across the stream when at
+     * least one part was streamed, or `undefined` for an empty stream.
      */
-    onStreamEnd?: () => void;
+    onStreamEnd?: (stats?: StreamDecodeStats) => void;
 }
 /**
@@ -2094,7 +2190,6 @@ declare interface GenerationKwargs {
     presence_penalty?: number | undefined;
     frequency_penalty?: number | undefined;
     stop?: string | string[] | undefined;
-    reasoning_effort?: string | undefined;
     prompt_cache_key?: string | undefined;
     extra_body?: ExtraBody;
 }
@@ -2385,6 +2480,7 @@ declare interface GoogleGenAIOptions {
     project?: string | undefined;
     location?: string | undefined;
     stream?: boolean | undefined;
+    defaultHeaders?: Record<string, string>;
     clientFactory?: (auth: ProviderRequestAuth) => GoogleGenAI;
 }
@@ -2512,8 +2608,19 @@ declare class InjectionManager {
      * mode is off, the agent is not the main agent, or there is nothing to inject.
      */
     injectGoal(): Promise<void>;
+    injectAfterCompaction(): Promise<void>;
+    /**
+     * Post-compaction only: re-surface still-running background tasks. Folding the
+     * live context to [recent user prompts, summary] drops the messages that
+     * started them and their status updates, so without this the model can forget
+     * a task is running and spawn a duplicate. Appended as an `injection`-origin
+     * reminder, so the next compaction drops and rebuilds it — kept fresh, never
+     * stacked. Runs only on the live path: restore replays the persisted reminder
+     * and `FullCompaction.begin` short-circuits before compaction there.
+     */
+    private injectActiveBackgroundTasks;
     onContextClear(): void;
-    onContextCompacted(compactedCount: number): void;
+    onContextCompacted(): void;
     onContextMessageRemoved(index: number): void;
     /** Per-step injectors plus the boundary goal injector, for lifecycle events. */
     private lifecycleInjectors;
@@ -3149,19 +3256,18 @@ declare const KimiConfigPatchSchema: z.ZodObject<{
         capabilities: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodString>>>;
         displayName: z.ZodOptional<z.ZodOptional<z.ZodString>>;
         reasoningKey: z.ZodOptional<z.ZodOptional<z.ZodString>>;
+        protocol: z.ZodOptional<z.ZodOptional<z.ZodLiteral<"anthropic">>>;
         adaptiveThinking: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
+        supportEfforts: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodString>>>;
+        defaultEffort: z.ZodOptional<z.ZodOptional<z.ZodString>>;
+        betaApi: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
     }, z.core.$strip>>>;
     thinking: z.ZodOptional<z.ZodObject<{
-        mode: z.ZodOptional<z.ZodOptional<z.ZodEnum<{
-            off: "off";
-            auto: "auto";
-            on: "on";
-        }>>>;
+        enabled: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
         effort: z.ZodOptional<z.ZodOptional<z.ZodString>>;
     }, z.core.$strip>>;
     planMode: z.ZodOptional<z.ZodBoolean>;
     yolo: z.ZodOptional<z.ZodBoolean>;
-    defaultThinking: z.ZodOptional<z.ZodBoolean>;
     defaultPermissionMode: z.ZodOptional<z.ZodEnum<{
         auto: "auto";
         manual: "manual";
@@ -3251,6 +3357,10 @@ declare const KimiConfigPatchSchema: z.ZodObject<{
         killGracePeriodMs: z.ZodOptional<z.ZodOptional<z.ZodNumber>>;
         printWaitCeilingS: z.ZodOptional<z.ZodOptional<z.ZodNumber>>;
     }, z.core.$strip>>;
+    modelCatalog: z.ZodOptional<z.ZodObject<{
+        refreshIntervalMs: z.ZodOptional<z.ZodOptional<z.ZodNumber>>;
+        refreshOnStart: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
+    }, z.core.$strip>>;
     experimental: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
     telemetry: z.ZodOptional<z.ZodBoolean>;
 }, z.core.$strict>;
@@ -3302,19 +3412,18 @@ declare const KimiConfigSchema: z.ZodObject<{
         capabilities: z.ZodOptional<z.ZodArray<z.ZodString>>;
         displayName: z.ZodOptional<z.ZodString>;
         reasoningKey: z.ZodOptional<z.ZodString>;
+        protocol: z.ZodOptional<z.ZodLiteral<"anthropic">>;
         adaptiveThinking: z.ZodOptional<z.ZodBoolean>;
+        supportEfforts: z.ZodOptional<z.ZodArray<z.ZodString>>;
+        defaultEffort: z.ZodOptional<z.ZodString>;
+        betaApi: z.ZodOptional<z.ZodBoolean>;
     }, z.core.$strip>>>;
     thinking: z.ZodOptional<z.ZodObject<{
-        mode: z.ZodOptional<z.ZodEnum<{
-            off: "off";
-            auto: "auto";
-            on: "on";
-        }>>;
+        enabled: z.ZodOptional<z.ZodBoolean>;
         effort: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;
     planMode: z.ZodOptional<z.ZodBoolean>;
     yolo: z.ZodOptional<z.ZodBoolean>;
-    defaultThinking: z.ZodOptional<z.ZodBoolean>;
     defaultPermissionMode: z.ZodOptional<z.ZodEnum<{
         auto: "auto";
         manual: "manual";
@@ -3404,6 +3513,10 @@ declare const KimiConfigSchema: z.ZodObject<{
         killGracePeriodMs: z.ZodOptional<z.ZodNumber>;
         printWaitCeilingS: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>>;
+    modelCatalog: z.ZodOptional<z.ZodObject<{
+        refreshIntervalMs: z.ZodOptional<z.ZodNumber>;
+        refreshOnStart: z.ZodOptional<z.ZodBoolean>;
+    }, z.core.$strip>>;
     experimental: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
     telemetry: z.ZodOptional<z.ZodBoolean>;
     raw: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
@@ -3488,6 +3601,7 @@ declare class KimiCore implements PromisableMethods<CoreAPI> {
     detachBackground({ sessionId, ...payload }: SessionAgentPayload<DetachBackgroundPayload>): Promise<BackgroundTaskInfo | undefined>;
     clearContext({ sessionId, ...payload }: SessionAgentPayload<EmptyPayload>): Promise<void>;
     activateSkill({ sessionId, ...payload }: SessionAgentPayload<ActivateSkillPayload>): Promise<void>;
+    activatePluginCommand({ sessionId, ...payload }: SessionAgentPayload<ActivatePluginCommandPayload>): Promise<void>;
     getBackgroundOutput({ sessionId, ...payload }: SessionAgentPayload<GetBackgroundOutputPayload>): Promise<string>;
     getContext({ sessionId, ...payload }: SessionAgentPayload<EmptyPayload>): Promise<AgentContextData>;
     getConfig({ sessionId, ...payload }: SessionAgentPayload<EmptyPayload>): Promise<AgentConfigData>;
@@ -3499,6 +3613,7 @@ declare class KimiCore implements PromisableMethods<CoreAPI> {
     updateSessionMetadata({ sessionId, ...payload }: UpdateSessionMetadataRequest): Promise<void>;
     getSessionMetadata({ sessionId, ...payload }: SessionScopedPayload<EmptyPayload>): SessionMeta;
     listSkills({ sessionId, ...payload }: SessionScopedPayload<EmptyPayload>): Promise<readonly SkillSummary[]>;
+    listPluginCommands({ sessionId, ...payload }: SessionScopedPayload<EmptyPayload>): readonly PluginCommandDef[];
     listMcpServers({ sessionId, ...payload }: SessionScopedPayload<EmptyPayload>): readonly McpServerInfo[];
     getMcpStartupMetrics({ sessionId, ...payload }: SessionScopedPayload<EmptyPayload>): Promise<McpStartupMetrics>;
     reconnectMcpServer({ sessionId, ...payload }: SessionScopedPayload<ReconnectMcpServerPayload>): Promise<void>;
@@ -3712,6 +3827,10 @@ declare interface KimiOptions {
     stream?: boolean | undefined;
     defaultHeaders?: Record<string, string> | undefined;
     generationKwargs?: GenerationKwargs | undefined;
+    /** Efforts the model advertises (e.g. ["low", "high", "max"]). When
+     * present and non-empty, withThinking sends the chosen effort on the wire;
+     * when absent/empty, only thinking.type is sent. */
+    supportEfforts?: readonly string[] | undefined;
     clientFactory?: (auth: ProviderRequestAuth) => OpenAI;
 }
@@ -3824,6 +3943,27 @@ declare class LlmRequestLogger {
 declare interface LLMStreamTiming {
     readonly firstTokenLatencyMs: number;
     readonly streamDurationMs: number;
+    /**
+     * Portion of `firstTokenLatencyMs` spent in-process building the request
+     * (message serialization, param assembly) before the provider dispatched the
+     * network call. `undefined` when the provider does not report the
+     * client/server boundary (no `onRequestSent`).
+     */
+    readonly requestBuildMs?: number;
+    /**
+     * Portion of `firstTokenLatencyMs` spent waiting on the network + API server
+     * from request dispatch to the first streamed token. `undefined` when the
+     * provider does not report the client/server boundary.
+     */
+    readonly serverFirstTokenMs?: number;
+    /**
+     * Split of `streamDurationMs` (the decode window): time spent awaiting parts
+     * from the provider (`serverDecodeMs`, server + network) vs. time spent
+     * processing parts in-process (`clientConsumeMs`, host callbacks / merge).
+     * `undefined` when the provider stream did not report decode accounting.
+     */
+    readonly serverDecodeMs?: number;
+    readonly clientConsumeMs?: number;
 }
 /**
@@ -3940,6 +4080,20 @@ declare interface LoopStepEndEvent {
     readonly finishReason?: LoopStepStopReason | undefined;
     readonly llmFirstTokenLatencyMs?: number | undefined;
     readonly llmStreamDurationMs?: number | undefined;
+    /**
+     * Split of `llmFirstTokenLatencyMs`: in-process request-building time on the
+     * client vs. network + API-server time to the first token. Both `undefined`
+     * when the provider does not report the client/server boundary.
+     */
+    readonly llmRequestBuildMs?: number | undefined;
+    readonly llmServerFirstTokenMs?: number | undefined;
+    /**
+     * Split of `llmStreamDurationMs` (the decode window): time awaiting parts
+     * from the provider vs. time processing parts in-process. Both `undefined`
+     * when the provider stream did not report decode accounting.
+     */
+    readonly llmServerDecodeMs?: number | undefined;
+    readonly llmClientConsumeMs?: number | undefined;
     /**
      * Provider diagnostics are optional and must not drive loop control.
      * Use `finishReason` for normalized behavior.
@@ -4412,7 +4566,11 @@ declare const ModelAliasSchema: z.ZodObject<{
     capabilities: z.ZodOptional<z.ZodArray<z.ZodString>>;
     displayName: z.ZodOptional<z.ZodString>;
     reasoningKey: z.ZodOptional<z.ZodString>;
+    protocol: z.ZodOptional<z.ZodLiteral<"anthropic">>;
     adaptiveThinking: z.ZodOptional<z.ZodBoolean>;
+    supportEfforts: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    defaultEffort: z.ZodOptional<z.ZodString>;
+    betaApi: z.ZodOptional<z.ZodBoolean>;
 }, z.core.$strip>;
 /**
@@ -4689,6 +4847,38 @@ declare interface PluginCapabilityState {
     readonly mcpServers?: Readonly<Record<string, PluginMcpServerState>>;
 }
+export { PluginCommandActivatedEvent }
+export declare interface PluginCommandDef {
+    readonly pluginId: string;
+    readonly name: string;
+    readonly description: string;
+    readonly body: string;
+    readonly path: string;
+}
+/**
+ * A resolved command file plus its namespace-preserving name.
+ *
+ * `name` is the path of the file relative to the declared `commands` entry
+ * (without the `.md` extension, using `/` separators), so a file at
+ * `commands/frontend/component.md` yields the name `frontend/component`.
+ * Frontmatter `name` in the file itself takes precedence over this at load time.
+ */
+declare interface PluginCommandEntry {
+    readonly path: string;
+    readonly name: string;
+}
+declare interface PluginCommandOrigin {
+    readonly kind: 'plugin_command';
+    readonly activationId: string;
+    readonly pluginId: string;
+    readonly commandName: string;
+    readonly commandArgs?: string | undefined;
+    readonly trigger: 'user-slash';
+}
 declare interface PluginDiagnostic {
     readonly severity: PluginDiagnosticSeverity;
     readonly message: string;
@@ -4744,6 +4934,7 @@ declare class PluginManager {
     enabledSessionStarts(): readonly EnabledPluginSessionStart[];
     enabledMcpServers(): Record<string, McpServerConfig>;
     enabledHooks(): readonly HookDef[];
+    enabledCommands(): Promise<readonly PluginCommandDef[]>;
     summaries(): readonly PluginSummary[];
     info(id: string): PluginInfo | undefined;
     private persist;
@@ -4766,6 +4957,7 @@ declare interface PluginManifest {
     readonly sessionStart?: PluginSessionStart;
     readonly mcpServers?: Readonly<Record<string, McpServerConfig>>;
     readonly hooks?: readonly HookDefConfig[];
+    readonly commands?: readonly PluginCommandEntry[];
     readonly interface?: PluginInterface;
     readonly skillInstructions?: string;
 }
@@ -4827,6 +5019,7 @@ export declare interface PluginSummary {
     readonly mcpServerCount: number;
     readonly enabledMcpServerCount: number;
     readonly hookCount: number;
+    readonly commandCount: number;
     readonly hasErrors: boolean;
     readonly source: PluginSource;
     readonly originalSource?: string;
@@ -4853,6 +5046,99 @@ export declare interface ProcessBackgroundTaskInfo extends BackgroundTaskInfoBas
     readonly exitCode: number | null;
 }
+/**
+ * A repair the projector applied to make the history wire-valid. Each one means
+ * the stored history was not directly sendable to a strict provider.
+ */
+declare type ProjectionAnomaly =
+/** A recorded result was not adjacent to its call and had to be moved up. */
+    {
+    readonly kind: 'tool_result_reordered';
+    readonly toolCallId: string;
+}
+/**
+* No result existed for a call, so a placeholder was synthesized. `trailing`
+* is true when it closed a still-open tail call (expected under
+* `synthesizeMissing`), false when it closed a mid-history orphan whose result
+* was lost (a genuine defect worth investigating).
+*/
+| {
+    readonly kind: 'tool_result_synthesized';
+    readonly toolCallId: string;
+    readonly trailing: boolean;
+}
+/** A result with no matching call anywhere was dropped (strict resend only). */
+| {
+    readonly kind: 'orphan_tool_result_dropped';
+    readonly toolCallId: string;
+}
+/** A leading non-user message was dropped so the first turn is user (strict). */
+| {
+    readonly kind: 'leading_non_user_dropped';
+    readonly role: string;
+}
+/** Two adjacent assistant turns were merged into one (strict). */
+| {
+    readonly kind: 'consecutive_assistants_merged';
+}
+/** A non-empty but all-whitespace text block was dropped (always). */
+| {
+    readonly kind: 'whitespace_text_dropped';
+    readonly role: string;
+};
+declare interface ProjectOptions {
+    /**
+     * When `true`, emit a synthetic `tool_result` for *every* assistant `tool_use`
+     * whose result is not present in the provided messages — including a trailing,
+     * still-in-flight call. Used by full compaction, where the compacted prefix is
+     * a slice that may exclude a delayed result preserved in the retained tail; the
+     * synthetic result keeps the exchange closed so the summary request is not
+     * rejected. Leave `false` for normal turns: a *trailing* missing result there
+     * means the call is still in-flight and must not be closed prematurely. (A
+     * *non-trailing* missing result is always closed regardless of this flag — see
+     * `repairToolExchangeAdjacency` — because a later turn proves it is not
+     * in-flight.)
+     */
+    readonly synthesizeMissing?: boolean;
+    /**
+     * When `true`, drop any `tool_result` whose `toolCallId` matches no assistant
+     * `tool_use` anywhere in the provided messages. Strict providers reject such a
+     * stray result as an "unexpected `tool_result`". Off by default so the normal
+     * path never silently discards recorded output; the post-400 strict-resend
+     * fallback enables it (together with `synthesizeMissing`) as a last resort to
+     * force a wire-compliant request out of an otherwise-bricked session.
+     */
+    readonly dropOrphanResults?: boolean;
+    /**
+     * When `true`, drop leading messages until the first one is a user turn. Strict
+     * providers require the first message to be `user`; a history that (after
+     * dropping/compaction) starts with an assistant or tool message is rejected.
+     * Strict-resend only — the normal path keeps the original opening.
+     */
+    readonly dropLeadingNonUser?: boolean;
+    /**
+     * When `true`, merge back-to-back assistant messages into one. Strict providers
+     * reject consecutive same-role turns ("roles must alternate"); consecutive user
+     * turns are already merged at the provider boundary, but consecutive assistant
+     * turns are not. Strict-resend only. Content is concatenated verbatim — callers
+     * must not rely on this when extended-thinking ordering matters, but two
+     * consecutive assistant turns do not arise in well-formed transcripts.
+     */
+    readonly mergeConsecutiveAssistants?: boolean;
+    /**
+     * Optional sink invoked for every repair the projector applies to keep the
+     * outgoing wire valid: a displaced result moved back next to its call, a
+     * synthetic result invented for a missing one, a stray result dropped, a
+     * leading non-user message dropped, or consecutive assistants merged. The
+     * projection itself stays a pure transform; the caller decides whether/how to
+     * surface these (the context logs them so a silently-mangled history is never
+     * papered over without a trace). Not called when the history is already
+     * well-formed.
+     */
+    readonly onAnomaly?: (anomaly: ProjectionAnomaly) => void;
+}
 declare type Promisable<T> = [T] extends [Promise<any>] ? T | Awaited<T> : T | Promise<T>;
 declare type PromisableMethods<T> = {
@@ -4863,7 +5149,7 @@ declare type Promisify<T> = [T] extends [Promise<any>] ? T : Promise<T>;
 export declare type PromptInput = readonly PromptPart[];
-export declare type PromptOrigin = UserPromptOrigin | SkillActivationOrigin | InjectionOrigin | ShellCommandOrigin | CompactionSummaryOrigin | SystemTriggerOrigin | BackgroundTaskOrigin | CronJobOrigin | CronMissedOrigin | HookResultOrigin | RetryOrigin;
+export declare type PromptOrigin = UserPromptOrigin | SkillActivationOrigin | PluginCommandOrigin | InjectionOrigin | ShellCommandOrigin | CompactionSummaryOrigin | SystemTriggerOrigin | BackgroundTaskOrigin | CronJobOrigin | CronMissedOrigin | HookResultOrigin | RetryOrigin;
 export declare type PromptPart = Extract<ContentPart, {
     type: 'text' | 'image_url' | 'video_url';
@@ -4929,6 +5215,7 @@ declare interface ProviderManagerOptions {
     readonly kimiRequestHeaders?: Record<string, string>;
     readonly resolveOAuthTokenProvider?: OAuthTokenProviderResolver;
     readonly promptCacheKey?: string;
+    readonly adaptiveThinkingOverride?: () => boolean | undefined;
 }
 /**
@@ -5134,6 +5421,10 @@ declare interface ResolvedRuntimeProvider {
     /** Declared 'always_thinking' capability — the model cannot disable thinking. */
     readonly alwaysThinking?: boolean;
     readonly maxOutputSize?: number;
+    /** Configured provider wire type (`provider.type`), before any model-level protocol override. */
+    readonly type: ProviderType;
+    /** Model-level protocol override (`alias.protocol`); when set, takes precedence over `type` for transport selection. */
+    readonly protocol: ModelAlias['protocol'];
 }
 declare interface ResolvedToolExecutionHookContext extends ToolExecutionHookContext {
@@ -5369,6 +5660,7 @@ export declare abstract class SDKRpcClientBase {
     getUsage(input: SessionIdRpcInput): Promise<SessionUsage>;
     getStatus(input: SessionIdRpcInput): Promise<SessionStatus>;
     listSkills(input: SessionIdRpcInput): Promise<readonly SkillSummary[]>;
+    listPluginCommands(input: SessionIdRpcInput): Promise<readonly PluginCommandDef[]>;
     listBackgroundTasks(input: SessionIdRpcInput & {
         activeOnly?: boolean;
         limit?: number;
@@ -5400,6 +5692,7 @@ export declare abstract class SDKRpcClientBase {
     reloadPlugins(): Promise<ReloadSummary>;
     getPluginInfo(id: string): Promise<PluginInfo>;
     activateSkill(input: ActivateSkillRpcInput): Promise<void>;
+    activatePluginCommand(input: ActivatePluginCommandRpcInput): Promise<void>;
     onEvent(listener: (event: Event_2) => void): Unsubscribe;
     receiveEvent(event: Event_2): void;
     setApprovalHandler(sessionId: string, handler: ApprovalHandler | undefined): void;
@@ -5501,7 +5794,7 @@ export declare class Session {
     startBtw(): Promise<string>;
     cancel(): Promise<void>;
     setModel(model: string): Promise<void>;
-    setThinking(level: string): Promise<void>;
+    setThinking(effort: ThinkingEffort): Promise<void>;
     setPermission(mode: PermissionMode): Promise<void>;
     setPlanMode(enabled: boolean): Promise<void>;
     setSwarmMode(enabled: boolean, trigger: SwarmModeTrigger): Promise<void>;
@@ -5514,6 +5807,7 @@ export declare class Session {
     getUsage(): Promise<SessionUsage>;
     getStatus(): Promise<SessionStatus>;
     listSkills(): Promise<readonly SkillSummary[]>;
+    listPluginCommands(): Promise<readonly PluginCommandDef[]>;
     /**
      * List background tasks for this session's interactive agent.
      *
@@ -5564,6 +5858,7 @@ export declare class Session {
     reloadPlugins(): Promise<ReloadSummary>;
     getPluginInfo(id: string): Promise<PluginInfo>;
     activateSkill(name: string, args?: string | undefined): Promise<void>;
+    activatePluginCommand(pluginId: string, commandName: string, args?: string | undefined): Promise<void>;
     close(): Promise<void>;
     /** @internal */
     emitMetaUpdated(patch: {
@@ -5588,6 +5883,7 @@ declare class Session_2 {
     private toolKaos;
     private persistenceKaos;
     private additionalDirs;
+    private readonly pluginCommands;
     private agentIdCounter;
     private readonly skillsReady;
     metadata: SessionMeta;
@@ -5654,6 +5950,7 @@ declare class Session_2 {
     readMetadata(): Promise<SessionMeta>;
     flushMetadata(): Promise<void>;
     listSkills(): Promise<readonly SkillSummary_2[]>;
+    listPluginCommands(): readonly PluginCommandDef[];
     private loadSkills;
     private loadMcpServers;
     private emitInitialMcpLoadError;
@@ -5666,6 +5963,8 @@ declare class Session_2 {
     private resolveAgentEntry;
     private resumeAgent;
     private resumePersistedAgent;
+    private restoreAgentProfileHandle;
+    private resolvePersistedProfile;
     private nextGeneratedAgentId;
     private requireMainAgent;
     private triggerSessionStart;
@@ -5679,6 +5978,7 @@ declare interface SessionAPI extends AgentAPIWithId {
     updateSessionMetadata: (payload: UpdateSessionMetadataPayload) => void;
     getSessionMetadata: (payload: EmptyPayload) => SessionMeta;
     listSkills: (payload: EmptyPayload) => readonly SkillSummary[];
+    listPluginCommands: (payload: EmptyPayload) => readonly PluginCommandDef[];
     listMcpServers: (payload: EmptyPayload) => readonly McpServerInfo[];
     getMcpStartupMetrics: (payload: EmptyPayload) => McpStartupMetrics;
     reconnectMcpServer: (payload: ReconnectMcpServerPayload) => void;
@@ -5803,6 +6103,7 @@ declare interface SessionOptions_2 {
     readonly mcpConfig?: SessionMcpConfig;
     readonly telemetry?: TelemetryClient | undefined;
     readonly pluginSessionStarts?: readonly EnabledPluginSessionStart[];
+    readonly pluginCommands?: readonly PluginCommandDef[];
     readonly appVersion?: string;
     readonly experimentalFlags?: ExperimentalFlagResolver;
     readonly additionalDirs?: readonly string[];
@@ -5858,7 +6159,7 @@ declare class SessionSkillRegistry implements SkillRegistry {
 export declare interface SessionStatus {
     readonly model?: string;
-    readonly thinkingLevel: string;
+    readonly thinkingEffort: string;
     readonly permission: PermissionMode;
     readonly planMode: boolean;
     readonly swarmMode?: boolean | undefined;
@@ -5984,11 +6285,11 @@ declare type SetSessionSwarmModeRpcInput = (SessionIdRpcInput & {
 });
 declare interface SetSessionThinkingRpcInput extends SessionIdRpcInput {
-    readonly level: string;
+    readonly effort: string;
 }
 declare interface SetThinkingPayload {
-    readonly level: string;
+    readonly effort: string;
 }
 declare interface ShellCommandOrigin {
@@ -6157,6 +6458,24 @@ declare interface StopBackgroundPayload {
     readonly reason?: string;
 }
+/**
+ * Decode-phase accounting for a single streamed generation. Splits the window
+ * from the first streamed part to stream end into the time spent waiting on the
+ * provider for the next part (server + network) versus the time spent
+ * processing each part in-process (deep copy, host callbacks, part merging).
+ *
+ * Because both buckets are wall-clock measured on the single JS thread, a
+ * stop-the-world GC pause that lands while awaiting the next part is counted in
+ * {@link serverDecodeMs}; a non-trivial {@link clientConsumeMs} share is the
+ * unambiguous signal that the host's per-part processing is throttling decode.
+ */
+declare interface StreamDecodeStats {
+    /** Cumulative time spent awaiting the next streamed part (server + network). */
+    readonly serverDecodeMs: number;
+    /** Cumulative time spent processing streamed parts in-process (client). */
+    readonly clientConsumeMs: number;
+}
 /**
  * An async-iterable stream of message parts produced by a single LLM response.
  *
@@ -6311,31 +6630,33 @@ export declare type ThinkingConfig = z.infer<typeof ThinkingConfigSchema>;
 declare interface ThinkingConfig_2 {
     type?: 'enabled' | 'disabled';
+    effort?: string;
     keep?: unknown;
     [key: string]: unknown;
 }
 declare const ThinkingConfigSchema: z.ZodObject<{
-    mode: z.ZodOptional<z.ZodEnum<{
-        off: "off";
-        auto: "auto";
-        on: "on";
-    }>>;
+    enabled: z.ZodOptional<z.ZodBoolean>;
     effort: z.ZodOptional<z.ZodString>;
 }, z.core.$strip>;
 export { ThinkingDeltaEvent }
 /**
- * Normalized thinking effort level used across providers.
+ * Thinking effort passed to {@link ChatProvider.withThinking}.
+ *
+ * `'off'` and `'on'` are the only reserved values: `'off'` disables thinking,
+ * and `'on'` is the on-signal for boolean models (models that do not declare
+ * `support_efforts`). Everything else is a model-declared effort (e.g.
+ * `"low"`, `"high"`, `"max"`) carried as an open string. The type collapses to
+ * `string` at runtime; it exists purely as a semantic marker that a value is
+ * expected to be `'off'`, `'on'`, or a model-declared effort.
  *
- * Values above `high` are provider/model-specific and may be clamped by the
- * adapter when the native API has no matching level. OpenAI maps `max` to its
- * `xhigh` ceiling; Kimi and Gemini cap `xhigh`/`max` at `high`; Anthropic
- * supports `xhigh`/`max` only on selected models and otherwise clamps to
- * `high`.
+ * The model's `support_efforts` is the single source of truth for which
+ * efforts are valid — providers normalize any unrecognized effort by omitting
+ * the effort on the wire rather than rejecting it.
  */
-declare type ThinkingEffort = 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
+export declare type ThinkingEffort = 'off' | 'on' | (string & {});
 declare interface ThinkPart {
     type: 'think';
@@ -6548,6 +6869,7 @@ declare class ToolManager {
     initializeBuiltinTools(): void;
     refreshBuiltinTools(): void;
     private createVideoUploader;
+    private videoUploadTelemetryProps;
     get loopTools(): readonly ExecutableTool[];
 }
@@ -6661,6 +6983,16 @@ declare class TurnFlow {
     waitForTurnFirstRequest(): Promise<void>;
     private abortTurn;
     private flushSteerBuffer;
+    /**
+     * Replay inputs (prompts or steers) that were deferred while a manual compaction
+     * held the context. Called by `FullCompaction` once the compaction lifecycle
+     * (summary + reinjection) is done — and on cancel/failure — so deferred input is
+     * never lost or stuck. If a turn is somehow already active (e.g. one that raced
+     * and cancelled the compaction), let it consume the buffer like any other steer;
+     * otherwise launch a fresh turn from the first buffered item, with the rest
+     * draining into it via `flushSteerBuffer`.
+     */
+    onCompactionFinished(): void;
     finishResume(): void;
     /**
      * The body of the single in-flight `activeTurn`. Routes to the goal driver
@@ -6699,6 +7031,13 @@ declare class TurnFlow {
     private hasPriorStepToolCallKey;
     private trackTurnInterrupted;
     private telemetryMode;
+    /**
+     * Resolve the current model's provider wire type and any model-level protocol
+     * override for request telemetry. Never throws — telemetry must not break a
+     * turn over an unresolvable provider config (the step loop will surface that
+     * error on its own).
+     */
+    private requestProtocolProps;
     private shouldTrackApiError;
 }