qlogicagent 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/agent.js +7 -7
  2. package/dist/cli.js +204 -195
  3. package/dist/index.js +204 -195
  4. package/dist/orchestration.js +10 -10
  5. package/dist/types/agent/tool-loop.d.ts +2 -0
  6. package/dist/types/agent/types.d.ts +54 -1
  7. package/dist/types/cli/stdio-server.d.ts +10 -0
  8. package/dist/types/cli/tool-bootstrap.d.ts +13 -1
  9. package/dist/types/llm/index.d.ts +1 -1
  10. package/dist/types/llm/llm-client.d.ts +1 -1
  11. package/dist/types/llm/media-client.d.ts +3 -4
  12. package/dist/types/llm/media-transport.d.ts +75 -4
  13. package/dist/types/llm/provider-def.d.ts +124 -3
  14. package/dist/types/llm/provider-registry.d.ts +5 -0
  15. package/dist/types/llm/provider-tool-api.d.ts +44 -0
  16. package/dist/types/llm/retry.d.ts +37 -0
  17. package/dist/types/llm/transport.d.ts +161 -2
  18. package/dist/types/llm/transports/anthropic-messages.d.ts +7 -0
  19. package/dist/types/llm/transports/minimax-media.d.ts +5 -0
  20. package/dist/types/llm/transports/openai-chat.d.ts +44 -3
  21. package/dist/types/llm/transports/realtime-transport.d.ts +183 -0
  22. package/dist/types/llm/transports/volcengine-grounding.d.ts +58 -0
  23. package/dist/types/llm/transports/volcengine-media.d.ts +50 -0
  24. package/dist/types/llm/transports/volcengine-responses.d.ts +60 -0
  25. package/dist/types/llm/transports/zhipu-media.d.ts +60 -0
  26. package/dist/types/llm/transports/zhipu-tool-api.d.ts +35 -0
  27. package/dist/types/orchestration/error-handling/error-classification.d.ts +1 -10
  28. package/dist/types/orchestration/index.d.ts +1 -1
  29. package/dist/types/orchestration/tool-cascade.d.ts +40 -0
  30. package/dist/types/orchestration/tool-loop/tool-schema.d.ts +4 -1
  31. package/dist/types/protocol/methods.d.ts +19 -0
  32. package/dist/types/skills/memory/memory-extractor.d.ts +1 -1
  33. package/dist/types/skills/tools/file-management-tool.d.ts +90 -0
  34. package/dist/types/skills/tools/image-generate-tool.d.ts +13 -1
  35. package/dist/types/skills/tools/music-generate-tool.d.ts +25 -0
  36. package/dist/types/skills/tools/stt-tool.d.ts +33 -0
  37. package/dist/types/skills/tools/three-d-generate-tool.d.ts +45 -0
  38. package/dist/types/skills/tools/tts-tool.d.ts +12 -0
  39. package/dist/types/skills/tools/video-edit-tool.d.ts +5 -2
  40. package/dist/types/skills/tools/video-generate-tool.d.ts +102 -2
  41. package/dist/types/skills/tools/video-merge-tool.d.ts +1 -1
  42. package/dist/types/skills/tools/video-upscale-tool.d.ts +1 -1
  43. package/dist/types/skills/tools/voice-clone-tool.d.ts +40 -0
  44. package/package.json +1 -1
@@ -0,0 +1,183 @@
1
+ /**
2
+ * Realtime WebSocket Transport — bidirectional audio/voice streaming
3
+ * via the OpenAI Realtime API protocol (also compatible with GLM Realtime).
4
+ *
5
+ * ## Protocol: WebSocket JSON events
6
+ *
7
+ * Client → Server:
8
+ * - session.update: configure session (model, voice, tools, etc.)
9
+ * - input_audio_buffer.append: send audio chunks (base64 PCM16)
10
+ * - input_audio_buffer.commit: signal end of audio input
11
+ * - conversation.item.create: inject text/function_result items
12
+ * - response.create: request a model response
13
+ * - response.cancel: abort in-progress response
14
+ *
15
+ * Server → Client:
16
+ * - session.created: session initialized
17
+ * - session.updated: config acknowledged
18
+ * - input_audio_buffer.speech_started: VAD detected speech
19
+ * - input_audio_buffer.speech_stopped: VAD detected silence
20
+ * - response.created: response generation started
21
+ * - response.output_item.added: new output item (text/audio/function_call)
22
+ * - response.audio.delta: audio chunk (base64 PCM16)
23
+ * - response.audio_transcript.delta: transcript of generated speech
24
+ * - response.text.delta: text generation delta
25
+ * - response.function_call_arguments.delta: tool call args delta
26
+ * - response.function_call_arguments.done: tool call complete
27
+ * - response.output_item.done: output item finished
28
+ * - response.done: full response complete
29
+ * - error: server error
30
+ *
31
+ * ## Architecture
32
+ *
33
+ * RealtimeTransport manages a single persistent WebSocket connection per session.
34
+ * It exposes an event-driven API (AsyncGenerator) that the agent tool-loop
35
+ * can consume for voice-enabled interactions.
36
+ *
37
+ * Docs:
38
+ * - OpenAI: https://platform.openai.com/docs/api-reference/realtime
39
+ * - GLM: https://docs.bigmodel.cn/cn/guide/develop/realtime-api
40
+ */
41
+ export interface RealtimeConfig {
42
+ /** WebSocket endpoint (e.g. "wss://api.openai.com/v1/realtime") */
43
+ baseUrl: string;
44
+ /** Model to use (e.g. "gpt-4o-realtime-preview", "glm-realtime") */
45
+ model: string;
46
+ /** API key */
47
+ apiKey: string;
48
+ /** Voice for TTS output */
49
+ voice?: string;
50
+ /** Input modalities: "text", "audio", or both */
51
+ inputModalities?: Array<"text" | "audio">;
52
+ /** Output modalities: "text", "audio", or both */
53
+ outputModalities?: Array<"text" | "audio">;
54
+ /** Temperature for generation */
55
+ temperature?: number;
56
+ /** Tool definitions for function calling */
57
+ tools?: RealtimeTool[];
58
+ /** Voice Activity Detection mode */
59
+ vadMode?: "server_vad" | "none";
60
+ /** VAD threshold (0.0-1.0) */
61
+ vadThreshold?: number;
62
+ /** Auth type: "header" (OpenAI) or "query" (GLM) */
63
+ authMode?: "header" | "query";
64
+ }
65
+ export interface RealtimeTool {
66
+ type: "function";
67
+ name: string;
68
+ description: string;
69
+ parameters: Record<string, unknown>;
70
+ }
71
+ export type RealtimeEvent = {
72
+ type: "session_created";
73
+ sessionId: string;
74
+ } | {
75
+ type: "speech_started";
76
+ } | {
77
+ type: "speech_stopped";
78
+ audioEndMs: number;
79
+ } | {
80
+ type: "audio_delta";
81
+ delta: string;
82
+ } | {
83
+ type: "audio_transcript_delta";
84
+ delta: string;
85
+ } | {
86
+ type: "text_delta";
87
+ delta: string;
88
+ } | {
89
+ type: "function_call_start";
90
+ callId: string;
91
+ name: string;
92
+ } | {
93
+ type: "function_call_delta";
94
+ callId: string;
95
+ delta: string;
96
+ } | {
97
+ type: "function_call_done";
98
+ callId: string;
99
+ name: string;
100
+ arguments: string;
101
+ } | {
102
+ type: "response_done";
103
+ usage?: RealtimeUsage;
104
+ } | {
105
+ type: "error";
106
+ code: string;
107
+ message: string;
108
+ } | {
109
+ type: "closed";
110
+ code: number;
111
+ reason: string;
112
+ };
113
+ export interface RealtimeUsage {
114
+ inputTokens: number;
115
+ outputTokens: number;
116
+ inputAudioTokens?: number;
117
+ outputAudioTokens?: number;
118
+ }
119
+ /**
120
+ * Manages a persistent WebSocket connection for real-time audio/voice
121
+ * interactions with an LLM provider.
122
+ *
123
+ * Usage:
124
+ * ```ts
125
+ * const rt = new RealtimeTransport(config);
126
+ * await rt.connect(); // connect() returns a Promise — wait for the socket before sending
127
+ *
128
+ * // Send audio
129
+ * rt.appendAudio(base64Chunk);
130
+ * rt.commitAudio();
131
+ *
132
+ * // Or send text
133
+ * rt.sendText("Hello!");
134
+ *
135
+ * // Submit function results
136
+ * rt.sendFunctionResult(callId, result);
137
+ *
138
+ * // Consume events
139
+ * for await (const event of rt.events()) {
140
+ * switch (event.type) {
141
+ * case "audio_delta": playAudio(event.delta); break;
142
+ * case "function_call_done": handleToolCall(event); break;
143
+ * }
144
+ * }
145
+ *
146
+ * rt.close();
147
+ * ```
148
+ */
149
+ export declare class RealtimeTransport {
150
+ private ws;
151
+ private config;
152
+ private eventQueue;
153
+ private waiters;
154
+ private closed;
155
+ constructor(config: RealtimeConfig);
156
+ /** Open WebSocket connection and configure session. */
157
+ connect(): Promise<void>;
158
+ /** Send audio data (base64 PCM16). */
159
+ appendAudio(base64Chunk: string): void;
160
+ /** Mark end of audio input and trigger response. */
161
+ commitAudio(): void;
162
+ /** Send a text message. */
163
+ sendText(text: string): void;
164
+ /** Submit a function call result back to the model. */
165
+ sendFunctionResult(callId: string, output: string): void;
166
+ /** Trigger a model response (e.g. after sending text). */
167
+ requestResponse(): void;
168
+ /** Cancel an in-progress response. */
169
+ cancelResponse(): void;
170
+ /** Async iterator of server events. */
171
+ events(): AsyncGenerator<RealtimeEvent>;
172
+ /** Close the WebSocket connection. */
173
+ close(): void;
174
+ private buildUrl;
175
+ private sendSessionUpdate;
176
+ private send;
177
+ private push;
178
+ private drainWaiters;
179
+ /**
180
+ * Parse a server-sent JSON event into our typed event(s).
181
+ */
182
+ private parseServerEvent;
183
+ }
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Volcengine Grounding — spatial coordinate parser (volcengine-ProviderMax §14).
3
+ *
4
+ * Parses model-emitted spatial reference tags from text output:
5
+ * - <bbox>x_min y_min x_max y_max</bbox> → bounding box
6
+ * - <point>x y</point> → single point
7
+ * - <polygon>x1 y1 x2 y2 ...</polygon> → polygon vertices
8
+ *
9
+ * All coordinates are in normalized 1000×1000 space, range [0, 999].
10
+ * Use `bboxToPixels()` / `pointToPixels()` to convert to actual image pixel coordinates.
11
+ */
12
+ export type SpatialReference = {
13
+ type: "bbox";
14
+ x1: number;
15
+ y1: number;
16
+ x2: number;
17
+ y2: number;
18
+ space: "normalized_1000";
19
+ } | {
20
+ type: "point";
21
+ x: number;
22
+ y: number;
23
+ space: "normalized_1000";
24
+ } | {
25
+ type: "polygon";
26
+ points: Array<{
27
+ x: number;
28
+ y: number;
29
+ }>;
30
+ space: "normalized_1000";
31
+ };
32
+ export interface PixelBbox {
33
+ x1: number;
34
+ y1: number;
35
+ x2: number;
36
+ y2: number;
37
+ }
38
+ export interface PixelPoint {
39
+ x: number;
40
+ y: number;
41
+ }
42
+ /**
43
+ * Extract all spatial references from model output text.
44
+ * Returns an empty array if no grounding tags are found.
45
+ */
46
+ export declare function parseGroundingTags(text: string): SpatialReference[];
47
+ /**
48
+ * Convert a normalized 1000×1000 bounding box to pixel coordinates.
49
+ */
50
+ export declare function bboxToPixels(ref: Extract<SpatialReference, {
51
+ type: "bbox";
52
+ }>, width: number, height: number): PixelBbox;
53
+ /**
54
+ * Convert a normalized 1000×1000 point to pixel coordinates.
55
+ */
56
+ export declare function pointToPixels(ref: Extract<SpatialReference, {
57
+ type: "point";
58
+ }>, width: number, height: number): PixelPoint;
@@ -27,8 +27,58 @@ export declare class VolcengineMediaTransport implements MediaTransport {
27
27
  */
28
28
  canHandle(request: MediaRequest): boolean;
29
29
  private generateImage;
30
+ /**
31
+ * Parse streaming image SSE — yields progressive image quality upgrades.
32
+ * Final event contains the full-quality image URL.
33
+ */
34
+ private parseStreamingImage;
30
35
  private generateVideo;
31
36
  private generate3D;
37
+ /**
38
+ * List video generation tasks with optional filters.
39
+ * GET /v3/contents/generations/tasks
40
+ */
41
+ listVideoTasks(apiKey: string, options?: {
42
+ after?: string;
43
+ limit?: number;
44
+ status?: string;
45
+ }, signal?: AbortSignal): Promise<Record<string, unknown>>;
46
+ /**
47
+ * Cancel or delete a video generation task.
48
+ * DELETE /v3/contents/generations/tasks/{taskId}
49
+ */
50
+ deleteVideoTask(taskId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
51
+ /**
52
+ * Upload a file to Volcengine Files API for reuse in multimodal requests.
53
+ * POST /v3/files
54
+ */
55
+ uploadFile(file: Blob | Buffer, apiKey: string, options?: {
56
+ purpose?: string;
57
+ filename?: string;
58
+ }, signal?: AbortSignal): Promise<{
59
+ id: string;
60
+ status: string;
61
+ }>;
62
+ /**
63
+ * Get file info by ID.
64
+ * GET /v3/files/{fileId}
65
+ */
66
+ getFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<Record<string, unknown>>;
67
+ /**
68
+ * List uploaded files.
69
+ * GET /v3/files
70
+ */
71
+ listFiles(apiKey: string, options?: {
72
+ after?: string;
73
+ limit?: number;
74
+ purpose?: string;
75
+ order?: "asc" | "desc";
76
+ }, signal?: AbortSignal): Promise<Record<string, unknown>>;
77
+ /**
78
+ * Delete a file.
79
+ * DELETE /v3/files/{fileId}
80
+ */
81
+ deleteFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
32
82
  private submitTask;
33
83
  private pollTask;
34
84
  }
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Volcengine Responses API Transport — SSE streaming implementation.
3
+ *
4
+ * Implements the Volcengine Ark Responses API (`/api/v3/responses`),
5
+ * which is the officially recommended primary path for Doubao LLM text generation
6
+ * (250615+ models: doubao-seed-2.0 series).
7
+ *
8
+ * Key differences from OpenAI Chat Completions:
9
+ * - Endpoint: POST {baseUrl}/v3/responses
10
+ * - Request body uses `input` (not `messages`), `instructions`, `thinking`, `reasoning`
11
+ * - SSE events: response.output_text.delta, response.reasoning_summary_text.delta,
12
+ * response.function_call_arguments.delta, response.completed, etc.
13
+ * - Tool calling: function_call / function_call_output with call_id
14
+ * - Context persistence: previous_response_id for server-side session continuation
15
+ * - Deep thinking: thinking.type (enabled/disabled/auto) + reasoning.effort
16
+ *
17
+ * Docs: https://www.volcengine.com/docs/82379/1399008
18
+ */
19
+ import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
20
+ import type { ProviderQuirks } from "../provider-def.js";
21
+ export interface VolcengineResponsesTransportConfig {
22
+ baseUrl: string;
23
+ extraHeaders?: Record<string, string>;
24
+ timeoutMs?: number;
25
+ quirks?: ProviderQuirks;
26
+ }
27
+ export declare class VolcengineResponsesTransport implements LLMTransport {
28
+ private baseUrl;
29
+ private extraHeaders;
30
+ private timeoutMs;
31
+ private quirks;
32
+ constructor(config: VolcengineResponsesTransportConfig);
33
+ stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
34
+ /**
35
+ * Resolve known Volcengine Responses API incompatibilities:
36
+ * - instructions + caching → drop caching (§20.7)
37
+ * - caching + json_schema → downgrade to json_object (§20.10)
38
+ * - caching + builtin_web_search/image_process → drop those builtin tools
39
+ * Returns a shallow copy with fields adjusted; never mutates the original.
40
+ */
41
+ private resolveConstraints;
42
+ private buildRequestBody;
43
+ private fetchAndStream;
44
+ private handleNonStreamingResponse;
45
+ /**
46
+ * Parse Volcengine Responses API SSE stream.
47
+ *
48
+ * Event format: "event: <type>\ndata: <json>\n\n"
49
+ * Key events:
50
+ * - response.output_text.delta → text content delta
51
+ * - response.reasoning_summary_text.delta → thinking/reasoning text
52
+ * - response.function_call_arguments.delta → tool call arguments streaming
53
+ * - response.output_item.added → new output item started
54
+ * - response.output_item.done → output item completed
55
+ * - response.completed → full response complete with usage
56
+ * - response.failed → error
57
+ */
58
+ private parseSSEStream;
59
+ private processEvent;
60
+ }
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Zhipu (GLM) Media Transport — CogView (image), CogVideoX (video), TTS, STT, Embedding.
3
+ *
4
+ * API reference (docs.bigmodel.cn):
5
+ * Image sync: POST /images/generations (CogView-4, cogview-3-flash)
6
+ * Image async: POST /async/images/generations (glm-image)
7
+ * Video async: POST /videos/generations (CogVideoX)
8
+ * TTS sync: POST /audio/speech (glm-tts, returns audio bytes)
9
+ * STT sync: POST /audio/transcriptions (glm-asr, multipart/form-data)
10
+ * Embedding: POST /embeddings (embedding-3/2)
11
+ * Async poll: GET /async-result/{id} (unified poll for all async tasks)
12
+ *
13
+ * Base URL: https://open.bigmodel.cn/api/paas/v4
14
+ * Auth: Authorization: Bearer $ZHIPU_API_KEY
15
+ */
16
+ import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
17
+ export interface ZhipuMediaConfig {
18
+ /** Base URL, e.g. "https://open.bigmodel.cn/api/paas/v4" */
19
+ baseUrl: string;
20
+ timeoutMs?: number;
21
+ }
22
+ export declare class ZhipuMediaTransport implements MediaTransport {
23
+ readonly supportedTypes: readonly MediaType[];
24
+ private baseUrl;
25
+ private timeoutMs;
26
+ constructor(config: ZhipuMediaConfig);
27
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
28
+ private generateImage;
29
+ /** CogView-4 / cogview-3-flash — sync, returns URL directly */
30
+ private generateImageSync;
31
+ /** glm-image — async submit + poll */
32
+ private generateImageAsync;
33
+ private generateVideo;
34
+ private generateTTS;
35
+ private generateSTT;
36
+ private generateEmbedding;
37
+ private generateVoiceClone;
38
+ private generateDocumentParsing;
39
+ private generateRerank;
40
+ private postJSON;
41
+ /**
42
+ * Unified async result polling — GET /async-result/{id}
43
+ * Returns the result object when task_status === "SUCCESS".
44
+ * Throws on "FAIL" or timeout.
45
+ */
46
+ private pollAsyncResult;
47
+ /**
48
+ * List cloned voices — GET /voice/
49
+ * Returns all voice clones for the current user.
50
+ */
51
+ listVoices(apiKey: string, signal?: AbortSignal): Promise<Array<{
52
+ voice_id: string;
53
+ voice_name: string;
54
+ status: string;
55
+ }>>;
56
+ /**
57
+ * Delete a cloned voice — DELETE /voice/{voice_id}
58
+ */
59
+ deleteVoice(voiceId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
60
+ }
@@ -0,0 +1,35 @@
1
+ /**
2
+ * ZhipuToolAPI — Zhipu-specific utility endpoints.
3
+ *
4
+ * Implements ProviderToolAPI for the Zhipu GLM platform's standalone (independent) APIs:
5
+ * C1: Web Search — POST /tools/web-search
6
+ * C2: Reader — POST /tools/reader (extract web page content)
7
+ * C3: Tokenizer — POST /tokenizer
8
+ * C4: Moderations — POST /moderations
9
+ *
10
+ * Base URL: https://open.bigmodel.cn/api/paas/v4
11
+ * Auth: Authorization: Bearer $ZHIPU_API_KEY
12
+ *
13
+ * C5 (File Parser) is handled by document_parsing media handler.
14
+ * C6 (Realtime API) requires WebSocket — out of scope for this interface.
15
+ */
16
+ import type { ProviderToolAPI, ProviderToolCapability, WebSearchResult, ReaderResult, TokenizerResult, ModerationResult } from "../provider-tool-api.js";
17
+ export interface ZhipuToolAPIConfig {
18
+ baseUrl: string;
19
+ apiKey: string;
20
+ timeoutMs?: number;
21
+ }
22
+ export declare class ZhipuToolAPI implements ProviderToolAPI {
23
+ readonly capabilities: readonly ProviderToolCapability[];
24
+ private baseUrl;
25
+ private apiKey;
26
+ private timeoutMs;
27
+ constructor(config: ZhipuToolAPIConfig);
28
+ webSearch(query: string, options?: {
29
+ maxResults?: number;
30
+ }): Promise<WebSearchResult[]>;
31
+ reader(pageUrl: string): Promise<ReaderResult>;
32
+ tokenize(text: string, model: string): Promise<TokenizerResult>;
33
+ moderate(text: string): Promise<ModerationResult>;
34
+ private postJSON;
35
+ }
@@ -1,12 +1,3 @@
1
- import type { FailoverReason } from "./failover-classification.js";
2
1
  export type ErrorCategory = "RETRYABLE_TRANSIENT" | "RETRYABLE_DEGRADED" | "NON_RETRYABLE_AUTH" | "NON_RETRYABLE_CONTENT" | "NON_RETRYABLE_QUOTA" | "TOOL_EXECUTION_FAILED";
3
- export interface RetryStrategy {
4
- retryable: boolean;
5
- maxRetries: number;
6
- baseDelayMs: number;
7
- backoffMultiplier: number;
8
- switchProvider: boolean;
9
- }
10
2
  export declare function classifyError(status: number | undefined, message?: string): ErrorCategory;
11
- export declare function classifyErrorFromReason(reason: FailoverReason): ErrorCategory;
12
- export declare function getRetryStrategy(category: ErrorCategory): RetryStrategy;
3
+ export declare function isRetryableCategory(category: ErrorCategory): boolean;
@@ -1,5 +1,5 @@
1
1
  export { buildAssistantToolCallMessage, buildToolResultMessage, type FunctionToolDefinition, } from "./tool-loop/tool-schema.js";
2
- export { classifyError, getRetryStrategy, type ErrorCategory, type RetryStrategy, } from "./error-handling/error-classification.js";
2
+ export { classifyError, isRetryableCategory, type ErrorCategory, } from "./error-handling/error-classification.js";
3
3
  export { composeStrategies, composeAsyncStrategies, SlidingWindowStrategy, SummarizeOldStrategy, ToolResultTrimStrategy, HeadTailProtectedStrategy, IncrementalCompactStrategy, CacheAwareCompressionStrategy, CompressionMetricsCollector, ContextEngineRegistry, MicroCompactStrategy, postCompactFileRecovery, type PostCompactRecoveryConfig, buildStructuredSummaryPrompt, computeAdaptiveBudget, isAsyncCompressionStrategy, selectCompressionTier, DEFAULT_ADAPTIVE_BUDGET_CONFIG, type AdaptiveBudgetConfig, type AsyncCompressionStrategy, type CacheAwareCompressionConfig, type CompressibleMessage, type CompressionEvent, type CompressionMetrics, type CompressionMetricsSnapshot, type CompressionResult, type CompressionStrategy, type CompressionTier, type ContextEngine, type HeadTailProtectionConfig, type IncrementalCompactConfig, type SummarizeFn, } from "./context/context-compression.js";
4
4
  export { snipCompactIfNeeded, type SnipResult, } from "./context/context-compression.js";
5
5
  export { applyCollapsesIfNeeded as applyContextCollapsesIfNeeded, recoverFromOverflow as recoverContextCollapseFromOverflow, createCollapseStore, type CollapseStore, type CollapseStage, } from "./context/context-collapse.js";
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Tool Cascade — provider-native tool augmentation layer.
3
+ *
4
+ * When a provider (e.g. Zhipu GLM) offers native utility APIs (web search, reader,
5
+ * tokenizer, etc.), the tool cascade automatically routes through the provider API
6
+ * first, falling back to the host-provided implementation (e.g. SearXNG) on failure.
7
+ *
8
+ * ## Architecture
9
+ *
10
+ * Two levels of provider tool integration:
11
+ *
12
+ * 1. **D-level (builtin tools)**: Injected directly into chat completion request
13
+ * via `builtinWebSearch` / `builtinCodeInterpreter` quirks.
14
+ * The LLM decides to use these inline during generation — no agent intervention.
15
+ * → Handled by openai-chat.ts transport layer.
16
+ *
17
+ * 2. **C-level (independent APIs)**: Discrete API endpoints the agent calls explicitly
18
+ * as tool steps (web_search, reader, tokenizer, moderations).
19
+ * → Handled by this cascade layer — wraps host-provided tool deps with
20
+ * provider-native backends, falling back on error.
21
+ *
22
+ * ## Usage
23
+ *
24
+ * ```ts
25
+ * import { cascadeWebSearch } from "./tool-cascade.js";
26
+ * import { ZhipuToolAPI } from "../llm/transports/zhipu-tool-api.js";
27
+ *
28
+ * const providerApi = new ZhipuToolAPI({ baseUrl, apiKey });
29
+ * const cascadedSearch = cascadeWebSearch(providerApi, fallbackSearchFn);
30
+ * const tool = createWebSearchTool({ search: cascadedSearch });
31
+ * ```
32
+ */
33
+ import type { ProviderToolAPI } from "../llm/provider-tool-api.js";
34
+ import type { WebSearchToolDeps } from "../skills/tools/web-search-tool.js";
35
+ /**
36
+ * Cascade web search: provider-native search → host fallback.
37
+ * Returns a WebSearchToolDeps.search function that tries the provider's
38
+ * webSearch API first, falling back to the host's search on error.
39
+ */
40
+ export declare function cascadeWebSearch(providerApi: ProviderToolAPI | undefined, fallbackSearch: WebSearchToolDeps["search"]): WebSearchToolDeps["search"];
@@ -29,7 +29,10 @@ export declare function parseOpenAiToolCallsFromChatResponse(responseBody: strin
29
29
  toolCalls: OpenAiToolCall[];
30
30
  responseText: string;
31
31
  };
32
- export declare function buildAssistantToolCallMessage(toolCalls: OpenAiToolCall[], text?: string): Record<string, unknown>;
32
+ export declare function buildAssistantToolCallMessage(toolCalls: OpenAiToolCall[], text?: string, thinkingBlocks?: Array<{
33
+ thinking: string;
34
+ signature: string;
35
+ }>, reasoningContent?: string): Record<string, unknown>;
33
36
  export declare function buildToolResultMessage(callId: string, result: {
34
37
  ok: boolean;
35
38
  payload?: unknown;
@@ -190,6 +190,21 @@ export interface ToolsListResult {
190
190
  parameters?: Record<string, unknown>;
191
191
  }>;
192
192
  }
193
+ export interface MediaListModelsParams {
194
+ /** Filter by media type (image/video/music/tts/3d/stt/embedding). Omit to list all. */
195
+ mediaType?: string;
196
+ }
197
+ export interface MediaListModelsResult {
198
+ models: Array<{
199
+ providerId: string;
200
+ providerName: string;
201
+ modelId: string;
202
+ modelName: string;
203
+ mediaType: string;
204
+ /** Fine-grained capability metadata (operations, formats, limits) */
205
+ capabilities?: Record<string, unknown>;
206
+ }>;
207
+ }
193
208
  export interface ConfigGetParams {
194
209
  keys?: string[];
195
210
  }
@@ -339,6 +354,10 @@ export interface RpcMethodMap {
339
354
  params: ToolsListParams;
340
355
  result: ToolsListResult;
341
356
  };
357
+ "media.listModels": {
358
+ params: MediaListModelsParams;
359
+ result: MediaListModelsResult;
360
+ };
342
361
  "config.get": {
343
362
  params: ConfigGetParams;
344
363
  result: ConfigGetResult;
@@ -44,7 +44,7 @@ export type ExtractionCompleteFn = (params: {
44
44
  }>;
45
45
  }>;
46
46
  export interface MemoryExtractorOptions {
47
- /** Model to use for extraction (default: "deepseek-chat"). */
47
+ /** Model to use for extraction (default: "deepseek-v4-flash"). */
48
48
  model?: string;
49
49
  /** Timeout in ms for the LLM call (default: 30_000). */
50
50
  timeoutMs?: number;
@@ -0,0 +1,90 @@
1
+ import type { PortableTool } from "../portable-tool.js";
2
+ export declare const FILE_UPLOAD_TOOL_NAME: "file_upload";
3
+ export interface FileUploadToolParams {
4
+ file_path: string;
5
+ purpose?: string;
6
+ }
7
+ export declare const FILE_UPLOAD_TOOL_SCHEMA: {
8
+ readonly type: "object";
9
+ readonly properties: {
10
+ readonly file_path: {
11
+ readonly type: "string";
12
+ readonly description: string;
13
+ };
14
+ readonly purpose: {
15
+ readonly type: "string";
16
+ readonly description: string;
17
+ };
18
+ };
19
+ readonly required: readonly ["file_path"];
20
+ };
21
+ export interface FileUploadResult {
22
+ fileId: string;
23
+ url?: string;
24
+ filename: string;
25
+ bytes: number;
26
+ provider: string;
27
+ }
28
+ export interface FileUploadToolDeps {
29
+ uploadFile(params: {
30
+ filePath: string;
31
+ purpose?: string;
32
+ }): Promise<FileUploadResult>;
33
+ }
34
+ export declare function createFileUploadTool(deps: FileUploadToolDeps): PortableTool<FileUploadToolParams>;
35
+ export declare const FILE_QUERY_TOOL_NAME: "file_query";
36
+ export interface FileQueryToolParams {
37
+ file_id?: string;
38
+ limit?: number;
39
+ }
40
+ export declare const FILE_QUERY_TOOL_SCHEMA: {
41
+ readonly type: "object";
42
+ readonly properties: {
43
+ readonly file_id: {
44
+ readonly type: "string";
45
+ readonly description: "Query a specific file by ID. If omitted, lists recent uploaded files.";
46
+ };
47
+ readonly limit: {
48
+ readonly type: "number";
49
+ readonly description: "Max number of files to list when file_id is omitted. Default: 10, max: 100.";
50
+ };
51
+ };
52
+ readonly required: readonly [];
53
+ };
54
+ export interface FileInfo {
55
+ id: string;
56
+ filename: string;
57
+ bytes: number;
58
+ status: string;
59
+ createdAt?: string;
60
+ url?: string;
61
+ }
62
+ export interface FileQueryToolDeps {
63
+ queryFile(params: {
64
+ fileId: string;
65
+ }): Promise<FileInfo>;
66
+ listFiles(params: {
67
+ limit?: number;
68
+ }): Promise<FileInfo[]>;
69
+ }
70
+ export declare function createFileQueryTool(deps: FileQueryToolDeps): PortableTool<FileQueryToolParams>;
71
+ export declare const FILE_DELETE_TOOL_NAME: "file_delete";
72
+ export interface FileDeleteToolParams {
73
+ file_id: string;
74
+ }
75
+ export declare const FILE_DELETE_TOOL_SCHEMA: {
76
+ readonly type: "object";
77
+ readonly properties: {
78
+ readonly file_id: {
79
+ readonly type: "string";
80
+ readonly description: "ID of the uploaded file to delete.";
81
+ };
82
+ };
83
+ readonly required: readonly ["file_id"];
84
+ };
85
+ export interface FileDeleteToolDeps {
86
+ deleteFile(params: {
87
+ fileId: string;
88
+ }): Promise<void>;
89
+ }
90
+ export declare function createFileDeleteTool(deps: FileDeleteToolDeps): PortableTool<FileDeleteToolParams>;