qlogicagent 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +7 -7
- package/dist/cli.js +204 -195
- package/dist/index.js +204 -195
- package/dist/orchestration.js +10 -10
- package/dist/types/agent/tool-loop.d.ts +2 -0
- package/dist/types/agent/types.d.ts +54 -1
- package/dist/types/cli/stdio-server.d.ts +10 -0
- package/dist/types/cli/tool-bootstrap.d.ts +13 -1
- package/dist/types/llm/index.d.ts +1 -1
- package/dist/types/llm/llm-client.d.ts +1 -1
- package/dist/types/llm/media-client.d.ts +3 -4
- package/dist/types/llm/media-transport.d.ts +75 -4
- package/dist/types/llm/provider-def.d.ts +124 -3
- package/dist/types/llm/provider-registry.d.ts +5 -0
- package/dist/types/llm/provider-tool-api.d.ts +44 -0
- package/dist/types/llm/retry.d.ts +37 -0
- package/dist/types/llm/transport.d.ts +161 -2
- package/dist/types/llm/transports/anthropic-messages.d.ts +7 -0
- package/dist/types/llm/transports/minimax-media.d.ts +5 -0
- package/dist/types/llm/transports/openai-chat.d.ts +44 -3
- package/dist/types/llm/transports/realtime-transport.d.ts +183 -0
- package/dist/types/llm/transports/volcengine-grounding.d.ts +58 -0
- package/dist/types/llm/transports/volcengine-media.d.ts +50 -0
- package/dist/types/llm/transports/volcengine-responses.d.ts +60 -0
- package/dist/types/llm/transports/zhipu-media.d.ts +60 -0
- package/dist/types/llm/transports/zhipu-tool-api.d.ts +35 -0
- package/dist/types/orchestration/error-handling/error-classification.d.ts +1 -10
- package/dist/types/orchestration/index.d.ts +1 -1
- package/dist/types/orchestration/tool-cascade.d.ts +40 -0
- package/dist/types/orchestration/tool-loop/tool-schema.d.ts +4 -1
- package/dist/types/protocol/methods.d.ts +19 -0
- package/dist/types/skills/memory/memory-extractor.d.ts +1 -1
- package/dist/types/skills/tools/file-management-tool.d.ts +90 -0
- package/dist/types/skills/tools/image-generate-tool.d.ts +13 -1
- package/dist/types/skills/tools/music-generate-tool.d.ts +25 -0
- package/dist/types/skills/tools/stt-tool.d.ts +33 -0
- package/dist/types/skills/tools/three-d-generate-tool.d.ts +45 -0
- package/dist/types/skills/tools/tts-tool.d.ts +12 -0
- package/dist/types/skills/tools/video-edit-tool.d.ts +5 -2
- package/dist/types/skills/tools/video-generate-tool.d.ts +102 -2
- package/dist/types/skills/tools/video-merge-tool.d.ts +1 -1
- package/dist/types/skills/tools/video-upscale-tool.d.ts +1 -1
- package/dist/types/skills/tools/voice-clone-tool.d.ts +40 -0
- package/package.json +1 -1
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realtime WebSocket Transport — bidirectional audio/voice streaming
|
|
3
|
+
* via the OpenAI Realtime API protocol (also compatible with GLM Realtime).
|
|
4
|
+
*
|
|
5
|
+
* ## Protocol: WebSocket JSON events
|
|
6
|
+
*
|
|
7
|
+
* Client → Server:
|
|
8
|
+
* - session.update: configure session (model, voice, tools, etc.)
|
|
9
|
+
* - input_audio_buffer.append: send audio chunks (base64 PCM16)
|
|
10
|
+
* - input_audio_buffer.commit: signal end of audio input
|
|
11
|
+
* - conversation.item.create: inject text/function_result items
|
|
12
|
+
* - response.create: request a model response
|
|
13
|
+
* - response.cancel: abort in-progress response
|
|
14
|
+
*
|
|
15
|
+
* Server → Client:
|
|
16
|
+
* - session.created: session initialized
|
|
17
|
+
* - session.updated: config acknowledged
|
|
18
|
+
* - input_audio_buffer.speech_started: VAD detected speech
|
|
19
|
+
* - input_audio_buffer.speech_stopped: VAD detected silence
|
|
20
|
+
* - response.created: response generation started
|
|
21
|
+
* - response.output_item.added: new output item (text/audio/function_call)
|
|
22
|
+
* - response.audio.delta: audio chunk (base64 PCM16)
|
|
23
|
+
* - response.audio_transcript.delta: transcript of generated speech
|
|
24
|
+
* - response.text.delta: text generation delta
|
|
25
|
+
* - response.function_call_arguments.delta: tool call args delta
|
|
26
|
+
* - response.function_call_arguments.done: tool call complete
|
|
27
|
+
* - response.output_item.done: output item finished
|
|
28
|
+
* - response.done: full response complete
|
|
29
|
+
* - error: server error
|
|
30
|
+
*
|
|
31
|
+
* ## Architecture
|
|
32
|
+
*
|
|
33
|
+
* RealtimeTransport manages a single persistent WebSocket connection per session.
|
|
34
|
+
* It exposes an event-driven API (AsyncGenerator) that the agent tool-loop
|
|
35
|
+
* can consume for voice-enabled interactions.
|
|
36
|
+
*
|
|
37
|
+
* Docs:
|
|
38
|
+
* - OpenAI: https://platform.openai.com/docs/api-reference/realtime
|
|
39
|
+
* - GLM: https://docs.bigmodel.cn/cn/guide/develop/realtime-api
|
|
40
|
+
*/
|
|
41
|
+
export interface RealtimeConfig {
|
|
42
|
+
/** WebSocket endpoint (e.g. "wss://api.openai.com/v1/realtime") */
|
|
43
|
+
baseUrl: string;
|
|
44
|
+
/** Model to use (e.g. "gpt-4o-realtime-preview", "glm-realtime") */
|
|
45
|
+
model: string;
|
|
46
|
+
/** API key */
|
|
47
|
+
apiKey: string;
|
|
48
|
+
/** Voice for TTS output */
|
|
49
|
+
voice?: string;
|
|
50
|
+
/** Input modalities: "text", "audio", or both */
|
|
51
|
+
inputModalities?: Array<"text" | "audio">;
|
|
52
|
+
/** Output modalities: "text", "audio", or both */
|
|
53
|
+
outputModalities?: Array<"text" | "audio">;
|
|
54
|
+
/** Temperature for generation */
|
|
55
|
+
temperature?: number;
|
|
56
|
+
/** Tool definitions for function calling */
|
|
57
|
+
tools?: RealtimeTool[];
|
|
58
|
+
/** Voice Activity Detection mode */
|
|
59
|
+
vadMode?: "server_vad" | "none";
|
|
60
|
+
/** VAD threshold (0.0-1.0) */
|
|
61
|
+
vadThreshold?: number;
|
|
62
|
+
/** Auth type: "header" (OpenAI) or "query" (GLM) */
|
|
63
|
+
authMode?: "header" | "query";
|
|
64
|
+
}
|
|
65
|
+
export interface RealtimeTool {
|
|
66
|
+
type: "function";
|
|
67
|
+
name: string;
|
|
68
|
+
description: string;
|
|
69
|
+
parameters: Record<string, unknown>;
|
|
70
|
+
}
|
|
71
|
+
export type RealtimeEvent = {
|
|
72
|
+
type: "session_created";
|
|
73
|
+
sessionId: string;
|
|
74
|
+
} | {
|
|
75
|
+
type: "speech_started";
|
|
76
|
+
} | {
|
|
77
|
+
type: "speech_stopped";
|
|
78
|
+
audioEndMs: number;
|
|
79
|
+
} | {
|
|
80
|
+
type: "audio_delta";
|
|
81
|
+
delta: string;
|
|
82
|
+
} | {
|
|
83
|
+
type: "audio_transcript_delta";
|
|
84
|
+
delta: string;
|
|
85
|
+
} | {
|
|
86
|
+
type: "text_delta";
|
|
87
|
+
delta: string;
|
|
88
|
+
} | {
|
|
89
|
+
type: "function_call_start";
|
|
90
|
+
callId: string;
|
|
91
|
+
name: string;
|
|
92
|
+
} | {
|
|
93
|
+
type: "function_call_delta";
|
|
94
|
+
callId: string;
|
|
95
|
+
delta: string;
|
|
96
|
+
} | {
|
|
97
|
+
type: "function_call_done";
|
|
98
|
+
callId: string;
|
|
99
|
+
name: string;
|
|
100
|
+
arguments: string;
|
|
101
|
+
} | {
|
|
102
|
+
type: "response_done";
|
|
103
|
+
usage?: RealtimeUsage;
|
|
104
|
+
} | {
|
|
105
|
+
type: "error";
|
|
106
|
+
code: string;
|
|
107
|
+
message: string;
|
|
108
|
+
} | {
|
|
109
|
+
type: "closed";
|
|
110
|
+
code: number;
|
|
111
|
+
reason: string;
|
|
112
|
+
};
|
|
113
|
+
export interface RealtimeUsage {
|
|
114
|
+
inputTokens: number;
|
|
115
|
+
outputTokens: number;
|
|
116
|
+
inputAudioTokens?: number;
|
|
117
|
+
outputAudioTokens?: number;
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Manages a persistent WebSocket connection for real-time audio/voice
|
|
121
|
+
* interactions with an LLM provider.
|
|
122
|
+
*
|
|
123
|
+
* Usage:
|
|
124
|
+
* ```ts
|
|
125
|
+
* const rt = new RealtimeTransport(config);
|
|
126
|
+
* rt.connect();
|
|
127
|
+
*
|
|
128
|
+
* // Send audio
|
|
129
|
+
* rt.appendAudio(base64Chunk);
|
|
130
|
+
* rt.commitAudio();
|
|
131
|
+
*
|
|
132
|
+
* // Or send text
|
|
133
|
+
* rt.sendText("Hello!");
|
|
134
|
+
*
|
|
135
|
+
* // Submit function results
|
|
136
|
+
* rt.sendFunctionResult(callId, result);
|
|
137
|
+
*
|
|
138
|
+
* // Consume events
|
|
139
|
+
* for await (const event of rt.events()) {
|
|
140
|
+
* switch (event.type) {
|
|
141
|
+
* case "audio_delta": playAudio(event.delta); break;
|
|
142
|
+
* case "function_call_done": handleToolCall(event); break;
|
|
143
|
+
* }
|
|
144
|
+
* }
|
|
145
|
+
*
|
|
146
|
+
* rt.close();
|
|
147
|
+
* ```
|
|
148
|
+
*/
|
|
149
|
+
export declare class RealtimeTransport {
|
|
150
|
+
private ws;
|
|
151
|
+
private config;
|
|
152
|
+
private eventQueue;
|
|
153
|
+
private waiters;
|
|
154
|
+
private closed;
|
|
155
|
+
constructor(config: RealtimeConfig);
|
|
156
|
+
/** Open WebSocket connection and configure session. */
|
|
157
|
+
connect(): Promise<void>;
|
|
158
|
+
/** Send audio data (base64 PCM16). */
|
|
159
|
+
appendAudio(base64Chunk: string): void;
|
|
160
|
+
/** Mark end of audio input and trigger response. */
|
|
161
|
+
commitAudio(): void;
|
|
162
|
+
/** Send a text message. */
|
|
163
|
+
sendText(text: string): void;
|
|
164
|
+
/** Submit a function call result back to the model. */
|
|
165
|
+
sendFunctionResult(callId: string, output: string): void;
|
|
166
|
+
/** Trigger a model response (e.g. after sending text). */
|
|
167
|
+
requestResponse(): void;
|
|
168
|
+
/** Cancel an in-progress response. */
|
|
169
|
+
cancelResponse(): void;
|
|
170
|
+
/** Async iterator of server events. */
|
|
171
|
+
events(): AsyncGenerator<RealtimeEvent>;
|
|
172
|
+
/** Close the WebSocket connection. */
|
|
173
|
+
close(): void;
|
|
174
|
+
private buildUrl;
|
|
175
|
+
private sendSessionUpdate;
|
|
176
|
+
private send;
|
|
177
|
+
private push;
|
|
178
|
+
private drainWaiters;
|
|
179
|
+
/**
|
|
180
|
+
* Parse a server-sent JSON event into our typed event(s).
|
|
181
|
+
*/
|
|
182
|
+
private parseServerEvent;
|
|
183
|
+
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Volcengine Grounding — spatial coordinate parser (volcengine-ProviderMax §14).
|
|
3
|
+
*
|
|
4
|
+
* Parses model-emitted spatial reference tags from text output:
|
|
5
|
+
* - <bbox>x_min y_min x_max y_max</bbox> → bounding box
|
|
6
|
+
* - <point>x y</point> → single point
|
|
7
|
+
* - <polygon>x1 y1 x2 y2 ...</polygon> → polygon vertices
|
|
8
|
+
*
|
|
9
|
+
* All coordinates are in normalized 1000×1000 space, range [0, 999].
|
|
10
|
+
* Use `bboxToPixels()` or `pointToPixels()` to convert to actual image pixel coordinates.
|
|
11
|
+
*/
|
|
12
|
+
export type SpatialReference = {
|
|
13
|
+
type: "bbox";
|
|
14
|
+
x1: number;
|
|
15
|
+
y1: number;
|
|
16
|
+
x2: number;
|
|
17
|
+
y2: number;
|
|
18
|
+
space: "normalized_1000";
|
|
19
|
+
} | {
|
|
20
|
+
type: "point";
|
|
21
|
+
x: number;
|
|
22
|
+
y: number;
|
|
23
|
+
space: "normalized_1000";
|
|
24
|
+
} | {
|
|
25
|
+
type: "polygon";
|
|
26
|
+
points: Array<{
|
|
27
|
+
x: number;
|
|
28
|
+
y: number;
|
|
29
|
+
}>;
|
|
30
|
+
space: "normalized_1000";
|
|
31
|
+
};
|
|
32
|
+
export interface PixelBbox {
|
|
33
|
+
x1: number;
|
|
34
|
+
y1: number;
|
|
35
|
+
x2: number;
|
|
36
|
+
y2: number;
|
|
37
|
+
}
|
|
38
|
+
export interface PixelPoint {
|
|
39
|
+
x: number;
|
|
40
|
+
y: number;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Extract all spatial references from model output text.
|
|
44
|
+
* Returns an empty array if no grounding tags are found.
|
|
45
|
+
*/
|
|
46
|
+
export declare function parseGroundingTags(text: string): SpatialReference[];
|
|
47
|
+
/**
|
|
48
|
+
* Convert a normalized 1000×1000 bounding box to pixel coordinates.
|
|
49
|
+
*/
|
|
50
|
+
export declare function bboxToPixels(ref: Extract<SpatialReference, {
|
|
51
|
+
type: "bbox";
|
|
52
|
+
}>, width: number, height: number): PixelBbox;
|
|
53
|
+
/**
|
|
54
|
+
* Convert a normalized 1000×1000 point to pixel coordinates.
|
|
55
|
+
*/
|
|
56
|
+
export declare function pointToPixels(ref: Extract<SpatialReference, {
|
|
57
|
+
type: "point";
|
|
58
|
+
}>, width: number, height: number): PixelPoint;
|
|
@@ -27,8 +27,58 @@ export declare class VolcengineMediaTransport implements MediaTransport {
|
|
|
27
27
|
*/
|
|
28
28
|
canHandle(request: MediaRequest): boolean;
|
|
29
29
|
private generateImage;
|
|
30
|
+
/**
|
|
31
|
+
* Parse streaming image SSE — yields progressive image quality upgrades.
|
|
32
|
+
* Final event contains the full-quality image URL.
|
|
33
|
+
*/
|
|
34
|
+
private parseStreamingImage;
|
|
30
35
|
private generateVideo;
|
|
31
36
|
private generate3D;
|
|
37
|
+
/**
|
|
38
|
+
* List video generation tasks with optional filters.
|
|
39
|
+
* GET /v3/contents/generations/tasks
|
|
40
|
+
*/
|
|
41
|
+
listVideoTasks(apiKey: string, options?: {
|
|
42
|
+
after?: string;
|
|
43
|
+
limit?: number;
|
|
44
|
+
status?: string;
|
|
45
|
+
}, signal?: AbortSignal): Promise<Record<string, unknown>>;
|
|
46
|
+
/**
|
|
47
|
+
* Cancel or delete a video generation task.
|
|
48
|
+
* DELETE /v3/contents/generations/tasks/{taskId}
|
|
49
|
+
*/
|
|
50
|
+
deleteVideoTask(taskId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
|
|
51
|
+
/**
|
|
52
|
+
* Upload a file to Volcengine Files API for reuse in multimodal requests.
|
|
53
|
+
* POST /v3/files
|
|
54
|
+
*/
|
|
55
|
+
uploadFile(file: Blob | Buffer, apiKey: string, options?: {
|
|
56
|
+
purpose?: string;
|
|
57
|
+
filename?: string;
|
|
58
|
+
}, signal?: AbortSignal): Promise<{
|
|
59
|
+
id: string;
|
|
60
|
+
status: string;
|
|
61
|
+
}>;
|
|
62
|
+
/**
|
|
63
|
+
* Get file info by ID.
|
|
64
|
+
* GET /v3/files/{fileId}
|
|
65
|
+
*/
|
|
66
|
+
getFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<Record<string, unknown>>;
|
|
67
|
+
/**
|
|
68
|
+
* List uploaded files.
|
|
69
|
+
* GET /v3/files
|
|
70
|
+
*/
|
|
71
|
+
listFiles(apiKey: string, options?: {
|
|
72
|
+
after?: string;
|
|
73
|
+
limit?: number;
|
|
74
|
+
purpose?: string;
|
|
75
|
+
order?: "asc" | "desc";
|
|
76
|
+
}, signal?: AbortSignal): Promise<Record<string, unknown>>;
|
|
77
|
+
/**
|
|
78
|
+
* Delete a file.
|
|
79
|
+
* DELETE /v3/files/{fileId}
|
|
80
|
+
*/
|
|
81
|
+
deleteFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
|
|
32
82
|
private submitTask;
|
|
33
83
|
private pollTask;
|
|
34
84
|
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Volcengine Responses API Transport — SSE streaming implementation.
|
|
3
|
+
*
|
|
4
|
+
* Implements the Volcengine Ark Responses API (`/api/v3/responses`),
|
|
5
|
+
* which is the officially recommended primary path for Doubao LLM text generation
|
|
6
|
+
* (250615+ models: doubao-seed-2.0 series).
|
|
7
|
+
*
|
|
8
|
+
* Key differences from OpenAI Chat Completions:
|
|
9
|
+
* - Endpoint: POST {baseUrl}/v3/responses
|
|
10
|
+
* - Request body uses `input` (not `messages`), `instructions`, `thinking`, `reasoning`
|
|
11
|
+
* - SSE events: response.output_text.delta, response.reasoning_summary_text.delta,
|
|
12
|
+
* response.function_call_arguments.delta, response.completed, etc.
|
|
13
|
+
* - Tool calling: function_call / function_call_output with call_id
|
|
14
|
+
* - Context persistence: previous_response_id for server-side session continuation
|
|
15
|
+
* - Deep thinking: thinking.type (enabled/disabled/auto) + reasoning.effort
|
|
16
|
+
*
|
|
17
|
+
* Docs: https://www.volcengine.com/docs/82379/1399008
|
|
18
|
+
*/
|
|
19
|
+
import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
|
|
20
|
+
import type { ProviderQuirks } from "../provider-def.js";
|
|
21
|
+
export interface VolcengineResponsesTransportConfig {
|
|
22
|
+
baseUrl: string;
|
|
23
|
+
extraHeaders?: Record<string, string>;
|
|
24
|
+
timeoutMs?: number;
|
|
25
|
+
quirks?: ProviderQuirks;
|
|
26
|
+
}
|
|
27
|
+
export declare class VolcengineResponsesTransport implements LLMTransport {
|
|
28
|
+
private baseUrl;
|
|
29
|
+
private extraHeaders;
|
|
30
|
+
private timeoutMs;
|
|
31
|
+
private quirks;
|
|
32
|
+
constructor(config: VolcengineResponsesTransportConfig);
|
|
33
|
+
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
34
|
+
/**
|
|
35
|
+
* Resolve known Volcengine Responses API incompatibilities:
|
|
36
|
+
* - instructions + caching → drop caching (§20.7)
|
|
37
|
+
* - caching + json_schema → downgrade to json_object (§20.10)
|
|
38
|
+
* - caching + builtin_web_search/image_process → drop those builtin tools
|
|
39
|
+
* Returns a shallow copy with fields adjusted; never mutates the original.
|
|
40
|
+
*/
|
|
41
|
+
private resolveConstraints;
|
|
42
|
+
private buildRequestBody;
|
|
43
|
+
private fetchAndStream;
|
|
44
|
+
private handleNonStreamingResponse;
|
|
45
|
+
/**
|
|
46
|
+
* Parse Volcengine Responses API SSE stream.
|
|
47
|
+
*
|
|
48
|
+
* Event format: "event: <type>\ndata: <json>\n\n"
|
|
49
|
+
* Key events:
|
|
50
|
+
* - response.output_text.delta → text content delta
|
|
51
|
+
* - response.reasoning_summary_text.delta → thinking/reasoning text
|
|
52
|
+
* - response.function_call_arguments.delta → tool call arguments streaming
|
|
53
|
+
* - response.output_item.added → new output item started
|
|
54
|
+
* - response.output_item.done → output item completed
|
|
55
|
+
* - response.completed → full response complete with usage
|
|
56
|
+
* - response.failed → error
|
|
57
|
+
*/
|
|
58
|
+
private parseSSEStream;
|
|
59
|
+
private processEvent;
|
|
60
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zhipu (GLM) Media Transport — CogView (image), CogVideoX (video), TTS, STT, Embedding, plus voice clone, document parsing, and rerank handlers.
|
|
3
|
+
*
|
|
4
|
+
* API reference (docs.bigmodel.cn):
|
|
5
|
+
* Image sync: POST /images/generations (CogView-4, cogview-3-flash)
|
|
6
|
+
* Image async: POST /async/images/generations (glm-image)
|
|
7
|
+
* Video async: POST /videos/generations (CogVideoX)
|
|
8
|
+
* TTS sync: POST /audio/speech (glm-tts, returns audio bytes)
|
|
9
|
+
* STT sync: POST /audio/transcriptions (glm-asr, multipart/form-data)
|
|
10
|
+
* Embedding: POST /embeddings (embedding-3/2)
|
|
11
|
+
* Async poll: GET /async-result/{id} (unified poll for all async tasks)
|
|
12
|
+
*
|
|
13
|
+
* Base URL: https://open.bigmodel.cn/api/paas/v4
|
|
14
|
+
* Auth: Authorization: Bearer $ZHIPU_API_KEY
|
|
15
|
+
*/
|
|
16
|
+
import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
|
|
17
|
+
export interface ZhipuMediaConfig {
|
|
18
|
+
/** Base URL, e.g. "https://open.bigmodel.cn/api/paas/v4" */
|
|
19
|
+
baseUrl: string;
|
|
20
|
+
timeoutMs?: number;
|
|
21
|
+
}
|
|
22
|
+
export declare class ZhipuMediaTransport implements MediaTransport {
|
|
23
|
+
readonly supportedTypes: readonly MediaType[];
|
|
24
|
+
private baseUrl;
|
|
25
|
+
private timeoutMs;
|
|
26
|
+
constructor(config: ZhipuMediaConfig);
|
|
27
|
+
generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
|
|
28
|
+
private generateImage;
|
|
29
|
+
/** CogView-4 / cogview-3-flash — sync, returns URL directly */
|
|
30
|
+
private generateImageSync;
|
|
31
|
+
/** glm-image — async submit + poll */
|
|
32
|
+
private generateImageAsync;
|
|
33
|
+
private generateVideo;
|
|
34
|
+
private generateTTS;
|
|
35
|
+
private generateSTT;
|
|
36
|
+
private generateEmbedding;
|
|
37
|
+
private generateVoiceClone;
|
|
38
|
+
private generateDocumentParsing;
|
|
39
|
+
private generateRerank;
|
|
40
|
+
private postJSON;
|
|
41
|
+
/**
|
|
42
|
+
* Unified async result polling — GET /async-result/{id}
|
|
43
|
+
* Returns the result object when task_status === "SUCCESS".
|
|
44
|
+
* Throws on "FAIL" or timeout.
|
|
45
|
+
*/
|
|
46
|
+
private pollAsyncResult;
|
|
47
|
+
/**
|
|
48
|
+
* List cloned voices — GET /voice/
|
|
49
|
+
* Returns all voice clones for the current user.
|
|
50
|
+
*/
|
|
51
|
+
listVoices(apiKey: string, signal?: AbortSignal): Promise<Array<{
|
|
52
|
+
voice_id: string;
|
|
53
|
+
voice_name: string;
|
|
54
|
+
status: string;
|
|
55
|
+
}>>;
|
|
56
|
+
/**
|
|
57
|
+
* Delete a cloned voice — DELETE /voice/{voice_id}
|
|
58
|
+
*/
|
|
59
|
+
deleteVoice(voiceId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
|
|
60
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ZhipuToolAPI — Zhipu-specific utility endpoints.
|
|
3
|
+
*
|
|
4
|
+
* Implements ProviderToolAPI for Zhipu GLM platform independent APIs:
|
|
5
|
+
* C1: Web Search — POST /tools/web-search
|
|
6
|
+
* C2: Reader — POST /tools/reader (extract web page content)
|
|
7
|
+
* C3: Tokenizer — POST /tokenizer
|
|
8
|
+
* C4: Moderations — POST /moderations
|
|
9
|
+
*
|
|
10
|
+
* Base URL: https://open.bigmodel.cn/api/paas/v4
|
|
11
|
+
* Auth: Authorization: Bearer $ZHIPU_API_KEY
|
|
12
|
+
*
|
|
13
|
+
* C5 (File Parser) is handled by document_parsing media handler.
|
|
14
|
+
* C6 (Realtime API) requires WebSocket — out of scope for this interface.
|
|
15
|
+
*/
|
|
16
|
+
import type { ProviderToolAPI, ProviderToolCapability, WebSearchResult, ReaderResult, TokenizerResult, ModerationResult } from "../provider-tool-api.js";
|
|
17
|
+
export interface ZhipuToolAPIConfig {
|
|
18
|
+
baseUrl: string;
|
|
19
|
+
apiKey: string;
|
|
20
|
+
timeoutMs?: number;
|
|
21
|
+
}
|
|
22
|
+
export declare class ZhipuToolAPI implements ProviderToolAPI {
|
|
23
|
+
readonly capabilities: readonly ProviderToolCapability[];
|
|
24
|
+
private baseUrl;
|
|
25
|
+
private apiKey;
|
|
26
|
+
private timeoutMs;
|
|
27
|
+
constructor(config: ZhipuToolAPIConfig);
|
|
28
|
+
webSearch(query: string, options?: {
|
|
29
|
+
maxResults?: number;
|
|
30
|
+
}): Promise<WebSearchResult[]>;
|
|
31
|
+
reader(pageUrl: string): Promise<ReaderResult>;
|
|
32
|
+
tokenize(text: string, model: string): Promise<TokenizerResult>;
|
|
33
|
+
moderate(text: string): Promise<ModerationResult>;
|
|
34
|
+
private postJSON;
|
|
35
|
+
}
|
|
@@ -1,12 +1,3 @@
|
|
|
1
|
-
import type { FailoverReason } from "./failover-classification.js";
|
|
2
1
|
export type ErrorCategory = "RETRYABLE_TRANSIENT" | "RETRYABLE_DEGRADED" | "NON_RETRYABLE_AUTH" | "NON_RETRYABLE_CONTENT" | "NON_RETRYABLE_QUOTA" | "TOOL_EXECUTION_FAILED";
|
|
3
|
-
export interface RetryStrategy {
|
|
4
|
-
retryable: boolean;
|
|
5
|
-
maxRetries: number;
|
|
6
|
-
baseDelayMs: number;
|
|
7
|
-
backoffMultiplier: number;
|
|
8
|
-
switchProvider: boolean;
|
|
9
|
-
}
|
|
10
2
|
export declare function classifyError(status: number | undefined, message?: string): ErrorCategory;
|
|
11
|
-
export declare function
|
|
12
|
-
export declare function getRetryStrategy(category: ErrorCategory): RetryStrategy;
|
|
3
|
+
export declare function isRetryableCategory(category: ErrorCategory): boolean;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export { buildAssistantToolCallMessage, buildToolResultMessage, type FunctionToolDefinition, } from "./tool-loop/tool-schema.js";
|
|
2
|
-
export { classifyError,
|
|
2
|
+
export { classifyError, isRetryableCategory, type ErrorCategory, } from "./error-handling/error-classification.js";
|
|
3
3
|
export { composeStrategies, composeAsyncStrategies, SlidingWindowStrategy, SummarizeOldStrategy, ToolResultTrimStrategy, HeadTailProtectedStrategy, IncrementalCompactStrategy, CacheAwareCompressionStrategy, CompressionMetricsCollector, ContextEngineRegistry, MicroCompactStrategy, postCompactFileRecovery, type PostCompactRecoveryConfig, buildStructuredSummaryPrompt, computeAdaptiveBudget, isAsyncCompressionStrategy, selectCompressionTier, DEFAULT_ADAPTIVE_BUDGET_CONFIG, type AdaptiveBudgetConfig, type AsyncCompressionStrategy, type CacheAwareCompressionConfig, type CompressibleMessage, type CompressionEvent, type CompressionMetrics, type CompressionMetricsSnapshot, type CompressionResult, type CompressionStrategy, type CompressionTier, type ContextEngine, type HeadTailProtectionConfig, type IncrementalCompactConfig, type SummarizeFn, } from "./context/context-compression.js";
|
|
4
4
|
export { snipCompactIfNeeded, type SnipResult, } from "./context/context-compression.js";
|
|
5
5
|
export { applyCollapsesIfNeeded as applyContextCollapsesIfNeeded, recoverFromOverflow as recoverContextCollapseFromOverflow, createCollapseStore, type CollapseStore, type CollapseStage, } from "./context/context-collapse.js";
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool Cascade — provider-native tool augmentation layer.
|
|
3
|
+
*
|
|
4
|
+
* When a provider (e.g. Zhipu GLM) offers native utility APIs (web search, reader,
|
|
5
|
+
* tokenizer, etc.), the tool cascade automatically routes through the provider API
|
|
6
|
+
* first, falling back to the host-provided implementation (e.g. SearXNG) on failure.
|
|
7
|
+
*
|
|
8
|
+
* ## Architecture
|
|
9
|
+
*
|
|
10
|
+
* Two levels of provider tool integration:
|
|
11
|
+
*
|
|
12
|
+
* 1. **D-level (builtin tools)**: Injected directly into the chat completion request
|
|
13
|
+
* via `builtinWebSearch` / `builtinCodeInterpreter` quirks.
|
|
14
|
+
* The LLM decides to use these inline during generation — no agent intervention.
|
|
15
|
+
* → Handled by openai-chat.ts transport layer.
|
|
16
|
+
*
|
|
17
|
+
* 2. **C-level (independent APIs)**: Discrete API endpoints the agent calls explicitly
|
|
18
|
+
* as tool steps (web_search, reader, tokenizer, moderations).
|
|
19
|
+
* → Handled by this cascade layer — wraps host-provided tool deps with
|
|
20
|
+
* provider-native backends, falling back on error.
|
|
21
|
+
*
|
|
22
|
+
* ## Usage
|
|
23
|
+
*
|
|
24
|
+
* ```ts
|
|
25
|
+
* import { cascadeWebSearch } from "./tool-cascade.js";
|
|
26
|
+
* import { ZhipuToolAPI } from "../llm/transports/zhipu-tool-api.js";
|
|
27
|
+
*
|
|
28
|
+
* const providerApi = new ZhipuToolAPI({ baseUrl, apiKey });
|
|
29
|
+
* const cascadedSearch = cascadeWebSearch(providerApi, fallbackSearchFn);
|
|
30
|
+
* const tool = createWebSearchTool({ search: cascadedSearch });
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
33
|
+
import type { ProviderToolAPI } from "../llm/provider-tool-api.js";
|
|
34
|
+
import type { WebSearchToolDeps } from "../skills/tools/web-search-tool.js";
|
|
35
|
+
/**
|
|
36
|
+
* Cascade web search: provider-native search → host fallback.
|
|
37
|
+
* Returns a WebSearchToolDeps.search function that tries the provider's
|
|
38
|
+
* webSearch API first, falling back to the host's search on error.
|
|
39
|
+
*/
|
|
40
|
+
export declare function cascadeWebSearch(providerApi: ProviderToolAPI | undefined, fallbackSearch: WebSearchToolDeps["search"]): WebSearchToolDeps["search"];
|
|
@@ -29,7 +29,10 @@ export declare function parseOpenAiToolCallsFromChatResponse(responseBody: strin
|
|
|
29
29
|
toolCalls: OpenAiToolCall[];
|
|
30
30
|
responseText: string;
|
|
31
31
|
};
|
|
32
|
-
export declare function buildAssistantToolCallMessage(toolCalls: OpenAiToolCall[], text?: string
|
|
32
|
+
export declare function buildAssistantToolCallMessage(toolCalls: OpenAiToolCall[], text?: string, thinkingBlocks?: Array<{
|
|
33
|
+
thinking: string;
|
|
34
|
+
signature: string;
|
|
35
|
+
}>, reasoningContent?: string): Record<string, unknown>;
|
|
33
36
|
export declare function buildToolResultMessage(callId: string, result: {
|
|
34
37
|
ok: boolean;
|
|
35
38
|
payload?: unknown;
|
|
@@ -190,6 +190,21 @@ export interface ToolsListResult {
|
|
|
190
190
|
parameters?: Record<string, unknown>;
|
|
191
191
|
}>;
|
|
192
192
|
}
|
|
193
|
+
export interface MediaListModelsParams {
|
|
194
|
+
/** Filter by media type (image/video/music/tts/3d/stt/embedding). Omit to list all. */
|
|
195
|
+
mediaType?: string;
|
|
196
|
+
}
|
|
197
|
+
export interface MediaListModelsResult {
|
|
198
|
+
models: Array<{
|
|
199
|
+
providerId: string;
|
|
200
|
+
providerName: string;
|
|
201
|
+
modelId: string;
|
|
202
|
+
modelName: string;
|
|
203
|
+
mediaType: string;
|
|
204
|
+
/** Fine-grained capability metadata (operations, formats, limits) */
|
|
205
|
+
capabilities?: Record<string, unknown>;
|
|
206
|
+
}>;
|
|
207
|
+
}
|
|
193
208
|
export interface ConfigGetParams {
|
|
194
209
|
keys?: string[];
|
|
195
210
|
}
|
|
@@ -339,6 +354,10 @@ export interface RpcMethodMap {
|
|
|
339
354
|
params: ToolsListParams;
|
|
340
355
|
result: ToolsListResult;
|
|
341
356
|
};
|
|
357
|
+
"media.listModels": {
|
|
358
|
+
params: MediaListModelsParams;
|
|
359
|
+
result: MediaListModelsResult;
|
|
360
|
+
};
|
|
342
361
|
"config.get": {
|
|
343
362
|
params: ConfigGetParams;
|
|
344
363
|
result: ConfigGetResult;
|
|
@@ -44,7 +44,7 @@ export type ExtractionCompleteFn = (params: {
|
|
|
44
44
|
}>;
|
|
45
45
|
}>;
|
|
46
46
|
export interface MemoryExtractorOptions {
|
|
47
|
-
/** Model to use for extraction (default: "deepseek-
|
|
47
|
+
/** Model to use for extraction (default: "deepseek-v4-flash"). */
|
|
48
48
|
model?: string;
|
|
49
49
|
/** Timeout in ms for the LLM call (default: 30_000). */
|
|
50
50
|
timeoutMs?: number;
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import type { PortableTool } from "../portable-tool.js";
|
|
2
|
+
export declare const FILE_UPLOAD_TOOL_NAME: "file_upload";
|
|
3
|
+
export interface FileUploadToolParams {
|
|
4
|
+
file_path: string;
|
|
5
|
+
purpose?: string;
|
|
6
|
+
}
|
|
7
|
+
export declare const FILE_UPLOAD_TOOL_SCHEMA: {
|
|
8
|
+
readonly type: "object";
|
|
9
|
+
readonly properties: {
|
|
10
|
+
readonly file_path: {
|
|
11
|
+
readonly type: "string";
|
|
12
|
+
readonly description: string;
|
|
13
|
+
};
|
|
14
|
+
readonly purpose: {
|
|
15
|
+
readonly type: "string";
|
|
16
|
+
readonly description: string;
|
|
17
|
+
};
|
|
18
|
+
};
|
|
19
|
+
readonly required: readonly ["file_path"];
|
|
20
|
+
};
|
|
21
|
+
export interface FileUploadResult {
|
|
22
|
+
fileId: string;
|
|
23
|
+
url?: string;
|
|
24
|
+
filename: string;
|
|
25
|
+
bytes: number;
|
|
26
|
+
provider: string;
|
|
27
|
+
}
|
|
28
|
+
export interface FileUploadToolDeps {
|
|
29
|
+
uploadFile(params: {
|
|
30
|
+
filePath: string;
|
|
31
|
+
purpose?: string;
|
|
32
|
+
}): Promise<FileUploadResult>;
|
|
33
|
+
}
|
|
34
|
+
export declare function createFileUploadTool(deps: FileUploadToolDeps): PortableTool<FileUploadToolParams>;
|
|
35
|
+
export declare const FILE_QUERY_TOOL_NAME: "file_query";
|
|
36
|
+
export interface FileQueryToolParams {
|
|
37
|
+
file_id?: string;
|
|
38
|
+
limit?: number;
|
|
39
|
+
}
|
|
40
|
+
export declare const FILE_QUERY_TOOL_SCHEMA: {
|
|
41
|
+
readonly type: "object";
|
|
42
|
+
readonly properties: {
|
|
43
|
+
readonly file_id: {
|
|
44
|
+
readonly type: "string";
|
|
45
|
+
readonly description: "Query a specific file by ID. If omitted, lists recent uploaded files.";
|
|
46
|
+
};
|
|
47
|
+
readonly limit: {
|
|
48
|
+
readonly type: "number";
|
|
49
|
+
readonly description: "Max number of files to list when file_id is omitted. Default: 10, max: 100.";
|
|
50
|
+
};
|
|
51
|
+
};
|
|
52
|
+
readonly required: readonly [];
|
|
53
|
+
};
|
|
54
|
+
export interface FileInfo {
|
|
55
|
+
id: string;
|
|
56
|
+
filename: string;
|
|
57
|
+
bytes: number;
|
|
58
|
+
status: string;
|
|
59
|
+
createdAt?: string;
|
|
60
|
+
url?: string;
|
|
61
|
+
}
|
|
62
|
+
export interface FileQueryToolDeps {
|
|
63
|
+
queryFile(params: {
|
|
64
|
+
fileId: string;
|
|
65
|
+
}): Promise<FileInfo>;
|
|
66
|
+
listFiles(params: {
|
|
67
|
+
limit?: number;
|
|
68
|
+
}): Promise<FileInfo[]>;
|
|
69
|
+
}
|
|
70
|
+
export declare function createFileQueryTool(deps: FileQueryToolDeps): PortableTool<FileQueryToolParams>;
|
|
71
|
+
export declare const FILE_DELETE_TOOL_NAME: "file_delete";
|
|
72
|
+
export interface FileDeleteToolParams {
|
|
73
|
+
file_id: string;
|
|
74
|
+
}
|
|
75
|
+
export declare const FILE_DELETE_TOOL_SCHEMA: {
|
|
76
|
+
readonly type: "object";
|
|
77
|
+
readonly properties: {
|
|
78
|
+
readonly file_id: {
|
|
79
|
+
readonly type: "string";
|
|
80
|
+
readonly description: "ID of the uploaded file to delete.";
|
|
81
|
+
};
|
|
82
|
+
};
|
|
83
|
+
readonly required: readonly ["file_id"];
|
|
84
|
+
};
|
|
85
|
+
export interface FileDeleteToolDeps {
|
|
86
|
+
deleteFile(params: {
|
|
87
|
+
fileId: string;
|
|
88
|
+
}): Promise<void>;
|
|
89
|
+
}
|
|
90
|
+
export declare function createFileDeleteTool(deps: FileDeleteToolDeps): PortableTool<FileDeleteToolParams>;
|